diff --git "a/checkpoint-4000/trainer_state.json" "b/checkpoint-4000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-4000/trainer_state.json" @@ -0,0 +1,133610 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.1501186282263283, + "eval_steps": 500, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "ce_ib": 65.99971008300781, + "ce_orig": 0.8247115612030029, + "epoch": 0, + "kl_loss": 3969.01025390625, + "loss_ib": 39.756099700927734, + "step": 0 + }, + { + "ce_ib": 61.875301361083984, + "ce_orig": 0.3094598948955536, + "epoch": 0, + "kl_loss": 1816.435302734375, + "loss_ib": 18.226226806640625, + "step": 0 + }, + { + "ce_ib": 65.33805084228516, + "ce_orig": 1.0820972919464111, + "epoch": 0, + "kl_loss": 4051.13818359375, + "loss_ib": 40.576717376708984, + "step": 0 + }, + { + "ce_ib": 65.36083221435547, + "ce_orig": 0.8601827025413513, + "epoch": 0, + "kl_loss": 3727.80126953125, + "loss_ib": 37.3433723449707, + "step": 0 + }, + { + "ce_ib": 64.40461730957031, + "ce_orig": 1.3601988554000854, + "epoch": 0.00028758357897764035, + "kl_loss": 3548.660888671875, + "loss_ib": 35.5510139465332, + "step": 1 + }, + { + "ce_ib": 66.136474609375, + "ce_orig": 0.9451982975006104, + "epoch": 0.00028758357897764035, + "kl_loss": 4003.119140625, + "loss_ib": 40.097328186035156, + "step": 1 + }, + { + "ce_ib": 65.30732727050781, + "ce_orig": 1.3611608743667603, + "epoch": 0.00028758357897764035, + "kl_loss": 3076.302490234375, + "loss_ib": 30.828330993652344, + "step": 1 + }, + { + "ce_ib": 63.613216400146484, + "ce_orig": 0.5681392550468445, + "epoch": 0.00028758357897764035, + "kl_loss": 3922.22265625, + "loss_ib": 39.28583908081055, + "step": 1 + }, + { + "ce_ib": 65.20169067382812, + "ce_orig": 0.9869711399078369, + "epoch": 0.0005751671579552807, + "kl_loss": 4010.333251953125, + "loss_ib": 40.16853332519531, + "step": 2 + }, + { + "ce_ib": 64.6613540649414, + "ce_orig": 1.0124142169952393, + "epoch": 0.0005751671579552807, + "kl_loss": 3416.4658203125, + "loss_ib": 34.22931671142578, + "step": 2 + }, + { + "ce_ib": 64.3924560546875, + "ce_orig": 0.825140118598938, + "epoch": 0.0005751671579552807, + "kl_loss": 3954.5244140625, + "loss_ib": 39.60963439941406, + "step": 2 + }, + { + "ce_ib": 66.31563568115234, + "ce_orig": 1.6114795207977295, + "epoch": 0.0005751671579552807, + "kl_loss": 3360.53955078125, + "loss_ib": 33.67171096801758, + "step": 2 + }, + { + "ce_ib": 63.97846603393555, + "ce_orig": 1.0248628854751587, + "epoch": 0.0008627507369329212, + "kl_loss": 3866.74462890625, + "loss_ib": 38.73142623901367, + "step": 3 + }, + { + "ce_ib": 64.94669342041016, + "ce_orig": 0.7158174514770508, + "epoch": 0.0008627507369329212, + "kl_loss": 3586.52783203125, + "loss_ib": 35.93022537231445, + "step": 3 + }, + { + "ce_ib": 66.78568267822266, + "ce_orig": 1.1728931665420532, + "epoch": 0.0008627507369329212, + "kl_loss": 3981.269775390625, + "loss_ib": 39.87948226928711, + "step": 3 + }, + { + "ce_ib": 66.30445861816406, + "ce_orig": 0.9273799657821655, + "epoch": 0.0008627507369329212, + "kl_loss": 3999.728271484375, + "loss_ib": 40.0635871887207, + "step": 3 + }, + { + "ce_ib": 63.22294616699219, + "ce_orig": 0.6721798181533813, + "epoch": 0.0011503343159105614, + "kl_loss": 3434.2626953125, + "loss_ib": 34.40584945678711, + "step": 4 + }, + { + "ce_ib": 65.629150390625, + "ce_orig": 0.851636528968811, + "epoch": 0.0011503343159105614, + "kl_loss": 3777.80029296875, + "loss_ib": 37.843631744384766, + "step": 4 + }, + { + "ce_ib": 65.70416259765625, + "ce_orig": 0.8407150506973267, + "epoch": 0.0011503343159105614, + "kl_loss": 3663.44775390625, + "loss_ib": 36.70018005371094, + "step": 4 + }, + { + "ce_ib": 65.25149536132812, + "ce_orig": 0.8431562781333923, + "epoch": 0.0011503343159105614, + "kl_loss": 4073.102783203125, + "loss_ib": 40.79627990722656, + "step": 4 + }, + { + "epoch": 0.0014379178948882019, + "grad_norm": Infinity, + "learning_rate": 0.0, + "loss": 37.6651, + "step": 5 + }, + { + "ce_ib": 63.31033706665039, + "ce_orig": 0.5193647146224976, + "epoch": 0.0014379178948882019, + "kl_loss": 3829.75732421875, + "loss_ib": 38.36088180541992, + "step": 5 + }, + { + "ce_ib": 64.82113647460938, + "ce_orig": 0.9080048203468323, + "epoch": 0.0014379178948882019, + "kl_loss": 4034.60400390625, + "loss_ib": 40.41086196899414, + "step": 5 + }, + { + "ce_ib": 67.75746154785156, + "ce_orig": 1.7583141326904297, + "epoch": 0.0014379178948882019, + "kl_loss": 3362.895751953125, + "loss_ib": 33.696712493896484, + "step": 5 + }, + { + "ce_ib": 65.55052947998047, + "ce_orig": 1.0019645690917969, + "epoch": 0.0014379178948882019, + "kl_loss": 3561.7119140625, + "loss_ib": 35.68266677856445, + "step": 5 + }, + { + "ce_ib": 65.5093765258789, + "ce_orig": 1.2022827863693237, + "epoch": 0.0017255014738658423, + "kl_loss": 3854.793212890625, + "loss_ib": 38.613441467285156, + "step": 6 + }, + { + "ce_ib": 63.95633316040039, + "ce_orig": 0.5561846494674683, + "epoch": 0.0017255014738658423, + "kl_loss": 3231.163818359375, + "loss_ib": 32.37559509277344, + "step": 6 + }, + { + "ce_ib": 66.91143798828125, + "ce_orig": 1.007911205291748, + "epoch": 0.0017255014738658423, + "kl_loss": 3694.936767578125, + "loss_ib": 37.01627731323242, + "step": 6 + }, + { + "ce_ib": 65.86326599121094, + "ce_orig": 1.1325939893722534, + "epoch": 0.0017255014738658423, + "kl_loss": 3653.87255859375, + "loss_ib": 36.60458755493164, + "step": 6 + }, + { + "ce_ib": 61.932804107666016, + "ce_orig": 0.3588312268257141, + "epoch": 0.0020130850528434826, + "kl_loss": 2617.568359375, + "loss_ib": 26.23761749267578, + "step": 7 + }, + { + "ce_ib": 66.4891586303711, + "ce_orig": 0.9551964402198792, + "epoch": 0.0020130850528434826, + "kl_loss": 4009.619140625, + "loss_ib": 40.162681579589844, + "step": 7 + }, + { + "ce_ib": 64.68766021728516, + "ce_orig": 1.3480956554412842, + "epoch": 0.0020130850528434826, + "kl_loss": 3682.406494140625, + "loss_ib": 36.88875198364258, + "step": 7 + }, + { + "ce_ib": 65.71851348876953, + "ce_orig": 1.4119411706924438, + "epoch": 0.0020130850528434826, + "kl_loss": 3544.10595703125, + "loss_ib": 35.50677490234375, + "step": 7 + }, + { + "ce_ib": 64.80267333984375, + "ce_orig": 1.1264560222625732, + "epoch": 0.002300668631821123, + "kl_loss": 3803.631103515625, + "loss_ib": 38.101112365722656, + "step": 8 + }, + { + "ce_ib": 64.57341766357422, + "ce_orig": 0.8282275199890137, + "epoch": 0.002300668631821123, + "kl_loss": 4064.74267578125, + "loss_ib": 40.711997985839844, + "step": 8 + }, + { + "ce_ib": 64.71014404296875, + "ce_orig": 0.8245378732681274, + "epoch": 0.002300668631821123, + "kl_loss": 3696.43896484375, + "loss_ib": 37.02909851074219, + "step": 8 + }, + { + "ce_ib": 66.23856353759766, + "ce_orig": 0.7464695572853088, + "epoch": 0.002300668631821123, + "kl_loss": 3910.202880859375, + "loss_ib": 39.16826629638672, + "step": 8 + }, + { + "ce_ib": 66.02950286865234, + "ce_orig": 1.2234686613082886, + "epoch": 0.0025882522107987635, + "kl_loss": 3270.403076171875, + "loss_ib": 32.77006149291992, + "step": 9 + }, + { + "ce_ib": 61.797386169433594, + "ce_orig": 0.6015214920043945, + "epoch": 0.0025882522107987635, + "kl_loss": 3816.387939453125, + "loss_ib": 38.22567367553711, + "step": 9 + }, + { + "ce_ib": 61.85765075683594, + "ce_orig": 0.6827896237373352, + "epoch": 0.0025882522107987635, + "kl_loss": 3886.591064453125, + "loss_ib": 38.92776870727539, + "step": 9 + }, + { + "ce_ib": 66.08187866210938, + "ce_orig": 1.3109632730484009, + "epoch": 0.0025882522107987635, + "kl_loss": 3950.779541015625, + "loss_ib": 39.573875427246094, + "step": 9 + }, + { + "epoch": 0.0028758357897764038, + "grad_norm": 519.0091552734375, + "learning_rate": 1.2738853503184715e-07, + "loss": 37.7545, + "step": 10 + }, + { + "ce_ib": 64.04639434814453, + "ce_orig": 0.7621712684631348, + "epoch": 0.0028758357897764038, + "kl_loss": 3556.8876953125, + "loss_ib": 35.6329231262207, + "step": 10 + }, + { + "ce_ib": 68.0383071899414, + "ce_orig": 1.6497186422348022, + "epoch": 0.0028758357897764038, + "kl_loss": 3772.04345703125, + "loss_ib": 37.78847122192383, + "step": 10 + }, + { + "ce_ib": 68.69857025146484, + "ce_orig": 1.7943047285079956, + "epoch": 0.0028758357897764038, + "kl_loss": 3361.59521484375, + "loss_ib": 33.68465042114258, + "step": 10 + }, + { + "ce_ib": 66.5051040649414, + "ce_orig": 0.9888308644294739, + "epoch": 0.0028758357897764038, + "kl_loss": 3659.6396484375, + "loss_ib": 36.662899017333984, + "step": 10 + }, + { + "ce_ib": 69.01343536376953, + "ce_orig": 1.8538011312484741, + "epoch": 0.003163419368754044, + "kl_loss": 3817.212158203125, + "loss_ib": 38.24113464355469, + "step": 11 + }, + { + "ce_ib": 66.35260772705078, + "ce_orig": 1.5063494443893433, + "epoch": 0.003163419368754044, + "kl_loss": 3289.161376953125, + "loss_ib": 32.95796585083008, + "step": 11 + }, + { + "ce_ib": 63.46610641479492, + "ce_orig": 0.9150936007499695, + "epoch": 0.003163419368754044, + "kl_loss": 3871.0595703125, + "loss_ib": 38.77406311035156, + "step": 11 + }, + { + "ce_ib": 65.04010009765625, + "ce_orig": 0.8206998705863953, + "epoch": 0.003163419368754044, + "kl_loss": 1842.705322265625, + "loss_ib": 18.492094039916992, + "step": 11 + }, + { + "ce_ib": 64.97047424316406, + "ce_orig": 1.1343697309494019, + "epoch": 0.0034510029477316847, + "kl_loss": 3761.435546875, + "loss_ib": 37.679325103759766, + "step": 12 + }, + { + "ce_ib": 65.20761108398438, + "ce_orig": 0.8448718190193176, + "epoch": 0.0034510029477316847, + "kl_loss": 3675.133544921875, + "loss_ib": 36.81654357910156, + "step": 12 + }, + { + "ce_ib": 61.798465728759766, + "ce_orig": 0.8912767767906189, + "epoch": 0.0034510029477316847, + "kl_loss": 3862.031982421875, + "loss_ib": 38.6821174621582, + "step": 12 + }, + { + "ce_ib": 62.26899337768555, + "ce_orig": 0.6894794702529907, + "epoch": 0.0034510029477316847, + "kl_loss": 3852.8837890625, + "loss_ib": 38.59110641479492, + "step": 12 + }, + { + "ce_ib": 62.82128143310547, + "ce_orig": 0.8209494948387146, + "epoch": 0.003738586526709325, + "kl_loss": 4055.579345703125, + "loss_ib": 40.618614196777344, + "step": 13 + }, + { + "ce_ib": 63.56863021850586, + "ce_orig": 0.6192977428436279, + "epoch": 0.003738586526709325, + "kl_loss": 3213.953369140625, + "loss_ib": 32.203102111816406, + "step": 13 + }, + { + "ce_ib": 63.4449348449707, + "ce_orig": 0.8305644392967224, + "epoch": 0.003738586526709325, + "kl_loss": 4162.50732421875, + "loss_ib": 41.68851852416992, + "step": 13 + }, + { + "ce_ib": 63.81005859375, + "ce_orig": 0.63532555103302, + "epoch": 0.003738586526709325, + "kl_loss": 3868.43896484375, + "loss_ib": 38.748199462890625, + "step": 13 + }, + { + "ce_ib": 63.38967514038086, + "ce_orig": 1.0747102499008179, + "epoch": 0.004026170105686965, + "kl_loss": 3797.86328125, + "loss_ib": 38.04201889038086, + "step": 14 + }, + { + "ce_ib": 64.95621490478516, + "ce_orig": 0.2551676332950592, + "epoch": 0.004026170105686965, + "kl_loss": 3229.682861328125, + "loss_ib": 32.36178207397461, + "step": 14 + }, + { + "ce_ib": 63.782310485839844, + "ce_orig": 0.9092867374420166, + "epoch": 0.004026170105686965, + "kl_loss": 4164.35009765625, + "loss_ib": 41.70728302001953, + "step": 14 + }, + { + "ce_ib": 62.4797477722168, + "ce_orig": 0.47730547189712524, + "epoch": 0.004026170105686965, + "kl_loss": 3852.087158203125, + "loss_ib": 38.583351135253906, + "step": 14 + }, + { + "epoch": 0.004313753684664605, + "grad_norm": 523.5825805664062, + "learning_rate": 2.8662420382165606e-07, + "loss": 37.7441, + "step": 15 + }, + { + "ce_ib": 66.0411148071289, + "ce_orig": 1.2483270168304443, + "epoch": 0.004313753684664605, + "kl_loss": 3730.42578125, + "loss_ib": 37.370296478271484, + "step": 15 + }, + { + "ce_ib": 62.428306579589844, + "ce_orig": 0.6228333711624146, + "epoch": 0.004313753684664605, + "kl_loss": 3604.6357421875, + "loss_ib": 36.108787536621094, + "step": 15 + }, + { + "ce_ib": 63.51506042480469, + "ce_orig": 1.2859349250793457, + "epoch": 0.004313753684664605, + "kl_loss": 4202.3095703125, + "loss_ib": 42.08660888671875, + "step": 15 + }, + { + "ce_ib": 63.46177291870117, + "ce_orig": 0.7081962823867798, + "epoch": 0.004313753684664605, + "kl_loss": 3794.773193359375, + "loss_ib": 38.011192321777344, + "step": 15 + }, + { + "ce_ib": 63.84330749511719, + "ce_orig": 0.7915632724761963, + "epoch": 0.004601337263642246, + "kl_loss": 3308.423828125, + "loss_ib": 33.14807891845703, + "step": 16 + }, + { + "ce_ib": 64.69558715820312, + "ce_orig": 1.4574185609817505, + "epoch": 0.004601337263642246, + "kl_loss": 3845.239990234375, + "loss_ib": 38.517093658447266, + "step": 16 + }, + { + "ce_ib": 63.32929992675781, + "ce_orig": 0.954424262046814, + "epoch": 0.004601337263642246, + "kl_loss": 3470.448486328125, + "loss_ib": 34.76781463623047, + "step": 16 + }, + { + "ce_ib": 67.40885925292969, + "ce_orig": 0.8406963348388672, + "epoch": 0.004601337263642246, + "kl_loss": 4098.966796875, + "loss_ib": 41.05707550048828, + "step": 16 + }, + { + "ce_ib": 66.77290344238281, + "ce_orig": 1.3419686555862427, + "epoch": 0.004888920842619887, + "kl_loss": 3622.17578125, + "loss_ib": 36.28852844238281, + "step": 17 + }, + { + "ce_ib": 62.80875778198242, + "ce_orig": 0.6389923691749573, + "epoch": 0.004888920842619887, + "kl_loss": 4102.7236328125, + "loss_ib": 41.09004211425781, + "step": 17 + }, + { + "ce_ib": 64.88585662841797, + "ce_orig": 1.1766713857650757, + "epoch": 0.004888920842619887, + "kl_loss": 1908.9375, + "loss_ib": 19.154260635375977, + "step": 17 + }, + { + "ce_ib": 65.38214874267578, + "ce_orig": 1.252450942993164, + "epoch": 0.004888920842619887, + "kl_loss": 3970.094482421875, + "loss_ib": 39.766326904296875, + "step": 17 + }, + { + "ce_ib": 64.7368392944336, + "ce_orig": 1.1009352207183838, + "epoch": 0.005176504421597527, + "kl_loss": 3504.70361328125, + "loss_ib": 35.11177062988281, + "step": 18 + }, + { + "ce_ib": 62.09738540649414, + "ce_orig": 0.480591744184494, + "epoch": 0.005176504421597527, + "kl_loss": 3198.1953125, + "loss_ib": 32.04404830932617, + "step": 18 + }, + { + "ce_ib": 65.45724487304688, + "ce_orig": 0.8052865266799927, + "epoch": 0.005176504421597527, + "kl_loss": 3857.419189453125, + "loss_ib": 38.6396484375, + "step": 18 + }, + { + "ce_ib": 65.82563781738281, + "ce_orig": 1.2099261283874512, + "epoch": 0.005176504421597527, + "kl_loss": 3628.470703125, + "loss_ib": 36.35053253173828, + "step": 18 + }, + { + "ce_ib": 64.87178802490234, + "ce_orig": 1.0739271640777588, + "epoch": 0.005464088000575167, + "kl_loss": 3784.338623046875, + "loss_ib": 37.90825653076172, + "step": 19 + }, + { + "ce_ib": 66.35687255859375, + "ce_orig": 1.3444932699203491, + "epoch": 0.005464088000575167, + "kl_loss": 3339.950927734375, + "loss_ib": 33.46586608886719, + "step": 19 + }, + { + "ce_ib": 67.06342315673828, + "ce_orig": 0.8359652757644653, + "epoch": 0.005464088000575167, + "kl_loss": 4230.32666015625, + "loss_ib": 42.370330810546875, + "step": 19 + }, + { + "ce_ib": 67.96249389648438, + "ce_orig": 1.7302289009094238, + "epoch": 0.005464088000575167, + "kl_loss": 3579.07421875, + "loss_ib": 35.85870361328125, + "step": 19 + }, + { + "epoch": 0.0057516715795528075, + "grad_norm": 525.4837036132812, + "learning_rate": 4.45859872611465e-07, + "loss": 38.2435, + "step": 20 + }, + { + "ce_ib": 64.20391082763672, + "ce_orig": 0.7589595913887024, + "epoch": 0.0057516715795528075, + "kl_loss": 4128.66064453125, + "loss_ib": 41.35081100463867, + "step": 20 + }, + { + "ce_ib": 62.830806732177734, + "ce_orig": 0.6316859126091003, + "epoch": 0.0057516715795528075, + "kl_loss": 3403.72509765625, + "loss_ib": 34.10007858276367, + "step": 20 + }, + { + "ce_ib": 65.20977783203125, + "ce_orig": 0.8842067718505859, + "epoch": 0.0057516715795528075, + "kl_loss": 4221.73095703125, + "loss_ib": 42.28252029418945, + "step": 20 + }, + { + "ce_ib": 63.80289077758789, + "ce_orig": 1.1217824220657349, + "epoch": 0.0057516715795528075, + "kl_loss": 3923.58349609375, + "loss_ib": 39.29963684082031, + "step": 20 + }, + { + "ce_ib": 63.151161193847656, + "ce_orig": 0.725497305393219, + "epoch": 0.006039255158530448, + "kl_loss": 4119.48046875, + "loss_ib": 41.25795364379883, + "step": 21 + }, + { + "ce_ib": 62.518638610839844, + "ce_orig": 0.6902149319648743, + "epoch": 0.006039255158530448, + "kl_loss": 3765.32373046875, + "loss_ib": 37.715755462646484, + "step": 21 + }, + { + "ce_ib": 64.55998229980469, + "ce_orig": 1.0123672485351562, + "epoch": 0.006039255158530448, + "kl_loss": 3793.22509765625, + "loss_ib": 37.99681091308594, + "step": 21 + }, + { + "ce_ib": 61.955604553222656, + "ce_orig": 0.528033971786499, + "epoch": 0.006039255158530448, + "kl_loss": 3723.4833984375, + "loss_ib": 37.29678726196289, + "step": 21 + }, + { + "ce_ib": 62.95112609863281, + "ce_orig": 0.7799142003059387, + "epoch": 0.006326838737508088, + "kl_loss": 3707.60546875, + "loss_ib": 37.13900375366211, + "step": 22 + }, + { + "ce_ib": 63.943023681640625, + "ce_orig": 0.836663544178009, + "epoch": 0.006326838737508088, + "kl_loss": 3695.3837890625, + "loss_ib": 37.01778030395508, + "step": 22 + }, + { + "ce_ib": 61.96399688720703, + "ce_orig": 0.5541026592254639, + "epoch": 0.006326838737508088, + "kl_loss": 3717.060302734375, + "loss_ib": 37.232566833496094, + "step": 22 + }, + { + "ce_ib": 65.02377319335938, + "ce_orig": 1.0211303234100342, + "epoch": 0.006326838737508088, + "kl_loss": 3948.7412109375, + "loss_ib": 39.55243682861328, + "step": 22 + }, + { + "ce_ib": 64.2979736328125, + "ce_orig": 1.0038220882415771, + "epoch": 0.006614422316485728, + "kl_loss": 3556.190185546875, + "loss_ib": 35.626197814941406, + "step": 23 + }, + { + "ce_ib": 64.74838256835938, + "ce_orig": 1.4320262670516968, + "epoch": 0.006614422316485728, + "kl_loss": 3511.77587890625, + "loss_ib": 35.1825065612793, + "step": 23 + }, + { + "ce_ib": 66.12266540527344, + "ce_orig": 1.3454687595367432, + "epoch": 0.006614422316485728, + "kl_loss": 3485.62158203125, + "loss_ib": 34.92233657836914, + "step": 23 + }, + { + "ce_ib": 65.04991912841797, + "ce_orig": 1.1041706800460815, + "epoch": 0.006614422316485728, + "kl_loss": 3322.958984375, + "loss_ib": 33.294639587402344, + "step": 23 + }, + { + "ce_ib": 63.05337905883789, + "ce_orig": 0.8803771734237671, + "epoch": 0.006902005895463369, + "kl_loss": 4095.29345703125, + "loss_ib": 41.01598358154297, + "step": 24 + }, + { + "ce_ib": 62.59025192260742, + "ce_orig": 0.5355072021484375, + "epoch": 0.006902005895463369, + "kl_loss": 2086.240478515625, + "loss_ib": 20.92499542236328, + "step": 24 + }, + { + "ce_ib": 63.184295654296875, + "ce_orig": 0.7724276781082153, + "epoch": 0.006902005895463369, + "kl_loss": 4004.15185546875, + "loss_ib": 40.10470199584961, + "step": 24 + }, + { + "ce_ib": 65.41134643554688, + "ce_orig": 0.9222034811973572, + "epoch": 0.006902005895463369, + "kl_loss": 4123.5751953125, + "loss_ib": 41.30116271972656, + "step": 24 + }, + { + "epoch": 0.00718958947444101, + "grad_norm": 504.01654052734375, + "learning_rate": 6.05095541401274e-07, + "loss": 37.9487, + "step": 25 + }, + { + "ce_ib": 65.42027282714844, + "ce_orig": 1.304446816444397, + "epoch": 0.00718958947444101, + "kl_loss": 3540.853515625, + "loss_ib": 35.47395324707031, + "step": 25 + }, + { + "ce_ib": 61.20964050292969, + "ce_orig": 0.4327137768268585, + "epoch": 0.00718958947444101, + "kl_loss": 3690.677734375, + "loss_ib": 36.96798324584961, + "step": 25 + }, + { + "ce_ib": 64.6102066040039, + "ce_orig": 1.0307230949401855, + "epoch": 0.00718958947444101, + "kl_loss": 4015.37060546875, + "loss_ib": 40.21831512451172, + "step": 25 + }, + { + "ce_ib": 64.76322174072266, + "ce_orig": 0.9839794039726257, + "epoch": 0.00718958947444101, + "kl_loss": 4032.43212890625, + "loss_ib": 40.38908386230469, + "step": 25 + }, + { + "ce_ib": 65.33113098144531, + "ce_orig": 1.4617711305618286, + "epoch": 0.00747717305341865, + "kl_loss": 3820.6474609375, + "loss_ib": 38.27180480957031, + "step": 26 + }, + { + "ce_ib": 65.57540130615234, + "ce_orig": 0.7502631545066833, + "epoch": 0.00747717305341865, + "kl_loss": 3953.942138671875, + "loss_ib": 39.60499572753906, + "step": 26 + }, + { + "ce_ib": 66.47959899902344, + "ce_orig": 1.3454749584197998, + "epoch": 0.00747717305341865, + "kl_loss": 3569.14794921875, + "loss_ib": 35.757957458496094, + "step": 26 + }, + { + "ce_ib": 66.15406799316406, + "ce_orig": 1.0591317415237427, + "epoch": 0.00747717305341865, + "kl_loss": 3647.95068359375, + "loss_ib": 36.54566192626953, + "step": 26 + }, + { + "ce_ib": 61.56562042236328, + "ce_orig": 0.7531498074531555, + "epoch": 0.00776475663239629, + "kl_loss": 3646.81689453125, + "loss_ib": 36.52973175048828, + "step": 27 + }, + { + "ce_ib": 63.738616943359375, + "ce_orig": 0.7576659321784973, + "epoch": 0.00776475663239629, + "kl_loss": 4090.55078125, + "loss_ib": 40.96924591064453, + "step": 27 + }, + { + "ce_ib": 63.68565368652344, + "ce_orig": 0.8695321083068848, + "epoch": 0.00776475663239629, + "kl_loss": 3782.551025390625, + "loss_ib": 37.88919448852539, + "step": 27 + }, + { + "ce_ib": 62.30582046508789, + "ce_orig": 0.5045351982116699, + "epoch": 0.00776475663239629, + "kl_loss": 3620.3427734375, + "loss_ib": 36.26573181152344, + "step": 27 + }, + { + "ce_ib": 63.34856414794922, + "ce_orig": 0.741316556930542, + "epoch": 0.00805234021137393, + "kl_loss": 4024.19580078125, + "loss_ib": 40.30530548095703, + "step": 28 + }, + { + "ce_ib": 64.60435485839844, + "ce_orig": 0.7678440809249878, + "epoch": 0.00805234021137393, + "kl_loss": 3513.181884765625, + "loss_ib": 35.1964225769043, + "step": 28 + }, + { + "ce_ib": 64.85627746582031, + "ce_orig": 1.3411056995391846, + "epoch": 0.00805234021137393, + "kl_loss": 3659.3564453125, + "loss_ib": 36.65842056274414, + "step": 28 + }, + { + "ce_ib": 68.69432830810547, + "ce_orig": 1.4908052682876587, + "epoch": 0.00805234021137393, + "kl_loss": 4179.46630859375, + "loss_ib": 41.86335754394531, + "step": 28 + }, + { + "ce_ib": 64.14762115478516, + "ce_orig": 0.7301002740859985, + "epoch": 0.008339923790351571, + "kl_loss": 4014.65771484375, + "loss_ib": 40.210723876953125, + "step": 29 + }, + { + "ce_ib": 65.77961730957031, + "ce_orig": 1.078151822090149, + "epoch": 0.008339923790351571, + "kl_loss": 3336.4443359375, + "loss_ib": 33.43022155761719, + "step": 29 + }, + { + "ce_ib": 62.39012908935547, + "ce_orig": 1.5332895517349243, + "epoch": 0.008339923790351571, + "kl_loss": 4004.0390625, + "loss_ib": 40.102779388427734, + "step": 29 + }, + { + "ce_ib": 63.807186126708984, + "ce_orig": 0.9249582886695862, + "epoch": 0.008339923790351571, + "kl_loss": 3553.84716796875, + "loss_ib": 35.60227584838867, + "step": 29 + }, + { + "epoch": 0.00862750736932921, + "grad_norm": 537.8850708007812, + "learning_rate": 7.643312101910829e-07, + "loss": 38.6273, + "step": 30 + }, + { + "ce_ib": 64.0005111694336, + "ce_orig": 0.8134416937828064, + "epoch": 0.00862750736932921, + "kl_loss": 3766.99658203125, + "loss_ib": 37.73396682739258, + "step": 30 + }, + { + "ce_ib": 65.6531982421875, + "ce_orig": 1.2596931457519531, + "epoch": 0.00862750736932921, + "kl_loss": 3756.150146484375, + "loss_ib": 37.62715530395508, + "step": 30 + }, + { + "ce_ib": 66.32474517822266, + "ce_orig": 1.5833230018615723, + "epoch": 0.00862750736932921, + "kl_loss": 3616.305419921875, + "loss_ib": 36.22937774658203, + "step": 30 + }, + { + "ce_ib": 68.49303436279297, + "ce_orig": 1.2524874210357666, + "epoch": 0.00862750736932921, + "kl_loss": 3675.2001953125, + "loss_ib": 36.82049560546875, + "step": 30 + }, + { + "ce_ib": 66.44476318359375, + "ce_orig": 1.3207565546035767, + "epoch": 0.008915090948306852, + "kl_loss": 3776.052734375, + "loss_ib": 37.82697296142578, + "step": 31 + }, + { + "ce_ib": 66.1202163696289, + "ce_orig": 1.7769383192062378, + "epoch": 0.008915090948306852, + "kl_loss": 3872.0908203125, + "loss_ib": 38.78702926635742, + "step": 31 + }, + { + "ce_ib": 64.21722412109375, + "ce_orig": 1.2050706148147583, + "epoch": 0.008915090948306852, + "kl_loss": 3775.35009765625, + "loss_ib": 37.81771469116211, + "step": 31 + }, + { + "ce_ib": 64.00657653808594, + "ce_orig": 0.745306670665741, + "epoch": 0.008915090948306852, + "kl_loss": 4056.444580078125, + "loss_ib": 40.62845230102539, + "step": 31 + }, + { + "ce_ib": 64.75992584228516, + "ce_orig": 0.9638186097145081, + "epoch": 0.009202674527284491, + "kl_loss": 3975.2265625, + "loss_ib": 39.81702423095703, + "step": 32 + }, + { + "ce_ib": 63.76476287841797, + "ce_orig": 0.7001180052757263, + "epoch": 0.009202674527284491, + "kl_loss": 3708.013671875, + "loss_ib": 37.14390182495117, + "step": 32 + }, + { + "ce_ib": 62.59078598022461, + "ce_orig": 0.581017255783081, + "epoch": 0.009202674527284491, + "kl_loss": 4050.337890625, + "loss_ib": 40.56596755981445, + "step": 32 + }, + { + "ce_ib": 62.47100830078125, + "ce_orig": 0.4765642583370209, + "epoch": 0.009202674527284491, + "kl_loss": 3852.07666015625, + "loss_ib": 38.58323669433594, + "step": 32 + }, + { + "ce_ib": 66.74118041992188, + "ce_orig": 1.0750036239624023, + "epoch": 0.009490258106262132, + "kl_loss": 3840.857177734375, + "loss_ib": 38.475311279296875, + "step": 33 + }, + { + "ce_ib": 62.256229400634766, + "ce_orig": 1.111011028289795, + "epoch": 0.009490258106262132, + "kl_loss": 3756.697265625, + "loss_ib": 37.62922668457031, + "step": 33 + }, + { + "ce_ib": 60.74306869506836, + "ce_orig": 0.322427362203598, + "epoch": 0.009490258106262132, + "kl_loss": 3481.74169921875, + "loss_ib": 34.87815856933594, + "step": 33 + }, + { + "ce_ib": 63.85698318481445, + "ce_orig": 1.245665192604065, + "epoch": 0.009490258106262132, + "kl_loss": 3702.04638671875, + "loss_ib": 37.084320068359375, + "step": 33 + }, + { + "ce_ib": 64.15027618408203, + "ce_orig": 0.7340657114982605, + "epoch": 0.009777841685239774, + "kl_loss": 2706.0263671875, + "loss_ib": 27.124412536621094, + "step": 34 + }, + { + "ce_ib": 62.855018615722656, + "ce_orig": 1.0289608240127563, + "epoch": 0.009777841685239774, + "kl_loss": 3802.005615234375, + "loss_ib": 38.082908630371094, + "step": 34 + }, + { + "ce_ib": 63.055484771728516, + "ce_orig": 0.7458648681640625, + "epoch": 0.009777841685239774, + "kl_loss": 3990.32861328125, + "loss_ib": 39.96634292602539, + "step": 34 + }, + { + "ce_ib": 61.685733795166016, + "ce_orig": 0.4432576894760132, + "epoch": 0.009777841685239774, + "kl_loss": 2863.019775390625, + "loss_ib": 28.69188117980957, + "step": 34 + }, + { + "epoch": 0.010065425264217413, + "grad_norm": 523.54052734375, + "learning_rate": 9.235668789808917e-07, + "loss": 37.7138, + "step": 35 + }, + { + "ce_ib": 63.075679779052734, + "ce_orig": 0.7427234053611755, + "epoch": 0.010065425264217413, + "kl_loss": 3800.17724609375, + "loss_ib": 38.06484603881836, + "step": 35 + }, + { + "ce_ib": 66.27129364013672, + "ce_orig": 1.4802910089492798, + "epoch": 0.010065425264217413, + "kl_loss": 3349.9755859375, + "loss_ib": 33.5660285949707, + "step": 35 + }, + { + "ce_ib": 62.79461669921875, + "ce_orig": 0.8912234902381897, + "epoch": 0.010065425264217413, + "kl_loss": 4039.09765625, + "loss_ib": 40.45376968383789, + "step": 35 + }, + { + "ce_ib": 61.9300537109375, + "ce_orig": 0.6817716360092163, + "epoch": 0.010065425264217413, + "kl_loss": 3835.2861328125, + "loss_ib": 38.414791107177734, + "step": 35 + }, + { + "ce_ib": 63.25111770629883, + "ce_orig": 0.9492425918579102, + "epoch": 0.010353008843195054, + "kl_loss": 3599.0205078125, + "loss_ib": 36.0534553527832, + "step": 36 + }, + { + "ce_ib": 64.83221435546875, + "ce_orig": 1.1269358396530151, + "epoch": 0.010353008843195054, + "kl_loss": 3825.91796875, + "loss_ib": 38.32400894165039, + "step": 36 + }, + { + "ce_ib": 63.47658920288086, + "ce_orig": 0.7525137662887573, + "epoch": 0.010353008843195054, + "kl_loss": 3816.482421875, + "loss_ib": 38.22829818725586, + "step": 36 + }, + { + "ce_ib": 67.63275909423828, + "ce_orig": 1.4331247806549072, + "epoch": 0.010353008843195054, + "kl_loss": 3869.70654296875, + "loss_ib": 38.76469802856445, + "step": 36 + }, + { + "ce_ib": 62.58089065551758, + "ce_orig": 0.6857898235321045, + "epoch": 0.010640592422172693, + "kl_loss": 3316.5986328125, + "loss_ib": 33.22856521606445, + "step": 37 + }, + { + "ce_ib": 66.71737670898438, + "ce_orig": 1.6872270107269287, + "epoch": 0.010640592422172693, + "kl_loss": 3741.76953125, + "loss_ib": 37.48440933227539, + "step": 37 + }, + { + "ce_ib": 64.51302337646484, + "ce_orig": 1.0037118196487427, + "epoch": 0.010640592422172693, + "kl_loss": 4072.7705078125, + "loss_ib": 40.79221725463867, + "step": 37 + }, + { + "ce_ib": 64.44024658203125, + "ce_orig": 1.0666587352752686, + "epoch": 0.010640592422172693, + "kl_loss": 3476.2373046875, + "loss_ib": 34.826812744140625, + "step": 37 + }, + { + "ce_ib": 63.56709289550781, + "ce_orig": 0.6757309436798096, + "epoch": 0.010928176001150335, + "kl_loss": 3920.34814453125, + "loss_ib": 39.26704788208008, + "step": 38 + }, + { + "ce_ib": 66.18359375, + "ce_orig": 1.389379620552063, + "epoch": 0.010928176001150335, + "kl_loss": 3573.64013671875, + "loss_ib": 35.80258560180664, + "step": 38 + }, + { + "ce_ib": 65.27085876464844, + "ce_orig": 0.9928706884384155, + "epoch": 0.010928176001150335, + "kl_loss": 3934.83349609375, + "loss_ib": 39.413604736328125, + "step": 38 + }, + { + "ce_ib": 62.60868453979492, + "ce_orig": 0.5065615773200989, + "epoch": 0.010928176001150335, + "kl_loss": 3360.3466796875, + "loss_ib": 33.66607666015625, + "step": 38 + }, + { + "ce_ib": 63.16704177856445, + "ce_orig": 0.6447534561157227, + "epoch": 0.011215759580127974, + "kl_loss": 3981.157470703125, + "loss_ib": 39.87474060058594, + "step": 39 + }, + { + "ce_ib": 66.88977813720703, + "ce_orig": 1.1577696800231934, + "epoch": 0.011215759580127974, + "kl_loss": 3895.958984375, + "loss_ib": 39.0264778137207, + "step": 39 + }, + { + "ce_ib": 66.50093841552734, + "ce_orig": 1.4465612173080444, + "epoch": 0.011215759580127974, + "kl_loss": 3656.223388671875, + "loss_ib": 36.62873458862305, + "step": 39 + }, + { + "ce_ib": 63.415382385253906, + "ce_orig": 0.7691327929496765, + "epoch": 0.011215759580127974, + "kl_loss": 4074.531005859375, + "loss_ib": 40.80872344970703, + "step": 39 + }, + { + "epoch": 0.011503343159105615, + "grad_norm": 543.6448364257812, + "learning_rate": 1.0828025477707007e-06, + "loss": 38.3393, + "step": 40 + }, + { + "ce_ib": 68.20819091796875, + "ce_orig": 1.7859582901000977, + "epoch": 0.011503343159105615, + "kl_loss": 3490.180908203125, + "loss_ib": 34.97001647949219, + "step": 40 + }, + { + "ce_ib": 63.85101318359375, + "ce_orig": 0.798017144203186, + "epoch": 0.011503343159105615, + "kl_loss": 3664.87158203125, + "loss_ib": 36.71256637573242, + "step": 40 + }, + { + "ce_ib": 65.26078033447266, + "ce_orig": 1.3482457399368286, + "epoch": 0.011503343159105615, + "kl_loss": 3666.393310546875, + "loss_ib": 36.72919464111328, + "step": 40 + }, + { + "ce_ib": 64.7423324584961, + "ce_orig": 1.047332763671875, + "epoch": 0.011503343159105615, + "kl_loss": 3898.302734375, + "loss_ib": 39.047767639160156, + "step": 40 + }, + { + "ce_ib": 63.11514663696289, + "ce_orig": 0.707227349281311, + "epoch": 0.011790926738083256, + "kl_loss": 3996.748046875, + "loss_ib": 40.03059387207031, + "step": 41 + }, + { + "ce_ib": 62.858245849609375, + "ce_orig": 0.7572628259658813, + "epoch": 0.011790926738083256, + "kl_loss": 3845.11328125, + "loss_ib": 38.51399230957031, + "step": 41 + }, + { + "ce_ib": 64.94149780273438, + "ce_orig": 0.9001584649085999, + "epoch": 0.011790926738083256, + "kl_loss": 3669.0107421875, + "loss_ib": 36.75504684448242, + "step": 41 + }, + { + "ce_ib": 64.22615051269531, + "ce_orig": 0.9319191575050354, + "epoch": 0.011790926738083256, + "kl_loss": 3406.089599609375, + "loss_ib": 34.1251220703125, + "step": 41 + }, + { + "ce_ib": 66.28656005859375, + "ce_orig": 1.7123758792877197, + "epoch": 0.012078510317060896, + "kl_loss": 3858.2216796875, + "loss_ib": 38.648502349853516, + "step": 42 + }, + { + "ce_ib": 63.06233215332031, + "ce_orig": 0.7600352764129639, + "epoch": 0.012078510317060896, + "kl_loss": 3817.84375, + "loss_ib": 38.24149703979492, + "step": 42 + }, + { + "ce_ib": 62.57767868041992, + "ce_orig": 0.9215527772903442, + "epoch": 0.012078510317060896, + "kl_loss": 3511.0380859375, + "loss_ib": 35.17295837402344, + "step": 42 + }, + { + "ce_ib": 64.01197814941406, + "ce_orig": 0.6495408415794373, + "epoch": 0.012078510317060896, + "kl_loss": 3883.279541015625, + "loss_ib": 38.89680480957031, + "step": 42 + }, + { + "ce_ib": 63.950992584228516, + "ce_orig": 0.8470758199691772, + "epoch": 0.012366093896038537, + "kl_loss": 3836.8623046875, + "loss_ib": 38.43257141113281, + "step": 43 + }, + { + "ce_ib": 66.46541595458984, + "ce_orig": 1.070137619972229, + "epoch": 0.012366093896038537, + "kl_loss": 3574.319580078125, + "loss_ib": 35.80965805053711, + "step": 43 + }, + { + "ce_ib": 61.230316162109375, + "ce_orig": 0.6914916634559631, + "epoch": 0.012366093896038537, + "kl_loss": 4103.490234375, + "loss_ib": 41.09613037109375, + "step": 43 + }, + { + "ce_ib": 65.71780395507812, + "ce_orig": 1.2423909902572632, + "epoch": 0.012366093896038537, + "kl_loss": 3505.72607421875, + "loss_ib": 35.12297821044922, + "step": 43 + }, + { + "ce_ib": 63.5611572265625, + "ce_orig": 0.9509873986244202, + "epoch": 0.012653677475016176, + "kl_loss": 3528.08349609375, + "loss_ib": 35.34439468383789, + "step": 44 + }, + { + "ce_ib": 61.90439987182617, + "ce_orig": 1.0406547784805298, + "epoch": 0.012653677475016176, + "kl_loss": 3284.989501953125, + "loss_ib": 32.911800384521484, + "step": 44 + }, + { + "ce_ib": 62.566444396972656, + "ce_orig": 0.9737301468849182, + "epoch": 0.012653677475016176, + "kl_loss": 4007.912841796875, + "loss_ib": 40.141693115234375, + "step": 44 + }, + { + "ce_ib": 62.55556869506836, + "ce_orig": 0.9186174273490906, + "epoch": 0.012653677475016176, + "kl_loss": 3678.3505859375, + "loss_ib": 36.84606170654297, + "step": 44 + }, + { + "epoch": 0.012941261053993817, + "grad_norm": 492.6309509277344, + "learning_rate": 1.2420382165605097e-06, + "loss": 37.2694, + "step": 45 + }, + { + "ce_ib": 63.57815170288086, + "ce_orig": 1.3040772676467896, + "epoch": 0.012941261053993817, + "kl_loss": 3878.02587890625, + "loss_ib": 38.84383773803711, + "step": 45 + }, + { + "ce_ib": 61.98274230957031, + "ce_orig": 0.6795246601104736, + "epoch": 0.012941261053993817, + "kl_loss": 3593.56640625, + "loss_ib": 35.99764633178711, + "step": 45 + }, + { + "ce_ib": 63.4442138671875, + "ce_orig": 0.9516732692718506, + "epoch": 0.012941261053993817, + "kl_loss": 3920.56982421875, + "loss_ib": 39.269142150878906, + "step": 45 + }, + { + "ce_ib": 64.35120391845703, + "ce_orig": 0.7613200545310974, + "epoch": 0.012941261053993817, + "kl_loss": 3579.57763671875, + "loss_ib": 35.86012649536133, + "step": 45 + }, + { + "ce_ib": 62.913352966308594, + "ce_orig": 1.0408005714416504, + "epoch": 0.013228844632971457, + "kl_loss": 3620.77099609375, + "loss_ib": 36.27062225341797, + "step": 46 + }, + { + "ce_ib": 63.58440399169922, + "ce_orig": 0.8055190443992615, + "epoch": 0.013228844632971457, + "kl_loss": 3386.48876953125, + "loss_ib": 33.928470611572266, + "step": 46 + }, + { + "ce_ib": 63.420753479003906, + "ce_orig": 1.1024976968765259, + "epoch": 0.013228844632971457, + "kl_loss": 3643.9453125, + "loss_ib": 36.502872467041016, + "step": 46 + }, + { + "ce_ib": 62.746665954589844, + "ce_orig": 0.7064395546913147, + "epoch": 0.013228844632971457, + "kl_loss": 3931.369873046875, + "loss_ib": 39.37644577026367, + "step": 46 + }, + { + "ce_ib": 63.00592041015625, + "ce_orig": 0.8414040803909302, + "epoch": 0.013516428211949098, + "kl_loss": 4138.1728515625, + "loss_ib": 41.44473648071289, + "step": 47 + }, + { + "ce_ib": 66.07843017578125, + "ce_orig": 0.8475580811500549, + "epoch": 0.013516428211949098, + "kl_loss": 3865.19287109375, + "loss_ib": 38.7180061340332, + "step": 47 + }, + { + "ce_ib": 63.92705535888672, + "ce_orig": 0.9875443577766418, + "epoch": 0.013516428211949098, + "kl_loss": 4158.44189453125, + "loss_ib": 41.64834213256836, + "step": 47 + }, + { + "ce_ib": 68.14057922363281, + "ce_orig": 1.756430745124817, + "epoch": 0.013516428211949098, + "kl_loss": 3738.97314453125, + "loss_ib": 37.45787048339844, + "step": 47 + }, + { + "ce_ib": 63.9988899230957, + "ce_orig": 0.8397009968757629, + "epoch": 0.013804011790926739, + "kl_loss": 3744.52294921875, + "loss_ib": 37.50922775268555, + "step": 48 + }, + { + "ce_ib": 64.73321533203125, + "ce_orig": 1.5420986413955688, + "epoch": 0.013804011790926739, + "kl_loss": 3818.59228515625, + "loss_ib": 38.25065612792969, + "step": 48 + }, + { + "ce_ib": 64.00019073486328, + "ce_orig": 0.5949701070785522, + "epoch": 0.013804011790926739, + "kl_loss": 3568.99609375, + "loss_ib": 35.75395965576172, + "step": 48 + }, + { + "ce_ib": 64.06549072265625, + "ce_orig": 1.3993630409240723, + "epoch": 0.013804011790926739, + "kl_loss": 2974.96728515625, + "loss_ib": 29.813737869262695, + "step": 48 + }, + { + "ce_ib": 64.43647766113281, + "ce_orig": 1.1406134366989136, + "epoch": 0.014091595369904378, + "kl_loss": 3809.33447265625, + "loss_ib": 38.157779693603516, + "step": 49 + }, + { + "ce_ib": 61.58470153808594, + "ce_orig": 0.76979660987854, + "epoch": 0.014091595369904378, + "kl_loss": 4037.28759765625, + "loss_ib": 40.4344596862793, + "step": 49 + }, + { + "ce_ib": 62.92927551269531, + "ce_orig": 1.3724863529205322, + "epoch": 0.014091595369904378, + "kl_loss": 3676.944580078125, + "loss_ib": 36.832374572753906, + "step": 49 + }, + { + "ce_ib": 62.875492095947266, + "ce_orig": 0.6223806142807007, + "epoch": 0.014091595369904378, + "kl_loss": 3579.248046875, + "loss_ib": 35.85535430908203, + "step": 49 + }, + { + "epoch": 0.01437917894888202, + "grad_norm": 533.30029296875, + "learning_rate": 1.4012738853503185e-06, + "loss": 37.7487, + "step": 50 + }, + { + "ce_ib": 61.585453033447266, + "ce_orig": 0.8775674104690552, + "epoch": 0.01437917894888202, + "kl_loss": 3878.78076171875, + "loss_ib": 38.84939193725586, + "step": 50 + }, + { + "ce_ib": 58.95383834838867, + "ce_orig": 0.7002028822898865, + "epoch": 0.01437917894888202, + "kl_loss": 3601.2216796875, + "loss_ib": 36.0711669921875, + "step": 50 + }, + { + "ce_ib": 62.58400344848633, + "ce_orig": 0.7227221131324768, + "epoch": 0.01437917894888202, + "kl_loss": 3643.05615234375, + "loss_ib": 36.49314498901367, + "step": 50 + }, + { + "ce_ib": 66.29362487792969, + "ce_orig": 1.0485941171646118, + "epoch": 0.01437917894888202, + "kl_loss": 3717.5888671875, + "loss_ib": 37.242183685302734, + "step": 50 + }, + { + "ce_ib": 61.591148376464844, + "ce_orig": 0.6134757995605469, + "epoch": 0.014666762527859659, + "kl_loss": 4068.88330078125, + "loss_ib": 40.750423431396484, + "step": 51 + }, + { + "ce_ib": 64.17617797851562, + "ce_orig": 1.2959325313568115, + "epoch": 0.014666762527859659, + "kl_loss": 3517.951171875, + "loss_ib": 35.24368667602539, + "step": 51 + }, + { + "ce_ib": 61.61008834838867, + "ce_orig": 0.6165804862976074, + "epoch": 0.014666762527859659, + "kl_loss": 4074.12744140625, + "loss_ib": 40.80288314819336, + "step": 51 + }, + { + "ce_ib": 63.430118560791016, + "ce_orig": 1.3455349206924438, + "epoch": 0.014666762527859659, + "kl_loss": 3925.92333984375, + "loss_ib": 39.322662353515625, + "step": 51 + }, + { + "ce_ib": 63.63911819458008, + "ce_orig": 0.813752293586731, + "epoch": 0.0149543461068373, + "kl_loss": 3411.76806640625, + "loss_ib": 34.18132019042969, + "step": 52 + }, + { + "ce_ib": 62.01913833618164, + "ce_orig": 0.9041391611099243, + "epoch": 0.0149543461068373, + "kl_loss": 3552.44091796875, + "loss_ib": 35.58642578125, + "step": 52 + }, + { + "ce_ib": 63.11591339111328, + "ce_orig": 0.8099521994590759, + "epoch": 0.0149543461068373, + "kl_loss": 3363.9013671875, + "loss_ib": 33.70212936401367, + "step": 52 + }, + { + "ce_ib": 63.846641540527344, + "ce_orig": 1.0799516439437866, + "epoch": 0.0149543461068373, + "kl_loss": 3810.20947265625, + "loss_ib": 38.16594314575195, + "step": 52 + }, + { + "ce_ib": 63.17069625854492, + "ce_orig": 1.2767831087112427, + "epoch": 0.015241929685814939, + "kl_loss": 3672.79248046875, + "loss_ib": 36.79109573364258, + "step": 53 + }, + { + "ce_ib": 60.14902877807617, + "ce_orig": 0.5526849627494812, + "epoch": 0.015241929685814939, + "kl_loss": 2977.911376953125, + "loss_ib": 29.839262008666992, + "step": 53 + }, + { + "ce_ib": 63.31485366821289, + "ce_orig": 0.7787724137306213, + "epoch": 0.015241929685814939, + "kl_loss": 3514.63232421875, + "loss_ib": 35.20963668823242, + "step": 53 + }, + { + "ce_ib": 64.56353759765625, + "ce_orig": 1.6204540729522705, + "epoch": 0.015241929685814939, + "kl_loss": 3812.165771484375, + "loss_ib": 38.18621826171875, + "step": 53 + }, + { + "ce_ib": 66.47161865234375, + "ce_orig": 1.488782525062561, + "epoch": 0.01552951326479258, + "kl_loss": 3656.353759765625, + "loss_ib": 36.630008697509766, + "step": 54 + }, + { + "ce_ib": 63.27266311645508, + "ce_orig": 1.2602483034133911, + "epoch": 0.01552951326479258, + "kl_loss": 3836.474609375, + "loss_ib": 38.428016662597656, + "step": 54 + }, + { + "ce_ib": 61.19478225708008, + "ce_orig": 0.9387843608856201, + "epoch": 0.01552951326479258, + "kl_loss": 3736.385009765625, + "loss_ib": 37.425045013427734, + "step": 54 + }, + { + "ce_ib": 62.278865814208984, + "ce_orig": 0.6224288940429688, + "epoch": 0.01552951326479258, + "kl_loss": 3810.37646484375, + "loss_ib": 38.16604232788086, + "step": 54 + }, + { + "epoch": 0.01581709684377022, + "grad_norm": 496.5709533691406, + "learning_rate": 1.5605095541401275e-06, + "loss": 37.2354, + "step": 55 + }, + { + "ce_ib": 64.26878356933594, + "ce_orig": 0.8660982847213745, + "epoch": 0.01581709684377022, + "kl_loss": 3508.06201171875, + "loss_ib": 35.1448860168457, + "step": 55 + }, + { + "ce_ib": 58.63752365112305, + "ce_orig": 0.08659573644399643, + "epoch": 0.01581709684377022, + "kl_loss": 513.92724609375, + "loss_ib": 5.197909832000732, + "step": 55 + }, + { + "ce_ib": 61.785953521728516, + "ce_orig": 0.9901527762413025, + "epoch": 0.01581709684377022, + "kl_loss": 3603.44580078125, + "loss_ib": 36.09624481201172, + "step": 55 + }, + { + "ce_ib": 64.46088409423828, + "ce_orig": 0.8389644026756287, + "epoch": 0.01581709684377022, + "kl_loss": 3921.08642578125, + "loss_ib": 39.27532196044922, + "step": 55 + }, + { + "ce_ib": 60.15986251831055, + "ce_orig": 0.8044717311859131, + "epoch": 0.01610468042274786, + "kl_loss": 3653.1875, + "loss_ib": 36.59203338623047, + "step": 56 + }, + { + "ce_ib": 62.024410247802734, + "ce_orig": 0.6691257953643799, + "epoch": 0.01610468042274786, + "kl_loss": 4127.29736328125, + "loss_ib": 41.334999084472656, + "step": 56 + }, + { + "ce_ib": 62.78162384033203, + "ce_orig": 0.9230839014053345, + "epoch": 0.01610468042274786, + "kl_loss": 3697.928955078125, + "loss_ib": 37.04206848144531, + "step": 56 + }, + { + "ce_ib": 64.67729949951172, + "ce_orig": 1.3531347513198853, + "epoch": 0.01610468042274786, + "kl_loss": 3432.21728515625, + "loss_ib": 34.38684844970703, + "step": 56 + }, + { + "ce_ib": 61.56424331665039, + "ce_orig": 0.8097767233848572, + "epoch": 0.016392264001725502, + "kl_loss": 3895.91943359375, + "loss_ib": 39.02075958251953, + "step": 57 + }, + { + "ce_ib": 62.23572540283203, + "ce_orig": 0.5408704876899719, + "epoch": 0.016392264001725502, + "kl_loss": 3602.846923828125, + "loss_ib": 36.09070587158203, + "step": 57 + }, + { + "ce_ib": 63.37240982055664, + "ce_orig": 0.7751593589782715, + "epoch": 0.016392264001725502, + "kl_loss": 3264.66650390625, + "loss_ib": 32.71003723144531, + "step": 57 + }, + { + "ce_ib": 64.54995727539062, + "ce_orig": 1.2101812362670898, + "epoch": 0.016392264001725502, + "kl_loss": 3891.133544921875, + "loss_ib": 38.97588348388672, + "step": 57 + }, + { + "ce_ib": 64.34214782714844, + "ce_orig": 1.1633491516113281, + "epoch": 0.016679847580703143, + "kl_loss": 3507.682861328125, + "loss_ib": 35.141170501708984, + "step": 58 + }, + { + "ce_ib": 64.65531921386719, + "ce_orig": 1.0707935094833374, + "epoch": 0.016679847580703143, + "kl_loss": 3560.9189453125, + "loss_ib": 35.67384338378906, + "step": 58 + }, + { + "ce_ib": 62.398475646972656, + "ce_orig": 1.028975009918213, + "epoch": 0.016679847580703143, + "kl_loss": 3907.4013671875, + "loss_ib": 39.136409759521484, + "step": 58 + }, + { + "ce_ib": 62.34195327758789, + "ce_orig": 1.025146484375, + "epoch": 0.016679847580703143, + "kl_loss": 3447.82470703125, + "loss_ib": 34.54058837890625, + "step": 58 + }, + { + "ce_ib": 64.7926254272461, + "ce_orig": 1.7371116876602173, + "epoch": 0.01696743115968078, + "kl_loss": 3368.210205078125, + "loss_ib": 33.74689483642578, + "step": 59 + }, + { + "ce_ib": 63.827476501464844, + "ce_orig": 1.2639371156692505, + "epoch": 0.01696743115968078, + "kl_loss": 3764.4599609375, + "loss_ib": 37.70842742919922, + "step": 59 + }, + { + "ce_ib": 60.48318099975586, + "ce_orig": 0.5967444181442261, + "epoch": 0.01696743115968078, + "kl_loss": 3661.50927734375, + "loss_ib": 36.675575256347656, + "step": 59 + }, + { + "ce_ib": 62.90448760986328, + "ce_orig": 0.8884239792823792, + "epoch": 0.01696743115968078, + "kl_loss": 3642.8115234375, + "loss_ib": 36.49102020263672, + "step": 59 + }, + { + "epoch": 0.01725501473865842, + "grad_norm": 521.4768676757812, + "learning_rate": 1.7197452229299363e-06, + "loss": 37.203, + "step": 60 + }, + { + "ce_ib": 62.17192077636719, + "ce_orig": 0.7363674640655518, + "epoch": 0.01725501473865842, + "kl_loss": 3631.49609375, + "loss_ib": 36.377132415771484, + "step": 60 + }, + { + "ce_ib": 63.342933654785156, + "ce_orig": 1.0785236358642578, + "epoch": 0.01725501473865842, + "kl_loss": 3575.03369140625, + "loss_ib": 35.81367874145508, + "step": 60 + }, + { + "ce_ib": 61.84444046020508, + "ce_orig": 0.8591632843017578, + "epoch": 0.01725501473865842, + "kl_loss": 3692.08203125, + "loss_ib": 36.982662200927734, + "step": 60 + }, + { + "ce_ib": 63.40077209472656, + "ce_orig": 1.3737884759902954, + "epoch": 0.01725501473865842, + "kl_loss": 3226.7734375, + "loss_ib": 32.33113479614258, + "step": 60 + }, + { + "ce_ib": 60.77010726928711, + "ce_orig": 0.5792077779769897, + "epoch": 0.017542598317636063, + "kl_loss": 3807.59423828125, + "loss_ib": 38.13671112060547, + "step": 61 + }, + { + "ce_ib": 65.03406524658203, + "ce_orig": 1.6079394817352295, + "epoch": 0.017542598317636063, + "kl_loss": 3764.595703125, + "loss_ib": 37.71099090576172, + "step": 61 + }, + { + "ce_ib": 62.88136672973633, + "ce_orig": 0.9871428608894348, + "epoch": 0.017542598317636063, + "kl_loss": 3831.7412109375, + "loss_ib": 38.38029098510742, + "step": 61 + }, + { + "ce_ib": 62.55516815185547, + "ce_orig": 0.7838013172149658, + "epoch": 0.017542598317636063, + "kl_loss": 3860.99755859375, + "loss_ib": 38.67253112792969, + "step": 61 + }, + { + "ce_ib": 66.16559600830078, + "ce_orig": 1.8575478792190552, + "epoch": 0.017830181896613704, + "kl_loss": 3639.782958984375, + "loss_ib": 36.463993072509766, + "step": 62 + }, + { + "ce_ib": 59.800865173339844, + "ce_orig": 0.7499110698699951, + "epoch": 0.017830181896613704, + "kl_loss": 2505.564453125, + "loss_ib": 25.11544418334961, + "step": 62 + }, + { + "ce_ib": 64.14867401123047, + "ce_orig": 1.4294443130493164, + "epoch": 0.017830181896613704, + "kl_loss": 3695.159423828125, + "loss_ib": 37.015743255615234, + "step": 62 + }, + { + "ce_ib": 59.92710494995117, + "ce_orig": 0.7166628241539001, + "epoch": 0.017830181896613704, + "kl_loss": 3911.38427734375, + "loss_ib": 39.17376708984375, + "step": 62 + }, + { + "ce_ib": 64.37249755859375, + "ce_orig": 1.247324824333191, + "epoch": 0.018117765475591345, + "kl_loss": 3376.881103515625, + "loss_ib": 33.83318328857422, + "step": 63 + }, + { + "ce_ib": 62.949920654296875, + "ce_orig": 0.5989828705787659, + "epoch": 0.018117765475591345, + "kl_loss": 3794.5458984375, + "loss_ib": 38.00840759277344, + "step": 63 + }, + { + "ce_ib": 65.01542663574219, + "ce_orig": 1.4691771268844604, + "epoch": 0.018117765475591345, + "kl_loss": 3403.2939453125, + "loss_ib": 34.09795379638672, + "step": 63 + }, + { + "ce_ib": 61.29307556152344, + "ce_orig": 0.7313998937606812, + "epoch": 0.018117765475591345, + "kl_loss": 3450.58984375, + "loss_ib": 34.56719207763672, + "step": 63 + }, + { + "ce_ib": 60.609066009521484, + "ce_orig": 0.7843332886695862, + "epoch": 0.018405349054568983, + "kl_loss": 3731.23095703125, + "loss_ib": 37.37291717529297, + "step": 64 + }, + { + "ce_ib": 62.530723571777344, + "ce_orig": 0.6766409873962402, + "epoch": 0.018405349054568983, + "kl_loss": 3585.4892578125, + "loss_ib": 35.917423248291016, + "step": 64 + }, + { + "ce_ib": 59.86486053466797, + "ce_orig": 0.5166366696357727, + "epoch": 0.018405349054568983, + "kl_loss": 3537.461181640625, + "loss_ib": 35.43447494506836, + "step": 64 + }, + { + "ce_ib": 62.64058303833008, + "ce_orig": 0.9777031540870667, + "epoch": 0.018405349054568983, + "kl_loss": 3730.766845703125, + "loss_ib": 37.37030792236328, + "step": 64 + }, + { + "epoch": 0.018692932633546624, + "grad_norm": 518.4480590820312, + "learning_rate": 1.8789808917197455e-06, + "loss": 36.8274, + "step": 65 + }, + { + "ce_ib": 62.54412078857422, + "ce_orig": 0.7984204888343811, + "epoch": 0.018692932633546624, + "kl_loss": 3530.49755859375, + "loss_ib": 35.36751937866211, + "step": 65 + }, + { + "ce_ib": 60.793087005615234, + "ce_orig": 0.7615864276885986, + "epoch": 0.018692932633546624, + "kl_loss": 3840.51513671875, + "loss_ib": 38.4659423828125, + "step": 65 + }, + { + "ce_ib": 60.25053787231445, + "ce_orig": 0.9209924340248108, + "epoch": 0.018692932633546624, + "kl_loss": 3897.0556640625, + "loss_ib": 39.03080749511719, + "step": 65 + }, + { + "ce_ib": 63.16765594482422, + "ce_orig": 1.3678812980651855, + "epoch": 0.018692932633546624, + "kl_loss": 3938.2587890625, + "loss_ib": 39.44575500488281, + "step": 65 + }, + { + "ce_ib": 65.65672302246094, + "ce_orig": 1.016355037689209, + "epoch": 0.018980516212524265, + "kl_loss": 3797.2421875, + "loss_ib": 38.03807830810547, + "step": 66 + }, + { + "ce_ib": 61.379974365234375, + "ce_orig": 1.0548150539398193, + "epoch": 0.018980516212524265, + "kl_loss": 3564.20751953125, + "loss_ib": 35.703453063964844, + "step": 66 + }, + { + "ce_ib": 62.37001419067383, + "ce_orig": 0.9475827217102051, + "epoch": 0.018980516212524265, + "kl_loss": 3571.18115234375, + "loss_ib": 35.7741813659668, + "step": 66 + }, + { + "ce_ib": 60.9146728515625, + "ce_orig": 0.784809947013855, + "epoch": 0.018980516212524265, + "kl_loss": 3582.17822265625, + "loss_ib": 35.88269805908203, + "step": 66 + }, + { + "ce_ib": 63.815486907958984, + "ce_orig": 1.3292514085769653, + "epoch": 0.019268099791501906, + "kl_loss": 3440.73779296875, + "loss_ib": 34.47119140625, + "step": 67 + }, + { + "ce_ib": 62.98185348510742, + "ce_orig": 0.7911183834075928, + "epoch": 0.019268099791501906, + "kl_loss": 4040.31640625, + "loss_ib": 40.46614456176758, + "step": 67 + }, + { + "ce_ib": 65.0182876586914, + "ce_orig": 0.7907848358154297, + "epoch": 0.019268099791501906, + "kl_loss": 3936.2587890625, + "loss_ib": 39.42760467529297, + "step": 67 + }, + { + "ce_ib": 63.88326644897461, + "ce_orig": 1.092854380607605, + "epoch": 0.019268099791501906, + "kl_loss": 1859.8902587890625, + "loss_ib": 18.662784576416016, + "step": 67 + }, + { + "ce_ib": 61.63788604736328, + "ce_orig": 0.5948306322097778, + "epoch": 0.019555683370479547, + "kl_loss": 3881.165771484375, + "loss_ib": 38.873294830322266, + "step": 68 + }, + { + "ce_ib": 60.62575912475586, + "ce_orig": 0.5677074193954468, + "epoch": 0.019555683370479547, + "kl_loss": 3741.05078125, + "loss_ib": 37.47113037109375, + "step": 68 + }, + { + "ce_ib": 60.886016845703125, + "ce_orig": 0.5185374617576599, + "epoch": 0.019555683370479547, + "kl_loss": 3542.03857421875, + "loss_ib": 35.48126983642578, + "step": 68 + }, + { + "ce_ib": 60.4649543762207, + "ce_orig": 0.4317881166934967, + "epoch": 0.019555683370479547, + "kl_loss": 3231.02587890625, + "loss_ib": 32.370723724365234, + "step": 68 + }, + { + "ce_ib": 63.6849365234375, + "ce_orig": 1.51223886013031, + "epoch": 0.019843266949457185, + "kl_loss": 3498.36083984375, + "loss_ib": 35.04729080200195, + "step": 69 + }, + { + "ce_ib": 63.855262756347656, + "ce_orig": 2.3971757888793945, + "epoch": 0.019843266949457185, + "kl_loss": 3531.71630859375, + "loss_ib": 35.38101577758789, + "step": 69 + }, + { + "ce_ib": 61.03330993652344, + "ce_orig": 1.2093490362167358, + "epoch": 0.019843266949457185, + "kl_loss": 3718.281494140625, + "loss_ib": 37.24384689331055, + "step": 69 + }, + { + "ce_ib": 64.49278259277344, + "ce_orig": 1.474419355392456, + "epoch": 0.019843266949457185, + "kl_loss": 3354.119140625, + "loss_ib": 33.605682373046875, + "step": 69 + }, + { + "epoch": 0.020130850528434826, + "grad_norm": 502.4320068359375, + "learning_rate": 2.0382165605095544e-06, + "loss": 36.6694, + "step": 70 + }, + { + "ce_ib": 62.60323715209961, + "ce_orig": 0.8422316312789917, + "epoch": 0.020130850528434826, + "kl_loss": 3720.64892578125, + "loss_ib": 37.26909255981445, + "step": 70 + }, + { + "ce_ib": 61.0345573425293, + "ce_orig": 0.4435622990131378, + "epoch": 0.020130850528434826, + "kl_loss": 3550.4306640625, + "loss_ib": 35.56534194946289, + "step": 70 + }, + { + "ce_ib": 62.675987243652344, + "ce_orig": 1.0512957572937012, + "epoch": 0.020130850528434826, + "kl_loss": 3372.498046875, + "loss_ib": 33.787654876708984, + "step": 70 + }, + { + "ce_ib": 63.02972412109375, + "ce_orig": 0.8881096839904785, + "epoch": 0.020130850528434826, + "kl_loss": 3634.5400390625, + "loss_ib": 36.40842819213867, + "step": 70 + }, + { + "ce_ib": 61.19450759887695, + "ce_orig": 0.8308007717132568, + "epoch": 0.020418434107412467, + "kl_loss": 3668.546875, + "loss_ib": 36.74666213989258, + "step": 71 + }, + { + "ce_ib": 62.893211364746094, + "ce_orig": 0.6296738982200623, + "epoch": 0.020418434107412467, + "kl_loss": 3531.10107421875, + "loss_ib": 35.3739013671875, + "step": 71 + }, + { + "ce_ib": 61.59178924560547, + "ce_orig": 0.9227543473243713, + "epoch": 0.020418434107412467, + "kl_loss": 3549.591552734375, + "loss_ib": 35.5575065612793, + "step": 71 + }, + { + "ce_ib": 60.22856140136719, + "ce_orig": 0.6385669708251953, + "epoch": 0.020418434107412467, + "kl_loss": 3632.27978515625, + "loss_ib": 36.383026123046875, + "step": 71 + }, + { + "ce_ib": 64.18019104003906, + "ce_orig": 1.6183723211288452, + "epoch": 0.020706017686390108, + "kl_loss": 3154.05224609375, + "loss_ib": 31.60470199584961, + "step": 72 + }, + { + "ce_ib": 60.683284759521484, + "ce_orig": 0.7608581185340881, + "epoch": 0.020706017686390108, + "kl_loss": 3858.5693359375, + "loss_ib": 38.64637756347656, + "step": 72 + }, + { + "ce_ib": 60.50144577026367, + "ce_orig": 0.7999545335769653, + "epoch": 0.020706017686390108, + "kl_loss": 3660.2255859375, + "loss_ib": 36.66275405883789, + "step": 72 + }, + { + "ce_ib": 61.18289566040039, + "ce_orig": 1.1362156867980957, + "epoch": 0.020706017686390108, + "kl_loss": 3251.9755859375, + "loss_ib": 32.58094024658203, + "step": 72 + }, + { + "ce_ib": 61.33281326293945, + "ce_orig": 0.6669225096702576, + "epoch": 0.02099360126536775, + "kl_loss": 3684.797119140625, + "loss_ib": 36.9093017578125, + "step": 73 + }, + { + "ce_ib": 59.9718017578125, + "ce_orig": 0.9883142113685608, + "epoch": 0.02099360126536775, + "kl_loss": 3631.879638671875, + "loss_ib": 36.37876510620117, + "step": 73 + }, + { + "ce_ib": 63.71513748168945, + "ce_orig": 1.3611314296722412, + "epoch": 0.02099360126536775, + "kl_loss": 3757.900146484375, + "loss_ib": 37.64271545410156, + "step": 73 + }, + { + "ce_ib": 61.1561164855957, + "ce_orig": 0.8598636388778687, + "epoch": 0.02099360126536775, + "kl_loss": 3841.884033203125, + "loss_ib": 38.47999572753906, + "step": 73 + }, + { + "ce_ib": 61.85321044921875, + "ce_orig": 0.8115242719650269, + "epoch": 0.021281184844345387, + "kl_loss": 3850.6259765625, + "loss_ib": 38.568111419677734, + "step": 74 + }, + { + "ce_ib": 58.246185302734375, + "ce_orig": 0.5221734046936035, + "epoch": 0.021281184844345387, + "kl_loss": 3487.754150390625, + "loss_ib": 34.935787200927734, + "step": 74 + }, + { + "ce_ib": 61.469078063964844, + "ce_orig": 0.7319106459617615, + "epoch": 0.021281184844345387, + "kl_loss": 4135.6591796875, + "loss_ib": 41.418060302734375, + "step": 74 + }, + { + "ce_ib": 61.441585540771484, + "ce_orig": 0.9860128164291382, + "epoch": 0.021281184844345387, + "kl_loss": 3332.26318359375, + "loss_ib": 33.384071350097656, + "step": 74 + }, + { + "epoch": 0.021568768423323028, + "grad_norm": 524.3435668945312, + "learning_rate": 2.1974522292993634e-06, + "loss": 36.7901, + "step": 75 + }, + { + "ce_ib": 60.86553192138672, + "ce_orig": 0.6997863054275513, + "epoch": 0.021568768423323028, + "kl_loss": 3372.901123046875, + "loss_ib": 33.789878845214844, + "step": 75 + }, + { + "ce_ib": 62.65220260620117, + "ce_orig": 1.4506391286849976, + "epoch": 0.021568768423323028, + "kl_loss": 3574.6826171875, + "loss_ib": 35.80947494506836, + "step": 75 + }, + { + "ce_ib": 61.273311614990234, + "ce_orig": 0.7289857864379883, + "epoch": 0.021568768423323028, + "kl_loss": 3926.74658203125, + "loss_ib": 39.328739166259766, + "step": 75 + }, + { + "ce_ib": 60.932491302490234, + "ce_orig": 0.6170489192008972, + "epoch": 0.021568768423323028, + "kl_loss": 3798.67724609375, + "loss_ib": 38.04770278930664, + "step": 75 + }, + { + "ce_ib": 60.94731140136719, + "ce_orig": 1.1426844596862793, + "epoch": 0.02185635200230067, + "kl_loss": 3991.97900390625, + "loss_ib": 39.980735778808594, + "step": 76 + }, + { + "ce_ib": 59.70903396606445, + "ce_orig": 0.753753125667572, + "epoch": 0.02185635200230067, + "kl_loss": 3774.1162109375, + "loss_ib": 37.80086898803711, + "step": 76 + }, + { + "ce_ib": 60.583736419677734, + "ce_orig": 1.2148487567901611, + "epoch": 0.02185635200230067, + "kl_loss": 3596.19140625, + "loss_ib": 36.02249526977539, + "step": 76 + }, + { + "ce_ib": 61.524559020996094, + "ce_orig": 1.4590885639190674, + "epoch": 0.02185635200230067, + "kl_loss": 4056.229248046875, + "loss_ib": 40.62381362915039, + "step": 76 + }, + { + "ce_ib": 60.858001708984375, + "ce_orig": 1.2687323093414307, + "epoch": 0.02214393558127831, + "kl_loss": 3721.635009765625, + "loss_ib": 37.27720642089844, + "step": 77 + }, + { + "ce_ib": 60.991886138916016, + "ce_orig": 0.7511414289474487, + "epoch": 0.02214393558127831, + "kl_loss": 3765.05615234375, + "loss_ib": 37.711551666259766, + "step": 77 + }, + { + "ce_ib": 63.26270294189453, + "ce_orig": 1.4974795579910278, + "epoch": 0.02214393558127831, + "kl_loss": 3476.8515625, + "loss_ib": 34.8317756652832, + "step": 77 + }, + { + "ce_ib": 60.98178482055664, + "ce_orig": 0.8308293223381042, + "epoch": 0.02214393558127831, + "kl_loss": 3436.35009765625, + "loss_ib": 34.42448043823242, + "step": 77 + }, + { + "ce_ib": 60.006534576416016, + "ce_orig": 0.7794530391693115, + "epoch": 0.022431519160255948, + "kl_loss": 3976.89306640625, + "loss_ib": 39.82893753051758, + "step": 78 + }, + { + "ce_ib": 60.324668884277344, + "ce_orig": 1.0577147006988525, + "epoch": 0.022431519160255948, + "kl_loss": 3699.833984375, + "loss_ib": 37.05866241455078, + "step": 78 + }, + { + "ce_ib": 63.445526123046875, + "ce_orig": 1.2055957317352295, + "epoch": 0.022431519160255948, + "kl_loss": 3332.263671875, + "loss_ib": 33.38608169555664, + "step": 78 + }, + { + "ce_ib": 62.043052673339844, + "ce_orig": 1.2299708127975464, + "epoch": 0.022431519160255948, + "kl_loss": 3498.40966796875, + "loss_ib": 35.046138763427734, + "step": 78 + }, + { + "ce_ib": 62.630794525146484, + "ce_orig": 1.6087855100631714, + "epoch": 0.02271910273923359, + "kl_loss": 3403.677490234375, + "loss_ib": 34.09940719604492, + "step": 79 + }, + { + "ce_ib": 61.70748519897461, + "ce_orig": 1.395445466041565, + "epoch": 0.02271910273923359, + "kl_loss": 3506.6875, + "loss_ib": 35.12858200073242, + "step": 79 + }, + { + "ce_ib": 60.67852020263672, + "ce_orig": 0.8778415322303772, + "epoch": 0.02271910273923359, + "kl_loss": 3626.2841796875, + "loss_ib": 36.32352066040039, + "step": 79 + }, + { + "ce_ib": 62.01763153076172, + "ce_orig": 1.1855616569519043, + "epoch": 0.02271910273923359, + "kl_loss": 3562.23974609375, + "loss_ib": 35.68441390991211, + "step": 79 + }, + { + "epoch": 0.02300668631821123, + "grad_norm": 495.8802795410156, + "learning_rate": 2.356687898089172e-06, + "loss": 36.0833, + "step": 80 + }, + { + "ce_ib": 61.64104080200195, + "ce_orig": 1.086162805557251, + "epoch": 0.02300668631821123, + "kl_loss": 3640.07470703125, + "loss_ib": 36.46238708496094, + "step": 80 + }, + { + "ce_ib": 61.57204055786133, + "ce_orig": 0.9259805083274841, + "epoch": 0.02300668631821123, + "kl_loss": 3893.42578125, + "loss_ib": 38.99583053588867, + "step": 80 + }, + { + "ce_ib": 66.0667953491211, + "ce_orig": 1.7032816410064697, + "epoch": 0.02300668631821123, + "kl_loss": 3534.810546875, + "loss_ib": 35.4141731262207, + "step": 80 + }, + { + "ce_ib": 59.70638656616211, + "ce_orig": 0.9606295228004456, + "epoch": 0.02300668631821123, + "kl_loss": 3836.31494140625, + "loss_ib": 38.422855377197266, + "step": 80 + }, + { + "ce_ib": 62.870216369628906, + "ce_orig": 1.2989033460617065, + "epoch": 0.02329426989718887, + "kl_loss": 3637.41015625, + "loss_ib": 36.436973571777344, + "step": 81 + }, + { + "ce_ib": 62.709312438964844, + "ce_orig": 1.103994369506836, + "epoch": 0.02329426989718887, + "kl_loss": 3570.747802734375, + "loss_ib": 35.77018737792969, + "step": 81 + }, + { + "ce_ib": 61.99235916137695, + "ce_orig": 1.6946630477905273, + "epoch": 0.02329426989718887, + "kl_loss": 3532.9951171875, + "loss_ib": 35.39194107055664, + "step": 81 + }, + { + "ce_ib": 60.915489196777344, + "ce_orig": 0.9102914333343506, + "epoch": 0.02329426989718887, + "kl_loss": 3406.8291015625, + "loss_ib": 34.129207611083984, + "step": 81 + }, + { + "ce_ib": 59.30317306518555, + "ce_orig": 0.6368826031684875, + "epoch": 0.023581853476166512, + "kl_loss": 3712.42236328125, + "loss_ib": 37.183528900146484, + "step": 82 + }, + { + "ce_ib": 60.09833526611328, + "ce_orig": 0.9400615096092224, + "epoch": 0.023581853476166512, + "kl_loss": 3772.23291015625, + "loss_ib": 37.78242492675781, + "step": 82 + }, + { + "ce_ib": 59.77298355102539, + "ce_orig": 1.1513607501983643, + "epoch": 0.023581853476166512, + "kl_loss": 3608.817138671875, + "loss_ib": 36.147945404052734, + "step": 82 + }, + { + "ce_ib": 61.4395637512207, + "ce_orig": 0.8887077569961548, + "epoch": 0.023581853476166512, + "kl_loss": 3723.0693359375, + "loss_ib": 37.29213333129883, + "step": 82 + }, + { + "ce_ib": 59.11687469482422, + "ce_orig": 1.0084015130996704, + "epoch": 0.02386943705514415, + "kl_loss": 3634.8994140625, + "loss_ib": 36.40810775756836, + "step": 83 + }, + { + "ce_ib": 61.65114212036133, + "ce_orig": 1.2165615558624268, + "epoch": 0.02386943705514415, + "kl_loss": 3236.768310546875, + "loss_ib": 32.4293327331543, + "step": 83 + }, + { + "ce_ib": 61.245872497558594, + "ce_orig": 0.9978702068328857, + "epoch": 0.02386943705514415, + "kl_loss": 3855.5927734375, + "loss_ib": 38.61717224121094, + "step": 83 + }, + { + "ce_ib": 64.20453643798828, + "ce_orig": 1.7128099203109741, + "epoch": 0.02386943705514415, + "kl_loss": 3634.4951171875, + "loss_ib": 36.40915298461914, + "step": 83 + }, + { + "ce_ib": 64.26237487792969, + "ce_orig": 1.0946186780929565, + "epoch": 0.02415702063412179, + "kl_loss": 3485.233642578125, + "loss_ib": 34.91659927368164, + "step": 84 + }, + { + "ce_ib": 60.3849983215332, + "ce_orig": 0.9367128610610962, + "epoch": 0.02415702063412179, + "kl_loss": 3541.22021484375, + "loss_ib": 35.47258758544922, + "step": 84 + }, + { + "ce_ib": 58.71778869628906, + "ce_orig": 0.7141556143760681, + "epoch": 0.02415702063412179, + "kl_loss": 3839.89599609375, + "loss_ib": 38.45767593383789, + "step": 84 + }, + { + "ce_ib": 59.888084411621094, + "ce_orig": 0.76758873462677, + "epoch": 0.02415702063412179, + "kl_loss": 3747.78759765625, + "loss_ib": 37.53776168823242, + "step": 84 + }, + { + "epoch": 0.024444604213099432, + "grad_norm": 524.7237548828125, + "learning_rate": 2.515923566878981e-06, + "loss": 36.6037, + "step": 85 + }, + { + "ce_ib": 60.545921325683594, + "ce_orig": 1.2117584943771362, + "epoch": 0.024444604213099432, + "kl_loss": 3843.150634765625, + "loss_ib": 38.4920539855957, + "step": 85 + }, + { + "ce_ib": 58.764076232910156, + "ce_orig": 0.38705337047576904, + "epoch": 0.024444604213099432, + "kl_loss": 3380.5478515625, + "loss_ib": 33.86424255371094, + "step": 85 + }, + { + "ce_ib": 63.67119216918945, + "ce_orig": 1.6082842350006104, + "epoch": 0.024444604213099432, + "kl_loss": 3766.42333984375, + "loss_ib": 37.7279052734375, + "step": 85 + }, + { + "ce_ib": 59.66868591308594, + "ce_orig": 0.8970634341239929, + "epoch": 0.024444604213099432, + "kl_loss": 3629.2216796875, + "loss_ib": 36.35188293457031, + "step": 85 + }, + { + "ce_ib": 64.16704559326172, + "ce_orig": 2.276355743408203, + "epoch": 0.024732187792077073, + "kl_loss": 3044.044921875, + "loss_ib": 30.504615783691406, + "step": 86 + }, + { + "ce_ib": 60.19831848144531, + "ce_orig": 0.9945586919784546, + "epoch": 0.024732187792077073, + "kl_loss": 3570.64990234375, + "loss_ib": 35.76669692993164, + "step": 86 + }, + { + "ce_ib": 60.47395706176758, + "ce_orig": 0.7600452303886414, + "epoch": 0.024732187792077073, + "kl_loss": 3757.107666015625, + "loss_ib": 37.63154983520508, + "step": 86 + }, + { + "ce_ib": 61.33274841308594, + "ce_orig": 1.0650345087051392, + "epoch": 0.024732187792077073, + "kl_loss": 3494.167236328125, + "loss_ib": 35.00300598144531, + "step": 86 + }, + { + "ce_ib": 58.06923294067383, + "ce_orig": 1.0382181406021118, + "epoch": 0.025019771371054714, + "kl_loss": 3489.38623046875, + "loss_ib": 34.95193099975586, + "step": 87 + }, + { + "ce_ib": 59.217193603515625, + "ce_orig": 0.9920439720153809, + "epoch": 0.025019771371054714, + "kl_loss": 3379.11279296875, + "loss_ib": 33.850341796875, + "step": 87 + }, + { + "ce_ib": 59.13267517089844, + "ce_orig": 0.7496766448020935, + "epoch": 0.025019771371054714, + "kl_loss": 3705.8818359375, + "loss_ib": 37.117950439453125, + "step": 87 + }, + { + "ce_ib": 63.39867401123047, + "ce_orig": 1.7902837991714478, + "epoch": 0.025019771371054714, + "kl_loss": 3600.96484375, + "loss_ib": 36.07304763793945, + "step": 87 + }, + { + "ce_ib": 59.85429763793945, + "ce_orig": 0.6192349791526794, + "epoch": 0.025307354950032352, + "kl_loss": 3378.796630859375, + "loss_ib": 33.84782028198242, + "step": 88 + }, + { + "ce_ib": 59.664588928222656, + "ce_orig": 0.28315597772598267, + "epoch": 0.025307354950032352, + "kl_loss": 3035.71630859375, + "loss_ib": 30.416828155517578, + "step": 88 + }, + { + "ce_ib": 59.85287094116211, + "ce_orig": 0.8578697443008423, + "epoch": 0.025307354950032352, + "kl_loss": 3704.26123046875, + "loss_ib": 37.10246658325195, + "step": 88 + }, + { + "ce_ib": 60.02167510986328, + "ce_orig": 0.865718424320221, + "epoch": 0.025307354950032352, + "kl_loss": 3559.58740234375, + "loss_ib": 35.6558952331543, + "step": 88 + }, + { + "ce_ib": 61.92749786376953, + "ce_orig": 1.8417946100234985, + "epoch": 0.025594938529009993, + "kl_loss": 3461.548828125, + "loss_ib": 34.67741775512695, + "step": 89 + }, + { + "ce_ib": 60.532379150390625, + "ce_orig": 1.1578682661056519, + "epoch": 0.025594938529009993, + "kl_loss": 3567.969482421875, + "loss_ib": 35.74022674560547, + "step": 89 + }, + { + "ce_ib": 57.639652252197266, + "ce_orig": 0.9897644519805908, + "epoch": 0.025594938529009993, + "kl_loss": 3668.02587890625, + "loss_ib": 36.73789596557617, + "step": 89 + }, + { + "ce_ib": 60.55142593383789, + "ce_orig": 0.9929890632629395, + "epoch": 0.025594938529009993, + "kl_loss": 3560.878662109375, + "loss_ib": 35.66933822631836, + "step": 89 + }, + { + "epoch": 0.025882522107987634, + "grad_norm": 502.2541198730469, + "learning_rate": 2.67515923566879e-06, + "loss": 36.2964, + "step": 90 + }, + { + "ce_ib": 59.376216888427734, + "ce_orig": 0.7132657766342163, + "epoch": 0.025882522107987634, + "kl_loss": 3782.4814453125, + "loss_ib": 37.88418960571289, + "step": 90 + }, + { + "ce_ib": 61.76494216918945, + "ce_orig": 1.0529389381408691, + "epoch": 0.025882522107987634, + "kl_loss": 3165.3203125, + "loss_ib": 31.714967727661133, + "step": 90 + }, + { + "ce_ib": 63.20905303955078, + "ce_orig": 2.0391719341278076, + "epoch": 0.025882522107987634, + "kl_loss": 3383.056884765625, + "loss_ib": 33.893775939941406, + "step": 90 + }, + { + "ce_ib": 60.66413116455078, + "ce_orig": 1.1053894758224487, + "epoch": 0.025882522107987634, + "kl_loss": 3465.13671875, + "loss_ib": 34.712032318115234, + "step": 90 + }, + { + "ce_ib": 59.18904113769531, + "ce_orig": 0.6099984049797058, + "epoch": 0.026170105686965275, + "kl_loss": 3366.81494140625, + "loss_ib": 33.72733688354492, + "step": 91 + }, + { + "ce_ib": 60.47399139404297, + "ce_orig": 1.2133395671844482, + "epoch": 0.026170105686965275, + "kl_loss": 3215.339111328125, + "loss_ib": 32.213863372802734, + "step": 91 + }, + { + "ce_ib": 59.12387466430664, + "ce_orig": 1.0556620359420776, + "epoch": 0.026170105686965275, + "kl_loss": 3760.199462890625, + "loss_ib": 37.66111755371094, + "step": 91 + }, + { + "ce_ib": 58.41682052612305, + "ce_orig": 0.8695086240768433, + "epoch": 0.026170105686965275, + "kl_loss": 3594.7275390625, + "loss_ib": 36.00569152832031, + "step": 91 + }, + { + "ce_ib": 61.80458450317383, + "ce_orig": 1.1173126697540283, + "epoch": 0.026457689265942913, + "kl_loss": 2937.051513671875, + "loss_ib": 29.43231773376465, + "step": 92 + }, + { + "ce_ib": 58.19034957885742, + "ce_orig": 1.1345878839492798, + "epoch": 0.026457689265942913, + "kl_loss": 3721.2353515625, + "loss_ib": 37.27054214477539, + "step": 92 + }, + { + "ce_ib": 64.21983337402344, + "ce_orig": 2.1032416820526123, + "epoch": 0.026457689265942913, + "kl_loss": 3113.11279296875, + "loss_ib": 31.19534683227539, + "step": 92 + }, + { + "ce_ib": 56.81288146972656, + "ce_orig": 0.7186897993087769, + "epoch": 0.026457689265942913, + "kl_loss": 3551.26708984375, + "loss_ib": 35.56948471069336, + "step": 92 + }, + { + "ce_ib": 58.19551467895508, + "ce_orig": 0.6073794364929199, + "epoch": 0.026745272844920554, + "kl_loss": 3257.37744140625, + "loss_ib": 32.6319694519043, + "step": 93 + }, + { + "ce_ib": 59.8862190246582, + "ce_orig": 1.0717018842697144, + "epoch": 0.026745272844920554, + "kl_loss": 3663.1142578125, + "loss_ib": 36.6910285949707, + "step": 93 + }, + { + "ce_ib": 60.67866134643555, + "ce_orig": 0.8607167601585388, + "epoch": 0.026745272844920554, + "kl_loss": 3073.027099609375, + "loss_ib": 30.79094886779785, + "step": 93 + }, + { + "ce_ib": 57.884647369384766, + "ce_orig": 0.7569469809532166, + "epoch": 0.026745272844920554, + "kl_loss": 3273.467041015625, + "loss_ib": 32.79255676269531, + "step": 93 + }, + { + "ce_ib": 62.44021987915039, + "ce_orig": 1.394387125968933, + "epoch": 0.027032856423898195, + "kl_loss": 3389.59716796875, + "loss_ib": 33.958412170410156, + "step": 94 + }, + { + "ce_ib": 60.312530517578125, + "ce_orig": 1.1131266355514526, + "epoch": 0.027032856423898195, + "kl_loss": 3403.34423828125, + "loss_ib": 34.093753814697266, + "step": 94 + }, + { + "ce_ib": 58.25202941894531, + "ce_orig": 0.8151227831840515, + "epoch": 0.027032856423898195, + "kl_loss": 3855.5947265625, + "loss_ib": 38.61419677734375, + "step": 94 + }, + { + "ce_ib": 61.3564338684082, + "ce_orig": 1.1052205562591553, + "epoch": 0.027032856423898195, + "kl_loss": 3158.87841796875, + "loss_ib": 31.6501407623291, + "step": 94 + }, + { + "epoch": 0.027320440002875836, + "grad_norm": 506.2858581542969, + "learning_rate": 2.834394904458599e-06, + "loss": 35.9074, + "step": 95 + }, + { + "ce_ib": 65.28954315185547, + "ce_orig": 1.8119444847106934, + "epoch": 0.027320440002875836, + "kl_loss": 3606.654296875, + "loss_ib": 36.131832122802734, + "step": 95 + }, + { + "ce_ib": 59.008201599121094, + "ce_orig": 1.0341309309005737, + "epoch": 0.027320440002875836, + "kl_loss": 3603.617431640625, + "loss_ib": 36.09518051147461, + "step": 95 + }, + { + "ce_ib": 59.24610137939453, + "ce_orig": 0.8969424962997437, + "epoch": 0.027320440002875836, + "kl_loss": 3502.19482421875, + "loss_ib": 35.08119201660156, + "step": 95 + }, + { + "ce_ib": 60.19147491455078, + "ce_orig": 1.0157058238983154, + "epoch": 0.027320440002875836, + "kl_loss": 3443.8740234375, + "loss_ib": 34.49892807006836, + "step": 95 + }, + { + "ce_ib": 60.122161865234375, + "ce_orig": 0.915783703327179, + "epoch": 0.027608023581853477, + "kl_loss": 3757.819091796875, + "loss_ib": 37.63831329345703, + "step": 96 + }, + { + "ce_ib": 56.74940490722656, + "ce_orig": 0.5688998103141785, + "epoch": 0.027608023581853477, + "kl_loss": 3403.82177734375, + "loss_ib": 34.094966888427734, + "step": 96 + }, + { + "ce_ib": 59.47806167602539, + "ce_orig": 0.9031659364700317, + "epoch": 0.027608023581853477, + "kl_loss": 3398.11572265625, + "loss_ib": 34.04063415527344, + "step": 96 + }, + { + "ce_ib": 61.29694747924805, + "ce_orig": 1.6301769018173218, + "epoch": 0.027608023581853477, + "kl_loss": 3120.549560546875, + "loss_ib": 31.26679039001465, + "step": 96 + }, + { + "ce_ib": 57.831748962402344, + "ce_orig": 0.7541747689247131, + "epoch": 0.027895607160831115, + "kl_loss": 3017.34765625, + "loss_ib": 30.231307983398438, + "step": 97 + }, + { + "ce_ib": 58.9442253112793, + "ce_orig": 1.0282090902328491, + "epoch": 0.027895607160831115, + "kl_loss": 3543.724853515625, + "loss_ib": 35.496192932128906, + "step": 97 + }, + { + "ce_ib": 62.54707336425781, + "ce_orig": 1.1943050622940063, + "epoch": 0.027895607160831115, + "kl_loss": 3438.58935546875, + "loss_ib": 34.44844055175781, + "step": 97 + }, + { + "ce_ib": 59.76130676269531, + "ce_orig": 0.9242035150527954, + "epoch": 0.027895607160831115, + "kl_loss": 3405.859375, + "loss_ib": 34.11835479736328, + "step": 97 + }, + { + "ce_ib": 59.63943862915039, + "ce_orig": 1.080672025680542, + "epoch": 0.028183190739808756, + "kl_loss": 3514.46875, + "loss_ib": 35.20432662963867, + "step": 98 + }, + { + "ce_ib": 58.865867614746094, + "ce_orig": 0.8346519470214844, + "epoch": 0.028183190739808756, + "kl_loss": 3126.54638671875, + "loss_ib": 31.32432746887207, + "step": 98 + }, + { + "ce_ib": 59.8038330078125, + "ce_orig": 0.5681230425834656, + "epoch": 0.028183190739808756, + "kl_loss": 3689.09228515625, + "loss_ib": 36.95072555541992, + "step": 98 + }, + { + "ce_ib": 57.7287483215332, + "ce_orig": 1.3980305194854736, + "epoch": 0.028183190739808756, + "kl_loss": 3602.91552734375, + "loss_ib": 36.086883544921875, + "step": 98 + }, + { + "ce_ib": 59.70271682739258, + "ce_orig": 0.7515245676040649, + "epoch": 0.028470774318786397, + "kl_loss": 3556.43603515625, + "loss_ib": 35.624061584472656, + "step": 99 + }, + { + "ce_ib": 59.0880126953125, + "ce_orig": 1.349737524986267, + "epoch": 0.028470774318786397, + "kl_loss": 3817.392578125, + "loss_ib": 38.23301315307617, + "step": 99 + }, + { + "ce_ib": 58.57311248779297, + "ce_orig": 0.8737644553184509, + "epoch": 0.028470774318786397, + "kl_loss": 3711.00537109375, + "loss_ib": 37.16862869262695, + "step": 99 + }, + { + "ce_ib": 60.80643081665039, + "ce_orig": 1.3529119491577148, + "epoch": 0.028470774318786397, + "kl_loss": 3494.3671875, + "loss_ib": 35.004478454589844, + "step": 99 + }, + { + "epoch": 0.02875835789776404, + "grad_norm": 506.0978088378906, + "learning_rate": 2.993630573248408e-06, + "loss": 35.5561, + "step": 100 + }, + { + "ce_ib": 58.56661605834961, + "ce_orig": 1.1666901111602783, + "epoch": 0.02875835789776404, + "kl_loss": 3524.689453125, + "loss_ib": 35.30546188354492, + "step": 100 + }, + { + "ce_ib": 62.831783294677734, + "ce_orig": 2.2819557189941406, + "epoch": 0.02875835789776404, + "kl_loss": 3127.18798828125, + "loss_ib": 31.3347110748291, + "step": 100 + }, + { + "ce_ib": 58.19925308227539, + "ce_orig": 0.8314814567565918, + "epoch": 0.02875835789776404, + "kl_loss": 3376.9296875, + "loss_ib": 33.82749557495117, + "step": 100 + }, + { + "ce_ib": 57.46586608886719, + "ce_orig": 0.7712212800979614, + "epoch": 0.02875835789776404, + "kl_loss": 3693.83544921875, + "loss_ib": 36.995819091796875, + "step": 100 + }, + { + "ce_ib": 63.20458221435547, + "ce_orig": 2.0820841789245605, + "epoch": 0.02904594147674168, + "kl_loss": 3015.462890625, + "loss_ib": 30.21783447265625, + "step": 101 + }, + { + "ce_ib": 58.481056213378906, + "ce_orig": 0.621671736240387, + "epoch": 0.02904594147674168, + "kl_loss": 3436.01416015625, + "loss_ib": 34.41862106323242, + "step": 101 + }, + { + "ce_ib": 63.86113357543945, + "ce_orig": 1.306851863861084, + "epoch": 0.02904594147674168, + "kl_loss": 3503.22216796875, + "loss_ib": 35.09608459472656, + "step": 101 + }, + { + "ce_ib": 58.2662239074707, + "ce_orig": 0.7199594974517822, + "epoch": 0.02904594147674168, + "kl_loss": 3752.4775390625, + "loss_ib": 37.58304214477539, + "step": 101 + }, + { + "ce_ib": 57.16976547241211, + "ce_orig": 0.7961263656616211, + "epoch": 0.029333525055719317, + "kl_loss": 3487.98388671875, + "loss_ib": 34.937007904052734, + "step": 102 + }, + { + "ce_ib": 59.150516510009766, + "ce_orig": 1.1825999021530151, + "epoch": 0.029333525055719317, + "kl_loss": 3089.3515625, + "loss_ib": 30.952665328979492, + "step": 102 + }, + { + "ce_ib": 58.3362922668457, + "ce_orig": 1.7745475769042969, + "epoch": 0.029333525055719317, + "kl_loss": 3348.009033203125, + "loss_ib": 33.53842544555664, + "step": 102 + }, + { + "ce_ib": 57.58949661254883, + "ce_orig": 1.5423011779785156, + "epoch": 0.029333525055719317, + "kl_loss": 3305.6611328125, + "loss_ib": 33.114200592041016, + "step": 102 + }, + { + "ce_ib": 61.6888427734375, + "ce_orig": 1.5072656869888306, + "epoch": 0.02962110863469696, + "kl_loss": 3463.50439453125, + "loss_ib": 34.69673156738281, + "step": 103 + }, + { + "ce_ib": 60.02954864501953, + "ce_orig": 1.457014560699463, + "epoch": 0.02962110863469696, + "kl_loss": 3395.760986328125, + "loss_ib": 34.01763916015625, + "step": 103 + }, + { + "ce_ib": 59.91975021362305, + "ce_orig": 1.4898319244384766, + "epoch": 0.02962110863469696, + "kl_loss": 3560.35009765625, + "loss_ib": 35.66341781616211, + "step": 103 + }, + { + "ce_ib": 61.39513397216797, + "ce_orig": 1.2825353145599365, + "epoch": 0.02962110863469696, + "kl_loss": 3457.565673828125, + "loss_ib": 34.63705062866211, + "step": 103 + }, + { + "ce_ib": 58.27676773071289, + "ce_orig": 0.7831246256828308, + "epoch": 0.0299086922136746, + "kl_loss": 3082.635498046875, + "loss_ib": 30.88463020324707, + "step": 104 + }, + { + "ce_ib": 59.33658218383789, + "ce_orig": 1.2523558139801025, + "epoch": 0.0299086922136746, + "kl_loss": 3511.50048828125, + "loss_ib": 35.174339294433594, + "step": 104 + }, + { + "ce_ib": 56.923065185546875, + "ce_orig": 0.8949446678161621, + "epoch": 0.0299086922136746, + "kl_loss": 3366.443603515625, + "loss_ib": 33.72135925292969, + "step": 104 + }, + { + "ce_ib": 56.37105178833008, + "ce_orig": 0.2978222370147705, + "epoch": 0.0299086922136746, + "kl_loss": 2516.5166015625, + "loss_ib": 25.22153663635254, + "step": 104 + }, + { + "epoch": 0.03019627579265224, + "grad_norm": 481.3152770996094, + "learning_rate": 3.1528662420382165e-06, + "loss": 34.5437, + "step": 105 + }, + { + "ce_ib": 58.88717269897461, + "ce_orig": 1.2730207443237305, + "epoch": 0.03019627579265224, + "kl_loss": 3434.847412109375, + "loss_ib": 34.4073600769043, + "step": 105 + }, + { + "ce_ib": 61.64189147949219, + "ce_orig": 1.5729440450668335, + "epoch": 0.03019627579265224, + "kl_loss": 3480.771484375, + "loss_ib": 34.86935806274414, + "step": 105 + }, + { + "ce_ib": 58.20305633544922, + "ce_orig": 0.7971786856651306, + "epoch": 0.03019627579265224, + "kl_loss": 3075.66064453125, + "loss_ib": 30.814809799194336, + "step": 105 + }, + { + "ce_ib": 60.825565338134766, + "ce_orig": 1.2366398572921753, + "epoch": 0.03019627579265224, + "kl_loss": 3418.934326171875, + "loss_ib": 34.25016784667969, + "step": 105 + }, + { + "ce_ib": 58.60723114013672, + "ce_orig": 0.7808988690376282, + "epoch": 0.030483859371629878, + "kl_loss": 3320.3857421875, + "loss_ib": 33.26246643066406, + "step": 106 + }, + { + "ce_ib": 57.563995361328125, + "ce_orig": 0.8758856058120728, + "epoch": 0.030483859371629878, + "kl_loss": 3181.57958984375, + "loss_ib": 31.87335777282715, + "step": 106 + }, + { + "ce_ib": 55.23259353637695, + "ce_orig": 0.22707822918891907, + "epoch": 0.030483859371629878, + "kl_loss": 2120.76416015625, + "loss_ib": 21.26287269592285, + "step": 106 + }, + { + "ce_ib": 54.9997673034668, + "ce_orig": 0.2669578790664673, + "epoch": 0.030483859371629878, + "kl_loss": 2237.90966796875, + "loss_ib": 22.43409538269043, + "step": 106 + }, + { + "ce_ib": 58.98134994506836, + "ce_orig": 1.2273164987564087, + "epoch": 0.03077144295060752, + "kl_loss": 1990.204833984375, + "loss_ib": 19.961029052734375, + "step": 107 + }, + { + "ce_ib": 59.87055587768555, + "ce_orig": 1.3858299255371094, + "epoch": 0.03077144295060752, + "kl_loss": 3156.53173828125, + "loss_ib": 31.625186920166016, + "step": 107 + }, + { + "ce_ib": 55.48119354248047, + "ce_orig": 0.8153777122497559, + "epoch": 0.03077144295060752, + "kl_loss": 3736.759765625, + "loss_ib": 37.42307662963867, + "step": 107 + }, + { + "ce_ib": 58.34563446044922, + "ce_orig": 0.9158485531806946, + "epoch": 0.03077144295060752, + "kl_loss": 3471.65380859375, + "loss_ib": 34.77488327026367, + "step": 107 + }, + { + "ce_ib": 58.289031982421875, + "ce_orig": 1.0781446695327759, + "epoch": 0.03105902652958516, + "kl_loss": 3476.87841796875, + "loss_ib": 34.82707214355469, + "step": 108 + }, + { + "ce_ib": 56.98807144165039, + "ce_orig": 0.7857345938682556, + "epoch": 0.03105902652958516, + "kl_loss": 3440.1845703125, + "loss_ib": 34.458831787109375, + "step": 108 + }, + { + "ce_ib": 58.76127624511719, + "ce_orig": 0.7976669073104858, + "epoch": 0.03105902652958516, + "kl_loss": 3203.509765625, + "loss_ib": 32.0938606262207, + "step": 108 + }, + { + "ce_ib": 60.71581268310547, + "ce_orig": 1.6471302509307861, + "epoch": 0.03105902652958516, + "kl_loss": 3262.56396484375, + "loss_ib": 32.68635559082031, + "step": 108 + }, + { + "ce_ib": 58.29198455810547, + "ce_orig": 0.6282819509506226, + "epoch": 0.0313466101085628, + "kl_loss": 3269.9423828125, + "loss_ib": 32.757713317871094, + "step": 109 + }, + { + "ce_ib": 58.99745559692383, + "ce_orig": 1.346901297569275, + "epoch": 0.0313466101085628, + "kl_loss": 3163.72998046875, + "loss_ib": 31.6962947845459, + "step": 109 + }, + { + "ce_ib": 60.90428924560547, + "ce_orig": 1.9273408651351929, + "epoch": 0.0313466101085628, + "kl_loss": 3333.64697265625, + "loss_ib": 33.39737319946289, + "step": 109 + }, + { + "ce_ib": 56.191925048828125, + "ce_orig": 0.5414481163024902, + "epoch": 0.0313466101085628, + "kl_loss": 2611.040283203125, + "loss_ib": 26.166593551635742, + "step": 109 + }, + { + "epoch": 0.03163419368754044, + "grad_norm": 497.0209655761719, + "learning_rate": 3.3121019108280255e-06, + "loss": 34.4129, + "step": 110 + }, + { + "ce_ib": 59.72053909301758, + "ce_orig": 0.9706757664680481, + "epoch": 0.03163419368754044, + "kl_loss": 3381.095947265625, + "loss_ib": 33.87068176269531, + "step": 110 + }, + { + "ce_ib": 57.03586196899414, + "ce_orig": 0.7681443691253662, + "epoch": 0.03163419368754044, + "kl_loss": 3697.955322265625, + "loss_ib": 37.03658676147461, + "step": 110 + }, + { + "ce_ib": 56.56296157836914, + "ce_orig": 1.0412012338638306, + "epoch": 0.03163419368754044, + "kl_loss": 3397.827880859375, + "loss_ib": 34.03483963012695, + "step": 110 + }, + { + "ce_ib": 58.891090393066406, + "ce_orig": 1.897376537322998, + "epoch": 0.03163419368754044, + "kl_loss": 3184.9423828125, + "loss_ib": 31.908315658569336, + "step": 110 + }, + { + "ce_ib": 56.234859466552734, + "ce_orig": 0.5728248953819275, + "epoch": 0.031921777266518084, + "kl_loss": 3383.22705078125, + "loss_ib": 33.88850402832031, + "step": 111 + }, + { + "ce_ib": 57.63355255126953, + "ce_orig": 1.1094613075256348, + "epoch": 0.031921777266518084, + "kl_loss": 3412.992431640625, + "loss_ib": 34.187557220458984, + "step": 111 + }, + { + "ce_ib": 60.40391540527344, + "ce_orig": 1.840659737586975, + "epoch": 0.031921777266518084, + "kl_loss": 3138.0, + "loss_ib": 31.44040298461914, + "step": 111 + }, + { + "ce_ib": 59.63862609863281, + "ce_orig": 1.0923250913619995, + "epoch": 0.031921777266518084, + "kl_loss": 3231.64892578125, + "loss_ib": 32.37612533569336, + "step": 111 + }, + { + "ce_ib": 57.537532806396484, + "ce_orig": 1.1983774900436401, + "epoch": 0.03220936084549572, + "kl_loss": 3396.81494140625, + "loss_ib": 34.02568435668945, + "step": 112 + }, + { + "ce_ib": 58.707515716552734, + "ce_orig": 0.9369058012962341, + "epoch": 0.03220936084549572, + "kl_loss": 3233.78564453125, + "loss_ib": 32.39656448364258, + "step": 112 + }, + { + "ce_ib": 58.834808349609375, + "ce_orig": 1.592905044555664, + "epoch": 0.03220936084549572, + "kl_loss": 3009.6337890625, + "loss_ib": 30.15517234802246, + "step": 112 + }, + { + "ce_ib": 56.853721618652344, + "ce_orig": 1.277111291885376, + "epoch": 0.03220936084549572, + "kl_loss": 3313.233642578125, + "loss_ib": 33.18918991088867, + "step": 112 + }, + { + "ce_ib": 57.65592575073242, + "ce_orig": 1.234552025794983, + "epoch": 0.032496944424473366, + "kl_loss": 3572.73828125, + "loss_ib": 35.785037994384766, + "step": 113 + }, + { + "ce_ib": 57.79000473022461, + "ce_orig": 0.7371742725372314, + "epoch": 0.032496944424473366, + "kl_loss": 3456.331787109375, + "loss_ib": 34.62110900878906, + "step": 113 + }, + { + "ce_ib": 56.60825729370117, + "ce_orig": 0.4902428388595581, + "epoch": 0.032496944424473366, + "kl_loss": 3120.79296875, + "loss_ib": 31.264537811279297, + "step": 113 + }, + { + "ce_ib": 56.7935905456543, + "ce_orig": 1.1464534997940063, + "epoch": 0.032496944424473366, + "kl_loss": 3540.481201171875, + "loss_ib": 35.461605072021484, + "step": 113 + }, + { + "ce_ib": 57.10232925415039, + "ce_orig": 0.8498454093933105, + "epoch": 0.032784528003451004, + "kl_loss": 3277.51904296875, + "loss_ib": 32.83229064941406, + "step": 114 + }, + { + "ce_ib": 55.13420486450195, + "ce_orig": 0.6758685111999512, + "epoch": 0.032784528003451004, + "kl_loss": 3604.8583984375, + "loss_ib": 36.10371780395508, + "step": 114 + }, + { + "ce_ib": 56.1276741027832, + "ce_orig": 0.5766817331314087, + "epoch": 0.032784528003451004, + "kl_loss": 3085.36865234375, + "loss_ib": 30.909812927246094, + "step": 114 + }, + { + "ce_ib": 57.931053161621094, + "ce_orig": 1.4995805025100708, + "epoch": 0.032784528003451004, + "kl_loss": 3098.906005859375, + "loss_ib": 31.0469913482666, + "step": 114 + }, + { + "epoch": 0.03307211158242864, + "grad_norm": 491.3838806152344, + "learning_rate": 3.4713375796178345e-06, + "loss": 34.0102, + "step": 115 + }, + { + "ce_ib": 59.15627670288086, + "ce_orig": 1.2845157384872437, + "epoch": 0.03307211158242864, + "kl_loss": 3372.32470703125, + "loss_ib": 33.78240203857422, + "step": 115 + }, + { + "ce_ib": 53.61143112182617, + "ce_orig": 0.45459315180778503, + "epoch": 0.03307211158242864, + "kl_loss": 1565.43212890625, + "loss_ib": 15.707931518554688, + "step": 115 + }, + { + "ce_ib": 57.589969635009766, + "ce_orig": 0.9765967130661011, + "epoch": 0.03307211158242864, + "kl_loss": 3676.704345703125, + "loss_ib": 36.82463455200195, + "step": 115 + }, + { + "ce_ib": 60.20054244995117, + "ce_orig": 1.7167049646377563, + "epoch": 0.03307211158242864, + "kl_loss": 3165.83837890625, + "loss_ib": 31.718584060668945, + "step": 115 + }, + { + "ce_ib": 53.816566467285156, + "ce_orig": 0.67730712890625, + "epoch": 0.033359695161406286, + "kl_loss": 3459.393310546875, + "loss_ib": 34.64774703979492, + "step": 116 + }, + { + "ce_ib": 54.58203887939453, + "ce_orig": 0.911034107208252, + "epoch": 0.033359695161406286, + "kl_loss": 3594.5859375, + "loss_ib": 36.00044250488281, + "step": 116 + }, + { + "ce_ib": 57.87051773071289, + "ce_orig": 1.118709921836853, + "epoch": 0.033359695161406286, + "kl_loss": 3187.24072265625, + "loss_ib": 31.93027687072754, + "step": 116 + }, + { + "ce_ib": 56.325897216796875, + "ce_orig": 0.9163821935653687, + "epoch": 0.033359695161406286, + "kl_loss": 3129.631591796875, + "loss_ib": 31.35264015197754, + "step": 116 + }, + { + "ce_ib": 54.92118835449219, + "ce_orig": 0.38672956824302673, + "epoch": 0.033647278740383924, + "kl_loss": 3076.05810546875, + "loss_ib": 30.815502166748047, + "step": 117 + }, + { + "ce_ib": 54.690006256103516, + "ce_orig": 0.8246923685073853, + "epoch": 0.033647278740383924, + "kl_loss": 3273.61962890625, + "loss_ib": 32.79088592529297, + "step": 117 + }, + { + "ce_ib": 54.07289505004883, + "ce_orig": 0.7164103388786316, + "epoch": 0.033647278740383924, + "kl_loss": 3623.53173828125, + "loss_ib": 36.289390563964844, + "step": 117 + }, + { + "ce_ib": 60.675941467285156, + "ce_orig": 1.377237319946289, + "epoch": 0.033647278740383924, + "kl_loss": 3372.191650390625, + "loss_ib": 33.7825927734375, + "step": 117 + }, + { + "ce_ib": 56.7794189453125, + "ce_orig": 0.9724603295326233, + "epoch": 0.03393486231936156, + "kl_loss": 3288.50390625, + "loss_ib": 32.94181823730469, + "step": 118 + }, + { + "ce_ib": 54.519351959228516, + "ce_orig": 0.5410425662994385, + "epoch": 0.03393486231936156, + "kl_loss": 3287.3623046875, + "loss_ib": 32.92814254760742, + "step": 118 + }, + { + "ce_ib": 54.34451675415039, + "ce_orig": 0.8038122653961182, + "epoch": 0.03393486231936156, + "kl_loss": 3050.375, + "loss_ib": 30.558095932006836, + "step": 118 + }, + { + "ce_ib": 55.54384994506836, + "ce_orig": 0.8650107979774475, + "epoch": 0.03393486231936156, + "kl_loss": 3366.908447265625, + "loss_ib": 33.72462844848633, + "step": 118 + }, + { + "ce_ib": 57.188228607177734, + "ce_orig": 0.9910153746604919, + "epoch": 0.034222445898339206, + "kl_loss": 3439.970458984375, + "loss_ib": 34.45689010620117, + "step": 119 + }, + { + "ce_ib": 56.907711029052734, + "ce_orig": 0.7115373611450195, + "epoch": 0.034222445898339206, + "kl_loss": 3301.9189453125, + "loss_ib": 33.07609558105469, + "step": 119 + }, + { + "ce_ib": 57.2123908996582, + "ce_orig": 1.0718692541122437, + "epoch": 0.034222445898339206, + "kl_loss": 3499.33203125, + "loss_ib": 35.050533294677734, + "step": 119 + }, + { + "ce_ib": 53.44661331176758, + "ce_orig": 0.24829663336277008, + "epoch": 0.034222445898339206, + "kl_loss": 2903.864013671875, + "loss_ib": 29.092086791992188, + "step": 119 + }, + { + "epoch": 0.03451002947731684, + "grad_norm": 488.701416015625, + "learning_rate": 3.6305732484076435e-06, + "loss": 33.466, + "step": 120 + }, + { + "ce_ib": 58.145294189453125, + "ce_orig": 1.4251874685287476, + "epoch": 0.03451002947731684, + "kl_loss": 3136.32958984375, + "loss_ib": 31.42144012451172, + "step": 120 + }, + { + "ce_ib": 56.203006744384766, + "ce_orig": 0.8692367076873779, + "epoch": 0.03451002947731684, + "kl_loss": 3459.39892578125, + "loss_ib": 34.65019226074219, + "step": 120 + }, + { + "ce_ib": 53.20535659790039, + "ce_orig": 0.23018254339694977, + "epoch": 0.03451002947731684, + "kl_loss": 1242.847900390625, + "loss_ib": 12.481684684753418, + "step": 120 + }, + { + "ce_ib": 56.525516510009766, + "ce_orig": 1.0064990520477295, + "epoch": 0.03451002947731684, + "kl_loss": 3232.018310546875, + "loss_ib": 32.376708984375, + "step": 120 + }, + { + "ce_ib": 54.402645111083984, + "ce_orig": 0.8779659867286682, + "epoch": 0.03479761305629449, + "kl_loss": 3443.0625, + "loss_ib": 34.48502731323242, + "step": 121 + }, + { + "ce_ib": 57.10426330566406, + "ce_orig": 1.2402876615524292, + "epoch": 0.03479761305629449, + "kl_loss": 3028.55224609375, + "loss_ib": 30.342626571655273, + "step": 121 + }, + { + "ce_ib": 58.463226318359375, + "ce_orig": 1.786939263343811, + "epoch": 0.03479761305629449, + "kl_loss": 3314.563232421875, + "loss_ib": 33.204097747802734, + "step": 121 + }, + { + "ce_ib": 55.72274398803711, + "ce_orig": 0.8808313608169556, + "epoch": 0.03479761305629449, + "kl_loss": 3207.419677734375, + "loss_ib": 32.12991714477539, + "step": 121 + }, + { + "ce_ib": 54.48756408691406, + "ce_orig": 0.7947918772697449, + "epoch": 0.035085196635272126, + "kl_loss": 3365.093505859375, + "loss_ib": 33.705421447753906, + "step": 122 + }, + { + "ce_ib": 55.213951110839844, + "ce_orig": 0.8712708353996277, + "epoch": 0.035085196635272126, + "kl_loss": 3264.61181640625, + "loss_ib": 32.701332092285156, + "step": 122 + }, + { + "ce_ib": 56.54671096801758, + "ce_orig": 0.3289700150489807, + "epoch": 0.035085196635272126, + "kl_loss": 2472.1728515625, + "loss_ib": 24.778276443481445, + "step": 122 + }, + { + "ce_ib": 58.04352951049805, + "ce_orig": 1.5535142421722412, + "epoch": 0.035085196635272126, + "kl_loss": 2923.5888671875, + "loss_ib": 29.29393196105957, + "step": 122 + }, + { + "ce_ib": 55.66524124145508, + "ce_orig": 0.7867345809936523, + "epoch": 0.03537278021424976, + "kl_loss": 3238.93212890625, + "loss_ib": 32.444984436035156, + "step": 123 + }, + { + "ce_ib": 54.70219039916992, + "ce_orig": 0.9664096832275391, + "epoch": 0.03537278021424976, + "kl_loss": 3296.1142578125, + "loss_ib": 33.01584243774414, + "step": 123 + }, + { + "ce_ib": 56.01795196533203, + "ce_orig": 0.7100675702095032, + "epoch": 0.03537278021424976, + "kl_loss": 2146.758544921875, + "loss_ib": 21.523603439331055, + "step": 123 + }, + { + "ce_ib": 56.77366256713867, + "ce_orig": 1.1051965951919556, + "epoch": 0.03537278021424976, + "kl_loss": 3093.79345703125, + "loss_ib": 30.994707107543945, + "step": 123 + }, + { + "ce_ib": 57.009521484375, + "ce_orig": 0.7903485894203186, + "epoch": 0.03566036379322741, + "kl_loss": 3563.074462890625, + "loss_ib": 35.68775177001953, + "step": 124 + }, + { + "ce_ib": 55.95924758911133, + "ce_orig": 0.9127640724182129, + "epoch": 0.03566036379322741, + "kl_loss": 3396.574951171875, + "loss_ib": 34.02170944213867, + "step": 124 + }, + { + "ce_ib": 56.98552703857422, + "ce_orig": 1.0692386627197266, + "epoch": 0.03566036379322741, + "kl_loss": 3032.76025390625, + "loss_ib": 30.384586334228516, + "step": 124 + }, + { + "ce_ib": 54.63241958618164, + "ce_orig": 1.1963261365890503, + "epoch": 0.03566036379322741, + "kl_loss": 3243.490966796875, + "loss_ib": 32.489540100097656, + "step": 124 + }, + { + "epoch": 0.035947947372205045, + "grad_norm": 484.4737854003906, + "learning_rate": 3.789808917197453e-06, + "loss": 32.9421, + "step": 125 + }, + { + "ce_ib": 53.653743743896484, + "ce_orig": 0.7380173802375793, + "epoch": 0.035947947372205045, + "kl_loss": 3123.354736328125, + "loss_ib": 31.287200927734375, + "step": 125 + }, + { + "ce_ib": 59.990928649902344, + "ce_orig": 1.5150277614593506, + "epoch": 0.035947947372205045, + "kl_loss": 3297.04443359375, + "loss_ib": 33.030433654785156, + "step": 125 + }, + { + "ce_ib": 57.65206527709961, + "ce_orig": 0.8391215801239014, + "epoch": 0.035947947372205045, + "kl_loss": 3000.66748046875, + "loss_ib": 30.0643253326416, + "step": 125 + }, + { + "ce_ib": 56.54597854614258, + "ce_orig": 1.0180349349975586, + "epoch": 0.035947947372205045, + "kl_loss": 3431.23974609375, + "loss_ib": 34.36894226074219, + "step": 125 + }, + { + "ce_ib": 54.98883819580078, + "ce_orig": 0.7524531483650208, + "epoch": 0.03623553095118269, + "kl_loss": 3311.05419921875, + "loss_ib": 33.16552734375, + "step": 126 + }, + { + "ce_ib": 54.45235061645508, + "ce_orig": 1.1706984043121338, + "epoch": 0.03623553095118269, + "kl_loss": 2987.413818359375, + "loss_ib": 29.928590774536133, + "step": 126 + }, + { + "ce_ib": 56.98481369018555, + "ce_orig": 1.6676733493804932, + "epoch": 0.03623553095118269, + "kl_loss": 2663.32080078125, + "loss_ib": 26.69019317626953, + "step": 126 + }, + { + "ce_ib": 55.758174896240234, + "ce_orig": 1.054375410079956, + "epoch": 0.03623553095118269, + "kl_loss": 3309.024658203125, + "loss_ib": 33.14600372314453, + "step": 126 + }, + { + "ce_ib": 56.77848815917969, + "ce_orig": 1.3328591585159302, + "epoch": 0.03652311453016033, + "kl_loss": 3304.2041015625, + "loss_ib": 33.09881591796875, + "step": 127 + }, + { + "ce_ib": 57.968807220458984, + "ce_orig": 1.372467041015625, + "epoch": 0.03652311453016033, + "kl_loss": 3255.86376953125, + "loss_ib": 32.61660385131836, + "step": 127 + }, + { + "ce_ib": 53.87700653076172, + "ce_orig": 0.602317214012146, + "epoch": 0.03652311453016033, + "kl_loss": 2769.507080078125, + "loss_ib": 27.748947143554688, + "step": 127 + }, + { + "ce_ib": 57.150821685791016, + "ce_orig": 1.5336121320724487, + "epoch": 0.03652311453016033, + "kl_loss": 3108.03857421875, + "loss_ib": 31.137535095214844, + "step": 127 + }, + { + "ce_ib": 55.34831619262695, + "ce_orig": 1.2944082021713257, + "epoch": 0.036810698109137965, + "kl_loss": 3049.82568359375, + "loss_ib": 30.553606033325195, + "step": 128 + }, + { + "ce_ib": 55.08821105957031, + "ce_orig": 1.2756754159927368, + "epoch": 0.036810698109137965, + "kl_loss": 3294.335205078125, + "loss_ib": 32.99843978881836, + "step": 128 + }, + { + "ce_ib": 56.542110443115234, + "ce_orig": 1.3169299364089966, + "epoch": 0.036810698109137965, + "kl_loss": 3289.144287109375, + "loss_ib": 32.9479866027832, + "step": 128 + }, + { + "ce_ib": 54.76836395263672, + "ce_orig": 0.9867354035377502, + "epoch": 0.036810698109137965, + "kl_loss": 3241.2333984375, + "loss_ib": 32.46710205078125, + "step": 128 + }, + { + "ce_ib": 54.50560760498047, + "ce_orig": 0.9337356090545654, + "epoch": 0.03709828168811561, + "kl_loss": 3071.6708984375, + "loss_ib": 30.77121353149414, + "step": 129 + }, + { + "ce_ib": 56.29538345336914, + "ce_orig": 1.379828691482544, + "epoch": 0.03709828168811561, + "kl_loss": 2586.766845703125, + "loss_ib": 25.92396354675293, + "step": 129 + }, + { + "ce_ib": 54.395912170410156, + "ce_orig": 0.26942014694213867, + "epoch": 0.03709828168811561, + "kl_loss": 3214.80029296875, + "loss_ib": 32.20240020751953, + "step": 129 + }, + { + "ce_ib": 53.4327392578125, + "ce_orig": 1.03145432472229, + "epoch": 0.03709828168811561, + "kl_loss": 3447.09765625, + "loss_ib": 34.524410247802734, + "step": 129 + }, + { + "epoch": 0.03738586526709325, + "grad_norm": 485.5062255859375, + "learning_rate": 3.949044585987262e-06, + "loss": 32.6015, + "step": 130 + }, + { + "ce_ib": 57.03399658203125, + "ce_orig": 1.2322174310684204, + "epoch": 0.03738586526709325, + "kl_loss": 3191.55419921875, + "loss_ib": 31.97257423400879, + "step": 130 + }, + { + "ce_ib": 52.21621322631836, + "ce_orig": 0.743526816368103, + "epoch": 0.03738586526709325, + "kl_loss": 3611.2763671875, + "loss_ib": 36.16497802734375, + "step": 130 + }, + { + "ce_ib": 58.345272064208984, + "ce_orig": 1.358251929283142, + "epoch": 0.03738586526709325, + "kl_loss": 3283.64990234375, + "loss_ib": 32.89484405517578, + "step": 130 + }, + { + "ce_ib": 56.030879974365234, + "ce_orig": 0.6732674241065979, + "epoch": 0.03738586526709325, + "kl_loss": 3234.9384765625, + "loss_ib": 32.40541458129883, + "step": 130 + }, + { + "ce_ib": 57.00092697143555, + "ce_orig": 0.9541739225387573, + "epoch": 0.03767344884607089, + "kl_loss": 3000.126708984375, + "loss_ib": 30.05826759338379, + "step": 131 + }, + { + "ce_ib": 56.356407165527344, + "ce_orig": 1.652900218963623, + "epoch": 0.03767344884607089, + "kl_loss": 2972.48486328125, + "loss_ib": 29.781206130981445, + "step": 131 + }, + { + "ce_ib": 56.224830627441406, + "ce_orig": 1.5765421390533447, + "epoch": 0.03767344884607089, + "kl_loss": 3185.729248046875, + "loss_ib": 31.913516998291016, + "step": 131 + }, + { + "ce_ib": 54.61565017700195, + "ce_orig": 0.7351348996162415, + "epoch": 0.03767344884607089, + "kl_loss": 3193.2255859375, + "loss_ib": 31.98687171936035, + "step": 131 + }, + { + "ce_ib": 54.55472183227539, + "ce_orig": 1.086585283279419, + "epoch": 0.03796103242504853, + "kl_loss": 3093.6298828125, + "loss_ib": 30.990854263305664, + "step": 132 + }, + { + "ce_ib": 52.542877197265625, + "ce_orig": 1.0202161073684692, + "epoch": 0.03796103242504853, + "kl_loss": 3178.2353515625, + "loss_ib": 31.834896087646484, + "step": 132 + }, + { + "ce_ib": 58.19728088378906, + "ce_orig": 1.5428324937820435, + "epoch": 0.03796103242504853, + "kl_loss": 3120.078857421875, + "loss_ib": 31.25898551940918, + "step": 132 + }, + { + "ce_ib": 55.14646911621094, + "ce_orig": 1.1489073038101196, + "epoch": 0.03796103242504853, + "kl_loss": 3333.323974609375, + "loss_ib": 33.38838577270508, + "step": 132 + }, + { + "ce_ib": 58.42383575439453, + "ce_orig": 1.3307501077651978, + "epoch": 0.03824861600402617, + "kl_loss": 3119.617919921875, + "loss_ib": 31.254600524902344, + "step": 133 + }, + { + "ce_ib": 53.10658645629883, + "ce_orig": 0.47057151794433594, + "epoch": 0.03824861600402617, + "kl_loss": 2581.533203125, + "loss_ib": 25.868436813354492, + "step": 133 + }, + { + "ce_ib": 55.19010543823242, + "ce_orig": 1.461709976196289, + "epoch": 0.03824861600402617, + "kl_loss": 3218.223876953125, + "loss_ib": 32.2374267578125, + "step": 133 + }, + { + "ce_ib": 55.269439697265625, + "ce_orig": 0.9877228140830994, + "epoch": 0.03824861600402617, + "kl_loss": 3257.63916015625, + "loss_ib": 32.63166046142578, + "step": 133 + }, + { + "ce_ib": 54.13009262084961, + "ce_orig": 1.1180518865585327, + "epoch": 0.03853619958300381, + "kl_loss": 3161.342529296875, + "loss_ib": 31.66755485534668, + "step": 134 + }, + { + "ce_ib": 51.55915069580078, + "ce_orig": 0.476241797208786, + "epoch": 0.03853619958300381, + "kl_loss": 3272.551025390625, + "loss_ib": 32.777069091796875, + "step": 134 + }, + { + "ce_ib": 51.68458557128906, + "ce_orig": 0.78632652759552, + "epoch": 0.03853619958300381, + "kl_loss": 3286.03662109375, + "loss_ib": 32.91204833984375, + "step": 134 + }, + { + "ce_ib": 52.53611373901367, + "ce_orig": 0.601127564907074, + "epoch": 0.03853619958300381, + "kl_loss": 3292.48193359375, + "loss_ib": 32.97735595703125, + "step": 134 + }, + { + "epoch": 0.03882378316198145, + "grad_norm": 484.1724853515625, + "learning_rate": 4.10828025477707e-06, + "loss": 32.4331, + "step": 135 + }, + { + "ce_ib": 52.33879089355469, + "ce_orig": 0.9939437508583069, + "epoch": 0.03882378316198145, + "kl_loss": 3350.6103515625, + "loss_ib": 33.558441162109375, + "step": 135 + }, + { + "ce_ib": 57.75893783569336, + "ce_orig": 1.900199055671692, + "epoch": 0.03882378316198145, + "kl_loss": 3010.15771484375, + "loss_ib": 30.15933609008789, + "step": 135 + }, + { + "ce_ib": 52.984676361083984, + "ce_orig": 1.0698907375335693, + "epoch": 0.03882378316198145, + "kl_loss": 3073.521484375, + "loss_ib": 30.78820037841797, + "step": 135 + }, + { + "ce_ib": 52.373348236083984, + "ce_orig": 0.7963224649429321, + "epoch": 0.03882378316198145, + "kl_loss": 2985.98486328125, + "loss_ib": 29.912221908569336, + "step": 135 + }, + { + "ce_ib": 52.30683517456055, + "ce_orig": 0.7155612707138062, + "epoch": 0.039111366740959094, + "kl_loss": 3103.802978515625, + "loss_ib": 31.090335845947266, + "step": 136 + }, + { + "ce_ib": 53.33190155029297, + "ce_orig": 0.7446028590202332, + "epoch": 0.039111366740959094, + "kl_loss": 3185.09228515625, + "loss_ib": 31.904254913330078, + "step": 136 + }, + { + "ce_ib": 52.83795928955078, + "ce_orig": 0.77790367603302, + "epoch": 0.039111366740959094, + "kl_loss": 2887.193359375, + "loss_ib": 28.92477035522461, + "step": 136 + }, + { + "ce_ib": 54.76385498046875, + "ce_orig": 0.3635737895965576, + "epoch": 0.039111366740959094, + "kl_loss": 1756.336669921875, + "loss_ib": 17.618131637573242, + "step": 136 + }, + { + "ce_ib": 55.12189865112305, + "ce_orig": 1.2548986673355103, + "epoch": 0.03939895031993673, + "kl_loss": 3142.7470703125, + "loss_ib": 31.48259162902832, + "step": 137 + }, + { + "ce_ib": 52.87950897216797, + "ce_orig": 0.8208221793174744, + "epoch": 0.03939895031993673, + "kl_loss": 3317.441650390625, + "loss_ib": 33.227294921875, + "step": 137 + }, + { + "ce_ib": 53.339599609375, + "ce_orig": 0.7617712020874023, + "epoch": 0.03939895031993673, + "kl_loss": 3318.7333984375, + "loss_ib": 33.24067306518555, + "step": 137 + }, + { + "ce_ib": 52.08175277709961, + "ce_orig": 0.8632348775863647, + "epoch": 0.03939895031993673, + "kl_loss": 2449.124267578125, + "loss_ib": 24.543325424194336, + "step": 137 + }, + { + "ce_ib": 54.37158203125, + "ce_orig": 1.257277250289917, + "epoch": 0.03968653389891437, + "kl_loss": 3025.6142578125, + "loss_ib": 30.310514450073242, + "step": 138 + }, + { + "ce_ib": 54.39937210083008, + "ce_orig": 0.8799593448638916, + "epoch": 0.03968653389891437, + "kl_loss": 3261.203857421875, + "loss_ib": 32.666439056396484, + "step": 138 + }, + { + "ce_ib": 53.31037902832031, + "ce_orig": 0.5872792601585388, + "epoch": 0.03968653389891437, + "kl_loss": 2659.81982421875, + "loss_ib": 26.651508331298828, + "step": 138 + }, + { + "ce_ib": 52.60165786743164, + "ce_orig": 0.9464865922927856, + "epoch": 0.03968653389891437, + "kl_loss": 3371.40185546875, + "loss_ib": 33.76662063598633, + "step": 138 + }, + { + "ce_ib": 52.575260162353516, + "ce_orig": 0.46206337213516235, + "epoch": 0.039974117477892014, + "kl_loss": 2850.687255859375, + "loss_ib": 28.5594482421875, + "step": 139 + }, + { + "ce_ib": 53.40571975708008, + "ce_orig": 0.6606719493865967, + "epoch": 0.039974117477892014, + "kl_loss": 2885.156005859375, + "loss_ib": 28.904964447021484, + "step": 139 + }, + { + "ce_ib": 54.047019958496094, + "ce_orig": 1.0017621517181396, + "epoch": 0.039974117477892014, + "kl_loss": 2731.3134765625, + "loss_ib": 27.3671817779541, + "step": 139 + }, + { + "ce_ib": 53.62920379638672, + "ce_orig": 0.9739691615104675, + "epoch": 0.039974117477892014, + "kl_loss": 3102.27099609375, + "loss_ib": 31.076339721679688, + "step": 139 + }, + { + "epoch": 0.04026170105686965, + "grad_norm": 459.9145202636719, + "learning_rate": 4.26751592356688e-06, + "loss": 31.4281, + "step": 140 + }, + { + "ce_ib": 54.69853973388672, + "ce_orig": 1.1423823833465576, + "epoch": 0.04026170105686965, + "kl_loss": 3194.418701171875, + "loss_ib": 31.998886108398438, + "step": 140 + }, + { + "ce_ib": 50.82587432861328, + "ce_orig": 0.6784489750862122, + "epoch": 0.04026170105686965, + "kl_loss": 3204.444091796875, + "loss_ib": 32.09526443481445, + "step": 140 + }, + { + "ce_ib": 51.895416259765625, + "ce_orig": 0.7560833096504211, + "epoch": 0.04026170105686965, + "kl_loss": 3291.0400390625, + "loss_ib": 32.96229553222656, + "step": 140 + }, + { + "ce_ib": 51.76190948486328, + "ce_orig": 0.8880281448364258, + "epoch": 0.04026170105686965, + "kl_loss": 3192.113037109375, + "loss_ib": 31.97289276123047, + "step": 140 + }, + { + "ce_ib": 55.869300842285156, + "ce_orig": 1.4632692337036133, + "epoch": 0.040549284635847296, + "kl_loss": 3255.39404296875, + "loss_ib": 32.60980987548828, + "step": 141 + }, + { + "ce_ib": 51.57696533203125, + "ce_orig": 1.1582895517349243, + "epoch": 0.040549284635847296, + "kl_loss": 3429.54150390625, + "loss_ib": 34.34699249267578, + "step": 141 + }, + { + "ce_ib": 53.07311248779297, + "ce_orig": 1.187181830406189, + "epoch": 0.040549284635847296, + "kl_loss": 3198.211669921875, + "loss_ib": 32.03519058227539, + "step": 141 + }, + { + "ce_ib": 55.36647415161133, + "ce_orig": 1.443948745727539, + "epoch": 0.040549284635847296, + "kl_loss": 2707.63525390625, + "loss_ib": 27.131717681884766, + "step": 141 + }, + { + "ce_ib": 53.37755584716797, + "ce_orig": 1.1925128698349, + "epoch": 0.040836868214824934, + "kl_loss": 2913.3408203125, + "loss_ib": 29.186784744262695, + "step": 142 + }, + { + "ce_ib": 50.930152893066406, + "ce_orig": 0.631533682346344, + "epoch": 0.040836868214824934, + "kl_loss": 3136.6171875, + "loss_ib": 31.41710090637207, + "step": 142 + }, + { + "ce_ib": 54.71884536743164, + "ce_orig": 0.2883818447589874, + "epoch": 0.040836868214824934, + "kl_loss": 1113.8369140625, + "loss_ib": 11.19308853149414, + "step": 142 + }, + { + "ce_ib": 53.51261901855469, + "ce_orig": 0.9670610427856445, + "epoch": 0.040836868214824934, + "kl_loss": 3197.89208984375, + "loss_ib": 32.032432556152344, + "step": 142 + }, + { + "ce_ib": 53.469810485839844, + "ce_orig": 0.9970093965530396, + "epoch": 0.04112445179380257, + "kl_loss": 3185.8349609375, + "loss_ib": 31.91181755065918, + "step": 143 + }, + { + "ce_ib": 52.19078826904297, + "ce_orig": 0.9311832785606384, + "epoch": 0.04112445179380257, + "kl_loss": 3028.153076171875, + "loss_ib": 30.33371925354004, + "step": 143 + }, + { + "ce_ib": 54.6331901550293, + "ce_orig": 1.481334924697876, + "epoch": 0.04112445179380257, + "kl_loss": 2883.3681640625, + "loss_ib": 28.888315200805664, + "step": 143 + }, + { + "ce_ib": 54.52168273925781, + "ce_orig": 1.2398836612701416, + "epoch": 0.04112445179380257, + "kl_loss": 2991.29931640625, + "loss_ib": 29.967514038085938, + "step": 143 + }, + { + "ce_ib": 55.38058090209961, + "ce_orig": 1.4223779439926147, + "epoch": 0.041412035372780216, + "kl_loss": 2755.46337890625, + "loss_ib": 27.610013961791992, + "step": 144 + }, + { + "ce_ib": 52.242977142333984, + "ce_orig": 0.357547402381897, + "epoch": 0.041412035372780216, + "kl_loss": 2976.813232421875, + "loss_ib": 29.820375442504883, + "step": 144 + }, + { + "ce_ib": 50.533851623535156, + "ce_orig": 0.5890440940856934, + "epoch": 0.041412035372780216, + "kl_loss": 3232.82861328125, + "loss_ib": 32.37881851196289, + "step": 144 + }, + { + "ce_ib": 50.53768539428711, + "ce_orig": 0.7130187153816223, + "epoch": 0.041412035372780216, + "kl_loss": 3210.93310546875, + "loss_ib": 32.15986633300781, + "step": 144 + }, + { + "epoch": 0.041699618951757854, + "grad_norm": 441.0871887207031, + "learning_rate": 4.426751592356688e-06, + "loss": 31.0274, + "step": 145 + }, + { + "ce_ib": 54.21467208862305, + "ce_orig": 1.196142315864563, + "epoch": 0.041699618951757854, + "kl_loss": 3077.82958984375, + "loss_ib": 30.832509994506836, + "step": 145 + }, + { + "ce_ib": 52.88565444946289, + "ce_orig": 0.6000714898109436, + "epoch": 0.041699618951757854, + "kl_loss": 3106.078369140625, + "loss_ib": 31.11366844177246, + "step": 145 + }, + { + "ce_ib": 52.295101165771484, + "ce_orig": 0.775937020778656, + "epoch": 0.041699618951757854, + "kl_loss": 3244.89892578125, + "loss_ib": 32.50128173828125, + "step": 145 + }, + { + "ce_ib": 52.64463424682617, + "ce_orig": 1.2740693092346191, + "epoch": 0.041699618951757854, + "kl_loss": 2585.204833984375, + "loss_ib": 25.904691696166992, + "step": 145 + }, + { + "ce_ib": 49.40985870361328, + "ce_orig": 0.8610404133796692, + "epoch": 0.0419872025307355, + "kl_loss": 3257.540771484375, + "loss_ib": 32.62481689453125, + "step": 146 + }, + { + "ce_ib": 52.093650817871094, + "ce_orig": 0.6095184683799744, + "epoch": 0.0419872025307355, + "kl_loss": 2127.02783203125, + "loss_ib": 21.322372436523438, + "step": 146 + }, + { + "ce_ib": 50.681373596191406, + "ce_orig": 0.5939872860908508, + "epoch": 0.0419872025307355, + "kl_loss": 3274.1162109375, + "loss_ib": 32.79184341430664, + "step": 146 + }, + { + "ce_ib": 52.19261169433594, + "ce_orig": 1.2758476734161377, + "epoch": 0.0419872025307355, + "kl_loss": 3013.127685546875, + "loss_ib": 30.183467864990234, + "step": 146 + }, + { + "ce_ib": 53.93763732910156, + "ce_orig": 1.4467533826828003, + "epoch": 0.042274786109713136, + "kl_loss": 2893.376953125, + "loss_ib": 28.987707138061523, + "step": 147 + }, + { + "ce_ib": 50.615875244140625, + "ce_orig": 0.8429998159408569, + "epoch": 0.042274786109713136, + "kl_loss": 3275.34423828125, + "loss_ib": 32.80405807495117, + "step": 147 + }, + { + "ce_ib": 51.70206832885742, + "ce_orig": 0.6785926222801208, + "epoch": 0.042274786109713136, + "kl_loss": 2999.413818359375, + "loss_ib": 30.045839309692383, + "step": 147 + }, + { + "ce_ib": 52.60684585571289, + "ce_orig": 0.7887744903564453, + "epoch": 0.042274786109713136, + "kl_loss": 2922.228515625, + "loss_ib": 29.274892807006836, + "step": 147 + }, + { + "ce_ib": 53.9986457824707, + "ce_orig": 1.4007996320724487, + "epoch": 0.042562369688690774, + "kl_loss": 2788.4990234375, + "loss_ib": 27.938987731933594, + "step": 148 + }, + { + "ce_ib": 51.32645034790039, + "ce_orig": 0.51473069190979, + "epoch": 0.042562369688690774, + "kl_loss": 3179.380126953125, + "loss_ib": 31.84512710571289, + "step": 148 + }, + { + "ce_ib": 50.38296127319336, + "ce_orig": 0.7060822248458862, + "epoch": 0.042562369688690774, + "kl_loss": 3014.38427734375, + "loss_ib": 30.194225311279297, + "step": 148 + }, + { + "ce_ib": 50.93301773071289, + "ce_orig": 0.6229360699653625, + "epoch": 0.042562369688690774, + "kl_loss": 2787.4912109375, + "loss_ib": 27.925844192504883, + "step": 148 + }, + { + "ce_ib": 50.93965148925781, + "ce_orig": 0.719894528388977, + "epoch": 0.04284995326766842, + "kl_loss": 3031.69287109375, + "loss_ib": 30.367868423461914, + "step": 149 + }, + { + "ce_ib": 54.47814178466797, + "ce_orig": 2.0223007202148438, + "epoch": 0.04284995326766842, + "kl_loss": 2659.58544921875, + "loss_ib": 26.650331497192383, + "step": 149 + }, + { + "ce_ib": 49.89313888549805, + "ce_orig": 0.8729849457740784, + "epoch": 0.04284995326766842, + "kl_loss": 2983.194580078125, + "loss_ib": 29.881837844848633, + "step": 149 + }, + { + "ce_ib": 52.88545608520508, + "ce_orig": 1.3256616592407227, + "epoch": 0.04284995326766842, + "kl_loss": 3132.82666015625, + "loss_ib": 31.38115119934082, + "step": 149 + }, + { + "epoch": 0.043137536846646056, + "grad_norm": 459.27581787109375, + "learning_rate": 4.585987261146497e-06, + "loss": 30.1884, + "step": 150 + }, + { + "ce_ib": 50.605384826660156, + "ce_orig": 1.0216200351715088, + "epoch": 0.043137536846646056, + "kl_loss": 3048.3466796875, + "loss_ib": 30.53407096862793, + "step": 150 + }, + { + "ce_ib": 52.01803207397461, + "ce_orig": 1.3228956460952759, + "epoch": 0.043137536846646056, + "kl_loss": 2962.606201171875, + "loss_ib": 29.67807960510254, + "step": 150 + }, + { + "ce_ib": 53.09737014770508, + "ce_orig": 0.548643946647644, + "epoch": 0.043137536846646056, + "kl_loss": 2902.078125, + "loss_ib": 29.07387924194336, + "step": 150 + }, + { + "ce_ib": 55.06275177001953, + "ce_orig": 1.2563978433609009, + "epoch": 0.043137536846646056, + "kl_loss": 3022.199951171875, + "loss_ib": 30.277061462402344, + "step": 150 + }, + { + "ce_ib": 51.54874801635742, + "ce_orig": 0.9161649942398071, + "epoch": 0.043425120425623694, + "kl_loss": 3167.26806640625, + "loss_ib": 31.724227905273438, + "step": 151 + }, + { + "ce_ib": 55.47027587890625, + "ce_orig": 2.167449712753296, + "epoch": 0.043425120425623694, + "kl_loss": 2913.7783203125, + "loss_ib": 29.193254470825195, + "step": 151 + }, + { + "ce_ib": 52.68526077270508, + "ce_orig": 0.6615663766860962, + "epoch": 0.043425120425623694, + "kl_loss": 2900.98974609375, + "loss_ib": 29.06258201599121, + "step": 151 + }, + { + "ce_ib": 52.60275650024414, + "ce_orig": 1.29401695728302, + "epoch": 0.043425120425623694, + "kl_loss": 2888.08935546875, + "loss_ib": 28.933494567871094, + "step": 151 + }, + { + "ce_ib": 52.2644157409668, + "ce_orig": 0.8575372695922852, + "epoch": 0.04371270400460134, + "kl_loss": 2958.155029296875, + "loss_ib": 29.63381576538086, + "step": 152 + }, + { + "ce_ib": 53.492984771728516, + "ce_orig": 1.9527488946914673, + "epoch": 0.04371270400460134, + "kl_loss": 2661.2138671875, + "loss_ib": 26.665632247924805, + "step": 152 + }, + { + "ce_ib": 51.27360153198242, + "ce_orig": 0.8329764604568481, + "epoch": 0.04371270400460134, + "kl_loss": 3091.322265625, + "loss_ib": 30.964496612548828, + "step": 152 + }, + { + "ce_ib": 50.547359466552734, + "ce_orig": 0.8471105694770813, + "epoch": 0.04371270400460134, + "kl_loss": 3160.12841796875, + "loss_ib": 31.651830673217773, + "step": 152 + }, + { + "ce_ib": 54.09409713745117, + "ce_orig": 1.3073227405548096, + "epoch": 0.044000287583578976, + "kl_loss": 2638.63916015625, + "loss_ib": 26.44048500061035, + "step": 153 + }, + { + "ce_ib": 51.90098571777344, + "ce_orig": 1.1256515979766846, + "epoch": 0.044000287583578976, + "kl_loss": 2839.703857421875, + "loss_ib": 28.44894027709961, + "step": 153 + }, + { + "ce_ib": 52.50508499145508, + "ce_orig": 1.0821564197540283, + "epoch": 0.044000287583578976, + "kl_loss": 2833.94873046875, + "loss_ib": 28.391990661621094, + "step": 153 + }, + { + "ce_ib": 50.86665344238281, + "ce_orig": 0.8515676259994507, + "epoch": 0.044000287583578976, + "kl_loss": 3101.07177734375, + "loss_ib": 31.06158447265625, + "step": 153 + }, + { + "ce_ib": 50.96135711669922, + "ce_orig": 0.7307798862457275, + "epoch": 0.04428787116255662, + "kl_loss": 2950.802490234375, + "loss_ib": 29.55898666381836, + "step": 154 + }, + { + "ce_ib": 48.63287353515625, + "ce_orig": 0.43459776043891907, + "epoch": 0.04428787116255662, + "kl_loss": 2904.9091796875, + "loss_ib": 29.09772300720215, + "step": 154 + }, + { + "ce_ib": 52.380332946777344, + "ce_orig": 1.771660566329956, + "epoch": 0.04428787116255662, + "kl_loss": 2897.521240234375, + "loss_ib": 29.027591705322266, + "step": 154 + }, + { + "ce_ib": 51.10393524169922, + "ce_orig": 0.7512515783309937, + "epoch": 0.04428787116255662, + "kl_loss": 2788.21533203125, + "loss_ib": 27.933256149291992, + "step": 154 + }, + { + "epoch": 0.04457545474153426, + "grad_norm": 448.2861022949219, + "learning_rate": 4.745222929936306e-06, + "loss": 30.0583, + "step": 155 + }, + { + "ce_ib": 49.585208892822266, + "ce_orig": 1.1462219953536987, + "epoch": 0.04457545474153426, + "kl_loss": 3172.34619140625, + "loss_ib": 31.773046493530273, + "step": 155 + }, + { + "ce_ib": 52.14706802368164, + "ce_orig": 1.3412156105041504, + "epoch": 0.04457545474153426, + "kl_loss": 2451.83447265625, + "loss_ib": 24.57048988342285, + "step": 155 + }, + { + "ce_ib": 50.7850341796875, + "ce_orig": 0.6238870620727539, + "epoch": 0.04457545474153426, + "kl_loss": 2603.59716796875, + "loss_ib": 26.08675765991211, + "step": 155 + }, + { + "ce_ib": 53.027767181396484, + "ce_orig": 1.0817673206329346, + "epoch": 0.04457545474153426, + "kl_loss": 2944.25390625, + "loss_ib": 29.49556541442871, + "step": 155 + }, + { + "ce_ib": 52.55374526977539, + "ce_orig": 0.7729134559631348, + "epoch": 0.044863038320511896, + "kl_loss": 2258.596435546875, + "loss_ib": 22.638517379760742, + "step": 156 + }, + { + "ce_ib": 50.43128967285156, + "ce_orig": 0.5304668545722961, + "epoch": 0.044863038320511896, + "kl_loss": 2332.14794921875, + "loss_ib": 23.371912002563477, + "step": 156 + }, + { + "ce_ib": 48.52753448486328, + "ce_orig": 0.8267412185668945, + "epoch": 0.044863038320511896, + "kl_loss": 2993.38134765625, + "loss_ib": 29.98233985900879, + "step": 156 + }, + { + "ce_ib": 47.48508834838867, + "ce_orig": 0.8616426587104797, + "epoch": 0.044863038320511896, + "kl_loss": 2922.7626953125, + "loss_ib": 29.27511215209961, + "step": 156 + }, + { + "ce_ib": 48.53927230834961, + "ce_orig": 0.7964724898338318, + "epoch": 0.04515062189948954, + "kl_loss": 2892.180419921875, + "loss_ib": 28.97034454345703, + "step": 157 + }, + { + "ce_ib": 48.53841018676758, + "ce_orig": 0.7484610080718994, + "epoch": 0.04515062189948954, + "kl_loss": 2939.5947265625, + "loss_ib": 29.44448471069336, + "step": 157 + }, + { + "ce_ib": 47.32265090942383, + "ce_orig": 0.5679759979248047, + "epoch": 0.04515062189948954, + "kl_loss": 3089.84228515625, + "loss_ib": 30.945743560791016, + "step": 157 + }, + { + "ce_ib": 50.060791015625, + "ce_orig": 0.9825291633605957, + "epoch": 0.04515062189948954, + "kl_loss": 2617.75341796875, + "loss_ib": 26.22759437561035, + "step": 157 + }, + { + "ce_ib": 54.85032272338867, + "ce_orig": 1.4325385093688965, + "epoch": 0.04543820547846718, + "kl_loss": 2687.5576171875, + "loss_ib": 26.93042755126953, + "step": 158 + }, + { + "ce_ib": 51.16709899902344, + "ce_orig": 1.104825735092163, + "epoch": 0.04543820547846718, + "kl_loss": 2728.729736328125, + "loss_ib": 27.338462829589844, + "step": 158 + }, + { + "ce_ib": 51.86174774169922, + "ce_orig": 1.2090163230895996, + "epoch": 0.04543820547846718, + "kl_loss": 2632.31298828125, + "loss_ib": 26.37499237060547, + "step": 158 + }, + { + "ce_ib": 50.25190734863281, + "ce_orig": 0.7247406244277954, + "epoch": 0.04543820547846718, + "kl_loss": 2597.2509765625, + "loss_ib": 26.02276039123535, + "step": 158 + }, + { + "ce_ib": 50.78385543823242, + "ce_orig": 0.8480434417724609, + "epoch": 0.04572578905744482, + "kl_loss": 2646.48876953125, + "loss_ib": 26.515670776367188, + "step": 159 + }, + { + "ce_ib": 52.23230743408203, + "ce_orig": 1.2253400087356567, + "epoch": 0.04572578905744482, + "kl_loss": 2775.04931640625, + "loss_ib": 27.802724838256836, + "step": 159 + }, + { + "ce_ib": 54.1110954284668, + "ce_orig": 1.6037498712539673, + "epoch": 0.04572578905744482, + "kl_loss": 2728.93798828125, + "loss_ib": 27.343490600585938, + "step": 159 + }, + { + "ce_ib": 49.18448257446289, + "ce_orig": 0.9601776599884033, + "epoch": 0.04572578905744482, + "kl_loss": 2112.19775390625, + "loss_ib": 21.171161651611328, + "step": 159 + }, + { + "epoch": 0.04601337263642246, + "grad_norm": 445.6031799316406, + "learning_rate": 4.904458598726115e-06, + "loss": 28.9221, + "step": 160 + }, + { + "ce_ib": 50.62841033935547, + "ce_orig": 0.9100584387779236, + "epoch": 0.04601337263642246, + "kl_loss": 2742.574462890625, + "loss_ib": 27.47637176513672, + "step": 160 + }, + { + "ce_ib": 51.763214111328125, + "ce_orig": 1.2738044261932373, + "epoch": 0.04601337263642246, + "kl_loss": 2890.77099609375, + "loss_ib": 28.95947265625, + "step": 160 + }, + { + "ce_ib": 48.35606384277344, + "ce_orig": 0.8663270473480225, + "epoch": 0.04601337263642246, + "kl_loss": 2612.95361328125, + "loss_ib": 26.177892684936523, + "step": 160 + }, + { + "ce_ib": 48.03151321411133, + "ce_orig": 1.0263557434082031, + "epoch": 0.04601337263642246, + "kl_loss": 3139.1376953125, + "loss_ib": 31.439409255981445, + "step": 160 + }, + { + "ce_ib": 48.87652587890625, + "ce_orig": 0.6928001046180725, + "epoch": 0.0463009562154001, + "kl_loss": 2784.03466796875, + "loss_ib": 27.889223098754883, + "step": 161 + }, + { + "ce_ib": 48.439903259277344, + "ce_orig": 1.0835696458816528, + "epoch": 0.0463009562154001, + "kl_loss": 3102.0654296875, + "loss_ib": 31.069093704223633, + "step": 161 + }, + { + "ce_ib": 50.39353561401367, + "ce_orig": 0.7698415517807007, + "epoch": 0.0463009562154001, + "kl_loss": 1483.0360107421875, + "loss_ib": 14.880752563476562, + "step": 161 + }, + { + "ce_ib": 51.16973876953125, + "ce_orig": 1.4212281703948975, + "epoch": 0.0463009562154001, + "kl_loss": 2305.23681640625, + "loss_ib": 23.10353660583496, + "step": 161 + }, + { + "ce_ib": 49.88762283325195, + "ce_orig": 0.6684384346008301, + "epoch": 0.04658853979437774, + "kl_loss": 2829.216796875, + "loss_ib": 28.34205436706543, + "step": 162 + }, + { + "ce_ib": 47.18947219848633, + "ce_orig": 1.0846861600875854, + "epoch": 0.04658853979437774, + "kl_loss": 2960.41796875, + "loss_ib": 29.651369094848633, + "step": 162 + }, + { + "ce_ib": 48.81897735595703, + "ce_orig": 0.5415892004966736, + "epoch": 0.04658853979437774, + "kl_loss": 2178.2265625, + "loss_ib": 21.831083297729492, + "step": 162 + }, + { + "ce_ib": 48.079383850097656, + "ce_orig": 0.9551630020141602, + "epoch": 0.04658853979437774, + "kl_loss": 3026.81884765625, + "loss_ib": 30.316267013549805, + "step": 162 + }, + { + "ce_ib": 48.57440185546875, + "ce_orig": 0.8759099245071411, + "epoch": 0.04687612337335538, + "kl_loss": 2439.7685546875, + "loss_ib": 24.446258544921875, + "step": 163 + }, + { + "ce_ib": 49.86715316772461, + "ce_orig": 1.0347627401351929, + "epoch": 0.04687612337335538, + "kl_loss": 2655.91357421875, + "loss_ib": 26.6090030670166, + "step": 163 + }, + { + "ce_ib": 49.29155731201172, + "ce_orig": 0.8745110034942627, + "epoch": 0.04687612337335538, + "kl_loss": 2898.206298828125, + "loss_ib": 29.031354904174805, + "step": 163 + }, + { + "ce_ib": 50.21908950805664, + "ce_orig": 1.458777904510498, + "epoch": 0.04687612337335538, + "kl_loss": 2556.103515625, + "loss_ib": 25.61125373840332, + "step": 163 + }, + { + "ce_ib": 48.91142654418945, + "ce_orig": 0.6749159693717957, + "epoch": 0.047163706952333025, + "kl_loss": 2708.091796875, + "loss_ib": 27.12982940673828, + "step": 164 + }, + { + "ce_ib": 50.982364654541016, + "ce_orig": 0.6340957283973694, + "epoch": 0.047163706952333025, + "kl_loss": 2460.154296875, + "loss_ib": 24.652523040771484, + "step": 164 + }, + { + "ce_ib": 51.6622428894043, + "ce_orig": 0.8664228320121765, + "epoch": 0.047163706952333025, + "kl_loss": 2626.44091796875, + "loss_ib": 26.316070556640625, + "step": 164 + }, + { + "ce_ib": 48.696929931640625, + "ce_orig": 0.46712541580200195, + "epoch": 0.047163706952333025, + "kl_loss": 2673.508544921875, + "loss_ib": 26.783781051635742, + "step": 164 + }, + { + "epoch": 0.04745129053131066, + "grad_norm": 424.326904296875, + "learning_rate": 5.063694267515924e-06, + "loss": 28.6858, + "step": 165 + }, + { + "ce_ib": 47.32035827636719, + "ce_orig": 0.7065096497535706, + "epoch": 0.04745129053131066, + "kl_loss": 2612.828857421875, + "loss_ib": 26.175607681274414, + "step": 165 + }, + { + "ce_ib": 53.695343017578125, + "ce_orig": 0.9257593154907227, + "epoch": 0.04745129053131066, + "kl_loss": 2765.24658203125, + "loss_ib": 27.706161499023438, + "step": 165 + }, + { + "ce_ib": 46.061485290527344, + "ce_orig": 0.6095507740974426, + "epoch": 0.04745129053131066, + "kl_loss": 2924.33349609375, + "loss_ib": 29.289396286010742, + "step": 165 + }, + { + "ce_ib": 49.95652770996094, + "ce_orig": 0.39805570244789124, + "epoch": 0.04745129053131066, + "kl_loss": 2691.94287109375, + "loss_ib": 26.969383239746094, + "step": 165 + }, + { + "ce_ib": 48.33354187011719, + "ce_orig": 1.066157579421997, + "epoch": 0.0477388741102883, + "kl_loss": 2646.9306640625, + "loss_ib": 26.51763916015625, + "step": 166 + }, + { + "ce_ib": 44.15623474121094, + "ce_orig": 0.12060567736625671, + "epoch": 0.0477388741102883, + "kl_loss": 1492.6593017578125, + "loss_ib": 14.970748901367188, + "step": 166 + }, + { + "ce_ib": 49.3094367980957, + "ce_orig": 1.4275071620941162, + "epoch": 0.0477388741102883, + "kl_loss": 2720.3349609375, + "loss_ib": 27.25265884399414, + "step": 166 + }, + { + "ce_ib": 48.710384368896484, + "ce_orig": 0.6945171356201172, + "epoch": 0.0477388741102883, + "kl_loss": 2963.2177734375, + "loss_ib": 29.68088722229004, + "step": 166 + }, + { + "ce_ib": 50.00297927856445, + "ce_orig": 1.062921404838562, + "epoch": 0.048026457689265944, + "kl_loss": 2493.588134765625, + "loss_ib": 24.985883712768555, + "step": 167 + }, + { + "ce_ib": 48.40403366088867, + "ce_orig": 0.8593358397483826, + "epoch": 0.048026457689265944, + "kl_loss": 2742.294921875, + "loss_ib": 27.471351623535156, + "step": 167 + }, + { + "ce_ib": 46.95838165283203, + "ce_orig": 0.5543254017829895, + "epoch": 0.048026457689265944, + "kl_loss": 2647.65576171875, + "loss_ib": 26.523515701293945, + "step": 167 + }, + { + "ce_ib": 48.49465560913086, + "ce_orig": 0.9735248684883118, + "epoch": 0.048026457689265944, + "kl_loss": 2853.06201171875, + "loss_ib": 28.579113006591797, + "step": 167 + }, + { + "ce_ib": 47.27997589111328, + "ce_orig": 0.986024022102356, + "epoch": 0.04831404126824358, + "kl_loss": 2900.328369140625, + "loss_ib": 29.05056381225586, + "step": 168 + }, + { + "ce_ib": 47.31760025024414, + "ce_orig": 0.7703031897544861, + "epoch": 0.04831404126824358, + "kl_loss": 2637.726318359375, + "loss_ib": 26.424579620361328, + "step": 168 + }, + { + "ce_ib": 48.37574768066406, + "ce_orig": 0.9344309568405151, + "epoch": 0.04831404126824358, + "kl_loss": 2614.62939453125, + "loss_ib": 26.194669723510742, + "step": 168 + }, + { + "ce_ib": 46.49268341064453, + "ce_orig": 0.7439426183700562, + "epoch": 0.04831404126824358, + "kl_loss": 2807.43408203125, + "loss_ib": 28.120832443237305, + "step": 168 + }, + { + "ce_ib": 48.985836029052734, + "ce_orig": 1.2297948598861694, + "epoch": 0.04860162484722123, + "kl_loss": 2628.673828125, + "loss_ib": 26.335723876953125, + "step": 169 + }, + { + "ce_ib": 47.281558990478516, + "ce_orig": 1.0321601629257202, + "epoch": 0.04860162484722123, + "kl_loss": 2619.60400390625, + "loss_ib": 26.243322372436523, + "step": 169 + }, + { + "ce_ib": 47.78618621826172, + "ce_orig": 1.0097578763961792, + "epoch": 0.04860162484722123, + "kl_loss": 2793.48046875, + "loss_ib": 27.982589721679688, + "step": 169 + }, + { + "ce_ib": 47.44657897949219, + "ce_orig": 1.0025876760482788, + "epoch": 0.04860162484722123, + "kl_loss": 2977.064453125, + "loss_ib": 29.818090438842773, + "step": 169 + }, + { + "epoch": 0.048889208426198864, + "grad_norm": 426.09820556640625, + "learning_rate": 5.222929936305733e-06, + "loss": 27.4107, + "step": 170 + }, + { + "ce_ib": 48.28227996826172, + "ce_orig": 1.2277311086654663, + "epoch": 0.048889208426198864, + "kl_loss": 2577.318359375, + "loss_ib": 25.82146453857422, + "step": 170 + }, + { + "ce_ib": 48.456058502197266, + "ce_orig": 1.0684230327606201, + "epoch": 0.048889208426198864, + "kl_loss": 2181.768798828125, + "loss_ib": 21.86614418029785, + "step": 170 + }, + { + "ce_ib": 46.85710906982422, + "ce_orig": 0.6369960308074951, + "epoch": 0.048889208426198864, + "kl_loss": 2463.142578125, + "loss_ib": 24.67828369140625, + "step": 170 + }, + { + "ce_ib": 50.36212921142578, + "ce_orig": 1.4817943572998047, + "epoch": 0.048889208426198864, + "kl_loss": 2810.15185546875, + "loss_ib": 28.151878356933594, + "step": 170 + }, + { + "ce_ib": 52.663734436035156, + "ce_orig": 1.3452659845352173, + "epoch": 0.0491767920051765, + "kl_loss": 2626.7080078125, + "loss_ib": 26.31974220275879, + "step": 171 + }, + { + "ce_ib": 44.656044006347656, + "ce_orig": 0.6703915596008301, + "epoch": 0.0491767920051765, + "kl_loss": 2806.165771484375, + "loss_ib": 28.106313705444336, + "step": 171 + }, + { + "ce_ib": 49.89923858642578, + "ce_orig": 0.8543052673339844, + "epoch": 0.0491767920051765, + "kl_loss": 2713.99462890625, + "loss_ib": 27.18984603881836, + "step": 171 + }, + { + "ce_ib": 43.8620719909668, + "ce_orig": 0.943006157875061, + "epoch": 0.0491767920051765, + "kl_loss": 2850.892578125, + "loss_ib": 28.55278778076172, + "step": 171 + }, + { + "ce_ib": 49.70503234863281, + "ce_orig": 1.0421463251113892, + "epoch": 0.04946437558415415, + "kl_loss": 2539.814453125, + "loss_ib": 25.44784927368164, + "step": 172 + }, + { + "ce_ib": 46.116146087646484, + "ce_orig": 0.8185178637504578, + "epoch": 0.04946437558415415, + "kl_loss": 2844.80224609375, + "loss_ib": 28.494136810302734, + "step": 172 + }, + { + "ce_ib": 48.9494514465332, + "ce_orig": 1.0492463111877441, + "epoch": 0.04946437558415415, + "kl_loss": 2648.58056640625, + "loss_ib": 26.53475570678711, + "step": 172 + }, + { + "ce_ib": 45.931827545166016, + "ce_orig": 0.6465027332305908, + "epoch": 0.04946437558415415, + "kl_loss": 2837.117919921875, + "loss_ib": 28.417110443115234, + "step": 172 + }, + { + "ce_ib": 46.874114990234375, + "ce_orig": 0.8541361093521118, + "epoch": 0.049751959163131784, + "kl_loss": 2705.7646484375, + "loss_ib": 27.104520797729492, + "step": 173 + }, + { + "ce_ib": 49.18909454345703, + "ce_orig": 0.8849540948867798, + "epoch": 0.049751959163131784, + "kl_loss": 2525.669921875, + "loss_ib": 25.30588722229004, + "step": 173 + }, + { + "ce_ib": 49.24184036254883, + "ce_orig": 1.275468111038208, + "epoch": 0.049751959163131784, + "kl_loss": 2448.7724609375, + "loss_ib": 24.536964416503906, + "step": 173 + }, + { + "ce_ib": 48.2338981628418, + "ce_orig": 1.5680046081542969, + "epoch": 0.049751959163131784, + "kl_loss": 2418.929443359375, + "loss_ib": 24.23752784729004, + "step": 173 + }, + { + "ce_ib": 46.14396667480469, + "ce_orig": 0.541758120059967, + "epoch": 0.05003954274210943, + "kl_loss": 1417.190185546875, + "loss_ib": 14.218045234680176, + "step": 174 + }, + { + "ce_ib": 45.08213806152344, + "ce_orig": 1.0284433364868164, + "epoch": 0.05003954274210943, + "kl_loss": 2671.77490234375, + "loss_ib": 26.76283073425293, + "step": 174 + }, + { + "ce_ib": 47.65272903442383, + "ce_orig": 0.7605993151664734, + "epoch": 0.05003954274210943, + "kl_loss": 2763.138916015625, + "loss_ib": 27.67904281616211, + "step": 174 + }, + { + "ce_ib": 45.709381103515625, + "ce_orig": 0.6788672208786011, + "epoch": 0.05003954274210943, + "kl_loss": 2476.36376953125, + "loss_ib": 24.80934715270996, + "step": 174 + }, + { + "epoch": 0.050327126321087066, + "grad_norm": 413.482666015625, + "learning_rate": 5.3821656050955415e-06, + "loss": 27.0741, + "step": 175 + }, + { + "ce_ib": 50.374427795410156, + "ce_orig": 1.4046454429626465, + "epoch": 0.050327126321087066, + "kl_loss": 2682.432373046875, + "loss_ib": 26.874696731567383, + "step": 175 + }, + { + "ce_ib": 45.06935119628906, + "ce_orig": 0.6166799068450928, + "epoch": 0.050327126321087066, + "kl_loss": 2835.7060546875, + "loss_ib": 28.402128219604492, + "step": 175 + }, + { + "ce_ib": 48.812774658203125, + "ce_orig": 0.8136134147644043, + "epoch": 0.050327126321087066, + "kl_loss": 2307.1552734375, + "loss_ib": 23.120365142822266, + "step": 175 + }, + { + "ce_ib": 46.97080612182617, + "ce_orig": 0.8589736819267273, + "epoch": 0.050327126321087066, + "kl_loss": 2857.4111328125, + "loss_ib": 28.62108039855957, + "step": 175 + }, + { + "ce_ib": 47.42732238769531, + "ce_orig": 0.9090732336044312, + "epoch": 0.050614709900064704, + "kl_loss": 2458.442138671875, + "loss_ib": 24.631847381591797, + "step": 176 + }, + { + "ce_ib": 48.879520416259766, + "ce_orig": 1.1093182563781738, + "epoch": 0.050614709900064704, + "kl_loss": 2716.2275390625, + "loss_ib": 27.21115493774414, + "step": 176 + }, + { + "ce_ib": 45.72584915161133, + "ce_orig": 0.7976894378662109, + "epoch": 0.050614709900064704, + "kl_loss": 2734.431640625, + "loss_ib": 27.39004135131836, + "step": 176 + }, + { + "ce_ib": 48.863277435302734, + "ce_orig": 1.1800131797790527, + "epoch": 0.050614709900064704, + "kl_loss": 2549.98583984375, + "loss_ib": 25.54871940612793, + "step": 176 + }, + { + "ce_ib": 49.88660430908203, + "ce_orig": 1.3869460821151733, + "epoch": 0.05090229347904235, + "kl_loss": 2348.32080078125, + "loss_ib": 23.53309440612793, + "step": 177 + }, + { + "ce_ib": 46.74383544921875, + "ce_orig": 0.861750602722168, + "epoch": 0.05090229347904235, + "kl_loss": 2821.16943359375, + "loss_ib": 28.25843620300293, + "step": 177 + }, + { + "ce_ib": 46.5212516784668, + "ce_orig": 0.9110401272773743, + "epoch": 0.05090229347904235, + "kl_loss": 1920.51806640625, + "loss_ib": 19.25170135498047, + "step": 177 + }, + { + "ce_ib": 45.50922775268555, + "ce_orig": 0.762277364730835, + "epoch": 0.05090229347904235, + "kl_loss": 2755.8955078125, + "loss_ib": 27.60446548461914, + "step": 177 + }, + { + "ce_ib": 45.10780715942383, + "ce_orig": 0.9310659170150757, + "epoch": 0.051189877058019986, + "kl_loss": 2471.52685546875, + "loss_ib": 24.7603759765625, + "step": 178 + }, + { + "ce_ib": 46.319915771484375, + "ce_orig": 0.8169469237327576, + "epoch": 0.051189877058019986, + "kl_loss": 2594.20947265625, + "loss_ib": 25.988414764404297, + "step": 178 + }, + { + "ce_ib": 49.259620666503906, + "ce_orig": 1.4122384786605835, + "epoch": 0.051189877058019986, + "kl_loss": 2652.73583984375, + "loss_ib": 26.576618194580078, + "step": 178 + }, + { + "ce_ib": 47.23049545288086, + "ce_orig": 1.2408967018127441, + "epoch": 0.051189877058019986, + "kl_loss": 2727.913818359375, + "loss_ib": 27.32636833190918, + "step": 178 + }, + { + "ce_ib": 48.52485656738281, + "ce_orig": 0.561891496181488, + "epoch": 0.051477460636997624, + "kl_loss": 2130.169921875, + "loss_ib": 21.350223541259766, + "step": 179 + }, + { + "ce_ib": 51.518375396728516, + "ce_orig": 1.9220662117004395, + "epoch": 0.051477460636997624, + "kl_loss": 2634.577392578125, + "loss_ib": 26.39729118347168, + "step": 179 + }, + { + "ce_ib": 45.864845275878906, + "ce_orig": 0.9671883583068848, + "epoch": 0.051477460636997624, + "kl_loss": 2693.470458984375, + "loss_ib": 26.980567932128906, + "step": 179 + }, + { + "ce_ib": 45.78055191040039, + "ce_orig": 0.9218305349349976, + "epoch": 0.051477460636997624, + "kl_loss": 2529.023681640625, + "loss_ib": 25.336017608642578, + "step": 179 + }, + { + "epoch": 0.05176504421597527, + "grad_norm": 409.18316650390625, + "learning_rate": 5.541401273885351e-06, + "loss": 26.2136, + "step": 180 + }, + { + "ce_ib": 47.819671630859375, + "ce_orig": 1.0385197401046753, + "epoch": 0.05176504421597527, + "kl_loss": 2495.19580078125, + "loss_ib": 24.99977684020996, + "step": 180 + }, + { + "ce_ib": 48.391448974609375, + "ce_orig": 1.5398671627044678, + "epoch": 0.05176504421597527, + "kl_loss": 2459.814453125, + "loss_ib": 24.646535873413086, + "step": 180 + }, + { + "ce_ib": 45.70133590698242, + "ce_orig": 1.4346660375595093, + "epoch": 0.05176504421597527, + "kl_loss": 2470.22998046875, + "loss_ib": 24.74799919128418, + "step": 180 + }, + { + "ce_ib": 45.42562484741211, + "ce_orig": 1.027616262435913, + "epoch": 0.05176504421597527, + "kl_loss": 2735.41064453125, + "loss_ib": 27.3995304107666, + "step": 180 + }, + { + "ce_ib": 46.22211456298828, + "ce_orig": 1.2898683547973633, + "epoch": 0.052052627794952906, + "kl_loss": 2344.324462890625, + "loss_ib": 23.48946762084961, + "step": 181 + }, + { + "ce_ib": 45.933162689208984, + "ce_orig": 0.3807089924812317, + "epoch": 0.052052627794952906, + "kl_loss": 2405.65283203125, + "loss_ib": 24.102460861206055, + "step": 181 + }, + { + "ce_ib": 48.467613220214844, + "ce_orig": 1.0768738985061646, + "epoch": 0.052052627794952906, + "kl_loss": 2349.7158203125, + "loss_ib": 23.545623779296875, + "step": 181 + }, + { + "ce_ib": 43.66925811767578, + "ce_orig": 0.540174126625061, + "epoch": 0.052052627794952906, + "kl_loss": 2476.49609375, + "loss_ib": 24.808629989624023, + "step": 181 + }, + { + "ce_ib": 50.432796478271484, + "ce_orig": 1.4778696298599243, + "epoch": 0.05234021137393055, + "kl_loss": 2345.696533203125, + "loss_ib": 23.507396697998047, + "step": 182 + }, + { + "ce_ib": 44.97416687011719, + "ce_orig": 0.8222552537918091, + "epoch": 0.05234021137393055, + "kl_loss": 2430.04833984375, + "loss_ib": 24.345455169677734, + "step": 182 + }, + { + "ce_ib": 48.673431396484375, + "ce_orig": 1.0176633596420288, + "epoch": 0.05234021137393055, + "kl_loss": 2423.87109375, + "loss_ib": 24.287384033203125, + "step": 182 + }, + { + "ce_ib": 44.51708221435547, + "ce_orig": 0.6682273745536804, + "epoch": 0.05234021137393055, + "kl_loss": 2574.65380859375, + "loss_ib": 25.791053771972656, + "step": 182 + }, + { + "ce_ib": 46.51145935058594, + "ce_orig": 1.098725438117981, + "epoch": 0.05262779495290819, + "kl_loss": 2416.0830078125, + "loss_ib": 24.207340240478516, + "step": 183 + }, + { + "ce_ib": 48.851741790771484, + "ce_orig": 1.3513818979263306, + "epoch": 0.05262779495290819, + "kl_loss": 2428.113037109375, + "loss_ib": 24.32998275756836, + "step": 183 + }, + { + "ce_ib": 44.624210357666016, + "ce_orig": 0.6521418690681458, + "epoch": 0.05262779495290819, + "kl_loss": 2469.832275390625, + "loss_ib": 24.74294662475586, + "step": 183 + }, + { + "ce_ib": 48.94157791137695, + "ce_orig": 1.2012220621109009, + "epoch": 0.05262779495290819, + "kl_loss": 2484.021484375, + "loss_ib": 24.8891544342041, + "step": 183 + }, + { + "ce_ib": 47.77019500732422, + "ce_orig": 0.8514222502708435, + "epoch": 0.052915378531885826, + "kl_loss": 2497.58349609375, + "loss_ib": 25.023605346679688, + "step": 184 + }, + { + "ce_ib": 46.87969970703125, + "ce_orig": 1.339136004447937, + "epoch": 0.052915378531885826, + "kl_loss": 2096.02587890625, + "loss_ib": 21.007137298583984, + "step": 184 + }, + { + "ce_ib": 48.20975875854492, + "ce_orig": 1.381858229637146, + "epoch": 0.052915378531885826, + "kl_loss": 2225.2705078125, + "loss_ib": 22.300914764404297, + "step": 184 + }, + { + "ce_ib": 48.024993896484375, + "ce_orig": 0.6926367282867432, + "epoch": 0.052915378531885826, + "kl_loss": 2289.90087890625, + "loss_ib": 22.947032928466797, + "step": 184 + }, + { + "epoch": 0.05320296211086347, + "grad_norm": 394.3677062988281, + "learning_rate": 5.7006369426751594e-06, + "loss": 25.7313, + "step": 185 + }, + { + "ce_ib": 46.50983428955078, + "ce_orig": 1.1019322872161865, + "epoch": 0.05320296211086347, + "kl_loss": 2536.72998046875, + "loss_ib": 25.413808822631836, + "step": 185 + }, + { + "ce_ib": 44.686187744140625, + "ce_orig": 0.7587331533432007, + "epoch": 0.05320296211086347, + "kl_loss": 2358.890380859375, + "loss_ib": 23.633588790893555, + "step": 185 + }, + { + "ce_ib": 44.013580322265625, + "ce_orig": 0.7084860801696777, + "epoch": 0.05320296211086347, + "kl_loss": 2679.59033203125, + "loss_ib": 26.839916229248047, + "step": 185 + }, + { + "ce_ib": 43.398311614990234, + "ce_orig": 0.9784666299819946, + "epoch": 0.05320296211086347, + "kl_loss": 2370.73291015625, + "loss_ib": 23.7507266998291, + "step": 185 + }, + { + "ce_ib": 44.2143669128418, + "ce_orig": 0.786540687084198, + "epoch": 0.05349054568984111, + "kl_loss": 2228.3095703125, + "loss_ib": 22.327308654785156, + "step": 186 + }, + { + "ce_ib": 46.3162727355957, + "ce_orig": 1.3913073539733887, + "epoch": 0.05349054568984111, + "kl_loss": 2373.01953125, + "loss_ib": 23.77651023864746, + "step": 186 + }, + { + "ce_ib": 46.72264862060547, + "ce_orig": 1.0480103492736816, + "epoch": 0.05349054568984111, + "kl_loss": 2314.23193359375, + "loss_ib": 23.189043045043945, + "step": 186 + }, + { + "ce_ib": 48.647151947021484, + "ce_orig": 1.5039794445037842, + "epoch": 0.05349054568984111, + "kl_loss": 1132.71826171875, + "loss_ib": 11.375829696655273, + "step": 186 + }, + { + "ce_ib": 43.383331298828125, + "ce_orig": 0.5294433832168579, + "epoch": 0.05377812926881875, + "kl_loss": 1875.989990234375, + "loss_ib": 18.80328369140625, + "step": 187 + }, + { + "ce_ib": 46.24761962890625, + "ce_orig": 0.8533762693405151, + "epoch": 0.05377812926881875, + "kl_loss": 2202.668701171875, + "loss_ib": 22.072933197021484, + "step": 187 + }, + { + "ce_ib": 43.863502502441406, + "ce_orig": 1.4904100894927979, + "epoch": 0.05377812926881875, + "kl_loss": 2434.986083984375, + "loss_ib": 24.393722534179688, + "step": 187 + }, + { + "ce_ib": 44.60874557495117, + "ce_orig": 0.33239492774009705, + "epoch": 0.05377812926881875, + "kl_loss": 2029.0833740234375, + "loss_ib": 20.33544158935547, + "step": 187 + }, + { + "ce_ib": 43.16314697265625, + "ce_orig": 0.7957232594490051, + "epoch": 0.05406571284779639, + "kl_loss": 2502.121826171875, + "loss_ib": 25.064382553100586, + "step": 188 + }, + { + "ce_ib": 42.3214225769043, + "ce_orig": 0.8192757964134216, + "epoch": 0.05406571284779639, + "kl_loss": 2299.024169921875, + "loss_ib": 23.032562255859375, + "step": 188 + }, + { + "ce_ib": 45.48292541503906, + "ce_orig": 0.9447529911994934, + "epoch": 0.05406571284779639, + "kl_loss": 2239.673828125, + "loss_ib": 22.44222068786621, + "step": 188 + }, + { + "ce_ib": 43.36006164550781, + "ce_orig": 0.7006217837333679, + "epoch": 0.05406571284779639, + "kl_loss": 2508.6435546875, + "loss_ib": 25.12979507446289, + "step": 188 + }, + { + "ce_ib": 42.9240837097168, + "ce_orig": 0.9430150985717773, + "epoch": 0.05435329642677403, + "kl_loss": 2421.86962890625, + "loss_ib": 24.261621475219727, + "step": 189 + }, + { + "ce_ib": 44.628814697265625, + "ce_orig": 1.0881403684616089, + "epoch": 0.05435329642677403, + "kl_loss": 2387.1103515625, + "loss_ib": 23.91573143005371, + "step": 189 + }, + { + "ce_ib": 43.131500244140625, + "ce_orig": 0.6736454963684082, + "epoch": 0.05435329642677403, + "kl_loss": 2487.064453125, + "loss_ib": 24.913776397705078, + "step": 189 + }, + { + "ce_ib": 44.661094665527344, + "ce_orig": 0.9864615797996521, + "epoch": 0.05435329642677403, + "kl_loss": 2235.8447265625, + "loss_ib": 22.403106689453125, + "step": 189 + }, + { + "epoch": 0.05464088000575167, + "grad_norm": 399.3453369140625, + "learning_rate": 5.859872611464969e-06, + "loss": 24.4398, + "step": 190 + }, + { + "ce_ib": 45.041744232177734, + "ce_orig": 0.8773884177207947, + "epoch": 0.05464088000575167, + "kl_loss": 2267.437744140625, + "loss_ib": 22.719417572021484, + "step": 190 + }, + { + "ce_ib": 46.31273651123047, + "ce_orig": 1.0835819244384766, + "epoch": 0.05464088000575167, + "kl_loss": 2374.10107421875, + "loss_ib": 23.787322998046875, + "step": 190 + }, + { + "ce_ib": 42.20440673828125, + "ce_orig": 0.9557557106018066, + "epoch": 0.05464088000575167, + "kl_loss": 2271.1640625, + "loss_ib": 22.75384521484375, + "step": 190 + }, + { + "ce_ib": 45.23324203491211, + "ce_orig": 0.8508480787277222, + "epoch": 0.05464088000575167, + "kl_loss": 2288.09375, + "loss_ib": 22.926172256469727, + "step": 190 + }, + { + "ce_ib": 44.05533218383789, + "ce_orig": 1.154534101486206, + "epoch": 0.05492846358472931, + "kl_loss": 2600.14404296875, + "loss_ib": 26.045494079589844, + "step": 191 + }, + { + "ce_ib": 42.00983810424805, + "ce_orig": 0.7044571042060852, + "epoch": 0.05492846358472931, + "kl_loss": 2365.684326171875, + "loss_ib": 23.6988525390625, + "step": 191 + }, + { + "ce_ib": 45.84080123901367, + "ce_orig": 0.8432292938232422, + "epoch": 0.05492846358472931, + "kl_loss": 2435.255859375, + "loss_ib": 24.39839744567871, + "step": 191 + }, + { + "ce_ib": 41.58427047729492, + "ce_orig": 0.5829588770866394, + "epoch": 0.05492846358472931, + "kl_loss": 2457.0322265625, + "loss_ib": 24.611906051635742, + "step": 191 + }, + { + "ce_ib": 42.24211120605469, + "ce_orig": 0.9801141023635864, + "epoch": 0.055216047163706955, + "kl_loss": 2417.925537109375, + "loss_ib": 24.22149658203125, + "step": 192 + }, + { + "ce_ib": 45.56145095825195, + "ce_orig": 1.132083535194397, + "epoch": 0.055216047163706955, + "kl_loss": 2193.166259765625, + "loss_ib": 21.977224349975586, + "step": 192 + }, + { + "ce_ib": 46.364322662353516, + "ce_orig": 1.4373035430908203, + "epoch": 0.055216047163706955, + "kl_loss": 2148.3759765625, + "loss_ib": 21.53012466430664, + "step": 192 + }, + { + "ce_ib": 43.91224670410156, + "ce_orig": 1.0104138851165771, + "epoch": 0.055216047163706955, + "kl_loss": 2297.23193359375, + "loss_ib": 23.0162296295166, + "step": 192 + }, + { + "ce_ib": 43.70963668823242, + "ce_orig": 1.4233311414718628, + "epoch": 0.05550363074268459, + "kl_loss": 2309.242919921875, + "loss_ib": 23.136137008666992, + "step": 193 + }, + { + "ce_ib": 42.54071807861328, + "ce_orig": 1.2717257738113403, + "epoch": 0.05550363074268459, + "kl_loss": 2467.593994140625, + "loss_ib": 24.718481063842773, + "step": 193 + }, + { + "ce_ib": 44.76433181762695, + "ce_orig": 0.4072558581829071, + "epoch": 0.05550363074268459, + "kl_loss": 2135.32177734375, + "loss_ib": 21.397979736328125, + "step": 193 + }, + { + "ce_ib": 43.77593231201172, + "ce_orig": 0.9473220705986023, + "epoch": 0.05550363074268459, + "kl_loss": 2351.19873046875, + "loss_ib": 23.555763244628906, + "step": 193 + }, + { + "ce_ib": 41.941593170166016, + "ce_orig": 0.8737780451774597, + "epoch": 0.05579121432166223, + "kl_loss": 2363.43896484375, + "loss_ib": 23.67633056640625, + "step": 194 + }, + { + "ce_ib": 45.53238296508789, + "ce_orig": 1.0324821472167969, + "epoch": 0.05579121432166223, + "kl_loss": 1954.951416015625, + "loss_ib": 19.59504508972168, + "step": 194 + }, + { + "ce_ib": 43.686954498291016, + "ce_orig": 1.3375800848007202, + "epoch": 0.05579121432166223, + "kl_loss": 2365.90576171875, + "loss_ib": 23.702743530273438, + "step": 194 + }, + { + "ce_ib": 43.68901443481445, + "ce_orig": 0.5833651423454285, + "epoch": 0.05579121432166223, + "kl_loss": 2179.84130859375, + "loss_ib": 21.84210205078125, + "step": 194 + }, + { + "epoch": 0.056078797900639875, + "grad_norm": 384.2991943359375, + "learning_rate": 6.019108280254777e-06, + "loss": 23.4525, + "step": 195 + }, + { + "ce_ib": 43.28430938720703, + "ce_orig": 0.8238533139228821, + "epoch": 0.056078797900639875, + "kl_loss": 2549.11474609375, + "loss_ib": 25.53443145751953, + "step": 195 + }, + { + "ce_ib": 43.542911529541016, + "ce_orig": 0.9835025072097778, + "epoch": 0.056078797900639875, + "kl_loss": 2104.084228515625, + "loss_ib": 21.08438491821289, + "step": 195 + }, + { + "ce_ib": 44.020999908447266, + "ce_orig": 0.9901052713394165, + "epoch": 0.056078797900639875, + "kl_loss": 1952.4921875, + "loss_ib": 19.56894302368164, + "step": 195 + }, + { + "ce_ib": 45.76945877075195, + "ce_orig": 0.8322806358337402, + "epoch": 0.056078797900639875, + "kl_loss": 2153.4716796875, + "loss_ib": 21.580486297607422, + "step": 195 + }, + { + "ce_ib": 43.12766647338867, + "ce_orig": 0.9949517846107483, + "epoch": 0.05636638147961751, + "kl_loss": 2381.666259765625, + "loss_ib": 23.85978889465332, + "step": 196 + }, + { + "ce_ib": 42.28154754638672, + "ce_orig": 0.9624870419502258, + "epoch": 0.05636638147961751, + "kl_loss": 2345.0966796875, + "loss_ib": 23.493249893188477, + "step": 196 + }, + { + "ce_ib": 44.944583892822266, + "ce_orig": 0.47114697098731995, + "epoch": 0.05636638147961751, + "kl_loss": 1733.882080078125, + "loss_ib": 17.383766174316406, + "step": 196 + }, + { + "ce_ib": 42.78217697143555, + "ce_orig": 0.7587113976478577, + "epoch": 0.05636638147961751, + "kl_loss": 2418.23876953125, + "loss_ib": 24.225170135498047, + "step": 196 + }, + { + "ce_ib": 40.34938049316406, + "ce_orig": 0.5144612789154053, + "epoch": 0.05665396505859516, + "kl_loss": 2105.162109375, + "loss_ib": 21.091970443725586, + "step": 197 + }, + { + "ce_ib": 43.35491943359375, + "ce_orig": 0.6633918285369873, + "epoch": 0.05665396505859516, + "kl_loss": 2375.418212890625, + "loss_ib": 23.797536849975586, + "step": 197 + }, + { + "ce_ib": 45.42133331298828, + "ce_orig": 1.425979495048523, + "epoch": 0.05665396505859516, + "kl_loss": 1852.203857421875, + "loss_ib": 18.567459106445312, + "step": 197 + }, + { + "ce_ib": 42.6270751953125, + "ce_orig": 1.091071367263794, + "epoch": 0.05665396505859516, + "kl_loss": 2149.162353515625, + "loss_ib": 21.534250259399414, + "step": 197 + }, + { + "ce_ib": 48.074180603027344, + "ce_orig": 1.6172330379486084, + "epoch": 0.056941548637572795, + "kl_loss": 2109.87890625, + "loss_ib": 21.146862030029297, + "step": 198 + }, + { + "ce_ib": 42.51495361328125, + "ce_orig": 1.009562611579895, + "epoch": 0.056941548637572795, + "kl_loss": 2003.40283203125, + "loss_ib": 20.076541900634766, + "step": 198 + }, + { + "ce_ib": 44.50498580932617, + "ce_orig": 1.2147884368896484, + "epoch": 0.056941548637572795, + "kl_loss": 2296.0703125, + "loss_ib": 23.005207061767578, + "step": 198 + }, + { + "ce_ib": 45.61008834838867, + "ce_orig": 1.532022476196289, + "epoch": 0.056941548637572795, + "kl_loss": 2186.6220703125, + "loss_ib": 21.91183090209961, + "step": 198 + }, + { + "ce_ib": 42.80625534057617, + "ce_orig": 1.1610299348831177, + "epoch": 0.05722913221655043, + "kl_loss": 2305.758056640625, + "loss_ib": 23.100385665893555, + "step": 199 + }, + { + "ce_ib": 44.845333099365234, + "ce_orig": 1.0554615259170532, + "epoch": 0.05722913221655043, + "kl_loss": 2195.99072265625, + "loss_ib": 22.00475311279297, + "step": 199 + }, + { + "ce_ib": 41.97274398803711, + "ce_orig": 0.9705357551574707, + "epoch": 0.05722913221655043, + "kl_loss": 2215.55859375, + "loss_ib": 22.19755744934082, + "step": 199 + }, + { + "ce_ib": 41.66038131713867, + "ce_orig": 0.8861182928085327, + "epoch": 0.05722913221655043, + "kl_loss": 1905.2489013671875, + "loss_ib": 19.094148635864258, + "step": 199 + }, + { + "epoch": 0.05751671579552808, + "grad_norm": 379.6163024902344, + "learning_rate": 6.178343949044586e-06, + "loss": 23.0704, + "step": 200 + }, + { + "ce_ib": 41.82258987426758, + "ce_orig": 1.130007266998291, + "epoch": 0.05751671579552808, + "kl_loss": 2195.69970703125, + "loss_ib": 21.998821258544922, + "step": 200 + }, + { + "ce_ib": 46.66122817993164, + "ce_orig": 1.4467494487762451, + "epoch": 0.05751671579552808, + "kl_loss": 2178.219970703125, + "loss_ib": 21.828859329223633, + "step": 200 + }, + { + "ce_ib": 39.04912567138672, + "ce_orig": 1.1100558042526245, + "epoch": 0.05751671579552808, + "kl_loss": 2271.130859375, + "loss_ib": 22.75035858154297, + "step": 200 + }, + { + "ce_ib": 40.80558395385742, + "ce_orig": 0.830470860004425, + "epoch": 0.05751671579552808, + "kl_loss": 2165.312255859375, + "loss_ib": 21.693927764892578, + "step": 200 + }, + { + "ce_ib": 40.64300537109375, + "ce_orig": 1.0102934837341309, + "epoch": 0.057804299374505715, + "kl_loss": 2148.4072265625, + "loss_ib": 21.52471351623535, + "step": 201 + }, + { + "ce_ib": 43.90663528442383, + "ce_orig": 1.1066926717758179, + "epoch": 0.057804299374505715, + "kl_loss": 2117.95751953125, + "loss_ib": 21.223480224609375, + "step": 201 + }, + { + "ce_ib": 45.89930725097656, + "ce_orig": 1.7181086540222168, + "epoch": 0.057804299374505715, + "kl_loss": 1860.36865234375, + "loss_ib": 18.649585723876953, + "step": 201 + }, + { + "ce_ib": 39.91169357299805, + "ce_orig": 0.3805517554283142, + "epoch": 0.057804299374505715, + "kl_loss": 1964.464599609375, + "loss_ib": 19.68455696105957, + "step": 201 + }, + { + "ce_ib": 39.736671447753906, + "ce_orig": 0.5894677639007568, + "epoch": 0.05809188295348336, + "kl_loss": 2246.01611328125, + "loss_ib": 22.499897003173828, + "step": 202 + }, + { + "ce_ib": 40.524208068847656, + "ce_orig": 0.9540011882781982, + "epoch": 0.05809188295348336, + "kl_loss": 1354.6728515625, + "loss_ib": 13.587251663208008, + "step": 202 + }, + { + "ce_ib": 43.64582061767578, + "ce_orig": 1.034263253211975, + "epoch": 0.05809188295348336, + "kl_loss": 2062.5126953125, + "loss_ib": 20.668771743774414, + "step": 202 + }, + { + "ce_ib": 41.73001480102539, + "ce_orig": 0.8725116848945618, + "epoch": 0.05809188295348336, + "kl_loss": 2051.177734375, + "loss_ib": 20.553508758544922, + "step": 202 + }, + { + "ce_ib": 44.100730895996094, + "ce_orig": 1.4435735940933228, + "epoch": 0.058379466532461, + "kl_loss": 1786.4874267578125, + "loss_ib": 17.908973693847656, + "step": 203 + }, + { + "ce_ib": 39.613006591796875, + "ce_orig": 0.7148452401161194, + "epoch": 0.058379466532461, + "kl_loss": 2156.13232421875, + "loss_ib": 21.600934982299805, + "step": 203 + }, + { + "ce_ib": 40.6436653137207, + "ce_orig": 1.2090833187103271, + "epoch": 0.058379466532461, + "kl_loss": 2073.139404296875, + "loss_ib": 20.772037506103516, + "step": 203 + }, + { + "ce_ib": 38.75384521484375, + "ce_orig": 0.2740119993686676, + "epoch": 0.058379466532461, + "kl_loss": 1232.078125, + "loss_ib": 12.35953426361084, + "step": 203 + }, + { + "ce_ib": 41.146873474121094, + "ce_orig": 0.719344973564148, + "epoch": 0.058667050111438634, + "kl_loss": 1714.5777587890625, + "loss_ib": 17.18692398071289, + "step": 204 + }, + { + "ce_ib": 44.12678527832031, + "ce_orig": 1.2889289855957031, + "epoch": 0.058667050111438634, + "kl_loss": 1714.3292236328125, + "loss_ib": 17.187419891357422, + "step": 204 + }, + { + "ce_ib": 44.051700592041016, + "ce_orig": 1.077775478363037, + "epoch": 0.058667050111438634, + "kl_loss": 2116.22900390625, + "loss_ib": 21.206342697143555, + "step": 204 + }, + { + "ce_ib": 38.1263427734375, + "ce_orig": 0.6518339514732361, + "epoch": 0.058667050111438634, + "kl_loss": 2124.76513671875, + "loss_ib": 21.285778045654297, + "step": 204 + }, + { + "epoch": 0.05895463369041628, + "grad_norm": 334.65350341796875, + "learning_rate": 6.337579617834395e-06, + "loss": 21.643, + "step": 205 + }, + { + "ce_ib": 40.87744140625, + "ce_orig": 1.0821335315704346, + "epoch": 0.05895463369041628, + "kl_loss": 2118.4775390625, + "loss_ib": 21.225650787353516, + "step": 205 + }, + { + "ce_ib": 39.00108337402344, + "ce_orig": 0.5186193585395813, + "epoch": 0.05895463369041628, + "kl_loss": 2132.87939453125, + "loss_ib": 21.367794036865234, + "step": 205 + }, + { + "ce_ib": 42.660888671875, + "ce_orig": 1.201238989830017, + "epoch": 0.05895463369041628, + "kl_loss": 2008.679443359375, + "loss_ib": 20.12945556640625, + "step": 205 + }, + { + "ce_ib": 42.61842346191406, + "ce_orig": 0.9650039076805115, + "epoch": 0.05895463369041628, + "kl_loss": 2077.22314453125, + "loss_ib": 20.814849853515625, + "step": 205 + }, + { + "ce_ib": 38.07194137573242, + "ce_orig": 0.9797282814979553, + "epoch": 0.05924221726939392, + "kl_loss": 2239.80859375, + "loss_ib": 22.4361572265625, + "step": 206 + }, + { + "ce_ib": 42.637840270996094, + "ce_orig": 1.2511439323425293, + "epoch": 0.05924221726939392, + "kl_loss": 2223.17578125, + "loss_ib": 22.27439308166504, + "step": 206 + }, + { + "ce_ib": 40.19730758666992, + "ce_orig": 1.2803971767425537, + "epoch": 0.05924221726939392, + "kl_loss": 1948.2891845703125, + "loss_ib": 19.523088455200195, + "step": 206 + }, + { + "ce_ib": 40.72916030883789, + "ce_orig": 0.9539033770561218, + "epoch": 0.05924221726939392, + "kl_loss": 1906.385498046875, + "loss_ib": 19.104583740234375, + "step": 206 + }, + { + "ce_ib": 41.54541778564453, + "ce_orig": 0.9414616823196411, + "epoch": 0.05952980084837156, + "kl_loss": 1876.511474609375, + "loss_ib": 18.806659698486328, + "step": 207 + }, + { + "ce_ib": 41.48387908935547, + "ce_orig": 0.8939663171768188, + "epoch": 0.05952980084837156, + "kl_loss": 1365.2000732421875, + "loss_ib": 13.693485260009766, + "step": 207 + }, + { + "ce_ib": 39.200660705566406, + "ce_orig": 0.8465067148208618, + "epoch": 0.05952980084837156, + "kl_loss": 1455.4010009765625, + "loss_ib": 14.593210220336914, + "step": 207 + }, + { + "ce_ib": 44.42674255371094, + "ce_orig": 1.9264086484909058, + "epoch": 0.05952980084837156, + "kl_loss": 2006.107421875, + "loss_ib": 20.105499267578125, + "step": 207 + }, + { + "ce_ib": 41.2900505065918, + "ce_orig": 1.6340312957763672, + "epoch": 0.0598173844273492, + "kl_loss": 1908.3016357421875, + "loss_ib": 19.124305725097656, + "step": 208 + }, + { + "ce_ib": 41.597694396972656, + "ce_orig": 0.9764799475669861, + "epoch": 0.0598173844273492, + "kl_loss": 1880.7303466796875, + "loss_ib": 18.848901748657227, + "step": 208 + }, + { + "ce_ib": 42.240516662597656, + "ce_orig": 0.4290001690387726, + "epoch": 0.0598173844273492, + "kl_loss": 1004.71728515625, + "loss_ib": 10.0894136428833, + "step": 208 + }, + { + "ce_ib": 42.21794128417969, + "ce_orig": 1.0113506317138672, + "epoch": 0.0598173844273492, + "kl_loss": 1826.12890625, + "loss_ib": 18.30350685119629, + "step": 208 + }, + { + "ce_ib": 41.9946403503418, + "ce_orig": 1.175087332725525, + "epoch": 0.06010496800632684, + "kl_loss": 1176.065185546875, + "loss_ib": 11.80264663696289, + "step": 209 + }, + { + "ce_ib": 41.64639663696289, + "ce_orig": 1.211875081062317, + "epoch": 0.06010496800632684, + "kl_loss": 2071.13623046875, + "loss_ib": 20.753007888793945, + "step": 209 + }, + { + "ce_ib": 40.602378845214844, + "ce_orig": 1.2118444442749023, + "epoch": 0.06010496800632684, + "kl_loss": 2032.2001953125, + "loss_ib": 20.36260414123535, + "step": 209 + }, + { + "ce_ib": 39.86399459838867, + "ce_orig": 1.1752040386199951, + "epoch": 0.06010496800632684, + "kl_loss": 2107.810546875, + "loss_ib": 21.11796760559082, + "step": 209 + }, + { + "epoch": 0.06039255158530448, + "grad_norm": 353.71893310546875, + "learning_rate": 6.496815286624204e-06, + "loss": 20.5732, + "step": 210 + }, + { + "ce_ib": 42.62069320678711, + "ce_orig": 1.424117922782898, + "epoch": 0.06039255158530448, + "kl_loss": 1516.96630859375, + "loss_ib": 15.21228313446045, + "step": 210 + }, + { + "ce_ib": 39.955963134765625, + "ce_orig": 0.44305068254470825, + "epoch": 0.06039255158530448, + "kl_loss": 1933.53759765625, + "loss_ib": 19.37533187866211, + "step": 210 + }, + { + "ce_ib": 40.190860748291016, + "ce_orig": 0.597926914691925, + "epoch": 0.06039255158530448, + "kl_loss": 2069.74560546875, + "loss_ib": 20.737646102905273, + "step": 210 + }, + { + "ce_ib": 39.46810531616211, + "ce_orig": 0.713378369808197, + "epoch": 0.06039255158530448, + "kl_loss": 1792.88720703125, + "loss_ib": 17.968339920043945, + "step": 210 + }, + { + "ce_ib": 42.34745788574219, + "ce_orig": 1.0557239055633545, + "epoch": 0.06068013516428212, + "kl_loss": 1849.0069580078125, + "loss_ib": 18.53241729736328, + "step": 211 + }, + { + "ce_ib": 38.35053634643555, + "ce_orig": 0.603425145149231, + "epoch": 0.06068013516428212, + "kl_loss": 1920.453857421875, + "loss_ib": 19.242889404296875, + "step": 211 + }, + { + "ce_ib": 37.083927154541016, + "ce_orig": 1.2688902616500854, + "epoch": 0.06068013516428212, + "kl_loss": 2041.569580078125, + "loss_ib": 20.45277976989746, + "step": 211 + }, + { + "ce_ib": 38.7510871887207, + "ce_orig": 0.6306071877479553, + "epoch": 0.06068013516428212, + "kl_loss": 1976.189453125, + "loss_ib": 19.80064582824707, + "step": 211 + }, + { + "ce_ib": 36.3626594543457, + "ce_orig": 0.4798745810985565, + "epoch": 0.060967718743259756, + "kl_loss": 1731.49951171875, + "loss_ib": 17.351356506347656, + "step": 212 + }, + { + "ce_ib": 39.894752502441406, + "ce_orig": 0.991927444934845, + "epoch": 0.060967718743259756, + "kl_loss": 2037.4395751953125, + "loss_ib": 20.414289474487305, + "step": 212 + }, + { + "ce_ib": 41.493896484375, + "ce_orig": 1.472151756286621, + "epoch": 0.060967718743259756, + "kl_loss": 1865.9774169921875, + "loss_ib": 18.70126724243164, + "step": 212 + }, + { + "ce_ib": 42.53767013549805, + "ce_orig": 1.1901134252548218, + "epoch": 0.060967718743259756, + "kl_loss": 1645.129638671875, + "loss_ib": 16.493833541870117, + "step": 212 + }, + { + "ce_ib": 41.67341232299805, + "ce_orig": 2.049192428588867, + "epoch": 0.0612553023222374, + "kl_loss": 1779.3302001953125, + "loss_ib": 17.83497428894043, + "step": 213 + }, + { + "ce_ib": 42.051273345947266, + "ce_orig": 1.3447684049606323, + "epoch": 0.0612553023222374, + "kl_loss": 1777.956787109375, + "loss_ib": 17.821619033813477, + "step": 213 + }, + { + "ce_ib": 37.9370002746582, + "ce_orig": 0.4780416190624237, + "epoch": 0.0612553023222374, + "kl_loss": 1839.32177734375, + "loss_ib": 18.431154251098633, + "step": 213 + }, + { + "ce_ib": 39.32098388671875, + "ce_orig": 1.0718315839767456, + "epoch": 0.0612553023222374, + "kl_loss": 2034.295166015625, + "loss_ib": 20.382272720336914, + "step": 213 + }, + { + "ce_ib": 36.60554885864258, + "ce_orig": 0.5966328978538513, + "epoch": 0.06154288590121504, + "kl_loss": 1988.932861328125, + "loss_ib": 19.925933837890625, + "step": 214 + }, + { + "ce_ib": 41.9276123046875, + "ce_orig": 0.6651936173439026, + "epoch": 0.06154288590121504, + "kl_loss": 1789.4920654296875, + "loss_ib": 17.936847686767578, + "step": 214 + }, + { + "ce_ib": 40.556461334228516, + "ce_orig": 0.4927489459514618, + "epoch": 0.06154288590121504, + "kl_loss": 1789.6015625, + "loss_ib": 17.93657112121582, + "step": 214 + }, + { + "ce_ib": 39.251651763916016, + "ce_orig": 1.0295277833938599, + "epoch": 0.06154288590121504, + "kl_loss": 1894.323486328125, + "loss_ib": 18.982486724853516, + "step": 214 + }, + { + "epoch": 0.06183046948019268, + "grad_norm": 317.97894287109375, + "learning_rate": 6.6560509554140125e-06, + "loss": 19.6963, + "step": 215 + }, + { + "ce_ib": 38.56227111816406, + "ce_orig": 0.6546093821525574, + "epoch": 0.06183046948019268, + "kl_loss": 1737.800048828125, + "loss_ib": 17.416563034057617, + "step": 215 + }, + { + "ce_ib": 35.2888069152832, + "ce_orig": 0.7404365539550781, + "epoch": 0.06183046948019268, + "kl_loss": 1965.703857421875, + "loss_ib": 19.69232749938965, + "step": 215 + }, + { + "ce_ib": 40.4928092956543, + "ce_orig": 1.043062686920166, + "epoch": 0.06183046948019268, + "kl_loss": 1873.6024169921875, + "loss_ib": 18.77651596069336, + "step": 215 + }, + { + "ce_ib": 39.602508544921875, + "ce_orig": 1.0876483917236328, + "epoch": 0.06183046948019268, + "kl_loss": 1824.4144287109375, + "loss_ib": 18.28374671936035, + "step": 215 + }, + { + "ce_ib": 35.879852294921875, + "ce_orig": 0.8248341679573059, + "epoch": 0.06211805305917032, + "kl_loss": 1824.806640625, + "loss_ib": 18.283946990966797, + "step": 216 + }, + { + "ce_ib": 35.09049606323242, + "ce_orig": 0.5326448082923889, + "epoch": 0.06211805305917032, + "kl_loss": 1892.9739990234375, + "loss_ib": 18.96483039855957, + "step": 216 + }, + { + "ce_ib": 40.311012268066406, + "ce_orig": 1.6227895021438599, + "epoch": 0.06211805305917032, + "kl_loss": 1743.340087890625, + "loss_ib": 17.473711013793945, + "step": 216 + }, + { + "ce_ib": 36.35209274291992, + "ce_orig": 0.7696553468704224, + "epoch": 0.06211805305917032, + "kl_loss": 1989.461181640625, + "loss_ib": 19.93096351623535, + "step": 216 + }, + { + "ce_ib": 36.76679611206055, + "ce_orig": 0.7665999531745911, + "epoch": 0.06240563663814796, + "kl_loss": 1747.884033203125, + "loss_ib": 17.515605926513672, + "step": 217 + }, + { + "ce_ib": 38.807064056396484, + "ce_orig": 0.9963610172271729, + "epoch": 0.06240563663814796, + "kl_loss": 1780.5374755859375, + "loss_ib": 17.844181060791016, + "step": 217 + }, + { + "ce_ib": 39.64936828613281, + "ce_orig": 0.7059118151664734, + "epoch": 0.06240563663814796, + "kl_loss": 1668.8529052734375, + "loss_ib": 16.728178024291992, + "step": 217 + }, + { + "ce_ib": 37.85905838012695, + "ce_orig": 1.064191460609436, + "epoch": 0.06240563663814796, + "kl_loss": 1903.99462890625, + "loss_ib": 19.077804565429688, + "step": 217 + }, + { + "ce_ib": 36.819175720214844, + "ce_orig": 0.5811072587966919, + "epoch": 0.0626932202171256, + "kl_loss": 1824.97802734375, + "loss_ib": 18.286598205566406, + "step": 218 + }, + { + "ce_ib": 37.54027557373047, + "ce_orig": 0.9560117125511169, + "epoch": 0.0626932202171256, + "kl_loss": 1839.77392578125, + "loss_ib": 18.435279846191406, + "step": 218 + }, + { + "ce_ib": 39.93457794189453, + "ce_orig": 1.0178155899047852, + "epoch": 0.0626932202171256, + "kl_loss": 1711.14697265625, + "loss_ib": 17.151403427124023, + "step": 218 + }, + { + "ce_ib": 40.65945816040039, + "ce_orig": 0.742775022983551, + "epoch": 0.0626932202171256, + "kl_loss": 1604.2640380859375, + "loss_ib": 16.08329963684082, + "step": 218 + }, + { + "ce_ib": 41.853858947753906, + "ce_orig": 1.375777244567871, + "epoch": 0.06298080379610324, + "kl_loss": 1801.32958984375, + "loss_ib": 18.05514907836914, + "step": 219 + }, + { + "ce_ib": 36.28791046142578, + "ce_orig": 0.9161471724510193, + "epoch": 0.06298080379610324, + "kl_loss": 1644.6923828125, + "loss_ib": 16.483211517333984, + "step": 219 + }, + { + "ce_ib": 37.08815383911133, + "ce_orig": 0.728233814239502, + "epoch": 0.06298080379610324, + "kl_loss": 1778.4482421875, + "loss_ib": 17.821571350097656, + "step": 219 + }, + { + "ce_ib": 37.880104064941406, + "ce_orig": 1.3340355157852173, + "epoch": 0.06298080379610324, + "kl_loss": 1593.228759765625, + "loss_ib": 15.97016716003418, + "step": 219 + }, + { + "epoch": 0.06326838737508088, + "grad_norm": 318.8504943847656, + "learning_rate": 6.815286624203822e-06, + "loss": 18.6164, + "step": 220 + }, + { + "ce_ib": 39.489933013916016, + "ce_orig": 1.1903218030929565, + "epoch": 0.06326838737508088, + "kl_loss": 1687.38232421875, + "loss_ib": 16.913312911987305, + "step": 220 + }, + { + "ce_ib": 37.481624603271484, + "ce_orig": 0.7840384244918823, + "epoch": 0.06326838737508088, + "kl_loss": 1726.1568603515625, + "loss_ib": 17.299049377441406, + "step": 220 + }, + { + "ce_ib": 35.45810317993164, + "ce_orig": 0.7813513278961182, + "epoch": 0.06326838737508088, + "kl_loss": 1733.7164306640625, + "loss_ib": 17.372621536254883, + "step": 220 + }, + { + "ce_ib": 40.4595832824707, + "ce_orig": 1.3600130081176758, + "epoch": 0.06326838737508088, + "kl_loss": 1568.1533203125, + "loss_ib": 15.721991539001465, + "step": 220 + }, + { + "ce_ib": 35.940345764160156, + "ce_orig": 1.0896117687225342, + "epoch": 0.06355597095405853, + "kl_loss": 1728.0986328125, + "loss_ib": 17.316925048828125, + "step": 221 + }, + { + "ce_ib": 35.44572830200195, + "ce_orig": 0.2852933406829834, + "epoch": 0.06355597095405853, + "kl_loss": 963.3050537109375, + "loss_ib": 9.668496131896973, + "step": 221 + }, + { + "ce_ib": 33.80705261230469, + "ce_orig": 0.791994571685791, + "epoch": 0.06355597095405853, + "kl_loss": 1822.135009765625, + "loss_ib": 18.255157470703125, + "step": 221 + }, + { + "ce_ib": 36.91697692871094, + "ce_orig": 0.6960796117782593, + "epoch": 0.06355597095405853, + "kl_loss": 1718.594970703125, + "loss_ib": 17.22286605834961, + "step": 221 + }, + { + "ce_ib": 39.449485778808594, + "ce_orig": 0.9346453547477722, + "epoch": 0.06384355453303617, + "kl_loss": 1414.82958984375, + "loss_ib": 14.187745094299316, + "step": 222 + }, + { + "ce_ib": 38.30500793457031, + "ce_orig": 0.8375841975212097, + "epoch": 0.06384355453303617, + "kl_loss": 1562.5009765625, + "loss_ib": 15.663313865661621, + "step": 222 + }, + { + "ce_ib": 41.28805923461914, + "ce_orig": 0.7960017919540405, + "epoch": 0.06384355453303617, + "kl_loss": 1512.22802734375, + "loss_ib": 15.163567543029785, + "step": 222 + }, + { + "ce_ib": 32.99767303466797, + "ce_orig": 0.6580086946487427, + "epoch": 0.06384355453303617, + "kl_loss": 1726.3076171875, + "loss_ib": 17.29607391357422, + "step": 222 + }, + { + "ce_ib": 33.17844009399414, + "ce_orig": 0.7080318927764893, + "epoch": 0.0641311381120138, + "kl_loss": 1755.3681640625, + "loss_ib": 17.58686065673828, + "step": 223 + }, + { + "ce_ib": 37.70219421386719, + "ce_orig": 0.6540882587432861, + "epoch": 0.0641311381120138, + "kl_loss": 1503.2210693359375, + "loss_ib": 15.069912910461426, + "step": 223 + }, + { + "ce_ib": 40.99760818481445, + "ce_orig": 0.7546887993812561, + "epoch": 0.0641311381120138, + "kl_loss": 1461.0548095703125, + "loss_ib": 14.651545524597168, + "step": 223 + }, + { + "ce_ib": 36.96660614013672, + "ce_orig": 0.6669592261314392, + "epoch": 0.0641311381120138, + "kl_loss": 1589.726318359375, + "loss_ib": 15.934229850769043, + "step": 223 + }, + { + "ce_ib": 41.123348236083984, + "ce_orig": 1.2109692096710205, + "epoch": 0.06441872169099144, + "kl_loss": 1519.9962158203125, + "loss_ib": 15.241085052490234, + "step": 224 + }, + { + "ce_ib": 36.78544998168945, + "ce_orig": 0.9228610992431641, + "epoch": 0.06441872169099144, + "kl_loss": 1535.970703125, + "loss_ib": 15.396492958068848, + "step": 224 + }, + { + "ce_ib": 36.03901672363281, + "ce_orig": 1.0696367025375366, + "epoch": 0.06441872169099144, + "kl_loss": 1711.971923828125, + "loss_ib": 17.155757904052734, + "step": 224 + }, + { + "ce_ib": 38.314327239990234, + "ce_orig": 1.826366662979126, + "epoch": 0.06441872169099144, + "kl_loss": 1629.136962890625, + "loss_ib": 16.329683303833008, + "step": 224 + }, + { + "epoch": 0.06470630526996908, + "grad_norm": 308.50250244140625, + "learning_rate": 6.9745222929936305e-06, + "loss": 16.8856, + "step": 225 + }, + { + "ce_ib": 36.43165969848633, + "ce_orig": 0.878455638885498, + "epoch": 0.06470630526996908, + "kl_loss": 1289.9693603515625, + "loss_ib": 12.936124801635742, + "step": 225 + }, + { + "ce_ib": 40.33540344238281, + "ce_orig": 1.3886396884918213, + "epoch": 0.06470630526996908, + "kl_loss": 1656.87060546875, + "loss_ib": 16.609041213989258, + "step": 225 + }, + { + "ce_ib": 36.43739700317383, + "ce_orig": 0.4045904874801636, + "epoch": 0.06470630526996908, + "kl_loss": 1671.9920654296875, + "loss_ib": 16.756359100341797, + "step": 225 + }, + { + "ce_ib": 36.36072540283203, + "ce_orig": 1.0076552629470825, + "epoch": 0.06470630526996908, + "kl_loss": 1584.193115234375, + "loss_ib": 15.878292083740234, + "step": 225 + }, + { + "ce_ib": 38.3975830078125, + "ce_orig": 1.267250895500183, + "epoch": 0.06499388884894673, + "kl_loss": 1569.5213623046875, + "loss_ib": 15.733610153198242, + "step": 226 + }, + { + "ce_ib": 38.38508224487305, + "ce_orig": 1.265257477760315, + "epoch": 0.06499388884894673, + "kl_loss": 1453.87646484375, + "loss_ib": 14.577149391174316, + "step": 226 + }, + { + "ce_ib": 39.651161193847656, + "ce_orig": 0.6877183318138123, + "epoch": 0.06499388884894673, + "kl_loss": 1393.7093505859375, + "loss_ib": 13.976743698120117, + "step": 226 + }, + { + "ce_ib": 36.53251647949219, + "ce_orig": 1.5096549987792969, + "epoch": 0.06499388884894673, + "kl_loss": 1307.943115234375, + "loss_ib": 13.115962982177734, + "step": 226 + }, + { + "ce_ib": 37.703006744384766, + "ce_orig": 0.9586830139160156, + "epoch": 0.06528147242792437, + "kl_loss": 1452.981201171875, + "loss_ib": 14.567514419555664, + "step": 227 + }, + { + "ce_ib": 34.66203308105469, + "ce_orig": 0.658699095249176, + "epoch": 0.06528147242792437, + "kl_loss": 1551.1995849609375, + "loss_ib": 15.54665756225586, + "step": 227 + }, + { + "ce_ib": 35.329044342041016, + "ce_orig": 0.6904061436653137, + "epoch": 0.06528147242792437, + "kl_loss": 1509.483154296875, + "loss_ib": 15.130160331726074, + "step": 227 + }, + { + "ce_ib": 35.24424743652344, + "ce_orig": 0.5379785895347595, + "epoch": 0.06528147242792437, + "kl_loss": 1509.2982177734375, + "loss_ib": 15.128226280212402, + "step": 227 + }, + { + "ce_ib": 35.123985290527344, + "ce_orig": 0.7466920614242554, + "epoch": 0.06556905600690201, + "kl_loss": 1561.8984375, + "loss_ib": 15.654109001159668, + "step": 228 + }, + { + "ce_ib": 34.78830337524414, + "ce_orig": 0.7827273607254028, + "epoch": 0.06556905600690201, + "kl_loss": 1582.02099609375, + "loss_ib": 15.854998588562012, + "step": 228 + }, + { + "ce_ib": 31.581981658935547, + "ce_orig": 0.2600187063217163, + "epoch": 0.06556905600690201, + "kl_loss": 1321.60205078125, + "loss_ib": 13.247602462768555, + "step": 228 + }, + { + "ce_ib": 34.20478820800781, + "ce_orig": 1.0527675151824951, + "epoch": 0.06556905600690201, + "kl_loss": 1532.9888916015625, + "loss_ib": 15.364093780517578, + "step": 228 + }, + { + "ce_ib": 39.04445266723633, + "ce_orig": 1.652494192123413, + "epoch": 0.06585663958587964, + "kl_loss": 1284.142578125, + "loss_ib": 12.88046932220459, + "step": 229 + }, + { + "ce_ib": 37.834381103515625, + "ce_orig": 1.3008118867874146, + "epoch": 0.06585663958587964, + "kl_loss": 1602.8289794921875, + "loss_ib": 16.066123962402344, + "step": 229 + }, + { + "ce_ib": 34.8093147277832, + "ce_orig": 0.9119290113449097, + "epoch": 0.06585663958587964, + "kl_loss": 1195.5177001953125, + "loss_ib": 11.989986419677734, + "step": 229 + }, + { + "ce_ib": 37.39421081542969, + "ce_orig": 1.299423336982727, + "epoch": 0.06585663958587964, + "kl_loss": 1378.4208984375, + "loss_ib": 13.821602821350098, + "step": 229 + }, + { + "epoch": 0.06614422316485728, + "grad_norm": 279.78515625, + "learning_rate": 7.13375796178344e-06, + "loss": 15.8318, + "step": 230 + }, + { + "ce_ib": 39.00707244873047, + "ce_orig": 1.9128481149673462, + "epoch": 0.06614422316485728, + "kl_loss": 1414.39697265625, + "loss_ib": 14.182976722717285, + "step": 230 + }, + { + "ce_ib": 36.59072494506836, + "ce_orig": 1.3344916105270386, + "epoch": 0.06614422316485728, + "kl_loss": 1368.765869140625, + "loss_ib": 13.724248886108398, + "step": 230 + }, + { + "ce_ib": 36.91270065307617, + "ce_orig": 1.1911953687667847, + "epoch": 0.06614422316485728, + "kl_loss": 1506.248291015625, + "loss_ib": 15.099395751953125, + "step": 230 + }, + { + "ce_ib": 35.45751953125, + "ce_orig": 0.9839146733283997, + "epoch": 0.06614422316485728, + "kl_loss": 1557.294189453125, + "loss_ib": 15.608399391174316, + "step": 230 + }, + { + "ce_ib": 40.05620193481445, + "ce_orig": 0.5404795408248901, + "epoch": 0.06643180674383492, + "kl_loss": 1331.465576171875, + "loss_ib": 13.354711532592773, + "step": 231 + }, + { + "ce_ib": 35.91750717163086, + "ce_orig": 1.0286931991577148, + "epoch": 0.06643180674383492, + "kl_loss": 1538.906982421875, + "loss_ib": 15.42498779296875, + "step": 231 + }, + { + "ce_ib": 37.91292953491211, + "ce_orig": 0.6502935886383057, + "epoch": 0.06643180674383492, + "kl_loss": 1359.6444091796875, + "loss_ib": 13.634356498718262, + "step": 231 + }, + { + "ce_ib": 36.25615310668945, + "ce_orig": 1.0073096752166748, + "epoch": 0.06643180674383492, + "kl_loss": 1373.6658935546875, + "loss_ib": 13.77291488647461, + "step": 231 + }, + { + "ce_ib": 33.69683837890625, + "ce_orig": 0.46744322776794434, + "epoch": 0.06671939032281257, + "kl_loss": 1326.230712890625, + "loss_ib": 13.296003341674805, + "step": 232 + }, + { + "ce_ib": 33.371883392333984, + "ce_orig": 1.2536524534225464, + "epoch": 0.06671939032281257, + "kl_loss": 1449.79150390625, + "loss_ib": 14.531286239624023, + "step": 232 + }, + { + "ce_ib": 35.073936462402344, + "ce_orig": 1.0432262420654297, + "epoch": 0.06671939032281257, + "kl_loss": 1357.572021484375, + "loss_ib": 13.610794067382812, + "step": 232 + }, + { + "ce_ib": 35.290687561035156, + "ce_orig": 0.8194282650947571, + "epoch": 0.06671939032281257, + "kl_loss": 1452.7779541015625, + "loss_ib": 14.563069343566895, + "step": 232 + }, + { + "ce_ib": 39.44172668457031, + "ce_orig": 1.5786598920822144, + "epoch": 0.06700697390179021, + "kl_loss": 1254.966796875, + "loss_ib": 12.589109420776367, + "step": 233 + }, + { + "ce_ib": 34.740867614746094, + "ce_orig": 0.563507616519928, + "epoch": 0.06700697390179021, + "kl_loss": 1330.1361083984375, + "loss_ib": 13.336101531982422, + "step": 233 + }, + { + "ce_ib": 36.04484176635742, + "ce_orig": 1.1875897645950317, + "epoch": 0.06700697390179021, + "kl_loss": 1417.7935791015625, + "loss_ib": 14.213980674743652, + "step": 233 + }, + { + "ce_ib": 31.511131286621094, + "ce_orig": 0.6714182496070862, + "epoch": 0.06700697390179021, + "kl_loss": 1382.14794921875, + "loss_ib": 13.85299015045166, + "step": 233 + }, + { + "ce_ib": 33.84688186645508, + "ce_orig": 1.096521258354187, + "epoch": 0.06729455748076785, + "kl_loss": 1357.806884765625, + "loss_ib": 13.61191463470459, + "step": 234 + }, + { + "ce_ib": 34.99058532714844, + "ce_orig": 1.0461159944534302, + "epoch": 0.06729455748076785, + "kl_loss": 1225.052734375, + "loss_ib": 12.285517692565918, + "step": 234 + }, + { + "ce_ib": 34.9071044921875, + "ce_orig": 0.9976585507392883, + "epoch": 0.06729455748076785, + "kl_loss": 1408.259521484375, + "loss_ib": 14.117502212524414, + "step": 234 + }, + { + "ce_ib": 37.175872802734375, + "ce_orig": 1.5398781299591064, + "epoch": 0.06729455748076785, + "kl_loss": 1296.612548828125, + "loss_ib": 13.003300666809082, + "step": 234 + }, + { + "epoch": 0.06758214105974548, + "grad_norm": 268.0568542480469, + "learning_rate": 7.2929936305732485e-06, + "loss": 14.6834, + "step": 235 + }, + { + "ce_ib": 35.610557556152344, + "ce_orig": 0.7642791867256165, + "epoch": 0.06758214105974548, + "kl_loss": 1381.1488037109375, + "loss_ib": 13.847098350524902, + "step": 235 + }, + { + "ce_ib": 38.893550872802734, + "ce_orig": 1.8394078016281128, + "epoch": 0.06758214105974548, + "kl_loss": 1152.2271728515625, + "loss_ib": 11.561165809631348, + "step": 235 + }, + { + "ce_ib": 32.011322021484375, + "ce_orig": 0.9249970316886902, + "epoch": 0.06758214105974548, + "kl_loss": 1395.470458984375, + "loss_ib": 13.986716270446777, + "step": 235 + }, + { + "ce_ib": 35.36570739746094, + "ce_orig": 1.026782751083374, + "epoch": 0.06758214105974548, + "kl_loss": 1339.9072265625, + "loss_ib": 13.43443775177002, + "step": 235 + }, + { + "ce_ib": 33.16312789916992, + "ce_orig": 1.0328998565673828, + "epoch": 0.06786972463872312, + "kl_loss": 1300.6361083984375, + "loss_ib": 13.03952407836914, + "step": 236 + }, + { + "ce_ib": 35.08463668823242, + "ce_orig": 1.3721755743026733, + "epoch": 0.06786972463872312, + "kl_loss": 1194.1552734375, + "loss_ib": 11.97663688659668, + "step": 236 + }, + { + "ce_ib": 31.49561882019043, + "ce_orig": 0.3084181249141693, + "epoch": 0.06786972463872312, + "kl_loss": 934.6522216796875, + "loss_ib": 9.37801742553711, + "step": 236 + }, + { + "ce_ib": 39.625789642333984, + "ce_orig": 1.2989716529846191, + "epoch": 0.06786972463872312, + "kl_loss": 1081.615234375, + "loss_ib": 10.855777740478516, + "step": 236 + }, + { + "ce_ib": 33.67836380004883, + "ce_orig": 1.3483405113220215, + "epoch": 0.06815730821770077, + "kl_loss": 1198.904541015625, + "loss_ib": 12.022723197937012, + "step": 237 + }, + { + "ce_ib": 35.59366989135742, + "ce_orig": 0.9075685143470764, + "epoch": 0.06815730821770077, + "kl_loss": 1204.507568359375, + "loss_ib": 12.080668449401855, + "step": 237 + }, + { + "ce_ib": 36.645938873291016, + "ce_orig": 0.6160690188407898, + "epoch": 0.06815730821770077, + "kl_loss": 1352.1148681640625, + "loss_ib": 13.557793617248535, + "step": 237 + }, + { + "ce_ib": 34.851688385009766, + "ce_orig": 0.7488659024238586, + "epoch": 0.06815730821770077, + "kl_loss": 1261.7066650390625, + "loss_ib": 12.651918411254883, + "step": 237 + }, + { + "ce_ib": 31.677663803100586, + "ce_orig": 0.6202912330627441, + "epoch": 0.06844489179667841, + "kl_loss": 1198.86669921875, + "loss_ib": 12.020343780517578, + "step": 238 + }, + { + "ce_ib": 33.36151885986328, + "ce_orig": 0.7369568347930908, + "epoch": 0.06844489179667841, + "kl_loss": 1171.602294921875, + "loss_ib": 11.749384880065918, + "step": 238 + }, + { + "ce_ib": 37.02521896362305, + "ce_orig": 0.6275981664657593, + "epoch": 0.06844489179667841, + "kl_loss": 1234.6282958984375, + "loss_ib": 12.383307456970215, + "step": 238 + }, + { + "ce_ib": 33.56972885131836, + "ce_orig": 0.8399911522865295, + "epoch": 0.06844489179667841, + "kl_loss": 1177.68603515625, + "loss_ib": 11.810429573059082, + "step": 238 + }, + { + "ce_ib": 36.48527526855469, + "ce_orig": 1.2248564958572388, + "epoch": 0.06873247537565605, + "kl_loss": 1168.8984375, + "loss_ib": 11.725469589233398, + "step": 239 + }, + { + "ce_ib": 32.57621765136719, + "ce_orig": 0.8083109259605408, + "epoch": 0.06873247537565605, + "kl_loss": 1238.533935546875, + "loss_ib": 12.417914390563965, + "step": 239 + }, + { + "ce_ib": 36.354488372802734, + "ce_orig": 1.729040503501892, + "epoch": 0.06873247537565605, + "kl_loss": 1160.8841552734375, + "loss_ib": 11.645195960998535, + "step": 239 + }, + { + "ce_ib": 33.25252151489258, + "ce_orig": 0.8631963729858398, + "epoch": 0.06873247537565605, + "kl_loss": 1161.73974609375, + "loss_ib": 11.650649070739746, + "step": 239 + }, + { + "epoch": 0.06902005895463369, + "grad_norm": 242.57241821289062, + "learning_rate": 7.452229299363057e-06, + "loss": 13.2382, + "step": 240 + }, + { + "ce_ib": 32.97649002075195, + "ce_orig": 0.5754613876342773, + "epoch": 0.06902005895463369, + "kl_loss": 1180.76708984375, + "loss_ib": 11.840646743774414, + "step": 240 + }, + { + "ce_ib": 35.5557861328125, + "ce_orig": 1.3153690099716187, + "epoch": 0.06902005895463369, + "kl_loss": 1166.3897705078125, + "loss_ib": 11.69945240020752, + "step": 240 + }, + { + "ce_ib": 30.03131675720215, + "ce_orig": 0.5445340275764465, + "epoch": 0.06902005895463369, + "kl_loss": 1206.5845947265625, + "loss_ib": 12.095877647399902, + "step": 240 + }, + { + "ce_ib": 34.68654251098633, + "ce_orig": 1.0324212312698364, + "epoch": 0.06902005895463369, + "kl_loss": 1035.31201171875, + "loss_ib": 10.387805938720703, + "step": 240 + }, + { + "ce_ib": 34.29194259643555, + "ce_orig": 0.9263237714767456, + "epoch": 0.06930764253361132, + "kl_loss": 1131.122314453125, + "loss_ib": 11.345515251159668, + "step": 241 + }, + { + "ce_ib": 35.83911895751953, + "ce_orig": 0.6829422116279602, + "epoch": 0.06930764253361132, + "kl_loss": 1171.150634765625, + "loss_ib": 11.747344970703125, + "step": 241 + }, + { + "ce_ib": 34.61550521850586, + "ce_orig": 0.7391694188117981, + "epoch": 0.06930764253361132, + "kl_loss": 1050.002685546875, + "loss_ib": 10.534642219543457, + "step": 241 + }, + { + "ce_ib": 31.543256759643555, + "ce_orig": 0.8060687780380249, + "epoch": 0.06930764253361132, + "kl_loss": 1197.1962890625, + "loss_ib": 12.00350570678711, + "step": 241 + }, + { + "ce_ib": 32.99800109863281, + "ce_orig": 0.6455181837081909, + "epoch": 0.06959522611258898, + "kl_loss": 1041.767333984375, + "loss_ib": 10.450671195983887, + "step": 242 + }, + { + "ce_ib": 32.91671371459961, + "ce_orig": 0.7244043350219727, + "epoch": 0.06959522611258898, + "kl_loss": 1103.9139404296875, + "loss_ib": 11.07205581665039, + "step": 242 + }, + { + "ce_ib": 35.45330047607422, + "ce_orig": 0.9272658228874207, + "epoch": 0.06959522611258898, + "kl_loss": 1027.905517578125, + "loss_ib": 10.314507484436035, + "step": 242 + }, + { + "ce_ib": 34.885498046875, + "ce_orig": 0.8863522410392761, + "epoch": 0.06959522611258898, + "kl_loss": 1155.1387939453125, + "loss_ib": 11.586273193359375, + "step": 242 + }, + { + "ce_ib": 34.44084930419922, + "ce_orig": 1.152998924255371, + "epoch": 0.06988280969156661, + "kl_loss": 1133.0599365234375, + "loss_ib": 11.365039825439453, + "step": 243 + }, + { + "ce_ib": 35.273677825927734, + "ce_orig": 1.2428306341171265, + "epoch": 0.06988280969156661, + "kl_loss": 1112.822021484375, + "loss_ib": 11.163493156433105, + "step": 243 + }, + { + "ce_ib": 32.52173614501953, + "ce_orig": 1.016830325126648, + "epoch": 0.06988280969156661, + "kl_loss": 1070.12255859375, + "loss_ib": 10.733747482299805, + "step": 243 + }, + { + "ce_ib": 34.803653717041016, + "ce_orig": 0.615959107875824, + "epoch": 0.06988280969156661, + "kl_loss": 1086.107421875, + "loss_ib": 10.895877838134766, + "step": 243 + }, + { + "ce_ib": 32.82182693481445, + "ce_orig": 0.8602744936943054, + "epoch": 0.07017039327054425, + "kl_loss": 1035.995849609375, + "loss_ib": 10.392780303955078, + "step": 244 + }, + { + "ce_ib": 31.894535064697266, + "ce_orig": 0.6907263398170471, + "epoch": 0.07017039327054425, + "kl_loss": 979.4188232421875, + "loss_ib": 9.826082229614258, + "step": 244 + }, + { + "ce_ib": 35.21843719482422, + "ce_orig": 1.3801195621490479, + "epoch": 0.07017039327054425, + "kl_loss": 1049.561279296875, + "loss_ib": 10.530831336975098, + "step": 244 + }, + { + "ce_ib": 34.30471420288086, + "ce_orig": 0.7986380457878113, + "epoch": 0.07017039327054425, + "kl_loss": 997.249755859375, + "loss_ib": 10.006802558898926, + "step": 244 + }, + { + "epoch": 0.07045797684952189, + "grad_norm": 220.81076049804688, + "learning_rate": 7.611464968152867e-06, + "loss": 11.7147, + "step": 245 + }, + { + "ce_ib": 33.57966613769531, + "ce_orig": 0.7934867143630981, + "epoch": 0.07045797684952189, + "kl_loss": 1044.489501953125, + "loss_ib": 10.478473663330078, + "step": 245 + }, + { + "ce_ib": 30.60529327392578, + "ce_orig": 0.5140112638473511, + "epoch": 0.07045797684952189, + "kl_loss": 979.260498046875, + "loss_ib": 9.823209762573242, + "step": 245 + }, + { + "ce_ib": 36.327213287353516, + "ce_orig": 1.3819940090179443, + "epoch": 0.07045797684952189, + "kl_loss": 994.7232666015625, + "loss_ib": 9.983559608459473, + "step": 245 + }, + { + "ce_ib": 34.17820739746094, + "ce_orig": 0.8814669847488403, + "epoch": 0.07045797684952189, + "kl_loss": 885.5364990234375, + "loss_ib": 8.889543533325195, + "step": 245 + }, + { + "ce_ib": 34.66371154785156, + "ce_orig": 1.3104512691497803, + "epoch": 0.07074556042849953, + "kl_loss": 962.946533203125, + "loss_ib": 9.664129257202148, + "step": 246 + }, + { + "ce_ib": 32.48523712158203, + "ce_orig": 0.19654367864131927, + "epoch": 0.07074556042849953, + "kl_loss": 613.4555053710938, + "loss_ib": 6.1670403480529785, + "step": 246 + }, + { + "ce_ib": 37.2482795715332, + "ce_orig": 1.528881549835205, + "epoch": 0.07074556042849953, + "kl_loss": 901.1966552734375, + "loss_ib": 9.049215316772461, + "step": 246 + }, + { + "ce_ib": 32.179996490478516, + "ce_orig": 0.784110963344574, + "epoch": 0.07074556042849953, + "kl_loss": 928.5962524414062, + "loss_ib": 9.318142890930176, + "step": 246 + }, + { + "ce_ib": 29.451961517333984, + "ce_orig": 0.9916685819625854, + "epoch": 0.07103314400747718, + "kl_loss": 983.1722412109375, + "loss_ib": 9.861173629760742, + "step": 247 + }, + { + "ce_ib": 37.86883544921875, + "ce_orig": 1.3681284189224243, + "epoch": 0.07103314400747718, + "kl_loss": 783.9342041015625, + "loss_ib": 7.87721061706543, + "step": 247 + }, + { + "ce_ib": 35.45719528198242, + "ce_orig": 1.3227193355560303, + "epoch": 0.07103314400747718, + "kl_loss": 848.042724609375, + "loss_ib": 8.515884399414062, + "step": 247 + }, + { + "ce_ib": 32.25755310058594, + "ce_orig": 0.7888100147247314, + "epoch": 0.07103314400747718, + "kl_loss": 908.6535034179688, + "loss_ib": 9.118792533874512, + "step": 247 + }, + { + "ce_ib": 34.29731369018555, + "ce_orig": 1.3129619359970093, + "epoch": 0.07132072758645482, + "kl_loss": 884.159912109375, + "loss_ib": 8.875896453857422, + "step": 248 + }, + { + "ce_ib": 34.386695861816406, + "ce_orig": 0.8120501041412354, + "epoch": 0.07132072758645482, + "kl_loss": 871.235107421875, + "loss_ib": 8.746737480163574, + "step": 248 + }, + { + "ce_ib": 30.895753860473633, + "ce_orig": 0.7307835817337036, + "epoch": 0.07132072758645482, + "kl_loss": 892.11767578125, + "loss_ib": 8.952072143554688, + "step": 248 + }, + { + "ce_ib": 33.708763122558594, + "ce_orig": 0.9609125852584839, + "epoch": 0.07132072758645482, + "kl_loss": 908.0333251953125, + "loss_ib": 9.114041328430176, + "step": 248 + }, + { + "ce_ib": 33.838768005371094, + "ce_orig": 1.0209710597991943, + "epoch": 0.07160831116543245, + "kl_loss": 893.3701171875, + "loss_ib": 8.96753978729248, + "step": 249 + }, + { + "ce_ib": 32.03993606567383, + "ce_orig": 1.0714709758758545, + "epoch": 0.07160831116543245, + "kl_loss": 845.7508544921875, + "loss_ib": 8.489547729492188, + "step": 249 + }, + { + "ce_ib": 29.649864196777344, + "ce_orig": 1.0681530237197876, + "epoch": 0.07160831116543245, + "kl_loss": 1036.672119140625, + "loss_ib": 10.396370887756348, + "step": 249 + }, + { + "ce_ib": 32.387245178222656, + "ce_orig": 1.1253947019577026, + "epoch": 0.07160831116543245, + "kl_loss": 863.2434692382812, + "loss_ib": 8.66482162475586, + "step": 249 + }, + { + "epoch": 0.07189589474441009, + "grad_norm": 205.96604919433594, + "learning_rate": 7.770700636942676e-06, + "loss": 10.0614, + "step": 250 + }, + { + "ce_ib": 34.68352127075195, + "ce_orig": 1.0909476280212402, + "epoch": 0.07189589474441009, + "kl_loss": 867.9714965820312, + "loss_ib": 8.714398384094238, + "step": 250 + }, + { + "ce_ib": 31.70074462890625, + "ce_orig": 0.9184356331825256, + "epoch": 0.07189589474441009, + "kl_loss": 894.34326171875, + "loss_ib": 8.975132942199707, + "step": 250 + }, + { + "ce_ib": 36.209068298339844, + "ce_orig": 1.1706846952438354, + "epoch": 0.07189589474441009, + "kl_loss": 795.684814453125, + "loss_ib": 7.9930572509765625, + "step": 250 + }, + { + "ce_ib": 35.70771789550781, + "ce_orig": 0.712752640247345, + "epoch": 0.07189589474441009, + "kl_loss": 757.6834106445312, + "loss_ib": 7.612542152404785, + "step": 250 + }, + { + "ce_ib": 30.09554100036621, + "ce_orig": 0.7422066330909729, + "epoch": 0.07218347832338773, + "kl_loss": 993.2034301757812, + "loss_ib": 9.962129592895508, + "step": 251 + }, + { + "ce_ib": 30.356294631958008, + "ce_orig": 0.8336584568023682, + "epoch": 0.07218347832338773, + "kl_loss": 753.03955078125, + "loss_ib": 7.560751914978027, + "step": 251 + }, + { + "ce_ib": 34.888736724853516, + "ce_orig": 0.7718689441680908, + "epoch": 0.07218347832338773, + "kl_loss": 869.8005981445312, + "loss_ib": 8.732894897460938, + "step": 251 + }, + { + "ce_ib": 34.146759033203125, + "ce_orig": 1.3637772798538208, + "epoch": 0.07218347832338773, + "kl_loss": 775.9141235351562, + "loss_ib": 7.793287754058838, + "step": 251 + }, + { + "ce_ib": 36.634437561035156, + "ce_orig": 1.066004991531372, + "epoch": 0.07247106190236538, + "kl_loss": 765.8868408203125, + "loss_ib": 7.695502758026123, + "step": 252 + }, + { + "ce_ib": 34.45478439331055, + "ce_orig": 1.2148858308792114, + "epoch": 0.07247106190236538, + "kl_loss": 777.1328735351562, + "loss_ib": 7.805783748626709, + "step": 252 + }, + { + "ce_ib": 32.530982971191406, + "ce_orig": 1.1445003747940063, + "epoch": 0.07247106190236538, + "kl_loss": 722.8099975585938, + "loss_ib": 7.260631084442139, + "step": 252 + }, + { + "ce_ib": 33.494747161865234, + "ce_orig": 1.2305960655212402, + "epoch": 0.07247106190236538, + "kl_loss": 792.6470947265625, + "loss_ib": 7.959965705871582, + "step": 252 + }, + { + "ce_ib": 31.90201187133789, + "ce_orig": 1.0651308298110962, + "epoch": 0.07275864548134302, + "kl_loss": 772.0308227539062, + "loss_ib": 7.7522101402282715, + "step": 253 + }, + { + "ce_ib": 36.638572692871094, + "ce_orig": 1.0030566453933716, + "epoch": 0.07275864548134302, + "kl_loss": 877.4444580078125, + "loss_ib": 8.81108283996582, + "step": 253 + }, + { + "ce_ib": 33.78273010253906, + "ce_orig": 1.0988649129867554, + "epoch": 0.07275864548134302, + "kl_loss": 711.0679931640625, + "loss_ib": 7.144462585449219, + "step": 253 + }, + { + "ce_ib": 33.45583724975586, + "ce_orig": 0.7203549146652222, + "epoch": 0.07275864548134302, + "kl_loss": 781.1646728515625, + "loss_ib": 7.845102310180664, + "step": 253 + }, + { + "ce_ib": 33.97596740722656, + "ce_orig": 1.545408010482788, + "epoch": 0.07304622906032066, + "kl_loss": 652.7290649414062, + "loss_ib": 6.5612664222717285, + "step": 254 + }, + { + "ce_ib": 34.555152893066406, + "ce_orig": 0.9807875752449036, + "epoch": 0.07304622906032066, + "kl_loss": 776.7444458007812, + "loss_ib": 7.801999568939209, + "step": 254 + }, + { + "ce_ib": 31.55140495300293, + "ce_orig": 0.7501665949821472, + "epoch": 0.07304622906032066, + "kl_loss": 769.890869140625, + "loss_ib": 7.730460166931152, + "step": 254 + }, + { + "ce_ib": 35.186798095703125, + "ce_orig": 0.7437403202056885, + "epoch": 0.07304622906032066, + "kl_loss": 796.2337646484375, + "loss_ib": 7.997524261474609, + "step": 254 + }, + { + "epoch": 0.0733338126392983, + "grad_norm": 180.0628204345703, + "learning_rate": 7.929936305732485e-06, + "loss": 8.6844, + "step": 255 + }, + { + "ce_ib": 34.1368408203125, + "ce_orig": 0.9626627564430237, + "epoch": 0.0733338126392983, + "kl_loss": 747.2984008789062, + "loss_ib": 7.507120609283447, + "step": 255 + }, + { + "ce_ib": 31.835412979125977, + "ce_orig": 0.8661757707595825, + "epoch": 0.0733338126392983, + "kl_loss": 723.6868896484375, + "loss_ib": 7.268703937530518, + "step": 255 + }, + { + "ce_ib": 40.257713317871094, + "ce_orig": 2.202180862426758, + "epoch": 0.0733338126392983, + "kl_loss": 661.350341796875, + "loss_ib": 6.65376091003418, + "step": 255 + }, + { + "ce_ib": 32.313350677490234, + "ce_orig": 0.7933053374290466, + "epoch": 0.0733338126392983, + "kl_loss": 702.9135131835938, + "loss_ib": 7.061448097229004, + "step": 255 + }, + { + "ce_ib": 32.77560806274414, + "ce_orig": 0.7840645909309387, + "epoch": 0.07362139621827593, + "kl_loss": 670.623291015625, + "loss_ib": 6.73900842666626, + "step": 256 + }, + { + "ce_ib": 34.22809982299805, + "ce_orig": 0.9132450819015503, + "epoch": 0.07362139621827593, + "kl_loss": 655.1952514648438, + "loss_ib": 6.586180210113525, + "step": 256 + }, + { + "ce_ib": 36.01190948486328, + "ce_orig": 1.1352635622024536, + "epoch": 0.07362139621827593, + "kl_loss": 622.5662841796875, + "loss_ib": 6.261674880981445, + "step": 256 + }, + { + "ce_ib": 31.15489959716797, + "ce_orig": 0.7360689043998718, + "epoch": 0.07362139621827593, + "kl_loss": 706.380126953125, + "loss_ib": 7.094955921173096, + "step": 256 + }, + { + "ce_ib": 37.11478042602539, + "ce_orig": 0.8814412355422974, + "epoch": 0.07390897979725358, + "kl_loss": 654.782958984375, + "loss_ib": 6.584944248199463, + "step": 257 + }, + { + "ce_ib": 36.152000427246094, + "ce_orig": 1.3912250995635986, + "epoch": 0.07390897979725358, + "kl_loss": 541.4805297851562, + "loss_ib": 5.45095682144165, + "step": 257 + }, + { + "ce_ib": 35.5803108215332, + "ce_orig": 0.9307012557983398, + "epoch": 0.07390897979725358, + "kl_loss": 653.8729248046875, + "loss_ib": 6.574309349060059, + "step": 257 + }, + { + "ce_ib": 32.77763366699219, + "ce_orig": 1.1101514101028442, + "epoch": 0.07390897979725358, + "kl_loss": 638.899658203125, + "loss_ib": 6.421773910522461, + "step": 257 + }, + { + "ce_ib": 30.221391677856445, + "ce_orig": 0.6179237365722656, + "epoch": 0.07419656337623122, + "kl_loss": 560.8075561523438, + "loss_ib": 5.638297080993652, + "step": 258 + }, + { + "ce_ib": 33.3847770690918, + "ce_orig": 0.734990656375885, + "epoch": 0.07419656337623122, + "kl_loss": 551.2991333007812, + "loss_ib": 5.5463762283325195, + "step": 258 + }, + { + "ce_ib": 37.109310150146484, + "ce_orig": 1.5755938291549683, + "epoch": 0.07419656337623122, + "kl_loss": 494.017578125, + "loss_ib": 4.977284908294678, + "step": 258 + }, + { + "ce_ib": 35.911502838134766, + "ce_orig": 1.535288691520691, + "epoch": 0.07419656337623122, + "kl_loss": 573.640869140625, + "loss_ib": 5.772319793701172, + "step": 258 + }, + { + "ce_ib": 37.536102294921875, + "ce_orig": 0.8849014043807983, + "epoch": 0.07448414695520886, + "kl_loss": 512.6968994140625, + "loss_ib": 5.1645050048828125, + "step": 259 + }, + { + "ce_ib": 33.12932586669922, + "ce_orig": 0.5551506876945496, + "epoch": 0.07448414695520886, + "kl_loss": 503.11016845703125, + "loss_ib": 5.064230918884277, + "step": 259 + }, + { + "ce_ib": 33.1467170715332, + "ce_orig": 1.3075788021087646, + "epoch": 0.07448414695520886, + "kl_loss": 618.4505615234375, + "loss_ib": 6.217652320861816, + "step": 259 + }, + { + "ce_ib": 34.920448303222656, + "ce_orig": 0.9793207049369812, + "epoch": 0.07448414695520886, + "kl_loss": 603.3938598632812, + "loss_ib": 6.068859100341797, + "step": 259 + }, + { + "epoch": 0.0747717305341865, + "grad_norm": 157.27696228027344, + "learning_rate": 8.089171974522295e-06, + "loss": 7.372, + "step": 260 + }, + { + "ce_ib": 35.68424987792969, + "ce_orig": 1.031170129776001, + "epoch": 0.0747717305341865, + "kl_loss": 524.8594970703125, + "loss_ib": 5.284278869628906, + "step": 260 + }, + { + "ce_ib": 35.50361633300781, + "ce_orig": 0.5432813167572021, + "epoch": 0.0747717305341865, + "kl_loss": 531.8944091796875, + "loss_ib": 5.354447364807129, + "step": 260 + }, + { + "ce_ib": 34.80183792114258, + "ce_orig": 0.8772653937339783, + "epoch": 0.0747717305341865, + "kl_loss": 527.3760986328125, + "loss_ib": 5.308562278747559, + "step": 260 + }, + { + "ce_ib": 34.62561798095703, + "ce_orig": 0.8580355048179626, + "epoch": 0.0747717305341865, + "kl_loss": 524.876708984375, + "loss_ib": 5.283392906188965, + "step": 260 + }, + { + "ce_ib": 33.79844665527344, + "ce_orig": 0.6697705984115601, + "epoch": 0.07505931411316413, + "kl_loss": 518.130126953125, + "loss_ib": 5.215099334716797, + "step": 261 + }, + { + "ce_ib": 35.4140739440918, + "ce_orig": 1.116640567779541, + "epoch": 0.07505931411316413, + "kl_loss": 495.97052001953125, + "loss_ib": 4.995119094848633, + "step": 261 + }, + { + "ce_ib": 40.22637176513672, + "ce_orig": 1.194669485092163, + "epoch": 0.07505931411316413, + "kl_loss": 483.3502197265625, + "loss_ib": 4.8737287521362305, + "step": 261 + }, + { + "ce_ib": 37.521358489990234, + "ce_orig": 1.1145161390304565, + "epoch": 0.07505931411316413, + "kl_loss": 467.95684814453125, + "loss_ib": 4.717089653015137, + "step": 261 + }, + { + "ce_ib": 37.80555725097656, + "ce_orig": 0.8788209557533264, + "epoch": 0.07534689769214178, + "kl_loss": 423.5547180175781, + "loss_ib": 4.27335262298584, + "step": 262 + }, + { + "ce_ib": 36.85504150390625, + "ce_orig": 0.5465120077133179, + "epoch": 0.07534689769214178, + "kl_loss": 476.8722229003906, + "loss_ib": 4.805577278137207, + "step": 262 + }, + { + "ce_ib": 37.499755859375, + "ce_orig": 1.1639437675476074, + "epoch": 0.07534689769214178, + "kl_loss": 467.90582275390625, + "loss_ib": 4.71655797958374, + "step": 262 + }, + { + "ce_ib": 36.78924560546875, + "ce_orig": 1.2826528549194336, + "epoch": 0.07534689769214178, + "kl_loss": 477.6324768066406, + "loss_ib": 4.813113689422607, + "step": 262 + }, + { + "ce_ib": 38.421451568603516, + "ce_orig": 0.8689420819282532, + "epoch": 0.07563448127111942, + "kl_loss": 477.5055847167969, + "loss_ib": 4.813477039337158, + "step": 263 + }, + { + "ce_ib": 35.91413879394531, + "ce_orig": 0.8632240891456604, + "epoch": 0.07563448127111942, + "kl_loss": 485.8994445800781, + "loss_ib": 4.894908428192139, + "step": 263 + }, + { + "ce_ib": 39.28192901611328, + "ce_orig": 0.877941370010376, + "epoch": 0.07563448127111942, + "kl_loss": 447.78070068359375, + "loss_ib": 4.517088890075684, + "step": 263 + }, + { + "ce_ib": 40.37826156616211, + "ce_orig": 0.8957875370979309, + "epoch": 0.07563448127111942, + "kl_loss": 403.5020446777344, + "loss_ib": 4.0753984451293945, + "step": 263 + }, + { + "ce_ib": 42.27157974243164, + "ce_orig": 1.3913816213607788, + "epoch": 0.07592206485009706, + "kl_loss": 423.022216796875, + "loss_ib": 4.272493839263916, + "step": 264 + }, + { + "ce_ib": 36.27720260620117, + "ce_orig": 1.0942326784133911, + "epoch": 0.07592206485009706, + "kl_loss": 400.34161376953125, + "loss_ib": 4.039693355560303, + "step": 264 + }, + { + "ce_ib": 33.6429328918457, + "ce_orig": 0.8256592154502869, + "epoch": 0.07592206485009706, + "kl_loss": 424.09423828125, + "loss_ib": 4.274585247039795, + "step": 264 + }, + { + "ce_ib": 38.3378791809082, + "ce_orig": 1.0243308544158936, + "epoch": 0.07592206485009706, + "kl_loss": 472.14508056640625, + "loss_ib": 4.759788513183594, + "step": 264 + }, + { + "epoch": 0.0762096484290747, + "grad_norm": 123.24594116210938, + "learning_rate": 8.248407643312102e-06, + "loss": 5.7351, + "step": 265 + }, + { + "ce_ib": 38.452537536621094, + "ce_orig": 1.0122849941253662, + "epoch": 0.0762096484290747, + "kl_loss": 435.57763671875, + "loss_ib": 4.394228935241699, + "step": 265 + }, + { + "ce_ib": 39.90632629394531, + "ce_orig": 1.178553819656372, + "epoch": 0.0762096484290747, + "kl_loss": 329.4811096191406, + "loss_ib": 3.334717273712158, + "step": 265 + }, + { + "ce_ib": 34.292686462402344, + "ce_orig": 1.4103134870529175, + "epoch": 0.0762096484290747, + "kl_loss": 385.14447021484375, + "loss_ib": 3.885737180709839, + "step": 265 + }, + { + "ce_ib": 42.36109161376953, + "ce_orig": 1.1843057870864868, + "epoch": 0.0762096484290747, + "kl_loss": 398.69232177734375, + "loss_ib": 4.029284477233887, + "step": 265 + }, + { + "ce_ib": 38.15491485595703, + "ce_orig": 1.1772407293319702, + "epoch": 0.07649723200805233, + "kl_loss": 360.84259033203125, + "loss_ib": 3.646580696105957, + "step": 266 + }, + { + "ce_ib": 43.11347961425781, + "ce_orig": 1.3642669916152954, + "epoch": 0.07649723200805233, + "kl_loss": 326.4147033691406, + "loss_ib": 3.307260274887085, + "step": 266 + }, + { + "ce_ib": 46.28087615966797, + "ce_orig": 1.7264760732650757, + "epoch": 0.07649723200805233, + "kl_loss": 390.9441223144531, + "loss_ib": 3.955721855163574, + "step": 266 + }, + { + "ce_ib": 44.236488342285156, + "ce_orig": 2.0390701293945312, + "epoch": 0.07649723200805233, + "kl_loss": 340.2770690917969, + "loss_ib": 3.447007179260254, + "step": 266 + }, + { + "ce_ib": 48.65879821777344, + "ce_orig": 1.3260499238967896, + "epoch": 0.07678481558702999, + "kl_loss": 225.09542846679688, + "loss_ib": 2.2996129989624023, + "step": 267 + }, + { + "ce_ib": 44.989524841308594, + "ce_orig": 0.7958585619926453, + "epoch": 0.07678481558702999, + "kl_loss": 330.1822509765625, + "loss_ib": 3.3468120098114014, + "step": 267 + }, + { + "ce_ib": 44.695777893066406, + "ce_orig": 1.6154531240463257, + "epoch": 0.07678481558702999, + "kl_loss": 283.551513671875, + "loss_ib": 2.8802108764648438, + "step": 267 + }, + { + "ce_ib": 50.11431884765625, + "ce_orig": 2.1371569633483887, + "epoch": 0.07678481558702999, + "kl_loss": 288.13592529296875, + "loss_ib": 2.931473731994629, + "step": 267 + }, + { + "ce_ib": 49.849369049072266, + "ce_orig": 1.6994304656982422, + "epoch": 0.07707239916600762, + "kl_loss": 307.9149169921875, + "loss_ib": 3.128998279571533, + "step": 268 + }, + { + "ce_ib": 46.516693115234375, + "ce_orig": 2.531648635864258, + "epoch": 0.07707239916600762, + "kl_loss": 257.6400451660156, + "loss_ib": 2.6229171752929688, + "step": 268 + }, + { + "ce_ib": 49.18770980834961, + "ce_orig": 0.8902948498725891, + "epoch": 0.07707239916600762, + "kl_loss": 287.6979064941406, + "loss_ib": 2.926166534423828, + "step": 268 + }, + { + "ce_ib": 43.51984786987305, + "ce_orig": 0.8550523519515991, + "epoch": 0.07707239916600762, + "kl_loss": 319.45574951171875, + "loss_ib": 3.238077402114868, + "step": 268 + }, + { + "ce_ib": 57.45269012451172, + "ce_orig": 1.6990851163864136, + "epoch": 0.07735998274498526, + "kl_loss": 247.26651000976562, + "loss_ib": 2.5301177501678467, + "step": 269 + }, + { + "ce_ib": 53.520240783691406, + "ce_orig": 1.453221082687378, + "epoch": 0.07735998274498526, + "kl_loss": 240.28468322753906, + "loss_ib": 2.456367015838623, + "step": 269 + }, + { + "ce_ib": 45.94785690307617, + "ce_orig": 1.0849944353103638, + "epoch": 0.07735998274498526, + "kl_loss": 244.33505249023438, + "loss_ib": 2.4892983436584473, + "step": 269 + }, + { + "ce_ib": 43.29793930053711, + "ce_orig": 1.103232741355896, + "epoch": 0.07735998274498526, + "kl_loss": 253.640625, + "loss_ib": 2.5797042846679688, + "step": 269 + }, + { + "epoch": 0.0776475663239629, + "grad_norm": 89.99483489990234, + "learning_rate": 8.407643312101912e-06, + "loss": 4.4374, + "step": 270 + }, + { + "ce_ib": 39.90031814575195, + "ce_orig": 0.8725808262825012, + "epoch": 0.0776475663239629, + "kl_loss": 259.3697509765625, + "loss_ib": 2.6335976123809814, + "step": 270 + }, + { + "ce_ib": 42.03105926513672, + "ce_orig": 0.8716458678245544, + "epoch": 0.0776475663239629, + "kl_loss": 230.16326904296875, + "loss_ib": 2.3436636924743652, + "step": 270 + }, + { + "ce_ib": 42.68729782104492, + "ce_orig": 0.665064811706543, + "epoch": 0.0776475663239629, + "kl_loss": 249.3673095703125, + "loss_ib": 2.536360263824463, + "step": 270 + }, + { + "ce_ib": 39.38070297241211, + "ce_orig": 1.2106064558029175, + "epoch": 0.0776475663239629, + "kl_loss": 242.24522399902344, + "loss_ib": 2.4618327617645264, + "step": 270 + }, + { + "ce_ib": 45.427120208740234, + "ce_orig": 0.9336494207382202, + "epoch": 0.07793514990294054, + "kl_loss": 236.5987548828125, + "loss_ib": 2.411414623260498, + "step": 271 + }, + { + "ce_ib": 40.51905059814453, + "ce_orig": 0.82356858253479, + "epoch": 0.07793514990294054, + "kl_loss": 320.950927734375, + "loss_ib": 3.250028371810913, + "step": 271 + }, + { + "ce_ib": 45.87284469604492, + "ce_orig": 2.147392988204956, + "epoch": 0.07793514990294054, + "kl_loss": 183.42807006835938, + "loss_ib": 1.8801534175872803, + "step": 271 + }, + { + "ce_ib": 39.21931838989258, + "ce_orig": 0.6727441549301147, + "epoch": 0.07793514990294054, + "kl_loss": 259.3567199707031, + "loss_ib": 2.632786512374878, + "step": 271 + }, + { + "ce_ib": 47.522220611572266, + "ce_orig": 2.0349419116973877, + "epoch": 0.07822273348191819, + "kl_loss": 171.4339141845703, + "loss_ib": 1.7618613243103027, + "step": 272 + }, + { + "ce_ib": 52.649227142333984, + "ce_orig": 1.1070398092269897, + "epoch": 0.07822273348191819, + "kl_loss": 232.63453674316406, + "loss_ib": 2.3789944648742676, + "step": 272 + }, + { + "ce_ib": 46.19776916503906, + "ce_orig": 1.476123332977295, + "epoch": 0.07822273348191819, + "kl_loss": 210.17898559570312, + "loss_ib": 2.1479876041412354, + "step": 272 + }, + { + "ce_ib": 48.105499267578125, + "ce_orig": 1.1524395942687988, + "epoch": 0.07822273348191819, + "kl_loss": 196.00885009765625, + "loss_ib": 2.0081939697265625, + "step": 272 + }, + { + "ce_ib": 46.68330383300781, + "ce_orig": 0.884026050567627, + "epoch": 0.07851031706089583, + "kl_loss": 164.7951202392578, + "loss_ib": 1.6946345567703247, + "step": 273 + }, + { + "ce_ib": 46.4140739440918, + "ce_orig": 1.2737939357757568, + "epoch": 0.07851031706089583, + "kl_loss": 168.782470703125, + "loss_ib": 1.7342387437820435, + "step": 273 + }, + { + "ce_ib": 44.1322135925293, + "ce_orig": 0.8222432136535645, + "epoch": 0.07851031706089583, + "kl_loss": 183.51260375976562, + "loss_ib": 1.8792582750320435, + "step": 273 + }, + { + "ce_ib": 47.30166244506836, + "ce_orig": 1.5957039594650269, + "epoch": 0.07851031706089583, + "kl_loss": 154.28269958496094, + "loss_ib": 1.5901285409927368, + "step": 273 + }, + { + "ce_ib": 49.634849548339844, + "ce_orig": 1.259581208229065, + "epoch": 0.07879790063987346, + "kl_loss": 156.60406494140625, + "loss_ib": 1.615675449371338, + "step": 274 + }, + { + "ce_ib": 45.53166198730469, + "ce_orig": 1.591690182685852, + "epoch": 0.07879790063987346, + "kl_loss": 165.76138305664062, + "loss_ib": 1.7031453847885132, + "step": 274 + }, + { + "ce_ib": 48.556114196777344, + "ce_orig": 1.4561307430267334, + "epoch": 0.07879790063987346, + "kl_loss": 145.72293090820312, + "loss_ib": 1.505785346031189, + "step": 274 + }, + { + "ce_ib": 51.11494064331055, + "ce_orig": 1.5235646963119507, + "epoch": 0.07879790063987346, + "kl_loss": 142.93948364257812, + "loss_ib": 1.4805097579956055, + "step": 274 + }, + { + "epoch": 0.0790854842188511, + "grad_norm": 66.16566467285156, + "learning_rate": 8.566878980891721e-06, + "loss": 3.4286, + "step": 275 + }, + { + "ce_ib": 39.09020233154297, + "ce_orig": 0.8260847330093384, + "epoch": 0.0790854842188511, + "kl_loss": 139.89077758789062, + "loss_ib": 1.4379980564117432, + "step": 275 + }, + { + "ce_ib": 48.023948669433594, + "ce_orig": 1.396461009979248, + "epoch": 0.0790854842188511, + "kl_loss": 146.98944091796875, + "loss_ib": 1.517918348312378, + "step": 275 + }, + { + "ce_ib": 42.226966857910156, + "ce_orig": 1.0770304203033447, + "epoch": 0.0790854842188511, + "kl_loss": 156.64419555664062, + "loss_ib": 1.6086689233779907, + "step": 275 + }, + { + "ce_ib": 36.58985900878906, + "ce_orig": 1.2845979928970337, + "epoch": 0.0790854842188511, + "kl_loss": 211.11338806152344, + "loss_ib": 2.14772367477417, + "step": 275 + }, + { + "ce_ib": 44.49997329711914, + "ce_orig": 1.6609095335006714, + "epoch": 0.07937306779782874, + "kl_loss": 128.6660614013672, + "loss_ib": 1.331160545349121, + "step": 276 + }, + { + "ce_ib": 40.899295806884766, + "ce_orig": 1.6309945583343506, + "epoch": 0.07937306779782874, + "kl_loss": 139.94818115234375, + "loss_ib": 1.4403811693191528, + "step": 276 + }, + { + "ce_ib": 40.4159049987793, + "ce_orig": 0.8224959373474121, + "epoch": 0.07937306779782874, + "kl_loss": 169.31053161621094, + "loss_ib": 1.7335212230682373, + "step": 276 + }, + { + "ce_ib": 40.17991638183594, + "ce_orig": 1.1940970420837402, + "epoch": 0.07937306779782874, + "kl_loss": 130.6182403564453, + "loss_ib": 1.3463622331619263, + "step": 276 + }, + { + "ce_ib": 48.19374465942383, + "ce_orig": 1.0767353773117065, + "epoch": 0.07966065137680639, + "kl_loss": 152.88658142089844, + "loss_ib": 1.5770596265792847, + "step": 277 + }, + { + "ce_ib": 48.65080261230469, + "ce_orig": 1.0481511354446411, + "epoch": 0.07966065137680639, + "kl_loss": 114.82660675048828, + "loss_ib": 1.1969168186187744, + "step": 277 + }, + { + "ce_ib": 39.97406768798828, + "ce_orig": 1.4612183570861816, + "epoch": 0.07966065137680639, + "kl_loss": 131.66680908203125, + "loss_ib": 1.3566421270370483, + "step": 277 + }, + { + "ce_ib": 42.1298828125, + "ce_orig": 1.1980208158493042, + "epoch": 0.07966065137680639, + "kl_loss": 127.7425308227539, + "loss_ib": 1.3195551633834839, + "step": 277 + }, + { + "ce_ib": 40.26860427856445, + "ce_orig": 1.4410648345947266, + "epoch": 0.07994823495578403, + "kl_loss": 95.930908203125, + "loss_ib": 0.9995777010917664, + "step": 278 + }, + { + "ce_ib": 40.977745056152344, + "ce_orig": 1.2464489936828613, + "epoch": 0.07994823495578403, + "kl_loss": 109.65506744384766, + "loss_ib": 1.1375283002853394, + "step": 278 + }, + { + "ce_ib": 42.28449249267578, + "ce_orig": 0.9634944796562195, + "epoch": 0.07994823495578403, + "kl_loss": 126.33438110351562, + "loss_ib": 1.3056282997131348, + "step": 278 + }, + { + "ce_ib": 35.779815673828125, + "ce_orig": 1.0111249685287476, + "epoch": 0.07994823495578403, + "kl_loss": 108.74060821533203, + "loss_ib": 1.1231858730316162, + "step": 278 + }, + { + "ce_ib": 41.58162307739258, + "ce_orig": 0.858116626739502, + "epoch": 0.08023581853476167, + "kl_loss": 94.94490051269531, + "loss_ib": 0.9910306334495544, + "step": 279 + }, + { + "ce_ib": 40.43954086303711, + "ce_orig": 1.5376956462860107, + "epoch": 0.08023581853476167, + "kl_loss": 114.16512298583984, + "loss_ib": 1.1820907592773438, + "step": 279 + }, + { + "ce_ib": 39.77493667602539, + "ce_orig": 1.4295803308486938, + "epoch": 0.08023581853476167, + "kl_loss": 97.94513702392578, + "loss_ib": 1.019226312637329, + "step": 279 + }, + { + "ce_ib": 37.93669509887695, + "ce_orig": 0.9915726780891418, + "epoch": 0.08023581853476167, + "kl_loss": 100.8277359008789, + "loss_ib": 1.046213984489441, + "step": 279 + }, + { + "epoch": 0.0805234021137393, + "grad_norm": 46.49250793457031, + "learning_rate": 8.726114649681529e-06, + "loss": 2.5267, + "step": 280 + }, + { + "ce_ib": 40.85035705566406, + "ce_orig": 0.9079654216766357, + "epoch": 0.0805234021137393, + "kl_loss": 103.09587860107422, + "loss_ib": 1.07180917263031, + "step": 280 + }, + { + "ce_ib": 36.37861633300781, + "ce_orig": 0.7120267152786255, + "epoch": 0.0805234021137393, + "kl_loss": 100.90724182128906, + "loss_ib": 1.045451045036316, + "step": 280 + }, + { + "ce_ib": 41.00496292114258, + "ce_orig": 0.710496187210083, + "epoch": 0.0805234021137393, + "kl_loss": 125.49755096435547, + "loss_ib": 1.295980453491211, + "step": 280 + }, + { + "ce_ib": 41.43547058105469, + "ce_orig": 1.6307661533355713, + "epoch": 0.0805234021137393, + "kl_loss": 75.49640655517578, + "loss_ib": 0.7963995337486267, + "step": 280 + }, + { + "ce_ib": 35.050662994384766, + "ce_orig": 0.9704921245574951, + "epoch": 0.08081098569271694, + "kl_loss": 99.29730224609375, + "loss_ib": 1.028023600578308, + "step": 281 + }, + { + "ce_ib": 37.16071701049805, + "ce_orig": 0.9510385394096375, + "epoch": 0.08081098569271694, + "kl_loss": 84.18745422363281, + "loss_ib": 0.879035234451294, + "step": 281 + }, + { + "ce_ib": 40.97324752807617, + "ce_orig": 0.6980030536651611, + "epoch": 0.08081098569271694, + "kl_loss": 85.10052490234375, + "loss_ib": 0.8919785022735596, + "step": 281 + }, + { + "ce_ib": 37.02272033691406, + "ce_orig": 1.8392256498336792, + "epoch": 0.08081098569271694, + "kl_loss": 110.20343017578125, + "loss_ib": 1.1390570402145386, + "step": 281 + }, + { + "ce_ib": 37.121299743652344, + "ce_orig": 1.2205618619918823, + "epoch": 0.08109856927169459, + "kl_loss": 70.73722839355469, + "loss_ib": 0.7444935441017151, + "step": 282 + }, + { + "ce_ib": 37.17251968383789, + "ce_orig": 1.2131117582321167, + "epoch": 0.08109856927169459, + "kl_loss": 72.53062438964844, + "loss_ib": 0.762478768825531, + "step": 282 + }, + { + "ce_ib": 38.68415832519531, + "ce_orig": 1.2195942401885986, + "epoch": 0.08109856927169459, + "kl_loss": 97.90969848632812, + "loss_ib": 1.017781138420105, + "step": 282 + }, + { + "ce_ib": 44.60282897949219, + "ce_orig": 2.035943031311035, + "epoch": 0.08109856927169459, + "kl_loss": 74.025634765625, + "loss_ib": 0.7848591208457947, + "step": 282 + }, + { + "ce_ib": 32.7514533996582, + "ce_orig": 1.1092407703399658, + "epoch": 0.08138615285067223, + "kl_loss": 64.70214080810547, + "loss_ib": 0.6797728538513184, + "step": 283 + }, + { + "ce_ib": 34.242916107177734, + "ce_orig": 1.563724160194397, + "epoch": 0.08138615285067223, + "kl_loss": 74.12162780761719, + "loss_ib": 0.7754591703414917, + "step": 283 + }, + { + "ce_ib": 39.811283111572266, + "ce_orig": 1.3022565841674805, + "epoch": 0.08138615285067223, + "kl_loss": 79.62696075439453, + "loss_ib": 0.8360808491706848, + "step": 283 + }, + { + "ce_ib": 38.87461853027344, + "ce_orig": 0.7815361618995667, + "epoch": 0.08138615285067223, + "kl_loss": 73.88887023925781, + "loss_ib": 0.7777632474899292, + "step": 283 + }, + { + "ce_ib": 31.66046905517578, + "ce_orig": 1.1096495389938354, + "epoch": 0.08167373642964987, + "kl_loss": 92.97333526611328, + "loss_ib": 0.9613937735557556, + "step": 284 + }, + { + "ce_ib": 36.92252731323242, + "ce_orig": 0.7650741338729858, + "epoch": 0.08167373642964987, + "kl_loss": 67.56904602050781, + "loss_ib": 0.7126129865646362, + "step": 284 + }, + { + "ce_ib": 40.48139953613281, + "ce_orig": 1.4183719158172607, + "epoch": 0.08167373642964987, + "kl_loss": 63.496063232421875, + "loss_ib": 0.6754420399665833, + "step": 284 + }, + { + "ce_ib": 31.517629623413086, + "ce_orig": 0.8108515739440918, + "epoch": 0.08167373642964987, + "kl_loss": 62.563720703125, + "loss_ib": 0.6571548581123352, + "step": 284 + }, + { + "epoch": 0.0819613200086275, + "grad_norm": 30.389598846435547, + "learning_rate": 8.885350318471338e-06, + "loss": 2.0103, + "step": 285 + }, + { + "ce_ib": 32.3922119140625, + "ce_orig": 0.8194482326507568, + "epoch": 0.0819613200086275, + "kl_loss": 72.32354736328125, + "loss_ib": 0.7556276321411133, + "step": 285 + }, + { + "ce_ib": 38.804710388183594, + "ce_orig": 0.9490994811058044, + "epoch": 0.0819613200086275, + "kl_loss": 65.6147689819336, + "loss_ib": 0.6949523687362671, + "step": 285 + }, + { + "ce_ib": 34.912132263183594, + "ce_orig": 1.0493797063827515, + "epoch": 0.0819613200086275, + "kl_loss": 53.544273376464844, + "loss_ib": 0.5703548789024353, + "step": 285 + }, + { + "ce_ib": 32.628414154052734, + "ce_orig": 0.8485954403877258, + "epoch": 0.0819613200086275, + "kl_loss": 102.72671508789062, + "loss_ib": 1.0598955154418945, + "step": 285 + }, + { + "ce_ib": 37.96284866333008, + "ce_orig": 1.6377449035644531, + "epoch": 0.08224890358760514, + "kl_loss": 51.751041412353516, + "loss_ib": 0.555473268032074, + "step": 286 + }, + { + "ce_ib": 37.74256134033203, + "ce_orig": 1.1003116369247437, + "epoch": 0.08224890358760514, + "kl_loss": 77.45193481445312, + "loss_ib": 0.8122618794441223, + "step": 286 + }, + { + "ce_ib": 36.4458122253418, + "ce_orig": 1.4265295267105103, + "epoch": 0.08224890358760514, + "kl_loss": 75.32568359375, + "loss_ib": 0.7897026538848877, + "step": 286 + }, + { + "ce_ib": 38.405860900878906, + "ce_orig": 1.6222838163375854, + "epoch": 0.08224890358760514, + "kl_loss": 59.407928466796875, + "loss_ib": 0.6324851512908936, + "step": 286 + }, + { + "ce_ib": 32.6006965637207, + "ce_orig": 0.6943832635879517, + "epoch": 0.0825364871665828, + "kl_loss": 55.28700256347656, + "loss_ib": 0.5854707360267639, + "step": 287 + }, + { + "ce_ib": 36.58828353881836, + "ce_orig": 1.536011815071106, + "epoch": 0.0825364871665828, + "kl_loss": 66.80679321289062, + "loss_ib": 0.704656183719635, + "step": 287 + }, + { + "ce_ib": 28.62580680847168, + "ce_orig": 0.7646328210830688, + "epoch": 0.0825364871665828, + "kl_loss": 55.685218811035156, + "loss_ib": 0.5854779481887817, + "step": 287 + }, + { + "ce_ib": 33.84457778930664, + "ce_orig": 0.9994617700576782, + "epoch": 0.0825364871665828, + "kl_loss": 58.716033935546875, + "loss_ib": 0.6210048794746399, + "step": 287 + }, + { + "ce_ib": 30.92181968688965, + "ce_orig": 1.1378878355026245, + "epoch": 0.08282407074556043, + "kl_loss": 56.736080169677734, + "loss_ib": 0.5982826352119446, + "step": 288 + }, + { + "ce_ib": 33.0194091796875, + "ce_orig": 0.7808621525764465, + "epoch": 0.08282407074556043, + "kl_loss": 66.98883056640625, + "loss_ib": 0.7029076814651489, + "step": 288 + }, + { + "ce_ib": 34.579010009765625, + "ce_orig": 0.7823261022567749, + "epoch": 0.08282407074556043, + "kl_loss": 51.778160095214844, + "loss_ib": 0.5523605942726135, + "step": 288 + }, + { + "ce_ib": 31.606508255004883, + "ce_orig": 0.74196857213974, + "epoch": 0.08282407074556043, + "kl_loss": 49.30632019042969, + "loss_ib": 0.5246697068214417, + "step": 288 + }, + { + "ce_ib": 34.344478607177734, + "ce_orig": 0.6873656511306763, + "epoch": 0.08311165432453807, + "kl_loss": 53.74017333984375, + "loss_ib": 0.5717462301254272, + "step": 289 + }, + { + "ce_ib": 33.72829818725586, + "ce_orig": 1.6017377376556396, + "epoch": 0.08311165432453807, + "kl_loss": 46.42070770263672, + "loss_ib": 0.49793535470962524, + "step": 289 + }, + { + "ce_ib": 31.348825454711914, + "ce_orig": 1.2451566457748413, + "epoch": 0.08311165432453807, + "kl_loss": 46.207916259765625, + "loss_ib": 0.49342799186706543, + "step": 289 + }, + { + "ce_ib": 31.967754364013672, + "ce_orig": 0.8155576586723328, + "epoch": 0.08311165432453807, + "kl_loss": 50.1954460144043, + "loss_ib": 0.5339221954345703, + "step": 289 + }, + { + "epoch": 0.08339923790351571, + "grad_norm": 17.25715446472168, + "learning_rate": 9.044585987261148e-06, + "loss": 1.716, + "step": 290 + }, + { + "ce_ib": 29.394428253173828, + "ce_orig": 0.6837283968925476, + "epoch": 0.08339923790351571, + "kl_loss": 50.952415466308594, + "loss_ib": 0.5389185547828674, + "step": 290 + }, + { + "ce_ib": 29.528079986572266, + "ce_orig": 0.9319428205490112, + "epoch": 0.08339923790351571, + "kl_loss": 49.36161804199219, + "loss_ib": 0.5231442451477051, + "step": 290 + }, + { + "ce_ib": 31.811763763427734, + "ce_orig": 1.0387821197509766, + "epoch": 0.08339923790351571, + "kl_loss": 46.64958572387695, + "loss_ib": 0.49830758571624756, + "step": 290 + }, + { + "ce_ib": 26.820646286010742, + "ce_orig": 1.016945719718933, + "epoch": 0.08339923790351571, + "kl_loss": 50.506019592285156, + "loss_ib": 0.531880795955658, + "step": 290 + }, + { + "ce_ib": 31.8367919921875, + "ce_orig": 1.025216817855835, + "epoch": 0.08368682148249335, + "kl_loss": 35.0272216796875, + "loss_ib": 0.38210898637771606, + "step": 291 + }, + { + "ce_ib": 33.226322174072266, + "ce_orig": 1.2911221981048584, + "epoch": 0.08368682148249335, + "kl_loss": 58.08740997314453, + "loss_ib": 0.6141003966331482, + "step": 291 + }, + { + "ce_ib": 32.05693435668945, + "ce_orig": 1.126991629600525, + "epoch": 0.08368682148249335, + "kl_loss": 47.8099479675293, + "loss_ib": 0.5101563930511475, + "step": 291 + }, + { + "ce_ib": 31.257158279418945, + "ce_orig": 0.95924311876297, + "epoch": 0.08368682148249335, + "kl_loss": 45.37004089355469, + "loss_ib": 0.48495757579803467, + "step": 291 + }, + { + "ce_ib": 30.047136306762695, + "ce_orig": 1.108436942100525, + "epoch": 0.083974405061471, + "kl_loss": 44.887725830078125, + "loss_ib": 0.47892439365386963, + "step": 292 + }, + { + "ce_ib": 29.945829391479492, + "ce_orig": 1.0467983484268188, + "epoch": 0.083974405061471, + "kl_loss": 35.674888610839844, + "loss_ib": 0.38669469952583313, + "step": 292 + }, + { + "ce_ib": 31.76511001586914, + "ce_orig": 1.2328693866729736, + "epoch": 0.083974405061471, + "kl_loss": 43.979034423828125, + "loss_ib": 0.4715554416179657, + "step": 292 + }, + { + "ce_ib": 32.23439407348633, + "ce_orig": 1.2534009218215942, + "epoch": 0.083974405061471, + "kl_loss": 38.1639518737793, + "loss_ib": 0.4138738811016083, + "step": 292 + }, + { + "ce_ib": 30.920150756835938, + "ce_orig": 0.8429147005081177, + "epoch": 0.08426198864044863, + "kl_loss": 42.72565460205078, + "loss_ib": 0.4581766724586487, + "step": 293 + }, + { + "ce_ib": 29.249128341674805, + "ce_orig": 1.381933331489563, + "epoch": 0.08426198864044863, + "kl_loss": 38.22590255737305, + "loss_ib": 0.41150814294815063, + "step": 293 + }, + { + "ce_ib": 27.40785026550293, + "ce_orig": 0.7817553877830505, + "epoch": 0.08426198864044863, + "kl_loss": 34.802711486816406, + "loss_ib": 0.375434935092926, + "step": 293 + }, + { + "ce_ib": 31.316415786743164, + "ce_orig": 0.8332533836364746, + "epoch": 0.08426198864044863, + "kl_loss": 44.91577911376953, + "loss_ib": 0.48047420382499695, + "step": 293 + }, + { + "ce_ib": 25.795753479003906, + "ce_orig": 1.2377901077270508, + "epoch": 0.08454957221942627, + "kl_loss": 40.89204406738281, + "loss_ib": 0.4347161650657654, + "step": 294 + }, + { + "ce_ib": 32.04143142700195, + "ce_orig": 0.7857619524002075, + "epoch": 0.08454957221942627, + "kl_loss": 36.315128326416016, + "loss_ib": 0.3951927125453949, + "step": 294 + }, + { + "ce_ib": 29.322782516479492, + "ce_orig": 0.8479982614517212, + "epoch": 0.08454957221942627, + "kl_loss": 43.97806167602539, + "loss_ib": 0.4691033959388733, + "step": 294 + }, + { + "ce_ib": 27.92338752746582, + "ce_orig": 0.9372240900993347, + "epoch": 0.08454957221942627, + "kl_loss": 34.369789123535156, + "loss_ib": 0.3716212809085846, + "step": 294 + }, + { + "epoch": 0.08483715579840391, + "grad_norm": 11.152618408203125, + "learning_rate": 9.203821656050957e-06, + "loss": 1.4786, + "step": 295 + }, + { + "ce_ib": 31.50551986694336, + "ce_orig": 0.7309855818748474, + "epoch": 0.08483715579840391, + "kl_loss": 36.99781799316406, + "loss_ib": 0.4014836847782135, + "step": 295 + }, + { + "ce_ib": 28.562597274780273, + "ce_orig": 0.9782903790473938, + "epoch": 0.08483715579840391, + "kl_loss": 40.10491180419922, + "loss_ib": 0.4296116828918457, + "step": 295 + }, + { + "ce_ib": 24.42827796936035, + "ce_orig": 0.8424835205078125, + "epoch": 0.08483715579840391, + "kl_loss": 33.25676727294922, + "loss_ib": 0.35699597001075745, + "step": 295 + }, + { + "ce_ib": 29.361215591430664, + "ce_orig": 0.6136335730552673, + "epoch": 0.08483715579840391, + "kl_loss": 38.58903121948242, + "loss_ib": 0.4152515232563019, + "step": 295 + }, + { + "ce_ib": 26.14788818359375, + "ce_orig": 0.85167396068573, + "epoch": 0.08512473937738155, + "kl_loss": 38.04214096069336, + "loss_ib": 0.4065692722797394, + "step": 296 + }, + { + "ce_ib": 29.764019012451172, + "ce_orig": 1.7308716773986816, + "epoch": 0.08512473937738155, + "kl_loss": 32.98516845703125, + "loss_ib": 0.3596157133579254, + "step": 296 + }, + { + "ce_ib": 30.012575149536133, + "ce_orig": 1.0343323945999146, + "epoch": 0.08512473937738155, + "kl_loss": 38.74031066894531, + "loss_ib": 0.41741567850112915, + "step": 296 + }, + { + "ce_ib": 30.881479263305664, + "ce_orig": 1.7702387571334839, + "epoch": 0.08512473937738155, + "kl_loss": 21.21739959716797, + "loss_ib": 0.24305547773838043, + "step": 296 + }, + { + "ce_ib": 24.717079162597656, + "ce_orig": 0.9549234509468079, + "epoch": 0.08541232295635919, + "kl_loss": 37.45452117919922, + "loss_ib": 0.39926227927207947, + "step": 297 + }, + { + "ce_ib": 26.27513885498047, + "ce_orig": 0.7507061958312988, + "epoch": 0.08541232295635919, + "kl_loss": 34.658172607421875, + "loss_ib": 0.37285685539245605, + "step": 297 + }, + { + "ce_ib": 26.04485321044922, + "ce_orig": 0.6326652765274048, + "epoch": 0.08541232295635919, + "kl_loss": 41.222965240478516, + "loss_ib": 0.4382745027542114, + "step": 297 + }, + { + "ce_ib": 22.960899353027344, + "ce_orig": 0.855808675289154, + "epoch": 0.08541232295635919, + "kl_loss": 32.55647277832031, + "loss_ib": 0.34852561354637146, + "step": 297 + }, + { + "ce_ib": 27.634048461914062, + "ce_orig": 0.8469107747077942, + "epoch": 0.08569990653533684, + "kl_loss": 52.79608917236328, + "loss_ib": 0.5555949211120605, + "step": 298 + }, + { + "ce_ib": 25.107303619384766, + "ce_orig": 1.0334007740020752, + "epoch": 0.08569990653533684, + "kl_loss": 23.502300262451172, + "loss_ib": 0.2601303160190582, + "step": 298 + }, + { + "ce_ib": 26.35793113708496, + "ce_orig": 0.8023892641067505, + "epoch": 0.08569990653533684, + "kl_loss": 33.53641128540039, + "loss_ib": 0.36172202229499817, + "step": 298 + }, + { + "ce_ib": 28.72397804260254, + "ce_orig": 0.8318001627922058, + "epoch": 0.08569990653533684, + "kl_loss": 28.708393096923828, + "loss_ib": 0.31580790877342224, + "step": 298 + }, + { + "ce_ib": 25.58355140686035, + "ce_orig": 0.6520025134086609, + "epoch": 0.08598749011431447, + "kl_loss": 27.046260833740234, + "loss_ib": 0.2960461676120758, + "step": 299 + }, + { + "ce_ib": 27.802156448364258, + "ce_orig": 0.7854000926017761, + "epoch": 0.08598749011431447, + "kl_loss": 28.843101501464844, + "loss_ib": 0.31623315811157227, + "step": 299 + }, + { + "ce_ib": 23.300045013427734, + "ce_orig": 0.8532059192657471, + "epoch": 0.08598749011431447, + "kl_loss": 27.97170639038086, + "loss_ib": 0.3030170798301697, + "step": 299 + }, + { + "ce_ib": 27.139148712158203, + "ce_orig": 0.8574588894844055, + "epoch": 0.08598749011431447, + "kl_loss": 65.27938842773438, + "loss_ib": 0.6799330115318298, + "step": 299 + }, + { + "epoch": 0.08627507369329211, + "grad_norm": 8.516314506530762, + "learning_rate": 9.363057324840765e-06, + "loss": 1.3038, + "step": 300 + }, + { + "ce_ib": 27.859683990478516, + "ce_orig": 0.7980900406837463, + "epoch": 0.08627507369329211, + "kl_loss": 31.5460262298584, + "loss_ib": 0.34331992268562317, + "step": 300 + }, + { + "ce_ib": 26.832685470581055, + "ce_orig": 1.1817421913146973, + "epoch": 0.08627507369329211, + "kl_loss": 36.501564025878906, + "loss_ib": 0.3918483257293701, + "step": 300 + }, + { + "ce_ib": 24.322065353393555, + "ce_orig": 1.163743495941162, + "epoch": 0.08627507369329211, + "kl_loss": 25.613513946533203, + "loss_ib": 0.28045719861984253, + "step": 300 + }, + { + "ce_ib": 23.191028594970703, + "ce_orig": 0.7588955760002136, + "epoch": 0.08627507369329211, + "kl_loss": 25.832067489624023, + "loss_ib": 0.28151169419288635, + "step": 300 + }, + { + "ce_ib": 28.026681900024414, + "ce_orig": 1.1685611009597778, + "epoch": 0.08656265727226975, + "kl_loss": 27.28811264038086, + "loss_ib": 0.30090779066085815, + "step": 301 + }, + { + "ce_ib": 28.903520584106445, + "ce_orig": 0.8756263852119446, + "epoch": 0.08656265727226975, + "kl_loss": 40.801658630371094, + "loss_ib": 0.43692007660865784, + "step": 301 + }, + { + "ce_ib": 32.01344299316406, + "ce_orig": 1.7789306640625, + "epoch": 0.08656265727226975, + "kl_loss": 28.048770904541016, + "loss_ib": 0.31250113248825073, + "step": 301 + }, + { + "ce_ib": 24.778104782104492, + "ce_orig": 1.0734585523605347, + "epoch": 0.08656265727226975, + "kl_loss": 26.787107467651367, + "loss_ib": 0.29264917969703674, + "step": 301 + }, + { + "ce_ib": 26.043542861938477, + "ce_orig": 0.6056478023529053, + "epoch": 0.08685024085124739, + "kl_loss": 22.7061767578125, + "loss_ib": 0.2531053125858307, + "step": 302 + }, + { + "ce_ib": 28.747360229492188, + "ce_orig": 0.8331350684165955, + "epoch": 0.08685024085124739, + "kl_loss": 35.54327392578125, + "loss_ib": 0.38418009877204895, + "step": 302 + }, + { + "ce_ib": 25.376556396484375, + "ce_orig": 0.9154051542282104, + "epoch": 0.08685024085124739, + "kl_loss": 17.989727020263672, + "loss_ib": 0.2052738070487976, + "step": 302 + }, + { + "ce_ib": 28.94438362121582, + "ce_orig": 1.2490043640136719, + "epoch": 0.08685024085124739, + "kl_loss": 38.229942321777344, + "loss_ib": 0.4112437963485718, + "step": 302 + }, + { + "ce_ib": 24.730915069580078, + "ce_orig": 1.1931933164596558, + "epoch": 0.08713782443022504, + "kl_loss": 29.647769927978516, + "loss_ib": 0.3212085962295532, + "step": 303 + }, + { + "ce_ib": 22.95311737060547, + "ce_orig": 1.0858471393585205, + "epoch": 0.08713782443022504, + "kl_loss": 30.164508819580078, + "loss_ib": 0.32459819316864014, + "step": 303 + }, + { + "ce_ib": 25.692777633666992, + "ce_orig": 1.2659823894500732, + "epoch": 0.08713782443022504, + "kl_loss": 45.62352752685547, + "loss_ib": 0.4819280505180359, + "step": 303 + }, + { + "ce_ib": 23.456066131591797, + "ce_orig": 0.6252316236495972, + "epoch": 0.08713782443022504, + "kl_loss": 23.17284393310547, + "loss_ib": 0.25518450140953064, + "step": 303 + }, + { + "ce_ib": 26.05988883972168, + "ce_orig": 1.3680285215377808, + "epoch": 0.08742540800920268, + "kl_loss": 24.49565315246582, + "loss_ib": 0.2710164189338684, + "step": 304 + }, + { + "ce_ib": 25.749126434326172, + "ce_orig": 1.3819550275802612, + "epoch": 0.08742540800920268, + "kl_loss": 28.92790412902832, + "loss_ib": 0.3150281608104706, + "step": 304 + }, + { + "ce_ib": 22.42483139038086, + "ce_orig": 0.5715821981430054, + "epoch": 0.08742540800920268, + "kl_loss": 22.742015838623047, + "loss_ib": 0.2498449832201004, + "step": 304 + }, + { + "ce_ib": 25.59157371520996, + "ce_orig": 1.4033931493759155, + "epoch": 0.08742540800920268, + "kl_loss": 30.259891510009766, + "loss_ib": 0.32819050550460815, + "step": 304 + }, + { + "epoch": 0.08771299158818031, + "grad_norm": 3.7502119541168213, + "learning_rate": 9.522292993630574e-06, + "loss": 1.2559, + "step": 305 + }, + { + "ce_ib": 29.969907760620117, + "ce_orig": 2.0007097721099854, + "epoch": 0.08771299158818031, + "kl_loss": 22.26645278930664, + "loss_ib": 0.2526344358921051, + "step": 305 + }, + { + "ce_ib": 19.34192657470703, + "ce_orig": 0.8707708716392517, + "epoch": 0.08771299158818031, + "kl_loss": 26.55549430847168, + "loss_ib": 0.2848968505859375, + "step": 305 + }, + { + "ce_ib": 21.973365783691406, + "ce_orig": 1.0064218044281006, + "epoch": 0.08771299158818031, + "kl_loss": 22.693504333496094, + "loss_ib": 0.2489084005355835, + "step": 305 + }, + { + "ce_ib": 26.216506958007812, + "ce_orig": 0.8900886178016663, + "epoch": 0.08771299158818031, + "kl_loss": 19.825634002685547, + "loss_ib": 0.22447283565998077, + "step": 305 + }, + { + "ce_ib": 23.15169906616211, + "ce_orig": 1.2219324111938477, + "epoch": 0.08800057516715795, + "kl_loss": 24.46666717529297, + "loss_ib": 0.2678183615207672, + "step": 306 + }, + { + "ce_ib": 22.739940643310547, + "ce_orig": 1.0926791429519653, + "epoch": 0.08800057516715795, + "kl_loss": 27.521900177001953, + "loss_ib": 0.29795894026756287, + "step": 306 + }, + { + "ce_ib": 25.519643783569336, + "ce_orig": 0.8576159477233887, + "epoch": 0.08800057516715795, + "kl_loss": 20.15566062927246, + "loss_ib": 0.2270762324333191, + "step": 306 + }, + { + "ce_ib": 21.95849609375, + "ce_orig": 0.7875012755393982, + "epoch": 0.08800057516715795, + "kl_loss": 23.06599998474121, + "loss_ib": 0.2526184916496277, + "step": 306 + }, + { + "ce_ib": 29.18073272705078, + "ce_orig": 2.031694173812866, + "epoch": 0.08828815874613559, + "kl_loss": 23.33743667602539, + "loss_ib": 0.2625550925731659, + "step": 307 + }, + { + "ce_ib": 21.970809936523438, + "ce_orig": 0.589923620223999, + "epoch": 0.08828815874613559, + "kl_loss": 26.482616424560547, + "loss_ib": 0.2867969572544098, + "step": 307 + }, + { + "ce_ib": 21.679189682006836, + "ce_orig": 0.5775496363639832, + "epoch": 0.08828815874613559, + "kl_loss": 48.13336181640625, + "loss_ib": 0.5030127763748169, + "step": 307 + }, + { + "ce_ib": 25.202064514160156, + "ce_orig": 0.4014778137207031, + "epoch": 0.08828815874613559, + "kl_loss": 27.67257308959961, + "loss_ib": 0.301927775144577, + "step": 307 + }, + { + "ce_ib": 27.295534133911133, + "ce_orig": 1.3986626863479614, + "epoch": 0.08857574232511324, + "kl_loss": 19.44972801208496, + "loss_ib": 0.2217928022146225, + "step": 308 + }, + { + "ce_ib": 26.677940368652344, + "ce_orig": 1.4669982194900513, + "epoch": 0.08857574232511324, + "kl_loss": 22.110187530517578, + "loss_ib": 0.24777980148792267, + "step": 308 + }, + { + "ce_ib": 25.397268295288086, + "ce_orig": 0.7194269299507141, + "epoch": 0.08857574232511324, + "kl_loss": 21.656837463378906, + "loss_ib": 0.2419656366109848, + "step": 308 + }, + { + "ce_ib": 23.615497589111328, + "ce_orig": 0.7230740189552307, + "epoch": 0.08857574232511324, + "kl_loss": 20.54231071472168, + "loss_ib": 0.22903859615325928, + "step": 308 + }, + { + "ce_ib": 25.946504592895508, + "ce_orig": 0.7649667263031006, + "epoch": 0.08886332590409088, + "kl_loss": 19.743709564208984, + "loss_ib": 0.2233835905790329, + "step": 309 + }, + { + "ce_ib": 25.53705596923828, + "ce_orig": 1.3177523612976074, + "epoch": 0.08886332590409088, + "kl_loss": 17.188051223754883, + "loss_ib": 0.19741755723953247, + "step": 309 + }, + { + "ce_ib": 23.944272994995117, + "ce_orig": 1.095251202583313, + "epoch": 0.08886332590409088, + "kl_loss": 32.30261993408203, + "loss_ib": 0.34697046875953674, + "step": 309 + }, + { + "ce_ib": 24.48792266845703, + "ce_orig": 1.1920284032821655, + "epoch": 0.08886332590409088, + "kl_loss": 22.399471282958984, + "loss_ib": 0.2484826296567917, + "step": 309 + }, + { + "epoch": 0.08915090948306852, + "grad_norm": 4.221822738647461, + "learning_rate": 9.681528662420384e-06, + "loss": 1.2441, + "step": 310 + }, + { + "ce_ib": 21.77839469909668, + "ce_orig": 0.7371479272842407, + "epoch": 0.08915090948306852, + "kl_loss": 20.585721969604492, + "loss_ib": 0.22763560712337494, + "step": 310 + }, + { + "ce_ib": 19.997831344604492, + "ce_orig": 0.6613921523094177, + "epoch": 0.08915090948306852, + "kl_loss": 19.700130462646484, + "loss_ib": 0.2169991284608841, + "step": 310 + }, + { + "ce_ib": 26.353721618652344, + "ce_orig": 1.7854292392730713, + "epoch": 0.08915090948306852, + "kl_loss": 17.48261260986328, + "loss_ib": 0.2011798471212387, + "step": 310 + }, + { + "ce_ib": 19.99268913269043, + "ce_orig": 0.6877007484436035, + "epoch": 0.08915090948306852, + "kl_loss": 21.342994689941406, + "loss_ib": 0.2334226369857788, + "step": 310 + }, + { + "ce_ib": 18.89698028564453, + "ce_orig": 0.30386409163475037, + "epoch": 0.08943849306204615, + "kl_loss": 36.01085662841797, + "loss_ib": 0.3790055513381958, + "step": 311 + }, + { + "ce_ib": 22.5959415435791, + "ce_orig": 0.44536206126213074, + "epoch": 0.08943849306204615, + "kl_loss": 23.401079177856445, + "loss_ib": 0.25660672783851624, + "step": 311 + }, + { + "ce_ib": 26.67799186706543, + "ce_orig": 1.3645676374435425, + "epoch": 0.08943849306204615, + "kl_loss": 21.983440399169922, + "loss_ib": 0.24651238322257996, + "step": 311 + }, + { + "ce_ib": 27.179054260253906, + "ce_orig": 1.9106354713439941, + "epoch": 0.08943849306204615, + "kl_loss": 18.935150146484375, + "loss_ib": 0.21653054654598236, + "step": 311 + }, + { + "ce_ib": 21.86924934387207, + "ce_orig": 0.520060658454895, + "epoch": 0.08972607664102379, + "kl_loss": 16.354835510253906, + "loss_ib": 0.18541759252548218, + "step": 312 + }, + { + "ce_ib": 21.63800621032715, + "ce_orig": 1.0606663227081299, + "epoch": 0.08972607664102379, + "kl_loss": 43.62150573730469, + "loss_ib": 0.4578530490398407, + "step": 312 + }, + { + "ce_ib": 23.62603187561035, + "ce_orig": 1.3403759002685547, + "epoch": 0.08972607664102379, + "kl_loss": 19.3665771484375, + "loss_ib": 0.21729178726673126, + "step": 312 + }, + { + "ce_ib": 19.89155387878418, + "ce_orig": 0.7725546956062317, + "epoch": 0.08972607664102379, + "kl_loss": 18.36844253540039, + "loss_ib": 0.2035759687423706, + "step": 312 + }, + { + "ce_ib": 22.370769500732422, + "ce_orig": 0.6480394601821899, + "epoch": 0.09001366022000144, + "kl_loss": 17.688186645507812, + "loss_ib": 0.1992526352405548, + "step": 313 + }, + { + "ce_ib": 24.45700454711914, + "ce_orig": 0.5826147198677063, + "epoch": 0.09001366022000144, + "kl_loss": 18.841575622558594, + "loss_ib": 0.21287274360656738, + "step": 313 + }, + { + "ce_ib": 25.685205459594727, + "ce_orig": 1.5794010162353516, + "epoch": 0.09001366022000144, + "kl_loss": 16.19678497314453, + "loss_ib": 0.18765303492546082, + "step": 313 + }, + { + "ce_ib": 25.032880783081055, + "ce_orig": 1.691644549369812, + "epoch": 0.09001366022000144, + "kl_loss": 16.912708282470703, + "loss_ib": 0.19415995478630066, + "step": 313 + }, + { + "ce_ib": 19.637710571289062, + "ce_orig": 0.5982837080955505, + "epoch": 0.09030124379897908, + "kl_loss": 16.402633666992188, + "loss_ib": 0.18366405367851257, + "step": 314 + }, + { + "ce_ib": 22.416698455810547, + "ce_orig": 0.7960580587387085, + "epoch": 0.09030124379897908, + "kl_loss": 21.684656143188477, + "loss_ib": 0.23926326632499695, + "step": 314 + }, + { + "ce_ib": 24.144296646118164, + "ce_orig": 1.1491901874542236, + "epoch": 0.09030124379897908, + "kl_loss": 16.15469741821289, + "loss_ib": 0.1856912523508072, + "step": 314 + }, + { + "ce_ib": 22.414451599121094, + "ce_orig": 0.6498157382011414, + "epoch": 0.09030124379897908, + "kl_loss": 15.109755516052246, + "loss_ib": 0.17351199686527252, + "step": 314 + }, + { + "epoch": 0.09058882737795672, + "grad_norm": 1.5902462005615234, + "learning_rate": 9.840764331210191e-06, + "loss": 1.2124, + "step": 315 + }, + { + "ce_ib": 26.398815155029297, + "ce_orig": 1.2464390993118286, + "epoch": 0.09058882737795672, + "kl_loss": 15.754015922546387, + "loss_ib": 0.18393898010253906, + "step": 315 + }, + { + "ce_ib": 19.097566604614258, + "ce_orig": 0.7237119674682617, + "epoch": 0.09058882737795672, + "kl_loss": 21.74990463256836, + "loss_ib": 0.23659659922122955, + "step": 315 + }, + { + "ce_ib": 15.259860038757324, + "ce_orig": 0.2898668348789215, + "epoch": 0.09058882737795672, + "kl_loss": 29.279403686523438, + "loss_ib": 0.3080538809299469, + "step": 315 + }, + { + "ce_ib": 19.407550811767578, + "ce_orig": 0.8700235486030579, + "epoch": 0.09058882737795672, + "kl_loss": 19.4202938079834, + "loss_ib": 0.2136104851961136, + "step": 315 + }, + { + "ce_ib": 18.41707992553711, + "ce_orig": 0.8072125911712646, + "epoch": 0.09087641095693436, + "kl_loss": 20.386289596557617, + "loss_ib": 0.22227996587753296, + "step": 316 + }, + { + "ce_ib": 23.172889709472656, + "ce_orig": 0.4192945659160614, + "epoch": 0.09087641095693436, + "kl_loss": 16.535175323486328, + "loss_ib": 0.18852464854717255, + "step": 316 + }, + { + "ce_ib": 25.58465003967285, + "ce_orig": 0.7333693504333496, + "epoch": 0.09087641095693436, + "kl_loss": 16.984933853149414, + "loss_ib": 0.19543398916721344, + "step": 316 + }, + { + "ce_ib": 23.119829177856445, + "ce_orig": 1.2351598739624023, + "epoch": 0.09087641095693436, + "kl_loss": 17.573955535888672, + "loss_ib": 0.19885937869548798, + "step": 316 + }, + { + "ce_ib": 20.77093505859375, + "ce_orig": 0.638796865940094, + "epoch": 0.091163994535912, + "kl_loss": 13.646297454833984, + "loss_ib": 0.15723390877246857, + "step": 317 + }, + { + "ce_ib": 22.370588302612305, + "ce_orig": 0.7030758857727051, + "epoch": 0.091163994535912, + "kl_loss": 19.98332977294922, + "loss_ib": 0.22220388054847717, + "step": 317 + }, + { + "ce_ib": 23.507246017456055, + "ce_orig": 0.9605042934417725, + "epoch": 0.091163994535912, + "kl_loss": 17.364158630371094, + "loss_ib": 0.19714882969856262, + "step": 317 + }, + { + "ce_ib": 25.993309020996094, + "ce_orig": 1.3534818887710571, + "epoch": 0.091163994535912, + "kl_loss": 15.692447662353516, + "loss_ib": 0.1829177886247635, + "step": 317 + }, + { + "ce_ib": 26.447153091430664, + "ce_orig": 0.6662286520004272, + "epoch": 0.09145157811488965, + "kl_loss": 19.282215118408203, + "loss_ib": 0.2192692905664444, + "step": 318 + }, + { + "ce_ib": 21.66655158996582, + "ce_orig": 0.5830262899398804, + "epoch": 0.09145157811488965, + "kl_loss": 16.306236267089844, + "loss_ib": 0.18472890555858612, + "step": 318 + }, + { + "ce_ib": 25.510692596435547, + "ce_orig": 1.2415066957473755, + "epoch": 0.09145157811488965, + "kl_loss": 15.806875228881836, + "loss_ib": 0.1835794448852539, + "step": 318 + }, + { + "ce_ib": 19.910005569458008, + "ce_orig": 0.7447091341018677, + "epoch": 0.09145157811488965, + "kl_loss": 17.13539695739746, + "loss_ib": 0.19126397371292114, + "step": 318 + }, + { + "ce_ib": 24.191679000854492, + "ce_orig": 1.3691377639770508, + "epoch": 0.09173916169386728, + "kl_loss": 14.619853973388672, + "loss_ib": 0.17039021849632263, + "step": 319 + }, + { + "ce_ib": 24.796480178833008, + "ce_orig": 1.0005704164505005, + "epoch": 0.09173916169386728, + "kl_loss": 23.08795738220215, + "loss_ib": 0.2556760609149933, + "step": 319 + }, + { + "ce_ib": 18.99901580810547, + "ce_orig": 0.8472815155982971, + "epoch": 0.09173916169386728, + "kl_loss": 16.455188751220703, + "loss_ib": 0.1835509091615677, + "step": 319 + }, + { + "ce_ib": 23.486665725708008, + "ce_orig": 0.9926466345787048, + "epoch": 0.09173916169386728, + "kl_loss": 13.656463623046875, + "loss_ib": 0.16005130112171173, + "step": 319 + }, + { + "epoch": 0.09202674527284492, + "grad_norm": 1.6402511596679688, + "learning_rate": 1e-05, + "loss": 1.138, + "step": 320 + }, + { + "ce_ib": 17.978233337402344, + "ce_orig": 0.7672592401504517, + "epoch": 0.09202674527284492, + "kl_loss": 16.61567497253418, + "loss_ib": 0.18413497507572174, + "step": 320 + }, + { + "ce_ib": 22.24078369140625, + "ce_orig": 1.269389271736145, + "epoch": 0.09202674527284492, + "kl_loss": 13.635717391967773, + "loss_ib": 0.1585979461669922, + "step": 320 + }, + { + "ce_ib": 24.34579086303711, + "ce_orig": 0.9150453209877014, + "epoch": 0.09202674527284492, + "kl_loss": 17.23645782470703, + "loss_ib": 0.19671037793159485, + "step": 320 + }, + { + "ce_ib": 20.447195053100586, + "ce_orig": 0.7540422081947327, + "epoch": 0.09202674527284492, + "kl_loss": 16.120819091796875, + "loss_ib": 0.1816553771495819, + "step": 320 + }, + { + "ce_ib": 20.8408145904541, + "ce_orig": 0.6497412919998169, + "epoch": 0.09231432885182256, + "kl_loss": 13.179418563842773, + "loss_ib": 0.15263499319553375, + "step": 321 + }, + { + "ce_ib": 18.48600196838379, + "ce_orig": 0.9051377773284912, + "epoch": 0.09231432885182256, + "kl_loss": 13.9927339553833, + "loss_ib": 0.1584133356809616, + "step": 321 + }, + { + "ce_ib": 22.45148277282715, + "ce_orig": 1.3368862867355347, + "epoch": 0.09231432885182256, + "kl_loss": 13.805723190307617, + "loss_ib": 0.16050870716571808, + "step": 321 + }, + { + "ce_ib": 22.361835479736328, + "ce_orig": 0.4947150647640228, + "epoch": 0.09231432885182256, + "kl_loss": 13.823465347290039, + "loss_ib": 0.16059647500514984, + "step": 321 + }, + { + "ce_ib": 27.675626754760742, + "ce_orig": 1.5604900121688843, + "epoch": 0.0926019124308002, + "kl_loss": 12.6363525390625, + "loss_ib": 0.1540391445159912, + "step": 322 + }, + { + "ce_ib": 21.165691375732422, + "ce_orig": 0.9618255496025085, + "epoch": 0.0926019124308002, + "kl_loss": 18.356464385986328, + "loss_ib": 0.2047303318977356, + "step": 322 + }, + { + "ce_ib": 17.5034122467041, + "ce_orig": 0.6583025455474854, + "epoch": 0.0926019124308002, + "kl_loss": 14.965330123901367, + "loss_ib": 0.16715671122074127, + "step": 322 + }, + { + "ce_ib": 19.85868263244629, + "ce_orig": 0.8282234072685242, + "epoch": 0.0926019124308002, + "kl_loss": 14.633644104003906, + "loss_ib": 0.1661951243877411, + "step": 322 + }, + { + "ce_ib": 18.45901870727539, + "ce_orig": 0.7398732304573059, + "epoch": 0.09288949600977785, + "kl_loss": 15.228702545166016, + "loss_ib": 0.17074604332447052, + "step": 323 + }, + { + "ce_ib": 21.03128433227539, + "ce_orig": 0.8697280883789062, + "epoch": 0.09288949600977785, + "kl_loss": 14.359245300292969, + "loss_ib": 0.16462373733520508, + "step": 323 + }, + { + "ce_ib": 18.499732971191406, + "ce_orig": 0.5559062361717224, + "epoch": 0.09288949600977785, + "kl_loss": 14.474294662475586, + "loss_ib": 0.16324268281459808, + "step": 323 + }, + { + "ce_ib": 17.314205169677734, + "ce_orig": 0.5949759483337402, + "epoch": 0.09288949600977785, + "kl_loss": 12.18869400024414, + "loss_ib": 0.13920114934444427, + "step": 323 + }, + { + "ce_ib": 22.12590789794922, + "ce_orig": 1.2088953256607056, + "epoch": 0.09317707958875548, + "kl_loss": 12.109560012817383, + "loss_ib": 0.14322151243686676, + "step": 324 + }, + { + "ce_ib": 19.701847076416016, + "ce_orig": 0.6696236729621887, + "epoch": 0.09317707958875548, + "kl_loss": 14.687576293945312, + "loss_ib": 0.16657760739326477, + "step": 324 + }, + { + "ce_ib": 22.29142951965332, + "ce_orig": 1.1997344493865967, + "epoch": 0.09317707958875548, + "kl_loss": 12.63182258605957, + "loss_ib": 0.14860965311527252, + "step": 324 + }, + { + "ce_ib": 20.73260498046875, + "ce_orig": 0.9823715090751648, + "epoch": 0.09317707958875548, + "kl_loss": 12.120622634887695, + "loss_ib": 0.14193882048130035, + "step": 324 + }, + { + "epoch": 0.09346466316773312, + "grad_norm": 1.5406866073608398, + "learning_rate": 9.999993976919739e-06, + "loss": 1.1078, + "step": 325 + }, + { + "ce_ib": 17.106014251708984, + "ce_orig": 0.9145632982254028, + "epoch": 0.09346466316773312, + "kl_loss": 14.363018989562988, + "loss_ib": 0.16073618829250336, + "step": 325 + }, + { + "ce_ib": 19.175251007080078, + "ce_orig": 0.7700109481811523, + "epoch": 0.09346466316773312, + "kl_loss": 14.140527725219727, + "loss_ib": 0.16058051586151123, + "step": 325 + }, + { + "ce_ib": 15.837416648864746, + "ce_orig": 0.772528886795044, + "epoch": 0.09346466316773312, + "kl_loss": 12.842889785766602, + "loss_ib": 0.14426632225513458, + "step": 325 + }, + { + "ce_ib": 15.72890853881836, + "ce_orig": 0.7171897888183594, + "epoch": 0.09346466316773312, + "kl_loss": 11.653585433959961, + "loss_ib": 0.13226476311683655, + "step": 325 + }, + { + "ce_ib": 26.823095321655273, + "ce_orig": 1.9181299209594727, + "epoch": 0.09375224674671076, + "kl_loss": 11.220107078552246, + "loss_ib": 0.13902415335178375, + "step": 326 + }, + { + "ce_ib": 19.747053146362305, + "ce_orig": 0.9309285283088684, + "epoch": 0.09375224674671076, + "kl_loss": 10.759939193725586, + "loss_ib": 0.1273464411497116, + "step": 326 + }, + { + "ce_ib": 21.19155502319336, + "ce_orig": 1.635532259941101, + "epoch": 0.09375224674671076, + "kl_loss": 12.692992210388184, + "loss_ib": 0.14812147617340088, + "step": 326 + }, + { + "ce_ib": 21.15056037902832, + "ce_orig": 0.7623363137245178, + "epoch": 0.09375224674671076, + "kl_loss": 13.625322341918945, + "loss_ib": 0.15740378201007843, + "step": 326 + }, + { + "ce_ib": 16.852680206298828, + "ce_orig": 0.7469913363456726, + "epoch": 0.0940398303256884, + "kl_loss": 10.560868263244629, + "loss_ib": 0.12246136367321014, + "step": 327 + }, + { + "ce_ib": 16.11965560913086, + "ce_orig": 0.752680242061615, + "epoch": 0.0940398303256884, + "kl_loss": 12.737372398376465, + "loss_ib": 0.14349336922168732, + "step": 327 + }, + { + "ce_ib": 18.678325653076172, + "ce_orig": 0.8419510722160339, + "epoch": 0.0940398303256884, + "kl_loss": 12.982912063598633, + "loss_ib": 0.14850744605064392, + "step": 327 + }, + { + "ce_ib": 20.507835388183594, + "ce_orig": 0.933698296546936, + "epoch": 0.0940398303256884, + "kl_loss": 11.535351753234863, + "loss_ib": 0.1358613520860672, + "step": 327 + }, + { + "ce_ib": 16.491111755371094, + "ce_orig": 0.4157365560531616, + "epoch": 0.09432741390466605, + "kl_loss": 11.347173690795898, + "loss_ib": 0.12996284663677216, + "step": 328 + }, + { + "ce_ib": 20.290693283081055, + "ce_orig": 0.6378398537635803, + "epoch": 0.09432741390466605, + "kl_loss": 10.955358505249023, + "loss_ib": 0.1298442780971527, + "step": 328 + }, + { + "ce_ib": 20.74325942993164, + "ce_orig": 0.9873928427696228, + "epoch": 0.09432741390466605, + "kl_loss": 11.168992042541504, + "loss_ib": 0.13243317604064941, + "step": 328 + }, + { + "ce_ib": 22.277246475219727, + "ce_orig": 1.0927495956420898, + "epoch": 0.09432741390466605, + "kl_loss": 14.169788360595703, + "loss_ib": 0.16397511959075928, + "step": 328 + }, + { + "ce_ib": 23.106748580932617, + "ce_orig": 1.1934113502502441, + "epoch": 0.09461499748364369, + "kl_loss": 11.351577758789062, + "loss_ib": 0.13662251830101013, + "step": 329 + }, + { + "ce_ib": 19.171072006225586, + "ce_orig": 0.9011801481246948, + "epoch": 0.09461499748364369, + "kl_loss": 11.967233657836914, + "loss_ib": 0.13884340226650238, + "step": 329 + }, + { + "ce_ib": 19.95760726928711, + "ce_orig": 0.6395582556724548, + "epoch": 0.09461499748364369, + "kl_loss": 11.200337409973145, + "loss_ib": 0.13196097314357758, + "step": 329 + }, + { + "ce_ib": 20.071157455444336, + "ce_orig": 0.9473389983177185, + "epoch": 0.09461499748364369, + "kl_loss": 14.839773178100586, + "loss_ib": 0.1684688925743103, + "step": 329 + }, + { + "epoch": 0.09490258106262132, + "grad_norm": 0.9824780821800232, + "learning_rate": 9.999975907693462e-06, + "loss": 1.1087, + "step": 330 + }, + { + "ce_ib": 20.931066513061523, + "ce_orig": 1.4924328327178955, + "epoch": 0.09490258106262132, + "kl_loss": 10.959052085876465, + "loss_ib": 0.13052158057689667, + "step": 330 + }, + { + "ce_ib": 23.309009552001953, + "ce_orig": 0.8217906951904297, + "epoch": 0.09490258106262132, + "kl_loss": 13.162786483764648, + "loss_ib": 0.15493687987327576, + "step": 330 + }, + { + "ce_ib": 19.14462661743164, + "ce_orig": 0.7224079966545105, + "epoch": 0.09490258106262132, + "kl_loss": 15.73659610748291, + "loss_ib": 0.17651057243347168, + "step": 330 + }, + { + "ce_ib": 16.93087387084961, + "ce_orig": 0.3897332549095154, + "epoch": 0.09490258106262132, + "kl_loss": 9.802057266235352, + "loss_ib": 0.11495144665241241, + "step": 330 + }, + { + "ce_ib": 20.574167251586914, + "ce_orig": 0.7844420075416565, + "epoch": 0.09519016464159896, + "kl_loss": 12.472640991210938, + "loss_ib": 0.14530058205127716, + "step": 331 + }, + { + "ce_ib": 21.064104080200195, + "ce_orig": 0.8571724891662598, + "epoch": 0.09519016464159896, + "kl_loss": 12.077247619628906, + "loss_ib": 0.14183658361434937, + "step": 331 + }, + { + "ce_ib": 19.205732345581055, + "ce_orig": 0.591139554977417, + "epoch": 0.09519016464159896, + "kl_loss": 10.037530899047852, + "loss_ib": 0.11958103626966476, + "step": 331 + }, + { + "ce_ib": 19.454252243041992, + "ce_orig": 0.9017695188522339, + "epoch": 0.09519016464159896, + "kl_loss": 10.572440147399902, + "loss_ib": 0.12517865002155304, + "step": 331 + }, + { + "ce_ib": 15.203242301940918, + "ce_orig": 0.5361948013305664, + "epoch": 0.0954777482205766, + "kl_loss": 10.661357879638672, + "loss_ib": 0.12181682139635086, + "step": 332 + }, + { + "ce_ib": 21.122093200683594, + "ce_orig": 1.4699267148971558, + "epoch": 0.0954777482205766, + "kl_loss": 9.819560050964355, + "loss_ib": 0.1193176880478859, + "step": 332 + }, + { + "ce_ib": 19.769392013549805, + "ce_orig": 0.5775290131568909, + "epoch": 0.0954777482205766, + "kl_loss": 9.717262268066406, + "loss_ib": 0.11694201081991196, + "step": 332 + }, + { + "ce_ib": 18.72998046875, + "ce_orig": 0.5287953615188599, + "epoch": 0.0954777482205766, + "kl_loss": 13.474811553955078, + "loss_ib": 0.15347810089588165, + "step": 332 + }, + { + "ce_ib": 14.733593940734863, + "ce_orig": 0.4213125705718994, + "epoch": 0.09576533179955425, + "kl_loss": 11.17019271850586, + "loss_ib": 0.1264355182647705, + "step": 333 + }, + { + "ce_ib": 21.969772338867188, + "ce_orig": 1.0359489917755127, + "epoch": 0.09576533179955425, + "kl_loss": 12.137819290161133, + "loss_ib": 0.14334796369075775, + "step": 333 + }, + { + "ce_ib": 19.76876449584961, + "ce_orig": 0.8385518193244934, + "epoch": 0.09576533179955425, + "kl_loss": 12.675048828125, + "loss_ib": 0.1465192437171936, + "step": 333 + }, + { + "ce_ib": 16.6859188079834, + "ce_orig": 0.7033459544181824, + "epoch": 0.09576533179955425, + "kl_loss": 11.24110221862793, + "loss_ib": 0.12909694015979767, + "step": 333 + }, + { + "ce_ib": 18.942955017089844, + "ce_orig": 0.6511563062667847, + "epoch": 0.09605291537853189, + "kl_loss": 12.197677612304688, + "loss_ib": 0.1409197300672531, + "step": 334 + }, + { + "ce_ib": 12.525162696838379, + "ce_orig": 0.2835647463798523, + "epoch": 0.09605291537853189, + "kl_loss": 8.74501895904541, + "loss_ib": 0.0999753549695015, + "step": 334 + }, + { + "ce_ib": 19.1585693359375, + "ce_orig": 0.5772603750228882, + "epoch": 0.09605291537853189, + "kl_loss": 11.537700653076172, + "loss_ib": 0.13453558087348938, + "step": 334 + }, + { + "ce_ib": 18.65268898010254, + "ce_orig": 0.6586172580718994, + "epoch": 0.09605291537853189, + "kl_loss": 9.829732894897461, + "loss_ib": 0.11695001274347305, + "step": 334 + }, + { + "epoch": 0.09634049895750953, + "grad_norm": 1.5527466535568237, + "learning_rate": 9.999945792364704e-06, + "loss": 1.0047, + "step": 335 + }, + { + "ce_ib": 18.45952796936035, + "ce_orig": 0.9835706949234009, + "epoch": 0.09634049895750953, + "kl_loss": 11.043167114257812, + "loss_ib": 0.1288911998271942, + "step": 335 + }, + { + "ce_ib": 19.220260620117188, + "ce_orig": 1.0070465803146362, + "epoch": 0.09634049895750953, + "kl_loss": 9.694038391113281, + "loss_ib": 0.11616063863039017, + "step": 335 + }, + { + "ce_ib": 20.90534019470215, + "ce_orig": 0.7721905708312988, + "epoch": 0.09634049895750953, + "kl_loss": 10.956405639648438, + "loss_ib": 0.1304693967103958, + "step": 335 + }, + { + "ce_ib": 22.549360275268555, + "ce_orig": 0.7169628143310547, + "epoch": 0.09634049895750953, + "kl_loss": 10.45715618133545, + "loss_ib": 0.12712092697620392, + "step": 335 + }, + { + "ce_ib": 19.706451416015625, + "ce_orig": 1.2544941902160645, + "epoch": 0.09662808253648716, + "kl_loss": 5.737251281738281, + "loss_ib": 0.07707896083593369, + "step": 336 + }, + { + "ce_ib": 24.045684814453125, + "ce_orig": 1.628864049911499, + "epoch": 0.09662808253648716, + "kl_loss": 9.666478157043457, + "loss_ib": 0.12071046233177185, + "step": 336 + }, + { + "ce_ib": 22.39566993713379, + "ce_orig": 1.3797554969787598, + "epoch": 0.09662808253648716, + "kl_loss": 9.557376861572266, + "loss_ib": 0.11796943098306656, + "step": 336 + }, + { + "ce_ib": 16.502885818481445, + "ce_orig": 0.678697407245636, + "epoch": 0.09662808253648716, + "kl_loss": 12.349996566772461, + "loss_ib": 0.14000284671783447, + "step": 336 + }, + { + "ce_ib": 21.189701080322266, + "ce_orig": 1.7581653594970703, + "epoch": 0.0969156661154648, + "kl_loss": 11.109577178955078, + "loss_ib": 0.1322854608297348, + "step": 337 + }, + { + "ce_ib": 20.682483673095703, + "ce_orig": 1.0360445976257324, + "epoch": 0.0969156661154648, + "kl_loss": 9.650278091430664, + "loss_ib": 0.11718526482582092, + "step": 337 + }, + { + "ce_ib": 23.174293518066406, + "ce_orig": 0.9172191619873047, + "epoch": 0.0969156661154648, + "kl_loss": 10.747881889343262, + "loss_ib": 0.13065311312675476, + "step": 337 + }, + { + "ce_ib": 20.76695442199707, + "ce_orig": 0.5460869073867798, + "epoch": 0.0969156661154648, + "kl_loss": 11.630158424377441, + "loss_ib": 0.13706853985786438, + "step": 337 + }, + { + "ce_ib": 23.635868072509766, + "ce_orig": 1.7053964138031006, + "epoch": 0.09720324969444245, + "kl_loss": 9.801689147949219, + "loss_ib": 0.121652752161026, + "step": 338 + }, + { + "ce_ib": 19.619415283203125, + "ce_orig": 0.8643050193786621, + "epoch": 0.09720324969444245, + "kl_loss": 9.933218002319336, + "loss_ib": 0.11895159631967545, + "step": 338 + }, + { + "ce_ib": 21.83019256591797, + "ce_orig": 0.8322968482971191, + "epoch": 0.09720324969444245, + "kl_loss": 10.368824005126953, + "loss_ib": 0.12551842629909515, + "step": 338 + }, + { + "ce_ib": 18.191864013671875, + "ce_orig": 0.4908624589443207, + "epoch": 0.09720324969444245, + "kl_loss": 11.974782943725586, + "loss_ib": 0.1379396915435791, + "step": 338 + }, + { + "ce_ib": 20.153644561767578, + "ce_orig": 0.5820345282554626, + "epoch": 0.09749083327342009, + "kl_loss": 9.271571159362793, + "loss_ib": 0.11286935210227966, + "step": 339 + }, + { + "ce_ib": 16.755735397338867, + "ce_orig": 0.8004245758056641, + "epoch": 0.09749083327342009, + "kl_loss": 9.602378845214844, + "loss_ib": 0.11277952045202255, + "step": 339 + }, + { + "ce_ib": 21.61349868774414, + "ce_orig": 1.3253728151321411, + "epoch": 0.09749083327342009, + "kl_loss": 9.877376556396484, + "loss_ib": 0.12038726359605789, + "step": 339 + }, + { + "ce_ib": 23.785110473632812, + "ce_orig": 1.1449768543243408, + "epoch": 0.09749083327342009, + "kl_loss": 9.455681800842285, + "loss_ib": 0.11834193021059036, + "step": 339 + }, + { + "epoch": 0.09777841685239773, + "grad_norm": 0.7407243251800537, + "learning_rate": 9.999903631006022e-06, + "loss": 1.0521, + "step": 340 + }, + { + "ce_ib": 10.92531967163086, + "ce_orig": 0.2659711241722107, + "epoch": 0.09777841685239773, + "kl_loss": 7.5069732666015625, + "loss_ib": 0.08599505573511124, + "step": 340 + }, + { + "ce_ib": 19.90782356262207, + "ce_orig": 0.5355432033538818, + "epoch": 0.09777841685239773, + "kl_loss": 11.578774452209473, + "loss_ib": 0.13569556176662445, + "step": 340 + }, + { + "ce_ib": 15.142422676086426, + "ce_orig": 0.7282365560531616, + "epoch": 0.09777841685239773, + "kl_loss": 10.324485778808594, + "loss_ib": 0.11838727444410324, + "step": 340 + }, + { + "ce_ib": 20.629169464111328, + "ce_orig": 0.9958592057228088, + "epoch": 0.09777841685239773, + "kl_loss": 9.883302688598633, + "loss_ib": 0.11946219205856323, + "step": 340 + }, + { + "ce_ib": 18.038537979125977, + "ce_orig": 0.692686140537262, + "epoch": 0.09806600043137537, + "kl_loss": 9.38126277923584, + "loss_ib": 0.11185116320848465, + "step": 341 + }, + { + "ce_ib": 20.98015022277832, + "ce_orig": 0.5099575519561768, + "epoch": 0.09806600043137537, + "kl_loss": 9.678869247436523, + "loss_ib": 0.11776883900165558, + "step": 341 + }, + { + "ce_ib": 17.243499755859375, + "ce_orig": 0.692179799079895, + "epoch": 0.09806600043137537, + "kl_loss": 9.476663589477539, + "loss_ib": 0.11201013624668121, + "step": 341 + }, + { + "ce_ib": 25.067062377929688, + "ce_orig": 0.792171835899353, + "epoch": 0.09806600043137537, + "kl_loss": 10.635600090026855, + "loss_ib": 0.13142305612564087, + "step": 341 + }, + { + "ce_ib": 15.628839492797852, + "ce_orig": 0.7891074419021606, + "epoch": 0.098353584010353, + "kl_loss": 10.068859100341797, + "loss_ib": 0.11631742864847183, + "step": 342 + }, + { + "ce_ib": 18.483537673950195, + "ce_orig": 0.9051138758659363, + "epoch": 0.098353584010353, + "kl_loss": 10.133465766906738, + "loss_ib": 0.11981818825006485, + "step": 342 + }, + { + "ce_ib": 16.36567497253418, + "ce_orig": 0.6817749738693237, + "epoch": 0.098353584010353, + "kl_loss": 9.654040336608887, + "loss_ib": 0.1129060685634613, + "step": 342 + }, + { + "ce_ib": 20.89409828186035, + "ce_orig": 1.1035248041152954, + "epoch": 0.098353584010353, + "kl_loss": 10.291540145874023, + "loss_ib": 0.12380949407815933, + "step": 342 + }, + { + "ce_ib": 22.037933349609375, + "ce_orig": 1.694153904914856, + "epoch": 0.09864116758933066, + "kl_loss": 9.139327049255371, + "loss_ib": 0.11343120038509369, + "step": 343 + }, + { + "ce_ib": 21.519075393676758, + "ce_orig": 1.4431602954864502, + "epoch": 0.09864116758933066, + "kl_loss": 9.308493614196777, + "loss_ib": 0.11460401117801666, + "step": 343 + }, + { + "ce_ib": 15.5767240524292, + "ce_orig": 0.7400184273719788, + "epoch": 0.09864116758933066, + "kl_loss": 9.34182357788086, + "loss_ib": 0.10899496078491211, + "step": 343 + }, + { + "ce_ib": 14.478399276733398, + "ce_orig": 0.4603125751018524, + "epoch": 0.09864116758933066, + "kl_loss": 9.755362510681152, + "loss_ib": 0.11203201860189438, + "step": 343 + }, + { + "ce_ib": 17.020166397094727, + "ce_orig": 0.5673547387123108, + "epoch": 0.0989287511683083, + "kl_loss": 11.05903148651123, + "loss_ib": 0.1276104748249054, + "step": 344 + }, + { + "ce_ib": 17.05959701538086, + "ce_orig": 0.7991743683815002, + "epoch": 0.0989287511683083, + "kl_loss": 9.682877540588379, + "loss_ib": 0.11388836801052094, + "step": 344 + }, + { + "ce_ib": 23.936145782470703, + "ce_orig": 1.328629732131958, + "epoch": 0.0989287511683083, + "kl_loss": 10.604333877563477, + "loss_ib": 0.1299794763326645, + "step": 344 + }, + { + "ce_ib": 22.645984649658203, + "ce_orig": 1.3614290952682495, + "epoch": 0.0989287511683083, + "kl_loss": 9.37100601196289, + "loss_ib": 0.11635604500770569, + "step": 344 + }, + { + "epoch": 0.09921633474728593, + "grad_norm": 1.2907381057739258, + "learning_rate": 9.99984942371899e-06, + "loss": 1.0005, + "step": 345 + }, + { + "ce_ib": 19.495277404785156, + "ce_orig": 1.2335152626037598, + "epoch": 0.09921633474728593, + "kl_loss": 10.051846504211426, + "loss_ib": 0.12001373618841171, + "step": 345 + }, + { + "ce_ib": 16.080678939819336, + "ce_orig": 0.7765376567840576, + "epoch": 0.09921633474728593, + "kl_loss": 9.520978927612305, + "loss_ib": 0.11129046231508255, + "step": 345 + }, + { + "ce_ib": 17.06264305114746, + "ce_orig": 0.6252941489219666, + "epoch": 0.09921633474728593, + "kl_loss": 9.93945026397705, + "loss_ib": 0.11645714193582535, + "step": 345 + }, + { + "ce_ib": 18.10491371154785, + "ce_orig": 0.9270275831222534, + "epoch": 0.09921633474728593, + "kl_loss": 9.839916229248047, + "loss_ib": 0.11650407314300537, + "step": 345 + }, + { + "ce_ib": 21.288270950317383, + "ce_orig": 1.8509690761566162, + "epoch": 0.09950391832626357, + "kl_loss": 9.440530776977539, + "loss_ib": 0.1156935766339302, + "step": 346 + }, + { + "ce_ib": 16.040321350097656, + "ce_orig": 0.9570891261100769, + "epoch": 0.09950391832626357, + "kl_loss": 10.13039493560791, + "loss_ib": 0.11734427511692047, + "step": 346 + }, + { + "ce_ib": 20.773448944091797, + "ce_orig": 1.5323985815048218, + "epoch": 0.09950391832626357, + "kl_loss": 10.627010345458984, + "loss_ib": 0.12704354524612427, + "step": 346 + }, + { + "ce_ib": 13.598661422729492, + "ce_orig": 0.7118152379989624, + "epoch": 0.09950391832626357, + "kl_loss": 9.598701477050781, + "loss_ib": 0.10958567261695862, + "step": 346 + }, + { + "ce_ib": 13.722025871276855, + "ce_orig": 0.8525584936141968, + "epoch": 0.0997915019052412, + "kl_loss": 10.663893699645996, + "loss_ib": 0.12036096304655075, + "step": 347 + }, + { + "ce_ib": 17.72942352294922, + "ce_orig": 0.879482626914978, + "epoch": 0.0997915019052412, + "kl_loss": 10.045938491821289, + "loss_ib": 0.11818880587816238, + "step": 347 + }, + { + "ce_ib": 17.762847900390625, + "ce_orig": 1.0413190126419067, + "epoch": 0.0997915019052412, + "kl_loss": 10.000950813293457, + "loss_ib": 0.11777235567569733, + "step": 347 + }, + { + "ce_ib": 22.781930923461914, + "ce_orig": 1.1397019624710083, + "epoch": 0.0997915019052412, + "kl_loss": 9.38424301147461, + "loss_ib": 0.1166243627667427, + "step": 347 + }, + { + "ce_ib": 21.994335174560547, + "ce_orig": 0.43941164016723633, + "epoch": 0.10007908548421886, + "kl_loss": 9.569962501525879, + "loss_ib": 0.1176939532160759, + "step": 348 + }, + { + "ce_ib": 16.95915985107422, + "ce_orig": 0.5146183371543884, + "epoch": 0.10007908548421886, + "kl_loss": 9.597618103027344, + "loss_ib": 0.11293534189462662, + "step": 348 + }, + { + "ce_ib": 13.567898750305176, + "ce_orig": 0.6933156847953796, + "epoch": 0.10007908548421886, + "kl_loss": 9.457947731018066, + "loss_ib": 0.10814736783504486, + "step": 348 + }, + { + "ce_ib": 16.038314819335938, + "ce_orig": 0.9144206643104553, + "epoch": 0.10007908548421886, + "kl_loss": 9.451361656188965, + "loss_ib": 0.11055192351341248, + "step": 348 + }, + { + "ce_ib": 15.3102445602417, + "ce_orig": 0.6907638311386108, + "epoch": 0.1003666690631965, + "kl_loss": 8.99312973022461, + "loss_ib": 0.10524154454469681, + "step": 349 + }, + { + "ce_ib": 16.133337020874023, + "ce_orig": 0.885292649269104, + "epoch": 0.1003666690631965, + "kl_loss": 9.09972095489502, + "loss_ib": 0.10713054239749908, + "step": 349 + }, + { + "ce_ib": 21.13068962097168, + "ce_orig": 1.0056211948394775, + "epoch": 0.1003666690631965, + "kl_loss": 11.5963134765625, + "loss_ib": 0.13709382712841034, + "step": 349 + }, + { + "ce_ib": 20.446731567382812, + "ce_orig": 1.2884644269943237, + "epoch": 0.1003666690631965, + "kl_loss": 9.364080429077148, + "loss_ib": 0.1140875294804573, + "step": 349 + }, + { + "epoch": 0.10065425264217413, + "grad_norm": 0.45241594314575195, + "learning_rate": 9.999783170634207e-06, + "loss": 1.0049, + "step": 350 + }, + { + "ce_ib": 20.207548141479492, + "ce_orig": 0.9007079005241394, + "epoch": 0.10065425264217413, + "kl_loss": 8.826637268066406, + "loss_ib": 0.10847391933202744, + "step": 350 + }, + { + "ce_ib": 14.89907169342041, + "ce_orig": 0.848534882068634, + "epoch": 0.10065425264217413, + "kl_loss": 9.761069297790527, + "loss_ib": 0.11250976473093033, + "step": 350 + }, + { + "ce_ib": 20.37824058532715, + "ce_orig": 1.4016444683074951, + "epoch": 0.10065425264217413, + "kl_loss": 9.232507705688477, + "loss_ib": 0.11270332336425781, + "step": 350 + }, + { + "ce_ib": 16.76759910583496, + "ce_orig": 1.1012144088745117, + "epoch": 0.10065425264217413, + "kl_loss": 10.15268325805664, + "loss_ib": 0.11829442530870438, + "step": 350 + }, + { + "ce_ib": 15.976619720458984, + "ce_orig": 0.8754503726959229, + "epoch": 0.10094183622115177, + "kl_loss": 9.422735214233398, + "loss_ib": 0.11020396649837494, + "step": 351 + }, + { + "ce_ib": 19.49113655090332, + "ce_orig": 1.116249680519104, + "epoch": 0.10094183622115177, + "kl_loss": 9.386714935302734, + "loss_ib": 0.1133582815527916, + "step": 351 + }, + { + "ce_ib": 17.88022232055664, + "ce_orig": 0.9254348278045654, + "epoch": 0.10094183622115177, + "kl_loss": 9.275611877441406, + "loss_ib": 0.11063633859157562, + "step": 351 + }, + { + "ce_ib": 9.351910591125488, + "ce_orig": 0.16225206851959229, + "epoch": 0.10094183622115177, + "kl_loss": 9.214266777038574, + "loss_ib": 0.10149458050727844, + "step": 351 + }, + { + "ce_ib": 16.736501693725586, + "ce_orig": 0.8068060278892517, + "epoch": 0.10122941980012941, + "kl_loss": 9.62176513671875, + "loss_ib": 0.11295414716005325, + "step": 352 + }, + { + "ce_ib": 18.84697723388672, + "ce_orig": 0.6195511221885681, + "epoch": 0.10122941980012941, + "kl_loss": 9.747774124145508, + "loss_ib": 0.11632471531629562, + "step": 352 + }, + { + "ce_ib": 18.591981887817383, + "ce_orig": 0.8959230184555054, + "epoch": 0.10122941980012941, + "kl_loss": 8.57149887084961, + "loss_ib": 0.10430697351694107, + "step": 352 + }, + { + "ce_ib": 16.255172729492188, + "ce_orig": 0.7202159762382507, + "epoch": 0.10122941980012941, + "kl_loss": 12.734394073486328, + "loss_ib": 0.14359910786151886, + "step": 352 + }, + { + "ce_ib": 14.430315971374512, + "ce_orig": 0.9846038818359375, + "epoch": 0.10151700337910706, + "kl_loss": 9.04564094543457, + "loss_ib": 0.10488671809434891, + "step": 353 + }, + { + "ce_ib": 19.057903289794922, + "ce_orig": 0.708838701248169, + "epoch": 0.10151700337910706, + "kl_loss": 8.713069915771484, + "loss_ib": 0.10618860274553299, + "step": 353 + }, + { + "ce_ib": 20.327953338623047, + "ce_orig": 1.7989298105239868, + "epoch": 0.10151700337910706, + "kl_loss": 9.945318222045898, + "loss_ib": 0.11978112906217575, + "step": 353 + }, + { + "ce_ib": 14.250055313110352, + "ce_orig": 0.6734949350357056, + "epoch": 0.10151700337910706, + "kl_loss": 9.17054557800293, + "loss_ib": 0.10595551133155823, + "step": 353 + }, + { + "ce_ib": 15.82697868347168, + "ce_orig": 0.9444003701210022, + "epoch": 0.1018045869580847, + "kl_loss": 9.11765193939209, + "loss_ib": 0.10700349509716034, + "step": 354 + }, + { + "ce_ib": 15.842026710510254, + "ce_orig": 0.6050642132759094, + "epoch": 0.1018045869580847, + "kl_loss": 8.850725173950195, + "loss_ib": 0.1043492779135704, + "step": 354 + }, + { + "ce_ib": 18.677658081054688, + "ce_orig": 0.8625993728637695, + "epoch": 0.1018045869580847, + "kl_loss": 9.405292510986328, + "loss_ib": 0.11273057758808136, + "step": 354 + }, + { + "ce_ib": 13.065309524536133, + "ce_orig": 0.39125949144363403, + "epoch": 0.1018045869580847, + "kl_loss": 6.584395408630371, + "loss_ib": 0.0789092630147934, + "step": 354 + }, + { + "epoch": 0.10209217053706234, + "grad_norm": 0.49203693866729736, + "learning_rate": 9.999704871911289e-06, + "loss": 0.9968, + "step": 355 + }, + { + "ce_ib": 19.234111785888672, + "ce_orig": 0.9246622920036316, + "epoch": 0.10209217053706234, + "kl_loss": 8.509796142578125, + "loss_ib": 0.1043320745229721, + "step": 355 + }, + { + "ce_ib": 17.678333282470703, + "ce_orig": 0.8307465314865112, + "epoch": 0.10209217053706234, + "kl_loss": 8.600641250610352, + "loss_ib": 0.10368474572896957, + "step": 355 + }, + { + "ce_ib": 11.18205738067627, + "ce_orig": 0.1704210788011551, + "epoch": 0.10209217053706234, + "kl_loss": 10.59097957611084, + "loss_ib": 0.11709185689687729, + "step": 355 + }, + { + "ce_ib": 14.95598316192627, + "ce_orig": 0.8469982147216797, + "epoch": 0.10209217053706234, + "kl_loss": 9.722857475280762, + "loss_ib": 0.1121845617890358, + "step": 355 + }, + { + "ce_ib": 16.683542251586914, + "ce_orig": 1.2110188007354736, + "epoch": 0.10237975411603997, + "kl_loss": 8.89454460144043, + "loss_ib": 0.10562898218631744, + "step": 356 + }, + { + "ce_ib": 14.379767417907715, + "ce_orig": 0.6198569536209106, + "epoch": 0.10237975411603997, + "kl_loss": 10.061267852783203, + "loss_ib": 0.11499244719743729, + "step": 356 + }, + { + "ce_ib": 16.58348846435547, + "ce_orig": 1.1141889095306396, + "epoch": 0.10237975411603997, + "kl_loss": 9.346358299255371, + "loss_ib": 0.11004707217216492, + "step": 356 + }, + { + "ce_ib": 16.944780349731445, + "ce_orig": 0.8266158699989319, + "epoch": 0.10237975411603997, + "kl_loss": 9.04732894897461, + "loss_ib": 0.10741806775331497, + "step": 356 + }, + { + "ce_ib": 16.06398582458496, + "ce_orig": 0.904808759689331, + "epoch": 0.10266733769501761, + "kl_loss": 9.288269996643066, + "loss_ib": 0.10894668102264404, + "step": 357 + }, + { + "ce_ib": 18.518054962158203, + "ce_orig": 0.6518286466598511, + "epoch": 0.10266733769501761, + "kl_loss": 9.153616905212402, + "loss_ib": 0.11005422472953796, + "step": 357 + }, + { + "ce_ib": 13.961670875549316, + "ce_orig": 0.7661263942718506, + "epoch": 0.10266733769501761, + "kl_loss": 9.662349700927734, + "loss_ib": 0.11058516055345535, + "step": 357 + }, + { + "ce_ib": 16.693498611450195, + "ce_orig": 0.7039583325386047, + "epoch": 0.10266733769501761, + "kl_loss": 9.203071594238281, + "loss_ib": 0.1087242066860199, + "step": 357 + }, + { + "ce_ib": 14.941229820251465, + "ce_orig": 0.6703804135322571, + "epoch": 0.10295492127399525, + "kl_loss": 7.32505464553833, + "loss_ib": 0.08819177746772766, + "step": 358 + }, + { + "ce_ib": 10.472589492797852, + "ce_orig": 0.28409555554389954, + "epoch": 0.10295492127399525, + "kl_loss": 10.202686309814453, + "loss_ib": 0.11249945312738419, + "step": 358 + }, + { + "ce_ib": 18.69029998779297, + "ce_orig": 0.9642467498779297, + "epoch": 0.10295492127399525, + "kl_loss": 9.411130905151367, + "loss_ib": 0.11280160397291183, + "step": 358 + }, + { + "ce_ib": 17.212318420410156, + "ce_orig": 0.8138781785964966, + "epoch": 0.10295492127399525, + "kl_loss": 8.968740463256836, + "loss_ib": 0.10689971596002579, + "step": 358 + }, + { + "ce_ib": 17.45269203186035, + "ce_orig": 0.377750039100647, + "epoch": 0.1032425048529729, + "kl_loss": 6.718733787536621, + "loss_ib": 0.0846400260925293, + "step": 359 + }, + { + "ce_ib": 20.13898468017578, + "ce_orig": 1.5750316381454468, + "epoch": 0.1032425048529729, + "kl_loss": 9.422719955444336, + "loss_ib": 0.11436618864536285, + "step": 359 + }, + { + "ce_ib": 17.9411678314209, + "ce_orig": 0.8798750638961792, + "epoch": 0.1032425048529729, + "kl_loss": 9.073663711547852, + "loss_ib": 0.10867780447006226, + "step": 359 + }, + { + "ce_ib": 17.482316970825195, + "ce_orig": 1.371580958366394, + "epoch": 0.1032425048529729, + "kl_loss": 9.437213897705078, + "loss_ib": 0.11185445636510849, + "step": 359 + }, + { + "epoch": 0.10353008843195054, + "grad_norm": 0.40732964873313904, + "learning_rate": 9.999614527738882e-06, + "loss": 1.0384, + "step": 360 + }, + { + "ce_ib": 20.317060470581055, + "ce_orig": 0.6699705123901367, + "epoch": 0.10353008843195054, + "kl_loss": 8.502317428588867, + "loss_ib": 0.10534022748470306, + "step": 360 + }, + { + "ce_ib": 14.938705444335938, + "ce_orig": 0.696804404258728, + "epoch": 0.10353008843195054, + "kl_loss": 9.356005668640137, + "loss_ib": 0.10849875956773758, + "step": 360 + }, + { + "ce_ib": 15.835675239562988, + "ce_orig": 0.8596332669258118, + "epoch": 0.10353008843195054, + "kl_loss": 9.73591423034668, + "loss_ib": 0.11319481581449509, + "step": 360 + }, + { + "ce_ib": 15.533881187438965, + "ce_orig": 0.4826924502849579, + "epoch": 0.10353008843195054, + "kl_loss": 10.024049758911133, + "loss_ib": 0.11577437818050385, + "step": 360 + }, + { + "ce_ib": 12.633554458618164, + "ce_orig": 0.5069536566734314, + "epoch": 0.10381767201092817, + "kl_loss": 9.803174018859863, + "loss_ib": 0.11066529154777527, + "step": 361 + }, + { + "ce_ib": 13.828798294067383, + "ce_orig": 0.896979033946991, + "epoch": 0.10381767201092817, + "kl_loss": 8.719182968139648, + "loss_ib": 0.10102062672376633, + "step": 361 + }, + { + "ce_ib": 21.034914016723633, + "ce_orig": 1.8323390483856201, + "epoch": 0.10381767201092817, + "kl_loss": 9.037761688232422, + "loss_ib": 0.11141253262758255, + "step": 361 + }, + { + "ce_ib": 16.179244995117188, + "ce_orig": 0.7684395909309387, + "epoch": 0.10381767201092817, + "kl_loss": 8.469200134277344, + "loss_ib": 0.100871242582798, + "step": 361 + }, + { + "ce_ib": 13.840865135192871, + "ce_orig": 1.0341031551361084, + "epoch": 0.10410525558990581, + "kl_loss": 7.858333110809326, + "loss_ib": 0.0924241915345192, + "step": 362 + }, + { + "ce_ib": 22.43819236755371, + "ce_orig": 1.788472294807434, + "epoch": 0.10410525558990581, + "kl_loss": 9.436954498291016, + "loss_ib": 0.1168077364563942, + "step": 362 + }, + { + "ce_ib": 19.555612564086914, + "ce_orig": 1.5130561590194702, + "epoch": 0.10410525558990581, + "kl_loss": 9.508270263671875, + "loss_ib": 0.1146383136510849, + "step": 362 + }, + { + "ce_ib": 13.82888412475586, + "ce_orig": 0.5086873769760132, + "epoch": 0.10410525558990581, + "kl_loss": 9.382222175598145, + "loss_ib": 0.10765110701322556, + "step": 362 + }, + { + "ce_ib": 16.548250198364258, + "ce_orig": 1.2047643661499023, + "epoch": 0.10439283916888345, + "kl_loss": 8.958712577819824, + "loss_ib": 0.10613537579774857, + "step": 363 + }, + { + "ce_ib": 15.741909980773926, + "ce_orig": 0.8534471392631531, + "epoch": 0.10439283916888345, + "kl_loss": 9.691909790039062, + "loss_ib": 0.11266100406646729, + "step": 363 + }, + { + "ce_ib": 21.730342864990234, + "ce_orig": 1.475590467453003, + "epoch": 0.10439283916888345, + "kl_loss": 9.107532501220703, + "loss_ib": 0.11280567198991776, + "step": 363 + }, + { + "ce_ib": 13.439830780029297, + "ce_orig": 0.6186426877975464, + "epoch": 0.10439283916888345, + "kl_loss": 9.766258239746094, + "loss_ib": 0.11110240966081619, + "step": 363 + }, + { + "ce_ib": 16.56508445739746, + "ce_orig": 0.9517434239387512, + "epoch": 0.1046804227478611, + "kl_loss": 9.201667785644531, + "loss_ib": 0.10858175903558731, + "step": 364 + }, + { + "ce_ib": 15.66641902923584, + "ce_orig": 0.1410691887140274, + "epoch": 0.1046804227478611, + "kl_loss": 12.184783935546875, + "loss_ib": 0.13751424849033356, + "step": 364 + }, + { + "ce_ib": 16.9328670501709, + "ce_orig": 0.7690316438674927, + "epoch": 0.1046804227478611, + "kl_loss": 7.339565753936768, + "loss_ib": 0.09032852202653885, + "step": 364 + }, + { + "ce_ib": 19.301227569580078, + "ce_orig": 1.541357159614563, + "epoch": 0.1046804227478611, + "kl_loss": 8.6048583984375, + "loss_ib": 0.10534980893135071, + "step": 364 + }, + { + "epoch": 0.10496800632683874, + "grad_norm": 0.35407018661499023, + "learning_rate": 9.99951213833464e-06, + "loss": 1.0547, + "step": 365 + }, + { + "ce_ib": 16.928804397583008, + "ce_orig": 0.898991048336029, + "epoch": 0.10496800632683874, + "kl_loss": 8.901844024658203, + "loss_ib": 0.10594724118709564, + "step": 365 + }, + { + "ce_ib": 12.694430351257324, + "ce_orig": 0.6792229413986206, + "epoch": 0.10496800632683874, + "kl_loss": 9.286140441894531, + "loss_ib": 0.10555583983659744, + "step": 365 + }, + { + "ce_ib": 19.038597106933594, + "ce_orig": 1.1029527187347412, + "epoch": 0.10496800632683874, + "kl_loss": 8.959847450256348, + "loss_ib": 0.10863707214593887, + "step": 365 + }, + { + "ce_ib": 14.017401695251465, + "ce_orig": 0.8020762205123901, + "epoch": 0.10496800632683874, + "kl_loss": 10.084455490112305, + "loss_ib": 0.11486195027828217, + "step": 365 + }, + { + "ce_ib": 17.723241806030273, + "ce_orig": 1.3404425382614136, + "epoch": 0.10525558990581638, + "kl_loss": 8.840962409973145, + "loss_ib": 0.1061328649520874, + "step": 366 + }, + { + "ce_ib": 13.039340019226074, + "ce_orig": 0.7993932962417603, + "epoch": 0.10525558990581638, + "kl_loss": 8.735919952392578, + "loss_ib": 0.10039854049682617, + "step": 366 + }, + { + "ce_ib": 15.478903770446777, + "ce_orig": 0.7874028086662292, + "epoch": 0.10525558990581638, + "kl_loss": 8.431257247924805, + "loss_ib": 0.09979147464036942, + "step": 366 + }, + { + "ce_ib": 14.608510971069336, + "ce_orig": 0.8104147911071777, + "epoch": 0.10525558990581638, + "kl_loss": 8.885534286499023, + "loss_ib": 0.10346385091543198, + "step": 366 + }, + { + "ce_ib": 15.228021621704102, + "ce_orig": 0.8766224384307861, + "epoch": 0.10554317348479401, + "kl_loss": 9.194426536560059, + "loss_ib": 0.10717228800058365, + "step": 367 + }, + { + "ce_ib": 15.027702331542969, + "ce_orig": 0.7485743165016174, + "epoch": 0.10554317348479401, + "kl_loss": 8.38540267944336, + "loss_ib": 0.09888172894716263, + "step": 367 + }, + { + "ce_ib": 17.5020694732666, + "ce_orig": 1.4059276580810547, + "epoch": 0.10554317348479401, + "kl_loss": 9.084081649780273, + "loss_ib": 0.10834288597106934, + "step": 367 + }, + { + "ce_ib": 17.624956130981445, + "ce_orig": 1.1633917093276978, + "epoch": 0.10554317348479401, + "kl_loss": 9.179400444030762, + "loss_ib": 0.10941895842552185, + "step": 367 + }, + { + "ce_ib": 17.59357452392578, + "ce_orig": 1.2003300189971924, + "epoch": 0.10583075706377165, + "kl_loss": 8.543558120727539, + "loss_ib": 0.10302915424108505, + "step": 368 + }, + { + "ce_ib": 18.518356323242188, + "ce_orig": 1.2260750532150269, + "epoch": 0.10583075706377165, + "kl_loss": 8.891761779785156, + "loss_ib": 0.10743597149848938, + "step": 368 + }, + { + "ce_ib": 17.378921508789062, + "ce_orig": 1.023598551750183, + "epoch": 0.10583075706377165, + "kl_loss": 8.479592323303223, + "loss_ib": 0.10217484086751938, + "step": 368 + }, + { + "ce_ib": 19.736833572387695, + "ce_orig": 1.5643643140792847, + "epoch": 0.10583075706377165, + "kl_loss": 8.811531066894531, + "loss_ib": 0.10785213857889175, + "step": 368 + }, + { + "ce_ib": 13.935962677001953, + "ce_orig": 0.5430191159248352, + "epoch": 0.1061183406427493, + "kl_loss": 9.201333999633789, + "loss_ib": 0.10594930499792099, + "step": 369 + }, + { + "ce_ib": 17.181163787841797, + "ce_orig": 1.3840625286102295, + "epoch": 0.1061183406427493, + "kl_loss": 8.802513122558594, + "loss_ib": 0.10520629584789276, + "step": 369 + }, + { + "ce_ib": 14.592924118041992, + "ce_orig": 0.8257763385772705, + "epoch": 0.1061183406427493, + "kl_loss": 8.735795974731445, + "loss_ib": 0.10195088386535645, + "step": 369 + }, + { + "ce_ib": 15.771759986877441, + "ce_orig": 0.7309710383415222, + "epoch": 0.1061183406427493, + "kl_loss": 9.097509384155273, + "loss_ib": 0.1067468523979187, + "step": 369 + }, + { + "epoch": 0.10640592422172694, + "grad_norm": 0.4832640290260315, + "learning_rate": 9.999397703945243e-06, + "loss": 1.0498, + "step": 370 + }, + { + "ce_ib": 15.806496620178223, + "ce_orig": 0.879632294178009, + "epoch": 0.10640592422172694, + "kl_loss": 10.071746826171875, + "loss_ib": 0.11652395874261856, + "step": 370 + }, + { + "ce_ib": 15.620360374450684, + "ce_orig": 0.6177505254745483, + "epoch": 0.10640592422172694, + "kl_loss": 8.700934410095215, + "loss_ib": 0.10262969881296158, + "step": 370 + }, + { + "ce_ib": 14.942098617553711, + "ce_orig": 0.6238611936569214, + "epoch": 0.10640592422172694, + "kl_loss": 9.041037559509277, + "loss_ib": 0.1053524762392044, + "step": 370 + }, + { + "ce_ib": 13.575098991394043, + "ce_orig": 0.7554785013198853, + "epoch": 0.10640592422172694, + "kl_loss": 8.968904495239258, + "loss_ib": 0.10326413810253143, + "step": 370 + }, + { + "ce_ib": 10.327584266662598, + "ce_orig": 0.23832768201828003, + "epoch": 0.10669350780070458, + "kl_loss": 7.03424072265625, + "loss_ib": 0.08066999167203903, + "step": 371 + }, + { + "ce_ib": 19.388700485229492, + "ce_orig": 1.3649802207946777, + "epoch": 0.10669350780070458, + "kl_loss": 9.210512161254883, + "loss_ib": 0.11149382591247559, + "step": 371 + }, + { + "ce_ib": 11.52276611328125, + "ce_orig": 0.7070875763893127, + "epoch": 0.10669350780070458, + "kl_loss": 8.641892433166504, + "loss_ib": 0.09794168919324875, + "step": 371 + }, + { + "ce_ib": 19.951656341552734, + "ce_orig": 1.3112847805023193, + "epoch": 0.10669350780070458, + "kl_loss": 9.238996505737305, + "loss_ib": 0.11234162002801895, + "step": 371 + }, + { + "ce_ib": 19.08662986755371, + "ce_orig": 1.517033338546753, + "epoch": 0.10698109137968222, + "kl_loss": 9.000858306884766, + "loss_ib": 0.10909520834684372, + "step": 372 + }, + { + "ce_ib": 13.207756996154785, + "ce_orig": 0.596856951713562, + "epoch": 0.10698109137968222, + "kl_loss": 8.860549926757812, + "loss_ib": 0.10181325674057007, + "step": 372 + }, + { + "ce_ib": 19.195131301879883, + "ce_orig": 0.9714540839195251, + "epoch": 0.10698109137968222, + "kl_loss": 9.710214614868164, + "loss_ib": 0.11629726737737656, + "step": 372 + }, + { + "ce_ib": 17.687284469604492, + "ce_orig": 1.5548173189163208, + "epoch": 0.10698109137968222, + "kl_loss": 8.532265663146973, + "loss_ib": 0.10300993919372559, + "step": 372 + }, + { + "ce_ib": 18.050241470336914, + "ce_orig": 0.8549476265907288, + "epoch": 0.10726867495865985, + "kl_loss": 8.890786170959473, + "loss_ib": 0.10695809870958328, + "step": 373 + }, + { + "ce_ib": 15.486068725585938, + "ce_orig": 0.7641202807426453, + "epoch": 0.10726867495865985, + "kl_loss": 8.97690486907959, + "loss_ib": 0.1052551120519638, + "step": 373 + }, + { + "ce_ib": 20.0921573638916, + "ce_orig": 1.5870574712753296, + "epoch": 0.10726867495865985, + "kl_loss": 8.584416389465332, + "loss_ib": 0.10593631863594055, + "step": 373 + }, + { + "ce_ib": 14.03450870513916, + "ce_orig": 0.8633349537849426, + "epoch": 0.10726867495865985, + "kl_loss": 9.580089569091797, + "loss_ib": 0.10983540117740631, + "step": 373 + }, + { + "ce_ib": 18.23748779296875, + "ce_orig": 0.9740087985992432, + "epoch": 0.1075562585376375, + "kl_loss": 8.163890838623047, + "loss_ib": 0.09987638890743256, + "step": 374 + }, + { + "ce_ib": 16.242849349975586, + "ce_orig": 0.9537095427513123, + "epoch": 0.1075562585376375, + "kl_loss": 8.648405075073242, + "loss_ib": 0.10272689908742905, + "step": 374 + }, + { + "ce_ib": 15.156339645385742, + "ce_orig": 1.0340036153793335, + "epoch": 0.1075562585376375, + "kl_loss": 8.221203804016113, + "loss_ib": 0.09736837446689606, + "step": 374 + }, + { + "ce_ib": 16.337060928344727, + "ce_orig": 1.1412116289138794, + "epoch": 0.1075562585376375, + "kl_loss": 8.982345581054688, + "loss_ib": 0.10616051405668259, + "step": 374 + }, + { + "epoch": 0.10784384211661514, + "grad_norm": 0.609579861164093, + "learning_rate": 9.999271224846397e-06, + "loss": 1.0013, + "step": 375 + }, + { + "ce_ib": 9.566058158874512, + "ce_orig": 0.2985042929649353, + "epoch": 0.10784384211661514, + "kl_loss": 7.494280815124512, + "loss_ib": 0.08450886607170105, + "step": 375 + }, + { + "ce_ib": 12.867706298828125, + "ce_orig": 0.5363652110099792, + "epoch": 0.10784384211661514, + "kl_loss": 8.874717712402344, + "loss_ib": 0.10161488503217697, + "step": 375 + }, + { + "ce_ib": 14.6071195602417, + "ce_orig": 1.2598626613616943, + "epoch": 0.10784384211661514, + "kl_loss": 8.637162208557129, + "loss_ib": 0.10097873210906982, + "step": 375 + }, + { + "ce_ib": 17.33331871032715, + "ce_orig": 0.6892062425613403, + "epoch": 0.10784384211661514, + "kl_loss": 8.592702865600586, + "loss_ib": 0.1032603457570076, + "step": 375 + }, + { + "ce_ib": 15.182628631591797, + "ce_orig": 0.8526699542999268, + "epoch": 0.10813142569559278, + "kl_loss": 8.426027297973633, + "loss_ib": 0.09944289922714233, + "step": 376 + }, + { + "ce_ib": 18.341575622558594, + "ce_orig": 0.621722400188446, + "epoch": 0.10813142569559278, + "kl_loss": 7.857001304626465, + "loss_ib": 0.09691158682107925, + "step": 376 + }, + { + "ce_ib": 14.127799987792969, + "ce_orig": 0.7635291218757629, + "epoch": 0.10813142569559278, + "kl_loss": 8.97607421875, + "loss_ib": 0.10388854146003723, + "step": 376 + }, + { + "ce_ib": 16.845251083374023, + "ce_orig": 1.1074169874191284, + "epoch": 0.10813142569559278, + "kl_loss": 8.656087875366211, + "loss_ib": 0.10340613126754761, + "step": 376 + }, + { + "ce_ib": 16.19705581665039, + "ce_orig": 0.8272833824157715, + "epoch": 0.10841900927457042, + "kl_loss": 8.441411972045898, + "loss_ib": 0.10061117261648178, + "step": 377 + }, + { + "ce_ib": 11.034353256225586, + "ce_orig": 0.6063670516014099, + "epoch": 0.10841900927457042, + "kl_loss": 9.254929542541504, + "loss_ib": 0.10358364880084991, + "step": 377 + }, + { + "ce_ib": 16.136695861816406, + "ce_orig": 0.9638150930404663, + "epoch": 0.10841900927457042, + "kl_loss": 8.382518768310547, + "loss_ib": 0.09996187686920166, + "step": 377 + }, + { + "ce_ib": 14.821840286254883, + "ce_orig": 1.11579430103302, + "epoch": 0.10841900927457042, + "kl_loss": 8.752357482910156, + "loss_ib": 0.10234541445970535, + "step": 377 + }, + { + "ce_ib": 14.535453796386719, + "ce_orig": 0.5030508041381836, + "epoch": 0.10870659285354806, + "kl_loss": 8.088106155395508, + "loss_ib": 0.09541651606559753, + "step": 378 + }, + { + "ce_ib": 13.514139175415039, + "ce_orig": 0.43041279911994934, + "epoch": 0.10870659285354806, + "kl_loss": 8.524757385253906, + "loss_ib": 0.09876170754432678, + "step": 378 + }, + { + "ce_ib": 13.1725435256958, + "ce_orig": 0.5961971282958984, + "epoch": 0.10870659285354806, + "kl_loss": 8.799945831298828, + "loss_ib": 0.10117200016975403, + "step": 378 + }, + { + "ce_ib": 19.455663681030273, + "ce_orig": 1.371256947517395, + "epoch": 0.10870659285354806, + "kl_loss": 7.995181083679199, + "loss_ib": 0.09940747171640396, + "step": 378 + }, + { + "ce_ib": 13.492290496826172, + "ce_orig": 0.9726055264472961, + "epoch": 0.10899417643252571, + "kl_loss": 8.796764373779297, + "loss_ib": 0.10145992785692215, + "step": 379 + }, + { + "ce_ib": 11.911052703857422, + "ce_orig": 0.7123900651931763, + "epoch": 0.10899417643252571, + "kl_loss": 8.622007369995117, + "loss_ib": 0.09813112020492554, + "step": 379 + }, + { + "ce_ib": 15.950429916381836, + "ce_orig": 0.5447075963020325, + "epoch": 0.10899417643252571, + "kl_loss": 8.17884635925293, + "loss_ib": 0.09773889183998108, + "step": 379 + }, + { + "ce_ib": 17.951889038085938, + "ce_orig": 0.9959045052528381, + "epoch": 0.10899417643252571, + "kl_loss": 8.3568696975708, + "loss_ib": 0.1015205830335617, + "step": 379 + }, + { + "epoch": 0.10928176001150335, + "grad_norm": 0.6501461863517761, + "learning_rate": 9.99913270134281e-06, + "loss": 0.9988, + "step": 380 + }, + { + "ce_ib": 12.876981735229492, + "ce_orig": 0.5673431754112244, + "epoch": 0.10928176001150335, + "kl_loss": 7.431344032287598, + "loss_ib": 0.0871904194355011, + "step": 380 + }, + { + "ce_ib": 15.092470169067383, + "ce_orig": 0.2880413830280304, + "epoch": 0.10928176001150335, + "kl_loss": 8.873856544494629, + "loss_ib": 0.10383103042840958, + "step": 380 + }, + { + "ce_ib": 12.99868392944336, + "ce_orig": 0.5703913569450378, + "epoch": 0.10928176001150335, + "kl_loss": 8.380903244018555, + "loss_ib": 0.09680771827697754, + "step": 380 + }, + { + "ce_ib": 13.363186836242676, + "ce_orig": 0.5209031701087952, + "epoch": 0.10928176001150335, + "kl_loss": 8.865699768066406, + "loss_ib": 0.10202018171548843, + "step": 380 + }, + { + "ce_ib": 20.89650535583496, + "ce_orig": 1.6800005435943604, + "epoch": 0.10956934359048098, + "kl_loss": 8.10032844543457, + "loss_ib": 0.10189979523420334, + "step": 381 + }, + { + "ce_ib": 17.024986267089844, + "ce_orig": 0.6650580167770386, + "epoch": 0.10956934359048098, + "kl_loss": 7.477260112762451, + "loss_ib": 0.0917975902557373, + "step": 381 + }, + { + "ce_ib": 21.227222442626953, + "ce_orig": 1.5764412879943848, + "epoch": 0.10956934359048098, + "kl_loss": 8.041464805603027, + "loss_ib": 0.10164187103509903, + "step": 381 + }, + { + "ce_ib": 18.747699737548828, + "ce_orig": 1.2042864561080933, + "epoch": 0.10956934359048098, + "kl_loss": 7.809324264526367, + "loss_ib": 0.09684094041585922, + "step": 381 + }, + { + "ce_ib": 13.682280540466309, + "ce_orig": 0.604314386844635, + "epoch": 0.10985692716945862, + "kl_loss": 8.266580581665039, + "loss_ib": 0.09634808450937271, + "step": 382 + }, + { + "ce_ib": 11.598891258239746, + "ce_orig": 0.7977884411811829, + "epoch": 0.10985692716945862, + "kl_loss": 8.074966430664062, + "loss_ib": 0.09234855324029922, + "step": 382 + }, + { + "ce_ib": 10.722358703613281, + "ce_orig": 0.488436758518219, + "epoch": 0.10985692716945862, + "kl_loss": 8.471232414245605, + "loss_ib": 0.09543468058109283, + "step": 382 + }, + { + "ce_ib": 20.917461395263672, + "ce_orig": 1.5944008827209473, + "epoch": 0.10985692716945862, + "kl_loss": 8.631521224975586, + "loss_ib": 0.10723267495632172, + "step": 382 + }, + { + "ce_ib": 14.213534355163574, + "ce_orig": 0.9382426142692566, + "epoch": 0.11014451074843626, + "kl_loss": 8.12414836883545, + "loss_ib": 0.09545501321554184, + "step": 383 + }, + { + "ce_ib": 10.728821754455566, + "ce_orig": 0.7022181749343872, + "epoch": 0.11014451074843626, + "kl_loss": 8.53184700012207, + "loss_ib": 0.0960472822189331, + "step": 383 + }, + { + "ce_ib": 10.197715759277344, + "ce_orig": 0.512974202632904, + "epoch": 0.11014451074843626, + "kl_loss": 8.536653518676758, + "loss_ib": 0.09556424617767334, + "step": 383 + }, + { + "ce_ib": 12.831421852111816, + "ce_orig": 0.6488183736801147, + "epoch": 0.11014451074843626, + "kl_loss": 8.425491333007812, + "loss_ib": 0.0970863327383995, + "step": 383 + }, + { + "ce_ib": 13.790513038635254, + "ce_orig": 0.48860305547714233, + "epoch": 0.11043209432741391, + "kl_loss": 8.638005256652832, + "loss_ib": 0.1001705601811409, + "step": 384 + }, + { + "ce_ib": 18.993335723876953, + "ce_orig": 1.300746202468872, + "epoch": 0.11043209432741391, + "kl_loss": 7.856682777404785, + "loss_ib": 0.09756016731262207, + "step": 384 + }, + { + "ce_ib": 14.618206024169922, + "ce_orig": 0.6457257270812988, + "epoch": 0.11043209432741391, + "kl_loss": 5.0488386154174805, + "loss_ib": 0.0651065930724144, + "step": 384 + }, + { + "ce_ib": 13.924184799194336, + "ce_orig": 0.7180906534194946, + "epoch": 0.11043209432741391, + "kl_loss": 8.496256828308105, + "loss_ib": 0.09888675808906555, + "step": 384 + }, + { + "epoch": 0.11071967790639155, + "grad_norm": 0.7011797428131104, + "learning_rate": 9.998982133768226e-06, + "loss": 0.9557, + "step": 385 + }, + { + "ce_ib": 15.821268081665039, + "ce_orig": 1.4919018745422363, + "epoch": 0.11071967790639155, + "kl_loss": 8.213622093200684, + "loss_ib": 0.09795748442411423, + "step": 385 + }, + { + "ce_ib": 14.86267375946045, + "ce_orig": 1.0427626371383667, + "epoch": 0.11071967790639155, + "kl_loss": 7.624295234680176, + "loss_ib": 0.091105617582798, + "step": 385 + }, + { + "ce_ib": 12.792627334594727, + "ce_orig": 0.644935131072998, + "epoch": 0.11071967790639155, + "kl_loss": 8.272308349609375, + "loss_ib": 0.09551570564508438, + "step": 385 + }, + { + "ce_ib": 13.713454246520996, + "ce_orig": 0.8965685963630676, + "epoch": 0.11071967790639155, + "kl_loss": 9.92483901977539, + "loss_ib": 0.11296184360980988, + "step": 385 + }, + { + "ce_ib": 16.306076049804688, + "ce_orig": 0.7789519429206848, + "epoch": 0.11100726148536919, + "kl_loss": 7.479434967041016, + "loss_ib": 0.09110042452812195, + "step": 386 + }, + { + "ce_ib": 15.762940406799316, + "ce_orig": 0.8707707524299622, + "epoch": 0.11100726148536919, + "kl_loss": 7.34848165512085, + "loss_ib": 0.08924775570631027, + "step": 386 + }, + { + "ce_ib": 6.3066205978393555, + "ce_orig": 0.19392843544483185, + "epoch": 0.11100726148536919, + "kl_loss": 5.0305914878845215, + "loss_ib": 0.0566125325858593, + "step": 386 + }, + { + "ce_ib": 14.033432006835938, + "ce_orig": 0.7708988189697266, + "epoch": 0.11100726148536919, + "kl_loss": 8.070176124572754, + "loss_ib": 0.09473519027233124, + "step": 386 + }, + { + "ce_ib": 16.794458389282227, + "ce_orig": 0.8729531168937683, + "epoch": 0.11129484506434682, + "kl_loss": 8.056872367858887, + "loss_ib": 0.09736318141222, + "step": 387 + }, + { + "ce_ib": 14.828986167907715, + "ce_orig": 1.121248483657837, + "epoch": 0.11129484506434682, + "kl_loss": 7.610101699829102, + "loss_ib": 0.09092999994754791, + "step": 387 + }, + { + "ce_ib": 13.89840030670166, + "ce_orig": 0.740386426448822, + "epoch": 0.11129484506434682, + "kl_loss": 7.463097095489502, + "loss_ib": 0.08852936327457428, + "step": 387 + }, + { + "ce_ib": 12.567804336547852, + "ce_orig": 0.956537663936615, + "epoch": 0.11129484506434682, + "kl_loss": 7.956699848175049, + "loss_ib": 0.09213479608297348, + "step": 387 + }, + { + "ce_ib": 11.099259376525879, + "ce_orig": 0.6955797076225281, + "epoch": 0.11158242864332446, + "kl_loss": 8.742734909057617, + "loss_ib": 0.09852661192417145, + "step": 388 + }, + { + "ce_ib": 15.241199493408203, + "ce_orig": 0.7828308939933777, + "epoch": 0.11158242864332446, + "kl_loss": 8.080060005187988, + "loss_ib": 0.09604179859161377, + "step": 388 + }, + { + "ce_ib": 12.71835994720459, + "ce_orig": 0.5617780685424805, + "epoch": 0.11158242864332446, + "kl_loss": 7.975733280181885, + "loss_ib": 0.09247568994760513, + "step": 388 + }, + { + "ce_ib": 15.852190971374512, + "ce_orig": 0.9822865128517151, + "epoch": 0.11158242864332446, + "kl_loss": 7.865725040435791, + "loss_ib": 0.09450943768024445, + "step": 388 + }, + { + "ce_ib": 15.686773300170898, + "ce_orig": 0.8103384971618652, + "epoch": 0.11187001222230211, + "kl_loss": 7.693680763244629, + "loss_ib": 0.09262357652187347, + "step": 389 + }, + { + "ce_ib": 15.016551971435547, + "ce_orig": 0.9884656071662903, + "epoch": 0.11187001222230211, + "kl_loss": 7.716882228851318, + "loss_ib": 0.0921853706240654, + "step": 389 + }, + { + "ce_ib": 18.931434631347656, + "ce_orig": 1.5234384536743164, + "epoch": 0.11187001222230211, + "kl_loss": 7.528309345245361, + "loss_ib": 0.09421452134847641, + "step": 389 + }, + { + "ce_ib": 17.58110237121582, + "ce_orig": 1.5781915187835693, + "epoch": 0.11187001222230211, + "kl_loss": 7.889880180358887, + "loss_ib": 0.09647990018129349, + "step": 389 + }, + { + "epoch": 0.11215759580127975, + "grad_norm": 0.6288163661956787, + "learning_rate": 9.998819522485392e-06, + "loss": 1.0119, + "step": 390 + }, + { + "ce_ib": 15.403480529785156, + "ce_orig": 0.7215459942817688, + "epoch": 0.11215759580127975, + "kl_loss": 8.92459774017334, + "loss_ib": 0.10464945435523987, + "step": 390 + }, + { + "ce_ib": 12.422520637512207, + "ce_orig": 0.38442984223365784, + "epoch": 0.11215759580127975, + "kl_loss": 7.306635856628418, + "loss_ib": 0.08548887819051743, + "step": 390 + }, + { + "ce_ib": 16.295055389404297, + "ce_orig": 0.8220521211624146, + "epoch": 0.11215759580127975, + "kl_loss": 7.3963141441345215, + "loss_ib": 0.09025819599628448, + "step": 390 + }, + { + "ce_ib": 13.246119499206543, + "ce_orig": 0.6906241774559021, + "epoch": 0.11215759580127975, + "kl_loss": 7.956465721130371, + "loss_ib": 0.09281077235937119, + "step": 390 + }, + { + "ce_ib": 13.792742729187012, + "ce_orig": 0.844266414642334, + "epoch": 0.11244517938025739, + "kl_loss": 8.29489517211914, + "loss_ib": 0.0967416912317276, + "step": 391 + }, + { + "ce_ib": 16.884506225585938, + "ce_orig": 0.9576941132545471, + "epoch": 0.11244517938025739, + "kl_loss": 7.322904586791992, + "loss_ib": 0.0901135504245758, + "step": 391 + }, + { + "ce_ib": 12.04941463470459, + "ce_orig": 0.6010465025901794, + "epoch": 0.11244517938025739, + "kl_loss": 7.9065093994140625, + "loss_ib": 0.09111450612545013, + "step": 391 + }, + { + "ce_ib": 13.96172046661377, + "ce_orig": 0.6447573900222778, + "epoch": 0.11244517938025739, + "kl_loss": 7.576847076416016, + "loss_ib": 0.08973018079996109, + "step": 391 + }, + { + "ce_ib": 15.197918891906738, + "ce_orig": 0.7180730700492859, + "epoch": 0.11273276295923502, + "kl_loss": 7.893502235412598, + "loss_ib": 0.0941329374909401, + "step": 392 + }, + { + "ce_ib": 13.946671485900879, + "ce_orig": 0.7366077303886414, + "epoch": 0.11273276295923502, + "kl_loss": 7.63916015625, + "loss_ib": 0.09033826738595963, + "step": 392 + }, + { + "ce_ib": 14.26876163482666, + "ce_orig": 0.8613617420196533, + "epoch": 0.11273276295923502, + "kl_loss": 7.369016647338867, + "loss_ib": 0.087958924472332, + "step": 392 + }, + { + "ce_ib": 12.909405708312988, + "ce_orig": 0.7305828332901001, + "epoch": 0.11273276295923502, + "kl_loss": 7.314949035644531, + "loss_ib": 0.08605889230966568, + "step": 392 + }, + { + "ce_ib": 18.242956161499023, + "ce_orig": 1.3529711961746216, + "epoch": 0.11302034653821266, + "kl_loss": 7.389057636260986, + "loss_ib": 0.092133529484272, + "step": 393 + }, + { + "ce_ib": 9.846675872802734, + "ce_orig": 0.7093670964241028, + "epoch": 0.11302034653821266, + "kl_loss": 7.952066898345947, + "loss_ib": 0.0893673375248909, + "step": 393 + }, + { + "ce_ib": 12.509729385375977, + "ce_orig": 1.0103166103363037, + "epoch": 0.11302034653821266, + "kl_loss": 8.187524795532227, + "loss_ib": 0.09438497573137283, + "step": 393 + }, + { + "ce_ib": 6.737217426300049, + "ce_orig": 0.21716581284999847, + "epoch": 0.11302034653821266, + "kl_loss": 5.771838188171387, + "loss_ib": 0.06445559859275818, + "step": 393 + }, + { + "ce_ib": 12.970864295959473, + "ce_orig": 0.689153790473938, + "epoch": 0.11330793011719031, + "kl_loss": 7.806953430175781, + "loss_ib": 0.09104040265083313, + "step": 394 + }, + { + "ce_ib": 14.298376083374023, + "ce_orig": 0.6159449815750122, + "epoch": 0.11330793011719031, + "kl_loss": 7.474273204803467, + "loss_ib": 0.08904110640287399, + "step": 394 + }, + { + "ce_ib": 16.35755729675293, + "ce_orig": 1.128537893295288, + "epoch": 0.11330793011719031, + "kl_loss": 7.542243003845215, + "loss_ib": 0.09177998453378677, + "step": 394 + }, + { + "ce_ib": 16.245141983032227, + "ce_orig": 0.5840805768966675, + "epoch": 0.11330793011719031, + "kl_loss": 7.685763835906982, + "loss_ib": 0.09310277551412582, + "step": 394 + }, + { + "epoch": 0.11359551369616795, + "grad_norm": 0.43641915917396545, + "learning_rate": 9.998644867886077e-06, + "loss": 0.9567, + "step": 395 + }, + { + "ce_ib": 14.849902153015137, + "ce_orig": 0.9369897842407227, + "epoch": 0.11359551369616795, + "kl_loss": 7.421789646148682, + "loss_ib": 0.08906780183315277, + "step": 395 + }, + { + "ce_ib": 12.70493221282959, + "ce_orig": 0.7189629673957825, + "epoch": 0.11359551369616795, + "kl_loss": 7.841180801391602, + "loss_ib": 0.09111674129962921, + "step": 395 + }, + { + "ce_ib": 16.844697952270508, + "ce_orig": 0.6727548241615295, + "epoch": 0.11359551369616795, + "kl_loss": 6.990694522857666, + "loss_ib": 0.08675163984298706, + "step": 395 + }, + { + "ce_ib": 14.839481353759766, + "ce_orig": 1.255350947380066, + "epoch": 0.11359551369616795, + "kl_loss": 7.494076728820801, + "loss_ib": 0.08978024125099182, + "step": 395 + }, + { + "ce_ib": 16.765884399414062, + "ce_orig": 1.331714153289795, + "epoch": 0.11388309727514559, + "kl_loss": 7.407535552978516, + "loss_ib": 0.09084123373031616, + "step": 396 + }, + { + "ce_ib": 10.355143547058105, + "ce_orig": 0.6202336549758911, + "epoch": 0.11388309727514559, + "kl_loss": 7.605844497680664, + "loss_ib": 0.08641359210014343, + "step": 396 + }, + { + "ce_ib": 14.112444877624512, + "ce_orig": 0.8373818397521973, + "epoch": 0.11388309727514559, + "kl_loss": 7.473760604858398, + "loss_ib": 0.08885005116462708, + "step": 396 + }, + { + "ce_ib": 13.782599449157715, + "ce_orig": 0.816245973110199, + "epoch": 0.11388309727514559, + "kl_loss": 7.807076454162598, + "loss_ib": 0.09185335785150528, + "step": 396 + }, + { + "ce_ib": 15.40560245513916, + "ce_orig": 1.211715579032898, + "epoch": 0.11417068085412323, + "kl_loss": 7.456460475921631, + "loss_ib": 0.08997020870447159, + "step": 397 + }, + { + "ce_ib": 9.293402671813965, + "ce_orig": 0.4598255753517151, + "epoch": 0.11417068085412323, + "kl_loss": 8.316404342651367, + "loss_ib": 0.09245744347572327, + "step": 397 + }, + { + "ce_ib": 14.194599151611328, + "ce_orig": 0.8155964612960815, + "epoch": 0.11417068085412323, + "kl_loss": 7.829108715057373, + "loss_ib": 0.09248568117618561, + "step": 397 + }, + { + "ce_ib": 12.846905708312988, + "ce_orig": 0.8388014435768127, + "epoch": 0.11417068085412323, + "kl_loss": 7.558581829071045, + "loss_ib": 0.08843272179365158, + "step": 397 + }, + { + "ce_ib": 10.11628532409668, + "ce_orig": 0.6828235983848572, + "epoch": 0.11445826443310086, + "kl_loss": 7.854046821594238, + "loss_ib": 0.08865674585103989, + "step": 398 + }, + { + "ce_ib": 9.932538032531738, + "ce_orig": 0.49403539299964905, + "epoch": 0.11445826443310086, + "kl_loss": 8.248268127441406, + "loss_ib": 0.0924152135848999, + "step": 398 + }, + { + "ce_ib": 13.556487083435059, + "ce_orig": 0.49110883474349976, + "epoch": 0.11445826443310086, + "kl_loss": 7.1056108474731445, + "loss_ib": 0.08461259305477142, + "step": 398 + }, + { + "ce_ib": 17.799386978149414, + "ce_orig": 1.120202660560608, + "epoch": 0.11445826443310086, + "kl_loss": 7.619751930236816, + "loss_ib": 0.09399690479040146, + "step": 398 + }, + { + "ce_ib": 18.385801315307617, + "ce_orig": 1.4876097440719604, + "epoch": 0.11474584801207852, + "kl_loss": 7.5165114402771, + "loss_ib": 0.093550905585289, + "step": 399 + }, + { + "ce_ib": 14.88972282409668, + "ce_orig": 1.251899003982544, + "epoch": 0.11474584801207852, + "kl_loss": 7.511725902557373, + "loss_ib": 0.09000697731971741, + "step": 399 + }, + { + "ce_ib": 11.505959510803223, + "ce_orig": 0.5990752577781677, + "epoch": 0.11474584801207852, + "kl_loss": 7.720244884490967, + "loss_ib": 0.08870840817689896, + "step": 399 + }, + { + "ce_ib": 14.54392147064209, + "ce_orig": 0.8790196180343628, + "epoch": 0.11474584801207852, + "kl_loss": 7.9987359046936035, + "loss_ib": 0.09453127533197403, + "step": 399 + }, + { + "epoch": 0.11503343159105615, + "grad_norm": 0.5361573100090027, + "learning_rate": 9.998458170391065e-06, + "loss": 0.9792, + "step": 400 + }, + { + "ce_ib": 18.511137008666992, + "ce_orig": 0.8711093664169312, + "epoch": 0.11503343159105615, + "kl_loss": 7.370296478271484, + "loss_ib": 0.09221409261226654, + "step": 400 + }, + { + "ce_ib": 12.954771995544434, + "ce_orig": 0.9472190141677856, + "epoch": 0.11503343159105615, + "kl_loss": 7.668916702270508, + "loss_ib": 0.0896439328789711, + "step": 400 + }, + { + "ce_ib": 9.006753921508789, + "ce_orig": 0.6201379299163818, + "epoch": 0.11503343159105615, + "kl_loss": 7.630331516265869, + "loss_ib": 0.08531006425619125, + "step": 400 + }, + { + "ce_ib": 15.421821594238281, + "ce_orig": 1.425597906112671, + "epoch": 0.11503343159105615, + "kl_loss": 7.540129661560059, + "loss_ib": 0.09082311391830444, + "step": 400 + }, + { + "ce_ib": 15.149130821228027, + "ce_orig": 1.0793287754058838, + "epoch": 0.11532101517003379, + "kl_loss": 7.484195232391357, + "loss_ib": 0.08999107778072357, + "step": 401 + }, + { + "ce_ib": 10.156062126159668, + "ce_orig": 0.5667037963867188, + "epoch": 0.11532101517003379, + "kl_loss": 7.341686248779297, + "loss_ib": 0.08357291668653488, + "step": 401 + }, + { + "ce_ib": 11.375419616699219, + "ce_orig": 1.1060158014297485, + "epoch": 0.11532101517003379, + "kl_loss": 7.683910846710205, + "loss_ib": 0.08821453154087067, + "step": 401 + }, + { + "ce_ib": 17.671218872070312, + "ce_orig": 1.273245096206665, + "epoch": 0.11532101517003379, + "kl_loss": 7.472596645355225, + "loss_ib": 0.09239718317985535, + "step": 401 + }, + { + "ce_ib": 16.562593460083008, + "ce_orig": 1.0524271726608276, + "epoch": 0.11560859874901143, + "kl_loss": 7.498256683349609, + "loss_ib": 0.09154515713453293, + "step": 402 + }, + { + "ce_ib": 13.353780746459961, + "ce_orig": 0.8203778862953186, + "epoch": 0.11560859874901143, + "kl_loss": 7.896320343017578, + "loss_ib": 0.09231697767972946, + "step": 402 + }, + { + "ce_ib": 15.274881362915039, + "ce_orig": 1.007253885269165, + "epoch": 0.11560859874901143, + "kl_loss": 7.653887748718262, + "loss_ib": 0.09181375056505203, + "step": 402 + }, + { + "ce_ib": 16.868385314941406, + "ce_orig": 1.3746187686920166, + "epoch": 0.11560859874901143, + "kl_loss": 6.850212097167969, + "loss_ib": 0.0853705033659935, + "step": 402 + }, + { + "ce_ib": 14.26913833618164, + "ce_orig": 1.1488964557647705, + "epoch": 0.11589618232798907, + "kl_loss": 7.498937606811523, + "loss_ib": 0.08925851434469223, + "step": 403 + }, + { + "ce_ib": 13.987759590148926, + "ce_orig": 0.4179192781448364, + "epoch": 0.11589618232798907, + "kl_loss": 7.400528430938721, + "loss_ib": 0.08799304068088531, + "step": 403 + }, + { + "ce_ib": 13.340829849243164, + "ce_orig": 0.7595700025558472, + "epoch": 0.11589618232798907, + "kl_loss": 7.298229217529297, + "loss_ib": 0.08632311969995499, + "step": 403 + }, + { + "ce_ib": 12.933422088623047, + "ce_orig": 0.6614567041397095, + "epoch": 0.11589618232798907, + "kl_loss": 7.262064456939697, + "loss_ib": 0.08555406332015991, + "step": 403 + }, + { + "ce_ib": 12.648677825927734, + "ce_orig": 1.055274248123169, + "epoch": 0.11618376590696672, + "kl_loss": 7.3485426902771, + "loss_ib": 0.08613410592079163, + "step": 404 + }, + { + "ce_ib": 15.602824211120605, + "ce_orig": 1.1300698518753052, + "epoch": 0.11618376590696672, + "kl_loss": 7.553566932678223, + "loss_ib": 0.09113849699497223, + "step": 404 + }, + { + "ce_ib": 11.030848503112793, + "ce_orig": 0.6406192779541016, + "epoch": 0.11618376590696672, + "kl_loss": 7.551169395446777, + "loss_ib": 0.0865425392985344, + "step": 404 + }, + { + "ce_ib": 18.89850616455078, + "ce_orig": 1.4245729446411133, + "epoch": 0.11618376590696672, + "kl_loss": 7.921879768371582, + "loss_ib": 0.09811729937791824, + "step": 404 + }, + { + "epoch": 0.11647134948594436, + "grad_norm": 0.5651848316192627, + "learning_rate": 9.998259430450155e-06, + "loss": 1.0022, + "step": 405 + }, + { + "ce_ib": 14.010991096496582, + "ce_orig": 0.7675843834877014, + "epoch": 0.11647134948594436, + "kl_loss": 7.355520248413086, + "loss_ib": 0.08756618946790695, + "step": 405 + }, + { + "ce_ib": 16.283018112182617, + "ce_orig": 1.158023715019226, + "epoch": 0.11647134948594436, + "kl_loss": 7.45557975769043, + "loss_ib": 0.09083881229162216, + "step": 405 + }, + { + "ce_ib": 15.902323722839355, + "ce_orig": 1.0440561771392822, + "epoch": 0.11647134948594436, + "kl_loss": 7.513373851776123, + "loss_ib": 0.09103605896234512, + "step": 405 + }, + { + "ce_ib": 17.28444480895996, + "ce_orig": 1.34712815284729, + "epoch": 0.11647134948594436, + "kl_loss": 7.375174045562744, + "loss_ib": 0.09103618562221527, + "step": 405 + }, + { + "ce_ib": 16.60024070739746, + "ce_orig": 1.3468883037567139, + "epoch": 0.116758933064922, + "kl_loss": 7.249956130981445, + "loss_ib": 0.08909979462623596, + "step": 406 + }, + { + "ce_ib": 10.413902282714844, + "ce_orig": 0.792377233505249, + "epoch": 0.116758933064922, + "kl_loss": 7.449808120727539, + "loss_ib": 0.08491198718547821, + "step": 406 + }, + { + "ce_ib": 14.015169143676758, + "ce_orig": 0.9668265581130981, + "epoch": 0.116758933064922, + "kl_loss": 7.551003456115723, + "loss_ib": 0.08952520042657852, + "step": 406 + }, + { + "ce_ib": 10.880705833435059, + "ce_orig": 0.5008480548858643, + "epoch": 0.116758933064922, + "kl_loss": 7.180622100830078, + "loss_ib": 0.08268693089485168, + "step": 406 + }, + { + "ce_ib": 14.242043495178223, + "ce_orig": 0.5770112872123718, + "epoch": 0.11704651664389963, + "kl_loss": 6.760585308074951, + "loss_ib": 0.0818478912115097, + "step": 407 + }, + { + "ce_ib": 17.079055786132812, + "ce_orig": 1.7003521919250488, + "epoch": 0.11704651664389963, + "kl_loss": 7.319855690002441, + "loss_ib": 0.09027761220932007, + "step": 407 + }, + { + "ce_ib": 13.891350746154785, + "ce_orig": 0.849856972694397, + "epoch": 0.11704651664389963, + "kl_loss": 6.7790093421936035, + "loss_ib": 0.08168143779039383, + "step": 407 + }, + { + "ce_ib": 16.732454299926758, + "ce_orig": 1.2152447700500488, + "epoch": 0.11704651664389963, + "kl_loss": 7.023953437805176, + "loss_ib": 0.08697198331356049, + "step": 407 + }, + { + "ce_ib": 11.8071870803833, + "ce_orig": 0.5867161154747009, + "epoch": 0.11733410022287727, + "kl_loss": 7.046442985534668, + "loss_ib": 0.08227161318063736, + "step": 408 + }, + { + "ce_ib": 12.511359214782715, + "ce_orig": 0.7658900022506714, + "epoch": 0.11733410022287727, + "kl_loss": 7.0474042892456055, + "loss_ib": 0.08298540115356445, + "step": 408 + }, + { + "ce_ib": 14.617786407470703, + "ce_orig": 0.7642480731010437, + "epoch": 0.11733410022287727, + "kl_loss": 6.15494966506958, + "loss_ib": 0.0761672779917717, + "step": 408 + }, + { + "ce_ib": 13.998462677001953, + "ce_orig": 0.3132195472717285, + "epoch": 0.11733410022287727, + "kl_loss": 6.85751485824585, + "loss_ib": 0.08257361501455307, + "step": 408 + }, + { + "ce_ib": 11.64556884765625, + "ce_orig": 0.5106444954872131, + "epoch": 0.11762168380185492, + "kl_loss": 7.280755996704102, + "loss_ib": 0.08445312082767487, + "step": 409 + }, + { + "ce_ib": 15.677848815917969, + "ce_orig": 1.337202548980713, + "epoch": 0.11762168380185492, + "kl_loss": 7.441099166870117, + "loss_ib": 0.0900888442993164, + "step": 409 + }, + { + "ce_ib": 16.44597625732422, + "ce_orig": 0.7064342498779297, + "epoch": 0.11762168380185492, + "kl_loss": 7.3761820793151855, + "loss_ib": 0.0902077928185463, + "step": 409 + }, + { + "ce_ib": 15.110173225402832, + "ce_orig": 1.0219260454177856, + "epoch": 0.11762168380185492, + "kl_loss": 7.731924533843994, + "loss_ib": 0.09242941439151764, + "step": 409 + }, + { + "epoch": 0.11790926738083256, + "grad_norm": 0.3323611617088318, + "learning_rate": 9.998048648542153e-06, + "loss": 0.9117, + "step": 410 + }, + { + "ce_ib": 16.602235794067383, + "ce_orig": 1.2994755506515503, + "epoch": 0.11790926738083256, + "kl_loss": 7.015318870544434, + "loss_ib": 0.0867554247379303, + "step": 410 + }, + { + "ce_ib": 15.049646377563477, + "ce_orig": 0.3460133969783783, + "epoch": 0.11790926738083256, + "kl_loss": 6.893805503845215, + "loss_ib": 0.08398769795894623, + "step": 410 + }, + { + "ce_ib": 15.774944305419922, + "ce_orig": 0.9854010343551636, + "epoch": 0.11790926738083256, + "kl_loss": 7.28230094909668, + "loss_ib": 0.08859795331954956, + "step": 410 + }, + { + "ce_ib": 15.553691864013672, + "ce_orig": 1.1951489448547363, + "epoch": 0.11790926738083256, + "kl_loss": 7.345269680023193, + "loss_ib": 0.08900638669729233, + "step": 410 + }, + { + "ce_ib": 13.911700248718262, + "ce_orig": 0.7811279296875, + "epoch": 0.1181968509598102, + "kl_loss": 7.179073333740234, + "loss_ib": 0.08570243418216705, + "step": 411 + }, + { + "ce_ib": 15.475322723388672, + "ce_orig": 1.222588300704956, + "epoch": 0.1181968509598102, + "kl_loss": 6.665042877197266, + "loss_ib": 0.08212574571371078, + "step": 411 + }, + { + "ce_ib": 17.793344497680664, + "ce_orig": 1.2325204610824585, + "epoch": 0.1181968509598102, + "kl_loss": 7.175121307373047, + "loss_ib": 0.08954454958438873, + "step": 411 + }, + { + "ce_ib": 15.057259559631348, + "ce_orig": 1.0142443180084229, + "epoch": 0.1181968509598102, + "kl_loss": 7.372516632080078, + "loss_ib": 0.0887824222445488, + "step": 411 + }, + { + "ce_ib": 12.571479797363281, + "ce_orig": 0.9224892854690552, + "epoch": 0.11848443453878783, + "kl_loss": 7.373008728027344, + "loss_ib": 0.08630156517028809, + "step": 412 + }, + { + "ce_ib": 10.39489459991455, + "ce_orig": 0.5030142068862915, + "epoch": 0.11848443453878783, + "kl_loss": 8.151752471923828, + "loss_ib": 0.0919124186038971, + "step": 412 + }, + { + "ce_ib": 14.446382522583008, + "ce_orig": 0.6720181703567505, + "epoch": 0.11848443453878783, + "kl_loss": 6.821091651916504, + "loss_ib": 0.08265729993581772, + "step": 412 + }, + { + "ce_ib": 13.9717378616333, + "ce_orig": 0.8369989395141602, + "epoch": 0.11848443453878783, + "kl_loss": 7.489789962768555, + "loss_ib": 0.08886963874101639, + "step": 412 + }, + { + "ce_ib": 13.278703689575195, + "ce_orig": 0.5604954361915588, + "epoch": 0.11877201811776547, + "kl_loss": 6.503649711608887, + "loss_ib": 0.07831519842147827, + "step": 413 + }, + { + "ce_ib": 14.608017921447754, + "ce_orig": 1.0261048078536987, + "epoch": 0.11877201811776547, + "kl_loss": 5.455432891845703, + "loss_ib": 0.06916234642267227, + "step": 413 + }, + { + "ce_ib": 13.513481140136719, + "ce_orig": 0.8638569116592407, + "epoch": 0.11877201811776547, + "kl_loss": 7.192094802856445, + "loss_ib": 0.08543442189693451, + "step": 413 + }, + { + "ce_ib": 14.755450248718262, + "ce_orig": 1.0192320346832275, + "epoch": 0.11877201811776547, + "kl_loss": 6.588578224182129, + "loss_ib": 0.0806412324309349, + "step": 413 + }, + { + "ce_ib": 13.155234336853027, + "ce_orig": 1.021310806274414, + "epoch": 0.11905960169674312, + "kl_loss": 6.788153648376465, + "loss_ib": 0.0810367688536644, + "step": 414 + }, + { + "ce_ib": 16.875110626220703, + "ce_orig": 1.1028145551681519, + "epoch": 0.11905960169674312, + "kl_loss": 7.1112470626831055, + "loss_ib": 0.08798757940530777, + "step": 414 + }, + { + "ce_ib": 15.40821647644043, + "ce_orig": 1.1819595098495483, + "epoch": 0.11905960169674312, + "kl_loss": 6.240026473999023, + "loss_ib": 0.07780847698450089, + "step": 414 + }, + { + "ce_ib": 13.668084144592285, + "ce_orig": 0.795190155506134, + "epoch": 0.11905960169674312, + "kl_loss": 7.253881931304932, + "loss_ib": 0.08620689809322357, + "step": 414 + }, + { + "epoch": 0.11934718527572076, + "grad_norm": 0.39023032784461975, + "learning_rate": 9.997825825174889e-06, + "loss": 0.9566, + "step": 415 + }, + { + "ce_ib": 12.504598617553711, + "ce_orig": 1.0579363107681274, + "epoch": 0.11934718527572076, + "kl_loss": 7.502658843994141, + "loss_ib": 0.0875311866402626, + "step": 415 + }, + { + "ce_ib": 17.01618766784668, + "ce_orig": 1.1351312398910522, + "epoch": 0.11934718527572076, + "kl_loss": 7.204804420471191, + "loss_ib": 0.08906423300504684, + "step": 415 + }, + { + "ce_ib": 12.350006103515625, + "ce_orig": 0.7742664217948914, + "epoch": 0.11934718527572076, + "kl_loss": 6.772958278656006, + "loss_ib": 0.0800795927643776, + "step": 415 + }, + { + "ce_ib": 9.901521682739258, + "ce_orig": 0.73922199010849, + "epoch": 0.11934718527572076, + "kl_loss": 6.764502048492432, + "loss_ib": 0.07754654437303543, + "step": 415 + }, + { + "ce_ib": 14.0098237991333, + "ce_orig": 1.1979413032531738, + "epoch": 0.1196347688546984, + "kl_loss": 6.7593770027160645, + "loss_ib": 0.08160359412431717, + "step": 416 + }, + { + "ce_ib": 15.960185050964355, + "ce_orig": 1.1578559875488281, + "epoch": 0.1196347688546984, + "kl_loss": 6.498690128326416, + "loss_ib": 0.08094708621501923, + "step": 416 + }, + { + "ce_ib": 17.35699462890625, + "ce_orig": 1.3880764245986938, + "epoch": 0.1196347688546984, + "kl_loss": 6.918699264526367, + "loss_ib": 0.0865439847111702, + "step": 416 + }, + { + "ce_ib": 10.74183177947998, + "ce_orig": 0.8996087908744812, + "epoch": 0.1196347688546984, + "kl_loss": 6.894655227661133, + "loss_ib": 0.07968838512897491, + "step": 416 + }, + { + "ce_ib": 12.287108421325684, + "ce_orig": 0.556649386882782, + "epoch": 0.11992235243367604, + "kl_loss": 6.701784610748291, + "loss_ib": 0.07930494844913483, + "step": 417 + }, + { + "ce_ib": 16.17057991027832, + "ce_orig": 1.4443365335464478, + "epoch": 0.11992235243367604, + "kl_loss": 7.298962116241455, + "loss_ib": 0.08916020393371582, + "step": 417 + }, + { + "ce_ib": 14.572080612182617, + "ce_orig": 1.5205786228179932, + "epoch": 0.11992235243367604, + "kl_loss": 7.550329208374023, + "loss_ib": 0.09007536619901657, + "step": 417 + }, + { + "ce_ib": 16.126951217651367, + "ce_orig": 1.6121647357940674, + "epoch": 0.11992235243367604, + "kl_loss": 6.497934341430664, + "loss_ib": 0.0811062902212143, + "step": 417 + }, + { + "ce_ib": 11.263349533081055, + "ce_orig": 0.5618718266487122, + "epoch": 0.12020993601265367, + "kl_loss": 7.025567054748535, + "loss_ib": 0.08151902258396149, + "step": 418 + }, + { + "ce_ib": 17.31966781616211, + "ce_orig": 0.9057826399803162, + "epoch": 0.12020993601265367, + "kl_loss": 6.166810989379883, + "loss_ib": 0.07898777723312378, + "step": 418 + }, + { + "ce_ib": 14.19983959197998, + "ce_orig": 0.42357009649276733, + "epoch": 0.12020993601265367, + "kl_loss": 7.017156600952148, + "loss_ib": 0.0843714028596878, + "step": 418 + }, + { + "ce_ib": 12.986461639404297, + "ce_orig": 0.9464898705482483, + "epoch": 0.12020993601265367, + "kl_loss": 7.11696195602417, + "loss_ib": 0.0841560810804367, + "step": 418 + }, + { + "ce_ib": 5.582565784454346, + "ce_orig": 0.16826413571834564, + "epoch": 0.12049751959163132, + "kl_loss": 4.485371112823486, + "loss_ib": 0.05043627694249153, + "step": 419 + }, + { + "ce_ib": 10.89441204071045, + "ce_orig": 0.6798584461212158, + "epoch": 0.12049751959163132, + "kl_loss": 7.0565643310546875, + "loss_ib": 0.08146005123853683, + "step": 419 + }, + { + "ce_ib": 12.65652847290039, + "ce_orig": 0.6649844646453857, + "epoch": 0.12049751959163132, + "kl_loss": 6.781154632568359, + "loss_ib": 0.0804680734872818, + "step": 419 + }, + { + "ce_ib": 14.701452255249023, + "ce_orig": 1.5013635158538818, + "epoch": 0.12049751959163132, + "kl_loss": 6.757889747619629, + "loss_ib": 0.08228034526109695, + "step": 419 + }, + { + "epoch": 0.12078510317060896, + "grad_norm": 0.3697260916233063, + "learning_rate": 9.99759096088519e-06, + "loss": 1.025, + "step": 420 + }, + { + "ce_ib": 15.822488784790039, + "ce_orig": 1.4374024868011475, + "epoch": 0.12078510317060896, + "kl_loss": 6.555868148803711, + "loss_ib": 0.0813811644911766, + "step": 420 + }, + { + "ce_ib": 16.061962127685547, + "ce_orig": 1.07735013961792, + "epoch": 0.12078510317060896, + "kl_loss": 7.0854363441467285, + "loss_ib": 0.08691632002592087, + "step": 420 + }, + { + "ce_ib": 11.24513053894043, + "ce_orig": 0.944068968296051, + "epoch": 0.12078510317060896, + "kl_loss": 7.07620096206665, + "loss_ib": 0.08200713992118835, + "step": 420 + }, + { + "ce_ib": 9.319937705993652, + "ce_orig": 0.4624161422252655, + "epoch": 0.12078510317060896, + "kl_loss": 6.860795974731445, + "loss_ib": 0.07792789489030838, + "step": 420 + }, + { + "ce_ib": 14.560148239135742, + "ce_orig": 0.9780954718589783, + "epoch": 0.1210726867495866, + "kl_loss": 7.158355712890625, + "loss_ib": 0.08614370226860046, + "step": 421 + }, + { + "ce_ib": 11.736210823059082, + "ce_orig": 0.8180287480354309, + "epoch": 0.1210726867495866, + "kl_loss": 6.502464771270752, + "loss_ib": 0.07676085829734802, + "step": 421 + }, + { + "ce_ib": 11.860121726989746, + "ce_orig": 0.7424320578575134, + "epoch": 0.1210726867495866, + "kl_loss": 6.852118968963623, + "loss_ib": 0.08038130402565002, + "step": 421 + }, + { + "ce_ib": 8.209848403930664, + "ce_orig": 0.38280463218688965, + "epoch": 0.1210726867495866, + "kl_loss": 5.902113437652588, + "loss_ib": 0.06723098456859589, + "step": 421 + }, + { + "ce_ib": 11.078797340393066, + "ce_orig": 1.1233412027359009, + "epoch": 0.12136027032856424, + "kl_loss": 7.3105268478393555, + "loss_ib": 0.08418406546115875, + "step": 422 + }, + { + "ce_ib": 11.66649055480957, + "ce_orig": 0.6671274900436401, + "epoch": 0.12136027032856424, + "kl_loss": 7.258861064910889, + "loss_ib": 0.08425509929656982, + "step": 422 + }, + { + "ce_ib": 12.51806354522705, + "ce_orig": 0.8585453033447266, + "epoch": 0.12136027032856424, + "kl_loss": 4.624774932861328, + "loss_ib": 0.05876580998301506, + "step": 422 + }, + { + "ce_ib": 8.197091102600098, + "ce_orig": 0.7487708330154419, + "epoch": 0.12136027032856424, + "kl_loss": 7.244273662567139, + "loss_ib": 0.08063982427120209, + "step": 422 + }, + { + "ce_ib": 13.192282676696777, + "ce_orig": 0.7279910445213318, + "epoch": 0.12164785390754188, + "kl_loss": 6.454935073852539, + "loss_ib": 0.07774163037538528, + "step": 423 + }, + { + "ce_ib": 14.898761749267578, + "ce_orig": 0.8458772301673889, + "epoch": 0.12164785390754188, + "kl_loss": 6.546128273010254, + "loss_ib": 0.0803600400686264, + "step": 423 + }, + { + "ce_ib": 9.523457527160645, + "ce_orig": 0.5388420224189758, + "epoch": 0.12164785390754188, + "kl_loss": 6.858234405517578, + "loss_ib": 0.07810579985380173, + "step": 423 + }, + { + "ce_ib": 15.374958992004395, + "ce_orig": 0.9829406142234802, + "epoch": 0.12164785390754188, + "kl_loss": 7.067386627197266, + "loss_ib": 0.08604881912469864, + "step": 423 + }, + { + "ce_ib": 14.404019355773926, + "ce_orig": 0.7524064779281616, + "epoch": 0.12193543748651951, + "kl_loss": 6.316936492919922, + "loss_ib": 0.07757338136434555, + "step": 424 + }, + { + "ce_ib": 12.131924629211426, + "ce_orig": 0.5756824612617493, + "epoch": 0.12193543748651951, + "kl_loss": 6.499780654907227, + "loss_ib": 0.07712972909212112, + "step": 424 + }, + { + "ce_ib": 14.279351234436035, + "ce_orig": 1.1739262342453003, + "epoch": 0.12193543748651951, + "kl_loss": 6.669313430786133, + "loss_ib": 0.08097247779369354, + "step": 424 + }, + { + "ce_ib": 11.006293296813965, + "ce_orig": 0.9451432228088379, + "epoch": 0.12193543748651951, + "kl_loss": 6.912232875823975, + "loss_ib": 0.08012861758470535, + "step": 424 + }, + { + "epoch": 0.12222302106549716, + "grad_norm": 0.3377140164375305, + "learning_rate": 9.9973440562389e-06, + "loss": 0.9795, + "step": 425 + }, + { + "ce_ib": 9.749797821044922, + "ce_orig": 0.4160507023334503, + "epoch": 0.12222302106549716, + "kl_loss": 6.430499076843262, + "loss_ib": 0.0740547850728035, + "step": 425 + }, + { + "ce_ib": 13.227080345153809, + "ce_orig": 0.7790858745574951, + "epoch": 0.12222302106549716, + "kl_loss": 6.962891578674316, + "loss_ib": 0.08285599201917648, + "step": 425 + }, + { + "ce_ib": 11.88380241394043, + "ce_orig": 0.8634589314460754, + "epoch": 0.12222302106549716, + "kl_loss": 6.578843593597412, + "loss_ib": 0.07767223566770554, + "step": 425 + }, + { + "ce_ib": 11.239786148071289, + "ce_orig": 0.6265932321548462, + "epoch": 0.12222302106549716, + "kl_loss": 6.705416202545166, + "loss_ib": 0.07829394936561584, + "step": 425 + }, + { + "ce_ib": 13.436622619628906, + "ce_orig": 0.9564598202705383, + "epoch": 0.1225106046444748, + "kl_loss": 6.993147850036621, + "loss_ib": 0.08336810022592545, + "step": 426 + }, + { + "ce_ib": 10.594719886779785, + "ce_orig": 0.2862907946109772, + "epoch": 0.1225106046444748, + "kl_loss": 6.47970724105835, + "loss_ib": 0.07539179176092148, + "step": 426 + }, + { + "ce_ib": 11.986897468566895, + "ce_orig": 0.8417707681655884, + "epoch": 0.1225106046444748, + "kl_loss": 6.577683448791504, + "loss_ib": 0.07776372879743576, + "step": 426 + }, + { + "ce_ib": 12.561016082763672, + "ce_orig": 0.631502091884613, + "epoch": 0.1225106046444748, + "kl_loss": 6.709961891174316, + "loss_ib": 0.07966063171625137, + "step": 426 + }, + { + "ce_ib": 11.712442398071289, + "ce_orig": 0.6886617541313171, + "epoch": 0.12279818822345244, + "kl_loss": 5.167166709899902, + "loss_ib": 0.06338410824537277, + "step": 427 + }, + { + "ce_ib": 16.66871452331543, + "ce_orig": 1.3375333547592163, + "epoch": 0.12279818822345244, + "kl_loss": 6.451887130737305, + "loss_ib": 0.08118758350610733, + "step": 427 + }, + { + "ce_ib": 16.5571346282959, + "ce_orig": 0.8830122351646423, + "epoch": 0.12279818822345244, + "kl_loss": 6.616879463195801, + "loss_ib": 0.08272592723369598, + "step": 427 + }, + { + "ce_ib": 14.535783767700195, + "ce_orig": 1.0712122917175293, + "epoch": 0.12279818822345244, + "kl_loss": 7.098201274871826, + "loss_ib": 0.08551779389381409, + "step": 427 + }, + { + "ce_ib": 13.506948471069336, + "ce_orig": 0.8588005304336548, + "epoch": 0.12308577180243008, + "kl_loss": 6.7285261154174805, + "loss_ib": 0.08079220354557037, + "step": 428 + }, + { + "ce_ib": 17.66691780090332, + "ce_orig": 1.5079838037490845, + "epoch": 0.12308577180243008, + "kl_loss": 5.905591011047363, + "loss_ib": 0.07672282308340073, + "step": 428 + }, + { + "ce_ib": 13.11380672454834, + "ce_orig": 0.8807310461997986, + "epoch": 0.12308577180243008, + "kl_loss": 6.711277008056641, + "loss_ib": 0.08022657036781311, + "step": 428 + }, + { + "ce_ib": 16.873750686645508, + "ce_orig": 1.461756944656372, + "epoch": 0.12308577180243008, + "kl_loss": 6.9212870597839355, + "loss_ib": 0.08608661592006683, + "step": 428 + }, + { + "ce_ib": 8.633293151855469, + "ce_orig": 0.3702143728733063, + "epoch": 0.12337335538140771, + "kl_loss": 6.651648998260498, + "loss_ib": 0.0751497820019722, + "step": 429 + }, + { + "ce_ib": 12.873100280761719, + "ce_orig": 0.8258522152900696, + "epoch": 0.12337335538140771, + "kl_loss": 6.824582099914551, + "loss_ib": 0.08111891895532608, + "step": 429 + }, + { + "ce_ib": 13.301980972290039, + "ce_orig": 1.0934252738952637, + "epoch": 0.12337335538140771, + "kl_loss": 6.730596542358398, + "loss_ib": 0.08060794323682785, + "step": 429 + }, + { + "ce_ib": 9.3226318359375, + "ce_orig": 0.567753255367279, + "epoch": 0.12337335538140771, + "kl_loss": 6.865760326385498, + "loss_ib": 0.07798023521900177, + "step": 429 + }, + { + "epoch": 0.12366093896038537, + "grad_norm": 0.2692417800426483, + "learning_rate": 9.99708511183087e-06, + "loss": 1.0201, + "step": 430 + }, + { + "ce_ib": 13.1838960647583, + "ce_orig": 0.8950438499450684, + "epoch": 0.12366093896038537, + "kl_loss": 6.7184343338012695, + "loss_ib": 0.08036824315786362, + "step": 430 + }, + { + "ce_ib": 14.154587745666504, + "ce_orig": 1.36903977394104, + "epoch": 0.12366093896038537, + "kl_loss": 7.069368839263916, + "loss_ib": 0.08484827727079391, + "step": 430 + }, + { + "ce_ib": 19.739904403686523, + "ce_orig": 2.018660068511963, + "epoch": 0.12366093896038537, + "kl_loss": 6.694252967834473, + "loss_ib": 0.08668243139982224, + "step": 430 + }, + { + "ce_ib": 12.907319068908691, + "ce_orig": 1.3039312362670898, + "epoch": 0.12366093896038537, + "kl_loss": 6.895936012268066, + "loss_ib": 0.08186668157577515, + "step": 430 + }, + { + "ce_ib": 12.085116386413574, + "ce_orig": 0.8512650728225708, + "epoch": 0.123948522539363, + "kl_loss": 6.91609001159668, + "loss_ib": 0.08124601095914841, + "step": 431 + }, + { + "ce_ib": 13.673727989196777, + "ce_orig": 1.095442295074463, + "epoch": 0.123948522539363, + "kl_loss": 6.370013236999512, + "loss_ib": 0.07737386226654053, + "step": 431 + }, + { + "ce_ib": 12.386370658874512, + "ce_orig": 0.9683983325958252, + "epoch": 0.123948522539363, + "kl_loss": 6.190813064575195, + "loss_ib": 0.07429450005292892, + "step": 431 + }, + { + "ce_ib": 12.50558090209961, + "ce_orig": 0.49803832173347473, + "epoch": 0.123948522539363, + "kl_loss": 6.456753730773926, + "loss_ib": 0.0770731121301651, + "step": 431 + }, + { + "ce_ib": 17.67017936706543, + "ce_orig": 1.4665132761001587, + "epoch": 0.12423610611834064, + "kl_loss": 6.8333563804626465, + "loss_ib": 0.08600374311208725, + "step": 432 + }, + { + "ce_ib": 15.355440139770508, + "ce_orig": 1.0783402919769287, + "epoch": 0.12423610611834064, + "kl_loss": 6.24898624420166, + "loss_ib": 0.07784529775381088, + "step": 432 + }, + { + "ce_ib": 12.970996856689453, + "ce_orig": 0.908065676689148, + "epoch": 0.12423610611834064, + "kl_loss": 6.597336769104004, + "loss_ib": 0.0789443626999855, + "step": 432 + }, + { + "ce_ib": 10.632279396057129, + "ce_orig": 0.8153582215309143, + "epoch": 0.12423610611834064, + "kl_loss": 6.526000022888184, + "loss_ib": 0.07589227706193924, + "step": 432 + }, + { + "ce_ib": 15.43746566772461, + "ce_orig": 0.9074482917785645, + "epoch": 0.12452368969731828, + "kl_loss": 6.491483688354492, + "loss_ib": 0.0803523063659668, + "step": 433 + }, + { + "ce_ib": 8.455195426940918, + "ce_orig": 0.6100387573242188, + "epoch": 0.12452368969731828, + "kl_loss": 6.833198070526123, + "loss_ib": 0.07678717374801636, + "step": 433 + }, + { + "ce_ib": 12.123833656311035, + "ce_orig": 0.36186474561691284, + "epoch": 0.12452368969731828, + "kl_loss": 6.947832107543945, + "loss_ib": 0.08160214871168137, + "step": 433 + }, + { + "ce_ib": 10.345142364501953, + "ce_orig": 0.6522800326347351, + "epoch": 0.12452368969731828, + "kl_loss": 6.990741729736328, + "loss_ib": 0.08025255799293518, + "step": 433 + }, + { + "ce_ib": 16.296939849853516, + "ce_orig": 0.889448344707489, + "epoch": 0.12481127327629592, + "kl_loss": 6.41514778137207, + "loss_ib": 0.08044841885566711, + "step": 434 + }, + { + "ce_ib": 11.671658515930176, + "ce_orig": 0.5737780332565308, + "epoch": 0.12481127327629592, + "kl_loss": 6.855953693389893, + "loss_ib": 0.0802311971783638, + "step": 434 + }, + { + "ce_ib": 14.455811500549316, + "ce_orig": 0.9031121134757996, + "epoch": 0.12481127327629592, + "kl_loss": 6.662840843200684, + "loss_ib": 0.0810842216014862, + "step": 434 + }, + { + "ce_ib": 11.030969619750977, + "ce_orig": 0.5072352290153503, + "epoch": 0.12481127327629592, + "kl_loss": 6.787086486816406, + "loss_ib": 0.07890183478593826, + "step": 434 + }, + { + "epoch": 0.12509885685527355, + "grad_norm": 0.3564906418323517, + "learning_rate": 9.99681412828496e-06, + "loss": 0.9509, + "step": 435 + }, + { + "ce_ib": 13.964662551879883, + "ce_orig": 0.7778974175453186, + "epoch": 0.12509885685527355, + "kl_loss": 6.725362777709961, + "loss_ib": 0.08121828734874725, + "step": 435 + }, + { + "ce_ib": 11.405627250671387, + "ce_orig": 0.7385088801383972, + "epoch": 0.12509885685527355, + "kl_loss": 6.814925193786621, + "loss_ib": 0.07955487817525864, + "step": 435 + }, + { + "ce_ib": 9.46563720703125, + "ce_orig": 0.48979493975639343, + "epoch": 0.12509885685527355, + "kl_loss": 6.513537883758545, + "loss_ib": 0.07460101693868637, + "step": 435 + }, + { + "ce_ib": 11.945999145507812, + "ce_orig": 1.1584625244140625, + "epoch": 0.12509885685527355, + "kl_loss": 6.719861030578613, + "loss_ib": 0.07914461195468903, + "step": 435 + }, + { + "ce_ib": 10.591259002685547, + "ce_orig": 1.0169495344161987, + "epoch": 0.1253864404342512, + "kl_loss": 6.246793746948242, + "loss_ib": 0.07305919378995895, + "step": 436 + }, + { + "ce_ib": 11.989269256591797, + "ce_orig": 0.7858111262321472, + "epoch": 0.1253864404342512, + "kl_loss": 6.57621431350708, + "loss_ib": 0.07775141298770905, + "step": 436 + }, + { + "ce_ib": 19.393007278442383, + "ce_orig": 1.7850255966186523, + "epoch": 0.1253864404342512, + "kl_loss": 6.306278228759766, + "loss_ib": 0.08245578408241272, + "step": 436 + }, + { + "ce_ib": 12.036297798156738, + "ce_orig": 0.7531123161315918, + "epoch": 0.1253864404342512, + "kl_loss": 6.1202850341796875, + "loss_ib": 0.07323914766311646, + "step": 436 + }, + { + "ce_ib": 10.144059181213379, + "ce_orig": 0.3747836649417877, + "epoch": 0.12567402401322886, + "kl_loss": 4.424054145812988, + "loss_ib": 0.05438460409641266, + "step": 437 + }, + { + "ce_ib": 14.241146087646484, + "ce_orig": 1.154929280281067, + "epoch": 0.12567402401322886, + "kl_loss": 6.789527893066406, + "loss_ib": 0.08213642239570618, + "step": 437 + }, + { + "ce_ib": 10.732177734375, + "ce_orig": 0.37166017293930054, + "epoch": 0.12567402401322886, + "kl_loss": 6.624038219451904, + "loss_ib": 0.07697255909442902, + "step": 437 + }, + { + "ce_ib": 11.875864028930664, + "ce_orig": 0.5969848036766052, + "epoch": 0.12567402401322886, + "kl_loss": 6.518521308898926, + "loss_ib": 0.07706107199192047, + "step": 437 + }, + { + "ce_ib": 10.759546279907227, + "ce_orig": 0.5922636985778809, + "epoch": 0.12596160759220648, + "kl_loss": 6.725497245788574, + "loss_ib": 0.07801451534032822, + "step": 438 + }, + { + "ce_ib": 15.553537368774414, + "ce_orig": 0.734350323677063, + "epoch": 0.12596160759220648, + "kl_loss": 6.394842147827148, + "loss_ib": 0.07950195670127869, + "step": 438 + }, + { + "ce_ib": 6.804005146026611, + "ce_orig": 0.3962157666683197, + "epoch": 0.12596160759220648, + "kl_loss": 5.0119147300720215, + "loss_ib": 0.05692315101623535, + "step": 438 + }, + { + "ce_ib": 12.179630279541016, + "ce_orig": 0.42266571521759033, + "epoch": 0.12596160759220648, + "kl_loss": 6.785035133361816, + "loss_ib": 0.08002997934818268, + "step": 438 + }, + { + "ce_ib": 11.9126558303833, + "ce_orig": 0.6911407709121704, + "epoch": 0.12624919117118413, + "kl_loss": 5.950160026550293, + "loss_ib": 0.07141425460577011, + "step": 439 + }, + { + "ce_ib": 13.094669342041016, + "ce_orig": 0.827157199382782, + "epoch": 0.12624919117118413, + "kl_loss": 6.5986528396606445, + "loss_ib": 0.07908119261264801, + "step": 439 + }, + { + "ce_ib": 9.707082748413086, + "ce_orig": 0.6359635591506958, + "epoch": 0.12624919117118413, + "kl_loss": 5.76313591003418, + "loss_ib": 0.06733844429254532, + "step": 439 + }, + { + "ce_ib": 14.719921112060547, + "ce_orig": 0.9809554815292358, + "epoch": 0.12624919117118413, + "kl_loss": 6.400526523590088, + "loss_ib": 0.0787251815199852, + "step": 439 + }, + { + "epoch": 0.12653677475016176, + "grad_norm": 0.3794838488101959, + "learning_rate": 9.996531106254027e-06, + "loss": 0.9376, + "step": 440 + }, + { + "ce_ib": 12.931950569152832, + "ce_orig": 0.9358404278755188, + "epoch": 0.12653677475016176, + "kl_loss": 6.613272666931152, + "loss_ib": 0.07906467467546463, + "step": 440 + }, + { + "ce_ib": 13.42741584777832, + "ce_orig": 0.7416799068450928, + "epoch": 0.12653677475016176, + "kl_loss": 6.373296737670898, + "loss_ib": 0.07716038078069687, + "step": 440 + }, + { + "ce_ib": 12.632997512817383, + "ce_orig": 1.1645599603652954, + "epoch": 0.12653677475016176, + "kl_loss": 6.401862144470215, + "loss_ib": 0.07665161788463593, + "step": 440 + }, + { + "ce_ib": 16.439931869506836, + "ce_orig": 1.5734604597091675, + "epoch": 0.12653677475016176, + "kl_loss": 6.544001579284668, + "loss_ib": 0.08187995105981827, + "step": 440 + }, + { + "ce_ib": 13.676290512084961, + "ce_orig": 1.0207133293151855, + "epoch": 0.1268243583291394, + "kl_loss": 6.34605073928833, + "loss_ib": 0.0771367996931076, + "step": 441 + }, + { + "ce_ib": 10.394545555114746, + "ce_orig": 0.721448540687561, + "epoch": 0.1268243583291394, + "kl_loss": 6.206020355224609, + "loss_ib": 0.07245474308729172, + "step": 441 + }, + { + "ce_ib": 11.452078819274902, + "ce_orig": 1.0113741159439087, + "epoch": 0.1268243583291394, + "kl_loss": 6.649386405944824, + "loss_ib": 0.07794594019651413, + "step": 441 + }, + { + "ce_ib": 13.160637855529785, + "ce_orig": 0.8914715647697449, + "epoch": 0.1268243583291394, + "kl_loss": 6.297116279602051, + "loss_ib": 0.07613179832696915, + "step": 441 + }, + { + "ce_ib": 13.181857109069824, + "ce_orig": 1.0282145738601685, + "epoch": 0.12711194190811706, + "kl_loss": 6.352758884429932, + "loss_ib": 0.07670944184064865, + "step": 442 + }, + { + "ce_ib": 10.155783653259277, + "ce_orig": 0.7708104252815247, + "epoch": 0.12711194190811706, + "kl_loss": 6.537982940673828, + "loss_ib": 0.0755356103181839, + "step": 442 + }, + { + "ce_ib": 11.089113235473633, + "ce_orig": 0.761037290096283, + "epoch": 0.12711194190811706, + "kl_loss": 6.812819004058838, + "loss_ib": 0.07921729981899261, + "step": 442 + }, + { + "ce_ib": 13.044452667236328, + "ce_orig": 0.8526286482810974, + "epoch": 0.12711194190811706, + "kl_loss": 6.086942672729492, + "loss_ib": 0.07391387969255447, + "step": 442 + }, + { + "ce_ib": 14.216473579406738, + "ce_orig": 1.2142722606658936, + "epoch": 0.12739952548709468, + "kl_loss": 6.530941486358643, + "loss_ib": 0.079525887966156, + "step": 443 + }, + { + "ce_ib": 16.07403564453125, + "ce_orig": 1.820540428161621, + "epoch": 0.12739952548709468, + "kl_loss": 6.171257495880127, + "loss_ib": 0.07778660953044891, + "step": 443 + }, + { + "ce_ib": 15.684346199035645, + "ce_orig": 1.0308736562728882, + "epoch": 0.12739952548709468, + "kl_loss": 6.518500328063965, + "loss_ib": 0.08086934685707092, + "step": 443 + }, + { + "ce_ib": 10.408639907836914, + "ce_orig": 0.4763404130935669, + "epoch": 0.12739952548709468, + "kl_loss": 5.605975151062012, + "loss_ib": 0.06646838784217834, + "step": 443 + }, + { + "ce_ib": 12.710926055908203, + "ce_orig": 0.828603208065033, + "epoch": 0.12768710906607234, + "kl_loss": 6.303215026855469, + "loss_ib": 0.07574307173490524, + "step": 444 + }, + { + "ce_ib": 10.51518440246582, + "ce_orig": 0.46696940064430237, + "epoch": 0.12768710906607234, + "kl_loss": 6.153438568115234, + "loss_ib": 0.0720495656132698, + "step": 444 + }, + { + "ce_ib": 8.491880416870117, + "ce_orig": 0.7530315518379211, + "epoch": 0.12768710906607234, + "kl_loss": 6.541855812072754, + "loss_ib": 0.0739104375243187, + "step": 444 + }, + { + "ce_ib": 16.409770965576172, + "ce_orig": 1.3153358697891235, + "epoch": 0.12768710906607234, + "kl_loss": 6.441512107849121, + "loss_ib": 0.08082488924264908, + "step": 444 + }, + { + "epoch": 0.12797469264504996, + "grad_norm": 0.27752232551574707, + "learning_rate": 9.996236046419941e-06, + "loss": 0.993, + "step": 445 + }, + { + "ce_ib": 13.984725952148438, + "ce_orig": 0.8543322086334229, + "epoch": 0.12797469264504996, + "kl_loss": 5.812187194824219, + "loss_ib": 0.07210659980773926, + "step": 445 + }, + { + "ce_ib": 13.997330665588379, + "ce_orig": 0.5448931455612183, + "epoch": 0.12797469264504996, + "kl_loss": 6.332775592803955, + "loss_ib": 0.07732508331537247, + "step": 445 + }, + { + "ce_ib": 12.42682933807373, + "ce_orig": 0.7178173065185547, + "epoch": 0.12797469264504996, + "kl_loss": 6.424311637878418, + "loss_ib": 0.07666994631290436, + "step": 445 + }, + { + "ce_ib": 9.565461158752441, + "ce_orig": 0.8134915232658386, + "epoch": 0.12797469264504996, + "kl_loss": 6.6445112228393555, + "loss_ib": 0.0760105699300766, + "step": 445 + }, + { + "ce_ib": 8.924689292907715, + "ce_orig": 0.5386297106742859, + "epoch": 0.1282622762240276, + "kl_loss": 6.35334587097168, + "loss_ib": 0.07245814800262451, + "step": 446 + }, + { + "ce_ib": 16.734355926513672, + "ce_orig": 1.0333242416381836, + "epoch": 0.1282622762240276, + "kl_loss": 6.456616401672363, + "loss_ib": 0.0813005119562149, + "step": 446 + }, + { + "ce_ib": 11.987763404846191, + "ce_orig": 1.0803635120391846, + "epoch": 0.1282622762240276, + "kl_loss": 6.508333206176758, + "loss_ib": 0.07707109302282333, + "step": 446 + }, + { + "ce_ib": 10.953421592712402, + "ce_orig": 0.6203237175941467, + "epoch": 0.1282622762240276, + "kl_loss": 6.250512599945068, + "loss_ib": 0.07345854490995407, + "step": 446 + }, + { + "ce_ib": 9.890134811401367, + "ce_orig": 0.7359150648117065, + "epoch": 0.12854985980300526, + "kl_loss": 6.235504627227783, + "loss_ib": 0.07224518060684204, + "step": 447 + }, + { + "ce_ib": 14.127108573913574, + "ce_orig": 0.6828675866127014, + "epoch": 0.12854985980300526, + "kl_loss": 6.255981922149658, + "loss_ib": 0.07668692618608475, + "step": 447 + }, + { + "ce_ib": 7.48207426071167, + "ce_orig": 0.6292188167572021, + "epoch": 0.12854985980300526, + "kl_loss": 6.336426734924316, + "loss_ib": 0.07084634155035019, + "step": 447 + }, + { + "ce_ib": 14.48934268951416, + "ce_orig": 0.9720964431762695, + "epoch": 0.12854985980300526, + "kl_loss": 6.416128158569336, + "loss_ib": 0.07865062355995178, + "step": 447 + }, + { + "ce_ib": 9.954630851745605, + "ce_orig": 0.7827691435813904, + "epoch": 0.12883744338198289, + "kl_loss": 6.488411903381348, + "loss_ib": 0.07483874261379242, + "step": 448 + }, + { + "ce_ib": 9.191707611083984, + "ce_orig": 0.7463720440864563, + "epoch": 0.12883744338198289, + "kl_loss": 6.294938087463379, + "loss_ib": 0.07214108854532242, + "step": 448 + }, + { + "ce_ib": 11.25207805633545, + "ce_orig": 0.8659082651138306, + "epoch": 0.12883744338198289, + "kl_loss": 6.527953147888184, + "loss_ib": 0.07653161138296127, + "step": 448 + }, + { + "ce_ib": 13.492358207702637, + "ce_orig": 0.8569541573524475, + "epoch": 0.12883744338198289, + "kl_loss": 6.035775661468506, + "loss_ib": 0.073850117623806, + "step": 448 + }, + { + "ce_ib": 7.855426788330078, + "ce_orig": 0.2511903643608093, + "epoch": 0.12912502696096054, + "kl_loss": 4.307304859161377, + "loss_ib": 0.050928473472595215, + "step": 449 + }, + { + "ce_ib": 15.168793678283691, + "ce_orig": 0.9671982526779175, + "epoch": 0.12912502696096054, + "kl_loss": 6.6048150062561035, + "loss_ib": 0.08121694624423981, + "step": 449 + }, + { + "ce_ib": 12.226943969726562, + "ce_orig": 0.47483983635902405, + "epoch": 0.12912502696096054, + "kl_loss": 6.506505489349365, + "loss_ib": 0.07729199528694153, + "step": 449 + }, + { + "ce_ib": 10.032635688781738, + "ce_orig": 0.9576376676559448, + "epoch": 0.12912502696096054, + "kl_loss": 6.404877662658691, + "loss_ib": 0.0740814059972763, + "step": 449 + }, + { + "epoch": 0.12941261053993816, + "grad_norm": 0.2948267161846161, + "learning_rate": 9.995928949493568e-06, + "loss": 0.9556, + "step": 450 + }, + { + "ce_ib": 13.594502449035645, + "ce_orig": 0.7634487748146057, + "epoch": 0.12941261053993816, + "kl_loss": 6.214456558227539, + "loss_ib": 0.0757390707731247, + "step": 450 + }, + { + "ce_ib": 11.100218772888184, + "ce_orig": 0.5212127566337585, + "epoch": 0.12941261053993816, + "kl_loss": 6.206829071044922, + "loss_ib": 0.07316850870847702, + "step": 450 + }, + { + "ce_ib": 10.686315536499023, + "ce_orig": 0.7214837670326233, + "epoch": 0.12941261053993816, + "kl_loss": 6.353018760681152, + "loss_ib": 0.07421649992465973, + "step": 450 + }, + { + "ce_ib": 10.190530776977539, + "ce_orig": 0.8089180588722229, + "epoch": 0.12941261053993816, + "kl_loss": 6.573402404785156, + "loss_ib": 0.07592455297708511, + "step": 450 + }, + { + "ce_ib": 9.33697509765625, + "ce_orig": 0.5962152481079102, + "epoch": 0.1297001941189158, + "kl_loss": 6.153956413269043, + "loss_ib": 0.07087653875350952, + "step": 451 + }, + { + "ce_ib": 11.186933517456055, + "ce_orig": 0.6141796708106995, + "epoch": 0.1297001941189158, + "kl_loss": 6.5120697021484375, + "loss_ib": 0.07630763202905655, + "step": 451 + }, + { + "ce_ib": 14.915994644165039, + "ce_orig": 0.9922129511833191, + "epoch": 0.1297001941189158, + "kl_loss": 5.893243312835693, + "loss_ib": 0.0738484263420105, + "step": 451 + }, + { + "ce_ib": 11.000643730163574, + "ce_orig": 0.7535352110862732, + "epoch": 0.1297001941189158, + "kl_loss": 6.5356645584106445, + "loss_ib": 0.07635729014873505, + "step": 451 + }, + { + "ce_ib": 10.349251747131348, + "ce_orig": 0.5634931921958923, + "epoch": 0.12998777769789346, + "kl_loss": 6.1491804122924805, + "loss_ib": 0.0718410536646843, + "step": 452 + }, + { + "ce_ib": 14.159017562866211, + "ce_orig": 1.0384783744812012, + "epoch": 0.12998777769789346, + "kl_loss": 6.039100646972656, + "loss_ib": 0.07455001771450043, + "step": 452 + }, + { + "ce_ib": 9.425822257995605, + "ce_orig": 0.7196267247200012, + "epoch": 0.12998777769789346, + "kl_loss": 6.463262557983398, + "loss_ib": 0.07405844330787659, + "step": 452 + }, + { + "ce_ib": 13.532403945922852, + "ce_orig": 0.748589277267456, + "epoch": 0.12998777769789346, + "kl_loss": 6.038760662078857, + "loss_ib": 0.07392001152038574, + "step": 452 + }, + { + "ce_ib": 10.410351753234863, + "ce_orig": 0.5488285422325134, + "epoch": 0.1302753612768711, + "kl_loss": 6.22846794128418, + "loss_ib": 0.07269503176212311, + "step": 453 + }, + { + "ce_ib": 12.851318359375, + "ce_orig": 0.9907112717628479, + "epoch": 0.1302753612768711, + "kl_loss": 6.350039482116699, + "loss_ib": 0.07635170966386795, + "step": 453 + }, + { + "ce_ib": 13.529624938964844, + "ce_orig": 0.9963613152503967, + "epoch": 0.1302753612768711, + "kl_loss": 6.477352142333984, + "loss_ib": 0.07830314338207245, + "step": 453 + }, + { + "ce_ib": 15.717424392700195, + "ce_orig": 0.9456937313079834, + "epoch": 0.1302753612768711, + "kl_loss": 5.184221267700195, + "loss_ib": 0.0675596371293068, + "step": 453 + }, + { + "ce_ib": 11.294244766235352, + "ce_orig": 0.7741903066635132, + "epoch": 0.13056294485584874, + "kl_loss": 6.089672088623047, + "loss_ib": 0.07219096273183823, + "step": 454 + }, + { + "ce_ib": 18.31461524963379, + "ce_orig": 1.1526209115982056, + "epoch": 0.13056294485584874, + "kl_loss": 5.830255508422852, + "loss_ib": 0.07661716639995575, + "step": 454 + }, + { + "ce_ib": 10.833757400512695, + "ce_orig": 0.6415656805038452, + "epoch": 0.13056294485584874, + "kl_loss": 6.350003242492676, + "loss_ib": 0.0743337869644165, + "step": 454 + }, + { + "ce_ib": 13.974409103393555, + "ce_orig": 1.0648647546768188, + "epoch": 0.13056294485584874, + "kl_loss": 5.880853652954102, + "loss_ib": 0.07278294116258621, + "step": 454 + }, + { + "epoch": 0.13085052843482636, + "grad_norm": 0.3984464406967163, + "learning_rate": 9.995609816214774e-06, + "loss": 0.9742, + "step": 455 + }, + { + "ce_ib": 10.744162559509277, + "ce_orig": 0.9050172567367554, + "epoch": 0.13085052843482636, + "kl_loss": 6.5762224197387695, + "loss_ib": 0.07650638371706009, + "step": 455 + }, + { + "ce_ib": 8.354386329650879, + "ce_orig": 0.7557839155197144, + "epoch": 0.13085052843482636, + "kl_loss": 5.9736809730529785, + "loss_ib": 0.06809119880199432, + "step": 455 + }, + { + "ce_ib": 10.220728874206543, + "ce_orig": 0.536035418510437, + "epoch": 0.13085052843482636, + "kl_loss": 6.033191680908203, + "loss_ib": 0.07055263966321945, + "step": 455 + }, + { + "ce_ib": 18.092269897460938, + "ce_orig": 1.7775410413742065, + "epoch": 0.13085052843482636, + "kl_loss": 6.227479457855225, + "loss_ib": 0.0803670659661293, + "step": 455 + }, + { + "ce_ib": 11.44895076751709, + "ce_orig": 0.6709085702896118, + "epoch": 0.13113811201380401, + "kl_loss": 5.662714004516602, + "loss_ib": 0.06807608902454376, + "step": 456 + }, + { + "ce_ib": 14.361438751220703, + "ce_orig": 1.2514116764068604, + "epoch": 0.13113811201380401, + "kl_loss": 6.082196235656738, + "loss_ib": 0.07518339902162552, + "step": 456 + }, + { + "ce_ib": 15.36643123626709, + "ce_orig": 1.2210443019866943, + "epoch": 0.13113811201380401, + "kl_loss": 6.145846366882324, + "loss_ib": 0.07682488858699799, + "step": 456 + }, + { + "ce_ib": 9.23930835723877, + "ce_orig": 0.4679323732852936, + "epoch": 0.13113811201380401, + "kl_loss": 5.857122421264648, + "loss_ib": 0.0678105279803276, + "step": 456 + }, + { + "ce_ib": 18.075246810913086, + "ce_orig": 1.4876693487167358, + "epoch": 0.13142569559278164, + "kl_loss": 6.473189353942871, + "loss_ib": 0.08280713856220245, + "step": 457 + }, + { + "ce_ib": 10.967455863952637, + "ce_orig": 0.7587569355964661, + "epoch": 0.13142569559278164, + "kl_loss": 6.15573787689209, + "loss_ib": 0.07252483069896698, + "step": 457 + }, + { + "ce_ib": 15.197246551513672, + "ce_orig": 0.9926710724830627, + "epoch": 0.13142569559278164, + "kl_loss": 6.070707321166992, + "loss_ib": 0.07590431720018387, + "step": 457 + }, + { + "ce_ib": 13.815834999084473, + "ce_orig": 0.7014583349227905, + "epoch": 0.13142569559278164, + "kl_loss": 5.886041641235352, + "loss_ib": 0.07267624884843826, + "step": 457 + }, + { + "ce_ib": 11.019710540771484, + "ce_orig": 0.645072340965271, + "epoch": 0.1317132791717593, + "kl_loss": 6.059167861938477, + "loss_ib": 0.07161138951778412, + "step": 458 + }, + { + "ce_ib": 14.778169631958008, + "ce_orig": 1.0757112503051758, + "epoch": 0.1317132791717593, + "kl_loss": 5.94471549987793, + "loss_ib": 0.07422532141208649, + "step": 458 + }, + { + "ce_ib": 14.401138305664062, + "ce_orig": 0.5452439188957214, + "epoch": 0.1317132791717593, + "kl_loss": 6.017438888549805, + "loss_ib": 0.0745755285024643, + "step": 458 + }, + { + "ce_ib": 12.480062484741211, + "ce_orig": 0.863021731376648, + "epoch": 0.1317132791717593, + "kl_loss": 6.301546096801758, + "loss_ib": 0.07549552619457245, + "step": 458 + }, + { + "ce_ib": 6.62205696105957, + "ce_orig": 0.3466249108314514, + "epoch": 0.13200086275073694, + "kl_loss": 5.449361801147461, + "loss_ib": 0.061115674674510956, + "step": 459 + }, + { + "ce_ib": 17.263084411621094, + "ce_orig": 1.387851595878601, + "epoch": 0.13200086275073694, + "kl_loss": 5.80389404296875, + "loss_ib": 0.07530201971530914, + "step": 459 + }, + { + "ce_ib": 12.40281867980957, + "ce_orig": 0.5882412195205688, + "epoch": 0.13200086275073694, + "kl_loss": 5.933984756469727, + "loss_ib": 0.07174266874790192, + "step": 459 + }, + { + "ce_ib": 10.269407272338867, + "ce_orig": 0.6979908347129822, + "epoch": 0.13200086275073694, + "kl_loss": 6.144120216369629, + "loss_ib": 0.07171060889959335, + "step": 459 + }, + { + "epoch": 0.13228844632971457, + "grad_norm": 0.392301082611084, + "learning_rate": 9.995278647352428e-06, + "loss": 0.8929, + "step": 460 + }, + { + "ce_ib": 12.763174057006836, + "ce_orig": 0.5633679032325745, + "epoch": 0.13228844632971457, + "kl_loss": 5.510239601135254, + "loss_ib": 0.06786557286977768, + "step": 460 + }, + { + "ce_ib": 12.732988357543945, + "ce_orig": 0.23703357577323914, + "epoch": 0.13228844632971457, + "kl_loss": 5.470260143280029, + "loss_ib": 0.06743558496236801, + "step": 460 + }, + { + "ce_ib": 10.131989479064941, + "ce_orig": 0.6852651834487915, + "epoch": 0.13228844632971457, + "kl_loss": 6.123454570770264, + "loss_ib": 0.07136653363704681, + "step": 460 + }, + { + "ce_ib": 12.067431449890137, + "ce_orig": 0.7499302625656128, + "epoch": 0.13228844632971457, + "kl_loss": 5.742093086242676, + "loss_ib": 0.06948836147785187, + "step": 460 + }, + { + "ce_ib": 7.949161052703857, + "ce_orig": 0.5661373734474182, + "epoch": 0.13257602990869222, + "kl_loss": 5.77607536315918, + "loss_ib": 0.0657099112868309, + "step": 461 + }, + { + "ce_ib": 11.964921951293945, + "ce_orig": 0.7726590633392334, + "epoch": 0.13257602990869222, + "kl_loss": 5.976222991943359, + "loss_ib": 0.07172714918851852, + "step": 461 + }, + { + "ce_ib": 10.301544189453125, + "ce_orig": 0.6989135146141052, + "epoch": 0.13257602990869222, + "kl_loss": 5.811915397644043, + "loss_ib": 0.06842069327831268, + "step": 461 + }, + { + "ce_ib": 12.018680572509766, + "ce_orig": 0.7360786199569702, + "epoch": 0.13257602990869222, + "kl_loss": 5.752782821655273, + "loss_ib": 0.06954650580883026, + "step": 461 + }, + { + "ce_ib": 9.352438926696777, + "ce_orig": 0.6635019779205322, + "epoch": 0.13286361348766984, + "kl_loss": 5.842447280883789, + "loss_ib": 0.06777691096067429, + "step": 462 + }, + { + "ce_ib": 13.350435256958008, + "ce_orig": 1.5100382566452026, + "epoch": 0.13286361348766984, + "kl_loss": 5.89756965637207, + "loss_ib": 0.07232613116502762, + "step": 462 + }, + { + "ce_ib": 12.962512969970703, + "ce_orig": 0.5430381894111633, + "epoch": 0.13286361348766984, + "kl_loss": 5.671024322509766, + "loss_ib": 0.06967275589704514, + "step": 462 + }, + { + "ce_ib": 15.555392265319824, + "ce_orig": 1.549025297164917, + "epoch": 0.13286361348766984, + "kl_loss": 5.619405269622803, + "loss_ib": 0.07174944132566452, + "step": 462 + }, + { + "ce_ib": 12.196456909179688, + "ce_orig": 0.8404883742332458, + "epoch": 0.1331511970666475, + "kl_loss": 5.807744026184082, + "loss_ib": 0.07027389109134674, + "step": 463 + }, + { + "ce_ib": 10.140958786010742, + "ce_orig": 0.9204779267311096, + "epoch": 0.1331511970666475, + "kl_loss": 5.694162368774414, + "loss_ib": 0.06708257645368576, + "step": 463 + }, + { + "ce_ib": 14.248191833496094, + "ce_orig": 0.7399206161499023, + "epoch": 0.1331511970666475, + "kl_loss": 5.870853900909424, + "loss_ib": 0.07295673340559006, + "step": 463 + }, + { + "ce_ib": 12.88770580291748, + "ce_orig": 0.6557974219322205, + "epoch": 0.1331511970666475, + "kl_loss": 5.735309600830078, + "loss_ib": 0.0702408030629158, + "step": 463 + }, + { + "ce_ib": 11.3536376953125, + "ce_orig": 0.9138345718383789, + "epoch": 0.13343878064562514, + "kl_loss": 5.644110202789307, + "loss_ib": 0.06779474020004272, + "step": 464 + }, + { + "ce_ib": 12.081389427185059, + "ce_orig": 0.947689950466156, + "epoch": 0.13343878064562514, + "kl_loss": 5.870572090148926, + "loss_ib": 0.07078710943460464, + "step": 464 + }, + { + "ce_ib": 8.881162643432617, + "ce_orig": 0.952629029750824, + "epoch": 0.13343878064562514, + "kl_loss": 5.588096618652344, + "loss_ib": 0.06476213037967682, + "step": 464 + }, + { + "ce_ib": 9.299169540405273, + "ce_orig": 0.60807204246521, + "epoch": 0.13343878064562514, + "kl_loss": 5.195706844329834, + "loss_ib": 0.06125623732805252, + "step": 464 + }, + { + "epoch": 0.13372636422460277, + "grad_norm": 0.45935723185539246, + "learning_rate": 9.994935443704391e-06, + "loss": 0.9342, + "step": 465 + }, + { + "ce_ib": 15.007346153259277, + "ce_orig": 1.2609058618545532, + "epoch": 0.13372636422460277, + "kl_loss": 5.37989616394043, + "loss_ib": 0.06880630552768707, + "step": 465 + }, + { + "ce_ib": 7.51100492477417, + "ce_orig": 0.5963767170906067, + "epoch": 0.13372636422460277, + "kl_loss": 5.823338508605957, + "loss_ib": 0.06574439257383347, + "step": 465 + }, + { + "ce_ib": 11.900202751159668, + "ce_orig": 0.4563358724117279, + "epoch": 0.13372636422460277, + "kl_loss": 5.743979454040527, + "loss_ib": 0.06933999806642532, + "step": 465 + }, + { + "ce_ib": 14.910943984985352, + "ce_orig": 0.8666954636573792, + "epoch": 0.13372636422460277, + "kl_loss": 5.86253023147583, + "loss_ib": 0.07353624701499939, + "step": 465 + }, + { + "ce_ib": 11.311989784240723, + "ce_orig": 0.803551197052002, + "epoch": 0.13401394780358042, + "kl_loss": 5.7215681076049805, + "loss_ib": 0.06852766871452332, + "step": 466 + }, + { + "ce_ib": 12.853880882263184, + "ce_orig": 0.5360819697380066, + "epoch": 0.13401394780358042, + "kl_loss": 5.336982727050781, + "loss_ib": 0.06622370332479477, + "step": 466 + }, + { + "ce_ib": 12.081340789794922, + "ce_orig": 0.9268986582756042, + "epoch": 0.13401394780358042, + "kl_loss": 5.40451717376709, + "loss_ib": 0.0661265105009079, + "step": 466 + }, + { + "ce_ib": 10.360613822937012, + "ce_orig": 0.7845146059989929, + "epoch": 0.13401394780358042, + "kl_loss": 5.394956588745117, + "loss_ib": 0.06431017816066742, + "step": 466 + }, + { + "ce_ib": 13.594584465026855, + "ce_orig": 0.7611533999443054, + "epoch": 0.13430153138255804, + "kl_loss": 5.510991096496582, + "loss_ib": 0.06870449334383011, + "step": 467 + }, + { + "ce_ib": 11.39364242553711, + "ce_orig": 0.6709606647491455, + "epoch": 0.13430153138255804, + "kl_loss": 5.329100608825684, + "loss_ib": 0.06468464434146881, + "step": 467 + }, + { + "ce_ib": 15.204527854919434, + "ce_orig": 1.456774115562439, + "epoch": 0.13430153138255804, + "kl_loss": 5.282338619232178, + "loss_ib": 0.06802791357040405, + "step": 467 + }, + { + "ce_ib": 9.643840789794922, + "ce_orig": 0.6612191200256348, + "epoch": 0.13430153138255804, + "kl_loss": 5.333034992218018, + "loss_ib": 0.06297419220209122, + "step": 467 + }, + { + "ce_ib": 11.347843170166016, + "ce_orig": 1.0858099460601807, + "epoch": 0.1345891149615357, + "kl_loss": 5.0229034423828125, + "loss_ib": 0.061576876789331436, + "step": 468 + }, + { + "ce_ib": 12.166611671447754, + "ce_orig": 0.7169655561447144, + "epoch": 0.1345891149615357, + "kl_loss": 5.225884437561035, + "loss_ib": 0.06442546099424362, + "step": 468 + }, + { + "ce_ib": 13.179616928100586, + "ce_orig": 0.9957132339477539, + "epoch": 0.1345891149615357, + "kl_loss": 5.586675643920898, + "loss_ib": 0.06904637068510056, + "step": 468 + }, + { + "ce_ib": 10.90320110321045, + "ce_orig": 0.694044828414917, + "epoch": 0.1345891149615357, + "kl_loss": 4.852416038513184, + "loss_ib": 0.05942736193537712, + "step": 468 + }, + { + "ce_ib": 10.619782447814941, + "ce_orig": 0.4662715196609497, + "epoch": 0.13487669854051335, + "kl_loss": 5.184469223022461, + "loss_ib": 0.06246447563171387, + "step": 469 + }, + { + "ce_ib": 9.323065757751465, + "ce_orig": 0.6369857788085938, + "epoch": 0.13487669854051335, + "kl_loss": 5.136096000671387, + "loss_ib": 0.060684025287628174, + "step": 469 + }, + { + "ce_ib": 10.448844909667969, + "ce_orig": 0.4876580238342285, + "epoch": 0.13487669854051335, + "kl_loss": 5.2007832527160645, + "loss_ib": 0.062456678599119186, + "step": 469 + }, + { + "ce_ib": 11.4501371383667, + "ce_orig": 0.6889066100120544, + "epoch": 0.13487669854051335, + "kl_loss": 5.519144058227539, + "loss_ib": 0.06664157658815384, + "step": 469 + }, + { + "epoch": 0.13516428211949097, + "grad_norm": 0.4563974142074585, + "learning_rate": 9.994580206097524e-06, + "loss": 0.9271, + "step": 470 + }, + { + "ce_ib": 10.97485637664795, + "ce_orig": 0.6626176834106445, + "epoch": 0.13516428211949097, + "kl_loss": 5.277484893798828, + "loss_ib": 0.06374970078468323, + "step": 470 + }, + { + "ce_ib": 8.894420623779297, + "ce_orig": 0.4637753367424011, + "epoch": 0.13516428211949097, + "kl_loss": 5.26820182800293, + "loss_ib": 0.06157643720507622, + "step": 470 + }, + { + "ce_ib": 13.850231170654297, + "ce_orig": 0.8222572803497314, + "epoch": 0.13516428211949097, + "kl_loss": 3.3914904594421387, + "loss_ib": 0.047765135765075684, + "step": 470 + }, + { + "ce_ib": 9.599710464477539, + "ce_orig": 0.8736235499382019, + "epoch": 0.13516428211949097, + "kl_loss": 5.436291694641113, + "loss_ib": 0.06396262347698212, + "step": 470 + }, + { + "ce_ib": 9.535674095153809, + "ce_orig": 0.7246021032333374, + "epoch": 0.13545186569846862, + "kl_loss": 5.152594089508057, + "loss_ib": 0.061061613261699677, + "step": 471 + }, + { + "ce_ib": 11.557367324829102, + "ce_orig": 0.7219054102897644, + "epoch": 0.13545186569846862, + "kl_loss": 4.933101654052734, + "loss_ib": 0.0608883835375309, + "step": 471 + }, + { + "ce_ib": 11.222688674926758, + "ce_orig": 0.7581503987312317, + "epoch": 0.13545186569846862, + "kl_loss": 5.2493391036987305, + "loss_ib": 0.06371607631444931, + "step": 471 + }, + { + "ce_ib": 7.992416858673096, + "ce_orig": 0.7171717286109924, + "epoch": 0.13545186569846862, + "kl_loss": 5.1756591796875, + "loss_ib": 0.05974900722503662, + "step": 471 + }, + { + "ce_ib": 5.774598121643066, + "ce_orig": 0.2617477774620056, + "epoch": 0.13573944927744624, + "kl_loss": 3.714776039123535, + "loss_ib": 0.042922358959913254, + "step": 472 + }, + { + "ce_ib": 15.86950969696045, + "ce_orig": 1.3682712316513062, + "epoch": 0.13573944927744624, + "kl_loss": 5.002852439880371, + "loss_ib": 0.06589803844690323, + "step": 472 + }, + { + "ce_ib": 15.778973579406738, + "ce_orig": 1.778786063194275, + "epoch": 0.13573944927744624, + "kl_loss": 5.145055294036865, + "loss_ib": 0.06722952425479889, + "step": 472 + }, + { + "ce_ib": 9.892607688903809, + "ce_orig": 0.6026872992515564, + "epoch": 0.13573944927744624, + "kl_loss": 4.9220356941223145, + "loss_ib": 0.05911296233534813, + "step": 472 + }, + { + "ce_ib": 14.954146385192871, + "ce_orig": 1.6441103219985962, + "epoch": 0.1360270328564239, + "kl_loss": 5.022004127502441, + "loss_ib": 0.06517418473958969, + "step": 473 + }, + { + "ce_ib": 14.230586051940918, + "ce_orig": 1.1324756145477295, + "epoch": 0.1360270328564239, + "kl_loss": 5.008617401123047, + "loss_ib": 0.0643167570233345, + "step": 473 + }, + { + "ce_ib": 9.870515823364258, + "ce_orig": 0.50955730676651, + "epoch": 0.1360270328564239, + "kl_loss": 5.198309898376465, + "loss_ib": 0.06185361370444298, + "step": 473 + }, + { + "ce_ib": 7.58746862411499, + "ce_orig": 0.576608419418335, + "epoch": 0.1360270328564239, + "kl_loss": 5.101164817810059, + "loss_ib": 0.058599118143320084, + "step": 473 + }, + { + "ce_ib": 11.188053131103516, + "ce_orig": 1.0111771821975708, + "epoch": 0.13631461643540155, + "kl_loss": 4.980704307556152, + "loss_ib": 0.06099509447813034, + "step": 474 + }, + { + "ce_ib": 11.85481071472168, + "ce_orig": 0.8680632710456848, + "epoch": 0.13631461643540155, + "kl_loss": 4.838929653167725, + "loss_ib": 0.060244105756282806, + "step": 474 + }, + { + "ce_ib": 11.664010047912598, + "ce_orig": 0.6513270735740662, + "epoch": 0.13631461643540155, + "kl_loss": 4.969212532043457, + "loss_ib": 0.061356134712696075, + "step": 474 + }, + { + "ce_ib": 12.634693145751953, + "ce_orig": 0.6730305552482605, + "epoch": 0.13631461643540155, + "kl_loss": 4.7532453536987305, + "loss_ib": 0.06016714498400688, + "step": 474 + }, + { + "epoch": 0.13660220001437917, + "grad_norm": 0.41322335600852966, + "learning_rate": 9.99421293538767e-06, + "loss": 0.952, + "step": 475 + }, + { + "ce_ib": 9.952411651611328, + "ce_orig": 0.6906881928443909, + "epoch": 0.13660220001437917, + "kl_loss": 4.742203712463379, + "loss_ib": 0.05737444758415222, + "step": 475 + }, + { + "ce_ib": 11.219844818115234, + "ce_orig": 0.7193230986595154, + "epoch": 0.13660220001437917, + "kl_loss": 4.660732269287109, + "loss_ib": 0.057827167212963104, + "step": 475 + }, + { + "ce_ib": 10.129925727844238, + "ce_orig": 0.7533198595046997, + "epoch": 0.13660220001437917, + "kl_loss": 4.963289260864258, + "loss_ib": 0.05976282060146332, + "step": 475 + }, + { + "ce_ib": 8.743351936340332, + "ce_orig": 0.5432742238044739, + "epoch": 0.13660220001437917, + "kl_loss": 4.890501022338867, + "loss_ib": 0.05764836072921753, + "step": 475 + }, + { + "ce_ib": 16.304983139038086, + "ce_orig": 1.4945815801620483, + "epoch": 0.13688978359335682, + "kl_loss": 4.846185207366943, + "loss_ib": 0.06476683169603348, + "step": 476 + }, + { + "ce_ib": 15.700722694396973, + "ce_orig": 1.2550569772720337, + "epoch": 0.13688978359335682, + "kl_loss": 4.873666763305664, + "loss_ib": 0.0644373893737793, + "step": 476 + }, + { + "ce_ib": 11.274219512939453, + "ce_orig": 0.7325409054756165, + "epoch": 0.13688978359335682, + "kl_loss": 4.502358436584473, + "loss_ib": 0.056297801434993744, + "step": 476 + }, + { + "ce_ib": 12.488386154174805, + "ce_orig": 0.7823653221130371, + "epoch": 0.13688978359335682, + "kl_loss": 4.8517374992370605, + "loss_ib": 0.061005763709545135, + "step": 476 + }, + { + "ce_ib": 9.597527503967285, + "ce_orig": 0.8816280961036682, + "epoch": 0.13717736717233445, + "kl_loss": 4.802122592926025, + "loss_ib": 0.057618748396635056, + "step": 477 + }, + { + "ce_ib": 7.878790855407715, + "ce_orig": 0.669119119644165, + "epoch": 0.13717736717233445, + "kl_loss": 4.979962348937988, + "loss_ib": 0.05767841264605522, + "step": 477 + }, + { + "ce_ib": 10.391016960144043, + "ce_orig": 0.7251664400100708, + "epoch": 0.13717736717233445, + "kl_loss": 4.9956560134887695, + "loss_ib": 0.060347575694322586, + "step": 477 + }, + { + "ce_ib": 11.260332107543945, + "ce_orig": 0.7019518613815308, + "epoch": 0.13717736717233445, + "kl_loss": 4.841489791870117, + "loss_ib": 0.05967522785067558, + "step": 477 + }, + { + "ce_ib": 9.614903450012207, + "ce_orig": 0.8139093518257141, + "epoch": 0.1374649507513121, + "kl_loss": 4.899896621704102, + "loss_ib": 0.058613866567611694, + "step": 478 + }, + { + "ce_ib": 11.576051712036133, + "ce_orig": 0.5408310294151306, + "epoch": 0.1374649507513121, + "kl_loss": 4.424466133117676, + "loss_ib": 0.055820710957050323, + "step": 478 + }, + { + "ce_ib": 12.201531410217285, + "ce_orig": 1.1268466711044312, + "epoch": 0.1374649507513121, + "kl_loss": 4.69233512878418, + "loss_ib": 0.05912488326430321, + "step": 478 + }, + { + "ce_ib": 9.991912841796875, + "ce_orig": 0.9469978213310242, + "epoch": 0.1374649507513121, + "kl_loss": 4.918972969055176, + "loss_ib": 0.05918164178729057, + "step": 478 + }, + { + "ce_ib": 10.741558074951172, + "ce_orig": 0.4736107587814331, + "epoch": 0.13775253433028975, + "kl_loss": 4.647714138031006, + "loss_ib": 0.05721869692206383, + "step": 479 + }, + { + "ce_ib": 13.863398551940918, + "ce_orig": 0.7937755584716797, + "epoch": 0.13775253433028975, + "kl_loss": 4.548620700836182, + "loss_ib": 0.059349603950977325, + "step": 479 + }, + { + "ce_ib": 12.365586280822754, + "ce_orig": 0.7733124494552612, + "epoch": 0.13775253433028975, + "kl_loss": 4.751549243927002, + "loss_ib": 0.05988107621669769, + "step": 479 + }, + { + "ce_ib": 9.24503231048584, + "ce_orig": 0.6917009949684143, + "epoch": 0.13775253433028975, + "kl_loss": 4.599456787109375, + "loss_ib": 0.05523959919810295, + "step": 479 + }, + { + "epoch": 0.13804011790926737, + "grad_norm": 0.28194499015808105, + "learning_rate": 9.993833632459675e-06, + "loss": 0.9569, + "step": 480 + }, + { + "ce_ib": 12.575913429260254, + "ce_orig": 0.8253871202468872, + "epoch": 0.13804011790926737, + "kl_loss": 4.399908542633057, + "loss_ib": 0.056574996560811996, + "step": 480 + }, + { + "ce_ib": 12.079404830932617, + "ce_orig": 0.7573724985122681, + "epoch": 0.13804011790926737, + "kl_loss": 4.628897666931152, + "loss_ib": 0.05836838111281395, + "step": 480 + }, + { + "ce_ib": 14.135128021240234, + "ce_orig": 1.195788860321045, + "epoch": 0.13804011790926737, + "kl_loss": 4.788009166717529, + "loss_ib": 0.062015216797590256, + "step": 480 + }, + { + "ce_ib": 13.080334663391113, + "ce_orig": 1.0536785125732422, + "epoch": 0.13804011790926737, + "kl_loss": 4.861077308654785, + "loss_ib": 0.061691105365753174, + "step": 480 + }, + { + "ce_ib": 15.20880126953125, + "ce_orig": 1.4970366954803467, + "epoch": 0.13832770148824503, + "kl_loss": 4.581524848937988, + "loss_ib": 0.061024051159620285, + "step": 481 + }, + { + "ce_ib": 9.704122543334961, + "ce_orig": 0.7620049118995667, + "epoch": 0.13832770148824503, + "kl_loss": 4.4944915771484375, + "loss_ib": 0.05464903637766838, + "step": 481 + }, + { + "ce_ib": 14.204291343688965, + "ce_orig": 1.2470651865005493, + "epoch": 0.13832770148824503, + "kl_loss": 4.557282447814941, + "loss_ib": 0.059777114540338516, + "step": 481 + }, + { + "ce_ib": 10.805310249328613, + "ce_orig": 0.8926163911819458, + "epoch": 0.13832770148824503, + "kl_loss": 4.54813289642334, + "loss_ib": 0.05628664046525955, + "step": 481 + }, + { + "ce_ib": 8.615751266479492, + "ce_orig": 0.7180139422416687, + "epoch": 0.13861528506722265, + "kl_loss": 4.497965335845947, + "loss_ib": 0.0535954050719738, + "step": 482 + }, + { + "ce_ib": 13.28380012512207, + "ce_orig": 0.8565104007720947, + "epoch": 0.13861528506722265, + "kl_loss": 4.163414001464844, + "loss_ib": 0.05491793900728226, + "step": 482 + }, + { + "ce_ib": 16.117412567138672, + "ce_orig": 1.5288479328155518, + "epoch": 0.13861528506722265, + "kl_loss": 4.652551651000977, + "loss_ib": 0.06264292448759079, + "step": 482 + }, + { + "ce_ib": 15.705317497253418, + "ce_orig": 1.3943670988082886, + "epoch": 0.13861528506722265, + "kl_loss": 4.385931015014648, + "loss_ib": 0.05956462770700455, + "step": 482 + }, + { + "ce_ib": 12.427248001098633, + "ce_orig": 0.8222273588180542, + "epoch": 0.1389028686462003, + "kl_loss": 4.483578681945801, + "loss_ib": 0.05726303532719612, + "step": 483 + }, + { + "ce_ib": 9.109222412109375, + "ce_orig": 0.6663987636566162, + "epoch": 0.1389028686462003, + "kl_loss": 4.623089790344238, + "loss_ib": 0.05534011870622635, + "step": 483 + }, + { + "ce_ib": 16.840740203857422, + "ce_orig": 1.5569089651107788, + "epoch": 0.1389028686462003, + "kl_loss": 4.513226509094238, + "loss_ib": 0.061973001807928085, + "step": 483 + }, + { + "ce_ib": 13.14278507232666, + "ce_orig": 0.7401519417762756, + "epoch": 0.1389028686462003, + "kl_loss": 4.656795978546143, + "loss_ib": 0.05971074476838112, + "step": 483 + }, + { + "ce_ib": 14.228574752807617, + "ce_orig": 0.5264460444450378, + "epoch": 0.13919045222517795, + "kl_loss": 4.304325580596924, + "loss_ib": 0.05727183073759079, + "step": 484 + }, + { + "ce_ib": 16.023216247558594, + "ce_orig": 1.285567283630371, + "epoch": 0.13919045222517795, + "kl_loss": 4.187580585479736, + "loss_ib": 0.05789902061223984, + "step": 484 + }, + { + "ce_ib": 15.015509605407715, + "ce_orig": 1.6406548023223877, + "epoch": 0.13919045222517795, + "kl_loss": 4.492351531982422, + "loss_ib": 0.059939026832580566, + "step": 484 + }, + { + "ce_ib": 13.410754203796387, + "ce_orig": 1.4283090829849243, + "epoch": 0.13919045222517795, + "kl_loss": 4.561341762542725, + "loss_ib": 0.05902417004108429, + "step": 484 + }, + { + "epoch": 0.13947803580415558, + "grad_norm": 0.42398667335510254, + "learning_rate": 9.993442298227365e-06, + "loss": 1.0074, + "step": 485 + }, + { + "ce_ib": 16.616222381591797, + "ce_orig": 1.3357499837875366, + "epoch": 0.13947803580415558, + "kl_loss": 4.212893486022949, + "loss_ib": 0.05874515324831009, + "step": 485 + }, + { + "ce_ib": 11.187970161437988, + "ce_orig": 0.9521239995956421, + "epoch": 0.13947803580415558, + "kl_loss": 4.148205757141113, + "loss_ib": 0.05267002806067467, + "step": 485 + }, + { + "ce_ib": 11.176460266113281, + "ce_orig": 0.48416224122047424, + "epoch": 0.13947803580415558, + "kl_loss": 4.578839302062988, + "loss_ib": 0.05696485564112663, + "step": 485 + }, + { + "ce_ib": 14.064797401428223, + "ce_orig": 1.0060439109802246, + "epoch": 0.13947803580415558, + "kl_loss": 4.404331207275391, + "loss_ib": 0.05810810998082161, + "step": 485 + }, + { + "ce_ib": 15.115540504455566, + "ce_orig": 0.9532531499862671, + "epoch": 0.13976561938313323, + "kl_loss": 4.2361860275268555, + "loss_ib": 0.057477399706840515, + "step": 486 + }, + { + "ce_ib": 9.784770965576172, + "ce_orig": 0.7604672312736511, + "epoch": 0.13976561938313323, + "kl_loss": 3.9188036918640137, + "loss_ib": 0.048972804099321365, + "step": 486 + }, + { + "ce_ib": 10.663135528564453, + "ce_orig": 1.0786744356155396, + "epoch": 0.13976561938313323, + "kl_loss": 4.246434211730957, + "loss_ib": 0.0531274788081646, + "step": 486 + }, + { + "ce_ib": 8.10494613647461, + "ce_orig": 0.5798110961914062, + "epoch": 0.13976561938313323, + "kl_loss": 4.231927871704102, + "loss_ib": 0.05042422190308571, + "step": 486 + }, + { + "ce_ib": 12.337913513183594, + "ce_orig": 0.7753936648368835, + "epoch": 0.14005320296211085, + "kl_loss": 4.142770290374756, + "loss_ib": 0.053765613585710526, + "step": 487 + }, + { + "ce_ib": 15.07596206665039, + "ce_orig": 0.8154249787330627, + "epoch": 0.14005320296211085, + "kl_loss": 4.023566246032715, + "loss_ib": 0.055311620235443115, + "step": 487 + }, + { + "ce_ib": 13.26456069946289, + "ce_orig": 1.3517224788665771, + "epoch": 0.14005320296211085, + "kl_loss": 4.021327018737793, + "loss_ib": 0.053477831184864044, + "step": 487 + }, + { + "ce_ib": 11.321921348571777, + "ce_orig": 0.6585462093353271, + "epoch": 0.14005320296211085, + "kl_loss": 3.9664478302001953, + "loss_ib": 0.050986398011446, + "step": 487 + }, + { + "ce_ib": 11.75551700592041, + "ce_orig": 0.7766084671020508, + "epoch": 0.1403407865410885, + "kl_loss": 3.7924036979675293, + "loss_ib": 0.049679554998874664, + "step": 488 + }, + { + "ce_ib": 15.715422630310059, + "ce_orig": 1.3684426546096802, + "epoch": 0.1403407865410885, + "kl_loss": 3.701831817626953, + "loss_ib": 0.05273373797535896, + "step": 488 + }, + { + "ce_ib": 8.13493824005127, + "ce_orig": 0.45366278290748596, + "epoch": 0.1403407865410885, + "kl_loss": 4.0575456619262695, + "loss_ib": 0.04871039465069771, + "step": 488 + }, + { + "ce_ib": 13.73061752319336, + "ce_orig": 1.1564494371414185, + "epoch": 0.1403407865410885, + "kl_loss": 4.102625846862793, + "loss_ib": 0.05475687235593796, + "step": 488 + }, + { + "ce_ib": 13.535247802734375, + "ce_orig": 0.9498729109764099, + "epoch": 0.14062837012006615, + "kl_loss": 2.995370864868164, + "loss_ib": 0.04348895326256752, + "step": 489 + }, + { + "ce_ib": 7.940598487854004, + "ce_orig": 0.3024381101131439, + "epoch": 0.14062837012006615, + "kl_loss": 3.7741479873657227, + "loss_ib": 0.04568207636475563, + "step": 489 + }, + { + "ce_ib": 11.454663276672363, + "ce_orig": 0.6730047464370728, + "epoch": 0.14062837012006615, + "kl_loss": 3.8181753158569336, + "loss_ib": 0.049636416137218475, + "step": 489 + }, + { + "ce_ib": 8.796786308288574, + "ce_orig": 0.7032504677772522, + "epoch": 0.14062837012006615, + "kl_loss": 3.953084945678711, + "loss_ib": 0.04832763597369194, + "step": 489 + }, + { + "epoch": 0.14091595369904378, + "grad_norm": 0.40750885009765625, + "learning_rate": 9.993038933633556e-06, + "loss": 0.9795, + "step": 490 + }, + { + "ce_ib": 9.949647903442383, + "ce_orig": 0.5322861671447754, + "epoch": 0.14091595369904378, + "kl_loss": 3.8681774139404297, + "loss_ib": 0.048631418496370316, + "step": 490 + }, + { + "ce_ib": 15.904637336730957, + "ce_orig": 1.6171191930770874, + "epoch": 0.14091595369904378, + "kl_loss": 3.6979196071624756, + "loss_ib": 0.05288383364677429, + "step": 490 + }, + { + "ce_ib": 12.01391315460205, + "ce_orig": 1.2611167430877686, + "epoch": 0.14091595369904378, + "kl_loss": 3.5611257553100586, + "loss_ib": 0.04762516915798187, + "step": 490 + }, + { + "ce_ib": 9.39164924621582, + "ce_orig": 0.9851351380348206, + "epoch": 0.14091595369904378, + "kl_loss": 3.2981302738189697, + "loss_ib": 0.04237294942140579, + "step": 490 + }, + { + "ce_ib": 12.360732078552246, + "ce_orig": 1.4508610963821411, + "epoch": 0.14120353727802143, + "kl_loss": 3.5956835746765137, + "loss_ib": 0.0483175665140152, + "step": 491 + }, + { + "ce_ib": 12.937994003295898, + "ce_orig": 1.1618151664733887, + "epoch": 0.14120353727802143, + "kl_loss": 3.563871383666992, + "loss_ib": 0.048576705157756805, + "step": 491 + }, + { + "ce_ib": 17.118064880371094, + "ce_orig": 1.445876121520996, + "epoch": 0.14120353727802143, + "kl_loss": 3.657188892364502, + "loss_ib": 0.05368995666503906, + "step": 491 + }, + { + "ce_ib": 11.58482837677002, + "ce_orig": 0.9607007503509521, + "epoch": 0.14120353727802143, + "kl_loss": 3.736398458480835, + "loss_ib": 0.04894881322979927, + "step": 491 + }, + { + "ce_ib": 8.748994827270508, + "ce_orig": 0.9032168984413147, + "epoch": 0.14149112085699905, + "kl_loss": 3.5443851947784424, + "loss_ib": 0.0441928468644619, + "step": 492 + }, + { + "ce_ib": 10.45909309387207, + "ce_orig": 0.7486007809638977, + "epoch": 0.14149112085699905, + "kl_loss": 3.231372594833374, + "loss_ib": 0.042772818356752396, + "step": 492 + }, + { + "ce_ib": 17.54129409790039, + "ce_orig": 1.9069491624832153, + "epoch": 0.14149112085699905, + "kl_loss": 3.558845043182373, + "loss_ib": 0.05312974378466606, + "step": 492 + }, + { + "ce_ib": 15.910091400146484, + "ce_orig": 1.024116039276123, + "epoch": 0.14149112085699905, + "kl_loss": 3.7413394451141357, + "loss_ib": 0.05332348868250847, + "step": 492 + }, + { + "ce_ib": 8.667006492614746, + "ce_orig": 0.30886101722717285, + "epoch": 0.1417787044359767, + "kl_loss": 3.7348480224609375, + "loss_ib": 0.04601548612117767, + "step": 493 + }, + { + "ce_ib": 11.867700576782227, + "ce_orig": 1.1173264980316162, + "epoch": 0.1417787044359767, + "kl_loss": 3.64851713180542, + "loss_ib": 0.04835287109017372, + "step": 493 + }, + { + "ce_ib": 9.549649238586426, + "ce_orig": 0.6670407056808472, + "epoch": 0.1417787044359767, + "kl_loss": 3.220088481903076, + "loss_ib": 0.04175053536891937, + "step": 493 + }, + { + "ce_ib": 12.184220314025879, + "ce_orig": 0.7223286032676697, + "epoch": 0.1417787044359767, + "kl_loss": 2.9731616973876953, + "loss_ib": 0.041915833950042725, + "step": 493 + }, + { + "ce_ib": 8.040367126464844, + "ce_orig": 0.49942535161972046, + "epoch": 0.14206628801495436, + "kl_loss": 3.2337100505828857, + "loss_ib": 0.04037746787071228, + "step": 494 + }, + { + "ce_ib": 9.84688949584961, + "ce_orig": 0.3545916974544525, + "epoch": 0.14206628801495436, + "kl_loss": 3.2256717681884766, + "loss_ib": 0.0421036034822464, + "step": 494 + }, + { + "ce_ib": 13.763298034667969, + "ce_orig": 1.4035097360610962, + "epoch": 0.14206628801495436, + "kl_loss": 3.3014750480651855, + "loss_ib": 0.04677804931998253, + "step": 494 + }, + { + "ce_ib": 10.143733978271484, + "ce_orig": 0.5595765709877014, + "epoch": 0.14206628801495436, + "kl_loss": 3.2254323959350586, + "loss_ib": 0.042398057878017426, + "step": 494 + }, + { + "epoch": 0.14235387159393198, + "grad_norm": 0.3967653214931488, + "learning_rate": 9.992623539650048e-06, + "loss": 0.947, + "step": 495 + }, + { + "ce_ib": 8.59465217590332, + "ce_orig": 0.6234766840934753, + "epoch": 0.14235387159393198, + "kl_loss": 2.992295026779175, + "loss_ib": 0.038517601788043976, + "step": 495 + }, + { + "ce_ib": 11.884413719177246, + "ce_orig": 1.2884939908981323, + "epoch": 0.14235387159393198, + "kl_loss": 3.2238070964813232, + "loss_ib": 0.04412248358130455, + "step": 495 + }, + { + "ce_ib": 12.295158386230469, + "ce_orig": 0.9079825282096863, + "epoch": 0.14235387159393198, + "kl_loss": 3.261247158050537, + "loss_ib": 0.04490762948989868, + "step": 495 + }, + { + "ce_ib": 11.556446075439453, + "ce_orig": 0.9584062695503235, + "epoch": 0.14235387159393198, + "kl_loss": 2.946765661239624, + "loss_ib": 0.0410241037607193, + "step": 495 + }, + { + "ce_ib": 13.715659141540527, + "ce_orig": 1.301735520362854, + "epoch": 0.14264145517290963, + "kl_loss": 3.156116485595703, + "loss_ib": 0.04527682065963745, + "step": 496 + }, + { + "ce_ib": 12.423558235168457, + "ce_orig": 0.9063977599143982, + "epoch": 0.14264145517290963, + "kl_loss": 3.1704659461975098, + "loss_ib": 0.04412821680307388, + "step": 496 + }, + { + "ce_ib": 15.041820526123047, + "ce_orig": 0.9676111340522766, + "epoch": 0.14264145517290963, + "kl_loss": 3.3395094871520996, + "loss_ib": 0.04843691736459732, + "step": 496 + }, + { + "ce_ib": 17.061294555664062, + "ce_orig": 2.1067917346954346, + "epoch": 0.14264145517290963, + "kl_loss": 3.0767550468444824, + "loss_ib": 0.04782884567975998, + "step": 496 + }, + { + "ce_ib": 11.296546936035156, + "ce_orig": 0.9768355488777161, + "epoch": 0.14292903875188726, + "kl_loss": 2.777761936187744, + "loss_ib": 0.03907416760921478, + "step": 497 + }, + { + "ce_ib": 11.038111686706543, + "ce_orig": 0.8759608268737793, + "epoch": 0.14292903875188726, + "kl_loss": 3.004284381866455, + "loss_ib": 0.04108095541596413, + "step": 497 + }, + { + "ce_ib": 11.033641815185547, + "ce_orig": 0.5562906265258789, + "epoch": 0.14292903875188726, + "kl_loss": 2.9936814308166504, + "loss_ib": 0.04097045958042145, + "step": 497 + }, + { + "ce_ib": 11.028107643127441, + "ce_orig": 1.197322130203247, + "epoch": 0.14292903875188726, + "kl_loss": 2.7564334869384766, + "loss_ib": 0.038592442870140076, + "step": 497 + }, + { + "ce_ib": 14.40247631072998, + "ce_orig": 1.2345876693725586, + "epoch": 0.1432166223308649, + "kl_loss": 2.6974658966064453, + "loss_ib": 0.04137713462114334, + "step": 498 + }, + { + "ce_ib": 12.58112621307373, + "ce_orig": 0.6902965903282166, + "epoch": 0.1432166223308649, + "kl_loss": 2.812513828277588, + "loss_ib": 0.04070626199245453, + "step": 498 + }, + { + "ce_ib": 10.28808879852295, + "ce_orig": 0.5265849232673645, + "epoch": 0.1432166223308649, + "kl_loss": 2.965153217315674, + "loss_ib": 0.039939623326063156, + "step": 498 + }, + { + "ce_ib": 11.784356117248535, + "ce_orig": 0.7255687117576599, + "epoch": 0.1432166223308649, + "kl_loss": 2.6487035751342773, + "loss_ib": 0.03827139362692833, + "step": 498 + }, + { + "ce_ib": 15.454878807067871, + "ce_orig": 1.4307360649108887, + "epoch": 0.14350420590984256, + "kl_loss": 2.638218641281128, + "loss_ib": 0.04183706268668175, + "step": 499 + }, + { + "ce_ib": 11.41547679901123, + "ce_orig": 1.0924228429794312, + "epoch": 0.14350420590984256, + "kl_loss": 2.683103084564209, + "loss_ib": 0.038246504962444305, + "step": 499 + }, + { + "ce_ib": 7.627654552459717, + "ce_orig": 0.6519067883491516, + "epoch": 0.14350420590984256, + "kl_loss": 2.4674363136291504, + "loss_ib": 0.032302018254995346, + "step": 499 + }, + { + "ce_ib": 9.059030532836914, + "ce_orig": 0.9335259199142456, + "epoch": 0.14350420590984256, + "kl_loss": 2.7982892990112305, + "loss_ib": 0.03704192489385605, + "step": 499 + }, + { + "epoch": 0.14379178948882018, + "grad_norm": 0.3535037636756897, + "learning_rate": 9.99219611727762e-06, + "loss": 0.9542, + "step": 500 + }, + { + "ce_ib": 12.202539443969727, + "ce_orig": 1.0921835899353027, + "epoch": 0.14379178948882018, + "kl_loss": 2.6520490646362305, + "loss_ib": 0.03872302919626236, + "step": 500 + }, + { + "ce_ib": 10.411079406738281, + "ce_orig": 1.0469058752059937, + "epoch": 0.14379178948882018, + "kl_loss": 2.550952434539795, + "loss_ib": 0.035920605063438416, + "step": 500 + }, + { + "ce_ib": 10.579100608825684, + "ce_orig": 0.7717140316963196, + "epoch": 0.14379178948882018, + "kl_loss": 2.458261013031006, + "loss_ib": 0.03516170755028725, + "step": 500 + }, + { + "ce_ib": 10.30762004852295, + "ce_orig": 0.47040054202079773, + "epoch": 0.14379178948882018, + "kl_loss": 2.5514392852783203, + "loss_ib": 0.03582201525568962, + "step": 500 + }, + { + "ce_ib": 8.242244720458984, + "ce_orig": 0.7109037041664124, + "epoch": 0.14407937306779783, + "kl_loss": 2.2276763916015625, + "loss_ib": 0.03051900863647461, + "step": 501 + }, + { + "ce_ib": 12.867658615112305, + "ce_orig": 1.3448221683502197, + "epoch": 0.14407937306779783, + "kl_loss": 2.390228271484375, + "loss_ib": 0.036769941449165344, + "step": 501 + }, + { + "ce_ib": 12.047565460205078, + "ce_orig": 1.1863359212875366, + "epoch": 0.14407937306779783, + "kl_loss": 2.2493762969970703, + "loss_ib": 0.034541331231594086, + "step": 501 + }, + { + "ce_ib": 7.998773574829102, + "ce_orig": 0.7724082469940186, + "epoch": 0.14407937306779783, + "kl_loss": 2.218076229095459, + "loss_ib": 0.03017953597009182, + "step": 501 + }, + { + "ce_ib": 8.808771133422852, + "ce_orig": 0.346529096364975, + "epoch": 0.14436695664677546, + "kl_loss": 2.290055751800537, + "loss_ib": 0.031709328293800354, + "step": 502 + }, + { + "ce_ib": 14.042349815368652, + "ce_orig": 0.8455150723457336, + "epoch": 0.14436695664677546, + "kl_loss": 2.330575466156006, + "loss_ib": 0.037348102778196335, + "step": 502 + }, + { + "ce_ib": 9.860107421875, + "ce_orig": 0.6380610466003418, + "epoch": 0.14436695664677546, + "kl_loss": 2.2274303436279297, + "loss_ib": 0.03213440999388695, + "step": 502 + }, + { + "ce_ib": 7.90905237197876, + "ce_orig": 0.7825286388397217, + "epoch": 0.14436695664677546, + "kl_loss": 2.2258763313293457, + "loss_ib": 0.03016781434416771, + "step": 502 + }, + { + "ce_ib": 11.905344009399414, + "ce_orig": 0.5903149247169495, + "epoch": 0.1446545402257531, + "kl_loss": 2.3541271686553955, + "loss_ib": 0.035446614027023315, + "step": 503 + }, + { + "ce_ib": 9.341203689575195, + "ce_orig": 0.5810147523880005, + "epoch": 0.1446545402257531, + "kl_loss": 1.9661822319030762, + "loss_ib": 0.029003025963902473, + "step": 503 + }, + { + "ce_ib": 11.79522705078125, + "ce_orig": 0.9077520966529846, + "epoch": 0.1446545402257531, + "kl_loss": 2.15496826171875, + "loss_ib": 0.03334490954875946, + "step": 503 + }, + { + "ce_ib": 12.743915557861328, + "ce_orig": 0.9383360147476196, + "epoch": 0.1446545402257531, + "kl_loss": 2.3349556922912598, + "loss_ib": 0.03609347343444824, + "step": 503 + }, + { + "ce_ib": 14.691107749938965, + "ce_orig": 1.4200998544692993, + "epoch": 0.14494212380473076, + "kl_loss": 1.9536817073822021, + "loss_ib": 0.034227922558784485, + "step": 504 + }, + { + "ce_ib": 13.637428283691406, + "ce_orig": 1.0158778429031372, + "epoch": 0.14494212380473076, + "kl_loss": 2.0383381843566895, + "loss_ib": 0.0340208075940609, + "step": 504 + }, + { + "ce_ib": 12.56800651550293, + "ce_orig": 0.9646020531654358, + "epoch": 0.14494212380473076, + "kl_loss": 2.094742774963379, + "loss_ib": 0.033515434712171555, + "step": 504 + }, + { + "ce_ib": 12.06141185760498, + "ce_orig": 1.0306986570358276, + "epoch": 0.14494212380473076, + "kl_loss": 2.175787925720215, + "loss_ib": 0.0338192917406559, + "step": 504 + }, + { + "epoch": 0.14522970738370838, + "grad_norm": 0.33350127935409546, + "learning_rate": 9.991756667546032e-06, + "loss": 0.9489, + "step": 505 + }, + { + "ce_ib": 8.817842483520508, + "ce_orig": 0.6972077488899231, + "epoch": 0.14522970738370838, + "kl_loss": 1.7852230072021484, + "loss_ib": 0.026670072227716446, + "step": 505 + }, + { + "ce_ib": 12.437209129333496, + "ce_orig": 0.6099770069122314, + "epoch": 0.14522970738370838, + "kl_loss": 2.4981532096862793, + "loss_ib": 0.03741874173283577, + "step": 505 + }, + { + "ce_ib": 11.220137596130371, + "ce_orig": 0.48771539330482483, + "epoch": 0.14522970738370838, + "kl_loss": 2.1456246376037598, + "loss_ib": 0.03267638385295868, + "step": 505 + }, + { + "ce_ib": 8.924522399902344, + "ce_orig": 0.5051496624946594, + "epoch": 0.14522970738370838, + "kl_loss": 1.9803262948989868, + "loss_ib": 0.028727782890200615, + "step": 505 + }, + { + "ce_ib": 12.348464012145996, + "ce_orig": 0.8960937857627869, + "epoch": 0.14551729096268604, + "kl_loss": 1.814281702041626, + "loss_ib": 0.030491279438138008, + "step": 506 + }, + { + "ce_ib": 7.2094316482543945, + "ce_orig": 0.47113969922065735, + "epoch": 0.14551729096268604, + "kl_loss": 1.9257069826126099, + "loss_ib": 0.026466500014066696, + "step": 506 + }, + { + "ce_ib": 11.242286682128906, + "ce_orig": 0.4751087427139282, + "epoch": 0.14551729096268604, + "kl_loss": 2.0415260791778564, + "loss_ib": 0.03165754675865173, + "step": 506 + }, + { + "ce_ib": 10.97330379486084, + "ce_orig": 0.642387866973877, + "epoch": 0.14551729096268604, + "kl_loss": 2.0187416076660156, + "loss_ib": 0.03116072155535221, + "step": 506 + }, + { + "ce_ib": 16.359783172607422, + "ce_orig": 1.645643949508667, + "epoch": 0.14580487454166366, + "kl_loss": 1.8092081546783447, + "loss_ib": 0.034451864659786224, + "step": 507 + }, + { + "ce_ib": 10.260422706604004, + "ce_orig": 0.9393115639686584, + "epoch": 0.14580487454166366, + "kl_loss": 2.0392160415649414, + "loss_ib": 0.03065258450806141, + "step": 507 + }, + { + "ce_ib": 15.242369651794434, + "ce_orig": 0.9167593717575073, + "epoch": 0.14580487454166366, + "kl_loss": 1.7671477794647217, + "loss_ib": 0.03291384503245354, + "step": 507 + }, + { + "ce_ib": 11.220046043395996, + "ce_orig": 0.7460023164749146, + "epoch": 0.14580487454166366, + "kl_loss": 1.9201838970184326, + "loss_ib": 0.03042188659310341, + "step": 507 + }, + { + "ce_ib": 10.025154113769531, + "ce_orig": 0.6676295399665833, + "epoch": 0.1460924581206413, + "kl_loss": 2.052180767059326, + "loss_ib": 0.03054695948958397, + "step": 508 + }, + { + "ce_ib": 14.332385063171387, + "ce_orig": 0.7957293391227722, + "epoch": 0.1460924581206413, + "kl_loss": 1.7676377296447754, + "loss_ib": 0.032008763402700424, + "step": 508 + }, + { + "ce_ib": 14.902412414550781, + "ce_orig": 1.4487565755844116, + "epoch": 0.1460924581206413, + "kl_loss": 1.6765596866607666, + "loss_ib": 0.031668007373809814, + "step": 508 + }, + { + "ce_ib": 9.683943748474121, + "ce_orig": 1.0197540521621704, + "epoch": 0.1460924581206413, + "kl_loss": 1.610795021057129, + "loss_ib": 0.025791892781853676, + "step": 508 + }, + { + "ce_ib": 15.52167797088623, + "ce_orig": 1.7749841213226318, + "epoch": 0.14638004169961896, + "kl_loss": 1.6767610311508179, + "loss_ib": 0.0322892889380455, + "step": 509 + }, + { + "ce_ib": 13.908830642700195, + "ce_orig": 0.9751031994819641, + "epoch": 0.14638004169961896, + "kl_loss": 1.6931664943695068, + "loss_ib": 0.030840495601296425, + "step": 509 + }, + { + "ce_ib": 9.30358600616455, + "ce_orig": 0.5810970664024353, + "epoch": 0.14638004169961896, + "kl_loss": 1.6350352764129639, + "loss_ib": 0.025653937831521034, + "step": 509 + }, + { + "ce_ib": 12.388398170471191, + "ce_orig": 1.3628201484680176, + "epoch": 0.14638004169961896, + "kl_loss": 1.6909823417663574, + "loss_ib": 0.029298221692442894, + "step": 509 + }, + { + "epoch": 0.1466676252785966, + "grad_norm": 0.18405361473560333, + "learning_rate": 9.991305191514018e-06, + "loss": 0.8849, + "step": 510 + }, + { + "ce_ib": 8.311685562133789, + "ce_orig": 0.7321364283561707, + "epoch": 0.1466676252785966, + "kl_loss": 1.6412349939346313, + "loss_ib": 0.02472403459250927, + "step": 510 + }, + { + "ce_ib": 15.0967378616333, + "ce_orig": 1.2958650588989258, + "epoch": 0.1466676252785966, + "kl_loss": 1.786908745765686, + "loss_ib": 0.032965827733278275, + "step": 510 + }, + { + "ce_ib": 17.36980438232422, + "ce_orig": 1.9791719913482666, + "epoch": 0.1466676252785966, + "kl_loss": 1.8581990003585815, + "loss_ib": 0.03595179319381714, + "step": 510 + }, + { + "ce_ib": 12.289637565612793, + "ce_orig": 0.7127004265785217, + "epoch": 0.1466676252785966, + "kl_loss": 1.685407280921936, + "loss_ib": 0.02914370968937874, + "step": 510 + }, + { + "ce_ib": 15.004842758178711, + "ce_orig": 1.406548023223877, + "epoch": 0.14695520885757424, + "kl_loss": 1.5896825790405273, + "loss_ib": 0.030901670455932617, + "step": 511 + }, + { + "ce_ib": 10.542769432067871, + "ce_orig": 0.7553672790527344, + "epoch": 0.14695520885757424, + "kl_loss": 1.7919988632202148, + "loss_ib": 0.028462758287787437, + "step": 511 + }, + { + "ce_ib": 13.622482299804688, + "ce_orig": 1.207612156867981, + "epoch": 0.14695520885757424, + "kl_loss": 1.765162706375122, + "loss_ib": 0.03127410635352135, + "step": 511 + }, + { + "ce_ib": 8.20363998413086, + "ce_orig": 0.6386350989341736, + "epoch": 0.14695520885757424, + "kl_loss": 1.5854518413543701, + "loss_ib": 0.024058157578110695, + "step": 511 + }, + { + "ce_ib": 13.55444622039795, + "ce_orig": 1.339708924293518, + "epoch": 0.14724279243655186, + "kl_loss": 1.6126033067703247, + "loss_ib": 0.029680481180548668, + "step": 512 + }, + { + "ce_ib": 8.628886222839355, + "ce_orig": 0.5619939565658569, + "epoch": 0.14724279243655186, + "kl_loss": 1.6175487041473389, + "loss_ib": 0.02480437234044075, + "step": 512 + }, + { + "ce_ib": 10.747875213623047, + "ce_orig": 0.8628310561180115, + "epoch": 0.14724279243655186, + "kl_loss": 1.5580902099609375, + "loss_ib": 0.02632877789437771, + "step": 512 + }, + { + "ce_ib": 11.197033882141113, + "ce_orig": 0.588740348815918, + "epoch": 0.14724279243655186, + "kl_loss": 1.625337839126587, + "loss_ib": 0.02745041251182556, + "step": 512 + }, + { + "ce_ib": 17.5482177734375, + "ce_orig": 1.566094160079956, + "epoch": 0.1475303760155295, + "kl_loss": 1.5798025131225586, + "loss_ib": 0.03334624320268631, + "step": 513 + }, + { + "ce_ib": 9.978529930114746, + "ce_orig": 0.9000970125198364, + "epoch": 0.1475303760155295, + "kl_loss": 1.5787134170532227, + "loss_ib": 0.025765664875507355, + "step": 513 + }, + { + "ce_ib": 7.795269012451172, + "ce_orig": 0.5428386926651001, + "epoch": 0.1475303760155295, + "kl_loss": 1.6383824348449707, + "loss_ib": 0.024179093539714813, + "step": 513 + }, + { + "ce_ib": 8.96078109741211, + "ce_orig": 0.45578211545944214, + "epoch": 0.1475303760155295, + "kl_loss": 1.5609797239303589, + "loss_ib": 0.024570578709244728, + "step": 513 + }, + { + "ce_ib": 14.166945457458496, + "ce_orig": 1.237365484237671, + "epoch": 0.14781795959450716, + "kl_loss": 1.5814361572265625, + "loss_ib": 0.029981307685375214, + "step": 514 + }, + { + "ce_ib": 15.119035720825195, + "ce_orig": 1.146514654159546, + "epoch": 0.14781795959450716, + "kl_loss": 1.787841796875, + "loss_ib": 0.032997455447912216, + "step": 514 + }, + { + "ce_ib": 11.368758201599121, + "ce_orig": 1.2245467901229858, + "epoch": 0.14781795959450716, + "kl_loss": 1.763063669204712, + "loss_ib": 0.028999393805861473, + "step": 514 + }, + { + "ce_ib": 10.331769943237305, + "ce_orig": 0.8155557513237, + "epoch": 0.14781795959450716, + "kl_loss": 1.5401368141174316, + "loss_ib": 0.025733135640621185, + "step": 514 + }, + { + "epoch": 0.1481055431734848, + "grad_norm": 0.10760627686977386, + "learning_rate": 9.990841690269293e-06, + "loss": 0.9116, + "step": 515 + }, + { + "ce_ib": 9.445401191711426, + "ce_orig": 0.5457524061203003, + "epoch": 0.1481055431734848, + "kl_loss": 1.5743253231048584, + "loss_ib": 0.02518865466117859, + "step": 515 + }, + { + "ce_ib": 12.43077278137207, + "ce_orig": 1.0758410692214966, + "epoch": 0.1481055431734848, + "kl_loss": 1.5002497434616089, + "loss_ib": 0.02743327058851719, + "step": 515 + }, + { + "ce_ib": 9.92260456085205, + "ce_orig": 0.9019168615341187, + "epoch": 0.1481055431734848, + "kl_loss": 1.5244628190994263, + "loss_ib": 0.025167230516672134, + "step": 515 + }, + { + "ce_ib": 10.745101928710938, + "ce_orig": 0.7567242980003357, + "epoch": 0.1481055431734848, + "kl_loss": 1.851075530052185, + "loss_ib": 0.029255857691168785, + "step": 515 + }, + { + "ce_ib": 11.868610382080078, + "ce_orig": 1.2269304990768433, + "epoch": 0.14839312675246244, + "kl_loss": 1.5262444019317627, + "loss_ib": 0.027131054550409317, + "step": 516 + }, + { + "ce_ib": 12.609333992004395, + "ce_orig": 1.2865411043167114, + "epoch": 0.14839312675246244, + "kl_loss": 1.5142364501953125, + "loss_ib": 0.027751697227358818, + "step": 516 + }, + { + "ce_ib": 15.365309715270996, + "ce_orig": 1.3366944789886475, + "epoch": 0.14839312675246244, + "kl_loss": 1.5448601245880127, + "loss_ib": 0.03081391006708145, + "step": 516 + }, + { + "ce_ib": 7.554969787597656, + "ce_orig": 0.7058290243148804, + "epoch": 0.14839312675246244, + "kl_loss": 1.5178661346435547, + "loss_ib": 0.02273363061249256, + "step": 516 + }, + { + "ce_ib": 8.25823974609375, + "ce_orig": 0.839113175868988, + "epoch": 0.14868071033144006, + "kl_loss": 1.557971477508545, + "loss_ib": 0.023837953805923462, + "step": 517 + }, + { + "ce_ib": 12.297826766967773, + "ce_orig": 1.190186619758606, + "epoch": 0.14868071033144006, + "kl_loss": 1.658672571182251, + "loss_ib": 0.02888455241918564, + "step": 517 + }, + { + "ce_ib": 13.659313201904297, + "ce_orig": 1.3207685947418213, + "epoch": 0.14868071033144006, + "kl_loss": 1.6122772693634033, + "loss_ib": 0.029782084748148918, + "step": 517 + }, + { + "ce_ib": 12.351545333862305, + "ce_orig": 0.750295877456665, + "epoch": 0.14868071033144006, + "kl_loss": 1.6051900386810303, + "loss_ib": 0.028403444215655327, + "step": 517 + }, + { + "ce_ib": 11.500561714172363, + "ce_orig": 0.8358568549156189, + "epoch": 0.14896829391041772, + "kl_loss": 1.5413665771484375, + "loss_ib": 0.02691422961652279, + "step": 518 + }, + { + "ce_ib": 9.108760833740234, + "ce_orig": 0.8471581339836121, + "epoch": 0.14896829391041772, + "kl_loss": 1.4837156534194946, + "loss_ib": 0.023945918306708336, + "step": 518 + }, + { + "ce_ib": 11.258501052856445, + "ce_orig": 0.804084300994873, + "epoch": 0.14896829391041772, + "kl_loss": 1.5666208267211914, + "loss_ib": 0.026924708858132362, + "step": 518 + }, + { + "ce_ib": 13.515926361083984, + "ce_orig": 0.8296307325363159, + "epoch": 0.14896829391041772, + "kl_loss": 1.5592403411865234, + "loss_ib": 0.029108328744769096, + "step": 518 + }, + { + "ce_ib": 13.376294136047363, + "ce_orig": 1.156392216682434, + "epoch": 0.14925587748939537, + "kl_loss": 1.6085054874420166, + "loss_ib": 0.029461350291967392, + "step": 519 + }, + { + "ce_ib": 9.568916320800781, + "ce_orig": 0.4749041795730591, + "epoch": 0.14925587748939537, + "kl_loss": 1.536426305770874, + "loss_ib": 0.02493317984044552, + "step": 519 + }, + { + "ce_ib": 16.30808448791504, + "ce_orig": 1.5859891176223755, + "epoch": 0.14925587748939537, + "kl_loss": 1.6146225929260254, + "loss_ib": 0.03245430812239647, + "step": 519 + }, + { + "ce_ib": 7.4144062995910645, + "ce_orig": 0.6065052151679993, + "epoch": 0.14925587748939537, + "kl_loss": 1.601442575454712, + "loss_ib": 0.02342883124947548, + "step": 519 + }, + { + "epoch": 0.149543461068373, + "grad_norm": 0.1481025665998459, + "learning_rate": 9.990366164928538e-06, + "loss": 0.8984, + "step": 520 + }, + { + "ce_ib": 15.21028995513916, + "ce_orig": 1.826915979385376, + "epoch": 0.149543461068373, + "kl_loss": 1.557509422302246, + "loss_ib": 0.03078538365662098, + "step": 520 + }, + { + "ce_ib": 10.92094612121582, + "ce_orig": 0.6996050477027893, + "epoch": 0.149543461068373, + "kl_loss": 1.522399663925171, + "loss_ib": 0.02614494226872921, + "step": 520 + }, + { + "ce_ib": 11.630789756774902, + "ce_orig": 0.5110880136489868, + "epoch": 0.149543461068373, + "kl_loss": 1.6356468200683594, + "loss_ib": 0.02798725850880146, + "step": 520 + }, + { + "ce_ib": 13.426923751831055, + "ce_orig": 0.7888133525848389, + "epoch": 0.149543461068373, + "kl_loss": 1.5482978820800781, + "loss_ib": 0.028909901157021523, + "step": 520 + }, + { + "ce_ib": 9.767127990722656, + "ce_orig": 0.9814274907112122, + "epoch": 0.14983104464735064, + "kl_loss": 1.706017255783081, + "loss_ib": 0.02682730183005333, + "step": 521 + }, + { + "ce_ib": 8.689159393310547, + "ce_orig": 0.5986825823783875, + "epoch": 0.14983104464735064, + "kl_loss": 1.5086462497711182, + "loss_ib": 0.023775622248649597, + "step": 521 + }, + { + "ce_ib": 7.830185890197754, + "ce_orig": 0.6796808242797852, + "epoch": 0.14983104464735064, + "kl_loss": 1.5338833332061768, + "loss_ib": 0.02316901832818985, + "step": 521 + }, + { + "ce_ib": 8.914192199707031, + "ce_orig": 0.5978474617004395, + "epoch": 0.14983104464735064, + "kl_loss": 1.5090875625610352, + "loss_ib": 0.024005066603422165, + "step": 521 + }, + { + "ce_ib": 11.189813613891602, + "ce_orig": 0.7912343144416809, + "epoch": 0.15011862822632827, + "kl_loss": 1.486729383468628, + "loss_ib": 0.026057107374072075, + "step": 522 + }, + { + "ce_ib": 12.931374549865723, + "ce_orig": 1.4291115999221802, + "epoch": 0.15011862822632827, + "kl_loss": 1.575240135192871, + "loss_ib": 0.028683776035904884, + "step": 522 + }, + { + "ce_ib": 6.15507173538208, + "ce_orig": 0.567773163318634, + "epoch": 0.15011862822632827, + "kl_loss": 1.5185538530349731, + "loss_ib": 0.021340610459446907, + "step": 522 + }, + { + "ce_ib": 13.948975563049316, + "ce_orig": 0.893979012966156, + "epoch": 0.15011862822632827, + "kl_loss": 1.5725568532943726, + "loss_ib": 0.02967454306781292, + "step": 522 + }, + { + "ce_ib": 14.437583923339844, + "ce_orig": 1.6611443758010864, + "epoch": 0.15040621180530592, + "kl_loss": 1.545514702796936, + "loss_ib": 0.029892729595303535, + "step": 523 + }, + { + "ce_ib": 12.710461616516113, + "ce_orig": 0.8755899667739868, + "epoch": 0.15040621180530592, + "kl_loss": 1.5674240589141846, + "loss_ib": 0.028384702280163765, + "step": 523 + }, + { + "ce_ib": 8.738030433654785, + "ce_orig": 0.8012534976005554, + "epoch": 0.15040621180530592, + "kl_loss": 1.5125902891159058, + "loss_ib": 0.023863932117819786, + "step": 523 + }, + { + "ce_ib": 11.891736030578613, + "ce_orig": 1.0337281227111816, + "epoch": 0.15040621180530592, + "kl_loss": 1.5082712173461914, + "loss_ib": 0.026974448934197426, + "step": 523 + }, + { + "ce_ib": 11.432838439941406, + "ce_orig": 0.8564993739128113, + "epoch": 0.15069379538428357, + "kl_loss": 1.4956270456314087, + "loss_ib": 0.02638910710811615, + "step": 524 + }, + { + "ce_ib": 6.101011276245117, + "ce_orig": 0.4317745864391327, + "epoch": 0.15069379538428357, + "kl_loss": 1.626281499862671, + "loss_ib": 0.022363826632499695, + "step": 524 + }, + { + "ce_ib": 11.570913314819336, + "ce_orig": 0.5999628901481628, + "epoch": 0.15069379538428357, + "kl_loss": 1.573062539100647, + "loss_ib": 0.027301540598273277, + "step": 524 + }, + { + "ce_ib": 10.142786979675293, + "ce_orig": 0.8973500728607178, + "epoch": 0.15069379538428357, + "kl_loss": 1.529207468032837, + "loss_ib": 0.025434860959649086, + "step": 524 + }, + { + "epoch": 0.1509813789632612, + "grad_norm": 0.09314551949501038, + "learning_rate": 9.989878616637401e-06, + "loss": 0.9524, + "step": 525 + }, + { + "ce_ib": 17.579530715942383, + "ce_orig": 1.8244338035583496, + "epoch": 0.1509813789632612, + "kl_loss": 1.5392405986785889, + "loss_ib": 0.032971933484077454, + "step": 525 + }, + { + "ce_ib": 11.251588821411133, + "ce_orig": 0.863845705986023, + "epoch": 0.1509813789632612, + "kl_loss": 1.5278338193893433, + "loss_ib": 0.02652992680668831, + "step": 525 + }, + { + "ce_ib": 8.327178955078125, + "ce_orig": 0.787936806678772, + "epoch": 0.1509813789632612, + "kl_loss": 1.545323371887207, + "loss_ib": 0.02378041297197342, + "step": 525 + }, + { + "ce_ib": 10.291125297546387, + "ce_orig": 0.7874522805213928, + "epoch": 0.1509813789632612, + "kl_loss": 1.4875431060791016, + "loss_ib": 0.025166556239128113, + "step": 525 + }, + { + "ce_ib": 12.645198822021484, + "ce_orig": 0.6515507698059082, + "epoch": 0.15126896254223884, + "kl_loss": 1.5123183727264404, + "loss_ib": 0.02776838280260563, + "step": 526 + }, + { + "ce_ib": 10.556811332702637, + "ce_orig": 1.057904601097107, + "epoch": 0.15126896254223884, + "kl_loss": 1.5609617233276367, + "loss_ib": 0.026166429743170738, + "step": 526 + }, + { + "ce_ib": 10.272709846496582, + "ce_orig": 0.8701647520065308, + "epoch": 0.15126896254223884, + "kl_loss": 1.559139370918274, + "loss_ib": 0.02586410380899906, + "step": 526 + }, + { + "ce_ib": 9.878427505493164, + "ce_orig": 0.654448926448822, + "epoch": 0.15126896254223884, + "kl_loss": 1.5793863534927368, + "loss_ib": 0.025672290474176407, + "step": 526 + }, + { + "ce_ib": 10.595402717590332, + "ce_orig": 0.7197730541229248, + "epoch": 0.15155654612121647, + "kl_loss": 1.4838206768035889, + "loss_ib": 0.025433609262108803, + "step": 527 + }, + { + "ce_ib": 8.086220741271973, + "ce_orig": 0.7310401797294617, + "epoch": 0.15155654612121647, + "kl_loss": 1.4655554294586182, + "loss_ib": 0.022741774097085, + "step": 527 + }, + { + "ce_ib": 7.985743522644043, + "ce_orig": 0.873805582523346, + "epoch": 0.15155654612121647, + "kl_loss": 1.5185062885284424, + "loss_ib": 0.02317080646753311, + "step": 527 + }, + { + "ce_ib": 9.111749649047852, + "ce_orig": 0.605055034160614, + "epoch": 0.15155654612121647, + "kl_loss": 1.5961281061172485, + "loss_ib": 0.025073029100894928, + "step": 527 + }, + { + "ce_ib": 13.70055103302002, + "ce_orig": 1.3269081115722656, + "epoch": 0.15184412970019412, + "kl_loss": 1.5185916423797607, + "loss_ib": 0.0288864653557539, + "step": 528 + }, + { + "ce_ib": 12.714378356933594, + "ce_orig": 0.890455424785614, + "epoch": 0.15184412970019412, + "kl_loss": 1.547227382659912, + "loss_ib": 0.028186652809381485, + "step": 528 + }, + { + "ce_ib": 13.768203735351562, + "ce_orig": 0.5043600797653198, + "epoch": 0.15184412970019412, + "kl_loss": 1.6048271656036377, + "loss_ib": 0.02981647476553917, + "step": 528 + }, + { + "ce_ib": 9.12528133392334, + "ce_orig": 0.8668175339698792, + "epoch": 0.15184412970019412, + "kl_loss": 1.5656447410583496, + "loss_ib": 0.02478172816336155, + "step": 528 + }, + { + "ce_ib": 7.842939376831055, + "ce_orig": 0.6462977528572083, + "epoch": 0.15213171327917177, + "kl_loss": 1.4568753242492676, + "loss_ib": 0.022411691024899483, + "step": 529 + }, + { + "ce_ib": 8.972084999084473, + "ce_orig": 0.5574026703834534, + "epoch": 0.15213171327917177, + "kl_loss": 1.5518778562545776, + "loss_ib": 0.02449086308479309, + "step": 529 + }, + { + "ce_ib": 13.132000923156738, + "ce_orig": 1.5121755599975586, + "epoch": 0.15213171327917177, + "kl_loss": 1.761476993560791, + "loss_ib": 0.030746769160032272, + "step": 529 + }, + { + "ce_ib": 10.317779541015625, + "ce_orig": 0.7324342131614685, + "epoch": 0.15213171327917177, + "kl_loss": 1.5073950290679932, + "loss_ib": 0.025391731411218643, + "step": 529 + }, + { + "epoch": 0.1524192968581494, + "grad_norm": 0.09296334534883499, + "learning_rate": 9.989379046570502e-06, + "loss": 0.9041, + "step": 530 + }, + { + "ce_ib": 14.723713874816895, + "ce_orig": 1.1431795358657837, + "epoch": 0.1524192968581494, + "kl_loss": 1.4883091449737549, + "loss_ib": 0.029606804251670837, + "step": 530 + }, + { + "ce_ib": 9.255940437316895, + "ce_orig": 1.317234992980957, + "epoch": 0.1524192968581494, + "kl_loss": 1.508230209350586, + "loss_ib": 0.024338241666555405, + "step": 530 + }, + { + "ce_ib": 10.745719909667969, + "ce_orig": 0.7085793614387512, + "epoch": 0.1524192968581494, + "kl_loss": 1.5349406003952026, + "loss_ib": 0.026095125824213028, + "step": 530 + }, + { + "ce_ib": 11.930793762207031, + "ce_orig": 0.7779906392097473, + "epoch": 0.1524192968581494, + "kl_loss": 1.449808955192566, + "loss_ib": 0.026428882032632828, + "step": 530 + }, + { + "ce_ib": 9.066697120666504, + "ce_orig": 0.8550069332122803, + "epoch": 0.15270688043712705, + "kl_loss": 1.448561191558838, + "loss_ib": 0.02355230785906315, + "step": 531 + }, + { + "ce_ib": 14.419188499450684, + "ce_orig": 0.8470748066902161, + "epoch": 0.15270688043712705, + "kl_loss": 1.476117730140686, + "loss_ib": 0.029180364683270454, + "step": 531 + }, + { + "ce_ib": 4.3977861404418945, + "ce_orig": 0.16003404557704926, + "epoch": 0.15270688043712705, + "kl_loss": 1.4080500602722168, + "loss_ib": 0.018478285521268845, + "step": 531 + }, + { + "ce_ib": 8.737853050231934, + "ce_orig": 0.8578985929489136, + "epoch": 0.15270688043712705, + "kl_loss": 1.5061912536621094, + "loss_ib": 0.02379976399242878, + "step": 531 + }, + { + "ce_ib": 13.226619720458984, + "ce_orig": 1.234938621520996, + "epoch": 0.15299446401610467, + "kl_loss": 1.5700280666351318, + "loss_ib": 0.028926901519298553, + "step": 532 + }, + { + "ce_ib": 13.38469123840332, + "ce_orig": 1.4244154691696167, + "epoch": 0.15299446401610467, + "kl_loss": 1.483577013015747, + "loss_ib": 0.028220461681485176, + "step": 532 + }, + { + "ce_ib": 10.670931816101074, + "ce_orig": 1.0447449684143066, + "epoch": 0.15299446401610467, + "kl_loss": 1.472721815109253, + "loss_ib": 0.025398148223757744, + "step": 532 + }, + { + "ce_ib": 9.630074501037598, + "ce_orig": 0.4802638292312622, + "epoch": 0.15299446401610467, + "kl_loss": 1.4675498008728027, + "loss_ib": 0.024305572733283043, + "step": 532 + }, + { + "ce_ib": 9.232088088989258, + "ce_orig": 0.6876621246337891, + "epoch": 0.15328204759508232, + "kl_loss": 1.5124475955963135, + "loss_ib": 0.02435656450688839, + "step": 533 + }, + { + "ce_ib": 9.646381378173828, + "ce_orig": 0.6899409890174866, + "epoch": 0.15328204759508232, + "kl_loss": 1.4822652339935303, + "loss_ib": 0.024469034746289253, + "step": 533 + }, + { + "ce_ib": 12.952717781066895, + "ce_orig": 1.2678933143615723, + "epoch": 0.15328204759508232, + "kl_loss": 1.5268654823303223, + "loss_ib": 0.02822137251496315, + "step": 533 + }, + { + "ce_ib": 12.745079040527344, + "ce_orig": 0.4462144672870636, + "epoch": 0.15328204759508232, + "kl_loss": 1.4826654195785522, + "loss_ib": 0.02757173217833042, + "step": 533 + }, + { + "ce_ib": 11.242120742797852, + "ce_orig": 0.727728009223938, + "epoch": 0.15356963117405997, + "kl_loss": 1.4587228298187256, + "loss_ib": 0.02582934871315956, + "step": 534 + }, + { + "ce_ib": 13.557268142700195, + "ce_orig": 1.029449701309204, + "epoch": 0.15356963117405997, + "kl_loss": 1.5601624250411987, + "loss_ib": 0.02915889210999012, + "step": 534 + }, + { + "ce_ib": 12.117486000061035, + "ce_orig": 1.2025965452194214, + "epoch": 0.15356963117405997, + "kl_loss": 1.4851754903793335, + "loss_ib": 0.026969240978360176, + "step": 534 + }, + { + "ce_ib": 11.584373474121094, + "ce_orig": 1.0777106285095215, + "epoch": 0.15356963117405997, + "kl_loss": 1.4760735034942627, + "loss_ib": 0.026345109567046165, + "step": 534 + }, + { + "epoch": 0.1538572147530376, + "grad_norm": 0.08552956581115723, + "learning_rate": 9.988867455931422e-06, + "loss": 0.9482, + "step": 535 + }, + { + "ce_ib": 15.782403945922852, + "ce_orig": 1.24473237991333, + "epoch": 0.1538572147530376, + "kl_loss": 1.498823881149292, + "loss_ib": 0.030770642682909966, + "step": 535 + }, + { + "ce_ib": 11.243325233459473, + "ce_orig": 0.5300117135047913, + "epoch": 0.1538572147530376, + "kl_loss": 1.634958028793335, + "loss_ib": 0.02759290672838688, + "step": 535 + }, + { + "ce_ib": 10.156462669372559, + "ce_orig": 0.8086475133895874, + "epoch": 0.1538572147530376, + "kl_loss": 1.5278410911560059, + "loss_ib": 0.02543487399816513, + "step": 535 + }, + { + "ce_ib": 8.719620704650879, + "ce_orig": 0.7736819386482239, + "epoch": 0.1538572147530376, + "kl_loss": 1.4572741985321045, + "loss_ib": 0.023292362689971924, + "step": 535 + }, + { + "ce_ib": 11.448285102844238, + "ce_orig": 1.3073540925979614, + "epoch": 0.15414479833201525, + "kl_loss": 1.468369722366333, + "loss_ib": 0.02613198198378086, + "step": 536 + }, + { + "ce_ib": 10.234390258789062, + "ce_orig": 0.6196459531784058, + "epoch": 0.15414479833201525, + "kl_loss": 1.4478919506072998, + "loss_ib": 0.024713311344385147, + "step": 536 + }, + { + "ce_ib": 8.465949058532715, + "ce_orig": 0.5101594924926758, + "epoch": 0.15414479833201525, + "kl_loss": 1.4375674724578857, + "loss_ib": 0.022841624915599823, + "step": 536 + }, + { + "ce_ib": 14.232527732849121, + "ce_orig": 0.9877519011497498, + "epoch": 0.15414479833201525, + "kl_loss": 1.5590825080871582, + "loss_ib": 0.02982335351407528, + "step": 536 + }, + { + "ce_ib": 13.802165031433105, + "ce_orig": 1.1603584289550781, + "epoch": 0.15443238191099287, + "kl_loss": 1.4649747610092163, + "loss_ib": 0.028451912105083466, + "step": 537 + }, + { + "ce_ib": 9.437994956970215, + "ce_orig": 1.0976390838623047, + "epoch": 0.15443238191099287, + "kl_loss": 1.453848123550415, + "loss_ib": 0.02397647500038147, + "step": 537 + }, + { + "ce_ib": 14.96358585357666, + "ce_orig": 1.2715431451797485, + "epoch": 0.15443238191099287, + "kl_loss": 1.4742491245269775, + "loss_ib": 0.0297060776501894, + "step": 537 + }, + { + "ce_ib": 13.275339126586914, + "ce_orig": 1.3510757684707642, + "epoch": 0.15443238191099287, + "kl_loss": 1.4458937644958496, + "loss_ib": 0.02773427590727806, + "step": 537 + }, + { + "ce_ib": 12.93941879272461, + "ce_orig": 1.3727543354034424, + "epoch": 0.15471996548997052, + "kl_loss": 1.466170310974121, + "loss_ib": 0.027601122856140137, + "step": 538 + }, + { + "ce_ib": 19.104490280151367, + "ce_orig": 2.0155956745147705, + "epoch": 0.15471996548997052, + "kl_loss": 1.4700965881347656, + "loss_ib": 0.03380545601248741, + "step": 538 + }, + { + "ce_ib": 9.300647735595703, + "ce_orig": 1.1129015684127808, + "epoch": 0.15471996548997052, + "kl_loss": 1.4137213230133057, + "loss_ib": 0.02343786135315895, + "step": 538 + }, + { + "ce_ib": 9.72518253326416, + "ce_orig": 1.0089741945266724, + "epoch": 0.15471996548997052, + "kl_loss": 1.412247657775879, + "loss_ib": 0.023847658187150955, + "step": 538 + }, + { + "ce_ib": 11.109746932983398, + "ce_orig": 0.6238597631454468, + "epoch": 0.15500754906894817, + "kl_loss": 1.424318552017212, + "loss_ib": 0.025352930650115013, + "step": 539 + }, + { + "ce_ib": 12.226134300231934, + "ce_orig": 0.7648814916610718, + "epoch": 0.15500754906894817, + "kl_loss": 1.5073215961456299, + "loss_ib": 0.02729935199022293, + "step": 539 + }, + { + "ce_ib": 10.58513069152832, + "ce_orig": 0.5339838862419128, + "epoch": 0.15500754906894817, + "kl_loss": 1.5678870677947998, + "loss_ib": 0.026263998821377754, + "step": 539 + }, + { + "ce_ib": 12.42918586730957, + "ce_orig": 0.8719852566719055, + "epoch": 0.15500754906894817, + "kl_loss": 1.4344046115875244, + "loss_ib": 0.026773232966661453, + "step": 539 + }, + { + "epoch": 0.1552951326479258, + "grad_norm": 0.08646312355995178, + "learning_rate": 9.988343845952697e-06, + "loss": 0.9388, + "step": 540 + }, + { + "ce_ib": 14.183638572692871, + "ce_orig": 0.9510587453842163, + "epoch": 0.1552951326479258, + "kl_loss": 1.4567286968231201, + "loss_ib": 0.02875092439353466, + "step": 540 + }, + { + "ce_ib": 14.285439491271973, + "ce_orig": 1.5962088108062744, + "epoch": 0.1552951326479258, + "kl_loss": 1.4375842809677124, + "loss_ib": 0.028661280870437622, + "step": 540 + }, + { + "ce_ib": 8.6818265914917, + "ce_orig": 0.9919387698173523, + "epoch": 0.1552951326479258, + "kl_loss": 1.4143463373184204, + "loss_ib": 0.02282528765499592, + "step": 540 + }, + { + "ce_ib": 11.165204048156738, + "ce_orig": 0.8994119763374329, + "epoch": 0.1552951326479258, + "kl_loss": 1.5460944175720215, + "loss_ib": 0.02662614732980728, + "step": 540 + }, + { + "ce_ib": 11.784940719604492, + "ce_orig": 0.982570469379425, + "epoch": 0.15558271622690345, + "kl_loss": 1.4393483400344849, + "loss_ib": 0.026178423315286636, + "step": 541 + }, + { + "ce_ib": 11.36942195892334, + "ce_orig": 0.8527225255966187, + "epoch": 0.15558271622690345, + "kl_loss": 1.5223984718322754, + "loss_ib": 0.02659340761601925, + "step": 541 + }, + { + "ce_ib": 12.205092430114746, + "ce_orig": 0.6624218225479126, + "epoch": 0.15558271622690345, + "kl_loss": 1.471164584159851, + "loss_ib": 0.026916736736893654, + "step": 541 + }, + { + "ce_ib": 9.685622215270996, + "ce_orig": 0.652384877204895, + "epoch": 0.15558271622690345, + "kl_loss": 1.4571822881698608, + "loss_ib": 0.024257445707917213, + "step": 541 + }, + { + "ce_ib": 8.85888385772705, + "ce_orig": 0.6606395244598389, + "epoch": 0.15587029980588107, + "kl_loss": 1.4010136127471924, + "loss_ib": 0.022869018837809563, + "step": 542 + }, + { + "ce_ib": 13.208836555480957, + "ce_orig": 1.2833889722824097, + "epoch": 0.15587029980588107, + "kl_loss": 1.411513328552246, + "loss_ib": 0.027323970571160316, + "step": 542 + }, + { + "ce_ib": 10.047677040100098, + "ce_orig": 0.7564672827720642, + "epoch": 0.15587029980588107, + "kl_loss": 1.4649560451507568, + "loss_ib": 0.024697236716747284, + "step": 542 + }, + { + "ce_ib": 8.681495666503906, + "ce_orig": 0.7242369651794434, + "epoch": 0.15587029980588107, + "kl_loss": 1.3844184875488281, + "loss_ib": 0.02252567932009697, + "step": 542 + }, + { + "ce_ib": 9.65349292755127, + "ce_orig": 0.8696082830429077, + "epoch": 0.15615788338485873, + "kl_loss": 1.4093207120895386, + "loss_ib": 0.02374669909477234, + "step": 543 + }, + { + "ce_ib": 13.341421127319336, + "ce_orig": 1.152627944946289, + "epoch": 0.15615788338485873, + "kl_loss": 1.4343910217285156, + "loss_ib": 0.02768533118069172, + "step": 543 + }, + { + "ce_ib": 10.65963363647461, + "ce_orig": 0.5063934326171875, + "epoch": 0.15615788338485873, + "kl_loss": 1.4016281366348267, + "loss_ib": 0.024675915017724037, + "step": 543 + }, + { + "ce_ib": 11.079456329345703, + "ce_orig": 1.1200248003005981, + "epoch": 0.15615788338485873, + "kl_loss": 1.400475263595581, + "loss_ib": 0.025084208697080612, + "step": 543 + }, + { + "ce_ib": 10.452417373657227, + "ce_orig": 0.4226267635822296, + "epoch": 0.15644546696383638, + "kl_loss": 1.4564342498779297, + "loss_ib": 0.02501676045358181, + "step": 544 + }, + { + "ce_ib": 9.227188110351562, + "ce_orig": 0.3644406199455261, + "epoch": 0.15644546696383638, + "kl_loss": 1.4034581184387207, + "loss_ib": 0.023261768743395805, + "step": 544 + }, + { + "ce_ib": 12.85843276977539, + "ce_orig": 1.260372519493103, + "epoch": 0.15644546696383638, + "kl_loss": 1.4097330570220947, + "loss_ib": 0.02695576101541519, + "step": 544 + }, + { + "ce_ib": 15.890623092651367, + "ce_orig": 1.9688998460769653, + "epoch": 0.15644546696383638, + "kl_loss": 1.469724178314209, + "loss_ib": 0.03058786317706108, + "step": 544 + }, + { + "epoch": 0.156733050542814, + "grad_norm": 0.09022902697324753, + "learning_rate": 9.987808217895829e-06, + "loss": 0.9285, + "step": 545 + }, + { + "ce_ib": 8.582206726074219, + "ce_orig": 0.5587666630744934, + "epoch": 0.156733050542814, + "kl_loss": 1.3718297481536865, + "loss_ib": 0.02230050601065159, + "step": 545 + }, + { + "ce_ib": 15.280888557434082, + "ce_orig": 1.1607708930969238, + "epoch": 0.156733050542814, + "kl_loss": 1.4650864601135254, + "loss_ib": 0.029931753873825073, + "step": 545 + }, + { + "ce_ib": 13.70768928527832, + "ce_orig": 1.192724585533142, + "epoch": 0.156733050542814, + "kl_loss": 1.49911367893219, + "loss_ib": 0.02869882434606552, + "step": 545 + }, + { + "ce_ib": 12.85840129852295, + "ce_orig": 1.2321618795394897, + "epoch": 0.156733050542814, + "kl_loss": 1.4133646488189697, + "loss_ib": 0.02699204906821251, + "step": 545 + }, + { + "ce_ib": 10.931328773498535, + "ce_orig": 1.0996520519256592, + "epoch": 0.15702063412179165, + "kl_loss": 1.3952105045318604, + "loss_ib": 0.024883432313799858, + "step": 546 + }, + { + "ce_ib": 11.635273933410645, + "ce_orig": 0.7298911809921265, + "epoch": 0.15702063412179165, + "kl_loss": 1.4759316444396973, + "loss_ib": 0.026394590735435486, + "step": 546 + }, + { + "ce_ib": 9.857783317565918, + "ce_orig": 0.6076138019561768, + "epoch": 0.15702063412179165, + "kl_loss": 1.5050930976867676, + "loss_ib": 0.02490871399641037, + "step": 546 + }, + { + "ce_ib": 11.730413436889648, + "ce_orig": 0.9210866093635559, + "epoch": 0.15702063412179165, + "kl_loss": 1.3834307193756104, + "loss_ib": 0.02556472085416317, + "step": 546 + }, + { + "ce_ib": 9.36272144317627, + "ce_orig": 0.7209946513175964, + "epoch": 0.15730821770076928, + "kl_loss": 1.3859405517578125, + "loss_ib": 0.023222126066684723, + "step": 547 + }, + { + "ce_ib": 10.93961238861084, + "ce_orig": 1.1104698181152344, + "epoch": 0.15730821770076928, + "kl_loss": 1.3689725399017334, + "loss_ib": 0.024629337713122368, + "step": 547 + }, + { + "ce_ib": 10.149394989013672, + "ce_orig": 0.9216436147689819, + "epoch": 0.15730821770076928, + "kl_loss": 1.3667898178100586, + "loss_ib": 0.023817293345928192, + "step": 547 + }, + { + "ce_ib": 9.941133499145508, + "ce_orig": 0.8583170771598816, + "epoch": 0.15730821770076928, + "kl_loss": 1.4201359748840332, + "loss_ib": 0.024142494425177574, + "step": 547 + }, + { + "ce_ib": 13.535919189453125, + "ce_orig": 0.8294936418533325, + "epoch": 0.15759580127974693, + "kl_loss": 1.4394149780273438, + "loss_ib": 0.027930067852139473, + "step": 548 + }, + { + "ce_ib": 6.7044782638549805, + "ce_orig": 0.655543863773346, + "epoch": 0.15759580127974693, + "kl_loss": 1.398592233657837, + "loss_ib": 0.020690400153398514, + "step": 548 + }, + { + "ce_ib": 12.02395248413086, + "ce_orig": 0.5793411731719971, + "epoch": 0.15759580127974693, + "kl_loss": 1.454443335533142, + "loss_ib": 0.02656838670372963, + "step": 548 + }, + { + "ce_ib": 9.43730354309082, + "ce_orig": 0.6028481125831604, + "epoch": 0.15759580127974693, + "kl_loss": 1.3634798526763916, + "loss_ib": 0.023072101175785065, + "step": 548 + }, + { + "ce_ib": 9.443431854248047, + "ce_orig": 0.8150414228439331, + "epoch": 0.15788338485872458, + "kl_loss": 1.3247261047363281, + "loss_ib": 0.02269069105386734, + "step": 549 + }, + { + "ce_ib": 12.465729713439941, + "ce_orig": 0.912677526473999, + "epoch": 0.15788338485872458, + "kl_loss": 1.4468390941619873, + "loss_ib": 0.026934120804071426, + "step": 549 + }, + { + "ce_ib": 11.708540916442871, + "ce_orig": 1.2497539520263672, + "epoch": 0.15788338485872458, + "kl_loss": 1.3749089241027832, + "loss_ib": 0.025457629933953285, + "step": 549 + }, + { + "ce_ib": 5.031269073486328, + "ce_orig": 0.17525199055671692, + "epoch": 0.15788338485872458, + "kl_loss": 1.4064466953277588, + "loss_ib": 0.019095735624432564, + "step": 549 + }, + { + "epoch": 0.1581709684377022, + "grad_norm": 0.09811785817146301, + "learning_rate": 9.987260573051268e-06, + "loss": 0.8876, + "step": 550 + }, + { + "ce_ib": 11.644174575805664, + "ce_orig": 1.3015292882919312, + "epoch": 0.1581709684377022, + "kl_loss": 1.3472862243652344, + "loss_ib": 0.02511703595519066, + "step": 550 + }, + { + "ce_ib": 10.139188766479492, + "ce_orig": 1.1227424144744873, + "epoch": 0.1581709684377022, + "kl_loss": 1.3244848251342773, + "loss_ib": 0.023384036496281624, + "step": 550 + }, + { + "ce_ib": 5.7533979415893555, + "ce_orig": 0.4904988706111908, + "epoch": 0.1581709684377022, + "kl_loss": 1.3460612297058105, + "loss_ib": 0.01921400986611843, + "step": 550 + }, + { + "ce_ib": 14.475028991699219, + "ce_orig": 1.008355736732483, + "epoch": 0.1581709684377022, + "kl_loss": 1.3830070495605469, + "loss_ib": 0.02830510027706623, + "step": 550 + }, + { + "ce_ib": 9.800948143005371, + "ce_orig": 0.5951581001281738, + "epoch": 0.15845855201667985, + "kl_loss": 1.4228395223617554, + "loss_ib": 0.024029342457652092, + "step": 551 + }, + { + "ce_ib": 12.266356468200684, + "ce_orig": 1.480778455734253, + "epoch": 0.15845855201667985, + "kl_loss": 1.3874316215515137, + "loss_ib": 0.026140673086047173, + "step": 551 + }, + { + "ce_ib": 11.24101734161377, + "ce_orig": 0.6378637552261353, + "epoch": 0.15845855201667985, + "kl_loss": 1.3844799995422363, + "loss_ib": 0.0250858161598444, + "step": 551 + }, + { + "ce_ib": 12.70676040649414, + "ce_orig": 1.2595171928405762, + "epoch": 0.15845855201667985, + "kl_loss": 1.3542779684066772, + "loss_ib": 0.026249539107084274, + "step": 551 + }, + { + "ce_ib": 13.323479652404785, + "ce_orig": 1.104166030883789, + "epoch": 0.15874613559565748, + "kl_loss": 1.3231797218322754, + "loss_ib": 0.026555275544524193, + "step": 552 + }, + { + "ce_ib": 8.531795501708984, + "ce_orig": 0.4913962483406067, + "epoch": 0.15874613559565748, + "kl_loss": 1.3631434440612793, + "loss_ib": 0.022163229063153267, + "step": 552 + }, + { + "ce_ib": 12.574892044067383, + "ce_orig": 0.9339185953140259, + "epoch": 0.15874613559565748, + "kl_loss": 1.434931993484497, + "loss_ib": 0.026924211531877518, + "step": 552 + }, + { + "ce_ib": 10.622230529785156, + "ce_orig": 0.9095126390457153, + "epoch": 0.15874613559565748, + "kl_loss": 1.363985538482666, + "loss_ib": 0.0242620836943388, + "step": 552 + }, + { + "ce_ib": 10.206563949584961, + "ce_orig": 0.5735985040664673, + "epoch": 0.15903371917463513, + "kl_loss": 1.4209774732589722, + "loss_ib": 0.02441633865237236, + "step": 553 + }, + { + "ce_ib": 15.614920616149902, + "ce_orig": 1.3737772703170776, + "epoch": 0.15903371917463513, + "kl_loss": 1.4201138019561768, + "loss_ib": 0.02981605939567089, + "step": 553 + }, + { + "ce_ib": 12.950101852416992, + "ce_orig": 0.9557084441184998, + "epoch": 0.15903371917463513, + "kl_loss": 1.4211621284484863, + "loss_ib": 0.027161721140146255, + "step": 553 + }, + { + "ce_ib": 10.123566627502441, + "ce_orig": 0.7820416688919067, + "epoch": 0.15903371917463513, + "kl_loss": 1.3192014694213867, + "loss_ib": 0.023315582424402237, + "step": 553 + }, + { + "ce_ib": 5.311279296875, + "ce_orig": 0.5123794078826904, + "epoch": 0.15932130275361278, + "kl_loss": 1.2463486194610596, + "loss_ib": 0.017774764448404312, + "step": 554 + }, + { + "ce_ib": 10.679170608520508, + "ce_orig": 0.7276657223701477, + "epoch": 0.15932130275361278, + "kl_loss": 1.3026518821716309, + "loss_ib": 0.02370568923652172, + "step": 554 + }, + { + "ce_ib": 13.4666109085083, + "ce_orig": 1.2032169103622437, + "epoch": 0.15932130275361278, + "kl_loss": 1.417797327041626, + "loss_ib": 0.027644583955407143, + "step": 554 + }, + { + "ce_ib": 12.01272201538086, + "ce_orig": 0.9139970541000366, + "epoch": 0.15932130275361278, + "kl_loss": 1.4650115966796875, + "loss_ib": 0.026662837713956833, + "step": 554 + }, + { + "epoch": 0.1596088863325904, + "grad_norm": 0.09588459134101868, + "learning_rate": 9.98670091273842e-06, + "loss": 0.9863, + "step": 555 + }, + { + "ce_ib": 12.853775978088379, + "ce_orig": 0.8478192090988159, + "epoch": 0.1596088863325904, + "kl_loss": 1.337024211883545, + "loss_ib": 0.02622401714324951, + "step": 555 + }, + { + "ce_ib": 9.791227340698242, + "ce_orig": 0.7623945474624634, + "epoch": 0.1596088863325904, + "kl_loss": 1.3745824098587036, + "loss_ib": 0.023537050932645798, + "step": 555 + }, + { + "ce_ib": 11.515276908874512, + "ce_orig": 0.6505551338195801, + "epoch": 0.1596088863325904, + "kl_loss": 1.350890040397644, + "loss_ib": 0.025024177506566048, + "step": 555 + }, + { + "ce_ib": 13.35179328918457, + "ce_orig": 1.0168282985687256, + "epoch": 0.1596088863325904, + "kl_loss": 1.336082100868225, + "loss_ib": 0.026712613180279732, + "step": 555 + }, + { + "ce_ib": 17.056640625, + "ce_orig": 1.6616370677947998, + "epoch": 0.15989646991156806, + "kl_loss": 1.3520833253860474, + "loss_ib": 0.03057747334241867, + "step": 556 + }, + { + "ce_ib": 6.637577056884766, + "ce_orig": 0.5979457497596741, + "epoch": 0.15989646991156806, + "kl_loss": 1.32643723487854, + "loss_ib": 0.019901949912309647, + "step": 556 + }, + { + "ce_ib": 11.517195701599121, + "ce_orig": 1.0731699466705322, + "epoch": 0.15989646991156806, + "kl_loss": 1.3701467514038086, + "loss_ib": 0.0252186618745327, + "step": 556 + }, + { + "ce_ib": 7.839071750640869, + "ce_orig": 0.57491534948349, + "epoch": 0.15989646991156806, + "kl_loss": 1.281550645828247, + "loss_ib": 0.020654577761888504, + "step": 556 + }, + { + "ce_ib": 5.376894950866699, + "ce_orig": 0.27646228671073914, + "epoch": 0.16018405349054568, + "kl_loss": 1.4170054197311401, + "loss_ib": 0.019546950235962868, + "step": 557 + }, + { + "ce_ib": 7.960681915283203, + "ce_orig": 0.8380683064460754, + "epoch": 0.16018405349054568, + "kl_loss": 1.3399286270141602, + "loss_ib": 0.021359967067837715, + "step": 557 + }, + { + "ce_ib": 12.966280937194824, + "ce_orig": 1.0689243078231812, + "epoch": 0.16018405349054568, + "kl_loss": 1.425252914428711, + "loss_ib": 0.027218809351325035, + "step": 557 + }, + { + "ce_ib": 12.531590461730957, + "ce_orig": 1.2268368005752563, + "epoch": 0.16018405349054568, + "kl_loss": 1.3414859771728516, + "loss_ib": 0.025946449488401413, + "step": 557 + }, + { + "ce_ib": 8.23051929473877, + "ce_orig": 0.6497761607170105, + "epoch": 0.16047163706952333, + "kl_loss": 1.3148771524429321, + "loss_ib": 0.021379288285970688, + "step": 558 + }, + { + "ce_ib": 11.831758499145508, + "ce_orig": 1.0878973007202148, + "epoch": 0.16047163706952333, + "kl_loss": 1.344857096672058, + "loss_ib": 0.025280330330133438, + "step": 558 + }, + { + "ce_ib": 10.697997093200684, + "ce_orig": 0.9739592671394348, + "epoch": 0.16047163706952333, + "kl_loss": 1.2617008686065674, + "loss_ib": 0.023315005004405975, + "step": 558 + }, + { + "ce_ib": 10.034689903259277, + "ce_orig": 0.7774488925933838, + "epoch": 0.16047163706952333, + "kl_loss": 1.3792924880981445, + "loss_ib": 0.02382761426270008, + "step": 558 + }, + { + "ce_ib": 8.980086326599121, + "ce_orig": 0.5476792454719543, + "epoch": 0.16075922064850098, + "kl_loss": 1.3168349266052246, + "loss_ib": 0.022148434072732925, + "step": 559 + }, + { + "ce_ib": 7.035679340362549, + "ce_orig": 0.627990186214447, + "epoch": 0.16075922064850098, + "kl_loss": 1.3569344282150269, + "loss_ib": 0.020605022087693214, + "step": 559 + }, + { + "ce_ib": 12.099848747253418, + "ce_orig": 1.201551914215088, + "epoch": 0.16075922064850098, + "kl_loss": 1.2747890949249268, + "loss_ib": 0.024847740307450294, + "step": 559 + }, + { + "ce_ib": 8.865999221801758, + "ce_orig": 0.7412122488021851, + "epoch": 0.16075922064850098, + "kl_loss": 1.4771380424499512, + "loss_ib": 0.023637380450963974, + "step": 559 + }, + { + "epoch": 0.1610468042274786, + "grad_norm": 0.0971461683511734, + "learning_rate": 9.986129238305635e-06, + "loss": 0.8747, + "step": 560 + }, + { + "ce_ib": 7.6997246742248535, + "ce_orig": 0.6025680303573608, + "epoch": 0.1610468042274786, + "kl_loss": 1.276071310043335, + "loss_ib": 0.02046043798327446, + "step": 560 + }, + { + "ce_ib": 7.523832321166992, + "ce_orig": 0.7658670544624329, + "epoch": 0.1610468042274786, + "kl_loss": 1.4311625957489014, + "loss_ib": 0.021835457533597946, + "step": 560 + }, + { + "ce_ib": 11.750297546386719, + "ce_orig": 0.4812588095664978, + "epoch": 0.1610468042274786, + "kl_loss": 1.3406429290771484, + "loss_ib": 0.02515672706067562, + "step": 560 + }, + { + "ce_ib": 10.141862869262695, + "ce_orig": 0.8624674081802368, + "epoch": 0.1610468042274786, + "kl_loss": 1.345240831375122, + "loss_ib": 0.023594269528985023, + "step": 560 + }, + { + "ce_ib": 9.809609413146973, + "ce_orig": 0.9545562863349915, + "epoch": 0.16133438780645626, + "kl_loss": 1.27205228805542, + "loss_ib": 0.02253013104200363, + "step": 561 + }, + { + "ce_ib": 12.615915298461914, + "ce_orig": 1.327091932296753, + "epoch": 0.16133438780645626, + "kl_loss": 1.282986044883728, + "loss_ib": 0.02544577606022358, + "step": 561 + }, + { + "ce_ib": 10.288837432861328, + "ce_orig": 0.5523210763931274, + "epoch": 0.16133438780645626, + "kl_loss": 1.3225059509277344, + "loss_ib": 0.023513898253440857, + "step": 561 + }, + { + "ce_ib": 10.36892032623291, + "ce_orig": 0.6983376741409302, + "epoch": 0.16133438780645626, + "kl_loss": 1.2711155414581299, + "loss_ib": 0.02308007702231407, + "step": 561 + }, + { + "ce_ib": 14.066039085388184, + "ce_orig": 0.9940349459648132, + "epoch": 0.16162197138543388, + "kl_loss": 1.3311264514923096, + "loss_ib": 0.027377303689718246, + "step": 562 + }, + { + "ce_ib": 9.398420333862305, + "ce_orig": 1.150452733039856, + "epoch": 0.16162197138543388, + "kl_loss": 1.3074944019317627, + "loss_ib": 0.02247336320579052, + "step": 562 + }, + { + "ce_ib": 9.80187702178955, + "ce_orig": 0.8328919410705566, + "epoch": 0.16162197138543388, + "kl_loss": 1.331373929977417, + "loss_ib": 0.02311561442911625, + "step": 562 + }, + { + "ce_ib": 12.888148307800293, + "ce_orig": 1.2748291492462158, + "epoch": 0.16162197138543388, + "kl_loss": 1.2818001508712769, + "loss_ib": 0.025706149637699127, + "step": 562 + }, + { + "ce_ib": 8.798264503479004, + "ce_orig": 0.66322261095047, + "epoch": 0.16190955496441153, + "kl_loss": 1.3028491735458374, + "loss_ib": 0.02182675525546074, + "step": 563 + }, + { + "ce_ib": 13.072640419006348, + "ce_orig": 1.0565416812896729, + "epoch": 0.16190955496441153, + "kl_loss": 1.2858983278274536, + "loss_ib": 0.025931624695658684, + "step": 563 + }, + { + "ce_ib": 11.12070083618164, + "ce_orig": 0.8622493743896484, + "epoch": 0.16190955496441153, + "kl_loss": 1.2600901126861572, + "loss_ib": 0.0237216018140316, + "step": 563 + }, + { + "ce_ib": 11.012995719909668, + "ce_orig": 0.7809346914291382, + "epoch": 0.16190955496441153, + "kl_loss": 1.2810771465301514, + "loss_ib": 0.02382376603782177, + "step": 563 + }, + { + "ce_ib": 10.03192138671875, + "ce_orig": 0.5545583367347717, + "epoch": 0.16219713854338919, + "kl_loss": 1.372998833656311, + "loss_ib": 0.023761911317706108, + "step": 564 + }, + { + "ce_ib": 8.590304374694824, + "ce_orig": 0.7225477695465088, + "epoch": 0.16219713854338919, + "kl_loss": 1.3027560710906982, + "loss_ib": 0.021617865189909935, + "step": 564 + }, + { + "ce_ib": 10.352544784545898, + "ce_orig": 0.8774200081825256, + "epoch": 0.16219713854338919, + "kl_loss": 1.2814412117004395, + "loss_ib": 0.02316695638000965, + "step": 564 + }, + { + "ce_ib": 8.562765121459961, + "ce_orig": 0.6415224075317383, + "epoch": 0.16219713854338919, + "kl_loss": 1.2804956436157227, + "loss_ib": 0.021367721259593964, + "step": 564 + }, + { + "epoch": 0.1624847221223668, + "grad_norm": 0.08632536977529526, + "learning_rate": 9.98554555113021e-06, + "loss": 0.8462, + "step": 565 + }, + { + "ce_ib": 12.551013946533203, + "ce_orig": 0.995196521282196, + "epoch": 0.1624847221223668, + "kl_loss": 1.3475830554962158, + "loss_ib": 0.02602684497833252, + "step": 565 + }, + { + "ce_ib": 11.848214149475098, + "ce_orig": 1.4025741815567017, + "epoch": 0.1624847221223668, + "kl_loss": 1.2776107788085938, + "loss_ib": 0.024624323472380638, + "step": 565 + }, + { + "ce_ib": 8.570831298828125, + "ce_orig": 0.6328908205032349, + "epoch": 0.1624847221223668, + "kl_loss": 1.2646872997283936, + "loss_ib": 0.021217703819274902, + "step": 565 + }, + { + "ce_ib": 9.687134742736816, + "ce_orig": 0.7903947234153748, + "epoch": 0.1624847221223668, + "kl_loss": 1.3254048824310303, + "loss_ib": 0.022941183298826218, + "step": 565 + }, + { + "ce_ib": 13.013336181640625, + "ce_orig": 1.2271647453308105, + "epoch": 0.16277230570134446, + "kl_loss": 1.2702226638793945, + "loss_ib": 0.02571556344628334, + "step": 566 + }, + { + "ce_ib": 12.480305671691895, + "ce_orig": 1.1103395223617554, + "epoch": 0.16277230570134446, + "kl_loss": 1.2821930646896362, + "loss_ib": 0.025302235037088394, + "step": 566 + }, + { + "ce_ib": 9.443026542663574, + "ce_orig": 0.5126791596412659, + "epoch": 0.16277230570134446, + "kl_loss": 1.2745044231414795, + "loss_ib": 0.02218807116150856, + "step": 566 + }, + { + "ce_ib": 9.337321281433105, + "ce_orig": 0.7954445481300354, + "epoch": 0.16277230570134446, + "kl_loss": 1.2490813732147217, + "loss_ib": 0.02182813547551632, + "step": 566 + }, + { + "ce_ib": 10.778318405151367, + "ce_orig": 0.7537108659744263, + "epoch": 0.16305988928032208, + "kl_loss": 1.2937819957733154, + "loss_ib": 0.023716138675808907, + "step": 567 + }, + { + "ce_ib": 9.771125793457031, + "ce_orig": 0.40443235635757446, + "epoch": 0.16305988928032208, + "kl_loss": 1.3131227493286133, + "loss_ib": 0.0229023527354002, + "step": 567 + }, + { + "ce_ib": 9.85836124420166, + "ce_orig": 0.5563979744911194, + "epoch": 0.16305988928032208, + "kl_loss": 1.2832622528076172, + "loss_ib": 0.02269098162651062, + "step": 567 + }, + { + "ce_ib": 13.68719482421875, + "ce_orig": 0.9920021295547485, + "epoch": 0.16305988928032208, + "kl_loss": 1.2698428630828857, + "loss_ib": 0.02638562209904194, + "step": 567 + }, + { + "ce_ib": 9.411405563354492, + "ce_orig": 0.9342118501663208, + "epoch": 0.16334747285929974, + "kl_loss": 1.2332212924957275, + "loss_ib": 0.021743619814515114, + "step": 568 + }, + { + "ce_ib": 15.157500267028809, + "ce_orig": 1.5584248304367065, + "epoch": 0.16334747285929974, + "kl_loss": 1.2862458229064941, + "loss_ib": 0.02801995724439621, + "step": 568 + }, + { + "ce_ib": 10.411499977111816, + "ce_orig": 0.8281600475311279, + "epoch": 0.16334747285929974, + "kl_loss": 1.2496166229248047, + "loss_ib": 0.022907666862010956, + "step": 568 + }, + { + "ce_ib": 10.096942901611328, + "ce_orig": 0.6468956470489502, + "epoch": 0.16334747285929974, + "kl_loss": 1.2605764865875244, + "loss_ib": 0.02270270697772503, + "step": 568 + }, + { + "ce_ib": 9.528172492980957, + "ce_orig": 0.8783382773399353, + "epoch": 0.1636350564382774, + "kl_loss": 1.2387837171554565, + "loss_ib": 0.021916009485721588, + "step": 569 + }, + { + "ce_ib": 8.954733848571777, + "ce_orig": 0.8919200897216797, + "epoch": 0.1636350564382774, + "kl_loss": 1.2251062393188477, + "loss_ib": 0.02120579592883587, + "step": 569 + }, + { + "ce_ib": 8.508342742919922, + "ce_orig": 0.6990381479263306, + "epoch": 0.1636350564382774, + "kl_loss": 1.2213623523712158, + "loss_ib": 0.02072196640074253, + "step": 569 + }, + { + "ce_ib": 11.344082832336426, + "ce_orig": 0.9525802731513977, + "epoch": 0.1636350564382774, + "kl_loss": 1.248590111732483, + "loss_ib": 0.023829983547329903, + "step": 569 + }, + { + "epoch": 0.163922640017255, + "grad_norm": 0.09219575673341751, + "learning_rate": 9.984949852618381e-06, + "loss": 0.8852, + "step": 570 + }, + { + "ce_ib": 8.961785316467285, + "ce_orig": 0.7948698997497559, + "epoch": 0.163922640017255, + "kl_loss": 1.2222837209701538, + "loss_ib": 0.02118462324142456, + "step": 570 + }, + { + "ce_ib": 12.20908260345459, + "ce_orig": 0.9208235144615173, + "epoch": 0.163922640017255, + "kl_loss": 1.2455497980117798, + "loss_ib": 0.024664580821990967, + "step": 570 + }, + { + "ce_ib": 9.334521293640137, + "ce_orig": 0.9958298206329346, + "epoch": 0.163922640017255, + "kl_loss": 1.2763538360595703, + "loss_ib": 0.022098058834671974, + "step": 570 + }, + { + "ce_ib": 12.762809753417969, + "ce_orig": 1.1121208667755127, + "epoch": 0.163922640017255, + "kl_loss": 1.2300899028778076, + "loss_ib": 0.025063710287213326, + "step": 570 + }, + { + "ce_ib": 15.606366157531738, + "ce_orig": 1.6491622924804688, + "epoch": 0.16421022359623266, + "kl_loss": 1.2227786779403687, + "loss_ib": 0.027834152802824974, + "step": 571 + }, + { + "ce_ib": 10.236468315124512, + "ce_orig": 0.8589720726013184, + "epoch": 0.16421022359623266, + "kl_loss": 1.256370186805725, + "loss_ib": 0.02280016802251339, + "step": 571 + }, + { + "ce_ib": 9.943655967712402, + "ce_orig": 0.5925063490867615, + "epoch": 0.16421022359623266, + "kl_loss": 1.2559595108032227, + "loss_ib": 0.02250325120985508, + "step": 571 + }, + { + "ce_ib": 11.613914489746094, + "ce_orig": 0.7779126763343811, + "epoch": 0.16421022359623266, + "kl_loss": 1.392745018005371, + "loss_ib": 0.025541365146636963, + "step": 571 + }, + { + "ce_ib": 12.392518043518066, + "ce_orig": 0.4280785620212555, + "epoch": 0.1644978071752103, + "kl_loss": 1.3646981716156006, + "loss_ib": 0.026039499789476395, + "step": 572 + }, + { + "ce_ib": 9.918338775634766, + "ce_orig": 0.5322098731994629, + "epoch": 0.1644978071752103, + "kl_loss": 1.2963712215423584, + "loss_ib": 0.02288205176591873, + "step": 572 + }, + { + "ce_ib": 14.274211883544922, + "ce_orig": 1.3050851821899414, + "epoch": 0.1644978071752103, + "kl_loss": 1.2759932279586792, + "loss_ib": 0.02703414298593998, + "step": 572 + }, + { + "ce_ib": 11.698960304260254, + "ce_orig": 1.155306100845337, + "epoch": 0.1644978071752103, + "kl_loss": 1.2604784965515137, + "loss_ib": 0.024303745478391647, + "step": 572 + }, + { + "ce_ib": 10.412774085998535, + "ce_orig": 1.052548885345459, + "epoch": 0.16478539075418794, + "kl_loss": 1.2234094142913818, + "loss_ib": 0.022646868601441383, + "step": 573 + }, + { + "ce_ib": 10.732352256774902, + "ce_orig": 0.9245730042457581, + "epoch": 0.16478539075418794, + "kl_loss": 1.2317912578582764, + "loss_ib": 0.02305026538670063, + "step": 573 + }, + { + "ce_ib": 9.59011173248291, + "ce_orig": 0.7064767479896545, + "epoch": 0.16478539075418794, + "kl_loss": 1.224424123764038, + "loss_ib": 0.021834352985024452, + "step": 573 + }, + { + "ce_ib": 9.361687660217285, + "ce_orig": 0.8686097264289856, + "epoch": 0.16478539075418794, + "kl_loss": 1.1842972040176392, + "loss_ib": 0.021204659715294838, + "step": 573 + }, + { + "ce_ib": 7.736264228820801, + "ce_orig": 0.6079578995704651, + "epoch": 0.1650729743331656, + "kl_loss": 1.213087558746338, + "loss_ib": 0.01986713893711567, + "step": 574 + }, + { + "ce_ib": 11.044466972351074, + "ce_orig": 0.8884755969047546, + "epoch": 0.1650729743331656, + "kl_loss": 1.2339417934417725, + "loss_ib": 0.023383883759379387, + "step": 574 + }, + { + "ce_ib": 10.914957046508789, + "ce_orig": 0.9813688397407532, + "epoch": 0.1650729743331656, + "kl_loss": 1.2557547092437744, + "loss_ib": 0.023472504690289497, + "step": 574 + }, + { + "ce_ib": 13.932374000549316, + "ce_orig": 1.102638840675354, + "epoch": 0.1650729743331656, + "kl_loss": 1.2306591272354126, + "loss_ib": 0.02623896487057209, + "step": 574 + }, + { + "epoch": 0.1653605579121432, + "grad_norm": 0.09370694309473038, + "learning_rate": 9.984342144205327e-06, + "loss": 0.9041, + "step": 575 + }, + { + "ce_ib": 9.729941368103027, + "ce_orig": 0.8137699961662292, + "epoch": 0.1653605579121432, + "kl_loss": 1.218010663986206, + "loss_ib": 0.0219100471585989, + "step": 575 + }, + { + "ce_ib": 11.345166206359863, + "ce_orig": 0.8795411586761475, + "epoch": 0.1653605579121432, + "kl_loss": 1.238341212272644, + "loss_ib": 0.02372857742011547, + "step": 575 + }, + { + "ce_ib": 12.664711952209473, + "ce_orig": 1.4619799852371216, + "epoch": 0.1653605579121432, + "kl_loss": 1.2479004859924316, + "loss_ib": 0.025143718346953392, + "step": 575 + }, + { + "ce_ib": 7.3197340965271, + "ce_orig": 0.5423354506492615, + "epoch": 0.1653605579121432, + "kl_loss": 1.4118754863739014, + "loss_ib": 0.021438488736748695, + "step": 575 + }, + { + "ce_ib": 11.586548805236816, + "ce_orig": 0.8300837874412537, + "epoch": 0.16564814149112086, + "kl_loss": 1.2648472785949707, + "loss_ib": 0.024235021322965622, + "step": 576 + }, + { + "ce_ib": 9.670539855957031, + "ce_orig": 0.8994592428207397, + "epoch": 0.16564814149112086, + "kl_loss": 1.202571988105774, + "loss_ib": 0.021696260198950768, + "step": 576 + }, + { + "ce_ib": 5.678918838500977, + "ce_orig": 0.28384220600128174, + "epoch": 0.16564814149112086, + "kl_loss": 1.3513402938842773, + "loss_ib": 0.019192321226000786, + "step": 576 + }, + { + "ce_ib": 11.467476844787598, + "ce_orig": 0.5451651215553284, + "epoch": 0.16564814149112086, + "kl_loss": 1.2790919542312622, + "loss_ib": 0.02425839565694332, + "step": 576 + }, + { + "ce_ib": 8.113414764404297, + "ce_orig": 0.7992563843727112, + "epoch": 0.1659357250700985, + "kl_loss": 1.1959567070007324, + "loss_ib": 0.02007298171520233, + "step": 577 + }, + { + "ce_ib": 6.4770002365112305, + "ce_orig": 0.4411783218383789, + "epoch": 0.1659357250700985, + "kl_loss": 1.2061271667480469, + "loss_ib": 0.018538272008299828, + "step": 577 + }, + { + "ce_ib": 9.995020866394043, + "ce_orig": 0.5499460697174072, + "epoch": 0.1659357250700985, + "kl_loss": 1.2197155952453613, + "loss_ib": 0.022192176431417465, + "step": 577 + }, + { + "ce_ib": 5.73984956741333, + "ce_orig": 0.6255266666412354, + "epoch": 0.1659357250700985, + "kl_loss": 1.1914690732955933, + "loss_ib": 0.0176545400172472, + "step": 577 + }, + { + "ce_ib": 12.113245964050293, + "ce_orig": 1.025524377822876, + "epoch": 0.16622330864907614, + "kl_loss": 1.2450977563858032, + "loss_ib": 0.024564223363995552, + "step": 578 + }, + { + "ce_ib": 5.700209617614746, + "ce_orig": 0.28980499505996704, + "epoch": 0.16622330864907614, + "kl_loss": 1.2563034296035767, + "loss_ib": 0.01826324500143528, + "step": 578 + }, + { + "ce_ib": 12.162859916687012, + "ce_orig": 0.8536310195922852, + "epoch": 0.16622330864907614, + "kl_loss": 1.220404028892517, + "loss_ib": 0.024366900324821472, + "step": 578 + }, + { + "ce_ib": 12.775392532348633, + "ce_orig": 1.3573917150497437, + "epoch": 0.16622330864907614, + "kl_loss": 1.1893036365509033, + "loss_ib": 0.024668429046869278, + "step": 578 + }, + { + "ce_ib": 13.641351699829102, + "ce_orig": 1.5364866256713867, + "epoch": 0.1665108922280538, + "kl_loss": 1.261054277420044, + "loss_ib": 0.026251891627907753, + "step": 579 + }, + { + "ce_ib": 11.468977928161621, + "ce_orig": 1.2083629369735718, + "epoch": 0.1665108922280538, + "kl_loss": 1.2402012348175049, + "loss_ib": 0.023870989680290222, + "step": 579 + }, + { + "ce_ib": 14.304160118103027, + "ce_orig": 1.1799192428588867, + "epoch": 0.1665108922280538, + "kl_loss": 1.2703068256378174, + "loss_ib": 0.027007225900888443, + "step": 579 + }, + { + "ce_ib": 13.134546279907227, + "ce_orig": 1.2673437595367432, + "epoch": 0.1665108922280538, + "kl_loss": 1.1939582824707031, + "loss_ib": 0.025074128061532974, + "step": 579 + }, + { + "epoch": 0.16679847580703142, + "grad_norm": 0.08239021897315979, + "learning_rate": 9.983722427355157e-06, + "loss": 0.9056, + "step": 580 + }, + { + "ce_ib": 13.295975685119629, + "ce_orig": 1.0868593454360962, + "epoch": 0.16679847580703142, + "kl_loss": 1.2228111028671265, + "loss_ib": 0.025524087250232697, + "step": 580 + }, + { + "ce_ib": 6.748361110687256, + "ce_orig": 0.49680906534194946, + "epoch": 0.16679847580703142, + "kl_loss": 1.2128095626831055, + "loss_ib": 0.0188764575868845, + "step": 580 + }, + { + "ce_ib": 13.180870056152344, + "ce_orig": 0.8746943473815918, + "epoch": 0.16679847580703142, + "kl_loss": 1.2134785652160645, + "loss_ib": 0.025315655395388603, + "step": 580 + }, + { + "ce_ib": 12.037747383117676, + "ce_orig": 0.9534928202629089, + "epoch": 0.16679847580703142, + "kl_loss": 1.2241135835647583, + "loss_ib": 0.024278882890939713, + "step": 580 + }, + { + "ce_ib": 8.912028312683105, + "ce_orig": 0.7467697858810425, + "epoch": 0.16708605938600907, + "kl_loss": 1.1925266981124878, + "loss_ib": 0.020837293937802315, + "step": 581 + }, + { + "ce_ib": 9.094566345214844, + "ce_orig": 0.6505690813064575, + "epoch": 0.16708605938600907, + "kl_loss": 1.1850394010543823, + "loss_ib": 0.020944960415363312, + "step": 581 + }, + { + "ce_ib": 12.56716251373291, + "ce_orig": 1.2765976190567017, + "epoch": 0.16708605938600907, + "kl_loss": 1.2733515501022339, + "loss_ib": 0.025300677865743637, + "step": 581 + }, + { + "ce_ib": 13.4862699508667, + "ce_orig": 1.0705842971801758, + "epoch": 0.16708605938600907, + "kl_loss": 1.2059423923492432, + "loss_ib": 0.025545692071318626, + "step": 581 + }, + { + "ce_ib": 8.2174711227417, + "ce_orig": 0.7286640405654907, + "epoch": 0.1673736429649867, + "kl_loss": 1.2195181846618652, + "loss_ib": 0.02041265182197094, + "step": 582 + }, + { + "ce_ib": 13.159209251403809, + "ce_orig": 1.2096375226974487, + "epoch": 0.1673736429649867, + "kl_loss": 1.2165024280548096, + "loss_ib": 0.02532423473894596, + "step": 582 + }, + { + "ce_ib": 11.165864944458008, + "ce_orig": 0.7757768630981445, + "epoch": 0.1673736429649867, + "kl_loss": 1.1681039333343506, + "loss_ib": 0.022846903651952744, + "step": 582 + }, + { + "ce_ib": 8.71281623840332, + "ce_orig": 0.7169674634933472, + "epoch": 0.1673736429649867, + "kl_loss": 1.191447377204895, + "loss_ib": 0.02062728814780712, + "step": 582 + }, + { + "ce_ib": 9.235655784606934, + "ce_orig": 0.6690336465835571, + "epoch": 0.16766122654396434, + "kl_loss": 1.2143634557724, + "loss_ib": 0.021379288285970688, + "step": 583 + }, + { + "ce_ib": 8.412071228027344, + "ce_orig": 0.843682587146759, + "epoch": 0.16766122654396434, + "kl_loss": 1.2117249965667725, + "loss_ib": 0.02052932232618332, + "step": 583 + }, + { + "ce_ib": 10.572442054748535, + "ce_orig": 0.7864252328872681, + "epoch": 0.16766122654396434, + "kl_loss": 1.2299147844314575, + "loss_ib": 0.0228715892881155, + "step": 583 + }, + { + "ce_ib": 8.599762916564941, + "ce_orig": 0.7645632028579712, + "epoch": 0.16766122654396434, + "kl_loss": 1.2004691362380981, + "loss_ib": 0.02060445211827755, + "step": 583 + }, + { + "ce_ib": 11.767807960510254, + "ce_orig": 1.0832188129425049, + "epoch": 0.167948810122942, + "kl_loss": 1.1508519649505615, + "loss_ib": 0.023276329040527344, + "step": 584 + }, + { + "ce_ib": 9.03587818145752, + "ce_orig": 0.7372077107429504, + "epoch": 0.167948810122942, + "kl_loss": 1.1607894897460938, + "loss_ib": 0.020643772557377815, + "step": 584 + }, + { + "ce_ib": 11.698123931884766, + "ce_orig": 0.8797475695610046, + "epoch": 0.167948810122942, + "kl_loss": 1.2153360843658447, + "loss_ib": 0.023851484060287476, + "step": 584 + }, + { + "ce_ib": 14.928531646728516, + "ce_orig": 1.3900351524353027, + "epoch": 0.167948810122942, + "kl_loss": 1.276613473892212, + "loss_ib": 0.027694664895534515, + "step": 584 + }, + { + "epoch": 0.16823639370191962, + "grad_norm": 0.11208527535200119, + "learning_rate": 9.983090703560911e-06, + "loss": 0.8947, + "step": 585 + }, + { + "ce_ib": 11.16375732421875, + "ce_orig": 0.9449269771575928, + "epoch": 0.16823639370191962, + "kl_loss": 1.197243094444275, + "loss_ib": 0.023136189207434654, + "step": 585 + }, + { + "ce_ib": 9.852100372314453, + "ce_orig": 1.078002691268921, + "epoch": 0.16823639370191962, + "kl_loss": 1.1607494354248047, + "loss_ib": 0.021459592506289482, + "step": 585 + }, + { + "ce_ib": 11.235018730163574, + "ce_orig": 1.1319299936294556, + "epoch": 0.16823639370191962, + "kl_loss": 1.2304571866989136, + "loss_ib": 0.0235395897179842, + "step": 585 + }, + { + "ce_ib": 7.328647613525391, + "ce_orig": 0.9203292727470398, + "epoch": 0.16823639370191962, + "kl_loss": 1.1338272094726562, + "loss_ib": 0.018666919320821762, + "step": 585 + }, + { + "ce_ib": 7.884848117828369, + "ce_orig": 0.5528172850608826, + "epoch": 0.16852397728089727, + "kl_loss": 1.2366106510162354, + "loss_ib": 0.02025095373392105, + "step": 586 + }, + { + "ce_ib": 13.856200218200684, + "ce_orig": 0.9960865378379822, + "epoch": 0.16852397728089727, + "kl_loss": 1.2090309858322144, + "loss_ib": 0.02594650909304619, + "step": 586 + }, + { + "ce_ib": 9.895005226135254, + "ce_orig": 0.5365419387817383, + "epoch": 0.16852397728089727, + "kl_loss": 1.1710314750671387, + "loss_ib": 0.021605320274829865, + "step": 586 + }, + { + "ce_ib": 11.017683029174805, + "ce_orig": 0.8266361951828003, + "epoch": 0.16852397728089727, + "kl_loss": 1.2141588926315308, + "loss_ib": 0.023159272968769073, + "step": 586 + }, + { + "ce_ib": 11.70738410949707, + "ce_orig": 0.9832698702812195, + "epoch": 0.1688115608598749, + "kl_loss": 1.1586453914642334, + "loss_ib": 0.023293837904930115, + "step": 587 + }, + { + "ce_ib": 8.602079391479492, + "ce_orig": 0.4699603319168091, + "epoch": 0.1688115608598749, + "kl_loss": 1.2227380275726318, + "loss_ib": 0.02082945965230465, + "step": 587 + }, + { + "ce_ib": 14.872537612915039, + "ce_orig": 1.5698349475860596, + "epoch": 0.1688115608598749, + "kl_loss": 1.2066720724105835, + "loss_ib": 0.026939257979393005, + "step": 587 + }, + { + "ce_ib": 9.357597351074219, + "ce_orig": 0.9672819972038269, + "epoch": 0.1688115608598749, + "kl_loss": 1.1683168411254883, + "loss_ib": 0.021040765568614006, + "step": 587 + }, + { + "ce_ib": 12.048833847045898, + "ce_orig": 1.0971179008483887, + "epoch": 0.16909914443885254, + "kl_loss": 1.2246365547180176, + "loss_ib": 0.024295201525092125, + "step": 588 + }, + { + "ce_ib": 7.782527446746826, + "ce_orig": 0.43647029995918274, + "epoch": 0.16909914443885254, + "kl_loss": 1.1583232879638672, + "loss_ib": 0.019365761429071426, + "step": 588 + }, + { + "ce_ib": 7.263584136962891, + "ce_orig": 0.7806444764137268, + "epoch": 0.16909914443885254, + "kl_loss": 1.1670253276824951, + "loss_ib": 0.018933836370706558, + "step": 588 + }, + { + "ce_ib": 5.9214959144592285, + "ce_orig": 0.5120716094970703, + "epoch": 0.16909914443885254, + "kl_loss": 1.1531264781951904, + "loss_ib": 0.01745275966823101, + "step": 588 + }, + { + "ce_ib": 11.046984672546387, + "ce_orig": 1.098747730255127, + "epoch": 0.16938672801783017, + "kl_loss": 1.141385793685913, + "loss_ib": 0.02246084250509739, + "step": 589 + }, + { + "ce_ib": 12.190630912780762, + "ce_orig": 0.7543506622314453, + "epoch": 0.16938672801783017, + "kl_loss": 1.1634085178375244, + "loss_ib": 0.023824715986847878, + "step": 589 + }, + { + "ce_ib": 11.490245819091797, + "ce_orig": 1.0767667293548584, + "epoch": 0.16938672801783017, + "kl_loss": 1.2043827772140503, + "loss_ib": 0.023534072563052177, + "step": 589 + }, + { + "ce_ib": 10.710217475891113, + "ce_orig": 0.803483247756958, + "epoch": 0.16938672801783017, + "kl_loss": 1.1858105659484863, + "loss_ib": 0.022568322718143463, + "step": 589 + }, + { + "epoch": 0.16967431159680782, + "grad_norm": 0.09646889567375183, + "learning_rate": 9.982446974344561e-06, + "loss": 0.893, + "step": 590 + }, + { + "ce_ib": 7.601869583129883, + "ce_orig": 0.4696982204914093, + "epoch": 0.16967431159680782, + "kl_loss": 1.1903643608093262, + "loss_ib": 0.019505511969327927, + "step": 590 + }, + { + "ce_ib": 9.697887420654297, + "ce_orig": 0.4679652452468872, + "epoch": 0.16967431159680782, + "kl_loss": 1.1964094638824463, + "loss_ib": 0.02166198194026947, + "step": 590 + }, + { + "ce_ib": 5.316531658172607, + "ce_orig": 0.6131843328475952, + "epoch": 0.16967431159680782, + "kl_loss": 1.1232414245605469, + "loss_ib": 0.016548944637179375, + "step": 590 + }, + { + "ce_ib": 11.569339752197266, + "ce_orig": 0.532830536365509, + "epoch": 0.16967431159680782, + "kl_loss": 1.2179789543151855, + "loss_ib": 0.023749129846692085, + "step": 590 + }, + { + "ce_ib": 14.859304428100586, + "ce_orig": 1.699377417564392, + "epoch": 0.16996189517578547, + "kl_loss": 1.1641483306884766, + "loss_ib": 0.02650078758597374, + "step": 591 + }, + { + "ce_ib": 12.174185752868652, + "ce_orig": 1.0933891534805298, + "epoch": 0.16996189517578547, + "kl_loss": 1.205374002456665, + "loss_ib": 0.024227924644947052, + "step": 591 + }, + { + "ce_ib": 12.189451217651367, + "ce_orig": 1.1713759899139404, + "epoch": 0.16996189517578547, + "kl_loss": 1.1780178546905518, + "loss_ib": 0.023969629779458046, + "step": 591 + }, + { + "ce_ib": 6.584260940551758, + "ce_orig": 0.531925618648529, + "epoch": 0.16996189517578547, + "kl_loss": 1.1629016399383545, + "loss_ib": 0.01821327582001686, + "step": 591 + }, + { + "ce_ib": 10.2810697555542, + "ce_orig": 1.2692686319351196, + "epoch": 0.1702494787547631, + "kl_loss": 1.1622142791748047, + "loss_ib": 0.02190321311354637, + "step": 592 + }, + { + "ce_ib": 8.695658683776855, + "ce_orig": 1.1439961194992065, + "epoch": 0.1702494787547631, + "kl_loss": 1.2565195560455322, + "loss_ib": 0.021260853856801987, + "step": 592 + }, + { + "ce_ib": 12.925812721252441, + "ce_orig": 1.113416314125061, + "epoch": 0.1702494787547631, + "kl_loss": 1.1489940881729126, + "loss_ib": 0.02441575564444065, + "step": 592 + }, + { + "ce_ib": 8.097929954528809, + "ce_orig": 0.3692007064819336, + "epoch": 0.1702494787547631, + "kl_loss": 1.2639890909194946, + "loss_ib": 0.020737819373607635, + "step": 592 + }, + { + "ce_ib": 12.470227241516113, + "ce_orig": 1.292494535446167, + "epoch": 0.17053706233374075, + "kl_loss": 1.2025644779205322, + "loss_ib": 0.024495873600244522, + "step": 593 + }, + { + "ce_ib": 8.987156867980957, + "ce_orig": 0.7146295309066772, + "epoch": 0.17053706233374075, + "kl_loss": 1.148221492767334, + "loss_ib": 0.02046937122941017, + "step": 593 + }, + { + "ce_ib": 8.649067878723145, + "ce_orig": 0.6290068626403809, + "epoch": 0.17053706233374075, + "kl_loss": 1.1626759767532349, + "loss_ib": 0.020275825634598732, + "step": 593 + }, + { + "ce_ib": 9.381182670593262, + "ce_orig": 0.8150485157966614, + "epoch": 0.17053706233374075, + "kl_loss": 1.1636794805526733, + "loss_ib": 0.021017977967858315, + "step": 593 + }, + { + "ce_ib": 13.075675010681152, + "ce_orig": 1.3097158670425415, + "epoch": 0.17082464591271837, + "kl_loss": 1.1714305877685547, + "loss_ib": 0.024789981544017792, + "step": 594 + }, + { + "ce_ib": 16.90681266784668, + "ce_orig": 1.63013756275177, + "epoch": 0.17082464591271837, + "kl_loss": 1.1639704704284668, + "loss_ib": 0.028546517714858055, + "step": 594 + }, + { + "ce_ib": 9.945943832397461, + "ce_orig": 0.620954692363739, + "epoch": 0.17082464591271837, + "kl_loss": 1.1276791095733643, + "loss_ib": 0.021222734823822975, + "step": 594 + }, + { + "ce_ib": 7.538568496704102, + "ce_orig": 0.7477220296859741, + "epoch": 0.17082464591271837, + "kl_loss": 1.1129953861236572, + "loss_ib": 0.01866852305829525, + "step": 594 + }, + { + "epoch": 0.17111222949169602, + "grad_norm": 0.0950080156326294, + "learning_rate": 9.981791241257001e-06, + "loss": 0.8499, + "step": 595 + }, + { + "ce_ib": 9.64914321899414, + "ce_orig": 0.8221632242202759, + "epoch": 0.17111222949169602, + "kl_loss": 1.2603493928909302, + "loss_ib": 0.022252636030316353, + "step": 595 + }, + { + "ce_ib": 10.428970336914062, + "ce_orig": 0.9476694464683533, + "epoch": 0.17111222949169602, + "kl_loss": 1.2723057270050049, + "loss_ib": 0.023152027279138565, + "step": 595 + }, + { + "ce_ib": 11.565075874328613, + "ce_orig": 0.7016614675521851, + "epoch": 0.17111222949169602, + "kl_loss": 1.214961290359497, + "loss_ib": 0.023714689537882805, + "step": 595 + }, + { + "ce_ib": 7.760899066925049, + "ce_orig": 0.655583918094635, + "epoch": 0.17111222949169602, + "kl_loss": 1.1367559432983398, + "loss_ib": 0.019128458574414253, + "step": 595 + }, + { + "ce_ib": 13.004273414611816, + "ce_orig": 1.171642541885376, + "epoch": 0.17139981307067367, + "kl_loss": 1.1165239810943604, + "loss_ib": 0.02416951209306717, + "step": 596 + }, + { + "ce_ib": 8.871601104736328, + "ce_orig": 0.6587203145027161, + "epoch": 0.17139981307067367, + "kl_loss": 1.1157963275909424, + "loss_ib": 0.020029563456773758, + "step": 596 + }, + { + "ce_ib": 8.543595314025879, + "ce_orig": 0.680071234703064, + "epoch": 0.17139981307067367, + "kl_loss": 1.2704293727874756, + "loss_ib": 0.02124788984656334, + "step": 596 + }, + { + "ce_ib": 12.145650863647461, + "ce_orig": 0.9769390821456909, + "epoch": 0.17139981307067367, + "kl_loss": 1.1289055347442627, + "loss_ib": 0.023434706032276154, + "step": 596 + }, + { + "ce_ib": 11.884757041931152, + "ce_orig": 0.7717639803886414, + "epoch": 0.1716873966496513, + "kl_loss": 1.1151626110076904, + "loss_ib": 0.02303638495504856, + "step": 597 + }, + { + "ce_ib": 13.524641036987305, + "ce_orig": 1.0390403270721436, + "epoch": 0.1716873966496513, + "kl_loss": 1.1430152654647827, + "loss_ib": 0.024954792112112045, + "step": 597 + }, + { + "ce_ib": 10.59398365020752, + "ce_orig": 1.0760669708251953, + "epoch": 0.1716873966496513, + "kl_loss": 1.267032265663147, + "loss_ib": 0.023264307528734207, + "step": 597 + }, + { + "ce_ib": 9.16975212097168, + "ce_orig": 0.7946443557739258, + "epoch": 0.1716873966496513, + "kl_loss": 1.332892894744873, + "loss_ib": 0.022498680278658867, + "step": 597 + }, + { + "ce_ib": 15.392538070678711, + "ce_orig": 1.7411152124404907, + "epoch": 0.17197498022862895, + "kl_loss": 1.1676359176635742, + "loss_ib": 0.02706889621913433, + "step": 598 + }, + { + "ce_ib": 10.269882202148438, + "ce_orig": 0.9134522676467896, + "epoch": 0.17197498022862895, + "kl_loss": 1.14839768409729, + "loss_ib": 0.021753858774900436, + "step": 598 + }, + { + "ce_ib": 8.251395225524902, + "ce_orig": 0.6777662634849548, + "epoch": 0.17197498022862895, + "kl_loss": 1.0949749946594238, + "loss_ib": 0.019201144576072693, + "step": 598 + }, + { + "ce_ib": 7.8162455558776855, + "ce_orig": 0.5873789191246033, + "epoch": 0.17197498022862895, + "kl_loss": 1.1189738512039185, + "loss_ib": 0.01900598406791687, + "step": 598 + }, + { + "ce_ib": 8.662796974182129, + "ce_orig": 0.919073760509491, + "epoch": 0.17226256380760657, + "kl_loss": 1.1068514585494995, + "loss_ib": 0.01973131112754345, + "step": 599 + }, + { + "ce_ib": 11.996581077575684, + "ce_orig": 1.1948639154434204, + "epoch": 0.17226256380760657, + "kl_loss": 1.1755080223083496, + "loss_ib": 0.02375166118144989, + "step": 599 + }, + { + "ce_ib": 12.551753044128418, + "ce_orig": 1.4086652994155884, + "epoch": 0.17226256380760657, + "kl_loss": 1.1488478183746338, + "loss_ib": 0.024040231481194496, + "step": 599 + }, + { + "ce_ib": 12.489664077758789, + "ce_orig": 1.4374010562896729, + "epoch": 0.17226256380760657, + "kl_loss": 1.163723111152649, + "loss_ib": 0.024126896634697914, + "step": 599 + }, + { + "epoch": 0.17255014738658422, + "grad_norm": 0.08578155934810638, + "learning_rate": 9.98112350587804e-06, + "loss": 0.8373, + "step": 600 + }, + { + "ce_ib": 14.228236198425293, + "ce_orig": 1.494513750076294, + "epoch": 0.17255014738658422, + "kl_loss": 1.1346684694290161, + "loss_ib": 0.025574922561645508, + "step": 600 + }, + { + "ce_ib": 10.47590160369873, + "ce_orig": 0.6758707165718079, + "epoch": 0.17255014738658422, + "kl_loss": 1.149749755859375, + "loss_ib": 0.021973399445414543, + "step": 600 + }, + { + "ce_ib": 9.56252670288086, + "ce_orig": 0.6861965656280518, + "epoch": 0.17255014738658422, + "kl_loss": 1.146235704421997, + "loss_ib": 0.021024884656071663, + "step": 600 + }, + { + "ce_ib": 8.769157409667969, + "ce_orig": 0.6426496505737305, + "epoch": 0.17255014738658422, + "kl_loss": 1.1051974296569824, + "loss_ib": 0.019821131601929665, + "step": 600 + }, + { + "ce_ib": 6.642595291137695, + "ce_orig": 0.5588423013687134, + "epoch": 0.17283773096556188, + "kl_loss": 1.1010701656341553, + "loss_ib": 0.017653297632932663, + "step": 601 + }, + { + "ce_ib": 10.748079299926758, + "ce_orig": 0.837460458278656, + "epoch": 0.17283773096556188, + "kl_loss": 1.1604368686676025, + "loss_ib": 0.022352447733283043, + "step": 601 + }, + { + "ce_ib": 8.990370750427246, + "ce_orig": 0.8383088707923889, + "epoch": 0.17283773096556188, + "kl_loss": 1.1006824970245361, + "loss_ib": 0.01999719627201557, + "step": 601 + }, + { + "ce_ib": 9.197964668273926, + "ce_orig": 0.686692476272583, + "epoch": 0.17283773096556188, + "kl_loss": 1.1443568468093872, + "loss_ib": 0.02064153179526329, + "step": 601 + }, + { + "ce_ib": 8.66501522064209, + "ce_orig": 0.525370180606842, + "epoch": 0.1731253145445395, + "kl_loss": 1.1227505207061768, + "loss_ib": 0.01989252120256424, + "step": 602 + }, + { + "ce_ib": 11.858187675476074, + "ce_orig": 0.8876397013664246, + "epoch": 0.1731253145445395, + "kl_loss": 1.1747143268585205, + "loss_ib": 0.023605331778526306, + "step": 602 + }, + { + "ce_ib": 6.1840105056762695, + "ce_orig": 0.6205452680587769, + "epoch": 0.1731253145445395, + "kl_loss": 1.0966415405273438, + "loss_ib": 0.017150426283478737, + "step": 602 + }, + { + "ce_ib": 11.21971607208252, + "ce_orig": 0.9764306545257568, + "epoch": 0.1731253145445395, + "kl_loss": 1.116091012954712, + "loss_ib": 0.02238062582910061, + "step": 602 + }, + { + "ce_ib": 6.916097164154053, + "ce_orig": 0.5436660051345825, + "epoch": 0.17341289812351715, + "kl_loss": 1.1902745962142944, + "loss_ib": 0.018818842247128487, + "step": 603 + }, + { + "ce_ib": 8.293400764465332, + "ce_orig": 0.8351038694381714, + "epoch": 0.17341289812351715, + "kl_loss": 1.1848680973052979, + "loss_ib": 0.0201420821249485, + "step": 603 + }, + { + "ce_ib": 6.613530158996582, + "ce_orig": 0.18875178694725037, + "epoch": 0.17341289812351715, + "kl_loss": 1.1903159618377686, + "loss_ib": 0.01851668953895569, + "step": 603 + }, + { + "ce_ib": 11.624032020568848, + "ce_orig": 1.285302758216858, + "epoch": 0.17341289812351715, + "kl_loss": 1.1025352478027344, + "loss_ib": 0.022649383172392845, + "step": 603 + }, + { + "ce_ib": 15.579438209533691, + "ce_orig": 1.5998573303222656, + "epoch": 0.17370048170249477, + "kl_loss": 1.2801098823547363, + "loss_ib": 0.028380535542964935, + "step": 604 + }, + { + "ce_ib": 12.44752025604248, + "ce_orig": 1.4038703441619873, + "epoch": 0.17370048170249477, + "kl_loss": 1.1902649402618408, + "loss_ib": 0.02435017004609108, + "step": 604 + }, + { + "ce_ib": 10.84005355834961, + "ce_orig": 0.6248370409011841, + "epoch": 0.17370048170249477, + "kl_loss": 1.1507587432861328, + "loss_ib": 0.022347640246152878, + "step": 604 + }, + { + "ce_ib": 8.29090404510498, + "ce_orig": 0.7321332097053528, + "epoch": 0.17370048170249477, + "kl_loss": 1.1337003707885742, + "loss_ib": 0.019627906382083893, + "step": 604 + }, + { + "epoch": 0.17398806528147243, + "grad_norm": 0.08757986128330231, + "learning_rate": 9.980443769816412e-06, + "loss": 0.8879, + "step": 605 + }, + { + "ce_ib": 16.679454803466797, + "ce_orig": 1.7097995281219482, + "epoch": 0.17398806528147243, + "kl_loss": 1.1676082611083984, + "loss_ib": 0.028355535119771957, + "step": 605 + }, + { + "ce_ib": 8.775374412536621, + "ce_orig": 1.0261955261230469, + "epoch": 0.17398806528147243, + "kl_loss": 1.0867377519607544, + "loss_ib": 0.019642751663923264, + "step": 605 + }, + { + "ce_ib": 9.250091552734375, + "ce_orig": 0.7789836525917053, + "epoch": 0.17398806528147243, + "kl_loss": 1.149935007095337, + "loss_ib": 0.020749442279338837, + "step": 605 + }, + { + "ce_ib": 9.42496395111084, + "ce_orig": 0.8384370803833008, + "epoch": 0.17398806528147243, + "kl_loss": 1.1811635494232178, + "loss_ib": 0.02123660035431385, + "step": 605 + }, + { + "ce_ib": 15.161707878112793, + "ce_orig": 2.020648717880249, + "epoch": 0.17427564886045008, + "kl_loss": 1.1757433414459229, + "loss_ib": 0.026919139549136162, + "step": 606 + }, + { + "ce_ib": 4.969954013824463, + "ce_orig": 0.5350307822227478, + "epoch": 0.17427564886045008, + "kl_loss": 1.0870544910430908, + "loss_ib": 0.015840498730540276, + "step": 606 + }, + { + "ce_ib": 10.833635330200195, + "ce_orig": 1.334272861480713, + "epoch": 0.17427564886045008, + "kl_loss": 1.116763949394226, + "loss_ib": 0.022001275792717934, + "step": 606 + }, + { + "ce_ib": 13.298807144165039, + "ce_orig": 1.2746291160583496, + "epoch": 0.17427564886045008, + "kl_loss": 1.1182126998901367, + "loss_ib": 0.02448093332350254, + "step": 606 + }, + { + "ce_ib": 8.028116226196289, + "ce_orig": 0.2780713737010956, + "epoch": 0.1745632324394277, + "kl_loss": 1.3144466876983643, + "loss_ib": 0.02117258310317993, + "step": 607 + }, + { + "ce_ib": 9.471242904663086, + "ce_orig": 0.8981790542602539, + "epoch": 0.1745632324394277, + "kl_loss": 1.1512048244476318, + "loss_ib": 0.020983289927244186, + "step": 607 + }, + { + "ce_ib": 11.815406799316406, + "ce_orig": 0.8096030950546265, + "epoch": 0.1745632324394277, + "kl_loss": 1.0907784700393677, + "loss_ib": 0.02272319234907627, + "step": 607 + }, + { + "ce_ib": 8.835672378540039, + "ce_orig": 0.8250407576560974, + "epoch": 0.1745632324394277, + "kl_loss": 1.0707385540008545, + "loss_ib": 0.019543059170246124, + "step": 607 + }, + { + "ce_ib": 14.99797248840332, + "ce_orig": 1.081514835357666, + "epoch": 0.17485081601840535, + "kl_loss": 1.3153797388076782, + "loss_ib": 0.028151769191026688, + "step": 608 + }, + { + "ce_ib": 13.794142723083496, + "ce_orig": 1.310433268547058, + "epoch": 0.17485081601840535, + "kl_loss": 1.173295497894287, + "loss_ib": 0.025527097284793854, + "step": 608 + }, + { + "ce_ib": 6.75504732131958, + "ce_orig": 0.7089630365371704, + "epoch": 0.17485081601840535, + "kl_loss": 1.1751198768615723, + "loss_ib": 0.01850624568760395, + "step": 608 + }, + { + "ce_ib": 10.636677742004395, + "ce_orig": 0.9004390239715576, + "epoch": 0.17485081601840535, + "kl_loss": 1.1462738513946533, + "loss_ib": 0.022099414840340614, + "step": 608 + }, + { + "ce_ib": 16.850482940673828, + "ce_orig": 1.1504744291305542, + "epoch": 0.17513839959738298, + "kl_loss": 1.2235054969787598, + "loss_ib": 0.029085537418723106, + "step": 609 + }, + { + "ce_ib": 13.915315628051758, + "ce_orig": 0.9233617782592773, + "epoch": 0.17513839959738298, + "kl_loss": 1.2110941410064697, + "loss_ib": 0.026026258245110512, + "step": 609 + }, + { + "ce_ib": 11.495623588562012, + "ce_orig": 0.9102531671524048, + "epoch": 0.17513839959738298, + "kl_loss": 1.1143162250518799, + "loss_ib": 0.02263878472149372, + "step": 609 + }, + { + "ce_ib": 11.01639175415039, + "ce_orig": 1.2989511489868164, + "epoch": 0.17513839959738298, + "kl_loss": 1.1308627128601074, + "loss_ib": 0.022325018420815468, + "step": 609 + }, + { + "epoch": 0.17542598317636063, + "grad_norm": 0.09487691521644592, + "learning_rate": 9.979752034709756e-06, + "loss": 0.943, + "step": 610 + }, + { + "ce_ib": 9.711048126220703, + "ce_orig": 0.9693747162818909, + "epoch": 0.17542598317636063, + "kl_loss": 1.074932336807251, + "loss_ib": 0.020460370928049088, + "step": 610 + }, + { + "ce_ib": 14.960775375366211, + "ce_orig": 1.6066879034042358, + "epoch": 0.17542598317636063, + "kl_loss": 1.1578710079193115, + "loss_ib": 0.026539484038949013, + "step": 610 + }, + { + "ce_ib": 11.009191513061523, + "ce_orig": 0.753725528717041, + "epoch": 0.17542598317636063, + "kl_loss": 1.1147217750549316, + "loss_ib": 0.022156409919261932, + "step": 610 + }, + { + "ce_ib": 7.617627143859863, + "ce_orig": 0.5374597311019897, + "epoch": 0.17542598317636063, + "kl_loss": 1.0917261838912964, + "loss_ib": 0.018534889444708824, + "step": 610 + }, + { + "ce_ib": 9.379159927368164, + "ce_orig": 0.839108943939209, + "epoch": 0.17571356675533828, + "kl_loss": 1.1200788021087646, + "loss_ib": 0.020579947158694267, + "step": 611 + }, + { + "ce_ib": 9.289336204528809, + "ce_orig": 0.5786874890327454, + "epoch": 0.17571356675533828, + "kl_loss": 1.251037836074829, + "loss_ib": 0.021799713373184204, + "step": 611 + }, + { + "ce_ib": 6.256180286407471, + "ce_orig": 0.496167927980423, + "epoch": 0.17571356675533828, + "kl_loss": 1.1585159301757812, + "loss_ib": 0.017841339111328125, + "step": 611 + }, + { + "ce_ib": 15.044721603393555, + "ce_orig": 1.532287836074829, + "epoch": 0.17571356675533828, + "kl_loss": 1.1178569793701172, + "loss_ib": 0.02622329257428646, + "step": 611 + }, + { + "ce_ib": 8.996905326843262, + "ce_orig": 0.9385986328125, + "epoch": 0.1760011503343159, + "kl_loss": 1.0947903394699097, + "loss_ib": 0.0199448075145483, + "step": 612 + }, + { + "ce_ib": 13.072503089904785, + "ce_orig": 0.988423764705658, + "epoch": 0.1760011503343159, + "kl_loss": 1.1813626289367676, + "loss_ib": 0.024886131286621094, + "step": 612 + }, + { + "ce_ib": 8.529322624206543, + "ce_orig": 0.733309805393219, + "epoch": 0.1760011503343159, + "kl_loss": 1.091768741607666, + "loss_ib": 0.01944701001048088, + "step": 612 + }, + { + "ce_ib": 12.893845558166504, + "ce_orig": 1.074021339416504, + "epoch": 0.1760011503343159, + "kl_loss": 1.106937050819397, + "loss_ib": 0.023963216692209244, + "step": 612 + }, + { + "ce_ib": 12.352333068847656, + "ce_orig": 0.9047135710716248, + "epoch": 0.17628873391329355, + "kl_loss": 1.1527860164642334, + "loss_ib": 0.023880193009972572, + "step": 613 + }, + { + "ce_ib": 11.772270202636719, + "ce_orig": 0.8379567265510559, + "epoch": 0.17628873391329355, + "kl_loss": 1.1471657752990723, + "loss_ib": 0.02324392832815647, + "step": 613 + }, + { + "ce_ib": 8.202688217163086, + "ce_orig": 0.5956844091415405, + "epoch": 0.17628873391329355, + "kl_loss": 1.133353352546692, + "loss_ib": 0.019536221399903297, + "step": 613 + }, + { + "ce_ib": 13.991854667663574, + "ce_orig": 1.4801995754241943, + "epoch": 0.17628873391329355, + "kl_loss": 1.1663241386413574, + "loss_ib": 0.025655098259449005, + "step": 613 + }, + { + "ce_ib": 9.825779914855957, + "ce_orig": 0.581002950668335, + "epoch": 0.17657631749227118, + "kl_loss": 1.0918254852294922, + "loss_ib": 0.02074403502047062, + "step": 614 + }, + { + "ce_ib": 9.005266189575195, + "ce_orig": 0.6865787506103516, + "epoch": 0.17657631749227118, + "kl_loss": 1.0758470296859741, + "loss_ib": 0.019763736054301262, + "step": 614 + }, + { + "ce_ib": 8.376243591308594, + "ce_orig": 0.7356083393096924, + "epoch": 0.17657631749227118, + "kl_loss": 1.0604243278503418, + "loss_ib": 0.018980486318469048, + "step": 614 + }, + { + "ce_ib": 10.727423667907715, + "ce_orig": 0.5603511333465576, + "epoch": 0.17657631749227118, + "kl_loss": 1.1472487449645996, + "loss_ib": 0.02219991199672222, + "step": 614 + }, + { + "epoch": 0.17686390107124883, + "grad_norm": 0.09426393359899521, + "learning_rate": 9.979048302224624e-06, + "loss": 0.8892, + "step": 615 + }, + { + "ce_ib": 13.396039962768555, + "ce_orig": 1.0348241329193115, + "epoch": 0.17686390107124883, + "kl_loss": 1.1215533018112183, + "loss_ib": 0.024611571803689003, + "step": 615 + }, + { + "ce_ib": 13.98047924041748, + "ce_orig": 0.8722472190856934, + "epoch": 0.17686390107124883, + "kl_loss": 1.12811279296875, + "loss_ib": 0.02526160702109337, + "step": 615 + }, + { + "ce_ib": 6.756248950958252, + "ce_orig": 0.654313862323761, + "epoch": 0.17686390107124883, + "kl_loss": 1.0764236450195312, + "loss_ib": 0.017520485445857048, + "step": 615 + }, + { + "ce_ib": 7.255735397338867, + "ce_orig": 0.381597638130188, + "epoch": 0.17686390107124883, + "kl_loss": 1.1145398616790771, + "loss_ib": 0.01840113289654255, + "step": 615 + }, + { + "ce_ib": 11.688261032104492, + "ce_orig": 0.495026558637619, + "epoch": 0.17715148465022648, + "kl_loss": 1.1114025115966797, + "loss_ib": 0.022802285850048065, + "step": 616 + }, + { + "ce_ib": 9.351430892944336, + "ce_orig": 0.6223934292793274, + "epoch": 0.17715148465022648, + "kl_loss": 1.0816258192062378, + "loss_ib": 0.02016768977046013, + "step": 616 + }, + { + "ce_ib": 7.122152328491211, + "ce_orig": 0.6948713660240173, + "epoch": 0.17715148465022648, + "kl_loss": 1.10568106174469, + "loss_ib": 0.018178964033722878, + "step": 616 + }, + { + "ce_ib": 6.703441619873047, + "ce_orig": 0.7388578653335571, + "epoch": 0.17715148465022648, + "kl_loss": 1.0877346992492676, + "loss_ib": 0.0175807885825634, + "step": 616 + }, + { + "ce_ib": 7.440734386444092, + "ce_orig": 0.5273018479347229, + "epoch": 0.1774390682292041, + "kl_loss": 1.0771890878677368, + "loss_ib": 0.01821262575685978, + "step": 617 + }, + { + "ce_ib": 7.850268363952637, + "ce_orig": 0.6726818680763245, + "epoch": 0.1774390682292041, + "kl_loss": 1.0832056999206543, + "loss_ib": 0.01868232525885105, + "step": 617 + }, + { + "ce_ib": 11.564708709716797, + "ce_orig": 0.9945627450942993, + "epoch": 0.1774390682292041, + "kl_loss": 1.079153299331665, + "loss_ib": 0.022356241941452026, + "step": 617 + }, + { + "ce_ib": 9.48259449005127, + "ce_orig": 1.3080958127975464, + "epoch": 0.1774390682292041, + "kl_loss": 1.0585891008377075, + "loss_ib": 0.020068485289812088, + "step": 617 + }, + { + "ce_ib": 6.543670654296875, + "ce_orig": 0.6545018553733826, + "epoch": 0.17772665180818176, + "kl_loss": 1.0263237953186035, + "loss_ib": 0.016806907951831818, + "step": 618 + }, + { + "ce_ib": 14.908156394958496, + "ce_orig": 1.3185038566589355, + "epoch": 0.17772665180818176, + "kl_loss": 1.0666757822036743, + "loss_ib": 0.02557491324841976, + "step": 618 + }, + { + "ce_ib": 7.1459832191467285, + "ce_orig": 0.6320990920066833, + "epoch": 0.17772665180818176, + "kl_loss": 1.0741521120071411, + "loss_ib": 0.017887502908706665, + "step": 618 + }, + { + "ce_ib": 13.496872901916504, + "ce_orig": 1.4044506549835205, + "epoch": 0.17772665180818176, + "kl_loss": 1.0919251441955566, + "loss_ib": 0.0244161244481802, + "step": 618 + }, + { + "ce_ib": 11.312828063964844, + "ce_orig": 0.8793609738349915, + "epoch": 0.17801423538715938, + "kl_loss": 1.1443016529083252, + "loss_ib": 0.02275584451854229, + "step": 619 + }, + { + "ce_ib": 6.740575790405273, + "ce_orig": 0.5719377398490906, + "epoch": 0.17801423538715938, + "kl_loss": 1.0880985260009766, + "loss_ib": 0.017621560022234917, + "step": 619 + }, + { + "ce_ib": 5.965404033660889, + "ce_orig": 0.7937454581260681, + "epoch": 0.17801423538715938, + "kl_loss": 1.054682970046997, + "loss_ib": 0.016512233763933182, + "step": 619 + }, + { + "ce_ib": 12.703347206115723, + "ce_orig": 0.7259251475334167, + "epoch": 0.17801423538715938, + "kl_loss": 1.1318557262420654, + "loss_ib": 0.024021903052926064, + "step": 619 + }, + { + "epoch": 0.17830181896613703, + "grad_norm": 0.10790643841028214, + "learning_rate": 9.978332574056468e-06, + "loss": 0.8558, + "step": 620 + }, + { + "ce_ib": 7.719181537628174, + "ce_orig": 0.7105019092559814, + "epoch": 0.17830181896613703, + "kl_loss": 1.0935070514678955, + "loss_ib": 0.018654251471161842, + "step": 620 + }, + { + "ce_ib": 7.3571600914001465, + "ce_orig": 0.46813029050827026, + "epoch": 0.17830181896613703, + "kl_loss": 1.1948972940444946, + "loss_ib": 0.019306132569909096, + "step": 620 + }, + { + "ce_ib": 15.235627174377441, + "ce_orig": 1.8518890142440796, + "epoch": 0.17830181896613703, + "kl_loss": 1.1301660537719727, + "loss_ib": 0.02653728984296322, + "step": 620 + }, + { + "ce_ib": 9.883748054504395, + "ce_orig": 0.7214540839195251, + "epoch": 0.17830181896613703, + "kl_loss": 1.0958552360534668, + "loss_ib": 0.0208422988653183, + "step": 620 + }, + { + "ce_ib": 9.291598320007324, + "ce_orig": 0.8704307079315186, + "epoch": 0.17858940254511468, + "kl_loss": 1.1199560165405273, + "loss_ib": 0.020491158589720726, + "step": 621 + }, + { + "ce_ib": 7.236078262329102, + "ce_orig": 0.6710290312767029, + "epoch": 0.17858940254511468, + "kl_loss": 1.0643848180770874, + "loss_ib": 0.017879927530884743, + "step": 621 + }, + { + "ce_ib": 10.731121063232422, + "ce_orig": 0.8992137908935547, + "epoch": 0.17858940254511468, + "kl_loss": 1.0887260437011719, + "loss_ib": 0.021618379279971123, + "step": 621 + }, + { + "ce_ib": 6.691609859466553, + "ce_orig": 0.23901374638080597, + "epoch": 0.17858940254511468, + "kl_loss": 1.2286667823791504, + "loss_ib": 0.01897827908396721, + "step": 621 + }, + { + "ce_ib": 10.243730545043945, + "ce_orig": 0.6851696968078613, + "epoch": 0.1788769861240923, + "kl_loss": 1.1422102451324463, + "loss_ib": 0.02166583202779293, + "step": 622 + }, + { + "ce_ib": 9.9014892578125, + "ce_orig": 0.861434280872345, + "epoch": 0.1788769861240923, + "kl_loss": 1.1246778964996338, + "loss_ib": 0.02114826813340187, + "step": 622 + }, + { + "ce_ib": 9.937746047973633, + "ce_orig": 0.7701823711395264, + "epoch": 0.1788769861240923, + "kl_loss": 1.0272612571716309, + "loss_ib": 0.02021035924553871, + "step": 622 + }, + { + "ce_ib": 11.04038143157959, + "ce_orig": 1.1329587697982788, + "epoch": 0.1788769861240923, + "kl_loss": 1.0876765251159668, + "loss_ib": 0.021917147561907768, + "step": 622 + }, + { + "ce_ib": 9.774174690246582, + "ce_orig": 0.6889547109603882, + "epoch": 0.17916456970306996, + "kl_loss": 1.1281490325927734, + "loss_ib": 0.021055664867162704, + "step": 623 + }, + { + "ce_ib": 8.301161766052246, + "ce_orig": 0.9744248390197754, + "epoch": 0.17916456970306996, + "kl_loss": 1.1025155782699585, + "loss_ib": 0.019326316192746162, + "step": 623 + }, + { + "ce_ib": 11.626615524291992, + "ce_orig": 1.0212253332138062, + "epoch": 0.17916456970306996, + "kl_loss": 1.1386442184448242, + "loss_ib": 0.023013057187199593, + "step": 623 + }, + { + "ce_ib": 9.228303909301758, + "ce_orig": 0.9091984629631042, + "epoch": 0.17916456970306996, + "kl_loss": 1.0976030826568604, + "loss_ib": 0.02020433358848095, + "step": 623 + }, + { + "ce_ib": 12.572793960571289, + "ce_orig": 1.365990161895752, + "epoch": 0.17945215328204758, + "kl_loss": 1.0628557205200195, + "loss_ib": 0.0232013501226902, + "step": 624 + }, + { + "ce_ib": 6.557443141937256, + "ce_orig": 0.38862401247024536, + "epoch": 0.17945215328204758, + "kl_loss": 1.0641001462936401, + "loss_ib": 0.01719844341278076, + "step": 624 + }, + { + "ce_ib": 11.218859672546387, + "ce_orig": 1.0366816520690918, + "epoch": 0.17945215328204758, + "kl_loss": 1.0765401124954224, + "loss_ib": 0.021984262391924858, + "step": 624 + }, + { + "ce_ib": 13.108721733093262, + "ce_orig": 1.2099130153656006, + "epoch": 0.17945215328204758, + "kl_loss": 1.0831751823425293, + "loss_ib": 0.023940471932291985, + "step": 624 + }, + { + "epoch": 0.17973973686102523, + "grad_norm": 0.0922677144408226, + "learning_rate": 9.977604851929648e-06, + "loss": 0.9102, + "step": 625 + }, + { + "ce_ib": 8.263596534729004, + "ce_orig": 0.6577824354171753, + "epoch": 0.17973973686102523, + "kl_loss": 1.1678799390792847, + "loss_ib": 0.019942395389080048, + "step": 625 + }, + { + "ce_ib": 7.4331560134887695, + "ce_orig": 0.6097143888473511, + "epoch": 0.17973973686102523, + "kl_loss": 1.0634379386901855, + "loss_ib": 0.018067536875605583, + "step": 625 + }, + { + "ce_ib": 9.178628921508789, + "ce_orig": 0.8557302355766296, + "epoch": 0.17973973686102523, + "kl_loss": 1.1199026107788086, + "loss_ib": 0.02037765458226204, + "step": 625 + }, + { + "ce_ib": 8.077595710754395, + "ce_orig": 0.7694026231765747, + "epoch": 0.17973973686102523, + "kl_loss": 0.9992647171020508, + "loss_ib": 0.018070241436362267, + "step": 625 + }, + { + "ce_ib": 14.997899055480957, + "ce_orig": 1.3085542917251587, + "epoch": 0.18002732044000289, + "kl_loss": 1.0909315347671509, + "loss_ib": 0.025907214730978012, + "step": 626 + }, + { + "ce_ib": 14.065059661865234, + "ce_orig": 1.3615686893463135, + "epoch": 0.18002732044000289, + "kl_loss": 1.1272248029708862, + "loss_ib": 0.025337306782603264, + "step": 626 + }, + { + "ce_ib": 9.213851928710938, + "ce_orig": 0.4737341105937958, + "epoch": 0.18002732044000289, + "kl_loss": 1.1103893518447876, + "loss_ib": 0.020317744463682175, + "step": 626 + }, + { + "ce_ib": 10.141777992248535, + "ce_orig": 1.0340423583984375, + "epoch": 0.18002732044000289, + "kl_loss": 1.1753777265548706, + "loss_ib": 0.021895555779337883, + "step": 626 + }, + { + "ce_ib": 10.606011390686035, + "ce_orig": 0.5892968773841858, + "epoch": 0.1803149040189805, + "kl_loss": 1.1311826705932617, + "loss_ib": 0.021917838603258133, + "step": 627 + }, + { + "ce_ib": 8.124170303344727, + "ce_orig": 0.42015740275382996, + "epoch": 0.1803149040189805, + "kl_loss": 1.108632206916809, + "loss_ib": 0.019210491329431534, + "step": 627 + }, + { + "ce_ib": 10.349467277526855, + "ce_orig": 0.7760807275772095, + "epoch": 0.1803149040189805, + "kl_loss": 1.0804953575134277, + "loss_ib": 0.02115442231297493, + "step": 627 + }, + { + "ce_ib": 8.495221138000488, + "ce_orig": 0.5720870494842529, + "epoch": 0.1803149040189805, + "kl_loss": 1.1552854776382446, + "loss_ib": 0.020048074424266815, + "step": 627 + }, + { + "ce_ib": 13.331109046936035, + "ce_orig": 1.5061442852020264, + "epoch": 0.18060248759795816, + "kl_loss": 1.1464059352874756, + "loss_ib": 0.0247951690107584, + "step": 628 + }, + { + "ce_ib": 11.41611099243164, + "ce_orig": 1.1628519296646118, + "epoch": 0.18060248759795816, + "kl_loss": 1.0600998401641846, + "loss_ib": 0.022017108276486397, + "step": 628 + }, + { + "ce_ib": 12.698983192443848, + "ce_orig": 1.187642216682434, + "epoch": 0.18060248759795816, + "kl_loss": 1.1207376718521118, + "loss_ib": 0.02390635944902897, + "step": 628 + }, + { + "ce_ib": 15.049531936645508, + "ce_orig": 1.8772828578948975, + "epoch": 0.18060248759795816, + "kl_loss": 1.087199330329895, + "loss_ib": 0.025921525433659554, + "step": 628 + }, + { + "ce_ib": 8.250436782836914, + "ce_orig": 0.7029029130935669, + "epoch": 0.18089007117693578, + "kl_loss": 1.1021440029144287, + "loss_ib": 0.01927187666296959, + "step": 629 + }, + { + "ce_ib": 13.998225212097168, + "ce_orig": 1.2588441371917725, + "epoch": 0.18089007117693578, + "kl_loss": 1.080070972442627, + "loss_ib": 0.024798937141895294, + "step": 629 + }, + { + "ce_ib": 12.915166854858398, + "ce_orig": 1.296819806098938, + "epoch": 0.18089007117693578, + "kl_loss": 1.1545573472976685, + "loss_ib": 0.024460740387439728, + "step": 629 + }, + { + "ce_ib": 9.678729057312012, + "ce_orig": 0.5961591601371765, + "epoch": 0.18089007117693578, + "kl_loss": 1.220086932182312, + "loss_ib": 0.02187959849834442, + "step": 629 + }, + { + "epoch": 0.18117765475591344, + "grad_norm": 0.0966140478849411, + "learning_rate": 9.97686513759741e-06, + "loss": 0.9272, + "step": 630 + }, + { + "ce_ib": 13.0838623046875, + "ce_orig": 1.260634422302246, + "epoch": 0.18117765475591344, + "kl_loss": 1.1091606616973877, + "loss_ib": 0.02417546696960926, + "step": 630 + }, + { + "ce_ib": 6.934885025024414, + "ce_orig": 0.4972129762172699, + "epoch": 0.18117765475591344, + "kl_loss": 1.0627329349517822, + "loss_ib": 0.01756221428513527, + "step": 630 + }, + { + "ce_ib": 12.18768310546875, + "ce_orig": 1.4976786375045776, + "epoch": 0.18117765475591344, + "kl_loss": 1.0954440832138062, + "loss_ib": 0.023142123594880104, + "step": 630 + }, + { + "ce_ib": 11.109347343444824, + "ce_orig": 1.1459025144577026, + "epoch": 0.18117765475591344, + "kl_loss": 1.054826259613037, + "loss_ib": 0.021657610312104225, + "step": 630 + }, + { + "ce_ib": 10.95541763305664, + "ce_orig": 0.6836705207824707, + "epoch": 0.1814652383348911, + "kl_loss": 1.0429816246032715, + "loss_ib": 0.021385235711932182, + "step": 631 + }, + { + "ce_ib": 10.109951972961426, + "ce_orig": 0.8095191717147827, + "epoch": 0.1814652383348911, + "kl_loss": 1.072096347808838, + "loss_ib": 0.020830916240811348, + "step": 631 + }, + { + "ce_ib": 7.40610408782959, + "ce_orig": 0.6426340341567993, + "epoch": 0.1814652383348911, + "kl_loss": 1.0597965717315674, + "loss_ib": 0.018004069104790688, + "step": 631 + }, + { + "ce_ib": 8.1773681640625, + "ce_orig": 0.9187619090080261, + "epoch": 0.1814652383348911, + "kl_loss": 1.0321749448776245, + "loss_ib": 0.018499117344617844, + "step": 631 + }, + { + "ce_ib": 7.07262659072876, + "ce_orig": 0.8202535510063171, + "epoch": 0.1817528219138687, + "kl_loss": 0.9864460229873657, + "loss_ib": 0.01693708635866642, + "step": 632 + }, + { + "ce_ib": 7.180476188659668, + "ce_orig": 0.4838648736476898, + "epoch": 0.1817528219138687, + "kl_loss": 1.0089023113250732, + "loss_ib": 0.017269499599933624, + "step": 632 + }, + { + "ce_ib": 15.649883270263672, + "ce_orig": 1.6325815916061401, + "epoch": 0.1817528219138687, + "kl_loss": 1.1197898387908936, + "loss_ib": 0.026847781613469124, + "step": 632 + }, + { + "ce_ib": 10.30772590637207, + "ce_orig": 0.8685612082481384, + "epoch": 0.1817528219138687, + "kl_loss": 1.0974705219268799, + "loss_ib": 0.02128242887556553, + "step": 632 + }, + { + "ce_ib": 8.978386878967285, + "ce_orig": 1.0552774667739868, + "epoch": 0.18204040549284636, + "kl_loss": 1.012761116027832, + "loss_ib": 0.019105996936559677, + "step": 633 + }, + { + "ce_ib": 10.887259483337402, + "ce_orig": 1.5764906406402588, + "epoch": 0.18204040549284636, + "kl_loss": 1.0496618747711182, + "loss_ib": 0.021383875980973244, + "step": 633 + }, + { + "ce_ib": 10.534551620483398, + "ce_orig": 1.1252496242523193, + "epoch": 0.18204040549284636, + "kl_loss": 1.0642296075820923, + "loss_ib": 0.02117684856057167, + "step": 633 + }, + { + "ce_ib": 9.471675872802734, + "ce_orig": 0.3606927990913391, + "epoch": 0.18204040549284636, + "kl_loss": 1.1879955530166626, + "loss_ib": 0.021351629868149757, + "step": 633 + }, + { + "ce_ib": 9.976670265197754, + "ce_orig": 1.006685495376587, + "epoch": 0.182327989071824, + "kl_loss": 1.0557842254638672, + "loss_ib": 0.020534511655569077, + "step": 634 + }, + { + "ce_ib": 8.402043342590332, + "ce_orig": 1.0922396183013916, + "epoch": 0.182327989071824, + "kl_loss": 1.023686408996582, + "loss_ib": 0.018638907000422478, + "step": 634 + }, + { + "ce_ib": 10.423868179321289, + "ce_orig": 0.9032129049301147, + "epoch": 0.182327989071824, + "kl_loss": 1.1368285417556763, + "loss_ib": 0.021792152896523476, + "step": 634 + }, + { + "ce_ib": 7.688118934631348, + "ce_orig": 0.46169134974479675, + "epoch": 0.182327989071824, + "kl_loss": 1.095708966255188, + "loss_ib": 0.018645208328962326, + "step": 634 + }, + { + "epoch": 0.18261557265080164, + "grad_norm": 0.09018189460039139, + "learning_rate": 9.976113432841903e-06, + "loss": 0.9332, + "step": 635 + }, + { + "ce_ib": 9.740583419799805, + "ce_orig": 0.7694171667098999, + "epoch": 0.18261557265080164, + "kl_loss": 1.0555506944656372, + "loss_ib": 0.020296089351177216, + "step": 635 + }, + { + "ce_ib": 8.057018280029297, + "ce_orig": 0.48739856481552124, + "epoch": 0.18261557265080164, + "kl_loss": 1.2050068378448486, + "loss_ib": 0.02010708674788475, + "step": 635 + }, + { + "ce_ib": 8.886861801147461, + "ce_orig": 0.4765666127204895, + "epoch": 0.18261557265080164, + "kl_loss": 1.071749210357666, + "loss_ib": 0.019604353234171867, + "step": 635 + }, + { + "ce_ib": 13.106306076049805, + "ce_orig": 0.9705209136009216, + "epoch": 0.18261557265080164, + "kl_loss": 1.1007217168807983, + "loss_ib": 0.024113522842526436, + "step": 635 + }, + { + "ce_ib": 10.954051971435547, + "ce_orig": 1.050213098526001, + "epoch": 0.1829031562297793, + "kl_loss": 1.066004991531372, + "loss_ib": 0.02161410264670849, + "step": 636 + }, + { + "ce_ib": 9.91700553894043, + "ce_orig": 0.680712103843689, + "epoch": 0.1829031562297793, + "kl_loss": 1.132023572921753, + "loss_ib": 0.021237241104245186, + "step": 636 + }, + { + "ce_ib": 8.767101287841797, + "ce_orig": 0.44354522228240967, + "epoch": 0.1829031562297793, + "kl_loss": 1.108346939086914, + "loss_ib": 0.01985057070851326, + "step": 636 + }, + { + "ce_ib": 9.128534317016602, + "ce_orig": 0.3988248407840729, + "epoch": 0.1829031562297793, + "kl_loss": 1.086971402168274, + "loss_ib": 0.019998246803879738, + "step": 636 + }, + { + "ce_ib": 13.582477569580078, + "ce_orig": 1.6579183340072632, + "epoch": 0.1831907398087569, + "kl_loss": 1.0730781555175781, + "loss_ib": 0.02431325800716877, + "step": 637 + }, + { + "ce_ib": 17.059223175048828, + "ce_orig": 1.6747536659240723, + "epoch": 0.1831907398087569, + "kl_loss": 1.1021394729614258, + "loss_ib": 0.028080618008971214, + "step": 637 + }, + { + "ce_ib": 13.071850776672363, + "ce_orig": 1.5738227367401123, + "epoch": 0.1831907398087569, + "kl_loss": 1.1231281757354736, + "loss_ib": 0.0243031308054924, + "step": 637 + }, + { + "ce_ib": 9.79615592956543, + "ce_orig": 1.0085848569869995, + "epoch": 0.1831907398087569, + "kl_loss": 1.0980044603347778, + "loss_ib": 0.02077619917690754, + "step": 637 + }, + { + "ce_ib": 6.036836624145508, + "ce_orig": 0.4214065670967102, + "epoch": 0.18347832338773457, + "kl_loss": 1.0429325103759766, + "loss_ib": 0.016466161236166954, + "step": 638 + }, + { + "ce_ib": 7.146793365478516, + "ce_orig": 0.6582375764846802, + "epoch": 0.18347832338773457, + "kl_loss": 1.0144261121749878, + "loss_ib": 0.017291054129600525, + "step": 638 + }, + { + "ce_ib": 13.084831237792969, + "ce_orig": 1.4547115564346313, + "epoch": 0.18347832338773457, + "kl_loss": 1.0451858043670654, + "loss_ib": 0.023536689579486847, + "step": 638 + }, + { + "ce_ib": 10.764507293701172, + "ce_orig": 1.1796584129333496, + "epoch": 0.18347832338773457, + "kl_loss": 1.077453851699829, + "loss_ib": 0.02153904363512993, + "step": 638 + }, + { + "ce_ib": 4.687036514282227, + "ce_orig": 0.18363694846630096, + "epoch": 0.1837659069667122, + "kl_loss": 1.1889199018478394, + "loss_ib": 0.016576234251260757, + "step": 639 + }, + { + "ce_ib": 10.766698837280273, + "ce_orig": 0.825289785861969, + "epoch": 0.1837659069667122, + "kl_loss": 1.0548924207687378, + "loss_ib": 0.021315621212124825, + "step": 639 + }, + { + "ce_ib": 8.863663673400879, + "ce_orig": 0.8204382061958313, + "epoch": 0.1837659069667122, + "kl_loss": 1.019961953163147, + "loss_ib": 0.019063282757997513, + "step": 639 + }, + { + "ce_ib": 13.531327247619629, + "ce_orig": 1.1665120124816895, + "epoch": 0.1837659069667122, + "kl_loss": 1.0381747484207153, + "loss_ib": 0.023913072422146797, + "step": 639 + }, + { + "epoch": 0.18405349054568984, + "grad_norm": 0.09696701914072037, + "learning_rate": 9.975349739474156e-06, + "loss": 0.8875, + "step": 640 + }, + { + "ce_ib": 7.491870403289795, + "ce_orig": 0.907927930355072, + "epoch": 0.18405349054568984, + "kl_loss": 0.9900725483894348, + "loss_ib": 0.01739259622991085, + "step": 640 + }, + { + "ce_ib": 7.663341999053955, + "ce_orig": 0.5722526907920837, + "epoch": 0.18405349054568984, + "kl_loss": 1.0283775329589844, + "loss_ib": 0.01794711872935295, + "step": 640 + }, + { + "ce_ib": 10.040138244628906, + "ce_orig": 0.7578917145729065, + "epoch": 0.18405349054568984, + "kl_loss": 1.0339435338974, + "loss_ib": 0.020379573106765747, + "step": 640 + }, + { + "ce_ib": 11.124543190002441, + "ce_orig": 1.2751201391220093, + "epoch": 0.18405349054568984, + "kl_loss": 1.0301541090011597, + "loss_ib": 0.021426083520054817, + "step": 640 + }, + { + "ce_ib": 11.990524291992188, + "ce_orig": 1.2356369495391846, + "epoch": 0.1843410741246675, + "kl_loss": 1.1035387516021729, + "loss_ib": 0.023025913164019585, + "step": 641 + }, + { + "ce_ib": 7.862361431121826, + "ce_orig": 0.964101254940033, + "epoch": 0.1843410741246675, + "kl_loss": 1.1734020709991455, + "loss_ib": 0.01959638111293316, + "step": 641 + }, + { + "ce_ib": 9.207853317260742, + "ce_orig": 0.9062885642051697, + "epoch": 0.1843410741246675, + "kl_loss": 1.020951747894287, + "loss_ib": 0.019417371600866318, + "step": 641 + }, + { + "ce_ib": 9.713889122009277, + "ce_orig": 1.302318811416626, + "epoch": 0.1843410741246675, + "kl_loss": 1.1001968383789062, + "loss_ib": 0.020715856924653053, + "step": 641 + }, + { + "ce_ib": 6.821298599243164, + "ce_orig": 0.7004828453063965, + "epoch": 0.18462865770364512, + "kl_loss": 0.9664976596832275, + "loss_ib": 0.01648627407848835, + "step": 642 + }, + { + "ce_ib": 9.508224487304688, + "ce_orig": 0.832171618938446, + "epoch": 0.18462865770364512, + "kl_loss": 1.0534615516662598, + "loss_ib": 0.020042838528752327, + "step": 642 + }, + { + "ce_ib": 7.55943489074707, + "ce_orig": 0.6746954917907715, + "epoch": 0.18462865770364512, + "kl_loss": 1.0909643173217773, + "loss_ib": 0.018469078466296196, + "step": 642 + }, + { + "ce_ib": 7.165235996246338, + "ce_orig": 0.9785995483398438, + "epoch": 0.18462865770364512, + "kl_loss": 1.0123915672302246, + "loss_ib": 0.01728915236890316, + "step": 642 + }, + { + "ce_ib": 10.234342575073242, + "ce_orig": 0.9009053707122803, + "epoch": 0.18491624128262277, + "kl_loss": 1.1004221439361572, + "loss_ib": 0.02123856544494629, + "step": 643 + }, + { + "ce_ib": 8.069269180297852, + "ce_orig": 0.7381336688995361, + "epoch": 0.18491624128262277, + "kl_loss": 1.0298247337341309, + "loss_ib": 0.01836751587688923, + "step": 643 + }, + { + "ce_ib": 10.050087928771973, + "ce_orig": 0.9887573719024658, + "epoch": 0.18491624128262277, + "kl_loss": 1.0247390270233154, + "loss_ib": 0.020297478884458542, + "step": 643 + }, + { + "ce_ib": 14.767660140991211, + "ce_orig": 1.0759152173995972, + "epoch": 0.18491624128262277, + "kl_loss": 1.0658990144729614, + "loss_ib": 0.025426648557186127, + "step": 643 + }, + { + "ce_ib": 9.213264465332031, + "ce_orig": 0.7979077696800232, + "epoch": 0.1852038248616004, + "kl_loss": 1.041956901550293, + "loss_ib": 0.01963283307850361, + "step": 644 + }, + { + "ce_ib": 11.142786979675293, + "ce_orig": 1.003310203552246, + "epoch": 0.1852038248616004, + "kl_loss": 1.024578332901001, + "loss_ib": 0.021388567984104156, + "step": 644 + }, + { + "ce_ib": 9.134848594665527, + "ce_orig": 0.9812929034233093, + "epoch": 0.1852038248616004, + "kl_loss": 1.1117109060287476, + "loss_ib": 0.020251957699656487, + "step": 644 + }, + { + "ce_ib": 12.999911308288574, + "ce_orig": 1.423545479774475, + "epoch": 0.1852038248616004, + "kl_loss": 1.049558401107788, + "loss_ib": 0.023495495319366455, + "step": 644 + }, + { + "epoch": 0.18549140844057804, + "grad_norm": 0.09764409065246582, + "learning_rate": 9.974574059334082e-06, + "loss": 0.9161, + "step": 645 + }, + { + "ce_ib": 14.430760383605957, + "ce_orig": 0.45323994755744934, + "epoch": 0.18549140844057804, + "kl_loss": 1.1187865734100342, + "loss_ib": 0.025618623942136765, + "step": 645 + }, + { + "ce_ib": 6.7074151039123535, + "ce_orig": 0.8436351418495178, + "epoch": 0.18549140844057804, + "kl_loss": 0.9737348556518555, + "loss_ib": 0.01644476316869259, + "step": 645 + }, + { + "ce_ib": 9.11927604675293, + "ce_orig": 1.0630453824996948, + "epoch": 0.18549140844057804, + "kl_loss": 0.966637134552002, + "loss_ib": 0.018785648047924042, + "step": 645 + }, + { + "ce_ib": 9.039983749389648, + "ce_orig": 0.4303601086139679, + "epoch": 0.18549140844057804, + "kl_loss": 1.0904786586761475, + "loss_ib": 0.019944770261645317, + "step": 645 + }, + { + "ce_ib": 9.382161140441895, + "ce_orig": 0.8849090933799744, + "epoch": 0.1857789920195557, + "kl_loss": 1.0450866222381592, + "loss_ib": 0.019833028316497803, + "step": 646 + }, + { + "ce_ib": 7.5546064376831055, + "ce_orig": 0.5726062655448914, + "epoch": 0.1857789920195557, + "kl_loss": 1.0881624221801758, + "loss_ib": 0.018436230719089508, + "step": 646 + }, + { + "ce_ib": 8.01627254486084, + "ce_orig": 0.65036940574646, + "epoch": 0.1857789920195557, + "kl_loss": 1.0229861736297607, + "loss_ib": 0.018246134743094444, + "step": 646 + }, + { + "ce_ib": 7.372588634490967, + "ce_orig": 0.6817443370819092, + "epoch": 0.1857789920195557, + "kl_loss": 0.9919678568840027, + "loss_ib": 0.017292266711592674, + "step": 646 + }, + { + "ce_ib": 10.641061782836914, + "ce_orig": 0.9732003211975098, + "epoch": 0.18606657559853332, + "kl_loss": 1.0023996829986572, + "loss_ib": 0.020665058866143227, + "step": 647 + }, + { + "ce_ib": 10.230724334716797, + "ce_orig": 0.749370276927948, + "epoch": 0.18606657559853332, + "kl_loss": 1.0023596286773682, + "loss_ib": 0.02025432139635086, + "step": 647 + }, + { + "ce_ib": 8.159378051757812, + "ce_orig": 0.7660282850265503, + "epoch": 0.18606657559853332, + "kl_loss": 1.0770816802978516, + "loss_ib": 0.018930193036794662, + "step": 647 + }, + { + "ce_ib": 11.307751655578613, + "ce_orig": 0.7283535003662109, + "epoch": 0.18606657559853332, + "kl_loss": 1.1148804426193237, + "loss_ib": 0.02245655469596386, + "step": 647 + }, + { + "ce_ib": 10.556684494018555, + "ce_orig": 1.098810076713562, + "epoch": 0.18635415917751097, + "kl_loss": 1.0427453517913818, + "loss_ib": 0.020984139293432236, + "step": 648 + }, + { + "ce_ib": 5.236063003540039, + "ce_orig": 0.2835554778575897, + "epoch": 0.18635415917751097, + "kl_loss": 1.169142246246338, + "loss_ib": 0.016927484422922134, + "step": 648 + }, + { + "ce_ib": 9.705862998962402, + "ce_orig": 1.00014328956604, + "epoch": 0.18635415917751097, + "kl_loss": 0.9767618179321289, + "loss_ib": 0.019473480060696602, + "step": 648 + }, + { + "ce_ib": 11.64765453338623, + "ce_orig": 1.3154453039169312, + "epoch": 0.18635415917751097, + "kl_loss": 1.0039262771606445, + "loss_ib": 0.021686915308237076, + "step": 648 + }, + { + "ce_ib": 8.285019874572754, + "ce_orig": 0.970478892326355, + "epoch": 0.1866417427564886, + "kl_loss": 0.9888217449188232, + "loss_ib": 0.018173236399888992, + "step": 649 + }, + { + "ce_ib": 10.269580841064453, + "ce_orig": 0.5915196537971497, + "epoch": 0.1866417427564886, + "kl_loss": 1.0976067781448364, + "loss_ib": 0.021245649084448814, + "step": 649 + }, + { + "ce_ib": 7.284735202789307, + "ce_orig": 0.6597338914871216, + "epoch": 0.1866417427564886, + "kl_loss": 1.0240867137908936, + "loss_ib": 0.017525602132081985, + "step": 649 + }, + { + "ce_ib": 5.271642208099365, + "ce_orig": 0.3803437352180481, + "epoch": 0.1866417427564886, + "kl_loss": 1.114564061164856, + "loss_ib": 0.016417281702160835, + "step": 649 + }, + { + "epoch": 0.18692932633546624, + "grad_norm": 0.1129700243473053, + "learning_rate": 9.973786394290475e-06, + "loss": 0.8796, + "step": 650 + }, + { + "ce_ib": 12.168571472167969, + "ce_orig": 1.0805795192718506, + "epoch": 0.18692932633546624, + "kl_loss": 1.005476713180542, + "loss_ib": 0.0222233384847641, + "step": 650 + }, + { + "ce_ib": 9.973319053649902, + "ce_orig": 1.3568997383117676, + "epoch": 0.18692932633546624, + "kl_loss": 1.1083464622497559, + "loss_ib": 0.021056782454252243, + "step": 650 + }, + { + "ce_ib": 10.53954792022705, + "ce_orig": 0.7421830892562866, + "epoch": 0.18692932633546624, + "kl_loss": 1.061907410621643, + "loss_ib": 0.021158622577786446, + "step": 650 + }, + { + "ce_ib": 4.828139781951904, + "ce_orig": 0.21636667847633362, + "epoch": 0.18692932633546624, + "kl_loss": 1.1125696897506714, + "loss_ib": 0.01595383696258068, + "step": 650 + }, + { + "ce_ib": 9.867918014526367, + "ce_orig": 0.3963659703731537, + "epoch": 0.1872169099144439, + "kl_loss": 1.1094999313354492, + "loss_ib": 0.020962918177247047, + "step": 651 + }, + { + "ce_ib": 9.336775779724121, + "ce_orig": 0.7294745445251465, + "epoch": 0.1872169099144439, + "kl_loss": 0.9877459406852722, + "loss_ib": 0.019214235246181488, + "step": 651 + }, + { + "ce_ib": 7.323286533355713, + "ce_orig": 0.42315956950187683, + "epoch": 0.1872169099144439, + "kl_loss": 0.990585207939148, + "loss_ib": 0.01722913794219494, + "step": 651 + }, + { + "ce_ib": 7.316843032836914, + "ce_orig": 0.44551074504852295, + "epoch": 0.1872169099144439, + "kl_loss": 1.039764165878296, + "loss_ib": 0.01771448366343975, + "step": 651 + }, + { + "ce_ib": 10.291936874389648, + "ce_orig": 0.7233940362930298, + "epoch": 0.18750449349342152, + "kl_loss": 1.0752284526824951, + "loss_ib": 0.02104422077536583, + "step": 652 + }, + { + "ce_ib": 7.927389621734619, + "ce_orig": 0.7603445053100586, + "epoch": 0.18750449349342152, + "kl_loss": 1.0002994537353516, + "loss_ib": 0.01793038286268711, + "step": 652 + }, + { + "ce_ib": 10.401845932006836, + "ce_orig": 1.1115306615829468, + "epoch": 0.18750449349342152, + "kl_loss": 1.0052720308303833, + "loss_ib": 0.020454566925764084, + "step": 652 + }, + { + "ce_ib": 11.643852233886719, + "ce_orig": 1.4086114168167114, + "epoch": 0.18750449349342152, + "kl_loss": 1.0603067874908447, + "loss_ib": 0.022246917709708214, + "step": 652 + }, + { + "ce_ib": 6.45538330078125, + "ce_orig": 0.8548846244812012, + "epoch": 0.18779207707239917, + "kl_loss": 0.945244312286377, + "loss_ib": 0.015907825902104378, + "step": 653 + }, + { + "ce_ib": 11.45804500579834, + "ce_orig": 0.9945077300071716, + "epoch": 0.18779207707239917, + "kl_loss": 0.9493337273597717, + "loss_ib": 0.02095138281583786, + "step": 653 + }, + { + "ce_ib": 12.099946975708008, + "ce_orig": 1.0418729782104492, + "epoch": 0.18779207707239917, + "kl_loss": 1.0050960779190063, + "loss_ib": 0.022150907665491104, + "step": 653 + }, + { + "ce_ib": 8.56289005279541, + "ce_orig": 0.7450115084648132, + "epoch": 0.18779207707239917, + "kl_loss": 0.9960210919380188, + "loss_ib": 0.01852310076355934, + "step": 653 + }, + { + "ce_ib": 12.9265775680542, + "ce_orig": 1.5550066232681274, + "epoch": 0.1880796606513768, + "kl_loss": 0.9958865642547607, + "loss_ib": 0.022885441780090332, + "step": 654 + }, + { + "ce_ib": 5.975699424743652, + "ce_orig": 0.6117834448814392, + "epoch": 0.1880796606513768, + "kl_loss": 0.9439165592193604, + "loss_ib": 0.015414864756166935, + "step": 654 + }, + { + "ce_ib": 9.573440551757812, + "ce_orig": 0.972037672996521, + "epoch": 0.1880796606513768, + "kl_loss": 0.9460088610649109, + "loss_ib": 0.019033528864383698, + "step": 654 + }, + { + "ce_ib": 10.364381790161133, + "ce_orig": 0.6434758305549622, + "epoch": 0.1880796606513768, + "kl_loss": 1.0528744459152222, + "loss_ib": 0.020893124863505363, + "step": 654 + }, + { + "epoch": 0.18836724423035445, + "grad_norm": 0.09933654963970184, + "learning_rate": 9.972986746241005e-06, + "loss": 0.9236, + "step": 655 + }, + { + "ce_ib": 11.423005104064941, + "ce_orig": 0.504085898399353, + "epoch": 0.18836724423035445, + "kl_loss": 1.0513900518417358, + "loss_ib": 0.02193690463900566, + "step": 655 + }, + { + "ce_ib": 7.901275634765625, + "ce_orig": 0.8905818462371826, + "epoch": 0.18836724423035445, + "kl_loss": 1.0549099445343018, + "loss_ib": 0.01845037378370762, + "step": 655 + }, + { + "ce_ib": 12.124505043029785, + "ce_orig": 1.5267455577850342, + "epoch": 0.18836724423035445, + "kl_loss": 1.0243613719940186, + "loss_ib": 0.022368118166923523, + "step": 655 + }, + { + "ce_ib": 8.49409008026123, + "ce_orig": 0.4249543845653534, + "epoch": 0.18836724423035445, + "kl_loss": 0.9719215631484985, + "loss_ib": 0.01821330562233925, + "step": 655 + }, + { + "ce_ib": 11.336442947387695, + "ce_orig": 1.0143924951553345, + "epoch": 0.1886548278093321, + "kl_loss": 1.0201416015625, + "loss_ib": 0.021537858992815018, + "step": 656 + }, + { + "ce_ib": 10.415478706359863, + "ce_orig": 0.9272794127464294, + "epoch": 0.1886548278093321, + "kl_loss": 1.0198733806610107, + "loss_ib": 0.02061421424150467, + "step": 656 + }, + { + "ce_ib": 6.768211841583252, + "ce_orig": 0.8758606910705566, + "epoch": 0.1886548278093321, + "kl_loss": 0.9514140486717224, + "loss_ib": 0.016282351687550545, + "step": 656 + }, + { + "ce_ib": 8.312684059143066, + "ce_orig": 0.8438398838043213, + "epoch": 0.1886548278093321, + "kl_loss": 0.9719037413597107, + "loss_ib": 0.01803172007203102, + "step": 656 + }, + { + "ce_ib": 12.224742889404297, + "ce_orig": 0.791438102722168, + "epoch": 0.18894241138830972, + "kl_loss": 1.07827889919281, + "loss_ib": 0.023007530719041824, + "step": 657 + }, + { + "ce_ib": 10.384964942932129, + "ce_orig": 0.6525826454162598, + "epoch": 0.18894241138830972, + "kl_loss": 1.042457103729248, + "loss_ib": 0.020809534937143326, + "step": 657 + }, + { + "ce_ib": 10.262805938720703, + "ce_orig": 0.6871621608734131, + "epoch": 0.18894241138830972, + "kl_loss": 1.0356314182281494, + "loss_ib": 0.020619120448827744, + "step": 657 + }, + { + "ce_ib": 10.362799644470215, + "ce_orig": 0.842133104801178, + "epoch": 0.18894241138830972, + "kl_loss": 1.0107371807098389, + "loss_ib": 0.02047017030417919, + "step": 657 + }, + { + "ce_ib": 12.086880683898926, + "ce_orig": 1.178883671760559, + "epoch": 0.18922999496728737, + "kl_loss": 0.9930935502052307, + "loss_ib": 0.022017816081643105, + "step": 658 + }, + { + "ce_ib": 9.931184768676758, + "ce_orig": 0.5481682419776917, + "epoch": 0.18922999496728737, + "kl_loss": 0.983871579170227, + "loss_ib": 0.019769899547100067, + "step": 658 + }, + { + "ce_ib": 9.0752534866333, + "ce_orig": 0.6848755478858948, + "epoch": 0.18922999496728737, + "kl_loss": 0.9044622182846069, + "loss_ib": 0.018119875341653824, + "step": 658 + }, + { + "ce_ib": 10.438591003417969, + "ce_orig": 1.0720442533493042, + "epoch": 0.18922999496728737, + "kl_loss": 1.0088304281234741, + "loss_ib": 0.02052689529955387, + "step": 658 + }, + { + "ce_ib": 9.08749008178711, + "ce_orig": 1.1288058757781982, + "epoch": 0.189517578546265, + "kl_loss": 0.9419035315513611, + "loss_ib": 0.018506525084376335, + "step": 659 + }, + { + "ce_ib": 10.958107948303223, + "ce_orig": 0.6390134692192078, + "epoch": 0.189517578546265, + "kl_loss": 1.059513807296753, + "loss_ib": 0.021553244441747665, + "step": 659 + }, + { + "ce_ib": 12.540372848510742, + "ce_orig": 1.3411237001419067, + "epoch": 0.189517578546265, + "kl_loss": 0.990556001663208, + "loss_ib": 0.022445930168032646, + "step": 659 + }, + { + "ce_ib": 14.00953483581543, + "ce_orig": 1.3561463356018066, + "epoch": 0.189517578546265, + "kl_loss": 1.1565253734588623, + "loss_ib": 0.025574788451194763, + "step": 659 + }, + { + "epoch": 0.18980516212524265, + "grad_norm": 0.10293088853359222, + "learning_rate": 9.972175117112208e-06, + "loss": 0.8983, + "step": 660 + }, + { + "ce_ib": 9.332024574279785, + "ce_orig": 0.8229407668113708, + "epoch": 0.18980516212524265, + "kl_loss": 0.9839355945587158, + "loss_ib": 0.01917138136923313, + "step": 660 + }, + { + "ce_ib": 10.171957015991211, + "ce_orig": 0.8634912967681885, + "epoch": 0.18980516212524265, + "kl_loss": 0.9303529858589172, + "loss_ib": 0.019475486129522324, + "step": 660 + }, + { + "ce_ib": 10.142843246459961, + "ce_orig": 0.9541047215461731, + "epoch": 0.18980516212524265, + "kl_loss": 0.9670487642288208, + "loss_ib": 0.019813330844044685, + "step": 660 + }, + { + "ce_ib": 8.594452857971191, + "ce_orig": 0.660327136516571, + "epoch": 0.18980516212524265, + "kl_loss": 1.0246200561523438, + "loss_ib": 0.01884065382182598, + "step": 660 + }, + { + "ce_ib": 9.446117401123047, + "ce_orig": 0.6387197375297546, + "epoch": 0.1900927457042203, + "kl_loss": 0.9925702810287476, + "loss_ib": 0.019371818751096725, + "step": 661 + }, + { + "ce_ib": 12.91454792022705, + "ce_orig": 1.6437798738479614, + "epoch": 0.1900927457042203, + "kl_loss": 1.0110113620758057, + "loss_ib": 0.0230246614664793, + "step": 661 + }, + { + "ce_ib": 9.189199447631836, + "ce_orig": 0.9704218506813049, + "epoch": 0.1900927457042203, + "kl_loss": 0.9272133111953735, + "loss_ib": 0.018461331725120544, + "step": 661 + }, + { + "ce_ib": 10.05646800994873, + "ce_orig": 0.6157249212265015, + "epoch": 0.1900927457042203, + "kl_loss": 0.9937683939933777, + "loss_ib": 0.01999415084719658, + "step": 661 + }, + { + "ce_ib": 4.904020309448242, + "ce_orig": 0.5232660174369812, + "epoch": 0.19038032928319792, + "kl_loss": 0.9572215676307678, + "loss_ib": 0.014476235955953598, + "step": 662 + }, + { + "ce_ib": 11.595885276794434, + "ce_orig": 0.5337156057357788, + "epoch": 0.19038032928319792, + "kl_loss": 1.1112439632415771, + "loss_ib": 0.02270832471549511, + "step": 662 + }, + { + "ce_ib": 9.98287296295166, + "ce_orig": 0.9497804045677185, + "epoch": 0.19038032928319792, + "kl_loss": 1.0508451461791992, + "loss_ib": 0.02049132250249386, + "step": 662 + }, + { + "ce_ib": 8.917495727539062, + "ce_orig": 1.0380656719207764, + "epoch": 0.19038032928319792, + "kl_loss": 0.9809185266494751, + "loss_ib": 0.018726680427789688, + "step": 662 + }, + { + "ce_ib": 8.603903770446777, + "ce_orig": 0.8844617605209351, + "epoch": 0.19066791286217558, + "kl_loss": 0.9898391366004944, + "loss_ib": 0.01850229501724243, + "step": 663 + }, + { + "ce_ib": 8.496954917907715, + "ce_orig": 0.889788031578064, + "epoch": 0.19066791286217558, + "kl_loss": 0.968459963798523, + "loss_ib": 0.018181554973125458, + "step": 663 + }, + { + "ce_ib": 13.463947296142578, + "ce_orig": 1.5212091207504272, + "epoch": 0.19066791286217558, + "kl_loss": 1.3541009426116943, + "loss_ib": 0.02700495719909668, + "step": 663 + }, + { + "ce_ib": 7.513195991516113, + "ce_orig": 0.6632037162780762, + "epoch": 0.19066791286217558, + "kl_loss": 0.9870805144309998, + "loss_ib": 0.01738400012254715, + "step": 663 + }, + { + "ce_ib": 7.072785377502441, + "ce_orig": 0.5436831116676331, + "epoch": 0.1909554964411532, + "kl_loss": 0.9044357538223267, + "loss_ib": 0.016117142513394356, + "step": 664 + }, + { + "ce_ib": 11.165665626525879, + "ce_orig": 1.3044129610061646, + "epoch": 0.1909554964411532, + "kl_loss": 0.9187023043632507, + "loss_ib": 0.020352687686681747, + "step": 664 + }, + { + "ce_ib": 8.00759506225586, + "ce_orig": 0.5903300046920776, + "epoch": 0.1909554964411532, + "kl_loss": 0.9148058891296387, + "loss_ib": 0.017155654728412628, + "step": 664 + }, + { + "ce_ib": 9.988378524780273, + "ce_orig": 0.9803927540779114, + "epoch": 0.1909554964411532, + "kl_loss": 0.883601188659668, + "loss_ib": 0.018824391067028046, + "step": 664 + }, + { + "epoch": 0.19124308002013085, + "grad_norm": 0.1290864795446396, + "learning_rate": 9.971351508859488e-06, + "loss": 0.9177, + "step": 665 + }, + { + "ce_ib": 8.51546859741211, + "ce_orig": 0.7638845443725586, + "epoch": 0.19124308002013085, + "kl_loss": 0.8143119812011719, + "loss_ib": 0.016658587381243706, + "step": 665 + }, + { + "ce_ib": 6.580199718475342, + "ce_orig": 0.5441961884498596, + "epoch": 0.19124308002013085, + "kl_loss": 0.898180365562439, + "loss_ib": 0.015562002547085285, + "step": 665 + }, + { + "ce_ib": 9.424781799316406, + "ce_orig": 0.8517243266105652, + "epoch": 0.19124308002013085, + "kl_loss": 0.9845488667488098, + "loss_ib": 0.019270269200205803, + "step": 665 + }, + { + "ce_ib": 11.227746963500977, + "ce_orig": 0.7697399258613586, + "epoch": 0.19124308002013085, + "kl_loss": 1.012844204902649, + "loss_ib": 0.021356189623475075, + "step": 665 + }, + { + "ce_ib": 13.270841598510742, + "ce_orig": 1.091408133506775, + "epoch": 0.1915306635991085, + "kl_loss": 0.9616622924804688, + "loss_ib": 0.022887462750077248, + "step": 666 + }, + { + "ce_ib": 5.886293411254883, + "ce_orig": 0.31356731057167053, + "epoch": 0.1915306635991085, + "kl_loss": 0.971168041229248, + "loss_ib": 0.015597973950207233, + "step": 666 + }, + { + "ce_ib": 12.855634689331055, + "ce_orig": 1.4516594409942627, + "epoch": 0.1915306635991085, + "kl_loss": 0.9783580303192139, + "loss_ib": 0.022639214992523193, + "step": 666 + }, + { + "ce_ib": 9.463467597961426, + "ce_orig": 0.9096047878265381, + "epoch": 0.1915306635991085, + "kl_loss": 0.9198427200317383, + "loss_ib": 0.018661893904209137, + "step": 666 + }, + { + "ce_ib": 10.283754348754883, + "ce_orig": 1.3314762115478516, + "epoch": 0.19181824717808613, + "kl_loss": 1.057845115661621, + "loss_ib": 0.02086220681667328, + "step": 667 + }, + { + "ce_ib": 7.73128080368042, + "ce_orig": 0.6765826940536499, + "epoch": 0.19181824717808613, + "kl_loss": 0.9598665833473206, + "loss_ib": 0.017329946160316467, + "step": 667 + }, + { + "ce_ib": 13.801294326782227, + "ce_orig": 1.7749202251434326, + "epoch": 0.19181824717808613, + "kl_loss": 0.9999049305915833, + "loss_ib": 0.02380034327507019, + "step": 667 + }, + { + "ce_ib": 14.625811576843262, + "ce_orig": 1.5362874269485474, + "epoch": 0.19181824717808613, + "kl_loss": 0.9581853747367859, + "loss_ib": 0.024207664653658867, + "step": 667 + }, + { + "ce_ib": 9.781160354614258, + "ce_orig": 0.941349983215332, + "epoch": 0.19210583075706378, + "kl_loss": 0.9020639657974243, + "loss_ib": 0.018801799044013023, + "step": 668 + }, + { + "ce_ib": 10.93012523651123, + "ce_orig": 1.1870393753051758, + "epoch": 0.19210583075706378, + "kl_loss": 0.9062073230743408, + "loss_ib": 0.019992198795080185, + "step": 668 + }, + { + "ce_ib": 10.397263526916504, + "ce_orig": 0.7395128011703491, + "epoch": 0.19210583075706378, + "kl_loss": 0.9939507246017456, + "loss_ib": 0.02033677138388157, + "step": 668 + }, + { + "ce_ib": 8.197195053100586, + "ce_orig": 0.802003800868988, + "epoch": 0.19210583075706378, + "kl_loss": 1.0717787742614746, + "loss_ib": 0.018914982676506042, + "step": 668 + }, + { + "ce_ib": 8.156253814697266, + "ce_orig": 0.6003333926200867, + "epoch": 0.1923934143360414, + "kl_loss": 0.9009373188018799, + "loss_ib": 0.01716562733054161, + "step": 669 + }, + { + "ce_ib": 9.363840103149414, + "ce_orig": 0.7796534895896912, + "epoch": 0.1923934143360414, + "kl_loss": 0.9603847861289978, + "loss_ib": 0.01896768808364868, + "step": 669 + }, + { + "ce_ib": 6.624312400817871, + "ce_orig": 0.7180654406547546, + "epoch": 0.1923934143360414, + "kl_loss": 0.8511247038841248, + "loss_ib": 0.015135559253394604, + "step": 669 + }, + { + "ce_ib": 11.258011817932129, + "ce_orig": 0.6326055526733398, + "epoch": 0.1923934143360414, + "kl_loss": 1.004683256149292, + "loss_ib": 0.021304845809936523, + "step": 669 + }, + { + "epoch": 0.19268099791501905, + "grad_norm": 0.07958894968032837, + "learning_rate": 9.970515923467106e-06, + "loss": 0.8465, + "step": 670 + }, + { + "ce_ib": 7.144465923309326, + "ce_orig": 0.5346347093582153, + "epoch": 0.19268099791501905, + "kl_loss": 1.0311341285705566, + "loss_ib": 0.017455806955695152, + "step": 670 + }, + { + "ce_ib": 9.569890975952148, + "ce_orig": 1.0249342918395996, + "epoch": 0.19268099791501905, + "kl_loss": 1.0497715473175049, + "loss_ib": 0.02006760612130165, + "step": 670 + }, + { + "ce_ib": 7.342098236083984, + "ce_orig": 0.5448954105377197, + "epoch": 0.19268099791501905, + "kl_loss": 1.0126290321350098, + "loss_ib": 0.017468387261033058, + "step": 670 + }, + { + "ce_ib": 13.511815071105957, + "ce_orig": 1.6245704889297485, + "epoch": 0.19268099791501905, + "kl_loss": 0.9676916599273682, + "loss_ib": 0.02318873070180416, + "step": 670 + }, + { + "ce_ib": 9.829526901245117, + "ce_orig": 0.7800765633583069, + "epoch": 0.1929685814939967, + "kl_loss": 0.9008135199546814, + "loss_ib": 0.018837660551071167, + "step": 671 + }, + { + "ce_ib": 7.996551990509033, + "ce_orig": 0.7976639270782471, + "epoch": 0.1929685814939967, + "kl_loss": 0.9442075490951538, + "loss_ib": 0.017438627779483795, + "step": 671 + }, + { + "ce_ib": 11.391491889953613, + "ce_orig": 1.0702950954437256, + "epoch": 0.1929685814939967, + "kl_loss": 0.8962193131446838, + "loss_ib": 0.020353684201836586, + "step": 671 + }, + { + "ce_ib": 13.608918190002441, + "ce_orig": 1.1835685968399048, + "epoch": 0.1929685814939967, + "kl_loss": 1.0202008485794067, + "loss_ib": 0.02381092496216297, + "step": 671 + }, + { + "ce_ib": 7.881733417510986, + "ce_orig": 0.9604411125183105, + "epoch": 0.19325616507297433, + "kl_loss": 0.9708698391914368, + "loss_ib": 0.01759043149650097, + "step": 672 + }, + { + "ce_ib": 10.143787384033203, + "ce_orig": 0.805814802646637, + "epoch": 0.19325616507297433, + "kl_loss": 1.020028829574585, + "loss_ib": 0.020344074815511703, + "step": 672 + }, + { + "ce_ib": 8.782960891723633, + "ce_orig": 0.6452949047088623, + "epoch": 0.19325616507297433, + "kl_loss": 0.9659644961357117, + "loss_ib": 0.018442604690790176, + "step": 672 + }, + { + "ce_ib": 10.82643985748291, + "ce_orig": 0.8774537444114685, + "epoch": 0.19325616507297433, + "kl_loss": 0.8915767669677734, + "loss_ib": 0.01974220760166645, + "step": 672 + }, + { + "ce_ib": 8.120016098022461, + "ce_orig": 0.778851330280304, + "epoch": 0.19354374865195198, + "kl_loss": 0.9042708277702332, + "loss_ib": 0.01716272346675396, + "step": 673 + }, + { + "ce_ib": 10.228824615478516, + "ce_orig": 0.9065789580345154, + "epoch": 0.19354374865195198, + "kl_loss": 0.8779407143592834, + "loss_ib": 0.019008230417966843, + "step": 673 + }, + { + "ce_ib": 11.03148078918457, + "ce_orig": 1.0241479873657227, + "epoch": 0.19354374865195198, + "kl_loss": 1.204726219177246, + "loss_ib": 0.023078741505742073, + "step": 673 + }, + { + "ce_ib": 12.928085327148438, + "ce_orig": 1.197397232055664, + "epoch": 0.19354374865195198, + "kl_loss": 0.9511390328407288, + "loss_ib": 0.02243947423994541, + "step": 673 + }, + { + "ce_ib": 9.485182762145996, + "ce_orig": 0.6011760234832764, + "epoch": 0.1938313322309296, + "kl_loss": 1.0419973134994507, + "loss_ib": 0.019905155524611473, + "step": 674 + }, + { + "ce_ib": 6.017853736877441, + "ce_orig": 0.6303385496139526, + "epoch": 0.1938313322309296, + "kl_loss": 0.9145022034645081, + "loss_ib": 0.015162874944508076, + "step": 674 + }, + { + "ce_ib": 8.599544525146484, + "ce_orig": 0.5987470149993896, + "epoch": 0.1938313322309296, + "kl_loss": 0.8968786001205444, + "loss_ib": 0.017568331211805344, + "step": 674 + }, + { + "ce_ib": 5.367640018463135, + "ce_orig": 0.26319989562034607, + "epoch": 0.1938313322309296, + "kl_loss": 0.851106584072113, + "loss_ib": 0.013878704980015755, + "step": 674 + }, + { + "epoch": 0.19411891580990726, + "grad_norm": 0.08299347013235092, + "learning_rate": 9.969668362948186e-06, + "loss": 0.8623, + "step": 675 + }, + { + "ce_ib": 8.513809204101562, + "ce_orig": 0.46651700139045715, + "epoch": 0.19411891580990726, + "kl_loss": 1.1063188314437866, + "loss_ib": 0.019576996564865112, + "step": 675 + }, + { + "ce_ib": 13.300872802734375, + "ce_orig": 1.5554956197738647, + "epoch": 0.19411891580990726, + "kl_loss": 0.911888837814331, + "loss_ib": 0.0224197618663311, + "step": 675 + }, + { + "ce_ib": 10.305272102355957, + "ce_orig": 0.7289824485778809, + "epoch": 0.19411891580990726, + "kl_loss": 0.9203410148620605, + "loss_ib": 0.019508682191371918, + "step": 675 + }, + { + "ce_ib": 8.656975746154785, + "ce_orig": 0.8286035656929016, + "epoch": 0.19411891580990726, + "kl_loss": 0.9431976079940796, + "loss_ib": 0.018088949844241142, + "step": 675 + }, + { + "ce_ib": 7.818869113922119, + "ce_orig": 0.4440051317214966, + "epoch": 0.1944064993888849, + "kl_loss": 1.0188822746276855, + "loss_ib": 0.018007691949605942, + "step": 676 + }, + { + "ce_ib": 7.344069957733154, + "ce_orig": 0.45538222789764404, + "epoch": 0.1944064993888849, + "kl_loss": 1.0300064086914062, + "loss_ib": 0.017644133418798447, + "step": 676 + }, + { + "ce_ib": 7.1561384201049805, + "ce_orig": 0.5423603653907776, + "epoch": 0.1944064993888849, + "kl_loss": 0.8832270503044128, + "loss_ib": 0.015988409519195557, + "step": 676 + }, + { + "ce_ib": 9.0109281539917, + "ce_orig": 0.8102385401725769, + "epoch": 0.1944064993888849, + "kl_loss": 0.915709376335144, + "loss_ib": 0.018168022856116295, + "step": 676 + }, + { + "ce_ib": 7.42899751663208, + "ce_orig": 0.5108758807182312, + "epoch": 0.19469408296786253, + "kl_loss": 0.8757451176643372, + "loss_ib": 0.01618644967675209, + "step": 677 + }, + { + "ce_ib": 12.158177375793457, + "ce_orig": 0.9250555634498596, + "epoch": 0.19469408296786253, + "kl_loss": 0.9929431676864624, + "loss_ib": 0.02208760939538479, + "step": 677 + }, + { + "ce_ib": 9.143503189086914, + "ce_orig": 0.7566794157028198, + "epoch": 0.19469408296786253, + "kl_loss": 1.0857152938842773, + "loss_ib": 0.020000655204057693, + "step": 677 + }, + { + "ce_ib": 11.579789161682129, + "ce_orig": 1.1167807579040527, + "epoch": 0.19469408296786253, + "kl_loss": 0.9444433450698853, + "loss_ib": 0.021024221554398537, + "step": 677 + }, + { + "ce_ib": 10.260880470275879, + "ce_orig": 1.0816290378570557, + "epoch": 0.19498166654684018, + "kl_loss": 0.9158464670181274, + "loss_ib": 0.019419346004724503, + "step": 678 + }, + { + "ce_ib": 12.340543746948242, + "ce_orig": 0.7932845950126648, + "epoch": 0.19498166654684018, + "kl_loss": 0.9657367467880249, + "loss_ib": 0.021997911855578423, + "step": 678 + }, + { + "ce_ib": 9.458576202392578, + "ce_orig": 1.1390806436538696, + "epoch": 0.19498166654684018, + "kl_loss": 0.8594547510147095, + "loss_ib": 0.018053123727440834, + "step": 678 + }, + { + "ce_ib": 7.780326843261719, + "ce_orig": 0.803799569606781, + "epoch": 0.19498166654684018, + "kl_loss": 0.9818826913833618, + "loss_ib": 0.01759915240108967, + "step": 678 + }, + { + "ce_ib": 4.1620354652404785, + "ce_orig": 0.2644416093826294, + "epoch": 0.1952692501258178, + "kl_loss": 1.0103613138198853, + "loss_ib": 0.014265649020671844, + "step": 679 + }, + { + "ce_ib": 10.257436752319336, + "ce_orig": 0.8702826499938965, + "epoch": 0.1952692501258178, + "kl_loss": 0.911620020866394, + "loss_ib": 0.019373636692762375, + "step": 679 + }, + { + "ce_ib": 9.919028282165527, + "ce_orig": 0.7849615216255188, + "epoch": 0.1952692501258178, + "kl_loss": 0.982805609703064, + "loss_ib": 0.019747084006667137, + "step": 679 + }, + { + "ce_ib": 8.560178756713867, + "ce_orig": 0.5670347809791565, + "epoch": 0.1952692501258178, + "kl_loss": 0.8926770687103271, + "loss_ib": 0.017486948519945145, + "step": 679 + }, + { + "epoch": 0.19555683370479546, + "grad_norm": 0.09906060248613358, + "learning_rate": 9.968808829344692e-06, + "loss": 0.8659, + "step": 680 + }, + { + "ce_ib": 5.249537944793701, + "ce_orig": 0.43986520171165466, + "epoch": 0.19555683370479546, + "kl_loss": 1.0424385070800781, + "loss_ib": 0.01567392237484455, + "step": 680 + }, + { + "ce_ib": 14.56251049041748, + "ce_orig": 1.4373424053192139, + "epoch": 0.19555683370479546, + "kl_loss": 0.9467419385910034, + "loss_ib": 0.0240299291908741, + "step": 680 + }, + { + "ce_ib": 7.976964473724365, + "ce_orig": 0.5848947763442993, + "epoch": 0.19555683370479546, + "kl_loss": 0.7993128299713135, + "loss_ib": 0.01597009226679802, + "step": 680 + }, + { + "ce_ib": 8.310443878173828, + "ce_orig": 0.49164944887161255, + "epoch": 0.19555683370479546, + "kl_loss": 0.8992608189582825, + "loss_ib": 0.01730305142700672, + "step": 680 + }, + { + "ce_ib": 13.469533920288086, + "ce_orig": 1.6232653856277466, + "epoch": 0.1958444172837731, + "kl_loss": 0.9035540819168091, + "loss_ib": 0.022505072876811028, + "step": 681 + }, + { + "ce_ib": 7.154322624206543, + "ce_orig": 0.5702813267707825, + "epoch": 0.1958444172837731, + "kl_loss": 0.8137340545654297, + "loss_ib": 0.015291662886738777, + "step": 681 + }, + { + "ce_ib": 7.098222255706787, + "ce_orig": 0.735628068447113, + "epoch": 0.1958444172837731, + "kl_loss": 0.8927323818206787, + "loss_ib": 0.016025545075535774, + "step": 681 + }, + { + "ce_ib": 5.824950695037842, + "ce_orig": 0.766068160533905, + "epoch": 0.1958444172837731, + "kl_loss": 0.8107069730758667, + "loss_ib": 0.013932020403444767, + "step": 681 + }, + { + "ce_ib": 10.2679443359375, + "ce_orig": 0.5897117853164673, + "epoch": 0.19613200086275073, + "kl_loss": 0.8887754678726196, + "loss_ib": 0.019155697897076607, + "step": 682 + }, + { + "ce_ib": 7.825944900512695, + "ce_orig": 1.018816351890564, + "epoch": 0.19613200086275073, + "kl_loss": 0.7450103759765625, + "loss_ib": 0.0152760474011302, + "step": 682 + }, + { + "ce_ib": 10.176102638244629, + "ce_orig": 0.9543701410293579, + "epoch": 0.19613200086275073, + "kl_loss": 0.9644123911857605, + "loss_ib": 0.01982022635638714, + "step": 682 + }, + { + "ce_ib": 11.218408584594727, + "ce_orig": 1.213329553604126, + "epoch": 0.19613200086275073, + "kl_loss": 0.8925070762634277, + "loss_ib": 0.020143479108810425, + "step": 682 + }, + { + "ce_ib": 11.08190631866455, + "ce_orig": 1.1313570737838745, + "epoch": 0.19641958444172838, + "kl_loss": 0.9318565130233765, + "loss_ib": 0.02040047198534012, + "step": 683 + }, + { + "ce_ib": 9.102522850036621, + "ce_orig": 1.1076076030731201, + "epoch": 0.19641958444172838, + "kl_loss": 0.9036017060279846, + "loss_ib": 0.018138539046049118, + "step": 683 + }, + { + "ce_ib": 8.345836639404297, + "ce_orig": 0.9177960753440857, + "epoch": 0.19641958444172838, + "kl_loss": 0.9092356562614441, + "loss_ib": 0.017438193783164024, + "step": 683 + }, + { + "ce_ib": 13.083810806274414, + "ce_orig": 1.4744526147842407, + "epoch": 0.19641958444172838, + "kl_loss": 0.8807121515274048, + "loss_ib": 0.021890930831432343, + "step": 683 + }, + { + "ce_ib": 9.102757453918457, + "ce_orig": 0.7407585978507996, + "epoch": 0.196707168020706, + "kl_loss": 0.9970856308937073, + "loss_ib": 0.019073612987995148, + "step": 684 + }, + { + "ce_ib": 8.29672908782959, + "ce_orig": 0.6851121187210083, + "epoch": 0.196707168020706, + "kl_loss": 0.9605600833892822, + "loss_ib": 0.017902329564094543, + "step": 684 + }, + { + "ce_ib": 8.740421295166016, + "ce_orig": 0.8621047735214233, + "epoch": 0.196707168020706, + "kl_loss": 0.8722232580184937, + "loss_ib": 0.017462654039263725, + "step": 684 + }, + { + "ce_ib": 7.918392658233643, + "ce_orig": 0.7503530979156494, + "epoch": 0.196707168020706, + "kl_loss": 0.7531979084014893, + "loss_ib": 0.015450372360646725, + "step": 684 + }, + { + "epoch": 0.19699475159968366, + "grad_norm": 0.08257844299077988, + "learning_rate": 9.967937324727446e-06, + "loss": 0.8724, + "step": 685 + }, + { + "ce_ib": 11.491483688354492, + "ce_orig": 1.5021545886993408, + "epoch": 0.19699475159968366, + "kl_loss": 0.8115209341049194, + "loss_ib": 0.0196066927164793, + "step": 685 + }, + { + "ce_ib": 13.129999160766602, + "ce_orig": 1.1804494857788086, + "epoch": 0.19699475159968366, + "kl_loss": 0.7936393022537231, + "loss_ib": 0.021066393703222275, + "step": 685 + }, + { + "ce_ib": 10.34216022491455, + "ce_orig": 1.2397124767303467, + "epoch": 0.19699475159968366, + "kl_loss": 0.7960874438285828, + "loss_ib": 0.01830303482711315, + "step": 685 + }, + { + "ce_ib": 11.881531715393066, + "ce_orig": 1.3413195610046387, + "epoch": 0.19699475159968366, + "kl_loss": 0.913062334060669, + "loss_ib": 0.02101215347647667, + "step": 685 + }, + { + "ce_ib": 6.886374473571777, + "ce_orig": 0.8009768128395081, + "epoch": 0.1972823351786613, + "kl_loss": 0.6990371942520142, + "loss_ib": 0.013876745477318764, + "step": 686 + }, + { + "ce_ib": 10.036053657531738, + "ce_orig": 1.1837598085403442, + "epoch": 0.1972823351786613, + "kl_loss": 0.8260252475738525, + "loss_ib": 0.01829630509018898, + "step": 686 + }, + { + "ce_ib": 9.117568016052246, + "ce_orig": 0.6687161922454834, + "epoch": 0.1972823351786613, + "kl_loss": 0.8096814155578613, + "loss_ib": 0.01721438206732273, + "step": 686 + }, + { + "ce_ib": 8.085248947143555, + "ce_orig": 1.340101718902588, + "epoch": 0.1972823351786613, + "kl_loss": 0.7643517851829529, + "loss_ib": 0.015728766098618507, + "step": 686 + }, + { + "ce_ib": 11.919937133789062, + "ce_orig": 1.2236392498016357, + "epoch": 0.19756991875763893, + "kl_loss": 0.7394053339958191, + "loss_ib": 0.0193139910697937, + "step": 687 + }, + { + "ce_ib": 8.260920524597168, + "ce_orig": 0.9186649322509766, + "epoch": 0.19756991875763893, + "kl_loss": 0.7759698629379272, + "loss_ib": 0.016020620241761208, + "step": 687 + }, + { + "ce_ib": 8.8240966796875, + "ce_orig": 1.0625789165496826, + "epoch": 0.19756991875763893, + "kl_loss": 0.9100702404975891, + "loss_ib": 0.017924798652529716, + "step": 687 + }, + { + "ce_ib": 12.304547309875488, + "ce_orig": 0.936794638633728, + "epoch": 0.19756991875763893, + "kl_loss": 0.930564820766449, + "loss_ib": 0.021610194817185402, + "step": 687 + }, + { + "ce_ib": 8.338387489318848, + "ce_orig": 0.7846511602401733, + "epoch": 0.1978575023366166, + "kl_loss": 0.8309119939804077, + "loss_ib": 0.01664750836789608, + "step": 688 + }, + { + "ce_ib": 7.630675315856934, + "ce_orig": 0.6111834645271301, + "epoch": 0.1978575023366166, + "kl_loss": 0.9697389602661133, + "loss_ib": 0.017328064888715744, + "step": 688 + }, + { + "ce_ib": 11.70467472076416, + "ce_orig": 0.7941328287124634, + "epoch": 0.1978575023366166, + "kl_loss": 0.8729178309440613, + "loss_ib": 0.020433852449059486, + "step": 688 + }, + { + "ce_ib": 15.2159423828125, + "ce_orig": 1.038719654083252, + "epoch": 0.1978575023366166, + "kl_loss": 0.9375836253166199, + "loss_ib": 0.024591779336333275, + "step": 688 + }, + { + "ce_ib": 9.57465648651123, + "ce_orig": 0.7560713887214661, + "epoch": 0.1981450859155942, + "kl_loss": 0.8299669027328491, + "loss_ib": 0.017874324694275856, + "step": 689 + }, + { + "ce_ib": 7.711019515991211, + "ce_orig": 0.8740010261535645, + "epoch": 0.1981450859155942, + "kl_loss": 0.9161753058433533, + "loss_ib": 0.016872772946953773, + "step": 689 + }, + { + "ce_ib": 8.346826553344727, + "ce_orig": 0.6488251686096191, + "epoch": 0.1981450859155942, + "kl_loss": 0.9876278042793274, + "loss_ib": 0.018223104998469353, + "step": 689 + }, + { + "ce_ib": 11.599117279052734, + "ce_orig": 1.0851843357086182, + "epoch": 0.1981450859155942, + "kl_loss": 0.8547726273536682, + "loss_ib": 0.020146843045949936, + "step": 689 + }, + { + "epoch": 0.19843266949457186, + "grad_norm": 0.09532356262207031, + "learning_rate": 9.9670538511961e-06, + "loss": 0.8666, + "step": 690 + }, + { + "ce_ib": 9.944104194641113, + "ce_orig": 0.9654097557067871, + "epoch": 0.19843266949457186, + "kl_loss": 0.996139645576477, + "loss_ib": 0.01990550011396408, + "step": 690 + }, + { + "ce_ib": 8.787851333618164, + "ce_orig": 0.6779507994651794, + "epoch": 0.19843266949457186, + "kl_loss": 0.9373599886894226, + "loss_ib": 0.018161451444029808, + "step": 690 + }, + { + "ce_ib": 7.5119452476501465, + "ce_orig": 1.0238131284713745, + "epoch": 0.19843266949457186, + "kl_loss": 0.6883217096328735, + "loss_ib": 0.014395162463188171, + "step": 690 + }, + { + "ce_ib": 12.74599552154541, + "ce_orig": 1.3570128679275513, + "epoch": 0.19843266949457186, + "kl_loss": 0.8598670363426208, + "loss_ib": 0.021344665437936783, + "step": 690 + }, + { + "ce_ib": 9.191486358642578, + "ce_orig": 0.5052329301834106, + "epoch": 0.1987202530735495, + "kl_loss": 0.8542582988739014, + "loss_ib": 0.017734069377183914, + "step": 691 + }, + { + "ce_ib": 10.381093978881836, + "ce_orig": 1.0137697458267212, + "epoch": 0.1987202530735495, + "kl_loss": 0.769364595413208, + "loss_ib": 0.01807473972439766, + "step": 691 + }, + { + "ce_ib": 8.68432331085205, + "ce_orig": 0.7807736992835999, + "epoch": 0.1987202530735495, + "kl_loss": 0.8298584222793579, + "loss_ib": 0.0169829074293375, + "step": 691 + }, + { + "ce_ib": 6.291802883148193, + "ce_orig": 0.3723978102207184, + "epoch": 0.1987202530735495, + "kl_loss": 0.8000516891479492, + "loss_ib": 0.014292319305241108, + "step": 691 + }, + { + "ce_ib": 12.264336585998535, + "ce_orig": 1.4954090118408203, + "epoch": 0.19900783665252714, + "kl_loss": 0.796958327293396, + "loss_ib": 0.020233919844031334, + "step": 692 + }, + { + "ce_ib": 10.001005172729492, + "ce_orig": 1.2661001682281494, + "epoch": 0.19900783665252714, + "kl_loss": 0.8150477409362793, + "loss_ib": 0.018151482567191124, + "step": 692 + }, + { + "ce_ib": 9.576811790466309, + "ce_orig": 1.3062323331832886, + "epoch": 0.19900783665252714, + "kl_loss": 0.7767390012741089, + "loss_ib": 0.017344200983643532, + "step": 692 + }, + { + "ce_ib": 6.84829044342041, + "ce_orig": 0.5050943493843079, + "epoch": 0.19900783665252714, + "kl_loss": 0.8827191591262817, + "loss_ib": 0.015675483271479607, + "step": 692 + }, + { + "ce_ib": 8.917977333068848, + "ce_orig": 1.199506163597107, + "epoch": 0.1992954202315048, + "kl_loss": 0.6274911165237427, + "loss_ib": 0.015192887745797634, + "step": 693 + }, + { + "ce_ib": 7.1223883628845215, + "ce_orig": 0.7409626841545105, + "epoch": 0.1992954202315048, + "kl_loss": 0.7926149368286133, + "loss_ib": 0.015048536472022533, + "step": 693 + }, + { + "ce_ib": 11.097723007202148, + "ce_orig": 1.1110416650772095, + "epoch": 0.1992954202315048, + "kl_loss": 0.8221895694732666, + "loss_ib": 0.019319618120789528, + "step": 693 + }, + { + "ce_ib": 10.933575630187988, + "ce_orig": 0.7388496994972229, + "epoch": 0.1992954202315048, + "kl_loss": 1.0315768718719482, + "loss_ib": 0.02124934457242489, + "step": 693 + }, + { + "ce_ib": 8.662981986999512, + "ce_orig": 0.5169118642807007, + "epoch": 0.1995830038104824, + "kl_loss": 0.9072721004486084, + "loss_ib": 0.01773570291697979, + "step": 694 + }, + { + "ce_ib": 11.462176322937012, + "ce_orig": 1.4820420742034912, + "epoch": 0.1995830038104824, + "kl_loss": 0.8469343185424805, + "loss_ib": 0.019931519404053688, + "step": 694 + }, + { + "ce_ib": 8.445666313171387, + "ce_orig": 0.9313681721687317, + "epoch": 0.1995830038104824, + "kl_loss": 0.6418800354003906, + "loss_ib": 0.014864466153085232, + "step": 694 + }, + { + "ce_ib": 7.5244975090026855, + "ce_orig": 0.591560423374176, + "epoch": 0.1995830038104824, + "kl_loss": 0.8560516834259033, + "loss_ib": 0.01608501374721527, + "step": 694 + }, + { + "epoch": 0.19987058738946006, + "grad_norm": 0.10591301321983337, + "learning_rate": 9.966158410879148e-06, + "loss": 0.9255, + "step": 695 + }, + { + "ce_ib": 9.375509262084961, + "ce_orig": 1.00591242313385, + "epoch": 0.19987058738946006, + "kl_loss": 0.9248544573783875, + "loss_ib": 0.01862405426800251, + "step": 695 + }, + { + "ce_ib": 11.656967163085938, + "ce_orig": 1.2776415348052979, + "epoch": 0.19987058738946006, + "kl_loss": 0.7392134666442871, + "loss_ib": 0.019049102440476418, + "step": 695 + }, + { + "ce_ib": 10.279980659484863, + "ce_orig": 0.7536091804504395, + "epoch": 0.19987058738946006, + "kl_loss": 0.7704986333847046, + "loss_ib": 0.017984967678785324, + "step": 695 + }, + { + "ce_ib": 9.143665313720703, + "ce_orig": 0.7292720079421997, + "epoch": 0.19987058738946006, + "kl_loss": 0.8170077800750732, + "loss_ib": 0.017313743010163307, + "step": 695 + }, + { + "ce_ib": 10.739947319030762, + "ce_orig": 0.5883066058158875, + "epoch": 0.20015817096843772, + "kl_loss": 0.9453576803207397, + "loss_ib": 0.020193524658679962, + "step": 696 + }, + { + "ce_ib": 8.81908130645752, + "ce_orig": 1.073760986328125, + "epoch": 0.20015817096843772, + "kl_loss": 0.6506307125091553, + "loss_ib": 0.015325388871133327, + "step": 696 + }, + { + "ce_ib": 5.927511692047119, + "ce_orig": 0.6327902674674988, + "epoch": 0.20015817096843772, + "kl_loss": 0.8255484104156494, + "loss_ib": 0.01418299600481987, + "step": 696 + }, + { + "ce_ib": 8.285160064697266, + "ce_orig": 0.7552601099014282, + "epoch": 0.20015817096843772, + "kl_loss": 1.0531952381134033, + "loss_ib": 0.01881711184978485, + "step": 696 + }, + { + "ce_ib": 15.350908279418945, + "ce_orig": 1.7327600717544556, + "epoch": 0.20044575454741534, + "kl_loss": 0.9541932940483093, + "loss_ib": 0.024892840534448624, + "step": 697 + }, + { + "ce_ib": 5.672478199005127, + "ce_orig": 0.4321776032447815, + "epoch": 0.20044575454741534, + "kl_loss": 1.0703692436218262, + "loss_ib": 0.01637617126107216, + "step": 697 + }, + { + "ce_ib": 9.48302936553955, + "ce_orig": 1.0044292211532593, + "epoch": 0.20044575454741534, + "kl_loss": 0.827777087688446, + "loss_ib": 0.017760800197720528, + "step": 697 + }, + { + "ce_ib": 12.105079650878906, + "ce_orig": 0.9517088532447815, + "epoch": 0.20044575454741534, + "kl_loss": 0.8866432905197144, + "loss_ib": 0.0209715124219656, + "step": 697 + }, + { + "ce_ib": 11.073482513427734, + "ce_orig": 1.1429903507232666, + "epoch": 0.200733338126393, + "kl_loss": 0.6459304690361023, + "loss_ib": 0.01753278635442257, + "step": 698 + }, + { + "ce_ib": 10.372661590576172, + "ce_orig": 0.9689992070198059, + "epoch": 0.200733338126393, + "kl_loss": 0.8759998679161072, + "loss_ib": 0.01913265883922577, + "step": 698 + }, + { + "ce_ib": 11.510743141174316, + "ce_orig": 0.5491771101951599, + "epoch": 0.200733338126393, + "kl_loss": 0.8505983352661133, + "loss_ib": 0.020016726106405258, + "step": 698 + }, + { + "ce_ib": 6.4225029945373535, + "ce_orig": 0.5128731727600098, + "epoch": 0.200733338126393, + "kl_loss": 0.6717511415481567, + "loss_ib": 0.013140014372766018, + "step": 698 + }, + { + "ce_ib": 13.731460571289062, + "ce_orig": 0.9959490895271301, + "epoch": 0.20102092170537061, + "kl_loss": 0.8257856369018555, + "loss_ib": 0.02198931574821472, + "step": 699 + }, + { + "ce_ib": 5.540718078613281, + "ce_orig": 0.5061084032058716, + "epoch": 0.20102092170537061, + "kl_loss": 0.750801146030426, + "loss_ib": 0.013048729859292507, + "step": 699 + }, + { + "ce_ib": 7.21422004699707, + "ce_orig": 0.7217543721199036, + "epoch": 0.20102092170537061, + "kl_loss": 0.7603013515472412, + "loss_ib": 0.014817233197391033, + "step": 699 + }, + { + "ce_ib": 9.123298645019531, + "ce_orig": 0.6771790981292725, + "epoch": 0.20102092170537061, + "kl_loss": 0.8053810596466064, + "loss_ib": 0.01717710867524147, + "step": 699 + }, + { + "epoch": 0.20130850528434827, + "grad_norm": 0.10600654780864716, + "learning_rate": 9.965251005933915e-06, + "loss": 0.8397, + "step": 700 + }, + { + "ce_ib": 6.327905654907227, + "ce_orig": 0.5939226746559143, + "epoch": 0.20130850528434827, + "kl_loss": 0.6826160550117493, + "loss_ib": 0.013154065236449242, + "step": 700 + }, + { + "ce_ib": 9.836183547973633, + "ce_orig": 0.9476636648178101, + "epoch": 0.20130850528434827, + "kl_loss": 0.7302706837654114, + "loss_ib": 0.01713889092206955, + "step": 700 + }, + { + "ce_ib": 11.0946683883667, + "ce_orig": 1.1720821857452393, + "epoch": 0.20130850528434827, + "kl_loss": 0.8669959306716919, + "loss_ib": 0.019764628261327744, + "step": 700 + }, + { + "ce_ib": 8.813426971435547, + "ce_orig": 1.1299982070922852, + "epoch": 0.20130850528434827, + "kl_loss": 0.6991374492645264, + "loss_ib": 0.015804801136255264, + "step": 700 + }, + { + "ce_ib": 8.754616737365723, + "ce_orig": 1.024243950843811, + "epoch": 0.20159608886332592, + "kl_loss": 0.6542124152183533, + "loss_ib": 0.01529674045741558, + "step": 701 + }, + { + "ce_ib": 7.708653450012207, + "ce_orig": 0.6619385480880737, + "epoch": 0.20159608886332592, + "kl_loss": 0.6982452273368835, + "loss_ib": 0.014691106043756008, + "step": 701 + }, + { + "ce_ib": 12.858272552490234, + "ce_orig": 1.6826236248016357, + "epoch": 0.20159608886332592, + "kl_loss": 0.752007007598877, + "loss_ib": 0.020378341898322105, + "step": 701 + }, + { + "ce_ib": 8.944458961486816, + "ce_orig": 1.0155476331710815, + "epoch": 0.20159608886332592, + "kl_loss": 0.7168185710906982, + "loss_ib": 0.016112644225358963, + "step": 701 + }, + { + "ce_ib": 13.010807991027832, + "ce_orig": 0.88566654920578, + "epoch": 0.20188367244230354, + "kl_loss": 0.6652591228485107, + "loss_ib": 0.01966339908540249, + "step": 702 + }, + { + "ce_ib": 10.586206436157227, + "ce_orig": 1.103887677192688, + "epoch": 0.20188367244230354, + "kl_loss": 0.8720508813858032, + "loss_ib": 0.019306715577840805, + "step": 702 + }, + { + "ce_ib": 10.615009307861328, + "ce_orig": 0.6724156141281128, + "epoch": 0.20188367244230354, + "kl_loss": 0.9320215582847595, + "loss_ib": 0.01993522420525551, + "step": 702 + }, + { + "ce_ib": 8.414154052734375, + "ce_orig": 0.5818290710449219, + "epoch": 0.20188367244230354, + "kl_loss": 0.7827242612838745, + "loss_ib": 0.016241395846009254, + "step": 702 + }, + { + "ce_ib": 12.710061073303223, + "ce_orig": 1.1648833751678467, + "epoch": 0.2021712560212812, + "kl_loss": 0.8374737501144409, + "loss_ib": 0.021084798499941826, + "step": 703 + }, + { + "ce_ib": 7.598687171936035, + "ce_orig": 0.5311350226402283, + "epoch": 0.2021712560212812, + "kl_loss": 0.6079769134521484, + "loss_ib": 0.013678456656634808, + "step": 703 + }, + { + "ce_ib": 9.277694702148438, + "ce_orig": 0.9339279532432556, + "epoch": 0.2021712560212812, + "kl_loss": 0.8527544736862183, + "loss_ib": 0.01780523918569088, + "step": 703 + }, + { + "ce_ib": 10.174999237060547, + "ce_orig": 1.0432989597320557, + "epoch": 0.2021712560212812, + "kl_loss": 0.7690742015838623, + "loss_ib": 0.0178657416254282, + "step": 703 + }, + { + "ce_ib": 14.90958309173584, + "ce_orig": 1.5392428636550903, + "epoch": 0.20245883960025882, + "kl_loss": 0.7166196703910828, + "loss_ib": 0.022075779736042023, + "step": 704 + }, + { + "ce_ib": 10.524937629699707, + "ce_orig": 0.6721472144126892, + "epoch": 0.20245883960025882, + "kl_loss": 0.9726794958114624, + "loss_ib": 0.02025173231959343, + "step": 704 + }, + { + "ce_ib": 5.51300048828125, + "ce_orig": 0.6144857406616211, + "epoch": 0.20245883960025882, + "kl_loss": 0.5522742867469788, + "loss_ib": 0.011035742238163948, + "step": 704 + }, + { + "ce_ib": 10.596753120422363, + "ce_orig": 1.102412223815918, + "epoch": 0.20245883960025882, + "kl_loss": 0.6392167806625366, + "loss_ib": 0.01698892004787922, + "step": 704 + }, + { + "epoch": 0.20274642317923647, + "grad_norm": 0.10856325924396515, + "learning_rate": 9.96433163854655e-06, + "loss": 0.8989, + "step": 705 + }, + { + "ce_ib": 8.712186813354492, + "ce_orig": 0.788031280040741, + "epoch": 0.20274642317923647, + "kl_loss": 1.01998770236969, + "loss_ib": 0.018912063911557198, + "step": 705 + }, + { + "ce_ib": 7.393013954162598, + "ce_orig": 0.6134517788887024, + "epoch": 0.20274642317923647, + "kl_loss": 0.6545846462249756, + "loss_ib": 0.013938860967755318, + "step": 705 + }, + { + "ce_ib": 8.60453987121582, + "ce_orig": 0.6714913249015808, + "epoch": 0.20274642317923647, + "kl_loss": 0.8573524951934814, + "loss_ib": 0.017178066074848175, + "step": 705 + }, + { + "ce_ib": 8.413896560668945, + "ce_orig": 0.7840592265129089, + "epoch": 0.20274642317923647, + "kl_loss": 0.671722412109375, + "loss_ib": 0.015131120570003986, + "step": 705 + }, + { + "ce_ib": 8.781254768371582, + "ce_orig": 1.1526238918304443, + "epoch": 0.20303400675821412, + "kl_loss": 0.579412579536438, + "loss_ib": 0.014575380831956863, + "step": 706 + }, + { + "ce_ib": 9.846917152404785, + "ce_orig": 0.8160407543182373, + "epoch": 0.20303400675821412, + "kl_loss": 0.7465524673461914, + "loss_ib": 0.017312441021203995, + "step": 706 + }, + { + "ce_ib": 7.303924560546875, + "ce_orig": 0.45040014386177063, + "epoch": 0.20303400675821412, + "kl_loss": 0.5369355082511902, + "loss_ib": 0.012673280201852322, + "step": 706 + }, + { + "ce_ib": 9.779562950134277, + "ce_orig": 0.7343361973762512, + "epoch": 0.20303400675821412, + "kl_loss": 0.6700093150138855, + "loss_ib": 0.016479656100273132, + "step": 706 + }, + { + "ce_ib": 13.944967269897461, + "ce_orig": 1.2236454486846924, + "epoch": 0.20332159033719174, + "kl_loss": 0.7157278060913086, + "loss_ib": 0.021102245897054672, + "step": 707 + }, + { + "ce_ib": 11.342824935913086, + "ce_orig": 1.2302511930465698, + "epoch": 0.20332159033719174, + "kl_loss": 0.6444662809371948, + "loss_ib": 0.01778748631477356, + "step": 707 + }, + { + "ce_ib": 12.286307334899902, + "ce_orig": 1.2609407901763916, + "epoch": 0.20332159033719174, + "kl_loss": 0.890746533870697, + "loss_ib": 0.021193772554397583, + "step": 707 + }, + { + "ce_ib": 8.960893630981445, + "ce_orig": 0.8889079093933105, + "epoch": 0.20332159033719174, + "kl_loss": 0.6222097873687744, + "loss_ib": 0.015182990580797195, + "step": 707 + }, + { + "ce_ib": 8.693357467651367, + "ce_orig": 0.9998373985290527, + "epoch": 0.2036091739161694, + "kl_loss": 0.615609884262085, + "loss_ib": 0.014849456027150154, + "step": 708 + }, + { + "ce_ib": 8.566075325012207, + "ce_orig": 0.7132963538169861, + "epoch": 0.2036091739161694, + "kl_loss": 0.6065419316291809, + "loss_ib": 0.014631494879722595, + "step": 708 + }, + { + "ce_ib": 10.03363037109375, + "ce_orig": 0.62996906042099, + "epoch": 0.2036091739161694, + "kl_loss": 0.7953388690948486, + "loss_ib": 0.017987018451094627, + "step": 708 + }, + { + "ce_ib": 4.836100101470947, + "ce_orig": 0.8530847430229187, + "epoch": 0.2036091739161694, + "kl_loss": 0.5656656622886658, + "loss_ib": 0.01049275603145361, + "step": 708 + }, + { + "ce_ib": 18.170930862426758, + "ce_orig": 2.1947522163391113, + "epoch": 0.20389675749514702, + "kl_loss": 0.7079334259033203, + "loss_ib": 0.0252502653747797, + "step": 709 + }, + { + "ce_ib": 7.828283309936523, + "ce_orig": 0.9819263219833374, + "epoch": 0.20389675749514702, + "kl_loss": 0.6801222562789917, + "loss_ib": 0.014629505574703217, + "step": 709 + }, + { + "ce_ib": 10.20206069946289, + "ce_orig": 1.1113237142562866, + "epoch": 0.20389675749514702, + "kl_loss": 0.7220378518104553, + "loss_ib": 0.01742243953049183, + "step": 709 + }, + { + "ce_ib": 10.351544380187988, + "ce_orig": 1.0983593463897705, + "epoch": 0.20389675749514702, + "kl_loss": 0.5909432172775269, + "loss_ib": 0.01626097597181797, + "step": 709 + }, + { + "epoch": 0.20418434107412467, + "grad_norm": 0.08941768109798431, + "learning_rate": 9.96340031093202e-06, + "loss": 0.9056, + "step": 710 + }, + { + "ce_ib": 11.825169563293457, + "ce_orig": 0.9450059533119202, + "epoch": 0.20418434107412467, + "kl_loss": 0.6044723391532898, + "loss_ib": 0.017869891598820686, + "step": 710 + }, + { + "ce_ib": 7.037207126617432, + "ce_orig": 0.7366377711296082, + "epoch": 0.20418434107412467, + "kl_loss": 0.6832977533340454, + "loss_ib": 0.013870184309780598, + "step": 710 + }, + { + "ce_ib": 5.916614055633545, + "ce_orig": 0.4967573285102844, + "epoch": 0.20418434107412467, + "kl_loss": 0.7460091710090637, + "loss_ib": 0.013376705348491669, + "step": 710 + }, + { + "ce_ib": 6.968353271484375, + "ce_orig": 0.7892439961433411, + "epoch": 0.20418434107412467, + "kl_loss": 0.6605713963508606, + "loss_ib": 0.013574067503213882, + "step": 710 + }, + { + "ce_ib": 6.03911018371582, + "ce_orig": 0.5401313304901123, + "epoch": 0.20447192465310232, + "kl_loss": 0.6317548155784607, + "loss_ib": 0.012356657534837723, + "step": 711 + }, + { + "ce_ib": 14.128073692321777, + "ce_orig": 1.5721606016159058, + "epoch": 0.20447192465310232, + "kl_loss": 0.6152846813201904, + "loss_ib": 0.02028091810643673, + "step": 711 + }, + { + "ce_ib": 13.067774772644043, + "ce_orig": 1.452383279800415, + "epoch": 0.20447192465310232, + "kl_loss": 0.6436014175415039, + "loss_ib": 0.019503789022564888, + "step": 711 + }, + { + "ce_ib": 8.091301918029785, + "ce_orig": 0.7273240685462952, + "epoch": 0.20447192465310232, + "kl_loss": 0.6026842594146729, + "loss_ib": 0.014118144288659096, + "step": 711 + }, + { + "ce_ib": 8.550193786621094, + "ce_orig": 0.7258903980255127, + "epoch": 0.20475950823207995, + "kl_loss": 0.6672005653381348, + "loss_ib": 0.015222198329865932, + "step": 712 + }, + { + "ce_ib": 6.334939002990723, + "ce_orig": 0.5934350490570068, + "epoch": 0.20475950823207995, + "kl_loss": 0.5275875329971313, + "loss_ib": 0.011610814370214939, + "step": 712 + }, + { + "ce_ib": 7.126665115356445, + "ce_orig": 0.6666408181190491, + "epoch": 0.20475950823207995, + "kl_loss": 0.5349841117858887, + "loss_ib": 0.01247650571167469, + "step": 712 + }, + { + "ce_ib": 10.216915130615234, + "ce_orig": 0.9188041090965271, + "epoch": 0.20475950823207995, + "kl_loss": 0.7570939064025879, + "loss_ib": 0.017787855118513107, + "step": 712 + }, + { + "ce_ib": 9.504396438598633, + "ce_orig": 0.8975003957748413, + "epoch": 0.2050470918110576, + "kl_loss": 0.6097875833511353, + "loss_ib": 0.01560227107256651, + "step": 713 + }, + { + "ce_ib": 9.296903610229492, + "ce_orig": 1.331569790840149, + "epoch": 0.2050470918110576, + "kl_loss": 0.6761667728424072, + "loss_ib": 0.01605857163667679, + "step": 713 + }, + { + "ce_ib": 7.583889961242676, + "ce_orig": 0.712594211101532, + "epoch": 0.2050470918110576, + "kl_loss": 0.5781969428062439, + "loss_ib": 0.013365860097110271, + "step": 713 + }, + { + "ce_ib": 11.214754104614258, + "ce_orig": 1.008966088294983, + "epoch": 0.2050470918110576, + "kl_loss": 0.5274929404258728, + "loss_ib": 0.016489684581756592, + "step": 713 + }, + { + "ce_ib": 13.248461723327637, + "ce_orig": 1.7106783390045166, + "epoch": 0.20533467539003522, + "kl_loss": 0.4616009593009949, + "loss_ib": 0.017864469438791275, + "step": 714 + }, + { + "ce_ib": 10.139266967773438, + "ce_orig": 0.7416722178459167, + "epoch": 0.20533467539003522, + "kl_loss": 0.49146589636802673, + "loss_ib": 0.015053926035761833, + "step": 714 + }, + { + "ce_ib": 6.59840202331543, + "ce_orig": 0.7029027342796326, + "epoch": 0.20533467539003522, + "kl_loss": 0.5324534177780151, + "loss_ib": 0.011922935955226421, + "step": 714 + }, + { + "ce_ib": 8.762123107910156, + "ce_orig": 0.8172003030776978, + "epoch": 0.20533467539003522, + "kl_loss": 0.5413413047790527, + "loss_ib": 0.0141755361109972, + "step": 714 + }, + { + "epoch": 0.20562225896901287, + "grad_norm": 0.09573056548833847, + "learning_rate": 9.962457025334114e-06, + "loss": 0.8855, + "step": 715 + }, + { + "ce_ib": 8.281875610351562, + "ce_orig": 0.8879901170730591, + "epoch": 0.20562225896901287, + "kl_loss": 0.47865283489227295, + "loss_ib": 0.01306840404868126, + "step": 715 + }, + { + "ce_ib": 7.326986789703369, + "ce_orig": 0.7315509915351868, + "epoch": 0.20562225896901287, + "kl_loss": 0.5715488195419312, + "loss_ib": 0.01304247509688139, + "step": 715 + }, + { + "ce_ib": 9.936646461486816, + "ce_orig": 0.5313572883605957, + "epoch": 0.20562225896901287, + "kl_loss": 0.7098532915115356, + "loss_ib": 0.01703517884016037, + "step": 715 + }, + { + "ce_ib": 8.325387954711914, + "ce_orig": 1.0558624267578125, + "epoch": 0.20562225896901287, + "kl_loss": 0.510680079460144, + "loss_ib": 0.013432187959551811, + "step": 715 + }, + { + "ce_ib": 6.32294225692749, + "ce_orig": 0.755020260810852, + "epoch": 0.2059098425479905, + "kl_loss": 0.4962891638278961, + "loss_ib": 0.011285834014415741, + "step": 716 + }, + { + "ce_ib": 6.709957122802734, + "ce_orig": 0.7449535131454468, + "epoch": 0.2059098425479905, + "kl_loss": 0.5581060647964478, + "loss_ib": 0.012291017919778824, + "step": 716 + }, + { + "ce_ib": 11.16142749786377, + "ce_orig": 1.2088764905929565, + "epoch": 0.2059098425479905, + "kl_loss": 0.5308176875114441, + "loss_ib": 0.016469605267047882, + "step": 716 + }, + { + "ce_ib": 8.455696105957031, + "ce_orig": 0.8597549200057983, + "epoch": 0.2059098425479905, + "kl_loss": 0.5310311317443848, + "loss_ib": 0.013766007497906685, + "step": 716 + }, + { + "ce_ib": 9.843843460083008, + "ce_orig": 0.9751378297805786, + "epoch": 0.20619742612696815, + "kl_loss": 0.7993011474609375, + "loss_ib": 0.017836853861808777, + "step": 717 + }, + { + "ce_ib": 8.898174285888672, + "ce_orig": 0.609527051448822, + "epoch": 0.20619742612696815, + "kl_loss": 0.9439896941184998, + "loss_ib": 0.018338071182370186, + "step": 717 + }, + { + "ce_ib": 12.661697387695312, + "ce_orig": 1.3917475938796997, + "epoch": 0.20619742612696815, + "kl_loss": 0.6134680509567261, + "loss_ib": 0.01879637874662876, + "step": 717 + }, + { + "ce_ib": 4.8259477615356445, + "ce_orig": 0.48925209045410156, + "epoch": 0.20619742612696815, + "kl_loss": 0.5553791522979736, + "loss_ib": 0.010379738174378872, + "step": 717 + }, + { + "ce_ib": 12.544927597045898, + "ce_orig": 0.7721540331840515, + "epoch": 0.2064850097059458, + "kl_loss": 0.4881455898284912, + "loss_ib": 0.01742638275027275, + "step": 718 + }, + { + "ce_ib": 10.139900207519531, + "ce_orig": 0.9012062549591064, + "epoch": 0.2064850097059458, + "kl_loss": 0.637915313243866, + "loss_ib": 0.016519052907824516, + "step": 718 + }, + { + "ce_ib": 10.471162796020508, + "ce_orig": 1.1879448890686035, + "epoch": 0.2064850097059458, + "kl_loss": 0.8380963802337646, + "loss_ib": 0.018852125853300095, + "step": 718 + }, + { + "ce_ib": 9.023296356201172, + "ce_orig": 0.45530980825424194, + "epoch": 0.2064850097059458, + "kl_loss": 0.6838054656982422, + "loss_ib": 0.015861351042985916, + "step": 718 + }, + { + "ce_ib": 13.007994651794434, + "ce_orig": 1.3200677633285522, + "epoch": 0.20677259328492342, + "kl_loss": 0.5242291688919067, + "loss_ib": 0.01825028657913208, + "step": 719 + }, + { + "ce_ib": 12.970458030700684, + "ce_orig": 1.702121376991272, + "epoch": 0.20677259328492342, + "kl_loss": 0.502326250076294, + "loss_ib": 0.01799372024834156, + "step": 719 + }, + { + "ce_ib": 5.795266151428223, + "ce_orig": 0.5755612254142761, + "epoch": 0.20677259328492342, + "kl_loss": 0.5967477560043335, + "loss_ib": 0.011762742884457111, + "step": 719 + }, + { + "ce_ib": 10.548118591308594, + "ce_orig": 0.9648749828338623, + "epoch": 0.20677259328492342, + "kl_loss": 0.5070229768753052, + "loss_ib": 0.015618347562849522, + "step": 719 + }, + { + "epoch": 0.20706017686390107, + "grad_norm": 0.09754003584384918, + "learning_rate": 9.961501784025423e-06, + "loss": 0.8849, + "step": 720 + }, + { + "ce_ib": 6.8866190910339355, + "ce_orig": 0.45583146810531616, + "epoch": 0.20706017686390107, + "kl_loss": 0.4389684200286865, + "loss_ib": 0.011276302859187126, + "step": 720 + }, + { + "ce_ib": 12.2448148727417, + "ce_orig": 1.3936138153076172, + "epoch": 0.20706017686390107, + "kl_loss": 0.42197367548942566, + "loss_ib": 0.01646455191075802, + "step": 720 + }, + { + "ce_ib": 8.876269340515137, + "ce_orig": 0.5300930738449097, + "epoch": 0.20706017686390107, + "kl_loss": 0.4920913577079773, + "loss_ib": 0.013797182589769363, + "step": 720 + }, + { + "ce_ib": 7.045018672943115, + "ce_orig": 0.7694444060325623, + "epoch": 0.20706017686390107, + "kl_loss": 0.502578854560852, + "loss_ib": 0.012070806697010994, + "step": 720 + }, + { + "ce_ib": 8.191524505615234, + "ce_orig": 0.5949411988258362, + "epoch": 0.2073477604428787, + "kl_loss": 0.6976209878921509, + "loss_ib": 0.01516773458570242, + "step": 721 + }, + { + "ce_ib": 8.799589157104492, + "ce_orig": 0.6730172634124756, + "epoch": 0.2073477604428787, + "kl_loss": 0.6129693388938904, + "loss_ib": 0.01492928247898817, + "step": 721 + }, + { + "ce_ib": 9.574918746948242, + "ce_orig": 1.5401928424835205, + "epoch": 0.2073477604428787, + "kl_loss": 0.4680205285549164, + "loss_ib": 0.014255124144256115, + "step": 721 + }, + { + "ce_ib": 11.500478744506836, + "ce_orig": 1.1516295671463013, + "epoch": 0.2073477604428787, + "kl_loss": 0.5712054967880249, + "loss_ib": 0.017212534323334694, + "step": 721 + }, + { + "ce_ib": 6.9377641677856445, + "ce_orig": 0.47552016377449036, + "epoch": 0.20763534402185635, + "kl_loss": 0.5818679332733154, + "loss_ib": 0.01275644265115261, + "step": 722 + }, + { + "ce_ib": 7.533665180206299, + "ce_orig": 0.8571917414665222, + "epoch": 0.20763534402185635, + "kl_loss": 0.4062355160713196, + "loss_ib": 0.011596020311117172, + "step": 722 + }, + { + "ce_ib": 4.523435592651367, + "ce_orig": 0.5144174098968506, + "epoch": 0.20763534402185635, + "kl_loss": 0.46129077672958374, + "loss_ib": 0.009136342443525791, + "step": 722 + }, + { + "ce_ib": 5.738825798034668, + "ce_orig": 0.3778877556324005, + "epoch": 0.20763534402185635, + "kl_loss": 0.4390355944633484, + "loss_ib": 0.010129181668162346, + "step": 722 + }, + { + "ce_ib": 6.621108055114746, + "ce_orig": 0.5974579453468323, + "epoch": 0.207922927600834, + "kl_loss": 0.444467157125473, + "loss_ib": 0.011065779253840446, + "step": 723 + }, + { + "ce_ib": 9.774152755737305, + "ce_orig": 0.6996477246284485, + "epoch": 0.207922927600834, + "kl_loss": 0.605032742023468, + "loss_ib": 0.01582447998225689, + "step": 723 + }, + { + "ce_ib": 8.50704574584961, + "ce_orig": 0.8165543079376221, + "epoch": 0.207922927600834, + "kl_loss": 0.6351085901260376, + "loss_ib": 0.014858131296932697, + "step": 723 + }, + { + "ce_ib": 5.1617231369018555, + "ce_orig": 0.5427976250648499, + "epoch": 0.207922927600834, + "kl_loss": 0.4432010054588318, + "loss_ib": 0.009593733586370945, + "step": 723 + }, + { + "ce_ib": 10.265229225158691, + "ce_orig": 0.3247712552547455, + "epoch": 0.20821051117981162, + "kl_loss": 0.9256395697593689, + "loss_ib": 0.019521623849868774, + "step": 724 + }, + { + "ce_ib": 12.566752433776855, + "ce_orig": 0.9292450547218323, + "epoch": 0.20821051117981162, + "kl_loss": 0.5198833346366882, + "loss_ib": 0.017765585333108902, + "step": 724 + }, + { + "ce_ib": 4.4857707023620605, + "ce_orig": 0.13291123509407043, + "epoch": 0.20821051117981162, + "kl_loss": 0.8171424865722656, + "loss_ib": 0.012657195329666138, + "step": 724 + }, + { + "ce_ib": 11.237386703491211, + "ce_orig": 1.4978471994400024, + "epoch": 0.20821051117981162, + "kl_loss": 0.6373554468154907, + "loss_ib": 0.01761094108223915, + "step": 724 + }, + { + "epoch": 0.20849809475878928, + "grad_norm": 0.08204614371061325, + "learning_rate": 9.960534589307342e-06, + "loss": 0.9127, + "step": 725 + }, + { + "ce_ib": 6.750695705413818, + "ce_orig": 0.7500053644180298, + "epoch": 0.20849809475878928, + "kl_loss": 0.44563400745391846, + "loss_ib": 0.0112070357427001, + "step": 725 + }, + { + "ce_ib": 7.751323223114014, + "ce_orig": 1.0083248615264893, + "epoch": 0.20849809475878928, + "kl_loss": 0.41058549284935, + "loss_ib": 0.011857178062200546, + "step": 725 + }, + { + "ce_ib": 8.529701232910156, + "ce_orig": 0.73545902967453, + "epoch": 0.20849809475878928, + "kl_loss": 0.5081138014793396, + "loss_ib": 0.01361083984375, + "step": 725 + }, + { + "ce_ib": 6.2612080574035645, + "ce_orig": 0.5796510577201843, + "epoch": 0.20849809475878928, + "kl_loss": 0.39641040563583374, + "loss_ib": 0.010225312784314156, + "step": 725 + }, + { + "ce_ib": 6.802225112915039, + "ce_orig": 0.6491565704345703, + "epoch": 0.2087856783377669, + "kl_loss": 0.40076354146003723, + "loss_ib": 0.010809860192239285, + "step": 726 + }, + { + "ce_ib": 10.529885292053223, + "ce_orig": 0.8254870772361755, + "epoch": 0.2087856783377669, + "kl_loss": 0.5448965430259705, + "loss_ib": 0.015978850424289703, + "step": 726 + }, + { + "ce_ib": 12.080382347106934, + "ce_orig": 1.3747988939285278, + "epoch": 0.2087856783377669, + "kl_loss": 0.607007622718811, + "loss_ib": 0.018150458112359047, + "step": 726 + }, + { + "ce_ib": 13.895086288452148, + "ce_orig": 1.569737195968628, + "epoch": 0.2087856783377669, + "kl_loss": 0.41842395067214966, + "loss_ib": 0.018079325556755066, + "step": 726 + }, + { + "ce_ib": 10.388771057128906, + "ce_orig": 0.8246784806251526, + "epoch": 0.20907326191674455, + "kl_loss": 0.6921413540840149, + "loss_ib": 0.017310185357928276, + "step": 727 + }, + { + "ce_ib": 10.750786781311035, + "ce_orig": 0.6747448444366455, + "epoch": 0.20907326191674455, + "kl_loss": 0.46249350905418396, + "loss_ib": 0.015375722199678421, + "step": 727 + }, + { + "ce_ib": 8.939618110656738, + "ce_orig": 0.611909806728363, + "epoch": 0.20907326191674455, + "kl_loss": 0.4692227244377136, + "loss_ib": 0.013631845824420452, + "step": 727 + }, + { + "ce_ib": 5.823955059051514, + "ce_orig": 0.5695940852165222, + "epoch": 0.20907326191674455, + "kl_loss": 0.4087258577346802, + "loss_ib": 0.00991121307015419, + "step": 727 + }, + { + "ce_ib": 7.428273677825928, + "ce_orig": 0.6111478805541992, + "epoch": 0.2093608454957222, + "kl_loss": 0.4407083988189697, + "loss_ib": 0.01183535810559988, + "step": 728 + }, + { + "ce_ib": 8.432186126708984, + "ce_orig": 0.7240220308303833, + "epoch": 0.2093608454957222, + "kl_loss": 0.5889390707015991, + "loss_ib": 0.014321576803922653, + "step": 728 + }, + { + "ce_ib": 9.506328582763672, + "ce_orig": 0.885880708694458, + "epoch": 0.2093608454957222, + "kl_loss": 0.39755725860595703, + "loss_ib": 0.013481900095939636, + "step": 728 + }, + { + "ce_ib": 10.747995376586914, + "ce_orig": 0.9302851557731628, + "epoch": 0.2093608454957222, + "kl_loss": 0.6458637118339539, + "loss_ib": 0.01720663346350193, + "step": 728 + }, + { + "ce_ib": 8.671708106994629, + "ce_orig": 0.6951517462730408, + "epoch": 0.20964842907469983, + "kl_loss": 0.547339916229248, + "loss_ib": 0.014145107008516788, + "step": 729 + }, + { + "ce_ib": 9.09277629852295, + "ce_orig": 0.6532163023948669, + "epoch": 0.20964842907469983, + "kl_loss": 0.4491899609565735, + "loss_ib": 0.013584675267338753, + "step": 729 + }, + { + "ce_ib": 8.521883964538574, + "ce_orig": 0.26346156001091003, + "epoch": 0.20964842907469983, + "kl_loss": 0.7473446130752563, + "loss_ib": 0.0159953311085701, + "step": 729 + }, + { + "ce_ib": 8.46525764465332, + "ce_orig": 0.761343777179718, + "epoch": 0.20964842907469983, + "kl_loss": 0.49551889300346375, + "loss_ib": 0.013420446775853634, + "step": 729 + }, + { + "epoch": 0.20993601265367748, + "grad_norm": 0.10517257452011108, + "learning_rate": 9.959555443510074e-06, + "loss": 0.8883, + "step": 730 + }, + { + "ce_ib": 9.539706230163574, + "ce_orig": 0.8811664581298828, + "epoch": 0.20993601265367748, + "kl_loss": 0.4699278473854065, + "loss_ib": 0.014238984324038029, + "step": 730 + }, + { + "ce_ib": 10.79475212097168, + "ce_orig": 0.9388590455055237, + "epoch": 0.20993601265367748, + "kl_loss": 1.093339443206787, + "loss_ib": 0.0217281486839056, + "step": 730 + }, + { + "ce_ib": 11.920876502990723, + "ce_orig": 1.155131220817566, + "epoch": 0.20993601265367748, + "kl_loss": 0.49514299631118774, + "loss_ib": 0.016872305423021317, + "step": 730 + }, + { + "ce_ib": 12.770062446594238, + "ce_orig": 1.2717667818069458, + "epoch": 0.20993601265367748, + "kl_loss": 0.467684268951416, + "loss_ib": 0.017446905374526978, + "step": 730 + }, + { + "ce_ib": 9.915130615234375, + "ce_orig": 0.7397444248199463, + "epoch": 0.2102235962326551, + "kl_loss": 0.5744942426681519, + "loss_ib": 0.01566007360816002, + "step": 731 + }, + { + "ce_ib": 11.337221145629883, + "ce_orig": 0.6278934478759766, + "epoch": 0.2102235962326551, + "kl_loss": 0.47471189498901367, + "loss_ib": 0.0160843413323164, + "step": 731 + }, + { + "ce_ib": 6.727563858032227, + "ce_orig": 0.5554884076118469, + "epoch": 0.2102235962326551, + "kl_loss": 0.38505086302757263, + "loss_ib": 0.01057807169854641, + "step": 731 + }, + { + "ce_ib": 4.216444492340088, + "ce_orig": 0.15175974369049072, + "epoch": 0.2102235962326551, + "kl_loss": 0.7317667007446289, + "loss_ib": 0.011534111574292183, + "step": 731 + }, + { + "ce_ib": 6.452449798583984, + "ce_orig": 0.48745986819267273, + "epoch": 0.21051117981163275, + "kl_loss": 0.4054802656173706, + "loss_ib": 0.0105072520673275, + "step": 732 + }, + { + "ce_ib": 7.071081161499023, + "ce_orig": 0.9309906363487244, + "epoch": 0.21051117981163275, + "kl_loss": 0.37749940156936646, + "loss_ib": 0.010846075601875782, + "step": 732 + }, + { + "ce_ib": 11.04909896850586, + "ce_orig": 0.874702513217926, + "epoch": 0.21051117981163275, + "kl_loss": 0.4390341639518738, + "loss_ib": 0.015439440496265888, + "step": 732 + }, + { + "ce_ib": 7.799300193786621, + "ce_orig": 0.8155895471572876, + "epoch": 0.21051117981163275, + "kl_loss": 0.4740564823150635, + "loss_ib": 0.01253986544907093, + "step": 732 + }, + { + "ce_ib": 7.702383995056152, + "ce_orig": 0.5887701511383057, + "epoch": 0.2107987633906104, + "kl_loss": 0.5023674964904785, + "loss_ib": 0.01272605825215578, + "step": 733 + }, + { + "ce_ib": 8.737090110778809, + "ce_orig": 0.8836251497268677, + "epoch": 0.2107987633906104, + "kl_loss": 0.48343145847320557, + "loss_ib": 0.013571404851973057, + "step": 733 + }, + { + "ce_ib": 9.702954292297363, + "ce_orig": 0.9348666071891785, + "epoch": 0.2107987633906104, + "kl_loss": 0.7717263698577881, + "loss_ib": 0.017420217394828796, + "step": 733 + }, + { + "ce_ib": 9.321721076965332, + "ce_orig": 0.6164664626121521, + "epoch": 0.2107987633906104, + "kl_loss": 0.5251142382621765, + "loss_ib": 0.014572863467037678, + "step": 733 + }, + { + "ce_ib": 10.808871269226074, + "ce_orig": 0.7415775656700134, + "epoch": 0.21108634696958803, + "kl_loss": 0.5616943836212158, + "loss_ib": 0.01642581634223461, + "step": 734 + }, + { + "ce_ib": 9.013411521911621, + "ce_orig": 0.6960151195526123, + "epoch": 0.21108634696958803, + "kl_loss": 0.45591843128204346, + "loss_ib": 0.013572595082223415, + "step": 734 + }, + { + "ce_ib": 10.70376205444336, + "ce_orig": 0.8505563735961914, + "epoch": 0.21108634696958803, + "kl_loss": 0.4155931770801544, + "loss_ib": 0.014859694056212902, + "step": 734 + }, + { + "ce_ib": 10.576190948486328, + "ce_orig": 1.013388752937317, + "epoch": 0.21108634696958803, + "kl_loss": 0.5600894689559937, + "loss_ib": 0.016177086159586906, + "step": 734 + }, + { + "epoch": 0.21137393054856568, + "grad_norm": 0.10216815024614334, + "learning_rate": 9.958564348992604e-06, + "loss": 0.9112, + "step": 735 + }, + { + "ce_ib": 8.351442337036133, + "ce_orig": 0.6271442174911499, + "epoch": 0.21137393054856568, + "kl_loss": 0.45761042833328247, + "loss_ib": 0.012927546165883541, + "step": 735 + }, + { + "ce_ib": 10.175559997558594, + "ce_orig": 0.9649935960769653, + "epoch": 0.21137393054856568, + "kl_loss": 0.5237610340118408, + "loss_ib": 0.01541317068040371, + "step": 735 + }, + { + "ce_ib": 11.151328086853027, + "ce_orig": 1.160567045211792, + "epoch": 0.21137393054856568, + "kl_loss": 0.44404470920562744, + "loss_ib": 0.015591775067150593, + "step": 735 + }, + { + "ce_ib": 8.23231029510498, + "ce_orig": 0.8223357200622559, + "epoch": 0.21137393054856568, + "kl_loss": 0.4122684597969055, + "loss_ib": 0.012354995124042034, + "step": 735 + }, + { + "ce_ib": 9.226214408874512, + "ce_orig": 1.0122361183166504, + "epoch": 0.2116615141275433, + "kl_loss": 0.3391454219818115, + "loss_ib": 0.012617669068276882, + "step": 736 + }, + { + "ce_ib": 12.772435188293457, + "ce_orig": 1.63818359375, + "epoch": 0.2116615141275433, + "kl_loss": 0.6720362901687622, + "loss_ib": 0.019492797553539276, + "step": 736 + }, + { + "ce_ib": 10.953327178955078, + "ce_orig": 1.301689624786377, + "epoch": 0.2116615141275433, + "kl_loss": 0.4017236828804016, + "loss_ib": 0.014970564283430576, + "step": 736 + }, + { + "ce_ib": 13.185845375061035, + "ce_orig": 1.3509107828140259, + "epoch": 0.2116615141275433, + "kl_loss": 0.3720896244049072, + "loss_ib": 0.0169067420065403, + "step": 736 + }, + { + "ce_ib": 5.992466449737549, + "ce_orig": 0.6994779706001282, + "epoch": 0.21194909770652096, + "kl_loss": 0.37846922874450684, + "loss_ib": 0.009777158498764038, + "step": 737 + }, + { + "ce_ib": 4.946014404296875, + "ce_orig": 0.448905885219574, + "epoch": 0.21194909770652096, + "kl_loss": 0.3910108208656311, + "loss_ib": 0.008856122381985188, + "step": 737 + }, + { + "ce_ib": 8.710723876953125, + "ce_orig": 0.955891489982605, + "epoch": 0.21194909770652096, + "kl_loss": 0.3540381193161011, + "loss_ib": 0.012251105159521103, + "step": 737 + }, + { + "ce_ib": 9.716163635253906, + "ce_orig": 0.777677595615387, + "epoch": 0.21194909770652096, + "kl_loss": 0.840921938419342, + "loss_ib": 0.0181253831833601, + "step": 737 + }, + { + "ce_ib": 9.742884635925293, + "ce_orig": 0.8019170761108398, + "epoch": 0.2122366812854986, + "kl_loss": 0.4170638918876648, + "loss_ib": 0.013913523405790329, + "step": 738 + }, + { + "ce_ib": 9.633951187133789, + "ce_orig": 0.9468701481819153, + "epoch": 0.2122366812854986, + "kl_loss": 0.44602394104003906, + "loss_ib": 0.014094190672039986, + "step": 738 + }, + { + "ce_ib": 9.999152183532715, + "ce_orig": 1.5994606018066406, + "epoch": 0.2122366812854986, + "kl_loss": 0.34741610288619995, + "loss_ib": 0.013473312370479107, + "step": 738 + }, + { + "ce_ib": 7.614631175994873, + "ce_orig": 0.44442856311798096, + "epoch": 0.2122366812854986, + "kl_loss": 0.42184555530548096, + "loss_ib": 0.011833085678517818, + "step": 738 + }, + { + "ce_ib": 3.805742025375366, + "ce_orig": 0.3643724024295807, + "epoch": 0.21252426486447623, + "kl_loss": 0.73167484998703, + "loss_ib": 0.011122490279376507, + "step": 739 + }, + { + "ce_ib": 7.001569747924805, + "ce_orig": 0.6076573133468628, + "epoch": 0.21252426486447623, + "kl_loss": 0.3848869204521179, + "loss_ib": 0.010850438848137856, + "step": 739 + }, + { + "ce_ib": 6.058032035827637, + "ce_orig": 0.9139571785926819, + "epoch": 0.21252426486447623, + "kl_loss": 0.43339502811431885, + "loss_ib": 0.010391981340944767, + "step": 739 + }, + { + "ce_ib": 9.221721649169922, + "ce_orig": 1.1071618795394897, + "epoch": 0.21252426486447623, + "kl_loss": 0.3810133934020996, + "loss_ib": 0.01303185522556305, + "step": 739 + }, + { + "epoch": 0.21281184844345388, + "grad_norm": 0.09019612520933151, + "learning_rate": 9.95756130814271e-06, + "loss": 0.8816, + "step": 740 + }, + { + "ce_ib": 9.799250602722168, + "ce_orig": 0.7617897987365723, + "epoch": 0.21281184844345388, + "kl_loss": 0.46645045280456543, + "loss_ib": 0.014463755302131176, + "step": 740 + }, + { + "ce_ib": 8.431238174438477, + "ce_orig": 0.7609443068504333, + "epoch": 0.21281184844345388, + "kl_loss": 0.353823721408844, + "loss_ib": 0.011969475075602531, + "step": 740 + }, + { + "ce_ib": 6.829717636108398, + "ce_orig": 0.6648247241973877, + "epoch": 0.21281184844345388, + "kl_loss": 0.3253084421157837, + "loss_ib": 0.01008280273526907, + "step": 740 + }, + { + "ce_ib": 9.891703605651855, + "ce_orig": 1.1517934799194336, + "epoch": 0.21281184844345388, + "kl_loss": 0.43470141291618347, + "loss_ib": 0.014238717034459114, + "step": 740 + }, + { + "ce_ib": 6.036440372467041, + "ce_orig": 0.5608431696891785, + "epoch": 0.2130994320224315, + "kl_loss": 0.5712899565696716, + "loss_ib": 0.01174934022128582, + "step": 741 + }, + { + "ce_ib": 7.961015224456787, + "ce_orig": 0.5696125626564026, + "epoch": 0.2130994320224315, + "kl_loss": 0.37901923060417175, + "loss_ib": 0.01175120659172535, + "step": 741 + }, + { + "ce_ib": 9.610107421875, + "ce_orig": 0.6799657344818115, + "epoch": 0.2130994320224315, + "kl_loss": 0.5099486112594604, + "loss_ib": 0.0147095937281847, + "step": 741 + }, + { + "ce_ib": 10.553266525268555, + "ce_orig": 1.0597807168960571, + "epoch": 0.2130994320224315, + "kl_loss": 0.9013509750366211, + "loss_ib": 0.019566776230931282, + "step": 741 + }, + { + "ce_ib": 11.050984382629395, + "ce_orig": 1.2893459796905518, + "epoch": 0.21338701560140916, + "kl_loss": 0.45824098587036133, + "loss_ib": 0.015633394941687584, + "step": 742 + }, + { + "ce_ib": 10.593522071838379, + "ce_orig": 1.4311033487319946, + "epoch": 0.21338701560140916, + "kl_loss": 0.33632180094718933, + "loss_ib": 0.013956740498542786, + "step": 742 + }, + { + "ce_ib": 8.308612823486328, + "ce_orig": 0.9085618257522583, + "epoch": 0.21338701560140916, + "kl_loss": 0.42611122131347656, + "loss_ib": 0.012569725513458252, + "step": 742 + }, + { + "ce_ib": 8.803531646728516, + "ce_orig": 0.5293317437171936, + "epoch": 0.21338701560140916, + "kl_loss": 0.42089781165122986, + "loss_ib": 0.013012508861720562, + "step": 742 + }, + { + "ce_ib": 10.681037902832031, + "ce_orig": 0.6758571863174438, + "epoch": 0.2136745991803868, + "kl_loss": 0.4455721974372864, + "loss_ib": 0.015136758796870708, + "step": 743 + }, + { + "ce_ib": 5.145867347717285, + "ce_orig": 0.4771097004413605, + "epoch": 0.2136745991803868, + "kl_loss": 0.3146681487560272, + "loss_ib": 0.0082925483584404, + "step": 743 + }, + { + "ce_ib": 8.998390197753906, + "ce_orig": 0.4767987132072449, + "epoch": 0.2136745991803868, + "kl_loss": 0.44158506393432617, + "loss_ib": 0.013414240442216396, + "step": 743 + }, + { + "ce_ib": 10.034675598144531, + "ce_orig": 0.8264948129653931, + "epoch": 0.2136745991803868, + "kl_loss": 0.36563044786453247, + "loss_ib": 0.013690979219973087, + "step": 743 + }, + { + "ce_ib": 8.774846076965332, + "ce_orig": 0.8711258172988892, + "epoch": 0.21396218275936443, + "kl_loss": 0.448600172996521, + "loss_ib": 0.013260847888886929, + "step": 744 + }, + { + "ce_ib": 6.589221954345703, + "ce_orig": 0.8571322560310364, + "epoch": 0.21396218275936443, + "kl_loss": 0.32901668548583984, + "loss_ib": 0.009879388846457005, + "step": 744 + }, + { + "ce_ib": 7.771622657775879, + "ce_orig": 0.9770241379737854, + "epoch": 0.21396218275936443, + "kl_loss": 0.5221589803695679, + "loss_ib": 0.01299321185797453, + "step": 744 + }, + { + "ce_ib": 12.875645637512207, + "ce_orig": 1.2932630777359009, + "epoch": 0.21396218275936443, + "kl_loss": 0.5584806799888611, + "loss_ib": 0.018460452556610107, + "step": 744 + }, + { + "epoch": 0.21424976633834208, + "grad_norm": 0.11534402519464493, + "learning_rate": 9.956546323376948e-06, + "loss": 0.8441, + "step": 745 + }, + { + "ce_ib": 12.278318405151367, + "ce_orig": 0.611358642578125, + "epoch": 0.21424976633834208, + "kl_loss": 0.5517995357513428, + "loss_ib": 0.017796313390135765, + "step": 745 + }, + { + "ce_ib": 9.37865161895752, + "ce_orig": 0.9221431612968445, + "epoch": 0.21424976633834208, + "kl_loss": 0.2928432822227478, + "loss_ib": 0.012307084165513515, + "step": 745 + }, + { + "ce_ib": 8.047618865966797, + "ce_orig": 0.9915336966514587, + "epoch": 0.21424976633834208, + "kl_loss": 0.45079469680786133, + "loss_ib": 0.012555565685033798, + "step": 745 + }, + { + "ce_ib": 9.573211669921875, + "ce_orig": 0.9948348999023438, + "epoch": 0.21424976633834208, + "kl_loss": 0.4275258481502533, + "loss_ib": 0.013848470523953438, + "step": 745 + }, + { + "ce_ib": 8.365690231323242, + "ce_orig": 0.8193045258522034, + "epoch": 0.2145373499173197, + "kl_loss": 0.44444945454597473, + "loss_ib": 0.012810184620320797, + "step": 746 + }, + { + "ce_ib": 6.967347621917725, + "ce_orig": 0.4872395098209381, + "epoch": 0.2145373499173197, + "kl_loss": 0.37023940682411194, + "loss_ib": 0.010669741779565811, + "step": 746 + }, + { + "ce_ib": 7.190746784210205, + "ce_orig": 0.6708618402481079, + "epoch": 0.2145373499173197, + "kl_loss": 0.3780478239059448, + "loss_ib": 0.010971223935484886, + "step": 746 + }, + { + "ce_ib": 11.051678657531738, + "ce_orig": 1.0682791471481323, + "epoch": 0.2145373499173197, + "kl_loss": 0.33363956212997437, + "loss_ib": 0.014388074167072773, + "step": 746 + }, + { + "ce_ib": 8.238478660583496, + "ce_orig": 0.7487781047821045, + "epoch": 0.21482493349629736, + "kl_loss": 0.37909066677093506, + "loss_ib": 0.012029385194182396, + "step": 747 + }, + { + "ce_ib": 12.03074836730957, + "ce_orig": 1.157182216644287, + "epoch": 0.21482493349629736, + "kl_loss": 0.37361055612564087, + "loss_ib": 0.015766853466629982, + "step": 747 + }, + { + "ce_ib": 8.529624938964844, + "ce_orig": 1.0179344415664673, + "epoch": 0.21482493349629736, + "kl_loss": 0.30930227041244507, + "loss_ib": 0.011622647754848003, + "step": 747 + }, + { + "ce_ib": 7.966403007507324, + "ce_orig": 0.6700258255004883, + "epoch": 0.21482493349629736, + "kl_loss": 0.37224870920181274, + "loss_ib": 0.011688889935612679, + "step": 747 + }, + { + "ce_ib": 9.755182266235352, + "ce_orig": 1.0040427446365356, + "epoch": 0.215112517075275, + "kl_loss": 0.3066710829734802, + "loss_ib": 0.012821893207728863, + "step": 748 + }, + { + "ce_ib": 10.194023132324219, + "ce_orig": 0.5760530233383179, + "epoch": 0.215112517075275, + "kl_loss": 0.5293586850166321, + "loss_ib": 0.015487611293792725, + "step": 748 + }, + { + "ce_ib": 11.507509231567383, + "ce_orig": 1.2553309202194214, + "epoch": 0.215112517075275, + "kl_loss": 0.3195509910583496, + "loss_ib": 0.01470301952213049, + "step": 748 + }, + { + "ce_ib": 10.408110618591309, + "ce_orig": 0.725406289100647, + "epoch": 0.215112517075275, + "kl_loss": 0.3801497220993042, + "loss_ib": 0.014209607616066933, + "step": 748 + }, + { + "ce_ib": 8.620140075683594, + "ce_orig": 0.7735955715179443, + "epoch": 0.21540010065425264, + "kl_loss": 0.5387185215950012, + "loss_ib": 0.014007325284183025, + "step": 749 + }, + { + "ce_ib": 5.869650363922119, + "ce_orig": 0.7176041603088379, + "epoch": 0.21540010065425264, + "kl_loss": 0.37166261672973633, + "loss_ib": 0.009586276486515999, + "step": 749 + }, + { + "ce_ib": 9.476244926452637, + "ce_orig": 1.0505683422088623, + "epoch": 0.21540010065425264, + "kl_loss": 0.3689851760864258, + "loss_ib": 0.013166096061468124, + "step": 749 + }, + { + "ce_ib": 10.051746368408203, + "ce_orig": 0.613065779209137, + "epoch": 0.21540010065425264, + "kl_loss": 0.3970192074775696, + "loss_ib": 0.014021937735378742, + "step": 749 + }, + { + "epoch": 0.2156876842332303, + "grad_norm": 0.09746310114860535, + "learning_rate": 9.955519397140656e-06, + "loss": 0.9247, + "step": 750 + }, + { + "ce_ib": 8.546123504638672, + "ce_orig": 0.8507575392723083, + "epoch": 0.2156876842332303, + "kl_loss": 0.3480690121650696, + "loss_ib": 0.012026812881231308, + "step": 750 + }, + { + "ce_ib": 12.74342155456543, + "ce_orig": 0.9122403860092163, + "epoch": 0.2156876842332303, + "kl_loss": 0.39195001125335693, + "loss_ib": 0.016662921756505966, + "step": 750 + }, + { + "ce_ib": 12.673941612243652, + "ce_orig": 1.130251407623291, + "epoch": 0.2156876842332303, + "kl_loss": 0.38955867290496826, + "loss_ib": 0.016569528728723526, + "step": 750 + }, + { + "ce_ib": 11.575626373291016, + "ce_orig": 0.889440655708313, + "epoch": 0.2156876842332303, + "kl_loss": 0.3685222864151001, + "loss_ib": 0.015260848216712475, + "step": 750 + }, + { + "ce_ib": 9.500494003295898, + "ce_orig": 0.5060651302337646, + "epoch": 0.2159752678122079, + "kl_loss": 0.2954930067062378, + "loss_ib": 0.01245542336255312, + "step": 751 + }, + { + "ce_ib": 11.144340515136719, + "ce_orig": 0.7600336670875549, + "epoch": 0.2159752678122079, + "kl_loss": 0.6074585914611816, + "loss_ib": 0.017218926921486855, + "step": 751 + }, + { + "ce_ib": 7.084355354309082, + "ce_orig": 0.8810204267501831, + "epoch": 0.2159752678122079, + "kl_loss": 0.4073539078235626, + "loss_ib": 0.01115789357572794, + "step": 751 + }, + { + "ce_ib": 8.677960395812988, + "ce_orig": 0.8457415103912354, + "epoch": 0.2159752678122079, + "kl_loss": 0.6076983213424683, + "loss_ib": 0.014754943549633026, + "step": 751 + }, + { + "ce_ib": 10.253267288208008, + "ce_orig": 1.123890995979309, + "epoch": 0.21626285139118556, + "kl_loss": 0.30214011669158936, + "loss_ib": 0.013274667784571648, + "step": 752 + }, + { + "ce_ib": 11.143954277038574, + "ce_orig": 0.9621107578277588, + "epoch": 0.21626285139118556, + "kl_loss": 0.40844857692718506, + "loss_ib": 0.015228440053761005, + "step": 752 + }, + { + "ce_ib": 7.633870601654053, + "ce_orig": 0.7040098309516907, + "epoch": 0.21626285139118556, + "kl_loss": 0.4934859871864319, + "loss_ib": 0.012568730860948563, + "step": 752 + }, + { + "ce_ib": 15.08940601348877, + "ce_orig": 1.8990564346313477, + "epoch": 0.21626285139118556, + "kl_loss": 0.3932008445262909, + "loss_ib": 0.0190214142203331, + "step": 752 + }, + { + "ce_ib": 6.168686389923096, + "ce_orig": 0.654904305934906, + "epoch": 0.2165504349701632, + "kl_loss": 0.34978413581848145, + "loss_ib": 0.00966652762144804, + "step": 753 + }, + { + "ce_ib": 14.297883987426758, + "ce_orig": 1.6642775535583496, + "epoch": 0.2165504349701632, + "kl_loss": 0.41902047395706177, + "loss_ib": 0.018488090485334396, + "step": 753 + }, + { + "ce_ib": 8.20667839050293, + "ce_orig": 1.1301182508468628, + "epoch": 0.2165504349701632, + "kl_loss": 0.36091458797454834, + "loss_ib": 0.011815824545919895, + "step": 753 + }, + { + "ce_ib": 10.069759368896484, + "ce_orig": 0.8426340818405151, + "epoch": 0.2165504349701632, + "kl_loss": 0.4236080050468445, + "loss_ib": 0.0143058393150568, + "step": 753 + }, + { + "ce_ib": 9.213713645935059, + "ce_orig": 1.3873950242996216, + "epoch": 0.21683801854914084, + "kl_loss": 0.41740885376930237, + "loss_ib": 0.013387802988290787, + "step": 754 + }, + { + "ce_ib": 8.983365058898926, + "ce_orig": 0.7299124598503113, + "epoch": 0.21683801854914084, + "kl_loss": 0.347156286239624, + "loss_ib": 0.012454927898943424, + "step": 754 + }, + { + "ce_ib": 8.433507919311523, + "ce_orig": 1.1746602058410645, + "epoch": 0.21683801854914084, + "kl_loss": 0.3870384693145752, + "loss_ib": 0.012303893454372883, + "step": 754 + }, + { + "ce_ib": 8.158003807067871, + "ce_orig": 0.9122118353843689, + "epoch": 0.21683801854914084, + "kl_loss": 0.32447659969329834, + "loss_ib": 0.011402769014239311, + "step": 754 + }, + { + "epoch": 0.2171256021281185, + "grad_norm": 0.10924255847930908, + "learning_rate": 9.954480531907935e-06, + "loss": 0.9322, + "step": 755 + }, + { + "ce_ib": 7.25479793548584, + "ce_orig": 0.7606655955314636, + "epoch": 0.2171256021281185, + "kl_loss": 0.38670188188552856, + "loss_ib": 0.011121816001832485, + "step": 755 + }, + { + "ce_ib": 6.150333881378174, + "ce_orig": 0.5132085680961609, + "epoch": 0.2171256021281185, + "kl_loss": 0.5129187107086182, + "loss_ib": 0.011279520578682423, + "step": 755 + }, + { + "ce_ib": 8.504961967468262, + "ce_orig": 0.43100351095199585, + "epoch": 0.2171256021281185, + "kl_loss": 0.36954519152641296, + "loss_ib": 0.012200413271784782, + "step": 755 + }, + { + "ce_ib": 7.746326923370361, + "ce_orig": 0.9925227165222168, + "epoch": 0.2171256021281185, + "kl_loss": 0.35602182149887085, + "loss_ib": 0.011306545697152615, + "step": 755 + }, + { + "ce_ib": 11.200096130371094, + "ce_orig": 0.693459689617157, + "epoch": 0.2174131857070961, + "kl_loss": 0.43727046251296997, + "loss_ib": 0.015572800301015377, + "step": 756 + }, + { + "ce_ib": 9.643890380859375, + "ce_orig": 0.9979680180549622, + "epoch": 0.2174131857070961, + "kl_loss": 0.4014120399951935, + "loss_ib": 0.0136580104008317, + "step": 756 + }, + { + "ce_ib": 9.413188934326172, + "ce_orig": 0.6425915360450745, + "epoch": 0.2174131857070961, + "kl_loss": 0.41752490401268005, + "loss_ib": 0.0135884378105402, + "step": 756 + }, + { + "ce_ib": 8.591798782348633, + "ce_orig": 0.6160233020782471, + "epoch": 0.2174131857070961, + "kl_loss": 0.39359089732170105, + "loss_ib": 0.0125277079641819, + "step": 756 + }, + { + "ce_ib": 9.97320556640625, + "ce_orig": 0.9202979803085327, + "epoch": 0.21770076928607376, + "kl_loss": 0.38434839248657227, + "loss_ib": 0.01381669007241726, + "step": 757 + }, + { + "ce_ib": 11.336276054382324, + "ce_orig": 1.1318460702896118, + "epoch": 0.21770076928607376, + "kl_loss": 0.426124632358551, + "loss_ib": 0.015597522258758545, + "step": 757 + }, + { + "ce_ib": 7.240839958190918, + "ce_orig": 0.6424944996833801, + "epoch": 0.21770076928607376, + "kl_loss": 0.47551506757736206, + "loss_ib": 0.011995989829301834, + "step": 757 + }, + { + "ce_ib": 8.176185607910156, + "ce_orig": 0.8402231931686401, + "epoch": 0.21770076928607376, + "kl_loss": 0.3214970827102661, + "loss_ib": 0.011391155421733856, + "step": 757 + }, + { + "ce_ib": 7.582910060882568, + "ce_orig": 0.6925665736198425, + "epoch": 0.21798835286505142, + "kl_loss": 0.2832297086715698, + "loss_ib": 0.010415206663310528, + "step": 758 + }, + { + "ce_ib": 6.832791805267334, + "ce_orig": 0.514137327671051, + "epoch": 0.21798835286505142, + "kl_loss": 0.3456891179084778, + "loss_ib": 0.010289683006703854, + "step": 758 + }, + { + "ce_ib": 10.459983825683594, + "ce_orig": 1.1698083877563477, + "epoch": 0.21798835286505142, + "kl_loss": 0.33782780170440674, + "loss_ib": 0.013838262297213078, + "step": 758 + }, + { + "ce_ib": 8.3121919631958, + "ce_orig": 0.6954447627067566, + "epoch": 0.21798835286505142, + "kl_loss": 0.38117825984954834, + "loss_ib": 0.012123974040150642, + "step": 758 + }, + { + "ce_ib": 6.0204010009765625, + "ce_orig": 0.7387468814849854, + "epoch": 0.21827593644402904, + "kl_loss": 0.42067545652389526, + "loss_ib": 0.01022715587168932, + "step": 759 + }, + { + "ce_ib": 8.076128005981445, + "ce_orig": 0.6554126143455505, + "epoch": 0.21827593644402904, + "kl_loss": 0.3814757466316223, + "loss_ib": 0.011890885420143604, + "step": 759 + }, + { + "ce_ib": 7.948196887969971, + "ce_orig": 0.7137631177902222, + "epoch": 0.21827593644402904, + "kl_loss": 0.4568563997745514, + "loss_ib": 0.012516760267317295, + "step": 759 + }, + { + "ce_ib": 7.82420015335083, + "ce_orig": 0.8517636060714722, + "epoch": 0.21827593644402904, + "kl_loss": 0.38165003061294556, + "loss_ib": 0.011640701442956924, + "step": 759 + }, + { + "epoch": 0.2185635200230067, + "grad_norm": 0.10263609886169434, + "learning_rate": 9.953429730181653e-06, + "loss": 0.897, + "step": 760 + }, + { + "ce_ib": 9.436492919921875, + "ce_orig": 0.9970396757125854, + "epoch": 0.2185635200230067, + "kl_loss": 0.350554496049881, + "loss_ib": 0.012942037545144558, + "step": 760 + }, + { + "ce_ib": 8.581624984741211, + "ce_orig": 0.445902556180954, + "epoch": 0.2185635200230067, + "kl_loss": 0.44985431432724, + "loss_ib": 0.013080167584121227, + "step": 760 + }, + { + "ce_ib": 5.427255153656006, + "ce_orig": 0.6384531855583191, + "epoch": 0.2185635200230067, + "kl_loss": 0.3149919807910919, + "loss_ib": 0.008577174507081509, + "step": 760 + }, + { + "ce_ib": 12.8211030960083, + "ce_orig": 1.4226137399673462, + "epoch": 0.2185635200230067, + "kl_loss": 0.297141969203949, + "loss_ib": 0.015792522579431534, + "step": 760 + }, + { + "ce_ib": 12.39339542388916, + "ce_orig": 1.4045170545578003, + "epoch": 0.21885110360198431, + "kl_loss": 0.3612511157989502, + "loss_ib": 0.016005907207727432, + "step": 761 + }, + { + "ce_ib": 7.98706579208374, + "ce_orig": 0.4089260399341583, + "epoch": 0.21885110360198431, + "kl_loss": 0.5325812101364136, + "loss_ib": 0.013312878087162971, + "step": 761 + }, + { + "ce_ib": 9.43854808807373, + "ce_orig": 0.47952842712402344, + "epoch": 0.21885110360198431, + "kl_loss": 0.39229825139045715, + "loss_ib": 0.013361530378460884, + "step": 761 + }, + { + "ce_ib": 7.874027729034424, + "ce_orig": 0.8605637550354004, + "epoch": 0.21885110360198431, + "kl_loss": 0.3315168619155884, + "loss_ib": 0.01118919625878334, + "step": 761 + }, + { + "ce_ib": 14.053482055664062, + "ce_orig": 1.8178443908691406, + "epoch": 0.21913868718096197, + "kl_loss": 0.4107596278190613, + "loss_ib": 0.018161077052354813, + "step": 762 + }, + { + "ce_ib": 12.965909957885742, + "ce_orig": 1.3146981000900269, + "epoch": 0.21913868718096197, + "kl_loss": 0.35652273893356323, + "loss_ib": 0.016531137749552727, + "step": 762 + }, + { + "ce_ib": 8.16763973236084, + "ce_orig": 1.06657874584198, + "epoch": 0.21913868718096197, + "kl_loss": 0.3259735703468323, + "loss_ib": 0.011427376419305801, + "step": 762 + }, + { + "ce_ib": 9.887922286987305, + "ce_orig": 0.7984805107116699, + "epoch": 0.21913868718096197, + "kl_loss": 0.4151677191257477, + "loss_ib": 0.01403959933668375, + "step": 762 + }, + { + "ce_ib": 12.173575401306152, + "ce_orig": 0.7809048891067505, + "epoch": 0.21942627075993962, + "kl_loss": 0.8220731019973755, + "loss_ib": 0.020394306629896164, + "step": 763 + }, + { + "ce_ib": 11.080092430114746, + "ce_orig": 1.3501423597335815, + "epoch": 0.21942627075993962, + "kl_loss": 0.3217220902442932, + "loss_ib": 0.014297313056886196, + "step": 763 + }, + { + "ce_ib": 5.698494911193848, + "ce_orig": 0.587028443813324, + "epoch": 0.21942627075993962, + "kl_loss": 0.4448961913585663, + "loss_ib": 0.010147457011044025, + "step": 763 + }, + { + "ce_ib": 7.876105785369873, + "ce_orig": 0.8657087087631226, + "epoch": 0.21942627075993962, + "kl_loss": 0.35243791341781616, + "loss_ib": 0.01140048447996378, + "step": 763 + }, + { + "ce_ib": 7.448467254638672, + "ce_orig": 0.6551787257194519, + "epoch": 0.21971385433891724, + "kl_loss": 0.31218671798706055, + "loss_ib": 0.010570335201919079, + "step": 764 + }, + { + "ce_ib": 6.403468608856201, + "ce_orig": 0.5633848905563354, + "epoch": 0.21971385433891724, + "kl_loss": 0.6558365821838379, + "loss_ib": 0.01296183466911316, + "step": 764 + }, + { + "ce_ib": 5.391688346862793, + "ce_orig": 0.3037964403629303, + "epoch": 0.21971385433891724, + "kl_loss": 0.3110879361629486, + "loss_ib": 0.008502568118274212, + "step": 764 + }, + { + "ce_ib": 11.914216995239258, + "ce_orig": 0.712795078754425, + "epoch": 0.21971385433891724, + "kl_loss": 0.8264556527137756, + "loss_ib": 0.020178772509098053, + "step": 764 + }, + { + "epoch": 0.2200014379178949, + "grad_norm": 0.08830351382493973, + "learning_rate": 9.952366994493438e-06, + "loss": 0.8629, + "step": 765 + }, + { + "ce_ib": 10.298672676086426, + "ce_orig": 1.1052519083023071, + "epoch": 0.2200014379178949, + "kl_loss": 0.29135391116142273, + "loss_ib": 0.013212212361395359, + "step": 765 + }, + { + "ce_ib": 5.10940408706665, + "ce_orig": 0.6949278116226196, + "epoch": 0.2200014379178949, + "kl_loss": 0.2762865126132965, + "loss_ib": 0.007872268557548523, + "step": 765 + }, + { + "ce_ib": 10.096806526184082, + "ce_orig": 1.0484293699264526, + "epoch": 0.2200014379178949, + "kl_loss": 0.4013941287994385, + "loss_ib": 0.0141107477247715, + "step": 765 + }, + { + "ce_ib": 6.776886463165283, + "ce_orig": 0.8039520382881165, + "epoch": 0.2200014379178949, + "kl_loss": 0.2949376702308655, + "loss_ib": 0.009726262651383877, + "step": 765 + }, + { + "ce_ib": 5.609190940856934, + "ce_orig": 0.5181265473365784, + "epoch": 0.22028902149687252, + "kl_loss": 0.31120482087135315, + "loss_ib": 0.008721238933503628, + "step": 766 + }, + { + "ce_ib": 7.555119037628174, + "ce_orig": 0.7217943668365479, + "epoch": 0.22028902149687252, + "kl_loss": 0.34005963802337646, + "loss_ib": 0.010955714620649815, + "step": 766 + }, + { + "ce_ib": 9.171826362609863, + "ce_orig": 1.1021441221237183, + "epoch": 0.22028902149687252, + "kl_loss": 0.39605045318603516, + "loss_ib": 0.01313233096152544, + "step": 766 + }, + { + "ce_ib": 9.409163475036621, + "ce_orig": 0.983971893787384, + "epoch": 0.22028902149687252, + "kl_loss": 0.3282513916492462, + "loss_ib": 0.012691677547991276, + "step": 766 + }, + { + "ce_ib": 10.134336471557617, + "ce_orig": 0.8422884345054626, + "epoch": 0.22057660507585017, + "kl_loss": 0.6862824559211731, + "loss_ib": 0.016997160390019417, + "step": 767 + }, + { + "ce_ib": 8.78597640991211, + "ce_orig": 0.6801238656044006, + "epoch": 0.22057660507585017, + "kl_loss": 0.383102685213089, + "loss_ib": 0.012617003172636032, + "step": 767 + }, + { + "ce_ib": 10.374561309814453, + "ce_orig": 1.0352544784545898, + "epoch": 0.22057660507585017, + "kl_loss": 0.32708740234375, + "loss_ib": 0.013645435683429241, + "step": 767 + }, + { + "ce_ib": 4.927638530731201, + "ce_orig": 0.5614314079284668, + "epoch": 0.22057660507585017, + "kl_loss": 0.37318044900894165, + "loss_ib": 0.008659442886710167, + "step": 767 + }, + { + "ce_ib": 11.366865158081055, + "ce_orig": 0.8449366688728333, + "epoch": 0.22086418865482782, + "kl_loss": 0.36128175258636475, + "loss_ib": 0.014979682862758636, + "step": 768 + }, + { + "ce_ib": 9.95175838470459, + "ce_orig": 1.076265811920166, + "epoch": 0.22086418865482782, + "kl_loss": 0.3022347688674927, + "loss_ib": 0.012974105775356293, + "step": 768 + }, + { + "ce_ib": 10.05750846862793, + "ce_orig": 0.8622808456420898, + "epoch": 0.22086418865482782, + "kl_loss": 0.3522017002105713, + "loss_ib": 0.013579525984823704, + "step": 768 + }, + { + "ce_ib": 8.465921401977539, + "ce_orig": 0.7081587910652161, + "epoch": 0.22086418865482782, + "kl_loss": 0.4725481867790222, + "loss_ib": 0.01319140288978815, + "step": 768 + }, + { + "ce_ib": 12.121952056884766, + "ce_orig": 0.9619759917259216, + "epoch": 0.22115177223380544, + "kl_loss": 0.5650833249092102, + "loss_ib": 0.01777278631925583, + "step": 769 + }, + { + "ce_ib": 7.01571798324585, + "ce_orig": 0.5827529430389404, + "epoch": 0.22115177223380544, + "kl_loss": 0.36164534091949463, + "loss_ib": 0.010632171295583248, + "step": 769 + }, + { + "ce_ib": 9.584782600402832, + "ce_orig": 0.9394201636314392, + "epoch": 0.22115177223380544, + "kl_loss": 0.3103780746459961, + "loss_ib": 0.012688562273979187, + "step": 769 + }, + { + "ce_ib": 13.475021362304688, + "ce_orig": 1.4583699703216553, + "epoch": 0.22115177223380544, + "kl_loss": 0.4726155996322632, + "loss_ib": 0.018201176077127457, + "step": 769 + }, + { + "epoch": 0.2214393558127831, + "grad_norm": 0.09313464909791946, + "learning_rate": 9.951292327403663e-06, + "loss": 0.9476, + "step": 770 + }, + { + "ce_ib": 10.434471130371094, + "ce_orig": 1.0447543859481812, + "epoch": 0.2214393558127831, + "kl_loss": 0.3816416561603546, + "loss_ib": 0.01425088755786419, + "step": 770 + }, + { + "ce_ib": 6.2150187492370605, + "ce_orig": 0.7222704291343689, + "epoch": 0.2214393558127831, + "kl_loss": 0.3173387050628662, + "loss_ib": 0.009388405829668045, + "step": 770 + }, + { + "ce_ib": 15.137702941894531, + "ce_orig": 2.1303963661193848, + "epoch": 0.2214393558127831, + "kl_loss": 0.3662612736225128, + "loss_ib": 0.018800314515829086, + "step": 770 + }, + { + "ce_ib": 9.431180953979492, + "ce_orig": 0.7595182061195374, + "epoch": 0.2214393558127831, + "kl_loss": 0.35950183868408203, + "loss_ib": 0.01302619930356741, + "step": 770 + }, + { + "ce_ib": 7.864904880523682, + "ce_orig": 0.48237812519073486, + "epoch": 0.22172693939176072, + "kl_loss": 0.35039085149765015, + "loss_ib": 0.011368812993168831, + "step": 771 + }, + { + "ce_ib": 10.381065368652344, + "ce_orig": 0.9781961441040039, + "epoch": 0.22172693939176072, + "kl_loss": 0.45010584592819214, + "loss_ib": 0.014882123097777367, + "step": 771 + }, + { + "ce_ib": 8.742616653442383, + "ce_orig": 0.7862986326217651, + "epoch": 0.22172693939176072, + "kl_loss": 0.439785897731781, + "loss_ib": 0.013140475377440453, + "step": 771 + }, + { + "ce_ib": 5.651560306549072, + "ce_orig": 0.42288464307785034, + "epoch": 0.22172693939176072, + "kl_loss": 0.660328209400177, + "loss_ib": 0.012254842557013035, + "step": 771 + }, + { + "ce_ib": 6.206586837768555, + "ce_orig": 0.48183295130729675, + "epoch": 0.22201452297073837, + "kl_loss": 0.30323436856269836, + "loss_ib": 0.00923893041908741, + "step": 772 + }, + { + "ce_ib": 9.07832145690918, + "ce_orig": 0.8749274611473083, + "epoch": 0.22201452297073837, + "kl_loss": 0.41655945777893066, + "loss_ib": 0.013243915513157845, + "step": 772 + }, + { + "ce_ib": 6.524220943450928, + "ce_orig": 0.6328911185264587, + "epoch": 0.22201452297073837, + "kl_loss": 0.4318011701107025, + "loss_ib": 0.010842232033610344, + "step": 772 + }, + { + "ce_ib": 5.257693290710449, + "ce_orig": 0.3358021080493927, + "epoch": 0.22201452297073837, + "kl_loss": 0.25356054306030273, + "loss_ib": 0.007793298922479153, + "step": 772 + }, + { + "ce_ib": 8.05919075012207, + "ce_orig": 0.5993462800979614, + "epoch": 0.22230210654971602, + "kl_loss": 0.37483856081962585, + "loss_ib": 0.011807575821876526, + "step": 773 + }, + { + "ce_ib": 7.331540584564209, + "ce_orig": 0.6803370118141174, + "epoch": 0.22230210654971602, + "kl_loss": 0.38549911975860596, + "loss_ib": 0.011186531744897366, + "step": 773 + }, + { + "ce_ib": 6.345643997192383, + "ce_orig": 0.6718232035636902, + "epoch": 0.22230210654971602, + "kl_loss": 0.38235121965408325, + "loss_ib": 0.010169154964387417, + "step": 773 + }, + { + "ce_ib": 12.004826545715332, + "ce_orig": 1.3992005586624146, + "epoch": 0.22230210654971602, + "kl_loss": 0.4480227530002594, + "loss_ib": 0.016485054045915604, + "step": 773 + }, + { + "ce_ib": 5.82366418838501, + "ce_orig": 0.5827267169952393, + "epoch": 0.22258969012869365, + "kl_loss": 0.38700664043426514, + "loss_ib": 0.009693730622529984, + "step": 774 + }, + { + "ce_ib": 12.45301342010498, + "ce_orig": 1.3923219442367554, + "epoch": 0.22258969012869365, + "kl_loss": 0.32873886823654175, + "loss_ib": 0.015740402042865753, + "step": 774 + }, + { + "ce_ib": 11.65266227722168, + "ce_orig": 1.4098445177078247, + "epoch": 0.22258969012869365, + "kl_loss": 0.34009307622909546, + "loss_ib": 0.015053593553602695, + "step": 774 + }, + { + "ce_ib": 7.026700019836426, + "ce_orig": 0.7507833242416382, + "epoch": 0.22258969012869365, + "kl_loss": 0.30786123871803284, + "loss_ib": 0.010105312801897526, + "step": 774 + }, + { + "epoch": 0.2228772737076713, + "grad_norm": 0.09665674716234207, + "learning_rate": 9.95020573150145e-06, + "loss": 0.8829, + "step": 775 + }, + { + "ce_ib": 9.703756332397461, + "ce_orig": 0.9238471388816833, + "epoch": 0.2228772737076713, + "kl_loss": 0.328873872756958, + "loss_ib": 0.012992494739592075, + "step": 775 + }, + { + "ce_ib": 13.514963150024414, + "ce_orig": 1.3463358879089355, + "epoch": 0.2228772737076713, + "kl_loss": 0.3467975854873657, + "loss_ib": 0.01698293909430504, + "step": 775 + }, + { + "ce_ib": 8.213980674743652, + "ce_orig": 0.8611528873443604, + "epoch": 0.2228772737076713, + "kl_loss": 0.7585752010345459, + "loss_ib": 0.015799731016159058, + "step": 775 + }, + { + "ce_ib": 8.563956260681152, + "ce_orig": 0.8483219146728516, + "epoch": 0.2228772737076713, + "kl_loss": 0.34036189317703247, + "loss_ib": 0.011967575177550316, + "step": 775 + }, + { + "ce_ib": 9.094470024108887, + "ce_orig": 1.0628784894943237, + "epoch": 0.22316485728664892, + "kl_loss": 0.46681421995162964, + "loss_ib": 0.013762611895799637, + "step": 776 + }, + { + "ce_ib": 7.872087478637695, + "ce_orig": 0.8701239228248596, + "epoch": 0.22316485728664892, + "kl_loss": 0.3346797227859497, + "loss_ib": 0.011218884028494358, + "step": 776 + }, + { + "ce_ib": 10.01878833770752, + "ce_orig": 0.8464388847351074, + "epoch": 0.22316485728664892, + "kl_loss": 0.47228649258613586, + "loss_ib": 0.014741652645170689, + "step": 776 + }, + { + "ce_ib": 7.2308549880981445, + "ce_orig": 0.8908596038818359, + "epoch": 0.22316485728664892, + "kl_loss": 0.3172210454940796, + "loss_ib": 0.010403065010905266, + "step": 776 + }, + { + "ce_ib": 8.690382957458496, + "ce_orig": 0.7194284200668335, + "epoch": 0.22345244086562657, + "kl_loss": 0.3316614031791687, + "loss_ib": 0.01200699619948864, + "step": 777 + }, + { + "ce_ib": 7.480838775634766, + "ce_orig": 0.7475316524505615, + "epoch": 0.22345244086562657, + "kl_loss": 0.3133096694946289, + "loss_ib": 0.010613935068249702, + "step": 777 + }, + { + "ce_ib": 16.189916610717773, + "ce_orig": 2.0802390575408936, + "epoch": 0.22345244086562657, + "kl_loss": 0.484038770198822, + "loss_ib": 0.021030303090810776, + "step": 777 + }, + { + "ce_ib": 7.3860626220703125, + "ce_orig": 0.5432776212692261, + "epoch": 0.22345244086562657, + "kl_loss": 0.39021003246307373, + "loss_ib": 0.011288163252174854, + "step": 777 + }, + { + "ce_ib": 5.361101150512695, + "ce_orig": 0.6422094106674194, + "epoch": 0.22374002444460422, + "kl_loss": 0.3227311372756958, + "loss_ib": 0.008588411845266819, + "step": 778 + }, + { + "ce_ib": 11.809074401855469, + "ce_orig": 1.1206345558166504, + "epoch": 0.22374002444460422, + "kl_loss": 0.34884482622146606, + "loss_ib": 0.015297521837055683, + "step": 778 + }, + { + "ce_ib": 8.916701316833496, + "ce_orig": 0.8444573283195496, + "epoch": 0.22374002444460422, + "kl_loss": 0.3931344151496887, + "loss_ib": 0.01284804567694664, + "step": 778 + }, + { + "ce_ib": 7.642374038696289, + "ce_orig": 1.1561375856399536, + "epoch": 0.22374002444460422, + "kl_loss": 0.35411643981933594, + "loss_ib": 0.011183538474142551, + "step": 778 + }, + { + "ce_ib": 6.7063140869140625, + "ce_orig": 0.6542351245880127, + "epoch": 0.22402760802358185, + "kl_loss": 0.34912005066871643, + "loss_ib": 0.010197513736784458, + "step": 779 + }, + { + "ce_ib": 7.014062404632568, + "ce_orig": 0.5142630934715271, + "epoch": 0.22402760802358185, + "kl_loss": 0.34287211298942566, + "loss_ib": 0.010442784056067467, + "step": 779 + }, + { + "ce_ib": 9.529996871948242, + "ce_orig": 0.7422083616256714, + "epoch": 0.22402760802358185, + "kl_loss": 0.3641512989997864, + "loss_ib": 0.013171510770916939, + "step": 779 + }, + { + "ce_ib": 6.52680778503418, + "ce_orig": 0.8021174669265747, + "epoch": 0.22402760802358185, + "kl_loss": 0.32752007246017456, + "loss_ib": 0.009802008979022503, + "step": 779 + }, + { + "epoch": 0.2243151916025595, + "grad_norm": 0.10656443983316422, + "learning_rate": 9.949107209404664e-06, + "loss": 0.8954, + "step": 780 + }, + { + "ce_ib": 9.61880874633789, + "ce_orig": 0.8786146640777588, + "epoch": 0.2243151916025595, + "kl_loss": 0.3575580418109894, + "loss_ib": 0.013194388709962368, + "step": 780 + }, + { + "ce_ib": 10.178520202636719, + "ce_orig": 0.807783305644989, + "epoch": 0.2243151916025595, + "kl_loss": 0.342892587184906, + "loss_ib": 0.013607447035610676, + "step": 780 + }, + { + "ce_ib": 9.118548393249512, + "ce_orig": 1.1404297351837158, + "epoch": 0.2243151916025595, + "kl_loss": 0.30617213249206543, + "loss_ib": 0.012180269695818424, + "step": 780 + }, + { + "ce_ib": 10.034414291381836, + "ce_orig": 0.7776552438735962, + "epoch": 0.2243151916025595, + "kl_loss": 0.4022853970527649, + "loss_ib": 0.014057268388569355, + "step": 780 + }, + { + "ce_ib": 9.740767478942871, + "ce_orig": 1.099388599395752, + "epoch": 0.22460277518153712, + "kl_loss": 0.6486046314239502, + "loss_ib": 0.016226813197135925, + "step": 781 + }, + { + "ce_ib": 6.6487016677856445, + "ce_orig": 0.6519371271133423, + "epoch": 0.22460277518153712, + "kl_loss": 0.3416748344898224, + "loss_ib": 0.010065450333058834, + "step": 781 + }, + { + "ce_ib": 4.7245869636535645, + "ce_orig": 0.6195375323295593, + "epoch": 0.22460277518153712, + "kl_loss": 0.2902180552482605, + "loss_ib": 0.007626766804605722, + "step": 781 + }, + { + "ce_ib": 12.154743194580078, + "ce_orig": 1.4246693849563599, + "epoch": 0.22460277518153712, + "kl_loss": 0.25261539220809937, + "loss_ib": 0.014680897817015648, + "step": 781 + }, + { + "ce_ib": 5.439098834991455, + "ce_orig": 0.8269286155700684, + "epoch": 0.22489035876051477, + "kl_loss": 0.34107422828674316, + "loss_ib": 0.00884984154254198, + "step": 782 + }, + { + "ce_ib": 9.174774169921875, + "ce_orig": 1.1341043710708618, + "epoch": 0.22489035876051477, + "kl_loss": 0.3221195936203003, + "loss_ib": 0.012395970523357391, + "step": 782 + }, + { + "ce_ib": 9.228188514709473, + "ce_orig": 1.234445333480835, + "epoch": 0.22489035876051477, + "kl_loss": 0.30531132221221924, + "loss_ib": 0.012281300500035286, + "step": 782 + }, + { + "ce_ib": 5.356451034545898, + "ce_orig": 0.8137478828430176, + "epoch": 0.22489035876051477, + "kl_loss": 0.37726348638534546, + "loss_ib": 0.009129085578024387, + "step": 782 + }, + { + "ce_ib": 9.502201080322266, + "ce_orig": 0.8526181578636169, + "epoch": 0.22517794233949243, + "kl_loss": 0.6065495014190674, + "loss_ib": 0.01556769572198391, + "step": 783 + }, + { + "ce_ib": 7.823635578155518, + "ce_orig": 0.6923622488975525, + "epoch": 0.22517794233949243, + "kl_loss": 0.394509494304657, + "loss_ib": 0.011768730357289314, + "step": 783 + }, + { + "ce_ib": 6.387758255004883, + "ce_orig": 0.6680426001548767, + "epoch": 0.22517794233949243, + "kl_loss": 0.30215880274772644, + "loss_ib": 0.009409346617758274, + "step": 783 + }, + { + "ce_ib": 7.9621663093566895, + "ce_orig": 0.2912781834602356, + "epoch": 0.22517794233949243, + "kl_loss": 0.6065285205841064, + "loss_ib": 0.01402745209634304, + "step": 783 + }, + { + "ce_ib": 8.740853309631348, + "ce_orig": 1.1666960716247559, + "epoch": 0.22546552591847005, + "kl_loss": 0.26575469970703125, + "loss_ib": 0.011398401111364365, + "step": 784 + }, + { + "ce_ib": 11.868843078613281, + "ce_orig": 1.1964963674545288, + "epoch": 0.22546552591847005, + "kl_loss": 0.34643134474754333, + "loss_ib": 0.01533315610140562, + "step": 784 + }, + { + "ce_ib": 11.849778175354004, + "ce_orig": 0.6975755095481873, + "epoch": 0.22546552591847005, + "kl_loss": 0.48860257863998413, + "loss_ib": 0.016735803335905075, + "step": 784 + }, + { + "ce_ib": 7.8105998039245605, + "ce_orig": 1.0470623970031738, + "epoch": 0.22546552591847005, + "kl_loss": 0.31021207571029663, + "loss_ib": 0.010912721045315266, + "step": 784 + }, + { + "epoch": 0.2257531094974477, + "grad_norm": 0.11681295186281204, + "learning_rate": 9.9479967637599e-06, + "loss": 0.9212, + "step": 785 + }, + { + "ce_ib": 11.292010307312012, + "ce_orig": 1.259683609008789, + "epoch": 0.2257531094974477, + "kl_loss": 0.2796482443809509, + "loss_ib": 0.014088491909205914, + "step": 785 + }, + { + "ce_ib": 10.634122848510742, + "ce_orig": 0.9388463497161865, + "epoch": 0.2257531094974477, + "kl_loss": 0.33059000968933105, + "loss_ib": 0.013940023258328438, + "step": 785 + }, + { + "ce_ib": 12.245208740234375, + "ce_orig": 1.6006643772125244, + "epoch": 0.2257531094974477, + "kl_loss": 0.43430644273757935, + "loss_ib": 0.016588272526860237, + "step": 785 + }, + { + "ce_ib": 7.943255424499512, + "ce_orig": 0.701815128326416, + "epoch": 0.2257531094974477, + "kl_loss": 0.42579883337020874, + "loss_ib": 0.012201243080198765, + "step": 785 + }, + { + "ce_ib": 5.053991794586182, + "ce_orig": 0.5053215622901917, + "epoch": 0.22604069307642533, + "kl_loss": 0.31988024711608887, + "loss_ib": 0.008252793923020363, + "step": 786 + }, + { + "ce_ib": 10.378332138061523, + "ce_orig": 1.0649505853652954, + "epoch": 0.22604069307642533, + "kl_loss": 0.3701839745044708, + "loss_ib": 0.014080171473324299, + "step": 786 + }, + { + "ce_ib": 6.406869888305664, + "ce_orig": 0.7461100220680237, + "epoch": 0.22604069307642533, + "kl_loss": 0.3331264853477478, + "loss_ib": 0.0097381342202425, + "step": 786 + }, + { + "ce_ib": 9.13780689239502, + "ce_orig": 0.6390551924705505, + "epoch": 0.22604069307642533, + "kl_loss": 0.5007272958755493, + "loss_ib": 0.014145080000162125, + "step": 786 + }, + { + "ce_ib": 8.147912979125977, + "ce_orig": 0.5080342292785645, + "epoch": 0.22632827665540298, + "kl_loss": 0.4450484812259674, + "loss_ib": 0.012598397210240364, + "step": 787 + }, + { + "ce_ib": 7.028100490570068, + "ce_orig": 0.593861997127533, + "epoch": 0.22632827665540298, + "kl_loss": 0.4049058258533478, + "loss_ib": 0.011077158153057098, + "step": 787 + }, + { + "ce_ib": 8.043399810791016, + "ce_orig": 0.4268825054168701, + "epoch": 0.22632827665540298, + "kl_loss": 0.35771211981773376, + "loss_ib": 0.011620521545410156, + "step": 787 + }, + { + "ce_ib": 6.9142866134643555, + "ce_orig": 0.4698316156864166, + "epoch": 0.22632827665540298, + "kl_loss": 0.8020865321159363, + "loss_ib": 0.014935152605175972, + "step": 787 + }, + { + "ce_ib": 6.95719575881958, + "ce_orig": 0.8023804426193237, + "epoch": 0.22661586023438063, + "kl_loss": 0.3693576455116272, + "loss_ib": 0.010650772601366043, + "step": 788 + }, + { + "ce_ib": 12.449686050415039, + "ce_orig": 1.3771573305130005, + "epoch": 0.22661586023438063, + "kl_loss": 0.4264632761478424, + "loss_ib": 0.016714317724108696, + "step": 788 + }, + { + "ce_ib": 11.89621353149414, + "ce_orig": 1.398105502128601, + "epoch": 0.22661586023438063, + "kl_loss": 0.30330199003219604, + "loss_ib": 0.014929232187569141, + "step": 788 + }, + { + "ce_ib": 5.163127422332764, + "ce_orig": 0.5273948907852173, + "epoch": 0.22661586023438063, + "kl_loss": 0.360205739736557, + "loss_ib": 0.008765184320509434, + "step": 788 + }, + { + "ce_ib": 10.611333847045898, + "ce_orig": 0.8973667025566101, + "epoch": 0.22690344381335825, + "kl_loss": 0.42807987332344055, + "loss_ib": 0.01489213202148676, + "step": 789 + }, + { + "ce_ib": 4.420098304748535, + "ce_orig": 0.5175418853759766, + "epoch": 0.22690344381335825, + "kl_loss": 0.30578553676605225, + "loss_ib": 0.007477953098714352, + "step": 789 + }, + { + "ce_ib": 7.4076008796691895, + "ce_orig": 0.5371741056442261, + "epoch": 0.22690344381335825, + "kl_loss": 0.3347129821777344, + "loss_ib": 0.010754730552434921, + "step": 789 + }, + { + "ce_ib": 9.960221290588379, + "ce_orig": 0.9401814937591553, + "epoch": 0.22690344381335825, + "kl_loss": 0.3342566192150116, + "loss_ib": 0.013302787207067013, + "step": 789 + }, + { + "epoch": 0.2271910273923359, + "grad_norm": 0.0974324494600296, + "learning_rate": 9.946874397242474e-06, + "loss": 0.9038, + "step": 790 + }, + { + "ce_ib": 10.756206512451172, + "ce_orig": 0.7469778060913086, + "epoch": 0.2271910273923359, + "kl_loss": 0.3871549963951111, + "loss_ib": 0.014627756550908089, + "step": 790 + }, + { + "ce_ib": 11.253997802734375, + "ce_orig": 1.2753102779388428, + "epoch": 0.2271910273923359, + "kl_loss": 0.37940990924835205, + "loss_ib": 0.015048096887767315, + "step": 790 + }, + { + "ce_ib": 7.96759557723999, + "ce_orig": 0.5226830840110779, + "epoch": 0.2271910273923359, + "kl_loss": 0.4906744360923767, + "loss_ib": 0.012874339707195759, + "step": 790 + }, + { + "ce_ib": 6.907197952270508, + "ce_orig": 0.8273372054100037, + "epoch": 0.2271910273923359, + "kl_loss": 0.29984915256500244, + "loss_ib": 0.009905689395964146, + "step": 790 + }, + { + "ce_ib": 11.577720642089844, + "ce_orig": 0.4754190146923065, + "epoch": 0.22747861097131353, + "kl_loss": 0.5800215601921082, + "loss_ib": 0.017377937212586403, + "step": 791 + }, + { + "ce_ib": 7.266067028045654, + "ce_orig": 0.7913497090339661, + "epoch": 0.22747861097131353, + "kl_loss": 0.32481786608695984, + "loss_ib": 0.010514246299862862, + "step": 791 + }, + { + "ce_ib": 10.663007736206055, + "ce_orig": 0.8187626004219055, + "epoch": 0.22747861097131353, + "kl_loss": 0.36268165707588196, + "loss_ib": 0.014289823360741138, + "step": 791 + }, + { + "ce_ib": 10.45195484161377, + "ce_orig": 1.220002293586731, + "epoch": 0.22747861097131353, + "kl_loss": 0.37439438700675964, + "loss_ib": 0.014195898547768593, + "step": 791 + }, + { + "ce_ib": 10.41888427734375, + "ce_orig": 0.6695852279663086, + "epoch": 0.22776619455029118, + "kl_loss": 0.5112478733062744, + "loss_ib": 0.01553136296570301, + "step": 792 + }, + { + "ce_ib": 10.26807689666748, + "ce_orig": 1.4850273132324219, + "epoch": 0.22776619455029118, + "kl_loss": 0.45192694664001465, + "loss_ib": 0.014787346124649048, + "step": 792 + }, + { + "ce_ib": 13.323302268981934, + "ce_orig": 1.7503565549850464, + "epoch": 0.22776619455029118, + "kl_loss": 0.3711824417114258, + "loss_ib": 0.01703512668609619, + "step": 792 + }, + { + "ce_ib": 5.721099853515625, + "ce_orig": 0.3669746220111847, + "epoch": 0.22776619455029118, + "kl_loss": 0.31216946244239807, + "loss_ib": 0.00884279515594244, + "step": 792 + }, + { + "ce_ib": 11.519103050231934, + "ce_orig": 0.9824369549751282, + "epoch": 0.22805377812926883, + "kl_loss": 0.3825758397579193, + "loss_ib": 0.015344860963523388, + "step": 793 + }, + { + "ce_ib": 6.631891250610352, + "ce_orig": 0.6841241121292114, + "epoch": 0.22805377812926883, + "kl_loss": 0.3822288513183594, + "loss_ib": 0.010454179719090462, + "step": 793 + }, + { + "ce_ib": 7.766302585601807, + "ce_orig": 0.9525435566902161, + "epoch": 0.22805377812926883, + "kl_loss": 0.3503475785255432, + "loss_ib": 0.011269778944551945, + "step": 793 + }, + { + "ce_ib": 10.979907035827637, + "ce_orig": 0.8087160587310791, + "epoch": 0.22805377812926883, + "kl_loss": 0.4700503349304199, + "loss_ib": 0.015680409967899323, + "step": 793 + }, + { + "ce_ib": 4.012631416320801, + "ce_orig": 0.49861544370651245, + "epoch": 0.22834136170824645, + "kl_loss": 0.8556938171386719, + "loss_ib": 0.012569569051265717, + "step": 794 + }, + { + "ce_ib": 8.279598236083984, + "ce_orig": 0.13107673823833466, + "epoch": 0.22834136170824645, + "kl_loss": 0.8502056002616882, + "loss_ib": 0.016781654208898544, + "step": 794 + }, + { + "ce_ib": 7.012378215789795, + "ce_orig": 0.4306791424751282, + "epoch": 0.22834136170824645, + "kl_loss": 0.317436158657074, + "loss_ib": 0.010186740197241306, + "step": 794 + }, + { + "ce_ib": 8.948698043823242, + "ce_orig": 0.9477734565734863, + "epoch": 0.22834136170824645, + "kl_loss": 0.6815387606620789, + "loss_ib": 0.015764085575938225, + "step": 794 + }, + { + "epoch": 0.2286289452872241, + "grad_norm": 0.11332917958498001, + "learning_rate": 9.945740112556433e-06, + "loss": 0.8909, + "step": 795 + }, + { + "ce_ib": 9.626729965209961, + "ce_orig": 0.807258665561676, + "epoch": 0.2286289452872241, + "kl_loss": 0.30290764570236206, + "loss_ib": 0.012655805796384811, + "step": 795 + }, + { + "ce_ib": 8.031062126159668, + "ce_orig": 0.6332983374595642, + "epoch": 0.2286289452872241, + "kl_loss": 0.4431205987930298, + "loss_ib": 0.012462267652153969, + "step": 795 + }, + { + "ce_ib": 6.183389663696289, + "ce_orig": 0.3490990400314331, + "epoch": 0.2286289452872241, + "kl_loss": 0.2828608751296997, + "loss_ib": 0.009011998772621155, + "step": 795 + }, + { + "ce_ib": 10.629033088684082, + "ce_orig": 1.2572027444839478, + "epoch": 0.2286289452872241, + "kl_loss": 0.2958335280418396, + "loss_ib": 0.013587366789579391, + "step": 795 + }, + { + "ce_ib": 12.632726669311523, + "ce_orig": 1.813147783279419, + "epoch": 0.22891652886620173, + "kl_loss": 0.34517595171928406, + "loss_ib": 0.01608448661863804, + "step": 796 + }, + { + "ce_ib": 9.120535850524902, + "ce_orig": 1.1200886964797974, + "epoch": 0.22891652886620173, + "kl_loss": 0.36460253596305847, + "loss_ib": 0.012766561470925808, + "step": 796 + }, + { + "ce_ib": 8.088251113891602, + "ce_orig": 0.8303477168083191, + "epoch": 0.22891652886620173, + "kl_loss": 0.3226301074028015, + "loss_ib": 0.01131455134600401, + "step": 796 + }, + { + "ce_ib": 10.398566246032715, + "ce_orig": 0.7473430633544922, + "epoch": 0.22891652886620173, + "kl_loss": 0.40119439363479614, + "loss_ib": 0.01441050972789526, + "step": 796 + }, + { + "ce_ib": 7.080221176147461, + "ce_orig": 0.7931245565414429, + "epoch": 0.22920411244517938, + "kl_loss": 0.29053109884262085, + "loss_ib": 0.00998553168028593, + "step": 797 + }, + { + "ce_ib": 7.256488800048828, + "ce_orig": 0.8034987449645996, + "epoch": 0.22920411244517938, + "kl_loss": 0.3614344596862793, + "loss_ib": 0.010870832949876785, + "step": 797 + }, + { + "ce_ib": 10.867740631103516, + "ce_orig": 0.8990861177444458, + "epoch": 0.22920411244517938, + "kl_loss": 0.46362748742103577, + "loss_ib": 0.015504015609622002, + "step": 797 + }, + { + "ce_ib": 9.055109977722168, + "ce_orig": 0.9181981086730957, + "epoch": 0.22920411244517938, + "kl_loss": 0.4794706106185913, + "loss_ib": 0.013849816285073757, + "step": 797 + }, + { + "ce_ib": 7.544148921966553, + "ce_orig": 0.6566915512084961, + "epoch": 0.22949169602415703, + "kl_loss": 0.30486202239990234, + "loss_ib": 0.010592768900096416, + "step": 798 + }, + { + "ce_ib": 5.6905059814453125, + "ce_orig": 0.8550714254379272, + "epoch": 0.22949169602415703, + "kl_loss": 0.45504266023635864, + "loss_ib": 0.010240932926535606, + "step": 798 + }, + { + "ce_ib": 6.176849842071533, + "ce_orig": 0.6315608024597168, + "epoch": 0.22949169602415703, + "kl_loss": 0.3964434266090393, + "loss_ib": 0.010141284205019474, + "step": 798 + }, + { + "ce_ib": 4.630974769592285, + "ce_orig": 0.44450247287750244, + "epoch": 0.22949169602415703, + "kl_loss": 0.36748573184013367, + "loss_ib": 0.00830583181232214, + "step": 798 + }, + { + "ce_ib": 11.174190521240234, + "ce_orig": 1.3261359930038452, + "epoch": 0.22977927960313466, + "kl_loss": 0.2634417414665222, + "loss_ib": 0.013808608055114746, + "step": 799 + }, + { + "ce_ib": 8.188764572143555, + "ce_orig": 0.5794906616210938, + "epoch": 0.22977927960313466, + "kl_loss": 0.40460944175720215, + "loss_ib": 0.01223485916852951, + "step": 799 + }, + { + "ce_ib": 8.544333457946777, + "ce_orig": 0.705375075340271, + "epoch": 0.22977927960313466, + "kl_loss": 0.3461841940879822, + "loss_ib": 0.012006175704300404, + "step": 799 + }, + { + "ce_ib": 10.321372985839844, + "ce_orig": 0.9484913945198059, + "epoch": 0.22977927960313466, + "kl_loss": 0.343988835811615, + "loss_ib": 0.01376126054674387, + "step": 799 + }, + { + "epoch": 0.2300668631821123, + "grad_norm": 0.08445550501346588, + "learning_rate": 9.94459391243453e-06, + "loss": 0.8778, + "step": 800 + }, + { + "ce_ib": 7.361359119415283, + "ce_orig": 0.678317129611969, + "epoch": 0.2300668631821123, + "kl_loss": 0.6545436382293701, + "loss_ib": 0.013906795531511307, + "step": 800 + }, + { + "ce_ib": 9.871505737304688, + "ce_orig": 1.0527350902557373, + "epoch": 0.2300668631821123, + "kl_loss": 0.6774921417236328, + "loss_ib": 0.016646428033709526, + "step": 800 + }, + { + "ce_ib": 10.61447525024414, + "ce_orig": 1.2886981964111328, + "epoch": 0.2300668631821123, + "kl_loss": 0.32457613945007324, + "loss_ib": 0.013860235922038555, + "step": 800 + }, + { + "ce_ib": 12.351025581359863, + "ce_orig": 1.449278712272644, + "epoch": 0.2300668631821123, + "kl_loss": 0.3870459496974945, + "loss_ib": 0.016221484169363976, + "step": 800 + }, + { + "ce_ib": 6.113245964050293, + "ce_orig": 0.49481338262557983, + "epoch": 0.23035444676108993, + "kl_loss": 0.31978094577789307, + "loss_ib": 0.009311055764555931, + "step": 801 + }, + { + "ce_ib": 9.732067108154297, + "ce_orig": 1.0109401941299438, + "epoch": 0.23035444676108993, + "kl_loss": 0.37637659907341003, + "loss_ib": 0.013495832681655884, + "step": 801 + }, + { + "ce_ib": 7.419297695159912, + "ce_orig": 0.680454671382904, + "epoch": 0.23035444676108993, + "kl_loss": 0.3423839509487152, + "loss_ib": 0.01084313727915287, + "step": 801 + }, + { + "ce_ib": 7.179284572601318, + "ce_orig": 0.8883647322654724, + "epoch": 0.23035444676108993, + "kl_loss": 0.3522745966911316, + "loss_ib": 0.01070203073322773, + "step": 801 + }, + { + "ce_ib": 12.911877632141113, + "ce_orig": 0.9866081476211548, + "epoch": 0.23064203034006758, + "kl_loss": 0.293899267911911, + "loss_ib": 0.015850869938731194, + "step": 802 + }, + { + "ce_ib": 10.869671821594238, + "ce_orig": 1.2515939474105835, + "epoch": 0.23064203034006758, + "kl_loss": 0.6851050853729248, + "loss_ib": 0.017720723524689674, + "step": 802 + }, + { + "ce_ib": 7.759640216827393, + "ce_orig": 0.6238282918930054, + "epoch": 0.23064203034006758, + "kl_loss": 0.36549922823905945, + "loss_ib": 0.011414632201194763, + "step": 802 + }, + { + "ce_ib": 3.205626964569092, + "ce_orig": 0.13584905862808228, + "epoch": 0.23064203034006758, + "kl_loss": 0.7906656265258789, + "loss_ib": 0.011112282983958721, + "step": 802 + }, + { + "ce_ib": 12.389301300048828, + "ce_orig": 1.3402420282363892, + "epoch": 0.23092961391904523, + "kl_loss": 0.35545194149017334, + "loss_ib": 0.015943819656968117, + "step": 803 + }, + { + "ce_ib": 10.47632122039795, + "ce_orig": 0.7908617854118347, + "epoch": 0.23092961391904523, + "kl_loss": 0.31981128454208374, + "loss_ib": 0.013674433343112469, + "step": 803 + }, + { + "ce_ib": 9.429079055786133, + "ce_orig": 1.371010661125183, + "epoch": 0.23092961391904523, + "kl_loss": 0.6973379850387573, + "loss_ib": 0.016402458772063255, + "step": 803 + }, + { + "ce_ib": 7.480784893035889, + "ce_orig": 0.8337413668632507, + "epoch": 0.23092961391904523, + "kl_loss": 0.3528236150741577, + "loss_ib": 0.01100902073085308, + "step": 803 + }, + { + "ce_ib": 5.842719078063965, + "ce_orig": 0.5573224425315857, + "epoch": 0.23121719749802286, + "kl_loss": 0.2670140862464905, + "loss_ib": 0.008512860164046288, + "step": 804 + }, + { + "ce_ib": 7.656364917755127, + "ce_orig": 0.7828370928764343, + "epoch": 0.23121719749802286, + "kl_loss": 0.2876054346561432, + "loss_ib": 0.010532419197261333, + "step": 804 + }, + { + "ce_ib": 4.491579532623291, + "ce_orig": 0.33045491576194763, + "epoch": 0.23121719749802286, + "kl_loss": 0.6809794902801514, + "loss_ib": 0.011301374062895775, + "step": 804 + }, + { + "ce_ib": 9.232762336730957, + "ce_orig": 0.7977848649024963, + "epoch": 0.23121719749802286, + "kl_loss": 0.2893211245536804, + "loss_ib": 0.012125973589718342, + "step": 804 + }, + { + "epoch": 0.2315047810770005, + "grad_norm": 0.09778746962547302, + "learning_rate": 9.943435799638226e-06, + "loss": 0.9126, + "step": 805 + }, + { + "ce_ib": 10.316106796264648, + "ce_orig": 1.1037230491638184, + "epoch": 0.2315047810770005, + "kl_loss": 0.38173002004623413, + "loss_ib": 0.014133407734334469, + "step": 805 + }, + { + "ce_ib": 10.278280258178711, + "ce_orig": 1.2289481163024902, + "epoch": 0.2315047810770005, + "kl_loss": 0.4344818592071533, + "loss_ib": 0.014623099006712437, + "step": 805 + }, + { + "ce_ib": 10.008515357971191, + "ce_orig": 1.114085078239441, + "epoch": 0.2315047810770005, + "kl_loss": 0.4228159785270691, + "loss_ib": 0.014236673712730408, + "step": 805 + }, + { + "ce_ib": 10.21235179901123, + "ce_orig": 1.434356451034546, + "epoch": 0.2315047810770005, + "kl_loss": 0.26860710978507996, + "loss_ib": 0.012898423708975315, + "step": 805 + }, + { + "ce_ib": 9.20308780670166, + "ce_orig": 0.6685881018638611, + "epoch": 0.23179236465597813, + "kl_loss": 0.35367828607559204, + "loss_ib": 0.012739870697259903, + "step": 806 + }, + { + "ce_ib": 12.784605026245117, + "ce_orig": 1.4579914808273315, + "epoch": 0.23179236465597813, + "kl_loss": 0.34686392545700073, + "loss_ib": 0.016253244131803513, + "step": 806 + }, + { + "ce_ib": 11.806829452514648, + "ce_orig": 1.5088831186294556, + "epoch": 0.23179236465597813, + "kl_loss": 0.28930962085723877, + "loss_ib": 0.014699925668537617, + "step": 806 + }, + { + "ce_ib": 9.593900680541992, + "ce_orig": 1.328742265701294, + "epoch": 0.23179236465597813, + "kl_loss": 0.33179858326911926, + "loss_ib": 0.012911886908113956, + "step": 806 + }, + { + "ce_ib": 9.164833068847656, + "ce_orig": 1.269349455833435, + "epoch": 0.23207994823495579, + "kl_loss": 0.3544562757015228, + "loss_ib": 0.012709395959973335, + "step": 807 + }, + { + "ce_ib": 7.470652103424072, + "ce_orig": 0.6890314817428589, + "epoch": 0.23207994823495579, + "kl_loss": 0.3289049565792084, + "loss_ib": 0.010759701952338219, + "step": 807 + }, + { + "ce_ib": 9.631521224975586, + "ce_orig": 1.2519800662994385, + "epoch": 0.23207994823495579, + "kl_loss": 0.2588420510292053, + "loss_ib": 0.01221994124352932, + "step": 807 + }, + { + "ce_ib": 8.615235328674316, + "ce_orig": 0.5310425162315369, + "epoch": 0.23207994823495579, + "kl_loss": 0.46589159965515137, + "loss_ib": 0.01327415183186531, + "step": 807 + }, + { + "ce_ib": 7.7025861740112305, + "ce_orig": 0.8470107913017273, + "epoch": 0.23236753181393344, + "kl_loss": 0.3353464603424072, + "loss_ib": 0.011056050658226013, + "step": 808 + }, + { + "ce_ib": 10.234197616577148, + "ce_orig": 1.0393426418304443, + "epoch": 0.23236753181393344, + "kl_loss": 0.49282306432724, + "loss_ib": 0.015162426978349686, + "step": 808 + }, + { + "ce_ib": 8.97604751586914, + "ce_orig": 0.8064647316932678, + "epoch": 0.23236753181393344, + "kl_loss": 0.4205772876739502, + "loss_ib": 0.013181819580495358, + "step": 808 + }, + { + "ce_ib": 10.455026626586914, + "ce_orig": 1.1835849285125732, + "epoch": 0.23236753181393344, + "kl_loss": 0.3391731381416321, + "loss_ib": 0.013846756890416145, + "step": 808 + }, + { + "ce_ib": 6.682322025299072, + "ce_orig": 0.7806753516197205, + "epoch": 0.23265511539291106, + "kl_loss": 0.2750164270401001, + "loss_ib": 0.00943248625844717, + "step": 809 + }, + { + "ce_ib": 7.129208087921143, + "ce_orig": 0.5480561256408691, + "epoch": 0.23265511539291106, + "kl_loss": 0.4469287395477295, + "loss_ib": 0.0115984957665205, + "step": 809 + }, + { + "ce_ib": 13.105687141418457, + "ce_orig": 0.685859739780426, + "epoch": 0.23265511539291106, + "kl_loss": 0.27275753021240234, + "loss_ib": 0.015833262354135513, + "step": 809 + }, + { + "ce_ib": 9.026814460754395, + "ce_orig": 1.057628870010376, + "epoch": 0.23265511539291106, + "kl_loss": 0.3957173228263855, + "loss_ib": 0.012983987107872963, + "step": 809 + }, + { + "epoch": 0.2329426989718887, + "grad_norm": 0.10287559032440186, + "learning_rate": 9.942265776957687e-06, + "loss": 0.9129, + "step": 810 + }, + { + "ce_ib": 9.951510429382324, + "ce_orig": 1.1167075634002686, + "epoch": 0.2329426989718887, + "kl_loss": 0.3010333776473999, + "loss_ib": 0.012961843982338905, + "step": 810 + }, + { + "ce_ib": 10.840296745300293, + "ce_orig": 1.0002007484436035, + "epoch": 0.2329426989718887, + "kl_loss": 0.38143882155418396, + "loss_ib": 0.01465468481183052, + "step": 810 + }, + { + "ce_ib": 7.044186592102051, + "ce_orig": 0.799129068851471, + "epoch": 0.2329426989718887, + "kl_loss": 0.3452165424823761, + "loss_ib": 0.0104963518679142, + "step": 810 + }, + { + "ce_ib": 13.050943374633789, + "ce_orig": 1.4166967868804932, + "epoch": 0.2329426989718887, + "kl_loss": 0.3720596432685852, + "loss_ib": 0.01677154004573822, + "step": 810 + }, + { + "ce_ib": 6.821801662445068, + "ce_orig": 0.7074012756347656, + "epoch": 0.23323028255086634, + "kl_loss": 0.3542909622192383, + "loss_ib": 0.010364711284637451, + "step": 811 + }, + { + "ce_ib": 8.195531845092773, + "ce_orig": 1.0204046964645386, + "epoch": 0.23323028255086634, + "kl_loss": 0.28537851572036743, + "loss_ib": 0.011049317196011543, + "step": 811 + }, + { + "ce_ib": 6.7708001136779785, + "ce_orig": 0.5408430695533752, + "epoch": 0.23323028255086634, + "kl_loss": 0.4277627468109131, + "loss_ib": 0.011048427782952785, + "step": 811 + }, + { + "ce_ib": 10.415853500366211, + "ce_orig": 0.7485983371734619, + "epoch": 0.23323028255086634, + "kl_loss": 0.27399230003356934, + "loss_ib": 0.01315577607601881, + "step": 811 + }, + { + "ce_ib": 11.759166717529297, + "ce_orig": 1.3388240337371826, + "epoch": 0.233517866129844, + "kl_loss": 0.3040698766708374, + "loss_ib": 0.014799864962697029, + "step": 812 + }, + { + "ce_ib": 7.798691272735596, + "ce_orig": 1.0542670488357544, + "epoch": 0.233517866129844, + "kl_loss": 0.3233657777309418, + "loss_ib": 0.01103234849870205, + "step": 812 + }, + { + "ce_ib": 14.092377662658691, + "ce_orig": 1.9352741241455078, + "epoch": 0.233517866129844, + "kl_loss": 0.4481472373008728, + "loss_ib": 0.01857384853065014, + "step": 812 + }, + { + "ce_ib": 6.820461750030518, + "ce_orig": 0.4188855290412903, + "epoch": 0.233517866129844, + "kl_loss": 0.2712195813655853, + "loss_ib": 0.009532657451927662, + "step": 812 + }, + { + "ce_ib": 10.21103572845459, + "ce_orig": 1.5039421319961548, + "epoch": 0.23380544970882164, + "kl_loss": 0.3258100152015686, + "loss_ib": 0.013469135388731956, + "step": 813 + }, + { + "ce_ib": 8.036877632141113, + "ce_orig": 0.608624279499054, + "epoch": 0.23380544970882164, + "kl_loss": 0.2845733165740967, + "loss_ib": 0.010882611386477947, + "step": 813 + }, + { + "ce_ib": 10.804905891418457, + "ce_orig": 1.0994369983673096, + "epoch": 0.23380544970882164, + "kl_loss": 0.2775050699710846, + "loss_ib": 0.013579956255853176, + "step": 813 + }, + { + "ce_ib": 6.5864362716674805, + "ce_orig": 0.5708433985710144, + "epoch": 0.23380544970882164, + "kl_loss": 0.2514118552207947, + "loss_ib": 0.009100555442273617, + "step": 813 + }, + { + "ce_ib": 7.656818866729736, + "ce_orig": 0.9114794731140137, + "epoch": 0.23409303328779926, + "kl_loss": 0.28084778785705566, + "loss_ib": 0.010465297847986221, + "step": 814 + }, + { + "ce_ib": 7.377896308898926, + "ce_orig": 0.9978185296058655, + "epoch": 0.23409303328779926, + "kl_loss": 0.2847989797592163, + "loss_ib": 0.010225885547697544, + "step": 814 + }, + { + "ce_ib": 6.432290077209473, + "ce_orig": 0.6785269379615784, + "epoch": 0.23409303328779926, + "kl_loss": 0.30970633029937744, + "loss_ib": 0.009529353119432926, + "step": 814 + }, + { + "ce_ib": 8.577414512634277, + "ce_orig": 0.6792881488800049, + "epoch": 0.23409303328779926, + "kl_loss": 0.3012913465499878, + "loss_ib": 0.011590328067541122, + "step": 814 + }, + { + "epoch": 0.23438061686677691, + "grad_norm": 0.10755941271781921, + "learning_rate": 9.941083847211765e-06, + "loss": 0.9294, + "step": 815 + }, + { + "ce_ib": 7.941585540771484, + "ce_orig": 0.5222452878952026, + "epoch": 0.23438061686677691, + "kl_loss": 0.3275793194770813, + "loss_ib": 0.01121737901121378, + "step": 815 + }, + { + "ce_ib": 11.759594917297363, + "ce_orig": 0.684937596321106, + "epoch": 0.23438061686677691, + "kl_loss": 0.3714814782142639, + "loss_ib": 0.01547440979629755, + "step": 815 + }, + { + "ce_ib": 8.328024864196777, + "ce_orig": 0.5521450042724609, + "epoch": 0.23438061686677691, + "kl_loss": 0.3888009488582611, + "loss_ib": 0.012216033414006233, + "step": 815 + }, + { + "ce_ib": 6.953820705413818, + "ce_orig": 0.537897527217865, + "epoch": 0.23438061686677691, + "kl_loss": 0.2841954827308655, + "loss_ib": 0.009795775637030602, + "step": 815 + }, + { + "ce_ib": 6.938723564147949, + "ce_orig": 1.0933799743652344, + "epoch": 0.23466820044575454, + "kl_loss": 0.2745826840400696, + "loss_ib": 0.009684550575911999, + "step": 816 + }, + { + "ce_ib": 7.015166759490967, + "ce_orig": 1.0554184913635254, + "epoch": 0.23466820044575454, + "kl_loss": 0.24745316803455353, + "loss_ib": 0.009489698335528374, + "step": 816 + }, + { + "ce_ib": 7.070734977722168, + "ce_orig": 0.6463883519172668, + "epoch": 0.23466820044575454, + "kl_loss": 0.3349419832229614, + "loss_ib": 0.010420155711472034, + "step": 816 + }, + { + "ce_ib": 11.36481761932373, + "ce_orig": 1.2426626682281494, + "epoch": 0.23466820044575454, + "kl_loss": 0.35446441173553467, + "loss_ib": 0.01490946114063263, + "step": 816 + }, + { + "ce_ib": 7.427221298217773, + "ce_orig": 0.8329726457595825, + "epoch": 0.2349557840247322, + "kl_loss": 0.34363722801208496, + "loss_ib": 0.0108635937795043, + "step": 817 + }, + { + "ce_ib": 5.901673316955566, + "ce_orig": 0.7341867685317993, + "epoch": 0.2349557840247322, + "kl_loss": 0.27652859687805176, + "loss_ib": 0.008666959591209888, + "step": 817 + }, + { + "ce_ib": 7.916401386260986, + "ce_orig": 0.6233932971954346, + "epoch": 0.2349557840247322, + "kl_loss": 0.38233524560928345, + "loss_ib": 0.011739754118025303, + "step": 817 + }, + { + "ce_ib": 6.952549934387207, + "ce_orig": 0.4051516354084015, + "epoch": 0.2349557840247322, + "kl_loss": 0.7161735892295837, + "loss_ib": 0.014114285819232464, + "step": 817 + }, + { + "ce_ib": 8.112340927124023, + "ce_orig": 0.41056379675865173, + "epoch": 0.23524336760370984, + "kl_loss": 0.4498043656349182, + "loss_ib": 0.012610385194420815, + "step": 818 + }, + { + "ce_ib": 7.351808547973633, + "ce_orig": 0.6276510953903198, + "epoch": 0.23524336760370984, + "kl_loss": 0.3949786424636841, + "loss_ib": 0.011301594786345959, + "step": 818 + }, + { + "ce_ib": 8.169877052307129, + "ce_orig": 0.9647888541221619, + "epoch": 0.23524336760370984, + "kl_loss": 0.32170844078063965, + "loss_ib": 0.011386961676180363, + "step": 818 + }, + { + "ce_ib": 10.368202209472656, + "ce_orig": 1.1499764919281006, + "epoch": 0.23524336760370984, + "kl_loss": 0.47000110149383545, + "loss_ib": 0.01506821345537901, + "step": 818 + }, + { + "ce_ib": 10.158099174499512, + "ce_orig": 1.1036230325698853, + "epoch": 0.23553095118268746, + "kl_loss": 0.42805948853492737, + "loss_ib": 0.014438693411648273, + "step": 819 + }, + { + "ce_ib": 9.477944374084473, + "ce_orig": 1.0577062368392944, + "epoch": 0.23553095118268746, + "kl_loss": 0.26351502537727356, + "loss_ib": 0.012113094329833984, + "step": 819 + }, + { + "ce_ib": 3.1393284797668457, + "ce_orig": 0.1595279723405838, + "epoch": 0.23553095118268746, + "kl_loss": 0.6530660390853882, + "loss_ib": 0.009669989347457886, + "step": 819 + }, + { + "ce_ib": 10.7774658203125, + "ce_orig": 1.2825448513031006, + "epoch": 0.23553095118268746, + "kl_loss": 0.6769564151763916, + "loss_ib": 0.01754703000187874, + "step": 819 + }, + { + "epoch": 0.23581853476166512, + "grad_norm": 0.10050812363624573, + "learning_rate": 9.939890013248006e-06, + "loss": 0.8356, + "step": 820 + }, + { + "ce_ib": 6.623199939727783, + "ce_orig": 0.5048424601554871, + "epoch": 0.23581853476166512, + "kl_loss": 0.32067549228668213, + "loss_ib": 0.00982995517551899, + "step": 820 + }, + { + "ce_ib": 9.443016052246094, + "ce_orig": 1.0931298732757568, + "epoch": 0.23581853476166512, + "kl_loss": 0.25568336248397827, + "loss_ib": 0.011999850161373615, + "step": 820 + }, + { + "ce_ib": 7.976568222045898, + "ce_orig": 0.669291079044342, + "epoch": 0.23581853476166512, + "kl_loss": 0.42081308364868164, + "loss_ib": 0.01218469813466072, + "step": 820 + }, + { + "ce_ib": 4.948537349700928, + "ce_orig": 0.4827899932861328, + "epoch": 0.23581853476166512, + "kl_loss": 0.25315725803375244, + "loss_ib": 0.007480109576135874, + "step": 820 + }, + { + "ce_ib": 14.804322242736816, + "ce_orig": 1.9942870140075684, + "epoch": 0.23610611834064274, + "kl_loss": 0.43627458810806274, + "loss_ib": 0.019167067483067513, + "step": 821 + }, + { + "ce_ib": 4.713957786560059, + "ce_orig": 0.42061755061149597, + "epoch": 0.23610611834064274, + "kl_loss": 0.3404502272605896, + "loss_ib": 0.008118459954857826, + "step": 821 + }, + { + "ce_ib": 9.87876033782959, + "ce_orig": 0.6623562574386597, + "epoch": 0.23610611834064274, + "kl_loss": 0.37182098627090454, + "loss_ib": 0.013596970587968826, + "step": 821 + }, + { + "ce_ib": 7.073148727416992, + "ce_orig": 0.8113523125648499, + "epoch": 0.23610611834064274, + "kl_loss": 0.3398459553718567, + "loss_ib": 0.010471608489751816, + "step": 821 + }, + { + "ce_ib": 7.666951656341553, + "ce_orig": 0.7712521553039551, + "epoch": 0.2363937019196204, + "kl_loss": 0.34804296493530273, + "loss_ib": 0.011147381737828255, + "step": 822 + }, + { + "ce_ib": 6.349161624908447, + "ce_orig": 0.7387241125106812, + "epoch": 0.2363937019196204, + "kl_loss": 0.28054261207580566, + "loss_ib": 0.009154587984085083, + "step": 822 + }, + { + "ce_ib": 5.58187198638916, + "ce_orig": 0.5994656085968018, + "epoch": 0.2363937019196204, + "kl_loss": 0.31460386514663696, + "loss_ib": 0.008727909997105598, + "step": 822 + }, + { + "ce_ib": 7.449942111968994, + "ce_orig": 0.5919069647789001, + "epoch": 0.2363937019196204, + "kl_loss": 0.2681111693382263, + "loss_ib": 0.010131053626537323, + "step": 822 + }, + { + "ce_ib": 13.98037052154541, + "ce_orig": 0.6386498808860779, + "epoch": 0.23668128549859804, + "kl_loss": 0.44783568382263184, + "loss_ib": 0.01845872774720192, + "step": 823 + }, + { + "ce_ib": 5.139134883880615, + "ce_orig": 0.7114477157592773, + "epoch": 0.23668128549859804, + "kl_loss": 0.27849745750427246, + "loss_ib": 0.007924109697341919, + "step": 823 + }, + { + "ce_ib": 6.715068340301514, + "ce_orig": 0.5276003479957581, + "epoch": 0.23668128549859804, + "kl_loss": 0.384267121553421, + "loss_ib": 0.010557739064097404, + "step": 823 + }, + { + "ce_ib": 7.8920793533325195, + "ce_orig": 0.8194569945335388, + "epoch": 0.23668128549859804, + "kl_loss": 0.2780001163482666, + "loss_ib": 0.01067208033055067, + "step": 823 + }, + { + "ce_ib": 8.298215866088867, + "ce_orig": 0.4189370572566986, + "epoch": 0.23696886907757567, + "kl_loss": 0.4898415207862854, + "loss_ib": 0.013196630403399467, + "step": 824 + }, + { + "ce_ib": 7.782186508178711, + "ce_orig": 0.4692075252532959, + "epoch": 0.23696886907757567, + "kl_loss": 0.4824924170970917, + "loss_ib": 0.012607110664248466, + "step": 824 + }, + { + "ce_ib": 9.8721923828125, + "ce_orig": 1.1827633380889893, + "epoch": 0.23696886907757567, + "kl_loss": 0.2802169919013977, + "loss_ib": 0.012674362398684025, + "step": 824 + }, + { + "ce_ib": 9.907919883728027, + "ce_orig": 1.1879762411117554, + "epoch": 0.23696886907757567, + "kl_loss": 0.41324368119239807, + "loss_ib": 0.014040356501936913, + "step": 824 + }, + { + "epoch": 0.23725645265655332, + "grad_norm": 0.10062714666128159, + "learning_rate": 9.938684277942631e-06, + "loss": 0.8766, + "step": 825 + }, + { + "ce_ib": 8.782609939575195, + "ce_orig": 1.2377029657363892, + "epoch": 0.23725645265655332, + "kl_loss": 0.3123038113117218, + "loss_ib": 0.011905648745596409, + "step": 825 + }, + { + "ce_ib": 6.110267639160156, + "ce_orig": 0.6393804550170898, + "epoch": 0.23725645265655332, + "kl_loss": 0.33634820580482483, + "loss_ib": 0.009473749436438084, + "step": 825 + }, + { + "ce_ib": 9.359816551208496, + "ce_orig": 1.2528795003890991, + "epoch": 0.23725645265655332, + "kl_loss": 0.28830617666244507, + "loss_ib": 0.012242878787219524, + "step": 825 + }, + { + "ce_ib": 9.707563400268555, + "ce_orig": 0.8692981004714966, + "epoch": 0.23725645265655332, + "kl_loss": 0.5571379661560059, + "loss_ib": 0.015278941951692104, + "step": 825 + }, + { + "ce_ib": 9.498275756835938, + "ce_orig": 1.071899652481079, + "epoch": 0.23754403623553094, + "kl_loss": 0.2672438621520996, + "loss_ib": 0.012170715257525444, + "step": 826 + }, + { + "ce_ib": 9.843942642211914, + "ce_orig": 0.9619124531745911, + "epoch": 0.23754403623553094, + "kl_loss": 0.2982301712036133, + "loss_ib": 0.012826244346797466, + "step": 826 + }, + { + "ce_ib": 10.634578704833984, + "ce_orig": 1.325036644935608, + "epoch": 0.23754403623553094, + "kl_loss": 0.32248958945274353, + "loss_ib": 0.01385947410017252, + "step": 826 + }, + { + "ce_ib": 12.585394859313965, + "ce_orig": 1.4135090112686157, + "epoch": 0.23754403623553094, + "kl_loss": 0.46502685546875, + "loss_ib": 0.017235664650797844, + "step": 826 + }, + { + "ce_ib": 10.126785278320312, + "ce_orig": 1.5693209171295166, + "epoch": 0.2378316198145086, + "kl_loss": 0.2683885097503662, + "loss_ib": 0.012810669839382172, + "step": 827 + }, + { + "ce_ib": 6.727275848388672, + "ce_orig": 0.9106936454772949, + "epoch": 0.2378316198145086, + "kl_loss": 0.2777571678161621, + "loss_ib": 0.009504847228527069, + "step": 827 + }, + { + "ce_ib": 8.126458168029785, + "ce_orig": 0.8495746850967407, + "epoch": 0.2378316198145086, + "kl_loss": 0.26818782091140747, + "loss_ib": 0.010808336548507214, + "step": 827 + }, + { + "ce_ib": 10.300277709960938, + "ce_orig": 1.3873240947723389, + "epoch": 0.2378316198145086, + "kl_loss": 0.41793563961982727, + "loss_ib": 0.014479633420705795, + "step": 827 + }, + { + "ce_ib": 12.66947078704834, + "ce_orig": 1.5648332834243774, + "epoch": 0.23811920339348625, + "kl_loss": 0.3042876422405243, + "loss_ib": 0.015712348744273186, + "step": 828 + }, + { + "ce_ib": 8.798270225524902, + "ce_orig": 0.920973539352417, + "epoch": 0.23811920339348625, + "kl_loss": 0.34468623995780945, + "loss_ib": 0.012245132587850094, + "step": 828 + }, + { + "ce_ib": 8.365591049194336, + "ce_orig": 0.5817134976387024, + "epoch": 0.23811920339348625, + "kl_loss": 0.39118778705596924, + "loss_ib": 0.012277469038963318, + "step": 828 + }, + { + "ce_ib": 11.80969524383545, + "ce_orig": 1.7373600006103516, + "epoch": 0.23811920339348625, + "kl_loss": 0.3660210371017456, + "loss_ib": 0.01546990592032671, + "step": 828 + }, + { + "ce_ib": 7.223691463470459, + "ce_orig": 0.9051380157470703, + "epoch": 0.23840678697246387, + "kl_loss": 0.2608543038368225, + "loss_ib": 0.009832234121859074, + "step": 829 + }, + { + "ce_ib": 6.39984130859375, + "ce_orig": 0.6907777190208435, + "epoch": 0.23840678697246387, + "kl_loss": 0.3057895600795746, + "loss_ib": 0.00945773720741272, + "step": 829 + }, + { + "ce_ib": 5.409855365753174, + "ce_orig": 0.7622098326683044, + "epoch": 0.23840678697246387, + "kl_loss": 0.24784672260284424, + "loss_ib": 0.007888322696089745, + "step": 829 + }, + { + "ce_ib": 8.38547134399414, + "ce_orig": 0.7260690331459045, + "epoch": 0.23840678697246387, + "kl_loss": 0.29803162813186646, + "loss_ib": 0.01136578805744648, + "step": 829 + }, + { + "epoch": 0.23869437055144152, + "grad_norm": 0.11168427765369415, + "learning_rate": 9.93746664420054e-06, + "loss": 0.9104, + "step": 830 + }, + { + "ce_ib": 5.065134048461914, + "ce_orig": 0.8026469349861145, + "epoch": 0.23869437055144152, + "kl_loss": 0.278666615486145, + "loss_ib": 0.007851799950003624, + "step": 830 + }, + { + "ce_ib": 13.80343246459961, + "ce_orig": 0.9906110167503357, + "epoch": 0.23869437055144152, + "kl_loss": 0.3099890649318695, + "loss_ib": 0.01690332405269146, + "step": 830 + }, + { + "ce_ib": 8.821106910705566, + "ce_orig": 1.093506932258606, + "epoch": 0.23869437055144152, + "kl_loss": 0.3800942599773407, + "loss_ib": 0.012622050009667873, + "step": 830 + }, + { + "ce_ib": 11.06147575378418, + "ce_orig": 1.1915507316589355, + "epoch": 0.23869437055144152, + "kl_loss": 0.3803118169307709, + "loss_ib": 0.014864594675600529, + "step": 830 + }, + { + "ce_ib": 6.217090129852295, + "ce_orig": 0.4522053599357605, + "epoch": 0.23898195413041914, + "kl_loss": 0.3387228846549988, + "loss_ib": 0.009604318998754025, + "step": 831 + }, + { + "ce_ib": 8.78792953491211, + "ce_orig": 0.818161129951477, + "epoch": 0.23898195413041914, + "kl_loss": 0.3240455389022827, + "loss_ib": 0.012028384022414684, + "step": 831 + }, + { + "ce_ib": 9.777087211608887, + "ce_orig": 0.9559274315834045, + "epoch": 0.23898195413041914, + "kl_loss": 0.305178701877594, + "loss_ib": 0.01282887440174818, + "step": 831 + }, + { + "ce_ib": 8.793158531188965, + "ce_orig": 1.7135603427886963, + "epoch": 0.23898195413041914, + "kl_loss": 0.35992008447647095, + "loss_ib": 0.012392358854413033, + "step": 831 + }, + { + "ce_ib": 14.298954963684082, + "ce_orig": 1.9766751527786255, + "epoch": 0.2392695377093968, + "kl_loss": 0.37456846237182617, + "loss_ib": 0.018044639378786087, + "step": 832 + }, + { + "ce_ib": 5.9418816566467285, + "ce_orig": 0.7225015163421631, + "epoch": 0.2392695377093968, + "kl_loss": 0.35724693536758423, + "loss_ib": 0.009514350444078445, + "step": 832 + }, + { + "ce_ib": 8.032739639282227, + "ce_orig": 0.6944239139556885, + "epoch": 0.2392695377093968, + "kl_loss": 0.35457563400268555, + "loss_ib": 0.011578495614230633, + "step": 832 + }, + { + "ce_ib": 9.528278350830078, + "ce_orig": 1.3547769784927368, + "epoch": 0.2392695377093968, + "kl_loss": 0.4733262062072754, + "loss_ib": 0.014261540956795216, + "step": 832 + }, + { + "ce_ib": 10.221992492675781, + "ce_orig": 0.9856470227241516, + "epoch": 0.23955712128837445, + "kl_loss": 0.33555009961128235, + "loss_ib": 0.013577492907643318, + "step": 833 + }, + { + "ce_ib": 8.804466247558594, + "ce_orig": 0.928854763507843, + "epoch": 0.23955712128837445, + "kl_loss": 0.36634519696235657, + "loss_ib": 0.012467917986214161, + "step": 833 + }, + { + "ce_ib": 6.364208221435547, + "ce_orig": 0.8164104223251343, + "epoch": 0.23955712128837445, + "kl_loss": 0.22879984974861145, + "loss_ib": 0.008652206510305405, + "step": 833 + }, + { + "ce_ib": 8.315366744995117, + "ce_orig": 0.9906771779060364, + "epoch": 0.23955712128837445, + "kl_loss": 0.4181078374385834, + "loss_ib": 0.012496445327997208, + "step": 833 + }, + { + "ce_ib": 8.126928329467773, + "ce_orig": 0.9921467304229736, + "epoch": 0.23984470486735207, + "kl_loss": 0.2800453305244446, + "loss_ib": 0.010927380993962288, + "step": 834 + }, + { + "ce_ib": 5.869344234466553, + "ce_orig": 0.8272134065628052, + "epoch": 0.23984470486735207, + "kl_loss": 0.23419909179210663, + "loss_ib": 0.00821133516728878, + "step": 834 + }, + { + "ce_ib": 10.878475189208984, + "ce_orig": 1.308764934539795, + "epoch": 0.23984470486735207, + "kl_loss": 0.3325369656085968, + "loss_ib": 0.014203844591975212, + "step": 834 + }, + { + "ce_ib": 5.4352545738220215, + "ce_orig": 0.45368334650993347, + "epoch": 0.23984470486735207, + "kl_loss": 0.6772407293319702, + "loss_ib": 0.012207661755383015, + "step": 834 + }, + { + "epoch": 0.24013228844632972, + "grad_norm": 0.09686867892742157, + "learning_rate": 9.93623711495529e-06, + "loss": 0.9213, + "step": 835 + }, + { + "ce_ib": 8.014131546020508, + "ce_orig": 1.0289490222930908, + "epoch": 0.24013228844632972, + "kl_loss": 0.3445562720298767, + "loss_ib": 0.011459693312644958, + "step": 835 + }, + { + "ce_ib": 6.29163122177124, + "ce_orig": 0.48442956805229187, + "epoch": 0.24013228844632972, + "kl_loss": 0.3220330774784088, + "loss_ib": 0.009511961601674557, + "step": 835 + }, + { + "ce_ib": 7.94074821472168, + "ce_orig": 0.7545216083526611, + "epoch": 0.24013228844632972, + "kl_loss": 0.45153874158859253, + "loss_ib": 0.0124561358243227, + "step": 835 + }, + { + "ce_ib": 5.553848743438721, + "ce_orig": 0.3156552314758301, + "epoch": 0.24013228844632972, + "kl_loss": 0.35743337869644165, + "loss_ib": 0.00912818219512701, + "step": 835 + }, + { + "ce_ib": 7.592702865600586, + "ce_orig": 0.8854877352714539, + "epoch": 0.24041987202530735, + "kl_loss": 0.28391388058662415, + "loss_ib": 0.010431841015815735, + "step": 836 + }, + { + "ce_ib": 8.959979057312012, + "ce_orig": 0.5111126899719238, + "epoch": 0.24041987202530735, + "kl_loss": 0.426180899143219, + "loss_ib": 0.013221788220107555, + "step": 836 + }, + { + "ce_ib": 12.740047454833984, + "ce_orig": 1.6768262386322021, + "epoch": 0.24041987202530735, + "kl_loss": 0.3991634249687195, + "loss_ib": 0.016731681302189827, + "step": 836 + }, + { + "ce_ib": 12.013802528381348, + "ce_orig": 1.272831916809082, + "epoch": 0.24041987202530735, + "kl_loss": 0.3409850001335144, + "loss_ib": 0.015423652715981007, + "step": 836 + }, + { + "ce_ib": 2.0301332473754883, + "ce_orig": 0.16432513296604156, + "epoch": 0.240707455604285, + "kl_loss": 0.6776133179664612, + "loss_ib": 0.008806266821920872, + "step": 837 + }, + { + "ce_ib": 10.865999221801758, + "ce_orig": 1.410832405090332, + "epoch": 0.240707455604285, + "kl_loss": 0.312138170003891, + "loss_ib": 0.013987381011247635, + "step": 837 + }, + { + "ce_ib": 6.166257858276367, + "ce_orig": 0.6295948624610901, + "epoch": 0.240707455604285, + "kl_loss": 0.36339548230171204, + "loss_ib": 0.00980021245777607, + "step": 837 + }, + { + "ce_ib": 5.043333053588867, + "ce_orig": 0.46830177307128906, + "epoch": 0.240707455604285, + "kl_loss": 0.3290286064147949, + "loss_ib": 0.008333618752658367, + "step": 837 + }, + { + "ce_ib": 9.060256004333496, + "ce_orig": 1.1709790229797363, + "epoch": 0.24099503918326265, + "kl_loss": 0.27760225534439087, + "loss_ib": 0.0118362782523036, + "step": 838 + }, + { + "ce_ib": 6.811290264129639, + "ce_orig": 0.793929934501648, + "epoch": 0.24099503918326265, + "kl_loss": 0.31319600343704224, + "loss_ib": 0.009943250566720963, + "step": 838 + }, + { + "ce_ib": 4.931931018829346, + "ce_orig": 0.36589503288269043, + "epoch": 0.24099503918326265, + "kl_loss": 0.40839433670043945, + "loss_ib": 0.009015874937176704, + "step": 838 + }, + { + "ce_ib": 6.180576324462891, + "ce_orig": 0.4309951066970825, + "epoch": 0.24099503918326265, + "kl_loss": 0.2373921424150467, + "loss_ib": 0.008554497733712196, + "step": 838 + }, + { + "ce_ib": 5.762244701385498, + "ce_orig": 0.544806957244873, + "epoch": 0.24128262276224027, + "kl_loss": 0.31401538848876953, + "loss_ib": 0.008902398869395256, + "step": 839 + }, + { + "ce_ib": 7.717389106750488, + "ce_orig": 1.2485359907150269, + "epoch": 0.24128262276224027, + "kl_loss": 0.30778759717941284, + "loss_ib": 0.010795265436172485, + "step": 839 + }, + { + "ce_ib": 7.063183784484863, + "ce_orig": 0.768622100353241, + "epoch": 0.24128262276224027, + "kl_loss": 0.3657524585723877, + "loss_ib": 0.010720708407461643, + "step": 839 + }, + { + "ce_ib": 6.140594482421875, + "ce_orig": 0.7728462815284729, + "epoch": 0.24128262276224027, + "kl_loss": 0.2510579824447632, + "loss_ib": 0.008651173673570156, + "step": 839 + }, + { + "epoch": 0.24157020634121792, + "grad_norm": 0.10076677054166794, + "learning_rate": 9.934995693169104e-06, + "loss": 0.8875, + "step": 840 + }, + { + "ce_ib": 9.007379531860352, + "ce_orig": 0.8373998403549194, + "epoch": 0.24157020634121792, + "kl_loss": 0.3071047067642212, + "loss_ib": 0.012078425846993923, + "step": 840 + }, + { + "ce_ib": 6.403738975524902, + "ce_orig": 0.7129970192909241, + "epoch": 0.24157020634121792, + "kl_loss": 0.28114748001098633, + "loss_ib": 0.009215213358402252, + "step": 840 + }, + { + "ce_ib": 7.572381973266602, + "ce_orig": 0.8134757280349731, + "epoch": 0.24157020634121792, + "kl_loss": 0.3291366696357727, + "loss_ib": 0.010863748379051685, + "step": 840 + }, + { + "ce_ib": 7.471776962280273, + "ce_orig": 0.45619362592697144, + "epoch": 0.24157020634121792, + "kl_loss": 0.3013712763786316, + "loss_ib": 0.010485488921403885, + "step": 840 + }, + { + "ce_ib": 8.4400053024292, + "ce_orig": 0.8053632378578186, + "epoch": 0.24185778992019555, + "kl_loss": 0.30274561047554016, + "loss_ib": 0.011467461474239826, + "step": 841 + }, + { + "ce_ib": 13.409200668334961, + "ce_orig": 1.5305240154266357, + "epoch": 0.24185778992019555, + "kl_loss": 0.27949976921081543, + "loss_ib": 0.016204198822379112, + "step": 841 + }, + { + "ce_ib": 5.851305961608887, + "ce_orig": 0.7181586027145386, + "epoch": 0.24185778992019555, + "kl_loss": 0.28577935695648193, + "loss_ib": 0.00870910007506609, + "step": 841 + }, + { + "ce_ib": 8.071784973144531, + "ce_orig": 0.9581683278083801, + "epoch": 0.24185778992019555, + "kl_loss": 0.371356725692749, + "loss_ib": 0.011785351671278477, + "step": 841 + }, + { + "ce_ib": 11.643394470214844, + "ce_orig": 1.0913020372390747, + "epoch": 0.2421453734991732, + "kl_loss": 0.3137480914592743, + "loss_ib": 0.01478087529540062, + "step": 842 + }, + { + "ce_ib": 7.9136576652526855, + "ce_orig": 0.8460515141487122, + "epoch": 0.2421453734991732, + "kl_loss": 0.28541165590286255, + "loss_ib": 0.010767774656414986, + "step": 842 + }, + { + "ce_ib": 12.15166187286377, + "ce_orig": 0.9375542402267456, + "epoch": 0.2421453734991732, + "kl_loss": 0.466509073972702, + "loss_ib": 0.016816752031445503, + "step": 842 + }, + { + "ce_ib": 6.482849597930908, + "ce_orig": 0.652571439743042, + "epoch": 0.2421453734991732, + "kl_loss": 0.3070219159126282, + "loss_ib": 0.009553068317472935, + "step": 842 + }, + { + "ce_ib": 9.26317024230957, + "ce_orig": 0.6297864317893982, + "epoch": 0.24243295707815085, + "kl_loss": 0.8582457304000854, + "loss_ib": 0.017845628783106804, + "step": 843 + }, + { + "ce_ib": 9.843435287475586, + "ce_orig": 0.9486488103866577, + "epoch": 0.24243295707815085, + "kl_loss": 0.35706013441085815, + "loss_ib": 0.013414036482572556, + "step": 843 + }, + { + "ce_ib": 8.942062377929688, + "ce_orig": 0.6295011639595032, + "epoch": 0.24243295707815085, + "kl_loss": 0.3587941527366638, + "loss_ib": 0.012530003674328327, + "step": 843 + }, + { + "ce_ib": 8.235858917236328, + "ce_orig": 1.0911704301834106, + "epoch": 0.24243295707815085, + "kl_loss": 0.4925958812236786, + "loss_ib": 0.013161817565560341, + "step": 843 + }, + { + "ce_ib": 2.106238842010498, + "ce_orig": 0.10293695330619812, + "epoch": 0.24272054065712848, + "kl_loss": 0.6076182126998901, + "loss_ib": 0.008182420395314693, + "step": 844 + }, + { + "ce_ib": 10.04066276550293, + "ce_orig": 0.7675477862358093, + "epoch": 0.24272054065712848, + "kl_loss": 0.3272428512573242, + "loss_ib": 0.013313091360032558, + "step": 844 + }, + { + "ce_ib": 7.2537922859191895, + "ce_orig": 0.6966544985771179, + "epoch": 0.24272054065712848, + "kl_loss": 0.30012214183807373, + "loss_ib": 0.010255013592541218, + "step": 844 + }, + { + "ce_ib": 9.952733039855957, + "ce_orig": 0.8222768902778625, + "epoch": 0.24272054065712848, + "kl_loss": 0.29932597279548645, + "loss_ib": 0.01294599287211895, + "step": 844 + }, + { + "epoch": 0.24300812423610613, + "grad_norm": 0.09415728598833084, + "learning_rate": 9.93374238183286e-06, + "loss": 0.8609, + "step": 845 + }, + { + "ce_ib": 9.53044605255127, + "ce_orig": 0.9650492668151855, + "epoch": 0.24300812423610613, + "kl_loss": 0.23566867411136627, + "loss_ib": 0.011887133121490479, + "step": 845 + }, + { + "ce_ib": 9.0289945602417, + "ce_orig": 1.0505539178848267, + "epoch": 0.24300812423610613, + "kl_loss": 0.3517257273197174, + "loss_ib": 0.012546251527965069, + "step": 845 + }, + { + "ce_ib": 6.667138576507568, + "ce_orig": 0.6767503023147583, + "epoch": 0.24300812423610613, + "kl_loss": 0.25494682788848877, + "loss_ib": 0.009216606616973877, + "step": 845 + }, + { + "ce_ib": 10.858115196228027, + "ce_orig": 1.128201961517334, + "epoch": 0.24300812423610613, + "kl_loss": 0.3126045763492584, + "loss_ib": 0.013984160497784615, + "step": 845 + }, + { + "ce_ib": 8.37086296081543, + "ce_orig": 1.0704699754714966, + "epoch": 0.24329570781508375, + "kl_loss": 0.3986669182777405, + "loss_ib": 0.012357532978057861, + "step": 846 + }, + { + "ce_ib": 12.114412307739258, + "ce_orig": 0.8631466031074524, + "epoch": 0.24329570781508375, + "kl_loss": 0.5870100259780884, + "loss_ib": 0.017984513193368912, + "step": 846 + }, + { + "ce_ib": 10.035650253295898, + "ce_orig": 0.7930597066879272, + "epoch": 0.24329570781508375, + "kl_loss": 0.34875866770744324, + "loss_ib": 0.013523237779736519, + "step": 846 + }, + { + "ce_ib": 9.243821144104004, + "ce_orig": 0.9381915330886841, + "epoch": 0.24329570781508375, + "kl_loss": 0.6351751089096069, + "loss_ib": 0.0155955720692873, + "step": 846 + }, + { + "ce_ib": 7.255211353302002, + "ce_orig": 0.7044399380683899, + "epoch": 0.2435832913940614, + "kl_loss": 0.31700530648231506, + "loss_ib": 0.010425264947116375, + "step": 847 + }, + { + "ce_ib": 8.711723327636719, + "ce_orig": 0.8803223967552185, + "epoch": 0.2435832913940614, + "kl_loss": 0.3968978822231293, + "loss_ib": 0.012680701911449432, + "step": 847 + }, + { + "ce_ib": 6.419612407684326, + "ce_orig": 0.5552703142166138, + "epoch": 0.2435832913940614, + "kl_loss": 0.24681052565574646, + "loss_ib": 0.008887717500329018, + "step": 847 + }, + { + "ce_ib": 7.220922470092773, + "ce_orig": 0.8049042224884033, + "epoch": 0.2435832913940614, + "kl_loss": 0.25132423639297485, + "loss_ib": 0.00973416492342949, + "step": 847 + }, + { + "ce_ib": 10.610690116882324, + "ce_orig": 1.24649178981781, + "epoch": 0.24387087497303903, + "kl_loss": 0.3552427291870117, + "loss_ib": 0.014163116924464703, + "step": 848 + }, + { + "ce_ib": 8.324702262878418, + "ce_orig": 0.7784779667854309, + "epoch": 0.24387087497303903, + "kl_loss": 0.26029253005981445, + "loss_ib": 0.010927626863121986, + "step": 848 + }, + { + "ce_ib": 5.074281692504883, + "ce_orig": 0.5773417353630066, + "epoch": 0.24387087497303903, + "kl_loss": 0.34564918279647827, + "loss_ib": 0.008530773222446442, + "step": 848 + }, + { + "ce_ib": 6.707390308380127, + "ce_orig": 0.6460347175598145, + "epoch": 0.24387087497303903, + "kl_loss": 0.33726412057876587, + "loss_ib": 0.010080032050609589, + "step": 848 + }, + { + "ce_ib": 5.850057601928711, + "ce_orig": 0.5735775232315063, + "epoch": 0.24415845855201668, + "kl_loss": 0.30734121799468994, + "loss_ib": 0.008923470042645931, + "step": 849 + }, + { + "ce_ib": 9.06562614440918, + "ce_orig": 1.0471097230911255, + "epoch": 0.24415845855201668, + "kl_loss": 0.43707603216171265, + "loss_ib": 0.013436386361718178, + "step": 849 + }, + { + "ce_ib": 6.853366374969482, + "ce_orig": 0.9200989007949829, + "epoch": 0.24415845855201668, + "kl_loss": 0.3171440362930298, + "loss_ib": 0.010024807415902615, + "step": 849 + }, + { + "ce_ib": 5.92440128326416, + "ce_orig": 0.5834106802940369, + "epoch": 0.24415845855201668, + "kl_loss": 0.2906469702720642, + "loss_ib": 0.008830870501697063, + "step": 849 + }, + { + "epoch": 0.24444604213099433, + "grad_norm": 0.11549082398414612, + "learning_rate": 9.93247718396607e-06, + "loss": 0.8256, + "step": 850 + }, + { + "ce_ib": 6.185736179351807, + "ce_orig": 0.5565721392631531, + "epoch": 0.24444604213099433, + "kl_loss": 0.3115679621696472, + "loss_ib": 0.009301415644586086, + "step": 850 + }, + { + "ce_ib": 10.200575828552246, + "ce_orig": 1.0745152235031128, + "epoch": 0.24444604213099433, + "kl_loss": 0.3611696660518646, + "loss_ib": 0.013812271878123283, + "step": 850 + }, + { + "ce_ib": 11.002309799194336, + "ce_orig": 1.2843042612075806, + "epoch": 0.24444604213099433, + "kl_loss": 0.3904564380645752, + "loss_ib": 0.014906874857842922, + "step": 850 + }, + { + "ce_ib": 8.565811157226562, + "ce_orig": 0.9313501715660095, + "epoch": 0.24444604213099433, + "kl_loss": 0.3498835265636444, + "loss_ib": 0.012064645998179913, + "step": 850 + }, + { + "ce_ib": 6.367196559906006, + "ce_orig": 0.3282487392425537, + "epoch": 0.24473362570997195, + "kl_loss": 0.3030562996864319, + "loss_ib": 0.009397759102284908, + "step": 851 + }, + { + "ce_ib": 7.566930294036865, + "ce_orig": 0.4791885316371918, + "epoch": 0.24473362570997195, + "kl_loss": 0.3446214199066162, + "loss_ib": 0.011013145558536053, + "step": 851 + }, + { + "ce_ib": 6.3060078620910645, + "ce_orig": 0.7763472199440002, + "epoch": 0.24473362570997195, + "kl_loss": 0.2825550436973572, + "loss_ib": 0.009131558239459991, + "step": 851 + }, + { + "ce_ib": 6.97025728225708, + "ce_orig": 0.5963578820228577, + "epoch": 0.24473362570997195, + "kl_loss": 0.26160991191864014, + "loss_ib": 0.009586355648934841, + "step": 851 + }, + { + "ce_ib": 12.1688814163208, + "ce_orig": 1.5043821334838867, + "epoch": 0.2450212092889496, + "kl_loss": 0.4432004690170288, + "loss_ib": 0.01660088635981083, + "step": 852 + }, + { + "ce_ib": 10.238726615905762, + "ce_orig": 0.6479013562202454, + "epoch": 0.2450212092889496, + "kl_loss": 0.3322160243988037, + "loss_ib": 0.013560887426137924, + "step": 852 + }, + { + "ce_ib": 4.64656400680542, + "ce_orig": 0.37472283840179443, + "epoch": 0.2450212092889496, + "kl_loss": 0.388122022151947, + "loss_ib": 0.0085277846083045, + "step": 852 + }, + { + "ce_ib": 8.35583209991455, + "ce_orig": 0.9440561532974243, + "epoch": 0.2450212092889496, + "kl_loss": 0.34462809562683105, + "loss_ib": 0.011802112683653831, + "step": 852 + }, + { + "ce_ib": 7.765969753265381, + "ce_orig": 1.0505164861679077, + "epoch": 0.24530879286792723, + "kl_loss": 0.31646573543548584, + "loss_ib": 0.010930625721812248, + "step": 853 + }, + { + "ce_ib": 6.262195587158203, + "ce_orig": 0.6370275616645813, + "epoch": 0.24530879286792723, + "kl_loss": 0.3554043769836426, + "loss_ib": 0.009816239587962627, + "step": 853 + }, + { + "ce_ib": 3.9668338298797607, + "ce_orig": 0.3455740511417389, + "epoch": 0.24530879286792723, + "kl_loss": 0.33059853315353394, + "loss_ib": 0.007272819057106972, + "step": 853 + }, + { + "ce_ib": 5.173341751098633, + "ce_orig": 0.5830708742141724, + "epoch": 0.24530879286792723, + "kl_loss": 0.24811115860939026, + "loss_ib": 0.007654453162103891, + "step": 853 + }, + { + "ce_ib": 7.131439685821533, + "ce_orig": 0.9161396622657776, + "epoch": 0.24559637644690488, + "kl_loss": 0.6239281892776489, + "loss_ib": 0.01337072066962719, + "step": 854 + }, + { + "ce_ib": 9.913086891174316, + "ce_orig": 0.5309944748878479, + "epoch": 0.24559637644690488, + "kl_loss": 0.2742801010608673, + "loss_ib": 0.012655887752771378, + "step": 854 + }, + { + "ce_ib": 6.634543418884277, + "ce_orig": 0.8277848362922668, + "epoch": 0.24559637644690488, + "kl_loss": 0.27079910039901733, + "loss_ib": 0.009342534467577934, + "step": 854 + }, + { + "ce_ib": 6.51190185546875, + "ce_orig": 0.7204493880271912, + "epoch": 0.24559637644690488, + "kl_loss": 0.26875340938568115, + "loss_ib": 0.009199435822665691, + "step": 854 + }, + { + "epoch": 0.24588396002588253, + "grad_norm": 0.10104996711015701, + "learning_rate": 9.931200102616892e-06, + "loss": 0.8524, + "step": 855 + }, + { + "ce_ib": 9.483591079711914, + "ce_orig": 0.5329586267471313, + "epoch": 0.24588396002588253, + "kl_loss": 0.3656144142150879, + "loss_ib": 0.013139734975993633, + "step": 855 + }, + { + "ce_ib": 7.316298961639404, + "ce_orig": 0.8602546453475952, + "epoch": 0.24588396002588253, + "kl_loss": 0.2442716807126999, + "loss_ib": 0.009759015403687954, + "step": 855 + }, + { + "ce_ib": 6.996337890625, + "ce_orig": 0.7689603567123413, + "epoch": 0.24588396002588253, + "kl_loss": 0.28071606159210205, + "loss_ib": 0.009803498163819313, + "step": 855 + }, + { + "ce_ib": 9.025272369384766, + "ce_orig": 1.0526149272918701, + "epoch": 0.24588396002588253, + "kl_loss": 0.3657127618789673, + "loss_ib": 0.012682399712502956, + "step": 855 + }, + { + "ce_ib": 5.605438709259033, + "ce_orig": 0.4452979564666748, + "epoch": 0.24617154360486015, + "kl_loss": 0.36353376507759094, + "loss_ib": 0.009240776300430298, + "step": 856 + }, + { + "ce_ib": 8.554245948791504, + "ce_orig": 0.9303341507911682, + "epoch": 0.24617154360486015, + "kl_loss": 0.40140336751937866, + "loss_ib": 0.012568279169499874, + "step": 856 + }, + { + "ce_ib": 8.914340019226074, + "ce_orig": 0.9002864360809326, + "epoch": 0.24617154360486015, + "kl_loss": 0.3354775905609131, + "loss_ib": 0.012269115075469017, + "step": 856 + }, + { + "ce_ib": 8.865965843200684, + "ce_orig": 0.9647238254547119, + "epoch": 0.24617154360486015, + "kl_loss": 0.3433181643486023, + "loss_ib": 0.012299147434532642, + "step": 856 + }, + { + "ce_ib": 10.691956520080566, + "ce_orig": 1.4019811153411865, + "epoch": 0.2464591271838378, + "kl_loss": 0.33202850818634033, + "loss_ib": 0.014012240804731846, + "step": 857 + }, + { + "ce_ib": 6.769617557525635, + "ce_orig": 0.60886549949646, + "epoch": 0.2464591271838378, + "kl_loss": 0.34499895572662354, + "loss_ib": 0.010219607502222061, + "step": 857 + }, + { + "ce_ib": 5.660098075866699, + "ce_orig": 0.6113988757133484, + "epoch": 0.2464591271838378, + "kl_loss": 0.2966403067111969, + "loss_ib": 0.008626501075923443, + "step": 857 + }, + { + "ce_ib": 13.32357406616211, + "ce_orig": 1.3362207412719727, + "epoch": 0.2464591271838378, + "kl_loss": 0.29936474561691284, + "loss_ib": 0.016317222267389297, + "step": 857 + }, + { + "ce_ib": 2.6769988536834717, + "ce_orig": 0.18080325424671173, + "epoch": 0.24674671076281543, + "kl_loss": 0.6789752840995789, + "loss_ib": 0.009466751478612423, + "step": 858 + }, + { + "ce_ib": 8.577744483947754, + "ce_orig": 0.7904664278030396, + "epoch": 0.24674671076281543, + "kl_loss": 0.35377517342567444, + "loss_ib": 0.012115496210753918, + "step": 858 + }, + { + "ce_ib": 9.277144432067871, + "ce_orig": 0.858630359172821, + "epoch": 0.24674671076281543, + "kl_loss": 0.4751337170600891, + "loss_ib": 0.014028482139110565, + "step": 858 + }, + { + "ce_ib": 4.141605377197266, + "ce_orig": 0.31388720870018005, + "epoch": 0.24674671076281543, + "kl_loss": 0.6098456382751465, + "loss_ib": 0.010240061208605766, + "step": 858 + }, + { + "ce_ib": 9.002893447875977, + "ce_orig": 0.7537875175476074, + "epoch": 0.24703429434179308, + "kl_loss": 0.300067663192749, + "loss_ib": 0.012003568932414055, + "step": 859 + }, + { + "ce_ib": 4.289847373962402, + "ce_orig": 0.40423697233200073, + "epoch": 0.24703429434179308, + "kl_loss": 0.37460029125213623, + "loss_ib": 0.008035850711166859, + "step": 859 + }, + { + "ce_ib": 6.030440330505371, + "ce_orig": 0.7502020001411438, + "epoch": 0.24703429434179308, + "kl_loss": 0.301896333694458, + "loss_ib": 0.009049403481185436, + "step": 859 + }, + { + "ce_ib": 5.722672462463379, + "ce_orig": 0.8073954582214355, + "epoch": 0.24703429434179308, + "kl_loss": 0.2869844436645508, + "loss_ib": 0.00859251618385315, + "step": 859 + }, + { + "epoch": 0.24732187792077073, + "grad_norm": 0.09929006546735764, + "learning_rate": 9.929911140862109e-06, + "loss": 0.8739, + "step": 860 + }, + { + "ce_ib": 9.267024040222168, + "ce_orig": 1.2398027181625366, + "epoch": 0.24732187792077073, + "kl_loss": 0.28411030769348145, + "loss_ib": 0.01210812758654356, + "step": 860 + }, + { + "ce_ib": 8.08333683013916, + "ce_orig": 0.8170286417007446, + "epoch": 0.24732187792077073, + "kl_loss": 0.25793078541755676, + "loss_ib": 0.010662645101547241, + "step": 860 + }, + { + "ce_ib": 10.10383129119873, + "ce_orig": 1.1762322187423706, + "epoch": 0.24732187792077073, + "kl_loss": 0.3745589256286621, + "loss_ib": 0.013849420472979546, + "step": 860 + }, + { + "ce_ib": 10.957147598266602, + "ce_orig": 1.4055129289627075, + "epoch": 0.24732187792077073, + "kl_loss": 0.47874391078948975, + "loss_ib": 0.015744587406516075, + "step": 860 + }, + { + "ce_ib": 5.491908073425293, + "ce_orig": 0.8730387091636658, + "epoch": 0.24760946149974836, + "kl_loss": 0.2792191803455353, + "loss_ib": 0.008284100331366062, + "step": 861 + }, + { + "ce_ib": 9.5418701171875, + "ce_orig": 0.7111859321594238, + "epoch": 0.24760946149974836, + "kl_loss": 0.28549331426620483, + "loss_ib": 0.012396802194416523, + "step": 861 + }, + { + "ce_ib": 9.359732627868652, + "ce_orig": 0.5781843066215515, + "epoch": 0.24760946149974836, + "kl_loss": 0.3297385573387146, + "loss_ib": 0.01265711709856987, + "step": 861 + }, + { + "ce_ib": 4.646590232849121, + "ce_orig": 0.6796752214431763, + "epoch": 0.24760946149974836, + "kl_loss": 0.5749114751815796, + "loss_ib": 0.010395705699920654, + "step": 861 + }, + { + "ce_ib": 8.25394344329834, + "ce_orig": 0.9002748131752014, + "epoch": 0.247897045078726, + "kl_loss": 0.3159523904323578, + "loss_ib": 0.01141346711665392, + "step": 862 + }, + { + "ce_ib": 8.197535514831543, + "ce_orig": 0.6030675172805786, + "epoch": 0.247897045078726, + "kl_loss": 0.46316292881965637, + "loss_ib": 0.01282916497439146, + "step": 862 + }, + { + "ce_ib": 8.135879516601562, + "ce_orig": 0.5498018264770508, + "epoch": 0.247897045078726, + "kl_loss": 0.37288355827331543, + "loss_ib": 0.011864714324474335, + "step": 862 + }, + { + "ce_ib": 8.026688575744629, + "ce_orig": 0.6925224661827087, + "epoch": 0.247897045078726, + "kl_loss": 0.3975781798362732, + "loss_ib": 0.012002469971776009, + "step": 862 + }, + { + "ce_ib": 6.015689849853516, + "ce_orig": 0.795344889163971, + "epoch": 0.24818462865770363, + "kl_loss": 0.2662222981452942, + "loss_ib": 0.008677912876009941, + "step": 863 + }, + { + "ce_ib": 6.312599182128906, + "ce_orig": 0.4928840100765228, + "epoch": 0.24818462865770363, + "kl_loss": 0.28389108180999756, + "loss_ib": 0.009151509962975979, + "step": 863 + }, + { + "ce_ib": 12.868780136108398, + "ce_orig": 1.7627007961273193, + "epoch": 0.24818462865770363, + "kl_loss": 0.2926875352859497, + "loss_ib": 0.01579565554857254, + "step": 863 + }, + { + "ce_ib": 13.420784950256348, + "ce_orig": 1.5475761890411377, + "epoch": 0.24818462865770363, + "kl_loss": 0.9178951382637024, + "loss_ib": 0.022599736228585243, + "step": 863 + }, + { + "ce_ib": 9.601221084594727, + "ce_orig": 0.6273306012153625, + "epoch": 0.24847221223668128, + "kl_loss": 0.44295597076416016, + "loss_ib": 0.014030780643224716, + "step": 864 + }, + { + "ce_ib": 10.278837203979492, + "ce_orig": 1.2835111618041992, + "epoch": 0.24847221223668128, + "kl_loss": 0.33239656686782837, + "loss_ib": 0.013602802529931068, + "step": 864 + }, + { + "ce_ib": 10.454754829406738, + "ce_orig": 0.7932427525520325, + "epoch": 0.24847221223668128, + "kl_loss": 0.39738035202026367, + "loss_ib": 0.014428557828068733, + "step": 864 + }, + { + "ce_ib": 8.318263053894043, + "ce_orig": 1.0965704917907715, + "epoch": 0.24847221223668128, + "kl_loss": 0.27918365597724915, + "loss_ib": 0.011110099032521248, + "step": 864 + }, + { + "epoch": 0.24875979581565894, + "grad_norm": 0.09105879068374634, + "learning_rate": 9.928610301807134e-06, + "loss": 0.9249, + "step": 865 + }, + { + "ce_ib": 11.053398132324219, + "ce_orig": 0.98641037940979, + "epoch": 0.24875979581565894, + "kl_loss": 0.34464025497436523, + "loss_ib": 0.014499801211059093, + "step": 865 + }, + { + "ce_ib": 10.561725616455078, + "ce_orig": 0.9344740509986877, + "epoch": 0.24875979581565894, + "kl_loss": 0.40244221687316895, + "loss_ib": 0.014586147852241993, + "step": 865 + }, + { + "ce_ib": 11.87633991241455, + "ce_orig": 1.324188470840454, + "epoch": 0.24875979581565894, + "kl_loss": 0.3682016134262085, + "loss_ib": 0.015558355487883091, + "step": 865 + }, + { + "ce_ib": 6.436470985412598, + "ce_orig": 0.5687084794044495, + "epoch": 0.24875979581565894, + "kl_loss": 0.3997005224227905, + "loss_ib": 0.010433475486934185, + "step": 865 + }, + { + "ce_ib": 7.637430191040039, + "ce_orig": 0.796751856803894, + "epoch": 0.24904737939463656, + "kl_loss": 0.40374982357025146, + "loss_ib": 0.011674928478896618, + "step": 866 + }, + { + "ce_ib": 6.677864074707031, + "ce_orig": 0.6250858902931213, + "epoch": 0.24904737939463656, + "kl_loss": 0.38111498951911926, + "loss_ib": 0.010489013977348804, + "step": 866 + }, + { + "ce_ib": 12.016997337341309, + "ce_orig": 1.5606534481048584, + "epoch": 0.24904737939463656, + "kl_loss": 0.3536309599876404, + "loss_ib": 0.0155533067882061, + "step": 866 + }, + { + "ce_ib": 8.68997573852539, + "ce_orig": 1.1279748678207397, + "epoch": 0.24904737939463656, + "kl_loss": 0.4000932574272156, + "loss_ib": 0.012690908275544643, + "step": 866 + }, + { + "ce_ib": 6.430920124053955, + "ce_orig": 0.6035857200622559, + "epoch": 0.2493349629736142, + "kl_loss": 0.3381291925907135, + "loss_ib": 0.009812211617827415, + "step": 867 + }, + { + "ce_ib": 6.76858377456665, + "ce_orig": 0.8521968126296997, + "epoch": 0.2493349629736142, + "kl_loss": 0.2716369330883026, + "loss_ib": 0.009484952315688133, + "step": 867 + }, + { + "ce_ib": 15.660572052001953, + "ce_orig": 2.287351608276367, + "epoch": 0.2493349629736142, + "kl_loss": 0.33299481868743896, + "loss_ib": 0.018990520387887955, + "step": 867 + }, + { + "ce_ib": 7.993679523468018, + "ce_orig": 0.547291100025177, + "epoch": 0.2493349629736142, + "kl_loss": 0.7522663474082947, + "loss_ib": 0.015516342595219612, + "step": 867 + }, + { + "ce_ib": 9.38365364074707, + "ce_orig": 0.9876450300216675, + "epoch": 0.24962254655259183, + "kl_loss": 0.31372547149658203, + "loss_ib": 0.012520909309387207, + "step": 868 + }, + { + "ce_ib": 13.470992088317871, + "ce_orig": 1.528100609779358, + "epoch": 0.24962254655259183, + "kl_loss": 0.36726510524749756, + "loss_ib": 0.017143642529845238, + "step": 868 + }, + { + "ce_ib": 6.063528060913086, + "ce_orig": 0.8686865568161011, + "epoch": 0.24962254655259183, + "kl_loss": 0.291568398475647, + "loss_ib": 0.008979211561381817, + "step": 868 + }, + { + "ce_ib": 9.594892501831055, + "ce_orig": 0.7062548398971558, + "epoch": 0.24962254655259183, + "kl_loss": 0.7296528816223145, + "loss_ib": 0.016891421750187874, + "step": 868 + }, + { + "ce_ib": 9.050810813903809, + "ce_orig": 0.8255693912506104, + "epoch": 0.24991013013156949, + "kl_loss": 0.4382583498954773, + "loss_ib": 0.013433394022285938, + "step": 869 + }, + { + "ce_ib": 6.191425800323486, + "ce_orig": 0.7848706841468811, + "epoch": 0.24991013013156949, + "kl_loss": 0.22416508197784424, + "loss_ib": 0.008433076553046703, + "step": 869 + }, + { + "ce_ib": 6.417364120483398, + "ce_orig": 0.7727878093719482, + "epoch": 0.24991013013156949, + "kl_loss": 0.2643115520477295, + "loss_ib": 0.009060479700565338, + "step": 869 + }, + { + "ce_ib": 7.678783893585205, + "ce_orig": 0.7737617492675781, + "epoch": 0.24991013013156949, + "kl_loss": 0.42957326769828796, + "loss_ib": 0.0119745172560215, + "step": 869 + }, + { + "epoch": 0.2501977137105471, + "grad_norm": 0.10078983008861542, + "learning_rate": 9.927297588585984e-06, + "loss": 0.8886, + "step": 870 + }, + { + "ce_ib": 10.537775039672852, + "ce_orig": 1.0423405170440674, + "epoch": 0.2501977137105471, + "kl_loss": 0.3949153423309326, + "loss_ib": 0.014486928470432758, + "step": 870 + }, + { + "ce_ib": 5.604185581207275, + "ce_orig": 0.5952542424201965, + "epoch": 0.2501977137105471, + "kl_loss": 0.32443949580192566, + "loss_ib": 0.008848579600453377, + "step": 870 + }, + { + "ce_ib": 11.089546203613281, + "ce_orig": 0.9591896533966064, + "epoch": 0.2501977137105471, + "kl_loss": 0.250461608171463, + "loss_ib": 0.013594161719083786, + "step": 870 + }, + { + "ce_ib": 6.729354381561279, + "ce_orig": 1.008842945098877, + "epoch": 0.2501977137105471, + "kl_loss": 0.25118446350097656, + "loss_ib": 0.009241199120879173, + "step": 870 + }, + { + "ce_ib": 9.652894973754883, + "ce_orig": 1.2227216958999634, + "epoch": 0.2504852972895248, + "kl_loss": 0.4000895023345947, + "loss_ib": 0.013653790578246117, + "step": 871 + }, + { + "ce_ib": 9.622594833374023, + "ce_orig": 0.9795891642570496, + "epoch": 0.2504852972895248, + "kl_loss": 0.25612586736679077, + "loss_ib": 0.012183853425085545, + "step": 871 + }, + { + "ce_ib": 7.215780735015869, + "ce_orig": 0.4193066656589508, + "epoch": 0.2504852972895248, + "kl_loss": 0.4899890422821045, + "loss_ib": 0.012115671299397945, + "step": 871 + }, + { + "ce_ib": 6.65898323059082, + "ce_orig": 0.8104040622711182, + "epoch": 0.2504852972895248, + "kl_loss": 0.26327258348464966, + "loss_ib": 0.00929170846939087, + "step": 871 + }, + { + "ce_ib": 4.725259780883789, + "ce_orig": 0.3476851284503937, + "epoch": 0.2507728808685024, + "kl_loss": 0.6398271322250366, + "loss_ib": 0.011123530566692352, + "step": 872 + }, + { + "ce_ib": 5.815470218658447, + "ce_orig": 0.7363677620887756, + "epoch": 0.2507728808685024, + "kl_loss": 0.5072571039199829, + "loss_ib": 0.010888040997087955, + "step": 872 + }, + { + "ce_ib": 7.01890230178833, + "ce_orig": 0.5536361336708069, + "epoch": 0.2507728808685024, + "kl_loss": 0.3183189928531647, + "loss_ib": 0.010202092118561268, + "step": 872 + }, + { + "ce_ib": 4.89237642288208, + "ce_orig": 0.5722980499267578, + "epoch": 0.2507728808685024, + "kl_loss": 0.30087870359420776, + "loss_ib": 0.007901162840425968, + "step": 872 + }, + { + "ce_ib": 7.8064866065979, + "ce_orig": 1.0494457483291626, + "epoch": 0.25106046444748004, + "kl_loss": 0.27215179800987244, + "loss_ib": 0.010528003796935081, + "step": 873 + }, + { + "ce_ib": 6.676516056060791, + "ce_orig": 0.6909357905387878, + "epoch": 0.25106046444748004, + "kl_loss": 0.36614060401916504, + "loss_ib": 0.010337922722101212, + "step": 873 + }, + { + "ce_ib": 8.948216438293457, + "ce_orig": 0.7401660084724426, + "epoch": 0.25106046444748004, + "kl_loss": 0.3224928677082062, + "loss_ib": 0.012173144146800041, + "step": 873 + }, + { + "ce_ib": 5.891620635986328, + "ce_orig": 0.8088377714157104, + "epoch": 0.25106046444748004, + "kl_loss": 0.3477107882499695, + "loss_ib": 0.009368727914988995, + "step": 873 + }, + { + "ce_ib": 8.354748725891113, + "ce_orig": 0.9080251455307007, + "epoch": 0.2513480480264577, + "kl_loss": 0.4312264621257782, + "loss_ib": 0.012667013332247734, + "step": 874 + }, + { + "ce_ib": 9.125079154968262, + "ce_orig": 1.2377448081970215, + "epoch": 0.2513480480264577, + "kl_loss": 0.28963541984558105, + "loss_ib": 0.012021434493362904, + "step": 874 + }, + { + "ce_ib": 6.146969795227051, + "ce_orig": 0.7261344194412231, + "epoch": 0.2513480480264577, + "kl_loss": 0.2670226991176605, + "loss_ib": 0.008817196823656559, + "step": 874 + }, + { + "ce_ib": 8.836943626403809, + "ce_orig": 0.9964169859886169, + "epoch": 0.2513480480264577, + "kl_loss": 0.29464757442474365, + "loss_ib": 0.011783418245613575, + "step": 874 + }, + { + "epoch": 0.25163563160543534, + "grad_norm": 0.09101825952529907, + "learning_rate": 9.925973004361295e-06, + "loss": 0.9106, + "step": 875 + }, + { + "ce_ib": 10.854291915893555, + "ce_orig": 1.7030887603759766, + "epoch": 0.25163563160543534, + "kl_loss": 0.2948107421398163, + "loss_ib": 0.013802398927509785, + "step": 875 + }, + { + "ce_ib": 5.0278215408325195, + "ce_orig": 0.6354923844337463, + "epoch": 0.25163563160543534, + "kl_loss": 0.27879562973976135, + "loss_ib": 0.007815778255462646, + "step": 875 + }, + { + "ce_ib": 3.9477179050445557, + "ce_orig": 0.27782636880874634, + "epoch": 0.25163563160543534, + "kl_loss": 0.5922541618347168, + "loss_ib": 0.009870259091258049, + "step": 875 + }, + { + "ce_ib": 9.96784782409668, + "ce_orig": 0.8732298016548157, + "epoch": 0.25163563160543534, + "kl_loss": 0.3482765555381775, + "loss_ib": 0.01345061231404543, + "step": 875 + }, + { + "ce_ib": 10.751762390136719, + "ce_orig": 0.7869917750358582, + "epoch": 0.25192321518441296, + "kl_loss": 0.35357779264450073, + "loss_ib": 0.014287540689110756, + "step": 876 + }, + { + "ce_ib": 11.038793563842773, + "ce_orig": 1.3073241710662842, + "epoch": 0.25192321518441296, + "kl_loss": 0.3102778494358063, + "loss_ib": 0.014141570776700974, + "step": 876 + }, + { + "ce_ib": 10.809063911437988, + "ce_orig": 1.3352001905441284, + "epoch": 0.25192321518441296, + "kl_loss": 0.266183078289032, + "loss_ib": 0.013470894657075405, + "step": 876 + }, + { + "ce_ib": 9.622008323669434, + "ce_orig": 1.2020785808563232, + "epoch": 0.25192321518441296, + "kl_loss": 0.29643064737319946, + "loss_ib": 0.012586314231157303, + "step": 876 + }, + { + "ce_ib": 4.619173526763916, + "ce_orig": 0.5329123735427856, + "epoch": 0.2522107987633906, + "kl_loss": 0.25063222646713257, + "loss_ib": 0.007125495467334986, + "step": 877 + }, + { + "ce_ib": 14.786099433898926, + "ce_orig": 1.6358728408813477, + "epoch": 0.2522107987633906, + "kl_loss": 0.3289327919483185, + "loss_ib": 0.018075427040457726, + "step": 877 + }, + { + "ce_ib": 6.436891555786133, + "ce_orig": 0.6348329782485962, + "epoch": 0.2522107987633906, + "kl_loss": 0.24020250141620636, + "loss_ib": 0.008838916197419167, + "step": 877 + }, + { + "ce_ib": 8.219223976135254, + "ce_orig": 0.4184137284755707, + "epoch": 0.2522107987633906, + "kl_loss": 0.2933961749076843, + "loss_ib": 0.011153184808790684, + "step": 877 + }, + { + "ce_ib": 11.890969276428223, + "ce_orig": 1.2066895961761475, + "epoch": 0.25249838234236827, + "kl_loss": 0.3087109327316284, + "loss_ib": 0.014978078193962574, + "step": 878 + }, + { + "ce_ib": 8.250067710876465, + "ce_orig": 0.8217212557792664, + "epoch": 0.25249838234236827, + "kl_loss": 0.30817484855651855, + "loss_ib": 0.011331815272569656, + "step": 878 + }, + { + "ce_ib": 5.804820537567139, + "ce_orig": 0.7277146577835083, + "epoch": 0.25249838234236827, + "kl_loss": 0.2948615252971649, + "loss_ib": 0.00875343568623066, + "step": 878 + }, + { + "ce_ib": 7.200780868530273, + "ce_orig": 0.7634241580963135, + "epoch": 0.25249838234236827, + "kl_loss": 0.434295117855072, + "loss_ib": 0.011543731205165386, + "step": 878 + }, + { + "ce_ib": 8.42392635345459, + "ce_orig": 0.452232301235199, + "epoch": 0.2527859659213459, + "kl_loss": 0.40239661931991577, + "loss_ib": 0.012447891756892204, + "step": 879 + }, + { + "ce_ib": 9.636970520019531, + "ce_orig": 0.909797728061676, + "epoch": 0.2527859659213459, + "kl_loss": 0.2469838708639145, + "loss_ib": 0.012106809765100479, + "step": 879 + }, + { + "ce_ib": 10.866209983825684, + "ce_orig": 0.9333135485649109, + "epoch": 0.2527859659213459, + "kl_loss": 0.2821698486804962, + "loss_ib": 0.01368790864944458, + "step": 879 + }, + { + "ce_ib": 4.037529468536377, + "ce_orig": 0.6108816266059875, + "epoch": 0.2527859659213459, + "kl_loss": 0.2413032352924347, + "loss_ib": 0.006450561806559563, + "step": 879 + }, + { + "epoch": 0.2530735495003235, + "grad_norm": 0.09534526616334915, + "learning_rate": 9.924636552324296e-06, + "loss": 0.8423, + "step": 880 + }, + { + "ce_ib": 7.282698631286621, + "ce_orig": 1.1506547927856445, + "epoch": 0.2530735495003235, + "kl_loss": 0.34004634618759155, + "loss_ib": 0.01068316213786602, + "step": 880 + }, + { + "ce_ib": 11.481069564819336, + "ce_orig": 1.6637837886810303, + "epoch": 0.2530735495003235, + "kl_loss": 0.6563593149185181, + "loss_ib": 0.018044661730527878, + "step": 880 + }, + { + "ce_ib": 10.626919746398926, + "ce_orig": 1.1782991886138916, + "epoch": 0.2530735495003235, + "kl_loss": 0.328736275434494, + "loss_ib": 0.013914283365011215, + "step": 880 + }, + { + "ce_ib": 11.945712089538574, + "ce_orig": 1.6569890975952148, + "epoch": 0.2530735495003235, + "kl_loss": 0.39438027143478394, + "loss_ib": 0.015889516100287437, + "step": 880 + }, + { + "ce_ib": 6.359279632568359, + "ce_orig": 0.8192113637924194, + "epoch": 0.2533611330793012, + "kl_loss": 0.3645118176937103, + "loss_ib": 0.010004397481679916, + "step": 881 + }, + { + "ce_ib": 10.964028358459473, + "ce_orig": 0.7998406887054443, + "epoch": 0.2533611330793012, + "kl_loss": 0.49998754262924194, + "loss_ib": 0.015963904559612274, + "step": 881 + }, + { + "ce_ib": 11.182770729064941, + "ce_orig": 1.3286393880844116, + "epoch": 0.2533611330793012, + "kl_loss": 0.4621961712837219, + "loss_ib": 0.015804732218384743, + "step": 881 + }, + { + "ce_ib": 8.53327751159668, + "ce_orig": 0.8316857218742371, + "epoch": 0.2533611330793012, + "kl_loss": 0.3155197203159332, + "loss_ib": 0.0116884745657444, + "step": 881 + }, + { + "ce_ib": 6.411721229553223, + "ce_orig": 0.7425188422203064, + "epoch": 0.2536487166582788, + "kl_loss": 0.2903468906879425, + "loss_ib": 0.00931518990546465, + "step": 882 + }, + { + "ce_ib": 6.149788856506348, + "ce_orig": 0.8823583722114563, + "epoch": 0.2536487166582788, + "kl_loss": 0.2212170958518982, + "loss_ib": 0.00836195982992649, + "step": 882 + }, + { + "ce_ib": 9.317445755004883, + "ce_orig": 0.932159423828125, + "epoch": 0.2536487166582788, + "kl_loss": 0.2839629650115967, + "loss_ib": 0.012157075107097626, + "step": 882 + }, + { + "ce_ib": 7.935296535491943, + "ce_orig": 0.9251559376716614, + "epoch": 0.2536487166582788, + "kl_loss": 0.3732267916202545, + "loss_ib": 0.011667564511299133, + "step": 882 + }, + { + "ce_ib": 5.324270725250244, + "ce_orig": 0.5246868133544922, + "epoch": 0.25393630023725644, + "kl_loss": 0.27826499938964844, + "loss_ib": 0.00810692086815834, + "step": 883 + }, + { + "ce_ib": 7.959061145782471, + "ce_orig": 1.0556241273880005, + "epoch": 0.25393630023725644, + "kl_loss": 0.2916038930416107, + "loss_ib": 0.010875099338591099, + "step": 883 + }, + { + "ce_ib": 6.867218971252441, + "ce_orig": 0.6588130593299866, + "epoch": 0.25393630023725644, + "kl_loss": 0.29453879594802856, + "loss_ib": 0.009812606498599052, + "step": 883 + }, + { + "ce_ib": 4.161896228790283, + "ce_orig": 0.4560477137565613, + "epoch": 0.25393630023725644, + "kl_loss": 0.2710115611553192, + "loss_ib": 0.006872011814266443, + "step": 883 + }, + { + "ce_ib": 6.286719799041748, + "ce_orig": 0.7286011576652527, + "epoch": 0.2542238838162341, + "kl_loss": 0.3385438621044159, + "loss_ib": 0.009672158397734165, + "step": 884 + }, + { + "ce_ib": 10.294978141784668, + "ce_orig": 0.775021493434906, + "epoch": 0.2542238838162341, + "kl_loss": 0.3102239966392517, + "loss_ib": 0.01339721865952015, + "step": 884 + }, + { + "ce_ib": 5.616055488586426, + "ce_orig": 0.5666757822036743, + "epoch": 0.2542238838162341, + "kl_loss": 0.3340657651424408, + "loss_ib": 0.00895671360194683, + "step": 884 + }, + { + "ce_ib": 6.7213826179504395, + "ce_orig": 0.6700468063354492, + "epoch": 0.2542238838162341, + "kl_loss": 0.259299099445343, + "loss_ib": 0.009314373135566711, + "step": 884 + }, + { + "epoch": 0.25451146739521174, + "grad_norm": 0.09304392337799072, + "learning_rate": 9.92328823569481e-06, + "loss": 0.9081, + "step": 885 + }, + { + "ce_ib": 10.979228973388672, + "ce_orig": 1.314060091972351, + "epoch": 0.25451146739521174, + "kl_loss": 0.4535999298095703, + "loss_ib": 0.015515227802097797, + "step": 885 + }, + { + "ce_ib": 9.044007301330566, + "ce_orig": 1.0800416469573975, + "epoch": 0.25451146739521174, + "kl_loss": 0.25342878699302673, + "loss_ib": 0.01157829537987709, + "step": 885 + }, + { + "ce_ib": 7.7309441566467285, + "ce_orig": 0.737576425075531, + "epoch": 0.25451146739521174, + "kl_loss": 0.25567033886909485, + "loss_ib": 0.010287647135555744, + "step": 885 + }, + { + "ce_ib": 5.4991774559021, + "ce_orig": 0.67442387342453, + "epoch": 0.25451146739521174, + "kl_loss": 0.22683289647102356, + "loss_ib": 0.007767506875097752, + "step": 885 + }, + { + "ce_ib": 9.953997611999512, + "ce_orig": 1.5094314813613892, + "epoch": 0.25479905097418937, + "kl_loss": 0.23526480793952942, + "loss_ib": 0.012306645512580872, + "step": 886 + }, + { + "ce_ib": 5.9002685546875, + "ce_orig": 0.8582682013511658, + "epoch": 0.25479905097418937, + "kl_loss": 0.2747255265712738, + "loss_ib": 0.008647523820400238, + "step": 886 + }, + { + "ce_ib": 3.582958698272705, + "ce_orig": 0.4530331492424011, + "epoch": 0.25479905097418937, + "kl_loss": 0.22470691800117493, + "loss_ib": 0.005830028094351292, + "step": 886 + }, + { + "ce_ib": 8.590431213378906, + "ce_orig": 0.7905410528182983, + "epoch": 0.25479905097418937, + "kl_loss": 0.32975587248802185, + "loss_ib": 0.011887989938259125, + "step": 886 + }, + { + "ce_ib": 12.112462997436523, + "ce_orig": 1.4443359375, + "epoch": 0.255086634553167, + "kl_loss": 0.39053958654403687, + "loss_ib": 0.016017857939004898, + "step": 887 + }, + { + "ce_ib": 7.377198219299316, + "ce_orig": 0.8195330500602722, + "epoch": 0.255086634553167, + "kl_loss": 0.3151588439941406, + "loss_ib": 0.010528786107897758, + "step": 887 + }, + { + "ce_ib": 5.7976861000061035, + "ce_orig": 0.6533734798431396, + "epoch": 0.255086634553167, + "kl_loss": 0.5457720756530762, + "loss_ib": 0.011255406774580479, + "step": 887 + }, + { + "ce_ib": 3.642547369003296, + "ce_orig": 0.3404500484466553, + "epoch": 0.255086634553167, + "kl_loss": 0.31931373476982117, + "loss_ib": 0.006835684645920992, + "step": 887 + }, + { + "ce_ib": 9.251564025878906, + "ce_orig": 1.1658694744110107, + "epoch": 0.25537421813214467, + "kl_loss": 0.276309609413147, + "loss_ib": 0.012014660984277725, + "step": 888 + }, + { + "ce_ib": 9.521452903747559, + "ce_orig": 1.0355658531188965, + "epoch": 0.25537421813214467, + "kl_loss": 0.3186280131340027, + "loss_ib": 0.012707732617855072, + "step": 888 + }, + { + "ce_ib": 8.611248970031738, + "ce_orig": 1.067291498184204, + "epoch": 0.25537421813214467, + "kl_loss": 0.3613908290863037, + "loss_ib": 0.012225157581269741, + "step": 888 + }, + { + "ce_ib": 6.128733158111572, + "ce_orig": 0.7295544147491455, + "epoch": 0.25537421813214467, + "kl_loss": 0.2833183705806732, + "loss_ib": 0.008961916901171207, + "step": 888 + }, + { + "ce_ib": 7.8607683181762695, + "ce_orig": 0.9411081075668335, + "epoch": 0.2556618017111223, + "kl_loss": 0.3833426833152771, + "loss_ib": 0.011694194748997688, + "step": 889 + }, + { + "ce_ib": 7.081918716430664, + "ce_orig": 0.7683687806129456, + "epoch": 0.2556618017111223, + "kl_loss": 0.3560516834259033, + "loss_ib": 0.010642435401678085, + "step": 889 + }, + { + "ce_ib": 6.406893253326416, + "ce_orig": 0.6722978353500366, + "epoch": 0.2556618017111223, + "kl_loss": 0.2877756357192993, + "loss_ib": 0.009284649044275284, + "step": 889 + }, + { + "ce_ib": 7.919890403747559, + "ce_orig": 1.0300544500350952, + "epoch": 0.2556618017111223, + "kl_loss": 0.3256221413612366, + "loss_ib": 0.011176111176609993, + "step": 889 + }, + { + "epoch": 0.2559493852900999, + "grad_norm": 0.08482884615659714, + "learning_rate": 9.921928057721242e-06, + "loss": 0.8751, + "step": 890 + }, + { + "ce_ib": 8.637929916381836, + "ce_orig": 0.9734044075012207, + "epoch": 0.2559493852900999, + "kl_loss": 0.32373255491256714, + "loss_ib": 0.011875255033373833, + "step": 890 + }, + { + "ce_ib": 8.563017845153809, + "ce_orig": 0.4407178461551666, + "epoch": 0.2559493852900999, + "kl_loss": 0.37603944540023804, + "loss_ib": 0.012323413044214249, + "step": 890 + }, + { + "ce_ib": 4.576637268066406, + "ce_orig": 0.5486153960227966, + "epoch": 0.2559493852900999, + "kl_loss": 0.2956191301345825, + "loss_ib": 0.007532828953117132, + "step": 890 + }, + { + "ce_ib": 6.462611675262451, + "ce_orig": 0.5243940353393555, + "epoch": 0.2559493852900999, + "kl_loss": 0.2781410813331604, + "loss_ib": 0.009244021959602833, + "step": 890 + }, + { + "ce_ib": 8.214609146118164, + "ce_orig": 0.5406211614608765, + "epoch": 0.2562369688690776, + "kl_loss": 0.40383273363113403, + "loss_ib": 0.012252936139702797, + "step": 891 + }, + { + "ce_ib": 9.679584503173828, + "ce_orig": 0.9921509027481079, + "epoch": 0.2562369688690776, + "kl_loss": 0.3201594352722168, + "loss_ib": 0.01288117840886116, + "step": 891 + }, + { + "ce_ib": 8.80469036102295, + "ce_orig": 1.0968488454818726, + "epoch": 0.2562369688690776, + "kl_loss": 0.683159589767456, + "loss_ib": 0.01563628576695919, + "step": 891 + }, + { + "ce_ib": 6.155488014221191, + "ce_orig": 0.5672451853752136, + "epoch": 0.2562369688690776, + "kl_loss": 0.39383214712142944, + "loss_ib": 0.010093809105455875, + "step": 891 + }, + { + "ce_ib": 6.969648838043213, + "ce_orig": 0.7994107604026794, + "epoch": 0.2565245524480552, + "kl_loss": 0.3125140070915222, + "loss_ib": 0.010094788856804371, + "step": 892 + }, + { + "ce_ib": 7.141666412353516, + "ce_orig": 0.4777945876121521, + "epoch": 0.2565245524480552, + "kl_loss": 0.3340398967266083, + "loss_ib": 0.010482065379619598, + "step": 892 + }, + { + "ce_ib": 8.195847511291504, + "ce_orig": 1.0143331289291382, + "epoch": 0.2565245524480552, + "kl_loss": 0.4175947308540344, + "loss_ib": 0.012371795251965523, + "step": 892 + }, + { + "ce_ib": 10.73903751373291, + "ce_orig": 0.8994997143745422, + "epoch": 0.2565245524480552, + "kl_loss": 0.2581188380718231, + "loss_ib": 0.013320226222276688, + "step": 892 + }, + { + "ce_ib": 3.9347095489501953, + "ce_orig": 0.3383885324001312, + "epoch": 0.25681213602703284, + "kl_loss": 0.3705168068408966, + "loss_ib": 0.007639877498149872, + "step": 893 + }, + { + "ce_ib": 9.592818260192871, + "ce_orig": 0.9891217947006226, + "epoch": 0.25681213602703284, + "kl_loss": 0.3685084581375122, + "loss_ib": 0.013277902267873287, + "step": 893 + }, + { + "ce_ib": 8.22640609741211, + "ce_orig": 0.8431357145309448, + "epoch": 0.25681213602703284, + "kl_loss": 0.28724151849746704, + "loss_ib": 0.011098820716142654, + "step": 893 + }, + { + "ce_ib": 6.6608357429504395, + "ce_orig": 0.7445570230484009, + "epoch": 0.25681213602703284, + "kl_loss": 0.23527176678180695, + "loss_ib": 0.009013553149998188, + "step": 893 + }, + { + "ce_ib": 4.6245598793029785, + "ce_orig": 0.5491394400596619, + "epoch": 0.2570997196060105, + "kl_loss": 0.2531249523162842, + "loss_ib": 0.007155809085816145, + "step": 894 + }, + { + "ce_ib": 7.3063063621521, + "ce_orig": 0.6298426985740662, + "epoch": 0.2570997196060105, + "kl_loss": 0.25430071353912354, + "loss_ib": 0.009849313646554947, + "step": 894 + }, + { + "ce_ib": 9.165714263916016, + "ce_orig": 1.1000019311904907, + "epoch": 0.2570997196060105, + "kl_loss": 0.5990852117538452, + "loss_ib": 0.015156567096710205, + "step": 894 + }, + { + "ce_ib": 8.9033842086792, + "ce_orig": 0.9108843207359314, + "epoch": 0.2570997196060105, + "kl_loss": 0.30792462825775146, + "loss_ib": 0.011982630006968975, + "step": 894 + }, + { + "epoch": 0.25738730318498815, + "grad_norm": 0.09863123297691345, + "learning_rate": 9.92055602168058e-06, + "loss": 0.8698, + "step": 895 + }, + { + "ce_ib": 7.7511515617370605, + "ce_orig": 0.7273695468902588, + "epoch": 0.25738730318498815, + "kl_loss": 0.2521267533302307, + "loss_ib": 0.010272419080138206, + "step": 895 + }, + { + "ce_ib": 8.19736385345459, + "ce_orig": 0.666002094745636, + "epoch": 0.25738730318498815, + "kl_loss": 0.45970863103866577, + "loss_ib": 0.012794449925422668, + "step": 895 + }, + { + "ce_ib": 8.727402687072754, + "ce_orig": 0.5750917196273804, + "epoch": 0.25738730318498815, + "kl_loss": 0.33787843585014343, + "loss_ib": 0.012106186710298061, + "step": 895 + }, + { + "ce_ib": 7.301302433013916, + "ce_orig": 0.49371325969696045, + "epoch": 0.25738730318498815, + "kl_loss": 0.39213109016418457, + "loss_ib": 0.011222613044083118, + "step": 895 + }, + { + "ce_ib": 8.087718963623047, + "ce_orig": 1.0477973222732544, + "epoch": 0.25767488676396577, + "kl_loss": 0.26531845331192017, + "loss_ib": 0.010740903206169605, + "step": 896 + }, + { + "ce_ib": 14.437392234802246, + "ce_orig": 1.9533960819244385, + "epoch": 0.25767488676396577, + "kl_loss": 0.3840804100036621, + "loss_ib": 0.018278196454048157, + "step": 896 + }, + { + "ce_ib": 12.922438621520996, + "ce_orig": 1.4912583827972412, + "epoch": 0.25767488676396577, + "kl_loss": 0.33412793278694153, + "loss_ib": 0.016263717785477638, + "step": 896 + }, + { + "ce_ib": 9.707921028137207, + "ce_orig": 1.1894207000732422, + "epoch": 0.25767488676396577, + "kl_loss": 0.2657119035720825, + "loss_ib": 0.012365040369331837, + "step": 896 + }, + { + "ce_ib": 9.557899475097656, + "ce_orig": 0.8531953692436218, + "epoch": 0.2579624703429434, + "kl_loss": 0.2381764054298401, + "loss_ib": 0.01193966343998909, + "step": 897 + }, + { + "ce_ib": 7.224669456481934, + "ce_orig": 1.0568636655807495, + "epoch": 0.2579624703429434, + "kl_loss": 0.2253345549106598, + "loss_ib": 0.009478014893829823, + "step": 897 + }, + { + "ce_ib": 9.596117973327637, + "ce_orig": 0.8501659035682678, + "epoch": 0.2579624703429434, + "kl_loss": 0.3259985148906708, + "loss_ib": 0.012856102548539639, + "step": 897 + }, + { + "ce_ib": 8.441751480102539, + "ce_orig": 0.8498370051383972, + "epoch": 0.2579624703429434, + "kl_loss": 0.34127742052078247, + "loss_ib": 0.011854525655508041, + "step": 897 + }, + { + "ce_ib": 9.776628494262695, + "ce_orig": 1.1606560945510864, + "epoch": 0.2582500539219211, + "kl_loss": 0.39107829332351685, + "loss_ib": 0.013687411323189735, + "step": 898 + }, + { + "ce_ib": 8.28480052947998, + "ce_orig": 0.8396362066268921, + "epoch": 0.2582500539219211, + "kl_loss": 0.5774872899055481, + "loss_ib": 0.014059673063457012, + "step": 898 + }, + { + "ce_ib": 5.942250728607178, + "ce_orig": 0.570982038974762, + "epoch": 0.2582500539219211, + "kl_loss": 0.35418200492858887, + "loss_ib": 0.009484071284532547, + "step": 898 + }, + { + "ce_ib": 13.332066535949707, + "ce_orig": 1.999030590057373, + "epoch": 0.2582500539219211, + "kl_loss": 0.32090240716934204, + "loss_ib": 0.016541089862585068, + "step": 898 + }, + { + "ce_ib": 6.9958176612854, + "ce_orig": 0.950183629989624, + "epoch": 0.2585376375008987, + "kl_loss": 0.2381882220506668, + "loss_ib": 0.00937770027667284, + "step": 899 + }, + { + "ce_ib": 10.04285717010498, + "ce_orig": 0.8781871199607849, + "epoch": 0.2585376375008987, + "kl_loss": 0.3415898382663727, + "loss_ib": 0.013458754867315292, + "step": 899 + }, + { + "ce_ib": 13.117063522338867, + "ce_orig": 1.5412489175796509, + "epoch": 0.2585376375008987, + "kl_loss": 0.28792649507522583, + "loss_ib": 0.015996329486370087, + "step": 899 + }, + { + "ce_ib": 5.770230293273926, + "ce_orig": 0.4059688448905945, + "epoch": 0.2585376375008987, + "kl_loss": 0.3441586196422577, + "loss_ib": 0.00921181682497263, + "step": 899 + }, + { + "epoch": 0.2588252210798763, + "grad_norm": 0.11080905795097351, + "learning_rate": 9.919172130878378e-06, + "loss": 0.8609, + "step": 900 + }, + { + "ce_ib": 7.81713342666626, + "ce_orig": 0.7552506923675537, + "epoch": 0.2588252210798763, + "kl_loss": 0.3844016194343567, + "loss_ib": 0.011661150492727757, + "step": 900 + }, + { + "ce_ib": 7.249913215637207, + "ce_orig": 0.968014657497406, + "epoch": 0.2588252210798763, + "kl_loss": 0.25877052545547485, + "loss_ib": 0.009837618097662926, + "step": 900 + }, + { + "ce_ib": 7.9647650718688965, + "ce_orig": 0.5272249579429626, + "epoch": 0.2588252210798763, + "kl_loss": 0.284597784280777, + "loss_ib": 0.01081074308604002, + "step": 900 + }, + { + "ce_ib": 8.811546325683594, + "ce_orig": 0.7540147304534912, + "epoch": 0.2588252210798763, + "kl_loss": 0.39285385608673096, + "loss_ib": 0.012740084901452065, + "step": 900 + }, + { + "ce_ib": 4.880631446838379, + "ce_orig": 0.5076504945755005, + "epoch": 0.259112804658854, + "kl_loss": 0.26206904649734497, + "loss_ib": 0.007501321844756603, + "step": 901 + }, + { + "ce_ib": 7.368719100952148, + "ce_orig": 0.6915988326072693, + "epoch": 0.259112804658854, + "kl_loss": 0.4998074769973755, + "loss_ib": 0.012366793118417263, + "step": 901 + }, + { + "ce_ib": 4.789905071258545, + "ce_orig": 0.756260335445404, + "epoch": 0.259112804658854, + "kl_loss": 0.2596268951892853, + "loss_ib": 0.00738617405295372, + "step": 901 + }, + { + "ce_ib": 6.979801177978516, + "ce_orig": 0.49811649322509766, + "epoch": 0.259112804658854, + "kl_loss": 0.3590124845504761, + "loss_ib": 0.010569925419986248, + "step": 901 + }, + { + "ce_ib": 5.86633825302124, + "ce_orig": 0.7878961563110352, + "epoch": 0.2594003882378316, + "kl_loss": 0.2769380211830139, + "loss_ib": 0.008635718375444412, + "step": 902 + }, + { + "ce_ib": 7.545517921447754, + "ce_orig": 0.9500758051872253, + "epoch": 0.2594003882378316, + "kl_loss": 0.21764595806598663, + "loss_ib": 0.009721977636218071, + "step": 902 + }, + { + "ce_ib": 6.535619258880615, + "ce_orig": 0.8012053966522217, + "epoch": 0.2594003882378316, + "kl_loss": 0.29037898778915405, + "loss_ib": 0.009439408779144287, + "step": 902 + }, + { + "ce_ib": 9.716675758361816, + "ce_orig": 0.9397892951965332, + "epoch": 0.2594003882378316, + "kl_loss": 0.3330523669719696, + "loss_ib": 0.01304719876497984, + "step": 902 + }, + { + "ce_ib": 4.242072105407715, + "ce_orig": 0.5148694515228271, + "epoch": 0.25968797181680925, + "kl_loss": 0.2478223443031311, + "loss_ib": 0.006720295175909996, + "step": 903 + }, + { + "ce_ib": 6.857370376586914, + "ce_orig": 0.48739153146743774, + "epoch": 0.25968797181680925, + "kl_loss": 0.3677447736263275, + "loss_ib": 0.010534818284213543, + "step": 903 + }, + { + "ce_ib": 5.933156967163086, + "ce_orig": 0.40238049626350403, + "epoch": 0.25968797181680925, + "kl_loss": 0.25679805874824524, + "loss_ib": 0.008501137606799603, + "step": 903 + }, + { + "ce_ib": 8.204896926879883, + "ce_orig": 0.7569003105163574, + "epoch": 0.25968797181680925, + "kl_loss": 0.2540472745895386, + "loss_ib": 0.010745369829237461, + "step": 903 + }, + { + "ce_ib": 6.275259494781494, + "ce_orig": 0.70445716381073, + "epoch": 0.25997555539578693, + "kl_loss": 0.3568292260169983, + "loss_ib": 0.009843551553785801, + "step": 904 + }, + { + "ce_ib": 9.74506664276123, + "ce_orig": 1.258253812789917, + "epoch": 0.25997555539578693, + "kl_loss": 0.2770775854587555, + "loss_ib": 0.012515842914581299, + "step": 904 + }, + { + "ce_ib": 10.810585021972656, + "ce_orig": 0.8011317849159241, + "epoch": 0.25997555539578693, + "kl_loss": 0.3040934205055237, + "loss_ib": 0.013851518742740154, + "step": 904 + }, + { + "ce_ib": 7.582912445068359, + "ce_orig": 0.9397713541984558, + "epoch": 0.25997555539578693, + "kl_loss": 0.22432610392570496, + "loss_ib": 0.00982617400586605, + "step": 904 + }, + { + "epoch": 0.26026313897476455, + "grad_norm": 0.09053654223680496, + "learning_rate": 9.917776388648748e-06, + "loss": 0.82, + "step": 905 + }, + { + "ce_ib": 10.179444313049316, + "ce_orig": 0.9755759835243225, + "epoch": 0.26026313897476455, + "kl_loss": 0.33106786012649536, + "loss_ib": 0.013490123674273491, + "step": 905 + }, + { + "ce_ib": 3.325618267059326, + "ce_orig": 0.16170339286327362, + "epoch": 0.26026313897476455, + "kl_loss": 0.5996187925338745, + "loss_ib": 0.009321806021034718, + "step": 905 + }, + { + "ce_ib": 7.778548240661621, + "ce_orig": 0.8278656005859375, + "epoch": 0.26026313897476455, + "kl_loss": 0.2725660502910614, + "loss_ib": 0.010504208505153656, + "step": 905 + }, + { + "ce_ib": 8.379356384277344, + "ce_orig": 0.9261234402656555, + "epoch": 0.26026313897476455, + "kl_loss": 0.378944993019104, + "loss_ib": 0.012168805114924908, + "step": 905 + }, + { + "ce_ib": 9.240448951721191, + "ce_orig": 0.6733576655387878, + "epoch": 0.2605507225537422, + "kl_loss": 0.4032820165157318, + "loss_ib": 0.013273268938064575, + "step": 906 + }, + { + "ce_ib": 12.491889953613281, + "ce_orig": 1.8018544912338257, + "epoch": 0.2605507225537422, + "kl_loss": 0.34695449471473694, + "loss_ib": 0.015961434692144394, + "step": 906 + }, + { + "ce_ib": 8.959487915039062, + "ce_orig": 1.4174104928970337, + "epoch": 0.2605507225537422, + "kl_loss": 0.30827200412750244, + "loss_ib": 0.012042207643389702, + "step": 906 + }, + { + "ce_ib": 7.630092620849609, + "ce_orig": 0.2582243084907532, + "epoch": 0.2605507225537422, + "kl_loss": 0.4620741903781891, + "loss_ib": 0.01225083414465189, + "step": 906 + }, + { + "ce_ib": 9.350717544555664, + "ce_orig": 0.8410260081291199, + "epoch": 0.2608383061327198, + "kl_loss": 0.27230745553970337, + "loss_ib": 0.012073791585862637, + "step": 907 + }, + { + "ce_ib": 5.942009925842285, + "ce_orig": 0.631314754486084, + "epoch": 0.2608383061327198, + "kl_loss": 0.23767834901809692, + "loss_ib": 0.008318793959915638, + "step": 907 + }, + { + "ce_ib": 7.381026744842529, + "ce_orig": 0.8023630380630493, + "epoch": 0.2608383061327198, + "kl_loss": 0.2556014060974121, + "loss_ib": 0.009937040507793427, + "step": 907 + }, + { + "ce_ib": 10.775535583496094, + "ce_orig": 1.5145833492279053, + "epoch": 0.2608383061327198, + "kl_loss": 0.3266568183898926, + "loss_ib": 0.014042104594409466, + "step": 907 + }, + { + "ce_ib": 6.754226207733154, + "ce_orig": 0.5556401014328003, + "epoch": 0.2611258897116975, + "kl_loss": 0.37610459327697754, + "loss_ib": 0.010515272617340088, + "step": 908 + }, + { + "ce_ib": 4.648565292358398, + "ce_orig": 0.42741912603378296, + "epoch": 0.2611258897116975, + "kl_loss": 0.2677002549171448, + "loss_ib": 0.007325568236410618, + "step": 908 + }, + { + "ce_ib": 9.743642807006836, + "ce_orig": 1.2022920846939087, + "epoch": 0.2611258897116975, + "kl_loss": 0.31233084201812744, + "loss_ib": 0.01286695059388876, + "step": 908 + }, + { + "ce_ib": 8.952857971191406, + "ce_orig": 1.0449740886688232, + "epoch": 0.2611258897116975, + "kl_loss": 0.290395051240921, + "loss_ib": 0.011856808327138424, + "step": 908 + }, + { + "ce_ib": 7.314600467681885, + "ce_orig": 0.487211138010025, + "epoch": 0.2614134732906751, + "kl_loss": 0.25061601400375366, + "loss_ib": 0.009820760227739811, + "step": 909 + }, + { + "ce_ib": 8.830986976623535, + "ce_orig": 0.7534437775611877, + "epoch": 0.2614134732906751, + "kl_loss": 0.276096910238266, + "loss_ib": 0.01159195601940155, + "step": 909 + }, + { + "ce_ib": 5.072354793548584, + "ce_orig": 0.7483262419700623, + "epoch": 0.2614134732906751, + "kl_loss": 0.2831575870513916, + "loss_ib": 0.007903930731117725, + "step": 909 + }, + { + "ce_ib": 6.34334659576416, + "ce_orig": 0.792799711227417, + "epoch": 0.2614134732906751, + "kl_loss": 0.27525418996810913, + "loss_ib": 0.009095888584852219, + "step": 909 + }, + { + "epoch": 0.2617010568696527, + "grad_norm": 0.09709301590919495, + "learning_rate": 9.916368798354356e-06, + "loss": 0.8731, + "step": 910 + }, + { + "ce_ib": 4.5776896476745605, + "ce_orig": 0.5359620451927185, + "epoch": 0.2617010568696527, + "kl_loss": 0.2551354765892029, + "loss_ib": 0.007129044272005558, + "step": 910 + }, + { + "ce_ib": 5.752841949462891, + "ce_orig": 0.4892864227294922, + "epoch": 0.2617010568696527, + "kl_loss": 0.28199148178100586, + "loss_ib": 0.008572756312787533, + "step": 910 + }, + { + "ce_ib": 7.317269325256348, + "ce_orig": 0.8823491930961609, + "epoch": 0.2617010568696527, + "kl_loss": 0.2838546335697174, + "loss_ib": 0.0101558156311512, + "step": 910 + }, + { + "ce_ib": 4.611359119415283, + "ce_orig": 0.6804866790771484, + "epoch": 0.2617010568696527, + "kl_loss": 0.24438399076461792, + "loss_ib": 0.007055198773741722, + "step": 910 + }, + { + "ce_ib": 6.782260894775391, + "ce_orig": 0.7364330887794495, + "epoch": 0.2619886404486304, + "kl_loss": 0.31710243225097656, + "loss_ib": 0.009953285567462444, + "step": 911 + }, + { + "ce_ib": 8.65565013885498, + "ce_orig": 1.165532112121582, + "epoch": 0.2619886404486304, + "kl_loss": 0.27839866280555725, + "loss_ib": 0.01143963634967804, + "step": 911 + }, + { + "ce_ib": 10.186466217041016, + "ce_orig": 0.9923035502433777, + "epoch": 0.2619886404486304, + "kl_loss": 0.34808266162872314, + "loss_ib": 0.013667291961610317, + "step": 911 + }, + { + "ce_ib": 7.47867488861084, + "ce_orig": 0.6916998028755188, + "epoch": 0.2619886404486304, + "kl_loss": 0.3453470766544342, + "loss_ib": 0.01093214564025402, + "step": 911 + }, + { + "ce_ib": 13.006325721740723, + "ce_orig": 1.4633382558822632, + "epoch": 0.26227622402760803, + "kl_loss": 0.23132070899009705, + "loss_ib": 0.015319532714784145, + "step": 912 + }, + { + "ce_ib": 6.6978654861450195, + "ce_orig": 0.7193461060523987, + "epoch": 0.26227622402760803, + "kl_loss": 0.24380119144916534, + "loss_ib": 0.009135877713561058, + "step": 912 + }, + { + "ce_ib": 7.76169490814209, + "ce_orig": 0.75163733959198, + "epoch": 0.26227622402760803, + "kl_loss": 0.27823013067245483, + "loss_ib": 0.010543995536863804, + "step": 912 + }, + { + "ce_ib": 10.430732727050781, + "ce_orig": 0.5616188049316406, + "epoch": 0.26227622402760803, + "kl_loss": 0.2974989116191864, + "loss_ib": 0.013405721634626389, + "step": 912 + }, + { + "ce_ib": 7.609768390655518, + "ce_orig": 0.4176271855831146, + "epoch": 0.26256380760658565, + "kl_loss": 0.38976022601127625, + "loss_ib": 0.011507370509207249, + "step": 913 + }, + { + "ce_ib": 11.438220977783203, + "ce_orig": 1.6095136404037476, + "epoch": 0.26256380760658565, + "kl_loss": 0.24979974329471588, + "loss_ib": 0.013936217874288559, + "step": 913 + }, + { + "ce_ib": 11.603028297424316, + "ce_orig": 1.417939305305481, + "epoch": 0.26256380760658565, + "kl_loss": 0.2228064239025116, + "loss_ib": 0.013831092976033688, + "step": 913 + }, + { + "ce_ib": 8.409954071044922, + "ce_orig": 0.827298104763031, + "epoch": 0.26256380760658565, + "kl_loss": 0.3605518341064453, + "loss_ib": 0.012015472166240215, + "step": 913 + }, + { + "ce_ib": 4.817587852478027, + "ce_orig": 0.4817865192890167, + "epoch": 0.2628513911855633, + "kl_loss": 0.25332438945770264, + "loss_ib": 0.007350832223892212, + "step": 914 + }, + { + "ce_ib": 8.813254356384277, + "ce_orig": 0.6531316041946411, + "epoch": 0.2628513911855633, + "kl_loss": 0.42108702659606934, + "loss_ib": 0.01302412524819374, + "step": 914 + }, + { + "ce_ib": 11.274821281433105, + "ce_orig": 1.3683724403381348, + "epoch": 0.2628513911855633, + "kl_loss": 0.42044275999069214, + "loss_ib": 0.015479249879717827, + "step": 914 + }, + { + "ce_ib": 4.2823686599731445, + "ce_orig": 0.432784765958786, + "epoch": 0.2628513911855633, + "kl_loss": 0.5916892290115356, + "loss_ib": 0.010199260897934437, + "step": 914 + }, + { + "epoch": 0.26313897476454096, + "grad_norm": 0.104263536632061, + "learning_rate": 9.914949363386417e-06, + "loss": 0.9239, + "step": 915 + }, + { + "ce_ib": 10.224993705749512, + "ce_orig": 0.7435204982757568, + "epoch": 0.26313897476454096, + "kl_loss": 0.48760131001472473, + "loss_ib": 0.01510100718587637, + "step": 915 + }, + { + "ce_ib": 5.030897617340088, + "ce_orig": 0.5878293514251709, + "epoch": 0.26313897476454096, + "kl_loss": 0.2752516567707062, + "loss_ib": 0.007783413864672184, + "step": 915 + }, + { + "ce_ib": 9.2077054977417, + "ce_orig": 1.0866307020187378, + "epoch": 0.26313897476454096, + "kl_loss": 0.27611714601516724, + "loss_ib": 0.011968877166509628, + "step": 915 + }, + { + "ce_ib": 7.913527965545654, + "ce_orig": 0.6080297827720642, + "epoch": 0.26313897476454096, + "kl_loss": 0.36963915824890137, + "loss_ib": 0.011609918437898159, + "step": 915 + }, + { + "ce_ib": 4.733314037322998, + "ce_orig": 0.3992489278316498, + "epoch": 0.2634265583435186, + "kl_loss": 0.2293146848678589, + "loss_ib": 0.0070264614187181, + "step": 916 + }, + { + "ce_ib": 9.097807884216309, + "ce_orig": 0.9395797848701477, + "epoch": 0.2634265583435186, + "kl_loss": 0.2749103009700775, + "loss_ib": 0.01184691023081541, + "step": 916 + }, + { + "ce_ib": 5.224862098693848, + "ce_orig": 0.49672380089759827, + "epoch": 0.2634265583435186, + "kl_loss": 0.3457143306732178, + "loss_ib": 0.008682005107402802, + "step": 916 + }, + { + "ce_ib": 3.887174367904663, + "ce_orig": 0.37023693323135376, + "epoch": 0.2634265583435186, + "kl_loss": 0.24267533421516418, + "loss_ib": 0.006313927471637726, + "step": 916 + }, + { + "ce_ib": 6.222542762756348, + "ce_orig": 0.5573530197143555, + "epoch": 0.2637141419224962, + "kl_loss": 0.30178603529930115, + "loss_ib": 0.009240402840077877, + "step": 917 + }, + { + "ce_ib": 9.022146224975586, + "ce_orig": 1.252068042755127, + "epoch": 0.2637141419224962, + "kl_loss": 0.26920855045318604, + "loss_ib": 0.01171423215419054, + "step": 917 + }, + { + "ce_ib": 7.715620994567871, + "ce_orig": 0.8185163140296936, + "epoch": 0.2637141419224962, + "kl_loss": 0.4483085572719574, + "loss_ib": 0.012198706157505512, + "step": 917 + }, + { + "ce_ib": 6.617023944854736, + "ce_orig": 1.0761107206344604, + "epoch": 0.2637141419224962, + "kl_loss": 0.3058355748653412, + "loss_ib": 0.00967537984251976, + "step": 917 + }, + { + "ce_ib": 5.220884799957275, + "ce_orig": 0.7539888620376587, + "epoch": 0.2640017255014739, + "kl_loss": 0.26427191495895386, + "loss_ib": 0.0078636035323143, + "step": 918 + }, + { + "ce_ib": 7.620891571044922, + "ce_orig": 0.7659056782722473, + "epoch": 0.2640017255014739, + "kl_loss": 0.30664142966270447, + "loss_ib": 0.010687305592000484, + "step": 918 + }, + { + "ce_ib": 8.80104923248291, + "ce_orig": 1.0766273736953735, + "epoch": 0.2640017255014739, + "kl_loss": 0.370442271232605, + "loss_ib": 0.01250547170639038, + "step": 918 + }, + { + "ce_ib": 12.601346969604492, + "ce_orig": 0.6492716073989868, + "epoch": 0.2640017255014739, + "kl_loss": 0.2531167268753052, + "loss_ib": 0.015132513828575611, + "step": 918 + }, + { + "ce_ib": 7.953497409820557, + "ce_orig": 0.7863147854804993, + "epoch": 0.2642893090804515, + "kl_loss": 0.25480780005455017, + "loss_ib": 0.010501575656235218, + "step": 919 + }, + { + "ce_ib": 9.819870948791504, + "ce_orig": 0.8340607285499573, + "epoch": 0.2642893090804515, + "kl_loss": 0.3362913727760315, + "loss_ib": 0.013182785362005234, + "step": 919 + }, + { + "ce_ib": 7.872880935668945, + "ce_orig": 1.1914820671081543, + "epoch": 0.2642893090804515, + "kl_loss": 0.2527698874473572, + "loss_ib": 0.010400580242276192, + "step": 919 + }, + { + "ce_ib": 10.222329139709473, + "ce_orig": 1.3306633234024048, + "epoch": 0.2642893090804515, + "kl_loss": 0.2606678009033203, + "loss_ib": 0.012829006649553776, + "step": 919 + }, + { + "epoch": 0.26457689265942913, + "grad_norm": 0.11017937958240509, + "learning_rate": 9.913518087164678e-06, + "loss": 0.8505, + "step": 920 + }, + { + "ce_ib": 7.44199800491333, + "ce_orig": 0.5371276140213013, + "epoch": 0.26457689265942913, + "kl_loss": 0.42192068696022034, + "loss_ib": 0.011661205440759659, + "step": 920 + }, + { + "ce_ib": 9.151060104370117, + "ce_orig": 1.068630337715149, + "epoch": 0.26457689265942913, + "kl_loss": 0.31581708788871765, + "loss_ib": 0.01230922993272543, + "step": 920 + }, + { + "ce_ib": 9.204903602600098, + "ce_orig": 0.909243643283844, + "epoch": 0.26457689265942913, + "kl_loss": 0.31501439213752747, + "loss_ib": 0.012355047278106213, + "step": 920 + }, + { + "ce_ib": 9.801901817321777, + "ce_orig": 0.9215630888938904, + "epoch": 0.26457689265942913, + "kl_loss": 0.3821442127227783, + "loss_ib": 0.013623344711959362, + "step": 920 + }, + { + "ce_ib": 9.883593559265137, + "ce_orig": 1.3681849241256714, + "epoch": 0.2648644762384068, + "kl_loss": 0.4136194586753845, + "loss_ib": 0.01401978824287653, + "step": 921 + }, + { + "ce_ib": 2.2384650707244873, + "ce_orig": 0.09530481696128845, + "epoch": 0.2648644762384068, + "kl_loss": 0.5867444276809692, + "loss_ib": 0.008105909451842308, + "step": 921 + }, + { + "ce_ib": 10.180109977722168, + "ce_orig": 0.8450271487236023, + "epoch": 0.2648644762384068, + "kl_loss": 0.2972285747528076, + "loss_ib": 0.013152395375072956, + "step": 921 + }, + { + "ce_ib": 7.981590270996094, + "ce_orig": 0.49811968207359314, + "epoch": 0.2648644762384068, + "kl_loss": 0.41247886419296265, + "loss_ib": 0.012106378562748432, + "step": 921 + }, + { + "ce_ib": 11.12768840789795, + "ce_orig": 1.3055758476257324, + "epoch": 0.26515205981738443, + "kl_loss": 0.4190482497215271, + "loss_ib": 0.015318172052502632, + "step": 922 + }, + { + "ce_ib": 6.9174723625183105, + "ce_orig": 0.5914834141731262, + "epoch": 0.26515205981738443, + "kl_loss": 0.275257408618927, + "loss_ib": 0.009670046158134937, + "step": 922 + }, + { + "ce_ib": 5.110889911651611, + "ce_orig": 0.4252597987651825, + "epoch": 0.26515205981738443, + "kl_loss": 0.25920987129211426, + "loss_ib": 0.007702989038079977, + "step": 922 + }, + { + "ce_ib": 9.542389869689941, + "ce_orig": 1.1555229425430298, + "epoch": 0.26515205981738443, + "kl_loss": 0.27457690238952637, + "loss_ib": 0.012288158759474754, + "step": 922 + }, + { + "ce_ib": 5.449411392211914, + "ce_orig": 0.7608138918876648, + "epoch": 0.26543964339636206, + "kl_loss": 0.2544456422328949, + "loss_ib": 0.007993867620825768, + "step": 923 + }, + { + "ce_ib": 5.343168258666992, + "ce_orig": 0.675761342048645, + "epoch": 0.26543964339636206, + "kl_loss": 0.2828470468521118, + "loss_ib": 0.00817163847386837, + "step": 923 + }, + { + "ce_ib": 5.600818157196045, + "ce_orig": 0.23253723978996277, + "epoch": 0.26543964339636206, + "kl_loss": 0.532296359539032, + "loss_ib": 0.010923781432211399, + "step": 923 + }, + { + "ce_ib": 7.541820526123047, + "ce_orig": 0.9155359268188477, + "epoch": 0.26543964339636206, + "kl_loss": 0.27482742071151733, + "loss_ib": 0.010290094651281834, + "step": 923 + }, + { + "ce_ib": 7.052559852600098, + "ce_orig": 0.744045078754425, + "epoch": 0.2657272269753397, + "kl_loss": 0.2872994840145111, + "loss_ib": 0.009925554506480694, + "step": 924 + }, + { + "ce_ib": 13.817998886108398, + "ce_orig": 1.8272193670272827, + "epoch": 0.2657272269753397, + "kl_loss": 0.3623213469982147, + "loss_ib": 0.017441213130950928, + "step": 924 + }, + { + "ce_ib": 10.127370834350586, + "ce_orig": 1.1621817350387573, + "epoch": 0.2657272269753397, + "kl_loss": 0.30838003754615784, + "loss_ib": 0.013211171142756939, + "step": 924 + }, + { + "ce_ib": 7.752861499786377, + "ce_orig": 0.7536821961402893, + "epoch": 0.2657272269753397, + "kl_loss": 0.3602597415447235, + "loss_ib": 0.011355457827448845, + "step": 924 + }, + { + "epoch": 0.26601481055431736, + "grad_norm": 0.11023896187543869, + "learning_rate": 9.912074973137413e-06, + "loss": 0.9011, + "step": 925 + }, + { + "ce_ib": 6.1034836769104, + "ce_orig": 0.6622889041900635, + "epoch": 0.26601481055431736, + "kl_loss": 0.2770423889160156, + "loss_ib": 0.00887390784919262, + "step": 925 + }, + { + "ce_ib": 8.66541576385498, + "ce_orig": 0.6636568903923035, + "epoch": 0.26601481055431736, + "kl_loss": 0.35214751958847046, + "loss_ib": 0.012186890468001366, + "step": 925 + }, + { + "ce_ib": 8.501686096191406, + "ce_orig": 1.0294500589370728, + "epoch": 0.26601481055431736, + "kl_loss": 0.32191595435142517, + "loss_ib": 0.011720845475792885, + "step": 925 + }, + { + "ce_ib": 8.163397789001465, + "ce_orig": 0.8285762071609497, + "epoch": 0.26601481055431736, + "kl_loss": 0.29768481850624084, + "loss_ib": 0.011140245944261551, + "step": 925 + }, + { + "ce_ib": 6.980233669281006, + "ce_orig": 0.7328478693962097, + "epoch": 0.266302394133295, + "kl_loss": 0.24676677584648132, + "loss_ib": 0.009447900578379631, + "step": 926 + }, + { + "ce_ib": 7.14872407913208, + "ce_orig": 0.8223768472671509, + "epoch": 0.266302394133295, + "kl_loss": 0.30274513363838196, + "loss_ib": 0.010176175273954868, + "step": 926 + }, + { + "ce_ib": 7.805938720703125, + "ce_orig": 0.9373571276664734, + "epoch": 0.266302394133295, + "kl_loss": 0.25375306606292725, + "loss_ib": 0.010343468748033047, + "step": 926 + }, + { + "ce_ib": 4.937993049621582, + "ce_orig": 0.6246491074562073, + "epoch": 0.266302394133295, + "kl_loss": 0.25294995307922363, + "loss_ib": 0.007467492483556271, + "step": 926 + }, + { + "ce_ib": 5.480818271636963, + "ce_orig": 0.5673008561134338, + "epoch": 0.2665899777122726, + "kl_loss": 0.2302064150571823, + "loss_ib": 0.0077828820794820786, + "step": 927 + }, + { + "ce_ib": 6.405802249908447, + "ce_orig": 0.5960977077484131, + "epoch": 0.2665899777122726, + "kl_loss": 0.2936103045940399, + "loss_ib": 0.009341904893517494, + "step": 927 + }, + { + "ce_ib": 11.765841484069824, + "ce_orig": 1.8276900053024292, + "epoch": 0.2665899777122726, + "kl_loss": 0.22832275927066803, + "loss_ib": 0.014049068093299866, + "step": 927 + }, + { + "ce_ib": 10.557626724243164, + "ce_orig": 1.3219131231307983, + "epoch": 0.2665899777122726, + "kl_loss": 0.31494155526161194, + "loss_ib": 0.01370704174041748, + "step": 927 + }, + { + "ce_ib": 7.791426181793213, + "ce_orig": 1.044633388519287, + "epoch": 0.2668775612912503, + "kl_loss": 0.22664925456047058, + "loss_ib": 0.010057918727397919, + "step": 928 + }, + { + "ce_ib": 5.639753341674805, + "ce_orig": 0.7303126454353333, + "epoch": 0.2668775612912503, + "kl_loss": 0.265280544757843, + "loss_ib": 0.00829255860298872, + "step": 928 + }, + { + "ce_ib": 9.726358413696289, + "ce_orig": 1.371625304222107, + "epoch": 0.2668775612912503, + "kl_loss": 0.2165038287639618, + "loss_ib": 0.011891396716237068, + "step": 928 + }, + { + "ce_ib": 6.140399932861328, + "ce_orig": 0.6564132571220398, + "epoch": 0.2668775612912503, + "kl_loss": 0.2733922004699707, + "loss_ib": 0.008874322287738323, + "step": 928 + }, + { + "ce_ib": 6.607884883880615, + "ce_orig": 0.5726844072341919, + "epoch": 0.2671651448702279, + "kl_loss": 0.36948060989379883, + "loss_ib": 0.010302690789103508, + "step": 929 + }, + { + "ce_ib": 8.565917015075684, + "ce_orig": 0.6998363733291626, + "epoch": 0.2671651448702279, + "kl_loss": 0.281715989112854, + "loss_ib": 0.011383076198399067, + "step": 929 + }, + { + "ce_ib": 5.31778621673584, + "ce_orig": 0.3831661343574524, + "epoch": 0.2671651448702279, + "kl_loss": 0.47300854325294495, + "loss_ib": 0.010047871619462967, + "step": 929 + }, + { + "ce_ib": 11.917003631591797, + "ce_orig": 1.680348515510559, + "epoch": 0.2671651448702279, + "kl_loss": 0.4127323031425476, + "loss_ib": 0.01604432612657547, + "step": 929 + }, + { + "epoch": 0.26745272844920553, + "grad_norm": 0.10447093099355698, + "learning_rate": 9.910620024781422e-06, + "loss": 0.9509, + "step": 930 + }, + { + "ce_ib": 11.539644241333008, + "ce_orig": 1.3957158327102661, + "epoch": 0.26745272844920553, + "kl_loss": 0.28435567021369934, + "loss_ib": 0.014383199624717236, + "step": 930 + }, + { + "ce_ib": 7.828366756439209, + "ce_orig": 1.2391057014465332, + "epoch": 0.26745272844920553, + "kl_loss": 0.2012348175048828, + "loss_ib": 0.009840714745223522, + "step": 930 + }, + { + "ce_ib": 8.055999755859375, + "ce_orig": 0.834807276725769, + "epoch": 0.26745272844920553, + "kl_loss": 0.3234785199165344, + "loss_ib": 0.011290784925222397, + "step": 930 + }, + { + "ce_ib": 2.334864854812622, + "ce_orig": 0.1836487501859665, + "epoch": 0.26745272844920553, + "kl_loss": 0.6265314817428589, + "loss_ib": 0.008600179105997086, + "step": 930 + }, + { + "ce_ib": 6.897594928741455, + "ce_orig": 0.6649829745292664, + "epoch": 0.2677403120281832, + "kl_loss": 0.2482946813106537, + "loss_ib": 0.009380541741847992, + "step": 931 + }, + { + "ce_ib": 2.828857660293579, + "ce_orig": 0.37604042887687683, + "epoch": 0.2677403120281832, + "kl_loss": 0.566237211227417, + "loss_ib": 0.008491230197250843, + "step": 931 + }, + { + "ce_ib": 10.757418632507324, + "ce_orig": 1.2388101816177368, + "epoch": 0.2677403120281832, + "kl_loss": 0.3185473084449768, + "loss_ib": 0.013942892663180828, + "step": 931 + }, + { + "ce_ib": 5.737816333770752, + "ce_orig": 0.8773601651191711, + "epoch": 0.2677403120281832, + "kl_loss": 0.28802287578582764, + "loss_ib": 0.008618045598268509, + "step": 931 + }, + { + "ce_ib": 5.9482855796813965, + "ce_orig": 0.7482618689537048, + "epoch": 0.26802789560716084, + "kl_loss": 0.36070847511291504, + "loss_ib": 0.00955536961555481, + "step": 932 + }, + { + "ce_ib": 15.711875915527344, + "ce_orig": 2.0872745513916016, + "epoch": 0.26802789560716084, + "kl_loss": 0.3053112328052521, + "loss_ib": 0.018764987587928772, + "step": 932 + }, + { + "ce_ib": 7.429306507110596, + "ce_orig": 0.7280347347259521, + "epoch": 0.26802789560716084, + "kl_loss": 0.2755368649959564, + "loss_ib": 0.010184675455093384, + "step": 932 + }, + { + "ce_ib": 6.810687065124512, + "ce_orig": 0.5476087927818298, + "epoch": 0.26802789560716084, + "kl_loss": 0.254897803068161, + "loss_ib": 0.009359664283692837, + "step": 932 + }, + { + "ce_ib": 4.790681838989258, + "ce_orig": 0.6239033341407776, + "epoch": 0.26831547918613846, + "kl_loss": 0.2333354651927948, + "loss_ib": 0.007124036550521851, + "step": 933 + }, + { + "ce_ib": 10.430938720703125, + "ce_orig": 1.3715506792068481, + "epoch": 0.26831547918613846, + "kl_loss": 0.3643788695335388, + "loss_ib": 0.014074727892875671, + "step": 933 + }, + { + "ce_ib": 8.228853225708008, + "ce_orig": 0.7698665857315063, + "epoch": 0.26831547918613846, + "kl_loss": 0.25796443223953247, + "loss_ib": 0.010808497667312622, + "step": 933 + }, + { + "ce_ib": 10.43002986907959, + "ce_orig": 0.6817733645439148, + "epoch": 0.26831547918613846, + "kl_loss": 0.40837064385414124, + "loss_ib": 0.01451373565942049, + "step": 933 + }, + { + "ce_ib": 6.217407703399658, + "ce_orig": 0.5401474237442017, + "epoch": 0.2686030627651161, + "kl_loss": 0.244707390666008, + "loss_ib": 0.008664481341838837, + "step": 934 + }, + { + "ce_ib": 6.634978294372559, + "ce_orig": 0.7534041404724121, + "epoch": 0.2686030627651161, + "kl_loss": 0.2863770127296448, + "loss_ib": 0.009498748928308487, + "step": 934 + }, + { + "ce_ib": 6.577731132507324, + "ce_orig": 0.7866048812866211, + "epoch": 0.2686030627651161, + "kl_loss": 0.3120877146720886, + "loss_ib": 0.009698607958853245, + "step": 934 + }, + { + "ce_ib": 6.873836994171143, + "ce_orig": 0.6249902248382568, + "epoch": 0.2686030627651161, + "kl_loss": 0.2403496652841568, + "loss_ib": 0.00927733350545168, + "step": 934 + }, + { + "epoch": 0.26889064634409376, + "grad_norm": 0.09980176389217377, + "learning_rate": 9.909153245602012e-06, + "loss": 0.8424, + "step": 935 + }, + { + "ce_ib": 8.975276947021484, + "ce_orig": 1.0671329498291016, + "epoch": 0.26889064634409376, + "kl_loss": 0.31780779361724854, + "loss_ib": 0.012153354473412037, + "step": 935 + }, + { + "ce_ib": 7.080759048461914, + "ce_orig": 0.6667758226394653, + "epoch": 0.26889064634409376, + "kl_loss": 0.4114856719970703, + "loss_ib": 0.01119561679661274, + "step": 935 + }, + { + "ce_ib": 8.725547790527344, + "ce_orig": 0.8014039993286133, + "epoch": 0.26889064634409376, + "kl_loss": 0.23206469416618347, + "loss_ib": 0.011046194471418858, + "step": 935 + }, + { + "ce_ib": 6.310739040374756, + "ce_orig": 0.6323633193969727, + "epoch": 0.26889064634409376, + "kl_loss": 0.3221661448478699, + "loss_ib": 0.009532400406897068, + "step": 935 + }, + { + "ce_ib": 6.315284729003906, + "ce_orig": 0.5775780081748962, + "epoch": 0.2691782299230714, + "kl_loss": 0.29390376806259155, + "loss_ib": 0.00925432238727808, + "step": 936 + }, + { + "ce_ib": 4.208686828613281, + "ce_orig": 0.4354095160961151, + "epoch": 0.2691782299230714, + "kl_loss": 0.297516405582428, + "loss_ib": 0.00718385074287653, + "step": 936 + }, + { + "ce_ib": 8.687440872192383, + "ce_orig": 0.6239301562309265, + "epoch": 0.2691782299230714, + "kl_loss": 0.30756255984306335, + "loss_ib": 0.011763066053390503, + "step": 936 + }, + { + "ce_ib": 8.584075927734375, + "ce_orig": 1.136924386024475, + "epoch": 0.2691782299230714, + "kl_loss": 0.2599954903125763, + "loss_ib": 0.011184030212461948, + "step": 936 + }, + { + "ce_ib": 5.511157035827637, + "ce_orig": 0.6932129859924316, + "epoch": 0.269465813502049, + "kl_loss": 0.27716803550720215, + "loss_ib": 0.008282837457954884, + "step": 937 + }, + { + "ce_ib": 9.595725059509277, + "ce_orig": 0.9539211988449097, + "epoch": 0.269465813502049, + "kl_loss": 0.24416208267211914, + "loss_ib": 0.012037346139550209, + "step": 937 + }, + { + "ce_ib": 6.6203083992004395, + "ce_orig": 0.9232450127601624, + "epoch": 0.269465813502049, + "kl_loss": 0.20647379755973816, + "loss_ib": 0.008685045875608921, + "step": 937 + }, + { + "ce_ib": 7.428555488586426, + "ce_orig": 0.48220083117485046, + "epoch": 0.269465813502049, + "kl_loss": 0.35726824402809143, + "loss_ib": 0.01100123766809702, + "step": 937 + }, + { + "ce_ib": 8.365070343017578, + "ce_orig": 0.43749791383743286, + "epoch": 0.2697533970810267, + "kl_loss": 0.30819666385650635, + "loss_ib": 0.01144703570753336, + "step": 938 + }, + { + "ce_ib": 8.154250144958496, + "ce_orig": 0.9003611207008362, + "epoch": 0.2697533970810267, + "kl_loss": 0.4072403311729431, + "loss_ib": 0.012226653285324574, + "step": 938 + }, + { + "ce_ib": 6.375957012176514, + "ce_orig": 0.6323530673980713, + "epoch": 0.2697533970810267, + "kl_loss": 0.32399243116378784, + "loss_ib": 0.009615881368517876, + "step": 938 + }, + { + "ce_ib": 7.025842666625977, + "ce_orig": 0.43968331813812256, + "epoch": 0.2697533970810267, + "kl_loss": 0.312505841255188, + "loss_ib": 0.010150901041924953, + "step": 938 + }, + { + "ce_ib": 7.08709192276001, + "ce_orig": 0.5549724698066711, + "epoch": 0.2700409806600043, + "kl_loss": 0.4219147861003876, + "loss_ib": 0.011306239292025566, + "step": 939 + }, + { + "ce_ib": 7.558558464050293, + "ce_orig": 1.0065475702285767, + "epoch": 0.2700409806600043, + "kl_loss": 0.2504430413246155, + "loss_ib": 0.010062988847494125, + "step": 939 + }, + { + "ce_ib": 6.018697738647461, + "ce_orig": 0.8349258899688721, + "epoch": 0.2700409806600043, + "kl_loss": 0.20029550790786743, + "loss_ib": 0.008021652698516846, + "step": 939 + }, + { + "ce_ib": 10.575093269348145, + "ce_orig": 1.4976614713668823, + "epoch": 0.2700409806600043, + "kl_loss": 0.3008938133716583, + "loss_ib": 0.013584030792117119, + "step": 939 + }, + { + "epoch": 0.27032856423898194, + "grad_norm": 0.10055164247751236, + "learning_rate": 9.907674639132995e-06, + "loss": 0.8408, + "step": 940 + }, + { + "ce_ib": 4.028119087219238, + "ce_orig": 0.4562585949897766, + "epoch": 0.27032856423898194, + "kl_loss": 0.2093207985162735, + "loss_ib": 0.006121327169239521, + "step": 940 + }, + { + "ce_ib": 6.0146613121032715, + "ce_orig": 0.6793175935745239, + "epoch": 0.27032856423898194, + "kl_loss": 0.21066246926784515, + "loss_ib": 0.00812128558754921, + "step": 940 + }, + { + "ce_ib": 8.57176399230957, + "ce_orig": 1.1877782344818115, + "epoch": 0.27032856423898194, + "kl_loss": 0.32324960827827454, + "loss_ib": 0.01180425938218832, + "step": 940 + }, + { + "ce_ib": 10.997118949890137, + "ce_orig": 1.800614595413208, + "epoch": 0.27032856423898194, + "kl_loss": 0.2794951796531677, + "loss_ib": 0.013792071491479874, + "step": 940 + }, + { + "ce_ib": 7.437707424163818, + "ce_orig": 1.0083261728286743, + "epoch": 0.2706161478179596, + "kl_loss": 0.21823345124721527, + "loss_ib": 0.009620042517781258, + "step": 941 + }, + { + "ce_ib": 6.081821441650391, + "ce_orig": 0.7146956324577332, + "epoch": 0.2706161478179596, + "kl_loss": 0.23236779868602753, + "loss_ib": 0.008405499160289764, + "step": 941 + }, + { + "ce_ib": 10.360185623168945, + "ce_orig": 1.2722724676132202, + "epoch": 0.2706161478179596, + "kl_loss": 0.4324212670326233, + "loss_ib": 0.014684397727251053, + "step": 941 + }, + { + "ce_ib": 8.173184394836426, + "ce_orig": 0.3789297640323639, + "epoch": 0.2706161478179596, + "kl_loss": 0.318384051322937, + "loss_ib": 0.011357024312019348, + "step": 941 + }, + { + "ce_ib": 7.6668782234191895, + "ce_orig": 0.6972788572311401, + "epoch": 0.27090373139693724, + "kl_loss": 0.40589210391044617, + "loss_ib": 0.01172579824924469, + "step": 942 + }, + { + "ce_ib": 8.189241409301758, + "ce_orig": 0.7313915491104126, + "epoch": 0.27090373139693724, + "kl_loss": 0.28188782930374146, + "loss_ib": 0.011008119210600853, + "step": 942 + }, + { + "ce_ib": 7.981906414031982, + "ce_orig": 0.9324852824211121, + "epoch": 0.27090373139693724, + "kl_loss": 0.3256570100784302, + "loss_ib": 0.011238477192819118, + "step": 942 + }, + { + "ce_ib": 8.018670082092285, + "ce_orig": 0.8148074150085449, + "epoch": 0.27090373139693724, + "kl_loss": 0.3132972717285156, + "loss_ib": 0.01115164253860712, + "step": 942 + }, + { + "ce_ib": 8.356046676635742, + "ce_orig": 0.5797178745269775, + "epoch": 0.27119131497591487, + "kl_loss": 0.4871473014354706, + "loss_ib": 0.013227519579231739, + "step": 943 + }, + { + "ce_ib": 10.591626167297363, + "ce_orig": 1.4526135921478271, + "epoch": 0.27119131497591487, + "kl_loss": 0.374215304851532, + "loss_ib": 0.01433377992361784, + "step": 943 + }, + { + "ce_ib": 6.816173553466797, + "ce_orig": 0.7827908396720886, + "epoch": 0.27119131497591487, + "kl_loss": 0.31245943903923035, + "loss_ib": 0.009940768592059612, + "step": 943 + }, + { + "ce_ib": 8.920738220214844, + "ce_orig": 1.1158427000045776, + "epoch": 0.27119131497591487, + "kl_loss": 0.3286566436290741, + "loss_ib": 0.012207304127514362, + "step": 943 + }, + { + "ce_ib": 9.12059211730957, + "ce_orig": 1.1127867698669434, + "epoch": 0.2714788985548925, + "kl_loss": 0.5925597548484802, + "loss_ib": 0.015046189539134502, + "step": 944 + }, + { + "ce_ib": 7.13171911239624, + "ce_orig": 0.7354855537414551, + "epoch": 0.2714788985548925, + "kl_loss": 0.3313751220703125, + "loss_ib": 0.010445470921695232, + "step": 944 + }, + { + "ce_ib": 5.612328052520752, + "ce_orig": 0.6953266859054565, + "epoch": 0.2714788985548925, + "kl_loss": 0.22593432664871216, + "loss_ib": 0.007871671579778194, + "step": 944 + }, + { + "ce_ib": 7.506075859069824, + "ce_orig": 0.7433436512947083, + "epoch": 0.2714788985548925, + "kl_loss": 0.3743892014026642, + "loss_ib": 0.011249967850744724, + "step": 944 + }, + { + "epoch": 0.27176648213387017, + "grad_norm": 0.10328007489442825, + "learning_rate": 9.906184208936675e-06, + "loss": 0.8559, + "step": 945 + }, + { + "ce_ib": 6.606997966766357, + "ce_orig": 0.9594669342041016, + "epoch": 0.27176648213387017, + "kl_loss": 0.3110538423061371, + "loss_ib": 0.00971753615885973, + "step": 945 + }, + { + "ce_ib": 7.5082621574401855, + "ce_orig": 0.7433197498321533, + "epoch": 0.27176648213387017, + "kl_loss": 0.3739091157913208, + "loss_ib": 0.011247353628277779, + "step": 945 + }, + { + "ce_ib": 6.4652605056762695, + "ce_orig": 0.44874706864356995, + "epoch": 0.27176648213387017, + "kl_loss": 0.3493693470954895, + "loss_ib": 0.009958953596651554, + "step": 945 + }, + { + "ce_ib": 11.719304084777832, + "ce_orig": 1.1819493770599365, + "epoch": 0.27176648213387017, + "kl_loss": 0.2937600314617157, + "loss_ib": 0.01465690415352583, + "step": 945 + }, + { + "ce_ib": 12.488480567932129, + "ce_orig": 1.4912511110305786, + "epoch": 0.2720540657128478, + "kl_loss": 0.3444375991821289, + "loss_ib": 0.015932856127619743, + "step": 946 + }, + { + "ce_ib": 6.640789985656738, + "ce_orig": 0.7050909399986267, + "epoch": 0.2720540657128478, + "kl_loss": 0.2555736005306244, + "loss_ib": 0.009196525439620018, + "step": 946 + }, + { + "ce_ib": 4.94317626953125, + "ce_orig": 0.5460999608039856, + "epoch": 0.2720540657128478, + "kl_loss": 0.28116828203201294, + "loss_ib": 0.007754858583211899, + "step": 946 + }, + { + "ce_ib": 12.355217933654785, + "ce_orig": 1.9291751384735107, + "epoch": 0.2720540657128478, + "kl_loss": 0.3321114778518677, + "loss_ib": 0.015676332637667656, + "step": 946 + }, + { + "ce_ib": 9.124369621276855, + "ce_orig": 1.180700659751892, + "epoch": 0.2723416492918254, + "kl_loss": 0.2571544945240021, + "loss_ib": 0.011695913970470428, + "step": 947 + }, + { + "ce_ib": 4.049078941345215, + "ce_orig": 0.46376824378967285, + "epoch": 0.2723416492918254, + "kl_loss": 0.5594756007194519, + "loss_ib": 0.00964383501559496, + "step": 947 + }, + { + "ce_ib": 6.723659515380859, + "ce_orig": 0.8255409002304077, + "epoch": 0.2723416492918254, + "kl_loss": 0.5856155157089233, + "loss_ib": 0.012579815462231636, + "step": 947 + }, + { + "ce_ib": 6.566412925720215, + "ce_orig": 0.3921998143196106, + "epoch": 0.2723416492918254, + "kl_loss": 0.8435590863227844, + "loss_ib": 0.015002003870904446, + "step": 947 + }, + { + "ce_ib": 7.8375725746154785, + "ce_orig": 1.2053213119506836, + "epoch": 0.2726292328708031, + "kl_loss": 0.3942751884460449, + "loss_ib": 0.011780323460698128, + "step": 948 + }, + { + "ce_ib": 6.784183025360107, + "ce_orig": 0.8397347331047058, + "epoch": 0.2726292328708031, + "kl_loss": 0.3014325499534607, + "loss_ib": 0.009798509068787098, + "step": 948 + }, + { + "ce_ib": 9.067359924316406, + "ce_orig": 0.6075240969657898, + "epoch": 0.2726292328708031, + "kl_loss": 0.3293374180793762, + "loss_ib": 0.01236073486506939, + "step": 948 + }, + { + "ce_ib": 8.427669525146484, + "ce_orig": 0.67606520652771, + "epoch": 0.2726292328708031, + "kl_loss": 0.411496102809906, + "loss_ib": 0.012542630545794964, + "step": 948 + }, + { + "ce_ib": 9.243931770324707, + "ce_orig": 0.8393007516860962, + "epoch": 0.2729168164497807, + "kl_loss": 0.31467002630233765, + "loss_ib": 0.012390632182359695, + "step": 949 + }, + { + "ce_ib": 10.456159591674805, + "ce_orig": 1.1939802169799805, + "epoch": 0.2729168164497807, + "kl_loss": 0.30369293689727783, + "loss_ib": 0.013493089005351067, + "step": 949 + }, + { + "ce_ib": 3.625389337539673, + "ce_orig": 0.4330550730228424, + "epoch": 0.2729168164497807, + "kl_loss": 0.21511869132518768, + "loss_ib": 0.005776576232165098, + "step": 949 + }, + { + "ce_ib": 9.729208946228027, + "ce_orig": 1.1287404298782349, + "epoch": 0.2729168164497807, + "kl_loss": 0.29766565561294556, + "loss_ib": 0.012705864384770393, + "step": 949 + }, + { + "epoch": 0.27320440002875834, + "grad_norm": 0.11360763758420944, + "learning_rate": 9.904681958603847e-06, + "loss": 0.8716, + "step": 950 + }, + { + "ce_ib": 5.15933084487915, + "ce_orig": 0.5913940668106079, + "epoch": 0.27320440002875834, + "kl_loss": 0.2843480110168457, + "loss_ib": 0.0080028111115098, + "step": 950 + }, + { + "ce_ib": 6.33256721496582, + "ce_orig": 0.7312302589416504, + "epoch": 0.27320440002875834, + "kl_loss": 0.3365132212638855, + "loss_ib": 0.00969769898802042, + "step": 950 + }, + { + "ce_ib": 7.236344814300537, + "ce_orig": 0.9631039500236511, + "epoch": 0.27320440002875834, + "kl_loss": 0.3992907404899597, + "loss_ib": 0.011229252442717552, + "step": 950 + }, + { + "ce_ib": 8.233247756958008, + "ce_orig": 0.9781149625778198, + "epoch": 0.27320440002875834, + "kl_loss": 0.32408010959625244, + "loss_ib": 0.011474048718810081, + "step": 950 + }, + { + "ce_ib": 9.425809860229492, + "ce_orig": 1.0185048580169678, + "epoch": 0.273491983607736, + "kl_loss": 0.3078628480434418, + "loss_ib": 0.012504437938332558, + "step": 951 + }, + { + "ce_ib": 5.389331340789795, + "ce_orig": 0.7841058969497681, + "epoch": 0.273491983607736, + "kl_loss": 0.28654882311820984, + "loss_ib": 0.008254819549620152, + "step": 951 + }, + { + "ce_ib": 9.956135749816895, + "ce_orig": 1.0741745233535767, + "epoch": 0.273491983607736, + "kl_loss": 0.2052108645439148, + "loss_ib": 0.0120082451030612, + "step": 951 + }, + { + "ce_ib": 10.591538429260254, + "ce_orig": 1.5130939483642578, + "epoch": 0.273491983607736, + "kl_loss": 0.3333120346069336, + "loss_ib": 0.013924659229815006, + "step": 951 + }, + { + "ce_ib": 11.254968643188477, + "ce_orig": 1.5675315856933594, + "epoch": 0.27377956718671365, + "kl_loss": 0.2911054790019989, + "loss_ib": 0.014166023582220078, + "step": 952 + }, + { + "ce_ib": 5.313507556915283, + "ce_orig": 0.3765413761138916, + "epoch": 0.27377956718671365, + "kl_loss": 0.23264440894126892, + "loss_ib": 0.007639951538294554, + "step": 952 + }, + { + "ce_ib": 8.539169311523438, + "ce_orig": 1.1101938486099243, + "epoch": 0.27377956718671365, + "kl_loss": 0.30267441272735596, + "loss_ib": 0.011565913446247578, + "step": 952 + }, + { + "ce_ib": 8.370697975158691, + "ce_orig": 1.101776123046875, + "epoch": 0.27377956718671365, + "kl_loss": 0.2343529760837555, + "loss_ib": 0.010714228264987469, + "step": 952 + }, + { + "ce_ib": 7.463455677032471, + "ce_orig": 0.7007122039794922, + "epoch": 0.27406715076569127, + "kl_loss": 0.2482740581035614, + "loss_ib": 0.009946195408701897, + "step": 953 + }, + { + "ce_ib": 9.414673805236816, + "ce_orig": 0.7169649004936218, + "epoch": 0.27406715076569127, + "kl_loss": 0.386259526014328, + "loss_ib": 0.013277268968522549, + "step": 953 + }, + { + "ce_ib": 6.160613059997559, + "ce_orig": 0.6735572814941406, + "epoch": 0.27406715076569127, + "kl_loss": 0.2884378135204315, + "loss_ib": 0.009044991806149483, + "step": 953 + }, + { + "ce_ib": 4.295875549316406, + "ce_orig": 0.3026116192340851, + "epoch": 0.27406715076569127, + "kl_loss": 0.3349815607070923, + "loss_ib": 0.007645691279321909, + "step": 953 + }, + { + "ce_ib": 11.889391899108887, + "ce_orig": 1.7062128782272339, + "epoch": 0.2743547343446689, + "kl_loss": 0.3287060856819153, + "loss_ib": 0.01517645362764597, + "step": 954 + }, + { + "ce_ib": 9.97256851196289, + "ce_orig": 0.8832455277442932, + "epoch": 0.2743547343446689, + "kl_loss": 0.3570169508457184, + "loss_ib": 0.013542737811803818, + "step": 954 + }, + { + "ce_ib": 11.863001823425293, + "ce_orig": 1.066734790802002, + "epoch": 0.2743547343446689, + "kl_loss": 0.3354340195655823, + "loss_ib": 0.015217342413961887, + "step": 954 + }, + { + "ce_ib": 6.305656909942627, + "ce_orig": 0.609102725982666, + "epoch": 0.2743547343446689, + "kl_loss": 0.33587515354156494, + "loss_ib": 0.009664407931268215, + "step": 954 + }, + { + "epoch": 0.2746423179236466, + "grad_norm": 0.09016523510217667, + "learning_rate": 9.903167891753781e-06, + "loss": 0.863, + "step": 955 + }, + { + "ce_ib": 7.7995100021362305, + "ce_orig": 0.48420944809913635, + "epoch": 0.2746423179236466, + "kl_loss": 0.4207814335823059, + "loss_ib": 0.012007324025034904, + "step": 955 + }, + { + "ce_ib": 9.223494529724121, + "ce_orig": 0.8398165702819824, + "epoch": 0.2746423179236466, + "kl_loss": 0.31191039085388184, + "loss_ib": 0.012342598289251328, + "step": 955 + }, + { + "ce_ib": 9.061304092407227, + "ce_orig": 0.5709256529808044, + "epoch": 0.2746423179236466, + "kl_loss": 0.35397371649742126, + "loss_ib": 0.012601041235029697, + "step": 955 + }, + { + "ce_ib": 6.83884859085083, + "ce_orig": 0.8219555020332336, + "epoch": 0.2746423179236466, + "kl_loss": 0.4126385450363159, + "loss_ib": 0.01096523366868496, + "step": 955 + }, + { + "ce_ib": 4.699288368225098, + "ce_orig": 0.44520479440689087, + "epoch": 0.2749299015026242, + "kl_loss": 0.325499027967453, + "loss_ib": 0.007954278029501438, + "step": 956 + }, + { + "ce_ib": 8.23465633392334, + "ce_orig": 0.8416075110435486, + "epoch": 0.2749299015026242, + "kl_loss": 0.21434545516967773, + "loss_ib": 0.010378110222518444, + "step": 956 + }, + { + "ce_ib": 6.925322532653809, + "ce_orig": 0.47291597723960876, + "epoch": 0.2749299015026242, + "kl_loss": 0.8208890557289124, + "loss_ib": 0.01513421256095171, + "step": 956 + }, + { + "ce_ib": 4.961340427398682, + "ce_orig": 0.7618390917778015, + "epoch": 0.2749299015026242, + "kl_loss": 0.2526277005672455, + "loss_ib": 0.0074876174330711365, + "step": 956 + }, + { + "ce_ib": 7.343484401702881, + "ce_orig": 0.8298138380050659, + "epoch": 0.2752174850816018, + "kl_loss": 0.36207157373428345, + "loss_ib": 0.010964199900627136, + "step": 957 + }, + { + "ce_ib": 7.905651569366455, + "ce_orig": 1.0571790933609009, + "epoch": 0.2752174850816018, + "kl_loss": 0.27009880542755127, + "loss_ib": 0.010606639087200165, + "step": 957 + }, + { + "ce_ib": 6.430732727050781, + "ce_orig": 0.7019039392471313, + "epoch": 0.2752174850816018, + "kl_loss": 0.3244302272796631, + "loss_ib": 0.009675034321844578, + "step": 957 + }, + { + "ce_ib": 9.795534133911133, + "ce_orig": 1.5741654634475708, + "epoch": 0.2752174850816018, + "kl_loss": 0.27315622568130493, + "loss_ib": 0.012527096085250378, + "step": 957 + }, + { + "ce_ib": 7.98039436340332, + "ce_orig": 0.5512452721595764, + "epoch": 0.2755050686605795, + "kl_loss": 0.3615424633026123, + "loss_ib": 0.011595819145441055, + "step": 958 + }, + { + "ce_ib": 6.763614654541016, + "ce_orig": 0.6786366105079651, + "epoch": 0.2755050686605795, + "kl_loss": 0.28236159682273865, + "loss_ib": 0.009587230160832405, + "step": 958 + }, + { + "ce_ib": 4.218715667724609, + "ce_orig": 0.6746366620063782, + "epoch": 0.2755050686605795, + "kl_loss": 0.3110579550266266, + "loss_ib": 0.007329294923692942, + "step": 958 + }, + { + "ce_ib": 11.259781837463379, + "ce_orig": 1.4071563482284546, + "epoch": 0.2755050686605795, + "kl_loss": 0.25216126441955566, + "loss_ib": 0.013781394809484482, + "step": 958 + }, + { + "ce_ib": 5.040888786315918, + "ce_orig": 0.8139171600341797, + "epoch": 0.2757926522395571, + "kl_loss": 0.1931689977645874, + "loss_ib": 0.0069725788198411465, + "step": 959 + }, + { + "ce_ib": 8.201353073120117, + "ce_orig": 0.8536245822906494, + "epoch": 0.2757926522395571, + "kl_loss": 0.2896878719329834, + "loss_ib": 0.011098232120275497, + "step": 959 + }, + { + "ce_ib": 5.141984939575195, + "ce_orig": 0.36424100399017334, + "epoch": 0.2757926522395571, + "kl_loss": 0.2921089828014374, + "loss_ib": 0.008063074201345444, + "step": 959 + }, + { + "ce_ib": 10.956453323364258, + "ce_orig": 1.2245234251022339, + "epoch": 0.2757926522395571, + "kl_loss": 0.4386565685272217, + "loss_ib": 0.015343018807470798, + "step": 959 + }, + { + "epoch": 0.27608023581853475, + "grad_norm": 0.1057107001543045, + "learning_rate": 9.901642012034214e-06, + "loss": 0.7911, + "step": 960 + }, + { + "ce_ib": 13.470467567443848, + "ce_orig": 1.7405394315719604, + "epoch": 0.27608023581853475, + "kl_loss": 0.328184574842453, + "loss_ib": 0.01675231382250786, + "step": 960 + }, + { + "ce_ib": 6.385258197784424, + "ce_orig": 0.726634681224823, + "epoch": 0.27608023581853475, + "kl_loss": 0.2683427333831787, + "loss_ib": 0.009068685583770275, + "step": 960 + }, + { + "ce_ib": 12.370391845703125, + "ce_orig": 1.2445263862609863, + "epoch": 0.27608023581853475, + "kl_loss": 0.29927101731300354, + "loss_ib": 0.015363101847469807, + "step": 960 + }, + { + "ce_ib": 8.821762084960938, + "ce_orig": 1.0471879243850708, + "epoch": 0.27608023581853475, + "kl_loss": 0.19080427289009094, + "loss_ib": 0.010729804635047913, + "step": 960 + }, + { + "ce_ib": 8.348326683044434, + "ce_orig": 1.2821494340896606, + "epoch": 0.2763678193975124, + "kl_loss": 0.4306579828262329, + "loss_ib": 0.012654906138777733, + "step": 961 + }, + { + "ce_ib": 9.511332511901855, + "ce_orig": 1.11271333694458, + "epoch": 0.2763678193975124, + "kl_loss": 0.39218825101852417, + "loss_ib": 0.013433215208351612, + "step": 961 + }, + { + "ce_ib": 10.88101577758789, + "ce_orig": 1.3116422891616821, + "epoch": 0.2763678193975124, + "kl_loss": 0.28412100672721863, + "loss_ib": 0.013722226023674011, + "step": 961 + }, + { + "ce_ib": 4.692732810974121, + "ce_orig": 0.5053314566612244, + "epoch": 0.2763678193975124, + "kl_loss": 0.20232996344566345, + "loss_ib": 0.006716032512485981, + "step": 961 + }, + { + "ce_ib": 4.9355149269104, + "ce_orig": 0.46557968854904175, + "epoch": 0.27665540297649005, + "kl_loss": 0.5611593723297119, + "loss_ib": 0.010547108016908169, + "step": 962 + }, + { + "ce_ib": 9.1857271194458, + "ce_orig": 0.8455274105072021, + "epoch": 0.27665540297649005, + "kl_loss": 0.2985427975654602, + "loss_ib": 0.012171154841780663, + "step": 962 + }, + { + "ce_ib": 10.592644691467285, + "ce_orig": 1.263620376586914, + "epoch": 0.27665540297649005, + "kl_loss": 0.4153948724269867, + "loss_ib": 0.014746594242751598, + "step": 962 + }, + { + "ce_ib": 6.028276443481445, + "ce_orig": 0.6360313892364502, + "epoch": 0.27665540297649005, + "kl_loss": 0.256117582321167, + "loss_ib": 0.008589452132582664, + "step": 962 + }, + { + "ce_ib": 7.142398357391357, + "ce_orig": 0.8674178123474121, + "epoch": 0.2769429865554677, + "kl_loss": 0.26119542121887207, + "loss_ib": 0.009754352271556854, + "step": 963 + }, + { + "ce_ib": 4.550671577453613, + "ce_orig": 0.4504320025444031, + "epoch": 0.2769429865554677, + "kl_loss": 0.27785179018974304, + "loss_ib": 0.00732918968424201, + "step": 963 + }, + { + "ce_ib": 10.917935371398926, + "ce_orig": 1.3695002794265747, + "epoch": 0.2769429865554677, + "kl_loss": 0.310863733291626, + "loss_ib": 0.014026571996510029, + "step": 963 + }, + { + "ce_ib": 5.352876663208008, + "ce_orig": 0.9681556224822998, + "epoch": 0.2769429865554677, + "kl_loss": 0.24146953225135803, + "loss_ib": 0.007767572067677975, + "step": 963 + }, + { + "ce_ib": 6.516645908355713, + "ce_orig": 0.6153663396835327, + "epoch": 0.2772305701344453, + "kl_loss": 0.2555677592754364, + "loss_ib": 0.009072324261069298, + "step": 964 + }, + { + "ce_ib": 5.9202094078063965, + "ce_orig": 0.562997043132782, + "epoch": 0.2772305701344453, + "kl_loss": 0.32697319984436035, + "loss_ib": 0.009189940989017487, + "step": 964 + }, + { + "ce_ib": 11.461648941040039, + "ce_orig": 1.4160990715026855, + "epoch": 0.2772305701344453, + "kl_loss": 0.20565414428710938, + "loss_ib": 0.013518190011382103, + "step": 964 + }, + { + "ce_ib": 4.950023174285889, + "ce_orig": 0.5089555978775024, + "epoch": 0.2772305701344453, + "kl_loss": 0.2313445806503296, + "loss_ib": 0.00726346904411912, + "step": 964 + }, + { + "epoch": 0.277518153713423, + "grad_norm": 0.10406413674354553, + "learning_rate": 9.900104323121344e-06, + "loss": 0.8932, + "step": 965 + }, + { + "ce_ib": 3.8523833751678467, + "ce_orig": 0.6625568866729736, + "epoch": 0.277518153713423, + "kl_loss": 0.206961527466774, + "loss_ib": 0.005921998526901007, + "step": 965 + }, + { + "ce_ib": 5.566319942474365, + "ce_orig": 0.4371108114719391, + "epoch": 0.277518153713423, + "kl_loss": 0.3670559823513031, + "loss_ib": 0.009236878715455532, + "step": 965 + }, + { + "ce_ib": 6.957183837890625, + "ce_orig": 0.5599235892295837, + "epoch": 0.277518153713423, + "kl_loss": 0.23749658465385437, + "loss_ib": 0.009332150220870972, + "step": 965 + }, + { + "ce_ib": 10.045352935791016, + "ce_orig": 1.2828036546707153, + "epoch": 0.277518153713423, + "kl_loss": 0.2769809663295746, + "loss_ib": 0.012815162539482117, + "step": 965 + }, + { + "ce_ib": 6.997009754180908, + "ce_orig": 0.8958306312561035, + "epoch": 0.2778057372924006, + "kl_loss": 0.26039648056030273, + "loss_ib": 0.00960097461938858, + "step": 966 + }, + { + "ce_ib": 9.824999809265137, + "ce_orig": 1.1781514883041382, + "epoch": 0.2778057372924006, + "kl_loss": 0.32364603877067566, + "loss_ib": 0.013061460107564926, + "step": 966 + }, + { + "ce_ib": 10.24802017211914, + "ce_orig": 1.2914663553237915, + "epoch": 0.2778057372924006, + "kl_loss": 0.3219491243362427, + "loss_ib": 0.013467512093484402, + "step": 966 + }, + { + "ce_ib": 5.80756950378418, + "ce_orig": 0.7262393236160278, + "epoch": 0.2778057372924006, + "kl_loss": 0.25441619753837585, + "loss_ib": 0.008351731114089489, + "step": 966 + }, + { + "ce_ib": 10.359816551208496, + "ce_orig": 1.53457510471344, + "epoch": 0.2780933208713782, + "kl_loss": 0.28124716877937317, + "loss_ib": 0.013172287493944168, + "step": 967 + }, + { + "ce_ib": 3.750755548477173, + "ce_orig": 0.25399720668792725, + "epoch": 0.2780933208713782, + "kl_loss": 0.5752956867218018, + "loss_ib": 0.009503712877631187, + "step": 967 + }, + { + "ce_ib": 8.690093994140625, + "ce_orig": 0.9079038500785828, + "epoch": 0.2780933208713782, + "kl_loss": 0.4378716051578522, + "loss_ib": 0.013068810105323792, + "step": 967 + }, + { + "ce_ib": 5.155570030212402, + "ce_orig": 0.4399418234825134, + "epoch": 0.2780933208713782, + "kl_loss": 0.19257600605487823, + "loss_ib": 0.007081329822540283, + "step": 967 + }, + { + "ce_ib": 7.317140102386475, + "ce_orig": 0.9123186469078064, + "epoch": 0.2783809044503559, + "kl_loss": 0.2773372530937195, + "loss_ib": 0.010090513154864311, + "step": 968 + }, + { + "ce_ib": 9.807036399841309, + "ce_orig": 1.104854702949524, + "epoch": 0.2783809044503559, + "kl_loss": 0.31141215562820435, + "loss_ib": 0.012921158224344254, + "step": 968 + }, + { + "ce_ib": 2.5050039291381836, + "ce_orig": 0.18219490349292755, + "epoch": 0.2783809044503559, + "kl_loss": 0.5909286141395569, + "loss_ib": 0.00841428991407156, + "step": 968 + }, + { + "ce_ib": 5.669373512268066, + "ce_orig": 0.5021364092826843, + "epoch": 0.2783809044503559, + "kl_loss": 0.3175276219844818, + "loss_ib": 0.0088446494191885, + "step": 968 + }, + { + "ce_ib": 6.658907890319824, + "ce_orig": 0.8224705457687378, + "epoch": 0.2786684880293335, + "kl_loss": 0.3462896943092346, + "loss_ib": 0.010121805593371391, + "step": 969 + }, + { + "ce_ib": 7.009324073791504, + "ce_orig": 1.261172890663147, + "epoch": 0.2786684880293335, + "kl_loss": 0.2837193012237549, + "loss_ib": 0.009846516884863377, + "step": 969 + }, + { + "ce_ib": 5.1103196144104, + "ce_orig": 0.6225570440292358, + "epoch": 0.2786684880293335, + "kl_loss": 0.25508421659469604, + "loss_ib": 0.007661161944270134, + "step": 969 + }, + { + "ce_ib": 7.586276531219482, + "ce_orig": 0.6994422078132629, + "epoch": 0.2786684880293335, + "kl_loss": 0.3505334258079529, + "loss_ib": 0.011091611348092556, + "step": 969 + }, + { + "epoch": 0.27895607160831115, + "grad_norm": 0.10747111588716507, + "learning_rate": 9.89855482871982e-06, + "loss": 0.8618, + "step": 970 + }, + { + "ce_ib": 8.283812522888184, + "ce_orig": 1.250517725944519, + "epoch": 0.27895607160831115, + "kl_loss": 0.2938983738422394, + "loss_ib": 0.011222796514630318, + "step": 970 + }, + { + "ce_ib": 7.375507354736328, + "ce_orig": 1.0513449907302856, + "epoch": 0.27895607160831115, + "kl_loss": 0.260643869638443, + "loss_ib": 0.009981945157051086, + "step": 970 + }, + { + "ce_ib": 8.069171905517578, + "ce_orig": 0.7485910654067993, + "epoch": 0.27895607160831115, + "kl_loss": 0.39277946949005127, + "loss_ib": 0.011996966786682606, + "step": 970 + }, + { + "ce_ib": 9.535097122192383, + "ce_orig": 0.617765486240387, + "epoch": 0.27895607160831115, + "kl_loss": 0.24510113894939423, + "loss_ib": 0.011986108496785164, + "step": 970 + }, + { + "ce_ib": 7.473255634307861, + "ce_orig": 0.7423644661903381, + "epoch": 0.27924365518728883, + "kl_loss": 0.27745312452316284, + "loss_ib": 0.010247787460684776, + "step": 971 + }, + { + "ce_ib": 8.25799560546875, + "ce_orig": 0.686055600643158, + "epoch": 0.27924365518728883, + "kl_loss": 0.21401247382164001, + "loss_ib": 0.010398120619356632, + "step": 971 + }, + { + "ce_ib": 7.072482109069824, + "ce_orig": 0.48852288722991943, + "epoch": 0.27924365518728883, + "kl_loss": 0.25071245431900024, + "loss_ib": 0.009579606354236603, + "step": 971 + }, + { + "ce_ib": 6.167069911956787, + "ce_orig": 0.6813110113143921, + "epoch": 0.27924365518728883, + "kl_loss": 0.25662925839424133, + "loss_ib": 0.008733362890779972, + "step": 971 + }, + { + "ce_ib": 10.110807418823242, + "ce_orig": 1.0463491678237915, + "epoch": 0.27953123876626645, + "kl_loss": 0.3023725748062134, + "loss_ib": 0.013134532608091831, + "step": 972 + }, + { + "ce_ib": 8.601117134094238, + "ce_orig": 1.0095446109771729, + "epoch": 0.27953123876626645, + "kl_loss": 0.2603193521499634, + "loss_ib": 0.011204310692846775, + "step": 972 + }, + { + "ce_ib": 8.582576751708984, + "ce_orig": 1.1173807382583618, + "epoch": 0.27953123876626645, + "kl_loss": 0.45898622274398804, + "loss_ib": 0.01317243929952383, + "step": 972 + }, + { + "ce_ib": 8.407683372497559, + "ce_orig": 0.682980477809906, + "epoch": 0.27953123876626645, + "kl_loss": 0.23422002792358398, + "loss_ib": 0.010749883018434048, + "step": 972 + }, + { + "ce_ib": 7.096373081207275, + "ce_orig": 0.8979065418243408, + "epoch": 0.2798188223452441, + "kl_loss": 0.36893928050994873, + "loss_ib": 0.010785765014588833, + "step": 973 + }, + { + "ce_ib": 6.754623889923096, + "ce_orig": 0.6018571853637695, + "epoch": 0.2798188223452441, + "kl_loss": 0.20240068435668945, + "loss_ib": 0.008778630755841732, + "step": 973 + }, + { + "ce_ib": 6.34320592880249, + "ce_orig": 0.5469645857810974, + "epoch": 0.2798188223452441, + "kl_loss": 0.3521893322467804, + "loss_ib": 0.009865098632872105, + "step": 973 + }, + { + "ce_ib": 8.217493057250977, + "ce_orig": 1.0419894456863403, + "epoch": 0.2798188223452441, + "kl_loss": 0.36468231678009033, + "loss_ib": 0.011864316649734974, + "step": 973 + }, + { + "ce_ib": 6.634602069854736, + "ce_orig": 0.6549820303916931, + "epoch": 0.2801064059242217, + "kl_loss": 0.3038894534111023, + "loss_ib": 0.009673496708273888, + "step": 974 + }, + { + "ce_ib": 9.341080665588379, + "ce_orig": 0.607479989528656, + "epoch": 0.2801064059242217, + "kl_loss": 0.26178374886512756, + "loss_ib": 0.011958918534219265, + "step": 974 + }, + { + "ce_ib": 9.761402130126953, + "ce_orig": 0.7530984878540039, + "epoch": 0.2801064059242217, + "kl_loss": 0.3960914611816406, + "loss_ib": 0.013722317293286324, + "step": 974 + }, + { + "ce_ib": 7.921651840209961, + "ce_orig": 0.7006543874740601, + "epoch": 0.2801064059242217, + "kl_loss": 0.2808942198753357, + "loss_ib": 0.010730594396591187, + "step": 974 + }, + { + "epoch": 0.2803939895031994, + "grad_norm": 0.09944093227386475, + "learning_rate": 9.896993532562736e-06, + "loss": 0.8261, + "step": 975 + }, + { + "ce_ib": 7.843717575073242, + "ce_orig": 0.8895473480224609, + "epoch": 0.2803939895031994, + "kl_loss": 0.35273388028144836, + "loss_ib": 0.01137105654925108, + "step": 975 + }, + { + "ce_ib": 9.197118759155273, + "ce_orig": 0.8774324059486389, + "epoch": 0.2803939895031994, + "kl_loss": 0.3209964632987976, + "loss_ib": 0.012407083064317703, + "step": 975 + }, + { + "ce_ib": 4.643885612487793, + "ce_orig": 0.6738633513450623, + "epoch": 0.2803939895031994, + "kl_loss": 0.23056040704250336, + "loss_ib": 0.0069494894705712795, + "step": 975 + }, + { + "ce_ib": 7.597918510437012, + "ce_orig": 0.7519400119781494, + "epoch": 0.2803939895031994, + "kl_loss": 0.3231240212917328, + "loss_ib": 0.010829159058630466, + "step": 975 + }, + { + "ce_ib": 5.716797351837158, + "ce_orig": 0.4619486927986145, + "epoch": 0.280681573082177, + "kl_loss": 0.2213851362466812, + "loss_ib": 0.007930648513138294, + "step": 976 + }, + { + "ce_ib": 4.659027099609375, + "ce_orig": 0.46190229058265686, + "epoch": 0.280681573082177, + "kl_loss": 0.21290841698646545, + "loss_ib": 0.006788111291825771, + "step": 976 + }, + { + "ce_ib": 8.67602252960205, + "ce_orig": 0.7991732358932495, + "epoch": 0.280681573082177, + "kl_loss": 0.4040037989616394, + "loss_ib": 0.012716060504317284, + "step": 976 + }, + { + "ce_ib": 11.049056053161621, + "ce_orig": 1.187608242034912, + "epoch": 0.280681573082177, + "kl_loss": 0.22732923924922943, + "loss_ib": 0.013322348706424236, + "step": 976 + }, + { + "ce_ib": 6.996013641357422, + "ce_orig": 0.3616046607494354, + "epoch": 0.28096915666115463, + "kl_loss": 0.5100580453872681, + "loss_ib": 0.012096593156456947, + "step": 977 + }, + { + "ce_ib": 5.4247727394104, + "ce_orig": 0.7208405137062073, + "epoch": 0.28096915666115463, + "kl_loss": 0.24420976638793945, + "loss_ib": 0.007866870611906052, + "step": 977 + }, + { + "ce_ib": 5.2020392417907715, + "ce_orig": 0.6198222041130066, + "epoch": 0.28096915666115463, + "kl_loss": 0.3132549524307251, + "loss_ib": 0.008334589190781116, + "step": 977 + }, + { + "ce_ib": 8.632140159606934, + "ce_orig": 1.0381910800933838, + "epoch": 0.28096915666115463, + "kl_loss": 0.25286003947257996, + "loss_ib": 0.011160740628838539, + "step": 977 + }, + { + "ce_ib": 6.451074600219727, + "ce_orig": 0.6350893378257751, + "epoch": 0.2812567402401323, + "kl_loss": 0.2584133744239807, + "loss_ib": 0.009035208262503147, + "step": 978 + }, + { + "ce_ib": 9.116836547851562, + "ce_orig": 1.3513867855072021, + "epoch": 0.2812567402401323, + "kl_loss": 0.24926617741584778, + "loss_ib": 0.011609498411417007, + "step": 978 + }, + { + "ce_ib": 9.134062767028809, + "ce_orig": 1.004563570022583, + "epoch": 0.2812567402401323, + "kl_loss": 0.49993661046028137, + "loss_ib": 0.014133429154753685, + "step": 978 + }, + { + "ce_ib": 12.412116050720215, + "ce_orig": 1.79780113697052, + "epoch": 0.2812567402401323, + "kl_loss": 0.410219669342041, + "loss_ib": 0.016514312475919724, + "step": 978 + }, + { + "ce_ib": 7.063502311706543, + "ce_orig": 0.6859074234962463, + "epoch": 0.28154432381910993, + "kl_loss": 0.3865056037902832, + "loss_ib": 0.010928559117019176, + "step": 979 + }, + { + "ce_ib": 6.0811543464660645, + "ce_orig": 0.7244781255722046, + "epoch": 0.28154432381910993, + "kl_loss": 0.23856481909751892, + "loss_ib": 0.008466802537441254, + "step": 979 + }, + { + "ce_ib": 10.540010452270508, + "ce_orig": 1.1688413619995117, + "epoch": 0.28154432381910993, + "kl_loss": 0.35274291038513184, + "loss_ib": 0.01406743936240673, + "step": 979 + }, + { + "ce_ib": 5.423574447631836, + "ce_orig": 0.8129374980926514, + "epoch": 0.28154432381910993, + "kl_loss": 0.31833183765411377, + "loss_ib": 0.008606893010437489, + "step": 979 + }, + { + "epoch": 0.28183190739808756, + "grad_norm": 0.09410839527845383, + "learning_rate": 9.895420438411616e-06, + "loss": 0.8391, + "step": 980 + }, + { + "ce_ib": 4.933524131774902, + "ce_orig": 0.5870782732963562, + "epoch": 0.28183190739808756, + "kl_loss": 0.24300794303417206, + "loss_ib": 0.007363603450357914, + "step": 980 + }, + { + "ce_ib": 8.602434158325195, + "ce_orig": 0.7935175895690918, + "epoch": 0.28183190739808756, + "kl_loss": 0.28050893545150757, + "loss_ib": 0.011407522484660149, + "step": 980 + }, + { + "ce_ib": 5.863503456115723, + "ce_orig": 0.7190868258476257, + "epoch": 0.28183190739808756, + "kl_loss": 0.2712858319282532, + "loss_ib": 0.008576362393796444, + "step": 980 + }, + { + "ce_ib": 6.571584224700928, + "ce_orig": 0.8708552718162537, + "epoch": 0.28183190739808756, + "kl_loss": 0.20640678703784943, + "loss_ib": 0.008635652251541615, + "step": 980 + }, + { + "ce_ib": 7.416860580444336, + "ce_orig": 0.6324207186698914, + "epoch": 0.28211949097706523, + "kl_loss": 0.23176950216293335, + "loss_ib": 0.009734555147588253, + "step": 981 + }, + { + "ce_ib": 5.944385051727295, + "ce_orig": 0.7745913863182068, + "epoch": 0.28211949097706523, + "kl_loss": 0.25892847776412964, + "loss_ib": 0.008533669635653496, + "step": 981 + }, + { + "ce_ib": 5.516068458557129, + "ce_orig": 0.46634528040885925, + "epoch": 0.28211949097706523, + "kl_loss": 0.33703067898750305, + "loss_ib": 0.008886375464498997, + "step": 981 + }, + { + "ce_ib": 9.37726879119873, + "ce_orig": 0.8699996471405029, + "epoch": 0.28211949097706523, + "kl_loss": 0.333581805229187, + "loss_ib": 0.012713085860013962, + "step": 981 + }, + { + "ce_ib": 7.538808822631836, + "ce_orig": 0.9253389835357666, + "epoch": 0.28240707455604286, + "kl_loss": 0.3331226408481598, + "loss_ib": 0.010870035737752914, + "step": 982 + }, + { + "ce_ib": 8.707592010498047, + "ce_orig": 1.1699987649917603, + "epoch": 0.28240707455604286, + "kl_loss": 0.3539894223213196, + "loss_ib": 0.012247486039996147, + "step": 982 + }, + { + "ce_ib": 4.077365875244141, + "ce_orig": 0.5715107917785645, + "epoch": 0.28240707455604286, + "kl_loss": 0.33435067534446716, + "loss_ib": 0.007420872338116169, + "step": 982 + }, + { + "ce_ib": 10.470292091369629, + "ce_orig": 1.234960675239563, + "epoch": 0.28240707455604286, + "kl_loss": 0.3328559994697571, + "loss_ib": 0.013798851519823074, + "step": 982 + }, + { + "ce_ib": 12.779997825622559, + "ce_orig": 1.8746068477630615, + "epoch": 0.2826946581350205, + "kl_loss": 0.3426949381828308, + "loss_ib": 0.016206946223974228, + "step": 983 + }, + { + "ce_ib": 6.349981307983398, + "ce_orig": 0.5666931867599487, + "epoch": 0.2826946581350205, + "kl_loss": 0.3899381756782532, + "loss_ib": 0.010249363258481026, + "step": 983 + }, + { + "ce_ib": 5.248485088348389, + "ce_orig": 0.6375545859336853, + "epoch": 0.2826946581350205, + "kl_loss": 0.2220623940229416, + "loss_ib": 0.007469109259545803, + "step": 983 + }, + { + "ce_ib": 8.392010688781738, + "ce_orig": 1.1137949228286743, + "epoch": 0.2826946581350205, + "kl_loss": 0.24818137288093567, + "loss_ib": 0.01087382435798645, + "step": 983 + }, + { + "ce_ib": 7.158164024353027, + "ce_orig": 0.5367669463157654, + "epoch": 0.2829822417139981, + "kl_loss": 0.27173370122909546, + "loss_ib": 0.00987550150603056, + "step": 984 + }, + { + "ce_ib": 9.511956214904785, + "ce_orig": 1.0864530801773071, + "epoch": 0.2829822417139981, + "kl_loss": 0.2743532657623291, + "loss_ib": 0.012255489826202393, + "step": 984 + }, + { + "ce_ib": 4.440701484680176, + "ce_orig": 0.66915363073349, + "epoch": 0.2829822417139981, + "kl_loss": 0.2580929696559906, + "loss_ib": 0.007021630648523569, + "step": 984 + }, + { + "ce_ib": 7.797125339508057, + "ce_orig": 0.9739370346069336, + "epoch": 0.2829822417139981, + "kl_loss": 0.24688200652599335, + "loss_ib": 0.01026594452559948, + "step": 984 + }, + { + "epoch": 0.2832698252929758, + "grad_norm": 0.09766387939453125, + "learning_rate": 9.893835550056407e-06, + "loss": 0.8618, + "step": 985 + }, + { + "ce_ib": 7.013763904571533, + "ce_orig": 0.8495591282844543, + "epoch": 0.2832698252929758, + "kl_loss": 0.23701989650726318, + "loss_ib": 0.009383962489664555, + "step": 985 + }, + { + "ce_ib": 5.373435020446777, + "ce_orig": 0.6739266514778137, + "epoch": 0.2832698252929758, + "kl_loss": 0.2958618402481079, + "loss_ib": 0.008332053199410439, + "step": 985 + }, + { + "ce_ib": 8.126791000366211, + "ce_orig": 0.8309746384620667, + "epoch": 0.2832698252929758, + "kl_loss": 0.38277897238731384, + "loss_ib": 0.011954580433666706, + "step": 985 + }, + { + "ce_ib": 8.946346282958984, + "ce_orig": 1.2075648307800293, + "epoch": 0.2832698252929758, + "kl_loss": 0.3251242935657501, + "loss_ib": 0.012197589501738548, + "step": 985 + }, + { + "ce_ib": 9.519036293029785, + "ce_orig": 0.8708526492118835, + "epoch": 0.2835574088719534, + "kl_loss": 0.31902384757995605, + "loss_ib": 0.01270927395671606, + "step": 986 + }, + { + "ce_ib": 11.005833625793457, + "ce_orig": 1.3966689109802246, + "epoch": 0.2835574088719534, + "kl_loss": 0.39275312423706055, + "loss_ib": 0.014933365397155285, + "step": 986 + }, + { + "ce_ib": 8.268836975097656, + "ce_orig": 1.026828408241272, + "epoch": 0.2835574088719534, + "kl_loss": 0.25195300579071045, + "loss_ib": 0.010788366198539734, + "step": 986 + }, + { + "ce_ib": 10.45020580291748, + "ce_orig": 1.0152733325958252, + "epoch": 0.2835574088719534, + "kl_loss": 0.2754089832305908, + "loss_ib": 0.013204295188188553, + "step": 986 + }, + { + "ce_ib": 8.130716323852539, + "ce_orig": 0.5325186848640442, + "epoch": 0.28384499245093103, + "kl_loss": 0.4100872576236725, + "loss_ib": 0.012231589294970036, + "step": 987 + }, + { + "ce_ib": 6.4655256271362305, + "ce_orig": 0.6601911783218384, + "epoch": 0.28384499245093103, + "kl_loss": 0.2558635473251343, + "loss_ib": 0.009024160914123058, + "step": 987 + }, + { + "ce_ib": 6.571667194366455, + "ce_orig": 0.621316134929657, + "epoch": 0.28384499245093103, + "kl_loss": 0.23040370643138885, + "loss_ib": 0.008875704370439053, + "step": 987 + }, + { + "ce_ib": 6.913454532623291, + "ce_orig": 0.48008617758750916, + "epoch": 0.28384499245093103, + "kl_loss": 0.4269219636917114, + "loss_ib": 0.011182674206793308, + "step": 987 + }, + { + "ce_ib": 6.073386192321777, + "ce_orig": 0.7689921855926514, + "epoch": 0.2841325760299087, + "kl_loss": 0.33685237169265747, + "loss_ib": 0.00944190938025713, + "step": 988 + }, + { + "ce_ib": 5.702093601226807, + "ce_orig": 0.7048614025115967, + "epoch": 0.2841325760299087, + "kl_loss": 0.2855239510536194, + "loss_ib": 0.0085573336109519, + "step": 988 + }, + { + "ce_ib": 6.3464884757995605, + "ce_orig": 0.6249024868011475, + "epoch": 0.2841325760299087, + "kl_loss": 0.37955427169799805, + "loss_ib": 0.010142030194401741, + "step": 988 + }, + { + "ce_ib": 7.4441657066345215, + "ce_orig": 0.9428173303604126, + "epoch": 0.2841325760299087, + "kl_loss": 0.41461101174354553, + "loss_ib": 0.011590275913476944, + "step": 988 + }, + { + "ce_ib": 8.65250015258789, + "ce_orig": 1.1751656532287598, + "epoch": 0.28442015960888634, + "kl_loss": 0.34314173460006714, + "loss_ib": 0.01208391785621643, + "step": 989 + }, + { + "ce_ib": 10.522750854492188, + "ce_orig": 1.5159534215927124, + "epoch": 0.28442015960888634, + "kl_loss": 0.23152892291545868, + "loss_ib": 0.012838039547204971, + "step": 989 + }, + { + "ce_ib": 9.248775482177734, + "ce_orig": 1.2440454959869385, + "epoch": 0.28442015960888634, + "kl_loss": 0.3038747012615204, + "loss_ib": 0.012287522666156292, + "step": 989 + }, + { + "ce_ib": 9.64876651763916, + "ce_orig": 1.3754632472991943, + "epoch": 0.28442015960888634, + "kl_loss": 0.2952038645744324, + "loss_ib": 0.01260080561041832, + "step": 989 + }, + { + "epoch": 0.28470774318786396, + "grad_norm": 0.11360838264226913, + "learning_rate": 9.892238871315477e-06, + "loss": 0.9178, + "step": 990 + }, + { + "ce_ib": 6.687885761260986, + "ce_orig": 0.7147387862205505, + "epoch": 0.28470774318786396, + "kl_loss": 0.2835395932197571, + "loss_ib": 0.009523281827569008, + "step": 990 + }, + { + "ce_ib": 7.797086715698242, + "ce_orig": 0.8477333784103394, + "epoch": 0.28470774318786396, + "kl_loss": 0.2771601676940918, + "loss_ib": 0.010568687692284584, + "step": 990 + }, + { + "ce_ib": 7.964920520782471, + "ce_orig": 1.1867727041244507, + "epoch": 0.28470774318786396, + "kl_loss": 0.21424566209316254, + "loss_ib": 0.010107376612722874, + "step": 990 + }, + { + "ce_ib": 7.639345169067383, + "ce_orig": 0.9419063329696655, + "epoch": 0.28470774318786396, + "kl_loss": 0.3332253694534302, + "loss_ib": 0.010971598327159882, + "step": 990 + }, + { + "ce_ib": 9.816707611083984, + "ce_orig": 0.7999016046524048, + "epoch": 0.28499532676684164, + "kl_loss": 0.33083677291870117, + "loss_ib": 0.013125075958669186, + "step": 991 + }, + { + "ce_ib": 10.84311580657959, + "ce_orig": 1.1912862062454224, + "epoch": 0.28499532676684164, + "kl_loss": 0.28931328654289246, + "loss_ib": 0.013736248947679996, + "step": 991 + }, + { + "ce_ib": 7.672483921051025, + "ce_orig": 0.9036149978637695, + "epoch": 0.28499532676684164, + "kl_loss": 0.3275108337402344, + "loss_ib": 0.010947591625154018, + "step": 991 + }, + { + "ce_ib": 7.526676177978516, + "ce_orig": 0.8651123046875, + "epoch": 0.28499532676684164, + "kl_loss": 0.2458728551864624, + "loss_ib": 0.009985404089093208, + "step": 991 + }, + { + "ce_ib": 8.313544273376465, + "ce_orig": 1.2926881313323975, + "epoch": 0.28528291034581926, + "kl_loss": 0.32590439915657043, + "loss_ib": 0.011572588235139847, + "step": 992 + }, + { + "ce_ib": 9.292975425720215, + "ce_orig": 1.3791533708572388, + "epoch": 0.28528291034581926, + "kl_loss": 0.2774926722049713, + "loss_ib": 0.012067901901900768, + "step": 992 + }, + { + "ce_ib": 6.573873996734619, + "ce_orig": 0.4999292194843292, + "epoch": 0.28528291034581926, + "kl_loss": 0.25200730562210083, + "loss_ib": 0.009093946777284145, + "step": 992 + }, + { + "ce_ib": 6.635220050811768, + "ce_orig": 0.6851158142089844, + "epoch": 0.28528291034581926, + "kl_loss": 0.2963348627090454, + "loss_ib": 0.009598568081855774, + "step": 992 + }, + { + "ce_ib": 7.602895736694336, + "ce_orig": 0.9763492941856384, + "epoch": 0.2855704939247969, + "kl_loss": 0.24165207147598267, + "loss_ib": 0.01001941692084074, + "step": 993 + }, + { + "ce_ib": 4.053561210632324, + "ce_orig": 0.7590509057044983, + "epoch": 0.2855704939247969, + "kl_loss": 0.22461174428462982, + "loss_ib": 0.006299678701907396, + "step": 993 + }, + { + "ce_ib": 11.96536922454834, + "ce_orig": 1.5654411315917969, + "epoch": 0.2855704939247969, + "kl_loss": 0.34373822808265686, + "loss_ib": 0.015402751043438911, + "step": 993 + }, + { + "ce_ib": 5.362922668457031, + "ce_orig": 0.8094271421432495, + "epoch": 0.2855704939247969, + "kl_loss": 0.280230849981308, + "loss_ib": 0.008165230974555016, + "step": 993 + }, + { + "ce_ib": 5.077024936676025, + "ce_orig": 0.6099755167961121, + "epoch": 0.2858580775037745, + "kl_loss": 0.2580031752586365, + "loss_ib": 0.007657056674361229, + "step": 994 + }, + { + "ce_ib": 7.4664812088012695, + "ce_orig": 0.9805220365524292, + "epoch": 0.2858580775037745, + "kl_loss": 0.2251621037721634, + "loss_ib": 0.009718102402985096, + "step": 994 + }, + { + "ce_ib": 7.463787078857422, + "ce_orig": 0.7944656014442444, + "epoch": 0.2858580775037745, + "kl_loss": 0.20221929252147675, + "loss_ib": 0.009485980495810509, + "step": 994 + }, + { + "ce_ib": 9.706088066101074, + "ce_orig": 1.0263926982879639, + "epoch": 0.2858580775037745, + "kl_loss": 0.32880985736846924, + "loss_ib": 0.012994186952710152, + "step": 994 + }, + { + "epoch": 0.2861456610827522, + "grad_norm": 0.10201858729124069, + "learning_rate": 9.89063040603559e-06, + "loss": 0.9227, + "step": 995 + }, + { + "ce_ib": 9.292113304138184, + "ce_orig": 1.1094127893447876, + "epoch": 0.2861456610827522, + "kl_loss": 0.3149503469467163, + "loss_ib": 0.01244161557406187, + "step": 995 + }, + { + "ce_ib": 11.187474250793457, + "ce_orig": 0.847707986831665, + "epoch": 0.2861456610827522, + "kl_loss": 0.28196096420288086, + "loss_ib": 0.0140070840716362, + "step": 995 + }, + { + "ce_ib": 5.9683427810668945, + "ce_orig": 0.7678855657577515, + "epoch": 0.2861456610827522, + "kl_loss": 0.24694621562957764, + "loss_ib": 0.008437804877758026, + "step": 995 + }, + { + "ce_ib": 4.204360008239746, + "ce_orig": 0.5550284385681152, + "epoch": 0.2861456610827522, + "kl_loss": 0.33598631620407104, + "loss_ib": 0.007564222440123558, + "step": 995 + }, + { + "ce_ib": 4.783699035644531, + "ce_orig": 0.5354413390159607, + "epoch": 0.2864332446617298, + "kl_loss": 0.3188742995262146, + "loss_ib": 0.007972441613674164, + "step": 996 + }, + { + "ce_ib": 7.208334445953369, + "ce_orig": 0.8125985860824585, + "epoch": 0.2864332446617298, + "kl_loss": 0.2528786063194275, + "loss_ib": 0.009737120941281319, + "step": 996 + }, + { + "ce_ib": 8.211366653442383, + "ce_orig": 0.8897217512130737, + "epoch": 0.2864332446617298, + "kl_loss": 0.251012921333313, + "loss_ib": 0.010721495375037193, + "step": 996 + }, + { + "ce_ib": 7.538112163543701, + "ce_orig": 0.8684660196304321, + "epoch": 0.2864332446617298, + "kl_loss": 0.24459782242774963, + "loss_ib": 0.009984089992940426, + "step": 996 + }, + { + "ce_ib": 5.27274751663208, + "ce_orig": 0.6079943776130676, + "epoch": 0.28672082824070744, + "kl_loss": 0.36212387681007385, + "loss_ib": 0.008893987163901329, + "step": 997 + }, + { + "ce_ib": 3.044394016265869, + "ce_orig": 0.46399974822998047, + "epoch": 0.28672082824070744, + "kl_loss": 0.26355600357055664, + "loss_ib": 0.005679954309016466, + "step": 997 + }, + { + "ce_ib": 6.704494476318359, + "ce_orig": 0.8144213557243347, + "epoch": 0.28672082824070744, + "kl_loss": 0.23444503545761108, + "loss_ib": 0.00904894433915615, + "step": 997 + }, + { + "ce_ib": 8.70203971862793, + "ce_orig": 1.2462023496627808, + "epoch": 0.28672082824070744, + "kl_loss": 0.21362106502056122, + "loss_ib": 0.010838249698281288, + "step": 997 + }, + { + "ce_ib": 6.0318922996521, + "ce_orig": 0.5330185294151306, + "epoch": 0.2870084118196851, + "kl_loss": 0.3407291769981384, + "loss_ib": 0.00943918339908123, + "step": 998 + }, + { + "ce_ib": 8.035571098327637, + "ce_orig": 1.056720495223999, + "epoch": 0.2870084118196851, + "kl_loss": 0.23358367383480072, + "loss_ib": 0.010371407493948936, + "step": 998 + }, + { + "ce_ib": 8.166302680969238, + "ce_orig": 0.6143516898155212, + "epoch": 0.2870084118196851, + "kl_loss": 0.29871490597724915, + "loss_ib": 0.011153452098369598, + "step": 998 + }, + { + "ce_ib": 8.71125602722168, + "ce_orig": 1.1870672702789307, + "epoch": 0.2870084118196851, + "kl_loss": 0.2737518548965454, + "loss_ib": 0.011448774486780167, + "step": 998 + }, + { + "ce_ib": 5.139594078063965, + "ce_orig": 0.5000967383384705, + "epoch": 0.28729599539866274, + "kl_loss": 0.5653914213180542, + "loss_ib": 0.010793508030474186, + "step": 999 + }, + { + "ce_ib": 6.412952899932861, + "ce_orig": 0.9104073643684387, + "epoch": 0.28729599539866274, + "kl_loss": 0.28137922286987305, + "loss_ib": 0.009226744994521141, + "step": 999 + }, + { + "ce_ib": 9.592132568359375, + "ce_orig": 0.7200940251350403, + "epoch": 0.28729599539866274, + "kl_loss": 0.3655579090118408, + "loss_ib": 0.013247711583971977, + "step": 999 + }, + { + "ce_ib": 8.952437400817871, + "ce_orig": 1.0639101266860962, + "epoch": 0.28729599539866274, + "kl_loss": 0.3098136782646179, + "loss_ib": 0.012050573714077473, + "step": 999 + }, + { + "epoch": 0.28758357897764036, + "grad_norm": 0.10075593739748001, + "learning_rate": 9.889010158091917e-06, + "loss": 0.92, + "step": 1000 + }, + { + "ce_ib": 9.31643009185791, + "ce_orig": 0.6427621245384216, + "epoch": 0.28758357897764036, + "kl_loss": 0.2728811800479889, + "loss_ib": 0.012045240961015224, + "step": 1000 + }, + { + "ce_ib": 8.242283821105957, + "ce_orig": 0.8939123749732971, + "epoch": 0.28758357897764036, + "kl_loss": 0.25834226608276367, + "loss_ib": 0.010825706645846367, + "step": 1000 + }, + { + "ce_ib": 7.34848165512085, + "ce_orig": 0.541661262512207, + "epoch": 0.28758357897764036, + "kl_loss": 0.3545241951942444, + "loss_ib": 0.010893723927438259, + "step": 1000 + }, + { + "ce_ib": 7.228389263153076, + "ce_orig": 0.8468849658966064, + "epoch": 0.28758357897764036, + "kl_loss": 0.33031901717185974, + "loss_ib": 0.01053157914429903, + "step": 1000 + }, + { + "ce_ib": 8.21964168548584, + "ce_orig": 0.6685616970062256, + "epoch": 0.28787116255661804, + "kl_loss": 0.2963103950023651, + "loss_ib": 0.011182744987308979, + "step": 1001 + }, + { + "ce_ib": 7.676856517791748, + "ce_orig": 0.6330392956733704, + "epoch": 0.28787116255661804, + "kl_loss": 0.31577515602111816, + "loss_ib": 0.010834608227014542, + "step": 1001 + }, + { + "ce_ib": 5.161186695098877, + "ce_orig": 0.7620050311088562, + "epoch": 0.28787116255661804, + "kl_loss": 0.2610274851322174, + "loss_ib": 0.0077714622020721436, + "step": 1001 + }, + { + "ce_ib": 6.980307102203369, + "ce_orig": 0.4666039049625397, + "epoch": 0.28787116255661804, + "kl_loss": 0.3128125071525574, + "loss_ib": 0.010108432732522488, + "step": 1001 + }, + { + "ce_ib": 11.080206871032715, + "ce_orig": 1.3631070852279663, + "epoch": 0.28815874613559567, + "kl_loss": 0.5862671732902527, + "loss_ib": 0.01694287732243538, + "step": 1002 + }, + { + "ce_ib": 7.41237211227417, + "ce_orig": 0.8579285740852356, + "epoch": 0.28815874613559567, + "kl_loss": 0.20175296068191528, + "loss_ib": 0.009429901838302612, + "step": 1002 + }, + { + "ce_ib": 9.326122283935547, + "ce_orig": 0.9100500345230103, + "epoch": 0.28815874613559567, + "kl_loss": 0.21318525075912476, + "loss_ib": 0.011457975022494793, + "step": 1002 + }, + { + "ce_ib": 8.27169132232666, + "ce_orig": 0.7772948741912842, + "epoch": 0.28815874613559567, + "kl_loss": 0.35793256759643555, + "loss_ib": 0.011851017363369465, + "step": 1002 + }, + { + "ce_ib": 3.5838937759399414, + "ce_orig": 0.6172391176223755, + "epoch": 0.2884463297145733, + "kl_loss": 0.2099723070859909, + "loss_ib": 0.0056836167350411415, + "step": 1003 + }, + { + "ce_ib": 11.964217185974121, + "ce_orig": 1.454162359237671, + "epoch": 0.2884463297145733, + "kl_loss": 0.26618391275405884, + "loss_ib": 0.014626056887209415, + "step": 1003 + }, + { + "ce_ib": 8.715005874633789, + "ce_orig": 1.0397450923919678, + "epoch": 0.2884463297145733, + "kl_loss": 0.4153057932853699, + "loss_ib": 0.012868063524365425, + "step": 1003 + }, + { + "ce_ib": 5.671633243560791, + "ce_orig": 0.5089535117149353, + "epoch": 0.2884463297145733, + "kl_loss": 0.3270706832408905, + "loss_ib": 0.00894234050065279, + "step": 1003 + }, + { + "ce_ib": 9.240535736083984, + "ce_orig": 0.9992527365684509, + "epoch": 0.2887339132935509, + "kl_loss": 0.3158443570137024, + "loss_ib": 0.012398979626595974, + "step": 1004 + }, + { + "ce_ib": 4.2876434326171875, + "ce_orig": 0.4395374357700348, + "epoch": 0.2887339132935509, + "kl_loss": 0.25293272733688354, + "loss_ib": 0.0068169706501066685, + "step": 1004 + }, + { + "ce_ib": 8.207853317260742, + "ce_orig": 0.8152860403060913, + "epoch": 0.2887339132935509, + "kl_loss": 0.32358676195144653, + "loss_ib": 0.011443721130490303, + "step": 1004 + }, + { + "ce_ib": 3.8691201210021973, + "ce_orig": 0.36630746722221375, + "epoch": 0.2887339132935509, + "kl_loss": 0.5294014811515808, + "loss_ib": 0.00916313473135233, + "step": 1004 + }, + { + "epoch": 0.2890214968725286, + "grad_norm": 0.09347087144851685, + "learning_rate": 9.88737813138801e-06, + "loss": 0.8989, + "step": 1005 + }, + { + "ce_ib": 7.5148749351501465, + "ce_orig": 0.7807815670967102, + "epoch": 0.2890214968725286, + "kl_loss": 0.28932327032089233, + "loss_ib": 0.010408108122646809, + "step": 1005 + }, + { + "ce_ib": 11.744305610656738, + "ce_orig": 1.5088998079299927, + "epoch": 0.2890214968725286, + "kl_loss": 0.3862993121147156, + "loss_ib": 0.015607299283146858, + "step": 1005 + }, + { + "ce_ib": 8.315278053283691, + "ce_orig": 0.6296089291572571, + "epoch": 0.2890214968725286, + "kl_loss": 0.24432632327079773, + "loss_ib": 0.010758541524410248, + "step": 1005 + }, + { + "ce_ib": 5.209807395935059, + "ce_orig": 0.365771621465683, + "epoch": 0.2890214968725286, + "kl_loss": 0.3716930150985718, + "loss_ib": 0.008926738053560257, + "step": 1005 + }, + { + "ce_ib": 7.445919990539551, + "ce_orig": 0.9328464269638062, + "epoch": 0.2893090804515062, + "kl_loss": 0.26125314831733704, + "loss_ib": 0.010058451443910599, + "step": 1006 + }, + { + "ce_ib": 6.672987937927246, + "ce_orig": 0.5431604981422424, + "epoch": 0.2893090804515062, + "kl_loss": 0.6196109652519226, + "loss_ib": 0.012869098223745823, + "step": 1006 + }, + { + "ce_ib": 9.32304573059082, + "ce_orig": 0.8576613664627075, + "epoch": 0.2893090804515062, + "kl_loss": 0.1936599165201187, + "loss_ib": 0.011259645223617554, + "step": 1006 + }, + { + "ce_ib": 8.495000839233398, + "ce_orig": 0.8172574043273926, + "epoch": 0.2893090804515062, + "kl_loss": 0.33307093381881714, + "loss_ib": 0.011825710535049438, + "step": 1006 + }, + { + "ce_ib": 8.885623931884766, + "ce_orig": 1.0538289546966553, + "epoch": 0.28959666403048384, + "kl_loss": 0.2817854881286621, + "loss_ib": 0.01170347910374403, + "step": 1007 + }, + { + "ce_ib": 5.0845417976379395, + "ce_orig": 0.8662396669387817, + "epoch": 0.28959666403048384, + "kl_loss": 0.2701185345649719, + "loss_ib": 0.007785727269947529, + "step": 1007 + }, + { + "ce_ib": 4.460420608520508, + "ce_orig": 0.8098589181900024, + "epoch": 0.28959666403048384, + "kl_loss": 0.2515374422073364, + "loss_ib": 0.006975794676691294, + "step": 1007 + }, + { + "ce_ib": 9.361479759216309, + "ce_orig": 1.1995859146118164, + "epoch": 0.28959666403048384, + "kl_loss": 0.2952824831008911, + "loss_ib": 0.01231430470943451, + "step": 1007 + }, + { + "ce_ib": 8.661140441894531, + "ce_orig": 0.9951620697975159, + "epoch": 0.2898842476094615, + "kl_loss": 0.2747381627559662, + "loss_ib": 0.011408521793782711, + "step": 1008 + }, + { + "ce_ib": 7.686845779418945, + "ce_orig": 0.8603042960166931, + "epoch": 0.2898842476094615, + "kl_loss": 0.2675703763961792, + "loss_ib": 0.010362548753619194, + "step": 1008 + }, + { + "ce_ib": 10.085550308227539, + "ce_orig": 0.561826765537262, + "epoch": 0.2898842476094615, + "kl_loss": 0.30274707078933716, + "loss_ib": 0.013113021850585938, + "step": 1008 + }, + { + "ce_ib": 9.925764083862305, + "ce_orig": 1.4767504930496216, + "epoch": 0.2898842476094615, + "kl_loss": 0.22820648550987244, + "loss_ib": 0.01220782846212387, + "step": 1008 + }, + { + "ce_ib": 8.104393005371094, + "ce_orig": 0.8048654198646545, + "epoch": 0.29017183118843914, + "kl_loss": 0.3005231022834778, + "loss_ib": 0.011109624058008194, + "step": 1009 + }, + { + "ce_ib": 4.768399238586426, + "ce_orig": 0.7496779561042786, + "epoch": 0.29017183118843914, + "kl_loss": 0.23560698330402374, + "loss_ib": 0.0071244691498577595, + "step": 1009 + }, + { + "ce_ib": 7.887166500091553, + "ce_orig": 1.1598186492919922, + "epoch": 0.29017183118843914, + "kl_loss": 0.27508389949798584, + "loss_ib": 0.01063800510019064, + "step": 1009 + }, + { + "ce_ib": 7.2288594245910645, + "ce_orig": 1.204685926437378, + "epoch": 0.29017183118843914, + "kl_loss": 0.24225227534770966, + "loss_ib": 0.009651382453739643, + "step": 1009 + }, + { + "epoch": 0.29045941476741677, + "grad_norm": 0.09235741198062897, + "learning_rate": 9.885734329855798e-06, + "loss": 0.8963, + "step": 1010 + }, + { + "ce_ib": 8.130411148071289, + "ce_orig": 0.9784060120582581, + "epoch": 0.29045941476741677, + "kl_loss": 0.2725956439971924, + "loss_ib": 0.010856368578970432, + "step": 1010 + }, + { + "ce_ib": 6.9893717765808105, + "ce_orig": 0.9504007697105408, + "epoch": 0.29045941476741677, + "kl_loss": 0.2702571153640747, + "loss_ib": 0.009691942483186722, + "step": 1010 + }, + { + "ce_ib": 7.257619857788086, + "ce_orig": 0.9049093127250671, + "epoch": 0.29045941476741677, + "kl_loss": 0.26276618242263794, + "loss_ib": 0.009885281324386597, + "step": 1010 + }, + { + "ce_ib": 2.96409273147583, + "ce_orig": 0.5564637780189514, + "epoch": 0.29045941476741677, + "kl_loss": 0.24044734239578247, + "loss_ib": 0.005368566606193781, + "step": 1010 + }, + { + "ce_ib": 9.04433822631836, + "ce_orig": 1.248004674911499, + "epoch": 0.29074699834639445, + "kl_loss": 0.5226652026176453, + "loss_ib": 0.01427098922431469, + "step": 1011 + }, + { + "ce_ib": 5.734447956085205, + "ce_orig": 0.7180121541023254, + "epoch": 0.29074699834639445, + "kl_loss": 0.27341729402542114, + "loss_ib": 0.008468620479106903, + "step": 1011 + }, + { + "ce_ib": 7.165276050567627, + "ce_orig": 0.6450743675231934, + "epoch": 0.29074699834639445, + "kl_loss": 0.2959662675857544, + "loss_ib": 0.010124938562512398, + "step": 1011 + }, + { + "ce_ib": 8.000144958496094, + "ce_orig": 1.1693824529647827, + "epoch": 0.29074699834639445, + "kl_loss": 0.3289404511451721, + "loss_ib": 0.011289549060165882, + "step": 1011 + }, + { + "ce_ib": 11.336484909057617, + "ce_orig": 1.0948889255523682, + "epoch": 0.29103458192537207, + "kl_loss": 0.2643250524997711, + "loss_ib": 0.013979734852910042, + "step": 1012 + }, + { + "ce_ib": 7.049655914306641, + "ce_orig": 0.7831434011459351, + "epoch": 0.29103458192537207, + "kl_loss": 0.2921431064605713, + "loss_ib": 0.009971086867153645, + "step": 1012 + }, + { + "ce_ib": 6.0634355545043945, + "ce_orig": 0.777411162853241, + "epoch": 0.29103458192537207, + "kl_loss": 0.2579835057258606, + "loss_ib": 0.00864327047020197, + "step": 1012 + }, + { + "ce_ib": 7.513561725616455, + "ce_orig": 1.4329041242599487, + "epoch": 0.29103458192537207, + "kl_loss": 0.32468241453170776, + "loss_ib": 0.010760385543107986, + "step": 1012 + }, + { + "ce_ib": 7.622284412384033, + "ce_orig": 0.6885894536972046, + "epoch": 0.2913221655043497, + "kl_loss": 0.30582404136657715, + "loss_ib": 0.01068052463233471, + "step": 1013 + }, + { + "ce_ib": 10.078372955322266, + "ce_orig": 0.8869221806526184, + "epoch": 0.2913221655043497, + "kl_loss": 0.25840914249420166, + "loss_ib": 0.012662463821470737, + "step": 1013 + }, + { + "ce_ib": 6.638105392456055, + "ce_orig": 1.0796653032302856, + "epoch": 0.2913221655043497, + "kl_loss": 0.26853376626968384, + "loss_ib": 0.009323443286120892, + "step": 1013 + }, + { + "ce_ib": 8.890301704406738, + "ce_orig": 1.291822910308838, + "epoch": 0.2913221655043497, + "kl_loss": 0.2958293557167053, + "loss_ib": 0.01184859499335289, + "step": 1013 + }, + { + "ce_ib": 8.304509162902832, + "ce_orig": 1.1144568920135498, + "epoch": 0.2916097490833273, + "kl_loss": 0.33458924293518066, + "loss_ib": 0.01165040209889412, + "step": 1014 + }, + { + "ce_ib": 5.494061470031738, + "ce_orig": 0.8900278210639954, + "epoch": 0.2916097490833273, + "kl_loss": 0.252785861492157, + "loss_ib": 0.00802191998809576, + "step": 1014 + }, + { + "ce_ib": 6.3537516593933105, + "ce_orig": 0.8443594574928284, + "epoch": 0.2916097490833273, + "kl_loss": 0.2535431385040283, + "loss_ib": 0.008889183402061462, + "step": 1014 + }, + { + "ce_ib": 11.406895637512207, + "ce_orig": 1.344512939453125, + "epoch": 0.2916097490833273, + "kl_loss": 0.28891634941101074, + "loss_ib": 0.014296059496700764, + "step": 1014 + }, + { + "epoch": 0.291897332662305, + "grad_norm": 0.09041693806648254, + "learning_rate": 9.884078757455583e-06, + "loss": 0.9109, + "step": 1015 + }, + { + "ce_ib": 9.290079116821289, + "ce_orig": 1.5962048768997192, + "epoch": 0.291897332662305, + "kl_loss": 0.28169265389442444, + "loss_ib": 0.012107006274163723, + "step": 1015 + }, + { + "ce_ib": 7.13163948059082, + "ce_orig": 0.6506978273391724, + "epoch": 0.291897332662305, + "kl_loss": 0.32169321179389954, + "loss_ib": 0.010348571464419365, + "step": 1015 + }, + { + "ce_ib": 2.4287688732147217, + "ce_orig": 0.17141437530517578, + "epoch": 0.291897332662305, + "kl_loss": 0.5213749408721924, + "loss_ib": 0.007642518263310194, + "step": 1015 + }, + { + "ce_ib": 5.544053077697754, + "ce_orig": 0.7960252165794373, + "epoch": 0.291897332662305, + "kl_loss": 0.26576048135757446, + "loss_ib": 0.00820165779441595, + "step": 1015 + }, + { + "ce_ib": 8.132667541503906, + "ce_orig": 0.7880412936210632, + "epoch": 0.2921849162412826, + "kl_loss": 0.3609253168106079, + "loss_ib": 0.011741920374333858, + "step": 1016 + }, + { + "ce_ib": 5.45988655090332, + "ce_orig": 0.7703402042388916, + "epoch": 0.2921849162412826, + "kl_loss": 0.23446249961853027, + "loss_ib": 0.007804511580616236, + "step": 1016 + }, + { + "ce_ib": 8.336692810058594, + "ce_orig": 1.0396983623504639, + "epoch": 0.2921849162412826, + "kl_loss": 0.2709803581237793, + "loss_ib": 0.011046496219933033, + "step": 1016 + }, + { + "ce_ib": 8.177699089050293, + "ce_orig": 0.5715538859367371, + "epoch": 0.2921849162412826, + "kl_loss": 0.29589879512786865, + "loss_ib": 0.01113668642938137, + "step": 1016 + }, + { + "ce_ib": 7.4771318435668945, + "ce_orig": 0.9154139757156372, + "epoch": 0.29247249982026025, + "kl_loss": 0.25590091943740845, + "loss_ib": 0.010036141611635685, + "step": 1017 + }, + { + "ce_ib": 7.049166679382324, + "ce_orig": 0.9006765484809875, + "epoch": 0.29247249982026025, + "kl_loss": 0.20401448011398315, + "loss_ib": 0.009089311584830284, + "step": 1017 + }, + { + "ce_ib": 4.249927043914795, + "ce_orig": 0.7018132209777832, + "epoch": 0.29247249982026025, + "kl_loss": 0.7936820387840271, + "loss_ib": 0.01218674797564745, + "step": 1017 + }, + { + "ce_ib": 4.743485450744629, + "ce_orig": 0.8688510060310364, + "epoch": 0.29247249982026025, + "kl_loss": 0.21329936385154724, + "loss_ib": 0.006876479368656874, + "step": 1017 + }, + { + "ce_ib": 9.883633613586426, + "ce_orig": 0.8130282163619995, + "epoch": 0.2927600833992379, + "kl_loss": 0.4127139747142792, + "loss_ib": 0.014010773971676826, + "step": 1018 + }, + { + "ce_ib": 6.509448528289795, + "ce_orig": 0.7483327388763428, + "epoch": 0.2927600833992379, + "kl_loss": 0.2256876528263092, + "loss_ib": 0.008766325190663338, + "step": 1018 + }, + { + "ce_ib": 7.60127067565918, + "ce_orig": 0.8613872528076172, + "epoch": 0.2927600833992379, + "kl_loss": 0.2583634853363037, + "loss_ib": 0.010184905491769314, + "step": 1018 + }, + { + "ce_ib": 3.682047128677368, + "ce_orig": 0.4670972228050232, + "epoch": 0.2927600833992379, + "kl_loss": 0.2330019176006317, + "loss_ib": 0.006012066267430782, + "step": 1018 + }, + { + "ce_ib": 2.297346353530884, + "ce_orig": 0.24013860523700714, + "epoch": 0.29304766697821555, + "kl_loss": 0.6258134245872498, + "loss_ib": 0.008555480279028416, + "step": 1019 + }, + { + "ce_ib": 8.305569648742676, + "ce_orig": 0.9835488200187683, + "epoch": 0.29304766697821555, + "kl_loss": 0.36162036657333374, + "loss_ib": 0.011921772733330727, + "step": 1019 + }, + { + "ce_ib": 10.808023452758789, + "ce_orig": 1.32200026512146, + "epoch": 0.29304766697821555, + "kl_loss": 0.27893298864364624, + "loss_ib": 0.013597352430224419, + "step": 1019 + }, + { + "ce_ib": 9.159648895263672, + "ce_orig": 0.862013041973114, + "epoch": 0.29304766697821555, + "kl_loss": 0.2749783396720886, + "loss_ib": 0.011909431777894497, + "step": 1019 + }, + { + "epoch": 0.2933352505571932, + "grad_norm": 0.08970300853252411, + "learning_rate": 9.882411418176023e-06, + "loss": 0.8709, + "step": 1020 + }, + { + "ce_ib": 3.6713716983795166, + "ce_orig": 0.7519941926002502, + "epoch": 0.2933352505571932, + "kl_loss": 0.20077042281627655, + "loss_ib": 0.0056790756061673164, + "step": 1020 + }, + { + "ce_ib": 10.4853515625, + "ce_orig": 1.4266449213027954, + "epoch": 0.2933352505571932, + "kl_loss": 0.20042094588279724, + "loss_ib": 0.012489561922848225, + "step": 1020 + }, + { + "ce_ib": 5.523143768310547, + "ce_orig": 0.49153417348861694, + "epoch": 0.2933352505571932, + "kl_loss": 0.3945586085319519, + "loss_ib": 0.009468729607760906, + "step": 1020 + }, + { + "ce_ib": 8.129873275756836, + "ce_orig": 0.6584604978561401, + "epoch": 0.2933352505571932, + "kl_loss": 0.36802613735198975, + "loss_ib": 0.011810134164988995, + "step": 1020 + }, + { + "ce_ib": 10.585819244384766, + "ce_orig": 1.2593013048171997, + "epoch": 0.29362283413617085, + "kl_loss": 0.2198692411184311, + "loss_ib": 0.012784511782228947, + "step": 1021 + }, + { + "ce_ib": 8.541426658630371, + "ce_orig": 0.8910534977912903, + "epoch": 0.29362283413617085, + "kl_loss": 0.2919941246509552, + "loss_ib": 0.011461366899311543, + "step": 1021 + }, + { + "ce_ib": 6.236502647399902, + "ce_orig": 0.5739825963973999, + "epoch": 0.29362283413617085, + "kl_loss": 0.3800389766693115, + "loss_ib": 0.010036892257630825, + "step": 1021 + }, + { + "ce_ib": 5.119142055511475, + "ce_orig": 0.3912176191806793, + "epoch": 0.29362283413617085, + "kl_loss": 0.1975279450416565, + "loss_ib": 0.007094421423971653, + "step": 1021 + }, + { + "ce_ib": 6.980103492736816, + "ce_orig": 0.6286670565605164, + "epoch": 0.2939104177151485, + "kl_loss": 0.35218000411987305, + "loss_ib": 0.010501904413104057, + "step": 1022 + }, + { + "ce_ib": 9.082075119018555, + "ce_orig": 1.1112430095672607, + "epoch": 0.2939104177151485, + "kl_loss": 0.33446168899536133, + "loss_ib": 0.012426692061126232, + "step": 1022 + }, + { + "ce_ib": 8.302471160888672, + "ce_orig": 0.8119482398033142, + "epoch": 0.2939104177151485, + "kl_loss": 0.28878772258758545, + "loss_ib": 0.011190347373485565, + "step": 1022 + }, + { + "ce_ib": 7.19743013381958, + "ce_orig": 0.737561047077179, + "epoch": 0.2939104177151485, + "kl_loss": 0.35057979822158813, + "loss_ib": 0.01070322748273611, + "step": 1022 + }, + { + "ce_ib": 6.976788520812988, + "ce_orig": 1.0390961170196533, + "epoch": 0.2941980012941261, + "kl_loss": 0.30705952644348145, + "loss_ib": 0.010047382675111294, + "step": 1023 + }, + { + "ce_ib": 6.412473201751709, + "ce_orig": 0.6833570003509521, + "epoch": 0.2941980012941261, + "kl_loss": 0.25251883268356323, + "loss_ib": 0.008937661536037922, + "step": 1023 + }, + { + "ce_ib": 4.118930816650391, + "ce_orig": 0.4952293038368225, + "epoch": 0.2941980012941261, + "kl_loss": 0.30007433891296387, + "loss_ib": 0.007119674701243639, + "step": 1023 + }, + { + "ce_ib": 8.888395309448242, + "ce_orig": 0.9846009612083435, + "epoch": 0.2941980012941261, + "kl_loss": 0.2865602970123291, + "loss_ib": 0.011753997765481472, + "step": 1023 + }, + { + "ce_ib": 8.605910301208496, + "ce_orig": 0.9668225049972534, + "epoch": 0.2944855848731037, + "kl_loss": 0.22117966413497925, + "loss_ib": 0.01081770658493042, + "step": 1024 + }, + { + "ce_ib": 7.79697322845459, + "ce_orig": 0.7142451405525208, + "epoch": 0.2944855848731037, + "kl_loss": 0.276878297328949, + "loss_ib": 0.010565755888819695, + "step": 1024 + }, + { + "ce_ib": 13.723976135253906, + "ce_orig": 1.894836664199829, + "epoch": 0.2944855848731037, + "kl_loss": 0.25465625524520874, + "loss_ib": 0.016270538792014122, + "step": 1024 + }, + { + "ce_ib": 10.904229164123535, + "ce_orig": 1.352797508239746, + "epoch": 0.2944855848731037, + "kl_loss": 0.24989211559295654, + "loss_ib": 0.01340315118432045, + "step": 1024 + }, + { + "epoch": 0.2947731684520814, + "grad_norm": 0.12832897901535034, + "learning_rate": 9.880732316034124e-06, + "loss": 0.8606, + "step": 1025 + }, + { + "ce_ib": 6.572211742401123, + "ce_orig": 0.46886974573135376, + "epoch": 0.2947731684520814, + "kl_loss": 0.38375556468963623, + "loss_ib": 0.010409767739474773, + "step": 1025 + }, + { + "ce_ib": 4.577428340911865, + "ce_orig": 0.5632930994033813, + "epoch": 0.2947731684520814, + "kl_loss": 0.18317699432373047, + "loss_ib": 0.006409198045730591, + "step": 1025 + }, + { + "ce_ib": 5.605873107910156, + "ce_orig": 0.8631706833839417, + "epoch": 0.2947731684520814, + "kl_loss": 0.25881266593933105, + "loss_ib": 0.008193999528884888, + "step": 1025 + }, + { + "ce_ib": 8.974735260009766, + "ce_orig": 1.4067844152450562, + "epoch": 0.2947731684520814, + "kl_loss": 0.4378132224082947, + "loss_ib": 0.013352867215871811, + "step": 1025 + }, + { + "ce_ib": 7.069388389587402, + "ce_orig": 0.8898831009864807, + "epoch": 0.295060752031059, + "kl_loss": 0.31348714232444763, + "loss_ib": 0.010204260237514973, + "step": 1026 + }, + { + "ce_ib": 7.188381195068359, + "ce_orig": 0.4827899932861328, + "epoch": 0.295060752031059, + "kl_loss": 0.23111492395401, + "loss_ib": 0.00949953030794859, + "step": 1026 + }, + { + "ce_ib": 4.271545886993408, + "ce_orig": 0.6371316909790039, + "epoch": 0.295060752031059, + "kl_loss": 0.21791093051433563, + "loss_ib": 0.0064506554044783115, + "step": 1026 + }, + { + "ce_ib": 8.962451934814453, + "ce_orig": 1.176334023475647, + "epoch": 0.295060752031059, + "kl_loss": 0.24294477701187134, + "loss_ib": 0.011391899548470974, + "step": 1026 + }, + { + "ce_ib": 9.733299255371094, + "ce_orig": 0.9046944379806519, + "epoch": 0.29534833561003665, + "kl_loss": 0.21920621395111084, + "loss_ib": 0.01192536111921072, + "step": 1027 + }, + { + "ce_ib": 6.503138542175293, + "ce_orig": 0.8069035410881042, + "epoch": 0.29534833561003665, + "kl_loss": 0.18204201757907867, + "loss_ib": 0.00832355860620737, + "step": 1027 + }, + { + "ce_ib": 6.754278182983398, + "ce_orig": 0.4663512110710144, + "epoch": 0.29534833561003665, + "kl_loss": 0.2619379162788391, + "loss_ib": 0.009373657405376434, + "step": 1027 + }, + { + "ce_ib": 5.69559907913208, + "ce_orig": 0.8563054800033569, + "epoch": 0.29534833561003665, + "kl_loss": 0.2036648690700531, + "loss_ib": 0.007732247933745384, + "step": 1027 + }, + { + "ce_ib": 9.430176734924316, + "ce_orig": 1.1677995920181274, + "epoch": 0.29563591918901433, + "kl_loss": 0.5974254608154297, + "loss_ib": 0.015404431149363518, + "step": 1028 + }, + { + "ce_ib": 4.5675787925720215, + "ce_orig": 0.3929074704647064, + "epoch": 0.29563591918901433, + "kl_loss": 0.27183613181114197, + "loss_ib": 0.007285939995199442, + "step": 1028 + }, + { + "ce_ib": 10.300110816955566, + "ce_orig": 0.7582395076751709, + "epoch": 0.29563591918901433, + "kl_loss": 0.4548535943031311, + "loss_ib": 0.014848646707832813, + "step": 1028 + }, + { + "ce_ib": 7.837728023529053, + "ce_orig": 0.41884028911590576, + "epoch": 0.29563591918901433, + "kl_loss": 0.2513379156589508, + "loss_ib": 0.010351106524467468, + "step": 1028 + }, + { + "ce_ib": 5.368338108062744, + "ce_orig": 0.3831135928630829, + "epoch": 0.29592350276799195, + "kl_loss": 0.31224921345710754, + "loss_ib": 0.008490830659866333, + "step": 1029 + }, + { + "ce_ib": 5.802161693572998, + "ce_orig": 0.46503645181655884, + "epoch": 0.29592350276799195, + "kl_loss": 0.18800479173660278, + "loss_ib": 0.0076822093687951565, + "step": 1029 + }, + { + "ce_ib": 5.5843281745910645, + "ce_orig": 0.7334034442901611, + "epoch": 0.29592350276799195, + "kl_loss": 0.25795796513557434, + "loss_ib": 0.008163907565176487, + "step": 1029 + }, + { + "ce_ib": 7.496910572052002, + "ce_orig": 1.0607128143310547, + "epoch": 0.29592350276799195, + "kl_loss": 0.24131786823272705, + "loss_ib": 0.00991008896380663, + "step": 1029 + }, + { + "epoch": 0.2962110863469696, + "grad_norm": 0.09884575009346008, + "learning_rate": 9.879041455075236e-06, + "loss": 0.8737, + "step": 1030 + }, + { + "ce_ib": 6.056578159332275, + "ce_orig": 0.7114567756652832, + "epoch": 0.2962110863469696, + "kl_loss": 0.28722214698791504, + "loss_ib": 0.008928799070417881, + "step": 1030 + }, + { + "ce_ib": 9.305815696716309, + "ce_orig": 0.5742266178131104, + "epoch": 0.2962110863469696, + "kl_loss": 0.35437315702438354, + "loss_ib": 0.012849547900259495, + "step": 1030 + }, + { + "ce_ib": 5.0757365226745605, + "ce_orig": 0.5962457656860352, + "epoch": 0.2962110863469696, + "kl_loss": 0.253349244594574, + "loss_ib": 0.007609229069203138, + "step": 1030 + }, + { + "ce_ib": 13.085478782653809, + "ce_orig": 1.712263822555542, + "epoch": 0.2962110863469696, + "kl_loss": 0.2800062894821167, + "loss_ib": 0.01588554121553898, + "step": 1030 + }, + { + "ce_ib": 7.016113758087158, + "ce_orig": 0.7030823826789856, + "epoch": 0.29649866992594726, + "kl_loss": 0.3182916045188904, + "loss_ib": 0.010199028998613358, + "step": 1031 + }, + { + "ce_ib": 7.362382888793945, + "ce_orig": 0.5507587790489197, + "epoch": 0.29649866992594726, + "kl_loss": 0.28012615442276, + "loss_ib": 0.010163644328713417, + "step": 1031 + }, + { + "ce_ib": 5.964332103729248, + "ce_orig": 0.39900022745132446, + "epoch": 0.29649866992594726, + "kl_loss": 0.26267558336257935, + "loss_ib": 0.00859108753502369, + "step": 1031 + }, + { + "ce_ib": 3.582094430923462, + "ce_orig": 0.5880594849586487, + "epoch": 0.29649866992594726, + "kl_loss": 0.20877045392990112, + "loss_ib": 0.005669798702001572, + "step": 1031 + }, + { + "ce_ib": 6.128958225250244, + "ce_orig": 0.9819390773773193, + "epoch": 0.2967862535049249, + "kl_loss": 0.22314751148223877, + "loss_ib": 0.008360433392226696, + "step": 1032 + }, + { + "ce_ib": 5.336613655090332, + "ce_orig": 0.6773203611373901, + "epoch": 0.2967862535049249, + "kl_loss": 0.18450209498405457, + "loss_ib": 0.007181634660810232, + "step": 1032 + }, + { + "ce_ib": 8.280702590942383, + "ce_orig": 1.1966774463653564, + "epoch": 0.2967862535049249, + "kl_loss": 0.21508166193962097, + "loss_ib": 0.010431519709527493, + "step": 1032 + }, + { + "ce_ib": 7.033453464508057, + "ce_orig": 0.8263683319091797, + "epoch": 0.2967862535049249, + "kl_loss": 0.1751515120267868, + "loss_ib": 0.00878496840596199, + "step": 1032 + }, + { + "ce_ib": 5.190278053283691, + "ce_orig": 0.7666304707527161, + "epoch": 0.2970738370839025, + "kl_loss": 0.19723618030548096, + "loss_ib": 0.007162639871239662, + "step": 1033 + }, + { + "ce_ib": 6.446037292480469, + "ce_orig": 0.6451196074485779, + "epoch": 0.2970738370839025, + "kl_loss": 0.6441924571990967, + "loss_ib": 0.012887961231172085, + "step": 1033 + }, + { + "ce_ib": 9.412089347839355, + "ce_orig": 1.2795425653457642, + "epoch": 0.2970738370839025, + "kl_loss": 0.2573137879371643, + "loss_ib": 0.011985227465629578, + "step": 1033 + }, + { + "ce_ib": 8.65932846069336, + "ce_orig": 0.6759138107299805, + "epoch": 0.2970738370839025, + "kl_loss": 0.3903021216392517, + "loss_ib": 0.012562349438667297, + "step": 1033 + }, + { + "ce_ib": 12.967315673828125, + "ce_orig": 2.125581741333008, + "epoch": 0.2973614206628801, + "kl_loss": 0.4240291714668274, + "loss_ib": 0.017207607626914978, + "step": 1034 + }, + { + "ce_ib": 6.941053867340088, + "ce_orig": 0.9638831615447998, + "epoch": 0.2973614206628801, + "kl_loss": 0.2834685444831848, + "loss_ib": 0.009775739163160324, + "step": 1034 + }, + { + "ce_ib": 11.784566879272461, + "ce_orig": 1.6271092891693115, + "epoch": 0.2973614206628801, + "kl_loss": 0.2749708294868469, + "loss_ib": 0.01453427504748106, + "step": 1034 + }, + { + "ce_ib": 10.994661331176758, + "ce_orig": 1.71238374710083, + "epoch": 0.2973614206628801, + "kl_loss": 0.3320281505584717, + "loss_ib": 0.014314942993223667, + "step": 1034 + }, + { + "epoch": 0.2976490042418578, + "grad_norm": 0.10119609534740448, + "learning_rate": 9.877338839373032e-06, + "loss": 0.881, + "step": 1035 + }, + { + "ce_ib": 8.704519271850586, + "ce_orig": 0.5223831534385681, + "epoch": 0.2976490042418578, + "kl_loss": 0.312034547328949, + "loss_ib": 0.011824864894151688, + "step": 1035 + }, + { + "ce_ib": 6.133333206176758, + "ce_orig": 0.764382004737854, + "epoch": 0.2976490042418578, + "kl_loss": 0.26369595527648926, + "loss_ib": 0.0087702926248312, + "step": 1035 + }, + { + "ce_ib": 6.9865946769714355, + "ce_orig": 0.6817479729652405, + "epoch": 0.2976490042418578, + "kl_loss": 0.20456379652023315, + "loss_ib": 0.00903223268687725, + "step": 1035 + }, + { + "ce_ib": 7.445248126983643, + "ce_orig": 0.923649787902832, + "epoch": 0.2976490042418578, + "kl_loss": 0.2720944583415985, + "loss_ib": 0.01016619335860014, + "step": 1035 + }, + { + "ce_ib": 7.735437393188477, + "ce_orig": 0.8882045149803162, + "epoch": 0.29793658782083543, + "kl_loss": 0.3494291603565216, + "loss_ib": 0.011229729279875755, + "step": 1036 + }, + { + "ce_ib": 7.865479469299316, + "ce_orig": 0.8003482222557068, + "epoch": 0.29793658782083543, + "kl_loss": 0.26265841722488403, + "loss_ib": 0.010492063127458096, + "step": 1036 + }, + { + "ce_ib": 5.9282989501953125, + "ce_orig": 0.8610711097717285, + "epoch": 0.29793658782083543, + "kl_loss": 0.21928074955940247, + "loss_ib": 0.008121106773614883, + "step": 1036 + }, + { + "ce_ib": 4.373647689819336, + "ce_orig": 0.5918238759040833, + "epoch": 0.29793658782083543, + "kl_loss": 0.21359167993068695, + "loss_ib": 0.006509564351290464, + "step": 1036 + }, + { + "ce_ib": 8.592558860778809, + "ce_orig": 1.061389684677124, + "epoch": 0.29822417139981305, + "kl_loss": 0.2698967456817627, + "loss_ib": 0.01129152625799179, + "step": 1037 + }, + { + "ce_ib": 8.775501251220703, + "ce_orig": 0.823801577091217, + "epoch": 0.29822417139981305, + "kl_loss": 0.4299342930316925, + "loss_ib": 0.01307484321296215, + "step": 1037 + }, + { + "ce_ib": 7.973211765289307, + "ce_orig": 0.808665931224823, + "epoch": 0.29822417139981305, + "kl_loss": 0.3724827766418457, + "loss_ib": 0.011698039248585701, + "step": 1037 + }, + { + "ce_ib": 10.214940071105957, + "ce_orig": 1.393505573272705, + "epoch": 0.29822417139981305, + "kl_loss": 0.4218568801879883, + "loss_ib": 0.014433508738875389, + "step": 1037 + }, + { + "ce_ib": 12.167582511901855, + "ce_orig": 0.8209355473518372, + "epoch": 0.29851175497879073, + "kl_loss": 0.3849828243255615, + "loss_ib": 0.016017410904169083, + "step": 1038 + }, + { + "ce_ib": 6.071450710296631, + "ce_orig": 0.5013828277587891, + "epoch": 0.29851175497879073, + "kl_loss": 0.2566567361354828, + "loss_ib": 0.008638017810881138, + "step": 1038 + }, + { + "ce_ib": 9.4537992477417, + "ce_orig": 1.401556134223938, + "epoch": 0.29851175497879073, + "kl_loss": 0.21812914311885834, + "loss_ib": 0.011635090224444866, + "step": 1038 + }, + { + "ce_ib": 6.496313095092773, + "ce_orig": 0.8470216989517212, + "epoch": 0.29851175497879073, + "kl_loss": 0.2540469765663147, + "loss_ib": 0.009036783128976822, + "step": 1038 + }, + { + "ce_ib": 7.5606279373168945, + "ce_orig": 1.107744574546814, + "epoch": 0.29879933855776836, + "kl_loss": 0.24179263412952423, + "loss_ib": 0.009978554211556911, + "step": 1039 + }, + { + "ce_ib": 7.44442892074585, + "ce_orig": 0.7664405703544617, + "epoch": 0.29879933855776836, + "kl_loss": 0.3160368502140045, + "loss_ib": 0.010604796931147575, + "step": 1039 + }, + { + "ce_ib": 4.493764877319336, + "ce_orig": 0.8293079137802124, + "epoch": 0.29879933855776836, + "kl_loss": 0.23693546652793884, + "loss_ib": 0.006863119546324015, + "step": 1039 + }, + { + "ce_ib": 9.437397003173828, + "ce_orig": 1.3835856914520264, + "epoch": 0.29879933855776836, + "kl_loss": 0.32433897256851196, + "loss_ib": 0.012680786661803722, + "step": 1039 + }, + { + "epoch": 0.299086922136746, + "grad_norm": 0.12828873097896576, + "learning_rate": 9.875624473029508e-06, + "loss": 0.868, + "step": 1040 + }, + { + "ce_ib": 5.776780605316162, + "ce_orig": 0.6242812871932983, + "epoch": 0.299086922136746, + "kl_loss": 0.21151088178157806, + "loss_ib": 0.007891889661550522, + "step": 1040 + }, + { + "ce_ib": 6.101773262023926, + "ce_orig": 0.7682925462722778, + "epoch": 0.299086922136746, + "kl_loss": 0.16745525598526, + "loss_ib": 0.007776325568556786, + "step": 1040 + }, + { + "ce_ib": 6.381745338439941, + "ce_orig": 0.8632617592811584, + "epoch": 0.299086922136746, + "kl_loss": 0.21712660789489746, + "loss_ib": 0.00855301134288311, + "step": 1040 + }, + { + "ce_ib": 9.156272888183594, + "ce_orig": 0.7367652654647827, + "epoch": 0.299086922136746, + "kl_loss": 0.4417145252227783, + "loss_ib": 0.0135734174400568, + "step": 1040 + }, + { + "ce_ib": 6.1640424728393555, + "ce_orig": 0.5637131333351135, + "epoch": 0.2993745057157236, + "kl_loss": 0.24898210167884827, + "loss_ib": 0.008653863333165646, + "step": 1041 + }, + { + "ce_ib": 5.767309665679932, + "ce_orig": 0.6547291278839111, + "epoch": 0.2993745057157236, + "kl_loss": 0.26231563091278076, + "loss_ib": 0.008390465751290321, + "step": 1041 + }, + { + "ce_ib": 6.178884983062744, + "ce_orig": 0.8318864703178406, + "epoch": 0.2993745057157236, + "kl_loss": 0.2267499417066574, + "loss_ib": 0.008446384221315384, + "step": 1041 + }, + { + "ce_ib": 6.974244594573975, + "ce_orig": 0.7129083275794983, + "epoch": 0.2993745057157236, + "kl_loss": 0.30809468030929565, + "loss_ib": 0.01005519088357687, + "step": 1041 + }, + { + "ce_ib": 7.284916400909424, + "ce_orig": 0.9830975532531738, + "epoch": 0.2996620892947013, + "kl_loss": 0.23813575506210327, + "loss_ib": 0.009666274301707745, + "step": 1042 + }, + { + "ce_ib": 4.249719619750977, + "ce_orig": 0.7718077301979065, + "epoch": 0.2996620892947013, + "kl_loss": 0.1756143569946289, + "loss_ib": 0.006005862727761269, + "step": 1042 + }, + { + "ce_ib": 9.810495376586914, + "ce_orig": 1.5580822229385376, + "epoch": 0.2996620892947013, + "kl_loss": 0.3328104019165039, + "loss_ib": 0.013138598762452602, + "step": 1042 + }, + { + "ce_ib": 7.983740329742432, + "ce_orig": 1.1797325611114502, + "epoch": 0.2996620892947013, + "kl_loss": 0.30479660630226135, + "loss_ib": 0.011031705886125565, + "step": 1042 + }, + { + "ce_ib": 5.6402082443237305, + "ce_orig": 0.8855639696121216, + "epoch": 0.2999496728736789, + "kl_loss": 0.2435443103313446, + "loss_ib": 0.008075650781393051, + "step": 1043 + }, + { + "ce_ib": 6.360856533050537, + "ce_orig": 0.9510517716407776, + "epoch": 0.2999496728736789, + "kl_loss": 0.24474883079528809, + "loss_ib": 0.008808344602584839, + "step": 1043 + }, + { + "ce_ib": 7.274141311645508, + "ce_orig": 0.7802663445472717, + "epoch": 0.2999496728736789, + "kl_loss": 0.2819164991378784, + "loss_ib": 0.010093306191265583, + "step": 1043 + }, + { + "ce_ib": 6.382179260253906, + "ce_orig": 0.9669009447097778, + "epoch": 0.2999496728736789, + "kl_loss": 0.33987829089164734, + "loss_ib": 0.009780962020158768, + "step": 1043 + }, + { + "ce_ib": 9.163640022277832, + "ce_orig": 1.0312260389328003, + "epoch": 0.30023725645265653, + "kl_loss": 0.2656589150428772, + "loss_ib": 0.011820228770375252, + "step": 1044 + }, + { + "ce_ib": 11.79682731628418, + "ce_orig": 0.9269500374794006, + "epoch": 0.30023725645265653, + "kl_loss": 0.24998760223388672, + "loss_ib": 0.014296703040599823, + "step": 1044 + }, + { + "ce_ib": 11.359130859375, + "ce_orig": 1.4259446859359741, + "epoch": 0.30023725645265653, + "kl_loss": 0.24341662228107452, + "loss_ib": 0.013793298043310642, + "step": 1044 + }, + { + "ce_ib": 9.31595230102539, + "ce_orig": 0.473222553730011, + "epoch": 0.30023725645265653, + "kl_loss": 0.28317737579345703, + "loss_ib": 0.012147726491093636, + "step": 1044 + }, + { + "epoch": 0.3005248400316342, + "grad_norm": 0.09338007122278214, + "learning_rate": 9.873898360174972e-06, + "loss": 0.8916, + "step": 1045 + }, + { + "ce_ib": 3.8569414615631104, + "ce_orig": 0.659389853477478, + "epoch": 0.3005248400316342, + "kl_loss": 0.17949606478214264, + "loss_ib": 0.005651901941746473, + "step": 1045 + }, + { + "ce_ib": 4.244134902954102, + "ce_orig": 0.7671023607254028, + "epoch": 0.3005248400316342, + "kl_loss": 0.2857362926006317, + "loss_ib": 0.007101497612893581, + "step": 1045 + }, + { + "ce_ib": 8.260163307189941, + "ce_orig": 1.042970061302185, + "epoch": 0.3005248400316342, + "kl_loss": 0.36634838581085205, + "loss_ib": 0.011923646554350853, + "step": 1045 + }, + { + "ce_ib": 5.835524559020996, + "ce_orig": 1.086169958114624, + "epoch": 0.3005248400316342, + "kl_loss": 0.26493754982948303, + "loss_ib": 0.008484899997711182, + "step": 1045 + }, + { + "ce_ib": 5.560356140136719, + "ce_orig": 0.6846012473106384, + "epoch": 0.30081242361061183, + "kl_loss": 0.23671673238277435, + "loss_ib": 0.00792752392590046, + "step": 1046 + }, + { + "ce_ib": 8.212461471557617, + "ce_orig": 1.0910207033157349, + "epoch": 0.30081242361061183, + "kl_loss": 0.25429821014404297, + "loss_ib": 0.010755443014204502, + "step": 1046 + }, + { + "ce_ib": 8.125418663024902, + "ce_orig": 1.192816972732544, + "epoch": 0.30081242361061183, + "kl_loss": 0.3981271982192993, + "loss_ib": 0.012106690555810928, + "step": 1046 + }, + { + "ce_ib": 7.482778549194336, + "ce_orig": 1.0049912929534912, + "epoch": 0.30081242361061183, + "kl_loss": 0.3500697612762451, + "loss_ib": 0.010983476415276527, + "step": 1046 + }, + { + "ce_ib": 8.525084495544434, + "ce_orig": 0.8908082842826843, + "epoch": 0.30110000718958946, + "kl_loss": 0.31901606917381287, + "loss_ib": 0.011715245433151722, + "step": 1047 + }, + { + "ce_ib": 9.571982383728027, + "ce_orig": 1.1031574010849, + "epoch": 0.30110000718958946, + "kl_loss": 0.49426859617233276, + "loss_ib": 0.01451466791331768, + "step": 1047 + }, + { + "ce_ib": 5.504137992858887, + "ce_orig": 0.6715264916419983, + "epoch": 0.30110000718958946, + "kl_loss": 0.2991946339607239, + "loss_ib": 0.008496084250509739, + "step": 1047 + }, + { + "ce_ib": 7.694691181182861, + "ce_orig": 0.9191931486129761, + "epoch": 0.30110000718958946, + "kl_loss": 0.48156046867370605, + "loss_ib": 0.012510295957326889, + "step": 1047 + }, + { + "ce_ib": 5.405656814575195, + "ce_orig": 0.42357781529426575, + "epoch": 0.30138759076856714, + "kl_loss": 0.29605257511138916, + "loss_ib": 0.008366182446479797, + "step": 1048 + }, + { + "ce_ib": 6.5060834884643555, + "ce_orig": 0.7840076088905334, + "epoch": 0.30138759076856714, + "kl_loss": 0.22564014792442322, + "loss_ib": 0.008762484416365623, + "step": 1048 + }, + { + "ce_ib": 6.392003059387207, + "ce_orig": 0.6996757388114929, + "epoch": 0.30138759076856714, + "kl_loss": 0.23019085824489594, + "loss_ib": 0.00869391206651926, + "step": 1048 + }, + { + "ce_ib": 9.806346893310547, + "ce_orig": 1.645071029663086, + "epoch": 0.30138759076856714, + "kl_loss": 0.302403062582016, + "loss_ib": 0.01283037755638361, + "step": 1048 + }, + { + "ce_ib": 7.08400821685791, + "ce_orig": 0.5776684880256653, + "epoch": 0.30167517434754476, + "kl_loss": 0.23051565885543823, + "loss_ib": 0.009389164857566357, + "step": 1049 + }, + { + "ce_ib": 8.618279457092285, + "ce_orig": 1.1826684474945068, + "epoch": 0.30167517434754476, + "kl_loss": 0.23479107022285461, + "loss_ib": 0.01096619013696909, + "step": 1049 + }, + { + "ce_ib": 12.606616020202637, + "ce_orig": 1.786026954650879, + "epoch": 0.30167517434754476, + "kl_loss": 0.4211004972457886, + "loss_ib": 0.016817620024085045, + "step": 1049 + }, + { + "ce_ib": 9.673046112060547, + "ce_orig": 1.1197701692581177, + "epoch": 0.30167517434754476, + "kl_loss": 0.42400917410850525, + "loss_ib": 0.013913137838244438, + "step": 1049 + }, + { + "epoch": 0.3019627579265224, + "grad_norm": 0.12706100940704346, + "learning_rate": 9.872160504968032e-06, + "loss": 0.958, + "step": 1050 + }, + { + "ce_ib": 7.350996494293213, + "ce_orig": 0.9169667363166809, + "epoch": 0.3019627579265224, + "kl_loss": 0.2554692029953003, + "loss_ib": 0.009905688464641571, + "step": 1050 + }, + { + "ce_ib": 5.9857635498046875, + "ce_orig": 0.606926679611206, + "epoch": 0.3019627579265224, + "kl_loss": 0.25890299677848816, + "loss_ib": 0.008574793115258217, + "step": 1050 + }, + { + "ce_ib": 7.011756420135498, + "ce_orig": 0.5373194217681885, + "epoch": 0.3019627579265224, + "kl_loss": 0.29745566844940186, + "loss_ib": 0.009986313059926033, + "step": 1050 + }, + { + "ce_ib": 8.080672264099121, + "ce_orig": 1.0620334148406982, + "epoch": 0.3019627579265224, + "kl_loss": 0.31463325023651123, + "loss_ib": 0.01122700423002243, + "step": 1050 + }, + { + "ce_ib": 4.345581531524658, + "ce_orig": 0.7046716809272766, + "epoch": 0.3022503415055, + "kl_loss": 0.17745816707611084, + "loss_ib": 0.006120163016021252, + "step": 1051 + }, + { + "ce_ib": 6.712942600250244, + "ce_orig": 0.8896129131317139, + "epoch": 0.3022503415055, + "kl_loss": 0.25029951333999634, + "loss_ib": 0.009215937927365303, + "step": 1051 + }, + { + "ce_ib": 6.059515953063965, + "ce_orig": 0.8851151466369629, + "epoch": 0.3022503415055, + "kl_loss": 0.24742120504379272, + "loss_ib": 0.008533728308975697, + "step": 1051 + }, + { + "ce_ib": 6.032464981079102, + "ce_orig": 0.6225546002388, + "epoch": 0.3022503415055, + "kl_loss": 0.293154776096344, + "loss_ib": 0.008964012376964092, + "step": 1051 + }, + { + "ce_ib": 9.237442970275879, + "ce_orig": 1.07052743434906, + "epoch": 0.3025379250844777, + "kl_loss": 0.3022955656051636, + "loss_ib": 0.01226039882749319, + "step": 1052 + }, + { + "ce_ib": 6.975470066070557, + "ce_orig": 0.6021490097045898, + "epoch": 0.3025379250844777, + "kl_loss": 0.23589861392974854, + "loss_ib": 0.00933445617556572, + "step": 1052 + }, + { + "ce_ib": 4.114511489868164, + "ce_orig": 0.4750274419784546, + "epoch": 0.3025379250844777, + "kl_loss": 0.5271556377410889, + "loss_ib": 0.00938606821000576, + "step": 1052 + }, + { + "ce_ib": 6.8197221755981445, + "ce_orig": 1.1514414548873901, + "epoch": 0.3025379250844777, + "kl_loss": 0.24803856015205383, + "loss_ib": 0.009300108067691326, + "step": 1052 + }, + { + "ce_ib": 11.652617454528809, + "ce_orig": 1.5600248575210571, + "epoch": 0.3028255086634553, + "kl_loss": 0.24478089809417725, + "loss_ib": 0.014100425876677036, + "step": 1053 + }, + { + "ce_ib": 5.302700996398926, + "ce_orig": 0.6378918886184692, + "epoch": 0.3028255086634553, + "kl_loss": 0.2648119330406189, + "loss_ib": 0.007950820028781891, + "step": 1053 + }, + { + "ce_ib": 6.452362537384033, + "ce_orig": 1.062257170677185, + "epoch": 0.3028255086634553, + "kl_loss": 0.30296024680137634, + "loss_ib": 0.009481964632868767, + "step": 1053 + }, + { + "ce_ib": 6.938072681427002, + "ce_orig": 1.032331943511963, + "epoch": 0.3028255086634553, + "kl_loss": 0.3145177960395813, + "loss_ib": 0.01008325070142746, + "step": 1053 + }, + { + "ce_ib": 7.614224910736084, + "ce_orig": 0.9548846483230591, + "epoch": 0.30311309224243294, + "kl_loss": 0.4099164307117462, + "loss_ib": 0.011713389307260513, + "step": 1054 + }, + { + "ce_ib": 8.340744972229004, + "ce_orig": 1.0289732217788696, + "epoch": 0.30311309224243294, + "kl_loss": 0.3351021409034729, + "loss_ib": 0.011691765859723091, + "step": 1054 + }, + { + "ce_ib": 5.127655506134033, + "ce_orig": 0.7370550632476807, + "epoch": 0.30311309224243294, + "kl_loss": 0.2200349122285843, + "loss_ib": 0.007328004576265812, + "step": 1054 + }, + { + "ce_ib": 7.544470310211182, + "ce_orig": 0.5004482269287109, + "epoch": 0.30311309224243294, + "kl_loss": 0.33762672543525696, + "loss_ib": 0.010920737870037556, + "step": 1054 + }, + { + "epoch": 0.3034006758214106, + "grad_norm": 0.10111220180988312, + "learning_rate": 9.870410911595581e-06, + "loss": 0.8707, + "step": 1055 + }, + { + "ce_ib": 8.684142112731934, + "ce_orig": 0.7165521383285522, + "epoch": 0.3034006758214106, + "kl_loss": 0.3941863477230072, + "loss_ib": 0.012626005336642265, + "step": 1055 + }, + { + "ce_ib": 6.7958879470825195, + "ce_orig": 0.8354387879371643, + "epoch": 0.3034006758214106, + "kl_loss": 0.20312602818012238, + "loss_ib": 0.008827148005366325, + "step": 1055 + }, + { + "ce_ib": 4.682708263397217, + "ce_orig": 0.6456162929534912, + "epoch": 0.3034006758214106, + "kl_loss": 0.21023279428482056, + "loss_ib": 0.006785036064684391, + "step": 1055 + }, + { + "ce_ib": 3.8527894020080566, + "ce_orig": 0.7322303056716919, + "epoch": 0.3034006758214106, + "kl_loss": 0.19984376430511475, + "loss_ib": 0.005851226858794689, + "step": 1055 + }, + { + "ce_ib": 12.782026290893555, + "ce_orig": 1.3153671026229858, + "epoch": 0.30368825940038824, + "kl_loss": 0.2544756233692169, + "loss_ib": 0.015326782129704952, + "step": 1056 + }, + { + "ce_ib": 5.77595329284668, + "ce_orig": 0.6023581624031067, + "epoch": 0.30368825940038824, + "kl_loss": 0.25102975964546204, + "loss_ib": 0.00828625075519085, + "step": 1056 + }, + { + "ce_ib": 7.34418249130249, + "ce_orig": 0.7453755140304565, + "epoch": 0.30368825940038824, + "kl_loss": 0.2862794101238251, + "loss_ib": 0.010206976905465126, + "step": 1056 + }, + { + "ce_ib": 4.816280364990234, + "ce_orig": 0.5178154110908508, + "epoch": 0.30368825940038824, + "kl_loss": 0.23472407460212708, + "loss_ib": 0.007163520902395248, + "step": 1056 + }, + { + "ce_ib": 5.181397438049316, + "ce_orig": 0.48226651549339294, + "epoch": 0.30397584297936586, + "kl_loss": 0.3203040063381195, + "loss_ib": 0.008384437300264835, + "step": 1057 + }, + { + "ce_ib": 9.590699195861816, + "ce_orig": 0.9705502986907959, + "epoch": 0.30397584297936586, + "kl_loss": 0.19820456206798553, + "loss_ib": 0.011572744697332382, + "step": 1057 + }, + { + "ce_ib": 8.903388977050781, + "ce_orig": 1.0351887941360474, + "epoch": 0.30397584297936586, + "kl_loss": 0.4149536192417145, + "loss_ib": 0.013052924536168575, + "step": 1057 + }, + { + "ce_ib": 4.388928413391113, + "ce_orig": 0.3223065733909607, + "epoch": 0.30397584297936586, + "kl_loss": 0.5553755760192871, + "loss_ib": 0.009942684322595596, + "step": 1057 + }, + { + "ce_ib": 6.448616027832031, + "ce_orig": 0.42247146368026733, + "epoch": 0.30426342655834354, + "kl_loss": 0.267733633518219, + "loss_ib": 0.009125952608883381, + "step": 1058 + }, + { + "ce_ib": 4.839138507843018, + "ce_orig": 0.40205830335617065, + "epoch": 0.30426342655834354, + "kl_loss": 0.24600833654403687, + "loss_ib": 0.007299221586436033, + "step": 1058 + }, + { + "ce_ib": 4.3870015144348145, + "ce_orig": 0.6392689347267151, + "epoch": 0.30426342655834354, + "kl_loss": 0.26623180508613586, + "loss_ib": 0.007049319799989462, + "step": 1058 + }, + { + "ce_ib": 5.980587959289551, + "ce_orig": 0.5612409710884094, + "epoch": 0.30426342655834354, + "kl_loss": 0.3410719931125641, + "loss_ib": 0.009391307830810547, + "step": 1058 + }, + { + "ce_ib": 9.207480430603027, + "ce_orig": 0.8099052309989929, + "epoch": 0.30455101013732117, + "kl_loss": 0.316256046295166, + "loss_ib": 0.012370039708912373, + "step": 1059 + }, + { + "ce_ib": 7.6445631980896, + "ce_orig": 0.9055308699607849, + "epoch": 0.30455101013732117, + "kl_loss": 0.25548362731933594, + "loss_ib": 0.01019939873367548, + "step": 1059 + }, + { + "ce_ib": 2.876786231994629, + "ce_orig": 0.6097726821899414, + "epoch": 0.30455101013732117, + "kl_loss": 0.1825810670852661, + "loss_ib": 0.004702596925199032, + "step": 1059 + }, + { + "ce_ib": 11.932756423950195, + "ce_orig": 1.4808422327041626, + "epoch": 0.30455101013732117, + "kl_loss": 0.32290422916412354, + "loss_ib": 0.015161799266934395, + "step": 1059 + }, + { + "epoch": 0.3048385937162988, + "grad_norm": 0.08608844131231308, + "learning_rate": 9.8686495842728e-06, + "loss": 0.8409, + "step": 1060 + }, + { + "ce_ib": 12.914449691772461, + "ce_orig": 1.5343337059020996, + "epoch": 0.3048385937162988, + "kl_loss": 0.22788041830062866, + "loss_ib": 0.015193254686892033, + "step": 1060 + }, + { + "ce_ib": 5.87794303894043, + "ce_orig": 0.774178147315979, + "epoch": 0.3048385937162988, + "kl_loss": 0.1882862150669098, + "loss_ib": 0.007760804612189531, + "step": 1060 + }, + { + "ce_ib": 8.641247749328613, + "ce_orig": 0.8724406957626343, + "epoch": 0.3048385937162988, + "kl_loss": 0.29896390438079834, + "loss_ib": 0.011630886234343052, + "step": 1060 + }, + { + "ce_ib": 2.8925094604492188, + "ce_orig": 0.3218468129634857, + "epoch": 0.3048385937162988, + "kl_loss": 0.6263114213943481, + "loss_ib": 0.009155623614788055, + "step": 1060 + }, + { + "ce_ib": 10.991764068603516, + "ce_orig": 1.4918473958969116, + "epoch": 0.3051261772952764, + "kl_loss": 0.2560180425643921, + "loss_ib": 0.013551943935453892, + "step": 1061 + }, + { + "ce_ib": 7.894521236419678, + "ce_orig": 0.41926810145378113, + "epoch": 0.3051261772952764, + "kl_loss": 0.28496524691581726, + "loss_ib": 0.01074417307972908, + "step": 1061 + }, + { + "ce_ib": 11.378397941589355, + "ce_orig": 1.5567322969436646, + "epoch": 0.3051261772952764, + "kl_loss": 0.35284626483917236, + "loss_ib": 0.014906859956681728, + "step": 1061 + }, + { + "ce_ib": 8.901268005371094, + "ce_orig": 0.9377511143684387, + "epoch": 0.3051261772952764, + "kl_loss": 0.44969889521598816, + "loss_ib": 0.013398257084190845, + "step": 1061 + }, + { + "ce_ib": 4.749931812286377, + "ce_orig": 0.5768422484397888, + "epoch": 0.3054137608742541, + "kl_loss": 0.31295904517173767, + "loss_ib": 0.007879522629082203, + "step": 1062 + }, + { + "ce_ib": 6.151778697967529, + "ce_orig": 0.9215263724327087, + "epoch": 0.3054137608742541, + "kl_loss": 0.26666373014450073, + "loss_ib": 0.00881841592490673, + "step": 1062 + }, + { + "ce_ib": 7.430096626281738, + "ce_orig": 0.7150664925575256, + "epoch": 0.3054137608742541, + "kl_loss": 0.21466025710105896, + "loss_ib": 0.009576699696481228, + "step": 1062 + }, + { + "ce_ib": 10.958990097045898, + "ce_orig": 1.4779627323150635, + "epoch": 0.3054137608742541, + "kl_loss": 0.5556790232658386, + "loss_ib": 0.01651577837765217, + "step": 1062 + }, + { + "ce_ib": 5.780200004577637, + "ce_orig": 0.5452198386192322, + "epoch": 0.3057013444532317, + "kl_loss": 0.27294105291366577, + "loss_ib": 0.008509610779583454, + "step": 1063 + }, + { + "ce_ib": 5.6268839836120605, + "ce_orig": 0.4185033440589905, + "epoch": 0.3057013444532317, + "kl_loss": 0.22825220227241516, + "loss_ib": 0.00790940597653389, + "step": 1063 + }, + { + "ce_ib": 7.02670431137085, + "ce_orig": 0.8597890734672546, + "epoch": 0.3057013444532317, + "kl_loss": 0.19926466047763824, + "loss_ib": 0.00901935063302517, + "step": 1063 + }, + { + "ce_ib": 6.201532363891602, + "ce_orig": 0.6214030385017395, + "epoch": 0.3057013444532317, + "kl_loss": 0.17251111567020416, + "loss_ib": 0.007926642894744873, + "step": 1063 + }, + { + "ce_ib": 8.757668495178223, + "ce_orig": 0.7274843454360962, + "epoch": 0.30598892803220934, + "kl_loss": 0.2544369101524353, + "loss_ib": 0.011302037164568901, + "step": 1064 + }, + { + "ce_ib": 6.019333362579346, + "ce_orig": 0.53138267993927, + "epoch": 0.30598892803220934, + "kl_loss": 0.2464289516210556, + "loss_ib": 0.008483623154461384, + "step": 1064 + }, + { + "ce_ib": 11.079992294311523, + "ce_orig": 1.1950740814208984, + "epoch": 0.30598892803220934, + "kl_loss": 0.2509814202785492, + "loss_ib": 0.01358980592340231, + "step": 1064 + }, + { + "ce_ib": 5.924220085144043, + "ce_orig": 0.6483764052391052, + "epoch": 0.30598892803220934, + "kl_loss": 0.20502346754074097, + "loss_ib": 0.007974454201757908, + "step": 1064 + }, + { + "epoch": 0.306276511611187, + "grad_norm": 0.09530378878116608, + "learning_rate": 9.86687652724313e-06, + "loss": 0.8737, + "step": 1065 + }, + { + "ce_ib": 8.690861701965332, + "ce_orig": 0.8475526571273804, + "epoch": 0.306276511611187, + "kl_loss": 0.35495179891586304, + "loss_ib": 0.012240380048751831, + "step": 1065 + }, + { + "ce_ib": 8.537470817565918, + "ce_orig": 1.1267801523208618, + "epoch": 0.306276511611187, + "kl_loss": 0.20950856804847717, + "loss_ib": 0.010632556863129139, + "step": 1065 + }, + { + "ce_ib": 6.342406749725342, + "ce_orig": 0.5704479813575745, + "epoch": 0.306276511611187, + "kl_loss": 0.24826784431934357, + "loss_ib": 0.008825085125863552, + "step": 1065 + }, + { + "ce_ib": 9.24067211151123, + "ce_orig": 0.8120705485343933, + "epoch": 0.306276511611187, + "kl_loss": 0.287699818611145, + "loss_ib": 0.012117668986320496, + "step": 1065 + }, + { + "ce_ib": 5.934770107269287, + "ce_orig": 0.538998544216156, + "epoch": 0.30656409519016464, + "kl_loss": 0.21416616439819336, + "loss_ib": 0.008076431229710579, + "step": 1066 + }, + { + "ce_ib": 11.81325912475586, + "ce_orig": 2.007389783859253, + "epoch": 0.30656409519016464, + "kl_loss": 0.33129292726516724, + "loss_ib": 0.015126187354326248, + "step": 1066 + }, + { + "ce_ib": 6.053503513336182, + "ce_orig": 0.6393458843231201, + "epoch": 0.30656409519016464, + "kl_loss": 0.30956026911735535, + "loss_ib": 0.009149106219410896, + "step": 1066 + }, + { + "ce_ib": 6.03159236907959, + "ce_orig": 0.9210621118545532, + "epoch": 0.30656409519016464, + "kl_loss": 0.23238983750343323, + "loss_ib": 0.008355490863323212, + "step": 1066 + }, + { + "ce_ib": 8.668764114379883, + "ce_orig": 0.8384073376655579, + "epoch": 0.30685167876914227, + "kl_loss": 0.30814939737319946, + "loss_ib": 0.011750257574021816, + "step": 1067 + }, + { + "ce_ib": 6.8552374839782715, + "ce_orig": 0.7925436496734619, + "epoch": 0.30685167876914227, + "kl_loss": 0.22319209575653076, + "loss_ib": 0.009087158367037773, + "step": 1067 + }, + { + "ce_ib": 11.231310844421387, + "ce_orig": 1.3585479259490967, + "epoch": 0.30685167876914227, + "kl_loss": 0.2911950349807739, + "loss_ib": 0.014143262058496475, + "step": 1067 + }, + { + "ce_ib": 8.62575912475586, + "ce_orig": 1.0265886783599854, + "epoch": 0.30685167876914227, + "kl_loss": 0.3105778992176056, + "loss_ib": 0.01173153892159462, + "step": 1067 + }, + { + "ce_ib": 7.522292137145996, + "ce_orig": 0.8019500374794006, + "epoch": 0.30713926234811995, + "kl_loss": 0.4065636992454529, + "loss_ib": 0.011587929911911488, + "step": 1068 + }, + { + "ce_ib": 8.296854972839355, + "ce_orig": 0.5768155455589294, + "epoch": 0.30713926234811995, + "kl_loss": 0.35958027839660645, + "loss_ib": 0.011892656795680523, + "step": 1068 + }, + { + "ce_ib": 4.952488899230957, + "ce_orig": 0.6868072748184204, + "epoch": 0.30713926234811995, + "kl_loss": 0.21061554551124573, + "loss_ib": 0.007058644201606512, + "step": 1068 + }, + { + "ce_ib": 7.091208457946777, + "ce_orig": 1.1439430713653564, + "epoch": 0.30713926234811995, + "kl_loss": 0.2813361585140228, + "loss_ib": 0.009904569946229458, + "step": 1068 + }, + { + "ce_ib": 9.432252883911133, + "ce_orig": 1.3456711769104004, + "epoch": 0.30742684592709757, + "kl_loss": 0.38078808784484863, + "loss_ib": 0.013240134343504906, + "step": 1069 + }, + { + "ce_ib": 7.201338291168213, + "ce_orig": 1.095965027809143, + "epoch": 0.30742684592709757, + "kl_loss": 0.32409659028053284, + "loss_ib": 0.01044230442494154, + "step": 1069 + }, + { + "ce_ib": 7.40519905090332, + "ce_orig": 1.0274244546890259, + "epoch": 0.30742684592709757, + "kl_loss": 0.25500303506851196, + "loss_ib": 0.009955229237675667, + "step": 1069 + }, + { + "ce_ib": 8.46408462524414, + "ce_orig": 0.7477757930755615, + "epoch": 0.30742684592709757, + "kl_loss": 0.24867352843284607, + "loss_ib": 0.01095082052052021, + "step": 1069 + }, + { + "epoch": 0.3077144295060752, + "grad_norm": 0.11164247989654541, + "learning_rate": 9.865091744778281e-06, + "loss": 0.9093, + "step": 1070 + }, + { + "ce_ib": 8.997332572937012, + "ce_orig": 1.2727254629135132, + "epoch": 0.3077144295060752, + "kl_loss": 0.28486955165863037, + "loss_ib": 0.011846027337014675, + "step": 1070 + }, + { + "ce_ib": 8.73849868774414, + "ce_orig": 0.8974448442459106, + "epoch": 0.3077144295060752, + "kl_loss": 0.28219351172447205, + "loss_ib": 0.01156043354421854, + "step": 1070 + }, + { + "ce_ib": 6.766753196716309, + "ce_orig": 0.8781928420066833, + "epoch": 0.3077144295060752, + "kl_loss": 0.2647661864757538, + "loss_ib": 0.009414414875209332, + "step": 1070 + }, + { + "ce_ib": 7.04093074798584, + "ce_orig": 0.9725215435028076, + "epoch": 0.3077144295060752, + "kl_loss": 0.22904753684997559, + "loss_ib": 0.009331406094133854, + "step": 1070 + }, + { + "ce_ib": 5.76390266418457, + "ce_orig": 0.3865777850151062, + "epoch": 0.3080020130850528, + "kl_loss": 0.41168057918548584, + "loss_ib": 0.009880708530545235, + "step": 1071 + }, + { + "ce_ib": 7.124619007110596, + "ce_orig": 1.183470368385315, + "epoch": 0.3080020130850528, + "kl_loss": 0.25442296266555786, + "loss_ib": 0.009668848477303982, + "step": 1071 + }, + { + "ce_ib": 8.085482597351074, + "ce_orig": 1.3335832357406616, + "epoch": 0.3080020130850528, + "kl_loss": 0.25079238414764404, + "loss_ib": 0.010593406856060028, + "step": 1071 + }, + { + "ce_ib": 5.582581043243408, + "ce_orig": 0.6214055418968201, + "epoch": 0.3080020130850528, + "kl_loss": 0.32632315158843994, + "loss_ib": 0.008845812641084194, + "step": 1071 + }, + { + "ce_ib": 9.020593643188477, + "ce_orig": 1.0598443746566772, + "epoch": 0.3082895966640305, + "kl_loss": 0.2390134483575821, + "loss_ib": 0.011410728096961975, + "step": 1072 + }, + { + "ce_ib": 4.355950355529785, + "ce_orig": 0.5563782453536987, + "epoch": 0.3082895966640305, + "kl_loss": 0.44233155250549316, + "loss_ib": 0.00877926591783762, + "step": 1072 + }, + { + "ce_ib": 5.475048542022705, + "ce_orig": 0.8107210397720337, + "epoch": 0.3082895966640305, + "kl_loss": 0.29146504402160645, + "loss_ib": 0.008389698341488838, + "step": 1072 + }, + { + "ce_ib": 9.553082466125488, + "ce_orig": 1.393612265586853, + "epoch": 0.3082895966640305, + "kl_loss": 0.47753843665122986, + "loss_ib": 0.014328466728329659, + "step": 1072 + }, + { + "ce_ib": 4.682051658630371, + "ce_orig": 0.6672455072402954, + "epoch": 0.3085771802430081, + "kl_loss": 0.23742368817329407, + "loss_ib": 0.007056288421154022, + "step": 1073 + }, + { + "ce_ib": 9.294081687927246, + "ce_orig": 1.250988245010376, + "epoch": 0.3085771802430081, + "kl_loss": 0.24257344007492065, + "loss_ib": 0.01171981543302536, + "step": 1073 + }, + { + "ce_ib": 6.823581695556641, + "ce_orig": 0.5856737494468689, + "epoch": 0.3085771802430081, + "kl_loss": 0.4416767358779907, + "loss_ib": 0.011240348219871521, + "step": 1073 + }, + { + "ce_ib": 8.78899097442627, + "ce_orig": 1.3411270380020142, + "epoch": 0.3085771802430081, + "kl_loss": 0.1991458237171173, + "loss_ib": 0.010780449025332928, + "step": 1073 + }, + { + "ce_ib": 10.130610466003418, + "ce_orig": 1.481398105621338, + "epoch": 0.30886476382198574, + "kl_loss": 0.21280843019485474, + "loss_ib": 0.01225869357585907, + "step": 1074 + }, + { + "ce_ib": 6.745814323425293, + "ce_orig": 0.6256406903266907, + "epoch": 0.30886476382198574, + "kl_loss": 0.21133030951023102, + "loss_ib": 0.008859117515385151, + "step": 1074 + }, + { + "ce_ib": 4.382914066314697, + "ce_orig": 0.5915647149085999, + "epoch": 0.30886476382198574, + "kl_loss": 0.2159290462732315, + "loss_ib": 0.006542204413563013, + "step": 1074 + }, + { + "ce_ib": 9.640929222106934, + "ce_orig": 0.9657080769538879, + "epoch": 0.30886476382198574, + "kl_loss": 0.27724915742874146, + "loss_ib": 0.012413420714437962, + "step": 1074 + }, + { + "epoch": 0.3091523474009634, + "grad_norm": 0.09821418672800064, + "learning_rate": 9.863295241178207e-06, + "loss": 0.9336, + "step": 1075 + }, + { + "ce_ib": 10.311457633972168, + "ce_orig": 1.1121739149093628, + "epoch": 0.3091523474009634, + "kl_loss": 0.23806434869766235, + "loss_ib": 0.012692100368440151, + "step": 1075 + }, + { + "ce_ib": 5.269136428833008, + "ce_orig": 0.8820784091949463, + "epoch": 0.3091523474009634, + "kl_loss": 0.22836144268512726, + "loss_ib": 0.007552750408649445, + "step": 1075 + }, + { + "ce_ib": 10.771013259887695, + "ce_orig": 1.2149564027786255, + "epoch": 0.3091523474009634, + "kl_loss": 0.2552693784236908, + "loss_ib": 0.0133237075060606, + "step": 1075 + }, + { + "ce_ib": 4.205135822296143, + "ce_orig": 0.5866997241973877, + "epoch": 0.3091523474009634, + "kl_loss": 0.2929501533508301, + "loss_ib": 0.007134637795388699, + "step": 1075 + }, + { + "ce_ib": 4.309512615203857, + "ce_orig": 0.6440024375915527, + "epoch": 0.30943993097994105, + "kl_loss": 0.19797396659851074, + "loss_ib": 0.006289252080023289, + "step": 1076 + }, + { + "ce_ib": 9.057791709899902, + "ce_orig": 0.7737462520599365, + "epoch": 0.30943993097994105, + "kl_loss": 0.23361043632030487, + "loss_ib": 0.01139389630407095, + "step": 1076 + }, + { + "ce_ib": 3.5322015285491943, + "ce_orig": 0.4126538336277008, + "epoch": 0.30943993097994105, + "kl_loss": 0.18980346620082855, + "loss_ib": 0.005430236458778381, + "step": 1076 + }, + { + "ce_ib": 7.820254325866699, + "ce_orig": 1.1803672313690186, + "epoch": 0.30943993097994105, + "kl_loss": 0.5156011581420898, + "loss_ib": 0.012976265512406826, + "step": 1076 + }, + { + "ce_ib": 10.2060546875, + "ce_orig": 1.340649127960205, + "epoch": 0.30972751455891867, + "kl_loss": 0.6236756443977356, + "loss_ib": 0.016442811116576195, + "step": 1077 + }, + { + "ce_ib": 6.512198448181152, + "ce_orig": 0.6726751923561096, + "epoch": 0.30972751455891867, + "kl_loss": 0.22547681629657745, + "loss_ib": 0.008766965940594673, + "step": 1077 + }, + { + "ce_ib": 7.2397565841674805, + "ce_orig": 1.0542415380477905, + "epoch": 0.30972751455891867, + "kl_loss": 0.3293522000312805, + "loss_ib": 0.010533277876675129, + "step": 1077 + }, + { + "ce_ib": 4.1594157218933105, + "ce_orig": 0.7226054072380066, + "epoch": 0.30972751455891867, + "kl_loss": 0.16181252896785736, + "loss_ib": 0.0057775406166911125, + "step": 1077 + }, + { + "ce_ib": 7.226589679718018, + "ce_orig": 0.8086729049682617, + "epoch": 0.31001509813789635, + "kl_loss": 0.29907116293907166, + "loss_ib": 0.010217301547527313, + "step": 1078 + }, + { + "ce_ib": 5.652985572814941, + "ce_orig": 0.8123196959495544, + "epoch": 0.31001509813789635, + "kl_loss": 0.21832415461540222, + "loss_ib": 0.007836227305233479, + "step": 1078 + }, + { + "ce_ib": 5.190840721130371, + "ce_orig": 0.6470297574996948, + "epoch": 0.31001509813789635, + "kl_loss": 0.379181444644928, + "loss_ib": 0.008982655592262745, + "step": 1078 + }, + { + "ce_ib": 8.699675559997559, + "ce_orig": 0.8381361961364746, + "epoch": 0.31001509813789635, + "kl_loss": 0.18951405584812164, + "loss_ib": 0.010594815947115421, + "step": 1078 + }, + { + "ce_ib": 6.517348289489746, + "ce_orig": 0.3661119043827057, + "epoch": 0.310302681716874, + "kl_loss": 0.2933904826641083, + "loss_ib": 0.009451253339648247, + "step": 1079 + }, + { + "ce_ib": 7.789237022399902, + "ce_orig": 0.814723014831543, + "epoch": 0.310302681716874, + "kl_loss": 0.4003676772117615, + "loss_ib": 0.01179291307926178, + "step": 1079 + }, + { + "ce_ib": 5.38702392578125, + "ce_orig": 0.5761942267417908, + "epoch": 0.310302681716874, + "kl_loss": 0.3325355648994446, + "loss_ib": 0.008712380193173885, + "step": 1079 + }, + { + "ce_ib": 7.839290142059326, + "ce_orig": 0.7794978618621826, + "epoch": 0.310302681716874, + "kl_loss": 0.2755085229873657, + "loss_ib": 0.010594374500215054, + "step": 1079 + }, + { + "epoch": 0.3105902652958516, + "grad_norm": 0.09332282096147537, + "learning_rate": 9.861487020771103e-06, + "loss": 0.8683, + "step": 1080 + }, + { + "ce_ib": 7.493288516998291, + "ce_orig": 0.9413217902183533, + "epoch": 0.3105902652958516, + "kl_loss": 0.2304474264383316, + "loss_ib": 0.009797762148082256, + "step": 1080 + }, + { + "ce_ib": 5.598116397857666, + "ce_orig": 1.187414526939392, + "epoch": 0.3105902652958516, + "kl_loss": 0.26155588030815125, + "loss_ib": 0.008213674649596214, + "step": 1080 + }, + { + "ce_ib": 9.935769081115723, + "ce_orig": 0.690135657787323, + "epoch": 0.3105902652958516, + "kl_loss": 0.3921072781085968, + "loss_ib": 0.013856842182576656, + "step": 1080 + }, + { + "ce_ib": 7.49739408493042, + "ce_orig": 0.7895194888114929, + "epoch": 0.3105902652958516, + "kl_loss": 0.1883796751499176, + "loss_ib": 0.009381189942359924, + "step": 1080 + }, + { + "ce_ib": 2.7070467472076416, + "ce_orig": 0.19051028788089752, + "epoch": 0.3108778488748292, + "kl_loss": 0.7031688690185547, + "loss_ib": 0.009738734923303127, + "step": 1081 + }, + { + "ce_ib": 10.460716247558594, + "ce_orig": 0.8958859443664551, + "epoch": 0.3108778488748292, + "kl_loss": 0.2543383240699768, + "loss_ib": 0.013004099950194359, + "step": 1081 + }, + { + "ce_ib": 8.759541511535645, + "ce_orig": 0.8319806456565857, + "epoch": 0.3108778488748292, + "kl_loss": 0.2823876738548279, + "loss_ib": 0.01158341858536005, + "step": 1081 + }, + { + "ce_ib": 3.7187533378601074, + "ce_orig": 0.3965437114238739, + "epoch": 0.3108778488748292, + "kl_loss": 0.3348379135131836, + "loss_ib": 0.0070671322755515575, + "step": 1081 + }, + { + "ce_ib": 11.48133659362793, + "ce_orig": 0.8450151681900024, + "epoch": 0.3111654324538069, + "kl_loss": 0.2467035949230194, + "loss_ib": 0.013948372565209866, + "step": 1082 + }, + { + "ce_ib": 8.043100357055664, + "ce_orig": 1.2448742389678955, + "epoch": 0.3111654324538069, + "kl_loss": 0.23143544793128967, + "loss_ib": 0.010357454419136047, + "step": 1082 + }, + { + "ce_ib": 7.008208751678467, + "ce_orig": 0.859933614730835, + "epoch": 0.3111654324538069, + "kl_loss": 0.24393969774246216, + "loss_ib": 0.009447605349123478, + "step": 1082 + }, + { + "ce_ib": 8.620953559875488, + "ce_orig": 1.273354172706604, + "epoch": 0.3111654324538069, + "kl_loss": 0.33981454372406006, + "loss_ib": 0.012019098736345768, + "step": 1082 + }, + { + "ce_ib": 9.286676406860352, + "ce_orig": 1.2248402833938599, + "epoch": 0.3114530160327845, + "kl_loss": 0.24009215831756592, + "loss_ib": 0.011687598191201687, + "step": 1083 + }, + { + "ce_ib": 4.905697345733643, + "ce_orig": 0.5274173617362976, + "epoch": 0.3114530160327845, + "kl_loss": 0.2764824330806732, + "loss_ib": 0.0076705217361450195, + "step": 1083 + }, + { + "ce_ib": 7.331008434295654, + "ce_orig": 0.8421371579170227, + "epoch": 0.3114530160327845, + "kl_loss": 0.2732947766780853, + "loss_ib": 0.010063955560326576, + "step": 1083 + }, + { + "ce_ib": 5.719943046569824, + "ce_orig": 0.579391360282898, + "epoch": 0.3114530160327845, + "kl_loss": 0.3374432325363159, + "loss_ib": 0.009094375185668468, + "step": 1083 + }, + { + "ce_ib": 9.131670951843262, + "ce_orig": 1.3303252458572388, + "epoch": 0.31174059961176215, + "kl_loss": 0.33134493231773376, + "loss_ib": 0.012445120140910149, + "step": 1084 + }, + { + "ce_ib": 6.945981502532959, + "ce_orig": 1.103769063949585, + "epoch": 0.31174059961176215, + "kl_loss": 0.3207002580165863, + "loss_ib": 0.010152983479201794, + "step": 1084 + }, + { + "ce_ib": 6.838366508483887, + "ce_orig": 0.8066115379333496, + "epoch": 0.31174059961176215, + "kl_loss": 0.27075129747390747, + "loss_ib": 0.009545879438519478, + "step": 1084 + }, + { + "ce_ib": 12.763402938842773, + "ce_orig": 1.7998559474945068, + "epoch": 0.31174059961176215, + "kl_loss": 0.4378839135169983, + "loss_ib": 0.017142243683338165, + "step": 1084 + }, + { + "epoch": 0.3120281831907398, + "grad_norm": 0.12011191248893738, + "learning_rate": 9.85966708791339e-06, + "loss": 0.9255, + "step": 1085 + }, + { + "ce_ib": 6.058638572692871, + "ce_orig": 0.7367510199546814, + "epoch": 0.3120281831907398, + "kl_loss": 0.23104149103164673, + "loss_ib": 0.008369053713977337, + "step": 1085 + }, + { + "ce_ib": 4.6562724113464355, + "ce_orig": 0.5617725253105164, + "epoch": 0.3120281831907398, + "kl_loss": 0.203491672873497, + "loss_ib": 0.006691189482808113, + "step": 1085 + }, + { + "ce_ib": 7.416159152984619, + "ce_orig": 0.6727170348167419, + "epoch": 0.3120281831907398, + "kl_loss": 0.34073999524116516, + "loss_ib": 0.010823559947311878, + "step": 1085 + }, + { + "ce_ib": 5.643037796020508, + "ce_orig": 0.6726337671279907, + "epoch": 0.3120281831907398, + "kl_loss": 0.1980753391981125, + "loss_ib": 0.007623791694641113, + "step": 1085 + }, + { + "ce_ib": 6.168734073638916, + "ce_orig": 0.7488775849342346, + "epoch": 0.31231576676971745, + "kl_loss": 0.21899104118347168, + "loss_ib": 0.00835864432156086, + "step": 1086 + }, + { + "ce_ib": 9.138340950012207, + "ce_orig": 1.072446584701538, + "epoch": 0.31231576676971745, + "kl_loss": 0.2767692804336548, + "loss_ib": 0.011906033381819725, + "step": 1086 + }, + { + "ce_ib": 5.069515228271484, + "ce_orig": 0.5102606415748596, + "epoch": 0.31231576676971745, + "kl_loss": 0.30164480209350586, + "loss_ib": 0.008085963316261768, + "step": 1086 + }, + { + "ce_ib": 8.540528297424316, + "ce_orig": 1.3709053993225098, + "epoch": 0.31231576676971745, + "kl_loss": 0.2561246156692505, + "loss_ib": 0.011101774871349335, + "step": 1086 + }, + { + "ce_ib": 4.596883773803711, + "ce_orig": 0.5804041028022766, + "epoch": 0.3126033503486951, + "kl_loss": 0.20311373472213745, + "loss_ib": 0.006628020666539669, + "step": 1087 + }, + { + "ce_ib": 4.821422576904297, + "ce_orig": 0.7049044370651245, + "epoch": 0.3126033503486951, + "kl_loss": 0.2860710918903351, + "loss_ib": 0.007682133931666613, + "step": 1087 + }, + { + "ce_ib": 9.637624740600586, + "ce_orig": 1.2415283918380737, + "epoch": 0.3126033503486951, + "kl_loss": 0.5572246313095093, + "loss_ib": 0.015209870412945747, + "step": 1087 + }, + { + "ce_ib": 5.758790969848633, + "ce_orig": 0.865833044052124, + "epoch": 0.3126033503486951, + "kl_loss": 0.17076636850833893, + "loss_ib": 0.007466454524546862, + "step": 1087 + }, + { + "ce_ib": 5.545116901397705, + "ce_orig": 0.7929593920707703, + "epoch": 0.31289093392767275, + "kl_loss": 0.29858559370040894, + "loss_ib": 0.008530973456799984, + "step": 1088 + }, + { + "ce_ib": 7.397669315338135, + "ce_orig": 1.049553394317627, + "epoch": 0.31289093392767275, + "kl_loss": 0.3252887427806854, + "loss_ib": 0.010650557465851307, + "step": 1088 + }, + { + "ce_ib": 9.861873626708984, + "ce_orig": 1.445408821105957, + "epoch": 0.31289093392767275, + "kl_loss": 0.43143945932388306, + "loss_ib": 0.014176268130540848, + "step": 1088 + }, + { + "ce_ib": 7.751357555389404, + "ce_orig": 0.9703396558761597, + "epoch": 0.31289093392767275, + "kl_loss": 0.27120447158813477, + "loss_ib": 0.010463401675224304, + "step": 1088 + }, + { + "ce_ib": 9.846639633178711, + "ce_orig": 1.2402608394622803, + "epoch": 0.3131785175066504, + "kl_loss": 0.30426251888275146, + "loss_ib": 0.012889264151453972, + "step": 1089 + }, + { + "ce_ib": 4.732132911682129, + "ce_orig": 0.8199735879898071, + "epoch": 0.3131785175066504, + "kl_loss": 0.22151115536689758, + "loss_ib": 0.006947244051843882, + "step": 1089 + }, + { + "ce_ib": 7.251793384552002, + "ce_orig": 0.8341598510742188, + "epoch": 0.3131785175066504, + "kl_loss": 0.26620736718177795, + "loss_ib": 0.009913867339491844, + "step": 1089 + }, + { + "ce_ib": 9.6569242477417, + "ce_orig": 0.8773269057273865, + "epoch": 0.3131785175066504, + "kl_loss": 0.35142725706100464, + "loss_ib": 0.013171196915209293, + "step": 1089 + }, + { + "epoch": 0.313466101085628, + "grad_norm": 0.11186288297176361, + "learning_rate": 9.857835446989708e-06, + "loss": 0.9513, + "step": 1090 + }, + { + "ce_ib": 7.215015888214111, + "ce_orig": 0.9098507761955261, + "epoch": 0.313466101085628, + "kl_loss": 0.3162091076374054, + "loss_ib": 0.010377106256783009, + "step": 1090 + }, + { + "ce_ib": 4.774172306060791, + "ce_orig": 0.4562654495239258, + "epoch": 0.313466101085628, + "kl_loss": 0.5272875428199768, + "loss_ib": 0.010047046467661858, + "step": 1090 + }, + { + "ce_ib": 7.998179912567139, + "ce_orig": 0.6721808910369873, + "epoch": 0.313466101085628, + "kl_loss": 0.2237975001335144, + "loss_ib": 0.010236154310405254, + "step": 1090 + }, + { + "ce_ib": 4.404753684997559, + "ce_orig": 0.8174905180931091, + "epoch": 0.313466101085628, + "kl_loss": 0.17927923798561096, + "loss_ib": 0.0061975461430847645, + "step": 1090 + }, + { + "ce_ib": 5.590545654296875, + "ce_orig": 0.63374263048172, + "epoch": 0.3137536846646056, + "kl_loss": 0.22596238553524017, + "loss_ib": 0.007850169204175472, + "step": 1091 + }, + { + "ce_ib": 5.190075397491455, + "ce_orig": 0.49746814370155334, + "epoch": 0.3137536846646056, + "kl_loss": 0.5168180465698242, + "loss_ib": 0.010358256287872791, + "step": 1091 + }, + { + "ce_ib": 7.648189544677734, + "ce_orig": 0.8305013179779053, + "epoch": 0.3137536846646056, + "kl_loss": 0.2619606554508209, + "loss_ib": 0.010267795994877815, + "step": 1091 + }, + { + "ce_ib": 2.5894863605499268, + "ce_orig": 0.4961947202682495, + "epoch": 0.3137536846646056, + "kl_loss": 0.16488581895828247, + "loss_ib": 0.004238344728946686, + "step": 1091 + }, + { + "ce_ib": 6.8501973152160645, + "ce_orig": 0.9551085233688354, + "epoch": 0.3140412682435833, + "kl_loss": 0.22183682024478912, + "loss_ib": 0.00906856544315815, + "step": 1092 + }, + { + "ce_ib": 9.278849601745605, + "ce_orig": 1.0578449964523315, + "epoch": 0.3140412682435833, + "kl_loss": 0.46093541383743286, + "loss_ib": 0.013888203538954258, + "step": 1092 + }, + { + "ce_ib": 10.20980453491211, + "ce_orig": 1.130508303642273, + "epoch": 0.3140412682435833, + "kl_loss": 0.26771920919418335, + "loss_ib": 0.012886996380984783, + "step": 1092 + }, + { + "ce_ib": 7.884620666503906, + "ce_orig": 1.2247728109359741, + "epoch": 0.3140412682435833, + "kl_loss": 0.5485724210739136, + "loss_ib": 0.013370344415307045, + "step": 1092 + }, + { + "ce_ib": 4.926917552947998, + "ce_orig": 0.620137631893158, + "epoch": 0.31432885182256093, + "kl_loss": 0.23593732714653015, + "loss_ib": 0.007286291103810072, + "step": 1093 + }, + { + "ce_ib": 6.589530944824219, + "ce_orig": 0.5658002495765686, + "epoch": 0.31432885182256093, + "kl_loss": 0.36354243755340576, + "loss_ib": 0.010224955156445503, + "step": 1093 + }, + { + "ce_ib": 7.028726100921631, + "ce_orig": 0.9482094049453735, + "epoch": 0.31432885182256093, + "kl_loss": 0.250606894493103, + "loss_ib": 0.009534794837236404, + "step": 1093 + }, + { + "ce_ib": 7.295685291290283, + "ce_orig": 0.8431223630905151, + "epoch": 0.31432885182256093, + "kl_loss": 0.8136886358261108, + "loss_ib": 0.015432571992278099, + "step": 1093 + }, + { + "ce_ib": 5.327012062072754, + "ce_orig": 0.7496179342269897, + "epoch": 0.31461643540153855, + "kl_loss": 0.32254457473754883, + "loss_ib": 0.008552457205951214, + "step": 1094 + }, + { + "ce_ib": 8.12031364440918, + "ce_orig": 0.6261737942695618, + "epoch": 0.31461643540153855, + "kl_loss": 0.2707287669181824, + "loss_ib": 0.010827600955963135, + "step": 1094 + }, + { + "ce_ib": 7.939435005187988, + "ce_orig": 0.72379070520401, + "epoch": 0.31461643540153855, + "kl_loss": 0.24098311364650726, + "loss_ib": 0.010349266231060028, + "step": 1094 + }, + { + "ce_ib": 5.902799606323242, + "ce_orig": 0.5440672636032104, + "epoch": 0.31461643540153855, + "kl_loss": 0.24652087688446045, + "loss_ib": 0.008368008770048618, + "step": 1094 + }, + { + "epoch": 0.31490401898051623, + "grad_norm": 0.09559078514575958, + "learning_rate": 9.855992102412909e-06, + "loss": 0.8071, + "step": 1095 + }, + { + "ce_ib": 8.018738746643066, + "ce_orig": 0.6169609427452087, + "epoch": 0.31490401898051623, + "kl_loss": 0.3646816313266754, + "loss_ib": 0.011665554717183113, + "step": 1095 + }, + { + "ce_ib": 5.268819808959961, + "ce_orig": 0.1188381165266037, + "epoch": 0.31490401898051623, + "kl_loss": 0.5084589123725891, + "loss_ib": 0.010353408753871918, + "step": 1095 + }, + { + "ce_ib": 6.112328052520752, + "ce_orig": 0.7286979556083679, + "epoch": 0.31490401898051623, + "kl_loss": 0.22665318846702576, + "loss_ib": 0.008378859609365463, + "step": 1095 + }, + { + "ce_ib": 9.162174224853516, + "ce_orig": 0.981454074382782, + "epoch": 0.31490401898051623, + "kl_loss": 0.23977544903755188, + "loss_ib": 0.011559928767383099, + "step": 1095 + }, + { + "ce_ib": 9.974848747253418, + "ce_orig": 1.2838889360427856, + "epoch": 0.31519160255949386, + "kl_loss": 0.3775405287742615, + "loss_ib": 0.01375025324523449, + "step": 1096 + }, + { + "ce_ib": 5.4543867111206055, + "ce_orig": 0.8588042855262756, + "epoch": 0.31519160255949386, + "kl_loss": 0.23723624646663666, + "loss_ib": 0.007826749235391617, + "step": 1096 + }, + { + "ce_ib": 6.729191303253174, + "ce_orig": 0.7287084460258484, + "epoch": 0.31519160255949386, + "kl_loss": 0.2046607881784439, + "loss_ib": 0.008775799535214901, + "step": 1096 + }, + { + "ce_ib": 6.898683071136475, + "ce_orig": 0.9344533681869507, + "epoch": 0.31519160255949386, + "kl_loss": 0.31471872329711914, + "loss_ib": 0.010045870207250118, + "step": 1096 + }, + { + "ce_ib": 3.085794687271118, + "ce_orig": 0.6212059855461121, + "epoch": 0.3154791861384715, + "kl_loss": 0.1684914082288742, + "loss_ib": 0.0047707087360322475, + "step": 1097 + }, + { + "ce_ib": 7.249536037445068, + "ce_orig": 0.6098146438598633, + "epoch": 0.3154791861384715, + "kl_loss": 0.4129192531108856, + "loss_ib": 0.011378727853298187, + "step": 1097 + }, + { + "ce_ib": 10.709492683410645, + "ce_orig": 0.8577325940132141, + "epoch": 0.3154791861384715, + "kl_loss": 0.2508341670036316, + "loss_ib": 0.013217834755778313, + "step": 1097 + }, + { + "ce_ib": 3.9720232486724854, + "ce_orig": 0.6190991401672363, + "epoch": 0.3154791861384715, + "kl_loss": 0.18957431614398956, + "loss_ib": 0.005867766682058573, + "step": 1097 + }, + { + "ce_ib": 8.379387855529785, + "ce_orig": 1.019614577293396, + "epoch": 0.31576676971744916, + "kl_loss": 0.2486770749092102, + "loss_ib": 0.010866157710552216, + "step": 1098 + }, + { + "ce_ib": 7.849251747131348, + "ce_orig": 0.6944756507873535, + "epoch": 0.31576676971744916, + "kl_loss": 0.19975972175598145, + "loss_ib": 0.00984684843569994, + "step": 1098 + }, + { + "ce_ib": 6.0830583572387695, + "ce_orig": 0.9229554533958435, + "epoch": 0.31576676971744916, + "kl_loss": 0.28290021419525146, + "loss_ib": 0.008912060409784317, + "step": 1098 + }, + { + "ce_ib": 8.36382007598877, + "ce_orig": 1.2436766624450684, + "epoch": 0.31576676971744916, + "kl_loss": 0.2294701784849167, + "loss_ib": 0.010658521205186844, + "step": 1098 + }, + { + "ce_ib": 4.324337005615234, + "ce_orig": 0.6799556612968445, + "epoch": 0.3160543532964268, + "kl_loss": 0.1960705667734146, + "loss_ib": 0.006285042501986027, + "step": 1099 + }, + { + "ce_ib": 3.3304200172424316, + "ce_orig": 0.5313477516174316, + "epoch": 0.3160543532964268, + "kl_loss": 0.22542990744113922, + "loss_ib": 0.0055847191251814365, + "step": 1099 + }, + { + "ce_ib": 4.986284255981445, + "ce_orig": 0.6891649961471558, + "epoch": 0.3160543532964268, + "kl_loss": 0.25679337978363037, + "loss_ib": 0.007554218173027039, + "step": 1099 + }, + { + "ce_ib": 4.57914924621582, + "ce_orig": 0.6530379056930542, + "epoch": 0.3160543532964268, + "kl_loss": 0.20343467593193054, + "loss_ib": 0.006613495759665966, + "step": 1099 + }, + { + "epoch": 0.3163419368754044, + "grad_norm": 0.11441560834646225, + "learning_rate": 9.854137058624034e-06, + "loss": 0.8445, + "step": 1100 + }, + { + "ce_ib": 2.812058687210083, + "ce_orig": 0.530636727809906, + "epoch": 0.3163419368754044, + "kl_loss": 0.19126400351524353, + "loss_ib": 0.004724698606878519, + "step": 1100 + }, + { + "ce_ib": 8.779197692871094, + "ce_orig": 1.1562453508377075, + "epoch": 0.3163419368754044, + "kl_loss": 0.23617590963840485, + "loss_ib": 0.011140956543385983, + "step": 1100 + }, + { + "ce_ib": 10.066039085388184, + "ce_orig": 1.1768686771392822, + "epoch": 0.3163419368754044, + "kl_loss": 0.28456875681877136, + "loss_ib": 0.012911726720631123, + "step": 1100 + }, + { + "ce_ib": 5.156683921813965, + "ce_orig": 0.7831324934959412, + "epoch": 0.3163419368754044, + "kl_loss": 0.201686292886734, + "loss_ib": 0.007173546589910984, + "step": 1100 + }, + { + "ce_ib": 3.357862949371338, + "ce_orig": 0.1526433229446411, + "epoch": 0.31662952045438203, + "kl_loss": 0.7000162601470947, + "loss_ib": 0.010358026251196861, + "step": 1101 + }, + { + "ce_ib": 7.347134113311768, + "ce_orig": 0.8936167359352112, + "epoch": 0.31662952045438203, + "kl_loss": 0.18082134425640106, + "loss_ib": 0.00915534794330597, + "step": 1101 + }, + { + "ce_ib": 10.29253101348877, + "ce_orig": 1.4773753881454468, + "epoch": 0.31662952045438203, + "kl_loss": 0.5856887102127075, + "loss_ib": 0.01614941842854023, + "step": 1101 + }, + { + "ce_ib": 5.112944602966309, + "ce_orig": 0.491621732711792, + "epoch": 0.31662952045438203, + "kl_loss": 0.22965355217456818, + "loss_ib": 0.007409479934722185, + "step": 1101 + }, + { + "ce_ib": 6.190317630767822, + "ce_orig": 0.6080651879310608, + "epoch": 0.3169171040333597, + "kl_loss": 0.2565647065639496, + "loss_ib": 0.008755965158343315, + "step": 1102 + }, + { + "ce_ib": 7.12357234954834, + "ce_orig": 0.5342175364494324, + "epoch": 0.3169171040333597, + "kl_loss": 0.3891940116882324, + "loss_ib": 0.011015512980520725, + "step": 1102 + }, + { + "ce_ib": 4.89890193939209, + "ce_orig": 0.8590795397758484, + "epoch": 0.3169171040333597, + "kl_loss": 0.17115063965320587, + "loss_ib": 0.006610408425331116, + "step": 1102 + }, + { + "ce_ib": 5.244142055511475, + "ce_orig": 0.5240667462348938, + "epoch": 0.3169171040333597, + "kl_loss": 0.2943491041660309, + "loss_ib": 0.008187633939087391, + "step": 1102 + }, + { + "ce_ib": 9.725130081176758, + "ce_orig": 1.2593644857406616, + "epoch": 0.31720468761233733, + "kl_loss": 0.27966809272766113, + "loss_ib": 0.01252180989831686, + "step": 1103 + }, + { + "ce_ib": 6.8854193687438965, + "ce_orig": 1.081408143043518, + "epoch": 0.31720468761233733, + "kl_loss": 0.39314359426498413, + "loss_ib": 0.010816855356097221, + "step": 1103 + }, + { + "ce_ib": 5.625035285949707, + "ce_orig": 0.7493113279342651, + "epoch": 0.31720468761233733, + "kl_loss": 0.29417410492897034, + "loss_ib": 0.008566776290535927, + "step": 1103 + }, + { + "ce_ib": 8.13229751586914, + "ce_orig": 0.7472993731498718, + "epoch": 0.31720468761233733, + "kl_loss": 0.2875515818595886, + "loss_ib": 0.011007812805473804, + "step": 1103 + }, + { + "ce_ib": 4.085546016693115, + "ce_orig": 0.7198461890220642, + "epoch": 0.31749227119131496, + "kl_loss": 0.1820926070213318, + "loss_ib": 0.005906471982598305, + "step": 1104 + }, + { + "ce_ib": 7.235878944396973, + "ce_orig": 1.0650726556777954, + "epoch": 0.31749227119131496, + "kl_loss": 0.28940922021865845, + "loss_ib": 0.01012997142970562, + "step": 1104 + }, + { + "ce_ib": 7.105355262756348, + "ce_orig": 0.8803567290306091, + "epoch": 0.31749227119131496, + "kl_loss": 0.19339075684547424, + "loss_ib": 0.009039262309670448, + "step": 1104 + }, + { + "ce_ib": 5.405515670776367, + "ce_orig": 0.5333710312843323, + "epoch": 0.31749227119131496, + "kl_loss": 0.2288927584886551, + "loss_ib": 0.007694443222135305, + "step": 1104 + }, + { + "epoch": 0.31777985477029264, + "grad_norm": 0.13750189542770386, + "learning_rate": 9.852270320092314e-06, + "loss": 0.842, + "step": 1105 + }, + { + "ce_ib": 6.282751083374023, + "ce_orig": 0.6561731696128845, + "epoch": 0.31777985477029264, + "kl_loss": 0.3129974603652954, + "loss_ib": 0.009412725456058979, + "step": 1105 + }, + { + "ce_ib": 9.112653732299805, + "ce_orig": 1.2813491821289062, + "epoch": 0.31777985477029264, + "kl_loss": 0.2854575514793396, + "loss_ib": 0.011967229656875134, + "step": 1105 + }, + { + "ce_ib": 6.1287760734558105, + "ce_orig": 0.5941926836967468, + "epoch": 0.31777985477029264, + "kl_loss": 0.26773563027381897, + "loss_ib": 0.008806131780147552, + "step": 1105 + }, + { + "ce_ib": 4.9001922607421875, + "ce_orig": 0.613892674446106, + "epoch": 0.31777985477029264, + "kl_loss": 0.2149442881345749, + "loss_ib": 0.007049635052680969, + "step": 1105 + }, + { + "ce_ib": 6.514833450317383, + "ce_orig": 0.7088329792022705, + "epoch": 0.31806743834927026, + "kl_loss": 0.22958248853683472, + "loss_ib": 0.008810658007860184, + "step": 1106 + }, + { + "ce_ib": 4.464747905731201, + "ce_orig": 0.49244388937950134, + "epoch": 0.31806743834927026, + "kl_loss": 0.2644960582256317, + "loss_ib": 0.007109708618372679, + "step": 1106 + }, + { + "ce_ib": 5.996800422668457, + "ce_orig": 0.8356698751449585, + "epoch": 0.31806743834927026, + "kl_loss": 0.2915058135986328, + "loss_ib": 0.008911858312785625, + "step": 1106 + }, + { + "ce_ib": 10.251335144042969, + "ce_orig": 1.1799588203430176, + "epoch": 0.31806743834927026, + "kl_loss": 0.26157453656196594, + "loss_ib": 0.012867080047726631, + "step": 1106 + }, + { + "ce_ib": 7.458197593688965, + "ce_orig": 0.9880890250205994, + "epoch": 0.3183550219282479, + "kl_loss": 0.21888777613639832, + "loss_ib": 0.009647075086832047, + "step": 1107 + }, + { + "ce_ib": 7.327599048614502, + "ce_orig": 0.7127462029457092, + "epoch": 0.3183550219282479, + "kl_loss": 0.43115466833114624, + "loss_ib": 0.011639145202934742, + "step": 1107 + }, + { + "ce_ib": 11.34225845336914, + "ce_orig": 1.4238355159759521, + "epoch": 0.3183550219282479, + "kl_loss": 0.2260439693927765, + "loss_ib": 0.013602697290480137, + "step": 1107 + }, + { + "ce_ib": 4.567384719848633, + "ce_orig": 0.5381410121917725, + "epoch": 0.3183550219282479, + "kl_loss": 0.24306389689445496, + "loss_ib": 0.006998023949563503, + "step": 1107 + }, + { + "ce_ib": 2.3374364376068115, + "ce_orig": 0.1576087772846222, + "epoch": 0.31864260550722556, + "kl_loss": 0.4729865789413452, + "loss_ib": 0.007067302241921425, + "step": 1108 + }, + { + "ce_ib": 6.564968109130859, + "ce_orig": 0.8243353962898254, + "epoch": 0.31864260550722556, + "kl_loss": 0.29320281744003296, + "loss_ib": 0.00949699617922306, + "step": 1108 + }, + { + "ce_ib": 6.024631977081299, + "ce_orig": 0.8786768913269043, + "epoch": 0.31864260550722556, + "kl_loss": 0.2908465564250946, + "loss_ib": 0.008933097124099731, + "step": 1108 + }, + { + "ce_ib": 6.480597496032715, + "ce_orig": 1.0345938205718994, + "epoch": 0.31864260550722556, + "kl_loss": 0.24593515694141388, + "loss_ib": 0.008939948864281178, + "step": 1108 + }, + { + "ce_ib": 4.5928215980529785, + "ce_orig": 0.6721344590187073, + "epoch": 0.3189301890862032, + "kl_loss": 0.29296231269836426, + "loss_ib": 0.00752244470641017, + "step": 1109 + }, + { + "ce_ib": 6.1651997566223145, + "ce_orig": 0.9395220279693604, + "epoch": 0.3189301890862032, + "kl_loss": 0.2568630278110504, + "loss_ib": 0.008733830414712429, + "step": 1109 + }, + { + "ce_ib": 7.659371376037598, + "ce_orig": 0.46118995547294617, + "epoch": 0.3189301890862032, + "kl_loss": 0.38257402181625366, + "loss_ib": 0.011485111899673939, + "step": 1109 + }, + { + "ce_ib": 9.976858139038086, + "ce_orig": 1.3371727466583252, + "epoch": 0.3189301890862032, + "kl_loss": 0.1954931914806366, + "loss_ib": 0.01193179003894329, + "step": 1109 + }, + { + "epoch": 0.3192177726651808, + "grad_norm": 0.10743506252765656, + "learning_rate": 9.850391891315159e-06, + "loss": 0.8003, + "step": 1110 + }, + { + "ce_ib": 6.971478462219238, + "ce_orig": 0.638785719871521, + "epoch": 0.3192177726651808, + "kl_loss": 0.24314042925834656, + "loss_ib": 0.009402883239090443, + "step": 1110 + }, + { + "ce_ib": 7.48801326751709, + "ce_orig": 1.115414023399353, + "epoch": 0.3192177726651808, + "kl_loss": 0.34067392349243164, + "loss_ib": 0.010894752107560635, + "step": 1110 + }, + { + "ce_ib": 7.332833766937256, + "ce_orig": 1.0408788919448853, + "epoch": 0.3192177726651808, + "kl_loss": 0.2122744917869568, + "loss_ib": 0.009455578401684761, + "step": 1110 + }, + { + "ce_ib": 4.309993743896484, + "ce_orig": 0.6265511512756348, + "epoch": 0.3192177726651808, + "kl_loss": 0.27435722947120667, + "loss_ib": 0.007053565699607134, + "step": 1110 + }, + { + "ce_ib": 8.549775123596191, + "ce_orig": 1.175057053565979, + "epoch": 0.31950535624415843, + "kl_loss": 0.34124380350112915, + "loss_ib": 0.011962213553488255, + "step": 1111 + }, + { + "ce_ib": 4.665210723876953, + "ce_orig": 0.4430766999721527, + "epoch": 0.31950535624415843, + "kl_loss": 0.5751094222068787, + "loss_ib": 0.010416304692626, + "step": 1111 + }, + { + "ce_ib": 5.320335865020752, + "ce_orig": 0.8363803029060364, + "epoch": 0.31950535624415843, + "kl_loss": 0.15228267014026642, + "loss_ib": 0.006843162700533867, + "step": 1111 + }, + { + "ce_ib": 5.226586818695068, + "ce_orig": 0.6585968732833862, + "epoch": 0.31950535624415843, + "kl_loss": 0.26359066367149353, + "loss_ib": 0.007862493395805359, + "step": 1111 + }, + { + "ce_ib": 5.659263610839844, + "ce_orig": 0.9022344946861267, + "epoch": 0.3197929398231361, + "kl_loss": 0.23027633130550385, + "loss_ib": 0.007962026633322239, + "step": 1112 + }, + { + "ce_ib": 8.274489402770996, + "ce_orig": 0.9125567078590393, + "epoch": 0.3197929398231361, + "kl_loss": 0.37815025448799133, + "loss_ib": 0.012055991217494011, + "step": 1112 + }, + { + "ce_ib": 5.287826061248779, + "ce_orig": 0.6700226068496704, + "epoch": 0.3197929398231361, + "kl_loss": 0.2153065800666809, + "loss_ib": 0.007440891582518816, + "step": 1112 + }, + { + "ce_ib": 4.901998043060303, + "ce_orig": 0.5990259647369385, + "epoch": 0.3197929398231361, + "kl_loss": 0.22657959163188934, + "loss_ib": 0.007167793810367584, + "step": 1112 + }, + { + "ce_ib": 8.574362754821777, + "ce_orig": 1.1461323499679565, + "epoch": 0.32008052340211374, + "kl_loss": 0.20302176475524902, + "loss_ib": 0.010604580864310265, + "step": 1113 + }, + { + "ce_ib": 5.339359760284424, + "ce_orig": 0.6689932346343994, + "epoch": 0.32008052340211374, + "kl_loss": 0.173078715801239, + "loss_ib": 0.0070701465010643005, + "step": 1113 + }, + { + "ce_ib": 7.654620170593262, + "ce_orig": 1.0080437660217285, + "epoch": 0.32008052340211374, + "kl_loss": 0.26648497581481934, + "loss_ib": 0.01031946949660778, + "step": 1113 + }, + { + "ce_ib": 9.265800476074219, + "ce_orig": 1.3707072734832764, + "epoch": 0.32008052340211374, + "kl_loss": 0.32689064741134644, + "loss_ib": 0.01253470592200756, + "step": 1113 + }, + { + "ce_ib": 6.342159271240234, + "ce_orig": 0.67596435546875, + "epoch": 0.32036810698109136, + "kl_loss": 0.21211153268814087, + "loss_ib": 0.00846327468752861, + "step": 1114 + }, + { + "ce_ib": 8.386610984802246, + "ce_orig": 0.9587525129318237, + "epoch": 0.32036810698109136, + "kl_loss": 0.24733799695968628, + "loss_ib": 0.010859991423785686, + "step": 1114 + }, + { + "ce_ib": 6.137772560119629, + "ce_orig": 0.9148625135421753, + "epoch": 0.32036810698109136, + "kl_loss": 0.30503973364830017, + "loss_ib": 0.009188170544803143, + "step": 1114 + }, + { + "ce_ib": 5.591522216796875, + "ce_orig": 0.4235896170139313, + "epoch": 0.32036810698109136, + "kl_loss": 0.2647779881954193, + "loss_ib": 0.008239302784204483, + "step": 1114 + }, + { + "epoch": 0.32065569056006904, + "grad_norm": 0.10749529302120209, + "learning_rate": 9.848501776818138e-06, + "loss": 0.8231, + "step": 1115 + }, + { + "ce_ib": 10.10469913482666, + "ce_orig": 1.2156010866165161, + "epoch": 0.32065569056006904, + "kl_loss": 0.2942560911178589, + "loss_ib": 0.01304725930094719, + "step": 1115 + }, + { + "ce_ib": 7.5524091720581055, + "ce_orig": 1.165973424911499, + "epoch": 0.32065569056006904, + "kl_loss": 0.2608107924461365, + "loss_ib": 0.010160517878830433, + "step": 1115 + }, + { + "ce_ib": 6.978625297546387, + "ce_orig": 0.7588456869125366, + "epoch": 0.32065569056006904, + "kl_loss": 0.20529861748218536, + "loss_ib": 0.009031611494719982, + "step": 1115 + }, + { + "ce_ib": 6.989262104034424, + "ce_orig": 0.7142454981803894, + "epoch": 0.32065569056006904, + "kl_loss": 0.30295610427856445, + "loss_ib": 0.010018822737038136, + "step": 1115 + }, + { + "ce_ib": 3.487807273864746, + "ce_orig": 0.3319603204727173, + "epoch": 0.32094327413904666, + "kl_loss": 0.5826542377471924, + "loss_ib": 0.009314349852502346, + "step": 1116 + }, + { + "ce_ib": 7.845709800720215, + "ce_orig": 0.8958043456077576, + "epoch": 0.32094327413904666, + "kl_loss": 0.3864516019821167, + "loss_ib": 0.01171022653579712, + "step": 1116 + }, + { + "ce_ib": 2.6743617057800293, + "ce_orig": 0.3000844717025757, + "epoch": 0.32094327413904666, + "kl_loss": 0.5113818645477295, + "loss_ib": 0.007788179907947779, + "step": 1116 + }, + { + "ce_ib": 8.463080406188965, + "ce_orig": 1.0114843845367432, + "epoch": 0.32094327413904666, + "kl_loss": 0.3062596321105957, + "loss_ib": 0.011525675654411316, + "step": 1116 + }, + { + "ce_ib": 10.33855152130127, + "ce_orig": 1.2473762035369873, + "epoch": 0.3212308577180243, + "kl_loss": 0.2699921131134033, + "loss_ib": 0.013038473203778267, + "step": 1117 + }, + { + "ce_ib": 8.67563533782959, + "ce_orig": 0.9891132116317749, + "epoch": 0.3212308577180243, + "kl_loss": 0.24578243494033813, + "loss_ib": 0.011133459396660328, + "step": 1117 + }, + { + "ce_ib": 11.412935256958008, + "ce_orig": 1.6994497776031494, + "epoch": 0.3212308577180243, + "kl_loss": 0.3418155312538147, + "loss_ib": 0.014831089414656162, + "step": 1117 + }, + { + "ce_ib": 6.820316314697266, + "ce_orig": 0.7683108448982239, + "epoch": 0.3212308577180243, + "kl_loss": 0.2374948263168335, + "loss_ib": 0.009195264428853989, + "step": 1117 + }, + { + "ce_ib": 5.1326704025268555, + "ce_orig": 0.7094582915306091, + "epoch": 0.32151844129700197, + "kl_loss": 0.2784407436847687, + "loss_ib": 0.007917077280580997, + "step": 1118 + }, + { + "ce_ib": 7.020430564880371, + "ce_orig": 1.007041573524475, + "epoch": 0.32151844129700197, + "kl_loss": 0.3052269518375397, + "loss_ib": 0.010072699747979641, + "step": 1118 + }, + { + "ce_ib": 3.6181588172912598, + "ce_orig": 0.5216957330703735, + "epoch": 0.32151844129700197, + "kl_loss": 0.23030348122119904, + "loss_ib": 0.005921193864196539, + "step": 1118 + }, + { + "ce_ib": 6.98055362701416, + "ce_orig": 0.91546231508255, + "epoch": 0.32151844129700197, + "kl_loss": 0.2503894567489624, + "loss_ib": 0.009484448470175266, + "step": 1118 + }, + { + "ce_ib": 3.671968460083008, + "ce_orig": 0.419208824634552, + "epoch": 0.3218060248759796, + "kl_loss": 0.24047990143299103, + "loss_ib": 0.006076767109334469, + "step": 1119 + }, + { + "ce_ib": 5.895604133605957, + "ce_orig": 0.5340924263000488, + "epoch": 0.3218060248759796, + "kl_loss": 0.18886855244636536, + "loss_ib": 0.007784290239214897, + "step": 1119 + }, + { + "ce_ib": 6.686251640319824, + "ce_orig": 0.8506937623023987, + "epoch": 0.3218060248759796, + "kl_loss": 0.42726773023605347, + "loss_ib": 0.010958928614854813, + "step": 1119 + }, + { + "ce_ib": 5.3213419914245605, + "ce_orig": 0.582292377948761, + "epoch": 0.3218060248759796, + "kl_loss": 0.21478994190692902, + "loss_ib": 0.0074692415073513985, + "step": 1119 + }, + { + "epoch": 0.3220936084549572, + "grad_norm": 0.09495838731527328, + "learning_rate": 9.846599981154975e-06, + "loss": 0.8629, + "step": 1120 + }, + { + "ce_ib": 6.199802398681641, + "ce_orig": 0.8364900946617126, + "epoch": 0.3220936084549572, + "kl_loss": 0.2115131914615631, + "loss_ib": 0.008314934559166431, + "step": 1120 + }, + { + "ce_ib": 7.682260990142822, + "ce_orig": 0.6661112308502197, + "epoch": 0.3220936084549572, + "kl_loss": 0.33651497960090637, + "loss_ib": 0.011047410778701305, + "step": 1120 + }, + { + "ce_ib": 7.0127973556518555, + "ce_orig": 0.47509559988975525, + "epoch": 0.3220936084549572, + "kl_loss": 0.4513690173625946, + "loss_ib": 0.01152648776769638, + "step": 1120 + }, + { + "ce_ib": 5.214129447937012, + "ce_orig": 0.7018793225288391, + "epoch": 0.3220936084549572, + "kl_loss": 0.20314499735832214, + "loss_ib": 0.007245579734444618, + "step": 1120 + }, + { + "ce_ib": 2.6834769248962402, + "ce_orig": 0.33916646242141724, + "epoch": 0.32238119203393484, + "kl_loss": 0.20696307718753815, + "loss_ib": 0.004753108136355877, + "step": 1121 + }, + { + "ce_ib": 11.433638572692871, + "ce_orig": 1.6915712356567383, + "epoch": 0.32238119203393484, + "kl_loss": 0.30615732073783875, + "loss_ib": 0.014495211653411388, + "step": 1121 + }, + { + "ce_ib": 7.008380889892578, + "ce_orig": 0.6508950591087341, + "epoch": 0.32238119203393484, + "kl_loss": 0.2695773243904114, + "loss_ib": 0.00970415398478508, + "step": 1121 + }, + { + "ce_ib": 10.525337219238281, + "ce_orig": 1.1632081270217896, + "epoch": 0.32238119203393484, + "kl_loss": 0.3140749931335449, + "loss_ib": 0.013666086830198765, + "step": 1121 + }, + { + "ce_ib": 9.778067588806152, + "ce_orig": 1.3605836629867554, + "epoch": 0.3226687756129125, + "kl_loss": 0.2360476404428482, + "loss_ib": 0.012138543650507927, + "step": 1122 + }, + { + "ce_ib": 5.721891403198242, + "ce_orig": 0.6278418898582458, + "epoch": 0.3226687756129125, + "kl_loss": 0.18863216042518616, + "loss_ib": 0.00760821346193552, + "step": 1122 + }, + { + "ce_ib": 6.584217071533203, + "ce_orig": 0.7220848202705383, + "epoch": 0.3226687756129125, + "kl_loss": 0.27102869749069214, + "loss_ib": 0.00929450336843729, + "step": 1122 + }, + { + "ce_ib": 9.604668617248535, + "ce_orig": 1.3348008394241333, + "epoch": 0.3226687756129125, + "kl_loss": 0.32848864793777466, + "loss_ib": 0.012889553792774677, + "step": 1122 + }, + { + "ce_ib": 10.836069107055664, + "ce_orig": 1.4283229112625122, + "epoch": 0.32295635919189014, + "kl_loss": 0.3214641809463501, + "loss_ib": 0.014050710946321487, + "step": 1123 + }, + { + "ce_ib": 5.0625319480896, + "ce_orig": 0.5947402715682983, + "epoch": 0.32295635919189014, + "kl_loss": 0.2923838496208191, + "loss_ib": 0.007986370474100113, + "step": 1123 + }, + { + "ce_ib": 6.186778545379639, + "ce_orig": 1.0432881116867065, + "epoch": 0.32295635919189014, + "kl_loss": 0.2616155445575714, + "loss_ib": 0.008802933618426323, + "step": 1123 + }, + { + "ce_ib": 11.60007381439209, + "ce_orig": 1.7871153354644775, + "epoch": 0.32295635919189014, + "kl_loss": 0.3969075679779053, + "loss_ib": 0.01556914858520031, + "step": 1123 + }, + { + "ce_ib": 9.874221801757812, + "ce_orig": 1.1525869369506836, + "epoch": 0.32324394277086776, + "kl_loss": 0.2839212119579315, + "loss_ib": 0.012713433243334293, + "step": 1124 + }, + { + "ce_ib": 5.750823497772217, + "ce_orig": 0.8648343682289124, + "epoch": 0.32324394277086776, + "kl_loss": 0.18375760316848755, + "loss_ib": 0.007588399574160576, + "step": 1124 + }, + { + "ce_ib": 7.892747402191162, + "ce_orig": 0.6954336166381836, + "epoch": 0.32324394277086776, + "kl_loss": 0.3389386236667633, + "loss_ib": 0.011282133869826794, + "step": 1124 + }, + { + "ce_ib": 9.0963716506958, + "ce_orig": 0.9955480694770813, + "epoch": 0.32324394277086776, + "kl_loss": 0.28071296215057373, + "loss_ib": 0.011903500184416771, + "step": 1124 + }, + { + "epoch": 0.32353152634984544, + "grad_norm": 0.09425008296966553, + "learning_rate": 9.844686508907538e-06, + "loss": 0.8663, + "step": 1125 + }, + { + "ce_ib": 12.10210132598877, + "ce_orig": 1.6567598581314087, + "epoch": 0.32353152634984544, + "kl_loss": 0.20576657354831696, + "loss_ib": 0.01415976695716381, + "step": 1125 + }, + { + "ce_ib": 8.69025707244873, + "ce_orig": 0.7126254439353943, + "epoch": 0.32353152634984544, + "kl_loss": 0.33204948902130127, + "loss_ib": 0.01201075129210949, + "step": 1125 + }, + { + "ce_ib": 6.837287902832031, + "ce_orig": 0.9816555380821228, + "epoch": 0.32353152634984544, + "kl_loss": 0.2468874454498291, + "loss_ib": 0.009306162595748901, + "step": 1125 + }, + { + "ce_ib": 6.7292327880859375, + "ce_orig": 0.8677306175231934, + "epoch": 0.32353152634984544, + "kl_loss": 0.23569883406162262, + "loss_ib": 0.009086220525205135, + "step": 1125 + }, + { + "ce_ib": 9.347533226013184, + "ce_orig": 0.4040294289588928, + "epoch": 0.32381910992882307, + "kl_loss": 0.34666794538497925, + "loss_ib": 0.012814212590456009, + "step": 1126 + }, + { + "ce_ib": 7.4951019287109375, + "ce_orig": 1.36309814453125, + "epoch": 0.32381910992882307, + "kl_loss": 0.18759815394878387, + "loss_ib": 0.009371084161102772, + "step": 1126 + }, + { + "ce_ib": 4.983643054962158, + "ce_orig": 0.8482396602630615, + "epoch": 0.32381910992882307, + "kl_loss": 0.25121212005615234, + "loss_ib": 0.007495764177292585, + "step": 1126 + }, + { + "ce_ib": 10.230554580688477, + "ce_orig": 0.7575033903121948, + "epoch": 0.32381910992882307, + "kl_loss": 0.27114659547805786, + "loss_ib": 0.012942020781338215, + "step": 1126 + }, + { + "ce_ib": 8.284119606018066, + "ce_orig": 1.0574978590011597, + "epoch": 0.3241066935078007, + "kl_loss": 0.23789556324481964, + "loss_ib": 0.010663075372576714, + "step": 1127 + }, + { + "ce_ib": 6.288346767425537, + "ce_orig": 0.9875720143318176, + "epoch": 0.3241066935078007, + "kl_loss": 0.26568907499313354, + "loss_ib": 0.008945236913859844, + "step": 1127 + }, + { + "ce_ib": 5.09885835647583, + "ce_orig": 0.7185342311859131, + "epoch": 0.3241066935078007, + "kl_loss": 0.5336880087852478, + "loss_ib": 0.0104357386007905, + "step": 1127 + }, + { + "ce_ib": 3.9797940254211426, + "ce_orig": 0.4946213364601135, + "epoch": 0.3241066935078007, + "kl_loss": 0.5142374634742737, + "loss_ib": 0.009122168645262718, + "step": 1127 + }, + { + "ce_ib": 6.337353229522705, + "ce_orig": 0.668466329574585, + "epoch": 0.32439427708677837, + "kl_loss": 0.4345847964286804, + "loss_ib": 0.010683201253414154, + "step": 1128 + }, + { + "ce_ib": 4.80925178527832, + "ce_orig": 1.2135826349258423, + "epoch": 0.32439427708677837, + "kl_loss": 0.21446409821510315, + "loss_ib": 0.0069538927637040615, + "step": 1128 + }, + { + "ce_ib": 6.434673309326172, + "ce_orig": 1.0011742115020752, + "epoch": 0.32439427708677837, + "kl_loss": 0.23707206547260284, + "loss_ib": 0.008805394172668457, + "step": 1128 + }, + { + "ce_ib": 8.776188850402832, + "ce_orig": 1.1622161865234375, + "epoch": 0.32439427708677837, + "kl_loss": 0.21508213877677917, + "loss_ib": 0.010927010327577591, + "step": 1128 + }, + { + "ce_ib": 8.308552742004395, + "ce_orig": 1.4075417518615723, + "epoch": 0.324681860665756, + "kl_loss": 0.22292867302894592, + "loss_ib": 0.010537839494645596, + "step": 1129 + }, + { + "ce_ib": 5.862061977386475, + "ce_orig": 0.9006170034408569, + "epoch": 0.324681860665756, + "kl_loss": 0.2912452518939972, + "loss_ib": 0.008774514310061932, + "step": 1129 + }, + { + "ce_ib": 10.914876937866211, + "ce_orig": 1.1613266468048096, + "epoch": 0.324681860665756, + "kl_loss": 0.23145024478435516, + "loss_ib": 0.013229379430413246, + "step": 1129 + }, + { + "ce_ib": 8.344647407531738, + "ce_orig": 1.1508418321609497, + "epoch": 0.324681860665756, + "kl_loss": 0.25917115807533264, + "loss_ib": 0.010936358943581581, + "step": 1129 + }, + { + "epoch": 0.3249694442447336, + "grad_norm": 0.11727182567119598, + "learning_rate": 9.842761364685824e-06, + "loss": 0.898, + "step": 1130 + }, + { + "ce_ib": 7.812705039978027, + "ce_orig": 1.451427698135376, + "epoch": 0.3249694442447336, + "kl_loss": 0.19738689064979553, + "loss_ib": 0.009786573238670826, + "step": 1130 + }, + { + "ce_ib": 6.450815200805664, + "ce_orig": 0.8914653658866882, + "epoch": 0.3249694442447336, + "kl_loss": 0.2358463555574417, + "loss_ib": 0.008809278719127178, + "step": 1130 + }, + { + "ce_ib": 6.206367015838623, + "ce_orig": 0.7019004225730896, + "epoch": 0.3249694442447336, + "kl_loss": 0.3190705180168152, + "loss_ib": 0.009397071786224842, + "step": 1130 + }, + { + "ce_ib": 6.046915531158447, + "ce_orig": 0.7179989814758301, + "epoch": 0.3249694442447336, + "kl_loss": 0.29096171259880066, + "loss_ib": 0.00895653199404478, + "step": 1130 + }, + { + "ce_ib": 7.675392150878906, + "ce_orig": 0.7474125623703003, + "epoch": 0.32525702782371124, + "kl_loss": 0.2970944941043854, + "loss_ib": 0.01064633671194315, + "step": 1131 + }, + { + "ce_ib": 5.643684387207031, + "ce_orig": 0.7740488648414612, + "epoch": 0.32525702782371124, + "kl_loss": 0.2655462920665741, + "loss_ib": 0.00829914677888155, + "step": 1131 + }, + { + "ce_ib": 12.916472434997559, + "ce_orig": 2.1709859371185303, + "epoch": 0.32525702782371124, + "kl_loss": 0.23599407076835632, + "loss_ib": 0.015276413410902023, + "step": 1131 + }, + { + "ce_ib": 6.033138751983643, + "ce_orig": 0.5232858657836914, + "epoch": 0.32525702782371124, + "kl_loss": 0.413301020860672, + "loss_ib": 0.010166148655116558, + "step": 1131 + }, + { + "ce_ib": 5.810317039489746, + "ce_orig": 0.42475029826164246, + "epoch": 0.3255446114026889, + "kl_loss": 0.3846255838871002, + "loss_ib": 0.00965657364577055, + "step": 1132 + }, + { + "ce_ib": 8.389314651489258, + "ce_orig": 0.6634515523910522, + "epoch": 0.3255446114026889, + "kl_loss": 0.3044917583465576, + "loss_ib": 0.01143423281610012, + "step": 1132 + }, + { + "ce_ib": 8.147542953491211, + "ce_orig": 1.1206897497177124, + "epoch": 0.3255446114026889, + "kl_loss": 0.6051585674285889, + "loss_ib": 0.014199127443134785, + "step": 1132 + }, + { + "ce_ib": 4.850771903991699, + "ce_orig": 0.9383435845375061, + "epoch": 0.3255446114026889, + "kl_loss": 0.16845840215682983, + "loss_ib": 0.006535355933010578, + "step": 1132 + }, + { + "ce_ib": 4.84213399887085, + "ce_orig": 0.5435336232185364, + "epoch": 0.32583219498166655, + "kl_loss": 0.2434813678264618, + "loss_ib": 0.007276947144418955, + "step": 1133 + }, + { + "ce_ib": 7.962569713592529, + "ce_orig": 0.961725115776062, + "epoch": 0.32583219498166655, + "kl_loss": 0.24803465604782104, + "loss_ib": 0.010442916303873062, + "step": 1133 + }, + { + "ce_ib": 5.276309967041016, + "ce_orig": 0.6765012145042419, + "epoch": 0.32583219498166655, + "kl_loss": 0.36133527755737305, + "loss_ib": 0.008889662101864815, + "step": 1133 + }, + { + "ce_ib": 4.470157623291016, + "ce_orig": 0.5433730483055115, + "epoch": 0.32583219498166655, + "kl_loss": 0.27675801515579224, + "loss_ib": 0.007237737532705069, + "step": 1133 + }, + { + "ce_ib": 5.233908176422119, + "ce_orig": 0.6392762064933777, + "epoch": 0.32611977856064417, + "kl_loss": 0.220979705452919, + "loss_ib": 0.007443705108016729, + "step": 1134 + }, + { + "ce_ib": 5.830175876617432, + "ce_orig": 0.7680152058601379, + "epoch": 0.32611977856064417, + "kl_loss": 0.38658952713012695, + "loss_ib": 0.009696071036159992, + "step": 1134 + }, + { + "ce_ib": 8.762476921081543, + "ce_orig": 1.609471321105957, + "epoch": 0.32611977856064417, + "kl_loss": 0.21049867570400238, + "loss_ib": 0.010867463424801826, + "step": 1134 + }, + { + "ce_ib": 7.221193313598633, + "ce_orig": 0.8087792992591858, + "epoch": 0.32611977856064417, + "kl_loss": 0.2421259880065918, + "loss_ib": 0.00964245293289423, + "step": 1134 + }, + { + "epoch": 0.32640736213962185, + "grad_norm": 0.09609334170818329, + "learning_rate": 9.840824553127954e-06, + "loss": 0.8168, + "step": 1135 + }, + { + "ce_ib": 7.099168300628662, + "ce_orig": 0.954532265663147, + "epoch": 0.32640736213962185, + "kl_loss": 0.22126835584640503, + "loss_ib": 0.009311852045357227, + "step": 1135 + }, + { + "ce_ib": 4.000297546386719, + "ce_orig": 0.6663585305213928, + "epoch": 0.32640736213962185, + "kl_loss": 0.2963029146194458, + "loss_ib": 0.006963326595723629, + "step": 1135 + }, + { + "ce_ib": 5.186071395874023, + "ce_orig": 0.5578123331069946, + "epoch": 0.32640736213962185, + "kl_loss": 0.24658748507499695, + "loss_ib": 0.007651946507394314, + "step": 1135 + }, + { + "ce_ib": 10.776365280151367, + "ce_orig": 1.1968107223510742, + "epoch": 0.32640736213962185, + "kl_loss": 0.21162372827529907, + "loss_ib": 0.012892603874206543, + "step": 1135 + }, + { + "ce_ib": 6.071680068969727, + "ce_orig": 0.6322076320648193, + "epoch": 0.32669494571859947, + "kl_loss": 0.28067898750305176, + "loss_ib": 0.008878469467163086, + "step": 1136 + }, + { + "ce_ib": 9.11697769165039, + "ce_orig": 0.9247298240661621, + "epoch": 0.32669494571859947, + "kl_loss": 0.2652074694633484, + "loss_ib": 0.011769052594900131, + "step": 1136 + }, + { + "ce_ib": 4.236838340759277, + "ce_orig": 0.572127103805542, + "epoch": 0.32669494571859947, + "kl_loss": 0.4477131962776184, + "loss_ib": 0.008713969960808754, + "step": 1136 + }, + { + "ce_ib": 5.590993404388428, + "ce_orig": 0.7507918477058411, + "epoch": 0.32669494571859947, + "kl_loss": 0.30635079741477966, + "loss_ib": 0.008654501289129257, + "step": 1136 + }, + { + "ce_ib": 5.092818737030029, + "ce_orig": 0.6064829230308533, + "epoch": 0.3269825292975771, + "kl_loss": 0.20581455528736115, + "loss_ib": 0.007150963880121708, + "step": 1137 + }, + { + "ce_ib": 6.453564643859863, + "ce_orig": 0.9269260168075562, + "epoch": 0.3269825292975771, + "kl_loss": 0.19207924604415894, + "loss_ib": 0.008374356664717197, + "step": 1137 + }, + { + "ce_ib": 10.570686340332031, + "ce_orig": 1.2108427286148071, + "epoch": 0.3269825292975771, + "kl_loss": 0.32438749074935913, + "loss_ib": 0.013814561069011688, + "step": 1137 + }, + { + "ce_ib": 5.679614067077637, + "ce_orig": 0.6215935945510864, + "epoch": 0.3269825292975771, + "kl_loss": 0.5665749907493591, + "loss_ib": 0.011345363222062588, + "step": 1137 + }, + { + "ce_ib": 5.880247592926025, + "ce_orig": 0.7091310024261475, + "epoch": 0.3272701128765548, + "kl_loss": 0.2912711799144745, + "loss_ib": 0.008792959153652191, + "step": 1138 + }, + { + "ce_ib": 9.816313743591309, + "ce_orig": 1.1858292818069458, + "epoch": 0.3272701128765548, + "kl_loss": 0.24216127395629883, + "loss_ib": 0.01223792601376772, + "step": 1138 + }, + { + "ce_ib": 6.8238444328308105, + "ce_orig": 0.5911304354667664, + "epoch": 0.3272701128765548, + "kl_loss": 0.2992258071899414, + "loss_ib": 0.009816101752221584, + "step": 1138 + }, + { + "ce_ib": 4.645931720733643, + "ce_orig": 0.7781140804290771, + "epoch": 0.3272701128765548, + "kl_loss": 0.24800650775432587, + "loss_ib": 0.007125996984541416, + "step": 1138 + }, + { + "ce_ib": 3.9870479106903076, + "ce_orig": 0.5202873945236206, + "epoch": 0.3275576964555324, + "kl_loss": 0.22053340077400208, + "loss_ib": 0.006192381959408522, + "step": 1139 + }, + { + "ce_ib": 8.540157318115234, + "ce_orig": 1.3470656871795654, + "epoch": 0.3275576964555324, + "kl_loss": 0.23808708786964417, + "loss_ib": 0.010921028442680836, + "step": 1139 + }, + { + "ce_ib": 6.005643367767334, + "ce_orig": 0.8297461867332458, + "epoch": 0.3275576964555324, + "kl_loss": 0.22874270379543304, + "loss_ib": 0.008293070830404758, + "step": 1139 + }, + { + "ce_ib": 8.215617179870605, + "ce_orig": 0.706344485282898, + "epoch": 0.3275576964555324, + "kl_loss": 0.3254474997520447, + "loss_ib": 0.011470092460513115, + "step": 1139 + }, + { + "epoch": 0.32784528003451, + "grad_norm": 0.09755509346723557, + "learning_rate": 9.838876078900158e-06, + "loss": 0.8811, + "step": 1140 + }, + { + "ce_ib": 5.847168445587158, + "ce_orig": 1.0301125049591064, + "epoch": 0.32784528003451, + "kl_loss": 0.22780725359916687, + "loss_ib": 0.008125240914523602, + "step": 1140 + }, + { + "ce_ib": 8.922221183776855, + "ce_orig": 0.9144381880760193, + "epoch": 0.32784528003451, + "kl_loss": 0.3217501640319824, + "loss_ib": 0.01213972270488739, + "step": 1140 + }, + { + "ce_ib": 7.23486328125, + "ce_orig": 0.4056432843208313, + "epoch": 0.32784528003451, + "kl_loss": 0.3580802083015442, + "loss_ib": 0.010815665125846863, + "step": 1140 + }, + { + "ce_ib": 5.954606056213379, + "ce_orig": 0.589507520198822, + "epoch": 0.32784528003451, + "kl_loss": 0.2619907557964325, + "loss_ib": 0.008574512787163258, + "step": 1140 + }, + { + "ce_ib": 10.314091682434082, + "ce_orig": 1.3086189031600952, + "epoch": 0.32813286361348765, + "kl_loss": 0.3001616597175598, + "loss_ib": 0.013315708376467228, + "step": 1141 + }, + { + "ce_ib": 6.59683084487915, + "ce_orig": 0.7322800755500793, + "epoch": 0.32813286361348765, + "kl_loss": 0.2564740777015686, + "loss_ib": 0.009161571972072124, + "step": 1141 + }, + { + "ce_ib": 8.852384567260742, + "ce_orig": 0.7106950879096985, + "epoch": 0.32813286361348765, + "kl_loss": 0.3281547427177429, + "loss_ib": 0.012133931741118431, + "step": 1141 + }, + { + "ce_ib": 7.752597332000732, + "ce_orig": 0.7331222295761108, + "epoch": 0.32813286361348765, + "kl_loss": 0.330089807510376, + "loss_ib": 0.011053495109081268, + "step": 1141 + }, + { + "ce_ib": 5.990482807159424, + "ce_orig": 0.5036841630935669, + "epoch": 0.3284204471924653, + "kl_loss": 0.25676479935646057, + "loss_ib": 0.008558130823075771, + "step": 1142 + }, + { + "ce_ib": 5.725650310516357, + "ce_orig": 0.6308770775794983, + "epoch": 0.3284204471924653, + "kl_loss": 0.18920324742794037, + "loss_ib": 0.007617682684212923, + "step": 1142 + }, + { + "ce_ib": 7.363343715667725, + "ce_orig": 0.8220383524894714, + "epoch": 0.3284204471924653, + "kl_loss": 0.36467304825782776, + "loss_ib": 0.01101007405668497, + "step": 1142 + }, + { + "ce_ib": 7.561408996582031, + "ce_orig": 0.729387640953064, + "epoch": 0.3284204471924653, + "kl_loss": 0.1974533498287201, + "loss_ib": 0.00953594222664833, + "step": 1142 + }, + { + "ce_ib": 9.33537769317627, + "ce_orig": 0.7693027853965759, + "epoch": 0.32870803077144295, + "kl_loss": 0.3023679256439209, + "loss_ib": 0.012359056621789932, + "step": 1143 + }, + { + "ce_ib": 5.879181861877441, + "ce_orig": 0.778829038143158, + "epoch": 0.32870803077144295, + "kl_loss": 0.32206517457962036, + "loss_ib": 0.00909983366727829, + "step": 1143 + }, + { + "ce_ib": 5.382481575012207, + "ce_orig": 0.737525999546051, + "epoch": 0.32870803077144295, + "kl_loss": 0.22152158617973328, + "loss_ib": 0.007597697898745537, + "step": 1143 + }, + { + "ce_ib": 11.252842903137207, + "ce_orig": 0.9058976173400879, + "epoch": 0.32870803077144295, + "kl_loss": 0.26175910234451294, + "loss_ib": 0.01387043483555317, + "step": 1143 + }, + { + "ce_ib": 9.349008560180664, + "ce_orig": 0.7972526550292969, + "epoch": 0.3289956143504206, + "kl_loss": 0.32632461190223694, + "loss_ib": 0.012612254358828068, + "step": 1144 + }, + { + "ce_ib": 9.687057495117188, + "ce_orig": 1.1150705814361572, + "epoch": 0.3289956143504206, + "kl_loss": 0.25978612899780273, + "loss_ib": 0.012284918688237667, + "step": 1144 + }, + { + "ce_ib": 5.115502834320068, + "ce_orig": 0.3979432284832001, + "epoch": 0.3289956143504206, + "kl_loss": 0.24917109310626984, + "loss_ib": 0.0076072136871516705, + "step": 1144 + }, + { + "ce_ib": 10.403783798217773, + "ce_orig": 1.24606192111969, + "epoch": 0.3289956143504206, + "kl_loss": 0.2913515865802765, + "loss_ib": 0.013317300006747246, + "step": 1144 + }, + { + "epoch": 0.32928319792939825, + "grad_norm": 0.09816709160804749, + "learning_rate": 9.83691594669676e-06, + "loss": 0.873, + "step": 1145 + }, + { + "ce_ib": 5.447243690490723, + "ce_orig": 0.6441102027893066, + "epoch": 0.32928319792939825, + "kl_loss": 0.23241248726844788, + "loss_ib": 0.007771369069814682, + "step": 1145 + }, + { + "ce_ib": 7.5973429679870605, + "ce_orig": 0.9865646362304688, + "epoch": 0.32928319792939825, + "kl_loss": 0.38887763023376465, + "loss_ib": 0.011486119590699673, + "step": 1145 + }, + { + "ce_ib": 7.2056779861450195, + "ce_orig": 0.6080095767974854, + "epoch": 0.32928319792939825, + "kl_loss": 0.23443345725536346, + "loss_ib": 0.009550012648105621, + "step": 1145 + }, + { + "ce_ib": 9.07731819152832, + "ce_orig": 0.840085506439209, + "epoch": 0.32928319792939825, + "kl_loss": 0.39250028133392334, + "loss_ib": 0.013002321124076843, + "step": 1145 + }, + { + "ce_ib": 9.375168800354004, + "ce_orig": 1.1422781944274902, + "epoch": 0.3295707815083759, + "kl_loss": 0.2328919619321823, + "loss_ib": 0.011704088188707829, + "step": 1146 + }, + { + "ce_ib": 4.802104473114014, + "ce_orig": 0.7695512771606445, + "epoch": 0.3295707815083759, + "kl_loss": 0.27905359864234924, + "loss_ib": 0.0075926403515040874, + "step": 1146 + }, + { + "ce_ib": 6.649759292602539, + "ce_orig": 0.9065436720848083, + "epoch": 0.3295707815083759, + "kl_loss": 0.3191312253475189, + "loss_ib": 0.0098410714417696, + "step": 1146 + }, + { + "ce_ib": 8.3096923828125, + "ce_orig": 0.8790789842605591, + "epoch": 0.3295707815083759, + "kl_loss": 0.24609088897705078, + "loss_ib": 0.010770602151751518, + "step": 1146 + }, + { + "ce_ib": 6.451026439666748, + "ce_orig": 0.5358216166496277, + "epoch": 0.3298583650873535, + "kl_loss": 0.19455279409885406, + "loss_ib": 0.008396554738283157, + "step": 1147 + }, + { + "ce_ib": 9.036972999572754, + "ce_orig": 0.6360263824462891, + "epoch": 0.3298583650873535, + "kl_loss": 0.24949690699577332, + "loss_ib": 0.011531941592693329, + "step": 1147 + }, + { + "ce_ib": 7.984084606170654, + "ce_orig": 0.7079024910926819, + "epoch": 0.3298583650873535, + "kl_loss": 0.25150322914123535, + "loss_ib": 0.010499116964638233, + "step": 1147 + }, + { + "ce_ib": 5.445267200469971, + "ce_orig": 0.6818575263023376, + "epoch": 0.3298583650873535, + "kl_loss": 0.23439064621925354, + "loss_ib": 0.007789174094796181, + "step": 1147 + }, + { + "ce_ib": 5.773715019226074, + "ce_orig": 0.8410392999649048, + "epoch": 0.3301459486663312, + "kl_loss": 0.1873149275779724, + "loss_ib": 0.007646864280104637, + "step": 1148 + }, + { + "ce_ib": 8.979928016662598, + "ce_orig": 1.226332187652588, + "epoch": 0.3301459486663312, + "kl_loss": 0.30875229835510254, + "loss_ib": 0.012067451141774654, + "step": 1148 + }, + { + "ce_ib": 5.598503589630127, + "ce_orig": 0.7870617508888245, + "epoch": 0.3301459486663312, + "kl_loss": 0.2419789880514145, + "loss_ib": 0.008018293417990208, + "step": 1148 + }, + { + "ce_ib": 4.286440849304199, + "ce_orig": 0.6685412526130676, + "epoch": 0.3301459486663312, + "kl_loss": 0.19143131375312805, + "loss_ib": 0.0062007540836930275, + "step": 1148 + }, + { + "ce_ib": 5.1454854011535645, + "ce_orig": 0.4119313657283783, + "epoch": 0.3304335322453088, + "kl_loss": 0.3295081853866577, + "loss_ib": 0.008440567180514336, + "step": 1149 + }, + { + "ce_ib": 4.645988464355469, + "ce_orig": 0.6422159075737, + "epoch": 0.3304335322453088, + "kl_loss": 0.22805818915367126, + "loss_ib": 0.006926570553332567, + "step": 1149 + }, + { + "ce_ib": 4.646116733551025, + "ce_orig": 0.3140992224216461, + "epoch": 0.3304335322453088, + "kl_loss": 0.35052090883255005, + "loss_ib": 0.008151326328516006, + "step": 1149 + }, + { + "ce_ib": 8.022761344909668, + "ce_orig": 0.9764880537986755, + "epoch": 0.3304335322453088, + "kl_loss": 0.21903610229492188, + "loss_ib": 0.010213121771812439, + "step": 1149 + }, + { + "epoch": 0.3307211158242864, + "grad_norm": 0.10742151737213135, + "learning_rate": 9.834944161240172e-06, + "loss": 0.8247, + "step": 1150 + }, + { + "ce_ib": 10.445816040039062, + "ce_orig": 1.3861838579177856, + "epoch": 0.3307211158242864, + "kl_loss": 0.31801116466522217, + "loss_ib": 0.013625928200781345, + "step": 1150 + }, + { + "ce_ib": 4.385981559753418, + "ce_orig": 0.8889569044113159, + "epoch": 0.3307211158242864, + "kl_loss": 0.22078658640384674, + "loss_ib": 0.006593847181648016, + "step": 1150 + }, + { + "ce_ib": 9.175389289855957, + "ce_orig": 0.8606024384498596, + "epoch": 0.3307211158242864, + "kl_loss": 0.38910189270973206, + "loss_ib": 0.013066408224403858, + "step": 1150 + }, + { + "ce_ib": 7.696003437042236, + "ce_orig": 0.4989853501319885, + "epoch": 0.3307211158242864, + "kl_loss": 0.2723633348941803, + "loss_ib": 0.010419636964797974, + "step": 1150 + }, + { + "ce_ib": 4.9289350509643555, + "ce_orig": 0.4876880645751953, + "epoch": 0.33100869940326405, + "kl_loss": 0.2187470644712448, + "loss_ib": 0.007116405759006739, + "step": 1151 + }, + { + "ce_ib": 4.567633152008057, + "ce_orig": 0.8315699100494385, + "epoch": 0.33100869940326405, + "kl_loss": 0.26455092430114746, + "loss_ib": 0.007213142234832048, + "step": 1151 + }, + { + "ce_ib": 11.272817611694336, + "ce_orig": 1.7354942560195923, + "epoch": 0.33100869940326405, + "kl_loss": 0.3189008831977844, + "loss_ib": 0.014461826533079147, + "step": 1151 + }, + { + "ce_ib": 7.502978324890137, + "ce_orig": 1.1127325296401978, + "epoch": 0.33100869940326405, + "kl_loss": 0.27890706062316895, + "loss_ib": 0.010292048566043377, + "step": 1151 + }, + { + "ce_ib": 9.4304838180542, + "ce_orig": 1.2863335609436035, + "epoch": 0.33129628298224173, + "kl_loss": 0.36749768257141113, + "loss_ib": 0.013105461373925209, + "step": 1152 + }, + { + "ce_ib": 9.577445983886719, + "ce_orig": 0.9453591108322144, + "epoch": 0.33129628298224173, + "kl_loss": 0.2589572072029114, + "loss_ib": 0.012167016975581646, + "step": 1152 + }, + { + "ce_ib": 6.510651111602783, + "ce_orig": 0.3509739339351654, + "epoch": 0.33129628298224173, + "kl_loss": 0.2997548282146454, + "loss_ib": 0.00950819905847311, + "step": 1152 + }, + { + "ce_ib": 5.625336647033691, + "ce_orig": 0.7494199275970459, + "epoch": 0.33129628298224173, + "kl_loss": 0.24421089887619019, + "loss_ib": 0.008067445829510689, + "step": 1152 + }, + { + "ce_ib": 6.045725345611572, + "ce_orig": 0.6475803256034851, + "epoch": 0.33158386656121935, + "kl_loss": 0.2583104074001312, + "loss_ib": 0.008628829382359982, + "step": 1153 + }, + { + "ce_ib": 7.56576681137085, + "ce_orig": 0.9475224614143372, + "epoch": 0.33158386656121935, + "kl_loss": 0.2908802032470703, + "loss_ib": 0.010474569164216518, + "step": 1153 + }, + { + "ce_ib": 9.017477989196777, + "ce_orig": 1.088793158531189, + "epoch": 0.33158386656121935, + "kl_loss": 0.3477438688278198, + "loss_ib": 0.012494917027652264, + "step": 1153 + }, + { + "ce_ib": 7.708969593048096, + "ce_orig": 1.1444097757339478, + "epoch": 0.33158386656121935, + "kl_loss": 0.33121156692504883, + "loss_ib": 0.011021084152162075, + "step": 1153 + }, + { + "ce_ib": 5.410340309143066, + "ce_orig": 0.5340110063552856, + "epoch": 0.331871450140197, + "kl_loss": 0.23038868606090546, + "loss_ib": 0.007714227307587862, + "step": 1154 + }, + { + "ce_ib": 6.989197254180908, + "ce_orig": 0.6077629327774048, + "epoch": 0.331871450140197, + "kl_loss": 0.2350027859210968, + "loss_ib": 0.009339225478470325, + "step": 1154 + }, + { + "ce_ib": 8.7223539352417, + "ce_orig": 1.0650534629821777, + "epoch": 0.331871450140197, + "kl_loss": 0.3202980160713196, + "loss_ib": 0.01192533504217863, + "step": 1154 + }, + { + "ce_ib": 4.908301830291748, + "ce_orig": 0.9075806736946106, + "epoch": 0.331871450140197, + "kl_loss": 0.2366945594549179, + "loss_ib": 0.007275247480720282, + "step": 1154 + }, + { + "epoch": 0.33215903371917466, + "grad_norm": 0.08863980323076248, + "learning_rate": 9.832960727280887e-06, + "loss": 0.8609, + "step": 1155 + }, + { + "ce_ib": 5.900798320770264, + "ce_orig": 0.8526896834373474, + "epoch": 0.33215903371917466, + "kl_loss": 0.22288133203983307, + "loss_ib": 0.008129611611366272, + "step": 1155 + }, + { + "ce_ib": 4.731245994567871, + "ce_orig": 0.5797931551933289, + "epoch": 0.33215903371917466, + "kl_loss": 0.20637674629688263, + "loss_ib": 0.006795013323426247, + "step": 1155 + }, + { + "ce_ib": 7.965129852294922, + "ce_orig": 0.7734341025352478, + "epoch": 0.33215903371917466, + "kl_loss": 0.26391905546188354, + "loss_ib": 0.010604320093989372, + "step": 1155 + }, + { + "ce_ib": 5.641459941864014, + "ce_orig": 0.8688373565673828, + "epoch": 0.33215903371917466, + "kl_loss": 0.18195831775665283, + "loss_ib": 0.007461042609065771, + "step": 1155 + }, + { + "ce_ib": 8.416781425476074, + "ce_orig": 1.1019142866134644, + "epoch": 0.3324466172981523, + "kl_loss": 0.22895824909210205, + "loss_ib": 0.010706363245844841, + "step": 1156 + }, + { + "ce_ib": 7.812315940856934, + "ce_orig": 1.1109238862991333, + "epoch": 0.3324466172981523, + "kl_loss": 0.2360575646162033, + "loss_ib": 0.010172891430556774, + "step": 1156 + }, + { + "ce_ib": 6.696736812591553, + "ce_orig": 0.809617280960083, + "epoch": 0.3324466172981523, + "kl_loss": 0.23302681744098663, + "loss_ib": 0.00902700424194336, + "step": 1156 + }, + { + "ce_ib": 5.31292724609375, + "ce_orig": 0.46756190061569214, + "epoch": 0.3324466172981523, + "kl_loss": 0.23464633524417877, + "loss_ib": 0.007659390568733215, + "step": 1156 + }, + { + "ce_ib": 4.710688591003418, + "ce_orig": 0.6011915802955627, + "epoch": 0.3327342008771299, + "kl_loss": 0.6861224174499512, + "loss_ib": 0.011571912094950676, + "step": 1157 + }, + { + "ce_ib": 10.64146614074707, + "ce_orig": 1.7183725833892822, + "epoch": 0.3327342008771299, + "kl_loss": 0.2469731569290161, + "loss_ib": 0.013111197389662266, + "step": 1157 + }, + { + "ce_ib": 4.336862564086914, + "ce_orig": 0.9087672233581543, + "epoch": 0.3327342008771299, + "kl_loss": 0.1872231662273407, + "loss_ib": 0.006209094543009996, + "step": 1157 + }, + { + "ce_ib": 7.800237655639648, + "ce_orig": 0.9339820146560669, + "epoch": 0.3327342008771299, + "kl_loss": 0.2661847770214081, + "loss_ib": 0.010462084785103798, + "step": 1157 + }, + { + "ce_ib": 9.811734199523926, + "ce_orig": 1.4699519872665405, + "epoch": 0.3330217844561076, + "kl_loss": 0.26861608028411865, + "loss_ib": 0.01249789446592331, + "step": 1158 + }, + { + "ce_ib": 9.553086280822754, + "ce_orig": 1.3622437715530396, + "epoch": 0.3330217844561076, + "kl_loss": 0.2595096826553345, + "loss_ib": 0.012148181907832623, + "step": 1158 + }, + { + "ce_ib": 3.6624906063079834, + "ce_orig": 0.40558406710624695, + "epoch": 0.3330217844561076, + "kl_loss": 0.4334304630756378, + "loss_ib": 0.007996794767677784, + "step": 1158 + }, + { + "ce_ib": 7.823740482330322, + "ce_orig": 0.8662386536598206, + "epoch": 0.3330217844561076, + "kl_loss": 0.30475282669067383, + "loss_ib": 0.01087126974016428, + "step": 1158 + }, + { + "ce_ib": 6.864394664764404, + "ce_orig": 0.6482590436935425, + "epoch": 0.3333093680350852, + "kl_loss": 0.30486205220222473, + "loss_ib": 0.009913015179336071, + "step": 1159 + }, + { + "ce_ib": 10.348648071289062, + "ce_orig": 1.443572998046875, + "epoch": 0.3333093680350852, + "kl_loss": 0.33501648902893066, + "loss_ib": 0.013698812574148178, + "step": 1159 + }, + { + "ce_ib": 9.253283500671387, + "ce_orig": 1.5353633165359497, + "epoch": 0.3333093680350852, + "kl_loss": 0.3021102845668793, + "loss_ib": 0.01227438636124134, + "step": 1159 + }, + { + "ce_ib": 8.694429397583008, + "ce_orig": 0.9382469058036804, + "epoch": 0.3333093680350852, + "kl_loss": 0.2970583438873291, + "loss_ib": 0.011665012687444687, + "step": 1159 + }, + { + "epoch": 0.33359695161406283, + "grad_norm": 0.10275700688362122, + "learning_rate": 9.830965649597455e-06, + "loss": 0.935, + "step": 1160 + }, + { + "ce_ib": 5.686638832092285, + "ce_orig": 0.6911338567733765, + "epoch": 0.33359695161406283, + "kl_loss": 0.34932830929756165, + "loss_ib": 0.009179921820759773, + "step": 1160 + }, + { + "ce_ib": 6.213655948638916, + "ce_orig": 0.41276422142982483, + "epoch": 0.33359695161406283, + "kl_loss": 0.29866302013397217, + "loss_ib": 0.009200286120176315, + "step": 1160 + }, + { + "ce_ib": 6.622434616088867, + "ce_orig": 1.0098650455474854, + "epoch": 0.33359695161406283, + "kl_loss": 0.3152294158935547, + "loss_ib": 0.009774728678166866, + "step": 1160 + }, + { + "ce_ib": 5.391828536987305, + "ce_orig": 0.8483104705810547, + "epoch": 0.33359695161406283, + "kl_loss": 0.29343515634536743, + "loss_ib": 0.008326179347932339, + "step": 1160 + }, + { + "ce_ib": 3.05668044090271, + "ce_orig": 0.5592519640922546, + "epoch": 0.33388453519304045, + "kl_loss": 0.1691984087228775, + "loss_ib": 0.004748664330691099, + "step": 1161 + }, + { + "ce_ib": 6.491842746734619, + "ce_orig": 0.8682400584220886, + "epoch": 0.33388453519304045, + "kl_loss": 0.2605954706668854, + "loss_ib": 0.00909779779613018, + "step": 1161 + }, + { + "ce_ib": 4.910883903503418, + "ce_orig": 0.66831374168396, + "epoch": 0.33388453519304045, + "kl_loss": 0.3180694878101349, + "loss_ib": 0.008091578260064125, + "step": 1161 + }, + { + "ce_ib": 7.609592437744141, + "ce_orig": 1.0245137214660645, + "epoch": 0.33388453519304045, + "kl_loss": 0.42994049191474915, + "loss_ib": 0.011908996850252151, + "step": 1161 + }, + { + "ce_ib": 3.5741307735443115, + "ce_orig": 0.677931547164917, + "epoch": 0.33417211877201813, + "kl_loss": 0.2711496949195862, + "loss_ib": 0.006285627838224173, + "step": 1162 + }, + { + "ce_ib": 8.588187217712402, + "ce_orig": 0.8838889002799988, + "epoch": 0.33417211877201813, + "kl_loss": 0.34474146366119385, + "loss_ib": 0.01203560084104538, + "step": 1162 + }, + { + "ce_ib": 8.509382247924805, + "ce_orig": 1.1210061311721802, + "epoch": 0.33417211877201813, + "kl_loss": 0.22604356706142426, + "loss_ib": 0.010769817978143692, + "step": 1162 + }, + { + "ce_ib": 7.751454830169678, + "ce_orig": 0.4753890633583069, + "epoch": 0.33417211877201813, + "kl_loss": 0.25493529438972473, + "loss_ib": 0.010300807654857635, + "step": 1162 + }, + { + "ce_ib": 5.955982685089111, + "ce_orig": 0.7514666318893433, + "epoch": 0.33445970235099576, + "kl_loss": 0.2573981285095215, + "loss_ib": 0.0085299639031291, + "step": 1163 + }, + { + "ce_ib": 6.996506690979004, + "ce_orig": 1.0626541376113892, + "epoch": 0.33445970235099576, + "kl_loss": 0.159402996301651, + "loss_ib": 0.008590537123382092, + "step": 1163 + }, + { + "ce_ib": 9.42298412322998, + "ce_orig": 1.2421761751174927, + "epoch": 0.33445970235099576, + "kl_loss": 0.18329188227653503, + "loss_ib": 0.011255903169512749, + "step": 1163 + }, + { + "ce_ib": 6.042232513427734, + "ce_orig": 0.43465274572372437, + "epoch": 0.33445970235099576, + "kl_loss": 0.2287701964378357, + "loss_ib": 0.008329934440553188, + "step": 1163 + }, + { + "ce_ib": 6.9496750831604, + "ce_orig": 0.44478708505630493, + "epoch": 0.3347472859299734, + "kl_loss": 0.2602609097957611, + "loss_ib": 0.009552284143865108, + "step": 1164 + }, + { + "ce_ib": 5.079557418823242, + "ce_orig": 0.7076042294502258, + "epoch": 0.3347472859299734, + "kl_loss": 0.25082868337631226, + "loss_ib": 0.007587844040244818, + "step": 1164 + }, + { + "ce_ib": 9.466922760009766, + "ce_orig": 1.1378250122070312, + "epoch": 0.3347472859299734, + "kl_loss": 0.30516183376312256, + "loss_ib": 0.012518541887402534, + "step": 1164 + }, + { + "ce_ib": 8.919095993041992, + "ce_orig": 1.0271871089935303, + "epoch": 0.3347472859299734, + "kl_loss": 0.25492143630981445, + "loss_ib": 0.011468309909105301, + "step": 1164 + }, + { + "epoch": 0.33503486950895106, + "grad_norm": 0.10710824280977249, + "learning_rate": 9.828958932996483e-06, + "loss": 0.8727, + "step": 1165 + }, + { + "ce_ib": 8.745596885681152, + "ce_orig": 1.240087628364563, + "epoch": 0.33503486950895106, + "kl_loss": 0.21713513135910034, + "loss_ib": 0.010916948318481445, + "step": 1165 + }, + { + "ce_ib": 8.954095840454102, + "ce_orig": 1.4257614612579346, + "epoch": 0.33503486950895106, + "kl_loss": 0.2196533977985382, + "loss_ib": 0.011150629259645939, + "step": 1165 + }, + { + "ce_ib": 7.628279685974121, + "ce_orig": 1.0557085275650024, + "epoch": 0.33503486950895106, + "kl_loss": 0.27045494318008423, + "loss_ib": 0.01033282931894064, + "step": 1165 + }, + { + "ce_ib": 6.705820083618164, + "ce_orig": 0.17608341574668884, + "epoch": 0.33503486950895106, + "kl_loss": 0.46943503618240356, + "loss_ib": 0.01140016969293356, + "step": 1165 + }, + { + "ce_ib": 7.123849868774414, + "ce_orig": 0.4561106562614441, + "epoch": 0.3353224530879287, + "kl_loss": 0.23111434280872345, + "loss_ib": 0.009434993378818035, + "step": 1166 + }, + { + "ce_ib": 7.194637298583984, + "ce_orig": 0.5614644885063171, + "epoch": 0.3353224530879287, + "kl_loss": 0.35345301032066345, + "loss_ib": 0.010729167610406876, + "step": 1166 + }, + { + "ce_ib": 4.771577835083008, + "ce_orig": 0.8040262460708618, + "epoch": 0.3353224530879287, + "kl_loss": 0.32398122549057007, + "loss_ib": 0.008011389523744583, + "step": 1166 + }, + { + "ce_ib": 9.567709922790527, + "ce_orig": 1.383070707321167, + "epoch": 0.3353224530879287, + "kl_loss": 0.27471500635147095, + "loss_ib": 0.012314860709011555, + "step": 1166 + }, + { + "ce_ib": 6.410955429077148, + "ce_orig": 0.8717294335365295, + "epoch": 0.3356100366669063, + "kl_loss": 0.25556251406669617, + "loss_ib": 0.008966580033302307, + "step": 1167 + }, + { + "ce_ib": 3.2919697761535645, + "ce_orig": 0.35355857014656067, + "epoch": 0.3356100366669063, + "kl_loss": 0.2551717758178711, + "loss_ib": 0.00584368733689189, + "step": 1167 + }, + { + "ce_ib": 7.970048904418945, + "ce_orig": 1.257944941520691, + "epoch": 0.3356100366669063, + "kl_loss": 0.25095152854919434, + "loss_ib": 0.010479564778506756, + "step": 1167 + }, + { + "ce_ib": 8.489374160766602, + "ce_orig": 1.0214661359786987, + "epoch": 0.3356100366669063, + "kl_loss": 0.23575599491596222, + "loss_ib": 0.010846934281289577, + "step": 1167 + }, + { + "ce_ib": 7.049896240234375, + "ce_orig": 1.0765953063964844, + "epoch": 0.335897620245884, + "kl_loss": 0.5470938682556152, + "loss_ib": 0.012520834803581238, + "step": 1168 + }, + { + "ce_ib": 8.322129249572754, + "ce_orig": 1.0483063459396362, + "epoch": 0.335897620245884, + "kl_loss": 0.2858157753944397, + "loss_ib": 0.011180286295711994, + "step": 1168 + }, + { + "ce_ib": 8.06887435913086, + "ce_orig": 1.226811170578003, + "epoch": 0.335897620245884, + "kl_loss": 0.2247699499130249, + "loss_ib": 0.010316574014723301, + "step": 1168 + }, + { + "ce_ib": 4.309444904327393, + "ce_orig": 0.39025741815567017, + "epoch": 0.335897620245884, + "kl_loss": 0.24306048452854156, + "loss_ib": 0.006740049459040165, + "step": 1168 + }, + { + "ce_ib": 8.550530433654785, + "ce_orig": 0.5781838893890381, + "epoch": 0.3361852038248616, + "kl_loss": 0.4174377918243408, + "loss_ib": 0.01272490806877613, + "step": 1169 + }, + { + "ce_ib": 4.730093479156494, + "ce_orig": 0.5063918232917786, + "epoch": 0.3361852038248616, + "kl_loss": 0.2270166277885437, + "loss_ib": 0.007000259589403868, + "step": 1169 + }, + { + "ce_ib": 5.606704235076904, + "ce_orig": 0.7304767966270447, + "epoch": 0.3361852038248616, + "kl_loss": 0.2581622898578644, + "loss_ib": 0.00818832777440548, + "step": 1169 + }, + { + "ce_ib": 6.313033103942871, + "ce_orig": 0.6566088795661926, + "epoch": 0.3361852038248616, + "kl_loss": 0.22507020831108093, + "loss_ib": 0.008563735522329807, + "step": 1169 + }, + { + "epoch": 0.33647278740383924, + "grad_norm": 0.0896129310131073, + "learning_rate": 9.826940582312617e-06, + "loss": 0.868, + "step": 1170 + }, + { + "ce_ib": 8.58356761932373, + "ce_orig": 1.0399962663650513, + "epoch": 0.33647278740383924, + "kl_loss": 0.2707816958427429, + "loss_ib": 0.01129138469696045, + "step": 1170 + }, + { + "ce_ib": 5.062176704406738, + "ce_orig": 1.0545399188995361, + "epoch": 0.33647278740383924, + "kl_loss": 0.24855771660804749, + "loss_ib": 0.0075477538630366325, + "step": 1170 + }, + { + "ce_ib": 7.816201210021973, + "ce_orig": 0.6892129182815552, + "epoch": 0.33647278740383924, + "kl_loss": 0.3450187146663666, + "loss_ib": 0.011266388930380344, + "step": 1170 + }, + { + "ce_ib": 5.691289901733398, + "ce_orig": 0.7200804352760315, + "epoch": 0.33647278740383924, + "kl_loss": 0.28832268714904785, + "loss_ib": 0.008574516512453556, + "step": 1170 + }, + { + "ce_ib": 4.545563697814941, + "ce_orig": 0.3753531873226166, + "epoch": 0.33676037098281686, + "kl_loss": 0.1820743829011917, + "loss_ib": 0.006366307381540537, + "step": 1171 + }, + { + "ce_ib": 6.307059288024902, + "ce_orig": 0.7057124376296997, + "epoch": 0.33676037098281686, + "kl_loss": 0.18096861243247986, + "loss_ib": 0.00811674538999796, + "step": 1171 + }, + { + "ce_ib": 8.955676078796387, + "ce_orig": 1.7811617851257324, + "epoch": 0.33676037098281686, + "kl_loss": 0.24952708184719086, + "loss_ib": 0.01145094633102417, + "step": 1171 + }, + { + "ce_ib": 6.340480327606201, + "ce_orig": 0.8761670589447021, + "epoch": 0.33676037098281686, + "kl_loss": 0.20854952931404114, + "loss_ib": 0.008425976149737835, + "step": 1171 + }, + { + "ce_ib": 7.614955902099609, + "ce_orig": 1.0214663743972778, + "epoch": 0.33704795456179454, + "kl_loss": 0.34129971265792847, + "loss_ib": 0.011027953587472439, + "step": 1172 + }, + { + "ce_ib": 4.446689128875732, + "ce_orig": 0.5307155251502991, + "epoch": 0.33704795456179454, + "kl_loss": 0.2831967771053314, + "loss_ib": 0.0072786565870046616, + "step": 1172 + }, + { + "ce_ib": 7.496953964233398, + "ce_orig": 1.0684783458709717, + "epoch": 0.33704795456179454, + "kl_loss": 0.26136043667793274, + "loss_ib": 0.01011055801063776, + "step": 1172 + }, + { + "ce_ib": 9.910701751708984, + "ce_orig": 0.9857996702194214, + "epoch": 0.33704795456179454, + "kl_loss": 0.2773677706718445, + "loss_ib": 0.012684379704296589, + "step": 1172 + }, + { + "ce_ib": 7.447598457336426, + "ce_orig": 0.7740610837936401, + "epoch": 0.33733553814077216, + "kl_loss": 0.28991585969924927, + "loss_ib": 0.010346757248044014, + "step": 1173 + }, + { + "ce_ib": 8.320796012878418, + "ce_orig": 0.91221684217453, + "epoch": 0.33733553814077216, + "kl_loss": 0.28020790219306946, + "loss_ib": 0.011122874915599823, + "step": 1173 + }, + { + "ce_ib": 11.175503730773926, + "ce_orig": 1.6308225393295288, + "epoch": 0.33733553814077216, + "kl_loss": 0.34757018089294434, + "loss_ib": 0.014651205390691757, + "step": 1173 + }, + { + "ce_ib": 6.0726728439331055, + "ce_orig": 0.5518878698348999, + "epoch": 0.33733553814077216, + "kl_loss": 0.25517135858535767, + "loss_ib": 0.008624386973679066, + "step": 1173 + }, + { + "ce_ib": 6.273822784423828, + "ce_orig": 0.7508142590522766, + "epoch": 0.3376231217197498, + "kl_loss": 0.3808406591415405, + "loss_ib": 0.010082229040563107, + "step": 1174 + }, + { + "ce_ib": 7.740455150604248, + "ce_orig": 0.8385905623435974, + "epoch": 0.3376231217197498, + "kl_loss": 0.23284657299518585, + "loss_ib": 0.010068920440971851, + "step": 1174 + }, + { + "ce_ib": 4.734543323516846, + "ce_orig": 0.69203120470047, + "epoch": 0.3376231217197498, + "kl_loss": 0.33553797006607056, + "loss_ib": 0.008089922368526459, + "step": 1174 + }, + { + "ce_ib": 8.494558334350586, + "ce_orig": 1.1173644065856934, + "epoch": 0.3376231217197498, + "kl_loss": 0.32031548023223877, + "loss_ib": 0.011697713285684586, + "step": 1174 + }, + { + "epoch": 0.33791070529872747, + "grad_norm": 0.09744829684495926, + "learning_rate": 9.824910602408528e-06, + "loss": 0.931, + "step": 1175 + }, + { + "ce_ib": 6.686302661895752, + "ce_orig": 0.8108925819396973, + "epoch": 0.33791070529872747, + "kl_loss": 0.34382033348083496, + "loss_ib": 0.010124506428837776, + "step": 1175 + }, + { + "ce_ib": 5.286237716674805, + "ce_orig": 0.3699839115142822, + "epoch": 0.33791070529872747, + "kl_loss": 0.16003935039043427, + "loss_ib": 0.00688663125038147, + "step": 1175 + }, + { + "ce_ib": 10.233742713928223, + "ce_orig": 1.4519670009613037, + "epoch": 0.33791070529872747, + "kl_loss": 0.25252097845077515, + "loss_ib": 0.012758953496813774, + "step": 1175 + }, + { + "ce_ib": 8.468210220336914, + "ce_orig": 1.0720109939575195, + "epoch": 0.33791070529872747, + "kl_loss": 0.38942861557006836, + "loss_ib": 0.012362496927380562, + "step": 1175 + }, + { + "ce_ib": 5.240467548370361, + "ce_orig": 0.8880922198295593, + "epoch": 0.3381982888777051, + "kl_loss": 0.22541771829128265, + "loss_ib": 0.007494644727557898, + "step": 1176 + }, + { + "ce_ib": 7.298333644866943, + "ce_orig": 0.511344850063324, + "epoch": 0.3381982888777051, + "kl_loss": 0.31986504793167114, + "loss_ib": 0.010496983304619789, + "step": 1176 + }, + { + "ce_ib": 7.720901012420654, + "ce_orig": 1.3299001455307007, + "epoch": 0.3381982888777051, + "kl_loss": 0.18659347295761108, + "loss_ib": 0.009586836211383343, + "step": 1176 + }, + { + "ce_ib": 10.655141830444336, + "ce_orig": 1.165802240371704, + "epoch": 0.3381982888777051, + "kl_loss": 0.2347698211669922, + "loss_ib": 0.013002839870750904, + "step": 1176 + }, + { + "ce_ib": 4.5789265632629395, + "ce_orig": 0.5766971707344055, + "epoch": 0.3384858724566827, + "kl_loss": 0.19134168326854706, + "loss_ib": 0.00649234326556325, + "step": 1177 + }, + { + "ce_ib": 4.78309440612793, + "ce_orig": 0.6634275317192078, + "epoch": 0.3384858724566827, + "kl_loss": 0.21720531582832336, + "loss_ib": 0.006955147720873356, + "step": 1177 + }, + { + "ce_ib": 7.196147441864014, + "ce_orig": 0.7668088674545288, + "epoch": 0.3384858724566827, + "kl_loss": 0.41518035531044006, + "loss_ib": 0.01134795043617487, + "step": 1177 + }, + { + "ce_ib": 4.836219310760498, + "ce_orig": 0.33054235577583313, + "epoch": 0.3384858724566827, + "kl_loss": 0.22906270623207092, + "loss_ib": 0.0071268463507294655, + "step": 1177 + }, + { + "ce_ib": 5.0311455726623535, + "ce_orig": 0.5774480104446411, + "epoch": 0.33877345603566034, + "kl_loss": 0.24220524728298187, + "loss_ib": 0.007453198079019785, + "step": 1178 + }, + { + "ce_ib": 11.710144996643066, + "ce_orig": 2.045793056488037, + "epoch": 0.33877345603566034, + "kl_loss": 0.3105084300041199, + "loss_ib": 0.014815229922533035, + "step": 1178 + }, + { + "ce_ib": 8.100988388061523, + "ce_orig": 0.9709780812263489, + "epoch": 0.33877345603566034, + "kl_loss": 0.18915799260139465, + "loss_ib": 0.00999256782233715, + "step": 1178 + }, + { + "ce_ib": 5.740729331970215, + "ce_orig": 0.48105230927467346, + "epoch": 0.33877345603566034, + "kl_loss": 0.3077582120895386, + "loss_ib": 0.008818311616778374, + "step": 1178 + }, + { + "ce_ib": 8.696881294250488, + "ce_orig": 1.047848105430603, + "epoch": 0.339061039614638, + "kl_loss": 0.25821876525878906, + "loss_ib": 0.011279068887233734, + "step": 1179 + }, + { + "ce_ib": 8.131317138671875, + "ce_orig": 0.9292140603065491, + "epoch": 0.339061039614638, + "kl_loss": 0.2904861569404602, + "loss_ib": 0.011036179028451443, + "step": 1179 + }, + { + "ce_ib": 5.153392314910889, + "ce_orig": 0.8021326065063477, + "epoch": 0.339061039614638, + "kl_loss": 0.22328916192054749, + "loss_ib": 0.007386283483356237, + "step": 1179 + }, + { + "ce_ib": 6.696101665496826, + "ce_orig": 1.0752699375152588, + "epoch": 0.339061039614638, + "kl_loss": 0.2535207271575928, + "loss_ib": 0.009231309406459332, + "step": 1179 + }, + { + "epoch": 0.33934862319361564, + "grad_norm": 0.10896741598844528, + "learning_rate": 9.822868998174914e-06, + "loss": 0.9011, + "step": 1180 + }, + { + "ce_ib": 7.786527156829834, + "ce_orig": 0.8277803659439087, + "epoch": 0.33934862319361564, + "kl_loss": 0.32757166028022766, + "loss_ib": 0.011062243953347206, + "step": 1180 + }, + { + "ce_ib": 5.486213684082031, + "ce_orig": 0.6657846570014954, + "epoch": 0.33934862319361564, + "kl_loss": 0.27369385957717896, + "loss_ib": 0.008223151788115501, + "step": 1180 + }, + { + "ce_ib": 6.703554153442383, + "ce_orig": 0.8262245059013367, + "epoch": 0.33934862319361564, + "kl_loss": 0.28381454944610596, + "loss_ib": 0.009541699662804604, + "step": 1180 + }, + { + "ce_ib": 4.37410306930542, + "ce_orig": 0.6137135028839111, + "epoch": 0.33934862319361564, + "kl_loss": 0.304335355758667, + "loss_ib": 0.007417456712573767, + "step": 1180 + }, + { + "ce_ib": 8.273782730102539, + "ce_orig": 1.3585046529769897, + "epoch": 0.33963620677259326, + "kl_loss": 0.26714888215065, + "loss_ib": 0.010945270769298077, + "step": 1181 + }, + { + "ce_ib": 5.653596878051758, + "ce_orig": 0.46559175848960876, + "epoch": 0.33963620677259326, + "kl_loss": 0.2943084239959717, + "loss_ib": 0.00859668105840683, + "step": 1181 + }, + { + "ce_ib": 9.782648086547852, + "ce_orig": 1.2866759300231934, + "epoch": 0.33963620677259326, + "kl_loss": 0.3248477578163147, + "loss_ib": 0.01303112506866455, + "step": 1181 + }, + { + "ce_ib": 4.714155197143555, + "ce_orig": 0.6941638588905334, + "epoch": 0.33963620677259326, + "kl_loss": 0.21619562804698944, + "loss_ib": 0.006876111496239901, + "step": 1181 + }, + { + "ce_ib": 6.625691890716553, + "ce_orig": 0.5445870161056519, + "epoch": 0.33992379035157094, + "kl_loss": 0.23607990145683289, + "loss_ib": 0.008986490778625011, + "step": 1182 + }, + { + "ce_ib": 10.256919860839844, + "ce_orig": 1.1101305484771729, + "epoch": 0.33992379035157094, + "kl_loss": 0.4181860089302063, + "loss_ib": 0.014438779093325138, + "step": 1182 + }, + { + "ce_ib": 9.138778686523438, + "ce_orig": 1.4723149538040161, + "epoch": 0.33992379035157094, + "kl_loss": 0.33457493782043457, + "loss_ib": 0.012484529055655003, + "step": 1182 + }, + { + "ce_ib": 10.66258430480957, + "ce_orig": 1.4558533430099487, + "epoch": 0.33992379035157094, + "kl_loss": 0.19665685296058655, + "loss_ib": 0.012629152275621891, + "step": 1182 + }, + { + "ce_ib": 6.267303466796875, + "ce_orig": 0.7697774171829224, + "epoch": 0.34021137393054857, + "kl_loss": 0.5935349464416504, + "loss_ib": 0.012202654033899307, + "step": 1183 + }, + { + "ce_ib": 5.881093502044678, + "ce_orig": 0.4210011065006256, + "epoch": 0.34021137393054857, + "kl_loss": 0.25692081451416016, + "loss_ib": 0.008450301364064217, + "step": 1183 + }, + { + "ce_ib": 7.419436454772949, + "ce_orig": 1.293154239654541, + "epoch": 0.34021137393054857, + "kl_loss": 0.26566553115844727, + "loss_ib": 0.010076090693473816, + "step": 1183 + }, + { + "ce_ib": 8.447088241577148, + "ce_orig": 1.1318758726119995, + "epoch": 0.34021137393054857, + "kl_loss": 0.26372429728507996, + "loss_ib": 0.011084331199526787, + "step": 1183 + }, + { + "ce_ib": 6.175292491912842, + "ce_orig": 0.9468995928764343, + "epoch": 0.3404989575095262, + "kl_loss": 0.23682790994644165, + "loss_ib": 0.008543571457266808, + "step": 1184 + }, + { + "ce_ib": 7.1323466300964355, + "ce_orig": 0.8490071892738342, + "epoch": 0.3404989575095262, + "kl_loss": 0.2767108678817749, + "loss_ib": 0.00989945512264967, + "step": 1184 + }, + { + "ce_ib": 6.375570774078369, + "ce_orig": 0.5345741510391235, + "epoch": 0.3404989575095262, + "kl_loss": 0.41314756870269775, + "loss_ib": 0.010507047176361084, + "step": 1184 + }, + { + "ce_ib": 7.322238445281982, + "ce_orig": 1.1370083093643188, + "epoch": 0.3404989575095262, + "kl_loss": 0.2956608235836029, + "loss_ib": 0.010278847068548203, + "step": 1184 + }, + { + "epoch": 0.34078654108850387, + "grad_norm": 0.10617753863334656, + "learning_rate": 9.820815774530473e-06, + "loss": 0.9171, + "step": 1185 + }, + { + "ce_ib": 7.385372161865234, + "ce_orig": 0.7977047562599182, + "epoch": 0.34078654108850387, + "kl_loss": 0.2396102249622345, + "loss_ib": 0.00978147517889738, + "step": 1185 + }, + { + "ce_ib": 5.597156047821045, + "ce_orig": 0.7299818992614746, + "epoch": 0.34078654108850387, + "kl_loss": 0.198746919631958, + "loss_ib": 0.007584625389426947, + "step": 1185 + }, + { + "ce_ib": 7.188481330871582, + "ce_orig": 1.3601056337356567, + "epoch": 0.34078654108850387, + "kl_loss": 0.44576168060302734, + "loss_ib": 0.011646098457276821, + "step": 1185 + }, + { + "ce_ib": 6.3431267738342285, + "ce_orig": 0.7139648795127869, + "epoch": 0.34078654108850387, + "kl_loss": 0.3590855896472931, + "loss_ib": 0.009933982975780964, + "step": 1185 + }, + { + "ce_ib": 8.162310600280762, + "ce_orig": 0.6909653544425964, + "epoch": 0.3410741246674815, + "kl_loss": 0.2678564786911011, + "loss_ib": 0.010840876027941704, + "step": 1186 + }, + { + "ce_ib": 8.19145393371582, + "ce_orig": 0.899401843547821, + "epoch": 0.3410741246674815, + "kl_loss": 0.2756710350513458, + "loss_ib": 0.010948164388537407, + "step": 1186 + }, + { + "ce_ib": 5.634396076202393, + "ce_orig": 0.8212024569511414, + "epoch": 0.3410741246674815, + "kl_loss": 0.19679471850395203, + "loss_ib": 0.007602343335747719, + "step": 1186 + }, + { + "ce_ib": 8.251862525939941, + "ce_orig": 1.219065546989441, + "epoch": 0.3410741246674815, + "kl_loss": 0.28769683837890625, + "loss_ib": 0.011128830723464489, + "step": 1186 + }, + { + "ce_ib": 9.134533882141113, + "ce_orig": 1.1667752265930176, + "epoch": 0.3413617082464591, + "kl_loss": 0.23589232563972473, + "loss_ib": 0.011493457481265068, + "step": 1187 + }, + { + "ce_ib": 1.7729456424713135, + "ce_orig": 0.17611609399318695, + "epoch": 0.3413617082464591, + "kl_loss": 0.6091700792312622, + "loss_ib": 0.007864645682275295, + "step": 1187 + }, + { + "ce_ib": 5.996875286102295, + "ce_orig": 0.7136014103889465, + "epoch": 0.3413617082464591, + "kl_loss": 0.2414180040359497, + "loss_ib": 0.00841105543076992, + "step": 1187 + }, + { + "ce_ib": 8.649369239807129, + "ce_orig": 1.5905383825302124, + "epoch": 0.3413617082464591, + "kl_loss": 0.42170989513397217, + "loss_ib": 0.012866468168795109, + "step": 1187 + }, + { + "ce_ib": 6.070381164550781, + "ce_orig": 0.599243700504303, + "epoch": 0.34164929182543674, + "kl_loss": 0.5712750554084778, + "loss_ib": 0.011783132329583168, + "step": 1188 + }, + { + "ce_ib": 7.615642070770264, + "ce_orig": 0.8723514080047607, + "epoch": 0.34164929182543674, + "kl_loss": 0.31852707266807556, + "loss_ib": 0.010800912044942379, + "step": 1188 + }, + { + "ce_ib": 6.003516674041748, + "ce_orig": 0.6859092712402344, + "epoch": 0.34164929182543674, + "kl_loss": 0.21503300964832306, + "loss_ib": 0.008153846487402916, + "step": 1188 + }, + { + "ce_ib": 5.837275981903076, + "ce_orig": 0.5255218744277954, + "epoch": 0.34164929182543674, + "kl_loss": 0.33323538303375244, + "loss_ib": 0.009169629774987698, + "step": 1188 + }, + { + "ce_ib": 7.077301502227783, + "ce_orig": 0.5584138631820679, + "epoch": 0.3419368754044144, + "kl_loss": 0.3154032528400421, + "loss_ib": 0.010231333784759045, + "step": 1189 + }, + { + "ce_ib": 4.683353900909424, + "ce_orig": 1.0100295543670654, + "epoch": 0.3419368754044144, + "kl_loss": 0.23839840292930603, + "loss_ib": 0.0070673380978405476, + "step": 1189 + }, + { + "ce_ib": 6.981997489929199, + "ce_orig": 0.8947463035583496, + "epoch": 0.3419368754044144, + "kl_loss": 0.30623385310173035, + "loss_ib": 0.010044336318969727, + "step": 1189 + }, + { + "ce_ib": 8.45647144317627, + "ce_orig": 1.307883858680725, + "epoch": 0.3419368754044144, + "kl_loss": 0.3093627393245697, + "loss_ib": 0.011550098657608032, + "step": 1189 + }, + { + "epoch": 0.34222445898339204, + "grad_norm": 0.09876321256160736, + "learning_rate": 9.818750936421894e-06, + "loss": 0.9088, + "step": 1190 + }, + { + "ce_ib": 9.657329559326172, + "ce_orig": 1.32797110080719, + "epoch": 0.34222445898339204, + "kl_loss": 0.2832852602005005, + "loss_ib": 0.012490181252360344, + "step": 1190 + }, + { + "ce_ib": 5.555445194244385, + "ce_orig": 0.38129884004592896, + "epoch": 0.34222445898339204, + "kl_loss": 0.37391650676727295, + "loss_ib": 0.009294610470533371, + "step": 1190 + }, + { + "ce_ib": 5.386440753936768, + "ce_orig": 0.8913825750350952, + "epoch": 0.34222445898339204, + "kl_loss": 0.4977033734321594, + "loss_ib": 0.010363473556935787, + "step": 1190 + }, + { + "ce_ib": 6.759226322174072, + "ce_orig": 0.8466914296150208, + "epoch": 0.34222445898339204, + "kl_loss": 0.29842573404312134, + "loss_ib": 0.009743483737111092, + "step": 1190 + }, + { + "ce_ib": 5.570773601531982, + "ce_orig": 0.5707303285598755, + "epoch": 0.34251204256236967, + "kl_loss": 0.35615256428718567, + "loss_ib": 0.009132299572229385, + "step": 1191 + }, + { + "ce_ib": 5.576659202575684, + "ce_orig": 0.28790995478630066, + "epoch": 0.34251204256236967, + "kl_loss": 0.45576032996177673, + "loss_ib": 0.010134262964129448, + "step": 1191 + }, + { + "ce_ib": 11.361724853515625, + "ce_orig": 1.6689220666885376, + "epoch": 0.34251204256236967, + "kl_loss": 0.3620713949203491, + "loss_ib": 0.014982438646256924, + "step": 1191 + }, + { + "ce_ib": 8.733168601989746, + "ce_orig": 0.7460479736328125, + "epoch": 0.34251204256236967, + "kl_loss": 0.3041139245033264, + "loss_ib": 0.011774308048188686, + "step": 1191 + }, + { + "ce_ib": 14.115975379943848, + "ce_orig": 0.7688201069831848, + "epoch": 0.34279962614134735, + "kl_loss": 0.6082284450531006, + "loss_ib": 0.020198259502649307, + "step": 1192 + }, + { + "ce_ib": 5.99207878112793, + "ce_orig": 0.8545339107513428, + "epoch": 0.34279962614134735, + "kl_loss": 0.21773307025432587, + "loss_ib": 0.008169409818947315, + "step": 1192 + }, + { + "ce_ib": 3.9453814029693604, + "ce_orig": 0.5740529298782349, + "epoch": 0.34279962614134735, + "kl_loss": 0.2615795135498047, + "loss_ib": 0.006561176851391792, + "step": 1192 + }, + { + "ce_ib": 6.691573619842529, + "ce_orig": 0.8405234813690186, + "epoch": 0.34279962614134735, + "kl_loss": 0.3192151188850403, + "loss_ib": 0.009883725084364414, + "step": 1192 + }, + { + "ce_ib": 6.382562637329102, + "ce_orig": 0.7000412940979004, + "epoch": 0.34308720972032497, + "kl_loss": 0.3458077907562256, + "loss_ib": 0.009840640239417553, + "step": 1193 + }, + { + "ce_ib": 5.735156059265137, + "ce_orig": 0.5913882851600647, + "epoch": 0.34308720972032497, + "kl_loss": 0.21425525844097137, + "loss_ib": 0.007877708412706852, + "step": 1193 + }, + { + "ce_ib": 3.7972733974456787, + "ce_orig": 0.3964553475379944, + "epoch": 0.34308720972032497, + "kl_loss": 0.30395567417144775, + "loss_ib": 0.006836830172687769, + "step": 1193 + }, + { + "ce_ib": 8.773690223693848, + "ce_orig": 1.267960786819458, + "epoch": 0.34308720972032497, + "kl_loss": 0.28563830256462097, + "loss_ib": 0.011630073189735413, + "step": 1193 + }, + { + "ce_ib": 8.8164701461792, + "ce_orig": 1.25728440284729, + "epoch": 0.3433747932993026, + "kl_loss": 0.2996102273464203, + "loss_ib": 0.011812572367489338, + "step": 1194 + }, + { + "ce_ib": 6.719943523406982, + "ce_orig": 0.874944806098938, + "epoch": 0.3433747932993026, + "kl_loss": 0.19839473068714142, + "loss_ib": 0.008703891187906265, + "step": 1194 + }, + { + "ce_ib": 9.051475524902344, + "ce_orig": 1.5228703022003174, + "epoch": 0.3433747932993026, + "kl_loss": 0.2388281524181366, + "loss_ib": 0.01143975742161274, + "step": 1194 + }, + { + "ce_ib": 6.544286251068115, + "ce_orig": 0.8071295022964478, + "epoch": 0.3433747932993026, + "kl_loss": 0.21565112471580505, + "loss_ib": 0.008700797334313393, + "step": 1194 + }, + { + "epoch": 0.3436623768782803, + "grad_norm": 0.10329017043113708, + "learning_rate": 9.816674488823855e-06, + "loss": 0.943, + "step": 1195 + }, + { + "ce_ib": 6.421690940856934, + "ce_orig": 0.5300900340080261, + "epoch": 0.3436623768782803, + "kl_loss": 0.238215833902359, + "loss_ib": 0.00880384910851717, + "step": 1195 + }, + { + "ce_ib": 3.6498396396636963, + "ce_orig": 0.66141277551651, + "epoch": 0.3436623768782803, + "kl_loss": 0.23390421271324158, + "loss_ib": 0.005988881457597017, + "step": 1195 + }, + { + "ce_ib": 9.907021522521973, + "ce_orig": 0.8684224486351013, + "epoch": 0.3436623768782803, + "kl_loss": 0.26282399892807007, + "loss_ib": 0.012535261921584606, + "step": 1195 + }, + { + "ce_ib": 5.8363356590271, + "ce_orig": 0.5741661787033081, + "epoch": 0.3436623768782803, + "kl_loss": 0.239915132522583, + "loss_ib": 0.008235487155616283, + "step": 1195 + }, + { + "ce_ib": 5.6322784423828125, + "ce_orig": 0.797076404094696, + "epoch": 0.3439499604572579, + "kl_loss": 0.2528030276298523, + "loss_ib": 0.008160308003425598, + "step": 1196 + }, + { + "ce_ib": 7.733359336853027, + "ce_orig": 0.8631399869918823, + "epoch": 0.3439499604572579, + "kl_loss": 0.32714614272117615, + "loss_ib": 0.011004820466041565, + "step": 1196 + }, + { + "ce_ib": 7.169785499572754, + "ce_orig": 0.9283391237258911, + "epoch": 0.3439499604572579, + "kl_loss": 0.19584496319293976, + "loss_ib": 0.009128234349191189, + "step": 1196 + }, + { + "ce_ib": 4.730182647705078, + "ce_orig": 0.30926573276519775, + "epoch": 0.3439499604572579, + "kl_loss": 0.2444748878479004, + "loss_ib": 0.007174931466579437, + "step": 1196 + }, + { + "ce_ib": 4.997303485870361, + "ce_orig": 0.5115468502044678, + "epoch": 0.3442375440362355, + "kl_loss": 0.21230413019657135, + "loss_ib": 0.0071203443221747875, + "step": 1197 + }, + { + "ce_ib": 4.934088230133057, + "ce_orig": 0.5581589937210083, + "epoch": 0.3442375440362355, + "kl_loss": 0.19467350840568542, + "loss_ib": 0.00688082305714488, + "step": 1197 + }, + { + "ce_ib": 6.582386016845703, + "ce_orig": 0.543520987033844, + "epoch": 0.3442375440362355, + "kl_loss": 0.29340246319770813, + "loss_ib": 0.009516410529613495, + "step": 1197 + }, + { + "ce_ib": 6.0954508781433105, + "ce_orig": 0.7179067730903625, + "epoch": 0.3442375440362355, + "kl_loss": 0.20862382650375366, + "loss_ib": 0.00818168930709362, + "step": 1197 + }, + { + "ce_ib": 8.645586013793945, + "ce_orig": 1.1063332557678223, + "epoch": 0.34452512761521314, + "kl_loss": 0.2973189353942871, + "loss_ib": 0.011618776246905327, + "step": 1198 + }, + { + "ce_ib": 7.050906181335449, + "ce_orig": 1.1842377185821533, + "epoch": 0.34452512761521314, + "kl_loss": 0.2546880841255188, + "loss_ib": 0.009597786702215672, + "step": 1198 + }, + { + "ce_ib": 6.744875431060791, + "ce_orig": 0.6672945618629456, + "epoch": 0.34452512761521314, + "kl_loss": 0.29981529712677, + "loss_ib": 0.009743028320372105, + "step": 1198 + }, + { + "ce_ib": 9.70975112915039, + "ce_orig": 1.2213077545166016, + "epoch": 0.34452512761521314, + "kl_loss": 0.29038137197494507, + "loss_ib": 0.012613564729690552, + "step": 1198 + }, + { + "ce_ib": 6.745275497436523, + "ce_orig": 0.7089959383010864, + "epoch": 0.3448127111941908, + "kl_loss": 0.22385196387767792, + "loss_ib": 0.008983795531094074, + "step": 1199 + }, + { + "ce_ib": 6.717109680175781, + "ce_orig": 0.7282753586769104, + "epoch": 0.3448127111941908, + "kl_loss": 0.4085049331188202, + "loss_ib": 0.010802159085869789, + "step": 1199 + }, + { + "ce_ib": 8.308910369873047, + "ce_orig": 0.9311578273773193, + "epoch": 0.3448127111941908, + "kl_loss": 0.2200586348772049, + "loss_ib": 0.010509496554732323, + "step": 1199 + }, + { + "ce_ib": 6.919417858123779, + "ce_orig": 0.981727659702301, + "epoch": 0.3448127111941908, + "kl_loss": 0.3038212060928345, + "loss_ib": 0.009957630187273026, + "step": 1199 + }, + { + "epoch": 0.34510029477316845, + "grad_norm": 0.11079682409763336, + "learning_rate": 9.814586436738998e-06, + "loss": 0.8328, + "step": 1200 + }, + { + "ce_ib": 2.4315686225891113, + "ce_orig": 0.35374554991722107, + "epoch": 0.34510029477316845, + "kl_loss": 0.5890471935272217, + "loss_ib": 0.008322040550410748, + "step": 1200 + }, + { + "ce_ib": 3.5685207843780518, + "ce_orig": 0.49777722358703613, + "epoch": 0.34510029477316845, + "kl_loss": 0.19487112760543823, + "loss_ib": 0.005517232231795788, + "step": 1200 + }, + { + "ce_ib": 2.2421321868896484, + "ce_orig": 0.22677947580814362, + "epoch": 0.34510029477316845, + "kl_loss": 0.26925888657569885, + "loss_ib": 0.004934720695018768, + "step": 1200 + }, + { + "ce_ib": 3.7340574264526367, + "ce_orig": 0.6081327199935913, + "epoch": 0.34510029477316845, + "kl_loss": 0.19507867097854614, + "loss_ib": 0.005684844218194485, + "step": 1200 + }, + { + "ce_ib": 6.920347213745117, + "ce_orig": 0.8367874026298523, + "epoch": 0.34538787835214607, + "kl_loss": 0.3019488453865051, + "loss_ib": 0.009939835406839848, + "step": 1201 + }, + { + "ce_ib": 5.684332847595215, + "ce_orig": 1.077157735824585, + "epoch": 0.34538787835214607, + "kl_loss": 0.1948537677526474, + "loss_ib": 0.007632870692759752, + "step": 1201 + }, + { + "ce_ib": 9.116320610046387, + "ce_orig": 1.3145966529846191, + "epoch": 0.34538787835214607, + "kl_loss": 0.2820887565612793, + "loss_ib": 0.011937207542359829, + "step": 1201 + }, + { + "ce_ib": 8.325549125671387, + "ce_orig": 1.0065648555755615, + "epoch": 0.34538787835214607, + "kl_loss": 0.20022490620613098, + "loss_ib": 0.010327798314392567, + "step": 1201 + }, + { + "ce_ib": 9.057621955871582, + "ce_orig": 0.5740677118301392, + "epoch": 0.34567546193112375, + "kl_loss": 0.37036046385765076, + "loss_ib": 0.01276122685521841, + "step": 1202 + }, + { + "ce_ib": 7.503146171569824, + "ce_orig": 1.0310344696044922, + "epoch": 0.34567546193112375, + "kl_loss": 0.30264437198638916, + "loss_ib": 0.010529589839279652, + "step": 1202 + }, + { + "ce_ib": 6.097241401672363, + "ce_orig": 0.6190967559814453, + "epoch": 0.34567546193112375, + "kl_loss": 0.3846118450164795, + "loss_ib": 0.009943359531462193, + "step": 1202 + }, + { + "ce_ib": 6.613105297088623, + "ce_orig": 0.7227441668510437, + "epoch": 0.34567546193112375, + "kl_loss": 0.3394354581832886, + "loss_ib": 0.010007459670305252, + "step": 1202 + }, + { + "ce_ib": 3.6348624229431152, + "ce_orig": 0.8466792106628418, + "epoch": 0.3459630455101014, + "kl_loss": 0.21068529784679413, + "loss_ib": 0.005741715431213379, + "step": 1203 + }, + { + "ce_ib": 3.840749979019165, + "ce_orig": 0.7769103050231934, + "epoch": 0.3459630455101014, + "kl_loss": 0.20804926753044128, + "loss_ib": 0.005921242758631706, + "step": 1203 + }, + { + "ce_ib": 10.696869850158691, + "ce_orig": 1.6593687534332275, + "epoch": 0.3459630455101014, + "kl_loss": 0.2703685164451599, + "loss_ib": 0.013400554656982422, + "step": 1203 + }, + { + "ce_ib": 9.642045974731445, + "ce_orig": 1.204627275466919, + "epoch": 0.3459630455101014, + "kl_loss": 0.27802687883377075, + "loss_ib": 0.01242231484502554, + "step": 1203 + }, + { + "ce_ib": 7.497930526733398, + "ce_orig": 0.8816418647766113, + "epoch": 0.346250629089079, + "kl_loss": 0.2705840766429901, + "loss_ib": 0.0102037712931633, + "step": 1204 + }, + { + "ce_ib": 9.714649200439453, + "ce_orig": 1.4165947437286377, + "epoch": 0.346250629089079, + "kl_loss": 0.2666982114315033, + "loss_ib": 0.012381630949676037, + "step": 1204 + }, + { + "ce_ib": 8.192032814025879, + "ce_orig": 0.9408739805221558, + "epoch": 0.346250629089079, + "kl_loss": 0.23753347992897034, + "loss_ib": 0.010567368008196354, + "step": 1204 + }, + { + "ce_ib": 5.191983699798584, + "ce_orig": 0.2504975497722626, + "epoch": 0.346250629089079, + "kl_loss": 0.19956733286380768, + "loss_ib": 0.007187656592577696, + "step": 1204 + }, + { + "epoch": 0.3465382126680567, + "grad_norm": 0.09698857367038727, + "learning_rate": 9.812486785197924e-06, + "loss": 0.864, + "step": 1205 + }, + { + "ce_ib": 5.932236194610596, + "ce_orig": 0.5712811350822449, + "epoch": 0.3465382126680567, + "kl_loss": 0.2073478102684021, + "loss_ib": 0.008005714043974876, + "step": 1205 + }, + { + "ce_ib": 9.851508140563965, + "ce_orig": 1.2491554021835327, + "epoch": 0.3465382126680567, + "kl_loss": 0.26627326011657715, + "loss_ib": 0.012514240108430386, + "step": 1205 + }, + { + "ce_ib": 7.024086952209473, + "ce_orig": 1.2069941759109497, + "epoch": 0.3465382126680567, + "kl_loss": 0.17645485699176788, + "loss_ib": 0.008788635954260826, + "step": 1205 + }, + { + "ce_ib": 8.410697937011719, + "ce_orig": 0.8548517227172852, + "epoch": 0.3465382126680567, + "kl_loss": 0.34426677227020264, + "loss_ib": 0.01185336522758007, + "step": 1205 + }, + { + "ce_ib": 10.94921588897705, + "ce_orig": 1.5700947046279907, + "epoch": 0.3468257962470343, + "kl_loss": 0.2599565386772156, + "loss_ib": 0.013548781163990498, + "step": 1206 + }, + { + "ce_ib": 6.561511993408203, + "ce_orig": 0.7359023690223694, + "epoch": 0.3468257962470343, + "kl_loss": 0.2437046766281128, + "loss_ib": 0.008998558856546879, + "step": 1206 + }, + { + "ce_ib": 5.731392860412598, + "ce_orig": 0.6426838040351868, + "epoch": 0.3468257962470343, + "kl_loss": 0.2911027669906616, + "loss_ib": 0.008642420172691345, + "step": 1206 + }, + { + "ce_ib": 7.189348220825195, + "ce_orig": 0.7906773090362549, + "epoch": 0.3468257962470343, + "kl_loss": 0.2725953459739685, + "loss_ib": 0.009915301576256752, + "step": 1206 + }, + { + "ce_ib": 5.853690147399902, + "ce_orig": 0.6496776938438416, + "epoch": 0.3471133798260119, + "kl_loss": 0.2832804322242737, + "loss_ib": 0.008686495013535023, + "step": 1207 + }, + { + "ce_ib": 5.281362533569336, + "ce_orig": 0.4212856590747833, + "epoch": 0.3471133798260119, + "kl_loss": 0.261347234249115, + "loss_ib": 0.007894834503531456, + "step": 1207 + }, + { + "ce_ib": 8.261890411376953, + "ce_orig": 0.8318018913269043, + "epoch": 0.3471133798260119, + "kl_loss": 0.2593042850494385, + "loss_ib": 0.01085493341088295, + "step": 1207 + }, + { + "ce_ib": 6.534182548522949, + "ce_orig": 0.5106642842292786, + "epoch": 0.3471133798260119, + "kl_loss": 0.2081461399793625, + "loss_ib": 0.008615643717348576, + "step": 1207 + }, + { + "ce_ib": 9.115994453430176, + "ce_orig": 0.8802713751792908, + "epoch": 0.34740096340498955, + "kl_loss": 0.2686144709587097, + "loss_ib": 0.01180213876068592, + "step": 1208 + }, + { + "ce_ib": 7.533227920532227, + "ce_orig": 0.9319108128547668, + "epoch": 0.34740096340498955, + "kl_loss": 0.2602120339870453, + "loss_ib": 0.010135347954928875, + "step": 1208 + }, + { + "ce_ib": 8.701380729675293, + "ce_orig": 1.2851958274841309, + "epoch": 0.34740096340498955, + "kl_loss": 0.2070631980895996, + "loss_ib": 0.01077201310545206, + "step": 1208 + }, + { + "ce_ib": 5.88447904586792, + "ce_orig": 0.626255989074707, + "epoch": 0.34740096340498955, + "kl_loss": 0.20318818092346191, + "loss_ib": 0.007916361093521118, + "step": 1208 + }, + { + "ce_ib": 3.801424980163574, + "ce_orig": 0.49367231130599976, + "epoch": 0.34768854698396723, + "kl_loss": 0.27957862615585327, + "loss_ib": 0.006597211118787527, + "step": 1209 + }, + { + "ce_ib": 8.915145874023438, + "ce_orig": 1.2738125324249268, + "epoch": 0.34768854698396723, + "kl_loss": 0.25946545600891113, + "loss_ib": 0.011509799398481846, + "step": 1209 + }, + { + "ce_ib": 7.498837471008301, + "ce_orig": 0.46814650297164917, + "epoch": 0.34768854698396723, + "kl_loss": 0.30411720275878906, + "loss_ib": 0.01054000947624445, + "step": 1209 + }, + { + "ce_ib": 9.11368465423584, + "ce_orig": 1.3512606620788574, + "epoch": 0.34768854698396723, + "kl_loss": 0.29874473810195923, + "loss_ib": 0.012101132422685623, + "step": 1209 + }, + { + "epoch": 0.34797613056294485, + "grad_norm": 0.11233938485383987, + "learning_rate": 9.810375539259184e-06, + "loss": 0.8904, + "step": 1210 + }, + { + "ce_ib": 6.393960952758789, + "ce_orig": 0.6846663355827332, + "epoch": 0.34797613056294485, + "kl_loss": 0.2849634885787964, + "loss_ib": 0.009243596345186234, + "step": 1210 + }, + { + "ce_ib": 6.281631946563721, + "ce_orig": 0.8732141852378845, + "epoch": 0.34797613056294485, + "kl_loss": 0.21354413032531738, + "loss_ib": 0.008417073637247086, + "step": 1210 + }, + { + "ce_ib": 6.037140846252441, + "ce_orig": 0.4813711643218994, + "epoch": 0.34797613056294485, + "kl_loss": 0.2744561433792114, + "loss_ib": 0.008781702257692814, + "step": 1210 + }, + { + "ce_ib": 8.615631103515625, + "ce_orig": 1.3087021112442017, + "epoch": 0.34797613056294485, + "kl_loss": 0.2230015993118286, + "loss_ib": 0.010845646262168884, + "step": 1210 + }, + { + "ce_ib": 4.9515380859375, + "ce_orig": 0.7219204306602478, + "epoch": 0.3482637141419225, + "kl_loss": 0.2197096347808838, + "loss_ib": 0.007148634176701307, + "step": 1211 + }, + { + "ce_ib": 8.830263137817383, + "ce_orig": 1.387976884841919, + "epoch": 0.3482637141419225, + "kl_loss": 0.2981712818145752, + "loss_ib": 0.011811976321041584, + "step": 1211 + }, + { + "ce_ib": 6.819638252258301, + "ce_orig": 0.5993950366973877, + "epoch": 0.3482637141419225, + "kl_loss": 0.41057854890823364, + "loss_ib": 0.01092542428523302, + "step": 1211 + }, + { + "ce_ib": 7.180927276611328, + "ce_orig": 0.9277037382125854, + "epoch": 0.3482637141419225, + "kl_loss": 0.3397431969642639, + "loss_ib": 0.010578359477221966, + "step": 1211 + }, + { + "ce_ib": 4.253024578094482, + "ce_orig": 0.6946703791618347, + "epoch": 0.34855129772090016, + "kl_loss": 0.23091982305049896, + "loss_ib": 0.0065622227266430855, + "step": 1212 + }, + { + "ce_ib": 4.551302433013916, + "ce_orig": 0.7342872619628906, + "epoch": 0.34855129772090016, + "kl_loss": 0.2785085439682007, + "loss_ib": 0.007336387410759926, + "step": 1212 + }, + { + "ce_ib": 4.195248126983643, + "ce_orig": 0.5465542674064636, + "epoch": 0.34855129772090016, + "kl_loss": 0.21678900718688965, + "loss_ib": 0.006363137625157833, + "step": 1212 + }, + { + "ce_ib": 7.5936079025268555, + "ce_orig": 1.040616512298584, + "epoch": 0.34855129772090016, + "kl_loss": 0.18895815312862396, + "loss_ib": 0.009483189322054386, + "step": 1212 + }, + { + "ce_ib": 4.654483795166016, + "ce_orig": 0.3671084940433502, + "epoch": 0.3488388812998778, + "kl_loss": 0.22442926466464996, + "loss_ib": 0.006898776162415743, + "step": 1213 + }, + { + "ce_ib": 4.995607376098633, + "ce_orig": 0.8385266661643982, + "epoch": 0.3488388812998778, + "kl_loss": 0.18434521555900574, + "loss_ib": 0.006839059293270111, + "step": 1213 + }, + { + "ce_ib": 6.814243793487549, + "ce_orig": 0.710411548614502, + "epoch": 0.3488388812998778, + "kl_loss": 0.20435184240341187, + "loss_ib": 0.008857762441039085, + "step": 1213 + }, + { + "ce_ib": 8.296137809753418, + "ce_orig": 0.9488054513931274, + "epoch": 0.3488388812998778, + "kl_loss": 0.337627649307251, + "loss_ib": 0.011672413907945156, + "step": 1213 + }, + { + "ce_ib": 6.109886169433594, + "ce_orig": 0.8862010836601257, + "epoch": 0.3491264648788554, + "kl_loss": 0.17762064933776855, + "loss_ib": 0.00788609217852354, + "step": 1214 + }, + { + "ce_ib": 7.6561503410339355, + "ce_orig": 0.8912796378135681, + "epoch": 0.3491264648788554, + "kl_loss": 0.2638469636440277, + "loss_ib": 0.01029461994767189, + "step": 1214 + }, + { + "ce_ib": 7.491686820983887, + "ce_orig": 1.1298644542694092, + "epoch": 0.3491264648788554, + "kl_loss": 0.2536466717720032, + "loss_ib": 0.010028153657913208, + "step": 1214 + }, + { + "ce_ib": 8.880212783813477, + "ce_orig": 1.0798959732055664, + "epoch": 0.3491264648788554, + "kl_loss": 0.22560831904411316, + "loss_ib": 0.011136295273900032, + "step": 1214 + }, + { + "epoch": 0.3494140484578331, + "grad_norm": 0.09869109094142914, + "learning_rate": 9.808252704009258e-06, + "loss": 0.9111, + "step": 1215 + }, + { + "ce_ib": 8.043791770935059, + "ce_orig": 1.0452327728271484, + "epoch": 0.3494140484578331, + "kl_loss": 0.23494543135166168, + "loss_ib": 0.010393246077001095, + "step": 1215 + }, + { + "ce_ib": 6.372407913208008, + "ce_orig": 0.7277105450630188, + "epoch": 0.3494140484578331, + "kl_loss": 0.3500578999519348, + "loss_ib": 0.009872986935079098, + "step": 1215 + }, + { + "ce_ib": 4.63390588760376, + "ce_orig": 0.5187411308288574, + "epoch": 0.3494140484578331, + "kl_loss": 0.23105770349502563, + "loss_ib": 0.006944482680410147, + "step": 1215 + }, + { + "ce_ib": 6.37406587600708, + "ce_orig": 0.8014640808105469, + "epoch": 0.3494140484578331, + "kl_loss": 0.445901095867157, + "loss_ib": 0.010833077132701874, + "step": 1215 + }, + { + "ce_ib": 4.982112884521484, + "ce_orig": 0.5382281541824341, + "epoch": 0.3497016320368107, + "kl_loss": 0.2244800329208374, + "loss_ib": 0.007226912770420313, + "step": 1216 + }, + { + "ce_ib": 6.78171968460083, + "ce_orig": 0.89256352186203, + "epoch": 0.3497016320368107, + "kl_loss": 0.4011702835559845, + "loss_ib": 0.010793422348797321, + "step": 1216 + }, + { + "ce_ib": 6.829211711883545, + "ce_orig": 0.8494387865066528, + "epoch": 0.3497016320368107, + "kl_loss": 0.5227317810058594, + "loss_ib": 0.012056529521942139, + "step": 1216 + }, + { + "ce_ib": 6.879478931427002, + "ce_orig": 0.9619460701942444, + "epoch": 0.3497016320368107, + "kl_loss": 0.2130880057811737, + "loss_ib": 0.009010358713567257, + "step": 1216 + }, + { + "ce_ib": 3.678586483001709, + "ce_orig": 0.4464549720287323, + "epoch": 0.34998921561578833, + "kl_loss": 0.19008475542068481, + "loss_ib": 0.005579433869570494, + "step": 1217 + }, + { + "ce_ib": 5.889970302581787, + "ce_orig": 0.9165810942649841, + "epoch": 0.34998921561578833, + "kl_loss": 0.2494632750749588, + "loss_ib": 0.008384603075683117, + "step": 1217 + }, + { + "ce_ib": 4.036910533905029, + "ce_orig": 0.8464987874031067, + "epoch": 0.34998921561578833, + "kl_loss": 0.155860036611557, + "loss_ib": 0.005595511291176081, + "step": 1217 + }, + { + "ce_ib": 3.3294496536254883, + "ce_orig": 0.18816563487052917, + "epoch": 0.34998921561578833, + "kl_loss": 0.38086679577827454, + "loss_ib": 0.007138117216527462, + "step": 1217 + }, + { + "ce_ib": 5.73319673538208, + "ce_orig": 0.7483682632446289, + "epoch": 0.35027679919476595, + "kl_loss": 0.19469204545021057, + "loss_ib": 0.0076801166869699955, + "step": 1218 + }, + { + "ce_ib": 8.876380920410156, + "ce_orig": 1.0530145168304443, + "epoch": 0.35027679919476595, + "kl_loss": 0.2871856093406677, + "loss_ib": 0.011748237535357475, + "step": 1218 + }, + { + "ce_ib": 2.245468854904175, + "ce_orig": 0.210285022854805, + "epoch": 0.35027679919476595, + "kl_loss": 0.5912055373191833, + "loss_ib": 0.008157524280250072, + "step": 1218 + }, + { + "ce_ib": 6.965002059936523, + "ce_orig": 0.9538248181343079, + "epoch": 0.35027679919476595, + "kl_loss": 0.3390040993690491, + "loss_ib": 0.010355043224990368, + "step": 1218 + }, + { + "ce_ib": 5.554605960845947, + "ce_orig": 0.5747299194335938, + "epoch": 0.35056438277374363, + "kl_loss": 0.358900785446167, + "loss_ib": 0.009143614210188389, + "step": 1219 + }, + { + "ce_ib": 12.596790313720703, + "ce_orig": 1.7860560417175293, + "epoch": 0.35056438277374363, + "kl_loss": 0.2815093398094177, + "loss_ib": 0.015411884523928165, + "step": 1219 + }, + { + "ce_ib": 7.36424446105957, + "ce_orig": 1.0605376958847046, + "epoch": 0.35056438277374363, + "kl_loss": 0.22156105935573578, + "loss_ib": 0.009579855017364025, + "step": 1219 + }, + { + "ce_ib": 8.19119644165039, + "ce_orig": 1.3693232536315918, + "epoch": 0.35056438277374363, + "kl_loss": 0.2518046498298645, + "loss_ib": 0.010709242895245552, + "step": 1219 + }, + { + "epoch": 0.35085196635272126, + "grad_norm": 0.1013348326086998, + "learning_rate": 9.806118284562547e-06, + "loss": 0.9093, + "step": 1220 + }, + { + "ce_ib": 6.345913887023926, + "ce_orig": 1.0799219608306885, + "epoch": 0.35085196635272126, + "kl_loss": 0.2296457588672638, + "loss_ib": 0.008642371743917465, + "step": 1220 + }, + { + "ce_ib": 8.522051811218262, + "ce_orig": 0.7675303816795349, + "epoch": 0.35085196635272126, + "kl_loss": 0.23121927678585052, + "loss_ib": 0.010834244079887867, + "step": 1220 + }, + { + "ce_ib": 6.218955993652344, + "ce_orig": 0.7469417452812195, + "epoch": 0.35085196635272126, + "kl_loss": 0.32687658071517944, + "loss_ib": 0.00948772206902504, + "step": 1220 + }, + { + "ce_ib": 4.8152174949646, + "ce_orig": 0.5418853759765625, + "epoch": 0.35085196635272126, + "kl_loss": 0.7648394107818604, + "loss_ib": 0.012463611550629139, + "step": 1220 + }, + { + "ce_ib": 4.405568599700928, + "ce_orig": 0.4719264507293701, + "epoch": 0.3511395499316989, + "kl_loss": 0.20381106436252594, + "loss_ib": 0.006443679332733154, + "step": 1221 + }, + { + "ce_ib": 10.199350357055664, + "ce_orig": 1.4328693151474, + "epoch": 0.3511395499316989, + "kl_loss": 0.24579858779907227, + "loss_ib": 0.012657335959374905, + "step": 1221 + }, + { + "ce_ib": 7.0942912101745605, + "ce_orig": 1.0040373802185059, + "epoch": 0.3511395499316989, + "kl_loss": 0.5016403198242188, + "loss_ib": 0.0121106943115592, + "step": 1221 + }, + { + "ce_ib": 7.4397172927856445, + "ce_orig": 0.8064084649085999, + "epoch": 0.3511395499316989, + "kl_loss": 0.3063448965549469, + "loss_ib": 0.010503166355192661, + "step": 1221 + }, + { + "ce_ib": 4.31360387802124, + "ce_orig": 0.786395788192749, + "epoch": 0.35142713351067656, + "kl_loss": 0.1687285304069519, + "loss_ib": 0.006000889465212822, + "step": 1222 + }, + { + "ce_ib": 9.686395645141602, + "ce_orig": 1.5242962837219238, + "epoch": 0.35142713351067656, + "kl_loss": 0.34040552377700806, + "loss_ib": 0.013090450316667557, + "step": 1222 + }, + { + "ce_ib": 7.19453763961792, + "ce_orig": 0.7576363682746887, + "epoch": 0.35142713351067656, + "kl_loss": 0.34985777735710144, + "loss_ib": 0.010693115182220936, + "step": 1222 + }, + { + "ce_ib": 5.745079040527344, + "ce_orig": 0.5910704135894775, + "epoch": 0.35142713351067656, + "kl_loss": 0.3402925729751587, + "loss_ib": 0.009148004464805126, + "step": 1222 + }, + { + "ce_ib": 9.865362167358398, + "ce_orig": 1.643523931503296, + "epoch": 0.3517147170896542, + "kl_loss": 0.2811984717845917, + "loss_ib": 0.012677346356213093, + "step": 1223 + }, + { + "ce_ib": 8.58837604522705, + "ce_orig": 1.436140537261963, + "epoch": 0.3517147170896542, + "kl_loss": 0.2681333124637604, + "loss_ib": 0.011269708164036274, + "step": 1223 + }, + { + "ce_ib": 3.754268169403076, + "ce_orig": 0.6821795105934143, + "epoch": 0.3517147170896542, + "kl_loss": 0.22629603743553162, + "loss_ib": 0.006017228122800589, + "step": 1223 + }, + { + "ce_ib": 7.725935459136963, + "ce_orig": 0.7299022674560547, + "epoch": 0.3517147170896542, + "kl_loss": 0.27880483865737915, + "loss_ib": 0.01051398366689682, + "step": 1223 + }, + { + "ce_ib": 7.817504405975342, + "ce_orig": 1.1026769876480103, + "epoch": 0.3520023006686318, + "kl_loss": 0.25443196296691895, + "loss_ib": 0.010361824184656143, + "step": 1224 + }, + { + "ce_ib": 4.619856834411621, + "ce_orig": 0.5043254494667053, + "epoch": 0.3520023006686318, + "kl_loss": 0.1772795170545578, + "loss_ib": 0.006392651703208685, + "step": 1224 + }, + { + "ce_ib": 7.694671630859375, + "ce_orig": 0.7912726998329163, + "epoch": 0.3520023006686318, + "kl_loss": 0.28781387209892273, + "loss_ib": 0.010572809725999832, + "step": 1224 + }, + { + "ce_ib": 7.137585163116455, + "ce_orig": 0.7689734697341919, + "epoch": 0.3520023006686318, + "kl_loss": 0.31799736618995667, + "loss_ib": 0.010317559354007244, + "step": 1224 + }, + { + "epoch": 0.3522898842476095, + "grad_norm": 0.09380496293306351, + "learning_rate": 9.803972286061366e-06, + "loss": 0.9006, + "step": 1225 + }, + { + "ce_ib": 8.018561363220215, + "ce_orig": 1.227948546409607, + "epoch": 0.3522898842476095, + "kl_loss": 0.3420131504535675, + "loss_ib": 0.011438692919909954, + "step": 1225 + }, + { + "ce_ib": 8.40815258026123, + "ce_orig": 1.3984031677246094, + "epoch": 0.3522898842476095, + "kl_loss": 0.8153228759765625, + "loss_ib": 0.01656138151884079, + "step": 1225 + }, + { + "ce_ib": 8.145472526550293, + "ce_orig": 1.3120951652526855, + "epoch": 0.3522898842476095, + "kl_loss": 0.2454821616411209, + "loss_ib": 0.01060029398649931, + "step": 1225 + }, + { + "ce_ib": 9.155527114868164, + "ce_orig": 1.0914626121520996, + "epoch": 0.3522898842476095, + "kl_loss": 0.22678032517433167, + "loss_ib": 0.011423329822719097, + "step": 1225 + }, + { + "ce_ib": 4.5271806716918945, + "ce_orig": 0.5904192328453064, + "epoch": 0.3525774678265871, + "kl_loss": 0.19810084998607635, + "loss_ib": 0.006508189253509045, + "step": 1226 + }, + { + "ce_ib": 5.63466215133667, + "ce_orig": 0.8149072527885437, + "epoch": 0.3525774678265871, + "kl_loss": 0.1674325168132782, + "loss_ib": 0.007308987434953451, + "step": 1226 + }, + { + "ce_ib": 7.006682872772217, + "ce_orig": 0.8032488226890564, + "epoch": 0.3525774678265871, + "kl_loss": 0.2711438238620758, + "loss_ib": 0.009718121029436588, + "step": 1226 + }, + { + "ce_ib": 9.975687026977539, + "ce_orig": 1.0549230575561523, + "epoch": 0.3525774678265871, + "kl_loss": 0.3018321096897125, + "loss_ib": 0.012994008138775826, + "step": 1226 + }, + { + "ce_ib": 4.590726375579834, + "ce_orig": 0.5848087668418884, + "epoch": 0.35286505140556473, + "kl_loss": 0.28551799058914185, + "loss_ib": 0.007445906288921833, + "step": 1227 + }, + { + "ce_ib": 7.8249192237854, + "ce_orig": 1.0972347259521484, + "epoch": 0.35286505140556473, + "kl_loss": 0.22432781755924225, + "loss_ib": 0.010068196803331375, + "step": 1227 + }, + { + "ce_ib": 6.804174900054932, + "ce_orig": 0.5949727296829224, + "epoch": 0.35286505140556473, + "kl_loss": 0.33591410517692566, + "loss_ib": 0.010163315571844578, + "step": 1227 + }, + { + "ce_ib": 9.282870292663574, + "ce_orig": 1.5607327222824097, + "epoch": 0.35286505140556473, + "kl_loss": 0.31619787216186523, + "loss_ib": 0.012444849126040936, + "step": 1227 + }, + { + "ce_ib": 3.7365572452545166, + "ce_orig": 0.5436097979545593, + "epoch": 0.35315263498454236, + "kl_loss": 0.23581741750240326, + "loss_ib": 0.006094731390476227, + "step": 1228 + }, + { + "ce_ib": 9.241966247558594, + "ce_orig": 1.2546502351760864, + "epoch": 0.35315263498454236, + "kl_loss": 0.25352782011032104, + "loss_ib": 0.011777244508266449, + "step": 1228 + }, + { + "ce_ib": 7.271526336669922, + "ce_orig": 1.4413655996322632, + "epoch": 0.35315263498454236, + "kl_loss": 0.21419073641300201, + "loss_ib": 0.009413433261215687, + "step": 1228 + }, + { + "ce_ib": 12.323684692382812, + "ce_orig": 2.418548822402954, + "epoch": 0.35315263498454236, + "kl_loss": 0.20560504496097565, + "loss_ib": 0.014379735104739666, + "step": 1228 + }, + { + "ce_ib": 6.3870158195495605, + "ce_orig": 0.8991002440452576, + "epoch": 0.35344021856352004, + "kl_loss": 0.23421180248260498, + "loss_ib": 0.008729133754968643, + "step": 1229 + }, + { + "ce_ib": 3.585265874862671, + "ce_orig": 0.6805919408798218, + "epoch": 0.35344021856352004, + "kl_loss": 0.169908806681633, + "loss_ib": 0.005284354090690613, + "step": 1229 + }, + { + "ce_ib": 9.112138748168945, + "ce_orig": 1.2712079286575317, + "epoch": 0.35344021856352004, + "kl_loss": 0.23605284094810486, + "loss_ib": 0.011472667567431927, + "step": 1229 + }, + { + "ce_ib": 7.619685173034668, + "ce_orig": 1.315202236175537, + "epoch": 0.35344021856352004, + "kl_loss": 0.21875296533107758, + "loss_ib": 0.009807215072214603, + "step": 1229 + }, + { + "epoch": 0.35372780214249766, + "grad_norm": 0.13430581986904144, + "learning_rate": 9.801814713675922e-06, + "loss": 0.8915, + "step": 1230 + }, + { + "ce_ib": 9.14773941040039, + "ce_orig": 1.6861295700073242, + "epoch": 0.35372780214249766, + "kl_loss": 0.23906412720680237, + "loss_ib": 0.011538379825651646, + "step": 1230 + }, + { + "ce_ib": 7.55902624130249, + "ce_orig": 1.0963160991668701, + "epoch": 0.35372780214249766, + "kl_loss": 0.18921005725860596, + "loss_ib": 0.009451126679778099, + "step": 1230 + }, + { + "ce_ib": 7.272607326507568, + "ce_orig": 0.8341992497444153, + "epoch": 0.35372780214249766, + "kl_loss": 0.1842857003211975, + "loss_ib": 0.009115464054048061, + "step": 1230 + }, + { + "ce_ib": 11.349145889282227, + "ce_orig": 1.8195122480392456, + "epoch": 0.35372780214249766, + "kl_loss": 0.28681570291519165, + "loss_ib": 0.014217302203178406, + "step": 1230 + }, + { + "ce_ib": 6.5777435302734375, + "ce_orig": 1.0577532052993774, + "epoch": 0.3540153857214753, + "kl_loss": 0.2370855063199997, + "loss_ib": 0.008948598988354206, + "step": 1231 + }, + { + "ce_ib": 6.332425594329834, + "ce_orig": 0.4660177230834961, + "epoch": 0.3540153857214753, + "kl_loss": 0.4329322576522827, + "loss_ib": 0.010661747306585312, + "step": 1231 + }, + { + "ce_ib": 7.591532230377197, + "ce_orig": 0.6235303282737732, + "epoch": 0.3540153857214753, + "kl_loss": 0.20224440097808838, + "loss_ib": 0.009613975882530212, + "step": 1231 + }, + { + "ce_ib": 7.099629878997803, + "ce_orig": 1.0191956758499146, + "epoch": 0.3540153857214753, + "kl_loss": 0.18419277667999268, + "loss_ib": 0.008941558189690113, + "step": 1231 + }, + { + "ce_ib": 3.364225387573242, + "ce_orig": 0.47195231914520264, + "epoch": 0.35430296930045296, + "kl_loss": 0.20699940621852875, + "loss_ib": 0.0054342192597687244, + "step": 1232 + }, + { + "ce_ib": 9.607991218566895, + "ce_orig": 1.4189627170562744, + "epoch": 0.35430296930045296, + "kl_loss": 0.22665303945541382, + "loss_ib": 0.01187452208250761, + "step": 1232 + }, + { + "ce_ib": 7.680178165435791, + "ce_orig": 0.7752040028572083, + "epoch": 0.35430296930045296, + "kl_loss": 0.2515419125556946, + "loss_ib": 0.0101955970749259, + "step": 1232 + }, + { + "ce_ib": 10.139663696289062, + "ce_orig": 1.8179274797439575, + "epoch": 0.35430296930045296, + "kl_loss": 0.2826952040195465, + "loss_ib": 0.012966616079211235, + "step": 1232 + }, + { + "ce_ib": 4.237985610961914, + "ce_orig": 0.3648199141025543, + "epoch": 0.3545905528794306, + "kl_loss": 0.3507782220840454, + "loss_ib": 0.00774576747789979, + "step": 1233 + }, + { + "ce_ib": 6.260341644287109, + "ce_orig": 0.9368284940719604, + "epoch": 0.3545905528794306, + "kl_loss": 0.3227092921733856, + "loss_ib": 0.009487434290349483, + "step": 1233 + }, + { + "ce_ib": 5.236300945281982, + "ce_orig": 0.5902385711669922, + "epoch": 0.3545905528794306, + "kl_loss": 0.332288920879364, + "loss_ib": 0.008559189736843109, + "step": 1233 + }, + { + "ce_ib": 9.382746696472168, + "ce_orig": 0.9858824014663696, + "epoch": 0.3545905528794306, + "kl_loss": 0.24119676649570465, + "loss_ib": 0.011794714257121086, + "step": 1233 + }, + { + "ce_ib": 5.475151538848877, + "ce_orig": 0.8246574997901917, + "epoch": 0.3548781364584082, + "kl_loss": 0.2016981691122055, + "loss_ib": 0.007492133416235447, + "step": 1234 + }, + { + "ce_ib": 7.603310585021973, + "ce_orig": 0.8240784406661987, + "epoch": 0.3548781364584082, + "kl_loss": 0.2915946841239929, + "loss_ib": 0.010519257746636868, + "step": 1234 + }, + { + "ce_ib": 5.872536659240723, + "ce_orig": 0.5426589846611023, + "epoch": 0.3548781364584082, + "kl_loss": 0.26665955781936646, + "loss_ib": 0.008539131842553616, + "step": 1234 + }, + { + "ce_ib": 5.885195255279541, + "ce_orig": 0.43417975306510925, + "epoch": 0.3548781364584082, + "kl_loss": 0.4874606728553772, + "loss_ib": 0.010759801603853703, + "step": 1234 + }, + { + "epoch": 0.3551657200373859, + "grad_norm": 0.12445518374443054, + "learning_rate": 9.799645572604308e-06, + "loss": 0.9172, + "step": 1235 + }, + { + "ce_ib": 9.555274963378906, + "ce_orig": 1.4601075649261475, + "epoch": 0.3551657200373859, + "kl_loss": 0.3519800305366516, + "loss_ib": 0.01307507511228323, + "step": 1235 + }, + { + "ce_ib": 8.43733024597168, + "ce_orig": 0.7304520010948181, + "epoch": 0.3551657200373859, + "kl_loss": 0.2575218081474304, + "loss_ib": 0.01101254764944315, + "step": 1235 + }, + { + "ce_ib": 7.886836528778076, + "ce_orig": 1.0809401273727417, + "epoch": 0.3551657200373859, + "kl_loss": 0.22798356413841248, + "loss_ib": 0.010166672058403492, + "step": 1235 + }, + { + "ce_ib": 6.685604095458984, + "ce_orig": 0.8483709096908569, + "epoch": 0.3551657200373859, + "kl_loss": 0.28703203797340393, + "loss_ib": 0.00955592468380928, + "step": 1235 + }, + { + "ce_ib": 10.434022903442383, + "ce_orig": 1.7004473209381104, + "epoch": 0.3554533036163635, + "kl_loss": 0.27314144372940063, + "loss_ib": 0.013165437616407871, + "step": 1236 + }, + { + "ce_ib": 6.759583950042725, + "ce_orig": 0.9155409932136536, + "epoch": 0.3554533036163635, + "kl_loss": 0.3281250596046448, + "loss_ib": 0.010040833614766598, + "step": 1236 + }, + { + "ce_ib": 8.04914665222168, + "ce_orig": 0.8380681872367859, + "epoch": 0.3554533036163635, + "kl_loss": 0.2907578647136688, + "loss_ib": 0.010956724174320698, + "step": 1236 + }, + { + "ce_ib": 7.6711812019348145, + "ce_orig": 1.1974780559539795, + "epoch": 0.3554533036163635, + "kl_loss": 0.5388073921203613, + "loss_ib": 0.013059255667030811, + "step": 1236 + }, + { + "ce_ib": 7.075990200042725, + "ce_orig": 1.2427293062210083, + "epoch": 0.35574088719534114, + "kl_loss": 0.2628134489059448, + "loss_ib": 0.009704125113785267, + "step": 1237 + }, + { + "ce_ib": 8.428919792175293, + "ce_orig": 0.9998230338096619, + "epoch": 0.35574088719534114, + "kl_loss": 0.22011926770210266, + "loss_ib": 0.010630113072693348, + "step": 1237 + }, + { + "ce_ib": 9.050934791564941, + "ce_orig": 0.6873317360877991, + "epoch": 0.35574088719534114, + "kl_loss": 0.24966318905353546, + "loss_ib": 0.011547566391527653, + "step": 1237 + }, + { + "ce_ib": 5.283548355102539, + "ce_orig": 0.3626221716403961, + "epoch": 0.35574088719534114, + "kl_loss": 0.26341110467910767, + "loss_ib": 0.007917659357190132, + "step": 1237 + }, + { + "ce_ib": 5.644829273223877, + "ce_orig": 0.7479636073112488, + "epoch": 0.35602847077431876, + "kl_loss": 0.34714972972869873, + "loss_ib": 0.009116326458752155, + "step": 1238 + }, + { + "ce_ib": 8.070452690124512, + "ce_orig": 1.1102337837219238, + "epoch": 0.35602847077431876, + "kl_loss": 0.29915985465049744, + "loss_ib": 0.01106205116957426, + "step": 1238 + }, + { + "ce_ib": 8.20000171661377, + "ce_orig": 0.9142467975616455, + "epoch": 0.35602847077431876, + "kl_loss": 0.2920359969139099, + "loss_ib": 0.01112036220729351, + "step": 1238 + }, + { + "ce_ib": 5.415696620941162, + "ce_orig": 0.8824329376220703, + "epoch": 0.35602847077431876, + "kl_loss": 0.23210978507995605, + "loss_ib": 0.00773679418489337, + "step": 1238 + }, + { + "ce_ib": 5.435910224914551, + "ce_orig": 0.6103357076644897, + "epoch": 0.35631605435329644, + "kl_loss": 0.27932867407798767, + "loss_ib": 0.00822919700294733, + "step": 1239 + }, + { + "ce_ib": 7.409430503845215, + "ce_orig": 1.067017674446106, + "epoch": 0.35631605435329644, + "kl_loss": 0.28290998935699463, + "loss_ib": 0.010238530114293098, + "step": 1239 + }, + { + "ce_ib": 7.207986831665039, + "ce_orig": 0.7639102339744568, + "epoch": 0.35631605435329644, + "kl_loss": 0.3880687355995178, + "loss_ib": 0.011088673956692219, + "step": 1239 + }, + { + "ce_ib": 9.050562858581543, + "ce_orig": 0.777185320854187, + "epoch": 0.35631605435329644, + "kl_loss": 0.27783846855163574, + "loss_ib": 0.011828946880996227, + "step": 1239 + }, + { + "epoch": 0.35660363793227406, + "grad_norm": 0.1228327825665474, + "learning_rate": 9.797464868072489e-06, + "loss": 0.9154, + "step": 1240 + }, + { + "ce_ib": 7.292773723602295, + "ce_orig": 1.0429723262786865, + "epoch": 0.35660363793227406, + "kl_loss": 0.20807845890522003, + "loss_ib": 0.00937355775386095, + "step": 1240 + }, + { + "ce_ib": 4.26139497756958, + "ce_orig": 0.7658198475837708, + "epoch": 0.35660363793227406, + "kl_loss": 0.18837539851665497, + "loss_ib": 0.006145148538053036, + "step": 1240 + }, + { + "ce_ib": 5.861346244812012, + "ce_orig": 0.9043319821357727, + "epoch": 0.35660363793227406, + "kl_loss": 0.5151675939559937, + "loss_ib": 0.011013020761311054, + "step": 1240 + }, + { + "ce_ib": 5.455165386199951, + "ce_orig": 0.7346108555793762, + "epoch": 0.35660363793227406, + "kl_loss": 0.2766348719596863, + "loss_ib": 0.008221513591706753, + "step": 1240 + }, + { + "ce_ib": 5.686266899108887, + "ce_orig": 0.7627474665641785, + "epoch": 0.3568912215112517, + "kl_loss": 0.22055384516716003, + "loss_ib": 0.007891804911196232, + "step": 1241 + }, + { + "ce_ib": 9.36503791809082, + "ce_orig": 1.0557454824447632, + "epoch": 0.3568912215112517, + "kl_loss": 0.34796249866485596, + "loss_ib": 0.012844662182033062, + "step": 1241 + }, + { + "ce_ib": 5.484736919403076, + "ce_orig": 0.551432192325592, + "epoch": 0.3568912215112517, + "kl_loss": 0.264079749584198, + "loss_ib": 0.008125534281134605, + "step": 1241 + }, + { + "ce_ib": 4.883925914764404, + "ce_orig": 0.3223569095134735, + "epoch": 0.3568912215112517, + "kl_loss": 0.27467021346092224, + "loss_ib": 0.007630628068000078, + "step": 1241 + }, + { + "ce_ib": 3.6608123779296875, + "ce_orig": 0.4998208284378052, + "epoch": 0.35717880509022937, + "kl_loss": 0.2078043520450592, + "loss_ib": 0.005738855339586735, + "step": 1242 + }, + { + "ce_ib": 7.225930213928223, + "ce_orig": 0.7941088676452637, + "epoch": 0.35717880509022937, + "kl_loss": 0.313068687915802, + "loss_ib": 0.010356617160141468, + "step": 1242 + }, + { + "ce_ib": 7.831450939178467, + "ce_orig": 1.0795207023620605, + "epoch": 0.35717880509022937, + "kl_loss": 0.33597350120544434, + "loss_ib": 0.011191186495125294, + "step": 1242 + }, + { + "ce_ib": 3.794747829437256, + "ce_orig": 0.39786288142204285, + "epoch": 0.35717880509022937, + "kl_loss": 0.24459530413150787, + "loss_ib": 0.006240700837224722, + "step": 1242 + }, + { + "ce_ib": 8.7359037399292, + "ce_orig": 1.3474030494689941, + "epoch": 0.357466388669207, + "kl_loss": 0.30283403396606445, + "loss_ib": 0.011764245107769966, + "step": 1243 + }, + { + "ce_ib": 6.3446831703186035, + "ce_orig": 0.6941496729850769, + "epoch": 0.357466388669207, + "kl_loss": 0.2185823768377304, + "loss_ib": 0.008530506864190102, + "step": 1243 + }, + { + "ce_ib": 3.736961841583252, + "ce_orig": 0.5355826020240784, + "epoch": 0.357466388669207, + "kl_loss": 0.21260380744934082, + "loss_ib": 0.005863000173121691, + "step": 1243 + }, + { + "ce_ib": 7.129443168640137, + "ce_orig": 0.9421985149383545, + "epoch": 0.357466388669207, + "kl_loss": 0.30670252442359924, + "loss_ib": 0.01019646879285574, + "step": 1243 + }, + { + "ce_ib": 5.549740314483643, + "ce_orig": 0.5611580014228821, + "epoch": 0.3577539722481846, + "kl_loss": 0.4680359363555908, + "loss_ib": 0.01023009978234768, + "step": 1244 + }, + { + "ce_ib": 3.336719512939453, + "ce_orig": 0.4326326549053192, + "epoch": 0.3577539722481846, + "kl_loss": 0.2741917073726654, + "loss_ib": 0.006078636739403009, + "step": 1244 + }, + { + "ce_ib": 6.86518669128418, + "ce_orig": 0.7357510924339294, + "epoch": 0.3577539722481846, + "kl_loss": 0.4421781301498413, + "loss_ib": 0.011286967433989048, + "step": 1244 + }, + { + "ce_ib": 8.771323204040527, + "ce_orig": 0.7612717151641846, + "epoch": 0.3577539722481846, + "kl_loss": 0.15122197568416595, + "loss_ib": 0.010283542796969414, + "step": 1244 + }, + { + "epoch": 0.3580415558271623, + "grad_norm": 0.09404265880584717, + "learning_rate": 9.795272605334285e-06, + "loss": 0.8139, + "step": 1245 + }, + { + "ce_ib": 4.558228015899658, + "ce_orig": 0.712019681930542, + "epoch": 0.3580415558271623, + "kl_loss": 0.1576419323682785, + "loss_ib": 0.006134646944701672, + "step": 1245 + }, + { + "ce_ib": 11.132481575012207, + "ce_orig": 1.6099278926849365, + "epoch": 0.3580415558271623, + "kl_loss": 0.2066415250301361, + "loss_ib": 0.013198897242546082, + "step": 1245 + }, + { + "ce_ib": 7.148979663848877, + "ce_orig": 0.9569799304008484, + "epoch": 0.3580415558271623, + "kl_loss": 0.3003823161125183, + "loss_ib": 0.010152801871299744, + "step": 1245 + }, + { + "ce_ib": 3.507009744644165, + "ce_orig": 0.4302767217159271, + "epoch": 0.3580415558271623, + "kl_loss": 0.3271106481552124, + "loss_ib": 0.006778115872293711, + "step": 1245 + }, + { + "ce_ib": 8.464205741882324, + "ce_orig": 0.7025609612464905, + "epoch": 0.3583291394061399, + "kl_loss": 0.2535151243209839, + "loss_ib": 0.01099935732781887, + "step": 1246 + }, + { + "ce_ib": 8.343195915222168, + "ce_orig": 0.9053884148597717, + "epoch": 0.3583291394061399, + "kl_loss": 0.25387266278266907, + "loss_ib": 0.010881922207772732, + "step": 1246 + }, + { + "ce_ib": 5.672733306884766, + "ce_orig": 0.5709149241447449, + "epoch": 0.3583291394061399, + "kl_loss": 0.40478748083114624, + "loss_ib": 0.009720607660710812, + "step": 1246 + }, + { + "ce_ib": 6.032468318939209, + "ce_orig": 0.6649541854858398, + "epoch": 0.3583291394061399, + "kl_loss": 0.18674571812152863, + "loss_ib": 0.007899925112724304, + "step": 1246 + }, + { + "ce_ib": 3.471609592437744, + "ce_orig": 0.5787562131881714, + "epoch": 0.35861672298511754, + "kl_loss": 0.23248958587646484, + "loss_ib": 0.0057965051382780075, + "step": 1247 + }, + { + "ce_ib": 5.650269031524658, + "ce_orig": 0.5460963249206543, + "epoch": 0.35861672298511754, + "kl_loss": 0.3052501082420349, + "loss_ib": 0.008702769875526428, + "step": 1247 + }, + { + "ce_ib": 7.508909702301025, + "ce_orig": 0.26758936047554016, + "epoch": 0.35861672298511754, + "kl_loss": 0.2105289101600647, + "loss_ib": 0.009614198468625546, + "step": 1247 + }, + { + "ce_ib": 3.595618486404419, + "ce_orig": 0.44015583395957947, + "epoch": 0.35861672298511754, + "kl_loss": 0.15356630086898804, + "loss_ib": 0.0051312814466655254, + "step": 1247 + }, + { + "ce_ib": 5.277204513549805, + "ce_orig": 0.7604292035102844, + "epoch": 0.35890430656409517, + "kl_loss": 0.2653564512729645, + "loss_ib": 0.00793076865375042, + "step": 1248 + }, + { + "ce_ib": 3.4653306007385254, + "ce_orig": 0.5359505414962769, + "epoch": 0.35890430656409517, + "kl_loss": 0.14454194903373718, + "loss_ib": 0.004910749848932028, + "step": 1248 + }, + { + "ce_ib": 1.7260143756866455, + "ce_orig": 0.11219265311956406, + "epoch": 0.35890430656409517, + "kl_loss": 0.5338764786720276, + "loss_ib": 0.0070647792890667915, + "step": 1248 + }, + { + "ce_ib": 7.0942912101745605, + "ce_orig": 1.2052923440933228, + "epoch": 0.35890430656409517, + "kl_loss": 0.20122863352298737, + "loss_ib": 0.00910657737404108, + "step": 1248 + }, + { + "ce_ib": 6.274049282073975, + "ce_orig": 0.7101867198944092, + "epoch": 0.35919189014307285, + "kl_loss": 0.29377713799476624, + "loss_ib": 0.009211820550262928, + "step": 1249 + }, + { + "ce_ib": 5.265260696411133, + "ce_orig": 0.5141690969467163, + "epoch": 0.35919189014307285, + "kl_loss": 0.25618404150009155, + "loss_ib": 0.007827101275324821, + "step": 1249 + }, + { + "ce_ib": 7.371795177459717, + "ce_orig": 0.20159362256526947, + "epoch": 0.35919189014307285, + "kl_loss": 0.4881149232387543, + "loss_ib": 0.012252944521605968, + "step": 1249 + }, + { + "ce_ib": 10.091595649719238, + "ce_orig": 1.0113621950149536, + "epoch": 0.35919189014307285, + "kl_loss": 0.30168700218200684, + "loss_ib": 0.013108465820550919, + "step": 1249 + }, + { + "epoch": 0.35947947372205047, + "grad_norm": 0.10958760976791382, + "learning_rate": 9.79306878967137e-06, + "loss": 0.8439, + "step": 1250 + }, + { + "ce_ib": 4.476527690887451, + "ce_orig": 0.6243991255760193, + "epoch": 0.35947947372205047, + "kl_loss": 0.1909669041633606, + "loss_ib": 0.006386196240782738, + "step": 1250 + }, + { + "ce_ib": 11.409143447875977, + "ce_orig": 1.3198966979980469, + "epoch": 0.35947947372205047, + "kl_loss": 0.2266976684331894, + "loss_ib": 0.013676119968295097, + "step": 1250 + }, + { + "ce_ib": 6.0254106521606445, + "ce_orig": 0.6282336711883545, + "epoch": 0.35947947372205047, + "kl_loss": 0.2567211389541626, + "loss_ib": 0.008592622354626656, + "step": 1250 + }, + { + "ce_ib": 3.8054723739624023, + "ce_orig": 0.5951682925224304, + "epoch": 0.35947947372205047, + "kl_loss": 0.31412309408187866, + "loss_ib": 0.00694670295342803, + "step": 1250 + }, + { + "ce_ib": 7.657241344451904, + "ce_orig": 0.9810330867767334, + "epoch": 0.3597670573010281, + "kl_loss": 0.2732018828392029, + "loss_ib": 0.01038926001638174, + "step": 1251 + }, + { + "ce_ib": 6.377111434936523, + "ce_orig": 0.7457929849624634, + "epoch": 0.3597670573010281, + "kl_loss": 0.2344977855682373, + "loss_ib": 0.008722089231014252, + "step": 1251 + }, + { + "ce_ib": 7.052966594696045, + "ce_orig": 0.7349291443824768, + "epoch": 0.3597670573010281, + "kl_loss": 0.3841466009616852, + "loss_ib": 0.010894432663917542, + "step": 1251 + }, + { + "ce_ib": 4.98142671585083, + "ce_orig": 0.7807597517967224, + "epoch": 0.3597670573010281, + "kl_loss": 0.20535174012184143, + "loss_ib": 0.007034944370388985, + "step": 1251 + }, + { + "ce_ib": 5.311237812042236, + "ce_orig": 0.7335159778594971, + "epoch": 0.36005464088000577, + "kl_loss": 0.369441956281662, + "loss_ib": 0.009005657397210598, + "step": 1252 + }, + { + "ce_ib": 6.797234535217285, + "ce_orig": 0.6558939814567566, + "epoch": 0.36005464088000577, + "kl_loss": 0.3157234191894531, + "loss_ib": 0.009954468347132206, + "step": 1252 + }, + { + "ce_ib": 10.10152530670166, + "ce_orig": 1.7004576921463013, + "epoch": 0.36005464088000577, + "kl_loss": 0.2638339400291443, + "loss_ib": 0.012739865109324455, + "step": 1252 + }, + { + "ce_ib": 8.451179504394531, + "ce_orig": 1.1088011264801025, + "epoch": 0.36005464088000577, + "kl_loss": 0.2189917117357254, + "loss_ib": 0.010641096159815788, + "step": 1252 + }, + { + "ce_ib": 5.147396564483643, + "ce_orig": 0.47145599126815796, + "epoch": 0.3603422244589834, + "kl_loss": 0.38510337471961975, + "loss_ib": 0.008998430334031582, + "step": 1253 + }, + { + "ce_ib": 6.613088607788086, + "ce_orig": 0.7753716707229614, + "epoch": 0.3603422244589834, + "kl_loss": 0.2607177793979645, + "loss_ib": 0.009220265783369541, + "step": 1253 + }, + { + "ce_ib": 4.183328151702881, + "ce_orig": 0.5365942716598511, + "epoch": 0.3603422244589834, + "kl_loss": 0.14574888348579407, + "loss_ib": 0.005640816409140825, + "step": 1253 + }, + { + "ce_ib": 8.866814613342285, + "ce_orig": 1.2385174036026, + "epoch": 0.3603422244589834, + "kl_loss": 0.21413256227970123, + "loss_ib": 0.011008140631020069, + "step": 1253 + }, + { + "ce_ib": 9.283498764038086, + "ce_orig": 1.193198323249817, + "epoch": 0.360629808037961, + "kl_loss": 0.24336770176887512, + "loss_ib": 0.0117171760648489, + "step": 1254 + }, + { + "ce_ib": 5.101339340209961, + "ce_orig": 0.7213374972343445, + "epoch": 0.360629808037961, + "kl_loss": 0.23240378499031067, + "loss_ib": 0.007425377145409584, + "step": 1254 + }, + { + "ce_ib": 4.567759990692139, + "ce_orig": 0.5381884574890137, + "epoch": 0.360629808037961, + "kl_loss": 0.4335998296737671, + "loss_ib": 0.008903758600354195, + "step": 1254 + }, + { + "ce_ib": 6.097298622131348, + "ce_orig": 0.7400136590003967, + "epoch": 0.360629808037961, + "kl_loss": 0.2513897716999054, + "loss_ib": 0.008611195720732212, + "step": 1254 + }, + { + "epoch": 0.3609173916169387, + "grad_norm": 0.12052126973867416, + "learning_rate": 9.790853426393246e-06, + "loss": 0.9385, + "step": 1255 + }, + { + "ce_ib": 4.656575679779053, + "ce_orig": 0.5645290017127991, + "epoch": 0.3609173916169387, + "kl_loss": 0.193633571267128, + "loss_ib": 0.006592911202460527, + "step": 1255 + }, + { + "ce_ib": 8.54527473449707, + "ce_orig": 0.7616824507713318, + "epoch": 0.3609173916169387, + "kl_loss": 0.28272953629493713, + "loss_ib": 0.01137256994843483, + "step": 1255 + }, + { + "ce_ib": 7.215847015380859, + "ce_orig": 0.8538178205490112, + "epoch": 0.3609173916169387, + "kl_loss": 0.2556816339492798, + "loss_ib": 0.009772663936018944, + "step": 1255 + }, + { + "ce_ib": 5.195621013641357, + "ce_orig": 0.7892569899559021, + "epoch": 0.3609173916169387, + "kl_loss": 0.3346374034881592, + "loss_ib": 0.008541994728147984, + "step": 1255 + }, + { + "ce_ib": 6.393796443939209, + "ce_orig": 0.8440943360328674, + "epoch": 0.3612049751959163, + "kl_loss": 0.2655456066131592, + "loss_ib": 0.009049252606928349, + "step": 1256 + }, + { + "ce_ib": 7.520873069763184, + "ce_orig": 0.6589466333389282, + "epoch": 0.3612049751959163, + "kl_loss": 0.2204410433769226, + "loss_ib": 0.009725282900035381, + "step": 1256 + }, + { + "ce_ib": 6.403946399688721, + "ce_orig": 0.9284466505050659, + "epoch": 0.3612049751959163, + "kl_loss": 0.25736451148986816, + "loss_ib": 0.008977591060101986, + "step": 1256 + }, + { + "ce_ib": 5.164172172546387, + "ce_orig": 0.3477190136909485, + "epoch": 0.3612049751959163, + "kl_loss": 0.31088292598724365, + "loss_ib": 0.008273000828921795, + "step": 1256 + }, + { + "ce_ib": 7.093279838562012, + "ce_orig": 0.9794586300849915, + "epoch": 0.36149255877489395, + "kl_loss": 0.25934290885925293, + "loss_ib": 0.009686708450317383, + "step": 1257 + }, + { + "ce_ib": 3.6077513694763184, + "ce_orig": 0.6717298626899719, + "epoch": 0.36149255877489395, + "kl_loss": 0.2020391970872879, + "loss_ib": 0.005628143437206745, + "step": 1257 + }, + { + "ce_ib": 8.314215660095215, + "ce_orig": 1.189759373664856, + "epoch": 0.36149255877489395, + "kl_loss": 0.5190852284431458, + "loss_ib": 0.013505067676305771, + "step": 1257 + }, + { + "ce_ib": 4.868413925170898, + "ce_orig": 0.6784672141075134, + "epoch": 0.36149255877489395, + "kl_loss": 0.4079880714416504, + "loss_ib": 0.008948295377194881, + "step": 1257 + }, + { + "ce_ib": 8.850499153137207, + "ce_orig": 0.9206279516220093, + "epoch": 0.36178014235387157, + "kl_loss": 0.3540341854095459, + "loss_ib": 0.01239084079861641, + "step": 1258 + }, + { + "ce_ib": 9.655441284179688, + "ce_orig": 1.3504509925842285, + "epoch": 0.36178014235387157, + "kl_loss": 0.19344475865364075, + "loss_ib": 0.011589889414608479, + "step": 1258 + }, + { + "ce_ib": 8.310129165649414, + "ce_orig": 0.5761513710021973, + "epoch": 0.36178014235387157, + "kl_loss": 0.3058510422706604, + "loss_ib": 0.011368638835847378, + "step": 1258 + }, + { + "ce_ib": 7.006587982177734, + "ce_orig": 0.531325101852417, + "epoch": 0.36178014235387157, + "kl_loss": 0.23296204209327698, + "loss_ib": 0.009336207993328571, + "step": 1258 + }, + { + "ce_ib": 6.28865909576416, + "ce_orig": 0.7898370623588562, + "epoch": 0.36206772593284925, + "kl_loss": 0.35747230052948, + "loss_ib": 0.009863382205367088, + "step": 1259 + }, + { + "ce_ib": 5.487530708312988, + "ce_orig": 0.6396716833114624, + "epoch": 0.36206772593284925, + "kl_loss": 0.30861911177635193, + "loss_ib": 0.008573721162974834, + "step": 1259 + }, + { + "ce_ib": 5.676125526428223, + "ce_orig": 0.6367034316062927, + "epoch": 0.36206772593284925, + "kl_loss": 0.2743126451969147, + "loss_ib": 0.008419252000749111, + "step": 1259 + }, + { + "ce_ib": 9.005084991455078, + "ce_orig": 0.9271017909049988, + "epoch": 0.36206772593284925, + "kl_loss": 0.4467215836048126, + "loss_ib": 0.0134723000228405, + "step": 1259 + }, + { + "epoch": 0.3623553095118269, + "grad_norm": 0.09680938720703125, + "learning_rate": 9.788626520837235e-06, + "loss": 0.8753, + "step": 1260 + }, + { + "ce_ib": 6.723548889160156, + "ce_orig": 1.007396936416626, + "epoch": 0.3623553095118269, + "kl_loss": 0.1902208775281906, + "loss_ib": 0.0086257578805089, + "step": 1260 + }, + { + "ce_ib": 8.341665267944336, + "ce_orig": 1.1784008741378784, + "epoch": 0.3623553095118269, + "kl_loss": 0.26580482721328735, + "loss_ib": 0.010999713093042374, + "step": 1260 + }, + { + "ce_ib": 7.171521186828613, + "ce_orig": 1.0155116319656372, + "epoch": 0.3623553095118269, + "kl_loss": 0.19991645216941833, + "loss_ib": 0.009170685894787312, + "step": 1260 + }, + { + "ce_ib": 8.256757736206055, + "ce_orig": 1.5229460000991821, + "epoch": 0.3623553095118269, + "kl_loss": 0.21548259258270264, + "loss_ib": 0.010411583818495274, + "step": 1260 + }, + { + "ce_ib": 8.004878044128418, + "ce_orig": 1.2474851608276367, + "epoch": 0.3626428930908045, + "kl_loss": 0.2474817931652069, + "loss_ib": 0.010479695163667202, + "step": 1261 + }, + { + "ce_ib": 10.200814247131348, + "ce_orig": 1.5161863565444946, + "epoch": 0.3626428930908045, + "kl_loss": 0.32217681407928467, + "loss_ib": 0.013422582298517227, + "step": 1261 + }, + { + "ce_ib": 4.585286617279053, + "ce_orig": 0.772280216217041, + "epoch": 0.3626428930908045, + "kl_loss": 0.2460094690322876, + "loss_ib": 0.007045380771160126, + "step": 1261 + }, + { + "ce_ib": 5.989019870758057, + "ce_orig": 0.5098203420639038, + "epoch": 0.3626428930908045, + "kl_loss": 0.3226553797721863, + "loss_ib": 0.009215573780238628, + "step": 1261 + }, + { + "ce_ib": 2.9106924533843994, + "ce_orig": 0.2865024209022522, + "epoch": 0.3629304766697822, + "kl_loss": 0.28513258695602417, + "loss_ib": 0.005762017797678709, + "step": 1262 + }, + { + "ce_ib": 8.726869583129883, + "ce_orig": 1.2086615562438965, + "epoch": 0.3629304766697822, + "kl_loss": 0.19402892887592316, + "loss_ib": 0.010667159222066402, + "step": 1262 + }, + { + "ce_ib": 7.850878715515137, + "ce_orig": 1.1507000923156738, + "epoch": 0.3629304766697822, + "kl_loss": 0.27822476625442505, + "loss_ib": 0.010633125901222229, + "step": 1262 + }, + { + "ce_ib": 7.459512233734131, + "ce_orig": 0.743778645992279, + "epoch": 0.3629304766697822, + "kl_loss": 0.22068756818771362, + "loss_ib": 0.009666387923061848, + "step": 1262 + }, + { + "ce_ib": 4.350217819213867, + "ce_orig": 0.6065347194671631, + "epoch": 0.3632180602487598, + "kl_loss": 0.31890130043029785, + "loss_ib": 0.007539230398833752, + "step": 1263 + }, + { + "ce_ib": 4.045588493347168, + "ce_orig": 0.4070097506046295, + "epoch": 0.3632180602487598, + "kl_loss": 0.2118133008480072, + "loss_ib": 0.006163721438497305, + "step": 1263 + }, + { + "ce_ib": 6.7642621994018555, + "ce_orig": 0.9856066703796387, + "epoch": 0.3632180602487598, + "kl_loss": 0.3220054507255554, + "loss_ib": 0.009984316304326057, + "step": 1263 + }, + { + "ce_ib": 7.201728343963623, + "ce_orig": 0.5664107203483582, + "epoch": 0.3632180602487598, + "kl_loss": 0.33470451831817627, + "loss_ib": 0.010548772290349007, + "step": 1263 + }, + { + "ce_ib": 6.063399314880371, + "ce_orig": 0.5952107906341553, + "epoch": 0.3635056438277374, + "kl_loss": 0.2663578391075134, + "loss_ib": 0.008726977743208408, + "step": 1264 + }, + { + "ce_ib": 6.796082973480225, + "ce_orig": 0.9985532164573669, + "epoch": 0.3635056438277374, + "kl_loss": 0.28935104608535767, + "loss_ib": 0.009689592756330967, + "step": 1264 + }, + { + "ce_ib": 8.167562484741211, + "ce_orig": 1.0221731662750244, + "epoch": 0.3635056438277374, + "kl_loss": 0.22773785889148712, + "loss_ib": 0.010444940999150276, + "step": 1264 + }, + { + "ce_ib": 6.192745208740234, + "ce_orig": 0.8716363310813904, + "epoch": 0.3635056438277374, + "kl_loss": 0.2293071448802948, + "loss_ib": 0.008485816419124603, + "step": 1264 + }, + { + "epoch": 0.3637932274067151, + "grad_norm": 0.12041808664798737, + "learning_rate": 9.786388078368473e-06, + "loss": 0.8926, + "step": 1265 + }, + { + "ce_ib": 6.316495895385742, + "ce_orig": 0.7256811857223511, + "epoch": 0.3637932274067151, + "kl_loss": 0.396675705909729, + "loss_ib": 0.010283253155648708, + "step": 1265 + }, + { + "ce_ib": 4.922732830047607, + "ce_orig": 0.7064297795295715, + "epoch": 0.3637932274067151, + "kl_loss": 0.20546512305736542, + "loss_ib": 0.006977383978664875, + "step": 1265 + }, + { + "ce_ib": 7.470864295959473, + "ce_orig": 1.3002017736434937, + "epoch": 0.3637932274067151, + "kl_loss": 0.2607371211051941, + "loss_ib": 0.010078235529363155, + "step": 1265 + }, + { + "ce_ib": 8.990388870239258, + "ce_orig": 0.8157200813293457, + "epoch": 0.3637932274067151, + "kl_loss": 0.21014195680618286, + "loss_ib": 0.011091808788478374, + "step": 1265 + }, + { + "ce_ib": 5.218315124511719, + "ce_orig": 0.6002530455589294, + "epoch": 0.3640808109856927, + "kl_loss": 0.21902649104595184, + "loss_ib": 0.007408579811453819, + "step": 1266 + }, + { + "ce_ib": 4.633239269256592, + "ce_orig": 0.842490017414093, + "epoch": 0.3640808109856927, + "kl_loss": 0.254517138004303, + "loss_ib": 0.007178409956395626, + "step": 1266 + }, + { + "ce_ib": 6.270453929901123, + "ce_orig": 0.9670078754425049, + "epoch": 0.3640808109856927, + "kl_loss": 0.26729723811149597, + "loss_ib": 0.008943426422774792, + "step": 1266 + }, + { + "ce_ib": 8.4492769241333, + "ce_orig": 1.4273326396942139, + "epoch": 0.3640808109856927, + "kl_loss": 0.28911885619163513, + "loss_ib": 0.011340465396642685, + "step": 1266 + }, + { + "ce_ib": 2.5559194087982178, + "ce_orig": 0.11846259981393814, + "epoch": 0.36436839456467035, + "kl_loss": 0.6923233866691589, + "loss_ib": 0.009479152970016003, + "step": 1267 + }, + { + "ce_ib": 6.5786519050598145, + "ce_orig": 0.8715175986289978, + "epoch": 0.36436839456467035, + "kl_loss": 0.3284551799297333, + "loss_ib": 0.009863203391432762, + "step": 1267 + }, + { + "ce_ib": 6.378236293792725, + "ce_orig": 0.7787953019142151, + "epoch": 0.36436839456467035, + "kl_loss": 0.2639128863811493, + "loss_ib": 0.00901736505329609, + "step": 1267 + }, + { + "ce_ib": 5.599494457244873, + "ce_orig": 0.6616715788841248, + "epoch": 0.36436839456467035, + "kl_loss": 0.27571022510528564, + "loss_ib": 0.00835659634321928, + "step": 1267 + }, + { + "ce_ib": 5.361080646514893, + "ce_orig": 0.7969940304756165, + "epoch": 0.364655978143648, + "kl_loss": 0.27387315034866333, + "loss_ib": 0.0080998120829463, + "step": 1268 + }, + { + "ce_ib": 7.742045879364014, + "ce_orig": 0.970597505569458, + "epoch": 0.364655978143648, + "kl_loss": 0.17689698934555054, + "loss_ib": 0.009511015377938747, + "step": 1268 + }, + { + "ce_ib": 8.752741813659668, + "ce_orig": 1.0461597442626953, + "epoch": 0.364655978143648, + "kl_loss": 0.19503554701805115, + "loss_ib": 0.010703097097575665, + "step": 1268 + }, + { + "ce_ib": 7.340433120727539, + "ce_orig": 1.0559906959533691, + "epoch": 0.364655978143648, + "kl_loss": 0.3039883077144623, + "loss_ib": 0.010380315594375134, + "step": 1268 + }, + { + "ce_ib": 5.940576553344727, + "ce_orig": 0.4752423167228699, + "epoch": 0.36494356172262565, + "kl_loss": 0.33972451090812683, + "loss_ib": 0.00933782197535038, + "step": 1269 + }, + { + "ce_ib": 5.702699184417725, + "ce_orig": 0.9983672499656677, + "epoch": 0.36494356172262565, + "kl_loss": 0.2671028971672058, + "loss_ib": 0.008373728021979332, + "step": 1269 + }, + { + "ce_ib": 5.869582653045654, + "ce_orig": 0.6255727410316467, + "epoch": 0.36494356172262565, + "kl_loss": 0.18517814576625824, + "loss_ib": 0.007721364498138428, + "step": 1269 + }, + { + "ce_ib": 5.308149337768555, + "ce_orig": 0.9392489194869995, + "epoch": 0.36494356172262565, + "kl_loss": 0.22410941123962402, + "loss_ib": 0.00754924351349473, + "step": 1269 + }, + { + "epoch": 0.3652311453016033, + "grad_norm": 0.10248145461082458, + "learning_rate": 9.784138104379886e-06, + "loss": 0.8412, + "step": 1270 + }, + { + "ce_ib": 5.1530938148498535, + "ce_orig": 0.7408868670463562, + "epoch": 0.3652311453016033, + "kl_loss": 0.2502548098564148, + "loss_ib": 0.007655641995370388, + "step": 1270 + }, + { + "ce_ib": 5.076098918914795, + "ce_orig": 0.7266834378242493, + "epoch": 0.3652311453016033, + "kl_loss": 0.2595973610877991, + "loss_ib": 0.007672072388231754, + "step": 1270 + }, + { + "ce_ib": 7.772645950317383, + "ce_orig": 1.095561146736145, + "epoch": 0.3652311453016033, + "kl_loss": 0.23874793946743011, + "loss_ib": 0.010160124860703945, + "step": 1270 + }, + { + "ce_ib": 6.981496334075928, + "ce_orig": 1.0339139699935913, + "epoch": 0.3652311453016033, + "kl_loss": 0.25664764642715454, + "loss_ib": 0.009547972120344639, + "step": 1270 + }, + { + "ce_ib": 5.776525497436523, + "ce_orig": 0.7597066760063171, + "epoch": 0.3655187288805809, + "kl_loss": 0.19280946254730225, + "loss_ib": 0.007704620249569416, + "step": 1271 + }, + { + "ce_ib": 5.559055328369141, + "ce_orig": 0.773419201374054, + "epoch": 0.3655187288805809, + "kl_loss": 0.2716330885887146, + "loss_ib": 0.008275385946035385, + "step": 1271 + }, + { + "ce_ib": 4.46776008605957, + "ce_orig": 0.7645898461341858, + "epoch": 0.3655187288805809, + "kl_loss": 0.22453013062477112, + "loss_ib": 0.00671306112781167, + "step": 1271 + }, + { + "ce_ib": 8.849346160888672, + "ce_orig": 1.4099323749542236, + "epoch": 0.3655187288805809, + "kl_loss": 0.22820809483528137, + "loss_ib": 0.011131427250802517, + "step": 1271 + }, + { + "ce_ib": 7.46262264251709, + "ce_orig": 1.324544906616211, + "epoch": 0.3658063124595586, + "kl_loss": 0.23189082741737366, + "loss_ib": 0.009781531058251858, + "step": 1272 + }, + { + "ce_ib": 2.2801783084869385, + "ce_orig": 0.22608597576618195, + "epoch": 0.3658063124595586, + "kl_loss": 0.5748566389083862, + "loss_ib": 0.008028744719922543, + "step": 1272 + }, + { + "ce_ib": 11.055466651916504, + "ce_orig": 1.7213143110275269, + "epoch": 0.3658063124595586, + "kl_loss": 0.5324690937995911, + "loss_ib": 0.016380157321691513, + "step": 1272 + }, + { + "ce_ib": 5.690445899963379, + "ce_orig": 0.7228091955184937, + "epoch": 0.3658063124595586, + "kl_loss": 0.25707000494003296, + "loss_ib": 0.008261146023869514, + "step": 1272 + }, + { + "ce_ib": 5.308260917663574, + "ce_orig": 0.5550944209098816, + "epoch": 0.3660938960385362, + "kl_loss": 0.35145512223243713, + "loss_ib": 0.008822811767458916, + "step": 1273 + }, + { + "ce_ib": 9.893881797790527, + "ce_orig": 1.526107668876648, + "epoch": 0.3660938960385362, + "kl_loss": 0.47689947485923767, + "loss_ib": 0.014662875793874264, + "step": 1273 + }, + { + "ce_ib": 10.002148628234863, + "ce_orig": 1.4347825050354004, + "epoch": 0.3660938960385362, + "kl_loss": 0.5732181072235107, + "loss_ib": 0.01573432981967926, + "step": 1273 + }, + { + "ce_ib": 6.262985706329346, + "ce_orig": 0.7272791862487793, + "epoch": 0.3660938960385362, + "kl_loss": 0.2030579149723053, + "loss_ib": 0.00829356536269188, + "step": 1273 + }, + { + "ce_ib": 5.783365726470947, + "ce_orig": 0.6207488775253296, + "epoch": 0.3663814796175138, + "kl_loss": 0.24680054187774658, + "loss_ib": 0.00825137086212635, + "step": 1274 + }, + { + "ce_ib": 7.845116138458252, + "ce_orig": 0.9590703248977661, + "epoch": 0.3663814796175138, + "kl_loss": 0.30576378107070923, + "loss_ib": 0.01090275403112173, + "step": 1274 + }, + { + "ce_ib": 6.0168986320495605, + "ce_orig": 0.873681902885437, + "epoch": 0.3663814796175138, + "kl_loss": 0.2680283188819885, + "loss_ib": 0.008697181940078735, + "step": 1274 + }, + { + "ce_ib": 4.450382232666016, + "ce_orig": 0.5019147992134094, + "epoch": 0.3663814796175138, + "kl_loss": 0.2422555387020111, + "loss_ib": 0.006872937548905611, + "step": 1274 + }, + { + "epoch": 0.3666690631964915, + "grad_norm": 0.11415659636259079, + "learning_rate": 9.781876604292181e-06, + "loss": 0.895, + "step": 1275 + }, + { + "ce_ib": 7.331485748291016, + "ce_orig": 0.668783962726593, + "epoch": 0.3666690631964915, + "kl_loss": 0.177715003490448, + "loss_ib": 0.00910863559693098, + "step": 1275 + }, + { + "ce_ib": 7.6367902755737305, + "ce_orig": 0.4947163462638855, + "epoch": 0.3666690631964915, + "kl_loss": 0.27151551842689514, + "loss_ib": 0.010351944714784622, + "step": 1275 + }, + { + "ce_ib": 6.944085121154785, + "ce_orig": 0.817987322807312, + "epoch": 0.3666690631964915, + "kl_loss": 0.4041800796985626, + "loss_ib": 0.010985885746777058, + "step": 1275 + }, + { + "ce_ib": 5.102936267852783, + "ce_orig": 0.6391552090644836, + "epoch": 0.3666690631964915, + "kl_loss": 0.26941439509391785, + "loss_ib": 0.0077970800921320915, + "step": 1275 + }, + { + "ce_ib": 5.671210765838623, + "ce_orig": 0.9727239608764648, + "epoch": 0.36695664677546913, + "kl_loss": 0.46421921253204346, + "loss_ib": 0.01031340379267931, + "step": 1276 + }, + { + "ce_ib": 7.265056610107422, + "ce_orig": 1.0186502933502197, + "epoch": 0.36695664677546913, + "kl_loss": 0.33712470531463623, + "loss_ib": 0.010636303573846817, + "step": 1276 + }, + { + "ce_ib": 5.534669876098633, + "ce_orig": 0.6169944405555725, + "epoch": 0.36695664677546913, + "kl_loss": 0.261410653591156, + "loss_ib": 0.00814877636730671, + "step": 1276 + }, + { + "ce_ib": 10.333765983581543, + "ce_orig": 1.208878993988037, + "epoch": 0.36695664677546913, + "kl_loss": 0.31233105063438416, + "loss_ib": 0.013457076624035835, + "step": 1276 + }, + { + "ce_ib": 8.364578247070312, + "ce_orig": 1.1486130952835083, + "epoch": 0.36724423035444675, + "kl_loss": 0.31856000423431396, + "loss_ib": 0.011550177820026875, + "step": 1277 + }, + { + "ce_ib": 8.633726119995117, + "ce_orig": 1.2692739963531494, + "epoch": 0.36724423035444675, + "kl_loss": 0.178573340177536, + "loss_ib": 0.010419459082186222, + "step": 1277 + }, + { + "ce_ib": 5.818536758422852, + "ce_orig": 0.6847742795944214, + "epoch": 0.36724423035444675, + "kl_loss": 0.2574927508831024, + "loss_ib": 0.008393463678658009, + "step": 1277 + }, + { + "ce_ib": 9.451970100402832, + "ce_orig": 0.8362522721290588, + "epoch": 0.36724423035444675, + "kl_loss": 0.19935394823551178, + "loss_ib": 0.01144551020115614, + "step": 1277 + }, + { + "ce_ib": 4.879723072052002, + "ce_orig": 0.9751784205436707, + "epoch": 0.3675318139334244, + "kl_loss": 0.2573961317539215, + "loss_ib": 0.0074536846950650215, + "step": 1278 + }, + { + "ce_ib": 6.389813423156738, + "ce_orig": 0.9395421743392944, + "epoch": 0.3675318139334244, + "kl_loss": 0.25022318959236145, + "loss_ib": 0.008892044425010681, + "step": 1278 + }, + { + "ce_ib": 4.8754119873046875, + "ce_orig": 1.0186686515808105, + "epoch": 0.3675318139334244, + "kl_loss": 0.20029765367507935, + "loss_ib": 0.0068783885799348354, + "step": 1278 + }, + { + "ce_ib": 7.321893215179443, + "ce_orig": 0.7457486391067505, + "epoch": 0.3675318139334244, + "kl_loss": 0.22448067367076874, + "loss_ib": 0.009566700085997581, + "step": 1278 + }, + { + "ce_ib": 4.064176559448242, + "ce_orig": 0.6539361476898193, + "epoch": 0.36781939751240206, + "kl_loss": 0.23849976062774658, + "loss_ib": 0.006449174135923386, + "step": 1279 + }, + { + "ce_ib": 6.140800476074219, + "ce_orig": 0.642346978187561, + "epoch": 0.36781939751240206, + "kl_loss": 0.26734820008277893, + "loss_ib": 0.008814281783998013, + "step": 1279 + }, + { + "ce_ib": 2.3328685760498047, + "ce_orig": 0.2597403824329376, + "epoch": 0.36781939751240206, + "kl_loss": 0.550055205821991, + "loss_ib": 0.007833420298993587, + "step": 1279 + }, + { + "ce_ib": 5.496246814727783, + "ce_orig": 0.7764479517936707, + "epoch": 0.36781939751240206, + "kl_loss": 0.2076108306646347, + "loss_ib": 0.007572355214506388, + "step": 1279 + }, + { + "epoch": 0.3681069810913797, + "grad_norm": 0.1022362932562828, + "learning_rate": 9.779603583553842e-06, + "loss": 0.8978, + "step": 1280 + }, + { + "ce_ib": 6.203456878662109, + "ce_orig": 0.8070749640464783, + "epoch": 0.3681069810913797, + "kl_loss": 0.3705917000770569, + "loss_ib": 0.009909373708069324, + "step": 1280 + }, + { + "ce_ib": 8.270031929016113, + "ce_orig": 0.9747037291526794, + "epoch": 0.3681069810913797, + "kl_loss": 0.21375861763954163, + "loss_ib": 0.010407618246972561, + "step": 1280 + }, + { + "ce_ib": 6.111821174621582, + "ce_orig": 0.8507468700408936, + "epoch": 0.3681069810913797, + "kl_loss": 0.22477471828460693, + "loss_ib": 0.008359568193554878, + "step": 1280 + }, + { + "ce_ib": 3.5844693183898926, + "ce_orig": 0.5020504593849182, + "epoch": 0.3681069810913797, + "kl_loss": 0.3244742751121521, + "loss_ib": 0.006829211488366127, + "step": 1280 + }, + { + "ce_ib": 9.442797660827637, + "ce_orig": 1.0303080081939697, + "epoch": 0.3683945646703573, + "kl_loss": 0.34166717529296875, + "loss_ib": 0.012859469279646873, + "step": 1281 + }, + { + "ce_ib": 5.348814487457275, + "ce_orig": 1.0583198070526123, + "epoch": 0.3683945646703573, + "kl_loss": 0.20181548595428467, + "loss_ib": 0.007366969250142574, + "step": 1281 + }, + { + "ce_ib": 5.487965106964111, + "ce_orig": 0.6619595289230347, + "epoch": 0.3683945646703573, + "kl_loss": 0.20301175117492676, + "loss_ib": 0.007518082857131958, + "step": 1281 + }, + { + "ce_ib": 3.802037239074707, + "ce_orig": 0.5519328117370605, + "epoch": 0.3683945646703573, + "kl_loss": 0.19785550236701965, + "loss_ib": 0.005780591629445553, + "step": 1281 + }, + { + "ce_ib": 9.927574157714844, + "ce_orig": 1.1654108762741089, + "epoch": 0.368682148249335, + "kl_loss": 0.34230512380599976, + "loss_ib": 0.013350624591112137, + "step": 1282 + }, + { + "ce_ib": 7.524363040924072, + "ce_orig": 0.9500890970230103, + "epoch": 0.368682148249335, + "kl_loss": 0.3446368873119354, + "loss_ib": 0.010970731265842915, + "step": 1282 + }, + { + "ce_ib": 6.607281684875488, + "ce_orig": 0.6834641098976135, + "epoch": 0.368682148249335, + "kl_loss": 0.3660809099674225, + "loss_ib": 0.010268090292811394, + "step": 1282 + }, + { + "ce_ib": 8.014334678649902, + "ce_orig": 1.0792757272720337, + "epoch": 0.368682148249335, + "kl_loss": 0.3143981099128723, + "loss_ib": 0.01115831546485424, + "step": 1282 + }, + { + "ce_ib": 7.707310199737549, + "ce_orig": 0.734540581703186, + "epoch": 0.3689697318283126, + "kl_loss": 0.28208112716674805, + "loss_ib": 0.010528121143579483, + "step": 1283 + }, + { + "ce_ib": 5.258242607116699, + "ce_orig": 0.5540490746498108, + "epoch": 0.3689697318283126, + "kl_loss": 0.1847594976425171, + "loss_ib": 0.007105837110430002, + "step": 1283 + }, + { + "ce_ib": 7.624849796295166, + "ce_orig": 1.0621042251586914, + "epoch": 0.3689697318283126, + "kl_loss": 0.1952236145734787, + "loss_ib": 0.009577086195349693, + "step": 1283 + }, + { + "ce_ib": 7.061639308929443, + "ce_orig": 0.7444918751716614, + "epoch": 0.3689697318283126, + "kl_loss": 0.34235984086990356, + "loss_ib": 0.010485237464308739, + "step": 1283 + }, + { + "ce_ib": 6.315250873565674, + "ce_orig": 0.7070626616477966, + "epoch": 0.36925731540729023, + "kl_loss": 0.25096261501312256, + "loss_ib": 0.008824876509606838, + "step": 1284 + }, + { + "ce_ib": 7.8378424644470215, + "ce_orig": 0.45956188440322876, + "epoch": 0.36925731540729023, + "kl_loss": 0.3414694368839264, + "loss_ib": 0.011252536438405514, + "step": 1284 + }, + { + "ce_ib": 4.959325790405273, + "ce_orig": 0.7961344718933105, + "epoch": 0.36925731540729023, + "kl_loss": 0.25667428970336914, + "loss_ib": 0.007526068482547998, + "step": 1284 + }, + { + "ce_ib": 8.150565147399902, + "ce_orig": 1.2826651334762573, + "epoch": 0.36925731540729023, + "kl_loss": 0.2368721067905426, + "loss_ib": 0.01051928661763668, + "step": 1284 + }, + { + "epoch": 0.3695448989862679, + "grad_norm": 0.11932408809661865, + "learning_rate": 9.777319047641098e-06, + "loss": 0.843, + "step": 1285 + }, + { + "ce_ib": 9.26733112335205, + "ce_orig": 1.25326669216156, + "epoch": 0.3695448989862679, + "kl_loss": 0.2810722887516022, + "loss_ib": 0.012078053317964077, + "step": 1285 + }, + { + "ce_ib": 8.349451065063477, + "ce_orig": 0.8618485927581787, + "epoch": 0.3695448989862679, + "kl_loss": 0.2214396446943283, + "loss_ib": 0.010563847608864307, + "step": 1285 + }, + { + "ce_ib": 8.556676864624023, + "ce_orig": 0.8297768235206604, + "epoch": 0.3695448989862679, + "kl_loss": 0.24484603106975555, + "loss_ib": 0.011005137115716934, + "step": 1285 + }, + { + "ce_ib": 4.535597324371338, + "ce_orig": 0.6433321833610535, + "epoch": 0.3695448989862679, + "kl_loss": 0.18611079454421997, + "loss_ib": 0.0063967048190534115, + "step": 1285 + }, + { + "ce_ib": 7.341830253601074, + "ce_orig": 0.9603883028030396, + "epoch": 0.36983248256524553, + "kl_loss": 0.332909494638443, + "loss_ib": 0.010670925490558147, + "step": 1286 + }, + { + "ce_ib": 4.135986328125, + "ce_orig": 0.6429925560951233, + "epoch": 0.36983248256524553, + "kl_loss": 0.17457111179828644, + "loss_ib": 0.005881697405129671, + "step": 1286 + }, + { + "ce_ib": 10.247614860534668, + "ce_orig": 1.8241069316864014, + "epoch": 0.36983248256524553, + "kl_loss": 0.3723381459712982, + "loss_ib": 0.013970997184515, + "step": 1286 + }, + { + "ce_ib": 9.693305969238281, + "ce_orig": 1.5605031251907349, + "epoch": 0.36983248256524553, + "kl_loss": 0.28591495752334595, + "loss_ib": 0.012552455067634583, + "step": 1286 + }, + { + "ce_ib": 3.6252291202545166, + "ce_orig": 0.4865838885307312, + "epoch": 0.37012006614422316, + "kl_loss": 0.28489428758621216, + "loss_ib": 0.00647417176514864, + "step": 1287 + }, + { + "ce_ib": 8.392481803894043, + "ce_orig": 0.6792038083076477, + "epoch": 0.37012006614422316, + "kl_loss": 0.28094637393951416, + "loss_ib": 0.011201945133507252, + "step": 1287 + }, + { + "ce_ib": 6.859921455383301, + "ce_orig": 1.0309630632400513, + "epoch": 0.37012006614422316, + "kl_loss": 0.21855288743972778, + "loss_ib": 0.009045450948178768, + "step": 1287 + }, + { + "ce_ib": 9.25197696685791, + "ce_orig": 1.3951451778411865, + "epoch": 0.37012006614422316, + "kl_loss": 0.2725180983543396, + "loss_ib": 0.011977157555520535, + "step": 1287 + }, + { + "ce_ib": 4.974238872528076, + "ce_orig": 0.700851559638977, + "epoch": 0.3704076497232008, + "kl_loss": 0.17885011434555054, + "loss_ib": 0.006762739736586809, + "step": 1288 + }, + { + "ce_ib": 6.060398101806641, + "ce_orig": 0.7376994490623474, + "epoch": 0.3704076497232008, + "kl_loss": 0.21035978198051453, + "loss_ib": 0.0081639951094985, + "step": 1288 + }, + { + "ce_ib": 6.525364875793457, + "ce_orig": 0.7431560754776001, + "epoch": 0.3704076497232008, + "kl_loss": 0.18042346835136414, + "loss_ib": 0.008329600095748901, + "step": 1288 + }, + { + "ce_ib": 9.062355041503906, + "ce_orig": 1.4950919151306152, + "epoch": 0.3704076497232008, + "kl_loss": 0.21809548139572144, + "loss_ib": 0.011243309825658798, + "step": 1288 + }, + { + "ce_ib": 5.605106830596924, + "ce_orig": 1.000119686126709, + "epoch": 0.37069523330217846, + "kl_loss": 0.2606685161590576, + "loss_ib": 0.008211791515350342, + "step": 1289 + }, + { + "ce_ib": 5.617210388183594, + "ce_orig": 0.8429021239280701, + "epoch": 0.37069523330217846, + "kl_loss": 0.23302999138832092, + "loss_ib": 0.007947510108351707, + "step": 1289 + }, + { + "ce_ib": 7.6939697265625, + "ce_orig": 0.448341965675354, + "epoch": 0.37069523330217846, + "kl_loss": 0.33551597595214844, + "loss_ib": 0.011049130000174046, + "step": 1289 + }, + { + "ce_ib": 6.936496257781982, + "ce_orig": 0.8369705677032471, + "epoch": 0.37069523330217846, + "kl_loss": 0.35632652044296265, + "loss_ib": 0.010499760508537292, + "step": 1289 + }, + { + "epoch": 0.3709828168811561, + "grad_norm": 0.11123020946979523, + "learning_rate": 9.775023002057931e-06, + "loss": 0.9009, + "step": 1290 + }, + { + "ce_ib": 7.711465358734131, + "ce_orig": 0.6811661124229431, + "epoch": 0.3709828168811561, + "kl_loss": 0.24951444566249847, + "loss_ib": 0.010206609964370728, + "step": 1290 + }, + { + "ce_ib": 6.423352241516113, + "ce_orig": 0.6590506434440613, + "epoch": 0.3709828168811561, + "kl_loss": 0.25252413749694824, + "loss_ib": 0.008948593400418758, + "step": 1290 + }, + { + "ce_ib": 7.123904228210449, + "ce_orig": 0.9283602237701416, + "epoch": 0.3709828168811561, + "kl_loss": 0.2745411992073059, + "loss_ib": 0.009869315661489964, + "step": 1290 + }, + { + "ce_ib": 5.736572265625, + "ce_orig": 0.8704516291618347, + "epoch": 0.3709828168811561, + "kl_loss": 0.23893356323242188, + "loss_ib": 0.008125907741487026, + "step": 1290 + }, + { + "ce_ib": 6.976745128631592, + "ce_orig": 0.6642255187034607, + "epoch": 0.3712704004601337, + "kl_loss": 0.24520739912986755, + "loss_ib": 0.009428819641470909, + "step": 1291 + }, + { + "ce_ib": 3.866387367248535, + "ce_orig": 0.6691374778747559, + "epoch": 0.3712704004601337, + "kl_loss": 0.19989712536334991, + "loss_ib": 0.0058653587475419044, + "step": 1291 + }, + { + "ce_ib": 4.301640510559082, + "ce_orig": 0.4932442307472229, + "epoch": 0.3712704004601337, + "kl_loss": 0.20259158313274384, + "loss_ib": 0.006327556446194649, + "step": 1291 + }, + { + "ce_ib": 6.155208587646484, + "ce_orig": 0.8413316607475281, + "epoch": 0.3712704004601337, + "kl_loss": 0.33612358570098877, + "loss_ib": 0.009516444988548756, + "step": 1291 + }, + { + "ce_ib": 8.439119338989258, + "ce_orig": 1.0063964128494263, + "epoch": 0.3715579840391114, + "kl_loss": 0.24863265454769135, + "loss_ib": 0.010925445705652237, + "step": 1292 + }, + { + "ce_ib": 3.834338426589966, + "ce_orig": 0.5589891076087952, + "epoch": 0.3715579840391114, + "kl_loss": 0.27248573303222656, + "loss_ib": 0.006559195462614298, + "step": 1292 + }, + { + "ce_ib": 3.839154005050659, + "ce_orig": 0.4755013883113861, + "epoch": 0.3715579840391114, + "kl_loss": 0.26118844747543335, + "loss_ib": 0.0064510381780564785, + "step": 1292 + }, + { + "ce_ib": 3.814335584640503, + "ce_orig": 0.6413306593894958, + "epoch": 0.3715579840391114, + "kl_loss": 0.259212464094162, + "loss_ib": 0.006406460423022509, + "step": 1292 + }, + { + "ce_ib": 4.642645359039307, + "ce_orig": 0.5980650186538696, + "epoch": 0.371845567618089, + "kl_loss": 0.28181472420692444, + "loss_ib": 0.007460792548954487, + "step": 1293 + }, + { + "ce_ib": 9.607026100158691, + "ce_orig": 1.2752386331558228, + "epoch": 0.371845567618089, + "kl_loss": 0.2727745473384857, + "loss_ib": 0.012334770523011684, + "step": 1293 + }, + { + "ce_ib": 5.609538555145264, + "ce_orig": 0.42599013447761536, + "epoch": 0.371845567618089, + "kl_loss": 0.3356173038482666, + "loss_ib": 0.008965711109340191, + "step": 1293 + }, + { + "ce_ib": 6.105560779571533, + "ce_orig": 0.8053005933761597, + "epoch": 0.371845567618089, + "kl_loss": 0.2141549289226532, + "loss_ib": 0.00824710913002491, + "step": 1293 + }, + { + "ce_ib": 7.115893363952637, + "ce_orig": 0.969586968421936, + "epoch": 0.37213315119706664, + "kl_loss": 0.2096409797668457, + "loss_ib": 0.009212302975356579, + "step": 1294 + }, + { + "ce_ib": 5.7646965980529785, + "ce_orig": 0.823491096496582, + "epoch": 0.37213315119706664, + "kl_loss": 0.2654654383659363, + "loss_ib": 0.00841935072094202, + "step": 1294 + }, + { + "ce_ib": 2.2926371097564697, + "ce_orig": 0.2627139687538147, + "epoch": 0.37213315119706664, + "kl_loss": 0.4336986839771271, + "loss_ib": 0.006629623472690582, + "step": 1294 + }, + { + "ce_ib": 5.9607462882995605, + "ce_orig": 0.6307532787322998, + "epoch": 0.37213315119706664, + "kl_loss": 0.26752978563308716, + "loss_ib": 0.008636044338345528, + "step": 1294 + }, + { + "epoch": 0.3724207347760443, + "grad_norm": 0.12341900169849396, + "learning_rate": 9.772715452336046e-06, + "loss": 0.8441, + "step": 1295 + }, + { + "ce_ib": 4.824267864227295, + "ce_orig": 0.700189471244812, + "epoch": 0.3724207347760443, + "kl_loss": 0.22174613177776337, + "loss_ib": 0.0070417290553450584, + "step": 1295 + }, + { + "ce_ib": 3.344231128692627, + "ce_orig": 0.5434899926185608, + "epoch": 0.3724207347760443, + "kl_loss": 0.21252413094043732, + "loss_ib": 0.005469472147524357, + "step": 1295 + }, + { + "ce_ib": 6.728499889373779, + "ce_orig": 0.7563159465789795, + "epoch": 0.3724207347760443, + "kl_loss": 0.461100310087204, + "loss_ib": 0.011339503340423107, + "step": 1295 + }, + { + "ce_ib": 6.366405010223389, + "ce_orig": 1.089074969291687, + "epoch": 0.3724207347760443, + "kl_loss": 0.2879982590675354, + "loss_ib": 0.009246387518942356, + "step": 1295 + }, + { + "ce_ib": 7.0506696701049805, + "ce_orig": 1.0485398769378662, + "epoch": 0.37270831835502194, + "kl_loss": 0.23068824410438538, + "loss_ib": 0.009357552044093609, + "step": 1296 + }, + { + "ce_ib": 5.35711145401001, + "ce_orig": 0.6842909455299377, + "epoch": 0.37270831835502194, + "kl_loss": 0.26941436529159546, + "loss_ib": 0.008051254786550999, + "step": 1296 + }, + { + "ce_ib": 4.525808811187744, + "ce_orig": 0.4813930094242096, + "epoch": 0.37270831835502194, + "kl_loss": 0.3485274314880371, + "loss_ib": 0.008011083118617535, + "step": 1296 + }, + { + "ce_ib": 4.62595272064209, + "ce_orig": 0.6876837015151978, + "epoch": 0.37270831835502194, + "kl_loss": 0.2051934003829956, + "loss_ib": 0.006677886471152306, + "step": 1296 + }, + { + "ce_ib": 8.571621894836426, + "ce_orig": 1.0995779037475586, + "epoch": 0.37299590193399956, + "kl_loss": 0.21237477660179138, + "loss_ib": 0.010695368982851505, + "step": 1297 + }, + { + "ce_ib": 4.471730709075928, + "ce_orig": 0.5538232922554016, + "epoch": 0.37299590193399956, + "kl_loss": 0.5044662952423096, + "loss_ib": 0.009516393765807152, + "step": 1297 + }, + { + "ce_ib": 4.533944606781006, + "ce_orig": 0.8222848176956177, + "epoch": 0.37299590193399956, + "kl_loss": 0.315934956073761, + "loss_ib": 0.00769329397007823, + "step": 1297 + }, + { + "ce_ib": 6.893505096435547, + "ce_orig": 0.8183647394180298, + "epoch": 0.37299590193399956, + "kl_loss": 0.2846068739891052, + "loss_ib": 0.009739574044942856, + "step": 1297 + }, + { + "ce_ib": 7.192381381988525, + "ce_orig": 1.0460643768310547, + "epoch": 0.3732834855129772, + "kl_loss": 0.25278496742248535, + "loss_ib": 0.009720231406390667, + "step": 1298 + }, + { + "ce_ib": 6.201778411865234, + "ce_orig": 0.9309136271476746, + "epoch": 0.3732834855129772, + "kl_loss": 0.27258479595184326, + "loss_ib": 0.00892762653529644, + "step": 1298 + }, + { + "ce_ib": 5.355123043060303, + "ce_orig": 0.8157143592834473, + "epoch": 0.3732834855129772, + "kl_loss": 0.28976643085479736, + "loss_ib": 0.00825278740376234, + "step": 1298 + }, + { + "ce_ib": 7.66465425491333, + "ce_orig": 0.924324095249176, + "epoch": 0.3732834855129772, + "kl_loss": 0.20117942988872528, + "loss_ib": 0.009676448069512844, + "step": 1298 + }, + { + "ce_ib": 10.385946273803711, + "ce_orig": 1.7267224788665771, + "epoch": 0.37357106909195487, + "kl_loss": 0.3095071315765381, + "loss_ib": 0.0134810172021389, + "step": 1299 + }, + { + "ce_ib": 8.435067176818848, + "ce_orig": 0.8238710761070251, + "epoch": 0.37357106909195487, + "kl_loss": 1.0476688146591187, + "loss_ib": 0.018911754712462425, + "step": 1299 + }, + { + "ce_ib": 5.311509132385254, + "ce_orig": 0.33952459692955017, + "epoch": 0.37357106909195487, + "kl_loss": 0.23365667462348938, + "loss_ib": 0.007648075465112925, + "step": 1299 + }, + { + "ce_ib": 7.611974716186523, + "ce_orig": 1.0087212324142456, + "epoch": 0.37357106909195487, + "kl_loss": 0.24070414900779724, + "loss_ib": 0.010019016452133656, + "step": 1299 + }, + { + "epoch": 0.3738586526709325, + "grad_norm": 0.12493283301591873, + "learning_rate": 9.770396404034863e-06, + "loss": 0.9006, + "step": 1300 + }, + { + "ce_ib": 8.976292610168457, + "ce_orig": 1.355560064315796, + "epoch": 0.3738586526709325, + "kl_loss": 0.2191731184720993, + "loss_ib": 0.011168022640049458, + "step": 1300 + }, + { + "ce_ib": 5.216058731079102, + "ce_orig": 0.5929533839225769, + "epoch": 0.3738586526709325, + "kl_loss": 0.29089826345443726, + "loss_ib": 0.008125041611492634, + "step": 1300 + }, + { + "ce_ib": 4.076107025146484, + "ce_orig": 0.48292118310928345, + "epoch": 0.3738586526709325, + "kl_loss": 0.1804906278848648, + "loss_ib": 0.005881013348698616, + "step": 1300 + }, + { + "ce_ib": 7.759974479675293, + "ce_orig": 0.8867191672325134, + "epoch": 0.3738586526709325, + "kl_loss": 0.31273460388183594, + "loss_ib": 0.010887320153415203, + "step": 1300 + }, + { + "ce_ib": 6.218832015991211, + "ce_orig": 0.7377117276191711, + "epoch": 0.3741462362499101, + "kl_loss": 0.39184969663619995, + "loss_ib": 0.010137328878045082, + "step": 1301 + }, + { + "ce_ib": 5.884152889251709, + "ce_orig": 0.5230953693389893, + "epoch": 0.3741462362499101, + "kl_loss": 0.29413941502571106, + "loss_ib": 0.008825547061860561, + "step": 1301 + }, + { + "ce_ib": 6.26108455657959, + "ce_orig": 1.0266234874725342, + "epoch": 0.3741462362499101, + "kl_loss": 0.35874414443969727, + "loss_ib": 0.009848525747656822, + "step": 1301 + }, + { + "ce_ib": 8.435012817382812, + "ce_orig": 1.1848310232162476, + "epoch": 0.3741462362499101, + "kl_loss": 0.2637442946434021, + "loss_ib": 0.01107245497405529, + "step": 1301 + }, + { + "ce_ib": 2.2409121990203857, + "ce_orig": 0.21592168509960175, + "epoch": 0.3744338198288878, + "kl_loss": 0.5623815059661865, + "loss_ib": 0.007864727638661861, + "step": 1302 + }, + { + "ce_ib": 4.696742057800293, + "ce_orig": 0.6992368698120117, + "epoch": 0.3744338198288878, + "kl_loss": 0.17069947719573975, + "loss_ib": 0.006403736770153046, + "step": 1302 + }, + { + "ce_ib": 6.172117233276367, + "ce_orig": 0.794901430606842, + "epoch": 0.3744338198288878, + "kl_loss": 0.26270967721939087, + "loss_ib": 0.008799213916063309, + "step": 1302 + }, + { + "ce_ib": 7.035194396972656, + "ce_orig": 0.9057155847549438, + "epoch": 0.3744338198288878, + "kl_loss": 0.36150482296943665, + "loss_ib": 0.010650242678821087, + "step": 1302 + }, + { + "ce_ib": 5.013677597045898, + "ce_orig": 0.5755417346954346, + "epoch": 0.3747214034078654, + "kl_loss": 0.30179062485694885, + "loss_ib": 0.00803158339112997, + "step": 1303 + }, + { + "ce_ib": 4.116542339324951, + "ce_orig": 0.7811974287033081, + "epoch": 0.3747214034078654, + "kl_loss": 0.2888774573802948, + "loss_ib": 0.007005317136645317, + "step": 1303 + }, + { + "ce_ib": 4.250243186950684, + "ce_orig": 0.7281661033630371, + "epoch": 0.3747214034078654, + "kl_loss": 0.17032361030578613, + "loss_ib": 0.005953479092568159, + "step": 1303 + }, + { + "ce_ib": 9.735255241394043, + "ce_orig": 1.2109895944595337, + "epoch": 0.3747214034078654, + "kl_loss": 0.1804056465625763, + "loss_ib": 0.011539311148226261, + "step": 1303 + }, + { + "ce_ib": 4.496049880981445, + "ce_orig": 0.4316735863685608, + "epoch": 0.37500898698684304, + "kl_loss": 0.22079500555992126, + "loss_ib": 0.006704000290483236, + "step": 1304 + }, + { + "ce_ib": 9.108988761901855, + "ce_orig": 1.4747859239578247, + "epoch": 0.37500898698684304, + "kl_loss": 0.2784188985824585, + "loss_ib": 0.011893176473677158, + "step": 1304 + }, + { + "ce_ib": 9.340219497680664, + "ce_orig": 1.2900398969650269, + "epoch": 0.37500898698684304, + "kl_loss": 0.2142384946346283, + "loss_ib": 0.011482604779303074, + "step": 1304 + }, + { + "ce_ib": 7.92219877243042, + "ce_orig": 1.0115225315093994, + "epoch": 0.37500898698684304, + "kl_loss": 0.31117451190948486, + "loss_ib": 0.011033943854272366, + "step": 1304 + }, + { + "epoch": 0.37529657056582066, + "grad_norm": 0.09640936553478241, + "learning_rate": 9.768065862741512e-06, + "loss": 0.8762, + "step": 1305 + }, + { + "ce_ib": 8.480440139770508, + "ce_orig": 1.1000053882598877, + "epoch": 0.37529657056582066, + "kl_loss": 0.24573183059692383, + "loss_ib": 0.010937758721411228, + "step": 1305 + }, + { + "ce_ib": 6.316285610198975, + "ce_orig": 0.744388222694397, + "epoch": 0.37529657056582066, + "kl_loss": 0.2152835577726364, + "loss_ib": 0.008469121530652046, + "step": 1305 + }, + { + "ce_ib": 3.8323137760162354, + "ce_orig": 0.9252414703369141, + "epoch": 0.37529657056582066, + "kl_loss": 0.15203577280044556, + "loss_ib": 0.005352671258151531, + "step": 1305 + }, + { + "ce_ib": 7.657679557800293, + "ce_orig": 1.0006682872772217, + "epoch": 0.37529657056582066, + "kl_loss": 0.20859548449516296, + "loss_ib": 0.00974363461136818, + "step": 1305 + }, + { + "ce_ib": 9.317044258117676, + "ce_orig": 1.1992231607437134, + "epoch": 0.37558415414479834, + "kl_loss": 0.2548269033432007, + "loss_ib": 0.011865313164889812, + "step": 1306 + }, + { + "ce_ib": 7.432621955871582, + "ce_orig": 0.8354329466819763, + "epoch": 0.37558415414479834, + "kl_loss": 0.3074944317340851, + "loss_ib": 0.01050756685435772, + "step": 1306 + }, + { + "ce_ib": 8.270771026611328, + "ce_orig": 1.0792176723480225, + "epoch": 0.37558415414479834, + "kl_loss": 0.2315516471862793, + "loss_ib": 0.010586287826299667, + "step": 1306 + }, + { + "ce_ib": 9.993446350097656, + "ce_orig": 1.468353509902954, + "epoch": 0.37558415414479834, + "kl_loss": 0.3438698649406433, + "loss_ib": 0.013432145118713379, + "step": 1306 + }, + { + "ce_ib": 9.865982055664062, + "ce_orig": 1.856319785118103, + "epoch": 0.37587173772377597, + "kl_loss": 0.22962693870067596, + "loss_ib": 0.012162251397967339, + "step": 1307 + }, + { + "ce_ib": 4.654817581176758, + "ce_orig": 0.596437931060791, + "epoch": 0.37587173772377597, + "kl_loss": 0.4065954089164734, + "loss_ib": 0.00872077140957117, + "step": 1307 + }, + { + "ce_ib": 11.853404998779297, + "ce_orig": 1.680164098739624, + "epoch": 0.37587173772377597, + "kl_loss": 0.16628415882587433, + "loss_ib": 0.013516247272491455, + "step": 1307 + }, + { + "ce_ib": 3.770814895629883, + "ce_orig": 0.6643754243850708, + "epoch": 0.37587173772377597, + "kl_loss": 0.2119031697511673, + "loss_ib": 0.0058898464776575565, + "step": 1307 + }, + { + "ce_ib": 7.13842248916626, + "ce_orig": 0.6096088886260986, + "epoch": 0.3761593213027536, + "kl_loss": 0.24172694981098175, + "loss_ib": 0.009555691853165627, + "step": 1308 + }, + { + "ce_ib": 7.158660411834717, + "ce_orig": 0.7481746673583984, + "epoch": 0.3761593213027536, + "kl_loss": 0.19245290756225586, + "loss_ib": 0.009083189070224762, + "step": 1308 + }, + { + "ce_ib": 4.829501152038574, + "ce_orig": 0.6256715655326843, + "epoch": 0.3761593213027536, + "kl_loss": 0.23448200523853302, + "loss_ib": 0.007174321450293064, + "step": 1308 + }, + { + "ce_ib": 8.600601196289062, + "ce_orig": 1.2861305475234985, + "epoch": 0.3761593213027536, + "kl_loss": 0.24668508768081665, + "loss_ib": 0.011067451909184456, + "step": 1308 + }, + { + "ce_ib": 8.377589225769043, + "ce_orig": 1.4738719463348389, + "epoch": 0.37644690488173127, + "kl_loss": 0.2912333309650421, + "loss_ib": 0.011289922520518303, + "step": 1309 + }, + { + "ce_ib": 5.8922119140625, + "ce_orig": 0.9516330361366272, + "epoch": 0.37644690488173127, + "kl_loss": 0.21654640138149261, + "loss_ib": 0.008057675324380398, + "step": 1309 + }, + { + "ce_ib": 6.052369594573975, + "ce_orig": 1.0148662328720093, + "epoch": 0.37644690488173127, + "kl_loss": 0.1896468997001648, + "loss_ib": 0.007948838174343109, + "step": 1309 + }, + { + "ce_ib": 6.462357044219971, + "ce_orig": 0.9467957615852356, + "epoch": 0.37644690488173127, + "kl_loss": 0.2643457353115082, + "loss_ib": 0.00910581462085247, + "step": 1309 + }, + { + "epoch": 0.3767344884607089, + "grad_norm": 0.11843874305486679, + "learning_rate": 9.765723834070805e-06, + "loss": 0.8566, + "step": 1310 + }, + { + "ce_ib": 10.225154876708984, + "ce_orig": 1.4613780975341797, + "epoch": 0.3767344884607089, + "kl_loss": 0.41715824604034424, + "loss_ib": 0.014396737329661846, + "step": 1310 + }, + { + "ce_ib": 11.08290958404541, + "ce_orig": 1.4982622861862183, + "epoch": 0.3767344884607089, + "kl_loss": 0.2907963991165161, + "loss_ib": 0.013990874402225018, + "step": 1310 + }, + { + "ce_ib": 5.848134994506836, + "ce_orig": 1.044801950454712, + "epoch": 0.3767344884607089, + "kl_loss": 0.26418235898017883, + "loss_ib": 0.008489958941936493, + "step": 1310 + }, + { + "ce_ib": 7.654242515563965, + "ce_orig": 0.9891024231910706, + "epoch": 0.3767344884607089, + "kl_loss": 0.3541780710220337, + "loss_ib": 0.011196022853255272, + "step": 1310 + }, + { + "ce_ib": 6.14833927154541, + "ce_orig": 0.8169759511947632, + "epoch": 0.3770220720396865, + "kl_loss": 0.16873528063297272, + "loss_ib": 0.007835691794753075, + "step": 1311 + }, + { + "ce_ib": 6.844672203063965, + "ce_orig": 0.7956097722053528, + "epoch": 0.3770220720396865, + "kl_loss": 0.22771617770195007, + "loss_ib": 0.009121834300458431, + "step": 1311 + }, + { + "ce_ib": 5.502779006958008, + "ce_orig": 0.37446027994155884, + "epoch": 0.3770220720396865, + "kl_loss": 0.35141971707344055, + "loss_ib": 0.0090169757604599, + "step": 1311 + }, + { + "ce_ib": 5.839422702789307, + "ce_orig": 0.5762996673583984, + "epoch": 0.3770220720396865, + "kl_loss": 0.23966707289218903, + "loss_ib": 0.008236093446612358, + "step": 1311 + }, + { + "ce_ib": 6.2157158851623535, + "ce_orig": 0.4993678331375122, + "epoch": 0.3773096556186642, + "kl_loss": 0.2873460054397583, + "loss_ib": 0.00908917561173439, + "step": 1312 + }, + { + "ce_ib": 6.513637065887451, + "ce_orig": 0.633793830871582, + "epoch": 0.3773096556186642, + "kl_loss": 0.28910765051841736, + "loss_ib": 0.009404714219272137, + "step": 1312 + }, + { + "ce_ib": 5.872572422027588, + "ce_orig": 0.7542458772659302, + "epoch": 0.3773096556186642, + "kl_loss": 0.1863556206226349, + "loss_ib": 0.00773612828925252, + "step": 1312 + }, + { + "ce_ib": 4.847990036010742, + "ce_orig": 0.5747877359390259, + "epoch": 0.3773096556186642, + "kl_loss": 0.19151942431926727, + "loss_ib": 0.006763183977454901, + "step": 1312 + }, + { + "ce_ib": 4.407119274139404, + "ce_orig": 0.8114118576049805, + "epoch": 0.3775972391976418, + "kl_loss": 0.19292503595352173, + "loss_ib": 0.006336369551718235, + "step": 1313 + }, + { + "ce_ib": 8.981295585632324, + "ce_orig": 1.2180886268615723, + "epoch": 0.3775972391976418, + "kl_loss": 0.2586621344089508, + "loss_ib": 0.011567916721105576, + "step": 1313 + }, + { + "ce_ib": 7.540792942047119, + "ce_orig": 1.174260139465332, + "epoch": 0.3775972391976418, + "kl_loss": 0.25622671842575073, + "loss_ib": 0.010103059932589531, + "step": 1313 + }, + { + "ce_ib": 6.744704246520996, + "ce_orig": 0.8999756574630737, + "epoch": 0.3775972391976418, + "kl_loss": 0.25654101371765137, + "loss_ib": 0.009310114197432995, + "step": 1313 + }, + { + "ce_ib": 5.615959167480469, + "ce_orig": 0.8794683814048767, + "epoch": 0.37788482277661944, + "kl_loss": 0.26790547370910645, + "loss_ib": 0.008295013569295406, + "step": 1314 + }, + { + "ce_ib": 4.905920028686523, + "ce_orig": 0.7111831307411194, + "epoch": 0.37788482277661944, + "kl_loss": 0.21155670285224915, + "loss_ib": 0.007021486759185791, + "step": 1314 + }, + { + "ce_ib": 5.637347221374512, + "ce_orig": 0.5438764095306396, + "epoch": 0.37788482277661944, + "kl_loss": 0.2609032094478607, + "loss_ib": 0.008246378973126411, + "step": 1314 + }, + { + "ce_ib": 7.9561448097229, + "ce_orig": 1.2378239631652832, + "epoch": 0.37788482277661944, + "kl_loss": 0.25759926438331604, + "loss_ib": 0.010532137006521225, + "step": 1314 + }, + { + "epoch": 0.37817240635559707, + "grad_norm": 0.10208520293235779, + "learning_rate": 9.763370323665233e-06, + "loss": 0.87, + "step": 1315 + }, + { + "ce_ib": 3.863255739212036, + "ce_orig": 0.4220186173915863, + "epoch": 0.37817240635559707, + "kl_loss": 0.2792007029056549, + "loss_ib": 0.006655262783169746, + "step": 1315 + }, + { + "ce_ib": 8.859979629516602, + "ce_orig": 1.0536916255950928, + "epoch": 0.37817240635559707, + "kl_loss": 0.5747706294059753, + "loss_ib": 0.01460768561810255, + "step": 1315 + }, + { + "ce_ib": 6.9834771156311035, + "ce_orig": 1.0781289339065552, + "epoch": 0.37817240635559707, + "kl_loss": 0.28461551666259766, + "loss_ib": 0.009829632006585598, + "step": 1315 + }, + { + "ce_ib": 5.6756134033203125, + "ce_orig": 0.9484390020370483, + "epoch": 0.37817240635559707, + "kl_loss": 0.20451746881008148, + "loss_ib": 0.007720788009464741, + "step": 1315 + }, + { + "ce_ib": 6.136423110961914, + "ce_orig": 0.7416388988494873, + "epoch": 0.37845998993457475, + "kl_loss": 0.16922442615032196, + "loss_ib": 0.007828667759895325, + "step": 1316 + }, + { + "ce_ib": 3.612623453140259, + "ce_orig": 0.6604862809181213, + "epoch": 0.37845998993457475, + "kl_loss": 0.21814820170402527, + "loss_ib": 0.005794105585664511, + "step": 1316 + }, + { + "ce_ib": 6.457236289978027, + "ce_orig": 0.6525535583496094, + "epoch": 0.37845998993457475, + "kl_loss": 0.3399786353111267, + "loss_ib": 0.00985702220350504, + "step": 1316 + }, + { + "ce_ib": 4.825037002563477, + "ce_orig": 0.5446357727050781, + "epoch": 0.37845998993457475, + "kl_loss": 0.2203066349029541, + "loss_ib": 0.007028103340417147, + "step": 1316 + }, + { + "ce_ib": 5.3223772048950195, + "ce_orig": 0.48080676794052124, + "epoch": 0.37874757351355237, + "kl_loss": 0.48036542534828186, + "loss_ib": 0.010126031003892422, + "step": 1317 + }, + { + "ce_ib": 6.367911338806152, + "ce_orig": 0.6589131951332092, + "epoch": 0.37874757351355237, + "kl_loss": 0.31526780128479004, + "loss_ib": 0.009520589374005795, + "step": 1317 + }, + { + "ce_ib": 7.104604721069336, + "ce_orig": 0.8577982187271118, + "epoch": 0.37874757351355237, + "kl_loss": 0.25986504554748535, + "loss_ib": 0.009703255258500576, + "step": 1317 + }, + { + "ce_ib": 5.245115756988525, + "ce_orig": 0.6983909010887146, + "epoch": 0.37874757351355237, + "kl_loss": 0.3243550658226013, + "loss_ib": 0.008488666266202927, + "step": 1317 + }, + { + "ce_ib": 4.573901176452637, + "ce_orig": 0.602114200592041, + "epoch": 0.37903515709253, + "kl_loss": 0.17613765597343445, + "loss_ib": 0.006335277575999498, + "step": 1318 + }, + { + "ce_ib": 10.542227745056152, + "ce_orig": 1.4979974031448364, + "epoch": 0.37903515709253, + "kl_loss": 0.4047009348869324, + "loss_ib": 0.014589237980544567, + "step": 1318 + }, + { + "ce_ib": 7.663567543029785, + "ce_orig": 1.2707366943359375, + "epoch": 0.37903515709253, + "kl_loss": 0.3883098363876343, + "loss_ib": 0.011546666733920574, + "step": 1318 + }, + { + "ce_ib": 7.179001808166504, + "ce_orig": 1.0249370336532593, + "epoch": 0.37903515709253, + "kl_loss": 0.22374102473258972, + "loss_ib": 0.009416411630809307, + "step": 1318 + }, + { + "ce_ib": 5.827734470367432, + "ce_orig": 0.6614716649055481, + "epoch": 0.3793227406715077, + "kl_loss": 0.23256278038024902, + "loss_ib": 0.008153362199664116, + "step": 1319 + }, + { + "ce_ib": 5.04184627532959, + "ce_orig": 0.8984420299530029, + "epoch": 0.3793227406715077, + "kl_loss": 0.20100192725658417, + "loss_ib": 0.007051866035908461, + "step": 1319 + }, + { + "ce_ib": 3.4561145305633545, + "ce_orig": 0.6697774529457092, + "epoch": 0.3793227406715077, + "kl_loss": 0.19955700635910034, + "loss_ib": 0.0054516843520104885, + "step": 1319 + }, + { + "ce_ib": 8.484859466552734, + "ce_orig": 1.0996067523956299, + "epoch": 0.3793227406715077, + "kl_loss": 0.211076021194458, + "loss_ib": 0.010595619678497314, + "step": 1319 + }, + { + "epoch": 0.3796103242504853, + "grad_norm": 0.12847267091274261, + "learning_rate": 9.76100533719495e-06, + "loss": 0.8867, + "step": 1320 + }, + { + "ce_ib": 3.297279119491577, + "ce_orig": 0.6684339642524719, + "epoch": 0.3796103242504853, + "kl_loss": 0.20436504483222961, + "loss_ib": 0.005340929608792067, + "step": 1320 + }, + { + "ce_ib": 5.393133163452148, + "ce_orig": 0.7639580368995667, + "epoch": 0.3796103242504853, + "kl_loss": 0.3076980710029602, + "loss_ib": 0.008470113389194012, + "step": 1320 + }, + { + "ce_ib": 5.216917037963867, + "ce_orig": 0.8593165278434753, + "epoch": 0.3796103242504853, + "kl_loss": 0.2214372307062149, + "loss_ib": 0.007431289181113243, + "step": 1320 + }, + { + "ce_ib": 4.598435401916504, + "ce_orig": 0.5633476376533508, + "epoch": 0.3796103242504853, + "kl_loss": 0.20765292644500732, + "loss_ib": 0.0066749644465744495, + "step": 1320 + }, + { + "ce_ib": 5.472358226776123, + "ce_orig": 0.5398709774017334, + "epoch": 0.3798979078294629, + "kl_loss": 0.2486787736415863, + "loss_ib": 0.007959146052598953, + "step": 1321 + }, + { + "ce_ib": 7.950688362121582, + "ce_orig": 1.1804054975509644, + "epoch": 0.3798979078294629, + "kl_loss": 0.2736315429210663, + "loss_ib": 0.010687003843486309, + "step": 1321 + }, + { + "ce_ib": 6.406330585479736, + "ce_orig": 1.0859549045562744, + "epoch": 0.3798979078294629, + "kl_loss": 0.166508287191391, + "loss_ib": 0.00807141326367855, + "step": 1321 + }, + { + "ce_ib": 4.812961578369141, + "ce_orig": 0.7601636648178101, + "epoch": 0.3798979078294629, + "kl_loss": 0.30231884121894836, + "loss_ib": 0.007836150005459785, + "step": 1321 + }, + { + "ce_ib": 5.525980472564697, + "ce_orig": 0.8041296601295471, + "epoch": 0.3801854914084406, + "kl_loss": 0.2853173613548279, + "loss_ib": 0.008379153907299042, + "step": 1322 + }, + { + "ce_ib": 4.20200252532959, + "ce_orig": 0.8979824185371399, + "epoch": 0.3801854914084406, + "kl_loss": 0.4491305947303772, + "loss_ib": 0.00869330856949091, + "step": 1322 + }, + { + "ce_ib": 4.392397880554199, + "ce_orig": 0.6132926940917969, + "epoch": 0.3801854914084406, + "kl_loss": 0.15145298838615417, + "loss_ib": 0.005906927399337292, + "step": 1322 + }, + { + "ce_ib": 7.698770999908447, + "ce_orig": 1.1069796085357666, + "epoch": 0.3801854914084406, + "kl_loss": 0.26237034797668457, + "loss_ib": 0.010322474874556065, + "step": 1322 + }, + { + "ce_ib": 2.342242956161499, + "ce_orig": 0.16955788433551788, + "epoch": 0.3804730749874182, + "kl_loss": 0.5944281816482544, + "loss_ib": 0.008286524564027786, + "step": 1323 + }, + { + "ce_ib": 8.634743690490723, + "ce_orig": 1.3468730449676514, + "epoch": 0.3804730749874182, + "kl_loss": 0.2927013337612152, + "loss_ib": 0.011561756953597069, + "step": 1323 + }, + { + "ce_ib": 8.237252235412598, + "ce_orig": 0.9930875301361084, + "epoch": 0.3804730749874182, + "kl_loss": 0.24464577436447144, + "loss_ib": 0.010683710686862469, + "step": 1323 + }, + { + "ce_ib": 4.277406215667725, + "ce_orig": 0.6701924204826355, + "epoch": 0.3804730749874182, + "kl_loss": 0.24595500528812408, + "loss_ib": 0.00673695607110858, + "step": 1323 + }, + { + "ce_ib": 8.226551055908203, + "ce_orig": 1.2383060455322266, + "epoch": 0.38076065856639585, + "kl_loss": 0.19104276597499847, + "loss_ib": 0.010136978700757027, + "step": 1324 + }, + { + "ce_ib": 5.543074607849121, + "ce_orig": 0.6039655208587646, + "epoch": 0.38076065856639585, + "kl_loss": 0.24177339673042297, + "loss_ib": 0.007960808463394642, + "step": 1324 + }, + { + "ce_ib": 4.560553073883057, + "ce_orig": 0.719609260559082, + "epoch": 0.38076065856639585, + "kl_loss": 0.2100091278553009, + "loss_ib": 0.006660644430667162, + "step": 1324 + }, + { + "ce_ib": 5.934459209442139, + "ce_orig": 0.7645275592803955, + "epoch": 0.38076065856639585, + "kl_loss": 0.22334322333335876, + "loss_ib": 0.008167891763150692, + "step": 1324 + }, + { + "epoch": 0.3810482421453735, + "grad_norm": 0.11815589666366577, + "learning_rate": 9.75862888035776e-06, + "loss": 0.8853, + "step": 1325 + }, + { + "ce_ib": 7.395681381225586, + "ce_orig": 1.428235411643982, + "epoch": 0.3810482421453735, + "kl_loss": 0.2361186295747757, + "loss_ib": 0.00975686777383089, + "step": 1325 + }, + { + "ce_ib": 8.123614311218262, + "ce_orig": 0.9370039701461792, + "epoch": 0.3810482421453735, + "kl_loss": 0.27744072675704956, + "loss_ib": 0.010898021049797535, + "step": 1325 + }, + { + "ce_ib": 6.170518398284912, + "ce_orig": 0.7545908689498901, + "epoch": 0.3810482421453735, + "kl_loss": 0.26478803157806396, + "loss_ib": 0.008818398229777813, + "step": 1325 + }, + { + "ce_ib": 6.98801851272583, + "ce_orig": 0.889594554901123, + "epoch": 0.3810482421453735, + "kl_loss": 0.25020599365234375, + "loss_ib": 0.009490078315138817, + "step": 1325 + }, + { + "ce_ib": 8.423517227172852, + "ce_orig": 0.5639511346817017, + "epoch": 0.38133582572435115, + "kl_loss": 0.3541921377182007, + "loss_ib": 0.011965438723564148, + "step": 1326 + }, + { + "ce_ib": 6.9096479415893555, + "ce_orig": 0.8797593712806702, + "epoch": 0.38133582572435115, + "kl_loss": 0.2511420249938965, + "loss_ib": 0.009421068243682384, + "step": 1326 + }, + { + "ce_ib": 4.40999698638916, + "ce_orig": 0.64253830909729, + "epoch": 0.38133582572435115, + "kl_loss": 0.2337566614151001, + "loss_ib": 0.00674756383523345, + "step": 1326 + }, + { + "ce_ib": 6.412195205688477, + "ce_orig": 0.7317036390304565, + "epoch": 0.38133582572435115, + "kl_loss": 0.40119338035583496, + "loss_ib": 0.010424129664897919, + "step": 1326 + }, + { + "ce_ib": 8.463457107543945, + "ce_orig": 1.4905399084091187, + "epoch": 0.3816234093033288, + "kl_loss": 0.27826082706451416, + "loss_ib": 0.011246065609157085, + "step": 1327 + }, + { + "ce_ib": 4.925488471984863, + "ce_orig": 0.7923012375831604, + "epoch": 0.3816234093033288, + "kl_loss": 0.24017053842544556, + "loss_ib": 0.007327193859964609, + "step": 1327 + }, + { + "ce_ib": 4.493514537811279, + "ce_orig": 0.6073864698410034, + "epoch": 0.3816234093033288, + "kl_loss": 0.13801854848861694, + "loss_ib": 0.005873700138181448, + "step": 1327 + }, + { + "ce_ib": 12.996380805969238, + "ce_orig": 2.183483123779297, + "epoch": 0.3816234093033288, + "kl_loss": 0.3698446750640869, + "loss_ib": 0.016694827005267143, + "step": 1327 + }, + { + "ce_ib": 5.160539627075195, + "ce_orig": 0.37488970160484314, + "epoch": 0.3819109928823064, + "kl_loss": 0.326717734336853, + "loss_ib": 0.008427716791629791, + "step": 1328 + }, + { + "ce_ib": 5.422794342041016, + "ce_orig": 0.7671301960945129, + "epoch": 0.3819109928823064, + "kl_loss": 0.2245626300573349, + "loss_ib": 0.007668420672416687, + "step": 1328 + }, + { + "ce_ib": 6.127049922943115, + "ce_orig": 0.7963330745697021, + "epoch": 0.3819109928823064, + "kl_loss": 0.36005595326423645, + "loss_ib": 0.00972760934382677, + "step": 1328 + }, + { + "ce_ib": 6.172006130218506, + "ce_orig": 0.7193319797515869, + "epoch": 0.3819109928823064, + "kl_loss": 0.4020848274230957, + "loss_ib": 0.010192854329943657, + "step": 1328 + }, + { + "ce_ib": 6.396267414093018, + "ce_orig": 0.5827741622924805, + "epoch": 0.3821985764612841, + "kl_loss": 0.25311774015426636, + "loss_ib": 0.00892744492739439, + "step": 1329 + }, + { + "ce_ib": 5.07418155670166, + "ce_orig": 0.4467667043209076, + "epoch": 0.3821985764612841, + "kl_loss": 0.2523980736732483, + "loss_ib": 0.007598162163048983, + "step": 1329 + }, + { + "ce_ib": 7.253851413726807, + "ce_orig": 0.934846818447113, + "epoch": 0.3821985764612841, + "kl_loss": 0.19089868664741516, + "loss_ib": 0.009162838570773602, + "step": 1329 + }, + { + "ce_ib": 3.882725954055786, + "ce_orig": 0.8223580121994019, + "epoch": 0.3821985764612841, + "kl_loss": 0.1783471554517746, + "loss_ib": 0.005666197277605534, + "step": 1329 + }, + { + "epoch": 0.3824861600402617, + "grad_norm": 0.11724074929952621, + "learning_rate": 9.75624095887909e-06, + "loss": 0.8862, + "step": 1330 + }, + { + "ce_ib": 7.525057315826416, + "ce_orig": 1.2038042545318604, + "epoch": 0.3824861600402617, + "kl_loss": 0.1984117329120636, + "loss_ib": 0.009509174153208733, + "step": 1330 + }, + { + "ce_ib": 4.756983280181885, + "ce_orig": 0.5972123742103577, + "epoch": 0.3824861600402617, + "kl_loss": 0.4368649423122406, + "loss_ib": 0.009125632233917713, + "step": 1330 + }, + { + "ce_ib": 3.6005241870880127, + "ce_orig": 0.4440179467201233, + "epoch": 0.3824861600402617, + "kl_loss": 0.3235924541950226, + "loss_ib": 0.006836448796093464, + "step": 1330 + }, + { + "ce_ib": 8.435921669006348, + "ce_orig": 0.8452567458152771, + "epoch": 0.3824861600402617, + "kl_loss": 0.2886652648448944, + "loss_ib": 0.011322574689984322, + "step": 1330 + }, + { + "ce_ib": 7.909755229949951, + "ce_orig": 0.9250390529632568, + "epoch": 0.3827737436192393, + "kl_loss": 0.19716186821460724, + "loss_ib": 0.00988137349486351, + "step": 1331 + }, + { + "ce_ib": 6.099967956542969, + "ce_orig": 0.807924211025238, + "epoch": 0.3827737436192393, + "kl_loss": 0.208805650472641, + "loss_ib": 0.008188024163246155, + "step": 1331 + }, + { + "ce_ib": 3.4478707313537598, + "ce_orig": 0.5189741253852844, + "epoch": 0.3827737436192393, + "kl_loss": 0.1719244122505188, + "loss_ib": 0.005167114548385143, + "step": 1331 + }, + { + "ce_ib": 4.225029468536377, + "ce_orig": 0.5319197773933411, + "epoch": 0.3827737436192393, + "kl_loss": 0.2567492127418518, + "loss_ib": 0.006792521104216576, + "step": 1331 + }, + { + "ce_ib": 3.7936832904815674, + "ce_orig": 0.6889621615409851, + "epoch": 0.383061327198217, + "kl_loss": 0.24284930527210236, + "loss_ib": 0.006222176365554333, + "step": 1332 + }, + { + "ce_ib": 6.814220905303955, + "ce_orig": 0.8868374824523926, + "epoch": 0.383061327198217, + "kl_loss": 0.20072780549526215, + "loss_ib": 0.008821499533951283, + "step": 1332 + }, + { + "ce_ib": 7.203690052032471, + "ce_orig": 0.9520978331565857, + "epoch": 0.383061327198217, + "kl_loss": 0.28056567907333374, + "loss_ib": 0.010009346529841423, + "step": 1332 + }, + { + "ce_ib": 8.070340156555176, + "ce_orig": 1.3334993124008179, + "epoch": 0.383061327198217, + "kl_loss": 0.1696416139602661, + "loss_ib": 0.009766755625605583, + "step": 1332 + }, + { + "ce_ib": 5.705083847045898, + "ce_orig": 0.6407719850540161, + "epoch": 0.38334891077719463, + "kl_loss": 0.29713699221611023, + "loss_ib": 0.008676453493535519, + "step": 1333 + }, + { + "ce_ib": 4.981590747833252, + "ce_orig": 0.5492885708808899, + "epoch": 0.38334891077719463, + "kl_loss": 0.28598642349243164, + "loss_ib": 0.007841454818844795, + "step": 1333 + }, + { + "ce_ib": 4.273902416229248, + "ce_orig": 0.807598888874054, + "epoch": 0.38334891077719463, + "kl_loss": 0.20297232270240784, + "loss_ib": 0.006303625646978617, + "step": 1333 + }, + { + "ce_ib": 9.406611442565918, + "ce_orig": 1.0958489179611206, + "epoch": 0.38334891077719463, + "kl_loss": 0.2598443031311035, + "loss_ib": 0.012005054391920567, + "step": 1333 + }, + { + "ce_ib": 7.128815650939941, + "ce_orig": 1.1524990797042847, + "epoch": 0.38363649435617225, + "kl_loss": 0.39675629138946533, + "loss_ib": 0.011096377857029438, + "step": 1334 + }, + { + "ce_ib": 10.28831958770752, + "ce_orig": 1.841440200805664, + "epoch": 0.38363649435617225, + "kl_loss": 0.318050354719162, + "loss_ib": 0.01346882339566946, + "step": 1334 + }, + { + "ce_ib": 3.544858932495117, + "ce_orig": 0.6753540635108948, + "epoch": 0.38363649435617225, + "kl_loss": 0.14195430278778076, + "loss_ib": 0.004964401945471764, + "step": 1334 + }, + { + "ce_ib": 7.335440158843994, + "ce_orig": 1.0214455127716064, + "epoch": 0.38363649435617225, + "kl_loss": 0.30033838748931885, + "loss_ib": 0.010338823311030865, + "step": 1334 + }, + { + "epoch": 0.3839240779351499, + "grad_norm": 0.10687928646802902, + "learning_rate": 9.753841578512007e-06, + "loss": 0.8394, + "step": 1335 + }, + { + "ce_ib": 3.9797632694244385, + "ce_orig": 0.8001569509506226, + "epoch": 0.3839240779351499, + "kl_loss": 0.40325748920440674, + "loss_ib": 0.008012338541448116, + "step": 1335 + }, + { + "ce_ib": 3.7276611328125, + "ce_orig": 0.7037749886512756, + "epoch": 0.3839240779351499, + "kl_loss": 0.17334817349910736, + "loss_ib": 0.005461142398416996, + "step": 1335 + }, + { + "ce_ib": 7.768494129180908, + "ce_orig": 1.434823989868164, + "epoch": 0.3839240779351499, + "kl_loss": 0.2680274546146393, + "loss_ib": 0.010448768734931946, + "step": 1335 + }, + { + "ce_ib": 8.82320785522461, + "ce_orig": 1.1123329401016235, + "epoch": 0.3839240779351499, + "kl_loss": 0.2079583704471588, + "loss_ib": 0.01090279221534729, + "step": 1335 + }, + { + "ce_ib": 8.984968185424805, + "ce_orig": 0.9062168598175049, + "epoch": 0.38421166151412756, + "kl_loss": 0.21713285148143768, + "loss_ib": 0.011156296357512474, + "step": 1336 + }, + { + "ce_ib": 4.361588478088379, + "ce_orig": 0.7425259351730347, + "epoch": 0.38421166151412756, + "kl_loss": 0.21430090069770813, + "loss_ib": 0.00650459760800004, + "step": 1336 + }, + { + "ce_ib": 8.561041831970215, + "ce_orig": 0.8657642006874084, + "epoch": 0.38421166151412756, + "kl_loss": 0.2691475749015808, + "loss_ib": 0.011252517811954021, + "step": 1336 + }, + { + "ce_ib": 8.476003646850586, + "ce_orig": 1.0116504430770874, + "epoch": 0.38421166151412756, + "kl_loss": 0.23885221779346466, + "loss_ib": 0.01086452603340149, + "step": 1336 + }, + { + "ce_ib": 6.69258451461792, + "ce_orig": 1.416684865951538, + "epoch": 0.3844992450931052, + "kl_loss": 0.2654043436050415, + "loss_ib": 0.00934662763029337, + "step": 1337 + }, + { + "ce_ib": 5.728124618530273, + "ce_orig": 0.5610059499740601, + "epoch": 0.3844992450931052, + "kl_loss": 0.35689449310302734, + "loss_ib": 0.00929707009345293, + "step": 1337 + }, + { + "ce_ib": 4.5632500648498535, + "ce_orig": 1.0310240983963013, + "epoch": 0.3844992450931052, + "kl_loss": 0.2000049650669098, + "loss_ib": 0.006563299801200628, + "step": 1337 + }, + { + "ce_ib": 6.055202007293701, + "ce_orig": 1.0701017379760742, + "epoch": 0.3844992450931052, + "kl_loss": 0.2184426188468933, + "loss_ib": 0.008239627815783024, + "step": 1337 + }, + { + "ce_ib": 5.917305946350098, + "ce_orig": 0.9330586791038513, + "epoch": 0.3847868286720828, + "kl_loss": 0.24308359622955322, + "loss_ib": 0.008348141796886921, + "step": 1338 + }, + { + "ce_ib": 6.002380847930908, + "ce_orig": 0.4452073574066162, + "epoch": 0.3847868286720828, + "kl_loss": 0.30033767223358154, + "loss_ib": 0.009005757048726082, + "step": 1338 + }, + { + "ce_ib": 7.772523403167725, + "ce_orig": 0.9946969151496887, + "epoch": 0.3847868286720828, + "kl_loss": 0.4542730450630188, + "loss_ib": 0.012315253727138042, + "step": 1338 + }, + { + "ce_ib": 8.752180099487305, + "ce_orig": 1.3333590030670166, + "epoch": 0.3847868286720828, + "kl_loss": 0.3577191233634949, + "loss_ib": 0.012329370714724064, + "step": 1338 + }, + { + "ce_ib": 8.994691848754883, + "ce_orig": 0.5966424942016602, + "epoch": 0.3850744122510605, + "kl_loss": 0.44705814123153687, + "loss_ib": 0.013465273194015026, + "step": 1339 + }, + { + "ce_ib": 9.827507019042969, + "ce_orig": 1.0684852600097656, + "epoch": 0.3850744122510605, + "kl_loss": 0.18542367219924927, + "loss_ib": 0.011681743897497654, + "step": 1339 + }, + { + "ce_ib": 5.672684192657471, + "ce_orig": 0.5647222995758057, + "epoch": 0.3850744122510605, + "kl_loss": 0.2828938961029053, + "loss_ib": 0.008501622825860977, + "step": 1339 + }, + { + "ce_ib": 7.591995716094971, + "ce_orig": 0.8451278209686279, + "epoch": 0.3850744122510605, + "kl_loss": 0.24813343584537506, + "loss_ib": 0.010073330253362656, + "step": 1339 + }, + { + "epoch": 0.3853619958300381, + "grad_norm": 0.10725533962249756, + "learning_rate": 9.75143074503717e-06, + "loss": 0.8553, + "step": 1340 + }, + { + "ce_ib": 8.64397144317627, + "ce_orig": 0.8796740174293518, + "epoch": 0.3853619958300381, + "kl_loss": 0.32401660084724426, + "loss_ib": 0.01188413705676794, + "step": 1340 + }, + { + "ce_ib": 8.900287628173828, + "ce_orig": 1.103305697441101, + "epoch": 0.3853619958300381, + "kl_loss": 0.23756858706474304, + "loss_ib": 0.011275973170995712, + "step": 1340 + }, + { + "ce_ib": 6.611321926116943, + "ce_orig": 0.9286351799964905, + "epoch": 0.3853619958300381, + "kl_loss": 0.2873968482017517, + "loss_ib": 0.009485290385782719, + "step": 1340 + }, + { + "ce_ib": 5.8496599197387695, + "ce_orig": 0.9573297500610352, + "epoch": 0.3853619958300381, + "kl_loss": 0.24602648615837097, + "loss_ib": 0.008309924043715, + "step": 1340 + }, + { + "ce_ib": 12.561427116394043, + "ce_orig": 2.141439199447632, + "epoch": 0.38564957940901573, + "kl_loss": 0.2753842771053314, + "loss_ib": 0.01531527005136013, + "step": 1341 + }, + { + "ce_ib": 8.508872032165527, + "ce_orig": 1.3350071907043457, + "epoch": 0.38564957940901573, + "kl_loss": 0.30021315813064575, + "loss_ib": 0.011511003598570824, + "step": 1341 + }, + { + "ce_ib": 8.320743560791016, + "ce_orig": 1.0415490865707397, + "epoch": 0.38564957940901573, + "kl_loss": 0.29242002964019775, + "loss_ib": 0.0112449424341321, + "step": 1341 + }, + { + "ce_ib": 3.407970905303955, + "ce_orig": 0.5049671530723572, + "epoch": 0.38564957940901573, + "kl_loss": 0.3130846917629242, + "loss_ib": 0.006538817659020424, + "step": 1341 + }, + { + "ce_ib": 5.786276340484619, + "ce_orig": 0.7689294815063477, + "epoch": 0.3859371629879934, + "kl_loss": 0.2797991633415222, + "loss_ib": 0.00858426745980978, + "step": 1342 + }, + { + "ce_ib": 3.7406654357910156, + "ce_orig": 0.6262723803520203, + "epoch": 0.3859371629879934, + "kl_loss": 0.26983505487442017, + "loss_ib": 0.006439015734940767, + "step": 1342 + }, + { + "ce_ib": 7.9180755615234375, + "ce_orig": 0.9678489565849304, + "epoch": 0.3859371629879934, + "kl_loss": 0.3031494617462158, + "loss_ib": 0.010949570685625076, + "step": 1342 + }, + { + "ce_ib": 6.733755111694336, + "ce_orig": 0.9541864395141602, + "epoch": 0.3859371629879934, + "kl_loss": 0.30572301149368286, + "loss_ib": 0.00979098491370678, + "step": 1342 + }, + { + "ce_ib": 6.16925573348999, + "ce_orig": 0.9874963760375977, + "epoch": 0.38622474656697103, + "kl_loss": 0.2973823547363281, + "loss_ib": 0.00914307963103056, + "step": 1343 + }, + { + "ce_ib": 6.805942535400391, + "ce_orig": 0.5375406742095947, + "epoch": 0.38622474656697103, + "kl_loss": 0.2381344437599182, + "loss_ib": 0.009187286719679832, + "step": 1343 + }, + { + "ce_ib": 4.226902484893799, + "ce_orig": 0.5966165065765381, + "epoch": 0.38622474656697103, + "kl_loss": 0.16867578029632568, + "loss_ib": 0.005913660395890474, + "step": 1343 + }, + { + "ce_ib": 7.073963165283203, + "ce_orig": 1.2386435270309448, + "epoch": 0.38622474656697103, + "kl_loss": 0.3061671853065491, + "loss_ib": 0.010135634802281857, + "step": 1343 + }, + { + "ce_ib": 5.890805721282959, + "ce_orig": 0.6538254618644714, + "epoch": 0.38651233014594866, + "kl_loss": 0.5040521621704102, + "loss_ib": 0.010931327007710934, + "step": 1344 + }, + { + "ce_ib": 3.833566665649414, + "ce_orig": 0.6508686542510986, + "epoch": 0.38651233014594866, + "kl_loss": 0.20960384607315063, + "loss_ib": 0.005929604638367891, + "step": 1344 + }, + { + "ce_ib": 4.755572319030762, + "ce_orig": 0.8241642713546753, + "epoch": 0.38651233014594866, + "kl_loss": 0.25931376218795776, + "loss_ib": 0.0073487102054059505, + "step": 1344 + }, + { + "ce_ib": 6.719439506530762, + "ce_orig": 0.9579751491546631, + "epoch": 0.38651233014594866, + "kl_loss": 0.2530074119567871, + "loss_ib": 0.00924951396882534, + "step": 1344 + }, + { + "epoch": 0.3867999137249263, + "grad_norm": 0.10795299708843231, + "learning_rate": 9.749008464262837e-06, + "loss": 0.8552, + "step": 1345 + }, + { + "ce_ib": 5.197643756866455, + "ce_orig": 1.011999487876892, + "epoch": 0.3867999137249263, + "kl_loss": 0.24813109636306763, + "loss_ib": 0.007678954396396875, + "step": 1345 + }, + { + "ce_ib": 5.459554195404053, + "ce_orig": 0.7550332546234131, + "epoch": 0.3867999137249263, + "kl_loss": 0.23652155697345734, + "loss_ib": 0.007824769243597984, + "step": 1345 + }, + { + "ce_ib": 7.382880210876465, + "ce_orig": 1.0171791315078735, + "epoch": 0.3867999137249263, + "kl_loss": 0.2015581727027893, + "loss_ib": 0.009398462250828743, + "step": 1345 + }, + { + "ce_ib": 4.113571643829346, + "ce_orig": 0.6482676267623901, + "epoch": 0.3867999137249263, + "kl_loss": 0.3286363482475281, + "loss_ib": 0.007399934809654951, + "step": 1345 + }, + { + "ce_ib": 4.742993354797363, + "ce_orig": 0.8031437397003174, + "epoch": 0.38708749730390396, + "kl_loss": 0.17718759179115295, + "loss_ib": 0.006514869164675474, + "step": 1346 + }, + { + "ce_ib": 5.845980644226074, + "ce_orig": 0.6565824151039124, + "epoch": 0.38708749730390396, + "kl_loss": 0.20798616111278534, + "loss_ib": 0.007925841957330704, + "step": 1346 + }, + { + "ce_ib": 6.087835788726807, + "ce_orig": 0.7384578585624695, + "epoch": 0.38708749730390396, + "kl_loss": 0.2806638777256012, + "loss_ib": 0.008894474245607853, + "step": 1346 + }, + { + "ce_ib": 4.014012813568115, + "ce_orig": 0.737417995929718, + "epoch": 0.38708749730390396, + "kl_loss": 0.2056456208229065, + "loss_ib": 0.006070469040423632, + "step": 1346 + }, + { + "ce_ib": 4.660091400146484, + "ce_orig": 0.506597638130188, + "epoch": 0.3873750808828816, + "kl_loss": 0.2430647909641266, + "loss_ib": 0.007090738974511623, + "step": 1347 + }, + { + "ce_ib": 3.7780253887176514, + "ce_orig": 0.6846595406532288, + "epoch": 0.3873750808828816, + "kl_loss": 0.1995660960674286, + "loss_ib": 0.005773685872554779, + "step": 1347 + }, + { + "ce_ib": 4.743012428283691, + "ce_orig": 0.7443711161613464, + "epoch": 0.3873750808828816, + "kl_loss": 0.22071635723114014, + "loss_ib": 0.006950175389647484, + "step": 1347 + }, + { + "ce_ib": 7.79235315322876, + "ce_orig": 0.5954486131668091, + "epoch": 0.3873750808828816, + "kl_loss": 0.22928141057491302, + "loss_ib": 0.010085166431963444, + "step": 1347 + }, + { + "ce_ib": 6.381791114807129, + "ce_orig": 0.977142333984375, + "epoch": 0.3876626644618592, + "kl_loss": 0.5138674974441528, + "loss_ib": 0.011520466767251492, + "step": 1348 + }, + { + "ce_ib": 7.824554920196533, + "ce_orig": 0.7537108659744263, + "epoch": 0.3876626644618592, + "kl_loss": 0.3558143377304077, + "loss_ib": 0.011382699012756348, + "step": 1348 + }, + { + "ce_ib": 5.348059177398682, + "ce_orig": 0.6633920073509216, + "epoch": 0.3876626644618592, + "kl_loss": 0.19098712503910065, + "loss_ib": 0.007257930468767881, + "step": 1348 + }, + { + "ce_ib": 6.024688243865967, + "ce_orig": 0.9390560388565063, + "epoch": 0.3876626644618592, + "kl_loss": 0.23263616859912872, + "loss_ib": 0.00835104938596487, + "step": 1348 + }, + { + "ce_ib": 8.379621505737305, + "ce_orig": 1.0396898984909058, + "epoch": 0.3879502480408369, + "kl_loss": 0.28618064522743225, + "loss_ib": 0.0112414276227355, + "step": 1349 + }, + { + "ce_ib": 7.121238708496094, + "ce_orig": 0.9079939723014832, + "epoch": 0.3879502480408369, + "kl_loss": 0.25672510266304016, + "loss_ib": 0.009688489139080048, + "step": 1349 + }, + { + "ce_ib": 4.600920677185059, + "ce_orig": 0.5308390855789185, + "epoch": 0.3879502480408369, + "kl_loss": 0.24835561215877533, + "loss_ib": 0.007084476761519909, + "step": 1349 + }, + { + "ce_ib": 6.664026260375977, + "ce_orig": 0.8902830481529236, + "epoch": 0.3879502480408369, + "kl_loss": 0.15312698483467102, + "loss_ib": 0.008195295929908752, + "step": 1349 + }, + { + "epoch": 0.3882378316198145, + "grad_norm": 0.11668138206005096, + "learning_rate": 9.746574742024846e-06, + "loss": 0.8753, + "step": 1350 + }, + { + "ce_ib": 5.689170837402344, + "ce_orig": 0.9190986156463623, + "epoch": 0.3882378316198145, + "kl_loss": 0.2182837426662445, + "loss_ib": 0.00787200778722763, + "step": 1350 + }, + { + "ce_ib": 9.147358894348145, + "ce_orig": 1.1027569770812988, + "epoch": 0.3882378316198145, + "kl_loss": 0.3216399550437927, + "loss_ib": 0.012363757938146591, + "step": 1350 + }, + { + "ce_ib": 8.353636741638184, + "ce_orig": 0.9055649638175964, + "epoch": 0.3882378316198145, + "kl_loss": 0.2102494239807129, + "loss_ib": 0.010456129908561707, + "step": 1350 + }, + { + "ce_ib": 11.257818222045898, + "ce_orig": 1.819875717163086, + "epoch": 0.3882378316198145, + "kl_loss": 0.3288717567920685, + "loss_ib": 0.014546535909175873, + "step": 1350 + }, + { + "ce_ib": 9.890776634216309, + "ce_orig": 1.695237636566162, + "epoch": 0.38852541519879213, + "kl_loss": 0.2624281644821167, + "loss_ib": 0.012515057809650898, + "step": 1351 + }, + { + "ce_ib": 8.211555480957031, + "ce_orig": 0.7318482995033264, + "epoch": 0.38852541519879213, + "kl_loss": 0.3202785551548004, + "loss_ib": 0.011414340697228909, + "step": 1351 + }, + { + "ce_ib": 4.634771347045898, + "ce_orig": 0.513573944568634, + "epoch": 0.38852541519879213, + "kl_loss": 0.17353704571723938, + "loss_ib": 0.0063701411709189415, + "step": 1351 + }, + { + "ce_ib": 6.764926433563232, + "ce_orig": 1.0543662309646606, + "epoch": 0.38852541519879213, + "kl_loss": 0.2905517816543579, + "loss_ib": 0.009670443832874298, + "step": 1351 + }, + { + "ce_ib": 7.045490264892578, + "ce_orig": 0.8002457022666931, + "epoch": 0.3888129987777698, + "kl_loss": 0.29713061451911926, + "loss_ib": 0.010016796179115772, + "step": 1352 + }, + { + "ce_ib": 8.436086654663086, + "ce_orig": 0.9077552556991577, + "epoch": 0.3888129987777698, + "kl_loss": 0.3642025589942932, + "loss_ib": 0.012078111991286278, + "step": 1352 + }, + { + "ce_ib": 11.715314865112305, + "ce_orig": 2.0356435775756836, + "epoch": 0.3888129987777698, + "kl_loss": 0.28554120659828186, + "loss_ib": 0.01457072701305151, + "step": 1352 + }, + { + "ce_ib": 4.3168230056762695, + "ce_orig": 0.7764055132865906, + "epoch": 0.3888129987777698, + "kl_loss": 0.5350329875946045, + "loss_ib": 0.009667153470218182, + "step": 1352 + }, + { + "ce_ib": 8.007417678833008, + "ce_orig": 1.0445178747177124, + "epoch": 0.38910058235674744, + "kl_loss": 0.35413455963134766, + "loss_ib": 0.011548763141036034, + "step": 1353 + }, + { + "ce_ib": 6.87570333480835, + "ce_orig": 0.5900187492370605, + "epoch": 0.38910058235674744, + "kl_loss": 0.29607564210891724, + "loss_ib": 0.009836459532380104, + "step": 1353 + }, + { + "ce_ib": 8.155138969421387, + "ce_orig": 1.0756138563156128, + "epoch": 0.38910058235674744, + "kl_loss": 0.3444306254386902, + "loss_ib": 0.011599444784224033, + "step": 1353 + }, + { + "ce_ib": 7.465303421020508, + "ce_orig": 0.5089307427406311, + "epoch": 0.38910058235674744, + "kl_loss": 0.2902388274669647, + "loss_ib": 0.01036769151687622, + "step": 1353 + }, + { + "ce_ib": 4.054125785827637, + "ce_orig": 0.5274952054023743, + "epoch": 0.38938816593572506, + "kl_loss": 0.28459879755973816, + "loss_ib": 0.006900113541632891, + "step": 1354 + }, + { + "ce_ib": 5.810909748077393, + "ce_orig": 0.7706413865089417, + "epoch": 0.38938816593572506, + "kl_loss": 0.30980196595191956, + "loss_ib": 0.00890892930328846, + "step": 1354 + }, + { + "ce_ib": 6.011123180389404, + "ce_orig": 0.8576530814170837, + "epoch": 0.38938816593572506, + "kl_loss": 0.35513028502464294, + "loss_ib": 0.009562425315380096, + "step": 1354 + }, + { + "ce_ib": 4.335852146148682, + "ce_orig": 0.8391232490539551, + "epoch": 0.38938816593572506, + "kl_loss": 0.2350061982870102, + "loss_ib": 0.006685914471745491, + "step": 1354 + }, + { + "epoch": 0.3896757495147027, + "grad_norm": 0.10238523036241531, + "learning_rate": 9.744129584186599e-06, + "loss": 0.8873, + "step": 1355 + }, + { + "ce_ib": 6.874613285064697, + "ce_orig": 1.2522763013839722, + "epoch": 0.3896757495147027, + "kl_loss": 0.26102685928344727, + "loss_ib": 0.009484881535172462, + "step": 1355 + }, + { + "ce_ib": 3.02353835105896, + "ce_orig": 0.5653353929519653, + "epoch": 0.3896757495147027, + "kl_loss": 0.17425790429115295, + "loss_ib": 0.004766117315739393, + "step": 1355 + }, + { + "ce_ib": 8.993725776672363, + "ce_orig": 1.1802468299865723, + "epoch": 0.3896757495147027, + "kl_loss": 0.3000760078430176, + "loss_ib": 0.01199448574334383, + "step": 1355 + }, + { + "ce_ib": 9.069596290588379, + "ce_orig": 1.3481104373931885, + "epoch": 0.3896757495147027, + "kl_loss": 0.3342220187187195, + "loss_ib": 0.012411816045641899, + "step": 1355 + }, + { + "ce_ib": 5.213831901550293, + "ce_orig": 1.1847585439682007, + "epoch": 0.38996333309368036, + "kl_loss": 0.27714553475379944, + "loss_ib": 0.007985287345945835, + "step": 1356 + }, + { + "ce_ib": 6.282614707946777, + "ce_orig": 0.9747211337089539, + "epoch": 0.38996333309368036, + "kl_loss": 0.2517557144165039, + "loss_ib": 0.008800172246992588, + "step": 1356 + }, + { + "ce_ib": 5.210909843444824, + "ce_orig": 0.6339356899261475, + "epoch": 0.38996333309368036, + "kl_loss": 0.22786879539489746, + "loss_ib": 0.007489597424864769, + "step": 1356 + }, + { + "ce_ib": 6.147688865661621, + "ce_orig": 1.0988351106643677, + "epoch": 0.38996333309368036, + "kl_loss": 0.24896582961082458, + "loss_ib": 0.008637347258627415, + "step": 1356 + }, + { + "ce_ib": 7.1301350593566895, + "ce_orig": 1.1670876741409302, + "epoch": 0.390250916672658, + "kl_loss": 0.20031045377254486, + "loss_ib": 0.009133240208029747, + "step": 1357 + }, + { + "ce_ib": 5.422787189483643, + "ce_orig": 0.8434985280036926, + "epoch": 0.390250916672658, + "kl_loss": 0.28638529777526855, + "loss_ib": 0.008286640048027039, + "step": 1357 + }, + { + "ce_ib": 9.074339866638184, + "ce_orig": 1.444636344909668, + "epoch": 0.390250916672658, + "kl_loss": 0.3131384253501892, + "loss_ib": 0.012205724604427814, + "step": 1357 + }, + { + "ce_ib": 4.545176029205322, + "ce_orig": 0.431958943605423, + "epoch": 0.390250916672658, + "kl_loss": 0.26394376158714294, + "loss_ib": 0.007184613961726427, + "step": 1357 + }, + { + "ce_ib": 6.025633335113525, + "ce_orig": 0.7248860001564026, + "epoch": 0.3905385002516356, + "kl_loss": 0.30378299951553345, + "loss_ib": 0.009063462726771832, + "step": 1358 + }, + { + "ce_ib": 4.739029407501221, + "ce_orig": 0.6262908577919006, + "epoch": 0.3905385002516356, + "kl_loss": 0.2560563087463379, + "loss_ib": 0.00729959225282073, + "step": 1358 + }, + { + "ce_ib": 7.491399765014648, + "ce_orig": 1.0817595720291138, + "epoch": 0.3905385002516356, + "kl_loss": 0.5565602779388428, + "loss_ib": 0.013057002797722816, + "step": 1358 + }, + { + "ce_ib": 6.858555793762207, + "ce_orig": 1.060663104057312, + "epoch": 0.3905385002516356, + "kl_loss": 0.26650601625442505, + "loss_ib": 0.00952361524105072, + "step": 1358 + }, + { + "ce_ib": 8.05048656463623, + "ce_orig": 1.2159759998321533, + "epoch": 0.3908260838306133, + "kl_loss": 0.4176337420940399, + "loss_ib": 0.012226823717355728, + "step": 1359 + }, + { + "ce_ib": 8.304803848266602, + "ce_orig": 0.8402754068374634, + "epoch": 0.3908260838306133, + "kl_loss": 0.3264410197734833, + "loss_ib": 0.011569214053452015, + "step": 1359 + }, + { + "ce_ib": 8.870750427246094, + "ce_orig": 0.8822159171104431, + "epoch": 0.3908260838306133, + "kl_loss": 0.26001280546188354, + "loss_ib": 0.011470877565443516, + "step": 1359 + }, + { + "ce_ib": 9.573052406311035, + "ce_orig": 1.2161577939987183, + "epoch": 0.3908260838306133, + "kl_loss": 0.2086510956287384, + "loss_ib": 0.011659563519060612, + "step": 1359 + }, + { + "epoch": 0.3911136674095909, + "grad_norm": 0.11230572313070297, + "learning_rate": 9.741672996639046e-06, + "loss": 0.8671, + "step": 1360 + }, + { + "ce_ib": 5.597973823547363, + "ce_orig": 0.8666937351226807, + "epoch": 0.3911136674095909, + "kl_loss": 0.15866965055465698, + "loss_ib": 0.007184670306742191, + "step": 1360 + }, + { + "ce_ib": 1.7455174922943115, + "ce_orig": 0.21882081031799316, + "epoch": 0.3911136674095909, + "kl_loss": 0.5883793234825134, + "loss_ib": 0.007629310712218285, + "step": 1360 + }, + { + "ce_ib": 5.6036505699157715, + "ce_orig": 1.038603663444519, + "epoch": 0.3911136674095909, + "kl_loss": 0.21248281002044678, + "loss_ib": 0.007728478871285915, + "step": 1360 + }, + { + "ce_ib": 7.444608688354492, + "ce_orig": 1.4021137952804565, + "epoch": 0.3911136674095909, + "kl_loss": 0.2528078556060791, + "loss_ib": 0.009972686879336834, + "step": 1360 + }, + { + "ce_ib": 5.095190525054932, + "ce_orig": 0.4394410252571106, + "epoch": 0.39140125098856854, + "kl_loss": 0.2796628475189209, + "loss_ib": 0.007891818881034851, + "step": 1361 + }, + { + "ce_ib": 3.942939281463623, + "ce_orig": 0.5083321928977966, + "epoch": 0.39140125098856854, + "kl_loss": 0.1425539255142212, + "loss_ib": 0.005368478130549192, + "step": 1361 + }, + { + "ce_ib": 3.894341468811035, + "ce_orig": 0.5946666598320007, + "epoch": 0.39140125098856854, + "kl_loss": 0.18116998672485352, + "loss_ib": 0.005706041119992733, + "step": 1361 + }, + { + "ce_ib": 3.2327067852020264, + "ce_orig": 0.5612151026725769, + "epoch": 0.39140125098856854, + "kl_loss": 0.1936264932155609, + "loss_ib": 0.005168972071260214, + "step": 1361 + }, + { + "ce_ib": 8.949872970581055, + "ce_orig": 1.2524312734603882, + "epoch": 0.3916888345675462, + "kl_loss": 0.25393006205558777, + "loss_ib": 0.011489172466099262, + "step": 1362 + }, + { + "ce_ib": 5.752600193023682, + "ce_orig": 0.43837770819664, + "epoch": 0.3916888345675462, + "kl_loss": 0.23531876504421234, + "loss_ib": 0.008105788379907608, + "step": 1362 + }, + { + "ce_ib": 5.596198558807373, + "ce_orig": 0.5549354553222656, + "epoch": 0.3916888345675462, + "kl_loss": 0.40296921133995056, + "loss_ib": 0.009625890292227268, + "step": 1362 + }, + { + "ce_ib": 6.891479969024658, + "ce_orig": 1.2710909843444824, + "epoch": 0.3916888345675462, + "kl_loss": 0.2136833369731903, + "loss_ib": 0.009028312750160694, + "step": 1362 + }, + { + "ce_ib": 6.111293792724609, + "ce_orig": 1.0104577541351318, + "epoch": 0.39197641814652384, + "kl_loss": 0.21573954820632935, + "loss_ib": 0.0082686897367239, + "step": 1363 + }, + { + "ce_ib": 4.847911357879639, + "ce_orig": 0.6719464063644409, + "epoch": 0.39197641814652384, + "kl_loss": 0.18924060463905334, + "loss_ib": 0.006740317214280367, + "step": 1363 + }, + { + "ce_ib": 7.202842712402344, + "ce_orig": 0.8166525363922119, + "epoch": 0.39197641814652384, + "kl_loss": 0.2810524106025696, + "loss_ib": 0.010013367049396038, + "step": 1363 + }, + { + "ce_ib": 13.842483520507812, + "ce_orig": 2.174565076828003, + "epoch": 0.39197641814652384, + "kl_loss": 0.2552925944328308, + "loss_ib": 0.016395408660173416, + "step": 1363 + }, + { + "ce_ib": 9.695000648498535, + "ce_orig": 1.372729778289795, + "epoch": 0.39226400172550147, + "kl_loss": 0.2769021987915039, + "loss_ib": 0.012464022263884544, + "step": 1364 + }, + { + "ce_ib": 7.319131374359131, + "ce_orig": 0.9690439701080322, + "epoch": 0.39226400172550147, + "kl_loss": 0.30465811491012573, + "loss_ib": 0.010365712456405163, + "step": 1364 + }, + { + "ce_ib": 8.980354309082031, + "ce_orig": 1.4327776432037354, + "epoch": 0.39226400172550147, + "kl_loss": 0.21982143819332123, + "loss_ib": 0.011178568005561829, + "step": 1364 + }, + { + "ce_ib": 5.232222080230713, + "ce_orig": 0.9438716173171997, + "epoch": 0.39226400172550147, + "kl_loss": 0.21315476298332214, + "loss_ib": 0.007363769691437483, + "step": 1364 + }, + { + "epoch": 0.3925515853044791, + "grad_norm": 0.11538668721914291, + "learning_rate": 9.73920498530068e-06, + "loss": 0.8671, + "step": 1365 + }, + { + "ce_ib": 4.021336555480957, + "ce_orig": 0.4584941864013672, + "epoch": 0.3925515853044791, + "kl_loss": 0.3306760787963867, + "loss_ib": 0.0073280977085232735, + "step": 1365 + }, + { + "ce_ib": 3.9117350578308105, + "ce_orig": 0.3970031440258026, + "epoch": 0.3925515853044791, + "kl_loss": 0.1676924228668213, + "loss_ib": 0.005588659085333347, + "step": 1365 + }, + { + "ce_ib": 4.400966167449951, + "ce_orig": 0.5858869552612305, + "epoch": 0.3925515853044791, + "kl_loss": 0.5540776252746582, + "loss_ib": 0.009941741824150085, + "step": 1365 + }, + { + "ce_ib": 3.1652352809906006, + "ce_orig": 0.5204657912254333, + "epoch": 0.3925515853044791, + "kl_loss": 0.14290514588356018, + "loss_ib": 0.004594286438077688, + "step": 1365 + }, + { + "ce_ib": 8.941207885742188, + "ce_orig": 1.6999166011810303, + "epoch": 0.39283916888345677, + "kl_loss": 0.20357975363731384, + "loss_ib": 0.010977005586028099, + "step": 1366 + }, + { + "ce_ib": 9.166007995605469, + "ce_orig": 1.3682373762130737, + "epoch": 0.39283916888345677, + "kl_loss": 0.3395346999168396, + "loss_ib": 0.012561354786157608, + "step": 1366 + }, + { + "ce_ib": 4.455244541168213, + "ce_orig": 0.6007863283157349, + "epoch": 0.39283916888345677, + "kl_loss": 0.25369763374328613, + "loss_ib": 0.006992220878601074, + "step": 1366 + }, + { + "ce_ib": 7.158716201782227, + "ce_orig": 1.056156039237976, + "epoch": 0.39283916888345677, + "kl_loss": 0.18826474249362946, + "loss_ib": 0.00904136337339878, + "step": 1366 + }, + { + "ce_ib": 4.3503499031066895, + "ce_orig": 0.6633068323135376, + "epoch": 0.3931267524624344, + "kl_loss": 0.2977406978607178, + "loss_ib": 0.007327756844460964, + "step": 1367 + }, + { + "ce_ib": 2.9677188396453857, + "ce_orig": 0.5597977638244629, + "epoch": 0.3931267524624344, + "kl_loss": 0.18398423492908478, + "loss_ib": 0.004807560704648495, + "step": 1367 + }, + { + "ce_ib": 3.997077226638794, + "ce_orig": 0.5306004881858826, + "epoch": 0.3931267524624344, + "kl_loss": 0.3879128694534302, + "loss_ib": 0.007876206189393997, + "step": 1367 + }, + { + "ce_ib": 5.02707052230835, + "ce_orig": 0.6347300410270691, + "epoch": 0.3931267524624344, + "kl_loss": 0.3675246834754944, + "loss_ib": 0.008702317252755165, + "step": 1367 + }, + { + "ce_ib": 6.142372131347656, + "ce_orig": 0.7465280294418335, + "epoch": 0.393414336041412, + "kl_loss": 0.17125555872917175, + "loss_ib": 0.007854927331209183, + "step": 1368 + }, + { + "ce_ib": 6.2471022605896, + "ce_orig": 0.6920092701911926, + "epoch": 0.393414336041412, + "kl_loss": 0.26138269901275635, + "loss_ib": 0.008860929869115353, + "step": 1368 + }, + { + "ce_ib": 5.284491062164307, + "ce_orig": 0.9602318406105042, + "epoch": 0.393414336041412, + "kl_loss": 0.19777587056159973, + "loss_ib": 0.007262249942868948, + "step": 1368 + }, + { + "ce_ib": 3.394522190093994, + "ce_orig": 0.4060690104961395, + "epoch": 0.393414336041412, + "kl_loss": 0.3864419460296631, + "loss_ib": 0.007258941885083914, + "step": 1368 + }, + { + "ce_ib": 5.781606197357178, + "ce_orig": 0.7252171039581299, + "epoch": 0.3937019196203897, + "kl_loss": 0.29058459401130676, + "loss_ib": 0.008687451481819153, + "step": 1369 + }, + { + "ce_ib": 8.223978996276855, + "ce_orig": 1.25217604637146, + "epoch": 0.3937019196203897, + "kl_loss": 0.2124319076538086, + "loss_ib": 0.010348298586905003, + "step": 1369 + }, + { + "ce_ib": 3.267852544784546, + "ce_orig": 0.5731252431869507, + "epoch": 0.3937019196203897, + "kl_loss": 0.16048312187194824, + "loss_ib": 0.004872683901339769, + "step": 1369 + }, + { + "ce_ib": 3.981436014175415, + "ce_orig": 0.656374990940094, + "epoch": 0.3937019196203897, + "kl_loss": 0.2220917046070099, + "loss_ib": 0.006202353164553642, + "step": 1369 + }, + { + "epoch": 0.3939895031993673, + "grad_norm": 0.1153147965669632, + "learning_rate": 9.73672555611751e-06, + "loss": 0.8134, + "step": 1370 + }, + { + "ce_ib": 4.606146335601807, + "ce_orig": 0.7800334095954895, + "epoch": 0.3939895031993673, + "kl_loss": 0.2652835249900818, + "loss_ib": 0.007258981466293335, + "step": 1370 + }, + { + "ce_ib": 5.3803324699401855, + "ce_orig": 0.7244141697883606, + "epoch": 0.3939895031993673, + "kl_loss": 0.15984582901000977, + "loss_ib": 0.006978790741413832, + "step": 1370 + }, + { + "ce_ib": 8.137627601623535, + "ce_orig": 1.3305286169052124, + "epoch": 0.3939895031993673, + "kl_loss": 0.20696598291397095, + "loss_ib": 0.010207287967205048, + "step": 1370 + }, + { + "ce_ib": 10.457786560058594, + "ce_orig": 1.3038825988769531, + "epoch": 0.3939895031993673, + "kl_loss": 0.2336128205060959, + "loss_ib": 0.01279391348361969, + "step": 1370 + }, + { + "ce_ib": 7.365723609924316, + "ce_orig": 0.9686176180839539, + "epoch": 0.39427708677834494, + "kl_loss": 0.32739341259002686, + "loss_ib": 0.010639658197760582, + "step": 1371 + }, + { + "ce_ib": 6.254762172698975, + "ce_orig": 1.2625272274017334, + "epoch": 0.39427708677834494, + "kl_loss": 0.2828513979911804, + "loss_ib": 0.009083276614546776, + "step": 1371 + }, + { + "ce_ib": 5.433732986450195, + "ce_orig": 0.9750329852104187, + "epoch": 0.39427708677834494, + "kl_loss": 0.1383676826953888, + "loss_ib": 0.0068174097687006, + "step": 1371 + }, + { + "ce_ib": 3.722066640853882, + "ce_orig": 0.789971649646759, + "epoch": 0.39427708677834494, + "kl_loss": 0.13841679692268372, + "loss_ib": 0.005106234457343817, + "step": 1371 + }, + { + "ce_ib": 5.894031047821045, + "ce_orig": 1.156799554824829, + "epoch": 0.3945646703573226, + "kl_loss": 0.2652025520801544, + "loss_ib": 0.008546056225895882, + "step": 1372 + }, + { + "ce_ib": 5.060960292816162, + "ce_orig": 0.5705182552337646, + "epoch": 0.3945646703573226, + "kl_loss": 0.3146953284740448, + "loss_ib": 0.008207913488149643, + "step": 1372 + }, + { + "ce_ib": 3.138653039932251, + "ce_orig": 0.5762611627578735, + "epoch": 0.3945646703573226, + "kl_loss": 0.19025224447250366, + "loss_ib": 0.005041175521910191, + "step": 1372 + }, + { + "ce_ib": 7.081219673156738, + "ce_orig": 1.0405709743499756, + "epoch": 0.3945646703573226, + "kl_loss": 0.28342974185943604, + "loss_ib": 0.009915516711771488, + "step": 1372 + }, + { + "ce_ib": 6.494819164276123, + "ce_orig": 0.656830906867981, + "epoch": 0.39485225393630025, + "kl_loss": 0.22173810005187988, + "loss_ib": 0.008712200447916985, + "step": 1373 + }, + { + "ce_ib": 8.728991508483887, + "ce_orig": 1.0599778890609741, + "epoch": 0.39485225393630025, + "kl_loss": 0.2607494592666626, + "loss_ib": 0.011336485855281353, + "step": 1373 + }, + { + "ce_ib": 4.47887659072876, + "ce_orig": 0.6049656867980957, + "epoch": 0.39485225393630025, + "kl_loss": 0.20736724138259888, + "loss_ib": 0.0065525490790605545, + "step": 1373 + }, + { + "ce_ib": 5.101130962371826, + "ce_orig": 0.9835696816444397, + "epoch": 0.39485225393630025, + "kl_loss": 0.23616167902946472, + "loss_ib": 0.007462748326361179, + "step": 1373 + }, + { + "ce_ib": 4.498948097229004, + "ce_orig": 0.6125088334083557, + "epoch": 0.39513983751527787, + "kl_loss": 0.2392752468585968, + "loss_ib": 0.006891700439155102, + "step": 1374 + }, + { + "ce_ib": 5.00796365737915, + "ce_orig": 0.5471829771995544, + "epoch": 0.39513983751527787, + "kl_loss": 0.2096775323152542, + "loss_ib": 0.007104738615453243, + "step": 1374 + }, + { + "ce_ib": 5.918644905090332, + "ce_orig": 0.7079725861549377, + "epoch": 0.39513983751527787, + "kl_loss": 0.40169966220855713, + "loss_ib": 0.009935641661286354, + "step": 1374 + }, + { + "ce_ib": 9.236472129821777, + "ce_orig": 1.5323234796524048, + "epoch": 0.39513983751527787, + "kl_loss": 0.32300513982772827, + "loss_ib": 0.012466523796319962, + "step": 1374 + }, + { + "epoch": 0.3954274210942555, + "grad_norm": 0.08639590442180634, + "learning_rate": 9.734234715063058e-06, + "loss": 0.8309, + "step": 1375 + }, + { + "ce_ib": 9.688190460205078, + "ce_orig": 1.6802754402160645, + "epoch": 0.3954274210942555, + "kl_loss": 0.24151378870010376, + "loss_ib": 0.012103328481316566, + "step": 1375 + }, + { + "ce_ib": 7.782926082611084, + "ce_orig": 1.4242134094238281, + "epoch": 0.3954274210942555, + "kl_loss": 0.2984180152416229, + "loss_ib": 0.010767105966806412, + "step": 1375 + }, + { + "ce_ib": 7.2921905517578125, + "ce_orig": 1.0522878170013428, + "epoch": 0.3954274210942555, + "kl_loss": 0.23101499676704407, + "loss_ib": 0.009602339938282967, + "step": 1375 + }, + { + "ce_ib": 4.890142440795898, + "ce_orig": 0.7784461379051208, + "epoch": 0.3954274210942555, + "kl_loss": 0.21940796077251434, + "loss_ib": 0.007084221579134464, + "step": 1375 + }, + { + "ce_ib": 6.3335394859313965, + "ce_orig": 0.8000179529190063, + "epoch": 0.3957150046732332, + "kl_loss": 0.20593953132629395, + "loss_ib": 0.008392934687435627, + "step": 1376 + }, + { + "ce_ib": 6.561087608337402, + "ce_orig": 0.9470977783203125, + "epoch": 0.3957150046732332, + "kl_loss": 0.23629780113697052, + "loss_ib": 0.008924066089093685, + "step": 1376 + }, + { + "ce_ib": 4.079127311706543, + "ce_orig": 0.92641681432724, + "epoch": 0.3957150046732332, + "kl_loss": 0.20223310589790344, + "loss_ib": 0.006101457867771387, + "step": 1376 + }, + { + "ce_ib": 6.066758155822754, + "ce_orig": 0.9022270441055298, + "epoch": 0.3957150046732332, + "kl_loss": 0.267997145652771, + "loss_ib": 0.008746729232370853, + "step": 1376 + }, + { + "ce_ib": 7.5327324867248535, + "ce_orig": 1.4820178747177124, + "epoch": 0.3960025882522108, + "kl_loss": 0.20720908045768738, + "loss_ib": 0.009604823775589466, + "step": 1377 + }, + { + "ce_ib": 8.504740715026855, + "ce_orig": 0.7840504050254822, + "epoch": 0.3960025882522108, + "kl_loss": 0.2782425582408905, + "loss_ib": 0.011287165805697441, + "step": 1377 + }, + { + "ce_ib": 5.524410247802734, + "ce_orig": 0.5637204051017761, + "epoch": 0.3960025882522108, + "kl_loss": 0.23940207064151764, + "loss_ib": 0.007918431423604488, + "step": 1377 + }, + { + "ce_ib": 3.232489585876465, + "ce_orig": 0.451824426651001, + "epoch": 0.3960025882522108, + "kl_loss": 0.22094693779945374, + "loss_ib": 0.005441958550363779, + "step": 1377 + }, + { + "ce_ib": 6.773088455200195, + "ce_orig": 0.7511025071144104, + "epoch": 0.3962901718311884, + "kl_loss": 0.23778003454208374, + "loss_ib": 0.00915088877081871, + "step": 1378 + }, + { + "ce_ib": 4.436305999755859, + "ce_orig": 0.677458643913269, + "epoch": 0.3962901718311884, + "kl_loss": 0.18002289533615112, + "loss_ib": 0.006236535031348467, + "step": 1378 + }, + { + "ce_ib": 5.611198425292969, + "ce_orig": 0.9948437213897705, + "epoch": 0.3962901718311884, + "kl_loss": 0.20385992527008057, + "loss_ib": 0.007649797480553389, + "step": 1378 + }, + { + "ce_ib": 9.630160331726074, + "ce_orig": 1.6383625268936157, + "epoch": 0.3962901718311884, + "kl_loss": 0.2511659264564514, + "loss_ib": 0.01214181911200285, + "step": 1378 + }, + { + "ce_ib": 7.712647438049316, + "ce_orig": 1.0958294868469238, + "epoch": 0.3965777554101661, + "kl_loss": 0.27488973736763, + "loss_ib": 0.010461544618010521, + "step": 1379 + }, + { + "ce_ib": 6.212006092071533, + "ce_orig": 0.8772387504577637, + "epoch": 0.3965777554101661, + "kl_loss": 0.250034362077713, + "loss_ib": 0.008712350390851498, + "step": 1379 + }, + { + "ce_ib": 2.9093852043151855, + "ce_orig": 0.18342959880828857, + "epoch": 0.3965777554101661, + "kl_loss": 0.9934872388839722, + "loss_ib": 0.012844257056713104, + "step": 1379 + }, + { + "ce_ib": 7.467209339141846, + "ce_orig": 1.1859040260314941, + "epoch": 0.3965777554101661, + "kl_loss": 0.274760365486145, + "loss_ib": 0.010214812122285366, + "step": 1379 + }, + { + "epoch": 0.3968653389891437, + "grad_norm": 0.10431889444589615, + "learning_rate": 9.731732468138338e-06, + "loss": 0.939, + "step": 1380 + }, + { + "ce_ib": 6.341503143310547, + "ce_orig": 0.8641688823699951, + "epoch": 0.3968653389891437, + "kl_loss": 0.35600030422210693, + "loss_ib": 0.009901505894958973, + "step": 1380 + }, + { + "ce_ib": 4.540727615356445, + "ce_orig": 0.6933018565177917, + "epoch": 0.3968653389891437, + "kl_loss": 0.29483872652053833, + "loss_ib": 0.007489114999771118, + "step": 1380 + }, + { + "ce_ib": 5.404646873474121, + "ce_orig": 0.8426015973091125, + "epoch": 0.3968653389891437, + "kl_loss": 0.2104819267988205, + "loss_ib": 0.007509466260671616, + "step": 1380 + }, + { + "ce_ib": 6.046473026275635, + "ce_orig": 0.644819438457489, + "epoch": 0.3968653389891437, + "kl_loss": 0.21694740653038025, + "loss_ib": 0.008215947076678276, + "step": 1380 + }, + { + "ce_ib": 8.979574203491211, + "ce_orig": 1.3111884593963623, + "epoch": 0.39715292256812135, + "kl_loss": 0.2692817449569702, + "loss_ib": 0.011672391556203365, + "step": 1381 + }, + { + "ce_ib": 10.654874801635742, + "ce_orig": 1.7917509078979492, + "epoch": 0.39715292256812135, + "kl_loss": 0.24403555691242218, + "loss_ib": 0.013095230795443058, + "step": 1381 + }, + { + "ce_ib": 4.652118682861328, + "ce_orig": 0.449733704328537, + "epoch": 0.39715292256812135, + "kl_loss": 0.2827409505844116, + "loss_ib": 0.007479527965188026, + "step": 1381 + }, + { + "ce_ib": 4.677504539489746, + "ce_orig": 0.4694943428039551, + "epoch": 0.39715292256812135, + "kl_loss": 0.20924463868141174, + "loss_ib": 0.006769950967282057, + "step": 1381 + }, + { + "ce_ib": 4.163352966308594, + "ce_orig": 0.3782082498073578, + "epoch": 0.397440506147099, + "kl_loss": 0.295204222202301, + "loss_ib": 0.0071153948083519936, + "step": 1382 + }, + { + "ce_ib": 7.688187122344971, + "ce_orig": 0.9315841794013977, + "epoch": 0.397440506147099, + "kl_loss": 0.17919141054153442, + "loss_ib": 0.009480101056396961, + "step": 1382 + }, + { + "ce_ib": 7.311005592346191, + "ce_orig": 0.939527153968811, + "epoch": 0.397440506147099, + "kl_loss": 0.20597898960113525, + "loss_ib": 0.009370795451104641, + "step": 1382 + }, + { + "ce_ib": 6.882622718811035, + "ce_orig": 0.8821007013320923, + "epoch": 0.397440506147099, + "kl_loss": 0.324748694896698, + "loss_ib": 0.010130109265446663, + "step": 1382 + }, + { + "ce_ib": 6.135637283325195, + "ce_orig": 1.4052454233169556, + "epoch": 0.39772808972607665, + "kl_loss": 0.35044461488723755, + "loss_ib": 0.009640082716941833, + "step": 1383 + }, + { + "ce_ib": 2.134206771850586, + "ce_orig": 0.5873817801475525, + "epoch": 0.39772808972607665, + "kl_loss": 0.7168362140655518, + "loss_ib": 0.00930256862193346, + "step": 1383 + }, + { + "ce_ib": 11.386298179626465, + "ce_orig": 1.552770733833313, + "epoch": 0.39772808972607665, + "kl_loss": 0.3069136440753937, + "loss_ib": 0.014455433934926987, + "step": 1383 + }, + { + "ce_ib": 4.990416526794434, + "ce_orig": 0.5759061574935913, + "epoch": 0.39772808972607665, + "kl_loss": 0.19468580186367035, + "loss_ib": 0.006937274243682623, + "step": 1383 + }, + { + "ce_ib": 8.349421501159668, + "ce_orig": 1.4221105575561523, + "epoch": 0.3980156733050543, + "kl_loss": 0.24148491024971008, + "loss_ib": 0.010764271020889282, + "step": 1384 + }, + { + "ce_ib": 5.238234996795654, + "ce_orig": 0.8385730385780334, + "epoch": 0.3980156733050543, + "kl_loss": 0.19655439257621765, + "loss_ib": 0.007203779183328152, + "step": 1384 + }, + { + "ce_ib": 7.211249351501465, + "ce_orig": 1.0338671207427979, + "epoch": 0.3980156733050543, + "kl_loss": 0.31964731216430664, + "loss_ib": 0.010407721623778343, + "step": 1384 + }, + { + "ce_ib": 5.4048380851745605, + "ce_orig": 0.5029476881027222, + "epoch": 0.3980156733050543, + "kl_loss": 0.32509592175483704, + "loss_ib": 0.008655796758830547, + "step": 1384 + }, + { + "epoch": 0.3983032568840319, + "grad_norm": 0.10620667785406113, + "learning_rate": 9.729218821371844e-06, + "loss": 0.8809, + "step": 1385 + }, + { + "ce_ib": 5.505414009094238, + "ce_orig": 0.9368299245834351, + "epoch": 0.3983032568840319, + "kl_loss": 0.24715746939182281, + "loss_ib": 0.007976988330483437, + "step": 1385 + }, + { + "ce_ib": 8.991453170776367, + "ce_orig": 1.7614587545394897, + "epoch": 0.3983032568840319, + "kl_loss": 0.21385596692562103, + "loss_ib": 0.011130011640489101, + "step": 1385 + }, + { + "ce_ib": 11.152636528015137, + "ce_orig": 1.6417429447174072, + "epoch": 0.3983032568840319, + "kl_loss": 0.22926491498947144, + "loss_ib": 0.013445286080241203, + "step": 1385 + }, + { + "ce_ib": 5.502985000610352, + "ce_orig": 0.8666877150535583, + "epoch": 0.3983032568840319, + "kl_loss": 0.3327026665210724, + "loss_ib": 0.008830011822283268, + "step": 1385 + }, + { + "ce_ib": 7.255623817443848, + "ce_orig": 1.1066640615463257, + "epoch": 0.3985908404630096, + "kl_loss": 0.39020809531211853, + "loss_ib": 0.011157704517245293, + "step": 1386 + }, + { + "ce_ib": 5.247617721557617, + "ce_orig": 0.6558859348297119, + "epoch": 0.3985908404630096, + "kl_loss": 0.2507811188697815, + "loss_ib": 0.007755429018288851, + "step": 1386 + }, + { + "ce_ib": 6.0136590003967285, + "ce_orig": 0.9555683135986328, + "epoch": 0.3985908404630096, + "kl_loss": 0.2821842133998871, + "loss_ib": 0.008835501037538052, + "step": 1386 + }, + { + "ce_ib": 7.45864200592041, + "ce_orig": 1.1321091651916504, + "epoch": 0.3985908404630096, + "kl_loss": 0.18043175339698792, + "loss_ib": 0.009262959472835064, + "step": 1386 + }, + { + "ce_ib": 5.33476448059082, + "ce_orig": 0.3529107868671417, + "epoch": 0.3988784240419872, + "kl_loss": 0.2667803168296814, + "loss_ib": 0.008002568036317825, + "step": 1387 + }, + { + "ce_ib": 4.215025901794434, + "ce_orig": 0.7531979084014893, + "epoch": 0.3988784240419872, + "kl_loss": 0.18931743502616882, + "loss_ib": 0.006108200643211603, + "step": 1387 + }, + { + "ce_ib": 7.2497639656066895, + "ce_orig": 1.3689095973968506, + "epoch": 0.3988784240419872, + "kl_loss": 0.19657601416110992, + "loss_ib": 0.009215524420142174, + "step": 1387 + }, + { + "ce_ib": 4.428555488586426, + "ce_orig": 0.5836307406425476, + "epoch": 0.3988784240419872, + "kl_loss": 0.2728900909423828, + "loss_ib": 0.00715745659545064, + "step": 1387 + }, + { + "ce_ib": 8.63098430633545, + "ce_orig": 1.268597960472107, + "epoch": 0.3991660076209648, + "kl_loss": 0.351509153842926, + "loss_ib": 0.012146075256168842, + "step": 1388 + }, + { + "ce_ib": 6.511746406555176, + "ce_orig": 1.0989983081817627, + "epoch": 0.3991660076209648, + "kl_loss": 0.22151046991348267, + "loss_ib": 0.00872685108333826, + "step": 1388 + }, + { + "ce_ib": 6.183234214782715, + "ce_orig": 0.8914353847503662, + "epoch": 0.3991660076209648, + "kl_loss": 0.2892371416091919, + "loss_ib": 0.009075605310499668, + "step": 1388 + }, + { + "ce_ib": 5.882315635681152, + "ce_orig": 0.9376100301742554, + "epoch": 0.3991660076209648, + "kl_loss": 0.3167765736579895, + "loss_ib": 0.009050081484019756, + "step": 1388 + }, + { + "ce_ib": 4.763624668121338, + "ce_orig": 0.7746894955635071, + "epoch": 0.3994535911999425, + "kl_loss": 0.28361696004867554, + "loss_ib": 0.007599794305860996, + "step": 1389 + }, + { + "ce_ib": 6.894254684448242, + "ce_orig": 1.0107172727584839, + "epoch": 0.3994535911999425, + "kl_loss": 0.24421223998069763, + "loss_ib": 0.009336377494037151, + "step": 1389 + }, + { + "ce_ib": 8.682352066040039, + "ce_orig": 1.454932451248169, + "epoch": 0.3994535911999425, + "kl_loss": 0.19313442707061768, + "loss_ib": 0.010613696649670601, + "step": 1389 + }, + { + "ce_ib": 8.39739990234375, + "ce_orig": 1.3320800065994263, + "epoch": 0.3994535911999425, + "kl_loss": 0.19475214183330536, + "loss_ib": 0.010344920679926872, + "step": 1389 + }, + { + "epoch": 0.3997411747789201, + "grad_norm": 0.1351994425058365, + "learning_rate": 9.726693780819535e-06, + "loss": 0.9787, + "step": 1390 + }, + { + "ce_ib": 5.831480979919434, + "ce_orig": 0.9803398251533508, + "epoch": 0.3997411747789201, + "kl_loss": 0.2716244161128998, + "loss_ib": 0.008547725155949593, + "step": 1390 + }, + { + "ce_ib": 10.931524276733398, + "ce_orig": 1.3122738599777222, + "epoch": 0.3997411747789201, + "kl_loss": 0.24578051269054413, + "loss_ib": 0.013389328494668007, + "step": 1390 + }, + { + "ce_ib": 5.385982513427734, + "ce_orig": 0.6934446096420288, + "epoch": 0.3997411747789201, + "kl_loss": 0.18988949060440063, + "loss_ib": 0.007284877356141806, + "step": 1390 + }, + { + "ce_ib": 5.229064464569092, + "ce_orig": 0.5983728170394897, + "epoch": 0.3997411747789201, + "kl_loss": 0.33651402592658997, + "loss_ib": 0.008594205603003502, + "step": 1390 + }, + { + "ce_ib": 3.7700514793395996, + "ce_orig": 0.7275258898735046, + "epoch": 0.40002875835789775, + "kl_loss": 0.20360833406448364, + "loss_ib": 0.0058061350136995316, + "step": 1391 + }, + { + "ce_ib": 7.143492698669434, + "ce_orig": 1.1508095264434814, + "epoch": 0.40002875835789775, + "kl_loss": 0.18225440382957458, + "loss_ib": 0.008966037072241306, + "step": 1391 + }, + { + "ce_ib": 5.067678928375244, + "ce_orig": 0.6422498822212219, + "epoch": 0.40002875835789775, + "kl_loss": 0.26821526885032654, + "loss_ib": 0.0077498313039541245, + "step": 1391 + }, + { + "ce_ib": 4.1541595458984375, + "ce_orig": 0.5645342469215393, + "epoch": 0.40002875835789775, + "kl_loss": 0.23720993101596832, + "loss_ib": 0.0065262592397630215, + "step": 1391 + }, + { + "ce_ib": 7.886209964752197, + "ce_orig": 0.7300704121589661, + "epoch": 0.40031634193687543, + "kl_loss": 0.3853447437286377, + "loss_ib": 0.011739656329154968, + "step": 1392 + }, + { + "ce_ib": 8.745075225830078, + "ce_orig": 0.8196200132369995, + "epoch": 0.40031634193687543, + "kl_loss": 0.29888561367988586, + "loss_ib": 0.011733931489288807, + "step": 1392 + }, + { + "ce_ib": 11.331106185913086, + "ce_orig": 1.2489943504333496, + "epoch": 0.40031634193687543, + "kl_loss": 0.2049940675497055, + "loss_ib": 0.013381047174334526, + "step": 1392 + }, + { + "ce_ib": 6.58360481262207, + "ce_orig": 0.8827504515647888, + "epoch": 0.40031634193687543, + "kl_loss": 0.25717267394065857, + "loss_ib": 0.0091553321108222, + "step": 1392 + }, + { + "ce_ib": 7.464869499206543, + "ce_orig": 0.8372098207473755, + "epoch": 0.40060392551585305, + "kl_loss": 0.18981406092643738, + "loss_ib": 0.009363009594380856, + "step": 1393 + }, + { + "ce_ib": 3.8323893547058105, + "ce_orig": 0.5065505504608154, + "epoch": 0.40060392551585305, + "kl_loss": 0.40909343957901, + "loss_ib": 0.007923323661088943, + "step": 1393 + }, + { + "ce_ib": 6.201034069061279, + "ce_orig": 0.6383828520774841, + "epoch": 0.40060392551585305, + "kl_loss": 0.495597243309021, + "loss_ib": 0.011157006956636906, + "step": 1393 + }, + { + "ce_ib": 3.3212897777557373, + "ce_orig": 0.5181757211685181, + "epoch": 0.40060392551585305, + "kl_loss": 0.5330347418785095, + "loss_ib": 0.00865163654088974, + "step": 1393 + }, + { + "ce_ib": 5.628756999969482, + "ce_orig": 1.1345739364624023, + "epoch": 0.4008915090948307, + "kl_loss": 0.2799358367919922, + "loss_ib": 0.008428115397691727, + "step": 1394 + }, + { + "ce_ib": 7.621800899505615, + "ce_orig": 1.179695725440979, + "epoch": 0.4008915090948307, + "kl_loss": 0.20849332213401794, + "loss_ib": 0.009706733748316765, + "step": 1394 + }, + { + "ce_ib": 4.524997711181641, + "ce_orig": 0.5907806158065796, + "epoch": 0.4008915090948307, + "kl_loss": 0.2825338840484619, + "loss_ib": 0.007350336294621229, + "step": 1394 + }, + { + "ce_ib": 5.2815470695495605, + "ce_orig": 0.9510052800178528, + "epoch": 0.4008915090948307, + "kl_loss": 0.20430287718772888, + "loss_ib": 0.007324576377868652, + "step": 1394 + }, + { + "epoch": 0.4011790926738083, + "grad_norm": 0.10714766383171082, + "learning_rate": 9.724157352564818e-06, + "loss": 0.8557, + "step": 1395 + }, + { + "ce_ib": 9.26974868774414, + "ce_orig": 1.2472344636917114, + "epoch": 0.4011790926738083, + "kl_loss": 0.22076760232448578, + "loss_ib": 0.011477424763143063, + "step": 1395 + }, + { + "ce_ib": 6.311810493469238, + "ce_orig": 1.118715524673462, + "epoch": 0.4011790926738083, + "kl_loss": 0.18845130503177643, + "loss_ib": 0.008196323178708553, + "step": 1395 + }, + { + "ce_ib": 8.513527870178223, + "ce_orig": 1.4789761304855347, + "epoch": 0.4011790926738083, + "kl_loss": 0.2904071807861328, + "loss_ib": 0.011417599394917488, + "step": 1395 + }, + { + "ce_ib": 8.834794044494629, + "ce_orig": 1.0904123783111572, + "epoch": 0.4011790926738083, + "kl_loss": 0.24855023622512817, + "loss_ib": 0.011320296674966812, + "step": 1395 + }, + { + "ce_ib": 10.29422378540039, + "ce_orig": 1.6876254081726074, + "epoch": 0.401466676252786, + "kl_loss": 0.25927823781967163, + "loss_ib": 0.01288700569421053, + "step": 1396 + }, + { + "ce_ib": 4.860633850097656, + "ce_orig": 0.3961508870124817, + "epoch": 0.401466676252786, + "kl_loss": 0.377347856760025, + "loss_ib": 0.0086341118440032, + "step": 1396 + }, + { + "ce_ib": 5.413529872894287, + "ce_orig": 0.6334626078605652, + "epoch": 0.401466676252786, + "kl_loss": 0.2238302230834961, + "loss_ib": 0.007651831954717636, + "step": 1396 + }, + { + "ce_ib": 7.909429550170898, + "ce_orig": 0.8602885007858276, + "epoch": 0.401466676252786, + "kl_loss": 0.3015333116054535, + "loss_ib": 0.01092476211488247, + "step": 1396 + }, + { + "ce_ib": 11.0640869140625, + "ce_orig": 1.5119869709014893, + "epoch": 0.4017542598317636, + "kl_loss": 0.21881186962127686, + "loss_ib": 0.013252205215394497, + "step": 1397 + }, + { + "ce_ib": 7.311441421508789, + "ce_orig": 0.8810110688209534, + "epoch": 0.4017542598317636, + "kl_loss": 0.295499324798584, + "loss_ib": 0.010266435332596302, + "step": 1397 + }, + { + "ce_ib": 4.952889919281006, + "ce_orig": 0.6296012997627258, + "epoch": 0.4017542598317636, + "kl_loss": 0.23733410239219666, + "loss_ib": 0.007326230872422457, + "step": 1397 + }, + { + "ce_ib": 4.622181415557861, + "ce_orig": 0.7245978116989136, + "epoch": 0.4017542598317636, + "kl_loss": 0.20384420454502106, + "loss_ib": 0.006660623475909233, + "step": 1397 + }, + { + "ce_ib": 5.9267683029174805, + "ce_orig": 0.5333219766616821, + "epoch": 0.40204184341074123, + "kl_loss": 0.20834708213806152, + "loss_ib": 0.008010238409042358, + "step": 1398 + }, + { + "ce_ib": 1.2435208559036255, + "ce_orig": 0.12993678450584412, + "epoch": 0.40204184341074123, + "kl_loss": 0.43682751059532166, + "loss_ib": 0.005611795466393232, + "step": 1398 + }, + { + "ce_ib": 6.2769036293029785, + "ce_orig": 0.3595212697982788, + "epoch": 0.40204184341074123, + "kl_loss": 0.47299015522003174, + "loss_ib": 0.011006806045770645, + "step": 1398 + }, + { + "ce_ib": 6.465590000152588, + "ce_orig": 1.3574104309082031, + "epoch": 0.40204184341074123, + "kl_loss": 0.20642521977424622, + "loss_ib": 0.008529841899871826, + "step": 1398 + }, + { + "ce_ib": 6.165492534637451, + "ce_orig": 1.2102705240249634, + "epoch": 0.4023294269897189, + "kl_loss": 0.18871337175369263, + "loss_ib": 0.008052625693380833, + "step": 1399 + }, + { + "ce_ib": 8.377633094787598, + "ce_orig": 0.949652910232544, + "epoch": 0.4023294269897189, + "kl_loss": 0.31055009365081787, + "loss_ib": 0.011483133770525455, + "step": 1399 + }, + { + "ce_ib": 4.6876139640808105, + "ce_orig": 0.6695358157157898, + "epoch": 0.4023294269897189, + "kl_loss": 0.2681241035461426, + "loss_ib": 0.007368855178356171, + "step": 1399 + }, + { + "ce_ib": 9.898786544799805, + "ce_orig": 1.370671272277832, + "epoch": 0.4023294269897189, + "kl_loss": 0.21454685926437378, + "loss_ib": 0.012044255621731281, + "step": 1399 + }, + { + "epoch": 0.40261701056869653, + "grad_norm": 0.10930775105953217, + "learning_rate": 9.72160954271854e-06, + "loss": 0.9588, + "step": 1400 + }, + { + "ce_ib": 5.554301738739014, + "ce_orig": 0.6720674633979797, + "epoch": 0.40261701056869653, + "kl_loss": 0.2781994044780731, + "loss_ib": 0.008336296305060387, + "step": 1400 + }, + { + "ce_ib": 5.45673942565918, + "ce_orig": 0.3260180652141571, + "epoch": 0.40261701056869653, + "kl_loss": 0.32308900356292725, + "loss_ib": 0.008687629364430904, + "step": 1400 + }, + { + "ce_ib": 5.333756446838379, + "ce_orig": 0.7048057317733765, + "epoch": 0.40261701056869653, + "kl_loss": 0.22332128882408142, + "loss_ib": 0.007566969376057386, + "step": 1400 + }, + { + "ce_ib": 7.247291564941406, + "ce_orig": 1.5441960096359253, + "epoch": 0.40261701056869653, + "kl_loss": 0.2041657269001007, + "loss_ib": 0.009288948960602283, + "step": 1400 + }, + { + "ce_ib": 5.841701984405518, + "ce_orig": 1.0211917161941528, + "epoch": 0.40290459414767416, + "kl_loss": 0.15334239602088928, + "loss_ib": 0.007375125773251057, + "step": 1401 + }, + { + "ce_ib": 1.6962262392044067, + "ce_orig": 0.263886958360672, + "epoch": 0.40290459414767416, + "kl_loss": 0.3646056652069092, + "loss_ib": 0.00534228328615427, + "step": 1401 + }, + { + "ce_ib": 4.868833541870117, + "ce_orig": 0.9672197699546814, + "epoch": 0.40290459414767416, + "kl_loss": 0.1706363558769226, + "loss_ib": 0.006575196981430054, + "step": 1401 + }, + { + "ce_ib": 6.2680768966674805, + "ce_orig": 0.741491973400116, + "epoch": 0.40290459414767416, + "kl_loss": 0.2005263715982437, + "loss_ib": 0.008273339830338955, + "step": 1401 + }, + { + "ce_ib": 7.944718837738037, + "ce_orig": 0.9443780183792114, + "epoch": 0.40319217772665183, + "kl_loss": 0.2510814964771271, + "loss_ib": 0.010455533862113953, + "step": 1402 + }, + { + "ce_ib": 4.779119968414307, + "ce_orig": 0.5356561541557312, + "epoch": 0.40319217772665183, + "kl_loss": 0.21569356322288513, + "loss_ib": 0.006936055142432451, + "step": 1402 + }, + { + "ce_ib": 7.5224385261535645, + "ce_orig": 0.6601194739341736, + "epoch": 0.40319217772665183, + "kl_loss": 0.28233033418655396, + "loss_ib": 0.010345742106437683, + "step": 1402 + }, + { + "ce_ib": 2.8687710762023926, + "ce_orig": 0.41683757305145264, + "epoch": 0.40319217772665183, + "kl_loss": 0.32434821128845215, + "loss_ib": 0.00611225375905633, + "step": 1402 + }, + { + "ce_ib": 12.094705581665039, + "ce_orig": 2.08050537109375, + "epoch": 0.40347976130562946, + "kl_loss": 0.25543084740638733, + "loss_ib": 0.014649013988673687, + "step": 1403 + }, + { + "ce_ib": 5.257640361785889, + "ce_orig": 0.6185838580131531, + "epoch": 0.40347976130562946, + "kl_loss": 0.20503397285938263, + "loss_ib": 0.007307979743927717, + "step": 1403 + }, + { + "ce_ib": 6.66251802444458, + "ce_orig": 1.0003657341003418, + "epoch": 0.40347976130562946, + "kl_loss": 0.2243478149175644, + "loss_ib": 0.008905996568500996, + "step": 1403 + }, + { + "ce_ib": 3.802760124206543, + "ce_orig": 0.5514618158340454, + "epoch": 0.40347976130562946, + "kl_loss": 0.2789892256259918, + "loss_ib": 0.006592652760446072, + "step": 1403 + }, + { + "ce_ib": 5.127264499664307, + "ce_orig": 0.7384080290794373, + "epoch": 0.4037673448846071, + "kl_loss": 0.2943039536476135, + "loss_ib": 0.008070304058492184, + "step": 1404 + }, + { + "ce_ib": 6.885931491851807, + "ce_orig": 0.5815180540084839, + "epoch": 0.4037673448846071, + "kl_loss": 0.33004623651504517, + "loss_ib": 0.010186392813920975, + "step": 1404 + }, + { + "ce_ib": 11.163129806518555, + "ce_orig": 1.7125380039215088, + "epoch": 0.4037673448846071, + "kl_loss": 0.27400827407836914, + "loss_ib": 0.013903211802244186, + "step": 1404 + }, + { + "ce_ib": 7.123013019561768, + "ce_orig": 0.45505034923553467, + "epoch": 0.4037673448846071, + "kl_loss": 0.218210369348526, + "loss_ib": 0.009305116720497608, + "step": 1404 + }, + { + "epoch": 0.4040549284635847, + "grad_norm": 0.1016739085316658, + "learning_rate": 9.719050357418962e-06, + "loss": 0.8958, + "step": 1405 + }, + { + "ce_ib": 5.687928199768066, + "ce_orig": 0.8055709600448608, + "epoch": 0.4040549284635847, + "kl_loss": 0.24938495457172394, + "loss_ib": 0.008181777782738209, + "step": 1405 + }, + { + "ce_ib": 6.564266204833984, + "ce_orig": 0.8630247712135315, + "epoch": 0.4040549284635847, + "kl_loss": 0.2232058048248291, + "loss_ib": 0.008796324022114277, + "step": 1405 + }, + { + "ce_ib": 10.02625560760498, + "ce_orig": 1.6753994226455688, + "epoch": 0.4040549284635847, + "kl_loss": 0.4892248511314392, + "loss_ib": 0.014918503351509571, + "step": 1405 + }, + { + "ce_ib": 5.1657586097717285, + "ce_orig": 0.8059775233268738, + "epoch": 0.4040549284635847, + "kl_loss": 0.2578337490558624, + "loss_ib": 0.007744096219539642, + "step": 1405 + }, + { + "ce_ib": 8.515156745910645, + "ce_orig": 0.9876176118850708, + "epoch": 0.4043425120425624, + "kl_loss": 0.2993163466453552, + "loss_ib": 0.011508320458233356, + "step": 1406 + }, + { + "ce_ib": 4.786593437194824, + "ce_orig": 0.6819922924041748, + "epoch": 0.4043425120425624, + "kl_loss": 0.26278918981552124, + "loss_ib": 0.007414484862238169, + "step": 1406 + }, + { + "ce_ib": 9.841745376586914, + "ce_orig": 1.6463918685913086, + "epoch": 0.4043425120425624, + "kl_loss": 0.25848478078842163, + "loss_ib": 0.012426593340933323, + "step": 1406 + }, + { + "ce_ib": 5.537144660949707, + "ce_orig": 0.6347407102584839, + "epoch": 0.4043425120425624, + "kl_loss": 0.29522833228111267, + "loss_ib": 0.008489427156746387, + "step": 1406 + }, + { + "ce_ib": 2.009383201599121, + "ce_orig": 0.4568289816379547, + "epoch": 0.40463009562154, + "kl_loss": 0.14038433134555817, + "loss_ib": 0.003413226455450058, + "step": 1407 + }, + { + "ce_ib": 8.308806419372559, + "ce_orig": 0.9344093799591064, + "epoch": 0.40463009562154, + "kl_loss": 0.3734573721885681, + "loss_ib": 0.012043380178511143, + "step": 1407 + }, + { + "ce_ib": 7.919328212738037, + "ce_orig": 0.4728245437145233, + "epoch": 0.40463009562154, + "kl_loss": 0.3370034992694855, + "loss_ib": 0.011289362795650959, + "step": 1407 + }, + { + "ce_ib": 7.972267150878906, + "ce_orig": 1.1928261518478394, + "epoch": 0.40463009562154, + "kl_loss": 0.2522418797016144, + "loss_ib": 0.010494685731828213, + "step": 1407 + }, + { + "ce_ib": 6.536746978759766, + "ce_orig": 0.30310842394828796, + "epoch": 0.40491767920051763, + "kl_loss": 0.3365011215209961, + "loss_ib": 0.009901758283376694, + "step": 1408 + }, + { + "ce_ib": 8.056245803833008, + "ce_orig": 0.7353057861328125, + "epoch": 0.40491767920051763, + "kl_loss": 0.2854786217212677, + "loss_ib": 0.010911031626164913, + "step": 1408 + }, + { + "ce_ib": 7.960206985473633, + "ce_orig": 1.1164931058883667, + "epoch": 0.40491767920051763, + "kl_loss": 0.24506092071533203, + "loss_ib": 0.01041081640869379, + "step": 1408 + }, + { + "ce_ib": 7.368075847625732, + "ce_orig": 1.0007930994033813, + "epoch": 0.40491767920051763, + "kl_loss": 0.2809593677520752, + "loss_ib": 0.010177669115364552, + "step": 1408 + }, + { + "ce_ib": 6.244273662567139, + "ce_orig": 0.9306173324584961, + "epoch": 0.4052052627794953, + "kl_loss": 0.2776910662651062, + "loss_ib": 0.009021184407174587, + "step": 1409 + }, + { + "ce_ib": 8.91396713256836, + "ce_orig": 1.491280198097229, + "epoch": 0.4052052627794953, + "kl_loss": 0.2998870313167572, + "loss_ib": 0.011912836693227291, + "step": 1409 + }, + { + "ce_ib": 7.80165958404541, + "ce_orig": 1.2421525716781616, + "epoch": 0.4052052627794953, + "kl_loss": 0.2242795079946518, + "loss_ib": 0.010044453665614128, + "step": 1409 + }, + { + "ce_ib": 5.978887557983398, + "ce_orig": 0.6589029431343079, + "epoch": 0.4052052627794953, + "kl_loss": 0.17178526520729065, + "loss_ib": 0.0076967403292655945, + "step": 1409 + }, + { + "epoch": 0.40549284635847294, + "grad_norm": 0.11591479182243347, + "learning_rate": 9.71647980283176e-06, + "loss": 0.8355, + "step": 1410 + }, + { + "ce_ib": 5.507479667663574, + "ce_orig": 0.8018810153007507, + "epoch": 0.40549284635847294, + "kl_loss": 0.2922688126564026, + "loss_ib": 0.00843016803264618, + "step": 1410 + }, + { + "ce_ib": 4.450343608856201, + "ce_orig": 0.7255460023880005, + "epoch": 0.40549284635847294, + "kl_loss": 0.25514137744903564, + "loss_ib": 0.007001757621765137, + "step": 1410 + }, + { + "ce_ib": 4.221258640289307, + "ce_orig": 0.8187763094902039, + "epoch": 0.40549284635847294, + "kl_loss": 0.2624683976173401, + "loss_ib": 0.006845942698419094, + "step": 1410 + }, + { + "ce_ib": 7.064179420471191, + "ce_orig": 0.7070431113243103, + "epoch": 0.40549284635847294, + "kl_loss": 0.21688687801361084, + "loss_ib": 0.009233048185706139, + "step": 1410 + }, + { + "ce_ib": 7.324556350708008, + "ce_orig": 0.8796791434288025, + "epoch": 0.40578042993745056, + "kl_loss": 0.2597200870513916, + "loss_ib": 0.009921757504343987, + "step": 1411 + }, + { + "ce_ib": 10.051068305969238, + "ce_orig": 1.3552758693695068, + "epoch": 0.40578042993745056, + "kl_loss": 0.3554384410381317, + "loss_ib": 0.01360545214265585, + "step": 1411 + }, + { + "ce_ib": 3.1821837425231934, + "ce_orig": 0.5796427130699158, + "epoch": 0.40578042993745056, + "kl_loss": 0.23813700675964355, + "loss_ib": 0.005563553422689438, + "step": 1411 + }, + { + "ce_ib": 5.153987407684326, + "ce_orig": 0.7459867000579834, + "epoch": 0.40578042993745056, + "kl_loss": 0.2217734456062317, + "loss_ib": 0.007371721789240837, + "step": 1411 + }, + { + "ce_ib": 6.2244977951049805, + "ce_orig": 1.012880563735962, + "epoch": 0.40606801351642824, + "kl_loss": 0.19198933243751526, + "loss_ib": 0.00814439170062542, + "step": 1412 + }, + { + "ce_ib": 7.10197114944458, + "ce_orig": 0.8235251903533936, + "epoch": 0.40606801351642824, + "kl_loss": 0.23724320530891418, + "loss_ib": 0.009474403224885464, + "step": 1412 + }, + { + "ce_ib": 5.754896640777588, + "ce_orig": 0.8808709383010864, + "epoch": 0.40606801351642824, + "kl_loss": 0.24545630812644958, + "loss_ib": 0.008209459483623505, + "step": 1412 + }, + { + "ce_ib": 5.674515247344971, + "ce_orig": 1.1812490224838257, + "epoch": 0.40606801351642824, + "kl_loss": 0.2397882491350174, + "loss_ib": 0.00807239767163992, + "step": 1412 + }, + { + "ce_ib": 2.947330951690674, + "ce_orig": 0.6269690990447998, + "epoch": 0.40635559709540586, + "kl_loss": 0.18147560954093933, + "loss_ib": 0.004762087017297745, + "step": 1413 + }, + { + "ce_ib": 8.58889102935791, + "ce_orig": 1.1103471517562866, + "epoch": 0.40635559709540586, + "kl_loss": 0.2692915201187134, + "loss_ib": 0.011281806044280529, + "step": 1413 + }, + { + "ce_ib": 4.350068092346191, + "ce_orig": 0.6857120394706726, + "epoch": 0.40635559709540586, + "kl_loss": 0.18276429176330566, + "loss_ib": 0.006177711300551891, + "step": 1413 + }, + { + "ce_ib": 5.827058792114258, + "ce_orig": 0.6069142818450928, + "epoch": 0.40635559709540586, + "kl_loss": 0.3223385810852051, + "loss_ib": 0.009050444699823856, + "step": 1413 + }, + { + "ce_ib": 3.627539873123169, + "ce_orig": 0.5261756181716919, + "epoch": 0.4066431806743835, + "kl_loss": 0.27821996808052063, + "loss_ib": 0.006409739144146442, + "step": 1414 + }, + { + "ce_ib": 10.914916038513184, + "ce_orig": 1.7057969570159912, + "epoch": 0.4066431806743835, + "kl_loss": 0.18722708523273468, + "loss_ib": 0.012787186540663242, + "step": 1414 + }, + { + "ce_ib": 5.268122673034668, + "ce_orig": 0.8776735067367554, + "epoch": 0.4066431806743835, + "kl_loss": 0.1794731169939041, + "loss_ib": 0.007062853313982487, + "step": 1414 + }, + { + "ce_ib": 1.5480438470840454, + "ce_orig": 0.19495157897472382, + "epoch": 0.4066431806743835, + "kl_loss": 0.5221264958381653, + "loss_ib": 0.0067693088203668594, + "step": 1414 + }, + { + "epoch": 0.4069307642533611, + "grad_norm": 0.10485367476940155, + "learning_rate": 9.713897885149994e-06, + "loss": 0.8869, + "step": 1415 + }, + { + "ce_ib": 6.97669792175293, + "ce_orig": 0.885951042175293, + "epoch": 0.4069307642533611, + "kl_loss": 0.22319351136684418, + "loss_ib": 0.00920863263309002, + "step": 1415 + }, + { + "ce_ib": 6.193924903869629, + "ce_orig": 1.229131817817688, + "epoch": 0.4069307642533611, + "kl_loss": 0.2454812377691269, + "loss_ib": 0.008648737333714962, + "step": 1415 + }, + { + "ce_ib": 8.014636039733887, + "ce_orig": 1.4707450866699219, + "epoch": 0.4069307642533611, + "kl_loss": 0.3282904028892517, + "loss_ib": 0.011297540739178658, + "step": 1415 + }, + { + "ce_ib": 5.57000207901001, + "ce_orig": 0.5667921304702759, + "epoch": 0.4069307642533611, + "kl_loss": 0.23232273757457733, + "loss_ib": 0.007893229834735394, + "step": 1415 + }, + { + "ce_ib": 4.052793979644775, + "ce_orig": 0.6895977258682251, + "epoch": 0.4072183478323388, + "kl_loss": 0.16937918961048126, + "loss_ib": 0.0057465857826173306, + "step": 1416 + }, + { + "ce_ib": 9.834171295166016, + "ce_orig": 1.3297240734100342, + "epoch": 0.4072183478323388, + "kl_loss": 0.2615419030189514, + "loss_ib": 0.012449590489268303, + "step": 1416 + }, + { + "ce_ib": 6.8249831199646, + "ce_orig": 0.9767938256263733, + "epoch": 0.4072183478323388, + "kl_loss": 0.27759337425231934, + "loss_ib": 0.009600916877388954, + "step": 1416 + }, + { + "ce_ib": 5.38134765625, + "ce_orig": 0.7195479869842529, + "epoch": 0.4072183478323388, + "kl_loss": 0.23229524493217468, + "loss_ib": 0.007704299408942461, + "step": 1416 + }, + { + "ce_ib": 4.78963565826416, + "ce_orig": 0.3465440571308136, + "epoch": 0.4075059314113164, + "kl_loss": 0.2830664813518524, + "loss_ib": 0.007620300631970167, + "step": 1417 + }, + { + "ce_ib": 3.202763557434082, + "ce_orig": 0.664502739906311, + "epoch": 0.4075059314113164, + "kl_loss": 0.2345416098833084, + "loss_ib": 0.005548179615288973, + "step": 1417 + }, + { + "ce_ib": 4.993650913238525, + "ce_orig": 1.1927835941314697, + "epoch": 0.4075059314113164, + "kl_loss": 0.156328484416008, + "loss_ib": 0.006556935608386993, + "step": 1417 + }, + { + "ce_ib": 4.823815822601318, + "ce_orig": 0.8249080181121826, + "epoch": 0.4075059314113164, + "kl_loss": 0.29396092891693115, + "loss_ib": 0.007763424888253212, + "step": 1417 + }, + { + "ce_ib": 4.946149826049805, + "ce_orig": 0.41540834307670593, + "epoch": 0.40779351499029404, + "kl_loss": 0.2321927547454834, + "loss_ib": 0.00726807676255703, + "step": 1418 + }, + { + "ce_ib": 5.462716579437256, + "ce_orig": 0.7939573526382446, + "epoch": 0.40779351499029404, + "kl_loss": 0.19948364794254303, + "loss_ib": 0.007457552943378687, + "step": 1418 + }, + { + "ce_ib": 6.427265644073486, + "ce_orig": 0.6861687898635864, + "epoch": 0.40779351499029404, + "kl_loss": 0.31438255310058594, + "loss_ib": 0.009571091271936893, + "step": 1418 + }, + { + "ce_ib": 9.064435005187988, + "ce_orig": 1.7697478532791138, + "epoch": 0.40779351499029404, + "kl_loss": 0.23587557673454285, + "loss_ib": 0.011423190124332905, + "step": 1418 + }, + { + "ce_ib": 5.1803460121154785, + "ce_orig": 0.9039329886436462, + "epoch": 0.4080810985692717, + "kl_loss": 0.2533673644065857, + "loss_ib": 0.007714019622653723, + "step": 1419 + }, + { + "ce_ib": 6.159054756164551, + "ce_orig": 1.274484395980835, + "epoch": 0.4080810985692717, + "kl_loss": 0.1596750020980835, + "loss_ib": 0.007755804341286421, + "step": 1419 + }, + { + "ce_ib": 6.3064799308776855, + "ce_orig": 0.614425003528595, + "epoch": 0.4080810985692717, + "kl_loss": 0.213271364569664, + "loss_ib": 0.008439193479716778, + "step": 1419 + }, + { + "ce_ib": 8.887816429138184, + "ce_orig": 0.7756012082099915, + "epoch": 0.4080810985692717, + "kl_loss": 0.7952628135681152, + "loss_ib": 0.01684044487774372, + "step": 1419 + }, + { + "epoch": 0.40836868214824934, + "grad_norm": 0.1200951635837555, + "learning_rate": 9.711304610594104e-06, + "loss": 0.8206, + "step": 1420 + }, + { + "ce_ib": 7.012088775634766, + "ce_orig": 1.0536072254180908, + "epoch": 0.40836868214824934, + "kl_loss": 0.16178733110427856, + "loss_ib": 0.008629961870610714, + "step": 1420 + }, + { + "ce_ib": 6.098531723022461, + "ce_orig": 1.1920461654663086, + "epoch": 0.40836868214824934, + "kl_loss": 0.2587133049964905, + "loss_ib": 0.00868566520512104, + "step": 1420 + }, + { + "ce_ib": 7.746165752410889, + "ce_orig": 0.9715460538864136, + "epoch": 0.40836868214824934, + "kl_loss": 0.39986562728881836, + "loss_ib": 0.01174482237547636, + "step": 1420 + }, + { + "ce_ib": 3.6467649936676025, + "ce_orig": 0.47526460886001587, + "epoch": 0.40836868214824934, + "kl_loss": 0.15108205378055573, + "loss_ib": 0.005157585721462965, + "step": 1420 + }, + { + "ce_ib": 8.291778564453125, + "ce_orig": 1.4398640394210815, + "epoch": 0.40865626572722696, + "kl_loss": 0.3259751796722412, + "loss_ib": 0.011551530100405216, + "step": 1421 + }, + { + "ce_ib": 8.222766876220703, + "ce_orig": 0.7970226407051086, + "epoch": 0.40865626572722696, + "kl_loss": 0.3190440237522125, + "loss_ib": 0.011413206346333027, + "step": 1421 + }, + { + "ce_ib": 7.1990065574646, + "ce_orig": 1.1771275997161865, + "epoch": 0.40865626572722696, + "kl_loss": 0.1906033456325531, + "loss_ib": 0.00910503976047039, + "step": 1421 + }, + { + "ce_ib": 6.150810241699219, + "ce_orig": 0.8190860152244568, + "epoch": 0.40865626572722696, + "kl_loss": 0.20879146456718445, + "loss_ib": 0.008238724432885647, + "step": 1421 + }, + { + "ce_ib": 4.397854328155518, + "ce_orig": 0.5997467637062073, + "epoch": 0.40894384930620464, + "kl_loss": 0.17261874675750732, + "loss_ib": 0.006124041974544525, + "step": 1422 + }, + { + "ce_ib": 8.484132766723633, + "ce_orig": 1.3368240594863892, + "epoch": 0.40894384930620464, + "kl_loss": 0.3058059811592102, + "loss_ib": 0.011542192660272121, + "step": 1422 + }, + { + "ce_ib": 6.705843448638916, + "ce_orig": 1.415745735168457, + "epoch": 0.40894384930620464, + "kl_loss": 0.3636002540588379, + "loss_ib": 0.010341846384108067, + "step": 1422 + }, + { + "ce_ib": 4.8936944007873535, + "ce_orig": 0.6219208240509033, + "epoch": 0.40894384930620464, + "kl_loss": 0.2456442266702652, + "loss_ib": 0.007350136525928974, + "step": 1422 + }, + { + "ce_ib": 7.285373210906982, + "ce_orig": 0.9635927081108093, + "epoch": 0.40923143288518227, + "kl_loss": 0.3069985806941986, + "loss_ib": 0.010355358943343163, + "step": 1423 + }, + { + "ce_ib": 8.484498977661133, + "ce_orig": 1.178830623626709, + "epoch": 0.40923143288518227, + "kl_loss": 0.18762880563735962, + "loss_ib": 0.010360786691308022, + "step": 1423 + }, + { + "ce_ib": 6.52214241027832, + "ce_orig": 0.715835452079773, + "epoch": 0.40923143288518227, + "kl_loss": 0.35115742683410645, + "loss_ib": 0.010033717378973961, + "step": 1423 + }, + { + "ce_ib": 6.971340179443359, + "ce_orig": 1.2938631772994995, + "epoch": 0.40923143288518227, + "kl_loss": 0.16480334103107452, + "loss_ib": 0.00861937366425991, + "step": 1423 + }, + { + "ce_ib": 4.016520977020264, + "ce_orig": 0.6445021629333496, + "epoch": 0.4095190164641599, + "kl_loss": 0.21900208294391632, + "loss_ib": 0.006206541322171688, + "step": 1424 + }, + { + "ce_ib": 6.553114891052246, + "ce_orig": 1.2314685583114624, + "epoch": 0.4095190164641599, + "kl_loss": 0.2770439684391022, + "loss_ib": 0.009323555044829845, + "step": 1424 + }, + { + "ce_ib": 9.892790794372559, + "ce_orig": 1.7373547554016113, + "epoch": 0.4095190164641599, + "kl_loss": 0.23448815941810608, + "loss_ib": 0.012237672694027424, + "step": 1424 + }, + { + "ce_ib": 6.126859664916992, + "ce_orig": 0.7430605888366699, + "epoch": 0.4095190164641599, + "kl_loss": 0.20536071062088013, + "loss_ib": 0.00818046648055315, + "step": 1424 + }, + { + "epoch": 0.4098066000431375, + "grad_norm": 0.11239798367023468, + "learning_rate": 9.70869998541189e-06, + "loss": 0.905, + "step": 1425 + }, + { + "ce_ib": 6.2881574630737305, + "ce_orig": 0.5882934331893921, + "epoch": 0.4098066000431375, + "kl_loss": 0.40092191100120544, + "loss_ib": 0.010297376662492752, + "step": 1425 + }, + { + "ce_ib": 2.095649242401123, + "ce_orig": 0.3593219518661499, + "epoch": 0.4098066000431375, + "kl_loss": 0.20840157568454742, + "loss_ib": 0.004179664887487888, + "step": 1425 + }, + { + "ce_ib": 9.387741088867188, + "ce_orig": 1.2381709814071655, + "epoch": 0.4098066000431375, + "kl_loss": 0.19193483889102936, + "loss_ib": 0.011307088658213615, + "step": 1425 + }, + { + "ce_ib": 4.3381524085998535, + "ce_orig": 0.3203746974468231, + "epoch": 0.4098066000431375, + "kl_loss": 0.636804461479187, + "loss_ib": 0.010706196539103985, + "step": 1425 + }, + { + "ce_ib": 7.042891502380371, + "ce_orig": 0.8674129247665405, + "epoch": 0.4100941836221152, + "kl_loss": 0.33732450008392334, + "loss_ib": 0.01041613519191742, + "step": 1426 + }, + { + "ce_ib": 6.801926136016846, + "ce_orig": 0.8991611003875732, + "epoch": 0.4100941836221152, + "kl_loss": 0.15263846516609192, + "loss_ib": 0.008328311145305634, + "step": 1426 + }, + { + "ce_ib": 6.085090637207031, + "ce_orig": 0.9544994831085205, + "epoch": 0.4100941836221152, + "kl_loss": 0.23805543780326843, + "loss_ib": 0.008465644903481007, + "step": 1426 + }, + { + "ce_ib": 6.1746296882629395, + "ce_orig": 0.7697358131408691, + "epoch": 0.4100941836221152, + "kl_loss": 0.2907295525074005, + "loss_ib": 0.009081925265491009, + "step": 1426 + }, + { + "ce_ib": 5.99095344543457, + "ce_orig": 0.7473070025444031, + "epoch": 0.4103817672010928, + "kl_loss": 0.2807786464691162, + "loss_ib": 0.00879873987287283, + "step": 1427 + }, + { + "ce_ib": 5.068008899688721, + "ce_orig": 0.7087705731391907, + "epoch": 0.4103817672010928, + "kl_loss": 0.21263612806797028, + "loss_ib": 0.0071943700313568115, + "step": 1427 + }, + { + "ce_ib": 5.395236015319824, + "ce_orig": 0.853853702545166, + "epoch": 0.4103817672010928, + "kl_loss": 0.2351619303226471, + "loss_ib": 0.007746854797005653, + "step": 1427 + }, + { + "ce_ib": 8.231751441955566, + "ce_orig": 1.556867003440857, + "epoch": 0.4103817672010928, + "kl_loss": 0.24943526089191437, + "loss_ib": 0.01072610355913639, + "step": 1427 + }, + { + "ce_ib": 9.276421546936035, + "ce_orig": 1.3323252201080322, + "epoch": 0.41066935078007044, + "kl_loss": 0.18265675008296967, + "loss_ib": 0.011102988384664059, + "step": 1428 + }, + { + "ce_ib": 6.04024076461792, + "ce_orig": 0.7419769167900085, + "epoch": 0.41066935078007044, + "kl_loss": 0.4130827784538269, + "loss_ib": 0.010171068832278252, + "step": 1428 + }, + { + "ce_ib": 7.3178300857543945, + "ce_orig": 1.3224425315856934, + "epoch": 0.41066935078007044, + "kl_loss": 0.23849479854106903, + "loss_ib": 0.009702778421342373, + "step": 1428 + }, + { + "ce_ib": 9.07182502746582, + "ce_orig": 1.5430114269256592, + "epoch": 0.41066935078007044, + "kl_loss": 0.26186248660087585, + "loss_ib": 0.01169044990092516, + "step": 1428 + }, + { + "ce_ib": 5.606130123138428, + "ce_orig": 0.9684110879898071, + "epoch": 0.4109569343590481, + "kl_loss": 0.28772905468940735, + "loss_ib": 0.008483420126140118, + "step": 1429 + }, + { + "ce_ib": 7.555757999420166, + "ce_orig": 0.9240909814834595, + "epoch": 0.4109569343590481, + "kl_loss": 0.20850898325443268, + "loss_ib": 0.009640848264098167, + "step": 1429 + }, + { + "ce_ib": 4.836035251617432, + "ce_orig": 0.8449050188064575, + "epoch": 0.4109569343590481, + "kl_loss": 0.23588550090789795, + "loss_ib": 0.007194890175014734, + "step": 1429 + }, + { + "ce_ib": 4.731801986694336, + "ce_orig": 0.5297708511352539, + "epoch": 0.4109569343590481, + "kl_loss": 0.2536720931529999, + "loss_ib": 0.0072685228660702705, + "step": 1429 + }, + { + "epoch": 0.41124451793802574, + "grad_norm": 0.10925720632076263, + "learning_rate": 9.706084015878496e-06, + "loss": 0.8944, + "step": 1430 + }, + { + "ce_ib": 5.5966572761535645, + "ce_orig": 0.7206467390060425, + "epoch": 0.41124451793802574, + "kl_loss": 0.27587223052978516, + "loss_ib": 0.008355379104614258, + "step": 1430 + }, + { + "ce_ib": 6.693604946136475, + "ce_orig": 0.7028912305831909, + "epoch": 0.41124451793802574, + "kl_loss": 0.3054695129394531, + "loss_ib": 0.009748300537467003, + "step": 1430 + }, + { + "ce_ib": 8.012578964233398, + "ce_orig": 0.9017972350120544, + "epoch": 0.41124451793802574, + "kl_loss": 0.35887661576271057, + "loss_ib": 0.011601345613598824, + "step": 1430 + }, + { + "ce_ib": 7.473693370819092, + "ce_orig": 1.0751852989196777, + "epoch": 0.41124451793802574, + "kl_loss": 0.25154802203178406, + "loss_ib": 0.009989173151552677, + "step": 1430 + }, + { + "ce_ib": 4.834800720214844, + "ce_orig": 0.4753416180610657, + "epoch": 0.41153210151700337, + "kl_loss": 0.29349297285079956, + "loss_ib": 0.007769729942083359, + "step": 1431 + }, + { + "ce_ib": 5.113678932189941, + "ce_orig": 0.7640902996063232, + "epoch": 0.41153210151700337, + "kl_loss": 0.30045798420906067, + "loss_ib": 0.00811825878918171, + "step": 1431 + }, + { + "ce_ib": 7.861815452575684, + "ce_orig": 1.0139867067337036, + "epoch": 0.41153210151700337, + "kl_loss": 0.1912413090467453, + "loss_ib": 0.009774228557944298, + "step": 1431 + }, + { + "ce_ib": 6.809225082397461, + "ce_orig": 0.9317693710327148, + "epoch": 0.41153210151700337, + "kl_loss": 0.3157738447189331, + "loss_ib": 0.009966962970793247, + "step": 1431 + }, + { + "ce_ib": 3.798652172088623, + "ce_orig": 0.19985660910606384, + "epoch": 0.411819685095981, + "kl_loss": 0.38498273491859436, + "loss_ib": 0.007648479659110308, + "step": 1432 + }, + { + "ce_ib": 5.891729354858398, + "ce_orig": 1.026416301727295, + "epoch": 0.411819685095981, + "kl_loss": 0.18191149830818176, + "loss_ib": 0.007710844278335571, + "step": 1432 + }, + { + "ce_ib": 5.983758926391602, + "ce_orig": 0.9381806254386902, + "epoch": 0.411819685095981, + "kl_loss": 0.18528085947036743, + "loss_ib": 0.007836567237973213, + "step": 1432 + }, + { + "ce_ib": 5.368931770324707, + "ce_orig": 0.6532841324806213, + "epoch": 0.411819685095981, + "kl_loss": 0.23761454224586487, + "loss_ib": 0.007745077367872, + "step": 1432 + }, + { + "ce_ib": 4.2342753410339355, + "ce_orig": 0.6612984538078308, + "epoch": 0.41210726867495867, + "kl_loss": 0.47673696279525757, + "loss_ib": 0.00900164432823658, + "step": 1433 + }, + { + "ce_ib": 7.980175018310547, + "ce_orig": 1.3077398538589478, + "epoch": 0.41210726867495867, + "kl_loss": 0.2359972447156906, + "loss_ib": 0.010340146720409393, + "step": 1433 + }, + { + "ce_ib": 5.692867279052734, + "ce_orig": 0.4656047224998474, + "epoch": 0.41210726867495867, + "kl_loss": 0.32997220754623413, + "loss_ib": 0.008992589078843594, + "step": 1433 + }, + { + "ce_ib": 7.435876369476318, + "ce_orig": 1.2225924730300903, + "epoch": 0.41210726867495867, + "kl_loss": 0.3203752338886261, + "loss_ib": 0.010639629326760769, + "step": 1433 + }, + { + "ce_ib": 5.403990268707275, + "ce_orig": 0.548412561416626, + "epoch": 0.4123948522539363, + "kl_loss": 0.23780032992362976, + "loss_ib": 0.007781993132084608, + "step": 1434 + }, + { + "ce_ib": 5.518211841583252, + "ce_orig": 0.7836940288543701, + "epoch": 0.4123948522539363, + "kl_loss": 0.2192806899547577, + "loss_ib": 0.007711017969995737, + "step": 1434 + }, + { + "ce_ib": 6.065393447875977, + "ce_orig": 0.8244072198867798, + "epoch": 0.4123948522539363, + "kl_loss": 0.16638624668121338, + "loss_ib": 0.007729256059974432, + "step": 1434 + }, + { + "ce_ib": 4.7424116134643555, + "ce_orig": 0.6653726100921631, + "epoch": 0.4123948522539363, + "kl_loss": 0.2525525391101837, + "loss_ib": 0.007267937064170837, + "step": 1434 + }, + { + "epoch": 0.4126824358329139, + "grad_norm": 0.09591538459062576, + "learning_rate": 9.703456708296405e-06, + "loss": 0.8678, + "step": 1435 + }, + { + "ce_ib": 5.17003870010376, + "ce_orig": 0.6860577464103699, + "epoch": 0.4126824358329139, + "kl_loss": 0.239205002784729, + "loss_ib": 0.007562088780105114, + "step": 1435 + }, + { + "ce_ib": 6.399457931518555, + "ce_orig": 1.0888773202896118, + "epoch": 0.4126824358329139, + "kl_loss": 0.1467989981174469, + "loss_ib": 0.007867448031902313, + "step": 1435 + }, + { + "ce_ib": 6.637815952301025, + "ce_orig": 0.8630712628364563, + "epoch": 0.4126824358329139, + "kl_loss": 0.19071052968502045, + "loss_ib": 0.008544920943677425, + "step": 1435 + }, + { + "ce_ib": 4.753064155578613, + "ce_orig": 0.8432294726371765, + "epoch": 0.4126824358329139, + "kl_loss": 0.2774086892604828, + "loss_ib": 0.007527151145040989, + "step": 1435 + }, + { + "ce_ib": 10.428075790405273, + "ce_orig": 1.4622315168380737, + "epoch": 0.4129700194118916, + "kl_loss": 0.1714332103729248, + "loss_ib": 0.012142407707870007, + "step": 1436 + }, + { + "ce_ib": 4.453615188598633, + "ce_orig": 0.5258841514587402, + "epoch": 0.4129700194118916, + "kl_loss": 0.19536477327346802, + "loss_ib": 0.006407263223081827, + "step": 1436 + }, + { + "ce_ib": 4.699621200561523, + "ce_orig": 0.3211705982685089, + "epoch": 0.4129700194118916, + "kl_loss": 0.3277928829193115, + "loss_ib": 0.00797754991799593, + "step": 1436 + }, + { + "ce_ib": 7.0514140129089355, + "ce_orig": 1.1391209363937378, + "epoch": 0.4129700194118916, + "kl_loss": 0.24634301662445068, + "loss_ib": 0.009514844045042992, + "step": 1436 + }, + { + "ce_ib": 3.10416316986084, + "ce_orig": 0.34879812598228455, + "epoch": 0.4132576029908692, + "kl_loss": 0.5351189970970154, + "loss_ib": 0.008455352857708931, + "step": 1437 + }, + { + "ce_ib": 6.911233425140381, + "ce_orig": 0.7747100591659546, + "epoch": 0.4132576029908692, + "kl_loss": 0.3284524977207184, + "loss_ib": 0.010195758193731308, + "step": 1437 + }, + { + "ce_ib": 8.081716537475586, + "ce_orig": 1.54729163646698, + "epoch": 0.4132576029908692, + "kl_loss": 0.22005927562713623, + "loss_ib": 0.010282308794558048, + "step": 1437 + }, + { + "ce_ib": 4.381701946258545, + "ce_orig": 0.7111383080482483, + "epoch": 0.4132576029908692, + "kl_loss": 0.23173516988754272, + "loss_ib": 0.006699053570628166, + "step": 1437 + }, + { + "ce_ib": 3.102794885635376, + "ce_orig": 0.6434416770935059, + "epoch": 0.41354518656984685, + "kl_loss": 0.1948084980249405, + "loss_ib": 0.0050508794374763966, + "step": 1438 + }, + { + "ce_ib": 2.404301404953003, + "ce_orig": 0.49301013350486755, + "epoch": 0.41354518656984685, + "kl_loss": 0.1756540983915329, + "loss_ib": 0.004160842392593622, + "step": 1438 + }, + { + "ce_ib": 6.963180065155029, + "ce_orig": 1.3057996034622192, + "epoch": 0.41354518656984685, + "kl_loss": 0.2243916094303131, + "loss_ib": 0.009207095950841904, + "step": 1438 + }, + { + "ce_ib": 7.458621978759766, + "ce_orig": 1.0133202075958252, + "epoch": 0.41354518656984685, + "kl_loss": 0.2954729497432709, + "loss_ib": 0.01041335053741932, + "step": 1438 + }, + { + "ce_ib": 6.399861812591553, + "ce_orig": 0.6673619747161865, + "epoch": 0.4138327701488245, + "kl_loss": 0.2691270709037781, + "loss_ib": 0.009091132320463657, + "step": 1439 + }, + { + "ce_ib": 3.874779224395752, + "ce_orig": 0.32614997029304504, + "epoch": 0.4138327701488245, + "kl_loss": 0.21969608962535858, + "loss_ib": 0.006071740295737982, + "step": 1439 + }, + { + "ce_ib": 6.7588372230529785, + "ce_orig": 0.9650707244873047, + "epoch": 0.4138327701488245, + "kl_loss": 0.257515549659729, + "loss_ib": 0.009333992376923561, + "step": 1439 + }, + { + "ce_ib": 5.581239700317383, + "ce_orig": 0.7861056923866272, + "epoch": 0.4138327701488245, + "kl_loss": 0.17133313417434692, + "loss_ib": 0.007294571027159691, + "step": 1439 + }, + { + "epoch": 0.41412035372780215, + "grad_norm": 0.08960733562707901, + "learning_rate": 9.700818068995407e-06, + "loss": 0.8817, + "step": 1440 + }, + { + "ce_ib": 7.127894401550293, + "ce_orig": 1.099780559539795, + "epoch": 0.41412035372780215, + "kl_loss": 0.21275877952575684, + "loss_ib": 0.009255481883883476, + "step": 1440 + }, + { + "ce_ib": 5.4239821434021, + "ce_orig": 1.1731576919555664, + "epoch": 0.41412035372780215, + "kl_loss": 0.21886183321475983, + "loss_ib": 0.007612599991261959, + "step": 1440 + }, + { + "ce_ib": 5.250892639160156, + "ce_orig": 0.7625457644462585, + "epoch": 0.41412035372780215, + "kl_loss": 0.19820094108581543, + "loss_ib": 0.007232902105897665, + "step": 1440 + }, + { + "ce_ib": 6.036355972290039, + "ce_orig": 0.5982348322868347, + "epoch": 0.41412035372780215, + "kl_loss": 0.20665858685970306, + "loss_ib": 0.008102942258119583, + "step": 1440 + }, + { + "ce_ib": 3.7877354621887207, + "ce_orig": 0.6050831079483032, + "epoch": 0.4144079373067798, + "kl_loss": 0.22110241651535034, + "loss_ib": 0.005998759064823389, + "step": 1441 + }, + { + "ce_ib": 7.939857006072998, + "ce_orig": 1.3141567707061768, + "epoch": 0.4144079373067798, + "kl_loss": 0.47960013151168823, + "loss_ib": 0.012735857628285885, + "step": 1441 + }, + { + "ce_ib": 4.724193096160889, + "ce_orig": 0.5596928000450134, + "epoch": 0.4144079373067798, + "kl_loss": 0.25891292095184326, + "loss_ib": 0.007313322275876999, + "step": 1441 + }, + { + "ce_ib": 4.276533126831055, + "ce_orig": 0.42426598072052, + "epoch": 0.4144079373067798, + "kl_loss": 0.26543980836868286, + "loss_ib": 0.006930931005626917, + "step": 1441 + }, + { + "ce_ib": 3.9375219345092773, + "ce_orig": 0.5840734243392944, + "epoch": 0.4146955208857574, + "kl_loss": 0.19807593524456024, + "loss_ib": 0.005918281152844429, + "step": 1442 + }, + { + "ce_ib": 5.756697654724121, + "ce_orig": 0.6194900870323181, + "epoch": 0.4146955208857574, + "kl_loss": 0.2578275203704834, + "loss_ib": 0.008334972895681858, + "step": 1442 + }, + { + "ce_ib": 5.568426132202148, + "ce_orig": 0.6911214590072632, + "epoch": 0.4146955208857574, + "kl_loss": 0.29421865940093994, + "loss_ib": 0.00851061288267374, + "step": 1442 + }, + { + "ce_ib": 8.481583595275879, + "ce_orig": 1.2472021579742432, + "epoch": 0.4146955208857574, + "kl_loss": 0.2462369054555893, + "loss_ib": 0.010943952947854996, + "step": 1442 + }, + { + "ce_ib": 5.0001420974731445, + "ce_orig": 0.6307148337364197, + "epoch": 0.4149831044647351, + "kl_loss": 0.22077739238739014, + "loss_ib": 0.007207916118204594, + "step": 1443 + }, + { + "ce_ib": 4.8295063972473145, + "ce_orig": 0.3529702425003052, + "epoch": 0.4149831044647351, + "kl_loss": 0.2350378781557083, + "loss_ib": 0.007179885171353817, + "step": 1443 + }, + { + "ce_ib": 5.45084810256958, + "ce_orig": 0.8416758179664612, + "epoch": 0.4149831044647351, + "kl_loss": 0.503132164478302, + "loss_ib": 0.01048217061907053, + "step": 1443 + }, + { + "ce_ib": 6.197181701660156, + "ce_orig": 0.5805358290672302, + "epoch": 0.4149831044647351, + "kl_loss": 0.1916370987892151, + "loss_ib": 0.008113552816212177, + "step": 1443 + }, + { + "ce_ib": 5.213675498962402, + "ce_orig": 0.5786038041114807, + "epoch": 0.4152706880437127, + "kl_loss": 0.18455766141414642, + "loss_ib": 0.007059251889586449, + "step": 1444 + }, + { + "ce_ib": 3.5388176441192627, + "ce_orig": 0.5333930850028992, + "epoch": 0.4152706880437127, + "kl_loss": 0.14553654193878174, + "loss_ib": 0.004994182847440243, + "step": 1444 + }, + { + "ce_ib": 3.691051959991455, + "ce_orig": 0.5889726281166077, + "epoch": 0.4152706880437127, + "kl_loss": 0.17222023010253906, + "loss_ib": 0.005413254257291555, + "step": 1444 + }, + { + "ce_ib": 7.286379814147949, + "ce_orig": 0.6404244899749756, + "epoch": 0.4152706880437127, + "kl_loss": 0.3586054742336273, + "loss_ib": 0.01087243389338255, + "step": 1444 + }, + { + "epoch": 0.4155582716226903, + "grad_norm": 0.11402870714664459, + "learning_rate": 9.6981681043326e-06, + "loss": 0.8482, + "step": 1445 + }, + { + "ce_ib": 4.635682106018066, + "ce_orig": 0.7357145547866821, + "epoch": 0.4155582716226903, + "kl_loss": 0.18429890275001526, + "loss_ib": 0.00647867051884532, + "step": 1445 + }, + { + "ce_ib": 8.371834754943848, + "ce_orig": 1.3967629671096802, + "epoch": 0.4155582716226903, + "kl_loss": 0.25143635272979736, + "loss_ib": 0.010886197909712791, + "step": 1445 + }, + { + "ce_ib": 4.193347930908203, + "ce_orig": 0.815974771976471, + "epoch": 0.4155582716226903, + "kl_loss": 0.17522263526916504, + "loss_ib": 0.005945574026554823, + "step": 1445 + }, + { + "ce_ib": 4.08240270614624, + "ce_orig": 0.6947717666625977, + "epoch": 0.4155582716226903, + "kl_loss": 0.13767096400260925, + "loss_ib": 0.005459112580865622, + "step": 1445 + }, + { + "ce_ib": 4.673977375030518, + "ce_orig": 0.7940452098846436, + "epoch": 0.415845855201668, + "kl_loss": 0.19539020955562592, + "loss_ib": 0.006627879571169615, + "step": 1446 + }, + { + "ce_ib": 6.093282699584961, + "ce_orig": 0.8800034523010254, + "epoch": 0.415845855201668, + "kl_loss": 0.26846742630004883, + "loss_ib": 0.00877795647829771, + "step": 1446 + }, + { + "ce_ib": 4.820080757141113, + "ce_orig": 0.34295347332954407, + "epoch": 0.415845855201668, + "kl_loss": 0.3963184952735901, + "loss_ib": 0.008783265016973019, + "step": 1446 + }, + { + "ce_ib": 8.66891098022461, + "ce_orig": 1.160307765007019, + "epoch": 0.415845855201668, + "kl_loss": 0.18693780899047852, + "loss_ib": 0.010538289323449135, + "step": 1446 + }, + { + "ce_ib": 8.202392578125, + "ce_orig": 0.8199527263641357, + "epoch": 0.4161334387806456, + "kl_loss": 0.3237569332122803, + "loss_ib": 0.011439962312579155, + "step": 1447 + }, + { + "ce_ib": 5.400129318237305, + "ce_orig": 0.445811003446579, + "epoch": 0.4161334387806456, + "kl_loss": 0.2745596170425415, + "loss_ib": 0.008145725354552269, + "step": 1447 + }, + { + "ce_ib": 3.9494316577911377, + "ce_orig": 0.4661496877670288, + "epoch": 0.4161334387806456, + "kl_loss": 0.3082231879234314, + "loss_ib": 0.007031663320958614, + "step": 1447 + }, + { + "ce_ib": 5.456274032592773, + "ce_orig": 0.9136053323745728, + "epoch": 0.4161334387806456, + "kl_loss": 0.2540931701660156, + "loss_ib": 0.00799720548093319, + "step": 1447 + }, + { + "ce_ib": 11.496981620788574, + "ce_orig": 1.669168472290039, + "epoch": 0.41642102235962325, + "kl_loss": 0.2675935626029968, + "loss_ib": 0.014172916300594807, + "step": 1448 + }, + { + "ce_ib": 6.146816730499268, + "ce_orig": 0.32925063371658325, + "epoch": 0.41642102235962325, + "kl_loss": 0.3541129231452942, + "loss_ib": 0.009687945246696472, + "step": 1448 + }, + { + "ce_ib": 6.110250473022461, + "ce_orig": 0.9609233736991882, + "epoch": 0.41642102235962325, + "kl_loss": 0.21723921597003937, + "loss_ib": 0.008282641880214214, + "step": 1448 + }, + { + "ce_ib": 2.770850419998169, + "ce_orig": 0.3007255494594574, + "epoch": 0.41642102235962325, + "kl_loss": 0.7660866379737854, + "loss_ib": 0.010431716218590736, + "step": 1448 + }, + { + "ce_ib": 3.9824109077453613, + "ce_orig": 0.590600311756134, + "epoch": 0.41670860593860093, + "kl_loss": 0.3264272212982178, + "loss_ib": 0.00724668288603425, + "step": 1449 + }, + { + "ce_ib": 4.859111785888672, + "ce_orig": 0.7057644724845886, + "epoch": 0.41670860593860093, + "kl_loss": 0.2452726811170578, + "loss_ib": 0.0073118386790156364, + "step": 1449 + }, + { + "ce_ib": 7.2970404624938965, + "ce_orig": 0.9776987433433533, + "epoch": 0.41670860593860093, + "kl_loss": 0.266795814037323, + "loss_ib": 0.009964998811483383, + "step": 1449 + }, + { + "ce_ib": 5.001408576965332, + "ce_orig": 0.7703402638435364, + "epoch": 0.41670860593860093, + "kl_loss": 0.2569698095321655, + "loss_ib": 0.007571106310933828, + "step": 1449 + }, + { + "epoch": 0.41699618951757855, + "grad_norm": 0.10733474045991898, + "learning_rate": 9.69550682069236e-06, + "loss": 0.8422, + "step": 1450 + }, + { + "ce_ib": 6.8307037353515625, + "ce_orig": 0.8752904534339905, + "epoch": 0.41699618951757855, + "kl_loss": 0.37020280957221985, + "loss_ib": 0.010532732121646404, + "step": 1450 + }, + { + "ce_ib": 6.152828216552734, + "ce_orig": 0.8328402638435364, + "epoch": 0.41699618951757855, + "kl_loss": 0.3578481078147888, + "loss_ib": 0.009731309488415718, + "step": 1450 + }, + { + "ce_ib": 4.250329971313477, + "ce_orig": 0.46717268228530884, + "epoch": 0.41699618951757855, + "kl_loss": 0.32806381583213806, + "loss_ib": 0.0075309681706130505, + "step": 1450 + }, + { + "ce_ib": 5.095127105712891, + "ce_orig": 0.3596700131893158, + "epoch": 0.41699618951757855, + "kl_loss": 0.24298208951950073, + "loss_ib": 0.007524948101490736, + "step": 1450 + }, + { + "ce_ib": 5.839034080505371, + "ce_orig": 0.8622336387634277, + "epoch": 0.4172837730965562, + "kl_loss": 0.22366026043891907, + "loss_ib": 0.008075636811554432, + "step": 1451 + }, + { + "ce_ib": 6.032916069030762, + "ce_orig": 1.2333753108978271, + "epoch": 0.4172837730965562, + "kl_loss": 0.27827978134155273, + "loss_ib": 0.008815714158117771, + "step": 1451 + }, + { + "ce_ib": 6.857519626617432, + "ce_orig": 0.8044654130935669, + "epoch": 0.4172837730965562, + "kl_loss": 0.29137736558914185, + "loss_ib": 0.00977129302918911, + "step": 1451 + }, + { + "ce_ib": 5.73335075378418, + "ce_orig": 0.6533335447311401, + "epoch": 0.4172837730965562, + "kl_loss": 0.2547338008880615, + "loss_ib": 0.008280688896775246, + "step": 1451 + }, + { + "ce_ib": 8.236536026000977, + "ce_orig": 1.2800368070602417, + "epoch": 0.4175713566755338, + "kl_loss": 0.205164834856987, + "loss_ib": 0.010288184508681297, + "step": 1452 + }, + { + "ce_ib": 7.160531044006348, + "ce_orig": 0.6678914427757263, + "epoch": 0.4175713566755338, + "kl_loss": 0.357463538646698, + "loss_ib": 0.010735166259109974, + "step": 1452 + }, + { + "ce_ib": 5.64363431930542, + "ce_orig": 0.8172556757926941, + "epoch": 0.4175713566755338, + "kl_loss": 0.2605098485946655, + "loss_ib": 0.008248732425272465, + "step": 1452 + }, + { + "ce_ib": 6.310688495635986, + "ce_orig": 0.6108517646789551, + "epoch": 0.4175713566755338, + "kl_loss": 0.2450312376022339, + "loss_ib": 0.008761000819504261, + "step": 1452 + }, + { + "ce_ib": 4.53042459487915, + "ce_orig": 0.8491594791412354, + "epoch": 0.4178589402545115, + "kl_loss": 0.2809299826622009, + "loss_ib": 0.007339724339544773, + "step": 1453 + }, + { + "ce_ib": 6.299931526184082, + "ce_orig": 0.6813634634017944, + "epoch": 0.4178589402545115, + "kl_loss": 0.21116459369659424, + "loss_ib": 0.008411576971411705, + "step": 1453 + }, + { + "ce_ib": 7.031019687652588, + "ce_orig": 0.8715657591819763, + "epoch": 0.4178589402545115, + "kl_loss": 0.2228662371635437, + "loss_ib": 0.009259682148694992, + "step": 1453 + }, + { + "ce_ib": 6.7674736976623535, + "ce_orig": 0.8163739442825317, + "epoch": 0.4178589402545115, + "kl_loss": 0.31730207800865173, + "loss_ib": 0.0099404938519001, + "step": 1453 + }, + { + "ce_ib": 6.6158528327941895, + "ce_orig": 0.8040879964828491, + "epoch": 0.4181465238334891, + "kl_loss": 0.3747669458389282, + "loss_ib": 0.010363521054387093, + "step": 1454 + }, + { + "ce_ib": 4.000415802001953, + "ce_orig": 0.6374342441558838, + "epoch": 0.4181465238334891, + "kl_loss": 0.19590912759304047, + "loss_ib": 0.005959507077932358, + "step": 1454 + }, + { + "ce_ib": 7.182856559753418, + "ce_orig": 1.3756706714630127, + "epoch": 0.4181465238334891, + "kl_loss": 0.27559158205986023, + "loss_ib": 0.009938772767782211, + "step": 1454 + }, + { + "ce_ib": 7.4786176681518555, + "ce_orig": 1.2998679876327515, + "epoch": 0.4181465238334891, + "kl_loss": 0.2330915331840515, + "loss_ib": 0.009809533134102821, + "step": 1454 + }, + { + "epoch": 0.4184341074124667, + "grad_norm": 0.12323027849197388, + "learning_rate": 9.692834224486338e-06, + "loss": 0.8675, + "step": 1455 + }, + { + "ce_ib": 2.875929832458496, + "ce_orig": 0.5546712279319763, + "epoch": 0.4184341074124667, + "kl_loss": 0.1498483419418335, + "loss_ib": 0.004374413285404444, + "step": 1455 + }, + { + "ce_ib": 5.155608654022217, + "ce_orig": 0.6677677035331726, + "epoch": 0.4184341074124667, + "kl_loss": 0.2732641100883484, + "loss_ib": 0.00788824912160635, + "step": 1455 + }, + { + "ce_ib": 5.532578945159912, + "ce_orig": 0.8035033345222473, + "epoch": 0.4184341074124667, + "kl_loss": 0.2326735258102417, + "loss_ib": 0.007859313860535622, + "step": 1455 + }, + { + "ce_ib": 5.624016761779785, + "ce_orig": 0.5841573476791382, + "epoch": 0.4184341074124667, + "kl_loss": 0.3162783980369568, + "loss_ib": 0.008786801248788834, + "step": 1455 + }, + { + "ce_ib": 5.438791751861572, + "ce_orig": 0.7679370045661926, + "epoch": 0.4187216909914444, + "kl_loss": 0.25558215379714966, + "loss_ib": 0.007994613610208035, + "step": 1456 + }, + { + "ce_ib": 4.692433834075928, + "ce_orig": 0.50995272397995, + "epoch": 0.4187216909914444, + "kl_loss": 0.19274017214775085, + "loss_ib": 0.006619835272431374, + "step": 1456 + }, + { + "ce_ib": 8.270526885986328, + "ce_orig": 1.1970558166503906, + "epoch": 0.4187216909914444, + "kl_loss": 0.3240256905555725, + "loss_ib": 0.011510784737765789, + "step": 1456 + }, + { + "ce_ib": 4.748291969299316, + "ce_orig": 0.6559625267982483, + "epoch": 0.4187216909914444, + "kl_loss": 0.24350781738758087, + "loss_ib": 0.007183369714766741, + "step": 1456 + }, + { + "ce_ib": 3.4031496047973633, + "ce_orig": 0.439064085483551, + "epoch": 0.41900927457042203, + "kl_loss": 0.3383101224899292, + "loss_ib": 0.006786250974982977, + "step": 1457 + }, + { + "ce_ib": 3.4370038509368896, + "ce_orig": 0.5014415383338928, + "epoch": 0.41900927457042203, + "kl_loss": 0.24829784035682678, + "loss_ib": 0.005919982213526964, + "step": 1457 + }, + { + "ce_ib": 7.7961039543151855, + "ce_orig": 0.9597184658050537, + "epoch": 0.41900927457042203, + "kl_loss": 0.20575028657913208, + "loss_ib": 0.009853607043623924, + "step": 1457 + }, + { + "ce_ib": 5.096848964691162, + "ce_orig": 0.859772264957428, + "epoch": 0.41900927457042203, + "kl_loss": 0.310441255569458, + "loss_ib": 0.00820126198232174, + "step": 1457 + }, + { + "ce_ib": 6.103631019592285, + "ce_orig": 0.82071852684021, + "epoch": 0.41929685814939965, + "kl_loss": 0.25560837984085083, + "loss_ib": 0.008659714832901955, + "step": 1458 + }, + { + "ce_ib": 4.901415824890137, + "ce_orig": 0.9847708344459534, + "epoch": 0.41929685814939965, + "kl_loss": 0.2805362343788147, + "loss_ib": 0.007706777658313513, + "step": 1458 + }, + { + "ce_ib": 4.433986186981201, + "ce_orig": 0.6790226697921753, + "epoch": 0.41929685814939965, + "kl_loss": 0.2497805505990982, + "loss_ib": 0.006931791547685862, + "step": 1458 + }, + { + "ce_ib": 9.856012344360352, + "ce_orig": 1.251712679862976, + "epoch": 0.41929685814939965, + "kl_loss": 0.2390027940273285, + "loss_ib": 0.01224603969603777, + "step": 1458 + }, + { + "ce_ib": 5.431840896606445, + "ce_orig": 0.9738479256629944, + "epoch": 0.41958444172837733, + "kl_loss": 0.2178439497947693, + "loss_ib": 0.007610280532389879, + "step": 1459 + }, + { + "ce_ib": 5.6414570808410645, + "ce_orig": 0.7506046891212463, + "epoch": 0.41958444172837733, + "kl_loss": 0.19342423975467682, + "loss_ib": 0.007575699593871832, + "step": 1459 + }, + { + "ce_ib": 5.220902919769287, + "ce_orig": 0.7075722813606262, + "epoch": 0.41958444172837733, + "kl_loss": 0.24338899552822113, + "loss_ib": 0.007654793094843626, + "step": 1459 + }, + { + "ce_ib": 4.7413010597229, + "ce_orig": 0.7294593453407288, + "epoch": 0.41958444172837733, + "kl_loss": 0.2376450002193451, + "loss_ib": 0.007117751054465771, + "step": 1459 + }, + { + "epoch": 0.41987202530735496, + "grad_norm": 0.1294163465499878, + "learning_rate": 9.69015032215344e-06, + "loss": 0.8336, + "step": 1460 + }, + { + "ce_ib": 4.186010837554932, + "ce_orig": 0.7451074123382568, + "epoch": 0.41987202530735496, + "kl_loss": 0.16504120826721191, + "loss_ib": 0.005836423020809889, + "step": 1460 + }, + { + "ce_ib": 5.1512322425842285, + "ce_orig": 0.4749121367931366, + "epoch": 0.41987202530735496, + "kl_loss": 0.3345229923725128, + "loss_ib": 0.008496462367475033, + "step": 1460 + }, + { + "ce_ib": 6.151814937591553, + "ce_orig": 0.7117960453033447, + "epoch": 0.41987202530735496, + "kl_loss": 0.22646743059158325, + "loss_ib": 0.008416488766670227, + "step": 1460 + }, + { + "ce_ib": 6.976144313812256, + "ce_orig": 1.4999809265136719, + "epoch": 0.41987202530735496, + "kl_loss": 0.18899531662464142, + "loss_ib": 0.00886609684675932, + "step": 1460 + }, + { + "ce_ib": 2.479775905609131, + "ce_orig": 0.27685609459877014, + "epoch": 0.4201596088863326, + "kl_loss": 0.7138643264770508, + "loss_ib": 0.009618419222533703, + "step": 1461 + }, + { + "ce_ib": 5.140337944030762, + "ce_orig": 0.6391001343727112, + "epoch": 0.4201596088863326, + "kl_loss": 0.2612397074699402, + "loss_ib": 0.007752734702080488, + "step": 1461 + }, + { + "ce_ib": 3.4452733993530273, + "ce_orig": 0.5357054471969604, + "epoch": 0.4201596088863326, + "kl_loss": 0.24114762246608734, + "loss_ib": 0.005856749136000872, + "step": 1461 + }, + { + "ce_ib": 4.0398712158203125, + "ce_orig": 0.6190880537033081, + "epoch": 0.4201596088863326, + "kl_loss": 0.28476786613464355, + "loss_ib": 0.006887550000101328, + "step": 1461 + }, + { + "ce_ib": 6.414108753204346, + "ce_orig": 1.0519330501556396, + "epoch": 0.4204471924653102, + "kl_loss": 0.23815977573394775, + "loss_ib": 0.008795706555247307, + "step": 1462 + }, + { + "ce_ib": 4.921961784362793, + "ce_orig": 0.7752912044525146, + "epoch": 0.4204471924653102, + "kl_loss": 0.21857008337974548, + "loss_ib": 0.0071076625026762486, + "step": 1462 + }, + { + "ce_ib": 3.1480214595794678, + "ce_orig": 0.6203112602233887, + "epoch": 0.4204471924653102, + "kl_loss": 0.23576763272285461, + "loss_ib": 0.005505697801709175, + "step": 1462 + }, + { + "ce_ib": 7.7558770179748535, + "ce_orig": 1.295670986175537, + "epoch": 0.4204471924653102, + "kl_loss": 0.2446509599685669, + "loss_ib": 0.010202386416494846, + "step": 1462 + }, + { + "ce_ib": 8.119604110717773, + "ce_orig": 1.3207716941833496, + "epoch": 0.4207347760442879, + "kl_loss": 0.2782466411590576, + "loss_ib": 0.010902070440351963, + "step": 1463 + }, + { + "ce_ib": 5.560944080352783, + "ce_orig": 0.8067669868469238, + "epoch": 0.4207347760442879, + "kl_loss": 0.33775997161865234, + "loss_ib": 0.008938543498516083, + "step": 1463 + }, + { + "ce_ib": 9.663983345031738, + "ce_orig": 0.9008134603500366, + "epoch": 0.4207347760442879, + "kl_loss": 0.4337159991264343, + "loss_ib": 0.014001142233610153, + "step": 1463 + }, + { + "ce_ib": 1.857756495475769, + "ce_orig": 0.36369869112968445, + "epoch": 0.4207347760442879, + "kl_loss": 0.12271080166101456, + "loss_ib": 0.003084864467382431, + "step": 1463 + }, + { + "ce_ib": 6.856328964233398, + "ce_orig": 1.1367650032043457, + "epoch": 0.4210223596232655, + "kl_loss": 0.26799026131629944, + "loss_ib": 0.009536231867969036, + "step": 1464 + }, + { + "ce_ib": 5.147492408752441, + "ce_orig": 0.46571192145347595, + "epoch": 0.4210223596232655, + "kl_loss": 0.3239108920097351, + "loss_ib": 0.008386600762605667, + "step": 1464 + }, + { + "ce_ib": 5.288569450378418, + "ce_orig": 0.36156103014945984, + "epoch": 0.4210223596232655, + "kl_loss": 0.27096402645111084, + "loss_ib": 0.007998209446668625, + "step": 1464 + }, + { + "ce_ib": 4.390820503234863, + "ce_orig": 0.6171783804893494, + "epoch": 0.4210223596232655, + "kl_loss": 0.18023133277893066, + "loss_ib": 0.006193133536726236, + "step": 1464 + }, + { + "epoch": 0.42130994320224313, + "grad_norm": 0.10051855444908142, + "learning_rate": 9.687455120159808e-06, + "loss": 0.8689, + "step": 1465 + }, + { + "ce_ib": 7.877378463745117, + "ce_orig": 1.2540488243103027, + "epoch": 0.42130994320224313, + "kl_loss": 0.1941794902086258, + "loss_ib": 0.009819173254072666, + "step": 1465 + }, + { + "ce_ib": 9.131965637207031, + "ce_orig": 0.6505073308944702, + "epoch": 0.42130994320224313, + "kl_loss": 0.2972337007522583, + "loss_ib": 0.012104302644729614, + "step": 1465 + }, + { + "ce_ib": 7.370760917663574, + "ce_orig": 1.3000590801239014, + "epoch": 0.42130994320224313, + "kl_loss": 0.19257795810699463, + "loss_ib": 0.009296540170907974, + "step": 1465 + }, + { + "ce_ib": 5.004774570465088, + "ce_orig": 0.4962858259677887, + "epoch": 0.42130994320224313, + "kl_loss": 0.18925777077674866, + "loss_ib": 0.006897352635860443, + "step": 1465 + }, + { + "ce_ib": 2.390171527862549, + "ce_orig": 0.26865100860595703, + "epoch": 0.4215975267812208, + "kl_loss": 0.5247079133987427, + "loss_ib": 0.0076372502371668816, + "step": 1466 + }, + { + "ce_ib": 3.2409467697143555, + "ce_orig": 0.5981196165084839, + "epoch": 0.4215975267812208, + "kl_loss": 0.1973566710948944, + "loss_ib": 0.0052145132794976234, + "step": 1466 + }, + { + "ce_ib": 5.385571479797363, + "ce_orig": 1.052996039390564, + "epoch": 0.4215975267812208, + "kl_loss": 0.22852903604507446, + "loss_ib": 0.0076708621345460415, + "step": 1466 + }, + { + "ce_ib": 10.688210487365723, + "ce_orig": 1.7798447608947754, + "epoch": 0.4215975267812208, + "kl_loss": 0.45732739567756653, + "loss_ib": 0.015261484310030937, + "step": 1466 + }, + { + "ce_ib": 7.756859302520752, + "ce_orig": 0.7536134719848633, + "epoch": 0.42188511036019843, + "kl_loss": 0.35317015647888184, + "loss_ib": 0.01128856185823679, + "step": 1467 + }, + { + "ce_ib": 4.976874828338623, + "ce_orig": 0.6887327432632446, + "epoch": 0.42188511036019843, + "kl_loss": 0.3163662552833557, + "loss_ib": 0.00814053788781166, + "step": 1467 + }, + { + "ce_ib": 8.146329879760742, + "ce_orig": 0.9967268705368042, + "epoch": 0.42188511036019843, + "kl_loss": 0.21219246089458466, + "loss_ib": 0.010268254205584526, + "step": 1467 + }, + { + "ce_ib": 4.826017379760742, + "ce_orig": 0.3177625238895416, + "epoch": 0.42188511036019843, + "kl_loss": 0.3488796055316925, + "loss_ib": 0.008314813487231731, + "step": 1467 + }, + { + "ce_ib": 7.634398460388184, + "ce_orig": 1.1551445722579956, + "epoch": 0.42217269393917606, + "kl_loss": 0.2153216451406479, + "loss_ib": 0.009787614457309246, + "step": 1468 + }, + { + "ce_ib": 7.4884843826293945, + "ce_orig": 0.9426076412200928, + "epoch": 0.42217269393917606, + "kl_loss": 0.34704655408859253, + "loss_ib": 0.010958950035274029, + "step": 1468 + }, + { + "ce_ib": 4.062495231628418, + "ce_orig": 0.3596497178077698, + "epoch": 0.42217269393917606, + "kl_loss": 0.4352097511291504, + "loss_ib": 0.00841459259390831, + "step": 1468 + }, + { + "ce_ib": 2.4920849800109863, + "ce_orig": 0.40482640266418457, + "epoch": 0.42217269393917606, + "kl_loss": 0.5243197679519653, + "loss_ib": 0.007735282648354769, + "step": 1468 + }, + { + "ce_ib": 5.229053974151611, + "ce_orig": 0.8194414973258972, + "epoch": 0.42246027751815374, + "kl_loss": 0.2739192247390747, + "loss_ib": 0.007968246005475521, + "step": 1469 + }, + { + "ce_ib": 6.554534912109375, + "ce_orig": 0.7452446222305298, + "epoch": 0.42246027751815374, + "kl_loss": 0.2899870276451111, + "loss_ib": 0.009454404935240746, + "step": 1469 + }, + { + "ce_ib": 6.135105609893799, + "ce_orig": 0.7410409450531006, + "epoch": 0.42246027751815374, + "kl_loss": 0.23898279666900635, + "loss_ib": 0.008524932898581028, + "step": 1469 + }, + { + "ce_ib": 4.908745765686035, + "ce_orig": 0.4362742006778717, + "epoch": 0.42246027751815374, + "kl_loss": 0.6675567626953125, + "loss_ib": 0.011584312655031681, + "step": 1469 + }, + { + "epoch": 0.42274786109713136, + "grad_norm": 0.10823974758386612, + "learning_rate": 9.68474862499881e-06, + "loss": 0.8345, + "step": 1470 + }, + { + "ce_ib": 8.23246955871582, + "ce_orig": 1.1660966873168945, + "epoch": 0.42274786109713136, + "kl_loss": 0.2664303183555603, + "loss_ib": 0.01089677307754755, + "step": 1470 + }, + { + "ce_ib": 3.9345450401306152, + "ce_orig": 0.4807896018028259, + "epoch": 0.42274786109713136, + "kl_loss": 0.2337619513273239, + "loss_ib": 0.006272164639085531, + "step": 1470 + }, + { + "ce_ib": 9.054957389831543, + "ce_orig": 1.2980424165725708, + "epoch": 0.42274786109713136, + "kl_loss": 0.2841121256351471, + "loss_ib": 0.01189607847481966, + "step": 1470 + }, + { + "ce_ib": 5.502397537231445, + "ce_orig": 0.7632869482040405, + "epoch": 0.42274786109713136, + "kl_loss": 0.18508179485797882, + "loss_ib": 0.007353215012699366, + "step": 1470 + }, + { + "ce_ib": 7.207784652709961, + "ce_orig": 1.1374162435531616, + "epoch": 0.423035444676109, + "kl_loss": 0.27467674016952515, + "loss_ib": 0.009954552166163921, + "step": 1471 + }, + { + "ce_ib": 8.317543029785156, + "ce_orig": 1.4949835538864136, + "epoch": 0.423035444676109, + "kl_loss": 0.21355046331882477, + "loss_ib": 0.01045304723083973, + "step": 1471 + }, + { + "ce_ib": 3.975776195526123, + "ce_orig": 0.4850504994392395, + "epoch": 0.423035444676109, + "kl_loss": 0.3246009349822998, + "loss_ib": 0.007221785839647055, + "step": 1471 + }, + { + "ce_ib": 9.7860689163208, + "ce_orig": 1.5497608184814453, + "epoch": 0.423035444676109, + "kl_loss": 0.30065202713012695, + "loss_ib": 0.012792589142918587, + "step": 1471 + }, + { + "ce_ib": 4.456712245941162, + "ce_orig": 0.8147068619728088, + "epoch": 0.4233230282550866, + "kl_loss": 0.27602076530456543, + "loss_ib": 0.007216919679194689, + "step": 1472 + }, + { + "ce_ib": 7.215217113494873, + "ce_orig": 0.9568760991096497, + "epoch": 0.4233230282550866, + "kl_loss": 0.21230699121952057, + "loss_ib": 0.009338286705315113, + "step": 1472 + }, + { + "ce_ib": 6.054652690887451, + "ce_orig": 0.6687721610069275, + "epoch": 0.4233230282550866, + "kl_loss": 0.33765116333961487, + "loss_ib": 0.009431163780391216, + "step": 1472 + }, + { + "ce_ib": 6.395743370056152, + "ce_orig": 0.6408840417861938, + "epoch": 0.4233230282550866, + "kl_loss": 0.31022149324417114, + "loss_ib": 0.009497958235442638, + "step": 1472 + }, + { + "ce_ib": 4.3919901847839355, + "ce_orig": 0.7670180797576904, + "epoch": 0.4236106118340643, + "kl_loss": 0.18993571400642395, + "loss_ib": 0.0062913475558161736, + "step": 1473 + }, + { + "ce_ib": 3.2439560890197754, + "ce_orig": 0.4134061336517334, + "epoch": 0.4236106118340643, + "kl_loss": 0.23562690615653992, + "loss_ib": 0.005600225180387497, + "step": 1473 + }, + { + "ce_ib": 5.886128902435303, + "ce_orig": 0.5026991963386536, + "epoch": 0.4236106118340643, + "kl_loss": 0.39356550574302673, + "loss_ib": 0.009821784682571888, + "step": 1473 + }, + { + "ce_ib": 10.143879890441895, + "ce_orig": 1.480689525604248, + "epoch": 0.4236106118340643, + "kl_loss": 0.2073626071214676, + "loss_ib": 0.0122175058349967, + "step": 1473 + }, + { + "ce_ib": 7.02231502532959, + "ce_orig": 0.5753958225250244, + "epoch": 0.4238981954130419, + "kl_loss": 0.6799865961074829, + "loss_ib": 0.013822181150317192, + "step": 1474 + }, + { + "ce_ib": 4.180866718292236, + "ce_orig": 0.7731302380561829, + "epoch": 0.4238981954130419, + "kl_loss": 0.18818950653076172, + "loss_ib": 0.006062761414796114, + "step": 1474 + }, + { + "ce_ib": 6.477175235748291, + "ce_orig": 0.8291592597961426, + "epoch": 0.4238981954130419, + "kl_loss": 0.2998458743095398, + "loss_ib": 0.00947563350200653, + "step": 1474 + }, + { + "ce_ib": 6.95041561126709, + "ce_orig": 0.47487786412239075, + "epoch": 0.4238981954130419, + "kl_loss": 0.6084232330322266, + "loss_ib": 0.013034648261964321, + "step": 1474 + }, + { + "epoch": 0.42418577899201954, + "grad_norm": 0.11552698165178299, + "learning_rate": 9.682030843191021e-06, + "loss": 0.9039, + "step": 1475 + }, + { + "ce_ib": 7.451219081878662, + "ce_orig": 1.3754565715789795, + "epoch": 0.42418577899201954, + "kl_loss": 0.26375889778137207, + "loss_ib": 0.01008880790323019, + "step": 1475 + }, + { + "ce_ib": 5.691998481750488, + "ce_orig": 0.6472443342208862, + "epoch": 0.42418577899201954, + "kl_loss": 0.23740312457084656, + "loss_ib": 0.008066029287874699, + "step": 1475 + }, + { + "ce_ib": 6.195897102355957, + "ce_orig": 0.9898815751075745, + "epoch": 0.42418577899201954, + "kl_loss": 0.21684029698371887, + "loss_ib": 0.0083643002435565, + "step": 1475 + }, + { + "ce_ib": 5.425847053527832, + "ce_orig": 0.5178442597389221, + "epoch": 0.42418577899201954, + "kl_loss": 0.2924429178237915, + "loss_ib": 0.00835027638822794, + "step": 1475 + }, + { + "ce_ib": 7.3904805183410645, + "ce_orig": 0.770089864730835, + "epoch": 0.4244733625709972, + "kl_loss": 0.21272428333759308, + "loss_ib": 0.009517722763121128, + "step": 1476 + }, + { + "ce_ib": 5.064784049987793, + "ce_orig": 0.6370176076889038, + "epoch": 0.4244733625709972, + "kl_loss": 0.28311437368392944, + "loss_ib": 0.007895927876234055, + "step": 1476 + }, + { + "ce_ib": 9.224869728088379, + "ce_orig": 1.5204010009765625, + "epoch": 0.4244733625709972, + "kl_loss": 0.30629977583885193, + "loss_ib": 0.0122878672555089, + "step": 1476 + }, + { + "ce_ib": 9.436309814453125, + "ce_orig": 1.3387373685836792, + "epoch": 0.4244733625709972, + "kl_loss": 0.2948681116104126, + "loss_ib": 0.01238499116152525, + "step": 1476 + }, + { + "ce_ib": 3.8061373233795166, + "ce_orig": 0.6259332895278931, + "epoch": 0.42476094614997484, + "kl_loss": 0.2015649676322937, + "loss_ib": 0.005821786820888519, + "step": 1477 + }, + { + "ce_ib": 6.14137077331543, + "ce_orig": 0.7257814407348633, + "epoch": 0.42476094614997484, + "kl_loss": 0.2847587466239929, + "loss_ib": 0.008988958783447742, + "step": 1477 + }, + { + "ce_ib": 9.099712371826172, + "ce_orig": 1.2262767553329468, + "epoch": 0.42476094614997484, + "kl_loss": 0.21662941575050354, + "loss_ib": 0.011266006156802177, + "step": 1477 + }, + { + "ce_ib": 4.089813232421875, + "ce_orig": 1.0098782777786255, + "epoch": 0.42476094614997484, + "kl_loss": 0.17735642194747925, + "loss_ib": 0.00586337735876441, + "step": 1477 + }, + { + "ce_ib": 7.239267826080322, + "ce_orig": 1.438734769821167, + "epoch": 0.42504852972895246, + "kl_loss": 0.20792442560195923, + "loss_ib": 0.009318511933088303, + "step": 1478 + }, + { + "ce_ib": 6.3115386962890625, + "ce_orig": 0.6404736042022705, + "epoch": 0.42504852972895246, + "kl_loss": 0.3348001539707184, + "loss_ib": 0.009659540839493275, + "step": 1478 + }, + { + "ce_ib": 7.504519939422607, + "ce_orig": 0.7322005033493042, + "epoch": 0.42504852972895246, + "kl_loss": 0.2848302721977234, + "loss_ib": 0.01035282202064991, + "step": 1478 + }, + { + "ce_ib": 5.972691535949707, + "ce_orig": 0.8413686752319336, + "epoch": 0.42504852972895246, + "kl_loss": 0.21566344797611237, + "loss_ib": 0.008129325695335865, + "step": 1478 + }, + { + "ce_ib": 8.528627395629883, + "ce_orig": 1.0025306940078735, + "epoch": 0.42533611330793014, + "kl_loss": 0.2139550894498825, + "loss_ib": 0.010668179020285606, + "step": 1479 + }, + { + "ce_ib": 3.167024612426758, + "ce_orig": 0.45949411392211914, + "epoch": 0.42533611330793014, + "kl_loss": 0.43662020564079285, + "loss_ib": 0.007533226627856493, + "step": 1479 + }, + { + "ce_ib": 5.232333660125732, + "ce_orig": 0.21036502718925476, + "epoch": 0.42533611330793014, + "kl_loss": 0.4534093141555786, + "loss_ib": 0.009766426868736744, + "step": 1479 + }, + { + "ce_ib": 4.0448899269104, + "ce_orig": 0.6088986396789551, + "epoch": 0.42533611330793014, + "kl_loss": 0.15418598055839539, + "loss_ib": 0.005586749874055386, + "step": 1479 + }, + { + "epoch": 0.42562369688690777, + "grad_norm": 0.11320596933364868, + "learning_rate": 9.679301781284209e-06, + "loss": 0.881, + "step": 1480 + }, + { + "ce_ib": 6.253572463989258, + "ce_orig": 0.9887263178825378, + "epoch": 0.42562369688690777, + "kl_loss": 0.25370287895202637, + "loss_ib": 0.008790601044893265, + "step": 1480 + }, + { + "ce_ib": 6.013632297515869, + "ce_orig": 0.6574453115463257, + "epoch": 0.42562369688690777, + "kl_loss": 0.3245493173599243, + "loss_ib": 0.009259125217795372, + "step": 1480 + }, + { + "ce_ib": 6.525053977966309, + "ce_orig": 1.4866364002227783, + "epoch": 0.42562369688690777, + "kl_loss": 0.1928248554468155, + "loss_ib": 0.008453302085399628, + "step": 1480 + }, + { + "ce_ib": 8.524917602539062, + "ce_orig": 1.3874448537826538, + "epoch": 0.42562369688690777, + "kl_loss": 0.20940372347831726, + "loss_ib": 0.01061895489692688, + "step": 1480 + }, + { + "ce_ib": 7.312863349914551, + "ce_orig": 0.865240216255188, + "epoch": 0.4259112804658854, + "kl_loss": 0.5077807307243347, + "loss_ib": 0.012390670366585255, + "step": 1481 + }, + { + "ce_ib": 7.382387161254883, + "ce_orig": 1.1883589029312134, + "epoch": 0.4259112804658854, + "kl_loss": 0.33302438259124756, + "loss_ib": 0.010712630115449429, + "step": 1481 + }, + { + "ce_ib": 5.761624336242676, + "ce_orig": 0.8007732033729553, + "epoch": 0.4259112804658854, + "kl_loss": 0.19490881264209747, + "loss_ib": 0.007710712496191263, + "step": 1481 + }, + { + "ce_ib": 5.854297161102295, + "ce_orig": 0.90272456407547, + "epoch": 0.4259112804658854, + "kl_loss": 0.2707770764827728, + "loss_ib": 0.008562067523598671, + "step": 1481 + }, + { + "ce_ib": 9.733773231506348, + "ce_orig": 1.9829741716384888, + "epoch": 0.426198864044863, + "kl_loss": 0.2391510009765625, + "loss_ib": 0.012125283479690552, + "step": 1482 + }, + { + "ce_ib": 6.117833614349365, + "ce_orig": 0.9634227752685547, + "epoch": 0.426198864044863, + "kl_loss": 0.249215230345726, + "loss_ib": 0.008609985932707787, + "step": 1482 + }, + { + "ce_ib": 6.840470790863037, + "ce_orig": 0.9898148775100708, + "epoch": 0.426198864044863, + "kl_loss": 0.25602009892463684, + "loss_ib": 0.009400671347975731, + "step": 1482 + }, + { + "ce_ib": 2.724457025527954, + "ce_orig": 0.2802466154098511, + "epoch": 0.426198864044863, + "kl_loss": 0.453813761472702, + "loss_ib": 0.007262594532221556, + "step": 1482 + }, + { + "ce_ib": 4.271263599395752, + "ce_orig": 1.0552265644073486, + "epoch": 0.4264864476238407, + "kl_loss": 0.18592074513435364, + "loss_ib": 0.006130470894277096, + "step": 1483 + }, + { + "ce_ib": 3.728668689727783, + "ce_orig": 0.5460267663002014, + "epoch": 0.4264864476238407, + "kl_loss": 0.3072531223297119, + "loss_ib": 0.00680119963362813, + "step": 1483 + }, + { + "ce_ib": 5.636871814727783, + "ce_orig": 0.8854542374610901, + "epoch": 0.4264864476238407, + "kl_loss": 0.20121414959430695, + "loss_ib": 0.007649013306945562, + "step": 1483 + }, + { + "ce_ib": 5.380930423736572, + "ce_orig": 0.7147185802459717, + "epoch": 0.4264864476238407, + "kl_loss": 0.29215019941329956, + "loss_ib": 0.008302432484924793, + "step": 1483 + }, + { + "ce_ib": 4.051311016082764, + "ce_orig": 0.7806766033172607, + "epoch": 0.4267740312028183, + "kl_loss": 0.2124352753162384, + "loss_ib": 0.006175663322210312, + "step": 1484 + }, + { + "ce_ib": 6.189905643463135, + "ce_orig": 0.7869464159011841, + "epoch": 0.4267740312028183, + "kl_loss": 0.33432358503341675, + "loss_ib": 0.009533141739666462, + "step": 1484 + }, + { + "ce_ib": 4.387548446655273, + "ce_orig": 1.0712177753448486, + "epoch": 0.4267740312028183, + "kl_loss": 0.16804896295070648, + "loss_ib": 0.006068038288503885, + "step": 1484 + }, + { + "ce_ib": 5.980642318725586, + "ce_orig": 1.0337541103363037, + "epoch": 0.4267740312028183, + "kl_loss": 0.27346473932266235, + "loss_ib": 0.008715289644896984, + "step": 1484 + }, + { + "epoch": 0.42706161478179594, + "grad_norm": 0.1281713843345642, + "learning_rate": 9.676561445853317e-06, + "loss": 0.9568, + "step": 1485 + }, + { + "ce_ib": 9.559850692749023, + "ce_orig": 1.2330940961837769, + "epoch": 0.42706161478179594, + "kl_loss": 0.26453912258148193, + "loss_ib": 0.012205241248011589, + "step": 1485 + }, + { + "ce_ib": 10.209794998168945, + "ce_orig": 0.9033480882644653, + "epoch": 0.42706161478179594, + "kl_loss": 0.33303964138031006, + "loss_ib": 0.013540191575884819, + "step": 1485 + }, + { + "ce_ib": 6.278723239898682, + "ce_orig": 1.024091124534607, + "epoch": 0.42706161478179594, + "kl_loss": 0.2991013526916504, + "loss_ib": 0.00926973670721054, + "step": 1485 + }, + { + "ce_ib": 4.2076263427734375, + "ce_orig": 0.4415350556373596, + "epoch": 0.42706161478179594, + "kl_loss": 0.1996149867773056, + "loss_ib": 0.006203775759786367, + "step": 1485 + }, + { + "ce_ib": 3.6215503215789795, + "ce_orig": 0.695202648639679, + "epoch": 0.4273491983607736, + "kl_loss": 0.23646116256713867, + "loss_ib": 0.005986162461340427, + "step": 1486 + }, + { + "ce_ib": 3.97869873046875, + "ce_orig": 0.602679431438446, + "epoch": 0.4273491983607736, + "kl_loss": 0.19544193148612976, + "loss_ib": 0.005933117587119341, + "step": 1486 + }, + { + "ce_ib": 3.666558265686035, + "ce_orig": 0.7779607772827148, + "epoch": 0.4273491983607736, + "kl_loss": 0.31298935413360596, + "loss_ib": 0.00679645175114274, + "step": 1486 + }, + { + "ce_ib": 4.802906036376953, + "ce_orig": 0.4945107400417328, + "epoch": 0.4273491983607736, + "kl_loss": 0.3955551087856293, + "loss_ib": 0.008758457377552986, + "step": 1486 + }, + { + "ce_ib": 3.2569401264190674, + "ce_orig": 0.7755473256111145, + "epoch": 0.42763678193975124, + "kl_loss": 0.18514487147331238, + "loss_ib": 0.005108388606458902, + "step": 1487 + }, + { + "ce_ib": 3.5347471237182617, + "ce_orig": 0.6426426768302917, + "epoch": 0.42763678193975124, + "kl_loss": 0.24189895391464233, + "loss_ib": 0.005953736137598753, + "step": 1487 + }, + { + "ce_ib": 3.0833957195281982, + "ce_orig": 0.3376903831958771, + "epoch": 0.42763678193975124, + "kl_loss": 0.5483517646789551, + "loss_ib": 0.008566913194954395, + "step": 1487 + }, + { + "ce_ib": 7.679892539978027, + "ce_orig": 1.2606022357940674, + "epoch": 0.42763678193975124, + "kl_loss": 0.23624634742736816, + "loss_ib": 0.010042356327176094, + "step": 1487 + }, + { + "ce_ib": 7.427282333374023, + "ce_orig": 0.9086766242980957, + "epoch": 0.42792436551872887, + "kl_loss": 0.2795743942260742, + "loss_ib": 0.010223026387393475, + "step": 1488 + }, + { + "ce_ib": 7.853158950805664, + "ce_orig": 0.7320258617401123, + "epoch": 0.42792436551872887, + "kl_loss": 0.2966246008872986, + "loss_ib": 0.010819405317306519, + "step": 1488 + }, + { + "ce_ib": 5.303369522094727, + "ce_orig": 1.0770010948181152, + "epoch": 0.42792436551872887, + "kl_loss": 0.16358917951583862, + "loss_ib": 0.006939261220395565, + "step": 1488 + }, + { + "ce_ib": 9.29581069946289, + "ce_orig": 1.4535112380981445, + "epoch": 0.42792436551872887, + "kl_loss": 0.3460651636123657, + "loss_ib": 0.012756462208926678, + "step": 1488 + }, + { + "ce_ib": 4.586825847625732, + "ce_orig": 0.6064006686210632, + "epoch": 0.42821194909770655, + "kl_loss": 0.2779026925563812, + "loss_ib": 0.00736585259437561, + "step": 1489 + }, + { + "ce_ib": 7.388415813446045, + "ce_orig": 1.4265230894088745, + "epoch": 0.42821194909770655, + "kl_loss": 0.1808740347623825, + "loss_ib": 0.009197155945003033, + "step": 1489 + }, + { + "ce_ib": 6.708005428314209, + "ce_orig": 0.8520740270614624, + "epoch": 0.42821194909770655, + "kl_loss": 0.17089973390102386, + "loss_ib": 0.008417002856731415, + "step": 1489 + }, + { + "ce_ib": 3.947643756866455, + "ce_orig": 0.5688271522521973, + "epoch": 0.42821194909770655, + "kl_loss": 0.22878821194171906, + "loss_ib": 0.006235525943338871, + "step": 1489 + }, + { + "epoch": 0.42849953267668417, + "grad_norm": 0.1272149682044983, + "learning_rate": 9.673809843500447e-06, + "loss": 0.8415, + "step": 1490 + }, + { + "ce_ib": 5.999438762664795, + "ce_orig": 1.1109205484390259, + "epoch": 0.42849953267668417, + "kl_loss": 0.23863765597343445, + "loss_ib": 0.008385815657675266, + "step": 1490 + }, + { + "ce_ib": 5.141195297241211, + "ce_orig": 0.8208162784576416, + "epoch": 0.42849953267668417, + "kl_loss": 0.17496593296527863, + "loss_ib": 0.006890854798257351, + "step": 1490 + }, + { + "ce_ib": 3.8230690956115723, + "ce_orig": 0.33701616525650024, + "epoch": 0.42849953267668417, + "kl_loss": 0.18180124461650848, + "loss_ib": 0.005641081370413303, + "step": 1490 + }, + { + "ce_ib": 4.297886371612549, + "ce_orig": 0.5608041286468506, + "epoch": 0.42849953267668417, + "kl_loss": 0.18302598595619202, + "loss_ib": 0.0061281463131308556, + "step": 1490 + }, + { + "ce_ib": 5.28217887878418, + "ce_orig": 0.8655717968940735, + "epoch": 0.4287871162556618, + "kl_loss": 0.2272111475467682, + "loss_ib": 0.007554290350526571, + "step": 1491 + }, + { + "ce_ib": 6.0911431312561035, + "ce_orig": 1.2214902639389038, + "epoch": 0.4287871162556618, + "kl_loss": 0.24875463545322418, + "loss_ib": 0.008578689768910408, + "step": 1491 + }, + { + "ce_ib": 4.257596969604492, + "ce_orig": 0.6725324988365173, + "epoch": 0.4287871162556618, + "kl_loss": 0.19750872254371643, + "loss_ib": 0.006232684012502432, + "step": 1491 + }, + { + "ce_ib": 4.0605340003967285, + "ce_orig": 0.8823887705802917, + "epoch": 0.4287871162556618, + "kl_loss": 0.21200858056545258, + "loss_ib": 0.006180619355291128, + "step": 1491 + }, + { + "ce_ib": 4.476174831390381, + "ce_orig": 1.0363364219665527, + "epoch": 0.4290746998346394, + "kl_loss": 0.18386420607566833, + "loss_ib": 0.006314816884696484, + "step": 1492 + }, + { + "ce_ib": 5.4428510665893555, + "ce_orig": 0.6156265139579773, + "epoch": 0.4290746998346394, + "kl_loss": 0.19407202303409576, + "loss_ib": 0.00738357100635767, + "step": 1492 + }, + { + "ce_ib": 8.190139770507812, + "ce_orig": 1.3100130558013916, + "epoch": 0.4290746998346394, + "kl_loss": 0.4860832095146179, + "loss_ib": 0.013050971552729607, + "step": 1492 + }, + { + "ce_ib": 6.034806728363037, + "ce_orig": 0.3907848298549652, + "epoch": 0.4290746998346394, + "kl_loss": 0.249636709690094, + "loss_ib": 0.008531173691153526, + "step": 1492 + }, + { + "ce_ib": 7.136789798736572, + "ce_orig": 0.9948228001594543, + "epoch": 0.4293622834136171, + "kl_loss": 0.494230180978775, + "loss_ib": 0.012079091742634773, + "step": 1493 + }, + { + "ce_ib": 4.873354434967041, + "ce_orig": 0.7475965023040771, + "epoch": 0.4293622834136171, + "kl_loss": 0.19573433697223663, + "loss_ib": 0.006830697413533926, + "step": 1493 + }, + { + "ce_ib": 8.23890209197998, + "ce_orig": 0.5245663523674011, + "epoch": 0.4293622834136171, + "kl_loss": 0.23461030423641205, + "loss_ib": 0.010585005395114422, + "step": 1493 + }, + { + "ce_ib": 4.986420154571533, + "ce_orig": 0.6100597977638245, + "epoch": 0.4293622834136171, + "kl_loss": 0.25687411427497864, + "loss_ib": 0.007555161602795124, + "step": 1493 + }, + { + "ce_ib": 5.936038017272949, + "ce_orig": 1.0116311311721802, + "epoch": 0.4296498669925947, + "kl_loss": 0.18465059995651245, + "loss_ib": 0.007782543543726206, + "step": 1494 + }, + { + "ce_ib": 2.660158634185791, + "ce_orig": 0.4223203659057617, + "epoch": 0.4296498669925947, + "kl_loss": 0.2522263526916504, + "loss_ib": 0.005182422231882811, + "step": 1494 + }, + { + "ce_ib": 3.642902374267578, + "ce_orig": 0.3541110157966614, + "epoch": 0.4296498669925947, + "kl_loss": 0.18501171469688416, + "loss_ib": 0.005493019707500935, + "step": 1494 + }, + { + "ce_ib": 2.7317256927490234, + "ce_orig": 0.6133846044540405, + "epoch": 0.4296498669925947, + "kl_loss": 0.15653100609779358, + "loss_ib": 0.004297035746276379, + "step": 1494 + }, + { + "epoch": 0.42993745057157234, + "grad_norm": 0.11513698846101761, + "learning_rate": 9.67104698085485e-06, + "loss": 0.8492, + "step": 1495 + }, + { + "ce_ib": 5.127799034118652, + "ce_orig": 0.5826548933982849, + "epoch": 0.42993745057157234, + "kl_loss": 0.18143674731254578, + "loss_ib": 0.006942166015505791, + "step": 1495 + }, + { + "ce_ib": 3.7079544067382812, + "ce_orig": 0.5327990055084229, + "epoch": 0.42993745057157234, + "kl_loss": 0.15200555324554443, + "loss_ib": 0.005228010471910238, + "step": 1495 + }, + { + "ce_ib": 7.367830276489258, + "ce_orig": 0.8680596947669983, + "epoch": 0.42993745057157234, + "kl_loss": 0.2782801687717438, + "loss_ib": 0.01015063188970089, + "step": 1495 + }, + { + "ce_ib": 6.355778694152832, + "ce_orig": 1.0745817422866821, + "epoch": 0.42993745057157234, + "kl_loss": 0.1733226329088211, + "loss_ib": 0.008089005015790462, + "step": 1495 + }, + { + "ce_ib": 7.89412260055542, + "ce_orig": 1.3843724727630615, + "epoch": 0.43022503415055, + "kl_loss": 0.28065750002861023, + "loss_ib": 0.010700698010623455, + "step": 1496 + }, + { + "ce_ib": 10.201616287231445, + "ce_orig": 1.7674388885498047, + "epoch": 0.43022503415055, + "kl_loss": 0.2781248390674591, + "loss_ib": 0.012982863932847977, + "step": 1496 + }, + { + "ce_ib": 5.998897075653076, + "ce_orig": 0.6868672966957092, + "epoch": 0.43022503415055, + "kl_loss": 0.37494516372680664, + "loss_ib": 0.009748348034918308, + "step": 1496 + }, + { + "ce_ib": 6.706963062286377, + "ce_orig": 0.680381178855896, + "epoch": 0.43022503415055, + "kl_loss": 0.24509286880493164, + "loss_ib": 0.009157891385257244, + "step": 1496 + }, + { + "ce_ib": 9.054398536682129, + "ce_orig": 1.5919907093048096, + "epoch": 0.43051261772952765, + "kl_loss": 0.25301802158355713, + "loss_ib": 0.011584578081965446, + "step": 1497 + }, + { + "ce_ib": 9.301456451416016, + "ce_orig": 1.3819289207458496, + "epoch": 0.43051261772952765, + "kl_loss": 0.23771429061889648, + "loss_ib": 0.011678599752485752, + "step": 1497 + }, + { + "ce_ib": 8.360661506652832, + "ce_orig": 1.6322040557861328, + "epoch": 0.43051261772952765, + "kl_loss": 0.24911445379257202, + "loss_ib": 0.010851806029677391, + "step": 1497 + }, + { + "ce_ib": 5.899044513702393, + "ce_orig": 1.0200097560882568, + "epoch": 0.43051261772952765, + "kl_loss": 0.21863295137882233, + "loss_ib": 0.008085373789072037, + "step": 1497 + }, + { + "ce_ib": 7.849880218505859, + "ce_orig": 1.2386505603790283, + "epoch": 0.43080020130850527, + "kl_loss": 0.31184014678001404, + "loss_ib": 0.010968281887471676, + "step": 1498 + }, + { + "ce_ib": 3.9608442783355713, + "ce_orig": 0.6503081321716309, + "epoch": 0.43080020130850527, + "kl_loss": 0.22978989779949188, + "loss_ib": 0.006258743349462748, + "step": 1498 + }, + { + "ce_ib": 6.0665717124938965, + "ce_orig": 1.1576424837112427, + "epoch": 0.43080020130850527, + "kl_loss": 0.21611745655536652, + "loss_ib": 0.00822774600237608, + "step": 1498 + }, + { + "ce_ib": 6.375781059265137, + "ce_orig": 1.1019394397735596, + "epoch": 0.43080020130850527, + "kl_loss": 0.20872078835964203, + "loss_ib": 0.008462988771498203, + "step": 1498 + }, + { + "ce_ib": 9.109877586364746, + "ce_orig": 1.6101223230361938, + "epoch": 0.43108778488748295, + "kl_loss": 0.28492850065231323, + "loss_ib": 0.011959162540733814, + "step": 1499 + }, + { + "ce_ib": 7.772365093231201, + "ce_orig": 1.3896774053573608, + "epoch": 0.43108778488748295, + "kl_loss": 0.21816033124923706, + "loss_ib": 0.009953968226909637, + "step": 1499 + }, + { + "ce_ib": 7.498557090759277, + "ce_orig": 0.6977278590202332, + "epoch": 0.43108778488748295, + "kl_loss": 0.4549769163131714, + "loss_ib": 0.012048325501382351, + "step": 1499 + }, + { + "ce_ib": 8.391149520874023, + "ce_orig": 1.1614909172058105, + "epoch": 0.43108778488748295, + "kl_loss": 0.2926523685455322, + "loss_ib": 0.011317672207951546, + "step": 1499 + }, + { + "epoch": 0.4313753684664606, + "grad_norm": 0.10857830196619034, + "learning_rate": 9.668272864572904e-06, + "loss": 0.9772, + "step": 1500 + }, + { + "ce_ib": 7.412587642669678, + "ce_orig": 1.3245453834533691, + "epoch": 0.4313753684664606, + "kl_loss": 0.19331267476081848, + "loss_ib": 0.009345714934170246, + "step": 1500 + }, + { + "ce_ib": 2.4376213550567627, + "ce_orig": 0.21778453886508942, + "epoch": 0.4313753684664606, + "kl_loss": 0.30824974179267883, + "loss_ib": 0.005520118400454521, + "step": 1500 + }, + { + "ce_ib": 4.506190776824951, + "ce_orig": 0.690782368183136, + "epoch": 0.4313753684664606, + "kl_loss": 0.22411122918128967, + "loss_ib": 0.006747303064912558, + "step": 1500 + }, + { + "ce_ib": 5.372386455535889, + "ce_orig": 0.811801016330719, + "epoch": 0.4313753684664606, + "kl_loss": 0.27325868606567383, + "loss_ib": 0.00810497347265482, + "step": 1500 + }, + { + "ce_ib": 5.54148006439209, + "ce_orig": 0.8415253162384033, + "epoch": 0.4316629520454382, + "kl_loss": 0.1796332150697708, + "loss_ib": 0.0073378118686378, + "step": 1501 + }, + { + "ce_ib": 7.4257049560546875, + "ce_orig": 0.7999733686447144, + "epoch": 0.4316629520454382, + "kl_loss": 0.2060936689376831, + "loss_ib": 0.009486641734838486, + "step": 1501 + }, + { + "ce_ib": 7.355204105377197, + "ce_orig": 1.4873985052108765, + "epoch": 0.4316629520454382, + "kl_loss": 0.1954437792301178, + "loss_ib": 0.009309642016887665, + "step": 1501 + }, + { + "ce_ib": 3.211336612701416, + "ce_orig": 0.5319896340370178, + "epoch": 0.4316629520454382, + "kl_loss": 0.1957024782896042, + "loss_ib": 0.005168361589312553, + "step": 1501 + }, + { + "ce_ib": 7.809789657592773, + "ce_orig": 1.3806283473968506, + "epoch": 0.4319505356244158, + "kl_loss": 0.4769303798675537, + "loss_ib": 0.012579092755913734, + "step": 1502 + }, + { + "ce_ib": 5.052849292755127, + "ce_orig": 0.748846173286438, + "epoch": 0.4319505356244158, + "kl_loss": 0.27402952313423157, + "loss_ib": 0.007793144788593054, + "step": 1502 + }, + { + "ce_ib": 5.5736083984375, + "ce_orig": 0.5355104207992554, + "epoch": 0.4319505356244158, + "kl_loss": 0.2576088607311249, + "loss_ib": 0.00814969651401043, + "step": 1502 + }, + { + "ce_ib": 9.92026424407959, + "ce_orig": 1.241144061088562, + "epoch": 0.4319505356244158, + "kl_loss": 0.4010234475135803, + "loss_ib": 0.01393049955368042, + "step": 1502 + }, + { + "ce_ib": 5.654511451721191, + "ce_orig": 1.0896109342575073, + "epoch": 0.4322381192033935, + "kl_loss": 0.19214342534542084, + "loss_ib": 0.00757594546303153, + "step": 1503 + }, + { + "ce_ib": 4.391484260559082, + "ce_orig": 0.6887181997299194, + "epoch": 0.4322381192033935, + "kl_loss": 0.2409554123878479, + "loss_ib": 0.006801038049161434, + "step": 1503 + }, + { + "ce_ib": 6.433215141296387, + "ce_orig": 0.7667641043663025, + "epoch": 0.4322381192033935, + "kl_loss": 0.3592512607574463, + "loss_ib": 0.010025727562606335, + "step": 1503 + }, + { + "ce_ib": 6.487468242645264, + "ce_orig": 1.3075002431869507, + "epoch": 0.4322381192033935, + "kl_loss": 0.2316405326128006, + "loss_ib": 0.00880387332290411, + "step": 1503 + }, + { + "ce_ib": 5.21145486831665, + "ce_orig": 0.6229283809661865, + "epoch": 0.4325257027823711, + "kl_loss": 0.2223101705312729, + "loss_ib": 0.0074345567263662815, + "step": 1504 + }, + { + "ce_ib": 7.79116153717041, + "ce_orig": 1.0141862630844116, + "epoch": 0.4325257027823711, + "kl_loss": 0.19813181459903717, + "loss_ib": 0.00977247953414917, + "step": 1504 + }, + { + "ce_ib": 6.594308853149414, + "ce_orig": 0.9326078295707703, + "epoch": 0.4325257027823711, + "kl_loss": 0.36497896909713745, + "loss_ib": 0.010244098491966724, + "step": 1504 + }, + { + "ce_ib": 4.622511386871338, + "ce_orig": 0.4883497357368469, + "epoch": 0.4325257027823711, + "kl_loss": 0.2173852175474167, + "loss_ib": 0.006796363275498152, + "step": 1504 + }, + { + "epoch": 0.43281328636134875, + "grad_norm": 0.1054597720503807, + "learning_rate": 9.665487501338097e-06, + "loss": 0.852, + "step": 1505 + }, + { + "ce_ib": 7.86767578125, + "ce_orig": 1.2227966785430908, + "epoch": 0.43281328636134875, + "kl_loss": 0.1758153885602951, + "loss_ib": 0.009625829756259918, + "step": 1505 + }, + { + "ce_ib": 7.569886684417725, + "ce_orig": 1.2155966758728027, + "epoch": 0.43281328636134875, + "kl_loss": 0.22750471532344818, + "loss_ib": 0.009844934567809105, + "step": 1505 + }, + { + "ce_ib": 5.736252784729004, + "ce_orig": 0.9024336338043213, + "epoch": 0.43281328636134875, + "kl_loss": 0.21125327050685883, + "loss_ib": 0.007848785258829594, + "step": 1505 + }, + { + "ce_ib": 4.251120567321777, + "ce_orig": 0.6517446041107178, + "epoch": 0.43281328636134875, + "kl_loss": 0.2938510775566101, + "loss_ib": 0.007189631462097168, + "step": 1505 + }, + { + "ce_ib": 4.12835693359375, + "ce_orig": 0.6448106169700623, + "epoch": 0.4331008699403264, + "kl_loss": 0.2133052945137024, + "loss_ib": 0.006261409260332584, + "step": 1506 + }, + { + "ce_ib": 8.605734825134277, + "ce_orig": 1.6096972227096558, + "epoch": 0.4331008699403264, + "kl_loss": 0.2922551929950714, + "loss_ib": 0.011528287082910538, + "step": 1506 + }, + { + "ce_ib": 3.7822954654693604, + "ce_orig": 0.5684179663658142, + "epoch": 0.4331008699403264, + "kl_loss": 0.21701683104038239, + "loss_ib": 0.005952463485300541, + "step": 1506 + }, + { + "ce_ib": 4.113936424255371, + "ce_orig": 0.7411576509475708, + "epoch": 0.4331008699403264, + "kl_loss": 0.41098541021347046, + "loss_ib": 0.008223790675401688, + "step": 1506 + }, + { + "ce_ib": 4.773281574249268, + "ce_orig": 0.8815646171569824, + "epoch": 0.43338845351930405, + "kl_loss": 0.19395877420902252, + "loss_ib": 0.0067128692753612995, + "step": 1507 + }, + { + "ce_ib": 5.464571475982666, + "ce_orig": 0.551234245300293, + "epoch": 0.43338845351930405, + "kl_loss": 0.3368734121322632, + "loss_ib": 0.008833305910229683, + "step": 1507 + }, + { + "ce_ib": 5.097268581390381, + "ce_orig": 0.6974053978919983, + "epoch": 0.43338845351930405, + "kl_loss": 0.21404951810836792, + "loss_ib": 0.007237763609737158, + "step": 1507 + }, + { + "ce_ib": 4.549488067626953, + "ce_orig": 0.6958446502685547, + "epoch": 0.43338845351930405, + "kl_loss": 0.41875457763671875, + "loss_ib": 0.008737033233046532, + "step": 1507 + }, + { + "ce_ib": 7.839320659637451, + "ce_orig": 0.8189214468002319, + "epoch": 0.4336760370982817, + "kl_loss": 0.22917625308036804, + "loss_ib": 0.01013108342885971, + "step": 1508 + }, + { + "ce_ib": 7.603825569152832, + "ce_orig": 1.1187269687652588, + "epoch": 0.4336760370982817, + "kl_loss": 0.24749788641929626, + "loss_ib": 0.010078804567456245, + "step": 1508 + }, + { + "ce_ib": 4.917834758758545, + "ce_orig": 0.6455671787261963, + "epoch": 0.4336760370982817, + "kl_loss": 0.19830983877182007, + "loss_ib": 0.006900932639837265, + "step": 1508 + }, + { + "ce_ib": 6.577511787414551, + "ce_orig": 0.6373150944709778, + "epoch": 0.4336760370982817, + "kl_loss": 0.2596627175807953, + "loss_ib": 0.009174139238893986, + "step": 1508 + }, + { + "ce_ib": 8.961044311523438, + "ce_orig": 1.27748441696167, + "epoch": 0.43396362067725935, + "kl_loss": 0.18702471256256104, + "loss_ib": 0.010831291787326336, + "step": 1509 + }, + { + "ce_ib": 5.431795120239258, + "ce_orig": 0.9966217875480652, + "epoch": 0.43396362067725935, + "kl_loss": 0.2645154595375061, + "loss_ib": 0.00807694997638464, + "step": 1509 + }, + { + "ce_ib": 5.892299652099609, + "ce_orig": 0.9126742482185364, + "epoch": 0.43396362067725935, + "kl_loss": 0.23256003856658936, + "loss_ib": 0.008217900060117245, + "step": 1509 + }, + { + "ce_ib": 5.339905738830566, + "ce_orig": 1.0408644676208496, + "epoch": 0.43396362067725935, + "kl_loss": 0.32789915800094604, + "loss_ib": 0.008618896827101707, + "step": 1509 + }, + { + "epoch": 0.434251204256237, + "grad_norm": 0.11715665459632874, + "learning_rate": 9.662690897861018e-06, + "loss": 0.8719, + "step": 1510 + }, + { + "ce_ib": 8.179683685302734, + "ce_orig": 0.557664692401886, + "epoch": 0.434251204256237, + "kl_loss": 0.23708882927894592, + "loss_ib": 0.010550571605563164, + "step": 1510 + }, + { + "ce_ib": 4.347968101501465, + "ce_orig": 0.7091971039772034, + "epoch": 0.434251204256237, + "kl_loss": 0.16048789024353027, + "loss_ib": 0.005952846724539995, + "step": 1510 + }, + { + "ce_ib": 9.547073364257812, + "ce_orig": 1.5443179607391357, + "epoch": 0.434251204256237, + "kl_loss": 0.27068954706192017, + "loss_ib": 0.012253968045115471, + "step": 1510 + }, + { + "ce_ib": 5.236355304718018, + "ce_orig": 0.6377555131912231, + "epoch": 0.434251204256237, + "kl_loss": 0.3052249848842621, + "loss_ib": 0.008288605138659477, + "step": 1510 + }, + { + "ce_ib": 4.119098663330078, + "ce_orig": 0.2889801859855652, + "epoch": 0.4345387878352146, + "kl_loss": 0.48718804121017456, + "loss_ib": 0.008990978822112083, + "step": 1511 + }, + { + "ce_ib": 6.019423484802246, + "ce_orig": 0.8404080271720886, + "epoch": 0.4345387878352146, + "kl_loss": 0.28162747621536255, + "loss_ib": 0.00883569847792387, + "step": 1511 + }, + { + "ce_ib": 7.011948585510254, + "ce_orig": 0.7254608869552612, + "epoch": 0.4345387878352146, + "kl_loss": 0.18887865543365479, + "loss_ib": 0.008900735527276993, + "step": 1511 + }, + { + "ce_ib": 9.401799201965332, + "ce_orig": 1.7815628051757812, + "epoch": 0.4345387878352146, + "kl_loss": 0.2903178930282593, + "loss_ib": 0.01230497844517231, + "step": 1511 + }, + { + "ce_ib": 10.27991008758545, + "ce_orig": 1.76600182056427, + "epoch": 0.4348263714141922, + "kl_loss": 0.482980340719223, + "loss_ib": 0.01510971412062645, + "step": 1512 + }, + { + "ce_ib": 5.170495986938477, + "ce_orig": 0.4687999486923218, + "epoch": 0.4348263714141922, + "kl_loss": 0.21127310395240784, + "loss_ib": 0.007283227052539587, + "step": 1512 + }, + { + "ce_ib": 7.678666591644287, + "ce_orig": 0.8942811489105225, + "epoch": 0.4348263714141922, + "kl_loss": 0.28342896699905396, + "loss_ib": 0.01051295641809702, + "step": 1512 + }, + { + "ce_ib": 9.15030574798584, + "ce_orig": 1.1176377534866333, + "epoch": 0.4348263714141922, + "kl_loss": 0.18655428290367126, + "loss_ib": 0.011015849187970161, + "step": 1512 + }, + { + "ce_ib": 4.875361442565918, + "ce_orig": 0.7338507175445557, + "epoch": 0.4351139549931699, + "kl_loss": 0.19533416628837585, + "loss_ib": 0.006828702986240387, + "step": 1513 + }, + { + "ce_ib": 3.502323627471924, + "ce_orig": 0.6863707304000854, + "epoch": 0.4351139549931699, + "kl_loss": 0.1371149718761444, + "loss_ib": 0.004873473197221756, + "step": 1513 + }, + { + "ce_ib": 6.512062072753906, + "ce_orig": 1.264266848564148, + "epoch": 0.4351139549931699, + "kl_loss": 0.2282438576221466, + "loss_ib": 0.00879450049251318, + "step": 1513 + }, + { + "ce_ib": 13.425664901733398, + "ce_orig": 2.067333698272705, + "epoch": 0.4351139549931699, + "kl_loss": 0.741570234298706, + "loss_ib": 0.020841367542743683, + "step": 1513 + }, + { + "ce_ib": 8.506553649902344, + "ce_orig": 1.420841097831726, + "epoch": 0.43540153857214753, + "kl_loss": 0.230472594499588, + "loss_ib": 0.010811279527842999, + "step": 1514 + }, + { + "ce_ib": 6.533507347106934, + "ce_orig": 0.893858790397644, + "epoch": 0.43540153857214753, + "kl_loss": 0.29412978887557983, + "loss_ib": 0.009474805556237698, + "step": 1514 + }, + { + "ce_ib": 8.280712127685547, + "ce_orig": 1.3050899505615234, + "epoch": 0.43540153857214753, + "kl_loss": 0.30212193727493286, + "loss_ib": 0.01130193192511797, + "step": 1514 + }, + { + "ce_ib": 5.96417236328125, + "ce_orig": 0.8574302792549133, + "epoch": 0.43540153857214753, + "kl_loss": 0.28048038482666016, + "loss_ib": 0.008768975734710693, + "step": 1514 + }, + { + "epoch": 0.43568912215112515, + "grad_norm": 0.13548529148101807, + "learning_rate": 9.659883060879333e-06, + "loss": 0.9358, + "step": 1515 + }, + { + "ce_ib": 3.8507282733917236, + "ce_orig": 0.8658118844032288, + "epoch": 0.43568912215112515, + "kl_loss": 0.17177735269069672, + "loss_ib": 0.005568502005189657, + "step": 1515 + }, + { + "ce_ib": 3.168516159057617, + "ce_orig": 0.40062665939331055, + "epoch": 0.43568912215112515, + "kl_loss": 0.4463632106781006, + "loss_ib": 0.007632147986441851, + "step": 1515 + }, + { + "ce_ib": 7.129504680633545, + "ce_orig": 0.9099282026290894, + "epoch": 0.43568912215112515, + "kl_loss": 0.1943160444498062, + "loss_ib": 0.009072665125131607, + "step": 1515 + }, + { + "ce_ib": 3.711723804473877, + "ce_orig": 0.6062166094779968, + "epoch": 0.43568912215112515, + "kl_loss": 0.17591118812561035, + "loss_ib": 0.005470836069434881, + "step": 1515 + }, + { + "ce_ib": 4.956431865692139, + "ce_orig": 0.8364520072937012, + "epoch": 0.43597670573010283, + "kl_loss": 0.2664000689983368, + "loss_ib": 0.007620432414114475, + "step": 1516 + }, + { + "ce_ib": 10.05667495727539, + "ce_orig": 1.1949691772460938, + "epoch": 0.43597670573010283, + "kl_loss": 0.24516168236732483, + "loss_ib": 0.012508291751146317, + "step": 1516 + }, + { + "ce_ib": 6.448936939239502, + "ce_orig": 0.7740538120269775, + "epoch": 0.43597670573010283, + "kl_loss": 0.336667537689209, + "loss_ib": 0.009815611876547337, + "step": 1516 + }, + { + "ce_ib": 3.7940187454223633, + "ce_orig": 1.0476545095443726, + "epoch": 0.43597670573010283, + "kl_loss": 0.499667763710022, + "loss_ib": 0.00879069697111845, + "step": 1516 + }, + { + "ce_ib": 4.319472789764404, + "ce_orig": 0.6066058874130249, + "epoch": 0.43626428930908046, + "kl_loss": 0.23717695474624634, + "loss_ib": 0.006691242102533579, + "step": 1517 + }, + { + "ce_ib": 6.696260452270508, + "ce_orig": 0.9695017337799072, + "epoch": 0.43626428930908046, + "kl_loss": 0.25179165601730347, + "loss_ib": 0.009214176796376705, + "step": 1517 + }, + { + "ce_ib": 8.007752418518066, + "ce_orig": 1.0904877185821533, + "epoch": 0.43626428930908046, + "kl_loss": 0.3288235068321228, + "loss_ib": 0.011295987293124199, + "step": 1517 + }, + { + "ce_ib": 6.769944190979004, + "ce_orig": 1.0057461261749268, + "epoch": 0.43626428930908046, + "kl_loss": 0.2518278956413269, + "loss_ib": 0.009288223460316658, + "step": 1517 + }, + { + "ce_ib": 4.416586399078369, + "ce_orig": 0.5639540553092957, + "epoch": 0.4365518728880581, + "kl_loss": 0.31698429584503174, + "loss_ib": 0.0075864288955926895, + "step": 1518 + }, + { + "ce_ib": 5.002842426300049, + "ce_orig": 0.6733067035675049, + "epoch": 0.4365518728880581, + "kl_loss": 0.26536762714385986, + "loss_ib": 0.0076565188355743885, + "step": 1518 + }, + { + "ce_ib": 10.548088073730469, + "ce_orig": 1.861885666847229, + "epoch": 0.4365518728880581, + "kl_loss": 0.3233809471130371, + "loss_ib": 0.013781897723674774, + "step": 1518 + }, + { + "ce_ib": 3.5734047889709473, + "ce_orig": 0.466007798910141, + "epoch": 0.4365518728880581, + "kl_loss": 0.5387842655181885, + "loss_ib": 0.008961247280240059, + "step": 1518 + }, + { + "ce_ib": 11.615768432617188, + "ce_orig": 2.115899085998535, + "epoch": 0.43683945646703576, + "kl_loss": 0.3307480812072754, + "loss_ib": 0.014923249371349812, + "step": 1519 + }, + { + "ce_ib": 5.898049354553223, + "ce_orig": 0.5681475400924683, + "epoch": 0.43683945646703576, + "kl_loss": 0.20464731752872467, + "loss_ib": 0.007944522425532341, + "step": 1519 + }, + { + "ce_ib": 4.767960548400879, + "ce_orig": 0.3993023931980133, + "epoch": 0.43683945646703576, + "kl_loss": 0.21171991527080536, + "loss_ib": 0.00688515929505229, + "step": 1519 + }, + { + "ce_ib": 5.281295299530029, + "ce_orig": 0.7131248712539673, + "epoch": 0.43683945646703576, + "kl_loss": 0.35403138399124146, + "loss_ib": 0.008821608498692513, + "step": 1519 + }, + { + "epoch": 0.4371270400460134, + "grad_norm": 0.09799163043498993, + "learning_rate": 9.65706399715777e-06, + "loss": 0.8888, + "step": 1520 + }, + { + "ce_ib": 7.551825046539307, + "ce_orig": 1.2064350843429565, + "epoch": 0.4371270400460134, + "kl_loss": 0.232176274061203, + "loss_ib": 0.009873587638139725, + "step": 1520 + }, + { + "ce_ib": 9.467873573303223, + "ce_orig": 1.4917707443237305, + "epoch": 0.4371270400460134, + "kl_loss": 0.23060013353824615, + "loss_ib": 0.011773874051868916, + "step": 1520 + }, + { + "ce_ib": 4.483201026916504, + "ce_orig": 0.5693926215171814, + "epoch": 0.4371270400460134, + "kl_loss": 0.2563168406486511, + "loss_ib": 0.00704636937007308, + "step": 1520 + }, + { + "ce_ib": 6.398351669311523, + "ce_orig": 0.8779327869415283, + "epoch": 0.4371270400460134, + "kl_loss": 0.23980334401130676, + "loss_ib": 0.008796385489404202, + "step": 1520 + }, + { + "ce_ib": 5.1323018074035645, + "ce_orig": 0.7175425291061401, + "epoch": 0.437414623624991, + "kl_loss": 0.18345916271209717, + "loss_ib": 0.006966893561184406, + "step": 1521 + }, + { + "ce_ib": 7.529026985168457, + "ce_orig": 1.5794041156768799, + "epoch": 0.437414623624991, + "kl_loss": 0.17436310648918152, + "loss_ib": 0.009272657334804535, + "step": 1521 + }, + { + "ce_ib": 10.308955192565918, + "ce_orig": 0.9079710841178894, + "epoch": 0.437414623624991, + "kl_loss": 0.23323319852352142, + "loss_ib": 0.01264128740876913, + "step": 1521 + }, + { + "ce_ib": 7.019486427307129, + "ce_orig": 1.42014741897583, + "epoch": 0.437414623624991, + "kl_loss": 0.19986101984977722, + "loss_ib": 0.009018097072839737, + "step": 1521 + }, + { + "ce_ib": 8.882169723510742, + "ce_orig": 1.5748227834701538, + "epoch": 0.43770220720396863, + "kl_loss": 0.2393156886100769, + "loss_ib": 0.01127532683312893, + "step": 1522 + }, + { + "ce_ib": 6.86137580871582, + "ce_orig": 0.8214037418365479, + "epoch": 0.43770220720396863, + "kl_loss": 0.21534022688865662, + "loss_ib": 0.009014777839183807, + "step": 1522 + }, + { + "ce_ib": 4.2742085456848145, + "ce_orig": 0.781179666519165, + "epoch": 0.43770220720396863, + "kl_loss": 0.2706628441810608, + "loss_ib": 0.006980836857110262, + "step": 1522 + }, + { + "ce_ib": 4.317927837371826, + "ce_orig": 0.4462431073188782, + "epoch": 0.43770220720396863, + "kl_loss": 0.26821720600128174, + "loss_ib": 0.007000099867582321, + "step": 1522 + }, + { + "ce_ib": 9.145509719848633, + "ce_orig": 1.3221925497055054, + "epoch": 0.4379897907829463, + "kl_loss": 0.1444806456565857, + "loss_ib": 0.01059031579643488, + "step": 1523 + }, + { + "ce_ib": 6.61660099029541, + "ce_orig": 1.1076503992080688, + "epoch": 0.4379897907829463, + "kl_loss": 0.2389574646949768, + "loss_ib": 0.009006176143884659, + "step": 1523 + }, + { + "ce_ib": 9.713702201843262, + "ce_orig": 1.8915526866912842, + "epoch": 0.4379897907829463, + "kl_loss": 0.7463563680648804, + "loss_ib": 0.017177265137434006, + "step": 1523 + }, + { + "ce_ib": 5.233415603637695, + "ce_orig": 0.8730946779251099, + "epoch": 0.4379897907829463, + "kl_loss": 0.24787402153015137, + "loss_ib": 0.007712156046181917, + "step": 1523 + }, + { + "ce_ib": 7.0577850341796875, + "ce_orig": 0.5874288082122803, + "epoch": 0.43827737436192393, + "kl_loss": 0.3205152153968811, + "loss_ib": 0.010262937285006046, + "step": 1524 + }, + { + "ce_ib": 3.819340467453003, + "ce_orig": 0.6037487387657166, + "epoch": 0.43827737436192393, + "kl_loss": 0.16586077213287354, + "loss_ib": 0.005477948114275932, + "step": 1524 + }, + { + "ce_ib": 7.668882846832275, + "ce_orig": 0.8537070155143738, + "epoch": 0.43827737436192393, + "kl_loss": 0.2507067024707794, + "loss_ib": 0.010175949893891811, + "step": 1524 + }, + { + "ce_ib": 5.600090980529785, + "ce_orig": 1.1724077463150024, + "epoch": 0.43827737436192393, + "kl_loss": 0.20900413393974304, + "loss_ib": 0.007690132595598698, + "step": 1524 + }, + { + "epoch": 0.43856495794090156, + "grad_norm": 0.09972728043794632, + "learning_rate": 9.654233713488112e-06, + "loss": 0.8899, + "step": 1525 + }, + { + "ce_ib": 6.1174798011779785, + "ce_orig": 1.08661949634552, + "epoch": 0.43856495794090156, + "kl_loss": 0.22504150867462158, + "loss_ib": 0.008367895148694515, + "step": 1525 + }, + { + "ce_ib": 3.9706997871398926, + "ce_orig": 0.5374826192855835, + "epoch": 0.43856495794090156, + "kl_loss": 0.18511299788951874, + "loss_ib": 0.005821830127388239, + "step": 1525 + }, + { + "ce_ib": 6.025529384613037, + "ce_orig": 0.9799771308898926, + "epoch": 0.43856495794090156, + "kl_loss": 0.25164347887039185, + "loss_ib": 0.008541963994503021, + "step": 1525 + }, + { + "ce_ib": 7.1165313720703125, + "ce_orig": 0.6115043759346008, + "epoch": 0.43856495794090156, + "kl_loss": 0.25530850887298584, + "loss_ib": 0.00966961681842804, + "step": 1525 + }, + { + "ce_ib": 5.062288284301758, + "ce_orig": 0.6672273874282837, + "epoch": 0.43885254151987924, + "kl_loss": 0.19899994134902954, + "loss_ib": 0.007052287925034761, + "step": 1526 + }, + { + "ce_ib": 7.357656478881836, + "ce_orig": 1.3270339965820312, + "epoch": 0.43885254151987924, + "kl_loss": 0.25842565298080444, + "loss_ib": 0.00994191225618124, + "step": 1526 + }, + { + "ce_ib": 7.756661891937256, + "ce_orig": 1.2379779815673828, + "epoch": 0.43885254151987924, + "kl_loss": 0.22414611279964447, + "loss_ib": 0.009998123161494732, + "step": 1526 + }, + { + "ce_ib": 3.338984251022339, + "ce_orig": 0.5923484563827515, + "epoch": 0.43885254151987924, + "kl_loss": 0.2646840810775757, + "loss_ib": 0.005985824856907129, + "step": 1526 + }, + { + "ce_ib": 3.647657632827759, + "ce_orig": 0.34279149770736694, + "epoch": 0.43914012509885686, + "kl_loss": 0.194602370262146, + "loss_ib": 0.005593681242316961, + "step": 1527 + }, + { + "ce_ib": 10.869415283203125, + "ce_orig": 1.901925802230835, + "epoch": 0.43914012509885686, + "kl_loss": 0.326307475566864, + "loss_ib": 0.014132489450275898, + "step": 1527 + }, + { + "ce_ib": 5.827927589416504, + "ce_orig": 0.7578860521316528, + "epoch": 0.43914012509885686, + "kl_loss": 0.25722992420196533, + "loss_ib": 0.008400226943194866, + "step": 1527 + }, + { + "ce_ib": 6.778678894042969, + "ce_orig": 1.1516461372375488, + "epoch": 0.43914012509885686, + "kl_loss": 0.28379058837890625, + "loss_ib": 0.00961658451706171, + "step": 1527 + }, + { + "ce_ib": 6.970860958099365, + "ce_orig": 0.8692914843559265, + "epoch": 0.4394277086778345, + "kl_loss": 0.29207298159599304, + "loss_ib": 0.009891591034829617, + "step": 1528 + }, + { + "ce_ib": 4.707976341247559, + "ce_orig": 0.4135763645172119, + "epoch": 0.4394277086778345, + "kl_loss": 0.22886447608470917, + "loss_ib": 0.006996620912104845, + "step": 1528 + }, + { + "ce_ib": 7.193721294403076, + "ce_orig": 0.8729004859924316, + "epoch": 0.4394277086778345, + "kl_loss": 0.20061442255973816, + "loss_ib": 0.009199866093695164, + "step": 1528 + }, + { + "ce_ib": 7.849767208099365, + "ce_orig": 1.0315536260604858, + "epoch": 0.4394277086778345, + "kl_loss": 0.27790358662605286, + "loss_ib": 0.010628802701830864, + "step": 1528 + }, + { + "ce_ib": 7.09470796585083, + "ce_orig": 1.4208656549453735, + "epoch": 0.43971529225681216, + "kl_loss": 0.3060705065727234, + "loss_ib": 0.010155413299798965, + "step": 1529 + }, + { + "ce_ib": 4.538375377655029, + "ce_orig": 0.6493577361106873, + "epoch": 0.43971529225681216, + "kl_loss": 0.3036617040634155, + "loss_ib": 0.007574991788715124, + "step": 1529 + }, + { + "ce_ib": 6.309713840484619, + "ce_orig": 0.5263689160346985, + "epoch": 0.43971529225681216, + "kl_loss": 0.3667697310447693, + "loss_ib": 0.009977410547435284, + "step": 1529 + }, + { + "ce_ib": 4.808983325958252, + "ce_orig": 0.5394362211227417, + "epoch": 0.43971529225681216, + "kl_loss": 0.20775997638702393, + "loss_ib": 0.006886583287268877, + "step": 1529 + }, + { + "epoch": 0.4400028758357898, + "grad_norm": 0.09744717180728912, + "learning_rate": 9.651392216689167e-06, + "loss": 0.8832, + "step": 1530 + }, + { + "ce_ib": 5.440213680267334, + "ce_orig": 0.5668556690216064, + "epoch": 0.4400028758357898, + "kl_loss": 0.2607230246067047, + "loss_ib": 0.008047443814575672, + "step": 1530 + }, + { + "ce_ib": 7.057228088378906, + "ce_orig": 0.7402814626693726, + "epoch": 0.4400028758357898, + "kl_loss": 0.1861785650253296, + "loss_ib": 0.008919013664126396, + "step": 1530 + }, + { + "ce_ib": 3.8372819423675537, + "ce_orig": 0.6064683794975281, + "epoch": 0.4400028758357898, + "kl_loss": 0.22002679109573364, + "loss_ib": 0.006037550047039986, + "step": 1530 + }, + { + "ce_ib": 5.222181797027588, + "ce_orig": 0.6531853079795837, + "epoch": 0.4400028758357898, + "kl_loss": 0.2058596909046173, + "loss_ib": 0.007280778605490923, + "step": 1530 + }, + { + "ce_ib": 6.428709030151367, + "ce_orig": 1.0921529531478882, + "epoch": 0.4402904594147674, + "kl_loss": 0.24581462144851685, + "loss_ib": 0.008886855095624924, + "step": 1531 + }, + { + "ce_ib": 6.755316257476807, + "ce_orig": 1.2458606958389282, + "epoch": 0.4402904594147674, + "kl_loss": 0.5204867720603943, + "loss_ib": 0.011960184201598167, + "step": 1531 + }, + { + "ce_ib": 2.8809092044830322, + "ce_orig": 0.4281614422798157, + "epoch": 0.4402904594147674, + "kl_loss": 0.2708930969238281, + "loss_ib": 0.005589840468019247, + "step": 1531 + }, + { + "ce_ib": 3.588864803314209, + "ce_orig": 0.5742392539978027, + "epoch": 0.4402904594147674, + "kl_loss": 0.25775015354156494, + "loss_ib": 0.006166366394609213, + "step": 1531 + }, + { + "ce_ib": 6.447246551513672, + "ce_orig": 1.0808292627334595, + "epoch": 0.44057804299374503, + "kl_loss": 0.1584368497133255, + "loss_ib": 0.008031615056097507, + "step": 1532 + }, + { + "ce_ib": 6.346353054046631, + "ce_orig": 1.025672197341919, + "epoch": 0.44057804299374503, + "kl_loss": 0.2612716257572174, + "loss_ib": 0.008959068916738033, + "step": 1532 + }, + { + "ce_ib": 6.36782693862915, + "ce_orig": 1.040495753288269, + "epoch": 0.44057804299374503, + "kl_loss": 0.23167358338832855, + "loss_ib": 0.008684562519192696, + "step": 1532 + }, + { + "ce_ib": 4.488460063934326, + "ce_orig": 0.4259486794471741, + "epoch": 0.44057804299374503, + "kl_loss": 0.2865976095199585, + "loss_ib": 0.0073544359765946865, + "step": 1532 + }, + { + "ce_ib": 4.456282138824463, + "ce_orig": 0.7206268906593323, + "epoch": 0.4408656265727227, + "kl_loss": 0.2152857780456543, + "loss_ib": 0.0066091399639844894, + "step": 1533 + }, + { + "ce_ib": 4.3911590576171875, + "ce_orig": 0.6018010377883911, + "epoch": 0.4408656265727227, + "kl_loss": 0.46944814920425415, + "loss_ib": 0.00908564031124115, + "step": 1533 + }, + { + "ce_ib": 4.210399150848389, + "ce_orig": 0.4141979217529297, + "epoch": 0.4408656265727227, + "kl_loss": 0.16251495480537415, + "loss_ib": 0.005835548508912325, + "step": 1533 + }, + { + "ce_ib": 7.888265132904053, + "ce_orig": 0.6826446056365967, + "epoch": 0.4408656265727227, + "kl_loss": 0.23286780714988708, + "loss_ib": 0.010216942988336086, + "step": 1533 + }, + { + "ce_ib": 7.644165992736816, + "ce_orig": 1.4100502729415894, + "epoch": 0.44115321015170034, + "kl_loss": 0.23425854742527008, + "loss_ib": 0.009986751712858677, + "step": 1534 + }, + { + "ce_ib": 3.7016208171844482, + "ce_orig": 0.6204091310501099, + "epoch": 0.44115321015170034, + "kl_loss": 0.18072494864463806, + "loss_ib": 0.005508870352059603, + "step": 1534 + }, + { + "ce_ib": 5.5186052322387695, + "ce_orig": 0.5334498286247253, + "epoch": 0.44115321015170034, + "kl_loss": 0.33088797330856323, + "loss_ib": 0.008827484212815762, + "step": 1534 + }, + { + "ce_ib": 4.205150604248047, + "ce_orig": 0.9227320551872253, + "epoch": 0.44115321015170034, + "kl_loss": 0.147797092795372, + "loss_ib": 0.005683121737092733, + "step": 1534 + }, + { + "epoch": 0.44144079373067796, + "grad_norm": 0.12952423095703125, + "learning_rate": 9.64853951360676e-06, + "loss": 0.8526, + "step": 1535 + }, + { + "ce_ib": 6.74899959564209, + "ce_orig": 1.1357985734939575, + "epoch": 0.44144079373067796, + "kl_loss": 0.2118733525276184, + "loss_ib": 0.008867733180522919, + "step": 1535 + }, + { + "ce_ib": 3.9933173656463623, + "ce_orig": 0.38218119740486145, + "epoch": 0.44144079373067796, + "kl_loss": 0.27774354815483093, + "loss_ib": 0.006770752370357513, + "step": 1535 + }, + { + "ce_ib": 5.220664024353027, + "ce_orig": 0.4890460968017578, + "epoch": 0.44144079373067796, + "kl_loss": 0.23115640878677368, + "loss_ib": 0.007532228250056505, + "step": 1535 + }, + { + "ce_ib": 8.40595531463623, + "ce_orig": 1.3035410642623901, + "epoch": 0.44144079373067796, + "kl_loss": 0.2943679690361023, + "loss_ib": 0.011349635198712349, + "step": 1535 + }, + { + "ce_ib": 6.669676303863525, + "ce_orig": 0.8286333084106445, + "epoch": 0.44172837730965564, + "kl_loss": 0.2603955864906311, + "loss_ib": 0.009273631498217583, + "step": 1536 + }, + { + "ce_ib": 3.091402292251587, + "ce_orig": 0.6263300776481628, + "epoch": 0.44172837730965564, + "kl_loss": 0.21528851985931396, + "loss_ib": 0.005244287196546793, + "step": 1536 + }, + { + "ce_ib": 5.090882778167725, + "ce_orig": 0.784512460231781, + "epoch": 0.44172837730965564, + "kl_loss": 0.16377821564674377, + "loss_ib": 0.006728664506226778, + "step": 1536 + }, + { + "ce_ib": 9.245505332946777, + "ce_orig": 1.5032682418823242, + "epoch": 0.44172837730965564, + "kl_loss": 0.23776906728744507, + "loss_ib": 0.011623196303844452, + "step": 1536 + }, + { + "ce_ib": 7.090968132019043, + "ce_orig": 1.1932905912399292, + "epoch": 0.44201596088863326, + "kl_loss": 0.2389073371887207, + "loss_ib": 0.009480041451752186, + "step": 1537 + }, + { + "ce_ib": 7.811031818389893, + "ce_orig": 1.1800780296325684, + "epoch": 0.44201596088863326, + "kl_loss": 0.2227729856967926, + "loss_ib": 0.010038761422038078, + "step": 1537 + }, + { + "ce_ib": 6.59114933013916, + "ce_orig": 1.033427119255066, + "epoch": 0.44201596088863326, + "kl_loss": 0.2936612069606781, + "loss_ib": 0.009527761489152908, + "step": 1537 + }, + { + "ce_ib": 4.836669921875, + "ce_orig": 0.5264076590538025, + "epoch": 0.44201596088863326, + "kl_loss": 0.17789442837238312, + "loss_ib": 0.006615614052861929, + "step": 1537 + }, + { + "ce_ib": 10.921903610229492, + "ce_orig": 1.932668924331665, + "epoch": 0.4423035444676109, + "kl_loss": 0.23033437132835388, + "loss_ib": 0.013225247152149677, + "step": 1538 + }, + { + "ce_ib": 5.171535968780518, + "ce_orig": 0.7136121988296509, + "epoch": 0.4423035444676109, + "kl_loss": 0.1687464714050293, + "loss_ib": 0.006859000772237778, + "step": 1538 + }, + { + "ce_ib": 5.190009593963623, + "ce_orig": 0.5568004846572876, + "epoch": 0.4423035444676109, + "kl_loss": 0.26972246170043945, + "loss_ib": 0.007887233980000019, + "step": 1538 + }, + { + "ce_ib": 8.552884101867676, + "ce_orig": 0.9528330564498901, + "epoch": 0.4423035444676109, + "kl_loss": 0.3009048402309418, + "loss_ib": 0.011561932042241096, + "step": 1538 + }, + { + "ce_ib": 4.198854446411133, + "ce_orig": 0.8415638208389282, + "epoch": 0.44259112804658857, + "kl_loss": 0.22010204195976257, + "loss_ib": 0.006399874575436115, + "step": 1539 + }, + { + "ce_ib": 9.696503639221191, + "ce_orig": 1.5634467601776123, + "epoch": 0.44259112804658857, + "kl_loss": 0.19891905784606934, + "loss_ib": 0.011685694567859173, + "step": 1539 + }, + { + "ce_ib": 4.511733531951904, + "ce_orig": 0.8286433219909668, + "epoch": 0.44259112804658857, + "kl_loss": 0.16388346254825592, + "loss_ib": 0.006150567904114723, + "step": 1539 + }, + { + "ce_ib": 6.678924083709717, + "ce_orig": 0.9080381393432617, + "epoch": 0.44259112804658857, + "kl_loss": 0.16212314367294312, + "loss_ib": 0.008300155401229858, + "step": 1539 + }, + { + "epoch": 0.4428787116255662, + "grad_norm": 0.12597346305847168, + "learning_rate": 9.645675611113715e-06, + "loss": 0.8547, + "step": 1540 + }, + { + "ce_ib": 5.553781986236572, + "ce_orig": 0.9590039849281311, + "epoch": 0.4428787116255662, + "kl_loss": 0.19984352588653564, + "loss_ib": 0.007552217226475477, + "step": 1540 + }, + { + "ce_ib": 7.916083335876465, + "ce_orig": 1.2729321718215942, + "epoch": 0.4428787116255662, + "kl_loss": 0.16869348287582397, + "loss_ib": 0.009603017941117287, + "step": 1540 + }, + { + "ce_ib": 6.0101399421691895, + "ce_orig": 0.8366454243659973, + "epoch": 0.4428787116255662, + "kl_loss": 0.2121955305337906, + "loss_ib": 0.008132095448672771, + "step": 1540 + }, + { + "ce_ib": 5.466952323913574, + "ce_orig": 0.7045579552650452, + "epoch": 0.4428787116255662, + "kl_loss": 0.33795952796936035, + "loss_ib": 0.008846547454595566, + "step": 1540 + }, + { + "ce_ib": 7.9375739097595215, + "ce_orig": 1.2572914361953735, + "epoch": 0.4431662952045438, + "kl_loss": 0.25002428889274597, + "loss_ib": 0.010437816381454468, + "step": 1541 + }, + { + "ce_ib": 4.4934234619140625, + "ce_orig": 0.39058825373649597, + "epoch": 0.4431662952045438, + "kl_loss": 0.25045621395111084, + "loss_ib": 0.006997985765337944, + "step": 1541 + }, + { + "ce_ib": 6.903958320617676, + "ce_orig": 0.5183854103088379, + "epoch": 0.4431662952045438, + "kl_loss": 0.31575220823287964, + "loss_ib": 0.010061481036245823, + "step": 1541 + }, + { + "ce_ib": 3.8567583560943604, + "ce_orig": 0.8497815728187561, + "epoch": 0.4431662952045438, + "kl_loss": 0.16291774809360504, + "loss_ib": 0.005485935602337122, + "step": 1541 + }, + { + "ce_ib": 3.3252243995666504, + "ce_orig": 0.605065107345581, + "epoch": 0.44345387878352144, + "kl_loss": 0.2954948842525482, + "loss_ib": 0.00628017308190465, + "step": 1542 + }, + { + "ce_ib": 6.021589756011963, + "ce_orig": 1.0220741033554077, + "epoch": 0.44345387878352144, + "kl_loss": 0.250985711812973, + "loss_ib": 0.008531446568667889, + "step": 1542 + }, + { + "ce_ib": 5.84227180480957, + "ce_orig": 0.8054718971252441, + "epoch": 0.44345387878352144, + "kl_loss": 0.2861500382423401, + "loss_ib": 0.008703771978616714, + "step": 1542 + }, + { + "ce_ib": 8.967488288879395, + "ce_orig": 1.5930452346801758, + "epoch": 0.44345387878352144, + "kl_loss": 0.23124566674232483, + "loss_ib": 0.011279945261776447, + "step": 1542 + }, + { + "ce_ib": 6.608915328979492, + "ce_orig": 0.62285315990448, + "epoch": 0.4437414623624991, + "kl_loss": 0.29133331775665283, + "loss_ib": 0.009522248059511185, + "step": 1543 + }, + { + "ce_ib": 3.287733793258667, + "ce_orig": 0.7592197060585022, + "epoch": 0.4437414623624991, + "kl_loss": 0.17416146397590637, + "loss_ib": 0.005029348190873861, + "step": 1543 + }, + { + "ce_ib": 4.219071388244629, + "ce_orig": 0.7607125043869019, + "epoch": 0.4437414623624991, + "kl_loss": 0.20168092846870422, + "loss_ib": 0.0062358807772397995, + "step": 1543 + }, + { + "ce_ib": 2.911717414855957, + "ce_orig": 0.3957778811454773, + "epoch": 0.4437414623624991, + "kl_loss": 0.28831154108047485, + "loss_ib": 0.005794832482933998, + "step": 1543 + }, + { + "ce_ib": 5.865091800689697, + "ce_orig": 0.7722824811935425, + "epoch": 0.44402904594147674, + "kl_loss": 0.24601982533931732, + "loss_ib": 0.008325289934873581, + "step": 1544 + }, + { + "ce_ib": 5.063828945159912, + "ce_orig": 0.706646740436554, + "epoch": 0.44402904594147674, + "kl_loss": 0.22632688283920288, + "loss_ib": 0.0073270974680781364, + "step": 1544 + }, + { + "ce_ib": 7.057989597320557, + "ce_orig": 1.2739616632461548, + "epoch": 0.44402904594147674, + "kl_loss": 0.13553673028945923, + "loss_ib": 0.008413356728851795, + "step": 1544 + }, + { + "ce_ib": 6.612596035003662, + "ce_orig": 1.0941510200500488, + "epoch": 0.44402904594147674, + "kl_loss": 0.33927834033966064, + "loss_ib": 0.010005378164350986, + "step": 1544 + }, + { + "epoch": 0.44431662952045436, + "grad_norm": 0.11126173287630081, + "learning_rate": 9.642800516109842e-06, + "loss": 0.9154, + "step": 1545 + }, + { + "ce_ib": 6.68749475479126, + "ce_orig": 1.1780781745910645, + "epoch": 0.44431662952045436, + "kl_loss": 0.2303389608860016, + "loss_ib": 0.008990884758532047, + "step": 1545 + }, + { + "ce_ib": 8.691813468933105, + "ce_orig": 1.0570861101150513, + "epoch": 0.44431662952045436, + "kl_loss": 0.23569072782993317, + "loss_ib": 0.01104872114956379, + "step": 1545 + }, + { + "ce_ib": 7.5655598640441895, + "ce_orig": 0.8942456841468811, + "epoch": 0.44431662952045436, + "kl_loss": 0.19266939163208008, + "loss_ib": 0.009492253884673119, + "step": 1545 + }, + { + "ce_ib": 4.0522966384887695, + "ce_orig": 0.8112623691558838, + "epoch": 0.44431662952045436, + "kl_loss": 0.1970742642879486, + "loss_ib": 0.006023039110004902, + "step": 1545 + }, + { + "ce_ib": 9.083720207214355, + "ce_orig": 1.3299872875213623, + "epoch": 0.44460421309943204, + "kl_loss": 0.7850167751312256, + "loss_ib": 0.01693388819694519, + "step": 1546 + }, + { + "ce_ib": 6.961404323577881, + "ce_orig": 1.0265910625457764, + "epoch": 0.44460421309943204, + "kl_loss": 0.22739318013191223, + "loss_ib": 0.009235336445271969, + "step": 1546 + }, + { + "ce_ib": 6.790365219116211, + "ce_orig": 0.7861922383308411, + "epoch": 0.44460421309943204, + "kl_loss": 0.2910609245300293, + "loss_ib": 0.009700974449515343, + "step": 1546 + }, + { + "ce_ib": 9.423171043395996, + "ce_orig": 1.499626874923706, + "epoch": 0.44460421309943204, + "kl_loss": 0.2499282956123352, + "loss_ib": 0.011922454461455345, + "step": 1546 + }, + { + "ce_ib": 5.589235782623291, + "ce_orig": 0.6277043223381042, + "epoch": 0.44489179667840967, + "kl_loss": 0.275199294090271, + "loss_ib": 0.00834122858941555, + "step": 1547 + }, + { + "ce_ib": 3.979626178741455, + "ce_orig": 0.5686247944831848, + "epoch": 0.44489179667840967, + "kl_loss": 0.1741591989994049, + "loss_ib": 0.005721218418329954, + "step": 1547 + }, + { + "ce_ib": 8.743000030517578, + "ce_orig": 0.9185367226600647, + "epoch": 0.44489179667840967, + "kl_loss": 0.19029943645000458, + "loss_ib": 0.010645993985235691, + "step": 1547 + }, + { + "ce_ib": 3.9479920864105225, + "ce_orig": 0.4463140666484833, + "epoch": 0.44489179667840967, + "kl_loss": 0.1770147979259491, + "loss_ib": 0.005718139931559563, + "step": 1547 + }, + { + "ce_ib": 4.838682651519775, + "ce_orig": 0.7097772359848022, + "epoch": 0.4451793802573873, + "kl_loss": 0.13939312100410461, + "loss_ib": 0.006232613697648048, + "step": 1548 + }, + { + "ce_ib": 4.897730827331543, + "ce_orig": 0.8895747065544128, + "epoch": 0.4451793802573873, + "kl_loss": 0.22099938988685608, + "loss_ib": 0.007107724901288748, + "step": 1548 + }, + { + "ce_ib": 4.8738508224487305, + "ce_orig": 0.7790616750717163, + "epoch": 0.4451793802573873, + "kl_loss": 0.433513343334198, + "loss_ib": 0.00920898374170065, + "step": 1548 + }, + { + "ce_ib": 6.785175800323486, + "ce_orig": 1.1384638547897339, + "epoch": 0.4451793802573873, + "kl_loss": 0.2551881968975067, + "loss_ib": 0.009337058290839195, + "step": 1548 + }, + { + "ce_ib": 7.461643218994141, + "ce_orig": 1.2999147176742554, + "epoch": 0.44546696383636497, + "kl_loss": 0.18034838140010834, + "loss_ib": 0.009265126660466194, + "step": 1549 + }, + { + "ce_ib": 9.189070701599121, + "ce_orig": 1.6017738580703735, + "epoch": 0.44546696383636497, + "kl_loss": 0.2147252857685089, + "loss_ib": 0.01133632380515337, + "step": 1549 + }, + { + "ce_ib": 5.132811069488525, + "ce_orig": 0.12677517533302307, + "epoch": 0.44546696383636497, + "kl_loss": 0.632737398147583, + "loss_ib": 0.011460185050964355, + "step": 1549 + }, + { + "ce_ib": 8.620797157287598, + "ce_orig": 1.5666041374206543, + "epoch": 0.44546696383636497, + "kl_loss": 0.21782813966274261, + "loss_ib": 0.010799078270792961, + "step": 1549 + }, + { + "epoch": 0.4457545474153426, + "grad_norm": 0.13772690296173096, + "learning_rate": 9.639914235521906e-06, + "loss": 0.863, + "step": 1550 + }, + { + "ce_ib": 6.6242451667785645, + "ce_orig": 0.9287428259849548, + "epoch": 0.4457545474153426, + "kl_loss": 0.3059898614883423, + "loss_ib": 0.009684143587946892, + "step": 1550 + }, + { + "ce_ib": 4.536319732666016, + "ce_orig": 0.94605952501297, + "epoch": 0.4457545474153426, + "kl_loss": 0.16641706228256226, + "loss_ib": 0.006200490053743124, + "step": 1550 + }, + { + "ce_ib": 7.407623291015625, + "ce_orig": 1.001709222793579, + "epoch": 0.4457545474153426, + "kl_loss": 0.26862847805023193, + "loss_ib": 0.010093907825648785, + "step": 1550 + }, + { + "ce_ib": 5.6089396476745605, + "ce_orig": 0.9210013747215271, + "epoch": 0.4457545474153426, + "kl_loss": 0.294098824262619, + "loss_ib": 0.008549927733838558, + "step": 1550 + }, + { + "ce_ib": 8.03560733795166, + "ce_orig": 0.9753695130348206, + "epoch": 0.4460421309943202, + "kl_loss": 0.23451608419418335, + "loss_ib": 0.010380768217146397, + "step": 1551 + }, + { + "ce_ib": 5.776275157928467, + "ce_orig": 0.8842979073524475, + "epoch": 0.4460421309943202, + "kl_loss": 0.2639259099960327, + "loss_ib": 0.008415534161031246, + "step": 1551 + }, + { + "ce_ib": 3.086613655090332, + "ce_orig": 0.5480087995529175, + "epoch": 0.4460421309943202, + "kl_loss": 0.23094633221626282, + "loss_ib": 0.0053960769437253475, + "step": 1551 + }, + { + "ce_ib": 3.417895793914795, + "ce_orig": 0.6882546544075012, + "epoch": 0.4460421309943202, + "kl_loss": 0.2075841724872589, + "loss_ib": 0.005493737291544676, + "step": 1551 + }, + { + "ce_ib": 5.6718668937683105, + "ce_orig": 0.9152762293815613, + "epoch": 0.44632971457329784, + "kl_loss": 0.31601691246032715, + "loss_ib": 0.008832036517560482, + "step": 1552 + }, + { + "ce_ib": 6.148171424865723, + "ce_orig": 0.9496646523475647, + "epoch": 0.44632971457329784, + "kl_loss": 0.17234985530376434, + "loss_ib": 0.007871669717133045, + "step": 1552 + }, + { + "ce_ib": 3.8803837299346924, + "ce_orig": 0.5967508554458618, + "epoch": 0.44632971457329784, + "kl_loss": 0.2288057506084442, + "loss_ib": 0.006168440915644169, + "step": 1552 + }, + { + "ce_ib": 2.663280963897705, + "ce_orig": 0.2842079997062683, + "epoch": 0.44632971457329784, + "kl_loss": 0.4749584197998047, + "loss_ib": 0.007412864826619625, + "step": 1552 + }, + { + "ce_ib": 6.659852504730225, + "ce_orig": 1.2880715131759644, + "epoch": 0.4466172981522755, + "kl_loss": 0.2627614736557007, + "loss_ib": 0.00928746722638607, + "step": 1553 + }, + { + "ce_ib": 5.383847236633301, + "ce_orig": 0.7564171552658081, + "epoch": 0.4466172981522755, + "kl_loss": 0.2418477088212967, + "loss_ib": 0.007802323903888464, + "step": 1553 + }, + { + "ce_ib": 8.930891990661621, + "ce_orig": 1.424485206604004, + "epoch": 0.4466172981522755, + "kl_loss": 0.23407617211341858, + "loss_ib": 0.011271653696894646, + "step": 1553 + }, + { + "ce_ib": 6.906391143798828, + "ce_orig": 0.9523347020149231, + "epoch": 0.4466172981522755, + "kl_loss": 0.27365630865097046, + "loss_ib": 0.009642953984439373, + "step": 1553 + }, + { + "ce_ib": 10.713818550109863, + "ce_orig": 1.5967822074890137, + "epoch": 0.44690488173125315, + "kl_loss": 0.19433662295341492, + "loss_ib": 0.012657185085117817, + "step": 1554 + }, + { + "ce_ib": 3.7548916339874268, + "ce_orig": 0.8045800924301147, + "epoch": 0.44690488173125315, + "kl_loss": 0.13913817703723907, + "loss_ib": 0.005146273411810398, + "step": 1554 + }, + { + "ce_ib": 5.803292751312256, + "ce_orig": 0.6776520013809204, + "epoch": 0.44690488173125315, + "kl_loss": 0.26821577548980713, + "loss_ib": 0.00848545040935278, + "step": 1554 + }, + { + "ce_ib": 3.515928268432617, + "ce_orig": 0.46322593092918396, + "epoch": 0.44690488173125315, + "kl_loss": 0.20348799228668213, + "loss_ib": 0.005550808273255825, + "step": 1554 + }, + { + "epoch": 0.44719246531023077, + "grad_norm": 0.13196228444576263, + "learning_rate": 9.637016776303631e-06, + "loss": 0.9205, + "step": 1555 + }, + { + "ce_ib": 3.699512481689453, + "ce_orig": 0.6298815608024597, + "epoch": 0.44719246531023077, + "kl_loss": 0.18475520610809326, + "loss_ib": 0.005547064356505871, + "step": 1555 + }, + { + "ce_ib": 3.959015369415283, + "ce_orig": 0.7226744294166565, + "epoch": 0.44719246531023077, + "kl_loss": 0.2643928825855255, + "loss_ib": 0.006602943874895573, + "step": 1555 + }, + { + "ce_ib": 7.5745110511779785, + "ce_orig": 0.3859376311302185, + "epoch": 0.44719246531023077, + "kl_loss": 0.7596704959869385, + "loss_ib": 0.015171214938163757, + "step": 1555 + }, + { + "ce_ib": 6.784470558166504, + "ce_orig": 0.9845409989356995, + "epoch": 0.44719246531023077, + "kl_loss": 0.14599010348320007, + "loss_ib": 0.00824437104165554, + "step": 1555 + }, + { + "ce_ib": 5.580190658569336, + "ce_orig": 0.6907637119293213, + "epoch": 0.44748004888920845, + "kl_loss": 0.3499740958213806, + "loss_ib": 0.009079932235181332, + "step": 1556 + }, + { + "ce_ib": 8.40283203125, + "ce_orig": 1.2569537162780762, + "epoch": 0.44748004888920845, + "kl_loss": 0.19355545938014984, + "loss_ib": 0.01033838652074337, + "step": 1556 + }, + { + "ce_ib": 5.298177242279053, + "ce_orig": 0.5841009616851807, + "epoch": 0.44748004888920845, + "kl_loss": 0.31074339151382446, + "loss_ib": 0.008405610918998718, + "step": 1556 + }, + { + "ce_ib": 4.919023036956787, + "ce_orig": 0.9556659460067749, + "epoch": 0.44748004888920845, + "kl_loss": 0.23168615996837616, + "loss_ib": 0.007235884666442871, + "step": 1556 + }, + { + "ce_ib": 7.061880111694336, + "ce_orig": 0.8365444540977478, + "epoch": 0.44776763246818607, + "kl_loss": 0.38870474696159363, + "loss_ib": 0.010948927141726017, + "step": 1557 + }, + { + "ce_ib": 5.572457790374756, + "ce_orig": 0.6920515298843384, + "epoch": 0.44776763246818607, + "kl_loss": 0.22817905247211456, + "loss_ib": 0.007854248397052288, + "step": 1557 + }, + { + "ce_ib": 3.7289929389953613, + "ce_orig": 0.545609176158905, + "epoch": 0.44776763246818607, + "kl_loss": 0.2949768900871277, + "loss_ib": 0.006678761914372444, + "step": 1557 + }, + { + "ce_ib": 3.3400137424468994, + "ce_orig": 0.5780962109565735, + "epoch": 0.44776763246818607, + "kl_loss": 0.14108021557331085, + "loss_ib": 0.004750816151499748, + "step": 1557 + }, + { + "ce_ib": 6.065938472747803, + "ce_orig": 1.059144139289856, + "epoch": 0.4480552160471637, + "kl_loss": 0.26288458704948425, + "loss_ib": 0.008694784715771675, + "step": 1558 + }, + { + "ce_ib": 8.30125617980957, + "ce_orig": 1.3371621370315552, + "epoch": 0.4480552160471637, + "kl_loss": 0.36227551102638245, + "loss_ib": 0.011924011632800102, + "step": 1558 + }, + { + "ce_ib": 4.027812957763672, + "ce_orig": 0.3836546838283539, + "epoch": 0.4480552160471637, + "kl_loss": 0.5240738987922668, + "loss_ib": 0.00926855206489563, + "step": 1558 + }, + { + "ce_ib": 8.322689056396484, + "ce_orig": 1.4534698724746704, + "epoch": 0.4480552160471637, + "kl_loss": 0.21685175597667694, + "loss_ib": 0.01049120631068945, + "step": 1558 + }, + { + "ce_ib": 4.906183242797852, + "ce_orig": 0.7980424761772156, + "epoch": 0.4483427996261414, + "kl_loss": 0.1907358318567276, + "loss_ib": 0.006813541520386934, + "step": 1559 + }, + { + "ce_ib": 6.262854099273682, + "ce_orig": 0.8929293155670166, + "epoch": 0.4483427996261414, + "kl_loss": 0.21179665625095367, + "loss_ib": 0.008380820974707603, + "step": 1559 + }, + { + "ce_ib": 6.7866621017456055, + "ce_orig": 0.6150768995285034, + "epoch": 0.4483427996261414, + "kl_loss": 0.31106656789779663, + "loss_ib": 0.009897327981889248, + "step": 1559 + }, + { + "ce_ib": 4.801574230194092, + "ce_orig": 0.6632340550422668, + "epoch": 0.4483427996261414, + "kl_loss": 0.4509941637516022, + "loss_ib": 0.009311515837907791, + "step": 1559 + }, + { + "epoch": 0.448630383205119, + "grad_norm": 0.12390803545713425, + "learning_rate": 9.634108145435665e-06, + "loss": 0.8429, + "step": 1560 + }, + { + "ce_ib": 7.607968807220459, + "ce_orig": 0.994629442691803, + "epoch": 0.448630383205119, + "kl_loss": 0.21743838489055634, + "loss_ib": 0.009782352484762669, + "step": 1560 + }, + { + "ce_ib": 8.091766357421875, + "ce_orig": 1.0472888946533203, + "epoch": 0.448630383205119, + "kl_loss": 0.18147239089012146, + "loss_ib": 0.00990648940205574, + "step": 1560 + }, + { + "ce_ib": 6.3520827293396, + "ce_orig": 1.1672568321228027, + "epoch": 0.448630383205119, + "kl_loss": 0.341322124004364, + "loss_ib": 0.009765303693711758, + "step": 1560 + }, + { + "ce_ib": 6.405336380004883, + "ce_orig": 1.0733023881912231, + "epoch": 0.448630383205119, + "kl_loss": 0.21727420389652252, + "loss_ib": 0.008578077889978886, + "step": 1560 + }, + { + "ce_ib": 5.873064994812012, + "ce_orig": 0.9369548559188843, + "epoch": 0.4489179667840966, + "kl_loss": 0.2877747416496277, + "loss_ib": 0.008750812150537968, + "step": 1561 + }, + { + "ce_ib": 7.807909965515137, + "ce_orig": 1.4815807342529297, + "epoch": 0.4489179667840966, + "kl_loss": 0.18937669694423676, + "loss_ib": 0.009701676666736603, + "step": 1561 + }, + { + "ce_ib": 4.973557472229004, + "ce_orig": 0.647799015045166, + "epoch": 0.4489179667840966, + "kl_loss": 0.2897361218929291, + "loss_ib": 0.00787091813981533, + "step": 1561 + }, + { + "ce_ib": 3.7155158519744873, + "ce_orig": 0.5365419387817383, + "epoch": 0.4489179667840966, + "kl_loss": 0.235978901386261, + "loss_ib": 0.006075304467231035, + "step": 1561 + }, + { + "ce_ib": 4.31659460067749, + "ce_orig": 0.5963836312294006, + "epoch": 0.44920555036307425, + "kl_loss": 0.21188724040985107, + "loss_ib": 0.006435466930270195, + "step": 1562 + }, + { + "ce_ib": 7.9532790184021, + "ce_orig": 0.521329939365387, + "epoch": 0.44920555036307425, + "kl_loss": 0.22407691180706024, + "loss_ib": 0.01019404735416174, + "step": 1562 + }, + { + "ce_ib": 7.27313232421875, + "ce_orig": 0.788743793964386, + "epoch": 0.44920555036307425, + "kl_loss": 0.23372234404087067, + "loss_ib": 0.009610354900360107, + "step": 1562 + }, + { + "ce_ib": 6.345553398132324, + "ce_orig": 1.1143635511398315, + "epoch": 0.44920555036307425, + "kl_loss": 0.24764125049114227, + "loss_ib": 0.008821966126561165, + "step": 1562 + }, + { + "ce_ib": 7.859414577484131, + "ce_orig": 1.0159443616867065, + "epoch": 0.4494931339420519, + "kl_loss": 0.23846226930618286, + "loss_ib": 0.010244037955999374, + "step": 1563 + }, + { + "ce_ib": 7.57835054397583, + "ce_orig": 1.1461509466171265, + "epoch": 0.4494931339420519, + "kl_loss": 0.22977961599826813, + "loss_ib": 0.009876146912574768, + "step": 1563 + }, + { + "ce_ib": 7.633553504943848, + "ce_orig": 1.1035288572311401, + "epoch": 0.4494931339420519, + "kl_loss": 0.22606161236763, + "loss_ib": 0.009894168935716152, + "step": 1563 + }, + { + "ce_ib": 8.209954261779785, + "ce_orig": 1.4839401245117188, + "epoch": 0.4494931339420519, + "kl_loss": 0.2907108664512634, + "loss_ib": 0.011117062531411648, + "step": 1563 + }, + { + "ce_ib": 4.617456912994385, + "ce_orig": 0.6988782286643982, + "epoch": 0.44978071752102955, + "kl_loss": 0.1478702425956726, + "loss_ib": 0.006096159107983112, + "step": 1564 + }, + { + "ce_ib": 7.396731376647949, + "ce_orig": 1.0505073070526123, + "epoch": 0.44978071752102955, + "kl_loss": 0.26244524121284485, + "loss_ib": 0.010021183639764786, + "step": 1564 + }, + { + "ce_ib": 7.378440856933594, + "ce_orig": 0.49798375368118286, + "epoch": 0.44978071752102955, + "kl_loss": 0.23360861837863922, + "loss_ib": 0.009714527055621147, + "step": 1564 + }, + { + "ce_ib": 4.991917610168457, + "ce_orig": 0.3777581453323364, + "epoch": 0.44978071752102955, + "kl_loss": 0.28184863924980164, + "loss_ib": 0.007810404058545828, + "step": 1564 + }, + { + "epoch": 0.4500683011000072, + "grad_norm": 0.10581698268651962, + "learning_rate": 9.63118834992558e-06, + "loss": 0.889, + "step": 1565 + }, + { + "ce_ib": 8.230337142944336, + "ce_orig": 1.355285882949829, + "epoch": 0.4500683011000072, + "kl_loss": 0.2831115126609802, + "loss_ib": 0.011061452329158783, + "step": 1565 + }, + { + "ce_ib": 6.374688148498535, + "ce_orig": 0.9068766236305237, + "epoch": 0.4500683011000072, + "kl_loss": 0.1812073290348053, + "loss_ib": 0.008186761289834976, + "step": 1565 + }, + { + "ce_ib": 7.544388771057129, + "ce_orig": 0.9622206091880798, + "epoch": 0.4500683011000072, + "kl_loss": 0.2278764247894287, + "loss_ib": 0.009823152795433998, + "step": 1565 + }, + { + "ce_ib": 4.876921653747559, + "ce_orig": 0.6613551378250122, + "epoch": 0.4500683011000072, + "kl_loss": 0.2815976142883301, + "loss_ib": 0.007692897692322731, + "step": 1565 + }, + { + "ce_ib": 4.218951225280762, + "ce_orig": 0.6239345669746399, + "epoch": 0.45035588467898485, + "kl_loss": 0.17060774564743042, + "loss_ib": 0.005925028119236231, + "step": 1566 + }, + { + "ce_ib": 3.829336643218994, + "ce_orig": 0.6506801843643188, + "epoch": 0.45035588467898485, + "kl_loss": 0.1707981675863266, + "loss_ib": 0.005537318531423807, + "step": 1566 + }, + { + "ce_ib": 7.071207046508789, + "ce_orig": 1.1898292303085327, + "epoch": 0.45035588467898485, + "kl_loss": 0.27813827991485596, + "loss_ib": 0.009852590039372444, + "step": 1566 + }, + { + "ce_ib": 5.183157920837402, + "ce_orig": 0.547346293926239, + "epoch": 0.45035588467898485, + "kl_loss": 0.3790608048439026, + "loss_ib": 0.00897376611828804, + "step": 1566 + }, + { + "ce_ib": 6.814054489135742, + "ce_orig": 1.0130473375320435, + "epoch": 0.4506434682579625, + "kl_loss": 0.25653523206710815, + "loss_ib": 0.00937940739095211, + "step": 1567 + }, + { + "ce_ib": 5.125803470611572, + "ce_orig": 0.594994306564331, + "epoch": 0.4506434682579625, + "kl_loss": 0.159761443734169, + "loss_ib": 0.006723417434841394, + "step": 1567 + }, + { + "ce_ib": 10.355138778686523, + "ce_orig": 1.708457589149475, + "epoch": 0.4506434682579625, + "kl_loss": 0.22042052447795868, + "loss_ib": 0.012559343129396439, + "step": 1567 + }, + { + "ce_ib": 5.936809539794922, + "ce_orig": 0.6713258028030396, + "epoch": 0.4506434682579625, + "kl_loss": 0.21336688101291656, + "loss_ib": 0.008070478215813637, + "step": 1567 + }, + { + "ce_ib": 4.86829137802124, + "ce_orig": 0.9267745018005371, + "epoch": 0.4509310518369401, + "kl_loss": 0.3055550456047058, + "loss_ib": 0.00792384147644043, + "step": 1568 + }, + { + "ce_ib": 7.262491703033447, + "ce_orig": 0.9000452160835266, + "epoch": 0.4509310518369401, + "kl_loss": 0.21604710817337036, + "loss_ib": 0.009422962553799152, + "step": 1568 + }, + { + "ce_ib": 6.170190334320068, + "ce_orig": 0.7533259391784668, + "epoch": 0.4509310518369401, + "kl_loss": 0.43113571405410767, + "loss_ib": 0.010481548495590687, + "step": 1568 + }, + { + "ce_ib": 9.377881050109863, + "ce_orig": 0.8980697393417358, + "epoch": 0.4509310518369401, + "kl_loss": 0.21846041083335876, + "loss_ib": 0.011562485247850418, + "step": 1568 + }, + { + "ce_ib": 9.08447551727295, + "ce_orig": 1.786766529083252, + "epoch": 0.4512186354159177, + "kl_loss": 0.26060664653778076, + "loss_ib": 0.011690542101860046, + "step": 1569 + }, + { + "ce_ib": 5.312047481536865, + "ce_orig": 0.864721417427063, + "epoch": 0.4512186354159177, + "kl_loss": 0.23503050208091736, + "loss_ib": 0.0076623521745204926, + "step": 1569 + }, + { + "ce_ib": 4.463824272155762, + "ce_orig": 0.4612298011779785, + "epoch": 0.4512186354159177, + "kl_loss": 0.374639093875885, + "loss_ib": 0.008210215717554092, + "step": 1569 + }, + { + "ce_ib": 5.714632034301758, + "ce_orig": 0.946444034576416, + "epoch": 0.4512186354159177, + "kl_loss": 0.2866571545600891, + "loss_ib": 0.008581203408539295, + "step": 1569 + }, + { + "epoch": 0.4515062189948954, + "grad_norm": 0.11883459240198135, + "learning_rate": 9.628257396807837e-06, + "loss": 0.9178, + "step": 1570 + }, + { + "ce_ib": 6.601624965667725, + "ce_orig": 1.0058202743530273, + "epoch": 0.4515062189948954, + "kl_loss": 0.3906886577606201, + "loss_ib": 0.01050851121544838, + "step": 1570 + }, + { + "ce_ib": 5.223203182220459, + "ce_orig": 0.8138896822929382, + "epoch": 0.4515062189948954, + "kl_loss": 0.3764778971672058, + "loss_ib": 0.00898798182606697, + "step": 1570 + }, + { + "ce_ib": 6.802353382110596, + "ce_orig": 0.9874704480171204, + "epoch": 0.4515062189948954, + "kl_loss": 0.1776190996170044, + "loss_ib": 0.008578544482588768, + "step": 1570 + }, + { + "ce_ib": 7.111766338348389, + "ce_orig": 1.030307650566101, + "epoch": 0.4515062189948954, + "kl_loss": 0.22811469435691833, + "loss_ib": 0.009392913430929184, + "step": 1570 + }, + { + "ce_ib": 5.195444583892822, + "ce_orig": 0.5673562288284302, + "epoch": 0.451793802573873, + "kl_loss": 0.3687005043029785, + "loss_ib": 0.008882449939846992, + "step": 1571 + }, + { + "ce_ib": 7.545969009399414, + "ce_orig": 0.8662387728691101, + "epoch": 0.451793802573873, + "kl_loss": 0.21740826964378357, + "loss_ib": 0.009720050729811192, + "step": 1571 + }, + { + "ce_ib": 5.633011341094971, + "ce_orig": 0.9099085927009583, + "epoch": 0.451793802573873, + "kl_loss": 0.17175164818763733, + "loss_ib": 0.0073505281470716, + "step": 1571 + }, + { + "ce_ib": 4.424170970916748, + "ce_orig": 0.40657246112823486, + "epoch": 0.451793802573873, + "kl_loss": 0.3058871030807495, + "loss_ib": 0.007483042310923338, + "step": 1571 + }, + { + "ce_ib": 4.206382751464844, + "ce_orig": 0.3294968605041504, + "epoch": 0.45208138615285065, + "kl_loss": 0.1678428202867508, + "loss_ib": 0.005884811282157898, + "step": 1572 + }, + { + "ce_ib": 6.239657878875732, + "ce_orig": 0.9700036644935608, + "epoch": 0.45208138615285065, + "kl_loss": 0.269490122795105, + "loss_ib": 0.008934559300541878, + "step": 1572 + }, + { + "ce_ib": 4.88264274597168, + "ce_orig": 0.7675372958183289, + "epoch": 0.45208138615285065, + "kl_loss": 0.14468610286712646, + "loss_ib": 0.006329504307359457, + "step": 1572 + }, + { + "ce_ib": 6.80122184753418, + "ce_orig": 1.0441501140594482, + "epoch": 0.45208138615285065, + "kl_loss": 0.1825808882713318, + "loss_ib": 0.008627030998468399, + "step": 1572 + }, + { + "ce_ib": 3.62424373626709, + "ce_orig": 0.5604528784751892, + "epoch": 0.45236896973182833, + "kl_loss": 0.2947345972061157, + "loss_ib": 0.006571589503437281, + "step": 1573 + }, + { + "ce_ib": 7.601794719696045, + "ce_orig": 1.3156349658966064, + "epoch": 0.45236896973182833, + "kl_loss": 0.29263508319854736, + "loss_ib": 0.010528144426643848, + "step": 1573 + }, + { + "ce_ib": 9.446778297424316, + "ce_orig": 1.2505724430084229, + "epoch": 0.45236896973182833, + "kl_loss": 0.7459277510643005, + "loss_ib": 0.01690605655312538, + "step": 1573 + }, + { + "ce_ib": 5.411386966705322, + "ce_orig": 0.655278742313385, + "epoch": 0.45236896973182833, + "kl_loss": 0.311574250459671, + "loss_ib": 0.008527129888534546, + "step": 1573 + }, + { + "ce_ib": 4.58731746673584, + "ce_orig": 0.5685978531837463, + "epoch": 0.45265655331080595, + "kl_loss": 0.4015287756919861, + "loss_ib": 0.008602604269981384, + "step": 1574 + }, + { + "ce_ib": 4.962559223175049, + "ce_orig": 0.7091254591941833, + "epoch": 0.45265655331080595, + "kl_loss": 0.2449083924293518, + "loss_ib": 0.00741164293140173, + "step": 1574 + }, + { + "ce_ib": 7.383504390716553, + "ce_orig": 0.6441636681556702, + "epoch": 0.45265655331080595, + "kl_loss": 0.3001161813735962, + "loss_ib": 0.010384666733443737, + "step": 1574 + }, + { + "ce_ib": 6.909374713897705, + "ce_orig": 1.234971523284912, + "epoch": 0.45265655331080595, + "kl_loss": 0.3168690502643585, + "loss_ib": 0.010078065097332, + "step": 1574 + }, + { + "epoch": 0.4529441368897836, + "grad_norm": 0.10603757202625275, + "learning_rate": 9.625315293143782e-06, + "loss": 0.9032, + "step": 1575 + }, + { + "ce_ib": 4.061898708343506, + "ce_orig": 0.45858651399612427, + "epoch": 0.4529441368897836, + "kl_loss": 0.32226306200027466, + "loss_ib": 0.0072845290414988995, + "step": 1575 + }, + { + "ce_ib": 3.875119209289551, + "ce_orig": 0.8682102560997009, + "epoch": 0.4529441368897836, + "kl_loss": 0.19093768298625946, + "loss_ib": 0.005784495733678341, + "step": 1575 + }, + { + "ce_ib": 8.514786720275879, + "ce_orig": 1.2168067693710327, + "epoch": 0.4529441368897836, + "kl_loss": 0.1767929196357727, + "loss_ib": 0.01028271671384573, + "step": 1575 + }, + { + "ce_ib": 6.604011058807373, + "ce_orig": 0.7284548282623291, + "epoch": 0.4529441368897836, + "kl_loss": 0.260367751121521, + "loss_ib": 0.009207688271999359, + "step": 1575 + }, + { + "ce_ib": 5.733308792114258, + "ce_orig": 0.9140332341194153, + "epoch": 0.45323172046876126, + "kl_loss": 0.25994765758514404, + "loss_ib": 0.008332785218954086, + "step": 1576 + }, + { + "ce_ib": 4.547566890716553, + "ce_orig": 0.5950409770011902, + "epoch": 0.45323172046876126, + "kl_loss": 0.3094450831413269, + "loss_ib": 0.007642017211765051, + "step": 1576 + }, + { + "ce_ib": 2.6981751918792725, + "ce_orig": 0.3503713011741638, + "epoch": 0.45323172046876126, + "kl_loss": 0.4907337427139282, + "loss_ib": 0.007605512626469135, + "step": 1576 + }, + { + "ce_ib": 6.769986629486084, + "ce_orig": 0.7730137705802917, + "epoch": 0.45323172046876126, + "kl_loss": 0.20853619277477264, + "loss_ib": 0.008855348452925682, + "step": 1576 + }, + { + "ce_ib": 4.362432956695557, + "ce_orig": 0.5568724870681763, + "epoch": 0.4535193040477389, + "kl_loss": 0.28733065724372864, + "loss_ib": 0.007235738914459944, + "step": 1577 + }, + { + "ce_ib": 4.170414924621582, + "ce_orig": 0.7116380333900452, + "epoch": 0.4535193040477389, + "kl_loss": 0.17540094256401062, + "loss_ib": 0.00592442462220788, + "step": 1577 + }, + { + "ce_ib": 3.8414411544799805, + "ce_orig": 0.5860041975975037, + "epoch": 0.4535193040477389, + "kl_loss": 0.5004762411117554, + "loss_ib": 0.008846203796565533, + "step": 1577 + }, + { + "ce_ib": 4.819023609161377, + "ce_orig": 0.4309028089046478, + "epoch": 0.4535193040477389, + "kl_loss": 0.3439497947692871, + "loss_ib": 0.008258521556854248, + "step": 1577 + }, + { + "ce_ib": 4.089813232421875, + "ce_orig": 0.6745030283927917, + "epoch": 0.4538068876267165, + "kl_loss": 0.18524643778800964, + "loss_ib": 0.0059422776103019714, + "step": 1578 + }, + { + "ce_ib": 5.1730451583862305, + "ce_orig": 0.779570996761322, + "epoch": 0.4538068876267165, + "kl_loss": 0.14864078164100647, + "loss_ib": 0.0066594528034329414, + "step": 1578 + }, + { + "ce_ib": 6.661314964294434, + "ce_orig": 1.013376235961914, + "epoch": 0.4538068876267165, + "kl_loss": 0.20308473706245422, + "loss_ib": 0.008692162111401558, + "step": 1578 + }, + { + "ce_ib": 7.177057266235352, + "ce_orig": 0.7783916592597961, + "epoch": 0.4538068876267165, + "kl_loss": 0.2335265874862671, + "loss_ib": 0.009512322954833508, + "step": 1578 + }, + { + "ce_ib": 3.965541362762451, + "ce_orig": 0.7133391499519348, + "epoch": 0.45409447120569413, + "kl_loss": 0.23369936645030975, + "loss_ib": 0.006302534602582455, + "step": 1579 + }, + { + "ce_ib": 4.178727626800537, + "ce_orig": 0.6237984895706177, + "epoch": 0.45409447120569413, + "kl_loss": 0.20310792326927185, + "loss_ib": 0.00620980653911829, + "step": 1579 + }, + { + "ce_ib": 5.857765197753906, + "ce_orig": 0.7854532599449158, + "epoch": 0.45409447120569413, + "kl_loss": 0.21422770619392395, + "loss_ib": 0.008000042289495468, + "step": 1579 + }, + { + "ce_ib": 7.018479347229004, + "ce_orig": 0.9364678263664246, + "epoch": 0.45409447120569413, + "kl_loss": 0.2819264531135559, + "loss_ib": 0.009837743826210499, + "step": 1579 + }, + { + "epoch": 0.4543820547846718, + "grad_norm": 0.12139754742383957, + "learning_rate": 9.62236204602163e-06, + "loss": 0.8779, + "step": 1580 + }, + { + "ce_ib": 7.709521293640137, + "ce_orig": 0.9983739256858826, + "epoch": 0.4543820547846718, + "kl_loss": 0.2801150381565094, + "loss_ib": 0.010510671883821487, + "step": 1580 + }, + { + "ce_ib": 8.958810806274414, + "ce_orig": 1.4371901750564575, + "epoch": 0.4543820547846718, + "kl_loss": 0.3038935661315918, + "loss_ib": 0.011997747235000134, + "step": 1580 + }, + { + "ce_ib": 5.254581451416016, + "ce_orig": 0.8143784403800964, + "epoch": 0.4543820547846718, + "kl_loss": 0.28170040249824524, + "loss_ib": 0.008071585558354855, + "step": 1580 + }, + { + "ce_ib": 4.93033504486084, + "ce_orig": 0.5638120770454407, + "epoch": 0.4543820547846718, + "kl_loss": 0.28069180250167847, + "loss_ib": 0.0077372523956000805, + "step": 1580 + }, + { + "ce_ib": 6.513678550720215, + "ce_orig": 0.8502893447875977, + "epoch": 0.45466963836364943, + "kl_loss": 0.23893187940120697, + "loss_ib": 0.00890299677848816, + "step": 1581 + }, + { + "ce_ib": 4.787106037139893, + "ce_orig": 0.8497691750526428, + "epoch": 0.45466963836364943, + "kl_loss": 0.20702172815799713, + "loss_ib": 0.006857323460280895, + "step": 1581 + }, + { + "ce_ib": 7.873937606811523, + "ce_orig": 1.1831961870193481, + "epoch": 0.45466963836364943, + "kl_loss": 0.28345048427581787, + "loss_ib": 0.010708441957831383, + "step": 1581 + }, + { + "ce_ib": 4.772539138793945, + "ce_orig": 0.680529773235321, + "epoch": 0.45466963836364943, + "kl_loss": 0.20636993646621704, + "loss_ib": 0.006836238317191601, + "step": 1581 + }, + { + "ce_ib": 4.1148223876953125, + "ce_orig": 0.641548752784729, + "epoch": 0.45495722194262705, + "kl_loss": 0.19606587290763855, + "loss_ib": 0.006075480952858925, + "step": 1582 + }, + { + "ce_ib": 6.089351654052734, + "ce_orig": 0.7535414695739746, + "epoch": 0.45495722194262705, + "kl_loss": 0.1836378574371338, + "loss_ib": 0.00792573019862175, + "step": 1582 + }, + { + "ce_ib": 6.302079200744629, + "ce_orig": 1.0797772407531738, + "epoch": 0.45495722194262705, + "kl_loss": 0.7078266143798828, + "loss_ib": 0.01338034588843584, + "step": 1582 + }, + { + "ce_ib": 4.168867588043213, + "ce_orig": 0.7152249217033386, + "epoch": 0.45495722194262705, + "kl_loss": 0.1736205816268921, + "loss_ib": 0.00590507360175252, + "step": 1582 + }, + { + "ce_ib": 3.9457499980926514, + "ce_orig": 0.6125537753105164, + "epoch": 0.45524480552160473, + "kl_loss": 0.1671469509601593, + "loss_ib": 0.005617219023406506, + "step": 1583 + }, + { + "ce_ib": 3.6192708015441895, + "ce_orig": 0.608466386795044, + "epoch": 0.45524480552160473, + "kl_loss": 0.2092287540435791, + "loss_ib": 0.005711558274924755, + "step": 1583 + }, + { + "ce_ib": 6.533486843109131, + "ce_orig": 0.9287842512130737, + "epoch": 0.45524480552160473, + "kl_loss": 0.21517397463321686, + "loss_ib": 0.008685226552188396, + "step": 1583 + }, + { + "ce_ib": 6.803183078765869, + "ce_orig": 0.9185959696769714, + "epoch": 0.45524480552160473, + "kl_loss": 0.27622342109680176, + "loss_ib": 0.009565416723489761, + "step": 1583 + }, + { + "ce_ib": 4.511726379394531, + "ce_orig": 0.7662067413330078, + "epoch": 0.45553238910058236, + "kl_loss": 0.17484962940216064, + "loss_ib": 0.006260222755372524, + "step": 1584 + }, + { + "ce_ib": 7.601507663726807, + "ce_orig": 0.9514760971069336, + "epoch": 0.45553238910058236, + "kl_loss": 0.25499165058135986, + "loss_ib": 0.010151424445211887, + "step": 1584 + }, + { + "ce_ib": 3.7975528240203857, + "ce_orig": 0.6705338358879089, + "epoch": 0.45553238910058236, + "kl_loss": 0.24700944125652313, + "loss_ib": 0.0062676467932760715, + "step": 1584 + }, + { + "ce_ib": 4.939420223236084, + "ce_orig": 0.8580985069274902, + "epoch": 0.45553238910058236, + "kl_loss": 0.40102115273475647, + "loss_ib": 0.00894963089376688, + "step": 1584 + }, + { + "epoch": 0.45581997267956, + "grad_norm": 0.12361612915992737, + "learning_rate": 9.619397662556434e-06, + "loss": 0.8915, + "step": 1585 + }, + { + "ce_ib": 4.5925374031066895, + "ce_orig": 0.6823598742485046, + "epoch": 0.45581997267956, + "kl_loss": 0.24436944723129272, + "loss_ib": 0.007036231458187103, + "step": 1585 + }, + { + "ce_ib": 6.4787092208862305, + "ce_orig": 1.0821856260299683, + "epoch": 0.45581997267956, + "kl_loss": 0.22065825760364532, + "loss_ib": 0.00868529174476862, + "step": 1585 + }, + { + "ce_ib": 5.234336853027344, + "ce_orig": 0.5106269717216492, + "epoch": 0.45581997267956, + "kl_loss": 0.4164009094238281, + "loss_ib": 0.009398345835506916, + "step": 1585 + }, + { + "ce_ib": 3.9497246742248535, + "ce_orig": 0.601236879825592, + "epoch": 0.45581997267956, + "kl_loss": 0.2019076943397522, + "loss_ib": 0.00596880167722702, + "step": 1585 + }, + { + "ce_ib": 5.286367893218994, + "ce_orig": 0.8565965294837952, + "epoch": 0.45610755625853766, + "kl_loss": 0.2048177868127823, + "loss_ib": 0.007334545720368624, + "step": 1586 + }, + { + "ce_ib": 3.798523187637329, + "ce_orig": 0.5074141025543213, + "epoch": 0.45610755625853766, + "kl_loss": 0.26109427213668823, + "loss_ib": 0.00640946626663208, + "step": 1586 + }, + { + "ce_ib": 6.737989902496338, + "ce_orig": 1.062273383140564, + "epoch": 0.45610755625853766, + "kl_loss": 0.2507803738117218, + "loss_ib": 0.009245793335139751, + "step": 1586 + }, + { + "ce_ib": 7.858916282653809, + "ce_orig": 1.1395679712295532, + "epoch": 0.45610755625853766, + "kl_loss": 0.3817332983016968, + "loss_ib": 0.011676249094307423, + "step": 1586 + }, + { + "ce_ib": 5.658448219299316, + "ce_orig": 0.8056834936141968, + "epoch": 0.4563951398375153, + "kl_loss": 0.2785063683986664, + "loss_ib": 0.008443511091172695, + "step": 1587 + }, + { + "ce_ib": 4.108708381652832, + "ce_orig": 0.7508328557014465, + "epoch": 0.4563951398375153, + "kl_loss": 0.161625474691391, + "loss_ib": 0.005724962800741196, + "step": 1587 + }, + { + "ce_ib": 5.145606994628906, + "ce_orig": 0.9987155199050903, + "epoch": 0.4563951398375153, + "kl_loss": 0.214373379945755, + "loss_ib": 0.007289340253919363, + "step": 1587 + }, + { + "ce_ib": 4.941565990447998, + "ce_orig": 0.7044458389282227, + "epoch": 0.4563951398375153, + "kl_loss": 0.17570409178733826, + "loss_ib": 0.006698607001453638, + "step": 1587 + }, + { + "ce_ib": 3.0381317138671875, + "ce_orig": 0.48189476132392883, + "epoch": 0.4566827234164929, + "kl_loss": 0.18399228155612946, + "loss_ib": 0.004878054838627577, + "step": 1588 + }, + { + "ce_ib": 6.867878437042236, + "ce_orig": 0.8533276319503784, + "epoch": 0.4566827234164929, + "kl_loss": 0.23878858983516693, + "loss_ib": 0.009255764074623585, + "step": 1588 + }, + { + "ce_ib": 5.534303665161133, + "ce_orig": 0.5891181826591492, + "epoch": 0.4566827234164929, + "kl_loss": 0.1646348536014557, + "loss_ib": 0.0071806525811553, + "step": 1588 + }, + { + "ce_ib": 4.690039157867432, + "ce_orig": 0.8940519094467163, + "epoch": 0.4566827234164929, + "kl_loss": 0.18715199828147888, + "loss_ib": 0.006561559159308672, + "step": 1588 + }, + { + "ce_ib": 4.361746788024902, + "ce_orig": 0.4742627441883087, + "epoch": 0.45697030699547053, + "kl_loss": 0.3901596665382385, + "loss_ib": 0.008263343013823032, + "step": 1589 + }, + { + "ce_ib": 10.482245445251465, + "ce_orig": 1.0567958354949951, + "epoch": 0.45697030699547053, + "kl_loss": 0.1713293194770813, + "loss_ib": 0.01219553966075182, + "step": 1589 + }, + { + "ce_ib": 7.079917907714844, + "ce_orig": 0.8915940523147583, + "epoch": 0.45697030699547053, + "kl_loss": 0.30659422278404236, + "loss_ib": 0.01014585979282856, + "step": 1589 + }, + { + "ce_ib": 3.8864879608154297, + "ce_orig": 0.5591042637825012, + "epoch": 0.45697030699547053, + "kl_loss": 0.20273935794830322, + "loss_ib": 0.0059138815850019455, + "step": 1589 + }, + { + "epoch": 0.4572578905744482, + "grad_norm": 0.1403963714838028, + "learning_rate": 9.616422149890085e-06, + "loss": 0.896, + "step": 1590 + }, + { + "ce_ib": 4.583006381988525, + "ce_orig": 0.5320500731468201, + "epoch": 0.4572578905744482, + "kl_loss": 0.2249002754688263, + "loss_ib": 0.006832009181380272, + "step": 1590 + }, + { + "ce_ib": 9.813572883605957, + "ce_orig": 1.42888605594635, + "epoch": 0.4572578905744482, + "kl_loss": 0.2696181535720825, + "loss_ib": 0.012509752996265888, + "step": 1590 + }, + { + "ce_ib": 7.4518632888793945, + "ce_orig": 0.5761680603027344, + "epoch": 0.4572578905744482, + "kl_loss": 0.39669954776763916, + "loss_ib": 0.011418858543038368, + "step": 1590 + }, + { + "ce_ib": 6.660532474517822, + "ce_orig": 0.7236970067024231, + "epoch": 0.4572578905744482, + "kl_loss": 0.31353431940078735, + "loss_ib": 0.009795875288546085, + "step": 1590 + }, + { + "ce_ib": 11.518242835998535, + "ce_orig": 1.814514398574829, + "epoch": 0.45754547415342584, + "kl_loss": 0.2373267114162445, + "loss_ib": 0.013891510665416718, + "step": 1591 + }, + { + "ce_ib": 4.618646621704102, + "ce_orig": 0.6165957450866699, + "epoch": 0.45754547415342584, + "kl_loss": 0.18919825553894043, + "loss_ib": 0.006510629318654537, + "step": 1591 + }, + { + "ce_ib": 3.9572300910949707, + "ce_orig": 0.5626340508460999, + "epoch": 0.45754547415342584, + "kl_loss": 0.23723739385604858, + "loss_ib": 0.006329604424536228, + "step": 1591 + }, + { + "ce_ib": 5.463366508483887, + "ce_orig": 0.8892853260040283, + "epoch": 0.45754547415342584, + "kl_loss": 0.27879300713539124, + "loss_ib": 0.008251296356320381, + "step": 1591 + }, + { + "ce_ib": 8.190649032592773, + "ce_orig": 1.2614320516586304, + "epoch": 0.45783305773240346, + "kl_loss": 0.31339144706726074, + "loss_ib": 0.011324563063681126, + "step": 1592 + }, + { + "ce_ib": 4.4856367111206055, + "ce_orig": 0.7876867055892944, + "epoch": 0.45783305773240346, + "kl_loss": 0.18498772382736206, + "loss_ib": 0.006335513666272163, + "step": 1592 + }, + { + "ce_ib": 6.534660339355469, + "ce_orig": 0.8544452786445618, + "epoch": 0.45783305773240346, + "kl_loss": 0.16686499118804932, + "loss_ib": 0.008203309960663319, + "step": 1592 + }, + { + "ce_ib": 6.89320182800293, + "ce_orig": 1.0038596391677856, + "epoch": 0.45783305773240346, + "kl_loss": 0.22854721546173096, + "loss_ib": 0.009178673848509789, + "step": 1592 + }, + { + "ce_ib": 5.936371326446533, + "ce_orig": 0.955289363861084, + "epoch": 0.45812064131138114, + "kl_loss": 0.23201020061969757, + "loss_ib": 0.008256473578512669, + "step": 1593 + }, + { + "ce_ib": 6.370386123657227, + "ce_orig": 1.2829995155334473, + "epoch": 0.45812064131138114, + "kl_loss": 0.17797306180000305, + "loss_ib": 0.008150117471814156, + "step": 1593 + }, + { + "ce_ib": 3.247084856033325, + "ce_orig": 0.7356828451156616, + "epoch": 0.45812064131138114, + "kl_loss": 0.23136037588119507, + "loss_ib": 0.005560688674449921, + "step": 1593 + }, + { + "ce_ib": 5.93695068359375, + "ce_orig": 1.0750383138656616, + "epoch": 0.45812064131138114, + "kl_loss": 0.3686366081237793, + "loss_ib": 0.009623317047953606, + "step": 1593 + }, + { + "ce_ib": 4.900889873504639, + "ce_orig": 0.8093308210372925, + "epoch": 0.45840822489035876, + "kl_loss": 0.2369879186153412, + "loss_ib": 0.007270768750458956, + "step": 1594 + }, + { + "ce_ib": 2.814967393875122, + "ce_orig": 0.43275266885757446, + "epoch": 0.45840822489035876, + "kl_loss": 0.22987329959869385, + "loss_ib": 0.005113700404763222, + "step": 1594 + }, + { + "ce_ib": 5.747320175170898, + "ce_orig": 0.8896006345748901, + "epoch": 0.45840822489035876, + "kl_loss": 0.18795983493328094, + "loss_ib": 0.007626918610185385, + "step": 1594 + }, + { + "ce_ib": 7.953315734863281, + "ce_orig": 0.7957958579063416, + "epoch": 0.45840822489035876, + "kl_loss": 0.2804729640483856, + "loss_ib": 0.010758046060800552, + "step": 1594 + }, + { + "epoch": 0.4586958084693364, + "grad_norm": 0.11645019799470901, + "learning_rate": 9.613435515191282e-06, + "loss": 0.8937, + "step": 1595 + }, + { + "ce_ib": 5.9087347984313965, + "ce_orig": 0.8856273293495178, + "epoch": 0.4586958084693364, + "kl_loss": 0.2484017014503479, + "loss_ib": 0.008392751216888428, + "step": 1595 + }, + { + "ce_ib": 6.337006568908691, + "ce_orig": 1.040190577507019, + "epoch": 0.4586958084693364, + "kl_loss": 0.22671160101890564, + "loss_ib": 0.008604122325778008, + "step": 1595 + }, + { + "ce_ib": 3.9932260513305664, + "ce_orig": 0.5096628665924072, + "epoch": 0.4586958084693364, + "kl_loss": 0.36274319887161255, + "loss_ib": 0.007620657328516245, + "step": 1595 + }, + { + "ce_ib": 6.888410568237305, + "ce_orig": 0.9193194508552551, + "epoch": 0.4586958084693364, + "kl_loss": 0.16623620688915253, + "loss_ib": 0.008550772443413734, + "step": 1595 + }, + { + "ce_ib": 3.571662425994873, + "ce_orig": 0.5151391625404358, + "epoch": 0.45898339204831407, + "kl_loss": 0.17320913076400757, + "loss_ib": 0.005303753539919853, + "step": 1596 + }, + { + "ce_ib": 6.104183197021484, + "ce_orig": 1.0585072040557861, + "epoch": 0.45898339204831407, + "kl_loss": 0.2961203455924988, + "loss_ib": 0.009065386839210987, + "step": 1596 + }, + { + "ce_ib": 6.868567943572998, + "ce_orig": 0.9249302744865417, + "epoch": 0.45898339204831407, + "kl_loss": 0.2994818091392517, + "loss_ib": 0.009863385930657387, + "step": 1596 + }, + { + "ce_ib": 2.8859384059906006, + "ce_orig": 0.6401421427726746, + "epoch": 0.45898339204831407, + "kl_loss": 0.20423877239227295, + "loss_ib": 0.0049283262342214584, + "step": 1596 + }, + { + "ce_ib": 7.507976055145264, + "ce_orig": 1.2500346899032593, + "epoch": 0.4592709756272917, + "kl_loss": 0.18150609731674194, + "loss_ib": 0.00932303722947836, + "step": 1597 + }, + { + "ce_ib": 4.735340118408203, + "ce_orig": 0.6459792852401733, + "epoch": 0.4592709756272917, + "kl_loss": 0.16608083248138428, + "loss_ib": 0.006396147888153791, + "step": 1597 + }, + { + "ce_ib": 5.722679138183594, + "ce_orig": 0.8577017188072205, + "epoch": 0.4592709756272917, + "kl_loss": 0.17896723747253418, + "loss_ib": 0.007512351498007774, + "step": 1597 + }, + { + "ce_ib": 5.09295129776001, + "ce_orig": 0.5127891898155212, + "epoch": 0.4592709756272917, + "kl_loss": 0.22883820533752441, + "loss_ib": 0.007381333503872156, + "step": 1597 + }, + { + "ce_ib": 12.0892333984375, + "ce_orig": 1.6606770753860474, + "epoch": 0.4595585592062693, + "kl_loss": 0.17387744784355164, + "loss_ib": 0.013828007504343987, + "step": 1598 + }, + { + "ce_ib": 3.8233511447906494, + "ce_orig": 0.401006281375885, + "epoch": 0.4595585592062693, + "kl_loss": 0.32604125142097473, + "loss_ib": 0.007083763834089041, + "step": 1598 + }, + { + "ce_ib": 7.2053117752075195, + "ce_orig": 1.3019723892211914, + "epoch": 0.4595585592062693, + "kl_loss": 0.2079797089099884, + "loss_ib": 0.009285109117627144, + "step": 1598 + }, + { + "ce_ib": 6.970457553863525, + "ce_orig": 1.142815113067627, + "epoch": 0.4595585592062693, + "kl_loss": 0.2328367829322815, + "loss_ib": 0.009298824705183506, + "step": 1598 + }, + { + "ce_ib": 6.52714204788208, + "ce_orig": 1.1193588972091675, + "epoch": 0.45984614278524694, + "kl_loss": 0.2331889271736145, + "loss_ib": 0.008859030902385712, + "step": 1599 + }, + { + "ce_ib": 5.672296047210693, + "ce_orig": 0.6065714359283447, + "epoch": 0.45984614278524694, + "kl_loss": 0.2097187489271164, + "loss_ib": 0.007769483607262373, + "step": 1599 + }, + { + "ce_ib": 4.53487491607666, + "ce_orig": 0.8241526484489441, + "epoch": 0.45984614278524694, + "kl_loss": 0.270264208316803, + "loss_ib": 0.007237516343593597, + "step": 1599 + }, + { + "ce_ib": 5.069760322570801, + "ce_orig": 0.7474254965782166, + "epoch": 0.45984614278524694, + "kl_loss": 0.2103268802165985, + "loss_ib": 0.007173029240220785, + "step": 1599 + }, + { + "epoch": 0.4601337263642246, + "grad_norm": 0.10566619038581848, + "learning_rate": 9.610437765655522e-06, + "loss": 0.8697, + "step": 1600 + }, + { + "ce_ib": 10.489505767822266, + "ce_orig": 1.6401379108428955, + "epoch": 0.4601337263642246, + "kl_loss": 0.1997671276330948, + "loss_ib": 0.012487176805734634, + "step": 1600 + }, + { + "ce_ib": 3.287750720977783, + "ce_orig": 0.6354742050170898, + "epoch": 0.4601337263642246, + "kl_loss": 0.2241269201040268, + "loss_ib": 0.005529019515961409, + "step": 1600 + }, + { + "ce_ib": 4.282444000244141, + "ce_orig": 0.4484873116016388, + "epoch": 0.4601337263642246, + "kl_loss": 0.3232642710208893, + "loss_ib": 0.00751508679240942, + "step": 1600 + }, + { + "ce_ib": 7.709043502807617, + "ce_orig": 0.8646803498268127, + "epoch": 0.4601337263642246, + "kl_loss": 0.280630886554718, + "loss_ib": 0.010515352711081505, + "step": 1600 + }, + { + "ce_ib": 6.3130669593811035, + "ce_orig": 1.0754271745681763, + "epoch": 0.46042130994320224, + "kl_loss": 0.24628064036369324, + "loss_ib": 0.008775873109698296, + "step": 1601 + }, + { + "ce_ib": 7.311588287353516, + "ce_orig": 0.664889395236969, + "epoch": 0.46042130994320224, + "kl_loss": 0.2610669732093811, + "loss_ib": 0.009922257624566555, + "step": 1601 + }, + { + "ce_ib": 7.912898540496826, + "ce_orig": 1.17232084274292, + "epoch": 0.46042130994320224, + "kl_loss": 0.22218400239944458, + "loss_ib": 0.010134738869965076, + "step": 1601 + }, + { + "ce_ib": 5.320058822631836, + "ce_orig": 0.6333699226379395, + "epoch": 0.46042130994320224, + "kl_loss": 0.18659427762031555, + "loss_ib": 0.007186001166701317, + "step": 1601 + }, + { + "ce_ib": 7.077117443084717, + "ce_orig": 1.322934627532959, + "epoch": 0.46070889352217986, + "kl_loss": 0.22655072808265686, + "loss_ib": 0.009342624805867672, + "step": 1602 + }, + { + "ce_ib": 8.87541389465332, + "ce_orig": 1.5171864032745361, + "epoch": 0.46070889352217986, + "kl_loss": 0.2778520882129669, + "loss_ib": 0.011653934605419636, + "step": 1602 + }, + { + "ce_ib": 4.750857353210449, + "ce_orig": 0.9474871754646301, + "epoch": 0.46070889352217986, + "kl_loss": 0.19248110055923462, + "loss_ib": 0.0066756680607795715, + "step": 1602 + }, + { + "ce_ib": 0.7055354714393616, + "ce_orig": 0.09127622097730637, + "epoch": 0.46070889352217986, + "kl_loss": 0.49646514654159546, + "loss_ib": 0.005670186597853899, + "step": 1602 + }, + { + "ce_ib": 4.906747341156006, + "ce_orig": 0.6237359046936035, + "epoch": 0.46099647710115754, + "kl_loss": 0.3591931462287903, + "loss_ib": 0.008498678915202618, + "step": 1603 + }, + { + "ce_ib": 5.325519561767578, + "ce_orig": 0.8479997515678406, + "epoch": 0.46099647710115754, + "kl_loss": 0.21136073768138885, + "loss_ib": 0.00743912672623992, + "step": 1603 + }, + { + "ce_ib": 7.966558933258057, + "ce_orig": 0.49447599053382874, + "epoch": 0.46099647710115754, + "kl_loss": 0.23808987438678741, + "loss_ib": 0.010347457602620125, + "step": 1603 + }, + { + "ce_ib": 5.689008712768555, + "ce_orig": 0.8724644184112549, + "epoch": 0.46099647710115754, + "kl_loss": 0.32918137311935425, + "loss_ib": 0.008980822749435902, + "step": 1603 + }, + { + "ce_ib": 5.284266471862793, + "ce_orig": 0.8119617104530334, + "epoch": 0.46128406068013517, + "kl_loss": 0.22637273371219635, + "loss_ib": 0.007547993678599596, + "step": 1604 + }, + { + "ce_ib": 7.932982444763184, + "ce_orig": 1.1257801055908203, + "epoch": 0.46128406068013517, + "kl_loss": 0.33682769536972046, + "loss_ib": 0.011301259510219097, + "step": 1604 + }, + { + "ce_ib": 6.964417934417725, + "ce_orig": 0.6016362309455872, + "epoch": 0.46128406068013517, + "kl_loss": 0.7285957336425781, + "loss_ib": 0.014250376261770725, + "step": 1604 + }, + { + "ce_ib": 10.399383544921875, + "ce_orig": 1.1091629266738892, + "epoch": 0.46128406068013517, + "kl_loss": 0.3269003927707672, + "loss_ib": 0.01366838626563549, + "step": 1604 + }, + { + "epoch": 0.4615716442591128, + "grad_norm": 0.11775634437799454, + "learning_rate": 9.607428908505078e-06, + "loss": 0.871, + "step": 1605 + }, + { + "ce_ib": 7.806272983551025, + "ce_orig": 1.1525717973709106, + "epoch": 0.4615716442591128, + "kl_loss": 0.24559307098388672, + "loss_ib": 0.01026220340281725, + "step": 1605 + }, + { + "ce_ib": 9.258004188537598, + "ce_orig": 0.9235900044441223, + "epoch": 0.4615716442591128, + "kl_loss": 0.21653199195861816, + "loss_ib": 0.01142332423478365, + "step": 1605 + }, + { + "ce_ib": 3.807772397994995, + "ce_orig": 0.501733124256134, + "epoch": 0.4615716442591128, + "kl_loss": 0.21480196714401245, + "loss_ib": 0.005955792032182217, + "step": 1605 + }, + { + "ce_ib": 3.2948741912841797, + "ce_orig": 0.24705025553703308, + "epoch": 0.4615716442591128, + "kl_loss": 0.2047540843486786, + "loss_ib": 0.005342415068298578, + "step": 1605 + }, + { + "ce_ib": 8.07645034790039, + "ce_orig": 0.9673423171043396, + "epoch": 0.46185922783809047, + "kl_loss": 0.18127772212028503, + "loss_ib": 0.009889227338135242, + "step": 1606 + }, + { + "ce_ib": 2.4459357261657715, + "ce_orig": 0.5707719326019287, + "epoch": 0.46185922783809047, + "kl_loss": 0.23185060918331146, + "loss_ib": 0.00476444186642766, + "step": 1606 + }, + { + "ce_ib": 6.811293125152588, + "ce_orig": 0.8880608677864075, + "epoch": 0.46185922783809047, + "kl_loss": 0.2558295428752899, + "loss_ib": 0.00936958845704794, + "step": 1606 + }, + { + "ce_ib": 6.341538429260254, + "ce_orig": 0.83788001537323, + "epoch": 0.46185922783809047, + "kl_loss": 0.2810502052307129, + "loss_ib": 0.009152039885520935, + "step": 1606 + }, + { + "ce_ib": 6.138156890869141, + "ce_orig": 1.0111743211746216, + "epoch": 0.4621468114170681, + "kl_loss": 0.17590323090553284, + "loss_ib": 0.007897189818322659, + "step": 1607 + }, + { + "ce_ib": 5.954296112060547, + "ce_orig": 1.0918947458267212, + "epoch": 0.4621468114170681, + "kl_loss": 0.18631017208099365, + "loss_ib": 0.007817397825419903, + "step": 1607 + }, + { + "ce_ib": 2.8839921951293945, + "ce_orig": 0.4976504147052765, + "epoch": 0.4621468114170681, + "kl_loss": 0.19970375299453735, + "loss_ib": 0.004881029482930899, + "step": 1607 + }, + { + "ce_ib": 6.565066337585449, + "ce_orig": 1.105733036994934, + "epoch": 0.4621468114170681, + "kl_loss": 0.20072929561138153, + "loss_ib": 0.008572359569370747, + "step": 1607 + }, + { + "ce_ib": 4.914196968078613, + "ce_orig": 0.40476492047309875, + "epoch": 0.4624343949960457, + "kl_loss": 0.38428157567977905, + "loss_ib": 0.008757012896239758, + "step": 1608 + }, + { + "ce_ib": 5.936429500579834, + "ce_orig": 0.7984280586242676, + "epoch": 0.4624343949960457, + "kl_loss": 0.31145086884498596, + "loss_ib": 0.009050937369465828, + "step": 1608 + }, + { + "ce_ib": 4.807775497436523, + "ce_orig": 0.776930034160614, + "epoch": 0.4624343949960457, + "kl_loss": 0.18399487435817719, + "loss_ib": 0.006647724192589521, + "step": 1608 + }, + { + "ce_ib": 5.2376322746276855, + "ce_orig": 0.9014029502868652, + "epoch": 0.4624343949960457, + "kl_loss": 0.28456035256385803, + "loss_ib": 0.008083236403763294, + "step": 1608 + }, + { + "ce_ib": 5.227330684661865, + "ce_orig": 0.7392305135726929, + "epoch": 0.46272197857502334, + "kl_loss": 0.26967063546180725, + "loss_ib": 0.007924037054181099, + "step": 1609 + }, + { + "ce_ib": 2.673739194869995, + "ce_orig": 0.509719729423523, + "epoch": 0.46272197857502334, + "kl_loss": 0.16655415296554565, + "loss_ib": 0.004339280538260937, + "step": 1609 + }, + { + "ce_ib": 5.3345046043396, + "ce_orig": 0.8803150653839111, + "epoch": 0.46272197857502334, + "kl_loss": 0.2357148826122284, + "loss_ib": 0.007691653911024332, + "step": 1609 + }, + { + "ce_ib": 6.286981582641602, + "ce_orig": 0.9555469155311584, + "epoch": 0.46272197857502334, + "kl_loss": 0.19689792394638062, + "loss_ib": 0.008255960419774055, + "step": 1609 + }, + { + "epoch": 0.463009562154001, + "grad_norm": 0.10211392492055893, + "learning_rate": 9.604408950988988e-06, + "loss": 0.8694, + "step": 1610 + }, + { + "ce_ib": 6.750776767730713, + "ce_orig": 0.6942508220672607, + "epoch": 0.463009562154001, + "kl_loss": 0.315233051776886, + "loss_ib": 0.009903106838464737, + "step": 1610 + }, + { + "ce_ib": 7.861863613128662, + "ce_orig": 1.380213737487793, + "epoch": 0.463009562154001, + "kl_loss": 0.2642974257469177, + "loss_ib": 0.010504838079214096, + "step": 1610 + }, + { + "ce_ib": 6.023126125335693, + "ce_orig": 0.9096881151199341, + "epoch": 0.463009562154001, + "kl_loss": 0.2661204934120178, + "loss_ib": 0.008684330619871616, + "step": 1610 + }, + { + "ce_ib": 9.524619102478027, + "ce_orig": 1.7194697856903076, + "epoch": 0.463009562154001, + "kl_loss": 0.2726157307624817, + "loss_ib": 0.012250776402652264, + "step": 1610 + }, + { + "ce_ib": 1.833953857421875, + "ce_orig": 0.2571110129356384, + "epoch": 0.46329714573297864, + "kl_loss": 0.5162818431854248, + "loss_ib": 0.00699677225202322, + "step": 1611 + }, + { + "ce_ib": 7.046572208404541, + "ce_orig": 1.1004294157028198, + "epoch": 0.46329714573297864, + "kl_loss": 0.2600012719631195, + "loss_ib": 0.009646585211157799, + "step": 1611 + }, + { + "ce_ib": 6.907512187957764, + "ce_orig": 1.0553163290023804, + "epoch": 0.46329714573297864, + "kl_loss": 0.3212874233722687, + "loss_ib": 0.010120387189090252, + "step": 1611 + }, + { + "ce_ib": 7.378001689910889, + "ce_orig": 1.1235740184783936, + "epoch": 0.46329714573297864, + "kl_loss": 0.3305365741252899, + "loss_ib": 0.010683367028832436, + "step": 1611 + }, + { + "ce_ib": 5.189960479736328, + "ce_orig": 0.7845567464828491, + "epoch": 0.46358472931195627, + "kl_loss": 0.20006737112998962, + "loss_ib": 0.0071906340308487415, + "step": 1612 + }, + { + "ce_ib": 6.913703441619873, + "ce_orig": 0.7184546589851379, + "epoch": 0.46358472931195627, + "kl_loss": 0.3024430572986603, + "loss_ib": 0.009938133880496025, + "step": 1612 + }, + { + "ce_ib": 7.071518421173096, + "ce_orig": 1.0965911149978638, + "epoch": 0.46358472931195627, + "kl_loss": 0.2570381760597229, + "loss_ib": 0.009641899727284908, + "step": 1612 + }, + { + "ce_ib": 4.9828410148620605, + "ce_orig": 0.7130359411239624, + "epoch": 0.46358472931195627, + "kl_loss": 0.20155639946460724, + "loss_ib": 0.006998404860496521, + "step": 1612 + }, + { + "ce_ib": 3.570085287094116, + "ce_orig": 0.6303805708885193, + "epoch": 0.46387231289093395, + "kl_loss": 0.15140435099601746, + "loss_ib": 0.005084129050374031, + "step": 1613 + }, + { + "ce_ib": 5.76014518737793, + "ce_orig": 0.6615375280380249, + "epoch": 0.46387231289093395, + "kl_loss": 0.2971652150154114, + "loss_ib": 0.008731797337532043, + "step": 1613 + }, + { + "ce_ib": 7.161432266235352, + "ce_orig": 1.0736418962478638, + "epoch": 0.46387231289093395, + "kl_loss": 0.24368785321712494, + "loss_ib": 0.00959831103682518, + "step": 1613 + }, + { + "ce_ib": 4.258911609649658, + "ce_orig": 0.6454308032989502, + "epoch": 0.46387231289093395, + "kl_loss": 0.22261682152748108, + "loss_ib": 0.006485079415142536, + "step": 1613 + }, + { + "ce_ib": 4.7470855712890625, + "ce_orig": 0.6598400473594666, + "epoch": 0.46415989646991157, + "kl_loss": 0.2554096579551697, + "loss_ib": 0.007301182020455599, + "step": 1614 + }, + { + "ce_ib": 4.303165435791016, + "ce_orig": 0.23601269721984863, + "epoch": 0.46415989646991157, + "kl_loss": 0.22935321927070618, + "loss_ib": 0.006596697494387627, + "step": 1614 + }, + { + "ce_ib": 4.91155481338501, + "ce_orig": 0.5904660820960999, + "epoch": 0.46415989646991157, + "kl_loss": 0.2633596658706665, + "loss_ib": 0.007545151747763157, + "step": 1614 + }, + { + "ce_ib": 9.601012229919434, + "ce_orig": 1.771711826324463, + "epoch": 0.46415989646991157, + "kl_loss": 0.24031728506088257, + "loss_ib": 0.012004184536635876, + "step": 1614 + }, + { + "epoch": 0.4644474800488892, + "grad_norm": 0.10622972995042801, + "learning_rate": 9.601377900383029e-06, + "loss": 0.8348, + "step": 1615 + }, + { + "ce_ib": 7.197417259216309, + "ce_orig": 0.7182507514953613, + "epoch": 0.4644474800488892, + "kl_loss": 0.35040098428726196, + "loss_ib": 0.010701427236199379, + "step": 1615 + }, + { + "ce_ib": 10.0778169631958, + "ce_orig": 1.1566050052642822, + "epoch": 0.4644474800488892, + "kl_loss": 0.22660639882087708, + "loss_ib": 0.012343880720436573, + "step": 1615 + }, + { + "ce_ib": 9.881003379821777, + "ce_orig": 1.7606111764907837, + "epoch": 0.4644474800488892, + "kl_loss": 0.265298992395401, + "loss_ib": 0.01253399346023798, + "step": 1615 + }, + { + "ce_ib": 6.909655570983887, + "ce_orig": 0.8872973322868347, + "epoch": 0.4644474800488892, + "kl_loss": 0.5289936065673828, + "loss_ib": 0.012199592776596546, + "step": 1615 + }, + { + "ce_ib": 8.272558212280273, + "ce_orig": 1.5352343320846558, + "epoch": 0.4647350636278669, + "kl_loss": 0.28025469183921814, + "loss_ib": 0.011075105518102646, + "step": 1616 + }, + { + "ce_ib": 4.740227222442627, + "ce_orig": 0.7010709643363953, + "epoch": 0.4647350636278669, + "kl_loss": 0.21988792717456818, + "loss_ib": 0.006939106620848179, + "step": 1616 + }, + { + "ce_ib": 2.735738754272461, + "ce_orig": 0.2933582663536072, + "epoch": 0.4647350636278669, + "kl_loss": 0.1642613708972931, + "loss_ib": 0.00437835231423378, + "step": 1616 + }, + { + "ce_ib": 9.15244197845459, + "ce_orig": 1.5223807096481323, + "epoch": 0.4647350636278669, + "kl_loss": 0.2403932809829712, + "loss_ib": 0.011556374840438366, + "step": 1616 + }, + { + "ce_ib": 5.430967807769775, + "ce_orig": 0.7502396702766418, + "epoch": 0.4650226472068445, + "kl_loss": 0.23676195740699768, + "loss_ib": 0.007798586972057819, + "step": 1617 + }, + { + "ce_ib": 7.760725975036621, + "ce_orig": 1.305103063583374, + "epoch": 0.4650226472068445, + "kl_loss": 0.19089996814727783, + "loss_ib": 0.00966972578316927, + "step": 1617 + }, + { + "ce_ib": 5.95468282699585, + "ce_orig": 0.969433069229126, + "epoch": 0.4650226472068445, + "kl_loss": 0.2655257284641266, + "loss_ib": 0.008609939366579056, + "step": 1617 + }, + { + "ce_ib": 4.386993885040283, + "ce_orig": 0.6455299258232117, + "epoch": 0.4650226472068445, + "kl_loss": 0.13578274846076965, + "loss_ib": 0.0057448213919997215, + "step": 1617 + }, + { + "ce_ib": 6.207513809204102, + "ce_orig": 0.927709698677063, + "epoch": 0.4653102307858221, + "kl_loss": 0.23612076044082642, + "loss_ib": 0.008568720892071724, + "step": 1618 + }, + { + "ce_ib": 6.073016166687012, + "ce_orig": 0.61612868309021, + "epoch": 0.4653102307858221, + "kl_loss": 0.23253057897090912, + "loss_ib": 0.008398322388529778, + "step": 1618 + }, + { + "ce_ib": 4.8698554039001465, + "ce_orig": 0.9121537208557129, + "epoch": 0.4653102307858221, + "kl_loss": 0.19702599942684174, + "loss_ib": 0.006840114947408438, + "step": 1618 + }, + { + "ce_ib": 3.864777088165283, + "ce_orig": 0.48421528935432434, + "epoch": 0.4653102307858221, + "kl_loss": 0.20810124278068542, + "loss_ib": 0.005945789627730846, + "step": 1618 + }, + { + "ce_ib": 6.4403395652771, + "ce_orig": 1.0684853792190552, + "epoch": 0.46559781436479974, + "kl_loss": 0.2469189614057541, + "loss_ib": 0.008909529075026512, + "step": 1619 + }, + { + "ce_ib": 2.098714828491211, + "ce_orig": 0.18352091312408447, + "epoch": 0.46559781436479974, + "kl_loss": 0.27719706296920776, + "loss_ib": 0.0048706852830946445, + "step": 1619 + }, + { + "ce_ib": 3.6879613399505615, + "ce_orig": 0.6930275559425354, + "epoch": 0.46559781436479974, + "kl_loss": 0.2174752801656723, + "loss_ib": 0.0058627137914299965, + "step": 1619 + }, + { + "ce_ib": 3.930967092514038, + "ce_orig": 0.7239459156990051, + "epoch": 0.46559781436479974, + "kl_loss": 0.23733000457286835, + "loss_ib": 0.0063042668625712395, + "step": 1619 + }, + { + "epoch": 0.4658853979437774, + "grad_norm": 0.12069246172904968, + "learning_rate": 9.598335763989703e-06, + "loss": 0.8468, + "step": 1620 + }, + { + "ce_ib": 2.3631093502044678, + "ce_orig": 0.5599596500396729, + "epoch": 0.4658853979437774, + "kl_loss": 0.1640537679195404, + "loss_ib": 0.004003646783530712, + "step": 1620 + }, + { + "ce_ib": 6.95280122756958, + "ce_orig": 1.01499605178833, + "epoch": 0.4658853979437774, + "kl_loss": 0.2521277070045471, + "loss_ib": 0.009474078193306923, + "step": 1620 + }, + { + "ce_ib": 9.791449546813965, + "ce_orig": 1.662266731262207, + "epoch": 0.4658853979437774, + "kl_loss": 0.22838973999023438, + "loss_ib": 0.012075347825884819, + "step": 1620 + }, + { + "ce_ib": 3.9279820919036865, + "ce_orig": 0.4412092864513397, + "epoch": 0.4658853979437774, + "kl_loss": 0.16607823967933655, + "loss_ib": 0.005588764324784279, + "step": 1620 + }, + { + "ce_ib": 8.574518203735352, + "ce_orig": 1.3736652135849, + "epoch": 0.46617298152275505, + "kl_loss": 0.33889293670654297, + "loss_ib": 0.011963448487222195, + "step": 1621 + }, + { + "ce_ib": 3.510213851928711, + "ce_orig": 0.6036752462387085, + "epoch": 0.46617298152275505, + "kl_loss": 0.2927241921424866, + "loss_ib": 0.006437455303966999, + "step": 1621 + }, + { + "ce_ib": 5.375370502471924, + "ce_orig": 0.6719651222229004, + "epoch": 0.46617298152275505, + "kl_loss": 0.2255745232105255, + "loss_ib": 0.007631115615367889, + "step": 1621 + }, + { + "ce_ib": 3.7908592224121094, + "ce_orig": 0.47120845317840576, + "epoch": 0.46617298152275505, + "kl_loss": 0.1993405669927597, + "loss_ib": 0.005784264765679836, + "step": 1621 + }, + { + "ce_ib": 3.3675405979156494, + "ce_orig": 0.37180203199386597, + "epoch": 0.46646056510173267, + "kl_loss": 0.194191575050354, + "loss_ib": 0.005309456493705511, + "step": 1622 + }, + { + "ce_ib": 4.649647235870361, + "ce_orig": 0.5024625658988953, + "epoch": 0.46646056510173267, + "kl_loss": 0.23426902294158936, + "loss_ib": 0.0069923377595841885, + "step": 1622 + }, + { + "ce_ib": 5.274606704711914, + "ce_orig": 0.6442150473594666, + "epoch": 0.46646056510173267, + "kl_loss": 0.24061354994773865, + "loss_ib": 0.007680742535740137, + "step": 1622 + }, + { + "ce_ib": 7.837526321411133, + "ce_orig": 1.271608829498291, + "epoch": 0.46646056510173267, + "kl_loss": 0.3615526258945465, + "loss_ib": 0.011453052051365376, + "step": 1622 + }, + { + "ce_ib": 4.621031761169434, + "ce_orig": 0.7775217294692993, + "epoch": 0.46674814868071035, + "kl_loss": 0.18649159371852875, + "loss_ib": 0.006485947873443365, + "step": 1623 + }, + { + "ce_ib": 4.2601518630981445, + "ce_orig": 0.8415741920471191, + "epoch": 0.46674814868071035, + "kl_loss": 0.20816299319267273, + "loss_ib": 0.0063417814671993256, + "step": 1623 + }, + { + "ce_ib": 7.340706825256348, + "ce_orig": 1.174594521522522, + "epoch": 0.46674814868071035, + "kl_loss": 0.1925428807735443, + "loss_ib": 0.009266135282814503, + "step": 1623 + }, + { + "ce_ib": 4.316766262054443, + "ce_orig": 0.9118759632110596, + "epoch": 0.46674814868071035, + "kl_loss": 0.13632294535636902, + "loss_ib": 0.0056799957528710365, + "step": 1623 + }, + { + "ce_ib": 6.24122428894043, + "ce_orig": 1.3776178359985352, + "epoch": 0.467035732259688, + "kl_loss": 0.3462154269218445, + "loss_ib": 0.009703378193080425, + "step": 1624 + }, + { + "ce_ib": 6.74078369140625, + "ce_orig": 0.6831387877464294, + "epoch": 0.467035732259688, + "kl_loss": 0.30415505170822144, + "loss_ib": 0.009782334789633751, + "step": 1624 + }, + { + "ce_ib": 5.621586322784424, + "ce_orig": 1.0059772729873657, + "epoch": 0.467035732259688, + "kl_loss": 0.2930832505226135, + "loss_ib": 0.008552419021725655, + "step": 1624 + }, + { + "ce_ib": 3.5580859184265137, + "ce_orig": 0.3204123079776764, + "epoch": 0.467035732259688, + "kl_loss": 0.3000769019126892, + "loss_ib": 0.006558854598551989, + "step": 1624 + }, + { + "epoch": 0.4673233158386656, + "grad_norm": 0.12965475022792816, + "learning_rate": 9.595282549138228e-06, + "loss": 0.947, + "step": 1625 + }, + { + "ce_ib": 8.029479026794434, + "ce_orig": 1.0885497331619263, + "epoch": 0.4673233158386656, + "kl_loss": 0.2798893451690674, + "loss_ib": 0.010828373022377491, + "step": 1625 + }, + { + "ce_ib": 5.572749137878418, + "ce_orig": 0.9018054604530334, + "epoch": 0.4673233158386656, + "kl_loss": 0.49554967880249023, + "loss_ib": 0.010528245940804482, + "step": 1625 + }, + { + "ce_ib": 8.166579246520996, + "ce_orig": 0.8004129528999329, + "epoch": 0.4673233158386656, + "kl_loss": 0.2181631475687027, + "loss_ib": 0.010348211042582989, + "step": 1625 + }, + { + "ce_ib": 5.353193759918213, + "ce_orig": 1.0144050121307373, + "epoch": 0.4673233158386656, + "kl_loss": 0.1349445879459381, + "loss_ib": 0.00670264009386301, + "step": 1625 + }, + { + "ce_ib": 4.509777069091797, + "ce_orig": 0.5489388704299927, + "epoch": 0.4676108994176433, + "kl_loss": 0.22875724732875824, + "loss_ib": 0.006797349080443382, + "step": 1626 + }, + { + "ce_ib": 6.12491512298584, + "ce_orig": 0.7085449695587158, + "epoch": 0.4676108994176433, + "kl_loss": 0.2517673969268799, + "loss_ib": 0.008642589673399925, + "step": 1626 + }, + { + "ce_ib": 4.152883529663086, + "ce_orig": 0.6175920963287354, + "epoch": 0.4676108994176433, + "kl_loss": 0.18230682611465454, + "loss_ib": 0.005975951906293631, + "step": 1626 + }, + { + "ce_ib": 1.8417199850082397, + "ce_orig": 0.4287397265434265, + "epoch": 0.4676108994176433, + "kl_loss": 0.1650610864162445, + "loss_ib": 0.003492330899462104, + "step": 1626 + }, + { + "ce_ib": 9.31445026397705, + "ce_orig": 1.1101757287979126, + "epoch": 0.4678984829966209, + "kl_loss": 0.23538920283317566, + "loss_ib": 0.011668342165648937, + "step": 1627 + }, + { + "ce_ib": 6.0186262130737305, + "ce_orig": 0.6789668798446655, + "epoch": 0.4678984829966209, + "kl_loss": 0.31773999333381653, + "loss_ib": 0.009196026250720024, + "step": 1627 + }, + { + "ce_ib": 5.332401752471924, + "ce_orig": 0.7318709492683411, + "epoch": 0.4678984829966209, + "kl_loss": 0.21486830711364746, + "loss_ib": 0.007481084670871496, + "step": 1627 + }, + { + "ce_ib": 7.069350242614746, + "ce_orig": 1.3702043294906616, + "epoch": 0.4678984829966209, + "kl_loss": 0.18601363897323608, + "loss_ib": 0.008929486386477947, + "step": 1627 + }, + { + "ce_ib": 4.144545555114746, + "ce_orig": 0.7570605278015137, + "epoch": 0.4681860665755985, + "kl_loss": 0.22682487964630127, + "loss_ib": 0.0064127943478524685, + "step": 1628 + }, + { + "ce_ib": 4.975637912750244, + "ce_orig": 0.9772812128067017, + "epoch": 0.4681860665755985, + "kl_loss": 0.2234506607055664, + "loss_ib": 0.0072101447731256485, + "step": 1628 + }, + { + "ce_ib": 4.926833152770996, + "ce_orig": 0.8578603863716125, + "epoch": 0.4681860665755985, + "kl_loss": 0.13924984633922577, + "loss_ib": 0.006319331470876932, + "step": 1628 + }, + { + "ce_ib": 7.390744209289551, + "ce_orig": 0.7690408229827881, + "epoch": 0.4681860665755985, + "kl_loss": 0.20541280508041382, + "loss_ib": 0.009444871917366982, + "step": 1628 + }, + { + "ce_ib": 3.5227086544036865, + "ce_orig": 0.622070848941803, + "epoch": 0.46847365015457615, + "kl_loss": 0.2001567929983139, + "loss_ib": 0.005524276290088892, + "step": 1629 + }, + { + "ce_ib": 9.255240440368652, + "ce_orig": 0.9039395451545715, + "epoch": 0.46847365015457615, + "kl_loss": 0.2905845642089844, + "loss_ib": 0.01216108538210392, + "step": 1629 + }, + { + "ce_ib": 6.1646199226379395, + "ce_orig": 0.6998523473739624, + "epoch": 0.46847365015457615, + "kl_loss": 0.28400886058807373, + "loss_ib": 0.009004708379507065, + "step": 1629 + }, + { + "ce_ib": 6.819250583648682, + "ce_orig": 0.3863005042076111, + "epoch": 0.46847365015457615, + "kl_loss": 0.2553757429122925, + "loss_ib": 0.009373007342219353, + "step": 1629 + }, + { + "epoch": 0.46876123373355383, + "grad_norm": 0.10917191207408905, + "learning_rate": 9.592218263184503e-06, + "loss": 0.8765, + "step": 1630 + }, + { + "ce_ib": 4.765841484069824, + "ce_orig": 0.589921772480011, + "epoch": 0.46876123373355383, + "kl_loss": 0.24076642096042633, + "loss_ib": 0.007173506077378988, + "step": 1630 + }, + { + "ce_ib": 4.582233905792236, + "ce_orig": 0.9084943532943726, + "epoch": 0.46876123373355383, + "kl_loss": 0.3679146468639374, + "loss_ib": 0.008261379785835743, + "step": 1630 + }, + { + "ce_ib": 7.527477264404297, + "ce_orig": 1.0047513246536255, + "epoch": 0.46876123373355383, + "kl_loss": 0.3431330621242523, + "loss_ib": 0.010958807542920113, + "step": 1630 + }, + { + "ce_ib": 2.3991539478302, + "ce_orig": 0.48176831007003784, + "epoch": 0.46876123373355383, + "kl_loss": 0.15134502947330475, + "loss_ib": 0.003912604413926601, + "step": 1630 + }, + { + "ce_ib": 9.214207649230957, + "ce_orig": 1.4497013092041016, + "epoch": 0.46904881731253145, + "kl_loss": 0.24560272693634033, + "loss_ib": 0.011670233681797981, + "step": 1631 + }, + { + "ce_ib": 7.9614362716674805, + "ce_orig": 1.3710211515426636, + "epoch": 0.46904881731253145, + "kl_loss": 0.17029419541358948, + "loss_ib": 0.009664378128945827, + "step": 1631 + }, + { + "ce_ib": 9.119590759277344, + "ce_orig": 1.3142935037612915, + "epoch": 0.46904881731253145, + "kl_loss": 0.24297848343849182, + "loss_ib": 0.01154937595129013, + "step": 1631 + }, + { + "ce_ib": 5.7404866218566895, + "ce_orig": 0.7808132171630859, + "epoch": 0.46904881731253145, + "kl_loss": 0.3067057132720947, + "loss_ib": 0.00880754366517067, + "step": 1631 + }, + { + "ce_ib": 4.927828311920166, + "ce_orig": 0.8637658357620239, + "epoch": 0.4693364008915091, + "kl_loss": 0.23912517726421356, + "loss_ib": 0.007319080177694559, + "step": 1632 + }, + { + "ce_ib": 6.485845565795898, + "ce_orig": 0.46057918667793274, + "epoch": 0.4693364008915091, + "kl_loss": 0.304848313331604, + "loss_ib": 0.009534328244626522, + "step": 1632 + }, + { + "ce_ib": 4.575008869171143, + "ce_orig": 0.5714587569236755, + "epoch": 0.4693364008915091, + "kl_loss": 0.280214786529541, + "loss_ib": 0.007377156987786293, + "step": 1632 + }, + { + "ce_ib": 5.922549724578857, + "ce_orig": 0.9120684862136841, + "epoch": 0.4693364008915091, + "kl_loss": 0.3095513880252838, + "loss_ib": 0.00901806354522705, + "step": 1632 + }, + { + "ce_ib": 7.879459857940674, + "ce_orig": 1.2214351892471313, + "epoch": 0.46962398447048675, + "kl_loss": 0.278079092502594, + "loss_ib": 0.01066024973988533, + "step": 1633 + }, + { + "ce_ib": 7.259771823883057, + "ce_orig": 1.1840142011642456, + "epoch": 0.46962398447048675, + "kl_loss": 0.23207473754882812, + "loss_ib": 0.009580519050359726, + "step": 1633 + }, + { + "ce_ib": 8.346108436584473, + "ce_orig": 0.706606924533844, + "epoch": 0.46962398447048675, + "kl_loss": 0.16712577641010284, + "loss_ib": 0.010017366148531437, + "step": 1633 + }, + { + "ce_ib": 8.588577270507812, + "ce_orig": 1.0711778402328491, + "epoch": 0.46962398447048675, + "kl_loss": 0.19195330142974854, + "loss_ib": 0.010508110746741295, + "step": 1633 + }, + { + "ce_ib": 5.0715813636779785, + "ce_orig": 0.8741798996925354, + "epoch": 0.4699115680494644, + "kl_loss": 0.2602311968803406, + "loss_ib": 0.007673893589526415, + "step": 1634 + }, + { + "ce_ib": 7.203498363494873, + "ce_orig": 0.7638635635375977, + "epoch": 0.4699115680494644, + "kl_loss": 0.1899445801973343, + "loss_ib": 0.009102944284677505, + "step": 1634 + }, + { + "ce_ib": 7.463857650756836, + "ce_orig": 1.3012150526046753, + "epoch": 0.4699115680494644, + "kl_loss": 0.2831643521785736, + "loss_ib": 0.010295500978827477, + "step": 1634 + }, + { + "ce_ib": 7.1105146408081055, + "ce_orig": 0.9453640580177307, + "epoch": 0.4699115680494644, + "kl_loss": 0.23901736736297607, + "loss_ib": 0.009500687941908836, + "step": 1634 + }, + { + "epoch": 0.470199151628442, + "grad_norm": 0.1111624464392662, + "learning_rate": 9.589142913511104e-06, + "loss": 0.8375, + "step": 1635 + }, + { + "ce_ib": 10.440080642700195, + "ce_orig": 1.3846956491470337, + "epoch": 0.470199151628442, + "kl_loss": 0.2537896931171417, + "loss_ib": 0.01297797728329897, + "step": 1635 + }, + { + "ce_ib": 7.813945770263672, + "ce_orig": 1.048168659210205, + "epoch": 0.470199151628442, + "kl_loss": 0.2586318552494049, + "loss_ib": 0.010400263592600822, + "step": 1635 + }, + { + "ce_ib": 5.204543590545654, + "ce_orig": 0.8099000453948975, + "epoch": 0.470199151628442, + "kl_loss": 0.22041164338588715, + "loss_ib": 0.007408659905195236, + "step": 1635 + }, + { + "ce_ib": 5.3694610595703125, + "ce_orig": 0.9361176490783691, + "epoch": 0.470199151628442, + "kl_loss": 0.2414846271276474, + "loss_ib": 0.0077843074686825275, + "step": 1635 + }, + { + "ce_ib": 4.985073089599609, + "ce_orig": 0.5407580137252808, + "epoch": 0.4704867352074197, + "kl_loss": 0.3416972756385803, + "loss_ib": 0.00840204581618309, + "step": 1636 + }, + { + "ce_ib": 5.113473415374756, + "ce_orig": 0.6522555351257324, + "epoch": 0.4704867352074197, + "kl_loss": 0.29770779609680176, + "loss_ib": 0.008090551011264324, + "step": 1636 + }, + { + "ce_ib": 3.002034902572632, + "ce_orig": 0.5235405564308167, + "epoch": 0.4704867352074197, + "kl_loss": 0.23390766978263855, + "loss_ib": 0.005341111216694117, + "step": 1636 + }, + { + "ce_ib": 4.9306321144104, + "ce_orig": 0.8330354690551758, + "epoch": 0.4704867352074197, + "kl_loss": 0.17335164546966553, + "loss_ib": 0.006664148531854153, + "step": 1636 + }, + { + "ce_ib": 7.28361177444458, + "ce_orig": 1.006595492362976, + "epoch": 0.4707743187863973, + "kl_loss": 0.24205949902534485, + "loss_ib": 0.009704207070171833, + "step": 1637 + }, + { + "ce_ib": 3.2504138946533203, + "ce_orig": 0.44360581040382385, + "epoch": 0.4707743187863973, + "kl_loss": 0.23381365835666656, + "loss_ib": 0.005588550586253405, + "step": 1637 + }, + { + "ce_ib": 4.840280532836914, + "ce_orig": 0.8442293405532837, + "epoch": 0.4707743187863973, + "kl_loss": 0.17008595168590546, + "loss_ib": 0.006541139911860228, + "step": 1637 + }, + { + "ce_ib": 5.317061424255371, + "ce_orig": 1.048091173171997, + "epoch": 0.4707743187863973, + "kl_loss": 0.23069365322589874, + "loss_ib": 0.007623997982591391, + "step": 1637 + }, + { + "ce_ib": 3.422884702682495, + "ce_orig": 0.6781839728355408, + "epoch": 0.47106190236537493, + "kl_loss": 0.13445274531841278, + "loss_ib": 0.004767411854118109, + "step": 1638 + }, + { + "ce_ib": 7.733299732208252, + "ce_orig": 1.2221657037734985, + "epoch": 0.47106190236537493, + "kl_loss": 0.38926810026168823, + "loss_ib": 0.011625980958342552, + "step": 1638 + }, + { + "ce_ib": 6.331749439239502, + "ce_orig": 0.6211827993392944, + "epoch": 0.47106190236537493, + "kl_loss": 0.2626792788505554, + "loss_ib": 0.008958541788160801, + "step": 1638 + }, + { + "ce_ib": 5.28639554977417, + "ce_orig": 0.6637152433395386, + "epoch": 0.47106190236537493, + "kl_loss": 0.2628288269042969, + "loss_ib": 0.007914683781564236, + "step": 1638 + }, + { + "ce_ib": 4.917193412780762, + "ce_orig": 0.8369187712669373, + "epoch": 0.47134948594435255, + "kl_loss": 0.1748519241809845, + "loss_ib": 0.006665712222456932, + "step": 1639 + }, + { + "ce_ib": 6.316074371337891, + "ce_orig": 0.5653436183929443, + "epoch": 0.47134948594435255, + "kl_loss": 0.5525051951408386, + "loss_ib": 0.011841126717627048, + "step": 1639 + }, + { + "ce_ib": 5.5230841636657715, + "ce_orig": 0.85850989818573, + "epoch": 0.47134948594435255, + "kl_loss": 0.21816977858543396, + "loss_ib": 0.007704782299697399, + "step": 1639 + }, + { + "ce_ib": 4.093878746032715, + "ce_orig": 0.6998303532600403, + "epoch": 0.47134948594435255, + "kl_loss": 0.17293649911880493, + "loss_ib": 0.005823243875056505, + "step": 1639 + }, + { + "epoch": 0.47163706952333023, + "grad_norm": 0.13059952855110168, + "learning_rate": 9.586056507527266e-06, + "loss": 0.8683, + "step": 1640 + }, + { + "ce_ib": 3.3075473308563232, + "ce_orig": 0.4052724838256836, + "epoch": 0.47163706952333023, + "kl_loss": 0.3414004445075989, + "loss_ib": 0.006721551064401865, + "step": 1640 + }, + { + "ce_ib": 7.159058570861816, + "ce_orig": 1.193800926208496, + "epoch": 0.47163706952333023, + "kl_loss": 0.2893860936164856, + "loss_ib": 0.010052919387817383, + "step": 1640 + }, + { + "ce_ib": 6.255849838256836, + "ce_orig": 0.9312906265258789, + "epoch": 0.47163706952333023, + "kl_loss": 0.26795995235443115, + "loss_ib": 0.00893544964492321, + "step": 1640 + }, + { + "ce_ib": 5.748259544372559, + "ce_orig": 1.22636878490448, + "epoch": 0.47163706952333023, + "kl_loss": 0.2102559357881546, + "loss_ib": 0.00785081833600998, + "step": 1640 + }, + { + "ce_ib": 0.89692622423172, + "ce_orig": 0.09618912637233734, + "epoch": 0.47192465310230786, + "kl_loss": 0.5119258165359497, + "loss_ib": 0.006016184110194445, + "step": 1641 + }, + { + "ce_ib": 5.4927897453308105, + "ce_orig": 0.5387899279594421, + "epoch": 0.47192465310230786, + "kl_loss": 0.243363618850708, + "loss_ib": 0.007926425896584988, + "step": 1641 + }, + { + "ce_ib": 6.7933573722839355, + "ce_orig": 1.027798056602478, + "epoch": 0.47192465310230786, + "kl_loss": 0.1942419707775116, + "loss_ib": 0.008735776878893375, + "step": 1641 + }, + { + "ce_ib": 5.988297462463379, + "ce_orig": 0.8516644239425659, + "epoch": 0.47192465310230786, + "kl_loss": 0.20200949907302856, + "loss_ib": 0.00800839252769947, + "step": 1641 + }, + { + "ce_ib": 4.71184778213501, + "ce_orig": 1.0416312217712402, + "epoch": 0.4722122366812855, + "kl_loss": 0.19892562925815582, + "loss_ib": 0.006701103877276182, + "step": 1642 + }, + { + "ce_ib": 5.363261699676514, + "ce_orig": 1.0295337438583374, + "epoch": 0.4722122366812855, + "kl_loss": 0.16552218794822693, + "loss_ib": 0.007018483709543943, + "step": 1642 + }, + { + "ce_ib": 6.74136209487915, + "ce_orig": 1.0272431373596191, + "epoch": 0.4722122366812855, + "kl_loss": 0.2746507525444031, + "loss_ib": 0.009487869217991829, + "step": 1642 + }, + { + "ce_ib": 0.9795145392417908, + "ce_orig": 0.1657838374376297, + "epoch": 0.4722122366812855, + "kl_loss": 0.501509964466095, + "loss_ib": 0.005994614213705063, + "step": 1642 + }, + { + "ce_ib": 3.574262857437134, + "ce_orig": 0.35515451431274414, + "epoch": 0.47249982026026316, + "kl_loss": 0.21287068724632263, + "loss_ib": 0.0057029700838029385, + "step": 1643 + }, + { + "ce_ib": 6.737833499908447, + "ce_orig": 0.5578605532646179, + "epoch": 0.47249982026026316, + "kl_loss": 0.32588255405426025, + "loss_ib": 0.009996659122407436, + "step": 1643 + }, + { + "ce_ib": 4.701847076416016, + "ce_orig": 0.7198030352592468, + "epoch": 0.47249982026026316, + "kl_loss": 0.268189013004303, + "loss_ib": 0.007383737247437239, + "step": 1643 + }, + { + "ce_ib": 4.935227870941162, + "ce_orig": 0.9215746521949768, + "epoch": 0.47249982026026316, + "kl_loss": 0.25834494829177856, + "loss_ib": 0.00751867750659585, + "step": 1643 + }, + { + "ce_ib": 5.433006286621094, + "ce_orig": 0.967570424079895, + "epoch": 0.4727874038392408, + "kl_loss": 0.14970257878303528, + "loss_ib": 0.006930031813681126, + "step": 1644 + }, + { + "ce_ib": 5.268426418304443, + "ce_orig": 0.9726541042327881, + "epoch": 0.4727874038392408, + "kl_loss": 0.22781161963939667, + "loss_ib": 0.007546542678028345, + "step": 1644 + }, + { + "ce_ib": 4.189428806304932, + "ce_orig": 0.9564615488052368, + "epoch": 0.4727874038392408, + "kl_loss": 0.2764762341976166, + "loss_ib": 0.006954191252589226, + "step": 1644 + }, + { + "ce_ib": 8.928913116455078, + "ce_orig": 1.3035036325454712, + "epoch": 0.4727874038392408, + "kl_loss": 0.2561219334602356, + "loss_ib": 0.01149013265967369, + "step": 1644 + }, + { + "epoch": 0.4730749874182184, + "grad_norm": 0.11808949708938599, + "learning_rate": 9.582959052668855e-06, + "loss": 0.8916, + "step": 1645 + }, + { + "ce_ib": 4.722048759460449, + "ce_orig": 0.8292427659034729, + "epoch": 0.4730749874182184, + "kl_loss": 0.17961278557777405, + "loss_ib": 0.006518176756799221, + "step": 1645 + }, + { + "ce_ib": 5.358216762542725, + "ce_orig": 0.3747130334377289, + "epoch": 0.4730749874182184, + "kl_loss": 0.37739047408103943, + "loss_ib": 0.009132121689617634, + "step": 1645 + }, + { + "ce_ib": 4.914391994476318, + "ce_orig": 0.3914518356323242, + "epoch": 0.4730749874182184, + "kl_loss": 0.25701606273651123, + "loss_ib": 0.007484552916139364, + "step": 1645 + }, + { + "ce_ib": 6.924988746643066, + "ce_orig": 0.5208780765533447, + "epoch": 0.4730749874182184, + "kl_loss": 0.29026472568511963, + "loss_ib": 0.009827635250985622, + "step": 1645 + }, + { + "ce_ib": 8.15476131439209, + "ce_orig": 1.0385876893997192, + "epoch": 0.4733625709971961, + "kl_loss": 0.21632888913154602, + "loss_ib": 0.010318050161004066, + "step": 1646 + }, + { + "ce_ib": 8.506185531616211, + "ce_orig": 1.3584825992584229, + "epoch": 0.4733625709971961, + "kl_loss": 0.27990996837615967, + "loss_ib": 0.011305284686386585, + "step": 1646 + }, + { + "ce_ib": 9.8707275390625, + "ce_orig": 1.7097861766815186, + "epoch": 0.4733625709971961, + "kl_loss": 0.27105122804641724, + "loss_ib": 0.012581239454448223, + "step": 1646 + }, + { + "ce_ib": 4.385462760925293, + "ce_orig": 0.47088804841041565, + "epoch": 0.4733625709971961, + "kl_loss": 0.30151084065437317, + "loss_ib": 0.0074005709029734135, + "step": 1646 + }, + { + "ce_ib": 3.344881057739258, + "ce_orig": 0.5342389941215515, + "epoch": 0.4736501545761737, + "kl_loss": 0.18429887294769287, + "loss_ib": 0.005187870003283024, + "step": 1647 + }, + { + "ce_ib": 7.1738457679748535, + "ce_orig": 0.8699575066566467, + "epoch": 0.4736501545761737, + "kl_loss": 0.15991726517677307, + "loss_ib": 0.008773017674684525, + "step": 1647 + }, + { + "ce_ib": 8.26055908203125, + "ce_orig": 1.2396870851516724, + "epoch": 0.4736501545761737, + "kl_loss": 0.24188140034675598, + "loss_ib": 0.010679373517632484, + "step": 1647 + }, + { + "ce_ib": 6.468341827392578, + "ce_orig": 1.0458532571792603, + "epoch": 0.4736501545761737, + "kl_loss": 0.46231311559677124, + "loss_ib": 0.011091472581028938, + "step": 1647 + }, + { + "ce_ib": 7.5267863273620605, + "ce_orig": 1.2770994901657104, + "epoch": 0.47393773815515133, + "kl_loss": 0.218851700425148, + "loss_ib": 0.009715302847325802, + "step": 1648 + }, + { + "ce_ib": 4.878725051879883, + "ce_orig": 0.6984403729438782, + "epoch": 0.47393773815515133, + "kl_loss": 0.2023659348487854, + "loss_ib": 0.0069023845717310905, + "step": 1648 + }, + { + "ce_ib": 4.6335601806640625, + "ce_orig": 0.709439754486084, + "epoch": 0.47393773815515133, + "kl_loss": 0.23505522310733795, + "loss_ib": 0.0069841123186051846, + "step": 1648 + }, + { + "ce_ib": 8.300333976745605, + "ce_orig": 0.872704029083252, + "epoch": 0.47393773815515133, + "kl_loss": 0.31134361028671265, + "loss_ib": 0.01141376979649067, + "step": 1648 + }, + { + "ce_ib": 5.34572696685791, + "ce_orig": 0.7818442583084106, + "epoch": 0.47422532173412896, + "kl_loss": 0.23405304551124573, + "loss_ib": 0.007686257362365723, + "step": 1649 + }, + { + "ce_ib": 5.499821186065674, + "ce_orig": 0.7070225477218628, + "epoch": 0.47422532173412896, + "kl_loss": 0.27050989866256714, + "loss_ib": 0.008204920217394829, + "step": 1649 + }, + { + "ce_ib": 7.790607929229736, + "ce_orig": 1.1115490198135376, + "epoch": 0.47422532173412896, + "kl_loss": 0.2872862219810486, + "loss_ib": 0.01066347025334835, + "step": 1649 + }, + { + "ce_ib": 7.456298828125, + "ce_orig": 1.2676522731781006, + "epoch": 0.47422532173412896, + "kl_loss": 0.2790737748146057, + "loss_ib": 0.010247036814689636, + "step": 1649 + }, + { + "epoch": 0.47451290531310664, + "grad_norm": 0.11858896166086197, + "learning_rate": 9.579850556398356e-06, + "loss": 0.8653, + "step": 1650 + }, + { + "ce_ib": 5.404179096221924, + "ce_orig": 1.1686975955963135, + "epoch": 0.47451290531310664, + "kl_loss": 0.17138412594795227, + "loss_ib": 0.007118019741028547, + "step": 1650 + }, + { + "ce_ib": 4.425675392150879, + "ce_orig": 0.6207453012466431, + "epoch": 0.47451290531310664, + "kl_loss": 0.2140074074268341, + "loss_ib": 0.006565749645233154, + "step": 1650 + }, + { + "ce_ib": 4.123245716094971, + "ce_orig": 0.6918128132820129, + "epoch": 0.47451290531310664, + "kl_loss": 0.2605014443397522, + "loss_ib": 0.006728260312229395, + "step": 1650 + }, + { + "ce_ib": 6.735450267791748, + "ce_orig": 0.9332886934280396, + "epoch": 0.47451290531310664, + "kl_loss": 0.26818910241127014, + "loss_ib": 0.009417342022061348, + "step": 1650 + }, + { + "ce_ib": 4.729572296142578, + "ce_orig": 0.7350138425827026, + "epoch": 0.47480048889208426, + "kl_loss": 0.25111934542655945, + "loss_ib": 0.007240765728056431, + "step": 1651 + }, + { + "ce_ib": 4.569364070892334, + "ce_orig": 0.5795966982841492, + "epoch": 0.47480048889208426, + "kl_loss": 0.27044403553009033, + "loss_ib": 0.007273804396390915, + "step": 1651 + }, + { + "ce_ib": 7.339898586273193, + "ce_orig": 1.3115758895874023, + "epoch": 0.47480048889208426, + "kl_loss": 0.22244146466255188, + "loss_ib": 0.009564314037561417, + "step": 1651 + }, + { + "ce_ib": 7.483953475952148, + "ce_orig": 1.1939107179641724, + "epoch": 0.47480048889208426, + "kl_loss": 0.22645403444766998, + "loss_ib": 0.009748494252562523, + "step": 1651 + }, + { + "ce_ib": 5.908069133758545, + "ce_orig": 0.7272500395774841, + "epoch": 0.4750880724710619, + "kl_loss": 0.3213765621185303, + "loss_ib": 0.009121834300458431, + "step": 1652 + }, + { + "ce_ib": 6.445575714111328, + "ce_orig": 0.8850076198577881, + "epoch": 0.4750880724710619, + "kl_loss": 0.17783117294311523, + "loss_ib": 0.008223887532949448, + "step": 1652 + }, + { + "ce_ib": 2.8532485961914062, + "ce_orig": 0.4185572564601898, + "epoch": 0.4750880724710619, + "kl_loss": 0.15768098831176758, + "loss_ib": 0.004430058412253857, + "step": 1652 + }, + { + "ce_ib": 4.459863185882568, + "ce_orig": 0.7344067692756653, + "epoch": 0.4750880724710619, + "kl_loss": 0.2608838677406311, + "loss_ib": 0.007068702019751072, + "step": 1652 + }, + { + "ce_ib": 5.409404754638672, + "ce_orig": 1.1071850061416626, + "epoch": 0.47537565605003956, + "kl_loss": 0.2114146500825882, + "loss_ib": 0.0075235506519675255, + "step": 1653 + }, + { + "ce_ib": 3.9895527362823486, + "ce_orig": 0.7837691903114319, + "epoch": 0.47537565605003956, + "kl_loss": 0.19163039326667786, + "loss_ib": 0.005905856844037771, + "step": 1653 + }, + { + "ce_ib": 5.048002243041992, + "ce_orig": 0.7901415228843689, + "epoch": 0.47537565605003956, + "kl_loss": 0.4744173586368561, + "loss_ib": 0.009792176075279713, + "step": 1653 + }, + { + "ce_ib": 6.277562618255615, + "ce_orig": 0.8317906856536865, + "epoch": 0.47537565605003956, + "kl_loss": 0.18643495440483093, + "loss_ib": 0.008141911588609219, + "step": 1653 + }, + { + "ce_ib": 4.940022945404053, + "ce_orig": 0.8361666202545166, + "epoch": 0.4756632396290172, + "kl_loss": 0.2563755214214325, + "loss_ib": 0.0075037782080471516, + "step": 1654 + }, + { + "ce_ib": 4.22787618637085, + "ce_orig": 0.7344029545783997, + "epoch": 0.4756632396290172, + "kl_loss": 0.26797008514404297, + "loss_ib": 0.006907577160745859, + "step": 1654 + }, + { + "ce_ib": 7.910140514373779, + "ce_orig": 1.2482683658599854, + "epoch": 0.4756632396290172, + "kl_loss": 0.30194056034088135, + "loss_ib": 0.010929546318948269, + "step": 1654 + }, + { + "ce_ib": 5.600918292999268, + "ce_orig": 0.6320188045501709, + "epoch": 0.4756632396290172, + "kl_loss": 0.2211628258228302, + "loss_ib": 0.00781254656612873, + "step": 1654 + }, + { + "epoch": 0.4759508232079948, + "grad_norm": 0.112090565264225, + "learning_rate": 9.576731026204862e-06, + "loss": 0.8784, + "step": 1655 + }, + { + "ce_ib": 6.266970157623291, + "ce_orig": 1.1994843482971191, + "epoch": 0.4759508232079948, + "kl_loss": 0.254923939704895, + "loss_ib": 0.008816209621727467, + "step": 1655 + }, + { + "ce_ib": 7.322546482086182, + "ce_orig": 1.139443278312683, + "epoch": 0.4759508232079948, + "kl_loss": 0.22967414557933807, + "loss_ib": 0.009619288146495819, + "step": 1655 + }, + { + "ce_ib": 5.917881488800049, + "ce_orig": 0.758932888507843, + "epoch": 0.4759508232079948, + "kl_loss": 0.2735050618648529, + "loss_ib": 0.00865293201059103, + "step": 1655 + }, + { + "ce_ib": 5.141608715057373, + "ce_orig": 0.6212088465690613, + "epoch": 0.4759508232079948, + "kl_loss": 0.26703643798828125, + "loss_ib": 0.007811972871422768, + "step": 1655 + }, + { + "ce_ib": 3.660046339035034, + "ce_orig": 0.38183343410491943, + "epoch": 0.4762384067869725, + "kl_loss": 0.2949381172657013, + "loss_ib": 0.006609427742660046, + "step": 1656 + }, + { + "ce_ib": 6.633572578430176, + "ce_orig": 0.861060619354248, + "epoch": 0.4762384067869725, + "kl_loss": 0.2914574444293976, + "loss_ib": 0.009548146277666092, + "step": 1656 + }, + { + "ce_ib": 5.865113735198975, + "ce_orig": 0.838970959186554, + "epoch": 0.4762384067869725, + "kl_loss": 0.2378089725971222, + "loss_ib": 0.008243203163146973, + "step": 1656 + }, + { + "ce_ib": 7.63082218170166, + "ce_orig": 1.2247337102890015, + "epoch": 0.4762384067869725, + "kl_loss": 0.3240877389907837, + "loss_ib": 0.010871698148548603, + "step": 1656 + }, + { + "ce_ib": 3.8568806648254395, + "ce_orig": 0.6943607330322266, + "epoch": 0.4765259903659501, + "kl_loss": 0.17261001467704773, + "loss_ib": 0.005582980811595917, + "step": 1657 + }, + { + "ce_ib": 5.316219806671143, + "ce_orig": 0.7326712608337402, + "epoch": 0.4765259903659501, + "kl_loss": 0.15569522976875305, + "loss_ib": 0.006873172242194414, + "step": 1657 + }, + { + "ce_ib": 4.3332366943359375, + "ce_orig": 0.7443872690200806, + "epoch": 0.4765259903659501, + "kl_loss": 0.2839428186416626, + "loss_ib": 0.007172664627432823, + "step": 1657 + }, + { + "ce_ib": 4.349783420562744, + "ce_orig": 0.38230669498443604, + "epoch": 0.4765259903659501, + "kl_loss": 0.2405998259782791, + "loss_ib": 0.006755781825631857, + "step": 1657 + }, + { + "ce_ib": 4.3664116859436035, + "ce_orig": 0.80488520860672, + "epoch": 0.47681357394492774, + "kl_loss": 0.3269127607345581, + "loss_ib": 0.007635539397597313, + "step": 1658 + }, + { + "ce_ib": 3.9766013622283936, + "ce_orig": 0.6910889744758606, + "epoch": 0.47681357394492774, + "kl_loss": 0.15313193202018738, + "loss_ib": 0.005507920868694782, + "step": 1658 + }, + { + "ce_ib": 4.066466808319092, + "ce_orig": 0.6271889209747314, + "epoch": 0.47681357394492774, + "kl_loss": 0.19948875904083252, + "loss_ib": 0.006061354652047157, + "step": 1658 + }, + { + "ce_ib": 6.327476501464844, + "ce_orig": 0.9295334815979004, + "epoch": 0.47681357394492774, + "kl_loss": 0.18949063122272491, + "loss_ib": 0.008222382515668869, + "step": 1658 + }, + { + "ce_ib": 4.394959926605225, + "ce_orig": 0.3115496337413788, + "epoch": 0.47710115752390536, + "kl_loss": 0.28351879119873047, + "loss_ib": 0.007230148185044527, + "step": 1659 + }, + { + "ce_ib": 6.753210067749023, + "ce_orig": 0.9570003151893616, + "epoch": 0.47710115752390536, + "kl_loss": 0.20350044965744019, + "loss_ib": 0.0087882149964571, + "step": 1659 + }, + { + "ce_ib": 6.190893650054932, + "ce_orig": 0.856682300567627, + "epoch": 0.47710115752390536, + "kl_loss": 0.2722907066345215, + "loss_ib": 0.008913800120353699, + "step": 1659 + }, + { + "ce_ib": 5.620670795440674, + "ce_orig": 0.853740394115448, + "epoch": 0.47710115752390536, + "kl_loss": 0.3125329613685608, + "loss_ib": 0.00874600000679493, + "step": 1659 + }, + { + "epoch": 0.47738874110288304, + "grad_norm": 0.1384882926940918, + "learning_rate": 9.573600469604044e-06, + "loss": 0.8339, + "step": 1660 + }, + { + "ce_ib": 6.315702438354492, + "ce_orig": 0.9741970896720886, + "epoch": 0.47738874110288304, + "kl_loss": 0.5963153839111328, + "loss_ib": 0.012278856709599495, + "step": 1660 + }, + { + "ce_ib": 8.414582252502441, + "ce_orig": 1.3185315132141113, + "epoch": 0.47738874110288304, + "kl_loss": 0.18932093679904938, + "loss_ib": 0.010307792574167252, + "step": 1660 + }, + { + "ce_ib": 6.331785202026367, + "ce_orig": 0.7015150785446167, + "epoch": 0.47738874110288304, + "kl_loss": 0.22453047335147858, + "loss_ib": 0.008577089756727219, + "step": 1660 + }, + { + "ce_ib": 5.603737831115723, + "ce_orig": 0.6692718863487244, + "epoch": 0.47738874110288304, + "kl_loss": 0.1807200163602829, + "loss_ib": 0.007410937920212746, + "step": 1660 + }, + { + "ce_ib": 4.327080249786377, + "ce_orig": 0.6909063458442688, + "epoch": 0.47767632468186066, + "kl_loss": 0.15561500191688538, + "loss_ib": 0.005883229896426201, + "step": 1661 + }, + { + "ce_ib": 4.2858805656433105, + "ce_orig": 0.7942281365394592, + "epoch": 0.47767632468186066, + "kl_loss": 0.1694888025522232, + "loss_ib": 0.005980768706649542, + "step": 1661 + }, + { + "ce_ib": 6.364988803863525, + "ce_orig": 0.889671802520752, + "epoch": 0.47767632468186066, + "kl_loss": 0.43929800391197205, + "loss_ib": 0.010757967829704285, + "step": 1661 + }, + { + "ce_ib": 3.5083861351013184, + "ce_orig": 0.7230427861213684, + "epoch": 0.47767632468186066, + "kl_loss": 0.22810864448547363, + "loss_ib": 0.005789472721517086, + "step": 1661 + }, + { + "ce_ib": 6.556941986083984, + "ce_orig": 0.500135064125061, + "epoch": 0.4779639082608383, + "kl_loss": 0.34359556436538696, + "loss_ib": 0.009992897510528564, + "step": 1662 + }, + { + "ce_ib": 6.858614921569824, + "ce_orig": 1.1202833652496338, + "epoch": 0.4779639082608383, + "kl_loss": 0.28433266282081604, + "loss_ib": 0.009701942093670368, + "step": 1662 + }, + { + "ce_ib": 3.0755460262298584, + "ce_orig": 0.5723063349723816, + "epoch": 0.4779639082608383, + "kl_loss": 0.23363028466701508, + "loss_ib": 0.005411848891526461, + "step": 1662 + }, + { + "ce_ib": 6.072047710418701, + "ce_orig": 0.8839273452758789, + "epoch": 0.4779639082608383, + "kl_loss": 0.3012058734893799, + "loss_ib": 0.009084106422960758, + "step": 1662 + }, + { + "ce_ib": 3.6702189445495605, + "ce_orig": 0.7512197494506836, + "epoch": 0.47825149183981597, + "kl_loss": 0.1513669192790985, + "loss_ib": 0.005183888133615255, + "step": 1663 + }, + { + "ce_ib": 4.374359607696533, + "ce_orig": 0.8027209043502808, + "epoch": 0.47825149183981597, + "kl_loss": 0.18617792427539825, + "loss_ib": 0.006236138753592968, + "step": 1663 + }, + { + "ce_ib": 3.1989543437957764, + "ce_orig": 0.7183689475059509, + "epoch": 0.47825149183981597, + "kl_loss": 0.16047146916389465, + "loss_ib": 0.004803669173270464, + "step": 1663 + }, + { + "ce_ib": 6.37852144241333, + "ce_orig": 1.3415592908859253, + "epoch": 0.47825149183981597, + "kl_loss": 0.18785199522972107, + "loss_ib": 0.00825704075396061, + "step": 1663 + }, + { + "ce_ib": 6.634273529052734, + "ce_orig": 1.201911211013794, + "epoch": 0.4785390754187936, + "kl_loss": 0.20284774899482727, + "loss_ib": 0.008662750944495201, + "step": 1664 + }, + { + "ce_ib": 5.713882923126221, + "ce_orig": 0.7980459332466125, + "epoch": 0.4785390754187936, + "kl_loss": 0.23021364212036133, + "loss_ib": 0.008016019128262997, + "step": 1664 + }, + { + "ce_ib": 5.318065166473389, + "ce_orig": 0.694571316242218, + "epoch": 0.4785390754187936, + "kl_loss": 0.26769188046455383, + "loss_ib": 0.007994984276592731, + "step": 1664 + }, + { + "ce_ib": 3.1290438175201416, + "ce_orig": 0.6283248662948608, + "epoch": 0.4785390754187936, + "kl_loss": 0.17708495259284973, + "loss_ib": 0.004899893421679735, + "step": 1664 + }, + { + "epoch": 0.4788266589977712, + "grad_norm": 0.14190877974033356, + "learning_rate": 9.57045889413814e-06, + "loss": 0.8418, + "step": 1665 + }, + { + "ce_ib": 4.5773468017578125, + "ce_orig": 0.563421368598938, + "epoch": 0.4788266589977712, + "kl_loss": 0.25328588485717773, + "loss_ib": 0.007110205944627523, + "step": 1665 + }, + { + "ce_ib": 4.391364574432373, + "ce_orig": 0.7508824467658997, + "epoch": 0.4788266589977712, + "kl_loss": 0.1749100238084793, + "loss_ib": 0.0061404649168252945, + "step": 1665 + }, + { + "ce_ib": 7.601984024047852, + "ce_orig": 1.1138851642608643, + "epoch": 0.4788266589977712, + "kl_loss": 0.16526943445205688, + "loss_ib": 0.009254678152501583, + "step": 1665 + }, + { + "ce_ib": 4.717142581939697, + "ce_orig": 0.9921720027923584, + "epoch": 0.4788266589977712, + "kl_loss": 0.21343350410461426, + "loss_ib": 0.006851477548480034, + "step": 1665 + }, + { + "ce_ib": 3.0098533630371094, + "ce_orig": 0.600035548210144, + "epoch": 0.4791142425767489, + "kl_loss": 0.18211796879768372, + "loss_ib": 0.004831032827496529, + "step": 1666 + }, + { + "ce_ib": 4.640512466430664, + "ce_orig": 0.7434430122375488, + "epoch": 0.4791142425767489, + "kl_loss": 0.3951219320297241, + "loss_ib": 0.008591732010245323, + "step": 1666 + }, + { + "ce_ib": 5.423933029174805, + "ce_orig": 0.6963688731193542, + "epoch": 0.4791142425767489, + "kl_loss": 0.2843274176120758, + "loss_ib": 0.008267207071185112, + "step": 1666 + }, + { + "ce_ib": 3.382248640060425, + "ce_orig": 0.6287346482276917, + "epoch": 0.4791142425767489, + "kl_loss": 0.22552873194217682, + "loss_ib": 0.005637535825371742, + "step": 1666 + }, + { + "ce_ib": 5.888190746307373, + "ce_orig": 0.9677170515060425, + "epoch": 0.4794018261557265, + "kl_loss": 0.22070422768592834, + "loss_ib": 0.00809523370116949, + "step": 1667 + }, + { + "ce_ib": 8.648638725280762, + "ce_orig": 1.1520633697509766, + "epoch": 0.4794018261557265, + "kl_loss": 0.22846215963363647, + "loss_ib": 0.010933260433375835, + "step": 1667 + }, + { + "ce_ib": 3.865662097930908, + "ce_orig": 0.649163007736206, + "epoch": 0.4794018261557265, + "kl_loss": 0.2529069781303406, + "loss_ib": 0.006394731812179089, + "step": 1667 + }, + { + "ce_ib": 3.900329828262329, + "ce_orig": 0.6768735647201538, + "epoch": 0.4794018261557265, + "kl_loss": 0.23504945635795593, + "loss_ib": 0.006250824313610792, + "step": 1667 + }, + { + "ce_ib": 8.672497749328613, + "ce_orig": 1.6776679754257202, + "epoch": 0.47968940973470414, + "kl_loss": 0.23172059655189514, + "loss_ib": 0.010989704169332981, + "step": 1668 + }, + { + "ce_ib": 3.6088309288024902, + "ce_orig": 0.4569207429885864, + "epoch": 0.47968940973470414, + "kl_loss": 0.23361648619174957, + "loss_ib": 0.005944995675235987, + "step": 1668 + }, + { + "ce_ib": 5.450647830963135, + "ce_orig": 0.8888800740242004, + "epoch": 0.47968940973470414, + "kl_loss": 0.22026127576828003, + "loss_ib": 0.007653260603547096, + "step": 1668 + }, + { + "ce_ib": 2.2504947185516357, + "ce_orig": 0.26895272731781006, + "epoch": 0.47968940973470414, + "kl_loss": 0.4324935972690582, + "loss_ib": 0.006575430277734995, + "step": 1668 + }, + { + "ce_ib": 9.812613487243652, + "ce_orig": 0.47519996762275696, + "epoch": 0.47997699331368177, + "kl_loss": 0.2601562738418579, + "loss_ib": 0.012414176017045975, + "step": 1669 + }, + { + "ce_ib": 8.931351661682129, + "ce_orig": 1.1598052978515625, + "epoch": 0.47997699331368177, + "kl_loss": 0.27191799879074097, + "loss_ib": 0.011650530621409416, + "step": 1669 + }, + { + "ce_ib": 5.104313850402832, + "ce_orig": 0.9475460052490234, + "epoch": 0.47997699331368177, + "kl_loss": 0.303463339805603, + "loss_ib": 0.008138947188854218, + "step": 1669 + }, + { + "ce_ib": 5.356082439422607, + "ce_orig": 0.7924367189407349, + "epoch": 0.47997699331368177, + "kl_loss": 0.17794081568717957, + "loss_ib": 0.007135489955544472, + "step": 1669 + }, + { + "epoch": 0.48026457689265944, + "grad_norm": 0.12426599115133286, + "learning_rate": 9.567306307375933e-06, + "loss": 0.8209, + "step": 1670 + }, + { + "ce_ib": 7.080532550811768, + "ce_orig": 1.2319401502609253, + "epoch": 0.48026457689265944, + "kl_loss": 0.24892403185367584, + "loss_ib": 0.009569772519171238, + "step": 1670 + }, + { + "ce_ib": 5.915502071380615, + "ce_orig": 0.8437856435775757, + "epoch": 0.48026457689265944, + "kl_loss": 0.7955646514892578, + "loss_ib": 0.013871148228645325, + "step": 1670 + }, + { + "ce_ib": 5.740050792694092, + "ce_orig": 0.8433563709259033, + "epoch": 0.48026457689265944, + "kl_loss": 0.24343541264533997, + "loss_ib": 0.008174404501914978, + "step": 1670 + }, + { + "ce_ib": 9.337093353271484, + "ce_orig": 1.3409016132354736, + "epoch": 0.48026457689265944, + "kl_loss": 0.2468741089105606, + "loss_ib": 0.011805834248661995, + "step": 1670 + }, + { + "ce_ib": 3.697110414505005, + "ce_orig": 0.4739634692668915, + "epoch": 0.48055216047163707, + "kl_loss": 0.4081552028656006, + "loss_ib": 0.007778662256896496, + "step": 1671 + }, + { + "ce_ib": 4.245136737823486, + "ce_orig": 0.8474032878875732, + "epoch": 0.48055216047163707, + "kl_loss": 0.20764032006263733, + "loss_ib": 0.0063215396367013454, + "step": 1671 + }, + { + "ce_ib": 4.236147403717041, + "ce_orig": 0.8165615200996399, + "epoch": 0.48055216047163707, + "kl_loss": 0.1807256042957306, + "loss_ib": 0.006043402943760157, + "step": 1671 + }, + { + "ce_ib": 4.574710369110107, + "ce_orig": 0.7062886357307434, + "epoch": 0.48055216047163707, + "kl_loss": 0.21255922317504883, + "loss_ib": 0.006700302008539438, + "step": 1671 + }, + { + "ce_ib": 5.6235761642456055, + "ce_orig": 0.9316110014915466, + "epoch": 0.4808397440506147, + "kl_loss": 0.1461721509695053, + "loss_ib": 0.0070852977223694324, + "step": 1672 + }, + { + "ce_ib": 5.345804691314697, + "ce_orig": 0.8603891730308533, + "epoch": 0.4808397440506147, + "kl_loss": 0.29918748140335083, + "loss_ib": 0.00833767931908369, + "step": 1672 + }, + { + "ce_ib": 6.696346282958984, + "ce_orig": 1.43319571018219, + "epoch": 0.4808397440506147, + "kl_loss": 0.18958798050880432, + "loss_ib": 0.008592226542532444, + "step": 1672 + }, + { + "ce_ib": 3.778940439224243, + "ce_orig": 0.8303045630455017, + "epoch": 0.4808397440506147, + "kl_loss": 0.22567567229270935, + "loss_ib": 0.006035697180777788, + "step": 1672 + }, + { + "ce_ib": 3.8298754692077637, + "ce_orig": 0.3461815416812897, + "epoch": 0.48112732762959237, + "kl_loss": 0.3373625576496124, + "loss_ib": 0.007203501183539629, + "step": 1673 + }, + { + "ce_ib": 5.3641252517700195, + "ce_orig": 0.7774767279624939, + "epoch": 0.48112732762959237, + "kl_loss": 0.2694661617279053, + "loss_ib": 0.008058786392211914, + "step": 1673 + }, + { + "ce_ib": 4.745968818664551, + "ce_orig": 0.942043125629425, + "epoch": 0.48112732762959237, + "kl_loss": 0.15933820605278015, + "loss_ib": 0.006339350715279579, + "step": 1673 + }, + { + "ce_ib": 8.730219841003418, + "ce_orig": 1.2593905925750732, + "epoch": 0.48112732762959237, + "kl_loss": 0.30499938130378723, + "loss_ib": 0.011780214495956898, + "step": 1673 + }, + { + "ce_ib": 5.755885601043701, + "ce_orig": 0.6556824445724487, + "epoch": 0.48141491120857, + "kl_loss": 0.12161286920309067, + "loss_ib": 0.006972013972699642, + "step": 1674 + }, + { + "ce_ib": 5.917918682098389, + "ce_orig": 0.728310227394104, + "epoch": 0.48141491120857, + "kl_loss": 0.41346365213394165, + "loss_ib": 0.010052554309368134, + "step": 1674 + }, + { + "ce_ib": 5.303135395050049, + "ce_orig": 0.7300191521644592, + "epoch": 0.48141491120857, + "kl_loss": 0.2900460362434387, + "loss_ib": 0.008203595876693726, + "step": 1674 + }, + { + "ce_ib": 3.7630128860473633, + "ce_orig": 0.6488651633262634, + "epoch": 0.48141491120857, + "kl_loss": 0.180400550365448, + "loss_ib": 0.005567018408328295, + "step": 1674 + }, + { + "epoch": 0.4817024947875476, + "grad_norm": 0.12936660647392273, + "learning_rate": 9.564142716912737e-06, + "loss": 0.8743, + "step": 1675 + }, + { + "ce_ib": 5.63591194152832, + "ce_orig": 0.7025658488273621, + "epoch": 0.4817024947875476, + "kl_loss": 0.31601640582084656, + "loss_ib": 0.008796075358986855, + "step": 1675 + }, + { + "ce_ib": 8.811908721923828, + "ce_orig": 1.0708190202713013, + "epoch": 0.4817024947875476, + "kl_loss": 0.25741177797317505, + "loss_ib": 0.011386026628315449, + "step": 1675 + }, + { + "ce_ib": 3.726789712905884, + "ce_orig": 0.7018229365348816, + "epoch": 0.4817024947875476, + "kl_loss": 0.20980185270309448, + "loss_ib": 0.005824807565659285, + "step": 1675 + }, + { + "ce_ib": 4.922455787658691, + "ce_orig": 0.8278821110725403, + "epoch": 0.4817024947875476, + "kl_loss": 0.18464845418930054, + "loss_ib": 0.006768940482288599, + "step": 1675 + }, + { + "ce_ib": 6.2850518226623535, + "ce_orig": 0.7126919031143188, + "epoch": 0.4819900783665253, + "kl_loss": 0.2205125093460083, + "loss_ib": 0.008490176871418953, + "step": 1676 + }, + { + "ce_ib": 7.34765100479126, + "ce_orig": 1.3165032863616943, + "epoch": 0.4819900783665253, + "kl_loss": 0.257365345954895, + "loss_ib": 0.009921303950250149, + "step": 1676 + }, + { + "ce_ib": 7.904400825500488, + "ce_orig": 1.4748260974884033, + "epoch": 0.4819900783665253, + "kl_loss": 0.27275893092155457, + "loss_ib": 0.010631990619003773, + "step": 1676 + }, + { + "ce_ib": 3.2757465839385986, + "ce_orig": 0.7456637024879456, + "epoch": 0.4819900783665253, + "kl_loss": 0.12369295209646225, + "loss_ib": 0.004512676037847996, + "step": 1676 + }, + { + "ce_ib": 5.002022743225098, + "ce_orig": 0.9142476320266724, + "epoch": 0.4822776619455029, + "kl_loss": 0.24907097220420837, + "loss_ib": 0.007492732722312212, + "step": 1677 + }, + { + "ce_ib": 5.418490886688232, + "ce_orig": 0.6175795793533325, + "epoch": 0.4822776619455029, + "kl_loss": 0.1571856439113617, + "loss_ib": 0.006990346591919661, + "step": 1677 + }, + { + "ce_ib": 7.182295799255371, + "ce_orig": 1.1133639812469482, + "epoch": 0.4822776619455029, + "kl_loss": 0.32248324155807495, + "loss_ib": 0.010407128371298313, + "step": 1677 + }, + { + "ce_ib": 7.538179397583008, + "ce_orig": 1.0507768392562866, + "epoch": 0.4822776619455029, + "kl_loss": 0.36521878838539124, + "loss_ib": 0.011190367862582207, + "step": 1677 + }, + { + "ce_ib": 6.171362400054932, + "ce_orig": 0.7541481256484985, + "epoch": 0.48256524552448055, + "kl_loss": 0.30361032485961914, + "loss_ib": 0.009207465685904026, + "step": 1678 + }, + { + "ce_ib": 3.476198673248291, + "ce_orig": 0.4351646900177002, + "epoch": 0.48256524552448055, + "kl_loss": 0.2083813101053238, + "loss_ib": 0.005560011602938175, + "step": 1678 + }, + { + "ce_ib": 8.659995079040527, + "ce_orig": 1.454718828201294, + "epoch": 0.48256524552448055, + "kl_loss": 0.2963302433490753, + "loss_ib": 0.011623297818005085, + "step": 1678 + }, + { + "ce_ib": 2.5902063846588135, + "ce_orig": 0.5103031992912292, + "epoch": 0.48256524552448055, + "kl_loss": 0.26295000314712524, + "loss_ib": 0.005219706334173679, + "step": 1678 + }, + { + "ce_ib": 4.061892986297607, + "ce_orig": 0.9226779937744141, + "epoch": 0.48285282910345817, + "kl_loss": 0.39064711332321167, + "loss_ib": 0.007968363352119923, + "step": 1679 + }, + { + "ce_ib": 4.2745361328125, + "ce_orig": 0.7729305624961853, + "epoch": 0.48285282910345817, + "kl_loss": 0.1899227499961853, + "loss_ib": 0.006173762958496809, + "step": 1679 + }, + { + "ce_ib": 6.655904769897461, + "ce_orig": 1.1210262775421143, + "epoch": 0.48285282910345817, + "kl_loss": 0.23119421303272247, + "loss_ib": 0.008967846632003784, + "step": 1679 + }, + { + "ce_ib": 7.641833305358887, + "ce_orig": 1.2369133234024048, + "epoch": 0.48285282910345817, + "kl_loss": 0.25590628385543823, + "loss_ib": 0.010200896300375462, + "step": 1679 + }, + { + "epoch": 0.48314041268243585, + "grad_norm": 0.10780736804008484, + "learning_rate": 9.560968130370376e-06, + "loss": 0.9124, + "step": 1680 + }, + { + "ce_ib": 7.560593605041504, + "ce_orig": 1.204754114151001, + "epoch": 0.48314041268243585, + "kl_loss": 0.2123798131942749, + "loss_ib": 0.00968439131975174, + "step": 1680 + }, + { + "ce_ib": 4.961677551269531, + "ce_orig": 0.9925312995910645, + "epoch": 0.48314041268243585, + "kl_loss": 0.19958920776844025, + "loss_ib": 0.006957569625228643, + "step": 1680 + }, + { + "ce_ib": 8.905904769897461, + "ce_orig": 1.682554006576538, + "epoch": 0.48314041268243585, + "kl_loss": 0.31346291303634644, + "loss_ib": 0.012040534056723118, + "step": 1680 + }, + { + "ce_ib": 4.896394729614258, + "ce_orig": 0.658926784992218, + "epoch": 0.48314041268243585, + "kl_loss": 0.27128756046295166, + "loss_ib": 0.007609270513057709, + "step": 1680 + }, + { + "ce_ib": 11.167470932006836, + "ce_orig": 1.5560513734817505, + "epoch": 0.4834279962614135, + "kl_loss": 0.19887183606624603, + "loss_ib": 0.01315618958324194, + "step": 1681 + }, + { + "ce_ib": 3.238182306289673, + "ce_orig": 0.3320446014404297, + "epoch": 0.4834279962614135, + "kl_loss": 0.22955651581287384, + "loss_ib": 0.005533747375011444, + "step": 1681 + }, + { + "ce_ib": 7.380128383636475, + "ce_orig": 1.2976762056350708, + "epoch": 0.4834279962614135, + "kl_loss": 0.23345749080181122, + "loss_ib": 0.00971470307558775, + "step": 1681 + }, + { + "ce_ib": 4.491724014282227, + "ce_orig": 0.49089425802230835, + "epoch": 0.4834279962614135, + "kl_loss": 0.3844373822212219, + "loss_ib": 0.008336097933351994, + "step": 1681 + }, + { + "ce_ib": 8.009778022766113, + "ce_orig": 1.3929885625839233, + "epoch": 0.4837155798403911, + "kl_loss": 0.19164226949214935, + "loss_ib": 0.009926200844347477, + "step": 1682 + }, + { + "ce_ib": 3.8315823078155518, + "ce_orig": 0.37863993644714355, + "epoch": 0.4837155798403911, + "kl_loss": 0.496357798576355, + "loss_ib": 0.008795159868896008, + "step": 1682 + }, + { + "ce_ib": 5.143205165863037, + "ce_orig": 0.8195101618766785, + "epoch": 0.4837155798403911, + "kl_loss": 0.2191523164510727, + "loss_ib": 0.007334728725254536, + "step": 1682 + }, + { + "ce_ib": 6.365453720092773, + "ce_orig": 1.110253930091858, + "epoch": 0.4837155798403911, + "kl_loss": 0.158380389213562, + "loss_ib": 0.007949257269501686, + "step": 1682 + }, + { + "ce_ib": 3.3443350791931152, + "ce_orig": 0.5569667220115662, + "epoch": 0.4840031634193688, + "kl_loss": 0.3231843113899231, + "loss_ib": 0.006576178129762411, + "step": 1683 + }, + { + "ce_ib": 8.063350677490234, + "ce_orig": 1.0422242879867554, + "epoch": 0.4840031634193688, + "kl_loss": 0.23159486055374146, + "loss_ib": 0.010379299521446228, + "step": 1683 + }, + { + "ce_ib": 7.586836338043213, + "ce_orig": 1.3177027702331543, + "epoch": 0.4840031634193688, + "kl_loss": 0.2565996050834656, + "loss_ib": 0.010152831673622131, + "step": 1683 + }, + { + "ce_ib": 5.579047679901123, + "ce_orig": 0.4584742784500122, + "epoch": 0.4840031634193688, + "kl_loss": 0.3952004313468933, + "loss_ib": 0.009531051851809025, + "step": 1683 + }, + { + "ce_ib": 3.8018834590911865, + "ce_orig": 0.633375883102417, + "epoch": 0.4842907469983464, + "kl_loss": 0.21211303770542145, + "loss_ib": 0.005923013668507338, + "step": 1684 + }, + { + "ce_ib": 4.17789888381958, + "ce_orig": 0.6012802720069885, + "epoch": 0.4842907469983464, + "kl_loss": 0.3501446843147278, + "loss_ib": 0.007679345551878214, + "step": 1684 + }, + { + "ce_ib": 4.611387729644775, + "ce_orig": 1.0369935035705566, + "epoch": 0.4842907469983464, + "kl_loss": 0.18224164843559265, + "loss_ib": 0.006433804053813219, + "step": 1684 + }, + { + "ce_ib": 4.641064167022705, + "ce_orig": 0.7356343269348145, + "epoch": 0.4842907469983464, + "kl_loss": 0.18943347036838531, + "loss_ib": 0.0065353987738490105, + "step": 1684 + }, + { + "epoch": 0.484578330577324, + "grad_norm": 0.11663123965263367, + "learning_rate": 9.557782555397167e-06, + "loss": 0.8837, + "step": 1685 + }, + { + "ce_ib": 6.534969329833984, + "ce_orig": 1.1525304317474365, + "epoch": 0.484578330577324, + "kl_loss": 0.22389784455299377, + "loss_ib": 0.008773948065936565, + "step": 1685 + }, + { + "ce_ib": 8.140199661254883, + "ce_orig": 1.0339369773864746, + "epoch": 0.484578330577324, + "kl_loss": 0.15249694883823395, + "loss_ib": 0.009665168821811676, + "step": 1685 + }, + { + "ce_ib": 5.809932708740234, + "ce_orig": 0.920750081539154, + "epoch": 0.484578330577324, + "kl_loss": 0.3143186867237091, + "loss_ib": 0.00895311962813139, + "step": 1685 + }, + { + "ce_ib": 5.950395107269287, + "ce_orig": 1.0373817682266235, + "epoch": 0.484578330577324, + "kl_loss": 0.16437512636184692, + "loss_ib": 0.007594146765768528, + "step": 1685 + }, + { + "ce_ib": 7.124546527862549, + "ce_orig": 0.8699126243591309, + "epoch": 0.4848659141563017, + "kl_loss": 0.2191973328590393, + "loss_ib": 0.0093165198341012, + "step": 1686 + }, + { + "ce_ib": 6.38749361038208, + "ce_orig": 1.0310173034667969, + "epoch": 0.4848659141563017, + "kl_loss": 0.29276883602142334, + "loss_ib": 0.009315181523561478, + "step": 1686 + }, + { + "ce_ib": 7.34552001953125, + "ce_orig": 1.3071355819702148, + "epoch": 0.4848659141563017, + "kl_loss": 0.2939929962158203, + "loss_ib": 0.010285450145602226, + "step": 1686 + }, + { + "ce_ib": 2.83811354637146, + "ce_orig": 0.4443729817867279, + "epoch": 0.4848659141563017, + "kl_loss": 0.14913912117481232, + "loss_ib": 0.004329504910856485, + "step": 1686 + }, + { + "ce_ib": 4.3961286544799805, + "ce_orig": 0.6618458032608032, + "epoch": 0.4851534977352793, + "kl_loss": 0.2749791741371155, + "loss_ib": 0.007145920302718878, + "step": 1687 + }, + { + "ce_ib": 6.693999767303467, + "ce_orig": 0.6293303966522217, + "epoch": 0.4851534977352793, + "kl_loss": 0.2422647476196289, + "loss_ib": 0.009116646833717823, + "step": 1687 + }, + { + "ce_ib": 8.874431610107422, + "ce_orig": 1.4825538396835327, + "epoch": 0.4851534977352793, + "kl_loss": 0.297224223613739, + "loss_ib": 0.011846673674881458, + "step": 1687 + }, + { + "ce_ib": 6.874109745025635, + "ce_orig": 1.1482160091400146, + "epoch": 0.4851534977352793, + "kl_loss": 0.19153878092765808, + "loss_ib": 0.008789497427642345, + "step": 1687 + }, + { + "ce_ib": 3.8232715129852295, + "ce_orig": 0.6324473023414612, + "epoch": 0.48544108131425695, + "kl_loss": 0.16611912846565247, + "loss_ib": 0.005484462715685368, + "step": 1688 + }, + { + "ce_ib": 7.457026958465576, + "ce_orig": 0.8956002593040466, + "epoch": 0.48544108131425695, + "kl_loss": 0.31268706917762756, + "loss_ib": 0.010583898052573204, + "step": 1688 + }, + { + "ce_ib": 7.080108642578125, + "ce_orig": 1.149488925933838, + "epoch": 0.48544108131425695, + "kl_loss": 0.2502942979335785, + "loss_ib": 0.009583051316440105, + "step": 1688 + }, + { + "ce_ib": 5.92689323425293, + "ce_orig": 0.9622479677200317, + "epoch": 0.48544108131425695, + "kl_loss": 0.18768984079360962, + "loss_ib": 0.007803791668266058, + "step": 1688 + }, + { + "ce_ib": 6.139303207397461, + "ce_orig": 1.1526930332183838, + "epoch": 0.4857286648932346, + "kl_loss": 0.23713448643684387, + "loss_ib": 0.008510648272931576, + "step": 1689 + }, + { + "ce_ib": 4.866360664367676, + "ce_orig": 0.6933936476707458, + "epoch": 0.4857286648932346, + "kl_loss": 0.2967952489852905, + "loss_ib": 0.007834312506020069, + "step": 1689 + }, + { + "ce_ib": 6.797483444213867, + "ce_orig": 1.2159748077392578, + "epoch": 0.4857286648932346, + "kl_loss": 0.22910846769809723, + "loss_ib": 0.009088567458093166, + "step": 1689 + }, + { + "ce_ib": 6.406723499298096, + "ce_orig": 1.1615173816680908, + "epoch": 0.4857286648932346, + "kl_loss": 0.25945186614990234, + "loss_ib": 0.009001241996884346, + "step": 1689 + }, + { + "epoch": 0.48601624847221225, + "grad_norm": 0.11609376221895218, + "learning_rate": 9.554585999667897e-06, + "loss": 0.9146, + "step": 1690 + }, + { + "ce_ib": 5.247783184051514, + "ce_orig": 0.8090236186981201, + "epoch": 0.48601624847221225, + "kl_loss": 0.26968154311180115, + "loss_ib": 0.007944597862660885, + "step": 1690 + }, + { + "ce_ib": 3.077852725982666, + "ce_orig": 0.5081151127815247, + "epoch": 0.48601624847221225, + "kl_loss": 0.18126562237739563, + "loss_ib": 0.004890508949756622, + "step": 1690 + }, + { + "ce_ib": 3.773611307144165, + "ce_orig": 0.694117546081543, + "epoch": 0.48601624847221225, + "kl_loss": 0.13272148370742798, + "loss_ib": 0.005100825801491737, + "step": 1690 + }, + { + "ce_ib": 4.646406650543213, + "ce_orig": 1.1609487533569336, + "epoch": 0.48601624847221225, + "kl_loss": 0.15832726657390594, + "loss_ib": 0.006229679565876722, + "step": 1690 + }, + { + "ce_ib": 6.037997245788574, + "ce_orig": 0.8993503451347351, + "epoch": 0.4863038320511899, + "kl_loss": 0.27683573961257935, + "loss_ib": 0.008806354366242886, + "step": 1691 + }, + { + "ce_ib": 4.57020902633667, + "ce_orig": 0.7976517677307129, + "epoch": 0.4863038320511899, + "kl_loss": 0.2402155101299286, + "loss_ib": 0.006972364149987698, + "step": 1691 + }, + { + "ce_ib": 5.292080402374268, + "ce_orig": 0.5217359066009521, + "epoch": 0.4863038320511899, + "kl_loss": 0.3130798637866974, + "loss_ib": 0.008422878570854664, + "step": 1691 + }, + { + "ce_ib": 6.2566118240356445, + "ce_orig": 0.6656879782676697, + "epoch": 0.4863038320511899, + "kl_loss": 0.28265661001205444, + "loss_ib": 0.009083177894353867, + "step": 1691 + }, + { + "ce_ib": 1.7650432586669922, + "ce_orig": 0.2512088716030121, + "epoch": 0.4865914156301675, + "kl_loss": 0.533643364906311, + "loss_ib": 0.0071014766581356525, + "step": 1692 + }, + { + "ce_ib": 6.721052646636963, + "ce_orig": 0.9016904830932617, + "epoch": 0.4865914156301675, + "kl_loss": 0.2493845820426941, + "loss_ib": 0.009214898571372032, + "step": 1692 + }, + { + "ce_ib": 4.070987701416016, + "ce_orig": 0.5971701145172119, + "epoch": 0.4865914156301675, + "kl_loss": 0.2511335015296936, + "loss_ib": 0.006582322530448437, + "step": 1692 + }, + { + "ce_ib": 8.66051959991455, + "ce_orig": 1.652574062347412, + "epoch": 0.4865914156301675, + "kl_loss": 0.3365510106086731, + "loss_ib": 0.012026029638946056, + "step": 1692 + }, + { + "ce_ib": 4.093155384063721, + "ce_orig": 0.8512476682662964, + "epoch": 0.4868789992091452, + "kl_loss": 0.17010530829429626, + "loss_ib": 0.005794208496809006, + "step": 1693 + }, + { + "ce_ib": 4.806715965270996, + "ce_orig": 0.7924500703811646, + "epoch": 0.4868789992091452, + "kl_loss": 0.2046879529953003, + "loss_ib": 0.006853595841675997, + "step": 1693 + }, + { + "ce_ib": 1.9750059843063354, + "ce_orig": 0.19683994352817535, + "epoch": 0.4868789992091452, + "kl_loss": 0.41815072298049927, + "loss_ib": 0.006156513001769781, + "step": 1693 + }, + { + "ce_ib": 4.909409046173096, + "ce_orig": 0.6787269711494446, + "epoch": 0.4868789992091452, + "kl_loss": 0.2110307216644287, + "loss_ib": 0.007019716314971447, + "step": 1693 + }, + { + "ce_ib": 3.878619909286499, + "ce_orig": 0.5532967448234558, + "epoch": 0.4871665827881228, + "kl_loss": 0.18584373593330383, + "loss_ib": 0.00573705742135644, + "step": 1694 + }, + { + "ce_ib": 3.7735960483551025, + "ce_orig": 0.7870396971702576, + "epoch": 0.4871665827881228, + "kl_loss": 0.160109281539917, + "loss_ib": 0.005374689120799303, + "step": 1694 + }, + { + "ce_ib": 5.4384260177612305, + "ce_orig": 0.819983184337616, + "epoch": 0.4871665827881228, + "kl_loss": 0.28779086470603943, + "loss_ib": 0.008316334336996078, + "step": 1694 + }, + { + "ce_ib": 3.6858084201812744, + "ce_orig": 0.7565300464630127, + "epoch": 0.4871665827881228, + "kl_loss": 0.3524230420589447, + "loss_ib": 0.007210038602352142, + "step": 1694 + }, + { + "epoch": 0.4874541663671004, + "grad_norm": 0.10960856080055237, + "learning_rate": 9.551378470883813e-06, + "loss": 0.7927, + "step": 1695 + }, + { + "ce_ib": 6.876530170440674, + "ce_orig": 1.097631812095642, + "epoch": 0.4874541663671004, + "kl_loss": 0.23491376638412476, + "loss_ib": 0.009225667454302311, + "step": 1695 + }, + { + "ce_ib": 8.743326187133789, + "ce_orig": 1.2223474979400635, + "epoch": 0.4874541663671004, + "kl_loss": 0.26982706785202026, + "loss_ib": 0.011441596783697605, + "step": 1695 + }, + { + "ce_ib": 4.467686176300049, + "ce_orig": 0.7761092782020569, + "epoch": 0.4874541663671004, + "kl_loss": 0.266385555267334, + "loss_ib": 0.007131541613489389, + "step": 1695 + }, + { + "ce_ib": 3.9495129585266113, + "ce_orig": 0.6083857417106628, + "epoch": 0.4874541663671004, + "kl_loss": 0.25618743896484375, + "loss_ib": 0.0065113878808915615, + "step": 1695 + }, + { + "ce_ib": 7.466285705566406, + "ce_orig": 0.969895601272583, + "epoch": 0.48774174994607805, + "kl_loss": 0.27946212887763977, + "loss_ib": 0.010260907001793385, + "step": 1696 + }, + { + "ce_ib": 9.822277069091797, + "ce_orig": 1.6922423839569092, + "epoch": 0.48774174994607805, + "kl_loss": 0.18676882982254028, + "loss_ib": 0.01168996561318636, + "step": 1696 + }, + { + "ce_ib": 6.797215938568115, + "ce_orig": 0.7068189382553101, + "epoch": 0.48774174994607805, + "kl_loss": 0.2542659640312195, + "loss_ib": 0.009339875541627407, + "step": 1696 + }, + { + "ce_ib": 5.888164520263672, + "ce_orig": 0.7551953792572021, + "epoch": 0.48774174994607805, + "kl_loss": 0.2606009244918823, + "loss_ib": 0.008494174107909203, + "step": 1696 + }, + { + "ce_ib": 6.542970180511475, + "ce_orig": 0.7762916088104248, + "epoch": 0.48802933352505573, + "kl_loss": 0.17304621636867523, + "loss_ib": 0.008273432962596416, + "step": 1697 + }, + { + "ce_ib": 4.387442111968994, + "ce_orig": 0.9359598159790039, + "epoch": 0.48802933352505573, + "kl_loss": 0.21369890868663788, + "loss_ib": 0.006524431053549051, + "step": 1697 + }, + { + "ce_ib": 5.826173782348633, + "ce_orig": 1.0441244840621948, + "epoch": 0.48802933352505573, + "kl_loss": 0.16852568089962006, + "loss_ib": 0.007511430885642767, + "step": 1697 + }, + { + "ce_ib": 5.742081165313721, + "ce_orig": 0.3558730185031891, + "epoch": 0.48802933352505573, + "kl_loss": 0.2536010146141052, + "loss_ib": 0.008278091438114643, + "step": 1697 + }, + { + "ce_ib": 7.02601957321167, + "ce_orig": 0.9475264549255371, + "epoch": 0.48831691710403335, + "kl_loss": 0.30207914113998413, + "loss_ib": 0.01004681084305048, + "step": 1698 + }, + { + "ce_ib": 4.868920803070068, + "ce_orig": 0.8664445281028748, + "epoch": 0.48831691710403335, + "kl_loss": 0.33686017990112305, + "loss_ib": 0.008237523026764393, + "step": 1698 + }, + { + "ce_ib": 3.719809055328369, + "ce_orig": 0.790595531463623, + "epoch": 0.48831691710403335, + "kl_loss": 0.3065032958984375, + "loss_ib": 0.006784841883927584, + "step": 1698 + }, + { + "ce_ib": 3.9900901317596436, + "ce_orig": 0.6241580843925476, + "epoch": 0.48831691710403335, + "kl_loss": 0.16233143210411072, + "loss_ib": 0.005613404326140881, + "step": 1698 + }, + { + "ce_ib": 8.611964225769043, + "ce_orig": 0.6841998100280762, + "epoch": 0.488604500683011, + "kl_loss": 0.3877715468406677, + "loss_ib": 0.0124896802008152, + "step": 1699 + }, + { + "ce_ib": 2.7463784217834473, + "ce_orig": 0.5244596004486084, + "epoch": 0.488604500683011, + "kl_loss": 0.16525377333164215, + "loss_ib": 0.004398916382342577, + "step": 1699 + }, + { + "ce_ib": 1.7029069662094116, + "ce_orig": 0.17470017075538635, + "epoch": 0.488604500683011, + "kl_loss": 0.38435274362564087, + "loss_ib": 0.0055464343167841434, + "step": 1699 + }, + { + "ce_ib": 3.5958950519561768, + "ce_orig": 0.4831347167491913, + "epoch": 0.488604500683011, + "kl_loss": 0.27735060453414917, + "loss_ib": 0.0063694012351334095, + "step": 1699 + }, + { + "epoch": 0.48889208426198866, + "grad_norm": 0.12257218360900879, + "learning_rate": 9.548159976772593e-06, + "loss": 0.8168, + "step": 1700 + }, + { + "ce_ib": 6.5620198249816895, + "ce_orig": 0.6928233504295349, + "epoch": 0.48889208426198866, + "kl_loss": 0.2537040412425995, + "loss_ib": 0.009099059738218784, + "step": 1700 + }, + { + "ce_ib": 7.755258083343506, + "ce_orig": 1.2825660705566406, + "epoch": 0.48889208426198866, + "kl_loss": 0.2563188672065735, + "loss_ib": 0.010318445973098278, + "step": 1700 + }, + { + "ce_ib": 4.941026210784912, + "ce_orig": 0.6089584827423096, + "epoch": 0.48889208426198866, + "kl_loss": 0.259429931640625, + "loss_ib": 0.007535324897617102, + "step": 1700 + }, + { + "ce_ib": 5.35645055770874, + "ce_orig": 0.6481252908706665, + "epoch": 0.48889208426198866, + "kl_loss": 0.2527640461921692, + "loss_ib": 0.007884090766310692, + "step": 1700 + }, + { + "ce_ib": 2.423488140106201, + "ce_orig": 0.5764703154563904, + "epoch": 0.4891796678409663, + "kl_loss": 0.1315830647945404, + "loss_ib": 0.003739318810403347, + "step": 1701 + }, + { + "ce_ib": 5.227047920227051, + "ce_orig": 0.9303528666496277, + "epoch": 0.4891796678409663, + "kl_loss": 0.2286655604839325, + "loss_ib": 0.007513702847063541, + "step": 1701 + }, + { + "ce_ib": 5.084414482116699, + "ce_orig": 0.42619651556015015, + "epoch": 0.4891796678409663, + "kl_loss": 0.3433401584625244, + "loss_ib": 0.008517815731465816, + "step": 1701 + }, + { + "ce_ib": 3.6347930431365967, + "ce_orig": 0.6375098824501038, + "epoch": 0.4891796678409663, + "kl_loss": 0.19476991891860962, + "loss_ib": 0.005582492332905531, + "step": 1701 + }, + { + "ce_ib": 4.194334983825684, + "ce_orig": 0.6213921308517456, + "epoch": 0.4894672514199439, + "kl_loss": 0.25615689158439636, + "loss_ib": 0.0067559038288891315, + "step": 1702 + }, + { + "ce_ib": 4.74091100692749, + "ce_orig": 0.38939017057418823, + "epoch": 0.4894672514199439, + "kl_loss": 0.27375322580337524, + "loss_ib": 0.007478443440049887, + "step": 1702 + }, + { + "ce_ib": 5.847019672393799, + "ce_orig": 0.7315099239349365, + "epoch": 0.4894672514199439, + "kl_loss": 0.2402675598859787, + "loss_ib": 0.008249695412814617, + "step": 1702 + }, + { + "ce_ib": 5.618659019470215, + "ce_orig": 0.8388023972511292, + "epoch": 0.4894672514199439, + "kl_loss": 0.19676315784454346, + "loss_ib": 0.007586290594190359, + "step": 1702 + }, + { + "ce_ib": 5.9708709716796875, + "ce_orig": 0.9229080677032471, + "epoch": 0.4897548349989216, + "kl_loss": 0.19908156991004944, + "loss_ib": 0.007961686700582504, + "step": 1703 + }, + { + "ce_ib": 5.164056777954102, + "ce_orig": 1.0340594053268433, + "epoch": 0.4897548349989216, + "kl_loss": 0.18683184683322906, + "loss_ib": 0.007032375317066908, + "step": 1703 + }, + { + "ce_ib": 3.921279191970825, + "ce_orig": 0.46810030937194824, + "epoch": 0.4897548349989216, + "kl_loss": 0.24264341592788696, + "loss_ib": 0.006347713526338339, + "step": 1703 + }, + { + "ce_ib": 4.657447338104248, + "ce_orig": 0.8225224614143372, + "epoch": 0.4897548349989216, + "kl_loss": 0.18631809949874878, + "loss_ib": 0.006520627997815609, + "step": 1703 + }, + { + "ce_ib": 7.400676727294922, + "ce_orig": 0.8509280681610107, + "epoch": 0.4900424185778992, + "kl_loss": 0.2300112247467041, + "loss_ib": 0.00970078818500042, + "step": 1704 + }, + { + "ce_ib": 5.8490424156188965, + "ce_orig": 1.0918712615966797, + "epoch": 0.4900424185778992, + "kl_loss": 0.7357668876647949, + "loss_ib": 0.01320671010762453, + "step": 1704 + }, + { + "ce_ib": 7.443508625030518, + "ce_orig": 0.8640341758728027, + "epoch": 0.4900424185778992, + "kl_loss": 0.2282414734363556, + "loss_ib": 0.009725923649966717, + "step": 1704 + }, + { + "ce_ib": 9.343680381774902, + "ce_orig": 1.5816478729248047, + "epoch": 0.4900424185778992, + "kl_loss": 0.1807907372713089, + "loss_ib": 0.011151587590575218, + "step": 1704 + }, + { + "epoch": 0.49033000215687683, + "grad_norm": 0.13203872740268707, + "learning_rate": 9.544930525088339e-06, + "loss": 0.8373, + "step": 1705 + }, + { + "ce_ib": 6.998898506164551, + "ce_orig": 0.47324255108833313, + "epoch": 0.49033000215687683, + "kl_loss": 0.3926210403442383, + "loss_ib": 0.0109251094982028, + "step": 1705 + }, + { + "ce_ib": 6.503366470336914, + "ce_orig": 0.7842316031455994, + "epoch": 0.49033000215687683, + "kl_loss": 0.33447882533073425, + "loss_ib": 0.00984815414994955, + "step": 1705 + }, + { + "ce_ib": 2.677558422088623, + "ce_orig": 0.7216985821723938, + "epoch": 0.49033000215687683, + "kl_loss": 0.1252802014350891, + "loss_ib": 0.0039303600788116455, + "step": 1705 + }, + { + "ce_ib": 5.550953388214111, + "ce_orig": 0.8223257064819336, + "epoch": 0.49033000215687683, + "kl_loss": 0.17812271416187286, + "loss_ib": 0.007332180626690388, + "step": 1705 + }, + { + "ce_ib": 3.981729745864868, + "ce_orig": 0.36211925745010376, + "epoch": 0.49061758573585446, + "kl_loss": 0.3238323926925659, + "loss_ib": 0.00722005357965827, + "step": 1706 + }, + { + "ce_ib": 5.24422025680542, + "ce_orig": 0.6217085123062134, + "epoch": 0.49061758573585446, + "kl_loss": 0.21295107901096344, + "loss_ib": 0.00737373111769557, + "step": 1706 + }, + { + "ce_ib": 4.3960347175598145, + "ce_orig": 1.0565332174301147, + "epoch": 0.49061758573585446, + "kl_loss": 0.21014171838760376, + "loss_ib": 0.006497452035546303, + "step": 1706 + }, + { + "ce_ib": 9.517853736877441, + "ce_orig": 1.5981885194778442, + "epoch": 0.49061758573585446, + "kl_loss": 0.18369972705841064, + "loss_ib": 0.01135485153645277, + "step": 1706 + }, + { + "ce_ib": 6.477010250091553, + "ce_orig": 0.8719711303710938, + "epoch": 0.49090516931483213, + "kl_loss": 0.27074265480041504, + "loss_ib": 0.009184436872601509, + "step": 1707 + }, + { + "ce_ib": 6.044996738433838, + "ce_orig": 1.1001982688903809, + "epoch": 0.49090516931483213, + "kl_loss": 0.2015824168920517, + "loss_ib": 0.008060821332037449, + "step": 1707 + }, + { + "ce_ib": 3.7902770042419434, + "ce_orig": 0.6360217928886414, + "epoch": 0.49090516931483213, + "kl_loss": 0.1633738875389099, + "loss_ib": 0.00542401522397995, + "step": 1707 + }, + { + "ce_ib": 3.3657476902008057, + "ce_orig": 0.6742066144943237, + "epoch": 0.49090516931483213, + "kl_loss": 0.20251691341400146, + "loss_ib": 0.0053909169510006905, + "step": 1707 + }, + { + "ce_ib": 4.074330806732178, + "ce_orig": 0.7614818811416626, + "epoch": 0.49119275289380976, + "kl_loss": 0.2751924395561218, + "loss_ib": 0.00682625500485301, + "step": 1708 + }, + { + "ce_ib": 7.01824951171875, + "ce_orig": 0.9621512293815613, + "epoch": 0.49119275289380976, + "kl_loss": 0.17211195826530457, + "loss_ib": 0.008739368990063667, + "step": 1708 + }, + { + "ce_ib": 8.20371150970459, + "ce_orig": 1.1622143983840942, + "epoch": 0.49119275289380976, + "kl_loss": 0.2795262932777405, + "loss_ib": 0.010998973622918129, + "step": 1708 + }, + { + "ce_ib": 8.394777297973633, + "ce_orig": 1.366093635559082, + "epoch": 0.49119275289380976, + "kl_loss": 0.17209161818027496, + "loss_ib": 0.01011569332331419, + "step": 1708 + }, + { + "ce_ib": 3.742612600326538, + "ce_orig": 0.4698511064052582, + "epoch": 0.4914803364727874, + "kl_loss": 0.21373629570007324, + "loss_ib": 0.005879975389689207, + "step": 1709 + }, + { + "ce_ib": 6.780648708343506, + "ce_orig": 1.2006686925888062, + "epoch": 0.4914803364727874, + "kl_loss": 0.19385182857513428, + "loss_ib": 0.008719166740775108, + "step": 1709 + }, + { + "ce_ib": 4.651514053344727, + "ce_orig": 0.9274974465370178, + "epoch": 0.4914803364727874, + "kl_loss": 0.18124522268772125, + "loss_ib": 0.006463966332376003, + "step": 1709 + }, + { + "ce_ib": 3.968463897705078, + "ce_orig": 0.5359354019165039, + "epoch": 0.4914803364727874, + "kl_loss": 0.24947825074195862, + "loss_ib": 0.0064632464200258255, + "step": 1709 + }, + { + "epoch": 0.49176792005176506, + "grad_norm": 0.12094637006521225, + "learning_rate": 9.54169012361155e-06, + "loss": 0.885, + "step": 1710 + }, + { + "ce_ib": 6.588129997253418, + "ce_orig": 1.0155458450317383, + "epoch": 0.49176792005176506, + "kl_loss": 0.2495093196630478, + "loss_ib": 0.009083223529160023, + "step": 1710 + }, + { + "ce_ib": 3.638334274291992, + "ce_orig": 0.42263445258140564, + "epoch": 0.49176792005176506, + "kl_loss": 0.21503064036369324, + "loss_ib": 0.005788641050457954, + "step": 1710 + }, + { + "ce_ib": 6.531235218048096, + "ce_orig": 0.8453525304794312, + "epoch": 0.49176792005176506, + "kl_loss": 0.23209929466247559, + "loss_ib": 0.00885222852230072, + "step": 1710 + }, + { + "ce_ib": 7.706050872802734, + "ce_orig": 0.9590526819229126, + "epoch": 0.49176792005176506, + "kl_loss": 0.24353507161140442, + "loss_ib": 0.010141400620341301, + "step": 1710 + }, + { + "ce_ib": 3.765565872192383, + "ce_orig": 0.8244125247001648, + "epoch": 0.4920555036307427, + "kl_loss": 0.4936164319515228, + "loss_ib": 0.008701730519533157, + "step": 1711 + }, + { + "ce_ib": 7.067040920257568, + "ce_orig": 1.2625956535339355, + "epoch": 0.4920555036307427, + "kl_loss": 0.7286717891693115, + "loss_ib": 0.014353758655488491, + "step": 1711 + }, + { + "ce_ib": 8.84214973449707, + "ce_orig": 1.6408401727676392, + "epoch": 0.4920555036307427, + "kl_loss": 0.24816496670246124, + "loss_ib": 0.011323799379169941, + "step": 1711 + }, + { + "ce_ib": 8.242490768432617, + "ce_orig": 1.148856520652771, + "epoch": 0.4920555036307427, + "kl_loss": 0.2580086588859558, + "loss_ib": 0.010822577401995659, + "step": 1711 + }, + { + "ce_ib": 5.534130573272705, + "ce_orig": 0.8404598832130432, + "epoch": 0.4923430872097203, + "kl_loss": 0.3187624216079712, + "loss_ib": 0.008721754886209965, + "step": 1712 + }, + { + "ce_ib": 8.611713409423828, + "ce_orig": 1.3891855478286743, + "epoch": 0.4923430872097203, + "kl_loss": 0.2638487219810486, + "loss_ib": 0.011250199750065804, + "step": 1712 + }, + { + "ce_ib": 4.341231822967529, + "ce_orig": 0.5861788392066956, + "epoch": 0.4923430872097203, + "kl_loss": 0.28423434495925903, + "loss_ib": 0.007183575537055731, + "step": 1712 + }, + { + "ce_ib": 6.878125190734863, + "ce_orig": 1.4111419916152954, + "epoch": 0.4923430872097203, + "kl_loss": 0.2343723028898239, + "loss_ib": 0.009221848100423813, + "step": 1712 + }, + { + "ce_ib": 6.162860870361328, + "ce_orig": 0.9645307660102844, + "epoch": 0.492630670788698, + "kl_loss": 0.20777800679206848, + "loss_ib": 0.008240641094744205, + "step": 1713 + }, + { + "ce_ib": 8.0570707321167, + "ce_orig": 1.2794287204742432, + "epoch": 0.492630670788698, + "kl_loss": 0.2319069355726242, + "loss_ib": 0.010376139543950558, + "step": 1713 + }, + { + "ce_ib": 5.692114353179932, + "ce_orig": 0.6935074925422668, + "epoch": 0.492630670788698, + "kl_loss": 0.26182082295417786, + "loss_ib": 0.008310322649776936, + "step": 1713 + }, + { + "ce_ib": 3.0138778686523438, + "ce_orig": 0.468887597322464, + "epoch": 0.492630670788698, + "kl_loss": 0.23393811285495758, + "loss_ib": 0.005353258922696114, + "step": 1713 + }, + { + "ce_ib": 8.280229568481445, + "ce_orig": 1.1677502393722534, + "epoch": 0.4929182543676756, + "kl_loss": 0.2325466424226761, + "loss_ib": 0.010605696588754654, + "step": 1714 + }, + { + "ce_ib": 6.72873592376709, + "ce_orig": 0.803114652633667, + "epoch": 0.4929182543676756, + "kl_loss": 0.4147999882698059, + "loss_ib": 0.010876736603677273, + "step": 1714 + }, + { + "ce_ib": 7.0533599853515625, + "ce_orig": 0.8928973078727722, + "epoch": 0.4929182543676756, + "kl_loss": 0.26862865686416626, + "loss_ib": 0.009739646688103676, + "step": 1714 + }, + { + "ce_ib": 4.385868549346924, + "ce_orig": 0.8779287338256836, + "epoch": 0.4929182543676756, + "kl_loss": 0.21822282671928406, + "loss_ib": 0.006568096578121185, + "step": 1714 + }, + { + "epoch": 0.49320583794665324, + "grad_norm": 0.14142395555973053, + "learning_rate": 9.538438780149104e-06, + "loss": 0.8621, + "step": 1715 + }, + { + "ce_ib": 3.48097825050354, + "ce_orig": 0.6204516887664795, + "epoch": 0.49320583794665324, + "kl_loss": 0.1770043522119522, + "loss_ib": 0.005251022055745125, + "step": 1715 + }, + { + "ce_ib": 6.237753868103027, + "ce_orig": 0.5141828060150146, + "epoch": 0.49320583794665324, + "kl_loss": 0.3674449324607849, + "loss_ib": 0.009912203066051006, + "step": 1715 + }, + { + "ce_ib": 4.3492817878723145, + "ce_orig": 0.6501482129096985, + "epoch": 0.49320583794665324, + "kl_loss": 0.22237512469291687, + "loss_ib": 0.006573033053427935, + "step": 1715 + }, + { + "ce_ib": 7.455378532409668, + "ce_orig": 1.408462405204773, + "epoch": 0.49320583794665324, + "kl_loss": 0.18538546562194824, + "loss_ib": 0.009309233166277409, + "step": 1715 + }, + { + "ce_ib": 5.599409103393555, + "ce_orig": 0.5918640494346619, + "epoch": 0.49349342152563086, + "kl_loss": 0.20586490631103516, + "loss_ib": 0.007658058311790228, + "step": 1716 + }, + { + "ce_ib": 5.35037088394165, + "ce_orig": 1.1837389469146729, + "epoch": 0.49349342152563086, + "kl_loss": 0.27578169107437134, + "loss_ib": 0.008108187466859818, + "step": 1716 + }, + { + "ce_ib": 5.994269847869873, + "ce_orig": 0.7266632318496704, + "epoch": 0.49349342152563086, + "kl_loss": 0.2978155016899109, + "loss_ib": 0.008972425013780594, + "step": 1716 + }, + { + "ce_ib": 3.708055019378662, + "ce_orig": 0.8986055254936218, + "epoch": 0.49349342152563086, + "kl_loss": 0.28073328733444214, + "loss_ib": 0.006515387911349535, + "step": 1716 + }, + { + "ce_ib": 6.660898208618164, + "ce_orig": 1.04195237159729, + "epoch": 0.49378100510460854, + "kl_loss": 0.26174142956733704, + "loss_ib": 0.0092783123254776, + "step": 1717 + }, + { + "ce_ib": 6.0924787521362305, + "ce_orig": 0.7078341841697693, + "epoch": 0.49378100510460854, + "kl_loss": 0.2791142761707306, + "loss_ib": 0.008883621543645859, + "step": 1717 + }, + { + "ce_ib": 3.7908501625061035, + "ce_orig": 0.791426956653595, + "epoch": 0.49378100510460854, + "kl_loss": 0.20216532051563263, + "loss_ib": 0.005812503397464752, + "step": 1717 + }, + { + "ce_ib": 2.7622318267822266, + "ce_orig": 0.3543376922607422, + "epoch": 0.49378100510460854, + "kl_loss": 0.22950060665607452, + "loss_ib": 0.005057237576693296, + "step": 1717 + }, + { + "ce_ib": 4.37089204788208, + "ce_orig": 0.7407205700874329, + "epoch": 0.49406858868358616, + "kl_loss": 0.2812882661819458, + "loss_ib": 0.007183774374425411, + "step": 1718 + }, + { + "ce_ib": 4.498960018157959, + "ce_orig": 0.6540777683258057, + "epoch": 0.49406858868358616, + "kl_loss": 0.18491533398628235, + "loss_ib": 0.006348113063722849, + "step": 1718 + }, + { + "ce_ib": 8.588423728942871, + "ce_orig": 1.2199863195419312, + "epoch": 0.49406858868358616, + "kl_loss": 0.3112899363040924, + "loss_ib": 0.01170132216066122, + "step": 1718 + }, + { + "ce_ib": 8.444191932678223, + "ce_orig": 1.4956631660461426, + "epoch": 0.49406858868358616, + "kl_loss": 0.2000323385000229, + "loss_ib": 0.010444514453411102, + "step": 1718 + }, + { + "ce_ib": 7.196175575256348, + "ce_orig": 0.8098769187927246, + "epoch": 0.4943561722625638, + "kl_loss": 0.289365291595459, + "loss_ib": 0.010089828632771969, + "step": 1719 + }, + { + "ce_ib": 7.820986747741699, + "ce_orig": 1.6552858352661133, + "epoch": 0.4943561722625638, + "kl_loss": 0.2239806354045868, + "loss_ib": 0.010060792788863182, + "step": 1719 + }, + { + "ce_ib": 6.495529651641846, + "ce_orig": 0.9777346849441528, + "epoch": 0.4943561722625638, + "kl_loss": 0.24929951131343842, + "loss_ib": 0.008988524787127972, + "step": 1719 + }, + { + "ce_ib": 5.176101207733154, + "ce_orig": 0.6900345087051392, + "epoch": 0.4943561722625638, + "kl_loss": 0.3611376881599426, + "loss_ib": 0.008787478320300579, + "step": 1719 + }, + { + "epoch": 0.49464375584154147, + "grad_norm": 0.13287211954593658, + "learning_rate": 9.535176502534242e-06, + "loss": 0.8975, + "step": 1720 + }, + { + "ce_ib": 8.692408561706543, + "ce_orig": 1.4137736558914185, + "epoch": 0.49464375584154147, + "kl_loss": 0.2862509787082672, + "loss_ib": 0.011554918251931667, + "step": 1720 + }, + { + "ce_ib": 7.457424163818359, + "ce_orig": 1.2316714525222778, + "epoch": 0.49464375584154147, + "kl_loss": 0.21747153997421265, + "loss_ib": 0.009632139466702938, + "step": 1720 + }, + { + "ce_ib": 4.586647987365723, + "ce_orig": 0.817991316318512, + "epoch": 0.49464375584154147, + "kl_loss": 0.20044812560081482, + "loss_ib": 0.006591129116714001, + "step": 1720 + }, + { + "ce_ib": 2.7127957344055176, + "ce_orig": 0.5314325094223022, + "epoch": 0.49464375584154147, + "kl_loss": 0.2668687701225281, + "loss_ib": 0.005381483118981123, + "step": 1720 + }, + { + "ce_ib": 3.999713182449341, + "ce_orig": 0.7891056537628174, + "epoch": 0.4949313394205191, + "kl_loss": 0.18380488455295563, + "loss_ib": 0.005837762262672186, + "step": 1721 + }, + { + "ce_ib": 5.790007591247559, + "ce_orig": 0.7806348204612732, + "epoch": 0.4949313394205191, + "kl_loss": 0.21094569563865662, + "loss_ib": 0.00789946410804987, + "step": 1721 + }, + { + "ce_ib": 5.886396408081055, + "ce_orig": 0.912238359451294, + "epoch": 0.4949313394205191, + "kl_loss": 0.24456319212913513, + "loss_ib": 0.008332028053700924, + "step": 1721 + }, + { + "ce_ib": 4.271791934967041, + "ce_orig": 0.7383297085762024, + "epoch": 0.4949313394205191, + "kl_loss": 0.24236194789409637, + "loss_ib": 0.006695411168038845, + "step": 1721 + }, + { + "ce_ib": 3.6333987712860107, + "ce_orig": 0.7166858911514282, + "epoch": 0.4952189229994967, + "kl_loss": 0.2111375331878662, + "loss_ib": 0.0057447743602097034, + "step": 1722 + }, + { + "ce_ib": 5.225841999053955, + "ce_orig": 0.6549127697944641, + "epoch": 0.4952189229994967, + "kl_loss": 0.2381574958562851, + "loss_ib": 0.007607416715472937, + "step": 1722 + }, + { + "ce_ib": 4.282773971557617, + "ce_orig": 0.6731183528900146, + "epoch": 0.4952189229994967, + "kl_loss": 0.14624956250190735, + "loss_ib": 0.005745269358158112, + "step": 1722 + }, + { + "ce_ib": 9.602765083312988, + "ce_orig": 1.1258394718170166, + "epoch": 0.4952189229994967, + "kl_loss": 0.18037505447864532, + "loss_ib": 0.01140651572495699, + "step": 1722 + }, + { + "ce_ib": 3.2537600994110107, + "ce_orig": 0.40181875228881836, + "epoch": 0.4955065065784744, + "kl_loss": 0.18366406857967377, + "loss_ib": 0.005090400576591492, + "step": 1723 + }, + { + "ce_ib": 6.9169416427612305, + "ce_orig": 0.6074340343475342, + "epoch": 0.4955065065784744, + "kl_loss": 0.30570995807647705, + "loss_ib": 0.009974041022360325, + "step": 1723 + }, + { + "ce_ib": 5.034404754638672, + "ce_orig": 0.934673011302948, + "epoch": 0.4955065065784744, + "kl_loss": 0.2834550738334656, + "loss_ib": 0.007868955843150616, + "step": 1723 + }, + { + "ce_ib": 3.9033546447753906, + "ce_orig": 0.59845370054245, + "epoch": 0.4955065065784744, + "kl_loss": 0.1284828931093216, + "loss_ib": 0.005188183858990669, + "step": 1723 + }, + { + "ce_ib": 3.368408203125, + "ce_orig": 0.4007030129432678, + "epoch": 0.495794090157452, + "kl_loss": 0.3604559302330017, + "loss_ib": 0.006972967181354761, + "step": 1724 + }, + { + "ce_ib": 5.497180938720703, + "ce_orig": 0.9960641264915466, + "epoch": 0.495794090157452, + "kl_loss": 0.2774682641029358, + "loss_ib": 0.00827186368405819, + "step": 1724 + }, + { + "ce_ib": 4.814739227294922, + "ce_orig": 0.8194516897201538, + "epoch": 0.495794090157452, + "kl_loss": 0.23562568426132202, + "loss_ib": 0.007170995697379112, + "step": 1724 + }, + { + "ce_ib": 6.389047145843506, + "ce_orig": 0.9979404211044312, + "epoch": 0.495794090157452, + "kl_loss": 0.2550808787345886, + "loss_ib": 0.008939855732023716, + "step": 1724 + }, + { + "epoch": 0.49608167373642964, + "grad_norm": 0.1158408373594284, + "learning_rate": 9.53190329862655e-06, + "loss": 0.8712, + "step": 1725 + }, + { + "ce_ib": 4.169690132141113, + "ce_orig": 0.6141831874847412, + "epoch": 0.49608167373642964, + "kl_loss": 0.25308936834335327, + "loss_ib": 0.006700583733618259, + "step": 1725 + }, + { + "ce_ib": 3.0598349571228027, + "ce_orig": 0.6335474252700806, + "epoch": 0.49608167373642964, + "kl_loss": 0.6865431666374207, + "loss_ib": 0.009925266727805138, + "step": 1725 + }, + { + "ce_ib": 4.783539772033691, + "ce_orig": 0.5297357439994812, + "epoch": 0.49608167373642964, + "kl_loss": 0.27161890268325806, + "loss_ib": 0.0074997288174927235, + "step": 1725 + }, + { + "ce_ib": 4.168352127075195, + "ce_orig": 0.6996920108795166, + "epoch": 0.49608167373642964, + "kl_loss": 0.2507363557815552, + "loss_ib": 0.00667571509256959, + "step": 1725 + }, + { + "ce_ib": 4.639172077178955, + "ce_orig": 0.7657274603843689, + "epoch": 0.49636925731540726, + "kl_loss": 0.2979816198348999, + "loss_ib": 0.007618988398462534, + "step": 1726 + }, + { + "ce_ib": 3.389683246612549, + "ce_orig": 0.7435678243637085, + "epoch": 0.49636925731540726, + "kl_loss": 0.20240336656570435, + "loss_ib": 0.005413717124611139, + "step": 1726 + }, + { + "ce_ib": 8.156920433044434, + "ce_orig": 1.2631562948226929, + "epoch": 0.49636925731540726, + "kl_loss": 0.216718852519989, + "loss_ib": 0.01032410841435194, + "step": 1726 + }, + { + "ce_ib": 9.228316307067871, + "ce_orig": 1.7304272651672363, + "epoch": 0.49636925731540726, + "kl_loss": 0.21756497025489807, + "loss_ib": 0.011403965763747692, + "step": 1726 + }, + { + "ce_ib": 5.42836856842041, + "ce_orig": 0.6177011132240295, + "epoch": 0.49665684089438494, + "kl_loss": 0.25108325481414795, + "loss_ib": 0.007939200848340988, + "step": 1727 + }, + { + "ce_ib": 5.849068641662598, + "ce_orig": 1.0078530311584473, + "epoch": 0.49665684089438494, + "kl_loss": 0.2704865634441376, + "loss_ib": 0.008553934283554554, + "step": 1727 + }, + { + "ce_ib": 7.377047061920166, + "ce_orig": 1.1187243461608887, + "epoch": 0.49665684089438494, + "kl_loss": 0.2173384428024292, + "loss_ib": 0.009550430811941624, + "step": 1727 + }, + { + "ce_ib": 7.3513617515563965, + "ce_orig": 0.6316287517547607, + "epoch": 0.49665684089438494, + "kl_loss": 0.23208868503570557, + "loss_ib": 0.009672248736023903, + "step": 1727 + }, + { + "ce_ib": 7.48237419128418, + "ce_orig": 0.7925584316253662, + "epoch": 0.49694442447336257, + "kl_loss": 0.36000531911849976, + "loss_ib": 0.011082427576184273, + "step": 1728 + }, + { + "ce_ib": 4.199851989746094, + "ce_orig": 0.7553547620773315, + "epoch": 0.49694442447336257, + "kl_loss": 0.3046613931655884, + "loss_ib": 0.0072464654222130775, + "step": 1728 + }, + { + "ce_ib": 3.7874884605407715, + "ce_orig": 0.675774872303009, + "epoch": 0.49694442447336257, + "kl_loss": 0.1680212914943695, + "loss_ib": 0.0054677012376487255, + "step": 1728 + }, + { + "ce_ib": 6.877801895141602, + "ce_orig": 1.1077038049697876, + "epoch": 0.49694442447336257, + "kl_loss": 0.24500277638435364, + "loss_ib": 0.00932782981544733, + "step": 1728 + }, + { + "ce_ib": 4.3296709060668945, + "ce_orig": 0.46692103147506714, + "epoch": 0.4972320080523402, + "kl_loss": 0.336995393037796, + "loss_ib": 0.007699624635279179, + "step": 1729 + }, + { + "ce_ib": 4.633038520812988, + "ce_orig": 0.8983129262924194, + "epoch": 0.4972320080523402, + "kl_loss": 0.30017250776290894, + "loss_ib": 0.007634763605892658, + "step": 1729 + }, + { + "ce_ib": 7.470649719238281, + "ce_orig": 1.2206380367279053, + "epoch": 0.4972320080523402, + "kl_loss": 0.2591376006603241, + "loss_ib": 0.010062025859951973, + "step": 1729 + }, + { + "ce_ib": 6.334610462188721, + "ce_orig": 0.8265384435653687, + "epoch": 0.4972320080523402, + "kl_loss": 0.2139129638671875, + "loss_ib": 0.008473739959299564, + "step": 1729 + }, + { + "epoch": 0.49751959163131787, + "grad_norm": 0.1053248941898346, + "learning_rate": 9.528619176311933e-06, + "loss": 0.9408, + "step": 1730 + }, + { + "ce_ib": 6.331774711608887, + "ce_orig": 1.1401456594467163, + "epoch": 0.49751959163131787, + "kl_loss": 0.2342427670955658, + "loss_ib": 0.008674201555550098, + "step": 1730 + }, + { + "ce_ib": 8.644342422485352, + "ce_orig": 1.4284965991973877, + "epoch": 0.49751959163131787, + "kl_loss": 0.2252599000930786, + "loss_ib": 0.010896940715610981, + "step": 1730 + }, + { + "ce_ib": 4.456500053405762, + "ce_orig": 0.5329476594924927, + "epoch": 0.49751959163131787, + "kl_loss": 0.19349047541618347, + "loss_ib": 0.006391404662281275, + "step": 1730 + }, + { + "ce_ib": 4.347330093383789, + "ce_orig": 0.8360463380813599, + "epoch": 0.49751959163131787, + "kl_loss": 0.2571093440055847, + "loss_ib": 0.006918422877788544, + "step": 1730 + }, + { + "ce_ib": 4.1414971351623535, + "ce_orig": 0.5835949778556824, + "epoch": 0.4978071752102955, + "kl_loss": 0.15816175937652588, + "loss_ib": 0.005723115056753159, + "step": 1731 + }, + { + "ce_ib": 3.465496778488159, + "ce_orig": 0.6377593278884888, + "epoch": 0.4978071752102955, + "kl_loss": 0.185111865401268, + "loss_ib": 0.00531661557033658, + "step": 1731 + }, + { + "ce_ib": 6.669215202331543, + "ce_orig": 0.6736965775489807, + "epoch": 0.4978071752102955, + "kl_loss": 0.2598609924316406, + "loss_ib": 0.00926782563328743, + "step": 1731 + }, + { + "ce_ib": 3.3897271156311035, + "ce_orig": 0.5642126202583313, + "epoch": 0.4978071752102955, + "kl_loss": 0.2248254418373108, + "loss_ib": 0.005637981928884983, + "step": 1731 + }, + { + "ce_ib": 8.572836875915527, + "ce_orig": 1.7926554679870605, + "epoch": 0.4980947587892731, + "kl_loss": 0.24958643317222595, + "loss_ib": 0.01106870174407959, + "step": 1732 + }, + { + "ce_ib": 4.426803112030029, + "ce_orig": 0.6430197954177856, + "epoch": 0.4980947587892731, + "kl_loss": 0.31002277135849, + "loss_ib": 0.007527030538767576, + "step": 1732 + }, + { + "ce_ib": 4.7570929527282715, + "ce_orig": 0.9767276048660278, + "epoch": 0.4980947587892731, + "kl_loss": 0.18130776286125183, + "loss_ib": 0.006570170167833567, + "step": 1732 + }, + { + "ce_ib": 5.817841529846191, + "ce_orig": 0.6756792664527893, + "epoch": 0.4980947587892731, + "kl_loss": 0.24906647205352783, + "loss_ib": 0.008308506570756435, + "step": 1732 + }, + { + "ce_ib": 5.906297206878662, + "ce_orig": 0.8869830369949341, + "epoch": 0.4983823423682508, + "kl_loss": 0.3161448538303375, + "loss_ib": 0.009067745879292488, + "step": 1733 + }, + { + "ce_ib": 3.069126844406128, + "ce_orig": 0.5189517736434937, + "epoch": 0.4983823423682508, + "kl_loss": 0.24513983726501465, + "loss_ib": 0.005520524922758341, + "step": 1733 + }, + { + "ce_ib": 4.824000358581543, + "ce_orig": 0.6937529444694519, + "epoch": 0.4983823423682508, + "kl_loss": 0.25546473264694214, + "loss_ib": 0.007378647103905678, + "step": 1733 + }, + { + "ce_ib": 4.200857162475586, + "ce_orig": 0.6918690800666809, + "epoch": 0.4983823423682508, + "kl_loss": 0.23723284900188446, + "loss_ib": 0.006573185790330172, + "step": 1733 + }, + { + "ce_ib": 7.380935192108154, + "ce_orig": 1.0280795097351074, + "epoch": 0.4986699259472284, + "kl_loss": 0.201813206076622, + "loss_ib": 0.009399067610502243, + "step": 1734 + }, + { + "ce_ib": 6.978311538696289, + "ce_orig": 1.288365364074707, + "epoch": 0.4986699259472284, + "kl_loss": 0.22564837336540222, + "loss_ib": 0.009234795346856117, + "step": 1734 + }, + { + "ce_ib": 4.881115913391113, + "ce_orig": 0.6921979188919067, + "epoch": 0.4986699259472284, + "kl_loss": 0.25443413853645325, + "loss_ib": 0.007425457239151001, + "step": 1734 + }, + { + "ce_ib": 2.8651721477508545, + "ce_orig": 0.28916382789611816, + "epoch": 0.4986699259472284, + "kl_loss": 0.5442566871643066, + "loss_ib": 0.008307739160954952, + "step": 1734 + }, + { + "epoch": 0.49895750952620604, + "grad_norm": 0.12058666348457336, + "learning_rate": 9.525324143502607e-06, + "loss": 0.8673, + "step": 1735 + }, + { + "ce_ib": 8.609652519226074, + "ce_orig": 1.4004709720611572, + "epoch": 0.49895750952620604, + "kl_loss": 0.2570205330848694, + "loss_ib": 0.011179856956005096, + "step": 1735 + }, + { + "ce_ib": 5.465366363525391, + "ce_orig": 0.6692785620689392, + "epoch": 0.49895750952620604, + "kl_loss": 0.22908666729927063, + "loss_ib": 0.007756233215332031, + "step": 1735 + }, + { + "ce_ib": 4.082115173339844, + "ce_orig": 0.6186438798904419, + "epoch": 0.49895750952620604, + "kl_loss": 0.2082287073135376, + "loss_ib": 0.006164402235299349, + "step": 1735 + }, + { + "ce_ib": 3.701429843902588, + "ce_orig": 0.5794656276702881, + "epoch": 0.49895750952620604, + "kl_loss": 0.22643627226352692, + "loss_ib": 0.005965792573988438, + "step": 1735 + }, + { + "ce_ib": 7.719645023345947, + "ce_orig": 1.1597990989685059, + "epoch": 0.49924509310518367, + "kl_loss": 0.21026304364204407, + "loss_ib": 0.00982227548956871, + "step": 1736 + }, + { + "ce_ib": 3.7463462352752686, + "ce_orig": 0.7905706763267517, + "epoch": 0.49924509310518367, + "kl_loss": 0.18672803044319153, + "loss_ib": 0.005613625980913639, + "step": 1736 + }, + { + "ce_ib": 5.696522235870361, + "ce_orig": 1.1005171537399292, + "epoch": 0.49924509310518367, + "kl_loss": 0.248357892036438, + "loss_ib": 0.008180101402103901, + "step": 1736 + }, + { + "ce_ib": 8.115674018859863, + "ce_orig": 1.2610074281692505, + "epoch": 0.49924509310518367, + "kl_loss": 0.2915061116218567, + "loss_ib": 0.011030735448002815, + "step": 1736 + }, + { + "ce_ib": 4.881083011627197, + "ce_orig": 0.8494547009468079, + "epoch": 0.49953267668416135, + "kl_loss": 0.1594853401184082, + "loss_ib": 0.006475936155766249, + "step": 1737 + }, + { + "ce_ib": 5.13024377822876, + "ce_orig": 0.8937183022499084, + "epoch": 0.49953267668416135, + "kl_loss": 0.1997142732143402, + "loss_ib": 0.007127386052161455, + "step": 1737 + }, + { + "ce_ib": 4.386274337768555, + "ce_orig": 0.7156038880348206, + "epoch": 0.49953267668416135, + "kl_loss": 0.21608230471611023, + "loss_ib": 0.006547097582370043, + "step": 1737 + }, + { + "ce_ib": 6.867334842681885, + "ce_orig": 1.0097808837890625, + "epoch": 0.49953267668416135, + "kl_loss": 0.28443610668182373, + "loss_ib": 0.009711695834994316, + "step": 1737 + }, + { + "ce_ib": 3.524717330932617, + "ce_orig": 0.3500223755836487, + "epoch": 0.49982026026313897, + "kl_loss": 0.27112168073654175, + "loss_ib": 0.00623593432828784, + "step": 1738 + }, + { + "ce_ib": 6.105541229248047, + "ce_orig": 1.2126398086547852, + "epoch": 0.49982026026313897, + "kl_loss": 0.1625138521194458, + "loss_ib": 0.007730680052191019, + "step": 1738 + }, + { + "ce_ib": 5.947635173797607, + "ce_orig": 1.0524487495422363, + "epoch": 0.49982026026313897, + "kl_loss": 0.32878363132476807, + "loss_ib": 0.009235471487045288, + "step": 1738 + }, + { + "ce_ib": 4.0039896965026855, + "ce_orig": 0.6879218816757202, + "epoch": 0.49982026026313897, + "kl_loss": 0.2660224735736847, + "loss_ib": 0.006664214190095663, + "step": 1738 + }, + { + "ce_ib": 7.965237617492676, + "ce_orig": 1.1455343961715698, + "epoch": 0.5001078438421166, + "kl_loss": 0.2807881236076355, + "loss_ib": 0.01077311858534813, + "step": 1739 + }, + { + "ce_ib": 5.267090797424316, + "ce_orig": 0.6191811561584473, + "epoch": 0.5001078438421166, + "kl_loss": 0.23513248562812805, + "loss_ib": 0.007618415169417858, + "step": 1739 + }, + { + "ce_ib": 4.817395210266113, + "ce_orig": 0.6077452898025513, + "epoch": 0.5001078438421166, + "kl_loss": 0.33568328619003296, + "loss_ib": 0.008174227550625801, + "step": 1739 + }, + { + "ce_ib": 4.480566501617432, + "ce_orig": 0.9865591526031494, + "epoch": 0.5001078438421166, + "kl_loss": 0.23918387293815613, + "loss_ib": 0.006872405298054218, + "step": 1739 + }, + { + "epoch": 0.5003954274210942, + "grad_norm": 0.12100203335285187, + "learning_rate": 9.522018208137066e-06, + "loss": 0.8901, + "step": 1740 + }, + { + "ce_ib": 11.01678466796875, + "ce_orig": 1.4696050882339478, + "epoch": 0.5003954274210942, + "kl_loss": 0.2423844039440155, + "loss_ib": 0.013440628536045551, + "step": 1740 + }, + { + "ce_ib": 6.453488826751709, + "ce_orig": 0.8022169470787048, + "epoch": 0.5003954274210942, + "kl_loss": 0.17600062489509583, + "loss_ib": 0.008213494904339314, + "step": 1740 + }, + { + "ce_ib": 6.403378009796143, + "ce_orig": 1.1907107830047607, + "epoch": 0.5003954274210942, + "kl_loss": 0.34092897176742554, + "loss_ib": 0.009812667965888977, + "step": 1740 + }, + { + "ce_ib": 3.2982351779937744, + "ce_orig": 0.6578614711761475, + "epoch": 0.5003954274210942, + "kl_loss": 0.27500489354133606, + "loss_ib": 0.006048284005373716, + "step": 1740 + }, + { + "ce_ib": 6.678479194641113, + "ce_orig": 1.2896283864974976, + "epoch": 0.5006830110000718, + "kl_loss": 0.32643431425094604, + "loss_ib": 0.00994282215833664, + "step": 1741 + }, + { + "ce_ib": 7.29425048828125, + "ce_orig": 0.7709023356437683, + "epoch": 0.5006830110000718, + "kl_loss": 0.20930366218090057, + "loss_ib": 0.009387287311255932, + "step": 1741 + }, + { + "ce_ib": 4.5514702796936035, + "ce_orig": 0.950491189956665, + "epoch": 0.5006830110000718, + "kl_loss": 0.22086811065673828, + "loss_ib": 0.0067601511254906654, + "step": 1741 + }, + { + "ce_ib": 5.60288667678833, + "ce_orig": 0.6810688972473145, + "epoch": 0.5006830110000718, + "kl_loss": 0.3826490044593811, + "loss_ib": 0.00942937657237053, + "step": 1741 + }, + { + "ce_ib": 9.729187965393066, + "ce_orig": 1.1416085958480835, + "epoch": 0.5009705945790496, + "kl_loss": 0.2906084656715393, + "loss_ib": 0.01263527199625969, + "step": 1742 + }, + { + "ce_ib": 3.0167596340179443, + "ce_orig": 0.6279767155647278, + "epoch": 0.5009705945790496, + "kl_loss": 0.2155575305223465, + "loss_ib": 0.005172334611415863, + "step": 1742 + }, + { + "ce_ib": 3.6063148975372314, + "ce_orig": 0.5931623578071594, + "epoch": 0.5009705945790496, + "kl_loss": 0.20270030200481415, + "loss_ib": 0.005633317865431309, + "step": 1742 + }, + { + "ce_ib": 3.6025731563568115, + "ce_orig": 0.504107654094696, + "epoch": 0.5009705945790496, + "kl_loss": 0.31339216232299805, + "loss_ib": 0.006736494600772858, + "step": 1742 + }, + { + "ce_ib": 6.566645622253418, + "ce_orig": 1.1916468143463135, + "epoch": 0.5012581781580272, + "kl_loss": 0.23999133706092834, + "loss_ib": 0.008966558612883091, + "step": 1743 + }, + { + "ce_ib": 4.4429450035095215, + "ce_orig": 0.7111625671386719, + "epoch": 0.5012581781580272, + "kl_loss": 0.1585032194852829, + "loss_ib": 0.0060279774479568005, + "step": 1743 + }, + { + "ce_ib": 3.6084020137786865, + "ce_orig": 0.5784590840339661, + "epoch": 0.5012581781580272, + "kl_loss": 0.19830071926116943, + "loss_ib": 0.005591409280896187, + "step": 1743 + }, + { + "ce_ib": 5.667142391204834, + "ce_orig": 0.7029927372932434, + "epoch": 0.5012581781580272, + "kl_loss": 0.2522284984588623, + "loss_ib": 0.008189426735043526, + "step": 1743 + }, + { + "ce_ib": 7.597262382507324, + "ce_orig": 0.6795341968536377, + "epoch": 0.5015457617370048, + "kl_loss": 0.5472317337989807, + "loss_ib": 0.013069579377770424, + "step": 1744 + }, + { + "ce_ib": 4.5865702629089355, + "ce_orig": 0.8706312775611877, + "epoch": 0.5015457617370048, + "kl_loss": 0.39241304993629456, + "loss_ib": 0.008510700426995754, + "step": 1744 + }, + { + "ce_ib": 6.553436279296875, + "ce_orig": 1.0656144618988037, + "epoch": 0.5015457617370048, + "kl_loss": 0.27133724093437195, + "loss_ib": 0.00926680862903595, + "step": 1744 + }, + { + "ce_ib": 3.608102321624756, + "ce_orig": 0.8301694393157959, + "epoch": 0.5015457617370048, + "kl_loss": 0.21063321828842163, + "loss_ib": 0.005714434199035168, + "step": 1744 + }, + { + "epoch": 0.5018333453159824, + "grad_norm": 0.12239973247051239, + "learning_rate": 9.518701378180082e-06, + "loss": 0.8828, + "step": 1745 + }, + { + "ce_ib": 4.432754993438721, + "ce_orig": 0.6014177799224854, + "epoch": 0.5018333453159824, + "kl_loss": 0.17354170978069305, + "loss_ib": 0.006168172229081392, + "step": 1745 + }, + { + "ce_ib": 2.7183847427368164, + "ce_orig": 0.3308621048927307, + "epoch": 0.5018333453159824, + "kl_loss": 0.4907413721084595, + "loss_ib": 0.007625798229128122, + "step": 1745 + }, + { + "ce_ib": 6.574141979217529, + "ce_orig": 1.18874192237854, + "epoch": 0.5018333453159824, + "kl_loss": 0.24722984433174133, + "loss_ib": 0.00904644001275301, + "step": 1745 + }, + { + "ce_ib": 3.3298656940460205, + "ce_orig": 0.6092104911804199, + "epoch": 0.5018333453159824, + "kl_loss": 0.21840175986289978, + "loss_ib": 0.0055138831958174706, + "step": 1745 + }, + { + "ce_ib": 3.5292844772338867, + "ce_orig": 0.6854714155197144, + "epoch": 0.5021209288949601, + "kl_loss": 0.1925717145204544, + "loss_ib": 0.0054550012573599815, + "step": 1746 + }, + { + "ce_ib": 6.507283687591553, + "ce_orig": 1.2124181985855103, + "epoch": 0.5021209288949601, + "kl_loss": 0.3079559803009033, + "loss_ib": 0.00958684366196394, + "step": 1746 + }, + { + "ce_ib": 3.4478015899658203, + "ce_orig": 0.609721302986145, + "epoch": 0.5021209288949601, + "kl_loss": 0.15236534178256989, + "loss_ib": 0.004971455316990614, + "step": 1746 + }, + { + "ce_ib": 5.8801045417785645, + "ce_orig": 0.7739959955215454, + "epoch": 0.5021209288949601, + "kl_loss": 0.3879520297050476, + "loss_ib": 0.009759625419974327, + "step": 1746 + }, + { + "ce_ib": 2.8765039443969727, + "ce_orig": 0.444017618894577, + "epoch": 0.5024085124739377, + "kl_loss": 0.17314675450325012, + "loss_ib": 0.004607971291989088, + "step": 1747 + }, + { + "ce_ib": 4.182227611541748, + "ce_orig": 0.40417736768722534, + "epoch": 0.5024085124739377, + "kl_loss": 0.2266075611114502, + "loss_ib": 0.00644830334931612, + "step": 1747 + }, + { + "ce_ib": 4.521939277648926, + "ce_orig": 0.7604411840438843, + "epoch": 0.5024085124739377, + "kl_loss": 0.1887277364730835, + "loss_ib": 0.0064092171378433704, + "step": 1747 + }, + { + "ce_ib": 5.178815841674805, + "ce_orig": 0.853346049785614, + "epoch": 0.5024085124739377, + "kl_loss": 0.2517073452472687, + "loss_ib": 0.007695889100432396, + "step": 1747 + }, + { + "ce_ib": 6.200848579406738, + "ce_orig": 0.9567843675613403, + "epoch": 0.5026960960529154, + "kl_loss": 0.26944202184677124, + "loss_ib": 0.008895268663764, + "step": 1748 + }, + { + "ce_ib": 8.421351432800293, + "ce_orig": 0.534841001033783, + "epoch": 0.5026960960529154, + "kl_loss": 0.2718900442123413, + "loss_ib": 0.011140250600874424, + "step": 1748 + }, + { + "ce_ib": 4.257621765136719, + "ce_orig": 0.7095168232917786, + "epoch": 0.5026960960529154, + "kl_loss": 0.2819932997226715, + "loss_ib": 0.007077554240822792, + "step": 1748 + }, + { + "ce_ib": 8.776861190795898, + "ce_orig": 1.6992207765579224, + "epoch": 0.5026960960529154, + "kl_loss": 0.2835456430912018, + "loss_ib": 0.011612317524850368, + "step": 1748 + }, + { + "ce_ib": 5.450873851776123, + "ce_orig": 0.7511981725692749, + "epoch": 0.502983679631893, + "kl_loss": 0.28340595960617065, + "loss_ib": 0.008284932933747768, + "step": 1749 + }, + { + "ce_ib": 4.766958713531494, + "ce_orig": 0.6778752207756042, + "epoch": 0.502983679631893, + "kl_loss": 0.20694370567798615, + "loss_ib": 0.006836395710706711, + "step": 1749 + }, + { + "ce_ib": 3.5692238807678223, + "ce_orig": 0.5320716500282288, + "epoch": 0.502983679631893, + "kl_loss": 0.2173265516757965, + "loss_ib": 0.005742488894611597, + "step": 1749 + }, + { + "ce_ib": 5.871660232543945, + "ce_orig": 1.1351439952850342, + "epoch": 0.502983679631893, + "kl_loss": 0.24728937447071075, + "loss_ib": 0.008344553411006927, + "step": 1749 + }, + { + "epoch": 0.5032712632108707, + "grad_norm": 0.11421152949333191, + "learning_rate": 9.515373661622665e-06, + "loss": 0.8177, + "step": 1750 + }, + { + "ce_ib": 11.361602783203125, + "ce_orig": 1.9655756950378418, + "epoch": 0.5032712632108707, + "kl_loss": 0.24857398867607117, + "loss_ib": 0.013847342692315578, + "step": 1750 + }, + { + "ce_ib": 5.9394097328186035, + "ce_orig": 0.7931159734725952, + "epoch": 0.5032712632108707, + "kl_loss": 0.2079966813325882, + "loss_ib": 0.008019376546144485, + "step": 1750 + }, + { + "ce_ib": 3.0225653648376465, + "ce_orig": 0.48664718866348267, + "epoch": 0.5032712632108707, + "kl_loss": 0.25711023807525635, + "loss_ib": 0.005593668203800917, + "step": 1750 + }, + { + "ce_ib": 5.224844932556152, + "ce_orig": 0.7229938507080078, + "epoch": 0.5032712632108707, + "kl_loss": 0.21279790997505188, + "loss_ib": 0.007352823857218027, + "step": 1750 + }, + { + "ce_ib": 5.852553367614746, + "ce_orig": 0.992152750492096, + "epoch": 0.5035588467898483, + "kl_loss": 0.3124367594718933, + "loss_ib": 0.008976920507848263, + "step": 1751 + }, + { + "ce_ib": 6.506232738494873, + "ce_orig": 0.840754508972168, + "epoch": 0.5035588467898483, + "kl_loss": 0.28680092096328735, + "loss_ib": 0.009374241344630718, + "step": 1751 + }, + { + "ce_ib": 5.130998134613037, + "ce_orig": 0.5533031821250916, + "epoch": 0.5035588467898483, + "kl_loss": 0.3280577063560486, + "loss_ib": 0.008411575108766556, + "step": 1751 + }, + { + "ce_ib": 6.5981926918029785, + "ce_orig": 0.7942853569984436, + "epoch": 0.5035588467898483, + "kl_loss": 0.34047335386276245, + "loss_ib": 0.010002925992012024, + "step": 1751 + }, + { + "ce_ib": 6.03656530380249, + "ce_orig": 1.0924416780471802, + "epoch": 0.5038464303688259, + "kl_loss": 0.43093806505203247, + "loss_ib": 0.01034594513475895, + "step": 1752 + }, + { + "ce_ib": 5.8077497482299805, + "ce_orig": 0.8590795993804932, + "epoch": 0.5038464303688259, + "kl_loss": 0.19107350707054138, + "loss_ib": 0.007718484383076429, + "step": 1752 + }, + { + "ce_ib": 3.6463088989257812, + "ce_orig": 0.6247625350952148, + "epoch": 0.5038464303688259, + "kl_loss": 0.19530805945396423, + "loss_ib": 0.0055993893183767796, + "step": 1752 + }, + { + "ce_ib": 4.36478328704834, + "ce_orig": 0.7276626825332642, + "epoch": 0.5038464303688259, + "kl_loss": 0.27551114559173584, + "loss_ib": 0.0071198949590325356, + "step": 1752 + }, + { + "ce_ib": 3.8157730102539062, + "ce_orig": 0.664462685585022, + "epoch": 0.5041340139478035, + "kl_loss": 0.24338558316230774, + "loss_ib": 0.006249628961086273, + "step": 1753 + }, + { + "ce_ib": 5.476027965545654, + "ce_orig": 1.096780776977539, + "epoch": 0.5041340139478035, + "kl_loss": 0.17896342277526855, + "loss_ib": 0.007265662308782339, + "step": 1753 + }, + { + "ce_ib": 5.217350006103516, + "ce_orig": 0.7322924733161926, + "epoch": 0.5041340139478035, + "kl_loss": 0.21693328022956848, + "loss_ib": 0.0073866830207407475, + "step": 1753 + }, + { + "ce_ib": 6.2500319480896, + "ce_orig": 1.0605376958847046, + "epoch": 0.5041340139478035, + "kl_loss": 0.24295900762081146, + "loss_ib": 0.00867962185293436, + "step": 1753 + }, + { + "ce_ib": 6.467724323272705, + "ce_orig": 0.9832982420921326, + "epoch": 0.5044215975267812, + "kl_loss": 0.17008638381958008, + "loss_ib": 0.008168588392436504, + "step": 1754 + }, + { + "ce_ib": 6.879829406738281, + "ce_orig": 1.5099862813949585, + "epoch": 0.5044215975267812, + "kl_loss": 0.26725825667381287, + "loss_ib": 0.00955241173505783, + "step": 1754 + }, + { + "ce_ib": 9.748746871948242, + "ce_orig": 1.5472444295883179, + "epoch": 0.5044215975267812, + "kl_loss": 0.27403298020362854, + "loss_ib": 0.01248907670378685, + "step": 1754 + }, + { + "ce_ib": 3.8178939819335938, + "ce_orig": 0.7111484408378601, + "epoch": 0.5044215975267812, + "kl_loss": 0.2375500202178955, + "loss_ib": 0.006193394307047129, + "step": 1754 + }, + { + "epoch": 0.5047091811057589, + "grad_norm": 0.14262355864048004, + "learning_rate": 9.512035066482055e-06, + "loss": 0.8646, + "step": 1755 + }, + { + "ce_ib": 6.009156227111816, + "ce_orig": 0.7930887937545776, + "epoch": 0.5047091811057589, + "kl_loss": 0.319755494594574, + "loss_ib": 0.009206710383296013, + "step": 1755 + }, + { + "ce_ib": 6.522556781768799, + "ce_orig": 0.9451214671134949, + "epoch": 0.5047091811057589, + "kl_loss": 0.26357918977737427, + "loss_ib": 0.00915834866464138, + "step": 1755 + }, + { + "ce_ib": 7.798460960388184, + "ce_orig": 1.1518856287002563, + "epoch": 0.5047091811057589, + "kl_loss": 0.25611695647239685, + "loss_ib": 0.010359629988670349, + "step": 1755 + }, + { + "ce_ib": 5.715204238891602, + "ce_orig": 1.0006587505340576, + "epoch": 0.5047091811057589, + "kl_loss": 0.28110426664352417, + "loss_ib": 0.00852624699473381, + "step": 1755 + }, + { + "ce_ib": 3.4260988235473633, + "ce_orig": 0.858456015586853, + "epoch": 0.5049967646847365, + "kl_loss": 0.2268604338169098, + "loss_ib": 0.005694702733308077, + "step": 1756 + }, + { + "ce_ib": 4.031425476074219, + "ce_orig": 0.7041755318641663, + "epoch": 0.5049967646847365, + "kl_loss": 0.23584911227226257, + "loss_ib": 0.006389916408807039, + "step": 1756 + }, + { + "ce_ib": 4.38675594329834, + "ce_orig": 0.6774066686630249, + "epoch": 0.5049967646847365, + "kl_loss": 0.16846446692943573, + "loss_ib": 0.006071400362998247, + "step": 1756 + }, + { + "ce_ib": 4.694783687591553, + "ce_orig": 0.6566852331161499, + "epoch": 0.5049967646847365, + "kl_loss": 0.15230196714401245, + "loss_ib": 0.006217803340405226, + "step": 1756 + }, + { + "ce_ib": 2.7962417602539062, + "ce_orig": 0.4238570034503937, + "epoch": 0.5052843482637142, + "kl_loss": 0.29057538509368896, + "loss_ib": 0.005701995920389891, + "step": 1757 + }, + { + "ce_ib": 7.179047584533691, + "ce_orig": 0.871265709400177, + "epoch": 0.5052843482637142, + "kl_loss": 0.25131675601005554, + "loss_ib": 0.00969221442937851, + "step": 1757 + }, + { + "ce_ib": 4.701264381408691, + "ce_orig": 1.0522727966308594, + "epoch": 0.5052843482637142, + "kl_loss": 0.20373789966106415, + "loss_ib": 0.006738643627613783, + "step": 1757 + }, + { + "ce_ib": 4.195560455322266, + "ce_orig": 0.8334342837333679, + "epoch": 0.5052843482637142, + "kl_loss": 0.19431960582733154, + "loss_ib": 0.006138755939900875, + "step": 1757 + }, + { + "ce_ib": 7.60351037979126, + "ce_orig": 1.42118501663208, + "epoch": 0.5055719318426918, + "kl_loss": 0.46376219391822815, + "loss_ib": 0.012241131626069546, + "step": 1758 + }, + { + "ce_ib": 5.35986328125, + "ce_orig": 0.7749453186988831, + "epoch": 0.5055719318426918, + "kl_loss": 0.24413596093654633, + "loss_ib": 0.0078012230806052685, + "step": 1758 + }, + { + "ce_ib": 7.557556629180908, + "ce_orig": 0.8648902177810669, + "epoch": 0.5055719318426918, + "kl_loss": 0.1665661633014679, + "loss_ib": 0.009223218075931072, + "step": 1758 + }, + { + "ce_ib": 8.175300598144531, + "ce_orig": 0.8997637629508972, + "epoch": 0.5055719318426918, + "kl_loss": 0.37757620215415955, + "loss_ib": 0.011951062828302383, + "step": 1758 + }, + { + "ce_ib": 8.870894432067871, + "ce_orig": 1.0376429557800293, + "epoch": 0.5058595154216694, + "kl_loss": 0.2231506109237671, + "loss_ib": 0.011102399788796902, + "step": 1759 + }, + { + "ce_ib": 3.632920742034912, + "ce_orig": 0.44894668459892273, + "epoch": 0.5058595154216694, + "kl_loss": 0.3840489983558655, + "loss_ib": 0.007473410107195377, + "step": 1759 + }, + { + "ce_ib": 6.754943370819092, + "ce_orig": 1.1315734386444092, + "epoch": 0.5058595154216694, + "kl_loss": 0.19753900170326233, + "loss_ib": 0.008730334229767323, + "step": 1759 + }, + { + "ce_ib": 4.836207866668701, + "ce_orig": 0.7240886688232422, + "epoch": 0.5058595154216694, + "kl_loss": 0.17611101269721985, + "loss_ib": 0.0065973177552223206, + "step": 1759 + }, + { + "epoch": 0.506147099000647, + "grad_norm": 0.12391646206378937, + "learning_rate": 9.508685600801704e-06, + "loss": 0.8845, + "step": 1760 + }, + { + "ce_ib": 4.2273688316345215, + "ce_orig": 1.025089979171753, + "epoch": 0.506147099000647, + "kl_loss": 0.14844375848770142, + "loss_ib": 0.00571180647239089, + "step": 1760 + }, + { + "ce_ib": 4.832033157348633, + "ce_orig": 0.9036097526550293, + "epoch": 0.506147099000647, + "kl_loss": 0.31038039922714233, + "loss_ib": 0.007935836911201477, + "step": 1760 + }, + { + "ce_ib": 5.035365581512451, + "ce_orig": 0.7276528477668762, + "epoch": 0.506147099000647, + "kl_loss": 0.2854631245136261, + "loss_ib": 0.007889996282756329, + "step": 1760 + }, + { + "ce_ib": 5.9888505935668945, + "ce_orig": 0.9987836480140686, + "epoch": 0.506147099000647, + "kl_loss": 0.17566829919815063, + "loss_ib": 0.007745533250272274, + "step": 1760 + }, + { + "ce_ib": 4.102182388305664, + "ce_orig": 0.95122230052948, + "epoch": 0.5064346825796247, + "kl_loss": 0.28314220905303955, + "loss_ib": 0.006933604367077351, + "step": 1761 + }, + { + "ce_ib": 6.556976795196533, + "ce_orig": 1.0992000102996826, + "epoch": 0.5064346825796247, + "kl_loss": 0.17420658469200134, + "loss_ib": 0.008299043402075768, + "step": 1761 + }, + { + "ce_ib": 6.701450347900391, + "ce_orig": 0.8601769208908081, + "epoch": 0.5064346825796247, + "kl_loss": 0.26562756299972534, + "loss_ib": 0.009357726201415062, + "step": 1761 + }, + { + "ce_ib": 3.7292327880859375, + "ce_orig": 0.5020140409469604, + "epoch": 0.5064346825796247, + "kl_loss": 0.1775652915239334, + "loss_ib": 0.00550488568842411, + "step": 1761 + }, + { + "ce_ib": 4.09344482421875, + "ce_orig": 0.7577466368675232, + "epoch": 0.5067222661586024, + "kl_loss": 0.1945955753326416, + "loss_ib": 0.006039400584995747, + "step": 1762 + }, + { + "ce_ib": 7.949859142303467, + "ce_orig": 1.1857138872146606, + "epoch": 0.5067222661586024, + "kl_loss": 0.15808865427970886, + "loss_ib": 0.009530745446681976, + "step": 1762 + }, + { + "ce_ib": 6.878933906555176, + "ce_orig": 1.056178092956543, + "epoch": 0.5067222661586024, + "kl_loss": 0.15867654979228973, + "loss_ib": 0.008465698920190334, + "step": 1762 + }, + { + "ce_ib": 1.1259777545928955, + "ce_orig": 0.13052049279212952, + "epoch": 0.5067222661586024, + "kl_loss": 0.5092992782592773, + "loss_ib": 0.006218970287591219, + "step": 1762 + }, + { + "ce_ib": 6.466426372528076, + "ce_orig": 1.1745500564575195, + "epoch": 0.50700984973758, + "kl_loss": 0.20478039979934692, + "loss_ib": 0.008514230139553547, + "step": 1763 + }, + { + "ce_ib": 7.062382698059082, + "ce_orig": 1.2049685716629028, + "epoch": 0.50700984973758, + "kl_loss": 0.24910223484039307, + "loss_ib": 0.009553404524922371, + "step": 1763 + }, + { + "ce_ib": 7.08675479888916, + "ce_orig": 1.275549054145813, + "epoch": 0.50700984973758, + "kl_loss": 0.1990867555141449, + "loss_ib": 0.009077622555196285, + "step": 1763 + }, + { + "ce_ib": 6.7942795753479, + "ce_orig": 0.9077232480049133, + "epoch": 0.50700984973758, + "kl_loss": 0.24441751837730408, + "loss_ib": 0.009238455444574356, + "step": 1763 + }, + { + "ce_ib": 5.48286247253418, + "ce_orig": 0.6654765605926514, + "epoch": 0.5072974333165576, + "kl_loss": 0.19381588697433472, + "loss_ib": 0.0074210213497281075, + "step": 1764 + }, + { + "ce_ib": 4.639284133911133, + "ce_orig": 0.9201982021331787, + "epoch": 0.5072974333165576, + "kl_loss": 0.23711277544498444, + "loss_ib": 0.007010411936789751, + "step": 1764 + }, + { + "ce_ib": 4.738180160522461, + "ce_orig": 0.9239326119422913, + "epoch": 0.5072974333165576, + "kl_loss": 0.2706284523010254, + "loss_ib": 0.007444465067237616, + "step": 1764 + }, + { + "ce_ib": 4.196111679077148, + "ce_orig": 0.2848537266254425, + "epoch": 0.5072974333165576, + "kl_loss": 0.25172311067581177, + "loss_ib": 0.006713342387229204, + "step": 1764 + }, + { + "epoch": 0.5075850168955353, + "grad_norm": 0.1055581197142601, + "learning_rate": 9.505325272651253e-06, + "loss": 0.8398, + "step": 1765 + }, + { + "ce_ib": 4.018991470336914, + "ce_orig": 0.7165852785110474, + "epoch": 0.5075850168955353, + "kl_loss": 0.21685855090618134, + "loss_ib": 0.00618757726624608, + "step": 1765 + }, + { + "ce_ib": 7.085607051849365, + "ce_orig": 0.42605891823768616, + "epoch": 0.5075850168955353, + "kl_loss": 0.2992393672466278, + "loss_ib": 0.010078000836074352, + "step": 1765 + }, + { + "ce_ib": 4.441742897033691, + "ce_orig": 0.7470413446426392, + "epoch": 0.5075850168955353, + "kl_loss": 0.15440087020397186, + "loss_ib": 0.005985751748085022, + "step": 1765 + }, + { + "ce_ib": 5.012276649475098, + "ce_orig": 0.7102251648902893, + "epoch": 0.5075850168955353, + "kl_loss": 0.20242461562156677, + "loss_ib": 0.007036522496491671, + "step": 1765 + }, + { + "ce_ib": 5.46077299118042, + "ce_orig": 1.014110803604126, + "epoch": 0.5078726004745129, + "kl_loss": 0.17652538418769836, + "loss_ib": 0.0072260270826518536, + "step": 1766 + }, + { + "ce_ib": 6.734499454498291, + "ce_orig": 1.149742603302002, + "epoch": 0.5078726004745129, + "kl_loss": 0.24488303065299988, + "loss_ib": 0.009183329530060291, + "step": 1766 + }, + { + "ce_ib": 4.96577787399292, + "ce_orig": 0.6755092740058899, + "epoch": 0.5078726004745129, + "kl_loss": 0.20076847076416016, + "loss_ib": 0.0069734626449644566, + "step": 1766 + }, + { + "ce_ib": 9.733043670654297, + "ce_orig": 1.1865333318710327, + "epoch": 0.5078726004745129, + "kl_loss": 0.3436145484447479, + "loss_ib": 0.013169188983738422, + "step": 1766 + }, + { + "ce_ib": 3.1408607959747314, + "ce_orig": 0.24576525390148163, + "epoch": 0.5081601840534905, + "kl_loss": 0.3785693049430847, + "loss_ib": 0.006926553789526224, + "step": 1767 + }, + { + "ce_ib": 3.944741725921631, + "ce_orig": 0.6151083707809448, + "epoch": 0.5081601840534905, + "kl_loss": 0.21588996052742004, + "loss_ib": 0.006103641353547573, + "step": 1767 + }, + { + "ce_ib": 6.064630508422852, + "ce_orig": 1.414920449256897, + "epoch": 0.5081601840534905, + "kl_loss": 0.22204412519931793, + "loss_ib": 0.008285071700811386, + "step": 1767 + }, + { + "ce_ib": 5.1514506340026855, + "ce_orig": 0.8176374435424805, + "epoch": 0.5081601840534905, + "kl_loss": 0.20381109416484833, + "loss_ib": 0.007189561612904072, + "step": 1767 + }, + { + "ce_ib": 5.502082347869873, + "ce_orig": 0.9125807881355286, + "epoch": 0.5084477676324682, + "kl_loss": 0.21032029390335083, + "loss_ib": 0.007605285383760929, + "step": 1768 + }, + { + "ce_ib": 6.051918029785156, + "ce_orig": 1.0317310094833374, + "epoch": 0.5084477676324682, + "kl_loss": 0.26851925253868103, + "loss_ib": 0.008737110532820225, + "step": 1768 + }, + { + "ce_ib": 3.6897971630096436, + "ce_orig": 0.6450068950653076, + "epoch": 0.5084477676324682, + "kl_loss": 0.3440362811088562, + "loss_ib": 0.007130159996449947, + "step": 1768 + }, + { + "ce_ib": 3.3664798736572266, + "ce_orig": 0.6561928391456604, + "epoch": 0.5084477676324682, + "kl_loss": 0.1874711513519287, + "loss_ib": 0.005241191480308771, + "step": 1768 + }, + { + "ce_ib": 3.5129668712615967, + "ce_orig": 0.5466682314872742, + "epoch": 0.5087353512114459, + "kl_loss": 0.25630488991737366, + "loss_ib": 0.006076015532016754, + "step": 1769 + }, + { + "ce_ib": 3.623927354812622, + "ce_orig": 0.718904972076416, + "epoch": 0.5087353512114459, + "kl_loss": 0.2727135419845581, + "loss_ib": 0.006351063027977943, + "step": 1769 + }, + { + "ce_ib": 3.5800700187683105, + "ce_orig": 0.6674350500106812, + "epoch": 0.5087353512114459, + "kl_loss": 0.28915029764175415, + "loss_ib": 0.006471572909504175, + "step": 1769 + }, + { + "ce_ib": 5.109983921051025, + "ce_orig": 0.5016462206840515, + "epoch": 0.5087353512114459, + "kl_loss": 0.36470139026641846, + "loss_ib": 0.008756997995078564, + "step": 1769 + }, + { + "epoch": 0.5090229347904235, + "grad_norm": 0.1167183369398117, + "learning_rate": 9.501954090126514e-06, + "loss": 0.8217, + "step": 1770 + }, + { + "ce_ib": 5.424228668212891, + "ce_orig": 0.6310187578201294, + "epoch": 0.5090229347904235, + "kl_loss": 0.21915018558502197, + "loss_ib": 0.007615730632096529, + "step": 1770 + }, + { + "ce_ib": 5.505951404571533, + "ce_orig": 0.9999389052391052, + "epoch": 0.5090229347904235, + "kl_loss": 0.22435197234153748, + "loss_ib": 0.007749471347779036, + "step": 1770 + }, + { + "ce_ib": 1.7814141511917114, + "ce_orig": 0.2353065013885498, + "epoch": 0.5090229347904235, + "kl_loss": 0.24533554911613464, + "loss_ib": 0.004234769381582737, + "step": 1770 + }, + { + "ce_ib": 2.6672050952911377, + "ce_orig": 0.367904931306839, + "epoch": 0.5090229347904235, + "kl_loss": 0.21802517771720886, + "loss_ib": 0.004847456701099873, + "step": 1770 + }, + { + "ce_ib": 3.7499396800994873, + "ce_orig": 0.3734409213066101, + "epoch": 0.5093105183694011, + "kl_loss": 0.195489764213562, + "loss_ib": 0.0057048373855650425, + "step": 1771 + }, + { + "ce_ib": 9.809314727783203, + "ce_orig": 2.07853102684021, + "epoch": 0.5093105183694011, + "kl_loss": 0.27149325609207153, + "loss_ib": 0.012524247169494629, + "step": 1771 + }, + { + "ce_ib": 3.7507245540618896, + "ce_orig": 0.8744357824325562, + "epoch": 0.5093105183694011, + "kl_loss": 0.19304901361465454, + "loss_ib": 0.005681214388459921, + "step": 1771 + }, + { + "ce_ib": 3.5979347229003906, + "ce_orig": 0.5374438166618347, + "epoch": 0.5093105183694011, + "kl_loss": 0.13741683959960938, + "loss_ib": 0.004972103051841259, + "step": 1771 + }, + { + "ce_ib": 4.961030006408691, + "ce_orig": 0.8366300463676453, + "epoch": 0.5095981019483787, + "kl_loss": 0.2362416386604309, + "loss_ib": 0.0073234462179243565, + "step": 1772 + }, + { + "ce_ib": 4.925734043121338, + "ce_orig": 0.6740673780441284, + "epoch": 0.5095981019483787, + "kl_loss": 0.22513160109519958, + "loss_ib": 0.0071770497597754, + "step": 1772 + }, + { + "ce_ib": 8.155735969543457, + "ce_orig": 1.4952911138534546, + "epoch": 0.5095981019483787, + "kl_loss": 0.2642083764076233, + "loss_ib": 0.010797820053994656, + "step": 1772 + }, + { + "ce_ib": 4.71515417098999, + "ce_orig": 0.9140082597732544, + "epoch": 0.5095981019483787, + "kl_loss": 0.4507846236228943, + "loss_ib": 0.009223000146448612, + "step": 1772 + }, + { + "ce_ib": 5.51159143447876, + "ce_orig": 0.9571477770805359, + "epoch": 0.5098856855273564, + "kl_loss": 0.23798805475234985, + "loss_ib": 0.00789147149771452, + "step": 1773 + }, + { + "ce_ib": 5.530810832977295, + "ce_orig": 0.43717384338378906, + "epoch": 0.5098856855273564, + "kl_loss": 0.3572522699832916, + "loss_ib": 0.009103333577513695, + "step": 1773 + }, + { + "ce_ib": 5.345722198486328, + "ce_orig": 0.9172801375389099, + "epoch": 0.5098856855273564, + "kl_loss": 0.15426042675971985, + "loss_ib": 0.00688832625746727, + "step": 1773 + }, + { + "ce_ib": 9.65254020690918, + "ce_orig": 1.5662338733673096, + "epoch": 0.5098856855273564, + "kl_loss": 0.17158162593841553, + "loss_ib": 0.011368355713784695, + "step": 1773 + }, + { + "ce_ib": 5.850159645080566, + "ce_orig": 0.9013000726699829, + "epoch": 0.510173269106334, + "kl_loss": 0.22897228598594666, + "loss_ib": 0.008139882236719131, + "step": 1774 + }, + { + "ce_ib": 2.766835927963257, + "ce_orig": 0.6518144607543945, + "epoch": 0.510173269106334, + "kl_loss": 0.20160110294818878, + "loss_ib": 0.004782847128808498, + "step": 1774 + }, + { + "ce_ib": 4.017307758331299, + "ce_orig": 0.7030872106552124, + "epoch": 0.510173269106334, + "kl_loss": 0.19265756011009216, + "loss_ib": 0.005943883676081896, + "step": 1774 + }, + { + "ce_ib": 6.924398899078369, + "ce_orig": 1.2229417562484741, + "epoch": 0.510173269106334, + "kl_loss": 0.23021990060806274, + "loss_ib": 0.009226597845554352, + "step": 1774 + }, + { + "epoch": 0.5104608526853117, + "grad_norm": 0.11179368942975998, + "learning_rate": 9.498572061349442e-06, + "loss": 0.8365, + "step": 1775 + }, + { + "ce_ib": 7.849050998687744, + "ce_orig": 0.9646425247192383, + "epoch": 0.5104608526853117, + "kl_loss": 0.17950886487960815, + "loss_ib": 0.009644139558076859, + "step": 1775 + }, + { + "ce_ib": 5.053280830383301, + "ce_orig": 0.9874280691146851, + "epoch": 0.5104608526853117, + "kl_loss": 0.18137818574905396, + "loss_ib": 0.006867062766104937, + "step": 1775 + }, + { + "ce_ib": 5.154513359069824, + "ce_orig": 0.38847586512565613, + "epoch": 0.5104608526853117, + "kl_loss": 0.2768966257572174, + "loss_ib": 0.007923480123281479, + "step": 1775 + }, + { + "ce_ib": 7.6988630294799805, + "ce_orig": 1.6250163316726685, + "epoch": 0.5104608526853117, + "kl_loss": 0.2540596127510071, + "loss_ib": 0.01023945864289999, + "step": 1775 + }, + { + "ce_ib": 1.5759936571121216, + "ce_orig": 0.16515246033668518, + "epoch": 0.5107484362642893, + "kl_loss": 0.5132174491882324, + "loss_ib": 0.006708168424665928, + "step": 1776 + }, + { + "ce_ib": 5.543222904205322, + "ce_orig": 0.925904393196106, + "epoch": 0.5107484362642893, + "kl_loss": 0.2087169885635376, + "loss_ib": 0.007630392909049988, + "step": 1776 + }, + { + "ce_ib": 6.327423572540283, + "ce_orig": 0.9812836647033691, + "epoch": 0.5107484362642893, + "kl_loss": 0.31686538457870483, + "loss_ib": 0.009496076963841915, + "step": 1776 + }, + { + "ce_ib": 6.192408561706543, + "ce_orig": 1.1052305698394775, + "epoch": 0.5107484362642893, + "kl_loss": 0.28637051582336426, + "loss_ib": 0.00905611366033554, + "step": 1776 + }, + { + "ce_ib": 4.368666172027588, + "ce_orig": 0.8808457255363464, + "epoch": 0.511036019843267, + "kl_loss": 0.2151002436876297, + "loss_ib": 0.006519668735563755, + "step": 1777 + }, + { + "ce_ib": 3.9586246013641357, + "ce_orig": 0.8993598818778992, + "epoch": 0.511036019843267, + "kl_loss": 0.25651246309280396, + "loss_ib": 0.0065237488597631454, + "step": 1777 + }, + { + "ce_ib": 5.036628723144531, + "ce_orig": 0.716555118560791, + "epoch": 0.511036019843267, + "kl_loss": 0.5716152191162109, + "loss_ib": 0.010752780362963676, + "step": 1777 + }, + { + "ce_ib": 5.706982135772705, + "ce_orig": 0.6277963519096375, + "epoch": 0.511036019843267, + "kl_loss": 0.23554293811321259, + "loss_ib": 0.008062411099672318, + "step": 1777 + }, + { + "ce_ib": 5.136541366577148, + "ce_orig": 1.0825738906860352, + "epoch": 0.5113236034222446, + "kl_loss": 0.18041208386421204, + "loss_ib": 0.006940662860870361, + "step": 1778 + }, + { + "ce_ib": 2.9055914878845215, + "ce_orig": 0.42757514119148254, + "epoch": 0.5113236034222446, + "kl_loss": 0.31750643253326416, + "loss_ib": 0.006080655846744776, + "step": 1778 + }, + { + "ce_ib": 4.574838161468506, + "ce_orig": 0.8141748905181885, + "epoch": 0.5113236034222446, + "kl_loss": 0.22247333824634552, + "loss_ib": 0.006799571216106415, + "step": 1778 + }, + { + "ce_ib": 2.3404247760772705, + "ce_orig": 0.24534167349338531, + "epoch": 0.5113236034222446, + "kl_loss": 0.5108161568641663, + "loss_ib": 0.007448586169630289, + "step": 1778 + }, + { + "ce_ib": 6.489202976226807, + "ce_orig": 0.9206019043922424, + "epoch": 0.5116111870012222, + "kl_loss": 0.32573944330215454, + "loss_ib": 0.009746597148478031, + "step": 1779 + }, + { + "ce_ib": 4.82711124420166, + "ce_orig": 0.5427364706993103, + "epoch": 0.5116111870012222, + "kl_loss": 0.280947208404541, + "loss_ib": 0.007636583410203457, + "step": 1779 + }, + { + "ce_ib": 4.279891490936279, + "ce_orig": 0.8699036836624146, + "epoch": 0.5116111870012222, + "kl_loss": 0.26657766103744507, + "loss_ib": 0.006945668254047632, + "step": 1779 + }, + { + "ce_ib": 6.691254138946533, + "ce_orig": 1.0939675569534302, + "epoch": 0.5116111870012222, + "kl_loss": 0.22720719873905182, + "loss_ib": 0.0089633259922266, + "step": 1779 + }, + { + "epoch": 0.5118987705801998, + "grad_norm": 0.11643703281879425, + "learning_rate": 9.495179194468135e-06, + "loss": 0.8764, + "step": 1780 + }, + { + "ce_ib": 6.29385232925415, + "ce_orig": 0.8807694911956787, + "epoch": 0.5118987705801998, + "kl_loss": 0.7251090407371521, + "loss_ib": 0.013544943183660507, + "step": 1780 + }, + { + "ce_ib": 3.4683852195739746, + "ce_orig": 0.5015277862548828, + "epoch": 0.5118987705801998, + "kl_loss": 0.17699450254440308, + "loss_ib": 0.005238330457359552, + "step": 1780 + }, + { + "ce_ib": 4.6820526123046875, + "ce_orig": 0.8917681574821472, + "epoch": 0.5118987705801998, + "kl_loss": 0.20606280863285065, + "loss_ib": 0.006742680445313454, + "step": 1780 + }, + { + "ce_ib": 5.075531005859375, + "ce_orig": 1.0754148960113525, + "epoch": 0.5118987705801998, + "kl_loss": 0.15766265988349915, + "loss_ib": 0.006652157288044691, + "step": 1780 + }, + { + "ce_ib": 5.327798366546631, + "ce_orig": 0.7447091341018677, + "epoch": 0.5121863541591775, + "kl_loss": 0.21377348899841309, + "loss_ib": 0.007465533446520567, + "step": 1781 + }, + { + "ce_ib": 5.237917423248291, + "ce_orig": 0.7073243856430054, + "epoch": 0.5121863541591775, + "kl_loss": 0.2471804916858673, + "loss_ib": 0.007709722500294447, + "step": 1781 + }, + { + "ce_ib": 6.874824523925781, + "ce_orig": 1.0658276081085205, + "epoch": 0.5121863541591775, + "kl_loss": 0.24324162304401398, + "loss_ib": 0.009307241067290306, + "step": 1781 + }, + { + "ce_ib": 5.399325847625732, + "ce_orig": 0.6844679713249207, + "epoch": 0.5121863541591775, + "kl_loss": 0.20346783101558685, + "loss_ib": 0.0074340044520795345, + "step": 1781 + }, + { + "ce_ib": 8.268445014953613, + "ce_orig": 1.5828357934951782, + "epoch": 0.5124739377381552, + "kl_loss": 0.14310169219970703, + "loss_ib": 0.009699461981654167, + "step": 1782 + }, + { + "ce_ib": 3.4318509101867676, + "ce_orig": 0.64007169008255, + "epoch": 0.5124739377381552, + "kl_loss": 0.22269004583358765, + "loss_ib": 0.00565875181928277, + "step": 1782 + }, + { + "ce_ib": 4.808406352996826, + "ce_orig": 0.5978017449378967, + "epoch": 0.5124739377381552, + "kl_loss": 0.3129728436470032, + "loss_ib": 0.007938134483993053, + "step": 1782 + }, + { + "ce_ib": 7.869020462036133, + "ce_orig": 1.6359044313430786, + "epoch": 0.5124739377381552, + "kl_loss": 0.2035483568906784, + "loss_ib": 0.00990450382232666, + "step": 1782 + }, + { + "ce_ib": 2.7303977012634277, + "ce_orig": 0.5645561218261719, + "epoch": 0.5127615213171328, + "kl_loss": 0.1680532693862915, + "loss_ib": 0.004410930443555117, + "step": 1783 + }, + { + "ce_ib": 4.404380798339844, + "ce_orig": 0.6755330562591553, + "epoch": 0.5127615213171328, + "kl_loss": 0.17769068479537964, + "loss_ib": 0.006181287579238415, + "step": 1783 + }, + { + "ce_ib": 4.336306095123291, + "ce_orig": 0.7614088654518127, + "epoch": 0.5127615213171328, + "kl_loss": 0.16061021387577057, + "loss_ib": 0.005942408461123705, + "step": 1783 + }, + { + "ce_ib": 6.839239120483398, + "ce_orig": 1.1383109092712402, + "epoch": 0.5127615213171328, + "kl_loss": 0.2974740266799927, + "loss_ib": 0.009813979268074036, + "step": 1783 + }, + { + "ce_ib": 7.303009510040283, + "ce_orig": 0.9598062634468079, + "epoch": 0.5130491048961104, + "kl_loss": 0.21405655145645142, + "loss_ib": 0.009443574585020542, + "step": 1784 + }, + { + "ce_ib": 6.253368854522705, + "ce_orig": 0.7201490998268127, + "epoch": 0.5130491048961104, + "kl_loss": 0.1870848834514618, + "loss_ib": 0.008124217391014099, + "step": 1784 + }, + { + "ce_ib": 3.032378911972046, + "ce_orig": 0.34329771995544434, + "epoch": 0.5130491048961104, + "kl_loss": 0.38947010040283203, + "loss_ib": 0.006927079986780882, + "step": 1784 + }, + { + "ce_ib": 3.8072333335876465, + "ce_orig": 0.5993536710739136, + "epoch": 0.5130491048961104, + "kl_loss": 0.1714390069246292, + "loss_ib": 0.0055216234177351, + "step": 1784 + }, + { + "epoch": 0.5133366884750881, + "grad_norm": 0.11334189772605896, + "learning_rate": 9.491775497656796e-06, + "loss": 0.9333, + "step": 1785 + }, + { + "ce_ib": 4.950538635253906, + "ce_orig": 0.7863808274269104, + "epoch": 0.5133366884750881, + "kl_loss": 0.17428286373615265, + "loss_ib": 0.006693367380648851, + "step": 1785 + }, + { + "ce_ib": 5.514882564544678, + "ce_orig": 0.9535226821899414, + "epoch": 0.5133366884750881, + "kl_loss": 0.17509450018405914, + "loss_ib": 0.007265827618539333, + "step": 1785 + }, + { + "ce_ib": 2.5482590198516846, + "ce_orig": 0.5628145933151245, + "epoch": 0.5133366884750881, + "kl_loss": 0.15639597177505493, + "loss_ib": 0.004112218972295523, + "step": 1785 + }, + { + "ce_ib": 4.515284538269043, + "ce_orig": 0.6384725570678711, + "epoch": 0.5133366884750881, + "kl_loss": 0.255043089389801, + "loss_ib": 0.007065715733915567, + "step": 1785 + }, + { + "ce_ib": 3.976243019104004, + "ce_orig": 0.5672842860221863, + "epoch": 0.5136242720540657, + "kl_loss": 0.3050616979598999, + "loss_ib": 0.007026860024780035, + "step": 1786 + }, + { + "ce_ib": 5.6237640380859375, + "ce_orig": 0.9839730858802795, + "epoch": 0.5136242720540657, + "kl_loss": 0.1823396533727646, + "loss_ib": 0.00744716078042984, + "step": 1786 + }, + { + "ce_ib": 6.167069435119629, + "ce_orig": 1.0040804147720337, + "epoch": 0.5136242720540657, + "kl_loss": 0.31575673818588257, + "loss_ib": 0.009324637241661549, + "step": 1786 + }, + { + "ce_ib": 7.244035243988037, + "ce_orig": 0.9823845028877258, + "epoch": 0.5136242720540657, + "kl_loss": 0.16594602167606354, + "loss_ib": 0.008903495036065578, + "step": 1786 + }, + { + "ce_ib": 5.678101062774658, + "ce_orig": 0.7103188037872314, + "epoch": 0.5139118556330433, + "kl_loss": 0.356295108795166, + "loss_ib": 0.009241051971912384, + "step": 1787 + }, + { + "ce_ib": 5.744541645050049, + "ce_orig": 0.6343448758125305, + "epoch": 0.5139118556330433, + "kl_loss": 0.34542733430862427, + "loss_ib": 0.009198814630508423, + "step": 1787 + }, + { + "ce_ib": 7.367956638336182, + "ce_orig": 1.3359696865081787, + "epoch": 0.5139118556330433, + "kl_loss": 0.17473283410072327, + "loss_ib": 0.00911528430879116, + "step": 1787 + }, + { + "ce_ib": 3.320540189743042, + "ce_orig": 0.48704051971435547, + "epoch": 0.5139118556330433, + "kl_loss": 0.2031700611114502, + "loss_ib": 0.0053522405214607716, + "step": 1787 + }, + { + "ce_ib": 5.3151631355285645, + "ce_orig": 0.6564879417419434, + "epoch": 0.514199439212021, + "kl_loss": 0.6673084497451782, + "loss_ib": 0.011988247744739056, + "step": 1788 + }, + { + "ce_ib": 5.180042743682861, + "ce_orig": 0.680637001991272, + "epoch": 0.514199439212021, + "kl_loss": 0.264859139919281, + "loss_ib": 0.007828634232282639, + "step": 1788 + }, + { + "ce_ib": 7.5746989250183105, + "ce_orig": 0.6266165375709534, + "epoch": 0.514199439212021, + "kl_loss": 0.2970588207244873, + "loss_ib": 0.010545287281274796, + "step": 1788 + }, + { + "ce_ib": 6.376684665679932, + "ce_orig": 0.6511795520782471, + "epoch": 0.514199439212021, + "kl_loss": 0.3848002552986145, + "loss_ib": 0.010224687866866589, + "step": 1788 + }, + { + "ce_ib": 9.771164894104004, + "ce_orig": 1.8776581287384033, + "epoch": 0.5144870227909987, + "kl_loss": 0.3116125464439392, + "loss_ib": 0.012887290678918362, + "step": 1789 + }, + { + "ce_ib": 5.537979602813721, + "ce_orig": 0.7493560910224915, + "epoch": 0.5144870227909987, + "kl_loss": 0.169920414686203, + "loss_ib": 0.00723718386143446, + "step": 1789 + }, + { + "ce_ib": 3.1610782146453857, + "ce_orig": 0.5918846726417542, + "epoch": 0.5144870227909987, + "kl_loss": 0.1520177721977234, + "loss_ib": 0.0046812561340630054, + "step": 1789 + }, + { + "ce_ib": 3.679049253463745, + "ce_orig": 0.7241947650909424, + "epoch": 0.5144870227909987, + "kl_loss": 0.21339523792266846, + "loss_ib": 0.005813001189380884, + "step": 1789 + }, + { + "epoch": 0.5147746063699763, + "grad_norm": 0.12801145017147064, + "learning_rate": 9.488360979115719e-06, + "loss": 0.8703, + "step": 1790 + }, + { + "ce_ib": 5.356670379638672, + "ce_orig": 0.858161985874176, + "epoch": 0.5147746063699763, + "kl_loss": 0.17264670133590698, + "loss_ib": 0.007083137519657612, + "step": 1790 + }, + { + "ce_ib": 5.483158588409424, + "ce_orig": 0.8920816779136658, + "epoch": 0.5147746063699763, + "kl_loss": 0.17023879289627075, + "loss_ib": 0.0071855466812849045, + "step": 1790 + }, + { + "ce_ib": 5.797133445739746, + "ce_orig": 0.503385603427887, + "epoch": 0.5147746063699763, + "kl_loss": 0.2506237030029297, + "loss_ib": 0.008303370326757431, + "step": 1790 + }, + { + "ce_ib": 6.2926154136657715, + "ce_orig": 0.9780447483062744, + "epoch": 0.5147746063699763, + "kl_loss": 0.24656260013580322, + "loss_ib": 0.008758241310715675, + "step": 1790 + }, + { + "ce_ib": 8.355574607849121, + "ce_orig": 1.5691980123519897, + "epoch": 0.5150621899489539, + "kl_loss": 0.1623454988002777, + "loss_ib": 0.00997903011739254, + "step": 1791 + }, + { + "ce_ib": 3.8799028396606445, + "ce_orig": 0.3765094578266144, + "epoch": 0.5150621899489539, + "kl_loss": 0.2286275327205658, + "loss_ib": 0.006166177801787853, + "step": 1791 + }, + { + "ce_ib": 3.321380615234375, + "ce_orig": 0.5442015528678894, + "epoch": 0.5150621899489539, + "kl_loss": 0.1851947158575058, + "loss_ib": 0.0051733278669416904, + "step": 1791 + }, + { + "ce_ib": 8.461237907409668, + "ce_orig": 1.496105670928955, + "epoch": 0.5150621899489539, + "kl_loss": 0.21716958284378052, + "loss_ib": 0.010632934048771858, + "step": 1791 + }, + { + "ce_ib": 5.559602737426758, + "ce_orig": 0.7069318294525146, + "epoch": 0.5153497735279315, + "kl_loss": 0.1599215269088745, + "loss_ib": 0.0071588181890547276, + "step": 1792 + }, + { + "ce_ib": 5.715478897094727, + "ce_orig": 0.5931094884872437, + "epoch": 0.5153497735279315, + "kl_loss": 0.20943738520145416, + "loss_ib": 0.0078098527155816555, + "step": 1792 + }, + { + "ce_ib": 6.070672035217285, + "ce_orig": 0.8749914169311523, + "epoch": 0.5153497735279315, + "kl_loss": 0.2170514613389969, + "loss_ib": 0.00824118684977293, + "step": 1792 + }, + { + "ce_ib": 5.315495014190674, + "ce_orig": 0.7516446709632874, + "epoch": 0.5153497735279315, + "kl_loss": 0.236702099442482, + "loss_ib": 0.007682515773922205, + "step": 1792 + }, + { + "ce_ib": 6.013267517089844, + "ce_orig": 0.95022052526474, + "epoch": 0.5156373571069092, + "kl_loss": 0.15140356123447418, + "loss_ib": 0.007527302950620651, + "step": 1793 + }, + { + "ce_ib": 3.3293774127960205, + "ce_orig": 0.5675919055938721, + "epoch": 0.5156373571069092, + "kl_loss": 0.2904687523841858, + "loss_ib": 0.006234065163880587, + "step": 1793 + }, + { + "ce_ib": 6.802953720092773, + "ce_orig": 1.1898548603057861, + "epoch": 0.5156373571069092, + "kl_loss": 0.2126396894454956, + "loss_ib": 0.008929350413382053, + "step": 1793 + }, + { + "ce_ib": 5.496389865875244, + "ce_orig": 0.4064522683620453, + "epoch": 0.5156373571069092, + "kl_loss": 0.18568216264247894, + "loss_ib": 0.007353211287409067, + "step": 1793 + }, + { + "ce_ib": 2.6666884422302246, + "ce_orig": 0.3487064838409424, + "epoch": 0.5159249406858868, + "kl_loss": 0.363121896982193, + "loss_ib": 0.006297907792031765, + "step": 1794 + }, + { + "ce_ib": 6.851724624633789, + "ce_orig": 1.0352543592453003, + "epoch": 0.5159249406858868, + "kl_loss": 0.1926984190940857, + "loss_ib": 0.008778708986938, + "step": 1794 + }, + { + "ce_ib": 7.737794399261475, + "ce_orig": 1.1757160425186157, + "epoch": 0.5159249406858868, + "kl_loss": 0.2267036885023117, + "loss_ib": 0.010004831477999687, + "step": 1794 + }, + { + "ce_ib": 6.57763147354126, + "ce_orig": 1.18329656124115, + "epoch": 0.5159249406858868, + "kl_loss": 0.28247499465942383, + "loss_ib": 0.009402381256222725, + "step": 1794 + }, + { + "epoch": 0.5162125242648645, + "grad_norm": 0.10428358614444733, + "learning_rate": 9.484935647071273e-06, + "loss": 0.8431, + "step": 1795 + }, + { + "ce_ib": 6.214389801025391, + "ce_orig": 1.3849897384643555, + "epoch": 0.5162125242648645, + "kl_loss": 0.19358517229557037, + "loss_ib": 0.00815024133771658, + "step": 1795 + }, + { + "ce_ib": 6.025190830230713, + "ce_orig": 0.49577391147613525, + "epoch": 0.5162125242648645, + "kl_loss": 0.26496514678001404, + "loss_ib": 0.008674842305481434, + "step": 1795 + }, + { + "ce_ib": 4.6445722579956055, + "ce_orig": 0.952302098274231, + "epoch": 0.5162125242648645, + "kl_loss": 0.20629054307937622, + "loss_ib": 0.006707477383315563, + "step": 1795 + }, + { + "ce_ib": 6.648656845092773, + "ce_orig": 0.8857218027114868, + "epoch": 0.5162125242648645, + "kl_loss": 0.24975921213626862, + "loss_ib": 0.009146248921751976, + "step": 1795 + }, + { + "ce_ib": 5.934831142425537, + "ce_orig": 0.5952855944633484, + "epoch": 0.5165001078438421, + "kl_loss": 0.29829543828964233, + "loss_ib": 0.008917785249650478, + "step": 1796 + }, + { + "ce_ib": 8.936899185180664, + "ce_orig": 1.2357314825057983, + "epoch": 0.5165001078438421, + "kl_loss": 0.2907228171825409, + "loss_ib": 0.011844126507639885, + "step": 1796 + }, + { + "ce_ib": 7.4102959632873535, + "ce_orig": 1.278591275215149, + "epoch": 0.5165001078438421, + "kl_loss": 0.23426221311092377, + "loss_ib": 0.009752918034791946, + "step": 1796 + }, + { + "ce_ib": 6.351538181304932, + "ce_orig": 1.2011011838912964, + "epoch": 0.5165001078438421, + "kl_loss": 0.18798768520355225, + "loss_ib": 0.008231415413320065, + "step": 1796 + }, + { + "ce_ib": 4.647305965423584, + "ce_orig": 0.6263547539710999, + "epoch": 0.5167876914228198, + "kl_loss": 0.24468818306922913, + "loss_ib": 0.0070941876620054245, + "step": 1797 + }, + { + "ce_ib": 4.786785125732422, + "ce_orig": 0.805772602558136, + "epoch": 0.5167876914228198, + "kl_loss": 0.2500882148742676, + "loss_ib": 0.007287667132914066, + "step": 1797 + }, + { + "ce_ib": 4.60325288772583, + "ce_orig": 0.7054445743560791, + "epoch": 0.5167876914228198, + "kl_loss": 0.25732022523880005, + "loss_ib": 0.007176455110311508, + "step": 1797 + }, + { + "ce_ib": 5.693312168121338, + "ce_orig": 0.6329914927482605, + "epoch": 0.5167876914228198, + "kl_loss": 0.33628445863723755, + "loss_ib": 0.009056156501173973, + "step": 1797 + }, + { + "ce_ib": 7.216248035430908, + "ce_orig": 1.340164303779602, + "epoch": 0.5170752750017974, + "kl_loss": 0.1946697235107422, + "loss_ib": 0.009162944741547108, + "step": 1798 + }, + { + "ce_ib": 5.003932952880859, + "ce_orig": 0.7873584628105164, + "epoch": 0.5170752750017974, + "kl_loss": 0.206161230802536, + "loss_ib": 0.007065545301884413, + "step": 1798 + }, + { + "ce_ib": 3.926520586013794, + "ce_orig": 0.6281074285507202, + "epoch": 0.5170752750017974, + "kl_loss": 0.15041778981685638, + "loss_ib": 0.005430698394775391, + "step": 1798 + }, + { + "ce_ib": 6.007340908050537, + "ce_orig": 0.9729514718055725, + "epoch": 0.5170752750017974, + "kl_loss": 0.16553735733032227, + "loss_ib": 0.007662714459002018, + "step": 1798 + }, + { + "ce_ib": 7.125491142272949, + "ce_orig": 0.9569961428642273, + "epoch": 0.517362858580775, + "kl_loss": 0.18219032883644104, + "loss_ib": 0.008947394788265228, + "step": 1799 + }, + { + "ce_ib": 7.163382053375244, + "ce_orig": 0.9688937067985535, + "epoch": 0.517362858580775, + "kl_loss": 0.23835283517837524, + "loss_ib": 0.009546910412609577, + "step": 1799 + }, + { + "ce_ib": 7.227553367614746, + "ce_orig": 1.2459492683410645, + "epoch": 0.517362858580775, + "kl_loss": 0.25786083936691284, + "loss_ib": 0.009806161746382713, + "step": 1799 + }, + { + "ce_ib": 7.201501369476318, + "ce_orig": 0.6090161800384521, + "epoch": 0.517362858580775, + "kl_loss": 0.34199318289756775, + "loss_ib": 0.010621433146297932, + "step": 1799 + }, + { + "epoch": 0.5176504421597526, + "grad_norm": 0.11489441245794296, + "learning_rate": 9.481499509775878e-06, + "loss": 0.9051, + "step": 1800 + }, + { + "ce_ib": 3.390878915786743, + "ce_orig": 0.4669150412082672, + "epoch": 0.5176504421597526, + "kl_loss": 0.2900773286819458, + "loss_ib": 0.006291652098298073, + "step": 1800 + }, + { + "ce_ib": 6.904531478881836, + "ce_orig": 1.0020664930343628, + "epoch": 0.5176504421597526, + "kl_loss": 0.2667804956436157, + "loss_ib": 0.009572336450219154, + "step": 1800 + }, + { + "ce_ib": 3.6753551959991455, + "ce_orig": 0.4369771480560303, + "epoch": 0.5176504421597526, + "kl_loss": 0.27190694212913513, + "loss_ib": 0.0063944244757294655, + "step": 1800 + }, + { + "ce_ib": 5.33976411819458, + "ce_orig": 0.9397656321525574, + "epoch": 0.5176504421597526, + "kl_loss": 0.3777155578136444, + "loss_ib": 0.009116919711232185, + "step": 1800 + }, + { + "ce_ib": 2.7868943214416504, + "ce_orig": 0.45415911078453064, + "epoch": 0.5179380257387303, + "kl_loss": 0.4942619502544403, + "loss_ib": 0.007729513570666313, + "step": 1801 + }, + { + "ce_ib": 5.863338947296143, + "ce_orig": 0.6420555114746094, + "epoch": 0.5179380257387303, + "kl_loss": 0.24516044557094574, + "loss_ib": 0.008314943872392178, + "step": 1801 + }, + { + "ce_ib": 3.1034252643585205, + "ce_orig": 0.5916230082511902, + "epoch": 0.5179380257387303, + "kl_loss": 0.18923625349998474, + "loss_ib": 0.004995787516236305, + "step": 1801 + }, + { + "ce_ib": 6.073347091674805, + "ce_orig": 0.6509691476821899, + "epoch": 0.5179380257387303, + "kl_loss": 0.22603951394557953, + "loss_ib": 0.008333742618560791, + "step": 1801 + }, + { + "ce_ib": 3.9230880737304688, + "ce_orig": 0.5485641360282898, + "epoch": 0.518225609317708, + "kl_loss": 0.23585425317287445, + "loss_ib": 0.006281630136072636, + "step": 1802 + }, + { + "ce_ib": 4.007041931152344, + "ce_orig": 0.6441857218742371, + "epoch": 0.518225609317708, + "kl_loss": 0.2565135061740875, + "loss_ib": 0.006572177167981863, + "step": 1802 + }, + { + "ce_ib": 8.00465202331543, + "ce_orig": 1.16056489944458, + "epoch": 0.518225609317708, + "kl_loss": 0.36502379179000854, + "loss_ib": 0.011654889211058617, + "step": 1802 + }, + { + "ce_ib": 5.138528347015381, + "ce_orig": 0.8069429397583008, + "epoch": 0.518225609317708, + "kl_loss": 0.20522454380989075, + "loss_ib": 0.0071907732635736465, + "step": 1802 + }, + { + "ce_ib": 3.2270872592926025, + "ce_orig": 0.6316734552383423, + "epoch": 0.5185131928966856, + "kl_loss": 0.2573164701461792, + "loss_ib": 0.005800251848995686, + "step": 1803 + }, + { + "ce_ib": 6.0523858070373535, + "ce_orig": 0.6935653686523438, + "epoch": 0.5185131928966856, + "kl_loss": 0.4677497148513794, + "loss_ib": 0.01072988286614418, + "step": 1803 + }, + { + "ce_ib": 4.9070281982421875, + "ce_orig": 0.7013511061668396, + "epoch": 0.5185131928966856, + "kl_loss": 0.22718186676502228, + "loss_ib": 0.007178847212344408, + "step": 1803 + }, + { + "ce_ib": 6.728785991668701, + "ce_orig": 1.182370662689209, + "epoch": 0.5185131928966856, + "kl_loss": 0.15556176006793976, + "loss_ib": 0.008284403942525387, + "step": 1803 + }, + { + "ce_ib": 4.9489054679870605, + "ce_orig": 0.6644280552864075, + "epoch": 0.5188007764756633, + "kl_loss": 0.2218227982521057, + "loss_ib": 0.0071671330370008945, + "step": 1804 + }, + { + "ce_ib": 3.5137171745300293, + "ce_orig": 0.744360089302063, + "epoch": 0.5188007764756633, + "kl_loss": 0.14267009496688843, + "loss_ib": 0.0049404180608689785, + "step": 1804 + }, + { + "ce_ib": 6.757044792175293, + "ce_orig": 1.1124842166900635, + "epoch": 0.5188007764756633, + "kl_loss": 0.23045742511749268, + "loss_ib": 0.009061618708074093, + "step": 1804 + }, + { + "ce_ib": 5.489344596862793, + "ce_orig": 1.2817620038986206, + "epoch": 0.5188007764756633, + "kl_loss": 0.2718643844127655, + "loss_ib": 0.008207988925278187, + "step": 1804 + }, + { + "epoch": 0.5190883600546409, + "grad_norm": 0.13748838007450104, + "learning_rate": 9.478052575507983e-06, + "loss": 0.883, + "step": 1805 + }, + { + "ce_ib": 4.155758857727051, + "ce_orig": 0.7262378931045532, + "epoch": 0.5190883600546409, + "kl_loss": 0.22212238609790802, + "loss_ib": 0.006376982666552067, + "step": 1805 + }, + { + "ce_ib": 6.122762680053711, + "ce_orig": 0.7501749992370605, + "epoch": 0.5190883600546409, + "kl_loss": 0.2191631942987442, + "loss_ib": 0.008314394392073154, + "step": 1805 + }, + { + "ce_ib": 4.582356929779053, + "ce_orig": 0.6791300177574158, + "epoch": 0.5190883600546409, + "kl_loss": 0.14363408088684082, + "loss_ib": 0.006018698215484619, + "step": 1805 + }, + { + "ce_ib": 7.492832183837891, + "ce_orig": 1.3158620595932007, + "epoch": 0.5190883600546409, + "kl_loss": 0.16507276892662048, + "loss_ib": 0.009143560193479061, + "step": 1805 + }, + { + "ce_ib": 8.37580394744873, + "ce_orig": 1.5903420448303223, + "epoch": 0.5193759436336185, + "kl_loss": 0.2548186779022217, + "loss_ib": 0.010923990979790688, + "step": 1806 + }, + { + "ce_ib": 3.802596092224121, + "ce_orig": 0.7291561365127563, + "epoch": 0.5193759436336185, + "kl_loss": 0.2117210030555725, + "loss_ib": 0.0059198057278990746, + "step": 1806 + }, + { + "ce_ib": 4.115170001983643, + "ce_orig": 1.112787127494812, + "epoch": 0.5193759436336185, + "kl_loss": 0.1683754026889801, + "loss_ib": 0.005798923783004284, + "step": 1806 + }, + { + "ce_ib": 5.299164295196533, + "ce_orig": 1.3519405126571655, + "epoch": 0.5193759436336185, + "kl_loss": 0.11419677734375, + "loss_ib": 0.006441132165491581, + "step": 1806 + }, + { + "ce_ib": 4.519089698791504, + "ce_orig": 0.6978946924209595, + "epoch": 0.5196635272125961, + "kl_loss": 0.3922877311706543, + "loss_ib": 0.008441966958343983, + "step": 1807 + }, + { + "ce_ib": 7.629366874694824, + "ce_orig": 1.3517639636993408, + "epoch": 0.5196635272125961, + "kl_loss": 0.2827756702899933, + "loss_ib": 0.010457123629748821, + "step": 1807 + }, + { + "ce_ib": 6.334427356719971, + "ce_orig": 1.0139820575714111, + "epoch": 0.5196635272125961, + "kl_loss": 0.5295234322547913, + "loss_ib": 0.011629662476480007, + "step": 1807 + }, + { + "ce_ib": 4.028738021850586, + "ce_orig": 0.34927576780319214, + "epoch": 0.5196635272125961, + "kl_loss": 0.24801911413669586, + "loss_ib": 0.006508929189294577, + "step": 1807 + }, + { + "ce_ib": 5.080193996429443, + "ce_orig": 0.6764727830886841, + "epoch": 0.5199511107915739, + "kl_loss": 0.28257232904434204, + "loss_ib": 0.00790591724216938, + "step": 1808 + }, + { + "ce_ib": 4.959287643432617, + "ce_orig": 0.680107831954956, + "epoch": 0.5199511107915739, + "kl_loss": 0.37651553750038147, + "loss_ib": 0.008724442683160305, + "step": 1808 + }, + { + "ce_ib": 3.6068825721740723, + "ce_orig": 0.5630862712860107, + "epoch": 0.5199511107915739, + "kl_loss": 0.2557922601699829, + "loss_ib": 0.00616480503231287, + "step": 1808 + }, + { + "ce_ib": 2.9706921577453613, + "ce_orig": 0.49435415863990784, + "epoch": 0.5199511107915739, + "kl_loss": 0.24606207013130188, + "loss_ib": 0.0054313126020133495, + "step": 1808 + }, + { + "ce_ib": 8.037646293640137, + "ce_orig": 1.4617276191711426, + "epoch": 0.5202386943705515, + "kl_loss": 0.73418128490448, + "loss_ib": 0.015379459597170353, + "step": 1809 + }, + { + "ce_ib": 5.94744348526001, + "ce_orig": 0.6382511854171753, + "epoch": 0.5202386943705515, + "kl_loss": 0.297149658203125, + "loss_ib": 0.008918940089643002, + "step": 1809 + }, + { + "ce_ib": 3.2467129230499268, + "ce_orig": 0.7186192870140076, + "epoch": 0.5202386943705515, + "kl_loss": 0.4525447189807892, + "loss_ib": 0.007772160228341818, + "step": 1809 + }, + { + "ce_ib": 3.5433926582336426, + "ce_orig": 0.39463841915130615, + "epoch": 0.5202386943705515, + "kl_loss": 0.23873688280582428, + "loss_ib": 0.005930761341005564, + "step": 1809 + }, + { + "epoch": 0.5205262779495291, + "grad_norm": 0.1307157725095749, + "learning_rate": 9.47459485257206e-06, + "loss": 0.8351, + "step": 1810 + }, + { + "ce_ib": 6.789810657501221, + "ce_orig": 1.124668836593628, + "epoch": 0.5205262779495291, + "kl_loss": 0.2300483137369156, + "loss_ib": 0.009090293198823929, + "step": 1810 + }, + { + "ce_ib": 3.373751163482666, + "ce_orig": 0.68404620885849, + "epoch": 0.5205262779495291, + "kl_loss": 0.17840611934661865, + "loss_ib": 0.005157812498509884, + "step": 1810 + }, + { + "ce_ib": 7.042891025543213, + "ce_orig": 1.2993831634521484, + "epoch": 0.5205262779495291, + "kl_loss": 0.20561161637306213, + "loss_ib": 0.009099007584154606, + "step": 1810 + }, + { + "ce_ib": 3.786529064178467, + "ce_orig": 0.6418856978416443, + "epoch": 0.5205262779495291, + "kl_loss": 0.17593012750148773, + "loss_ib": 0.005545829888433218, + "step": 1810 + }, + { + "ce_ib": 4.698478698730469, + "ce_orig": 0.3812403082847595, + "epoch": 0.5208138615285067, + "kl_loss": 0.20140892267227173, + "loss_ib": 0.006712567526847124, + "step": 1811 + }, + { + "ce_ib": 3.34533953666687, + "ce_orig": 0.7359578013420105, + "epoch": 0.5208138615285067, + "kl_loss": 0.13502469658851624, + "loss_ib": 0.004695586394518614, + "step": 1811 + }, + { + "ce_ib": 6.590109348297119, + "ce_orig": 1.0476917028427124, + "epoch": 0.5208138615285067, + "kl_loss": 0.3302564024925232, + "loss_ib": 0.00989267323166132, + "step": 1811 + }, + { + "ce_ib": 6.3849921226501465, + "ce_orig": 1.483210802078247, + "epoch": 0.5208138615285067, + "kl_loss": 0.15260383486747742, + "loss_ib": 0.00791103020310402, + "step": 1811 + }, + { + "ce_ib": 4.3355841636657715, + "ce_orig": 0.9082209467887878, + "epoch": 0.5211014451074844, + "kl_loss": 0.18973350524902344, + "loss_ib": 0.00623291963711381, + "step": 1812 + }, + { + "ce_ib": 4.391592025756836, + "ce_orig": 0.4710513949394226, + "epoch": 0.5211014451074844, + "kl_loss": 0.3285670280456543, + "loss_ib": 0.007677262183278799, + "step": 1812 + }, + { + "ce_ib": 6.789366245269775, + "ce_orig": 1.4087302684783936, + "epoch": 0.5211014451074844, + "kl_loss": 0.23697486519813538, + "loss_ib": 0.009159115143120289, + "step": 1812 + }, + { + "ce_ib": 7.329963684082031, + "ce_orig": 1.1739439964294434, + "epoch": 0.5211014451074844, + "kl_loss": 0.23812434077262878, + "loss_ib": 0.009711206890642643, + "step": 1812 + }, + { + "ce_ib": 4.353401184082031, + "ce_orig": 0.7496168613433838, + "epoch": 0.521389028686462, + "kl_loss": 0.15959800779819489, + "loss_ib": 0.005949381273239851, + "step": 1813 + }, + { + "ce_ib": 6.93917989730835, + "ce_orig": 0.8042504191398621, + "epoch": 0.521389028686462, + "kl_loss": 0.26898664236068726, + "loss_ib": 0.00962904654443264, + "step": 1813 + }, + { + "ce_ib": 3.2111239433288574, + "ce_orig": 0.6300265192985535, + "epoch": 0.521389028686462, + "kl_loss": 0.14442920684814453, + "loss_ib": 0.004655416123569012, + "step": 1813 + }, + { + "ce_ib": 5.082729816436768, + "ce_orig": 0.8240136504173279, + "epoch": 0.521389028686462, + "kl_loss": 0.2538681626319885, + "loss_ib": 0.007621411699801683, + "step": 1813 + }, + { + "ce_ib": 7.506804466247559, + "ce_orig": 1.042869210243225, + "epoch": 0.5216766122654396, + "kl_loss": 0.20386838912963867, + "loss_ib": 0.00954548828303814, + "step": 1814 + }, + { + "ce_ib": 5.0663886070251465, + "ce_orig": 0.6606091856956482, + "epoch": 0.5216766122654396, + "kl_loss": 0.20135483145713806, + "loss_ib": 0.007079937495291233, + "step": 1814 + }, + { + "ce_ib": 3.6751060485839844, + "ce_orig": 0.6798691749572754, + "epoch": 0.5216766122654396, + "kl_loss": 0.21309301257133484, + "loss_ib": 0.005806035827845335, + "step": 1814 + }, + { + "ce_ib": 2.864553213119507, + "ce_orig": 0.3934019207954407, + "epoch": 0.5216766122654396, + "kl_loss": 0.2546788156032562, + "loss_ib": 0.005411340855062008, + "step": 1814 + }, + { + "epoch": 0.5219641958444173, + "grad_norm": 0.13427554070949554, + "learning_rate": 9.471126349298557e-06, + "loss": 0.8379, + "step": 1815 + }, + { + "ce_ib": 9.50009822845459, + "ce_orig": 1.661615014076233, + "epoch": 0.5219641958444173, + "kl_loss": 0.28572994470596313, + "loss_ib": 0.012357397004961967, + "step": 1815 + }, + { + "ce_ib": 3.747300624847412, + "ce_orig": 0.6363303065299988, + "epoch": 0.5219641958444173, + "kl_loss": 0.19487634301185608, + "loss_ib": 0.005696064326912165, + "step": 1815 + }, + { + "ce_ib": 8.828596115112305, + "ce_orig": 1.535306453704834, + "epoch": 0.5219641958444173, + "kl_loss": 0.2173335999250412, + "loss_ib": 0.011001932434737682, + "step": 1815 + }, + { + "ce_ib": 4.563418388366699, + "ce_orig": 0.7024432420730591, + "epoch": 0.5219641958444173, + "kl_loss": 0.24083112180233002, + "loss_ib": 0.006971729453653097, + "step": 1815 + }, + { + "ce_ib": 2.5764825344085693, + "ce_orig": 0.5380098223686218, + "epoch": 0.522251779423395, + "kl_loss": 0.15003418922424316, + "loss_ib": 0.004076824523508549, + "step": 1816 + }, + { + "ce_ib": 7.708797931671143, + "ce_orig": 1.3907426595687866, + "epoch": 0.522251779423395, + "kl_loss": 0.25853198766708374, + "loss_ib": 0.010294117964804173, + "step": 1816 + }, + { + "ce_ib": 4.552369594573975, + "ce_orig": 0.44630172848701477, + "epoch": 0.522251779423395, + "kl_loss": 0.30989986658096313, + "loss_ib": 0.007651368156075478, + "step": 1816 + }, + { + "ce_ib": 5.001628875732422, + "ce_orig": 1.0199403762817383, + "epoch": 0.522251779423395, + "kl_loss": 0.16034522652626038, + "loss_ib": 0.006605081260204315, + "step": 1816 + }, + { + "ce_ib": 10.416280746459961, + "ce_orig": 1.8923710584640503, + "epoch": 0.5225393630023726, + "kl_loss": 0.2715950608253479, + "loss_ib": 0.013132231310009956, + "step": 1817 + }, + { + "ce_ib": 6.229454040527344, + "ce_orig": 1.060247778892517, + "epoch": 0.5225393630023726, + "kl_loss": 0.23605555295944214, + "loss_ib": 0.00859000999480486, + "step": 1817 + }, + { + "ce_ib": 6.946117401123047, + "ce_orig": 1.0619940757751465, + "epoch": 0.5225393630023726, + "kl_loss": 0.3530215620994568, + "loss_ib": 0.01047633308917284, + "step": 1817 + }, + { + "ce_ib": 8.509015083312988, + "ce_orig": 1.2890263795852661, + "epoch": 0.5225393630023726, + "kl_loss": 0.22496478259563446, + "loss_ib": 0.010758663527667522, + "step": 1817 + }, + { + "ce_ib": 4.807441711425781, + "ce_orig": 0.8506971001625061, + "epoch": 0.5228269465813502, + "kl_loss": 0.24618715047836304, + "loss_ib": 0.007269313093274832, + "step": 1818 + }, + { + "ce_ib": 6.583683013916016, + "ce_orig": 0.9181709885597229, + "epoch": 0.5228269465813502, + "kl_loss": 0.19941741228103638, + "loss_ib": 0.008577857166528702, + "step": 1818 + }, + { + "ce_ib": 6.426215648651123, + "ce_orig": 0.9120950102806091, + "epoch": 0.5228269465813502, + "kl_loss": 0.15273059904575348, + "loss_ib": 0.00795352179557085, + "step": 1818 + }, + { + "ce_ib": 6.072673320770264, + "ce_orig": 0.8870359063148499, + "epoch": 0.5228269465813502, + "kl_loss": 0.3441610634326935, + "loss_ib": 0.009514284320175648, + "step": 1818 + }, + { + "ce_ib": 3.889371633529663, + "ce_orig": 0.6676734089851379, + "epoch": 0.5231145301603278, + "kl_loss": 0.20489048957824707, + "loss_ib": 0.005938276648521423, + "step": 1819 + }, + { + "ce_ib": 6.331286907196045, + "ce_orig": 0.7984427213668823, + "epoch": 0.5231145301603278, + "kl_loss": 0.21341674029827118, + "loss_ib": 0.00846545398235321, + "step": 1819 + }, + { + "ce_ib": 6.073073863983154, + "ce_orig": 1.0317842960357666, + "epoch": 0.5231145301603278, + "kl_loss": 0.3320351541042328, + "loss_ib": 0.009393424727022648, + "step": 1819 + }, + { + "ce_ib": 5.782386302947998, + "ce_orig": 0.8939815163612366, + "epoch": 0.5231145301603278, + "kl_loss": 0.22046232223510742, + "loss_ib": 0.0079870093613863, + "step": 1819 + }, + { + "epoch": 0.5234021137393055, + "grad_norm": 0.1295160949230194, + "learning_rate": 9.467647074043911e-06, + "loss": 0.936, + "step": 1820 + }, + { + "ce_ib": 4.161619186401367, + "ce_orig": 0.6846871376037598, + "epoch": 0.5234021137393055, + "kl_loss": 0.16246896982192993, + "loss_ib": 0.005786309018731117, + "step": 1820 + }, + { + "ce_ib": 5.756795406341553, + "ce_orig": 1.0516748428344727, + "epoch": 0.5234021137393055, + "kl_loss": 0.2070978283882141, + "loss_ib": 0.007827773690223694, + "step": 1820 + }, + { + "ce_ib": 5.790688514709473, + "ce_orig": 0.8329687118530273, + "epoch": 0.5234021137393055, + "kl_loss": 0.3111580014228821, + "loss_ib": 0.00890226848423481, + "step": 1820 + }, + { + "ce_ib": 3.9477272033691406, + "ce_orig": 0.7902374863624573, + "epoch": 0.5234021137393055, + "kl_loss": 0.18525344133377075, + "loss_ib": 0.005800261162221432, + "step": 1820 + }, + { + "ce_ib": 6.208718299865723, + "ce_orig": 0.8927279114723206, + "epoch": 0.5236896973182831, + "kl_loss": 0.3012109398841858, + "loss_ib": 0.009220827370882034, + "step": 1821 + }, + { + "ce_ib": 6.990211486816406, + "ce_orig": 0.9636775851249695, + "epoch": 0.5236896973182831, + "kl_loss": 0.20870433747768402, + "loss_ib": 0.009077254682779312, + "step": 1821 + }, + { + "ce_ib": 8.73526668548584, + "ce_orig": 1.3548780679702759, + "epoch": 0.5236896973182831, + "kl_loss": 0.44657760858535767, + "loss_ib": 0.013201043009757996, + "step": 1821 + }, + { + "ce_ib": 5.386695384979248, + "ce_orig": 0.7904844284057617, + "epoch": 0.5236896973182831, + "kl_loss": 0.18388625979423523, + "loss_ib": 0.00722555723041296, + "step": 1821 + }, + { + "ce_ib": 4.732143878936768, + "ce_orig": 0.8115664720535278, + "epoch": 0.5239772808972608, + "kl_loss": 0.188096284866333, + "loss_ib": 0.006613106466829777, + "step": 1822 + }, + { + "ce_ib": 5.429836750030518, + "ce_orig": 0.6583589911460876, + "epoch": 0.5239772808972608, + "kl_loss": 0.2904740571975708, + "loss_ib": 0.008334577083587646, + "step": 1822 + }, + { + "ce_ib": 3.356475591659546, + "ce_orig": 0.6954246759414673, + "epoch": 0.5239772808972608, + "kl_loss": 0.14421172440052032, + "loss_ib": 0.004798592999577522, + "step": 1822 + }, + { + "ce_ib": 4.951510906219482, + "ce_orig": 0.9459335207939148, + "epoch": 0.5239772808972608, + "kl_loss": 0.24713939428329468, + "loss_ib": 0.007422904949635267, + "step": 1822 + }, + { + "ce_ib": 3.451356887817383, + "ce_orig": 0.7636024355888367, + "epoch": 0.5242648644762384, + "kl_loss": 0.21749041974544525, + "loss_ib": 0.005626261234283447, + "step": 1823 + }, + { + "ce_ib": 3.5414316654205322, + "ce_orig": 0.6915144920349121, + "epoch": 0.5242648644762384, + "kl_loss": 0.20919787883758545, + "loss_ib": 0.005633410066366196, + "step": 1823 + }, + { + "ce_ib": 4.181634902954102, + "ce_orig": 0.30439430475234985, + "epoch": 0.5242648644762384, + "kl_loss": 0.5112854242324829, + "loss_ib": 0.009294489398598671, + "step": 1823 + }, + { + "ce_ib": 6.770209789276123, + "ce_orig": 0.7689716219902039, + "epoch": 0.5242648644762384, + "kl_loss": 0.46790021657943726, + "loss_ib": 0.011449211277067661, + "step": 1823 + }, + { + "ce_ib": 6.827091217041016, + "ce_orig": 0.7090415358543396, + "epoch": 0.5245524480552161, + "kl_loss": 0.3946807384490967, + "loss_ib": 0.010773899033665657, + "step": 1824 + }, + { + "ce_ib": 5.242634296417236, + "ce_orig": 0.6740880012512207, + "epoch": 0.5245524480552161, + "kl_loss": 0.17949745059013367, + "loss_ib": 0.007037608418613672, + "step": 1824 + }, + { + "ce_ib": 7.583951473236084, + "ce_orig": 1.1575921773910522, + "epoch": 0.5245524480552161, + "kl_loss": 0.22207129001617432, + "loss_ib": 0.009804664179682732, + "step": 1824 + }, + { + "ce_ib": 3.365133047103882, + "ce_orig": 0.5091253519058228, + "epoch": 0.5245524480552161, + "kl_loss": 0.20353856682777405, + "loss_ib": 0.005400518886744976, + "step": 1824 + }, + { + "epoch": 0.5248400316341937, + "grad_norm": 0.13930882513523102, + "learning_rate": 9.4641570351905e-06, + "loss": 0.8489, + "step": 1825 + }, + { + "ce_ib": 5.171679496765137, + "ce_orig": 1.0531930923461914, + "epoch": 0.5248400316341937, + "kl_loss": 0.17203733325004578, + "loss_ib": 0.006892052944749594, + "step": 1825 + }, + { + "ce_ib": 4.869714260101318, + "ce_orig": 0.5718777775764465, + "epoch": 0.5248400316341937, + "kl_loss": 0.27115121483802795, + "loss_ib": 0.0075812265276908875, + "step": 1825 + }, + { + "ce_ib": 3.5416884422302246, + "ce_orig": 0.8580121397972107, + "epoch": 0.5248400316341937, + "kl_loss": 0.1877019703388214, + "loss_ib": 0.005418708082288504, + "step": 1825 + }, + { + "ce_ib": 9.05768871307373, + "ce_orig": 1.5205368995666504, + "epoch": 0.5248400316341937, + "kl_loss": 0.18774256110191345, + "loss_ib": 0.010935114696621895, + "step": 1825 + }, + { + "ce_ib": 3.9015533924102783, + "ce_orig": 0.8431259989738464, + "epoch": 0.5251276152131713, + "kl_loss": 0.14487867057323456, + "loss_ib": 0.005350339692085981, + "step": 1826 + }, + { + "ce_ib": 4.161779403686523, + "ce_orig": 0.8969098925590515, + "epoch": 0.5251276152131713, + "kl_loss": 0.2807765305042267, + "loss_ib": 0.006969544570893049, + "step": 1826 + }, + { + "ce_ib": 10.926655769348145, + "ce_orig": 1.9176727533340454, + "epoch": 0.5251276152131713, + "kl_loss": 0.5257304906845093, + "loss_ib": 0.01618395932018757, + "step": 1826 + }, + { + "ce_ib": 5.555898189544678, + "ce_orig": 0.9816900491714478, + "epoch": 0.5251276152131713, + "kl_loss": 0.3077981472015381, + "loss_ib": 0.008633879944682121, + "step": 1826 + }, + { + "ce_ib": 4.505410671234131, + "ce_orig": 0.7756586670875549, + "epoch": 0.5254151987921489, + "kl_loss": 0.3038627803325653, + "loss_ib": 0.007544038351625204, + "step": 1827 + }, + { + "ce_ib": 4.859555244445801, + "ce_orig": 0.9365068078041077, + "epoch": 0.5254151987921489, + "kl_loss": 0.20699253678321838, + "loss_ib": 0.006929480936378241, + "step": 1827 + }, + { + "ce_ib": 5.228682994842529, + "ce_orig": 0.827009916305542, + "epoch": 0.5254151987921489, + "kl_loss": 0.1980331391096115, + "loss_ib": 0.007209014613181353, + "step": 1827 + }, + { + "ce_ib": 2.3166110515594482, + "ce_orig": 0.30793437361717224, + "epoch": 0.5254151987921489, + "kl_loss": 0.17321155965328217, + "loss_ib": 0.0040487265214324, + "step": 1827 + }, + { + "ce_ib": 4.875110149383545, + "ce_orig": 0.6053239703178406, + "epoch": 0.5257027823711266, + "kl_loss": 0.2659691274166107, + "loss_ib": 0.0075348010286688805, + "step": 1828 + }, + { + "ce_ib": 5.519563674926758, + "ce_orig": 0.8646820783615112, + "epoch": 0.5257027823711266, + "kl_loss": 0.24867025017738342, + "loss_ib": 0.008006266318261623, + "step": 1828 + }, + { + "ce_ib": 5.005699157714844, + "ce_orig": 0.8784776329994202, + "epoch": 0.5257027823711266, + "kl_loss": 0.2948490381240845, + "loss_ib": 0.00795418955385685, + "step": 1828 + }, + { + "ce_ib": 8.660489082336426, + "ce_orig": 1.0395890474319458, + "epoch": 0.5257027823711266, + "kl_loss": 0.21846416592597961, + "loss_ib": 0.010845130309462547, + "step": 1828 + }, + { + "ce_ib": 3.270155191421509, + "ce_orig": 0.6141106486320496, + "epoch": 0.5259903659501043, + "kl_loss": 0.17618581652641296, + "loss_ib": 0.005032013636082411, + "step": 1829 + }, + { + "ce_ib": 5.039238929748535, + "ce_orig": 0.9362063407897949, + "epoch": 0.5259903659501043, + "kl_loss": 0.4431926906108856, + "loss_ib": 0.0094711659476161, + "step": 1829 + }, + { + "ce_ib": 5.330448627471924, + "ce_orig": 0.8868371844291687, + "epoch": 0.5259903659501043, + "kl_loss": 0.3449958264827728, + "loss_ib": 0.008780406787991524, + "step": 1829 + }, + { + "ce_ib": 7.948352336883545, + "ce_orig": 1.10189688205719, + "epoch": 0.5259903659501043, + "kl_loss": 0.15406270325183868, + "loss_ib": 0.00948897935450077, + "step": 1829 + }, + { + "epoch": 0.5262779495290819, + "grad_norm": 0.12116040289402008, + "learning_rate": 9.460656241146637e-06, + "loss": 0.8764, + "step": 1830 + }, + { + "ce_ib": 7.966455459594727, + "ce_orig": 1.353620171546936, + "epoch": 0.5262779495290819, + "kl_loss": 0.2310958206653595, + "loss_ib": 0.01027741376310587, + "step": 1830 + }, + { + "ce_ib": 5.285830974578857, + "ce_orig": 0.6761056184768677, + "epoch": 0.5262779495290819, + "kl_loss": 0.16036930680274963, + "loss_ib": 0.006889524403959513, + "step": 1830 + }, + { + "ce_ib": 9.416250228881836, + "ce_orig": 1.3026270866394043, + "epoch": 0.5262779495290819, + "kl_loss": 0.24720579385757446, + "loss_ib": 0.011888308450579643, + "step": 1830 + }, + { + "ce_ib": 5.543875694274902, + "ce_orig": 0.46377140283584595, + "epoch": 0.5262779495290819, + "kl_loss": 0.27701789140701294, + "loss_ib": 0.00831405445933342, + "step": 1830 + }, + { + "ce_ib": 2.8311800956726074, + "ce_orig": 0.2755441963672638, + "epoch": 0.5265655331080595, + "kl_loss": 0.25843143463134766, + "loss_ib": 0.005415494553744793, + "step": 1831 + }, + { + "ce_ib": 8.62271785736084, + "ce_orig": 1.1264030933380127, + "epoch": 0.5265655331080595, + "kl_loss": 0.32971686124801636, + "loss_ib": 0.01191988680511713, + "step": 1831 + }, + { + "ce_ib": 5.154438495635986, + "ce_orig": 0.9023049473762512, + "epoch": 0.5265655331080595, + "kl_loss": 0.24387776851654053, + "loss_ib": 0.0075932159088552, + "step": 1831 + }, + { + "ce_ib": 2.5585079193115234, + "ce_orig": 0.46558678150177, + "epoch": 0.5265655331080595, + "kl_loss": 0.17119866609573364, + "loss_ib": 0.004270494449883699, + "step": 1831 + }, + { + "ce_ib": 5.680759906768799, + "ce_orig": 1.1517668962478638, + "epoch": 0.5268531166870372, + "kl_loss": 0.2441079318523407, + "loss_ib": 0.008121839724481106, + "step": 1832 + }, + { + "ce_ib": 5.976526737213135, + "ce_orig": 0.9688917994499207, + "epoch": 0.5268531166870372, + "kl_loss": 0.22500276565551758, + "loss_ib": 0.008226553909480572, + "step": 1832 + }, + { + "ce_ib": 4.417006492614746, + "ce_orig": 0.8098064064979553, + "epoch": 0.5268531166870372, + "kl_loss": 0.1642741858959198, + "loss_ib": 0.006059748586267233, + "step": 1832 + }, + { + "ce_ib": 4.212074279785156, + "ce_orig": 0.6016438007354736, + "epoch": 0.5268531166870372, + "kl_loss": 0.20138077437877655, + "loss_ib": 0.006225882098078728, + "step": 1832 + }, + { + "ce_ib": 4.229487895965576, + "ce_orig": 0.3920978009700775, + "epoch": 0.5271407002660148, + "kl_loss": 0.22139926254749298, + "loss_ib": 0.006443480495363474, + "step": 1833 + }, + { + "ce_ib": 7.130161762237549, + "ce_orig": 1.1279047727584839, + "epoch": 0.5271407002660148, + "kl_loss": 0.2132570892572403, + "loss_ib": 0.009262732230126858, + "step": 1833 + }, + { + "ce_ib": 4.03831148147583, + "ce_orig": 0.5502790808677673, + "epoch": 0.5271407002660148, + "kl_loss": 0.20839695632457733, + "loss_ib": 0.006122280843555927, + "step": 1833 + }, + { + "ce_ib": 5.961589336395264, + "ce_orig": 0.9671207666397095, + "epoch": 0.5271407002660148, + "kl_loss": 0.2966713011264801, + "loss_ib": 0.008928302675485611, + "step": 1833 + }, + { + "ce_ib": 6.421506881713867, + "ce_orig": 1.0025813579559326, + "epoch": 0.5274282838449924, + "kl_loss": 0.19049221277236938, + "loss_ib": 0.008326428942382336, + "step": 1834 + }, + { + "ce_ib": 5.470172882080078, + "ce_orig": 0.9889363050460815, + "epoch": 0.5274282838449924, + "kl_loss": 0.17300361394882202, + "loss_ib": 0.007200208958238363, + "step": 1834 + }, + { + "ce_ib": 3.251887559890747, + "ce_orig": 0.7033747434616089, + "epoch": 0.5274282838449924, + "kl_loss": 0.22987093031406403, + "loss_ib": 0.0055505963973701, + "step": 1834 + }, + { + "ce_ib": 3.2938926219940186, + "ce_orig": 0.7175408005714417, + "epoch": 0.5274282838449924, + "kl_loss": 0.22159971296787262, + "loss_ib": 0.005509889684617519, + "step": 1834 + }, + { + "epoch": 0.5277158674239701, + "grad_norm": 0.10707426071166992, + "learning_rate": 9.45714470034655e-06, + "loss": 0.824, + "step": 1835 + }, + { + "ce_ib": 5.597881317138672, + "ce_orig": 0.9906297326087952, + "epoch": 0.5277158674239701, + "kl_loss": 0.254499226808548, + "loss_ib": 0.008142873644828796, + "step": 1835 + }, + { + "ce_ib": 4.040140151977539, + "ce_orig": 0.6455329060554504, + "epoch": 0.5277158674239701, + "kl_loss": 0.26006942987442017, + "loss_ib": 0.006640834733843803, + "step": 1835 + }, + { + "ce_ib": 4.632035732269287, + "ce_orig": 0.862791121006012, + "epoch": 0.5277158674239701, + "kl_loss": 0.20942223072052002, + "loss_ib": 0.006726257503032684, + "step": 1835 + }, + { + "ce_ib": 5.974380970001221, + "ce_orig": 0.761141836643219, + "epoch": 0.5277158674239701, + "kl_loss": 0.1809392273426056, + "loss_ib": 0.007783772889524698, + "step": 1835 + }, + { + "ce_ib": 3.7566730976104736, + "ce_orig": 0.5548977851867676, + "epoch": 0.5280034510029478, + "kl_loss": 0.2547493875026703, + "loss_ib": 0.006304166745394468, + "step": 1836 + }, + { + "ce_ib": 3.8706605434417725, + "ce_orig": 0.8601248860359192, + "epoch": 0.5280034510029478, + "kl_loss": 0.23735542595386505, + "loss_ib": 0.006244214717298746, + "step": 1836 + }, + { + "ce_ib": 6.391262531280518, + "ce_orig": 0.9554364681243896, + "epoch": 0.5280034510029478, + "kl_loss": 0.2322809100151062, + "loss_ib": 0.008714071474969387, + "step": 1836 + }, + { + "ce_ib": 6.248229026794434, + "ce_orig": 0.7384769320487976, + "epoch": 0.5280034510029478, + "kl_loss": 0.30158042907714844, + "loss_ib": 0.009264033287763596, + "step": 1836 + }, + { + "ce_ib": 5.4738993644714355, + "ce_orig": 0.8020815253257751, + "epoch": 0.5282910345819254, + "kl_loss": 0.27552667260169983, + "loss_ib": 0.008229166269302368, + "step": 1837 + }, + { + "ce_ib": 8.685053825378418, + "ce_orig": 0.5568800568580627, + "epoch": 0.5282910345819254, + "kl_loss": 0.3398151397705078, + "loss_ib": 0.012083206325769424, + "step": 1837 + }, + { + "ce_ib": 3.9550063610076904, + "ce_orig": 0.6799144744873047, + "epoch": 0.5282910345819254, + "kl_loss": 0.2494896501302719, + "loss_ib": 0.006449902430176735, + "step": 1837 + }, + { + "ce_ib": 5.9247918128967285, + "ce_orig": 0.8019424080848694, + "epoch": 0.5282910345819254, + "kl_loss": 0.2974478006362915, + "loss_ib": 0.008899269625544548, + "step": 1837 + }, + { + "ce_ib": 8.21461296081543, + "ce_orig": 1.2831261157989502, + "epoch": 0.528578618160903, + "kl_loss": 0.183128222823143, + "loss_ib": 0.010045895352959633, + "step": 1838 + }, + { + "ce_ib": 7.030628681182861, + "ce_orig": 0.9009212255477905, + "epoch": 0.528578618160903, + "kl_loss": 0.1871497929096222, + "loss_ib": 0.008902125991880894, + "step": 1838 + }, + { + "ce_ib": 8.397993087768555, + "ce_orig": 1.4693775177001953, + "epoch": 0.528578618160903, + "kl_loss": 0.28764963150024414, + "loss_ib": 0.011274490505456924, + "step": 1838 + }, + { + "ce_ib": 5.634881973266602, + "ce_orig": 1.246681809425354, + "epoch": 0.528578618160903, + "kl_loss": 0.14776155352592468, + "loss_ib": 0.0071124969981610775, + "step": 1838 + }, + { + "ce_ib": 6.353714942932129, + "ce_orig": 0.8384955525398254, + "epoch": 0.5288662017398806, + "kl_loss": 0.20396575331687927, + "loss_ib": 0.008393372409045696, + "step": 1839 + }, + { + "ce_ib": 3.9521143436431885, + "ce_orig": 0.652812123298645, + "epoch": 0.5288662017398806, + "kl_loss": 0.19296777248382568, + "loss_ib": 0.005881792400032282, + "step": 1839 + }, + { + "ce_ib": 3.103644371032715, + "ce_orig": 0.5401879549026489, + "epoch": 0.5288662017398806, + "kl_loss": 0.17736856639385223, + "loss_ib": 0.004877329804003239, + "step": 1839 + }, + { + "ce_ib": 5.33885383605957, + "ce_orig": 0.7373903393745422, + "epoch": 0.5288662017398806, + "kl_loss": 0.28985947370529175, + "loss_ib": 0.008237448520958424, + "step": 1839 + }, + { + "epoch": 0.5291537853188583, + "grad_norm": 0.12048087269067764, + "learning_rate": 9.453622421250353e-06, + "loss": 0.8883, + "step": 1840 + }, + { + "ce_ib": 6.563951015472412, + "ce_orig": 1.5464122295379639, + "epoch": 0.5291537853188583, + "kl_loss": 0.19577865302562714, + "loss_ib": 0.008521737530827522, + "step": 1840 + }, + { + "ce_ib": 6.511598587036133, + "ce_orig": 1.1769744157791138, + "epoch": 0.5291537853188583, + "kl_loss": 0.18648597598075867, + "loss_ib": 0.008376458659768105, + "step": 1840 + }, + { + "ce_ib": 4.372602462768555, + "ce_orig": 0.6788270473480225, + "epoch": 0.5291537853188583, + "kl_loss": 0.15491482615470886, + "loss_ib": 0.005921750329434872, + "step": 1840 + }, + { + "ce_ib": 6.303147315979004, + "ce_orig": 0.5359430313110352, + "epoch": 0.5291537853188583, + "kl_loss": 0.3028493821620941, + "loss_ib": 0.009331640787422657, + "step": 1840 + }, + { + "ce_ib": 6.323972225189209, + "ce_orig": 1.238000512123108, + "epoch": 0.5294413688978359, + "kl_loss": 0.31458938121795654, + "loss_ib": 0.009469865821301937, + "step": 1841 + }, + { + "ce_ib": 3.9357001781463623, + "ce_orig": 0.7044163346290588, + "epoch": 0.5294413688978359, + "kl_loss": 0.24502159655094147, + "loss_ib": 0.006385916378349066, + "step": 1841 + }, + { + "ce_ib": 7.748276233673096, + "ce_orig": 1.0002756118774414, + "epoch": 0.5294413688978359, + "kl_loss": 0.24710191786289215, + "loss_ib": 0.01021929644048214, + "step": 1841 + }, + { + "ce_ib": 4.496145248413086, + "ce_orig": 0.7424629330635071, + "epoch": 0.5294413688978359, + "kl_loss": 0.21064282953739166, + "loss_ib": 0.006602573208510876, + "step": 1841 + }, + { + "ce_ib": 4.606788158416748, + "ce_orig": 0.7641493082046509, + "epoch": 0.5297289524768136, + "kl_loss": 0.2618545889854431, + "loss_ib": 0.007225333712995052, + "step": 1842 + }, + { + "ce_ib": 1.3112238645553589, + "ce_orig": 0.12241856753826141, + "epoch": 0.5297289524768136, + "kl_loss": 0.5186067819595337, + "loss_ib": 0.006497291848063469, + "step": 1842 + }, + { + "ce_ib": 3.796043872833252, + "ce_orig": 0.6563534736633301, + "epoch": 0.5297289524768136, + "kl_loss": 0.18121027946472168, + "loss_ib": 0.005608146544545889, + "step": 1842 + }, + { + "ce_ib": 3.368903636932373, + "ce_orig": 0.6486485600471497, + "epoch": 0.5297289524768136, + "kl_loss": 0.40390148758888245, + "loss_ib": 0.007407918572425842, + "step": 1842 + }, + { + "ce_ib": 3.5803630352020264, + "ce_orig": 0.9925637245178223, + "epoch": 0.5300165360557912, + "kl_loss": 0.17552965879440308, + "loss_ib": 0.005335659720003605, + "step": 1843 + }, + { + "ce_ib": 2.867419719696045, + "ce_orig": 0.604449450969696, + "epoch": 0.5300165360557912, + "kl_loss": 0.23837560415267944, + "loss_ib": 0.005251175723969936, + "step": 1843 + }, + { + "ce_ib": 5.86564826965332, + "ce_orig": 1.1684297323226929, + "epoch": 0.5300165360557912, + "kl_loss": 0.20978295803070068, + "loss_ib": 0.00796347763389349, + "step": 1843 + }, + { + "ce_ib": 3.580265760421753, + "ce_orig": 0.6890649795532227, + "epoch": 0.5300165360557912, + "kl_loss": 0.19464614987373352, + "loss_ib": 0.00552672753110528, + "step": 1843 + }, + { + "ce_ib": 7.2468085289001465, + "ce_orig": 1.008955478668213, + "epoch": 0.5303041196347689, + "kl_loss": 0.24262303113937378, + "loss_ib": 0.009673038497567177, + "step": 1844 + }, + { + "ce_ib": 4.100954055786133, + "ce_orig": 0.8103383779525757, + "epoch": 0.5303041196347689, + "kl_loss": 0.19310420751571655, + "loss_ib": 0.006031996104866266, + "step": 1844 + }, + { + "ce_ib": 6.187928676605225, + "ce_orig": 1.223270058631897, + "epoch": 0.5303041196347689, + "kl_loss": 0.2613314986228943, + "loss_ib": 0.008801243267953396, + "step": 1844 + }, + { + "ce_ib": 6.660463809967041, + "ce_orig": 0.7532047033309937, + "epoch": 0.5303041196347689, + "kl_loss": 0.19040045142173767, + "loss_ib": 0.00856446847319603, + "step": 1844 + }, + { + "epoch": 0.5305917032137465, + "grad_norm": 0.10593312233686447, + "learning_rate": 9.450089412344037e-06, + "loss": 0.8629, + "step": 1845 + }, + { + "ce_ib": 7.035682201385498, + "ce_orig": 1.2130488157272339, + "epoch": 0.5305917032137465, + "kl_loss": 0.16078200936317444, + "loss_ib": 0.008643501438200474, + "step": 1845 + }, + { + "ce_ib": 5.997169494628906, + "ce_orig": 0.7861976027488708, + "epoch": 0.5305917032137465, + "kl_loss": 0.1388709545135498, + "loss_ib": 0.007385878823697567, + "step": 1845 + }, + { + "ce_ib": 4.488471508026123, + "ce_orig": 0.9490336179733276, + "epoch": 0.5305917032137465, + "kl_loss": 0.15419700741767883, + "loss_ib": 0.006030441261827946, + "step": 1845 + }, + { + "ce_ib": 5.691738605499268, + "ce_orig": 1.1692591905593872, + "epoch": 0.5305917032137465, + "kl_loss": 0.2401736080646515, + "loss_ib": 0.008093475364148617, + "step": 1845 + }, + { + "ce_ib": 3.7242166996002197, + "ce_orig": 0.693812370300293, + "epoch": 0.5308792867927241, + "kl_loss": 0.2123832106590271, + "loss_ib": 0.005848048720508814, + "step": 1846 + }, + { + "ce_ib": 4.208561420440674, + "ce_orig": 0.7323001027107239, + "epoch": 0.5308792867927241, + "kl_loss": 0.15289628505706787, + "loss_ib": 0.005737524013966322, + "step": 1846 + }, + { + "ce_ib": 6.112689971923828, + "ce_orig": 0.7250316143035889, + "epoch": 0.5308792867927241, + "kl_loss": 0.1991797685623169, + "loss_ib": 0.00810448732227087, + "step": 1846 + }, + { + "ce_ib": 5.507354259490967, + "ce_orig": 0.6912445425987244, + "epoch": 0.5308792867927241, + "kl_loss": 0.2743017077445984, + "loss_ib": 0.008250370621681213, + "step": 1846 + }, + { + "ce_ib": 6.0127668380737305, + "ce_orig": 1.1777681112289429, + "epoch": 0.5311668703717017, + "kl_loss": 0.2775518596172333, + "loss_ib": 0.008788284845650196, + "step": 1847 + }, + { + "ce_ib": 8.435003280639648, + "ce_orig": 0.9789080619812012, + "epoch": 0.5311668703717017, + "kl_loss": 0.30630841851234436, + "loss_ib": 0.011498087085783482, + "step": 1847 + }, + { + "ce_ib": 4.990412712097168, + "ce_orig": 0.6420254707336426, + "epoch": 0.5311668703717017, + "kl_loss": 0.2548336088657379, + "loss_ib": 0.0075387489050626755, + "step": 1847 + }, + { + "ce_ib": 5.797047138214111, + "ce_orig": 1.0188409090042114, + "epoch": 0.5311668703717017, + "kl_loss": 0.3676277995109558, + "loss_ib": 0.009473324753344059, + "step": 1847 + }, + { + "ce_ib": 9.056893348693848, + "ce_orig": 0.5721787810325623, + "epoch": 0.5314544539506794, + "kl_loss": 0.20263320207595825, + "loss_ib": 0.011083225719630718, + "step": 1848 + }, + { + "ce_ib": 3.6427559852600098, + "ce_orig": 0.6390652060508728, + "epoch": 0.5314544539506794, + "kl_loss": 0.18275010585784912, + "loss_ib": 0.00547025678679347, + "step": 1848 + }, + { + "ce_ib": 4.470255374908447, + "ce_orig": 0.7787516117095947, + "epoch": 0.5314544539506794, + "kl_loss": 0.21285396814346313, + "loss_ib": 0.006598794832825661, + "step": 1848 + }, + { + "ce_ib": 5.499170780181885, + "ce_orig": 0.8165490031242371, + "epoch": 0.5314544539506794, + "kl_loss": 0.1710931807756424, + "loss_ib": 0.0072101023979485035, + "step": 1848 + }, + { + "ce_ib": 2.9710166454315186, + "ce_orig": 0.5027737617492676, + "epoch": 0.5317420375296571, + "kl_loss": 0.21074137091636658, + "loss_ib": 0.005078430287539959, + "step": 1849 + }, + { + "ce_ib": 8.172359466552734, + "ce_orig": 1.1063852310180664, + "epoch": 0.5317420375296571, + "kl_loss": 0.22217628359794617, + "loss_ib": 0.010394122451543808, + "step": 1849 + }, + { + "ce_ib": 2.164992094039917, + "ce_orig": 0.40636521577835083, + "epoch": 0.5317420375296571, + "kl_loss": 0.16406163573265076, + "loss_ib": 0.003805608255788684, + "step": 1849 + }, + { + "ce_ib": 5.142477035522461, + "ce_orig": 0.82965487241745, + "epoch": 0.5317420375296571, + "kl_loss": 0.29681843519210815, + "loss_ib": 0.008110661059617996, + "step": 1849 + }, + { + "epoch": 0.5320296211086347, + "grad_norm": 0.10571553558111191, + "learning_rate": 9.446545682139437e-06, + "loss": 0.8116, + "step": 1850 + }, + { + "ce_ib": 3.0195400714874268, + "ce_orig": 0.5976029634475708, + "epoch": 0.5320296211086347, + "kl_loss": 0.24145764112472534, + "loss_ib": 0.005434115882962942, + "step": 1850 + }, + { + "ce_ib": 7.33281135559082, + "ce_orig": 0.9514852166175842, + "epoch": 0.5320296211086347, + "kl_loss": 0.30109816789627075, + "loss_ib": 0.010343791916966438, + "step": 1850 + }, + { + "ce_ib": 6.971595764160156, + "ce_orig": 0.744994580745697, + "epoch": 0.5320296211086347, + "kl_loss": 0.2107081413269043, + "loss_ib": 0.00907867681235075, + "step": 1850 + }, + { + "ce_ib": 4.410055160522461, + "ce_orig": 0.9290229678153992, + "epoch": 0.5320296211086347, + "kl_loss": 0.27106451988220215, + "loss_ib": 0.007120700087398291, + "step": 1850 + }, + { + "ce_ib": 7.343533039093018, + "ce_orig": 0.6487832069396973, + "epoch": 0.5323172046876123, + "kl_loss": 0.30970144271850586, + "loss_ib": 0.01044054701924324, + "step": 1851 + }, + { + "ce_ib": 2.6211395263671875, + "ce_orig": 0.3374985158443451, + "epoch": 0.5323172046876123, + "kl_loss": 0.18458132445812225, + "loss_ib": 0.004466952756047249, + "step": 1851 + }, + { + "ce_ib": 3.530855417251587, + "ce_orig": 0.7399183511734009, + "epoch": 0.5323172046876123, + "kl_loss": 0.22764435410499573, + "loss_ib": 0.005807298701256514, + "step": 1851 + }, + { + "ce_ib": 6.120885372161865, + "ce_orig": 0.6404238939285278, + "epoch": 0.5323172046876123, + "kl_loss": 0.2242920696735382, + "loss_ib": 0.008363805711269379, + "step": 1851 + }, + { + "ce_ib": 4.473357677459717, + "ce_orig": 0.23305130004882812, + "epoch": 0.53260478826659, + "kl_loss": 0.20445549488067627, + "loss_ib": 0.0065179127268493176, + "step": 1852 + }, + { + "ce_ib": 10.160700798034668, + "ce_orig": 1.1857593059539795, + "epoch": 0.53260478826659, + "kl_loss": 0.23605488240718842, + "loss_ib": 0.012521250173449516, + "step": 1852 + }, + { + "ce_ib": 3.499321222305298, + "ce_orig": 0.6823559403419495, + "epoch": 0.53260478826659, + "kl_loss": 0.2032666653394699, + "loss_ib": 0.0055319881066679955, + "step": 1852 + }, + { + "ce_ib": 7.681511878967285, + "ce_orig": 1.04057776927948, + "epoch": 0.53260478826659, + "kl_loss": 0.21528491377830505, + "loss_ib": 0.009834361262619495, + "step": 1852 + }, + { + "ce_ib": 4.359706401824951, + "ce_orig": 0.7192271947860718, + "epoch": 0.5328923718455676, + "kl_loss": 0.24812665581703186, + "loss_ib": 0.006840972695499659, + "step": 1853 + }, + { + "ce_ib": 3.1339895725250244, + "ce_orig": 0.810313880443573, + "epoch": 0.5328923718455676, + "kl_loss": 0.15557479858398438, + "loss_ib": 0.004689737223088741, + "step": 1853 + }, + { + "ce_ib": 3.581479549407959, + "ce_orig": 0.8280433416366577, + "epoch": 0.5328923718455676, + "kl_loss": 0.2017989158630371, + "loss_ib": 0.005599468946456909, + "step": 1853 + }, + { + "ce_ib": 4.026859283447266, + "ce_orig": 0.32227322459220886, + "epoch": 0.5328923718455676, + "kl_loss": 0.23444557189941406, + "loss_ib": 0.006371315103024244, + "step": 1853 + }, + { + "ce_ib": 4.264099597930908, + "ce_orig": 0.7270826101303101, + "epoch": 0.5331799554245452, + "kl_loss": 0.2571170926094055, + "loss_ib": 0.006835270207375288, + "step": 1854 + }, + { + "ce_ib": 3.458313226699829, + "ce_orig": 0.503343403339386, + "epoch": 0.5331799554245452, + "kl_loss": 0.1853717863559723, + "loss_ib": 0.005312031600624323, + "step": 1854 + }, + { + "ce_ib": 4.385552883148193, + "ce_orig": 0.643458902835846, + "epoch": 0.5331799554245452, + "kl_loss": 0.19659477472305298, + "loss_ib": 0.006351500749588013, + "step": 1854 + }, + { + "ce_ib": 4.395240783691406, + "ce_orig": 0.740811824798584, + "epoch": 0.5331799554245452, + "kl_loss": 0.16096997261047363, + "loss_ib": 0.006004940252751112, + "step": 1854 + }, + { + "epoch": 0.533467539003523, + "grad_norm": 0.12044809013605118, + "learning_rate": 9.442991239174225e-06, + "loss": 0.8133, + "step": 1855 + }, + { + "ce_ib": 6.274482727050781, + "ce_orig": 1.3136651515960693, + "epoch": 0.533467539003523, + "kl_loss": 0.2152913212776184, + "loss_ib": 0.008427395485341549, + "step": 1855 + }, + { + "ce_ib": 2.4021594524383545, + "ce_orig": 0.3267451822757721, + "epoch": 0.533467539003523, + "kl_loss": 0.19621196389198303, + "loss_ib": 0.004364278633147478, + "step": 1855 + }, + { + "ce_ib": 5.595287322998047, + "ce_orig": 0.7406966090202332, + "epoch": 0.533467539003523, + "kl_loss": 0.20382627844810486, + "loss_ib": 0.007633550092577934, + "step": 1855 + }, + { + "ce_ib": 6.821021556854248, + "ce_orig": 1.3345282077789307, + "epoch": 0.533467539003523, + "kl_loss": 0.22391046583652496, + "loss_ib": 0.009060125797986984, + "step": 1855 + }, + { + "ce_ib": 4.37741756439209, + "ce_orig": 0.5070826411247253, + "epoch": 0.5337551225825006, + "kl_loss": 0.1709289848804474, + "loss_ib": 0.00608670711517334, + "step": 1856 + }, + { + "ce_ib": 5.665239334106445, + "ce_orig": 0.543302595615387, + "epoch": 0.5337551225825006, + "kl_loss": 0.4002218246459961, + "loss_ib": 0.009667458012700081, + "step": 1856 + }, + { + "ce_ib": 10.248526573181152, + "ce_orig": 1.6775959730148315, + "epoch": 0.5337551225825006, + "kl_loss": 0.2522523105144501, + "loss_ib": 0.01277104951441288, + "step": 1856 + }, + { + "ce_ib": 4.3051228523254395, + "ce_orig": 0.6298352479934692, + "epoch": 0.5337551225825006, + "kl_loss": 0.30425626039505005, + "loss_ib": 0.007347684819251299, + "step": 1856 + }, + { + "ce_ib": 3.970698356628418, + "ce_orig": 0.5867936015129089, + "epoch": 0.5340427061614782, + "kl_loss": 0.17628365755081177, + "loss_ib": 0.005733535159379244, + "step": 1857 + }, + { + "ce_ib": 4.260129928588867, + "ce_orig": 0.704187273979187, + "epoch": 0.5340427061614782, + "kl_loss": 0.2587973475456238, + "loss_ib": 0.006848103366792202, + "step": 1857 + }, + { + "ce_ib": 4.6300554275512695, + "ce_orig": 0.6545487642288208, + "epoch": 0.5340427061614782, + "kl_loss": 0.3494999408721924, + "loss_ib": 0.008125054650008678, + "step": 1857 + }, + { + "ce_ib": 4.991218090057373, + "ce_orig": 0.862577497959137, + "epoch": 0.5340427061614782, + "kl_loss": 0.23879991471767426, + "loss_ib": 0.0073792170733213425, + "step": 1857 + }, + { + "ce_ib": 7.007440090179443, + "ce_orig": 1.259676456451416, + "epoch": 0.5343302897404558, + "kl_loss": 0.23333919048309326, + "loss_ib": 0.009340832009911537, + "step": 1858 + }, + { + "ce_ib": 5.029625415802002, + "ce_orig": 0.9573800563812256, + "epoch": 0.5343302897404558, + "kl_loss": 0.24174338579177856, + "loss_ib": 0.00744705880060792, + "step": 1858 + }, + { + "ce_ib": 4.438313961029053, + "ce_orig": 0.7793439626693726, + "epoch": 0.5343302897404558, + "kl_loss": 0.1766584813594818, + "loss_ib": 0.006204898934811354, + "step": 1858 + }, + { + "ce_ib": 5.21489953994751, + "ce_orig": 0.5086827874183655, + "epoch": 0.5343302897404558, + "kl_loss": 0.3779343366622925, + "loss_ib": 0.00899424310773611, + "step": 1858 + }, + { + "ce_ib": 4.461227893829346, + "ce_orig": 0.7233249545097351, + "epoch": 0.5346178733194334, + "kl_loss": 0.14038971066474915, + "loss_ib": 0.005865124985575676, + "step": 1859 + }, + { + "ce_ib": 8.790775299072266, + "ce_orig": 1.4900492429733276, + "epoch": 0.5346178733194334, + "kl_loss": 0.3974230885505676, + "loss_ib": 0.012765007093548775, + "step": 1859 + }, + { + "ce_ib": 4.1817545890808105, + "ce_orig": 0.3852037489414215, + "epoch": 0.5346178733194334, + "kl_loss": 0.23326320946216583, + "loss_ib": 0.006514386273920536, + "step": 1859 + }, + { + "ce_ib": 5.260295391082764, + "ce_orig": 0.434694766998291, + "epoch": 0.5346178733194334, + "kl_loss": 0.19891120493412018, + "loss_ib": 0.0072494070045650005, + "step": 1859 + }, + { + "epoch": 0.5349054568984111, + "grad_norm": 0.1161816194653511, + "learning_rate": 9.439426092011877e-06, + "loss": 0.8398, + "step": 1860 + }, + { + "ce_ib": 4.339866638183594, + "ce_orig": 0.7001444101333618, + "epoch": 0.5349054568984111, + "kl_loss": 0.2064824402332306, + "loss_ib": 0.006404690444469452, + "step": 1860 + }, + { + "ce_ib": 3.5802195072174072, + "ce_orig": 0.6469123959541321, + "epoch": 0.5349054568984111, + "kl_loss": 0.2189090996980667, + "loss_ib": 0.0057693105190992355, + "step": 1860 + }, + { + "ce_ib": 6.698261737823486, + "ce_orig": 1.117401123046875, + "epoch": 0.5349054568984111, + "kl_loss": 0.20926931500434875, + "loss_ib": 0.008790954947471619, + "step": 1860 + }, + { + "ce_ib": 6.679226875305176, + "ce_orig": 0.4575742185115814, + "epoch": 0.5349054568984111, + "kl_loss": 0.4343309998512268, + "loss_ib": 0.0110225360840559, + "step": 1860 + }, + { + "ce_ib": 5.680418968200684, + "ce_orig": 1.021546483039856, + "epoch": 0.5351930404773887, + "kl_loss": 0.23155631124973297, + "loss_ib": 0.007995981723070145, + "step": 1861 + }, + { + "ce_ib": 3.0201821327209473, + "ce_orig": 0.5457724928855896, + "epoch": 0.5351930404773887, + "kl_loss": 0.21453796327114105, + "loss_ib": 0.005165562033653259, + "step": 1861 + }, + { + "ce_ib": 4.915546417236328, + "ce_orig": 0.7357434034347534, + "epoch": 0.5351930404773887, + "kl_loss": 0.7311533689498901, + "loss_ib": 0.012227079831063747, + "step": 1861 + }, + { + "ce_ib": 5.287921905517578, + "ce_orig": 0.5148226618766785, + "epoch": 0.5351930404773887, + "kl_loss": 0.20094850659370422, + "loss_ib": 0.007297407370060682, + "step": 1861 + }, + { + "ce_ib": 6.0569305419921875, + "ce_orig": 0.9353012442588806, + "epoch": 0.5354806240563664, + "kl_loss": 0.19924458861351013, + "loss_ib": 0.008049375377595425, + "step": 1862 + }, + { + "ce_ib": 6.241096019744873, + "ce_orig": 1.3277326822280884, + "epoch": 0.5354806240563664, + "kl_loss": 0.3990367352962494, + "loss_ib": 0.010231463238596916, + "step": 1862 + }, + { + "ce_ib": 4.2260613441467285, + "ce_orig": 0.5645077228546143, + "epoch": 0.5354806240563664, + "kl_loss": 0.22848635911941528, + "loss_ib": 0.006510925013571978, + "step": 1862 + }, + { + "ce_ib": 4.25538444519043, + "ce_orig": 0.8132855296134949, + "epoch": 0.5354806240563664, + "kl_loss": 0.2353193461894989, + "loss_ib": 0.006608577910810709, + "step": 1862 + }, + { + "ce_ib": 4.660745620727539, + "ce_orig": 0.8753987550735474, + "epoch": 0.535768207635344, + "kl_loss": 0.16747131943702698, + "loss_ib": 0.006335458718240261, + "step": 1863 + }, + { + "ce_ib": 3.7566816806793213, + "ce_orig": 0.5486100316047668, + "epoch": 0.535768207635344, + "kl_loss": 0.2130318135023117, + "loss_ib": 0.005886999890208244, + "step": 1863 + }, + { + "ce_ib": 4.214888572692871, + "ce_orig": 0.8790446519851685, + "epoch": 0.535768207635344, + "kl_loss": 0.19534213840961456, + "loss_ib": 0.006168310064822435, + "step": 1863 + }, + { + "ce_ib": 6.063351631164551, + "ce_orig": 0.9974119067192078, + "epoch": 0.535768207635344, + "kl_loss": 0.19461339712142944, + "loss_ib": 0.008009484969079494, + "step": 1863 + }, + { + "ce_ib": 7.536983966827393, + "ce_orig": 1.4408035278320312, + "epoch": 0.5360557912143217, + "kl_loss": 0.32511430978775024, + "loss_ib": 0.010788126848638058, + "step": 1864 + }, + { + "ce_ib": 3.6541574001312256, + "ce_orig": 0.504055917263031, + "epoch": 0.5360557912143217, + "kl_loss": 0.16534735262393951, + "loss_ib": 0.0053076306357979774, + "step": 1864 + }, + { + "ce_ib": 4.483316898345947, + "ce_orig": 0.666479229927063, + "epoch": 0.5360557912143217, + "kl_loss": 0.15484187006950378, + "loss_ib": 0.006031735334545374, + "step": 1864 + }, + { + "ce_ib": 5.627042770385742, + "ce_orig": 1.2612030506134033, + "epoch": 0.5360557912143217, + "kl_loss": 0.20702578127384186, + "loss_ib": 0.007697300054132938, + "step": 1864 + }, + { + "epoch": 0.5363433747932993, + "grad_norm": 0.14977069199085236, + "learning_rate": 9.435850249241661e-06, + "loss": 0.8601, + "step": 1865 + }, + { + "ce_ib": 3.3860867023468018, + "ce_orig": 0.5201670527458191, + "epoch": 0.5363433747932993, + "kl_loss": 0.1453399956226349, + "loss_ib": 0.004839486442506313, + "step": 1865 + }, + { + "ce_ib": 5.578195095062256, + "ce_orig": 0.6977161169052124, + "epoch": 0.5363433747932993, + "kl_loss": 0.19398577511310577, + "loss_ib": 0.00751805305480957, + "step": 1865 + }, + { + "ce_ib": 4.881834506988525, + "ce_orig": 0.8882226347923279, + "epoch": 0.5363433747932993, + "kl_loss": 0.46950674057006836, + "loss_ib": 0.00957690179347992, + "step": 1865 + }, + { + "ce_ib": 5.812175273895264, + "ce_orig": 0.8029134273529053, + "epoch": 0.5363433747932993, + "kl_loss": 0.2707087993621826, + "loss_ib": 0.008519263006746769, + "step": 1865 + }, + { + "ce_ib": 8.659444808959961, + "ce_orig": 1.537245512008667, + "epoch": 0.5366309583722769, + "kl_loss": 0.22481295466423035, + "loss_ib": 0.01090757455676794, + "step": 1866 + }, + { + "ce_ib": 6.766580104827881, + "ce_orig": 0.9183495044708252, + "epoch": 0.5366309583722769, + "kl_loss": 0.22349092364311218, + "loss_ib": 0.009001489728689194, + "step": 1866 + }, + { + "ce_ib": 8.252838134765625, + "ce_orig": 0.9894993305206299, + "epoch": 0.5366309583722769, + "kl_loss": 0.15923966467380524, + "loss_ib": 0.009845234453678131, + "step": 1866 + }, + { + "ce_ib": 9.690625190734863, + "ce_orig": 1.1797600984573364, + "epoch": 0.5366309583722769, + "kl_loss": 0.21562716364860535, + "loss_ib": 0.011846896260976791, + "step": 1866 + }, + { + "ce_ib": 6.657962322235107, + "ce_orig": 1.0763212442398071, + "epoch": 0.5369185419512545, + "kl_loss": 0.1339656114578247, + "loss_ib": 0.007997618056833744, + "step": 1867 + }, + { + "ce_ib": 9.438556671142578, + "ce_orig": 1.5125812292099, + "epoch": 0.5369185419512545, + "kl_loss": 0.17226141691207886, + "loss_ib": 0.011161170899868011, + "step": 1867 + }, + { + "ce_ib": 3.83799409866333, + "ce_orig": 0.6082793474197388, + "epoch": 0.5369185419512545, + "kl_loss": 0.17104685306549072, + "loss_ib": 0.005548462737351656, + "step": 1867 + }, + { + "ce_ib": 4.337790489196777, + "ce_orig": 0.8539470434188843, + "epoch": 0.5369185419512545, + "kl_loss": 0.15344543755054474, + "loss_ib": 0.005872244480997324, + "step": 1867 + }, + { + "ce_ib": 4.979361534118652, + "ce_orig": 1.101728081703186, + "epoch": 0.5372061255302322, + "kl_loss": 0.1728961169719696, + "loss_ib": 0.006708322558552027, + "step": 1868 + }, + { + "ce_ib": 5.25793981552124, + "ce_orig": 0.8807792663574219, + "epoch": 0.5372061255302322, + "kl_loss": 0.1700776368379593, + "loss_ib": 0.006958715617656708, + "step": 1868 + }, + { + "ce_ib": 6.643743991851807, + "ce_orig": 1.299357533454895, + "epoch": 0.5372061255302322, + "kl_loss": 0.1740136742591858, + "loss_ib": 0.008383880369365215, + "step": 1868 + }, + { + "ce_ib": 6.947753429412842, + "ce_orig": 1.0708379745483398, + "epoch": 0.5372061255302322, + "kl_loss": 0.34942391514778137, + "loss_ib": 0.010441992431879044, + "step": 1868 + }, + { + "ce_ib": 4.8975443840026855, + "ce_orig": 0.6301613450050354, + "epoch": 0.5374937091092099, + "kl_loss": 0.18357506394386292, + "loss_ib": 0.006733294576406479, + "step": 1869 + }, + { + "ce_ib": 6.845427513122559, + "ce_orig": 0.771119236946106, + "epoch": 0.5374937091092099, + "kl_loss": 0.17592459917068481, + "loss_ib": 0.00860467366874218, + "step": 1869 + }, + { + "ce_ib": 5.129888534545898, + "ce_orig": 1.204761266708374, + "epoch": 0.5374937091092099, + "kl_loss": 0.11413148045539856, + "loss_ib": 0.006271203514188528, + "step": 1869 + }, + { + "ce_ib": 8.118036270141602, + "ce_orig": 1.9563100337982178, + "epoch": 0.5374937091092099, + "kl_loss": 0.7197785973548889, + "loss_ib": 0.015315822325646877, + "step": 1869 + }, + { + "epoch": 0.5377812926881875, + "grad_norm": 0.15173690021038055, + "learning_rate": 9.432263719478611e-06, + "loss": 0.8979, + "step": 1870 + }, + { + "ce_ib": 2.8820931911468506, + "ce_orig": 0.6525115370750427, + "epoch": 0.5377812926881875, + "kl_loss": 0.17920103669166565, + "loss_ib": 0.004674103576689959, + "step": 1870 + }, + { + "ce_ib": 8.007864952087402, + "ce_orig": 0.8055094480514526, + "epoch": 0.5377812926881875, + "kl_loss": 0.31373703479766846, + "loss_ib": 0.011145235039293766, + "step": 1870 + }, + { + "ce_ib": 5.944539546966553, + "ce_orig": 0.7078796029090881, + "epoch": 0.5377812926881875, + "kl_loss": 0.24224576354026794, + "loss_ib": 0.008366997353732586, + "step": 1870 + }, + { + "ce_ib": 9.522847175598145, + "ce_orig": 1.4455835819244385, + "epoch": 0.5377812926881875, + "kl_loss": 0.19860181212425232, + "loss_ib": 0.011508865281939507, + "step": 1870 + }, + { + "ce_ib": 3.601259469985962, + "ce_orig": 0.5386037230491638, + "epoch": 0.5380688762671652, + "kl_loss": 0.14678509533405304, + "loss_ib": 0.0050691100768744946, + "step": 1871 + }, + { + "ce_ib": 3.1088805198669434, + "ce_orig": 0.7355481386184692, + "epoch": 0.5380688762671652, + "kl_loss": 0.1154508888721466, + "loss_ib": 0.004263389389961958, + "step": 1871 + }, + { + "ce_ib": 6.967272758483887, + "ce_orig": 1.1396918296813965, + "epoch": 0.5380688762671652, + "kl_loss": 0.268510639667511, + "loss_ib": 0.009652378968894482, + "step": 1871 + }, + { + "ce_ib": 3.3585612773895264, + "ce_orig": 0.6488027572631836, + "epoch": 0.5380688762671652, + "kl_loss": 0.26048383116722107, + "loss_ib": 0.005963399074971676, + "step": 1871 + }, + { + "ce_ib": 2.642223596572876, + "ce_orig": 0.4931933283805847, + "epoch": 0.5383564598461428, + "kl_loss": 0.2122310996055603, + "loss_ib": 0.004764534533023834, + "step": 1872 + }, + { + "ce_ib": 5.761163711547852, + "ce_orig": 0.9917870759963989, + "epoch": 0.5383564598461428, + "kl_loss": 0.20570719242095947, + "loss_ib": 0.007818236015737057, + "step": 1872 + }, + { + "ce_ib": 7.926612854003906, + "ce_orig": 1.3921815156936646, + "epoch": 0.5383564598461428, + "kl_loss": 0.210135817527771, + "loss_ib": 0.010027971118688583, + "step": 1872 + }, + { + "ce_ib": 1.9567288160324097, + "ce_orig": 0.2648179531097412, + "epoch": 0.5383564598461428, + "kl_loss": 0.41877481341362, + "loss_ib": 0.006144477054476738, + "step": 1872 + }, + { + "ce_ib": 5.836376667022705, + "ce_orig": 0.9933511018753052, + "epoch": 0.5386440434251204, + "kl_loss": 0.27552711963653564, + "loss_ib": 0.008591647259891033, + "step": 1873 + }, + { + "ce_ib": 7.745918273925781, + "ce_orig": 1.4708333015441895, + "epoch": 0.5386440434251204, + "kl_loss": 0.33777713775634766, + "loss_ib": 0.011123690754175186, + "step": 1873 + }, + { + "ce_ib": 5.4888081550598145, + "ce_orig": 1.2457603216171265, + "epoch": 0.5386440434251204, + "kl_loss": 0.21348831057548523, + "loss_ib": 0.007623691577464342, + "step": 1873 + }, + { + "ce_ib": 5.6587605476379395, + "ce_orig": 0.7417976260185242, + "epoch": 0.5386440434251204, + "kl_loss": 0.27924901247024536, + "loss_ib": 0.00845125038176775, + "step": 1873 + }, + { + "ce_ib": 6.356706619262695, + "ce_orig": 0.9084938168525696, + "epoch": 0.538931627004098, + "kl_loss": 0.2768899202346802, + "loss_ib": 0.00912560522556305, + "step": 1874 + }, + { + "ce_ib": 8.2174072265625, + "ce_orig": 1.7141512632369995, + "epoch": 0.538931627004098, + "kl_loss": 0.13865156471729279, + "loss_ib": 0.009603923186659813, + "step": 1874 + }, + { + "ce_ib": 5.935986042022705, + "ce_orig": 0.8549964427947998, + "epoch": 0.538931627004098, + "kl_loss": 0.13466152548789978, + "loss_ib": 0.007282601203769445, + "step": 1874 + }, + { + "ce_ib": 5.00381326675415, + "ce_orig": 0.6767867207527161, + "epoch": 0.538931627004098, + "kl_loss": 0.3218216300010681, + "loss_ib": 0.00822202954441309, + "step": 1874 + }, + { + "epoch": 0.5392192105830758, + "grad_norm": 0.13676752150058746, + "learning_rate": 9.428666511363511e-06, + "loss": 0.8618, + "step": 1875 + }, + { + "ce_ib": 3.0487828254699707, + "ce_orig": 0.5314833521842957, + "epoch": 0.5392192105830758, + "kl_loss": 0.17765596508979797, + "loss_ib": 0.004825342446565628, + "step": 1875 + }, + { + "ce_ib": 7.444401264190674, + "ce_orig": 0.8767432570457458, + "epoch": 0.5392192105830758, + "kl_loss": 0.2486288845539093, + "loss_ib": 0.009930690750479698, + "step": 1875 + }, + { + "ce_ib": 2.8306288719177246, + "ce_orig": 0.44761285185813904, + "epoch": 0.5392192105830758, + "kl_loss": 0.10526077449321747, + "loss_ib": 0.0038832365535199642, + "step": 1875 + }, + { + "ce_ib": 3.7552425861358643, + "ce_orig": 0.7788964509963989, + "epoch": 0.5392192105830758, + "kl_loss": 0.18940740823745728, + "loss_ib": 0.0056493161246180534, + "step": 1875 + }, + { + "ce_ib": 5.244472503662109, + "ce_orig": 0.7166180610656738, + "epoch": 0.5395067941620534, + "kl_loss": 0.23369517922401428, + "loss_ib": 0.007581424433737993, + "step": 1876 + }, + { + "ce_ib": 7.7227253913879395, + "ce_orig": 1.4851809740066528, + "epoch": 0.5395067941620534, + "kl_loss": 0.15310481190681458, + "loss_ib": 0.009253773838281631, + "step": 1876 + }, + { + "ce_ib": 3.0689175128936768, + "ce_orig": 0.6693606376647949, + "epoch": 0.5395067941620534, + "kl_loss": 0.1706458032131195, + "loss_ib": 0.004775375593453646, + "step": 1876 + }, + { + "ce_ib": 6.795139312744141, + "ce_orig": 1.3710235357284546, + "epoch": 0.5395067941620534, + "kl_loss": 0.19929218292236328, + "loss_ib": 0.008788061328232288, + "step": 1876 + }, + { + "ce_ib": 5.0664448738098145, + "ce_orig": 0.5143498182296753, + "epoch": 0.539794377741031, + "kl_loss": 0.42177170515060425, + "loss_ib": 0.00928416196256876, + "step": 1877 + }, + { + "ce_ib": 7.223611354827881, + "ce_orig": 1.1241228580474854, + "epoch": 0.539794377741031, + "kl_loss": 0.20792663097381592, + "loss_ib": 0.009302877821028233, + "step": 1877 + }, + { + "ce_ib": 6.8414626121521, + "ce_orig": 0.7621123194694519, + "epoch": 0.539794377741031, + "kl_loss": 0.31143611669540405, + "loss_ib": 0.009955823421478271, + "step": 1877 + }, + { + "ce_ib": 6.14713716506958, + "ce_orig": 1.0838134288787842, + "epoch": 0.539794377741031, + "kl_loss": 0.18095140159130096, + "loss_ib": 0.007956651039421558, + "step": 1877 + }, + { + "ce_ib": 3.394634485244751, + "ce_orig": 0.47664836049079895, + "epoch": 0.5400819613200086, + "kl_loss": 0.30461785197257996, + "loss_ib": 0.006440812721848488, + "step": 1878 + }, + { + "ce_ib": 3.8926734924316406, + "ce_orig": 0.5368797779083252, + "epoch": 0.5400819613200086, + "kl_loss": 0.19092890620231628, + "loss_ib": 0.005801962688565254, + "step": 1878 + }, + { + "ce_ib": 6.97349214553833, + "ce_orig": 1.6624675989151, + "epoch": 0.5400819613200086, + "kl_loss": 0.21015450358390808, + "loss_ib": 0.009075037203729153, + "step": 1878 + }, + { + "ce_ib": 5.310675621032715, + "ce_orig": 1.0094043016433716, + "epoch": 0.5400819613200086, + "kl_loss": 0.22270002961158752, + "loss_ib": 0.0075376760214567184, + "step": 1878 + }, + { + "ce_ib": 4.8102874755859375, + "ce_orig": 0.8906371593475342, + "epoch": 0.5403695448989863, + "kl_loss": 0.2329912781715393, + "loss_ib": 0.0071402001194655895, + "step": 1879 + }, + { + "ce_ib": 7.846437931060791, + "ce_orig": 1.5362975597381592, + "epoch": 0.5403695448989863, + "kl_loss": 0.2593178153038025, + "loss_ib": 0.0104396166279912, + "step": 1879 + }, + { + "ce_ib": 6.457812309265137, + "ce_orig": 1.4550244808197021, + "epoch": 0.5403695448989863, + "kl_loss": 0.2090592086315155, + "loss_ib": 0.008548404090106487, + "step": 1879 + }, + { + "ce_ib": 4.535857200622559, + "ce_orig": 0.6384449601173401, + "epoch": 0.5403695448989863, + "kl_loss": 0.25717389583587646, + "loss_ib": 0.007107596378773451, + "step": 1879 + }, + { + "epoch": 0.5406571284779639, + "grad_norm": 0.11576619744300842, + "learning_rate": 9.42505863356287e-06, + "loss": 0.8797, + "step": 1880 + }, + { + "ce_ib": 3.876596450805664, + "ce_orig": 0.6871512532234192, + "epoch": 0.5406571284779639, + "kl_loss": 0.25499510765075684, + "loss_ib": 0.006426547653973103, + "step": 1880 + }, + { + "ce_ib": 4.118466377258301, + "ce_orig": 0.6281008124351501, + "epoch": 0.5406571284779639, + "kl_loss": 0.23545455932617188, + "loss_ib": 0.006473011802881956, + "step": 1880 + }, + { + "ce_ib": 5.587640762329102, + "ce_orig": 1.1079524755477905, + "epoch": 0.5406571284779639, + "kl_loss": 0.17437410354614258, + "loss_ib": 0.007331382017582655, + "step": 1880 + }, + { + "ce_ib": 6.028123378753662, + "ce_orig": 0.6316193342208862, + "epoch": 0.5406571284779639, + "kl_loss": 0.27316561341285706, + "loss_ib": 0.00875977985560894, + "step": 1880 + }, + { + "ce_ib": 6.767507076263428, + "ce_orig": 1.2337112426757812, + "epoch": 0.5409447120569415, + "kl_loss": 0.37158188223838806, + "loss_ib": 0.010483325459063053, + "step": 1881 + }, + { + "ce_ib": 3.529797315597534, + "ce_orig": 0.7312442660331726, + "epoch": 0.5409447120569415, + "kl_loss": 0.2754474878311157, + "loss_ib": 0.00628427229821682, + "step": 1881 + }, + { + "ce_ib": 4.150570392608643, + "ce_orig": 0.6519814133644104, + "epoch": 0.5409447120569415, + "kl_loss": 0.12673813104629517, + "loss_ib": 0.005417951382696629, + "step": 1881 + }, + { + "ce_ib": 7.97968864440918, + "ce_orig": 1.4256978034973145, + "epoch": 0.5409447120569415, + "kl_loss": 0.46767693758010864, + "loss_ib": 0.012656456790864468, + "step": 1881 + }, + { + "ce_ib": 4.163929462432861, + "ce_orig": 0.6977522373199463, + "epoch": 0.5412322956359192, + "kl_loss": 0.22474998235702515, + "loss_ib": 0.0064114294946193695, + "step": 1882 + }, + { + "ce_ib": 6.889791965484619, + "ce_orig": 1.1969032287597656, + "epoch": 0.5412322956359192, + "kl_loss": 0.33730947971343994, + "loss_ib": 0.010262886993587017, + "step": 1882 + }, + { + "ce_ib": 2.566894769668579, + "ce_orig": 0.4599597454071045, + "epoch": 0.5412322956359192, + "kl_loss": 0.11533834040164948, + "loss_ib": 0.003720278153195977, + "step": 1882 + }, + { + "ce_ib": 6.130898475646973, + "ce_orig": 1.097641110420227, + "epoch": 0.5412322956359192, + "kl_loss": 0.2714581787586212, + "loss_ib": 0.008845480158925056, + "step": 1882 + }, + { + "ce_ib": 5.6272687911987305, + "ce_orig": 0.811035692691803, + "epoch": 0.5415198792148969, + "kl_loss": 0.406782329082489, + "loss_ib": 0.009695092216134071, + "step": 1883 + }, + { + "ce_ib": 4.359681129455566, + "ce_orig": 0.6566734910011292, + "epoch": 0.5415198792148969, + "kl_loss": 0.2520410418510437, + "loss_ib": 0.00688009150326252, + "step": 1883 + }, + { + "ce_ib": 5.818939685821533, + "ce_orig": 0.9224424362182617, + "epoch": 0.5415198792148969, + "kl_loss": 0.2999403476715088, + "loss_ib": 0.00881834328174591, + "step": 1883 + }, + { + "ce_ib": 4.2311506271362305, + "ce_orig": 0.3763723075389862, + "epoch": 0.5415198792148969, + "kl_loss": 0.2320677787065506, + "loss_ib": 0.006551828235387802, + "step": 1883 + }, + { + "ce_ib": 3.7210848331451416, + "ce_orig": 0.830406129360199, + "epoch": 0.5418074627938745, + "kl_loss": 0.22216445207595825, + "loss_ib": 0.005942729767411947, + "step": 1884 + }, + { + "ce_ib": 5.938092231750488, + "ce_orig": 1.2687435150146484, + "epoch": 0.5418074627938745, + "kl_loss": 0.21658417582511902, + "loss_ib": 0.008103934116661549, + "step": 1884 + }, + { + "ce_ib": 4.062071323394775, + "ce_orig": 0.8783230781555176, + "epoch": 0.5418074627938745, + "kl_loss": 0.15394330024719238, + "loss_ib": 0.005601504351943731, + "step": 1884 + }, + { + "ce_ib": 5.388051509857178, + "ce_orig": 0.7404717206954956, + "epoch": 0.5418074627938745, + "kl_loss": 0.14342042803764343, + "loss_ib": 0.006822255905717611, + "step": 1884 + }, + { + "epoch": 0.5420950463728521, + "grad_norm": 0.1313483566045761, + "learning_rate": 9.421440094768903e-06, + "loss": 0.8407, + "step": 1885 + }, + { + "ce_ib": 3.99212384223938, + "ce_orig": 0.6558020114898682, + "epoch": 0.5420950463728521, + "kl_loss": 0.20062603056430817, + "loss_ib": 0.005998384207487106, + "step": 1885 + }, + { + "ce_ib": 5.2277727127075195, + "ce_orig": 0.8348150253295898, + "epoch": 0.5420950463728521, + "kl_loss": 0.20737120509147644, + "loss_ib": 0.0073014842346310616, + "step": 1885 + }, + { + "ce_ib": 3.134458541870117, + "ce_orig": 0.5520787239074707, + "epoch": 0.5420950463728521, + "kl_loss": 0.18166503310203552, + "loss_ib": 0.00495110871270299, + "step": 1885 + }, + { + "ce_ib": 4.72963285446167, + "ce_orig": 0.7465882897377014, + "epoch": 0.5420950463728521, + "kl_loss": 0.4079294204711914, + "loss_ib": 0.008808927610516548, + "step": 1885 + }, + { + "ce_ib": 5.483861446380615, + "ce_orig": 1.0649985074996948, + "epoch": 0.5423826299518297, + "kl_loss": 0.27285709977149963, + "loss_ib": 0.008212432265281677, + "step": 1886 + }, + { + "ce_ib": 4.002931118011475, + "ce_orig": 0.6592392921447754, + "epoch": 0.5423826299518297, + "kl_loss": 0.1890222281217575, + "loss_ib": 0.005893153604120016, + "step": 1886 + }, + { + "ce_ib": 4.448903560638428, + "ce_orig": 0.8768444657325745, + "epoch": 0.5423826299518297, + "kl_loss": 0.14126378297805786, + "loss_ib": 0.005861540790647268, + "step": 1886 + }, + { + "ce_ib": 6.241933345794678, + "ce_orig": 1.0336610078811646, + "epoch": 0.5423826299518297, + "kl_loss": 0.2534750699996948, + "loss_ib": 0.008776684291660786, + "step": 1886 + }, + { + "ce_ib": 3.197049856185913, + "ce_orig": 0.8099791407585144, + "epoch": 0.5426702135308074, + "kl_loss": 0.1748722493648529, + "loss_ib": 0.004945772234350443, + "step": 1887 + }, + { + "ce_ib": 4.192432403564453, + "ce_orig": 0.7689682841300964, + "epoch": 0.5426702135308074, + "kl_loss": 0.19127227365970612, + "loss_ib": 0.0061051552183926105, + "step": 1887 + }, + { + "ce_ib": 9.011332511901855, + "ce_orig": 1.5508743524551392, + "epoch": 0.5426702135308074, + "kl_loss": 0.18881624937057495, + "loss_ib": 0.010899494402110577, + "step": 1887 + }, + { + "ce_ib": 2.7345058917999268, + "ce_orig": 0.6141093373298645, + "epoch": 0.5426702135308074, + "kl_loss": 0.22916878759860992, + "loss_ib": 0.005026193801313639, + "step": 1887 + }, + { + "ce_ib": 6.5455098152160645, + "ce_orig": 1.1694104671478271, + "epoch": 0.542957797109785, + "kl_loss": 0.1957620233297348, + "loss_ib": 0.008503129705786705, + "step": 1888 + }, + { + "ce_ib": 3.9233615398406982, + "ce_orig": 0.5173555612564087, + "epoch": 0.542957797109785, + "kl_loss": 0.17764030396938324, + "loss_ib": 0.005699764471501112, + "step": 1888 + }, + { + "ce_ib": 8.286897659301758, + "ce_orig": 1.6964346170425415, + "epoch": 0.542957797109785, + "kl_loss": 0.21384048461914062, + "loss_ib": 0.010425303131341934, + "step": 1888 + }, + { + "ce_ib": 5.003795623779297, + "ce_orig": 0.7326104044914246, + "epoch": 0.542957797109785, + "kl_loss": 0.3118665814399719, + "loss_ib": 0.008122460916638374, + "step": 1888 + }, + { + "ce_ib": 3.2082910537719727, + "ce_orig": 0.6192259192466736, + "epoch": 0.5432453806887627, + "kl_loss": 0.17962077260017395, + "loss_ib": 0.005004498641937971, + "step": 1889 + }, + { + "ce_ib": 3.2921929359436035, + "ce_orig": 0.5335933566093445, + "epoch": 0.5432453806887627, + "kl_loss": 0.27632591128349304, + "loss_ib": 0.006055451929569244, + "step": 1889 + }, + { + "ce_ib": 6.6286234855651855, + "ce_orig": 1.1099865436553955, + "epoch": 0.5432453806887627, + "kl_loss": 0.1445891112089157, + "loss_ib": 0.00807451456785202, + "step": 1889 + }, + { + "ce_ib": 4.841403007507324, + "ce_orig": 0.49701279401779175, + "epoch": 0.5432453806887627, + "kl_loss": 0.32149016857147217, + "loss_ib": 0.008056304417550564, + "step": 1889 + }, + { + "epoch": 0.5435329642677403, + "grad_norm": 0.11242397874593735, + "learning_rate": 9.417810903699508e-06, + "loss": 0.8766, + "step": 1890 + }, + { + "ce_ib": 3.463954210281372, + "ce_orig": 0.6613526344299316, + "epoch": 0.5435329642677403, + "kl_loss": 0.16436061263084412, + "loss_ib": 0.005107560195028782, + "step": 1890 + }, + { + "ce_ib": 6.53847074508667, + "ce_orig": 1.2058744430541992, + "epoch": 0.5435329642677403, + "kl_loss": 0.2621225416660309, + "loss_ib": 0.009159696288406849, + "step": 1890 + }, + { + "ce_ib": 4.156022548675537, + "ce_orig": 0.8861443996429443, + "epoch": 0.5435329642677403, + "kl_loss": 0.17761921882629395, + "loss_ib": 0.005932214669883251, + "step": 1890 + }, + { + "ce_ib": 2.1399264335632324, + "ce_orig": 0.2459627389907837, + "epoch": 0.5435329642677403, + "kl_loss": 0.4725005626678467, + "loss_ib": 0.006864931900054216, + "step": 1890 + }, + { + "ce_ib": 5.926367282867432, + "ce_orig": 1.149590015411377, + "epoch": 0.543820547846718, + "kl_loss": 0.19166606664657593, + "loss_ib": 0.00784302782267332, + "step": 1891 + }, + { + "ce_ib": 5.3010969161987305, + "ce_orig": 0.6915357708930969, + "epoch": 0.543820547846718, + "kl_loss": 0.19798554480075836, + "loss_ib": 0.007280952297151089, + "step": 1891 + }, + { + "ce_ib": 4.656065940856934, + "ce_orig": 0.6376870274543762, + "epoch": 0.543820547846718, + "kl_loss": 0.25316092371940613, + "loss_ib": 0.007187675219029188, + "step": 1891 + }, + { + "ce_ib": 5.661493301391602, + "ce_orig": 0.9320741295814514, + "epoch": 0.543820547846718, + "kl_loss": 0.2215915024280548, + "loss_ib": 0.007877408526837826, + "step": 1891 + }, + { + "ce_ib": 5.105285167694092, + "ce_orig": 0.6744505763053894, + "epoch": 0.5441081314256956, + "kl_loss": 0.3318011164665222, + "loss_ib": 0.008423295803368092, + "step": 1892 + }, + { + "ce_ib": 3.8302907943725586, + "ce_orig": 0.528817355632782, + "epoch": 0.5441081314256956, + "kl_loss": 0.19222483038902283, + "loss_ib": 0.005752538796514273, + "step": 1892 + }, + { + "ce_ib": 4.628088474273682, + "ce_orig": 0.7512523531913757, + "epoch": 0.5441081314256956, + "kl_loss": 0.2041558027267456, + "loss_ib": 0.006669646129012108, + "step": 1892 + }, + { + "ce_ib": 5.9766621589660645, + "ce_orig": 0.9840378165245056, + "epoch": 0.5441081314256956, + "kl_loss": 0.271618515253067, + "loss_ib": 0.008692847564816475, + "step": 1892 + }, + { + "ce_ib": 6.145042896270752, + "ce_orig": 0.914753794670105, + "epoch": 0.5443957150046732, + "kl_loss": 0.26932966709136963, + "loss_ib": 0.00883833970874548, + "step": 1893 + }, + { + "ce_ib": 6.065830230712891, + "ce_orig": 1.0640356540679932, + "epoch": 0.5443957150046732, + "kl_loss": 0.36412590742111206, + "loss_ib": 0.009707089513540268, + "step": 1893 + }, + { + "ce_ib": 3.033369779586792, + "ce_orig": 0.43958115577697754, + "epoch": 0.5443957150046732, + "kl_loss": 0.17401011288166046, + "loss_ib": 0.00477347057312727, + "step": 1893 + }, + { + "ce_ib": 6.30896520614624, + "ce_orig": 1.375815510749817, + "epoch": 0.5443957150046732, + "kl_loss": 0.23319408297538757, + "loss_ib": 0.00864090584218502, + "step": 1893 + }, + { + "ce_ib": 3.9230875968933105, + "ce_orig": 0.6200323104858398, + "epoch": 0.5446832985836508, + "kl_loss": 0.22491362690925598, + "loss_ib": 0.006172223947942257, + "step": 1894 + }, + { + "ce_ib": 4.911445140838623, + "ce_orig": 0.5863372087478638, + "epoch": 0.5446832985836508, + "kl_loss": 0.28006529808044434, + "loss_ib": 0.007712098304182291, + "step": 1894 + }, + { + "ce_ib": 4.6514363288879395, + "ce_orig": 0.8628247976303101, + "epoch": 0.5446832985836508, + "kl_loss": 0.203122079372406, + "loss_ib": 0.006682656705379486, + "step": 1894 + }, + { + "ce_ib": 4.065433025360107, + "ce_orig": 0.6826019883155823, + "epoch": 0.5446832985836508, + "kl_loss": 0.26031285524368286, + "loss_ib": 0.006668561603873968, + "step": 1894 + }, + { + "epoch": 0.5449708821626286, + "grad_norm": 0.12011422216892242, + "learning_rate": 9.414171069098252e-06, + "loss": 0.8858, + "step": 1895 + }, + { + "ce_ib": 6.216679573059082, + "ce_orig": 0.9155467748641968, + "epoch": 0.5449708821626286, + "kl_loss": 0.2708143889904022, + "loss_ib": 0.008924824185669422, + "step": 1895 + }, + { + "ce_ib": 4.060703277587891, + "ce_orig": 0.5009088516235352, + "epoch": 0.5449708821626286, + "kl_loss": 0.16042546927928925, + "loss_ib": 0.00566495768725872, + "step": 1895 + }, + { + "ce_ib": 6.486944198608398, + "ce_orig": 0.8409891128540039, + "epoch": 0.5449708821626286, + "kl_loss": 0.15728840231895447, + "loss_ib": 0.008059828542172909, + "step": 1895 + }, + { + "ce_ib": 5.548868656158447, + "ce_orig": 0.6430012583732605, + "epoch": 0.5449708821626286, + "kl_loss": 0.2702459692955017, + "loss_ib": 0.008251328021287918, + "step": 1895 + }, + { + "ce_ib": 3.7308897972106934, + "ce_orig": 0.551981508731842, + "epoch": 0.5452584657416062, + "kl_loss": 0.1988811194896698, + "loss_ib": 0.005719700828194618, + "step": 1896 + }, + { + "ce_ib": 6.85680627822876, + "ce_orig": 0.6434974074363708, + "epoch": 0.5452584657416062, + "kl_loss": 0.2716730237007141, + "loss_ib": 0.009573535993695259, + "step": 1896 + }, + { + "ce_ib": 8.618288040161133, + "ce_orig": 1.4570660591125488, + "epoch": 0.5452584657416062, + "kl_loss": 0.22189399600028992, + "loss_ib": 0.010837228037416935, + "step": 1896 + }, + { + "ce_ib": 3.696220636367798, + "ce_orig": 0.7216467261314392, + "epoch": 0.5452584657416062, + "kl_loss": 0.20159435272216797, + "loss_ib": 0.00571216456592083, + "step": 1896 + }, + { + "ce_ib": 8.26495361328125, + "ce_orig": 1.3415573835372925, + "epoch": 0.5455460493205838, + "kl_loss": 0.24519026279449463, + "loss_ib": 0.010716855525970459, + "step": 1897 + }, + { + "ce_ib": 3.7366104125976562, + "ce_orig": 0.37444525957107544, + "epoch": 0.5455460493205838, + "kl_loss": 0.3108067214488983, + "loss_ib": 0.006844677962362766, + "step": 1897 + }, + { + "ce_ib": 5.597085475921631, + "ce_orig": 1.131085753440857, + "epoch": 0.5455460493205838, + "kl_loss": 0.17204684019088745, + "loss_ib": 0.007317553739994764, + "step": 1897 + }, + { + "ce_ib": 5.646449565887451, + "ce_orig": 1.0100291967391968, + "epoch": 0.5455460493205838, + "kl_loss": 0.19637063145637512, + "loss_ib": 0.007610156200826168, + "step": 1897 + }, + { + "ce_ib": 4.564586639404297, + "ce_orig": 0.4993542730808258, + "epoch": 0.5458336328995614, + "kl_loss": 0.2906390428543091, + "loss_ib": 0.007470977026969194, + "step": 1898 + }, + { + "ce_ib": 4.966852188110352, + "ce_orig": 0.8930625915527344, + "epoch": 0.5458336328995614, + "kl_loss": 0.18525347113609314, + "loss_ib": 0.006819386966526508, + "step": 1898 + }, + { + "ce_ib": 2.9131898880004883, + "ce_orig": 0.398380309343338, + "epoch": 0.5458336328995614, + "kl_loss": 0.1496700942516327, + "loss_ib": 0.0044098906219005585, + "step": 1898 + }, + { + "ce_ib": 6.019862651824951, + "ce_orig": 1.2035516500473022, + "epoch": 0.5458336328995614, + "kl_loss": 0.23380295932292938, + "loss_ib": 0.008357892744243145, + "step": 1898 + }, + { + "ce_ib": 6.085721492767334, + "ce_orig": 0.4494902789592743, + "epoch": 0.5461212164785391, + "kl_loss": 0.25177237391471863, + "loss_ib": 0.008603445254266262, + "step": 1899 + }, + { + "ce_ib": 3.5063061714172363, + "ce_orig": 0.6513923406600952, + "epoch": 0.5461212164785391, + "kl_loss": 0.19170190393924713, + "loss_ib": 0.00542332511395216, + "step": 1899 + }, + { + "ce_ib": 2.6341021060943604, + "ce_orig": 0.5152660012245178, + "epoch": 0.5461212164785391, + "kl_loss": 0.16661971807479858, + "loss_ib": 0.004300299100577831, + "step": 1899 + }, + { + "ce_ib": 5.236257553100586, + "ce_orig": 0.5088071823120117, + "epoch": 0.5461212164785391, + "kl_loss": 0.24358290433883667, + "loss_ib": 0.007672086823731661, + "step": 1899 + }, + { + "epoch": 0.5464088000575167, + "grad_norm": 0.11163178831338882, + "learning_rate": 9.410520599734338e-06, + "loss": 0.8181, + "step": 1900 + }, + { + "ce_ib": 6.69284200668335, + "ce_orig": 0.766628623008728, + "epoch": 0.5464088000575167, + "kl_loss": 0.14039671421051025, + "loss_ib": 0.00809680949896574, + "step": 1900 + }, + { + "ce_ib": 5.698209762573242, + "ce_orig": 0.45784991979599, + "epoch": 0.5464088000575167, + "kl_loss": 0.4267015755176544, + "loss_ib": 0.009965225122869015, + "step": 1900 + }, + { + "ce_ib": 3.716140031814575, + "ce_orig": 0.3612969219684601, + "epoch": 0.5464088000575167, + "kl_loss": 0.2603936493396759, + "loss_ib": 0.0063200765289366245, + "step": 1900 + }, + { + "ce_ib": 5.239020347595215, + "ce_orig": 1.0331742763519287, + "epoch": 0.5464088000575167, + "kl_loss": 0.24392680823802948, + "loss_ib": 0.007678288500756025, + "step": 1900 + }, + { + "ce_ib": 8.650659561157227, + "ce_orig": 1.662894368171692, + "epoch": 0.5466963836364943, + "kl_loss": 0.26084673404693604, + "loss_ib": 0.011259126476943493, + "step": 1901 + }, + { + "ce_ib": 7.4206438064575195, + "ce_orig": 1.3804047107696533, + "epoch": 0.5466963836364943, + "kl_loss": 0.20314303040504456, + "loss_ib": 0.009452074766159058, + "step": 1901 + }, + { + "ce_ib": 4.9321208000183105, + "ce_orig": 0.7552053332328796, + "epoch": 0.5466963836364943, + "kl_loss": 0.14079751074314117, + "loss_ib": 0.0063400957733392715, + "step": 1901 + }, + { + "ce_ib": 8.351343154907227, + "ce_orig": 1.5969254970550537, + "epoch": 0.5466963836364943, + "kl_loss": 0.17940068244934082, + "loss_ib": 0.01014534942805767, + "step": 1901 + }, + { + "ce_ib": 4.7066874504089355, + "ce_orig": 0.5475854873657227, + "epoch": 0.546983967215472, + "kl_loss": 0.192865252494812, + "loss_ib": 0.006635340396314859, + "step": 1902 + }, + { + "ce_ib": 2.6522388458251953, + "ce_orig": 0.45800575613975525, + "epoch": 0.546983967215472, + "kl_loss": 0.17094935476779938, + "loss_ib": 0.004361732397228479, + "step": 1902 + }, + { + "ce_ib": 3.336050271987915, + "ce_orig": 0.6363884806632996, + "epoch": 0.546983967215472, + "kl_loss": 0.21376313269138336, + "loss_ib": 0.005473681725561619, + "step": 1902 + }, + { + "ce_ib": 5.004870891571045, + "ce_orig": 0.8490200042724609, + "epoch": 0.546983967215472, + "kl_loss": 0.20807631313800812, + "loss_ib": 0.007085633929818869, + "step": 1902 + }, + { + "ce_ib": 3.4831836223602295, + "ce_orig": 0.37373557686805725, + "epoch": 0.5472715507944497, + "kl_loss": 0.1494193971157074, + "loss_ib": 0.004977377597242594, + "step": 1903 + }, + { + "ce_ib": 5.920387268066406, + "ce_orig": 1.1255521774291992, + "epoch": 0.5472715507944497, + "kl_loss": 0.3962468206882477, + "loss_ib": 0.009882855229079723, + "step": 1903 + }, + { + "ce_ib": 4.647152423858643, + "ce_orig": 0.8957937359809875, + "epoch": 0.5472715507944497, + "kl_loss": 0.1819320023059845, + "loss_ib": 0.006466472055763006, + "step": 1903 + }, + { + "ce_ib": 5.453622817993164, + "ce_orig": 0.9864134788513184, + "epoch": 0.5472715507944497, + "kl_loss": 0.18783822655677795, + "loss_ib": 0.00733200553804636, + "step": 1903 + }, + { + "ce_ib": 6.079820156097412, + "ce_orig": 0.8921633362770081, + "epoch": 0.5475591343734273, + "kl_loss": 0.40260013937950134, + "loss_ib": 0.01010582223534584, + "step": 1904 + }, + { + "ce_ib": 7.706158638000488, + "ce_orig": 1.3141320943832397, + "epoch": 0.5475591343734273, + "kl_loss": 0.2728313207626343, + "loss_ib": 0.010434472002089024, + "step": 1904 + }, + { + "ce_ib": 5.507092475891113, + "ce_orig": 0.9962813854217529, + "epoch": 0.5475591343734273, + "kl_loss": 0.20328868925571442, + "loss_ib": 0.0075399791821837425, + "step": 1904 + }, + { + "ce_ib": 8.686485290527344, + "ce_orig": 1.5090293884277344, + "epoch": 0.5475591343734273, + "kl_loss": 0.2840725779533386, + "loss_ib": 0.011527211405336857, + "step": 1904 + }, + { + "epoch": 0.5478467179524049, + "grad_norm": 0.11758630722761154, + "learning_rate": 9.406859504402597e-06, + "loss": 0.8631, + "step": 1905 + }, + { + "ce_ib": 5.677255153656006, + "ce_orig": 1.0834262371063232, + "epoch": 0.5478467179524049, + "kl_loss": 0.17184485495090485, + "loss_ib": 0.007395703811198473, + "step": 1905 + }, + { + "ce_ib": 4.020125389099121, + "ce_orig": 0.678731381893158, + "epoch": 0.5478467179524049, + "kl_loss": 0.1683593988418579, + "loss_ib": 0.005703719798475504, + "step": 1905 + }, + { + "ce_ib": 3.427790403366089, + "ce_orig": 0.8733957409858704, + "epoch": 0.5478467179524049, + "kl_loss": 0.2050205022096634, + "loss_ib": 0.00547799514606595, + "step": 1905 + }, + { + "ce_ib": 7.873215198516846, + "ce_orig": 1.0264562368392944, + "epoch": 0.5478467179524049, + "kl_loss": 0.2520449459552765, + "loss_ib": 0.010393664240837097, + "step": 1905 + }, + { + "ce_ib": 6.0730204582214355, + "ce_orig": 1.1063258647918701, + "epoch": 0.5481343015313825, + "kl_loss": 0.14009909331798553, + "loss_ib": 0.007474011741578579, + "step": 1906 + }, + { + "ce_ib": 4.413961410522461, + "ce_orig": 0.6400641798973083, + "epoch": 0.5481343015313825, + "kl_loss": 0.23015549778938293, + "loss_ib": 0.006715516094118357, + "step": 1906 + }, + { + "ce_ib": 6.499683380126953, + "ce_orig": 0.9660086035728455, + "epoch": 0.5481343015313825, + "kl_loss": 0.13740497827529907, + "loss_ib": 0.007873732596635818, + "step": 1906 + }, + { + "ce_ib": 4.543107032775879, + "ce_orig": 0.775365948677063, + "epoch": 0.5481343015313825, + "kl_loss": 0.24304203689098358, + "loss_ib": 0.006973527371883392, + "step": 1906 + }, + { + "ce_ib": 3.398146867752075, + "ce_orig": 0.40355029702186584, + "epoch": 0.5484218851103602, + "kl_loss": 0.10796613246202469, + "loss_ib": 0.004477808251976967, + "step": 1907 + }, + { + "ce_ib": 4.06525182723999, + "ce_orig": 0.8296524286270142, + "epoch": 0.5484218851103602, + "kl_loss": 0.19505798816680908, + "loss_ib": 0.006015831604599953, + "step": 1907 + }, + { + "ce_ib": 3.425095319747925, + "ce_orig": 0.7265019416809082, + "epoch": 0.5484218851103602, + "kl_loss": 0.2750162184238434, + "loss_ib": 0.0061752572655677795, + "step": 1907 + }, + { + "ce_ib": 6.132481098175049, + "ce_orig": 1.0314496755599976, + "epoch": 0.5484218851103602, + "kl_loss": 0.21296876668930054, + "loss_ib": 0.008262168616056442, + "step": 1907 + }, + { + "ce_ib": 6.469040870666504, + "ce_orig": 0.8110198974609375, + "epoch": 0.5487094686893378, + "kl_loss": 0.28225424885749817, + "loss_ib": 0.009291582740843296, + "step": 1908 + }, + { + "ce_ib": 6.6580328941345215, + "ce_orig": 0.9502229690551758, + "epoch": 0.5487094686893378, + "kl_loss": 0.27972739934921265, + "loss_ib": 0.009455306455492973, + "step": 1908 + }, + { + "ce_ib": 3.2922401428222656, + "ce_orig": 0.764376163482666, + "epoch": 0.5487094686893378, + "kl_loss": 0.24232013523578644, + "loss_ib": 0.005715441424399614, + "step": 1908 + }, + { + "ce_ib": 9.52113151550293, + "ce_orig": 0.905972957611084, + "epoch": 0.5487094686893378, + "kl_loss": 0.17079463601112366, + "loss_ib": 0.011229077354073524, + "step": 1908 + }, + { + "ce_ib": 6.658320426940918, + "ce_orig": 0.7265307903289795, + "epoch": 0.5489970522683155, + "kl_loss": 0.17263171076774597, + "loss_ib": 0.008384637534618378, + "step": 1909 + }, + { + "ce_ib": 5.1962175369262695, + "ce_orig": 1.1013990640640259, + "epoch": 0.5489970522683155, + "kl_loss": 0.20727990567684174, + "loss_ib": 0.007269016932696104, + "step": 1909 + }, + { + "ce_ib": 5.728460311889648, + "ce_orig": 1.0274955034255981, + "epoch": 0.5489970522683155, + "kl_loss": 0.24510778486728668, + "loss_ib": 0.008179537951946259, + "step": 1909 + }, + { + "ce_ib": 2.743274450302124, + "ce_orig": 0.40781253576278687, + "epoch": 0.5489970522683155, + "kl_loss": 0.1681884527206421, + "loss_ib": 0.004425159189850092, + "step": 1909 + }, + { + "epoch": 0.5492846358472931, + "grad_norm": 0.12240489572286606, + "learning_rate": 9.403187791923455e-06, + "loss": 0.9302, + "step": 1910 + }, + { + "ce_ib": 2.886667490005493, + "ce_orig": 0.5741050243377686, + "epoch": 0.5492846358472931, + "kl_loss": 0.16820037364959717, + "loss_ib": 0.004568671341985464, + "step": 1910 + }, + { + "ce_ib": 4.348527908325195, + "ce_orig": 0.3352336585521698, + "epoch": 0.5492846358472931, + "kl_loss": 0.29646146297454834, + "loss_ib": 0.007313142996281385, + "step": 1910 + }, + { + "ce_ib": 3.690995216369629, + "ce_orig": 0.8190628886222839, + "epoch": 0.5492846358472931, + "kl_loss": 0.17163097858428955, + "loss_ib": 0.005407304503023624, + "step": 1910 + }, + { + "ce_ib": 5.406653881072998, + "ce_orig": 0.8319385647773743, + "epoch": 0.5492846358472931, + "kl_loss": 0.2621734142303467, + "loss_ib": 0.008028388023376465, + "step": 1910 + }, + { + "ce_ib": 4.8341193199157715, + "ce_orig": 0.2599828541278839, + "epoch": 0.5495722194262708, + "kl_loss": 0.232896089553833, + "loss_ib": 0.007163079921156168, + "step": 1911 + }, + { + "ce_ib": 5.156683921813965, + "ce_orig": 0.5585552453994751, + "epoch": 0.5495722194262708, + "kl_loss": 0.28248530626296997, + "loss_ib": 0.007981536909937859, + "step": 1911 + }, + { + "ce_ib": 4.102294445037842, + "ce_orig": 0.9090990424156189, + "epoch": 0.5495722194262708, + "kl_loss": 0.12182167172431946, + "loss_ib": 0.0053205108270049095, + "step": 1911 + }, + { + "ce_ib": 3.1007497310638428, + "ce_orig": 0.3543629050254822, + "epoch": 0.5495722194262708, + "kl_loss": 0.23339495062828064, + "loss_ib": 0.005434698890894651, + "step": 1911 + }, + { + "ce_ib": 3.793285369873047, + "ce_orig": 0.6255490183830261, + "epoch": 0.5498598030052484, + "kl_loss": 0.19438017904758453, + "loss_ib": 0.005737087223678827, + "step": 1912 + }, + { + "ce_ib": 3.512158155441284, + "ce_orig": 0.5986344814300537, + "epoch": 0.5498598030052484, + "kl_loss": 0.1535426378250122, + "loss_ib": 0.005047584883868694, + "step": 1912 + }, + { + "ce_ib": 7.09074592590332, + "ce_orig": 1.1093448400497437, + "epoch": 0.5498598030052484, + "kl_loss": 0.23947542905807495, + "loss_ib": 0.009485499933362007, + "step": 1912 + }, + { + "ce_ib": 4.961276054382324, + "ce_orig": 0.995897650718689, + "epoch": 0.5498598030052484, + "kl_loss": 0.15836216509342194, + "loss_ib": 0.006544897332787514, + "step": 1912 + }, + { + "ce_ib": 5.7570037841796875, + "ce_orig": 0.8582257628440857, + "epoch": 0.550147386584226, + "kl_loss": 0.20467862486839294, + "loss_ib": 0.007803790271282196, + "step": 1913 + }, + { + "ce_ib": 6.998703479766846, + "ce_orig": 1.3555225133895874, + "epoch": 0.550147386584226, + "kl_loss": 0.24651572108268738, + "loss_ib": 0.009463860653340816, + "step": 1913 + }, + { + "ce_ib": 5.055090427398682, + "ce_orig": 0.5152579545974731, + "epoch": 0.550147386584226, + "kl_loss": 0.21888798475265503, + "loss_ib": 0.007243970409035683, + "step": 1913 + }, + { + "ce_ib": 5.7093729972839355, + "ce_orig": 1.3219268321990967, + "epoch": 0.550147386584226, + "kl_loss": 0.22586293518543243, + "loss_ib": 0.007968001998960972, + "step": 1913 + }, + { + "ce_ib": 6.738046646118164, + "ce_orig": 0.8517309427261353, + "epoch": 0.5504349701632036, + "kl_loss": 0.17055313289165497, + "loss_ib": 0.008443578146398067, + "step": 1914 + }, + { + "ce_ib": 4.148743629455566, + "ce_orig": 0.5026411414146423, + "epoch": 0.5504349701632036, + "kl_loss": 0.23652209341526031, + "loss_ib": 0.006513964384794235, + "step": 1914 + }, + { + "ce_ib": 3.847912549972534, + "ce_orig": 0.6879178881645203, + "epoch": 0.5504349701632036, + "kl_loss": 0.2362547069787979, + "loss_ib": 0.006210459396243095, + "step": 1914 + }, + { + "ce_ib": 4.1432671546936035, + "ce_orig": 0.6263501644134521, + "epoch": 0.5504349701632036, + "kl_loss": 0.23196320235729218, + "loss_ib": 0.006462899502366781, + "step": 1914 + }, + { + "epoch": 0.5507225537421814, + "grad_norm": 0.1128683015704155, + "learning_rate": 9.39950547114292e-06, + "loss": 0.9156, + "step": 1915 + }, + { + "ce_ib": 2.125145196914673, + "ce_orig": 0.31030476093292236, + "epoch": 0.5507225537421814, + "kl_loss": 0.2390914410352707, + "loss_ib": 0.004516059532761574, + "step": 1915 + }, + { + "ce_ib": 3.485665798187256, + "ce_orig": 0.8444491624832153, + "epoch": 0.5507225537421814, + "kl_loss": 0.2743951082229614, + "loss_ib": 0.006229616701602936, + "step": 1915 + }, + { + "ce_ib": 5.361143112182617, + "ce_orig": 0.9358366131782532, + "epoch": 0.5507225537421814, + "kl_loss": 0.2102963626384735, + "loss_ib": 0.007464107125997543, + "step": 1915 + }, + { + "ce_ib": 2.9449386596679688, + "ce_orig": 0.622304379940033, + "epoch": 0.5507225537421814, + "kl_loss": 0.2748371958732605, + "loss_ib": 0.005693310406059027, + "step": 1915 + }, + { + "ce_ib": 3.3908071517944336, + "ce_orig": 0.7540403604507446, + "epoch": 0.551010137321159, + "kl_loss": 0.19135555624961853, + "loss_ib": 0.005304362624883652, + "step": 1916 + }, + { + "ce_ib": 4.298043727874756, + "ce_orig": 0.5426058769226074, + "epoch": 0.551010137321159, + "kl_loss": 0.21068069338798523, + "loss_ib": 0.0064048501662909985, + "step": 1916 + }, + { + "ce_ib": 6.896023273468018, + "ce_orig": 1.1848102807998657, + "epoch": 0.551010137321159, + "kl_loss": 0.17196182906627655, + "loss_ib": 0.008615641854703426, + "step": 1916 + }, + { + "ce_ib": 6.652091026306152, + "ce_orig": 1.0947281122207642, + "epoch": 0.551010137321159, + "kl_loss": 0.3030722737312317, + "loss_ib": 0.009682813659310341, + "step": 1916 + }, + { + "ce_ib": 5.312432289123535, + "ce_orig": 1.1666892766952515, + "epoch": 0.5512977209001366, + "kl_loss": 0.2234857827425003, + "loss_ib": 0.007547290064394474, + "step": 1917 + }, + { + "ce_ib": 7.760123252868652, + "ce_orig": 1.0860095024108887, + "epoch": 0.5512977209001366, + "kl_loss": 0.2641758322715759, + "loss_ib": 0.01040188129991293, + "step": 1917 + }, + { + "ce_ib": 5.78464412689209, + "ce_orig": 0.7784232497215271, + "epoch": 0.5512977209001366, + "kl_loss": 0.20703254640102386, + "loss_ib": 0.007854970172047615, + "step": 1917 + }, + { + "ce_ib": 6.338881492614746, + "ce_orig": 0.6332058906555176, + "epoch": 0.5512977209001366, + "kl_loss": 0.22034861147403717, + "loss_ib": 0.00854236725717783, + "step": 1917 + }, + { + "ce_ib": 2.3926210403442383, + "ce_orig": 0.4297766089439392, + "epoch": 0.5515853044791142, + "kl_loss": 0.18296483159065247, + "loss_ib": 0.004222269169986248, + "step": 1918 + }, + { + "ce_ib": 4.578824043273926, + "ce_orig": 0.38781973719596863, + "epoch": 0.5515853044791142, + "kl_loss": 0.32944250106811523, + "loss_ib": 0.007873249240219593, + "step": 1918 + }, + { + "ce_ib": 4.018671989440918, + "ce_orig": 0.6380063891410828, + "epoch": 0.5515853044791142, + "kl_loss": 0.24909710884094238, + "loss_ib": 0.006509643048048019, + "step": 1918 + }, + { + "ce_ib": 5.295958042144775, + "ce_orig": 0.8462589383125305, + "epoch": 0.5515853044791142, + "kl_loss": 0.20792736113071442, + "loss_ib": 0.007375231012701988, + "step": 1918 + }, + { + "ce_ib": 8.229303359985352, + "ce_orig": 1.2693380117416382, + "epoch": 0.5518728880580919, + "kl_loss": 0.19707272946834564, + "loss_ib": 0.01020003017038107, + "step": 1919 + }, + { + "ce_ib": 3.729757785797119, + "ce_orig": 0.7573217153549194, + "epoch": 0.5518728880580919, + "kl_loss": 0.20582321286201477, + "loss_ib": 0.005787990055978298, + "step": 1919 + }, + { + "ce_ib": 5.051339626312256, + "ce_orig": 0.7594917416572571, + "epoch": 0.5518728880580919, + "kl_loss": 0.21279123425483704, + "loss_ib": 0.0071792518720030785, + "step": 1919 + }, + { + "ce_ib": 5.25255823135376, + "ce_orig": 0.8441687822341919, + "epoch": 0.5518728880580919, + "kl_loss": 0.16548152267932892, + "loss_ib": 0.006907373666763306, + "step": 1919 + }, + { + "epoch": 0.5521604716370695, + "grad_norm": 0.13038235902786255, + "learning_rate": 9.395812550932559e-06, + "loss": 0.8628, + "step": 1920 + }, + { + "ce_ib": 5.125471115112305, + "ce_orig": 0.9467810392379761, + "epoch": 0.5521604716370695, + "kl_loss": 0.2430807650089264, + "loss_ib": 0.007556278724223375, + "step": 1920 + }, + { + "ce_ib": 4.794182300567627, + "ce_orig": 0.6340484023094177, + "epoch": 0.5521604716370695, + "kl_loss": 0.29665517807006836, + "loss_ib": 0.007760734297335148, + "step": 1920 + }, + { + "ce_ib": 6.495025157928467, + "ce_orig": 0.8584146499633789, + "epoch": 0.5521604716370695, + "kl_loss": 0.2170822024345398, + "loss_ib": 0.008665846660733223, + "step": 1920 + }, + { + "ce_ib": 3.3026716709136963, + "ce_orig": 0.6432685256004333, + "epoch": 0.5521604716370695, + "kl_loss": 0.15812014043331146, + "loss_ib": 0.0048838728107512, + "step": 1920 + }, + { + "ce_ib": 4.022793292999268, + "ce_orig": 0.48814812302589417, + "epoch": 0.5524480552160471, + "kl_loss": 0.2611497640609741, + "loss_ib": 0.006634291261434555, + "step": 1921 + }, + { + "ce_ib": 6.338824272155762, + "ce_orig": 1.1433099508285522, + "epoch": 0.5524480552160471, + "kl_loss": 0.2686011791229248, + "loss_ib": 0.009024836122989655, + "step": 1921 + }, + { + "ce_ib": 6.309455394744873, + "ce_orig": 1.0639891624450684, + "epoch": 0.5524480552160471, + "kl_loss": 0.28311389684677124, + "loss_ib": 0.009140594862401485, + "step": 1921 + }, + { + "ce_ib": 7.51746940612793, + "ce_orig": 1.0372909307479858, + "epoch": 0.5524480552160471, + "kl_loss": 0.2799808979034424, + "loss_ib": 0.01031727809458971, + "step": 1921 + }, + { + "ce_ib": 4.688455104827881, + "ce_orig": 0.7796941995620728, + "epoch": 0.5527356387950249, + "kl_loss": 0.25242507457733154, + "loss_ib": 0.007212705444544554, + "step": 1922 + }, + { + "ce_ib": 8.777731895446777, + "ce_orig": 1.802945613861084, + "epoch": 0.5527356387950249, + "kl_loss": 0.33165305852890015, + "loss_ib": 0.01209426298737526, + "step": 1922 + }, + { + "ce_ib": 3.469494581222534, + "ce_orig": 0.6810784339904785, + "epoch": 0.5527356387950249, + "kl_loss": 0.17616687715053558, + "loss_ib": 0.005231163464486599, + "step": 1922 + }, + { + "ce_ib": 4.473598003387451, + "ce_orig": 0.5759130716323853, + "epoch": 0.5527356387950249, + "kl_loss": 0.22008073329925537, + "loss_ib": 0.006674405187368393, + "step": 1922 + }, + { + "ce_ib": 4.555544376373291, + "ce_orig": 0.7167826890945435, + "epoch": 0.5530232223740025, + "kl_loss": 0.168533056974411, + "loss_ib": 0.006240874994546175, + "step": 1923 + }, + { + "ce_ib": 8.480325698852539, + "ce_orig": 1.7931766510009766, + "epoch": 0.5530232223740025, + "kl_loss": 0.3537088632583618, + "loss_ib": 0.012017413973808289, + "step": 1923 + }, + { + "ce_ib": 4.297208309173584, + "ce_orig": 0.6875762939453125, + "epoch": 0.5530232223740025, + "kl_loss": 0.15500590205192566, + "loss_ib": 0.0058472673408687115, + "step": 1923 + }, + { + "ce_ib": 4.13336706161499, + "ce_orig": 0.49390938878059387, + "epoch": 0.5530232223740025, + "kl_loss": 0.2773393392562866, + "loss_ib": 0.006906760856509209, + "step": 1923 + }, + { + "ce_ib": 5.192725658416748, + "ce_orig": 1.1892716884613037, + "epoch": 0.5533108059529801, + "kl_loss": 0.14960896968841553, + "loss_ib": 0.006688815075904131, + "step": 1924 + }, + { + "ce_ib": 1.0564361810684204, + "ce_orig": 0.140930637717247, + "epoch": 0.5533108059529801, + "kl_loss": 0.48497554659843445, + "loss_ib": 0.005906191188842058, + "step": 1924 + }, + { + "ce_ib": 4.778229713439941, + "ce_orig": 0.6768468618392944, + "epoch": 0.5533108059529801, + "kl_loss": 0.18857140839099884, + "loss_ib": 0.006663944106549025, + "step": 1924 + }, + { + "ce_ib": 3.5872573852539062, + "ce_orig": 0.7152208089828491, + "epoch": 0.5533108059529801, + "kl_loss": 0.1990417242050171, + "loss_ib": 0.005577675066888332, + "step": 1924 + }, + { + "epoch": 0.5535983895319577, + "grad_norm": 0.1305120289325714, + "learning_rate": 9.392109040189473e-06, + "loss": 0.8503, + "step": 1925 + }, + { + "ce_ib": 5.968764781951904, + "ce_orig": 0.8049296736717224, + "epoch": 0.5535983895319577, + "kl_loss": 0.24111254513263702, + "loss_ib": 0.008379890583455563, + "step": 1925 + }, + { + "ce_ib": 3.8194169998168945, + "ce_orig": 0.7100443243980408, + "epoch": 0.5535983895319577, + "kl_loss": 0.3223645091056824, + "loss_ib": 0.0070430622436106205, + "step": 1925 + }, + { + "ce_ib": 8.28250503540039, + "ce_orig": 1.206803560256958, + "epoch": 0.5535983895319577, + "kl_loss": 0.201694518327713, + "loss_ib": 0.010299449786543846, + "step": 1925 + }, + { + "ce_ib": 7.383141994476318, + "ce_orig": 1.472749948501587, + "epoch": 0.5535983895319577, + "kl_loss": 0.15550222992897034, + "loss_ib": 0.008938164450228214, + "step": 1925 + }, + { + "ce_ib": 2.9840638637542725, + "ce_orig": 0.4754523038864136, + "epoch": 0.5538859731109353, + "kl_loss": 0.3020710349082947, + "loss_ib": 0.00600477447733283, + "step": 1926 + }, + { + "ce_ib": 2.064957618713379, + "ce_orig": 0.5488508939743042, + "epoch": 0.5538859731109353, + "kl_loss": 0.13696321845054626, + "loss_ib": 0.003434589831158519, + "step": 1926 + }, + { + "ce_ib": 3.859769582748413, + "ce_orig": 0.6679697632789612, + "epoch": 0.5538859731109353, + "kl_loss": 0.23655346035957336, + "loss_ib": 0.006225304678082466, + "step": 1926 + }, + { + "ce_ib": 3.0847089290618896, + "ce_orig": 0.5940335988998413, + "epoch": 0.5538859731109353, + "kl_loss": 0.2199656069278717, + "loss_ib": 0.005284365266561508, + "step": 1926 + }, + { + "ce_ib": 4.7833333015441895, + "ce_orig": 0.699425220489502, + "epoch": 0.554173556689913, + "kl_loss": 0.19268561899662018, + "loss_ib": 0.0067101893946528435, + "step": 1927 + }, + { + "ce_ib": 3.15501070022583, + "ce_orig": 0.6162528395652771, + "epoch": 0.554173556689913, + "kl_loss": 0.1841224730014801, + "loss_ib": 0.004996235482394695, + "step": 1927 + }, + { + "ce_ib": 6.578360557556152, + "ce_orig": 0.9799951910972595, + "epoch": 0.554173556689913, + "kl_loss": 0.22263585031032562, + "loss_ib": 0.00880471896380186, + "step": 1927 + }, + { + "ce_ib": 7.8032402992248535, + "ce_orig": 1.353818416595459, + "epoch": 0.554173556689913, + "kl_loss": 0.30164778232574463, + "loss_ib": 0.01081971824169159, + "step": 1927 + }, + { + "ce_ib": 4.972599983215332, + "ce_orig": 0.7287405729293823, + "epoch": 0.5544611402688906, + "kl_loss": 0.19396856427192688, + "loss_ib": 0.006912285462021828, + "step": 1928 + }, + { + "ce_ib": 3.7816243171691895, + "ce_orig": 0.6331422328948975, + "epoch": 0.5544611402688906, + "kl_loss": 0.18299607932567596, + "loss_ib": 0.0056115854531526566, + "step": 1928 + }, + { + "ce_ib": 5.973362922668457, + "ce_orig": 1.0235064029693604, + "epoch": 0.5544611402688906, + "kl_loss": 0.2769926190376282, + "loss_ib": 0.008743288926780224, + "step": 1928 + }, + { + "ce_ib": 5.59320068359375, + "ce_orig": 0.9232028722763062, + "epoch": 0.5544611402688906, + "kl_loss": 0.22228997945785522, + "loss_ib": 0.007816100493073463, + "step": 1928 + }, + { + "ce_ib": 3.503995656967163, + "ce_orig": 0.5884864330291748, + "epoch": 0.5547487238478683, + "kl_loss": 0.14196009933948517, + "loss_ib": 0.004923596978187561, + "step": 1929 + }, + { + "ce_ib": 6.933825492858887, + "ce_orig": 1.301206111907959, + "epoch": 0.5547487238478683, + "kl_loss": 0.20545580983161926, + "loss_ib": 0.008988384157419205, + "step": 1929 + }, + { + "ce_ib": 5.770648956298828, + "ce_orig": 1.1211457252502441, + "epoch": 0.5547487238478683, + "kl_loss": 0.1420101821422577, + "loss_ib": 0.0071907504461705685, + "step": 1929 + }, + { + "ce_ib": 3.230302095413208, + "ce_orig": 0.5871448516845703, + "epoch": 0.5547487238478683, + "kl_loss": 0.19306373596191406, + "loss_ib": 0.005160939414054155, + "step": 1929 + }, + { + "epoch": 0.555036307426846, + "grad_norm": 0.11835107952356339, + "learning_rate": 9.388394947836278e-06, + "loss": 0.8813, + "step": 1930 + }, + { + "ce_ib": 6.595455169677734, + "ce_orig": 0.8396819829940796, + "epoch": 0.555036307426846, + "kl_loss": 0.2513989210128784, + "loss_ib": 0.009109443984925747, + "step": 1930 + }, + { + "ce_ib": 5.202250003814697, + "ce_orig": 1.0775376558303833, + "epoch": 0.555036307426846, + "kl_loss": 0.2137918770313263, + "loss_ib": 0.007340168580412865, + "step": 1930 + }, + { + "ce_ib": 3.5433919429779053, + "ce_orig": 0.5223658084869385, + "epoch": 0.555036307426846, + "kl_loss": 0.16330486536026, + "loss_ib": 0.005176440346986055, + "step": 1930 + }, + { + "ce_ib": 4.392524719238281, + "ce_orig": 0.8352428674697876, + "epoch": 0.555036307426846, + "kl_loss": 0.20079953968524933, + "loss_ib": 0.006400519981980324, + "step": 1930 + }, + { + "ce_ib": 3.605915069580078, + "ce_orig": 0.6275536417961121, + "epoch": 0.5553238910058236, + "kl_loss": 0.2333388775587082, + "loss_ib": 0.005939303431659937, + "step": 1931 + }, + { + "ce_ib": 3.149280548095703, + "ce_orig": 0.732917070388794, + "epoch": 0.5553238910058236, + "kl_loss": 0.14264488220214844, + "loss_ib": 0.0045757293701171875, + "step": 1931 + }, + { + "ce_ib": 6.063021183013916, + "ce_orig": 0.9229342937469482, + "epoch": 0.5553238910058236, + "kl_loss": 0.214775949716568, + "loss_ib": 0.00821078009903431, + "step": 1931 + }, + { + "ce_ib": 5.187615871429443, + "ce_orig": 0.6924121975898743, + "epoch": 0.5553238910058236, + "kl_loss": 0.2174490988254547, + "loss_ib": 0.007362106814980507, + "step": 1931 + }, + { + "ce_ib": 6.522134304046631, + "ce_orig": 1.2137961387634277, + "epoch": 0.5556114745848012, + "kl_loss": 0.18655726313591003, + "loss_ib": 0.00838770717382431, + "step": 1932 + }, + { + "ce_ib": 5.60007381439209, + "ce_orig": 0.766647219657898, + "epoch": 0.5556114745848012, + "kl_loss": 0.32396399974823, + "loss_ib": 0.008839713409543037, + "step": 1932 + }, + { + "ce_ib": 9.259220123291016, + "ce_orig": 1.8698841333389282, + "epoch": 0.5556114745848012, + "kl_loss": 0.2572416365146637, + "loss_ib": 0.011831636540591717, + "step": 1932 + }, + { + "ce_ib": 7.186155796051025, + "ce_orig": 1.1270413398742676, + "epoch": 0.5556114745848012, + "kl_loss": 0.2171599268913269, + "loss_ib": 0.009357755072414875, + "step": 1932 + }, + { + "ce_ib": 5.8336663246154785, + "ce_orig": 0.8805820941925049, + "epoch": 0.5558990581637788, + "kl_loss": 0.20731845498085022, + "loss_ib": 0.007906850427389145, + "step": 1933 + }, + { + "ce_ib": 2.1970837116241455, + "ce_orig": 0.4671885669231415, + "epoch": 0.5558990581637788, + "kl_loss": 0.16664046049118042, + "loss_ib": 0.0038634883239865303, + "step": 1933 + }, + { + "ce_ib": 6.38844108581543, + "ce_orig": 0.7484267950057983, + "epoch": 0.5558990581637788, + "kl_loss": 0.25336700677871704, + "loss_ib": 0.008922111243009567, + "step": 1933 + }, + { + "ce_ib": 5.786157131195068, + "ce_orig": 0.8145442008972168, + "epoch": 0.5558990581637788, + "kl_loss": 0.19787608087062836, + "loss_ib": 0.007764918264001608, + "step": 1933 + }, + { + "ce_ib": 6.9282121658325195, + "ce_orig": 1.1202467679977417, + "epoch": 0.5561866417427564, + "kl_loss": 0.2380782961845398, + "loss_ib": 0.009308994747698307, + "step": 1934 + }, + { + "ce_ib": 4.730992317199707, + "ce_orig": 0.7270490527153015, + "epoch": 0.5561866417427564, + "kl_loss": 0.27505892515182495, + "loss_ib": 0.0074815815314650536, + "step": 1934 + }, + { + "ce_ib": 5.864253520965576, + "ce_orig": 0.8641997575759888, + "epoch": 0.5561866417427564, + "kl_loss": 0.2009393870830536, + "loss_ib": 0.007873646914958954, + "step": 1934 + }, + { + "ce_ib": 7.711478233337402, + "ce_orig": 1.4662551879882812, + "epoch": 0.5561866417427564, + "kl_loss": 0.20308509469032288, + "loss_ib": 0.009742328897118568, + "step": 1934 + }, + { + "epoch": 0.5564742253217342, + "grad_norm": 0.12336688488721848, + "learning_rate": 9.384670282821087e-06, + "loss": 0.8883, + "step": 1935 + }, + { + "ce_ib": 3.3277626037597656, + "ce_orig": 0.5428022742271423, + "epoch": 0.5564742253217342, + "kl_loss": 0.23022133111953735, + "loss_ib": 0.005629975814372301, + "step": 1935 + }, + { + "ce_ib": 4.103045463562012, + "ce_orig": 0.3328402042388916, + "epoch": 0.5564742253217342, + "kl_loss": 0.26368433237075806, + "loss_ib": 0.006739888805896044, + "step": 1935 + }, + { + "ce_ib": 3.9666333198547363, + "ce_orig": 0.656386137008667, + "epoch": 0.5564742253217342, + "kl_loss": 0.17204327881336212, + "loss_ib": 0.005687066353857517, + "step": 1935 + }, + { + "ce_ib": 2.325824022293091, + "ce_orig": 0.326698899269104, + "epoch": 0.5564742253217342, + "kl_loss": 0.48896095156669617, + "loss_ib": 0.00721543375402689, + "step": 1935 + }, + { + "ce_ib": 4.6291728019714355, + "ce_orig": 0.6698210835456848, + "epoch": 0.5567618089007118, + "kl_loss": 0.21553833782672882, + "loss_ib": 0.006784556433558464, + "step": 1936 + }, + { + "ce_ib": 7.387948989868164, + "ce_orig": 1.3421707153320312, + "epoch": 0.5567618089007118, + "kl_loss": 0.24260932207107544, + "loss_ib": 0.00981404259800911, + "step": 1936 + }, + { + "ce_ib": 5.946327209472656, + "ce_orig": 1.0489966869354248, + "epoch": 0.5567618089007118, + "kl_loss": 0.19853127002716064, + "loss_ib": 0.00793164037168026, + "step": 1936 + }, + { + "ce_ib": 3.620438575744629, + "ce_orig": 0.5803955793380737, + "epoch": 0.5567618089007118, + "kl_loss": 0.18083828687667847, + "loss_ib": 0.005428821314126253, + "step": 1936 + }, + { + "ce_ib": 4.323624134063721, + "ce_orig": 1.1346577405929565, + "epoch": 0.5570493924796894, + "kl_loss": 0.2001379430294037, + "loss_ib": 0.00632500322535634, + "step": 1937 + }, + { + "ce_ib": 3.8898186683654785, + "ce_orig": 0.6070666909217834, + "epoch": 0.5570493924796894, + "kl_loss": 0.2289465367794037, + "loss_ib": 0.0061792838387191296, + "step": 1937 + }, + { + "ce_ib": 4.37337064743042, + "ce_orig": 0.7158910036087036, + "epoch": 0.5570493924796894, + "kl_loss": 0.17210406064987183, + "loss_ib": 0.006094411481171846, + "step": 1937 + }, + { + "ce_ib": 3.612762451171875, + "ce_orig": 0.7651700973510742, + "epoch": 0.5570493924796894, + "kl_loss": 0.35289445519447327, + "loss_ib": 0.007141706999391317, + "step": 1937 + }, + { + "ce_ib": 3.410884380340576, + "ce_orig": 0.8918249607086182, + "epoch": 0.557336976058667, + "kl_loss": 0.13667425513267517, + "loss_ib": 0.004777627065777779, + "step": 1938 + }, + { + "ce_ib": 7.995250701904297, + "ce_orig": 1.657909870147705, + "epoch": 0.557336976058667, + "kl_loss": 0.1475721150636673, + "loss_ib": 0.009470971301198006, + "step": 1938 + }, + { + "ce_ib": 3.5121572017669678, + "ce_orig": 0.7898868322372437, + "epoch": 0.557336976058667, + "kl_loss": 0.23313003778457642, + "loss_ib": 0.0058434573002159595, + "step": 1938 + }, + { + "ce_ib": 5.825277328491211, + "ce_orig": 1.079064965248108, + "epoch": 0.557336976058667, + "kl_loss": 0.18237462639808655, + "loss_ib": 0.007649023551493883, + "step": 1938 + }, + { + "ce_ib": 3.7814619541168213, + "ce_orig": 0.8045347929000854, + "epoch": 0.5576245596376447, + "kl_loss": 0.16451720893383026, + "loss_ib": 0.005426633637398481, + "step": 1939 + }, + { + "ce_ib": 2.8695623874664307, + "ce_orig": 0.4086380898952484, + "epoch": 0.5576245596376447, + "kl_loss": 0.17066530883312225, + "loss_ib": 0.004576215520501137, + "step": 1939 + }, + { + "ce_ib": 3.0014541149139404, + "ce_orig": 0.4150030016899109, + "epoch": 0.5576245596376447, + "kl_loss": 0.24444639682769775, + "loss_ib": 0.0054459176026284695, + "step": 1939 + }, + { + "ce_ib": 4.278745651245117, + "ce_orig": 0.5602519512176514, + "epoch": 0.5576245596376447, + "kl_loss": 0.25006240606307983, + "loss_ib": 0.006779369432479143, + "step": 1939 + }, + { + "epoch": 0.5579121432166223, + "grad_norm": 0.11354150623083115, + "learning_rate": 9.38093505411748e-06, + "loss": 0.8296, + "step": 1940 + }, + { + "ce_ib": 4.754982948303223, + "ce_orig": 0.9043082594871521, + "epoch": 0.5579121432166223, + "kl_loss": 0.22222253680229187, + "loss_ib": 0.006977207958698273, + "step": 1940 + }, + { + "ce_ib": 3.1008718013763428, + "ce_orig": 0.6609811782836914, + "epoch": 0.5579121432166223, + "kl_loss": 0.1551208347082138, + "loss_ib": 0.004652079660445452, + "step": 1940 + }, + { + "ce_ib": 5.148151874542236, + "ce_orig": 0.7034737467765808, + "epoch": 0.5579121432166223, + "kl_loss": 0.32634222507476807, + "loss_ib": 0.008411574177443981, + "step": 1940 + }, + { + "ce_ib": 10.941390037536621, + "ce_orig": 1.2599574327468872, + "epoch": 0.5579121432166223, + "kl_loss": 0.2286052703857422, + "loss_ib": 0.013227442279458046, + "step": 1940 + }, + { + "ce_ib": 7.19765567779541, + "ce_orig": 1.0609315633773804, + "epoch": 0.5581997267955999, + "kl_loss": 0.2000490128993988, + "loss_ib": 0.009198145940899849, + "step": 1941 + }, + { + "ce_ib": 3.3666741847991943, + "ce_orig": 0.7088072896003723, + "epoch": 0.5581997267955999, + "kl_loss": 0.17230737209320068, + "loss_ib": 0.005089747719466686, + "step": 1941 + }, + { + "ce_ib": 6.163748741149902, + "ce_orig": 1.036344051361084, + "epoch": 0.5581997267955999, + "kl_loss": 0.2598022222518921, + "loss_ib": 0.008761771023273468, + "step": 1941 + }, + { + "ce_ib": 4.131283760070801, + "ce_orig": 0.7375625967979431, + "epoch": 0.5581997267955999, + "kl_loss": 0.1777574121952057, + "loss_ib": 0.005908857565373182, + "step": 1941 + }, + { + "ce_ib": 4.538864612579346, + "ce_orig": 0.9160065054893494, + "epoch": 0.5584873103745777, + "kl_loss": 0.17552489042282104, + "loss_ib": 0.006294113118201494, + "step": 1942 + }, + { + "ce_ib": 2.258758783340454, + "ce_orig": 0.4297439754009247, + "epoch": 0.5584873103745777, + "kl_loss": 0.2720901668071747, + "loss_ib": 0.004979660268872976, + "step": 1942 + }, + { + "ce_ib": 6.058786392211914, + "ce_orig": 1.130829095840454, + "epoch": 0.5584873103745777, + "kl_loss": 0.22988750040531158, + "loss_ib": 0.008357660844922066, + "step": 1942 + }, + { + "ce_ib": 3.9205870628356934, + "ce_orig": 1.050699234008789, + "epoch": 0.5584873103745777, + "kl_loss": 0.16049782931804657, + "loss_ib": 0.00552556524053216, + "step": 1942 + }, + { + "ce_ib": 3.205777168273926, + "ce_orig": 0.426290363073349, + "epoch": 0.5587748939535553, + "kl_loss": 0.34285032749176025, + "loss_ib": 0.006634280551224947, + "step": 1943 + }, + { + "ce_ib": 3.6531386375427246, + "ce_orig": 0.6537418961524963, + "epoch": 0.5587748939535553, + "kl_loss": 0.2146739363670349, + "loss_ib": 0.00579987745732069, + "step": 1943 + }, + { + "ce_ib": 4.059100151062012, + "ce_orig": 0.9437558054924011, + "epoch": 0.5587748939535553, + "kl_loss": 0.15987905859947205, + "loss_ib": 0.005657890811562538, + "step": 1943 + }, + { + "ce_ib": 4.630014419555664, + "ce_orig": 0.7337749004364014, + "epoch": 0.5587748939535553, + "kl_loss": 0.31163904070854187, + "loss_ib": 0.007746404968202114, + "step": 1943 + }, + { + "ce_ib": 5.040141582489014, + "ce_orig": 0.7503706812858582, + "epoch": 0.5590624775325329, + "kl_loss": 0.2039007693529129, + "loss_ib": 0.007079149130731821, + "step": 1944 + }, + { + "ce_ib": 3.309403657913208, + "ce_orig": 0.6147379279136658, + "epoch": 0.5590624775325329, + "kl_loss": 0.20214751362800598, + "loss_ib": 0.005330878309905529, + "step": 1944 + }, + { + "ce_ib": 3.446329355239868, + "ce_orig": 0.61670982837677, + "epoch": 0.5590624775325329, + "kl_loss": 0.20748162269592285, + "loss_ib": 0.005521146114915609, + "step": 1944 + }, + { + "ce_ib": 4.511063575744629, + "ce_orig": 0.9238880276679993, + "epoch": 0.5590624775325329, + "kl_loss": 0.17485204339027405, + "loss_ib": 0.006259584333747625, + "step": 1944 + }, + { + "epoch": 0.5593500611115105, + "grad_norm": 0.12173520773649216, + "learning_rate": 9.377189270724492e-06, + "loss": 0.896, + "step": 1945 + }, + { + "ce_ib": 6.507874965667725, + "ce_orig": 0.6445823907852173, + "epoch": 0.5593500611115105, + "kl_loss": 0.29428672790527344, + "loss_ib": 0.009450742043554783, + "step": 1945 + }, + { + "ce_ib": 7.969465255737305, + "ce_orig": 1.6248737573623657, + "epoch": 0.5593500611115105, + "kl_loss": 0.1924789547920227, + "loss_ib": 0.009894254617393017, + "step": 1945 + }, + { + "ce_ib": 8.53795051574707, + "ce_orig": 1.648207426071167, + "epoch": 0.5593500611115105, + "kl_loss": 0.2450883835554123, + "loss_ib": 0.010988833382725716, + "step": 1945 + }, + { + "ce_ib": 5.957749843597412, + "ce_orig": 1.022748351097107, + "epoch": 0.5593500611115105, + "kl_loss": 0.17536038160324097, + "loss_ib": 0.007711353711783886, + "step": 1945 + }, + { + "ce_ib": 5.166660308837891, + "ce_orig": 1.0340142250061035, + "epoch": 0.5596376446904882, + "kl_loss": 0.21062946319580078, + "loss_ib": 0.007272955030202866, + "step": 1946 + }, + { + "ce_ib": 5.739360809326172, + "ce_orig": 0.8432859778404236, + "epoch": 0.5596376446904882, + "kl_loss": 0.1813448667526245, + "loss_ib": 0.007552809547632933, + "step": 1946 + }, + { + "ce_ib": 3.5078823566436768, + "ce_orig": 0.6890704035758972, + "epoch": 0.5596376446904882, + "kl_loss": 0.19745320081710815, + "loss_ib": 0.005482414271682501, + "step": 1946 + }, + { + "ce_ib": 5.156094074249268, + "ce_orig": 0.9947879910469055, + "epoch": 0.5596376446904882, + "kl_loss": 0.20295652747154236, + "loss_ib": 0.007185659371316433, + "step": 1946 + }, + { + "ce_ib": 5.824108123779297, + "ce_orig": 0.4990655481815338, + "epoch": 0.5599252282694658, + "kl_loss": 0.1985587179660797, + "loss_ib": 0.007809694856405258, + "step": 1947 + }, + { + "ce_ib": 0.6695483326911926, + "ce_orig": 0.1322779357433319, + "epoch": 0.5599252282694658, + "kl_loss": 0.4654156565666199, + "loss_ib": 0.005323704797774553, + "step": 1947 + }, + { + "ce_ib": 8.81364631652832, + "ce_orig": 1.402514934539795, + "epoch": 0.5599252282694658, + "kl_loss": 0.2827611565589905, + "loss_ib": 0.01164125744253397, + "step": 1947 + }, + { + "ce_ib": 3.307119607925415, + "ce_orig": 0.5449358224868774, + "epoch": 0.5599252282694658, + "kl_loss": 0.2450537085533142, + "loss_ib": 0.005757656414061785, + "step": 1947 + }, + { + "ce_ib": 5.520815849304199, + "ce_orig": 0.7765263319015503, + "epoch": 0.5602128118484434, + "kl_loss": 0.2116309404373169, + "loss_ib": 0.007637124974280596, + "step": 1948 + }, + { + "ce_ib": 6.154223442077637, + "ce_orig": 0.9919604659080505, + "epoch": 0.5602128118484434, + "kl_loss": 0.2171577513217926, + "loss_ib": 0.00832580029964447, + "step": 1948 + }, + { + "ce_ib": 2.002056121826172, + "ce_orig": 0.4453052580356598, + "epoch": 0.5602128118484434, + "kl_loss": 0.4559124708175659, + "loss_ib": 0.006561180576682091, + "step": 1948 + }, + { + "ce_ib": 1.4254610538482666, + "ce_orig": 0.26237672567367554, + "epoch": 0.5602128118484434, + "kl_loss": 0.4651191532611847, + "loss_ib": 0.006076652090996504, + "step": 1948 + }, + { + "ce_ib": 9.372055053710938, + "ce_orig": 1.944899320602417, + "epoch": 0.5605003954274211, + "kl_loss": 0.20772811770439148, + "loss_ib": 0.011449335142970085, + "step": 1949 + }, + { + "ce_ib": 3.2384655475616455, + "ce_orig": 0.551419734954834, + "epoch": 0.5605003954274211, + "kl_loss": 0.19436872005462646, + "loss_ib": 0.005182153079658747, + "step": 1949 + }, + { + "ce_ib": 10.017022132873535, + "ce_orig": 1.6119446754455566, + "epoch": 0.5605003954274211, + "kl_loss": 0.22372767329216003, + "loss_ib": 0.01225429866462946, + "step": 1949 + }, + { + "ce_ib": 3.6044728755950928, + "ce_orig": 0.7428186535835266, + "epoch": 0.5605003954274211, + "kl_loss": 0.17656241357326508, + "loss_ib": 0.0053700972348451614, + "step": 1949 + }, + { + "epoch": 0.5607879790063988, + "grad_norm": 0.13764934241771698, + "learning_rate": 9.373432941666582e-06, + "loss": 0.8726, + "step": 1950 + }, + { + "ce_ib": 5.5733113288879395, + "ce_orig": 1.06040620803833, + "epoch": 0.5607879790063988, + "kl_loss": 0.23042647540569305, + "loss_ib": 0.007877576164901257, + "step": 1950 + }, + { + "ce_ib": 4.640501499176025, + "ce_orig": 0.7323870658874512, + "epoch": 0.5607879790063988, + "kl_loss": 0.26961541175842285, + "loss_ib": 0.007336655631661415, + "step": 1950 + }, + { + "ce_ib": 6.980530738830566, + "ce_orig": 1.5620394945144653, + "epoch": 0.5607879790063988, + "kl_loss": 0.24542734026908875, + "loss_ib": 0.009434803389012814, + "step": 1950 + }, + { + "ce_ib": 4.435082912445068, + "ce_orig": 0.559454083442688, + "epoch": 0.5607879790063988, + "kl_loss": 0.3001989722251892, + "loss_ib": 0.007437072694301605, + "step": 1950 + }, + { + "ce_ib": 5.830046653747559, + "ce_orig": 1.1036546230316162, + "epoch": 0.5610755625853764, + "kl_loss": 0.1718643307685852, + "loss_ib": 0.007548689842224121, + "step": 1951 + }, + { + "ce_ib": 3.1599724292755127, + "ce_orig": 0.5915268659591675, + "epoch": 0.5610755625853764, + "kl_loss": 0.20919816195964813, + "loss_ib": 0.005251954309642315, + "step": 1951 + }, + { + "ce_ib": 4.4157843589782715, + "ce_orig": 1.294607400894165, + "epoch": 0.5610755625853764, + "kl_loss": 0.170917809009552, + "loss_ib": 0.0061249625869095325, + "step": 1951 + }, + { + "ce_ib": 4.775761127471924, + "ce_orig": 0.7333765625953674, + "epoch": 0.5610755625853764, + "kl_loss": 0.23736411333084106, + "loss_ib": 0.0071494015865027905, + "step": 1951 + }, + { + "ce_ib": 6.860382080078125, + "ce_orig": 0.7319337725639343, + "epoch": 0.561363146164354, + "kl_loss": 0.2919643819332123, + "loss_ib": 0.009780025109648705, + "step": 1952 + }, + { + "ce_ib": 4.966904640197754, + "ce_orig": 0.6298078298568726, + "epoch": 0.561363146164354, + "kl_loss": 0.2703355848789215, + "loss_ib": 0.00767026050016284, + "step": 1952 + }, + { + "ce_ib": 4.709525108337402, + "ce_orig": 0.5962979793548584, + "epoch": 0.561363146164354, + "kl_loss": 0.2725815773010254, + "loss_ib": 0.007435340899974108, + "step": 1952 + }, + { + "ce_ib": 7.591611385345459, + "ce_orig": 1.7110387086868286, + "epoch": 0.561363146164354, + "kl_loss": 0.7085400223731995, + "loss_ib": 0.014677011407911777, + "step": 1952 + }, + { + "ce_ib": 3.424950361251831, + "ce_orig": 0.6562435626983643, + "epoch": 0.5616507297433316, + "kl_loss": 0.28956568241119385, + "loss_ib": 0.006320607382804155, + "step": 1953 + }, + { + "ce_ib": 9.279152870178223, + "ce_orig": 0.6927914619445801, + "epoch": 0.5616507297433316, + "kl_loss": 0.20212432742118835, + "loss_ib": 0.011300395242869854, + "step": 1953 + }, + { + "ce_ib": 5.230459213256836, + "ce_orig": 0.4061637222766876, + "epoch": 0.5616507297433316, + "kl_loss": 0.26631632447242737, + "loss_ib": 0.007893622852861881, + "step": 1953 + }, + { + "ce_ib": 4.862950801849365, + "ce_orig": 0.6797504425048828, + "epoch": 0.5616507297433316, + "kl_loss": 0.31247395277023315, + "loss_ib": 0.007987690158188343, + "step": 1953 + }, + { + "ce_ib": 7.613481044769287, + "ce_orig": 1.2723121643066406, + "epoch": 0.5619383133223093, + "kl_loss": 0.22658246755599976, + "loss_ib": 0.009879305958747864, + "step": 1954 + }, + { + "ce_ib": 7.024825096130371, + "ce_orig": 1.1097939014434814, + "epoch": 0.5619383133223093, + "kl_loss": 0.17091816663742065, + "loss_ib": 0.008734006434679031, + "step": 1954 + }, + { + "ce_ib": 2.7863800525665283, + "ce_orig": 0.5768458247184753, + "epoch": 0.5619383133223093, + "kl_loss": 0.20055538415908813, + "loss_ib": 0.004791933577507734, + "step": 1954 + }, + { + "ce_ib": 3.9914278984069824, + "ce_orig": 0.5456915497779846, + "epoch": 0.5619383133223093, + "kl_loss": 0.30046433210372925, + "loss_ib": 0.006996070966124535, + "step": 1954 + }, + { + "epoch": 0.5622258969012869, + "grad_norm": 0.14414216578006744, + "learning_rate": 9.36966607599362e-06, + "loss": 0.828, + "step": 1955 + }, + { + "ce_ib": 4.35274600982666, + "ce_orig": 0.5877292156219482, + "epoch": 0.5622258969012869, + "kl_loss": 0.2220989465713501, + "loss_ib": 0.006573735270649195, + "step": 1955 + }, + { + "ce_ib": 4.19326639175415, + "ce_orig": 0.7570400834083557, + "epoch": 0.5622258969012869, + "kl_loss": 0.22635827958583832, + "loss_ib": 0.0064568486995995045, + "step": 1955 + }, + { + "ce_ib": 6.88547945022583, + "ce_orig": 0.5518658757209778, + "epoch": 0.5622258969012869, + "kl_loss": 0.7282689809799194, + "loss_ib": 0.014168169349431992, + "step": 1955 + }, + { + "ce_ib": 5.221278667449951, + "ce_orig": 1.0822886228561401, + "epoch": 0.5622258969012869, + "kl_loss": 0.24413618445396423, + "loss_ib": 0.007662640418857336, + "step": 1955 + }, + { + "ce_ib": 5.041411399841309, + "ce_orig": 0.7814905047416687, + "epoch": 0.5625134804802646, + "kl_loss": 0.3357033431529999, + "loss_ib": 0.008398444391787052, + "step": 1956 + }, + { + "ce_ib": 4.407586574554443, + "ce_orig": 0.6083753108978271, + "epoch": 0.5625134804802646, + "kl_loss": 0.2976855933666229, + "loss_ib": 0.00738444272428751, + "step": 1956 + }, + { + "ce_ib": 3.456092119216919, + "ce_orig": 0.5818278789520264, + "epoch": 0.5625134804802646, + "kl_loss": 0.21381860971450806, + "loss_ib": 0.00559427822008729, + "step": 1956 + }, + { + "ce_ib": 3.3865714073181152, + "ce_orig": 0.5109878778457642, + "epoch": 0.5625134804802646, + "kl_loss": 0.1840440034866333, + "loss_ib": 0.005227011162787676, + "step": 1956 + }, + { + "ce_ib": 5.505570411682129, + "ce_orig": 1.1070959568023682, + "epoch": 0.5628010640592422, + "kl_loss": 0.18553832173347473, + "loss_ib": 0.00736095430329442, + "step": 1957 + }, + { + "ce_ib": 4.772067070007324, + "ce_orig": 0.8959853649139404, + "epoch": 0.5628010640592422, + "kl_loss": 0.2142309695482254, + "loss_ib": 0.0069143762812018394, + "step": 1957 + }, + { + "ce_ib": 6.18085241317749, + "ce_orig": 1.1948823928833008, + "epoch": 0.5628010640592422, + "kl_loss": 0.2218266874551773, + "loss_ib": 0.008399119600653648, + "step": 1957 + }, + { + "ce_ib": 5.908140182495117, + "ce_orig": 1.0072635412216187, + "epoch": 0.5628010640592422, + "kl_loss": 0.1685742884874344, + "loss_ib": 0.007593883201479912, + "step": 1957 + }, + { + "ce_ib": 2.8653934001922607, + "ce_orig": 0.7492801547050476, + "epoch": 0.5630886476382199, + "kl_loss": 0.1760227084159851, + "loss_ib": 0.004625620320439339, + "step": 1958 + }, + { + "ce_ib": 3.984140157699585, + "ce_orig": 0.9309844970703125, + "epoch": 0.5630886476382199, + "kl_loss": 0.15883925557136536, + "loss_ib": 0.005572532303631306, + "step": 1958 + }, + { + "ce_ib": 4.55318546295166, + "ce_orig": 0.6771810054779053, + "epoch": 0.5630886476382199, + "kl_loss": 0.2006765604019165, + "loss_ib": 0.006559951230883598, + "step": 1958 + }, + { + "ce_ib": 4.834569454193115, + "ce_orig": 0.7299646735191345, + "epoch": 0.5630886476382199, + "kl_loss": 0.19424204528331757, + "loss_ib": 0.006776989437639713, + "step": 1958 + }, + { + "ce_ib": 5.048141002655029, + "ce_orig": 0.6003642082214355, + "epoch": 0.5633762312171975, + "kl_loss": 0.22421041131019592, + "loss_ib": 0.007290245033800602, + "step": 1959 + }, + { + "ce_ib": 2.5761818885803223, + "ce_orig": 0.5762946009635925, + "epoch": 0.5633762312171975, + "kl_loss": 0.11957596242427826, + "loss_ib": 0.0037719416432082653, + "step": 1959 + }, + { + "ce_ib": 2.9306375980377197, + "ce_orig": 0.4841289818286896, + "epoch": 0.5633762312171975, + "kl_loss": 0.18257150053977966, + "loss_ib": 0.00475635239854455, + "step": 1959 + }, + { + "ce_ib": 4.837977886199951, + "ce_orig": 0.9663442373275757, + "epoch": 0.5633762312171975, + "kl_loss": 0.21835489571094513, + "loss_ib": 0.007021526340395212, + "step": 1959 + }, + { + "epoch": 0.5636638147961751, + "grad_norm": 0.11777511239051819, + "learning_rate": 9.365888682780862e-06, + "loss": 0.8707, + "step": 1960 + }, + { + "ce_ib": 1.4187829494476318, + "ce_orig": 0.19470104575157166, + "epoch": 0.5636638147961751, + "kl_loss": 0.40988442301750183, + "loss_ib": 0.0055176266469061375, + "step": 1960 + }, + { + "ce_ib": 5.243957042694092, + "ce_orig": 0.5946865677833557, + "epoch": 0.5636638147961751, + "kl_loss": 0.31962987780570984, + "loss_ib": 0.008440256118774414, + "step": 1960 + }, + { + "ce_ib": 6.228485107421875, + "ce_orig": 0.5695993304252625, + "epoch": 0.5636638147961751, + "kl_loss": 0.3002013564109802, + "loss_ib": 0.009230498224496841, + "step": 1960 + }, + { + "ce_ib": 6.686850547790527, + "ce_orig": 1.0680514574050903, + "epoch": 0.5636638147961751, + "kl_loss": 0.16701674461364746, + "loss_ib": 0.008357018232345581, + "step": 1960 + }, + { + "ce_ib": 5.57006311416626, + "ce_orig": 0.7003214955329895, + "epoch": 0.5639513983751527, + "kl_loss": 0.22560706734657288, + "loss_ib": 0.007826133631169796, + "step": 1961 + }, + { + "ce_ib": 3.7655398845672607, + "ce_orig": 0.7719546556472778, + "epoch": 0.5639513983751527, + "kl_loss": 0.1541329324245453, + "loss_ib": 0.005306868813931942, + "step": 1961 + }, + { + "ce_ib": 3.8506884574890137, + "ce_orig": 0.6675884127616882, + "epoch": 0.5639513983751527, + "kl_loss": 0.1861284077167511, + "loss_ib": 0.005711972713470459, + "step": 1961 + }, + { + "ce_ib": 4.970664024353027, + "ce_orig": 0.8154793381690979, + "epoch": 0.5639513983751527, + "kl_loss": 0.26470455527305603, + "loss_ib": 0.007617709692567587, + "step": 1961 + }, + { + "ce_ib": 7.087484836578369, + "ce_orig": 0.9953211545944214, + "epoch": 0.5642389819541305, + "kl_loss": 0.23596073687076569, + "loss_ib": 0.009447092190384865, + "step": 1962 + }, + { + "ce_ib": 3.6978096961975098, + "ce_orig": 0.8677507638931274, + "epoch": 0.5642389819541305, + "kl_loss": 0.12703941762447357, + "loss_ib": 0.004968203604221344, + "step": 1962 + }, + { + "ce_ib": 5.717005252838135, + "ce_orig": 1.2173904180526733, + "epoch": 0.5642389819541305, + "kl_loss": 0.21575793623924255, + "loss_ib": 0.007874583825469017, + "step": 1962 + }, + { + "ce_ib": 3.6788642406463623, + "ce_orig": 0.8600021004676819, + "epoch": 0.5642389819541305, + "kl_loss": 0.14753463864326477, + "loss_ib": 0.005154210142791271, + "step": 1962 + }, + { + "ce_ib": 3.4018752574920654, + "ce_orig": 0.8169087767601013, + "epoch": 0.5645265655331081, + "kl_loss": 0.18081295490264893, + "loss_ib": 0.00521000474691391, + "step": 1963 + }, + { + "ce_ib": 4.7397894859313965, + "ce_orig": 0.7717341184616089, + "epoch": 0.5645265655331081, + "kl_loss": 0.199485182762146, + "loss_ib": 0.00673464173451066, + "step": 1963 + }, + { + "ce_ib": 4.653703689575195, + "ce_orig": 0.7700741291046143, + "epoch": 0.5645265655331081, + "kl_loss": 0.2130657583475113, + "loss_ib": 0.006784361321479082, + "step": 1963 + }, + { + "ce_ib": 4.431517601013184, + "ce_orig": 0.9778881669044495, + "epoch": 0.5645265655331081, + "kl_loss": 0.19082719087600708, + "loss_ib": 0.006339789368212223, + "step": 1963 + }, + { + "ce_ib": 7.125487327575684, + "ce_orig": 0.6698811054229736, + "epoch": 0.5648141491120857, + "kl_loss": 0.3037925362586975, + "loss_ib": 0.010163411498069763, + "step": 1964 + }, + { + "ce_ib": 2.7644705772399902, + "ce_orig": 0.27121105790138245, + "epoch": 0.5648141491120857, + "kl_loss": 0.17889803647994995, + "loss_ib": 0.004553450737148523, + "step": 1964 + }, + { + "ce_ib": 3.524197578430176, + "ce_orig": 0.6075092554092407, + "epoch": 0.5648141491120857, + "kl_loss": 0.3592822551727295, + "loss_ib": 0.00711701950058341, + "step": 1964 + }, + { + "ce_ib": 6.147427558898926, + "ce_orig": 0.9962998628616333, + "epoch": 0.5648141491120857, + "kl_loss": 0.2286912351846695, + "loss_ib": 0.008434339426457882, + "step": 1964 + }, + { + "epoch": 0.5651017326910633, + "grad_norm": 0.12749658524990082, + "learning_rate": 9.36210077112892e-06, + "loss": 0.802, + "step": 1965 + }, + { + "ce_ib": 2.299947738647461, + "ce_orig": 0.4007086753845215, + "epoch": 0.5651017326910633, + "kl_loss": 0.22868286073207855, + "loss_ib": 0.004586776252835989, + "step": 1965 + }, + { + "ce_ib": 6.62831974029541, + "ce_orig": 1.134644627571106, + "epoch": 0.5651017326910633, + "kl_loss": 0.24236369132995605, + "loss_ib": 0.009051956236362457, + "step": 1965 + }, + { + "ce_ib": 3.9246878623962402, + "ce_orig": 0.5810030102729797, + "epoch": 0.5651017326910633, + "kl_loss": 0.26141154766082764, + "loss_ib": 0.006538803223520517, + "step": 1965 + }, + { + "ce_ib": 4.731905937194824, + "ce_orig": 0.6843492984771729, + "epoch": 0.5651017326910633, + "kl_loss": 0.22076337039470673, + "loss_ib": 0.006939539685845375, + "step": 1965 + }, + { + "ce_ib": 5.879118919372559, + "ce_orig": 0.7771980166435242, + "epoch": 0.565389316270041, + "kl_loss": 0.21585160493850708, + "loss_ib": 0.008037635125219822, + "step": 1966 + }, + { + "ce_ib": 4.194392681121826, + "ce_orig": 0.7197152376174927, + "epoch": 0.565389316270041, + "kl_loss": 0.2818256616592407, + "loss_ib": 0.007012649439275265, + "step": 1966 + }, + { + "ce_ib": 6.469331741333008, + "ce_orig": 0.8204442262649536, + "epoch": 0.565389316270041, + "kl_loss": 0.16121214628219604, + "loss_ib": 0.008081452921032906, + "step": 1966 + }, + { + "ce_ib": 5.347432613372803, + "ce_orig": 0.8207641839981079, + "epoch": 0.565389316270041, + "kl_loss": 0.16065415740013123, + "loss_ib": 0.006953973788768053, + "step": 1966 + }, + { + "ce_ib": 5.6471076011657715, + "ce_orig": 0.8323034048080444, + "epoch": 0.5656768998490186, + "kl_loss": 0.22340193390846252, + "loss_ib": 0.007881127297878265, + "step": 1967 + }, + { + "ce_ib": 3.217761754989624, + "ce_orig": 0.6444844603538513, + "epoch": 0.5656768998490186, + "kl_loss": 0.17131659388542175, + "loss_ib": 0.0049309274181723595, + "step": 1967 + }, + { + "ce_ib": 7.174370765686035, + "ce_orig": 1.3534836769104004, + "epoch": 0.5656768998490186, + "kl_loss": 0.15459537506103516, + "loss_ib": 0.008720324374735355, + "step": 1967 + }, + { + "ce_ib": 4.484457969665527, + "ce_orig": 0.7245428562164307, + "epoch": 0.5656768998490186, + "kl_loss": 0.25370633602142334, + "loss_ib": 0.007021521218121052, + "step": 1967 + }, + { + "ce_ib": 6.128870487213135, + "ce_orig": 0.6025968194007874, + "epoch": 0.5659644834279962, + "kl_loss": 0.2562224864959717, + "loss_ib": 0.008691095747053623, + "step": 1968 + }, + { + "ce_ib": 8.38642406463623, + "ce_orig": 1.2454034090042114, + "epoch": 0.5659644834279962, + "kl_loss": 0.25671201944351196, + "loss_ib": 0.010953543707728386, + "step": 1968 + }, + { + "ce_ib": 2.751986265182495, + "ce_orig": 0.3705005645751953, + "epoch": 0.5659644834279962, + "kl_loss": 0.5028501749038696, + "loss_ib": 0.0077804881148040295, + "step": 1968 + }, + { + "ce_ib": 5.967759132385254, + "ce_orig": 0.9296004176139832, + "epoch": 0.5659644834279962, + "kl_loss": 0.16728214919567108, + "loss_ib": 0.007640581112354994, + "step": 1968 + }, + { + "ce_ib": 5.655499458312988, + "ce_orig": 0.9303698539733887, + "epoch": 0.566252067006974, + "kl_loss": 0.2030981481075287, + "loss_ib": 0.007686481345444918, + "step": 1969 + }, + { + "ce_ib": 3.7224225997924805, + "ce_orig": 0.40809500217437744, + "epoch": 0.566252067006974, + "kl_loss": 0.24028897285461426, + "loss_ib": 0.006125312298536301, + "step": 1969 + }, + { + "ce_ib": 4.018906593322754, + "ce_orig": 0.6669531464576721, + "epoch": 0.566252067006974, + "kl_loss": 0.22310064733028412, + "loss_ib": 0.006249913014471531, + "step": 1969 + }, + { + "ce_ib": 5.23858118057251, + "ce_orig": 0.5927353501319885, + "epoch": 0.566252067006974, + "kl_loss": 0.2642560601234436, + "loss_ib": 0.007881141267716885, + "step": 1969 + }, + { + "epoch": 0.5665396505859516, + "grad_norm": 0.11762264370918274, + "learning_rate": 9.358302350163758e-06, + "loss": 0.8599, + "step": 1970 + }, + { + "ce_ib": 4.985979080200195, + "ce_orig": 0.9551448225975037, + "epoch": 0.5665396505859516, + "kl_loss": 0.399181604385376, + "loss_ib": 0.008977795019745827, + "step": 1970 + }, + { + "ce_ib": 4.591373443603516, + "ce_orig": 0.8099931478500366, + "epoch": 0.5665396505859516, + "kl_loss": 0.19217705726623535, + "loss_ib": 0.006513143423944712, + "step": 1970 + }, + { + "ce_ib": 6.659412860870361, + "ce_orig": 1.127098798751831, + "epoch": 0.5665396505859516, + "kl_loss": 0.21108250319957733, + "loss_ib": 0.008770237676799297, + "step": 1970 + }, + { + "ce_ib": 5.537312030792236, + "ce_orig": 0.7628366947174072, + "epoch": 0.5665396505859516, + "kl_loss": 0.20539790391921997, + "loss_ib": 0.00759129086509347, + "step": 1970 + }, + { + "ce_ib": 3.149667978286743, + "ce_orig": 0.7977863550186157, + "epoch": 0.5668272341649292, + "kl_loss": 0.19728781282901764, + "loss_ib": 0.005122545640915632, + "step": 1971 + }, + { + "ce_ib": 3.710076332092285, + "ce_orig": 0.5578461289405823, + "epoch": 0.5668272341649292, + "kl_loss": 0.20799040794372559, + "loss_ib": 0.005789980757981539, + "step": 1971 + }, + { + "ce_ib": 6.388326644897461, + "ce_orig": 1.2302563190460205, + "epoch": 0.5668272341649292, + "kl_loss": 0.23331697285175323, + "loss_ib": 0.008721495978534222, + "step": 1971 + }, + { + "ce_ib": 5.520804405212402, + "ce_orig": 0.9213300943374634, + "epoch": 0.5668272341649292, + "kl_loss": 0.288860559463501, + "loss_ib": 0.008409409783780575, + "step": 1971 + }, + { + "ce_ib": 6.34333610534668, + "ce_orig": 0.6838300228118896, + "epoch": 0.5671148177439068, + "kl_loss": 0.2031092792749405, + "loss_ib": 0.008374428376555443, + "step": 1972 + }, + { + "ce_ib": 2.3525428771972656, + "ce_orig": 0.6577918529510498, + "epoch": 0.5671148177439068, + "kl_loss": 0.1439896523952484, + "loss_ib": 0.003792439354583621, + "step": 1972 + }, + { + "ce_ib": 5.995169162750244, + "ce_orig": 1.369937777519226, + "epoch": 0.5671148177439068, + "kl_loss": 0.25531530380249023, + "loss_ib": 0.008548322133719921, + "step": 1972 + }, + { + "ce_ib": 9.326186180114746, + "ce_orig": 1.1996033191680908, + "epoch": 0.5671148177439068, + "kl_loss": 0.2241915613412857, + "loss_ib": 0.01156810112297535, + "step": 1972 + }, + { + "ce_ib": 4.266485214233398, + "ce_orig": 0.7407187223434448, + "epoch": 0.5674024013228844, + "kl_loss": 0.15525701642036438, + "loss_ib": 0.005819055251777172, + "step": 1973 + }, + { + "ce_ib": 6.443141937255859, + "ce_orig": 1.002044916152954, + "epoch": 0.5674024013228844, + "kl_loss": 0.19105428457260132, + "loss_ib": 0.008353685028851032, + "step": 1973 + }, + { + "ce_ib": 5.837249279022217, + "ce_orig": 1.0835412740707397, + "epoch": 0.5674024013228844, + "kl_loss": 0.2187289148569107, + "loss_ib": 0.008024537935853004, + "step": 1973 + }, + { + "ce_ib": 6.973402976989746, + "ce_orig": 1.2432231903076172, + "epoch": 0.5674024013228844, + "kl_loss": 0.20823659002780914, + "loss_ib": 0.009055769070982933, + "step": 1973 + }, + { + "ce_ib": 4.132625102996826, + "ce_orig": 0.7623564004898071, + "epoch": 0.5676899849018621, + "kl_loss": 0.155003622174263, + "loss_ib": 0.005682661198079586, + "step": 1974 + }, + { + "ce_ib": 4.418184757232666, + "ce_orig": 0.865897536277771, + "epoch": 0.5676899849018621, + "kl_loss": 0.23717643320560455, + "loss_ib": 0.006789948791265488, + "step": 1974 + }, + { + "ce_ib": 8.083592414855957, + "ce_orig": 1.3602644205093384, + "epoch": 0.5676899849018621, + "kl_loss": 0.18909840285778046, + "loss_ib": 0.009974576532840729, + "step": 1974 + }, + { + "ce_ib": 4.652640342712402, + "ce_orig": 0.973773717880249, + "epoch": 0.5676899849018621, + "kl_loss": 0.19918793439865112, + "loss_ib": 0.0066445195116102695, + "step": 1974 + }, + { + "epoch": 0.5679775684808397, + "grad_norm": 0.1255088448524475, + "learning_rate": 9.35449342903665e-06, + "loss": 0.9833, + "step": 1975 + }, + { + "ce_ib": 8.040228843688965, + "ce_orig": 0.7436036467552185, + "epoch": 0.5679775684808397, + "kl_loss": 0.2832046449184418, + "loss_ib": 0.010872275568544865, + "step": 1975 + }, + { + "ce_ib": 6.236600875854492, + "ce_orig": 1.1183065176010132, + "epoch": 0.5679775684808397, + "kl_loss": 0.26579269766807556, + "loss_ib": 0.00889452826231718, + "step": 1975 + }, + { + "ce_ib": 7.973631381988525, + "ce_orig": 1.1876856088638306, + "epoch": 0.5679775684808397, + "kl_loss": 0.23983827233314514, + "loss_ib": 0.010372013784945011, + "step": 1975 + }, + { + "ce_ib": 5.137147426605225, + "ce_orig": 0.3782559335231781, + "epoch": 0.5679775684808397, + "kl_loss": 0.6367517709732056, + "loss_ib": 0.011504664085805416, + "step": 1975 + }, + { + "ce_ib": 8.592161178588867, + "ce_orig": 1.8806641101837158, + "epoch": 0.5682651520598174, + "kl_loss": 0.20329293608665466, + "loss_ib": 0.010625090450048447, + "step": 1976 + }, + { + "ce_ib": 7.720051288604736, + "ce_orig": 1.0171343088150024, + "epoch": 0.5682651520598174, + "kl_loss": 0.262137234210968, + "loss_ib": 0.010341423563659191, + "step": 1976 + }, + { + "ce_ib": 4.9487762451171875, + "ce_orig": 0.7323424816131592, + "epoch": 0.5682651520598174, + "kl_loss": 0.313734769821167, + "loss_ib": 0.008086123503744602, + "step": 1976 + }, + { + "ce_ib": 3.0523829460144043, + "ce_orig": 0.44585099816322327, + "epoch": 0.5682651520598174, + "kl_loss": 0.29260846972465515, + "loss_ib": 0.005978467408567667, + "step": 1976 + }, + { + "ce_ib": 4.261775493621826, + "ce_orig": 0.7914097905158997, + "epoch": 0.568552735638795, + "kl_loss": 0.2280750870704651, + "loss_ib": 0.006542526185512543, + "step": 1977 + }, + { + "ce_ib": 4.111820697784424, + "ce_orig": 0.8065600991249084, + "epoch": 0.568552735638795, + "kl_loss": 0.19572162628173828, + "loss_ib": 0.006069036666303873, + "step": 1977 + }, + { + "ce_ib": 3.917246103286743, + "ce_orig": 0.6122181415557861, + "epoch": 0.568552735638795, + "kl_loss": 0.27111905813217163, + "loss_ib": 0.006628436967730522, + "step": 1977 + }, + { + "ce_ib": 5.574148178100586, + "ce_orig": 0.9698813557624817, + "epoch": 0.568552735638795, + "kl_loss": 0.2713298797607422, + "loss_ib": 0.008287446573376656, + "step": 1977 + }, + { + "ce_ib": 4.16474723815918, + "ce_orig": 0.9032987952232361, + "epoch": 0.5688403192177727, + "kl_loss": 0.1838064193725586, + "loss_ib": 0.006002811249345541, + "step": 1978 + }, + { + "ce_ib": 4.287901401519775, + "ce_orig": 0.8550389409065247, + "epoch": 0.5688403192177727, + "kl_loss": 0.2831040322780609, + "loss_ib": 0.007118941284716129, + "step": 1978 + }, + { + "ce_ib": 8.180766105651855, + "ce_orig": 1.2101643085479736, + "epoch": 0.5688403192177727, + "kl_loss": 0.26428651809692383, + "loss_ib": 0.010823630727827549, + "step": 1978 + }, + { + "ce_ib": 4.760415077209473, + "ce_orig": 0.8011019825935364, + "epoch": 0.5688403192177727, + "kl_loss": 0.3007705807685852, + "loss_ib": 0.007768120616674423, + "step": 1978 + }, + { + "ce_ib": 5.779112339019775, + "ce_orig": 1.2069902420043945, + "epoch": 0.5691279027967503, + "kl_loss": 0.2669152021408081, + "loss_ib": 0.008448264561593533, + "step": 1979 + }, + { + "ce_ib": 3.393887519836426, + "ce_orig": 0.5832438468933105, + "epoch": 0.5691279027967503, + "kl_loss": 0.146831214427948, + "loss_ib": 0.004862199537456036, + "step": 1979 + }, + { + "ce_ib": 6.103104591369629, + "ce_orig": 1.0624935626983643, + "epoch": 0.5691279027967503, + "kl_loss": 0.34160977602005005, + "loss_ib": 0.009519202634692192, + "step": 1979 + }, + { + "ce_ib": 2.8118221759796143, + "ce_orig": 0.5914442539215088, + "epoch": 0.5691279027967503, + "kl_loss": 0.197896808385849, + "loss_ib": 0.004790790379047394, + "step": 1979 + }, + { + "epoch": 0.5694154863757279, + "grad_norm": 0.12583377957344055, + "learning_rate": 9.35067401692417e-06, + "loss": 0.9343, + "step": 1980 + }, + { + "ce_ib": 6.383730411529541, + "ce_orig": 1.111653208732605, + "epoch": 0.5694154863757279, + "kl_loss": 0.22049827873706818, + "loss_ib": 0.008588713593780994, + "step": 1980 + }, + { + "ce_ib": 4.164979934692383, + "ce_orig": 0.4715644121170044, + "epoch": 0.5694154863757279, + "kl_loss": 0.19932615756988525, + "loss_ib": 0.0061582415364682674, + "step": 1980 + }, + { + "ce_ib": 3.7962656021118164, + "ce_orig": 0.47578591108322144, + "epoch": 0.5694154863757279, + "kl_loss": 0.4800046980381012, + "loss_ib": 0.008596313185989857, + "step": 1980 + }, + { + "ce_ib": 4.593181610107422, + "ce_orig": 0.6127696633338928, + "epoch": 0.5694154863757279, + "kl_loss": 0.22612276673316956, + "loss_ib": 0.006854408886283636, + "step": 1980 + }, + { + "ce_ib": 5.146809101104736, + "ce_orig": 0.36611780524253845, + "epoch": 0.5697030699547055, + "kl_loss": 0.35808759927749634, + "loss_ib": 0.008727684617042542, + "step": 1981 + }, + { + "ce_ib": 6.117854595184326, + "ce_orig": 0.8459972739219666, + "epoch": 0.5697030699547055, + "kl_loss": 0.2890790104866028, + "loss_ib": 0.009008645080029964, + "step": 1981 + }, + { + "ce_ib": 4.888398170471191, + "ce_orig": 0.5127837657928467, + "epoch": 0.5697030699547055, + "kl_loss": 0.24100396037101746, + "loss_ib": 0.007298437878489494, + "step": 1981 + }, + { + "ce_ib": 4.431370735168457, + "ce_orig": 0.4488409757614136, + "epoch": 0.5697030699547055, + "kl_loss": 0.26973119378089905, + "loss_ib": 0.00712868245318532, + "step": 1981 + }, + { + "ce_ib": 2.463197946548462, + "ce_orig": 0.5498092770576477, + "epoch": 0.5699906535336833, + "kl_loss": 0.12848663330078125, + "loss_ib": 0.00374806416220963, + "step": 1982 + }, + { + "ce_ib": 4.813397407531738, + "ce_orig": 0.9176458120346069, + "epoch": 0.5699906535336833, + "kl_loss": 0.23598945140838623, + "loss_ib": 0.007173291873186827, + "step": 1982 + }, + { + "ce_ib": 6.061206340789795, + "ce_orig": 1.3012479543685913, + "epoch": 0.5699906535336833, + "kl_loss": 0.15754292905330658, + "loss_ib": 0.007636635564267635, + "step": 1982 + }, + { + "ce_ib": 3.3574044704437256, + "ce_orig": 0.5007990598678589, + "epoch": 0.5699906535336833, + "kl_loss": 0.2878541648387909, + "loss_ib": 0.00623594643548131, + "step": 1982 + }, + { + "ce_ib": 4.542166233062744, + "ce_orig": 1.2737343311309814, + "epoch": 0.5702782371126609, + "kl_loss": 0.16203643381595612, + "loss_ib": 0.006162530742585659, + "step": 1983 + }, + { + "ce_ib": 5.179211139678955, + "ce_orig": 0.9539887309074402, + "epoch": 0.5702782371126609, + "kl_loss": 0.34270644187927246, + "loss_ib": 0.008606275543570518, + "step": 1983 + }, + { + "ce_ib": 4.461246013641357, + "ce_orig": 0.7930008172988892, + "epoch": 0.5702782371126609, + "kl_loss": 0.3119294345378876, + "loss_ib": 0.007580540142953396, + "step": 1983 + }, + { + "ce_ib": 3.8203072547912598, + "ce_orig": 0.4939483106136322, + "epoch": 0.5702782371126609, + "kl_loss": 0.18022455275058746, + "loss_ib": 0.005622552707791328, + "step": 1983 + }, + { + "ce_ib": 3.5545308589935303, + "ce_orig": 0.4724838137626648, + "epoch": 0.5705658206916385, + "kl_loss": 0.17989501357078552, + "loss_ib": 0.005353481043130159, + "step": 1984 + }, + { + "ce_ib": 5.211380958557129, + "ce_orig": 0.8598366379737854, + "epoch": 0.5705658206916385, + "kl_loss": 0.20496268570423126, + "loss_ib": 0.007261008024215698, + "step": 1984 + }, + { + "ce_ib": 6.27523136138916, + "ce_orig": 1.1058155298233032, + "epoch": 0.5705658206916385, + "kl_loss": 0.23712310194969177, + "loss_ib": 0.008646462112665176, + "step": 1984 + }, + { + "ce_ib": 5.946166038513184, + "ce_orig": 0.9420840740203857, + "epoch": 0.5705658206916385, + "kl_loss": 0.23133786022663116, + "loss_ib": 0.008259544149041176, + "step": 1984 + }, + { + "epoch": 0.5708534042706161, + "grad_norm": 0.12770475447177887, + "learning_rate": 9.346844123028172e-06, + "loss": 0.854, + "step": 1985 + }, + { + "ce_ib": 5.662142276763916, + "ce_orig": 0.8796694874763489, + "epoch": 0.5708534042706161, + "kl_loss": 0.19070428609848022, + "loss_ib": 0.007569185458123684, + "step": 1985 + }, + { + "ce_ib": 4.19233512878418, + "ce_orig": 0.9024421572685242, + "epoch": 0.5708534042706161, + "kl_loss": 0.2900976836681366, + "loss_ib": 0.007093311753123999, + "step": 1985 + }, + { + "ce_ib": 7.417691707611084, + "ce_orig": 1.1089247465133667, + "epoch": 0.5708534042706161, + "kl_loss": 0.217866450548172, + "loss_ib": 0.009596356190741062, + "step": 1985 + }, + { + "ce_ib": 6.199939727783203, + "ce_orig": 1.012454867362976, + "epoch": 0.5708534042706161, + "kl_loss": 0.22154691815376282, + "loss_ib": 0.008415409363806248, + "step": 1985 + }, + { + "ce_ib": 4.7033257484436035, + "ce_orig": 0.695756733417511, + "epoch": 0.5711409878495938, + "kl_loss": 0.19042037427425385, + "loss_ib": 0.006607529241591692, + "step": 1986 + }, + { + "ce_ib": 5.706419467926025, + "ce_orig": 0.5783017873764038, + "epoch": 0.5711409878495938, + "kl_loss": 0.16660155355930328, + "loss_ib": 0.007372434716671705, + "step": 1986 + }, + { + "ce_ib": 6.473813056945801, + "ce_orig": 0.6710724830627441, + "epoch": 0.5711409878495938, + "kl_loss": 0.48526352643966675, + "loss_ib": 0.011326448991894722, + "step": 1986 + }, + { + "ce_ib": 5.012685775756836, + "ce_orig": 0.8257612586021423, + "epoch": 0.5711409878495938, + "kl_loss": 0.22710269689559937, + "loss_ib": 0.007283712271600962, + "step": 1986 + }, + { + "ce_ib": 4.048294544219971, + "ce_orig": 0.5852899551391602, + "epoch": 0.5714285714285714, + "kl_loss": 0.13435371220111847, + "loss_ib": 0.00539183197543025, + "step": 1987 + }, + { + "ce_ib": 5.982120037078857, + "ce_orig": 0.9091359376907349, + "epoch": 0.5714285714285714, + "kl_loss": 0.17164137959480286, + "loss_ib": 0.007698533590883017, + "step": 1987 + }, + { + "ce_ib": 5.078379154205322, + "ce_orig": 1.0537258386611938, + "epoch": 0.5714285714285714, + "kl_loss": 0.17836973071098328, + "loss_ib": 0.006862076465040445, + "step": 1987 + }, + { + "ce_ib": 3.4556667804718018, + "ce_orig": 0.6418643593788147, + "epoch": 0.5714285714285714, + "kl_loss": 0.23529046773910522, + "loss_ib": 0.005808571353554726, + "step": 1987 + }, + { + "ce_ib": 3.838954448699951, + "ce_orig": 0.5713245868682861, + "epoch": 0.571716155007549, + "kl_loss": 0.14425553381443024, + "loss_ib": 0.005281509831547737, + "step": 1988 + }, + { + "ce_ib": 3.5798120498657227, + "ce_orig": 0.308655709028244, + "epoch": 0.571716155007549, + "kl_loss": 0.49981340765953064, + "loss_ib": 0.008577946573495865, + "step": 1988 + }, + { + "ce_ib": 3.3572182655334473, + "ce_orig": 0.7377309203147888, + "epoch": 0.571716155007549, + "kl_loss": 0.16723394393920898, + "loss_ib": 0.00502955773845315, + "step": 1988 + }, + { + "ce_ib": 4.432819366455078, + "ce_orig": 0.8372645974159241, + "epoch": 0.571716155007549, + "kl_loss": 0.2893436551094055, + "loss_ib": 0.007326256018131971, + "step": 1988 + }, + { + "ce_ib": 3.809474229812622, + "ce_orig": 0.7397820353507996, + "epoch": 0.5720037385865268, + "kl_loss": 0.1930888444185257, + "loss_ib": 0.00574036268517375, + "step": 1989 + }, + { + "ce_ib": 3.7399144172668457, + "ce_orig": 0.5138627886772156, + "epoch": 0.5720037385865268, + "kl_loss": 0.2592537999153137, + "loss_ib": 0.006332451943308115, + "step": 1989 + }, + { + "ce_ib": 3.4589803218841553, + "ce_orig": 0.6595289707183838, + "epoch": 0.5720037385865268, + "kl_loss": 0.1255977749824524, + "loss_ib": 0.004714957904070616, + "step": 1989 + }, + { + "ce_ib": 4.204591751098633, + "ce_orig": 0.8881221413612366, + "epoch": 0.5720037385865268, + "kl_loss": 0.17572838068008423, + "loss_ib": 0.005961875896900892, + "step": 1989 + }, + { + "epoch": 0.5722913221655044, + "grad_norm": 0.15225893259048462, + "learning_rate": 9.343003756575758e-06, + "loss": 0.8669, + "step": 1990 + }, + { + "ce_ib": 3.736387014389038, + "ce_orig": 0.4147120714187622, + "epoch": 0.5722913221655044, + "kl_loss": 0.23932798206806183, + "loss_ib": 0.006129667162895203, + "step": 1990 + }, + { + "ce_ib": 4.791964530944824, + "ce_orig": 1.0321811437606812, + "epoch": 0.5722913221655044, + "kl_loss": 0.1818554401397705, + "loss_ib": 0.00661051832139492, + "step": 1990 + }, + { + "ce_ib": 6.760359287261963, + "ce_orig": 1.4710983037948608, + "epoch": 0.5722913221655044, + "kl_loss": 0.12290021777153015, + "loss_ib": 0.007989360950887203, + "step": 1990 + }, + { + "ce_ib": 4.722901344299316, + "ce_orig": 0.6649274230003357, + "epoch": 0.5722913221655044, + "kl_loss": 0.31279200315475464, + "loss_ib": 0.007850821129977703, + "step": 1990 + }, + { + "ce_ib": 4.941730976104736, + "ce_orig": 0.7840262651443481, + "epoch": 0.572578905744482, + "kl_loss": 0.22302401065826416, + "loss_ib": 0.007171971257776022, + "step": 1991 + }, + { + "ce_ib": 4.915649890899658, + "ce_orig": 1.1190143823623657, + "epoch": 0.572578905744482, + "kl_loss": 0.16858713328838348, + "loss_ib": 0.0066015212796628475, + "step": 1991 + }, + { + "ce_ib": 8.89372444152832, + "ce_orig": 1.5894157886505127, + "epoch": 0.572578905744482, + "kl_loss": 0.22975695133209229, + "loss_ib": 0.011191293597221375, + "step": 1991 + }, + { + "ce_ib": 4.597128391265869, + "ce_orig": 0.8089755177497864, + "epoch": 0.572578905744482, + "kl_loss": 0.30403149127960205, + "loss_ib": 0.007637443486601114, + "step": 1991 + }, + { + "ce_ib": 7.109074592590332, + "ce_orig": 0.5922656059265137, + "epoch": 0.5728664893234596, + "kl_loss": 0.34544605016708374, + "loss_ib": 0.010563535615801811, + "step": 1992 + }, + { + "ce_ib": 3.9104511737823486, + "ce_orig": 0.510415256023407, + "epoch": 0.5728664893234596, + "kl_loss": 0.23136720061302185, + "loss_ib": 0.006224123295396566, + "step": 1992 + }, + { + "ce_ib": 4.182595729827881, + "ce_orig": 0.814848005771637, + "epoch": 0.5728664893234596, + "kl_loss": 0.14320699870586395, + "loss_ib": 0.005614665802568197, + "step": 1992 + }, + { + "ce_ib": 3.3440380096435547, + "ce_orig": 0.6243037581443787, + "epoch": 0.5728664893234596, + "kl_loss": 0.1349714994430542, + "loss_ib": 0.004693753086030483, + "step": 1992 + }, + { + "ce_ib": 3.631105899810791, + "ce_orig": 0.6191436052322388, + "epoch": 0.5731540729024373, + "kl_loss": 0.20608216524124146, + "loss_ib": 0.005691927392035723, + "step": 1993 + }, + { + "ce_ib": 5.589832305908203, + "ce_orig": 1.1829005479812622, + "epoch": 0.5731540729024373, + "kl_loss": 0.22724345326423645, + "loss_ib": 0.007862267084419727, + "step": 1993 + }, + { + "ce_ib": 3.7915942668914795, + "ce_orig": 0.6931657791137695, + "epoch": 0.5731540729024373, + "kl_loss": 0.18231695890426636, + "loss_ib": 0.005614763591438532, + "step": 1993 + }, + { + "ce_ib": 4.375422954559326, + "ce_orig": 0.745322585105896, + "epoch": 0.5731540729024373, + "kl_loss": 0.2949674725532532, + "loss_ib": 0.007325096987187862, + "step": 1993 + }, + { + "ce_ib": 3.4465203285217285, + "ce_orig": 0.5611634850502014, + "epoch": 0.5734416564814149, + "kl_loss": 0.2395157516002655, + "loss_ib": 0.005841677542775869, + "step": 1994 + }, + { + "ce_ib": 8.060595512390137, + "ce_orig": 1.1999075412750244, + "epoch": 0.5734416564814149, + "kl_loss": 0.1809830665588379, + "loss_ib": 0.009870425797998905, + "step": 1994 + }, + { + "ce_ib": 3.0074267387390137, + "ce_orig": 0.6377060413360596, + "epoch": 0.5734416564814149, + "kl_loss": 0.21215245127677917, + "loss_ib": 0.00512895081192255, + "step": 1994 + }, + { + "ce_ib": 6.93182373046875, + "ce_orig": 0.8554105162620544, + "epoch": 0.5734416564814149, + "kl_loss": 0.2965483069419861, + "loss_ib": 0.009897306561470032, + "step": 1994 + }, + { + "epoch": 0.5737292400603925, + "grad_norm": 0.12339665740728378, + "learning_rate": 9.339152926819259e-06, + "loss": 0.84, + "step": 1995 + }, + { + "ce_ib": 6.209877967834473, + "ce_orig": 1.1827176809310913, + "epoch": 0.5737292400603925, + "kl_loss": 0.2422569990158081, + "loss_ib": 0.008632448501884937, + "step": 1995 + }, + { + "ce_ib": 4.090214252471924, + "ce_orig": 0.5631054639816284, + "epoch": 0.5737292400603925, + "kl_loss": 0.18600359559059143, + "loss_ib": 0.0059502506628632545, + "step": 1995 + }, + { + "ce_ib": 7.50163459777832, + "ce_orig": 1.0576012134552002, + "epoch": 0.5737292400603925, + "kl_loss": 0.19061043858528137, + "loss_ib": 0.009407739154994488, + "step": 1995 + }, + { + "ce_ib": 3.8708176612854004, + "ce_orig": 0.44926437735557556, + "epoch": 0.5737292400603925, + "kl_loss": 0.31955450773239136, + "loss_ib": 0.007066363003104925, + "step": 1995 + }, + { + "ce_ib": 3.8656487464904785, + "ce_orig": 0.6919336915016174, + "epoch": 0.5740168236393702, + "kl_loss": 0.20553775131702423, + "loss_ib": 0.005921026226133108, + "step": 1996 + }, + { + "ce_ib": 2.9704113006591797, + "ce_orig": 0.7503409385681152, + "epoch": 0.5740168236393702, + "kl_loss": 0.15444160997867584, + "loss_ib": 0.004514827858656645, + "step": 1996 + }, + { + "ce_ib": 2.5017213821411133, + "ce_orig": 0.47855257987976074, + "epoch": 0.5740168236393702, + "kl_loss": 0.17140816152095795, + "loss_ib": 0.004215802997350693, + "step": 1996 + }, + { + "ce_ib": 6.732548236846924, + "ce_orig": 1.4294027090072632, + "epoch": 0.5740168236393702, + "kl_loss": 0.3413958251476288, + "loss_ib": 0.010146507062017918, + "step": 1996 + }, + { + "ce_ib": 4.873874187469482, + "ce_orig": 0.8762377500534058, + "epoch": 0.5743044072183479, + "kl_loss": 0.1936822235584259, + "loss_ib": 0.006810695864260197, + "step": 1997 + }, + { + "ce_ib": 3.110563039779663, + "ce_orig": 0.5195457339286804, + "epoch": 0.5743044072183479, + "kl_loss": 0.12195809185504913, + "loss_ib": 0.004330143798142672, + "step": 1997 + }, + { + "ce_ib": 3.6337907314300537, + "ce_orig": 0.7683537602424622, + "epoch": 0.5743044072183479, + "kl_loss": 0.22961460053920746, + "loss_ib": 0.005929936654865742, + "step": 1997 + }, + { + "ce_ib": 5.1048054695129395, + "ce_orig": 0.8128162622451782, + "epoch": 0.5743044072183479, + "kl_loss": 0.20120778679847717, + "loss_ib": 0.007116883993148804, + "step": 1997 + }, + { + "ce_ib": 2.6532974243164062, + "ce_orig": 0.5304936170578003, + "epoch": 0.5745919907973255, + "kl_loss": 0.1840747892856598, + "loss_ib": 0.0044940453954041, + "step": 1998 + }, + { + "ce_ib": 4.239226341247559, + "ce_orig": 0.8676175475120544, + "epoch": 0.5745919907973255, + "kl_loss": 0.26321902871131897, + "loss_ib": 0.006871416233479977, + "step": 1998 + }, + { + "ce_ib": 7.493546009063721, + "ce_orig": 1.3227934837341309, + "epoch": 0.5745919907973255, + "kl_loss": 0.16562724113464355, + "loss_ib": 0.009149818681180477, + "step": 1998 + }, + { + "ce_ib": 3.1623857021331787, + "ce_orig": 0.6350651383399963, + "epoch": 0.5745919907973255, + "kl_loss": 0.18704582750797272, + "loss_ib": 0.0050328439101576805, + "step": 1998 + }, + { + "ce_ib": 2.9258923530578613, + "ce_orig": 0.5514384508132935, + "epoch": 0.5748795743763031, + "kl_loss": 0.16143694519996643, + "loss_ib": 0.004540261812508106, + "step": 1999 + }, + { + "ce_ib": 5.353640079498291, + "ce_orig": 0.828662633895874, + "epoch": 0.5748795743763031, + "kl_loss": 0.18023285269737244, + "loss_ib": 0.0071559688076376915, + "step": 1999 + }, + { + "ce_ib": 6.6875481605529785, + "ce_orig": 0.8465195894241333, + "epoch": 0.5748795743763031, + "kl_loss": 0.20709696412086487, + "loss_ib": 0.008758516982197762, + "step": 1999 + }, + { + "ce_ib": 5.4458537101745605, + "ce_orig": 0.7180911302566528, + "epoch": 0.5748795743763031, + "kl_loss": 0.22075165808200836, + "loss_ib": 0.007653370499610901, + "step": 1999 + }, + { + "epoch": 0.5751671579552807, + "grad_norm": 0.11972998082637787, + "learning_rate": 9.335291643036221e-06, + "loss": 0.8855, + "step": 2000 + }, + { + "ce_ib": 4.879345417022705, + "ce_orig": 0.7676214575767517, + "epoch": 0.5751671579552807, + "kl_loss": 0.1884056180715561, + "loss_ib": 0.0067634014412760735, + "step": 2000 + }, + { + "ce_ib": 7.150795936584473, + "ce_orig": 1.00071382522583, + "epoch": 0.5751671579552807, + "kl_loss": 0.252411425113678, + "loss_ib": 0.009674910455942154, + "step": 2000 + }, + { + "ce_ib": 4.5267462730407715, + "ce_orig": 0.808560311794281, + "epoch": 0.5751671579552807, + "kl_loss": 0.21236705780029297, + "loss_ib": 0.006650416646152735, + "step": 2000 + }, + { + "ce_ib": 5.318306922912598, + "ce_orig": 0.8332003355026245, + "epoch": 0.5751671579552807, + "kl_loss": 0.1630081981420517, + "loss_ib": 0.006948389112949371, + "step": 2000 + }, + { + "ce_ib": 3.644702911376953, + "ce_orig": 0.7948723435401917, + "epoch": 0.5754547415342584, + "kl_loss": 0.24896064400672913, + "loss_ib": 0.006134309805929661, + "step": 2001 + }, + { + "ce_ib": 2.4411685466766357, + "ce_orig": 0.5796854496002197, + "epoch": 0.5754547415342584, + "kl_loss": 0.16755080223083496, + "loss_ib": 0.004116676282137632, + "step": 2001 + }, + { + "ce_ib": 3.537076473236084, + "ce_orig": 0.7569655179977417, + "epoch": 0.5754547415342584, + "kl_loss": 0.19084776937961578, + "loss_ib": 0.005445553921163082, + "step": 2001 + }, + { + "ce_ib": 3.459110736846924, + "ce_orig": 0.6917217969894409, + "epoch": 0.5754547415342584, + "kl_loss": 0.19892358779907227, + "loss_ib": 0.005448346491903067, + "step": 2001 + }, + { + "ce_ib": 5.084751605987549, + "ce_orig": 0.5684532523155212, + "epoch": 0.5757423251132361, + "kl_loss": 0.299898236989975, + "loss_ib": 0.008083734661340714, + "step": 2002 + }, + { + "ce_ib": 4.969757080078125, + "ce_orig": 0.9148443937301636, + "epoch": 0.5757423251132361, + "kl_loss": 0.22143074870109558, + "loss_ib": 0.007184064015746117, + "step": 2002 + }, + { + "ce_ib": 4.137480735778809, + "ce_orig": 0.6058545112609863, + "epoch": 0.5757423251132361, + "kl_loss": 0.24219946563243866, + "loss_ib": 0.00655947532504797, + "step": 2002 + }, + { + "ce_ib": 5.256179332733154, + "ce_orig": 1.015351414680481, + "epoch": 0.5757423251132361, + "kl_loss": 0.22360490262508392, + "loss_ib": 0.007492228411138058, + "step": 2002 + }, + { + "ce_ib": 3.315232038497925, + "ce_orig": 0.4629835784435272, + "epoch": 0.5760299086922137, + "kl_loss": 0.21057288348674774, + "loss_ib": 0.005420960485935211, + "step": 2003 + }, + { + "ce_ib": 5.873707294464111, + "ce_orig": 0.6028958559036255, + "epoch": 0.5760299086922137, + "kl_loss": 0.2710071802139282, + "loss_ib": 0.008583779446780682, + "step": 2003 + }, + { + "ce_ib": 5.810640335083008, + "ce_orig": 0.8809718489646912, + "epoch": 0.5760299086922137, + "kl_loss": 0.3170161843299866, + "loss_ib": 0.00898080226033926, + "step": 2003 + }, + { + "ce_ib": 8.538762092590332, + "ce_orig": 1.479617714881897, + "epoch": 0.5760299086922137, + "kl_loss": 0.22407421469688416, + "loss_ib": 0.010779504664242268, + "step": 2003 + }, + { + "ce_ib": 4.519423007965088, + "ce_orig": 0.7419164776802063, + "epoch": 0.5763174922711913, + "kl_loss": 0.21693052351474762, + "loss_ib": 0.006688727997243404, + "step": 2004 + }, + { + "ce_ib": 4.213459014892578, + "ce_orig": 0.8245019316673279, + "epoch": 0.5763174922711913, + "kl_loss": 0.13654984533786774, + "loss_ib": 0.005578957498073578, + "step": 2004 + }, + { + "ce_ib": 7.727110862731934, + "ce_orig": 1.345529556274414, + "epoch": 0.5763174922711913, + "kl_loss": 0.15968406200408936, + "loss_ib": 0.009323951788246632, + "step": 2004 + }, + { + "ce_ib": 4.007021903991699, + "ce_orig": 0.794309675693512, + "epoch": 0.5763174922711913, + "kl_loss": 0.15225544571876526, + "loss_ib": 0.005529576446861029, + "step": 2004 + }, + { + "epoch": 0.576605075850169, + "grad_norm": 0.1318047195672989, + "learning_rate": 9.331419914529375e-06, + "loss": 0.8885, + "step": 2005 + }, + { + "ce_ib": 9.740813255310059, + "ce_orig": 1.7057772874832153, + "epoch": 0.576605075850169, + "kl_loss": 0.25457310676574707, + "loss_ib": 0.012286543846130371, + "step": 2005 + }, + { + "ce_ib": 6.324468612670898, + "ce_orig": 1.1503417491912842, + "epoch": 0.576605075850169, + "kl_loss": 0.305970162153244, + "loss_ib": 0.009384170174598694, + "step": 2005 + }, + { + "ce_ib": 4.382785797119141, + "ce_orig": 0.6723372936248779, + "epoch": 0.576605075850169, + "kl_loss": 0.19469520449638367, + "loss_ib": 0.006329737603664398, + "step": 2005 + }, + { + "ce_ib": 4.49220609664917, + "ce_orig": 0.605858325958252, + "epoch": 0.576605075850169, + "kl_loss": 0.20513440668582916, + "loss_ib": 0.0065435501746833324, + "step": 2005 + }, + { + "ce_ib": 4.312743186950684, + "ce_orig": 0.9074334502220154, + "epoch": 0.5768926594291466, + "kl_loss": 0.20234304666519165, + "loss_ib": 0.006336173973977566, + "step": 2006 + }, + { + "ce_ib": 4.609302043914795, + "ce_orig": 0.8466799259185791, + "epoch": 0.5768926594291466, + "kl_loss": 0.17691607773303986, + "loss_ib": 0.006378462538123131, + "step": 2006 + }, + { + "ce_ib": 3.863643169403076, + "ce_orig": 0.7927859425544739, + "epoch": 0.5768926594291466, + "kl_loss": 0.23325464129447937, + "loss_ib": 0.00619618920609355, + "step": 2006 + }, + { + "ce_ib": 2.7736246585845947, + "ce_orig": 0.3418360650539398, + "epoch": 0.5768926594291466, + "kl_loss": 0.20447048544883728, + "loss_ib": 0.0048183295875787735, + "step": 2006 + }, + { + "ce_ib": 3.8773717880249023, + "ce_orig": 0.8834701180458069, + "epoch": 0.5771802430081242, + "kl_loss": 0.12003468722105026, + "loss_ib": 0.005077718291431665, + "step": 2007 + }, + { + "ce_ib": 6.984947681427002, + "ce_orig": 1.5931556224822998, + "epoch": 0.5771802430081242, + "kl_loss": 0.22794847190380096, + "loss_ib": 0.009264432825148106, + "step": 2007 + }, + { + "ce_ib": 8.2157621383667, + "ce_orig": 1.3655363321304321, + "epoch": 0.5771802430081242, + "kl_loss": 0.2591714859008789, + "loss_ib": 0.010807476937770844, + "step": 2007 + }, + { + "ce_ib": 4.496334552764893, + "ce_orig": 1.0632449388504028, + "epoch": 0.5771802430081242, + "kl_loss": 0.20411358773708344, + "loss_ib": 0.006537470035254955, + "step": 2007 + }, + { + "ce_ib": 5.301584720611572, + "ce_orig": 1.0583372116088867, + "epoch": 0.5774678265871018, + "kl_loss": 0.2802945673465729, + "loss_ib": 0.008104530163109303, + "step": 2008 + }, + { + "ce_ib": 2.8047409057617188, + "ce_orig": 0.4265078902244568, + "epoch": 0.5774678265871018, + "kl_loss": 0.39175528287887573, + "loss_ib": 0.006722293794155121, + "step": 2008 + }, + { + "ce_ib": 2.957156181335449, + "ce_orig": 0.7408266663551331, + "epoch": 0.5774678265871018, + "kl_loss": 0.16199912130832672, + "loss_ib": 0.0045771473087370396, + "step": 2008 + }, + { + "ce_ib": 7.095102310180664, + "ce_orig": 1.1492635011672974, + "epoch": 0.5774678265871018, + "kl_loss": 0.22469548881053925, + "loss_ib": 0.009342057630419731, + "step": 2008 + }, + { + "ce_ib": 4.770378112792969, + "ce_orig": 0.7521302700042725, + "epoch": 0.5777554101660796, + "kl_loss": 0.2624219059944153, + "loss_ib": 0.00739459739997983, + "step": 2009 + }, + { + "ce_ib": 4.084651470184326, + "ce_orig": 0.6303939819335938, + "epoch": 0.5777554101660796, + "kl_loss": 0.24030208587646484, + "loss_ib": 0.006487672217190266, + "step": 2009 + }, + { + "ce_ib": 2.661609411239624, + "ce_orig": 0.40044105052948, + "epoch": 0.5777554101660796, + "kl_loss": 0.22666935622692108, + "loss_ib": 0.004928302485495806, + "step": 2009 + }, + { + "ce_ib": 2.7991974353790283, + "ce_orig": 0.49671587347984314, + "epoch": 0.5777554101660796, + "kl_loss": 0.17896729707717896, + "loss_ib": 0.004588870331645012, + "step": 2009 + }, + { + "epoch": 0.5780429937450572, + "grad_norm": 0.11672159284353256, + "learning_rate": 9.32753775062661e-06, + "loss": 0.8514, + "step": 2010 + }, + { + "ce_ib": 4.256751537322998, + "ce_orig": 0.6445202827453613, + "epoch": 0.5780429937450572, + "kl_loss": 0.2806565463542938, + "loss_ib": 0.007063317112624645, + "step": 2010 + }, + { + "ce_ib": 7.2349724769592285, + "ce_orig": 1.3720309734344482, + "epoch": 0.5780429937450572, + "kl_loss": 0.22288262844085693, + "loss_ib": 0.009463799186050892, + "step": 2010 + }, + { + "ce_ib": 3.916721820831299, + "ce_orig": 0.42397215962409973, + "epoch": 0.5780429937450572, + "kl_loss": 0.23481033742427826, + "loss_ib": 0.006264825351536274, + "step": 2010 + }, + { + "ce_ib": 4.734856128692627, + "ce_orig": 0.73923659324646, + "epoch": 0.5780429937450572, + "kl_loss": 0.19627177715301514, + "loss_ib": 0.006697573699057102, + "step": 2010 + }, + { + "ce_ib": 3.080993890762329, + "ce_orig": 0.38280364871025085, + "epoch": 0.5783305773240348, + "kl_loss": 0.6872298717498779, + "loss_ib": 0.009953292086720467, + "step": 2011 + }, + { + "ce_ib": 3.383258819580078, + "ce_orig": 0.8479024767875671, + "epoch": 0.5783305773240348, + "kl_loss": 0.2118729054927826, + "loss_ib": 0.005501987878233194, + "step": 2011 + }, + { + "ce_ib": 5.42293119430542, + "ce_orig": 0.6892567276954651, + "epoch": 0.5783305773240348, + "kl_loss": 0.20090317726135254, + "loss_ib": 0.007431962992995977, + "step": 2011 + }, + { + "ce_ib": 7.2111124992370605, + "ce_orig": 1.4583224058151245, + "epoch": 0.5783305773240348, + "kl_loss": 0.19084247946739197, + "loss_ib": 0.009119536727666855, + "step": 2011 + }, + { + "ce_ib": 3.1745665073394775, + "ce_orig": 0.7574561238288879, + "epoch": 0.5786181609030124, + "kl_loss": 0.2708422541618347, + "loss_ib": 0.005882989149540663, + "step": 2012 + }, + { + "ce_ib": 3.323664665222168, + "ce_orig": 0.6054462194442749, + "epoch": 0.5786181609030124, + "kl_loss": 0.23874612152576447, + "loss_ib": 0.005711125675588846, + "step": 2012 + }, + { + "ce_ib": 4.515368938446045, + "ce_orig": 0.9479967355728149, + "epoch": 0.5786181609030124, + "kl_loss": 0.17634344100952148, + "loss_ib": 0.00627880310639739, + "step": 2012 + }, + { + "ce_ib": 7.173547744750977, + "ce_orig": 1.2383763790130615, + "epoch": 0.5786181609030124, + "kl_loss": 0.3248887062072754, + "loss_ib": 0.010422434657812119, + "step": 2012 + }, + { + "ce_ib": 4.0743842124938965, + "ce_orig": 0.6871362924575806, + "epoch": 0.5789057444819901, + "kl_loss": 0.3043670654296875, + "loss_ib": 0.007118054665625095, + "step": 2013 + }, + { + "ce_ib": 8.666088104248047, + "ce_orig": 1.2420462369918823, + "epoch": 0.5789057444819901, + "kl_loss": 0.1530001163482666, + "loss_ib": 0.010196089744567871, + "step": 2013 + }, + { + "ce_ib": 6.544810771942139, + "ce_orig": 1.202377438545227, + "epoch": 0.5789057444819901, + "kl_loss": 0.227060005068779, + "loss_ib": 0.008815411478281021, + "step": 2013 + }, + { + "ce_ib": 5.939795017242432, + "ce_orig": 1.1491752862930298, + "epoch": 0.5789057444819901, + "kl_loss": 0.26358476281166077, + "loss_ib": 0.00857564341276884, + "step": 2013 + }, + { + "ce_ib": 3.8898046016693115, + "ce_orig": 0.5594109892845154, + "epoch": 0.5791933280609677, + "kl_loss": 0.15346726775169373, + "loss_ib": 0.005424477159976959, + "step": 2014 + }, + { + "ce_ib": 2.3330352306365967, + "ce_orig": 0.5267202854156494, + "epoch": 0.5791933280609677, + "kl_loss": 0.14478082954883575, + "loss_ib": 0.0037808434572070837, + "step": 2014 + }, + { + "ce_ib": 6.609352111816406, + "ce_orig": 1.243557095527649, + "epoch": 0.5791933280609677, + "kl_loss": 0.1778619885444641, + "loss_ib": 0.008387971669435501, + "step": 2014 + }, + { + "ce_ib": 6.029139518737793, + "ce_orig": 0.9641034007072449, + "epoch": 0.5791933280609677, + "kl_loss": 0.18145057559013367, + "loss_ib": 0.00784364528954029, + "step": 2014 + }, + { + "epoch": 0.5794809116399453, + "grad_norm": 0.13346710801124573, + "learning_rate": 9.323645160680959e-06, + "loss": 0.8219, + "step": 2015 + }, + { + "ce_ib": 11.219620704650879, + "ce_orig": 1.6361397504806519, + "epoch": 0.5794809116399453, + "kl_loss": 0.2200336903333664, + "loss_ib": 0.013419956900179386, + "step": 2015 + }, + { + "ce_ib": 6.252190113067627, + "ce_orig": 1.3452035188674927, + "epoch": 0.5794809116399453, + "kl_loss": 0.3112284541130066, + "loss_ib": 0.009364474564790726, + "step": 2015 + }, + { + "ce_ib": 4.395932197570801, + "ce_orig": 0.9340473413467407, + "epoch": 0.5794809116399453, + "kl_loss": 0.16509567201137543, + "loss_ib": 0.006046888884156942, + "step": 2015 + }, + { + "ce_ib": 3.131438970565796, + "ce_orig": 0.6799478530883789, + "epoch": 0.5794809116399453, + "kl_loss": 0.1820930540561676, + "loss_ib": 0.004952369723469019, + "step": 2015 + }, + { + "ce_ib": 5.146693229675293, + "ce_orig": 0.7331662178039551, + "epoch": 0.579768495218923, + "kl_loss": 0.21667331457138062, + "loss_ib": 0.007313426584005356, + "step": 2016 + }, + { + "ce_ib": 2.4229273796081543, + "ce_orig": 0.30835646390914917, + "epoch": 0.579768495218923, + "kl_loss": 0.19585931301116943, + "loss_ib": 0.004381520673632622, + "step": 2016 + }, + { + "ce_ib": 1.9863251447677612, + "ce_orig": 0.4894937574863434, + "epoch": 0.579768495218923, + "kl_loss": 0.1655675321817398, + "loss_ib": 0.0036420004907995462, + "step": 2016 + }, + { + "ce_ib": 5.537152290344238, + "ce_orig": 0.822915256023407, + "epoch": 0.579768495218923, + "kl_loss": 0.24174067378044128, + "loss_ib": 0.007954559288918972, + "step": 2016 + }, + { + "ce_ib": 4.983294486999512, + "ce_orig": 0.6607749462127686, + "epoch": 0.5800560787979007, + "kl_loss": 0.2653239369392395, + "loss_ib": 0.007636533584445715, + "step": 2017 + }, + { + "ce_ib": 3.198190212249756, + "ce_orig": 0.5013297200202942, + "epoch": 0.5800560787979007, + "kl_loss": 0.1853349655866623, + "loss_ib": 0.005051540210843086, + "step": 2017 + }, + { + "ce_ib": 4.767001628875732, + "ce_orig": 0.6593576669692993, + "epoch": 0.5800560787979007, + "kl_loss": 0.2059084177017212, + "loss_ib": 0.0068260859698057175, + "step": 2017 + }, + { + "ce_ib": 5.228281497955322, + "ce_orig": 0.8957405090332031, + "epoch": 0.5800560787979007, + "kl_loss": 0.18382826447486877, + "loss_ib": 0.007066564634442329, + "step": 2017 + }, + { + "ce_ib": 6.210137844085693, + "ce_orig": 1.0728610754013062, + "epoch": 0.5803436623768783, + "kl_loss": 0.18371208012104034, + "loss_ib": 0.008047258481383324, + "step": 2018 + }, + { + "ce_ib": 4.568665504455566, + "ce_orig": 0.9390750527381897, + "epoch": 0.5803436623768783, + "kl_loss": 0.2391340136528015, + "loss_ib": 0.00696000549942255, + "step": 2018 + }, + { + "ce_ib": 4.962886810302734, + "ce_orig": 1.0895274877548218, + "epoch": 0.5803436623768783, + "kl_loss": 0.208282470703125, + "loss_ib": 0.007045710925012827, + "step": 2018 + }, + { + "ce_ib": 2.6035728454589844, + "ce_orig": 0.5119933485984802, + "epoch": 0.5803436623768783, + "kl_loss": 0.24143317341804504, + "loss_ib": 0.005017904564738274, + "step": 2018 + }, + { + "ce_ib": 4.47508430480957, + "ce_orig": 0.6946568489074707, + "epoch": 0.5806312459558559, + "kl_loss": 0.2398218810558319, + "loss_ib": 0.006873303093016148, + "step": 2019 + }, + { + "ce_ib": 3.2205610275268555, + "ce_orig": 0.6797946095466614, + "epoch": 0.5806312459558559, + "kl_loss": 0.19944965839385986, + "loss_ib": 0.005215057637542486, + "step": 2019 + }, + { + "ce_ib": 8.659202575683594, + "ce_orig": 1.4204720258712769, + "epoch": 0.5806312459558559, + "kl_loss": 0.23280560970306396, + "loss_ib": 0.010987257584929466, + "step": 2019 + }, + { + "ce_ib": 4.547617435455322, + "ce_orig": 1.0080000162124634, + "epoch": 0.5806312459558559, + "kl_loss": 0.2181328982114792, + "loss_ib": 0.006728946231305599, + "step": 2019 + }, + { + "epoch": 0.5809188295348335, + "grad_norm": 0.1154453381896019, + "learning_rate": 9.319742154070578e-06, + "loss": 0.8487, + "step": 2020 + }, + { + "ce_ib": 3.555640697479248, + "ce_orig": 0.48467716574668884, + "epoch": 0.5809188295348335, + "kl_loss": 0.14274027943611145, + "loss_ib": 0.004983043763786554, + "step": 2020 + }, + { + "ce_ib": 4.953778266906738, + "ce_orig": 0.9959943294525146, + "epoch": 0.5809188295348335, + "kl_loss": 0.2175217568874359, + "loss_ib": 0.007128995377570391, + "step": 2020 + }, + { + "ce_ib": 6.705366611480713, + "ce_orig": 1.4568270444869995, + "epoch": 0.5809188295348335, + "kl_loss": 0.19832724332809448, + "loss_ib": 0.008688638918101788, + "step": 2020 + }, + { + "ce_ib": 4.839088439941406, + "ce_orig": 0.8255569934844971, + "epoch": 0.5809188295348335, + "kl_loss": 0.20868706703186035, + "loss_ib": 0.0069259596057236195, + "step": 2020 + }, + { + "ce_ib": 4.48192024230957, + "ce_orig": 0.35738876461982727, + "epoch": 0.5812064131138112, + "kl_loss": 0.31564462184906006, + "loss_ib": 0.007638365961611271, + "step": 2021 + }, + { + "ce_ib": 2.9765989780426025, + "ce_orig": 0.6060298085212708, + "epoch": 0.5812064131138112, + "kl_loss": 0.15206460654735565, + "loss_ib": 0.004497244954109192, + "step": 2021 + }, + { + "ce_ib": 4.4704813957214355, + "ce_orig": 0.5146627426147461, + "epoch": 0.5812064131138112, + "kl_loss": 0.2160194218158722, + "loss_ib": 0.006630675867199898, + "step": 2021 + }, + { + "ce_ib": 7.710872173309326, + "ce_orig": 1.4399417638778687, + "epoch": 0.5812064131138112, + "kl_loss": 0.1873570680618286, + "loss_ib": 0.009584442712366581, + "step": 2021 + }, + { + "ce_ib": 3.1961584091186523, + "ce_orig": 0.6886042356491089, + "epoch": 0.5814939966927889, + "kl_loss": 0.15131154656410217, + "loss_ib": 0.00470927357673645, + "step": 2022 + }, + { + "ce_ib": 5.946525573730469, + "ce_orig": 1.1888014078140259, + "epoch": 0.5814939966927889, + "kl_loss": 0.20089611411094666, + "loss_ib": 0.007955486886203289, + "step": 2022 + }, + { + "ce_ib": 8.369162559509277, + "ce_orig": 1.252890706062317, + "epoch": 0.5814939966927889, + "kl_loss": 0.26733046770095825, + "loss_ib": 0.011042467318475246, + "step": 2022 + }, + { + "ce_ib": 12.882861137390137, + "ce_orig": 2.313584804534912, + "epoch": 0.5814939966927889, + "kl_loss": 0.2516401410102844, + "loss_ib": 0.015399262309074402, + "step": 2022 + }, + { + "ce_ib": 6.849246501922607, + "ce_orig": 1.2088770866394043, + "epoch": 0.5817815802717665, + "kl_loss": 0.15012520551681519, + "loss_ib": 0.008350498043000698, + "step": 2023 + }, + { + "ce_ib": 3.0896048545837402, + "ce_orig": 0.782191276550293, + "epoch": 0.5817815802717665, + "kl_loss": 0.16189227998256683, + "loss_ib": 0.004708527587354183, + "step": 2023 + }, + { + "ce_ib": 6.3142266273498535, + "ce_orig": 1.3417201042175293, + "epoch": 0.5817815802717665, + "kl_loss": 0.27284345030784607, + "loss_ib": 0.00904266070574522, + "step": 2023 + }, + { + "ce_ib": 7.445364475250244, + "ce_orig": 0.6577990055084229, + "epoch": 0.5817815802717665, + "kl_loss": 0.4242474436759949, + "loss_ib": 0.011687838472425938, + "step": 2023 + }, + { + "ce_ib": 4.873393535614014, + "ce_orig": 0.6504711508750916, + "epoch": 0.5820691638507441, + "kl_loss": 0.23290221393108368, + "loss_ib": 0.007202415261417627, + "step": 2024 + }, + { + "ce_ib": 4.809274196624756, + "ce_orig": 0.8644685745239258, + "epoch": 0.5820691638507441, + "kl_loss": 0.15975919365882874, + "loss_ib": 0.006406866014003754, + "step": 2024 + }, + { + "ce_ib": 3.5567092895507812, + "ce_orig": 0.5155238509178162, + "epoch": 0.5820691638507441, + "kl_loss": 0.19175970554351807, + "loss_ib": 0.0054743061773478985, + "step": 2024 + }, + { + "ce_ib": 3.4617462158203125, + "ce_orig": 0.5451185703277588, + "epoch": 0.5820691638507441, + "kl_loss": 0.19567197561264038, + "loss_ib": 0.005418466404080391, + "step": 2024 + }, + { + "epoch": 0.5823567474297218, + "grad_norm": 0.12068771570920944, + "learning_rate": 9.315828740198714e-06, + "loss": 0.85, + "step": 2025 + }, + { + "ce_ib": 6.605884075164795, + "ce_orig": 1.12260901927948, + "epoch": 0.5823567474297218, + "kl_loss": 0.26475635170936584, + "loss_ib": 0.009253447875380516, + "step": 2025 + }, + { + "ce_ib": 2.954988956451416, + "ce_orig": 0.5637505054473877, + "epoch": 0.5823567474297218, + "kl_loss": 0.13619470596313477, + "loss_ib": 0.004316936247050762, + "step": 2025 + }, + { + "ce_ib": 3.7592430114746094, + "ce_orig": 0.6377084851264954, + "epoch": 0.5823567474297218, + "kl_loss": 0.21954379975795746, + "loss_ib": 0.0059546809643507, + "step": 2025 + }, + { + "ce_ib": 4.312002658843994, + "ce_orig": 0.5840259790420532, + "epoch": 0.5823567474297218, + "kl_loss": 0.1754627525806427, + "loss_ib": 0.006066629663109779, + "step": 2025 + }, + { + "ce_ib": 3.500800848007202, + "ce_orig": 0.44518136978149414, + "epoch": 0.5826443310086994, + "kl_loss": 0.254309743642807, + "loss_ib": 0.006043898407369852, + "step": 2026 + }, + { + "ce_ib": 4.565871238708496, + "ce_orig": 0.6639509797096252, + "epoch": 0.5826443310086994, + "kl_loss": 0.13143613934516907, + "loss_ib": 0.005880232900381088, + "step": 2026 + }, + { + "ce_ib": 2.641382932662964, + "ce_orig": 0.5192428827285767, + "epoch": 0.5826443310086994, + "kl_loss": 0.15137457847595215, + "loss_ib": 0.004155128728598356, + "step": 2026 + }, + { + "ce_ib": 3.294861078262329, + "ce_orig": 0.4373263716697693, + "epoch": 0.5826443310086994, + "kl_loss": 0.2667046785354614, + "loss_ib": 0.0059619080275297165, + "step": 2026 + }, + { + "ce_ib": 5.823228359222412, + "ce_orig": 1.2514150142669678, + "epoch": 0.582931914587677, + "kl_loss": 0.2534264326095581, + "loss_ib": 0.00835749227553606, + "step": 2027 + }, + { + "ce_ib": 3.514845609664917, + "ce_orig": 0.8277014493942261, + "epoch": 0.582931914587677, + "kl_loss": 0.19533860683441162, + "loss_ib": 0.005468231160193682, + "step": 2027 + }, + { + "ce_ib": 2.771880626678467, + "ce_orig": 0.5809987783432007, + "epoch": 0.582931914587677, + "kl_loss": 0.1883137822151184, + "loss_ib": 0.004655018448829651, + "step": 2027 + }, + { + "ce_ib": 3.0748887062072754, + "ce_orig": 0.795819103717804, + "epoch": 0.582931914587677, + "kl_loss": 0.13685670495033264, + "loss_ib": 0.004443455953150988, + "step": 2027 + }, + { + "ce_ib": 2.5866506099700928, + "ce_orig": 0.5054392218589783, + "epoch": 0.5832194981666546, + "kl_loss": 0.18069525063037872, + "loss_ib": 0.004393603187054396, + "step": 2028 + }, + { + "ce_ib": 4.650757312774658, + "ce_orig": 0.6371245384216309, + "epoch": 0.5832194981666546, + "kl_loss": 0.30854061245918274, + "loss_ib": 0.00773616274818778, + "step": 2028 + }, + { + "ce_ib": 8.422320365905762, + "ce_orig": 1.6519966125488281, + "epoch": 0.5832194981666546, + "kl_loss": 0.2717132270336151, + "loss_ib": 0.011139452457427979, + "step": 2028 + }, + { + "ce_ib": 4.515995979309082, + "ce_orig": 0.5190319418907166, + "epoch": 0.5832194981666546, + "kl_loss": 0.3163284659385681, + "loss_ib": 0.007679280359297991, + "step": 2028 + }, + { + "ce_ib": 4.528309345245361, + "ce_orig": 0.7467356324195862, + "epoch": 0.5835070817456324, + "kl_loss": 0.22670456767082214, + "loss_ib": 0.0067953551188111305, + "step": 2029 + }, + { + "ce_ib": 4.409091472625732, + "ce_orig": 0.8908617496490479, + "epoch": 0.5835070817456324, + "kl_loss": 0.2962318956851959, + "loss_ib": 0.007371409796178341, + "step": 2029 + }, + { + "ce_ib": 6.433431625366211, + "ce_orig": 0.9417192935943604, + "epoch": 0.5835070817456324, + "kl_loss": 0.2247939258813858, + "loss_ib": 0.008681370876729488, + "step": 2029 + }, + { + "ce_ib": 7.176348686218262, + "ce_orig": 1.3555039167404175, + "epoch": 0.5835070817456324, + "kl_loss": 0.2610422968864441, + "loss_ib": 0.009786771610379219, + "step": 2029 + }, + { + "epoch": 0.58379466532461, + "grad_norm": 0.11402686685323715, + "learning_rate": 9.31190492849369e-06, + "loss": 0.8238, + "step": 2030 + }, + { + "ce_ib": 2.2141783237457275, + "ce_orig": 0.4325743615627289, + "epoch": 0.58379466532461, + "kl_loss": 0.18517948687076569, + "loss_ib": 0.0040659732185304165, + "step": 2030 + }, + { + "ce_ib": 6.7610087394714355, + "ce_orig": 1.1669477224349976, + "epoch": 0.58379466532461, + "kl_loss": 0.2562747001647949, + "loss_ib": 0.009323756210505962, + "step": 2030 + }, + { + "ce_ib": 3.5304489135742188, + "ce_orig": 0.42435288429260254, + "epoch": 0.58379466532461, + "kl_loss": 0.21549230813980103, + "loss_ib": 0.005685372278094292, + "step": 2030 + }, + { + "ce_ib": 6.898292541503906, + "ce_orig": 1.0768563747406006, + "epoch": 0.58379466532461, + "kl_loss": 0.2347845733165741, + "loss_ib": 0.00924613792449236, + "step": 2030 + }, + { + "ce_ib": 4.3790998458862305, + "ce_orig": 0.7672278881072998, + "epoch": 0.5840822489035876, + "kl_loss": 0.31554219126701355, + "loss_ib": 0.007534521631896496, + "step": 2031 + }, + { + "ce_ib": 4.507769584655762, + "ce_orig": 0.8058867454528809, + "epoch": 0.5840822489035876, + "kl_loss": 0.36073389649391174, + "loss_ib": 0.008115108124911785, + "step": 2031 + }, + { + "ce_ib": 3.3563175201416016, + "ce_orig": 0.6390334367752075, + "epoch": 0.5840822489035876, + "kl_loss": 0.20743393898010254, + "loss_ib": 0.005430656485259533, + "step": 2031 + }, + { + "ce_ib": 4.183574676513672, + "ce_orig": 0.7447531223297119, + "epoch": 0.5840822489035876, + "kl_loss": 0.30607807636260986, + "loss_ib": 0.007244355510920286, + "step": 2031 + }, + { + "ce_ib": 5.814215183258057, + "ce_orig": 0.9811205267906189, + "epoch": 0.5843698324825652, + "kl_loss": 0.2644641101360321, + "loss_ib": 0.008458856493234634, + "step": 2032 + }, + { + "ce_ib": 5.839713096618652, + "ce_orig": 1.1721041202545166, + "epoch": 0.5843698324825652, + "kl_loss": 0.1886214017868042, + "loss_ib": 0.007725927047431469, + "step": 2032 + }, + { + "ce_ib": 1.4380989074707031, + "ce_orig": 0.23305408656597137, + "epoch": 0.5843698324825652, + "kl_loss": 0.1629962921142578, + "loss_ib": 0.003068062011152506, + "step": 2032 + }, + { + "ce_ib": 7.2243499755859375, + "ce_orig": 1.271135687828064, + "epoch": 0.5843698324825652, + "kl_loss": 0.22438788414001465, + "loss_ib": 0.009468228556215763, + "step": 2032 + }, + { + "ce_ib": 2.224442958831787, + "ce_orig": 0.47314566373825073, + "epoch": 0.5846574160615429, + "kl_loss": 0.16190429031848907, + "loss_ib": 0.0038434856105595827, + "step": 2033 + }, + { + "ce_ib": 3.209609031677246, + "ce_orig": 0.5736773610115051, + "epoch": 0.5846574160615429, + "kl_loss": 0.1659107506275177, + "loss_ib": 0.004868716467171907, + "step": 2033 + }, + { + "ce_ib": 3.029855251312256, + "ce_orig": 0.7035171389579773, + "epoch": 0.5846574160615429, + "kl_loss": 0.16903111338615417, + "loss_ib": 0.004720166325569153, + "step": 2033 + }, + { + "ce_ib": 4.378622531890869, + "ce_orig": 0.7716140747070312, + "epoch": 0.5846574160615429, + "kl_loss": 0.22922512888908386, + "loss_ib": 0.006670873612165451, + "step": 2033 + }, + { + "ce_ib": 4.28971004486084, + "ce_orig": 0.771354615688324, + "epoch": 0.5849449996405205, + "kl_loss": 0.16497091948986053, + "loss_ib": 0.005939418915659189, + "step": 2034 + }, + { + "ce_ib": 3.484515428543091, + "ce_orig": 0.9304020404815674, + "epoch": 0.5849449996405205, + "kl_loss": 0.10316888988018036, + "loss_ib": 0.0045162043534219265, + "step": 2034 + }, + { + "ce_ib": 7.649745464324951, + "ce_orig": 1.5024563074111938, + "epoch": 0.5849449996405205, + "kl_loss": 0.2911885976791382, + "loss_ib": 0.010561630129814148, + "step": 2034 + }, + { + "ce_ib": 3.6881048679351807, + "ce_orig": 0.6267303228378296, + "epoch": 0.5849449996405205, + "kl_loss": 0.23605424165725708, + "loss_ib": 0.006048647221177816, + "step": 2034 + }, + { + "epoch": 0.5852325832194981, + "grad_norm": 0.14834119379520416, + "learning_rate": 9.307970728408879e-06, + "loss": 0.9064, + "step": 2035 + }, + { + "ce_ib": 4.182180404663086, + "ce_orig": 0.3208024203777313, + "epoch": 0.5852325832194981, + "kl_loss": 0.2807713747024536, + "loss_ib": 0.0069898939691483974, + "step": 2035 + }, + { + "ce_ib": 4.371974945068359, + "ce_orig": 0.5380173325538635, + "epoch": 0.5852325832194981, + "kl_loss": 0.20919658243656158, + "loss_ib": 0.006463940721005201, + "step": 2035 + }, + { + "ce_ib": 4.705419540405273, + "ce_orig": 0.7852678298950195, + "epoch": 0.5852325832194981, + "kl_loss": 0.24647799134254456, + "loss_ib": 0.007170198950916529, + "step": 2035 + }, + { + "ce_ib": 6.359873294830322, + "ce_orig": 1.2596913576126099, + "epoch": 0.5852325832194981, + "kl_loss": 0.21764031052589417, + "loss_ib": 0.008536276407539845, + "step": 2035 + }, + { + "ce_ib": 7.2528862953186035, + "ce_orig": 1.3040971755981445, + "epoch": 0.5855201667984758, + "kl_loss": 0.28334662318229675, + "loss_ib": 0.010086352936923504, + "step": 2036 + }, + { + "ce_ib": 3.821026563644409, + "ce_orig": 0.7737224102020264, + "epoch": 0.5855201667984758, + "kl_loss": 0.15841002762317657, + "loss_ib": 0.005405126605182886, + "step": 2036 + }, + { + "ce_ib": 4.744300365447998, + "ce_orig": 0.7366772890090942, + "epoch": 0.5855201667984758, + "kl_loss": 0.16167090833187103, + "loss_ib": 0.006361009553074837, + "step": 2036 + }, + { + "ce_ib": 4.969760894775391, + "ce_orig": 1.018959403038025, + "epoch": 0.5855201667984758, + "kl_loss": 0.52406907081604, + "loss_ib": 0.010210451669991016, + "step": 2036 + }, + { + "ce_ib": 3.3464584350585938, + "ce_orig": 0.5746207237243652, + "epoch": 0.5858077503774535, + "kl_loss": 0.2503615915775299, + "loss_ib": 0.0058500743471086025, + "step": 2037 + }, + { + "ce_ib": 3.191675901412964, + "ce_orig": 0.6590673327445984, + "epoch": 0.5858077503774535, + "kl_loss": 0.1891201287508011, + "loss_ib": 0.005082877352833748, + "step": 2037 + }, + { + "ce_ib": 4.508326530456543, + "ce_orig": 0.9777689576148987, + "epoch": 0.5858077503774535, + "kl_loss": 0.21319806575775146, + "loss_ib": 0.006640307139605284, + "step": 2037 + }, + { + "ce_ib": 2.45052170753479, + "ce_orig": 0.7049754858016968, + "epoch": 0.5858077503774535, + "kl_loss": 0.12927241623401642, + "loss_ib": 0.003743245732039213, + "step": 2037 + }, + { + "ce_ib": 9.134133338928223, + "ce_orig": 1.647373080253601, + "epoch": 0.5860953339564311, + "kl_loss": 0.2874155044555664, + "loss_ib": 0.012008287943899632, + "step": 2038 + }, + { + "ce_ib": 4.831528663635254, + "ce_orig": 0.8849010467529297, + "epoch": 0.5860953339564311, + "kl_loss": 0.31699520349502563, + "loss_ib": 0.008001480251550674, + "step": 2038 + }, + { + "ce_ib": 8.014817237854004, + "ce_orig": 0.75030916929245, + "epoch": 0.5860953339564311, + "kl_loss": 0.7846512794494629, + "loss_ib": 0.015861330553889275, + "step": 2038 + }, + { + "ce_ib": 5.897004127502441, + "ce_orig": 0.5770127773284912, + "epoch": 0.5860953339564311, + "kl_loss": 0.24998416006565094, + "loss_ib": 0.008396845310926437, + "step": 2038 + }, + { + "ce_ib": 7.127138614654541, + "ce_orig": 1.32103431224823, + "epoch": 0.5863829175354087, + "kl_loss": 0.17013974487781525, + "loss_ib": 0.008828535676002502, + "step": 2039 + }, + { + "ce_ib": 3.4614453315734863, + "ce_orig": 0.672970175743103, + "epoch": 0.5863829175354087, + "kl_loss": 0.15705068409442902, + "loss_ib": 0.005031952168792486, + "step": 2039 + }, + { + "ce_ib": 5.278350353240967, + "ce_orig": 0.9213048815727234, + "epoch": 0.5863829175354087, + "kl_loss": 0.22010907530784607, + "loss_ib": 0.0074794408865273, + "step": 2039 + }, + { + "ce_ib": 8.97058391571045, + "ce_orig": 1.8099918365478516, + "epoch": 0.5863829175354087, + "kl_loss": 0.7058647274971008, + "loss_ib": 0.016029231250286102, + "step": 2039 + }, + { + "epoch": 0.5866705011143863, + "grad_norm": 0.12003674358129501, + "learning_rate": 9.30402614942268e-06, + "loss": 0.8559, + "step": 2040 + }, + { + "ce_ib": 6.384011745452881, + "ce_orig": 1.2485592365264893, + "epoch": 0.5866705011143863, + "kl_loss": 0.29011303186416626, + "loss_ib": 0.00928514264523983, + "step": 2040 + }, + { + "ce_ib": 7.743472576141357, + "ce_orig": 1.1696637868881226, + "epoch": 0.5866705011143863, + "kl_loss": 0.25318658351898193, + "loss_ib": 0.010275337845087051, + "step": 2040 + }, + { + "ce_ib": 4.0685648918151855, + "ce_orig": 0.8661677837371826, + "epoch": 0.5866705011143863, + "kl_loss": 0.2033451795578003, + "loss_ib": 0.006102017126977444, + "step": 2040 + }, + { + "ce_ib": 4.969006538391113, + "ce_orig": 0.8438129425048828, + "epoch": 0.5866705011143863, + "kl_loss": 0.1788439303636551, + "loss_ib": 0.0067574456334114075, + "step": 2040 + }, + { + "ce_ib": 6.406804084777832, + "ce_orig": 1.4265271425247192, + "epoch": 0.586958084693364, + "kl_loss": 0.1533001959323883, + "loss_ib": 0.007939806208014488, + "step": 2041 + }, + { + "ce_ib": 4.529582977294922, + "ce_orig": 0.7594735026359558, + "epoch": 0.586958084693364, + "kl_loss": 0.1272142380475998, + "loss_ib": 0.005801725201308727, + "step": 2041 + }, + { + "ce_ib": 9.068610191345215, + "ce_orig": 1.6357172727584839, + "epoch": 0.586958084693364, + "kl_loss": 0.37834179401397705, + "loss_ib": 0.012852027080953121, + "step": 2041 + }, + { + "ce_ib": 6.551062107086182, + "ce_orig": 0.8756269812583923, + "epoch": 0.586958084693364, + "kl_loss": 0.2394266426563263, + "loss_ib": 0.008945329114794731, + "step": 2041 + }, + { + "ce_ib": 3.1676878929138184, + "ce_orig": 0.6104368567466736, + "epoch": 0.5872456682723417, + "kl_loss": 0.1721348613500595, + "loss_ib": 0.004889036528766155, + "step": 2042 + }, + { + "ce_ib": 6.388540744781494, + "ce_orig": 0.8919225931167603, + "epoch": 0.5872456682723417, + "kl_loss": 0.18694764375686646, + "loss_ib": 0.008258016780018806, + "step": 2042 + }, + { + "ce_ib": 4.939432144165039, + "ce_orig": 0.5474595427513123, + "epoch": 0.5872456682723417, + "kl_loss": 0.1608837991952896, + "loss_ib": 0.006548269651830196, + "step": 2042 + }, + { + "ce_ib": 7.473372936248779, + "ce_orig": 1.4672268629074097, + "epoch": 0.5872456682723417, + "kl_loss": 0.1711295247077942, + "loss_ib": 0.009184667840600014, + "step": 2042 + }, + { + "ce_ib": 5.83610725402832, + "ce_orig": 1.0926398038864136, + "epoch": 0.5875332518513193, + "kl_loss": 0.2679767310619354, + "loss_ib": 0.008515873923897743, + "step": 2043 + }, + { + "ce_ib": 4.0004706382751465, + "ce_orig": 0.9150829911231995, + "epoch": 0.5875332518513193, + "kl_loss": 0.12260211259126663, + "loss_ib": 0.00522649148479104, + "step": 2043 + }, + { + "ce_ib": 3.0863475799560547, + "ce_orig": 0.6312680244445801, + "epoch": 0.5875332518513193, + "kl_loss": 0.17093320190906525, + "loss_ib": 0.004795679356902838, + "step": 2043 + }, + { + "ce_ib": 6.654106140136719, + "ce_orig": 1.241966724395752, + "epoch": 0.5875332518513193, + "kl_loss": 0.3395664691925049, + "loss_ib": 0.010049770586192608, + "step": 2043 + }, + { + "ce_ib": 5.191025257110596, + "ce_orig": 1.0703442096710205, + "epoch": 0.587820835430297, + "kl_loss": 0.1417418122291565, + "loss_ib": 0.006608443334698677, + "step": 2044 + }, + { + "ce_ib": 6.3760247230529785, + "ce_orig": 1.411771297454834, + "epoch": 0.587820835430297, + "kl_loss": 0.22577844560146332, + "loss_ib": 0.00863380916416645, + "step": 2044 + }, + { + "ce_ib": 3.2642109394073486, + "ce_orig": 0.6053921580314636, + "epoch": 0.587820835430297, + "kl_loss": 0.2618617117404938, + "loss_ib": 0.005882828030735254, + "step": 2044 + }, + { + "ce_ib": 3.9096338748931885, + "ce_orig": 0.6907406449317932, + "epoch": 0.587820835430297, + "kl_loss": 0.17725342512130737, + "loss_ib": 0.005682168062776327, + "step": 2044 + }, + { + "epoch": 0.5881084190092746, + "grad_norm": 0.16644762456417084, + "learning_rate": 9.300071201038503e-06, + "loss": 0.936, + "step": 2045 + }, + { + "ce_ib": 7.573703289031982, + "ce_orig": 0.8438562154769897, + "epoch": 0.5881084190092746, + "kl_loss": 0.2791364789009094, + "loss_ib": 0.010365068912506104, + "step": 2045 + }, + { + "ce_ib": 2.4754631519317627, + "ce_orig": 0.5992810726165771, + "epoch": 0.5881084190092746, + "kl_loss": 0.1212860494852066, + "loss_ib": 0.003688323311507702, + "step": 2045 + }, + { + "ce_ib": 1.766405463218689, + "ce_orig": 0.35218337178230286, + "epoch": 0.5881084190092746, + "kl_loss": 0.30110466480255127, + "loss_ib": 0.004777452442795038, + "step": 2045 + }, + { + "ce_ib": 8.0269136428833, + "ce_orig": 0.9992926716804504, + "epoch": 0.5881084190092746, + "kl_loss": 0.3202246129512787, + "loss_ib": 0.01122915931046009, + "step": 2045 + }, + { + "ce_ib": 6.651566028594971, + "ce_orig": 1.0772193670272827, + "epoch": 0.5883960025882522, + "kl_loss": 0.1741446703672409, + "loss_ib": 0.008393012918531895, + "step": 2046 + }, + { + "ce_ib": 5.165933609008789, + "ce_orig": 0.6263613700866699, + "epoch": 0.5883960025882522, + "kl_loss": 0.24610164761543274, + "loss_ib": 0.007626950740814209, + "step": 2046 + }, + { + "ce_ib": 5.869894027709961, + "ce_orig": 1.1408411264419556, + "epoch": 0.5883960025882522, + "kl_loss": 0.3872752785682678, + "loss_ib": 0.009742646478116512, + "step": 2046 + }, + { + "ce_ib": 6.700784206390381, + "ce_orig": 0.7842115759849548, + "epoch": 0.5883960025882522, + "kl_loss": 0.20976783335208893, + "loss_ib": 0.008798462338745594, + "step": 2046 + }, + { + "ce_ib": 3.9305217266082764, + "ce_orig": 0.7430287599563599, + "epoch": 0.5886835861672298, + "kl_loss": 0.16036522388458252, + "loss_ib": 0.005534174386411905, + "step": 2047 + }, + { + "ce_ib": 8.042134284973145, + "ce_orig": 1.3651808500289917, + "epoch": 0.5886835861672298, + "kl_loss": 0.2068200409412384, + "loss_ib": 0.010110335424542427, + "step": 2047 + }, + { + "ce_ib": 5.41609001159668, + "ce_orig": 0.622063398361206, + "epoch": 0.5886835861672298, + "kl_loss": 0.2125660479068756, + "loss_ib": 0.007541750557720661, + "step": 2047 + }, + { + "ce_ib": 7.232964515686035, + "ce_orig": 1.0602325201034546, + "epoch": 0.5886835861672298, + "kl_loss": 0.17278698086738586, + "loss_ib": 0.008960834704339504, + "step": 2047 + }, + { + "ce_ib": 4.515341758728027, + "ce_orig": 1.0521502494812012, + "epoch": 0.5889711697462074, + "kl_loss": 0.2842191457748413, + "loss_ib": 0.007357532624155283, + "step": 2048 + }, + { + "ce_ib": 6.29218864440918, + "ce_orig": 1.0724506378173828, + "epoch": 0.5889711697462074, + "kl_loss": 0.26281213760375977, + "loss_ib": 0.008920310065150261, + "step": 2048 + }, + { + "ce_ib": 6.29061222076416, + "ce_orig": 0.9738880395889282, + "epoch": 0.5889711697462074, + "kl_loss": 0.2565401494503021, + "loss_ib": 0.008856013417243958, + "step": 2048 + }, + { + "ce_ib": 7.356647968292236, + "ce_orig": 1.240610957145691, + "epoch": 0.5889711697462074, + "kl_loss": 0.14009961485862732, + "loss_ib": 0.008757643401622772, + "step": 2048 + }, + { + "ce_ib": 7.707006931304932, + "ce_orig": 0.9623115658760071, + "epoch": 0.5892587533251852, + "kl_loss": 0.19092121720314026, + "loss_ib": 0.009616218507289886, + "step": 2049 + }, + { + "ce_ib": 4.078545093536377, + "ce_orig": 0.9163188934326172, + "epoch": 0.5892587533251852, + "kl_loss": 0.196649968624115, + "loss_ib": 0.006045044399797916, + "step": 2049 + }, + { + "ce_ib": 4.261326313018799, + "ce_orig": 0.9444615244865417, + "epoch": 0.5892587533251852, + "kl_loss": 0.2110685408115387, + "loss_ib": 0.006372011732310057, + "step": 2049 + }, + { + "ce_ib": 8.47182559967041, + "ce_orig": 1.1026065349578857, + "epoch": 0.5892587533251852, + "kl_loss": 0.14484983682632446, + "loss_ib": 0.009920324198901653, + "step": 2049 + }, + { + "epoch": 0.5895463369041628, + "grad_norm": 0.13144488632678986, + "learning_rate": 9.296105892784735e-06, + "loss": 0.8992, + "step": 2050 + }, + { + "ce_ib": 4.59412956237793, + "ce_orig": 0.5798391699790955, + "epoch": 0.5895463369041628, + "kl_loss": 0.18515604734420776, + "loss_ib": 0.006445690058171749, + "step": 2050 + }, + { + "ce_ib": 5.787864685058594, + "ce_orig": 1.2647463083267212, + "epoch": 0.5895463369041628, + "kl_loss": 0.6664136648178101, + "loss_ib": 0.012452001683413982, + "step": 2050 + }, + { + "ce_ib": 4.434258937835693, + "ce_orig": 0.840908944606781, + "epoch": 0.5895463369041628, + "kl_loss": 0.3353492021560669, + "loss_ib": 0.007787750568240881, + "step": 2050 + }, + { + "ce_ib": 6.084216117858887, + "ce_orig": 1.061396598815918, + "epoch": 0.5895463369041628, + "kl_loss": 0.49507468938827515, + "loss_ib": 0.011034962721168995, + "step": 2050 + }, + { + "ce_ib": 4.585057258605957, + "ce_orig": 0.4735352098941803, + "epoch": 0.5898339204831404, + "kl_loss": 0.262253999710083, + "loss_ib": 0.007207597140222788, + "step": 2051 + }, + { + "ce_ib": 6.167572498321533, + "ce_orig": 1.3375307321548462, + "epoch": 0.5898339204831404, + "kl_loss": 0.2133650779724121, + "loss_ib": 0.008301223628222942, + "step": 2051 + }, + { + "ce_ib": 3.7163245677948, + "ce_orig": 0.7413255572319031, + "epoch": 0.5898339204831404, + "kl_loss": 0.12681716680526733, + "loss_ib": 0.004984496161341667, + "step": 2051 + }, + { + "ce_ib": 6.360496520996094, + "ce_orig": 1.522934913635254, + "epoch": 0.5898339204831404, + "kl_loss": 0.28432437777519226, + "loss_ib": 0.009203740395605564, + "step": 2051 + }, + { + "ce_ib": 3.3480935096740723, + "ce_orig": 0.37387335300445557, + "epoch": 0.590121504062118, + "kl_loss": 0.2720363736152649, + "loss_ib": 0.006068457383662462, + "step": 2052 + }, + { + "ce_ib": 4.984671115875244, + "ce_orig": 1.0763942003250122, + "epoch": 0.590121504062118, + "kl_loss": 0.1703924834728241, + "loss_ib": 0.006688595749437809, + "step": 2052 + }, + { + "ce_ib": 3.4284963607788086, + "ce_orig": 0.7063562870025635, + "epoch": 0.590121504062118, + "kl_loss": 0.14729595184326172, + "loss_ib": 0.004901455715298653, + "step": 2052 + }, + { + "ce_ib": 6.527516841888428, + "ce_orig": 1.246464490890503, + "epoch": 0.590121504062118, + "kl_loss": 0.2508622407913208, + "loss_ib": 0.009036138653755188, + "step": 2052 + }, + { + "ce_ib": 3.9006760120391846, + "ce_orig": 0.5281254053115845, + "epoch": 0.5904090876410957, + "kl_loss": 0.17836639285087585, + "loss_ib": 0.00568433990702033, + "step": 2053 + }, + { + "ce_ib": 5.733733177185059, + "ce_orig": 1.0769739151000977, + "epoch": 0.5904090876410957, + "kl_loss": 0.15859729051589966, + "loss_ib": 0.007319706026464701, + "step": 2053 + }, + { + "ce_ib": 3.8053760528564453, + "ce_orig": 0.8356033563613892, + "epoch": 0.5904090876410957, + "kl_loss": 0.22188405692577362, + "loss_ib": 0.006024216301739216, + "step": 2053 + }, + { + "ce_ib": 6.246058940887451, + "ce_orig": 1.0846574306488037, + "epoch": 0.5904090876410957, + "kl_loss": 0.2462453842163086, + "loss_ib": 0.008708512410521507, + "step": 2053 + }, + { + "ce_ib": 5.412936687469482, + "ce_orig": 0.7873042225837708, + "epoch": 0.5906966712200733, + "kl_loss": 0.2851676344871521, + "loss_ib": 0.008264613337814808, + "step": 2054 + }, + { + "ce_ib": 4.343871593475342, + "ce_orig": 0.8394014835357666, + "epoch": 0.5906966712200733, + "kl_loss": 0.19336605072021484, + "loss_ib": 0.0062775323167443275, + "step": 2054 + }, + { + "ce_ib": 3.6528735160827637, + "ce_orig": 0.6667112112045288, + "epoch": 0.5906966712200733, + "kl_loss": 0.22935521602630615, + "loss_ib": 0.005946425721049309, + "step": 2054 + }, + { + "ce_ib": 5.693700790405273, + "ce_orig": 0.884652316570282, + "epoch": 0.5906966712200733, + "kl_loss": 0.25201672315597534, + "loss_ib": 0.008213868364691734, + "step": 2054 + }, + { + "epoch": 0.5909842547990509, + "grad_norm": 0.10507703572511673, + "learning_rate": 9.292130234214723e-06, + "loss": 0.8483, + "step": 2055 + }, + { + "ce_ib": 2.736111879348755, + "ce_orig": 0.423274964094162, + "epoch": 0.5909842547990509, + "kl_loss": 0.23253107070922852, + "loss_ib": 0.005061422474682331, + "step": 2055 + }, + { + "ce_ib": 6.250183582305908, + "ce_orig": 0.7986965775489807, + "epoch": 0.5909842547990509, + "kl_loss": 0.18229824304580688, + "loss_ib": 0.008073166012763977, + "step": 2055 + }, + { + "ce_ib": 5.01643180847168, + "ce_orig": 0.602378249168396, + "epoch": 0.5909842547990509, + "kl_loss": 0.1619524359703064, + "loss_ib": 0.00663595600053668, + "step": 2055 + }, + { + "ce_ib": 4.706564903259277, + "ce_orig": 0.7405197024345398, + "epoch": 0.5909842547990509, + "kl_loss": 0.32082268595695496, + "loss_ib": 0.007914791814982891, + "step": 2055 + }, + { + "ce_ib": 5.577157974243164, + "ce_orig": 0.83033287525177, + "epoch": 0.5912718383780287, + "kl_loss": 0.22945891320705414, + "loss_ib": 0.007871747016906738, + "step": 2056 + }, + { + "ce_ib": 2.8635106086730957, + "ce_orig": 0.6882225275039673, + "epoch": 0.5912718383780287, + "kl_loss": 0.1064576730132103, + "loss_ib": 0.003928087186068296, + "step": 2056 + }, + { + "ce_ib": 6.049351215362549, + "ce_orig": 0.6695765256881714, + "epoch": 0.5912718383780287, + "kl_loss": 0.2339426726102829, + "loss_ib": 0.008388778194785118, + "step": 2056 + }, + { + "ce_ib": 4.371702194213867, + "ce_orig": 0.5801190733909607, + "epoch": 0.5912718383780287, + "kl_loss": 0.2094293087720871, + "loss_ib": 0.006465995218604803, + "step": 2056 + }, + { + "ce_ib": 3.356267213821411, + "ce_orig": 0.6061080098152161, + "epoch": 0.5915594219570063, + "kl_loss": 0.1820964217185974, + "loss_ib": 0.005177231505513191, + "step": 2057 + }, + { + "ce_ib": 4.07068395614624, + "ce_orig": 0.7745286226272583, + "epoch": 0.5915594219570063, + "kl_loss": 0.3107374906539917, + "loss_ib": 0.007178058382123709, + "step": 2057 + }, + { + "ce_ib": 7.959623336791992, + "ce_orig": 1.4540094137191772, + "epoch": 0.5915594219570063, + "kl_loss": 0.22118154168128967, + "loss_ib": 0.010171438567340374, + "step": 2057 + }, + { + "ce_ib": 6.915707111358643, + "ce_orig": 1.0588812828063965, + "epoch": 0.5915594219570063, + "kl_loss": 0.19814369082450867, + "loss_ib": 0.008897143416106701, + "step": 2057 + }, + { + "ce_ib": 2.96760630607605, + "ce_orig": 0.5811135768890381, + "epoch": 0.5918470055359839, + "kl_loss": 0.15378454327583313, + "loss_ib": 0.0045054517686367035, + "step": 2058 + }, + { + "ce_ib": 4.389075756072998, + "ce_orig": 0.7807630896568298, + "epoch": 0.5918470055359839, + "kl_loss": 0.16778071224689484, + "loss_ib": 0.006066882982850075, + "step": 2058 + }, + { + "ce_ib": 2.881894826889038, + "ce_orig": 0.5922518372535706, + "epoch": 0.5918470055359839, + "kl_loss": 0.19884595274925232, + "loss_ib": 0.004870354197919369, + "step": 2058 + }, + { + "ce_ib": 5.46249532699585, + "ce_orig": 0.938137948513031, + "epoch": 0.5918470055359839, + "kl_loss": 0.21858401596546173, + "loss_ib": 0.007648335304111242, + "step": 2058 + }, + { + "ce_ib": 3.754957914352417, + "ce_orig": 0.5632526278495789, + "epoch": 0.5921345891149615, + "kl_loss": 0.4024226665496826, + "loss_ib": 0.007779184263199568, + "step": 2059 + }, + { + "ce_ib": 3.978754758834839, + "ce_orig": 0.8315809369087219, + "epoch": 0.5921345891149615, + "kl_loss": 0.23001733422279358, + "loss_ib": 0.006278928369283676, + "step": 2059 + }, + { + "ce_ib": 6.105306625366211, + "ce_orig": 1.1316747665405273, + "epoch": 0.5921345891149615, + "kl_loss": 0.1772197186946869, + "loss_ib": 0.007877503521740437, + "step": 2059 + }, + { + "ce_ib": 3.6194305419921875, + "ce_orig": 0.7556952238082886, + "epoch": 0.5921345891149615, + "kl_loss": 0.15260668098926544, + "loss_ib": 0.005145497154444456, + "step": 2059 + }, + { + "epoch": 0.5924221726939392, + "grad_norm": 0.12156666070222855, + "learning_rate": 9.288144234906753e-06, + "loss": 0.941, + "step": 2060 + }, + { + "ce_ib": 4.248643398284912, + "ce_orig": 0.6011191606521606, + "epoch": 0.5924221726939392, + "kl_loss": 0.15735268592834473, + "loss_ib": 0.0058221700601279736, + "step": 2060 + }, + { + "ce_ib": 4.506063461303711, + "ce_orig": 1.003247618675232, + "epoch": 0.5924221726939392, + "kl_loss": 0.21041925251483917, + "loss_ib": 0.006610255688428879, + "step": 2060 + }, + { + "ce_ib": 5.961613655090332, + "ce_orig": 1.1829288005828857, + "epoch": 0.5924221726939392, + "kl_loss": 0.1866694688796997, + "loss_ib": 0.007828308269381523, + "step": 2060 + }, + { + "ce_ib": 3.218472957611084, + "ce_orig": 0.8427760601043701, + "epoch": 0.5924221726939392, + "kl_loss": 0.17898887395858765, + "loss_ib": 0.005008361302316189, + "step": 2060 + }, + { + "ce_ib": 5.007608413696289, + "ce_orig": 0.6411275863647461, + "epoch": 0.5927097562729168, + "kl_loss": 0.27299579977989197, + "loss_ib": 0.007737566251307726, + "step": 2061 + }, + { + "ce_ib": 7.6573896408081055, + "ce_orig": 1.0820140838623047, + "epoch": 0.5927097562729168, + "kl_loss": 0.17076873779296875, + "loss_ib": 0.009365077130496502, + "step": 2061 + }, + { + "ce_ib": 5.588649749755859, + "ce_orig": 0.8421550393104553, + "epoch": 0.5927097562729168, + "kl_loss": 0.2026507407426834, + "loss_ib": 0.007615157403051853, + "step": 2061 + }, + { + "ce_ib": 2.4196178913116455, + "ce_orig": 0.6165553331375122, + "epoch": 0.5927097562729168, + "kl_loss": 0.15342579782009125, + "loss_ib": 0.003953875973820686, + "step": 2061 + }, + { + "ce_ib": 8.730169296264648, + "ce_orig": 1.6175671815872192, + "epoch": 0.5929973398518945, + "kl_loss": 0.2744751572608948, + "loss_ib": 0.011474921368062496, + "step": 2062 + }, + { + "ce_ib": 5.290787696838379, + "ce_orig": 0.9072948694229126, + "epoch": 0.5929973398518945, + "kl_loss": 0.256028413772583, + "loss_ib": 0.007851071655750275, + "step": 2062 + }, + { + "ce_ib": 3.2452917098999023, + "ce_orig": 0.6795864701271057, + "epoch": 0.5929973398518945, + "kl_loss": 0.15033653378486633, + "loss_ib": 0.004748656880110502, + "step": 2062 + }, + { + "ce_ib": 4.596741199493408, + "ce_orig": 0.8556483387947083, + "epoch": 0.5929973398518945, + "kl_loss": 0.16316509246826172, + "loss_ib": 0.0062283920124173164, + "step": 2062 + }, + { + "ce_ib": 5.618043422698975, + "ce_orig": 0.8574694395065308, + "epoch": 0.5932849234308721, + "kl_loss": 0.20839184522628784, + "loss_ib": 0.007701961789280176, + "step": 2063 + }, + { + "ce_ib": 6.287090301513672, + "ce_orig": 1.0756618976593018, + "epoch": 0.5932849234308721, + "kl_loss": 0.21466302871704102, + "loss_ib": 0.008433720096945763, + "step": 2063 + }, + { + "ce_ib": 4.73652458190918, + "ce_orig": 0.8521677851676941, + "epoch": 0.5932849234308721, + "kl_loss": 0.2687045931816101, + "loss_ib": 0.007423569913953543, + "step": 2063 + }, + { + "ce_ib": 3.0426318645477295, + "ce_orig": 0.6344220042228699, + "epoch": 0.5932849234308721, + "kl_loss": 0.15638887882232666, + "loss_ib": 0.0046065207570791245, + "step": 2063 + }, + { + "ce_ib": 5.0184407234191895, + "ce_orig": 0.550957977771759, + "epoch": 0.5935725070098498, + "kl_loss": 0.16721653938293457, + "loss_ib": 0.006690606474876404, + "step": 2064 + }, + { + "ce_ib": 8.494905471801758, + "ce_orig": 1.3617125749588013, + "epoch": 0.5935725070098498, + "kl_loss": 0.17605943977832794, + "loss_ib": 0.010255499742925167, + "step": 2064 + }, + { + "ce_ib": 4.676984786987305, + "ce_orig": 0.8289539217948914, + "epoch": 0.5935725070098498, + "kl_loss": 0.46488091349601746, + "loss_ib": 0.009325793944299221, + "step": 2064 + }, + { + "ce_ib": 5.7031683921813965, + "ce_orig": 0.7907314300537109, + "epoch": 0.5935725070098498, + "kl_loss": 0.7190141081809998, + "loss_ib": 0.012893308885395527, + "step": 2064 + }, + { + "epoch": 0.5938600905888274, + "grad_norm": 0.11491747200489044, + "learning_rate": 9.28414790446402e-06, + "loss": 0.8548, + "step": 2065 + }, + { + "ce_ib": 3.752856731414795, + "ce_orig": 0.6086117029190063, + "epoch": 0.5938600905888274, + "kl_loss": 0.1678592413663864, + "loss_ib": 0.005431449506431818, + "step": 2065 + }, + { + "ce_ib": 4.154584884643555, + "ce_orig": 0.4722253382205963, + "epoch": 0.5938600905888274, + "kl_loss": 0.2808340787887573, + "loss_ib": 0.006962925661355257, + "step": 2065 + }, + { + "ce_ib": 6.309084415435791, + "ce_orig": 1.1207170486450195, + "epoch": 0.5938600905888274, + "kl_loss": 0.15332366526126862, + "loss_ib": 0.007842320948839188, + "step": 2065 + }, + { + "ce_ib": 3.620288372039795, + "ce_orig": 0.647536039352417, + "epoch": 0.5938600905888274, + "kl_loss": 0.14881616830825806, + "loss_ib": 0.005108450073748827, + "step": 2065 + }, + { + "ce_ib": 6.141505241394043, + "ce_orig": 1.0184967517852783, + "epoch": 0.594147674167805, + "kl_loss": 0.17834064364433289, + "loss_ib": 0.007924911566078663, + "step": 2066 + }, + { + "ce_ib": 5.849570274353027, + "ce_orig": 0.6750349402427673, + "epoch": 0.594147674167805, + "kl_loss": 0.18002375960350037, + "loss_ib": 0.007649807725101709, + "step": 2066 + }, + { + "ce_ib": 2.8912107944488525, + "ce_orig": 0.6125627160072327, + "epoch": 0.594147674167805, + "kl_loss": 0.14266398549079895, + "loss_ib": 0.004317850805819035, + "step": 2066 + }, + { + "ce_ib": 6.303753852844238, + "ce_orig": 1.1504613161087036, + "epoch": 0.594147674167805, + "kl_loss": 0.2193274348974228, + "loss_ib": 0.008497028611600399, + "step": 2066 + }, + { + "ce_ib": 2.5488486289978027, + "ce_orig": 0.4684530794620514, + "epoch": 0.5944352577467826, + "kl_loss": 0.19753870368003845, + "loss_ib": 0.004524235613644123, + "step": 2067 + }, + { + "ce_ib": 5.814770698547363, + "ce_orig": 1.1630233526229858, + "epoch": 0.5944352577467826, + "kl_loss": 0.2190193086862564, + "loss_ib": 0.008004963397979736, + "step": 2067 + }, + { + "ce_ib": 3.9435291290283203, + "ce_orig": 0.7115368843078613, + "epoch": 0.5944352577467826, + "kl_loss": 0.1378944367170334, + "loss_ib": 0.005322473589330912, + "step": 2067 + }, + { + "ce_ib": 3.3251471519470215, + "ce_orig": 0.6287946105003357, + "epoch": 0.5944352577467826, + "kl_loss": 0.24056656658649445, + "loss_ib": 0.005730812903493643, + "step": 2067 + }, + { + "ce_ib": 6.838568687438965, + "ce_orig": 1.2248945236206055, + "epoch": 0.5947228413257603, + "kl_loss": 0.207009419798851, + "loss_ib": 0.00890866294503212, + "step": 2068 + }, + { + "ce_ib": 3.6662333011627197, + "ce_orig": 0.5293366312980652, + "epoch": 0.5947228413257603, + "kl_loss": 0.2073693573474884, + "loss_ib": 0.005739927291870117, + "step": 2068 + }, + { + "ce_ib": 2.781123161315918, + "ce_orig": 0.48525944352149963, + "epoch": 0.5947228413257603, + "kl_loss": 0.1400332748889923, + "loss_ib": 0.004181455820798874, + "step": 2068 + }, + { + "ce_ib": 2.7869696617126465, + "ce_orig": 0.6144731044769287, + "epoch": 0.5947228413257603, + "kl_loss": 0.13396167755126953, + "loss_ib": 0.004126586485654116, + "step": 2068 + }, + { + "ce_ib": 3.058475971221924, + "ce_orig": 0.5810040235519409, + "epoch": 0.595010424904738, + "kl_loss": 0.23707331717014313, + "loss_ib": 0.0054292092099785805, + "step": 2069 + }, + { + "ce_ib": 3.6933155059814453, + "ce_orig": 0.5778312087059021, + "epoch": 0.595010424904738, + "kl_loss": 0.17655959725379944, + "loss_ib": 0.005458911415189505, + "step": 2069 + }, + { + "ce_ib": 3.7702252864837646, + "ce_orig": 0.9477126598358154, + "epoch": 0.595010424904738, + "kl_loss": 0.18256643414497375, + "loss_ib": 0.005595889408141375, + "step": 2069 + }, + { + "ce_ib": 4.253366947174072, + "ce_orig": 0.5540456175804138, + "epoch": 0.595010424904738, + "kl_loss": 0.25291597843170166, + "loss_ib": 0.00678252661600709, + "step": 2069 + }, + { + "epoch": 0.5952980084837156, + "grad_norm": 0.13097120821475983, + "learning_rate": 9.280141252514614e-06, + "loss": 0.8013, + "step": 2070 + }, + { + "ce_ib": 6.171236991882324, + "ce_orig": 0.5903248190879822, + "epoch": 0.5952980084837156, + "kl_loss": 0.3133825659751892, + "loss_ib": 0.00930506270378828, + "step": 2070 + }, + { + "ce_ib": 7.131008625030518, + "ce_orig": 1.0830953121185303, + "epoch": 0.5952980084837156, + "kl_loss": 0.21663135290145874, + "loss_ib": 0.009297321550548077, + "step": 2070 + }, + { + "ce_ib": 3.344316244125366, + "ce_orig": 0.47048458456993103, + "epoch": 0.5952980084837156, + "kl_loss": 0.22308547794818878, + "loss_ib": 0.005575171206146479, + "step": 2070 + }, + { + "ce_ib": 6.163971900939941, + "ce_orig": 0.7799375653266907, + "epoch": 0.5952980084837156, + "kl_loss": 0.2768741548061371, + "loss_ib": 0.00893271341919899, + "step": 2070 + }, + { + "ce_ib": 5.176492214202881, + "ce_orig": 0.6897236108779907, + "epoch": 0.5955855920626932, + "kl_loss": 0.16324345767498016, + "loss_ib": 0.006808926817029715, + "step": 2071 + }, + { + "ce_ib": 2.8466508388519287, + "ce_orig": 0.5691062808036804, + "epoch": 0.5955855920626932, + "kl_loss": 0.18539051711559296, + "loss_ib": 0.004700555466115475, + "step": 2071 + }, + { + "ce_ib": 4.133129596710205, + "ce_orig": 0.6584720611572266, + "epoch": 0.5955855920626932, + "kl_loss": 0.24256190657615662, + "loss_ib": 0.0065587484277784824, + "step": 2071 + }, + { + "ce_ib": 6.468080520629883, + "ce_orig": 0.7348335981369019, + "epoch": 0.5955855920626932, + "kl_loss": 0.29541581869125366, + "loss_ib": 0.009422238916158676, + "step": 2071 + }, + { + "ce_ib": 3.32320499420166, + "ce_orig": 0.3435814678668976, + "epoch": 0.5958731756416709, + "kl_loss": 0.18724419176578522, + "loss_ib": 0.005195646546781063, + "step": 2072 + }, + { + "ce_ib": 5.39824914932251, + "ce_orig": 0.8594803810119629, + "epoch": 0.5958731756416709, + "kl_loss": 0.26189982891082764, + "loss_ib": 0.008017247542738914, + "step": 2072 + }, + { + "ce_ib": 6.2181525230407715, + "ce_orig": 0.8033913373947144, + "epoch": 0.5958731756416709, + "kl_loss": 0.19071519374847412, + "loss_ib": 0.008125304244458675, + "step": 2072 + }, + { + "ce_ib": 5.449014663696289, + "ce_orig": 0.853152334690094, + "epoch": 0.5958731756416709, + "kl_loss": 0.2036660760641098, + "loss_ib": 0.007485675625503063, + "step": 2072 + }, + { + "ce_ib": 5.962812423706055, + "ce_orig": 0.7293989062309265, + "epoch": 0.5961607592206485, + "kl_loss": 0.273118793964386, + "loss_ib": 0.008693999610841274, + "step": 2073 + }, + { + "ce_ib": 7.237607002258301, + "ce_orig": 1.3457728624343872, + "epoch": 0.5961607592206485, + "kl_loss": 0.25628921389579773, + "loss_ib": 0.009800499305129051, + "step": 2073 + }, + { + "ce_ib": 4.905211925506592, + "ce_orig": 0.6746551394462585, + "epoch": 0.5961607592206485, + "kl_loss": 0.20697373151779175, + "loss_ib": 0.006974949035793543, + "step": 2073 + }, + { + "ce_ib": 6.8421807289123535, + "ce_orig": 0.6132577061653137, + "epoch": 0.5961607592206485, + "kl_loss": 0.20820024609565735, + "loss_ib": 0.008924183435738087, + "step": 2073 + }, + { + "ce_ib": 6.010553359985352, + "ce_orig": 1.2015384435653687, + "epoch": 0.5964483427996261, + "kl_loss": 0.2384946644306183, + "loss_ib": 0.008395499549806118, + "step": 2074 + }, + { + "ce_ib": 4.4026360511779785, + "ce_orig": 0.9705696702003479, + "epoch": 0.5964483427996261, + "kl_loss": 0.22556087374687195, + "loss_ib": 0.006658244412392378, + "step": 2074 + }, + { + "ce_ib": 6.5355095863342285, + "ce_orig": 1.207631230354309, + "epoch": 0.5964483427996261, + "kl_loss": 0.1978880912065506, + "loss_ib": 0.00851439032703638, + "step": 2074 + }, + { + "ce_ib": 5.488405704498291, + "ce_orig": 0.9157701134681702, + "epoch": 0.5964483427996261, + "kl_loss": 0.19019928574562073, + "loss_ib": 0.007390398997813463, + "step": 2074 + }, + { + "epoch": 0.5967359263786037, + "grad_norm": 0.13625822961330414, + "learning_rate": 9.27612428871149e-06, + "loss": 0.8406, + "step": 2075 + }, + { + "ce_ib": 3.0082650184631348, + "ce_orig": 0.47534269094467163, + "epoch": 0.5967359263786037, + "kl_loss": 0.17265082895755768, + "loss_ib": 0.004734773654490709, + "step": 2075 + }, + { + "ce_ib": 1.438467264175415, + "ce_orig": 0.18047429621219635, + "epoch": 0.5967359263786037, + "kl_loss": 0.3461623787879944, + "loss_ib": 0.004900091327726841, + "step": 2075 + }, + { + "ce_ib": 4.305662631988525, + "ce_orig": 0.8647083640098572, + "epoch": 0.5967359263786037, + "kl_loss": 0.24142710864543915, + "loss_ib": 0.006719933822751045, + "step": 2075 + }, + { + "ce_ib": 4.125507354736328, + "ce_orig": 0.7370970249176025, + "epoch": 0.5967359263786037, + "kl_loss": 0.21225431561470032, + "loss_ib": 0.006248050834983587, + "step": 2075 + }, + { + "ce_ib": 3.2206335067749023, + "ce_orig": 0.47642046213150024, + "epoch": 0.5970235099575815, + "kl_loss": 0.24507978558540344, + "loss_ib": 0.005671431310474873, + "step": 2076 + }, + { + "ce_ib": 4.709357261657715, + "ce_orig": 0.7291908860206604, + "epoch": 0.5970235099575815, + "kl_loss": 0.21995604038238525, + "loss_ib": 0.006908917333930731, + "step": 2076 + }, + { + "ce_ib": 6.2564826011657715, + "ce_orig": 0.9551504850387573, + "epoch": 0.5970235099575815, + "kl_loss": 0.28348308801651, + "loss_ib": 0.009091312997043133, + "step": 2076 + }, + { + "ce_ib": 3.568683624267578, + "ce_orig": 0.2636149525642395, + "epoch": 0.5970235099575815, + "kl_loss": 0.17387905716896057, + "loss_ib": 0.005307474173605442, + "step": 2076 + }, + { + "ce_ib": 3.4836227893829346, + "ce_orig": 0.6143074035644531, + "epoch": 0.5973110935365591, + "kl_loss": 0.20271126925945282, + "loss_ib": 0.00551073532551527, + "step": 2077 + }, + { + "ce_ib": 5.4516401290893555, + "ce_orig": 0.7557647228240967, + "epoch": 0.5973110935365591, + "kl_loss": 0.17650623619556427, + "loss_ib": 0.007216702681034803, + "step": 2077 + }, + { + "ce_ib": 9.112334251403809, + "ce_orig": 1.0993027687072754, + "epoch": 0.5973110935365591, + "kl_loss": 0.22774910926818848, + "loss_ib": 0.011389825493097305, + "step": 2077 + }, + { + "ce_ib": 6.631237983703613, + "ce_orig": 0.9337772727012634, + "epoch": 0.5973110935365591, + "kl_loss": 0.18126292526721954, + "loss_ib": 0.008443866856396198, + "step": 2077 + }, + { + "ce_ib": 3.9743731021881104, + "ce_orig": 0.49186524748802185, + "epoch": 0.5975986771155367, + "kl_loss": 0.29742109775543213, + "loss_ib": 0.006948584225028753, + "step": 2078 + }, + { + "ce_ib": 7.268859386444092, + "ce_orig": 1.249497890472412, + "epoch": 0.5975986771155367, + "kl_loss": 0.22757278382778168, + "loss_ib": 0.009544587694108486, + "step": 2078 + }, + { + "ce_ib": 2.933871030807495, + "ce_orig": 0.8399577140808105, + "epoch": 0.5975986771155367, + "kl_loss": 0.11978927254676819, + "loss_ib": 0.004131763707846403, + "step": 2078 + }, + { + "ce_ib": 5.381503105163574, + "ce_orig": 0.8440800905227661, + "epoch": 0.5975986771155367, + "kl_loss": 0.26429328322410583, + "loss_ib": 0.008024436421692371, + "step": 2078 + }, + { + "ce_ib": 5.056124687194824, + "ce_orig": 0.7371515035629272, + "epoch": 0.5978862606945143, + "kl_loss": 0.19052472710609436, + "loss_ib": 0.006961371749639511, + "step": 2079 + }, + { + "ce_ib": 6.356292724609375, + "ce_orig": 0.7682399153709412, + "epoch": 0.5978862606945143, + "kl_loss": 0.35712194442749023, + "loss_ib": 0.009927512146532536, + "step": 2079 + }, + { + "ce_ib": 4.677209854125977, + "ce_orig": 0.7622928023338318, + "epoch": 0.5978862606945143, + "kl_loss": 0.16212056577205658, + "loss_ib": 0.00629841536283493, + "step": 2079 + }, + { + "ce_ib": 4.289590835571289, + "ce_orig": 0.5875444412231445, + "epoch": 0.5978862606945143, + "kl_loss": 0.27369120717048645, + "loss_ib": 0.00702650286257267, + "step": 2079 + }, + { + "epoch": 0.598173844273492, + "grad_norm": 0.11476273834705353, + "learning_rate": 9.272097022732444e-06, + "loss": 0.862, + "step": 2080 + }, + { + "ce_ib": 4.460967540740967, + "ce_orig": 0.8822163343429565, + "epoch": 0.598173844273492, + "kl_loss": 0.18671518564224243, + "loss_ib": 0.0063281189650297165, + "step": 2080 + }, + { + "ce_ib": 4.260026454925537, + "ce_orig": 0.6982441544532776, + "epoch": 0.598173844273492, + "kl_loss": 0.17402076721191406, + "loss_ib": 0.006000233814120293, + "step": 2080 + }, + { + "ce_ib": 3.826930522918701, + "ce_orig": 0.957663893699646, + "epoch": 0.598173844273492, + "kl_loss": 0.18133273720741272, + "loss_ib": 0.005640257615596056, + "step": 2080 + }, + { + "ce_ib": 3.6623902320861816, + "ce_orig": 0.6616648435592651, + "epoch": 0.598173844273492, + "kl_loss": 0.19481715559959412, + "loss_ib": 0.005610561929643154, + "step": 2080 + }, + { + "ce_ib": 2.686539888381958, + "ce_orig": 0.5880342125892639, + "epoch": 0.5984614278524696, + "kl_loss": 0.21468524634838104, + "loss_ib": 0.004833392333239317, + "step": 2081 + }, + { + "ce_ib": 3.237717866897583, + "ce_orig": 0.3776146173477173, + "epoch": 0.5984614278524696, + "kl_loss": 0.23309177160263062, + "loss_ib": 0.005568635184317827, + "step": 2081 + }, + { + "ce_ib": 5.037214279174805, + "ce_orig": 0.7644826769828796, + "epoch": 0.5984614278524696, + "kl_loss": 0.1452305018901825, + "loss_ib": 0.006489518564194441, + "step": 2081 + }, + { + "ce_ib": 5.38549280166626, + "ce_orig": 1.0354236364364624, + "epoch": 0.5984614278524696, + "kl_loss": 0.2297293245792389, + "loss_ib": 0.0076827858574688435, + "step": 2081 + }, + { + "ce_ib": 3.767529010772705, + "ce_orig": 0.7511104345321655, + "epoch": 0.5987490114314472, + "kl_loss": 0.15210753679275513, + "loss_ib": 0.0052886041812598705, + "step": 2082 + }, + { + "ce_ib": 3.160916328430176, + "ce_orig": 0.7515000700950623, + "epoch": 0.5987490114314472, + "kl_loss": 0.1490771770477295, + "loss_ib": 0.004651688039302826, + "step": 2082 + }, + { + "ce_ib": 3.3616483211517334, + "ce_orig": 0.13828474283218384, + "epoch": 0.5987490114314472, + "kl_loss": 0.13184183835983276, + "loss_ib": 0.004680066369473934, + "step": 2082 + }, + { + "ce_ib": 5.73211145401001, + "ce_orig": 1.0202233791351318, + "epoch": 0.5987490114314472, + "kl_loss": 0.20128896832466125, + "loss_ib": 0.007745000999420881, + "step": 2082 + }, + { + "ce_ib": 4.945359706878662, + "ce_orig": 0.5194501876831055, + "epoch": 0.5990365950104249, + "kl_loss": 0.290300577878952, + "loss_ib": 0.007848365232348442, + "step": 2083 + }, + { + "ce_ib": 2.5904102325439453, + "ce_orig": 0.35565775632858276, + "epoch": 0.5990365950104249, + "kl_loss": 0.12042788416147232, + "loss_ib": 0.0037946891970932484, + "step": 2083 + }, + { + "ce_ib": 1.2720377445220947, + "ce_orig": 0.19666935503482819, + "epoch": 0.5990365950104249, + "kl_loss": 0.3596463203430176, + "loss_ib": 0.004868500865995884, + "step": 2083 + }, + { + "ce_ib": 7.751014709472656, + "ce_orig": 1.2603626251220703, + "epoch": 0.5990365950104249, + "kl_loss": 0.1805923879146576, + "loss_ib": 0.009556937962770462, + "step": 2083 + }, + { + "ce_ib": 4.48265266418457, + "ce_orig": 0.8277034759521484, + "epoch": 0.5993241785894026, + "kl_loss": 0.1738310009241104, + "loss_ib": 0.006220962852239609, + "step": 2084 + }, + { + "ce_ib": 5.61903190612793, + "ce_orig": 0.4434449374675751, + "epoch": 0.5993241785894026, + "kl_loss": 0.15921629965305328, + "loss_ib": 0.00721119437366724, + "step": 2084 + }, + { + "ce_ib": 7.312516212463379, + "ce_orig": 1.1309770345687866, + "epoch": 0.5993241785894026, + "kl_loss": 0.1566864252090454, + "loss_ib": 0.00887938030064106, + "step": 2084 + }, + { + "ce_ib": 7.714537143707275, + "ce_orig": 1.477719783782959, + "epoch": 0.5993241785894026, + "kl_loss": 0.15421676635742188, + "loss_ib": 0.009256704710423946, + "step": 2084 + }, + { + "epoch": 0.5996117621683802, + "grad_norm": 0.1325940489768982, + "learning_rate": 9.268059464280095e-06, + "loss": 0.8217, + "step": 2085 + }, + { + "ce_ib": 8.082406044006348, + "ce_orig": 1.7936134338378906, + "epoch": 0.5996117621683802, + "kl_loss": 0.2558595538139343, + "loss_ib": 0.010641001164913177, + "step": 2085 + }, + { + "ce_ib": 3.264824628829956, + "ce_orig": 0.6696819067001343, + "epoch": 0.5996117621683802, + "kl_loss": 0.18294455111026764, + "loss_ib": 0.005094270221889019, + "step": 2085 + }, + { + "ce_ib": 2.988065004348755, + "ce_orig": 0.5979989171028137, + "epoch": 0.5996117621683802, + "kl_loss": 0.1452246457338333, + "loss_ib": 0.004440311808139086, + "step": 2085 + }, + { + "ce_ib": 6.243656635284424, + "ce_orig": 0.8166417479515076, + "epoch": 0.5996117621683802, + "kl_loss": 0.2831941545009613, + "loss_ib": 0.009075598791241646, + "step": 2085 + }, + { + "ce_ib": 4.610079765319824, + "ce_orig": 0.7553311586380005, + "epoch": 0.5998993457473578, + "kl_loss": 0.13966938853263855, + "loss_ib": 0.0060067735612392426, + "step": 2086 + }, + { + "ce_ib": 3.9481794834136963, + "ce_orig": 0.5105636715888977, + "epoch": 0.5998993457473578, + "kl_loss": 0.30191195011138916, + "loss_ib": 0.006967298686504364, + "step": 2086 + }, + { + "ce_ib": 6.209671974182129, + "ce_orig": 0.6980500817298889, + "epoch": 0.5998993457473578, + "kl_loss": 0.23936083912849426, + "loss_ib": 0.008603280410170555, + "step": 2086 + }, + { + "ce_ib": 4.788403034210205, + "ce_orig": 0.6483240127563477, + "epoch": 0.5998993457473578, + "kl_loss": 0.28850898146629333, + "loss_ib": 0.007673492655158043, + "step": 2086 + }, + { + "ce_ib": 2.751786470413208, + "ce_orig": 0.6620684862136841, + "epoch": 0.6001869293263354, + "kl_loss": 0.13669724762439728, + "loss_ib": 0.004118758719414473, + "step": 2087 + }, + { + "ce_ib": 2.474940299987793, + "ce_orig": 0.5832647681236267, + "epoch": 0.6001869293263354, + "kl_loss": 0.1528944969177246, + "loss_ib": 0.004003885202109814, + "step": 2087 + }, + { + "ce_ib": 3.148454427719116, + "ce_orig": 0.506666362285614, + "epoch": 0.6001869293263354, + "kl_loss": 0.21760955452919006, + "loss_ib": 0.0053245495073497295, + "step": 2087 + }, + { + "ce_ib": 2.912853240966797, + "ce_orig": 0.4210709035396576, + "epoch": 0.6001869293263354, + "kl_loss": 0.15655303001403809, + "loss_ib": 0.004478383343666792, + "step": 2087 + }, + { + "ce_ib": 4.57736873626709, + "ce_orig": 0.7178866267204285, + "epoch": 0.6004745129053131, + "kl_loss": 0.21136485040187836, + "loss_ib": 0.006691017188131809, + "step": 2088 + }, + { + "ce_ib": 4.886351108551025, + "ce_orig": 0.9937307238578796, + "epoch": 0.6004745129053131, + "kl_loss": 0.16396041214466095, + "loss_ib": 0.0065259551629424095, + "step": 2088 + }, + { + "ce_ib": 4.5424628257751465, + "ce_orig": 0.6400948762893677, + "epoch": 0.6004745129053131, + "kl_loss": 0.2126380205154419, + "loss_ib": 0.006668842863291502, + "step": 2088 + }, + { + "ce_ib": 4.56268310546875, + "ce_orig": 1.062746286392212, + "epoch": 0.6004745129053131, + "kl_loss": 0.22786788642406464, + "loss_ib": 0.006841361988335848, + "step": 2088 + }, + { + "ce_ib": 6.46700382232666, + "ce_orig": 1.0065141916275024, + "epoch": 0.6007620964842908, + "kl_loss": 0.25201237201690674, + "loss_ib": 0.008987127803266048, + "step": 2089 + }, + { + "ce_ib": 5.147810935974121, + "ce_orig": 1.0766664743423462, + "epoch": 0.6007620964842908, + "kl_loss": 0.4153611660003662, + "loss_ib": 0.009301422163844109, + "step": 2089 + }, + { + "ce_ib": 3.8252246379852295, + "ce_orig": 0.6971337199211121, + "epoch": 0.6007620964842908, + "kl_loss": 0.19865623116493225, + "loss_ib": 0.005811786744743586, + "step": 2089 + }, + { + "ce_ib": 6.350879192352295, + "ce_orig": 1.0373011827468872, + "epoch": 0.6007620964842908, + "kl_loss": 0.21543404459953308, + "loss_ib": 0.008505218662321568, + "step": 2089 + }, + { + "epoch": 0.6010496800632684, + "grad_norm": 0.11215566098690033, + "learning_rate": 9.264011623081859e-06, + "loss": 0.8294, + "step": 2090 + }, + { + "ce_ib": 4.727746963500977, + "ce_orig": 0.6475889682769775, + "epoch": 0.6010496800632684, + "kl_loss": 0.1473398208618164, + "loss_ib": 0.006201145704835653, + "step": 2090 + }, + { + "ce_ib": 3.913344621658325, + "ce_orig": 0.8017098903656006, + "epoch": 0.6010496800632684, + "kl_loss": 0.20396456122398376, + "loss_ib": 0.005952990148216486, + "step": 2090 + }, + { + "ce_ib": 4.595177173614502, + "ce_orig": 1.1179481744766235, + "epoch": 0.6010496800632684, + "kl_loss": 0.19838805496692657, + "loss_ib": 0.006579057313501835, + "step": 2090 + }, + { + "ce_ib": 3.843561887741089, + "ce_orig": 0.6480299830436707, + "epoch": 0.6010496800632684, + "kl_loss": 0.24693216383457184, + "loss_ib": 0.006312883459031582, + "step": 2090 + }, + { + "ce_ib": 3.4909827709198, + "ce_orig": 0.7175936698913574, + "epoch": 0.601337263642246, + "kl_loss": 0.18714486062526703, + "loss_ib": 0.0053624315187335014, + "step": 2091 + }, + { + "ce_ib": 3.8991596698760986, + "ce_orig": 0.650896430015564, + "epoch": 0.601337263642246, + "kl_loss": 0.13521143794059753, + "loss_ib": 0.005251273978501558, + "step": 2091 + }, + { + "ce_ib": 5.10942268371582, + "ce_orig": 0.7713260650634766, + "epoch": 0.601337263642246, + "kl_loss": 0.19073829054832458, + "loss_ib": 0.007016805466264486, + "step": 2091 + }, + { + "ce_ib": 4.478224754333496, + "ce_orig": 0.9309024214744568, + "epoch": 0.601337263642246, + "kl_loss": 0.16332119703292847, + "loss_ib": 0.006111436989158392, + "step": 2091 + }, + { + "ce_ib": 4.191051959991455, + "ce_orig": 0.9116933941841125, + "epoch": 0.6016248472212237, + "kl_loss": 0.44909727573394775, + "loss_ib": 0.008682024665176868, + "step": 2092 + }, + { + "ce_ib": 6.7304840087890625, + "ce_orig": 1.3909428119659424, + "epoch": 0.6016248472212237, + "kl_loss": 0.17702725529670715, + "loss_ib": 0.008500756695866585, + "step": 2092 + }, + { + "ce_ib": 6.66505765914917, + "ce_orig": 1.4173803329467773, + "epoch": 0.6016248472212237, + "kl_loss": 0.2214445173740387, + "loss_ib": 0.008879503235220909, + "step": 2092 + }, + { + "ce_ib": 5.718295574188232, + "ce_orig": 1.3040616512298584, + "epoch": 0.6016248472212237, + "kl_loss": 0.37144291400909424, + "loss_ib": 0.009432724677026272, + "step": 2092 + }, + { + "ce_ib": 4.147932529449463, + "ce_orig": 0.8032743334770203, + "epoch": 0.6019124308002013, + "kl_loss": 0.19093680381774902, + "loss_ib": 0.006057300604879856, + "step": 2093 + }, + { + "ce_ib": 8.267769813537598, + "ce_orig": 1.403730034828186, + "epoch": 0.6019124308002013, + "kl_loss": 0.18498936295509338, + "loss_ib": 0.010117663070559502, + "step": 2093 + }, + { + "ce_ib": 6.650984764099121, + "ce_orig": 1.058042287826538, + "epoch": 0.6019124308002013, + "kl_loss": 0.2810244560241699, + "loss_ib": 0.009461229667067528, + "step": 2093 + }, + { + "ce_ib": 4.832900047302246, + "ce_orig": 0.5885409712791443, + "epoch": 0.6019124308002013, + "kl_loss": 0.3039490282535553, + "loss_ib": 0.007872390560805798, + "step": 2093 + }, + { + "ce_ib": 4.066385746002197, + "ce_orig": 0.5938214659690857, + "epoch": 0.6022000143791789, + "kl_loss": 0.2049914002418518, + "loss_ib": 0.006116299424320459, + "step": 2094 + }, + { + "ce_ib": 2.7283902168273926, + "ce_orig": 0.414643257856369, + "epoch": 0.6022000143791789, + "kl_loss": 0.23084616661071777, + "loss_ib": 0.0050368523225188255, + "step": 2094 + }, + { + "ce_ib": 4.095137119293213, + "ce_orig": 1.0721259117126465, + "epoch": 0.6022000143791789, + "kl_loss": 0.15656571090221405, + "loss_ib": 0.005660794209688902, + "step": 2094 + }, + { + "ce_ib": 2.631633758544922, + "ce_orig": 0.37384849786758423, + "epoch": 0.6022000143791789, + "kl_loss": 0.318506121635437, + "loss_ib": 0.005816694814711809, + "step": 2094 + }, + { + "epoch": 0.6024875979581565, + "grad_norm": 0.12139395624399185, + "learning_rate": 9.259953508889925e-06, + "loss": 0.8983, + "step": 2095 + }, + { + "ce_ib": 3.175386667251587, + "ce_orig": 0.8221135139465332, + "epoch": 0.6024875979581565, + "kl_loss": 0.17824412882328033, + "loss_ib": 0.004957827739417553, + "step": 2095 + }, + { + "ce_ib": 2.4482743740081787, + "ce_orig": 0.6656613349914551, + "epoch": 0.6024875979581565, + "kl_loss": 0.14436644315719604, + "loss_ib": 0.0038919388316571712, + "step": 2095 + }, + { + "ce_ib": 5.8846116065979, + "ce_orig": 1.0916104316711426, + "epoch": 0.6024875979581565, + "kl_loss": 0.17465797066688538, + "loss_ib": 0.007631191052496433, + "step": 2095 + }, + { + "ce_ib": 6.0317182540893555, + "ce_orig": 1.0918896198272705, + "epoch": 0.6024875979581565, + "kl_loss": 0.14249981939792633, + "loss_ib": 0.007456716150045395, + "step": 2095 + }, + { + "ce_ib": 5.0229878425598145, + "ce_orig": 0.7971199154853821, + "epoch": 0.6027751815371343, + "kl_loss": 0.3438808023929596, + "loss_ib": 0.00846179574728012, + "step": 2096 + }, + { + "ce_ib": 5.592642784118652, + "ce_orig": 0.6603500843048096, + "epoch": 0.6027751815371343, + "kl_loss": 0.2648226022720337, + "loss_ib": 0.008240869268774986, + "step": 2096 + }, + { + "ce_ib": 4.188091278076172, + "ce_orig": 0.7250223159790039, + "epoch": 0.6027751815371343, + "kl_loss": 0.24168048799037933, + "loss_ib": 0.006604895927011967, + "step": 2096 + }, + { + "ce_ib": 4.790040969848633, + "ce_orig": 0.9240785241127014, + "epoch": 0.6027751815371343, + "kl_loss": 0.14271962642669678, + "loss_ib": 0.006217237561941147, + "step": 2096 + }, + { + "ce_ib": 7.86405611038208, + "ce_orig": 1.825260877609253, + "epoch": 0.6030627651161119, + "kl_loss": 0.14283712208271027, + "loss_ib": 0.009292427450418472, + "step": 2097 + }, + { + "ce_ib": 4.7033305168151855, + "ce_orig": 1.0352340936660767, + "epoch": 0.6030627651161119, + "kl_loss": 0.2924274802207947, + "loss_ib": 0.007627604994922876, + "step": 2097 + }, + { + "ce_ib": 7.174673557281494, + "ce_orig": 0.7464105486869812, + "epoch": 0.6030627651161119, + "kl_loss": 0.27289408445358276, + "loss_ib": 0.009903614409267902, + "step": 2097 + }, + { + "ce_ib": 5.175540924072266, + "ce_orig": 0.9560375809669495, + "epoch": 0.6030627651161119, + "kl_loss": 0.2242799699306488, + "loss_ib": 0.0074183400720357895, + "step": 2097 + }, + { + "ce_ib": 6.1560492515563965, + "ce_orig": 1.3387001752853394, + "epoch": 0.6033503486950895, + "kl_loss": 0.3016314208507538, + "loss_ib": 0.009172363206744194, + "step": 2098 + }, + { + "ce_ib": 4.332640171051025, + "ce_orig": 0.8088712096214294, + "epoch": 0.6033503486950895, + "kl_loss": 0.1949349343776703, + "loss_ib": 0.006281989626586437, + "step": 2098 + }, + { + "ce_ib": 5.805788993835449, + "ce_orig": 1.1944451332092285, + "epoch": 0.6033503486950895, + "kl_loss": 0.2268642783164978, + "loss_ib": 0.00807443168014288, + "step": 2098 + }, + { + "ce_ib": 5.1681227684021, + "ce_orig": 1.0210596323013306, + "epoch": 0.6033503486950895, + "kl_loss": 0.22374176979064941, + "loss_ib": 0.007405539974570274, + "step": 2098 + }, + { + "ce_ib": 4.980714797973633, + "ce_orig": 0.9220489859580994, + "epoch": 0.6036379322740671, + "kl_loss": 0.27895432710647583, + "loss_ib": 0.007770258001983166, + "step": 2099 + }, + { + "ce_ib": 4.185295104980469, + "ce_orig": 0.9611167907714844, + "epoch": 0.6036379322740671, + "kl_loss": 0.13708534836769104, + "loss_ib": 0.005556148011237383, + "step": 2099 + }, + { + "ce_ib": 5.33571720123291, + "ce_orig": 0.7860413193702698, + "epoch": 0.6036379322740671, + "kl_loss": 0.17641831934452057, + "loss_ib": 0.0070998999290168285, + "step": 2099 + }, + { + "ce_ib": 5.9347381591796875, + "ce_orig": 1.200312852859497, + "epoch": 0.6036379322740671, + "kl_loss": 0.2357495278120041, + "loss_ib": 0.00829223357141018, + "step": 2099 + }, + { + "epoch": 0.6039255158530448, + "grad_norm": 0.12281585484743118, + "learning_rate": 9.255885131481231e-06, + "loss": 0.8864, + "step": 2100 + }, + { + "ce_ib": 5.931026458740234, + "ce_orig": 1.2171458005905151, + "epoch": 0.6039255158530448, + "kl_loss": 0.15274977684020996, + "loss_ib": 0.0074585238471627235, + "step": 2100 + }, + { + "ce_ib": 4.037585735321045, + "ce_orig": 0.5968522429466248, + "epoch": 0.6039255158530448, + "kl_loss": 0.27065396308898926, + "loss_ib": 0.00674412539228797, + "step": 2100 + }, + { + "ce_ib": 7.190185070037842, + "ce_orig": 0.8761034607887268, + "epoch": 0.6039255158530448, + "kl_loss": 0.20919720828533173, + "loss_ib": 0.009282156825065613, + "step": 2100 + }, + { + "ce_ib": 2.0245614051818848, + "ce_orig": 0.20018620789051056, + "epoch": 0.6039255158530448, + "kl_loss": 0.45052579045295715, + "loss_ib": 0.006529819220304489, + "step": 2100 + }, + { + "ce_ib": 7.2500834465026855, + "ce_orig": 1.048833966255188, + "epoch": 0.6042130994320224, + "kl_loss": 0.2508978843688965, + "loss_ib": 0.009759061969816685, + "step": 2101 + }, + { + "ce_ib": 3.4611642360687256, + "ce_orig": 0.257497102022171, + "epoch": 0.6042130994320224, + "kl_loss": 0.2169705033302307, + "loss_ib": 0.005630868952721357, + "step": 2101 + }, + { + "ce_ib": 4.60268497467041, + "ce_orig": 0.7579203248023987, + "epoch": 0.6042130994320224, + "kl_loss": 0.29804739356040955, + "loss_ib": 0.007583159022033215, + "step": 2101 + }, + { + "ce_ib": 4.1543779373168945, + "ce_orig": 0.9789897799491882, + "epoch": 0.6042130994320224, + "kl_loss": 0.19559822976589203, + "loss_ib": 0.006110359914600849, + "step": 2101 + }, + { + "ce_ib": 3.808459758758545, + "ce_orig": 0.4731706380844116, + "epoch": 0.604500683011, + "kl_loss": 0.18388184905052185, + "loss_ib": 0.005647278390824795, + "step": 2102 + }, + { + "ce_ib": 4.9764933586120605, + "ce_orig": 0.7839788794517517, + "epoch": 0.604500683011, + "kl_loss": 0.22187867760658264, + "loss_ib": 0.007195279933512211, + "step": 2102 + }, + { + "ce_ib": 3.1799213886260986, + "ce_orig": 0.7104110717773438, + "epoch": 0.604500683011, + "kl_loss": 0.23116706311702728, + "loss_ib": 0.005491591989994049, + "step": 2102 + }, + { + "ce_ib": 5.238558292388916, + "ce_orig": 1.080971598625183, + "epoch": 0.604500683011, + "kl_loss": 0.2622756361961365, + "loss_ib": 0.00786131527274847, + "step": 2102 + }, + { + "ce_ib": 3.700233221054077, + "ce_orig": 0.6759883761405945, + "epoch": 0.6047882665899778, + "kl_loss": 0.11124815791845322, + "loss_ib": 0.004812715109437704, + "step": 2103 + }, + { + "ce_ib": 5.205451488494873, + "ce_orig": 0.7194454669952393, + "epoch": 0.6047882665899778, + "kl_loss": 0.19555526971817017, + "loss_ib": 0.007161004468798637, + "step": 2103 + }, + { + "ce_ib": 7.948861598968506, + "ce_orig": 1.6023788452148438, + "epoch": 0.6047882665899778, + "kl_loss": 0.23041550815105438, + "loss_ib": 0.010253016836941242, + "step": 2103 + }, + { + "ce_ib": 5.942636966705322, + "ce_orig": 1.2763230800628662, + "epoch": 0.6047882665899778, + "kl_loss": 0.26866039633750916, + "loss_ib": 0.008629241026937962, + "step": 2103 + }, + { + "ce_ib": 3.3919172286987305, + "ce_orig": 0.5541743636131287, + "epoch": 0.6050758501689554, + "kl_loss": 0.2860734760761261, + "loss_ib": 0.006252652034163475, + "step": 2104 + }, + { + "ce_ib": 2.5143818855285645, + "ce_orig": 0.5698620676994324, + "epoch": 0.6050758501689554, + "kl_loss": 0.15168343484401703, + "loss_ib": 0.004031216260045767, + "step": 2104 + }, + { + "ce_ib": 3.977893352508545, + "ce_orig": 0.9532864689826965, + "epoch": 0.6050758501689554, + "kl_loss": 0.1357690393924713, + "loss_ib": 0.005335583351552486, + "step": 2104 + }, + { + "ce_ib": 3.939521074295044, + "ce_orig": 0.8121203780174255, + "epoch": 0.6050758501689554, + "kl_loss": 0.2524918019771576, + "loss_ib": 0.006464438978582621, + "step": 2104 + }, + { + "epoch": 0.605363433747933, + "grad_norm": 0.1293361783027649, + "learning_rate": 9.251806500657443e-06, + "loss": 0.8399, + "step": 2105 + }, + { + "ce_ib": 6.487258434295654, + "ce_orig": 0.7448585033416748, + "epoch": 0.605363433747933, + "kl_loss": 0.1960236132144928, + "loss_ib": 0.008447494357824326, + "step": 2105 + }, + { + "ce_ib": 4.8964056968688965, + "ce_orig": 0.609185516834259, + "epoch": 0.605363433747933, + "kl_loss": 0.15936075150966644, + "loss_ib": 0.006490012630820274, + "step": 2105 + }, + { + "ce_ib": 4.070477485656738, + "ce_orig": 0.9137795567512512, + "epoch": 0.605363433747933, + "kl_loss": 0.1739756166934967, + "loss_ib": 0.005810233298689127, + "step": 2105 + }, + { + "ce_ib": 3.9960384368896484, + "ce_orig": 0.816129744052887, + "epoch": 0.605363433747933, + "kl_loss": 0.15446269512176514, + "loss_ib": 0.005540665239095688, + "step": 2105 + }, + { + "ce_ib": 4.505927085876465, + "ce_orig": 0.5056048035621643, + "epoch": 0.6056510173269106, + "kl_loss": 0.22409559786319733, + "loss_ib": 0.006746883038431406, + "step": 2106 + }, + { + "ce_ib": 3.9140424728393555, + "ce_orig": 0.8718525767326355, + "epoch": 0.6056510173269106, + "kl_loss": 0.13706588745117188, + "loss_ib": 0.005284701474010944, + "step": 2106 + }, + { + "ce_ib": 4.185517311096191, + "ce_orig": 0.9850714802742004, + "epoch": 0.6056510173269106, + "kl_loss": 0.21597495675086975, + "loss_ib": 0.006345266941934824, + "step": 2106 + }, + { + "ce_ib": 4.168082237243652, + "ce_orig": 0.6870065927505493, + "epoch": 0.6056510173269106, + "kl_loss": 0.18822196125984192, + "loss_ib": 0.006050301715731621, + "step": 2106 + }, + { + "ce_ib": 6.932581424713135, + "ce_orig": 1.288358449935913, + "epoch": 0.6059386009058882, + "kl_loss": 0.3322944641113281, + "loss_ib": 0.010255525819957256, + "step": 2107 + }, + { + "ce_ib": 4.402205467224121, + "ce_orig": 0.6296238303184509, + "epoch": 0.6059386009058882, + "kl_loss": 0.2156883031129837, + "loss_ib": 0.006559088826179504, + "step": 2107 + }, + { + "ce_ib": 4.142282962799072, + "ce_orig": 0.7608774304389954, + "epoch": 0.6059386009058882, + "kl_loss": 0.20693744719028473, + "loss_ib": 0.006211657542735338, + "step": 2107 + }, + { + "ce_ib": 6.204916954040527, + "ce_orig": 0.9014793038368225, + "epoch": 0.6059386009058882, + "kl_loss": 0.26911792159080505, + "loss_ib": 0.008896095678210258, + "step": 2107 + }, + { + "ce_ib": 3.6460511684417725, + "ce_orig": 0.5912688374519348, + "epoch": 0.6062261844848659, + "kl_loss": 0.16472607851028442, + "loss_ib": 0.005293312016874552, + "step": 2108 + }, + { + "ce_ib": 4.080135345458984, + "ce_orig": 0.8540125489234924, + "epoch": 0.6062261844848659, + "kl_loss": 0.2516666352748871, + "loss_ib": 0.006596801802515984, + "step": 2108 + }, + { + "ce_ib": 6.799363613128662, + "ce_orig": 1.1493276357650757, + "epoch": 0.6062261844848659, + "kl_loss": 0.3562454581260681, + "loss_ib": 0.010361817665398121, + "step": 2108 + }, + { + "ce_ib": 5.209597110748291, + "ce_orig": 0.861347496509552, + "epoch": 0.6062261844848659, + "kl_loss": 0.23559173941612244, + "loss_ib": 0.007565514650195837, + "step": 2108 + }, + { + "ce_ib": 2.90177059173584, + "ce_orig": 0.6902340054512024, + "epoch": 0.6065137680638436, + "kl_loss": 0.12874732911586761, + "loss_ib": 0.004189244005829096, + "step": 2109 + }, + { + "ce_ib": 2.592775583267212, + "ce_orig": 0.4718996584415436, + "epoch": 0.6065137680638436, + "kl_loss": 0.36936187744140625, + "loss_ib": 0.006286394316703081, + "step": 2109 + }, + { + "ce_ib": 5.419248580932617, + "ce_orig": 1.046078085899353, + "epoch": 0.6065137680638436, + "kl_loss": 0.21121957898139954, + "loss_ib": 0.007531445007771254, + "step": 2109 + }, + { + "ce_ib": 5.559264183044434, + "ce_orig": 1.0997865200042725, + "epoch": 0.6065137680638436, + "kl_loss": 0.16464745998382568, + "loss_ib": 0.007205738686025143, + "step": 2109 + }, + { + "epoch": 0.6068013516428212, + "grad_norm": 0.13606220483779907, + "learning_rate": 9.24771762624493e-06, + "loss": 0.8298, + "step": 2110 + }, + { + "ce_ib": 5.804508686065674, + "ce_orig": 0.9017541408538818, + "epoch": 0.6068013516428212, + "kl_loss": 0.19018954038619995, + "loss_ib": 0.007706404197961092, + "step": 2110 + }, + { + "ce_ib": 7.682343482971191, + "ce_orig": 1.264442801475525, + "epoch": 0.6068013516428212, + "kl_loss": 0.288762629032135, + "loss_ib": 0.01056997012346983, + "step": 2110 + }, + { + "ce_ib": 6.56893253326416, + "ce_orig": 0.9220224022865295, + "epoch": 0.6068013516428212, + "kl_loss": 0.2719862759113312, + "loss_ib": 0.009288794361054897, + "step": 2110 + }, + { + "ce_ib": 5.830661296844482, + "ce_orig": 0.9345362186431885, + "epoch": 0.6068013516428212, + "kl_loss": 0.23889029026031494, + "loss_ib": 0.008219564333558083, + "step": 2110 + }, + { + "ce_ib": 3.8872666358947754, + "ce_orig": 0.78645920753479, + "epoch": 0.6070889352217989, + "kl_loss": 0.16978541016578674, + "loss_ib": 0.005585120990872383, + "step": 2111 + }, + { + "ce_ib": 5.560759544372559, + "ce_orig": 0.6178320646286011, + "epoch": 0.6070889352217989, + "kl_loss": 0.3392623960971832, + "loss_ib": 0.00895338412374258, + "step": 2111 + }, + { + "ce_ib": 6.4202399253845215, + "ce_orig": 1.3836085796356201, + "epoch": 0.6070889352217989, + "kl_loss": 0.16969707608222961, + "loss_ib": 0.008117211051285267, + "step": 2111 + }, + { + "ce_ib": 3.9117469787597656, + "ce_orig": 0.4862440228462219, + "epoch": 0.6070889352217989, + "kl_loss": 0.20649954676628113, + "loss_ib": 0.005976742599159479, + "step": 2111 + }, + { + "ce_ib": 4.565629959106445, + "ce_orig": 1.0319615602493286, + "epoch": 0.6073765188007765, + "kl_loss": 0.14287219941616058, + "loss_ib": 0.005994352046400309, + "step": 2112 + }, + { + "ce_ib": 8.177915573120117, + "ce_orig": 1.6782734394073486, + "epoch": 0.6073765188007765, + "kl_loss": 0.3027498424053192, + "loss_ib": 0.011205414310097694, + "step": 2112 + }, + { + "ce_ib": 6.124603271484375, + "ce_orig": 1.2374764680862427, + "epoch": 0.6073765188007765, + "kl_loss": 0.24490374326705933, + "loss_ib": 0.008573640137910843, + "step": 2112 + }, + { + "ce_ib": 9.245201110839844, + "ce_orig": 1.5667716264724731, + "epoch": 0.6073765188007765, + "kl_loss": 0.18349461257457733, + "loss_ib": 0.01108014676719904, + "step": 2112 + }, + { + "ce_ib": 4.517976760864258, + "ce_orig": 0.8942232728004456, + "epoch": 0.6076641023797541, + "kl_loss": 0.19106557965278625, + "loss_ib": 0.006428632419556379, + "step": 2113 + }, + { + "ce_ib": 3.394009828567505, + "ce_orig": 0.6811273694038391, + "epoch": 0.6076641023797541, + "kl_loss": 0.19193032383918762, + "loss_ib": 0.005313313100486994, + "step": 2113 + }, + { + "ce_ib": 5.409207344055176, + "ce_orig": 0.7552686333656311, + "epoch": 0.6076641023797541, + "kl_loss": 0.26530203223228455, + "loss_ib": 0.008062227629125118, + "step": 2113 + }, + { + "ce_ib": 5.414778232574463, + "ce_orig": 1.1809942722320557, + "epoch": 0.6076641023797541, + "kl_loss": 0.16809800267219543, + "loss_ib": 0.007095758803188801, + "step": 2113 + }, + { + "ce_ib": 8.463586807250977, + "ce_orig": 1.7049224376678467, + "epoch": 0.6079516859587317, + "kl_loss": 0.6566388607025146, + "loss_ib": 0.015029976144433022, + "step": 2114 + }, + { + "ce_ib": 4.247213363647461, + "ce_orig": 0.8814427256584167, + "epoch": 0.6079516859587317, + "kl_loss": 0.1321144551038742, + "loss_ib": 0.005568357650190592, + "step": 2114 + }, + { + "ce_ib": 4.689974784851074, + "ce_orig": 0.6478808522224426, + "epoch": 0.6079516859587317, + "kl_loss": 0.17946076393127441, + "loss_ib": 0.0064845820888876915, + "step": 2114 + }, + { + "ce_ib": 3.4115371704101562, + "ce_orig": 0.7198323607444763, + "epoch": 0.6079516859587317, + "kl_loss": 0.1590496003627777, + "loss_ib": 0.005002032965421677, + "step": 2114 + }, + { + "epoch": 0.6082392695377093, + "grad_norm": 0.1553514152765274, + "learning_rate": 9.243618518094741e-06, + "loss": 0.9883, + "step": 2115 + }, + { + "ce_ib": 6.639435768127441, + "ce_orig": 0.8874683976173401, + "epoch": 0.6082392695377093, + "kl_loss": 0.2129654586315155, + "loss_ib": 0.008769090287387371, + "step": 2115 + }, + { + "ce_ib": 2.994427442550659, + "ce_orig": 0.8822687864303589, + "epoch": 0.6082392695377093, + "kl_loss": 0.21947252750396729, + "loss_ib": 0.005189152900129557, + "step": 2115 + }, + { + "ce_ib": 10.844606399536133, + "ce_orig": 1.765454649925232, + "epoch": 0.6082392695377093, + "kl_loss": 0.27179983258247375, + "loss_ib": 0.01356260385364294, + "step": 2115 + }, + { + "ce_ib": 3.3998141288757324, + "ce_orig": 0.45301687717437744, + "epoch": 0.6082392695377093, + "kl_loss": 0.10441018640995026, + "loss_ib": 0.004443916026502848, + "step": 2115 + }, + { + "ce_ib": 3.1267662048339844, + "ce_orig": 0.480742484331131, + "epoch": 0.6085268531166871, + "kl_loss": 0.20724813640117645, + "loss_ib": 0.005199247505515814, + "step": 2116 + }, + { + "ce_ib": 3.388462543487549, + "ce_orig": 0.639552652835846, + "epoch": 0.6085268531166871, + "kl_loss": 0.15150845050811768, + "loss_ib": 0.004903547000139952, + "step": 2116 + }, + { + "ce_ib": 7.652255535125732, + "ce_orig": 1.3306525945663452, + "epoch": 0.6085268531166871, + "kl_loss": 0.15088030695915222, + "loss_ib": 0.009161058813333511, + "step": 2116 + }, + { + "ce_ib": 4.337798595428467, + "ce_orig": 0.7529159188270569, + "epoch": 0.6085268531166871, + "kl_loss": 0.2630062699317932, + "loss_ib": 0.006967861671000719, + "step": 2116 + }, + { + "ce_ib": 4.641881465911865, + "ce_orig": 0.8489657044410706, + "epoch": 0.6088144366956647, + "kl_loss": 0.27277541160583496, + "loss_ib": 0.007369635626673698, + "step": 2117 + }, + { + "ce_ib": 2.937098979949951, + "ce_orig": 0.8028139472007751, + "epoch": 0.6088144366956647, + "kl_loss": 0.18757417798042297, + "loss_ib": 0.004812840837985277, + "step": 2117 + }, + { + "ce_ib": 3.003194570541382, + "ce_orig": 0.7116705179214478, + "epoch": 0.6088144366956647, + "kl_loss": 0.2538503110408783, + "loss_ib": 0.005541697610169649, + "step": 2117 + }, + { + "ce_ib": 3.9709370136260986, + "ce_orig": 0.4724328815937042, + "epoch": 0.6088144366956647, + "kl_loss": 0.1441301703453064, + "loss_ib": 0.005412238650023937, + "step": 2117 + }, + { + "ce_ib": 3.6186530590057373, + "ce_orig": 0.4829815626144409, + "epoch": 0.6091020202746423, + "kl_loss": 0.24389347434043884, + "loss_ib": 0.006057587917894125, + "step": 2118 + }, + { + "ce_ib": 6.648671627044678, + "ce_orig": 1.419867753982544, + "epoch": 0.6091020202746423, + "kl_loss": 0.1908860206604004, + "loss_ib": 0.008557531982660294, + "step": 2118 + }, + { + "ce_ib": 4.257307529449463, + "ce_orig": 0.5077572464942932, + "epoch": 0.6091020202746423, + "kl_loss": 0.3365297317504883, + "loss_ib": 0.007622604724019766, + "step": 2118 + }, + { + "ce_ib": 4.0375447273254395, + "ce_orig": 0.5007375478744507, + "epoch": 0.6091020202746423, + "kl_loss": 0.1617627590894699, + "loss_ib": 0.0056551722809672356, + "step": 2118 + }, + { + "ce_ib": 4.645079135894775, + "ce_orig": 0.92597895860672, + "epoch": 0.60938960385362, + "kl_loss": 0.14361873269081116, + "loss_ib": 0.006081266328692436, + "step": 2119 + }, + { + "ce_ib": 4.87855863571167, + "ce_orig": 0.6463962197303772, + "epoch": 0.60938960385362, + "kl_loss": 0.26122039556503296, + "loss_ib": 0.007490762509405613, + "step": 2119 + }, + { + "ce_ib": 3.212925910949707, + "ce_orig": 0.8976016640663147, + "epoch": 0.60938960385362, + "kl_loss": 0.12607215344905853, + "loss_ib": 0.004473647568374872, + "step": 2119 + }, + { + "ce_ib": 9.196220397949219, + "ce_orig": 1.8560212850570679, + "epoch": 0.60938960385362, + "kl_loss": 0.2542807459831238, + "loss_ib": 0.011739027686417103, + "step": 2119 + }, + { + "epoch": 0.6096771874325976, + "grad_norm": 0.13248442113399506, + "learning_rate": 9.239509186082574e-06, + "loss": 0.9119, + "step": 2120 + }, + { + "ce_ib": 2.5775210857391357, + "ce_orig": 0.743409276008606, + "epoch": 0.6096771874325976, + "kl_loss": 0.13335728645324707, + "loss_ib": 0.003911093808710575, + "step": 2120 + }, + { + "ce_ib": 2.721320390701294, + "ce_orig": 0.6192002892494202, + "epoch": 0.6096771874325976, + "kl_loss": 0.11261288821697235, + "loss_ib": 0.003847449319437146, + "step": 2120 + }, + { + "ce_ib": 4.909618854522705, + "ce_orig": 0.7369688749313354, + "epoch": 0.6096771874325976, + "kl_loss": 0.16477900743484497, + "loss_ib": 0.006557408254593611, + "step": 2120 + }, + { + "ce_ib": 4.797292709350586, + "ce_orig": 0.8298988342285156, + "epoch": 0.6096771874325976, + "kl_loss": 0.25165626406669617, + "loss_ib": 0.007313854992389679, + "step": 2120 + }, + { + "ce_ib": 3.1922547817230225, + "ce_orig": 0.6548759937286377, + "epoch": 0.6099647710115752, + "kl_loss": 0.24803176522254944, + "loss_ib": 0.005672572646290064, + "step": 2121 + }, + { + "ce_ib": 2.2532079219818115, + "ce_orig": 0.4539450407028198, + "epoch": 0.6099647710115752, + "kl_loss": 0.20249101519584656, + "loss_ib": 0.004278117790818214, + "step": 2121 + }, + { + "ce_ib": 7.7856268882751465, + "ce_orig": 1.1526672840118408, + "epoch": 0.6099647710115752, + "kl_loss": 0.16948170959949493, + "loss_ib": 0.00948044378310442, + "step": 2121 + }, + { + "ce_ib": 2.6415672302246094, + "ce_orig": 0.6043635010719299, + "epoch": 0.6099647710115752, + "kl_loss": 0.1961209774017334, + "loss_ib": 0.00460277684032917, + "step": 2121 + }, + { + "ce_ib": 6.634166717529297, + "ce_orig": 1.1256740093231201, + "epoch": 0.6102523545905528, + "kl_loss": 0.18688133358955383, + "loss_ib": 0.008502980694174767, + "step": 2122 + }, + { + "ce_ib": 4.490315914154053, + "ce_orig": 1.1010440587997437, + "epoch": 0.6102523545905528, + "kl_loss": 0.2946227490901947, + "loss_ib": 0.007436543703079224, + "step": 2122 + }, + { + "ce_ib": 8.648628234863281, + "ce_orig": 1.6791341304779053, + "epoch": 0.6102523545905528, + "kl_loss": 0.2725248336791992, + "loss_ib": 0.01137387752532959, + "step": 2122 + }, + { + "ce_ib": 6.406111240386963, + "ce_orig": 1.0798084735870361, + "epoch": 0.6102523545905528, + "kl_loss": 0.15316331386566162, + "loss_ib": 0.00793774425983429, + "step": 2122 + }, + { + "ce_ib": 8.29507064819336, + "ce_orig": 1.6230907440185547, + "epoch": 0.6105399381695306, + "kl_loss": 0.22892825305461884, + "loss_ib": 0.010584352537989616, + "step": 2123 + }, + { + "ce_ib": 6.077398777008057, + "ce_orig": 1.0588009357452393, + "epoch": 0.6105399381695306, + "kl_loss": 0.22441905736923218, + "loss_ib": 0.008321588858962059, + "step": 2123 + }, + { + "ce_ib": 3.2698423862457275, + "ce_orig": 0.5916512608528137, + "epoch": 0.6105399381695306, + "kl_loss": 0.18160267174243927, + "loss_ib": 0.0050858692266047, + "step": 2123 + }, + { + "ce_ib": 8.65733528137207, + "ce_orig": 1.1385416984558105, + "epoch": 0.6105399381695306, + "kl_loss": 0.26328638195991516, + "loss_ib": 0.011290199123322964, + "step": 2123 + }, + { + "ce_ib": 3.2631733417510986, + "ce_orig": 0.7165438532829285, + "epoch": 0.6108275217485082, + "kl_loss": 0.14989891648292542, + "loss_ib": 0.004762162454426289, + "step": 2124 + }, + { + "ce_ib": 3.4419682025909424, + "ce_orig": 0.5420752167701721, + "epoch": 0.6108275217485082, + "kl_loss": 0.10058829188346863, + "loss_ib": 0.004447850864380598, + "step": 2124 + }, + { + "ce_ib": 6.7049713134765625, + "ce_orig": 1.1739871501922607, + "epoch": 0.6108275217485082, + "kl_loss": 0.3051673471927643, + "loss_ib": 0.009756645187735558, + "step": 2124 + }, + { + "ce_ib": 6.274320125579834, + "ce_orig": 1.3123911619186401, + "epoch": 0.6108275217485082, + "kl_loss": 0.15925326943397522, + "loss_ib": 0.007866852916777134, + "step": 2124 + }, + { + "epoch": 0.6111151053274858, + "grad_norm": 0.13773378729820251, + "learning_rate": 9.23538964010877e-06, + "loss": 0.9025, + "step": 2125 + }, + { + "ce_ib": 4.88681697845459, + "ce_orig": 1.0514509677886963, + "epoch": 0.6111151053274858, + "kl_loss": 0.13540518283843994, + "loss_ib": 0.006240868475288153, + "step": 2125 + }, + { + "ce_ib": 7.104170799255371, + "ce_orig": 1.2814857959747314, + "epoch": 0.6111151053274858, + "kl_loss": 0.2071654498577118, + "loss_ib": 0.00917582493275404, + "step": 2125 + }, + { + "ce_ib": 7.298569679260254, + "ce_orig": 1.5028326511383057, + "epoch": 0.6111151053274858, + "kl_loss": 0.2278403788805008, + "loss_ib": 0.009576973505318165, + "step": 2125 + }, + { + "ce_ib": 3.304814100265503, + "ce_orig": 0.4093223214149475, + "epoch": 0.6111151053274858, + "kl_loss": 0.2189027965068817, + "loss_ib": 0.00549384206533432, + "step": 2125 + }, + { + "ce_ib": 2.6418187618255615, + "ce_orig": 0.5143451690673828, + "epoch": 0.6114026889064634, + "kl_loss": 0.16270941495895386, + "loss_ib": 0.004268913064152002, + "step": 2126 + }, + { + "ce_ib": 3.249535083770752, + "ce_orig": 0.43723252415657043, + "epoch": 0.6114026889064634, + "kl_loss": 0.22625167667865753, + "loss_ib": 0.0055120522156357765, + "step": 2126 + }, + { + "ce_ib": 7.128957748413086, + "ce_orig": 1.1190768480300903, + "epoch": 0.6114026889064634, + "kl_loss": 0.42188870906829834, + "loss_ib": 0.011347844265401363, + "step": 2126 + }, + { + "ce_ib": 1.8053135871887207, + "ce_orig": 0.3770313560962677, + "epoch": 0.6114026889064634, + "kl_loss": 0.15452507138252258, + "loss_ib": 0.0033505642786622047, + "step": 2126 + }, + { + "ce_ib": 3.227522850036621, + "ce_orig": 0.7376641631126404, + "epoch": 0.6116902724854411, + "kl_loss": 0.16641360521316528, + "loss_ib": 0.004891659133136272, + "step": 2127 + }, + { + "ce_ib": 3.7297191619873047, + "ce_orig": 0.691023051738739, + "epoch": 0.6116902724854411, + "kl_loss": 0.12572142481803894, + "loss_ib": 0.004986933432519436, + "step": 2127 + }, + { + "ce_ib": 0.7535586357116699, + "ce_orig": 0.11636380106210709, + "epoch": 0.6116902724854411, + "kl_loss": 0.3841002285480499, + "loss_ib": 0.0045945607125759125, + "step": 2127 + }, + { + "ce_ib": 2.521564245223999, + "ce_orig": 0.5358123183250427, + "epoch": 0.6116902724854411, + "kl_loss": 0.18710127472877502, + "loss_ib": 0.004392576869577169, + "step": 2127 + }, + { + "ce_ib": 6.148972034454346, + "ce_orig": 0.7971481680870056, + "epoch": 0.6119778560644187, + "kl_loss": 0.214311882853508, + "loss_ib": 0.008292091079056263, + "step": 2128 + }, + { + "ce_ib": 2.726161241531372, + "ce_orig": 0.6313729286193848, + "epoch": 0.6119778560644187, + "kl_loss": 0.14849919080734253, + "loss_ib": 0.004211152903735638, + "step": 2128 + }, + { + "ce_ib": 8.12751293182373, + "ce_orig": 1.3711216449737549, + "epoch": 0.6119778560644187, + "kl_loss": 0.1890946328639984, + "loss_ib": 0.010018459521234035, + "step": 2128 + }, + { + "ce_ib": 5.9603376388549805, + "ce_orig": 1.1166486740112305, + "epoch": 0.6119778560644187, + "kl_loss": 0.19808559119701385, + "loss_ib": 0.00794119294732809, + "step": 2128 + }, + { + "ce_ib": 7.765037536621094, + "ce_orig": 1.6083548069000244, + "epoch": 0.6122654396433964, + "kl_loss": 0.1600465178489685, + "loss_ib": 0.009365502744913101, + "step": 2129 + }, + { + "ce_ib": 6.826645374298096, + "ce_orig": 1.2979741096496582, + "epoch": 0.6122654396433964, + "kl_loss": 0.42146408557891846, + "loss_ib": 0.01104128547012806, + "step": 2129 + }, + { + "ce_ib": 6.564967155456543, + "ce_orig": 1.1155140399932861, + "epoch": 0.6122654396433964, + "kl_loss": 0.15702641010284424, + "loss_ib": 0.008135231211781502, + "step": 2129 + }, + { + "ce_ib": 3.0503625869750977, + "ce_orig": 0.38983190059661865, + "epoch": 0.6122654396433964, + "kl_loss": 0.32414597272872925, + "loss_ib": 0.00629182206466794, + "step": 2129 + }, + { + "epoch": 0.612553023222374, + "grad_norm": 0.1455356478691101, + "learning_rate": 9.231259890098266e-06, + "loss": 0.884, + "step": 2130 + }, + { + "ce_ib": 4.982285499572754, + "ce_orig": 1.0661743879318237, + "epoch": 0.612553023222374, + "kl_loss": 0.258543461561203, + "loss_ib": 0.0075677200220525265, + "step": 2130 + }, + { + "ce_ib": 5.002605438232422, + "ce_orig": 0.6898569464683533, + "epoch": 0.612553023222374, + "kl_loss": 0.2061755657196045, + "loss_ib": 0.007064360659569502, + "step": 2130 + }, + { + "ce_ib": 4.824326515197754, + "ce_orig": 0.9099055528640747, + "epoch": 0.612553023222374, + "kl_loss": 0.24781644344329834, + "loss_ib": 0.007302490994334221, + "step": 2130 + }, + { + "ce_ib": 6.193486213684082, + "ce_orig": 1.069178819656372, + "epoch": 0.612553023222374, + "kl_loss": 0.32996946573257446, + "loss_ib": 0.009493180550634861, + "step": 2130 + }, + { + "ce_ib": 3.6664533615112305, + "ce_orig": 0.9003771543502808, + "epoch": 0.6128406068013517, + "kl_loss": 0.18835298717021942, + "loss_ib": 0.005549983121454716, + "step": 2131 + }, + { + "ce_ib": 6.522622585296631, + "ce_orig": 1.2862212657928467, + "epoch": 0.6128406068013517, + "kl_loss": 0.24667346477508545, + "loss_ib": 0.008989357389509678, + "step": 2131 + }, + { + "ce_ib": 7.5279059410095215, + "ce_orig": 1.5823092460632324, + "epoch": 0.6128406068013517, + "kl_loss": 0.2326289415359497, + "loss_ib": 0.009854195639491081, + "step": 2131 + }, + { + "ce_ib": 6.167239189147949, + "ce_orig": 1.1377007961273193, + "epoch": 0.6128406068013517, + "kl_loss": 0.2641475796699524, + "loss_ib": 0.008808715268969536, + "step": 2131 + }, + { + "ce_ib": 6.241806507110596, + "ce_orig": 0.7772445678710938, + "epoch": 0.6131281903803293, + "kl_loss": 0.22522038221359253, + "loss_ib": 0.00849401019513607, + "step": 2132 + }, + { + "ce_ib": 3.1980679035186768, + "ce_orig": 0.6957777738571167, + "epoch": 0.6131281903803293, + "kl_loss": 0.1474233716726303, + "loss_ib": 0.004672301467508078, + "step": 2132 + }, + { + "ce_ib": 6.75003719329834, + "ce_orig": 0.7646394968032837, + "epoch": 0.6131281903803293, + "kl_loss": 0.4094095826148987, + "loss_ib": 0.010844132862985134, + "step": 2132 + }, + { + "ce_ib": 7.7711663246154785, + "ce_orig": 1.1008481979370117, + "epoch": 0.6131281903803293, + "kl_loss": 0.19172796607017517, + "loss_ib": 0.009688446298241615, + "step": 2132 + }, + { + "ce_ib": 3.8368194103240967, + "ce_orig": 0.9266735315322876, + "epoch": 0.6134157739593069, + "kl_loss": 0.1813657134771347, + "loss_ib": 0.005650476552546024, + "step": 2133 + }, + { + "ce_ib": 6.408683776855469, + "ce_orig": 1.0894063711166382, + "epoch": 0.6134157739593069, + "kl_loss": 0.17796170711517334, + "loss_ib": 0.008188300766050816, + "step": 2133 + }, + { + "ce_ib": 6.643096923828125, + "ce_orig": 1.2671693563461304, + "epoch": 0.6134157739593069, + "kl_loss": 0.2648300230503082, + "loss_ib": 0.009291397407650948, + "step": 2133 + }, + { + "ce_ib": 5.421484470367432, + "ce_orig": 1.0842853784561157, + "epoch": 0.6134157739593069, + "kl_loss": 0.1903899610042572, + "loss_ib": 0.007325384300202131, + "step": 2133 + }, + { + "ce_ib": 5.360467433929443, + "ce_orig": 0.8042804002761841, + "epoch": 0.6137033575382845, + "kl_loss": 0.35999175906181335, + "loss_ib": 0.008960384875535965, + "step": 2134 + }, + { + "ce_ib": 3.965925693511963, + "ce_orig": 0.7532318234443665, + "epoch": 0.6137033575382845, + "kl_loss": 0.24825260043144226, + "loss_ib": 0.006448451895266771, + "step": 2134 + }, + { + "ce_ib": 4.725895404815674, + "ce_orig": 0.7118043899536133, + "epoch": 0.6137033575382845, + "kl_loss": 0.30505359172821045, + "loss_ib": 0.0077764312736690044, + "step": 2134 + }, + { + "ce_ib": 3.4691596031188965, + "ce_orig": 0.7582639455795288, + "epoch": 0.6137033575382845, + "kl_loss": 0.2597672939300537, + "loss_ib": 0.006066832225769758, + "step": 2134 + }, + { + "epoch": 0.6139909411172622, + "grad_norm": 0.11501387506723404, + "learning_rate": 9.22711994600059e-06, + "loss": 0.8888, + "step": 2135 + }, + { + "ce_ib": 4.7227253913879395, + "ce_orig": 0.7678329944610596, + "epoch": 0.6139909411172622, + "kl_loss": 0.25123685598373413, + "loss_ib": 0.0072350939735770226, + "step": 2135 + }, + { + "ce_ib": 3.5295708179473877, + "ce_orig": 0.9026975631713867, + "epoch": 0.6139909411172622, + "kl_loss": 0.18043279647827148, + "loss_ib": 0.005333899054676294, + "step": 2135 + }, + { + "ce_ib": 5.927875518798828, + "ce_orig": 1.2064181566238403, + "epoch": 0.6139909411172622, + "kl_loss": 0.15645165741443634, + "loss_ib": 0.007492391858249903, + "step": 2135 + }, + { + "ce_ib": 6.0282111167907715, + "ce_orig": 0.9740961790084839, + "epoch": 0.6139909411172622, + "kl_loss": 0.2290135622024536, + "loss_ib": 0.008318346925079823, + "step": 2135 + }, + { + "ce_ib": 6.317895889282227, + "ce_orig": 1.2893723249435425, + "epoch": 0.6142785246962399, + "kl_loss": 0.27189162373542786, + "loss_ib": 0.00903681293129921, + "step": 2136 + }, + { + "ce_ib": 4.896044731140137, + "ce_orig": 0.7516950964927673, + "epoch": 0.6142785246962399, + "kl_loss": 0.3434276580810547, + "loss_ib": 0.008330320939421654, + "step": 2136 + }, + { + "ce_ib": 6.441210746765137, + "ce_orig": 1.156441569328308, + "epoch": 0.6142785246962399, + "kl_loss": 0.24771161377429962, + "loss_ib": 0.008918327279388905, + "step": 2136 + }, + { + "ce_ib": 2.481863260269165, + "ce_orig": 0.58522629737854, + "epoch": 0.6142785246962399, + "kl_loss": 0.17207714915275574, + "loss_ib": 0.004202634561806917, + "step": 2136 + }, + { + "ce_ib": 3.511462688446045, + "ce_orig": 0.6315295696258545, + "epoch": 0.6145661082752175, + "kl_loss": 0.14798977971076965, + "loss_ib": 0.0049913604743778706, + "step": 2137 + }, + { + "ce_ib": 3.2794857025146484, + "ce_orig": 0.5536940693855286, + "epoch": 0.6145661082752175, + "kl_loss": 0.16751450300216675, + "loss_ib": 0.004954630509018898, + "step": 2137 + }, + { + "ce_ib": 1.656890869140625, + "ce_orig": 0.29773762822151184, + "epoch": 0.6145661082752175, + "kl_loss": 0.4993658661842346, + "loss_ib": 0.006650549825280905, + "step": 2137 + }, + { + "ce_ib": 7.803337574005127, + "ce_orig": 1.0832288265228271, + "epoch": 0.6145661082752175, + "kl_loss": 0.234723299741745, + "loss_ib": 0.010150570422410965, + "step": 2137 + }, + { + "ce_ib": 5.012164115905762, + "ce_orig": 0.7559343576431274, + "epoch": 0.6148536918541951, + "kl_loss": 0.24938853085041046, + "loss_ib": 0.007506049238145351, + "step": 2138 + }, + { + "ce_ib": 6.5192670822143555, + "ce_orig": 1.2368531227111816, + "epoch": 0.6148536918541951, + "kl_loss": 0.17594745755195618, + "loss_ib": 0.008278741501271725, + "step": 2138 + }, + { + "ce_ib": 3.9774487018585205, + "ce_orig": 0.6455888748168945, + "epoch": 0.6148536918541951, + "kl_loss": 0.25385504961013794, + "loss_ib": 0.006515998858958483, + "step": 2138 + }, + { + "ce_ib": 5.573937892913818, + "ce_orig": 1.3305895328521729, + "epoch": 0.6148536918541951, + "kl_loss": 0.19003352522850037, + "loss_ib": 0.007474273443222046, + "step": 2138 + }, + { + "ce_ib": 5.050466060638428, + "ce_orig": 0.7372082471847534, + "epoch": 0.6151412754331728, + "kl_loss": 0.14413952827453613, + "loss_ib": 0.006491861306130886, + "step": 2139 + }, + { + "ce_ib": 4.076945781707764, + "ce_orig": 0.8760890364646912, + "epoch": 0.6151412754331728, + "kl_loss": 0.15848958492279053, + "loss_ib": 0.005661841481924057, + "step": 2139 + }, + { + "ce_ib": 8.118515968322754, + "ce_orig": 1.3670653104782104, + "epoch": 0.6151412754331728, + "kl_loss": 0.15316376090049744, + "loss_ib": 0.009650154039263725, + "step": 2139 + }, + { + "ce_ib": 7.861691951751709, + "ce_orig": 1.918341875076294, + "epoch": 0.6151412754331728, + "kl_loss": 0.23022012412548065, + "loss_ib": 0.010163893923163414, + "step": 2139 + }, + { + "epoch": 0.6154288590121504, + "grad_norm": 0.1331578940153122, + "learning_rate": 9.222969817789829e-06, + "loss": 0.9071, + "step": 2140 + }, + { + "ce_ib": 4.143268585205078, + "ce_orig": 0.7518031001091003, + "epoch": 0.6154288590121504, + "kl_loss": 0.24699410796165466, + "loss_ib": 0.006613209843635559, + "step": 2140 + }, + { + "ce_ib": 4.835630416870117, + "ce_orig": 0.8265523910522461, + "epoch": 0.6154288590121504, + "kl_loss": 0.34374815225601196, + "loss_ib": 0.008273111656308174, + "step": 2140 + }, + { + "ce_ib": 2.7290453910827637, + "ce_orig": 0.28519779443740845, + "epoch": 0.6154288590121504, + "kl_loss": 0.20341753959655762, + "loss_ib": 0.004763220902532339, + "step": 2140 + }, + { + "ce_ib": 5.028217792510986, + "ce_orig": 0.7576372623443604, + "epoch": 0.6154288590121504, + "kl_loss": 0.18777036666870117, + "loss_ib": 0.00690592173486948, + "step": 2140 + }, + { + "ce_ib": 6.009255886077881, + "ce_orig": 1.1055283546447754, + "epoch": 0.615716442591128, + "kl_loss": 0.18182960152626038, + "loss_ib": 0.007827552035450935, + "step": 2141 + }, + { + "ce_ib": 4.761140823364258, + "ce_orig": 1.0481175184249878, + "epoch": 0.615716442591128, + "kl_loss": 0.16665112972259521, + "loss_ib": 0.0064276522025465965, + "step": 2141 + }, + { + "ce_ib": 4.251883506774902, + "ce_orig": 0.8358097076416016, + "epoch": 0.615716442591128, + "kl_loss": 0.1544901430606842, + "loss_ib": 0.0057967850007116795, + "step": 2141 + }, + { + "ce_ib": 5.710509777069092, + "ce_orig": 1.117326021194458, + "epoch": 0.615716442591128, + "kl_loss": 0.14629152417182922, + "loss_ib": 0.007173424586653709, + "step": 2141 + }, + { + "ce_ib": 6.411238670349121, + "ce_orig": 1.11051607131958, + "epoch": 0.6160040261701056, + "kl_loss": 0.23813730478286743, + "loss_ib": 0.00879261177033186, + "step": 2142 + }, + { + "ce_ib": 3.170525312423706, + "ce_orig": 0.750603437423706, + "epoch": 0.6160040261701056, + "kl_loss": 0.27874404191970825, + "loss_ib": 0.005957965739071369, + "step": 2142 + }, + { + "ce_ib": 7.098043441772461, + "ce_orig": 1.2906512022018433, + "epoch": 0.6160040261701056, + "kl_loss": 0.21711736917495728, + "loss_ib": 0.009269217029213905, + "step": 2142 + }, + { + "ce_ib": 3.494011163711548, + "ce_orig": 0.7247247695922852, + "epoch": 0.6160040261701056, + "kl_loss": 0.13338002562522888, + "loss_ib": 0.004827811382710934, + "step": 2142 + }, + { + "ce_ib": 3.2649214267730713, + "ce_orig": 0.40932533144950867, + "epoch": 0.6162916097490834, + "kl_loss": 0.2904610335826874, + "loss_ib": 0.006169531960040331, + "step": 2143 + }, + { + "ce_ib": 4.905594825744629, + "ce_orig": 0.7640795111656189, + "epoch": 0.6162916097490834, + "kl_loss": 0.1738964319229126, + "loss_ib": 0.006644558627158403, + "step": 2143 + }, + { + "ce_ib": 6.556239128112793, + "ce_orig": 0.8183667063713074, + "epoch": 0.6162916097490834, + "kl_loss": 0.20312285423278809, + "loss_ib": 0.008587467484176159, + "step": 2143 + }, + { + "ce_ib": 4.390976905822754, + "ce_orig": 0.5400758981704712, + "epoch": 0.6162916097490834, + "kl_loss": 0.2612292766571045, + "loss_ib": 0.007003269623965025, + "step": 2143 + }, + { + "ce_ib": 6.297164440155029, + "ce_orig": 1.328621745109558, + "epoch": 0.616579193328061, + "kl_loss": 0.2626768946647644, + "loss_ib": 0.008923932909965515, + "step": 2144 + }, + { + "ce_ib": 9.727057456970215, + "ce_orig": 0.9763944149017334, + "epoch": 0.616579193328061, + "kl_loss": 0.19568291306495667, + "loss_ib": 0.011683886870741844, + "step": 2144 + }, + { + "ce_ib": 4.665521621704102, + "ce_orig": 0.659018874168396, + "epoch": 0.616579193328061, + "kl_loss": 0.23189496994018555, + "loss_ib": 0.0069844708777964115, + "step": 2144 + }, + { + "ce_ib": 3.6958107948303223, + "ce_orig": 0.6659523844718933, + "epoch": 0.616579193328061, + "kl_loss": 0.18812400102615356, + "loss_ib": 0.005577050149440765, + "step": 2144 + }, + { + "epoch": 0.6168667769070386, + "grad_norm": 0.11785251647233963, + "learning_rate": 9.218809515464606e-06, + "loss": 0.789, + "step": 2145 + }, + { + "ce_ib": 5.566320896148682, + "ce_orig": 1.4110373258590698, + "epoch": 0.6168667769070386, + "kl_loss": 0.19093210995197296, + "loss_ib": 0.007475642021745443, + "step": 2145 + }, + { + "ce_ib": 7.2003350257873535, + "ce_orig": 1.5182240009307861, + "epoch": 0.6168667769070386, + "kl_loss": 0.20426465570926666, + "loss_ib": 0.009242981672286987, + "step": 2145 + }, + { + "ce_ib": 3.406104326248169, + "ce_orig": 0.48498019576072693, + "epoch": 0.6168667769070386, + "kl_loss": 0.1514992117881775, + "loss_ib": 0.0049210963770747185, + "step": 2145 + }, + { + "ce_ib": 7.462745189666748, + "ce_orig": 0.9224519729614258, + "epoch": 0.6168667769070386, + "kl_loss": 0.2533667981624603, + "loss_ib": 0.009996412321925163, + "step": 2145 + }, + { + "ce_ib": 6.181352138519287, + "ce_orig": 1.111406683921814, + "epoch": 0.6171543604860162, + "kl_loss": 0.27217304706573486, + "loss_ib": 0.008903082460165024, + "step": 2146 + }, + { + "ce_ib": 4.97157096862793, + "ce_orig": 1.1485421657562256, + "epoch": 0.6171543604860162, + "kl_loss": 0.20860697329044342, + "loss_ib": 0.007057640235871077, + "step": 2146 + }, + { + "ce_ib": 7.318411827087402, + "ce_orig": 1.3204861879348755, + "epoch": 0.6171543604860162, + "kl_loss": 0.2406044751405716, + "loss_ib": 0.009724456816911697, + "step": 2146 + }, + { + "ce_ib": 7.596539497375488, + "ce_orig": 1.3422795534133911, + "epoch": 0.6171543604860162, + "kl_loss": 0.23449048399925232, + "loss_ib": 0.009941443800926208, + "step": 2146 + }, + { + "ce_ib": 2.6501286029815674, + "ce_orig": 0.4186507761478424, + "epoch": 0.6174419440649939, + "kl_loss": 0.23372873663902283, + "loss_ib": 0.004987415857613087, + "step": 2147 + }, + { + "ce_ib": 7.631412982940674, + "ce_orig": 1.495327115058899, + "epoch": 0.6174419440649939, + "kl_loss": 0.1872740387916565, + "loss_ib": 0.00950415339320898, + "step": 2147 + }, + { + "ce_ib": 2.903261423110962, + "ce_orig": 0.4891170263290405, + "epoch": 0.6174419440649939, + "kl_loss": 0.20846757292747498, + "loss_ib": 0.004987936932593584, + "step": 2147 + }, + { + "ce_ib": 3.8513810634613037, + "ce_orig": 0.5343347787857056, + "epoch": 0.6174419440649939, + "kl_loss": 0.18040011823177338, + "loss_ib": 0.005655382294207811, + "step": 2147 + }, + { + "ce_ib": 4.649677753448486, + "ce_orig": 1.0790480375289917, + "epoch": 0.6177295276439715, + "kl_loss": 0.2076108157634735, + "loss_ib": 0.006725785788148642, + "step": 2148 + }, + { + "ce_ib": 4.655816078186035, + "ce_orig": 0.974000871181488, + "epoch": 0.6177295276439715, + "kl_loss": 0.2940071225166321, + "loss_ib": 0.007595886941999197, + "step": 2148 + }, + { + "ce_ib": 6.636551856994629, + "ce_orig": 1.0597760677337646, + "epoch": 0.6177295276439715, + "kl_loss": 0.44149142503738403, + "loss_ib": 0.011051465757191181, + "step": 2148 + }, + { + "ce_ib": 5.08578634262085, + "ce_orig": 0.9035578370094299, + "epoch": 0.6177295276439715, + "kl_loss": 0.20379114151000977, + "loss_ib": 0.007123698014765978, + "step": 2148 + }, + { + "ce_ib": 4.784078121185303, + "ce_orig": 1.1317611932754517, + "epoch": 0.6180171112229492, + "kl_loss": 0.20388054847717285, + "loss_ib": 0.0068228840827941895, + "step": 2149 + }, + { + "ce_ib": 4.328738689422607, + "ce_orig": 0.8812701106071472, + "epoch": 0.6180171112229492, + "kl_loss": 0.392722487449646, + "loss_ib": 0.00825596321374178, + "step": 2149 + }, + { + "ce_ib": 5.563507080078125, + "ce_orig": 0.8992621898651123, + "epoch": 0.6180171112229492, + "kl_loss": 0.19330154359340668, + "loss_ib": 0.007496522273868322, + "step": 2149 + }, + { + "ce_ib": 4.975098609924316, + "ce_orig": 0.8951318860054016, + "epoch": 0.6180171112229492, + "kl_loss": 0.21375977993011475, + "loss_ib": 0.007112695835530758, + "step": 2149 + }, + { + "epoch": 0.6183046948019268, + "grad_norm": 0.1262214630842209, + "learning_rate": 9.21463904904805e-06, + "loss": 0.8606, + "step": 2150 + }, + { + "ce_ib": 9.187541961669922, + "ce_orig": 1.5719724893569946, + "epoch": 0.6183046948019268, + "kl_loss": 0.3338630497455597, + "loss_ib": 0.012526172213256359, + "step": 2150 + }, + { + "ce_ib": 8.154082298278809, + "ce_orig": 1.3432285785675049, + "epoch": 0.6183046948019268, + "kl_loss": 0.2084856629371643, + "loss_ib": 0.01023893803358078, + "step": 2150 + }, + { + "ce_ib": 7.151741027832031, + "ce_orig": 0.7475894689559937, + "epoch": 0.6183046948019268, + "kl_loss": 0.37563467025756836, + "loss_ib": 0.010908088646829128, + "step": 2150 + }, + { + "ce_ib": 3.1457815170288086, + "ce_orig": 0.7959761023521423, + "epoch": 0.6183046948019268, + "kl_loss": 0.1517266184091568, + "loss_ib": 0.004663047846406698, + "step": 2150 + }, + { + "ce_ib": 5.097203731536865, + "ce_orig": 0.718260645866394, + "epoch": 0.6185922783809045, + "kl_loss": 0.2522243857383728, + "loss_ib": 0.007619447540491819, + "step": 2151 + }, + { + "ce_ib": 3.034619092941284, + "ce_orig": 0.6448426246643066, + "epoch": 0.6185922783809045, + "kl_loss": 0.15701469779014587, + "loss_ib": 0.004604766145348549, + "step": 2151 + }, + { + "ce_ib": 5.944415092468262, + "ce_orig": 1.3137152194976807, + "epoch": 0.6185922783809045, + "kl_loss": 0.2946608066558838, + "loss_ib": 0.008891022764146328, + "step": 2151 + }, + { + "ce_ib": 3.093179225921631, + "ce_orig": 0.5718558430671692, + "epoch": 0.6185922783809045, + "kl_loss": 0.2779678702354431, + "loss_ib": 0.005872857291251421, + "step": 2151 + }, + { + "ce_ib": 3.620548725128174, + "ce_orig": 0.9731256365776062, + "epoch": 0.6188798619598821, + "kl_loss": 0.10619598627090454, + "loss_ib": 0.004682508762925863, + "step": 2152 + }, + { + "ce_ib": 6.294347763061523, + "ce_orig": 1.308428406715393, + "epoch": 0.6188798619598821, + "kl_loss": 0.3141248822212219, + "loss_ib": 0.009435595944523811, + "step": 2152 + }, + { + "ce_ib": 5.9925079345703125, + "ce_orig": 0.7067108154296875, + "epoch": 0.6188798619598821, + "kl_loss": 0.320584774017334, + "loss_ib": 0.009198355488479137, + "step": 2152 + }, + { + "ce_ib": 3.6602141857147217, + "ce_orig": 0.7023137807846069, + "epoch": 0.6188798619598821, + "kl_loss": 0.2537105083465576, + "loss_ib": 0.006197318900376558, + "step": 2152 + }, + { + "ce_ib": 4.874889850616455, + "ce_orig": 0.44656699895858765, + "epoch": 0.6191674455388597, + "kl_loss": 0.22528545558452606, + "loss_ib": 0.007127744611352682, + "step": 2153 + }, + { + "ce_ib": 3.2139854431152344, + "ce_orig": 0.6972787380218506, + "epoch": 0.6191674455388597, + "kl_loss": 0.17160893976688385, + "loss_ib": 0.004930074792355299, + "step": 2153 + }, + { + "ce_ib": 7.200535774230957, + "ce_orig": 1.5800094604492188, + "epoch": 0.6191674455388597, + "kl_loss": 0.18877124786376953, + "loss_ib": 0.009088248014450073, + "step": 2153 + }, + { + "ce_ib": 3.255420446395874, + "ce_orig": 0.7539072036743164, + "epoch": 0.6191674455388597, + "kl_loss": 0.14085526764392853, + "loss_ib": 0.004663973115384579, + "step": 2153 + }, + { + "ce_ib": 2.290630578994751, + "ce_orig": 0.4853062331676483, + "epoch": 0.6194550291178373, + "kl_loss": 0.1813400536775589, + "loss_ib": 0.004104031249880791, + "step": 2154 + }, + { + "ce_ib": 6.902670860290527, + "ce_orig": 1.1744805574417114, + "epoch": 0.6194550291178373, + "kl_loss": 0.23921817541122437, + "loss_ib": 0.009294852614402771, + "step": 2154 + }, + { + "ce_ib": 5.465926647186279, + "ce_orig": 0.9923774003982544, + "epoch": 0.6194550291178373, + "kl_loss": 0.17027647793293, + "loss_ib": 0.007168691139668226, + "step": 2154 + }, + { + "ce_ib": 4.102112293243408, + "ce_orig": 0.7846276164054871, + "epoch": 0.6194550291178373, + "kl_loss": 0.1937449723482132, + "loss_ib": 0.006039562169462442, + "step": 2154 + }, + { + "epoch": 0.619742612696815, + "grad_norm": 0.142181396484375, + "learning_rate": 9.210458428587791e-06, + "loss": 0.8225, + "step": 2155 + }, + { + "ce_ib": 4.58763313293457, + "ce_orig": 0.8732112646102905, + "epoch": 0.619742612696815, + "kl_loss": 0.7496742010116577, + "loss_ib": 0.012084375135600567, + "step": 2155 + }, + { + "ce_ib": 5.695552349090576, + "ce_orig": 0.8618770241737366, + "epoch": 0.619742612696815, + "kl_loss": 0.24963220953941345, + "loss_ib": 0.008191874250769615, + "step": 2155 + }, + { + "ce_ib": 4.53383731842041, + "ce_orig": 0.9831038117408752, + "epoch": 0.619742612696815, + "kl_loss": 0.2832198143005371, + "loss_ib": 0.007366035599261522, + "step": 2155 + }, + { + "ce_ib": 2.783428430557251, + "ce_orig": 0.6188550591468811, + "epoch": 0.619742612696815, + "kl_loss": 0.20475472509860992, + "loss_ib": 0.00483097555115819, + "step": 2155 + }, + { + "ce_ib": 5.902095317840576, + "ce_orig": 1.290173053741455, + "epoch": 0.6200301962757927, + "kl_loss": 0.20311693847179413, + "loss_ib": 0.007933264598250389, + "step": 2156 + }, + { + "ce_ib": 2.556387424468994, + "ce_orig": 0.5328282117843628, + "epoch": 0.6200301962757927, + "kl_loss": 0.12790322303771973, + "loss_ib": 0.0038354196585714817, + "step": 2156 + }, + { + "ce_ib": 4.599797248840332, + "ce_orig": 0.7863470315933228, + "epoch": 0.6200301962757927, + "kl_loss": 0.39598774909973145, + "loss_ib": 0.008559674955904484, + "step": 2156 + }, + { + "ce_ib": 0.7379006147384644, + "ce_orig": 0.10237868875265121, + "epoch": 0.6200301962757927, + "kl_loss": 0.4458302855491638, + "loss_ib": 0.005196203477680683, + "step": 2156 + }, + { + "ce_ib": 3.04360032081604, + "ce_orig": 0.6047723293304443, + "epoch": 0.6203177798547703, + "kl_loss": 0.15585938096046448, + "loss_ib": 0.004602193832397461, + "step": 2157 + }, + { + "ce_ib": 6.0303473472595215, + "ce_orig": 1.1076823472976685, + "epoch": 0.6203177798547703, + "kl_loss": 0.1526183933019638, + "loss_ib": 0.007556531112641096, + "step": 2157 + }, + { + "ce_ib": 8.617756843566895, + "ce_orig": 1.2954045534133911, + "epoch": 0.6203177798547703, + "kl_loss": 0.2226177304983139, + "loss_ib": 0.010843933559954166, + "step": 2157 + }, + { + "ce_ib": 3.5769262313842773, + "ce_orig": 0.9680632948875427, + "epoch": 0.6203177798547703, + "kl_loss": 0.1847831904888153, + "loss_ib": 0.005424757953733206, + "step": 2157 + }, + { + "ce_ib": 2.7547073364257812, + "ce_orig": 0.5651143789291382, + "epoch": 0.620605363433748, + "kl_loss": 0.20474357903003693, + "loss_ib": 0.004802142735570669, + "step": 2158 + }, + { + "ce_ib": 5.82823371887207, + "ce_orig": 1.1670838594436646, + "epoch": 0.620605363433748, + "kl_loss": 0.26366227865219116, + "loss_ib": 0.008464857004582882, + "step": 2158 + }, + { + "ce_ib": 4.538852214813232, + "ce_orig": 0.7840450406074524, + "epoch": 0.620605363433748, + "kl_loss": 0.1295931041240692, + "loss_ib": 0.005834782961755991, + "step": 2158 + }, + { + "ce_ib": 4.505538463592529, + "ce_orig": 0.8547424077987671, + "epoch": 0.620605363433748, + "kl_loss": 0.24852260947227478, + "loss_ib": 0.006990764755755663, + "step": 2158 + }, + { + "ce_ib": 4.043621063232422, + "ce_orig": 0.552291214466095, + "epoch": 0.6208929470127256, + "kl_loss": 0.2459917962551117, + "loss_ib": 0.0065035391598939896, + "step": 2159 + }, + { + "ce_ib": 4.176906108856201, + "ce_orig": 0.6100576519966125, + "epoch": 0.6208929470127256, + "kl_loss": 0.23731429874897003, + "loss_ib": 0.006550048943608999, + "step": 2159 + }, + { + "ce_ib": 5.633539199829102, + "ce_orig": 0.7912328243255615, + "epoch": 0.6208929470127256, + "kl_loss": 0.23528137803077698, + "loss_ib": 0.00798635371029377, + "step": 2159 + }, + { + "ce_ib": 3.9707818031311035, + "ce_orig": 0.7814981937408447, + "epoch": 0.6208929470127256, + "kl_loss": 0.14381256699562073, + "loss_ib": 0.005408907309174538, + "step": 2159 + }, + { + "epoch": 0.6211805305917032, + "grad_norm": 0.12526297569274902, + "learning_rate": 9.206267664155906e-06, + "loss": 0.8657, + "step": 2160 + }, + { + "ce_ib": 3.3502628803253174, + "ce_orig": 0.6756080389022827, + "epoch": 0.6211805305917032, + "kl_loss": 0.2067308872938156, + "loss_ib": 0.0054175714030861855, + "step": 2160 + }, + { + "ce_ib": 6.1746697425842285, + "ce_orig": 1.1626530885696411, + "epoch": 0.6211805305917032, + "kl_loss": 0.19956183433532715, + "loss_ib": 0.008170288056135178, + "step": 2160 + }, + { + "ce_ib": 4.0786895751953125, + "ce_orig": 0.7112184762954712, + "epoch": 0.6211805305917032, + "kl_loss": 0.1844608187675476, + "loss_ib": 0.005923297256231308, + "step": 2160 + }, + { + "ce_ib": 2.7946383953094482, + "ce_orig": 0.5912685990333557, + "epoch": 0.6211805305917032, + "kl_loss": 0.14733105897903442, + "loss_ib": 0.004267949145287275, + "step": 2160 + }, + { + "ce_ib": 4.302814483642578, + "ce_orig": 0.7075379490852356, + "epoch": 0.6214681141706808, + "kl_loss": 0.30187147855758667, + "loss_ib": 0.0073215290904045105, + "step": 2161 + }, + { + "ce_ib": 6.6178107261657715, + "ce_orig": 1.3355305194854736, + "epoch": 0.6214681141706808, + "kl_loss": 0.15836915373802185, + "loss_ib": 0.00820150226354599, + "step": 2161 + }, + { + "ce_ib": 4.982397556304932, + "ce_orig": 0.6107086539268494, + "epoch": 0.6214681141706808, + "kl_loss": 0.1849871277809143, + "loss_ib": 0.006832268554717302, + "step": 2161 + }, + { + "ce_ib": 8.51357364654541, + "ce_orig": 1.5165003538131714, + "epoch": 0.6214681141706808, + "kl_loss": 0.17437207698822021, + "loss_ib": 0.010257294401526451, + "step": 2161 + }, + { + "ce_ib": 5.731928825378418, + "ce_orig": 1.2617409229278564, + "epoch": 0.6217556977496584, + "kl_loss": 0.2256811112165451, + "loss_ib": 0.007988739758729935, + "step": 2162 + }, + { + "ce_ib": 4.097559928894043, + "ce_orig": 0.8222991824150085, + "epoch": 0.6217556977496584, + "kl_loss": 0.1560210883617401, + "loss_ib": 0.005657770670950413, + "step": 2162 + }, + { + "ce_ib": 3.418379783630371, + "ce_orig": 0.6789857745170593, + "epoch": 0.6217556977496584, + "kl_loss": 0.6642388701438904, + "loss_ib": 0.010060767643153667, + "step": 2162 + }, + { + "ce_ib": 5.275548934936523, + "ce_orig": 1.0893356800079346, + "epoch": 0.6217556977496584, + "kl_loss": 0.2654927968978882, + "loss_ib": 0.007930477149784565, + "step": 2162 + }, + { + "ce_ib": 3.179114580154419, + "ce_orig": 0.7453732490539551, + "epoch": 0.6220432813286362, + "kl_loss": 0.14592987298965454, + "loss_ib": 0.004638413432985544, + "step": 2163 + }, + { + "ce_ib": 3.1268296241760254, + "ce_orig": 0.39869827032089233, + "epoch": 0.6220432813286362, + "kl_loss": 0.300598680973053, + "loss_ib": 0.006132815964519978, + "step": 2163 + }, + { + "ce_ib": 4.536884307861328, + "ce_orig": 0.46046456694602966, + "epoch": 0.6220432813286362, + "kl_loss": 0.32225167751312256, + "loss_ib": 0.007759401109069586, + "step": 2163 + }, + { + "ce_ib": 6.982893943786621, + "ce_orig": 1.188586711883545, + "epoch": 0.6220432813286362, + "kl_loss": 0.19934502243995667, + "loss_ib": 0.008976344019174576, + "step": 2163 + }, + { + "ce_ib": 5.956131935119629, + "ce_orig": 1.2192281484603882, + "epoch": 0.6223308649076138, + "kl_loss": 0.2335318624973297, + "loss_ib": 0.008291450329124928, + "step": 2164 + }, + { + "ce_ib": 6.613880157470703, + "ce_orig": 0.7648369669914246, + "epoch": 0.6223308649076138, + "kl_loss": 0.4214598536491394, + "loss_ib": 0.010828478261828423, + "step": 2164 + }, + { + "ce_ib": 2.8426427841186523, + "ce_orig": 0.4054485559463501, + "epoch": 0.6223308649076138, + "kl_loss": 0.239111065864563, + "loss_ib": 0.005233753938227892, + "step": 2164 + }, + { + "ce_ib": 3.5347137451171875, + "ce_orig": 0.5744460821151733, + "epoch": 0.6223308649076138, + "kl_loss": 0.3418549597263336, + "loss_ib": 0.0069532631896436214, + "step": 2164 + }, + { + "epoch": 0.6226184484865914, + "grad_norm": 0.1269790679216385, + "learning_rate": 9.202066765848925e-06, + "loss": 0.9091, + "step": 2165 + }, + { + "ce_ib": 5.748733043670654, + "ce_orig": 0.7350433468818665, + "epoch": 0.6226184484865914, + "kl_loss": 0.18006457388401031, + "loss_ib": 0.007549378555268049, + "step": 2165 + }, + { + "ce_ib": 3.9702625274658203, + "ce_orig": 0.8265632390975952, + "epoch": 0.6226184484865914, + "kl_loss": 0.1726166307926178, + "loss_ib": 0.005696428939700127, + "step": 2165 + }, + { + "ce_ib": 4.465852737426758, + "ce_orig": 0.675706148147583, + "epoch": 0.6226184484865914, + "kl_loss": 0.2692142724990845, + "loss_ib": 0.007157995365560055, + "step": 2165 + }, + { + "ce_ib": 5.383127212524414, + "ce_orig": 0.9024345874786377, + "epoch": 0.6226184484865914, + "kl_loss": 0.17205694317817688, + "loss_ib": 0.007103696931153536, + "step": 2165 + }, + { + "ce_ib": 4.902032375335693, + "ce_orig": 0.9634964466094971, + "epoch": 0.622906032065569, + "kl_loss": 0.18807223439216614, + "loss_ib": 0.006782754324376583, + "step": 2166 + }, + { + "ce_ib": 3.6285758018493652, + "ce_orig": 0.43647268414497375, + "epoch": 0.622906032065569, + "kl_loss": 0.2331068217754364, + "loss_ib": 0.005959643982350826, + "step": 2166 + }, + { + "ce_ib": 3.601552724838257, + "ce_orig": 0.4294748306274414, + "epoch": 0.622906032065569, + "kl_loss": 0.5800065994262695, + "loss_ib": 0.009401618503034115, + "step": 2166 + }, + { + "ce_ib": 6.203525543212891, + "ce_orig": 1.0982586145401, + "epoch": 0.622906032065569, + "kl_loss": 0.2581806778907776, + "loss_ib": 0.008785332553088665, + "step": 2166 + }, + { + "ce_ib": 6.7412824630737305, + "ce_orig": 1.324432134628296, + "epoch": 0.6231936156445467, + "kl_loss": 0.17983590066432953, + "loss_ib": 0.00853964127600193, + "step": 2167 + }, + { + "ce_ib": 4.752391815185547, + "ce_orig": 0.652934193611145, + "epoch": 0.6231936156445467, + "kl_loss": 0.16791144013404846, + "loss_ib": 0.006431506015360355, + "step": 2167 + }, + { + "ce_ib": 4.0424981117248535, + "ce_orig": 0.7708609104156494, + "epoch": 0.6231936156445467, + "kl_loss": 0.2151108980178833, + "loss_ib": 0.006193606648594141, + "step": 2167 + }, + { + "ce_ib": 3.994393825531006, + "ce_orig": 0.7483550310134888, + "epoch": 0.6231936156445467, + "kl_loss": 0.23240643739700317, + "loss_ib": 0.00631845835596323, + "step": 2167 + }, + { + "ce_ib": 4.431944370269775, + "ce_orig": 0.8531684279441833, + "epoch": 0.6234811992235243, + "kl_loss": 0.2674523890018463, + "loss_ib": 0.0071064685471355915, + "step": 2168 + }, + { + "ce_ib": 3.8455970287323, + "ce_orig": 0.6555244326591492, + "epoch": 0.6234811992235243, + "kl_loss": 0.1013220027089119, + "loss_ib": 0.004858816973865032, + "step": 2168 + }, + { + "ce_ib": 5.234328269958496, + "ce_orig": 0.6332589983940125, + "epoch": 0.6234811992235243, + "kl_loss": 0.20951086282730103, + "loss_ib": 0.0073294369503855705, + "step": 2168 + }, + { + "ce_ib": 2.612290382385254, + "ce_orig": 0.33276164531707764, + "epoch": 0.6234811992235243, + "kl_loss": 0.2590327858924866, + "loss_ib": 0.005202617961913347, + "step": 2168 + }, + { + "ce_ib": 0.6998932957649231, + "ce_orig": 0.09262393414974213, + "epoch": 0.623768782802502, + "kl_loss": 0.44392192363739014, + "loss_ib": 0.00513911247253418, + "step": 2169 + }, + { + "ce_ib": 4.59726095199585, + "ce_orig": 0.5444502234458923, + "epoch": 0.623768782802502, + "kl_loss": 0.1615527868270874, + "loss_ib": 0.006212788634002209, + "step": 2169 + }, + { + "ce_ib": 3.793414831161499, + "ce_orig": 0.6881318092346191, + "epoch": 0.623768782802502, + "kl_loss": 0.2007608562707901, + "loss_ib": 0.005801022984087467, + "step": 2169 + }, + { + "ce_ib": 6.452333927154541, + "ce_orig": 1.242098331451416, + "epoch": 0.623768782802502, + "kl_loss": 0.13996249437332153, + "loss_ib": 0.007851958274841309, + "step": 2169 + }, + { + "epoch": 0.6240563663814797, + "grad_norm": 0.11348122358322144, + "learning_rate": 9.197855743787782e-06, + "loss": 0.8092, + "step": 2170 + }, + { + "ce_ib": 3.959604501724243, + "ce_orig": 0.5853133797645569, + "epoch": 0.6240563663814797, + "kl_loss": 0.17382411658763885, + "loss_ib": 0.005697845481336117, + "step": 2170 + }, + { + "ce_ib": 2.8761467933654785, + "ce_orig": 0.6434140801429749, + "epoch": 0.6240563663814797, + "kl_loss": 0.14189153909683228, + "loss_ib": 0.004295061808079481, + "step": 2170 + }, + { + "ce_ib": 5.131590843200684, + "ce_orig": 0.8321529626846313, + "epoch": 0.6240563663814797, + "kl_loss": 0.2734423875808716, + "loss_ib": 0.00786601472645998, + "step": 2170 + }, + { + "ce_ib": 3.914663791656494, + "ce_orig": 0.641477644443512, + "epoch": 0.6240563663814797, + "kl_loss": 0.22433051466941833, + "loss_ib": 0.006157969124615192, + "step": 2170 + }, + { + "ce_ib": 4.786894798278809, + "ce_orig": 1.1949337720870972, + "epoch": 0.6243439499604573, + "kl_loss": 0.2033330500125885, + "loss_ib": 0.00682022562250495, + "step": 2171 + }, + { + "ce_ib": 4.530778408050537, + "ce_orig": 0.8341248035430908, + "epoch": 0.6243439499604573, + "kl_loss": 0.21387013792991638, + "loss_ib": 0.006669479422271252, + "step": 2171 + }, + { + "ce_ib": 5.406308174133301, + "ce_orig": 1.0737069845199585, + "epoch": 0.6243439499604573, + "kl_loss": 0.1405438780784607, + "loss_ib": 0.00681174686178565, + "step": 2171 + }, + { + "ce_ib": 4.8231892585754395, + "ce_orig": 0.856098473072052, + "epoch": 0.6243439499604573, + "kl_loss": 0.18844470381736755, + "loss_ib": 0.006707636173814535, + "step": 2171 + }, + { + "ce_ib": 5.599411964416504, + "ce_orig": 0.7836161255836487, + "epoch": 0.6246315335394349, + "kl_loss": 0.30393749475479126, + "loss_ib": 0.00863878708332777, + "step": 2172 + }, + { + "ce_ib": 5.092525482177734, + "ce_orig": 0.6020777821540833, + "epoch": 0.6246315335394349, + "kl_loss": 0.19910360872745514, + "loss_ib": 0.007083561271429062, + "step": 2172 + }, + { + "ce_ib": 4.1754584312438965, + "ce_orig": 0.8163929581642151, + "epoch": 0.6246315335394349, + "kl_loss": 0.19805875420570374, + "loss_ib": 0.006156046409159899, + "step": 2172 + }, + { + "ce_ib": 4.207817077636719, + "ce_orig": 0.5880407094955444, + "epoch": 0.6246315335394349, + "kl_loss": 0.1353999674320221, + "loss_ib": 0.00556181650608778, + "step": 2172 + }, + { + "ce_ib": 5.925167083740234, + "ce_orig": 0.7960474491119385, + "epoch": 0.6249191171184125, + "kl_loss": 0.31724268198013306, + "loss_ib": 0.00909759383648634, + "step": 2173 + }, + { + "ce_ib": 4.865185737609863, + "ce_orig": 0.8660761713981628, + "epoch": 0.6249191171184125, + "kl_loss": 0.17593121528625488, + "loss_ib": 0.006624498404562473, + "step": 2173 + }, + { + "ce_ib": 5.087806701660156, + "ce_orig": 1.0389798879623413, + "epoch": 0.6249191171184125, + "kl_loss": 0.19951105117797852, + "loss_ib": 0.007082916796207428, + "step": 2173 + }, + { + "ce_ib": 4.690605163574219, + "ce_orig": 1.0007323026657104, + "epoch": 0.6249191171184125, + "kl_loss": 0.15601330995559692, + "loss_ib": 0.006250737700611353, + "step": 2173 + }, + { + "ce_ib": 6.944530963897705, + "ce_orig": 1.121569275856018, + "epoch": 0.6252067006973901, + "kl_loss": 0.1326197385787964, + "loss_ib": 0.008270728401839733, + "step": 2174 + }, + { + "ce_ib": 4.315922260284424, + "ce_orig": 0.7736250758171082, + "epoch": 0.6252067006973901, + "kl_loss": 0.19191789627075195, + "loss_ib": 0.006235101260244846, + "step": 2174 + }, + { + "ce_ib": 4.545253753662109, + "ce_orig": 0.4803105592727661, + "epoch": 0.6252067006973901, + "kl_loss": 0.2609526515007019, + "loss_ib": 0.007154780440032482, + "step": 2174 + }, + { + "ce_ib": 7.877763748168945, + "ce_orig": 1.6456254720687866, + "epoch": 0.6252067006973901, + "kl_loss": 0.2793182134628296, + "loss_ib": 0.010670945048332214, + "step": 2174 + }, + { + "epoch": 0.6254942842763678, + "grad_norm": 0.15020400285720825, + "learning_rate": 9.19363460811781e-06, + "loss": 0.9053, + "step": 2175 + }, + { + "ce_ib": 4.148181915283203, + "ce_orig": 0.6111353635787964, + "epoch": 0.6254942842763678, + "kl_loss": 0.2148330807685852, + "loss_ib": 0.006296513136476278, + "step": 2175 + }, + { + "ce_ib": 4.982062339782715, + "ce_orig": 0.9503230452537537, + "epoch": 0.6254942842763678, + "kl_loss": 0.24265460669994354, + "loss_ib": 0.007408608216792345, + "step": 2175 + }, + { + "ce_ib": 3.18255615234375, + "ce_orig": 0.3673202693462372, + "epoch": 0.6254942842763678, + "kl_loss": 0.21864053606987, + "loss_ib": 0.005368961486965418, + "step": 2175 + }, + { + "ce_ib": 3.7449817657470703, + "ce_orig": 0.7098271250724792, + "epoch": 0.6254942842763678, + "kl_loss": 0.22189775109291077, + "loss_ib": 0.005963959731161594, + "step": 2175 + }, + { + "ce_ib": 3.4397494792938232, + "ce_orig": 0.8284677863121033, + "epoch": 0.6257818678553455, + "kl_loss": 0.18627086281776428, + "loss_ib": 0.005302457604557276, + "step": 2176 + }, + { + "ce_ib": 2.6903762817382812, + "ce_orig": 0.5287154316902161, + "epoch": 0.6257818678553455, + "kl_loss": 0.23180004954338074, + "loss_ib": 0.00500837666913867, + "step": 2176 + }, + { + "ce_ib": 6.7106451988220215, + "ce_orig": 1.0442821979522705, + "epoch": 0.6257818678553455, + "kl_loss": 0.21391576528549194, + "loss_ib": 0.008849803358316422, + "step": 2176 + }, + { + "ce_ib": 3.9068639278411865, + "ce_orig": 0.7838433980941772, + "epoch": 0.6257818678553455, + "kl_loss": 0.23056644201278687, + "loss_ib": 0.006212528329342604, + "step": 2176 + }, + { + "ce_ib": 4.055504322052002, + "ce_orig": 1.0523951053619385, + "epoch": 0.6260694514343231, + "kl_loss": 0.12532126903533936, + "loss_ib": 0.005308717489242554, + "step": 2177 + }, + { + "ce_ib": 7.474114894866943, + "ce_orig": 1.5449210405349731, + "epoch": 0.6260694514343231, + "kl_loss": 0.2875552773475647, + "loss_ib": 0.010349666699767113, + "step": 2177 + }, + { + "ce_ib": 3.737835168838501, + "ce_orig": 0.6483784914016724, + "epoch": 0.6260694514343231, + "kl_loss": 0.12081165611743927, + "loss_ib": 0.004945951513946056, + "step": 2177 + }, + { + "ce_ib": 4.239599704742432, + "ce_orig": 0.613529622554779, + "epoch": 0.6260694514343231, + "kl_loss": 0.19919347763061523, + "loss_ib": 0.006231534294784069, + "step": 2177 + }, + { + "ce_ib": 5.579134464263916, + "ce_orig": 0.8101744055747986, + "epoch": 0.6263570350133008, + "kl_loss": 0.2537233233451843, + "loss_ib": 0.00811636820435524, + "step": 2178 + }, + { + "ce_ib": 5.626760959625244, + "ce_orig": 1.028881311416626, + "epoch": 0.6263570350133008, + "kl_loss": 0.3112236261367798, + "loss_ib": 0.008738997392356396, + "step": 2178 + }, + { + "ce_ib": 4.482662200927734, + "ce_orig": 0.6208342909812927, + "epoch": 0.6263570350133008, + "kl_loss": 0.2449740320444107, + "loss_ib": 0.006932402495294809, + "step": 2178 + }, + { + "ce_ib": 3.0533101558685303, + "ce_orig": 0.46820297837257385, + "epoch": 0.6263570350133008, + "kl_loss": 0.20178434252738953, + "loss_ib": 0.005071153398603201, + "step": 2178 + }, + { + "ce_ib": 4.9975199699401855, + "ce_orig": 0.5782241225242615, + "epoch": 0.6266446185922784, + "kl_loss": 0.2018904685974121, + "loss_ib": 0.007016425020992756, + "step": 2179 + }, + { + "ce_ib": 5.100138187408447, + "ce_orig": 0.797370970249176, + "epoch": 0.6266446185922784, + "kl_loss": 0.20188194513320923, + "loss_ib": 0.007118957582861185, + "step": 2179 + }, + { + "ce_ib": 5.393182754516602, + "ce_orig": 0.5854423642158508, + "epoch": 0.6266446185922784, + "kl_loss": 0.19240854680538177, + "loss_ib": 0.007317268289625645, + "step": 2179 + }, + { + "ce_ib": 8.490264892578125, + "ce_orig": 1.3876678943634033, + "epoch": 0.6266446185922784, + "kl_loss": 0.23732757568359375, + "loss_ib": 0.010863539762794971, + "step": 2179 + }, + { + "epoch": 0.626932202171256, + "grad_norm": 0.1686592698097229, + "learning_rate": 9.189403369008704e-06, + "loss": 0.8573, + "step": 2180 + }, + { + "ce_ib": 4.988306045532227, + "ce_orig": 0.9063957929611206, + "epoch": 0.626932202171256, + "kl_loss": 0.3024927079677582, + "loss_ib": 0.008013232611119747, + "step": 2180 + }, + { + "ce_ib": 4.599817752838135, + "ce_orig": 0.6906898617744446, + "epoch": 0.626932202171256, + "kl_loss": 0.21359705924987793, + "loss_ib": 0.006735788658261299, + "step": 2180 + }, + { + "ce_ib": 5.489274501800537, + "ce_orig": 0.6280763745307922, + "epoch": 0.626932202171256, + "kl_loss": 0.2147134393453598, + "loss_ib": 0.007636409252882004, + "step": 2180 + }, + { + "ce_ib": 5.202547073364258, + "ce_orig": 0.2536402642726898, + "epoch": 0.626932202171256, + "kl_loss": 0.4179847836494446, + "loss_ib": 0.009382395073771477, + "step": 2180 + }, + { + "ce_ib": 5.584418296813965, + "ce_orig": 1.0076138973236084, + "epoch": 0.6272197857502336, + "kl_loss": 0.2288784384727478, + "loss_ib": 0.007873202674090862, + "step": 2181 + }, + { + "ce_ib": 3.943537950515747, + "ce_orig": 0.7420039772987366, + "epoch": 0.6272197857502336, + "kl_loss": 0.1976877897977829, + "loss_ib": 0.005920416209846735, + "step": 2181 + }, + { + "ce_ib": 4.992494583129883, + "ce_orig": 1.039832592010498, + "epoch": 0.6272197857502336, + "kl_loss": 0.19544944167137146, + "loss_ib": 0.006946989335119724, + "step": 2181 + }, + { + "ce_ib": 5.205137252807617, + "ce_orig": 0.9288253784179688, + "epoch": 0.6272197857502336, + "kl_loss": 0.12785544991493225, + "loss_ib": 0.006483691744506359, + "step": 2181 + }, + { + "ce_ib": 2.9786932468414307, + "ce_orig": 0.4201512038707733, + "epoch": 0.6275073693292113, + "kl_loss": 0.15608945488929749, + "loss_ib": 0.004539587534964085, + "step": 2182 + }, + { + "ce_ib": 4.0746870040893555, + "ce_orig": 0.8631406426429749, + "epoch": 0.6275073693292113, + "kl_loss": 0.22282622754573822, + "loss_ib": 0.006302949041128159, + "step": 2182 + }, + { + "ce_ib": 4.037292003631592, + "ce_orig": 0.6439822316169739, + "epoch": 0.6275073693292113, + "kl_loss": 0.3364154100418091, + "loss_ib": 0.007401445880532265, + "step": 2182 + }, + { + "ce_ib": 5.034689903259277, + "ce_orig": 1.1084098815917969, + "epoch": 0.6275073693292113, + "kl_loss": 0.2528407871723175, + "loss_ib": 0.0075630974024534225, + "step": 2182 + }, + { + "ce_ib": 3.346541166305542, + "ce_orig": 0.46885353326797485, + "epoch": 0.627794952908189, + "kl_loss": 0.2044687420129776, + "loss_ib": 0.005391228478401899, + "step": 2183 + }, + { + "ce_ib": 3.0896904468536377, + "ce_orig": 0.5667807459831238, + "epoch": 0.627794952908189, + "kl_loss": 0.21380160748958588, + "loss_ib": 0.005227706395089626, + "step": 2183 + }, + { + "ce_ib": 5.758452892303467, + "ce_orig": 0.9883688688278198, + "epoch": 0.627794952908189, + "kl_loss": 0.2595304846763611, + "loss_ib": 0.008353757672011852, + "step": 2183 + }, + { + "ce_ib": 4.42169189453125, + "ce_orig": 1.0059806108474731, + "epoch": 0.627794952908189, + "kl_loss": 0.19752058386802673, + "loss_ib": 0.006396898068487644, + "step": 2183 + }, + { + "ce_ib": 4.26932954788208, + "ce_orig": 0.450920432806015, + "epoch": 0.6280825364871666, + "kl_loss": 0.3021624684333801, + "loss_ib": 0.007290954235941172, + "step": 2184 + }, + { + "ce_ib": 7.29946231842041, + "ce_orig": 1.1745415925979614, + "epoch": 0.6280825364871666, + "kl_loss": 0.1744268536567688, + "loss_ib": 0.009043730795383453, + "step": 2184 + }, + { + "ce_ib": 5.290059566497803, + "ce_orig": 0.9789856672286987, + "epoch": 0.6280825364871666, + "kl_loss": 0.2083434909582138, + "loss_ib": 0.007373494561761618, + "step": 2184 + }, + { + "ce_ib": 2.613187074661255, + "ce_orig": 0.270492285490036, + "epoch": 0.6280825364871666, + "kl_loss": 0.2857426106929779, + "loss_ib": 0.005470613017678261, + "step": 2184 + }, + { + "epoch": 0.6283701200661442, + "grad_norm": 0.12443746626377106, + "learning_rate": 9.185162036654501e-06, + "loss": 0.8149, + "step": 2185 + }, + { + "ce_ib": 2.939107656478882, + "ce_orig": 0.6765180230140686, + "epoch": 0.6283701200661442, + "kl_loss": 0.17190474271774292, + "loss_ib": 0.004658155143260956, + "step": 2185 + }, + { + "ce_ib": 5.501664161682129, + "ce_orig": 0.736784040927887, + "epoch": 0.6283701200661442, + "kl_loss": 0.26856160163879395, + "loss_ib": 0.008187280036509037, + "step": 2185 + }, + { + "ce_ib": 5.371570587158203, + "ce_orig": 0.840857744216919, + "epoch": 0.6283701200661442, + "kl_loss": 0.2873210906982422, + "loss_ib": 0.008244781754910946, + "step": 2185 + }, + { + "ce_ib": 4.46457052230835, + "ce_orig": 0.5719901919364929, + "epoch": 0.6283701200661442, + "kl_loss": 0.3405674695968628, + "loss_ib": 0.007870244793593884, + "step": 2185 + }, + { + "ce_ib": 2.9551923274993896, + "ce_orig": 0.8033626079559326, + "epoch": 0.6286577036451219, + "kl_loss": 0.20071572065353394, + "loss_ib": 0.004962349310517311, + "step": 2186 + }, + { + "ce_ib": 7.108614444732666, + "ce_orig": 1.4395195245742798, + "epoch": 0.6286577036451219, + "kl_loss": 0.18850986659526825, + "loss_ib": 0.008993713185191154, + "step": 2186 + }, + { + "ce_ib": 3.2102608680725098, + "ce_orig": 0.24597090482711792, + "epoch": 0.6286577036451219, + "kl_loss": 0.6582862138748169, + "loss_ib": 0.009793123230338097, + "step": 2186 + }, + { + "ce_ib": 4.990241050720215, + "ce_orig": 0.8994059562683105, + "epoch": 0.6286577036451219, + "kl_loss": 0.19860996305942535, + "loss_ib": 0.006976340897381306, + "step": 2186 + }, + { + "ce_ib": 4.043550968170166, + "ce_orig": 0.8206343650817871, + "epoch": 0.6289452872240995, + "kl_loss": 0.24080049991607666, + "loss_ib": 0.006451555993407965, + "step": 2187 + }, + { + "ce_ib": 3.566204309463501, + "ce_orig": 0.5830263495445251, + "epoch": 0.6289452872240995, + "kl_loss": 0.2044263780117035, + "loss_ib": 0.005610468331724405, + "step": 2187 + }, + { + "ce_ib": 4.842691898345947, + "ce_orig": 0.9244598150253296, + "epoch": 0.6289452872240995, + "kl_loss": 0.16062550246715546, + "loss_ib": 0.0064489468932151794, + "step": 2187 + }, + { + "ce_ib": 6.935279846191406, + "ce_orig": 1.1726163625717163, + "epoch": 0.6289452872240995, + "kl_loss": 0.3078581988811493, + "loss_ib": 0.010013861581683159, + "step": 2187 + }, + { + "ce_ib": 5.334938049316406, + "ce_orig": 0.9508473873138428, + "epoch": 0.6292328708030771, + "kl_loss": 0.2513980567455292, + "loss_ib": 0.007848918437957764, + "step": 2188 + }, + { + "ce_ib": 4.609929084777832, + "ce_orig": 0.7305771708488464, + "epoch": 0.6292328708030771, + "kl_loss": 0.22614625096321106, + "loss_ib": 0.006871391553431749, + "step": 2188 + }, + { + "ce_ib": 4.794405460357666, + "ce_orig": 1.0350334644317627, + "epoch": 0.6292328708030771, + "kl_loss": 0.2020883858203888, + "loss_ib": 0.0068152891471982, + "step": 2188 + }, + { + "ce_ib": 4.382978916168213, + "ce_orig": 0.8123695850372314, + "epoch": 0.6292328708030771, + "kl_loss": 0.13421489298343658, + "loss_ib": 0.00572512811049819, + "step": 2188 + }, + { + "ce_ib": 6.359029769897461, + "ce_orig": 1.1148920059204102, + "epoch": 0.6295204543820548, + "kl_loss": 0.20672211050987244, + "loss_ib": 0.008426250889897346, + "step": 2189 + }, + { + "ce_ib": 5.616842269897461, + "ce_orig": 1.1702699661254883, + "epoch": 0.6295204543820548, + "kl_loss": 0.21212296187877655, + "loss_ib": 0.007738071959465742, + "step": 2189 + }, + { + "ce_ib": 8.12535285949707, + "ce_orig": 1.4454329013824463, + "epoch": 0.6295204543820548, + "kl_loss": 0.21126307547092438, + "loss_ib": 0.0102379834279418, + "step": 2189 + }, + { + "ce_ib": 3.9631290435791016, + "ce_orig": 0.6833024024963379, + "epoch": 0.6295204543820548, + "kl_loss": 0.19328176975250244, + "loss_ib": 0.005895947106182575, + "step": 2189 + }, + { + "epoch": 0.6298080379610325, + "grad_norm": 0.12672469019889832, + "learning_rate": 9.180910621273555e-06, + "loss": 0.8756, + "step": 2190 + }, + { + "ce_ib": 5.356502532958984, + "ce_orig": 1.1100050210952759, + "epoch": 0.6298080379610325, + "kl_loss": 0.46440964937210083, + "loss_ib": 0.01000059861689806, + "step": 2190 + }, + { + "ce_ib": 5.063337802886963, + "ce_orig": 0.9074004292488098, + "epoch": 0.6298080379610325, + "kl_loss": 0.23114615678787231, + "loss_ib": 0.007374798879027367, + "step": 2190 + }, + { + "ce_ib": 4.956969261169434, + "ce_orig": 0.9582605957984924, + "epoch": 0.6298080379610325, + "kl_loss": 0.18644684553146362, + "loss_ib": 0.006821437738835812, + "step": 2190 + }, + { + "ce_ib": 2.8803234100341797, + "ce_orig": 0.511568546295166, + "epoch": 0.6298080379610325, + "kl_loss": 0.1577109396457672, + "loss_ib": 0.004457432776689529, + "step": 2190 + }, + { + "ce_ib": 5.70156717300415, + "ce_orig": 1.2631670236587524, + "epoch": 0.6300956215400101, + "kl_loss": 0.2205638736486435, + "loss_ib": 0.007907206192612648, + "step": 2191 + }, + { + "ce_ib": 7.334299087524414, + "ce_orig": 1.5894874334335327, + "epoch": 0.6300956215400101, + "kl_loss": 0.190862238407135, + "loss_ib": 0.009242921136319637, + "step": 2191 + }, + { + "ce_ib": 2.6760811805725098, + "ce_orig": 0.46456319093704224, + "epoch": 0.6300956215400101, + "kl_loss": 0.1990666687488556, + "loss_ib": 0.0046667479909956455, + "step": 2191 + }, + { + "ce_ib": 3.3284518718719482, + "ce_orig": 0.7095261216163635, + "epoch": 0.6300956215400101, + "kl_loss": 0.21444392204284668, + "loss_ib": 0.00547289103269577, + "step": 2191 + }, + { + "ce_ib": 4.748816967010498, + "ce_orig": 0.964838981628418, + "epoch": 0.6303832051189877, + "kl_loss": 0.17607776820659637, + "loss_ib": 0.006509594619274139, + "step": 2192 + }, + { + "ce_ib": 8.192007064819336, + "ce_orig": 1.2936049699783325, + "epoch": 0.6303832051189877, + "kl_loss": 0.13866834342479706, + "loss_ib": 0.009578689932823181, + "step": 2192 + }, + { + "ce_ib": 4.324428081512451, + "ce_orig": 0.7115892171859741, + "epoch": 0.6303832051189877, + "kl_loss": 0.22969940304756165, + "loss_ib": 0.006621422246098518, + "step": 2192 + }, + { + "ce_ib": 2.846266031265259, + "ce_orig": 0.5144986510276794, + "epoch": 0.6303832051189877, + "kl_loss": 0.14948999881744385, + "loss_ib": 0.004341166000813246, + "step": 2192 + }, + { + "ce_ib": 6.996910095214844, + "ce_orig": 1.1969375610351562, + "epoch": 0.6306707886979653, + "kl_loss": 0.24093547463417053, + "loss_ib": 0.009406264871358871, + "step": 2193 + }, + { + "ce_ib": 5.515145778656006, + "ce_orig": 1.0158809423446655, + "epoch": 0.6306707886979653, + "kl_loss": 0.14953389763832092, + "loss_ib": 0.007010484579950571, + "step": 2193 + }, + { + "ce_ib": 6.435831546783447, + "ce_orig": 1.3188444375991821, + "epoch": 0.6306707886979653, + "kl_loss": 0.22116480767726898, + "loss_ib": 0.008647480048239231, + "step": 2193 + }, + { + "ce_ib": 3.44875431060791, + "ce_orig": 0.5087660551071167, + "epoch": 0.6306707886979653, + "kl_loss": 0.1745607703924179, + "loss_ib": 0.005194361787289381, + "step": 2193 + }, + { + "ce_ib": 4.487468242645264, + "ce_orig": 0.6030293703079224, + "epoch": 0.630958372276943, + "kl_loss": 0.33941227197647095, + "loss_ib": 0.007881591096520424, + "step": 2194 + }, + { + "ce_ib": 1.6230883598327637, + "ce_orig": 0.3181823790073395, + "epoch": 0.630958372276943, + "kl_loss": 0.1293061077594757, + "loss_ib": 0.002916149329394102, + "step": 2194 + }, + { + "ce_ib": 3.889341115951538, + "ce_orig": 0.5213361978530884, + "epoch": 0.630958372276943, + "kl_loss": 0.21425241231918335, + "loss_ib": 0.0060318647883832455, + "step": 2194 + }, + { + "ce_ib": 5.395575523376465, + "ce_orig": 0.5960964560508728, + "epoch": 0.630958372276943, + "kl_loss": 0.2363138645887375, + "loss_ib": 0.007758714258670807, + "step": 2194 + }, + { + "epoch": 0.6312459558559206, + "grad_norm": 0.1237902119755745, + "learning_rate": 9.176649133108512e-06, + "loss": 0.8671, + "step": 2195 + }, + { + "ce_ib": 6.758686542510986, + "ce_orig": 1.0319459438323975, + "epoch": 0.6312459558559206, + "kl_loss": 0.19627748429775238, + "loss_ib": 0.008721461519598961, + "step": 2195 + }, + { + "ce_ib": 6.924462795257568, + "ce_orig": 1.2270729541778564, + "epoch": 0.6312459558559206, + "kl_loss": 0.2227768898010254, + "loss_ib": 0.009152231737971306, + "step": 2195 + }, + { + "ce_ib": 4.6350274085998535, + "ce_orig": 0.7698379755020142, + "epoch": 0.6312459558559206, + "kl_loss": 0.2448214441537857, + "loss_ib": 0.007083241827785969, + "step": 2195 + }, + { + "ce_ib": 4.744444370269775, + "ce_orig": 0.8593710660934448, + "epoch": 0.6312459558559206, + "kl_loss": 0.1782427430152893, + "loss_ib": 0.006526871584355831, + "step": 2195 + }, + { + "ce_ib": 3.014436960220337, + "ce_orig": 0.7454186081886292, + "epoch": 0.6315335394348983, + "kl_loss": 0.1269155889749527, + "loss_ib": 0.0042835925705730915, + "step": 2196 + }, + { + "ce_ib": 4.767195224761963, + "ce_orig": 0.7007725238800049, + "epoch": 0.6315335394348983, + "kl_loss": 0.19323253631591797, + "loss_ib": 0.0066995201632380486, + "step": 2196 + }, + { + "ce_ib": 4.345602512359619, + "ce_orig": 0.7724635004997253, + "epoch": 0.6315335394348983, + "kl_loss": 0.19796407222747803, + "loss_ib": 0.006325243506580591, + "step": 2196 + }, + { + "ce_ib": 5.568161964416504, + "ce_orig": 0.8215292096138, + "epoch": 0.6315335394348983, + "kl_loss": 0.2492532730102539, + "loss_ib": 0.008060694672167301, + "step": 2196 + }, + { + "ce_ib": 2.5041520595550537, + "ce_orig": 0.3059498965740204, + "epoch": 0.6318211230138759, + "kl_loss": 0.12402266263961792, + "loss_ib": 0.003744378685951233, + "step": 2197 + }, + { + "ce_ib": 6.271118640899658, + "ce_orig": 1.026820182800293, + "epoch": 0.6318211230138759, + "kl_loss": 0.24393972754478455, + "loss_ib": 0.008710515685379505, + "step": 2197 + }, + { + "ce_ib": 4.727241516113281, + "ce_orig": 1.0130637884140015, + "epoch": 0.6318211230138759, + "kl_loss": 0.17855679988861084, + "loss_ib": 0.006512810010462999, + "step": 2197 + }, + { + "ce_ib": 4.50971794128418, + "ce_orig": 0.5322487950325012, + "epoch": 0.6318211230138759, + "kl_loss": 0.225905179977417, + "loss_ib": 0.006768770050257444, + "step": 2197 + }, + { + "ce_ib": 3.852827548980713, + "ce_orig": 0.5307216048240662, + "epoch": 0.6321087065928536, + "kl_loss": 0.2056402713060379, + "loss_ib": 0.0059092300944030285, + "step": 2198 + }, + { + "ce_ib": 2.778536319732666, + "ce_orig": 0.4526360332965851, + "epoch": 0.6321087065928536, + "kl_loss": 0.37721559405326843, + "loss_ib": 0.006550692021846771, + "step": 2198 + }, + { + "ce_ib": 1.447381615638733, + "ce_orig": 0.17611360549926758, + "epoch": 0.6321087065928536, + "kl_loss": 0.430044949054718, + "loss_ib": 0.0057478309608995914, + "step": 2198 + }, + { + "ce_ib": 6.993647575378418, + "ce_orig": 0.8321609497070312, + "epoch": 0.6321087065928536, + "kl_loss": 0.23529650270938873, + "loss_ib": 0.009346612729132175, + "step": 2198 + }, + { + "ce_ib": 5.959378719329834, + "ce_orig": 1.1883234977722168, + "epoch": 0.6323962901718312, + "kl_loss": 0.22546645998954773, + "loss_ib": 0.008214043453335762, + "step": 2199 + }, + { + "ce_ib": 7.8797430992126465, + "ce_orig": 1.1563398838043213, + "epoch": 0.6323962901718312, + "kl_loss": 0.18117791414260864, + "loss_ib": 0.009691521525382996, + "step": 2199 + }, + { + "ce_ib": 3.788001775741577, + "ce_orig": 0.5305266380310059, + "epoch": 0.6323962901718312, + "kl_loss": 0.271159291267395, + "loss_ib": 0.006499594077467918, + "step": 2199 + }, + { + "ce_ib": 5.119014739990234, + "ce_orig": 1.1302251815795898, + "epoch": 0.6323962901718312, + "kl_loss": 0.16404278576374054, + "loss_ib": 0.006759442389011383, + "step": 2199 + }, + { + "epoch": 0.6326838737508088, + "grad_norm": 0.13028459250926971, + "learning_rate": 9.172377582426286e-06, + "loss": 0.8303, + "step": 2200 + }, + { + "ce_ib": 3.0526628494262695, + "ce_orig": 0.7973530888557434, + "epoch": 0.6326838737508088, + "kl_loss": 0.18761806190013885, + "loss_ib": 0.00492884311825037, + "step": 2200 + }, + { + "ce_ib": 6.877143383026123, + "ce_orig": 1.1326839923858643, + "epoch": 0.6326838737508088, + "kl_loss": 0.17816945910453796, + "loss_ib": 0.008658838458359241, + "step": 2200 + }, + { + "ce_ib": 8.918926239013672, + "ce_orig": 1.753047227859497, + "epoch": 0.6326838737508088, + "kl_loss": 0.19775111973285675, + "loss_ib": 0.010896436870098114, + "step": 2200 + }, + { + "ce_ib": 1.8500012159347534, + "ce_orig": 0.39816170930862427, + "epoch": 0.6326838737508088, + "kl_loss": 0.10897579789161682, + "loss_ib": 0.0029397590551525354, + "step": 2200 + }, + { + "ce_ib": 3.226393461227417, + "ce_orig": 0.5321866869926453, + "epoch": 0.6329714573297864, + "kl_loss": 0.1124526634812355, + "loss_ib": 0.004350919742137194, + "step": 2201 + }, + { + "ce_ib": 4.929792881011963, + "ce_orig": 0.8258790969848633, + "epoch": 0.6329714573297864, + "kl_loss": 0.27273306250572205, + "loss_ib": 0.007657123729586601, + "step": 2201 + }, + { + "ce_ib": 7.05640983581543, + "ce_orig": 1.0518056154251099, + "epoch": 0.6329714573297864, + "kl_loss": 0.14790643751621246, + "loss_ib": 0.008535473607480526, + "step": 2201 + }, + { + "ce_ib": 9.117015838623047, + "ce_orig": 1.6696200370788574, + "epoch": 0.6329714573297864, + "kl_loss": 0.22694717347621918, + "loss_ib": 0.011386487632989883, + "step": 2201 + }, + { + "ce_ib": 1.834923505783081, + "ce_orig": 0.27851182222366333, + "epoch": 0.6332590409087641, + "kl_loss": 0.19105659425258636, + "loss_ib": 0.003745489288121462, + "step": 2202 + }, + { + "ce_ib": 5.729212284088135, + "ce_orig": 1.29579758644104, + "epoch": 0.6332590409087641, + "kl_loss": 0.22180750966072083, + "loss_ib": 0.007947287522256374, + "step": 2202 + }, + { + "ce_ib": 6.4219279289245605, + "ce_orig": 1.1227483749389648, + "epoch": 0.6332590409087641, + "kl_loss": 0.10874571651220322, + "loss_ib": 0.007509384769946337, + "step": 2202 + }, + { + "ce_ib": 3.1834561824798584, + "ce_orig": 0.552497386932373, + "epoch": 0.6332590409087641, + "kl_loss": 0.15675875544548035, + "loss_ib": 0.004751043859869242, + "step": 2202 + }, + { + "ce_ib": 4.069188594818115, + "ce_orig": 0.9128319621086121, + "epoch": 0.6335466244877418, + "kl_loss": 0.15898245573043823, + "loss_ib": 0.0056590125896036625, + "step": 2203 + }, + { + "ce_ib": 5.134266376495361, + "ce_orig": 0.8305782079696655, + "epoch": 0.6335466244877418, + "kl_loss": 0.377849817276001, + "loss_ib": 0.008912764489650726, + "step": 2203 + }, + { + "ce_ib": 4.165460586547852, + "ce_orig": 0.5774954557418823, + "epoch": 0.6335466244877418, + "kl_loss": 0.213725745677948, + "loss_ib": 0.006302717607468367, + "step": 2203 + }, + { + "ce_ib": 2.7415518760681152, + "ce_orig": 0.6307628750801086, + "epoch": 0.6335466244877418, + "kl_loss": 0.16675925254821777, + "loss_ib": 0.0044091446325182915, + "step": 2203 + }, + { + "ce_ib": 5.103527545928955, + "ce_orig": 0.9893144965171814, + "epoch": 0.6338342080667194, + "kl_loss": 0.13826075196266174, + "loss_ib": 0.00648613553494215, + "step": 2204 + }, + { + "ce_ib": 3.3791143894195557, + "ce_orig": 0.4623070955276489, + "epoch": 0.6338342080667194, + "kl_loss": 0.1429978907108307, + "loss_ib": 0.0048090931959450245, + "step": 2204 + }, + { + "ce_ib": 5.151632308959961, + "ce_orig": 0.8678387999534607, + "epoch": 0.6338342080667194, + "kl_loss": 0.2878186106681824, + "loss_ib": 0.008029818534851074, + "step": 2204 + }, + { + "ce_ib": 2.9871344566345215, + "ce_orig": 0.6721150279045105, + "epoch": 0.6338342080667194, + "kl_loss": 0.13700687885284424, + "loss_ib": 0.004357203375548124, + "step": 2204 + }, + { + "epoch": 0.634121791645697, + "grad_norm": 0.1520298421382904, + "learning_rate": 9.168095979518035e-06, + "loss": 0.8404, + "step": 2205 + }, + { + "ce_ib": 4.8783369064331055, + "ce_orig": 1.1538881063461304, + "epoch": 0.634121791645697, + "kl_loss": 0.10744345188140869, + "loss_ib": 0.005952771753072739, + "step": 2205 + }, + { + "ce_ib": 3.7751576900482178, + "ce_orig": 0.2712635397911072, + "epoch": 0.634121791645697, + "kl_loss": 0.2157440483570099, + "loss_ib": 0.0059325979091227055, + "step": 2205 + }, + { + "ce_ib": 6.563039779663086, + "ce_orig": 1.244568943977356, + "epoch": 0.634121791645697, + "kl_loss": 0.27706634998321533, + "loss_ib": 0.00933370366692543, + "step": 2205 + }, + { + "ce_ib": 4.557952404022217, + "ce_orig": 0.6225706338882446, + "epoch": 0.634121791645697, + "kl_loss": 0.31502047181129456, + "loss_ib": 0.007708156947046518, + "step": 2205 + }, + { + "ce_ib": 5.521819114685059, + "ce_orig": 0.9180542230606079, + "epoch": 0.6344093752246747, + "kl_loss": 0.2098533660173416, + "loss_ib": 0.007620352320373058, + "step": 2206 + }, + { + "ce_ib": 3.7038090229034424, + "ce_orig": 0.9125936031341553, + "epoch": 0.6344093752246747, + "kl_loss": 0.16848035156726837, + "loss_ib": 0.005388612858951092, + "step": 2206 + }, + { + "ce_ib": 2.951793909072876, + "ce_orig": 0.45831578969955444, + "epoch": 0.6344093752246747, + "kl_loss": 0.15553949773311615, + "loss_ib": 0.004507188685238361, + "step": 2206 + }, + { + "ce_ib": 2.742610454559326, + "ce_orig": 0.3560488224029541, + "epoch": 0.6344093752246747, + "kl_loss": 0.4331458806991577, + "loss_ib": 0.007074069231748581, + "step": 2206 + }, + { + "ce_ib": 3.0339300632476807, + "ce_orig": 0.5373973846435547, + "epoch": 0.6346969588036523, + "kl_loss": 0.28881320357322693, + "loss_ib": 0.005922062322497368, + "step": 2207 + }, + { + "ce_ib": 3.7979543209075928, + "ce_orig": 0.42616796493530273, + "epoch": 0.6346969588036523, + "kl_loss": 0.21474269032478333, + "loss_ib": 0.00594538077712059, + "step": 2207 + }, + { + "ce_ib": 5.790818214416504, + "ce_orig": 0.8668949604034424, + "epoch": 0.6346969588036523, + "kl_loss": 0.2167271077632904, + "loss_ib": 0.007958089001476765, + "step": 2207 + }, + { + "ce_ib": 4.634560585021973, + "ce_orig": 0.7365112900733948, + "epoch": 0.6346969588036523, + "kl_loss": 0.14533138275146484, + "loss_ib": 0.006087874062359333, + "step": 2207 + }, + { + "ce_ib": 6.4656548500061035, + "ce_orig": 0.8318517804145813, + "epoch": 0.6349845423826299, + "kl_loss": 0.22589851915836334, + "loss_ib": 0.008724640123546124, + "step": 2208 + }, + { + "ce_ib": 3.7671382427215576, + "ce_orig": 0.5915427803993225, + "epoch": 0.6349845423826299, + "kl_loss": 0.10247278958559036, + "loss_ib": 0.004791866056621075, + "step": 2208 + }, + { + "ce_ib": 4.118164539337158, + "ce_orig": 0.7888700366020203, + "epoch": 0.6349845423826299, + "kl_loss": 0.19865640997886658, + "loss_ib": 0.006104728672653437, + "step": 2208 + }, + { + "ce_ib": 7.100134372711182, + "ce_orig": 1.3124192953109741, + "epoch": 0.6349845423826299, + "kl_loss": 0.13706496357917786, + "loss_ib": 0.008470783941447735, + "step": 2208 + }, + { + "ce_ib": 4.084353923797607, + "ce_orig": 0.9040095210075378, + "epoch": 0.6352721259616076, + "kl_loss": 0.1941271424293518, + "loss_ib": 0.006025625392794609, + "step": 2209 + }, + { + "ce_ib": 5.486684322357178, + "ce_orig": 0.9122800230979919, + "epoch": 0.6352721259616076, + "kl_loss": 0.22840403020381927, + "loss_ib": 0.007770724594593048, + "step": 2209 + }, + { + "ce_ib": 1.8039683103561401, + "ce_orig": 0.45256277918815613, + "epoch": 0.6352721259616076, + "kl_loss": 0.13264043629169464, + "loss_ib": 0.0031303726136684418, + "step": 2209 + }, + { + "ce_ib": 5.049233913421631, + "ce_orig": 0.687046229839325, + "epoch": 0.6352721259616076, + "kl_loss": 0.35874372720718384, + "loss_ib": 0.008636671118438244, + "step": 2209 + }, + { + "epoch": 0.6355597095405853, + "grad_norm": 0.14466316998004913, + "learning_rate": 9.163804334699133e-06, + "loss": 0.8632, + "step": 2210 + }, + { + "ce_ib": 5.3636274337768555, + "ce_orig": 0.6622792482376099, + "epoch": 0.6355597095405853, + "kl_loss": 0.2323140799999237, + "loss_ib": 0.0076867686584591866, + "step": 2210 + }, + { + "ce_ib": 5.558813571929932, + "ce_orig": 1.1827737092971802, + "epoch": 0.6355597095405853, + "kl_loss": 0.20273897051811218, + "loss_ib": 0.007586203515529633, + "step": 2210 + }, + { + "ce_ib": 4.615037441253662, + "ce_orig": 0.8739094138145447, + "epoch": 0.6355597095405853, + "kl_loss": 0.29081958532333374, + "loss_ib": 0.007523233070969582, + "step": 2210 + }, + { + "ce_ib": 2.6187686920166016, + "ce_orig": 0.5757517218589783, + "epoch": 0.6355597095405853, + "kl_loss": 0.12387143075466156, + "loss_ib": 0.0038574831560254097, + "step": 2210 + }, + { + "ce_ib": 3.3548741340637207, + "ce_orig": 0.7138122320175171, + "epoch": 0.6358472931195629, + "kl_loss": 0.26162633299827576, + "loss_ib": 0.005971136968582869, + "step": 2211 + }, + { + "ce_ib": 3.489084005355835, + "ce_orig": 0.44013887643814087, + "epoch": 0.6358472931195629, + "kl_loss": 0.1802653670310974, + "loss_ib": 0.005291737150400877, + "step": 2211 + }, + { + "ce_ib": 6.446778297424316, + "ce_orig": 1.1365994215011597, + "epoch": 0.6358472931195629, + "kl_loss": 0.17198850214481354, + "loss_ib": 0.0081666624173522, + "step": 2211 + }, + { + "ce_ib": 6.723930358886719, + "ce_orig": 1.400697946548462, + "epoch": 0.6358472931195629, + "kl_loss": 0.21475178003311157, + "loss_ib": 0.00887144822627306, + "step": 2211 + }, + { + "ce_ib": 5.876286029815674, + "ce_orig": 1.1697585582733154, + "epoch": 0.6361348766985405, + "kl_loss": 0.18463635444641113, + "loss_ib": 0.00772264925763011, + "step": 2212 + }, + { + "ce_ib": 3.266535758972168, + "ce_orig": 0.7767891883850098, + "epoch": 0.6361348766985405, + "kl_loss": 0.2117159068584442, + "loss_ib": 0.005383694544434547, + "step": 2212 + }, + { + "ce_ib": 6.199361324310303, + "ce_orig": 0.8212311267852783, + "epoch": 0.6361348766985405, + "kl_loss": 0.22937805950641632, + "loss_ib": 0.008493142202496529, + "step": 2212 + }, + { + "ce_ib": 4.997589111328125, + "ce_orig": 0.712940514087677, + "epoch": 0.6361348766985405, + "kl_loss": 0.22071903944015503, + "loss_ib": 0.007204779423773289, + "step": 2212 + }, + { + "ce_ib": 6.436483383178711, + "ce_orig": 0.7925613522529602, + "epoch": 0.6364224602775181, + "kl_loss": 0.40336403250694275, + "loss_ib": 0.010470123030245304, + "step": 2213 + }, + { + "ce_ib": 2.260660409927368, + "ce_orig": 0.22176764905452728, + "epoch": 0.6364224602775181, + "kl_loss": 0.16642223298549652, + "loss_ib": 0.003924882505089045, + "step": 2213 + }, + { + "ce_ib": 6.506783962249756, + "ce_orig": 0.8177502155303955, + "epoch": 0.6364224602775181, + "kl_loss": 0.22268158197402954, + "loss_ib": 0.008733599446713924, + "step": 2213 + }, + { + "ce_ib": 5.496917724609375, + "ce_orig": 0.9146586656570435, + "epoch": 0.6364224602775181, + "kl_loss": 0.1801244169473648, + "loss_ib": 0.007298161741346121, + "step": 2213 + }, + { + "ce_ib": 2.444319009780884, + "ce_orig": 0.44191423058509827, + "epoch": 0.6367100438564958, + "kl_loss": 0.15850010514259338, + "loss_ib": 0.004029319621622562, + "step": 2214 + }, + { + "ce_ib": 3.652113437652588, + "ce_orig": 0.6008463501930237, + "epoch": 0.6367100438564958, + "kl_loss": 0.2332906872034073, + "loss_ib": 0.0059850201942026615, + "step": 2214 + }, + { + "ce_ib": 3.4007692337036133, + "ce_orig": 0.8491352200508118, + "epoch": 0.6367100438564958, + "kl_loss": 0.21809153258800507, + "loss_ib": 0.005581684410572052, + "step": 2214 + }, + { + "ce_ib": 3.7371294498443604, + "ce_orig": 0.7220768332481384, + "epoch": 0.6367100438564958, + "kl_loss": 0.18825232982635498, + "loss_ib": 0.005619652569293976, + "step": 2214 + }, + { + "epoch": 0.6369976274354734, + "grad_norm": 0.12433803081512451, + "learning_rate": 9.15950265830915e-06, + "loss": 0.8794, + "step": 2215 + }, + { + "ce_ib": 5.938788890838623, + "ce_orig": 0.8294755816459656, + "epoch": 0.6369976274354734, + "kl_loss": 0.1850244700908661, + "loss_ib": 0.007789033465087414, + "step": 2215 + }, + { + "ce_ib": 4.130274295806885, + "ce_orig": 0.7152994871139526, + "epoch": 0.6369976274354734, + "kl_loss": 0.23378309607505798, + "loss_ib": 0.0064681051298975945, + "step": 2215 + }, + { + "ce_ib": 6.712379455566406, + "ce_orig": 0.7170183062553406, + "epoch": 0.6369976274354734, + "kl_loss": 0.26324141025543213, + "loss_ib": 0.009344792924821377, + "step": 2215 + }, + { + "ce_ib": 3.4517650604248047, + "ce_orig": 0.5049684047698975, + "epoch": 0.6369976274354734, + "kl_loss": 0.1913483887910843, + "loss_ib": 0.005365248769521713, + "step": 2215 + }, + { + "ce_ib": 4.6739654541015625, + "ce_orig": 0.9622190594673157, + "epoch": 0.6372852110144511, + "kl_loss": 0.17057183384895325, + "loss_ib": 0.006379683967679739, + "step": 2216 + }, + { + "ce_ib": 6.13259220123291, + "ce_orig": 1.3338274955749512, + "epoch": 0.6372852110144511, + "kl_loss": 0.30193406343460083, + "loss_ib": 0.009151932783424854, + "step": 2216 + }, + { + "ce_ib": 2.9055683612823486, + "ce_orig": 0.3796944320201874, + "epoch": 0.6372852110144511, + "kl_loss": 0.14027754962444305, + "loss_ib": 0.00430834386497736, + "step": 2216 + }, + { + "ce_ib": 3.2655749320983887, + "ce_orig": 0.7146506905555725, + "epoch": 0.6372852110144511, + "kl_loss": 0.17266951501369476, + "loss_ib": 0.004992269910871983, + "step": 2216 + }, + { + "ce_ib": 4.4180192947387695, + "ce_orig": 0.9017258882522583, + "epoch": 0.6375727945934287, + "kl_loss": 0.16334456205368042, + "loss_ib": 0.0060514649376273155, + "step": 2217 + }, + { + "ce_ib": 5.1288371086120605, + "ce_orig": 0.6881497502326965, + "epoch": 0.6375727945934287, + "kl_loss": 0.6514620780944824, + "loss_ib": 0.011643457226455212, + "step": 2217 + }, + { + "ce_ib": 4.378441333770752, + "ce_orig": 0.5895921587944031, + "epoch": 0.6375727945934287, + "kl_loss": 0.2794097661972046, + "loss_ib": 0.00717253889888525, + "step": 2217 + }, + { + "ce_ib": 4.561514854431152, + "ce_orig": 0.31532394886016846, + "epoch": 0.6375727945934287, + "kl_loss": 0.5408508777618408, + "loss_ib": 0.009970023296773434, + "step": 2217 + }, + { + "ce_ib": 4.721513271331787, + "ce_orig": 0.944973349571228, + "epoch": 0.6378603781724064, + "kl_loss": 0.17320331931114197, + "loss_ib": 0.006453546229749918, + "step": 2218 + }, + { + "ce_ib": 5.978292465209961, + "ce_orig": 0.9687330722808838, + "epoch": 0.6378603781724064, + "kl_loss": 0.18983343243598938, + "loss_ib": 0.007876627147197723, + "step": 2218 + }, + { + "ce_ib": 4.968411922454834, + "ce_orig": 0.5198065638542175, + "epoch": 0.6378603781724064, + "kl_loss": 0.15803438425064087, + "loss_ib": 0.006548755802214146, + "step": 2218 + }, + { + "ce_ib": 7.942214488983154, + "ce_orig": 1.097477912902832, + "epoch": 0.6378603781724064, + "kl_loss": 0.16561007499694824, + "loss_ib": 0.009598314762115479, + "step": 2218 + }, + { + "ce_ib": 5.340999126434326, + "ce_orig": 1.1523351669311523, + "epoch": 0.638147961751384, + "kl_loss": 0.4156242907047272, + "loss_ib": 0.009497242048382759, + "step": 2219 + }, + { + "ce_ib": 5.354678153991699, + "ce_orig": 0.6772332191467285, + "epoch": 0.638147961751384, + "kl_loss": 0.22625137865543365, + "loss_ib": 0.007617191411554813, + "step": 2219 + }, + { + "ce_ib": 4.555023670196533, + "ce_orig": 0.8217195272445679, + "epoch": 0.638147961751384, + "kl_loss": 0.19400615990161896, + "loss_ib": 0.0064950850792229176, + "step": 2219 + }, + { + "ce_ib": 2.460514783859253, + "ce_orig": 0.5635286569595337, + "epoch": 0.638147961751384, + "kl_loss": 0.1454383134841919, + "loss_ib": 0.003914897795766592, + "step": 2219 + }, + { + "epoch": 0.6384355453303616, + "grad_norm": 0.1347493678331375, + "learning_rate": 9.155190960711822e-06, + "loss": 0.8594, + "step": 2220 + }, + { + "ce_ib": 4.029130935668945, + "ce_orig": 0.8111188411712646, + "epoch": 0.6384355453303616, + "kl_loss": 0.15119630098342896, + "loss_ib": 0.005541093647480011, + "step": 2220 + }, + { + "ce_ib": 5.241641998291016, + "ce_orig": 0.9767241477966309, + "epoch": 0.6384355453303616, + "kl_loss": 0.12450902163982391, + "loss_ib": 0.006486732047051191, + "step": 2220 + }, + { + "ce_ib": 3.2337307929992676, + "ce_orig": 0.5764454007148743, + "epoch": 0.6384355453303616, + "kl_loss": 0.16570377349853516, + "loss_ib": 0.004890768323093653, + "step": 2220 + }, + { + "ce_ib": 3.605407238006592, + "ce_orig": 0.589805006980896, + "epoch": 0.6384355453303616, + "kl_loss": 0.27963346242904663, + "loss_ib": 0.006401741411536932, + "step": 2220 + }, + { + "ce_ib": 6.4307661056518555, + "ce_orig": 1.0037425756454468, + "epoch": 0.6387231289093392, + "kl_loss": 0.2174602746963501, + "loss_ib": 0.008605368435382843, + "step": 2221 + }, + { + "ce_ib": 3.9862797260284424, + "ce_orig": 0.5300729274749756, + "epoch": 0.6387231289093392, + "kl_loss": 0.21078234910964966, + "loss_ib": 0.006094102747738361, + "step": 2221 + }, + { + "ce_ib": 2.137843608856201, + "ce_orig": 0.3431706726551056, + "epoch": 0.6387231289093392, + "kl_loss": 0.1845364272594452, + "loss_ib": 0.003983207978308201, + "step": 2221 + }, + { + "ce_ib": 4.962773323059082, + "ce_orig": 0.5847994685173035, + "epoch": 0.6387231289093392, + "kl_loss": 0.25704827904701233, + "loss_ib": 0.007533255498856306, + "step": 2221 + }, + { + "ce_ib": 5.298913478851318, + "ce_orig": 1.0430620908737183, + "epoch": 0.6390107124883169, + "kl_loss": 0.23215684294700623, + "loss_ib": 0.007620482239872217, + "step": 2222 + }, + { + "ce_ib": 6.302296161651611, + "ce_orig": 1.0351884365081787, + "epoch": 0.6390107124883169, + "kl_loss": 0.1954186111688614, + "loss_ib": 0.00825648196041584, + "step": 2222 + }, + { + "ce_ib": 3.369130849838257, + "ce_orig": 0.609680712223053, + "epoch": 0.6390107124883169, + "kl_loss": 0.23838892579078674, + "loss_ib": 0.005753019824624062, + "step": 2222 + }, + { + "ce_ib": 5.24609899520874, + "ce_orig": 1.1397161483764648, + "epoch": 0.6390107124883169, + "kl_loss": 0.21884658932685852, + "loss_ib": 0.007434564642608166, + "step": 2222 + }, + { + "ce_ib": 5.321019649505615, + "ce_orig": 1.1072014570236206, + "epoch": 0.6392982960672946, + "kl_loss": 0.2210046648979187, + "loss_ib": 0.007531066425144672, + "step": 2223 + }, + { + "ce_ib": 4.805994510650635, + "ce_orig": 0.7707478404045105, + "epoch": 0.6392982960672946, + "kl_loss": 0.3198475241661072, + "loss_ib": 0.00800446979701519, + "step": 2223 + }, + { + "ce_ib": 4.817802429199219, + "ce_orig": 0.807983934879303, + "epoch": 0.6392982960672946, + "kl_loss": 0.27704912424087524, + "loss_ib": 0.00758829340338707, + "step": 2223 + }, + { + "ce_ib": 5.179113864898682, + "ce_orig": 0.4138365387916565, + "epoch": 0.6392982960672946, + "kl_loss": 0.3153059482574463, + "loss_ib": 0.008332173340022564, + "step": 2223 + }, + { + "ce_ib": 5.2783355712890625, + "ce_orig": 0.8336692452430725, + "epoch": 0.6395858796462722, + "kl_loss": 0.16245882213115692, + "loss_ib": 0.006902923807501793, + "step": 2224 + }, + { + "ce_ib": 5.048581123352051, + "ce_orig": 1.1351484060287476, + "epoch": 0.6395858796462722, + "kl_loss": 0.3129180669784546, + "loss_ib": 0.008177761919796467, + "step": 2224 + }, + { + "ce_ib": 3.3916990756988525, + "ce_orig": 0.6629234552383423, + "epoch": 0.6395858796462722, + "kl_loss": 0.15780460834503174, + "loss_ib": 0.004969744943082333, + "step": 2224 + }, + { + "ce_ib": 6.467718124389648, + "ce_orig": 1.4458715915679932, + "epoch": 0.6395858796462722, + "kl_loss": 0.2174796611070633, + "loss_ib": 0.008642515167593956, + "step": 2224 + }, + { + "epoch": 0.6398734632252498, + "grad_norm": 0.12976644933223724, + "learning_rate": 9.150869252295029e-06, + "loss": 0.8694, + "step": 2225 + }, + { + "ce_ib": 6.314749717712402, + "ce_orig": 1.0856988430023193, + "epoch": 0.6398734632252498, + "kl_loss": 0.29134392738342285, + "loss_ib": 0.009228188544511795, + "step": 2225 + }, + { + "ce_ib": 2.9565343856811523, + "ce_orig": 0.6439271569252014, + "epoch": 0.6398734632252498, + "kl_loss": 0.2480812966823578, + "loss_ib": 0.0054373471066355705, + "step": 2225 + }, + { + "ce_ib": 3.590745687484741, + "ce_orig": 0.693541407585144, + "epoch": 0.6398734632252498, + "kl_loss": 0.21406376361846924, + "loss_ib": 0.005731383338570595, + "step": 2225 + }, + { + "ce_ib": 5.855668067932129, + "ce_orig": 1.0512198209762573, + "epoch": 0.6398734632252498, + "kl_loss": 0.24106208980083466, + "loss_ib": 0.008266288787126541, + "step": 2225 + }, + { + "ce_ib": 5.390851974487305, + "ce_orig": 0.9472723603248596, + "epoch": 0.6401610468042275, + "kl_loss": 0.16632603108882904, + "loss_ib": 0.007054112385958433, + "step": 2226 + }, + { + "ce_ib": 8.343703269958496, + "ce_orig": 1.7662280797958374, + "epoch": 0.6401610468042275, + "kl_loss": 0.24778643250465393, + "loss_ib": 0.010821567848324776, + "step": 2226 + }, + { + "ce_ib": 3.1283116340637207, + "ce_orig": 0.7345309853553772, + "epoch": 0.6401610468042275, + "kl_loss": 0.2352600395679474, + "loss_ib": 0.005480911582708359, + "step": 2226 + }, + { + "ce_ib": 8.498689651489258, + "ce_orig": 1.6550312042236328, + "epoch": 0.6401610468042275, + "kl_loss": 0.2038343846797943, + "loss_ib": 0.010537032969295979, + "step": 2226 + }, + { + "ce_ib": 5.6598968505859375, + "ce_orig": 0.8605701923370361, + "epoch": 0.6404486303832051, + "kl_loss": 0.3150702714920044, + "loss_ib": 0.008810599334537983, + "step": 2227 + }, + { + "ce_ib": 6.535090446472168, + "ce_orig": 1.2702900171279907, + "epoch": 0.6404486303832051, + "kl_loss": 0.1929207146167755, + "loss_ib": 0.008464298211038113, + "step": 2227 + }, + { + "ce_ib": 5.811291217803955, + "ce_orig": 0.9448096752166748, + "epoch": 0.6404486303832051, + "kl_loss": 0.29662108421325684, + "loss_ib": 0.008777501992881298, + "step": 2227 + }, + { + "ce_ib": 5.146031856536865, + "ce_orig": 1.1392779350280762, + "epoch": 0.6404486303832051, + "kl_loss": 0.17739135026931763, + "loss_ib": 0.006919945124536753, + "step": 2227 + }, + { + "ce_ib": 5.6806230545043945, + "ce_orig": 0.8015081882476807, + "epoch": 0.6407362139621827, + "kl_loss": 0.20818392932415009, + "loss_ib": 0.007762462366372347, + "step": 2228 + }, + { + "ce_ib": 5.306705951690674, + "ce_orig": 0.915160596370697, + "epoch": 0.6407362139621827, + "kl_loss": 0.2048511505126953, + "loss_ib": 0.007355217356234789, + "step": 2228 + }, + { + "ce_ib": 6.346898078918457, + "ce_orig": 1.044769287109375, + "epoch": 0.6407362139621827, + "kl_loss": 0.27916374802589417, + "loss_ib": 0.009138534776866436, + "step": 2228 + }, + { + "ce_ib": 4.158748149871826, + "ce_orig": 0.8663656711578369, + "epoch": 0.6407362139621827, + "kl_loss": 0.3763602077960968, + "loss_ib": 0.00792235042899847, + "step": 2228 + }, + { + "ce_ib": 6.447758674621582, + "ce_orig": 1.2747336626052856, + "epoch": 0.6410237975411603, + "kl_loss": 0.19005420804023743, + "loss_ib": 0.008348300121724606, + "step": 2229 + }, + { + "ce_ib": 5.719443321228027, + "ce_orig": 1.1968472003936768, + "epoch": 0.6410237975411603, + "kl_loss": 0.196004718542099, + "loss_ib": 0.007679490838199854, + "step": 2229 + }, + { + "ce_ib": 5.736206531524658, + "ce_orig": 1.274706482887268, + "epoch": 0.6410237975411603, + "kl_loss": 0.33017146587371826, + "loss_ib": 0.009037921205163002, + "step": 2229 + }, + { + "ce_ib": 6.951866626739502, + "ce_orig": 1.3064205646514893, + "epoch": 0.6410237975411603, + "kl_loss": 0.14234721660614014, + "loss_ib": 0.008375338278710842, + "step": 2229 + }, + { + "epoch": 0.6413113811201381, + "grad_norm": 0.13269558548927307, + "learning_rate": 9.14653754347077e-06, + "loss": 0.9679, + "step": 2230 + }, + { + "ce_ib": 3.7429044246673584, + "ce_orig": 0.6391395926475525, + "epoch": 0.6413113811201381, + "kl_loss": 0.1924726963043213, + "loss_ib": 0.0056676315143704414, + "step": 2230 + }, + { + "ce_ib": 6.15748929977417, + "ce_orig": 0.9046223163604736, + "epoch": 0.6413113811201381, + "kl_loss": 0.23866811394691467, + "loss_ib": 0.008544170297682285, + "step": 2230 + }, + { + "ce_ib": 5.88073205947876, + "ce_orig": 0.6476671099662781, + "epoch": 0.6413113811201381, + "kl_loss": 0.2682310938835144, + "loss_ib": 0.008563042618334293, + "step": 2230 + }, + { + "ce_ib": 4.005565643310547, + "ce_orig": 0.6499187350273132, + "epoch": 0.6413113811201381, + "kl_loss": 0.19824722409248352, + "loss_ib": 0.005988037679344416, + "step": 2230 + }, + { + "ce_ib": 6.916352272033691, + "ce_orig": 1.3124637603759766, + "epoch": 0.6415989646991157, + "kl_loss": 0.18663859367370605, + "loss_ib": 0.008782737888395786, + "step": 2231 + }, + { + "ce_ib": 5.662384986877441, + "ce_orig": 0.9848887324333191, + "epoch": 0.6415989646991157, + "kl_loss": 0.3282080888748169, + "loss_ib": 0.008944465778768063, + "step": 2231 + }, + { + "ce_ib": 4.647446632385254, + "ce_orig": 1.0038726329803467, + "epoch": 0.6415989646991157, + "kl_loss": 0.21789169311523438, + "loss_ib": 0.006826363503932953, + "step": 2231 + }, + { + "ce_ib": 3.867286443710327, + "ce_orig": 0.5315132141113281, + "epoch": 0.6415989646991157, + "kl_loss": 0.21558289229869843, + "loss_ib": 0.00602311547845602, + "step": 2231 + }, + { + "ce_ib": 6.045629978179932, + "ce_orig": 1.5301743745803833, + "epoch": 0.6418865482780933, + "kl_loss": 0.23397418856620789, + "loss_ib": 0.00838537234812975, + "step": 2232 + }, + { + "ce_ib": 5.792202472686768, + "ce_orig": 0.8930842280387878, + "epoch": 0.6418865482780933, + "kl_loss": 0.2365385890007019, + "loss_ib": 0.008157587610185146, + "step": 2232 + }, + { + "ce_ib": 3.916881561279297, + "ce_orig": 0.7099558711051941, + "epoch": 0.6418865482780933, + "kl_loss": 0.22701631486415863, + "loss_ib": 0.006187045015394688, + "step": 2232 + }, + { + "ce_ib": 5.121767044067383, + "ce_orig": 1.1378718614578247, + "epoch": 0.6418865482780933, + "kl_loss": 0.2537359297275543, + "loss_ib": 0.007659126538783312, + "step": 2232 + }, + { + "ce_ib": 5.795713901519775, + "ce_orig": 1.0107265710830688, + "epoch": 0.642174131857071, + "kl_loss": 0.24966418743133545, + "loss_ib": 0.008292355574667454, + "step": 2233 + }, + { + "ce_ib": 4.047680854797363, + "ce_orig": 0.6066938042640686, + "epoch": 0.642174131857071, + "kl_loss": 0.31624898314476013, + "loss_ib": 0.00721017038449645, + "step": 2233 + }, + { + "ce_ib": 4.567048072814941, + "ce_orig": 0.8630667924880981, + "epoch": 0.642174131857071, + "kl_loss": 0.1926921308040619, + "loss_ib": 0.006493969354778528, + "step": 2233 + }, + { + "ce_ib": 5.160074710845947, + "ce_orig": 1.0430033206939697, + "epoch": 0.642174131857071, + "kl_loss": 0.17789754271507263, + "loss_ib": 0.006939049810171127, + "step": 2233 + }, + { + "ce_ib": 3.6557466983795166, + "ce_orig": 0.29716572165489197, + "epoch": 0.6424617154360486, + "kl_loss": 0.23930786550045013, + "loss_ib": 0.006048825103789568, + "step": 2234 + }, + { + "ce_ib": 4.675722122192383, + "ce_orig": 0.7132110595703125, + "epoch": 0.6424617154360486, + "kl_loss": 0.3141395151615143, + "loss_ib": 0.007817116566002369, + "step": 2234 + }, + { + "ce_ib": 4.3292460441589355, + "ce_orig": 0.4419725835323334, + "epoch": 0.6424617154360486, + "kl_loss": 0.25981035828590393, + "loss_ib": 0.006927349604666233, + "step": 2234 + }, + { + "ce_ib": 1.5285900831222534, + "ce_orig": 0.33371371030807495, + "epoch": 0.6424617154360486, + "kl_loss": 0.4574854075908661, + "loss_ib": 0.006103443913161755, + "step": 2234 + }, + { + "epoch": 0.6427492990150262, + "grad_norm": 0.1594630926847458, + "learning_rate": 9.142195844675136e-06, + "loss": 0.876, + "step": 2235 + }, + { + "ce_ib": 2.624837875366211, + "ce_orig": 0.23371818661689758, + "epoch": 0.6427492990150262, + "kl_loss": 0.2746467590332031, + "loss_ib": 0.005371305625885725, + "step": 2235 + }, + { + "ce_ib": 2.7384371757507324, + "ce_orig": 0.5476139783859253, + "epoch": 0.6427492990150262, + "kl_loss": 0.25276979804039, + "loss_ib": 0.0052661350928246975, + "step": 2235 + }, + { + "ce_ib": 2.6237423419952393, + "ce_orig": 0.7641239762306213, + "epoch": 0.6427492990150262, + "kl_loss": 0.15601274371147156, + "loss_ib": 0.004183869808912277, + "step": 2235 + }, + { + "ce_ib": 9.549378395080566, + "ce_orig": 1.9201443195343018, + "epoch": 0.6427492990150262, + "kl_loss": 0.2991485893726349, + "loss_ib": 0.012540864758193493, + "step": 2235 + }, + { + "ce_ib": 6.1513495445251465, + "ce_orig": 0.9431938529014587, + "epoch": 0.6430368825940039, + "kl_loss": 0.41607987880706787, + "loss_ib": 0.010312148369848728, + "step": 2236 + }, + { + "ce_ib": 2.924443483352661, + "ce_orig": 0.34709110856056213, + "epoch": 0.6430368825940039, + "kl_loss": 0.18409812450408936, + "loss_ib": 0.004765424411743879, + "step": 2236 + }, + { + "ce_ib": 4.066567420959473, + "ce_orig": 0.7068833708763123, + "epoch": 0.6430368825940039, + "kl_loss": 0.20552408695220947, + "loss_ib": 0.00612180819734931, + "step": 2236 + }, + { + "ce_ib": 2.58671236038208, + "ce_orig": 0.6245825886726379, + "epoch": 0.6430368825940039, + "kl_loss": 0.1434691697359085, + "loss_ib": 0.0040214043110609055, + "step": 2236 + }, + { + "ce_ib": 5.304448127746582, + "ce_orig": 1.2027007341384888, + "epoch": 0.6433244661729816, + "kl_loss": 0.34131044149398804, + "loss_ib": 0.008717551827430725, + "step": 2237 + }, + { + "ce_ib": 5.310830116271973, + "ce_orig": 0.6920839548110962, + "epoch": 0.6433244661729816, + "kl_loss": 0.26179635524749756, + "loss_ib": 0.007928794249892235, + "step": 2237 + }, + { + "ce_ib": 6.841182708740234, + "ce_orig": 1.1565219163894653, + "epoch": 0.6433244661729816, + "kl_loss": 0.22811982035636902, + "loss_ib": 0.00912238098680973, + "step": 2237 + }, + { + "ce_ib": 3.447711229324341, + "ce_orig": 0.8260337710380554, + "epoch": 0.6433244661729816, + "kl_loss": 0.146821990609169, + "loss_ib": 0.004915931262075901, + "step": 2237 + }, + { + "ce_ib": 2.900927782058716, + "ce_orig": 0.6868208646774292, + "epoch": 0.6436120497519592, + "kl_loss": 0.13188967108726501, + "loss_ib": 0.004219824448227882, + "step": 2238 + }, + { + "ce_ib": 3.7288565635681152, + "ce_orig": 0.3572218120098114, + "epoch": 0.6436120497519592, + "kl_loss": 0.29727935791015625, + "loss_ib": 0.0067016505636274815, + "step": 2238 + }, + { + "ce_ib": 3.1037209033966064, + "ce_orig": 0.606605589389801, + "epoch": 0.6436120497519592, + "kl_loss": 0.16162024438381195, + "loss_ib": 0.004719923250377178, + "step": 2238 + }, + { + "ce_ib": 4.155191421508789, + "ce_orig": 1.0667132139205933, + "epoch": 0.6436120497519592, + "kl_loss": 0.15585938096046448, + "loss_ib": 0.005713785067200661, + "step": 2238 + }, + { + "ce_ib": 4.998521327972412, + "ce_orig": 0.9305186867713928, + "epoch": 0.6438996333309368, + "kl_loss": 0.17796039581298828, + "loss_ib": 0.006778125651180744, + "step": 2239 + }, + { + "ce_ib": 5.873899459838867, + "ce_orig": 1.1869970560073853, + "epoch": 0.6438996333309368, + "kl_loss": 0.28436988592147827, + "loss_ib": 0.008717598393559456, + "step": 2239 + }, + { + "ce_ib": 6.386340618133545, + "ce_orig": 1.2576159238815308, + "epoch": 0.6438996333309368, + "kl_loss": 0.25126370787620544, + "loss_ib": 0.008898978121578693, + "step": 2239 + }, + { + "ce_ib": 3.205414295196533, + "ce_orig": 0.4032820761203766, + "epoch": 0.6438996333309368, + "kl_loss": 0.23453949391841888, + "loss_ib": 0.0055508092045784, + "step": 2239 + }, + { + "epoch": 0.6441872169099144, + "grad_norm": 0.12528882920742035, + "learning_rate": 9.137844166368289e-06, + "loss": 0.87, + "step": 2240 + }, + { + "ce_ib": 4.543309211730957, + "ce_orig": 0.8168278932571411, + "epoch": 0.6441872169099144, + "kl_loss": 0.17696496844291687, + "loss_ib": 0.006312958896160126, + "step": 2240 + }, + { + "ce_ib": 9.161275863647461, + "ce_orig": 1.5103412866592407, + "epoch": 0.6441872169099144, + "kl_loss": 0.28043800592422485, + "loss_ib": 0.011965655721724033, + "step": 2240 + }, + { + "ce_ib": 6.615466117858887, + "ce_orig": 1.2118124961853027, + "epoch": 0.6441872169099144, + "kl_loss": 0.1844298541545868, + "loss_ib": 0.008459764532744884, + "step": 2240 + }, + { + "ce_ib": 3.091930389404297, + "ce_orig": 0.3411165773868561, + "epoch": 0.6441872169099144, + "kl_loss": 0.42432093620300293, + "loss_ib": 0.007335139438509941, + "step": 2240 + }, + { + "ce_ib": 4.408655166625977, + "ce_orig": 0.8881070613861084, + "epoch": 0.644474800488892, + "kl_loss": 0.20451763272285461, + "loss_ib": 0.00645383121445775, + "step": 2241 + }, + { + "ce_ib": 3.4105663299560547, + "ce_orig": 0.563164234161377, + "epoch": 0.644474800488892, + "kl_loss": 0.20554696023464203, + "loss_ib": 0.005466036032885313, + "step": 2241 + }, + { + "ce_ib": 4.918702125549316, + "ce_orig": 0.9418376088142395, + "epoch": 0.644474800488892, + "kl_loss": 0.2818794548511505, + "loss_ib": 0.007737496867775917, + "step": 2241 + }, + { + "ce_ib": 5.287755012512207, + "ce_orig": 0.9695674777030945, + "epoch": 0.644474800488892, + "kl_loss": 0.2167108803987503, + "loss_ib": 0.007454863749444485, + "step": 2241 + }, + { + "ce_ib": 3.216372013092041, + "ce_orig": 0.8663840293884277, + "epoch": 0.6447623840678697, + "kl_loss": 0.22231486439704895, + "loss_ib": 0.005439520813524723, + "step": 2242 + }, + { + "ce_ib": 6.962829113006592, + "ce_orig": 0.6389144659042358, + "epoch": 0.6447623840678697, + "kl_loss": 0.21309678256511688, + "loss_ib": 0.009093796834349632, + "step": 2242 + }, + { + "ce_ib": 3.884773015975952, + "ce_orig": 0.6866126656532288, + "epoch": 0.6447623840678697, + "kl_loss": 0.2649279534816742, + "loss_ib": 0.006534052547067404, + "step": 2242 + }, + { + "ce_ib": 2.7887189388275146, + "ce_orig": 0.6048072576522827, + "epoch": 0.6447623840678697, + "kl_loss": 0.13193631172180176, + "loss_ib": 0.004108082037419081, + "step": 2242 + }, + { + "ce_ib": 4.203516006469727, + "ce_orig": 0.6678962111473083, + "epoch": 0.6450499676468474, + "kl_loss": 0.24637240171432495, + "loss_ib": 0.0066672395914793015, + "step": 2243 + }, + { + "ce_ib": 2.886334180831909, + "ce_orig": 0.7411644458770752, + "epoch": 0.6450499676468474, + "kl_loss": 0.1261531114578247, + "loss_ib": 0.00414786534383893, + "step": 2243 + }, + { + "ce_ib": 4.255670070648193, + "ce_orig": 0.6623680591583252, + "epoch": 0.6450499676468474, + "kl_loss": 0.2253631204366684, + "loss_ib": 0.006509300787001848, + "step": 2243 + }, + { + "ce_ib": 3.619183301925659, + "ce_orig": 0.6642064452171326, + "epoch": 0.6450499676468474, + "kl_loss": 0.12438347935676575, + "loss_ib": 0.004863018169999123, + "step": 2243 + }, + { + "ce_ib": 4.287467956542969, + "ce_orig": 1.1600483655929565, + "epoch": 0.645337551225825, + "kl_loss": 0.16359232366085052, + "loss_ib": 0.005923390854150057, + "step": 2244 + }, + { + "ce_ib": 6.496616363525391, + "ce_orig": 0.845738410949707, + "epoch": 0.645337551225825, + "kl_loss": 0.2528335452079773, + "loss_ib": 0.009024951606988907, + "step": 2244 + }, + { + "ce_ib": 7.880937099456787, + "ce_orig": 1.0474770069122314, + "epoch": 0.645337551225825, + "kl_loss": 0.2722257971763611, + "loss_ib": 0.010603195056319237, + "step": 2244 + }, + { + "ce_ib": 3.8393173217773438, + "ce_orig": 0.7134605646133423, + "epoch": 0.645337551225825, + "kl_loss": 0.10741133987903595, + "loss_ib": 0.0049134306609630585, + "step": 2244 + }, + { + "epoch": 0.6456251348048027, + "grad_norm": 0.12564875185489655, + "learning_rate": 9.133482519034428e-06, + "loss": 0.8549, + "step": 2245 + }, + { + "ce_ib": 6.637200355529785, + "ce_orig": 1.2411879301071167, + "epoch": 0.6456251348048027, + "kl_loss": 0.24967756867408752, + "loss_ib": 0.009133976884186268, + "step": 2245 + }, + { + "ce_ib": 6.066783905029297, + "ce_orig": 1.2708193063735962, + "epoch": 0.6456251348048027, + "kl_loss": 0.15911602973937988, + "loss_ib": 0.0076579442247748375, + "step": 2245 + }, + { + "ce_ib": 4.962345600128174, + "ce_orig": 1.0237606763839722, + "epoch": 0.6456251348048027, + "kl_loss": 0.14082685112953186, + "loss_ib": 0.006370613817125559, + "step": 2245 + }, + { + "ce_ib": 6.208744049072266, + "ce_orig": 0.8410935401916504, + "epoch": 0.6456251348048027, + "kl_loss": 0.23255032300949097, + "loss_ib": 0.008534247055649757, + "step": 2245 + }, + { + "ce_ib": 4.818325519561768, + "ce_orig": 0.5712409615516663, + "epoch": 0.6459127183837803, + "kl_loss": 0.19245558977127075, + "loss_ib": 0.006742881610989571, + "step": 2246 + }, + { + "ce_ib": 4.837373733520508, + "ce_orig": 0.8214397430419922, + "epoch": 0.6459127183837803, + "kl_loss": 0.1572948396205902, + "loss_ib": 0.006410322152078152, + "step": 2246 + }, + { + "ce_ib": 3.3520500659942627, + "ce_orig": 0.49317845702171326, + "epoch": 0.6459127183837803, + "kl_loss": 0.13430660963058472, + "loss_ib": 0.004695116076618433, + "step": 2246 + }, + { + "ce_ib": 7.759151458740234, + "ce_orig": 0.7141404151916504, + "epoch": 0.6459127183837803, + "kl_loss": 0.2492925375699997, + "loss_ib": 0.01025207620114088, + "step": 2246 + }, + { + "ce_ib": 2.7222187519073486, + "ce_orig": 0.786942183971405, + "epoch": 0.6462003019627579, + "kl_loss": 0.15134668350219727, + "loss_ib": 0.004235685802996159, + "step": 2247 + }, + { + "ce_ib": 4.210977077484131, + "ce_orig": 0.8983677625656128, + "epoch": 0.6462003019627579, + "kl_loss": 0.20296208560466766, + "loss_ib": 0.006240597926080227, + "step": 2247 + }, + { + "ce_ib": 6.568497657775879, + "ce_orig": 1.2330660820007324, + "epoch": 0.6462003019627579, + "kl_loss": 0.2589224576950073, + "loss_ib": 0.009157722815871239, + "step": 2247 + }, + { + "ce_ib": 4.430677890777588, + "ce_orig": 0.7657988667488098, + "epoch": 0.6462003019627579, + "kl_loss": 0.18467693030834198, + "loss_ib": 0.00627744710072875, + "step": 2247 + }, + { + "ce_ib": 2.391599655151367, + "ce_orig": 0.5384037494659424, + "epoch": 0.6464878855417355, + "kl_loss": 0.14074201881885529, + "loss_ib": 0.003799020079895854, + "step": 2248 + }, + { + "ce_ib": 4.82369327545166, + "ce_orig": 0.8073243498802185, + "epoch": 0.6464878855417355, + "kl_loss": 0.30961212515830994, + "loss_ib": 0.007919814437627792, + "step": 2248 + }, + { + "ce_ib": 2.0225911140441895, + "ce_orig": 0.3172788918018341, + "epoch": 0.6464878855417355, + "kl_loss": 0.3762037754058838, + "loss_ib": 0.005784628912806511, + "step": 2248 + }, + { + "ce_ib": 3.1631970405578613, + "ce_orig": 0.31016409397125244, + "epoch": 0.6464878855417355, + "kl_loss": 0.30518269538879395, + "loss_ib": 0.006215023808181286, + "step": 2248 + }, + { + "ce_ib": 4.748915195465088, + "ce_orig": 0.6384258270263672, + "epoch": 0.6467754691207132, + "kl_loss": 0.24335654079914093, + "loss_ib": 0.007182480301707983, + "step": 2249 + }, + { + "ce_ib": 5.963736057281494, + "ce_orig": 1.0780372619628906, + "epoch": 0.6467754691207132, + "kl_loss": 0.19202247262001038, + "loss_ib": 0.00788396131247282, + "step": 2249 + }, + { + "ce_ib": 4.247345447540283, + "ce_orig": 1.1208248138427734, + "epoch": 0.6467754691207132, + "kl_loss": 0.16707342863082886, + "loss_ib": 0.005918079521507025, + "step": 2249 + }, + { + "ce_ib": 6.135866165161133, + "ce_orig": 1.1953470706939697, + "epoch": 0.6467754691207132, + "kl_loss": 0.1827436089515686, + "loss_ib": 0.007963301613926888, + "step": 2249 + }, + { + "epoch": 0.6470630526996909, + "grad_norm": 0.11626575142145157, + "learning_rate": 9.129110913181781e-06, + "loss": 0.8554, + "step": 2250 + }, + { + "ce_ib": 3.955518960952759, + "ce_orig": 0.7211806774139404, + "epoch": 0.6470630526996909, + "kl_loss": 0.2496684193611145, + "loss_ib": 0.006452202796936035, + "step": 2250 + }, + { + "ce_ib": 5.705596923828125, + "ce_orig": 1.0855046510696411, + "epoch": 0.6470630526996909, + "kl_loss": 0.30518245697021484, + "loss_ib": 0.008757420815527439, + "step": 2250 + }, + { + "ce_ib": 3.1378843784332275, + "ce_orig": 0.7174537181854248, + "epoch": 0.6470630526996909, + "kl_loss": 0.17021341621875763, + "loss_ib": 0.004840018693357706, + "step": 2250 + }, + { + "ce_ib": 4.392123699188232, + "ce_orig": 0.7461744546890259, + "epoch": 0.6470630526996909, + "kl_loss": 0.30186840891838074, + "loss_ib": 0.0074108075350522995, + "step": 2250 + }, + { + "ce_ib": 5.376955509185791, + "ce_orig": 0.7827103137969971, + "epoch": 0.6473506362786685, + "kl_loss": 0.21616169810295105, + "loss_ib": 0.007538572419434786, + "step": 2251 + }, + { + "ce_ib": 6.7061357498168945, + "ce_orig": 1.665534496307373, + "epoch": 0.6473506362786685, + "kl_loss": 0.23187361657619476, + "loss_ib": 0.009024872444570065, + "step": 2251 + }, + { + "ce_ib": 5.407220363616943, + "ce_orig": 0.5845106840133667, + "epoch": 0.6473506362786685, + "kl_loss": 0.29989856481552124, + "loss_ib": 0.008406206034123898, + "step": 2251 + }, + { + "ce_ib": 4.57041072845459, + "ce_orig": 0.9693235754966736, + "epoch": 0.6473506362786685, + "kl_loss": 0.19489061832427979, + "loss_ib": 0.00651931669563055, + "step": 2251 + }, + { + "ce_ib": 6.327913284301758, + "ce_orig": 1.2742117643356323, + "epoch": 0.6476382198576461, + "kl_loss": 0.14553216099739075, + "loss_ib": 0.007783235050737858, + "step": 2252 + }, + { + "ce_ib": 2.6556529998779297, + "ce_orig": 0.32272765040397644, + "epoch": 0.6476382198576461, + "kl_loss": 0.25656867027282715, + "loss_ib": 0.005221339408308268, + "step": 2252 + }, + { + "ce_ib": 2.681532859802246, + "ce_orig": 0.4519537389278412, + "epoch": 0.6476382198576461, + "kl_loss": 0.1598626971244812, + "loss_ib": 0.004280159715563059, + "step": 2252 + }, + { + "ce_ib": 5.681432723999023, + "ce_orig": 0.924653947353363, + "epoch": 0.6476382198576461, + "kl_loss": 0.22507384419441223, + "loss_ib": 0.00793217122554779, + "step": 2252 + }, + { + "ce_ib": 4.36812162399292, + "ce_orig": 0.7782438397407532, + "epoch": 0.6479258034366238, + "kl_loss": 0.12868894636631012, + "loss_ib": 0.00565501069650054, + "step": 2253 + }, + { + "ce_ib": 6.5291290283203125, + "ce_orig": 1.069020390510559, + "epoch": 0.6479258034366238, + "kl_loss": 0.22031882405281067, + "loss_ib": 0.008732317946851254, + "step": 2253 + }, + { + "ce_ib": 5.36151123046875, + "ce_orig": 0.7163639068603516, + "epoch": 0.6479258034366238, + "kl_loss": 0.1564219892024994, + "loss_ib": 0.006925730500370264, + "step": 2253 + }, + { + "ce_ib": 2.17145037651062, + "ce_orig": 0.506192147731781, + "epoch": 0.6479258034366238, + "kl_loss": 0.13293686509132385, + "loss_ib": 0.003500819206237793, + "step": 2253 + }, + { + "ce_ib": 4.0031023025512695, + "ce_orig": 0.6139377951622009, + "epoch": 0.6482133870156014, + "kl_loss": 0.27738460898399353, + "loss_ib": 0.00677694845944643, + "step": 2254 + }, + { + "ce_ib": 5.048296928405762, + "ce_orig": 1.0689398050308228, + "epoch": 0.6482133870156014, + "kl_loss": 0.2553049921989441, + "loss_ib": 0.00760134682059288, + "step": 2254 + }, + { + "ce_ib": 4.395095348358154, + "ce_orig": 1.1098370552062988, + "epoch": 0.6482133870156014, + "kl_loss": 0.12031193822622299, + "loss_ib": 0.0055982149206101894, + "step": 2254 + }, + { + "ce_ib": 4.8862385749816895, + "ce_orig": 0.6202949285507202, + "epoch": 0.6482133870156014, + "kl_loss": 0.26532214879989624, + "loss_ib": 0.007539459969848394, + "step": 2254 + }, + { + "epoch": 0.648500970594579, + "grad_norm": 0.13130851089954376, + "learning_rate": 9.124729359342556e-06, + "loss": 0.8401, + "step": 2255 + }, + { + "ce_ib": 6.721100330352783, + "ce_orig": 1.352034091949463, + "epoch": 0.648500970594579, + "kl_loss": 0.21674489974975586, + "loss_ib": 0.00888854917138815, + "step": 2255 + }, + { + "ce_ib": 5.685616493225098, + "ce_orig": 0.4006801247596741, + "epoch": 0.648500970594579, + "kl_loss": 0.32675042748451233, + "loss_ib": 0.008953120559453964, + "step": 2255 + }, + { + "ce_ib": 6.937414646148682, + "ce_orig": 1.3048559427261353, + "epoch": 0.648500970594579, + "kl_loss": 0.22833287715911865, + "loss_ib": 0.009220743551850319, + "step": 2255 + }, + { + "ce_ib": 3.5109612941741943, + "ce_orig": 0.5779734253883362, + "epoch": 0.648500970594579, + "kl_loss": 0.221551775932312, + "loss_ib": 0.00572647899389267, + "step": 2255 + }, + { + "ce_ib": 3.522029161453247, + "ce_orig": 0.5473127961158752, + "epoch": 0.6487885541735567, + "kl_loss": 0.24041630327701569, + "loss_ib": 0.0059261922724545, + "step": 2256 + }, + { + "ce_ib": 9.644031524658203, + "ce_orig": 1.5152044296264648, + "epoch": 0.6487885541735567, + "kl_loss": 0.15143217146396637, + "loss_ib": 0.011158352717757225, + "step": 2256 + }, + { + "ce_ib": 4.903205871582031, + "ce_orig": 0.7695021033287048, + "epoch": 0.6487885541735567, + "kl_loss": 0.20325185358524323, + "loss_ib": 0.006935724522918463, + "step": 2256 + }, + { + "ce_ib": 5.176592826843262, + "ce_orig": 1.023983120918274, + "epoch": 0.6487885541735567, + "kl_loss": 0.17058387398719788, + "loss_ib": 0.006882431451231241, + "step": 2256 + }, + { + "ce_ib": 3.8008506298065186, + "ce_orig": 0.6973961591720581, + "epoch": 0.6490761377525344, + "kl_loss": 0.2552768588066101, + "loss_ib": 0.006353619042783976, + "step": 2257 + }, + { + "ce_ib": 2.8841559886932373, + "ce_orig": 0.5467620491981506, + "epoch": 0.6490761377525344, + "kl_loss": 0.17127463221549988, + "loss_ib": 0.004596902523189783, + "step": 2257 + }, + { + "ce_ib": 3.021038293838501, + "ce_orig": 0.6342318654060364, + "epoch": 0.6490761377525344, + "kl_loss": 0.1151144951581955, + "loss_ib": 0.004172183107584715, + "step": 2257 + }, + { + "ce_ib": 4.881259918212891, + "ce_orig": 1.024870753288269, + "epoch": 0.6490761377525344, + "kl_loss": 0.17035000026226044, + "loss_ib": 0.0065847598016262054, + "step": 2257 + }, + { + "ce_ib": 3.8067638874053955, + "ce_orig": 0.8056713342666626, + "epoch": 0.649363721331512, + "kl_loss": 0.1606387346982956, + "loss_ib": 0.00541315134614706, + "step": 2258 + }, + { + "ce_ib": 6.6613569259643555, + "ce_orig": 1.2579281330108643, + "epoch": 0.649363721331512, + "kl_loss": 0.1497056931257248, + "loss_ib": 0.008158414624631405, + "step": 2258 + }, + { + "ce_ib": 4.672719955444336, + "ce_orig": 0.6233908534049988, + "epoch": 0.649363721331512, + "kl_loss": 0.3078916072845459, + "loss_ib": 0.007751636207103729, + "step": 2258 + }, + { + "ce_ib": 4.652003765106201, + "ce_orig": 0.6957845091819763, + "epoch": 0.649363721331512, + "kl_loss": 0.14288872480392456, + "loss_ib": 0.006080890540033579, + "step": 2258 + }, + { + "ce_ib": 5.235818386077881, + "ce_orig": 0.9623026847839355, + "epoch": 0.6496513049104896, + "kl_loss": 0.15480844676494598, + "loss_ib": 0.006783903110772371, + "step": 2259 + }, + { + "ce_ib": 7.471147537231445, + "ce_orig": 1.0113636255264282, + "epoch": 0.6496513049104896, + "kl_loss": 0.28058916330337524, + "loss_ib": 0.010277039371430874, + "step": 2259 + }, + { + "ce_ib": 5.677246570587158, + "ce_orig": 1.3297709226608276, + "epoch": 0.6496513049104896, + "kl_loss": 0.16554579138755798, + "loss_ib": 0.007332704029977322, + "step": 2259 + }, + { + "ce_ib": 3.4067184925079346, + "ce_orig": 0.5593897104263306, + "epoch": 0.6496513049104896, + "kl_loss": 0.15655125677585602, + "loss_ib": 0.004972231108695269, + "step": 2259 + }, + { + "epoch": 0.6499388884894672, + "grad_norm": 0.12662167847156525, + "learning_rate": 9.120337868072933e-06, + "loss": 0.8564, + "step": 2260 + }, + { + "ce_ib": 3.483961582183838, + "ce_orig": 0.9890021085739136, + "epoch": 0.6499388884894672, + "kl_loss": 0.14795762300491333, + "loss_ib": 0.0049635376781225204, + "step": 2260 + }, + { + "ce_ib": 3.515713930130005, + "ce_orig": 0.8011900186538696, + "epoch": 0.6499388884894672, + "kl_loss": 0.20832431316375732, + "loss_ib": 0.005598957184702158, + "step": 2260 + }, + { + "ce_ib": 3.5883865356445312, + "ce_orig": 0.5746553540229797, + "epoch": 0.6499388884894672, + "kl_loss": 0.27696043252944946, + "loss_ib": 0.006357990205287933, + "step": 2260 + }, + { + "ce_ib": 3.6470983028411865, + "ce_orig": 0.6424703001976013, + "epoch": 0.6499388884894672, + "kl_loss": 0.2583951950073242, + "loss_ib": 0.006231050472706556, + "step": 2260 + }, + { + "ce_ib": 5.479825973510742, + "ce_orig": 0.8377441167831421, + "epoch": 0.6502264720684449, + "kl_loss": 0.24956056475639343, + "loss_ib": 0.00797543115913868, + "step": 2261 + }, + { + "ce_ib": 5.23044490814209, + "ce_orig": 0.5936237573623657, + "epoch": 0.6502264720684449, + "kl_loss": 0.22531592845916748, + "loss_ib": 0.007483604364097118, + "step": 2261 + }, + { + "ce_ib": 4.040580749511719, + "ce_orig": 0.48740649223327637, + "epoch": 0.6502264720684449, + "kl_loss": 0.2741675078868866, + "loss_ib": 0.0067822556011378765, + "step": 2261 + }, + { + "ce_ib": 2.9189600944519043, + "ce_orig": 0.6496787667274475, + "epoch": 0.6502264720684449, + "kl_loss": 0.18401286005973816, + "loss_ib": 0.00475908862426877, + "step": 2261 + }, + { + "ce_ib": 7.004356384277344, + "ce_orig": 1.3017712831497192, + "epoch": 0.6505140556474225, + "kl_loss": 0.16769982874393463, + "loss_ib": 0.008681354112923145, + "step": 2262 + }, + { + "ce_ib": 3.6684107780456543, + "ce_orig": 0.5158635973930359, + "epoch": 0.6505140556474225, + "kl_loss": 0.2022833228111267, + "loss_ib": 0.005691243801265955, + "step": 2262 + }, + { + "ce_ib": 5.419810771942139, + "ce_orig": 0.9490334987640381, + "epoch": 0.6505140556474225, + "kl_loss": 0.19168806076049805, + "loss_ib": 0.007336691487580538, + "step": 2262 + }, + { + "ce_ib": 4.413972854614258, + "ce_orig": 0.5576635003089905, + "epoch": 0.6505140556474225, + "kl_loss": 0.2515392303466797, + "loss_ib": 0.006929365452378988, + "step": 2262 + }, + { + "ce_ib": 4.328033447265625, + "ce_orig": 0.6746641993522644, + "epoch": 0.6508016392264002, + "kl_loss": 0.21490831673145294, + "loss_ib": 0.006477116607129574, + "step": 2263 + }, + { + "ce_ib": 3.7582409381866455, + "ce_orig": 0.5193226933479309, + "epoch": 0.6508016392264002, + "kl_loss": 0.20581883192062378, + "loss_ib": 0.0058164289221167564, + "step": 2263 + }, + { + "ce_ib": 8.32127857208252, + "ce_orig": 1.6226961612701416, + "epoch": 0.6508016392264002, + "kl_loss": 0.27379170060157776, + "loss_ib": 0.01105919573456049, + "step": 2263 + }, + { + "ce_ib": 4.299564361572266, + "ce_orig": 0.7232734560966492, + "epoch": 0.6508016392264002, + "kl_loss": 0.21898016333580017, + "loss_ib": 0.006489366292953491, + "step": 2263 + }, + { + "ce_ib": 5.640127182006836, + "ce_orig": 1.190728783607483, + "epoch": 0.6510892228053778, + "kl_loss": 0.18460053205490112, + "loss_ib": 0.007486132439225912, + "step": 2264 + }, + { + "ce_ib": 5.7644944190979, + "ce_orig": 0.7370679378509521, + "epoch": 0.6510892228053778, + "kl_loss": 0.18890826404094696, + "loss_ib": 0.0076535772532224655, + "step": 2264 + }, + { + "ce_ib": 5.198642253875732, + "ce_orig": 0.6990883350372314, + "epoch": 0.6510892228053778, + "kl_loss": 0.24586433172225952, + "loss_ib": 0.007657285779714584, + "step": 2264 + }, + { + "ce_ib": 6.454962253570557, + "ce_orig": 0.8647112846374512, + "epoch": 0.6510892228053778, + "kl_loss": 0.2419222742319107, + "loss_ib": 0.00887418445199728, + "step": 2264 + }, + { + "epoch": 0.6513768063843555, + "grad_norm": 0.11244803667068481, + "learning_rate": 9.115936449953036e-06, + "loss": 0.8469, + "step": 2265 + }, + { + "ce_ib": 3.9171080589294434, + "ce_orig": 0.6864652633666992, + "epoch": 0.6513768063843555, + "kl_loss": 0.24665439128875732, + "loss_ib": 0.006383651867508888, + "step": 2265 + }, + { + "ce_ib": 6.049743175506592, + "ce_orig": 0.9248459339141846, + "epoch": 0.6513768063843555, + "kl_loss": 0.29479116201400757, + "loss_ib": 0.008997654542326927, + "step": 2265 + }, + { + "ce_ib": 4.566906452178955, + "ce_orig": 0.7267240881919861, + "epoch": 0.6513768063843555, + "kl_loss": 0.15579795837402344, + "loss_ib": 0.006124886218458414, + "step": 2265 + }, + { + "ce_ib": 5.593707084655762, + "ce_orig": 0.8196239471435547, + "epoch": 0.6513768063843555, + "kl_loss": 0.23697367310523987, + "loss_ib": 0.007963444106280804, + "step": 2265 + }, + { + "ce_ib": 5.442676544189453, + "ce_orig": 0.9030793905258179, + "epoch": 0.6516643899633331, + "kl_loss": 0.18283693492412567, + "loss_ib": 0.007271045818924904, + "step": 2266 + }, + { + "ce_ib": 5.453946590423584, + "ce_orig": 0.9420092701911926, + "epoch": 0.6516643899633331, + "kl_loss": 0.2290283590555191, + "loss_ib": 0.0077442298643291, + "step": 2266 + }, + { + "ce_ib": 3.4726572036743164, + "ce_orig": 0.7561721205711365, + "epoch": 0.6516643899633331, + "kl_loss": 0.20391522347927094, + "loss_ib": 0.005511809606105089, + "step": 2266 + }, + { + "ce_ib": 5.099103927612305, + "ce_orig": 1.426679015159607, + "epoch": 0.6516643899633331, + "kl_loss": 0.19550392031669617, + "loss_ib": 0.007054142653942108, + "step": 2266 + }, + { + "ce_ib": 5.264751434326172, + "ce_orig": 0.8312174081802368, + "epoch": 0.6519519735423107, + "kl_loss": 0.2500390410423279, + "loss_ib": 0.007765141781419516, + "step": 2267 + }, + { + "ce_ib": 7.719354152679443, + "ce_orig": 1.5774320363998413, + "epoch": 0.6519519735423107, + "kl_loss": 0.2847979664802551, + "loss_ib": 0.010567333549261093, + "step": 2267 + }, + { + "ce_ib": 3.4195444583892822, + "ce_orig": 0.6064673066139221, + "epoch": 0.6519519735423107, + "kl_loss": 0.21362721920013428, + "loss_ib": 0.005555816926062107, + "step": 2267 + }, + { + "ce_ib": 6.156761169433594, + "ce_orig": 0.6228665113449097, + "epoch": 0.6519519735423107, + "kl_loss": 0.28636229038238525, + "loss_ib": 0.009020383469760418, + "step": 2267 + }, + { + "ce_ib": 6.97183895111084, + "ce_orig": 0.8427409529685974, + "epoch": 0.6522395571212883, + "kl_loss": 0.20738546550273895, + "loss_ib": 0.009045694023370743, + "step": 2268 + }, + { + "ce_ib": 4.626084804534912, + "ce_orig": 0.9060959219932556, + "epoch": 0.6522395571212883, + "kl_loss": 0.2292974293231964, + "loss_ib": 0.006919058971107006, + "step": 2268 + }, + { + "ce_ib": 6.670544624328613, + "ce_orig": 1.3138222694396973, + "epoch": 0.6522395571212883, + "kl_loss": 0.2316010594367981, + "loss_ib": 0.00898655503988266, + "step": 2268 + }, + { + "ce_ib": 4.2746901512146, + "ce_orig": 0.9855881929397583, + "epoch": 0.6522395571212883, + "kl_loss": 0.20798909664154053, + "loss_ib": 0.006354581099003553, + "step": 2268 + }, + { + "ce_ib": 4.882701873779297, + "ce_orig": 0.9565039277076721, + "epoch": 0.652527140700266, + "kl_loss": 0.14919501543045044, + "loss_ib": 0.006374652031809092, + "step": 2269 + }, + { + "ce_ib": 5.353384494781494, + "ce_orig": 0.8473581671714783, + "epoch": 0.652527140700266, + "kl_loss": 0.2696492671966553, + "loss_ib": 0.008049877360463142, + "step": 2269 + }, + { + "ce_ib": 7.44644832611084, + "ce_orig": 1.6524927616119385, + "epoch": 0.652527140700266, + "kl_loss": 0.16521842777729034, + "loss_ib": 0.009098632261157036, + "step": 2269 + }, + { + "ce_ib": 4.213702201843262, + "ce_orig": 0.9332996606826782, + "epoch": 0.652527140700266, + "kl_loss": 0.1758432537317276, + "loss_ib": 0.005972134880721569, + "step": 2269 + }, + { + "epoch": 0.6528147242792437, + "grad_norm": 0.12635977566242218, + "learning_rate": 9.111525115586901e-06, + "loss": 0.91, + "step": 2270 + }, + { + "ce_ib": 5.812165260314941, + "ce_orig": 1.0928884744644165, + "epoch": 0.6528147242792437, + "kl_loss": 0.147163525223732, + "loss_ib": 0.007283800281584263, + "step": 2270 + }, + { + "ce_ib": 4.52280330657959, + "ce_orig": 0.8002405762672424, + "epoch": 0.6528147242792437, + "kl_loss": 0.24009452760219574, + "loss_ib": 0.006923748645931482, + "step": 2270 + }, + { + "ce_ib": 3.2163827419281006, + "ce_orig": 0.621583878993988, + "epoch": 0.6528147242792437, + "kl_loss": 0.15160441398620605, + "loss_ib": 0.004732426721602678, + "step": 2270 + }, + { + "ce_ib": 5.036918640136719, + "ce_orig": 1.0962857007980347, + "epoch": 0.6528147242792437, + "kl_loss": 0.23822124302387238, + "loss_ib": 0.007419130764901638, + "step": 2270 + }, + { + "ce_ib": 3.195239782333374, + "ce_orig": 0.8773270845413208, + "epoch": 0.6531023078582213, + "kl_loss": 0.19888979196548462, + "loss_ib": 0.005184137728065252, + "step": 2271 + }, + { + "ce_ib": 6.457829475402832, + "ce_orig": 1.3103928565979004, + "epoch": 0.6531023078582213, + "kl_loss": 0.27166053652763367, + "loss_ib": 0.009174434468150139, + "step": 2271 + }, + { + "ce_ib": 6.66159200668335, + "ce_orig": 1.225464940071106, + "epoch": 0.6531023078582213, + "kl_loss": 0.15735968947410583, + "loss_ib": 0.008235188201069832, + "step": 2271 + }, + { + "ce_ib": 4.808085918426514, + "ce_orig": 0.5937879681587219, + "epoch": 0.6531023078582213, + "kl_loss": 0.19018517434597015, + "loss_ib": 0.00670993747189641, + "step": 2271 + }, + { + "ce_ib": 4.717358589172363, + "ce_orig": 0.7907701730728149, + "epoch": 0.6533898914371989, + "kl_loss": 0.15580663084983826, + "loss_ib": 0.006275424733757973, + "step": 2272 + }, + { + "ce_ib": 4.552402973175049, + "ce_orig": 1.0680454969406128, + "epoch": 0.6533898914371989, + "kl_loss": 0.19898721575737, + "loss_ib": 0.0065422747284173965, + "step": 2272 + }, + { + "ce_ib": 5.202406406402588, + "ce_orig": 1.1133003234863281, + "epoch": 0.6533898914371989, + "kl_loss": 0.18520894646644592, + "loss_ib": 0.007054495625197887, + "step": 2272 + }, + { + "ce_ib": 3.4599199295043945, + "ce_orig": 0.7913157343864441, + "epoch": 0.6533898914371989, + "kl_loss": 0.2094898223876953, + "loss_ib": 0.005554817616939545, + "step": 2272 + }, + { + "ce_ib": 3.54768705368042, + "ce_orig": 0.7002692818641663, + "epoch": 0.6536774750161766, + "kl_loss": 0.2366122454404831, + "loss_ib": 0.005913809407502413, + "step": 2273 + }, + { + "ce_ib": 3.665952444076538, + "ce_orig": 0.8548576235771179, + "epoch": 0.6536774750161766, + "kl_loss": 0.1414390355348587, + "loss_ib": 0.005080342758446932, + "step": 2273 + }, + { + "ce_ib": 7.427947044372559, + "ce_orig": 1.5475941896438599, + "epoch": 0.6536774750161766, + "kl_loss": 0.2165505439043045, + "loss_ib": 0.009593452326953411, + "step": 2273 + }, + { + "ce_ib": 5.823622226715088, + "ce_orig": 0.9029852151870728, + "epoch": 0.6536774750161766, + "kl_loss": 0.23918765783309937, + "loss_ib": 0.008215499110519886, + "step": 2273 + }, + { + "ce_ib": 3.63211989402771, + "ce_orig": 0.6129016280174255, + "epoch": 0.6539650585951542, + "kl_loss": 0.18752413988113403, + "loss_ib": 0.005507361143827438, + "step": 2274 + }, + { + "ce_ib": 3.483790159225464, + "ce_orig": 0.5521928668022156, + "epoch": 0.6539650585951542, + "kl_loss": 0.21330544352531433, + "loss_ib": 0.00561684463173151, + "step": 2274 + }, + { + "ce_ib": 3.352748155593872, + "ce_orig": 0.8112760186195374, + "epoch": 0.6539650585951542, + "kl_loss": 0.18040809035301208, + "loss_ib": 0.00515682902187109, + "step": 2274 + }, + { + "ce_ib": 8.536733627319336, + "ce_orig": 1.8604228496551514, + "epoch": 0.6539650585951542, + "kl_loss": 0.7127609252929688, + "loss_ib": 0.015664342790842056, + "step": 2274 + }, + { + "epoch": 0.6542526421741318, + "grad_norm": 0.11538825184106827, + "learning_rate": 9.107103875602458e-06, + "loss": 0.8825, + "step": 2275 + }, + { + "ce_ib": 5.1151299476623535, + "ce_orig": 0.488956093788147, + "epoch": 0.6542526421741318, + "kl_loss": 0.22045278549194336, + "loss_ib": 0.007319657597690821, + "step": 2275 + }, + { + "ce_ib": 4.602018356323242, + "ce_orig": 0.9235979318618774, + "epoch": 0.6542526421741318, + "kl_loss": 0.21172873675823212, + "loss_ib": 0.00671930518001318, + "step": 2275 + }, + { + "ce_ib": 5.10174560546875, + "ce_orig": 0.7535123825073242, + "epoch": 0.6542526421741318, + "kl_loss": 0.20969155430793762, + "loss_ib": 0.00719866156578064, + "step": 2275 + }, + { + "ce_ib": 6.449713706970215, + "ce_orig": 0.9579578638076782, + "epoch": 0.6542526421741318, + "kl_loss": 0.26221269369125366, + "loss_ib": 0.009071840904653072, + "step": 2275 + }, + { + "ce_ib": 4.284127712249756, + "ce_orig": 0.7207916378974915, + "epoch": 0.6545402257531096, + "kl_loss": 0.12328524142503738, + "loss_ib": 0.005516980309039354, + "step": 2276 + }, + { + "ce_ib": 5.419441223144531, + "ce_orig": 0.5377898812294006, + "epoch": 0.6545402257531096, + "kl_loss": 0.1781124770641327, + "loss_ib": 0.007200566586107016, + "step": 2276 + }, + { + "ce_ib": 5.226006984710693, + "ce_orig": 0.7674787044525146, + "epoch": 0.6545402257531096, + "kl_loss": 0.2415277659893036, + "loss_ib": 0.0076412842608988285, + "step": 2276 + }, + { + "ce_ib": 5.263702392578125, + "ce_orig": 1.0727978944778442, + "epoch": 0.6545402257531096, + "kl_loss": 0.24408626556396484, + "loss_ib": 0.007704564835876226, + "step": 2276 + }, + { + "ce_ib": 5.449596405029297, + "ce_orig": 0.7077871561050415, + "epoch": 0.6548278093320872, + "kl_loss": 0.19660833477973938, + "loss_ib": 0.0074156797491014, + "step": 2277 + }, + { + "ce_ib": 3.4373152256011963, + "ce_orig": 0.659400224685669, + "epoch": 0.6548278093320872, + "kl_loss": 0.19523224234580994, + "loss_ib": 0.005389637779444456, + "step": 2277 + }, + { + "ce_ib": 2.983494758605957, + "ce_orig": 0.4473530352115631, + "epoch": 0.6548278093320872, + "kl_loss": 0.16390661895275116, + "loss_ib": 0.0046225604601204395, + "step": 2277 + }, + { + "ce_ib": 5.935503959655762, + "ce_orig": 1.0860085487365723, + "epoch": 0.6548278093320872, + "kl_loss": 0.17695719003677368, + "loss_ib": 0.007705075666308403, + "step": 2277 + }, + { + "ce_ib": 2.9383726119995117, + "ce_orig": 0.595731258392334, + "epoch": 0.6551153929110648, + "kl_loss": 0.1811446249485016, + "loss_ib": 0.004749818705022335, + "step": 2278 + }, + { + "ce_ib": 4.4217729568481445, + "ce_orig": 0.7559758424758911, + "epoch": 0.6551153929110648, + "kl_loss": 0.17565125226974487, + "loss_ib": 0.006178285460919142, + "step": 2278 + }, + { + "ce_ib": 3.3670401573181152, + "ce_orig": 0.6416783928871155, + "epoch": 0.6551153929110648, + "kl_loss": 0.2297191172838211, + "loss_ib": 0.00566423125565052, + "step": 2278 + }, + { + "ce_ib": 5.8523993492126465, + "ce_orig": 0.9604541063308716, + "epoch": 0.6551153929110648, + "kl_loss": 0.2466244399547577, + "loss_ib": 0.00831864308565855, + "step": 2278 + }, + { + "ce_ib": 4.521697521209717, + "ce_orig": 0.7972961068153381, + "epoch": 0.6554029764900424, + "kl_loss": 0.22596532106399536, + "loss_ib": 0.00678135035559535, + "step": 2279 + }, + { + "ce_ib": 2.5839197635650635, + "ce_orig": 0.7831220626831055, + "epoch": 0.6554029764900424, + "kl_loss": 0.13902916014194489, + "loss_ib": 0.003974210936576128, + "step": 2279 + }, + { + "ce_ib": 5.67334508895874, + "ce_orig": 1.3843718767166138, + "epoch": 0.6554029764900424, + "kl_loss": 0.265225350856781, + "loss_ib": 0.008325598202645779, + "step": 2279 + }, + { + "ce_ib": 6.482242584228516, + "ce_orig": 0.8738701343536377, + "epoch": 0.6554029764900424, + "kl_loss": 0.23340937495231628, + "loss_ib": 0.008816336281597614, + "step": 2279 + }, + { + "epoch": 0.65569056006902, + "grad_norm": 0.13038480281829834, + "learning_rate": 9.1026727406515e-06, + "loss": 0.8362, + "step": 2280 + }, + { + "ce_ib": 3.797309637069702, + "ce_orig": 0.6048603653907776, + "epoch": 0.65569056006902, + "kl_loss": 0.19496099650859833, + "loss_ib": 0.005746919196099043, + "step": 2280 + }, + { + "ce_ib": 7.111101150512695, + "ce_orig": 1.4631901979446411, + "epoch": 0.65569056006902, + "kl_loss": 0.24587488174438477, + "loss_ib": 0.009569849818944931, + "step": 2280 + }, + { + "ce_ib": 6.699181079864502, + "ce_orig": 1.0768893957138062, + "epoch": 0.65569056006902, + "kl_loss": 0.1425737589597702, + "loss_ib": 0.008124918676912785, + "step": 2280 + }, + { + "ce_ib": 3.0598464012145996, + "ce_orig": 0.4842908978462219, + "epoch": 0.65569056006902, + "kl_loss": 0.2532777190208435, + "loss_ib": 0.005592623725533485, + "step": 2280 + }, + { + "ce_ib": 6.48867130279541, + "ce_orig": 1.114367961883545, + "epoch": 0.6559781436479977, + "kl_loss": 0.20553603768348694, + "loss_ib": 0.008544031530618668, + "step": 2281 + }, + { + "ce_ib": 5.120079517364502, + "ce_orig": 0.6403471827507019, + "epoch": 0.6559781436479977, + "kl_loss": 0.21486327052116394, + "loss_ib": 0.007268712390214205, + "step": 2281 + }, + { + "ce_ib": 4.73817777633667, + "ce_orig": 1.083737850189209, + "epoch": 0.6559781436479977, + "kl_loss": 0.2323836237192154, + "loss_ib": 0.007062014192342758, + "step": 2281 + }, + { + "ce_ib": 2.2608022689819336, + "ce_orig": 0.5298892855644226, + "epoch": 0.6559781436479977, + "kl_loss": 0.1687333881855011, + "loss_ib": 0.003948135767132044, + "step": 2281 + }, + { + "ce_ib": 2.21573805809021, + "ce_orig": 0.4007877707481384, + "epoch": 0.6562657272269753, + "kl_loss": 0.31912633776664734, + "loss_ib": 0.005407001357525587, + "step": 2282 + }, + { + "ce_ib": 5.524689197540283, + "ce_orig": 0.5071929693222046, + "epoch": 0.6562657272269753, + "kl_loss": 0.456214964389801, + "loss_ib": 0.010086839087307453, + "step": 2282 + }, + { + "ce_ib": 6.743049621582031, + "ce_orig": 1.356688380241394, + "epoch": 0.6562657272269753, + "kl_loss": 0.18109339475631714, + "loss_ib": 0.008553983643651009, + "step": 2282 + }, + { + "ce_ib": 8.001782417297363, + "ce_orig": 1.5407428741455078, + "epoch": 0.6562657272269753, + "kl_loss": 0.30566996335983276, + "loss_ib": 0.01105848141014576, + "step": 2282 + }, + { + "ce_ib": 3.1452620029449463, + "ce_orig": 0.8282737731933594, + "epoch": 0.656553310805953, + "kl_loss": 0.1557915210723877, + "loss_ib": 0.004703177139163017, + "step": 2283 + }, + { + "ce_ib": 6.547390460968018, + "ce_orig": 1.3769874572753906, + "epoch": 0.656553310805953, + "kl_loss": 0.2881268262863159, + "loss_ib": 0.0094286585226655, + "step": 2283 + }, + { + "ce_ib": 4.003195762634277, + "ce_orig": 0.748750627040863, + "epoch": 0.656553310805953, + "kl_loss": 0.192085400223732, + "loss_ib": 0.00592404929921031, + "step": 2283 + }, + { + "ce_ib": 6.139941692352295, + "ce_orig": 0.9976292252540588, + "epoch": 0.656553310805953, + "kl_loss": 0.1992945671081543, + "loss_ib": 0.008132887072861195, + "step": 2283 + }, + { + "ce_ib": 3.6579947471618652, + "ce_orig": 0.6098557710647583, + "epoch": 0.6568408943849307, + "kl_loss": 0.24097001552581787, + "loss_ib": 0.0060676950961351395, + "step": 2284 + }, + { + "ce_ib": 4.688565254211426, + "ce_orig": 0.6419979929924011, + "epoch": 0.6568408943849307, + "kl_loss": 0.25667375326156616, + "loss_ib": 0.0072553022764623165, + "step": 2284 + }, + { + "ce_ib": 4.626008987426758, + "ce_orig": 0.8725230693817139, + "epoch": 0.6568408943849307, + "kl_loss": 0.18151512742042542, + "loss_ib": 0.006441160105168819, + "step": 2284 + }, + { + "ce_ib": 2.6526167392730713, + "ce_orig": 0.54185950756073, + "epoch": 0.6568408943849307, + "kl_loss": 0.2597923278808594, + "loss_ib": 0.005250539630651474, + "step": 2284 + }, + { + "epoch": 0.6571284779639083, + "grad_norm": 0.15330269932746887, + "learning_rate": 9.098231721409659e-06, + "loss": 0.8278, + "step": 2285 + }, + { + "ce_ib": 8.044528007507324, + "ce_orig": 1.8005108833312988, + "epoch": 0.6571284779639083, + "kl_loss": 0.21400517225265503, + "loss_ib": 0.010184579528868198, + "step": 2285 + }, + { + "ce_ib": 4.781381130218506, + "ce_orig": 1.0841262340545654, + "epoch": 0.6571284779639083, + "kl_loss": 0.19825077056884766, + "loss_ib": 0.006763888522982597, + "step": 2285 + }, + { + "ce_ib": 5.006478786468506, + "ce_orig": 0.7233791351318359, + "epoch": 0.6571284779639083, + "kl_loss": 0.2184155434370041, + "loss_ib": 0.007190634496510029, + "step": 2285 + }, + { + "ce_ib": 4.638318061828613, + "ce_orig": 0.9480195641517639, + "epoch": 0.6571284779639083, + "kl_loss": 0.17835699021816254, + "loss_ib": 0.006421887781471014, + "step": 2285 + }, + { + "ce_ib": 3.09930682182312, + "ce_orig": 0.5251554846763611, + "epoch": 0.6574160615428859, + "kl_loss": 0.17313343286514282, + "loss_ib": 0.004830641206353903, + "step": 2286 + }, + { + "ce_ib": 3.9313840866088867, + "ce_orig": 0.46815305948257446, + "epoch": 0.6574160615428859, + "kl_loss": 0.25345152616500854, + "loss_ib": 0.006465899292379618, + "step": 2286 + }, + { + "ce_ib": 3.6320157051086426, + "ce_orig": 0.3348139226436615, + "epoch": 0.6574160615428859, + "kl_loss": 0.29587066173553467, + "loss_ib": 0.006590722594410181, + "step": 2286 + }, + { + "ce_ib": 5.399853706359863, + "ce_orig": 0.9447898864746094, + "epoch": 0.6574160615428859, + "kl_loss": 0.2238387167453766, + "loss_ib": 0.007638240698724985, + "step": 2286 + }, + { + "ce_ib": 2.786569833755493, + "ce_orig": 0.6506332755088806, + "epoch": 0.6577036451218635, + "kl_loss": 0.1588733196258545, + "loss_ib": 0.004375303164124489, + "step": 2287 + }, + { + "ce_ib": 7.408504009246826, + "ce_orig": 1.105047583580017, + "epoch": 0.6577036451218635, + "kl_loss": 0.26325517892837524, + "loss_ib": 0.010041055269539356, + "step": 2287 + }, + { + "ce_ib": 4.151974201202393, + "ce_orig": 0.9579995274543762, + "epoch": 0.6577036451218635, + "kl_loss": 0.2238772064447403, + "loss_ib": 0.006390746217221022, + "step": 2287 + }, + { + "ce_ib": 4.313475131988525, + "ce_orig": 0.5825878381729126, + "epoch": 0.6577036451218635, + "kl_loss": 0.2647661864757538, + "loss_ib": 0.006961137056350708, + "step": 2287 + }, + { + "ce_ib": 3.1852827072143555, + "ce_orig": 0.571605920791626, + "epoch": 0.6579912287008411, + "kl_loss": 0.20789766311645508, + "loss_ib": 0.005264259409159422, + "step": 2288 + }, + { + "ce_ib": 3.4241669178009033, + "ce_orig": 0.9267202019691467, + "epoch": 0.6579912287008411, + "kl_loss": 0.19404074549674988, + "loss_ib": 0.005364574026316404, + "step": 2288 + }, + { + "ce_ib": 4.205686092376709, + "ce_orig": 0.9795342683792114, + "epoch": 0.6579912287008411, + "kl_loss": 0.7399368286132812, + "loss_ib": 0.011605054140090942, + "step": 2288 + }, + { + "ce_ib": 5.134119033813477, + "ce_orig": 0.48095354437828064, + "epoch": 0.6579912287008411, + "kl_loss": 0.18898122012615204, + "loss_ib": 0.007023931480944157, + "step": 2288 + }, + { + "ce_ib": 3.576956272125244, + "ce_orig": 0.47784698009490967, + "epoch": 0.6582788122798188, + "kl_loss": 0.1758681982755661, + "loss_ib": 0.005335638299584389, + "step": 2289 + }, + { + "ce_ib": 7.3718414306640625, + "ce_orig": 0.746460497379303, + "epoch": 0.6582788122798188, + "kl_loss": 0.1681760549545288, + "loss_ib": 0.009053601883351803, + "step": 2289 + }, + { + "ce_ib": 3.690066337585449, + "ce_orig": 0.6259572505950928, + "epoch": 0.6582788122798188, + "kl_loss": 0.12946048378944397, + "loss_ib": 0.004984671249985695, + "step": 2289 + }, + { + "ce_ib": 3.972560167312622, + "ce_orig": 0.6264833211898804, + "epoch": 0.6582788122798188, + "kl_loss": 0.1985759735107422, + "loss_ib": 0.005958319641649723, + "step": 2289 + }, + { + "epoch": 0.6585663958587965, + "grad_norm": 0.13316069543361664, + "learning_rate": 9.09378082857638e-06, + "loss": 0.8642, + "step": 2290 + }, + { + "ce_ib": 3.9557366371154785, + "ce_orig": 0.749455988407135, + "epoch": 0.6585663958587965, + "kl_loss": 0.21419957280158997, + "loss_ib": 0.006097732577472925, + "step": 2290 + }, + { + "ce_ib": 3.821668863296509, + "ce_orig": 0.6771107316017151, + "epoch": 0.6585663958587965, + "kl_loss": 0.25902289152145386, + "loss_ib": 0.006411897949874401, + "step": 2290 + }, + { + "ce_ib": 4.96189546585083, + "ce_orig": 0.8286534547805786, + "epoch": 0.6585663958587965, + "kl_loss": 0.20024362206459045, + "loss_ib": 0.006964331492781639, + "step": 2290 + }, + { + "ce_ib": 4.72358512878418, + "ce_orig": 0.3945871591567993, + "epoch": 0.6585663958587965, + "kl_loss": 0.3368324637413025, + "loss_ib": 0.008091909810900688, + "step": 2290 + }, + { + "ce_ib": 6.335732460021973, + "ce_orig": 1.050377368927002, + "epoch": 0.6588539794377741, + "kl_loss": 0.24789276719093323, + "loss_ib": 0.008814659900963306, + "step": 2291 + }, + { + "ce_ib": 8.51484203338623, + "ce_orig": 1.5628204345703125, + "epoch": 0.6588539794377741, + "kl_loss": 0.23003104329109192, + "loss_ib": 0.010815152898430824, + "step": 2291 + }, + { + "ce_ib": 3.5196340084075928, + "ce_orig": 0.8101951479911804, + "epoch": 0.6588539794377741, + "kl_loss": 0.14394183456897736, + "loss_ib": 0.004959051962941885, + "step": 2291 + }, + { + "ce_ib": 3.6381170749664307, + "ce_orig": 0.444163978099823, + "epoch": 0.6588539794377741, + "kl_loss": 0.2257876843214035, + "loss_ib": 0.005895993672311306, + "step": 2291 + }, + { + "ce_ib": 3.119291067123413, + "ce_orig": 0.5189169049263, + "epoch": 0.6591415630167518, + "kl_loss": 0.14547976851463318, + "loss_ib": 0.004574088845402002, + "step": 2292 + }, + { + "ce_ib": 4.330430030822754, + "ce_orig": 0.9643470048904419, + "epoch": 0.6591415630167518, + "kl_loss": 0.22295892238616943, + "loss_ib": 0.006560019217431545, + "step": 2292 + }, + { + "ce_ib": 3.788888454437256, + "ce_orig": 0.7872934937477112, + "epoch": 0.6591415630167518, + "kl_loss": 0.21740767359733582, + "loss_ib": 0.005962965544313192, + "step": 2292 + }, + { + "ce_ib": 4.643380165100098, + "ce_orig": 0.846193253993988, + "epoch": 0.6591415630167518, + "kl_loss": 0.1977544128894806, + "loss_ib": 0.006620924454182386, + "step": 2292 + }, + { + "ce_ib": 6.4399871826171875, + "ce_orig": 1.5555925369262695, + "epoch": 0.6594291465957294, + "kl_loss": 0.17902851104736328, + "loss_ib": 0.008230272680521011, + "step": 2293 + }, + { + "ce_ib": 5.36967134475708, + "ce_orig": 0.893637478351593, + "epoch": 0.6594291465957294, + "kl_loss": 0.17702049016952515, + "loss_ib": 0.007139876484870911, + "step": 2293 + }, + { + "ce_ib": 5.005563735961914, + "ce_orig": 1.2092565298080444, + "epoch": 0.6594291465957294, + "kl_loss": 0.22880873084068298, + "loss_ib": 0.007293650880455971, + "step": 2293 + }, + { + "ce_ib": 3.6468358039855957, + "ce_orig": 0.7272307872772217, + "epoch": 0.6594291465957294, + "kl_loss": 0.18234962224960327, + "loss_ib": 0.005470331758260727, + "step": 2293 + }, + { + "ce_ib": 6.381992340087891, + "ce_orig": 1.1269545555114746, + "epoch": 0.659716730174707, + "kl_loss": 0.18945792317390442, + "loss_ib": 0.008276572450995445, + "step": 2294 + }, + { + "ce_ib": 3.2712364196777344, + "ce_orig": 0.6103898882865906, + "epoch": 0.659716730174707, + "kl_loss": 0.1779961884021759, + "loss_ib": 0.005051197949796915, + "step": 2294 + }, + { + "ce_ib": 3.581029176712036, + "ce_orig": 0.6919112205505371, + "epoch": 0.659716730174707, + "kl_loss": 0.18208445608615875, + "loss_ib": 0.005401873495429754, + "step": 2294 + }, + { + "ce_ib": 6.911735534667969, + "ce_orig": 1.2725592851638794, + "epoch": 0.659716730174707, + "kl_loss": 0.22877198457717896, + "loss_ib": 0.009199455380439758, + "step": 2294 + }, + { + "epoch": 0.6600043137536846, + "grad_norm": 0.13777166604995728, + "learning_rate": 9.089320072874899e-06, + "loss": 0.9365, + "step": 2295 + }, + { + "ce_ib": 2.108306646347046, + "ce_orig": 0.4912090599536896, + "epoch": 0.6600043137536846, + "kl_loss": 0.20527879893779755, + "loss_ib": 0.004161094781011343, + "step": 2295 + }, + { + "ce_ib": 6.547287940979004, + "ce_orig": 1.3033679723739624, + "epoch": 0.6600043137536846, + "kl_loss": 0.34901562333106995, + "loss_ib": 0.010037444531917572, + "step": 2295 + }, + { + "ce_ib": 1.6235681772232056, + "ce_orig": 0.42206233739852905, + "epoch": 0.6600043137536846, + "kl_loss": 0.17852813005447388, + "loss_ib": 0.003408849472180009, + "step": 2295 + }, + { + "ce_ib": 4.040412425994873, + "ce_orig": 1.0970664024353027, + "epoch": 0.6600043137536846, + "kl_loss": 0.2232007086277008, + "loss_ib": 0.006272419355809689, + "step": 2295 + }, + { + "ce_ib": 1.9265698194503784, + "ce_orig": 0.5202773809432983, + "epoch": 0.6602918973326624, + "kl_loss": 0.12810984253883362, + "loss_ib": 0.003207667963579297, + "step": 2296 + }, + { + "ce_ib": 2.938476324081421, + "ce_orig": 0.7402215600013733, + "epoch": 0.6602918973326624, + "kl_loss": 0.16756270825862885, + "loss_ib": 0.004614103119820356, + "step": 2296 + }, + { + "ce_ib": 3.5038981437683105, + "ce_orig": 0.872126579284668, + "epoch": 0.6602918973326624, + "kl_loss": 0.18988430500030518, + "loss_ib": 0.005402741488069296, + "step": 2296 + }, + { + "ce_ib": 3.6407828330993652, + "ce_orig": 0.6124637722969055, + "epoch": 0.6602918973326624, + "kl_loss": 0.1619088351726532, + "loss_ib": 0.005259871482849121, + "step": 2296 + }, + { + "ce_ib": 3.631239652633667, + "ce_orig": 0.7639176249504089, + "epoch": 0.66057948091164, + "kl_loss": 0.33420413732528687, + "loss_ib": 0.006973280571401119, + "step": 2297 + }, + { + "ce_ib": 3.8985724449157715, + "ce_orig": 0.673458456993103, + "epoch": 0.66057948091164, + "kl_loss": 0.29177185893058777, + "loss_ib": 0.0068162912502884865, + "step": 2297 + }, + { + "ce_ib": 6.479887962341309, + "ce_orig": 1.582534909248352, + "epoch": 0.66057948091164, + "kl_loss": 0.15527716279029846, + "loss_ib": 0.008032659068703651, + "step": 2297 + }, + { + "ce_ib": 5.800191879272461, + "ce_orig": 0.7706913948059082, + "epoch": 0.66057948091164, + "kl_loss": 0.23747895658016205, + "loss_ib": 0.008174980990588665, + "step": 2297 + }, + { + "ce_ib": 4.7168192863464355, + "ce_orig": 1.0300753116607666, + "epoch": 0.6608670644906176, + "kl_loss": 0.26457151770591736, + "loss_ib": 0.007362534292042255, + "step": 2298 + }, + { + "ce_ib": 5.740164756774902, + "ce_orig": 1.0717376470565796, + "epoch": 0.6608670644906176, + "kl_loss": 0.1727582812309265, + "loss_ib": 0.007467747665941715, + "step": 2298 + }, + { + "ce_ib": 6.1796793937683105, + "ce_orig": 1.228541612625122, + "epoch": 0.6608670644906176, + "kl_loss": 0.15097536146640778, + "loss_ib": 0.007689432706683874, + "step": 2298 + }, + { + "ce_ib": 5.045371055603027, + "ce_orig": 0.9332913160324097, + "epoch": 0.6608670644906176, + "kl_loss": 0.17121383547782898, + "loss_ib": 0.006757509429007769, + "step": 2298 + }, + { + "ce_ib": 5.582101345062256, + "ce_orig": 0.600243866443634, + "epoch": 0.6611546480695952, + "kl_loss": 0.2736514210700989, + "loss_ib": 0.008318615145981312, + "step": 2299 + }, + { + "ce_ib": 6.409243106842041, + "ce_orig": 1.0324525833129883, + "epoch": 0.6611546480695952, + "kl_loss": 0.2332119196653366, + "loss_ib": 0.008741362951695919, + "step": 2299 + }, + { + "ce_ib": 4.843522548675537, + "ce_orig": 0.7020336389541626, + "epoch": 0.6611546480695952, + "kl_loss": 0.24764475226402283, + "loss_ib": 0.007319970056414604, + "step": 2299 + }, + { + "ce_ib": 3.429962396621704, + "ce_orig": 0.6713109016418457, + "epoch": 0.6611546480695952, + "kl_loss": 0.1246408075094223, + "loss_ib": 0.004676370415836573, + "step": 2299 + }, + { + "epoch": 0.6614422316485729, + "grad_norm": 0.13128052651882172, + "learning_rate": 9.08484946505221e-06, + "loss": 0.8757, + "step": 2300 + }, + { + "ce_ib": 5.738707542419434, + "ce_orig": 0.5569428205490112, + "epoch": 0.6614422316485729, + "kl_loss": 0.3296275734901428, + "loss_ib": 0.00903498288244009, + "step": 2300 + }, + { + "ce_ib": 7.344489097595215, + "ce_orig": 1.1955050230026245, + "epoch": 0.6614422316485729, + "kl_loss": 0.3340761959552765, + "loss_ib": 0.010685250163078308, + "step": 2300 + }, + { + "ce_ib": 4.579660892486572, + "ce_orig": 0.7082767486572266, + "epoch": 0.6614422316485729, + "kl_loss": 0.27735596895217896, + "loss_ib": 0.007353220600634813, + "step": 2300 + }, + { + "ce_ib": 4.235044002532959, + "ce_orig": 0.6184917688369751, + "epoch": 0.6614422316485729, + "kl_loss": 0.257684588432312, + "loss_ib": 0.006811889819800854, + "step": 2300 + }, + { + "ce_ib": 5.410089492797852, + "ce_orig": 0.5388880968093872, + "epoch": 0.6617298152275505, + "kl_loss": 0.20469123125076294, + "loss_ib": 0.0074570016004145145, + "step": 2301 + }, + { + "ce_ib": 3.214844226837158, + "ce_orig": 0.5830159187316895, + "epoch": 0.6617298152275505, + "kl_loss": 0.182199627161026, + "loss_ib": 0.0050368402153253555, + "step": 2301 + }, + { + "ce_ib": 6.462206840515137, + "ce_orig": 1.2904666662216187, + "epoch": 0.6617298152275505, + "kl_loss": 0.21579495072364807, + "loss_ib": 0.008620155975222588, + "step": 2301 + }, + { + "ce_ib": 3.759885787963867, + "ce_orig": 0.8263224959373474, + "epoch": 0.6617298152275505, + "kl_loss": 0.17655593156814575, + "loss_ib": 0.005525444634258747, + "step": 2301 + }, + { + "ce_ib": 4.697689533233643, + "ce_orig": 0.3160771131515503, + "epoch": 0.6620173988065281, + "kl_loss": 0.2722274363040924, + "loss_ib": 0.007419963832944632, + "step": 2302 + }, + { + "ce_ib": 6.01535701751709, + "ce_orig": 0.9937010407447815, + "epoch": 0.6620173988065281, + "kl_loss": 0.17533355951309204, + "loss_ib": 0.007768692914396524, + "step": 2302 + }, + { + "ce_ib": 4.2047438621521, + "ce_orig": 0.9726738929748535, + "epoch": 0.6620173988065281, + "kl_loss": 0.1652931421995163, + "loss_ib": 0.0058576748706400394, + "step": 2302 + }, + { + "ce_ib": 4.442732810974121, + "ce_orig": 0.817629873752594, + "epoch": 0.6620173988065281, + "kl_loss": 0.2010645568370819, + "loss_ib": 0.0064533781260252, + "step": 2302 + }, + { + "ce_ib": 6.189380645751953, + "ce_orig": 1.260310411453247, + "epoch": 0.6623049823855058, + "kl_loss": 0.2650079131126404, + "loss_ib": 0.008839460089802742, + "step": 2303 + }, + { + "ce_ib": 4.9466352462768555, + "ce_orig": 1.031982183456421, + "epoch": 0.6623049823855058, + "kl_loss": 0.20939788222312927, + "loss_ib": 0.007040613796561956, + "step": 2303 + }, + { + "ce_ib": 3.97642183303833, + "ce_orig": 0.7425662875175476, + "epoch": 0.6623049823855058, + "kl_loss": 0.2740076780319214, + "loss_ib": 0.006716498639434576, + "step": 2303 + }, + { + "ce_ib": 2.6178650856018066, + "ce_orig": 0.41987624764442444, + "epoch": 0.6623049823855058, + "kl_loss": 0.19170521199703217, + "loss_ib": 0.004534917417913675, + "step": 2303 + }, + { + "ce_ib": 4.486406326293945, + "ce_orig": 1.0327218770980835, + "epoch": 0.6625925659644835, + "kl_loss": 0.1658685803413391, + "loss_ib": 0.0061450921930372715, + "step": 2304 + }, + { + "ce_ib": 2.6808152198791504, + "ce_orig": 0.3298308551311493, + "epoch": 0.6625925659644835, + "kl_loss": 0.2232923060655594, + "loss_ib": 0.004913738463073969, + "step": 2304 + }, + { + "ce_ib": 6.866913318634033, + "ce_orig": 1.4204944372177124, + "epoch": 0.6625925659644835, + "kl_loss": 0.1526600420475006, + "loss_ib": 0.008393513970077038, + "step": 2304 + }, + { + "ce_ib": 3.6056344509124756, + "ce_orig": 0.6933943629264832, + "epoch": 0.6625925659644835, + "kl_loss": 0.15653468668460846, + "loss_ib": 0.005170981399714947, + "step": 2304 + }, + { + "epoch": 0.6628801495434611, + "grad_norm": 0.12283355742692947, + "learning_rate": 9.080369015879048e-06, + "loss": 0.796, + "step": 2305 + }, + { + "ce_ib": 3.633728265762329, + "ce_orig": 0.7075216174125671, + "epoch": 0.6628801495434611, + "kl_loss": 0.19181497395038605, + "loss_ib": 0.005551877897232771, + "step": 2305 + }, + { + "ce_ib": 7.455410003662109, + "ce_orig": 1.5090198516845703, + "epoch": 0.6628801495434611, + "kl_loss": 0.211482435464859, + "loss_ib": 0.009570234455168247, + "step": 2305 + }, + { + "ce_ib": 6.673778533935547, + "ce_orig": 1.2941972017288208, + "epoch": 0.6628801495434611, + "kl_loss": 0.1832512617111206, + "loss_ib": 0.008506291545927525, + "step": 2305 + }, + { + "ce_ib": 4.814781188964844, + "ce_orig": 1.0985026359558105, + "epoch": 0.6628801495434611, + "kl_loss": 0.17557013034820557, + "loss_ib": 0.006570482160896063, + "step": 2305 + }, + { + "ce_ib": 4.938228130340576, + "ce_orig": 0.8788639903068542, + "epoch": 0.6631677331224387, + "kl_loss": 0.24381078779697418, + "loss_ib": 0.007376335561275482, + "step": 2306 + }, + { + "ce_ib": 3.4899322986602783, + "ce_orig": 0.7927453517913818, + "epoch": 0.6631677331224387, + "kl_loss": 0.13090947270393372, + "loss_ib": 0.004799026995897293, + "step": 2306 + }, + { + "ce_ib": 5.959718704223633, + "ce_orig": 1.150773525238037, + "epoch": 0.6631677331224387, + "kl_loss": 0.17227405309677124, + "loss_ib": 0.007682458963245153, + "step": 2306 + }, + { + "ce_ib": 4.546328067779541, + "ce_orig": 0.6749290227890015, + "epoch": 0.6631677331224387, + "kl_loss": 0.22192329168319702, + "loss_ib": 0.006765561178326607, + "step": 2306 + }, + { + "ce_ib": 4.505923748016357, + "ce_orig": 0.9216287136077881, + "epoch": 0.6634553167014163, + "kl_loss": 0.21496614813804626, + "loss_ib": 0.006655585020780563, + "step": 2307 + }, + { + "ce_ib": 5.319473743438721, + "ce_orig": 0.807655394077301, + "epoch": 0.6634553167014163, + "kl_loss": 0.2159365713596344, + "loss_ib": 0.007478838786482811, + "step": 2307 + }, + { + "ce_ib": 9.603060722351074, + "ce_orig": 1.8466027975082397, + "epoch": 0.6634553167014163, + "kl_loss": 0.23066823184490204, + "loss_ib": 0.011909742839634418, + "step": 2307 + }, + { + "ce_ib": 3.8825199604034424, + "ce_orig": 0.4007914662361145, + "epoch": 0.6634553167014163, + "kl_loss": 0.2320995330810547, + "loss_ib": 0.006203515455126762, + "step": 2307 + }, + { + "ce_ib": 4.163855075836182, + "ce_orig": 0.8419356942176819, + "epoch": 0.663742900280394, + "kl_loss": 0.13957376778125763, + "loss_ib": 0.005559592507779598, + "step": 2308 + }, + { + "ce_ib": 3.0431878566741943, + "ce_orig": 0.6635653376579285, + "epoch": 0.663742900280394, + "kl_loss": 0.1712033599615097, + "loss_ib": 0.0047552213072776794, + "step": 2308 + }, + { + "ce_ib": 4.620757579803467, + "ce_orig": 1.0425530672073364, + "epoch": 0.663742900280394, + "kl_loss": 0.23304906487464905, + "loss_ib": 0.006951248273253441, + "step": 2308 + }, + { + "ce_ib": 4.67766809463501, + "ce_orig": 0.8161407709121704, + "epoch": 0.663742900280394, + "kl_loss": 0.1810990422964096, + "loss_ib": 0.0064886584877967834, + "step": 2308 + }, + { + "ce_ib": 3.7442498207092285, + "ce_orig": 0.839737594127655, + "epoch": 0.6640304838593716, + "kl_loss": 0.2148469090461731, + "loss_ib": 0.005892718676477671, + "step": 2309 + }, + { + "ce_ib": 3.436880350112915, + "ce_orig": 0.5422624349594116, + "epoch": 0.6640304838593716, + "kl_loss": 0.18458907306194305, + "loss_ib": 0.0052827708423137665, + "step": 2309 + }, + { + "ce_ib": 2.7827517986297607, + "ce_orig": 0.44572675228118896, + "epoch": 0.6640304838593716, + "kl_loss": 0.1494661271572113, + "loss_ib": 0.004277413245290518, + "step": 2309 + }, + { + "ce_ib": 2.7595722675323486, + "ce_orig": 0.5761458873748779, + "epoch": 0.6640304838593716, + "kl_loss": 0.11558070033788681, + "loss_ib": 0.003915379289537668, + "step": 2309 + }, + { + "epoch": 0.6643180674383493, + "grad_norm": 0.15105421841144562, + "learning_rate": 9.075878736149852e-06, + "loss": 0.9148, + "step": 2310 + }, + { + "ce_ib": 4.356497287750244, + "ce_orig": 0.784406840801239, + "epoch": 0.6643180674383493, + "kl_loss": 0.18368977308273315, + "loss_ib": 0.006193394307047129, + "step": 2310 + }, + { + "ce_ib": 3.865837812423706, + "ce_orig": 0.6743321418762207, + "epoch": 0.6643180674383493, + "kl_loss": 0.2267402559518814, + "loss_ib": 0.006133240181952715, + "step": 2310 + }, + { + "ce_ib": 4.13247537612915, + "ce_orig": 0.9794188737869263, + "epoch": 0.6643180674383493, + "kl_loss": 0.21807053685188293, + "loss_ib": 0.006313180550932884, + "step": 2310 + }, + { + "ce_ib": 6.043604850769043, + "ce_orig": 1.1714067459106445, + "epoch": 0.6643180674383493, + "kl_loss": 0.19049227237701416, + "loss_ib": 0.007948528043925762, + "step": 2310 + }, + { + "ce_ib": 4.022154808044434, + "ce_orig": 0.9222388863563538, + "epoch": 0.6646056510173269, + "kl_loss": 0.16805587708950043, + "loss_ib": 0.005702713504433632, + "step": 2311 + }, + { + "ce_ib": 2.488668203353882, + "ce_orig": 0.6140643358230591, + "epoch": 0.6646056510173269, + "kl_loss": 0.15313443541526794, + "loss_ib": 0.004020012449473143, + "step": 2311 + }, + { + "ce_ib": 3.9163601398468018, + "ce_orig": 0.9380266666412354, + "epoch": 0.6646056510173269, + "kl_loss": 0.1668686866760254, + "loss_ib": 0.005585046950727701, + "step": 2311 + }, + { + "ce_ib": 4.102234363555908, + "ce_orig": 0.8519632816314697, + "epoch": 0.6646056510173269, + "kl_loss": 0.19923876225948334, + "loss_ib": 0.0060946219600737095, + "step": 2311 + }, + { + "ce_ib": 4.2779765129089355, + "ce_orig": 0.8459751605987549, + "epoch": 0.6648932345963046, + "kl_loss": 0.2088940441608429, + "loss_ib": 0.006366916932165623, + "step": 2312 + }, + { + "ce_ib": 6.0030083656311035, + "ce_orig": 1.1003422737121582, + "epoch": 0.6648932345963046, + "kl_loss": 0.31639087200164795, + "loss_ib": 0.009166916832327843, + "step": 2312 + }, + { + "ce_ib": 2.8780276775360107, + "ce_orig": 0.7629425525665283, + "epoch": 0.6648932345963046, + "kl_loss": 0.19082742929458618, + "loss_ib": 0.0047863018698990345, + "step": 2312 + }, + { + "ce_ib": 4.196934223175049, + "ce_orig": 0.700529932975769, + "epoch": 0.6648932345963046, + "kl_loss": 0.11504492163658142, + "loss_ib": 0.005347383674234152, + "step": 2312 + }, + { + "ce_ib": 3.8713040351867676, + "ce_orig": 0.7414731383323669, + "epoch": 0.6651808181752822, + "kl_loss": 0.1930261254310608, + "loss_ib": 0.005801565479487181, + "step": 2313 + }, + { + "ce_ib": 3.508068561553955, + "ce_orig": 0.7245529890060425, + "epoch": 0.6651808181752822, + "kl_loss": 0.15145504474639893, + "loss_ib": 0.00502261845394969, + "step": 2313 + }, + { + "ce_ib": 2.9022111892700195, + "ce_orig": 0.6527214646339417, + "epoch": 0.6651808181752822, + "kl_loss": 0.17903375625610352, + "loss_ib": 0.004692548885941505, + "step": 2313 + }, + { + "ce_ib": 4.778469562530518, + "ce_orig": 1.277742862701416, + "epoch": 0.6651808181752822, + "kl_loss": 0.20605185627937317, + "loss_ib": 0.006838988047093153, + "step": 2313 + }, + { + "ce_ib": 4.27022647857666, + "ce_orig": 0.5646873712539673, + "epoch": 0.6654684017542598, + "kl_loss": 0.30799970030784607, + "loss_ib": 0.007350223604589701, + "step": 2314 + }, + { + "ce_ib": 4.6585693359375, + "ce_orig": 0.5386217832565308, + "epoch": 0.6654684017542598, + "kl_loss": 0.2960846722126007, + "loss_ib": 0.0076194158755242825, + "step": 2314 + }, + { + "ce_ib": 3.0009429454803467, + "ce_orig": 0.46797236800193787, + "epoch": 0.6654684017542598, + "kl_loss": 0.2924917936325073, + "loss_ib": 0.005925860721617937, + "step": 2314 + }, + { + "ce_ib": 6.079381465911865, + "ce_orig": 1.0401948690414429, + "epoch": 0.6654684017542598, + "kl_loss": 0.30849015712738037, + "loss_ib": 0.00916428305208683, + "step": 2314 + }, + { + "epoch": 0.6657559853332374, + "grad_norm": 0.10521713644266129, + "learning_rate": 9.071378636682748e-06, + "loss": 0.8104, + "step": 2315 + }, + { + "ce_ib": 4.690069675445557, + "ce_orig": 0.5854066014289856, + "epoch": 0.6657559853332374, + "kl_loss": 0.26681387424468994, + "loss_ib": 0.007358208764344454, + "step": 2315 + }, + { + "ce_ib": 3.353416919708252, + "ce_orig": 0.5649052858352661, + "epoch": 0.6657559853332374, + "kl_loss": 0.18701042234897614, + "loss_ib": 0.005223521031439304, + "step": 2315 + }, + { + "ce_ib": 5.193617343902588, + "ce_orig": 0.7037189602851868, + "epoch": 0.6657559853332374, + "kl_loss": 0.20212486386299133, + "loss_ib": 0.007214865647256374, + "step": 2315 + }, + { + "ce_ib": 8.1829833984375, + "ce_orig": 1.7209243774414062, + "epoch": 0.6657559853332374, + "kl_loss": 0.5039078593254089, + "loss_ib": 0.013222062028944492, + "step": 2315 + }, + { + "ce_ib": 2.5782253742218018, + "ce_orig": 0.6221116781234741, + "epoch": 0.6660435689122152, + "kl_loss": 0.19260889291763306, + "loss_ib": 0.004504314158111811, + "step": 2316 + }, + { + "ce_ib": 7.151542663574219, + "ce_orig": 1.0111775398254395, + "epoch": 0.6660435689122152, + "kl_loss": 0.2965904474258423, + "loss_ib": 0.01011744700372219, + "step": 2316 + }, + { + "ce_ib": 2.883510112762451, + "ce_orig": 0.5365747809410095, + "epoch": 0.6660435689122152, + "kl_loss": 0.2640886902809143, + "loss_ib": 0.005524396896362305, + "step": 2316 + }, + { + "ce_ib": 3.349785566329956, + "ce_orig": 0.7265335321426392, + "epoch": 0.6660435689122152, + "kl_loss": 0.41355395317077637, + "loss_ib": 0.0074853249825537205, + "step": 2316 + }, + { + "ce_ib": 2.5003128051757812, + "ce_orig": 0.40089958906173706, + "epoch": 0.6663311524911928, + "kl_loss": 0.11820243299007416, + "loss_ib": 0.003682337235659361, + "step": 2317 + }, + { + "ce_ib": 3.681676149368286, + "ce_orig": 0.8597127795219421, + "epoch": 0.6663311524911928, + "kl_loss": 0.260034441947937, + "loss_ib": 0.006282020825892687, + "step": 2317 + }, + { + "ce_ib": 8.207046508789062, + "ce_orig": 1.6418060064315796, + "epoch": 0.6663311524911928, + "kl_loss": 0.20495416224002838, + "loss_ib": 0.010256588459014893, + "step": 2317 + }, + { + "ce_ib": 4.131764888763428, + "ce_orig": 0.67762690782547, + "epoch": 0.6663311524911928, + "kl_loss": 0.21367797255516052, + "loss_ib": 0.006268544588238001, + "step": 2317 + }, + { + "ce_ib": 4.952819347381592, + "ce_orig": 0.9552193284034729, + "epoch": 0.6666187360701704, + "kl_loss": 0.30492764711380005, + "loss_ib": 0.008002095855772495, + "step": 2318 + }, + { + "ce_ib": 5.584290504455566, + "ce_orig": 0.5857093930244446, + "epoch": 0.6666187360701704, + "kl_loss": 0.16136643290519714, + "loss_ib": 0.0071979546919465065, + "step": 2318 + }, + { + "ce_ib": 4.395860195159912, + "ce_orig": 0.7115727066993713, + "epoch": 0.6666187360701704, + "kl_loss": 0.1814490407705307, + "loss_ib": 0.006210350431501865, + "step": 2318 + }, + { + "ce_ib": 4.416397571563721, + "ce_orig": 0.9567720293998718, + "epoch": 0.6666187360701704, + "kl_loss": 0.28053173422813416, + "loss_ib": 0.007221714593470097, + "step": 2318 + }, + { + "ce_ib": 4.807407855987549, + "ce_orig": 0.7930325865745544, + "epoch": 0.666906319649148, + "kl_loss": 0.19729767739772797, + "loss_ib": 0.006780384574085474, + "step": 2319 + }, + { + "ce_ib": 3.7227392196655273, + "ce_orig": 0.4157956838607788, + "epoch": 0.666906319649148, + "kl_loss": 0.393335223197937, + "loss_ib": 0.00765609135851264, + "step": 2319 + }, + { + "ce_ib": 5.779292106628418, + "ce_orig": 1.0905405282974243, + "epoch": 0.666906319649148, + "kl_loss": 0.19318749010562897, + "loss_ib": 0.007711166515946388, + "step": 2319 + }, + { + "ce_ib": 6.3399834632873535, + "ce_orig": 1.3981906175613403, + "epoch": 0.666906319649148, + "kl_loss": 0.2791050970554352, + "loss_ib": 0.009131034836173058, + "step": 2319 + }, + { + "epoch": 0.6671939032281257, + "grad_norm": 0.1096319928765297, + "learning_rate": 9.066868728319522e-06, + "loss": 0.8247, + "step": 2320 + }, + { + "ce_ib": 6.138654708862305, + "ce_orig": 1.3560196161270142, + "epoch": 0.6671939032281257, + "kl_loss": 0.1886732578277588, + "loss_ib": 0.008025387302041054, + "step": 2320 + }, + { + "ce_ib": 5.537533283233643, + "ce_orig": 1.0234616994857788, + "epoch": 0.6671939032281257, + "kl_loss": 0.19791053235530853, + "loss_ib": 0.007516638375818729, + "step": 2320 + }, + { + "ce_ib": 5.231417179107666, + "ce_orig": 1.1735353469848633, + "epoch": 0.6671939032281257, + "kl_loss": 0.2248787134885788, + "loss_ib": 0.007480204571038485, + "step": 2320 + }, + { + "ce_ib": 4.522344589233398, + "ce_orig": 1.1308051347732544, + "epoch": 0.6671939032281257, + "kl_loss": 0.25762468576431274, + "loss_ib": 0.0070985909551382065, + "step": 2320 + }, + { + "ce_ib": 7.586910247802734, + "ce_orig": 1.427869200706482, + "epoch": 0.6674814868071033, + "kl_loss": 0.26728934049606323, + "loss_ib": 0.010259803384542465, + "step": 2321 + }, + { + "ce_ib": 5.584657192230225, + "ce_orig": 0.5508022904396057, + "epoch": 0.6674814868071033, + "kl_loss": 0.274594247341156, + "loss_ib": 0.008330599404871464, + "step": 2321 + }, + { + "ce_ib": 4.292329788208008, + "ce_orig": 0.8082171082496643, + "epoch": 0.6674814868071033, + "kl_loss": 0.19844694435596466, + "loss_ib": 0.006276799365878105, + "step": 2321 + }, + { + "ce_ib": 3.143080234527588, + "ce_orig": 0.5746122002601624, + "epoch": 0.6674814868071033, + "kl_loss": 0.14143384993076324, + "loss_ib": 0.004557418636977673, + "step": 2321 + }, + { + "ce_ib": 4.037876129150391, + "ce_orig": 0.7527395486831665, + "epoch": 0.6677690703860809, + "kl_loss": 0.16537578403949738, + "loss_ib": 0.0056916335597634315, + "step": 2322 + }, + { + "ce_ib": 1.9071094989776611, + "ce_orig": 0.46841585636138916, + "epoch": 0.6677690703860809, + "kl_loss": 0.15046262741088867, + "loss_ib": 0.003411735873669386, + "step": 2322 + }, + { + "ce_ib": 3.901660919189453, + "ce_orig": 1.033377766609192, + "epoch": 0.6677690703860809, + "kl_loss": 0.22247587144374847, + "loss_ib": 0.0061264196410775185, + "step": 2322 + }, + { + "ce_ib": 7.487425327301025, + "ce_orig": 1.643418312072754, + "epoch": 0.6677690703860809, + "kl_loss": 0.24965816736221313, + "loss_ib": 0.009984007105231285, + "step": 2322 + }, + { + "ce_ib": 5.4058403968811035, + "ce_orig": 1.282691478729248, + "epoch": 0.6680566539650586, + "kl_loss": 0.24787637591362, + "loss_ib": 0.007884603925049305, + "step": 2323 + }, + { + "ce_ib": 3.9149701595306396, + "ce_orig": 0.9995007514953613, + "epoch": 0.6680566539650586, + "kl_loss": 0.19947105646133423, + "loss_ib": 0.005909680388867855, + "step": 2323 + }, + { + "ce_ib": 2.9360954761505127, + "ce_orig": 0.6395455002784729, + "epoch": 0.6680566539650586, + "kl_loss": 0.12786418199539185, + "loss_ib": 0.004214737098664045, + "step": 2323 + }, + { + "ce_ib": 7.721859455108643, + "ce_orig": 1.5973061323165894, + "epoch": 0.6680566539650586, + "kl_loss": 0.1990022212266922, + "loss_ib": 0.00971188209950924, + "step": 2323 + }, + { + "ce_ib": 4.176976203918457, + "ce_orig": 0.7965225577354431, + "epoch": 0.6683442375440363, + "kl_loss": 0.22723785042762756, + "loss_ib": 0.006449354812502861, + "step": 2324 + }, + { + "ce_ib": 0.6024351119995117, + "ce_orig": 0.05473356321454048, + "epoch": 0.6683442375440363, + "kl_loss": 0.43181926012039185, + "loss_ib": 0.0049206274561584, + "step": 2324 + }, + { + "ce_ib": 5.572903156280518, + "ce_orig": 0.883960485458374, + "epoch": 0.6683442375440363, + "kl_loss": 0.1704525351524353, + "loss_ib": 0.007277428172528744, + "step": 2324 + }, + { + "ce_ib": 4.530956745147705, + "ce_orig": 1.0239940881729126, + "epoch": 0.6683442375440363, + "kl_loss": 0.2481943517923355, + "loss_ib": 0.007012899965047836, + "step": 2324 + }, + { + "epoch": 0.6686318211230139, + "grad_norm": 0.12744073569774628, + "learning_rate": 9.062349021925589e-06, + "loss": 0.8768, + "step": 2325 + }, + { + "ce_ib": 4.617441654205322, + "ce_orig": 0.5573573112487793, + "epoch": 0.6686318211230139, + "kl_loss": 0.322216659784317, + "loss_ib": 0.007839608006179333, + "step": 2325 + }, + { + "ce_ib": 3.3940160274505615, + "ce_orig": 0.31825533509254456, + "epoch": 0.6686318211230139, + "kl_loss": 0.49466294050216675, + "loss_ib": 0.008340645581483841, + "step": 2325 + }, + { + "ce_ib": 3.136697292327881, + "ce_orig": 0.5470073819160461, + "epoch": 0.6686318211230139, + "kl_loss": 0.16384011507034302, + "loss_ib": 0.004775098524987698, + "step": 2325 + }, + { + "ce_ib": 5.717016696929932, + "ce_orig": 0.7741180062294006, + "epoch": 0.6686318211230139, + "kl_loss": 0.2645062506198883, + "loss_ib": 0.008362079970538616, + "step": 2325 + }, + { + "ce_ib": 4.217399597167969, + "ce_orig": 0.7617364525794983, + "epoch": 0.6689194047019915, + "kl_loss": 0.3065522313117981, + "loss_ib": 0.0072829220443964005, + "step": 2326 + }, + { + "ce_ib": 5.12044095993042, + "ce_orig": 0.9735357761383057, + "epoch": 0.6689194047019915, + "kl_loss": 0.1624661237001419, + "loss_ib": 0.006745102349668741, + "step": 2326 + }, + { + "ce_ib": 1.9749603271484375, + "ce_orig": 0.6473518013954163, + "epoch": 0.6689194047019915, + "kl_loss": 0.1717369258403778, + "loss_ib": 0.0036923293955624104, + "step": 2326 + }, + { + "ce_ib": 6.083463668823242, + "ce_orig": 1.1711344718933105, + "epoch": 0.6689194047019915, + "kl_loss": 0.16198769211769104, + "loss_ib": 0.007703341078013182, + "step": 2326 + }, + { + "ce_ib": 5.925444602966309, + "ce_orig": 0.8338901400566101, + "epoch": 0.6692069882809691, + "kl_loss": 0.19112402200698853, + "loss_ib": 0.007836684584617615, + "step": 2327 + }, + { + "ce_ib": 7.35871696472168, + "ce_orig": 1.5004538297653198, + "epoch": 0.6692069882809691, + "kl_loss": 0.46498560905456543, + "loss_ib": 0.012008573859930038, + "step": 2327 + }, + { + "ce_ib": 6.406662464141846, + "ce_orig": 1.0568636655807495, + "epoch": 0.6692069882809691, + "kl_loss": 0.16739046573638916, + "loss_ib": 0.008080567233264446, + "step": 2327 + }, + { + "ce_ib": 4.3972487449646, + "ce_orig": 0.9488065242767334, + "epoch": 0.6692069882809691, + "kl_loss": 0.3993264436721802, + "loss_ib": 0.008390513248741627, + "step": 2327 + }, + { + "ce_ib": 3.288093328475952, + "ce_orig": 0.6891045570373535, + "epoch": 0.6694945718599468, + "kl_loss": 0.2747056484222412, + "loss_ib": 0.006035150028765202, + "step": 2328 + }, + { + "ce_ib": 6.637941360473633, + "ce_orig": 1.2731645107269287, + "epoch": 0.6694945718599468, + "kl_loss": 0.19071942567825317, + "loss_ib": 0.008545136079192162, + "step": 2328 + }, + { + "ce_ib": 4.187133312225342, + "ce_orig": 1.068514347076416, + "epoch": 0.6694945718599468, + "kl_loss": 0.2083878219127655, + "loss_ib": 0.006271011661738157, + "step": 2328 + }, + { + "ce_ib": 4.477646827697754, + "ce_orig": 0.7131198644638062, + "epoch": 0.6694945718599468, + "kl_loss": 0.21260179579257965, + "loss_ib": 0.006603664718568325, + "step": 2328 + }, + { + "ce_ib": 5.011939525604248, + "ce_orig": 0.7236289978027344, + "epoch": 0.6697821554389244, + "kl_loss": 0.18620991706848145, + "loss_ib": 0.0068740383721888065, + "step": 2329 + }, + { + "ce_ib": 6.844339847564697, + "ce_orig": 1.4143403768539429, + "epoch": 0.6697821554389244, + "kl_loss": 0.22798632085323334, + "loss_ib": 0.009124203585088253, + "step": 2329 + }, + { + "ce_ib": 2.691387414932251, + "ce_orig": 0.7347269654273987, + "epoch": 0.6697821554389244, + "kl_loss": 0.15707460045814514, + "loss_ib": 0.004262133501470089, + "step": 2329 + }, + { + "ce_ib": 3.458364725112915, + "ce_orig": 1.0929536819458008, + "epoch": 0.6697821554389244, + "kl_loss": 0.12029542028903961, + "loss_ib": 0.004661319311708212, + "step": 2329 + }, + { + "epoch": 0.6700697390179021, + "grad_norm": 0.1449551284313202, + "learning_rate": 9.057819528389971e-06, + "loss": 0.8559, + "step": 2330 + }, + { + "ce_ib": 6.357821941375732, + "ce_orig": 0.6374034881591797, + "epoch": 0.6700697390179021, + "kl_loss": 0.1867590695619583, + "loss_ib": 0.008225412108004093, + "step": 2330 + }, + { + "ce_ib": 5.690848350524902, + "ce_orig": 0.7554671168327332, + "epoch": 0.6700697390179021, + "kl_loss": 0.16120535135269165, + "loss_ib": 0.007302901707589626, + "step": 2330 + }, + { + "ce_ib": 5.963001251220703, + "ce_orig": 1.0315766334533691, + "epoch": 0.6700697390179021, + "kl_loss": 0.2611057162284851, + "loss_ib": 0.008574058301746845, + "step": 2330 + }, + { + "ce_ib": 4.486814498901367, + "ce_orig": 1.2127509117126465, + "epoch": 0.6700697390179021, + "kl_loss": 0.185156911611557, + "loss_ib": 0.006338383536785841, + "step": 2330 + }, + { + "ce_ib": 4.906322479248047, + "ce_orig": 0.7693057060241699, + "epoch": 0.6703573225968797, + "kl_loss": 0.20947617292404175, + "loss_ib": 0.007001083809882402, + "step": 2331 + }, + { + "ce_ib": 3.0545711517333984, + "ce_orig": 0.8038958311080933, + "epoch": 0.6703573225968797, + "kl_loss": 0.16863229870796204, + "loss_ib": 0.004740893840789795, + "step": 2331 + }, + { + "ce_ib": 3.368959903717041, + "ce_orig": 0.4329507350921631, + "epoch": 0.6703573225968797, + "kl_loss": 0.1823541224002838, + "loss_ib": 0.005192501470446587, + "step": 2331 + }, + { + "ce_ib": 3.220052719116211, + "ce_orig": 0.7664608955383301, + "epoch": 0.6703573225968797, + "kl_loss": 0.17317095398902893, + "loss_ib": 0.004951762035489082, + "step": 2331 + }, + { + "ce_ib": 3.6824681758880615, + "ce_orig": 0.873006284236908, + "epoch": 0.6706449061758574, + "kl_loss": 0.21751652657985687, + "loss_ib": 0.005857633426785469, + "step": 2332 + }, + { + "ce_ib": 5.798840045928955, + "ce_orig": 1.1246321201324463, + "epoch": 0.6706449061758574, + "kl_loss": 0.18159759044647217, + "loss_ib": 0.007614815607666969, + "step": 2332 + }, + { + "ce_ib": 3.549923896789551, + "ce_orig": 0.6875470280647278, + "epoch": 0.6706449061758574, + "kl_loss": 0.18668906390666962, + "loss_ib": 0.00541681470349431, + "step": 2332 + }, + { + "ce_ib": 6.316079616546631, + "ce_orig": 1.312699794769287, + "epoch": 0.6706449061758574, + "kl_loss": 0.21871861815452576, + "loss_ib": 0.008503264747560024, + "step": 2332 + }, + { + "ce_ib": 2.512765884399414, + "ce_orig": 0.5692850351333618, + "epoch": 0.670932489754835, + "kl_loss": 0.12300410866737366, + "loss_ib": 0.0037428068462759256, + "step": 2333 + }, + { + "ce_ib": 6.292421817779541, + "ce_orig": 1.3564940690994263, + "epoch": 0.670932489754835, + "kl_loss": 0.17199182510375977, + "loss_ib": 0.008012339472770691, + "step": 2333 + }, + { + "ce_ib": 3.5772225856781006, + "ce_orig": 0.762313723564148, + "epoch": 0.670932489754835, + "kl_loss": 0.19644099473953247, + "loss_ib": 0.005541631951928139, + "step": 2333 + }, + { + "ce_ib": 7.53089714050293, + "ce_orig": 1.4466739892959595, + "epoch": 0.670932489754835, + "kl_loss": 0.18346333503723145, + "loss_ib": 0.00936553068459034, + "step": 2333 + }, + { + "ce_ib": 3.3528292179107666, + "ce_orig": 0.7205506563186646, + "epoch": 0.6712200733338126, + "kl_loss": 0.17134252190589905, + "loss_ib": 0.005066254176199436, + "step": 2334 + }, + { + "ce_ib": 5.103963375091553, + "ce_orig": 1.3335671424865723, + "epoch": 0.6712200733338126, + "kl_loss": 0.1452326476573944, + "loss_ib": 0.006556290201842785, + "step": 2334 + }, + { + "ce_ib": 1.8792632818222046, + "ce_orig": 0.5216496586799622, + "epoch": 0.6712200733338126, + "kl_loss": 0.19384339451789856, + "loss_ib": 0.0038176970556378365, + "step": 2334 + }, + { + "ce_ib": 3.1075665950775146, + "ce_orig": 0.6417901515960693, + "epoch": 0.6712200733338126, + "kl_loss": 0.19948217272758484, + "loss_ib": 0.005102388095110655, + "step": 2334 + }, + { + "epoch": 0.6715076569127902, + "grad_norm": 0.14064951241016388, + "learning_rate": 9.053280258625268e-06, + "loss": 0.8385, + "step": 2335 + }, + { + "ce_ib": 2.9291601181030273, + "ce_orig": 0.5136308670043945, + "epoch": 0.6715076569127902, + "kl_loss": 0.1660720258951187, + "loss_ib": 0.00458988081663847, + "step": 2335 + }, + { + "ce_ib": 2.492825746536255, + "ce_orig": 0.4083261787891388, + "epoch": 0.6715076569127902, + "kl_loss": 0.15092833340168, + "loss_ib": 0.004002109169960022, + "step": 2335 + }, + { + "ce_ib": 2.782907724380493, + "ce_orig": 0.5632783770561218, + "epoch": 0.6715076569127902, + "kl_loss": 0.13648569583892822, + "loss_ib": 0.004147764761000872, + "step": 2335 + }, + { + "ce_ib": 5.769538879394531, + "ce_orig": 0.582737386226654, + "epoch": 0.6715076569127902, + "kl_loss": 0.27770137786865234, + "loss_ib": 0.008546552620828152, + "step": 2335 + }, + { + "ce_ib": 5.156252861022949, + "ce_orig": 1.1190091371536255, + "epoch": 0.671795240491768, + "kl_loss": 0.20150886476039886, + "loss_ib": 0.007171341683715582, + "step": 2336 + }, + { + "ce_ib": 5.814518928527832, + "ce_orig": 0.8593125939369202, + "epoch": 0.671795240491768, + "kl_loss": 0.20143669843673706, + "loss_ib": 0.007828885689377785, + "step": 2336 + }, + { + "ce_ib": 6.94173002243042, + "ce_orig": 1.235355257987976, + "epoch": 0.671795240491768, + "kl_loss": 0.28982973098754883, + "loss_ib": 0.009840027429163456, + "step": 2336 + }, + { + "ce_ib": 6.70390510559082, + "ce_orig": 0.6194453835487366, + "epoch": 0.671795240491768, + "kl_loss": 0.15700793266296387, + "loss_ib": 0.008273984305560589, + "step": 2336 + }, + { + "ce_ib": 2.346567153930664, + "ce_orig": 0.5361838340759277, + "epoch": 0.6720828240707456, + "kl_loss": 0.3237065076828003, + "loss_ib": 0.005583631806075573, + "step": 2337 + }, + { + "ce_ib": 4.051931381225586, + "ce_orig": 0.7538625001907349, + "epoch": 0.6720828240707456, + "kl_loss": 0.2385300099849701, + "loss_ib": 0.006437231320887804, + "step": 2337 + }, + { + "ce_ib": 2.3810617923736572, + "ce_orig": 0.6849679350852966, + "epoch": 0.6720828240707456, + "kl_loss": 0.21861755847930908, + "loss_ib": 0.004567237570881844, + "step": 2337 + }, + { + "ce_ib": 5.001443386077881, + "ce_orig": 0.8313379287719727, + "epoch": 0.6720828240707456, + "kl_loss": 0.18274323642253876, + "loss_ib": 0.0068288762122392654, + "step": 2337 + }, + { + "ce_ib": 6.006716251373291, + "ce_orig": 1.1954948902130127, + "epoch": 0.6723704076497232, + "kl_loss": 0.14286038279533386, + "loss_ib": 0.007435320410877466, + "step": 2338 + }, + { + "ce_ib": 1.8805468082427979, + "ce_orig": 0.32521066069602966, + "epoch": 0.6723704076497232, + "kl_loss": 0.4029024541378021, + "loss_ib": 0.005909571424126625, + "step": 2338 + }, + { + "ce_ib": 5.370026111602783, + "ce_orig": 0.9780164361000061, + "epoch": 0.6723704076497232, + "kl_loss": 0.1850007325410843, + "loss_ib": 0.007220033090561628, + "step": 2338 + }, + { + "ce_ib": 5.9619221687316895, + "ce_orig": 0.8305768370628357, + "epoch": 0.6723704076497232, + "kl_loss": 0.3197008967399597, + "loss_ib": 0.009158930741250515, + "step": 2338 + }, + { + "ce_ib": 4.689261436462402, + "ce_orig": 0.8220483660697937, + "epoch": 0.6726579912287008, + "kl_loss": 0.21096330881118774, + "loss_ib": 0.006798894144594669, + "step": 2339 + }, + { + "ce_ib": 2.320617914199829, + "ce_orig": 0.5016850233078003, + "epoch": 0.6726579912287008, + "kl_loss": 0.2059776335954666, + "loss_ib": 0.004380394238978624, + "step": 2339 + }, + { + "ce_ib": 5.369472026824951, + "ce_orig": 1.2433044910430908, + "epoch": 0.6726579912287008, + "kl_loss": 0.24739579856395721, + "loss_ib": 0.007843430154025555, + "step": 2339 + }, + { + "ce_ib": 3.7815656661987305, + "ce_orig": 0.8344639539718628, + "epoch": 0.6726579912287008, + "kl_loss": 0.17600911855697632, + "loss_ib": 0.005541657097637653, + "step": 2339 + }, + { + "epoch": 0.6729455748076785, + "grad_norm": 0.16967147588729858, + "learning_rate": 9.048731223567636e-06, + "loss": 0.8483, + "step": 2340 + }, + { + "ce_ib": 5.963703632354736, + "ce_orig": 1.3152751922607422, + "epoch": 0.6729455748076785, + "kl_loss": 0.2186407446861267, + "loss_ib": 0.008150110952556133, + "step": 2340 + }, + { + "ce_ib": 4.918736934661865, + "ce_orig": 0.8563671112060547, + "epoch": 0.6729455748076785, + "kl_loss": 0.2406436800956726, + "loss_ib": 0.007325173355638981, + "step": 2340 + }, + { + "ce_ib": 2.569237470626831, + "ce_orig": 0.5691882967948914, + "epoch": 0.6729455748076785, + "kl_loss": 0.18694287538528442, + "loss_ib": 0.00443866616114974, + "step": 2340 + }, + { + "ce_ib": 3.6589016914367676, + "ce_orig": 0.8152604103088379, + "epoch": 0.6729455748076785, + "kl_loss": 0.16828224062919617, + "loss_ib": 0.005341724026948214, + "step": 2340 + }, + { + "ce_ib": 4.1208014488220215, + "ce_orig": 1.0974019765853882, + "epoch": 0.6732331583866561, + "kl_loss": 0.2799421548843384, + "loss_ib": 0.006920223124325275, + "step": 2341 + }, + { + "ce_ib": 3.796556234359741, + "ce_orig": 0.6352778673171997, + "epoch": 0.6732331583866561, + "kl_loss": 0.18381713330745697, + "loss_ib": 0.005634727422147989, + "step": 2341 + }, + { + "ce_ib": 5.7456207275390625, + "ce_orig": 1.0789732933044434, + "epoch": 0.6732331583866561, + "kl_loss": 0.38630348443984985, + "loss_ib": 0.009608655236661434, + "step": 2341 + }, + { + "ce_ib": 4.114236831665039, + "ce_orig": 0.9554736614227295, + "epoch": 0.6732331583866561, + "kl_loss": 0.282163143157959, + "loss_ib": 0.0069358679465949535, + "step": 2341 + }, + { + "ce_ib": 3.419074773788452, + "ce_orig": 0.5071178674697876, + "epoch": 0.6735207419656337, + "kl_loss": 0.15728682279586792, + "loss_ib": 0.0049919430166482925, + "step": 2342 + }, + { + "ce_ib": 8.935401916503906, + "ce_orig": 1.4129444360733032, + "epoch": 0.6735207419656337, + "kl_loss": 0.34903863072395325, + "loss_ib": 0.012425788678228855, + "step": 2342 + }, + { + "ce_ib": 4.430383682250977, + "ce_orig": 0.8488162755966187, + "epoch": 0.6735207419656337, + "kl_loss": 0.2273930311203003, + "loss_ib": 0.006704313680529594, + "step": 2342 + }, + { + "ce_ib": 5.013377666473389, + "ce_orig": 0.5987446308135986, + "epoch": 0.6735207419656337, + "kl_loss": 0.19916749000549316, + "loss_ib": 0.007005052641034126, + "step": 2342 + }, + { + "ce_ib": 2.796550750732422, + "ce_orig": 0.6809712648391724, + "epoch": 0.6738083255446115, + "kl_loss": 0.14544333517551422, + "loss_ib": 0.00425098417326808, + "step": 2343 + }, + { + "ce_ib": 4.187790870666504, + "ce_orig": 0.7618436813354492, + "epoch": 0.6738083255446115, + "kl_loss": 0.20742039382457733, + "loss_ib": 0.00626199459657073, + "step": 2343 + }, + { + "ce_ib": 5.015692234039307, + "ce_orig": 0.3720974326133728, + "epoch": 0.6738083255446115, + "kl_loss": 0.22999659180641174, + "loss_ib": 0.0073156580328941345, + "step": 2343 + }, + { + "ce_ib": 4.114816188812256, + "ce_orig": 0.46043452620506287, + "epoch": 0.6738083255446115, + "kl_loss": 0.36091458797454834, + "loss_ib": 0.00772396195679903, + "step": 2343 + }, + { + "ce_ib": 5.10518741607666, + "ce_orig": 0.8090283274650574, + "epoch": 0.6740959091235891, + "kl_loss": 0.22144706547260284, + "loss_ib": 0.007319657597690821, + "step": 2344 + }, + { + "ce_ib": 3.5865478515625, + "ce_orig": 0.6963501572608948, + "epoch": 0.6740959091235891, + "kl_loss": 0.20066100358963013, + "loss_ib": 0.005593157839030027, + "step": 2344 + }, + { + "ce_ib": 6.458381175994873, + "ce_orig": 1.4207088947296143, + "epoch": 0.6740959091235891, + "kl_loss": 0.2123204469680786, + "loss_ib": 0.008581585250794888, + "step": 2344 + }, + { + "ce_ib": 3.5102202892303467, + "ce_orig": 0.953303337097168, + "epoch": 0.6740959091235891, + "kl_loss": 0.2366867959499359, + "loss_ib": 0.005877088289707899, + "step": 2344 + }, + { + "epoch": 0.6743834927025667, + "grad_norm": 0.1284521222114563, + "learning_rate": 9.044172434176757e-06, + "loss": 0.8805, + "step": 2345 + }, + { + "ce_ib": 5.521263122558594, + "ce_orig": 0.8246755003929138, + "epoch": 0.6743834927025667, + "kl_loss": 0.4463164806365967, + "loss_ib": 0.009984428063035011, + "step": 2345 + }, + { + "ce_ib": 3.74079966545105, + "ce_orig": 0.7136639952659607, + "epoch": 0.6743834927025667, + "kl_loss": 0.2656722664833069, + "loss_ib": 0.006397522520273924, + "step": 2345 + }, + { + "ce_ib": 8.860637664794922, + "ce_orig": 1.9142242670059204, + "epoch": 0.6743834927025667, + "kl_loss": 0.28182417154312134, + "loss_ib": 0.011678879149258137, + "step": 2345 + }, + { + "ce_ib": 3.7075371742248535, + "ce_orig": 0.860762894153595, + "epoch": 0.6743834927025667, + "kl_loss": 0.14719226956367493, + "loss_ib": 0.005179460160434246, + "step": 2345 + }, + { + "ce_ib": 3.6549248695373535, + "ce_orig": 0.9365842342376709, + "epoch": 0.6746710762815443, + "kl_loss": 0.14415206015110016, + "loss_ib": 0.005096445791423321, + "step": 2346 + }, + { + "ce_ib": 4.782288074493408, + "ce_orig": 0.748873770236969, + "epoch": 0.6746710762815443, + "kl_loss": 0.29222923517227173, + "loss_ib": 0.007704580202698708, + "step": 2346 + }, + { + "ce_ib": 7.602805137634277, + "ce_orig": 1.4727648496627808, + "epoch": 0.6746710762815443, + "kl_loss": 0.22837351262569427, + "loss_ib": 0.009886540472507477, + "step": 2346 + }, + { + "ce_ib": 5.569965362548828, + "ce_orig": 0.8594422340393066, + "epoch": 0.6746710762815443, + "kl_loss": 0.20288127660751343, + "loss_ib": 0.0075987777672708035, + "step": 2346 + }, + { + "ce_ib": 5.449804306030273, + "ce_orig": 0.7872324585914612, + "epoch": 0.674958659860522, + "kl_loss": 0.3451184630393982, + "loss_ib": 0.008900988847017288, + "step": 2347 + }, + { + "ce_ib": 5.852057456970215, + "ce_orig": 1.1093541383743286, + "epoch": 0.674958659860522, + "kl_loss": 0.23821544647216797, + "loss_ib": 0.00823421124368906, + "step": 2347 + }, + { + "ce_ib": 4.197488784790039, + "ce_orig": 0.7132943272590637, + "epoch": 0.674958659860522, + "kl_loss": 0.23380252718925476, + "loss_ib": 0.0065355137921869755, + "step": 2347 + }, + { + "ce_ib": 2.707888126373291, + "ce_orig": 0.6055094599723816, + "epoch": 0.674958659860522, + "kl_loss": 0.10501334071159363, + "loss_ib": 0.0037580213975161314, + "step": 2347 + }, + { + "ce_ib": 4.545577526092529, + "ce_orig": 0.7026136517524719, + "epoch": 0.6752462434394996, + "kl_loss": 0.19579410552978516, + "loss_ib": 0.006503518670797348, + "step": 2348 + }, + { + "ce_ib": 5.610827445983887, + "ce_orig": 0.8758822679519653, + "epoch": 0.6752462434394996, + "kl_loss": 0.2484227567911148, + "loss_ib": 0.008095054887235165, + "step": 2348 + }, + { + "ce_ib": 3.9573872089385986, + "ce_orig": 0.6290544271469116, + "epoch": 0.6752462434394996, + "kl_loss": 0.19353343546390533, + "loss_ib": 0.0058927214704453945, + "step": 2348 + }, + { + "ce_ib": 8.327220916748047, + "ce_orig": 1.6419728994369507, + "epoch": 0.6752462434394996, + "kl_loss": 0.24164289236068726, + "loss_ib": 0.010743649676442146, + "step": 2348 + }, + { + "ce_ib": 4.906221389770508, + "ce_orig": 0.6636301279067993, + "epoch": 0.6755338270184772, + "kl_loss": 0.22329646348953247, + "loss_ib": 0.007139185443520546, + "step": 2349 + }, + { + "ce_ib": 5.560032367706299, + "ce_orig": 1.220058798789978, + "epoch": 0.6755338270184772, + "kl_loss": 0.21335220336914062, + "loss_ib": 0.007693554740399122, + "step": 2349 + }, + { + "ce_ib": 4.252519130706787, + "ce_orig": 0.810975968837738, + "epoch": 0.6755338270184772, + "kl_loss": 0.12749168276786804, + "loss_ib": 0.005527435801923275, + "step": 2349 + }, + { + "ce_ib": 9.798966407775879, + "ce_orig": 2.0772080421447754, + "epoch": 0.6755338270184772, + "kl_loss": 0.2154698222875595, + "loss_ib": 0.011953664943575859, + "step": 2349 + }, + { + "epoch": 0.6758214105974549, + "grad_norm": 0.13566534221172333, + "learning_rate": 9.03960390143581e-06, + "loss": 0.9287, + "step": 2350 + }, + { + "ce_ib": 2.7756145000457764, + "ce_orig": 0.6627106070518494, + "epoch": 0.6758214105974549, + "kl_loss": 0.22529399394989014, + "loss_ib": 0.005028554238379002, + "step": 2350 + }, + { + "ce_ib": 3.85699725151062, + "ce_orig": 0.6137217283248901, + "epoch": 0.6758214105974549, + "kl_loss": 0.14741161465644836, + "loss_ib": 0.005331113003194332, + "step": 2350 + }, + { + "ce_ib": 0.9724986553192139, + "ce_orig": 0.18795469403266907, + "epoch": 0.6758214105974549, + "kl_loss": 0.3977949619293213, + "loss_ib": 0.0049504479393363, + "step": 2350 + }, + { + "ce_ib": 3.003432273864746, + "ce_orig": 0.7800980806350708, + "epoch": 0.6758214105974549, + "kl_loss": 0.1825912594795227, + "loss_ib": 0.004829344805330038, + "step": 2350 + }, + { + "ce_ib": 3.924611806869507, + "ce_orig": 0.6622674465179443, + "epoch": 0.6761089941764326, + "kl_loss": 0.2910749614238739, + "loss_ib": 0.0068353614769876, + "step": 2351 + }, + { + "ce_ib": 3.5590686798095703, + "ce_orig": 0.45562946796417236, + "epoch": 0.6761089941764326, + "kl_loss": 0.3571373224258423, + "loss_ib": 0.0071304417215287685, + "step": 2351 + }, + { + "ce_ib": 4.754192352294922, + "ce_orig": 0.6212707161903381, + "epoch": 0.6761089941764326, + "kl_loss": 0.18337634205818176, + "loss_ib": 0.0065879556350409985, + "step": 2351 + }, + { + "ce_ib": 4.584872722625732, + "ce_orig": 0.8036566972732544, + "epoch": 0.6761089941764326, + "kl_loss": 0.18803319334983826, + "loss_ib": 0.006465204991400242, + "step": 2351 + }, + { + "ce_ib": 2.5493974685668945, + "ce_orig": 0.3258399963378906, + "epoch": 0.6763965777554102, + "kl_loss": 0.17739419639110565, + "loss_ib": 0.004323339555412531, + "step": 2352 + }, + { + "ce_ib": 1.6236635446548462, + "ce_orig": 0.2723792791366577, + "epoch": 0.6763965777554102, + "kl_loss": 0.44364506006240845, + "loss_ib": 0.006060113664716482, + "step": 2352 + }, + { + "ce_ib": 4.549102306365967, + "ce_orig": 1.0282446146011353, + "epoch": 0.6763965777554102, + "kl_loss": 0.2998218238353729, + "loss_ib": 0.007547320332378149, + "step": 2352 + }, + { + "ce_ib": 5.592477321624756, + "ce_orig": 0.5212347507476807, + "epoch": 0.6763965777554102, + "kl_loss": 0.3289104700088501, + "loss_ib": 0.00888158194720745, + "step": 2352 + }, + { + "ce_ib": 2.3811488151550293, + "ce_orig": 0.5460391044616699, + "epoch": 0.6766841613343878, + "kl_loss": 0.13842087984085083, + "loss_ib": 0.003765357658267021, + "step": 2353 + }, + { + "ce_ib": 5.319685935974121, + "ce_orig": 1.3493181467056274, + "epoch": 0.6766841613343878, + "kl_loss": 0.6397020220756531, + "loss_ib": 0.011716706678271294, + "step": 2353 + }, + { + "ce_ib": 7.929182529449463, + "ce_orig": 1.3184224367141724, + "epoch": 0.6766841613343878, + "kl_loss": 0.1742190420627594, + "loss_ib": 0.009671373292803764, + "step": 2353 + }, + { + "ce_ib": 4.245944976806641, + "ce_orig": 0.8950180411338806, + "epoch": 0.6766841613343878, + "kl_loss": 0.18292689323425293, + "loss_ib": 0.0060752141289412975, + "step": 2353 + }, + { + "ce_ib": 7.6407694816589355, + "ce_orig": 1.476797342300415, + "epoch": 0.6769717449133654, + "kl_loss": 0.17630422115325928, + "loss_ib": 0.009403811767697334, + "step": 2354 + }, + { + "ce_ib": 6.825623512268066, + "ce_orig": 1.2747817039489746, + "epoch": 0.6769717449133654, + "kl_loss": 0.24619455635547638, + "loss_ib": 0.009287568740546703, + "step": 2354 + }, + { + "ce_ib": 3.7621774673461914, + "ce_orig": 0.5517895817756653, + "epoch": 0.6769717449133654, + "kl_loss": 0.5038815140724182, + "loss_ib": 0.008800992742180824, + "step": 2354 + }, + { + "ce_ib": 6.254488945007324, + "ce_orig": 1.4451842308044434, + "epoch": 0.6769717449133654, + "kl_loss": 0.35730934143066406, + "loss_ib": 0.00982758216559887, + "step": 2354 + }, + { + "epoch": 0.677259328492343, + "grad_norm": 0.15294215083122253, + "learning_rate": 9.035025636351453e-06, + "loss": 0.8364, + "step": 2355 + }, + { + "ce_ib": 5.420169353485107, + "ce_orig": 1.2019628286361694, + "epoch": 0.677259328492343, + "kl_loss": 0.2097071409225464, + "loss_ib": 0.007517240475863218, + "step": 2355 + }, + { + "ce_ib": 3.1420066356658936, + "ce_orig": 0.565796434879303, + "epoch": 0.677259328492343, + "kl_loss": 0.16950723528862, + "loss_ib": 0.004837078973650932, + "step": 2355 + }, + { + "ce_ib": 3.7191355228424072, + "ce_orig": 0.48417067527770996, + "epoch": 0.677259328492343, + "kl_loss": 0.23754984140396118, + "loss_ib": 0.006094633601605892, + "step": 2355 + }, + { + "ce_ib": 3.7623915672302246, + "ce_orig": 0.3713918924331665, + "epoch": 0.677259328492343, + "kl_loss": 0.28425341844558716, + "loss_ib": 0.006604925263673067, + "step": 2355 + }, + { + "ce_ib": 5.329808712005615, + "ce_orig": 0.696547269821167, + "epoch": 0.6775469120713207, + "kl_loss": 0.2687610387802124, + "loss_ib": 0.008017418906092644, + "step": 2356 + }, + { + "ce_ib": 5.137853145599365, + "ce_orig": 0.8662459850311279, + "epoch": 0.6775469120713207, + "kl_loss": 0.20760409533977509, + "loss_ib": 0.007213893812149763, + "step": 2356 + }, + { + "ce_ib": 6.586230754852295, + "ce_orig": 1.033025860786438, + "epoch": 0.6775469120713207, + "kl_loss": 0.24910518527030945, + "loss_ib": 0.00907728262245655, + "step": 2356 + }, + { + "ce_ib": 6.441699504852295, + "ce_orig": 0.9868768453598022, + "epoch": 0.6775469120713207, + "kl_loss": 0.3523910343647003, + "loss_ib": 0.009965610690414906, + "step": 2356 + }, + { + "ce_ib": 3.392749786376953, + "ce_orig": 0.689198911190033, + "epoch": 0.6778344956502984, + "kl_loss": 0.14096780121326447, + "loss_ib": 0.0048024277202785015, + "step": 2357 + }, + { + "ce_ib": 5.674539566040039, + "ce_orig": 0.6831174492835999, + "epoch": 0.6778344956502984, + "kl_loss": 0.30105888843536377, + "loss_ib": 0.008685128763318062, + "step": 2357 + }, + { + "ce_ib": 4.994870185852051, + "ce_orig": 0.6596392393112183, + "epoch": 0.6778344956502984, + "kl_loss": 0.16945010423660278, + "loss_ib": 0.006689370609819889, + "step": 2357 + }, + { + "ce_ib": 5.080516815185547, + "ce_orig": 1.3030409812927246, + "epoch": 0.6778344956502984, + "kl_loss": 0.23111680150032043, + "loss_ib": 0.007391685154289007, + "step": 2357 + }, + { + "ce_ib": 3.5136775970458984, + "ce_orig": 0.923321008682251, + "epoch": 0.678122079229276, + "kl_loss": 0.6449806094169617, + "loss_ib": 0.009963483549654484, + "step": 2358 + }, + { + "ce_ib": 4.806703090667725, + "ce_orig": 0.6261407136917114, + "epoch": 0.678122079229276, + "kl_loss": 0.2209281325340271, + "loss_ib": 0.0070159840397536755, + "step": 2358 + }, + { + "ce_ib": 3.1615469455718994, + "ce_orig": 1.0646497011184692, + "epoch": 0.678122079229276, + "kl_loss": 0.1739317625761032, + "loss_ib": 0.004900864325463772, + "step": 2358 + }, + { + "ce_ib": 4.176257133483887, + "ce_orig": 0.475572407245636, + "epoch": 0.678122079229276, + "kl_loss": 0.2626805901527405, + "loss_ib": 0.0068030632100999355, + "step": 2358 + }, + { + "ce_ib": 4.871155738830566, + "ce_orig": 0.5042467713356018, + "epoch": 0.6784096628082537, + "kl_loss": 0.270442932844162, + "loss_ib": 0.007575585041195154, + "step": 2359 + }, + { + "ce_ib": 1.4488774538040161, + "ce_orig": 0.24092383682727814, + "epoch": 0.6784096628082537, + "kl_loss": 0.39829567074775696, + "loss_ib": 0.005431834142655134, + "step": 2359 + }, + { + "ce_ib": 5.6718549728393555, + "ce_orig": 0.7820304036140442, + "epoch": 0.6784096628082537, + "kl_loss": 0.16890694200992584, + "loss_ib": 0.0073609245009720325, + "step": 2359 + }, + { + "ce_ib": 6.213348865509033, + "ce_orig": 1.114067792892456, + "epoch": 0.6784096628082537, + "kl_loss": 0.19319316744804382, + "loss_ib": 0.008145281113684177, + "step": 2359 + }, + { + "epoch": 0.6786972463872313, + "grad_norm": 0.12670515477657318, + "learning_rate": 9.03043764995379e-06, + "loss": 0.8637, + "step": 2360 + }, + { + "ce_ib": 6.427003860473633, + "ce_orig": 0.9043854475021362, + "epoch": 0.6786972463872313, + "kl_loss": 0.20379087328910828, + "loss_ib": 0.008464911952614784, + "step": 2360 + }, + { + "ce_ib": 5.410439968109131, + "ce_orig": 1.3197859525680542, + "epoch": 0.6786972463872313, + "kl_loss": 0.17676478624343872, + "loss_ib": 0.007178087718784809, + "step": 2360 + }, + { + "ce_ib": 4.42930793762207, + "ce_orig": 0.8722084164619446, + "epoch": 0.6786972463872313, + "kl_loss": 0.16509079933166504, + "loss_ib": 0.006080216262489557, + "step": 2360 + }, + { + "ce_ib": 6.546119213104248, + "ce_orig": 1.0400314331054688, + "epoch": 0.6786972463872313, + "kl_loss": 0.23384100198745728, + "loss_ib": 0.008884529583156109, + "step": 2360 + }, + { + "ce_ib": 2.8369908332824707, + "ce_orig": 0.8056364059448242, + "epoch": 0.6789848299662089, + "kl_loss": 0.16659578680992126, + "loss_ib": 0.004502948839217424, + "step": 2361 + }, + { + "ce_ib": 6.208215713500977, + "ce_orig": 0.8637713193893433, + "epoch": 0.6789848299662089, + "kl_loss": 0.31148862838745117, + "loss_ib": 0.009323102422058582, + "step": 2361 + }, + { + "ce_ib": 4.256682872772217, + "ce_orig": 0.6507083773612976, + "epoch": 0.6789848299662089, + "kl_loss": 0.18238262832164764, + "loss_ib": 0.006080509163439274, + "step": 2361 + }, + { + "ce_ib": 6.640824794769287, + "ce_orig": 1.16599440574646, + "epoch": 0.6789848299662089, + "kl_loss": 0.2389250099658966, + "loss_ib": 0.009030074812471867, + "step": 2361 + }, + { + "ce_ib": 2.219738721847534, + "ce_orig": 0.621587336063385, + "epoch": 0.6792724135451865, + "kl_loss": 0.11807604134082794, + "loss_ib": 0.0034004992339760065, + "step": 2362 + }, + { + "ce_ib": 4.101718425750732, + "ce_orig": 0.8874995112419128, + "epoch": 0.6792724135451865, + "kl_loss": 0.16040900349617004, + "loss_ib": 0.005705808289349079, + "step": 2362 + }, + { + "ce_ib": 5.192953109741211, + "ce_orig": 1.0671027898788452, + "epoch": 0.6792724135451865, + "kl_loss": 0.23526489734649658, + "loss_ib": 0.0075456020422279835, + "step": 2362 + }, + { + "ce_ib": 3.4906961917877197, + "ce_orig": 0.5743681788444519, + "epoch": 0.6792724135451865, + "kl_loss": 0.22730977833271027, + "loss_ib": 0.005763793829828501, + "step": 2362 + }, + { + "ce_ib": 6.633861064910889, + "ce_orig": 1.2964842319488525, + "epoch": 0.6795599971241643, + "kl_loss": 0.24438124895095825, + "loss_ib": 0.009077673777937889, + "step": 2363 + }, + { + "ce_ib": 4.804771423339844, + "ce_orig": 0.7338257431983948, + "epoch": 0.6795599971241643, + "kl_loss": 0.2660658061504364, + "loss_ib": 0.00746542913839221, + "step": 2363 + }, + { + "ce_ib": 5.0763139724731445, + "ce_orig": 0.4563937187194824, + "epoch": 0.6795599971241643, + "kl_loss": 0.6463937759399414, + "loss_ib": 0.011540251784026623, + "step": 2363 + }, + { + "ce_ib": 4.423039436340332, + "ce_orig": 0.9181050658226013, + "epoch": 0.6795599971241643, + "kl_loss": 0.11539775133132935, + "loss_ib": 0.0055770170874893665, + "step": 2363 + }, + { + "ce_ib": 3.090385913848877, + "ce_orig": 0.5898316502571106, + "epoch": 0.6798475807031419, + "kl_loss": 0.1657380759716034, + "loss_ib": 0.00474776653572917, + "step": 2364 + }, + { + "ce_ib": 3.0061793327331543, + "ce_orig": 0.7557374835014343, + "epoch": 0.6798475807031419, + "kl_loss": 0.1658133864402771, + "loss_ib": 0.004664313048124313, + "step": 2364 + }, + { + "ce_ib": 4.450294017791748, + "ce_orig": 0.8198217749595642, + "epoch": 0.6798475807031419, + "kl_loss": 0.14525921642780304, + "loss_ib": 0.005902886390686035, + "step": 2364 + }, + { + "ce_ib": 5.118402004241943, + "ce_orig": 1.1276792287826538, + "epoch": 0.6798475807031419, + "kl_loss": 0.17322368919849396, + "loss_ib": 0.006850638892501593, + "step": 2364 + }, + { + "epoch": 0.6801351642821195, + "grad_norm": 0.12413902580738068, + "learning_rate": 9.025839953296342e-06, + "loss": 0.8494, + "step": 2365 + }, + { + "ce_ib": 4.436099052429199, + "ce_orig": 0.5341281890869141, + "epoch": 0.6801351642821195, + "kl_loss": 0.32176828384399414, + "loss_ib": 0.007653782144188881, + "step": 2365 + }, + { + "ce_ib": 6.374281406402588, + "ce_orig": 1.040210247039795, + "epoch": 0.6801351642821195, + "kl_loss": 0.32304877042770386, + "loss_ib": 0.009604768827557564, + "step": 2365 + }, + { + "ce_ib": 2.873868703842163, + "ce_orig": 0.7397594451904297, + "epoch": 0.6801351642821195, + "kl_loss": 0.15672293305397034, + "loss_ib": 0.004441097844392061, + "step": 2365 + }, + { + "ce_ib": 5.143776893615723, + "ce_orig": 1.1027494668960571, + "epoch": 0.6801351642821195, + "kl_loss": 0.16566824913024902, + "loss_ib": 0.006800459697842598, + "step": 2365 + }, + { + "ce_ib": 6.05198335647583, + "ce_orig": 1.0332986116409302, + "epoch": 0.6804227478610971, + "kl_loss": 0.18929535150527954, + "loss_ib": 0.007944936864078045, + "step": 2366 + }, + { + "ce_ib": 4.431005954742432, + "ce_orig": 0.43965333700180054, + "epoch": 0.6804227478610971, + "kl_loss": 0.23078951239585876, + "loss_ib": 0.006738901138305664, + "step": 2366 + }, + { + "ce_ib": 2.475435256958008, + "ce_orig": 0.4659450650215149, + "epoch": 0.6804227478610971, + "kl_loss": 0.35076653957366943, + "loss_ib": 0.005983100272715092, + "step": 2366 + }, + { + "ce_ib": 5.519201278686523, + "ce_orig": 0.7240884304046631, + "epoch": 0.6804227478610971, + "kl_loss": 0.23449768126010895, + "loss_ib": 0.007864177227020264, + "step": 2366 + }, + { + "ce_ib": 6.131736755371094, + "ce_orig": 1.2049247026443481, + "epoch": 0.6807103314400748, + "kl_loss": 0.25966542959213257, + "loss_ib": 0.0087283905595541, + "step": 2367 + }, + { + "ce_ib": 3.6924593448638916, + "ce_orig": 0.6331778168678284, + "epoch": 0.6807103314400748, + "kl_loss": 0.3288051187992096, + "loss_ib": 0.0069805108942091465, + "step": 2367 + }, + { + "ce_ib": 5.510125637054443, + "ce_orig": 0.8159403800964355, + "epoch": 0.6807103314400748, + "kl_loss": 0.20541627705097198, + "loss_ib": 0.0075642880983650684, + "step": 2367 + }, + { + "ce_ib": 3.6148011684417725, + "ce_orig": 0.8443357944488525, + "epoch": 0.6807103314400748, + "kl_loss": 0.4258665144443512, + "loss_ib": 0.007873466238379478, + "step": 2367 + }, + { + "ce_ib": 2.2964365482330322, + "ce_orig": 0.46671342849731445, + "epoch": 0.6809979150190524, + "kl_loss": 0.1298447847366333, + "loss_ib": 0.0035948841832578182, + "step": 2368 + }, + { + "ce_ib": 2.711488962173462, + "ce_orig": 0.7087107300758362, + "epoch": 0.6809979150190524, + "kl_loss": 0.13659030199050903, + "loss_ib": 0.0040773916989564896, + "step": 2368 + }, + { + "ce_ib": 5.347286224365234, + "ce_orig": 1.1058367490768433, + "epoch": 0.6809979150190524, + "kl_loss": 0.17516417801380157, + "loss_ib": 0.007098928093910217, + "step": 2368 + }, + { + "ce_ib": 4.97794246673584, + "ce_orig": 0.8794678449630737, + "epoch": 0.6809979150190524, + "kl_loss": 0.20176264643669128, + "loss_ib": 0.0069955685175955296, + "step": 2368 + }, + { + "ce_ib": 7.590750217437744, + "ce_orig": 1.3139705657958984, + "epoch": 0.68128549859803, + "kl_loss": 0.16173359751701355, + "loss_ib": 0.009208086878061295, + "step": 2369 + }, + { + "ce_ib": 6.094817638397217, + "ce_orig": 1.4002524614334106, + "epoch": 0.68128549859803, + "kl_loss": 0.21228596568107605, + "loss_ib": 0.008217676542699337, + "step": 2369 + }, + { + "ce_ib": 5.396921157836914, + "ce_orig": 1.1210111379623413, + "epoch": 0.68128549859803, + "kl_loss": 0.26670876145362854, + "loss_ib": 0.008064008317887783, + "step": 2369 + }, + { + "ce_ib": 4.005306720733643, + "ce_orig": 1.0440188646316528, + "epoch": 0.68128549859803, + "kl_loss": 0.14161914587020874, + "loss_ib": 0.005421497859060764, + "step": 2369 + }, + { + "epoch": 0.6815730821770077, + "grad_norm": 0.13982900977134705, + "learning_rate": 9.02123255745603e-06, + "loss": 0.817, + "step": 2370 + }, + { + "ce_ib": 7.438904285430908, + "ce_orig": 1.1820238828659058, + "epoch": 0.6815730821770077, + "kl_loss": 0.2899150848388672, + "loss_ib": 0.010338055901229382, + "step": 2370 + }, + { + "ce_ib": 3.7752606868743896, + "ce_orig": 0.8388400673866272, + "epoch": 0.6815730821770077, + "kl_loss": 0.23778773844242096, + "loss_ib": 0.006153138354420662, + "step": 2370 + }, + { + "ce_ib": 5.824619293212891, + "ce_orig": 0.8916985988616943, + "epoch": 0.6815730821770077, + "kl_loss": 0.4634697735309601, + "loss_ib": 0.01045931689441204, + "step": 2370 + }, + { + "ce_ib": 6.1580376625061035, + "ce_orig": 1.1340430974960327, + "epoch": 0.6815730821770077, + "kl_loss": 0.1768806278705597, + "loss_ib": 0.00792684406042099, + "step": 2370 + }, + { + "ce_ib": 4.843919277191162, + "ce_orig": 1.1111186742782593, + "epoch": 0.6818606657559854, + "kl_loss": 0.2137346863746643, + "loss_ib": 0.00698126619681716, + "step": 2371 + }, + { + "ce_ib": 4.06892728805542, + "ce_orig": 0.9198371171951294, + "epoch": 0.6818606657559854, + "kl_loss": 0.16928580403327942, + "loss_ib": 0.0057617854326963425, + "step": 2371 + }, + { + "ce_ib": 4.192783355712891, + "ce_orig": 0.7015928030014038, + "epoch": 0.6818606657559854, + "kl_loss": 0.18972888588905334, + "loss_ib": 0.006090072449296713, + "step": 2371 + }, + { + "ce_ib": 1.4769037961959839, + "ce_orig": 0.362619012594223, + "epoch": 0.6818606657559854, + "kl_loss": 0.3306482136249542, + "loss_ib": 0.0047833858989179134, + "step": 2371 + }, + { + "ce_ib": 4.633091449737549, + "ce_orig": 0.777779221534729, + "epoch": 0.682148249334963, + "kl_loss": 0.24499820172786713, + "loss_ib": 0.007083073258399963, + "step": 2372 + }, + { + "ce_ib": 3.3289074897766113, + "ce_orig": 0.49277225136756897, + "epoch": 0.682148249334963, + "kl_loss": 0.23076125979423523, + "loss_ib": 0.005636520218104124, + "step": 2372 + }, + { + "ce_ib": 4.609645366668701, + "ce_orig": 0.6936049461364746, + "epoch": 0.682148249334963, + "kl_loss": 0.23711679875850677, + "loss_ib": 0.006980813108384609, + "step": 2372 + }, + { + "ce_ib": 2.0846705436706543, + "ce_orig": 0.3794306814670563, + "epoch": 0.682148249334963, + "kl_loss": 0.16076591610908508, + "loss_ib": 0.0036923293955624104, + "step": 2372 + }, + { + "ce_ib": 3.8989789485931396, + "ce_orig": 0.826328456401825, + "epoch": 0.6824358329139406, + "kl_loss": 0.2557678818702698, + "loss_ib": 0.006456657312810421, + "step": 2373 + }, + { + "ce_ib": 6.227227210998535, + "ce_orig": 1.0655980110168457, + "epoch": 0.6824358329139406, + "kl_loss": 0.23140808939933777, + "loss_ib": 0.008541308343410492, + "step": 2373 + }, + { + "ce_ib": 4.1658525466918945, + "ce_orig": 0.6457476019859314, + "epoch": 0.6824358329139406, + "kl_loss": 0.3586903214454651, + "loss_ib": 0.007752755656838417, + "step": 2373 + }, + { + "ce_ib": 3.9805781841278076, + "ce_orig": 0.5306757688522339, + "epoch": 0.6824358329139406, + "kl_loss": 0.19820117950439453, + "loss_ib": 0.005962589755654335, + "step": 2373 + }, + { + "ce_ib": 7.276369571685791, + "ce_orig": 1.2178153991699219, + "epoch": 0.6827234164929182, + "kl_loss": 0.23022830486297607, + "loss_ib": 0.009578652679920197, + "step": 2374 + }, + { + "ce_ib": 5.24856424331665, + "ce_orig": 1.0352643728256226, + "epoch": 0.6827234164929182, + "kl_loss": 0.19126391410827637, + "loss_ib": 0.007161203306168318, + "step": 2374 + }, + { + "ce_ib": 3.878305673599243, + "ce_orig": 0.6407814621925354, + "epoch": 0.6827234164929182, + "kl_loss": 0.1853662133216858, + "loss_ib": 0.005731967743486166, + "step": 2374 + }, + { + "ce_ib": 3.075993776321411, + "ce_orig": 0.5941479206085205, + "epoch": 0.6827234164929182, + "kl_loss": 0.14396926760673523, + "loss_ib": 0.00451568653807044, + "step": 2374 + }, + { + "epoch": 0.6830110000718959, + "grad_norm": 0.12975303828716278, + "learning_rate": 9.01661547353314e-06, + "loss": 0.8551, + "step": 2375 + }, + { + "ce_ib": 4.098069667816162, + "ce_orig": 0.9359715580940247, + "epoch": 0.6830110000718959, + "kl_loss": 0.2209508717060089, + "loss_ib": 0.0063075777143239975, + "step": 2375 + }, + { + "ce_ib": 7.783524036407471, + "ce_orig": 1.7888940572738647, + "epoch": 0.6830110000718959, + "kl_loss": 0.19450144469738007, + "loss_ib": 0.009728538803756237, + "step": 2375 + }, + { + "ce_ib": 4.3932414054870605, + "ce_orig": 0.4576880931854248, + "epoch": 0.6830110000718959, + "kl_loss": 0.2907083332538605, + "loss_ib": 0.007300324272364378, + "step": 2375 + }, + { + "ce_ib": 1.653120517730713, + "ce_orig": 0.25907251238822937, + "epoch": 0.6830110000718959, + "kl_loss": 0.1716480702161789, + "loss_ib": 0.003369601210579276, + "step": 2375 + }, + { + "ce_ib": 2.1150875091552734, + "ce_orig": 0.5020933747291565, + "epoch": 0.6832985836508735, + "kl_loss": 0.15428447723388672, + "loss_ib": 0.003657932160422206, + "step": 2376 + }, + { + "ce_ib": 6.963423728942871, + "ce_orig": 1.1670407056808472, + "epoch": 0.6832985836508735, + "kl_loss": 0.18724414706230164, + "loss_ib": 0.008835865184664726, + "step": 2376 + }, + { + "ce_ib": 2.80057430267334, + "ce_orig": 0.5362765789031982, + "epoch": 0.6832985836508735, + "kl_loss": 0.13857564330101013, + "loss_ib": 0.004186330828815699, + "step": 2376 + }, + { + "ce_ib": 3.9137444496154785, + "ce_orig": 1.145675778388977, + "epoch": 0.6832985836508735, + "kl_loss": 0.16857630014419556, + "loss_ib": 0.005599507130682468, + "step": 2376 + }, + { + "ce_ib": 4.37617826461792, + "ce_orig": 0.4916369318962097, + "epoch": 0.6835861672298512, + "kl_loss": 0.20442155003547668, + "loss_ib": 0.006420393940061331, + "step": 2377 + }, + { + "ce_ib": 6.336913585662842, + "ce_orig": 1.4445587396621704, + "epoch": 0.6835861672298512, + "kl_loss": 0.19390463829040527, + "loss_ib": 0.008275959640741348, + "step": 2377 + }, + { + "ce_ib": 5.270610332489014, + "ce_orig": 1.015923023223877, + "epoch": 0.6835861672298512, + "kl_loss": 0.1694609820842743, + "loss_ib": 0.006965219974517822, + "step": 2377 + }, + { + "ce_ib": 3.61518931388855, + "ce_orig": 0.5621278285980225, + "epoch": 0.6835861672298512, + "kl_loss": 0.17705115675926208, + "loss_ib": 0.0053857010789215565, + "step": 2377 + }, + { + "ce_ib": 5.525913715362549, + "ce_orig": 0.8908613920211792, + "epoch": 0.6838737508088288, + "kl_loss": 0.2557414770126343, + "loss_ib": 0.008083328604698181, + "step": 2378 + }, + { + "ce_ib": 3.3756632804870605, + "ce_orig": 0.5262348055839539, + "epoch": 0.6838737508088288, + "kl_loss": 0.18069535493850708, + "loss_ib": 0.005182616412639618, + "step": 2378 + }, + { + "ce_ib": 0.6069609522819519, + "ce_orig": 0.11017680168151855, + "epoch": 0.6838737508088288, + "kl_loss": 0.36451786756515503, + "loss_ib": 0.00425213947892189, + "step": 2378 + }, + { + "ce_ib": 3.892404079437256, + "ce_orig": 0.6378296613693237, + "epoch": 0.6838737508088288, + "kl_loss": 0.201584592461586, + "loss_ib": 0.005908249877393246, + "step": 2378 + }, + { + "ce_ib": 7.031460285186768, + "ce_orig": 1.7615665197372437, + "epoch": 0.6841613343878065, + "kl_loss": 0.18355390429496765, + "loss_ib": 0.008866999298334122, + "step": 2379 + }, + { + "ce_ib": 4.738483905792236, + "ce_orig": 0.7730192542076111, + "epoch": 0.6841613343878065, + "kl_loss": 0.19678109884262085, + "loss_ib": 0.006706295069307089, + "step": 2379 + }, + { + "ce_ib": 2.0314853191375732, + "ce_orig": 0.31440311670303345, + "epoch": 0.6841613343878065, + "kl_loss": 0.47268036007881165, + "loss_ib": 0.0067582884803414345, + "step": 2379 + }, + { + "ce_ib": 3.6376583576202393, + "ce_orig": 0.7319126129150391, + "epoch": 0.6841613343878065, + "kl_loss": 0.19824498891830444, + "loss_ib": 0.005620107986032963, + "step": 2379 + }, + { + "epoch": 0.6844489179667841, + "grad_norm": 0.1280554234981537, + "learning_rate": 9.011988712651295e-06, + "loss": 0.7737, + "step": 2380 + }, + { + "ce_ib": 3.079505443572998, + "ce_orig": 0.5528362989425659, + "epoch": 0.6844489179667841, + "kl_loss": 0.17561742663383484, + "loss_ib": 0.0048356796614825726, + "step": 2380 + }, + { + "ce_ib": 3.236459255218506, + "ce_orig": 0.4977704882621765, + "epoch": 0.6844489179667841, + "kl_loss": 0.20061321556568146, + "loss_ib": 0.005242591258138418, + "step": 2380 + }, + { + "ce_ib": 4.035147190093994, + "ce_orig": 0.5336639285087585, + "epoch": 0.6844489179667841, + "kl_loss": 0.44036218523979187, + "loss_ib": 0.008438768796622753, + "step": 2380 + }, + { + "ce_ib": 2.822251796722412, + "ce_orig": 0.7800899744033813, + "epoch": 0.6844489179667841, + "kl_loss": 0.14072850346565247, + "loss_ib": 0.00422953674569726, + "step": 2380 + }, + { + "ce_ib": 6.740298271179199, + "ce_orig": 1.0656028985977173, + "epoch": 0.6847365015457617, + "kl_loss": 0.22949329018592834, + "loss_ib": 0.009035230614244938, + "step": 2381 + }, + { + "ce_ib": 7.143641471862793, + "ce_orig": 1.686445951461792, + "epoch": 0.6847365015457617, + "kl_loss": 0.24999219179153442, + "loss_ib": 0.009643563069403172, + "step": 2381 + }, + { + "ce_ib": 3.179046869277954, + "ce_orig": 0.64617520570755, + "epoch": 0.6847365015457617, + "kl_loss": 0.1780432015657425, + "loss_ib": 0.004959478974342346, + "step": 2381 + }, + { + "ce_ib": 3.668739080429077, + "ce_orig": 0.7885252237319946, + "epoch": 0.6847365015457617, + "kl_loss": 0.13506971299648285, + "loss_ib": 0.005019436590373516, + "step": 2381 + }, + { + "ce_ib": 6.150866508483887, + "ce_orig": 1.3070327043533325, + "epoch": 0.6850240851247393, + "kl_loss": 0.16907364130020142, + "loss_ib": 0.007841602899134159, + "step": 2382 + }, + { + "ce_ib": 3.3803164958953857, + "ce_orig": 0.6914639472961426, + "epoch": 0.6850240851247393, + "kl_loss": 0.18524609506130219, + "loss_ib": 0.005232777446508408, + "step": 2382 + }, + { + "ce_ib": 3.8572239875793457, + "ce_orig": 0.7191105484962463, + "epoch": 0.6850240851247393, + "kl_loss": 0.16937828063964844, + "loss_ib": 0.005551006644964218, + "step": 2382 + }, + { + "ce_ib": 5.849963665008545, + "ce_orig": 1.3807730674743652, + "epoch": 0.6850240851247393, + "kl_loss": 0.25966402888298035, + "loss_ib": 0.008446604013442993, + "step": 2382 + }, + { + "ce_ib": 4.747272968292236, + "ce_orig": 0.8425436019897461, + "epoch": 0.6853116687037171, + "kl_loss": 0.22707083821296692, + "loss_ib": 0.007017981261014938, + "step": 2383 + }, + { + "ce_ib": 6.236233234405518, + "ce_orig": 1.204581379890442, + "epoch": 0.6853116687037171, + "kl_loss": 0.2563554644584656, + "loss_ib": 0.008799787610769272, + "step": 2383 + }, + { + "ce_ib": 5.137873649597168, + "ce_orig": 0.9157485961914062, + "epoch": 0.6853116687037171, + "kl_loss": 0.22456423938274384, + "loss_ib": 0.0073835160583257675, + "step": 2383 + }, + { + "ce_ib": 6.138115882873535, + "ce_orig": 1.1767443418502808, + "epoch": 0.6853116687037171, + "kl_loss": 0.1781543493270874, + "loss_ib": 0.007919659838080406, + "step": 2383 + }, + { + "ce_ib": 3.9803855419158936, + "ce_orig": 0.7228215336799622, + "epoch": 0.6855992522826947, + "kl_loss": 0.2518206536769867, + "loss_ib": 0.006498591974377632, + "step": 2384 + }, + { + "ce_ib": 5.565451622009277, + "ce_orig": 1.1306179761886597, + "epoch": 0.6855992522826947, + "kl_loss": 0.2570563852787018, + "loss_ib": 0.008136016316711903, + "step": 2384 + }, + { + "ce_ib": 4.5619425773620605, + "ce_orig": 0.7895923852920532, + "epoch": 0.6855992522826947, + "kl_loss": 0.2366015762090683, + "loss_ib": 0.006927957758307457, + "step": 2384 + }, + { + "ce_ib": 4.499630451202393, + "ce_orig": 0.5701790452003479, + "epoch": 0.6855992522826947, + "kl_loss": 0.6754400730133057, + "loss_ib": 0.011254031211137772, + "step": 2384 + }, + { + "epoch": 0.6858868358616723, + "grad_norm": 0.1150699332356453, + "learning_rate": 9.00735228595744e-06, + "loss": 0.8738, + "step": 2385 + }, + { + "ce_ib": 1.6136904954910278, + "ce_orig": 0.38070446252822876, + "epoch": 0.6858868358616723, + "kl_loss": 0.15536415576934814, + "loss_ib": 0.003167332150042057, + "step": 2385 + }, + { + "ce_ib": 2.848937749862671, + "ce_orig": 0.4383619427680969, + "epoch": 0.6858868358616723, + "kl_loss": 0.21270304918289185, + "loss_ib": 0.004975968040525913, + "step": 2385 + }, + { + "ce_ib": 3.05055832862854, + "ce_orig": 0.3636588752269745, + "epoch": 0.6858868358616723, + "kl_loss": 0.23941145837306976, + "loss_ib": 0.005444672424346209, + "step": 2385 + }, + { + "ce_ib": 5.691380500793457, + "ce_orig": 0.9744901061058044, + "epoch": 0.6858868358616723, + "kl_loss": 0.15087062120437622, + "loss_ib": 0.007200086489319801, + "step": 2385 + }, + { + "ce_ib": 4.286596775054932, + "ce_orig": 1.0040082931518555, + "epoch": 0.6861744194406499, + "kl_loss": 0.20548425614833832, + "loss_ib": 0.006341439206153154, + "step": 2386 + }, + { + "ce_ib": 4.422677516937256, + "ce_orig": 0.9499791860580444, + "epoch": 0.6861744194406499, + "kl_loss": 0.20669901371002197, + "loss_ib": 0.006489667575806379, + "step": 2386 + }, + { + "ce_ib": 3.1653730869293213, + "ce_orig": 0.5927715301513672, + "epoch": 0.6861744194406499, + "kl_loss": 0.1456114500761032, + "loss_ib": 0.0046214875765144825, + "step": 2386 + }, + { + "ce_ib": 7.665916919708252, + "ce_orig": 1.0653904676437378, + "epoch": 0.6861744194406499, + "kl_loss": 0.3232917785644531, + "loss_ib": 0.010898835025727749, + "step": 2386 + }, + { + "ce_ib": 4.5224103927612305, + "ce_orig": 0.9202977418899536, + "epoch": 0.6864620030196276, + "kl_loss": 0.20480769872665405, + "loss_ib": 0.006570486817508936, + "step": 2387 + }, + { + "ce_ib": 3.985074520111084, + "ce_orig": 0.643227219581604, + "epoch": 0.6864620030196276, + "kl_loss": 0.18821227550506592, + "loss_ib": 0.005867197178304195, + "step": 2387 + }, + { + "ce_ib": 5.693385601043701, + "ce_orig": 0.7097618579864502, + "epoch": 0.6864620030196276, + "kl_loss": 0.20515014231204987, + "loss_ib": 0.007744886912405491, + "step": 2387 + }, + { + "ce_ib": 4.017644882202148, + "ce_orig": 0.6900057792663574, + "epoch": 0.6864620030196276, + "kl_loss": 0.25589680671691895, + "loss_ib": 0.0065766130574047565, + "step": 2387 + }, + { + "ce_ib": 4.187311172485352, + "ce_orig": 0.7181167006492615, + "epoch": 0.6867495865986052, + "kl_loss": 0.1706588864326477, + "loss_ib": 0.00589390005916357, + "step": 2388 + }, + { + "ce_ib": 7.511682033538818, + "ce_orig": 1.6211843490600586, + "epoch": 0.6867495865986052, + "kl_loss": 0.24460217356681824, + "loss_ib": 0.009957702830433846, + "step": 2388 + }, + { + "ce_ib": 4.977305889129639, + "ce_orig": 0.9272069931030273, + "epoch": 0.6867495865986052, + "kl_loss": 0.14356006681919098, + "loss_ib": 0.00641290657222271, + "step": 2388 + }, + { + "ce_ib": 4.552124500274658, + "ce_orig": 0.6412807703018188, + "epoch": 0.6867495865986052, + "kl_loss": 0.1931004524230957, + "loss_ib": 0.006483129225671291, + "step": 2388 + }, + { + "ce_ib": 4.915032863616943, + "ce_orig": 1.1063121557235718, + "epoch": 0.6870371701775828, + "kl_loss": 0.16841772198677063, + "loss_ib": 0.006599210202693939, + "step": 2389 + }, + { + "ce_ib": 3.4152989387512207, + "ce_orig": 0.6192360520362854, + "epoch": 0.6870371701775828, + "kl_loss": 0.15919390320777893, + "loss_ib": 0.0050072381272912025, + "step": 2389 + }, + { + "ce_ib": 7.184045791625977, + "ce_orig": 1.2253427505493164, + "epoch": 0.6870371701775828, + "kl_loss": 0.17397835850715637, + "loss_ib": 0.008923829533159733, + "step": 2389 + }, + { + "ce_ib": 5.5855393409729, + "ce_orig": 0.8925540447235107, + "epoch": 0.6870371701775828, + "kl_loss": 0.23730136454105377, + "loss_ib": 0.007958552800118923, + "step": 2389 + }, + { + "epoch": 0.6873247537565605, + "grad_norm": 0.12830393016338348, + "learning_rate": 9.002706204621802e-06, + "loss": 0.8186, + "step": 2390 + }, + { + "ce_ib": 5.027583122253418, + "ce_orig": 0.8946557641029358, + "epoch": 0.6873247537565605, + "kl_loss": 0.16012822091579437, + "loss_ib": 0.006628865376114845, + "step": 2390 + }, + { + "ce_ib": 7.448464870452881, + "ce_orig": 1.44466233253479, + "epoch": 0.6873247537565605, + "kl_loss": 0.27910733222961426, + "loss_ib": 0.010239538736641407, + "step": 2390 + }, + { + "ce_ib": 3.1236283779144287, + "ce_orig": 0.671385645866394, + "epoch": 0.6873247537565605, + "kl_loss": 0.15631049871444702, + "loss_ib": 0.004686733242124319, + "step": 2390 + }, + { + "ce_ib": 5.633164405822754, + "ce_orig": 0.6819784641265869, + "epoch": 0.6873247537565605, + "kl_loss": 0.21376991271972656, + "loss_ib": 0.007770863827317953, + "step": 2390 + }, + { + "ce_ib": 4.821913242340088, + "ce_orig": 0.6051445603370667, + "epoch": 0.6876123373355382, + "kl_loss": 0.3266524076461792, + "loss_ib": 0.008088436909019947, + "step": 2391 + }, + { + "ce_ib": 4.945763111114502, + "ce_orig": 0.8836590051651001, + "epoch": 0.6876123373355382, + "kl_loss": 0.16861730813980103, + "loss_ib": 0.006631935480982065, + "step": 2391 + }, + { + "ce_ib": 3.4438564777374268, + "ce_orig": 0.7033485174179077, + "epoch": 0.6876123373355382, + "kl_loss": 0.270365446805954, + "loss_ib": 0.006147510837763548, + "step": 2391 + }, + { + "ce_ib": 3.4915878772735596, + "ce_orig": 0.6463858485221863, + "epoch": 0.6876123373355382, + "kl_loss": 0.18920986354351044, + "loss_ib": 0.005383686162531376, + "step": 2391 + }, + { + "ce_ib": 2.2939703464508057, + "ce_orig": 0.5179139971733093, + "epoch": 0.6878999209145158, + "kl_loss": 0.1924765706062317, + "loss_ib": 0.004218736197799444, + "step": 2392 + }, + { + "ce_ib": 5.7430949211120605, + "ce_orig": 0.7452789545059204, + "epoch": 0.6878999209145158, + "kl_loss": 0.2681679129600525, + "loss_ib": 0.008424773812294006, + "step": 2392 + }, + { + "ce_ib": 9.0850191116333, + "ce_orig": 0.7892183065414429, + "epoch": 0.6878999209145158, + "kl_loss": 0.1594020128250122, + "loss_ib": 0.010679040104150772, + "step": 2392 + }, + { + "ce_ib": 2.9276812076568604, + "ce_orig": 0.8117877840995789, + "epoch": 0.6878999209145158, + "kl_loss": 0.18995533883571625, + "loss_ib": 0.004827234428375959, + "step": 2392 + }, + { + "ce_ib": 4.03348970413208, + "ce_orig": 0.520341157913208, + "epoch": 0.6881875044934934, + "kl_loss": 0.27789855003356934, + "loss_ib": 0.0068124751560389996, + "step": 2393 + }, + { + "ce_ib": 6.184271812438965, + "ce_orig": 1.0774855613708496, + "epoch": 0.6881875044934934, + "kl_loss": 0.22177600860595703, + "loss_ib": 0.008402031846344471, + "step": 2393 + }, + { + "ce_ib": 3.8255410194396973, + "ce_orig": 0.6043673753738403, + "epoch": 0.6881875044934934, + "kl_loss": 0.16857953369617462, + "loss_ib": 0.005511336028575897, + "step": 2393 + }, + { + "ce_ib": 3.214205503463745, + "ce_orig": 0.7789235711097717, + "epoch": 0.6881875044934934, + "kl_loss": 0.17179355025291443, + "loss_ib": 0.0049321409314870834, + "step": 2393 + }, + { + "ce_ib": 2.381571054458618, + "ce_orig": 0.40216943621635437, + "epoch": 0.688475088072471, + "kl_loss": 0.23219895362854004, + "loss_ib": 0.0047035603784024715, + "step": 2394 + }, + { + "ce_ib": 4.6842827796936035, + "ce_orig": 1.169750690460205, + "epoch": 0.688475088072471, + "kl_loss": 0.1751519739627838, + "loss_ib": 0.006435802206397057, + "step": 2394 + }, + { + "ce_ib": 4.01504373550415, + "ce_orig": 0.6808378100395203, + "epoch": 0.688475088072471, + "kl_loss": 0.1845950186252594, + "loss_ib": 0.005860993638634682, + "step": 2394 + }, + { + "ce_ib": 5.88308572769165, + "ce_orig": 0.9513795375823975, + "epoch": 0.688475088072471, + "kl_loss": 0.20837947726249695, + "loss_ib": 0.007966880686581135, + "step": 2394 + }, + { + "epoch": 0.6887626716514487, + "grad_norm": 0.15225575864315033, + "learning_rate": 8.99805047983787e-06, + "loss": 0.865, + "step": 2395 + }, + { + "ce_ib": 2.413329601287842, + "ce_orig": 0.731285810470581, + "epoch": 0.6887626716514487, + "kl_loss": 0.12271420657634735, + "loss_ib": 0.003640471724793315, + "step": 2395 + }, + { + "ce_ib": 5.5345563888549805, + "ce_orig": 0.6089126467704773, + "epoch": 0.6887626716514487, + "kl_loss": 0.36456912755966187, + "loss_ib": 0.009180247783660889, + "step": 2395 + }, + { + "ce_ib": 4.461871147155762, + "ce_orig": 0.8785419464111328, + "epoch": 0.6887626716514487, + "kl_loss": 0.2888180613517761, + "loss_ib": 0.007350051309913397, + "step": 2395 + }, + { + "ce_ib": 4.436915874481201, + "ce_orig": 0.7861196398735046, + "epoch": 0.6887626716514487, + "kl_loss": 0.2211454212665558, + "loss_ib": 0.006648370064795017, + "step": 2395 + }, + { + "ce_ib": 4.408246994018555, + "ce_orig": 1.1523668766021729, + "epoch": 0.6890502552304263, + "kl_loss": 0.1841999590396881, + "loss_ib": 0.006250246427953243, + "step": 2396 + }, + { + "ce_ib": 6.049507141113281, + "ce_orig": 1.203579306602478, + "epoch": 0.6890502552304263, + "kl_loss": 0.23133550584316254, + "loss_ib": 0.008362862281501293, + "step": 2396 + }, + { + "ce_ib": 4.558348655700684, + "ce_orig": 0.7876361012458801, + "epoch": 0.6890502552304263, + "kl_loss": 0.18433716893196106, + "loss_ib": 0.006401719991117716, + "step": 2396 + }, + { + "ce_ib": 6.432979106903076, + "ce_orig": 1.3744632005691528, + "epoch": 0.6890502552304263, + "kl_loss": 0.1743810772895813, + "loss_ib": 0.008176789619028568, + "step": 2396 + }, + { + "ce_ib": 3.554727792739868, + "ce_orig": 0.7899414300918579, + "epoch": 0.689337838809404, + "kl_loss": 0.24588340520858765, + "loss_ib": 0.0060135615058243275, + "step": 2397 + }, + { + "ce_ib": 4.015282154083252, + "ce_orig": 0.9327799081802368, + "epoch": 0.689337838809404, + "kl_loss": 0.22539189457893372, + "loss_ib": 0.0062692007049918175, + "step": 2397 + }, + { + "ce_ib": 3.4173712730407715, + "ce_orig": 0.7210127115249634, + "epoch": 0.689337838809404, + "kl_loss": 0.13540691137313843, + "loss_ib": 0.004771440289914608, + "step": 2397 + }, + { + "ce_ib": 4.951717853546143, + "ce_orig": 0.7128760814666748, + "epoch": 0.689337838809404, + "kl_loss": 0.2813519537448883, + "loss_ib": 0.007765236776322126, + "step": 2397 + }, + { + "ce_ib": 5.532104015350342, + "ce_orig": 0.8527866005897522, + "epoch": 0.6896254223883816, + "kl_loss": 0.20440876483917236, + "loss_ib": 0.007576191797852516, + "step": 2398 + }, + { + "ce_ib": 3.2207984924316406, + "ce_orig": 0.7405092120170593, + "epoch": 0.6896254223883816, + "kl_loss": 0.2712554931640625, + "loss_ib": 0.005933353677392006, + "step": 2398 + }, + { + "ce_ib": 6.153557300567627, + "ce_orig": 1.3490718603134155, + "epoch": 0.6896254223883816, + "kl_loss": 0.22948600351810455, + "loss_ib": 0.00844841729849577, + "step": 2398 + }, + { + "ce_ib": 5.060577869415283, + "ce_orig": 1.0982760190963745, + "epoch": 0.6896254223883816, + "kl_loss": 0.14024147391319275, + "loss_ib": 0.0064629921689629555, + "step": 2398 + }, + { + "ce_ib": 6.580198764801025, + "ce_orig": 1.3842320442199707, + "epoch": 0.6899130059673593, + "kl_loss": 0.23815420269966125, + "loss_ib": 0.008961740881204605, + "step": 2399 + }, + { + "ce_ib": 5.279874324798584, + "ce_orig": 0.9357604384422302, + "epoch": 0.6899130059673593, + "kl_loss": 0.24284739792346954, + "loss_ib": 0.0077083478681743145, + "step": 2399 + }, + { + "ce_ib": 5.588033676147461, + "ce_orig": 0.7285202741622925, + "epoch": 0.6899130059673593, + "kl_loss": 0.21420125663280487, + "loss_ib": 0.007730046287178993, + "step": 2399 + }, + { + "ce_ib": 4.362695217132568, + "ce_orig": 0.6814695000648499, + "epoch": 0.6899130059673593, + "kl_loss": 0.2684081196784973, + "loss_ib": 0.007046776358038187, + "step": 2399 + }, + { + "epoch": 0.6902005895463369, + "grad_norm": 0.11391985416412354, + "learning_rate": 8.993385122822364e-06, + "loss": 0.8726, + "step": 2400 + }, + { + "ce_ib": 5.628213405609131, + "ce_orig": 1.0916125774383545, + "epoch": 0.6902005895463369, + "kl_loss": 0.30950677394866943, + "loss_ib": 0.008723280392587185, + "step": 2400 + }, + { + "ce_ib": 3.2452595233917236, + "ce_orig": 0.7010137438774109, + "epoch": 0.6902005895463369, + "kl_loss": 0.14687174558639526, + "loss_ib": 0.004713976755738258, + "step": 2400 + }, + { + "ce_ib": 3.8985743522644043, + "ce_orig": 0.7211989164352417, + "epoch": 0.6902005895463369, + "kl_loss": 0.27580884099006653, + "loss_ib": 0.0066566625609993935, + "step": 2400 + }, + { + "ce_ib": 6.110245227813721, + "ce_orig": 1.2514992952346802, + "epoch": 0.6902005895463369, + "kl_loss": 0.21933753788471222, + "loss_ib": 0.008303620852530003, + "step": 2400 + }, + { + "ce_ib": 3.173556327819824, + "ce_orig": 0.5070919990539551, + "epoch": 0.6904881731253145, + "kl_loss": 0.2262185513973236, + "loss_ib": 0.005435741972178221, + "step": 2401 + }, + { + "ce_ib": 5.275131702423096, + "ce_orig": 1.0548036098480225, + "epoch": 0.6904881731253145, + "kl_loss": 0.16304481029510498, + "loss_ib": 0.006905579939484596, + "step": 2401 + }, + { + "ce_ib": 2.803535223007202, + "ce_orig": 0.6576704382896423, + "epoch": 0.6904881731253145, + "kl_loss": 0.19854572415351868, + "loss_ib": 0.004788992460817099, + "step": 2401 + }, + { + "ce_ib": 4.430517196655273, + "ce_orig": 1.0183892250061035, + "epoch": 0.6904881731253145, + "kl_loss": 0.17105215787887573, + "loss_ib": 0.006141039077192545, + "step": 2401 + }, + { + "ce_ib": 3.71323823928833, + "ce_orig": 0.6197255253791809, + "epoch": 0.6907757567042921, + "kl_loss": 0.16805288195610046, + "loss_ib": 0.005393767263740301, + "step": 2402 + }, + { + "ce_ib": 3.595470666885376, + "ce_orig": 0.737883985042572, + "epoch": 0.6907757567042921, + "kl_loss": 0.13908977806568146, + "loss_ib": 0.004986368119716644, + "step": 2402 + }, + { + "ce_ib": 5.538041591644287, + "ce_orig": 1.42640221118927, + "epoch": 0.6907757567042921, + "kl_loss": 0.18409159779548645, + "loss_ib": 0.007378957234323025, + "step": 2402 + }, + { + "ce_ib": 2.952552318572998, + "ce_orig": 0.5811508893966675, + "epoch": 0.6907757567042921, + "kl_loss": 0.14636904001235962, + "loss_ib": 0.004416242707520723, + "step": 2402 + }, + { + "ce_ib": 3.4382336139678955, + "ce_orig": 0.8199602961540222, + "epoch": 0.6910633402832699, + "kl_loss": 0.27610012888908386, + "loss_ib": 0.006199234630912542, + "step": 2403 + }, + { + "ce_ib": 7.643496513366699, + "ce_orig": 1.495052695274353, + "epoch": 0.6910633402832699, + "kl_loss": 0.28434306383132935, + "loss_ib": 0.010486926883459091, + "step": 2403 + }, + { + "ce_ib": 2.803825855255127, + "ce_orig": 0.6761776804924011, + "epoch": 0.6910633402832699, + "kl_loss": 0.1861177682876587, + "loss_ib": 0.004665003623813391, + "step": 2403 + }, + { + "ce_ib": 5.057126998901367, + "ce_orig": 0.6593854427337646, + "epoch": 0.6910633402832699, + "kl_loss": 0.20004726946353912, + "loss_ib": 0.0070575992576777935, + "step": 2403 + }, + { + "ce_ib": 3.6397206783294678, + "ce_orig": 0.8633478879928589, + "epoch": 0.6913509238622475, + "kl_loss": 0.15559305250644684, + "loss_ib": 0.005195650737732649, + "step": 2404 + }, + { + "ce_ib": 3.3662946224212646, + "ce_orig": 0.4461177885532379, + "epoch": 0.6913509238622475, + "kl_loss": 0.2385517954826355, + "loss_ib": 0.005751812364906073, + "step": 2404 + }, + { + "ce_ib": 7.255328178405762, + "ce_orig": 1.0894469022750854, + "epoch": 0.6913509238622475, + "kl_loss": 0.15987083315849304, + "loss_ib": 0.008854036219418049, + "step": 2404 + }, + { + "ce_ib": 3.7056689262390137, + "ce_orig": 0.9965315461158752, + "epoch": 0.6913509238622475, + "kl_loss": 0.23595911264419556, + "loss_ib": 0.006065260153263807, + "step": 2404 + }, + { + "epoch": 0.6916385074412251, + "grad_norm": 0.1370088756084442, + "learning_rate": 8.988710144815214e-06, + "loss": 0.9389, + "step": 2405 + }, + { + "ce_ib": 5.232123374938965, + "ce_orig": 1.219592571258545, + "epoch": 0.6916385074412251, + "kl_loss": 0.14559973776340485, + "loss_ib": 0.006688120774924755, + "step": 2405 + }, + { + "ce_ib": 5.104818344116211, + "ce_orig": 1.0733293294906616, + "epoch": 0.6916385074412251, + "kl_loss": 0.26452091336250305, + "loss_ib": 0.007750026881694794, + "step": 2405 + }, + { + "ce_ib": 2.871487855911255, + "ce_orig": 0.71175217628479, + "epoch": 0.6916385074412251, + "kl_loss": 0.2141643464565277, + "loss_ib": 0.005013131536543369, + "step": 2405 + }, + { + "ce_ib": 4.25388765335083, + "ce_orig": 0.40299662947654724, + "epoch": 0.6916385074412251, + "kl_loss": 0.18725387752056122, + "loss_ib": 0.006126426160335541, + "step": 2405 + }, + { + "ce_ib": 2.7139675617218018, + "ce_orig": 0.802738606929779, + "epoch": 0.6919260910202027, + "kl_loss": 0.15843184292316437, + "loss_ib": 0.004298285581171513, + "step": 2406 + }, + { + "ce_ib": 6.768404960632324, + "ce_orig": 1.3235762119293213, + "epoch": 0.6919260910202027, + "kl_loss": 0.15620771050453186, + "loss_ib": 0.008330482058227062, + "step": 2406 + }, + { + "ce_ib": 5.864822864532471, + "ce_orig": 0.5528459548950195, + "epoch": 0.6919260910202027, + "kl_loss": 0.3489032983779907, + "loss_ib": 0.00935385562479496, + "step": 2406 + }, + { + "ce_ib": 4.952910423278809, + "ce_orig": 1.0999948978424072, + "epoch": 0.6919260910202027, + "kl_loss": 0.19057774543762207, + "loss_ib": 0.006858687847852707, + "step": 2406 + }, + { + "ce_ib": 3.072336196899414, + "ce_orig": 0.8540735840797424, + "epoch": 0.6922136745991804, + "kl_loss": 0.12695752084255219, + "loss_ib": 0.0043419115245342255, + "step": 2407 + }, + { + "ce_ib": 6.23612642288208, + "ce_orig": 0.8896905183792114, + "epoch": 0.6922136745991804, + "kl_loss": 0.6845148205757141, + "loss_ib": 0.01308127399533987, + "step": 2407 + }, + { + "ce_ib": 4.36691427230835, + "ce_orig": 0.8579941987991333, + "epoch": 0.6922136745991804, + "kl_loss": 0.2746030390262604, + "loss_ib": 0.00711294449865818, + "step": 2407 + }, + { + "ce_ib": 8.855356216430664, + "ce_orig": 1.3261338472366333, + "epoch": 0.6922136745991804, + "kl_loss": 0.20804375410079956, + "loss_ib": 0.01093579363077879, + "step": 2407 + }, + { + "ce_ib": 3.1044881343841553, + "ce_orig": 0.6264901757240295, + "epoch": 0.692501258178158, + "kl_loss": 0.1990627944469452, + "loss_ib": 0.00509511586278677, + "step": 2408 + }, + { + "ce_ib": 7.037113189697266, + "ce_orig": 1.7594568729400635, + "epoch": 0.692501258178158, + "kl_loss": 0.23146814107894897, + "loss_ib": 0.009351794607937336, + "step": 2408 + }, + { + "ce_ib": 5.538635730743408, + "ce_orig": 1.1727491617202759, + "epoch": 0.692501258178158, + "kl_loss": 0.13140957057476044, + "loss_ib": 0.006852731574326754, + "step": 2408 + }, + { + "ce_ib": 5.378310680389404, + "ce_orig": 0.9516169428825378, + "epoch": 0.692501258178158, + "kl_loss": 0.17713814973831177, + "loss_ib": 0.0071496921591460705, + "step": 2408 + }, + { + "ce_ib": 3.3371167182922363, + "ce_orig": 0.8855733275413513, + "epoch": 0.6927888417571356, + "kl_loss": 0.1612313687801361, + "loss_ib": 0.0049494304694235325, + "step": 2409 + }, + { + "ce_ib": 5.991858959197998, + "ce_orig": 1.1410064697265625, + "epoch": 0.6927888417571356, + "kl_loss": 0.33487892150878906, + "loss_ib": 0.009340647608041763, + "step": 2409 + }, + { + "ce_ib": 4.172990322113037, + "ce_orig": 0.626848042011261, + "epoch": 0.6927888417571356, + "kl_loss": 0.13972032070159912, + "loss_ib": 0.005570193286985159, + "step": 2409 + }, + { + "ce_ib": 5.867154121398926, + "ce_orig": 1.064906120300293, + "epoch": 0.6927888417571356, + "kl_loss": 0.3594074845314026, + "loss_ib": 0.009461228735744953, + "step": 2409 + }, + { + "epoch": 0.6930764253361134, + "grad_norm": 0.1410120576620102, + "learning_rate": 8.984025557079523e-06, + "loss": 0.8378, + "step": 2410 + }, + { + "ce_ib": 2.7871220111846924, + "ce_orig": 0.4288124740123749, + "epoch": 0.6930764253361134, + "kl_loss": 0.17272427678108215, + "loss_ib": 0.004514364991337061, + "step": 2410 + }, + { + "ce_ib": 5.802957057952881, + "ce_orig": 1.1900373697280884, + "epoch": 0.6930764253361134, + "kl_loss": 0.27152466773986816, + "loss_ib": 0.008518204092979431, + "step": 2410 + }, + { + "ce_ib": 4.63405179977417, + "ce_orig": 1.048531413078308, + "epoch": 0.6930764253361134, + "kl_loss": 0.24516570568084717, + "loss_ib": 0.007085708901286125, + "step": 2410 + }, + { + "ce_ib": 4.924686908721924, + "ce_orig": 0.9600716829299927, + "epoch": 0.6930764253361134, + "kl_loss": 0.24635016918182373, + "loss_ib": 0.007388188503682613, + "step": 2410 + }, + { + "ce_ib": 7.256275653839111, + "ce_orig": 0.9030719995498657, + "epoch": 0.693364008915091, + "kl_loss": 0.14725181460380554, + "loss_ib": 0.008728793822228909, + "step": 2411 + }, + { + "ce_ib": 4.90898323059082, + "ce_orig": 0.907230794429779, + "epoch": 0.693364008915091, + "kl_loss": 0.19522446393966675, + "loss_ib": 0.006861227564513683, + "step": 2411 + }, + { + "ce_ib": 6.2339277267456055, + "ce_orig": 0.695134699344635, + "epoch": 0.693364008915091, + "kl_loss": 0.22565706074237823, + "loss_ib": 0.008490498177707195, + "step": 2411 + }, + { + "ce_ib": 4.061104774475098, + "ce_orig": 0.7017101049423218, + "epoch": 0.693364008915091, + "kl_loss": 0.2405104786157608, + "loss_ib": 0.006466209888458252, + "step": 2411 + }, + { + "ce_ib": 4.928533554077148, + "ce_orig": 0.6650084257125854, + "epoch": 0.6936515924940686, + "kl_loss": 0.21970662474632263, + "loss_ib": 0.007125599775463343, + "step": 2412 + }, + { + "ce_ib": 5.871987342834473, + "ce_orig": 1.1183828115463257, + "epoch": 0.6936515924940686, + "kl_loss": 0.14693346619606018, + "loss_ib": 0.007341321557760239, + "step": 2412 + }, + { + "ce_ib": 5.479212284088135, + "ce_orig": 1.2118109464645386, + "epoch": 0.6936515924940686, + "kl_loss": 0.3389509320259094, + "loss_ib": 0.008868721313774586, + "step": 2412 + }, + { + "ce_ib": 6.014887809753418, + "ce_orig": 1.0132817029953003, + "epoch": 0.6936515924940686, + "kl_loss": 0.2870677709579468, + "loss_ib": 0.008885565213859081, + "step": 2412 + }, + { + "ce_ib": 5.067049503326416, + "ce_orig": 1.1913286447525024, + "epoch": 0.6939391760730462, + "kl_loss": 0.2144058346748352, + "loss_ib": 0.007211108226329088, + "step": 2413 + }, + { + "ce_ib": 5.860118389129639, + "ce_orig": 1.2377287149429321, + "epoch": 0.6939391760730462, + "kl_loss": 0.23114271461963654, + "loss_ib": 0.008171545341610909, + "step": 2413 + }, + { + "ce_ib": 1.405377745628357, + "ce_orig": 0.23858924210071564, + "epoch": 0.6939391760730462, + "kl_loss": 0.4559404253959656, + "loss_ib": 0.00596478208899498, + "step": 2413 + }, + { + "ce_ib": 4.598946571350098, + "ce_orig": 0.7946881055831909, + "epoch": 0.6939391760730462, + "kl_loss": 0.2312631458044052, + "loss_ib": 0.006911578122526407, + "step": 2413 + }, + { + "ce_ib": 4.264790058135986, + "ce_orig": 1.0555330514907837, + "epoch": 0.6942267596520239, + "kl_loss": 0.11380890011787415, + "loss_ib": 0.005402878858149052, + "step": 2414 + }, + { + "ce_ib": 2.5716073513031006, + "ce_orig": 0.4063844382762909, + "epoch": 0.6942267596520239, + "kl_loss": 0.2371552586555481, + "loss_ib": 0.0049431598745286465, + "step": 2414 + }, + { + "ce_ib": 3.989999532699585, + "ce_orig": 0.6045850515365601, + "epoch": 0.6942267596520239, + "kl_loss": 0.2643967270851135, + "loss_ib": 0.006633967161178589, + "step": 2414 + }, + { + "ce_ib": 4.335952281951904, + "ce_orig": 0.7788189053535461, + "epoch": 0.6942267596520239, + "kl_loss": 0.2065017968416214, + "loss_ib": 0.00640097027644515, + "step": 2414 + }, + { + "epoch": 0.6945143432310015, + "grad_norm": 0.14285415410995483, + "learning_rate": 8.979331370901555e-06, + "loss": 0.9576, + "step": 2415 + }, + { + "ce_ib": 2.6946606636047363, + "ce_orig": 0.44798439741134644, + "epoch": 0.6945143432310015, + "kl_loss": 0.20365490019321442, + "loss_ib": 0.004731209948658943, + "step": 2415 + }, + { + "ce_ib": 1.5912957191467285, + "ce_orig": 0.4382992088794708, + "epoch": 0.6945143432310015, + "kl_loss": 0.16860251128673553, + "loss_ib": 0.0032773208804428577, + "step": 2415 + }, + { + "ce_ib": 3.5469701290130615, + "ce_orig": 0.5418413281440735, + "epoch": 0.6945143432310015, + "kl_loss": 0.2513400614261627, + "loss_ib": 0.006060370709747076, + "step": 2415 + }, + { + "ce_ib": 3.3219313621520996, + "ce_orig": 0.5207368731498718, + "epoch": 0.6945143432310015, + "kl_loss": 0.157955601811409, + "loss_ib": 0.004901486914604902, + "step": 2415 + }, + { + "ce_ib": 6.159844398498535, + "ce_orig": 1.1970372200012207, + "epoch": 0.6948019268099791, + "kl_loss": 0.20213840901851654, + "loss_ib": 0.008181228302419186, + "step": 2416 + }, + { + "ce_ib": 2.9355900287628174, + "ce_orig": 0.5587672591209412, + "epoch": 0.6948019268099791, + "kl_loss": 0.1779528260231018, + "loss_ib": 0.00471511809155345, + "step": 2416 + }, + { + "ce_ib": 4.310654640197754, + "ce_orig": 0.8464197516441345, + "epoch": 0.6948019268099791, + "kl_loss": 0.1896919310092926, + "loss_ib": 0.006207573693245649, + "step": 2416 + }, + { + "ce_ib": 4.852100849151611, + "ce_orig": 0.9800527691841125, + "epoch": 0.6948019268099791, + "kl_loss": 0.1618199348449707, + "loss_ib": 0.006470299791544676, + "step": 2416 + }, + { + "ce_ib": 2.940601348876953, + "ce_orig": 0.23113757371902466, + "epoch": 0.6950895103889568, + "kl_loss": 0.13246305286884308, + "loss_ib": 0.004265232011675835, + "step": 2417 + }, + { + "ce_ib": 4.257994651794434, + "ce_orig": 0.8326098322868347, + "epoch": 0.6950895103889568, + "kl_loss": 0.23064452409744263, + "loss_ib": 0.00656443927437067, + "step": 2417 + }, + { + "ce_ib": 1.3948147296905518, + "ce_orig": 0.5103235840797424, + "epoch": 0.6950895103889568, + "kl_loss": 0.15107592940330505, + "loss_ib": 0.002905573695898056, + "step": 2417 + }, + { + "ce_ib": 4.410287380218506, + "ce_orig": 0.5701937079429626, + "epoch": 0.6950895103889568, + "kl_loss": 0.1590811312198639, + "loss_ib": 0.006001098547130823, + "step": 2417 + }, + { + "ce_ib": 5.925664901733398, + "ce_orig": 1.1722787618637085, + "epoch": 0.6953770939679345, + "kl_loss": 0.23889777064323425, + "loss_ib": 0.008314643055200577, + "step": 2418 + }, + { + "ce_ib": 2.961379289627075, + "ce_orig": 0.6965137124061584, + "epoch": 0.6953770939679345, + "kl_loss": 0.10923226177692413, + "loss_ib": 0.004053701646625996, + "step": 2418 + }, + { + "ce_ib": 6.739655494689941, + "ce_orig": 1.4294700622558594, + "epoch": 0.6953770939679345, + "kl_loss": 0.27310776710510254, + "loss_ib": 0.009470732882618904, + "step": 2418 + }, + { + "ce_ib": 3.9184179306030273, + "ce_orig": 1.122968316078186, + "epoch": 0.6953770939679345, + "kl_loss": 0.17318294942378998, + "loss_ib": 0.005650246981531382, + "step": 2418 + }, + { + "ce_ib": 9.242107391357422, + "ce_orig": 1.7458148002624512, + "epoch": 0.6956646775469121, + "kl_loss": 0.21381334960460663, + "loss_ib": 0.011380240321159363, + "step": 2419 + }, + { + "ce_ib": 4.509101390838623, + "ce_orig": 0.7678815126419067, + "epoch": 0.6956646775469121, + "kl_loss": 0.2221733033657074, + "loss_ib": 0.006730834022164345, + "step": 2419 + }, + { + "ce_ib": 3.558361530303955, + "ce_orig": 0.73585045337677, + "epoch": 0.6956646775469121, + "kl_loss": 0.25031572580337524, + "loss_ib": 0.00606151856482029, + "step": 2419 + }, + { + "ce_ib": 4.18864631652832, + "ce_orig": 0.5730027556419373, + "epoch": 0.6956646775469121, + "kl_loss": 0.2419864982366562, + "loss_ib": 0.006608510855585337, + "step": 2419 + }, + { + "epoch": 0.6959522611258897, + "grad_norm": 0.1219499334692955, + "learning_rate": 8.974627597590693e-06, + "loss": 0.8613, + "step": 2420 + }, + { + "ce_ib": 5.737261772155762, + "ce_orig": 0.9809511303901672, + "epoch": 0.6959522611258897, + "kl_loss": 0.195301353931427, + "loss_ib": 0.007690275087952614, + "step": 2420 + }, + { + "ce_ib": 6.33241081237793, + "ce_orig": 1.2405515909194946, + "epoch": 0.6959522611258897, + "kl_loss": 0.1861291527748108, + "loss_ib": 0.008193702436983585, + "step": 2420 + }, + { + "ce_ib": 3.763944149017334, + "ce_orig": 0.4507366120815277, + "epoch": 0.6959522611258897, + "kl_loss": 0.18752402067184448, + "loss_ib": 0.005639184731990099, + "step": 2420 + }, + { + "ce_ib": 3.305497169494629, + "ce_orig": 0.5916218757629395, + "epoch": 0.6959522611258897, + "kl_loss": 0.1937239021062851, + "loss_ib": 0.005242736078798771, + "step": 2420 + }, + { + "ce_ib": 5.556766510009766, + "ce_orig": 1.1131832599639893, + "epoch": 0.6962398447048673, + "kl_loss": 0.1904936283826828, + "loss_ib": 0.007461702451109886, + "step": 2421 + }, + { + "ce_ib": 2.638434886932373, + "ce_orig": 0.6469461917877197, + "epoch": 0.6962398447048673, + "kl_loss": 0.15896832942962646, + "loss_ib": 0.0042281183414161205, + "step": 2421 + }, + { + "ce_ib": 6.0541605949401855, + "ce_orig": 1.5262277126312256, + "epoch": 0.6962398447048673, + "kl_loss": 0.21541330218315125, + "loss_ib": 0.008208293467760086, + "step": 2421 + }, + { + "ce_ib": 2.6946842670440674, + "ce_orig": 0.5890316367149353, + "epoch": 0.6962398447048673, + "kl_loss": 0.18139027059078217, + "loss_ib": 0.0045085870660841465, + "step": 2421 + }, + { + "ce_ib": 4.4677581787109375, + "ce_orig": 0.7474468946456909, + "epoch": 0.696527428283845, + "kl_loss": 0.23473142087459564, + "loss_ib": 0.006815072149038315, + "step": 2422 + }, + { + "ce_ib": 5.2137579917907715, + "ce_orig": 0.8165543675422668, + "epoch": 0.696527428283845, + "kl_loss": 0.2467758059501648, + "loss_ib": 0.0076815164647996426, + "step": 2422 + }, + { + "ce_ib": 5.4788737297058105, + "ce_orig": 1.0695494413375854, + "epoch": 0.696527428283845, + "kl_loss": 0.29588818550109863, + "loss_ib": 0.008437755517661572, + "step": 2422 + }, + { + "ce_ib": 3.4874091148376465, + "ce_orig": 0.7008340954780579, + "epoch": 0.696527428283845, + "kl_loss": 0.1763230264186859, + "loss_ib": 0.005250639282166958, + "step": 2422 + }, + { + "ce_ib": 3.8563852310180664, + "ce_orig": 0.6202989220619202, + "epoch": 0.6968150118628227, + "kl_loss": 0.30527645349502563, + "loss_ib": 0.006909149698913097, + "step": 2423 + }, + { + "ce_ib": 4.736316204071045, + "ce_orig": 0.9403350949287415, + "epoch": 0.6968150118628227, + "kl_loss": 0.2824068069458008, + "loss_ib": 0.00756038399413228, + "step": 2423 + }, + { + "ce_ib": 2.9245920181274414, + "ce_orig": 0.6894713640213013, + "epoch": 0.6968150118628227, + "kl_loss": 0.14423459768295288, + "loss_ib": 0.004366938024759293, + "step": 2423 + }, + { + "ce_ib": 3.506089210510254, + "ce_orig": 0.5349201560020447, + "epoch": 0.6968150118628227, + "kl_loss": 0.22165276110172272, + "loss_ib": 0.005722616799175739, + "step": 2423 + }, + { + "ce_ib": 3.481236219406128, + "ce_orig": 0.8721638917922974, + "epoch": 0.6971025954418003, + "kl_loss": 0.12151001393795013, + "loss_ib": 0.004696336109191179, + "step": 2424 + }, + { + "ce_ib": 2.894277811050415, + "ce_orig": 0.756441593170166, + "epoch": 0.6971025954418003, + "kl_loss": 0.1852130889892578, + "loss_ib": 0.0047464086674153805, + "step": 2424 + }, + { + "ce_ib": 3.8221399784088135, + "ce_orig": 0.6097682118415833, + "epoch": 0.6971025954418003, + "kl_loss": 0.21840128302574158, + "loss_ib": 0.006006152369081974, + "step": 2424 + }, + { + "ce_ib": 4.079960823059082, + "ce_orig": 0.8855893015861511, + "epoch": 0.6971025954418003, + "kl_loss": 0.15344709157943726, + "loss_ib": 0.005614432040601969, + "step": 2424 + }, + { + "epoch": 0.6973901790207779, + "grad_norm": 0.13722670078277588, + "learning_rate": 8.969914248479414e-06, + "loss": 0.8147, + "step": 2425 + }, + { + "ce_ib": 6.801426410675049, + "ce_orig": 0.8152185678482056, + "epoch": 0.6973901790207779, + "kl_loss": 0.21233677864074707, + "loss_ib": 0.008924794383347034, + "step": 2425 + }, + { + "ce_ib": 5.920100212097168, + "ce_orig": 1.341076374053955, + "epoch": 0.6973901790207779, + "kl_loss": 0.1854715794324875, + "loss_ib": 0.007774815894663334, + "step": 2425 + }, + { + "ce_ib": 6.361289024353027, + "ce_orig": 1.6550935506820679, + "epoch": 0.6973901790207779, + "kl_loss": 0.16866163909435272, + "loss_ib": 0.008047905750572681, + "step": 2425 + }, + { + "ce_ib": 6.724674701690674, + "ce_orig": 1.1297317743301392, + "epoch": 0.6973901790207779, + "kl_loss": 0.21016354858875275, + "loss_ib": 0.008826309815049171, + "step": 2425 + }, + { + "ce_ib": 4.373428821563721, + "ce_orig": 0.7542706727981567, + "epoch": 0.6976777625997556, + "kl_loss": 0.25670379400253296, + "loss_ib": 0.006940466817468405, + "step": 2426 + }, + { + "ce_ib": 3.6487412452697754, + "ce_orig": 0.8987184166908264, + "epoch": 0.6976777625997556, + "kl_loss": 0.21151773631572723, + "loss_ib": 0.005763918627053499, + "step": 2426 + }, + { + "ce_ib": 3.5242438316345215, + "ce_orig": 0.9049651026725769, + "epoch": 0.6976777625997556, + "kl_loss": 0.16397777199745178, + "loss_ib": 0.005164021160453558, + "step": 2426 + }, + { + "ce_ib": 3.6560511589050293, + "ce_orig": 0.9564192891120911, + "epoch": 0.6976777625997556, + "kl_loss": 0.1525060087442398, + "loss_ib": 0.005181110929697752, + "step": 2426 + }, + { + "ce_ib": 6.666114330291748, + "ce_orig": 0.843747615814209, + "epoch": 0.6979653461787332, + "kl_loss": 0.15217703580856323, + "loss_ib": 0.008187884464859962, + "step": 2427 + }, + { + "ce_ib": 5.129820823669434, + "ce_orig": 0.6228529810905457, + "epoch": 0.6979653461787332, + "kl_loss": 0.2989824712276459, + "loss_ib": 0.008119644597172737, + "step": 2427 + }, + { + "ce_ib": 2.1597063541412354, + "ce_orig": 0.32099682092666626, + "epoch": 0.6979653461787332, + "kl_loss": 0.15401709079742432, + "loss_ib": 0.003699877066537738, + "step": 2427 + }, + { + "ce_ib": 3.223099708557129, + "ce_orig": 1.028043508529663, + "epoch": 0.6979653461787332, + "kl_loss": 0.11145815253257751, + "loss_ib": 0.004337680991739035, + "step": 2427 + }, + { + "ce_ib": 2.7599575519561768, + "ce_orig": 0.6509795784950256, + "epoch": 0.6982529297577108, + "kl_loss": 0.20751430094242096, + "loss_ib": 0.004835100844502449, + "step": 2428 + }, + { + "ce_ib": 3.688498020172119, + "ce_orig": 0.5895922183990479, + "epoch": 0.6982529297577108, + "kl_loss": 0.16587762534618378, + "loss_ib": 0.0053472742438316345, + "step": 2428 + }, + { + "ce_ib": 6.869842052459717, + "ce_orig": 1.1545127630233765, + "epoch": 0.6982529297577108, + "kl_loss": 0.2183350920677185, + "loss_ib": 0.009053193032741547, + "step": 2428 + }, + { + "ce_ib": 7.695930004119873, + "ce_orig": 1.737312912940979, + "epoch": 0.6982529297577108, + "kl_loss": 0.23506046831607819, + "loss_ib": 0.010046534240245819, + "step": 2428 + }, + { + "ce_ib": 3.5399765968322754, + "ce_orig": 0.46882471442222595, + "epoch": 0.6985405133366884, + "kl_loss": 0.2623475193977356, + "loss_ib": 0.006163451354950666, + "step": 2429 + }, + { + "ce_ib": 6.233175277709961, + "ce_orig": 1.0568830966949463, + "epoch": 0.6985405133366884, + "kl_loss": 0.17596399784088135, + "loss_ib": 0.007992815226316452, + "step": 2429 + }, + { + "ce_ib": 3.359575033187866, + "ce_orig": 0.4224954843521118, + "epoch": 0.6985405133366884, + "kl_loss": 0.2936793267726898, + "loss_ib": 0.006296368315815926, + "step": 2429 + }, + { + "ce_ib": 2.273890256881714, + "ce_orig": 0.5147646069526672, + "epoch": 0.6985405133366884, + "kl_loss": 0.1383156031370163, + "loss_ib": 0.003657046239823103, + "step": 2429 + }, + { + "epoch": 0.6988280969156662, + "grad_norm": 0.12554365396499634, + "learning_rate": 8.965191334923277e-06, + "loss": 0.847, + "step": 2430 + }, + { + "ce_ib": 2.2322325706481934, + "ce_orig": 0.6423028707504272, + "epoch": 0.6988280969156662, + "kl_loss": 0.12338902801275253, + "loss_ib": 0.0034661227837204933, + "step": 2430 + }, + { + "ce_ib": 4.520761489868164, + "ce_orig": 0.8139554262161255, + "epoch": 0.6988280969156662, + "kl_loss": 0.1993054747581482, + "loss_ib": 0.006513815838843584, + "step": 2430 + }, + { + "ce_ib": 3.3718934059143066, + "ce_orig": 0.7306619882583618, + "epoch": 0.6988280969156662, + "kl_loss": 0.2223799228668213, + "loss_ib": 0.005595692433416843, + "step": 2430 + }, + { + "ce_ib": 2.8443775177001953, + "ce_orig": 0.6240906119346619, + "epoch": 0.6988280969156662, + "kl_loss": 0.08528804779052734, + "loss_ib": 0.0036972579546272755, + "step": 2430 + }, + { + "ce_ib": 6.8722124099731445, + "ce_orig": 1.5818616151809692, + "epoch": 0.6991156804946438, + "kl_loss": 0.1691589057445526, + "loss_ib": 0.00856380071491003, + "step": 2431 + }, + { + "ce_ib": 4.189690589904785, + "ce_orig": 0.7093541026115417, + "epoch": 0.6991156804946438, + "kl_loss": 0.18430349230766296, + "loss_ib": 0.00603272533044219, + "step": 2431 + }, + { + "ce_ib": 7.095147609710693, + "ce_orig": 1.360703945159912, + "epoch": 0.6991156804946438, + "kl_loss": 0.14179909229278564, + "loss_ib": 0.008513138629496098, + "step": 2431 + }, + { + "ce_ib": 3.959176778793335, + "ce_orig": 0.8309829235076904, + "epoch": 0.6991156804946438, + "kl_loss": 0.18603643774986267, + "loss_ib": 0.005819540936499834, + "step": 2431 + }, + { + "ce_ib": 7.096500873565674, + "ce_orig": 1.5574572086334229, + "epoch": 0.6994032640736214, + "kl_loss": 0.1808396577835083, + "loss_ib": 0.00890489760786295, + "step": 2432 + }, + { + "ce_ib": 3.507507085800171, + "ce_orig": 0.5320173501968384, + "epoch": 0.6994032640736214, + "kl_loss": 0.24915461242198944, + "loss_ib": 0.00599905289709568, + "step": 2432 + }, + { + "ce_ib": 8.442329406738281, + "ce_orig": 1.6803004741668701, + "epoch": 0.6994032640736214, + "kl_loss": 0.2249784767627716, + "loss_ib": 0.010692114941775799, + "step": 2432 + }, + { + "ce_ib": 4.159822463989258, + "ce_orig": 0.5757670402526855, + "epoch": 0.6994032640736214, + "kl_loss": 0.2591209411621094, + "loss_ib": 0.006751031614840031, + "step": 2432 + }, + { + "ce_ib": 4.8770833015441895, + "ce_orig": 0.3673838973045349, + "epoch": 0.699690847652599, + "kl_loss": 0.3417678773403168, + "loss_ib": 0.00829476211220026, + "step": 2433 + }, + { + "ce_ib": 3.166240930557251, + "ce_orig": 0.6240102648735046, + "epoch": 0.699690847652599, + "kl_loss": 0.19871404767036438, + "loss_ib": 0.005153381731361151, + "step": 2433 + }, + { + "ce_ib": 4.534908771514893, + "ce_orig": 0.9828783273696899, + "epoch": 0.699690847652599, + "kl_loss": 0.16495586931705475, + "loss_ib": 0.006184467580169439, + "step": 2433 + }, + { + "ce_ib": 5.364969730377197, + "ce_orig": 1.0833231210708618, + "epoch": 0.699690847652599, + "kl_loss": 0.19026780128479004, + "loss_ib": 0.007267647888511419, + "step": 2433 + }, + { + "ce_ib": 4.501548767089844, + "ce_orig": 0.7575254440307617, + "epoch": 0.6999784312315767, + "kl_loss": 0.2524551749229431, + "loss_ib": 0.007026100065559149, + "step": 2434 + }, + { + "ce_ib": 6.143152236938477, + "ce_orig": 1.291489601135254, + "epoch": 0.6999784312315767, + "kl_loss": 0.22557900846004486, + "loss_ib": 0.008398941718041897, + "step": 2434 + }, + { + "ce_ib": 7.974960803985596, + "ce_orig": 1.263658881187439, + "epoch": 0.6999784312315767, + "kl_loss": 0.23335802555084229, + "loss_ib": 0.010308540426194668, + "step": 2434 + }, + { + "ce_ib": 6.011842727661133, + "ce_orig": 1.006027102470398, + "epoch": 0.6999784312315767, + "kl_loss": 0.185214102268219, + "loss_ib": 0.007863983511924744, + "step": 2434 + }, + { + "epoch": 0.7002660148105543, + "grad_norm": 0.13026727735996246, + "learning_rate": 8.960458868300872e-06, + "loss": 0.865, + "step": 2435 + }, + { + "ce_ib": 6.487788200378418, + "ce_orig": 1.0859819650650024, + "epoch": 0.7002660148105543, + "kl_loss": 0.2326304316520691, + "loss_ib": 0.008814092725515366, + "step": 2435 + }, + { + "ce_ib": 7.35122013092041, + "ce_orig": 1.547262191772461, + "epoch": 0.7002660148105543, + "kl_loss": 0.21615475416183472, + "loss_ib": 0.009512767195701599, + "step": 2435 + }, + { + "ce_ib": 5.429762840270996, + "ce_orig": 0.9504778981208801, + "epoch": 0.7002660148105543, + "kl_loss": 0.261913537979126, + "loss_ib": 0.008048898540437222, + "step": 2435 + }, + { + "ce_ib": 3.8734874725341797, + "ce_orig": 0.6953659057617188, + "epoch": 0.7002660148105543, + "kl_loss": 0.2307904213666916, + "loss_ib": 0.006181391421705484, + "step": 2435 + }, + { + "ce_ib": 3.261629581451416, + "ce_orig": 0.7624748349189758, + "epoch": 0.7005535983895319, + "kl_loss": 0.23138350248336792, + "loss_ib": 0.00557546503841877, + "step": 2436 + }, + { + "ce_ib": 6.821616172790527, + "ce_orig": 1.5520045757293701, + "epoch": 0.7005535983895319, + "kl_loss": 0.22766916453838348, + "loss_ib": 0.00909830816090107, + "step": 2436 + }, + { + "ce_ib": 1.6194193363189697, + "ce_orig": 0.5069912075996399, + "epoch": 0.7005535983895319, + "kl_loss": 0.10839072614908218, + "loss_ib": 0.0027033265214413404, + "step": 2436 + }, + { + "ce_ib": 5.037510395050049, + "ce_orig": 0.8599326014518738, + "epoch": 0.7005535983895319, + "kl_loss": 0.19257068634033203, + "loss_ib": 0.006963217165321112, + "step": 2436 + }, + { + "ce_ib": 8.086687088012695, + "ce_orig": 1.5508853197097778, + "epoch": 0.7008411819685096, + "kl_loss": 0.1327190399169922, + "loss_ib": 0.009413877502083778, + "step": 2437 + }, + { + "ce_ib": 3.309077262878418, + "ce_orig": 0.7658331990242004, + "epoch": 0.7008411819685096, + "kl_loss": 0.1830010861158371, + "loss_ib": 0.005139087792485952, + "step": 2437 + }, + { + "ce_ib": 2.6921231746673584, + "ce_orig": 0.489041268825531, + "epoch": 0.7008411819685096, + "kl_loss": 0.3568006157875061, + "loss_ib": 0.006260129157453775, + "step": 2437 + }, + { + "ce_ib": 3.9836058616638184, + "ce_orig": 0.8987211585044861, + "epoch": 0.7008411819685096, + "kl_loss": 0.14789436757564545, + "loss_ib": 0.005462549161165953, + "step": 2437 + }, + { + "ce_ib": 2.970796823501587, + "ce_orig": 0.5548107624053955, + "epoch": 0.7011287655474873, + "kl_loss": 0.23277616500854492, + "loss_ib": 0.005298558156937361, + "step": 2438 + }, + { + "ce_ib": 5.185052394866943, + "ce_orig": 0.8123276233673096, + "epoch": 0.7011287655474873, + "kl_loss": 0.1533677726984024, + "loss_ib": 0.006718730088323355, + "step": 2438 + }, + { + "ce_ib": 8.754329681396484, + "ce_orig": 1.415686011314392, + "epoch": 0.7011287655474873, + "kl_loss": 0.17952938377857208, + "loss_ib": 0.010549623519182205, + "step": 2438 + }, + { + "ce_ib": 3.446535348892212, + "ce_orig": 0.7566198110580444, + "epoch": 0.7011287655474873, + "kl_loss": 0.20639178156852722, + "loss_ib": 0.0055104526691138744, + "step": 2438 + }, + { + "ce_ib": 2.759075403213501, + "ce_orig": 0.5070812106132507, + "epoch": 0.7014163491264649, + "kl_loss": 0.17929387092590332, + "loss_ib": 0.004552014172077179, + "step": 2439 + }, + { + "ce_ib": 4.85590934753418, + "ce_orig": 0.5642621517181396, + "epoch": 0.7014163491264649, + "kl_loss": 0.20524805784225464, + "loss_ib": 0.006908389739692211, + "step": 2439 + }, + { + "ce_ib": 6.354367256164551, + "ce_orig": 1.0917991399765015, + "epoch": 0.7014163491264649, + "kl_loss": 0.2128371298313141, + "loss_ib": 0.008482738398015499, + "step": 2439 + }, + { + "ce_ib": 2.9412741661071777, + "ce_orig": 0.7179765105247498, + "epoch": 0.7014163491264649, + "kl_loss": 0.13620570302009583, + "loss_ib": 0.004303331486880779, + "step": 2439 + }, + { + "epoch": 0.7017039327054425, + "grad_norm": 0.12951286137104034, + "learning_rate": 8.955716860013812e-06, + "loss": 0.8775, + "step": 2440 + }, + { + "ce_ib": 4.248921871185303, + "ce_orig": 0.6975815296173096, + "epoch": 0.7017039327054425, + "kl_loss": 0.2829996347427368, + "loss_ib": 0.007078918162733316, + "step": 2440 + }, + { + "ce_ib": 4.42530632019043, + "ce_orig": 1.0175950527191162, + "epoch": 0.7017039327054425, + "kl_loss": 0.1522035300731659, + "loss_ib": 0.005947341676801443, + "step": 2440 + }, + { + "ce_ib": 5.316950798034668, + "ce_orig": 1.1289992332458496, + "epoch": 0.7017039327054425, + "kl_loss": 0.19074948132038116, + "loss_ib": 0.007224445231258869, + "step": 2440 + }, + { + "ce_ib": 4.589580535888672, + "ce_orig": 0.5317543745040894, + "epoch": 0.7017039327054425, + "kl_loss": 0.20460361242294312, + "loss_ib": 0.00663561699911952, + "step": 2440 + }, + { + "ce_ib": 5.746917724609375, + "ce_orig": 0.9187743663787842, + "epoch": 0.7019915162844201, + "kl_loss": 0.3030744194984436, + "loss_ib": 0.008777662180364132, + "step": 2441 + }, + { + "ce_ib": 7.0934529304504395, + "ce_orig": 0.8852782249450684, + "epoch": 0.7019915162844201, + "kl_loss": 0.14544662833213806, + "loss_ib": 0.008547918871045113, + "step": 2441 + }, + { + "ce_ib": 1.683266282081604, + "ce_orig": 0.5077006816864014, + "epoch": 0.7019915162844201, + "kl_loss": 0.12856154143810272, + "loss_ib": 0.002968881744891405, + "step": 2441 + }, + { + "ce_ib": 3.3954010009765625, + "ce_orig": 0.6229950189590454, + "epoch": 0.7019915162844201, + "kl_loss": 0.25116997957229614, + "loss_ib": 0.005907100159674883, + "step": 2441 + }, + { + "ce_ib": 4.702591896057129, + "ce_orig": 1.0615803003311157, + "epoch": 0.7022790998633978, + "kl_loss": 0.22615429759025574, + "loss_ib": 0.006964134983718395, + "step": 2442 + }, + { + "ce_ib": 8.696467399597168, + "ce_orig": 1.4355907440185547, + "epoch": 0.7022790998633978, + "kl_loss": 0.4286218285560608, + "loss_ib": 0.01298268511891365, + "step": 2442 + }, + { + "ce_ib": 4.668061256408691, + "ce_orig": 0.7070122957229614, + "epoch": 0.7022790998633978, + "kl_loss": 0.2771902084350586, + "loss_ib": 0.007439963519573212, + "step": 2442 + }, + { + "ce_ib": 4.348588466644287, + "ce_orig": 0.9242904186248779, + "epoch": 0.7022790998633978, + "kl_loss": 0.28774628043174744, + "loss_ib": 0.007226050831377506, + "step": 2442 + }, + { + "ce_ib": 4.298355579376221, + "ce_orig": 1.0071570873260498, + "epoch": 0.7025666834423755, + "kl_loss": 0.14224860072135925, + "loss_ib": 0.005720841698348522, + "step": 2443 + }, + { + "ce_ib": 5.874499797821045, + "ce_orig": 0.7610557675361633, + "epoch": 0.7025666834423755, + "kl_loss": 0.1738225817680359, + "loss_ib": 0.007612725254148245, + "step": 2443 + }, + { + "ce_ib": 8.471444129943848, + "ce_orig": 0.9607338905334473, + "epoch": 0.7025666834423755, + "kl_loss": 0.2798956036567688, + "loss_ib": 0.011270400136709213, + "step": 2443 + }, + { + "ce_ib": 2.642880916595459, + "ce_orig": 0.6445857286453247, + "epoch": 0.7025666834423755, + "kl_loss": 0.10346847027540207, + "loss_ib": 0.0036775656044483185, + "step": 2443 + }, + { + "ce_ib": 2.6366117000579834, + "ce_orig": 0.5772340297698975, + "epoch": 0.7028542670213531, + "kl_loss": 0.2849457859992981, + "loss_ib": 0.005486069712787867, + "step": 2444 + }, + { + "ce_ib": 7.770017147064209, + "ce_orig": 1.2013649940490723, + "epoch": 0.7028542670213531, + "kl_loss": 0.20461878180503845, + "loss_ib": 0.009816205129027367, + "step": 2444 + }, + { + "ce_ib": 7.250518321990967, + "ce_orig": 1.1754608154296875, + "epoch": 0.7028542670213531, + "kl_loss": 0.1670418381690979, + "loss_ib": 0.008920936845242977, + "step": 2444 + }, + { + "ce_ib": 6.096081256866455, + "ce_orig": 0.8619139194488525, + "epoch": 0.7028542670213531, + "kl_loss": 0.1868334710597992, + "loss_ib": 0.007964415475726128, + "step": 2444 + }, + { + "epoch": 0.7031418506003307, + "grad_norm": 0.12006016075611115, + "learning_rate": 8.950965321486694e-06, + "loss": 0.861, + "step": 2445 + }, + { + "ce_ib": 5.779016494750977, + "ce_orig": 1.2267673015594482, + "epoch": 0.7031418506003307, + "kl_loss": 0.1899043619632721, + "loss_ib": 0.0076780603267252445, + "step": 2445 + }, + { + "ce_ib": 4.180911064147949, + "ce_orig": 0.7706923484802246, + "epoch": 0.7031418506003307, + "kl_loss": 0.192090705037117, + "loss_ib": 0.006101817823946476, + "step": 2445 + }, + { + "ce_ib": 1.447769045829773, + "ce_orig": 0.28345268964767456, + "epoch": 0.7031418506003307, + "kl_loss": 0.40772777795791626, + "loss_ib": 0.005525046493858099, + "step": 2445 + }, + { + "ce_ib": 2.655752658843994, + "ce_orig": 0.5226135849952698, + "epoch": 0.7031418506003307, + "kl_loss": 0.1641179919242859, + "loss_ib": 0.004296932369470596, + "step": 2445 + }, + { + "ce_ib": 4.993356227874756, + "ce_orig": 0.7461522817611694, + "epoch": 0.7034294341793084, + "kl_loss": 0.42973268032073975, + "loss_ib": 0.009290683083236217, + "step": 2446 + }, + { + "ce_ib": 4.877987861633301, + "ce_orig": 0.8758281469345093, + "epoch": 0.7034294341793084, + "kl_loss": 0.19880720973014832, + "loss_ib": 0.006866059731692076, + "step": 2446 + }, + { + "ce_ib": 3.8634555339813232, + "ce_orig": 0.5192645788192749, + "epoch": 0.7034294341793084, + "kl_loss": 0.1645628809928894, + "loss_ib": 0.005509084556251764, + "step": 2446 + }, + { + "ce_ib": 4.9088873863220215, + "ce_orig": 0.8568646311759949, + "epoch": 0.7034294341793084, + "kl_loss": 0.18079669773578644, + "loss_ib": 0.006716854404658079, + "step": 2446 + }, + { + "ce_ib": 4.960153579711914, + "ce_orig": 0.613753080368042, + "epoch": 0.703717017758286, + "kl_loss": 0.2048960030078888, + "loss_ib": 0.007009113673120737, + "step": 2447 + }, + { + "ce_ib": 3.7501981258392334, + "ce_orig": 1.1124041080474854, + "epoch": 0.703717017758286, + "kl_loss": 0.17805469036102295, + "loss_ib": 0.005530744791030884, + "step": 2447 + }, + { + "ce_ib": 3.7275726795196533, + "ce_orig": 0.9652771949768066, + "epoch": 0.703717017758286, + "kl_loss": 0.18629790842533112, + "loss_ib": 0.005590551998466253, + "step": 2447 + }, + { + "ce_ib": 3.3489480018615723, + "ce_orig": 0.7537181973457336, + "epoch": 0.703717017758286, + "kl_loss": 0.137593075633049, + "loss_ib": 0.00472487835213542, + "step": 2447 + }, + { + "ce_ib": 3.8026604652404785, + "ce_orig": 0.7021476030349731, + "epoch": 0.7040046013372636, + "kl_loss": 0.23082387447357178, + "loss_ib": 0.0061108991503715515, + "step": 2448 + }, + { + "ce_ib": 5.698450565338135, + "ce_orig": 0.7742329239845276, + "epoch": 0.7040046013372636, + "kl_loss": 0.1562822461128235, + "loss_ib": 0.007261272985488176, + "step": 2448 + }, + { + "ce_ib": 4.645622730255127, + "ce_orig": 1.0012844800949097, + "epoch": 0.7040046013372636, + "kl_loss": 0.16790464520454407, + "loss_ib": 0.0063246688805520535, + "step": 2448 + }, + { + "ce_ib": 3.908360004425049, + "ce_orig": 0.9341500401496887, + "epoch": 0.7040046013372636, + "kl_loss": 0.19129341840744019, + "loss_ib": 0.005821294151246548, + "step": 2448 + }, + { + "ce_ib": 4.507713794708252, + "ce_orig": 0.9582691788673401, + "epoch": 0.7042921849162412, + "kl_loss": 0.12079484760761261, + "loss_ib": 0.005715662147849798, + "step": 2449 + }, + { + "ce_ib": 4.245687007904053, + "ce_orig": 0.8140878081321716, + "epoch": 0.7042921849162412, + "kl_loss": 0.2505837678909302, + "loss_ib": 0.0067515247501432896, + "step": 2449 + }, + { + "ce_ib": 4.761693000793457, + "ce_orig": 1.063140869140625, + "epoch": 0.7042921849162412, + "kl_loss": 0.2723318040370941, + "loss_ib": 0.007485011126846075, + "step": 2449 + }, + { + "ce_ib": 4.74562931060791, + "ce_orig": 0.9865927696228027, + "epoch": 0.7042921849162412, + "kl_loss": 0.2027050405740738, + "loss_ib": 0.00677267974242568, + "step": 2449 + }, + { + "epoch": 0.704579768495219, + "grad_norm": 0.1343173384666443, + "learning_rate": 8.946204264167077e-06, + "loss": 0.8531, + "step": 2450 + }, + { + "ce_ib": 1.718927264213562, + "ce_orig": 0.38326868414878845, + "epoch": 0.704579768495219, + "kl_loss": 0.11992516368627548, + "loss_ib": 0.002918178914114833, + "step": 2450 + }, + { + "ce_ib": 8.862274169921875, + "ce_orig": 1.970462441444397, + "epoch": 0.704579768495219, + "kl_loss": 0.20316967368125916, + "loss_ib": 0.010893970727920532, + "step": 2450 + }, + { + "ce_ib": 1.9595216512680054, + "ce_orig": 0.48047906160354614, + "epoch": 0.704579768495219, + "kl_loss": 0.14251524209976196, + "loss_ib": 0.0033846742007881403, + "step": 2450 + }, + { + "ce_ib": 3.69404673576355, + "ce_orig": 0.7261792421340942, + "epoch": 0.704579768495219, + "kl_loss": 0.19010820984840393, + "loss_ib": 0.005595128517597914, + "step": 2450 + }, + { + "ce_ib": 4.755764007568359, + "ce_orig": 0.6254162788391113, + "epoch": 0.7048673520741966, + "kl_loss": 0.23739823698997498, + "loss_ib": 0.007129746489226818, + "step": 2451 + }, + { + "ce_ib": 3.1766622066497803, + "ce_orig": 0.6445481181144714, + "epoch": 0.7048673520741966, + "kl_loss": 0.13016438484191895, + "loss_ib": 0.004478306043893099, + "step": 2451 + }, + { + "ce_ib": 7.232974052429199, + "ce_orig": 1.5919005870819092, + "epoch": 0.7048673520741966, + "kl_loss": 0.25187182426452637, + "loss_ib": 0.009751692414283752, + "step": 2451 + }, + { + "ce_ib": 2.6306259632110596, + "ce_orig": 0.6229518055915833, + "epoch": 0.7048673520741966, + "kl_loss": 0.13169635832309723, + "loss_ib": 0.0039475890807807446, + "step": 2451 + }, + { + "ce_ib": 5.294440269470215, + "ce_orig": 0.9607746005058289, + "epoch": 0.7051549356531742, + "kl_loss": 0.26718705892562866, + "loss_ib": 0.00796631071716547, + "step": 2452 + }, + { + "ce_ib": 2.3992583751678467, + "ce_orig": 0.5728752613067627, + "epoch": 0.7051549356531742, + "kl_loss": 0.13775096833705902, + "loss_ib": 0.0037767679896205664, + "step": 2452 + }, + { + "ce_ib": 5.888376235961914, + "ce_orig": 1.2223107814788818, + "epoch": 0.7051549356531742, + "kl_loss": 0.24276965856552124, + "loss_ib": 0.008316072635352612, + "step": 2452 + }, + { + "ce_ib": 4.759071350097656, + "ce_orig": 0.6090178489685059, + "epoch": 0.7051549356531742, + "kl_loss": 0.2622416913509369, + "loss_ib": 0.007381488103419542, + "step": 2452 + }, + { + "ce_ib": 7.886328220367432, + "ce_orig": 1.4019814729690552, + "epoch": 0.7054425192321518, + "kl_loss": 0.19632211327552795, + "loss_ib": 0.009849549271166325, + "step": 2453 + }, + { + "ce_ib": 5.197869777679443, + "ce_orig": 1.4119362831115723, + "epoch": 0.7054425192321518, + "kl_loss": 0.1652664840221405, + "loss_ib": 0.006850534584373236, + "step": 2453 + }, + { + "ce_ib": 2.074538230895996, + "ce_orig": 0.358896940946579, + "epoch": 0.7054425192321518, + "kl_loss": 0.448444664478302, + "loss_ib": 0.006558984983712435, + "step": 2453 + }, + { + "ce_ib": 7.347366809844971, + "ce_orig": 1.5785619020462036, + "epoch": 0.7054425192321518, + "kl_loss": 0.23904848098754883, + "loss_ib": 0.009737851098179817, + "step": 2453 + }, + { + "ce_ib": 4.922626972198486, + "ce_orig": 0.7413555979728699, + "epoch": 0.7057301028111295, + "kl_loss": 0.19527319073677063, + "loss_ib": 0.006875358521938324, + "step": 2454 + }, + { + "ce_ib": 5.0059332847595215, + "ce_orig": 1.1818859577178955, + "epoch": 0.7057301028111295, + "kl_loss": 0.18456777930259705, + "loss_ib": 0.006851611193269491, + "step": 2454 + }, + { + "ce_ib": 4.893244743347168, + "ce_orig": 0.8956164717674255, + "epoch": 0.7057301028111295, + "kl_loss": 0.16873672604560852, + "loss_ib": 0.006580611690878868, + "step": 2454 + }, + { + "ce_ib": 4.127147197723389, + "ce_orig": 0.7247373461723328, + "epoch": 0.7057301028111295, + "kl_loss": 0.2217589169740677, + "loss_ib": 0.006344736088067293, + "step": 2454 + }, + { + "epoch": 0.7060176863901071, + "grad_norm": 0.11841706186532974, + "learning_rate": 8.941433699525457e-06, + "loss": 0.8736, + "step": 2455 + }, + { + "ce_ib": 3.414628505706787, + "ce_orig": 0.6182649731636047, + "epoch": 0.7060176863901071, + "kl_loss": 0.14889919757843018, + "loss_ib": 0.004903620574623346, + "step": 2455 + }, + { + "ce_ib": 4.0037102699279785, + "ce_orig": 0.8226838111877441, + "epoch": 0.7060176863901071, + "kl_loss": 0.21210241317749023, + "loss_ib": 0.006124734878540039, + "step": 2455 + }, + { + "ce_ib": 2.2070424556732178, + "ce_orig": 0.6676501035690308, + "epoch": 0.7060176863901071, + "kl_loss": 0.22784548997879028, + "loss_ib": 0.004485497251152992, + "step": 2455 + }, + { + "ce_ib": 6.857331275939941, + "ce_orig": 0.8481958508491516, + "epoch": 0.7060176863901071, + "kl_loss": 0.18963176012039185, + "loss_ib": 0.008753648959100246, + "step": 2455 + }, + { + "ce_ib": 4.906370162963867, + "ce_orig": 0.9278814196586609, + "epoch": 0.7063052699690847, + "kl_loss": 0.24903592467308044, + "loss_ib": 0.007396729197353125, + "step": 2456 + }, + { + "ce_ib": 4.725356578826904, + "ce_orig": 1.0813392400741577, + "epoch": 0.7063052699690847, + "kl_loss": 0.1818477362394333, + "loss_ib": 0.00654383422806859, + "step": 2456 + }, + { + "ce_ib": 5.809159278869629, + "ce_orig": 0.9456941485404968, + "epoch": 0.7063052699690847, + "kl_loss": 0.27650341391563416, + "loss_ib": 0.008574193343520164, + "step": 2456 + }, + { + "ce_ib": 2.613278865814209, + "ce_orig": 0.7225695848464966, + "epoch": 0.7063052699690847, + "kl_loss": 0.12392690032720566, + "loss_ib": 0.0038525478448718786, + "step": 2456 + }, + { + "ce_ib": 3.3244218826293945, + "ce_orig": 0.7079664468765259, + "epoch": 0.7065928535480624, + "kl_loss": 0.12985889613628387, + "loss_ib": 0.004623010754585266, + "step": 2457 + }, + { + "ce_ib": 4.485446929931641, + "ce_orig": 0.7468883395195007, + "epoch": 0.7065928535480624, + "kl_loss": 0.21681030094623566, + "loss_ib": 0.0066535500809550285, + "step": 2457 + }, + { + "ce_ib": 2.456899642944336, + "ce_orig": 0.4553338885307312, + "epoch": 0.7065928535480624, + "kl_loss": 0.1348019242286682, + "loss_ib": 0.0038049190770834684, + "step": 2457 + }, + { + "ce_ib": 5.291893005371094, + "ce_orig": 0.5436161160469055, + "epoch": 0.7065928535480624, + "kl_loss": 0.24852143228054047, + "loss_ib": 0.007777107413858175, + "step": 2457 + }, + { + "ce_ib": 3.7016005516052246, + "ce_orig": 0.719068169593811, + "epoch": 0.7068804371270401, + "kl_loss": 0.18645989894866943, + "loss_ib": 0.005566199775785208, + "step": 2458 + }, + { + "ce_ib": 3.5907840728759766, + "ce_orig": 0.5479931235313416, + "epoch": 0.7068804371270401, + "kl_loss": 0.15012654662132263, + "loss_ib": 0.005092049948871136, + "step": 2458 + }, + { + "ce_ib": 4.258731365203857, + "ce_orig": 0.6525676846504211, + "epoch": 0.7068804371270401, + "kl_loss": 0.21840988099575043, + "loss_ib": 0.006442829966545105, + "step": 2458 + }, + { + "ce_ib": 2.2317848205566406, + "ce_orig": 0.5534005165100098, + "epoch": 0.7068804371270401, + "kl_loss": 0.2782726287841797, + "loss_ib": 0.005014511290937662, + "step": 2458 + }, + { + "ce_ib": 3.37851881980896, + "ce_orig": 0.7892978191375732, + "epoch": 0.7071680207060177, + "kl_loss": 0.22823655605316162, + "loss_ib": 0.005660884082317352, + "step": 2459 + }, + { + "ce_ib": 4.543015480041504, + "ce_orig": 0.95122230052948, + "epoch": 0.7071680207060177, + "kl_loss": 0.22944548726081848, + "loss_ib": 0.006837470456957817, + "step": 2459 + }, + { + "ce_ib": 6.270914554595947, + "ce_orig": 1.365883469581604, + "epoch": 0.7071680207060177, + "kl_loss": 0.19309654831886292, + "loss_ib": 0.008201880380511284, + "step": 2459 + }, + { + "ce_ib": 2.384920835494995, + "ce_orig": 0.7749490141868591, + "epoch": 0.7071680207060177, + "kl_loss": 0.1361580193042755, + "loss_ib": 0.0037465011700987816, + "step": 2459 + }, + { + "epoch": 0.7074556042849953, + "grad_norm": 0.13423018157482147, + "learning_rate": 8.936653639055225e-06, + "loss": 0.8881, + "step": 2460 + }, + { + "ce_ib": 3.559011936187744, + "ce_orig": 0.7191449999809265, + "epoch": 0.7074556042849953, + "kl_loss": 0.6353079080581665, + "loss_ib": 0.009912091307342052, + "step": 2460 + }, + { + "ce_ib": 4.540559768676758, + "ce_orig": 0.8296971917152405, + "epoch": 0.7074556042849953, + "kl_loss": 0.26579299569129944, + "loss_ib": 0.007198489736765623, + "step": 2460 + }, + { + "ce_ib": 3.6890928745269775, + "ce_orig": 0.5952888131141663, + "epoch": 0.7074556042849953, + "kl_loss": 0.47429218888282776, + "loss_ib": 0.008432014845311642, + "step": 2460 + }, + { + "ce_ib": 4.126105785369873, + "ce_orig": 0.9326795339584351, + "epoch": 0.7074556042849953, + "kl_loss": 0.14136868715286255, + "loss_ib": 0.005539792589843273, + "step": 2460 + }, + { + "ce_ib": 2.4468135833740234, + "ce_orig": 0.709064781665802, + "epoch": 0.707743187863973, + "kl_loss": 0.11162683367729187, + "loss_ib": 0.003563081845641136, + "step": 2461 + }, + { + "ce_ib": 2.1770384311676025, + "ce_orig": 0.42996320128440857, + "epoch": 0.707743187863973, + "kl_loss": 0.13453949987888336, + "loss_ib": 0.0035224335733801126, + "step": 2461 + }, + { + "ce_ib": 3.181065559387207, + "ce_orig": 0.5902462005615234, + "epoch": 0.707743187863973, + "kl_loss": 0.26152825355529785, + "loss_ib": 0.0057963477447628975, + "step": 2461 + }, + { + "ce_ib": 6.08859920501709, + "ce_orig": 1.0784636735916138, + "epoch": 0.707743187863973, + "kl_loss": 0.23440703749656677, + "loss_ib": 0.008432669565081596, + "step": 2461 + }, + { + "ce_ib": 5.488067626953125, + "ce_orig": 0.49219024181365967, + "epoch": 0.7080307714429506, + "kl_loss": 0.28346681594848633, + "loss_ib": 0.008322736248373985, + "step": 2462 + }, + { + "ce_ib": 5.813212871551514, + "ce_orig": 0.6718828082084656, + "epoch": 0.7080307714429506, + "kl_loss": 0.24692827463150024, + "loss_ib": 0.00828249566257, + "step": 2462 + }, + { + "ce_ib": 4.745418548583984, + "ce_orig": 0.9086847901344299, + "epoch": 0.7080307714429506, + "kl_loss": 0.14914977550506592, + "loss_ib": 0.006236916873604059, + "step": 2462 + }, + { + "ce_ib": 3.277320146560669, + "ce_orig": 0.6311280727386475, + "epoch": 0.7080307714429506, + "kl_loss": 0.25652337074279785, + "loss_ib": 0.0058425539173185825, + "step": 2462 + }, + { + "ce_ib": 4.531377792358398, + "ce_orig": 0.938064455986023, + "epoch": 0.7083183550219283, + "kl_loss": 0.19440090656280518, + "loss_ib": 0.006475386675447226, + "step": 2463 + }, + { + "ce_ib": 4.009416580200195, + "ce_orig": 0.4701254367828369, + "epoch": 0.7083183550219283, + "kl_loss": 0.5556694269180298, + "loss_ib": 0.00956611055880785, + "step": 2463 + }, + { + "ce_ib": 1.974107027053833, + "ce_orig": 0.2559237778186798, + "epoch": 0.7083183550219283, + "kl_loss": 0.18718266487121582, + "loss_ib": 0.0038459335919469595, + "step": 2463 + }, + { + "ce_ib": 8.388144493103027, + "ce_orig": 1.7004711627960205, + "epoch": 0.7083183550219283, + "kl_loss": 0.28114789724349976, + "loss_ib": 0.011199623346328735, + "step": 2463 + }, + { + "ce_ib": 3.123183488845825, + "ce_orig": 0.8704360127449036, + "epoch": 0.7086059386009059, + "kl_loss": 0.27237221598625183, + "loss_ib": 0.005846905522048473, + "step": 2464 + }, + { + "ce_ib": 5.002218246459961, + "ce_orig": 0.8473984599113464, + "epoch": 0.7086059386009059, + "kl_loss": 0.208529531955719, + "loss_ib": 0.00708751380443573, + "step": 2464 + }, + { + "ce_ib": 4.352365970611572, + "ce_orig": 0.6045506596565247, + "epoch": 0.7086059386009059, + "kl_loss": 0.22644178569316864, + "loss_ib": 0.006616783794015646, + "step": 2464 + }, + { + "ce_ib": 3.685572862625122, + "ce_orig": 0.7349777817726135, + "epoch": 0.7086059386009059, + "kl_loss": 0.1540321707725525, + "loss_ib": 0.005225894507020712, + "step": 2464 + }, + { + "epoch": 0.7088935221798836, + "grad_norm": 0.12013764679431915, + "learning_rate": 8.931864094272663e-06, + "loss": 0.7658, + "step": 2465 + }, + { + "ce_ib": 3.5299832820892334, + "ce_orig": 0.745291531085968, + "epoch": 0.7088935221798836, + "kl_loss": 0.20926502346992493, + "loss_ib": 0.00562263373285532, + "step": 2465 + }, + { + "ce_ib": 2.2804737091064453, + "ce_orig": 0.4194578528404236, + "epoch": 0.7088935221798836, + "kl_loss": 0.37461721897125244, + "loss_ib": 0.0060266456566751, + "step": 2465 + }, + { + "ce_ib": 5.408512592315674, + "ce_orig": 0.989798367023468, + "epoch": 0.7088935221798836, + "kl_loss": 0.20717564225196838, + "loss_ib": 0.00748026929795742, + "step": 2465 + }, + { + "ce_ib": 3.1032357215881348, + "ce_orig": 0.3262802064418793, + "epoch": 0.7088935221798836, + "kl_loss": 0.25903594493865967, + "loss_ib": 0.005693595390766859, + "step": 2465 + }, + { + "ce_ib": 4.562615871429443, + "ce_orig": 0.6536048054695129, + "epoch": 0.7091811057588612, + "kl_loss": 0.1862957775592804, + "loss_ib": 0.006425573490560055, + "step": 2466 + }, + { + "ce_ib": 5.110820770263672, + "ce_orig": 0.9636155366897583, + "epoch": 0.7091811057588612, + "kl_loss": 0.28258612751960754, + "loss_ib": 0.007936682552099228, + "step": 2466 + }, + { + "ce_ib": 4.800495624542236, + "ce_orig": 0.7760675549507141, + "epoch": 0.7091811057588612, + "kl_loss": 0.19224202632904053, + "loss_ib": 0.006722916383296251, + "step": 2466 + }, + { + "ce_ib": 6.743045330047607, + "ce_orig": 0.4003826975822449, + "epoch": 0.7091811057588612, + "kl_loss": 0.3523140847682953, + "loss_ib": 0.010266185738146305, + "step": 2466 + }, + { + "ce_ib": 3.5157086849212646, + "ce_orig": 0.6121046543121338, + "epoch": 0.7094686893378388, + "kl_loss": 0.11269594728946686, + "loss_ib": 0.004642668180167675, + "step": 2467 + }, + { + "ce_ib": 4.104611873626709, + "ce_orig": 0.7934226989746094, + "epoch": 0.7094686893378388, + "kl_loss": 0.22681328654289246, + "loss_ib": 0.006372744683176279, + "step": 2467 + }, + { + "ce_ib": 2.2597403526306152, + "ce_orig": 0.5568003058433533, + "epoch": 0.7094686893378388, + "kl_loss": 0.10006988048553467, + "loss_ib": 0.0032604390289634466, + "step": 2467 + }, + { + "ce_ib": 4.6498122215271, + "ce_orig": 0.4041396379470825, + "epoch": 0.7094686893378388, + "kl_loss": 0.19379419088363647, + "loss_ib": 0.0065877544693648815, + "step": 2467 + }, + { + "ce_ib": 3.022171974182129, + "ce_orig": 0.6176078915596008, + "epoch": 0.7097562729168164, + "kl_loss": 0.20789438486099243, + "loss_ib": 0.005101115442812443, + "step": 2468 + }, + { + "ce_ib": 5.4649882316589355, + "ce_orig": 0.864780843257904, + "epoch": 0.7097562729168164, + "kl_loss": 0.34020015597343445, + "loss_ib": 0.008866989053785801, + "step": 2468 + }, + { + "ce_ib": 3.0243659019470215, + "ce_orig": 0.6787719130516052, + "epoch": 0.7097562729168164, + "kl_loss": 0.1928875595331192, + "loss_ib": 0.004953241441398859, + "step": 2468 + }, + { + "ce_ib": 5.412663459777832, + "ce_orig": 1.0597435235977173, + "epoch": 0.7097562729168164, + "kl_loss": 0.17190364003181458, + "loss_ib": 0.007131700403988361, + "step": 2468 + }, + { + "ce_ib": 3.930914878845215, + "ce_orig": 0.7852596640586853, + "epoch": 0.710043856495794, + "kl_loss": 0.19156157970428467, + "loss_ib": 0.005846530664712191, + "step": 2469 + }, + { + "ce_ib": 4.665014266967773, + "ce_orig": 0.8209867477416992, + "epoch": 0.710043856495794, + "kl_loss": 0.1435384750366211, + "loss_ib": 0.006100399419665337, + "step": 2469 + }, + { + "ce_ib": 4.323861122131348, + "ce_orig": 0.6764291524887085, + "epoch": 0.710043856495794, + "kl_loss": 0.30246907472610474, + "loss_ib": 0.007348551414906979, + "step": 2469 + }, + { + "ce_ib": 2.2976574897766113, + "ce_orig": 0.47426995635032654, + "epoch": 0.710043856495794, + "kl_loss": 0.16332413256168365, + "loss_ib": 0.0039308988489210606, + "step": 2469 + }, + { + "epoch": 0.7103314400747718, + "grad_norm": 0.11430375277996063, + "learning_rate": 8.927065076716893e-06, + "loss": 0.8042, + "step": 2470 + }, + { + "ce_ib": 3.500749111175537, + "ce_orig": 0.6656222939491272, + "epoch": 0.7103314400747718, + "kl_loss": 0.18032793700695038, + "loss_ib": 0.005304028280079365, + "step": 2470 + }, + { + "ce_ib": 4.9946393966674805, + "ce_orig": 1.1473900079727173, + "epoch": 0.7103314400747718, + "kl_loss": 0.21399357914924622, + "loss_ib": 0.007134574931114912, + "step": 2470 + }, + { + "ce_ib": 6.715755939483643, + "ce_orig": 0.8042346835136414, + "epoch": 0.7103314400747718, + "kl_loss": 0.21267415583133698, + "loss_ib": 0.008842497132718563, + "step": 2470 + }, + { + "ce_ib": 6.924514293670654, + "ce_orig": 1.076629877090454, + "epoch": 0.7103314400747718, + "kl_loss": 0.18408870697021484, + "loss_ib": 0.00876540131866932, + "step": 2470 + }, + { + "ce_ib": 4.9450812339782715, + "ce_orig": 0.8957083225250244, + "epoch": 0.7106190236537494, + "kl_loss": 0.19185563921928406, + "loss_ib": 0.006863637361675501, + "step": 2471 + }, + { + "ce_ib": 4.184980869293213, + "ce_orig": 0.8444265127182007, + "epoch": 0.7106190236537494, + "kl_loss": 0.22760412096977234, + "loss_ib": 0.006461021956056356, + "step": 2471 + }, + { + "ce_ib": 4.377635955810547, + "ce_orig": 0.701134979724884, + "epoch": 0.7106190236537494, + "kl_loss": 0.17219781875610352, + "loss_ib": 0.0060996138490736485, + "step": 2471 + }, + { + "ce_ib": 3.249746799468994, + "ce_orig": 0.620972216129303, + "epoch": 0.7106190236537494, + "kl_loss": 0.22462549805641174, + "loss_ib": 0.005496001802384853, + "step": 2471 + }, + { + "ce_ib": 8.470349311828613, + "ce_orig": 1.803223967552185, + "epoch": 0.710906607232727, + "kl_loss": 0.28490298986434937, + "loss_ib": 0.011319379322230816, + "step": 2472 + }, + { + "ce_ib": 3.162708044052124, + "ce_orig": 0.7958919405937195, + "epoch": 0.710906607232727, + "kl_loss": 0.17028683423995972, + "loss_ib": 0.004865576047450304, + "step": 2472 + }, + { + "ce_ib": 6.512999534606934, + "ce_orig": 0.8988737463951111, + "epoch": 0.710906607232727, + "kl_loss": 0.24711459875106812, + "loss_ib": 0.00898414570838213, + "step": 2472 + }, + { + "ce_ib": 7.058821201324463, + "ce_orig": 0.9030210375785828, + "epoch": 0.710906607232727, + "kl_loss": 0.20606905221939087, + "loss_ib": 0.00911951158195734, + "step": 2472 + }, + { + "ce_ib": 3.994568347930908, + "ce_orig": 0.5271956324577332, + "epoch": 0.7111941908117047, + "kl_loss": 0.20601822435855865, + "loss_ib": 0.006054750643670559, + "step": 2473 + }, + { + "ce_ib": 2.570216178894043, + "ce_orig": 0.64536052942276, + "epoch": 0.7111941908117047, + "kl_loss": 0.1528271734714508, + "loss_ib": 0.004098488017916679, + "step": 2473 + }, + { + "ce_ib": 1.8178131580352783, + "ce_orig": 0.33946651220321655, + "epoch": 0.7111941908117047, + "kl_loss": 0.44274958968162537, + "loss_ib": 0.006245309021323919, + "step": 2473 + }, + { + "ce_ib": 3.7621352672576904, + "ce_orig": 0.7500786781311035, + "epoch": 0.7111941908117047, + "kl_loss": 0.11502841114997864, + "loss_ib": 0.004912419244647026, + "step": 2473 + }, + { + "ce_ib": 9.046262741088867, + "ce_orig": 1.0091217756271362, + "epoch": 0.7114817743906823, + "kl_loss": 0.23223043978214264, + "loss_ib": 0.011368568055331707, + "step": 2474 + }, + { + "ce_ib": 3.7000691890716553, + "ce_orig": 0.8172020316123962, + "epoch": 0.7114817743906823, + "kl_loss": 0.18483516573905945, + "loss_ib": 0.005548420827835798, + "step": 2474 + }, + { + "ce_ib": 2.4359116554260254, + "ce_orig": 0.4627586305141449, + "epoch": 0.7114817743906823, + "kl_loss": 0.2110711932182312, + "loss_ib": 0.0045466236770153046, + "step": 2474 + }, + { + "ce_ib": 2.3167457580566406, + "ce_orig": 0.40322211384773254, + "epoch": 0.7114817743906823, + "kl_loss": 0.18760305643081665, + "loss_ib": 0.004192776512354612, + "step": 2474 + }, + { + "epoch": 0.7117693579696599, + "grad_norm": 0.1099696159362793, + "learning_rate": 8.922256597949859e-06, + "loss": 0.8924, + "step": 2475 + }, + { + "ce_ib": 4.680692672729492, + "ce_orig": 0.9544792771339417, + "epoch": 0.7117693579696599, + "kl_loss": 0.2275879681110382, + "loss_ib": 0.00695657217875123, + "step": 2475 + }, + { + "ce_ib": 4.5714850425720215, + "ce_orig": 1.0356491804122925, + "epoch": 0.7117693579696599, + "kl_loss": 0.18080659210681915, + "loss_ib": 0.006379551254212856, + "step": 2475 + }, + { + "ce_ib": 6.744418144226074, + "ce_orig": 0.5662514567375183, + "epoch": 0.7117693579696599, + "kl_loss": 0.6459289789199829, + "loss_ib": 0.01320370752364397, + "step": 2475 + }, + { + "ce_ib": 3.7402443885803223, + "ce_orig": 0.8505580425262451, + "epoch": 0.7117693579696599, + "kl_loss": 0.21263258159160614, + "loss_ib": 0.005866570398211479, + "step": 2475 + }, + { + "ce_ib": 3.4963014125823975, + "ce_orig": 0.5317114591598511, + "epoch": 0.7120569415486375, + "kl_loss": 0.26440662145614624, + "loss_ib": 0.0061403680592775345, + "step": 2476 + }, + { + "ce_ib": 2.6156201362609863, + "ce_orig": 0.5434846878051758, + "epoch": 0.7120569415486375, + "kl_loss": 0.16465461254119873, + "loss_ib": 0.0042621660977602005, + "step": 2476 + }, + { + "ce_ib": 2.386190891265869, + "ce_orig": 0.2997593581676483, + "epoch": 0.7120569415486375, + "kl_loss": 0.18276721239089966, + "loss_ib": 0.004213863052427769, + "step": 2476 + }, + { + "ce_ib": 6.603331565856934, + "ce_orig": 1.3014670610427856, + "epoch": 0.7120569415486375, + "kl_loss": 0.1665414422750473, + "loss_ib": 0.008268745616078377, + "step": 2476 + }, + { + "ce_ib": 3.9416794776916504, + "ce_orig": 0.8220130801200867, + "epoch": 0.7123445251276153, + "kl_loss": 0.18316848576068878, + "loss_ib": 0.005773364100605249, + "step": 2477 + }, + { + "ce_ib": 3.5157623291015625, + "ce_orig": 0.6955379247665405, + "epoch": 0.7123445251276153, + "kl_loss": 0.18898114562034607, + "loss_ib": 0.0054055736400187016, + "step": 2477 + }, + { + "ce_ib": 2.4742698669433594, + "ce_orig": 0.4631092846393585, + "epoch": 0.7123445251276153, + "kl_loss": 0.1427808403968811, + "loss_ib": 0.00390207814052701, + "step": 2477 + }, + { + "ce_ib": 4.711426734924316, + "ce_orig": 0.6251652836799622, + "epoch": 0.7123445251276153, + "kl_loss": 0.14794865250587463, + "loss_ib": 0.006190913263708353, + "step": 2477 + }, + { + "ce_ib": 3.948150873184204, + "ce_orig": 0.6915721297264099, + "epoch": 0.7126321087065929, + "kl_loss": 0.2171051949262619, + "loss_ib": 0.006119202822446823, + "step": 2478 + }, + { + "ce_ib": 4.755468368530273, + "ce_orig": 0.6160720586776733, + "epoch": 0.7126321087065929, + "kl_loss": 0.19839942455291748, + "loss_ib": 0.00673946226015687, + "step": 2478 + }, + { + "ce_ib": 5.251643657684326, + "ce_orig": 1.2555502653121948, + "epoch": 0.7126321087065929, + "kl_loss": 0.21477065980434418, + "loss_ib": 0.00739935040473938, + "step": 2478 + }, + { + "ce_ib": 6.729740142822266, + "ce_orig": 1.579503059387207, + "epoch": 0.7126321087065929, + "kl_loss": 0.21744674444198608, + "loss_ib": 0.00890420749783516, + "step": 2478 + }, + { + "ce_ib": 3.6595840454101562, + "ce_orig": 0.6848421692848206, + "epoch": 0.7129196922855705, + "kl_loss": 0.1874752640724182, + "loss_ib": 0.005534336436539888, + "step": 2479 + }, + { + "ce_ib": 7.090970516204834, + "ce_orig": 1.1734898090362549, + "epoch": 0.7129196922855705, + "kl_loss": 0.1414545327425003, + "loss_ib": 0.00850551575422287, + "step": 2479 + }, + { + "ce_ib": 2.2517106533050537, + "ce_orig": 0.672306478023529, + "epoch": 0.7129196922855705, + "kl_loss": 0.12923094630241394, + "loss_ib": 0.003544020000845194, + "step": 2479 + }, + { + "ce_ib": 7.038332462310791, + "ce_orig": 1.3664395809173584, + "epoch": 0.7129196922855705, + "kl_loss": 0.254940927028656, + "loss_ib": 0.009587741456925869, + "step": 2479 + }, + { + "epoch": 0.7132072758645481, + "grad_norm": 0.1326526254415512, + "learning_rate": 8.917438669556307e-06, + "loss": 0.8229, + "step": 2480 + }, + { + "ce_ib": 3.156128168106079, + "ce_orig": 0.7535279989242554, + "epoch": 0.7132072758645481, + "kl_loss": 0.1915542632341385, + "loss_ib": 0.0050716707482934, + "step": 2480 + }, + { + "ce_ib": 3.335029363632202, + "ce_orig": 0.8862932920455933, + "epoch": 0.7132072758645481, + "kl_loss": 0.1786877065896988, + "loss_ib": 0.005121906287968159, + "step": 2480 + }, + { + "ce_ib": 4.7469868659973145, + "ce_orig": 0.9212871789932251, + "epoch": 0.7132072758645481, + "kl_loss": 0.11687248200178146, + "loss_ib": 0.005915711168199778, + "step": 2480 + }, + { + "ce_ib": 5.119639873504639, + "ce_orig": 1.0079585313796997, + "epoch": 0.7132072758645481, + "kl_loss": 0.20498999953269958, + "loss_ib": 0.007169539574533701, + "step": 2480 + }, + { + "ce_ib": 2.4274814128875732, + "ce_orig": 0.31579700112342834, + "epoch": 0.7134948594435258, + "kl_loss": 0.17605026066303253, + "loss_ib": 0.004187983926385641, + "step": 2481 + }, + { + "ce_ib": 7.459712982177734, + "ce_orig": 1.1958779096603394, + "epoch": 0.7134948594435258, + "kl_loss": 0.16319221258163452, + "loss_ib": 0.00909163523465395, + "step": 2481 + }, + { + "ce_ib": 4.335313320159912, + "ce_orig": 0.9336671829223633, + "epoch": 0.7134948594435258, + "kl_loss": 0.20388107001781464, + "loss_ib": 0.006374123971909285, + "step": 2481 + }, + { + "ce_ib": 8.106508255004883, + "ce_orig": 1.5710558891296387, + "epoch": 0.7134948594435258, + "kl_loss": 0.22833117842674255, + "loss_ib": 0.010389819741249084, + "step": 2481 + }, + { + "ce_ib": 6.906356334686279, + "ce_orig": 1.0720226764678955, + "epoch": 0.7137824430225034, + "kl_loss": 0.24129244685173035, + "loss_ib": 0.00931928027421236, + "step": 2482 + }, + { + "ce_ib": 3.808276653289795, + "ce_orig": 0.6952037811279297, + "epoch": 0.7137824430225034, + "kl_loss": 0.19198130071163177, + "loss_ib": 0.005728089716285467, + "step": 2482 + }, + { + "ce_ib": 7.549028396606445, + "ce_orig": 1.6158088445663452, + "epoch": 0.7137824430225034, + "kl_loss": 0.24766524136066437, + "loss_ib": 0.010025680996477604, + "step": 2482 + }, + { + "ce_ib": 3.8289289474487305, + "ce_orig": 1.1639766693115234, + "epoch": 0.7137824430225034, + "kl_loss": 0.15054497122764587, + "loss_ib": 0.005334378220140934, + "step": 2482 + }, + { + "ce_ib": 1.8597147464752197, + "ce_orig": 0.5555332899093628, + "epoch": 0.714070026601481, + "kl_loss": 0.18058651685714722, + "loss_ib": 0.0036655801814049482, + "step": 2483 + }, + { + "ce_ib": 3.5221879482269287, + "ce_orig": 0.3999289274215698, + "epoch": 0.714070026601481, + "kl_loss": 0.2512551546096802, + "loss_ib": 0.006034739315509796, + "step": 2483 + }, + { + "ce_ib": 3.6032888889312744, + "ce_orig": 0.5781653523445129, + "epoch": 0.714070026601481, + "kl_loss": 0.1730547845363617, + "loss_ib": 0.005333836656063795, + "step": 2483 + }, + { + "ce_ib": 5.964961051940918, + "ce_orig": 1.3527952432632446, + "epoch": 0.714070026601481, + "kl_loss": 0.2641600966453552, + "loss_ib": 0.008606561459600925, + "step": 2483 + }, + { + "ce_ib": 3.5484142303466797, + "ce_orig": 0.7381170988082886, + "epoch": 0.7143576101804587, + "kl_loss": 0.2197662740945816, + "loss_ib": 0.0057460772804915905, + "step": 2484 + }, + { + "ce_ib": 3.3191192150115967, + "ce_orig": 0.8309910297393799, + "epoch": 0.7143576101804587, + "kl_loss": 0.10991767793893814, + "loss_ib": 0.004418295808136463, + "step": 2484 + }, + { + "ce_ib": 5.145423889160156, + "ce_orig": 0.765242874622345, + "epoch": 0.7143576101804587, + "kl_loss": 0.16683821380138397, + "loss_ib": 0.006813806015998125, + "step": 2484 + }, + { + "ce_ib": 2.869119882583618, + "ce_orig": 0.26651298999786377, + "epoch": 0.7143576101804587, + "kl_loss": 0.22436821460723877, + "loss_ib": 0.005112801678478718, + "step": 2484 + }, + { + "epoch": 0.7146451937594364, + "grad_norm": 0.14005237817764282, + "learning_rate": 8.912611303143744e-06, + "loss": 0.9139, + "step": 2485 + }, + { + "ce_ib": 5.030973434448242, + "ce_orig": 1.0023778676986694, + "epoch": 0.7146451937594364, + "kl_loss": 0.2887710928916931, + "loss_ib": 0.007918684743344784, + "step": 2485 + }, + { + "ce_ib": 4.473417282104492, + "ce_orig": 0.9796201586723328, + "epoch": 0.7146451937594364, + "kl_loss": 0.21507331728935242, + "loss_ib": 0.006624150089919567, + "step": 2485 + }, + { + "ce_ib": 2.966494083404541, + "ce_orig": 0.7352120876312256, + "epoch": 0.7146451937594364, + "kl_loss": 0.1822904497385025, + "loss_ib": 0.004789398983120918, + "step": 2485 + }, + { + "ce_ib": 3.2499582767486572, + "ce_orig": 0.9504530429840088, + "epoch": 0.7146451937594364, + "kl_loss": 0.19532546401023865, + "loss_ib": 0.0052032130770385265, + "step": 2485 + }, + { + "ce_ib": 3.8999574184417725, + "ce_orig": 0.9199422597885132, + "epoch": 0.714932777338414, + "kl_loss": 0.15184003114700317, + "loss_ib": 0.005418357905000448, + "step": 2486 + }, + { + "ce_ib": 6.128220558166504, + "ce_orig": 1.1559429168701172, + "epoch": 0.714932777338414, + "kl_loss": 0.15650111436843872, + "loss_ib": 0.007693231571465731, + "step": 2486 + }, + { + "ce_ib": 4.746060848236084, + "ce_orig": 1.009746789932251, + "epoch": 0.714932777338414, + "kl_loss": 0.431932270526886, + "loss_ib": 0.009065383113920689, + "step": 2486 + }, + { + "ce_ib": 8.026719093322754, + "ce_orig": 1.6724977493286133, + "epoch": 0.714932777338414, + "kl_loss": 0.21206480264663696, + "loss_ib": 0.010147366672754288, + "step": 2486 + }, + { + "ce_ib": 3.629343032836914, + "ce_orig": 0.7157540321350098, + "epoch": 0.7152203609173916, + "kl_loss": 0.3941739797592163, + "loss_ib": 0.007571083027869463, + "step": 2487 + }, + { + "ce_ib": 7.190319538116455, + "ce_orig": 1.4781324863433838, + "epoch": 0.7152203609173916, + "kl_loss": 0.1468566358089447, + "loss_ib": 0.008658885955810547, + "step": 2487 + }, + { + "ce_ib": 4.047339916229248, + "ce_orig": 1.2317798137664795, + "epoch": 0.7152203609173916, + "kl_loss": 0.13497991859912872, + "loss_ib": 0.0053971391171216965, + "step": 2487 + }, + { + "ce_ib": 2.8382623195648193, + "ce_orig": 0.5432199239730835, + "epoch": 0.7152203609173916, + "kl_loss": 0.1644466519355774, + "loss_ib": 0.004482728894799948, + "step": 2487 + }, + { + "ce_ib": 7.793308734893799, + "ce_orig": 1.6912760734558105, + "epoch": 0.7155079444963692, + "kl_loss": 0.20891493558883667, + "loss_ib": 0.009882458485662937, + "step": 2488 + }, + { + "ce_ib": 6.664352893829346, + "ce_orig": 0.8153213262557983, + "epoch": 0.7155079444963692, + "kl_loss": 0.29684311151504517, + "loss_ib": 0.009632783941924572, + "step": 2488 + }, + { + "ce_ib": 3.924208164215088, + "ce_orig": 0.837173342704773, + "epoch": 0.7155079444963692, + "kl_loss": 0.18836970627307892, + "loss_ib": 0.005807904992252588, + "step": 2488 + }, + { + "ce_ib": 4.976451396942139, + "ce_orig": 0.8175032138824463, + "epoch": 0.7155079444963692, + "kl_loss": 0.2678220570087433, + "loss_ib": 0.007654671557247639, + "step": 2488 + }, + { + "ce_ib": 3.750826120376587, + "ce_orig": 0.7406229376792908, + "epoch": 0.7157955280753469, + "kl_loss": 0.1818077266216278, + "loss_ib": 0.0055689034052193165, + "step": 2489 + }, + { + "ce_ib": 7.126981258392334, + "ce_orig": 0.9249127507209778, + "epoch": 0.7157955280753469, + "kl_loss": 0.2218654602766037, + "loss_ib": 0.009345635771751404, + "step": 2489 + }, + { + "ce_ib": 4.463529586791992, + "ce_orig": 0.9753496646881104, + "epoch": 0.7157955280753469, + "kl_loss": 0.43563923239707947, + "loss_ib": 0.008819921873509884, + "step": 2489 + }, + { + "ce_ib": 3.6855883598327637, + "ce_orig": 0.577411949634552, + "epoch": 0.7157955280753469, + "kl_loss": 0.18506306409835815, + "loss_ib": 0.005536218639463186, + "step": 2489 + }, + { + "epoch": 0.7160831116543246, + "grad_norm": 0.14433909952640533, + "learning_rate": 8.907774510342413e-06, + "loss": 0.8969, + "step": 2490 + }, + { + "ce_ib": 6.55862283706665, + "ce_orig": 1.1540892124176025, + "epoch": 0.7160831116543246, + "kl_loss": 0.20950475335121155, + "loss_ib": 0.0086536705493927, + "step": 2490 + }, + { + "ce_ib": 6.722370147705078, + "ce_orig": 1.5239182710647583, + "epoch": 0.7160831116543246, + "kl_loss": 0.2113035023212433, + "loss_ib": 0.008835405111312866, + "step": 2490 + }, + { + "ce_ib": 4.584859371185303, + "ce_orig": 0.7088872194290161, + "epoch": 0.7160831116543246, + "kl_loss": 0.18888910114765167, + "loss_ib": 0.006473750341683626, + "step": 2490 + }, + { + "ce_ib": 3.3412115573883057, + "ce_orig": 0.6023476719856262, + "epoch": 0.7160831116543246, + "kl_loss": 0.21500644087791443, + "loss_ib": 0.005491275805979967, + "step": 2490 + }, + { + "ce_ib": 5.299452304840088, + "ce_orig": 0.8530816435813904, + "epoch": 0.7163706952333022, + "kl_loss": 0.16170944273471832, + "loss_ib": 0.006916546728461981, + "step": 2491 + }, + { + "ce_ib": 2.9656972885131836, + "ce_orig": 0.3937515914440155, + "epoch": 0.7163706952333022, + "kl_loss": 0.24705004692077637, + "loss_ib": 0.005436197388917208, + "step": 2491 + }, + { + "ce_ib": 4.289590358734131, + "ce_orig": 0.8268551826477051, + "epoch": 0.7163706952333022, + "kl_loss": 0.18690204620361328, + "loss_ib": 0.006158610340207815, + "step": 2491 + }, + { + "ce_ib": 3.9494388103485107, + "ce_orig": 0.6989210247993469, + "epoch": 0.7163706952333022, + "kl_loss": 0.331828773021698, + "loss_ib": 0.007267726585268974, + "step": 2491 + }, + { + "ce_ib": 2.9467008113861084, + "ce_orig": 0.5671961903572083, + "epoch": 0.7166582788122798, + "kl_loss": 0.1825624406337738, + "loss_ib": 0.004772325046360493, + "step": 2492 + }, + { + "ce_ib": 5.631208896636963, + "ce_orig": 1.132770299911499, + "epoch": 0.7166582788122798, + "kl_loss": 0.20113016664981842, + "loss_ib": 0.00764251034706831, + "step": 2492 + }, + { + "ce_ib": 3.4795758724212646, + "ce_orig": 0.7269555926322937, + "epoch": 0.7166582788122798, + "kl_loss": 0.1972765177488327, + "loss_ib": 0.005452340934425592, + "step": 2492 + }, + { + "ce_ib": 7.090541362762451, + "ce_orig": 1.4612987041473389, + "epoch": 0.7166582788122798, + "kl_loss": 0.23041698336601257, + "loss_ib": 0.009394710883498192, + "step": 2492 + }, + { + "ce_ib": 5.52360200881958, + "ce_orig": 0.695889949798584, + "epoch": 0.7169458623912575, + "kl_loss": 0.23038938641548157, + "loss_ib": 0.007827496156096458, + "step": 2493 + }, + { + "ce_ib": 4.228504657745361, + "ce_orig": 0.815923810005188, + "epoch": 0.7169458623912575, + "kl_loss": 0.24551962316036224, + "loss_ib": 0.00668370071798563, + "step": 2493 + }, + { + "ce_ib": 4.390937328338623, + "ce_orig": 0.8147494792938232, + "epoch": 0.7169458623912575, + "kl_loss": 0.21400001645088196, + "loss_ib": 0.006530937273055315, + "step": 2493 + }, + { + "ce_ib": 3.263502836227417, + "ce_orig": 0.9467772841453552, + "epoch": 0.7169458623912575, + "kl_loss": 0.19045831263065338, + "loss_ib": 0.0051680863834917545, + "step": 2493 + }, + { + "ce_ib": 3.629258871078491, + "ce_orig": 0.7941665649414062, + "epoch": 0.7172334459702351, + "kl_loss": 0.12851198017597198, + "loss_ib": 0.00491437828168273, + "step": 2494 + }, + { + "ce_ib": 4.290923595428467, + "ce_orig": 1.0583690404891968, + "epoch": 0.7172334459702351, + "kl_loss": 0.19096890091896057, + "loss_ib": 0.006200612522661686, + "step": 2494 + }, + { + "ce_ib": 6.187159061431885, + "ce_orig": 1.111602783203125, + "epoch": 0.7172334459702351, + "kl_loss": 0.2161535620689392, + "loss_ib": 0.008348695002496243, + "step": 2494 + }, + { + "ce_ib": 5.234256744384766, + "ce_orig": 0.7384940981864929, + "epoch": 0.7172334459702351, + "kl_loss": 0.1988651156425476, + "loss_ib": 0.007222908083349466, + "step": 2494 + }, + { + "epoch": 0.7175210295492127, + "grad_norm": 0.15452158451080322, + "learning_rate": 8.902928302805274e-06, + "loss": 0.8773, + "step": 2495 + }, + { + "ce_ib": 3.540203809738159, + "ce_orig": 0.6726974248886108, + "epoch": 0.7175210295492127, + "kl_loss": 0.25644540786743164, + "loss_ib": 0.006104657426476479, + "step": 2495 + }, + { + "ce_ib": 8.323140144348145, + "ce_orig": 1.74116849899292, + "epoch": 0.7175210295492127, + "kl_loss": 0.24888670444488525, + "loss_ib": 0.010812006890773773, + "step": 2495 + }, + { + "ce_ib": 3.000756025314331, + "ce_orig": 0.6415656208992004, + "epoch": 0.7175210295492127, + "kl_loss": 0.20656685531139374, + "loss_ib": 0.005066424608230591, + "step": 2495 + }, + { + "ce_ib": 2.8935205936431885, + "ce_orig": 0.5921223759651184, + "epoch": 0.7175210295492127, + "kl_loss": 0.19233408570289612, + "loss_ib": 0.004816861357539892, + "step": 2495 + }, + { + "ce_ib": 6.608353137969971, + "ce_orig": 1.3551684617996216, + "epoch": 0.7178086131281903, + "kl_loss": 0.2029527872800827, + "loss_ib": 0.00863788090646267, + "step": 2496 + }, + { + "ce_ib": 4.469206809997559, + "ce_orig": 0.7597352862358093, + "epoch": 0.7178086131281903, + "kl_loss": 0.2385953962802887, + "loss_ib": 0.006855160929262638, + "step": 2496 + }, + { + "ce_ib": 3.773024797439575, + "ce_orig": 0.8034536838531494, + "epoch": 0.7178086131281903, + "kl_loss": 0.1563478261232376, + "loss_ib": 0.0053365034982562065, + "step": 2496 + }, + { + "ce_ib": 5.09822416305542, + "ce_orig": 1.2549128532409668, + "epoch": 0.7178086131281903, + "kl_loss": 0.3098856210708618, + "loss_ib": 0.008197080343961716, + "step": 2496 + }, + { + "ce_ib": 6.148093223571777, + "ce_orig": 1.1274843215942383, + "epoch": 0.7180961967071681, + "kl_loss": 0.21972815692424774, + "loss_ib": 0.008345374837517738, + "step": 2497 + }, + { + "ce_ib": 4.908323287963867, + "ce_orig": 1.2127140760421753, + "epoch": 0.7180961967071681, + "kl_loss": 0.18957440555095673, + "loss_ib": 0.006804067175835371, + "step": 2497 + }, + { + "ce_ib": 4.882380485534668, + "ce_orig": 0.7795057892799377, + "epoch": 0.7180961967071681, + "kl_loss": 0.22794081270694733, + "loss_ib": 0.007161788642406464, + "step": 2497 + }, + { + "ce_ib": 4.949336528778076, + "ce_orig": 0.6635670065879822, + "epoch": 0.7180961967071681, + "kl_loss": 0.2275816649198532, + "loss_ib": 0.007225153502076864, + "step": 2497 + }, + { + "ce_ib": 4.429477691650391, + "ce_orig": 0.6596328616142273, + "epoch": 0.7183837802861457, + "kl_loss": 0.20032398402690887, + "loss_ib": 0.006432717200368643, + "step": 2498 + }, + { + "ce_ib": 4.636064529418945, + "ce_orig": 0.4866473078727722, + "epoch": 0.7183837802861457, + "kl_loss": 0.287273108959198, + "loss_ib": 0.007508795242756605, + "step": 2498 + }, + { + "ce_ib": 3.3174519538879395, + "ce_orig": 0.48340141773223877, + "epoch": 0.7183837802861457, + "kl_loss": 0.19342826306819916, + "loss_ib": 0.005251734517514706, + "step": 2498 + }, + { + "ce_ib": 5.689999580383301, + "ce_orig": 0.8357694745063782, + "epoch": 0.7183837802861457, + "kl_loss": 0.24437981843948364, + "loss_ib": 0.008133797906339169, + "step": 2498 + }, + { + "ce_ib": 5.469334125518799, + "ce_orig": 0.704836368560791, + "epoch": 0.7186713638651233, + "kl_loss": 0.1782444715499878, + "loss_ib": 0.007251778617501259, + "step": 2499 + }, + { + "ce_ib": 4.799906253814697, + "ce_orig": 0.878022313117981, + "epoch": 0.7186713638651233, + "kl_loss": 0.2835623621940613, + "loss_ib": 0.007635530084371567, + "step": 2499 + }, + { + "ce_ib": 4.127560615539551, + "ce_orig": 0.692426860332489, + "epoch": 0.7186713638651233, + "kl_loss": 0.1458818018436432, + "loss_ib": 0.005586378276348114, + "step": 2499 + }, + { + "ce_ib": 3.277618169784546, + "ce_orig": 0.7952898740768433, + "epoch": 0.7186713638651233, + "kl_loss": 0.27375778555870056, + "loss_ib": 0.00601519551128149, + "step": 2499 + }, + { + "epoch": 0.7189589474441009, + "grad_norm": 0.12991805374622345, + "learning_rate": 8.898072692207964e-06, + "loss": 0.8723, + "step": 2500 + }, + { + "ce_ib": 3.840437650680542, + "ce_orig": 0.5074248909950256, + "epoch": 0.7189589474441009, + "kl_loss": 0.14846184849739075, + "loss_ib": 0.00532505614683032, + "step": 2500 + }, + { + "ce_ib": 4.130908966064453, + "ce_orig": 0.29927563667297363, + "epoch": 0.7189589474441009, + "kl_loss": 0.3075806796550751, + "loss_ib": 0.007206715643405914, + "step": 2500 + }, + { + "ce_ib": 3.783940553665161, + "ce_orig": 0.6616368889808655, + "epoch": 0.7189589474441009, + "kl_loss": 0.19312883913516998, + "loss_ib": 0.005715228617191315, + "step": 2500 + }, + { + "ce_ib": 3.112264633178711, + "ce_orig": 0.8023442029953003, + "epoch": 0.7189589474441009, + "kl_loss": 0.12769372761249542, + "loss_ib": 0.004389201756566763, + "step": 2500 + }, + { + "ce_ib": 4.128509998321533, + "ce_orig": 0.8490266799926758, + "epoch": 0.7192465310230786, + "kl_loss": 0.28443443775177, + "loss_ib": 0.006972854025661945, + "step": 2501 + }, + { + "ce_ib": 5.731149673461914, + "ce_orig": 1.0165455341339111, + "epoch": 0.7192465310230786, + "kl_loss": 0.24555587768554688, + "loss_ib": 0.008186708204448223, + "step": 2501 + }, + { + "ce_ib": 7.389912128448486, + "ce_orig": 0.7189239263534546, + "epoch": 0.7192465310230786, + "kl_loss": 0.19057686626911163, + "loss_ib": 0.009295680560171604, + "step": 2501 + }, + { + "ce_ib": 4.6084723472595215, + "ce_orig": 0.571967601776123, + "epoch": 0.7192465310230786, + "kl_loss": 0.2761896848678589, + "loss_ib": 0.007370369043201208, + "step": 2501 + }, + { + "ce_ib": 5.681662559509277, + "ce_orig": 1.156119465827942, + "epoch": 0.7195341146020562, + "kl_loss": 0.23007524013519287, + "loss_ib": 0.007982415147125721, + "step": 2502 + }, + { + "ce_ib": 3.2368552684783936, + "ce_orig": 0.510309100151062, + "epoch": 0.7195341146020562, + "kl_loss": 0.15739592909812927, + "loss_ib": 0.004810814280062914, + "step": 2502 + }, + { + "ce_ib": 5.407199859619141, + "ce_orig": 0.6840211749076843, + "epoch": 0.7195341146020562, + "kl_loss": 0.15022671222686768, + "loss_ib": 0.006909466814249754, + "step": 2502 + }, + { + "ce_ib": 7.396757125854492, + "ce_orig": 1.3134000301361084, + "epoch": 0.7195341146020562, + "kl_loss": 0.35230275988578796, + "loss_ib": 0.01091978419572115, + "step": 2502 + }, + { + "ce_ib": 6.008892059326172, + "ce_orig": 1.4841415882110596, + "epoch": 0.7198216981810338, + "kl_loss": 0.22478842735290527, + "loss_ib": 0.008256776258349419, + "step": 2503 + }, + { + "ce_ib": 3.4192957878112793, + "ce_orig": 0.59815514087677, + "epoch": 0.7198216981810338, + "kl_loss": 0.14265286922454834, + "loss_ib": 0.004845824558287859, + "step": 2503 + }, + { + "ce_ib": 3.372753143310547, + "ce_orig": 0.46218565106391907, + "epoch": 0.7198216981810338, + "kl_loss": 0.14870795607566833, + "loss_ib": 0.00485983258113265, + "step": 2503 + }, + { + "ce_ib": 4.455740928649902, + "ce_orig": 0.9116576313972473, + "epoch": 0.7198216981810338, + "kl_loss": 0.27701178193092346, + "loss_ib": 0.0072258589789271355, + "step": 2503 + }, + { + "ce_ib": 3.432887554168701, + "ce_orig": 0.6222139596939087, + "epoch": 0.7201092817600115, + "kl_loss": 0.13691318035125732, + "loss_ib": 0.004802019335329533, + "step": 2504 + }, + { + "ce_ib": 2.571091651916504, + "ce_orig": 0.59248948097229, + "epoch": 0.7201092817600115, + "kl_loss": 0.12404315173625946, + "loss_ib": 0.0038115233182907104, + "step": 2504 + }, + { + "ce_ib": 2.4471139907836914, + "ce_orig": 0.5194493532180786, + "epoch": 0.7201092817600115, + "kl_loss": 0.25537437200546265, + "loss_ib": 0.005000857636332512, + "step": 2504 + }, + { + "ce_ib": 2.038858652114868, + "ce_orig": 0.3906935453414917, + "epoch": 0.7201092817600115, + "kl_loss": 0.5183064341545105, + "loss_ib": 0.007221922744065523, + "step": 2504 + }, + { + "epoch": 0.7203968653389892, + "grad_norm": 0.17863060534000397, + "learning_rate": 8.893207690248776e-06, + "loss": 0.825, + "step": 2505 + }, + { + "ce_ib": 3.943737745285034, + "ce_orig": 0.753431499004364, + "epoch": 0.7203968653389892, + "kl_loss": 0.14054058492183685, + "loss_ib": 0.005349143408238888, + "step": 2505 + }, + { + "ce_ib": 2.3115034103393555, + "ce_orig": 0.5841467976570129, + "epoch": 0.7203968653389892, + "kl_loss": 0.1173812597990036, + "loss_ib": 0.0034853159449994564, + "step": 2505 + }, + { + "ce_ib": 4.599484443664551, + "ce_orig": 0.9001040458679199, + "epoch": 0.7203968653389892, + "kl_loss": 0.25579017400741577, + "loss_ib": 0.007157386280596256, + "step": 2505 + }, + { + "ce_ib": 2.891711711883545, + "ce_orig": 0.3671533465385437, + "epoch": 0.7203968653389892, + "kl_loss": 0.37723496556282043, + "loss_ib": 0.006664061453193426, + "step": 2505 + }, + { + "ce_ib": 8.090414047241211, + "ce_orig": 1.488939642906189, + "epoch": 0.7206844489179668, + "kl_loss": 0.23095694184303284, + "loss_ib": 0.010399984195828438, + "step": 2506 + }, + { + "ce_ib": 2.080073118209839, + "ce_orig": 0.23651042580604553, + "epoch": 0.7206844489179668, + "kl_loss": 0.29194843769073486, + "loss_ib": 0.004999557510018349, + "step": 2506 + }, + { + "ce_ib": 2.61016845703125, + "ce_orig": 0.6063271760940552, + "epoch": 0.7206844489179668, + "kl_loss": 0.31345129013061523, + "loss_ib": 0.005744681227952242, + "step": 2506 + }, + { + "ce_ib": 4.059607982635498, + "ce_orig": 1.0120675563812256, + "epoch": 0.7206844489179668, + "kl_loss": 0.1832512617111206, + "loss_ib": 0.00589212030172348, + "step": 2506 + }, + { + "ce_ib": 5.670032978057861, + "ce_orig": 1.3762825727462769, + "epoch": 0.7209720324969444, + "kl_loss": 0.2068944126367569, + "loss_ib": 0.007738976739346981, + "step": 2507 + }, + { + "ce_ib": 3.239231824874878, + "ce_orig": 0.9150526523590088, + "epoch": 0.7209720324969444, + "kl_loss": 0.15922676026821136, + "loss_ib": 0.004831499420106411, + "step": 2507 + }, + { + "ce_ib": 7.722126483917236, + "ce_orig": 1.677129864692688, + "epoch": 0.7209720324969444, + "kl_loss": 0.22065219283103943, + "loss_ib": 0.009928648360073566, + "step": 2507 + }, + { + "ce_ib": 4.740816116333008, + "ce_orig": 0.7990610003471375, + "epoch": 0.7209720324969444, + "kl_loss": 0.26611655950546265, + "loss_ib": 0.007401981391012669, + "step": 2507 + }, + { + "ce_ib": 4.546820163726807, + "ce_orig": 1.0527441501617432, + "epoch": 0.721259616075922, + "kl_loss": 0.2476527988910675, + "loss_ib": 0.00702334800735116, + "step": 2508 + }, + { + "ce_ib": 5.925107955932617, + "ce_orig": 0.7070982456207275, + "epoch": 0.721259616075922, + "kl_loss": 0.17431113123893738, + "loss_ib": 0.00766821950674057, + "step": 2508 + }, + { + "ce_ib": 5.4321417808532715, + "ce_orig": 0.767119288444519, + "epoch": 0.721259616075922, + "kl_loss": 0.25727635622024536, + "loss_ib": 0.00800490565598011, + "step": 2508 + }, + { + "ce_ib": 4.150089740753174, + "ce_orig": 0.6357972025871277, + "epoch": 0.721259616075922, + "kl_loss": 0.1965765357017517, + "loss_ib": 0.0061158547177910805, + "step": 2508 + }, + { + "ce_ib": 3.5340373516082764, + "ce_orig": 0.6567212343215942, + "epoch": 0.7215471996548997, + "kl_loss": 0.16417407989501953, + "loss_ib": 0.005175778176635504, + "step": 2509 + }, + { + "ce_ib": 4.8474602699279785, + "ce_orig": 0.8372482657432556, + "epoch": 0.7215471996548997, + "kl_loss": 0.19940069317817688, + "loss_ib": 0.006841467693448067, + "step": 2509 + }, + { + "ce_ib": 3.119236469268799, + "ce_orig": 0.7961684465408325, + "epoch": 0.7215471996548997, + "kl_loss": 0.1494385302066803, + "loss_ib": 0.00461362162604928, + "step": 2509 + }, + { + "ce_ib": 6.41019344329834, + "ce_orig": 0.9595053791999817, + "epoch": 0.7215471996548997, + "kl_loss": 0.24994304776191711, + "loss_ib": 0.008909623138606548, + "step": 2509 + }, + { + "epoch": 0.7218347832338774, + "grad_norm": 0.13032162189483643, + "learning_rate": 8.888333308648631e-06, + "loss": 0.843, + "step": 2510 + }, + { + "ce_ib": 2.772088050842285, + "ce_orig": 0.6643859148025513, + "epoch": 0.7218347832338774, + "kl_loss": 0.1320253610610962, + "loss_ib": 0.0040923417545855045, + "step": 2510 + }, + { + "ce_ib": 5.378190517425537, + "ce_orig": 1.1245940923690796, + "epoch": 0.7218347832338774, + "kl_loss": 0.10469274967908859, + "loss_ib": 0.006425118073821068, + "step": 2510 + }, + { + "ce_ib": 3.6598618030548096, + "ce_orig": 0.7919801473617554, + "epoch": 0.7218347832338774, + "kl_loss": 0.17093023657798767, + "loss_ib": 0.005369164049625397, + "step": 2510 + }, + { + "ce_ib": 3.3423163890838623, + "ce_orig": 0.3750736713409424, + "epoch": 0.7218347832338774, + "kl_loss": 0.2497774213552475, + "loss_ib": 0.005840090569108725, + "step": 2510 + }, + { + "ce_ib": 4.41222620010376, + "ce_orig": 1.1678798198699951, + "epoch": 0.722122366812855, + "kl_loss": 0.25563251972198486, + "loss_ib": 0.006968551781028509, + "step": 2511 + }, + { + "ce_ib": 8.973475456237793, + "ce_orig": 1.4032347202301025, + "epoch": 0.722122366812855, + "kl_loss": 0.20035865902900696, + "loss_ib": 0.010977061465382576, + "step": 2511 + }, + { + "ce_ib": 3.2678964138031006, + "ce_orig": 0.6407129168510437, + "epoch": 0.722122366812855, + "kl_loss": 0.15741372108459473, + "loss_ib": 0.004842033609747887, + "step": 2511 + }, + { + "ce_ib": 4.30183744430542, + "ce_orig": 1.173709750175476, + "epoch": 0.722122366812855, + "kl_loss": 0.16396476328372955, + "loss_ib": 0.005941485054790974, + "step": 2511 + }, + { + "ce_ib": 3.428366184234619, + "ce_orig": 0.6137793064117432, + "epoch": 0.7224099503918326, + "kl_loss": 0.14888988435268402, + "loss_ib": 0.00491726491600275, + "step": 2512 + }, + { + "ce_ib": 5.374594688415527, + "ce_orig": 0.8426963090896606, + "epoch": 0.7224099503918326, + "kl_loss": 0.2078673392534256, + "loss_ib": 0.007453267928212881, + "step": 2512 + }, + { + "ce_ib": 4.200553894042969, + "ce_orig": 0.5894094109535217, + "epoch": 0.7224099503918326, + "kl_loss": 0.22123518586158752, + "loss_ib": 0.006412905175238848, + "step": 2512 + }, + { + "ce_ib": 4.245772838592529, + "ce_orig": 0.5918552279472351, + "epoch": 0.7224099503918326, + "kl_loss": 0.24719572067260742, + "loss_ib": 0.006717729847878218, + "step": 2512 + }, + { + "ce_ib": 6.424860000610352, + "ce_orig": 1.0758002996444702, + "epoch": 0.7226975339708103, + "kl_loss": 0.15630485117435455, + "loss_ib": 0.007987908087670803, + "step": 2513 + }, + { + "ce_ib": 2.9678447246551514, + "ce_orig": 0.6309089064598083, + "epoch": 0.7226975339708103, + "kl_loss": 0.18075816333293915, + "loss_ib": 0.004775426350533962, + "step": 2513 + }, + { + "ce_ib": 8.72826099395752, + "ce_orig": 1.753806233406067, + "epoch": 0.7226975339708103, + "kl_loss": 0.14980554580688477, + "loss_ib": 0.010226315818727016, + "step": 2513 + }, + { + "ce_ib": 3.341848373413086, + "ce_orig": 0.8971935510635376, + "epoch": 0.7226975339708103, + "kl_loss": 0.18098539113998413, + "loss_ib": 0.005151702091097832, + "step": 2513 + }, + { + "ce_ib": 4.017345428466797, + "ce_orig": 0.6597781181335449, + "epoch": 0.7229851175497879, + "kl_loss": 0.13701076805591583, + "loss_ib": 0.0053874533623456955, + "step": 2514 + }, + { + "ce_ib": 5.691319942474365, + "ce_orig": 1.2358458042144775, + "epoch": 0.7229851175497879, + "kl_loss": 0.23474542796611786, + "loss_ib": 0.008038774132728577, + "step": 2514 + }, + { + "ce_ib": 6.987742900848389, + "ce_orig": 1.5237940549850464, + "epoch": 0.7229851175497879, + "kl_loss": 0.1586611121892929, + "loss_ib": 0.008574353531002998, + "step": 2514 + }, + { + "ce_ib": 5.067503452301025, + "ce_orig": 0.7037188410758972, + "epoch": 0.7229851175497879, + "kl_loss": 0.30927878618240356, + "loss_ib": 0.008160291239619255, + "step": 2514 + }, + { + "epoch": 0.7232727011287655, + "grad_norm": 0.13385380804538727, + "learning_rate": 8.883449559151042e-06, + "loss": 0.9027, + "step": 2515 + }, + { + "ce_ib": 8.299914360046387, + "ce_orig": 1.7366583347320557, + "epoch": 0.7232727011287655, + "kl_loss": 0.14361456036567688, + "loss_ib": 0.009736059233546257, + "step": 2515 + }, + { + "ce_ib": 2.25701642036438, + "ce_orig": 0.537664532661438, + "epoch": 0.7232727011287655, + "kl_loss": 0.19600212574005127, + "loss_ib": 0.0042170374654233456, + "step": 2515 + }, + { + "ce_ib": 4.1400837898254395, + "ce_orig": 1.0750600099563599, + "epoch": 0.7232727011287655, + "kl_loss": 0.12178318202495575, + "loss_ib": 0.005357915535569191, + "step": 2515 + }, + { + "ce_ib": 2.5217180252075195, + "ce_orig": 0.578260600566864, + "epoch": 0.7232727011287655, + "kl_loss": 0.13974130153656006, + "loss_ib": 0.003919131122529507, + "step": 2515 + }, + { + "ce_ib": 2.8474795818328857, + "ce_orig": 0.6656384468078613, + "epoch": 0.7235602847077431, + "kl_loss": 0.18708130717277527, + "loss_ib": 0.0047182925045490265, + "step": 2516 + }, + { + "ce_ib": 2.9178571701049805, + "ce_orig": 0.728754460811615, + "epoch": 0.7235602847077431, + "kl_loss": 0.15307269990444183, + "loss_ib": 0.004448584280908108, + "step": 2516 + }, + { + "ce_ib": 1.1246371269226074, + "ce_orig": 0.2569318115711212, + "epoch": 0.7235602847077431, + "kl_loss": 0.5084906816482544, + "loss_ib": 0.006209543906152248, + "step": 2516 + }, + { + "ce_ib": 4.226795673370361, + "ce_orig": 0.6125168800354004, + "epoch": 0.7235602847077431, + "kl_loss": 0.21311627328395844, + "loss_ib": 0.006357958074659109, + "step": 2516 + }, + { + "ce_ib": 4.30646276473999, + "ce_orig": 1.1980448961257935, + "epoch": 0.7238478682867209, + "kl_loss": 0.1549130529165268, + "loss_ib": 0.0058555928990244865, + "step": 2517 + }, + { + "ce_ib": 5.013482570648193, + "ce_orig": 0.7624354958534241, + "epoch": 0.7238478682867209, + "kl_loss": 0.5641247034072876, + "loss_ib": 0.010654729790985584, + "step": 2517 + }, + { + "ce_ib": 2.6010067462921143, + "ce_orig": 0.7118472456932068, + "epoch": 0.7238478682867209, + "kl_loss": 0.19082625210285187, + "loss_ib": 0.00450926972553134, + "step": 2517 + }, + { + "ce_ib": 3.292039155960083, + "ce_orig": 0.6678742170333862, + "epoch": 0.7238478682867209, + "kl_loss": 0.1751147210597992, + "loss_ib": 0.005043186247348785, + "step": 2517 + }, + { + "ce_ib": 2.764521360397339, + "ce_orig": 0.553181529045105, + "epoch": 0.7241354518656985, + "kl_loss": 0.21922045946121216, + "loss_ib": 0.004956725984811783, + "step": 2518 + }, + { + "ce_ib": 3.009870767593384, + "ce_orig": 0.5886662006378174, + "epoch": 0.7241354518656985, + "kl_loss": 0.48456642031669617, + "loss_ib": 0.007855534553527832, + "step": 2518 + }, + { + "ce_ib": 1.969268560409546, + "ce_orig": 0.5540490746498108, + "epoch": 0.7241354518656985, + "kl_loss": 0.12451550364494324, + "loss_ib": 0.003214423544704914, + "step": 2518 + }, + { + "ce_ib": 4.151377201080322, + "ce_orig": 0.6580724716186523, + "epoch": 0.7241354518656985, + "kl_loss": 0.25471431016921997, + "loss_ib": 0.0066985199227929115, + "step": 2518 + }, + { + "ce_ib": 5.97609281539917, + "ce_orig": 1.0815062522888184, + "epoch": 0.7244230354446761, + "kl_loss": 0.1554882526397705, + "loss_ib": 0.00753097515553236, + "step": 2519 + }, + { + "ce_ib": 8.810497283935547, + "ce_orig": 1.5777137279510498, + "epoch": 0.7244230354446761, + "kl_loss": 0.3573387861251831, + "loss_ib": 0.012383884750306606, + "step": 2519 + }, + { + "ce_ib": 3.547301769256592, + "ce_orig": 0.5644884705543518, + "epoch": 0.7244230354446761, + "kl_loss": 0.1625167429447174, + "loss_ib": 0.005172469653189182, + "step": 2519 + }, + { + "ce_ib": 3.6891496181488037, + "ce_orig": 0.7678593397140503, + "epoch": 0.7244230354446761, + "kl_loss": 0.24497109651565552, + "loss_ib": 0.006138860248029232, + "step": 2519 + }, + { + "epoch": 0.7247106190236537, + "grad_norm": 0.15710414946079254, + "learning_rate": 8.8785564535221e-06, + "loss": 0.8678, + "step": 2520 + }, + { + "ce_ib": 2.361621379852295, + "ce_orig": 0.6400150656700134, + "epoch": 0.7247106190236537, + "kl_loss": 0.14078263938426971, + "loss_ib": 0.0037694477941840887, + "step": 2520 + }, + { + "ce_ib": 4.082233428955078, + "ce_orig": 0.9154109954833984, + "epoch": 0.7247106190236537, + "kl_loss": 0.13224422931671143, + "loss_ib": 0.0054046763107180595, + "step": 2520 + }, + { + "ce_ib": 4.477207660675049, + "ce_orig": 0.6887823939323425, + "epoch": 0.7247106190236537, + "kl_loss": 0.18788228929042816, + "loss_ib": 0.006356030702590942, + "step": 2520 + }, + { + "ce_ib": 2.5956380367279053, + "ce_orig": 0.6625695824623108, + "epoch": 0.7247106190236537, + "kl_loss": 0.14304092526435852, + "loss_ib": 0.004026047419756651, + "step": 2520 + }, + { + "ce_ib": 2.2236440181732178, + "ce_orig": 0.5976349711418152, + "epoch": 0.7249982026026314, + "kl_loss": 0.1438315510749817, + "loss_ib": 0.0036619596648961306, + "step": 2521 + }, + { + "ce_ib": 4.618163585662842, + "ce_orig": 0.9855654835700989, + "epoch": 0.7249982026026314, + "kl_loss": 0.22157877683639526, + "loss_ib": 0.0068339514546096325, + "step": 2521 + }, + { + "ce_ib": 1.657913088798523, + "ce_orig": 0.46903225779533386, + "epoch": 0.7249982026026314, + "kl_loss": 0.12339049577713013, + "loss_ib": 0.002891818294301629, + "step": 2521 + }, + { + "ce_ib": 3.5853898525238037, + "ce_orig": 0.7759209871292114, + "epoch": 0.7249982026026314, + "kl_loss": 0.19314628839492798, + "loss_ib": 0.005516852717846632, + "step": 2521 + }, + { + "ce_ib": 3.789393186569214, + "ce_orig": 1.0023432970046997, + "epoch": 0.725285786181609, + "kl_loss": 0.1431301087141037, + "loss_ib": 0.005220694467425346, + "step": 2522 + }, + { + "ce_ib": 2.6141014099121094, + "ce_orig": 0.5093699097633362, + "epoch": 0.725285786181609, + "kl_loss": 0.1443803459405899, + "loss_ib": 0.004057905171066523, + "step": 2522 + }, + { + "ce_ib": 3.5342679023742676, + "ce_orig": 0.8021438717842102, + "epoch": 0.725285786181609, + "kl_loss": 0.1735205501317978, + "loss_ib": 0.005269472952932119, + "step": 2522 + }, + { + "ce_ib": 3.0312376022338867, + "ce_orig": 0.7142080664634705, + "epoch": 0.725285786181609, + "kl_loss": 0.21719901263713837, + "loss_ib": 0.00520322797819972, + "step": 2522 + }, + { + "ce_ib": 5.138169765472412, + "ce_orig": 1.2895311117172241, + "epoch": 0.7255733697605866, + "kl_loss": 0.19524922966957092, + "loss_ib": 0.007090662606060505, + "step": 2523 + }, + { + "ce_ib": 3.6298792362213135, + "ce_orig": 0.7515292167663574, + "epoch": 0.7255733697605866, + "kl_loss": 0.2045418918132782, + "loss_ib": 0.005675297696143389, + "step": 2523 + }, + { + "ce_ib": 4.7301764488220215, + "ce_orig": 0.9368622303009033, + "epoch": 0.7255733697605866, + "kl_loss": 0.21051715314388275, + "loss_ib": 0.006835347972810268, + "step": 2523 + }, + { + "ce_ib": 2.5731325149536133, + "ce_orig": 0.7157918810844421, + "epoch": 0.7255733697605866, + "kl_loss": 0.1792164444923401, + "loss_ib": 0.00436529703438282, + "step": 2523 + }, + { + "ce_ib": 5.561503887176514, + "ce_orig": 1.002907395362854, + "epoch": 0.7258609533395644, + "kl_loss": 0.20699205994606018, + "loss_ib": 0.0076314243488013744, + "step": 2524 + }, + { + "ce_ib": 1.9409124851226807, + "ce_orig": 0.567584753036499, + "epoch": 0.7258609533395644, + "kl_loss": 0.11601073294878006, + "loss_ib": 0.003101019887253642, + "step": 2524 + }, + { + "ce_ib": 5.23681116104126, + "ce_orig": 1.2110340595245361, + "epoch": 0.7258609533395644, + "kl_loss": 0.23309381306171417, + "loss_ib": 0.0075677488930523396, + "step": 2524 + }, + { + "ce_ib": 4.491612434387207, + "ce_orig": 0.7328187227249146, + "epoch": 0.7258609533395644, + "kl_loss": 0.30518341064453125, + "loss_ib": 0.007543446961790323, + "step": 2524 + }, + { + "epoch": 0.726148536918542, + "grad_norm": 0.16274237632751465, + "learning_rate": 8.873654003550427e-06, + "loss": 0.7865, + "step": 2525 + }, + { + "ce_ib": 3.61338472366333, + "ce_orig": 0.4371030032634735, + "epoch": 0.726148536918542, + "kl_loss": 0.36879706382751465, + "loss_ib": 0.007301355246454477, + "step": 2525 + }, + { + "ce_ib": 6.2848615646362305, + "ce_orig": 1.4661558866500854, + "epoch": 0.726148536918542, + "kl_loss": 0.21298791468143463, + "loss_ib": 0.008414740674197674, + "step": 2525 + }, + { + "ce_ib": 5.078806400299072, + "ce_orig": 0.8804810643196106, + "epoch": 0.726148536918542, + "kl_loss": 0.21598049998283386, + "loss_ib": 0.007238610647618771, + "step": 2525 + }, + { + "ce_ib": 6.611787796020508, + "ce_orig": 1.1651970148086548, + "epoch": 0.726148536918542, + "kl_loss": 0.2119695395231247, + "loss_ib": 0.008731483481824398, + "step": 2525 + }, + { + "ce_ib": 4.82480525970459, + "ce_orig": 0.7575070858001709, + "epoch": 0.7264361204975196, + "kl_loss": 0.17824791371822357, + "loss_ib": 0.006607284303754568, + "step": 2526 + }, + { + "ce_ib": 8.964017868041992, + "ce_orig": 1.7281575202941895, + "epoch": 0.7264361204975196, + "kl_loss": 0.19071899354457855, + "loss_ib": 0.01087120734155178, + "step": 2526 + }, + { + "ce_ib": 4.976237773895264, + "ce_orig": 0.5733823776245117, + "epoch": 0.7264361204975196, + "kl_loss": 0.24618759751319885, + "loss_ib": 0.007438113447278738, + "step": 2526 + }, + { + "ce_ib": 4.490512371063232, + "ce_orig": 0.6584044098854065, + "epoch": 0.7264361204975196, + "kl_loss": 0.22959786653518677, + "loss_ib": 0.006786490324884653, + "step": 2526 + }, + { + "ce_ib": 5.968379020690918, + "ce_orig": 1.2476915121078491, + "epoch": 0.7267237040764972, + "kl_loss": 0.26528942584991455, + "loss_ib": 0.008621273562312126, + "step": 2527 + }, + { + "ce_ib": 6.964885711669922, + "ce_orig": 0.740926206111908, + "epoch": 0.7267237040764972, + "kl_loss": 0.18319736421108246, + "loss_ib": 0.00879685953259468, + "step": 2527 + }, + { + "ce_ib": 2.9035191535949707, + "ce_orig": 0.8801014423370361, + "epoch": 0.7267237040764972, + "kl_loss": 0.1203213483095169, + "loss_ib": 0.004106732551008463, + "step": 2527 + }, + { + "ce_ib": 2.587808609008789, + "ce_orig": 0.4864714443683624, + "epoch": 0.7267237040764972, + "kl_loss": 0.19787320494651794, + "loss_ib": 0.004566540475934744, + "step": 2527 + }, + { + "ce_ib": 4.049018383026123, + "ce_orig": 0.6973385214805603, + "epoch": 0.7270112876554748, + "kl_loss": 0.2172483503818512, + "loss_ib": 0.006221501622349024, + "step": 2528 + }, + { + "ce_ib": 2.6029727458953857, + "ce_orig": 0.4793729782104492, + "epoch": 0.7270112876554748, + "kl_loss": 0.20938065648078918, + "loss_ib": 0.004696778953075409, + "step": 2528 + }, + { + "ce_ib": 4.643763065338135, + "ce_orig": 0.8491379618644714, + "epoch": 0.7270112876554748, + "kl_loss": 0.16997799277305603, + "loss_ib": 0.006343542598187923, + "step": 2528 + }, + { + "ce_ib": 4.75612735748291, + "ce_orig": 0.9615321755409241, + "epoch": 0.7270112876554748, + "kl_loss": 0.21524907648563385, + "loss_ib": 0.006908617913722992, + "step": 2528 + }, + { + "ce_ib": 3.5874319076538086, + "ce_orig": 0.5294368267059326, + "epoch": 0.7272988712344525, + "kl_loss": 0.22930535674095154, + "loss_ib": 0.005880485754460096, + "step": 2529 + }, + { + "ce_ib": 4.689302444458008, + "ce_orig": 0.8774774670600891, + "epoch": 0.7272988712344525, + "kl_loss": 0.15649212896823883, + "loss_ib": 0.006254223641008139, + "step": 2529 + }, + { + "ce_ib": 3.474750280380249, + "ce_orig": 0.7626444101333618, + "epoch": 0.7272988712344525, + "kl_loss": 0.16305047273635864, + "loss_ib": 0.005105254705995321, + "step": 2529 + }, + { + "ce_ib": 4.572054862976074, + "ce_orig": 0.7977743744850159, + "epoch": 0.7272988712344525, + "kl_loss": 0.25873106718063354, + "loss_ib": 0.007159365341067314, + "step": 2529 + }, + { + "epoch": 0.7275864548134302, + "grad_norm": 0.13307194411754608, + "learning_rate": 8.868742221047166e-06, + "loss": 0.8624, + "step": 2530 + }, + { + "ce_ib": 7.569628715515137, + "ce_orig": 1.3519738912582397, + "epoch": 0.7275864548134302, + "kl_loss": 0.1869397908449173, + "loss_ib": 0.009439026936888695, + "step": 2530 + }, + { + "ce_ib": 2.900282859802246, + "ce_orig": 0.45444416999816895, + "epoch": 0.7275864548134302, + "kl_loss": 0.20257222652435303, + "loss_ib": 0.004926005378365517, + "step": 2530 + }, + { + "ce_ib": 4.944513320922852, + "ce_orig": 0.6527019143104553, + "epoch": 0.7275864548134302, + "kl_loss": 0.2105763703584671, + "loss_ib": 0.007050276733934879, + "step": 2530 + }, + { + "ce_ib": 6.565045356750488, + "ce_orig": 1.4448590278625488, + "epoch": 0.7275864548134302, + "kl_loss": 0.16757765412330627, + "loss_ib": 0.008240822702646255, + "step": 2530 + }, + { + "ce_ib": 3.29996657371521, + "ce_orig": 0.8067175149917603, + "epoch": 0.7278740383924078, + "kl_loss": 0.1416984647512436, + "loss_ib": 0.004716950934380293, + "step": 2531 + }, + { + "ce_ib": 4.659356117248535, + "ce_orig": 0.8202959299087524, + "epoch": 0.7278740383924078, + "kl_loss": 0.2616799473762512, + "loss_ib": 0.007276155985891819, + "step": 2531 + }, + { + "ce_ib": 5.321714401245117, + "ce_orig": 0.8288587927818298, + "epoch": 0.7278740383924078, + "kl_loss": 0.2016560286283493, + "loss_ib": 0.007338274735957384, + "step": 2531 + }, + { + "ce_ib": 2.9782910346984863, + "ce_orig": 0.68231201171875, + "epoch": 0.7278740383924078, + "kl_loss": 0.14082930982112885, + "loss_ib": 0.004386584274470806, + "step": 2531 + }, + { + "ce_ib": 2.549398422241211, + "ce_orig": 0.5398287773132324, + "epoch": 0.7281616219713855, + "kl_loss": 0.16559934616088867, + "loss_ib": 0.004205391742289066, + "step": 2532 + }, + { + "ce_ib": 6.580379009246826, + "ce_orig": 1.1940321922302246, + "epoch": 0.7281616219713855, + "kl_loss": 0.16324348747730255, + "loss_ib": 0.008212813176214695, + "step": 2532 + }, + { + "ce_ib": 4.75060510635376, + "ce_orig": 0.731674075126648, + "epoch": 0.7281616219713855, + "kl_loss": 0.2523269057273865, + "loss_ib": 0.007273874245584011, + "step": 2532 + }, + { + "ce_ib": 4.7401909828186035, + "ce_orig": 0.9838863015174866, + "epoch": 0.7281616219713855, + "kl_loss": 0.14462025463581085, + "loss_ib": 0.006186393555253744, + "step": 2532 + }, + { + "ce_ib": 3.902693033218384, + "ce_orig": 0.7335491180419922, + "epoch": 0.7284492055503631, + "kl_loss": 0.1239098310470581, + "loss_ib": 0.005141790956258774, + "step": 2533 + }, + { + "ce_ib": 3.317328453063965, + "ce_orig": 0.5207099914550781, + "epoch": 0.7284492055503631, + "kl_loss": 0.28617584705352783, + "loss_ib": 0.006179086863994598, + "step": 2533 + }, + { + "ce_ib": 3.0068206787109375, + "ce_orig": 0.6773881912231445, + "epoch": 0.7284492055503631, + "kl_loss": 0.15056882798671722, + "loss_ib": 0.004512508865445852, + "step": 2533 + }, + { + "ce_ib": 3.3238332271575928, + "ce_orig": 0.6099371314048767, + "epoch": 0.7284492055503631, + "kl_loss": 0.22859710454940796, + "loss_ib": 0.0056098042987287045, + "step": 2533 + }, + { + "ce_ib": 4.972322463989258, + "ce_orig": 0.9544420838356018, + "epoch": 0.7287367891293407, + "kl_loss": 0.21976974606513977, + "loss_ib": 0.0071700201369822025, + "step": 2534 + }, + { + "ce_ib": 2.9089255332946777, + "ce_orig": 0.47158682346343994, + "epoch": 0.7287367891293407, + "kl_loss": 0.1894119679927826, + "loss_ib": 0.004803045187145472, + "step": 2534 + }, + { + "ce_ib": 3.936174154281616, + "ce_orig": 0.7770448923110962, + "epoch": 0.7287367891293407, + "kl_loss": 0.25812530517578125, + "loss_ib": 0.006517427042126656, + "step": 2534 + }, + { + "ce_ib": 4.223024845123291, + "ce_orig": 0.7272053956985474, + "epoch": 0.7287367891293407, + "kl_loss": 0.1604825258255005, + "loss_ib": 0.005827850196510553, + "step": 2534 + }, + { + "epoch": 0.7290243727083183, + "grad_norm": 0.12158993631601334, + "learning_rate": 8.863821117845941e-06, + "loss": 0.8604, + "step": 2535 + }, + { + "ce_ib": 4.316573143005371, + "ce_orig": 0.6328327059745789, + "epoch": 0.7290243727083183, + "kl_loss": 0.21117781102657318, + "loss_ib": 0.0064283511601388454, + "step": 2535 + }, + { + "ce_ib": 5.242321014404297, + "ce_orig": 0.6888391971588135, + "epoch": 0.7290243727083183, + "kl_loss": 0.26725026965141296, + "loss_ib": 0.007914823479950428, + "step": 2535 + }, + { + "ce_ib": 3.653698205947876, + "ce_orig": 0.8008305430412292, + "epoch": 0.7290243727083183, + "kl_loss": 0.21913787722587585, + "loss_ib": 0.005845076870173216, + "step": 2535 + }, + { + "ce_ib": 7.566171646118164, + "ce_orig": 1.3247278928756714, + "epoch": 0.7290243727083183, + "kl_loss": 0.2749588191509247, + "loss_ib": 0.010315760038793087, + "step": 2535 + }, + { + "ce_ib": 5.654600620269775, + "ce_orig": 0.9481891393661499, + "epoch": 0.729311956287296, + "kl_loss": 0.2006463259458542, + "loss_ib": 0.007661064155399799, + "step": 2536 + }, + { + "ce_ib": 5.602104187011719, + "ce_orig": 1.1102676391601562, + "epoch": 0.729311956287296, + "kl_loss": 0.24522778391838074, + "loss_ib": 0.008054381236433983, + "step": 2536 + }, + { + "ce_ib": 4.494493007659912, + "ce_orig": 0.938625693321228, + "epoch": 0.729311956287296, + "kl_loss": 0.2041284143924713, + "loss_ib": 0.006535776890814304, + "step": 2536 + }, + { + "ce_ib": 3.61446475982666, + "ce_orig": 0.6642093062400818, + "epoch": 0.729311956287296, + "kl_loss": 0.1920737624168396, + "loss_ib": 0.0055352021008729935, + "step": 2536 + }, + { + "ce_ib": 5.035924434661865, + "ce_orig": 0.6532450318336487, + "epoch": 0.7295995398662737, + "kl_loss": 0.220256507396698, + "loss_ib": 0.007238489110022783, + "step": 2537 + }, + { + "ce_ib": 4.157240867614746, + "ce_orig": 0.9606974124908447, + "epoch": 0.7295995398662737, + "kl_loss": 0.2437390685081482, + "loss_ib": 0.006594631355255842, + "step": 2537 + }, + { + "ce_ib": 5.654318332672119, + "ce_orig": 1.41196870803833, + "epoch": 0.7295995398662737, + "kl_loss": 0.18301436305046082, + "loss_ib": 0.007484462112188339, + "step": 2537 + }, + { + "ce_ib": 3.323256731033325, + "ce_orig": 0.5386354923248291, + "epoch": 0.7295995398662737, + "kl_loss": 0.19296641647815704, + "loss_ib": 0.005252921022474766, + "step": 2537 + }, + { + "ce_ib": 5.815941333770752, + "ce_orig": 0.8353392481803894, + "epoch": 0.7298871234452513, + "kl_loss": 0.34108874201774597, + "loss_ib": 0.009226828813552856, + "step": 2538 + }, + { + "ce_ib": 2.1584155559539795, + "ce_orig": 0.569779098033905, + "epoch": 0.7298871234452513, + "kl_loss": 0.14150260388851166, + "loss_ib": 0.0035734414122998714, + "step": 2538 + }, + { + "ce_ib": 4.030116081237793, + "ce_orig": 0.679789662361145, + "epoch": 0.7298871234452513, + "kl_loss": 0.19638001918792725, + "loss_ib": 0.0059939161874353886, + "step": 2538 + }, + { + "ce_ib": 4.3823561668396, + "ce_orig": 0.9256640672683716, + "epoch": 0.7298871234452513, + "kl_loss": 0.17284919321537018, + "loss_ib": 0.006110847927629948, + "step": 2538 + }, + { + "ce_ib": 6.645534992218018, + "ce_orig": 1.5445504188537598, + "epoch": 0.7301747070242289, + "kl_loss": 0.18235301971435547, + "loss_ib": 0.00846906565129757, + "step": 2539 + }, + { + "ce_ib": 4.604394912719727, + "ce_orig": 0.8400562405586243, + "epoch": 0.7301747070242289, + "kl_loss": 0.2532072961330414, + "loss_ib": 0.007136467378586531, + "step": 2539 + }, + { + "ce_ib": 6.2900261878967285, + "ce_orig": 1.0496379137039185, + "epoch": 0.7301747070242289, + "kl_loss": 0.2555379867553711, + "loss_ib": 0.008845405653119087, + "step": 2539 + }, + { + "ce_ib": 3.1146092414855957, + "ce_orig": 0.6226138472557068, + "epoch": 0.7301747070242289, + "kl_loss": 0.14337599277496338, + "loss_ib": 0.004548369441181421, + "step": 2539 + }, + { + "epoch": 0.7304622906032066, + "grad_norm": 0.1273193210363388, + "learning_rate": 8.85889070580283e-06, + "loss": 0.8603, + "step": 2540 + }, + { + "ce_ib": 3.3540663719177246, + "ce_orig": 0.5802752375602722, + "epoch": 0.7304622906032066, + "kl_loss": 0.2491673231124878, + "loss_ib": 0.005845739506185055, + "step": 2540 + }, + { + "ce_ib": 2.8744521141052246, + "ce_orig": 0.5922904014587402, + "epoch": 0.7304622906032066, + "kl_loss": 0.2469978928565979, + "loss_ib": 0.005344431381672621, + "step": 2540 + }, + { + "ce_ib": 3.6111652851104736, + "ce_orig": 0.7384878993034363, + "epoch": 0.7304622906032066, + "kl_loss": 0.16603347659111023, + "loss_ib": 0.005271500442177057, + "step": 2540 + }, + { + "ce_ib": 3.283325433731079, + "ce_orig": 0.7728832960128784, + "epoch": 0.7304622906032066, + "kl_loss": 0.1811826378107071, + "loss_ib": 0.0050951517187058926, + "step": 2540 + }, + { + "ce_ib": 4.673333168029785, + "ce_orig": 0.6707044839859009, + "epoch": 0.7307498741821842, + "kl_loss": 0.20945781469345093, + "loss_ib": 0.006767911370843649, + "step": 2541 + }, + { + "ce_ib": 9.975776672363281, + "ce_orig": 2.087721586227417, + "epoch": 0.7307498741821842, + "kl_loss": 0.2019626498222351, + "loss_ib": 0.011995403096079826, + "step": 2541 + }, + { + "ce_ib": 2.5355749130249023, + "ce_orig": 0.314893513917923, + "epoch": 0.7307498741821842, + "kl_loss": 0.5428642630577087, + "loss_ib": 0.007964217104017735, + "step": 2541 + }, + { + "ce_ib": 7.82562255859375, + "ce_orig": 1.3675401210784912, + "epoch": 0.7307498741821842, + "kl_loss": 0.2041248083114624, + "loss_ib": 0.009866870008409023, + "step": 2541 + }, + { + "ce_ib": 8.856773376464844, + "ce_orig": 1.4997708797454834, + "epoch": 0.7310374577611618, + "kl_loss": 0.2434220314025879, + "loss_ib": 0.01129099354147911, + "step": 2542 + }, + { + "ce_ib": 3.4646103382110596, + "ce_orig": 0.7923183441162109, + "epoch": 0.7310374577611618, + "kl_loss": 0.1896541863679886, + "loss_ib": 0.005361152347177267, + "step": 2542 + }, + { + "ce_ib": 5.8178019523620605, + "ce_orig": 1.1129672527313232, + "epoch": 0.7310374577611618, + "kl_loss": 0.21624338626861572, + "loss_ib": 0.00798023585230112, + "step": 2542 + }, + { + "ce_ib": 5.676085948944092, + "ce_orig": 1.1327587366104126, + "epoch": 0.7310374577611618, + "kl_loss": 0.17191913723945618, + "loss_ib": 0.007395276799798012, + "step": 2542 + }, + { + "ce_ib": 4.287909984588623, + "ce_orig": 1.0197404623031616, + "epoch": 0.7313250413401394, + "kl_loss": 0.1528233289718628, + "loss_ib": 0.005816143471747637, + "step": 2543 + }, + { + "ce_ib": 6.55578088760376, + "ce_orig": 1.0446761846542358, + "epoch": 0.7313250413401394, + "kl_loss": 0.22786542773246765, + "loss_ib": 0.008834434673190117, + "step": 2543 + }, + { + "ce_ib": 3.1340205669403076, + "ce_orig": 0.5964686870574951, + "epoch": 0.7313250413401394, + "kl_loss": 0.1861734390258789, + "loss_ib": 0.004995754919946194, + "step": 2543 + }, + { + "ce_ib": 4.182183742523193, + "ce_orig": 1.021657943725586, + "epoch": 0.7313250413401394, + "kl_loss": 0.16932624578475952, + "loss_ib": 0.005875445902347565, + "step": 2543 + }, + { + "ce_ib": 2.6951730251312256, + "ce_orig": 0.7059181928634644, + "epoch": 0.7316126249191172, + "kl_loss": 0.13691604137420654, + "loss_ib": 0.004064333159476519, + "step": 2544 + }, + { + "ce_ib": 1.5954822301864624, + "ce_orig": 0.4164380729198456, + "epoch": 0.7316126249191172, + "kl_loss": 0.21253317594528198, + "loss_ib": 0.0037208138965070248, + "step": 2544 + }, + { + "ce_ib": 2.180385112762451, + "ce_orig": 0.3864493668079376, + "epoch": 0.7316126249191172, + "kl_loss": 0.12041348218917847, + "loss_ib": 0.0033845198340713978, + "step": 2544 + }, + { + "ce_ib": 3.884178638458252, + "ce_orig": 0.676811933517456, + "epoch": 0.7316126249191172, + "kl_loss": 0.14880794286727905, + "loss_ib": 0.005372258368879557, + "step": 2544 + }, + { + "epoch": 0.7319002084980948, + "grad_norm": 0.15186643600463867, + "learning_rate": 8.853950996796341e-06, + "loss": 0.8397, + "step": 2545 + }, + { + "ce_ib": 3.329721212387085, + "ce_orig": 0.4718300998210907, + "epoch": 0.7319002084980948, + "kl_loss": 0.15227025747299194, + "loss_ib": 0.004852423910051584, + "step": 2545 + }, + { + "ce_ib": 3.374490737915039, + "ce_orig": 0.5379548668861389, + "epoch": 0.7319002084980948, + "kl_loss": 0.21276816725730896, + "loss_ib": 0.0055021727457642555, + "step": 2545 + }, + { + "ce_ib": 4.824540615081787, + "ce_orig": 1.1002769470214844, + "epoch": 0.7319002084980948, + "kl_loss": 0.24835695326328278, + "loss_ib": 0.007308109663426876, + "step": 2545 + }, + { + "ce_ib": 8.406145095825195, + "ce_orig": 1.682849645614624, + "epoch": 0.7319002084980948, + "kl_loss": 0.24956876039505005, + "loss_ib": 0.010901832021772861, + "step": 2545 + }, + { + "ce_ib": 4.42384147644043, + "ce_orig": 1.225504755973816, + "epoch": 0.7321877920770724, + "kl_loss": 0.17111210525035858, + "loss_ib": 0.006134962197393179, + "step": 2546 + }, + { + "ce_ib": 6.234374523162842, + "ce_orig": 1.0164705514907837, + "epoch": 0.7321877920770724, + "kl_loss": 0.1942627876996994, + "loss_ib": 0.008177002891898155, + "step": 2546 + }, + { + "ce_ib": 4.526999473571777, + "ce_orig": 0.8007500767707825, + "epoch": 0.7321877920770724, + "kl_loss": 0.1164974719285965, + "loss_ib": 0.0056919739581644535, + "step": 2546 + }, + { + "ce_ib": 6.8128461837768555, + "ce_orig": 1.4955042600631714, + "epoch": 0.7321877920770724, + "kl_loss": 0.2636832296848297, + "loss_ib": 0.009449678473174572, + "step": 2546 + }, + { + "ce_ib": 2.5461623668670654, + "ce_orig": 0.6503687500953674, + "epoch": 0.73247537565605, + "kl_loss": 0.15859639644622803, + "loss_ib": 0.0041321259923279285, + "step": 2547 + }, + { + "ce_ib": 4.421162128448486, + "ce_orig": 0.7524070739746094, + "epoch": 0.73247537565605, + "kl_loss": 0.17589130997657776, + "loss_ib": 0.006180075462907553, + "step": 2547 + }, + { + "ce_ib": 2.897881031036377, + "ce_orig": 0.5212623476982117, + "epoch": 0.73247537565605, + "kl_loss": 0.1743762493133545, + "loss_ib": 0.004641643259674311, + "step": 2547 + }, + { + "ce_ib": 4.898672103881836, + "ce_orig": 1.0489792823791504, + "epoch": 0.73247537565605, + "kl_loss": 0.21361450850963593, + "loss_ib": 0.00703481724485755, + "step": 2547 + }, + { + "ce_ib": 6.72739839553833, + "ce_orig": 1.5949162244796753, + "epoch": 0.7327629592350277, + "kl_loss": 0.22439390420913696, + "loss_ib": 0.008971337229013443, + "step": 2548 + }, + { + "ce_ib": 3.26434063911438, + "ce_orig": 0.4731270968914032, + "epoch": 0.7327629592350277, + "kl_loss": 0.17709501087665558, + "loss_ib": 0.005035290494561195, + "step": 2548 + }, + { + "ce_ib": 4.29093599319458, + "ce_orig": 0.7910265326499939, + "epoch": 0.7327629592350277, + "kl_loss": 0.25194281339645386, + "loss_ib": 0.006810363847762346, + "step": 2548 + }, + { + "ce_ib": 3.2080788612365723, + "ce_orig": 0.7851672768592834, + "epoch": 0.7327629592350277, + "kl_loss": 0.19290344417095184, + "loss_ib": 0.0051371133886277676, + "step": 2548 + }, + { + "ce_ib": 6.732349395751953, + "ce_orig": 0.7863187789916992, + "epoch": 0.7330505428140053, + "kl_loss": 0.3710494041442871, + "loss_ib": 0.010442843660712242, + "step": 2549 + }, + { + "ce_ib": 4.101581573486328, + "ce_orig": 1.079390048980713, + "epoch": 0.7330505428140053, + "kl_loss": 0.6515724062919617, + "loss_ib": 0.01061730645596981, + "step": 2549 + }, + { + "ce_ib": 5.336399555206299, + "ce_orig": 1.1182750463485718, + "epoch": 0.7330505428140053, + "kl_loss": 0.2290104776620865, + "loss_ib": 0.007626504171639681, + "step": 2549 + }, + { + "ce_ib": 4.31520414352417, + "ce_orig": 0.9655687808990479, + "epoch": 0.7330505428140053, + "kl_loss": 0.17504623532295227, + "loss_ib": 0.006065666675567627, + "step": 2549 + }, + { + "epoch": 0.733338126392983, + "grad_norm": 0.13281425833702087, + "learning_rate": 8.84900200272738e-06, + "loss": 0.8862, + "step": 2550 + }, + { + "ce_ib": 2.9421887397766113, + "ce_orig": 0.6561675667762756, + "epoch": 0.733338126392983, + "kl_loss": 0.18979209661483765, + "loss_ib": 0.004840109497308731, + "step": 2550 + }, + { + "ce_ib": 1.5935217142105103, + "ce_orig": 0.42675676941871643, + "epoch": 0.733338126392983, + "kl_loss": 0.5875999927520752, + "loss_ib": 0.00746952136978507, + "step": 2550 + }, + { + "ce_ib": 7.481528282165527, + "ce_orig": 0.9564614295959473, + "epoch": 0.733338126392983, + "kl_loss": 0.3910300135612488, + "loss_ib": 0.011391827836632729, + "step": 2550 + }, + { + "ce_ib": 4.764713764190674, + "ce_orig": 0.9402880072593689, + "epoch": 0.733338126392983, + "kl_loss": 0.19707155227661133, + "loss_ib": 0.006735429633408785, + "step": 2550 + }, + { + "ce_ib": 6.444184303283691, + "ce_orig": 1.5136650800704956, + "epoch": 0.7336257099719606, + "kl_loss": 0.17555882036685944, + "loss_ib": 0.008199771866202354, + "step": 2551 + }, + { + "ce_ib": 4.974852085113525, + "ce_orig": 0.6899729371070862, + "epoch": 0.7336257099719606, + "kl_loss": 0.15493717789649963, + "loss_ib": 0.006524224299937487, + "step": 2551 + }, + { + "ce_ib": 3.880821943283081, + "ce_orig": 0.6791972517967224, + "epoch": 0.7336257099719606, + "kl_loss": 0.13545028865337372, + "loss_ib": 0.005235325079411268, + "step": 2551 + }, + { + "ce_ib": 3.2677886486053467, + "ce_orig": 0.6525985598564148, + "epoch": 0.7336257099719606, + "kl_loss": 0.23155803978443146, + "loss_ib": 0.005583369173109531, + "step": 2551 + }, + { + "ce_ib": 8.704572677612305, + "ce_orig": 1.3541654348373413, + "epoch": 0.7339132935509383, + "kl_loss": 0.21871531009674072, + "loss_ib": 0.010891725309193134, + "step": 2552 + }, + { + "ce_ib": 5.087385177612305, + "ce_orig": 0.8132082223892212, + "epoch": 0.7339132935509383, + "kl_loss": 0.24268057942390442, + "loss_ib": 0.007514190394431353, + "step": 2552 + }, + { + "ce_ib": 4.962815761566162, + "ce_orig": 1.0737613439559937, + "epoch": 0.7339132935509383, + "kl_loss": 0.15456260740756989, + "loss_ib": 0.006508442107588053, + "step": 2552 + }, + { + "ce_ib": 5.14438009262085, + "ce_orig": 0.9246877431869507, + "epoch": 0.7339132935509383, + "kl_loss": 0.15848132967948914, + "loss_ib": 0.006729193963110447, + "step": 2552 + }, + { + "ce_ib": 4.025750637054443, + "ce_orig": 0.502545177936554, + "epoch": 0.7342008771299159, + "kl_loss": 0.24753263592720032, + "loss_ib": 0.0065010772086679935, + "step": 2553 + }, + { + "ce_ib": 5.4704976081848145, + "ce_orig": 1.0458730459213257, + "epoch": 0.7342008771299159, + "kl_loss": 0.4560311436653137, + "loss_ib": 0.010030807927250862, + "step": 2553 + }, + { + "ce_ib": 5.9472150802612305, + "ce_orig": 0.9928147792816162, + "epoch": 0.7342008771299159, + "kl_loss": 0.14948582649230957, + "loss_ib": 0.007442072965204716, + "step": 2553 + }, + { + "ce_ib": 4.897968292236328, + "ce_orig": 0.8965709209442139, + "epoch": 0.7342008771299159, + "kl_loss": 0.13417254388332367, + "loss_ib": 0.006239693611860275, + "step": 2553 + }, + { + "ce_ib": 4.33453369140625, + "ce_orig": 1.161650538444519, + "epoch": 0.7344884607088935, + "kl_loss": 0.2302822470664978, + "loss_ib": 0.006637356244027615, + "step": 2554 + }, + { + "ce_ib": 3.0067250728607178, + "ce_orig": 0.7108209729194641, + "epoch": 0.7344884607088935, + "kl_loss": 0.14400392770767212, + "loss_ib": 0.004446764010936022, + "step": 2554 + }, + { + "ce_ib": 6.579458713531494, + "ce_orig": 1.1703170537948608, + "epoch": 0.7344884607088935, + "kl_loss": 0.3049105405807495, + "loss_ib": 0.00962856411933899, + "step": 2554 + }, + { + "ce_ib": 2.6723830699920654, + "ce_orig": 0.241280660033226, + "epoch": 0.7344884607088935, + "kl_loss": 0.13743741810321808, + "loss_ib": 0.004046757239848375, + "step": 2554 + }, + { + "epoch": 0.7347760442878711, + "grad_norm": 0.13614144921302795, + "learning_rate": 8.844043735519223e-06, + "loss": 0.8584, + "step": 2555 + }, + { + "ce_ib": 9.4235258102417, + "ce_orig": 2.113218069076538, + "epoch": 0.7347760442878711, + "kl_loss": 0.2493559569120407, + "loss_ib": 0.011917085386812687, + "step": 2555 + }, + { + "ce_ib": 4.260012149810791, + "ce_orig": 0.9707128405570984, + "epoch": 0.7347760442878711, + "kl_loss": 0.223122239112854, + "loss_ib": 0.0064912340603768826, + "step": 2555 + }, + { + "ce_ib": 4.282027244567871, + "ce_orig": 1.0054616928100586, + "epoch": 0.7347760442878711, + "kl_loss": 0.1813632696866989, + "loss_ib": 0.006095659453421831, + "step": 2555 + }, + { + "ce_ib": 3.7958266735076904, + "ce_orig": 0.5763915777206421, + "epoch": 0.7347760442878711, + "kl_loss": 0.33603981137275696, + "loss_ib": 0.0071562244556844234, + "step": 2555 + }, + { + "ce_ib": 9.375370979309082, + "ce_orig": 2.093088150024414, + "epoch": 0.7350636278668488, + "kl_loss": 0.19322378933429718, + "loss_ib": 0.011307609267532825, + "step": 2556 + }, + { + "ce_ib": 1.146724820137024, + "ce_orig": 0.2515225410461426, + "epoch": 0.7350636278668488, + "kl_loss": 0.3781161606311798, + "loss_ib": 0.004927886184304953, + "step": 2556 + }, + { + "ce_ib": 2.7138261795043945, + "ce_orig": 0.7228332161903381, + "epoch": 0.7350636278668488, + "kl_loss": 0.11352983117103577, + "loss_ib": 0.0038491245359182358, + "step": 2556 + }, + { + "ce_ib": 4.3683342933654785, + "ce_orig": 0.6672880053520203, + "epoch": 0.7350636278668488, + "kl_loss": 0.18328242003917694, + "loss_ib": 0.006201158743351698, + "step": 2556 + }, + { + "ce_ib": 2.801339864730835, + "ce_orig": 0.5198619365692139, + "epoch": 0.7353512114458265, + "kl_loss": 0.16145440936088562, + "loss_ib": 0.004415884148329496, + "step": 2557 + }, + { + "ce_ib": 3.8393261432647705, + "ce_orig": 0.5997737050056458, + "epoch": 0.7353512114458265, + "kl_loss": 0.1668006181716919, + "loss_ib": 0.005507331807166338, + "step": 2557 + }, + { + "ce_ib": 3.470797300338745, + "ce_orig": 0.8124134540557861, + "epoch": 0.7353512114458265, + "kl_loss": 0.18060387670993805, + "loss_ib": 0.005276835989207029, + "step": 2557 + }, + { + "ce_ib": 5.477876663208008, + "ce_orig": 1.432835578918457, + "epoch": 0.7353512114458265, + "kl_loss": 0.19081763923168182, + "loss_ib": 0.00738605298101902, + "step": 2557 + }, + { + "ce_ib": 3.6735334396362305, + "ce_orig": 0.3612663745880127, + "epoch": 0.7356387950248041, + "kl_loss": 0.4769405722618103, + "loss_ib": 0.008442939259111881, + "step": 2558 + }, + { + "ce_ib": 4.266597270965576, + "ce_orig": 0.6615706086158752, + "epoch": 0.7356387950248041, + "kl_loss": 0.18313086032867432, + "loss_ib": 0.006097905337810516, + "step": 2558 + }, + { + "ce_ib": 1.50693678855896, + "ce_orig": 0.37383392453193665, + "epoch": 0.7356387950248041, + "kl_loss": 0.1151699498295784, + "loss_ib": 0.0026586363092064857, + "step": 2558 + }, + { + "ce_ib": 6.807993412017822, + "ce_orig": 0.8396763205528259, + "epoch": 0.7356387950248041, + "kl_loss": 0.22540436685085297, + "loss_ib": 0.009062036871910095, + "step": 2558 + }, + { + "ce_ib": 4.866621017456055, + "ce_orig": 0.9644668698310852, + "epoch": 0.7359263786037817, + "kl_loss": 0.3721684217453003, + "loss_ib": 0.008588305674493313, + "step": 2559 + }, + { + "ce_ib": 5.09184455871582, + "ce_orig": 1.1408530473709106, + "epoch": 0.7359263786037817, + "kl_loss": 0.1891973614692688, + "loss_ib": 0.006983818486332893, + "step": 2559 + }, + { + "ce_ib": 6.938527584075928, + "ce_orig": 1.1939775943756104, + "epoch": 0.7359263786037817, + "kl_loss": 0.19274920225143433, + "loss_ib": 0.008866019546985626, + "step": 2559 + }, + { + "ce_ib": 5.292520523071289, + "ce_orig": 0.9220177531242371, + "epoch": 0.7359263786037817, + "kl_loss": 0.11681418120861053, + "loss_ib": 0.006460662465542555, + "step": 2559 + }, + { + "epoch": 0.7362139621827594, + "grad_norm": 0.14909107983112335, + "learning_rate": 8.839076207117485e-06, + "loss": 0.856, + "step": 2560 + }, + { + "ce_ib": 6.5187249183654785, + "ce_orig": 1.2658649682998657, + "epoch": 0.7362139621827594, + "kl_loss": 0.2650366425514221, + "loss_ib": 0.00916909147053957, + "step": 2560 + }, + { + "ce_ib": 2.778655529022217, + "ce_orig": 0.7510286569595337, + "epoch": 0.7362139621827594, + "kl_loss": 0.32148587703704834, + "loss_ib": 0.005993513856083155, + "step": 2560 + }, + { + "ce_ib": 7.402314186096191, + "ce_orig": 1.5245254039764404, + "epoch": 0.7362139621827594, + "kl_loss": 0.22213423252105713, + "loss_ib": 0.00962365698069334, + "step": 2560 + }, + { + "ce_ib": 2.8126060962677, + "ce_orig": 0.6918426752090454, + "epoch": 0.7362139621827594, + "kl_loss": 0.16680511832237244, + "loss_ib": 0.004480657167732716, + "step": 2560 + }, + { + "ce_ib": 6.149195194244385, + "ce_orig": 1.0709120035171509, + "epoch": 0.736501545761737, + "kl_loss": 0.4859921336174011, + "loss_ib": 0.011009115725755692, + "step": 2561 + }, + { + "ce_ib": 5.254208564758301, + "ce_orig": 1.0476106405258179, + "epoch": 0.736501545761737, + "kl_loss": 0.21358159184455872, + "loss_ib": 0.007390024606138468, + "step": 2561 + }, + { + "ce_ib": 4.236629486083984, + "ce_orig": 0.8053039908409119, + "epoch": 0.736501545761737, + "kl_loss": 0.2696879506111145, + "loss_ib": 0.0069335089065134525, + "step": 2561 + }, + { + "ce_ib": 2.903453826904297, + "ce_orig": 0.29160189628601074, + "epoch": 0.736501545761737, + "kl_loss": 0.21754445135593414, + "loss_ib": 0.0050788987427949905, + "step": 2561 + }, + { + "ce_ib": 3.7869420051574707, + "ce_orig": 0.4276214838027954, + "epoch": 0.7367891293407146, + "kl_loss": 0.23804032802581787, + "loss_ib": 0.0061673447489738464, + "step": 2562 + }, + { + "ce_ib": 2.3639414310455322, + "ce_orig": 0.2768684923648834, + "epoch": 0.7367891293407146, + "kl_loss": 0.17740586400032043, + "loss_ib": 0.004137999843806028, + "step": 2562 + }, + { + "ce_ib": 4.40699577331543, + "ce_orig": 0.7448769807815552, + "epoch": 0.7367891293407146, + "kl_loss": 0.19818249344825745, + "loss_ib": 0.006388820242136717, + "step": 2562 + }, + { + "ce_ib": 2.5941619873046875, + "ce_orig": 0.5460858345031738, + "epoch": 0.7367891293407146, + "kl_loss": 0.18133410811424255, + "loss_ib": 0.004407502710819244, + "step": 2562 + }, + { + "ce_ib": 3.785524606704712, + "ce_orig": 0.8852932453155518, + "epoch": 0.7370767129196922, + "kl_loss": 0.18399569392204285, + "loss_ib": 0.005625481251627207, + "step": 2563 + }, + { + "ce_ib": 7.145519733428955, + "ce_orig": 1.7377878427505493, + "epoch": 0.7370767129196922, + "kl_loss": 0.233211949467659, + "loss_ib": 0.009477638639509678, + "step": 2563 + }, + { + "ce_ib": 3.0961406230926514, + "ce_orig": 0.5206360220909119, + "epoch": 0.7370767129196922, + "kl_loss": 0.2226572036743164, + "loss_ib": 0.0053227124735713005, + "step": 2563 + }, + { + "ce_ib": 2.999840497970581, + "ce_orig": 0.42844289541244507, + "epoch": 0.7370767129196922, + "kl_loss": 0.221274271607399, + "loss_ib": 0.00521258357912302, + "step": 2563 + }, + { + "ce_ib": 3.503265619277954, + "ce_orig": 0.7142927050590515, + "epoch": 0.73736429649867, + "kl_loss": 0.19937363266944885, + "loss_ib": 0.00549700204282999, + "step": 2564 + }, + { + "ce_ib": 5.201097011566162, + "ce_orig": 1.300091028213501, + "epoch": 0.73736429649867, + "kl_loss": 0.1701774299144745, + "loss_ib": 0.006902871187776327, + "step": 2564 + }, + { + "ce_ib": 5.592081546783447, + "ce_orig": 0.7587504982948303, + "epoch": 0.73736429649867, + "kl_loss": 0.19642731547355652, + "loss_ib": 0.0075563546270132065, + "step": 2564 + }, + { + "ce_ib": 5.897303104400635, + "ce_orig": 0.7982569932937622, + "epoch": 0.73736429649867, + "kl_loss": 0.2019897699356079, + "loss_ib": 0.007917201146483421, + "step": 2564 + }, + { + "epoch": 0.7376518800776476, + "grad_norm": 0.15659016370773315, + "learning_rate": 8.834099429490095e-06, + "loss": 0.8367, + "step": 2565 + }, + { + "ce_ib": 4.518640518188477, + "ce_orig": 0.870286226272583, + "epoch": 0.7376518800776476, + "kl_loss": 0.2513827979564667, + "loss_ib": 0.007032467983663082, + "step": 2565 + }, + { + "ce_ib": 6.619004726409912, + "ce_orig": 1.1051381826400757, + "epoch": 0.7376518800776476, + "kl_loss": 0.1617254614830017, + "loss_ib": 0.00823625922203064, + "step": 2565 + }, + { + "ce_ib": 3.8071038722991943, + "ce_orig": 0.5759081840515137, + "epoch": 0.7376518800776476, + "kl_loss": 0.19302260875701904, + "loss_ib": 0.005737329367548227, + "step": 2565 + }, + { + "ce_ib": 2.225736141204834, + "ce_orig": 0.5594247579574585, + "epoch": 0.7376518800776476, + "kl_loss": 0.1534692347049713, + "loss_ib": 0.003760428400710225, + "step": 2565 + }, + { + "ce_ib": 4.5102057456970215, + "ce_orig": 1.0116081237792969, + "epoch": 0.7379394636566252, + "kl_loss": 0.25719350576400757, + "loss_ib": 0.007082140073180199, + "step": 2566 + }, + { + "ce_ib": 2.72005033493042, + "ce_orig": 0.4931775629520416, + "epoch": 0.7379394636566252, + "kl_loss": 0.2041696310043335, + "loss_ib": 0.004761746618896723, + "step": 2566 + }, + { + "ce_ib": 2.3821487426757812, + "ce_orig": 0.6277795433998108, + "epoch": 0.7379394636566252, + "kl_loss": 0.13530269265174866, + "loss_ib": 0.0037351755890995264, + "step": 2566 + }, + { + "ce_ib": 4.573750019073486, + "ce_orig": 0.9936109781265259, + "epoch": 0.7379394636566252, + "kl_loss": 0.19924214482307434, + "loss_ib": 0.006566171534359455, + "step": 2566 + }, + { + "ce_ib": 4.923214912414551, + "ce_orig": 0.4318653643131256, + "epoch": 0.7382270472356028, + "kl_loss": 0.22069713473320007, + "loss_ib": 0.007130186539143324, + "step": 2567 + }, + { + "ce_ib": 3.012427568435669, + "ce_orig": 0.8313344717025757, + "epoch": 0.7382270472356028, + "kl_loss": 0.21456068754196167, + "loss_ib": 0.00515803461894393, + "step": 2567 + }, + { + "ce_ib": 6.574810981750488, + "ce_orig": 1.056522011756897, + "epoch": 0.7382270472356028, + "kl_loss": 0.3454853892326355, + "loss_ib": 0.010029665194451809, + "step": 2567 + }, + { + "ce_ib": 3.87062406539917, + "ce_orig": 1.056571125984192, + "epoch": 0.7382270472356028, + "kl_loss": 0.16708382964134216, + "loss_ib": 0.005541461985558271, + "step": 2567 + }, + { + "ce_ib": 4.926036834716797, + "ce_orig": 1.2292739152908325, + "epoch": 0.7385146308145805, + "kl_loss": 0.1680401861667633, + "loss_ib": 0.006606438662856817, + "step": 2568 + }, + { + "ce_ib": 4.365617275238037, + "ce_orig": 0.7057650089263916, + "epoch": 0.7385146308145805, + "kl_loss": 0.21214549243450165, + "loss_ib": 0.006487071979790926, + "step": 2568 + }, + { + "ce_ib": 4.991042613983154, + "ce_orig": 0.8296493291854858, + "epoch": 0.7385146308145805, + "kl_loss": 0.17544487118721008, + "loss_ib": 0.006745490711182356, + "step": 2568 + }, + { + "ce_ib": 4.780955791473389, + "ce_orig": 0.7544527053833008, + "epoch": 0.7385146308145805, + "kl_loss": 0.2547728717327118, + "loss_ib": 0.007328683976083994, + "step": 2568 + }, + { + "ce_ib": 4.888114929199219, + "ce_orig": 0.8506876826286316, + "epoch": 0.7388022143935581, + "kl_loss": 0.15544959902763367, + "loss_ib": 0.00644261110574007, + "step": 2569 + }, + { + "ce_ib": 3.7467734813690186, + "ce_orig": 0.6523400545120239, + "epoch": 0.7388022143935581, + "kl_loss": 0.15044018626213074, + "loss_ib": 0.005251175258308649, + "step": 2569 + }, + { + "ce_ib": 5.4009623527526855, + "ce_orig": 1.0598244667053223, + "epoch": 0.7388022143935581, + "kl_loss": 0.1931811273097992, + "loss_ib": 0.007332772947847843, + "step": 2569 + }, + { + "ce_ib": 2.6927499771118164, + "ce_orig": 0.6333009600639343, + "epoch": 0.7388022143935581, + "kl_loss": 0.15667955577373505, + "loss_ib": 0.00425954582169652, + "step": 2569 + }, + { + "epoch": 0.7390897979725358, + "grad_norm": 0.13841809332370758, + "learning_rate": 8.829113414627265e-06, + "loss": 0.8277, + "step": 2570 + }, + { + "ce_ib": 7.729239463806152, + "ce_orig": 1.426335096359253, + "epoch": 0.7390897979725358, + "kl_loss": 0.27063241600990295, + "loss_ib": 0.010435563512146473, + "step": 2570 + }, + { + "ce_ib": 4.164693832397461, + "ce_orig": 0.4802332818508148, + "epoch": 0.7390897979725358, + "kl_loss": 0.25104472041130066, + "loss_ib": 0.006675140932202339, + "step": 2570 + }, + { + "ce_ib": 5.851113319396973, + "ce_orig": 1.0208754539489746, + "epoch": 0.7390897979725358, + "kl_loss": 0.12596988677978516, + "loss_ib": 0.007110811769962311, + "step": 2570 + }, + { + "ce_ib": 4.445706844329834, + "ce_orig": 0.9484270811080933, + "epoch": 0.7390897979725358, + "kl_loss": 0.21725603938102722, + "loss_ib": 0.006618266925215721, + "step": 2570 + }, + { + "ce_ib": 2.7476983070373535, + "ce_orig": 0.8009291291236877, + "epoch": 0.7393773815515134, + "kl_loss": 0.15158116817474365, + "loss_ib": 0.004263509996235371, + "step": 2571 + }, + { + "ce_ib": 2.645385980606079, + "ce_orig": 0.4621393382549286, + "epoch": 0.7393773815515134, + "kl_loss": 0.16877344250679016, + "loss_ib": 0.004333120305091143, + "step": 2571 + }, + { + "ce_ib": 5.737054347991943, + "ce_orig": 1.2605913877487183, + "epoch": 0.7393773815515134, + "kl_loss": 0.24252218008041382, + "loss_ib": 0.00816227588802576, + "step": 2571 + }, + { + "ce_ib": 3.0991556644439697, + "ce_orig": 0.5015894770622253, + "epoch": 0.7393773815515134, + "kl_loss": 0.1452709138393402, + "loss_ib": 0.004551864694803953, + "step": 2571 + }, + { + "ce_ib": 4.402594089508057, + "ce_orig": 0.9723419547080994, + "epoch": 0.7396649651304911, + "kl_loss": 0.22799727320671082, + "loss_ib": 0.006682566832751036, + "step": 2572 + }, + { + "ce_ib": 5.81080436706543, + "ce_orig": 1.2645658254623413, + "epoch": 0.7396649651304911, + "kl_loss": 0.18754348158836365, + "loss_ib": 0.007686239201575518, + "step": 2572 + }, + { + "ce_ib": 7.216136455535889, + "ce_orig": 0.9380136132240295, + "epoch": 0.7396649651304911, + "kl_loss": 0.22901731729507446, + "loss_ib": 0.009506309404969215, + "step": 2572 + }, + { + "ce_ib": 3.9565727710723877, + "ce_orig": 0.41609060764312744, + "epoch": 0.7396649651304911, + "kl_loss": 0.17743808031082153, + "loss_ib": 0.0057309530675411224, + "step": 2572 + }, + { + "ce_ib": 8.478562355041504, + "ce_orig": 1.7710846662521362, + "epoch": 0.7399525487094687, + "kl_loss": 0.24844428896903992, + "loss_ib": 0.010963005013763905, + "step": 2573 + }, + { + "ce_ib": 3.404003381729126, + "ce_orig": 0.5567448735237122, + "epoch": 0.7399525487094687, + "kl_loss": 0.20377889275550842, + "loss_ib": 0.005441791843622923, + "step": 2573 + }, + { + "ce_ib": 5.085226058959961, + "ce_orig": 0.8226644992828369, + "epoch": 0.7399525487094687, + "kl_loss": 0.18037447333335876, + "loss_ib": 0.006888971198350191, + "step": 2573 + }, + { + "ce_ib": 4.634652137756348, + "ce_orig": 0.9007614850997925, + "epoch": 0.7399525487094687, + "kl_loss": 0.1822461485862732, + "loss_ib": 0.006457113660871983, + "step": 2573 + }, + { + "ce_ib": 5.102598667144775, + "ce_orig": 1.1384222507476807, + "epoch": 0.7402401322884463, + "kl_loss": 0.18727795779705048, + "loss_ib": 0.006975378375500441, + "step": 2574 + }, + { + "ce_ib": 6.683871746063232, + "ce_orig": 1.3876487016677856, + "epoch": 0.7402401322884463, + "kl_loss": 0.2265142947435379, + "loss_ib": 0.008949014358222485, + "step": 2574 + }, + { + "ce_ib": 2.7709245681762695, + "ce_orig": 0.6468157172203064, + "epoch": 0.7402401322884463, + "kl_loss": 0.23943136632442474, + "loss_ib": 0.00516523839905858, + "step": 2574 + }, + { + "ce_ib": 5.708085536956787, + "ce_orig": 1.076336145401001, + "epoch": 0.7402401322884463, + "kl_loss": 0.24211981892585754, + "loss_ib": 0.008129283785820007, + "step": 2574 + }, + { + "epoch": 0.7405277158674239, + "grad_norm": 0.13666267693042755, + "learning_rate": 8.824118174541464e-06, + "loss": 0.8805, + "step": 2575 + }, + { + "ce_ib": 7.57201623916626, + "ce_orig": 1.3982281684875488, + "epoch": 0.7405277158674239, + "kl_loss": 0.18778201937675476, + "loss_ib": 0.009449835866689682, + "step": 2575 + }, + { + "ce_ib": 2.905700206756592, + "ce_orig": 0.8986089825630188, + "epoch": 0.7405277158674239, + "kl_loss": 0.13515421748161316, + "loss_ib": 0.0042572421953082085, + "step": 2575 + }, + { + "ce_ib": 4.787923812866211, + "ce_orig": 0.46424776315689087, + "epoch": 0.7405277158674239, + "kl_loss": 0.23090215027332306, + "loss_ib": 0.007096945308148861, + "step": 2575 + }, + { + "ce_ib": 3.8042099475860596, + "ce_orig": 0.670193612575531, + "epoch": 0.7405277158674239, + "kl_loss": 0.23526544868946075, + "loss_ib": 0.0061568645760416985, + "step": 2575 + }, + { + "ce_ib": 1.60924232006073, + "ce_orig": 0.3325219750404358, + "epoch": 0.7408152994464016, + "kl_loss": 0.22424790263175964, + "loss_ib": 0.0038517212960869074, + "step": 2576 + }, + { + "ce_ib": 2.781916379928589, + "ce_orig": 0.6473597288131714, + "epoch": 0.7408152994464016, + "kl_loss": 0.1884986162185669, + "loss_ib": 0.004666902590543032, + "step": 2576 + }, + { + "ce_ib": 4.204617500305176, + "ce_orig": 0.4444848299026489, + "epoch": 0.7408152994464016, + "kl_loss": 0.3060661554336548, + "loss_ib": 0.007265279069542885, + "step": 2576 + }, + { + "ce_ib": 4.898244380950928, + "ce_orig": 1.403886318206787, + "epoch": 0.7408152994464016, + "kl_loss": 0.22143475711345673, + "loss_ib": 0.007112591527402401, + "step": 2576 + }, + { + "ce_ib": 3.6005136966705322, + "ce_orig": 0.5709686279296875, + "epoch": 0.7411028830253793, + "kl_loss": 0.21527156233787537, + "loss_ib": 0.005753228906542063, + "step": 2577 + }, + { + "ce_ib": 2.4373972415924072, + "ce_orig": 0.5430202484130859, + "epoch": 0.7411028830253793, + "kl_loss": 0.19544973969459534, + "loss_ib": 0.004391894675791264, + "step": 2577 + }, + { + "ce_ib": 4.7119903564453125, + "ce_orig": 1.1896655559539795, + "epoch": 0.7411028830253793, + "kl_loss": 0.1579751819372177, + "loss_ib": 0.006291741970926523, + "step": 2577 + }, + { + "ce_ib": 3.998326539993286, + "ce_orig": 0.7738791704177856, + "epoch": 0.7411028830253793, + "kl_loss": 0.14571744203567505, + "loss_ib": 0.005455500911921263, + "step": 2577 + }, + { + "ce_ib": 4.429496765136719, + "ce_orig": 0.9417147636413574, + "epoch": 0.7413904666043569, + "kl_loss": 0.16477635502815247, + "loss_ib": 0.006077260244637728, + "step": 2578 + }, + { + "ce_ib": 3.0596871376037598, + "ce_orig": 0.6019025444984436, + "epoch": 0.7413904666043569, + "kl_loss": 0.2195931375026703, + "loss_ib": 0.0052556185983121395, + "step": 2578 + }, + { + "ce_ib": 4.305246829986572, + "ce_orig": 0.871566653251648, + "epoch": 0.7413904666043569, + "kl_loss": 0.15897265076637268, + "loss_ib": 0.0058949729427695274, + "step": 2578 + }, + { + "ce_ib": 3.013394832611084, + "ce_orig": 0.6616352796554565, + "epoch": 0.7413904666043569, + "kl_loss": 0.16270308196544647, + "loss_ib": 0.004640425555408001, + "step": 2578 + }, + { + "ce_ib": 2.8454954624176025, + "ce_orig": 0.7038243412971497, + "epoch": 0.7416780501833345, + "kl_loss": 0.22182762622833252, + "loss_ib": 0.005063772201538086, + "step": 2579 + }, + { + "ce_ib": 3.7053661346435547, + "ce_orig": 0.6119694113731384, + "epoch": 0.7416780501833345, + "kl_loss": 0.20408344268798828, + "loss_ib": 0.005746200680732727, + "step": 2579 + }, + { + "ce_ib": 3.078496217727661, + "ce_orig": 0.5248070359230042, + "epoch": 0.7416780501833345, + "kl_loss": 0.19792822003364563, + "loss_ib": 0.005057778209447861, + "step": 2579 + }, + { + "ce_ib": 3.542449712753296, + "ce_orig": 0.9441152215003967, + "epoch": 0.7416780501833345, + "kl_loss": 0.1631067395210266, + "loss_ib": 0.005173517391085625, + "step": 2579 + }, + { + "epoch": 0.7419656337623122, + "grad_norm": 0.1353457123041153, + "learning_rate": 8.819113721267385e-06, + "loss": 0.8548, + "step": 2580 + }, + { + "ce_ib": 4.764225482940674, + "ce_orig": 0.8099194765090942, + "epoch": 0.7419656337623122, + "kl_loss": 0.2249242514371872, + "loss_ib": 0.007013468071818352, + "step": 2580 + }, + { + "ce_ib": 3.8239669799804688, + "ce_orig": 0.9098833799362183, + "epoch": 0.7419656337623122, + "kl_loss": 0.3162440061569214, + "loss_ib": 0.006986407097429037, + "step": 2580 + }, + { + "ce_ib": 2.9232869148254395, + "ce_orig": 0.5978767275810242, + "epoch": 0.7419656337623122, + "kl_loss": 0.149845153093338, + "loss_ib": 0.004421738442033529, + "step": 2580 + }, + { + "ce_ib": 3.1318604946136475, + "ce_orig": 0.6882135272026062, + "epoch": 0.7419656337623122, + "kl_loss": 0.09711272269487381, + "loss_ib": 0.004102987702935934, + "step": 2580 + }, + { + "ce_ib": 2.3762612342834473, + "ce_orig": 0.4865489900112152, + "epoch": 0.7422532173412898, + "kl_loss": 0.11851350963115692, + "loss_ib": 0.003561396151781082, + "step": 2581 + }, + { + "ce_ib": 6.021539211273193, + "ce_orig": 0.8161255121231079, + "epoch": 0.7422532173412898, + "kl_loss": 0.17625460028648376, + "loss_ib": 0.0077840848825871944, + "step": 2581 + }, + { + "ce_ib": 2.6608047485351562, + "ce_orig": 0.5225613117218018, + "epoch": 0.7422532173412898, + "kl_loss": 0.20752441883087158, + "loss_ib": 0.004736048635095358, + "step": 2581 + }, + { + "ce_ib": 5.251967430114746, + "ce_orig": 1.0237979888916016, + "epoch": 0.7422532173412898, + "kl_loss": 0.15389150381088257, + "loss_ib": 0.006790881976485252, + "step": 2581 + }, + { + "ce_ib": 3.5213623046875, + "ce_orig": 0.8759695887565613, + "epoch": 0.7425408009202674, + "kl_loss": 0.20943287014961243, + "loss_ib": 0.005615690723061562, + "step": 2582 + }, + { + "ce_ib": 4.751433849334717, + "ce_orig": 0.9768491983413696, + "epoch": 0.7425408009202674, + "kl_loss": 0.23824124038219452, + "loss_ib": 0.007133846171200275, + "step": 2582 + }, + { + "ce_ib": 5.653142929077148, + "ce_orig": 1.3995029926300049, + "epoch": 0.7425408009202674, + "kl_loss": 0.18900740146636963, + "loss_ib": 0.007543216925114393, + "step": 2582 + }, + { + "ce_ib": 5.0937042236328125, + "ce_orig": 0.8041016459465027, + "epoch": 0.7425408009202674, + "kl_loss": 0.2537655830383301, + "loss_ib": 0.007631360087543726, + "step": 2582 + }, + { + "ce_ib": 4.071826457977295, + "ce_orig": 0.9932346343994141, + "epoch": 0.742828384499245, + "kl_loss": 0.15011170506477356, + "loss_ib": 0.005572943482547998, + "step": 2583 + }, + { + "ce_ib": 4.647455215454102, + "ce_orig": 0.9470099806785583, + "epoch": 0.742828384499245, + "kl_loss": 0.23544074594974518, + "loss_ib": 0.007001862395554781, + "step": 2583 + }, + { + "ce_ib": 2.831411123275757, + "ce_orig": 0.7565651535987854, + "epoch": 0.742828384499245, + "kl_loss": 0.147371307015419, + "loss_ib": 0.004305124282836914, + "step": 2583 + }, + { + "ce_ib": 3.428097724914551, + "ce_orig": 0.7055372595787048, + "epoch": 0.742828384499245, + "kl_loss": 0.2640978693962097, + "loss_ib": 0.006069075781852007, + "step": 2583 + }, + { + "ce_ib": 7.421047210693359, + "ce_orig": 1.6223247051239014, + "epoch": 0.7431159680782228, + "kl_loss": 0.18651573359966278, + "loss_ib": 0.009286204352974892, + "step": 2584 + }, + { + "ce_ib": 5.084964275360107, + "ce_orig": 1.1988569498062134, + "epoch": 0.7431159680782228, + "kl_loss": 0.21246474981307983, + "loss_ib": 0.0072096120566129684, + "step": 2584 + }, + { + "ce_ib": 6.0411458015441895, + "ce_orig": 1.238525152206421, + "epoch": 0.7431159680782228, + "kl_loss": 0.2892298400402069, + "loss_ib": 0.008933443576097488, + "step": 2584 + }, + { + "ce_ib": 4.677313327789307, + "ce_orig": 0.7739096283912659, + "epoch": 0.7431159680782228, + "kl_loss": 0.15585342049598694, + "loss_ib": 0.006235847249627113, + "step": 2584 + }, + { + "epoch": 0.7434035516572004, + "grad_norm": 0.14788320660591125, + "learning_rate": 8.814100066861914e-06, + "loss": 0.8439, + "step": 2585 + }, + { + "ce_ib": 5.2246856689453125, + "ce_orig": 0.957588791847229, + "epoch": 0.7434035516572004, + "kl_loss": 0.16945905983448029, + "loss_ib": 0.0069192759692668915, + "step": 2585 + }, + { + "ce_ib": 4.008471965789795, + "ce_orig": 0.36780887842178345, + "epoch": 0.7434035516572004, + "kl_loss": 0.20784255862236023, + "loss_ib": 0.006086897570639849, + "step": 2585 + }, + { + "ce_ib": 4.49461030960083, + "ce_orig": 1.055728793144226, + "epoch": 0.7434035516572004, + "kl_loss": 0.2001650631427765, + "loss_ib": 0.006496261339634657, + "step": 2585 + }, + { + "ce_ib": 4.038247108459473, + "ce_orig": 1.067158818244934, + "epoch": 0.7434035516572004, + "kl_loss": 0.134958416223526, + "loss_ib": 0.005387831013649702, + "step": 2585 + }, + { + "ce_ib": 2.9517061710357666, + "ce_orig": 0.5431344509124756, + "epoch": 0.743691135236178, + "kl_loss": 0.16544240713119507, + "loss_ib": 0.0046061305329203606, + "step": 2586 + }, + { + "ce_ib": 6.098656177520752, + "ce_orig": 0.642929196357727, + "epoch": 0.743691135236178, + "kl_loss": 0.17586374282836914, + "loss_ib": 0.007857292890548706, + "step": 2586 + }, + { + "ce_ib": 4.390201091766357, + "ce_orig": 0.6576611399650574, + "epoch": 0.743691135236178, + "kl_loss": 0.26229941844940186, + "loss_ib": 0.007013195659965277, + "step": 2586 + }, + { + "ce_ib": 3.479870080947876, + "ce_orig": 0.7700825929641724, + "epoch": 0.743691135236178, + "kl_loss": 0.2034727782011032, + "loss_ib": 0.005514597985893488, + "step": 2586 + }, + { + "ce_ib": 4.680325984954834, + "ce_orig": 0.7108718752861023, + "epoch": 0.7439787188151556, + "kl_loss": 0.14571698009967804, + "loss_ib": 0.006137495394796133, + "step": 2587 + }, + { + "ce_ib": 1.9870260953903198, + "ce_orig": 0.6367833614349365, + "epoch": 0.7439787188151556, + "kl_loss": 0.12130317091941833, + "loss_ib": 0.0032000576611608267, + "step": 2587 + }, + { + "ce_ib": 3.8737473487854004, + "ce_orig": 0.6847418546676636, + "epoch": 0.7439787188151556, + "kl_loss": 0.135023832321167, + "loss_ib": 0.0052239857614040375, + "step": 2587 + }, + { + "ce_ib": 4.116144180297852, + "ce_orig": 0.6735658645629883, + "epoch": 0.7439787188151556, + "kl_loss": 0.2364426553249359, + "loss_ib": 0.006480570882558823, + "step": 2587 + }, + { + "ce_ib": 3.806919813156128, + "ce_orig": 0.6142502427101135, + "epoch": 0.7442663023941333, + "kl_loss": 0.20373547077178955, + "loss_ib": 0.005844274535775185, + "step": 2588 + }, + { + "ce_ib": 3.100107431411743, + "ce_orig": 0.8178222179412842, + "epoch": 0.7442663023941333, + "kl_loss": 0.14214962720870972, + "loss_ib": 0.00452160369604826, + "step": 2588 + }, + { + "ce_ib": 6.254805088043213, + "ce_orig": 0.7357363104820251, + "epoch": 0.7442663023941333, + "kl_loss": 0.27672773599624634, + "loss_ib": 0.009022082202136517, + "step": 2588 + }, + { + "ce_ib": 3.830014944076538, + "ce_orig": 0.6422297358512878, + "epoch": 0.7442663023941333, + "kl_loss": 0.2615591585636139, + "loss_ib": 0.006445606704801321, + "step": 2588 + }, + { + "ce_ib": 4.4572954177856445, + "ce_orig": 1.039433240890503, + "epoch": 0.7445538859731109, + "kl_loss": 0.1657789945602417, + "loss_ib": 0.006115085445344448, + "step": 2589 + }, + { + "ce_ib": 3.198632001876831, + "ce_orig": 0.6883112788200378, + "epoch": 0.7445538859731109, + "kl_loss": 0.20392891764640808, + "loss_ib": 0.005237921141088009, + "step": 2589 + }, + { + "ce_ib": 2.488726854324341, + "ce_orig": 0.7084459662437439, + "epoch": 0.7445538859731109, + "kl_loss": 0.22710345685482025, + "loss_ib": 0.00475976150482893, + "step": 2589 + }, + { + "ce_ib": 6.172207832336426, + "ce_orig": 0.8101732730865479, + "epoch": 0.7445538859731109, + "kl_loss": 0.32388556003570557, + "loss_ib": 0.009411063976585865, + "step": 2589 + }, + { + "epoch": 0.7448414695520886, + "grad_norm": 0.12637796998023987, + "learning_rate": 8.80907722340411e-06, + "loss": 0.862, + "step": 2590 + }, + { + "ce_ib": 4.200333118438721, + "ce_orig": 1.0838595628738403, + "epoch": 0.7448414695520886, + "kl_loss": 0.17191341519355774, + "loss_ib": 0.005919467192143202, + "step": 2590 + }, + { + "ce_ib": 6.05013370513916, + "ce_orig": 1.010703206062317, + "epoch": 0.7448414695520886, + "kl_loss": 0.198906809091568, + "loss_ib": 0.008039201609790325, + "step": 2590 + }, + { + "ce_ib": 2.7454190254211426, + "ce_orig": 0.39899104833602905, + "epoch": 0.7448414695520886, + "kl_loss": 0.22500291466712952, + "loss_ib": 0.004995448049157858, + "step": 2590 + }, + { + "ce_ib": 3.9912755489349365, + "ce_orig": 0.7996641397476196, + "epoch": 0.7448414695520886, + "kl_loss": 0.10403124988079071, + "loss_ib": 0.0050315880216658115, + "step": 2590 + }, + { + "ce_ib": 3.5658199787139893, + "ce_orig": 0.9470148086547852, + "epoch": 0.7451290531310663, + "kl_loss": 0.22992362082004547, + "loss_ib": 0.005865056533366442, + "step": 2591 + }, + { + "ce_ib": 2.375925302505493, + "ce_orig": 0.6366772651672363, + "epoch": 0.7451290531310663, + "kl_loss": 0.11606550961732864, + "loss_ib": 0.0035365803632885218, + "step": 2591 + }, + { + "ce_ib": 5.782662868499756, + "ce_orig": 1.1687548160552979, + "epoch": 0.7451290531310663, + "kl_loss": 0.1939266473054886, + "loss_ib": 0.007721929345279932, + "step": 2591 + }, + { + "ce_ib": 3.1581881046295166, + "ce_orig": 0.49737629294395447, + "epoch": 0.7451290531310663, + "kl_loss": 0.1458546221256256, + "loss_ib": 0.004616734106093645, + "step": 2591 + }, + { + "ce_ib": 2.2188241481781006, + "ce_orig": 0.25918155908584595, + "epoch": 0.7454166367100439, + "kl_loss": 0.22846943140029907, + "loss_ib": 0.004503518342971802, + "step": 2592 + }, + { + "ce_ib": 5.46461296081543, + "ce_orig": 1.2142939567565918, + "epoch": 0.7454166367100439, + "kl_loss": 0.1991407722234726, + "loss_ib": 0.007456020452082157, + "step": 2592 + }, + { + "ce_ib": 2.756636619567871, + "ce_orig": 0.6729408502578735, + "epoch": 0.7454166367100439, + "kl_loss": 0.2822578549385071, + "loss_ib": 0.005579215008765459, + "step": 2592 + }, + { + "ce_ib": 4.115917682647705, + "ce_orig": 0.7334967255592346, + "epoch": 0.7454166367100439, + "kl_loss": 0.20065023005008698, + "loss_ib": 0.006122419610619545, + "step": 2592 + }, + { + "ce_ib": 4.262344837188721, + "ce_orig": 0.9576670527458191, + "epoch": 0.7457042202890215, + "kl_loss": 0.218836709856987, + "loss_ib": 0.006450711749494076, + "step": 2593 + }, + { + "ce_ib": 2.714486837387085, + "ce_orig": 0.7882108688354492, + "epoch": 0.7457042202890215, + "kl_loss": 0.11419865489006042, + "loss_ib": 0.003856473369523883, + "step": 2593 + }, + { + "ce_ib": 5.04304313659668, + "ce_orig": 1.1597262620925903, + "epoch": 0.7457042202890215, + "kl_loss": 0.18530048429965973, + "loss_ib": 0.006896047852933407, + "step": 2593 + }, + { + "ce_ib": 4.566178798675537, + "ce_orig": 1.0415821075439453, + "epoch": 0.7457042202890215, + "kl_loss": 0.2964380085468292, + "loss_ib": 0.007530559320002794, + "step": 2593 + }, + { + "ce_ib": 5.143246173858643, + "ce_orig": 0.770951509475708, + "epoch": 0.7459918038679991, + "kl_loss": 0.3436826467514038, + "loss_ib": 0.008580072782933712, + "step": 2594 + }, + { + "ce_ib": 5.1267619132995605, + "ce_orig": 1.269606113433838, + "epoch": 0.7459918038679991, + "kl_loss": 0.18622660636901855, + "loss_ib": 0.006989027839154005, + "step": 2594 + }, + { + "ce_ib": 5.488406658172607, + "ce_orig": 1.177490472793579, + "epoch": 0.7459918038679991, + "kl_loss": 0.17595282196998596, + "loss_ib": 0.007247935514897108, + "step": 2594 + }, + { + "ce_ib": 3.1142120361328125, + "ce_orig": 0.5109836459159851, + "epoch": 0.7459918038679991, + "kl_loss": 0.23061153292655945, + "loss_ib": 0.005420327186584473, + "step": 2594 + }, + { + "epoch": 0.7462793874469767, + "grad_norm": 0.10948961228132248, + "learning_rate": 8.80404520299517e-06, + "loss": 0.874, + "step": 2595 + }, + { + "ce_ib": 3.7748541831970215, + "ce_orig": 0.7458875775337219, + "epoch": 0.7462793874469767, + "kl_loss": 0.15546229481697083, + "loss_ib": 0.00532947713509202, + "step": 2595 + }, + { + "ce_ib": 5.0309319496154785, + "ce_orig": 0.9753800630569458, + "epoch": 0.7462793874469767, + "kl_loss": 0.4292054772377014, + "loss_ib": 0.009322986006736755, + "step": 2595 + }, + { + "ce_ib": 3.7169740200042725, + "ce_orig": 0.48856163024902344, + "epoch": 0.7462793874469767, + "kl_loss": 0.26186686754226685, + "loss_ib": 0.00633564218878746, + "step": 2595 + }, + { + "ce_ib": 4.928201198577881, + "ce_orig": 1.0897455215454102, + "epoch": 0.7462793874469767, + "kl_loss": 0.23717769980430603, + "loss_ib": 0.007299977820366621, + "step": 2595 + }, + { + "ce_ib": 6.133231163024902, + "ce_orig": 1.3915197849273682, + "epoch": 0.7465669710259544, + "kl_loss": 0.3226655125617981, + "loss_ib": 0.00935988686978817, + "step": 2596 + }, + { + "ce_ib": 6.268303394317627, + "ce_orig": 1.3448091745376587, + "epoch": 0.7465669710259544, + "kl_loss": 0.19232401251792908, + "loss_ib": 0.008191543631255627, + "step": 2596 + }, + { + "ce_ib": 5.337457656860352, + "ce_orig": 0.7382128238677979, + "epoch": 0.7465669710259544, + "kl_loss": 0.19253607094287872, + "loss_ib": 0.007262818049639463, + "step": 2596 + }, + { + "ce_ib": 5.950638294219971, + "ce_orig": 1.4761794805526733, + "epoch": 0.7465669710259544, + "kl_loss": 0.23724736273288727, + "loss_ib": 0.008323111571371555, + "step": 2596 + }, + { + "ce_ib": 5.421618461608887, + "ce_orig": 1.3961288928985596, + "epoch": 0.7468545546049321, + "kl_loss": 0.18974730372428894, + "loss_ib": 0.0073190913535654545, + "step": 2597 + }, + { + "ce_ib": 3.885322093963623, + "ce_orig": 1.0384812355041504, + "epoch": 0.7468545546049321, + "kl_loss": 0.11825446784496307, + "loss_ib": 0.005067866761237383, + "step": 2597 + }, + { + "ce_ib": 1.9441862106323242, + "ce_orig": 0.30193236470222473, + "epoch": 0.7468545546049321, + "kl_loss": 0.1660376936197281, + "loss_ib": 0.0036045631859451532, + "step": 2597 + }, + { + "ce_ib": 3.969054698944092, + "ce_orig": 0.5342854857444763, + "epoch": 0.7468545546049321, + "kl_loss": 0.2611023187637329, + "loss_ib": 0.0065800780430436134, + "step": 2597 + }, + { + "ce_ib": 5.306792259216309, + "ce_orig": 0.5224060416221619, + "epoch": 0.7471421381839097, + "kl_loss": 0.6702401638031006, + "loss_ib": 0.012009193189442158, + "step": 2598 + }, + { + "ce_ib": 5.922163963317871, + "ce_orig": 1.3012316226959229, + "epoch": 0.7471421381839097, + "kl_loss": 0.23983889818191528, + "loss_ib": 0.008320553228259087, + "step": 2598 + }, + { + "ce_ib": 6.244970321655273, + "ce_orig": 0.9579610228538513, + "epoch": 0.7471421381839097, + "kl_loss": 0.17391961812973022, + "loss_ib": 0.007984166964888573, + "step": 2598 + }, + { + "ce_ib": 5.448939323425293, + "ce_orig": 1.0022505521774292, + "epoch": 0.7471421381839097, + "kl_loss": 0.1608801931142807, + "loss_ib": 0.007057741284370422, + "step": 2598 + }, + { + "ce_ib": 5.678102493286133, + "ce_orig": 1.335200548171997, + "epoch": 0.7474297217628874, + "kl_loss": 0.1545880138874054, + "loss_ib": 0.007223982829600573, + "step": 2599 + }, + { + "ce_ib": 1.5931049585342407, + "ce_orig": 0.2615325450897217, + "epoch": 0.7474297217628874, + "kl_loss": 0.39247915148735046, + "loss_ib": 0.005517896264791489, + "step": 2599 + }, + { + "ce_ib": 2.458228588104248, + "ce_orig": 0.5735325217247009, + "epoch": 0.7474297217628874, + "kl_loss": 0.16433358192443848, + "loss_ib": 0.004101564176380634, + "step": 2599 + }, + { + "ce_ib": 4.242914199829102, + "ce_orig": 0.5653514862060547, + "epoch": 0.7474297217628874, + "kl_loss": 0.1908051073551178, + "loss_ib": 0.006150965578854084, + "step": 2599 + }, + { + "epoch": 0.747717305341865, + "grad_norm": 0.12968450784683228, + "learning_rate": 8.7990040177584e-06, + "loss": 0.8961, + "step": 2600 + }, + { + "ce_ib": 3.551048517227173, + "ce_orig": 0.7637311816215515, + "epoch": 0.747717305341865, + "kl_loss": 0.17068752646446228, + "loss_ib": 0.005257923621684313, + "step": 2600 + }, + { + "ce_ib": 5.006448268890381, + "ce_orig": 0.739934504032135, + "epoch": 0.747717305341865, + "kl_loss": 0.22762688994407654, + "loss_ib": 0.007282717153429985, + "step": 2600 + }, + { + "ce_ib": 4.673069000244141, + "ce_orig": 0.8180674910545349, + "epoch": 0.747717305341865, + "kl_loss": 0.19597876071929932, + "loss_ib": 0.00663285655900836, + "step": 2600 + }, + { + "ce_ib": 2.5581648349761963, + "ce_orig": 0.5609844326972961, + "epoch": 0.747717305341865, + "kl_loss": 0.12680409848690033, + "loss_ib": 0.0038262056186795235, + "step": 2600 + }, + { + "ce_ib": 2.0394060611724854, + "ce_orig": 0.5976606607437134, + "epoch": 0.7480048889208426, + "kl_loss": 0.09791192412376404, + "loss_ib": 0.0030185251962393522, + "step": 2601 + }, + { + "ce_ib": 4.86935567855835, + "ce_orig": 0.922757089138031, + "epoch": 0.7480048889208426, + "kl_loss": 0.17146477103233337, + "loss_ib": 0.006584003567695618, + "step": 2601 + }, + { + "ce_ib": 7.481740951538086, + "ce_orig": 0.613779604434967, + "epoch": 0.7480048889208426, + "kl_loss": 0.22363689541816711, + "loss_ib": 0.009718110784888268, + "step": 2601 + }, + { + "ce_ib": 5.447230815887451, + "ce_orig": 1.1395933628082275, + "epoch": 0.7480048889208426, + "kl_loss": 0.1672269105911255, + "loss_ib": 0.007119500078260899, + "step": 2601 + }, + { + "ce_ib": 5.129712104797363, + "ce_orig": 0.8894155621528625, + "epoch": 0.7482924724998202, + "kl_loss": 0.2216540277004242, + "loss_ib": 0.007346251979470253, + "step": 2602 + }, + { + "ce_ib": 8.332121849060059, + "ce_orig": 1.9803522825241089, + "epoch": 0.7482924724998202, + "kl_loss": 0.34187376499176025, + "loss_ib": 0.011750860139727592, + "step": 2602 + }, + { + "ce_ib": 7.72484827041626, + "ce_orig": 1.0794447660446167, + "epoch": 0.7482924724998202, + "kl_loss": 0.1801939457654953, + "loss_ib": 0.00952678732573986, + "step": 2602 + }, + { + "ce_ib": 6.257765769958496, + "ce_orig": 0.973808765411377, + "epoch": 0.7482924724998202, + "kl_loss": 0.16122156381607056, + "loss_ib": 0.007869981229305267, + "step": 2602 + }, + { + "ce_ib": 4.5414018630981445, + "ce_orig": 0.5139517188072205, + "epoch": 0.7485800560787979, + "kl_loss": 0.22036521136760712, + "loss_ib": 0.006745053920894861, + "step": 2603 + }, + { + "ce_ib": 7.23079252243042, + "ce_orig": 0.8496915102005005, + "epoch": 0.7485800560787979, + "kl_loss": 0.22078154981136322, + "loss_ib": 0.009438607841730118, + "step": 2603 + }, + { + "ce_ib": 1.0103164911270142, + "ce_orig": 0.16727228462696075, + "epoch": 0.7485800560787979, + "kl_loss": 0.41546377539634705, + "loss_ib": 0.005164953880012035, + "step": 2603 + }, + { + "ce_ib": 5.002810001373291, + "ce_orig": 1.0020577907562256, + "epoch": 0.7485800560787979, + "kl_loss": 0.12147587537765503, + "loss_ib": 0.00621756911277771, + "step": 2603 + }, + { + "ce_ib": 2.6427080631256104, + "ce_orig": 0.5037702322006226, + "epoch": 0.7488676396577756, + "kl_loss": 0.25183188915252686, + "loss_ib": 0.005161026492714882, + "step": 2604 + }, + { + "ce_ib": 5.494909763336182, + "ce_orig": 0.7157990336418152, + "epoch": 0.7488676396577756, + "kl_loss": 0.2705061137676239, + "loss_ib": 0.008199970237910748, + "step": 2604 + }, + { + "ce_ib": 4.260808944702148, + "ce_orig": 0.8194092512130737, + "epoch": 0.7488676396577756, + "kl_loss": 0.1595461368560791, + "loss_ib": 0.005856270436197519, + "step": 2604 + }, + { + "ce_ib": 5.187585353851318, + "ce_orig": 0.8787273168563843, + "epoch": 0.7488676396577756, + "kl_loss": 0.1358882188796997, + "loss_ib": 0.006546467542648315, + "step": 2604 + }, + { + "epoch": 0.7491552232367532, + "grad_norm": 0.15425364673137665, + "learning_rate": 8.793953679839183e-06, + "loss": 0.8701, + "step": 2605 + }, + { + "ce_ib": 3.931887626647949, + "ce_orig": 0.761011004447937, + "epoch": 0.7491552232367532, + "kl_loss": 0.11755862832069397, + "loss_ib": 0.00510747404769063, + "step": 2605 + }, + { + "ce_ib": 2.071127414703369, + "ce_orig": 0.4273303747177124, + "epoch": 0.7491552232367532, + "kl_loss": 0.2388300597667694, + "loss_ib": 0.004459428135305643, + "step": 2605 + }, + { + "ce_ib": 4.050381660461426, + "ce_orig": 0.6631671190261841, + "epoch": 0.7491552232367532, + "kl_loss": 0.16295471787452698, + "loss_ib": 0.0056799291633069515, + "step": 2605 + }, + { + "ce_ib": 6.231662750244141, + "ce_orig": 1.4028292894363403, + "epoch": 0.7491552232367532, + "kl_loss": 0.2317008674144745, + "loss_ib": 0.008548670448362827, + "step": 2605 + }, + { + "ce_ib": 5.691612720489502, + "ce_orig": 1.0877605676651, + "epoch": 0.7494428068157308, + "kl_loss": 0.26541367173194885, + "loss_ib": 0.008345749229192734, + "step": 2606 + }, + { + "ce_ib": 3.5025734901428223, + "ce_orig": 0.6863381266593933, + "epoch": 0.7494428068157308, + "kl_loss": 0.2598698139190674, + "loss_ib": 0.006101272068917751, + "step": 2606 + }, + { + "ce_ib": 3.79890513420105, + "ce_orig": 0.38811928033828735, + "epoch": 0.7494428068157308, + "kl_loss": 0.31434446573257446, + "loss_ib": 0.00694234948605299, + "step": 2606 + }, + { + "ce_ib": 3.3912341594696045, + "ce_orig": 0.6797558665275574, + "epoch": 0.7494428068157308, + "kl_loss": 0.21602338552474976, + "loss_ib": 0.00555146811529994, + "step": 2606 + }, + { + "ce_ib": 4.036881923675537, + "ce_orig": 0.5770557522773743, + "epoch": 0.7497303903947085, + "kl_loss": 0.29054975509643555, + "loss_ib": 0.006942379288375378, + "step": 2607 + }, + { + "ce_ib": 5.722087383270264, + "ce_orig": 0.9802952408790588, + "epoch": 0.7497303903947085, + "kl_loss": 0.2789079248905182, + "loss_ib": 0.008511166088283062, + "step": 2607 + }, + { + "ce_ib": 7.408735275268555, + "ce_orig": 1.5637530088424683, + "epoch": 0.7497303903947085, + "kl_loss": 0.18354880809783936, + "loss_ib": 0.00924422312527895, + "step": 2607 + }, + { + "ce_ib": 1.856675148010254, + "ce_orig": 0.27032554149627686, + "epoch": 0.7497303903947085, + "kl_loss": 0.29673466086387634, + "loss_ib": 0.004824021831154823, + "step": 2607 + }, + { + "ce_ib": 8.694708824157715, + "ce_orig": 2.0251450538635254, + "epoch": 0.7500179739736861, + "kl_loss": 0.20796933770179749, + "loss_ib": 0.01077440194785595, + "step": 2608 + }, + { + "ce_ib": 5.582411766052246, + "ce_orig": 1.043638825416565, + "epoch": 0.7500179739736861, + "kl_loss": 0.1743408441543579, + "loss_ib": 0.007325820159167051, + "step": 2608 + }, + { + "ce_ib": 5.950559616088867, + "ce_orig": 1.3696651458740234, + "epoch": 0.7500179739736861, + "kl_loss": 0.22234246134757996, + "loss_ib": 0.008173984475433826, + "step": 2608 + }, + { + "ce_ib": 3.118690252304077, + "ce_orig": 0.6960458755493164, + "epoch": 0.7500179739736861, + "kl_loss": 0.27731919288635254, + "loss_ib": 0.005891881883144379, + "step": 2608 + }, + { + "ce_ib": 5.956692695617676, + "ce_orig": 1.4103070497512817, + "epoch": 0.7503055575526637, + "kl_loss": 0.23033785820007324, + "loss_ib": 0.008260071277618408, + "step": 2609 + }, + { + "ce_ib": 3.361687183380127, + "ce_orig": 0.6810121536254883, + "epoch": 0.7503055575526637, + "kl_loss": 0.17885328829288483, + "loss_ib": 0.0051502203568816185, + "step": 2609 + }, + { + "ce_ib": 4.322081089019775, + "ce_orig": 0.7280053496360779, + "epoch": 0.7503055575526637, + "kl_loss": 0.16031888127326965, + "loss_ib": 0.005925270263105631, + "step": 2609 + }, + { + "ce_ib": 2.8500401973724365, + "ce_orig": 0.7871994972229004, + "epoch": 0.7503055575526637, + "kl_loss": 0.1885482370853424, + "loss_ib": 0.0047355224378407, + "step": 2609 + }, + { + "epoch": 0.7505931411316413, + "grad_norm": 0.13842301070690155, + "learning_rate": 8.788894201404955e-06, + "loss": 0.9162, + "step": 2610 + }, + { + "ce_ib": 7.193480014801025, + "ce_orig": 1.1168270111083984, + "epoch": 0.7505931411316413, + "kl_loss": 0.12047191709280014, + "loss_ib": 0.008398198522627354, + "step": 2610 + }, + { + "ce_ib": 2.625211238861084, + "ce_orig": 0.6027564406394958, + "epoch": 0.7505931411316413, + "kl_loss": 0.22688663005828857, + "loss_ib": 0.0048940773122012615, + "step": 2610 + }, + { + "ce_ib": 3.5850391387939453, + "ce_orig": 0.7569261193275452, + "epoch": 0.7505931411316413, + "kl_loss": 0.17093133926391602, + "loss_ib": 0.005294352769851685, + "step": 2610 + }, + { + "ce_ib": 4.972846984863281, + "ce_orig": 1.0571033954620361, + "epoch": 0.7505931411316413, + "kl_loss": 0.1841404139995575, + "loss_ib": 0.006814250722527504, + "step": 2610 + }, + { + "ce_ib": 6.00336217880249, + "ce_orig": 1.3257564306259155, + "epoch": 0.7508807247106191, + "kl_loss": 0.22727754712104797, + "loss_ib": 0.008276138454675674, + "step": 2611 + }, + { + "ce_ib": 1.3026533126831055, + "ce_orig": 0.21875964105129242, + "epoch": 0.7508807247106191, + "kl_loss": 0.4728362560272217, + "loss_ib": 0.0060310158878564835, + "step": 2611 + }, + { + "ce_ib": 1.72498619556427, + "ce_orig": 0.523443341255188, + "epoch": 0.7508807247106191, + "kl_loss": 0.17761996388435364, + "loss_ib": 0.003501185681670904, + "step": 2611 + }, + { + "ce_ib": 2.8709146976470947, + "ce_orig": 0.5661706924438477, + "epoch": 0.7508807247106191, + "kl_loss": 0.15353849530220032, + "loss_ib": 0.004406299442052841, + "step": 2611 + }, + { + "ce_ib": 3.5320279598236084, + "ce_orig": 0.966065526008606, + "epoch": 0.7511683082895967, + "kl_loss": 0.1390446126461029, + "loss_ib": 0.004922473803162575, + "step": 2612 + }, + { + "ce_ib": 3.0042974948883057, + "ce_orig": 0.5092431902885437, + "epoch": 0.7511683082895967, + "kl_loss": 0.155857115983963, + "loss_ib": 0.004562868736684322, + "step": 2612 + }, + { + "ce_ib": 4.438431739807129, + "ce_orig": 1.2498116493225098, + "epoch": 0.7511683082895967, + "kl_loss": 0.18202248215675354, + "loss_ib": 0.006258656736463308, + "step": 2612 + }, + { + "ce_ib": 3.1508655548095703, + "ce_orig": 0.6800640225410461, + "epoch": 0.7511683082895967, + "kl_loss": 0.220650777220726, + "loss_ib": 0.0053573730401694775, + "step": 2612 + }, + { + "ce_ib": 5.952095985412598, + "ce_orig": 1.1927322149276733, + "epoch": 0.7514558918685743, + "kl_loss": 0.3184989094734192, + "loss_ib": 0.00913708470761776, + "step": 2613 + }, + { + "ce_ib": 5.0650482177734375, + "ce_orig": 1.2014424800872803, + "epoch": 0.7514558918685743, + "kl_loss": 0.183578759431839, + "loss_ib": 0.006900836247950792, + "step": 2613 + }, + { + "ce_ib": 3.1567912101745605, + "ce_orig": 0.7563827633857727, + "epoch": 0.7514558918685743, + "kl_loss": 0.13547047972679138, + "loss_ib": 0.004511496052145958, + "step": 2613 + }, + { + "ce_ib": 3.652660608291626, + "ce_orig": 0.7997739911079407, + "epoch": 0.7514558918685743, + "kl_loss": 0.23583781719207764, + "loss_ib": 0.0060110390186309814, + "step": 2613 + }, + { + "ce_ib": 1.8393422365188599, + "ce_orig": 0.6035061478614807, + "epoch": 0.7517434754475519, + "kl_loss": 0.13997793197631836, + "loss_ib": 0.003239121288061142, + "step": 2614 + }, + { + "ce_ib": 3.942996025085449, + "ce_orig": 0.9190546274185181, + "epoch": 0.7517434754475519, + "kl_loss": 0.15574577450752258, + "loss_ib": 0.005500453524291515, + "step": 2614 + }, + { + "ce_ib": 3.7081987857818604, + "ce_orig": 0.6993608474731445, + "epoch": 0.7517434754475519, + "kl_loss": 0.1790006309747696, + "loss_ib": 0.005498205311596394, + "step": 2614 + }, + { + "ce_ib": 4.905894756317139, + "ce_orig": 1.041585922241211, + "epoch": 0.7517434754475519, + "kl_loss": 0.22501158714294434, + "loss_ib": 0.007156010717153549, + "step": 2614 + }, + { + "epoch": 0.7520310590265296, + "grad_norm": 0.13353775441646576, + "learning_rate": 8.783825594645175e-06, + "loss": 0.8672, + "step": 2615 + }, + { + "ce_ib": 3.4363949298858643, + "ce_orig": 0.4537047743797302, + "epoch": 0.7520310590265296, + "kl_loss": 0.345688134431839, + "loss_ib": 0.006893275771290064, + "step": 2615 + }, + { + "ce_ib": 5.174594402313232, + "ce_orig": 0.7855082154273987, + "epoch": 0.7520310590265296, + "kl_loss": 0.23155534267425537, + "loss_ib": 0.007490147836506367, + "step": 2615 + }, + { + "ce_ib": 3.527798652648926, + "ce_orig": 0.4586198925971985, + "epoch": 0.7520310590265296, + "kl_loss": 0.10918769240379333, + "loss_ib": 0.004619675688445568, + "step": 2615 + }, + { + "ce_ib": 6.273728847503662, + "ce_orig": 1.3865363597869873, + "epoch": 0.7520310590265296, + "kl_loss": 0.25850334763526917, + "loss_ib": 0.008858762681484222, + "step": 2615 + }, + { + "ce_ib": 4.43386173248291, + "ce_orig": 0.8604032397270203, + "epoch": 0.7523186426055072, + "kl_loss": 0.20553404092788696, + "loss_ib": 0.006489201448857784, + "step": 2616 + }, + { + "ce_ib": 6.327968597412109, + "ce_orig": 1.0516220331192017, + "epoch": 0.7523186426055072, + "kl_loss": 0.28541654348373413, + "loss_ib": 0.009182133711874485, + "step": 2616 + }, + { + "ce_ib": 8.037652015686035, + "ce_orig": 1.4183295965194702, + "epoch": 0.7523186426055072, + "kl_loss": 0.20792891085147858, + "loss_ib": 0.010116941295564175, + "step": 2616 + }, + { + "ce_ib": 3.778507947921753, + "ce_orig": 0.8165983557701111, + "epoch": 0.7523186426055072, + "kl_loss": 0.17324283719062805, + "loss_ib": 0.0055109360255301, + "step": 2616 + }, + { + "ce_ib": 2.0315403938293457, + "ce_orig": 0.48897257447242737, + "epoch": 0.7526062261844849, + "kl_loss": 0.21287748217582703, + "loss_ib": 0.00416031526401639, + "step": 2617 + }, + { + "ce_ib": 6.233209609985352, + "ce_orig": 1.302970051765442, + "epoch": 0.7526062261844849, + "kl_loss": 0.1957775205373764, + "loss_ib": 0.008190984837710857, + "step": 2617 + }, + { + "ce_ib": 2.9089767932891846, + "ce_orig": 0.6495563983917236, + "epoch": 0.7526062261844849, + "kl_loss": 0.2403596043586731, + "loss_ib": 0.0053125726990401745, + "step": 2617 + }, + { + "ce_ib": 6.324840068817139, + "ce_orig": 0.8608341813087463, + "epoch": 0.7526062261844849, + "kl_loss": 0.31460922956466675, + "loss_ib": 0.009470932185649872, + "step": 2617 + }, + { + "ce_ib": 3.974912166595459, + "ce_orig": 0.6329675912857056, + "epoch": 0.7528938097634625, + "kl_loss": 0.18912279605865479, + "loss_ib": 0.005866140127182007, + "step": 2618 + }, + { + "ce_ib": 4.883636474609375, + "ce_orig": 0.6710384488105774, + "epoch": 0.7528938097634625, + "kl_loss": 0.2127836048603058, + "loss_ib": 0.007011472247540951, + "step": 2618 + }, + { + "ce_ib": 6.646682262420654, + "ce_orig": 1.512648344039917, + "epoch": 0.7528938097634625, + "kl_loss": 0.21219536662101746, + "loss_ib": 0.008768635801970959, + "step": 2618 + }, + { + "ce_ib": 5.4820966720581055, + "ce_orig": 0.799479067325592, + "epoch": 0.7528938097634625, + "kl_loss": 0.2085864245891571, + "loss_ib": 0.007567961234599352, + "step": 2618 + }, + { + "ce_ib": 4.575918197631836, + "ce_orig": 1.1529948711395264, + "epoch": 0.7531813933424402, + "kl_loss": 0.16979825496673584, + "loss_ib": 0.0062739006243646145, + "step": 2619 + }, + { + "ce_ib": 5.007054328918457, + "ce_orig": 0.8537209033966064, + "epoch": 0.7531813933424402, + "kl_loss": 0.16741487383842468, + "loss_ib": 0.006681202445179224, + "step": 2619 + }, + { + "ce_ib": 4.9396071434021, + "ce_orig": 0.6650539636611938, + "epoch": 0.7531813933424402, + "kl_loss": 0.1871897578239441, + "loss_ib": 0.006811504252254963, + "step": 2619 + }, + { + "ce_ib": 3.3253331184387207, + "ce_orig": 0.6770694851875305, + "epoch": 0.7531813933424402, + "kl_loss": 0.18182995915412903, + "loss_ib": 0.005143632646650076, + "step": 2619 + }, + { + "epoch": 0.7534689769214178, + "grad_norm": 0.12906841933727264, + "learning_rate": 8.778747871771293e-06, + "loss": 0.8083, + "step": 2620 + }, + { + "ce_ib": 5.931896209716797, + "ce_orig": 1.0294599533081055, + "epoch": 0.7534689769214178, + "kl_loss": 0.16601070761680603, + "loss_ib": 0.0075920033268630505, + "step": 2620 + }, + { + "ce_ib": 2.8707973957061768, + "ce_orig": 0.7961857318878174, + "epoch": 0.7534689769214178, + "kl_loss": 0.15038399398326874, + "loss_ib": 0.004374637734144926, + "step": 2620 + }, + { + "ce_ib": 7.667156219482422, + "ce_orig": 1.3962912559509277, + "epoch": 0.7534689769214178, + "kl_loss": 0.2111242115497589, + "loss_ib": 0.009778398089110851, + "step": 2620 + }, + { + "ce_ib": 5.480246543884277, + "ce_orig": 1.0303969383239746, + "epoch": 0.7534689769214178, + "kl_loss": 0.26064902544021606, + "loss_ib": 0.0080867363139987, + "step": 2620 + }, + { + "ce_ib": 3.6733744144439697, + "ce_orig": 0.7467669248580933, + "epoch": 0.7537565605003954, + "kl_loss": 0.24557507038116455, + "loss_ib": 0.006129125133156776, + "step": 2621 + }, + { + "ce_ib": 5.548442840576172, + "ce_orig": 1.2781387567520142, + "epoch": 0.7537565605003954, + "kl_loss": 0.24259498715400696, + "loss_ib": 0.007974392734467983, + "step": 2621 + }, + { + "ce_ib": 3.745363473892212, + "ce_orig": 0.6479752063751221, + "epoch": 0.7537565605003954, + "kl_loss": 0.17568568885326385, + "loss_ib": 0.005502220243215561, + "step": 2621 + }, + { + "ce_ib": 4.873748779296875, + "ce_orig": 0.9247176647186279, + "epoch": 0.7537565605003954, + "kl_loss": 0.14945976436138153, + "loss_ib": 0.00636834604665637, + "step": 2621 + }, + { + "ce_ib": 4.481228828430176, + "ce_orig": 0.7537117600440979, + "epoch": 0.754044144079373, + "kl_loss": 0.17988093197345734, + "loss_ib": 0.0062800380401313305, + "step": 2622 + }, + { + "ce_ib": 3.0280661582946777, + "ce_orig": 0.6704833507537842, + "epoch": 0.754044144079373, + "kl_loss": 0.1288382112979889, + "loss_ib": 0.004316448234021664, + "step": 2622 + }, + { + "ce_ib": 2.0439889430999756, + "ce_orig": 0.476043701171875, + "epoch": 0.754044144079373, + "kl_loss": 0.1096796840429306, + "loss_ib": 0.0031407857313752174, + "step": 2622 + }, + { + "ce_ib": 6.459160327911377, + "ce_orig": 1.1912013292312622, + "epoch": 0.754044144079373, + "kl_loss": 0.22714608907699585, + "loss_ib": 0.008730621077120304, + "step": 2622 + }, + { + "ce_ib": 4.760768413543701, + "ce_orig": 0.8238224983215332, + "epoch": 0.7543317276583507, + "kl_loss": 0.17707939445972443, + "loss_ib": 0.006531562190502882, + "step": 2623 + }, + { + "ce_ib": 4.385411739349365, + "ce_orig": 0.8241497278213501, + "epoch": 0.7543317276583507, + "kl_loss": 0.2112196683883667, + "loss_ib": 0.006497608497738838, + "step": 2623 + }, + { + "ce_ib": 2.401984930038452, + "ce_orig": 0.43911775946617126, + "epoch": 0.7543317276583507, + "kl_loss": 0.19691365957260132, + "loss_ib": 0.004371121525764465, + "step": 2623 + }, + { + "ce_ib": 3.2455620765686035, + "ce_orig": 0.5355571508407593, + "epoch": 0.7543317276583507, + "kl_loss": 0.13629460334777832, + "loss_ib": 0.004608507733792067, + "step": 2623 + }, + { + "ce_ib": 5.762964725494385, + "ce_orig": 1.4793741703033447, + "epoch": 0.7546193112373284, + "kl_loss": 0.19602183997631073, + "loss_ib": 0.007723183371126652, + "step": 2624 + }, + { + "ce_ib": 3.560100793838501, + "ce_orig": 0.7406176328659058, + "epoch": 0.7546193112373284, + "kl_loss": 0.20637154579162598, + "loss_ib": 0.005623816046863794, + "step": 2624 + }, + { + "ce_ib": 3.9012722969055176, + "ce_orig": 0.5109891891479492, + "epoch": 0.7546193112373284, + "kl_loss": 0.12340521812438965, + "loss_ib": 0.005135324317961931, + "step": 2624 + }, + { + "ce_ib": 4.159745693206787, + "ce_orig": 0.9275307655334473, + "epoch": 0.7546193112373284, + "kl_loss": 0.19453167915344238, + "loss_ib": 0.006105063017457724, + "step": 2624 + }, + { + "epoch": 0.754906894816306, + "grad_norm": 0.13310624659061432, + "learning_rate": 8.773661045016722e-06, + "loss": 0.86, + "step": 2625 + }, + { + "ce_ib": 7.49035120010376, + "ce_orig": 1.023419976234436, + "epoch": 0.754906894816306, + "kl_loss": 0.18621128797531128, + "loss_ib": 0.009352464228868484, + "step": 2625 + }, + { + "ce_ib": 3.383023977279663, + "ce_orig": 0.558623194694519, + "epoch": 0.754906894816306, + "kl_loss": 0.16369101405143738, + "loss_ib": 0.005019933916628361, + "step": 2625 + }, + { + "ce_ib": 5.43183708190918, + "ce_orig": 1.149892807006836, + "epoch": 0.754906894816306, + "kl_loss": 0.1809125542640686, + "loss_ib": 0.007240962702780962, + "step": 2625 + }, + { + "ce_ib": 4.753440856933594, + "ce_orig": 0.7928643226623535, + "epoch": 0.754906894816306, + "kl_loss": 0.22326546907424927, + "loss_ib": 0.006986095570027828, + "step": 2625 + }, + { + "ce_ib": 3.9325509071350098, + "ce_orig": 0.9242215752601624, + "epoch": 0.7551944783952836, + "kl_loss": 0.24724993109703064, + "loss_ib": 0.0064050499349832535, + "step": 2626 + }, + { + "ce_ib": 2.903085947036743, + "ce_orig": 0.6301029920578003, + "epoch": 0.7551944783952836, + "kl_loss": 0.14413705468177795, + "loss_ib": 0.004344456363469362, + "step": 2626 + }, + { + "ce_ib": 2.109008550643921, + "ce_orig": 0.2799154222011566, + "epoch": 0.7551944783952836, + "kl_loss": 0.12071849405765533, + "loss_ib": 0.003316193353384733, + "step": 2626 + }, + { + "ce_ib": 4.079471111297607, + "ce_orig": 0.4775671064853668, + "epoch": 0.7551944783952836, + "kl_loss": 0.27210554480552673, + "loss_ib": 0.006800526287406683, + "step": 2626 + }, + { + "ce_ib": 6.213024616241455, + "ce_orig": 1.6035507917404175, + "epoch": 0.7554820619742613, + "kl_loss": 0.14170202612876892, + "loss_ib": 0.007630045060068369, + "step": 2627 + }, + { + "ce_ib": 2.732428550720215, + "ce_orig": 0.6961827874183655, + "epoch": 0.7554820619742613, + "kl_loss": 0.12326030433177948, + "loss_ib": 0.0039650313556194305, + "step": 2627 + }, + { + "ce_ib": 2.148674488067627, + "ce_orig": 0.38719743490219116, + "epoch": 0.7554820619742613, + "kl_loss": 0.14745885133743286, + "loss_ib": 0.0036232632119208574, + "step": 2627 + }, + { + "ce_ib": 2.9523091316223145, + "ce_orig": 0.7065005302429199, + "epoch": 0.7554820619742613, + "kl_loss": 0.7911630868911743, + "loss_ib": 0.010863940231502056, + "step": 2627 + }, + { + "ce_ib": 5.209384441375732, + "ce_orig": 1.0833566188812256, + "epoch": 0.7557696455532389, + "kl_loss": 0.14854329824447632, + "loss_ib": 0.006694817449897528, + "step": 2628 + }, + { + "ce_ib": 2.239570379257202, + "ce_orig": 0.4442305564880371, + "epoch": 0.7557696455532389, + "kl_loss": 0.23837065696716309, + "loss_ib": 0.004623277112841606, + "step": 2628 + }, + { + "ce_ib": 5.184863090515137, + "ce_orig": 1.1968085765838623, + "epoch": 0.7557696455532389, + "kl_loss": 0.22003570199012756, + "loss_ib": 0.007385220378637314, + "step": 2628 + }, + { + "ce_ib": 3.396928310394287, + "ce_orig": 0.9038397669792175, + "epoch": 0.7557696455532389, + "kl_loss": 0.1768680214881897, + "loss_ib": 0.00516560859978199, + "step": 2628 + }, + { + "ce_ib": 4.309116840362549, + "ce_orig": 0.8189201951026917, + "epoch": 0.7560572291322165, + "kl_loss": 0.5288306474685669, + "loss_ib": 0.009597422555088997, + "step": 2629 + }, + { + "ce_ib": 4.134854316711426, + "ce_orig": 0.8727141618728638, + "epoch": 0.7560572291322165, + "kl_loss": 0.3346378207206726, + "loss_ib": 0.00748123275116086, + "step": 2629 + }, + { + "ce_ib": 3.8057265281677246, + "ce_orig": 0.9415704011917114, + "epoch": 0.7560572291322165, + "kl_loss": 0.12715908885002136, + "loss_ib": 0.0050773173570632935, + "step": 2629 + }, + { + "ce_ib": 3.281142234802246, + "ce_orig": 0.9857540130615234, + "epoch": 0.7560572291322165, + "kl_loss": 0.1346607208251953, + "loss_ib": 0.00462774932384491, + "step": 2629 + }, + { + "epoch": 0.7563448127111941, + "grad_norm": 0.14264759421348572, + "learning_rate": 8.768565126636806e-06, + "loss": 0.8594, + "step": 2630 + }, + { + "ce_ib": 5.958950042724609, + "ce_orig": 1.178805947303772, + "epoch": 0.7563448127111941, + "kl_loss": 0.24730631709098816, + "loss_ib": 0.008432012982666492, + "step": 2630 + }, + { + "ce_ib": 2.4535164833068848, + "ce_orig": 0.5348263382911682, + "epoch": 0.7563448127111941, + "kl_loss": 0.18763434886932373, + "loss_ib": 0.004329860210418701, + "step": 2630 + }, + { + "ce_ib": 6.2979021072387695, + "ce_orig": 1.512069821357727, + "epoch": 0.7563448127111941, + "kl_loss": 0.21917422115802765, + "loss_ib": 0.008489644154906273, + "step": 2630 + }, + { + "ce_ib": 7.773667812347412, + "ce_orig": 1.4590362310409546, + "epoch": 0.7563448127111941, + "kl_loss": 0.20483574271202087, + "loss_ib": 0.009822024963796139, + "step": 2630 + }, + { + "ce_ib": 5.482571601867676, + "ce_orig": 1.057276725769043, + "epoch": 0.7566323962901719, + "kl_loss": 0.16988354921340942, + "loss_ib": 0.007181406952440739, + "step": 2631 + }, + { + "ce_ib": 5.886826038360596, + "ce_orig": 1.205045461654663, + "epoch": 0.7566323962901719, + "kl_loss": 0.18374085426330566, + "loss_ib": 0.007724234368652105, + "step": 2631 + }, + { + "ce_ib": 1.8823027610778809, + "ce_orig": 0.11565704643726349, + "epoch": 0.7566323962901719, + "kl_loss": 0.2559951841831207, + "loss_ib": 0.004442254547029734, + "step": 2631 + }, + { + "ce_ib": 4.232356071472168, + "ce_orig": 0.9597600698471069, + "epoch": 0.7566323962901719, + "kl_loss": 0.16143012046813965, + "loss_ib": 0.005846657790243626, + "step": 2631 + }, + { + "ce_ib": 2.7253475189208984, + "ce_orig": 0.5796699523925781, + "epoch": 0.7569199798691495, + "kl_loss": 0.16967594623565674, + "loss_ib": 0.00442210678011179, + "step": 2632 + }, + { + "ce_ib": 4.684848308563232, + "ce_orig": 0.960963785648346, + "epoch": 0.7569199798691495, + "kl_loss": 0.19102981686592102, + "loss_ib": 0.006595146376639605, + "step": 2632 + }, + { + "ce_ib": 4.60353946685791, + "ce_orig": 0.6935308575630188, + "epoch": 0.7569199798691495, + "kl_loss": 0.22787094116210938, + "loss_ib": 0.006882248912006617, + "step": 2632 + }, + { + "ce_ib": 4.197956085205078, + "ce_orig": 0.9388461112976074, + "epoch": 0.7569199798691495, + "kl_loss": 0.16372543573379517, + "loss_ib": 0.005835209973156452, + "step": 2632 + }, + { + "ce_ib": 3.5843732357025146, + "ce_orig": 0.4447890520095825, + "epoch": 0.7572075634481271, + "kl_loss": 0.23397943377494812, + "loss_ib": 0.005924167577177286, + "step": 2633 + }, + { + "ce_ib": 2.27738356590271, + "ce_orig": 0.5343998670578003, + "epoch": 0.7572075634481271, + "kl_loss": 0.1457487791776657, + "loss_ib": 0.003734871279448271, + "step": 2633 + }, + { + "ce_ib": 4.18596076965332, + "ce_orig": 0.8805382251739502, + "epoch": 0.7572075634481271, + "kl_loss": 0.22003400325775146, + "loss_ib": 0.006386300548911095, + "step": 2633 + }, + { + "ce_ib": 7.307007789611816, + "ce_orig": 1.455415964126587, + "epoch": 0.7572075634481271, + "kl_loss": 0.2606584429740906, + "loss_ib": 0.009913591668009758, + "step": 2633 + }, + { + "ce_ib": 2.3272109031677246, + "ce_orig": 0.28919437527656555, + "epoch": 0.7574951470271047, + "kl_loss": 0.22077061235904694, + "loss_ib": 0.004534916952252388, + "step": 2634 + }, + { + "ce_ib": 4.548549175262451, + "ce_orig": 0.8430200219154358, + "epoch": 0.7574951470271047, + "kl_loss": 0.1962272673845291, + "loss_ib": 0.006510822102427483, + "step": 2634 + }, + { + "ce_ib": 2.7857398986816406, + "ce_orig": 0.5082514882087708, + "epoch": 0.7574951470271047, + "kl_loss": 0.19667303562164307, + "loss_ib": 0.004752470180392265, + "step": 2634 + }, + { + "ce_ib": 5.8072428703308105, + "ce_orig": 1.2311731576919556, + "epoch": 0.7574951470271047, + "kl_loss": 0.31852787733078003, + "loss_ib": 0.008992522023618221, + "step": 2634 + }, + { + "epoch": 0.7577827306060824, + "grad_norm": 0.11934711784124374, + "learning_rate": 8.7634601289088e-06, + "loss": 0.8736, + "step": 2635 + }, + { + "ce_ib": 4.58791446685791, + "ce_orig": 0.8767384886741638, + "epoch": 0.7577827306060824, + "kl_loss": 0.14706677198410034, + "loss_ib": 0.006058582104742527, + "step": 2635 + }, + { + "ce_ib": 3.573789119720459, + "ce_orig": 0.8960891366004944, + "epoch": 0.7577827306060824, + "kl_loss": 0.11046603322029114, + "loss_ib": 0.004678449593484402, + "step": 2635 + }, + { + "ce_ib": 4.493900299072266, + "ce_orig": 0.9403761625289917, + "epoch": 0.7577827306060824, + "kl_loss": 0.12893377244472504, + "loss_ib": 0.005783237516880035, + "step": 2635 + }, + { + "ce_ib": 3.618637800216675, + "ce_orig": 0.781054675579071, + "epoch": 0.7577827306060824, + "kl_loss": 0.1356169432401657, + "loss_ib": 0.004974807612597942, + "step": 2635 + }, + { + "ce_ib": 3.6907761096954346, + "ce_orig": 0.6817976832389832, + "epoch": 0.75807031418506, + "kl_loss": 0.16382288932800293, + "loss_ib": 0.00532900495454669, + "step": 2636 + }, + { + "ce_ib": 6.02223539352417, + "ce_orig": 0.7448879480361938, + "epoch": 0.75807031418506, + "kl_loss": 0.191973477602005, + "loss_ib": 0.007941970601677895, + "step": 2636 + }, + { + "ce_ib": 3.0537641048431396, + "ce_orig": 0.7009120583534241, + "epoch": 0.75807031418506, + "kl_loss": 0.13642160594463348, + "loss_ib": 0.0044179800897836685, + "step": 2636 + }, + { + "ce_ib": 2.8808343410491943, + "ce_orig": 0.6699591279029846, + "epoch": 0.75807031418506, + "kl_loss": 0.15554901957511902, + "loss_ib": 0.00443632435053587, + "step": 2636 + }, + { + "ce_ib": 2.929126501083374, + "ce_orig": 0.6242467761039734, + "epoch": 0.7583578977640377, + "kl_loss": 0.12927904725074768, + "loss_ib": 0.004221917130053043, + "step": 2637 + }, + { + "ce_ib": 2.45251727104187, + "ce_orig": 0.3594660758972168, + "epoch": 0.7583578977640377, + "kl_loss": 0.11570817232131958, + "loss_ib": 0.003609598847106099, + "step": 2637 + }, + { + "ce_ib": 6.199082851409912, + "ce_orig": 1.224169135093689, + "epoch": 0.7583578977640377, + "kl_loss": 0.25470447540283203, + "loss_ib": 0.008746127597987652, + "step": 2637 + }, + { + "ce_ib": 2.640068769454956, + "ce_orig": 0.7153624296188354, + "epoch": 0.7583578977640377, + "kl_loss": 0.15309934318065643, + "loss_ib": 0.004171061795204878, + "step": 2637 + }, + { + "ce_ib": 3.736149311065674, + "ce_orig": 0.8224330544471741, + "epoch": 0.7586454813430153, + "kl_loss": 0.17550373077392578, + "loss_ib": 0.005491186399012804, + "step": 2638 + }, + { + "ce_ib": 3.3791985511779785, + "ce_orig": 0.3090614378452301, + "epoch": 0.7586454813430153, + "kl_loss": 0.17111347615718842, + "loss_ib": 0.005090333055704832, + "step": 2638 + }, + { + "ce_ib": 3.0711355209350586, + "ce_orig": 0.675676703453064, + "epoch": 0.7586454813430153, + "kl_loss": 0.2076961249113083, + "loss_ib": 0.005148096475750208, + "step": 2638 + }, + { + "ce_ib": 4.920444011688232, + "ce_orig": 0.7433791160583496, + "epoch": 0.7586454813430153, + "kl_loss": 0.2590378224849701, + "loss_ib": 0.007510822266340256, + "step": 2638 + }, + { + "ce_ib": 2.516503095626831, + "ce_orig": 0.3781818747520447, + "epoch": 0.758933064921993, + "kl_loss": 0.26893460750579834, + "loss_ib": 0.005205849185585976, + "step": 2639 + }, + { + "ce_ib": 2.942891836166382, + "ce_orig": 0.6569324731826782, + "epoch": 0.758933064921993, + "kl_loss": 0.12835341691970825, + "loss_ib": 0.004226426128298044, + "step": 2639 + }, + { + "ce_ib": 3.787118911743164, + "ce_orig": 0.6652781963348389, + "epoch": 0.758933064921993, + "kl_loss": 0.24075525999069214, + "loss_ib": 0.0061946711502969265, + "step": 2639 + }, + { + "ce_ib": 4.362105369567871, + "ce_orig": 0.47802308201789856, + "epoch": 0.758933064921993, + "kl_loss": 0.2832249104976654, + "loss_ib": 0.007194354198873043, + "step": 2639 + }, + { + "epoch": 0.7592206485009706, + "grad_norm": 0.12656278908252716, + "learning_rate": 8.758346064131824e-06, + "loss": 0.7994, + "step": 2640 + }, + { + "ce_ib": 2.0702056884765625, + "ce_orig": 0.5179637670516968, + "epoch": 0.7592206485009706, + "kl_loss": 0.16575536131858826, + "loss_ib": 0.003727759001776576, + "step": 2640 + }, + { + "ce_ib": 4.5471062660217285, + "ce_orig": 1.1474436521530151, + "epoch": 0.7592206485009706, + "kl_loss": 0.18087249994277954, + "loss_ib": 0.006355831399559975, + "step": 2640 + }, + { + "ce_ib": 7.985849857330322, + "ce_orig": 1.6772617101669312, + "epoch": 0.7592206485009706, + "kl_loss": 0.17632503807544708, + "loss_ib": 0.009749099612236023, + "step": 2640 + }, + { + "ce_ib": 3.8723182678222656, + "ce_orig": 0.824953556060791, + "epoch": 0.7592206485009706, + "kl_loss": 0.16510866582393646, + "loss_ib": 0.005523405037820339, + "step": 2640 + }, + { + "ce_ib": 4.648763179779053, + "ce_orig": 0.9198799729347229, + "epoch": 0.7595082320799482, + "kl_loss": 0.20061646401882172, + "loss_ib": 0.006654927507042885, + "step": 2641 + }, + { + "ce_ib": 5.260655403137207, + "ce_orig": 0.9047766923904419, + "epoch": 0.7595082320799482, + "kl_loss": 0.18518932163715363, + "loss_ib": 0.007112548220902681, + "step": 2641 + }, + { + "ce_ib": 2.273455858230591, + "ce_orig": 0.35614702105522156, + "epoch": 0.7595082320799482, + "kl_loss": 0.6111971139907837, + "loss_ib": 0.008385426364839077, + "step": 2641 + }, + { + "ce_ib": 5.715373516082764, + "ce_orig": 1.1122969388961792, + "epoch": 0.7595082320799482, + "kl_loss": 0.14654523134231567, + "loss_ib": 0.007180825807154179, + "step": 2641 + }, + { + "ce_ib": 2.5375583171844482, + "ce_orig": 0.54390549659729, + "epoch": 0.7597958156589258, + "kl_loss": 0.15614035725593567, + "loss_ib": 0.004098962061107159, + "step": 2642 + }, + { + "ce_ib": 4.851729869842529, + "ce_orig": 0.8752785921096802, + "epoch": 0.7597958156589258, + "kl_loss": 0.18643701076507568, + "loss_ib": 0.006716100033372641, + "step": 2642 + }, + { + "ce_ib": 3.3613288402557373, + "ce_orig": 0.4744165241718292, + "epoch": 0.7597958156589258, + "kl_loss": 0.32491350173950195, + "loss_ib": 0.006610463839024305, + "step": 2642 + }, + { + "ce_ib": 6.4512715339660645, + "ce_orig": 1.469534993171692, + "epoch": 0.7597958156589258, + "kl_loss": 0.2590535283088684, + "loss_ib": 0.009041806682944298, + "step": 2642 + }, + { + "ce_ib": 4.434234619140625, + "ce_orig": 1.2063195705413818, + "epoch": 0.7600833992379035, + "kl_loss": 0.3186129629611969, + "loss_ib": 0.007620364427566528, + "step": 2643 + }, + { + "ce_ib": 1.4180086851119995, + "ce_orig": 0.41451701521873474, + "epoch": 0.7600833992379035, + "kl_loss": 0.08789937198162079, + "loss_ib": 0.0022970023564994335, + "step": 2643 + }, + { + "ce_ib": 3.478858470916748, + "ce_orig": 0.7086161971092224, + "epoch": 0.7600833992379035, + "kl_loss": 0.21333752572536469, + "loss_ib": 0.005612233653664589, + "step": 2643 + }, + { + "ce_ib": 2.536179780960083, + "ce_orig": 0.32990366220474243, + "epoch": 0.7600833992379035, + "kl_loss": 0.20381584763526917, + "loss_ib": 0.004574337974190712, + "step": 2643 + }, + { + "ce_ib": 3.8562443256378174, + "ce_orig": 1.0933148860931396, + "epoch": 0.7603709828168812, + "kl_loss": 0.18891742825508118, + "loss_ib": 0.00574541836977005, + "step": 2644 + }, + { + "ce_ib": 3.244658946990967, + "ce_orig": 0.7171898484230042, + "epoch": 0.7603709828168812, + "kl_loss": 0.1433825045824051, + "loss_ib": 0.0046784840524196625, + "step": 2644 + }, + { + "ce_ib": 3.213437557220459, + "ce_orig": 0.8024879097938538, + "epoch": 0.7603709828168812, + "kl_loss": 0.1851913332939148, + "loss_ib": 0.005065350327640772, + "step": 2644 + }, + { + "ce_ib": 4.224512100219727, + "ce_orig": 0.7830402255058289, + "epoch": 0.7603709828168812, + "kl_loss": 0.22911913692951202, + "loss_ib": 0.00651570362970233, + "step": 2644 + }, + { + "epoch": 0.7606585663958588, + "grad_norm": 0.12764975428581238, + "learning_rate": 8.75322294462685e-06, + "loss": 0.9084, + "step": 2645 + }, + { + "ce_ib": 2.6081647872924805, + "ce_orig": 0.6326508522033691, + "epoch": 0.7606585663958588, + "kl_loss": 0.28607654571533203, + "loss_ib": 0.005468929652124643, + "step": 2645 + }, + { + "ce_ib": 3.3373146057128906, + "ce_orig": 0.6481754779815674, + "epoch": 0.7606585663958588, + "kl_loss": 0.25934723019599915, + "loss_ib": 0.005930787418037653, + "step": 2645 + }, + { + "ce_ib": 3.62253999710083, + "ce_orig": 0.4353153705596924, + "epoch": 0.7606585663958588, + "kl_loss": 0.12089753150939941, + "loss_ib": 0.0048315152525901794, + "step": 2645 + }, + { + "ce_ib": 3.0958259105682373, + "ce_orig": 0.6617231965065002, + "epoch": 0.7606585663958588, + "kl_loss": 0.16132411360740662, + "loss_ib": 0.0047090668231248856, + "step": 2645 + }, + { + "ce_ib": 5.024348258972168, + "ce_orig": 0.9414939880371094, + "epoch": 0.7609461499748364, + "kl_loss": 0.20178616046905518, + "loss_ib": 0.007042210083454847, + "step": 2646 + }, + { + "ce_ib": 2.8238377571105957, + "ce_orig": 0.642044186592102, + "epoch": 0.7609461499748364, + "kl_loss": 0.16310420632362366, + "loss_ib": 0.004454879555851221, + "step": 2646 + }, + { + "ce_ib": 3.866748809814453, + "ce_orig": 0.8266614675521851, + "epoch": 0.7609461499748364, + "kl_loss": 0.15960218012332916, + "loss_ib": 0.005462770350277424, + "step": 2646 + }, + { + "ce_ib": 3.6293976306915283, + "ce_orig": 0.8015586137771606, + "epoch": 0.7609461499748364, + "kl_loss": 0.24394340813159943, + "loss_ib": 0.006068831775337458, + "step": 2646 + }, + { + "ce_ib": 3.9693355560302734, + "ce_orig": 0.5850176811218262, + "epoch": 0.7612337335538141, + "kl_loss": 0.15408934652805328, + "loss_ib": 0.005510228686034679, + "step": 2647 + }, + { + "ce_ib": 2.61226224899292, + "ce_orig": 0.5833810567855835, + "epoch": 0.7612337335538141, + "kl_loss": 0.15572556853294373, + "loss_ib": 0.004169518128037453, + "step": 2647 + }, + { + "ce_ib": 3.4103424549102783, + "ce_orig": 0.5386757254600525, + "epoch": 0.7612337335538141, + "kl_loss": 0.17670348286628723, + "loss_ib": 0.0051773772574961185, + "step": 2647 + }, + { + "ce_ib": 4.056217670440674, + "ce_orig": 0.7658858895301819, + "epoch": 0.7612337335538141, + "kl_loss": 0.15652042627334595, + "loss_ib": 0.005621421616524458, + "step": 2647 + }, + { + "ce_ib": 5.768680095672607, + "ce_orig": 1.2092863321304321, + "epoch": 0.7615213171327917, + "kl_loss": 0.18171271681785583, + "loss_ib": 0.007585806772112846, + "step": 2648 + }, + { + "ce_ib": 1.320860743522644, + "ce_orig": 0.25114986300468445, + "epoch": 0.7615213171327917, + "kl_loss": 0.2698851227760315, + "loss_ib": 0.004019712097942829, + "step": 2648 + }, + { + "ce_ib": 5.166114330291748, + "ce_orig": 0.7004562020301819, + "epoch": 0.7615213171327917, + "kl_loss": 0.21033844351768494, + "loss_ib": 0.007269499357789755, + "step": 2648 + }, + { + "ce_ib": 5.426998615264893, + "ce_orig": 0.9856237173080444, + "epoch": 0.7615213171327917, + "kl_loss": 0.17246879637241364, + "loss_ib": 0.0071516865864396095, + "step": 2648 + }, + { + "ce_ib": 6.409529209136963, + "ce_orig": 1.388299822807312, + "epoch": 0.7618089007117693, + "kl_loss": 0.16340279579162598, + "loss_ib": 0.008043557405471802, + "step": 2649 + }, + { + "ce_ib": 5.088698387145996, + "ce_orig": 0.6823151111602783, + "epoch": 0.7618089007117693, + "kl_loss": 0.22207438945770264, + "loss_ib": 0.0073094419203698635, + "step": 2649 + }, + { + "ce_ib": 3.0859451293945312, + "ce_orig": 0.6529281735420227, + "epoch": 0.7618089007117693, + "kl_loss": 0.24292787909507751, + "loss_ib": 0.0055152238346636295, + "step": 2649 + }, + { + "ce_ib": 5.839105129241943, + "ce_orig": 0.701348602771759, + "epoch": 0.7618089007117693, + "kl_loss": 0.3160467743873596, + "loss_ib": 0.008999573066830635, + "step": 2649 + }, + { + "epoch": 0.762096484290747, + "grad_norm": 0.12742988765239716, + "learning_rate": 8.74809078273666e-06, + "loss": 0.8014, + "step": 2650 + }, + { + "ce_ib": 6.402480602264404, + "ce_orig": 1.4192262887954712, + "epoch": 0.762096484290747, + "kl_loss": 0.2483729124069214, + "loss_ib": 0.008886209689080715, + "step": 2650 + }, + { + "ce_ib": 2.882206678390503, + "ce_orig": 0.49058184027671814, + "epoch": 0.762096484290747, + "kl_loss": 0.20756055414676666, + "loss_ib": 0.004957812372595072, + "step": 2650 + }, + { + "ce_ib": 4.582705020904541, + "ce_orig": 0.9225060343742371, + "epoch": 0.762096484290747, + "kl_loss": 0.1656898856163025, + "loss_ib": 0.006239603739231825, + "step": 2650 + }, + { + "ce_ib": 5.575969696044922, + "ce_orig": 1.357067346572876, + "epoch": 0.762096484290747, + "kl_loss": 0.3843778371810913, + "loss_ib": 0.00941974762827158, + "step": 2650 + }, + { + "ce_ib": 4.3320770263671875, + "ce_orig": 1.086622953414917, + "epoch": 0.7623840678697247, + "kl_loss": 0.2539912760257721, + "loss_ib": 0.00687198992818594, + "step": 2651 + }, + { + "ce_ib": 1.4046937227249146, + "ce_orig": 0.30529776215553284, + "epoch": 0.7623840678697247, + "kl_loss": 0.3426405191421509, + "loss_ib": 0.004831098951399326, + "step": 2651 + }, + { + "ce_ib": 2.622265100479126, + "ce_orig": 0.571476936340332, + "epoch": 0.7623840678697247, + "kl_loss": 0.14426662027835846, + "loss_ib": 0.004064931068569422, + "step": 2651 + }, + { + "ce_ib": 3.3678815364837646, + "ce_orig": 0.7884094715118408, + "epoch": 0.7623840678697247, + "kl_loss": 0.18118204176425934, + "loss_ib": 0.005179701838642359, + "step": 2651 + }, + { + "ce_ib": 5.364591121673584, + "ce_orig": 0.5627880096435547, + "epoch": 0.7626716514487023, + "kl_loss": 0.17294424772262573, + "loss_ib": 0.007094033993780613, + "step": 2652 + }, + { + "ce_ib": 1.5667387247085571, + "ce_orig": 0.49013757705688477, + "epoch": 0.7626716514487023, + "kl_loss": 0.09284859895706177, + "loss_ib": 0.0024952248204499483, + "step": 2652 + }, + { + "ce_ib": 5.7229228019714355, + "ce_orig": 1.15831458568573, + "epoch": 0.7626716514487023, + "kl_loss": 0.2607805132865906, + "loss_ib": 0.00833072792738676, + "step": 2652 + }, + { + "ce_ib": 4.753726482391357, + "ce_orig": 0.9814288020133972, + "epoch": 0.7626716514487023, + "kl_loss": 0.12670743465423584, + "loss_ib": 0.006020801141858101, + "step": 2652 + }, + { + "ce_ib": 2.5956716537475586, + "ce_orig": 0.5240503549575806, + "epoch": 0.7629592350276799, + "kl_loss": 0.15760423243045807, + "loss_ib": 0.004171714186668396, + "step": 2653 + }, + { + "ce_ib": 4.635409832000732, + "ce_orig": 0.9120356440544128, + "epoch": 0.7629592350276799, + "kl_loss": 0.21524742245674133, + "loss_ib": 0.006787884049117565, + "step": 2653 + }, + { + "ce_ib": 3.5473947525024414, + "ce_orig": 0.6748715043067932, + "epoch": 0.7629592350276799, + "kl_loss": 0.13029545545578003, + "loss_ib": 0.004850349389016628, + "step": 2653 + }, + { + "ce_ib": 3.9424233436584473, + "ce_orig": 0.7827705144882202, + "epoch": 0.7629592350276799, + "kl_loss": 0.17276060581207275, + "loss_ib": 0.005670029204338789, + "step": 2653 + }, + { + "ce_ib": 2.620264768600464, + "ce_orig": 0.5689034461975098, + "epoch": 0.7632468186066576, + "kl_loss": 0.21889735758304596, + "loss_ib": 0.004809238016605377, + "step": 2654 + }, + { + "ce_ib": 4.995490550994873, + "ce_orig": 0.973610520362854, + "epoch": 0.7632468186066576, + "kl_loss": 0.2510707974433899, + "loss_ib": 0.007506198715418577, + "step": 2654 + }, + { + "ce_ib": 5.359694004058838, + "ce_orig": 0.9751203656196594, + "epoch": 0.7632468186066576, + "kl_loss": 0.25280386209487915, + "loss_ib": 0.007887733168900013, + "step": 2654 + }, + { + "ce_ib": 4.311456680297852, + "ce_orig": 0.5822281837463379, + "epoch": 0.7632468186066576, + "kl_loss": 0.309068500995636, + "loss_ib": 0.0074021415784955025, + "step": 2654 + }, + { + "epoch": 0.7635344021856352, + "grad_norm": 0.11955903470516205, + "learning_rate": 8.742949590825824e-06, + "loss": 0.8324, + "step": 2655 + }, + { + "ce_ib": 4.569638729095459, + "ce_orig": 0.647222638130188, + "epoch": 0.7635344021856352, + "kl_loss": 0.20398718118667603, + "loss_ib": 0.006609510164707899, + "step": 2655 + }, + { + "ce_ib": 2.9429311752319336, + "ce_orig": 0.7456191182136536, + "epoch": 0.7635344021856352, + "kl_loss": 0.1903575360774994, + "loss_ib": 0.004846506752073765, + "step": 2655 + }, + { + "ce_ib": 6.73652982711792, + "ce_orig": 1.377373456954956, + "epoch": 0.7635344021856352, + "kl_loss": 0.2505124807357788, + "loss_ib": 0.00924165453761816, + "step": 2655 + }, + { + "ce_ib": 5.091064929962158, + "ce_orig": 0.5532473921775818, + "epoch": 0.7635344021856352, + "kl_loss": 0.220844566822052, + "loss_ib": 0.007299510762095451, + "step": 2655 + }, + { + "ce_ib": 5.157125473022461, + "ce_orig": 0.8219767212867737, + "epoch": 0.7638219857646128, + "kl_loss": 0.27135467529296875, + "loss_ib": 0.007870672270655632, + "step": 2656 + }, + { + "ce_ib": 7.015442371368408, + "ce_orig": 1.3735243082046509, + "epoch": 0.7638219857646128, + "kl_loss": 0.26851505041122437, + "loss_ib": 0.00970059260725975, + "step": 2656 + }, + { + "ce_ib": 3.188746452331543, + "ce_orig": 0.6334740519523621, + "epoch": 0.7638219857646128, + "kl_loss": 0.13195723295211792, + "loss_ib": 0.004508318845182657, + "step": 2656 + }, + { + "ce_ib": 3.0526700019836426, + "ce_orig": 0.6682045459747314, + "epoch": 0.7638219857646128, + "kl_loss": 0.15357322990894318, + "loss_ib": 0.004588402342051268, + "step": 2656 + }, + { + "ce_ib": 5.548831462860107, + "ce_orig": 1.2949832677841187, + "epoch": 0.7641095693435905, + "kl_loss": 0.19394201040267944, + "loss_ib": 0.00748825166374445, + "step": 2657 + }, + { + "ce_ib": 3.3373570442199707, + "ce_orig": 0.7203373908996582, + "epoch": 0.7641095693435905, + "kl_loss": 0.166145920753479, + "loss_ib": 0.004998816177248955, + "step": 2657 + }, + { + "ce_ib": 5.508217811584473, + "ce_orig": 0.925467312335968, + "epoch": 0.7641095693435905, + "kl_loss": 0.22192956507205963, + "loss_ib": 0.007727513089776039, + "step": 2657 + }, + { + "ce_ib": 3.6190271377563477, + "ce_orig": 0.7876771092414856, + "epoch": 0.7641095693435905, + "kl_loss": 0.22831059992313385, + "loss_ib": 0.005902132950723171, + "step": 2657 + }, + { + "ce_ib": 5.261258125305176, + "ce_orig": 1.099668025970459, + "epoch": 0.7643971529225682, + "kl_loss": 0.22072085738182068, + "loss_ib": 0.007468466646969318, + "step": 2658 + }, + { + "ce_ib": 2.7173893451690674, + "ce_orig": 0.3713693916797638, + "epoch": 0.7643971529225682, + "kl_loss": 0.19928419589996338, + "loss_ib": 0.004710231442004442, + "step": 2658 + }, + { + "ce_ib": 3.163177251815796, + "ce_orig": 0.7582259178161621, + "epoch": 0.7643971529225682, + "kl_loss": 0.13620887696743011, + "loss_ib": 0.004525266122072935, + "step": 2658 + }, + { + "ce_ib": 3.594573736190796, + "ce_orig": 0.7633141279220581, + "epoch": 0.7643971529225682, + "kl_loss": 0.1725812703371048, + "loss_ib": 0.005320386029779911, + "step": 2658 + }, + { + "ce_ib": 5.594439506530762, + "ce_orig": 0.7502869367599487, + "epoch": 0.7646847365015458, + "kl_loss": 0.34577494859695435, + "loss_ib": 0.009052189067006111, + "step": 2659 + }, + { + "ce_ib": 3.0249269008636475, + "ce_orig": 0.6821627020835876, + "epoch": 0.7646847365015458, + "kl_loss": 0.19836491346359253, + "loss_ib": 0.005008575972169638, + "step": 2659 + }, + { + "ce_ib": 2.5430805683135986, + "ce_orig": 0.6584339737892151, + "epoch": 0.7646847365015458, + "kl_loss": 0.1519394814968109, + "loss_ib": 0.004062475636601448, + "step": 2659 + }, + { + "ce_ib": 3.3861875534057617, + "ce_orig": 0.784185528755188, + "epoch": 0.7646847365015458, + "kl_loss": 0.14595624804496765, + "loss_ib": 0.00484575005248189, + "step": 2659 + }, + { + "epoch": 0.7649723200805234, + "grad_norm": 0.15413770079612732, + "learning_rate": 8.737799381280667e-06, + "loss": 0.8042, + "step": 2660 + }, + { + "ce_ib": 3.68664813041687, + "ce_orig": 0.7469617128372192, + "epoch": 0.7649723200805234, + "kl_loss": 0.21648356318473816, + "loss_ib": 0.005851483903825283, + "step": 2660 + }, + { + "ce_ib": 4.369641304016113, + "ce_orig": 0.7228782176971436, + "epoch": 0.7649723200805234, + "kl_loss": 0.20795339345932007, + "loss_ib": 0.006449174601584673, + "step": 2660 + }, + { + "ce_ib": 3.7817766666412354, + "ce_orig": 0.4416484236717224, + "epoch": 0.7649723200805234, + "kl_loss": 0.26821184158325195, + "loss_ib": 0.006463894620537758, + "step": 2660 + }, + { + "ce_ib": 2.5166189670562744, + "ce_orig": 0.5488329529762268, + "epoch": 0.7649723200805234, + "kl_loss": 0.30919384956359863, + "loss_ib": 0.005608557257801294, + "step": 2660 + }, + { + "ce_ib": 5.467869758605957, + "ce_orig": 1.0757412910461426, + "epoch": 0.765259903659501, + "kl_loss": 0.3027237057685852, + "loss_ib": 0.008495106361806393, + "step": 2661 + }, + { + "ce_ib": 3.8150339126586914, + "ce_orig": 0.9729740023612976, + "epoch": 0.765259903659501, + "kl_loss": 0.21719518303871155, + "loss_ib": 0.005986986216157675, + "step": 2661 + }, + { + "ce_ib": 3.6948885917663574, + "ce_orig": 0.8232192397117615, + "epoch": 0.765259903659501, + "kl_loss": 0.17612913250923157, + "loss_ib": 0.005456179846078157, + "step": 2661 + }, + { + "ce_ib": 5.747403621673584, + "ce_orig": 1.3938367366790771, + "epoch": 0.765259903659501, + "kl_loss": 0.27180594205856323, + "loss_ib": 0.008465462364256382, + "step": 2661 + }, + { + "ce_ib": 4.529789447784424, + "ce_orig": 0.4549812376499176, + "epoch": 0.7655474872384787, + "kl_loss": 0.21600869297981262, + "loss_ib": 0.006689876317977905, + "step": 2662 + }, + { + "ce_ib": 6.6778974533081055, + "ce_orig": 1.4586409330368042, + "epoch": 0.7655474872384787, + "kl_loss": 0.18687503039836884, + "loss_ib": 0.008546647615730762, + "step": 2662 + }, + { + "ce_ib": 4.483046531677246, + "ce_orig": 0.8692341446876526, + "epoch": 0.7655474872384787, + "kl_loss": 0.15105444192886353, + "loss_ib": 0.0059935906901955605, + "step": 2662 + }, + { + "ce_ib": 4.478153228759766, + "ce_orig": 0.9578176736831665, + "epoch": 0.7655474872384787, + "kl_loss": 0.40023258328437805, + "loss_ib": 0.008480479009449482, + "step": 2662 + }, + { + "ce_ib": 3.3699424266815186, + "ce_orig": 0.4658781588077545, + "epoch": 0.7658350708174563, + "kl_loss": 0.15822994709014893, + "loss_ib": 0.004952242132276297, + "step": 2663 + }, + { + "ce_ib": 5.243926048278809, + "ce_orig": 0.8241167664527893, + "epoch": 0.7658350708174563, + "kl_loss": 0.25469616055488586, + "loss_ib": 0.007790887262672186, + "step": 2663 + }, + { + "ce_ib": 2.4107582569122314, + "ce_orig": 0.6151766180992126, + "epoch": 0.7658350708174563, + "kl_loss": 0.21850070357322693, + "loss_ib": 0.004595765378326178, + "step": 2663 + }, + { + "ce_ib": 3.134808301925659, + "ce_orig": 0.856974184513092, + "epoch": 0.7658350708174563, + "kl_loss": 0.1560961902141571, + "loss_ib": 0.004695769865065813, + "step": 2663 + }, + { + "ce_ib": 5.586699962615967, + "ce_orig": 0.8920469284057617, + "epoch": 0.766122654396434, + "kl_loss": 0.2066674381494522, + "loss_ib": 0.007653373759239912, + "step": 2664 + }, + { + "ce_ib": 7.19252347946167, + "ce_orig": 1.8758338689804077, + "epoch": 0.766122654396434, + "kl_loss": 0.2390487790107727, + "loss_ib": 0.009583011269569397, + "step": 2664 + }, + { + "ce_ib": 5.645152568817139, + "ce_orig": 1.3952891826629639, + "epoch": 0.766122654396434, + "kl_loss": 0.19645075500011444, + "loss_ib": 0.007609660271555185, + "step": 2664 + }, + { + "ce_ib": 4.852270603179932, + "ce_orig": 0.5818642377853394, + "epoch": 0.766122654396434, + "kl_loss": 0.24616771936416626, + "loss_ib": 0.007313947658985853, + "step": 2664 + }, + { + "epoch": 0.7664102379754116, + "grad_norm": 0.13037721812725067, + "learning_rate": 8.732640166509238e-06, + "loss": 0.8601, + "step": 2665 + }, + { + "ce_ib": 2.390371084213257, + "ce_orig": 0.404838889837265, + "epoch": 0.7664102379754116, + "kl_loss": 0.15580779314041138, + "loss_ib": 0.003948448691517115, + "step": 2665 + }, + { + "ce_ib": 3.723391056060791, + "ce_orig": 0.7972869873046875, + "epoch": 0.7664102379754116, + "kl_loss": 0.16510701179504395, + "loss_ib": 0.00537446141242981, + "step": 2665 + }, + { + "ce_ib": 3.4980363845825195, + "ce_orig": 0.5924327969551086, + "epoch": 0.7664102379754116, + "kl_loss": 0.14756357669830322, + "loss_ib": 0.004973672330379486, + "step": 2665 + }, + { + "ce_ib": 6.056390762329102, + "ce_orig": 0.998401403427124, + "epoch": 0.7664102379754116, + "kl_loss": 0.20627082884311676, + "loss_ib": 0.008119098842144012, + "step": 2665 + }, + { + "ce_ib": 4.907528877258301, + "ce_orig": 0.7421924471855164, + "epoch": 0.7666978215543893, + "kl_loss": 0.13271337747573853, + "loss_ib": 0.0062346626073122025, + "step": 2666 + }, + { + "ce_ib": 6.988088607788086, + "ce_orig": 1.4784311056137085, + "epoch": 0.7666978215543893, + "kl_loss": 0.1762663722038269, + "loss_ib": 0.008750751614570618, + "step": 2666 + }, + { + "ce_ib": 3.0783910751342773, + "ce_orig": 0.6941865682601929, + "epoch": 0.7666978215543893, + "kl_loss": 0.22743719816207886, + "loss_ib": 0.005352762993425131, + "step": 2666 + }, + { + "ce_ib": 5.78221321105957, + "ce_orig": 1.1571283340454102, + "epoch": 0.7666978215543893, + "kl_loss": 0.17562153935432434, + "loss_ib": 0.0075384280644357204, + "step": 2666 + }, + { + "ce_ib": 5.119140148162842, + "ce_orig": 1.0265793800354004, + "epoch": 0.7669854051333669, + "kl_loss": 0.182699516415596, + "loss_ib": 0.006946134846657515, + "step": 2667 + }, + { + "ce_ib": 3.842658281326294, + "ce_orig": 0.7085807919502258, + "epoch": 0.7669854051333669, + "kl_loss": 0.22759315371513367, + "loss_ib": 0.006118589546531439, + "step": 2667 + }, + { + "ce_ib": 3.052748680114746, + "ce_orig": 0.7917462587356567, + "epoch": 0.7669854051333669, + "kl_loss": 0.1337980479001999, + "loss_ib": 0.004390729125589132, + "step": 2667 + }, + { + "ce_ib": 5.534055233001709, + "ce_orig": 1.0489271879196167, + "epoch": 0.7669854051333669, + "kl_loss": 0.16357935965061188, + "loss_ib": 0.007169848307967186, + "step": 2667 + }, + { + "ce_ib": 2.960449457168579, + "ce_orig": 0.8261027336120605, + "epoch": 0.7672729887123445, + "kl_loss": 0.18544867634773254, + "loss_ib": 0.004814936313778162, + "step": 2668 + }, + { + "ce_ib": 2.4196560382843018, + "ce_orig": 0.6661205887794495, + "epoch": 0.7672729887123445, + "kl_loss": 0.14346888661384583, + "loss_ib": 0.003854345064610243, + "step": 2668 + }, + { + "ce_ib": 4.377885818481445, + "ce_orig": 1.2066311836242676, + "epoch": 0.7672729887123445, + "kl_loss": 0.17611011862754822, + "loss_ib": 0.00613898690789938, + "step": 2668 + }, + { + "ce_ib": 3.01912784576416, + "ce_orig": 0.7927043437957764, + "epoch": 0.7672729887123445, + "kl_loss": 0.18893393874168396, + "loss_ib": 0.004908467177301645, + "step": 2668 + }, + { + "ce_ib": 2.370398759841919, + "ce_orig": 0.4703998565673828, + "epoch": 0.7675605722913221, + "kl_loss": 0.19997119903564453, + "loss_ib": 0.00437011057510972, + "step": 2669 + }, + { + "ce_ib": 2.9021122455596924, + "ce_orig": 0.6094480752944946, + "epoch": 0.7675605722913221, + "kl_loss": 0.232709139585495, + "loss_ib": 0.005229203496128321, + "step": 2669 + }, + { + "ce_ib": 2.3632373809814453, + "ce_orig": 0.6607572436332703, + "epoch": 0.7675605722913221, + "kl_loss": 0.22494138777256012, + "loss_ib": 0.004612651187926531, + "step": 2669 + }, + { + "ce_ib": 5.757905006408691, + "ce_orig": 1.0790709257125854, + "epoch": 0.7675605722913221, + "kl_loss": 0.17883586883544922, + "loss_ib": 0.007546263746917248, + "step": 2669 + }, + { + "epoch": 0.7678481558702998, + "grad_norm": 0.11361628025770187, + "learning_rate": 8.727471958941285e-06, + "loss": 0.8366, + "step": 2670 + }, + { + "ce_ib": 4.434467315673828, + "ce_orig": 0.8696165084838867, + "epoch": 0.7678481558702998, + "kl_loss": 0.20124205946922302, + "loss_ib": 0.006446887739002705, + "step": 2670 + }, + { + "ce_ib": 4.032402038574219, + "ce_orig": 0.7612284421920776, + "epoch": 0.7678481558702998, + "kl_loss": 0.21410904824733734, + "loss_ib": 0.006173492409288883, + "step": 2670 + }, + { + "ce_ib": 2.4475035667419434, + "ce_orig": 0.7115465998649597, + "epoch": 0.7678481558702998, + "kl_loss": 0.12265248596668243, + "loss_ib": 0.003674028441309929, + "step": 2670 + }, + { + "ce_ib": 5.633935451507568, + "ce_orig": 0.5377886891365051, + "epoch": 0.7678481558702998, + "kl_loss": 0.17566078901290894, + "loss_ib": 0.007390542887151241, + "step": 2670 + }, + { + "ce_ib": 5.355801582336426, + "ce_orig": 1.1064484119415283, + "epoch": 0.7681357394492775, + "kl_loss": 0.17070221900939941, + "loss_ib": 0.007062823511660099, + "step": 2671 + }, + { + "ce_ib": 3.595139265060425, + "ce_orig": 0.26594236493110657, + "epoch": 0.7681357394492775, + "kl_loss": 0.15930229425430298, + "loss_ib": 0.005188162438571453, + "step": 2671 + }, + { + "ce_ib": 4.590541362762451, + "ce_orig": 0.9834463000297546, + "epoch": 0.7681357394492775, + "kl_loss": 0.18045327067375183, + "loss_ib": 0.00639507407322526, + "step": 2671 + }, + { + "ce_ib": 5.167852401733398, + "ce_orig": 1.2499750852584839, + "epoch": 0.7681357394492775, + "kl_loss": 0.16169065237045288, + "loss_ib": 0.006784758996218443, + "step": 2671 + }, + { + "ce_ib": 6.117361545562744, + "ce_orig": 0.6421705484390259, + "epoch": 0.7684233230282551, + "kl_loss": 0.1670074760913849, + "loss_ib": 0.007787436246871948, + "step": 2672 + }, + { + "ce_ib": 4.517022132873535, + "ce_orig": 1.174759030342102, + "epoch": 0.7684233230282551, + "kl_loss": 0.18228118121623993, + "loss_ib": 0.006339833606034517, + "step": 2672 + }, + { + "ce_ib": 4.1177077293396, + "ce_orig": 0.31198203563690186, + "epoch": 0.7684233230282551, + "kl_loss": 0.2486502230167389, + "loss_ib": 0.006604210007935762, + "step": 2672 + }, + { + "ce_ib": 5.125777721405029, + "ce_orig": 0.907699704170227, + "epoch": 0.7684233230282551, + "kl_loss": 0.17626668512821198, + "loss_ib": 0.006888444069772959, + "step": 2672 + }, + { + "ce_ib": 4.686802387237549, + "ce_orig": 0.6721720695495605, + "epoch": 0.7687109066072327, + "kl_loss": 0.31664857268333435, + "loss_ib": 0.00785328820347786, + "step": 2673 + }, + { + "ce_ib": 3.4699769020080566, + "ce_orig": 0.85077965259552, + "epoch": 0.7687109066072327, + "kl_loss": 0.13537606596946716, + "loss_ib": 0.004823737777769566, + "step": 2673 + }, + { + "ce_ib": 7.3906989097595215, + "ce_orig": 1.7384141683578491, + "epoch": 0.7687109066072327, + "kl_loss": 0.21663859486579895, + "loss_ib": 0.009557085111737251, + "step": 2673 + }, + { + "ce_ib": 2.644005298614502, + "ce_orig": 0.5467746257781982, + "epoch": 0.7687109066072327, + "kl_loss": 0.1762668937444687, + "loss_ib": 0.004406674299389124, + "step": 2673 + }, + { + "ce_ib": 4.088390350341797, + "ce_orig": 0.8603402972221375, + "epoch": 0.7689984901862104, + "kl_loss": 0.2640787661075592, + "loss_ib": 0.0067291781306266785, + "step": 2674 + }, + { + "ce_ib": 6.988543510437012, + "ce_orig": 1.5728989839553833, + "epoch": 0.7689984901862104, + "kl_loss": 0.12621276080608368, + "loss_ib": 0.008250671438872814, + "step": 2674 + }, + { + "ce_ib": 0.9747673869132996, + "ce_orig": 0.18342220783233643, + "epoch": 0.7689984901862104, + "kl_loss": 0.4049503803253174, + "loss_ib": 0.005024271085858345, + "step": 2674 + }, + { + "ce_ib": 4.664572715759277, + "ce_orig": 0.8780193328857422, + "epoch": 0.7689984901862104, + "kl_loss": 0.2095623016357422, + "loss_ib": 0.00676019536331296, + "step": 2674 + }, + { + "epoch": 0.769286073765188, + "grad_norm": 0.12538056075572968, + "learning_rate": 8.722294771028216e-06, + "loss": 0.8794, + "step": 2675 + }, + { + "ce_ib": 5.025839805603027, + "ce_orig": 0.6151210069656372, + "epoch": 0.769286073765188, + "kl_loss": 0.19349908828735352, + "loss_ib": 0.0069608306512236595, + "step": 2675 + }, + { + "ce_ib": 3.750826597213745, + "ce_orig": 1.044618844985962, + "epoch": 0.769286073765188, + "kl_loss": 0.13355395197868347, + "loss_ib": 0.00508636562153697, + "step": 2675 + }, + { + "ce_ib": 4.547115802764893, + "ce_orig": 0.6408321857452393, + "epoch": 0.769286073765188, + "kl_loss": 0.39537888765335083, + "loss_ib": 0.008500904776155949, + "step": 2675 + }, + { + "ce_ib": 5.367480278015137, + "ce_orig": 1.2703197002410889, + "epoch": 0.769286073765188, + "kl_loss": 0.24346604943275452, + "loss_ib": 0.007802140433341265, + "step": 2675 + }, + { + "ce_ib": 8.094207763671875, + "ce_orig": 1.6508105993270874, + "epoch": 0.7695736573441656, + "kl_loss": 0.13613484799861908, + "loss_ib": 0.00945555604994297, + "step": 2676 + }, + { + "ce_ib": 3.2168569564819336, + "ce_orig": 0.6643295884132385, + "epoch": 0.7695736573441656, + "kl_loss": 0.22262844443321228, + "loss_ib": 0.005443141330033541, + "step": 2676 + }, + { + "ce_ib": 2.646818161010742, + "ce_orig": 0.7149338722229004, + "epoch": 0.7695736573441656, + "kl_loss": 0.12713196873664856, + "loss_ib": 0.003918137867003679, + "step": 2676 + }, + { + "ce_ib": 5.446217060089111, + "ce_orig": 1.2448493242263794, + "epoch": 0.7695736573441656, + "kl_loss": 0.2831036448478699, + "loss_ib": 0.00827725324779749, + "step": 2676 + }, + { + "ce_ib": 5.641241073608398, + "ce_orig": 1.1158215999603271, + "epoch": 0.7698612409231433, + "kl_loss": 0.21700063347816467, + "loss_ib": 0.007811246905475855, + "step": 2677 + }, + { + "ce_ib": 6.449841499328613, + "ce_orig": 1.0011471509933472, + "epoch": 0.7698612409231433, + "kl_loss": 0.25625520944595337, + "loss_ib": 0.009012393653392792, + "step": 2677 + }, + { + "ce_ib": 4.202730178833008, + "ce_orig": 0.8315361738204956, + "epoch": 0.7698612409231433, + "kl_loss": 0.1140250489115715, + "loss_ib": 0.00534298038110137, + "step": 2677 + }, + { + "ce_ib": 4.169776916503906, + "ce_orig": 0.5281667709350586, + "epoch": 0.7698612409231433, + "kl_loss": 0.18761610984802246, + "loss_ib": 0.00604593800380826, + "step": 2677 + }, + { + "ce_ib": 3.636735677719116, + "ce_orig": 0.8471371531486511, + "epoch": 0.770148824502121, + "kl_loss": 0.19884130358695984, + "loss_ib": 0.005625148769468069, + "step": 2678 + }, + { + "ce_ib": 4.396966934204102, + "ce_orig": 0.8394135236740112, + "epoch": 0.770148824502121, + "kl_loss": 0.2063685804605484, + "loss_ib": 0.006460652686655521, + "step": 2678 + }, + { + "ce_ib": 4.3910393714904785, + "ce_orig": 0.9420214295387268, + "epoch": 0.770148824502121, + "kl_loss": 0.15118226408958435, + "loss_ib": 0.005902861710637808, + "step": 2678 + }, + { + "ce_ib": 3.944725751876831, + "ce_orig": 0.4956255853176117, + "epoch": 0.770148824502121, + "kl_loss": 0.27153757214546204, + "loss_ib": 0.006660101469606161, + "step": 2678 + }, + { + "ce_ib": 4.101735591888428, + "ce_orig": 0.7615695595741272, + "epoch": 0.7704364080810986, + "kl_loss": 0.14278002083301544, + "loss_ib": 0.005529535934329033, + "step": 2679 + }, + { + "ce_ib": 2.1670799255371094, + "ce_orig": 0.5569210052490234, + "epoch": 0.7704364080810986, + "kl_loss": 0.2390817254781723, + "loss_ib": 0.004557896871119738, + "step": 2679 + }, + { + "ce_ib": 3.0058376789093018, + "ce_orig": 0.5550844669342041, + "epoch": 0.7704364080810986, + "kl_loss": 0.16870659589767456, + "loss_ib": 0.004692903719842434, + "step": 2679 + }, + { + "ce_ib": 4.788110256195068, + "ce_orig": 1.0130534172058105, + "epoch": 0.7704364080810986, + "kl_loss": 0.2946546673774719, + "loss_ib": 0.007734656799584627, + "step": 2679 + }, + { + "epoch": 0.7707239916600762, + "grad_norm": 0.14219574630260468, + "learning_rate": 8.717108615243081e-06, + "loss": 0.8355, + "step": 2680 + }, + { + "ce_ib": 3.8282883167266846, + "ce_orig": 0.8506295084953308, + "epoch": 0.7707239916600762, + "kl_loss": 0.1139247715473175, + "loss_ib": 0.004967535845935345, + "step": 2680 + }, + { + "ce_ib": 3.728376865386963, + "ce_orig": 0.8837341666221619, + "epoch": 0.7707239916600762, + "kl_loss": 0.1808839738368988, + "loss_ib": 0.005537216551601887, + "step": 2680 + }, + { + "ce_ib": 5.85162353515625, + "ce_orig": 1.1746516227722168, + "epoch": 0.7707239916600762, + "kl_loss": 0.36976680159568787, + "loss_ib": 0.009549290873110294, + "step": 2680 + }, + { + "ce_ib": 2.7520511150360107, + "ce_orig": 0.6917570233345032, + "epoch": 0.7707239916600762, + "kl_loss": 0.13332661986351013, + "loss_ib": 0.004085317254066467, + "step": 2680 + }, + { + "ce_ib": 3.831477403640747, + "ce_orig": 0.8897028565406799, + "epoch": 0.7710115752390538, + "kl_loss": 0.2204003483057022, + "loss_ib": 0.006035481113940477, + "step": 2681 + }, + { + "ce_ib": 3.5157663822174072, + "ce_orig": 0.4195421040058136, + "epoch": 0.7710115752390538, + "kl_loss": 0.2423984557390213, + "loss_ib": 0.00593975093215704, + "step": 2681 + }, + { + "ce_ib": 2.704944372177124, + "ce_orig": 0.4097641408443451, + "epoch": 0.7710115752390538, + "kl_loss": 0.17569047212600708, + "loss_ib": 0.004461849108338356, + "step": 2681 + }, + { + "ce_ib": 4.788296222686768, + "ce_orig": 0.6093381643295288, + "epoch": 0.7710115752390538, + "kl_loss": 0.25679707527160645, + "loss_ib": 0.0073562669567763805, + "step": 2681 + }, + { + "ce_ib": 3.4643094539642334, + "ce_orig": 0.5594658255577087, + "epoch": 0.7712991588180315, + "kl_loss": 0.19462326169013977, + "loss_ib": 0.005410542245954275, + "step": 2682 + }, + { + "ce_ib": 4.234952449798584, + "ce_orig": 0.5192727446556091, + "epoch": 0.7712991588180315, + "kl_loss": 0.30812233686447144, + "loss_ib": 0.007316175382584333, + "step": 2682 + }, + { + "ce_ib": 4.707577705383301, + "ce_orig": 0.7339281439781189, + "epoch": 0.7712991588180315, + "kl_loss": 0.2206556648015976, + "loss_ib": 0.006914134602993727, + "step": 2682 + }, + { + "ce_ib": 4.856429576873779, + "ce_orig": 0.998317539691925, + "epoch": 0.7712991588180315, + "kl_loss": 0.2275029718875885, + "loss_ib": 0.007131459657102823, + "step": 2682 + }, + { + "ce_ib": 6.105445384979248, + "ce_orig": 1.3326770067214966, + "epoch": 0.7715867423970091, + "kl_loss": 0.21600939333438873, + "loss_ib": 0.008265539072453976, + "step": 2683 + }, + { + "ce_ib": 3.0221447944641113, + "ce_orig": 0.7252461910247803, + "epoch": 0.7715867423970091, + "kl_loss": 0.1576673686504364, + "loss_ib": 0.004598818253725767, + "step": 2683 + }, + { + "ce_ib": 6.782814025878906, + "ce_orig": 1.8366950750350952, + "epoch": 0.7715867423970091, + "kl_loss": 0.22897960245609283, + "loss_ib": 0.009072610177099705, + "step": 2683 + }, + { + "ce_ib": 4.057888031005859, + "ce_orig": 0.3861165940761566, + "epoch": 0.7715867423970091, + "kl_loss": 0.23608344793319702, + "loss_ib": 0.006418722681701183, + "step": 2683 + }, + { + "ce_ib": 5.7094621658325195, + "ce_orig": 1.4329556226730347, + "epoch": 0.7718743259759868, + "kl_loss": 0.2626643776893616, + "loss_ib": 0.00833610538393259, + "step": 2684 + }, + { + "ce_ib": 4.4984869956970215, + "ce_orig": 0.906755268573761, + "epoch": 0.7718743259759868, + "kl_loss": 0.18250368535518646, + "loss_ib": 0.006323523819446564, + "step": 2684 + }, + { + "ce_ib": 4.270473480224609, + "ce_orig": 0.6698318719863892, + "epoch": 0.7718743259759868, + "kl_loss": 0.19535252451896667, + "loss_ib": 0.006223998498171568, + "step": 2684 + }, + { + "ce_ib": 4.718249320983887, + "ce_orig": 0.6493656039237976, + "epoch": 0.7718743259759868, + "kl_loss": 0.15363658964633942, + "loss_ib": 0.0062546152621507645, + "step": 2684 + }, + { + "epoch": 0.7721619095549644, + "grad_norm": 0.13145041465759277, + "learning_rate": 8.711913504080534e-06, + "loss": 0.8587, + "step": 2685 + }, + { + "ce_ib": 3.149228572845459, + "ce_orig": 0.598234236240387, + "epoch": 0.7721619095549644, + "kl_loss": 0.133408784866333, + "loss_ib": 0.00448331655934453, + "step": 2685 + }, + { + "ce_ib": 4.333808898925781, + "ce_orig": 1.0598585605621338, + "epoch": 0.7721619095549644, + "kl_loss": 0.2111036777496338, + "loss_ib": 0.00644484581425786, + "step": 2685 + }, + { + "ce_ib": 4.468900203704834, + "ce_orig": 0.9862536191940308, + "epoch": 0.7721619095549644, + "kl_loss": 0.21621854603290558, + "loss_ib": 0.006631085649132729, + "step": 2685 + }, + { + "ce_ib": 5.794833660125732, + "ce_orig": 1.1927554607391357, + "epoch": 0.7721619095549644, + "kl_loss": 0.2481311410665512, + "loss_ib": 0.008276144973933697, + "step": 2685 + }, + { + "ce_ib": 4.997649669647217, + "ce_orig": 0.7067171335220337, + "epoch": 0.7724494931339421, + "kl_loss": 0.3280888795852661, + "loss_ib": 0.008278538472950459, + "step": 2686 + }, + { + "ce_ib": 2.3970680236816406, + "ce_orig": 0.6991992592811584, + "epoch": 0.7724494931339421, + "kl_loss": 0.14256328344345093, + "loss_ib": 0.0038227008190006018, + "step": 2686 + }, + { + "ce_ib": 6.031111240386963, + "ce_orig": 0.8539875745773315, + "epoch": 0.7724494931339421, + "kl_loss": 0.19747132062911987, + "loss_ib": 0.008005824871361256, + "step": 2686 + }, + { + "ce_ib": 4.010863304138184, + "ce_orig": 0.6465052962303162, + "epoch": 0.7724494931339421, + "kl_loss": 0.20366010069847107, + "loss_ib": 0.006047464441508055, + "step": 2686 + }, + { + "ce_ib": 2.681018352508545, + "ce_orig": 0.43267834186553955, + "epoch": 0.7727370767129197, + "kl_loss": 0.5051535964012146, + "loss_ib": 0.007732554338872433, + "step": 2687 + }, + { + "ce_ib": 3.966505765914917, + "ce_orig": 0.8605852127075195, + "epoch": 0.7727370767129197, + "kl_loss": 0.24231983721256256, + "loss_ib": 0.006389704067260027, + "step": 2687 + }, + { + "ce_ib": 4.944021701812744, + "ce_orig": 0.5461651086807251, + "epoch": 0.7727370767129197, + "kl_loss": 0.5547879934310913, + "loss_ib": 0.010491901077330112, + "step": 2687 + }, + { + "ce_ib": 5.731528282165527, + "ce_orig": 1.3357869386672974, + "epoch": 0.7727370767129197, + "kl_loss": 0.17668838798999786, + "loss_ib": 0.007498411927372217, + "step": 2687 + }, + { + "ce_ib": 2.473698854446411, + "ce_orig": 0.5978347659111023, + "epoch": 0.7730246602918973, + "kl_loss": 0.13884739577770233, + "loss_ib": 0.003862172830849886, + "step": 2688 + }, + { + "ce_ib": 3.3378286361694336, + "ce_orig": 0.7855865955352783, + "epoch": 0.7730246602918973, + "kl_loss": 0.15396198630332947, + "loss_ib": 0.004877448547631502, + "step": 2688 + }, + { + "ce_ib": 3.6066927909851074, + "ce_orig": 0.7304968237876892, + "epoch": 0.7730246602918973, + "kl_loss": 0.20514509081840515, + "loss_ib": 0.005658143665641546, + "step": 2688 + }, + { + "ce_ib": 5.274631023406982, + "ce_orig": 0.9876590371131897, + "epoch": 0.7730246602918973, + "kl_loss": 0.2101043462753296, + "loss_ib": 0.0073756747879087925, + "step": 2688 + }, + { + "ce_ib": 3.020888328552246, + "ce_orig": 0.703035295009613, + "epoch": 0.7733122438708749, + "kl_loss": 0.27102380990982056, + "loss_ib": 0.005731125827878714, + "step": 2689 + }, + { + "ce_ib": 4.531244277954102, + "ce_orig": 0.986247718334198, + "epoch": 0.7733122438708749, + "kl_loss": 0.1349295675754547, + "loss_ib": 0.005880539771169424, + "step": 2689 + }, + { + "ce_ib": 4.474483013153076, + "ce_orig": 0.7355599999427795, + "epoch": 0.7733122438708749, + "kl_loss": 0.23691584169864655, + "loss_ib": 0.006843641400337219, + "step": 2689 + }, + { + "ce_ib": 5.56644868850708, + "ce_orig": 0.7205083966255188, + "epoch": 0.7733122438708749, + "kl_loss": 0.17553767561912537, + "loss_ib": 0.007321824785321951, + "step": 2689 + }, + { + "epoch": 0.7735998274498526, + "grad_norm": 0.1379229575395584, + "learning_rate": 8.706709450056803e-06, + "loss": 0.845, + "step": 2690 + }, + { + "ce_ib": 6.142656326293945, + "ce_orig": 1.026603102684021, + "epoch": 0.7735998274498526, + "kl_loss": 0.20974117517471313, + "loss_ib": 0.008240067400038242, + "step": 2690 + }, + { + "ce_ib": 5.674263000488281, + "ce_orig": 1.0867112874984741, + "epoch": 0.7735998274498526, + "kl_loss": 0.20860444009304047, + "loss_ib": 0.007760307751595974, + "step": 2690 + }, + { + "ce_ib": 4.820021629333496, + "ce_orig": 1.105331540107727, + "epoch": 0.7735998274498526, + "kl_loss": 0.1474853754043579, + "loss_ib": 0.00629487494006753, + "step": 2690 + }, + { + "ce_ib": 3.3739027976989746, + "ce_orig": 0.5637878775596619, + "epoch": 0.7735998274498526, + "kl_loss": 0.17478325963020325, + "loss_ib": 0.005121735390275717, + "step": 2690 + }, + { + "ce_ib": 2.8170979022979736, + "ce_orig": 0.7207901477813721, + "epoch": 0.7738874110288303, + "kl_loss": 0.15122771263122559, + "loss_ib": 0.0043293749913573265, + "step": 2691 + }, + { + "ce_ib": 5.264162063598633, + "ce_orig": 0.8226876258850098, + "epoch": 0.7738874110288303, + "kl_loss": 0.23614515364170074, + "loss_ib": 0.0076256138272583485, + "step": 2691 + }, + { + "ce_ib": 3.327314853668213, + "ce_orig": 0.6425967812538147, + "epoch": 0.7738874110288303, + "kl_loss": 0.17430998384952545, + "loss_ib": 0.0050704143941402435, + "step": 2691 + }, + { + "ce_ib": 5.2930779457092285, + "ce_orig": 0.5656704306602478, + "epoch": 0.7738874110288303, + "kl_loss": 0.3638197183609009, + "loss_ib": 0.008931274525821209, + "step": 2691 + }, + { + "ce_ib": 4.4129638671875, + "ce_orig": 1.181778073310852, + "epoch": 0.7741749946078079, + "kl_loss": 0.13155078887939453, + "loss_ib": 0.0057284715585410595, + "step": 2692 + }, + { + "ce_ib": 6.004623889923096, + "ce_orig": 1.0219248533248901, + "epoch": 0.7741749946078079, + "kl_loss": 0.2388216108083725, + "loss_ib": 0.008392839692533016, + "step": 2692 + }, + { + "ce_ib": 3.07499623298645, + "ce_orig": 0.688065230846405, + "epoch": 0.7741749946078079, + "kl_loss": 0.1302335411310196, + "loss_ib": 0.004377331584692001, + "step": 2692 + }, + { + "ce_ib": 2.654501438140869, + "ce_orig": 0.507672131061554, + "epoch": 0.7741749946078079, + "kl_loss": 0.20411497354507446, + "loss_ib": 0.00469565112143755, + "step": 2692 + }, + { + "ce_ib": 2.358966827392578, + "ce_orig": 0.6751900315284729, + "epoch": 0.7744625781867855, + "kl_loss": 0.11738178133964539, + "loss_ib": 0.0035327845253050327, + "step": 2693 + }, + { + "ce_ib": 4.166375637054443, + "ce_orig": 0.6949523091316223, + "epoch": 0.7744625781867855, + "kl_loss": 0.35711896419525146, + "loss_ib": 0.007737564854323864, + "step": 2693 + }, + { + "ce_ib": 5.04532527923584, + "ce_orig": 0.9932501316070557, + "epoch": 0.7744625781867855, + "kl_loss": 0.2069970965385437, + "loss_ib": 0.007115296088159084, + "step": 2693 + }, + { + "ce_ib": 3.8409881591796875, + "ce_orig": 0.813295304775238, + "epoch": 0.7744625781867855, + "kl_loss": 0.13509809970855713, + "loss_ib": 0.005191969219595194, + "step": 2693 + }, + { + "ce_ib": 2.594104528427124, + "ce_orig": 0.6908443570137024, + "epoch": 0.7747501617657632, + "kl_loss": 0.1336483657360077, + "loss_ib": 0.003930588252842426, + "step": 2694 + }, + { + "ce_ib": 3.590452194213867, + "ce_orig": 0.6640672087669373, + "epoch": 0.7747501617657632, + "kl_loss": 0.22425517439842224, + "loss_ib": 0.005833004135638475, + "step": 2694 + }, + { + "ce_ib": 4.201746463775635, + "ce_orig": 0.8755315542221069, + "epoch": 0.7747501617657632, + "kl_loss": 0.18735718727111816, + "loss_ib": 0.0060753184370696545, + "step": 2694 + }, + { + "ce_ib": 3.4719390869140625, + "ce_orig": 0.90131676197052, + "epoch": 0.7747501617657632, + "kl_loss": 0.12206269055604935, + "loss_ib": 0.004692566115409136, + "step": 2694 + }, + { + "epoch": 0.7750377453447408, + "grad_norm": 0.12173628807067871, + "learning_rate": 8.701496465709658e-06, + "loss": 0.8655, + "step": 2695 + }, + { + "ce_ib": 3.3879430294036865, + "ce_orig": 0.4214743375778198, + "epoch": 0.7750377453447408, + "kl_loss": 0.22775623202323914, + "loss_ib": 0.005665505304932594, + "step": 2695 + }, + { + "ce_ib": 5.650472164154053, + "ce_orig": 1.2884719371795654, + "epoch": 0.7750377453447408, + "kl_loss": 0.19937117397785187, + "loss_ib": 0.0076441834680736065, + "step": 2695 + }, + { + "ce_ib": 4.519664764404297, + "ce_orig": 1.2139748334884644, + "epoch": 0.7750377453447408, + "kl_loss": 0.1951858550310135, + "loss_ib": 0.0064715235494077206, + "step": 2695 + }, + { + "ce_ib": 3.7043914794921875, + "ce_orig": 0.46516087651252747, + "epoch": 0.7750377453447408, + "kl_loss": 0.29766014218330383, + "loss_ib": 0.0066809928975999355, + "step": 2695 + }, + { + "ce_ib": 7.127460956573486, + "ce_orig": 1.0202349424362183, + "epoch": 0.7753253289237184, + "kl_loss": 0.17090535163879395, + "loss_ib": 0.008836514316499233, + "step": 2696 + }, + { + "ce_ib": 3.286564588546753, + "ce_orig": 0.8607140183448792, + "epoch": 0.7753253289237184, + "kl_loss": 0.21561230719089508, + "loss_ib": 0.005442687775939703, + "step": 2696 + }, + { + "ce_ib": 7.7063398361206055, + "ce_orig": 1.3946815729141235, + "epoch": 0.7753253289237184, + "kl_loss": 0.2705761790275574, + "loss_ib": 0.01041210163384676, + "step": 2696 + }, + { + "ce_ib": 4.268287181854248, + "ce_orig": 0.7079622149467468, + "epoch": 0.7753253289237184, + "kl_loss": 0.17713786661624908, + "loss_ib": 0.006039666011929512, + "step": 2696 + }, + { + "ce_ib": 4.962904453277588, + "ce_orig": 0.8362733125686646, + "epoch": 0.7756129125026962, + "kl_loss": 0.34171438217163086, + "loss_ib": 0.008380047976970673, + "step": 2697 + }, + { + "ce_ib": 2.954094886779785, + "ce_orig": 0.8275262713432312, + "epoch": 0.7756129125026962, + "kl_loss": 0.18495135009288788, + "loss_ib": 0.004803608637303114, + "step": 2697 + }, + { + "ce_ib": 6.413492202758789, + "ce_orig": 0.8163627982139587, + "epoch": 0.7756129125026962, + "kl_loss": 0.2912302017211914, + "loss_ib": 0.009325793944299221, + "step": 2697 + }, + { + "ce_ib": 2.805976629257202, + "ce_orig": 0.6467490792274475, + "epoch": 0.7756129125026962, + "kl_loss": 0.10465889424085617, + "loss_ib": 0.0038525655400007963, + "step": 2697 + }, + { + "ce_ib": 2.9664418697357178, + "ce_orig": 0.4171972870826721, + "epoch": 0.7759004960816738, + "kl_loss": 0.1846449077129364, + "loss_ib": 0.004812890663743019, + "step": 2698 + }, + { + "ce_ib": 3.7311689853668213, + "ce_orig": 0.7812168002128601, + "epoch": 0.7759004960816738, + "kl_loss": 0.1927301287651062, + "loss_ib": 0.005658470094203949, + "step": 2698 + }, + { + "ce_ib": 4.6271514892578125, + "ce_orig": 0.8795397281646729, + "epoch": 0.7759004960816738, + "kl_loss": 0.13556422293186188, + "loss_ib": 0.005982793401926756, + "step": 2698 + }, + { + "ce_ib": 3.8481667041778564, + "ce_orig": 0.6995415687561035, + "epoch": 0.7759004960816738, + "kl_loss": 0.18563339114189148, + "loss_ib": 0.005704500712454319, + "step": 2698 + }, + { + "ce_ib": 2.7853751182556152, + "ce_orig": 0.6338481307029724, + "epoch": 0.7761880796606514, + "kl_loss": 0.14729925990104675, + "loss_ib": 0.004258367698639631, + "step": 2699 + }, + { + "ce_ib": 7.1730732917785645, + "ce_orig": 1.6775718927383423, + "epoch": 0.7761880796606514, + "kl_loss": 0.2279331088066101, + "loss_ib": 0.009452404454350471, + "step": 2699 + }, + { + "ce_ib": 4.6817169189453125, + "ce_orig": 0.6098928451538086, + "epoch": 0.7761880796606514, + "kl_loss": 0.23269328474998474, + "loss_ib": 0.007008649408817291, + "step": 2699 + }, + { + "ce_ib": 6.569291591644287, + "ce_orig": 1.2347885370254517, + "epoch": 0.7761880796606514, + "kl_loss": 0.19254139065742493, + "loss_ib": 0.008494705893099308, + "step": 2699 + }, + { + "epoch": 0.776475663239629, + "grad_norm": 0.12347973138093948, + "learning_rate": 8.696274563598395e-06, + "loss": 0.8739, + "step": 2700 + }, + { + "ce_ib": 3.203612804412842, + "ce_orig": 0.4480385184288025, + "epoch": 0.776475663239629, + "kl_loss": 0.21258488297462463, + "loss_ib": 0.005329461768269539, + "step": 2700 + }, + { + "ce_ib": 4.213879585266113, + "ce_orig": 0.7582896947860718, + "epoch": 0.776475663239629, + "kl_loss": 0.18250152468681335, + "loss_ib": 0.006038894411176443, + "step": 2700 + }, + { + "ce_ib": 3.278160333633423, + "ce_orig": 0.9305478930473328, + "epoch": 0.776475663239629, + "kl_loss": 0.226128488779068, + "loss_ib": 0.005539445206522942, + "step": 2700 + }, + { + "ce_ib": 2.597438335418701, + "ce_orig": 0.7397278547286987, + "epoch": 0.776475663239629, + "kl_loss": 0.22017645835876465, + "loss_ib": 0.004799203015863895, + "step": 2700 + }, + { + "ce_ib": 2.916224718093872, + "ce_orig": 0.34332630038261414, + "epoch": 0.7767632468186066, + "kl_loss": 0.20001491904258728, + "loss_ib": 0.004916374105960131, + "step": 2701 + }, + { + "ce_ib": 4.902586460113525, + "ce_orig": 1.0312693119049072, + "epoch": 0.7767632468186066, + "kl_loss": 0.13715390861034393, + "loss_ib": 0.006274125538766384, + "step": 2701 + }, + { + "ce_ib": 5.039478778839111, + "ce_orig": 0.9197045564651489, + "epoch": 0.7767632468186066, + "kl_loss": 0.19457176327705383, + "loss_ib": 0.006985196843743324, + "step": 2701 + }, + { + "ce_ib": 6.869854927062988, + "ce_orig": 0.8780651688575745, + "epoch": 0.7767632468186066, + "kl_loss": 0.1970367580652237, + "loss_ib": 0.008840221911668777, + "step": 2701 + }, + { + "ce_ib": 5.269690036773682, + "ce_orig": 0.784523069858551, + "epoch": 0.7770508303975843, + "kl_loss": 0.18512074649333954, + "loss_ib": 0.007120897527784109, + "step": 2702 + }, + { + "ce_ib": 2.5189483165740967, + "ce_orig": 0.59951251745224, + "epoch": 0.7770508303975843, + "kl_loss": 0.14980730414390564, + "loss_ib": 0.004017021041363478, + "step": 2702 + }, + { + "ce_ib": 2.053790330886841, + "ce_orig": 0.43648841977119446, + "epoch": 0.7770508303975843, + "kl_loss": 0.14114448428153992, + "loss_ib": 0.0034652352333068848, + "step": 2702 + }, + { + "ce_ib": 5.934473991394043, + "ce_orig": 1.206220269203186, + "epoch": 0.7770508303975843, + "kl_loss": 0.19736945629119873, + "loss_ib": 0.007908168248832226, + "step": 2702 + }, + { + "ce_ib": 3.64151668548584, + "ce_orig": 0.7078453302383423, + "epoch": 0.7773384139765619, + "kl_loss": 0.11904677003622055, + "loss_ib": 0.004831984639167786, + "step": 2703 + }, + { + "ce_ib": 3.3232619762420654, + "ce_orig": 0.49810346961021423, + "epoch": 0.7773384139765619, + "kl_loss": 0.16154175996780396, + "loss_ib": 0.004938679747283459, + "step": 2703 + }, + { + "ce_ib": 6.1583099365234375, + "ce_orig": 1.2617319822311401, + "epoch": 0.7773384139765619, + "kl_loss": 0.15612542629241943, + "loss_ib": 0.007719564251601696, + "step": 2703 + }, + { + "ce_ib": 6.92116641998291, + "ce_orig": 1.3259848356246948, + "epoch": 0.7773384139765619, + "kl_loss": 0.12090645730495453, + "loss_ib": 0.008130230940878391, + "step": 2703 + }, + { + "ce_ib": 4.043232440948486, + "ce_orig": 0.554977297782898, + "epoch": 0.7776259975555396, + "kl_loss": 0.16329798102378845, + "loss_ib": 0.005676212254911661, + "step": 2704 + }, + { + "ce_ib": 5.904057502746582, + "ce_orig": 1.3732980489730835, + "epoch": 0.7776259975555396, + "kl_loss": 0.1736152172088623, + "loss_ib": 0.007640209514647722, + "step": 2704 + }, + { + "ce_ib": 1.660758376121521, + "ce_orig": 0.3187108635902405, + "epoch": 0.7776259975555396, + "kl_loss": 0.28603461384773254, + "loss_ib": 0.004521104507148266, + "step": 2704 + }, + { + "ce_ib": 5.045555591583252, + "ce_orig": 1.0134388208389282, + "epoch": 0.7776259975555396, + "kl_loss": 0.1769598126411438, + "loss_ib": 0.006815153639763594, + "step": 2704 + }, + { + "epoch": 0.7779135811345173, + "grad_norm": 0.12038562446832657, + "learning_rate": 8.691043756303783e-06, + "loss": 0.7671, + "step": 2705 + }, + { + "ce_ib": 3.5279858112335205, + "ce_orig": 0.8467187881469727, + "epoch": 0.7779135811345173, + "kl_loss": 0.23199772834777832, + "loss_ib": 0.005847963038831949, + "step": 2705 + }, + { + "ce_ib": 3.2106475830078125, + "ce_orig": 0.5957720875740051, + "epoch": 0.7779135811345173, + "kl_loss": 0.11117967963218689, + "loss_ib": 0.004322444554418325, + "step": 2705 + }, + { + "ce_ib": 3.568873405456543, + "ce_orig": 0.7546811103820801, + "epoch": 0.7779135811345173, + "kl_loss": 0.3638947606086731, + "loss_ib": 0.007207820657640696, + "step": 2705 + }, + { + "ce_ib": 4.330502986907959, + "ce_orig": 0.6962299942970276, + "epoch": 0.7779135811345173, + "kl_loss": 0.20825625956058502, + "loss_ib": 0.006413065828382969, + "step": 2705 + }, + { + "ce_ib": 4.074060916900635, + "ce_orig": 0.4406779706478119, + "epoch": 0.7782011647134949, + "kl_loss": 0.3436194658279419, + "loss_ib": 0.007510255556553602, + "step": 2706 + }, + { + "ce_ib": 5.668239116668701, + "ce_orig": 1.0138334035873413, + "epoch": 0.7782011647134949, + "kl_loss": 0.1889248788356781, + "loss_ib": 0.007557487115263939, + "step": 2706 + }, + { + "ce_ib": 5.497678756713867, + "ce_orig": 1.0756967067718506, + "epoch": 0.7782011647134949, + "kl_loss": 0.1771945059299469, + "loss_ib": 0.007269624155014753, + "step": 2706 + }, + { + "ce_ib": 4.6745195388793945, + "ce_orig": 0.606073260307312, + "epoch": 0.7782011647134949, + "kl_loss": 0.18823322653770447, + "loss_ib": 0.006556851789355278, + "step": 2706 + }, + { + "ce_ib": 5.023312568664551, + "ce_orig": 1.2428022623062134, + "epoch": 0.7784887482924725, + "kl_loss": 0.19273462891578674, + "loss_ib": 0.006950658280402422, + "step": 2707 + }, + { + "ce_ib": 3.142155170440674, + "ce_orig": 0.6934645771980286, + "epoch": 0.7784887482924725, + "kl_loss": 0.1817052662372589, + "loss_ib": 0.004959207493811846, + "step": 2707 + }, + { + "ce_ib": 3.929304361343384, + "ce_orig": 0.5754401087760925, + "epoch": 0.7784887482924725, + "kl_loss": 0.21322879195213318, + "loss_ib": 0.006061592139303684, + "step": 2707 + }, + { + "ce_ib": 2.999094247817993, + "ce_orig": 0.6005908846855164, + "epoch": 0.7784887482924725, + "kl_loss": 0.18800440430641174, + "loss_ib": 0.004879138432443142, + "step": 2707 + }, + { + "ce_ib": 4.866888999938965, + "ce_orig": 0.9780073761940002, + "epoch": 0.7787763318714501, + "kl_loss": 0.1526758223772049, + "loss_ib": 0.0063936468213796616, + "step": 2708 + }, + { + "ce_ib": 3.6466617584228516, + "ce_orig": 0.7223555445671082, + "epoch": 0.7787763318714501, + "kl_loss": 0.12220080196857452, + "loss_ib": 0.004868669901043177, + "step": 2708 + }, + { + "ce_ib": 2.942636728286743, + "ce_orig": 0.5459937453269958, + "epoch": 0.7787763318714501, + "kl_loss": 0.1917245090007782, + "loss_ib": 0.004859881941229105, + "step": 2708 + }, + { + "ce_ib": 3.027148723602295, + "ce_orig": 0.8216822743415833, + "epoch": 0.7787763318714501, + "kl_loss": 0.17816027998924255, + "loss_ib": 0.004808751400560141, + "step": 2708 + }, + { + "ce_ib": 4.645953178405762, + "ce_orig": 0.8390154838562012, + "epoch": 0.7790639154504277, + "kl_loss": 0.14945706725120544, + "loss_ib": 0.006140524055808783, + "step": 2709 + }, + { + "ce_ib": 6.468642711639404, + "ce_orig": 1.5108964443206787, + "epoch": 0.7790639154504277, + "kl_loss": 0.3112328052520752, + "loss_ib": 0.009580970741808414, + "step": 2709 + }, + { + "ce_ib": 2.9019930362701416, + "ce_orig": 0.7031728029251099, + "epoch": 0.7790639154504277, + "kl_loss": 0.1603904813528061, + "loss_ib": 0.004505897872149944, + "step": 2709 + }, + { + "ce_ib": 3.4052464962005615, + "ce_orig": 0.692156195640564, + "epoch": 0.7790639154504277, + "kl_loss": 0.11515810340642929, + "loss_ib": 0.004556827247142792, + "step": 2709 + }, + { + "epoch": 0.7793514990294054, + "grad_norm": 0.12613117694854736, + "learning_rate": 8.685804056428051e-06, + "loss": 0.8546, + "step": 2710 + }, + { + "ce_ib": 3.5062291622161865, + "ce_orig": 0.8018337488174438, + "epoch": 0.7793514990294054, + "kl_loss": 0.18911606073379517, + "loss_ib": 0.005397390108555555, + "step": 2710 + }, + { + "ce_ib": 5.01025390625, + "ce_orig": 1.0650445222854614, + "epoch": 0.7793514990294054, + "kl_loss": 0.1308707296848297, + "loss_ib": 0.0063189612701535225, + "step": 2710 + }, + { + "ce_ib": 5.010434627532959, + "ce_orig": 1.1510440111160278, + "epoch": 0.7793514990294054, + "kl_loss": 0.22114408016204834, + "loss_ib": 0.007221875712275505, + "step": 2710 + }, + { + "ce_ib": 2.953617811203003, + "ce_orig": 0.7106932401657104, + "epoch": 0.7793514990294054, + "kl_loss": 0.24088209867477417, + "loss_ib": 0.005362438969314098, + "step": 2710 + }, + { + "ce_ib": 1.7862536907196045, + "ce_orig": 0.3512114882469177, + "epoch": 0.7796390826083831, + "kl_loss": 0.16715696454048157, + "loss_ib": 0.0034578233025968075, + "step": 2711 + }, + { + "ce_ib": 7.503305912017822, + "ce_orig": 1.6962192058563232, + "epoch": 0.7796390826083831, + "kl_loss": 0.16802304983139038, + "loss_ib": 0.009183536283671856, + "step": 2711 + }, + { + "ce_ib": 2.8254716396331787, + "ce_orig": 0.5472760796546936, + "epoch": 0.7796390826083831, + "kl_loss": 0.188894122838974, + "loss_ib": 0.004714413080364466, + "step": 2711 + }, + { + "ce_ib": 3.5187113285064697, + "ce_orig": 0.8152872323989868, + "epoch": 0.7796390826083831, + "kl_loss": 0.26375612616539, + "loss_ib": 0.00615627272054553, + "step": 2711 + }, + { + "ce_ib": 2.8427863121032715, + "ce_orig": 0.5060200095176697, + "epoch": 0.7799266661873607, + "kl_loss": 0.39670461416244507, + "loss_ib": 0.006809832528233528, + "step": 2712 + }, + { + "ce_ib": 6.1493449211120605, + "ce_orig": 1.3759946823120117, + "epoch": 0.7799266661873607, + "kl_loss": 0.21549078822135925, + "loss_ib": 0.008304252289235592, + "step": 2712 + }, + { + "ce_ib": 5.895354747772217, + "ce_orig": 1.139642596244812, + "epoch": 0.7799266661873607, + "kl_loss": 0.21992060542106628, + "loss_ib": 0.008094561286270618, + "step": 2712 + }, + { + "ce_ib": 6.050610542297363, + "ce_orig": 1.0301744937896729, + "epoch": 0.7799266661873607, + "kl_loss": 0.183490589261055, + "loss_ib": 0.007885515689849854, + "step": 2712 + }, + { + "ce_ib": 3.088456630706787, + "ce_orig": 0.5854818820953369, + "epoch": 0.7802142497663384, + "kl_loss": 0.16750502586364746, + "loss_ib": 0.004763506818562746, + "step": 2713 + }, + { + "ce_ib": 6.339079856872559, + "ce_orig": 1.1519056558609009, + "epoch": 0.7802142497663384, + "kl_loss": 0.1994762420654297, + "loss_ib": 0.008333842270076275, + "step": 2713 + }, + { + "ce_ib": 3.188093423843384, + "ce_orig": 0.6260164976119995, + "epoch": 0.7802142497663384, + "kl_loss": 0.19529280066490173, + "loss_ib": 0.005141021683812141, + "step": 2713 + }, + { + "ce_ib": 3.5215060710906982, + "ce_orig": 0.8398208022117615, + "epoch": 0.7802142497663384, + "kl_loss": 0.23008506000041962, + "loss_ib": 0.005822356790304184, + "step": 2713 + }, + { + "ce_ib": 2.6426968574523926, + "ce_orig": 0.6388031244277954, + "epoch": 0.780501833345316, + "kl_loss": 0.14691084623336792, + "loss_ib": 0.004111804999411106, + "step": 2714 + }, + { + "ce_ib": 4.943881511688232, + "ce_orig": 0.831605076789856, + "epoch": 0.780501833345316, + "kl_loss": 0.2703228294849396, + "loss_ib": 0.007647110149264336, + "step": 2714 + }, + { + "ce_ib": 5.140976905822754, + "ce_orig": 1.3906036615371704, + "epoch": 0.780501833345316, + "kl_loss": 0.17738981544971466, + "loss_ib": 0.006914874538779259, + "step": 2714 + }, + { + "ce_ib": 6.2101898193359375, + "ce_orig": 1.120468020439148, + "epoch": 0.780501833345316, + "kl_loss": 0.3212350010871887, + "loss_ib": 0.009422539733350277, + "step": 2714 + }, + { + "epoch": 0.7807894169242936, + "grad_norm": 0.13475124537944794, + "learning_rate": 8.680555476594856e-06, + "loss": 0.8517, + "step": 2715 + }, + { + "ce_ib": 2.7186832427978516, + "ce_orig": 0.36960017681121826, + "epoch": 0.7807894169242936, + "kl_loss": 0.2547531723976135, + "loss_ib": 0.005266214720904827, + "step": 2715 + }, + { + "ce_ib": 4.612826347351074, + "ce_orig": 0.6630774736404419, + "epoch": 0.7807894169242936, + "kl_loss": 0.15206821262836456, + "loss_ib": 0.006133508402854204, + "step": 2715 + }, + { + "ce_ib": 3.0367722511291504, + "ce_orig": 0.4402415454387665, + "epoch": 0.7807894169242936, + "kl_loss": 0.19668874144554138, + "loss_ib": 0.005003659520298243, + "step": 2715 + }, + { + "ce_ib": 6.169906139373779, + "ce_orig": 1.0097582340240479, + "epoch": 0.7807894169242936, + "kl_loss": 0.16498789191246033, + "loss_ib": 0.007819784805178642, + "step": 2715 + }, + { + "ce_ib": 3.5743210315704346, + "ce_orig": 0.7624484896659851, + "epoch": 0.7810770005032712, + "kl_loss": 0.15138256549835205, + "loss_ib": 0.005088146775960922, + "step": 2716 + }, + { + "ce_ib": 1.110661506652832, + "ce_orig": 0.3755703270435333, + "epoch": 0.7810770005032712, + "kl_loss": 0.10201916098594666, + "loss_ib": 0.002130853245034814, + "step": 2716 + }, + { + "ce_ib": 4.4699177742004395, + "ce_orig": 0.8524041771888733, + "epoch": 0.7810770005032712, + "kl_loss": 0.2791486382484436, + "loss_ib": 0.00726140383630991, + "step": 2716 + }, + { + "ce_ib": 3.5189054012298584, + "ce_orig": 0.6458645462989807, + "epoch": 0.7810770005032712, + "kl_loss": 0.2172730714082718, + "loss_ib": 0.005691635888069868, + "step": 2716 + }, + { + "ce_ib": 5.652476787567139, + "ce_orig": 0.9217711687088013, + "epoch": 0.781364584082249, + "kl_loss": 0.2503940463066101, + "loss_ib": 0.00815641786903143, + "step": 2717 + }, + { + "ce_ib": 2.188689947128296, + "ce_orig": 0.5551097393035889, + "epoch": 0.781364584082249, + "kl_loss": 0.09967285394668579, + "loss_ib": 0.0031854184344410896, + "step": 2717 + }, + { + "ce_ib": 5.457345008850098, + "ce_orig": 0.9253448247909546, + "epoch": 0.781364584082249, + "kl_loss": 0.25344645977020264, + "loss_ib": 0.007991809397935867, + "step": 2717 + }, + { + "ce_ib": 2.250331163406372, + "ce_orig": 0.5643190741539001, + "epoch": 0.781364584082249, + "kl_loss": 0.12146709859371185, + "loss_ib": 0.0034650021698325872, + "step": 2717 + }, + { + "ce_ib": 4.525414943695068, + "ce_orig": 0.6623874306678772, + "epoch": 0.7816521676612266, + "kl_loss": 0.16599920392036438, + "loss_ib": 0.006185406818985939, + "step": 2718 + }, + { + "ce_ib": 3.6189305782318115, + "ce_orig": 0.9099918007850647, + "epoch": 0.7816521676612266, + "kl_loss": 0.11824986338615417, + "loss_ib": 0.004801428876817226, + "step": 2718 + }, + { + "ce_ib": 5.778247833251953, + "ce_orig": 0.8001450300216675, + "epoch": 0.7816521676612266, + "kl_loss": 0.2708320617675781, + "loss_ib": 0.008486567996442318, + "step": 2718 + }, + { + "ce_ib": 4.502006530761719, + "ce_orig": 0.9487282633781433, + "epoch": 0.7816521676612266, + "kl_loss": 0.24400541186332703, + "loss_ib": 0.006942060310393572, + "step": 2718 + }, + { + "ce_ib": 3.520664691925049, + "ce_orig": 0.6303805708885193, + "epoch": 0.7819397512402042, + "kl_loss": 0.18012355268001556, + "loss_ib": 0.005321900360286236, + "step": 2719 + }, + { + "ce_ib": 3.9685168266296387, + "ce_orig": 1.0573605298995972, + "epoch": 0.7819397512402042, + "kl_loss": 0.14996476471424103, + "loss_ib": 0.005468164570629597, + "step": 2719 + }, + { + "ce_ib": 6.472084045410156, + "ce_orig": 1.268609881401062, + "epoch": 0.7819397512402042, + "kl_loss": 0.1305822730064392, + "loss_ib": 0.007777906488627195, + "step": 2719 + }, + { + "ce_ib": 6.392467498779297, + "ce_orig": 1.3696576356887817, + "epoch": 0.7819397512402042, + "kl_loss": 0.22345314919948578, + "loss_ib": 0.008626999333500862, + "step": 2719 + }, + { + "epoch": 0.7822273348191818, + "grad_norm": 0.1507391482591629, + "learning_rate": 8.675298029449241e-06, + "loss": 0.9011, + "step": 2720 + }, + { + "ce_ib": 3.1935646533966064, + "ce_orig": 0.651075541973114, + "epoch": 0.7822273348191818, + "kl_loss": 0.19866079092025757, + "loss_ib": 0.005180172622203827, + "step": 2720 + }, + { + "ce_ib": 5.4806928634643555, + "ce_orig": 0.6173608899116516, + "epoch": 0.7822273348191818, + "kl_loss": 0.16603083908557892, + "loss_ib": 0.007141001056879759, + "step": 2720 + }, + { + "ce_ib": 5.80867338180542, + "ce_orig": 1.3396787643432617, + "epoch": 0.7822273348191818, + "kl_loss": 0.2363133430480957, + "loss_ib": 0.008171807043254375, + "step": 2720 + }, + { + "ce_ib": 4.558791160583496, + "ce_orig": 0.9239268898963928, + "epoch": 0.7822273348191818, + "kl_loss": 0.24151882529258728, + "loss_ib": 0.006973979528993368, + "step": 2720 + }, + { + "ce_ib": 2.8407487869262695, + "ce_orig": 0.7757731676101685, + "epoch": 0.7825149183981595, + "kl_loss": 0.29815295338630676, + "loss_ib": 0.005822278093546629, + "step": 2721 + }, + { + "ce_ib": 2.621760845184326, + "ce_orig": 0.39356735348701477, + "epoch": 0.7825149183981595, + "kl_loss": 0.2701706886291504, + "loss_ib": 0.005323467776179314, + "step": 2721 + }, + { + "ce_ib": 4.154554843902588, + "ce_orig": 0.8714709281921387, + "epoch": 0.7825149183981595, + "kl_loss": 0.2828247547149658, + "loss_ib": 0.006982801947742701, + "step": 2721 + }, + { + "ce_ib": 1.9910200834274292, + "ce_orig": 0.37471655011177063, + "epoch": 0.7825149183981595, + "kl_loss": 0.1833181083202362, + "loss_ib": 0.0038242011796683073, + "step": 2721 + }, + { + "ce_ib": 5.041821002960205, + "ce_orig": 0.9591912627220154, + "epoch": 0.7828025019771371, + "kl_loss": 0.18540039658546448, + "loss_ib": 0.006895824335515499, + "step": 2722 + }, + { + "ce_ib": 2.1645216941833496, + "ce_orig": 0.410467267036438, + "epoch": 0.7828025019771371, + "kl_loss": 0.20135310292243958, + "loss_ib": 0.004178052302449942, + "step": 2722 + }, + { + "ce_ib": 4.068915367126465, + "ce_orig": 0.6603286266326904, + "epoch": 0.7828025019771371, + "kl_loss": 0.1958611011505127, + "loss_ib": 0.006027526687830687, + "step": 2722 + }, + { + "ce_ib": 4.691650867462158, + "ce_orig": 0.9408314824104309, + "epoch": 0.7828025019771371, + "kl_loss": 0.1991593837738037, + "loss_ib": 0.0066832443699240685, + "step": 2722 + }, + { + "ce_ib": 2.230818271636963, + "ce_orig": 0.5093337297439575, + "epoch": 0.7830900855561147, + "kl_loss": 0.15979281067848206, + "loss_ib": 0.0038287462666630745, + "step": 2723 + }, + { + "ce_ib": 2.174407958984375, + "ce_orig": 0.4368442893028259, + "epoch": 0.7830900855561147, + "kl_loss": 0.11443396657705307, + "loss_ib": 0.003318747505545616, + "step": 2723 + }, + { + "ce_ib": 5.406113624572754, + "ce_orig": 1.226242184638977, + "epoch": 0.7830900855561147, + "kl_loss": 0.15773087739944458, + "loss_ib": 0.006983422674238682, + "step": 2723 + }, + { + "ce_ib": 3.943209409713745, + "ce_orig": 0.4984041452407837, + "epoch": 0.7830900855561147, + "kl_loss": 0.3206872344017029, + "loss_ib": 0.00715008145198226, + "step": 2723 + }, + { + "ce_ib": 2.952493190765381, + "ce_orig": 0.6862987279891968, + "epoch": 0.7833776691350924, + "kl_loss": 0.16699567437171936, + "loss_ib": 0.00462244963273406, + "step": 2724 + }, + { + "ce_ib": 4.5579833984375, + "ce_orig": 1.098588228225708, + "epoch": 0.7833776691350924, + "kl_loss": 0.20472615957260132, + "loss_ib": 0.00660524470731616, + "step": 2724 + }, + { + "ce_ib": 2.937892198562622, + "ce_orig": 0.6423895359039307, + "epoch": 0.7833776691350924, + "kl_loss": 0.1455557942390442, + "loss_ib": 0.004393450450152159, + "step": 2724 + }, + { + "ce_ib": 4.520116329193115, + "ce_orig": 0.38412004709243774, + "epoch": 0.7833776691350924, + "kl_loss": 0.15677770972251892, + "loss_ib": 0.006087893154472113, + "step": 2724 + }, + { + "epoch": 0.7836652527140701, + "grad_norm": 0.12537802755832672, + "learning_rate": 8.670031727657619e-06, + "loss": 0.8167, + "step": 2725 + }, + { + "ce_ib": 5.611186981201172, + "ce_orig": 0.8701201677322388, + "epoch": 0.7836652527140701, + "kl_loss": 0.32899320125579834, + "loss_ib": 0.008901119232177734, + "step": 2725 + }, + { + "ce_ib": 1.9109997749328613, + "ce_orig": 0.43149980902671814, + "epoch": 0.7836652527140701, + "kl_loss": 0.2172943651676178, + "loss_ib": 0.004083943087607622, + "step": 2725 + }, + { + "ce_ib": 3.7204318046569824, + "ce_orig": 0.7511438131332397, + "epoch": 0.7836652527140701, + "kl_loss": 0.26910266280174255, + "loss_ib": 0.006411457899957895, + "step": 2725 + }, + { + "ce_ib": 3.3525071144104004, + "ce_orig": 0.5433449149131775, + "epoch": 0.7836652527140701, + "kl_loss": 0.21967625617980957, + "loss_ib": 0.00554926972836256, + "step": 2725 + }, + { + "ce_ib": 5.362212657928467, + "ce_orig": 0.874241054058075, + "epoch": 0.7839528362930477, + "kl_loss": 0.26424533128738403, + "loss_ib": 0.00800466537475586, + "step": 2726 + }, + { + "ce_ib": 3.239499568939209, + "ce_orig": 0.6779794096946716, + "epoch": 0.7839528362930477, + "kl_loss": 0.19285258650779724, + "loss_ib": 0.005168025381863117, + "step": 2726 + }, + { + "ce_ib": 4.798427104949951, + "ce_orig": 0.5724495053291321, + "epoch": 0.7839528362930477, + "kl_loss": 0.1949080228805542, + "loss_ib": 0.006747507490217686, + "step": 2726 + }, + { + "ce_ib": 5.448785305023193, + "ce_orig": 1.4226347208023071, + "epoch": 0.7839528362930477, + "kl_loss": 0.1756843626499176, + "loss_ib": 0.007205629255622625, + "step": 2726 + }, + { + "ce_ib": 3.9827942848205566, + "ce_orig": 0.6478061676025391, + "epoch": 0.7842404198720253, + "kl_loss": 0.17866963148117065, + "loss_ib": 0.005769490264356136, + "step": 2727 + }, + { + "ce_ib": 4.743083953857422, + "ce_orig": 0.741560697555542, + "epoch": 0.7842404198720253, + "kl_loss": 0.1720791906118393, + "loss_ib": 0.006463875528424978, + "step": 2727 + }, + { + "ce_ib": 6.274484634399414, + "ce_orig": 1.3918360471725464, + "epoch": 0.7842404198720253, + "kl_loss": 0.21030181646347046, + "loss_ib": 0.008377502672374249, + "step": 2727 + }, + { + "ce_ib": 3.4608652591705322, + "ce_orig": 0.841582715511322, + "epoch": 0.7842404198720253, + "kl_loss": 0.17983579635620117, + "loss_ib": 0.005259222816675901, + "step": 2727 + }, + { + "ce_ib": 7.517250061035156, + "ce_orig": 1.6494693756103516, + "epoch": 0.7845280034510029, + "kl_loss": 0.2609565258026123, + "loss_ib": 0.010126814246177673, + "step": 2728 + }, + { + "ce_ib": 5.135434150695801, + "ce_orig": 0.5945441126823425, + "epoch": 0.7845280034510029, + "kl_loss": 0.2340955138206482, + "loss_ib": 0.007476389408111572, + "step": 2728 + }, + { + "ce_ib": 5.948171615600586, + "ce_orig": 0.782850980758667, + "epoch": 0.7845280034510029, + "kl_loss": 0.13445721566677094, + "loss_ib": 0.007292743772268295, + "step": 2728 + }, + { + "ce_ib": 3.1776576042175293, + "ce_orig": 0.5856213569641113, + "epoch": 0.7845280034510029, + "kl_loss": 0.28212517499923706, + "loss_ib": 0.0059989094734191895, + "step": 2728 + }, + { + "ce_ib": 5.18125057220459, + "ce_orig": 1.0885370969772339, + "epoch": 0.7848155870299806, + "kl_loss": 0.20825104415416718, + "loss_ib": 0.007263761013746262, + "step": 2729 + }, + { + "ce_ib": 5.486889839172363, + "ce_orig": 0.9282345771789551, + "epoch": 0.7848155870299806, + "kl_loss": 0.24076706171035767, + "loss_ib": 0.007894560694694519, + "step": 2729 + }, + { + "ce_ib": 6.342306613922119, + "ce_orig": 1.2040321826934814, + "epoch": 0.7848155870299806, + "kl_loss": 0.23474547266960144, + "loss_ib": 0.0086897611618042, + "step": 2729 + }, + { + "ce_ib": 6.790159702301025, + "ce_orig": 1.2831542491912842, + "epoch": 0.7848155870299806, + "kl_loss": 0.22844929993152618, + "loss_ib": 0.009074652567505836, + "step": 2729 + }, + { + "epoch": 0.7851031706089582, + "grad_norm": 0.12554174661636353, + "learning_rate": 8.664756583907732e-06, + "loss": 0.8963, + "step": 2730 + }, + { + "ce_ib": 2.84965181350708, + "ce_orig": 0.7848037481307983, + "epoch": 0.7851031706089582, + "kl_loss": 0.24064317345619202, + "loss_ib": 0.005256083328276873, + "step": 2730 + }, + { + "ce_ib": 3.766002893447876, + "ce_orig": 0.7471505999565125, + "epoch": 0.7851031706089582, + "kl_loss": 0.15807577967643738, + "loss_ib": 0.005346760619431734, + "step": 2730 + }, + { + "ce_ib": 5.252348899841309, + "ce_orig": 1.28067147731781, + "epoch": 0.7851031706089582, + "kl_loss": 0.28510189056396484, + "loss_ib": 0.008103367872536182, + "step": 2730 + }, + { + "ce_ib": 4.7102227210998535, + "ce_orig": 1.0900607109069824, + "epoch": 0.7851031706089582, + "kl_loss": 0.19359925389289856, + "loss_ib": 0.006646214984357357, + "step": 2730 + }, + { + "ce_ib": 5.289003849029541, + "ce_orig": 1.2580071687698364, + "epoch": 0.7853907541879359, + "kl_loss": 0.19912269711494446, + "loss_ib": 0.007280230522155762, + "step": 2731 + }, + { + "ce_ib": 6.305051803588867, + "ce_orig": 1.3635590076446533, + "epoch": 0.7853907541879359, + "kl_loss": 0.2279193252325058, + "loss_ib": 0.00858424510806799, + "step": 2731 + }, + { + "ce_ib": 5.449383735656738, + "ce_orig": 0.7337322235107422, + "epoch": 0.7853907541879359, + "kl_loss": 0.12912139296531677, + "loss_ib": 0.006740597542375326, + "step": 2731 + }, + { + "ce_ib": 2.5998284816741943, + "ce_orig": 0.6283999085426331, + "epoch": 0.7853907541879359, + "kl_loss": 0.12789136171340942, + "loss_ib": 0.00387874199077487, + "step": 2731 + }, + { + "ce_ib": 2.894594669342041, + "ce_orig": 0.7734305262565613, + "epoch": 0.7856783377669135, + "kl_loss": 0.1547931581735611, + "loss_ib": 0.004442526027560234, + "step": 2732 + }, + { + "ce_ib": 3.5512070655822754, + "ce_orig": 0.7577702403068542, + "epoch": 0.7856783377669135, + "kl_loss": 0.13697198033332825, + "loss_ib": 0.0049209268763661385, + "step": 2732 + }, + { + "ce_ib": 4.628597736358643, + "ce_orig": 0.865829586982727, + "epoch": 0.7856783377669135, + "kl_loss": 0.11890053749084473, + "loss_ib": 0.005817602854222059, + "step": 2732 + }, + { + "ce_ib": 3.6676313877105713, + "ce_orig": 1.2243973016738892, + "epoch": 0.7856783377669135, + "kl_loss": 0.15291428565979004, + "loss_ib": 0.005196773912757635, + "step": 2732 + }, + { + "ce_ib": 6.416301727294922, + "ce_orig": 1.3570109605789185, + "epoch": 0.7859659213458912, + "kl_loss": 0.2397879660129547, + "loss_ib": 0.008814181201159954, + "step": 2733 + }, + { + "ce_ib": 4.670480728149414, + "ce_orig": 0.5524818301200867, + "epoch": 0.7859659213458912, + "kl_loss": 0.23820951581001282, + "loss_ib": 0.007052575703710318, + "step": 2733 + }, + { + "ce_ib": 3.8948171138763428, + "ce_orig": 0.6598958373069763, + "epoch": 0.7859659213458912, + "kl_loss": 0.19862905144691467, + "loss_ib": 0.0058811078779399395, + "step": 2733 + }, + { + "ce_ib": 2.4426281452178955, + "ce_orig": 0.35372820496559143, + "epoch": 0.7859659213458912, + "kl_loss": 0.29576531052589417, + "loss_ib": 0.005400280933827162, + "step": 2733 + }, + { + "ce_ib": 5.338825225830078, + "ce_orig": 1.4058363437652588, + "epoch": 0.7862535049248688, + "kl_loss": 0.18193553388118744, + "loss_ib": 0.007158180698752403, + "step": 2734 + }, + { + "ce_ib": 4.913614273071289, + "ce_orig": 0.6737449169158936, + "epoch": 0.7862535049248688, + "kl_loss": 0.27140572667121887, + "loss_ib": 0.007627671584486961, + "step": 2734 + }, + { + "ce_ib": 2.3888213634490967, + "ce_orig": 0.26473549008369446, + "epoch": 0.7862535049248688, + "kl_loss": 0.5566707849502563, + "loss_ib": 0.007955528795719147, + "step": 2734 + }, + { + "ce_ib": 4.576173305511475, + "ce_orig": 0.8816800117492676, + "epoch": 0.7862535049248688, + "kl_loss": 0.15599694848060608, + "loss_ib": 0.006136143114417791, + "step": 2734 + }, + { + "epoch": 0.7865410885038464, + "grad_norm": 0.1405750960111618, + "learning_rate": 8.659472610908628e-06, + "loss": 0.873, + "step": 2735 + }, + { + "ce_ib": 3.4770314693450928, + "ce_orig": 0.6534469723701477, + "epoch": 0.7865410885038464, + "kl_loss": 0.1717129498720169, + "loss_ib": 0.005194160621613264, + "step": 2735 + }, + { + "ce_ib": 4.225412845611572, + "ce_orig": 0.6392700672149658, + "epoch": 0.7865410885038464, + "kl_loss": 0.249902606010437, + "loss_ib": 0.00672443863004446, + "step": 2735 + }, + { + "ce_ib": 4.901531219482422, + "ce_orig": 0.9861379861831665, + "epoch": 0.7865410885038464, + "kl_loss": 0.17420238256454468, + "loss_ib": 0.006643555127084255, + "step": 2735 + }, + { + "ce_ib": 4.6517205238342285, + "ce_orig": 0.6390941739082336, + "epoch": 0.7865410885038464, + "kl_loss": 0.1649528592824936, + "loss_ib": 0.006301248911768198, + "step": 2735 + }, + { + "ce_ib": 2.2542264461517334, + "ce_orig": 0.45972391963005066, + "epoch": 0.786828672082824, + "kl_loss": 0.15816381573677063, + "loss_ib": 0.0038358645979315042, + "step": 2736 + }, + { + "ce_ib": 4.460427284240723, + "ce_orig": 0.525603175163269, + "epoch": 0.786828672082824, + "kl_loss": 0.21258553862571716, + "loss_ib": 0.006586282514035702, + "step": 2736 + }, + { + "ce_ib": 6.900876998901367, + "ce_orig": 1.6743699312210083, + "epoch": 0.786828672082824, + "kl_loss": 0.2404664158821106, + "loss_ib": 0.009305541403591633, + "step": 2736 + }, + { + "ce_ib": 2.672252655029297, + "ce_orig": 0.45290109515190125, + "epoch": 0.786828672082824, + "kl_loss": 0.1283743679523468, + "loss_ib": 0.0039559961296617985, + "step": 2736 + }, + { + "ce_ib": 3.5261294841766357, + "ce_orig": 0.6866245865821838, + "epoch": 0.7871162556618017, + "kl_loss": 0.14537011086940765, + "loss_ib": 0.004979830235242844, + "step": 2737 + }, + { + "ce_ib": 3.4089927673339844, + "ce_orig": 0.8131651878356934, + "epoch": 0.7871162556618017, + "kl_loss": 0.21698397397994995, + "loss_ib": 0.005578832700848579, + "step": 2737 + }, + { + "ce_ib": 3.7060933113098145, + "ce_orig": 0.9040982127189636, + "epoch": 0.7871162556618017, + "kl_loss": 0.1103254109621048, + "loss_ib": 0.0048093474470078945, + "step": 2737 + }, + { + "ce_ib": 4.779409885406494, + "ce_orig": 1.026601791381836, + "epoch": 0.7871162556618017, + "kl_loss": 0.15338656306266785, + "loss_ib": 0.006313275545835495, + "step": 2737 + }, + { + "ce_ib": 1.8242250680923462, + "ce_orig": 0.26663699746131897, + "epoch": 0.7874038392407794, + "kl_loss": 0.3459293246269226, + "loss_ib": 0.005283518228679895, + "step": 2738 + }, + { + "ce_ib": 3.9527640342712402, + "ce_orig": 1.009655237197876, + "epoch": 0.7874038392407794, + "kl_loss": 0.19118760526180267, + "loss_ib": 0.005864639766514301, + "step": 2738 + }, + { + "ce_ib": 5.1099534034729, + "ce_orig": 0.9213424921035767, + "epoch": 0.7874038392407794, + "kl_loss": 0.165008544921875, + "loss_ib": 0.006760038435459137, + "step": 2738 + }, + { + "ce_ib": 6.328514575958252, + "ce_orig": 1.234072208404541, + "epoch": 0.7874038392407794, + "kl_loss": 0.3562542498111725, + "loss_ib": 0.009891056455671787, + "step": 2738 + }, + { + "ce_ib": 2.8221323490142822, + "ce_orig": 0.5539727210998535, + "epoch": 0.787691422819757, + "kl_loss": 0.28737008571624756, + "loss_ib": 0.005695832893252373, + "step": 2739 + }, + { + "ce_ib": 4.947691917419434, + "ce_orig": 0.8832185864448547, + "epoch": 0.787691422819757, + "kl_loss": 0.24668726325035095, + "loss_ib": 0.007414564490318298, + "step": 2739 + }, + { + "ce_ib": 3.5642197132110596, + "ce_orig": 0.7994794249534607, + "epoch": 0.787691422819757, + "kl_loss": 0.1580054610967636, + "loss_ib": 0.005144274327903986, + "step": 2739 + }, + { + "ce_ib": 2.206226110458374, + "ce_orig": 0.3500339686870575, + "epoch": 0.787691422819757, + "kl_loss": 0.14829157292842865, + "loss_ib": 0.0036891417112201452, + "step": 2739 + }, + { + "epoch": 0.7879790063987346, + "grad_norm": 0.13288362324237823, + "learning_rate": 8.65417982139062e-06, + "loss": 0.8406, + "step": 2740 + }, + { + "ce_ib": 6.469200134277344, + "ce_orig": 1.4757155179977417, + "epoch": 0.7879790063987346, + "kl_loss": 0.24287369847297668, + "loss_ib": 0.008897936902940273, + "step": 2740 + }, + { + "ce_ib": 5.145862102508545, + "ce_orig": 1.0043734312057495, + "epoch": 0.7879790063987346, + "kl_loss": 0.18999546766281128, + "loss_ib": 0.007045816630125046, + "step": 2740 + }, + { + "ce_ib": 1.94945228099823, + "ce_orig": 0.4839635193347931, + "epoch": 0.7879790063987346, + "kl_loss": 0.17327482998371124, + "loss_ib": 0.003682200564071536, + "step": 2740 + }, + { + "ce_ib": 4.200349807739258, + "ce_orig": 0.8614667057991028, + "epoch": 0.7879790063987346, + "kl_loss": 0.2621985673904419, + "loss_ib": 0.006822335533797741, + "step": 2740 + }, + { + "ce_ib": 2.7876837253570557, + "ce_orig": 0.6414878964424133, + "epoch": 0.7882665899777123, + "kl_loss": 0.14853867888450623, + "loss_ib": 0.004273070488125086, + "step": 2741 + }, + { + "ce_ib": 4.544503211975098, + "ce_orig": 0.44164010882377625, + "epoch": 0.7882665899777123, + "kl_loss": 0.19419780373573303, + "loss_ib": 0.0064864810556173325, + "step": 2741 + }, + { + "ce_ib": 6.916110038757324, + "ce_orig": 1.4075812101364136, + "epoch": 0.7882665899777123, + "kl_loss": 0.278253972530365, + "loss_ib": 0.009698649868369102, + "step": 2741 + }, + { + "ce_ib": 3.5414652824401855, + "ce_orig": 0.5956655740737915, + "epoch": 0.7882665899777123, + "kl_loss": 0.1729031354188919, + "loss_ib": 0.005270496476441622, + "step": 2741 + }, + { + "ce_ib": 4.198173522949219, + "ce_orig": 0.7491302490234375, + "epoch": 0.7885541735566899, + "kl_loss": 0.2013479769229889, + "loss_ib": 0.0062116533517837524, + "step": 2742 + }, + { + "ce_ib": 4.3359551429748535, + "ce_orig": 0.8029128909111023, + "epoch": 0.7885541735566899, + "kl_loss": 0.18447285890579224, + "loss_ib": 0.006180683150887489, + "step": 2742 + }, + { + "ce_ib": 4.134951591491699, + "ce_orig": 0.5711294412612915, + "epoch": 0.7885541735566899, + "kl_loss": 0.23934298753738403, + "loss_ib": 0.0065283807925879955, + "step": 2742 + }, + { + "ce_ib": 3.1764371395111084, + "ce_orig": 0.4846685528755188, + "epoch": 0.7885541735566899, + "kl_loss": 0.12998847663402557, + "loss_ib": 0.0044763218611478806, + "step": 2742 + }, + { + "ce_ib": 2.4909403324127197, + "ce_orig": 0.35042428970336914, + "epoch": 0.7888417571356675, + "kl_loss": 0.16934789717197418, + "loss_ib": 0.0041844192892313, + "step": 2743 + }, + { + "ce_ib": 3.8890864849090576, + "ce_orig": 0.7771456241607666, + "epoch": 0.7888417571356675, + "kl_loss": 0.21861794590950012, + "loss_ib": 0.006075265817344189, + "step": 2743 + }, + { + "ce_ib": 3.966567277908325, + "ce_orig": 0.6934286952018738, + "epoch": 0.7888417571356675, + "kl_loss": 0.3333410620689392, + "loss_ib": 0.007299977354705334, + "step": 2743 + }, + { + "ce_ib": 3.649965524673462, + "ce_orig": 1.0220375061035156, + "epoch": 0.7888417571356675, + "kl_loss": 0.1598169207572937, + "loss_ib": 0.0052481344901025295, + "step": 2743 + }, + { + "ce_ib": 3.9928863048553467, + "ce_orig": 0.5848942995071411, + "epoch": 0.7891293407146452, + "kl_loss": 0.306313157081604, + "loss_ib": 0.007056017871946096, + "step": 2744 + }, + { + "ce_ib": 3.8919413089752197, + "ce_orig": 0.9523884654045105, + "epoch": 0.7891293407146452, + "kl_loss": 0.18412822484970093, + "loss_ib": 0.005733223631978035, + "step": 2744 + }, + { + "ce_ib": 2.3355541229248047, + "ce_orig": 0.5770450234413147, + "epoch": 0.7891293407146452, + "kl_loss": 0.3074193000793457, + "loss_ib": 0.005409747362136841, + "step": 2744 + }, + { + "ce_ib": 3.54097843170166, + "ce_orig": 0.5804538726806641, + "epoch": 0.7891293407146452, + "kl_loss": 0.2105836570262909, + "loss_ib": 0.005646815057843924, + "step": 2744 + }, + { + "epoch": 0.7894169242936229, + "grad_norm": 0.13089072704315186, + "learning_rate": 8.648878228105272e-06, + "loss": 0.7968, + "step": 2745 + }, + { + "ce_ib": 4.697009086608887, + "ce_orig": 0.6946346759796143, + "epoch": 0.7894169242936229, + "kl_loss": 0.22419506311416626, + "loss_ib": 0.006938959006220102, + "step": 2745 + }, + { + "ce_ib": 2.951273202896118, + "ce_orig": 0.48735567927360535, + "epoch": 0.7894169242936229, + "kl_loss": 0.18669840693473816, + "loss_ib": 0.004818257410079241, + "step": 2745 + }, + { + "ce_ib": 3.259666919708252, + "ce_orig": 0.6200994849205017, + "epoch": 0.7894169242936229, + "kl_loss": 0.13377158343791962, + "loss_ib": 0.004597382619976997, + "step": 2745 + }, + { + "ce_ib": 3.75490403175354, + "ce_orig": 0.7715207934379578, + "epoch": 0.7894169242936229, + "kl_loss": 0.20120392739772797, + "loss_ib": 0.005766943097114563, + "step": 2745 + }, + { + "ce_ib": 5.6369123458862305, + "ce_orig": 0.7197417616844177, + "epoch": 0.7897045078726005, + "kl_loss": 0.1298103779554367, + "loss_ib": 0.006935016252100468, + "step": 2746 + }, + { + "ce_ib": 6.9938483238220215, + "ce_orig": 1.3413105010986328, + "epoch": 0.7897045078726005, + "kl_loss": 0.21598517894744873, + "loss_ib": 0.009153700433671474, + "step": 2746 + }, + { + "ce_ib": 3.7235426902770996, + "ce_orig": 0.7142806053161621, + "epoch": 0.7897045078726005, + "kl_loss": 0.3376994729042053, + "loss_ib": 0.007100537419319153, + "step": 2746 + }, + { + "ce_ib": 7.694264888763428, + "ce_orig": 1.3566375970840454, + "epoch": 0.7897045078726005, + "kl_loss": 0.2324189394712448, + "loss_ib": 0.010018454864621162, + "step": 2746 + }, + { + "ce_ib": 3.4769294261932373, + "ce_orig": 0.7905767560005188, + "epoch": 0.7899920914515781, + "kl_loss": 0.19332806766033173, + "loss_ib": 0.005410209763795137, + "step": 2747 + }, + { + "ce_ib": 3.8334522247314453, + "ce_orig": 0.681187629699707, + "epoch": 0.7899920914515781, + "kl_loss": 0.1845042109489441, + "loss_ib": 0.005678493995219469, + "step": 2747 + }, + { + "ce_ib": 4.538810729980469, + "ce_orig": 1.0244139432907104, + "epoch": 0.7899920914515781, + "kl_loss": 0.17232194542884827, + "loss_ib": 0.006262030452489853, + "step": 2747 + }, + { + "ce_ib": 7.481337070465088, + "ce_orig": 1.3629653453826904, + "epoch": 0.7899920914515781, + "kl_loss": 0.12006182968616486, + "loss_ib": 0.008681954815983772, + "step": 2747 + }, + { + "ce_ib": 4.870067596435547, + "ce_orig": 0.9308566451072693, + "epoch": 0.7902796750305557, + "kl_loss": 0.2330045998096466, + "loss_ib": 0.007200113497674465, + "step": 2748 + }, + { + "ce_ib": 4.624252796173096, + "ce_orig": 0.8895007371902466, + "epoch": 0.7902796750305557, + "kl_loss": 0.33799904584884644, + "loss_ib": 0.008004243485629559, + "step": 2748 + }, + { + "ce_ib": 2.7337212562561035, + "ce_orig": 0.3937757611274719, + "epoch": 0.7902796750305557, + "kl_loss": 0.18044324219226837, + "loss_ib": 0.004538153763860464, + "step": 2748 + }, + { + "ce_ib": 3.1119930744171143, + "ce_orig": 0.7096352577209473, + "epoch": 0.7902796750305557, + "kl_loss": 0.17501361668109894, + "loss_ib": 0.004862128756940365, + "step": 2748 + }, + { + "ce_ib": 6.954444408416748, + "ce_orig": 1.451302409172058, + "epoch": 0.7905672586095334, + "kl_loss": 0.17049020528793335, + "loss_ib": 0.008659346960484982, + "step": 2749 + }, + { + "ce_ib": 2.8630242347717285, + "ce_orig": 0.710782527923584, + "epoch": 0.7905672586095334, + "kl_loss": 0.20410099625587463, + "loss_ib": 0.004904034081846476, + "step": 2749 + }, + { + "ce_ib": 7.780990123748779, + "ce_orig": 1.719214916229248, + "epoch": 0.7905672586095334, + "kl_loss": 0.151246577501297, + "loss_ib": 0.009293455630540848, + "step": 2749 + }, + { + "ce_ib": 5.562849521636963, + "ce_orig": 0.9157482385635376, + "epoch": 0.7905672586095334, + "kl_loss": 0.30990296602249146, + "loss_ib": 0.008661879226565361, + "step": 2749 + }, + { + "epoch": 0.790854842188511, + "grad_norm": 0.13269051909446716, + "learning_rate": 8.643567843825348e-06, + "loss": 0.8813, + "step": 2750 + }, + { + "ce_ib": 8.351317405700684, + "ce_orig": 1.6423311233520508, + "epoch": 0.790854842188511, + "kl_loss": 0.16628894209861755, + "loss_ib": 0.010014207102358341, + "step": 2750 + }, + { + "ce_ib": 6.484913349151611, + "ce_orig": 0.791697084903717, + "epoch": 0.790854842188511, + "kl_loss": 0.19653227925300598, + "loss_ib": 0.008450236171483994, + "step": 2750 + }, + { + "ce_ib": 4.408458709716797, + "ce_orig": 0.7958822250366211, + "epoch": 0.790854842188511, + "kl_loss": 0.23280999064445496, + "loss_ib": 0.006736558396369219, + "step": 2750 + }, + { + "ce_ib": 3.8739349842071533, + "ce_orig": 0.7481834292411804, + "epoch": 0.790854842188511, + "kl_loss": 0.20205393433570862, + "loss_ib": 0.005894474219530821, + "step": 2750 + }, + { + "ce_ib": 3.0691494941711426, + "ce_orig": 0.7061200141906738, + "epoch": 0.7911424257674887, + "kl_loss": 0.18613889813423157, + "loss_ib": 0.0049305385909974575, + "step": 2751 + }, + { + "ce_ib": 4.863382339477539, + "ce_orig": 0.524652361869812, + "epoch": 0.7911424257674887, + "kl_loss": 0.27657216787338257, + "loss_ib": 0.007629103492945433, + "step": 2751 + }, + { + "ce_ib": 5.993050575256348, + "ce_orig": 1.1847738027572632, + "epoch": 0.7911424257674887, + "kl_loss": 0.17775925993919373, + "loss_ib": 0.007770643103867769, + "step": 2751 + }, + { + "ce_ib": 6.042690753936768, + "ce_orig": 1.036934733390808, + "epoch": 0.7911424257674887, + "kl_loss": 0.20639201998710632, + "loss_ib": 0.008106610737740993, + "step": 2751 + }, + { + "ce_ib": 3.7937984466552734, + "ce_orig": 0.7052984833717346, + "epoch": 0.7914300093464663, + "kl_loss": 0.1615985631942749, + "loss_ib": 0.005409784149378538, + "step": 2752 + }, + { + "ce_ib": 5.987606048583984, + "ce_orig": 1.1922229528427124, + "epoch": 0.7914300093464663, + "kl_loss": 0.38160789012908936, + "loss_ib": 0.009803684428334236, + "step": 2752 + }, + { + "ce_ib": 4.2101030349731445, + "ce_orig": 0.7606436610221863, + "epoch": 0.7914300093464663, + "kl_loss": 0.2124921679496765, + "loss_ib": 0.00633502472192049, + "step": 2752 + }, + { + "ce_ib": 3.323347568511963, + "ce_orig": 1.0886470079421997, + "epoch": 0.7914300093464663, + "kl_loss": 0.15815412998199463, + "loss_ib": 0.004904888570308685, + "step": 2752 + }, + { + "ce_ib": 2.958256244659424, + "ce_orig": 0.7871049046516418, + "epoch": 0.791717592925444, + "kl_loss": 0.15421737730503082, + "loss_ib": 0.004500430077314377, + "step": 2753 + }, + { + "ce_ib": 4.915529251098633, + "ce_orig": 1.2013494968414307, + "epoch": 0.791717592925444, + "kl_loss": 0.1808222234249115, + "loss_ib": 0.006723751313984394, + "step": 2753 + }, + { + "ce_ib": 5.75012731552124, + "ce_orig": 1.3001903295516968, + "epoch": 0.791717592925444, + "kl_loss": 0.1288374811410904, + "loss_ib": 0.007038502022624016, + "step": 2753 + }, + { + "ce_ib": 6.461428165435791, + "ce_orig": 1.1464755535125732, + "epoch": 0.791717592925444, + "kl_loss": 0.23097389936447144, + "loss_ib": 0.008771167136728764, + "step": 2753 + }, + { + "ce_ib": 4.977194786071777, + "ce_orig": 1.0097579956054688, + "epoch": 0.7920051765044216, + "kl_loss": 0.13171273469924927, + "loss_ib": 0.006294321734458208, + "step": 2754 + }, + { + "ce_ib": 8.1357421875, + "ce_orig": 1.813603401184082, + "epoch": 0.7920051765044216, + "kl_loss": 0.17944678664207458, + "loss_ib": 0.009930210188031197, + "step": 2754 + }, + { + "ce_ib": 4.541104793548584, + "ce_orig": 1.0649460554122925, + "epoch": 0.7920051765044216, + "kl_loss": 0.14698731899261475, + "loss_ib": 0.006010978017002344, + "step": 2754 + }, + { + "ce_ib": 5.720710754394531, + "ce_orig": 1.0215946435928345, + "epoch": 0.7920051765044216, + "kl_loss": 0.20531198382377625, + "loss_ib": 0.007773830555379391, + "step": 2754 + }, + { + "epoch": 0.7922927600833992, + "grad_norm": 0.17417287826538086, + "learning_rate": 8.638248681344798e-06, + "loss": 0.9434, + "step": 2755 + }, + { + "ce_ib": 3.3936402797698975, + "ce_orig": 0.7705132961273193, + "epoch": 0.7922927600833992, + "kl_loss": 0.1691194474697113, + "loss_ib": 0.005084834527224302, + "step": 2755 + }, + { + "ce_ib": 6.375730514526367, + "ce_orig": 0.883257269859314, + "epoch": 0.7922927600833992, + "kl_loss": 0.2325638234615326, + "loss_ib": 0.008701368235051632, + "step": 2755 + }, + { + "ce_ib": 4.215276718139648, + "ce_orig": 0.9064416885375977, + "epoch": 0.7922927600833992, + "kl_loss": 0.19901862740516663, + "loss_ib": 0.006205463316291571, + "step": 2755 + }, + { + "ce_ib": 4.263459205627441, + "ce_orig": 0.7173588275909424, + "epoch": 0.7922927600833992, + "kl_loss": 0.21890214085578918, + "loss_ib": 0.006452480331063271, + "step": 2755 + }, + { + "ce_ib": 4.03041410446167, + "ce_orig": 0.8386393189430237, + "epoch": 0.7925803436623768, + "kl_loss": 0.21599651873111725, + "loss_ib": 0.006190379150211811, + "step": 2756 + }, + { + "ce_ib": 8.068974494934082, + "ce_orig": 1.6212472915649414, + "epoch": 0.7925803436623768, + "kl_loss": 0.2655890882015228, + "loss_ib": 0.010724864900112152, + "step": 2756 + }, + { + "ce_ib": 4.8943328857421875, + "ce_orig": 0.9499472379684448, + "epoch": 0.7925803436623768, + "kl_loss": 0.16992083191871643, + "loss_ib": 0.0065935407765209675, + "step": 2756 + }, + { + "ce_ib": 2.3267531394958496, + "ce_orig": 0.47837963700294495, + "epoch": 0.7925803436623768, + "kl_loss": 0.22688943147659302, + "loss_ib": 0.004595647566020489, + "step": 2756 + }, + { + "ce_ib": 2.3207454681396484, + "ce_orig": 0.5028666257858276, + "epoch": 0.7928679272413545, + "kl_loss": 0.17111597955226898, + "loss_ib": 0.004031904973089695, + "step": 2757 + }, + { + "ce_ib": 8.574760437011719, + "ce_orig": 1.3332573175430298, + "epoch": 0.7928679272413545, + "kl_loss": 0.14517948031425476, + "loss_ib": 0.010026554577052593, + "step": 2757 + }, + { + "ce_ib": 7.3622026443481445, + "ce_orig": 1.8929307460784912, + "epoch": 0.7928679272413545, + "kl_loss": 0.19576385617256165, + "loss_ib": 0.009319841861724854, + "step": 2757 + }, + { + "ce_ib": 4.028372764587402, + "ce_orig": 0.6198791265487671, + "epoch": 0.7928679272413545, + "kl_loss": 0.4046986401081085, + "loss_ib": 0.008075358346104622, + "step": 2757 + }, + { + "ce_ib": 3.327620267868042, + "ce_orig": 0.48118478059768677, + "epoch": 0.7931555108203322, + "kl_loss": 0.1740066260099411, + "loss_ib": 0.005067686550319195, + "step": 2758 + }, + { + "ce_ib": 4.479047775268555, + "ce_orig": 0.7677157521247864, + "epoch": 0.7931555108203322, + "kl_loss": 0.2577696144580841, + "loss_ib": 0.007056743837893009, + "step": 2758 + }, + { + "ce_ib": 3.990666151046753, + "ce_orig": 0.7755125761032104, + "epoch": 0.7931555108203322, + "kl_loss": 0.1299208402633667, + "loss_ib": 0.005289874505251646, + "step": 2758 + }, + { + "ce_ib": 4.9348602294921875, + "ce_orig": 1.080062747001648, + "epoch": 0.7931555108203322, + "kl_loss": 0.21153199672698975, + "loss_ib": 0.007050180342048407, + "step": 2758 + }, + { + "ce_ib": 1.851986289024353, + "ce_orig": 0.3239161968231201, + "epoch": 0.7934430943993098, + "kl_loss": 0.2984461188316345, + "loss_ib": 0.004836447536945343, + "step": 2759 + }, + { + "ce_ib": 2.5775554180145264, + "ce_orig": 0.538185715675354, + "epoch": 0.7934430943993098, + "kl_loss": 0.11861022561788559, + "loss_ib": 0.0037636577617377043, + "step": 2759 + }, + { + "ce_ib": 8.447591781616211, + "ce_orig": 2.065392255783081, + "epoch": 0.7934430943993098, + "kl_loss": 0.24820604920387268, + "loss_ib": 0.010929652489721775, + "step": 2759 + }, + { + "ce_ib": 4.995940685272217, + "ce_orig": 1.2098535299301147, + "epoch": 0.7934430943993098, + "kl_loss": 0.14290031790733337, + "loss_ib": 0.006424943450838327, + "step": 2759 + }, + { + "epoch": 0.7937306779782874, + "grad_norm": 0.13721108436584473, + "learning_rate": 8.63292075347872e-06, + "loss": 0.8601, + "step": 2760 + }, + { + "ce_ib": 1.7511529922485352, + "ce_orig": 0.5045529007911682, + "epoch": 0.7937306779782874, + "kl_loss": 0.11075113713741302, + "loss_ib": 0.0028586643747985363, + "step": 2760 + }, + { + "ce_ib": 1.915544867515564, + "ce_orig": 0.4372524619102478, + "epoch": 0.7937306779782874, + "kl_loss": 0.12330545485019684, + "loss_ib": 0.0031485992949455976, + "step": 2760 + }, + { + "ce_ib": 5.718080997467041, + "ce_orig": 0.9962584376335144, + "epoch": 0.7937306779782874, + "kl_loss": 0.2054772675037384, + "loss_ib": 0.007772853132337332, + "step": 2760 + }, + { + "ce_ib": 5.407802581787109, + "ce_orig": 1.166533350944519, + "epoch": 0.7937306779782874, + "kl_loss": 0.16021756827831268, + "loss_ib": 0.00700997794046998, + "step": 2760 + }, + { + "ce_ib": 2.6081628799438477, + "ce_orig": 0.7873552441596985, + "epoch": 0.7940182615572651, + "kl_loss": 0.10385706275701523, + "loss_ib": 0.0036467337049543858, + "step": 2761 + }, + { + "ce_ib": 4.96137809753418, + "ce_orig": 0.8537298440933228, + "epoch": 0.7940182615572651, + "kl_loss": 0.22339153289794922, + "loss_ib": 0.00719529390335083, + "step": 2761 + }, + { + "ce_ib": 2.915616273880005, + "ce_orig": 0.8159688711166382, + "epoch": 0.7940182615572651, + "kl_loss": 0.1329660713672638, + "loss_ib": 0.004245277028530836, + "step": 2761 + }, + { + "ce_ib": 4.883017539978027, + "ce_orig": 0.7250867486000061, + "epoch": 0.7940182615572651, + "kl_loss": 0.14818190038204193, + "loss_ib": 0.006364836357533932, + "step": 2761 + }, + { + "ce_ib": 7.642560005187988, + "ce_orig": 1.7527059316635132, + "epoch": 0.7943058451362427, + "kl_loss": 0.2933797836303711, + "loss_ib": 0.010576358065009117, + "step": 2762 + }, + { + "ce_ib": 2.81842041015625, + "ce_orig": 0.6462904810905457, + "epoch": 0.7943058451362427, + "kl_loss": 0.10573592036962509, + "loss_ib": 0.0038757796864956617, + "step": 2762 + }, + { + "ce_ib": 6.210951328277588, + "ce_orig": 1.1333308219909668, + "epoch": 0.7943058451362427, + "kl_loss": 0.2085423469543457, + "loss_ib": 0.00829637423157692, + "step": 2762 + }, + { + "ce_ib": 3.7195959091186523, + "ce_orig": 0.9254713654518127, + "epoch": 0.7943058451362427, + "kl_loss": 0.15598778426647186, + "loss_ib": 0.00527947349473834, + "step": 2762 + }, + { + "ce_ib": 5.87052059173584, + "ce_orig": 0.6520940661430359, + "epoch": 0.7945934287152203, + "kl_loss": 0.2868911921977997, + "loss_ib": 0.008739432319998741, + "step": 2763 + }, + { + "ce_ib": 2.7620677947998047, + "ce_orig": 0.3922322690486908, + "epoch": 0.7945934287152203, + "kl_loss": 0.21191099286079407, + "loss_ib": 0.0048811775632202625, + "step": 2763 + }, + { + "ce_ib": 4.830256938934326, + "ce_orig": 1.004847526550293, + "epoch": 0.7945934287152203, + "kl_loss": 0.16704332828521729, + "loss_ib": 0.0065006897784769535, + "step": 2763 + }, + { + "ce_ib": 3.0332460403442383, + "ce_orig": 0.5319488048553467, + "epoch": 0.7945934287152203, + "kl_loss": 0.2513481080532074, + "loss_ib": 0.00554672721773386, + "step": 2763 + }, + { + "ce_ib": 5.994606018066406, + "ce_orig": 1.1363625526428223, + "epoch": 0.794881012294198, + "kl_loss": 0.15206828713417053, + "loss_ib": 0.007515288423746824, + "step": 2764 + }, + { + "ce_ib": 7.449738025665283, + "ce_orig": 1.4065895080566406, + "epoch": 0.794881012294198, + "kl_loss": 0.26797497272491455, + "loss_ib": 0.01012948714196682, + "step": 2764 + }, + { + "ce_ib": 5.697770595550537, + "ce_orig": 1.137904405593872, + "epoch": 0.794881012294198, + "kl_loss": 0.2015613615512848, + "loss_ib": 0.007713384460657835, + "step": 2764 + }, + { + "ce_ib": 4.236091136932373, + "ce_orig": 0.6694088578224182, + "epoch": 0.794881012294198, + "kl_loss": 0.18720346689224243, + "loss_ib": 0.006108125206083059, + "step": 2764 + }, + { + "epoch": 0.7951685958731757, + "grad_norm": 0.13329960405826569, + "learning_rate": 8.627584073063327e-06, + "loss": 0.8475, + "step": 2765 + }, + { + "ce_ib": 5.169940948486328, + "ce_orig": 1.327917456626892, + "epoch": 0.7951685958731757, + "kl_loss": 0.16798537969589233, + "loss_ib": 0.006849794648587704, + "step": 2765 + }, + { + "ce_ib": 4.786345958709717, + "ce_orig": 1.215107798576355, + "epoch": 0.7951685958731757, + "kl_loss": 0.22210919857025146, + "loss_ib": 0.0070074377581477165, + "step": 2765 + }, + { + "ce_ib": 6.4169087409973145, + "ce_orig": 1.3336730003356934, + "epoch": 0.7951685958731757, + "kl_loss": 0.18122230470180511, + "loss_ib": 0.008229131810367107, + "step": 2765 + }, + { + "ce_ib": 3.018606424331665, + "ce_orig": 0.7120202779769897, + "epoch": 0.7951685958731757, + "kl_loss": 0.15817300975322723, + "loss_ib": 0.00460033630952239, + "step": 2765 + }, + { + "ce_ib": 3.972600221633911, + "ce_orig": 0.9581891298294067, + "epoch": 0.7954561794521533, + "kl_loss": 0.230808287858963, + "loss_ib": 0.006280682981014252, + "step": 2766 + }, + { + "ce_ib": 4.349094390869141, + "ce_orig": 0.7879023551940918, + "epoch": 0.7954561794521533, + "kl_loss": 0.22120392322540283, + "loss_ib": 0.006561133544892073, + "step": 2766 + }, + { + "ce_ib": 1.6276339292526245, + "ce_orig": 0.3938104808330536, + "epoch": 0.7954561794521533, + "kl_loss": 0.1592043936252594, + "loss_ib": 0.003219677833840251, + "step": 2766 + }, + { + "ce_ib": 2.3285670280456543, + "ce_orig": 0.49714717268943787, + "epoch": 0.7954561794521533, + "kl_loss": 0.1425882875919342, + "loss_ib": 0.0037544500082731247, + "step": 2766 + }, + { + "ce_ib": 5.291499137878418, + "ce_orig": 0.7954302430152893, + "epoch": 0.7957437630311309, + "kl_loss": 0.2652718424797058, + "loss_ib": 0.007944217883050442, + "step": 2767 + }, + { + "ce_ib": 2.142642021179199, + "ce_orig": 0.36455076932907104, + "epoch": 0.7957437630311309, + "kl_loss": 0.26755863428115845, + "loss_ib": 0.004818228539079428, + "step": 2767 + }, + { + "ce_ib": 2.949099540710449, + "ce_orig": 1.0272963047027588, + "epoch": 0.7957437630311309, + "kl_loss": 0.12087738513946533, + "loss_ib": 0.004157873336225748, + "step": 2767 + }, + { + "ce_ib": 3.718870162963867, + "ce_orig": 0.8425977230072021, + "epoch": 0.7957437630311309, + "kl_loss": 0.15176647901535034, + "loss_ib": 0.005236534867435694, + "step": 2767 + }, + { + "ce_ib": 2.5902912616729736, + "ce_orig": 0.396890252828598, + "epoch": 0.7960313466101085, + "kl_loss": 0.1690995693206787, + "loss_ib": 0.004281286615878344, + "step": 2768 + }, + { + "ce_ib": 5.672253608703613, + "ce_orig": 1.1647948026657104, + "epoch": 0.7960313466101085, + "kl_loss": 0.20857927203178406, + "loss_ib": 0.00775804603472352, + "step": 2768 + }, + { + "ce_ib": 4.269392490386963, + "ce_orig": 1.0209579467773438, + "epoch": 0.7960313466101085, + "kl_loss": 0.2718915343284607, + "loss_ib": 0.00698830746114254, + "step": 2768 + }, + { + "ce_ib": 2.1507039070129395, + "ce_orig": 0.5833152532577515, + "epoch": 0.7960313466101085, + "kl_loss": 0.23342619836330414, + "loss_ib": 0.004484965931624174, + "step": 2768 + }, + { + "ce_ib": 6.749910354614258, + "ce_orig": 1.6572372913360596, + "epoch": 0.7963189301890862, + "kl_loss": 0.22038185596466064, + "loss_ib": 0.008953728713095188, + "step": 2769 + }, + { + "ce_ib": 3.859058380126953, + "ce_orig": 0.57147216796875, + "epoch": 0.7963189301890862, + "kl_loss": 0.15442770719528198, + "loss_ib": 0.005403335206210613, + "step": 2769 + }, + { + "ce_ib": 2.2091100215911865, + "ce_orig": 0.5754510164260864, + "epoch": 0.7963189301890862, + "kl_loss": 0.36819988489151, + "loss_ib": 0.005891108885407448, + "step": 2769 + }, + { + "ce_ib": 6.605405330657959, + "ce_orig": 1.1422359943389893, + "epoch": 0.7963189301890862, + "kl_loss": 0.14399854838848114, + "loss_ib": 0.008045390248298645, + "step": 2769 + }, + { + "epoch": 0.7966065137680638, + "grad_norm": 0.13270476460456848, + "learning_rate": 8.622238652955923e-06, + "loss": 0.8316, + "step": 2770 + }, + { + "ce_ib": 3.180725574493408, + "ce_orig": 0.6364140510559082, + "epoch": 0.7966065137680638, + "kl_loss": 0.16610968112945557, + "loss_ib": 0.004841822199523449, + "step": 2770 + }, + { + "ce_ib": 4.043299674987793, + "ce_orig": 0.9815298318862915, + "epoch": 0.7966065137680638, + "kl_loss": 0.2349405586719513, + "loss_ib": 0.006392705254256725, + "step": 2770 + }, + { + "ce_ib": 2.0619144439697266, + "ce_orig": 0.40131959319114685, + "epoch": 0.7966065137680638, + "kl_loss": 0.1198010966181755, + "loss_ib": 0.003259925404563546, + "step": 2770 + }, + { + "ce_ib": 5.611801624298096, + "ce_orig": 1.1940922737121582, + "epoch": 0.7966065137680638, + "kl_loss": 0.21230050921440125, + "loss_ib": 0.007734806276857853, + "step": 2770 + }, + { + "ce_ib": 3.0244171619415283, + "ce_orig": 0.7978131771087646, + "epoch": 0.7968940973470415, + "kl_loss": 0.14453330636024475, + "loss_ib": 0.004469749983400106, + "step": 2771 + }, + { + "ce_ib": 5.83652400970459, + "ce_orig": 1.0816104412078857, + "epoch": 0.7968940973470415, + "kl_loss": 0.15763390064239502, + "loss_ib": 0.007412863429635763, + "step": 2771 + }, + { + "ce_ib": 4.499009132385254, + "ce_orig": 0.7824953198432922, + "epoch": 0.7968940973470415, + "kl_loss": 0.21736302971839905, + "loss_ib": 0.006672639865428209, + "step": 2771 + }, + { + "ce_ib": 6.7878336906433105, + "ce_orig": 1.2972204685211182, + "epoch": 0.7968940973470415, + "kl_loss": 0.16984407603740692, + "loss_ib": 0.008486274629831314, + "step": 2771 + }, + { + "ce_ib": 4.4549336433410645, + "ce_orig": 0.9316060543060303, + "epoch": 0.7971816809260192, + "kl_loss": 0.23357026278972626, + "loss_ib": 0.006790636107325554, + "step": 2772 + }, + { + "ce_ib": 3.829343795776367, + "ce_orig": 0.8582841157913208, + "epoch": 0.7971816809260192, + "kl_loss": 0.258108913898468, + "loss_ib": 0.006410432979464531, + "step": 2772 + }, + { + "ce_ib": 2.4217939376831055, + "ce_orig": 0.577266275882721, + "epoch": 0.7971816809260192, + "kl_loss": 0.12985166907310486, + "loss_ib": 0.003720310516655445, + "step": 2772 + }, + { + "ce_ib": 5.322347164154053, + "ce_orig": 0.8086357712745667, + "epoch": 0.7971816809260192, + "kl_loss": 0.21720638871192932, + "loss_ib": 0.007494411431252956, + "step": 2772 + }, + { + "ce_ib": 4.838637828826904, + "ce_orig": 0.9434384703636169, + "epoch": 0.7974692645049968, + "kl_loss": 0.2088300883769989, + "loss_ib": 0.006926938891410828, + "step": 2773 + }, + { + "ce_ib": 3.3257739543914795, + "ce_orig": 0.5758197903633118, + "epoch": 0.7974692645049968, + "kl_loss": 0.22597923874855042, + "loss_ib": 0.005585566628724337, + "step": 2773 + }, + { + "ce_ib": 4.353536605834961, + "ce_orig": 0.9619526863098145, + "epoch": 0.7974692645049968, + "kl_loss": 0.16354277729988098, + "loss_ib": 0.005988964345306158, + "step": 2773 + }, + { + "ce_ib": 4.37595796585083, + "ce_orig": 0.7329281568527222, + "epoch": 0.7974692645049968, + "kl_loss": 0.7227753400802612, + "loss_ib": 0.011603711172938347, + "step": 2773 + }, + { + "ce_ib": 3.8698649406433105, + "ce_orig": 0.7077966332435608, + "epoch": 0.7977568480839744, + "kl_loss": 0.17679467797279358, + "loss_ib": 0.005637811496853828, + "step": 2774 + }, + { + "ce_ib": 3.393077850341797, + "ce_orig": 0.6630714535713196, + "epoch": 0.7977568480839744, + "kl_loss": 0.16627629101276398, + "loss_ib": 0.005055840592831373, + "step": 2774 + }, + { + "ce_ib": 2.478025436401367, + "ce_orig": 0.704636812210083, + "epoch": 0.7977568480839744, + "kl_loss": 0.1644262969493866, + "loss_ib": 0.004122287966310978, + "step": 2774 + }, + { + "ce_ib": 3.8648605346679688, + "ce_orig": 0.8926273584365845, + "epoch": 0.7977568480839744, + "kl_loss": 0.2535039186477661, + "loss_ib": 0.00639989972114563, + "step": 2774 + }, + { + "epoch": 0.798044431662952, + "grad_norm": 0.13850665092468262, + "learning_rate": 8.616884506034862e-06, + "loss": 0.8251, + "step": 2775 + }, + { + "ce_ib": 2.175795316696167, + "ce_orig": 0.5840879678726196, + "epoch": 0.798044431662952, + "kl_loss": 0.12633799016475677, + "loss_ib": 0.003439175197854638, + "step": 2775 + }, + { + "ce_ib": 6.151766300201416, + "ce_orig": 1.4571189880371094, + "epoch": 0.798044431662952, + "kl_loss": 0.1972137987613678, + "loss_ib": 0.008123904466629028, + "step": 2775 + }, + { + "ce_ib": 1.6729315519332886, + "ce_orig": 0.4999871551990509, + "epoch": 0.798044431662952, + "kl_loss": 0.1291855126619339, + "loss_ib": 0.002964786719530821, + "step": 2775 + }, + { + "ce_ib": 4.860602378845215, + "ce_orig": 1.1550661325454712, + "epoch": 0.798044431662952, + "kl_loss": 0.15274448692798615, + "loss_ib": 0.006388047244399786, + "step": 2775 + }, + { + "ce_ib": 3.7515244483947754, + "ce_orig": 0.5468907356262207, + "epoch": 0.7983320152419296, + "kl_loss": 0.27080798149108887, + "loss_ib": 0.006459604483097792, + "step": 2776 + }, + { + "ce_ib": 4.041459560394287, + "ce_orig": 0.9361514449119568, + "epoch": 0.7983320152419296, + "kl_loss": 0.1546390950679779, + "loss_ib": 0.005587850697338581, + "step": 2776 + }, + { + "ce_ib": 4.129010200500488, + "ce_orig": 0.7371531128883362, + "epoch": 0.7983320152419296, + "kl_loss": 0.14446759223937988, + "loss_ib": 0.005573686212301254, + "step": 2776 + }, + { + "ce_ib": 3.4718308448791504, + "ce_orig": 0.5729216933250427, + "epoch": 0.7983320152419296, + "kl_loss": 0.16182780265808105, + "loss_ib": 0.005090109072625637, + "step": 2776 + }, + { + "ce_ib": 4.173073768615723, + "ce_orig": 0.5577591061592102, + "epoch": 0.7986195988209073, + "kl_loss": 0.13119199872016907, + "loss_ib": 0.005484993103891611, + "step": 2777 + }, + { + "ce_ib": 4.952641010284424, + "ce_orig": 0.8829336166381836, + "epoch": 0.7986195988209073, + "kl_loss": 0.41505753993988037, + "loss_ib": 0.009103216230869293, + "step": 2777 + }, + { + "ce_ib": 5.977888584136963, + "ce_orig": 0.8822013139724731, + "epoch": 0.7986195988209073, + "kl_loss": 0.2576290965080261, + "loss_ib": 0.008554179221391678, + "step": 2777 + }, + { + "ce_ib": 6.000790119171143, + "ce_orig": 0.9867299199104309, + "epoch": 0.7986195988209073, + "kl_loss": 0.25134074687957764, + "loss_ib": 0.008514197543263435, + "step": 2777 + }, + { + "ce_ib": 4.744520664215088, + "ce_orig": 0.9692033529281616, + "epoch": 0.798907182399885, + "kl_loss": 0.16027696430683136, + "loss_ib": 0.006347290240228176, + "step": 2778 + }, + { + "ce_ib": 3.8108181953430176, + "ce_orig": 0.4224478006362915, + "epoch": 0.798907182399885, + "kl_loss": 0.16644200682640076, + "loss_ib": 0.005475238431245089, + "step": 2778 + }, + { + "ce_ib": 5.774768829345703, + "ce_orig": 1.136649489402771, + "epoch": 0.798907182399885, + "kl_loss": 0.22186119854450226, + "loss_ib": 0.007993380539119244, + "step": 2778 + }, + { + "ce_ib": 2.9189510345458984, + "ce_orig": 0.5785156488418579, + "epoch": 0.798907182399885, + "kl_loss": 0.17671211063861847, + "loss_ib": 0.004686072468757629, + "step": 2778 + }, + { + "ce_ib": 4.7172956466674805, + "ce_orig": 1.0060970783233643, + "epoch": 0.7991947659788626, + "kl_loss": 0.25071677565574646, + "loss_ib": 0.007224462926387787, + "step": 2779 + }, + { + "ce_ib": 3.0503134727478027, + "ce_orig": 0.5084138512611389, + "epoch": 0.7991947659788626, + "kl_loss": 0.18581795692443848, + "loss_ib": 0.0049084932543337345, + "step": 2779 + }, + { + "ce_ib": 3.7966902256011963, + "ce_orig": 0.8144973516464233, + "epoch": 0.7991947659788626, + "kl_loss": 0.18538370728492737, + "loss_ib": 0.005650527309626341, + "step": 2779 + }, + { + "ce_ib": 3.398282289505005, + "ce_orig": 0.3389405310153961, + "epoch": 0.7991947659788626, + "kl_loss": 0.16038772463798523, + "loss_ib": 0.005002159159630537, + "step": 2779 + }, + { + "epoch": 0.7994823495578403, + "grad_norm": 0.13896815478801727, + "learning_rate": 8.611521645199532e-06, + "loss": 0.8904, + "step": 2780 + }, + { + "ce_ib": 4.967715263366699, + "ce_orig": 0.7593681216239929, + "epoch": 0.7994823495578403, + "kl_loss": 0.19736868143081665, + "loss_ib": 0.006941401865333319, + "step": 2780 + }, + { + "ce_ib": 5.797456741333008, + "ce_orig": 1.2519092559814453, + "epoch": 0.7994823495578403, + "kl_loss": 0.21374088525772095, + "loss_ib": 0.007934865541756153, + "step": 2780 + }, + { + "ce_ib": 3.533454179763794, + "ce_orig": 0.5676873922348022, + "epoch": 0.7994823495578403, + "kl_loss": 0.21779662370681763, + "loss_ib": 0.005711420439183712, + "step": 2780 + }, + { + "ce_ib": 5.329074859619141, + "ce_orig": 0.8747817873954773, + "epoch": 0.7994823495578403, + "kl_loss": 0.24814845621585846, + "loss_ib": 0.007810559123754501, + "step": 2780 + }, + { + "ce_ib": 3.8114421367645264, + "ce_orig": 0.8032283782958984, + "epoch": 0.7997699331368179, + "kl_loss": 0.18565869331359863, + "loss_ib": 0.005668029189109802, + "step": 2781 + }, + { + "ce_ib": 4.797056674957275, + "ce_orig": 0.6615355014801025, + "epoch": 0.7997699331368179, + "kl_loss": 0.20621836185455322, + "loss_ib": 0.006859240587800741, + "step": 2781 + }, + { + "ce_ib": 5.624706268310547, + "ce_orig": 1.2006057500839233, + "epoch": 0.7997699331368179, + "kl_loss": 0.1763170063495636, + "loss_ib": 0.007387876510620117, + "step": 2781 + }, + { + "ce_ib": 2.8803436756134033, + "ce_orig": 0.6264297962188721, + "epoch": 0.7997699331368179, + "kl_loss": 0.13622674345970154, + "loss_ib": 0.004242611117660999, + "step": 2781 + }, + { + "ce_ib": 3.4638054370880127, + "ce_orig": 0.3409463167190552, + "epoch": 0.8000575167157955, + "kl_loss": 0.19616374373435974, + "loss_ib": 0.0054254429414868355, + "step": 2782 + }, + { + "ce_ib": 6.138943195343018, + "ce_orig": 1.4858006238937378, + "epoch": 0.8000575167157955, + "kl_loss": 0.21942077577114105, + "loss_ib": 0.00833315122872591, + "step": 2782 + }, + { + "ce_ib": 3.272803783416748, + "ce_orig": 0.6592520475387573, + "epoch": 0.8000575167157955, + "kl_loss": 0.1194593608379364, + "loss_ib": 0.004467397462576628, + "step": 2782 + }, + { + "ce_ib": 4.285655975341797, + "ce_orig": 0.37267249822616577, + "epoch": 0.8000575167157955, + "kl_loss": 0.2731223702430725, + "loss_ib": 0.007016879040747881, + "step": 2782 + }, + { + "ce_ib": 4.350681304931641, + "ce_orig": 0.6221808791160583, + "epoch": 0.8003451002947731, + "kl_loss": 0.21273842453956604, + "loss_ib": 0.00647806515917182, + "step": 2783 + }, + { + "ce_ib": 3.7565019130706787, + "ce_orig": 0.6631125807762146, + "epoch": 0.8003451002947731, + "kl_loss": 0.18899348378181458, + "loss_ib": 0.005646436475217342, + "step": 2783 + }, + { + "ce_ib": 3.776038408279419, + "ce_orig": 0.8879638910293579, + "epoch": 0.8003451002947731, + "kl_loss": 0.14831481873989105, + "loss_ib": 0.005259186495095491, + "step": 2783 + }, + { + "ce_ib": 4.097657203674316, + "ce_orig": 0.75706946849823, + "epoch": 0.8003451002947731, + "kl_loss": 0.13940918445587158, + "loss_ib": 0.005491748917847872, + "step": 2783 + }, + { + "ce_ib": 3.4067788124084473, + "ce_orig": 0.7590777277946472, + "epoch": 0.8006326838737509, + "kl_loss": 0.24040597677230835, + "loss_ib": 0.005810838658362627, + "step": 2784 + }, + { + "ce_ib": 3.6400585174560547, + "ce_orig": 0.7885200381278992, + "epoch": 0.8006326838737509, + "kl_loss": 0.26050201058387756, + "loss_ib": 0.006245078984647989, + "step": 2784 + }, + { + "ce_ib": 4.840256214141846, + "ce_orig": 0.7395043969154358, + "epoch": 0.8006326838737509, + "kl_loss": 0.222259521484375, + "loss_ib": 0.00706285098567605, + "step": 2784 + }, + { + "ce_ib": 2.3897817134857178, + "ce_orig": 0.33077970147132874, + "epoch": 0.8006326838737509, + "kl_loss": 0.1783285290002823, + "loss_ib": 0.004173066932708025, + "step": 2784 + }, + { + "epoch": 0.8009202674527285, + "grad_norm": 0.1349128931760788, + "learning_rate": 8.606150083370304e-06, + "loss": 0.7908, + "step": 2785 + }, + { + "ce_ib": 6.939953804016113, + "ce_orig": 1.2674649953842163, + "epoch": 0.8009202674527285, + "kl_loss": 0.24453765153884888, + "loss_ib": 0.009385330602526665, + "step": 2785 + }, + { + "ce_ib": 6.00085973739624, + "ce_orig": 0.8547451496124268, + "epoch": 0.8009202674527285, + "kl_loss": 0.18797782063484192, + "loss_ib": 0.007880637422204018, + "step": 2785 + }, + { + "ce_ib": 2.006056308746338, + "ce_orig": 0.4705958068370819, + "epoch": 0.8009202674527285, + "kl_loss": 0.1580205261707306, + "loss_ib": 0.003586261533200741, + "step": 2785 + }, + { + "ce_ib": 3.9961814880371094, + "ce_orig": 0.5746617317199707, + "epoch": 0.8009202674527285, + "kl_loss": 0.19651246070861816, + "loss_ib": 0.005961305927485228, + "step": 2785 + }, + { + "ce_ib": 5.611196041107178, + "ce_orig": 1.2195407152175903, + "epoch": 0.8012078510317061, + "kl_loss": 0.17307162284851074, + "loss_ib": 0.007341912016272545, + "step": 2786 + }, + { + "ce_ib": 3.0286495685577393, + "ce_orig": 0.6959517598152161, + "epoch": 0.8012078510317061, + "kl_loss": 0.1301916539669037, + "loss_ib": 0.00433056615293026, + "step": 2786 + }, + { + "ce_ib": 5.279967784881592, + "ce_orig": 1.3134632110595703, + "epoch": 0.8012078510317061, + "kl_loss": 0.17393508553504944, + "loss_ib": 0.007019318174570799, + "step": 2786 + }, + { + "ce_ib": 2.784822940826416, + "ce_orig": 0.6506140828132629, + "epoch": 0.8012078510317061, + "kl_loss": 0.1798112839460373, + "loss_ib": 0.004582935478538275, + "step": 2786 + }, + { + "ce_ib": 8.128206253051758, + "ce_orig": 1.6097584962844849, + "epoch": 0.8014954346106837, + "kl_loss": 0.2555684745311737, + "loss_ib": 0.01068389043211937, + "step": 2787 + }, + { + "ce_ib": 2.529799699783325, + "ce_orig": 0.47001174092292786, + "epoch": 0.8014954346106837, + "kl_loss": 0.1928243786096573, + "loss_ib": 0.00445804325863719, + "step": 2787 + }, + { + "ce_ib": 3.7421910762786865, + "ce_orig": 0.9342586994171143, + "epoch": 0.8014954346106837, + "kl_loss": 0.15996897220611572, + "loss_ib": 0.005341880954802036, + "step": 2787 + }, + { + "ce_ib": 3.884831666946411, + "ce_orig": 0.7539273500442505, + "epoch": 0.8014954346106837, + "kl_loss": 0.14403212070465088, + "loss_ib": 0.005325152538716793, + "step": 2787 + }, + { + "ce_ib": 3.502986192703247, + "ce_orig": 0.7524189949035645, + "epoch": 0.8017830181896614, + "kl_loss": 0.22970163822174072, + "loss_ib": 0.005800002720206976, + "step": 2788 + }, + { + "ce_ib": 3.5299184322357178, + "ce_orig": 0.4191644489765167, + "epoch": 0.8017830181896614, + "kl_loss": 0.43149957060813904, + "loss_ib": 0.007844913750886917, + "step": 2788 + }, + { + "ce_ib": 4.789775371551514, + "ce_orig": 0.8889884352684021, + "epoch": 0.8017830181896614, + "kl_loss": 0.30868151783943176, + "loss_ib": 0.007876589894294739, + "step": 2788 + }, + { + "ce_ib": 6.916083335876465, + "ce_orig": 1.4910715818405151, + "epoch": 0.8017830181896614, + "kl_loss": 0.21862119436264038, + "loss_ib": 0.009102295152842999, + "step": 2788 + }, + { + "ce_ib": 4.667547225952148, + "ce_orig": 0.6016953587532043, + "epoch": 0.802070601768639, + "kl_loss": 0.23145586252212524, + "loss_ib": 0.0069821057841181755, + "step": 2789 + }, + { + "ce_ib": 5.150094509124756, + "ce_orig": 1.0076875686645508, + "epoch": 0.802070601768639, + "kl_loss": 0.18371953070163727, + "loss_ib": 0.006987289525568485, + "step": 2789 + }, + { + "ce_ib": 4.41151762008667, + "ce_orig": 0.6109520196914673, + "epoch": 0.802070601768639, + "kl_loss": 0.1515008807182312, + "loss_ib": 0.005926526617258787, + "step": 2789 + }, + { + "ce_ib": 1.968022108078003, + "ce_orig": 0.53102046251297, + "epoch": 0.802070601768639, + "kl_loss": 0.14849258959293365, + "loss_ib": 0.003452948061749339, + "step": 2789 + }, + { + "epoch": 0.8023581853476166, + "grad_norm": 0.12933726608753204, + "learning_rate": 8.600769833488522e-06, + "loss": 0.883, + "step": 2790 + }, + { + "ce_ib": 3.364567756652832, + "ce_orig": 0.5981623530387878, + "epoch": 0.8023581853476166, + "kl_loss": 0.23685318231582642, + "loss_ib": 0.005733099300414324, + "step": 2790 + }, + { + "ce_ib": 4.1042680740356445, + "ce_orig": 1.0170713663101196, + "epoch": 0.8023581853476166, + "kl_loss": 0.14984333515167236, + "loss_ib": 0.0056027015671133995, + "step": 2790 + }, + { + "ce_ib": 2.920992851257324, + "ce_orig": 0.6483107805252075, + "epoch": 0.8023581853476166, + "kl_loss": 0.1978006511926651, + "loss_ib": 0.004898999352008104, + "step": 2790 + }, + { + "ce_ib": 4.970139980316162, + "ce_orig": 0.7824289202690125, + "epoch": 0.8023581853476166, + "kl_loss": 0.17422813177108765, + "loss_ib": 0.006712420843541622, + "step": 2790 + }, + { + "ce_ib": 6.84839391708374, + "ce_orig": 1.5283554792404175, + "epoch": 0.8026457689265943, + "kl_loss": 0.17102569341659546, + "loss_ib": 0.008558651432394981, + "step": 2791 + }, + { + "ce_ib": 2.884920358657837, + "ce_orig": 0.7699412703514099, + "epoch": 0.8026457689265943, + "kl_loss": 0.20604582130908966, + "loss_ib": 0.004945378750562668, + "step": 2791 + }, + { + "ce_ib": 2.0681536197662354, + "ce_orig": 0.42342451214790344, + "epoch": 0.8026457689265943, + "kl_loss": 0.24226680397987366, + "loss_ib": 0.004490821622312069, + "step": 2791 + }, + { + "ce_ib": 3.775587320327759, + "ce_orig": 0.812340497970581, + "epoch": 0.8026457689265943, + "kl_loss": 0.2112949788570404, + "loss_ib": 0.005888537038117647, + "step": 2791 + }, + { + "ce_ib": 3.2735788822174072, + "ce_orig": 0.5886802077293396, + "epoch": 0.802933352505572, + "kl_loss": 0.4086434245109558, + "loss_ib": 0.007360012736171484, + "step": 2792 + }, + { + "ce_ib": 3.7608702182769775, + "ce_orig": 1.0073295831680298, + "epoch": 0.802933352505572, + "kl_loss": 0.19011616706848145, + "loss_ib": 0.005662032403051853, + "step": 2792 + }, + { + "ce_ib": 5.494467258453369, + "ce_orig": 1.2233991622924805, + "epoch": 0.802933352505572, + "kl_loss": 0.12955746054649353, + "loss_ib": 0.006790041923522949, + "step": 2792 + }, + { + "ce_ib": 4.110065460205078, + "ce_orig": 0.7874861359596252, + "epoch": 0.802933352505572, + "kl_loss": 0.14604663848876953, + "loss_ib": 0.005570531822741032, + "step": 2792 + }, + { + "ce_ib": 4.114236354827881, + "ce_orig": 0.7710134983062744, + "epoch": 0.8032209360845496, + "kl_loss": 0.24312704801559448, + "loss_ib": 0.006545506417751312, + "step": 2793 + }, + { + "ce_ib": 6.853270053863525, + "ce_orig": 1.6216522455215454, + "epoch": 0.8032209360845496, + "kl_loss": 0.2621528208255768, + "loss_ib": 0.0094747981056571, + "step": 2793 + }, + { + "ce_ib": 4.471492767333984, + "ce_orig": 1.1191344261169434, + "epoch": 0.8032209360845496, + "kl_loss": 0.2282359004020691, + "loss_ib": 0.006753851659595966, + "step": 2793 + }, + { + "ce_ib": 2.911357879638672, + "ce_orig": 0.8954541087150574, + "epoch": 0.8032209360845496, + "kl_loss": 0.15402372181415558, + "loss_ib": 0.004451594781130552, + "step": 2793 + }, + { + "ce_ib": 4.911228179931641, + "ce_orig": 0.8078605532646179, + "epoch": 0.8035085196635272, + "kl_loss": 0.21320509910583496, + "loss_ib": 0.007043279241770506, + "step": 2794 + }, + { + "ce_ib": 6.387778282165527, + "ce_orig": 0.9819888472557068, + "epoch": 0.8035085196635272, + "kl_loss": 0.17024248838424683, + "loss_ib": 0.008090203627943993, + "step": 2794 + }, + { + "ce_ib": 4.098394870758057, + "ce_orig": 0.542461633682251, + "epoch": 0.8035085196635272, + "kl_loss": 0.29534584283828735, + "loss_ib": 0.007051852997392416, + "step": 2794 + }, + { + "ce_ib": 3.4787449836730957, + "ce_orig": 0.8231446146965027, + "epoch": 0.8035085196635272, + "kl_loss": 0.1357879489660263, + "loss_ib": 0.004836624022573233, + "step": 2794 + }, + { + "epoch": 0.8037961032425048, + "grad_norm": 0.12585827708244324, + "learning_rate": 8.595380908516454e-06, + "loss": 0.9189, + "step": 2795 + }, + { + "ce_ib": 3.8170788288116455, + "ce_orig": 0.8598630428314209, + "epoch": 0.8037961032425048, + "kl_loss": 0.19137398898601532, + "loss_ib": 0.0057308184914290905, + "step": 2795 + }, + { + "ce_ib": 4.971706867218018, + "ce_orig": 0.7220256924629211, + "epoch": 0.8037961032425048, + "kl_loss": 0.25570929050445557, + "loss_ib": 0.007528799585998058, + "step": 2795 + }, + { + "ce_ib": 3.4372808933258057, + "ce_orig": 0.8906099796295166, + "epoch": 0.8037961032425048, + "kl_loss": 0.16909119486808777, + "loss_ib": 0.005128192715346813, + "step": 2795 + }, + { + "ce_ib": 2.3434135913848877, + "ce_orig": 0.5603612661361694, + "epoch": 0.8037961032425048, + "kl_loss": 0.21929849684238434, + "loss_ib": 0.004536398686468601, + "step": 2795 + }, + { + "ce_ib": 3.0567421913146973, + "ce_orig": 0.36373355984687805, + "epoch": 0.8040836868214825, + "kl_loss": 0.24296829104423523, + "loss_ib": 0.005486425012350082, + "step": 2796 + }, + { + "ce_ib": 5.738767623901367, + "ce_orig": 0.6767974495887756, + "epoch": 0.8040836868214825, + "kl_loss": 0.2584180235862732, + "loss_ib": 0.008322947658598423, + "step": 2796 + }, + { + "ce_ib": 5.068378448486328, + "ce_orig": 0.8405973315238953, + "epoch": 0.8040836868214825, + "kl_loss": 0.19308370351791382, + "loss_ib": 0.0069992151111364365, + "step": 2796 + }, + { + "ce_ib": 6.272626876831055, + "ce_orig": 1.08251953125, + "epoch": 0.8040836868214825, + "kl_loss": 0.204598531126976, + "loss_ib": 0.008318612352013588, + "step": 2796 + }, + { + "ce_ib": 2.255296230316162, + "ce_orig": 0.24658799171447754, + "epoch": 0.8043712704004601, + "kl_loss": 0.23966023325920105, + "loss_ib": 0.004651898518204689, + "step": 2797 + }, + { + "ce_ib": 4.973262310028076, + "ce_orig": 0.7181485295295715, + "epoch": 0.8043712704004601, + "kl_loss": 0.19763267040252686, + "loss_ib": 0.006949589122086763, + "step": 2797 + }, + { + "ce_ib": 2.3995981216430664, + "ce_orig": 0.6234301924705505, + "epoch": 0.8043712704004601, + "kl_loss": 0.17397215962409973, + "loss_ib": 0.0041393195278942585, + "step": 2797 + }, + { + "ce_ib": 2.677018165588379, + "ce_orig": 0.4222238063812256, + "epoch": 0.8043712704004601, + "kl_loss": 0.162387877702713, + "loss_ib": 0.004300897009670734, + "step": 2797 + }, + { + "ce_ib": 3.3216586112976074, + "ce_orig": 0.6974101066589355, + "epoch": 0.8046588539794378, + "kl_loss": 0.15088820457458496, + "loss_ib": 0.004830540623515844, + "step": 2798 + }, + { + "ce_ib": 3.3838982582092285, + "ce_orig": 0.7169514298439026, + "epoch": 0.8046588539794378, + "kl_loss": 0.15540283918380737, + "loss_ib": 0.004937926772981882, + "step": 2798 + }, + { + "ce_ib": 4.905030250549316, + "ce_orig": 0.44179248809814453, + "epoch": 0.8046588539794378, + "kl_loss": 0.23234084248542786, + "loss_ib": 0.00722843874245882, + "step": 2798 + }, + { + "ce_ib": 2.6288938522338867, + "ce_orig": 0.6862131953239441, + "epoch": 0.8046588539794378, + "kl_loss": 0.1321481615304947, + "loss_ib": 0.003950375132262707, + "step": 2798 + }, + { + "ce_ib": 3.934225559234619, + "ce_orig": 0.9600613117218018, + "epoch": 0.8049464375584154, + "kl_loss": 0.2248958945274353, + "loss_ib": 0.006183184217661619, + "step": 2799 + }, + { + "ce_ib": 7.438791751861572, + "ce_orig": 1.745050311088562, + "epoch": 0.8049464375584154, + "kl_loss": 0.17757470905780792, + "loss_ib": 0.00921453908085823, + "step": 2799 + }, + { + "ce_ib": 6.7007670402526855, + "ce_orig": 0.7472442388534546, + "epoch": 0.8049464375584154, + "kl_loss": 0.24857917428016663, + "loss_ib": 0.009186558425426483, + "step": 2799 + }, + { + "ce_ib": 1.3189858198165894, + "ce_orig": 0.30229729413986206, + "epoch": 0.8049464375584154, + "kl_loss": 0.4337390065193176, + "loss_ib": 0.005656375549733639, + "step": 2799 + }, + { + "epoch": 0.8052340211373931, + "grad_norm": 0.11427468061447144, + "learning_rate": 8.589983321437271e-06, + "loss": 0.8503, + "step": 2800 + }, + { + "ce_ib": 2.705871820449829, + "ce_orig": 0.5266305804252625, + "epoch": 0.8052340211373931, + "kl_loss": 0.2032681703567505, + "loss_ib": 0.004738553427159786, + "step": 2800 + }, + { + "ce_ib": 3.667393445968628, + "ce_orig": 0.9854228496551514, + "epoch": 0.8052340211373931, + "kl_loss": 0.15344777703285217, + "loss_ib": 0.005201871506869793, + "step": 2800 + }, + { + "ce_ib": 5.329960346221924, + "ce_orig": 0.3858145475387573, + "epoch": 0.8052340211373931, + "kl_loss": 0.145588681101799, + "loss_ib": 0.006785847246646881, + "step": 2800 + }, + { + "ce_ib": 3.830564498901367, + "ce_orig": 0.44024157524108887, + "epoch": 0.8052340211373931, + "kl_loss": 0.18857505917549133, + "loss_ib": 0.005716315470635891, + "step": 2800 + }, + { + "ce_ib": 2.406057119369507, + "ce_orig": 0.6884495615959167, + "epoch": 0.8055216047163707, + "kl_loss": 0.18645690381526947, + "loss_ib": 0.0042706262320280075, + "step": 2801 + }, + { + "ce_ib": 3.357063055038452, + "ce_orig": 0.666911780834198, + "epoch": 0.8055216047163707, + "kl_loss": 0.16594094038009644, + "loss_ib": 0.005016472190618515, + "step": 2801 + }, + { + "ce_ib": 3.5473363399505615, + "ce_orig": 0.5164636373519897, + "epoch": 0.8055216047163707, + "kl_loss": 0.21546733379364014, + "loss_ib": 0.00570200989022851, + "step": 2801 + }, + { + "ce_ib": 6.565066814422607, + "ce_orig": 1.294385552406311, + "epoch": 0.8055216047163707, + "kl_loss": 0.20282116532325745, + "loss_ib": 0.00859327893704176, + "step": 2801 + }, + { + "ce_ib": 4.201599597930908, + "ce_orig": 0.6373945474624634, + "epoch": 0.8058091882953483, + "kl_loss": 0.14645066857337952, + "loss_ib": 0.005666106007993221, + "step": 2802 + }, + { + "ce_ib": 3.943754196166992, + "ce_orig": 1.210932731628418, + "epoch": 0.8058091882953483, + "kl_loss": 0.183653324842453, + "loss_ib": 0.0057802870869636536, + "step": 2802 + }, + { + "ce_ib": 3.357595682144165, + "ce_orig": 0.6501697897911072, + "epoch": 0.8058091882953483, + "kl_loss": 0.2822418808937073, + "loss_ib": 0.006180014461278915, + "step": 2802 + }, + { + "ce_ib": 2.714097499847412, + "ce_orig": 0.6307512521743774, + "epoch": 0.8058091882953483, + "kl_loss": 0.11940597742795944, + "loss_ib": 0.0039081573486328125, + "step": 2802 + }, + { + "ce_ib": 5.659660816192627, + "ce_orig": 1.0749328136444092, + "epoch": 0.8060967718743259, + "kl_loss": 0.17701321840286255, + "loss_ib": 0.007429792545735836, + "step": 2803 + }, + { + "ce_ib": 4.032803058624268, + "ce_orig": 0.6299296617507935, + "epoch": 0.8060967718743259, + "kl_loss": 0.17145830392837524, + "loss_ib": 0.0057473862543702126, + "step": 2803 + }, + { + "ce_ib": 2.8550431728363037, + "ce_orig": 0.7385361194610596, + "epoch": 0.8060967718743259, + "kl_loss": 0.14410051703453064, + "loss_ib": 0.004296048078685999, + "step": 2803 + }, + { + "ce_ib": 3.4584474563598633, + "ce_orig": 0.6431363224983215, + "epoch": 0.8060967718743259, + "kl_loss": 0.20019178092479706, + "loss_ib": 0.005460365209728479, + "step": 2803 + }, + { + "ce_ib": 4.157374858856201, + "ce_orig": 0.7320928573608398, + "epoch": 0.8063843554533037, + "kl_loss": 0.23593372106552124, + "loss_ib": 0.006516711786389351, + "step": 2804 + }, + { + "ce_ib": 2.1108884811401367, + "ce_orig": 0.48985254764556885, + "epoch": 0.8063843554533037, + "kl_loss": 0.20431937277317047, + "loss_ib": 0.0041540819220244884, + "step": 2804 + }, + { + "ce_ib": 3.1367850303649902, + "ce_orig": 0.6365300416946411, + "epoch": 0.8063843554533037, + "kl_loss": 0.21324652433395386, + "loss_ib": 0.005269250366836786, + "step": 2804 + }, + { + "ce_ib": 5.759416580200195, + "ce_orig": 0.9574940204620361, + "epoch": 0.8063843554533037, + "kl_loss": 0.2229733169078827, + "loss_ib": 0.00798915047198534, + "step": 2804 + }, + { + "epoch": 0.8066719390322813, + "grad_norm": 0.13379214704036713, + "learning_rate": 8.584577085255014e-06, + "loss": 0.7422, + "step": 2805 + }, + { + "ce_ib": 2.37490177154541, + "ce_orig": 0.5711527466773987, + "epoch": 0.8066719390322813, + "kl_loss": 0.15478868782520294, + "loss_ib": 0.003922788426280022, + "step": 2805 + }, + { + "ce_ib": 4.947363376617432, + "ce_orig": 0.8613508939743042, + "epoch": 0.8066719390322813, + "kl_loss": 0.21390339732170105, + "loss_ib": 0.007086397148668766, + "step": 2805 + }, + { + "ce_ib": 3.646303415298462, + "ce_orig": 0.8767181634902954, + "epoch": 0.8066719390322813, + "kl_loss": 0.19256269931793213, + "loss_ib": 0.005571930203586817, + "step": 2805 + }, + { + "ce_ib": 3.14528489112854, + "ce_orig": 0.3270719051361084, + "epoch": 0.8066719390322813, + "kl_loss": 0.2279808670282364, + "loss_ib": 0.005425093695521355, + "step": 2805 + }, + { + "ce_ib": 2.777611255645752, + "ce_orig": 0.7800548076629639, + "epoch": 0.8069595226112589, + "kl_loss": 0.15531979501247406, + "loss_ib": 0.004330809228122234, + "step": 2806 + }, + { + "ce_ib": 5.386082649230957, + "ce_orig": 1.1029577255249023, + "epoch": 0.8069595226112589, + "kl_loss": 0.2565973103046417, + "loss_ib": 0.007952055893838406, + "step": 2806 + }, + { + "ce_ib": 4.640817165374756, + "ce_orig": 1.1013484001159668, + "epoch": 0.8069595226112589, + "kl_loss": 0.15469850599765778, + "loss_ib": 0.006187802646309137, + "step": 2806 + }, + { + "ce_ib": 3.2009220123291016, + "ce_orig": 0.5224027037620544, + "epoch": 0.8069595226112589, + "kl_loss": 0.18021519482135773, + "loss_ib": 0.005003073718398809, + "step": 2806 + }, + { + "ce_ib": 3.915433883666992, + "ce_orig": 0.8189111948013306, + "epoch": 0.8072471061902365, + "kl_loss": 0.14318901300430298, + "loss_ib": 0.0053473240695893764, + "step": 2807 + }, + { + "ce_ib": 5.580287933349609, + "ce_orig": 0.8719700574874878, + "epoch": 0.8072471061902365, + "kl_loss": 0.18730315566062927, + "loss_ib": 0.0074533200822770596, + "step": 2807 + }, + { + "ce_ib": 5.255758285522461, + "ce_orig": 1.1668004989624023, + "epoch": 0.8072471061902365, + "kl_loss": 0.19962236285209656, + "loss_ib": 0.007251981180161238, + "step": 2807 + }, + { + "ce_ib": 4.7979512214660645, + "ce_orig": 1.1983979940414429, + "epoch": 0.8072471061902365, + "kl_loss": 0.17445316910743713, + "loss_ib": 0.006542482879012823, + "step": 2807 + }, + { + "ce_ib": 4.400650978088379, + "ce_orig": 0.6877610087394714, + "epoch": 0.8075346897692142, + "kl_loss": 0.21947693824768066, + "loss_ib": 0.006595420651137829, + "step": 2808 + }, + { + "ce_ib": 2.023010015487671, + "ce_orig": 0.30413609743118286, + "epoch": 0.8075346897692142, + "kl_loss": 0.1409745216369629, + "loss_ib": 0.00343275535851717, + "step": 2808 + }, + { + "ce_ib": 3.3349180221557617, + "ce_orig": 0.7836301922798157, + "epoch": 0.8075346897692142, + "kl_loss": 0.18727228045463562, + "loss_ib": 0.005207641050219536, + "step": 2808 + }, + { + "ce_ib": 4.174093246459961, + "ce_orig": 0.9926294088363647, + "epoch": 0.8075346897692142, + "kl_loss": 0.16385450959205627, + "loss_ib": 0.0058126384392380714, + "step": 2808 + }, + { + "ce_ib": 4.957505226135254, + "ce_orig": 1.1171748638153076, + "epoch": 0.8078222733481918, + "kl_loss": 0.505752444267273, + "loss_ib": 0.010015029460191727, + "step": 2809 + }, + { + "ce_ib": 4.368190765380859, + "ce_orig": 0.704754114151001, + "epoch": 0.8078222733481918, + "kl_loss": 0.16711105406284332, + "loss_ib": 0.006039300933480263, + "step": 2809 + }, + { + "ce_ib": 4.491915702819824, + "ce_orig": 0.7848616242408752, + "epoch": 0.8078222733481918, + "kl_loss": 0.13472099602222443, + "loss_ib": 0.005839125253260136, + "step": 2809 + }, + { + "ce_ib": 5.327619552612305, + "ce_orig": 1.0921765565872192, + "epoch": 0.8078222733481918, + "kl_loss": 0.45765241980552673, + "loss_ib": 0.009904143400490284, + "step": 2809 + }, + { + "epoch": 0.8081098569271694, + "grad_norm": 0.12458102405071259, + "learning_rate": 8.579162212994563e-06, + "loss": 0.8557, + "step": 2810 + }, + { + "ce_ib": 6.635728359222412, + "ce_orig": 1.4311456680297852, + "epoch": 0.8081098569271694, + "kl_loss": 0.14241039752960205, + "loss_ib": 0.008059832267463207, + "step": 2810 + }, + { + "ce_ib": 4.22183084487915, + "ce_orig": 0.6894580721855164, + "epoch": 0.8081098569271694, + "kl_loss": 0.31671828031539917, + "loss_ib": 0.007389013655483723, + "step": 2810 + }, + { + "ce_ib": 6.408979892730713, + "ce_orig": 1.4805161952972412, + "epoch": 0.8081098569271694, + "kl_loss": 0.12917451560497284, + "loss_ib": 0.007700724992901087, + "step": 2810 + }, + { + "ce_ib": 5.151460647583008, + "ce_orig": 1.1030305624008179, + "epoch": 0.8081098569271694, + "kl_loss": 0.14889763295650482, + "loss_ib": 0.006640437059104443, + "step": 2810 + }, + { + "ce_ib": 4.164640426635742, + "ce_orig": 0.8648793697357178, + "epoch": 0.8083974405061471, + "kl_loss": 0.2870556116104126, + "loss_ib": 0.0070351967588067055, + "step": 2811 + }, + { + "ce_ib": 4.377676010131836, + "ce_orig": 0.8731876015663147, + "epoch": 0.8083974405061471, + "kl_loss": 0.1410640925168991, + "loss_ib": 0.005788316484540701, + "step": 2811 + }, + { + "ce_ib": 4.981928825378418, + "ce_orig": 0.924431324005127, + "epoch": 0.8083974405061471, + "kl_loss": 0.22364798188209534, + "loss_ib": 0.007218408398330212, + "step": 2811 + }, + { + "ce_ib": 2.7620465755462646, + "ce_orig": 0.7982394099235535, + "epoch": 0.8083974405061471, + "kl_loss": 0.17122766375541687, + "loss_ib": 0.004474323242902756, + "step": 2811 + }, + { + "ce_ib": 5.429634094238281, + "ce_orig": 1.2720057964324951, + "epoch": 0.8086850240851248, + "kl_loss": 0.129172682762146, + "loss_ib": 0.0067213610745966434, + "step": 2812 + }, + { + "ce_ib": 5.693449974060059, + "ce_orig": 1.2809714078903198, + "epoch": 0.8086850240851248, + "kl_loss": 0.23529353737831116, + "loss_ib": 0.008046384900808334, + "step": 2812 + }, + { + "ce_ib": 3.6881043910980225, + "ce_orig": 0.5456626415252686, + "epoch": 0.8086850240851248, + "kl_loss": 0.15139053761959076, + "loss_ib": 0.005202009342610836, + "step": 2812 + }, + { + "ce_ib": 4.382667541503906, + "ce_orig": 1.056902527809143, + "epoch": 0.8086850240851248, + "kl_loss": 0.15667033195495605, + "loss_ib": 0.005949370563030243, + "step": 2812 + }, + { + "ce_ib": 5.248158931732178, + "ce_orig": 1.0571036338806152, + "epoch": 0.8089726076641024, + "kl_loss": 0.22341494262218475, + "loss_ib": 0.007482308428734541, + "step": 2813 + }, + { + "ce_ib": 7.10188627243042, + "ce_orig": 1.3599414825439453, + "epoch": 0.8089726076641024, + "kl_loss": 0.22525709867477417, + "loss_ib": 0.009354457259178162, + "step": 2813 + }, + { + "ce_ib": 3.1746881008148193, + "ce_orig": 0.9168302416801453, + "epoch": 0.8089726076641024, + "kl_loss": 0.1531776338815689, + "loss_ib": 0.004706464242190123, + "step": 2813 + }, + { + "ce_ib": 3.019801139831543, + "ce_orig": 0.8773506283760071, + "epoch": 0.8089726076641024, + "kl_loss": 0.17970812320709229, + "loss_ib": 0.004816882312297821, + "step": 2813 + }, + { + "ce_ib": 4.835145950317383, + "ce_orig": 1.0368512868881226, + "epoch": 0.80926019124308, + "kl_loss": 0.2398310899734497, + "loss_ib": 0.0072334567084908485, + "step": 2814 + }, + { + "ce_ib": 5.538415431976318, + "ce_orig": 1.423850178718567, + "epoch": 0.80926019124308, + "kl_loss": 0.1790781468153, + "loss_ib": 0.00732919666916132, + "step": 2814 + }, + { + "ce_ib": 4.625051021575928, + "ce_orig": 0.9466367363929749, + "epoch": 0.80926019124308, + "kl_loss": 0.19708652794361115, + "loss_ib": 0.006595916114747524, + "step": 2814 + }, + { + "ce_ib": 2.22379732131958, + "ce_orig": 0.5998920202255249, + "epoch": 0.80926019124308, + "kl_loss": 0.18184642493724823, + "loss_ib": 0.004042261280119419, + "step": 2814 + }, + { + "epoch": 0.8095477748220576, + "grad_norm": 0.16903330385684967, + "learning_rate": 8.573738717701597e-06, + "loss": 0.9492, + "step": 2815 + }, + { + "ce_ib": 3.334712505340576, + "ce_orig": 0.6134759783744812, + "epoch": 0.8095477748220576, + "kl_loss": 0.12776976823806763, + "loss_ib": 0.004612410441040993, + "step": 2815 + }, + { + "ce_ib": 6.925368785858154, + "ce_orig": 1.4901227951049805, + "epoch": 0.8095477748220576, + "kl_loss": 0.20446811616420746, + "loss_ib": 0.008970050141215324, + "step": 2815 + }, + { + "ce_ib": 4.832937240600586, + "ce_orig": 0.87412029504776, + "epoch": 0.8095477748220576, + "kl_loss": 0.1459960639476776, + "loss_ib": 0.006292897742241621, + "step": 2815 + }, + { + "ce_ib": 8.120792388916016, + "ce_orig": 1.5464801788330078, + "epoch": 0.8095477748220576, + "kl_loss": 0.19833144545555115, + "loss_ib": 0.010104106739163399, + "step": 2815 + }, + { + "ce_ib": 9.830647468566895, + "ce_orig": 2.1820504665374756, + "epoch": 0.8098353584010353, + "kl_loss": 0.16470742225646973, + "loss_ib": 0.011477721855044365, + "step": 2816 + }, + { + "ce_ib": 4.770041465759277, + "ce_orig": 0.7583075761795044, + "epoch": 0.8098353584010353, + "kl_loss": 0.19950591027736664, + "loss_ib": 0.006765100173652172, + "step": 2816 + }, + { + "ce_ib": 5.410827159881592, + "ce_orig": 1.094847559928894, + "epoch": 0.8098353584010353, + "kl_loss": 0.1976110339164734, + "loss_ib": 0.007386937737464905, + "step": 2816 + }, + { + "ce_ib": 2.9771618843078613, + "ce_orig": 0.5368821620941162, + "epoch": 0.8098353584010353, + "kl_loss": 0.2207041084766388, + "loss_ib": 0.005184203386306763, + "step": 2816 + }, + { + "ce_ib": 2.824002981185913, + "ce_orig": 0.48365992307662964, + "epoch": 0.8101229419800129, + "kl_loss": 0.24350310862064362, + "loss_ib": 0.0052590337581932545, + "step": 2817 + }, + { + "ce_ib": 5.052495002746582, + "ce_orig": 1.0713677406311035, + "epoch": 0.8101229419800129, + "kl_loss": 0.2519412934780121, + "loss_ib": 0.007571908179670572, + "step": 2817 + }, + { + "ce_ib": 4.451130390167236, + "ce_orig": 0.5203421115875244, + "epoch": 0.8101229419800129, + "kl_loss": 0.18035238981246948, + "loss_ib": 0.006254653912037611, + "step": 2817 + }, + { + "ce_ib": 5.46199369430542, + "ce_orig": 1.0179420709609985, + "epoch": 0.8101229419800129, + "kl_loss": 0.24266262352466583, + "loss_ib": 0.007888619787991047, + "step": 2817 + }, + { + "ce_ib": 4.916067600250244, + "ce_orig": 1.2941205501556396, + "epoch": 0.8104105255589906, + "kl_loss": 0.14624658226966858, + "loss_ib": 0.006378533784300089, + "step": 2818 + }, + { + "ce_ib": 5.445988178253174, + "ce_orig": 1.0503796339035034, + "epoch": 0.8104105255589906, + "kl_loss": 0.23406225442886353, + "loss_ib": 0.007786610629409552, + "step": 2818 + }, + { + "ce_ib": 3.0745108127593994, + "ce_orig": 0.7845838069915771, + "epoch": 0.8104105255589906, + "kl_loss": 0.15637946128845215, + "loss_ib": 0.004638305399566889, + "step": 2818 + }, + { + "ce_ib": 1.6301770210266113, + "ce_orig": 0.4679369628429413, + "epoch": 0.8104105255589906, + "kl_loss": 0.09047284722328186, + "loss_ib": 0.00253490568138659, + "step": 2818 + }, + { + "ce_ib": 2.7442147731781006, + "ce_orig": 0.9432048797607422, + "epoch": 0.8106981091379682, + "kl_loss": 0.14241287112236023, + "loss_ib": 0.004168343264609575, + "step": 2819 + }, + { + "ce_ib": 3.174682378768921, + "ce_orig": 0.4576375186443329, + "epoch": 0.8106981091379682, + "kl_loss": 0.1850023865699768, + "loss_ib": 0.0050247060135006905, + "step": 2819 + }, + { + "ce_ib": 4.269175052642822, + "ce_orig": 0.6284765601158142, + "epoch": 0.8106981091379682, + "kl_loss": 0.2437506914138794, + "loss_ib": 0.006706682499498129, + "step": 2819 + }, + { + "ce_ib": 5.863152980804443, + "ce_orig": 1.2594232559204102, + "epoch": 0.8106981091379682, + "kl_loss": 0.17471331357955933, + "loss_ib": 0.007610286120325327, + "step": 2819 + }, + { + "epoch": 0.8109856927169459, + "grad_norm": 0.15425726771354675, + "learning_rate": 8.568306612442579e-06, + "loss": 0.851, + "step": 2820 + }, + { + "ce_ib": 6.788617134094238, + "ce_orig": 1.2635438442230225, + "epoch": 0.8109856927169459, + "kl_loss": 0.27282410860061646, + "loss_ib": 0.009516858495771885, + "step": 2820 + }, + { + "ce_ib": 3.9059863090515137, + "ce_orig": 0.7710947394371033, + "epoch": 0.8109856927169459, + "kl_loss": 0.26100146770477295, + "loss_ib": 0.006516001187264919, + "step": 2820 + }, + { + "ce_ib": 3.0546553134918213, + "ce_orig": 0.3682522177696228, + "epoch": 0.8109856927169459, + "kl_loss": 0.23592615127563477, + "loss_ib": 0.005413917358964682, + "step": 2820 + }, + { + "ce_ib": 3.022343158721924, + "ce_orig": 0.6393887400627136, + "epoch": 0.8109856927169459, + "kl_loss": 0.20133253931999207, + "loss_ib": 0.005035668611526489, + "step": 2820 + }, + { + "ce_ib": 4.075399875640869, + "ce_orig": 1.1181811094284058, + "epoch": 0.8112732762959235, + "kl_loss": 0.2347995936870575, + "loss_ib": 0.0064233955927193165, + "step": 2821 + }, + { + "ce_ib": 2.4053831100463867, + "ce_orig": 0.6245460510253906, + "epoch": 0.8112732762959235, + "kl_loss": 0.13468626141548157, + "loss_ib": 0.0037522458005696535, + "step": 2821 + }, + { + "ce_ib": 3.2733516693115234, + "ce_orig": 0.7988634705543518, + "epoch": 0.8112732762959235, + "kl_loss": 0.18906927108764648, + "loss_ib": 0.005164044443517923, + "step": 2821 + }, + { + "ce_ib": 4.329441547393799, + "ce_orig": 0.6481775641441345, + "epoch": 0.8112732762959235, + "kl_loss": 0.16387635469436646, + "loss_ib": 0.005968204699456692, + "step": 2821 + }, + { + "ce_ib": 3.09920334815979, + "ce_orig": 0.4815601408481598, + "epoch": 0.8115608598749011, + "kl_loss": 0.23736260831356049, + "loss_ib": 0.005472829099744558, + "step": 2822 + }, + { + "ce_ib": 4.2055983543396, + "ce_orig": 0.8898127675056458, + "epoch": 0.8115608598749011, + "kl_loss": 0.1176103875041008, + "loss_ib": 0.005381701979786158, + "step": 2822 + }, + { + "ce_ib": 3.9223551750183105, + "ce_orig": 0.7810664176940918, + "epoch": 0.8115608598749011, + "kl_loss": 0.20519232749938965, + "loss_ib": 0.005974278319627047, + "step": 2822 + }, + { + "ce_ib": 6.134365081787109, + "ce_orig": 1.1662322282791138, + "epoch": 0.8115608598749011, + "kl_loss": 0.11553213000297546, + "loss_ib": 0.007289686240255833, + "step": 2822 + }, + { + "ce_ib": 1.6252281665802002, + "ce_orig": 0.2407359927892685, + "epoch": 0.8118484434538787, + "kl_loss": 0.1682782769203186, + "loss_ib": 0.0033080109860748053, + "step": 2823 + }, + { + "ce_ib": 3.9646918773651123, + "ce_orig": 0.6208115816116333, + "epoch": 0.8118484434538787, + "kl_loss": 0.243473082780838, + "loss_ib": 0.006399422883987427, + "step": 2823 + }, + { + "ce_ib": 3.300515651702881, + "ce_orig": 0.7768258452415466, + "epoch": 0.8118484434538787, + "kl_loss": 0.2973312437534332, + "loss_ib": 0.0062738279812037945, + "step": 2823 + }, + { + "ce_ib": 5.570272445678711, + "ce_orig": 1.3416250944137573, + "epoch": 0.8118484434538787, + "kl_loss": 0.1277160346508026, + "loss_ib": 0.006847432814538479, + "step": 2823 + }, + { + "ce_ib": 2.212543249130249, + "ce_orig": 0.4162386655807495, + "epoch": 0.8121360270328565, + "kl_loss": 0.3607606887817383, + "loss_ib": 0.005820150021463633, + "step": 2824 + }, + { + "ce_ib": 4.168985843658447, + "ce_orig": 0.753040611743927, + "epoch": 0.8121360270328565, + "kl_loss": 0.32606783509254456, + "loss_ib": 0.007429664023220539, + "step": 2824 + }, + { + "ce_ib": 5.677418231964111, + "ce_orig": 0.6049272418022156, + "epoch": 0.8121360270328565, + "kl_loss": 0.1760782152414322, + "loss_ib": 0.007438200060278177, + "step": 2824 + }, + { + "ce_ib": 3.4200432300567627, + "ce_orig": 0.521856427192688, + "epoch": 0.8121360270328565, + "kl_loss": 0.5339518785476685, + "loss_ib": 0.00875956192612648, + "step": 2824 + }, + { + "epoch": 0.8124236106118341, + "grad_norm": 0.13098736107349396, + "learning_rate": 8.562865910304709e-06, + "loss": 0.8104, + "step": 2825 + }, + { + "ce_ib": 3.800471067428589, + "ce_orig": 0.846940279006958, + "epoch": 0.8124236106118341, + "kl_loss": 0.17787261307239532, + "loss_ib": 0.005579197313636541, + "step": 2825 + }, + { + "ce_ib": 4.9278059005737305, + "ce_orig": 0.9474262595176697, + "epoch": 0.8124236106118341, + "kl_loss": 0.23009270429611206, + "loss_ib": 0.007228733040392399, + "step": 2825 + }, + { + "ce_ib": 3.5791468620300293, + "ce_orig": 0.8838215470314026, + "epoch": 0.8124236106118341, + "kl_loss": 0.14385095238685608, + "loss_ib": 0.005017655901610851, + "step": 2825 + }, + { + "ce_ib": 7.1811652183532715, + "ce_orig": 1.7461858987808228, + "epoch": 0.8124236106118341, + "kl_loss": 0.26902881264686584, + "loss_ib": 0.009871453046798706, + "step": 2825 + }, + { + "ce_ib": 4.847505569458008, + "ce_orig": 1.148413062095642, + "epoch": 0.8127111941908117, + "kl_loss": 0.21338492631912231, + "loss_ib": 0.006981354672461748, + "step": 2826 + }, + { + "ce_ib": 4.31863260269165, + "ce_orig": 1.0397228002548218, + "epoch": 0.8127111941908117, + "kl_loss": 0.20409339666366577, + "loss_ib": 0.00635956646874547, + "step": 2826 + }, + { + "ce_ib": 7.493680477142334, + "ce_orig": 1.0952584743499756, + "epoch": 0.8127111941908117, + "kl_loss": 0.19997602701187134, + "loss_ib": 0.009493440389633179, + "step": 2826 + }, + { + "ce_ib": 3.809511661529541, + "ce_orig": 0.8474019765853882, + "epoch": 0.8127111941908117, + "kl_loss": 0.1323765218257904, + "loss_ib": 0.005133277270942926, + "step": 2826 + }, + { + "ce_ib": 4.267281532287598, + "ce_orig": 0.8097968697547913, + "epoch": 0.8129987777697893, + "kl_loss": 0.23593609035015106, + "loss_ib": 0.006626642309129238, + "step": 2827 + }, + { + "ce_ib": 3.5344808101654053, + "ce_orig": 0.9996666312217712, + "epoch": 0.8129987777697893, + "kl_loss": 0.12655311822891235, + "loss_ib": 0.004800011869519949, + "step": 2827 + }, + { + "ce_ib": 2.484947443008423, + "ce_orig": 0.38326403498649597, + "epoch": 0.8129987777697893, + "kl_loss": 0.20319604873657227, + "loss_ib": 0.0045169079676270485, + "step": 2827 + }, + { + "ce_ib": 2.8379597663879395, + "ce_orig": 0.6204448938369751, + "epoch": 0.8129987777697893, + "kl_loss": 0.23185965418815613, + "loss_ib": 0.005156556144356728, + "step": 2827 + }, + { + "ce_ib": 2.486659288406372, + "ce_orig": 0.32783243060112, + "epoch": 0.813286361348767, + "kl_loss": 0.2391996532678604, + "loss_ib": 0.004878655541688204, + "step": 2828 + }, + { + "ce_ib": 3.3051137924194336, + "ce_orig": 0.7440186738967896, + "epoch": 0.813286361348767, + "kl_loss": 0.163787379860878, + "loss_ib": 0.004942987579852343, + "step": 2828 + }, + { + "ce_ib": 3.5936367511749268, + "ce_orig": 0.7862833738327026, + "epoch": 0.813286361348767, + "kl_loss": 0.19060471653938293, + "loss_ib": 0.005499683320522308, + "step": 2828 + }, + { + "ce_ib": 3.994291305541992, + "ce_orig": 0.7201908230781555, + "epoch": 0.813286361348767, + "kl_loss": 0.2092348039150238, + "loss_ib": 0.006086639128625393, + "step": 2828 + }, + { + "ce_ib": 3.9633843898773193, + "ce_orig": 0.7429069876670837, + "epoch": 0.8135739449277446, + "kl_loss": 0.2630009055137634, + "loss_ib": 0.006593393161892891, + "step": 2829 + }, + { + "ce_ib": 5.242956638336182, + "ce_orig": 0.8232580423355103, + "epoch": 0.8135739449277446, + "kl_loss": 0.27004361152648926, + "loss_ib": 0.007943392731249332, + "step": 2829 + }, + { + "ce_ib": 6.151187419891357, + "ce_orig": 1.4016473293304443, + "epoch": 0.8135739449277446, + "kl_loss": 0.2851313352584839, + "loss_ib": 0.009002500213682652, + "step": 2829 + }, + { + "ce_ib": 4.095247268676758, + "ce_orig": 0.8852970600128174, + "epoch": 0.8135739449277446, + "kl_loss": 0.19033268094062805, + "loss_ib": 0.005998574197292328, + "step": 2829 + }, + { + "epoch": 0.8138615285067222, + "grad_norm": 0.1376420110464096, + "learning_rate": 8.557416624395901e-06, + "loss": 0.9082, + "step": 2830 + }, + { + "ce_ib": 4.390120983123779, + "ce_orig": 0.78986656665802, + "epoch": 0.8138615285067222, + "kl_loss": 0.26521235704421997, + "loss_ib": 0.007042244542390108, + "step": 2830 + }, + { + "ce_ib": 6.817259788513184, + "ce_orig": 1.3516018390655518, + "epoch": 0.8138615285067222, + "kl_loss": 0.19069942831993103, + "loss_ib": 0.008724254556000233, + "step": 2830 + }, + { + "ce_ib": 2.3844285011291504, + "ce_orig": 0.687908411026001, + "epoch": 0.8138615285067222, + "kl_loss": 0.1946626603603363, + "loss_ib": 0.004331055097281933, + "step": 2830 + }, + { + "ce_ib": 4.269682884216309, + "ce_orig": 0.9854820966720581, + "epoch": 0.8138615285067222, + "kl_loss": 0.2006697654724121, + "loss_ib": 0.0062763807363808155, + "step": 2830 + }, + { + "ce_ib": 2.126732349395752, + "ce_orig": 0.5603146553039551, + "epoch": 0.8141491120857, + "kl_loss": 0.15848079323768616, + "loss_ib": 0.0037115400191396475, + "step": 2831 + }, + { + "ce_ib": 5.555351734161377, + "ce_orig": 1.392636775970459, + "epoch": 0.8141491120857, + "kl_loss": 0.14505447447299957, + "loss_ib": 0.007005896419286728, + "step": 2831 + }, + { + "ce_ib": 3.1060070991516113, + "ce_orig": 0.7390784621238708, + "epoch": 0.8141491120857, + "kl_loss": 0.27595746517181396, + "loss_ib": 0.005865582264959812, + "step": 2831 + }, + { + "ce_ib": 3.791517496109009, + "ce_orig": 0.7295724153518677, + "epoch": 0.8141491120857, + "kl_loss": 0.22407452762126923, + "loss_ib": 0.006032262928783894, + "step": 2831 + }, + { + "ce_ib": 3.402796506881714, + "ce_orig": 0.7584720849990845, + "epoch": 0.8144366956646776, + "kl_loss": 0.31253477931022644, + "loss_ib": 0.006528144236654043, + "step": 2832 + }, + { + "ce_ib": 4.4147443771362305, + "ce_orig": 0.46959227323532104, + "epoch": 0.8144366956646776, + "kl_loss": 0.20867693424224854, + "loss_ib": 0.006501513533294201, + "step": 2832 + }, + { + "ce_ib": 6.229449272155762, + "ce_orig": 0.5778273940086365, + "epoch": 0.8144366956646776, + "kl_loss": 0.16594219207763672, + "loss_ib": 0.007888871245086193, + "step": 2832 + }, + { + "ce_ib": 4.943928241729736, + "ce_orig": 0.750137448310852, + "epoch": 0.8144366956646776, + "kl_loss": 0.2245766967535019, + "loss_ib": 0.007189695257693529, + "step": 2832 + }, + { + "ce_ib": 5.440518379211426, + "ce_orig": 0.37751418352127075, + "epoch": 0.8147242792436552, + "kl_loss": 0.33012381196022034, + "loss_ib": 0.008741756901144981, + "step": 2833 + }, + { + "ce_ib": 3.30912184715271, + "ce_orig": 0.7351868748664856, + "epoch": 0.8147242792436552, + "kl_loss": 0.28515249490737915, + "loss_ib": 0.006160646677017212, + "step": 2833 + }, + { + "ce_ib": 4.992186069488525, + "ce_orig": 0.7784414887428284, + "epoch": 0.8147242792436552, + "kl_loss": 0.263955682516098, + "loss_ib": 0.007631742861121893, + "step": 2833 + }, + { + "ce_ib": 7.165378570556641, + "ce_orig": 0.9597838521003723, + "epoch": 0.8147242792436552, + "kl_loss": 0.16148656606674194, + "loss_ib": 0.00878024473786354, + "step": 2833 + }, + { + "ce_ib": 4.596874713897705, + "ce_orig": 0.9479933381080627, + "epoch": 0.8150118628226328, + "kl_loss": 0.13651429116725922, + "loss_ib": 0.005962017457932234, + "step": 2834 + }, + { + "ce_ib": 2.764472484588623, + "ce_orig": 0.498548686504364, + "epoch": 0.8150118628226328, + "kl_loss": 0.1640867292881012, + "loss_ib": 0.0044053397141397, + "step": 2834 + }, + { + "ce_ib": 2.910493850708008, + "ce_orig": 0.7079524397850037, + "epoch": 0.8150118628226328, + "kl_loss": 0.1698717474937439, + "loss_ib": 0.004609211347997189, + "step": 2834 + }, + { + "ce_ib": 4.50593900680542, + "ce_orig": 0.728657603263855, + "epoch": 0.8150118628226328, + "kl_loss": 0.12762397527694702, + "loss_ib": 0.005782178603112698, + "step": 2834 + }, + { + "epoch": 0.8152994464016105, + "grad_norm": 0.14263704419136047, + "learning_rate": 8.551958767844752e-06, + "loss": 0.866, + "step": 2835 + }, + { + "ce_ib": 4.128542900085449, + "ce_orig": 1.1715339422225952, + "epoch": 0.8152994464016105, + "kl_loss": 0.29623574018478394, + "loss_ib": 0.0070908996276557446, + "step": 2835 + }, + { + "ce_ib": 6.016716003417969, + "ce_orig": 1.2604360580444336, + "epoch": 0.8152994464016105, + "kl_loss": 0.19429010152816772, + "loss_ib": 0.007959617301821709, + "step": 2835 + }, + { + "ce_ib": 5.392943382263184, + "ce_orig": 1.215772032737732, + "epoch": 0.8152994464016105, + "kl_loss": 0.15653465688228607, + "loss_ib": 0.0069582900032401085, + "step": 2835 + }, + { + "ce_ib": 4.160385608673096, + "ce_orig": 0.6095132827758789, + "epoch": 0.8152994464016105, + "kl_loss": 0.19494789838790894, + "loss_ib": 0.006109864450991154, + "step": 2835 + }, + { + "ce_ib": 2.456991195678711, + "ce_orig": 0.6359385848045349, + "epoch": 0.8155870299805881, + "kl_loss": 0.12176677584648132, + "loss_ib": 0.0036746589466929436, + "step": 2836 + }, + { + "ce_ib": 3.396928071975708, + "ce_orig": 0.708736002445221, + "epoch": 0.8155870299805881, + "kl_loss": 0.18704214692115784, + "loss_ib": 0.005267349537461996, + "step": 2836 + }, + { + "ce_ib": 4.349599361419678, + "ce_orig": 0.7039024829864502, + "epoch": 0.8155870299805881, + "kl_loss": 0.21036848425865173, + "loss_ib": 0.006453284062445164, + "step": 2836 + }, + { + "ce_ib": 5.113347053527832, + "ce_orig": 1.113311529159546, + "epoch": 0.8155870299805881, + "kl_loss": 0.21657243371009827, + "loss_ib": 0.007279071491211653, + "step": 2836 + }, + { + "ce_ib": 7.241262435913086, + "ce_orig": 1.8005057573318481, + "epoch": 0.8158746135595657, + "kl_loss": 0.219766765832901, + "loss_ib": 0.009438930079340935, + "step": 2837 + }, + { + "ce_ib": 4.65227746963501, + "ce_orig": 0.7348995208740234, + "epoch": 0.8158746135595657, + "kl_loss": 0.16257837414741516, + "loss_ib": 0.006278061307966709, + "step": 2837 + }, + { + "ce_ib": 2.817396640777588, + "ce_orig": 0.5756980180740356, + "epoch": 0.8158746135595657, + "kl_loss": 0.1704842448234558, + "loss_ib": 0.004522239323705435, + "step": 2837 + }, + { + "ce_ib": 3.054978132247925, + "ce_orig": 0.7806615233421326, + "epoch": 0.8158746135595657, + "kl_loss": 0.25649887323379517, + "loss_ib": 0.0056199668906629086, + "step": 2837 + }, + { + "ce_ib": 3.733205795288086, + "ce_orig": 0.7331455945968628, + "epoch": 0.8161621971385434, + "kl_loss": 0.24562786519527435, + "loss_ib": 0.006189484614878893, + "step": 2838 + }, + { + "ce_ib": 5.999746322631836, + "ce_orig": 0.9371368885040283, + "epoch": 0.8161621971385434, + "kl_loss": 0.23596028983592987, + "loss_ib": 0.008359349332749844, + "step": 2838 + }, + { + "ce_ib": 2.947451591491699, + "ce_orig": 0.6083135008811951, + "epoch": 0.8161621971385434, + "kl_loss": 0.14301379024982452, + "loss_ib": 0.00437758956104517, + "step": 2838 + }, + { + "ce_ib": 3.337872266769409, + "ce_orig": 0.6912399530410767, + "epoch": 0.8161621971385434, + "kl_loss": 0.15199977159500122, + "loss_ib": 0.004857869818806648, + "step": 2838 + }, + { + "ce_ib": 3.5584962368011475, + "ce_orig": 0.5141441226005554, + "epoch": 0.8164497807175211, + "kl_loss": 0.27234795689582825, + "loss_ib": 0.006281975191086531, + "step": 2839 + }, + { + "ce_ib": 3.646437168121338, + "ce_orig": 0.8237012624740601, + "epoch": 0.8164497807175211, + "kl_loss": 0.2885761857032776, + "loss_ib": 0.006532199215143919, + "step": 2839 + }, + { + "ce_ib": 3.559065103530884, + "ce_orig": 0.692865788936615, + "epoch": 0.8164497807175211, + "kl_loss": 0.12891432642936707, + "loss_ib": 0.004848208278417587, + "step": 2839 + }, + { + "ce_ib": 3.0342888832092285, + "ce_orig": 0.7529148459434509, + "epoch": 0.8164497807175211, + "kl_loss": 0.1057397872209549, + "loss_ib": 0.0040916865691542625, + "step": 2839 + }, + { + "epoch": 0.8167373642964987, + "grad_norm": 0.1326970010995865, + "learning_rate": 8.546492353800504e-06, + "loss": 0.7945, + "step": 2840 + }, + { + "ce_ib": 3.4266812801361084, + "ce_orig": 1.0130959749221802, + "epoch": 0.8167373642964987, + "kl_loss": 0.1586403250694275, + "loss_ib": 0.005013084504753351, + "step": 2840 + }, + { + "ce_ib": 3.8006293773651123, + "ce_orig": 0.8026612401008606, + "epoch": 0.8167373642964987, + "kl_loss": 0.17246824502944946, + "loss_ib": 0.005525311920791864, + "step": 2840 + }, + { + "ce_ib": 2.211536407470703, + "ce_orig": 0.5483260154724121, + "epoch": 0.8167373642964987, + "kl_loss": 0.16693736612796783, + "loss_ib": 0.0038809101097285748, + "step": 2840 + }, + { + "ce_ib": 4.326406002044678, + "ce_orig": 1.0342612266540527, + "epoch": 0.8167373642964987, + "kl_loss": 0.14114832878112793, + "loss_ib": 0.005737889092415571, + "step": 2840 + }, + { + "ce_ib": 3.84999418258667, + "ce_orig": 0.9693388342857361, + "epoch": 0.8170249478754763, + "kl_loss": 0.22577735781669617, + "loss_ib": 0.006107767578214407, + "step": 2841 + }, + { + "ce_ib": 5.461817264556885, + "ce_orig": 0.9861846566200256, + "epoch": 0.8170249478754763, + "kl_loss": 0.12871147692203522, + "loss_ib": 0.00674893194809556, + "step": 2841 + }, + { + "ce_ib": 5.103665828704834, + "ce_orig": 0.9926477074623108, + "epoch": 0.8170249478754763, + "kl_loss": 0.19555670022964478, + "loss_ib": 0.007059233263134956, + "step": 2841 + }, + { + "ce_ib": 4.3282856941223145, + "ce_orig": 0.7030021548271179, + "epoch": 0.8170249478754763, + "kl_loss": 0.17224840819835663, + "loss_ib": 0.006050769705325365, + "step": 2841 + }, + { + "ce_ib": 4.43719482421875, + "ce_orig": 0.8141950964927673, + "epoch": 0.8173125314544539, + "kl_loss": 0.23830828070640564, + "loss_ib": 0.006820277310907841, + "step": 2842 + }, + { + "ce_ib": 6.6146063804626465, + "ce_orig": 1.0631023645401, + "epoch": 0.8173125314544539, + "kl_loss": 0.20128114521503448, + "loss_ib": 0.008627417497336864, + "step": 2842 + }, + { + "ce_ib": 4.293017864227295, + "ce_orig": 1.1018860340118408, + "epoch": 0.8173125314544539, + "kl_loss": 0.22862300276756287, + "loss_ib": 0.006579247768968344, + "step": 2842 + }, + { + "ce_ib": 2.8993287086486816, + "ce_orig": 0.5880809426307678, + "epoch": 0.8173125314544539, + "kl_loss": 0.2704673111438751, + "loss_ib": 0.005604001693427563, + "step": 2842 + }, + { + "ce_ib": 4.875209331512451, + "ce_orig": 0.896125316619873, + "epoch": 0.8176001150334316, + "kl_loss": 0.23391586542129517, + "loss_ib": 0.007214367855340242, + "step": 2843 + }, + { + "ce_ib": 4.112821578979492, + "ce_orig": 1.1720503568649292, + "epoch": 0.8176001150334316, + "kl_loss": 0.18853890895843506, + "loss_ib": 0.005998210981488228, + "step": 2843 + }, + { + "ce_ib": 3.685950994491577, + "ce_orig": 0.8709239363670349, + "epoch": 0.8176001150334316, + "kl_loss": 0.14548154175281525, + "loss_ib": 0.005140766501426697, + "step": 2843 + }, + { + "ce_ib": 1.854185938835144, + "ce_orig": 0.5507779121398926, + "epoch": 0.8176001150334316, + "kl_loss": 0.09584920108318329, + "loss_ib": 0.002812677761539817, + "step": 2843 + }, + { + "ce_ib": 2.404982566833496, + "ce_orig": 0.4656935930252075, + "epoch": 0.8178876986124093, + "kl_loss": 0.23761698603630066, + "loss_ib": 0.0047811525873839855, + "step": 2844 + }, + { + "ce_ib": 4.358006954193115, + "ce_orig": 1.1140120029449463, + "epoch": 0.8178876986124093, + "kl_loss": 0.20427867770195007, + "loss_ib": 0.006400793790817261, + "step": 2844 + }, + { + "ce_ib": 3.125774621963501, + "ce_orig": 0.44086161255836487, + "epoch": 0.8178876986124093, + "kl_loss": 0.1800767034292221, + "loss_ib": 0.004926541354507208, + "step": 2844 + }, + { + "ce_ib": 2.7394967079162598, + "ce_orig": 0.7649036049842834, + "epoch": 0.8178876986124093, + "kl_loss": 0.17163550853729248, + "loss_ib": 0.00445585185661912, + "step": 2844 + }, + { + "epoch": 0.8181752821913869, + "grad_norm": 0.15440942347049713, + "learning_rate": 8.541017395433018e-06, + "loss": 0.8715, + "step": 2845 + }, + { + "ce_ib": 5.967456340789795, + "ce_orig": 1.224817156791687, + "epoch": 0.8181752821913869, + "kl_loss": 0.2096702754497528, + "loss_ib": 0.00806415919214487, + "step": 2845 + }, + { + "ce_ib": 4.876577377319336, + "ce_orig": 0.9866447448730469, + "epoch": 0.8181752821913869, + "kl_loss": 0.2013949751853943, + "loss_ib": 0.006890526972711086, + "step": 2845 + }, + { + "ce_ib": 3.1578376293182373, + "ce_orig": 0.661472499370575, + "epoch": 0.8181752821913869, + "kl_loss": 0.12272630631923676, + "loss_ib": 0.004385100677609444, + "step": 2845 + }, + { + "ce_ib": 4.076501846313477, + "ce_orig": 0.7466546297073364, + "epoch": 0.8181752821913869, + "kl_loss": 0.17658758163452148, + "loss_ib": 0.00584237789735198, + "step": 2845 + }, + { + "ce_ib": 5.18546199798584, + "ce_orig": 0.9121829271316528, + "epoch": 0.8184628657703645, + "kl_loss": 0.1519068032503128, + "loss_ib": 0.006704529747366905, + "step": 2846 + }, + { + "ce_ib": 6.174019813537598, + "ce_orig": 1.5258045196533203, + "epoch": 0.8184628657703645, + "kl_loss": 0.2035447508096695, + "loss_ib": 0.008209466934204102, + "step": 2846 + }, + { + "ce_ib": 8.079944610595703, + "ce_orig": 1.5995420217514038, + "epoch": 0.8184628657703645, + "kl_loss": 0.16209664940834045, + "loss_ib": 0.009700911119580269, + "step": 2846 + }, + { + "ce_ib": 3.6722934246063232, + "ce_orig": 0.8055585026741028, + "epoch": 0.8184628657703645, + "kl_loss": 0.21619875729084015, + "loss_ib": 0.005834280513226986, + "step": 2846 + }, + { + "ce_ib": 6.393935680389404, + "ce_orig": 1.3129714727401733, + "epoch": 0.8187504493493422, + "kl_loss": 0.12182891368865967, + "loss_ib": 0.007612224668264389, + "step": 2847 + }, + { + "ce_ib": 1.5194599628448486, + "ce_orig": 0.42877620458602905, + "epoch": 0.8187504493493422, + "kl_loss": 0.11159531772136688, + "loss_ib": 0.002635413082316518, + "step": 2847 + }, + { + "ce_ib": 2.6059069633483887, + "ce_orig": 0.6444127559661865, + "epoch": 0.8187504493493422, + "kl_loss": 0.15298683941364288, + "loss_ib": 0.004135775379836559, + "step": 2847 + }, + { + "ce_ib": 2.698728084564209, + "ce_orig": 0.8229141235351562, + "epoch": 0.8187504493493422, + "kl_loss": 0.1506408303976059, + "loss_ib": 0.004205136094242334, + "step": 2847 + }, + { + "ce_ib": 1.9747765064239502, + "ce_orig": 0.2866949737071991, + "epoch": 0.8190380329283198, + "kl_loss": 0.1769970804452896, + "loss_ib": 0.003744747256860137, + "step": 2848 + }, + { + "ce_ib": 3.034151077270508, + "ce_orig": 0.5237607359886169, + "epoch": 0.8190380329283198, + "kl_loss": 0.21641966700553894, + "loss_ib": 0.005198347382247448, + "step": 2848 + }, + { + "ce_ib": 5.405912399291992, + "ce_orig": 1.0065901279449463, + "epoch": 0.8190380329283198, + "kl_loss": 0.25831490755081177, + "loss_ib": 0.007989061065018177, + "step": 2848 + }, + { + "ce_ib": 6.146017074584961, + "ce_orig": 0.8521340489387512, + "epoch": 0.8190380329283198, + "kl_loss": 0.1837710738182068, + "loss_ib": 0.007983728311955929, + "step": 2848 + }, + { + "ce_ib": 7.964849472045898, + "ce_orig": 1.5647331476211548, + "epoch": 0.8193256165072974, + "kl_loss": 0.12952587008476257, + "loss_ib": 0.009260108694434166, + "step": 2849 + }, + { + "ce_ib": 3.968723773956299, + "ce_orig": 0.6262959837913513, + "epoch": 0.8193256165072974, + "kl_loss": 0.15752112865447998, + "loss_ib": 0.005543935112655163, + "step": 2849 + }, + { + "ce_ib": 3.751953125, + "ce_orig": 0.797330379486084, + "epoch": 0.8193256165072974, + "kl_loss": 0.11296423524618149, + "loss_ib": 0.004881595727056265, + "step": 2849 + }, + { + "ce_ib": 3.4408726692199707, + "ce_orig": 0.7306565642356873, + "epoch": 0.8193256165072974, + "kl_loss": 0.18291951715946198, + "loss_ib": 0.005270068068057299, + "step": 2849 + }, + { + "epoch": 0.819613200086275, + "grad_norm": 0.1521758735179901, + "learning_rate": 8.535533905932739e-06, + "loss": 0.8569, + "step": 2850 + }, + { + "ce_ib": 4.685534477233887, + "ce_orig": 0.6803269982337952, + "epoch": 0.819613200086275, + "kl_loss": 0.1946541965007782, + "loss_ib": 0.0066320765763521194, + "step": 2850 + }, + { + "ce_ib": 7.041268348693848, + "ce_orig": 1.4169713258743286, + "epoch": 0.819613200086275, + "kl_loss": 0.148095041513443, + "loss_ib": 0.008522218093276024, + "step": 2850 + }, + { + "ce_ib": 3.532743453979492, + "ce_orig": 0.8797569274902344, + "epoch": 0.819613200086275, + "kl_loss": 0.16309787333011627, + "loss_ib": 0.005163721740245819, + "step": 2850 + }, + { + "ce_ib": 2.8769690990448, + "ce_orig": 0.8015829920768738, + "epoch": 0.819613200086275, + "kl_loss": 0.20922717452049255, + "loss_ib": 0.004969241097569466, + "step": 2850 + }, + { + "ce_ib": 3.885328769683838, + "ce_orig": 0.7685953378677368, + "epoch": 0.8199007836652528, + "kl_loss": 0.2542825937271118, + "loss_ib": 0.006428154651075602, + "step": 2851 + }, + { + "ce_ib": 2.7301218509674072, + "ce_orig": 0.6045214533805847, + "epoch": 0.8199007836652528, + "kl_loss": 0.32760006189346313, + "loss_ib": 0.006006122566759586, + "step": 2851 + }, + { + "ce_ib": 2.7168455123901367, + "ce_orig": 0.7245138883590698, + "epoch": 0.8199007836652528, + "kl_loss": 0.17292635142803192, + "loss_ib": 0.00444610882550478, + "step": 2851 + }, + { + "ce_ib": 4.157426834106445, + "ce_orig": 0.8168440461158752, + "epoch": 0.8199007836652528, + "kl_loss": 0.25775986909866333, + "loss_ib": 0.006735025439411402, + "step": 2851 + }, + { + "ce_ib": 3.4581665992736816, + "ce_orig": 0.6491184830665588, + "epoch": 0.8201883672442304, + "kl_loss": 0.1731381118297577, + "loss_ib": 0.005189547315239906, + "step": 2852 + }, + { + "ce_ib": 5.383091449737549, + "ce_orig": 0.9468398690223694, + "epoch": 0.8201883672442304, + "kl_loss": 0.13185985386371613, + "loss_ib": 0.006701689679175615, + "step": 2852 + }, + { + "ce_ib": 4.308228969573975, + "ce_orig": 0.9642396569252014, + "epoch": 0.8201883672442304, + "kl_loss": 0.15657532215118408, + "loss_ib": 0.0058739823289215565, + "step": 2852 + }, + { + "ce_ib": 3.2275044918060303, + "ce_orig": 0.6428219676017761, + "epoch": 0.8201883672442304, + "kl_loss": 0.21205663681030273, + "loss_ib": 0.005348070524632931, + "step": 2852 + }, + { + "ce_ib": 3.990896701812744, + "ce_orig": 0.8158345818519592, + "epoch": 0.820475950823208, + "kl_loss": 0.292044997215271, + "loss_ib": 0.006911346688866615, + "step": 2853 + }, + { + "ce_ib": 3.6127045154571533, + "ce_orig": 0.7912606596946716, + "epoch": 0.820475950823208, + "kl_loss": 0.19544699788093567, + "loss_ib": 0.005567174404859543, + "step": 2853 + }, + { + "ce_ib": 1.9215842485427856, + "ce_orig": 0.6142799854278564, + "epoch": 0.820475950823208, + "kl_loss": 0.1280638873577118, + "loss_ib": 0.0032022232189774513, + "step": 2853 + }, + { + "ce_ib": 5.822604656219482, + "ce_orig": 1.246188998222351, + "epoch": 0.820475950823208, + "kl_loss": 0.22739733755588531, + "loss_ib": 0.008096577599644661, + "step": 2853 + }, + { + "ce_ib": 7.600926399230957, + "ce_orig": 1.4978835582733154, + "epoch": 0.8207635344021856, + "kl_loss": 0.1690545678138733, + "loss_ib": 0.009291471913456917, + "step": 2854 + }, + { + "ce_ib": 3.4800753593444824, + "ce_orig": 0.47231921553611755, + "epoch": 0.8207635344021856, + "kl_loss": 0.28924280405044556, + "loss_ib": 0.006372503004968166, + "step": 2854 + }, + { + "ce_ib": 5.095150470733643, + "ce_orig": 0.8916752934455872, + "epoch": 0.8207635344021856, + "kl_loss": 0.1482037752866745, + "loss_ib": 0.006577188149094582, + "step": 2854 + }, + { + "ce_ib": 2.534146308898926, + "ce_orig": 0.4876583516597748, + "epoch": 0.8207635344021856, + "kl_loss": 0.1967693716287613, + "loss_ib": 0.004501839634031057, + "step": 2854 + }, + { + "epoch": 0.8210511179811633, + "grad_norm": 0.13573400676250458, + "learning_rate": 8.530041898510663e-06, + "loss": 0.8556, + "step": 2855 + }, + { + "ce_ib": 4.143399238586426, + "ce_orig": 0.7905087471008301, + "epoch": 0.8210511179811633, + "kl_loss": 0.1848156750202179, + "loss_ib": 0.005991555750370026, + "step": 2855 + }, + { + "ce_ib": 3.937817335128784, + "ce_orig": 1.0232199430465698, + "epoch": 0.8210511179811633, + "kl_loss": 0.1607877016067505, + "loss_ib": 0.005545694846659899, + "step": 2855 + }, + { + "ce_ib": 5.00846004486084, + "ce_orig": 1.069165587425232, + "epoch": 0.8210511179811633, + "kl_loss": 0.1926964819431305, + "loss_ib": 0.006935424637049437, + "step": 2855 + }, + { + "ce_ib": 4.038358688354492, + "ce_orig": 0.8369476795196533, + "epoch": 0.8210511179811633, + "kl_loss": 0.28232860565185547, + "loss_ib": 0.006861644797027111, + "step": 2855 + }, + { + "ce_ib": 4.253125190734863, + "ce_orig": 0.8929765820503235, + "epoch": 0.8213387015601409, + "kl_loss": 0.23910315334796906, + "loss_ib": 0.0066441562958061695, + "step": 2856 + }, + { + "ce_ib": 4.615047454833984, + "ce_orig": 1.077056646347046, + "epoch": 0.8213387015601409, + "kl_loss": 0.12749764323234558, + "loss_ib": 0.0058900234289467335, + "step": 2856 + }, + { + "ce_ib": 3.595489263534546, + "ce_orig": 0.7449144124984741, + "epoch": 0.8213387015601409, + "kl_loss": 0.21712982654571533, + "loss_ib": 0.005766787566244602, + "step": 2856 + }, + { + "ce_ib": 5.466747760772705, + "ce_orig": 1.5264551639556885, + "epoch": 0.8213387015601409, + "kl_loss": 0.14280110597610474, + "loss_ib": 0.006894758902490139, + "step": 2856 + }, + { + "ce_ib": 3.781481981277466, + "ce_orig": 0.8257534503936768, + "epoch": 0.8216262851391185, + "kl_loss": 0.18269553780555725, + "loss_ib": 0.005608437117189169, + "step": 2857 + }, + { + "ce_ib": 2.959507465362549, + "ce_orig": 0.3994794189929962, + "epoch": 0.8216262851391185, + "kl_loss": 0.15256273746490479, + "loss_ib": 0.0044851345010101795, + "step": 2857 + }, + { + "ce_ib": 4.63261604309082, + "ce_orig": 0.8416280150413513, + "epoch": 0.8216262851391185, + "kl_loss": 0.16357600688934326, + "loss_ib": 0.0062683760188519955, + "step": 2857 + }, + { + "ce_ib": 2.701120376586914, + "ce_orig": 0.8305757641792297, + "epoch": 0.8216262851391185, + "kl_loss": 0.14445768296718597, + "loss_ib": 0.004145697224885225, + "step": 2857 + }, + { + "ce_ib": 3.014299154281616, + "ce_orig": 0.4247124195098877, + "epoch": 0.8219138687180962, + "kl_loss": 0.16191065311431885, + "loss_ib": 0.004633405711501837, + "step": 2858 + }, + { + "ce_ib": 6.16417932510376, + "ce_orig": 1.4036452770233154, + "epoch": 0.8219138687180962, + "kl_loss": 0.2612762451171875, + "loss_ib": 0.00877694133669138, + "step": 2858 + }, + { + "ce_ib": 3.6000258922576904, + "ce_orig": 0.752758800983429, + "epoch": 0.8219138687180962, + "kl_loss": 0.17405880987644196, + "loss_ib": 0.005340614356100559, + "step": 2858 + }, + { + "ce_ib": 3.017117738723755, + "ce_orig": 0.7054120898246765, + "epoch": 0.8219138687180962, + "kl_loss": 0.15043801069259644, + "loss_ib": 0.004521497525274754, + "step": 2858 + }, + { + "ce_ib": 2.423567533493042, + "ce_orig": 0.5206600427627563, + "epoch": 0.8222014522970739, + "kl_loss": 0.15910860896110535, + "loss_ib": 0.004014653619378805, + "step": 2859 + }, + { + "ce_ib": 6.463271617889404, + "ce_orig": 1.0760353803634644, + "epoch": 0.8222014522970739, + "kl_loss": 0.25092631578445435, + "loss_ib": 0.008972534909844398, + "step": 2859 + }, + { + "ce_ib": 5.665767669677734, + "ce_orig": 0.9514893293380737, + "epoch": 0.8222014522970739, + "kl_loss": 0.19432765245437622, + "loss_ib": 0.007609044201672077, + "step": 2859 + }, + { + "ce_ib": 5.506107807159424, + "ce_orig": 0.567813515663147, + "epoch": 0.8222014522970739, + "kl_loss": 0.20168712735176086, + "loss_ib": 0.007522978819906712, + "step": 2859 + }, + { + "epoch": 0.8224890358760515, + "grad_norm": 0.143329456448555, + "learning_rate": 8.524541386398318e-06, + "loss": 0.833, + "step": 2860 + }, + { + "ce_ib": 4.0668134689331055, + "ce_orig": 0.8874337673187256, + "epoch": 0.8224890358760515, + "kl_loss": 0.21045368909835815, + "loss_ib": 0.006171350367367268, + "step": 2860 + }, + { + "ce_ib": 7.2811503410339355, + "ce_orig": 1.1967501640319824, + "epoch": 0.8224890358760515, + "kl_loss": 0.21767723560333252, + "loss_ib": 0.009457922540605068, + "step": 2860 + }, + { + "ce_ib": 4.020680904388428, + "ce_orig": 0.824561595916748, + "epoch": 0.8224890358760515, + "kl_loss": 0.14666612446308136, + "loss_ib": 0.005487342365086079, + "step": 2860 + }, + { + "ce_ib": 3.1904449462890625, + "ce_orig": 0.8340655565261841, + "epoch": 0.8224890358760515, + "kl_loss": 0.20053663849830627, + "loss_ib": 0.00519581139087677, + "step": 2860 + }, + { + "ce_ib": 4.520137310028076, + "ce_orig": 0.8902618288993835, + "epoch": 0.8227766194550291, + "kl_loss": 0.18103903532028198, + "loss_ib": 0.006330527365207672, + "step": 2861 + }, + { + "ce_ib": 4.586082935333252, + "ce_orig": 1.0623624324798584, + "epoch": 0.8227766194550291, + "kl_loss": 0.19532205164432526, + "loss_ib": 0.006539303343743086, + "step": 2861 + }, + { + "ce_ib": 2.6956794261932373, + "ce_orig": 0.6123293042182922, + "epoch": 0.8227766194550291, + "kl_loss": 0.16199655830860138, + "loss_ib": 0.004315644968301058, + "step": 2861 + }, + { + "ce_ib": 3.46711802482605, + "ce_orig": 0.8381121754646301, + "epoch": 0.8227766194550291, + "kl_loss": 0.18239985406398773, + "loss_ib": 0.005291116889566183, + "step": 2861 + }, + { + "ce_ib": 6.929968357086182, + "ce_orig": 1.1881275177001953, + "epoch": 0.8230642030340067, + "kl_loss": 0.24714432656764984, + "loss_ib": 0.00940141174942255, + "step": 2862 + }, + { + "ce_ib": 2.4272284507751465, + "ce_orig": 0.5440158247947693, + "epoch": 0.8230642030340067, + "kl_loss": 0.19503292441368103, + "loss_ib": 0.004377557430416346, + "step": 2862 + }, + { + "ce_ib": 4.169910430908203, + "ce_orig": 1.036006212234497, + "epoch": 0.8230642030340067, + "kl_loss": 0.1740855872631073, + "loss_ib": 0.005910765845328569, + "step": 2862 + }, + { + "ce_ib": 6.921990871429443, + "ce_orig": 1.2436848878860474, + "epoch": 0.8230642030340067, + "kl_loss": 0.19021737575531006, + "loss_ib": 0.008824164979159832, + "step": 2862 + }, + { + "ce_ib": 3.643216133117676, + "ce_orig": 0.6118395328521729, + "epoch": 0.8233517866129844, + "kl_loss": 0.20965096354484558, + "loss_ib": 0.005739725660532713, + "step": 2863 + }, + { + "ce_ib": 2.556272506713867, + "ce_orig": 0.3845931589603424, + "epoch": 0.8233517866129844, + "kl_loss": 0.18874534964561462, + "loss_ib": 0.004443726036697626, + "step": 2863 + }, + { + "ce_ib": 2.939204692840576, + "ce_orig": 0.6024640202522278, + "epoch": 0.8233517866129844, + "kl_loss": 0.2381192296743393, + "loss_ib": 0.005320396739989519, + "step": 2863 + }, + { + "ce_ib": 4.3409624099731445, + "ce_orig": 0.4958752989768982, + "epoch": 0.8233517866129844, + "kl_loss": 0.4438689649105072, + "loss_ib": 0.00877965148538351, + "step": 2863 + }, + { + "ce_ib": 5.137648582458496, + "ce_orig": 1.2664213180541992, + "epoch": 0.823639370191962, + "kl_loss": 0.12713196873664856, + "loss_ib": 0.006408968009054661, + "step": 2864 + }, + { + "ce_ib": 1.8961910009384155, + "ce_orig": 0.4744472801685333, + "epoch": 0.823639370191962, + "kl_loss": 0.35975757241249084, + "loss_ib": 0.005493766628205776, + "step": 2864 + }, + { + "ce_ib": 1.904695987701416, + "ce_orig": 0.4115987718105316, + "epoch": 0.823639370191962, + "kl_loss": 0.13161231577396393, + "loss_ib": 0.0032208191696554422, + "step": 2864 + }, + { + "ce_ib": 3.6558029651641846, + "ce_orig": 0.7234848141670227, + "epoch": 0.823639370191962, + "kl_loss": 0.20596303045749664, + "loss_ib": 0.005715433042496443, + "step": 2864 + }, + { + "epoch": 0.8239269537709397, + "grad_norm": 0.17390212416648865, + "learning_rate": 8.519032382847705e-06, + "loss": 0.8514, + "step": 2865 + }, + { + "ce_ib": 2.7139947414398193, + "ce_orig": 0.4796334505081177, + "epoch": 0.8239269537709397, + "kl_loss": 0.14796119928359985, + "loss_ib": 0.004193606786429882, + "step": 2865 + }, + { + "ce_ib": 4.9579291343688965, + "ce_orig": 1.0982221364974976, + "epoch": 0.8239269537709397, + "kl_loss": 0.19019246101379395, + "loss_ib": 0.0068598538637161255, + "step": 2865 + }, + { + "ce_ib": 6.676804542541504, + "ce_orig": 1.546370267868042, + "epoch": 0.8239269537709397, + "kl_loss": 0.25229397416114807, + "loss_ib": 0.009199744090437889, + "step": 2865 + }, + { + "ce_ib": 4.024975776672363, + "ce_orig": 0.9780876636505127, + "epoch": 0.8239269537709397, + "kl_loss": 0.14702078700065613, + "loss_ib": 0.005495183169841766, + "step": 2865 + }, + { + "ce_ib": 4.689302444458008, + "ce_orig": 1.0279675722122192, + "epoch": 0.8242145373499173, + "kl_loss": 0.14508503675460815, + "loss_ib": 0.006140152458101511, + "step": 2866 + }, + { + "ce_ib": 2.490905523300171, + "ce_orig": 0.5002899765968323, + "epoch": 0.8242145373499173, + "kl_loss": 0.1693512201309204, + "loss_ib": 0.004184417426586151, + "step": 2866 + }, + { + "ce_ib": 2.524169921875, + "ce_orig": 0.7661482691764832, + "epoch": 0.8242145373499173, + "kl_loss": 0.10011142492294312, + "loss_ib": 0.003525284118950367, + "step": 2866 + }, + { + "ce_ib": 3.2156848907470703, + "ce_orig": 0.6873882412910461, + "epoch": 0.8242145373499173, + "kl_loss": 0.24119526147842407, + "loss_ib": 0.005627637263387442, + "step": 2866 + }, + { + "ce_ib": 6.647129058837891, + "ce_orig": 1.530397653579712, + "epoch": 0.824502120928895, + "kl_loss": 0.16500911116600037, + "loss_ib": 0.00829721987247467, + "step": 2867 + }, + { + "ce_ib": 3.649136543273926, + "ce_orig": 0.9370477795600891, + "epoch": 0.824502120928895, + "kl_loss": 0.12773850560188293, + "loss_ib": 0.004926521796733141, + "step": 2867 + }, + { + "ce_ib": 4.161107063293457, + "ce_orig": 0.7579275965690613, + "epoch": 0.824502120928895, + "kl_loss": 0.15169017016887665, + "loss_ib": 0.005678008776158094, + "step": 2867 + }, + { + "ce_ib": 1.709155559539795, + "ce_orig": 0.6099335551261902, + "epoch": 0.824502120928895, + "kl_loss": 0.13694071769714355, + "loss_ib": 0.0030785624403506517, + "step": 2867 + }, + { + "ce_ib": 3.62985897064209, + "ce_orig": 0.7556082606315613, + "epoch": 0.8247897045078726, + "kl_loss": 0.14075452089309692, + "loss_ib": 0.005037404131144285, + "step": 2868 + }, + { + "ce_ib": 2.7115583419799805, + "ce_orig": 0.28906700015068054, + "epoch": 0.8247897045078726, + "kl_loss": 0.22270025312900543, + "loss_ib": 0.004938560537993908, + "step": 2868 + }, + { + "ce_ib": 4.786983966827393, + "ce_orig": 0.8722836971282959, + "epoch": 0.8247897045078726, + "kl_loss": 0.17636477947235107, + "loss_ib": 0.006550631485879421, + "step": 2868 + }, + { + "ce_ib": 4.516746520996094, + "ce_orig": 0.9172162413597107, + "epoch": 0.8247897045078726, + "kl_loss": 0.25016945600509644, + "loss_ib": 0.007018440868705511, + "step": 2868 + }, + { + "ce_ib": 3.136047601699829, + "ce_orig": 0.550767719745636, + "epoch": 0.8250772880868502, + "kl_loss": 0.17897364497184753, + "loss_ib": 0.0049257841892540455, + "step": 2869 + }, + { + "ce_ib": 3.6300761699676514, + "ce_orig": 0.8521891832351685, + "epoch": 0.8250772880868502, + "kl_loss": 0.17535553872585297, + "loss_ib": 0.005383631680160761, + "step": 2869 + }, + { + "ce_ib": 3.7452054023742676, + "ce_orig": 0.7275384068489075, + "epoch": 0.8250772880868502, + "kl_loss": 0.1624750792980194, + "loss_ib": 0.0053699566051363945, + "step": 2869 + }, + { + "ce_ib": 2.9989326000213623, + "ce_orig": 0.4710121154785156, + "epoch": 0.8250772880868502, + "kl_loss": 0.21253973245620728, + "loss_ib": 0.005124329589307308, + "step": 2869 + }, + { + "epoch": 0.8253648716658278, + "grad_norm": 0.14280347526073456, + "learning_rate": 8.513514901131299e-06, + "loss": 0.8845, + "step": 2870 + }, + { + "ce_ib": 5.743932723999023, + "ce_orig": 1.4675697088241577, + "epoch": 0.8253648716658278, + "kl_loss": 0.13766717910766602, + "loss_ib": 0.007120604161173105, + "step": 2870 + }, + { + "ce_ib": 3.0853612422943115, + "ce_orig": 0.5946407318115234, + "epoch": 0.8253648716658278, + "kl_loss": 0.1597779542207718, + "loss_ib": 0.004683141130954027, + "step": 2870 + }, + { + "ce_ib": 2.41890549659729, + "ce_orig": 0.6495623588562012, + "epoch": 0.8253648716658278, + "kl_loss": 0.10604320466518402, + "loss_ib": 0.003479337552562356, + "step": 2870 + }, + { + "ce_ib": 3.901663303375244, + "ce_orig": 0.8993545174598694, + "epoch": 0.8253648716658278, + "kl_loss": 0.20225822925567627, + "loss_ib": 0.005924245808273554, + "step": 2870 + }, + { + "ce_ib": 4.631702423095703, + "ce_orig": 0.9225010871887207, + "epoch": 0.8256524552448056, + "kl_loss": 0.1727062612771988, + "loss_ib": 0.006358765065670013, + "step": 2871 + }, + { + "ce_ib": 1.5815256834030151, + "ce_orig": 0.2096952199935913, + "epoch": 0.8256524552448056, + "kl_loss": 0.30791378021240234, + "loss_ib": 0.004660663660615683, + "step": 2871 + }, + { + "ce_ib": 3.1319077014923096, + "ce_orig": 0.5929129123687744, + "epoch": 0.8256524552448056, + "kl_loss": 0.08406004309654236, + "loss_ib": 0.003972508013248444, + "step": 2871 + }, + { + "ce_ib": 5.878556728363037, + "ce_orig": 0.9196772575378418, + "epoch": 0.8256524552448056, + "kl_loss": 0.25829339027404785, + "loss_ib": 0.008461490273475647, + "step": 2871 + }, + { + "ce_ib": 2.8657312393188477, + "ce_orig": 0.5281575918197632, + "epoch": 0.8259400388237832, + "kl_loss": 0.21896713972091675, + "loss_ib": 0.005055402405560017, + "step": 2872 + }, + { + "ce_ib": 7.692933559417725, + "ce_orig": 1.5486690998077393, + "epoch": 0.8259400388237832, + "kl_loss": 0.17179694771766663, + "loss_ib": 0.009410902857780457, + "step": 2872 + }, + { + "ce_ib": 4.637419700622559, + "ce_orig": 0.9842481017112732, + "epoch": 0.8259400388237832, + "kl_loss": 0.13949233293533325, + "loss_ib": 0.006032342556864023, + "step": 2872 + }, + { + "ce_ib": 3.8346705436706543, + "ce_orig": 0.6260755062103271, + "epoch": 0.8259400388237832, + "kl_loss": 0.16124111413955688, + "loss_ib": 0.0054470812901854515, + "step": 2872 + }, + { + "ce_ib": 6.825984001159668, + "ce_orig": 1.6129895448684692, + "epoch": 0.8262276224027608, + "kl_loss": 0.217058002948761, + "loss_ib": 0.008996563963592052, + "step": 2873 + }, + { + "ce_ib": 3.7715983390808105, + "ce_orig": 0.5599387288093567, + "epoch": 0.8262276224027608, + "kl_loss": 0.259441077709198, + "loss_ib": 0.006366008892655373, + "step": 2873 + }, + { + "ce_ib": 2.3925578594207764, + "ce_orig": 0.5393683910369873, + "epoch": 0.8262276224027608, + "kl_loss": 0.2209458351135254, + "loss_ib": 0.004602015949785709, + "step": 2873 + }, + { + "ce_ib": 6.666255950927734, + "ce_orig": 1.50017511844635, + "epoch": 0.8262276224027608, + "kl_loss": 0.19450441002845764, + "loss_ib": 0.008611300028860569, + "step": 2873 + }, + { + "ce_ib": 5.365810394287109, + "ce_orig": 0.8668674230575562, + "epoch": 0.8265152059817384, + "kl_loss": 0.1635822057723999, + "loss_ib": 0.007001632358878851, + "step": 2874 + }, + { + "ce_ib": 4.231411933898926, + "ce_orig": 0.4599015712738037, + "epoch": 0.8265152059817384, + "kl_loss": 0.2030838131904602, + "loss_ib": 0.006262249778956175, + "step": 2874 + }, + { + "ce_ib": 2.692293405532837, + "ce_orig": 0.6010900735855103, + "epoch": 0.8265152059817384, + "kl_loss": 0.25684595108032227, + "loss_ib": 0.005260752979665995, + "step": 2874 + }, + { + "ce_ib": 4.020428657531738, + "ce_orig": 0.6259286999702454, + "epoch": 0.8265152059817384, + "kl_loss": 0.17078344523906708, + "loss_ib": 0.005728262942284346, + "step": 2874 + }, + { + "epoch": 0.8268027895607161, + "grad_norm": 0.1282636523246765, + "learning_rate": 8.507988954541992e-06, + "loss": 0.8341, + "step": 2875 + }, + { + "ce_ib": 8.74016284942627, + "ce_orig": 1.698149561882019, + "epoch": 0.8268027895607161, + "kl_loss": 0.24367672204971313, + "loss_ib": 0.011176928877830505, + "step": 2875 + }, + { + "ce_ib": 3.0760068893432617, + "ce_orig": 0.6694203019142151, + "epoch": 0.8268027895607161, + "kl_loss": 0.1647557020187378, + "loss_ib": 0.00472356379032135, + "step": 2875 + }, + { + "ce_ib": 3.856241464614868, + "ce_orig": 1.0132771730422974, + "epoch": 0.8268027895607161, + "kl_loss": 0.12855999171733856, + "loss_ib": 0.005141841247677803, + "step": 2875 + }, + { + "ce_ib": 3.2805328369140625, + "ce_orig": 0.44985726475715637, + "epoch": 0.8268027895607161, + "kl_loss": 0.2788565754890442, + "loss_ib": 0.006069098599255085, + "step": 2875 + }, + { + "ce_ib": 2.87795352935791, + "ce_orig": 0.5038062334060669, + "epoch": 0.8270903731396937, + "kl_loss": 0.21774187684059143, + "loss_ib": 0.005055372603237629, + "step": 2876 + }, + { + "ce_ib": 3.5416343212127686, + "ce_orig": 0.6795667409896851, + "epoch": 0.8270903731396937, + "kl_loss": 0.32440316677093506, + "loss_ib": 0.006785665638744831, + "step": 2876 + }, + { + "ce_ib": 5.496023654937744, + "ce_orig": 1.0768823623657227, + "epoch": 0.8270903731396937, + "kl_loss": 0.22173427045345306, + "loss_ib": 0.007713366765528917, + "step": 2876 + }, + { + "ce_ib": 6.698769569396973, + "ce_orig": 1.0211304426193237, + "epoch": 0.8270903731396937, + "kl_loss": 0.19053158164024353, + "loss_ib": 0.008604085072875023, + "step": 2876 + }, + { + "ce_ib": 1.986926555633545, + "ce_orig": 0.5106616020202637, + "epoch": 0.8273779567186713, + "kl_loss": 0.1424979567527771, + "loss_ib": 0.0034119063057005405, + "step": 2877 + }, + { + "ce_ib": 5.254164695739746, + "ce_orig": 1.2080793380737305, + "epoch": 0.8273779567186713, + "kl_loss": 0.15144726634025574, + "loss_ib": 0.006768637802451849, + "step": 2877 + }, + { + "ce_ib": 2.3577568531036377, + "ce_orig": 0.4192110300064087, + "epoch": 0.8273779567186713, + "kl_loss": 0.09593936800956726, + "loss_ib": 0.0033171502873301506, + "step": 2877 + }, + { + "ce_ib": 4.645291805267334, + "ce_orig": 0.43245187401771545, + "epoch": 0.8273779567186713, + "kl_loss": 0.5649994611740112, + "loss_ib": 0.01029528584331274, + "step": 2877 + }, + { + "ce_ib": 2.371523141860962, + "ce_orig": 0.37932202219963074, + "epoch": 0.827665540297649, + "kl_loss": 0.20313873887062073, + "loss_ib": 0.0044029103592038155, + "step": 2878 + }, + { + "ce_ib": 3.0895700454711914, + "ce_orig": 0.8610268831253052, + "epoch": 0.827665540297649, + "kl_loss": 0.10560034215450287, + "loss_ib": 0.0041455733589828014, + "step": 2878 + }, + { + "ce_ib": 2.53816294670105, + "ce_orig": 0.5499843955039978, + "epoch": 0.827665540297649, + "kl_loss": 0.19593513011932373, + "loss_ib": 0.004497514106333256, + "step": 2878 + }, + { + "ce_ib": 4.075026035308838, + "ce_orig": 0.5414060354232788, + "epoch": 0.827665540297649, + "kl_loss": 0.30075669288635254, + "loss_ib": 0.007082593161612749, + "step": 2878 + }, + { + "ce_ib": 1.509731411933899, + "ce_orig": 0.5128348469734192, + "epoch": 0.8279531238766267, + "kl_loss": 0.1364012360572815, + "loss_ib": 0.002873743651434779, + "step": 2879 + }, + { + "ce_ib": 3.392395257949829, + "ce_orig": 0.6388053297996521, + "epoch": 0.8279531238766267, + "kl_loss": 0.12642276287078857, + "loss_ib": 0.004656623117625713, + "step": 2879 + }, + { + "ce_ib": 5.222067832946777, + "ce_orig": 0.768227756023407, + "epoch": 0.8279531238766267, + "kl_loss": 0.23935635387897491, + "loss_ib": 0.007615630980581045, + "step": 2879 + }, + { + "ce_ib": 6.693854331970215, + "ce_orig": 1.2126444578170776, + "epoch": 0.8279531238766267, + "kl_loss": 0.1396772563457489, + "loss_ib": 0.008090627379715443, + "step": 2879 + }, + { + "epoch": 0.8282407074556043, + "grad_norm": 0.14994961023330688, + "learning_rate": 8.502454556393071e-06, + "loss": 0.8254, + "step": 2880 + }, + { + "ce_ib": 2.6127724647521973, + "ce_orig": 0.6051091551780701, + "epoch": 0.8282407074556043, + "kl_loss": 0.14914774894714355, + "loss_ib": 0.004104250110685825, + "step": 2880 + }, + { + "ce_ib": 7.166625022888184, + "ce_orig": 1.6240248680114746, + "epoch": 0.8282407074556043, + "kl_loss": 0.19623595476150513, + "loss_ib": 0.009128984995186329, + "step": 2880 + }, + { + "ce_ib": 4.487271785736084, + "ce_orig": 1.261370062828064, + "epoch": 0.8282407074556043, + "kl_loss": 0.25172144174575806, + "loss_ib": 0.00700448639690876, + "step": 2880 + }, + { + "ce_ib": 7.288825035095215, + "ce_orig": 1.1731101274490356, + "epoch": 0.8282407074556043, + "kl_loss": 0.11732153594493866, + "loss_ib": 0.008462040685117245, + "step": 2880 + }, + { + "ce_ib": 4.130617618560791, + "ce_orig": 1.030800223350525, + "epoch": 0.8285282910345819, + "kl_loss": 0.17943039536476135, + "loss_ib": 0.005924921482801437, + "step": 2881 + }, + { + "ce_ib": 4.978092670440674, + "ce_orig": 1.023690938949585, + "epoch": 0.8285282910345819, + "kl_loss": 0.296636164188385, + "loss_ib": 0.00794445350766182, + "step": 2881 + }, + { + "ce_ib": 2.826033353805542, + "ce_orig": 0.5625208020210266, + "epoch": 0.8285282910345819, + "kl_loss": 0.13340122997760773, + "loss_ib": 0.004160046111792326, + "step": 2881 + }, + { + "ce_ib": 4.092287540435791, + "ce_orig": 0.9232235550880432, + "epoch": 0.8285282910345819, + "kl_loss": 0.14677828550338745, + "loss_ib": 0.005560070276260376, + "step": 2881 + }, + { + "ce_ib": 4.707136154174805, + "ce_orig": 1.1053889989852905, + "epoch": 0.8288158746135595, + "kl_loss": 0.16771593689918518, + "loss_ib": 0.00638429494574666, + "step": 2882 + }, + { + "ce_ib": 2.9224894046783447, + "ce_orig": 0.7328656315803528, + "epoch": 0.8288158746135595, + "kl_loss": 0.1964307427406311, + "loss_ib": 0.004886796697974205, + "step": 2882 + }, + { + "ce_ib": 4.01039981842041, + "ce_orig": 0.7895780205726624, + "epoch": 0.8288158746135595, + "kl_loss": 0.16833549737930298, + "loss_ib": 0.005693755112588406, + "step": 2882 + }, + { + "ce_ib": 3.1918344497680664, + "ce_orig": 0.6506085991859436, + "epoch": 0.8288158746135595, + "kl_loss": 0.21863773465156555, + "loss_ib": 0.005378211848437786, + "step": 2882 + }, + { + "ce_ib": 4.261894702911377, + "ce_orig": 1.1424298286437988, + "epoch": 0.8291034581925372, + "kl_loss": 0.17619222402572632, + "loss_ib": 0.006023816764354706, + "step": 2883 + }, + { + "ce_ib": 4.474537372589111, + "ce_orig": 0.9098606109619141, + "epoch": 0.8291034581925372, + "kl_loss": 0.14229857921600342, + "loss_ib": 0.005897522903978825, + "step": 2883 + }, + { + "ce_ib": 1.215714693069458, + "ce_orig": 0.2662150263786316, + "epoch": 0.8291034581925372, + "kl_loss": 0.3516296148300171, + "loss_ib": 0.0047320108860731125, + "step": 2883 + }, + { + "ce_ib": 4.155750274658203, + "ce_orig": 0.8843290209770203, + "epoch": 0.8291034581925372, + "kl_loss": 0.23709248006343842, + "loss_ib": 0.006526675075292587, + "step": 2883 + }, + { + "ce_ib": 3.0701165199279785, + "ce_orig": 0.6476393938064575, + "epoch": 0.8293910417715148, + "kl_loss": 0.19681119918823242, + "loss_ib": 0.00503822835162282, + "step": 2884 + }, + { + "ce_ib": 3.5683486461639404, + "ce_orig": 0.8242078423500061, + "epoch": 0.8293910417715148, + "kl_loss": 0.22961559891700745, + "loss_ib": 0.005864504259079695, + "step": 2884 + }, + { + "ce_ib": 4.132795810699463, + "ce_orig": 0.7210560441017151, + "epoch": 0.8293910417715148, + "kl_loss": 0.23567797243595123, + "loss_ib": 0.006489575374871492, + "step": 2884 + }, + { + "ce_ib": 3.961473226547241, + "ce_orig": 1.0444350242614746, + "epoch": 0.8293910417715148, + "kl_loss": 0.21871310472488403, + "loss_ib": 0.0061486042104661465, + "step": 2884 + }, + { + "epoch": 0.8296786253504925, + "grad_norm": 0.13329626619815826, + "learning_rate": 8.496911720018188e-06, + "loss": 0.9209, + "step": 2885 + }, + { + "ce_ib": 4.126862049102783, + "ce_orig": 0.8005906343460083, + "epoch": 0.8296786253504925, + "kl_loss": 0.25327903032302856, + "loss_ib": 0.006659651640802622, + "step": 2885 + }, + { + "ce_ib": 3.6192307472229004, + "ce_orig": 0.8209283947944641, + "epoch": 0.8296786253504925, + "kl_loss": 0.21794436872005463, + "loss_ib": 0.005798674188554287, + "step": 2885 + }, + { + "ce_ib": 4.849692344665527, + "ce_orig": 0.9806627631187439, + "epoch": 0.8296786253504925, + "kl_loss": 0.2706400752067566, + "loss_ib": 0.007556092459708452, + "step": 2885 + }, + { + "ce_ib": 3.1249520778656006, + "ce_orig": 0.6359015107154846, + "epoch": 0.8296786253504925, + "kl_loss": 0.21268625557422638, + "loss_ib": 0.005251814611256123, + "step": 2885 + }, + { + "ce_ib": 5.761795520782471, + "ce_orig": 0.5992882251739502, + "epoch": 0.8299662089294702, + "kl_loss": 0.18527448177337646, + "loss_ib": 0.00761454040184617, + "step": 2886 + }, + { + "ce_ib": 5.2286696434021, + "ce_orig": 1.0217677354812622, + "epoch": 0.8299662089294702, + "kl_loss": 0.21787375211715698, + "loss_ib": 0.007407406810671091, + "step": 2886 + }, + { + "ce_ib": 4.5149006843566895, + "ce_orig": 1.0029388666152954, + "epoch": 0.8299662089294702, + "kl_loss": 0.24407942593097687, + "loss_ib": 0.0069556948728859425, + "step": 2886 + }, + { + "ce_ib": 5.617978096008301, + "ce_orig": 1.1238768100738525, + "epoch": 0.8299662089294702, + "kl_loss": 0.1921212375164032, + "loss_ib": 0.007539189886301756, + "step": 2886 + }, + { + "ce_ib": 4.6024250984191895, + "ce_orig": 0.8452867865562439, + "epoch": 0.8302537925084478, + "kl_loss": 0.288438618183136, + "loss_ib": 0.0074868109077215195, + "step": 2887 + }, + { + "ce_ib": 4.649557113647461, + "ce_orig": 1.1539958715438843, + "epoch": 0.8302537925084478, + "kl_loss": 0.17076998949050903, + "loss_ib": 0.006357257254421711, + "step": 2887 + }, + { + "ce_ib": 3.8930463790893555, + "ce_orig": 0.8908532857894897, + "epoch": 0.8302537925084478, + "kl_loss": 0.2528723478317261, + "loss_ib": 0.006421769503504038, + "step": 2887 + }, + { + "ce_ib": 3.8733344078063965, + "ce_orig": 0.8784165978431702, + "epoch": 0.8302537925084478, + "kl_loss": 0.1962842494249344, + "loss_ib": 0.005836176685988903, + "step": 2887 + }, + { + "ce_ib": 4.4736809730529785, + "ce_orig": 0.9343304634094238, + "epoch": 0.8305413760874254, + "kl_loss": 0.1179821565747261, + "loss_ib": 0.005653502885252237, + "step": 2888 + }, + { + "ce_ib": 3.519237995147705, + "ce_orig": 0.9355604648590088, + "epoch": 0.8305413760874254, + "kl_loss": 0.14492401480674744, + "loss_ib": 0.004968477878719568, + "step": 2888 + }, + { + "ce_ib": 2.9303903579711914, + "ce_orig": 0.5732430815696716, + "epoch": 0.8305413760874254, + "kl_loss": 0.17453515529632568, + "loss_ib": 0.004675741773098707, + "step": 2888 + }, + { + "ce_ib": 4.675782680511475, + "ce_orig": 1.0136622190475464, + "epoch": 0.8305413760874254, + "kl_loss": 0.10914991050958633, + "loss_ib": 0.005767281632870436, + "step": 2888 + }, + { + "ce_ib": 4.3638811111450195, + "ce_orig": 0.8265060186386108, + "epoch": 0.830828959666403, + "kl_loss": 0.16551318764686584, + "loss_ib": 0.006019013002514839, + "step": 2889 + }, + { + "ce_ib": 4.828332424163818, + "ce_orig": 0.49547696113586426, + "epoch": 0.830828959666403, + "kl_loss": 0.20689180493354797, + "loss_ib": 0.006897250190377235, + "step": 2889 + }, + { + "ce_ib": 3.3260722160339355, + "ce_orig": 0.6456339359283447, + "epoch": 0.830828959666403, + "kl_loss": 0.23485828936100006, + "loss_ib": 0.005674655083566904, + "step": 2889 + }, + { + "ce_ib": 2.9236183166503906, + "ce_orig": 0.4193689823150635, + "epoch": 0.830828959666403, + "kl_loss": 0.23497849702835083, + "loss_ib": 0.005273402668535709, + "step": 2889 + }, + { + "epoch": 0.8311165432453806, + "grad_norm": 0.11836716532707214, + "learning_rate": 8.49136045877132e-06, + "loss": 0.9, + "step": 2890 + }, + { + "ce_ib": 2.087693452835083, + "ce_orig": 0.4276258647441864, + "epoch": 0.8311165432453806, + "kl_loss": 0.14191588759422302, + "loss_ib": 0.003506852313876152, + "step": 2890 + }, + { + "ce_ib": 2.9643912315368652, + "ce_orig": 0.4000520408153534, + "epoch": 0.8311165432453806, + "kl_loss": 0.26583606004714966, + "loss_ib": 0.005622751545161009, + "step": 2890 + }, + { + "ce_ib": 5.387731075286865, + "ce_orig": 0.9836530089378357, + "epoch": 0.8311165432453806, + "kl_loss": 0.241763174533844, + "loss_ib": 0.007805362809449434, + "step": 2890 + }, + { + "ce_ib": 3.007267475128174, + "ce_orig": 0.4516206979751587, + "epoch": 0.8311165432453806, + "kl_loss": 0.3592800498008728, + "loss_ib": 0.006600067485123873, + "step": 2890 + }, + { + "ce_ib": 4.3520121574401855, + "ce_orig": 1.1651091575622559, + "epoch": 0.8314041268243584, + "kl_loss": 0.17165684700012207, + "loss_ib": 0.006068580783903599, + "step": 2891 + }, + { + "ce_ib": 2.8225855827331543, + "ce_orig": 0.6308429837226868, + "epoch": 0.8314041268243584, + "kl_loss": 0.15242592990398407, + "loss_ib": 0.004346845205873251, + "step": 2891 + }, + { + "ce_ib": 2.0741705894470215, + "ce_orig": 0.48199349641799927, + "epoch": 0.8314041268243584, + "kl_loss": 0.2081989198923111, + "loss_ib": 0.004156159702688456, + "step": 2891 + }, + { + "ce_ib": 4.7561140060424805, + "ce_orig": 1.053267240524292, + "epoch": 0.8314041268243584, + "kl_loss": 0.14036613702774048, + "loss_ib": 0.006159774959087372, + "step": 2891 + }, + { + "ce_ib": 2.185760259628296, + "ce_orig": 0.2743958830833435, + "epoch": 0.831691710403336, + "kl_loss": 0.16071251034736633, + "loss_ib": 0.003792885458096862, + "step": 2892 + }, + { + "ce_ib": 2.9996659755706787, + "ce_orig": 0.5672270655632019, + "epoch": 0.831691710403336, + "kl_loss": 0.12822943925857544, + "loss_ib": 0.004281960427761078, + "step": 2892 + }, + { + "ce_ib": 7.920281410217285, + "ce_orig": 1.3763327598571777, + "epoch": 0.831691710403336, + "kl_loss": 0.23749034106731415, + "loss_ib": 0.010295184329152107, + "step": 2892 + }, + { + "ce_ib": 7.507551193237305, + "ce_orig": 1.4732897281646729, + "epoch": 0.831691710403336, + "kl_loss": 0.21093228459358215, + "loss_ib": 0.009616874158382416, + "step": 2892 + }, + { + "ce_ib": 8.900546073913574, + "ce_orig": 1.5648458003997803, + "epoch": 0.8319792939823136, + "kl_loss": 0.21868747472763062, + "loss_ib": 0.011087421327829361, + "step": 2893 + }, + { + "ce_ib": 3.0837104320526123, + "ce_orig": 0.7571828365325928, + "epoch": 0.8319792939823136, + "kl_loss": 0.18605761229991913, + "loss_ib": 0.004944286309182644, + "step": 2893 + }, + { + "ce_ib": 5.029618740081787, + "ce_orig": 1.297188401222229, + "epoch": 0.8319792939823136, + "kl_loss": 0.2751466631889343, + "loss_ib": 0.007781085092574358, + "step": 2893 + }, + { + "ce_ib": 4.449343681335449, + "ce_orig": 0.9719931483268738, + "epoch": 0.8319792939823136, + "kl_loss": 0.21002188324928284, + "loss_ib": 0.006549561861902475, + "step": 2893 + }, + { + "ce_ib": 4.407426357269287, + "ce_orig": 1.1481525897979736, + "epoch": 0.8322668775612913, + "kl_loss": 0.2196575403213501, + "loss_ib": 0.0066040013916790485, + "step": 2894 + }, + { + "ce_ib": 5.482417106628418, + "ce_orig": 1.134550929069519, + "epoch": 0.8322668775612913, + "kl_loss": 0.30899959802627563, + "loss_ib": 0.008572413586080074, + "step": 2894 + }, + { + "ce_ib": 3.2840638160705566, + "ce_orig": 0.9014392495155334, + "epoch": 0.8322668775612913, + "kl_loss": 0.1595136821269989, + "loss_ib": 0.004879200831055641, + "step": 2894 + }, + { + "ce_ib": 3.54443097114563, + "ce_orig": 0.485805481672287, + "epoch": 0.8322668775612913, + "kl_loss": 0.21616803109645844, + "loss_ib": 0.005706110969185829, + "step": 2894 + }, + { + "epoch": 0.8325544611402689, + "grad_norm": 0.13097865879535675, + "learning_rate": 8.485800786026745e-06, + "loss": 0.9232, + "step": 2895 + }, + { + "ce_ib": 2.699634313583374, + "ce_orig": 0.6938281059265137, + "epoch": 0.8325544611402689, + "kl_loss": 0.17667828500270844, + "loss_ib": 0.004466417245566845, + "step": 2895 + }, + { + "ce_ib": 7.170710563659668, + "ce_orig": 1.681235909461975, + "epoch": 0.8325544611402689, + "kl_loss": 0.24743963778018951, + "loss_ib": 0.00964510627090931, + "step": 2895 + }, + { + "ce_ib": 3.624577283859253, + "ce_orig": 0.7956358790397644, + "epoch": 0.8325544611402689, + "kl_loss": 0.18146419525146484, + "loss_ib": 0.005439219530671835, + "step": 2895 + }, + { + "ce_ib": 3.070042848587036, + "ce_orig": 0.7522200345993042, + "epoch": 0.8325544611402689, + "kl_loss": 0.2552671432495117, + "loss_ib": 0.0056227147579193115, + "step": 2895 + }, + { + "ce_ib": 5.908425807952881, + "ce_orig": 1.4578828811645508, + "epoch": 0.8328420447192465, + "kl_loss": 0.13771629333496094, + "loss_ib": 0.007285588886588812, + "step": 2896 + }, + { + "ce_ib": 3.154857635498047, + "ce_orig": 0.7026169896125793, + "epoch": 0.8328420447192465, + "kl_loss": 0.13523399829864502, + "loss_ib": 0.00450719753280282, + "step": 2896 + }, + { + "ce_ib": 4.86837100982666, + "ce_orig": 0.5375339984893799, + "epoch": 0.8328420447192465, + "kl_loss": 0.16447500884532928, + "loss_ib": 0.006513121072202921, + "step": 2896 + }, + { + "ce_ib": 7.457552433013916, + "ce_orig": 1.0827571153640747, + "epoch": 0.8328420447192465, + "kl_loss": 0.2079276144504547, + "loss_ib": 0.009536828845739365, + "step": 2896 + }, + { + "ce_ib": 4.548498630523682, + "ce_orig": 1.067577838897705, + "epoch": 0.8331296282982241, + "kl_loss": 0.2063087522983551, + "loss_ib": 0.006611586082726717, + "step": 2897 + }, + { + "ce_ib": 3.7382538318634033, + "ce_orig": 0.900969922542572, + "epoch": 0.8331296282982241, + "kl_loss": 0.19945940375328064, + "loss_ib": 0.005732847843319178, + "step": 2897 + }, + { + "ce_ib": 4.974239349365234, + "ce_orig": 1.0694955587387085, + "epoch": 0.8331296282982241, + "kl_loss": 0.16955307126045227, + "loss_ib": 0.006669770460575819, + "step": 2897 + }, + { + "ce_ib": 3.8977832794189453, + "ce_orig": 0.6915797591209412, + "epoch": 0.8331296282982241, + "kl_loss": 0.19714118540287018, + "loss_ib": 0.005869195330888033, + "step": 2897 + }, + { + "ce_ib": 1.8507922887802124, + "ce_orig": 0.509788453578949, + "epoch": 0.8334172118772019, + "kl_loss": 0.0959552675485611, + "loss_ib": 0.0028103450313210487, + "step": 2898 + }, + { + "ce_ib": 3.147413730621338, + "ce_orig": 0.4308997690677643, + "epoch": 0.8334172118772019, + "kl_loss": 0.1367012858390808, + "loss_ib": 0.0045144264586269855, + "step": 2898 + }, + { + "ce_ib": 5.746183395385742, + "ce_orig": 0.9529247283935547, + "epoch": 0.8334172118772019, + "kl_loss": 0.1923537403345108, + "loss_ib": 0.007669720333069563, + "step": 2898 + }, + { + "ce_ib": 5.821624755859375, + "ce_orig": 1.301271677017212, + "epoch": 0.8334172118772019, + "kl_loss": 0.15492677688598633, + "loss_ib": 0.007370892446488142, + "step": 2898 + }, + { + "ce_ib": 3.6843650341033936, + "ce_orig": 0.8940287232398987, + "epoch": 0.8337047954561795, + "kl_loss": 0.15676340460777283, + "loss_ib": 0.0052519990131258965, + "step": 2899 + }, + { + "ce_ib": 5.658127307891846, + "ce_orig": 0.7565308809280396, + "epoch": 0.8337047954561795, + "kl_loss": 0.1712825894355774, + "loss_ib": 0.007370953448116779, + "step": 2899 + }, + { + "ce_ib": 3.842254638671875, + "ce_orig": 0.917491614818573, + "epoch": 0.8337047954561795, + "kl_loss": 0.1777859330177307, + "loss_ib": 0.0056201135739684105, + "step": 2899 + }, + { + "ce_ib": 4.515219211578369, + "ce_orig": 0.8804351091384888, + "epoch": 0.8337047954561795, + "kl_loss": 0.2849474251270294, + "loss_ib": 0.007364693563431501, + "step": 2899 + }, + { + "epoch": 0.8339923790351571, + "grad_norm": 0.13247261941432953, + "learning_rate": 8.480232715179004e-06, + "loss": 0.8231, + "step": 2900 + }, + { + "ce_ib": 7.6090474128723145, + "ce_orig": 1.5276763439178467, + "epoch": 0.8339923790351571, + "kl_loss": 0.16883759200572968, + "loss_ib": 0.00929742306470871, + "step": 2900 + }, + { + "ce_ib": 3.5297486782073975, + "ce_orig": 0.9258477687835693, + "epoch": 0.8339923790351571, + "kl_loss": 0.1877252161502838, + "loss_ib": 0.0054070008918643, + "step": 2900 + }, + { + "ce_ib": 4.608790397644043, + "ce_orig": 0.865960419178009, + "epoch": 0.8339923790351571, + "kl_loss": 0.1497492790222168, + "loss_ib": 0.006106283515691757, + "step": 2900 + }, + { + "ce_ib": 4.958095550537109, + "ce_orig": 0.985210120677948, + "epoch": 0.8339923790351571, + "kl_loss": 0.16268588602542877, + "loss_ib": 0.006584953982383013, + "step": 2900 + }, + { + "ce_ib": 3.8500874042510986, + "ce_orig": 0.8563905358314514, + "epoch": 0.8342799626141347, + "kl_loss": 0.2067205309867859, + "loss_ib": 0.005917292553931475, + "step": 2901 + }, + { + "ce_ib": 1.9801716804504395, + "ce_orig": 0.4866342544555664, + "epoch": 0.8342799626141347, + "kl_loss": 0.13902930915355682, + "loss_ib": 0.0033704647794365883, + "step": 2901 + }, + { + "ce_ib": 2.9689159393310547, + "ce_orig": 0.4116072654724121, + "epoch": 0.8342799626141347, + "kl_loss": 0.18430763483047485, + "loss_ib": 0.0048119924031198025, + "step": 2901 + }, + { + "ce_ib": 3.225693941116333, + "ce_orig": 0.7215883135795593, + "epoch": 0.8342799626141347, + "kl_loss": 0.17096495628356934, + "loss_ib": 0.004935343284159899, + "step": 2901 + }, + { + "ce_ib": 4.316681861877441, + "ce_orig": 1.0640817880630493, + "epoch": 0.8345675461931124, + "kl_loss": 0.1927296221256256, + "loss_ib": 0.006243978161364794, + "step": 2902 + }, + { + "ce_ib": 4.347996711730957, + "ce_orig": 0.8180198073387146, + "epoch": 0.8345675461931124, + "kl_loss": 0.2086285948753357, + "loss_ib": 0.006434282287955284, + "step": 2902 + }, + { + "ce_ib": 7.472233772277832, + "ce_orig": 1.6519242525100708, + "epoch": 0.8345675461931124, + "kl_loss": 0.200802281498909, + "loss_ib": 0.009480256587266922, + "step": 2902 + }, + { + "ce_ib": 0.779662549495697, + "ce_orig": 0.2130521833896637, + "epoch": 0.8345675461931124, + "kl_loss": 0.41853639483451843, + "loss_ib": 0.004965026397258043, + "step": 2902 + }, + { + "ce_ib": 5.879096508026123, + "ce_orig": 0.7981755137443542, + "epoch": 0.83485512977209, + "kl_loss": 0.1783498227596283, + "loss_ib": 0.007662594318389893, + "step": 2903 + }, + { + "ce_ib": 2.6400856971740723, + "ce_orig": 0.6582839488983154, + "epoch": 0.83485512977209, + "kl_loss": 0.18457801640033722, + "loss_ib": 0.004485865589231253, + "step": 2903 + }, + { + "ce_ib": 2.251703977584839, + "ce_orig": 0.39036810398101807, + "epoch": 0.83485512977209, + "kl_loss": 0.16432222723960876, + "loss_ib": 0.003894926281645894, + "step": 2903 + }, + { + "ce_ib": 5.238669395446777, + "ce_orig": 1.2235749959945679, + "epoch": 0.83485512977209, + "kl_loss": 0.20809103548526764, + "loss_ib": 0.007319579366594553, + "step": 2903 + }, + { + "ce_ib": 5.429309368133545, + "ce_orig": 0.9074487090110779, + "epoch": 0.8351427133510676, + "kl_loss": 0.1712082326412201, + "loss_ib": 0.007141392212361097, + "step": 2904 + }, + { + "ce_ib": 6.653004169464111, + "ce_orig": 0.9080125093460083, + "epoch": 0.8351427133510676, + "kl_loss": 0.16572004556655884, + "loss_ib": 0.00831020437180996, + "step": 2904 + }, + { + "ce_ib": 5.762917518615723, + "ce_orig": 1.1289302110671997, + "epoch": 0.8351427133510676, + "kl_loss": 0.1803303360939026, + "loss_ib": 0.0075662205927073956, + "step": 2904 + }, + { + "ce_ib": 4.048570156097412, + "ce_orig": 1.04703950881958, + "epoch": 0.8351427133510676, + "kl_loss": 0.18396562337875366, + "loss_ib": 0.005888225976377726, + "step": 2904 + }, + { + "epoch": 0.8354302969300453, + "grad_norm": 0.15995749831199646, + "learning_rate": 8.474656259642874e-06, + "loss": 0.8373, + "step": 2905 + }, + { + "ce_ib": 3.7099361419677734, + "ce_orig": 0.837143063545227, + "epoch": 0.8354302969300453, + "kl_loss": 0.22996701300144196, + "loss_ib": 0.0060096061788499355, + "step": 2905 + }, + { + "ce_ib": 3.545142412185669, + "ce_orig": 0.8339126110076904, + "epoch": 0.8354302969300453, + "kl_loss": 0.28583255410194397, + "loss_ib": 0.006403467617928982, + "step": 2905 + }, + { + "ce_ib": 2.8092193603515625, + "ce_orig": 0.6911078095436096, + "epoch": 0.8354302969300453, + "kl_loss": 0.16218139231204987, + "loss_ib": 0.004431033041328192, + "step": 2905 + }, + { + "ce_ib": 6.353653907775879, + "ce_orig": 1.3881524801254272, + "epoch": 0.8354302969300453, + "kl_loss": 0.21781398355960846, + "loss_ib": 0.00853179395198822, + "step": 2905 + }, + { + "ce_ib": 2.98447322845459, + "ce_orig": 0.6346457004547119, + "epoch": 0.835717880509023, + "kl_loss": 0.20211075246334076, + "loss_ib": 0.005005580838769674, + "step": 2906 + }, + { + "ce_ib": 8.148993492126465, + "ce_orig": 1.9483650922775269, + "epoch": 0.835717880509023, + "kl_loss": 0.17024879157543182, + "loss_ib": 0.009851481765508652, + "step": 2906 + }, + { + "ce_ib": 4.9676408767700195, + "ce_orig": 0.9335427284240723, + "epoch": 0.835717880509023, + "kl_loss": 0.18863695859909058, + "loss_ib": 0.006854010745882988, + "step": 2906 + }, + { + "ce_ib": 4.882185935974121, + "ce_orig": 0.8378196358680725, + "epoch": 0.835717880509023, + "kl_loss": 0.22268146276474, + "loss_ib": 0.007109000347554684, + "step": 2906 + }, + { + "ce_ib": 3.62324857711792, + "ce_orig": 0.9179442524909973, + "epoch": 0.8360054640880006, + "kl_loss": 0.20474950969219208, + "loss_ib": 0.005670743528753519, + "step": 2907 + }, + { + "ce_ib": 3.1512033939361572, + "ce_orig": 0.7860889434814453, + "epoch": 0.8360054640880006, + "kl_loss": 0.149979829788208, + "loss_ib": 0.004651001654565334, + "step": 2907 + }, + { + "ce_ib": 5.049472808837891, + "ce_orig": 0.6877983808517456, + "epoch": 0.8360054640880006, + "kl_loss": 0.2142196148633957, + "loss_ib": 0.007191669195890427, + "step": 2907 + }, + { + "ce_ib": 6.674953937530518, + "ce_orig": 1.0458133220672607, + "epoch": 0.8360054640880006, + "kl_loss": 0.20085614919662476, + "loss_ib": 0.008683515712618828, + "step": 2907 + }, + { + "ce_ib": 3.1431827545166016, + "ce_orig": 0.5916361212730408, + "epoch": 0.8362930476669782, + "kl_loss": 0.10433641076087952, + "loss_ib": 0.004186546895653009, + "step": 2908 + }, + { + "ce_ib": 4.064469337463379, + "ce_orig": 1.126461148262024, + "epoch": 0.8362930476669782, + "kl_loss": 0.17715157568454742, + "loss_ib": 0.005835984833538532, + "step": 2908 + }, + { + "ce_ib": 3.5729174613952637, + "ce_orig": 0.16631881892681122, + "epoch": 0.8362930476669782, + "kl_loss": 0.18557186424732208, + "loss_ib": 0.005428635980933905, + "step": 2908 + }, + { + "ce_ib": 4.311108589172363, + "ce_orig": 0.8285064697265625, + "epoch": 0.8362930476669782, + "kl_loss": 0.2308265119791031, + "loss_ib": 0.006619373336434364, + "step": 2908 + }, + { + "ce_ib": 4.348074436187744, + "ce_orig": 0.8351548910140991, + "epoch": 0.8365806312459558, + "kl_loss": 0.13852792978286743, + "loss_ib": 0.005733354017138481, + "step": 2909 + }, + { + "ce_ib": 3.8693974018096924, + "ce_orig": 0.7146889567375183, + "epoch": 0.8365806312459558, + "kl_loss": 0.21414747834205627, + "loss_ib": 0.006010871846228838, + "step": 2909 + }, + { + "ce_ib": 2.717846393585205, + "ce_orig": 0.5990954637527466, + "epoch": 0.8365806312459558, + "kl_loss": 0.1445508897304535, + "loss_ib": 0.004163355100899935, + "step": 2909 + }, + { + "ce_ib": 2.8536758422851562, + "ce_orig": 0.48729366064071655, + "epoch": 0.8365806312459558, + "kl_loss": 0.24590769410133362, + "loss_ib": 0.005312752444297075, + "step": 2909 + }, + { + "epoch": 0.8368682148249335, + "grad_norm": 0.13402685523033142, + "learning_rate": 8.469071432853329e-06, + "loss": 0.8736, + "step": 2910 + }, + { + "ce_ib": 3.052593469619751, + "ce_orig": 0.5604897737503052, + "epoch": 0.8368682148249335, + "kl_loss": 0.2229059934616089, + "loss_ib": 0.005281653255224228, + "step": 2910 + }, + { + "ce_ib": 4.705053806304932, + "ce_orig": 0.7971434593200684, + "epoch": 0.8368682148249335, + "kl_loss": 0.1863807737827301, + "loss_ib": 0.006568861659616232, + "step": 2910 + }, + { + "ce_ib": 2.9832077026367188, + "ce_orig": 0.6626828908920288, + "epoch": 0.8368682148249335, + "kl_loss": 0.1437872350215912, + "loss_ib": 0.004421079996973276, + "step": 2910 + }, + { + "ce_ib": 3.3920557498931885, + "ce_orig": 0.7632421851158142, + "epoch": 0.8368682148249335, + "kl_loss": 0.15045514702796936, + "loss_ib": 0.0048966072499752045, + "step": 2910 + }, + { + "ce_ib": 5.401617050170898, + "ce_orig": 1.0651540756225586, + "epoch": 0.8371557984039112, + "kl_loss": 0.105026975274086, + "loss_ib": 0.006451886612921953, + "step": 2911 + }, + { + "ce_ib": 4.488781452178955, + "ce_orig": 0.8101596236228943, + "epoch": 0.8371557984039112, + "kl_loss": 0.16841769218444824, + "loss_ib": 0.006172958295792341, + "step": 2911 + }, + { + "ce_ib": 3.4760043621063232, + "ce_orig": 0.46561482548713684, + "epoch": 0.8371557984039112, + "kl_loss": 0.26452165842056274, + "loss_ib": 0.0061212205328047276, + "step": 2911 + }, + { + "ce_ib": 5.681820392608643, + "ce_orig": 1.4156770706176758, + "epoch": 0.8371557984039112, + "kl_loss": 0.1484798789024353, + "loss_ib": 0.007166618946939707, + "step": 2911 + }, + { + "ce_ib": 5.866646766662598, + "ce_orig": 1.4603973627090454, + "epoch": 0.8374433819828888, + "kl_loss": 0.2265225648880005, + "loss_ib": 0.008131871931254864, + "step": 2912 + }, + { + "ce_ib": 7.343017578125, + "ce_orig": 1.5341060161590576, + "epoch": 0.8374433819828888, + "kl_loss": 0.2715000510215759, + "loss_ib": 0.010058017447590828, + "step": 2912 + }, + { + "ce_ib": 7.536292552947998, + "ce_orig": 1.8335294723510742, + "epoch": 0.8374433819828888, + "kl_loss": 0.23000776767730713, + "loss_ib": 0.009836370125412941, + "step": 2912 + }, + { + "ce_ib": 5.241741180419922, + "ce_orig": 1.0719796419143677, + "epoch": 0.8374433819828888, + "kl_loss": 0.14302486181259155, + "loss_ib": 0.006671989802271128, + "step": 2912 + }, + { + "ce_ib": 4.748905658721924, + "ce_orig": 1.336301565170288, + "epoch": 0.8377309655618664, + "kl_loss": 0.19931060075759888, + "loss_ib": 0.0067420112900435925, + "step": 2913 + }, + { + "ce_ib": 1.888339877128601, + "ce_orig": 0.6144108176231384, + "epoch": 0.8377309655618664, + "kl_loss": 0.5674480199813843, + "loss_ib": 0.0075628203339874744, + "step": 2913 + }, + { + "ce_ib": 2.531874179840088, + "ce_orig": 0.7370081543922424, + "epoch": 0.8377309655618664, + "kl_loss": 0.1287044882774353, + "loss_ib": 0.003818918950855732, + "step": 2913 + }, + { + "ce_ib": 5.816553115844727, + "ce_orig": 0.9684366583824158, + "epoch": 0.8377309655618664, + "kl_loss": 0.27140405774116516, + "loss_ib": 0.00853059347718954, + "step": 2913 + }, + { + "ce_ib": 5.327174663543701, + "ce_orig": 0.8974166512489319, + "epoch": 0.8380185491408441, + "kl_loss": 0.2663567066192627, + "loss_ib": 0.007990741170942783, + "step": 2914 + }, + { + "ce_ib": 2.21366810798645, + "ce_orig": 0.5375339984893799, + "epoch": 0.8380185491408441, + "kl_loss": 0.130900576710701, + "loss_ib": 0.0035226738546043634, + "step": 2914 + }, + { + "ce_ib": 7.04434061050415, + "ce_orig": 1.4757009744644165, + "epoch": 0.8380185491408441, + "kl_loss": 0.20010070502758026, + "loss_ib": 0.009045347571372986, + "step": 2914 + }, + { + "ce_ib": 4.547479152679443, + "ce_orig": 1.249865174293518, + "epoch": 0.8380185491408441, + "kl_loss": 0.18236799538135529, + "loss_ib": 0.006371158640831709, + "step": 2914 + }, + { + "epoch": 0.8383061327198217, + "grad_norm": 0.14355167746543884, + "learning_rate": 8.463478248265514e-06, + "loss": 0.8935, + "step": 2915 + }, + { + "ce_ib": 3.6391940116882324, + "ce_orig": 0.6202227473258972, + "epoch": 0.8383061327198217, + "kl_loss": 0.16132566332817078, + "loss_ib": 0.005252450704574585, + "step": 2915 + }, + { + "ce_ib": 3.60113263130188, + "ce_orig": 1.0475081205368042, + "epoch": 0.8383061327198217, + "kl_loss": 0.18369968235492706, + "loss_ib": 0.00543812895193696, + "step": 2915 + }, + { + "ce_ib": 3.52458119392395, + "ce_orig": 0.7193403840065002, + "epoch": 0.8383061327198217, + "kl_loss": 0.18626174330711365, + "loss_ib": 0.005387198645621538, + "step": 2915 + }, + { + "ce_ib": 3.976618766784668, + "ce_orig": 0.8247824311256409, + "epoch": 0.8383061327198217, + "kl_loss": 0.2051939219236374, + "loss_ib": 0.006028557661920786, + "step": 2915 + }, + { + "ce_ib": 3.5929715633392334, + "ce_orig": 1.0435718297958374, + "epoch": 0.8385937162987993, + "kl_loss": 0.1586383432149887, + "loss_ib": 0.0051793549209833145, + "step": 2916 + }, + { + "ce_ib": 5.562612533569336, + "ce_orig": 1.3815348148345947, + "epoch": 0.8385937162987993, + "kl_loss": 0.20087262988090515, + "loss_ib": 0.007571338210254908, + "step": 2916 + }, + { + "ce_ib": 5.334982395172119, + "ce_orig": 1.3579343557357788, + "epoch": 0.8385937162987993, + "kl_loss": 0.15595337748527527, + "loss_ib": 0.006894515827298164, + "step": 2916 + }, + { + "ce_ib": 3.0564379692077637, + "ce_orig": 0.8348274827003479, + "epoch": 0.8385937162987993, + "kl_loss": 0.12892338633537292, + "loss_ib": 0.0043456717394292355, + "step": 2916 + }, + { + "ce_ib": 3.423248052597046, + "ce_orig": 0.630363941192627, + "epoch": 0.8388812998777769, + "kl_loss": 0.2015131711959839, + "loss_ib": 0.005438379943370819, + "step": 2917 + }, + { + "ce_ib": 2.3415415287017822, + "ce_orig": 0.3390975296497345, + "epoch": 0.8388812998777769, + "kl_loss": 0.23868633806705475, + "loss_ib": 0.004728404805064201, + "step": 2917 + }, + { + "ce_ib": 4.012410640716553, + "ce_orig": 0.6170244812965393, + "epoch": 0.8388812998777769, + "kl_loss": 0.16929833590984344, + "loss_ib": 0.005705393850803375, + "step": 2917 + }, + { + "ce_ib": 4.867165565490723, + "ce_orig": 1.3418068885803223, + "epoch": 0.8388812998777769, + "kl_loss": 0.16488373279571533, + "loss_ib": 0.0065160030499100685, + "step": 2917 + }, + { + "ce_ib": 4.3010663986206055, + "ce_orig": 0.7550563216209412, + "epoch": 0.8391688834567547, + "kl_loss": 0.19600550830364227, + "loss_ib": 0.006261121481657028, + "step": 2918 + }, + { + "ce_ib": 5.192765712738037, + "ce_orig": 1.1917047500610352, + "epoch": 0.8391688834567547, + "kl_loss": 0.1784992516040802, + "loss_ib": 0.0069777583703398705, + "step": 2918 + }, + { + "ce_ib": 5.158356189727783, + "ce_orig": 0.5185113549232483, + "epoch": 0.8391688834567547, + "kl_loss": 0.3533838093280792, + "loss_ib": 0.008692193776369095, + "step": 2918 + }, + { + "ce_ib": 4.563296318054199, + "ce_orig": 1.2135379314422607, + "epoch": 0.8391688834567547, + "kl_loss": 0.17420029640197754, + "loss_ib": 0.006305299233645201, + "step": 2918 + }, + { + "ce_ib": 1.98062264919281, + "ce_orig": 0.4669637084007263, + "epoch": 0.8394564670357323, + "kl_loss": 0.14031895995140076, + "loss_ib": 0.0033838122617453337, + "step": 2919 + }, + { + "ce_ib": 4.274301528930664, + "ce_orig": 1.02561354637146, + "epoch": 0.8394564670357323, + "kl_loss": 0.17940224707126617, + "loss_ib": 0.006068323738873005, + "step": 2919 + }, + { + "ce_ib": 5.051400184631348, + "ce_orig": 0.8354811668395996, + "epoch": 0.8394564670357323, + "kl_loss": 0.40270090103149414, + "loss_ib": 0.00907840859144926, + "step": 2919 + }, + { + "ce_ib": 7.367971897125244, + "ce_orig": 1.302773118019104, + "epoch": 0.8394564670357323, + "kl_loss": 0.15627703070640564, + "loss_ib": 0.008930742740631104, + "step": 2919 + }, + { + "epoch": 0.8397440506147099, + "grad_norm": 0.14247827231884003, + "learning_rate": 8.457876719354708e-06, + "loss": 0.9071, + "step": 2920 + }, + { + "ce_ib": 2.943159341812134, + "ce_orig": 0.4000529646873474, + "epoch": 0.8397440506147099, + "kl_loss": 0.10829321295022964, + "loss_ib": 0.004026091191917658, + "step": 2920 + }, + { + "ce_ib": 2.1774046421051025, + "ce_orig": 0.5214499235153198, + "epoch": 0.8397440506147099, + "kl_loss": 0.1626403033733368, + "loss_ib": 0.003803807310760021, + "step": 2920 + }, + { + "ce_ib": 3.7260048389434814, + "ce_orig": 0.8961794972419739, + "epoch": 0.8397440506147099, + "kl_loss": 0.2332487255334854, + "loss_ib": 0.006058491766452789, + "step": 2920 + }, + { + "ce_ib": 5.790842056274414, + "ce_orig": 1.305792212486267, + "epoch": 0.8397440506147099, + "kl_loss": 0.19068914651870728, + "loss_ib": 0.007697733584791422, + "step": 2920 + }, + { + "ce_ib": 3.3704559803009033, + "ce_orig": 0.577182412147522, + "epoch": 0.8400316341936875, + "kl_loss": 0.2738697826862335, + "loss_ib": 0.006109154317528009, + "step": 2921 + }, + { + "ce_ib": 4.617032527923584, + "ce_orig": 1.0991714000701904, + "epoch": 0.8400316341936875, + "kl_loss": 0.19721615314483643, + "loss_ib": 0.006589194294065237, + "step": 2921 + }, + { + "ce_ib": 2.637040615081787, + "ce_orig": 0.7472373247146606, + "epoch": 0.8400316341936875, + "kl_loss": 0.11929958313703537, + "loss_ib": 0.0038300363812595606, + "step": 2921 + }, + { + "ce_ib": 3.6349143981933594, + "ce_orig": 0.5525444149971008, + "epoch": 0.8400316341936875, + "kl_loss": 0.25642579793930054, + "loss_ib": 0.00619917269796133, + "step": 2921 + }, + { + "ce_ib": 4.811841011047363, + "ce_orig": 1.0224248170852661, + "epoch": 0.8403192177726652, + "kl_loss": 0.22663328051567078, + "loss_ib": 0.007078173570334911, + "step": 2922 + }, + { + "ce_ib": 6.881445407867432, + "ce_orig": 1.1473280191421509, + "epoch": 0.8403192177726652, + "kl_loss": 0.23235651850700378, + "loss_ib": 0.00920501071959734, + "step": 2922 + }, + { + "ce_ib": 6.108866214752197, + "ce_orig": 0.748229444026947, + "epoch": 0.8403192177726652, + "kl_loss": 0.2378755658864975, + "loss_ib": 0.008487622253596783, + "step": 2922 + }, + { + "ce_ib": 4.89274263381958, + "ce_orig": 1.160775899887085, + "epoch": 0.8403192177726652, + "kl_loss": 0.14791029691696167, + "loss_ib": 0.006371845491230488, + "step": 2922 + }, + { + "ce_ib": 2.895566463470459, + "ce_orig": 0.6588643789291382, + "epoch": 0.8406068013516428, + "kl_loss": 0.16339895129203796, + "loss_ib": 0.004529555793851614, + "step": 2923 + }, + { + "ce_ib": 5.899153709411621, + "ce_orig": 0.8184993267059326, + "epoch": 0.8406068013516428, + "kl_loss": 0.17759612202644348, + "loss_ib": 0.007675114553421736, + "step": 2923 + }, + { + "ce_ib": 4.624838352203369, + "ce_orig": 0.9021128416061401, + "epoch": 0.8406068013516428, + "kl_loss": 0.1851455122232437, + "loss_ib": 0.006476293783634901, + "step": 2923 + }, + { + "ce_ib": 3.8756418228149414, + "ce_orig": 0.7759286165237427, + "epoch": 0.8406068013516428, + "kl_loss": 0.12561294436454773, + "loss_ib": 0.005131771322339773, + "step": 2923 + }, + { + "ce_ib": 4.451784610748291, + "ce_orig": 0.7045734524726868, + "epoch": 0.8408943849306204, + "kl_loss": 0.11903272569179535, + "loss_ib": 0.005642111878842115, + "step": 2924 + }, + { + "ce_ib": 6.888359546661377, + "ce_orig": 1.4011181592941284, + "epoch": 0.8408943849306204, + "kl_loss": 0.18173274397850037, + "loss_ib": 0.008705687709152699, + "step": 2924 + }, + { + "ce_ib": 2.8860442638397217, + "ce_orig": 0.7234134078025818, + "epoch": 0.8408943849306204, + "kl_loss": 0.14621689915657043, + "loss_ib": 0.004348213318735361, + "step": 2924 + }, + { + "ce_ib": 3.619358539581299, + "ce_orig": 0.8318102955818176, + "epoch": 0.8408943849306204, + "kl_loss": 0.15798379480838776, + "loss_ib": 0.00519919628277421, + "step": 2924 + }, + { + "epoch": 0.8411819685095981, + "grad_norm": 0.1526568979024887, + "learning_rate": 8.452266859616297e-06, + "loss": 0.8861, + "step": 2925 + }, + { + "ce_ib": 2.879456043243408, + "ce_orig": 0.5351731181144714, + "epoch": 0.8411819685095981, + "kl_loss": 0.22766941785812378, + "loss_ib": 0.005156149622052908, + "step": 2925 + }, + { + "ce_ib": 2.0445098876953125, + "ce_orig": 0.40749239921569824, + "epoch": 0.8411819685095981, + "kl_loss": 0.23694376647472382, + "loss_ib": 0.0044139474630355835, + "step": 2925 + }, + { + "ce_ib": 3.3751471042633057, + "ce_orig": 0.8434615731239319, + "epoch": 0.8411819685095981, + "kl_loss": 0.17736798524856567, + "loss_ib": 0.0051488266326487064, + "step": 2925 + }, + { + "ce_ib": 2.296003818511963, + "ce_orig": 0.5613936185836792, + "epoch": 0.8411819685095981, + "kl_loss": 0.18245503306388855, + "loss_ib": 0.004120554309338331, + "step": 2925 + }, + { + "ce_ib": 1.8614481687545776, + "ce_orig": 0.45161980390548706, + "epoch": 0.8414695520885758, + "kl_loss": 0.12577638030052185, + "loss_ib": 0.00311921164393425, + "step": 2926 + }, + { + "ce_ib": 5.2035675048828125, + "ce_orig": 1.2196885347366333, + "epoch": 0.8414695520885758, + "kl_loss": 0.16717004776000977, + "loss_ib": 0.006875268183648586, + "step": 2926 + }, + { + "ce_ib": 4.368530750274658, + "ce_orig": 0.566372275352478, + "epoch": 0.8414695520885758, + "kl_loss": 0.23139940202236176, + "loss_ib": 0.006682524457573891, + "step": 2926 + }, + { + "ce_ib": 6.134298324584961, + "ce_orig": 1.1319655179977417, + "epoch": 0.8414695520885758, + "kl_loss": 0.27617335319519043, + "loss_ib": 0.00889603141695261, + "step": 2926 + }, + { + "ce_ib": 2.88232684135437, + "ce_orig": 0.7383785247802734, + "epoch": 0.8417571356675534, + "kl_loss": 0.18662692606449127, + "loss_ib": 0.004748595878481865, + "step": 2927 + }, + { + "ce_ib": 5.670011043548584, + "ce_orig": 1.375922679901123, + "epoch": 0.8417571356675534, + "kl_loss": 0.19847328960895538, + "loss_ib": 0.007654743734747171, + "step": 2927 + }, + { + "ce_ib": 2.3244402408599854, + "ce_orig": 0.707338809967041, + "epoch": 0.8417571356675534, + "kl_loss": 0.1137804463505745, + "loss_ib": 0.0034622447565197945, + "step": 2927 + }, + { + "ce_ib": 4.392611503601074, + "ce_orig": 0.6233220100402832, + "epoch": 0.8417571356675534, + "kl_loss": 0.2642463743686676, + "loss_ib": 0.007035075221210718, + "step": 2927 + }, + { + "ce_ib": 3.9228415489196777, + "ce_orig": 0.9571593999862671, + "epoch": 0.842044719246531, + "kl_loss": 0.1924964189529419, + "loss_ib": 0.005847805645316839, + "step": 2928 + }, + { + "ce_ib": 4.323923110961914, + "ce_orig": 1.0600332021713257, + "epoch": 0.842044719246531, + "kl_loss": 0.24095161259174347, + "loss_ib": 0.006733439397066832, + "step": 2928 + }, + { + "ce_ib": 3.6694657802581787, + "ce_orig": 0.8358418345451355, + "epoch": 0.842044719246531, + "kl_loss": 0.1799805611371994, + "loss_ib": 0.005469271447509527, + "step": 2928 + }, + { + "ce_ib": 5.689077854156494, + "ce_orig": 1.0564565658569336, + "epoch": 0.842044719246531, + "kl_loss": 0.18149012327194214, + "loss_ib": 0.007503978908061981, + "step": 2928 + }, + { + "ce_ib": 2.2802157402038574, + "ce_orig": 0.4555323123931885, + "epoch": 0.8423323028255086, + "kl_loss": 0.14798693358898163, + "loss_ib": 0.0037600849755108356, + "step": 2929 + }, + { + "ce_ib": 3.091298818588257, + "ce_orig": 0.6165558099746704, + "epoch": 0.8423323028255086, + "kl_loss": 0.1425306499004364, + "loss_ib": 0.0045166052877902985, + "step": 2929 + }, + { + "ce_ib": 7.591645240783691, + "ce_orig": 1.7780826091766357, + "epoch": 0.8423323028255086, + "kl_loss": 0.18147917091846466, + "loss_ib": 0.0094064362347126, + "step": 2929 + }, + { + "ce_ib": 2.4423606395721436, + "ce_orig": 0.5262928009033203, + "epoch": 0.8423323028255086, + "kl_loss": 0.17228208482265472, + "loss_ib": 0.004165181424468756, + "step": 2929 + }, + { + "epoch": 0.8426198864044863, + "grad_norm": 0.15762092173099518, + "learning_rate": 8.44664868256573e-06, + "loss": 0.8739, + "step": 2930 + }, + { + "ce_ib": 2.1493899822235107, + "ce_orig": 0.6479324102401733, + "epoch": 0.8426198864044863, + "kl_loss": 0.10649716854095459, + "loss_ib": 0.003214361611753702, + "step": 2930 + }, + { + "ce_ib": 2.484492540359497, + "ce_orig": 0.702995240688324, + "epoch": 0.8426198864044863, + "kl_loss": 0.14975860714912415, + "loss_ib": 0.003982078284025192, + "step": 2930 + }, + { + "ce_ib": 3.0349082946777344, + "ce_orig": 0.653529703617096, + "epoch": 0.8426198864044863, + "kl_loss": 0.23898805677890778, + "loss_ib": 0.005424789153039455, + "step": 2930 + }, + { + "ce_ib": 3.9509541988372803, + "ce_orig": 0.6597896218299866, + "epoch": 0.8426198864044863, + "kl_loss": 0.23642024397850037, + "loss_ib": 0.006315156817436218, + "step": 2930 + }, + { + "ce_ib": 4.49127197265625, + "ce_orig": 1.3309627771377563, + "epoch": 0.842907469983464, + "kl_loss": 0.12598660588264465, + "loss_ib": 0.005751137621700764, + "step": 2931 + }, + { + "ce_ib": 3.388828754425049, + "ce_orig": 0.4712602198123932, + "epoch": 0.842907469983464, + "kl_loss": 0.19728657603263855, + "loss_ib": 0.005361694376915693, + "step": 2931 + }, + { + "ce_ib": 3.1186814308166504, + "ce_orig": 0.6122041940689087, + "epoch": 0.842907469983464, + "kl_loss": 0.20014531910419464, + "loss_ib": 0.00512013491243124, + "step": 2931 + }, + { + "ce_ib": 4.567592144012451, + "ce_orig": 0.8907778263092041, + "epoch": 0.842907469983464, + "kl_loss": 0.21347588300704956, + "loss_ib": 0.006702350918203592, + "step": 2931 + }, + { + "ce_ib": 3.5589091777801514, + "ce_orig": 0.739313006401062, + "epoch": 0.8431950535624416, + "kl_loss": 0.15806004405021667, + "loss_ib": 0.005139509215950966, + "step": 2932 + }, + { + "ce_ib": 2.4254534244537354, + "ce_orig": 0.6405080556869507, + "epoch": 0.8431950535624416, + "kl_loss": 0.17621135711669922, + "loss_ib": 0.004187567159533501, + "step": 2932 + }, + { + "ce_ib": 5.497552394866943, + "ce_orig": 0.9814654588699341, + "epoch": 0.8431950535624416, + "kl_loss": 0.3041871190071106, + "loss_ib": 0.00853942334651947, + "step": 2932 + }, + { + "ce_ib": 3.5739550590515137, + "ce_orig": 0.48150473833084106, + "epoch": 0.8431950535624416, + "kl_loss": 0.11265752464532852, + "loss_ib": 0.004700529854744673, + "step": 2932 + }, + { + "ce_ib": 4.334322452545166, + "ce_orig": 0.6278800368309021, + "epoch": 0.8434826371414192, + "kl_loss": 0.219837486743927, + "loss_ib": 0.006532697472721338, + "step": 2933 + }, + { + "ce_ib": 3.207486152648926, + "ce_orig": 0.8464101552963257, + "epoch": 0.8434826371414192, + "kl_loss": 0.14957521855831146, + "loss_ib": 0.004703238606452942, + "step": 2933 + }, + { + "ce_ib": 2.21799373626709, + "ce_orig": 0.5960560441017151, + "epoch": 0.8434826371414192, + "kl_loss": 0.12928315997123718, + "loss_ib": 0.0035108253359794617, + "step": 2933 + }, + { + "ce_ib": 3.9511594772338867, + "ce_orig": 0.7465740442276001, + "epoch": 0.8434826371414192, + "kl_loss": 0.23855000734329224, + "loss_ib": 0.00633665919303894, + "step": 2933 + }, + { + "ce_ib": 0.6156674027442932, + "ce_orig": 0.09341368824243546, + "epoch": 0.8437702207203969, + "kl_loss": 0.392306923866272, + "loss_ib": 0.004538736771792173, + "step": 2934 + }, + { + "ce_ib": 2.7125954627990723, + "ce_orig": 0.5482867956161499, + "epoch": 0.8437702207203969, + "kl_loss": 0.13293634355068207, + "loss_ib": 0.004041959065943956, + "step": 2934 + }, + { + "ce_ib": 5.773982048034668, + "ce_orig": 1.4389967918395996, + "epoch": 0.8437702207203969, + "kl_loss": 0.1830286681652069, + "loss_ib": 0.007604268845170736, + "step": 2934 + }, + { + "ce_ib": 3.8364150524139404, + "ce_orig": 0.8445960879325867, + "epoch": 0.8437702207203969, + "kl_loss": 0.19146698713302612, + "loss_ib": 0.005751085001975298, + "step": 2934 + }, + { + "epoch": 0.8440578042993745, + "grad_norm": 0.13429047167301178, + "learning_rate": 8.441022201738502e-06, + "loss": 0.8254, + "step": 2935 + }, + { + "ce_ib": 5.7195234298706055, + "ce_orig": 1.3891003131866455, + "epoch": 0.8440578042993745, + "kl_loss": 0.20004111528396606, + "loss_ib": 0.007719934452325106, + "step": 2935 + }, + { + "ce_ib": 4.73309326171875, + "ce_orig": 1.0069538354873657, + "epoch": 0.8440578042993745, + "kl_loss": 0.31494373083114624, + "loss_ib": 0.007882530800998211, + "step": 2935 + }, + { + "ce_ib": 4.648421287536621, + "ce_orig": 1.287237524986267, + "epoch": 0.8440578042993745, + "kl_loss": 0.17087295651435852, + "loss_ib": 0.006357151083648205, + "step": 2935 + }, + { + "ce_ib": 3.652656078338623, + "ce_orig": 0.7906246185302734, + "epoch": 0.8440578042993745, + "kl_loss": 0.17286822199821472, + "loss_ib": 0.00538133829832077, + "step": 2935 + }, + { + "ce_ib": 4.1066741943359375, + "ce_orig": 0.5136639475822449, + "epoch": 0.8443453878783521, + "kl_loss": 0.23319825530052185, + "loss_ib": 0.006438656710088253, + "step": 2936 + }, + { + "ce_ib": 5.772101402282715, + "ce_orig": 1.2176302671432495, + "epoch": 0.8443453878783521, + "kl_loss": 0.148533433675766, + "loss_ib": 0.007257435005158186, + "step": 2936 + }, + { + "ce_ib": 4.3117289543151855, + "ce_orig": 0.630899965763092, + "epoch": 0.8443453878783521, + "kl_loss": 0.18523259460926056, + "loss_ib": 0.006164054851979017, + "step": 2936 + }, + { + "ce_ib": 4.929681301116943, + "ce_orig": 0.9494863748550415, + "epoch": 0.8443453878783521, + "kl_loss": 0.23100605607032776, + "loss_ib": 0.007239742204546928, + "step": 2936 + }, + { + "ce_ib": 2.768369674682617, + "ce_orig": 0.7732290625572205, + "epoch": 0.8446329714573297, + "kl_loss": 0.1128636971116066, + "loss_ib": 0.0038970063906162977, + "step": 2937 + }, + { + "ce_ib": 4.6263556480407715, + "ce_orig": 0.887306272983551, + "epoch": 0.8446329714573297, + "kl_loss": 0.15249747037887573, + "loss_ib": 0.006151330191642046, + "step": 2937 + }, + { + "ce_ib": 3.8590378761291504, + "ce_orig": 0.7560481429100037, + "epoch": 0.8446329714573297, + "kl_loss": 0.16143280267715454, + "loss_ib": 0.005473365541547537, + "step": 2937 + }, + { + "ce_ib": 7.168416976928711, + "ce_orig": 1.6114459037780762, + "epoch": 0.8446329714573297, + "kl_loss": 0.16050288081169128, + "loss_ib": 0.008773446083068848, + "step": 2937 + }, + { + "ce_ib": 6.351920127868652, + "ce_orig": 1.4613620042800903, + "epoch": 0.8449205550363075, + "kl_loss": 0.20887671411037445, + "loss_ib": 0.008440687321126461, + "step": 2938 + }, + { + "ce_ib": 5.302759647369385, + "ce_orig": 0.9303883910179138, + "epoch": 0.8449205550363075, + "kl_loss": 0.17138314247131348, + "loss_ib": 0.007016591262072325, + "step": 2938 + }, + { + "ce_ib": 3.643139362335205, + "ce_orig": 0.6640917062759399, + "epoch": 0.8449205550363075, + "kl_loss": 0.18407195806503296, + "loss_ib": 0.005483858287334442, + "step": 2938 + }, + { + "ce_ib": 2.498333692550659, + "ce_orig": 0.6795788407325745, + "epoch": 0.8449205550363075, + "kl_loss": 0.1405520737171173, + "loss_ib": 0.0039038544055074453, + "step": 2938 + }, + { + "ce_ib": 2.2950079441070557, + "ce_orig": 0.6429508328437805, + "epoch": 0.8452081386152851, + "kl_loss": 0.12559273838996887, + "loss_ib": 0.0035509353037923574, + "step": 2939 + }, + { + "ce_ib": 2.753821611404419, + "ce_orig": 0.5119112133979797, + "epoch": 0.8452081386152851, + "kl_loss": 0.1673486828804016, + "loss_ib": 0.004427308216691017, + "step": 2939 + }, + { + "ce_ib": 4.47838020324707, + "ce_orig": 1.0376296043395996, + "epoch": 0.8452081386152851, + "kl_loss": 0.19803501665592194, + "loss_ib": 0.006458730436861515, + "step": 2939 + }, + { + "ce_ib": 2.8540761470794678, + "ce_orig": 0.3727121949195862, + "epoch": 0.8452081386152851, + "kl_loss": 0.1764756143093109, + "loss_ib": 0.004618832375854254, + "step": 2939 + }, + { + "epoch": 0.8454957221942627, + "grad_norm": 0.147036612033844, + "learning_rate": 8.435387430690114e-06, + "loss": 0.8647, + "step": 2940 + }, + { + "ce_ib": 3.059846878051758, + "ce_orig": 0.8531759977340698, + "epoch": 0.8454957221942627, + "kl_loss": 0.12579110264778137, + "loss_ib": 0.00431775813922286, + "step": 2940 + }, + { + "ce_ib": 5.663677215576172, + "ce_orig": 1.0725362300872803, + "epoch": 0.8454957221942627, + "kl_loss": 0.1886647343635559, + "loss_ib": 0.0075503247790038586, + "step": 2940 + }, + { + "ce_ib": 3.5680909156799316, + "ce_orig": 0.4942995607852936, + "epoch": 0.8454957221942627, + "kl_loss": 0.14352409541606903, + "loss_ib": 0.005003331694751978, + "step": 2940 + }, + { + "ce_ib": 3.823296308517456, + "ce_orig": 0.6140665411949158, + "epoch": 0.8454957221942627, + "kl_loss": 0.15891605615615845, + "loss_ib": 0.0054124570451676846, + "step": 2940 + }, + { + "ce_ib": 2.7044126987457275, + "ce_orig": 0.5164967775344849, + "epoch": 0.8457833057732403, + "kl_loss": 0.2568615674972534, + "loss_ib": 0.005273028276860714, + "step": 2941 + }, + { + "ce_ib": 5.364206314086914, + "ce_orig": 1.2052416801452637, + "epoch": 0.8457833057732403, + "kl_loss": 0.2056640386581421, + "loss_ib": 0.007420846726745367, + "step": 2941 + }, + { + "ce_ib": 3.5216474533081055, + "ce_orig": 0.7832375764846802, + "epoch": 0.8457833057732403, + "kl_loss": 0.13394367694854736, + "loss_ib": 0.004861084278672934, + "step": 2941 + }, + { + "ce_ib": 5.224883556365967, + "ce_orig": 1.2004969120025635, + "epoch": 0.8457833057732403, + "kl_loss": 0.11066606640815735, + "loss_ib": 0.006331543903797865, + "step": 2941 + }, + { + "ce_ib": 6.231259346008301, + "ce_orig": 1.1523476839065552, + "epoch": 0.846070889352218, + "kl_loss": 0.21798187494277954, + "loss_ib": 0.008411078713834286, + "step": 2942 + }, + { + "ce_ib": 3.7770023345947266, + "ce_orig": 0.653734028339386, + "epoch": 0.846070889352218, + "kl_loss": 0.21294745802879333, + "loss_ib": 0.005906476639211178, + "step": 2942 + }, + { + "ce_ib": 6.750773906707764, + "ce_orig": 1.6212873458862305, + "epoch": 0.846070889352218, + "kl_loss": 0.25735294818878174, + "loss_ib": 0.009324302896857262, + "step": 2942 + }, + { + "ce_ib": 6.595660209655762, + "ce_orig": 1.5212187767028809, + "epoch": 0.846070889352218, + "kl_loss": 0.16071966290473938, + "loss_ib": 0.008202856406569481, + "step": 2942 + }, + { + "ce_ib": 6.470799922943115, + "ce_orig": 1.43364679813385, + "epoch": 0.8463584729311956, + "kl_loss": 0.3032870292663574, + "loss_ib": 0.009503670036792755, + "step": 2943 + }, + { + "ce_ib": 5.694655418395996, + "ce_orig": 1.0725480318069458, + "epoch": 0.8463584729311956, + "kl_loss": 0.2389702945947647, + "loss_ib": 0.008084358647465706, + "step": 2943 + }, + { + "ce_ib": 7.671566963195801, + "ce_orig": 1.836821436882019, + "epoch": 0.8463584729311956, + "kl_loss": 0.17316392064094543, + "loss_ib": 0.00940320547670126, + "step": 2943 + }, + { + "ce_ib": 2.492377996444702, + "ce_orig": 0.497020423412323, + "epoch": 0.8463584729311956, + "kl_loss": 0.28432121872901917, + "loss_ib": 0.005335590336471796, + "step": 2943 + }, + { + "ce_ib": 4.430933475494385, + "ce_orig": 0.8364505767822266, + "epoch": 0.8466460565101732, + "kl_loss": 0.12460307776927948, + "loss_ib": 0.005676964297890663, + "step": 2944 + }, + { + "ce_ib": 7.81962251663208, + "ce_orig": 0.6463693976402283, + "epoch": 0.8466460565101732, + "kl_loss": 0.4282599687576294, + "loss_ib": 0.012102222070097923, + "step": 2944 + }, + { + "ce_ib": 3.213360548019409, + "ce_orig": 0.7275444269180298, + "epoch": 0.8466460565101732, + "kl_loss": 0.21476224064826965, + "loss_ib": 0.005360982846468687, + "step": 2944 + }, + { + "ce_ib": 4.544894695281982, + "ce_orig": 1.2431622743606567, + "epoch": 0.8466460565101732, + "kl_loss": 0.09253580868244171, + "loss_ib": 0.005470252595841885, + "step": 2944 + }, + { + "epoch": 0.846933640089151, + "grad_norm": 0.198257178068161, + "learning_rate": 8.429744382996034e-06, + "loss": 0.9131, + "step": 2945 + }, + { + "ce_ib": 2.549489974975586, + "ce_orig": 0.5159873366355896, + "epoch": 0.846933640089151, + "kl_loss": 0.09921863675117493, + "loss_ib": 0.003541676327586174, + "step": 2945 + }, + { + "ce_ib": 2.4364514350891113, + "ce_orig": 0.5717270374298096, + "epoch": 0.846933640089151, + "kl_loss": 0.15476562082767487, + "loss_ib": 0.003984107635915279, + "step": 2945 + }, + { + "ce_ib": 4.0521345138549805, + "ce_orig": 1.0191154479980469, + "epoch": 0.846933640089151, + "kl_loss": 0.1511918306350708, + "loss_ib": 0.005564053077250719, + "step": 2945 + }, + { + "ce_ib": 3.76226806640625, + "ce_orig": 0.7399132251739502, + "epoch": 0.846933640089151, + "kl_loss": 0.1534525454044342, + "loss_ib": 0.005296793300658464, + "step": 2945 + }, + { + "ce_ib": 4.7174811363220215, + "ce_orig": 0.597251832485199, + "epoch": 0.8472212236681286, + "kl_loss": 0.2460491955280304, + "loss_ib": 0.007177972700446844, + "step": 2946 + }, + { + "ce_ib": 2.5704967975616455, + "ce_orig": 0.6177753210067749, + "epoch": 0.8472212236681286, + "kl_loss": 0.1223616898059845, + "loss_ib": 0.003794113639742136, + "step": 2946 + }, + { + "ce_ib": 6.084099292755127, + "ce_orig": 1.4340417385101318, + "epoch": 0.8472212236681286, + "kl_loss": 0.2456207573413849, + "loss_ib": 0.00854030717164278, + "step": 2946 + }, + { + "ce_ib": 3.7893478870391846, + "ce_orig": 0.827774167060852, + "epoch": 0.8472212236681286, + "kl_loss": 0.2582167088985443, + "loss_ib": 0.006371514871716499, + "step": 2946 + }, + { + "ce_ib": 4.942902565002441, + "ce_orig": 0.8973841071128845, + "epoch": 0.8475088072471062, + "kl_loss": 0.15454010665416718, + "loss_ib": 0.006488303188234568, + "step": 2947 + }, + { + "ce_ib": 6.46604061126709, + "ce_orig": 1.32331383228302, + "epoch": 0.8475088072471062, + "kl_loss": 0.1838822066783905, + "loss_ib": 0.00830486323684454, + "step": 2947 + }, + { + "ce_ib": 4.584469318389893, + "ce_orig": 0.6683431267738342, + "epoch": 0.8475088072471062, + "kl_loss": 0.19962969422340393, + "loss_ib": 0.006580766290426254, + "step": 2947 + }, + { + "ce_ib": 8.04084300994873, + "ce_orig": 1.7878350019454956, + "epoch": 0.8475088072471062, + "kl_loss": 0.2733556032180786, + "loss_ib": 0.010774399153888226, + "step": 2947 + }, + { + "ce_ib": 3.715244770050049, + "ce_orig": 0.41331833600997925, + "epoch": 0.8477963908260838, + "kl_loss": 0.21092696487903595, + "loss_ib": 0.005824514664709568, + "step": 2948 + }, + { + "ce_ib": 3.0547852516174316, + "ce_orig": 0.788264811038971, + "epoch": 0.8477963908260838, + "kl_loss": 0.16831353306770325, + "loss_ib": 0.004737920593470335, + "step": 2948 + }, + { + "ce_ib": 5.738078594207764, + "ce_orig": 1.454357385635376, + "epoch": 0.8477963908260838, + "kl_loss": 0.15187665820121765, + "loss_ib": 0.007256845012307167, + "step": 2948 + }, + { + "ce_ib": 6.227163791656494, + "ce_orig": 1.1468111276626587, + "epoch": 0.8477963908260838, + "kl_loss": 0.1848088502883911, + "loss_ib": 0.008075252175331116, + "step": 2948 + }, + { + "ce_ib": 2.599863290786743, + "ce_orig": 0.4311082065105438, + "epoch": 0.8480839744050614, + "kl_loss": 0.21214577555656433, + "loss_ib": 0.004721321165561676, + "step": 2949 + }, + { + "ce_ib": 3.2096030712127686, + "ce_orig": 0.7193498015403748, + "epoch": 0.8480839744050614, + "kl_loss": 0.13732098042964935, + "loss_ib": 0.0045828125439584255, + "step": 2949 + }, + { + "ce_ib": 3.722921848297119, + "ce_orig": 0.5724219083786011, + "epoch": 0.8480839744050614, + "kl_loss": 0.22521287202835083, + "loss_ib": 0.00597505085170269, + "step": 2949 + }, + { + "ce_ib": 3.7103519439697266, + "ce_orig": 0.6429916024208069, + "epoch": 0.8480839744050614, + "kl_loss": 0.1677098274230957, + "loss_ib": 0.005387450568377972, + "step": 2949 + }, + { + "epoch": 0.8483715579840391, + "grad_norm": 0.15996325016021729, + "learning_rate": 8.424093072251675e-06, + "loss": 0.908, + "step": 2950 + }, + { + "ce_ib": 3.4128224849700928, + "ce_orig": 0.583622932434082, + "epoch": 0.8483715579840391, + "kl_loss": 0.1511433869600296, + "loss_ib": 0.0049242558889091015, + "step": 2950 + }, + { + "ce_ib": 4.639676570892334, + "ce_orig": 0.6618440747261047, + "epoch": 0.8483715579840391, + "kl_loss": 0.3065328598022461, + "loss_ib": 0.0077050053514540195, + "step": 2950 + }, + { + "ce_ib": 1.5957257747650146, + "ce_orig": 0.35707542300224304, + "epoch": 0.8483715579840391, + "kl_loss": 0.14902089536190033, + "loss_ib": 0.003085934789851308, + "step": 2950 + }, + { + "ce_ib": 3.1648948192596436, + "ce_orig": 0.626279890537262, + "epoch": 0.8483715579840391, + "kl_loss": 0.2580316960811615, + "loss_ib": 0.005745211616158485, + "step": 2950 + }, + { + "ce_ib": 4.248167514801025, + "ce_orig": 1.0324609279632568, + "epoch": 0.8486591415630168, + "kl_loss": 0.18505597114562988, + "loss_ib": 0.006098726764321327, + "step": 2951 + }, + { + "ce_ib": 4.964432716369629, + "ce_orig": 0.975151002407074, + "epoch": 0.8486591415630168, + "kl_loss": 0.19103354215621948, + "loss_ib": 0.006874768063426018, + "step": 2951 + }, + { + "ce_ib": 2.6858952045440674, + "ce_orig": 0.7822684049606323, + "epoch": 0.8486591415630168, + "kl_loss": 0.25134748220443726, + "loss_ib": 0.005199370440095663, + "step": 2951 + }, + { + "ce_ib": 4.365019798278809, + "ce_orig": 0.7550670504570007, + "epoch": 0.8486591415630168, + "kl_loss": 0.14840421080589294, + "loss_ib": 0.0058490619994699955, + "step": 2951 + }, + { + "ce_ib": 6.819460391998291, + "ce_orig": 1.5705127716064453, + "epoch": 0.8489467251419944, + "kl_loss": 0.23585152626037598, + "loss_ib": 0.009177975356578827, + "step": 2952 + }, + { + "ce_ib": 2.5759828090667725, + "ce_orig": 0.606515109539032, + "epoch": 0.8489467251419944, + "kl_loss": 0.11176659911870956, + "loss_ib": 0.003693648846819997, + "step": 2952 + }, + { + "ce_ib": 3.736466646194458, + "ce_orig": 0.4980674088001251, + "epoch": 0.8489467251419944, + "kl_loss": 0.19593198597431183, + "loss_ib": 0.005695786327123642, + "step": 2952 + }, + { + "ce_ib": 3.388256549835205, + "ce_orig": 0.7693995833396912, + "epoch": 0.8489467251419944, + "kl_loss": 0.18680864572525024, + "loss_ib": 0.005256342701613903, + "step": 2952 + }, + { + "ce_ib": 3.7969179153442383, + "ce_orig": 0.5358807444572449, + "epoch": 0.849234308720972, + "kl_loss": 0.12164665013551712, + "loss_ib": 0.005013384856283665, + "step": 2953 + }, + { + "ce_ib": 2.4188921451568604, + "ce_orig": 0.5529060363769531, + "epoch": 0.849234308720972, + "kl_loss": 0.36399704217910767, + "loss_ib": 0.006058862898498774, + "step": 2953 + }, + { + "ce_ib": 2.381598711013794, + "ce_orig": 0.5631563067436218, + "epoch": 0.849234308720972, + "kl_loss": 0.14878305792808533, + "loss_ib": 0.0038694292306900024, + "step": 2953 + }, + { + "ce_ib": 6.510183811187744, + "ce_orig": 1.1252490282058716, + "epoch": 0.849234308720972, + "kl_loss": 0.13018660247325897, + "loss_ib": 0.007812049705535173, + "step": 2953 + }, + { + "ce_ib": 3.568572521209717, + "ce_orig": 0.9058544635772705, + "epoch": 0.8495218922999497, + "kl_loss": 0.19865000247955322, + "loss_ib": 0.005555072799324989, + "step": 2954 + }, + { + "ce_ib": 5.9231367111206055, + "ce_orig": 1.092559576034546, + "epoch": 0.8495218922999497, + "kl_loss": 0.20950636267662048, + "loss_ib": 0.008018200285732746, + "step": 2954 + }, + { + "ce_ib": 3.433079242706299, + "ce_orig": 0.7039644718170166, + "epoch": 0.8495218922999497, + "kl_loss": 0.1689169555902481, + "loss_ib": 0.005122249014675617, + "step": 2954 + }, + { + "ce_ib": 2.2904505729675293, + "ce_orig": 0.4298301637172699, + "epoch": 0.8495218922999497, + "kl_loss": 0.2540898025035858, + "loss_ib": 0.004831348545849323, + "step": 2954 + }, + { + "epoch": 0.8498094758789273, + "grad_norm": 0.11862782388925552, + "learning_rate": 8.418433512072356e-06, + "loss": 0.8102, + "step": 2955 + }, + { + "ce_ib": 5.042343616485596, + "ce_orig": 0.9310670495033264, + "epoch": 0.8498094758789273, + "kl_loss": 0.23780180513858795, + "loss_ib": 0.007420361507683992, + "step": 2955 + }, + { + "ce_ib": 4.4760823249816895, + "ce_orig": 1.264243245124817, + "epoch": 0.8498094758789273, + "kl_loss": 0.42119577527046204, + "loss_ib": 0.008688039146363735, + "step": 2955 + }, + { + "ce_ib": 3.7456891536712646, + "ce_orig": 0.9952322244644165, + "epoch": 0.8498094758789273, + "kl_loss": 0.16147460043430328, + "loss_ib": 0.005360435228794813, + "step": 2955 + }, + { + "ce_ib": 5.905974864959717, + "ce_orig": 0.7881524562835693, + "epoch": 0.8498094758789273, + "kl_loss": 0.2277597188949585, + "loss_ib": 0.008183572441339493, + "step": 2955 + }, + { + "ce_ib": 6.694603443145752, + "ce_orig": 1.3370400667190552, + "epoch": 0.8500970594579049, + "kl_loss": 0.14719584584236145, + "loss_ib": 0.008166561834514141, + "step": 2956 + }, + { + "ce_ib": 3.5256755352020264, + "ce_orig": 0.8100686073303223, + "epoch": 0.8500970594579049, + "kl_loss": 0.17537111043930054, + "loss_ib": 0.005279386416077614, + "step": 2956 + }, + { + "ce_ib": 6.48076057434082, + "ce_orig": 1.489971399307251, + "epoch": 0.8500970594579049, + "kl_loss": 0.25193876028060913, + "loss_ib": 0.009000147692859173, + "step": 2956 + }, + { + "ce_ib": 1.5796130895614624, + "ce_orig": 0.2857901155948639, + "epoch": 0.8500970594579049, + "kl_loss": 0.4466320872306824, + "loss_ib": 0.006045933812856674, + "step": 2956 + }, + { + "ce_ib": 3.5242691040039062, + "ce_orig": 0.6978204846382141, + "epoch": 0.8503846430368825, + "kl_loss": 0.17755275964736938, + "loss_ib": 0.005299796815961599, + "step": 2957 + }, + { + "ce_ib": 4.66121768951416, + "ce_orig": 0.7492635250091553, + "epoch": 0.8503846430368825, + "kl_loss": 0.12883822619915009, + "loss_ib": 0.005949599668383598, + "step": 2957 + }, + { + "ce_ib": 3.4606668949127197, + "ce_orig": 0.6976551413536072, + "epoch": 0.8503846430368825, + "kl_loss": 0.20703622698783875, + "loss_ib": 0.0055310288444161415, + "step": 2957 + }, + { + "ce_ib": 6.674962520599365, + "ce_orig": 1.092530608177185, + "epoch": 0.8503846430368825, + "kl_loss": 0.2927056849002838, + "loss_ib": 0.009602019563317299, + "step": 2957 + }, + { + "ce_ib": 4.409564018249512, + "ce_orig": 1.0627976655960083, + "epoch": 0.8506722266158603, + "kl_loss": 0.19298407435417175, + "loss_ib": 0.0063394042663276196, + "step": 2958 + }, + { + "ce_ib": 6.633329391479492, + "ce_orig": 0.8166534900665283, + "epoch": 0.8506722266158603, + "kl_loss": 0.17902210354804993, + "loss_ib": 0.008423550054430962, + "step": 2958 + }, + { + "ce_ib": 4.46525239944458, + "ce_orig": 0.807449996471405, + "epoch": 0.8506722266158603, + "kl_loss": 0.25444549322128296, + "loss_ib": 0.0070097073912620544, + "step": 2958 + }, + { + "ce_ib": 3.6922526359558105, + "ce_orig": 1.0776283740997314, + "epoch": 0.8506722266158603, + "kl_loss": 0.193572998046875, + "loss_ib": 0.005627982318401337, + "step": 2958 + }, + { + "ce_ib": 4.886637210845947, + "ce_orig": 0.9113212823867798, + "epoch": 0.8509598101948379, + "kl_loss": 0.2511560320854187, + "loss_ib": 0.007398197427392006, + "step": 2959 + }, + { + "ce_ib": 3.201608657836914, + "ce_orig": 0.8645471334457397, + "epoch": 0.8509598101948379, + "kl_loss": 0.08205005526542664, + "loss_ib": 0.004022109322249889, + "step": 2959 + }, + { + "ce_ib": 2.953087568283081, + "ce_orig": 0.8891538977622986, + "epoch": 0.8509598101948379, + "kl_loss": 0.2314966768026352, + "loss_ib": 0.00526805454865098, + "step": 2959 + }, + { + "ce_ib": 3.212994337081909, + "ce_orig": 0.6963291764259338, + "epoch": 0.8509598101948379, + "kl_loss": 0.17277084290981293, + "loss_ib": 0.0049407025799155235, + "step": 2959 + }, + { + "epoch": 0.8512473937738155, + "grad_norm": 0.17067305743694305, + "learning_rate": 8.412765716093273e-06, + "loss": 0.8596, + "step": 2960 + }, + { + "ce_ib": 2.8538103103637695, + "ce_orig": 0.592431902885437, + "epoch": 0.8512473937738155, + "kl_loss": 0.15411598980426788, + "loss_ib": 0.004394970368593931, + "step": 2960 + }, + { + "ce_ib": 3.235257625579834, + "ce_orig": 0.6062502861022949, + "epoch": 0.8512473937738155, + "kl_loss": 0.1507919281721115, + "loss_ib": 0.0047431765124201775, + "step": 2960 + }, + { + "ce_ib": 7.460322380065918, + "ce_orig": 1.7360141277313232, + "epoch": 0.8512473937738155, + "kl_loss": 0.11055456101894379, + "loss_ib": 0.008565868251025677, + "step": 2960 + }, + { + "ce_ib": 4.954519748687744, + "ce_orig": 1.078063726425171, + "epoch": 0.8512473937738155, + "kl_loss": 0.1487312614917755, + "loss_ib": 0.006441832520067692, + "step": 2960 + }, + { + "ce_ib": 3.5400893688201904, + "ce_orig": 0.8485971689224243, + "epoch": 0.8515349773527932, + "kl_loss": 0.13005240261554718, + "loss_ib": 0.004840613342821598, + "step": 2961 + }, + { + "ce_ib": 5.3483757972717285, + "ce_orig": 1.4278228282928467, + "epoch": 0.8515349773527932, + "kl_loss": 0.10780370235443115, + "loss_ib": 0.006426413077861071, + "step": 2961 + }, + { + "ce_ib": 4.224776744842529, + "ce_orig": 0.864602267742157, + "epoch": 0.8515349773527932, + "kl_loss": 0.30043745040893555, + "loss_ib": 0.0072291516698896885, + "step": 2961 + }, + { + "ce_ib": 2.64499831199646, + "ce_orig": 0.4963012933731079, + "epoch": 0.8515349773527932, + "kl_loss": 0.1546255499124527, + "loss_ib": 0.004191253799945116, + "step": 2961 + }, + { + "ce_ib": 3.1556575298309326, + "ce_orig": 0.9476629495620728, + "epoch": 0.8518225609317708, + "kl_loss": 0.18631252646446228, + "loss_ib": 0.005018782336264849, + "step": 2962 + }, + { + "ce_ib": 7.266080856323242, + "ce_orig": 1.580652117729187, + "epoch": 0.8518225609317708, + "kl_loss": 0.1501215249300003, + "loss_ib": 0.008767295628786087, + "step": 2962 + }, + { + "ce_ib": 4.412817001342773, + "ce_orig": 0.7905393838882446, + "epoch": 0.8518225609317708, + "kl_loss": 0.19279643893241882, + "loss_ib": 0.0063407812267541885, + "step": 2962 + }, + { + "ce_ib": 4.736240863800049, + "ce_orig": 0.9638208150863647, + "epoch": 0.8518225609317708, + "kl_loss": 0.12784332036972046, + "loss_ib": 0.006014673504978418, + "step": 2962 + }, + { + "ce_ib": 5.072812080383301, + "ce_orig": 0.8128719925880432, + "epoch": 0.8521101445107484, + "kl_loss": 0.16831374168395996, + "loss_ib": 0.006755949929356575, + "step": 2963 + }, + { + "ce_ib": 1.829398274421692, + "ce_orig": 0.5640958547592163, + "epoch": 0.8521101445107484, + "kl_loss": 0.13411983847618103, + "loss_ib": 0.003170596668496728, + "step": 2963 + }, + { + "ce_ib": 2.987241744995117, + "ce_orig": 0.2718130946159363, + "epoch": 0.8521101445107484, + "kl_loss": 0.3280700445175171, + "loss_ib": 0.006267942022532225, + "step": 2963 + }, + { + "ce_ib": 2.1400675773620605, + "ce_orig": 0.6326172947883606, + "epoch": 0.8521101445107484, + "kl_loss": 0.16512084007263184, + "loss_ib": 0.0037912761326879263, + "step": 2963 + }, + { + "ce_ib": 5.2689900398254395, + "ce_orig": 1.105605125427246, + "epoch": 0.852397728089726, + "kl_loss": 0.21548780798912048, + "loss_ib": 0.00742386793717742, + "step": 2964 + }, + { + "ce_ib": 6.297221660614014, + "ce_orig": 1.3161224126815796, + "epoch": 0.852397728089726, + "kl_loss": 0.2339518964290619, + "loss_ib": 0.008636740036308765, + "step": 2964 + }, + { + "ce_ib": 2.5215377807617188, + "ce_orig": 0.7098166346549988, + "epoch": 0.852397728089726, + "kl_loss": 0.137304425239563, + "loss_ib": 0.00389458192512393, + "step": 2964 + }, + { + "ce_ib": 2.6957151889801025, + "ce_orig": 0.5771493315696716, + "epoch": 0.852397728089726, + "kl_loss": 0.1604924499988556, + "loss_ib": 0.004300639498978853, + "step": 2964 + }, + { + "epoch": 0.8526853116687038, + "grad_norm": 0.13448046147823334, + "learning_rate": 8.407089697969458e-06, + "loss": 0.8648, + "step": 2965 + }, + { + "ce_ib": 3.0287673473358154, + "ce_orig": 0.5765299201011658, + "epoch": 0.8526853116687038, + "kl_loss": 0.22049295902252197, + "loss_ib": 0.005233696661889553, + "step": 2965 + }, + { + "ce_ib": 4.762798309326172, + "ce_orig": 0.7070713043212891, + "epoch": 0.8526853116687038, + "kl_loss": 0.2752772569656372, + "loss_ib": 0.00751557108014822, + "step": 2965 + }, + { + "ce_ib": 4.6227569580078125, + "ce_orig": 0.49827519059181213, + "epoch": 0.8526853116687038, + "kl_loss": 0.2227911502122879, + "loss_ib": 0.00685066869482398, + "step": 2965 + }, + { + "ce_ib": 5.372928142547607, + "ce_orig": 1.3953849077224731, + "epoch": 0.8526853116687038, + "kl_loss": 0.12743832170963287, + "loss_ib": 0.0066473111510276794, + "step": 2965 + }, + { + "ce_ib": 4.215069770812988, + "ce_orig": 0.7439747452735901, + "epoch": 0.8529728952476814, + "kl_loss": 0.23111078143119812, + "loss_ib": 0.006526177283376455, + "step": 2966 + }, + { + "ce_ib": 2.53460431098938, + "ce_orig": 0.6148127317428589, + "epoch": 0.8529728952476814, + "kl_loss": 0.16759958863258362, + "loss_ib": 0.004210600163787603, + "step": 2966 + }, + { + "ce_ib": 2.678927183151245, + "ce_orig": 0.5285096764564514, + "epoch": 0.8529728952476814, + "kl_loss": 0.12035226076841354, + "loss_ib": 0.003882449818775058, + "step": 2966 + }, + { + "ce_ib": 5.239190578460693, + "ce_orig": 1.0615259408950806, + "epoch": 0.8529728952476814, + "kl_loss": 0.21074168384075165, + "loss_ib": 0.007346607279032469, + "step": 2966 + }, + { + "ce_ib": 3.778083562850952, + "ce_orig": 0.9650173783302307, + "epoch": 0.853260478826659, + "kl_loss": 0.18498679995536804, + "loss_ib": 0.005627951584756374, + "step": 2967 + }, + { + "ce_ib": 6.7100043296813965, + "ce_orig": 1.7610026597976685, + "epoch": 0.853260478826659, + "kl_loss": 0.18536663055419922, + "loss_ib": 0.008563670329749584, + "step": 2967 + }, + { + "ce_ib": 3.3127918243408203, + "ce_orig": 0.717430830001831, + "epoch": 0.853260478826659, + "kl_loss": 0.17499461770057678, + "loss_ib": 0.0050627379678189754, + "step": 2967 + }, + { + "ce_ib": 1.9449009895324707, + "ce_orig": 0.5779984593391418, + "epoch": 0.853260478826659, + "kl_loss": 0.12659773230552673, + "loss_ib": 0.0032108782324939966, + "step": 2967 + }, + { + "ce_ib": 4.317677021026611, + "ce_orig": 0.9004755616188049, + "epoch": 0.8535480624056366, + "kl_loss": 0.13967551290988922, + "loss_ib": 0.005714431870728731, + "step": 2968 + }, + { + "ce_ib": 3.5088250637054443, + "ce_orig": 0.553810179233551, + "epoch": 0.8535480624056366, + "kl_loss": 0.29036056995391846, + "loss_ib": 0.006412430666387081, + "step": 2968 + }, + { + "ce_ib": 6.0417938232421875, + "ce_orig": 1.3496768474578857, + "epoch": 0.8535480624056366, + "kl_loss": 0.15871095657348633, + "loss_ib": 0.00762890325859189, + "step": 2968 + }, + { + "ce_ib": 5.050684928894043, + "ce_orig": 1.2064789533615112, + "epoch": 0.8535480624056366, + "kl_loss": 0.19189035892486572, + "loss_ib": 0.006969588343054056, + "step": 2968 + }, + { + "ce_ib": 4.12687349319458, + "ce_orig": 0.5823162794113159, + "epoch": 0.8538356459846143, + "kl_loss": 0.26785823702812195, + "loss_ib": 0.006805455312132835, + "step": 2969 + }, + { + "ce_ib": 6.669224739074707, + "ce_orig": 1.5886961221694946, + "epoch": 0.8538356459846143, + "kl_loss": 0.14328455924987793, + "loss_ib": 0.008102070540189743, + "step": 2969 + }, + { + "ce_ib": 3.7932491302490234, + "ce_orig": 0.681464672088623, + "epoch": 0.8538356459846143, + "kl_loss": 0.25402891635894775, + "loss_ib": 0.006333538331091404, + "step": 2969 + }, + { + "ce_ib": 5.443696975708008, + "ce_orig": 0.9264258742332458, + "epoch": 0.8538356459846143, + "kl_loss": 0.143731951713562, + "loss_ib": 0.006881016306579113, + "step": 2969 + }, + { + "epoch": 0.8541232295635919, + "grad_norm": 0.12369820475578308, + "learning_rate": 8.401405471375757e-06, + "loss": 0.87, + "step": 2970 + }, + { + "ce_ib": 2.32771372795105, + "ce_orig": 0.4015655219554901, + "epoch": 0.8541232295635919, + "kl_loss": 0.16326454281806946, + "loss_ib": 0.003960358910262585, + "step": 2970 + }, + { + "ce_ib": 3.1813879013061523, + "ce_orig": 0.6704120635986328, + "epoch": 0.8541232295635919, + "kl_loss": 0.15537288784980774, + "loss_ib": 0.004735116846859455, + "step": 2970 + }, + { + "ce_ib": 2.6516852378845215, + "ce_orig": 0.8014650940895081, + "epoch": 0.8541232295635919, + "kl_loss": 0.11453309655189514, + "loss_ib": 0.003797016106545925, + "step": 2970 + }, + { + "ce_ib": 4.313230514526367, + "ce_orig": 1.0053460597991943, + "epoch": 0.8541232295635919, + "kl_loss": 0.15701556205749512, + "loss_ib": 0.005883385892957449, + "step": 2970 + }, + { + "ce_ib": 3.6376211643218994, + "ce_orig": 0.875159740447998, + "epoch": 0.8544108131425696, + "kl_loss": 0.2562505900859833, + "loss_ib": 0.006200126837939024, + "step": 2971 + }, + { + "ce_ib": 8.572291374206543, + "ce_orig": 1.884285569190979, + "epoch": 0.8544108131425696, + "kl_loss": 0.22660133242607117, + "loss_ib": 0.01083830464631319, + "step": 2971 + }, + { + "ce_ib": 4.299160003662109, + "ce_orig": 1.0389413833618164, + "epoch": 0.8544108131425696, + "kl_loss": 0.2627021372318268, + "loss_ib": 0.006926181260496378, + "step": 2971 + }, + { + "ce_ib": 1.8871734142303467, + "ce_orig": 0.4395648241043091, + "epoch": 0.8544108131425696, + "kl_loss": 0.09295320510864258, + "loss_ib": 0.002816705731675029, + "step": 2971 + }, + { + "ce_ib": 2.382647752761841, + "ce_orig": 0.59117192029953, + "epoch": 0.8546983967215472, + "kl_loss": 0.19237692654132843, + "loss_ib": 0.00430641695857048, + "step": 2972 + }, + { + "ce_ib": 3.155672073364258, + "ce_orig": 0.6831037402153015, + "epoch": 0.8546983967215472, + "kl_loss": 0.15743333101272583, + "loss_ib": 0.004730005282908678, + "step": 2972 + }, + { + "ce_ib": 7.707786560058594, + "ce_orig": 1.712813138961792, + "epoch": 0.8546983967215472, + "kl_loss": 0.2685178220272064, + "loss_ib": 0.010392964817583561, + "step": 2972 + }, + { + "ce_ib": 2.7355971336364746, + "ce_orig": 0.2675800025463104, + "epoch": 0.8546983967215472, + "kl_loss": 0.2867012619972229, + "loss_ib": 0.0056026098318398, + "step": 2972 + }, + { + "ce_ib": 4.186552047729492, + "ce_orig": 0.6689779162406921, + "epoch": 0.8549859803005249, + "kl_loss": 0.2695651650428772, + "loss_ib": 0.006882203742861748, + "step": 2973 + }, + { + "ce_ib": 7.103358268737793, + "ce_orig": 1.3846296072006226, + "epoch": 0.8549859803005249, + "kl_loss": 0.32631537318229675, + "loss_ib": 0.010366512462496758, + "step": 2973 + }, + { + "ce_ib": 2.423001289367676, + "ce_orig": 0.48849350214004517, + "epoch": 0.8549859803005249, + "kl_loss": 0.15834078192710876, + "loss_ib": 0.004006409086287022, + "step": 2973 + }, + { + "ce_ib": 1.6791139841079712, + "ce_orig": 0.3320811092853546, + "epoch": 0.8549859803005249, + "kl_loss": 0.36685290932655334, + "loss_ib": 0.005347642581909895, + "step": 2973 + }, + { + "ce_ib": 5.69830846786499, + "ce_orig": 1.3763846158981323, + "epoch": 0.8552735638795025, + "kl_loss": 0.14218086004257202, + "loss_ib": 0.007120116613805294, + "step": 2974 + }, + { + "ce_ib": 3.1831507682800293, + "ce_orig": 0.5675821900367737, + "epoch": 0.8552735638795025, + "kl_loss": 0.2230696827173233, + "loss_ib": 0.0054138475097715855, + "step": 2974 + }, + { + "ce_ib": 3.4734995365142822, + "ce_orig": 0.693574845790863, + "epoch": 0.8552735638795025, + "kl_loss": 0.29138320684432983, + "loss_ib": 0.006387331523001194, + "step": 2974 + }, + { + "ce_ib": 2.290343999862671, + "ce_orig": 0.6538745164871216, + "epoch": 0.8552735638795025, + "kl_loss": 0.11931988596916199, + "loss_ib": 0.0034835427068173885, + "step": 2974 + }, + { + "epoch": 0.8555611474584801, + "grad_norm": 0.15045621991157532, + "learning_rate": 8.395713050006793e-06, + "loss": 0.8889, + "step": 2975 + }, + { + "ce_ib": 5.352742671966553, + "ce_orig": 0.9939350485801697, + "epoch": 0.8555611474584801, + "kl_loss": 0.15542243421077728, + "loss_ib": 0.006906967144459486, + "step": 2975 + }, + { + "ce_ib": 4.543302059173584, + "ce_orig": 1.0180364847183228, + "epoch": 0.8555611474584801, + "kl_loss": 0.16144771873950958, + "loss_ib": 0.006157779134809971, + "step": 2975 + }, + { + "ce_ib": 4.6210479736328125, + "ce_orig": 0.9617406129837036, + "epoch": 0.8555611474584801, + "kl_loss": 0.12812790274620056, + "loss_ib": 0.0059023271314799786, + "step": 2975 + }, + { + "ce_ib": 2.5872647762298584, + "ce_orig": 0.5044644474983215, + "epoch": 0.8555611474584801, + "kl_loss": 0.2180185616016388, + "loss_ib": 0.004767450504004955, + "step": 2975 + }, + { + "ce_ib": 2.8784983158111572, + "ce_orig": 0.9585151672363281, + "epoch": 0.8558487310374577, + "kl_loss": 0.14945240318775177, + "loss_ib": 0.0043730223551392555, + "step": 2976 + }, + { + "ce_ib": 8.066974639892578, + "ce_orig": 1.7447173595428467, + "epoch": 0.8558487310374577, + "kl_loss": 0.1547113060951233, + "loss_ib": 0.009614087641239166, + "step": 2976 + }, + { + "ce_ib": 2.7234621047973633, + "ce_orig": 0.48713457584381104, + "epoch": 0.8558487310374577, + "kl_loss": 0.18933795392513275, + "loss_ib": 0.004616841673851013, + "step": 2976 + }, + { + "ce_ib": 2.03658127784729, + "ce_orig": 0.5643510818481445, + "epoch": 0.8558487310374577, + "kl_loss": 0.20340237021446228, + "loss_ib": 0.004070605151355267, + "step": 2976 + }, + { + "ce_ib": 4.62175178527832, + "ce_orig": 1.165268898010254, + "epoch": 0.8561363146164354, + "kl_loss": 0.1961984634399414, + "loss_ib": 0.006583736278116703, + "step": 2977 + }, + { + "ce_ib": 5.398066520690918, + "ce_orig": 1.500983476638794, + "epoch": 0.8561363146164354, + "kl_loss": 0.2097923457622528, + "loss_ib": 0.007495989557355642, + "step": 2977 + }, + { + "ce_ib": 6.266430854797363, + "ce_orig": 1.3154008388519287, + "epoch": 0.8561363146164354, + "kl_loss": 0.19246898591518402, + "loss_ib": 0.008191120810806751, + "step": 2977 + }, + { + "ce_ib": 1.5026779174804688, + "ce_orig": 0.3540169298648834, + "epoch": 0.8561363146164354, + "kl_loss": 0.13055400550365448, + "loss_ib": 0.002808217890560627, + "step": 2977 + }, + { + "ce_ib": 5.414214134216309, + "ce_orig": 1.053859829902649, + "epoch": 0.8564238981954131, + "kl_loss": 0.17202094197273254, + "loss_ib": 0.00713442312553525, + "step": 2978 + }, + { + "ce_ib": 5.503726959228516, + "ce_orig": 1.2527791261672974, + "epoch": 0.8564238981954131, + "kl_loss": 0.20842045545578003, + "loss_ib": 0.007587931584566832, + "step": 2978 + }, + { + "ce_ib": 6.182973861694336, + "ce_orig": 1.6710340976715088, + "epoch": 0.8564238981954131, + "kl_loss": 0.18958091735839844, + "loss_ib": 0.008078782819211483, + "step": 2978 + }, + { + "ce_ib": 2.1381382942199707, + "ce_orig": 0.7164555191993713, + "epoch": 0.8564238981954131, + "kl_loss": 0.13109391927719116, + "loss_ib": 0.003449077485129237, + "step": 2978 + }, + { + "ce_ib": 3.5863914489746094, + "ce_orig": 0.4447256326675415, + "epoch": 0.8567114817743907, + "kl_loss": 0.14463555812835693, + "loss_ib": 0.005032746586948633, + "step": 2979 + }, + { + "ce_ib": 5.353610992431641, + "ce_orig": 1.1203068494796753, + "epoch": 0.8567114817743907, + "kl_loss": 0.27223941683769226, + "loss_ib": 0.008076004683971405, + "step": 2979 + }, + { + "ce_ib": 5.3613667488098145, + "ce_orig": 0.8839277625083923, + "epoch": 0.8567114817743907, + "kl_loss": 0.20378506183624268, + "loss_ib": 0.00739921722561121, + "step": 2979 + }, + { + "ce_ib": 1.6910353899002075, + "ce_orig": 0.3850093483924866, + "epoch": 0.8567114817743907, + "kl_loss": 0.2173919826745987, + "loss_ib": 0.0038649551570415497, + "step": 2979 + }, + { + "epoch": 0.8569990653533683, + "grad_norm": 0.13861577212810516, + "learning_rate": 8.390012447576931e-06, + "loss": 0.8788, + "step": 2980 + }, + { + "ce_ib": 4.916282653808594, + "ce_orig": 1.1274889707565308, + "epoch": 0.8569990653533683, + "kl_loss": 0.17775681614875793, + "loss_ib": 0.006693850737065077, + "step": 2980 + }, + { + "ce_ib": 3.354749917984009, + "ce_orig": 0.4847354590892792, + "epoch": 0.8569990653533683, + "kl_loss": 0.17642588913440704, + "loss_ib": 0.00511900894343853, + "step": 2980 + }, + { + "ce_ib": 3.7311792373657227, + "ce_orig": 0.8150086998939514, + "epoch": 0.8569990653533683, + "kl_loss": 0.2533571422100067, + "loss_ib": 0.006264750845730305, + "step": 2980 + }, + { + "ce_ib": 2.51065993309021, + "ce_orig": 0.5653507709503174, + "epoch": 0.8569990653533683, + "kl_loss": 0.12129000574350357, + "loss_ib": 0.0037235599011182785, + "step": 2980 + }, + { + "ce_ib": 1.9455848932266235, + "ce_orig": 0.4901646375656128, + "epoch": 0.857286648932346, + "kl_loss": 0.22917205095291138, + "loss_ib": 0.004237305372953415, + "step": 2981 + }, + { + "ce_ib": 6.209074974060059, + "ce_orig": 1.3893464803695679, + "epoch": 0.857286648932346, + "kl_loss": 0.213079571723938, + "loss_ib": 0.00833987072110176, + "step": 2981 + }, + { + "ce_ib": 3.5128471851348877, + "ce_orig": 0.8023747801780701, + "epoch": 0.857286648932346, + "kl_loss": 0.26800256967544556, + "loss_ib": 0.006192872300744057, + "step": 2981 + }, + { + "ce_ib": 2.8090124130249023, + "ce_orig": 0.7876785397529602, + "epoch": 0.857286648932346, + "kl_loss": 0.16606995463371277, + "loss_ib": 0.004469711799174547, + "step": 2981 + }, + { + "ce_ib": 3.8154067993164062, + "ce_orig": 0.36259564757347107, + "epoch": 0.8575742325113236, + "kl_loss": 0.22752967476844788, + "loss_ib": 0.006090703420341015, + "step": 2982 + }, + { + "ce_ib": 4.014313220977783, + "ce_orig": 0.5754386186599731, + "epoch": 0.8575742325113236, + "kl_loss": 0.14469796419143677, + "loss_ib": 0.005461292807012796, + "step": 2982 + }, + { + "ce_ib": 5.612699508666992, + "ce_orig": 0.8713221549987793, + "epoch": 0.8575742325113236, + "kl_loss": 0.27516961097717285, + "loss_ib": 0.00836439523845911, + "step": 2982 + }, + { + "ce_ib": 3.676208257675171, + "ce_orig": 0.7132762670516968, + "epoch": 0.8575742325113236, + "kl_loss": 0.09339886158704758, + "loss_ib": 0.004610196687281132, + "step": 2982 + }, + { + "ce_ib": 3.723997116088867, + "ce_orig": 0.5823830962181091, + "epoch": 0.8578618160903012, + "kl_loss": 0.23006170988082886, + "loss_ib": 0.006024614442139864, + "step": 2983 + }, + { + "ce_ib": 6.040067195892334, + "ce_orig": 1.2723535299301147, + "epoch": 0.8578618160903012, + "kl_loss": 0.23192352056503296, + "loss_ib": 0.008359301835298538, + "step": 2983 + }, + { + "ce_ib": 2.7833971977233887, + "ce_orig": 0.8930437564849854, + "epoch": 0.8578618160903012, + "kl_loss": 0.11118154227733612, + "loss_ib": 0.0038952124305069447, + "step": 2983 + }, + { + "ce_ib": 5.695436954498291, + "ce_orig": 1.3627220392227173, + "epoch": 0.8578618160903012, + "kl_loss": 0.1550302356481552, + "loss_ib": 0.007245739456266165, + "step": 2983 + }, + { + "ce_ib": 5.908705711364746, + "ce_orig": 1.441714882850647, + "epoch": 0.8581493996692788, + "kl_loss": 0.17177727818489075, + "loss_ib": 0.007626478560268879, + "step": 2984 + }, + { + "ce_ib": 7.352113723754883, + "ce_orig": 1.374422311782837, + "epoch": 0.8581493996692788, + "kl_loss": 0.17505773901939392, + "loss_ib": 0.00910269096493721, + "step": 2984 + }, + { + "ce_ib": 1.1674845218658447, + "ce_orig": 0.21033094823360443, + "epoch": 0.8581493996692788, + "kl_loss": 0.3936414420604706, + "loss_ib": 0.0051038991659879684, + "step": 2984 + }, + { + "ce_ib": 2.513113021850586, + "ce_orig": 0.7041570544242859, + "epoch": 0.8581493996692788, + "kl_loss": 0.18227624893188477, + "loss_ib": 0.0043358756229281425, + "step": 2984 + }, + { + "epoch": 0.8584369832482566, + "grad_norm": 0.1366492211818695, + "learning_rate": 8.384303677820244e-06, + "loss": 0.825, + "step": 2985 + }, + { + "ce_ib": 3.718862295150757, + "ce_orig": 0.7346978187561035, + "epoch": 0.8584369832482566, + "kl_loss": 0.1767883002758026, + "loss_ib": 0.005486744921654463, + "step": 2985 + }, + { + "ce_ib": 3.2781970500946045, + "ce_orig": 0.7281125783920288, + "epoch": 0.8584369832482566, + "kl_loss": 0.11536775529384613, + "loss_ib": 0.004431874491274357, + "step": 2985 + }, + { + "ce_ib": 6.139727592468262, + "ce_orig": 1.4238390922546387, + "epoch": 0.8584369832482566, + "kl_loss": 0.19449162483215332, + "loss_ib": 0.008084643632173538, + "step": 2985 + }, + { + "ce_ib": 6.335012435913086, + "ce_orig": 1.5576012134552002, + "epoch": 0.8584369832482566, + "kl_loss": 0.20960426330566406, + "loss_ib": 0.008431054651737213, + "step": 2985 + }, + { + "ce_ib": 4.708496570587158, + "ce_orig": 0.7799429893493652, + "epoch": 0.8587245668272342, + "kl_loss": 0.2956230938434601, + "loss_ib": 0.007664727512747049, + "step": 2986 + }, + { + "ce_ib": 3.8033554553985596, + "ce_orig": 1.075318694114685, + "epoch": 0.8587245668272342, + "kl_loss": 0.1524268090724945, + "loss_ib": 0.005327623337507248, + "step": 2986 + }, + { + "ce_ib": 5.771701812744141, + "ce_orig": 1.2864129543304443, + "epoch": 0.8587245668272342, + "kl_loss": 0.23551759123802185, + "loss_ib": 0.008126877248287201, + "step": 2986 + }, + { + "ce_ib": 3.5633580684661865, + "ce_orig": 0.8883635997772217, + "epoch": 0.8587245668272342, + "kl_loss": 0.16270112991333008, + "loss_ib": 0.0051903692074120045, + "step": 2986 + }, + { + "ce_ib": 3.488046884536743, + "ce_orig": 0.7503054141998291, + "epoch": 0.8590121504062118, + "kl_loss": 0.23679843544960022, + "loss_ib": 0.005856031086295843, + "step": 2987 + }, + { + "ce_ib": 2.880974292755127, + "ce_orig": 0.586113691329956, + "epoch": 0.8590121504062118, + "kl_loss": 0.14423543214797974, + "loss_ib": 0.004323328845202923, + "step": 2987 + }, + { + "ce_ib": 4.842213153839111, + "ce_orig": 1.0663623809814453, + "epoch": 0.8590121504062118, + "kl_loss": 0.2068026065826416, + "loss_ib": 0.0069102393463253975, + "step": 2987 + }, + { + "ce_ib": 6.419052600860596, + "ce_orig": 0.9741712212562561, + "epoch": 0.8590121504062118, + "kl_loss": 0.16354122757911682, + "loss_ib": 0.008054465055465698, + "step": 2987 + }, + { + "ce_ib": 2.863057851791382, + "ce_orig": 0.6474774479866028, + "epoch": 0.8592997339851894, + "kl_loss": 0.16997593641281128, + "loss_ib": 0.004562817048281431, + "step": 2988 + }, + { + "ce_ib": 5.92358922958374, + "ce_orig": 1.41544508934021, + "epoch": 0.8592997339851894, + "kl_loss": 0.22443082928657532, + "loss_ib": 0.00816789735108614, + "step": 2988 + }, + { + "ce_ib": 1.9370551109313965, + "ce_orig": 0.40236911177635193, + "epoch": 0.8592997339851894, + "kl_loss": 0.17839156091213226, + "loss_ib": 0.003720970591530204, + "step": 2988 + }, + { + "ce_ib": 4.112259864807129, + "ce_orig": 1.0245790481567383, + "epoch": 0.8592997339851894, + "kl_loss": 0.19733837246894836, + "loss_ib": 0.006085643544793129, + "step": 2988 + }, + { + "ce_ib": 5.073434829711914, + "ce_orig": 0.9975028038024902, + "epoch": 0.8595873175641671, + "kl_loss": 0.27723363041877747, + "loss_ib": 0.007845771498978138, + "step": 2989 + }, + { + "ce_ib": 3.7721898555755615, + "ce_orig": 0.6361210346221924, + "epoch": 0.8595873175641671, + "kl_loss": 0.22449322044849396, + "loss_ib": 0.0060171219520270824, + "step": 2989 + }, + { + "ce_ib": 0.7623528242111206, + "ce_orig": 0.14674201607704163, + "epoch": 0.8595873175641671, + "kl_loss": 0.4075654149055481, + "loss_ib": 0.004838007036596537, + "step": 2989 + }, + { + "ce_ib": 3.3159894943237305, + "ce_orig": 0.8190420269966125, + "epoch": 0.8595873175641671, + "kl_loss": 0.26408734917640686, + "loss_ib": 0.0059568630531430244, + "step": 2989 + }, + { + "epoch": 0.8598749011431447, + "grad_norm": 0.10596659034490585, + "learning_rate": 8.378586754490483e-06, + "loss": 0.8369, + "step": 2990 + }, + { + "ce_ib": 3.150998115539551, + "ce_orig": 0.7614936828613281, + "epoch": 0.8598749011431447, + "kl_loss": 0.15050023794174194, + "loss_ib": 0.004656000528484583, + "step": 2990 + }, + { + "ce_ib": 6.584854602813721, + "ce_orig": 1.3150067329406738, + "epoch": 0.8598749011431447, + "kl_loss": 0.20752540230751038, + "loss_ib": 0.008660108782351017, + "step": 2990 + }, + { + "ce_ib": 2.7269551753997803, + "ce_orig": 0.6325028538703918, + "epoch": 0.8598749011431447, + "kl_loss": 0.1750892698764801, + "loss_ib": 0.004477847833186388, + "step": 2990 + }, + { + "ce_ib": 6.839788436889648, + "ce_orig": 1.768015742301941, + "epoch": 0.8598749011431447, + "kl_loss": 0.16023500263690948, + "loss_ib": 0.008442138321697712, + "step": 2990 + }, + { + "ce_ib": 2.470526695251465, + "ce_orig": 0.5935539603233337, + "epoch": 0.8601624847221223, + "kl_loss": 0.1589840203523636, + "loss_ib": 0.004060366656631231, + "step": 2991 + }, + { + "ce_ib": 6.190135478973389, + "ce_orig": 1.5965759754180908, + "epoch": 0.8601624847221223, + "kl_loss": 0.17117367684841156, + "loss_ib": 0.007901872508227825, + "step": 2991 + }, + { + "ce_ib": 6.627017974853516, + "ce_orig": 1.4191302061080933, + "epoch": 0.8601624847221223, + "kl_loss": 0.21776404976844788, + "loss_ib": 0.00880465842783451, + "step": 2991 + }, + { + "ce_ib": 4.46639347076416, + "ce_orig": 0.9868731498718262, + "epoch": 0.8601624847221223, + "kl_loss": 0.09350216388702393, + "loss_ib": 0.0054014152847230434, + "step": 2991 + }, + { + "ce_ib": 4.430308818817139, + "ce_orig": 0.7406636476516724, + "epoch": 0.8604500683011, + "kl_loss": 0.25927644968032837, + "loss_ib": 0.0070230732671916485, + "step": 2992 + }, + { + "ce_ib": 1.1932636499404907, + "ce_orig": 0.4177144467830658, + "epoch": 0.8604500683011, + "kl_loss": 0.09548039734363556, + "loss_ib": 0.0021480675786733627, + "step": 2992 + }, + { + "ce_ib": 5.048689365386963, + "ce_orig": 1.0911771059036255, + "epoch": 0.8604500683011, + "kl_loss": 0.21020181477069855, + "loss_ib": 0.007150707300752401, + "step": 2992 + }, + { + "ce_ib": 4.805177211761475, + "ce_orig": 0.7214016914367676, + "epoch": 0.8604500683011, + "kl_loss": 0.23380136489868164, + "loss_ib": 0.007143191061913967, + "step": 2992 + }, + { + "ce_ib": 3.072817802429199, + "ce_orig": 0.9012374877929688, + "epoch": 0.8607376518800777, + "kl_loss": 0.15334683656692505, + "loss_ib": 0.004606286063790321, + "step": 2993 + }, + { + "ce_ib": 3.2643120288848877, + "ce_orig": 0.648951530456543, + "epoch": 0.8607376518800777, + "kl_loss": 0.1993439644575119, + "loss_ib": 0.005257751792669296, + "step": 2993 + }, + { + "ce_ib": 5.807519912719727, + "ce_orig": 1.3328781127929688, + "epoch": 0.8607376518800777, + "kl_loss": 0.1537371426820755, + "loss_ib": 0.00734489131718874, + "step": 2993 + }, + { + "ce_ib": 4.729267120361328, + "ce_orig": 1.1192653179168701, + "epoch": 0.8607376518800777, + "kl_loss": 0.14197349548339844, + "loss_ib": 0.006149002350866795, + "step": 2993 + }, + { + "ce_ib": 2.4578466415405273, + "ce_orig": 0.7148013114929199, + "epoch": 0.8610252354590553, + "kl_loss": 0.1689370572566986, + "loss_ib": 0.004147217143326998, + "step": 2994 + }, + { + "ce_ib": 4.9760260581970215, + "ce_orig": 0.7658789753913879, + "epoch": 0.8610252354590553, + "kl_loss": 0.28641277551651, + "loss_ib": 0.007840153761208057, + "step": 2994 + }, + { + "ce_ib": 3.9919309616088867, + "ce_orig": 0.8315105438232422, + "epoch": 0.8610252354590553, + "kl_loss": 0.2282397747039795, + "loss_ib": 0.00627432856708765, + "step": 2994 + }, + { + "ce_ib": 4.871729850769043, + "ce_orig": 1.098596453666687, + "epoch": 0.8610252354590553, + "kl_loss": 0.18684494495391846, + "loss_ib": 0.0067401789128780365, + "step": 2994 + }, + { + "epoch": 0.8613128190380329, + "grad_norm": 0.1303800791501999, + "learning_rate": 8.372861691361044e-06, + "loss": 0.8676, + "step": 2995 + }, + { + "ce_ib": 2.466981887817383, + "ce_orig": 0.5816183686256409, + "epoch": 0.8613128190380329, + "kl_loss": 0.2223205715417862, + "loss_ib": 0.004690187517553568, + "step": 2995 + }, + { + "ce_ib": 2.121854543685913, + "ce_orig": 0.6220636367797852, + "epoch": 0.8613128190380329, + "kl_loss": 0.1483505368232727, + "loss_ib": 0.003605359699577093, + "step": 2995 + }, + { + "ce_ib": 1.8765342235565186, + "ce_orig": 0.5450969338417053, + "epoch": 0.8613128190380329, + "kl_loss": 0.16896703839302063, + "loss_ib": 0.003566204570233822, + "step": 2995 + }, + { + "ce_ib": 2.9437177181243896, + "ce_orig": 0.6614558100700378, + "epoch": 0.8613128190380329, + "kl_loss": 0.19010262191295624, + "loss_ib": 0.004844744224101305, + "step": 2995 + }, + { + "ce_ib": 3.4529480934143066, + "ce_orig": 0.7244323492050171, + "epoch": 0.8616004026170105, + "kl_loss": 0.13880489766597748, + "loss_ib": 0.004840996582061052, + "step": 2996 + }, + { + "ce_ib": 2.5768890380859375, + "ce_orig": 0.456135630607605, + "epoch": 0.8616004026170105, + "kl_loss": 0.218291774392128, + "loss_ib": 0.004759806673973799, + "step": 2996 + }, + { + "ce_ib": 4.2617950439453125, + "ce_orig": 0.886661946773529, + "epoch": 0.8616004026170105, + "kl_loss": 0.16381962597370148, + "loss_ib": 0.005899991374462843, + "step": 2996 + }, + { + "ce_ib": 5.616584777832031, + "ce_orig": 1.159299612045288, + "epoch": 0.8616004026170105, + "kl_loss": 0.19833976030349731, + "loss_ib": 0.007599982433021069, + "step": 2996 + }, + { + "ce_ib": 3.293844699859619, + "ce_orig": 0.6831731796264648, + "epoch": 0.8618879861959882, + "kl_loss": 0.19201332330703735, + "loss_ib": 0.005213978234678507, + "step": 2997 + }, + { + "ce_ib": 2.5857510566711426, + "ce_orig": 0.7857537269592285, + "epoch": 0.8618879861959882, + "kl_loss": 0.13751493394374847, + "loss_ib": 0.003960900008678436, + "step": 2997 + }, + { + "ce_ib": 3.603851795196533, + "ce_orig": 0.6430637240409851, + "epoch": 0.8618879861959882, + "kl_loss": 0.17404192686080933, + "loss_ib": 0.0053442707285285, + "step": 2997 + }, + { + "ce_ib": 1.960789680480957, + "ce_orig": 0.45327359437942505, + "epoch": 0.8618879861959882, + "kl_loss": 0.1968812793493271, + "loss_ib": 0.003929602447897196, + "step": 2997 + }, + { + "ce_ib": 3.7228963375091553, + "ce_orig": 0.2873280942440033, + "epoch": 0.8621755697749659, + "kl_loss": 0.15952713787555695, + "loss_ib": 0.005318167619407177, + "step": 2998 + }, + { + "ce_ib": 3.984111785888672, + "ce_orig": 1.010909080505371, + "epoch": 0.8621755697749659, + "kl_loss": 0.19310228526592255, + "loss_ib": 0.005915134213864803, + "step": 2998 + }, + { + "ce_ib": 2.526618719100952, + "ce_orig": 0.5332162976264954, + "epoch": 0.8621755697749659, + "kl_loss": 0.21336935460567474, + "loss_ib": 0.004660312552005053, + "step": 2998 + }, + { + "ce_ib": 1.7668520212173462, + "ce_orig": 0.542713463306427, + "epoch": 0.8621755697749659, + "kl_loss": 0.11394859105348587, + "loss_ib": 0.002906337846070528, + "step": 2998 + }, + { + "ce_ib": 4.356890678405762, + "ce_orig": 0.6856581568717957, + "epoch": 0.8624631533539435, + "kl_loss": 0.20584648847579956, + "loss_ib": 0.006415355019271374, + "step": 2999 + }, + { + "ce_ib": 4.757500648498535, + "ce_orig": 1.188984751701355, + "epoch": 0.8624631533539435, + "kl_loss": 0.13669447600841522, + "loss_ib": 0.006124445237219334, + "step": 2999 + }, + { + "ce_ib": 4.7899932861328125, + "ce_orig": 1.0844311714172363, + "epoch": 0.8624631533539435, + "kl_loss": 0.2915774881839752, + "loss_ib": 0.00770576810464263, + "step": 2999 + }, + { + "ce_ib": 5.037421703338623, + "ce_orig": 0.6441201567649841, + "epoch": 0.8624631533539435, + "kl_loss": 0.23180849850177765, + "loss_ib": 0.00735550606623292, + "step": 2999 + }, + { + "epoch": 0.8627507369329211, + "grad_norm": 0.13104727864265442, + "learning_rate": 8.367128502224931e-06, + "loss": 0.8361, + "step": 3000 + }, + { + "ce_ib": 1.9772206544876099, + "ce_orig": 0.5333869457244873, + "epoch": 0.8627507369329211, + "kl_loss": 0.1758229285478592, + "loss_ib": 0.0037354498635977507, + "step": 3000 + }, + { + "ce_ib": 4.860544681549072, + "ce_orig": 1.271416187286377, + "epoch": 0.8627507369329211, + "kl_loss": 0.17729312181472778, + "loss_ib": 0.006633475888520479, + "step": 3000 + }, + { + "ce_ib": 3.0738236904144287, + "ce_orig": 0.34895452857017517, + "epoch": 0.8627507369329211, + "kl_loss": 0.1731383353471756, + "loss_ib": 0.004805207252502441, + "step": 3000 + }, + { + "ce_ib": 5.707888603210449, + "ce_orig": 0.8969690203666687, + "epoch": 0.8627507369329211, + "kl_loss": 0.2259966880083084, + "loss_ib": 0.007967854849994183, + "step": 3000 + }, + { + "ce_ib": 1.1887364387512207, + "ce_orig": 0.19286949932575226, + "epoch": 0.8630383205118988, + "kl_loss": 0.38218259811401367, + "loss_ib": 0.0050105624832212925, + "step": 3001 + }, + { + "ce_ib": 1.5354338884353638, + "ce_orig": 0.4168838858604431, + "epoch": 0.8630383205118988, + "kl_loss": 0.20836549997329712, + "loss_ib": 0.003619089024141431, + "step": 3001 + }, + { + "ce_ib": 4.300173759460449, + "ce_orig": 0.9959496259689331, + "epoch": 0.8630383205118988, + "kl_loss": 0.16739241778850555, + "loss_ib": 0.005974097643047571, + "step": 3001 + }, + { + "ce_ib": 3.0195460319519043, + "ce_orig": 0.642698347568512, + "epoch": 0.8630383205118988, + "kl_loss": 0.20051544904708862, + "loss_ib": 0.0050246999599039555, + "step": 3001 + }, + { + "ce_ib": 2.4399235248565674, + "ce_orig": 0.6809800267219543, + "epoch": 0.8633259040908764, + "kl_loss": 0.11953020095825195, + "loss_ib": 0.003635225584730506, + "step": 3002 + }, + { + "ce_ib": 4.555309295654297, + "ce_orig": 1.141787052154541, + "epoch": 0.8633259040908764, + "kl_loss": 0.23614510893821716, + "loss_ib": 0.006916760001331568, + "step": 3002 + }, + { + "ce_ib": 2.8278424739837646, + "ce_orig": 0.6852285265922546, + "epoch": 0.8633259040908764, + "kl_loss": 0.14180883765220642, + "loss_ib": 0.004245930816978216, + "step": 3002 + }, + { + "ce_ib": 4.3479509353637695, + "ce_orig": 1.0186361074447632, + "epoch": 0.8633259040908764, + "kl_loss": 0.23219282925128937, + "loss_ib": 0.006669879425317049, + "step": 3002 + }, + { + "ce_ib": 4.622503280639648, + "ce_orig": 1.0129183530807495, + "epoch": 0.863613487669854, + "kl_loss": 0.1800311803817749, + "loss_ib": 0.006422815378755331, + "step": 3003 + }, + { + "ce_ib": 5.854118347167969, + "ce_orig": 0.982895016670227, + "epoch": 0.863613487669854, + "kl_loss": 0.17344164848327637, + "loss_ib": 0.007588534615933895, + "step": 3003 + }, + { + "ce_ib": 5.198338031768799, + "ce_orig": 1.0541749000549316, + "epoch": 0.863613487669854, + "kl_loss": 0.3520140051841736, + "loss_ib": 0.008718477562069893, + "step": 3003 + }, + { + "ce_ib": 4.521932125091553, + "ce_orig": 1.2909585237503052, + "epoch": 0.863613487669854, + "kl_loss": 0.19401347637176514, + "loss_ib": 0.006462067365646362, + "step": 3003 + }, + { + "ce_ib": 4.217136383056641, + "ce_orig": 0.8373450040817261, + "epoch": 0.8639010712488316, + "kl_loss": 0.18357031047344208, + "loss_ib": 0.006052839569747448, + "step": 3004 + }, + { + "ce_ib": 3.8651678562164307, + "ce_orig": 0.7563463449478149, + "epoch": 0.8639010712488316, + "kl_loss": 0.15264450013637543, + "loss_ib": 0.005391612648963928, + "step": 3004 + }, + { + "ce_ib": 2.9442851543426514, + "ce_orig": 0.8526968359947205, + "epoch": 0.8639010712488316, + "kl_loss": 0.187911719083786, + "loss_ib": 0.004823402501642704, + "step": 3004 + }, + { + "ce_ib": 4.832127571105957, + "ce_orig": 1.2579444646835327, + "epoch": 0.8639010712488316, + "kl_loss": 0.21027392148971558, + "loss_ib": 0.006934866774827242, + "step": 3004 + }, + { + "epoch": 0.8641886548278094, + "grad_norm": 0.13010871410369873, + "learning_rate": 8.361387200894731e-06, + "loss": 0.8291, + "step": 3005 + }, + { + "ce_ib": 7.837201118469238, + "ce_orig": 1.6624194383621216, + "epoch": 0.8641886548278094, + "kl_loss": 0.26533836126327515, + "loss_ib": 0.010490584187209606, + "step": 3005 + }, + { + "ce_ib": 2.4636898040771484, + "ce_orig": 0.5575696229934692, + "epoch": 0.8641886548278094, + "kl_loss": 0.07987219095230103, + "loss_ib": 0.003262411803007126, + "step": 3005 + }, + { + "ce_ib": 5.4659223556518555, + "ce_orig": 1.5140830278396606, + "epoch": 0.8641886548278094, + "kl_loss": 0.13353462517261505, + "loss_ib": 0.006801268085837364, + "step": 3005 + }, + { + "ce_ib": 2.3106443881988525, + "ce_orig": 0.6429100036621094, + "epoch": 0.8641886548278094, + "kl_loss": 0.11272639036178589, + "loss_ib": 0.0034379081334918737, + "step": 3005 + }, + { + "ce_ib": 4.110043525695801, + "ce_orig": 0.755738377571106, + "epoch": 0.864476238406787, + "kl_loss": 0.22144170105457306, + "loss_ib": 0.00632446026429534, + "step": 3006 + }, + { + "ce_ib": 4.292721748352051, + "ce_orig": 0.6863679885864258, + "epoch": 0.864476238406787, + "kl_loss": 0.2579711675643921, + "loss_ib": 0.006872433237731457, + "step": 3006 + }, + { + "ce_ib": 6.222200870513916, + "ce_orig": 1.4691364765167236, + "epoch": 0.864476238406787, + "kl_loss": 0.1675024926662445, + "loss_ib": 0.007897225208580494, + "step": 3006 + }, + { + "ce_ib": 3.179210901260376, + "ce_orig": 0.8284295797348022, + "epoch": 0.864476238406787, + "kl_loss": 0.13368403911590576, + "loss_ib": 0.004516051150858402, + "step": 3006 + }, + { + "ce_ib": 2.87296986579895, + "ce_orig": 0.4777787923812866, + "epoch": 0.8647638219857646, + "kl_loss": 0.1723158359527588, + "loss_ib": 0.00459612812846899, + "step": 3007 + }, + { + "ce_ib": 3.62384033203125, + "ce_orig": 0.65885990858078, + "epoch": 0.8647638219857646, + "kl_loss": 0.1805269718170166, + "loss_ib": 0.005429110489785671, + "step": 3007 + }, + { + "ce_ib": 1.1863665580749512, + "ce_orig": 0.19569087028503418, + "epoch": 0.8647638219857646, + "kl_loss": 0.4047548174858093, + "loss_ib": 0.005233914591372013, + "step": 3007 + }, + { + "ce_ib": 3.26503324508667, + "ce_orig": 0.7990972399711609, + "epoch": 0.8647638219857646, + "kl_loss": 0.1750766485929489, + "loss_ib": 0.005015799775719643, + "step": 3007 + }, + { + "ce_ib": 4.644229888916016, + "ce_orig": 1.0495415925979614, + "epoch": 0.8650514055647422, + "kl_loss": 0.2083299160003662, + "loss_ib": 0.006727529224008322, + "step": 3008 + }, + { + "ce_ib": 3.3299522399902344, + "ce_orig": 0.7625173330307007, + "epoch": 0.8650514055647422, + "kl_loss": 0.17443007230758667, + "loss_ib": 0.005074252840131521, + "step": 3008 + }, + { + "ce_ib": 1.9631105661392212, + "ce_orig": 0.583035945892334, + "epoch": 0.8650514055647422, + "kl_loss": 0.14565034210681915, + "loss_ib": 0.0034196136984974146, + "step": 3008 + }, + { + "ce_ib": 3.0216565132141113, + "ce_orig": 0.8371701240539551, + "epoch": 0.8650514055647422, + "kl_loss": 0.18516576290130615, + "loss_ib": 0.004873313941061497, + "step": 3008 + }, + { + "ce_ib": 4.222174167633057, + "ce_orig": 0.9483547210693359, + "epoch": 0.8653389891437199, + "kl_loss": 0.13453739881515503, + "loss_ib": 0.005567548330873251, + "step": 3009 + }, + { + "ce_ib": 6.9173102378845215, + "ce_orig": 1.6285717487335205, + "epoch": 0.8653389891437199, + "kl_loss": 0.14785441756248474, + "loss_ib": 0.008395854383707047, + "step": 3009 + }, + { + "ce_ib": 3.075317859649658, + "ce_orig": 0.46485623717308044, + "epoch": 0.8653389891437199, + "kl_loss": 0.1814536154270172, + "loss_ib": 0.004889854229986668, + "step": 3009 + }, + { + "ce_ib": 4.928703784942627, + "ce_orig": 1.0569024085998535, + "epoch": 0.8653389891437199, + "kl_loss": 0.22923503816127777, + "loss_ib": 0.007221054285764694, + "step": 3009 + }, + { + "epoch": 0.8656265727226975, + "grad_norm": 0.12698809802532196, + "learning_rate": 8.355637801202569e-06, + "loss": 0.8535, + "step": 3010 + }, + { + "ce_ib": 2.6460816860198975, + "ce_orig": 0.43447649478912354, + "epoch": 0.8656265727226975, + "kl_loss": 0.1479308307170868, + "loss_ib": 0.004125389736145735, + "step": 3010 + }, + { + "ce_ib": 3.938239574432373, + "ce_orig": 0.9360447525978088, + "epoch": 0.8656265727226975, + "kl_loss": 0.11909787356853485, + "loss_ib": 0.005129218101501465, + "step": 3010 + }, + { + "ce_ib": 4.355501651763916, + "ce_orig": 1.087766170501709, + "epoch": 0.8656265727226975, + "kl_loss": 0.16360707581043243, + "loss_ib": 0.005991572514176369, + "step": 3010 + }, + { + "ce_ib": 6.553983688354492, + "ce_orig": 1.283312439918518, + "epoch": 0.8656265727226975, + "kl_loss": 0.17514923214912415, + "loss_ib": 0.008305475115776062, + "step": 3010 + }, + { + "ce_ib": 3.4389612674713135, + "ce_orig": 0.7475440502166748, + "epoch": 0.8659141563016751, + "kl_loss": 0.15552109479904175, + "loss_ib": 0.004994172137230635, + "step": 3011 + }, + { + "ce_ib": 2.543870449066162, + "ce_orig": 0.6337660551071167, + "epoch": 0.8659141563016751, + "kl_loss": 0.16164501011371613, + "loss_ib": 0.0041603208519518375, + "step": 3011 + }, + { + "ce_ib": 5.69629430770874, + "ce_orig": 1.1121882200241089, + "epoch": 0.8659141563016751, + "kl_loss": 0.19035005569458008, + "loss_ib": 0.0075997947715222836, + "step": 3011 + }, + { + "ce_ib": 3.514690637588501, + "ce_orig": 0.6305133104324341, + "epoch": 0.8659141563016751, + "kl_loss": 0.1579352617263794, + "loss_ib": 0.005094042979180813, + "step": 3011 + }, + { + "ce_ib": 7.271183967590332, + "ce_orig": 1.3676300048828125, + "epoch": 0.8662017398806529, + "kl_loss": 0.139164999127388, + "loss_ib": 0.008662833832204342, + "step": 3012 + }, + { + "ce_ib": 4.112687587738037, + "ce_orig": 0.4896656572818756, + "epoch": 0.8662017398806529, + "kl_loss": 0.18893960118293762, + "loss_ib": 0.006002083886414766, + "step": 3012 + }, + { + "ce_ib": 1.918709397315979, + "ce_orig": 0.24923954904079437, + "epoch": 0.8662017398806529, + "kl_loss": 0.20903825759887695, + "loss_ib": 0.0040090917609632015, + "step": 3012 + }, + { + "ce_ib": 2.594801664352417, + "ce_orig": 0.6271476149559021, + "epoch": 0.8662017398806529, + "kl_loss": 0.17794287204742432, + "loss_ib": 0.004374230280518532, + "step": 3012 + }, + { + "ce_ib": 3.2675163745880127, + "ce_orig": 0.7322733998298645, + "epoch": 0.8664893234596305, + "kl_loss": 0.20991019904613495, + "loss_ib": 0.005366618745028973, + "step": 3013 + }, + { + "ce_ib": 5.20664119720459, + "ce_orig": 0.7670385241508484, + "epoch": 0.8664893234596305, + "kl_loss": 0.227274090051651, + "loss_ib": 0.007479382678866386, + "step": 3013 + }, + { + "ce_ib": 4.029938220977783, + "ce_orig": 0.8140459656715393, + "epoch": 0.8664893234596305, + "kl_loss": 0.18185733258724213, + "loss_ib": 0.0058485111221671104, + "step": 3013 + }, + { + "ce_ib": 3.2437539100646973, + "ce_orig": 0.9006868600845337, + "epoch": 0.8664893234596305, + "kl_loss": 0.1517217457294464, + "loss_ib": 0.004760971292853355, + "step": 3013 + }, + { + "ce_ib": 7.016524314880371, + "ce_orig": 1.2513902187347412, + "epoch": 0.8667769070386081, + "kl_loss": 0.16712398827075958, + "loss_ib": 0.008687764406204224, + "step": 3014 + }, + { + "ce_ib": 3.4075658321380615, + "ce_orig": 0.4264299273490906, + "epoch": 0.8667769070386081, + "kl_loss": 0.1699734926223755, + "loss_ib": 0.0051073008216917515, + "step": 3014 + }, + { + "ce_ib": 8.140447616577148, + "ce_orig": 1.5638492107391357, + "epoch": 0.8667769070386081, + "kl_loss": 0.16669031977653503, + "loss_ib": 0.009807350113987923, + "step": 3014 + }, + { + "ce_ib": 6.0623931884765625, + "ce_orig": 1.4582245349884033, + "epoch": 0.8667769070386081, + "kl_loss": 0.3340158760547638, + "loss_ib": 0.00940255168825388, + "step": 3014 + }, + { + "epoch": 0.8670644906175857, + "grad_norm": 0.1335875689983368, + "learning_rate": 8.349880317000083e-06, + "loss": 0.8158, + "step": 3015 + }, + { + "ce_ib": 2.362092971801758, + "ce_orig": 0.6302754878997803, + "epoch": 0.8670644906175857, + "kl_loss": 0.12334628403186798, + "loss_ib": 0.0035955558996647596, + "step": 3015 + }, + { + "ce_ib": 4.981596946716309, + "ce_orig": 1.261350393295288, + "epoch": 0.8670644906175857, + "kl_loss": 0.15054452419281006, + "loss_ib": 0.006487042177468538, + "step": 3015 + }, + { + "ce_ib": 4.897180557250977, + "ce_orig": 1.0228736400604248, + "epoch": 0.8670644906175857, + "kl_loss": 0.16880470514297485, + "loss_ib": 0.006585227325558662, + "step": 3015 + }, + { + "ce_ib": 3.6332204341888428, + "ce_orig": 0.5773554444313049, + "epoch": 0.8670644906175857, + "kl_loss": 0.1567068099975586, + "loss_ib": 0.005200288258492947, + "step": 3015 + }, + { + "ce_ib": 3.042067766189575, + "ce_orig": 0.5106818079948425, + "epoch": 0.8673520741965633, + "kl_loss": 0.2356758415699005, + "loss_ib": 0.0053988266736269, + "step": 3016 + }, + { + "ce_ib": 5.268722057342529, + "ce_orig": 1.2216737270355225, + "epoch": 0.8673520741965633, + "kl_loss": 0.21203766763210297, + "loss_ib": 0.0073890988714993, + "step": 3016 + }, + { + "ce_ib": 1.6301428079605103, + "ce_orig": 0.524239718914032, + "epoch": 0.8673520741965633, + "kl_loss": 0.11092757433652878, + "loss_ib": 0.0027394182980060577, + "step": 3016 + }, + { + "ce_ib": 5.71569299697876, + "ce_orig": 0.9442450404167175, + "epoch": 0.8673520741965633, + "kl_loss": 0.1812833994626999, + "loss_ib": 0.007528527174144983, + "step": 3016 + }, + { + "ce_ib": 1.7585885524749756, + "ce_orig": 0.5057519674301147, + "epoch": 0.867639657775541, + "kl_loss": 0.16781117022037506, + "loss_ib": 0.003436699975281954, + "step": 3017 + }, + { + "ce_ib": 4.428910732269287, + "ce_orig": 0.9075770378112793, + "epoch": 0.867639657775541, + "kl_loss": 0.23230570554733276, + "loss_ib": 0.006751967594027519, + "step": 3017 + }, + { + "ce_ib": 3.950982093811035, + "ce_orig": 0.7783148884773254, + "epoch": 0.867639657775541, + "kl_loss": 0.17000272870063782, + "loss_ib": 0.005651008803397417, + "step": 3017 + }, + { + "ce_ib": 3.913686513900757, + "ce_orig": 0.9961515069007874, + "epoch": 0.867639657775541, + "kl_loss": 0.21526211500167847, + "loss_ib": 0.00606630789116025, + "step": 3017 + }, + { + "ce_ib": 4.94702672958374, + "ce_orig": 1.0543237924575806, + "epoch": 0.8679272413545187, + "kl_loss": 0.2064594030380249, + "loss_ib": 0.007011620793491602, + "step": 3018 + }, + { + "ce_ib": 3.4020309448242188, + "ce_orig": 0.9306709170341492, + "epoch": 0.8679272413545187, + "kl_loss": 0.10333293676376343, + "loss_ib": 0.004435360431671143, + "step": 3018 + }, + { + "ce_ib": 0.5967859625816345, + "ce_orig": 0.11385688185691833, + "epoch": 0.8679272413545187, + "kl_loss": 0.33057525753974915, + "loss_ib": 0.0039025384467095137, + "step": 3018 + }, + { + "ce_ib": 3.176997661590576, + "ce_orig": 0.5830402970314026, + "epoch": 0.8679272413545187, + "kl_loss": 0.17823773622512817, + "loss_ib": 0.004959374666213989, + "step": 3018 + }, + { + "ce_ib": 3.3964667320251465, + "ce_orig": 0.7229934334754944, + "epoch": 0.8682148249334963, + "kl_loss": 0.13125120103359222, + "loss_ib": 0.004708978347480297, + "step": 3019 + }, + { + "ce_ib": 5.4309563636779785, + "ce_orig": 0.7788625955581665, + "epoch": 0.8682148249334963, + "kl_loss": 0.1882954239845276, + "loss_ib": 0.007313910406082869, + "step": 3019 + }, + { + "ce_ib": 1.5707846879959106, + "ce_orig": 0.4568687677383423, + "epoch": 0.8682148249334963, + "kl_loss": 0.15187838673591614, + "loss_ib": 0.003089568577706814, + "step": 3019 + }, + { + "ce_ib": 3.410620927810669, + "ce_orig": 0.5367089509963989, + "epoch": 0.8682148249334963, + "kl_loss": 0.19426020979881287, + "loss_ib": 0.005353223066776991, + "step": 3019 + }, + { + "epoch": 0.868502408512474, + "grad_norm": 0.11909222602844238, + "learning_rate": 8.344114762158391e-06, + "loss": 0.8302, + "step": 3020 + }, + { + "ce_ib": 3.9181406497955322, + "ce_orig": 0.8403506875038147, + "epoch": 0.868502408512474, + "kl_loss": 0.22145986557006836, + "loss_ib": 0.006132739596068859, + "step": 3020 + }, + { + "ce_ib": 3.801875114440918, + "ce_orig": 0.9968141913414001, + "epoch": 0.868502408512474, + "kl_loss": 0.12872661650180817, + "loss_ib": 0.005089140962809324, + "step": 3020 + }, + { + "ce_ib": 3.749919891357422, + "ce_orig": 0.9062440991401672, + "epoch": 0.868502408512474, + "kl_loss": 0.12406952679157257, + "loss_ib": 0.004990614950656891, + "step": 3020 + }, + { + "ce_ib": 3.57771897315979, + "ce_orig": 0.8275331258773804, + "epoch": 0.868502408512474, + "kl_loss": 0.15949268639087677, + "loss_ib": 0.005172646138817072, + "step": 3020 + }, + { + "ce_ib": 3.058896064758301, + "ce_orig": 0.48331984877586365, + "epoch": 0.8687899920914516, + "kl_loss": 0.3053073287010193, + "loss_ib": 0.006111969240009785, + "step": 3021 + }, + { + "ce_ib": 2.301487445831299, + "ce_orig": 0.29902276396751404, + "epoch": 0.8687899920914516, + "kl_loss": 0.31316041946411133, + "loss_ib": 0.005433091428130865, + "step": 3021 + }, + { + "ce_ib": 2.1476869583129883, + "ce_orig": 0.7346675992012024, + "epoch": 0.8687899920914516, + "kl_loss": 0.13162323832511902, + "loss_ib": 0.0034639190416783094, + "step": 3021 + }, + { + "ce_ib": 4.939001083374023, + "ce_orig": 0.7055896520614624, + "epoch": 0.8687899920914516, + "kl_loss": 0.2624973952770233, + "loss_ib": 0.00756397470831871, + "step": 3021 + }, + { + "ce_ib": 4.971782684326172, + "ce_orig": 1.0777415037155151, + "epoch": 0.8690775756704292, + "kl_loss": 0.19297458231449127, + "loss_ib": 0.006901528686285019, + "step": 3022 + }, + { + "ce_ib": 4.517510414123535, + "ce_orig": 1.0288059711456299, + "epoch": 0.8690775756704292, + "kl_loss": 0.193510040640831, + "loss_ib": 0.006452610716223717, + "step": 3022 + }, + { + "ce_ib": 4.915410995483398, + "ce_orig": 1.3614414930343628, + "epoch": 0.8690775756704292, + "kl_loss": 0.21771442890167236, + "loss_ib": 0.007092555519193411, + "step": 3022 + }, + { + "ce_ib": 3.986006259918213, + "ce_orig": 0.6571169495582581, + "epoch": 0.8690775756704292, + "kl_loss": 0.19942648708820343, + "loss_ib": 0.00598027091473341, + "step": 3022 + }, + { + "ce_ib": 2.6020853519439697, + "ce_orig": 0.6312983632087708, + "epoch": 0.8693651592494068, + "kl_loss": 0.1265435516834259, + "loss_ib": 0.0038675209507346153, + "step": 3023 + }, + { + "ce_ib": 4.450971603393555, + "ce_orig": 0.8783058524131775, + "epoch": 0.8693651592494068, + "kl_loss": 0.14090636372566223, + "loss_ib": 0.005860035307705402, + "step": 3023 + }, + { + "ce_ib": 4.221871852874756, + "ce_orig": 0.9108918309211731, + "epoch": 0.8693651592494068, + "kl_loss": 0.19407954812049866, + "loss_ib": 0.00616266718134284, + "step": 3023 + }, + { + "ce_ib": 5.816306114196777, + "ce_orig": 1.3166329860687256, + "epoch": 0.8693651592494068, + "kl_loss": 0.18428364396095276, + "loss_ib": 0.007659142836928368, + "step": 3023 + }, + { + "ce_ib": 2.572385787963867, + "ce_orig": 0.6323646306991577, + "epoch": 0.8696527428283845, + "kl_loss": 0.18181702494621277, + "loss_ib": 0.004390555899590254, + "step": 3024 + }, + { + "ce_ib": 7.327784538269043, + "ce_orig": 1.58877694606781, + "epoch": 0.8696527428283845, + "kl_loss": 0.20328304171562195, + "loss_ib": 0.00936061516404152, + "step": 3024 + }, + { + "ce_ib": 5.797823429107666, + "ce_orig": 1.1490319967269897, + "epoch": 0.8696527428283845, + "kl_loss": 0.15995655953884125, + "loss_ib": 0.00739738903939724, + "step": 3024 + }, + { + "ce_ib": 3.218154191970825, + "ce_orig": 0.35950058698654175, + "epoch": 0.8696527428283845, + "kl_loss": 0.4894768297672272, + "loss_ib": 0.008112922310829163, + "step": 3024 + }, + { + "epoch": 0.8699403264073622, + "grad_norm": 0.14609524607658386, + "learning_rate": 8.33834115056805e-06, + "loss": 0.8787, + "step": 3025 + }, + { + "ce_ib": 4.010785102844238, + "ce_orig": 0.6598092913627625, + "epoch": 0.8699403264073622, + "kl_loss": 0.18467503786087036, + "loss_ib": 0.005857535637915134, + "step": 3025 + }, + { + "ce_ib": 5.192906856536865, + "ce_orig": 1.0597468614578247, + "epoch": 0.8699403264073622, + "kl_loss": 0.17567190527915955, + "loss_ib": 0.006949625909328461, + "step": 3025 + }, + { + "ce_ib": 1.9872652292251587, + "ce_orig": 0.40370696783065796, + "epoch": 0.8699403264073622, + "kl_loss": 0.22219263017177582, + "loss_ib": 0.0042091915383934975, + "step": 3025 + }, + { + "ce_ib": 4.006402015686035, + "ce_orig": 0.3677336871623993, + "epoch": 0.8699403264073622, + "kl_loss": 0.24998939037322998, + "loss_ib": 0.006506296340376139, + "step": 3025 + }, + { + "ce_ib": 2.88132381439209, + "ce_orig": 0.6982382535934448, + "epoch": 0.8702279099863398, + "kl_loss": 0.15904450416564941, + "loss_ib": 0.004471769090741873, + "step": 3026 + }, + { + "ce_ib": 2.6095731258392334, + "ce_orig": 0.7412109971046448, + "epoch": 0.8702279099863398, + "kl_loss": 0.13235488533973694, + "loss_ib": 0.003933121915906668, + "step": 3026 + }, + { + "ce_ib": 2.671224594116211, + "ce_orig": 0.3961498737335205, + "epoch": 0.8702279099863398, + "kl_loss": 0.23746153712272644, + "loss_ib": 0.005045840051025152, + "step": 3026 + }, + { + "ce_ib": 4.517078876495361, + "ce_orig": 0.8469858765602112, + "epoch": 0.8702279099863398, + "kl_loss": 0.17898641526699066, + "loss_ib": 0.006306943017989397, + "step": 3026 + }, + { + "ce_ib": 5.516362190246582, + "ce_orig": 0.3952403664588928, + "epoch": 0.8705154935653174, + "kl_loss": 0.17385250329971313, + "loss_ib": 0.007254886906594038, + "step": 3027 + }, + { + "ce_ib": 4.095155239105225, + "ce_orig": 0.5025407671928406, + "epoch": 0.8705154935653174, + "kl_loss": 0.2340993881225586, + "loss_ib": 0.0064361486583948135, + "step": 3027 + }, + { + "ce_ib": 7.067741870880127, + "ce_orig": 1.6179289817810059, + "epoch": 0.8705154935653174, + "kl_loss": 0.19046235084533691, + "loss_ib": 0.008972365409135818, + "step": 3027 + }, + { + "ce_ib": 3.600334644317627, + "ce_orig": 0.6266310811042786, + "epoch": 0.8705154935653174, + "kl_loss": 0.22558774054050446, + "loss_ib": 0.005856212228536606, + "step": 3027 + }, + { + "ce_ib": 3.046905994415283, + "ce_orig": 0.7626993656158447, + "epoch": 0.8708030771442951, + "kl_loss": 0.1426832526922226, + "loss_ib": 0.004473738372325897, + "step": 3028 + }, + { + "ce_ib": 7.039170742034912, + "ce_orig": 1.7132810354232788, + "epoch": 0.8708030771442951, + "kl_loss": 0.1748616099357605, + "loss_ib": 0.008787786588072777, + "step": 3028 + }, + { + "ce_ib": 5.05453634262085, + "ce_orig": 1.0629078149795532, + "epoch": 0.8708030771442951, + "kl_loss": 0.17724494636058807, + "loss_ib": 0.006826986093074083, + "step": 3028 + }, + { + "ce_ib": 4.112802982330322, + "ce_orig": 0.7018669247627258, + "epoch": 0.8708030771442951, + "kl_loss": 0.17210307717323303, + "loss_ib": 0.0058338334783911705, + "step": 3028 + }, + { + "ce_ib": 4.009239196777344, + "ce_orig": 0.7000343799591064, + "epoch": 0.8710906607232727, + "kl_loss": 0.36622560024261475, + "loss_ib": 0.00767149543389678, + "step": 3029 + }, + { + "ce_ib": 7.223428726196289, + "ce_orig": 1.3972519636154175, + "epoch": 0.8710906607232727, + "kl_loss": 0.14825300872325897, + "loss_ib": 0.008705958724021912, + "step": 3029 + }, + { + "ce_ib": 3.7556073665618896, + "ce_orig": 0.540886640548706, + "epoch": 0.8710906607232727, + "kl_loss": 0.20389699935913086, + "loss_ib": 0.0057945773005485535, + "step": 3029 + }, + { + "ce_ib": 4.7798895835876465, + "ce_orig": 0.957796573638916, + "epoch": 0.8710906607232727, + "kl_loss": 0.13983109593391418, + "loss_ib": 0.0061782002449035645, + "step": 3029 + }, + { + "epoch": 0.8713782443022503, + "grad_norm": 0.14438124001026154, + "learning_rate": 8.332559496139032e-06, + "loss": 0.8682, + "step": 3030 + }, + { + "ce_ib": 3.2834765911102295, + "ce_orig": 0.8015002608299255, + "epoch": 0.8713782443022503, + "kl_loss": 0.21262550354003906, + "loss_ib": 0.005409731529653072, + "step": 3030 + }, + { + "ce_ib": 6.317176342010498, + "ce_orig": 1.4887133836746216, + "epoch": 0.8713782443022503, + "kl_loss": 0.12564486265182495, + "loss_ib": 0.007573624607175589, + "step": 3030 + }, + { + "ce_ib": 3.0036516189575195, + "ce_orig": 0.6563520431518555, + "epoch": 0.8713782443022503, + "kl_loss": 0.1893462836742401, + "loss_ib": 0.004897114355117083, + "step": 3030 + }, + { + "ce_ib": 3.484760046005249, + "ce_orig": 0.7235775589942932, + "epoch": 0.8713782443022503, + "kl_loss": 0.18455787003040314, + "loss_ib": 0.005330339074134827, + "step": 3030 + }, + { + "ce_ib": 4.074502944946289, + "ce_orig": 0.3178923726081848, + "epoch": 0.8716658278812279, + "kl_loss": 0.26050621271133423, + "loss_ib": 0.006679564714431763, + "step": 3031 + }, + { + "ce_ib": 5.301794528961182, + "ce_orig": 0.9491296410560608, + "epoch": 0.8716658278812279, + "kl_loss": 0.2215113639831543, + "loss_ib": 0.007516907993704081, + "step": 3031 + }, + { + "ce_ib": 4.318514347076416, + "ce_orig": 1.0439366102218628, + "epoch": 0.8716658278812279, + "kl_loss": 0.14817553758621216, + "loss_ib": 0.005800269544124603, + "step": 3031 + }, + { + "ce_ib": 2.6792521476745605, + "ce_orig": 0.37578514218330383, + "epoch": 0.8716658278812279, + "kl_loss": 0.21641702950000763, + "loss_ib": 0.004843422677367926, + "step": 3031 + }, + { + "ce_ib": 4.457328796386719, + "ce_orig": 0.9045831561088562, + "epoch": 0.8719534114602057, + "kl_loss": 0.1507292538881302, + "loss_ib": 0.005964620970189571, + "step": 3032 + }, + { + "ce_ib": 6.486919403076172, + "ce_orig": 1.2994674444198608, + "epoch": 0.8719534114602057, + "kl_loss": 0.17472514510154724, + "loss_ib": 0.008234170265495777, + "step": 3032 + }, + { + "ce_ib": 4.274205684661865, + "ce_orig": 0.6470766067504883, + "epoch": 0.8719534114602057, + "kl_loss": 0.23298919200897217, + "loss_ib": 0.006604097783565521, + "step": 3032 + }, + { + "ce_ib": 7.044003009796143, + "ce_orig": 1.8121014833450317, + "epoch": 0.8719534114602057, + "kl_loss": 0.18318665027618408, + "loss_ib": 0.00887586921453476, + "step": 3032 + }, + { + "ce_ib": 3.3039450645446777, + "ce_orig": 0.8081182837486267, + "epoch": 0.8722409950391833, + "kl_loss": 0.08861543983221054, + "loss_ib": 0.00419009942561388, + "step": 3033 + }, + { + "ce_ib": 2.3807733058929443, + "ce_orig": 0.40308454632759094, + "epoch": 0.8722409950391833, + "kl_loss": 0.14029696583747864, + "loss_ib": 0.0037837426643818617, + "step": 3033 + }, + { + "ce_ib": 1.953352928161621, + "ce_orig": 0.640842080116272, + "epoch": 0.8722409950391833, + "kl_loss": 0.1372177004814148, + "loss_ib": 0.0033255298621952534, + "step": 3033 + }, + { + "ce_ib": 3.7499566078186035, + "ce_orig": 0.5217834711074829, + "epoch": 0.8722409950391833, + "kl_loss": 0.22263969480991364, + "loss_ib": 0.005976353771984577, + "step": 3033 + }, + { + "ce_ib": 4.762399196624756, + "ce_orig": 1.32420015335083, + "epoch": 0.8725285786181609, + "kl_loss": 0.23337429761886597, + "loss_ib": 0.007096142042428255, + "step": 3034 + }, + { + "ce_ib": 5.285161972045898, + "ce_orig": 1.2225781679153442, + "epoch": 0.8725285786181609, + "kl_loss": 0.1873425990343094, + "loss_ib": 0.0071585881523787975, + "step": 3034 + }, + { + "ce_ib": 6.1437087059021, + "ce_orig": 0.9043737649917603, + "epoch": 0.8725285786181609, + "kl_loss": 0.27028024196624756, + "loss_ib": 0.008846511133015156, + "step": 3034 + }, + { + "ce_ib": 2.4722588062286377, + "ce_orig": 0.5514847636222839, + "epoch": 0.8725285786181609, + "kl_loss": 0.26560187339782715, + "loss_ib": 0.005128277465701103, + "step": 3034 + }, + { + "epoch": 0.8728161621971385, + "grad_norm": 0.1285254955291748, + "learning_rate": 8.326769812800683e-06, + "loss": 0.873, + "step": 3035 + }, + { + "ce_ib": 2.3306262493133545, + "ce_orig": 0.7729085683822632, + "epoch": 0.8728161621971385, + "kl_loss": 0.1702902913093567, + "loss_ib": 0.004033529199659824, + "step": 3035 + }, + { + "ce_ib": 4.5736985206604, + "ce_orig": 0.7340548038482666, + "epoch": 0.8728161621971385, + "kl_loss": 0.15245644748210907, + "loss_ib": 0.006098262965679169, + "step": 3035 + }, + { + "ce_ib": 3.817833185195923, + "ce_orig": 0.8421128392219543, + "epoch": 0.8728161621971385, + "kl_loss": 0.18282413482666016, + "loss_ib": 0.005646074656397104, + "step": 3035 + }, + { + "ce_ib": 1.7949944734573364, + "ce_orig": 0.4217584729194641, + "epoch": 0.8728161621971385, + "kl_loss": 0.08695808053016663, + "loss_ib": 0.002664575120434165, + "step": 3035 + }, + { + "ce_ib": 3.6982173919677734, + "ce_orig": 0.6239057779312134, + "epoch": 0.8731037457761162, + "kl_loss": 0.17682400345802307, + "loss_ib": 0.0054664574563503265, + "step": 3036 + }, + { + "ce_ib": 6.131997108459473, + "ce_orig": 1.3514271974563599, + "epoch": 0.8731037457761162, + "kl_loss": 0.16346392035484314, + "loss_ib": 0.0077666365541517735, + "step": 3036 + }, + { + "ce_ib": 3.3351590633392334, + "ce_orig": 0.6700191497802734, + "epoch": 0.8731037457761162, + "kl_loss": 0.15108458697795868, + "loss_ib": 0.004846004769206047, + "step": 3036 + }, + { + "ce_ib": 3.472780227661133, + "ce_orig": 0.8315459489822388, + "epoch": 0.8731037457761162, + "kl_loss": 0.12546147406101227, + "loss_ib": 0.004727395251393318, + "step": 3036 + }, + { + "ce_ib": 2.8575921058654785, + "ce_orig": 0.6245294809341431, + "epoch": 0.8733913293550938, + "kl_loss": 0.1922912299633026, + "loss_ib": 0.004780504386872053, + "step": 3037 + }, + { + "ce_ib": 2.992750883102417, + "ce_orig": 0.7355105876922607, + "epoch": 0.8733913293550938, + "kl_loss": 0.18924084305763245, + "loss_ib": 0.004885159432888031, + "step": 3037 + }, + { + "ce_ib": 3.8686740398406982, + "ce_orig": 0.934607744216919, + "epoch": 0.8733913293550938, + "kl_loss": 0.09966481477022171, + "loss_ib": 0.004865321796387434, + "step": 3037 + }, + { + "ce_ib": 4.446110248565674, + "ce_orig": 0.6510801911354065, + "epoch": 0.8733913293550938, + "kl_loss": 0.27235469222068787, + "loss_ib": 0.00716965738683939, + "step": 3037 + }, + { + "ce_ib": 6.8025007247924805, + "ce_orig": 1.331974744796753, + "epoch": 0.8736789129340715, + "kl_loss": 0.2276889681816101, + "loss_ib": 0.009079390205442905, + "step": 3038 + }, + { + "ce_ib": 5.516584873199463, + "ce_orig": 0.9134126901626587, + "epoch": 0.8736789129340715, + "kl_loss": 0.15084168314933777, + "loss_ib": 0.007025002036243677, + "step": 3038 + }, + { + "ce_ib": 5.278623104095459, + "ce_orig": 1.1301530599594116, + "epoch": 0.8736789129340715, + "kl_loss": 0.2288721203804016, + "loss_ib": 0.007567344233393669, + "step": 3038 + }, + { + "ce_ib": 2.236379861831665, + "ce_orig": 0.5138092637062073, + "epoch": 0.8736789129340715, + "kl_loss": 0.21197755634784698, + "loss_ib": 0.004356155171990395, + "step": 3038 + }, + { + "ce_ib": 4.191259384155273, + "ce_orig": 0.7219685912132263, + "epoch": 0.8739664965130491, + "kl_loss": 0.2527785003185272, + "loss_ib": 0.0067190444096922874, + "step": 3039 + }, + { + "ce_ib": 3.607558250427246, + "ce_orig": 0.7551795244216919, + "epoch": 0.8739664965130491, + "kl_loss": 0.19249799847602844, + "loss_ib": 0.005532538052648306, + "step": 3039 + }, + { + "ce_ib": 3.0679328441619873, + "ce_orig": 0.884811282157898, + "epoch": 0.8739664965130491, + "kl_loss": 0.12692108750343323, + "loss_ib": 0.0043371436186134815, + "step": 3039 + }, + { + "ce_ib": 4.902533531188965, + "ce_orig": 1.0194000005722046, + "epoch": 0.8739664965130491, + "kl_loss": 0.18132995069026947, + "loss_ib": 0.006715832743793726, + "step": 3039 + }, + { + "epoch": 0.8742540800920268, + "grad_norm": 0.1200781837105751, + "learning_rate": 8.320972114501698e-06, + "loss": 0.928, + "step": 3040 + }, + { + "ce_ib": 4.933560848236084, + "ce_orig": 0.7650630474090576, + "epoch": 0.8742540800920268, + "kl_loss": 0.14683586359024048, + "loss_ib": 0.006401918828487396, + "step": 3040 + }, + { + "ce_ib": 3.279649257659912, + "ce_orig": 0.8381637334823608, + "epoch": 0.8742540800920268, + "kl_loss": 0.174057736992836, + "loss_ib": 0.005020226817578077, + "step": 3040 + }, + { + "ce_ib": 4.808218002319336, + "ce_orig": 1.217624306678772, + "epoch": 0.8742540800920268, + "kl_loss": 0.2032126635313034, + "loss_ib": 0.0068403445184230804, + "step": 3040 + }, + { + "ce_ib": 4.5620880126953125, + "ce_orig": 0.781871497631073, + "epoch": 0.8742540800920268, + "kl_loss": 0.20516060292720795, + "loss_ib": 0.006613693665713072, + "step": 3040 + }, + { + "ce_ib": 6.188195705413818, + "ce_orig": 0.9964828491210938, + "epoch": 0.8745416636710044, + "kl_loss": 0.23861075937747955, + "loss_ib": 0.008574303239583969, + "step": 3041 + }, + { + "ce_ib": 4.999070167541504, + "ce_orig": 0.9220011830329895, + "epoch": 0.8745416636710044, + "kl_loss": 0.3200100064277649, + "loss_ib": 0.008199170231819153, + "step": 3041 + }, + { + "ce_ib": 3.7421555519104004, + "ce_orig": 0.656177282333374, + "epoch": 0.8745416636710044, + "kl_loss": 0.16276812553405762, + "loss_ib": 0.005369837395846844, + "step": 3041 + }, + { + "ce_ib": 3.947352170944214, + "ce_orig": 0.8647215962409973, + "epoch": 0.8745416636710044, + "kl_loss": 0.25835922360420227, + "loss_ib": 0.006530943792313337, + "step": 3041 + }, + { + "ce_ib": 3.613624334335327, + "ce_orig": 0.8710114359855652, + "epoch": 0.874829247249982, + "kl_loss": 0.1986517459154129, + "loss_ib": 0.005600141827017069, + "step": 3042 + }, + { + "ce_ib": 4.632547378540039, + "ce_orig": 1.0963115692138672, + "epoch": 0.874829247249982, + "kl_loss": 0.20253604650497437, + "loss_ib": 0.006657907739281654, + "step": 3042 + }, + { + "ce_ib": 4.467654228210449, + "ce_orig": 0.4362611174583435, + "epoch": 0.874829247249982, + "kl_loss": 0.4562360644340515, + "loss_ib": 0.009030014276504517, + "step": 3042 + }, + { + "ce_ib": 4.45102596282959, + "ce_orig": 1.0953092575073242, + "epoch": 0.874829247249982, + "kl_loss": 0.19773781299591064, + "loss_ib": 0.006428403779864311, + "step": 3042 + }, + { + "ce_ib": 5.116706848144531, + "ce_orig": 1.2621861696243286, + "epoch": 0.8751168308289596, + "kl_loss": 0.21595728397369385, + "loss_ib": 0.007276279851794243, + "step": 3043 + }, + { + "ce_ib": 3.0784718990325928, + "ce_orig": 0.5937740206718445, + "epoch": 0.8751168308289596, + "kl_loss": 0.20705337822437286, + "loss_ib": 0.005149005446583033, + "step": 3043 + }, + { + "ce_ib": 4.860495090484619, + "ce_orig": 0.8705167770385742, + "epoch": 0.8751168308289596, + "kl_loss": 0.18615683913230896, + "loss_ib": 0.0067220632918179035, + "step": 3043 + }, + { + "ce_ib": 5.58819055557251, + "ce_orig": 1.5587291717529297, + "epoch": 0.8751168308289596, + "kl_loss": 0.15177707374095917, + "loss_ib": 0.007105961441993713, + "step": 3043 + }, + { + "ce_ib": 6.084508895874023, + "ce_orig": 1.1718674898147583, + "epoch": 0.8754044144079373, + "kl_loss": 0.19104859232902527, + "loss_ib": 0.007994994521141052, + "step": 3044 + }, + { + "ce_ib": 3.707300901412964, + "ce_orig": 0.9459441900253296, + "epoch": 0.8754044144079373, + "kl_loss": 0.18733461201190948, + "loss_ib": 0.00558064691722393, + "step": 3044 + }, + { + "ce_ib": 6.369130611419678, + "ce_orig": 1.271790862083435, + "epoch": 0.8754044144079373, + "kl_loss": 0.14983811974525452, + "loss_ib": 0.007867511361837387, + "step": 3044 + }, + { + "ce_ib": 0.7784809470176697, + "ce_orig": 0.18599770963191986, + "epoch": 0.8754044144079373, + "kl_loss": 0.29490435123443604, + "loss_ib": 0.0037275245413184166, + "step": 3044 + }, + { + "epoch": 0.875691997986915, + "grad_norm": 0.1277233213186264, + "learning_rate": 8.315166415210074e-06, + "loss": 0.8311, + "step": 3045 + }, + { + "ce_ib": 3.9692647457122803, + "ce_orig": 0.9209102392196655, + "epoch": 0.875691997986915, + "kl_loss": 0.1860496997833252, + "loss_ib": 0.005829762201756239, + "step": 3045 + }, + { + "ce_ib": 2.629826307296753, + "ce_orig": 0.5422137975692749, + "epoch": 0.875691997986915, + "kl_loss": 0.11129085719585419, + "loss_ib": 0.003742734668776393, + "step": 3045 + }, + { + "ce_ib": 3.1649856567382812, + "ce_orig": 0.6551657319068909, + "epoch": 0.875691997986915, + "kl_loss": 0.13035431504249573, + "loss_ib": 0.0044685290195047855, + "step": 3045 + }, + { + "ce_ib": 3.220211982727051, + "ce_orig": 0.7827670574188232, + "epoch": 0.875691997986915, + "kl_loss": 0.21635128557682037, + "loss_ib": 0.00538372527807951, + "step": 3045 + }, + { + "ce_ib": 7.3002448081970215, + "ce_orig": 1.575875163078308, + "epoch": 0.8759795815658926, + "kl_loss": 0.16415657103061676, + "loss_ib": 0.008941810578107834, + "step": 3046 + }, + { + "ce_ib": 4.410012722015381, + "ce_orig": 0.7340313792228699, + "epoch": 0.8759795815658926, + "kl_loss": 0.199954092502594, + "loss_ib": 0.006409553345292807, + "step": 3046 + }, + { + "ce_ib": 4.879310131072998, + "ce_orig": 1.2142974138259888, + "epoch": 0.8759795815658926, + "kl_loss": 0.13459903001785278, + "loss_ib": 0.00622530048713088, + "step": 3046 + }, + { + "ce_ib": 1.8233544826507568, + "ce_orig": 0.551039457321167, + "epoch": 0.8759795815658926, + "kl_loss": 0.1882602721452713, + "loss_ib": 0.003705957205966115, + "step": 3046 + }, + { + "ce_ib": 5.348729610443115, + "ce_orig": 1.1791518926620483, + "epoch": 0.8762671651448702, + "kl_loss": 0.16205860674381256, + "loss_ib": 0.006969315465539694, + "step": 3047 + }, + { + "ce_ib": 2.2098920345306396, + "ce_orig": 0.6143059134483337, + "epoch": 0.8762671651448702, + "kl_loss": 0.16593199968338013, + "loss_ib": 0.003869212232530117, + "step": 3047 + }, + { + "ce_ib": 4.15065860748291, + "ce_orig": 0.9528758525848389, + "epoch": 0.8762671651448702, + "kl_loss": 0.13516543805599213, + "loss_ib": 0.005502312444150448, + "step": 3047 + }, + { + "ce_ib": 3.400308609008789, + "ce_orig": 0.7901802062988281, + "epoch": 0.8762671651448702, + "kl_loss": 0.17159172892570496, + "loss_ib": 0.005116226151585579, + "step": 3047 + }, + { + "ce_ib": 2.8829243183135986, + "ce_orig": 0.6695106029510498, + "epoch": 0.8765547487238479, + "kl_loss": 0.14370793104171753, + "loss_ib": 0.004320003557950258, + "step": 3048 + }, + { + "ce_ib": 1.6141356229782104, + "ce_orig": 0.3914640247821808, + "epoch": 0.8765547487238479, + "kl_loss": 0.20889873802661896, + "loss_ib": 0.0037031229585409164, + "step": 3048 + }, + { + "ce_ib": 3.9875435829162598, + "ce_orig": 0.8402961492538452, + "epoch": 0.8765547487238479, + "kl_loss": 0.1674710512161255, + "loss_ib": 0.005662254057824612, + "step": 3048 + }, + { + "ce_ib": 8.877432823181152, + "ce_orig": 2.00728440284729, + "epoch": 0.8765547487238479, + "kl_loss": 0.22589288651943207, + "loss_ib": 0.01113636139780283, + "step": 3048 + }, + { + "ce_ib": 3.428380250930786, + "ce_orig": 0.8265547156333923, + "epoch": 0.8768423323028255, + "kl_loss": 0.18235045671463013, + "loss_ib": 0.005251884460449219, + "step": 3049 + }, + { + "ce_ib": 4.5227131843566895, + "ce_orig": 1.203538417816162, + "epoch": 0.8768423323028255, + "kl_loss": 0.13124075531959534, + "loss_ib": 0.005835120566189289, + "step": 3049 + }, + { + "ce_ib": 3.7063913345336914, + "ce_orig": 0.853362500667572, + "epoch": 0.8768423323028255, + "kl_loss": 0.16040238738059998, + "loss_ib": 0.00531041482463479, + "step": 3049 + }, + { + "ce_ib": 7.051861763000488, + "ce_orig": 1.4497120380401611, + "epoch": 0.8768423323028255, + "kl_loss": 0.20045648515224457, + "loss_ib": 0.009056426584720612, + "step": 3049 + }, + { + "epoch": 0.8771299158818031, + "grad_norm": 0.12882909178733826, + "learning_rate": 8.309352728913089e-06, + "loss": 0.9337, + "step": 3050 + }, + { + "ce_ib": 3.086623430252075, + "ce_orig": 0.7435568571090698, + "epoch": 0.8771299158818031, + "kl_loss": 0.22055839002132416, + "loss_ib": 0.005292207468301058, + "step": 3050 + }, + { + "ce_ib": 3.7483327388763428, + "ce_orig": 0.46604809165000916, + "epoch": 0.8771299158818031, + "kl_loss": 0.2166709005832672, + "loss_ib": 0.005915042012929916, + "step": 3050 + }, + { + "ce_ib": 3.462855339050293, + "ce_orig": 0.619250476360321, + "epoch": 0.8771299158818031, + "kl_loss": 0.25416532158851624, + "loss_ib": 0.006004508584737778, + "step": 3050 + }, + { + "ce_ib": 4.087599277496338, + "ce_orig": 0.70986008644104, + "epoch": 0.8771299158818031, + "kl_loss": 0.2227795124053955, + "loss_ib": 0.006315394304692745, + "step": 3050 + }, + { + "ce_ib": 2.8300247192382812, + "ce_orig": 0.8294196724891663, + "epoch": 0.8774174994607807, + "kl_loss": 0.12276231497526169, + "loss_ib": 0.004057647660374641, + "step": 3051 + }, + { + "ce_ib": 3.4560534954071045, + "ce_orig": 0.6274535059928894, + "epoch": 0.8774174994607807, + "kl_loss": 0.23831184208393097, + "loss_ib": 0.005839171819388866, + "step": 3051 + }, + { + "ce_ib": 4.7823591232299805, + "ce_orig": 0.8944647908210754, + "epoch": 0.8774174994607807, + "kl_loss": 0.21229159832000732, + "loss_ib": 0.006905275397002697, + "step": 3051 + }, + { + "ce_ib": 6.785813808441162, + "ce_orig": 1.3654885292053223, + "epoch": 0.8774174994607807, + "kl_loss": 0.24112610518932343, + "loss_ib": 0.009197074919939041, + "step": 3051 + }, + { + "ce_ib": 1.8399426937103271, + "ce_orig": 0.39209291338920593, + "epoch": 0.8777050830397585, + "kl_loss": 0.1638159453868866, + "loss_ib": 0.003478101920336485, + "step": 3052 + }, + { + "ce_ib": 2.961289405822754, + "ce_orig": 0.5362990498542786, + "epoch": 0.8777050830397585, + "kl_loss": 0.2205842137336731, + "loss_ib": 0.005167131312191486, + "step": 3052 + }, + { + "ce_ib": 4.6895856857299805, + "ce_orig": 1.175492525100708, + "epoch": 0.8777050830397585, + "kl_loss": 0.24714608490467072, + "loss_ib": 0.007161046843975782, + "step": 3052 + }, + { + "ce_ib": 6.604238510131836, + "ce_orig": 1.4248602390289307, + "epoch": 0.8777050830397585, + "kl_loss": 0.14148271083831787, + "loss_ib": 0.008019065484404564, + "step": 3052 + }, + { + "ce_ib": 3.8631694316864014, + "ce_orig": 0.4907926023006439, + "epoch": 0.8779926666187361, + "kl_loss": 0.24325700104236603, + "loss_ib": 0.006295739207416773, + "step": 3053 + }, + { + "ce_ib": 6.5158867835998535, + "ce_orig": 1.5951883792877197, + "epoch": 0.8779926666187361, + "kl_loss": 0.22780978679656982, + "loss_ib": 0.008793984539806843, + "step": 3053 + }, + { + "ce_ib": 2.598376989364624, + "ce_orig": 0.5279430747032166, + "epoch": 0.8779926666187361, + "kl_loss": 0.14617790281772614, + "loss_ib": 0.004060156177729368, + "step": 3053 + }, + { + "ce_ib": 4.675154685974121, + "ce_orig": 0.6967430710792542, + "epoch": 0.8779926666187361, + "kl_loss": 0.17188823223114014, + "loss_ib": 0.0063940370455384254, + "step": 3053 + }, + { + "ce_ib": 2.564706802368164, + "ce_orig": 0.4164303243160248, + "epoch": 0.8782802501977137, + "kl_loss": 0.19407999515533447, + "loss_ib": 0.004505506716668606, + "step": 3054 + }, + { + "ce_ib": 2.4788761138916016, + "ce_orig": 0.6980212926864624, + "epoch": 0.8782802501977137, + "kl_loss": 0.20377634465694427, + "loss_ib": 0.004516639746725559, + "step": 3054 + }, + { + "ce_ib": 4.21281623840332, + "ce_orig": 0.8277150988578796, + "epoch": 0.8782802501977137, + "kl_loss": 0.28035688400268555, + "loss_ib": 0.007016384974122047, + "step": 3054 + }, + { + "ce_ib": 3.541639566421509, + "ce_orig": 0.9311122298240662, + "epoch": 0.8782802501977137, + "kl_loss": 0.174239382147789, + "loss_ib": 0.005284033250063658, + "step": 3054 + }, + { + "epoch": 0.8785678337766913, + "grad_norm": 0.12853515148162842, + "learning_rate": 8.303531069617263e-06, + "loss": 0.8695, + "step": 3055 + }, + { + "ce_ib": 4.35776424407959, + "ce_orig": 0.6463779211044312, + "epoch": 0.8785678337766913, + "kl_loss": 0.23964811861515045, + "loss_ib": 0.006754245609045029, + "step": 3055 + }, + { + "ce_ib": 3.9873058795928955, + "ce_orig": 0.6223101019859314, + "epoch": 0.8785678337766913, + "kl_loss": 0.15986594557762146, + "loss_ib": 0.005585965234786272, + "step": 3055 + }, + { + "ce_ib": 4.919009208679199, + "ce_orig": 0.5695360898971558, + "epoch": 0.8785678337766913, + "kl_loss": 0.19250193238258362, + "loss_ib": 0.006844027899205685, + "step": 3055 + }, + { + "ce_ib": 4.715623378753662, + "ce_orig": 1.2933791875839233, + "epoch": 0.8785678337766913, + "kl_loss": 0.15048742294311523, + "loss_ib": 0.006220498122274876, + "step": 3055 + }, + { + "ce_ib": 5.068349838256836, + "ce_orig": 1.144762396812439, + "epoch": 0.878855417355669, + "kl_loss": 0.22132861614227295, + "loss_ib": 0.0072816358879208565, + "step": 3056 + }, + { + "ce_ib": 2.594994068145752, + "ce_orig": 0.5574873685836792, + "epoch": 0.878855417355669, + "kl_loss": 0.2279636263847351, + "loss_ib": 0.004874630365520716, + "step": 3056 + }, + { + "ce_ib": 4.545940399169922, + "ce_orig": 1.1927260160446167, + "epoch": 0.878855417355669, + "kl_loss": 0.16227847337722778, + "loss_ib": 0.006168725434690714, + "step": 3056 + }, + { + "ce_ib": 3.108933210372925, + "ce_orig": 0.7815560698509216, + "epoch": 0.878855417355669, + "kl_loss": 0.13176694512367249, + "loss_ib": 0.004426602739840746, + "step": 3056 + }, + { + "ce_ib": 3.774458885192871, + "ce_orig": 1.0255310535430908, + "epoch": 0.8791430009346466, + "kl_loss": 0.14360246062278748, + "loss_ib": 0.005210483446717262, + "step": 3057 + }, + { + "ce_ib": 2.6507601737976074, + "ce_orig": 0.7645630836486816, + "epoch": 0.8791430009346466, + "kl_loss": 0.21412435173988342, + "loss_ib": 0.0047920034267008305, + "step": 3057 + }, + { + "ce_ib": 4.915416240692139, + "ce_orig": 1.083138346672058, + "epoch": 0.8791430009346466, + "kl_loss": 0.1600581407546997, + "loss_ib": 0.006515997461974621, + "step": 3057 + }, + { + "ce_ib": 7.527081489562988, + "ce_orig": 1.7966099977493286, + "epoch": 0.8791430009346466, + "kl_loss": 0.20978859066963196, + "loss_ib": 0.009624967351555824, + "step": 3057 + }, + { + "ce_ib": 6.630353927612305, + "ce_orig": 1.7295324802398682, + "epoch": 0.8794305845136243, + "kl_loss": 0.36602914333343506, + "loss_ib": 0.010290645062923431, + "step": 3058 + }, + { + "ce_ib": 3.416299819946289, + "ce_orig": 0.7320575714111328, + "epoch": 0.8794305845136243, + "kl_loss": 0.12010858952999115, + "loss_ib": 0.004617385566234589, + "step": 3058 + }, + { + "ce_ib": 3.0388786792755127, + "ce_orig": 0.49272647500038147, + "epoch": 0.8794305845136243, + "kl_loss": 0.2126273810863495, + "loss_ib": 0.0051651522517204285, + "step": 3058 + }, + { + "ce_ib": 3.101600408554077, + "ce_orig": 0.6054064035415649, + "epoch": 0.8794305845136243, + "kl_loss": 0.1710098832845688, + "loss_ib": 0.004811699502170086, + "step": 3058 + }, + { + "ce_ib": 6.441290378570557, + "ce_orig": 0.8552629351615906, + "epoch": 0.879718168092602, + "kl_loss": 0.20998477935791016, + "loss_ib": 0.008541137911379337, + "step": 3059 + }, + { + "ce_ib": 3.8606514930725098, + "ce_orig": 0.7665522694587708, + "epoch": 0.879718168092602, + "kl_loss": 0.21450476348400116, + "loss_ib": 0.006005698814988136, + "step": 3059 + }, + { + "ce_ib": 2.3430442810058594, + "ce_orig": 0.4716733396053314, + "epoch": 0.879718168092602, + "kl_loss": 0.421161949634552, + "loss_ib": 0.006554663646966219, + "step": 3059 + }, + { + "ce_ib": 5.199859619140625, + "ce_orig": 1.5456653833389282, + "epoch": 0.879718168092602, + "kl_loss": 0.2480582296848297, + "loss_ib": 0.007680442184209824, + "step": 3059 + }, + { + "epoch": 0.8800057516715796, + "grad_norm": 0.13622905313968658, + "learning_rate": 8.297701451348324e-06, + "loss": 0.8469, + "step": 3060 + }, + { + "ce_ib": 2.1857688426971436, + "ce_orig": 0.6112737059593201, + "epoch": 0.8800057516715796, + "kl_loss": 0.0955800861120224, + "loss_ib": 0.0031415696721524, + "step": 3060 + }, + { + "ce_ib": 2.5487937927246094, + "ce_orig": 0.5758946537971497, + "epoch": 0.8800057516715796, + "kl_loss": 0.11483193933963776, + "loss_ib": 0.0036971133667975664, + "step": 3060 + }, + { + "ce_ib": 5.417148590087891, + "ce_orig": 1.1784647703170776, + "epoch": 0.8800057516715796, + "kl_loss": 0.12727558612823486, + "loss_ib": 0.006689904257655144, + "step": 3060 + }, + { + "ce_ib": 4.405616283416748, + "ce_orig": 0.8284060955047607, + "epoch": 0.8800057516715796, + "kl_loss": 0.18287478387355804, + "loss_ib": 0.006234363652765751, + "step": 3060 + }, + { + "ce_ib": 4.837342262268066, + "ce_orig": 1.1703826189041138, + "epoch": 0.8802933352505572, + "kl_loss": 0.19387152791023254, + "loss_ib": 0.006776057183742523, + "step": 3061 + }, + { + "ce_ib": 7.227414131164551, + "ce_orig": 1.721402645111084, + "epoch": 0.8802933352505572, + "kl_loss": 0.20542433857917786, + "loss_ib": 0.009281657636165619, + "step": 3061 + }, + { + "ce_ib": 5.411126613616943, + "ce_orig": 1.3114420175552368, + "epoch": 0.8802933352505572, + "kl_loss": 0.2142118215560913, + "loss_ib": 0.007553244475275278, + "step": 3061 + }, + { + "ce_ib": 5.459466457366943, + "ce_orig": 0.762791633605957, + "epoch": 0.8802933352505572, + "kl_loss": 0.21897321939468384, + "loss_ib": 0.007649198640137911, + "step": 3061 + }, + { + "ce_ib": 8.109661102294922, + "ce_orig": 1.6528195142745972, + "epoch": 0.8805809188295348, + "kl_loss": 0.6141775846481323, + "loss_ib": 0.014251437038183212, + "step": 3062 + }, + { + "ce_ib": 4.389732360839844, + "ce_orig": 1.0809739828109741, + "epoch": 0.8805809188295348, + "kl_loss": 0.13251656293869019, + "loss_ib": 0.005714897997677326, + "step": 3062 + }, + { + "ce_ib": 2.523601531982422, + "ce_orig": 0.5277051329612732, + "epoch": 0.8805809188295348, + "kl_loss": 0.16016708314418793, + "loss_ib": 0.004125272389501333, + "step": 3062 + }, + { + "ce_ib": 4.421096324920654, + "ce_orig": 0.9276777505874634, + "epoch": 0.8805809188295348, + "kl_loss": 0.10827996581792831, + "loss_ib": 0.005503895692527294, + "step": 3062 + }, + { + "ce_ib": 4.903621196746826, + "ce_orig": 0.9886559844017029, + "epoch": 0.8808685024085124, + "kl_loss": 0.22913159430027008, + "loss_ib": 0.007194937206804752, + "step": 3063 + }, + { + "ce_ib": 3.051028251647949, + "ce_orig": 0.6100396513938904, + "epoch": 0.8808685024085124, + "kl_loss": 0.1715845763683319, + "loss_ib": 0.004766874015331268, + "step": 3063 + }, + { + "ce_ib": 5.615814208984375, + "ce_orig": 1.1646831035614014, + "epoch": 0.8808685024085124, + "kl_loss": 0.178575336933136, + "loss_ib": 0.007401567418128252, + "step": 3063 + }, + { + "ce_ib": 4.982259750366211, + "ce_orig": 1.5935046672821045, + "epoch": 0.8808685024085124, + "kl_loss": 0.1533435583114624, + "loss_ib": 0.006515695247799158, + "step": 3063 + }, + { + "ce_ib": 3.3795762062072754, + "ce_orig": 0.440749853849411, + "epoch": 0.8811560859874901, + "kl_loss": 0.29031282663345337, + "loss_ib": 0.006282704416662455, + "step": 3064 + }, + { + "ce_ib": 3.359157085418701, + "ce_orig": 0.7010852694511414, + "epoch": 0.8811560859874901, + "kl_loss": 0.15154162049293518, + "loss_ib": 0.004874573089182377, + "step": 3064 + }, + { + "ce_ib": 3.980064630508423, + "ce_orig": 1.2977957725524902, + "epoch": 0.8811560859874901, + "kl_loss": 0.29652395844459534, + "loss_ib": 0.0069453041069209576, + "step": 3064 + }, + { + "ce_ib": 2.7044196128845215, + "ce_orig": 0.7362260222434998, + "epoch": 0.8811560859874901, + "kl_loss": 0.1688288450241089, + "loss_ib": 0.00439270818606019, + "step": 3064 + }, + { + "epoch": 0.8814436695664678, + "grad_norm": 0.14457188546657562, + "learning_rate": 8.291863888151176e-06, + "loss": 0.946, + "step": 3065 + }, + { + "ce_ib": 5.323140621185303, + "ce_orig": 1.5791101455688477, + "epoch": 0.8814436695664678, + "kl_loss": 0.17657095193862915, + "loss_ib": 0.007088849786669016, + "step": 3065 + }, + { + "ce_ib": 2.4477474689483643, + "ce_orig": 0.4383699297904968, + "epoch": 0.8814436695664678, + "kl_loss": 0.14723339676856995, + "loss_ib": 0.0039200810715556145, + "step": 3065 + }, + { + "ce_ib": 2.432288885116577, + "ce_orig": 0.6804338693618774, + "epoch": 0.8814436695664678, + "kl_loss": 0.1723802238702774, + "loss_ib": 0.004156091250479221, + "step": 3065 + }, + { + "ce_ib": 1.683430790901184, + "ce_orig": 0.19677922129631042, + "epoch": 0.8814436695664678, + "kl_loss": 0.20183901488780975, + "loss_ib": 0.003701820969581604, + "step": 3065 + }, + { + "ce_ib": 3.4261279106140137, + "ce_orig": 0.7195852398872375, + "epoch": 0.8817312531454454, + "kl_loss": 0.16370904445648193, + "loss_ib": 0.005063218530267477, + "step": 3066 + }, + { + "ce_ib": 2.1651246547698975, + "ce_orig": 0.6310738325119019, + "epoch": 0.8817312531454454, + "kl_loss": 0.16355782747268677, + "loss_ib": 0.003800702979788184, + "step": 3066 + }, + { + "ce_ib": 3.588609218597412, + "ce_orig": 0.9319191575050354, + "epoch": 0.8817312531454454, + "kl_loss": 0.19826602935791016, + "loss_ib": 0.005571269895881414, + "step": 3066 + }, + { + "ce_ib": 5.221468925476074, + "ce_orig": 1.516661524772644, + "epoch": 0.8817312531454454, + "kl_loss": 0.14909476041793823, + "loss_ib": 0.006712416186928749, + "step": 3066 + }, + { + "ce_ib": 5.486189365386963, + "ce_orig": 1.0689890384674072, + "epoch": 0.882018836724423, + "kl_loss": 0.13618884980678558, + "loss_ib": 0.006848078221082687, + "step": 3067 + }, + { + "ce_ib": 5.196125030517578, + "ce_orig": 0.6194407343864441, + "epoch": 0.882018836724423, + "kl_loss": 0.23749490082263947, + "loss_ib": 0.007571073714643717, + "step": 3067 + }, + { + "ce_ib": 5.705458641052246, + "ce_orig": 1.318363904953003, + "epoch": 0.882018836724423, + "kl_loss": 0.19002561271190643, + "loss_ib": 0.007605714723467827, + "step": 3067 + }, + { + "ce_ib": 4.617785453796387, + "ce_orig": 0.7899675965309143, + "epoch": 0.882018836724423, + "kl_loss": 0.25065675377845764, + "loss_ib": 0.0071243527345359325, + "step": 3067 + }, + { + "ce_ib": 3.5914306640625, + "ce_orig": 0.9278072714805603, + "epoch": 0.8823064203034007, + "kl_loss": 0.12412326782941818, + "loss_ib": 0.00483266357332468, + "step": 3068 + }, + { + "ce_ib": 3.622978925704956, + "ce_orig": 0.35210251808166504, + "epoch": 0.8823064203034007, + "kl_loss": 0.27997148036956787, + "loss_ib": 0.006422693375498056, + "step": 3068 + }, + { + "ce_ib": 6.320131778717041, + "ce_orig": 1.394415020942688, + "epoch": 0.8823064203034007, + "kl_loss": 0.20569929480552673, + "loss_ib": 0.00837712548673153, + "step": 3068 + }, + { + "ce_ib": 3.134469985961914, + "ce_orig": 0.9777742624282837, + "epoch": 0.8823064203034007, + "kl_loss": 0.08676188439130783, + "loss_ib": 0.00400208868086338, + "step": 3068 + }, + { + "ce_ib": 4.984770774841309, + "ce_orig": 1.102685570716858, + "epoch": 0.8825940038823783, + "kl_loss": 0.12994639575481415, + "loss_ib": 0.006284234579652548, + "step": 3069 + }, + { + "ce_ib": 2.329989433288574, + "ce_orig": 0.7404146790504456, + "epoch": 0.8825940038823783, + "kl_loss": 0.13576126098632812, + "loss_ib": 0.003687602235004306, + "step": 3069 + }, + { + "ce_ib": 3.7095093727111816, + "ce_orig": 0.6195202469825745, + "epoch": 0.8825940038823783, + "kl_loss": 0.27855074405670166, + "loss_ib": 0.006495016627013683, + "step": 3069 + }, + { + "ce_ib": 2.7263472080230713, + "ce_orig": 0.6311208605766296, + "epoch": 0.8825940038823783, + "kl_loss": 0.19236406683921814, + "loss_ib": 0.004649987909942865, + "step": 3069 + }, + { + "epoch": 0.8828815874613559, + "grad_norm": 0.12904800474643707, + "learning_rate": 8.286018394089864e-06, + "loss": 0.8748, + "step": 3070 + }, + { + "ce_ib": 3.360929012298584, + "ce_orig": 0.7049216628074646, + "epoch": 0.8828815874613559, + "kl_loss": 0.1223522275686264, + "loss_ib": 0.0045844512060284615, + "step": 3070 + }, + { + "ce_ib": 6.476019859313965, + "ce_orig": 1.0920530557632446, + "epoch": 0.8828815874613559, + "kl_loss": 0.2219792753458023, + "loss_ib": 0.00869581289589405, + "step": 3070 + }, + { + "ce_ib": 4.1171345710754395, + "ce_orig": 0.8872969746589661, + "epoch": 0.8828815874613559, + "kl_loss": 0.22027911245822906, + "loss_ib": 0.006319925654679537, + "step": 3070 + }, + { + "ce_ib": 3.319334030151367, + "ce_orig": 0.5417797565460205, + "epoch": 0.8828815874613559, + "kl_loss": 0.1333342045545578, + "loss_ib": 0.004652675706893206, + "step": 3070 + }, + { + "ce_ib": 3.7458906173706055, + "ce_orig": 0.8377024531364441, + "epoch": 0.8831691710403335, + "kl_loss": 0.198823481798172, + "loss_ib": 0.005734125152230263, + "step": 3071 + }, + { + "ce_ib": 7.115640163421631, + "ce_orig": 1.3274205923080444, + "epoch": 0.8831691710403335, + "kl_loss": 0.16730117797851562, + "loss_ib": 0.008788651786744595, + "step": 3071 + }, + { + "ce_ib": 7.03788948059082, + "ce_orig": 1.3785840272903442, + "epoch": 0.8831691710403335, + "kl_loss": 0.3148314952850342, + "loss_ib": 0.010186204686760902, + "step": 3071 + }, + { + "ce_ib": 4.770383358001709, + "ce_orig": 1.2000609636306763, + "epoch": 0.8831691710403335, + "kl_loss": 0.16548897325992584, + "loss_ib": 0.006425273139029741, + "step": 3071 + }, + { + "ce_ib": 2.4295945167541504, + "ce_orig": 0.5674925446510315, + "epoch": 0.8834567546193113, + "kl_loss": 0.09844321012496948, + "loss_ib": 0.0034140264615416527, + "step": 3072 + }, + { + "ce_ib": 3.855354070663452, + "ce_orig": 0.8394067883491516, + "epoch": 0.8834567546193113, + "kl_loss": 0.24685588479042053, + "loss_ib": 0.006323913112282753, + "step": 3072 + }, + { + "ce_ib": 3.5977623462677, + "ce_orig": 1.0916414260864258, + "epoch": 0.8834567546193113, + "kl_loss": 0.27952736616134644, + "loss_ib": 0.0063930354081094265, + "step": 3072 + }, + { + "ce_ib": 3.602118968963623, + "ce_orig": 0.9053552150726318, + "epoch": 0.8834567546193113, + "kl_loss": 0.15335942804813385, + "loss_ib": 0.005135713145136833, + "step": 3072 + }, + { + "ce_ib": 2.016082763671875, + "ce_orig": 0.4706403911113739, + "epoch": 0.8837443381982889, + "kl_loss": 0.17800061404705048, + "loss_ib": 0.0037960889749228954, + "step": 3073 + }, + { + "ce_ib": 3.200401782989502, + "ce_orig": 0.5257211923599243, + "epoch": 0.8837443381982889, + "kl_loss": 0.2158670425415039, + "loss_ib": 0.005359072703868151, + "step": 3073 + }, + { + "ce_ib": 1.7003599405288696, + "ce_orig": 0.2983892858028412, + "epoch": 0.8837443381982889, + "kl_loss": 0.08623377233743668, + "loss_ib": 0.002562697743996978, + "step": 3073 + }, + { + "ce_ib": 2.707484006881714, + "ce_orig": 0.6165201663970947, + "epoch": 0.8837443381982889, + "kl_loss": 0.3974493741989136, + "loss_ib": 0.006681977305561304, + "step": 3073 + }, + { + "ce_ib": 6.680910110473633, + "ce_orig": 1.6155489683151245, + "epoch": 0.8840319217772665, + "kl_loss": 0.24467241764068604, + "loss_ib": 0.009127633646130562, + "step": 3074 + }, + { + "ce_ib": 5.185642719268799, + "ce_orig": 0.9525250792503357, + "epoch": 0.8840319217772665, + "kl_loss": 0.17506206035614014, + "loss_ib": 0.006936263293027878, + "step": 3074 + }, + { + "ce_ib": 2.5496037006378174, + "ce_orig": 0.7050709128379822, + "epoch": 0.8840319217772665, + "kl_loss": 0.10980841517448425, + "loss_ib": 0.0036476878449320793, + "step": 3074 + }, + { + "ce_ib": 4.497872829437256, + "ce_orig": 1.1747169494628906, + "epoch": 0.8840319217772665, + "kl_loss": 0.14082947373390198, + "loss_ib": 0.0059061674401164055, + "step": 3074 + }, + { + "epoch": 0.8843195053562442, + "grad_norm": 0.15176159143447876, + "learning_rate": 8.280164983247538e-06, + "loss": 0.8387, + "step": 3075 + }, + { + "ce_ib": 4.535062313079834, + "ce_orig": 0.764616847038269, + "epoch": 0.8843195053562442, + "kl_loss": 0.3106607496738434, + "loss_ib": 0.007641669362783432, + "step": 3075 + }, + { + "ce_ib": 2.6656851768493652, + "ce_orig": 0.6713985204696655, + "epoch": 0.8843195053562442, + "kl_loss": 0.20070026814937592, + "loss_ib": 0.004672687500715256, + "step": 3075 + }, + { + "ce_ib": 3.223310947418213, + "ce_orig": 0.9145636558532715, + "epoch": 0.8843195053562442, + "kl_loss": 0.15708431601524353, + "loss_ib": 0.004794153850525618, + "step": 3075 + }, + { + "ce_ib": 2.949373960494995, + "ce_orig": 0.6231632232666016, + "epoch": 0.8843195053562442, + "kl_loss": 0.16267427802085876, + "loss_ib": 0.0045761168003082275, + "step": 3075 + }, + { + "ce_ib": 4.430508136749268, + "ce_orig": 0.9652950763702393, + "epoch": 0.8846070889352218, + "kl_loss": 0.20514148473739624, + "loss_ib": 0.006481922697275877, + "step": 3076 + }, + { + "ce_ib": 4.545839309692383, + "ce_orig": 0.9516241550445557, + "epoch": 0.8846070889352218, + "kl_loss": 0.20764552056789398, + "loss_ib": 0.006622294429689646, + "step": 3076 + }, + { + "ce_ib": 5.686695098876953, + "ce_orig": 1.0516982078552246, + "epoch": 0.8846070889352218, + "kl_loss": 0.19339773058891296, + "loss_ib": 0.007620671764016151, + "step": 3076 + }, + { + "ce_ib": 4.726521968841553, + "ce_orig": 1.071621298789978, + "epoch": 0.8846070889352218, + "kl_loss": 0.20270103216171265, + "loss_ib": 0.006753532215952873, + "step": 3076 + }, + { + "ce_ib": 5.816036224365234, + "ce_orig": 1.1984522342681885, + "epoch": 0.8848946725141994, + "kl_loss": 0.14901722967624664, + "loss_ib": 0.007306208368390799, + "step": 3077 + }, + { + "ce_ib": 5.019110679626465, + "ce_orig": 0.8168647885322571, + "epoch": 0.8848946725141994, + "kl_loss": 0.19176605343818665, + "loss_ib": 0.006936771795153618, + "step": 3077 + }, + { + "ce_ib": 6.412586688995361, + "ce_orig": 1.270485281944275, + "epoch": 0.8848946725141994, + "kl_loss": 0.19644244015216827, + "loss_ib": 0.008377010934054852, + "step": 3077 + }, + { + "ce_ib": 4.839014530181885, + "ce_orig": 1.2454934120178223, + "epoch": 0.8848946725141994, + "kl_loss": 0.21265417337417603, + "loss_ib": 0.0069655561819672585, + "step": 3077 + }, + { + "ce_ib": 3.9767653942108154, + "ce_orig": 0.5159098505973816, + "epoch": 0.8851822560931771, + "kl_loss": 0.2571735978126526, + "loss_ib": 0.006548501085489988, + "step": 3078 + }, + { + "ce_ib": 4.852346897125244, + "ce_orig": 0.9101503491401672, + "epoch": 0.8851822560931771, + "kl_loss": 0.3004709780216217, + "loss_ib": 0.007857056334614754, + "step": 3078 + }, + { + "ce_ib": 3.1333205699920654, + "ce_orig": 0.5069099068641663, + "epoch": 0.8851822560931771, + "kl_loss": 0.21746285259723663, + "loss_ib": 0.005307949148118496, + "step": 3078 + }, + { + "ce_ib": 2.6595354080200195, + "ce_orig": 0.6484982371330261, + "epoch": 0.8851822560931771, + "kl_loss": 0.10532091557979584, + "loss_ib": 0.0037127446848899126, + "step": 3078 + }, + { + "ce_ib": 4.613312721252441, + "ce_orig": 0.8996244072914124, + "epoch": 0.8854698396721548, + "kl_loss": 0.11338866502046585, + "loss_ib": 0.005747199524194002, + "step": 3079 + }, + { + "ce_ib": 3.7050395011901855, + "ce_orig": 0.8916770815849304, + "epoch": 0.8854698396721548, + "kl_loss": 0.21373462677001953, + "loss_ib": 0.0058423858135938644, + "step": 3079 + }, + { + "ce_ib": 5.291779518127441, + "ce_orig": 1.3640319108963013, + "epoch": 0.8854698396721548, + "kl_loss": 0.19368988275527954, + "loss_ib": 0.0072286780923604965, + "step": 3079 + }, + { + "ce_ib": 4.431234359741211, + "ce_orig": 0.7684363722801208, + "epoch": 0.8854698396721548, + "kl_loss": 0.23420697450637817, + "loss_ib": 0.0067733037285506725, + "step": 3079 + }, + { + "epoch": 0.8857574232511324, + "grad_norm": 0.13807165622711182, + "learning_rate": 8.274303669726427e-06, + "loss": 0.9344, + "step": 3080 + }, + { + "ce_ib": 4.338321208953857, + "ce_orig": 0.6835975050926208, + "epoch": 0.8857574232511324, + "kl_loss": 0.2666195034980774, + "loss_ib": 0.007004516199231148, + "step": 3080 + }, + { + "ce_ib": 3.3368334770202637, + "ce_orig": 0.5998972058296204, + "epoch": 0.8857574232511324, + "kl_loss": 0.19078823924064636, + "loss_ib": 0.005244715604931116, + "step": 3080 + }, + { + "ce_ib": 5.814561367034912, + "ce_orig": 1.4643666744232178, + "epoch": 0.8857574232511324, + "kl_loss": 0.12079041451215744, + "loss_ib": 0.007022465113550425, + "step": 3080 + }, + { + "ce_ib": 2.212169647216797, + "ce_orig": 0.690682590007782, + "epoch": 0.8857574232511324, + "kl_loss": 0.19206100702285767, + "loss_ib": 0.004132779780775309, + "step": 3080 + }, + { + "ce_ib": 3.3354408740997314, + "ce_orig": 0.8451545238494873, + "epoch": 0.88604500683011, + "kl_loss": 0.1485080122947693, + "loss_ib": 0.004820520989596844, + "step": 3081 + }, + { + "ce_ib": 4.870260238647461, + "ce_orig": 1.4757972955703735, + "epoch": 0.88604500683011, + "kl_loss": 0.20187990367412567, + "loss_ib": 0.006889059208333492, + "step": 3081 + }, + { + "ce_ib": 3.3677244186401367, + "ce_orig": 0.6357197165489197, + "epoch": 0.88604500683011, + "kl_loss": 0.21895340085029602, + "loss_ib": 0.0055572581477463245, + "step": 3081 + }, + { + "ce_ib": 5.365149974822998, + "ce_orig": 1.4832841157913208, + "epoch": 0.88604500683011, + "kl_loss": 0.21524447202682495, + "loss_ib": 0.007517594378441572, + "step": 3081 + }, + { + "ce_ib": 4.768296718597412, + "ce_orig": 1.0655986070632935, + "epoch": 0.8863325904090876, + "kl_loss": 0.16927467286586761, + "loss_ib": 0.006461043376475573, + "step": 3082 + }, + { + "ce_ib": 1.7738676071166992, + "ce_orig": 0.26091521978378296, + "epoch": 0.8863325904090876, + "kl_loss": 0.24931061267852783, + "loss_ib": 0.004266973584890366, + "step": 3082 + }, + { + "ce_ib": 6.045033931732178, + "ce_orig": 0.9139532446861267, + "epoch": 0.8863325904090876, + "kl_loss": 0.1619824469089508, + "loss_ib": 0.0076648578979074955, + "step": 3082 + }, + { + "ce_ib": 4.51046085357666, + "ce_orig": 0.9852688908576965, + "epoch": 0.8863325904090876, + "kl_loss": 0.15117862820625305, + "loss_ib": 0.006022247020155191, + "step": 3082 + }, + { + "ce_ib": 3.3503830432891846, + "ce_orig": 0.7593944072723389, + "epoch": 0.8866201739880653, + "kl_loss": 0.13240444660186768, + "loss_ib": 0.0046744272112846375, + "step": 3083 + }, + { + "ce_ib": 3.532987117767334, + "ce_orig": 0.761421263217926, + "epoch": 0.8866201739880653, + "kl_loss": 0.15348142385482788, + "loss_ib": 0.00506780156865716, + "step": 3083 + }, + { + "ce_ib": 3.3112778663635254, + "ce_orig": 0.7769888639450073, + "epoch": 0.8866201739880653, + "kl_loss": 0.1210353821516037, + "loss_ib": 0.004521631635725498, + "step": 3083 + }, + { + "ce_ib": 3.1191651821136475, + "ce_orig": 0.8053016066551208, + "epoch": 0.8866201739880653, + "kl_loss": 0.13703714311122894, + "loss_ib": 0.004489536862820387, + "step": 3083 + }, + { + "ce_ib": 3.4403834342956543, + "ce_orig": 0.8176072835922241, + "epoch": 0.8869077575670429, + "kl_loss": 0.23449639976024628, + "loss_ib": 0.0057853469625115395, + "step": 3084 + }, + { + "ce_ib": 2.108762502670288, + "ce_orig": 0.5024569034576416, + "epoch": 0.8869077575670429, + "kl_loss": 0.09066268056631088, + "loss_ib": 0.003015389433130622, + "step": 3084 + }, + { + "ce_ib": 2.715806484222412, + "ce_orig": 0.5141053199768066, + "epoch": 0.8869077575670429, + "kl_loss": 0.24227169156074524, + "loss_ib": 0.005138523411005735, + "step": 3084 + }, + { + "ce_ib": 4.007493495941162, + "ce_orig": 0.8825364708900452, + "epoch": 0.8869077575670429, + "kl_loss": 0.2419431358575821, + "loss_ib": 0.006426924839615822, + "step": 3084 + }, + { + "epoch": 0.8871953411460206, + "grad_norm": 0.1550760120153427, + "learning_rate": 8.268434467647793e-06, + "loss": 0.8906, + "step": 3085 + }, + { + "ce_ib": 4.605878829956055, + "ce_orig": 0.9871290326118469, + "epoch": 0.8871953411460206, + "kl_loss": 0.2080526053905487, + "loss_ib": 0.006686404813081026, + "step": 3085 + }, + { + "ce_ib": 4.952621936798096, + "ce_orig": 1.0212136507034302, + "epoch": 0.8871953411460206, + "kl_loss": 0.17193788290023804, + "loss_ib": 0.006672000512480736, + "step": 3085 + }, + { + "ce_ib": 5.273582458496094, + "ce_orig": 0.8727911710739136, + "epoch": 0.8871953411460206, + "kl_loss": 0.1888829916715622, + "loss_ib": 0.007162412162870169, + "step": 3085 + }, + { + "ce_ib": 4.661823749542236, + "ce_orig": 1.0504554510116577, + "epoch": 0.8871953411460206, + "kl_loss": 0.2121097892522812, + "loss_ib": 0.006782921496778727, + "step": 3085 + }, + { + "ce_ib": 2.725644826889038, + "ce_orig": 0.7153981924057007, + "epoch": 0.8874829247249982, + "kl_loss": 0.14276044070720673, + "loss_ib": 0.004153249319642782, + "step": 3086 + }, + { + "ce_ib": 3.6039278507232666, + "ce_orig": 0.7025612592697144, + "epoch": 0.8874829247249982, + "kl_loss": 0.18722836673259735, + "loss_ib": 0.0054762111976742744, + "step": 3086 + }, + { + "ce_ib": 3.1239755153656006, + "ce_orig": 0.7350562810897827, + "epoch": 0.8874829247249982, + "kl_loss": 0.14566752314567566, + "loss_ib": 0.004580650944262743, + "step": 3086 + }, + { + "ce_ib": 4.7595391273498535, + "ce_orig": 1.2551275491714478, + "epoch": 0.8874829247249982, + "kl_loss": 0.11130133271217346, + "loss_ib": 0.005872552283108234, + "step": 3086 + }, + { + "ce_ib": 4.523993015289307, + "ce_orig": 0.784727156162262, + "epoch": 0.8877705083039759, + "kl_loss": 0.16700120270252228, + "loss_ib": 0.006194004788994789, + "step": 3087 + }, + { + "ce_ib": 6.151092529296875, + "ce_orig": 0.9387657046318054, + "epoch": 0.8877705083039759, + "kl_loss": 0.17329981923103333, + "loss_ib": 0.007884090766310692, + "step": 3087 + }, + { + "ce_ib": 3.0421762466430664, + "ce_orig": 0.7652984857559204, + "epoch": 0.8877705083039759, + "kl_loss": 0.11124810576438904, + "loss_ib": 0.004154657479375601, + "step": 3087 + }, + { + "ce_ib": 2.5970237255096436, + "ce_orig": 0.5761621594429016, + "epoch": 0.8877705083039759, + "kl_loss": 0.2838428318500519, + "loss_ib": 0.005435452330857515, + "step": 3087 + }, + { + "ce_ib": 4.183699131011963, + "ce_orig": 0.868650496006012, + "epoch": 0.8880580918829535, + "kl_loss": 0.14227671921253204, + "loss_ib": 0.005606466438621283, + "step": 3088 + }, + { + "ce_ib": 4.438241481781006, + "ce_orig": 1.1313414573669434, + "epoch": 0.8880580918829535, + "kl_loss": 0.16181080043315887, + "loss_ib": 0.006056349258869886, + "step": 3088 + }, + { + "ce_ib": 4.243907451629639, + "ce_orig": 0.9602914452552795, + "epoch": 0.8880580918829535, + "kl_loss": 0.1408255249261856, + "loss_ib": 0.005652162712067366, + "step": 3088 + }, + { + "ce_ib": 4.726057052612305, + "ce_orig": 1.1026653051376343, + "epoch": 0.8880580918829535, + "kl_loss": 0.1907709836959839, + "loss_ib": 0.0066337669268250465, + "step": 3088 + }, + { + "ce_ib": 3.0958852767944336, + "ce_orig": 0.8226089477539062, + "epoch": 0.8883456754619311, + "kl_loss": 0.14688819646835327, + "loss_ib": 0.004564767237752676, + "step": 3089 + }, + { + "ce_ib": 2.7387607097625732, + "ce_orig": 0.5583288073539734, + "epoch": 0.8883456754619311, + "kl_loss": 0.09383485466241837, + "loss_ib": 0.0036771090235561132, + "step": 3089 + }, + { + "ce_ib": 2.949733018875122, + "ce_orig": 0.6151276230812073, + "epoch": 0.8883456754619311, + "kl_loss": 0.37819188833236694, + "loss_ib": 0.006731651723384857, + "step": 3089 + }, + { + "ce_ib": 2.407611846923828, + "ce_orig": 0.5239338278770447, + "epoch": 0.8883456754619311, + "kl_loss": 0.2176307737827301, + "loss_ib": 0.004583919420838356, + "step": 3089 + }, + { + "epoch": 0.8886332590409087, + "grad_norm": 0.14267465472221375, + "learning_rate": 8.262557391151904e-06, + "loss": 0.8551, + "step": 3090 + }, + { + "ce_ib": 2.9020845890045166, + "ce_orig": 0.7984545230865479, + "epoch": 0.8886332590409087, + "kl_loss": 0.20462962985038757, + "loss_ib": 0.004948380868881941, + "step": 3090 + }, + { + "ce_ib": 5.201047897338867, + "ce_orig": 1.028778076171875, + "epoch": 0.8886332590409087, + "kl_loss": 0.1906316727399826, + "loss_ib": 0.007107364945113659, + "step": 3090 + }, + { + "ce_ib": 4.175278186798096, + "ce_orig": 0.7040855884552002, + "epoch": 0.8886332590409087, + "kl_loss": 0.3909037113189697, + "loss_ib": 0.008084314875304699, + "step": 3090 + }, + { + "ce_ib": 4.757804870605469, + "ce_orig": 0.7530616521835327, + "epoch": 0.8886332590409087, + "kl_loss": 0.19657528400421143, + "loss_ib": 0.006723557598888874, + "step": 3090 + }, + { + "ce_ib": 5.039158821105957, + "ce_orig": 1.2365496158599854, + "epoch": 0.8889208426198864, + "kl_loss": 0.1495305597782135, + "loss_ib": 0.006534464191645384, + "step": 3091 + }, + { + "ce_ib": 2.5162713527679443, + "ce_orig": 0.7866913080215454, + "epoch": 0.8889208426198864, + "kl_loss": 0.10511822998523712, + "loss_ib": 0.0035674534738063812, + "step": 3091 + }, + { + "ce_ib": 4.255115509033203, + "ce_orig": 0.9234839081764221, + "epoch": 0.8889208426198864, + "kl_loss": 0.13177117705345154, + "loss_ib": 0.005572827532887459, + "step": 3091 + }, + { + "ce_ib": 3.2626843452453613, + "ce_orig": 0.9934234023094177, + "epoch": 0.8889208426198864, + "kl_loss": 0.09166992455720901, + "loss_ib": 0.0041793836280703545, + "step": 3091 + }, + { + "ce_ib": 5.400909900665283, + "ce_orig": 1.177150845527649, + "epoch": 0.8892084261988641, + "kl_loss": 0.13706953823566437, + "loss_ib": 0.006771604996174574, + "step": 3092 + }, + { + "ce_ib": 2.996107816696167, + "ce_orig": 0.537158727645874, + "epoch": 0.8892084261988641, + "kl_loss": 0.15069852769374847, + "loss_ib": 0.00450309319421649, + "step": 3092 + }, + { + "ce_ib": 3.0097062587738037, + "ce_orig": 0.6442214250564575, + "epoch": 0.8892084261988641, + "kl_loss": 0.14520230889320374, + "loss_ib": 0.004461729433387518, + "step": 3092 + }, + { + "ce_ib": 5.491713523864746, + "ce_orig": 1.1160171031951904, + "epoch": 0.8892084261988641, + "kl_loss": 0.14572402834892273, + "loss_ib": 0.006948953494429588, + "step": 3092 + }, + { + "ce_ib": 5.116119861602783, + "ce_orig": 1.3369945287704468, + "epoch": 0.8894960097778417, + "kl_loss": 0.18969155848026276, + "loss_ib": 0.007013035472482443, + "step": 3093 + }, + { + "ce_ib": 3.283036470413208, + "ce_orig": 0.7648364901542664, + "epoch": 0.8894960097778417, + "kl_loss": 0.13788864016532898, + "loss_ib": 0.004661922808736563, + "step": 3093 + }, + { + "ce_ib": 2.349837064743042, + "ce_orig": 0.6380730867385864, + "epoch": 0.8894960097778417, + "kl_loss": 0.14782792329788208, + "loss_ib": 0.003828116226941347, + "step": 3093 + }, + { + "ce_ib": 2.9619250297546387, + "ce_orig": 0.5749934911727905, + "epoch": 0.8894960097778417, + "kl_loss": 0.16786880791187286, + "loss_ib": 0.004640613216906786, + "step": 3093 + }, + { + "ce_ib": 3.471174955368042, + "ce_orig": 0.6754353642463684, + "epoch": 0.8897835933568193, + "kl_loss": 0.12338775396347046, + "loss_ib": 0.0047050523571670055, + "step": 3094 + }, + { + "ce_ib": 1.770673155784607, + "ce_orig": 0.46942323446273804, + "epoch": 0.8897835933568193, + "kl_loss": 0.11101822555065155, + "loss_ib": 0.0028808554634451866, + "step": 3094 + }, + { + "ce_ib": 3.069263458251953, + "ce_orig": 0.6611189842224121, + "epoch": 0.8897835933568193, + "kl_loss": 0.173280730843544, + "loss_ib": 0.0048020705580711365, + "step": 3094 + }, + { + "ce_ib": 3.1257896423339844, + "ce_orig": 0.9125049710273743, + "epoch": 0.8897835933568193, + "kl_loss": 0.1386227160692215, + "loss_ib": 0.004512016661465168, + "step": 3094 + }, + { + "epoch": 0.890071176935797, + "grad_norm": 0.13136667013168335, + "learning_rate": 8.256672454398008e-06, + "loss": 0.8818, + "step": 3095 + }, + { + "ce_ib": 3.6568267345428467, + "ce_orig": 0.7233477830886841, + "epoch": 0.890071176935797, + "kl_loss": 0.19779348373413086, + "loss_ib": 0.00563476188108325, + "step": 3095 + }, + { + "ce_ib": 4.749492168426514, + "ce_orig": 1.0913872718811035, + "epoch": 0.890071176935797, + "kl_loss": 0.25381627678871155, + "loss_ib": 0.007287654560059309, + "step": 3095 + }, + { + "ce_ib": 2.752319097518921, + "ce_orig": 0.5719608664512634, + "epoch": 0.890071176935797, + "kl_loss": 0.18782395124435425, + "loss_ib": 0.004630558658391237, + "step": 3095 + }, + { + "ce_ib": 7.980030536651611, + "ce_orig": 1.4149949550628662, + "epoch": 0.890071176935797, + "kl_loss": 0.1778389811515808, + "loss_ib": 0.009758420288562775, + "step": 3095 + }, + { + "ce_ib": 2.1935806274414062, + "ce_orig": 0.5157347917556763, + "epoch": 0.8903587605147746, + "kl_loss": 0.18753363192081451, + "loss_ib": 0.004068917129188776, + "step": 3096 + }, + { + "ce_ib": 2.8331174850463867, + "ce_orig": 0.6525582075119019, + "epoch": 0.8903587605147746, + "kl_loss": 0.14246660470962524, + "loss_ib": 0.00425778329372406, + "step": 3096 + }, + { + "ce_ib": 3.686743974685669, + "ce_orig": 0.6509453058242798, + "epoch": 0.8903587605147746, + "kl_loss": 0.17239689826965332, + "loss_ib": 0.00541071267798543, + "step": 3096 + }, + { + "ce_ib": 2.51916241645813, + "ce_orig": 0.34357577562332153, + "epoch": 0.8903587605147746, + "kl_loss": 0.15531475841999054, + "loss_ib": 0.0040723104029893875, + "step": 3096 + }, + { + "ce_ib": 5.622654438018799, + "ce_orig": 1.266108751296997, + "epoch": 0.8906463440937522, + "kl_loss": 0.15450066328048706, + "loss_ib": 0.0071676610969007015, + "step": 3097 + }, + { + "ce_ib": 2.417294979095459, + "ce_orig": 0.5796713829040527, + "epoch": 0.8906463440937522, + "kl_loss": 0.12580905854701996, + "loss_ib": 0.0036753856111317873, + "step": 3097 + }, + { + "ce_ib": 1.6475956439971924, + "ce_orig": 0.291083961725235, + "epoch": 0.8906463440937522, + "kl_loss": 0.3437381684780121, + "loss_ib": 0.0050849770195782185, + "step": 3097 + }, + { + "ce_ib": 1.5092206001281738, + "ce_orig": 0.44675758481025696, + "epoch": 0.8906463440937522, + "kl_loss": 0.10672518610954285, + "loss_ib": 0.002576472470536828, + "step": 3097 + }, + { + "ce_ib": 3.1002862453460693, + "ce_orig": 0.7281441688537598, + "epoch": 0.8909339276727299, + "kl_loss": 0.38414910435676575, + "loss_ib": 0.006941777188330889, + "step": 3098 + }, + { + "ce_ib": 2.1353094577789307, + "ce_orig": 0.6358004212379456, + "epoch": 0.8909339276727299, + "kl_loss": 0.1170094907283783, + "loss_ib": 0.0033054042141884565, + "step": 3098 + }, + { + "ce_ib": 3.071075201034546, + "ce_orig": 0.4042377769947052, + "epoch": 0.8909339276727299, + "kl_loss": 0.19243475794792175, + "loss_ib": 0.004995422903448343, + "step": 3098 + }, + { + "ce_ib": 5.273190498352051, + "ce_orig": 1.1149479150772095, + "epoch": 0.8909339276727299, + "kl_loss": 0.18822598457336426, + "loss_ib": 0.007155450526624918, + "step": 3098 + }, + { + "ce_ib": 6.8703155517578125, + "ce_orig": 1.8975019454956055, + "epoch": 0.8912215112517076, + "kl_loss": 0.2508567273616791, + "loss_ib": 0.009378882125020027, + "step": 3099 + }, + { + "ce_ib": 5.823940753936768, + "ce_orig": 1.190085768699646, + "epoch": 0.8912215112517076, + "kl_loss": 0.21669773757457733, + "loss_ib": 0.00799091812223196, + "step": 3099 + }, + { + "ce_ib": 2.8537404537200928, + "ce_orig": 0.3235244154930115, + "epoch": 0.8912215112517076, + "kl_loss": 0.20920301973819733, + "loss_ib": 0.004945770371705294, + "step": 3099 + }, + { + "ce_ib": 1.9009647369384766, + "ce_orig": 0.4865524172782898, + "epoch": 0.8912215112517076, + "kl_loss": 0.18332403898239136, + "loss_ib": 0.0037342053838074207, + "step": 3099 + }, + { + "epoch": 0.8915090948306852, + "grad_norm": 0.1286255419254303, + "learning_rate": 8.250779671564277e-06, + "loss": 0.7964, + "step": 3100 + }, + { + "ce_ib": 2.9008967876434326, + "ce_orig": 0.4400394558906555, + "epoch": 0.8915090948306852, + "kl_loss": 0.19166168570518494, + "loss_ib": 0.00481751374900341, + "step": 3100 + }, + { + "ce_ib": 2.7978878021240234, + "ce_orig": 0.8827282786369324, + "epoch": 0.8915090948306852, + "kl_loss": 0.10108354687690735, + "loss_ib": 0.003808723296970129, + "step": 3100 + }, + { + "ce_ib": 3.6101016998291016, + "ce_orig": 0.5083693265914917, + "epoch": 0.8915090948306852, + "kl_loss": 0.11145010590553284, + "loss_ib": 0.004724602680653334, + "step": 3100 + }, + { + "ce_ib": 2.4380686283111572, + "ce_orig": 0.7589706182479858, + "epoch": 0.8915090948306852, + "kl_loss": 0.16641366481781006, + "loss_ib": 0.004102205391973257, + "step": 3100 + }, + { + "ce_ib": 3.174034595489502, + "ce_orig": 0.7147536277770996, + "epoch": 0.8917966784096628, + "kl_loss": 0.12305164337158203, + "loss_ib": 0.004404550883919001, + "step": 3101 + }, + { + "ce_ib": 2.950622081756592, + "ce_orig": 0.7160993218421936, + "epoch": 0.8917966784096628, + "kl_loss": 0.10902819782495499, + "loss_ib": 0.004040903877466917, + "step": 3101 + }, + { + "ce_ib": 3.538774251937866, + "ce_orig": 0.8319281935691833, + "epoch": 0.8917966784096628, + "kl_loss": 0.19172807037830353, + "loss_ib": 0.005456054583191872, + "step": 3101 + }, + { + "ce_ib": 3.7571165561676025, + "ce_orig": 0.8096185922622681, + "epoch": 0.8917966784096628, + "kl_loss": 0.12147871404886246, + "loss_ib": 0.004971903283149004, + "step": 3101 + }, + { + "ce_ib": 3.9453351497650146, + "ce_orig": 0.7617383599281311, + "epoch": 0.8920842619886404, + "kl_loss": 0.2089398056268692, + "loss_ib": 0.006034732796251774, + "step": 3102 + }, + { + "ce_ib": 5.245067119598389, + "ce_orig": 1.2969458103179932, + "epoch": 0.8920842619886404, + "kl_loss": 0.18399310111999512, + "loss_ib": 0.007084998302161694, + "step": 3102 + }, + { + "ce_ib": 3.8839986324310303, + "ce_orig": 0.7695393562316895, + "epoch": 0.8920842619886404, + "kl_loss": 0.16984878480434418, + "loss_ib": 0.005582486279308796, + "step": 3102 + }, + { + "ce_ib": 4.059947967529297, + "ce_orig": 0.7965952157974243, + "epoch": 0.8920842619886404, + "kl_loss": 0.25123584270477295, + "loss_ib": 0.006572306156158447, + "step": 3102 + }, + { + "ce_ib": 2.498436450958252, + "ce_orig": 0.5058416724205017, + "epoch": 0.8923718455676181, + "kl_loss": 0.2805415391921997, + "loss_ib": 0.005303851794451475, + "step": 3103 + }, + { + "ce_ib": 3.355717658996582, + "ce_orig": 0.7554253339767456, + "epoch": 0.8923718455676181, + "kl_loss": 0.2145039588212967, + "loss_ib": 0.00550075713545084, + "step": 3103 + }, + { + "ce_ib": 2.9880754947662354, + "ce_orig": 0.4339578151702881, + "epoch": 0.8923718455676181, + "kl_loss": 0.08154422789812088, + "loss_ib": 0.003803517669439316, + "step": 3103 + }, + { + "ce_ib": 5.9173808097839355, + "ce_orig": 1.3245134353637695, + "epoch": 0.8923718455676181, + "kl_loss": 0.21591177582740784, + "loss_ib": 0.008076498284935951, + "step": 3103 + }, + { + "ce_ib": 2.226825714111328, + "ce_orig": 0.5399856567382812, + "epoch": 0.8926594291465957, + "kl_loss": 0.1364595592021942, + "loss_ib": 0.0035914210602641106, + "step": 3104 + }, + { + "ce_ib": 4.401676177978516, + "ce_orig": 0.8910858035087585, + "epoch": 0.8926594291465957, + "kl_loss": 0.16982391476631165, + "loss_ib": 0.006099915597587824, + "step": 3104 + }, + { + "ce_ib": 4.015680313110352, + "ce_orig": 0.9455913305282593, + "epoch": 0.8926594291465957, + "kl_loss": 0.21420812606811523, + "loss_ib": 0.0061577619053423405, + "step": 3104 + }, + { + "ce_ib": 5.7855072021484375, + "ce_orig": 1.229187250137329, + "epoch": 0.8926594291465957, + "kl_loss": 0.206244096159935, + "loss_ib": 0.007847948931157589, + "step": 3104 + }, + { + "epoch": 0.8929470127255734, + "grad_norm": 0.1565793752670288, + "learning_rate": 8.244879056847795e-06, + "loss": 0.796, + "step": 3105 + }, + { + "ce_ib": 2.6370437145233154, + "ce_orig": 0.6637303829193115, + "epoch": 0.8929470127255734, + "kl_loss": 0.1480325311422348, + "loss_ib": 0.004117369186133146, + "step": 3105 + }, + { + "ce_ib": 3.273104429244995, + "ce_orig": 0.9297803640365601, + "epoch": 0.8929470127255734, + "kl_loss": 0.11711890995502472, + "loss_ib": 0.004444293212145567, + "step": 3105 + }, + { + "ce_ib": 2.952204942703247, + "ce_orig": 0.7365701794624329, + "epoch": 0.8929470127255734, + "kl_loss": 0.15222002565860748, + "loss_ib": 0.004474404733628035, + "step": 3105 + }, + { + "ce_ib": 3.3860251903533936, + "ce_orig": 0.7415664196014404, + "epoch": 0.8929470127255734, + "kl_loss": 0.46801453828811646, + "loss_ib": 0.008066169917583466, + "step": 3105 + }, + { + "ce_ib": 1.7011414766311646, + "ce_orig": 0.5008966326713562, + "epoch": 0.893234596304551, + "kl_loss": 0.2590595483779907, + "loss_ib": 0.004291736986488104, + "step": 3106 + }, + { + "ce_ib": 8.371695518493652, + "ce_orig": 0.9549317955970764, + "epoch": 0.893234596304551, + "kl_loss": 0.14085331559181213, + "loss_ib": 0.009780229069292545, + "step": 3106 + }, + { + "ce_ib": 7.334364891052246, + "ce_orig": 1.8049062490463257, + "epoch": 0.893234596304551, + "kl_loss": 0.25361931324005127, + "loss_ib": 0.0098705580458045, + "step": 3106 + }, + { + "ce_ib": 2.676165819168091, + "ce_orig": 0.4457092583179474, + "epoch": 0.893234596304551, + "kl_loss": 0.2109290510416031, + "loss_ib": 0.004785456694662571, + "step": 3106 + }, + { + "ce_ib": 3.790651798248291, + "ce_orig": 0.8075549006462097, + "epoch": 0.8935221798835287, + "kl_loss": 0.25837188959121704, + "loss_ib": 0.006374370772391558, + "step": 3107 + }, + { + "ce_ib": 3.6731419563293457, + "ce_orig": 0.5870965123176575, + "epoch": 0.8935221798835287, + "kl_loss": 0.28679656982421875, + "loss_ib": 0.006541107781231403, + "step": 3107 + }, + { + "ce_ib": 3.0405051708221436, + "ce_orig": 0.729755163192749, + "epoch": 0.8935221798835287, + "kl_loss": 0.2554026246070862, + "loss_ib": 0.005594531074166298, + "step": 3107 + }, + { + "ce_ib": 4.960191249847412, + "ce_orig": 1.344190239906311, + "epoch": 0.8935221798835287, + "kl_loss": 0.16449442505836487, + "loss_ib": 0.006605135276913643, + "step": 3107 + }, + { + "ce_ib": 3.2105085849761963, + "ce_orig": 0.4656553864479065, + "epoch": 0.8938097634625063, + "kl_loss": 0.2154984027147293, + "loss_ib": 0.0053654927760362625, + "step": 3108 + }, + { + "ce_ib": 1.8681886196136475, + "ce_orig": 0.20042641460895538, + "epoch": 0.8938097634625063, + "kl_loss": 0.19211459159851074, + "loss_ib": 0.0037893345579504967, + "step": 3108 + }, + { + "ce_ib": 3.93146014213562, + "ce_orig": 0.7625020146369934, + "epoch": 0.8938097634625063, + "kl_loss": 0.20217503607273102, + "loss_ib": 0.0059532104060053825, + "step": 3108 + }, + { + "ce_ib": 4.259249687194824, + "ce_orig": 0.9421066641807556, + "epoch": 0.8938097634625063, + "kl_loss": 0.20511692762374878, + "loss_ib": 0.006310418713837862, + "step": 3108 + }, + { + "ce_ib": 4.327896595001221, + "ce_orig": 0.7654626369476318, + "epoch": 0.8940973470414839, + "kl_loss": 0.239793062210083, + "loss_ib": 0.006725827232003212, + "step": 3109 + }, + { + "ce_ib": 6.9924139976501465, + "ce_orig": 1.467161774635315, + "epoch": 0.8940973470414839, + "kl_loss": 0.2230253368616104, + "loss_ib": 0.0092226667329669, + "step": 3109 + }, + { + "ce_ib": 2.184725046157837, + "ce_orig": 0.7018915414810181, + "epoch": 0.8940973470414839, + "kl_loss": 0.10926135629415512, + "loss_ib": 0.0032773385755717754, + "step": 3109 + }, + { + "ce_ib": 7.436008930206299, + "ce_orig": 1.9167146682739258, + "epoch": 0.8940973470414839, + "kl_loss": 0.38748612999916077, + "loss_ib": 0.011310870759189129, + "step": 3109 + }, + { + "epoch": 0.8943849306204615, + "grad_norm": 0.14232924580574036, + "learning_rate": 8.238970624464513e-06, + "loss": 0.8134, + "step": 3110 + }, + { + "ce_ib": 3.492448329925537, + "ce_orig": 0.7338119745254517, + "epoch": 0.8943849306204615, + "kl_loss": 0.17633527517318726, + "loss_ib": 0.005255801137536764, + "step": 3110 + }, + { + "ce_ib": 5.673172950744629, + "ce_orig": 1.1973505020141602, + "epoch": 0.8943849306204615, + "kl_loss": 0.2729659378528595, + "loss_ib": 0.00840283278375864, + "step": 3110 + }, + { + "ce_ib": 2.2961483001708984, + "ce_orig": 0.5676378011703491, + "epoch": 0.8943849306204615, + "kl_loss": 0.32829755544662476, + "loss_ib": 0.005579123739153147, + "step": 3110 + }, + { + "ce_ib": 5.755520343780518, + "ce_orig": 1.2145345211029053, + "epoch": 0.8943849306204615, + "kl_loss": 0.3456052541732788, + "loss_ib": 0.00921157281845808, + "step": 3110 + }, + { + "ce_ib": 2.902986526489258, + "ce_orig": 0.5643954873085022, + "epoch": 0.8946725141994392, + "kl_loss": 0.1688193678855896, + "loss_ib": 0.004591180477291346, + "step": 3111 + }, + { + "ce_ib": 2.7501296997070312, + "ce_orig": 0.6646100282669067, + "epoch": 0.8946725141994392, + "kl_loss": 0.16075585782527924, + "loss_ib": 0.004357688128948212, + "step": 3111 + }, + { + "ce_ib": 3.049804449081421, + "ce_orig": 0.6853830814361572, + "epoch": 0.8946725141994392, + "kl_loss": 0.16282302141189575, + "loss_ib": 0.0046780346892774105, + "step": 3111 + }, + { + "ce_ib": 4.916747093200684, + "ce_orig": 1.1096487045288086, + "epoch": 0.8946725141994392, + "kl_loss": 0.23516105115413666, + "loss_ib": 0.007268357556313276, + "step": 3111 + }, + { + "ce_ib": 4.852289199829102, + "ce_orig": 1.1359295845031738, + "epoch": 0.8949600977784169, + "kl_loss": 0.18279314041137695, + "loss_ib": 0.006680220365524292, + "step": 3112 + }, + { + "ce_ib": 5.102050304412842, + "ce_orig": 1.2899397611618042, + "epoch": 0.8949600977784169, + "kl_loss": 0.197246253490448, + "loss_ib": 0.007074512541294098, + "step": 3112 + }, + { + "ce_ib": 3.3909833431243896, + "ce_orig": 1.191597819328308, + "epoch": 0.8949600977784169, + "kl_loss": 0.1347818523645401, + "loss_ib": 0.004738801624625921, + "step": 3112 + }, + { + "ce_ib": 4.479989528656006, + "ce_orig": 0.9429420828819275, + "epoch": 0.8949600977784169, + "kl_loss": 0.26518866419792175, + "loss_ib": 0.007131875958293676, + "step": 3112 + }, + { + "ce_ib": 6.626401424407959, + "ce_orig": 1.1166218519210815, + "epoch": 0.8952476813573945, + "kl_loss": 0.2301233410835266, + "loss_ib": 0.008927634917199612, + "step": 3113 + }, + { + "ce_ib": 3.6317522525787354, + "ce_orig": 0.4517545700073242, + "epoch": 0.8952476813573945, + "kl_loss": 0.18546707928180695, + "loss_ib": 0.005486423149704933, + "step": 3113 + }, + { + "ce_ib": 4.603742599487305, + "ce_orig": 0.6014341711997986, + "epoch": 0.8952476813573945, + "kl_loss": 0.3739631175994873, + "loss_ib": 0.00834337342530489, + "step": 3113 + }, + { + "ce_ib": 3.061030864715576, + "ce_orig": 0.6591997742652893, + "epoch": 0.8952476813573945, + "kl_loss": 0.16396206617355347, + "loss_ib": 0.00470065139234066, + "step": 3113 + }, + { + "ce_ib": 3.9802205562591553, + "ce_orig": 1.0077344179153442, + "epoch": 0.8955352649363721, + "kl_loss": 0.2607388496398926, + "loss_ib": 0.006587608717381954, + "step": 3114 + }, + { + "ce_ib": 4.416682720184326, + "ce_orig": 1.042061448097229, + "epoch": 0.8955352649363721, + "kl_loss": 0.18695387244224548, + "loss_ib": 0.00628622155636549, + "step": 3114 + }, + { + "ce_ib": 4.81662130355835, + "ce_orig": 1.1300642490386963, + "epoch": 0.8955352649363721, + "kl_loss": 0.2115844190120697, + "loss_ib": 0.006932465359568596, + "step": 3114 + }, + { + "ce_ib": 4.201371669769287, + "ce_orig": 0.6356173157691956, + "epoch": 0.8955352649363721, + "kl_loss": 0.2386530041694641, + "loss_ib": 0.006587901618331671, + "step": 3114 + }, + { + "epoch": 0.8958228485153498, + "grad_norm": 0.12256649881601334, + "learning_rate": 8.233054388649216e-06, + "loss": 0.8498, + "step": 3115 + }, + { + "ce_ib": 2.4222707748413086, + "ce_orig": 0.6614570617675781, + "epoch": 0.8958228485153498, + "kl_loss": 0.1349015235900879, + "loss_ib": 0.003771285992115736, + "step": 3115 + }, + { + "ce_ib": 2.957444667816162, + "ce_orig": 0.6917654275894165, + "epoch": 0.8958228485153498, + "kl_loss": 0.13120931386947632, + "loss_ib": 0.004269537981599569, + "step": 3115 + }, + { + "ce_ib": 5.412607192993164, + "ce_orig": 1.1988635063171387, + "epoch": 0.8958228485153498, + "kl_loss": 0.22368696331977844, + "loss_ib": 0.007649476174265146, + "step": 3115 + }, + { + "ce_ib": 2.5532491207122803, + "ce_orig": 0.6821431517601013, + "epoch": 0.8958228485153498, + "kl_loss": 0.14186251163482666, + "loss_ib": 0.003971874248236418, + "step": 3115 + }, + { + "ce_ib": 3.198167085647583, + "ce_orig": 0.7227888107299805, + "epoch": 0.8961104320943274, + "kl_loss": 0.12820947170257568, + "loss_ib": 0.004480261821299791, + "step": 3116 + }, + { + "ce_ib": 4.625506401062012, + "ce_orig": 0.9869837164878845, + "epoch": 0.8961104320943274, + "kl_loss": 0.23777008056640625, + "loss_ib": 0.007003207225352526, + "step": 3116 + }, + { + "ce_ib": 4.728062629699707, + "ce_orig": 1.1571589708328247, + "epoch": 0.8961104320943274, + "kl_loss": 0.2300049066543579, + "loss_ib": 0.007028111722320318, + "step": 3116 + }, + { + "ce_ib": 5.685070514678955, + "ce_orig": 1.2384172677993774, + "epoch": 0.8961104320943274, + "kl_loss": 0.13973000645637512, + "loss_ib": 0.007082370575517416, + "step": 3116 + }, + { + "ce_ib": 2.531684398651123, + "ce_orig": 0.6974915862083435, + "epoch": 0.896398015673305, + "kl_loss": 0.16540437936782837, + "loss_ib": 0.004185728263109922, + "step": 3117 + }, + { + "ce_ib": 3.4759488105773926, + "ce_orig": 0.7094293832778931, + "epoch": 0.896398015673305, + "kl_loss": 0.2013729065656662, + "loss_ib": 0.005489677656441927, + "step": 3117 + }, + { + "ce_ib": 1.7376712560653687, + "ce_orig": 0.3581862151622772, + "epoch": 0.896398015673305, + "kl_loss": 0.21363332867622375, + "loss_ib": 0.003874004352837801, + "step": 3117 + }, + { + "ce_ib": 5.835577964782715, + "ce_orig": 1.4885233640670776, + "epoch": 0.896398015673305, + "kl_loss": 0.2026522159576416, + "loss_ib": 0.007862099446356297, + "step": 3117 + }, + { + "ce_ib": 3.5799036026000977, + "ce_orig": 0.7649292945861816, + "epoch": 0.8966855992522828, + "kl_loss": 0.255397230386734, + "loss_ib": 0.00613387580960989, + "step": 3118 + }, + { + "ce_ib": 3.0513083934783936, + "ce_orig": 0.865064799785614, + "epoch": 0.8966855992522828, + "kl_loss": 0.12493079900741577, + "loss_ib": 0.004300616215914488, + "step": 3118 + }, + { + "ce_ib": 3.544097661972046, + "ce_orig": 0.8298912644386292, + "epoch": 0.8966855992522828, + "kl_loss": 0.20499157905578613, + "loss_ib": 0.005594013724476099, + "step": 3118 + }, + { + "ce_ib": 4.663642883300781, + "ce_orig": 1.200555682182312, + "epoch": 0.8966855992522828, + "kl_loss": 0.1810140311717987, + "loss_ib": 0.006473782937973738, + "step": 3118 + }, + { + "ce_ib": 7.226274490356445, + "ce_orig": 1.7892075777053833, + "epoch": 0.8969731828312604, + "kl_loss": 0.23664698004722595, + "loss_ib": 0.009592743590474129, + "step": 3119 + }, + { + "ce_ib": 5.304622173309326, + "ce_orig": 0.830334484577179, + "epoch": 0.8969731828312604, + "kl_loss": 0.16461464762687683, + "loss_ib": 0.006950768176466227, + "step": 3119 + }, + { + "ce_ib": 3.4193334579467773, + "ce_orig": 0.4090982973575592, + "epoch": 0.8969731828312604, + "kl_loss": 0.24512316286563873, + "loss_ib": 0.005870564840734005, + "step": 3119 + }, + { + "ce_ib": 3.584702968597412, + "ce_orig": 1.1868985891342163, + "epoch": 0.8969731828312604, + "kl_loss": 0.12651076912879944, + "loss_ib": 0.004849810618907213, + "step": 3119 + }, + { + "epoch": 0.897260766410238, + "grad_norm": 0.12822048366069794, + "learning_rate": 8.22713036365549e-06, + "loss": 0.9344, + "step": 3120 + }, + { + "ce_ib": 4.425660133361816, + "ce_orig": 1.024692177772522, + "epoch": 0.897260766410238, + "kl_loss": 0.11720477044582367, + "loss_ib": 0.005597707349807024, + "step": 3120 + }, + { + "ce_ib": 3.9638566970825195, + "ce_orig": 0.6974149346351624, + "epoch": 0.897260766410238, + "kl_loss": 0.18265047669410706, + "loss_ib": 0.005790361203253269, + "step": 3120 + }, + { + "ce_ib": 4.227596282958984, + "ce_orig": 1.0537580251693726, + "epoch": 0.897260766410238, + "kl_loss": 0.21190759539604187, + "loss_ib": 0.0063466718420386314, + "step": 3120 + }, + { + "ce_ib": 5.220859050750732, + "ce_orig": 1.344127893447876, + "epoch": 0.897260766410238, + "kl_loss": 0.19623708724975586, + "loss_ib": 0.007183229550719261, + "step": 3120 + }, + { + "ce_ib": 3.239370346069336, + "ce_orig": 0.8545911908149719, + "epoch": 0.8975483499892156, + "kl_loss": 0.25508975982666016, + "loss_ib": 0.005790268070995808, + "step": 3121 + }, + { + "ce_ib": 4.049173831939697, + "ce_orig": 0.755760908126831, + "epoch": 0.8975483499892156, + "kl_loss": 0.13172268867492676, + "loss_ib": 0.005366400349885225, + "step": 3121 + }, + { + "ce_ib": 2.47257399559021, + "ce_orig": 0.5434133410453796, + "epoch": 0.8975483499892156, + "kl_loss": 0.16033831238746643, + "loss_ib": 0.004075956996530294, + "step": 3121 + }, + { + "ce_ib": 3.258507013320923, + "ce_orig": 0.8297489881515503, + "epoch": 0.8975483499892156, + "kl_loss": 0.1460033804178238, + "loss_ib": 0.0047185407020151615, + "step": 3121 + }, + { + "ce_ib": 3.5900731086730957, + "ce_orig": 0.7971727252006531, + "epoch": 0.8978359335681932, + "kl_loss": 0.18613466620445251, + "loss_ib": 0.005451419856399298, + "step": 3122 + }, + { + "ce_ib": 5.520582675933838, + "ce_orig": 0.9833826422691345, + "epoch": 0.8978359335681932, + "kl_loss": 0.21962870657444, + "loss_ib": 0.007716869469732046, + "step": 3122 + }, + { + "ce_ib": 2.6682424545288086, + "ce_orig": 0.6504754424095154, + "epoch": 0.8978359335681932, + "kl_loss": 0.1496294140815735, + "loss_ib": 0.0041645364835858345, + "step": 3122 + }, + { + "ce_ib": 2.269178867340088, + "ce_orig": 0.44291168451309204, + "epoch": 0.8978359335681932, + "kl_loss": 0.1862488090991974, + "loss_ib": 0.004131666850298643, + "step": 3122 + }, + { + "ce_ib": 6.443488597869873, + "ce_orig": 1.612691879272461, + "epoch": 0.8981235171471709, + "kl_loss": 0.22128766775131226, + "loss_ib": 0.008656364865601063, + "step": 3123 + }, + { + "ce_ib": 4.016378879547119, + "ce_orig": 0.855128288269043, + "epoch": 0.8981235171471709, + "kl_loss": 0.17366044223308563, + "loss_ib": 0.005752983037382364, + "step": 3123 + }, + { + "ce_ib": 4.343630313873291, + "ce_orig": 0.7859445810317993, + "epoch": 0.8981235171471709, + "kl_loss": 0.445234090089798, + "loss_ib": 0.008795971050858498, + "step": 3123 + }, + { + "ce_ib": 4.078170299530029, + "ce_orig": 0.5438718795776367, + "epoch": 0.8981235171471709, + "kl_loss": 0.16791702806949615, + "loss_ib": 0.00575734069570899, + "step": 3123 + }, + { + "ce_ib": 5.593932151794434, + "ce_orig": 1.3485321998596191, + "epoch": 0.8984111007261485, + "kl_loss": 0.19490006566047668, + "loss_ib": 0.0075429328717291355, + "step": 3124 + }, + { + "ce_ib": 6.722785472869873, + "ce_orig": 1.7951992750167847, + "epoch": 0.8984111007261485, + "kl_loss": 0.26770544052124023, + "loss_ib": 0.0093998396769166, + "step": 3124 + }, + { + "ce_ib": 0.6715496778488159, + "ce_orig": 0.08217887580394745, + "epoch": 0.8984111007261485, + "kl_loss": 0.37461569905281067, + "loss_ib": 0.004417706746608019, + "step": 3124 + }, + { + "ce_ib": 2.4788076877593994, + "ce_orig": 0.5279305577278137, + "epoch": 0.8984111007261485, + "kl_loss": 0.15033073723316193, + "loss_ib": 0.00398211507126689, + "step": 3124 + }, + { + "epoch": 0.8986986843051262, + "grad_norm": 0.1373157948255539, + "learning_rate": 8.221198563755683e-06, + "loss": 0.8426, + "step": 3125 + }, + { + "ce_ib": 5.870676517486572, + "ce_orig": 1.4601093530654907, + "epoch": 0.8986986843051262, + "kl_loss": 0.14469575881958008, + "loss_ib": 0.007317633833736181, + "step": 3125 + }, + { + "ce_ib": 6.361517906188965, + "ce_orig": 1.4486407041549683, + "epoch": 0.8986986843051262, + "kl_loss": 0.1265343427658081, + "loss_ib": 0.007626861333847046, + "step": 3125 + }, + { + "ce_ib": 6.843608856201172, + "ce_orig": 1.7605913877487183, + "epoch": 0.8986986843051262, + "kl_loss": 0.2316533923149109, + "loss_ib": 0.009160143323242664, + "step": 3125 + }, + { + "ce_ib": 2.6104564666748047, + "ce_orig": 0.7714352011680603, + "epoch": 0.8986986843051262, + "kl_loss": 0.09336307644844055, + "loss_ib": 0.00354408728890121, + "step": 3125 + }, + { + "ce_ib": 5.947535991668701, + "ce_orig": 1.3189960718154907, + "epoch": 0.8989862678841039, + "kl_loss": 0.15965819358825684, + "loss_ib": 0.007544117979705334, + "step": 3126 + }, + { + "ce_ib": 3.2208609580993652, + "ce_orig": 0.530651330947876, + "epoch": 0.8989862678841039, + "kl_loss": 0.13725829124450684, + "loss_ib": 0.004593443591147661, + "step": 3126 + }, + { + "ce_ib": 5.100178241729736, + "ce_orig": 1.2393196821212769, + "epoch": 0.8989862678841039, + "kl_loss": 0.12538397312164307, + "loss_ib": 0.006354017648845911, + "step": 3126 + }, + { + "ce_ib": 4.280590057373047, + "ce_orig": 0.8864709734916687, + "epoch": 0.8989862678841039, + "kl_loss": 0.17300677299499512, + "loss_ib": 0.006010657642036676, + "step": 3126 + }, + { + "ce_ib": 3.68678879737854, + "ce_orig": 0.6577613353729248, + "epoch": 0.8992738514630815, + "kl_loss": 0.16078492999076843, + "loss_ib": 0.005294638220220804, + "step": 3127 + }, + { + "ce_ib": 3.7182564735412598, + "ce_orig": 0.5978642702102661, + "epoch": 0.8992738514630815, + "kl_loss": 0.22328969836235046, + "loss_ib": 0.005951153580099344, + "step": 3127 + }, + { + "ce_ib": 2.2302346229553223, + "ce_orig": 0.7189071178436279, + "epoch": 0.8992738514630815, + "kl_loss": 0.10048036277294159, + "loss_ib": 0.003235038137063384, + "step": 3127 + }, + { + "ce_ib": 3.9530744552612305, + "ce_orig": 0.8222480416297913, + "epoch": 0.8992738514630815, + "kl_loss": 0.14437635242938995, + "loss_ib": 0.005396838299930096, + "step": 3127 + }, + { + "ce_ib": 4.0737996101379395, + "ce_orig": 1.1365528106689453, + "epoch": 0.8995614350420591, + "kl_loss": 0.17542600631713867, + "loss_ib": 0.005828059744089842, + "step": 3128 + }, + { + "ce_ib": 2.3081774711608887, + "ce_orig": 0.6253951787948608, + "epoch": 0.8995614350420591, + "kl_loss": 0.14313453435897827, + "loss_ib": 0.003739522537216544, + "step": 3128 + }, + { + "ce_ib": 3.0021369457244873, + "ce_orig": 0.4933500289916992, + "epoch": 0.8995614350420591, + "kl_loss": 0.2465226650238037, + "loss_ib": 0.005467363633215427, + "step": 3128 + }, + { + "ce_ib": 2.873345136642456, + "ce_orig": 0.6222209930419922, + "epoch": 0.8995614350420591, + "kl_loss": 0.11812890321016312, + "loss_ib": 0.004054634366184473, + "step": 3128 + }, + { + "ce_ib": 2.196197032928467, + "ce_orig": 0.6074555516242981, + "epoch": 0.8998490186210367, + "kl_loss": 0.19152693450450897, + "loss_ib": 0.004111466463655233, + "step": 3129 + }, + { + "ce_ib": 1.4556373357772827, + "ce_orig": 0.2552449703216553, + "epoch": 0.8998490186210367, + "kl_loss": 0.3423904776573181, + "loss_ib": 0.0048795416951179504, + "step": 3129 + }, + { + "ce_ib": 2.8635828495025635, + "ce_orig": 0.7850995063781738, + "epoch": 0.8998490186210367, + "kl_loss": 0.13298878073692322, + "loss_ib": 0.004193470813333988, + "step": 3129 + }, + { + "ce_ib": 4.386826992034912, + "ce_orig": 0.8934640884399414, + "epoch": 0.8998490186210367, + "kl_loss": 0.16001014411449432, + "loss_ib": 0.005986928008496761, + "step": 3129 + }, + { + "epoch": 0.9001366022000143, + "grad_norm": 0.16122177243232727, + "learning_rate": 8.21525900324088e-06, + "loss": 0.9082, + "step": 3130 + }, + { + "ce_ib": 6.461752891540527, + "ce_orig": 1.4621835947036743, + "epoch": 0.9001366022000143, + "kl_loss": 0.17072081565856934, + "loss_ib": 0.00816896092146635, + "step": 3130 + }, + { + "ce_ib": 3.331132411956787, + "ce_orig": 0.6427520513534546, + "epoch": 0.9001366022000143, + "kl_loss": 0.1445806622505188, + "loss_ib": 0.004776938818395138, + "step": 3130 + }, + { + "ce_ib": 4.278102874755859, + "ce_orig": 0.9806033372879028, + "epoch": 0.9001366022000143, + "kl_loss": 0.13012027740478516, + "loss_ib": 0.005579305812716484, + "step": 3130 + }, + { + "ce_ib": 4.063268661499023, + "ce_orig": 0.7401987314224243, + "epoch": 0.9001366022000143, + "kl_loss": 0.21880973875522614, + "loss_ib": 0.006251365877687931, + "step": 3130 + }, + { + "ce_ib": 3.910611152648926, + "ce_orig": 0.8353100419044495, + "epoch": 0.900424185778992, + "kl_loss": 0.14991123974323273, + "loss_ib": 0.005409723613411188, + "step": 3131 + }, + { + "ce_ib": 2.747136116027832, + "ce_orig": 0.7366280555725098, + "epoch": 0.900424185778992, + "kl_loss": 0.1073562502861023, + "loss_ib": 0.0038206984754651785, + "step": 3131 + }, + { + "ce_ib": 2.510486364364624, + "ce_orig": 0.8741284608840942, + "epoch": 0.900424185778992, + "kl_loss": 0.15865197777748108, + "loss_ib": 0.004097006283700466, + "step": 3131 + }, + { + "ce_ib": 2.9812331199645996, + "ce_orig": 0.5116045475006104, + "epoch": 0.900424185778992, + "kl_loss": 0.28621774911880493, + "loss_ib": 0.0058434102684259415, + "step": 3131 + }, + { + "ce_ib": 2.9572653770446777, + "ce_orig": 0.6594868302345276, + "epoch": 0.9007117693579697, + "kl_loss": 0.1423066407442093, + "loss_ib": 0.004380331374704838, + "step": 3132 + }, + { + "ce_ib": 6.421196937561035, + "ce_orig": 1.3668510913848877, + "epoch": 0.9007117693579697, + "kl_loss": 0.20493853092193604, + "loss_ib": 0.008470581844449043, + "step": 3132 + }, + { + "ce_ib": 3.4352309703826904, + "ce_orig": 0.3711633086204529, + "epoch": 0.9007117693579697, + "kl_loss": 0.13755705952644348, + "loss_ib": 0.004810801707208157, + "step": 3132 + }, + { + "ce_ib": 3.3859071731567383, + "ce_orig": 0.558665931224823, + "epoch": 0.9007117693579697, + "kl_loss": 0.1518528014421463, + "loss_ib": 0.0049044350162148476, + "step": 3132 + }, + { + "ce_ib": 3.522315263748169, + "ce_orig": 0.7901033163070679, + "epoch": 0.9009993529369473, + "kl_loss": 0.18090713024139404, + "loss_ib": 0.005331386346369982, + "step": 3133 + }, + { + "ce_ib": 6.215169429779053, + "ce_orig": 1.6276618242263794, + "epoch": 0.9009993529369473, + "kl_loss": 0.14657816290855408, + "loss_ib": 0.007680951151996851, + "step": 3133 + }, + { + "ce_ib": 4.997008800506592, + "ce_orig": 0.9101089239120483, + "epoch": 0.9009993529369473, + "kl_loss": 0.1337818205356598, + "loss_ib": 0.0063348268158733845, + "step": 3133 + }, + { + "ce_ib": 5.015913963317871, + "ce_orig": 1.2925304174423218, + "epoch": 0.9009993529369473, + "kl_loss": 0.16569727659225464, + "loss_ib": 0.006672886665910482, + "step": 3133 + }, + { + "ce_ib": 7.201035976409912, + "ce_orig": 1.5103636980056763, + "epoch": 0.901286936515925, + "kl_loss": 0.2222784459590912, + "loss_ib": 0.009423820301890373, + "step": 3134 + }, + { + "ce_ib": 4.245541572570801, + "ce_orig": 1.1381776332855225, + "epoch": 0.901286936515925, + "kl_loss": 0.16268283128738403, + "loss_ib": 0.00587236974388361, + "step": 3134 + }, + { + "ce_ib": 3.4584100246429443, + "ce_orig": 0.535250723361969, + "epoch": 0.901286936515925, + "kl_loss": 0.12079572677612305, + "loss_ib": 0.004666367545723915, + "step": 3134 + }, + { + "ce_ib": 3.0716052055358887, + "ce_orig": 0.5027214288711548, + "epoch": 0.901286936515925, + "kl_loss": 0.14468061923980713, + "loss_ib": 0.004518411587923765, + "step": 3134 + }, + { + "epoch": 0.9015745200949026, + "grad_norm": 0.15585008263587952, + "learning_rate": 8.209311696420861e-06, + "loss": 0.8091, + "step": 3135 + }, + { + "ce_ib": 3.6180272102355957, + "ce_orig": 0.7989323735237122, + "epoch": 0.9015745200949026, + "kl_loss": 0.1803344190120697, + "loss_ib": 0.005421371199190617, + "step": 3135 + }, + { + "ce_ib": 4.788664817810059, + "ce_orig": 0.7015939950942993, + "epoch": 0.9015745200949026, + "kl_loss": 0.2519822120666504, + "loss_ib": 0.007308486383408308, + "step": 3135 + }, + { + "ce_ib": 2.8992063999176025, + "ce_orig": 0.7547167539596558, + "epoch": 0.9015745200949026, + "kl_loss": 0.13063058257102966, + "loss_ib": 0.004205512348562479, + "step": 3135 + }, + { + "ce_ib": 2.6017701625823975, + "ce_orig": 0.7255498170852661, + "epoch": 0.9015745200949026, + "kl_loss": 0.11173942685127258, + "loss_ib": 0.003719164291396737, + "step": 3135 + }, + { + "ce_ib": 3.5708677768707275, + "ce_orig": 0.7267693877220154, + "epoch": 0.9018621036738802, + "kl_loss": 0.1588895320892334, + "loss_ib": 0.005159763153642416, + "step": 3136 + }, + { + "ce_ib": 2.882054090499878, + "ce_orig": 0.7681783437728882, + "epoch": 0.9018621036738802, + "kl_loss": 0.2242565006017685, + "loss_ib": 0.005124618764966726, + "step": 3136 + }, + { + "ce_ib": 6.08839750289917, + "ce_orig": 0.9009363055229187, + "epoch": 0.9018621036738802, + "kl_loss": 0.2131895124912262, + "loss_ib": 0.008220291696488857, + "step": 3136 + }, + { + "ce_ib": 5.110060214996338, + "ce_orig": 1.2241339683532715, + "epoch": 0.9018621036738802, + "kl_loss": 0.20409898459911346, + "loss_ib": 0.007151050493121147, + "step": 3136 + }, + { + "ce_ib": 3.287994384765625, + "ce_orig": 0.6549267172813416, + "epoch": 0.9021496872528578, + "kl_loss": 0.09980523586273193, + "loss_ib": 0.004286046605557203, + "step": 3137 + }, + { + "ce_ib": 3.020491361618042, + "ce_orig": 0.7144865989685059, + "epoch": 0.9021496872528578, + "kl_loss": 0.21379825472831726, + "loss_ib": 0.005158473737537861, + "step": 3137 + }, + { + "ce_ib": 2.4912428855895996, + "ce_orig": 0.6419815421104431, + "epoch": 0.9021496872528578, + "kl_loss": 0.1501091569662094, + "loss_ib": 0.003992334473878145, + "step": 3137 + }, + { + "ce_ib": 3.414757251739502, + "ce_orig": 0.7015599012374878, + "epoch": 0.9021496872528578, + "kl_loss": 0.15435834228992462, + "loss_ib": 0.004958340432494879, + "step": 3137 + }, + { + "ce_ib": 3.1175625324249268, + "ce_orig": 0.8859734535217285, + "epoch": 0.9024372708318354, + "kl_loss": 0.10315009951591492, + "loss_ib": 0.004149063490331173, + "step": 3138 + }, + { + "ce_ib": 6.299962043762207, + "ce_orig": 1.725060224533081, + "epoch": 0.9024372708318354, + "kl_loss": 0.16075916588306427, + "loss_ib": 0.00790755357593298, + "step": 3138 + }, + { + "ce_ib": 2.3819708824157715, + "ce_orig": 0.7722316384315491, + "epoch": 0.9024372708318354, + "kl_loss": 0.14825040102005005, + "loss_ib": 0.0038644748274236917, + "step": 3138 + }, + { + "ce_ib": 4.26149845123291, + "ce_orig": 1.0691019296646118, + "epoch": 0.9024372708318354, + "kl_loss": 0.21439480781555176, + "loss_ib": 0.006405446212738752, + "step": 3138 + }, + { + "ce_ib": 6.434026718139648, + "ce_orig": 1.1546363830566406, + "epoch": 0.9027248544108132, + "kl_loss": 0.19890135526657104, + "loss_ib": 0.008423040620982647, + "step": 3139 + }, + { + "ce_ib": 5.0130839347839355, + "ce_orig": 0.8309715390205383, + "epoch": 0.9027248544108132, + "kl_loss": 0.21914982795715332, + "loss_ib": 0.00720458198338747, + "step": 3139 + }, + { + "ce_ib": 1.9235478639602661, + "ce_orig": 0.5479757189750671, + "epoch": 0.9027248544108132, + "kl_loss": 0.14501991868019104, + "loss_ib": 0.0033737467601895332, + "step": 3139 + }, + { + "ce_ib": 2.527085304260254, + "ce_orig": 0.7406293749809265, + "epoch": 0.9027248544108132, + "kl_loss": 0.11556129157543182, + "loss_ib": 0.0036826978903263807, + "step": 3139 + }, + { + "epoch": 0.9030124379897908, + "grad_norm": 0.13026507198810577, + "learning_rate": 8.20335665762407e-06, + "loss": 0.8075, + "step": 3140 + }, + { + "ce_ib": 2.182521343231201, + "ce_orig": 0.619674801826477, + "epoch": 0.9030124379897908, + "kl_loss": 0.10575208812952042, + "loss_ib": 0.0032400423660874367, + "step": 3140 + }, + { + "ce_ib": 3.2674295902252197, + "ce_orig": 0.6181186437606812, + "epoch": 0.9030124379897908, + "kl_loss": 0.1084195226430893, + "loss_ib": 0.004351624753326178, + "step": 3140 + }, + { + "ce_ib": 3.4926249980926514, + "ce_orig": 0.5833140015602112, + "epoch": 0.9030124379897908, + "kl_loss": 0.11931778490543365, + "loss_ib": 0.004685802850872278, + "step": 3140 + }, + { + "ce_ib": 4.123537063598633, + "ce_orig": 0.8439756035804749, + "epoch": 0.9030124379897908, + "kl_loss": 0.1697002500295639, + "loss_ib": 0.005820539314299822, + "step": 3140 + }, + { + "ce_ib": 3.9694008827209473, + "ce_orig": 0.9983794689178467, + "epoch": 0.9033000215687684, + "kl_loss": 0.25483018159866333, + "loss_ib": 0.0065177022479474545, + "step": 3141 + }, + { + "ce_ib": 5.173247337341309, + "ce_orig": 0.9060007929801941, + "epoch": 0.9033000215687684, + "kl_loss": 0.18555516004562378, + "loss_ib": 0.0070287990383803844, + "step": 3141 + }, + { + "ce_ib": 5.181809425354004, + "ce_orig": 0.9569258689880371, + "epoch": 0.9033000215687684, + "kl_loss": 0.1443452686071396, + "loss_ib": 0.006625262554734945, + "step": 3141 + }, + { + "ce_ib": 2.905935287475586, + "ce_orig": 0.6407507658004761, + "epoch": 0.9033000215687684, + "kl_loss": 0.1944827437400818, + "loss_ib": 0.0048507628962397575, + "step": 3141 + }, + { + "ce_ib": 3.4152159690856934, + "ce_orig": 1.0485882759094238, + "epoch": 0.903587605147746, + "kl_loss": 0.15299151837825775, + "loss_ib": 0.00494513101875782, + "step": 3142 + }, + { + "ce_ib": 4.512202739715576, + "ce_orig": 1.092852234840393, + "epoch": 0.903587605147746, + "kl_loss": 0.14843882620334625, + "loss_ib": 0.005996590945869684, + "step": 3142 + }, + { + "ce_ib": 4.038938999176025, + "ce_orig": 0.7413948178291321, + "epoch": 0.903587605147746, + "kl_loss": 0.3522419333457947, + "loss_ib": 0.007561358157545328, + "step": 3142 + }, + { + "ce_ib": 3.577648639678955, + "ce_orig": 0.5872368216514587, + "epoch": 0.903587605147746, + "kl_loss": 0.18453940749168396, + "loss_ib": 0.005423042457550764, + "step": 3142 + }, + { + "ce_ib": 3.3038127422332764, + "ce_orig": 0.805388331413269, + "epoch": 0.9038751887267237, + "kl_loss": 0.21114183962345123, + "loss_ib": 0.005415230989456177, + "step": 3143 + }, + { + "ce_ib": 5.156336784362793, + "ce_orig": 1.2434169054031372, + "epoch": 0.9038751887267237, + "kl_loss": 0.2379721701145172, + "loss_ib": 0.007536058314144611, + "step": 3143 + }, + { + "ce_ib": 4.806614875793457, + "ce_orig": 1.1467207670211792, + "epoch": 0.9038751887267237, + "kl_loss": 0.142640620470047, + "loss_ib": 0.006233020685613155, + "step": 3143 + }, + { + "ce_ib": 2.184598922729492, + "ce_orig": 0.6641231179237366, + "epoch": 0.9038751887267237, + "kl_loss": 0.11880283057689667, + "loss_ib": 0.0033726270776242018, + "step": 3143 + }, + { + "ce_ib": 2.596417188644409, + "ce_orig": 0.5836762189865112, + "epoch": 0.9041627723057013, + "kl_loss": 0.11576492339372635, + "loss_ib": 0.0037540665362030268, + "step": 3144 + }, + { + "ce_ib": 3.1168344020843506, + "ce_orig": 0.8158707618713379, + "epoch": 0.9041627723057013, + "kl_loss": 0.18092788755893707, + "loss_ib": 0.004926113411784172, + "step": 3144 + }, + { + "ce_ib": 5.888381481170654, + "ce_orig": 1.4159479141235352, + "epoch": 0.9041627723057013, + "kl_loss": 0.2487201690673828, + "loss_ib": 0.008375583216547966, + "step": 3144 + }, + { + "ce_ib": 2.6120378971099854, + "ce_orig": 0.4266003668308258, + "epoch": 0.9041627723057013, + "kl_loss": 0.19628018140792847, + "loss_ib": 0.004574839491397142, + "step": 3144 + }, + { + "epoch": 0.904450355884679, + "grad_norm": 0.19082939624786377, + "learning_rate": 8.197393901197572e-06, + "loss": 0.9053, + "step": 3145 + }, + { + "ce_ib": 5.482536315917969, + "ce_orig": 0.8391470313072205, + "epoch": 0.904450355884679, + "kl_loss": 0.19511136412620544, + "loss_ib": 0.007433650549501181, + "step": 3145 + }, + { + "ce_ib": 4.079298973083496, + "ce_orig": 0.7190772294998169, + "epoch": 0.904450355884679, + "kl_loss": 0.14527688920497894, + "loss_ib": 0.005532067734748125, + "step": 3145 + }, + { + "ce_ib": 2.1403257846832275, + "ce_orig": 0.574280321598053, + "epoch": 0.904450355884679, + "kl_loss": 0.1029767096042633, + "loss_ib": 0.0031700925901532173, + "step": 3145 + }, + { + "ce_ib": 4.62451171875, + "ce_orig": 0.9222412109375, + "epoch": 0.904450355884679, + "kl_loss": 0.26902201771736145, + "loss_ib": 0.007314731832593679, + "step": 3145 + }, + { + "ce_ib": 5.182371616363525, + "ce_orig": 1.1551182270050049, + "epoch": 0.9047379394636567, + "kl_loss": 0.14750391244888306, + "loss_ib": 0.006657410878688097, + "step": 3146 + }, + { + "ce_ib": 3.8011279106140137, + "ce_orig": 0.6802340149879456, + "epoch": 0.9047379394636567, + "kl_loss": 0.1394069939851761, + "loss_ib": 0.005195198114961386, + "step": 3146 + }, + { + "ce_ib": 2.5462276935577393, + "ce_orig": 0.5792632102966309, + "epoch": 0.9047379394636567, + "kl_loss": 0.17695537209510803, + "loss_ib": 0.004315781407058239, + "step": 3146 + }, + { + "ce_ib": 4.1171956062316895, + "ce_orig": 0.7384029030799866, + "epoch": 0.9047379394636567, + "kl_loss": 0.1452680081129074, + "loss_ib": 0.005569875705987215, + "step": 3146 + }, + { + "ce_ib": 4.253487586975098, + "ce_orig": 0.4247531294822693, + "epoch": 0.9050255230426343, + "kl_loss": 0.19668516516685486, + "loss_ib": 0.006220339331775904, + "step": 3147 + }, + { + "ce_ib": 4.796370983123779, + "ce_orig": 0.9708923697471619, + "epoch": 0.9050255230426343, + "kl_loss": 0.2833160161972046, + "loss_ib": 0.0076295314356684685, + "step": 3147 + }, + { + "ce_ib": 4.514124870300293, + "ce_orig": 0.9035927057266235, + "epoch": 0.9050255230426343, + "kl_loss": 0.1645984649658203, + "loss_ib": 0.006160109303891659, + "step": 3147 + }, + { + "ce_ib": 4.55026388168335, + "ce_orig": 1.103672742843628, + "epoch": 0.9050255230426343, + "kl_loss": 0.16370058059692383, + "loss_ib": 0.00618726946413517, + "step": 3147 + }, + { + "ce_ib": 7.191635608673096, + "ce_orig": 1.5854177474975586, + "epoch": 0.9053131066216119, + "kl_loss": 0.20797839760780334, + "loss_ib": 0.009271419607102871, + "step": 3148 + }, + { + "ce_ib": 3.7654340267181396, + "ce_orig": 0.8144212365150452, + "epoch": 0.9053131066216119, + "kl_loss": 0.16580399870872498, + "loss_ib": 0.005423474125564098, + "step": 3148 + }, + { + "ce_ib": 4.680147171020508, + "ce_orig": 1.1408040523529053, + "epoch": 0.9053131066216119, + "kl_loss": 0.20122237503528595, + "loss_ib": 0.0066923703998327255, + "step": 3148 + }, + { + "ce_ib": 5.703852653503418, + "ce_orig": 1.405648946762085, + "epoch": 0.9053131066216119, + "kl_loss": 0.15912410616874695, + "loss_ib": 0.00729509349912405, + "step": 3148 + }, + { + "ce_ib": 3.3950207233428955, + "ce_orig": 0.6455244421958923, + "epoch": 0.9056006902005895, + "kl_loss": 0.19929294288158417, + "loss_ib": 0.0053879497572779655, + "step": 3149 + }, + { + "ce_ib": 4.558180809020996, + "ce_orig": 1.0325016975402832, + "epoch": 0.9056006902005895, + "kl_loss": 0.1976812332868576, + "loss_ib": 0.006534992717206478, + "step": 3149 + }, + { + "ce_ib": 5.473658084869385, + "ce_orig": 0.8959184885025024, + "epoch": 0.9056006902005895, + "kl_loss": 0.11798407137393951, + "loss_ib": 0.006653498858213425, + "step": 3149 + }, + { + "ce_ib": 4.317521095275879, + "ce_orig": 0.7732563018798828, + "epoch": 0.9056006902005895, + "kl_loss": 0.2259720265865326, + "loss_ib": 0.006577241234481335, + "step": 3149 + }, + { + "epoch": 0.9058882737795672, + "grad_norm": 0.14759676158428192, + "learning_rate": 8.191423441507039e-06, + "loss": 0.8922, + "step": 3150 + }, + { + "ce_ib": 2.267072916030884, + "ce_orig": 0.4989285469055176, + "epoch": 0.9058882737795672, + "kl_loss": 0.21835269033908844, + "loss_ib": 0.004450599662959576, + "step": 3150 + }, + { + "ce_ib": 1.458601713180542, + "ce_orig": 0.2576638460159302, + "epoch": 0.9058882737795672, + "kl_loss": 0.3316953480243683, + "loss_ib": 0.004775554873049259, + "step": 3150 + }, + { + "ce_ib": 5.768575191497803, + "ce_orig": 1.3517627716064453, + "epoch": 0.9058882737795672, + "kl_loss": 0.23937451839447021, + "loss_ib": 0.008162319660186768, + "step": 3150 + }, + { + "ce_ib": 3.128404140472412, + "ce_orig": 0.9595581889152527, + "epoch": 0.9058882737795672, + "kl_loss": 0.1273995190858841, + "loss_ib": 0.004402399528771639, + "step": 3150 + }, + { + "ce_ib": 3.0929129123687744, + "ce_orig": 0.6742674112319946, + "epoch": 0.9061758573585448, + "kl_loss": 0.175888329744339, + "loss_ib": 0.004851796198636293, + "step": 3151 + }, + { + "ce_ib": 4.32734489440918, + "ce_orig": 1.003836750984192, + "epoch": 0.9061758573585448, + "kl_loss": 0.22782963514328003, + "loss_ib": 0.006605640985071659, + "step": 3151 + }, + { + "ce_ib": 7.49906063079834, + "ce_orig": 1.7662650346755981, + "epoch": 0.9061758573585448, + "kl_loss": 0.27928030490875244, + "loss_ib": 0.010291863232851028, + "step": 3151 + }, + { + "ce_ib": 2.3563361167907715, + "ce_orig": 0.48153653740882874, + "epoch": 0.9061758573585448, + "kl_loss": 0.18630170822143555, + "loss_ib": 0.004219353199005127, + "step": 3151 + }, + { + "ce_ib": 2.7045867443084717, + "ce_orig": 0.5526795983314514, + "epoch": 0.9064634409375225, + "kl_loss": 0.16826802492141724, + "loss_ib": 0.004387266933917999, + "step": 3152 + }, + { + "ce_ib": 2.542480945587158, + "ce_orig": 0.5888773202896118, + "epoch": 0.9064634409375225, + "kl_loss": 0.2063363641500473, + "loss_ib": 0.004605844616889954, + "step": 3152 + }, + { + "ce_ib": 2.7370095252990723, + "ce_orig": 0.6573033928871155, + "epoch": 0.9064634409375225, + "kl_loss": 0.15892894566059113, + "loss_ib": 0.004326298832893372, + "step": 3152 + }, + { + "ce_ib": 3.0800182819366455, + "ce_orig": 0.7294167876243591, + "epoch": 0.9064634409375225, + "kl_loss": 0.12211046367883682, + "loss_ib": 0.004301122855395079, + "step": 3152 + }, + { + "ce_ib": 4.609310626983643, + "ce_orig": 0.5992477536201477, + "epoch": 0.9067510245165001, + "kl_loss": 0.12588074803352356, + "loss_ib": 0.005868117790669203, + "step": 3153 + }, + { + "ce_ib": 2.4295425415039062, + "ce_orig": 0.6420907974243164, + "epoch": 0.9067510245165001, + "kl_loss": 0.13598495721817017, + "loss_ib": 0.003789392067119479, + "step": 3153 + }, + { + "ce_ib": 3.6695125102996826, + "ce_orig": 1.0565630197525024, + "epoch": 0.9067510245165001, + "kl_loss": 0.31818661093711853, + "loss_ib": 0.006851378362625837, + "step": 3153 + }, + { + "ce_ib": 3.783386707305908, + "ce_orig": 0.8218306303024292, + "epoch": 0.9067510245165001, + "kl_loss": 0.19258923828601837, + "loss_ib": 0.005709278862923384, + "step": 3153 + }, + { + "ce_ib": 5.192842483520508, + "ce_orig": 1.1497149467468262, + "epoch": 0.9070386080954778, + "kl_loss": 0.1881955862045288, + "loss_ib": 0.0070747979916632175, + "step": 3154 + }, + { + "ce_ib": 2.1812407970428467, + "ce_orig": 0.540349006652832, + "epoch": 0.9070386080954778, + "kl_loss": 0.1365548074245453, + "loss_ib": 0.003546788590028882, + "step": 3154 + }, + { + "ce_ib": 2.990295171737671, + "ce_orig": 0.5244907140731812, + "epoch": 0.9070386080954778, + "kl_loss": 0.2032591700553894, + "loss_ib": 0.005022886674851179, + "step": 3154 + }, + { + "ce_ib": 1.6752750873565674, + "ce_orig": 0.5153340697288513, + "epoch": 0.9070386080954778, + "kl_loss": 0.15428675711154938, + "loss_ib": 0.0032181425485759974, + "step": 3154 + }, + { + "epoch": 0.9073261916744554, + "grad_norm": 0.12396439909934998, + "learning_rate": 8.185445292936686e-06, + "loss": 0.8194, + "step": 3155 + }, + { + "ce_ib": 5.420727252960205, + "ce_orig": 1.126619577407837, + "epoch": 0.9073261916744554, + "kl_loss": 0.15194837749004364, + "loss_ib": 0.0069402107037603855, + "step": 3155 + }, + { + "ce_ib": 6.425928592681885, + "ce_orig": 1.5001987218856812, + "epoch": 0.9073261916744554, + "kl_loss": 0.2424648404121399, + "loss_ib": 0.008850576356053352, + "step": 3155 + }, + { + "ce_ib": 4.3881611824035645, + "ce_orig": 0.576694130897522, + "epoch": 0.9073261916744554, + "kl_loss": 0.31829431653022766, + "loss_ib": 0.007571104448288679, + "step": 3155 + }, + { + "ce_ib": 3.378469467163086, + "ce_orig": 0.9681189656257629, + "epoch": 0.9073261916744554, + "kl_loss": 0.10609043389558792, + "loss_ib": 0.004439373966306448, + "step": 3155 + }, + { + "ce_ib": 1.7403013706207275, + "ce_orig": 0.46497583389282227, + "epoch": 0.907613775253433, + "kl_loss": 0.07767312973737717, + "loss_ib": 0.0025170326698571444, + "step": 3156 + }, + { + "ce_ib": 4.242976665496826, + "ce_orig": 0.7117444276809692, + "epoch": 0.907613775253433, + "kl_loss": 0.21178698539733887, + "loss_ib": 0.006360846571624279, + "step": 3156 + }, + { + "ce_ib": 3.462385654449463, + "ce_orig": 0.7830418348312378, + "epoch": 0.907613775253433, + "kl_loss": 0.2162003368139267, + "loss_ib": 0.005624388810247183, + "step": 3156 + }, + { + "ce_ib": 1.9453904628753662, + "ce_orig": 0.5198253989219666, + "epoch": 0.907613775253433, + "kl_loss": 0.21352772414684296, + "loss_ib": 0.0040806676261126995, + "step": 3156 + }, + { + "ce_ib": 4.694629192352295, + "ce_orig": 0.5303271412849426, + "epoch": 0.9079013588324106, + "kl_loss": 0.19409868121147156, + "loss_ib": 0.006635615602135658, + "step": 3157 + }, + { + "ce_ib": 3.754945755004883, + "ce_orig": 0.587378740310669, + "epoch": 0.9079013588324106, + "kl_loss": 0.23519514501094818, + "loss_ib": 0.0061068967916071415, + "step": 3157 + }, + { + "ce_ib": 3.019984722137451, + "ce_orig": 0.71587735414505, + "epoch": 0.9079013588324106, + "kl_loss": 0.23508435487747192, + "loss_ib": 0.005370827857404947, + "step": 3157 + }, + { + "ce_ib": 2.4929721355438232, + "ce_orig": 0.8357707858085632, + "epoch": 0.9079013588324106, + "kl_loss": 0.10604149848222733, + "loss_ib": 0.0035533872433006763, + "step": 3157 + }, + { + "ce_ib": 2.6229615211486816, + "ce_orig": 0.6393980383872986, + "epoch": 0.9081889424113883, + "kl_loss": 0.14325347542762756, + "loss_ib": 0.00405549630522728, + "step": 3158 + }, + { + "ce_ib": 4.964770317077637, + "ce_orig": 0.9423620700836182, + "epoch": 0.9081889424113883, + "kl_loss": 0.16697688400745392, + "loss_ib": 0.006634538993239403, + "step": 3158 + }, + { + "ce_ib": 4.14258337020874, + "ce_orig": 0.9055909514427185, + "epoch": 0.9081889424113883, + "kl_loss": 0.17360851168632507, + "loss_ib": 0.005878668278455734, + "step": 3158 + }, + { + "ce_ib": 2.482825994491577, + "ce_orig": 0.7886984348297119, + "epoch": 0.9081889424113883, + "kl_loss": 0.1454358845949173, + "loss_ib": 0.003937184810638428, + "step": 3158 + }, + { + "ce_ib": 3.0049021244049072, + "ce_orig": 0.8655731678009033, + "epoch": 0.908476525990366, + "kl_loss": 0.13500478863716125, + "loss_ib": 0.004354950040578842, + "step": 3159 + }, + { + "ce_ib": 4.4782609939575195, + "ce_orig": 1.0007271766662598, + "epoch": 0.908476525990366, + "kl_loss": 0.21190454065799713, + "loss_ib": 0.006597306579351425, + "step": 3159 + }, + { + "ce_ib": 4.535149097442627, + "ce_orig": 0.9623067378997803, + "epoch": 0.908476525990366, + "kl_loss": 0.22466082870960236, + "loss_ib": 0.006781757343560457, + "step": 3159 + }, + { + "ce_ib": 2.4248218536376953, + "ce_orig": 0.6143787503242493, + "epoch": 0.908476525990366, + "kl_loss": 0.12712045013904572, + "loss_ib": 0.003696026047691703, + "step": 3159 + }, + { + "epoch": 0.9087641095693436, + "grad_norm": 0.12650971114635468, + "learning_rate": 8.179459469889269e-06, + "loss": 0.8947, + "step": 3160 + }, + { + "ce_ib": 6.1988115310668945, + "ce_orig": 1.387940764427185, + "epoch": 0.9087641095693436, + "kl_loss": 0.1493763029575348, + "loss_ib": 0.007692574057728052, + "step": 3160 + }, + { + "ce_ib": 5.522292613983154, + "ce_orig": 1.1705983877182007, + "epoch": 0.9087641095693436, + "kl_loss": 0.16832265257835388, + "loss_ib": 0.007205519359558821, + "step": 3160 + }, + { + "ce_ib": 5.2100443840026855, + "ce_orig": 1.2484530210494995, + "epoch": 0.9087641095693436, + "kl_loss": 0.13270381093025208, + "loss_ib": 0.006537082139402628, + "step": 3160 + }, + { + "ce_ib": 3.0796713829040527, + "ce_orig": 0.7845738530158997, + "epoch": 0.9087641095693436, + "kl_loss": 0.1940571367740631, + "loss_ib": 0.005020242650061846, + "step": 3160 + }, + { + "ce_ib": 2.0068061351776123, + "ce_orig": 0.48324480652809143, + "epoch": 0.9090516931483212, + "kl_loss": 0.1596553921699524, + "loss_ib": 0.00360335991717875, + "step": 3161 + }, + { + "ce_ib": 4.258508205413818, + "ce_orig": 1.0838215351104736, + "epoch": 0.9090516931483212, + "kl_loss": 0.1960124969482422, + "loss_ib": 0.0062186336144804955, + "step": 3161 + }, + { + "ce_ib": 3.623138666152954, + "ce_orig": 0.583733081817627, + "epoch": 0.9090516931483212, + "kl_loss": 0.16974380612373352, + "loss_ib": 0.00532057648524642, + "step": 3161 + }, + { + "ce_ib": 2.236454486846924, + "ce_orig": 0.6504534482955933, + "epoch": 0.9090516931483212, + "kl_loss": 0.17454814910888672, + "loss_ib": 0.003981935791671276, + "step": 3161 + }, + { + "ce_ib": 6.242707252502441, + "ce_orig": 1.5775880813598633, + "epoch": 0.9093392767272989, + "kl_loss": 0.1340440958738327, + "loss_ib": 0.0075831483118236065, + "step": 3162 + }, + { + "ce_ib": 5.701559543609619, + "ce_orig": 0.8264329433441162, + "epoch": 0.9093392767272989, + "kl_loss": 0.43986132740974426, + "loss_ib": 0.010100172832608223, + "step": 3162 + }, + { + "ce_ib": 2.8626773357391357, + "ce_orig": 0.8954004645347595, + "epoch": 0.9093392767272989, + "kl_loss": 0.08875995129346848, + "loss_ib": 0.0037502767518162727, + "step": 3162 + }, + { + "ce_ib": 4.380766868591309, + "ce_orig": 0.9182659983634949, + "epoch": 0.9093392767272989, + "kl_loss": 0.2186027467250824, + "loss_ib": 0.006566794589161873, + "step": 3162 + }, + { + "ce_ib": 4.671289920806885, + "ce_orig": 1.093033790588379, + "epoch": 0.9096268603062765, + "kl_loss": 0.1612318754196167, + "loss_ib": 0.006283608730882406, + "step": 3163 + }, + { + "ce_ib": 3.2746760845184326, + "ce_orig": 0.7071114778518677, + "epoch": 0.9096268603062765, + "kl_loss": 0.16340234875679016, + "loss_ib": 0.004908699542284012, + "step": 3163 + }, + { + "ce_ib": 3.3516316413879395, + "ce_orig": 0.8320000171661377, + "epoch": 0.9096268603062765, + "kl_loss": 0.3149527311325073, + "loss_ib": 0.006501158699393272, + "step": 3163 + }, + { + "ce_ib": 5.466616630554199, + "ce_orig": 1.4099498987197876, + "epoch": 0.9096268603062765, + "kl_loss": 0.1850469559431076, + "loss_ib": 0.007317086216062307, + "step": 3163 + }, + { + "ce_ib": 4.366192817687988, + "ce_orig": 0.9573418498039246, + "epoch": 0.9099144438852541, + "kl_loss": 0.17724797129631042, + "loss_ib": 0.006138672586530447, + "step": 3164 + }, + { + "ce_ib": 2.474940538406372, + "ce_orig": 0.51842200756073, + "epoch": 0.9099144438852541, + "kl_loss": 0.18748492002487183, + "loss_ib": 0.004349789582192898, + "step": 3164 + }, + { + "ce_ib": 4.932372570037842, + "ce_orig": 1.048114538192749, + "epoch": 0.9099144438852541, + "kl_loss": 0.20769621431827545, + "loss_ib": 0.007009334396570921, + "step": 3164 + }, + { + "ce_ib": 2.5117714405059814, + "ce_orig": 0.624872088432312, + "epoch": 0.9099144438852541, + "kl_loss": 0.16266582906246185, + "loss_ib": 0.004138429649174213, + "step": 3164 + }, + { + "epoch": 0.9102020274642318, + "grad_norm": 0.13354536890983582, + "learning_rate": 8.173465986786018e-06, + "loss": 0.8563, + "step": 3165 + }, + { + "ce_ib": 4.021058082580566, + "ce_orig": 1.0106995105743408, + "epoch": 0.9102020274642318, + "kl_loss": 0.2775031626224518, + "loss_ib": 0.006796089466661215, + "step": 3165 + }, + { + "ce_ib": 3.0064802169799805, + "ce_orig": 0.470855176448822, + "epoch": 0.9102020274642318, + "kl_loss": 0.183244988322258, + "loss_ib": 0.004838930442929268, + "step": 3165 + }, + { + "ce_ib": 5.616123676300049, + "ce_orig": 1.13671875, + "epoch": 0.9102020274642318, + "kl_loss": 0.17052164673805237, + "loss_ib": 0.007321339566260576, + "step": 3165 + }, + { + "ce_ib": 3.3309109210968018, + "ce_orig": 0.4829893708229065, + "epoch": 0.9102020274642318, + "kl_loss": 0.2325412929058075, + "loss_ib": 0.005656323861330748, + "step": 3165 + }, + { + "ce_ib": 4.115614891052246, + "ce_orig": 0.7457357048988342, + "epoch": 0.9104896110432095, + "kl_loss": 0.18691715598106384, + "loss_ib": 0.005984786432236433, + "step": 3166 + }, + { + "ce_ib": 5.086359977722168, + "ce_orig": 1.0697160959243774, + "epoch": 0.9104896110432095, + "kl_loss": 0.20519685745239258, + "loss_ib": 0.007138328161090612, + "step": 3166 + }, + { + "ce_ib": 2.6616904735565186, + "ce_orig": 0.6974183320999146, + "epoch": 0.9104896110432095, + "kl_loss": 0.12159627676010132, + "loss_ib": 0.0038776530418545008, + "step": 3166 + }, + { + "ce_ib": 2.8673887252807617, + "ce_orig": 0.610319197177887, + "epoch": 0.9104896110432095, + "kl_loss": 0.2113325595855713, + "loss_ib": 0.004980714060366154, + "step": 3166 + }, + { + "ce_ib": 6.082167625427246, + "ce_orig": 1.275486946105957, + "epoch": 0.9107771946221871, + "kl_loss": 0.2809152603149414, + "loss_ib": 0.00889131985604763, + "step": 3167 + }, + { + "ce_ib": 2.1292428970336914, + "ce_orig": 0.34186869859695435, + "epoch": 0.9107771946221871, + "kl_loss": 0.09667559713125229, + "loss_ib": 0.0030959986615926027, + "step": 3167 + }, + { + "ce_ib": 1.4433305263519287, + "ce_orig": 0.2450559139251709, + "epoch": 0.9107771946221871, + "kl_loss": 0.41384899616241455, + "loss_ib": 0.005581820383667946, + "step": 3167 + }, + { + "ce_ib": 3.8459482192993164, + "ce_orig": 0.32628968358039856, + "epoch": 0.9107771946221871, + "kl_loss": 0.26200157403945923, + "loss_ib": 0.0064659640192985535, + "step": 3167 + }, + { + "ce_ib": 2.0838897228240967, + "ce_orig": 0.42857059836387634, + "epoch": 0.9110647782011647, + "kl_loss": 0.11008719354867935, + "loss_ib": 0.003184761619195342, + "step": 3168 + }, + { + "ce_ib": 2.7437517642974854, + "ce_orig": 0.7176569104194641, + "epoch": 0.9110647782011647, + "kl_loss": 0.21691718697547913, + "loss_ib": 0.00491292355582118, + "step": 3168 + }, + { + "ce_ib": 4.067457675933838, + "ce_orig": 0.7173943519592285, + "epoch": 0.9110647782011647, + "kl_loss": 0.23481887578964233, + "loss_ib": 0.006415646057575941, + "step": 3168 + }, + { + "ce_ib": 4.5279107093811035, + "ce_orig": 0.9445847868919373, + "epoch": 0.9110647782011647, + "kl_loss": 0.22493204474449158, + "loss_ib": 0.006777231115847826, + "step": 3168 + }, + { + "ce_ib": 3.3201186656951904, + "ce_orig": 0.8146684169769287, + "epoch": 0.9113523617801423, + "kl_loss": 0.14886169135570526, + "loss_ib": 0.004808735568076372, + "step": 3169 + }, + { + "ce_ib": 6.49231481552124, + "ce_orig": 1.7075914144515991, + "epoch": 0.9113523617801423, + "kl_loss": 0.19958128035068512, + "loss_ib": 0.008488127030432224, + "step": 3169 + }, + { + "ce_ib": 2.952949285507202, + "ce_orig": 0.3473901152610779, + "epoch": 0.9113523617801423, + "kl_loss": 0.21736979484558105, + "loss_ib": 0.005126647185534239, + "step": 3169 + }, + { + "ce_ib": 3.903048276901245, + "ce_orig": 0.8721669912338257, + "epoch": 0.9113523617801423, + "kl_loss": 0.1771429032087326, + "loss_ib": 0.0056744772009551525, + "step": 3169 + }, + { + "epoch": 0.91163994535912, + "grad_norm": 0.13759060204029083, + "learning_rate": 8.16746485806663e-06, + "loss": 0.8465, + "step": 3170 + }, + { + "ce_ib": 5.7211761474609375, + "ce_orig": 0.6818553805351257, + "epoch": 0.91163994535912, + "kl_loss": 0.29841044545173645, + "loss_ib": 0.008705280721187592, + "step": 3170 + }, + { + "ce_ib": 4.921578884124756, + "ce_orig": 1.3880023956298828, + "epoch": 0.91163994535912, + "kl_loss": 0.19139136373996735, + "loss_ib": 0.006835492793470621, + "step": 3170 + }, + { + "ce_ib": 5.213901519775391, + "ce_orig": 0.6756126880645752, + "epoch": 0.91163994535912, + "kl_loss": 0.29691457748413086, + "loss_ib": 0.00818304717540741, + "step": 3170 + }, + { + "ce_ib": 1.678503155708313, + "ce_orig": 0.5843238234519958, + "epoch": 0.91163994535912, + "kl_loss": 0.12338095903396606, + "loss_ib": 0.0029123127460479736, + "step": 3170 + }, + { + "ce_ib": 0.9010490775108337, + "ce_orig": 0.2690688371658325, + "epoch": 0.9119275289380976, + "kl_loss": 0.2951802611351013, + "loss_ib": 0.003852851688861847, + "step": 3171 + }, + { + "ce_ib": 5.655467987060547, + "ce_orig": 1.4645429849624634, + "epoch": 0.9119275289380976, + "kl_loss": 0.1574726551771164, + "loss_ib": 0.007230194751173258, + "step": 3171 + }, + { + "ce_ib": 6.127498626708984, + "ce_orig": 1.1911004781723022, + "epoch": 0.9119275289380976, + "kl_loss": 0.3076305091381073, + "loss_ib": 0.009203803725540638, + "step": 3171 + }, + { + "ce_ib": 4.247105598449707, + "ce_orig": 1.0036627054214478, + "epoch": 0.9119275289380976, + "kl_loss": 0.16630221903324127, + "loss_ib": 0.0059101274237036705, + "step": 3171 + }, + { + "ce_ib": 6.034453868865967, + "ce_orig": 1.309191107749939, + "epoch": 0.9122151125170753, + "kl_loss": 0.13417139649391174, + "loss_ib": 0.007376167457550764, + "step": 3172 + }, + { + "ce_ib": 3.3451476097106934, + "ce_orig": 0.7441681027412415, + "epoch": 0.9122151125170753, + "kl_loss": 0.2218714952468872, + "loss_ib": 0.005563862156122923, + "step": 3172 + }, + { + "ce_ib": 3.4729855060577393, + "ce_orig": 0.7385424971580505, + "epoch": 0.9122151125170753, + "kl_loss": 0.16045445203781128, + "loss_ib": 0.005077530164271593, + "step": 3172 + }, + { + "ce_ib": 2.570274829864502, + "ce_orig": 0.6625430583953857, + "epoch": 0.9122151125170753, + "kl_loss": 0.4872126579284668, + "loss_ib": 0.007442401256412268, + "step": 3172 + }, + { + "ce_ib": 3.54946231842041, + "ce_orig": 0.4175262451171875, + "epoch": 0.912502696096053, + "kl_loss": 0.2952011227607727, + "loss_ib": 0.006501473020762205, + "step": 3173 + }, + { + "ce_ib": 3.6912431716918945, + "ce_orig": 1.010550618171692, + "epoch": 0.912502696096053, + "kl_loss": 0.17737649381160736, + "loss_ib": 0.0054650078527629375, + "step": 3173 + }, + { + "ce_ib": 3.1009128093719482, + "ce_orig": 0.8131273984909058, + "epoch": 0.912502696096053, + "kl_loss": 0.1539348065853119, + "loss_ib": 0.004640260711312294, + "step": 3173 + }, + { + "ce_ib": 3.534210205078125, + "ce_orig": 0.6627948880195618, + "epoch": 0.912502696096053, + "kl_loss": 0.16842088103294373, + "loss_ib": 0.0052184187807142735, + "step": 3173 + }, + { + "ce_ib": 5.393845558166504, + "ce_orig": 0.746332585811615, + "epoch": 0.9127902796750306, + "kl_loss": 0.3136005401611328, + "loss_ib": 0.008529850281774998, + "step": 3174 + }, + { + "ce_ib": 2.5881028175354004, + "ce_orig": 0.5411162376403809, + "epoch": 0.9127902796750306, + "kl_loss": 0.18541929125785828, + "loss_ib": 0.004442295525223017, + "step": 3174 + }, + { + "ce_ib": 2.253849983215332, + "ce_orig": 0.21803267300128937, + "epoch": 0.9127902796750306, + "kl_loss": 0.1698969006538391, + "loss_ib": 0.003952818922698498, + "step": 3174 + }, + { + "ce_ib": 2.516026020050049, + "ce_orig": 0.6750215291976929, + "epoch": 0.9127902796750306, + "kl_loss": 0.16304582357406616, + "loss_ib": 0.004146484192460775, + "step": 3174 + }, + { + "epoch": 0.9130778632540082, + "grad_norm": 0.1296987384557724, + "learning_rate": 8.161456098189212e-06, + "loss": 0.8758, + "step": 3175 + }, + { + "ce_ib": 1.8355292081832886, + "ce_orig": 0.4957326650619507, + "epoch": 0.9130778632540082, + "kl_loss": 0.111463263630867, + "loss_ib": 0.0029501616954803467, + "step": 3175 + }, + { + "ce_ib": 2.6800174713134766, + "ce_orig": 0.4688003957271576, + "epoch": 0.9130778632540082, + "kl_loss": 0.16420257091522217, + "loss_ib": 0.004322043154388666, + "step": 3175 + }, + { + "ce_ib": 3.589418888092041, + "ce_orig": 0.5287401676177979, + "epoch": 0.9130778632540082, + "kl_loss": 0.3120400905609131, + "loss_ib": 0.006709819659590721, + "step": 3175 + }, + { + "ce_ib": 3.505120038986206, + "ce_orig": 0.51618891954422, + "epoch": 0.9130778632540082, + "kl_loss": 0.23527932167053223, + "loss_ib": 0.005857913289219141, + "step": 3175 + }, + { + "ce_ib": 3.9760324954986572, + "ce_orig": 0.9900290966033936, + "epoch": 0.9133654468329858, + "kl_loss": 0.22488123178482056, + "loss_ib": 0.006224844139069319, + "step": 3176 + }, + { + "ce_ib": 3.7154273986816406, + "ce_orig": 0.8207109570503235, + "epoch": 0.9133654468329858, + "kl_loss": 0.2336234152317047, + "loss_ib": 0.006051661912351847, + "step": 3176 + }, + { + "ce_ib": 5.548839569091797, + "ce_orig": 1.0677050352096558, + "epoch": 0.9133654468329858, + "kl_loss": 0.22162145376205444, + "loss_ib": 0.0077650537714362144, + "step": 3176 + }, + { + "ce_ib": 4.3213043212890625, + "ce_orig": 0.9794013500213623, + "epoch": 0.9133654468329858, + "kl_loss": 0.2075313925743103, + "loss_ib": 0.006396617740392685, + "step": 3176 + }, + { + "ce_ib": 4.170251369476318, + "ce_orig": 0.8730517029762268, + "epoch": 0.9136530304119634, + "kl_loss": 0.14822429418563843, + "loss_ib": 0.005652494262903929, + "step": 3177 + }, + { + "ce_ib": 2.874642848968506, + "ce_orig": 0.7000548839569092, + "epoch": 0.9136530304119634, + "kl_loss": 0.1353777050971985, + "loss_ib": 0.004228420089930296, + "step": 3177 + }, + { + "ce_ib": 2.3247525691986084, + "ce_orig": 0.6125262379646301, + "epoch": 0.9136530304119634, + "kl_loss": 0.14757561683654785, + "loss_ib": 0.003800508799031377, + "step": 3177 + }, + { + "ce_ib": 2.811420440673828, + "ce_orig": 0.5774731040000916, + "epoch": 0.9136530304119634, + "kl_loss": 0.12930721044540405, + "loss_ib": 0.004104492720216513, + "step": 3177 + }, + { + "ce_ib": 5.083086013793945, + "ce_orig": 1.1634904146194458, + "epoch": 0.9139406139909411, + "kl_loss": 0.1495000422000885, + "loss_ib": 0.006578086409717798, + "step": 3178 + }, + { + "ce_ib": 1.4915516376495361, + "ce_orig": 0.38907819986343384, + "epoch": 0.9139406139909411, + "kl_loss": 0.23369920253753662, + "loss_ib": 0.0038285437040030956, + "step": 3178 + }, + { + "ce_ib": 3.377516746520996, + "ce_orig": 0.4128628075122833, + "epoch": 0.9139406139909411, + "kl_loss": 0.17179487645626068, + "loss_ib": 0.005095465574413538, + "step": 3178 + }, + { + "ce_ib": 2.4665188789367676, + "ce_orig": 0.5333656668663025, + "epoch": 0.9139406139909411, + "kl_loss": 0.10329265147447586, + "loss_ib": 0.0034994452726095915, + "step": 3178 + }, + { + "ce_ib": 4.6673712730407715, + "ce_orig": 0.9231456518173218, + "epoch": 0.9142281975699188, + "kl_loss": 0.21021759510040283, + "loss_ib": 0.006769547238945961, + "step": 3179 + }, + { + "ce_ib": 4.41593599319458, + "ce_orig": 0.7892795205116272, + "epoch": 0.9142281975699188, + "kl_loss": 0.21803030371665955, + "loss_ib": 0.006596239283680916, + "step": 3179 + }, + { + "ce_ib": 3.5803112983703613, + "ce_orig": 0.484737366437912, + "epoch": 0.9142281975699188, + "kl_loss": 0.23752586543560028, + "loss_ib": 0.0059555694460868835, + "step": 3179 + }, + { + "ce_ib": 4.6087541580200195, + "ce_orig": 0.792738139629364, + "epoch": 0.9142281975699188, + "kl_loss": 0.15497587621212006, + "loss_ib": 0.0061585125513374805, + "step": 3179 + }, + { + "epoch": 0.9145157811488964, + "grad_norm": 0.14268086850643158, + "learning_rate": 8.155439721630265e-06, + "loss": 0.8018, + "step": 3180 + }, + { + "ce_ib": 2.666445255279541, + "ce_orig": 0.6924548149108887, + "epoch": 0.9145157811488964, + "kl_loss": 0.13076114654541016, + "loss_ib": 0.003974056802690029, + "step": 3180 + }, + { + "ce_ib": 3.617879867553711, + "ce_orig": 0.8236595392227173, + "epoch": 0.9145157811488964, + "kl_loss": 0.18551631271839142, + "loss_ib": 0.00547304330393672, + "step": 3180 + }, + { + "ce_ib": 3.5022506713867188, + "ce_orig": 0.9172071218490601, + "epoch": 0.9145157811488964, + "kl_loss": 0.15370741486549377, + "loss_ib": 0.005039324518293142, + "step": 3180 + }, + { + "ce_ib": 4.698860168457031, + "ce_orig": 1.2755699157714844, + "epoch": 0.9145157811488964, + "kl_loss": 0.15975219011306763, + "loss_ib": 0.0062963818199932575, + "step": 3180 + }, + { + "ce_ib": 4.820138454437256, + "ce_orig": 1.1961218118667603, + "epoch": 0.914803364727874, + "kl_loss": 0.19059348106384277, + "loss_ib": 0.0067260731011629105, + "step": 3181 + }, + { + "ce_ib": 2.873868465423584, + "ce_orig": 0.738355278968811, + "epoch": 0.914803364727874, + "kl_loss": 0.1164512038230896, + "loss_ib": 0.004038380458950996, + "step": 3181 + }, + { + "ce_ib": 4.381678581237793, + "ce_orig": 1.0426833629608154, + "epoch": 0.914803364727874, + "kl_loss": 0.18962354958057404, + "loss_ib": 0.006277913693338633, + "step": 3181 + }, + { + "ce_ib": 5.7370734214782715, + "ce_orig": 1.4122097492218018, + "epoch": 0.914803364727874, + "kl_loss": 0.15359631180763245, + "loss_ib": 0.007273036055266857, + "step": 3181 + }, + { + "ce_ib": 3.974966049194336, + "ce_orig": 0.4201129376888275, + "epoch": 0.9150909483068517, + "kl_loss": 0.1857951432466507, + "loss_ib": 0.005832917522639036, + "step": 3182 + }, + { + "ce_ib": 2.0708227157592773, + "ce_orig": 0.43078872561454773, + "epoch": 0.9150909483068517, + "kl_loss": 0.6093254685401917, + "loss_ib": 0.008164077065885067, + "step": 3182 + }, + { + "ce_ib": 4.2253923416137695, + "ce_orig": 0.7263680696487427, + "epoch": 0.9150909483068517, + "kl_loss": 0.2116239368915558, + "loss_ib": 0.006341631058603525, + "step": 3182 + }, + { + "ce_ib": 4.174966335296631, + "ce_orig": 0.9983445405960083, + "epoch": 0.9150909483068517, + "kl_loss": 0.2123318910598755, + "loss_ib": 0.0062982854433357716, + "step": 3182 + }, + { + "ce_ib": 3.0793190002441406, + "ce_orig": 0.5075044631958008, + "epoch": 0.9153785318858293, + "kl_loss": 0.1368299126625061, + "loss_ib": 0.004447618033736944, + "step": 3183 + }, + { + "ce_ib": 2.5126235485076904, + "ce_orig": 0.5631418824195862, + "epoch": 0.9153785318858293, + "kl_loss": 0.20451705157756805, + "loss_ib": 0.00455779442563653, + "step": 3183 + }, + { + "ce_ib": 3.5910816192626953, + "ce_orig": 0.7243965864181519, + "epoch": 0.9153785318858293, + "kl_loss": 0.16952157020568848, + "loss_ib": 0.005286297760903835, + "step": 3183 + }, + { + "ce_ib": 4.13463020324707, + "ce_orig": 0.9687248468399048, + "epoch": 0.9153785318858293, + "kl_loss": 0.20430293679237366, + "loss_ib": 0.006177659612149, + "step": 3183 + }, + { + "ce_ib": 3.7367217540740967, + "ce_orig": 0.5525069832801819, + "epoch": 0.9156661154648069, + "kl_loss": 0.22476978600025177, + "loss_ib": 0.0059844194911420345, + "step": 3184 + }, + { + "ce_ib": 4.478357315063477, + "ce_orig": 1.0377490520477295, + "epoch": 0.9156661154648069, + "kl_loss": 0.10645167529582977, + "loss_ib": 0.005542874336242676, + "step": 3184 + }, + { + "ce_ib": 3.4153854846954346, + "ce_orig": 0.8960189819335938, + "epoch": 0.9156661154648069, + "kl_loss": 0.20002348721027374, + "loss_ib": 0.005415620282292366, + "step": 3184 + }, + { + "ce_ib": 4.2203369140625, + "ce_orig": 0.8467499613761902, + "epoch": 0.9156661154648069, + "kl_loss": 0.1497737467288971, + "loss_ib": 0.005718074273318052, + "step": 3184 + }, + { + "epoch": 0.9159536990437847, + "grad_norm": 0.1643574833869934, + "learning_rate": 8.149415742884635e-06, + "loss": 0.8509, + "step": 3185 + }, + { + "ce_ib": 3.2262158393859863, + "ce_orig": 0.5925186276435852, + "epoch": 0.9159536990437847, + "kl_loss": 0.30362242460250854, + "loss_ib": 0.006262439768761396, + "step": 3185 + }, + { + "ce_ib": 2.4640753269195557, + "ce_orig": 0.6532748341560364, + "epoch": 0.9159536990437847, + "kl_loss": 0.16304340958595276, + "loss_ib": 0.004094509407877922, + "step": 3185 + }, + { + "ce_ib": 4.579474925994873, + "ce_orig": 0.9320160746574402, + "epoch": 0.9159536990437847, + "kl_loss": 0.22225144505500793, + "loss_ib": 0.006801988929510117, + "step": 3185 + }, + { + "ce_ib": 3.441133499145508, + "ce_orig": 0.8192372918128967, + "epoch": 0.9159536990437847, + "kl_loss": 0.1319490373134613, + "loss_ib": 0.004760623909533024, + "step": 3185 + }, + { + "ce_ib": 3.6897976398468018, + "ce_orig": 0.9002533555030823, + "epoch": 0.9162412826227623, + "kl_loss": 0.1492660492658615, + "loss_ib": 0.005182458087801933, + "step": 3186 + }, + { + "ce_ib": 5.220062255859375, + "ce_orig": 1.3011032342910767, + "epoch": 0.9162412826227623, + "kl_loss": 0.16618946194648743, + "loss_ib": 0.006881956942379475, + "step": 3186 + }, + { + "ce_ib": 3.7954814434051514, + "ce_orig": 0.8443017601966858, + "epoch": 0.9162412826227623, + "kl_loss": 0.1520317792892456, + "loss_ib": 0.005315799731761217, + "step": 3186 + }, + { + "ce_ib": 3.9846107959747314, + "ce_orig": 0.9226762056350708, + "epoch": 0.9162412826227623, + "kl_loss": 0.16926637291908264, + "loss_ib": 0.00567727442830801, + "step": 3186 + }, + { + "ce_ib": 4.859255790710449, + "ce_orig": 1.1067121028900146, + "epoch": 0.9165288662017399, + "kl_loss": 0.2464735209941864, + "loss_ib": 0.0073239910416305065, + "step": 3187 + }, + { + "ce_ib": 4.598275661468506, + "ce_orig": 0.9337934255599976, + "epoch": 0.9165288662017399, + "kl_loss": 0.18947140872478485, + "loss_ib": 0.006492989603430033, + "step": 3187 + }, + { + "ce_ib": 3.4599218368530273, + "ce_orig": 0.8806605935096741, + "epoch": 0.9165288662017399, + "kl_loss": 0.1725182682275772, + "loss_ib": 0.005185104440897703, + "step": 3187 + }, + { + "ce_ib": 6.574446678161621, + "ce_orig": 1.4797084331512451, + "epoch": 0.9165288662017399, + "kl_loss": 0.2742977738380432, + "loss_ib": 0.009317424148321152, + "step": 3187 + }, + { + "ce_ib": 5.582188606262207, + "ce_orig": 1.3584535121917725, + "epoch": 0.9168164497807175, + "kl_loss": 0.23170803487300873, + "loss_ib": 0.007899269461631775, + "step": 3188 + }, + { + "ce_ib": 4.564296722412109, + "ce_orig": 0.9914824962615967, + "epoch": 0.9168164497807175, + "kl_loss": 0.2234863042831421, + "loss_ib": 0.006799160037189722, + "step": 3188 + }, + { + "ce_ib": 3.517049789428711, + "ce_orig": 0.7064523100852966, + "epoch": 0.9168164497807175, + "kl_loss": 0.1805395781993866, + "loss_ib": 0.005322445649653673, + "step": 3188 + }, + { + "ce_ib": 2.109410524368286, + "ce_orig": 0.5606645941734314, + "epoch": 0.9168164497807175, + "kl_loss": 0.12701274454593658, + "loss_ib": 0.003379537956789136, + "step": 3188 + }, + { + "ce_ib": 2.9838778972625732, + "ce_orig": 0.5186296105384827, + "epoch": 0.9171040333596951, + "kl_loss": 0.13323411345481873, + "loss_ib": 0.004316219128668308, + "step": 3189 + }, + { + "ce_ib": 7.9756574630737305, + "ce_orig": 2.1652863025665283, + "epoch": 0.9171040333596951, + "kl_loss": 0.22363796830177307, + "loss_ib": 0.010212037712335587, + "step": 3189 + }, + { + "ce_ib": 6.14412784576416, + "ce_orig": 1.5596544742584229, + "epoch": 0.9171040333596951, + "kl_loss": 0.26990243792533875, + "loss_ib": 0.008843152783811092, + "step": 3189 + }, + { + "ce_ib": 0.8038741946220398, + "ce_orig": 0.17762021720409393, + "epoch": 0.9171040333596951, + "kl_loss": 0.25454792380332947, + "loss_ib": 0.003349353326484561, + "step": 3189 + }, + { + "epoch": 0.9173916169386728, + "grad_norm": 0.15201786160469055, + "learning_rate": 8.143384176465486e-06, + "loss": 0.8625, + "step": 3190 + }, + { + "ce_ib": 3.90012788772583, + "ce_orig": 0.5920072197914124, + "epoch": 0.9173916169386728, + "kl_loss": 0.20520135760307312, + "loss_ib": 0.005952141247689724, + "step": 3190 + }, + { + "ce_ib": 1.7243776321411133, + "ce_orig": 0.538148820400238, + "epoch": 0.9173916169386728, + "kl_loss": 0.15815015137195587, + "loss_ib": 0.0033058791887015104, + "step": 3190 + }, + { + "ce_ib": 2.385017156600952, + "ce_orig": 0.5080032348632812, + "epoch": 0.9173916169386728, + "kl_loss": 0.17918387055397034, + "loss_ib": 0.004176855552941561, + "step": 3190 + }, + { + "ce_ib": 5.12669038772583, + "ce_orig": 0.8422667980194092, + "epoch": 0.9173916169386728, + "kl_loss": 0.21997302770614624, + "loss_ib": 0.007326420396566391, + "step": 3190 + }, + { + "ce_ib": 5.192967891693115, + "ce_orig": 1.1661471128463745, + "epoch": 0.9176792005176504, + "kl_loss": 0.23560217022895813, + "loss_ib": 0.007548990193754435, + "step": 3191 + }, + { + "ce_ib": 3.617347478866577, + "ce_orig": 0.7921478152275085, + "epoch": 0.9176792005176504, + "kl_loss": 0.13622410595417023, + "loss_ib": 0.004979588091373444, + "step": 3191 + }, + { + "ce_ib": 4.795959949493408, + "ce_orig": 1.0611555576324463, + "epoch": 0.9176792005176504, + "kl_loss": 0.14636006951332092, + "loss_ib": 0.006259560585021973, + "step": 3191 + }, + { + "ce_ib": 5.522512912750244, + "ce_orig": 1.141659140586853, + "epoch": 0.9176792005176504, + "kl_loss": 0.16574549674987793, + "loss_ib": 0.0071799675934016705, + "step": 3191 + }, + { + "ce_ib": 3.547783374786377, + "ce_orig": 0.6776642203330994, + "epoch": 0.9179667840966281, + "kl_loss": 0.20387491583824158, + "loss_ib": 0.005586532410234213, + "step": 3192 + }, + { + "ce_ib": 3.4045655727386475, + "ce_orig": 0.746738612651825, + "epoch": 0.9179667840966281, + "kl_loss": 0.2359456717967987, + "loss_ib": 0.005764022469520569, + "step": 3192 + }, + { + "ce_ib": 4.755576133728027, + "ce_orig": 1.4648209810256958, + "epoch": 0.9179667840966281, + "kl_loss": 0.13741134107112885, + "loss_ib": 0.006129689514636993, + "step": 3192 + }, + { + "ce_ib": 3.4755797386169434, + "ce_orig": 0.6927435994148254, + "epoch": 0.9179667840966281, + "kl_loss": 0.2768542468547821, + "loss_ib": 0.006244122050702572, + "step": 3192 + }, + { + "ce_ib": 4.507148742675781, + "ce_orig": 0.9875214695930481, + "epoch": 0.9182543676756058, + "kl_loss": 0.18337717652320862, + "loss_ib": 0.006340920925140381, + "step": 3193 + }, + { + "ce_ib": 2.658722162246704, + "ce_orig": 0.728195309638977, + "epoch": 0.9182543676756058, + "kl_loss": 0.10772977024316788, + "loss_ib": 0.0037360196001827717, + "step": 3193 + }, + { + "ce_ib": 1.7891353368759155, + "ce_orig": 0.4403286874294281, + "epoch": 0.9182543676756058, + "kl_loss": 0.1658402383327484, + "loss_ib": 0.0034475375432521105, + "step": 3193 + }, + { + "ce_ib": 4.291409015655518, + "ce_orig": 0.8181302547454834, + "epoch": 0.9182543676756058, + "kl_loss": 0.19503021240234375, + "loss_ib": 0.00624171132221818, + "step": 3193 + }, + { + "ce_ib": 3.1194746494293213, + "ce_orig": 0.7054705619812012, + "epoch": 0.9185419512545834, + "kl_loss": 0.1422164887189865, + "loss_ib": 0.00454163970425725, + "step": 3194 + }, + { + "ce_ib": 4.6737260818481445, + "ce_orig": 0.9160136580467224, + "epoch": 0.9185419512545834, + "kl_loss": 0.1907942295074463, + "loss_ib": 0.006581668742001057, + "step": 3194 + }, + { + "ce_ib": 6.9012980461120605, + "ce_orig": 0.6364160180091858, + "epoch": 0.9185419512545834, + "kl_loss": 0.14408214390277863, + "loss_ib": 0.008342118933796883, + "step": 3194 + }, + { + "ce_ib": 5.522176742553711, + "ce_orig": 0.581107497215271, + "epoch": 0.9185419512545834, + "kl_loss": 0.32057589292526245, + "loss_ib": 0.008727935142815113, + "step": 3194 + }, + { + "epoch": 0.918829534833561, + "grad_norm": 0.11802197992801666, + "learning_rate": 8.13734503690426e-06, + "loss": 0.8157, + "step": 3195 + }, + { + "ce_ib": 2.742060661315918, + "ce_orig": 0.6172852516174316, + "epoch": 0.918829534833561, + "kl_loss": 0.24319706857204437, + "loss_ib": 0.005174031015485525, + "step": 3195 + }, + { + "ce_ib": 4.288417816162109, + "ce_orig": 0.7603237628936768, + "epoch": 0.918829534833561, + "kl_loss": 0.24504373967647552, + "loss_ib": 0.006738855037838221, + "step": 3195 + }, + { + "ce_ib": 3.3916869163513184, + "ce_orig": 0.8422545194625854, + "epoch": 0.918829534833561, + "kl_loss": 0.14094382524490356, + "loss_ib": 0.004801125265657902, + "step": 3195 + }, + { + "ce_ib": 4.949401378631592, + "ce_orig": 1.1679590940475464, + "epoch": 0.918829534833561, + "kl_loss": 0.15415292978286743, + "loss_ib": 0.006490929983556271, + "step": 3195 + }, + { + "ce_ib": 4.1491265296936035, + "ce_orig": 0.8359330296516418, + "epoch": 0.9191171184125386, + "kl_loss": 0.24248024821281433, + "loss_ib": 0.006573928985744715, + "step": 3196 + }, + { + "ce_ib": 3.4604883193969727, + "ce_orig": 0.5962561368942261, + "epoch": 0.9191171184125386, + "kl_loss": 0.20298829674720764, + "loss_ib": 0.005490371026098728, + "step": 3196 + }, + { + "ce_ib": 4.135103702545166, + "ce_orig": 0.726915180683136, + "epoch": 0.9191171184125386, + "kl_loss": 0.22172319889068604, + "loss_ib": 0.0063523356802761555, + "step": 3196 + }, + { + "ce_ib": 1.9700264930725098, + "ce_orig": 0.4206306040287018, + "epoch": 0.9191171184125386, + "kl_loss": 0.18714338541030884, + "loss_ib": 0.0038414602167904377, + "step": 3196 + }, + { + "ce_ib": 4.537639617919922, + "ce_orig": 1.1116862297058105, + "epoch": 0.9194047019915162, + "kl_loss": 0.1622447967529297, + "loss_ib": 0.006160087417811155, + "step": 3197 + }, + { + "ce_ib": 0.4987223744392395, + "ce_orig": 0.12945619225502014, + "epoch": 0.9194047019915162, + "kl_loss": 0.2751041054725647, + "loss_ib": 0.003249763511121273, + "step": 3197 + }, + { + "ce_ib": 2.2097606658935547, + "ce_orig": 0.7727401852607727, + "epoch": 0.9194047019915162, + "kl_loss": 0.12998756766319275, + "loss_ib": 0.003509636502712965, + "step": 3197 + }, + { + "ce_ib": 8.806327819824219, + "ce_orig": 1.8315576314926147, + "epoch": 0.9194047019915162, + "kl_loss": 0.25920090079307556, + "loss_ib": 0.011398336850106716, + "step": 3197 + }, + { + "ce_ib": 4.372546672821045, + "ce_orig": 0.9090943336486816, + "epoch": 0.9196922855704939, + "kl_loss": 0.24109405279159546, + "loss_ib": 0.0067834872752428055, + "step": 3198 + }, + { + "ce_ib": 3.558494806289673, + "ce_orig": 0.7527180314064026, + "epoch": 0.9196922855704939, + "kl_loss": 0.12968450784683228, + "loss_ib": 0.004855339881032705, + "step": 3198 + }, + { + "ce_ib": 2.527456521987915, + "ce_orig": 0.8368136882781982, + "epoch": 0.9196922855704939, + "kl_loss": 0.0718817412853241, + "loss_ib": 0.003246273845434189, + "step": 3198 + }, + { + "ce_ib": 2.6810665130615234, + "ce_orig": 0.8855975866317749, + "epoch": 0.9196922855704939, + "kl_loss": 0.15533995628356934, + "loss_ib": 0.0042344662360847, + "step": 3198 + }, + { + "ce_ib": 2.0557162761688232, + "ce_orig": 0.5867295265197754, + "epoch": 0.9199798691494716, + "kl_loss": 0.08816453069448471, + "loss_ib": 0.0029373615980148315, + "step": 3199 + }, + { + "ce_ib": 3.3483781814575195, + "ce_orig": 0.7366922497749329, + "epoch": 0.9199798691494716, + "kl_loss": 0.18489044904708862, + "loss_ib": 0.0051972828805446625, + "step": 3199 + }, + { + "ce_ib": 3.792405605316162, + "ce_orig": 0.908549964427948, + "epoch": 0.9199798691494716, + "kl_loss": 0.18036945164203644, + "loss_ib": 0.005596099887043238, + "step": 3199 + }, + { + "ce_ib": 4.531116008758545, + "ce_orig": 0.926806628704071, + "epoch": 0.9199798691494716, + "kl_loss": 0.1685086339712143, + "loss_ib": 0.006216202396899462, + "step": 3199 + }, + { + "epoch": 0.9202674527284492, + "grad_norm": 0.1330055594444275, + "learning_rate": 8.131298338750648e-06, + "loss": 0.7955, + "step": 3200 + }, + { + "ce_ib": 5.507092475891113, + "ce_orig": 1.010513424873352, + "epoch": 0.9202674527284492, + "kl_loss": 0.21844618022441864, + "loss_ib": 0.007691554259508848, + "step": 3200 + }, + { + "ce_ib": 5.291164398193359, + "ce_orig": 1.1609742641448975, + "epoch": 0.9202674527284492, + "kl_loss": 0.1985095590353012, + "loss_ib": 0.007276260294020176, + "step": 3200 + }, + { + "ce_ib": 8.127470016479492, + "ce_orig": 1.8220096826553345, + "epoch": 0.9202674527284492, + "kl_loss": 0.22327309846878052, + "loss_ib": 0.010360199958086014, + "step": 3200 + }, + { + "ce_ib": 4.938416957855225, + "ce_orig": 0.9598167538642883, + "epoch": 0.9202674527284492, + "kl_loss": 0.2067001461982727, + "loss_ib": 0.007005418185144663, + "step": 3200 + }, + { + "ce_ib": 2.36796498298645, + "ce_orig": 0.5143000483512878, + "epoch": 0.9205550363074269, + "kl_loss": 0.23937149345874786, + "loss_ib": 0.0047616795636713505, + "step": 3201 + }, + { + "ce_ib": 5.4660162925720215, + "ce_orig": 1.151102066040039, + "epoch": 0.9205550363074269, + "kl_loss": 0.15663892030715942, + "loss_ib": 0.007032405585050583, + "step": 3201 + }, + { + "ce_ib": 2.7241005897521973, + "ce_orig": 0.6413788199424744, + "epoch": 0.9205550363074269, + "kl_loss": 0.18821561336517334, + "loss_ib": 0.004606256727129221, + "step": 3201 + }, + { + "ce_ib": 6.506221294403076, + "ce_orig": 1.4165716171264648, + "epoch": 0.9205550363074269, + "kl_loss": 0.1837748885154724, + "loss_ib": 0.008343970403075218, + "step": 3201 + }, + { + "ce_ib": 5.485610008239746, + "ce_orig": 1.0179758071899414, + "epoch": 0.9208426198864045, + "kl_loss": 0.23710842430591583, + "loss_ib": 0.007856694050133228, + "step": 3202 + }, + { + "ce_ib": 3.015568256378174, + "ce_orig": 0.5960516929626465, + "epoch": 0.9208426198864045, + "kl_loss": 0.14763376116752625, + "loss_ib": 0.004491905681788921, + "step": 3202 + }, + { + "ce_ib": 4.941378593444824, + "ce_orig": 0.9513983726501465, + "epoch": 0.9208426198864045, + "kl_loss": 0.1912161409854889, + "loss_ib": 0.00685353996232152, + "step": 3202 + }, + { + "ce_ib": 2.8677258491516113, + "ce_orig": 0.5599122047424316, + "epoch": 0.9208426198864045, + "kl_loss": 0.20740050077438354, + "loss_ib": 0.004941730760037899, + "step": 3202 + }, + { + "ce_ib": 5.08165979385376, + "ce_orig": 0.8865757584571838, + "epoch": 0.9211302034653821, + "kl_loss": 0.16577158868312836, + "loss_ib": 0.006739376112818718, + "step": 3203 + }, + { + "ce_ib": 2.3282454013824463, + "ce_orig": 0.45950227975845337, + "epoch": 0.9211302034653821, + "kl_loss": 0.15785665810108185, + "loss_ib": 0.0039068120531737804, + "step": 3203 + }, + { + "ce_ib": 6.575521945953369, + "ce_orig": 1.6462026834487915, + "epoch": 0.9211302034653821, + "kl_loss": 0.15392594039440155, + "loss_ib": 0.008114781230688095, + "step": 3203 + }, + { + "ce_ib": 3.1184468269348145, + "ce_orig": 0.8792256712913513, + "epoch": 0.9211302034653821, + "kl_loss": 0.15204721689224243, + "loss_ib": 0.004638918675482273, + "step": 3203 + }, + { + "ce_ib": 3.462387800216675, + "ce_orig": 0.92423415184021, + "epoch": 0.9214177870443597, + "kl_loss": 0.28931835293769836, + "loss_ib": 0.006355571560561657, + "step": 3204 + }, + { + "ce_ib": 3.827796697616577, + "ce_orig": 0.8718609809875488, + "epoch": 0.9214177870443597, + "kl_loss": 0.5838905572891235, + "loss_ib": 0.009666702710092068, + "step": 3204 + }, + { + "ce_ib": 3.747541666030884, + "ce_orig": 0.5636054277420044, + "epoch": 0.9214177870443597, + "kl_loss": 0.1556585729122162, + "loss_ib": 0.005304127000272274, + "step": 3204 + }, + { + "ce_ib": 3.1074423789978027, + "ce_orig": 0.71568363904953, + "epoch": 0.9214177870443597, + "kl_loss": 0.1393730640411377, + "loss_ib": 0.00450117327272892, + "step": 3204 + }, + { + "epoch": 0.9217053706233375, + "grad_norm": 0.146153524518013, + "learning_rate": 8.125244096572547e-06, + "loss": 0.8586, + "step": 3205 + }, + { + "ce_ib": 4.68765115737915, + "ce_orig": 1.1503430604934692, + "epoch": 0.9217053706233375, + "kl_loss": 0.36926037073135376, + "loss_ib": 0.008380254730582237, + "step": 3205 + }, + { + "ce_ib": 3.007138729095459, + "ce_orig": 0.7003147602081299, + "epoch": 0.9217053706233375, + "kl_loss": 0.18725204467773438, + "loss_ib": 0.004879659041762352, + "step": 3205 + }, + { + "ce_ib": 6.892640590667725, + "ce_orig": 1.7425334453582764, + "epoch": 0.9217053706233375, + "kl_loss": 0.18270626664161682, + "loss_ib": 0.008719703182578087, + "step": 3205 + }, + { + "ce_ib": 6.100396156311035, + "ce_orig": 1.2865290641784668, + "epoch": 0.9217053706233375, + "kl_loss": 0.14449042081832886, + "loss_ib": 0.007545300759375095, + "step": 3205 + }, + { + "ce_ib": 6.11977481842041, + "ce_orig": 1.6911790370941162, + "epoch": 0.9219929542023151, + "kl_loss": 0.3146817982196808, + "loss_ib": 0.009266593493521214, + "step": 3206 + }, + { + "ce_ib": 4.191370010375977, + "ce_orig": 0.6807392835617065, + "epoch": 0.9219929542023151, + "kl_loss": 0.2996433675289154, + "loss_ib": 0.007187803741544485, + "step": 3206 + }, + { + "ce_ib": 4.871065139770508, + "ce_orig": 0.9440940022468567, + "epoch": 0.9219929542023151, + "kl_loss": 0.17337435483932495, + "loss_ib": 0.006604808382689953, + "step": 3206 + }, + { + "ce_ib": 2.298759698867798, + "ce_orig": 0.7904884219169617, + "epoch": 0.9219929542023151, + "kl_loss": 0.10926514863967896, + "loss_ib": 0.003391411155462265, + "step": 3206 + }, + { + "ce_ib": 5.028500080108643, + "ce_orig": 0.9883468747138977, + "epoch": 0.9222805377812927, + "kl_loss": 0.22988903522491455, + "loss_ib": 0.007327390369027853, + "step": 3207 + }, + { + "ce_ib": 2.362541913986206, + "ce_orig": 0.10036589205265045, + "epoch": 0.9222805377812927, + "kl_loss": 0.45887401700019836, + "loss_ib": 0.006951281800866127, + "step": 3207 + }, + { + "ce_ib": 4.4783220291137695, + "ce_orig": 1.1613068580627441, + "epoch": 0.9222805377812927, + "kl_loss": 0.21193280816078186, + "loss_ib": 0.006597649771720171, + "step": 3207 + }, + { + "ce_ib": 8.385294914245605, + "ce_orig": 1.9127928018569946, + "epoch": 0.9222805377812927, + "kl_loss": 0.15511684119701385, + "loss_ib": 0.00993646401911974, + "step": 3207 + }, + { + "ce_ib": 4.993514060974121, + "ce_orig": 0.7278662919998169, + "epoch": 0.9225681213602703, + "kl_loss": 0.21042567491531372, + "loss_ib": 0.0070977709256112576, + "step": 3208 + }, + { + "ce_ib": 4.058452606201172, + "ce_orig": 0.9675374627113342, + "epoch": 0.9225681213602703, + "kl_loss": 0.11902497708797455, + "loss_ib": 0.005248702131211758, + "step": 3208 + }, + { + "ce_ib": 3.4533960819244385, + "ce_orig": 1.17996084690094, + "epoch": 0.9225681213602703, + "kl_loss": 0.18029114603996277, + "loss_ib": 0.005256307777017355, + "step": 3208 + }, + { + "ce_ib": 4.274377346038818, + "ce_orig": 0.8098840713500977, + "epoch": 0.9225681213602703, + "kl_loss": 0.24018403887748718, + "loss_ib": 0.006676217541098595, + "step": 3208 + }, + { + "ce_ib": 2.4826157093048096, + "ce_orig": 0.4721343219280243, + "epoch": 0.922855704939248, + "kl_loss": 0.1822255551815033, + "loss_ib": 0.004304870963096619, + "step": 3209 + }, + { + "ce_ib": 5.574870586395264, + "ce_orig": 0.7712216973304749, + "epoch": 0.922855704939248, + "kl_loss": 0.4264765679836273, + "loss_ib": 0.009839636273682117, + "step": 3209 + }, + { + "ce_ib": 3.303083896636963, + "ce_orig": 0.803773820400238, + "epoch": 0.922855704939248, + "kl_loss": 0.17856857180595398, + "loss_ib": 0.00508876983076334, + "step": 3209 + }, + { + "ce_ib": 4.0008039474487305, + "ce_orig": 0.8426457047462463, + "epoch": 0.922855704939248, + "kl_loss": 0.130147784948349, + "loss_ib": 0.005302282050251961, + "step": 3209 + }, + { + "epoch": 0.9231432885182256, + "grad_norm": 0.13095571100711823, + "learning_rate": 8.119182324956034e-06, + "loss": 0.9231, + "step": 3210 + }, + { + "ce_ib": 7.459493637084961, + "ce_orig": 1.3455088138580322, + "epoch": 0.9231432885182256, + "kl_loss": 0.2336563766002655, + "loss_ib": 0.00979605782777071, + "step": 3210 + }, + { + "ce_ib": 0.968967080116272, + "ce_orig": 0.26264092326164246, + "epoch": 0.9231432885182256, + "kl_loss": 0.4228881895542145, + "loss_ib": 0.005197849124670029, + "step": 3210 + }, + { + "ce_ib": 2.6603574752807617, + "ce_orig": 0.47758063673973083, + "epoch": 0.9231432885182256, + "kl_loss": 0.17875249683856964, + "loss_ib": 0.004447882529348135, + "step": 3210 + }, + { + "ce_ib": 2.9088869094848633, + "ce_orig": 0.5862252116203308, + "epoch": 0.9231432885182256, + "kl_loss": 0.18955257534980774, + "loss_ib": 0.004804412834346294, + "step": 3210 + }, + { + "ce_ib": 4.2047505378723145, + "ce_orig": 0.9030767679214478, + "epoch": 0.9234308720972032, + "kl_loss": 0.1159481480717659, + "loss_ib": 0.005364231765270233, + "step": 3211 + }, + { + "ce_ib": 5.323775768280029, + "ce_orig": 0.919324517250061, + "epoch": 0.9234308720972032, + "kl_loss": 0.19747988879680634, + "loss_ib": 0.007298574782907963, + "step": 3211 + }, + { + "ce_ib": 3.454789161682129, + "ce_orig": 0.6511629819869995, + "epoch": 0.9234308720972032, + "kl_loss": 0.32005831599235535, + "loss_ib": 0.006655372213572264, + "step": 3211 + }, + { + "ce_ib": 3.8185055255889893, + "ce_orig": 0.4161761403083801, + "epoch": 0.9234308720972032, + "kl_loss": 0.20959071815013885, + "loss_ib": 0.005914412438869476, + "step": 3211 + }, + { + "ce_ib": 4.71874475479126, + "ce_orig": 0.7539423108100891, + "epoch": 0.9237184556761809, + "kl_loss": 0.19033768773078918, + "loss_ib": 0.006622121203690767, + "step": 3212 + }, + { + "ce_ib": 6.8177313804626465, + "ce_orig": 1.787329077720642, + "epoch": 0.9237184556761809, + "kl_loss": 0.10282856971025467, + "loss_ib": 0.007846016436815262, + "step": 3212 + }, + { + "ce_ib": 2.5719454288482666, + "ce_orig": 0.7123086452484131, + "epoch": 0.9237184556761809, + "kl_loss": 0.11980713158845901, + "loss_ib": 0.003770016599446535, + "step": 3212 + }, + { + "ce_ib": 3.355541229248047, + "ce_orig": 0.7486499547958374, + "epoch": 0.9237184556761809, + "kl_loss": 0.19155383110046387, + "loss_ib": 0.005271079484373331, + "step": 3212 + }, + { + "ce_ib": 3.6997666358947754, + "ce_orig": 0.5144417881965637, + "epoch": 0.9240060392551586, + "kl_loss": 0.19984674453735352, + "loss_ib": 0.0056982338428497314, + "step": 3213 + }, + { + "ce_ib": 3.1016879081726074, + "ce_orig": 0.6506905555725098, + "epoch": 0.9240060392551586, + "kl_loss": 0.16427451372146606, + "loss_ib": 0.004744432866573334, + "step": 3213 + }, + { + "ce_ib": 4.645236492156982, + "ce_orig": 0.9630134701728821, + "epoch": 0.9240060392551586, + "kl_loss": 0.14241835474967957, + "loss_ib": 0.006069419905543327, + "step": 3213 + }, + { + "ce_ib": 4.379214286804199, + "ce_orig": 0.9655337929725647, + "epoch": 0.9240060392551586, + "kl_loss": 0.2690991163253784, + "loss_ib": 0.007070205174386501, + "step": 3213 + }, + { + "ce_ib": 4.382303714752197, + "ce_orig": 1.0550248622894287, + "epoch": 0.9242936228341362, + "kl_loss": 0.4482329189777374, + "loss_ib": 0.008864632807672024, + "step": 3214 + }, + { + "ce_ib": 5.094371795654297, + "ce_orig": 0.8925375938415527, + "epoch": 0.9242936228341362, + "kl_loss": 0.1671142578125, + "loss_ib": 0.006765514612197876, + "step": 3214 + }, + { + "ce_ib": 5.375914573669434, + "ce_orig": 1.0704600811004639, + "epoch": 0.9242936228341362, + "kl_loss": 0.20599019527435303, + "loss_ib": 0.007435816340148449, + "step": 3214 + }, + { + "ce_ib": 2.073961019515991, + "ce_orig": 0.4821406304836273, + "epoch": 0.9242936228341362, + "kl_loss": 0.2675272822380066, + "loss_ib": 0.0047492338344454765, + "step": 3214 + }, + { + "epoch": 0.9245812064131138, + "grad_norm": 0.12218322604894638, + "learning_rate": 8.11311303850532e-06, + "loss": 0.8505, + "step": 3215 + }, + { + "ce_ib": 4.931778430938721, + "ce_orig": 1.309908151626587, + "epoch": 0.9245812064131138, + "kl_loss": 0.13766621053218842, + "loss_ib": 0.006308440584689379, + "step": 3215 + }, + { + "ce_ib": 2.554455280303955, + "ce_orig": 0.6834701895713806, + "epoch": 0.9245812064131138, + "kl_loss": 0.16669416427612305, + "loss_ib": 0.004221396986395121, + "step": 3215 + }, + { + "ce_ib": 4.203612804412842, + "ce_orig": 0.8647849559783936, + "epoch": 0.9245812064131138, + "kl_loss": 0.11569546163082123, + "loss_ib": 0.0053605674766004086, + "step": 3215 + }, + { + "ce_ib": 5.096341609954834, + "ce_orig": 1.1856518983840942, + "epoch": 0.9245812064131138, + "kl_loss": 0.28475117683410645, + "loss_ib": 0.007943853735923767, + "step": 3215 + }, + { + "ce_ib": 5.1697282791137695, + "ce_orig": 1.1183068752288818, + "epoch": 0.9248687899920914, + "kl_loss": 0.24185797572135925, + "loss_ib": 0.007588307838886976, + "step": 3216 + }, + { + "ce_ib": 4.4191789627075195, + "ce_orig": 0.8986616730690002, + "epoch": 0.9248687899920914, + "kl_loss": 0.3644023537635803, + "loss_ib": 0.00806320272386074, + "step": 3216 + }, + { + "ce_ib": 2.5547616481781006, + "ce_orig": 0.7268574237823486, + "epoch": 0.9248687899920914, + "kl_loss": 0.16433632373809814, + "loss_ib": 0.004198125097900629, + "step": 3216 + }, + { + "ce_ib": 3.4080841541290283, + "ce_orig": 0.5973031520843506, + "epoch": 0.9248687899920914, + "kl_loss": 0.11937259882688522, + "loss_ib": 0.004601810127496719, + "step": 3216 + }, + { + "ce_ib": 3.3604822158813477, + "ce_orig": 0.7598480582237244, + "epoch": 0.9251563735710691, + "kl_loss": 0.15768063068389893, + "loss_ib": 0.004937288351356983, + "step": 3217 + }, + { + "ce_ib": 4.8565287590026855, + "ce_orig": 1.2193691730499268, + "epoch": 0.9251563735710691, + "kl_loss": 0.19063036143779755, + "loss_ib": 0.006762832403182983, + "step": 3217 + }, + { + "ce_ib": 3.7153470516204834, + "ce_orig": 0.8832549452781677, + "epoch": 0.9251563735710691, + "kl_loss": 0.16812747716903687, + "loss_ib": 0.005396621767431498, + "step": 3217 + }, + { + "ce_ib": 5.194540023803711, + "ce_orig": 0.7263855338096619, + "epoch": 0.9251563735710691, + "kl_loss": 0.1914091408252716, + "loss_ib": 0.007108631078153849, + "step": 3217 + }, + { + "ce_ib": 2.5723800659179688, + "ce_orig": 0.7245425581932068, + "epoch": 0.9254439571500467, + "kl_loss": 0.18362775444984436, + "loss_ib": 0.004408657550811768, + "step": 3218 + }, + { + "ce_ib": 3.0102577209472656, + "ce_orig": 0.7973983287811279, + "epoch": 0.9254439571500467, + "kl_loss": 0.1301897168159485, + "loss_ib": 0.004312154836952686, + "step": 3218 + }, + { + "ce_ib": 4.35160493850708, + "ce_orig": 0.8577220439910889, + "epoch": 0.9254439571500467, + "kl_loss": 0.1702718734741211, + "loss_ib": 0.006054323632270098, + "step": 3218 + }, + { + "ce_ib": 3.4771993160247803, + "ce_orig": 0.7516182065010071, + "epoch": 0.9254439571500467, + "kl_loss": 0.1496502161026001, + "loss_ib": 0.004973701201379299, + "step": 3218 + }, + { + "ce_ib": 2.7115681171417236, + "ce_orig": 0.4848802387714386, + "epoch": 0.9257315407290244, + "kl_loss": 0.1798079013824463, + "loss_ib": 0.004509646911174059, + "step": 3219 + }, + { + "ce_ib": 2.883223295211792, + "ce_orig": 0.5551047325134277, + "epoch": 0.9257315407290244, + "kl_loss": 0.1769835650920868, + "loss_ib": 0.00465305894613266, + "step": 3219 + }, + { + "ce_ib": 3.211374521255493, + "ce_orig": 0.844107449054718, + "epoch": 0.9257315407290244, + "kl_loss": 0.15875950455665588, + "loss_ib": 0.004798969719558954, + "step": 3219 + }, + { + "ce_ib": 2.562871217727661, + "ce_orig": 0.7538756728172302, + "epoch": 0.9257315407290244, + "kl_loss": 0.10547100007534027, + "loss_ib": 0.0036175809800624847, + "step": 3219 + }, + { + "epoch": 0.926019124308002, + "grad_norm": 0.1607118546962738, + "learning_rate": 8.10703625184273e-06, + "loss": 0.8833, + "step": 3220 + }, + { + "ce_ib": 3.64146089553833, + "ce_orig": 0.7434746623039246, + "epoch": 0.926019124308002, + "kl_loss": 0.1567012369632721, + "loss_ib": 0.005208473186939955, + "step": 3220 + }, + { + "ce_ib": 2.0894525051116943, + "ce_orig": 0.44955122470855713, + "epoch": 0.926019124308002, + "kl_loss": 0.09115578234195709, + "loss_ib": 0.0030010102782398462, + "step": 3220 + }, + { + "ce_ib": 3.9886584281921387, + "ce_orig": 0.9963687658309937, + "epoch": 0.926019124308002, + "kl_loss": 0.15588624775409698, + "loss_ib": 0.00554752117022872, + "step": 3220 + }, + { + "ce_ib": 2.1860949993133545, + "ce_orig": 0.6833258867263794, + "epoch": 0.926019124308002, + "kl_loss": 0.1428285539150238, + "loss_ib": 0.003614380257204175, + "step": 3220 + }, + { + "ce_ib": 4.502804279327393, + "ce_orig": 1.1316927671432495, + "epoch": 0.9263067078869797, + "kl_loss": 0.12030059099197388, + "loss_ib": 0.005705810151994228, + "step": 3221 + }, + { + "ce_ib": 6.073120594024658, + "ce_orig": 1.179286241531372, + "epoch": 0.9263067078869797, + "kl_loss": 0.1726873219013214, + "loss_ib": 0.007799993734806776, + "step": 3221 + }, + { + "ce_ib": 1.9387325048446655, + "ce_orig": 0.39337658882141113, + "epoch": 0.9263067078869797, + "kl_loss": 0.16596195101737976, + "loss_ib": 0.003598351962864399, + "step": 3221 + }, + { + "ce_ib": 2.767209529876709, + "ce_orig": 0.8167507648468018, + "epoch": 0.9263067078869797, + "kl_loss": 0.1927700638771057, + "loss_ib": 0.004694910254329443, + "step": 3221 + }, + { + "ce_ib": 2.2552542686462402, + "ce_orig": 0.62040114402771, + "epoch": 0.9265942914659573, + "kl_loss": 0.11513108015060425, + "loss_ib": 0.0034065651707351208, + "step": 3222 + }, + { + "ce_ib": 2.1951260566711426, + "ce_orig": 0.6065877676010132, + "epoch": 0.9265942914659573, + "kl_loss": 0.2546282410621643, + "loss_ib": 0.00474140839651227, + "step": 3222 + }, + { + "ce_ib": 2.802903890609741, + "ce_orig": 0.8924481868743896, + "epoch": 0.9265942914659573, + "kl_loss": 0.11742217093706131, + "loss_ib": 0.003977125510573387, + "step": 3222 + }, + { + "ce_ib": 4.25860071182251, + "ce_orig": 0.9242351055145264, + "epoch": 0.9265942914659573, + "kl_loss": 0.1697923243045807, + "loss_ib": 0.005956524051725864, + "step": 3222 + }, + { + "ce_ib": 5.704512119293213, + "ce_orig": 0.9351587891578674, + "epoch": 0.9268818750449349, + "kl_loss": 0.3171802759170532, + "loss_ib": 0.008876314386725426, + "step": 3223 + }, + { + "ce_ib": 8.006518363952637, + "ce_orig": 1.6754016876220703, + "epoch": 0.9268818750449349, + "kl_loss": 0.191399484872818, + "loss_ib": 0.0099205132573843, + "step": 3223 + }, + { + "ce_ib": 3.295083999633789, + "ce_orig": 1.029266357421875, + "epoch": 0.9268818750449349, + "kl_loss": 0.11531861126422882, + "loss_ib": 0.004448269959539175, + "step": 3223 + }, + { + "ce_ib": 4.546910285949707, + "ce_orig": 0.9080656170845032, + "epoch": 0.9268818750449349, + "kl_loss": 0.1981317549943924, + "loss_ib": 0.006528227590024471, + "step": 3223 + }, + { + "ce_ib": 3.5223195552825928, + "ce_orig": 0.8407421112060547, + "epoch": 0.9271694586239125, + "kl_loss": 0.22188740968704224, + "loss_ib": 0.00574119295924902, + "step": 3224 + }, + { + "ce_ib": 4.771351337432861, + "ce_orig": 1.3052219152450562, + "epoch": 0.9271694586239125, + "kl_loss": 0.1280345320701599, + "loss_ib": 0.0060516963712871075, + "step": 3224 + }, + { + "ce_ib": 2.2819342613220215, + "ce_orig": 0.5461336970329285, + "epoch": 0.9271694586239125, + "kl_loss": 0.1492878496646881, + "loss_ib": 0.003774812677875161, + "step": 3224 + }, + { + "ce_ib": 3.557317018508911, + "ce_orig": 0.8086912631988525, + "epoch": 0.9271694586239125, + "kl_loss": 0.16668739914894104, + "loss_ib": 0.005224190652370453, + "step": 3224 + }, + { + "epoch": 0.9274570422028903, + "grad_norm": 0.1455952674150467, + "learning_rate": 8.10095197960865e-06, + "loss": 0.8546, + "step": 3225 + }, + { + "ce_ib": 2.1802382469177246, + "ce_orig": 0.5326496362686157, + "epoch": 0.9274570422028903, + "kl_loss": 0.10435812175273895, + "loss_ib": 0.003223819425329566, + "step": 3225 + }, + { + "ce_ib": 2.3503201007843018, + "ce_orig": 0.4826529622077942, + "epoch": 0.9274570422028903, + "kl_loss": 0.1523321568965912, + "loss_ib": 0.0038736413698643446, + "step": 3225 + }, + { + "ce_ib": 3.7983455657958984, + "ce_orig": 1.0679959058761597, + "epoch": 0.9274570422028903, + "kl_loss": 0.16456541419029236, + "loss_ib": 0.005443999543786049, + "step": 3225 + }, + { + "ce_ib": 2.237795829772949, + "ce_orig": 0.5832139253616333, + "epoch": 0.9274570422028903, + "kl_loss": 0.17281842231750488, + "loss_ib": 0.003965979907661676, + "step": 3225 + }, + { + "ce_ib": 3.293034553527832, + "ce_orig": 0.6464381814002991, + "epoch": 0.9277446257818679, + "kl_loss": 0.23346556723117828, + "loss_ib": 0.005627690348774195, + "step": 3226 + }, + { + "ce_ib": 4.449185848236084, + "ce_orig": 1.1229116916656494, + "epoch": 0.9277446257818679, + "kl_loss": 0.14489853382110596, + "loss_ib": 0.005898171570152044, + "step": 3226 + }, + { + "ce_ib": 6.028788089752197, + "ce_orig": 1.5290471315383911, + "epoch": 0.9277446257818679, + "kl_loss": 0.2235231101512909, + "loss_ib": 0.008264019154012203, + "step": 3226 + }, + { + "ce_ib": 4.482631683349609, + "ce_orig": 0.6820114850997925, + "epoch": 0.9277446257818679, + "kl_loss": 0.14592690765857697, + "loss_ib": 0.005941900424659252, + "step": 3226 + }, + { + "ce_ib": 3.8349499702453613, + "ce_orig": 0.9137827157974243, + "epoch": 0.9280322093608455, + "kl_loss": 0.21168389916419983, + "loss_ib": 0.005951788742095232, + "step": 3227 + }, + { + "ce_ib": 3.5760438442230225, + "ce_orig": 0.4701858162879944, + "epoch": 0.9280322093608455, + "kl_loss": 0.33818763494491577, + "loss_ib": 0.006957920268177986, + "step": 3227 + }, + { + "ce_ib": 5.99151086807251, + "ce_orig": 1.3959441184997559, + "epoch": 0.9280322093608455, + "kl_loss": 0.24300864338874817, + "loss_ib": 0.008421597070991993, + "step": 3227 + }, + { + "ce_ib": 4.72982931137085, + "ce_orig": 0.7154753804206848, + "epoch": 0.9280322093608455, + "kl_loss": 0.1574498414993286, + "loss_ib": 0.006304327864199877, + "step": 3227 + }, + { + "ce_ib": 4.161279678344727, + "ce_orig": 1.2267122268676758, + "epoch": 0.9283197929398231, + "kl_loss": 0.15431244671344757, + "loss_ib": 0.005704403854906559, + "step": 3228 + }, + { + "ce_ib": 5.288242816925049, + "ce_orig": 1.3874081373214722, + "epoch": 0.9283197929398231, + "kl_loss": 0.16867437958717346, + "loss_ib": 0.006974986754357815, + "step": 3228 + }, + { + "ce_ib": 2.251103639602661, + "ce_orig": 0.6210087537765503, + "epoch": 0.9283197929398231, + "kl_loss": 0.10996874421834946, + "loss_ib": 0.0033507910557091236, + "step": 3228 + }, + { + "ce_ib": 3.629169464111328, + "ce_orig": 0.7989233732223511, + "epoch": 0.9283197929398231, + "kl_loss": 0.13918814063072205, + "loss_ib": 0.005021051038056612, + "step": 3228 + }, + { + "ce_ib": 4.980795860290527, + "ce_orig": 1.016380786895752, + "epoch": 0.9286073765188008, + "kl_loss": 0.1702127605676651, + "loss_ib": 0.006682923063635826, + "step": 3229 + }, + { + "ce_ib": 2.278853178024292, + "ce_orig": 0.42571890354156494, + "epoch": 0.9286073765188008, + "kl_loss": 0.21674451231956482, + "loss_ib": 0.004446298349648714, + "step": 3229 + }, + { + "ce_ib": 2.824908971786499, + "ce_orig": 0.6680439114570618, + "epoch": 0.9286073765188008, + "kl_loss": 0.16465088725090027, + "loss_ib": 0.004471417982131243, + "step": 3229 + }, + { + "ce_ib": 3.003354787826538, + "ce_orig": 0.8000850081443787, + "epoch": 0.9286073765188008, + "kl_loss": 0.13791394233703613, + "loss_ib": 0.004382494371384382, + "step": 3229 + }, + { + "epoch": 0.9288949600977784, + "grad_norm": 0.1219462975859642, + "learning_rate": 8.094860236461505e-06, + "loss": 0.9212, + "step": 3230 + }, + { + "ce_ib": 4.512996673583984, + "ce_orig": 0.7077800631523132, + "epoch": 0.9288949600977784, + "kl_loss": 0.12021998316049576, + "loss_ib": 0.0057151964865624905, + "step": 3230 + }, + { + "ce_ib": 1.4684176445007324, + "ce_orig": 0.23330381512641907, + "epoch": 0.9288949600977784, + "kl_loss": 0.4025699496269226, + "loss_ib": 0.005494116805493832, + "step": 3230 + }, + { + "ce_ib": 0.4117628037929535, + "ce_orig": 0.05127744749188423, + "epoch": 0.9288949600977784, + "kl_loss": 0.36063501238822937, + "loss_ib": 0.004018113017082214, + "step": 3230 + }, + { + "ce_ib": 2.993691921234131, + "ce_orig": 0.5816148519515991, + "epoch": 0.9288949600977784, + "kl_loss": 0.17907178401947021, + "loss_ib": 0.004784409422427416, + "step": 3230 + }, + { + "ce_ib": 3.6186985969543457, + "ce_orig": 0.8693955540657043, + "epoch": 0.929182543676756, + "kl_loss": 0.20925429463386536, + "loss_ib": 0.005711241625249386, + "step": 3231 + }, + { + "ce_ib": 5.676827430725098, + "ce_orig": 1.2023162841796875, + "epoch": 0.929182543676756, + "kl_loss": 0.17022132873535156, + "loss_ib": 0.007379040587693453, + "step": 3231 + }, + { + "ce_ib": 3.1535074710845947, + "ce_orig": 0.6492658257484436, + "epoch": 0.929182543676756, + "kl_loss": 0.1631382405757904, + "loss_ib": 0.00478488951921463, + "step": 3231 + }, + { + "ce_ib": 2.5361685752868652, + "ce_orig": 0.8780034780502319, + "epoch": 0.929182543676756, + "kl_loss": 0.12279188632965088, + "loss_ib": 0.0037640875671058893, + "step": 3231 + }, + { + "ce_ib": 2.2202513217926025, + "ce_orig": 0.47849148511886597, + "epoch": 0.9294701272557337, + "kl_loss": 0.1204616129398346, + "loss_ib": 0.0034248672891408205, + "step": 3232 + }, + { + "ce_ib": 2.865365743637085, + "ce_orig": 0.6453917026519775, + "epoch": 0.9294701272557337, + "kl_loss": 0.18041643500328064, + "loss_ib": 0.004669529851526022, + "step": 3232 + }, + { + "ce_ib": 4.938257217407227, + "ce_orig": 1.2268316745758057, + "epoch": 0.9294701272557337, + "kl_loss": 0.11922299861907959, + "loss_ib": 0.0061304871924221516, + "step": 3232 + }, + { + "ce_ib": 4.168146133422852, + "ce_orig": 1.1351532936096191, + "epoch": 0.9294701272557337, + "kl_loss": 0.22474494576454163, + "loss_ib": 0.006415595766156912, + "step": 3232 + }, + { + "ce_ib": 3.7935631275177, + "ce_orig": 0.5965120196342468, + "epoch": 0.9297577108347114, + "kl_loss": 0.16683374345302582, + "loss_ib": 0.005461900494992733, + "step": 3233 + }, + { + "ce_ib": 4.789829730987549, + "ce_orig": 1.2123947143554688, + "epoch": 0.9297577108347114, + "kl_loss": 0.1447775661945343, + "loss_ib": 0.006237605586647987, + "step": 3233 + }, + { + "ce_ib": 1.7948353290557861, + "ce_orig": 0.6241395473480225, + "epoch": 0.9297577108347114, + "kl_loss": 0.16043631732463837, + "loss_ib": 0.003399198642000556, + "step": 3233 + }, + { + "ce_ib": 3.2317256927490234, + "ce_orig": 0.8875145316123962, + "epoch": 0.9297577108347114, + "kl_loss": 0.14598624408245087, + "loss_ib": 0.0046915882267057896, + "step": 3233 + }, + { + "ce_ib": 2.1207046508789062, + "ce_orig": 0.629601001739502, + "epoch": 0.930045294413689, + "kl_loss": 0.09986726939678192, + "loss_ib": 0.0031193774193525314, + "step": 3234 + }, + { + "ce_ib": 3.9990673065185547, + "ce_orig": 0.8835728168487549, + "epoch": 0.930045294413689, + "kl_loss": 0.24278602004051208, + "loss_ib": 0.006426927167922258, + "step": 3234 + }, + { + "ce_ib": 1.5140938758850098, + "ce_orig": 0.34112951159477234, + "epoch": 0.930045294413689, + "kl_loss": 0.2671073377132416, + "loss_ib": 0.004185167141258717, + "step": 3234 + }, + { + "ce_ib": 2.519674777984619, + "ce_orig": 0.5676715970039368, + "epoch": 0.930045294413689, + "kl_loss": 0.11610977351665497, + "loss_ib": 0.003680772613734007, + "step": 3234 + }, + { + "epoch": 0.9303328779926666, + "grad_norm": 0.1488310992717743, + "learning_rate": 8.088761037077718e-06, + "loss": 0.8793, + "step": 3235 + }, + { + "ce_ib": 4.142306327819824, + "ce_orig": 0.9541292190551758, + "epoch": 0.9303328779926666, + "kl_loss": 0.24097616970539093, + "loss_ib": 0.006552068050950766, + "step": 3235 + }, + { + "ce_ib": 5.1283416748046875, + "ce_orig": 1.0785205364227295, + "epoch": 0.9303328779926666, + "kl_loss": 0.15461435914039612, + "loss_ib": 0.00667448528110981, + "step": 3235 + }, + { + "ce_ib": 3.0419974327087402, + "ce_orig": 0.8062857985496521, + "epoch": 0.9303328779926666, + "kl_loss": 0.14389564096927643, + "loss_ib": 0.004480953793972731, + "step": 3235 + }, + { + "ce_ib": 5.200392246246338, + "ce_orig": 0.946050226688385, + "epoch": 0.9303328779926666, + "kl_loss": 0.13043074309825897, + "loss_ib": 0.006504700053483248, + "step": 3235 + }, + { + "ce_ib": 3.7345683574676514, + "ce_orig": 0.8802599310874939, + "epoch": 0.9306204615716442, + "kl_loss": 0.3598562777042389, + "loss_ib": 0.0073331305757164955, + "step": 3236 + }, + { + "ce_ib": 3.838217258453369, + "ce_orig": 0.7932314276695251, + "epoch": 0.9306204615716442, + "kl_loss": 0.21392855048179626, + "loss_ib": 0.005977503024041653, + "step": 3236 + }, + { + "ce_ib": 3.6369943618774414, + "ce_orig": 0.7358976602554321, + "epoch": 0.9306204615716442, + "kl_loss": 0.18936917185783386, + "loss_ib": 0.005530686117708683, + "step": 3236 + }, + { + "ce_ib": 5.536121368408203, + "ce_orig": 1.4090155363082886, + "epoch": 0.9306204615716442, + "kl_loss": 0.11777766048908234, + "loss_ib": 0.006713897921144962, + "step": 3236 + }, + { + "ce_ib": 3.0095064640045166, + "ce_orig": 0.6271776556968689, + "epoch": 0.9309080451506219, + "kl_loss": 0.1814403533935547, + "loss_ib": 0.0048239100724458694, + "step": 3237 + }, + { + "ce_ib": 2.6948273181915283, + "ce_orig": 0.8302158713340759, + "epoch": 0.9309080451506219, + "kl_loss": 0.1790432333946228, + "loss_ib": 0.004485259763896465, + "step": 3237 + }, + { + "ce_ib": 2.645322561264038, + "ce_orig": 0.8177229762077332, + "epoch": 0.9309080451506219, + "kl_loss": 0.12827064096927643, + "loss_ib": 0.003928028978407383, + "step": 3237 + }, + { + "ce_ib": 2.3021841049194336, + "ce_orig": 0.6487872004508972, + "epoch": 0.9309080451506219, + "kl_loss": 0.16879789531230927, + "loss_ib": 0.003990163095295429, + "step": 3237 + }, + { + "ce_ib": 4.519322872161865, + "ce_orig": 1.2098287343978882, + "epoch": 0.9311956287295995, + "kl_loss": 0.14077690243721008, + "loss_ib": 0.005927091930061579, + "step": 3238 + }, + { + "ce_ib": 3.532740592956543, + "ce_orig": 0.5046433210372925, + "epoch": 0.9311956287295995, + "kl_loss": 0.23830445110797882, + "loss_ib": 0.0059157852083444595, + "step": 3238 + }, + { + "ce_ib": 1.5343091487884521, + "ce_orig": 0.12626269459724426, + "epoch": 0.9311956287295995, + "kl_loss": 0.40744680166244507, + "loss_ib": 0.005608777049928904, + "step": 3238 + }, + { + "ce_ib": 4.584458827972412, + "ce_orig": 0.7875560522079468, + "epoch": 0.9311956287295995, + "kl_loss": 0.1322716325521469, + "loss_ib": 0.005907174665480852, + "step": 3238 + }, + { + "ce_ib": 3.3171870708465576, + "ce_orig": 0.5478508472442627, + "epoch": 0.9314832123085772, + "kl_loss": 0.14090004563331604, + "loss_ib": 0.004726187326014042, + "step": 3239 + }, + { + "ce_ib": 4.457345962524414, + "ce_orig": 0.7576668858528137, + "epoch": 0.9314832123085772, + "kl_loss": 0.21582721173763275, + "loss_ib": 0.006615617778152227, + "step": 3239 + }, + { + "ce_ib": 3.2957849502563477, + "ce_orig": 0.7840021252632141, + "epoch": 0.9314832123085772, + "kl_loss": 0.12566691637039185, + "loss_ib": 0.0045524537563323975, + "step": 3239 + }, + { + "ce_ib": 3.11539888381958, + "ce_orig": 0.7404480576515198, + "epoch": 0.9314832123085772, + "kl_loss": 0.17495685815811157, + "loss_ib": 0.004864967428147793, + "step": 3239 + }, + { + "epoch": 0.9317707958875548, + "grad_norm": 0.1457822620868683, + "learning_rate": 8.082654396151676e-06, + "loss": 0.8673, + "step": 3240 + }, + { + "ce_ib": 6.450761795043945, + "ce_orig": 1.8436583280563354, + "epoch": 0.9317707958875548, + "kl_loss": 0.15409070253372192, + "loss_ib": 0.0079916687682271, + "step": 3240 + }, + { + "ce_ib": 3.6586356163024902, + "ce_orig": 0.7952688336372375, + "epoch": 0.9317707958875548, + "kl_loss": 0.16898447275161743, + "loss_ib": 0.0053484803065657616, + "step": 3240 + }, + { + "ce_ib": 4.839743614196777, + "ce_orig": 0.4529500901699066, + "epoch": 0.9317707958875548, + "kl_loss": 0.29942482709884644, + "loss_ib": 0.007833992131054401, + "step": 3240 + }, + { + "ce_ib": 2.6196846961975098, + "ce_orig": 0.3419341444969177, + "epoch": 0.9317707958875548, + "kl_loss": 0.15553835034370422, + "loss_ib": 0.004175067879259586, + "step": 3240 + }, + { + "ce_ib": 2.138986349105835, + "ce_orig": 0.6350266337394714, + "epoch": 0.9320583794665325, + "kl_loss": 0.21687199175357819, + "loss_ib": 0.0043077063746750355, + "step": 3241 + }, + { + "ce_ib": 2.6447155475616455, + "ce_orig": 0.6678776741027832, + "epoch": 0.9320583794665325, + "kl_loss": 0.18117979168891907, + "loss_ib": 0.004456513561308384, + "step": 3241 + }, + { + "ce_ib": 5.5588908195495605, + "ce_orig": 0.8308722376823425, + "epoch": 0.9320583794665325, + "kl_loss": 0.2735973000526428, + "loss_ib": 0.008294863626360893, + "step": 3241 + }, + { + "ce_ib": 2.5948774814605713, + "ce_orig": 0.6443234086036682, + "epoch": 0.9320583794665325, + "kl_loss": 0.12530489265918732, + "loss_ib": 0.0038479261565953493, + "step": 3241 + }, + { + "ce_ib": 2.6596877574920654, + "ce_orig": 0.6150399446487427, + "epoch": 0.9323459630455101, + "kl_loss": 0.1247452050447464, + "loss_ib": 0.003907139878720045, + "step": 3242 + }, + { + "ce_ib": 5.940736293792725, + "ce_orig": 1.4040688276290894, + "epoch": 0.9323459630455101, + "kl_loss": 0.1975230872631073, + "loss_ib": 0.007915967144072056, + "step": 3242 + }, + { + "ce_ib": 3.942516565322876, + "ce_orig": 0.8979673385620117, + "epoch": 0.9323459630455101, + "kl_loss": 0.16634872555732727, + "loss_ib": 0.005606004036962986, + "step": 3242 + }, + { + "ce_ib": 2.955335855484009, + "ce_orig": 0.937393844127655, + "epoch": 0.9323459630455101, + "kl_loss": 0.11979742348194122, + "loss_ib": 0.004153309855610132, + "step": 3242 + }, + { + "ce_ib": 6.805368423461914, + "ce_orig": 1.0884474515914917, + "epoch": 0.9326335466244877, + "kl_loss": 0.26255887746810913, + "loss_ib": 0.009430957026779652, + "step": 3243 + }, + { + "ce_ib": 1.9631470441818237, + "ce_orig": 0.7015470266342163, + "epoch": 0.9326335466244877, + "kl_loss": 0.11344719678163528, + "loss_ib": 0.0030976191628724337, + "step": 3243 + }, + { + "ce_ib": 2.5828511714935303, + "ce_orig": 0.5801235437393188, + "epoch": 0.9326335466244877, + "kl_loss": 0.1787949800491333, + "loss_ib": 0.004370801150798798, + "step": 3243 + }, + { + "ce_ib": 3.837183952331543, + "ce_orig": 0.8466434478759766, + "epoch": 0.9326335466244877, + "kl_loss": 0.14415061473846436, + "loss_ib": 0.0052786897867918015, + "step": 3243 + }, + { + "ce_ib": 4.933218479156494, + "ce_orig": 1.0942553281784058, + "epoch": 0.9329211302034653, + "kl_loss": 0.21873867511749268, + "loss_ib": 0.007120605558156967, + "step": 3244 + }, + { + "ce_ib": 4.71442174911499, + "ce_orig": 1.114684820175171, + "epoch": 0.9329211302034653, + "kl_loss": 0.15315426886081696, + "loss_ib": 0.006245964672416449, + "step": 3244 + }, + { + "ce_ib": 3.885838031768799, + "ce_orig": 0.7544097304344177, + "epoch": 0.9329211302034653, + "kl_loss": 0.14673280715942383, + "loss_ib": 0.005353165790438652, + "step": 3244 + }, + { + "ce_ib": 3.9579153060913086, + "ce_orig": 0.6122289896011353, + "epoch": 0.9329211302034653, + "kl_loss": 0.2113698422908783, + "loss_ib": 0.006071614101529121, + "step": 3244 + }, + { + "epoch": 0.9332087137824431, + "grad_norm": 0.13092057406902313, + "learning_rate": 8.076540328395694e-06, + "loss": 0.832, + "step": 3245 + }, + { + "ce_ib": 4.329337120056152, + "ce_orig": 0.7342909574508667, + "epoch": 0.9332087137824431, + "kl_loss": 0.21323490142822266, + "loss_ib": 0.00646168552339077, + "step": 3245 + }, + { + "ce_ib": 3.77889084815979, + "ce_orig": 0.6577597856521606, + "epoch": 0.9332087137824431, + "kl_loss": 0.14141781628131866, + "loss_ib": 0.0051930686458945274, + "step": 3245 + }, + { + "ce_ib": 2.9465553760528564, + "ce_orig": 0.46646779775619507, + "epoch": 0.9332087137824431, + "kl_loss": 0.18310293555259705, + "loss_ib": 0.004777584690600634, + "step": 3245 + }, + { + "ce_ib": 4.852231025695801, + "ce_orig": 0.9568471312522888, + "epoch": 0.9332087137824431, + "kl_loss": 0.19757749140262604, + "loss_ib": 0.006828005891293287, + "step": 3245 + }, + { + "ce_ib": 3.8633980751037598, + "ce_orig": 0.9465181827545166, + "epoch": 0.9334962973614207, + "kl_loss": 0.20668761432170868, + "loss_ib": 0.0059302737936377525, + "step": 3246 + }, + { + "ce_ib": 2.9167428016662598, + "ce_orig": 0.5423509478569031, + "epoch": 0.9334962973614207, + "kl_loss": 0.12203719466924667, + "loss_ib": 0.004137114621698856, + "step": 3246 + }, + { + "ce_ib": 1.7967499494552612, + "ce_orig": 0.538041353225708, + "epoch": 0.9334962973614207, + "kl_loss": 0.09051375836133957, + "loss_ib": 0.002701887395232916, + "step": 3246 + }, + { + "ce_ib": 4.383031845092773, + "ce_orig": 0.9766879081726074, + "epoch": 0.9334962973614207, + "kl_loss": 0.17335158586502075, + "loss_ib": 0.006116547621786594, + "step": 3246 + }, + { + "ce_ib": 5.473028182983398, + "ce_orig": 1.3498201370239258, + "epoch": 0.9337838809403983, + "kl_loss": 0.19567035138607025, + "loss_ib": 0.007429732009768486, + "step": 3247 + }, + { + "ce_ib": 5.727631568908691, + "ce_orig": 1.1959530115127563, + "epoch": 0.9337838809403983, + "kl_loss": 0.19687771797180176, + "loss_ib": 0.007696408312767744, + "step": 3247 + }, + { + "ce_ib": 3.010479688644409, + "ce_orig": 0.6939531564712524, + "epoch": 0.9337838809403983, + "kl_loss": 0.15372806787490845, + "loss_ib": 0.004547760356217623, + "step": 3247 + }, + { + "ce_ib": 2.447033643722534, + "ce_orig": 0.434174120426178, + "epoch": 0.9337838809403983, + "kl_loss": 0.38448813557624817, + "loss_ib": 0.006291915196925402, + "step": 3247 + }, + { + "ce_ib": 7.737285614013672, + "ce_orig": 1.5673834085464478, + "epoch": 0.934071464519376, + "kl_loss": 0.23276521265506744, + "loss_ib": 0.010064938105642796, + "step": 3248 + }, + { + "ce_ib": 3.12143611907959, + "ce_orig": 0.5809715390205383, + "epoch": 0.934071464519376, + "kl_loss": 0.253670871257782, + "loss_ib": 0.005658144596964121, + "step": 3248 + }, + { + "ce_ib": 3.023484706878662, + "ce_orig": 0.6489853262901306, + "epoch": 0.934071464519376, + "kl_loss": 0.27465856075286865, + "loss_ib": 0.005770070478320122, + "step": 3248 + }, + { + "ce_ib": 7.533074855804443, + "ce_orig": 1.7150388956069946, + "epoch": 0.934071464519376, + "kl_loss": 0.19753293693065643, + "loss_ib": 0.0095084048807621, + "step": 3248 + }, + { + "ce_ib": 3.6982944011688232, + "ce_orig": 0.7724287509918213, + "epoch": 0.9343590480983536, + "kl_loss": 0.1969025582075119, + "loss_ib": 0.0056673199869692326, + "step": 3249 + }, + { + "ce_ib": 2.5301740169525146, + "ce_orig": 0.4779868423938751, + "epoch": 0.9343590480983536, + "kl_loss": 0.12298533320426941, + "loss_ib": 0.0037600272335112095, + "step": 3249 + }, + { + "ce_ib": 5.9553542137146, + "ce_orig": 0.8129908442497253, + "epoch": 0.9343590480983536, + "kl_loss": 0.26825180649757385, + "loss_ib": 0.008637872524559498, + "step": 3249 + }, + { + "ce_ib": 2.501769542694092, + "ce_orig": 0.4818457365036011, + "epoch": 0.9343590480983536, + "kl_loss": 0.2032531052827835, + "loss_ib": 0.0045343004167079926, + "step": 3249 + }, + { + "epoch": 0.9346466316773312, + "grad_norm": 0.1466144174337387, + "learning_rate": 8.07041884853998e-06, + "loss": 0.8011, + "step": 3250 + }, + { + "ce_ib": 4.895188808441162, + "ce_orig": 0.9779635071754456, + "epoch": 0.9346466316773312, + "kl_loss": 0.23384886980056763, + "loss_ib": 0.007233677431941032, + "step": 3250 + }, + { + "ce_ib": 3.113765239715576, + "ce_orig": 0.8282350301742554, + "epoch": 0.9346466316773312, + "kl_loss": 0.12253379076719284, + "loss_ib": 0.0043391031213104725, + "step": 3250 + }, + { + "ce_ib": 4.582502365112305, + "ce_orig": 1.2113491296768188, + "epoch": 0.9346466316773312, + "kl_loss": 0.15417498350143433, + "loss_ib": 0.006124251987785101, + "step": 3250 + }, + { + "ce_ib": 2.8985228538513184, + "ce_orig": 0.2509923577308655, + "epoch": 0.9346466316773312, + "kl_loss": 0.33661723136901855, + "loss_ib": 0.006264694966375828, + "step": 3250 + }, + { + "ce_ib": 5.095143795013428, + "ce_orig": 1.156965732574463, + "epoch": 0.9349342152563088, + "kl_loss": 0.18676765263080597, + "loss_ib": 0.006962819956243038, + "step": 3251 + }, + { + "ce_ib": 3.405500650405884, + "ce_orig": 0.4316122233867645, + "epoch": 0.9349342152563088, + "kl_loss": 0.2746553421020508, + "loss_ib": 0.006152053829282522, + "step": 3251 + }, + { + "ce_ib": 2.664518117904663, + "ce_orig": 0.6659234166145325, + "epoch": 0.9349342152563088, + "kl_loss": 0.2988000512123108, + "loss_ib": 0.0056525180116295815, + "step": 3251 + }, + { + "ce_ib": 3.5832295417785645, + "ce_orig": 0.7848812341690063, + "epoch": 0.9349342152563088, + "kl_loss": 0.19078128039836884, + "loss_ib": 0.005491042044013739, + "step": 3251 + }, + { + "ce_ib": 5.507126331329346, + "ce_orig": 1.251637578010559, + "epoch": 0.9352217988352866, + "kl_loss": 0.21619583666324615, + "loss_ib": 0.007669084705412388, + "step": 3252 + }, + { + "ce_ib": 6.969982624053955, + "ce_orig": 0.7153016328811646, + "epoch": 0.9352217988352866, + "kl_loss": 0.599750280380249, + "loss_ib": 0.012967485934495926, + "step": 3252 + }, + { + "ce_ib": 2.5869433879852295, + "ce_orig": 0.5408439040184021, + "epoch": 0.9352217988352866, + "kl_loss": 0.15543903410434723, + "loss_ib": 0.0041413335129618645, + "step": 3252 + }, + { + "ce_ib": 3.1378376483917236, + "ce_orig": 0.813326895236969, + "epoch": 0.9352217988352866, + "kl_loss": 0.198552668094635, + "loss_ib": 0.005123364273458719, + "step": 3252 + }, + { + "ce_ib": 2.8510894775390625, + "ce_orig": 0.7424506545066833, + "epoch": 0.9355093824142642, + "kl_loss": 0.19209066033363342, + "loss_ib": 0.004771995823830366, + "step": 3253 + }, + { + "ce_ib": 4.12407112121582, + "ce_orig": 0.7670327425003052, + "epoch": 0.9355093824142642, + "kl_loss": 0.2302972674369812, + "loss_ib": 0.006427043583244085, + "step": 3253 + }, + { + "ce_ib": 3.919119358062744, + "ce_orig": 0.9353273510932922, + "epoch": 0.9355093824142642, + "kl_loss": 0.21674029529094696, + "loss_ib": 0.006086522247642279, + "step": 3253 + }, + { + "ce_ib": 3.4139552116394043, + "ce_orig": 0.9272297024726868, + "epoch": 0.9355093824142642, + "kl_loss": 0.142791748046875, + "loss_ib": 0.004841872490942478, + "step": 3253 + }, + { + "ce_ib": 3.4986088275909424, + "ce_orig": 0.45314833521842957, + "epoch": 0.9357969659932418, + "kl_loss": 0.20626063644886017, + "loss_ib": 0.005561215337365866, + "step": 3254 + }, + { + "ce_ib": 4.095985412597656, + "ce_orig": 1.0287123918533325, + "epoch": 0.9357969659932418, + "kl_loss": 0.12703774869441986, + "loss_ib": 0.005366362631320953, + "step": 3254 + }, + { + "ce_ib": 5.683365821838379, + "ce_orig": 0.6639379262924194, + "epoch": 0.9357969659932418, + "kl_loss": 0.20623762905597687, + "loss_ib": 0.007745741866528988, + "step": 3254 + }, + { + "ce_ib": 3.3027541637420654, + "ce_orig": 0.5413002967834473, + "epoch": 0.9357969659932418, + "kl_loss": 0.307314395904541, + "loss_ib": 0.006375898141413927, + "step": 3254 + }, + { + "epoch": 0.9360845495722194, + "grad_norm": 0.15342558920383453, + "learning_rate": 8.064289971332601e-06, + "loss": 0.8197, + "step": 3255 + }, + { + "ce_ib": 4.6384172439575195, + "ce_orig": 1.009111762046814, + "epoch": 0.9360845495722194, + "kl_loss": 0.17211277782917023, + "loss_ib": 0.006359545048326254, + "step": 3255 + }, + { + "ce_ib": 6.499946117401123, + "ce_orig": 1.4171319007873535, + "epoch": 0.9360845495722194, + "kl_loss": 0.2080974578857422, + "loss_ib": 0.008580920286476612, + "step": 3255 + }, + { + "ce_ib": 3.0068600177764893, + "ce_orig": 0.49602821469306946, + "epoch": 0.9360845495722194, + "kl_loss": 0.18838724493980408, + "loss_ib": 0.00489073246717453, + "step": 3255 + }, + { + "ce_ib": 4.248929023742676, + "ce_orig": 1.0085469484329224, + "epoch": 0.9360845495722194, + "kl_loss": 0.16891321539878845, + "loss_ib": 0.005938061513006687, + "step": 3255 + }, + { + "ce_ib": 4.157164573669434, + "ce_orig": 0.8296511769294739, + "epoch": 0.936372133151197, + "kl_loss": 0.19158205389976501, + "loss_ib": 0.006072985008358955, + "step": 3256 + }, + { + "ce_ib": 2.4984164237976074, + "ce_orig": 0.612324059009552, + "epoch": 0.936372133151197, + "kl_loss": 0.10681437700986862, + "loss_ib": 0.0035665601026266813, + "step": 3256 + }, + { + "ce_ib": 2.850917339324951, + "ce_orig": 0.9868007302284241, + "epoch": 0.936372133151197, + "kl_loss": 0.1316082775592804, + "loss_ib": 0.0041669998317956924, + "step": 3256 + }, + { + "ce_ib": 4.554755687713623, + "ce_orig": 1.3742213249206543, + "epoch": 0.936372133151197, + "kl_loss": 0.1576414704322815, + "loss_ib": 0.006131170317530632, + "step": 3256 + }, + { + "ce_ib": 2.5939552783966064, + "ce_orig": 0.4637505114078522, + "epoch": 0.9366597167301747, + "kl_loss": 0.13520017266273499, + "loss_ib": 0.003945956937968731, + "step": 3257 + }, + { + "ce_ib": 5.583441734313965, + "ce_orig": 1.4031935930252075, + "epoch": 0.9366597167301747, + "kl_loss": 0.19552800059318542, + "loss_ib": 0.007538721431046724, + "step": 3257 + }, + { + "ce_ib": 3.8956615924835205, + "ce_orig": 0.8591693043708801, + "epoch": 0.9366597167301747, + "kl_loss": 0.22354915738105774, + "loss_ib": 0.006131153088063002, + "step": 3257 + }, + { + "ce_ib": 7.542308807373047, + "ce_orig": 1.3742130994796753, + "epoch": 0.9366597167301747, + "kl_loss": 0.23169584572315216, + "loss_ib": 0.009859267622232437, + "step": 3257 + }, + { + "ce_ib": 2.8571720123291016, + "ce_orig": 0.4686056971549988, + "epoch": 0.9369473003091523, + "kl_loss": 0.2816404402256012, + "loss_ib": 0.005673576612025499, + "step": 3258 + }, + { + "ce_ib": 4.4061760902404785, + "ce_orig": 1.1045526266098022, + "epoch": 0.9369473003091523, + "kl_loss": 0.2212226688861847, + "loss_ib": 0.006618402898311615, + "step": 3258 + }, + { + "ce_ib": 2.9291114807128906, + "ce_orig": 0.7068270444869995, + "epoch": 0.9369473003091523, + "kl_loss": 0.22319792211055756, + "loss_ib": 0.005161090288311243, + "step": 3258 + }, + { + "ce_ib": 1.733494758605957, + "ce_orig": 0.5259311199188232, + "epoch": 0.9369473003091523, + "kl_loss": 0.14857834577560425, + "loss_ib": 0.003219278296455741, + "step": 3258 + }, + { + "ce_ib": 4.875319004058838, + "ce_orig": 0.7690966129302979, + "epoch": 0.93723488388813, + "kl_loss": 0.3609575629234314, + "loss_ib": 0.008484894409775734, + "step": 3259 + }, + { + "ce_ib": 3.174485445022583, + "ce_orig": 0.9053480625152588, + "epoch": 0.93723488388813, + "kl_loss": 0.16496789455413818, + "loss_ib": 0.0048241643235087395, + "step": 3259 + }, + { + "ce_ib": 2.345489740371704, + "ce_orig": 0.53525710105896, + "epoch": 0.93723488388813, + "kl_loss": 0.14823415875434875, + "loss_ib": 0.003827831009402871, + "step": 3259 + }, + { + "ce_ib": 2.5676963329315186, + "ce_orig": 0.6539961695671082, + "epoch": 0.93723488388813, + "kl_loss": 0.2147626429796219, + "loss_ib": 0.004715322516858578, + "step": 3259 + }, + { + "epoch": 0.9375224674671077, + "grad_norm": 0.1254020631313324, + "learning_rate": 8.058153711539444e-06, + "loss": 0.8885, + "step": 3260 + }, + { + "ce_ib": 4.538667678833008, + "ce_orig": 1.0204535722732544, + "epoch": 0.9375224674671077, + "kl_loss": 0.18581286072731018, + "loss_ib": 0.006396796554327011, + "step": 3260 + }, + { + "ce_ib": 4.289400577545166, + "ce_orig": 0.5319991707801819, + "epoch": 0.9375224674671077, + "kl_loss": 0.12935447692871094, + "loss_ib": 0.005582945421338081, + "step": 3260 + }, + { + "ce_ib": 2.334221363067627, + "ce_orig": 0.6439073085784912, + "epoch": 0.9375224674671077, + "kl_loss": 0.20605933666229248, + "loss_ib": 0.00439481483772397, + "step": 3260 + }, + { + "ce_ib": 8.273599624633789, + "ce_orig": 1.2296645641326904, + "epoch": 0.9375224674671077, + "kl_loss": 0.24892759323120117, + "loss_ib": 0.010762874968349934, + "step": 3260 + }, + { + "ce_ib": 5.2492570877075195, + "ce_orig": 1.2335387468338013, + "epoch": 0.9378100510460853, + "kl_loss": 0.1453869789838791, + "loss_ib": 0.006703126709908247, + "step": 3261 + }, + { + "ce_ib": 2.452561616897583, + "ce_orig": 0.6279815435409546, + "epoch": 0.9378100510460853, + "kl_loss": 0.13441871106624603, + "loss_ib": 0.0037967488169670105, + "step": 3261 + }, + { + "ce_ib": 3.419771194458008, + "ce_orig": 0.6329423189163208, + "epoch": 0.9378100510460853, + "kl_loss": 0.18894264101982117, + "loss_ib": 0.005309197586029768, + "step": 3261 + }, + { + "ce_ib": 5.583110332489014, + "ce_orig": 1.029693841934204, + "epoch": 0.9378100510460853, + "kl_loss": 0.24952630698680878, + "loss_ib": 0.008078373037278652, + "step": 3261 + }, + { + "ce_ib": 3.024557590484619, + "ce_orig": 0.5121183395385742, + "epoch": 0.9380976346250629, + "kl_loss": 0.1955699920654297, + "loss_ib": 0.004980257246643305, + "step": 3262 + }, + { + "ce_ib": 2.7550806999206543, + "ce_orig": 0.5771573781967163, + "epoch": 0.9380976346250629, + "kl_loss": 0.21593038737773895, + "loss_ib": 0.004914384335279465, + "step": 3262 + }, + { + "ce_ib": 2.627365827560425, + "ce_orig": 0.6165468096733093, + "epoch": 0.9380976346250629, + "kl_loss": 0.17845682799816132, + "loss_ib": 0.004411933943629265, + "step": 3262 + }, + { + "ce_ib": 2.5725460052490234, + "ce_orig": 0.6145195960998535, + "epoch": 0.9380976346250629, + "kl_loss": 0.15948528051376343, + "loss_ib": 0.004167398903518915, + "step": 3262 + }, + { + "ce_ib": 6.847308158874512, + "ce_orig": 1.4806698560714722, + "epoch": 0.9383852182040405, + "kl_loss": 0.13459406793117523, + "loss_ib": 0.008193248882889748, + "step": 3263 + }, + { + "ce_ib": 4.458852291107178, + "ce_orig": 1.1609234809875488, + "epoch": 0.9383852182040405, + "kl_loss": 0.4438936710357666, + "loss_ib": 0.00889778882265091, + "step": 3263 + }, + { + "ce_ib": 5.509678363800049, + "ce_orig": 1.0021016597747803, + "epoch": 0.9383852182040405, + "kl_loss": 0.16780075430870056, + "loss_ib": 0.007187685929238796, + "step": 3263 + }, + { + "ce_ib": 0.7873799204826355, + "ce_orig": 0.14236825704574585, + "epoch": 0.9383852182040405, + "kl_loss": 0.4017913341522217, + "loss_ib": 0.004805293399840593, + "step": 3263 + }, + { + "ce_ib": 2.8725202083587646, + "ce_orig": 0.7594106793403625, + "epoch": 0.9386728017830182, + "kl_loss": 0.17252154648303986, + "loss_ib": 0.004597736056894064, + "step": 3264 + }, + { + "ce_ib": 3.4816384315490723, + "ce_orig": 0.39595019817352295, + "epoch": 0.9386728017830182, + "kl_loss": 0.15190443396568298, + "loss_ib": 0.0050006830133497715, + "step": 3264 + }, + { + "ce_ib": 3.1421849727630615, + "ce_orig": 0.7657673358917236, + "epoch": 0.9386728017830182, + "kl_loss": 0.15774956345558167, + "loss_ib": 0.004719680640846491, + "step": 3264 + }, + { + "ce_ib": 6.3351006507873535, + "ce_orig": 1.4726074934005737, + "epoch": 0.9386728017830182, + "kl_loss": 0.22020968794822693, + "loss_ib": 0.008537196554243565, + "step": 3264 + }, + { + "epoch": 0.9389603853619958, + "grad_norm": 0.12418321520090103, + "learning_rate": 8.052010083944185e-06, + "loss": 0.8123, + "step": 3265 + }, + { + "ce_ib": 3.933598518371582, + "ce_orig": 0.5280622243881226, + "epoch": 0.9389603853619958, + "kl_loss": 0.24944372475147247, + "loss_ib": 0.006428035907447338, + "step": 3265 + }, + { + "ce_ib": 2.807297945022583, + "ce_orig": 0.5816093683242798, + "epoch": 0.9389603853619958, + "kl_loss": 0.1616390496492386, + "loss_ib": 0.004423688631504774, + "step": 3265 + }, + { + "ce_ib": 5.272430896759033, + "ce_orig": 1.2980395555496216, + "epoch": 0.9389603853619958, + "kl_loss": 0.17612892389297485, + "loss_ib": 0.007033719681203365, + "step": 3265 + }, + { + "ce_ib": 2.8173305988311768, + "ce_orig": 0.5924719572067261, + "epoch": 0.9389603853619958, + "kl_loss": 0.2079431116580963, + "loss_ib": 0.004896761849522591, + "step": 3265 + }, + { + "ce_ib": 3.6418371200561523, + "ce_orig": 0.8220776915550232, + "epoch": 0.9392479689409735, + "kl_loss": 0.16075266897678375, + "loss_ib": 0.005249363835901022, + "step": 3266 + }, + { + "ce_ib": 3.7999637126922607, + "ce_orig": 0.8525601625442505, + "epoch": 0.9392479689409735, + "kl_loss": 0.19761672616004944, + "loss_ib": 0.005776131059974432, + "step": 3266 + }, + { + "ce_ib": 3.473855972290039, + "ce_orig": 0.7818673253059387, + "epoch": 0.9392479689409735, + "kl_loss": 0.08854304254055023, + "loss_ib": 0.004359286278486252, + "step": 3266 + }, + { + "ce_ib": 3.9465200901031494, + "ce_orig": 0.9030898213386536, + "epoch": 0.9392479689409735, + "kl_loss": 0.2501000165939331, + "loss_ib": 0.0064475201070308685, + "step": 3266 + }, + { + "ce_ib": 3.5694851875305176, + "ce_orig": 0.9761992692947388, + "epoch": 0.9395355525199511, + "kl_loss": 0.2408195436000824, + "loss_ib": 0.005977680440992117, + "step": 3267 + }, + { + "ce_ib": 2.659806489944458, + "ce_orig": 0.6692637801170349, + "epoch": 0.9395355525199511, + "kl_loss": 0.15877512097358704, + "loss_ib": 0.004247557837516069, + "step": 3267 + }, + { + "ce_ib": 3.180283308029175, + "ce_orig": 0.30031901597976685, + "epoch": 0.9395355525199511, + "kl_loss": 0.22517283260822296, + "loss_ib": 0.0054320115596055984, + "step": 3267 + }, + { + "ce_ib": 1.5686085224151611, + "ce_orig": 0.6228137016296387, + "epoch": 0.9395355525199511, + "kl_loss": 0.1184036135673523, + "loss_ib": 0.002752644708380103, + "step": 3267 + }, + { + "ce_ib": 4.086344242095947, + "ce_orig": 0.6778373718261719, + "epoch": 0.9398231360989288, + "kl_loss": 0.2635776996612549, + "loss_ib": 0.00672212103381753, + "step": 3268 + }, + { + "ce_ib": 3.470949411392212, + "ce_orig": 0.7590913772583008, + "epoch": 0.9398231360989288, + "kl_loss": 0.0989089161157608, + "loss_ib": 0.0044600386172533035, + "step": 3268 + }, + { + "ce_ib": 6.5095648765563965, + "ce_orig": 1.3145461082458496, + "epoch": 0.9398231360989288, + "kl_loss": 0.19622066617012024, + "loss_ib": 0.008471771143376827, + "step": 3268 + }, + { + "ce_ib": 3.0750222206115723, + "ce_orig": 0.6974140405654907, + "epoch": 0.9398231360989288, + "kl_loss": 0.15018230676651, + "loss_ib": 0.004576845560222864, + "step": 3268 + }, + { + "ce_ib": 3.068436622619629, + "ce_orig": 0.8393611311912537, + "epoch": 0.9401107196779064, + "kl_loss": 0.1422736644744873, + "loss_ib": 0.004491173196583986, + "step": 3269 + }, + { + "ce_ib": 4.06328010559082, + "ce_orig": 0.743789792060852, + "epoch": 0.9401107196779064, + "kl_loss": 0.22105614840984344, + "loss_ib": 0.006273841485381126, + "step": 3269 + }, + { + "ce_ib": 3.650400161743164, + "ce_orig": 0.4965578019618988, + "epoch": 0.9401107196779064, + "kl_loss": 0.17080388963222504, + "loss_ib": 0.005358438938856125, + "step": 3269 + }, + { + "ce_ib": 3.308788776397705, + "ce_orig": 0.5140231251716614, + "epoch": 0.9401107196779064, + "kl_loss": 0.10945694148540497, + "loss_ib": 0.004403357859700918, + "step": 3269 + }, + { + "epoch": 0.940398303256884, + "grad_norm": 0.14561527967453003, + "learning_rate": 8.045859103348245e-06, + "loss": 0.8219, + "step": 3270 + }, + { + "ce_ib": 3.7176854610443115, + "ce_orig": 0.6241322755813599, + "epoch": 0.940398303256884, + "kl_loss": 0.16319513320922852, + "loss_ib": 0.005349637009203434, + "step": 3270 + }, + { + "ce_ib": 3.063981771469116, + "ce_orig": 0.8688821196556091, + "epoch": 0.940398303256884, + "kl_loss": 0.09610728174448013, + "loss_ib": 0.004025054629892111, + "step": 3270 + }, + { + "ce_ib": 4.891517639160156, + "ce_orig": 0.7279273867607117, + "epoch": 0.940398303256884, + "kl_loss": 0.20633560419082642, + "loss_ib": 0.006954873446375132, + "step": 3270 + }, + { + "ce_ib": 1.8107298612594604, + "ce_orig": 0.4169665277004242, + "epoch": 0.940398303256884, + "kl_loss": 0.1639668047428131, + "loss_ib": 0.0034503978677093983, + "step": 3270 + }, + { + "ce_ib": 3.0315001010894775, + "ce_orig": 0.7600097060203552, + "epoch": 0.9406858868358616, + "kl_loss": 0.19810283184051514, + "loss_ib": 0.005012528970837593, + "step": 3271 + }, + { + "ce_ib": 2.6616122722625732, + "ce_orig": 0.6086564660072327, + "epoch": 0.9406858868358616, + "kl_loss": 0.25330618023872375, + "loss_ib": 0.0051946742460131645, + "step": 3271 + }, + { + "ce_ib": 4.976736068725586, + "ce_orig": 1.0928434133529663, + "epoch": 0.9406858868358616, + "kl_loss": 0.12408386170864105, + "loss_ib": 0.00621757423505187, + "step": 3271 + }, + { + "ce_ib": 3.692857265472412, + "ce_orig": 0.8020192384719849, + "epoch": 0.9406858868358616, + "kl_loss": 0.18069827556610107, + "loss_ib": 0.005499839782714844, + "step": 3271 + }, + { + "ce_ib": 4.870419502258301, + "ce_orig": 1.410875916481018, + "epoch": 0.9409734704148394, + "kl_loss": 0.24994143843650818, + "loss_ib": 0.0073698339983820915, + "step": 3272 + }, + { + "ce_ib": 6.222908973693848, + "ce_orig": 1.4255197048187256, + "epoch": 0.9409734704148394, + "kl_loss": 0.20121079683303833, + "loss_ib": 0.008235016837716103, + "step": 3272 + }, + { + "ce_ib": 3.3917903900146484, + "ce_orig": 0.914901077747345, + "epoch": 0.9409734704148394, + "kl_loss": 0.1932753026485443, + "loss_ib": 0.0053245434537529945, + "step": 3272 + }, + { + "ce_ib": 4.672590732574463, + "ce_orig": 1.050429105758667, + "epoch": 0.9409734704148394, + "kl_loss": 0.2753332257270813, + "loss_ib": 0.007425923366099596, + "step": 3272 + }, + { + "ce_ib": 1.866139531135559, + "ce_orig": 0.36754313111305237, + "epoch": 0.941261053993817, + "kl_loss": 0.19236639142036438, + "loss_ib": 0.003789803246036172, + "step": 3273 + }, + { + "ce_ib": 4.561717510223389, + "ce_orig": 0.9737030863761902, + "epoch": 0.941261053993817, + "kl_loss": 0.2608264982700348, + "loss_ib": 0.007169981952756643, + "step": 3273 + }, + { + "ce_ib": 7.658942699432373, + "ce_orig": 1.5626176595687866, + "epoch": 0.941261053993817, + "kl_loss": 0.30681949853897095, + "loss_ib": 0.010727138258516788, + "step": 3273 + }, + { + "ce_ib": 5.70886754989624, + "ce_orig": 1.2288779020309448, + "epoch": 0.941261053993817, + "kl_loss": 0.2827683091163635, + "loss_ib": 0.008536551147699356, + "step": 3273 + }, + { + "ce_ib": 5.254345893859863, + "ce_orig": 0.8754353523254395, + "epoch": 0.9415486375727946, + "kl_loss": 0.16137506067752838, + "loss_ib": 0.00686809653416276, + "step": 3274 + }, + { + "ce_ib": 2.7736222743988037, + "ce_orig": 0.5202257633209229, + "epoch": 0.9415486375727946, + "kl_loss": 0.14845497906208038, + "loss_ib": 0.004258171655237675, + "step": 3274 + }, + { + "ce_ib": 3.5698323249816895, + "ce_orig": 0.8016830682754517, + "epoch": 0.9415486375727946, + "kl_loss": 0.10443688929080963, + "loss_ib": 0.004614200908690691, + "step": 3274 + }, + { + "ce_ib": 2.9664881229400635, + "ce_orig": 0.6681947112083435, + "epoch": 0.9415486375727946, + "kl_loss": 0.10766024142503738, + "loss_ib": 0.004043090622872114, + "step": 3274 + }, + { + "epoch": 0.9418362211517722, + "grad_norm": 0.1360841542482376, + "learning_rate": 8.039700784570766e-06, + "loss": 0.8809, + "step": 3275 + }, + { + "ce_ib": 2.6881582736968994, + "ce_orig": 0.6112985610961914, + "epoch": 0.9418362211517722, + "kl_loss": 0.13226409256458282, + "loss_ib": 0.004010799340903759, + "step": 3275 + }, + { + "ce_ib": 3.6230809688568115, + "ce_orig": 0.6199791431427002, + "epoch": 0.9418362211517722, + "kl_loss": 0.18818877637386322, + "loss_ib": 0.005504968576133251, + "step": 3275 + }, + { + "ce_ib": 2.6361846923828125, + "ce_orig": 0.7098627090454102, + "epoch": 0.9418362211517722, + "kl_loss": 0.09732495248317719, + "loss_ib": 0.003609434235841036, + "step": 3275 + }, + { + "ce_ib": 3.0103068351745605, + "ce_orig": 0.7456799149513245, + "epoch": 0.9418362211517722, + "kl_loss": 0.14742815494537354, + "loss_ib": 0.0044845882803201675, + "step": 3275 + }, + { + "ce_ib": 2.4012069702148438, + "ce_orig": 0.6419284343719482, + "epoch": 0.9421238047307499, + "kl_loss": 0.1999884396791458, + "loss_ib": 0.004401091020554304, + "step": 3276 + }, + { + "ce_ib": 2.693868398666382, + "ce_orig": 0.40515610575675964, + "epoch": 0.9421238047307499, + "kl_loss": 0.19816292822360992, + "loss_ib": 0.004675497766584158, + "step": 3276 + }, + { + "ce_ib": 5.483434200286865, + "ce_orig": 1.2363933324813843, + "epoch": 0.9421238047307499, + "kl_loss": 0.19124457240104675, + "loss_ib": 0.007395879831165075, + "step": 3276 + }, + { + "ce_ib": 4.220624923706055, + "ce_orig": 1.1013628244400024, + "epoch": 0.9421238047307499, + "kl_loss": 0.1700410097837448, + "loss_ib": 0.00592103460803628, + "step": 3276 + }, + { + "ce_ib": 2.756406784057617, + "ce_orig": 0.640188992023468, + "epoch": 0.9424113883097275, + "kl_loss": 0.20532120764255524, + "loss_ib": 0.004809618927538395, + "step": 3277 + }, + { + "ce_ib": 5.149378299713135, + "ce_orig": 1.496031403541565, + "epoch": 0.9424113883097275, + "kl_loss": 0.1618286669254303, + "loss_ib": 0.006767665036022663, + "step": 3277 + }, + { + "ce_ib": 3.3406853675842285, + "ce_orig": 0.6524872779846191, + "epoch": 0.9424113883097275, + "kl_loss": 0.15635284781455994, + "loss_ib": 0.004904213827103376, + "step": 3277 + }, + { + "ce_ib": 5.326420783996582, + "ce_orig": 0.9205682277679443, + "epoch": 0.9424113883097275, + "kl_loss": 0.13922524452209473, + "loss_ib": 0.006718672811985016, + "step": 3277 + }, + { + "ce_ib": 2.617886543273926, + "ce_orig": 0.4190843403339386, + "epoch": 0.9426989718887051, + "kl_loss": 0.10716179758310318, + "loss_ib": 0.003689504461362958, + "step": 3278 + }, + { + "ce_ib": 3.891556739807129, + "ce_orig": 0.7788523435592651, + "epoch": 0.9426989718887051, + "kl_loss": 0.24393707513809204, + "loss_ib": 0.0063309273682534695, + "step": 3278 + }, + { + "ce_ib": 5.586165904998779, + "ce_orig": 1.5199679136276245, + "epoch": 0.9426989718887051, + "kl_loss": 0.12789402902126312, + "loss_ib": 0.0068651060573756695, + "step": 3278 + }, + { + "ce_ib": 8.989734649658203, + "ce_orig": 2.1172919273376465, + "epoch": 0.9426989718887051, + "kl_loss": 0.16894805431365967, + "loss_ib": 0.0106792151927948, + "step": 3278 + }, + { + "ce_ib": 7.213714599609375, + "ce_orig": 1.8203073740005493, + "epoch": 0.9429865554676828, + "kl_loss": 0.17898516356945038, + "loss_ib": 0.009003566578030586, + "step": 3279 + }, + { + "ce_ib": 6.200157642364502, + "ce_orig": 1.4354826211929321, + "epoch": 0.9429865554676828, + "kl_loss": 0.23822550475597382, + "loss_ib": 0.00858241319656372, + "step": 3279 + }, + { + "ce_ib": 2.7494497299194336, + "ce_orig": 0.5255674719810486, + "epoch": 0.9429865554676828, + "kl_loss": 0.1663813591003418, + "loss_ib": 0.004413263406604528, + "step": 3279 + }, + { + "ce_ib": 3.7802786827087402, + "ce_orig": 1.10052490234375, + "epoch": 0.9429865554676828, + "kl_loss": 0.09805887937545776, + "loss_ib": 0.004760867450386286, + "step": 3279 + }, + { + "epoch": 0.9432741390466605, + "grad_norm": 0.1506335437297821, + "learning_rate": 8.03353514244857e-06, + "loss": 0.8454, + "step": 3280 + }, + { + "ce_ib": 5.833345413208008, + "ce_orig": 1.0438047647476196, + "epoch": 0.9432741390466605, + "kl_loss": 0.16968843340873718, + "loss_ib": 0.007530230097472668, + "step": 3280 + }, + { + "ce_ib": 4.138195037841797, + "ce_orig": 1.1385136842727661, + "epoch": 0.9432741390466605, + "kl_loss": 0.13983766734600067, + "loss_ib": 0.0055365716107189655, + "step": 3280 + }, + { + "ce_ib": 1.9515892267227173, + "ce_orig": 0.34568122029304504, + "epoch": 0.9432741390466605, + "kl_loss": 0.23403674364089966, + "loss_ib": 0.004291956778615713, + "step": 3280 + }, + { + "ce_ib": 8.543895721435547, + "ce_orig": 1.101686716079712, + "epoch": 0.9432741390466605, + "kl_loss": 0.20622023940086365, + "loss_ib": 0.010606097988784313, + "step": 3280 + }, + { + "ce_ib": 4.940879821777344, + "ce_orig": 0.651372492313385, + "epoch": 0.9435617226256381, + "kl_loss": 0.18213805556297302, + "loss_ib": 0.006762260105460882, + "step": 3281 + }, + { + "ce_ib": 3.6658005714416504, + "ce_orig": 0.6953518390655518, + "epoch": 0.9435617226256381, + "kl_loss": 0.19094619154930115, + "loss_ib": 0.005575262010097504, + "step": 3281 + }, + { + "ce_ib": 4.861753463745117, + "ce_orig": 0.8835213780403137, + "epoch": 0.9435617226256381, + "kl_loss": 0.19690150022506714, + "loss_ib": 0.006830768659710884, + "step": 3281 + }, + { + "ce_ib": 3.1798620223999023, + "ce_orig": 0.6036171317100525, + "epoch": 0.9435617226256381, + "kl_loss": 0.1619228571653366, + "loss_ib": 0.004799090325832367, + "step": 3281 + }, + { + "ce_ib": 4.245451927185059, + "ce_orig": 1.1645299196243286, + "epoch": 0.9438493062046157, + "kl_loss": 0.1606082022190094, + "loss_ib": 0.005851533729583025, + "step": 3282 + }, + { + "ce_ib": 4.172497272491455, + "ce_orig": 0.927893877029419, + "epoch": 0.9438493062046157, + "kl_loss": 0.21136200428009033, + "loss_ib": 0.006286117248237133, + "step": 3282 + }, + { + "ce_ib": 2.799510955810547, + "ce_orig": 0.7750267386436462, + "epoch": 0.9438493062046157, + "kl_loss": 0.1262803077697754, + "loss_ib": 0.004062314052134752, + "step": 3282 + }, + { + "ce_ib": 6.524211406707764, + "ce_orig": 1.5280475616455078, + "epoch": 0.9438493062046157, + "kl_loss": 0.21371811628341675, + "loss_ib": 0.008661393076181412, + "step": 3282 + }, + { + "ce_ib": 2.8228890895843506, + "ce_orig": 0.6023188829421997, + "epoch": 0.9441368897835933, + "kl_loss": 0.18327844142913818, + "loss_ib": 0.0046556731685996056, + "step": 3283 + }, + { + "ce_ib": 5.049958229064941, + "ce_orig": 0.741176426410675, + "epoch": 0.9441368897835933, + "kl_loss": 0.21621428430080414, + "loss_ib": 0.007212101016193628, + "step": 3283 + }, + { + "ce_ib": 3.676262855529785, + "ce_orig": 0.655327558517456, + "epoch": 0.9441368897835933, + "kl_loss": 0.2686938941478729, + "loss_ib": 0.006363201420754194, + "step": 3283 + }, + { + "ce_ib": 3.5842318534851074, + "ce_orig": 0.6265366673469543, + "epoch": 0.9441368897835933, + "kl_loss": 0.21432626247406006, + "loss_ib": 0.005727494601160288, + "step": 3283 + }, + { + "ce_ib": 3.3210580348968506, + "ce_orig": 0.4589829444885254, + "epoch": 0.944424473362571, + "kl_loss": 0.2546005845069885, + "loss_ib": 0.005867063999176025, + "step": 3284 + }, + { + "ce_ib": 5.903194427490234, + "ce_orig": 1.6275066137313843, + "epoch": 0.944424473362571, + "kl_loss": 0.1716778576374054, + "loss_ib": 0.00761997327208519, + "step": 3284 + }, + { + "ce_ib": 4.152191638946533, + "ce_orig": 0.9887706637382507, + "epoch": 0.944424473362571, + "kl_loss": 0.18443050980567932, + "loss_ib": 0.005996496416628361, + "step": 3284 + }, + { + "ce_ib": 5.255297660827637, + "ce_orig": 1.0298514366149902, + "epoch": 0.944424473362571, + "kl_loss": 0.11626366525888443, + "loss_ib": 0.006417934782803059, + "step": 3284 + }, + { + "epoch": 0.9447120569415486, + "grad_norm": 0.16181345283985138, + "learning_rate": 8.027362191836115e-06, + "loss": 0.923, + "step": 3285 + }, + { + "ce_ib": 4.676894187927246, + "ce_orig": 0.8959108591079712, + "epoch": 0.9447120569415486, + "kl_loss": 0.20070958137512207, + "loss_ib": 0.006683989427983761, + "step": 3285 + }, + { + "ce_ib": 3.195507764816284, + "ce_orig": 0.7600585222244263, + "epoch": 0.9447120569415486, + "kl_loss": 0.21303457021713257, + "loss_ib": 0.0053258538246154785, + "step": 3285 + }, + { + "ce_ib": 1.9049609899520874, + "ce_orig": 0.5405688881874084, + "epoch": 0.9447120569415486, + "kl_loss": 0.12471234798431396, + "loss_ib": 0.0031520843040198088, + "step": 3285 + }, + { + "ce_ib": 3.8561697006225586, + "ce_orig": 0.8946505188941956, + "epoch": 0.9447120569415486, + "kl_loss": 0.21619224548339844, + "loss_ib": 0.006018092390149832, + "step": 3285 + }, + { + "ce_ib": 4.161205291748047, + "ce_orig": 0.8083364963531494, + "epoch": 0.9449996405205263, + "kl_loss": 0.24444851279258728, + "loss_ib": 0.006605690345168114, + "step": 3286 + }, + { + "ce_ib": 4.49613618850708, + "ce_orig": 0.8031622171401978, + "epoch": 0.9449996405205263, + "kl_loss": 0.12094379961490631, + "loss_ib": 0.005705574061721563, + "step": 3286 + }, + { + "ce_ib": 3.0819709300994873, + "ce_orig": 0.5965180993080139, + "epoch": 0.9449996405205263, + "kl_loss": 0.13814765214920044, + "loss_ib": 0.0044634477235376835, + "step": 3286 + }, + { + "ce_ib": 5.101512908935547, + "ce_orig": 1.4798502922058105, + "epoch": 0.9449996405205263, + "kl_loss": 0.16490575671195984, + "loss_ib": 0.006750570610165596, + "step": 3286 + }, + { + "ce_ib": 3.7754435539245605, + "ce_orig": 0.8939618468284607, + "epoch": 0.9452872240995039, + "kl_loss": 0.27414482831954956, + "loss_ib": 0.006516891531646252, + "step": 3287 + }, + { + "ce_ib": 1.124527931213379, + "ce_orig": 0.174712136387825, + "epoch": 0.9452872240995039, + "kl_loss": 0.2690599262714386, + "loss_ib": 0.0038151273038238287, + "step": 3287 + }, + { + "ce_ib": 5.628109931945801, + "ce_orig": 1.3582781553268433, + "epoch": 0.9452872240995039, + "kl_loss": 0.17102965712547302, + "loss_ib": 0.0073384069837629795, + "step": 3287 + }, + { + "ce_ib": 3.2706751823425293, + "ce_orig": 0.37947556376457214, + "epoch": 0.9452872240995039, + "kl_loss": 0.24279196560382843, + "loss_ib": 0.005698594730347395, + "step": 3287 + }, + { + "ce_ib": 2.10225248336792, + "ce_orig": 0.6370076537132263, + "epoch": 0.9455748076784816, + "kl_loss": 0.08653239905834198, + "loss_ib": 0.002967576263472438, + "step": 3288 + }, + { + "ce_ib": 1.9898314476013184, + "ce_orig": 0.5436297655105591, + "epoch": 0.9455748076784816, + "kl_loss": 0.12172642350196838, + "loss_ib": 0.0032070958986878395, + "step": 3288 + }, + { + "ce_ib": 4.524505615234375, + "ce_orig": 1.27023184299469, + "epoch": 0.9455748076784816, + "kl_loss": 0.17777511477470398, + "loss_ib": 0.006302256602793932, + "step": 3288 + }, + { + "ce_ib": 2.869356155395508, + "ce_orig": 0.8218786120414734, + "epoch": 0.9455748076784816, + "kl_loss": 0.11710339784622192, + "loss_ib": 0.004040390253067017, + "step": 3288 + }, + { + "ce_ib": 2.94926381111145, + "ce_orig": 0.5236493349075317, + "epoch": 0.9458623912574592, + "kl_loss": 0.1547756791114807, + "loss_ib": 0.0044970205053687096, + "step": 3289 + }, + { + "ce_ib": 5.394053936004639, + "ce_orig": 1.5960811376571655, + "epoch": 0.9458623912574592, + "kl_loss": 0.11904578655958176, + "loss_ib": 0.006584512069821358, + "step": 3289 + }, + { + "ce_ib": 2.470637321472168, + "ce_orig": 0.4116249084472656, + "epoch": 0.9458623912574592, + "kl_loss": 0.12575489282608032, + "loss_ib": 0.0037281864788383245, + "step": 3289 + }, + { + "ce_ib": 5.641994953155518, + "ce_orig": 0.9146354794502258, + "epoch": 0.9458623912574592, + "kl_loss": 0.1831284761428833, + "loss_ib": 0.007473279722034931, + "step": 3289 + }, + { + "epoch": 0.9461499748364368, + "grad_norm": 0.14607618749141693, + "learning_rate": 8.021181947605474e-06, + "loss": 0.9116, + "step": 3290 + }, + { + "ce_ib": 3.3200764656066895, + "ce_orig": 0.6903912425041199, + "epoch": 0.9461499748364368, + "kl_loss": 0.20126375555992126, + "loss_ib": 0.0053327144123613834, + "step": 3290 + }, + { + "ce_ib": 3.834109306335449, + "ce_orig": 0.43436378240585327, + "epoch": 0.9461499748364368, + "kl_loss": 0.2617972791194916, + "loss_ib": 0.0064520821906626225, + "step": 3290 + }, + { + "ce_ib": 5.521749496459961, + "ce_orig": 1.7288519144058228, + "epoch": 0.9461499748364368, + "kl_loss": 0.15992756187915802, + "loss_ib": 0.0071210251189768314, + "step": 3290 + }, + { + "ce_ib": 4.06191349029541, + "ce_orig": 0.8087911605834961, + "epoch": 0.9461499748364368, + "kl_loss": 0.14015284180641174, + "loss_ib": 0.005463441368192434, + "step": 3290 + }, + { + "ce_ib": 2.348867893218994, + "ce_orig": 0.5424993634223938, + "epoch": 0.9464375584154144, + "kl_loss": 0.18254929780960083, + "loss_ib": 0.004174361005425453, + "step": 3291 + }, + { + "ce_ib": 3.2160375118255615, + "ce_orig": 0.8675045371055603, + "epoch": 0.9464375584154144, + "kl_loss": 0.14490164816379547, + "loss_ib": 0.00466505391523242, + "step": 3291 + }, + { + "ce_ib": 5.246313571929932, + "ce_orig": 0.7154064178466797, + "epoch": 0.9464375584154144, + "kl_loss": 0.18849581480026245, + "loss_ib": 0.007131271995604038, + "step": 3291 + }, + { + "ce_ib": 4.033844470977783, + "ce_orig": 0.8534598350524902, + "epoch": 0.9464375584154144, + "kl_loss": 0.3704688549041748, + "loss_ib": 0.007738532964140177, + "step": 3291 + }, + { + "ce_ib": 4.9202985763549805, + "ce_orig": 0.7051324844360352, + "epoch": 0.9467251419943922, + "kl_loss": 0.18336856365203857, + "loss_ib": 0.006753984373062849, + "step": 3292 + }, + { + "ce_ib": 3.0231246948242188, + "ce_orig": 0.7381253242492676, + "epoch": 0.9467251419943922, + "kl_loss": 0.16451755166053772, + "loss_ib": 0.004668300040066242, + "step": 3292 + }, + { + "ce_ib": 2.445577383041382, + "ce_orig": 0.5650008320808411, + "epoch": 0.9467251419943922, + "kl_loss": 0.19930018484592438, + "loss_ib": 0.004438579082489014, + "step": 3292 + }, + { + "ce_ib": 3.315708637237549, + "ce_orig": 0.882557213306427, + "epoch": 0.9467251419943922, + "kl_loss": 0.13863548636436462, + "loss_ib": 0.004702063277363777, + "step": 3292 + }, + { + "ce_ib": 2.2857723236083984, + "ce_orig": 0.5623785853385925, + "epoch": 0.9470127255733698, + "kl_loss": 0.19435609877109528, + "loss_ib": 0.004229333251714706, + "step": 3293 + }, + { + "ce_ib": 2.981349229812622, + "ce_orig": 0.8648862242698669, + "epoch": 0.9470127255733698, + "kl_loss": 0.20135587453842163, + "loss_ib": 0.004994907882064581, + "step": 3293 + }, + { + "ce_ib": 3.701002836227417, + "ce_orig": 0.7059708833694458, + "epoch": 0.9470127255733698, + "kl_loss": 0.17421367764472961, + "loss_ib": 0.0054431394673883915, + "step": 3293 + }, + { + "ce_ib": 3.769763231277466, + "ce_orig": 0.5015468597412109, + "epoch": 0.9470127255733698, + "kl_loss": 0.276120126247406, + "loss_ib": 0.006530964281409979, + "step": 3293 + }, + { + "ce_ib": 1.0690758228302002, + "ce_orig": 0.23370970785617828, + "epoch": 0.9473003091523474, + "kl_loss": 0.3995145261287689, + "loss_ib": 0.005064221099019051, + "step": 3294 + }, + { + "ce_ib": 5.024189472198486, + "ce_orig": 1.1902865171432495, + "epoch": 0.9473003091523474, + "kl_loss": 0.19895145297050476, + "loss_ib": 0.007013703230768442, + "step": 3294 + }, + { + "ce_ib": 3.6465446949005127, + "ce_orig": 0.3471200168132782, + "epoch": 0.9473003091523474, + "kl_loss": 0.2946989834308624, + "loss_ib": 0.0065935347229242325, + "step": 3294 + }, + { + "ce_ib": 1.4614664316177368, + "ce_orig": 0.29186373949050903, + "epoch": 0.9473003091523474, + "kl_loss": 0.19002483785152435, + "loss_ib": 0.003361714771017432, + "step": 3294 + }, + { + "epoch": 0.947587892731325, + "grad_norm": 0.14164979755878448, + "learning_rate": 8.014994424646289e-06, + "loss": 0.8126, + "step": 3295 + }, + { + "ce_ib": 2.7252213954925537, + "ce_orig": 0.458859384059906, + "epoch": 0.947587892731325, + "kl_loss": 0.20010671019554138, + "loss_ib": 0.004726288374513388, + "step": 3295 + }, + { + "ce_ib": 2.2678232192993164, + "ce_orig": 0.5769878625869751, + "epoch": 0.947587892731325, + "kl_loss": 0.12310776114463806, + "loss_ib": 0.003498900681734085, + "step": 3295 + }, + { + "ce_ib": 2.8472201824188232, + "ce_orig": 0.5890595316886902, + "epoch": 0.947587892731325, + "kl_loss": 0.19857071340084076, + "loss_ib": 0.004832927603274584, + "step": 3295 + }, + { + "ce_ib": 2.5623419284820557, + "ce_orig": 0.5456779599189758, + "epoch": 0.947587892731325, + "kl_loss": 0.12729254364967346, + "loss_ib": 0.003835267387330532, + "step": 3295 + }, + { + "ce_ib": 4.9459662437438965, + "ce_orig": 0.9031968712806702, + "epoch": 0.9478754763103027, + "kl_loss": 0.2299586981534958, + "loss_ib": 0.007245553191751242, + "step": 3296 + }, + { + "ce_ib": 4.128141403198242, + "ce_orig": 0.989382803440094, + "epoch": 0.9478754763103027, + "kl_loss": 0.18061454594135284, + "loss_ib": 0.005934286396950483, + "step": 3296 + }, + { + "ce_ib": 2.2432591915130615, + "ce_orig": 0.48797106742858887, + "epoch": 0.9478754763103027, + "kl_loss": 0.12319779396057129, + "loss_ib": 0.0034752371720969677, + "step": 3296 + }, + { + "ce_ib": 2.694324254989624, + "ce_orig": 0.8428523540496826, + "epoch": 0.9478754763103027, + "kl_loss": 0.16096067428588867, + "loss_ib": 0.004303930792957544, + "step": 3296 + }, + { + "ce_ib": 2.3563618659973145, + "ce_orig": 0.4272860884666443, + "epoch": 0.9481630598892803, + "kl_loss": 0.12220834940671921, + "loss_ib": 0.0035784451756626368, + "step": 3297 + }, + { + "ce_ib": 6.226929664611816, + "ce_orig": 0.8882773518562317, + "epoch": 0.9481630598892803, + "kl_loss": 0.33253246545791626, + "loss_ib": 0.00955225434154272, + "step": 3297 + }, + { + "ce_ib": 4.887339115142822, + "ce_orig": 1.0053150653839111, + "epoch": 0.9481630598892803, + "kl_loss": 0.17654883861541748, + "loss_ib": 0.00665282690897584, + "step": 3297 + }, + { + "ce_ib": 4.662355422973633, + "ce_orig": 0.7857528924942017, + "epoch": 0.9481630598892803, + "kl_loss": 0.19086146354675293, + "loss_ib": 0.006570969708263874, + "step": 3297 + }, + { + "ce_ib": 4.100223064422607, + "ce_orig": 0.9341000914573669, + "epoch": 0.9484506434682579, + "kl_loss": 0.28920578956604004, + "loss_ib": 0.006992280948907137, + "step": 3298 + }, + { + "ce_ib": 2.9144461154937744, + "ce_orig": 0.6717877984046936, + "epoch": 0.9484506434682579, + "kl_loss": 0.09949972480535507, + "loss_ib": 0.0039094435051083565, + "step": 3298 + }, + { + "ce_ib": 4.21555757522583, + "ce_orig": 0.8567813038825989, + "epoch": 0.9484506434682579, + "kl_loss": 0.20325860381126404, + "loss_ib": 0.006248143967241049, + "step": 3298 + }, + { + "ce_ib": 6.347150802612305, + "ce_orig": 1.466641902923584, + "epoch": 0.9484506434682579, + "kl_loss": 0.18213778734207153, + "loss_ib": 0.008168528787791729, + "step": 3298 + }, + { + "ce_ib": 3.4227254390716553, + "ce_orig": 0.6923190951347351, + "epoch": 0.9487382270472356, + "kl_loss": 0.3514382541179657, + "loss_ib": 0.006937108002603054, + "step": 3299 + }, + { + "ce_ib": 3.996868848800659, + "ce_orig": 0.9318107962608337, + "epoch": 0.9487382270472356, + "kl_loss": 0.16195222735404968, + "loss_ib": 0.005616391077637672, + "step": 3299 + }, + { + "ce_ib": 5.0838942527771, + "ce_orig": 0.8307197093963623, + "epoch": 0.9487382270472356, + "kl_loss": 0.15657025575637817, + "loss_ib": 0.006649596616625786, + "step": 3299 + }, + { + "ce_ib": 4.918807029724121, + "ce_orig": 1.3690855503082275, + "epoch": 0.9487382270472356, + "kl_loss": 0.15859933197498322, + "loss_ib": 0.006504800170660019, + "step": 3299 + }, + { + "epoch": 0.9490258106262133, + "grad_norm": 0.15175360441207886, + "learning_rate": 8.008799637865741e-06, + "loss": 0.8943, + "step": 3300 + }, + { + "ce_ib": 3.900031566619873, + "ce_orig": 0.8106812834739685, + "epoch": 0.9490258106262133, + "kl_loss": 0.15756773948669434, + "loss_ib": 0.005475709214806557, + "step": 3300 + }, + { + "ce_ib": 4.195313930511475, + "ce_orig": 0.8236669301986694, + "epoch": 0.9490258106262133, + "kl_loss": 0.22230368852615356, + "loss_ib": 0.0064183506183326244, + "step": 3300 + }, + { + "ce_ib": 2.3647279739379883, + "ce_orig": 0.5178154110908508, + "epoch": 0.9490258106262133, + "kl_loss": 0.13390754163265228, + "loss_ib": 0.003703803289681673, + "step": 3300 + }, + { + "ce_ib": 2.4393632411956787, + "ce_orig": 0.5900273323059082, + "epoch": 0.9490258106262133, + "kl_loss": 0.1096033900976181, + "loss_ib": 0.003535396885126829, + "step": 3300 + }, + { + "ce_ib": 2.9187655448913574, + "ce_orig": 0.7299885749816895, + "epoch": 0.9493133942051909, + "kl_loss": 0.08872876316308975, + "loss_ib": 0.0038060529623180628, + "step": 3301 + }, + { + "ce_ib": 3.9645352363586426, + "ce_orig": 0.48531869053840637, + "epoch": 0.9493133942051909, + "kl_loss": 0.14480401575565338, + "loss_ib": 0.005412575323134661, + "step": 3301 + }, + { + "ce_ib": 6.687049388885498, + "ce_orig": 0.7018494606018066, + "epoch": 0.9493133942051909, + "kl_loss": 0.19723108410835266, + "loss_ib": 0.008659359999001026, + "step": 3301 + }, + { + "ce_ib": 3.1202261447906494, + "ce_orig": 0.8824996948242188, + "epoch": 0.9493133942051909, + "kl_loss": 0.13623814284801483, + "loss_ib": 0.004482607822865248, + "step": 3301 + }, + { + "ce_ib": 4.751153469085693, + "ce_orig": 1.2916632890701294, + "epoch": 0.9496009777841685, + "kl_loss": 0.18498608469963074, + "loss_ib": 0.006601014174520969, + "step": 3302 + }, + { + "ce_ib": 1.6937474012374878, + "ce_orig": 0.37303316593170166, + "epoch": 0.9496009777841685, + "kl_loss": 0.14774644374847412, + "loss_ib": 0.003171212039887905, + "step": 3302 + }, + { + "ce_ib": 4.422581195831299, + "ce_orig": 0.9905567169189453, + "epoch": 0.9496009777841685, + "kl_loss": 0.18115434050559998, + "loss_ib": 0.006234124768525362, + "step": 3302 + }, + { + "ce_ib": 4.105133056640625, + "ce_orig": 1.0619932413101196, + "epoch": 0.9496009777841685, + "kl_loss": 0.11590489000082016, + "loss_ib": 0.005264182109385729, + "step": 3302 + }, + { + "ce_ib": 5.616166114807129, + "ce_orig": 1.1129182577133179, + "epoch": 0.9498885613631461, + "kl_loss": 0.19810940325260162, + "loss_ib": 0.00759725971147418, + "step": 3303 + }, + { + "ce_ib": 4.316826343536377, + "ce_orig": 1.054659128189087, + "epoch": 0.9498885613631461, + "kl_loss": 0.14272677898406982, + "loss_ib": 0.005744094029068947, + "step": 3303 + }, + { + "ce_ib": 3.293409824371338, + "ce_orig": 0.7347771525382996, + "epoch": 0.9498885613631461, + "kl_loss": 0.1537017673254013, + "loss_ib": 0.0048304274678230286, + "step": 3303 + }, + { + "ce_ib": 6.12570858001709, + "ce_orig": 1.6158491373062134, + "epoch": 0.9498885613631461, + "kl_loss": 0.2000754475593567, + "loss_ib": 0.008126462809741497, + "step": 3303 + }, + { + "ce_ib": 2.78692889213562, + "ce_orig": 0.4465332627296448, + "epoch": 0.9501761449421238, + "kl_loss": 0.1807798147201538, + "loss_ib": 0.004594726953655481, + "step": 3304 + }, + { + "ce_ib": 4.886989116668701, + "ce_orig": 1.3525140285491943, + "epoch": 0.9501761449421238, + "kl_loss": 0.17322741448879242, + "loss_ib": 0.006619262974709272, + "step": 3304 + }, + { + "ce_ib": 4.926621437072754, + "ce_orig": 1.0570100545883179, + "epoch": 0.9501761449421238, + "kl_loss": 0.1578432023525238, + "loss_ib": 0.006505053490400314, + "step": 3304 + }, + { + "ce_ib": 4.287284851074219, + "ce_orig": 0.7047610878944397, + "epoch": 0.9501761449421238, + "kl_loss": 0.14852331578731537, + "loss_ib": 0.005772517994046211, + "step": 3304 + }, + { + "epoch": 0.9504637285211014, + "grad_norm": 0.143720805644989, + "learning_rate": 8.00259760218851e-06, + "loss": 0.8231, + "step": 3305 + }, + { + "ce_ib": 2.6771860122680664, + "ce_orig": 0.7428936958312988, + "epoch": 0.9504637285211014, + "kl_loss": 0.1869862675666809, + "loss_ib": 0.0045470488257706165, + "step": 3305 + }, + { + "ce_ib": 1.655136227607727, + "ce_orig": 0.5279562473297119, + "epoch": 0.9504637285211014, + "kl_loss": 0.5978017449378967, + "loss_ib": 0.007633153814822435, + "step": 3305 + }, + { + "ce_ib": 6.495039939880371, + "ce_orig": 1.2984504699707031, + "epoch": 0.9504637285211014, + "kl_loss": 0.196605384349823, + "loss_ib": 0.00846109353005886, + "step": 3305 + }, + { + "ce_ib": 6.599056720733643, + "ce_orig": 1.9299882650375366, + "epoch": 0.9504637285211014, + "kl_loss": 0.14506202936172485, + "loss_ib": 0.0080496771261096, + "step": 3305 + }, + { + "ce_ib": 1.9527045488357544, + "ce_orig": 0.565041184425354, + "epoch": 0.9507513121000791, + "kl_loss": 0.13392792642116547, + "loss_ib": 0.0032919838558882475, + "step": 3306 + }, + { + "ce_ib": 1.3793277740478516, + "ce_orig": 0.39615410566329956, + "epoch": 0.9507513121000791, + "kl_loss": 0.11128295212984085, + "loss_ib": 0.0024921572767198086, + "step": 3306 + }, + { + "ce_ib": 4.438904285430908, + "ce_orig": 1.0329537391662598, + "epoch": 0.9507513121000791, + "kl_loss": 0.2512805163860321, + "loss_ib": 0.0069517092779278755, + "step": 3306 + }, + { + "ce_ib": 5.511641025543213, + "ce_orig": 1.4609259366989136, + "epoch": 0.9507513121000791, + "kl_loss": 0.24140498042106628, + "loss_ib": 0.007925690151751041, + "step": 3306 + }, + { + "ce_ib": 2.0862457752227783, + "ce_orig": 0.5777906179428101, + "epoch": 0.9510388956790568, + "kl_loss": 0.1492680311203003, + "loss_ib": 0.0035789262037724257, + "step": 3307 + }, + { + "ce_ib": 3.521989345550537, + "ce_orig": 0.9171335697174072, + "epoch": 0.9510388956790568, + "kl_loss": 0.10374817997217178, + "loss_ib": 0.004559470806270838, + "step": 3307 + }, + { + "ce_ib": 2.991762161254883, + "ce_orig": 0.6923288106918335, + "epoch": 0.9510388956790568, + "kl_loss": 0.3388303518295288, + "loss_ib": 0.006380065344274044, + "step": 3307 + }, + { + "ce_ib": 1.9846609830856323, + "ce_orig": 0.3825618326663971, + "epoch": 0.9510388956790568, + "kl_loss": 0.09885461628437042, + "loss_ib": 0.0029732072725892067, + "step": 3307 + }, + { + "ce_ib": 3.4963266849517822, + "ce_orig": 0.6358732581138611, + "epoch": 0.9513264792580344, + "kl_loss": 0.20042970776557922, + "loss_ib": 0.00550062395632267, + "step": 3308 + }, + { + "ce_ib": 5.158199787139893, + "ce_orig": 0.949243426322937, + "epoch": 0.9513264792580344, + "kl_loss": 0.1764804720878601, + "loss_ib": 0.006923004053533077, + "step": 3308 + }, + { + "ce_ib": 2.5955288410186768, + "ce_orig": 0.5251213908195496, + "epoch": 0.9513264792580344, + "kl_loss": 0.12145624309778214, + "loss_ib": 0.0038100911770015955, + "step": 3308 + }, + { + "ce_ib": 2.911057710647583, + "ce_orig": 0.8708571791648865, + "epoch": 0.9513264792580344, + "kl_loss": 0.18545453250408173, + "loss_ib": 0.004765602760016918, + "step": 3308 + }, + { + "ce_ib": 3.835167169570923, + "ce_orig": 0.6529061198234558, + "epoch": 0.951614062837012, + "kl_loss": 0.25897860527038574, + "loss_ib": 0.006424953229725361, + "step": 3309 + }, + { + "ce_ib": 4.197619915008545, + "ce_orig": 1.0938645601272583, + "epoch": 0.951614062837012, + "kl_loss": 0.1554369032382965, + "loss_ib": 0.005751988850533962, + "step": 3309 + }, + { + "ce_ib": 4.934092044830322, + "ce_orig": 1.092104434967041, + "epoch": 0.951614062837012, + "kl_loss": 0.1426439732313156, + "loss_ib": 0.006360531784594059, + "step": 3309 + }, + { + "ce_ib": 4.490199089050293, + "ce_orig": 1.0030347108840942, + "epoch": 0.951614062837012, + "kl_loss": 0.13449245691299438, + "loss_ib": 0.005835123360157013, + "step": 3309 + }, + { + "epoch": 0.9519016464159896, + "grad_norm": 0.13828487694263458, + "learning_rate": 7.996388332556735e-06, + "loss": 0.858, + "step": 3310 + }, + { + "ce_ib": 4.474912643432617, + "ce_orig": 1.1191157102584839, + "epoch": 0.9519016464159896, + "kl_loss": 0.1496536135673523, + "loss_ib": 0.0059714484959840775, + "step": 3310 + }, + { + "ce_ib": 2.3265981674194336, + "ce_orig": 0.07952141761779785, + "epoch": 0.9519016464159896, + "kl_loss": 0.46740490198135376, + "loss_ib": 0.007000647019594908, + "step": 3310 + }, + { + "ce_ib": 2.8553378582000732, + "ce_orig": 0.7222899794578552, + "epoch": 0.9519016464159896, + "kl_loss": 0.15677639842033386, + "loss_ib": 0.004423101898282766, + "step": 3310 + }, + { + "ce_ib": 2.450267791748047, + "ce_orig": 0.6490485668182373, + "epoch": 0.9519016464159896, + "kl_loss": 0.1366170048713684, + "loss_ib": 0.0038164376746863127, + "step": 3310 + }, + { + "ce_ib": 2.889698028564453, + "ce_orig": 0.6299818158149719, + "epoch": 0.9521892299949672, + "kl_loss": 0.1332811415195465, + "loss_ib": 0.004222509451210499, + "step": 3311 + }, + { + "ce_ib": 5.656915187835693, + "ce_orig": 1.1039507389068604, + "epoch": 0.9521892299949672, + "kl_loss": 0.15697073936462402, + "loss_ib": 0.00722662266343832, + "step": 3311 + }, + { + "ce_ib": 4.52491569519043, + "ce_orig": 1.1002581119537354, + "epoch": 0.9521892299949672, + "kl_loss": 0.12773653864860535, + "loss_ib": 0.005802280735224485, + "step": 3311 + }, + { + "ce_ib": 2.512669324874878, + "ce_orig": 0.5961372256278992, + "epoch": 0.9521892299949672, + "kl_loss": 0.15902559459209442, + "loss_ib": 0.0041029248386621475, + "step": 3311 + }, + { + "ce_ib": 3.271580219268799, + "ce_orig": 0.5304238200187683, + "epoch": 0.952476813573945, + "kl_loss": 0.09513502568006516, + "loss_ib": 0.004222930409014225, + "step": 3312 + }, + { + "ce_ib": 3.9127845764160156, + "ce_orig": 1.0225733518600464, + "epoch": 0.952476813573945, + "kl_loss": 0.1667436808347702, + "loss_ib": 0.005580221302807331, + "step": 3312 + }, + { + "ce_ib": 2.851712465286255, + "ce_orig": 0.4855336546897888, + "epoch": 0.952476813573945, + "kl_loss": 0.16441771388053894, + "loss_ib": 0.004495889414101839, + "step": 3312 + }, + { + "ce_ib": 4.494113445281982, + "ce_orig": 0.8086840510368347, + "epoch": 0.952476813573945, + "kl_loss": 0.21845495700836182, + "loss_ib": 0.006678663194179535, + "step": 3312 + }, + { + "ce_ib": 2.2937304973602295, + "ce_orig": 0.7760726809501648, + "epoch": 0.9527643971529226, + "kl_loss": 0.12375137209892273, + "loss_ib": 0.0035312443505972624, + "step": 3313 + }, + { + "ce_ib": 4.898841857910156, + "ce_orig": 1.1814122200012207, + "epoch": 0.9527643971529226, + "kl_loss": 0.12952935695648193, + "loss_ib": 0.006194135639816523, + "step": 3313 + }, + { + "ce_ib": 4.53060245513916, + "ce_orig": 1.0210164785385132, + "epoch": 0.9527643971529226, + "kl_loss": 0.11272984743118286, + "loss_ib": 0.005657900590449572, + "step": 3313 + }, + { + "ce_ib": 5.236881256103516, + "ce_orig": 0.6995317339897156, + "epoch": 0.9527643971529226, + "kl_loss": 0.12910905480384827, + "loss_ib": 0.006527971941977739, + "step": 3313 + }, + { + "ce_ib": 4.0510430335998535, + "ce_orig": 1.0179064273834229, + "epoch": 0.9530519807319002, + "kl_loss": 0.16936489939689636, + "loss_ib": 0.00574469193816185, + "step": 3314 + }, + { + "ce_ib": 4.994795322418213, + "ce_orig": 1.1117920875549316, + "epoch": 0.9530519807319002, + "kl_loss": 0.21808314323425293, + "loss_ib": 0.007175626698881388, + "step": 3314 + }, + { + "ce_ib": 2.7023544311523438, + "ce_orig": 0.47321560978889465, + "epoch": 0.9530519807319002, + "kl_loss": 0.12096596509218216, + "loss_ib": 0.003912013955414295, + "step": 3314 + }, + { + "ce_ib": 4.39568567276001, + "ce_orig": 1.0061044692993164, + "epoch": 0.9530519807319002, + "kl_loss": 0.1833362728357315, + "loss_ib": 0.006229048129171133, + "step": 3314 + }, + { + "epoch": 0.9533395643108779, + "grad_norm": 0.14889758825302124, + "learning_rate": 7.990171843929992e-06, + "loss": 0.8402, + "step": 3315 + }, + { + "ce_ib": 2.3357174396514893, + "ce_orig": 0.568092942237854, + "epoch": 0.9533395643108779, + "kl_loss": 0.13971444964408875, + "loss_ib": 0.0037328617181628942, + "step": 3315 + }, + { + "ce_ib": 3.9148902893066406, + "ce_orig": 0.9832486510276794, + "epoch": 0.9533395643108779, + "kl_loss": 0.13900655508041382, + "loss_ib": 0.005304955877363682, + "step": 3315 + }, + { + "ce_ib": 4.2581257820129395, + "ce_orig": 0.8822944164276123, + "epoch": 0.9533395643108779, + "kl_loss": 0.18639729917049408, + "loss_ib": 0.006122098304331303, + "step": 3315 + }, + { + "ce_ib": 2.4287145137786865, + "ce_orig": 0.5696886777877808, + "epoch": 0.9533395643108779, + "kl_loss": 0.19417941570281982, + "loss_ib": 0.004370508715510368, + "step": 3315 + }, + { + "ce_ib": 5.4148077964782715, + "ce_orig": 1.2741879224777222, + "epoch": 0.9536271478898555, + "kl_loss": 0.20923194289207458, + "loss_ib": 0.007507127244025469, + "step": 3316 + }, + { + "ce_ib": 4.929604530334473, + "ce_orig": 0.7413924336433411, + "epoch": 0.9536271478898555, + "kl_loss": 0.18790876865386963, + "loss_ib": 0.0068086921237409115, + "step": 3316 + }, + { + "ce_ib": 3.7617263793945312, + "ce_orig": 1.0540417432785034, + "epoch": 0.9536271478898555, + "kl_loss": 0.15106330811977386, + "loss_ib": 0.005272359121590853, + "step": 3316 + }, + { + "ce_ib": 4.197656154632568, + "ce_orig": 1.0205645561218262, + "epoch": 0.9536271478898555, + "kl_loss": 0.17189502716064453, + "loss_ib": 0.005916606169193983, + "step": 3316 + }, + { + "ce_ib": 4.64199686050415, + "ce_orig": 0.9984999299049377, + "epoch": 0.9539147314688331, + "kl_loss": 0.16293631494045258, + "loss_ib": 0.0062713599763810635, + "step": 3317 + }, + { + "ce_ib": 5.523449420928955, + "ce_orig": 1.160380482673645, + "epoch": 0.9539147314688331, + "kl_loss": 0.27397093176841736, + "loss_ib": 0.008263158611953259, + "step": 3317 + }, + { + "ce_ib": 4.3680315017700195, + "ce_orig": 0.9612809419631958, + "epoch": 0.9539147314688331, + "kl_loss": 0.19624753296375275, + "loss_ib": 0.006330506876111031, + "step": 3317 + }, + { + "ce_ib": 3.4925012588500977, + "ce_orig": 0.9854640364646912, + "epoch": 0.9539147314688331, + "kl_loss": 0.20190054178237915, + "loss_ib": 0.005511506460607052, + "step": 3317 + }, + { + "ce_ib": 2.2599804401397705, + "ce_orig": 0.5805909037590027, + "epoch": 0.9542023150478107, + "kl_loss": 0.15012015402317047, + "loss_ib": 0.003761181840673089, + "step": 3318 + }, + { + "ce_ib": 3.3903298377990723, + "ce_orig": 0.5908503532409668, + "epoch": 0.9542023150478107, + "kl_loss": 0.2274160236120224, + "loss_ib": 0.005664489697664976, + "step": 3318 + }, + { + "ce_ib": 8.679605484008789, + "ce_orig": 1.6528841257095337, + "epoch": 0.9542023150478107, + "kl_loss": 0.13142572343349457, + "loss_ib": 0.00999386329203844, + "step": 3318 + }, + { + "ce_ib": 4.8595499992370605, + "ce_orig": 0.9188261032104492, + "epoch": 0.9542023150478107, + "kl_loss": 0.15804776549339294, + "loss_ib": 0.006440027616918087, + "step": 3318 + }, + { + "ce_ib": 3.7085108757019043, + "ce_orig": 0.8202641010284424, + "epoch": 0.9544898986267885, + "kl_loss": 0.16468796133995056, + "loss_ib": 0.005355390254408121, + "step": 3319 + }, + { + "ce_ib": 4.44246768951416, + "ce_orig": 1.1396093368530273, + "epoch": 0.9544898986267885, + "kl_loss": 0.19800156354904175, + "loss_ib": 0.006422483362257481, + "step": 3319 + }, + { + "ce_ib": 3.964158773422241, + "ce_orig": 0.9415104389190674, + "epoch": 0.9544898986267885, + "kl_loss": 0.1279345452785492, + "loss_ib": 0.005243503954261541, + "step": 3319 + }, + { + "ce_ib": 5.285578727722168, + "ce_orig": 1.0580670833587646, + "epoch": 0.9544898986267885, + "kl_loss": 0.16697555780410767, + "loss_ib": 0.006955334451049566, + "step": 3319 + }, + { + "epoch": 0.9547774822057661, + "grad_norm": 0.13430137932300568, + "learning_rate": 7.983948151285242e-06, + "loss": 0.9242, + "step": 3320 + }, + { + "ce_ib": 3.3799448013305664, + "ce_orig": 0.7020493745803833, + "epoch": 0.9547774822057661, + "kl_loss": 0.14531172811985016, + "loss_ib": 0.004833062179386616, + "step": 3320 + }, + { + "ce_ib": 3.965069532394409, + "ce_orig": 0.3939279019832611, + "epoch": 0.9547774822057661, + "kl_loss": 0.14825820922851562, + "loss_ib": 0.0054476517252624035, + "step": 3320 + }, + { + "ce_ib": 4.604496955871582, + "ce_orig": 0.9111628532409668, + "epoch": 0.9547774822057661, + "kl_loss": 0.2677081823348999, + "loss_ib": 0.007281578611582518, + "step": 3320 + }, + { + "ce_ib": 4.66343355178833, + "ce_orig": 0.9224619269371033, + "epoch": 0.9547774822057661, + "kl_loss": 0.13266386091709137, + "loss_ib": 0.005990072153508663, + "step": 3320 + }, + { + "ce_ib": 4.046107292175293, + "ce_orig": 0.7537975907325745, + "epoch": 0.9550650657847437, + "kl_loss": 0.21605221927165985, + "loss_ib": 0.006206629332154989, + "step": 3321 + }, + { + "ce_ib": 5.810869216918945, + "ce_orig": 1.4654808044433594, + "epoch": 0.9550650657847437, + "kl_loss": 0.152746319770813, + "loss_ib": 0.00733833247795701, + "step": 3321 + }, + { + "ce_ib": 2.8561718463897705, + "ce_orig": 0.5026381015777588, + "epoch": 0.9550650657847437, + "kl_loss": 0.18415838479995728, + "loss_ib": 0.004697755910456181, + "step": 3321 + }, + { + "ce_ib": 4.004387855529785, + "ce_orig": 0.7366926670074463, + "epoch": 0.9550650657847437, + "kl_loss": 0.21135467290878296, + "loss_ib": 0.006117934361100197, + "step": 3321 + }, + { + "ce_ib": 2.9510257244110107, + "ce_orig": 0.5515758395195007, + "epoch": 0.9553526493637213, + "kl_loss": 0.1859590858221054, + "loss_ib": 0.004810616374015808, + "step": 3322 + }, + { + "ce_ib": 3.369537353515625, + "ce_orig": 1.0322273969650269, + "epoch": 0.9553526493637213, + "kl_loss": 0.15723201632499695, + "loss_ib": 0.004941857419908047, + "step": 3322 + }, + { + "ce_ib": 3.5377650260925293, + "ce_orig": 0.7334312200546265, + "epoch": 0.9553526493637213, + "kl_loss": 0.1390829235315323, + "loss_ib": 0.00492859398946166, + "step": 3322 + }, + { + "ce_ib": 6.22671365737915, + "ce_orig": 1.320979356765747, + "epoch": 0.9553526493637213, + "kl_loss": 0.2167930155992508, + "loss_ib": 0.008394643664360046, + "step": 3322 + }, + { + "ce_ib": 4.410187721252441, + "ce_orig": 1.0949180126190186, + "epoch": 0.955640232942699, + "kl_loss": 0.40970170497894287, + "loss_ib": 0.008507205173373222, + "step": 3323 + }, + { + "ce_ib": 5.8651442527771, + "ce_orig": 1.3945428133010864, + "epoch": 0.955640232942699, + "kl_loss": 0.18911123275756836, + "loss_ib": 0.007756256498396397, + "step": 3323 + }, + { + "ce_ib": 4.135345458984375, + "ce_orig": 0.7820268869400024, + "epoch": 0.955640232942699, + "kl_loss": 0.19083794951438904, + "loss_ib": 0.006043724715709686, + "step": 3323 + }, + { + "ce_ib": 1.4547150135040283, + "ce_orig": 0.2675560414791107, + "epoch": 0.955640232942699, + "kl_loss": 0.3423961400985718, + "loss_ib": 0.004878676496446133, + "step": 3323 + }, + { + "ce_ib": 2.6430366039276123, + "ce_orig": 0.5926592350006104, + "epoch": 0.9559278165216766, + "kl_loss": 0.17095047235488892, + "loss_ib": 0.004352541174739599, + "step": 3324 + }, + { + "ce_ib": 2.194370985031128, + "ce_orig": 0.6913273930549622, + "epoch": 0.9559278165216766, + "kl_loss": 0.134566068649292, + "loss_ib": 0.0035400318447500467, + "step": 3324 + }, + { + "ce_ib": 3.467012882232666, + "ce_orig": 0.6945809125900269, + "epoch": 0.9559278165216766, + "kl_loss": 0.19764797389507294, + "loss_ib": 0.005443492438644171, + "step": 3324 + }, + { + "ce_ib": 3.8340513706207275, + "ce_orig": 0.8573213219642639, + "epoch": 0.9559278165216766, + "kl_loss": 0.1876620650291443, + "loss_ib": 0.0057106721214950085, + "step": 3324 + }, + { + "epoch": 0.9562154001006542, + "grad_norm": 0.147456556558609, + "learning_rate": 7.977717269616807e-06, + "loss": 0.8894, + "step": 3325 + }, + { + "ce_ib": 4.8185319900512695, + "ce_orig": 0.8606095314025879, + "epoch": 0.9562154001006542, + "kl_loss": 0.1670570969581604, + "loss_ib": 0.006489102728664875, + "step": 3325 + }, + { + "ce_ib": 3.335392475128174, + "ce_orig": 0.806896984577179, + "epoch": 0.9562154001006542, + "kl_loss": 0.1629323810338974, + "loss_ib": 0.004964716266840696, + "step": 3325 + }, + { + "ce_ib": 3.968038320541382, + "ce_orig": 0.6710392832756042, + "epoch": 0.9562154001006542, + "kl_loss": 0.22308224439620972, + "loss_ib": 0.006198860239237547, + "step": 3325 + }, + { + "ce_ib": 1.5140442848205566, + "ce_orig": 0.2584049105644226, + "epoch": 0.9562154001006542, + "kl_loss": 0.39197200536727905, + "loss_ib": 0.005433764308691025, + "step": 3325 + }, + { + "ce_ib": 4.09027099609375, + "ce_orig": 0.5001516938209534, + "epoch": 0.9565029836796319, + "kl_loss": 0.1977040320634842, + "loss_ib": 0.006067310925573111, + "step": 3326 + }, + { + "ce_ib": 2.8555314540863037, + "ce_orig": 0.8623017072677612, + "epoch": 0.9565029836796319, + "kl_loss": 0.3762728273868561, + "loss_ib": 0.0066182599402964115, + "step": 3326 + }, + { + "ce_ib": 3.4095990657806396, + "ce_orig": 0.9817270040512085, + "epoch": 0.9565029836796319, + "kl_loss": 0.1503705531358719, + "loss_ib": 0.004913304466754198, + "step": 3326 + }, + { + "ce_ib": 3.5727062225341797, + "ce_orig": 1.0884536504745483, + "epoch": 0.9565029836796319, + "kl_loss": 0.17360641062259674, + "loss_ib": 0.005308770574629307, + "step": 3326 + }, + { + "ce_ib": 3.0832483768463135, + "ce_orig": 0.7580514550209045, + "epoch": 0.9567905672586096, + "kl_loss": 0.13049271702766418, + "loss_ib": 0.004388175439089537, + "step": 3327 + }, + { + "ce_ib": 5.913597106933594, + "ce_orig": 1.1888296604156494, + "epoch": 0.9567905672586096, + "kl_loss": 0.22575731575489044, + "loss_ib": 0.008171170018613338, + "step": 3327 + }, + { + "ce_ib": 3.242964506149292, + "ce_orig": 0.6460252404212952, + "epoch": 0.9567905672586096, + "kl_loss": 0.19403883814811707, + "loss_ib": 0.0051833526231348515, + "step": 3327 + }, + { + "ce_ib": 2.7607641220092773, + "ce_orig": 0.8921818733215332, + "epoch": 0.9567905672586096, + "kl_loss": 0.1761491298675537, + "loss_ib": 0.004522255156189203, + "step": 3327 + }, + { + "ce_ib": 5.71606969833374, + "ce_orig": 0.9089986681938171, + "epoch": 0.9570781508375872, + "kl_loss": 0.22054944932460785, + "loss_ib": 0.007921564392745495, + "step": 3328 + }, + { + "ce_ib": 3.564971446990967, + "ce_orig": 0.8326021432876587, + "epoch": 0.9570781508375872, + "kl_loss": 0.22783099114894867, + "loss_ib": 0.00584328081458807, + "step": 3328 + }, + { + "ce_ib": 2.0513248443603516, + "ce_orig": 0.5166671276092529, + "epoch": 0.9570781508375872, + "kl_loss": 0.23683014512062073, + "loss_ib": 0.004419626202434301, + "step": 3328 + }, + { + "ce_ib": 3.149271249771118, + "ce_orig": 0.6055137515068054, + "epoch": 0.9570781508375872, + "kl_loss": 0.1552610993385315, + "loss_ib": 0.0047018821351230145, + "step": 3328 + }, + { + "ce_ib": 3.472010850906372, + "ce_orig": 0.7739956378936768, + "epoch": 0.9573657344165648, + "kl_loss": 0.21875721216201782, + "loss_ib": 0.005659582559019327, + "step": 3329 + }, + { + "ce_ib": 2.73500919342041, + "ce_orig": 0.48869481682777405, + "epoch": 0.9573657344165648, + "kl_loss": 0.13931117951869965, + "loss_ib": 0.004128120839595795, + "step": 3329 + }, + { + "ce_ib": 5.124686241149902, + "ce_orig": 1.3469023704528809, + "epoch": 0.9573657344165648, + "kl_loss": 0.17455336451530457, + "loss_ib": 0.006870219483971596, + "step": 3329 + }, + { + "ce_ib": 6.272923946380615, + "ce_orig": 1.603624701499939, + "epoch": 0.9573657344165648, + "kl_loss": 0.11568522453308105, + "loss_ib": 0.00742977624759078, + "step": 3329 + }, + { + "epoch": 0.9576533179955424, + "grad_norm": 0.13751061260700226, + "learning_rate": 7.971479213936327e-06, + "loss": 0.8471, + "step": 3330 + }, + { + "ce_ib": 5.7705817222595215, + "ce_orig": 1.1312416791915894, + "epoch": 0.9576533179955424, + "kl_loss": 0.1630680114030838, + "loss_ib": 0.007401261944323778, + "step": 3330 + }, + { + "ce_ib": 3.8741226196289062, + "ce_orig": 1.0228335857391357, + "epoch": 0.9576533179955424, + "kl_loss": 0.16136237978935242, + "loss_ib": 0.005487746559083462, + "step": 3330 + }, + { + "ce_ib": 4.409863471984863, + "ce_orig": 1.0570954084396362, + "epoch": 0.9576533179955424, + "kl_loss": 0.1919700801372528, + "loss_ib": 0.00632956437766552, + "step": 3330 + }, + { + "ce_ib": 4.047889709472656, + "ce_orig": 0.8462688326835632, + "epoch": 0.9576533179955424, + "kl_loss": 0.1710982322692871, + "loss_ib": 0.005758871790021658, + "step": 3330 + }, + { + "ce_ib": 3.8696517944335938, + "ce_orig": 0.9009563326835632, + "epoch": 0.95794090157452, + "kl_loss": 0.2026377022266388, + "loss_ib": 0.005896028596907854, + "step": 3331 + }, + { + "ce_ib": 2.4781062602996826, + "ce_orig": 0.37728795409202576, + "epoch": 0.95794090157452, + "kl_loss": 0.18990874290466309, + "loss_ib": 0.004377193748950958, + "step": 3331 + }, + { + "ce_ib": 2.24263072013855, + "ce_orig": 0.5002394914627075, + "epoch": 0.95794090157452, + "kl_loss": 0.1550428569316864, + "loss_ib": 0.0037930591497570276, + "step": 3331 + }, + { + "ce_ib": 3.2624247074127197, + "ce_orig": 0.8132138252258301, + "epoch": 0.95794090157452, + "kl_loss": 0.1991293728351593, + "loss_ib": 0.005253718234598637, + "step": 3331 + }, + { + "ce_ib": 2.9945390224456787, + "ce_orig": 0.6440905332565308, + "epoch": 0.9582284851534978, + "kl_loss": 0.1286853402853012, + "loss_ib": 0.00428139278665185, + "step": 3332 + }, + { + "ce_ib": 2.4472711086273193, + "ce_orig": 0.5263379216194153, + "epoch": 0.9582284851534978, + "kl_loss": 0.15382619202136993, + "loss_ib": 0.003985533025115728, + "step": 3332 + }, + { + "ce_ib": 2.0354511737823486, + "ce_orig": 0.6172226071357727, + "epoch": 0.9582284851534978, + "kl_loss": 0.16216948628425598, + "loss_ib": 0.0036571461241692305, + "step": 3332 + }, + { + "ce_ib": 2.6745996475219727, + "ce_orig": 0.775884211063385, + "epoch": 0.9582284851534978, + "kl_loss": 0.20558007061481476, + "loss_ib": 0.004730400163680315, + "step": 3332 + }, + { + "ce_ib": 2.629009485244751, + "ce_orig": 0.8781623244285583, + "epoch": 0.9585160687324754, + "kl_loss": 0.16361503303050995, + "loss_ib": 0.004265159834176302, + "step": 3333 + }, + { + "ce_ib": 2.374145746231079, + "ce_orig": 0.402930349111557, + "epoch": 0.9585160687324754, + "kl_loss": 0.2111741006374359, + "loss_ib": 0.0044858865439891815, + "step": 3333 + }, + { + "ce_ib": 4.648592472076416, + "ce_orig": 1.0199404954910278, + "epoch": 0.9585160687324754, + "kl_loss": 0.17354649305343628, + "loss_ib": 0.006384057458490133, + "step": 3333 + }, + { + "ce_ib": 3.556013345718384, + "ce_orig": 1.0439256429672241, + "epoch": 0.9585160687324754, + "kl_loss": 0.14509549736976624, + "loss_ib": 0.005006968043744564, + "step": 3333 + }, + { + "ce_ib": 2.5708067417144775, + "ce_orig": 0.5965346097946167, + "epoch": 0.958803652311453, + "kl_loss": 0.13092102110385895, + "loss_ib": 0.003880016738548875, + "step": 3334 + }, + { + "ce_ib": 4.383260250091553, + "ce_orig": 0.9738389849662781, + "epoch": 0.958803652311453, + "kl_loss": 0.1922275424003601, + "loss_ib": 0.006305535789579153, + "step": 3334 + }, + { + "ce_ib": 3.1825900077819824, + "ce_orig": 0.9833239912986755, + "epoch": 0.958803652311453, + "kl_loss": 0.09912486374378204, + "loss_ib": 0.004173838533461094, + "step": 3334 + }, + { + "ce_ib": 4.741343021392822, + "ce_orig": 1.41030752658844, + "epoch": 0.958803652311453, + "kl_loss": 0.22608181834220886, + "loss_ib": 0.007002160884439945, + "step": 3334 + }, + { + "epoch": 0.9590912358904307, + "grad_norm": 0.1599610149860382, + "learning_rate": 7.965233999272725e-06, + "loss": 0.8471, + "step": 3335 + }, + { + "ce_ib": 8.560018539428711, + "ce_orig": 1.0684107542037964, + "epoch": 0.9590912358904307, + "kl_loss": 0.18182486295700073, + "loss_ib": 0.010378267616033554, + "step": 3335 + }, + { + "ce_ib": 7.4935688972473145, + "ce_orig": 1.8815616369247437, + "epoch": 0.9590912358904307, + "kl_loss": 0.2401707023382187, + "loss_ib": 0.00989527627825737, + "step": 3335 + }, + { + "ce_ib": 2.2815144062042236, + "ce_orig": 0.7003307342529297, + "epoch": 0.9590912358904307, + "kl_loss": 0.19060127437114716, + "loss_ib": 0.004187527112662792, + "step": 3335 + }, + { + "ce_ib": 4.188779830932617, + "ce_orig": 1.0827833414077759, + "epoch": 0.9590912358904307, + "kl_loss": 0.14882278442382812, + "loss_ib": 0.0056770071387290955, + "step": 3335 + }, + { + "ce_ib": 3.112778663635254, + "ce_orig": 0.5913746953010559, + "epoch": 0.9593788194694083, + "kl_loss": 0.1941092610359192, + "loss_ib": 0.005053871311247349, + "step": 3336 + }, + { + "ce_ib": 3.0871052742004395, + "ce_orig": 0.8359124660491943, + "epoch": 0.9593788194694083, + "kl_loss": 0.2909771800041199, + "loss_ib": 0.005996876861900091, + "step": 3336 + }, + { + "ce_ib": 7.672001838684082, + "ce_orig": 0.9198316931724548, + "epoch": 0.9593788194694083, + "kl_loss": 0.17829272150993347, + "loss_ib": 0.00945492833852768, + "step": 3336 + }, + { + "ce_ib": 3.362151861190796, + "ce_orig": 0.7023892402648926, + "epoch": 0.9593788194694083, + "kl_loss": 0.16742859780788422, + "loss_ib": 0.005036437883973122, + "step": 3336 + }, + { + "ce_ib": 5.21610689163208, + "ce_orig": 1.4326841831207275, + "epoch": 0.9596664030483859, + "kl_loss": 0.22233888506889343, + "loss_ib": 0.00743949506431818, + "step": 3337 + }, + { + "ce_ib": 5.6459736824035645, + "ce_orig": 1.5197268724441528, + "epoch": 0.9596664030483859, + "kl_loss": 0.22721058130264282, + "loss_ib": 0.007918079383671284, + "step": 3337 + }, + { + "ce_ib": 5.73305606842041, + "ce_orig": 1.086674690246582, + "epoch": 0.9596664030483859, + "kl_loss": 0.19760757684707642, + "loss_ib": 0.007709131576120853, + "step": 3337 + }, + { + "ce_ib": 3.5945684909820557, + "ce_orig": 0.7643541097640991, + "epoch": 0.9596664030483859, + "kl_loss": 0.1993611603975296, + "loss_ib": 0.005588180385529995, + "step": 3337 + }, + { + "ce_ib": 5.860869884490967, + "ce_orig": 1.2037020921707153, + "epoch": 0.9599539866273635, + "kl_loss": 0.18475493788719177, + "loss_ib": 0.00770841958001256, + "step": 3338 + }, + { + "ce_ib": 4.466207981109619, + "ce_orig": 1.0823802947998047, + "epoch": 0.9599539866273635, + "kl_loss": 0.16135983169078827, + "loss_ib": 0.006079806014895439, + "step": 3338 + }, + { + "ce_ib": 4.337945461273193, + "ce_orig": 0.8534349799156189, + "epoch": 0.9599539866273635, + "kl_loss": 0.25821205973625183, + "loss_ib": 0.006920065730810165, + "step": 3338 + }, + { + "ce_ib": 4.330248832702637, + "ce_orig": 0.9073301553726196, + "epoch": 0.9599539866273635, + "kl_loss": 0.14878001809120178, + "loss_ib": 0.005818048492074013, + "step": 3338 + }, + { + "ce_ib": 2.238250732421875, + "ce_orig": 0.5877565145492554, + "epoch": 0.9602415702063413, + "kl_loss": 0.42403313517570496, + "loss_ib": 0.006478582043200731, + "step": 3339 + }, + { + "ce_ib": 2.0910425186157227, + "ce_orig": 0.6435688138008118, + "epoch": 0.9602415702063413, + "kl_loss": 0.17616117000579834, + "loss_ib": 0.003852654015645385, + "step": 3339 + }, + { + "ce_ib": 6.286886692047119, + "ce_orig": 0.9766400456428528, + "epoch": 0.9602415702063413, + "kl_loss": 0.21520666778087616, + "loss_ib": 0.008438953198492527, + "step": 3339 + }, + { + "ce_ib": 4.787293910980225, + "ce_orig": 1.2060996294021606, + "epoch": 0.9602415702063413, + "kl_loss": 0.17652097344398499, + "loss_ib": 0.006552503444254398, + "step": 3339 + }, + { + "epoch": 0.9605291537853189, + "grad_norm": 0.138056680560112, + "learning_rate": 7.958981640672173e-06, + "loss": 0.8926, + "step": 3340 + }, + { + "ce_ib": 3.3602216243743896, + "ce_orig": 0.7983546853065491, + "epoch": 0.9605291537853189, + "kl_loss": 0.10532093048095703, + "loss_ib": 0.004413431044667959, + "step": 3340 + }, + { + "ce_ib": 2.782367706298828, + "ce_orig": 0.49188390374183655, + "epoch": 0.9605291537853189, + "kl_loss": 0.2057494819164276, + "loss_ib": 0.004839862696826458, + "step": 3340 + }, + { + "ce_ib": 4.5581440925598145, + "ce_orig": 1.0476653575897217, + "epoch": 0.9605291537853189, + "kl_loss": 0.23116056621074677, + "loss_ib": 0.0068697500973939896, + "step": 3340 + }, + { + "ce_ib": 3.6803126335144043, + "ce_orig": 0.7927830219268799, + "epoch": 0.9605291537853189, + "kl_loss": 0.17640936374664307, + "loss_ib": 0.0054444060660898685, + "step": 3340 + }, + { + "ce_ib": 2.515166997909546, + "ce_orig": 0.6794766783714294, + "epoch": 0.9608167373642965, + "kl_loss": 0.13821670413017273, + "loss_ib": 0.0038973339833319187, + "step": 3341 + }, + { + "ce_ib": 2.9368526935577393, + "ce_orig": 0.43503955006599426, + "epoch": 0.9608167373642965, + "kl_loss": 0.22648566961288452, + "loss_ib": 0.0052017089910805225, + "step": 3341 + }, + { + "ce_ib": 4.5536980628967285, + "ce_orig": 1.1046017408370972, + "epoch": 0.9608167373642965, + "kl_loss": 0.1458209753036499, + "loss_ib": 0.006011907942593098, + "step": 3341 + }, + { + "ce_ib": 3.7076070308685303, + "ce_orig": 0.7159382104873657, + "epoch": 0.9608167373642965, + "kl_loss": 0.2712736427783966, + "loss_ib": 0.006420343182981014, + "step": 3341 + }, + { + "ce_ib": 3.0194427967071533, + "ce_orig": 0.7834783792495728, + "epoch": 0.9611043209432741, + "kl_loss": 0.12505289912223816, + "loss_ib": 0.00426997197791934, + "step": 3342 + }, + { + "ce_ib": 1.77254319190979, + "ce_orig": 0.4160774052143097, + "epoch": 0.9611043209432741, + "kl_loss": 0.29630327224731445, + "loss_ib": 0.0047355759888887405, + "step": 3342 + }, + { + "ce_ib": 2.907874584197998, + "ce_orig": 0.8045170903205872, + "epoch": 0.9611043209432741, + "kl_loss": 0.10379981994628906, + "loss_ib": 0.003945872653275728, + "step": 3342 + }, + { + "ce_ib": 1.9220457077026367, + "ce_orig": 0.5806375741958618, + "epoch": 0.9611043209432741, + "kl_loss": 0.1516657918691635, + "loss_ib": 0.003438703715801239, + "step": 3342 + }, + { + "ce_ib": 2.4114573001861572, + "ce_orig": 0.4854116439819336, + "epoch": 0.9613919045222518, + "kl_loss": 0.18413493037223816, + "loss_ib": 0.004252806305885315, + "step": 3343 + }, + { + "ce_ib": 5.96621036529541, + "ce_orig": 1.5207397937774658, + "epoch": 0.9613919045222518, + "kl_loss": 0.2832886576652527, + "loss_ib": 0.008799096569418907, + "step": 3343 + }, + { + "ce_ib": 2.9180362224578857, + "ce_orig": 0.7027631402015686, + "epoch": 0.9613919045222518, + "kl_loss": 0.12673768401145935, + "loss_ib": 0.004185413010418415, + "step": 3343 + }, + { + "ce_ib": 3.2849018573760986, + "ce_orig": 0.8746554851531982, + "epoch": 0.9613919045222518, + "kl_loss": 0.1829775869846344, + "loss_ib": 0.005114677827805281, + "step": 3343 + }, + { + "ce_ib": 2.239368438720703, + "ce_orig": 0.3595240116119385, + "epoch": 0.9616794881012294, + "kl_loss": 0.13996171951293945, + "loss_ib": 0.0036389855667948723, + "step": 3344 + }, + { + "ce_ib": 3.295833110809326, + "ce_orig": 0.6784860491752625, + "epoch": 0.9616794881012294, + "kl_loss": 0.18765786290168762, + "loss_ib": 0.005172411911189556, + "step": 3344 + }, + { + "ce_ib": 4.191170692443848, + "ce_orig": 1.205702781677246, + "epoch": 0.9616794881012294, + "kl_loss": 0.1542927324771881, + "loss_ib": 0.005734097678214312, + "step": 3344 + }, + { + "ce_ib": 1.495283842086792, + "ce_orig": 0.2672934830188751, + "epoch": 0.9616794881012294, + "kl_loss": 0.3836219310760498, + "loss_ib": 0.005331503227353096, + "step": 3344 + }, + { + "epoch": 0.961967071680207, + "grad_norm": 0.15038004517555237, + "learning_rate": 7.952722153198054e-06, + "loss": 0.7499, + "step": 3345 + }, + { + "ce_ib": 3.9381556510925293, + "ce_orig": 1.1041309833526611, + "epoch": 0.961967071680207, + "kl_loss": 0.18802088499069214, + "loss_ib": 0.005818364676088095, + "step": 3345 + }, + { + "ce_ib": 4.711825370788574, + "ce_orig": 0.9598775506019592, + "epoch": 0.961967071680207, + "kl_loss": 0.2421858310699463, + "loss_ib": 0.007133684121072292, + "step": 3345 + }, + { + "ce_ib": 3.777219772338867, + "ce_orig": 0.8183223605155945, + "epoch": 0.961967071680207, + "kl_loss": 0.1856333613395691, + "loss_ib": 0.005633553024381399, + "step": 3345 + }, + { + "ce_ib": 1.4636446237564087, + "ce_orig": 0.2617383599281311, + "epoch": 0.961967071680207, + "kl_loss": 0.414650022983551, + "loss_ib": 0.005610144697129726, + "step": 3345 + }, + { + "ce_ib": 2.814807653427124, + "ce_orig": 0.6062304973602295, + "epoch": 0.9622546552591847, + "kl_loss": 0.10703858733177185, + "loss_ib": 0.003885193495079875, + "step": 3346 + }, + { + "ce_ib": 5.970531940460205, + "ce_orig": 1.6252530813217163, + "epoch": 0.9622546552591847, + "kl_loss": 0.14564043283462524, + "loss_ib": 0.007426936645060778, + "step": 3346 + }, + { + "ce_ib": 2.9478611946105957, + "ce_orig": 0.7712130546569824, + "epoch": 0.9622546552591847, + "kl_loss": 0.11889441311359406, + "loss_ib": 0.004136805422604084, + "step": 3346 + }, + { + "ce_ib": 5.020895481109619, + "ce_orig": 0.7788591980934143, + "epoch": 0.9622546552591847, + "kl_loss": 0.24401207268238068, + "loss_ib": 0.0074610160663723946, + "step": 3346 + }, + { + "ce_ib": 2.651262044906616, + "ce_orig": 0.4915289878845215, + "epoch": 0.9625422388381624, + "kl_loss": 0.0809020847082138, + "loss_ib": 0.0034602826926857233, + "step": 3347 + }, + { + "ce_ib": 4.273843765258789, + "ce_orig": 1.1232596635818481, + "epoch": 0.9625422388381624, + "kl_loss": 0.23293714225292206, + "loss_ib": 0.006603215355426073, + "step": 3347 + }, + { + "ce_ib": 3.437089681625366, + "ce_orig": 0.9196497797966003, + "epoch": 0.9625422388381624, + "kl_loss": 0.14865879714488983, + "loss_ib": 0.004923677537590265, + "step": 3347 + }, + { + "ce_ib": 1.9261497259140015, + "ce_orig": 0.7335602641105652, + "epoch": 0.9625422388381624, + "kl_loss": 0.1649097204208374, + "loss_ib": 0.0035752467811107635, + "step": 3347 + }, + { + "ce_ib": 4.4518351554870605, + "ce_orig": 1.1466946601867676, + "epoch": 0.96282982241714, + "kl_loss": 0.1860491931438446, + "loss_ib": 0.0063123274594545364, + "step": 3348 + }, + { + "ce_ib": 5.247130393981934, + "ce_orig": 1.314386248588562, + "epoch": 0.96282982241714, + "kl_loss": 0.2484295517206192, + "loss_ib": 0.007731425575911999, + "step": 3348 + }, + { + "ce_ib": 5.329336166381836, + "ce_orig": 1.432247519493103, + "epoch": 0.96282982241714, + "kl_loss": 0.22348466515541077, + "loss_ib": 0.007564183324575424, + "step": 3348 + }, + { + "ce_ib": 2.3655598163604736, + "ce_orig": 0.7160993814468384, + "epoch": 0.96282982241714, + "kl_loss": 0.0977872982621193, + "loss_ib": 0.0033434326760470867, + "step": 3348 + }, + { + "ce_ib": 4.793315887451172, + "ce_orig": 0.942450761795044, + "epoch": 0.9631174059961176, + "kl_loss": 0.2303760051727295, + "loss_ib": 0.007097076158970594, + "step": 3349 + }, + { + "ce_ib": 4.032739162445068, + "ce_orig": 0.892867922782898, + "epoch": 0.9631174059961176, + "kl_loss": 0.18798036873340607, + "loss_ib": 0.005912542808800936, + "step": 3349 + }, + { + "ce_ib": 2.3579089641571045, + "ce_orig": 0.6936837434768677, + "epoch": 0.9631174059961176, + "kl_loss": 0.1387128382921219, + "loss_ib": 0.003745037131011486, + "step": 3349 + }, + { + "ce_ib": 3.2007710933685303, + "ce_orig": 0.8985721468925476, + "epoch": 0.9631174059961176, + "kl_loss": 0.1322091817855835, + "loss_ib": 0.00452286284416914, + "step": 3349 + }, + { + "epoch": 0.9634049895750952, + "grad_norm": 0.13120298087596893, + "learning_rate": 7.946455551930928e-06, + "loss": 0.8316, + "step": 3350 + }, + { + "ce_ib": 2.8877744674682617, + "ce_orig": 0.7808409929275513, + "epoch": 0.9634049895750952, + "kl_loss": 0.15923844277858734, + "loss_ib": 0.004480158444494009, + "step": 3350 + }, + { + "ce_ib": 3.7844133377075195, + "ce_orig": 0.6673776507377625, + "epoch": 0.9634049895750952, + "kl_loss": 0.20061525702476501, + "loss_ib": 0.005790566094219685, + "step": 3350 + }, + { + "ce_ib": 5.319087505340576, + "ce_orig": 1.2243086099624634, + "epoch": 0.9634049895750952, + "kl_loss": 0.19309768080711365, + "loss_ib": 0.007250064518302679, + "step": 3350 + }, + { + "ce_ib": 3.3471953868865967, + "ce_orig": 0.8923193216323853, + "epoch": 0.9634049895750952, + "kl_loss": 0.13536477088928223, + "loss_ib": 0.004700842779129744, + "step": 3350 + }, + { + "ce_ib": 6.235105037689209, + "ce_orig": 1.278998851776123, + "epoch": 0.9636925731540729, + "kl_loss": 0.14827679097652435, + "loss_ib": 0.007717873435467482, + "step": 3351 + }, + { + "ce_ib": 3.929314136505127, + "ce_orig": 0.688569962978363, + "epoch": 0.9636925731540729, + "kl_loss": 0.2550385296344757, + "loss_ib": 0.00647969963029027, + "step": 3351 + }, + { + "ce_ib": 5.3154425621032715, + "ce_orig": 1.2705579996109009, + "epoch": 0.9636925731540729, + "kl_loss": 0.1211872547864914, + "loss_ib": 0.006527315359562635, + "step": 3351 + }, + { + "ce_ib": 3.2654612064361572, + "ce_orig": 0.6614538431167603, + "epoch": 0.9636925731540729, + "kl_loss": 0.22071269154548645, + "loss_ib": 0.00547258835285902, + "step": 3351 + }, + { + "ce_ib": 4.358907699584961, + "ce_orig": 0.6845752000808716, + "epoch": 0.9639801567330506, + "kl_loss": 0.23551151156425476, + "loss_ib": 0.006714022718369961, + "step": 3352 + }, + { + "ce_ib": 3.0277130603790283, + "ce_orig": 0.5380727648735046, + "epoch": 0.9639801567330506, + "kl_loss": 0.28185606002807617, + "loss_ib": 0.005846273619681597, + "step": 3352 + }, + { + "ce_ib": 5.209164619445801, + "ce_orig": 1.0570225715637207, + "epoch": 0.9639801567330506, + "kl_loss": 0.1704956293106079, + "loss_ib": 0.0069141206331551075, + "step": 3352 + }, + { + "ce_ib": 6.411062717437744, + "ce_orig": 1.5764851570129395, + "epoch": 0.9639801567330506, + "kl_loss": 0.1690622866153717, + "loss_ib": 0.008101685903966427, + "step": 3352 + }, + { + "ce_ib": 3.3394668102264404, + "ce_orig": 0.9404020309448242, + "epoch": 0.9642677403120282, + "kl_loss": 0.19451195001602173, + "loss_ib": 0.005284585990011692, + "step": 3353 + }, + { + "ce_ib": 4.084068298339844, + "ce_orig": 1.2055480480194092, + "epoch": 0.9642677403120282, + "kl_loss": 0.1556939035654068, + "loss_ib": 0.0056410073302686214, + "step": 3353 + }, + { + "ce_ib": 3.417381763458252, + "ce_orig": 0.5781357884407043, + "epoch": 0.9642677403120282, + "kl_loss": 0.1839676797389984, + "loss_ib": 0.005257058423012495, + "step": 3353 + }, + { + "ce_ib": 5.816256999969482, + "ce_orig": 1.5622901916503906, + "epoch": 0.9642677403120282, + "kl_loss": 0.1858428716659546, + "loss_ib": 0.007674685679376125, + "step": 3353 + }, + { + "ce_ib": 4.6879191398620605, + "ce_orig": 1.3330848217010498, + "epoch": 0.9645553238910058, + "kl_loss": 0.13744190335273743, + "loss_ib": 0.006062338128685951, + "step": 3354 + }, + { + "ce_ib": 3.9194741249084473, + "ce_orig": 1.0554413795471191, + "epoch": 0.9645553238910058, + "kl_loss": 0.19503743946552277, + "loss_ib": 0.005869848653674126, + "step": 3354 + }, + { + "ce_ib": 3.0617318153381348, + "ce_orig": 0.6321820616722107, + "epoch": 0.9645553238910058, + "kl_loss": 0.19400176405906677, + "loss_ib": 0.005001749377697706, + "step": 3354 + }, + { + "ce_ib": 3.1731231212615967, + "ce_orig": 0.8027305603027344, + "epoch": 0.9645553238910058, + "kl_loss": 0.202096089720726, + "loss_ib": 0.005194083787500858, + "step": 3354 + }, + { + "epoch": 0.9648429074699835, + "grad_norm": 0.1464298814535141, + "learning_rate": 7.94018185196849e-06, + "loss": 0.8727, + "step": 3355 + }, + { + "ce_ib": 5.217344284057617, + "ce_orig": 0.5817245244979858, + "epoch": 0.9648429074699835, + "kl_loss": 0.5834470391273499, + "loss_ib": 0.011051814071834087, + "step": 3355 + }, + { + "ce_ib": 2.6196415424346924, + "ce_orig": 0.6897908449172974, + "epoch": 0.9648429074699835, + "kl_loss": 0.08933120220899582, + "loss_ib": 0.0035129536408931017, + "step": 3355 + }, + { + "ce_ib": 2.217120409011841, + "ce_orig": 0.44381192326545715, + "epoch": 0.9648429074699835, + "kl_loss": 0.1777210384607315, + "loss_ib": 0.0039943307638168335, + "step": 3355 + }, + { + "ce_ib": 2.421926975250244, + "ce_orig": 0.4178295433521271, + "epoch": 0.9648429074699835, + "kl_loss": 0.1292649656534195, + "loss_ib": 0.0037145765963941813, + "step": 3355 + }, + { + "ce_ib": 3.9827005863189697, + "ce_orig": 0.9900867342948914, + "epoch": 0.9651304910489611, + "kl_loss": 0.17903345823287964, + "loss_ib": 0.00577303534373641, + "step": 3356 + }, + { + "ce_ib": 4.562921047210693, + "ce_orig": 1.074830412864685, + "epoch": 0.9651304910489611, + "kl_loss": 0.30940622091293335, + "loss_ib": 0.007656983099877834, + "step": 3356 + }, + { + "ce_ib": 5.6238579750061035, + "ce_orig": 1.400129795074463, + "epoch": 0.9651304910489611, + "kl_loss": 0.34523630142211914, + "loss_ib": 0.009076220914721489, + "step": 3356 + }, + { + "ce_ib": 5.703523635864258, + "ce_orig": 0.8231937885284424, + "epoch": 0.9651304910489611, + "kl_loss": 0.22288912534713745, + "loss_ib": 0.00793241523206234, + "step": 3356 + }, + { + "ce_ib": 5.200591087341309, + "ce_orig": 0.8650156855583191, + "epoch": 0.9654180746279387, + "kl_loss": 0.2158549726009369, + "loss_ib": 0.007359141018241644, + "step": 3357 + }, + { + "ce_ib": 3.186631441116333, + "ce_orig": 0.5314891934394836, + "epoch": 0.9654180746279387, + "kl_loss": 0.26238203048706055, + "loss_ib": 0.005810451228171587, + "step": 3357 + }, + { + "ce_ib": 5.241153717041016, + "ce_orig": 1.2844188213348389, + "epoch": 0.9654180746279387, + "kl_loss": 0.16183149814605713, + "loss_ib": 0.006859468761831522, + "step": 3357 + }, + { + "ce_ib": 5.902309417724609, + "ce_orig": 0.9100503325462341, + "epoch": 0.9654180746279387, + "kl_loss": 0.1153629943728447, + "loss_ib": 0.007055939175188541, + "step": 3357 + }, + { + "ce_ib": 3.521975517272949, + "ce_orig": 0.8331058025360107, + "epoch": 0.9657056582069163, + "kl_loss": 0.1589934229850769, + "loss_ib": 0.0051119099371135235, + "step": 3358 + }, + { + "ce_ib": 3.0896317958831787, + "ce_orig": 0.6007292866706848, + "epoch": 0.9657056582069163, + "kl_loss": 0.15735284984111786, + "loss_ib": 0.004663160536438227, + "step": 3358 + }, + { + "ce_ib": 3.339423179626465, + "ce_orig": 0.8296361565589905, + "epoch": 0.9657056582069163, + "kl_loss": 0.1784326732158661, + "loss_ib": 0.00512374984100461, + "step": 3358 + }, + { + "ce_ib": 3.3599908351898193, + "ce_orig": 0.9564060568809509, + "epoch": 0.9657056582069163, + "kl_loss": 0.1424582600593567, + "loss_ib": 0.004784573335200548, + "step": 3358 + }, + { + "ce_ib": 6.614162921905518, + "ce_orig": 1.235037922859192, + "epoch": 0.9659932417858941, + "kl_loss": 0.22569748759269714, + "loss_ib": 0.008871138095855713, + "step": 3359 + }, + { + "ce_ib": 4.196810722351074, + "ce_orig": 1.0288525819778442, + "epoch": 0.9659932417858941, + "kl_loss": 0.21771478652954102, + "loss_ib": 0.00637395866215229, + "step": 3359 + }, + { + "ce_ib": 4.983139991760254, + "ce_orig": 1.0416717529296875, + "epoch": 0.9659932417858941, + "kl_loss": 0.15049228072166443, + "loss_ib": 0.0064880624413490295, + "step": 3359 + }, + { + "ce_ib": 2.642899990081787, + "ce_orig": 0.6655712723731995, + "epoch": 0.9659932417858941, + "kl_loss": 0.10723969340324402, + "loss_ib": 0.0037152969744056463, + "step": 3359 + }, + { + "epoch": 0.9662808253648717, + "grad_norm": 0.12460885941982269, + "learning_rate": 7.933901068425539e-06, + "loss": 0.8004, + "step": 3360 + }, + { + "ce_ib": 3.498410701751709, + "ce_orig": 0.7095348238945007, + "epoch": 0.9662808253648717, + "kl_loss": 0.34302595257759094, + "loss_ib": 0.006928670220077038, + "step": 3360 + }, + { + "ce_ib": 1.611122488975525, + "ce_orig": 0.336913138628006, + "epoch": 0.9662808253648717, + "kl_loss": 0.16640301048755646, + "loss_ib": 0.0032751522958278656, + "step": 3360 + }, + { + "ce_ib": 5.609724044799805, + "ce_orig": 1.0478085279464722, + "epoch": 0.9662808253648717, + "kl_loss": 0.2252522110939026, + "loss_ib": 0.007862245664000511, + "step": 3360 + }, + { + "ce_ib": 3.367609739303589, + "ce_orig": 0.6287264227867126, + "epoch": 0.9662808253648717, + "kl_loss": 0.18432840704917908, + "loss_ib": 0.00521089369431138, + "step": 3360 + }, + { + "ce_ib": 2.3762903213500977, + "ce_orig": 0.7577813863754272, + "epoch": 0.9665684089438493, + "kl_loss": 0.11978559195995331, + "loss_ib": 0.0035741461906582117, + "step": 3361 + }, + { + "ce_ib": 3.5802433490753174, + "ce_orig": 0.7990947961807251, + "epoch": 0.9665684089438493, + "kl_loss": 0.2111099660396576, + "loss_ib": 0.0056913429871201515, + "step": 3361 + }, + { + "ce_ib": 3.1025185585021973, + "ce_orig": 0.8050031065940857, + "epoch": 0.9665684089438493, + "kl_loss": 0.20467044413089752, + "loss_ib": 0.005149222910404205, + "step": 3361 + }, + { + "ce_ib": 3.608560562133789, + "ce_orig": 0.831062912940979, + "epoch": 0.9665684089438493, + "kl_loss": 0.1669946014881134, + "loss_ib": 0.005278506316244602, + "step": 3361 + }, + { + "ce_ib": 5.047640323638916, + "ce_orig": 1.3900291919708252, + "epoch": 0.966855992522827, + "kl_loss": 0.23774763941764832, + "loss_ib": 0.007425115909427404, + "step": 3362 + }, + { + "ce_ib": 3.105361223220825, + "ce_orig": 0.8999671339988708, + "epoch": 0.966855992522827, + "kl_loss": 0.19724927842617035, + "loss_ib": 0.005077853798866272, + "step": 3362 + }, + { + "ce_ib": 3.2547314167022705, + "ce_orig": 0.7072083950042725, + "epoch": 0.966855992522827, + "kl_loss": 0.2513149380683899, + "loss_ib": 0.0057678804732859135, + "step": 3362 + }, + { + "ce_ib": 4.4101457595825195, + "ce_orig": 1.1611377000808716, + "epoch": 0.966855992522827, + "kl_loss": 0.15095333755016327, + "loss_ib": 0.005919679068028927, + "step": 3362 + }, + { + "ce_ib": 6.482115745544434, + "ce_orig": 1.5417991876602173, + "epoch": 0.9671435761018046, + "kl_loss": 0.1471179723739624, + "loss_ib": 0.007953295484185219, + "step": 3363 + }, + { + "ce_ib": 4.533393383026123, + "ce_orig": 0.6997305750846863, + "epoch": 0.9671435761018046, + "kl_loss": 0.18520604074001312, + "loss_ib": 0.006385453976690769, + "step": 3363 + }, + { + "ce_ib": 3.700269937515259, + "ce_orig": 1.2844419479370117, + "epoch": 0.9671435761018046, + "kl_loss": 0.18817244470119476, + "loss_ib": 0.005581994540989399, + "step": 3363 + }, + { + "ce_ib": 3.8659002780914307, + "ce_orig": 0.5935215950012207, + "epoch": 0.9671435761018046, + "kl_loss": 0.25569334626197815, + "loss_ib": 0.006422833539545536, + "step": 3363 + }, + { + "ce_ib": 4.813815116882324, + "ce_orig": 1.3811302185058594, + "epoch": 0.9674311596807822, + "kl_loss": 0.1554306000471115, + "loss_ib": 0.006368120666593313, + "step": 3364 + }, + { + "ce_ib": 4.219429016113281, + "ce_orig": 1.1314771175384521, + "epoch": 0.9674311596807822, + "kl_loss": 0.2126169204711914, + "loss_ib": 0.006345598492771387, + "step": 3364 + }, + { + "ce_ib": 5.057472229003906, + "ce_orig": 1.397242784500122, + "epoch": 0.9674311596807822, + "kl_loss": 0.24882960319519043, + "loss_ib": 0.007545768283307552, + "step": 3364 + }, + { + "ce_ib": 4.099944114685059, + "ce_orig": 0.576519250869751, + "epoch": 0.9674311596807822, + "kl_loss": 0.19097080826759338, + "loss_ib": 0.006009652279317379, + "step": 3364 + }, + { + "epoch": 0.9677187432597598, + "grad_norm": 0.13225588202476501, + "learning_rate": 7.927613216433942e-06, + "loss": 0.8814, + "step": 3365 + }, + { + "ce_ib": 4.481527328491211, + "ce_orig": 1.1641849279403687, + "epoch": 0.9677187432597598, + "kl_loss": 0.20430701971054077, + "loss_ib": 0.00652459729462862, + "step": 3365 + }, + { + "ce_ib": 6.040762901306152, + "ce_orig": 0.7448489665985107, + "epoch": 0.9677187432597598, + "kl_loss": 0.19224797189235687, + "loss_ib": 0.007963242940604687, + "step": 3365 + }, + { + "ce_ib": 2.643749237060547, + "ce_orig": 0.6895259618759155, + "epoch": 0.9677187432597598, + "kl_loss": 0.12853817641735077, + "loss_ib": 0.003929130733013153, + "step": 3365 + }, + { + "ce_ib": 3.998896598815918, + "ce_orig": 1.1990605592727661, + "epoch": 0.9677187432597598, + "kl_loss": 0.1612451672554016, + "loss_ib": 0.00561134796589613, + "step": 3365 + }, + { + "ce_ib": 1.7967687845230103, + "ce_orig": 0.4065140187740326, + "epoch": 0.9680063268387376, + "kl_loss": 0.12538346648216248, + "loss_ib": 0.0030506032053381205, + "step": 3366 + }, + { + "ce_ib": 2.7919371128082275, + "ce_orig": 0.760013997554779, + "epoch": 0.9680063268387376, + "kl_loss": 0.19539996981620789, + "loss_ib": 0.004745936952531338, + "step": 3366 + }, + { + "ce_ib": 5.360004425048828, + "ce_orig": 0.8262405395507812, + "epoch": 0.9680063268387376, + "kl_loss": 0.19646190106868744, + "loss_ib": 0.00732462340965867, + "step": 3366 + }, + { + "ce_ib": 2.5690841674804688, + "ce_orig": 0.5322592854499817, + "epoch": 0.9680063268387376, + "kl_loss": 0.19171074032783508, + "loss_ib": 0.004486191552132368, + "step": 3366 + }, + { + "ce_ib": 3.5332329273223877, + "ce_orig": 0.6923716068267822, + "epoch": 0.9682939104177152, + "kl_loss": 0.18643233180046082, + "loss_ib": 0.005397556349635124, + "step": 3367 + }, + { + "ce_ib": 2.137251138687134, + "ce_orig": 0.4419175088405609, + "epoch": 0.9682939104177152, + "kl_loss": 0.1389484852552414, + "loss_ib": 0.003526735818013549, + "step": 3367 + }, + { + "ce_ib": 3.3930022716522217, + "ce_orig": 0.66623854637146, + "epoch": 0.9682939104177152, + "kl_loss": 0.2919387221336365, + "loss_ib": 0.006312389392405748, + "step": 3367 + }, + { + "ce_ib": 1.9669915437698364, + "ce_orig": 0.280337393283844, + "epoch": 0.9682939104177152, + "kl_loss": 0.14466644823551178, + "loss_ib": 0.0034136560279875994, + "step": 3367 + }, + { + "ce_ib": 2.3636677265167236, + "ce_orig": 0.669973611831665, + "epoch": 0.9685814939966928, + "kl_loss": 0.18152929842472076, + "loss_ib": 0.004178960807621479, + "step": 3368 + }, + { + "ce_ib": 3.0850510597229004, + "ce_orig": 0.7594128847122192, + "epoch": 0.9685814939966928, + "kl_loss": 0.17985433340072632, + "loss_ib": 0.00488359434530139, + "step": 3368 + }, + { + "ce_ib": 6.024084091186523, + "ce_orig": 1.2403037548065186, + "epoch": 0.9685814939966928, + "kl_loss": 0.14014658331871033, + "loss_ib": 0.007425549905747175, + "step": 3368 + }, + { + "ce_ib": 5.361085891723633, + "ce_orig": 1.2129536867141724, + "epoch": 0.9685814939966928, + "kl_loss": 0.1495765596628189, + "loss_ib": 0.006856851279735565, + "step": 3368 + }, + { + "ce_ib": 4.767640590667725, + "ce_orig": 1.0102713108062744, + "epoch": 0.9688690775756704, + "kl_loss": 0.15812979638576508, + "loss_ib": 0.006348938215523958, + "step": 3369 + }, + { + "ce_ib": 4.018687725067139, + "ce_orig": 0.8509500622749329, + "epoch": 0.9688690775756704, + "kl_loss": 0.08308425545692444, + "loss_ib": 0.004849530290812254, + "step": 3369 + }, + { + "ce_ib": 3.0332224369049072, + "ce_orig": 0.8657724261283875, + "epoch": 0.9688690775756704, + "kl_loss": 0.17610390484333038, + "loss_ib": 0.004794261418282986, + "step": 3369 + }, + { + "ce_ib": 3.214836835861206, + "ce_orig": 0.8655975461006165, + "epoch": 0.9688690775756704, + "kl_loss": 0.14806881546974182, + "loss_ib": 0.004695524927228689, + "step": 3369 + }, + { + "epoch": 0.969156661154648, + "grad_norm": 0.16944774985313416, + "learning_rate": 7.921318311142594e-06, + "loss": 0.8638, + "step": 3370 + }, + { + "ce_ib": 6.1859822273254395, + "ce_orig": 1.4082434177398682, + "epoch": 0.969156661154648, + "kl_loss": 0.14949750900268555, + "loss_ib": 0.007680957205593586, + "step": 3370 + }, + { + "ce_ib": 5.258711814880371, + "ce_orig": 0.8352487087249756, + "epoch": 0.969156661154648, + "kl_loss": 0.2584083676338196, + "loss_ib": 0.007842795923352242, + "step": 3370 + }, + { + "ce_ib": 5.019040107727051, + "ce_orig": 0.9207519888877869, + "epoch": 0.969156661154648, + "kl_loss": 0.37928497791290283, + "loss_ib": 0.0088118901476264, + "step": 3370 + }, + { + "ce_ib": 3.6812098026275635, + "ce_orig": 0.4733133614063263, + "epoch": 0.969156661154648, + "kl_loss": 0.22392170131206512, + "loss_ib": 0.005920426920056343, + "step": 3370 + }, + { + "ce_ib": 2.489124298095703, + "ce_orig": 0.7511807084083557, + "epoch": 0.9694442447336257, + "kl_loss": 0.1577809900045395, + "loss_ib": 0.00406693434342742, + "step": 3371 + }, + { + "ce_ib": 2.0912439823150635, + "ce_orig": 0.726611316204071, + "epoch": 0.9694442447336257, + "kl_loss": 0.110408715903759, + "loss_ib": 0.0031953309662640095, + "step": 3371 + }, + { + "ce_ib": 4.86668062210083, + "ce_orig": 1.073229432106018, + "epoch": 0.9694442447336257, + "kl_loss": 0.1771741658449173, + "loss_ib": 0.006638422608375549, + "step": 3371 + }, + { + "ce_ib": 1.598490834236145, + "ce_orig": 0.4849386513233185, + "epoch": 0.9694442447336257, + "kl_loss": 0.13831256330013275, + "loss_ib": 0.0029816164169460535, + "step": 3371 + }, + { + "ce_ib": 1.6163724660873413, + "ce_orig": 0.42712095379829407, + "epoch": 0.9697318283126034, + "kl_loss": 0.17094750702381134, + "loss_ib": 0.0033258472103625536, + "step": 3372 + }, + { + "ce_ib": 4.811914920806885, + "ce_orig": 1.1536673307418823, + "epoch": 0.9697318283126034, + "kl_loss": 0.15536358952522278, + "loss_ib": 0.006365550681948662, + "step": 3372 + }, + { + "ce_ib": 5.322317123413086, + "ce_orig": 0.9854094386100769, + "epoch": 0.9697318283126034, + "kl_loss": 0.19234752655029297, + "loss_ib": 0.007245792541652918, + "step": 3372 + }, + { + "ce_ib": 3.307384729385376, + "ce_orig": 0.5329270958900452, + "epoch": 0.9697318283126034, + "kl_loss": 0.15921121835708618, + "loss_ib": 0.004899497143924236, + "step": 3372 + }, + { + "ce_ib": 2.5357015132904053, + "ce_orig": 0.5346412062644958, + "epoch": 0.970019411891581, + "kl_loss": 0.28000667691230774, + "loss_ib": 0.005335768219083548, + "step": 3373 + }, + { + "ce_ib": 2.8606648445129395, + "ce_orig": 0.5403164029121399, + "epoch": 0.970019411891581, + "kl_loss": 0.17318080365657806, + "loss_ib": 0.004592472687363625, + "step": 3373 + }, + { + "ce_ib": 4.227109909057617, + "ce_orig": 0.7562904953956604, + "epoch": 0.970019411891581, + "kl_loss": 0.14068825542926788, + "loss_ib": 0.005633992608636618, + "step": 3373 + }, + { + "ce_ib": 2.0514771938323975, + "ce_orig": 0.5329060554504395, + "epoch": 0.970019411891581, + "kl_loss": 0.15046603977680206, + "loss_ib": 0.0035561376716941595, + "step": 3373 + }, + { + "ce_ib": 3.657341241836548, + "ce_orig": 0.4941119849681854, + "epoch": 0.9703069954705587, + "kl_loss": 0.21991752088069916, + "loss_ib": 0.005856516305357218, + "step": 3374 + }, + { + "ce_ib": 3.5676252841949463, + "ce_orig": 0.7633748650550842, + "epoch": 0.9703069954705587, + "kl_loss": 0.2628079652786255, + "loss_ib": 0.00619570491835475, + "step": 3374 + }, + { + "ce_ib": 2.5931241512298584, + "ce_orig": 0.38618898391723633, + "epoch": 0.9703069954705587, + "kl_loss": 0.2094118893146515, + "loss_ib": 0.004687243141233921, + "step": 3374 + }, + { + "ce_ib": 5.282919406890869, + "ce_orig": 1.1541601419448853, + "epoch": 0.9703069954705587, + "kl_loss": 0.20672863721847534, + "loss_ib": 0.007350205443799496, + "step": 3374 + }, + { + "epoch": 0.9705945790495363, + "grad_norm": 0.13194169104099274, + "learning_rate": 7.91501636771738e-06, + "loss": 0.843, + "step": 3375 + }, + { + "ce_ib": 4.210152626037598, + "ce_orig": 1.2796735763549805, + "epoch": 0.9705945790495363, + "kl_loss": 0.15165278315544128, + "loss_ib": 0.005726680625230074, + "step": 3375 + }, + { + "ce_ib": 3.0663700103759766, + "ce_orig": 0.7793062329292297, + "epoch": 0.9705945790495363, + "kl_loss": 0.18150638043880463, + "loss_ib": 0.004881433676928282, + "step": 3375 + }, + { + "ce_ib": 6.188352108001709, + "ce_orig": 1.6772207021713257, + "epoch": 0.9705945790495363, + "kl_loss": 0.24646809697151184, + "loss_ib": 0.008653032593429089, + "step": 3375 + }, + { + "ce_ib": 5.2002339363098145, + "ce_orig": 0.8174858689308167, + "epoch": 0.9705945790495363, + "kl_loss": 0.22224009037017822, + "loss_ib": 0.007422634866088629, + "step": 3375 + }, + { + "ce_ib": 6.017975330352783, + "ce_orig": 0.712619423866272, + "epoch": 0.9708821626285139, + "kl_loss": 0.21894316375255585, + "loss_ib": 0.008207406848669052, + "step": 3376 + }, + { + "ce_ib": 2.6555702686309814, + "ce_orig": 0.718238115310669, + "epoch": 0.9708821626285139, + "kl_loss": 0.12672093510627747, + "loss_ib": 0.003922779578715563, + "step": 3376 + }, + { + "ce_ib": 3.6031665802001953, + "ce_orig": 0.7362394332885742, + "epoch": 0.9708821626285139, + "kl_loss": 0.14403758943080902, + "loss_ib": 0.005043542478233576, + "step": 3376 + }, + { + "ce_ib": 3.015345335006714, + "ce_orig": 0.3999210000038147, + "epoch": 0.9708821626285139, + "kl_loss": 0.13566303253173828, + "loss_ib": 0.004371975548565388, + "step": 3376 + }, + { + "ce_ib": 3.198920965194702, + "ce_orig": 0.686124861240387, + "epoch": 0.9711697462074915, + "kl_loss": 0.2647211253643036, + "loss_ib": 0.005846132058650255, + "step": 3377 + }, + { + "ce_ib": 3.3042049407958984, + "ce_orig": 0.6864076852798462, + "epoch": 0.9711697462074915, + "kl_loss": 0.1798515021800995, + "loss_ib": 0.005102720111608505, + "step": 3377 + }, + { + "ce_ib": 3.9529266357421875, + "ce_orig": 0.9423424005508423, + "epoch": 0.9711697462074915, + "kl_loss": 0.12075973302125931, + "loss_ib": 0.005160524044185877, + "step": 3377 + }, + { + "ce_ib": 5.147461891174316, + "ce_orig": 1.2978872060775757, + "epoch": 0.9711697462074915, + "kl_loss": 0.12015613168478012, + "loss_ib": 0.006349022965878248, + "step": 3377 + }, + { + "ce_ib": 5.635847568511963, + "ce_orig": 0.8242132067680359, + "epoch": 0.9714573297864691, + "kl_loss": 0.17602364718914032, + "loss_ib": 0.007396083790808916, + "step": 3378 + }, + { + "ce_ib": 2.2231991291046143, + "ce_orig": 0.7308230996131897, + "epoch": 0.9714573297864691, + "kl_loss": 0.15361528098583221, + "loss_ib": 0.0037593517918139696, + "step": 3378 + }, + { + "ce_ib": 3.594510078430176, + "ce_orig": 1.0332648754119873, + "epoch": 0.9714573297864691, + "kl_loss": 0.1673983484506607, + "loss_ib": 0.005268493667244911, + "step": 3378 + }, + { + "ce_ib": 7.15906286239624, + "ce_orig": 0.9184725284576416, + "epoch": 0.9714573297864691, + "kl_loss": 0.21291524171829224, + "loss_ib": 0.009288215078413486, + "step": 3378 + }, + { + "ce_ib": 3.362574577331543, + "ce_orig": 0.7825086712837219, + "epoch": 0.9717449133654469, + "kl_loss": 0.1582070291042328, + "loss_ib": 0.004944644868373871, + "step": 3379 + }, + { + "ce_ib": 1.8788294792175293, + "ce_orig": 0.4731810986995697, + "epoch": 0.9717449133654469, + "kl_loss": 0.13517113029956818, + "loss_ib": 0.003230540780350566, + "step": 3379 + }, + { + "ce_ib": 5.473275661468506, + "ce_orig": 1.0070199966430664, + "epoch": 0.9717449133654469, + "kl_loss": 0.1808428019285202, + "loss_ib": 0.007281703874468803, + "step": 3379 + }, + { + "ce_ib": 3.8158047199249268, + "ce_orig": 0.7360803484916687, + "epoch": 0.9717449133654469, + "kl_loss": 0.2189553827047348, + "loss_ib": 0.006005358416587114, + "step": 3379 + }, + { + "epoch": 0.9720324969444245, + "grad_norm": 0.13989585638046265, + "learning_rate": 7.908707401341146e-06, + "loss": 0.8428, + "step": 3380 + }, + { + "ce_ib": 3.6061182022094727, + "ce_orig": 0.9334418177604675, + "epoch": 0.9720324969444245, + "kl_loss": 0.12301605939865112, + "loss_ib": 0.004836278967559338, + "step": 3380 + }, + { + "ce_ib": 2.2758023738861084, + "ce_orig": 0.6641829609870911, + "epoch": 0.9720324969444245, + "kl_loss": 0.12209293991327286, + "loss_ib": 0.0034967316314578056, + "step": 3380 + }, + { + "ce_ib": 3.3555967807769775, + "ce_orig": 0.6431085467338562, + "epoch": 0.9720324969444245, + "kl_loss": 0.1508365273475647, + "loss_ib": 0.004863962065428495, + "step": 3380 + }, + { + "ce_ib": 3.940795421600342, + "ce_orig": 0.4514043629169464, + "epoch": 0.9720324969444245, + "kl_loss": 0.21500813961029053, + "loss_ib": 0.006090877112001181, + "step": 3380 + }, + { + "ce_ib": 5.4142913818359375, + "ce_orig": 0.8018391132354736, + "epoch": 0.9723200805234021, + "kl_loss": 0.2888498902320862, + "loss_ib": 0.008302790112793446, + "step": 3381 + }, + { + "ce_ib": 4.059741973876953, + "ce_orig": 1.0782474279403687, + "epoch": 0.9723200805234021, + "kl_loss": 0.17222803831100464, + "loss_ib": 0.005782022140920162, + "step": 3381 + }, + { + "ce_ib": 1.7783828973770142, + "ce_orig": 0.4648227095603943, + "epoch": 0.9723200805234021, + "kl_loss": 0.13780853152275085, + "loss_ib": 0.0031564682722091675, + "step": 3381 + }, + { + "ce_ib": 3.3391993045806885, + "ce_orig": 0.6225607991218567, + "epoch": 0.9723200805234021, + "kl_loss": 0.14162209630012512, + "loss_ib": 0.00475542014464736, + "step": 3381 + }, + { + "ce_ib": 2.1432011127471924, + "ce_orig": 0.47130653262138367, + "epoch": 0.9726076641023798, + "kl_loss": 0.22828638553619385, + "loss_ib": 0.0044260649010539055, + "step": 3382 + }, + { + "ce_ib": 1.7448965311050415, + "ce_orig": 0.4925086498260498, + "epoch": 0.9726076641023798, + "kl_loss": 0.14395590126514435, + "loss_ib": 0.0031844554468989372, + "step": 3382 + }, + { + "ce_ib": 4.9956955909729, + "ce_orig": 0.5777289271354675, + "epoch": 0.9726076641023798, + "kl_loss": 0.17851214110851288, + "loss_ib": 0.006780816707760096, + "step": 3382 + }, + { + "ce_ib": 3.3773462772369385, + "ce_orig": 0.7415807247161865, + "epoch": 0.9726076641023798, + "kl_loss": 0.19379642605781555, + "loss_ib": 0.005315310787409544, + "step": 3382 + }, + { + "ce_ib": 2.8199052810668945, + "ce_orig": 0.6760353446006775, + "epoch": 0.9728952476813574, + "kl_loss": 0.1829606145620346, + "loss_ib": 0.0046495115384459496, + "step": 3383 + }, + { + "ce_ib": 4.930408954620361, + "ce_orig": 0.8986415266990662, + "epoch": 0.9728952476813574, + "kl_loss": 0.19197364151477814, + "loss_ib": 0.006850145291537046, + "step": 3383 + }, + { + "ce_ib": 3.041443347930908, + "ce_orig": 0.7203273177146912, + "epoch": 0.9728952476813574, + "kl_loss": 0.13436131179332733, + "loss_ib": 0.004385056439787149, + "step": 3383 + }, + { + "ce_ib": 3.7674295902252197, + "ce_orig": 0.5422837138175964, + "epoch": 0.9728952476813574, + "kl_loss": 0.12284506857395172, + "loss_ib": 0.0049958801828324795, + "step": 3383 + }, + { + "ce_ib": 2.9765124320983887, + "ce_orig": 0.5030128955841064, + "epoch": 0.973182831260335, + "kl_loss": 0.1037299782037735, + "loss_ib": 0.004013812635093927, + "step": 3384 + }, + { + "ce_ib": 6.503939628601074, + "ce_orig": 1.4330966472625732, + "epoch": 0.973182831260335, + "kl_loss": 0.14553798735141754, + "loss_ib": 0.007959319278597832, + "step": 3384 + }, + { + "ce_ib": 4.0605340003967285, + "ce_orig": 1.097898006439209, + "epoch": 0.973182831260335, + "kl_loss": 0.17302744090557098, + "loss_ib": 0.005790808238089085, + "step": 3384 + }, + { + "ce_ib": 3.046247959136963, + "ce_orig": 0.5611546635627747, + "epoch": 0.973182831260335, + "kl_loss": 0.15213365852832794, + "loss_ib": 0.004567584488540888, + "step": 3384 + }, + { + "epoch": 0.9734704148393126, + "grad_norm": 0.13612419366836548, + "learning_rate": 7.90239142721366e-06, + "loss": 0.7786, + "step": 3385 + }, + { + "ce_ib": 4.4949822425842285, + "ce_orig": 1.0764628648757935, + "epoch": 0.9734704148393126, + "kl_loss": 0.10850635170936584, + "loss_ib": 0.005580046214163303, + "step": 3385 + }, + { + "ce_ib": 3.229387044906616, + "ce_orig": 0.7230898141860962, + "epoch": 0.9734704148393126, + "kl_loss": 0.12891986966133118, + "loss_ib": 0.004518585745245218, + "step": 3385 + }, + { + "ce_ib": 5.704230785369873, + "ce_orig": 1.404258370399475, + "epoch": 0.9734704148393126, + "kl_loss": 0.1469068080186844, + "loss_ib": 0.007173298392444849, + "step": 3385 + }, + { + "ce_ib": 3.1368181705474854, + "ce_orig": 0.7700353264808655, + "epoch": 0.9734704148393126, + "kl_loss": 0.15922811627388, + "loss_ib": 0.004729099106043577, + "step": 3385 + }, + { + "ce_ib": 1.8685883283615112, + "ce_orig": 0.4982770085334778, + "epoch": 0.9737579984182904, + "kl_loss": 0.1526927351951599, + "loss_ib": 0.0033955154940485954, + "step": 3386 + }, + { + "ce_ib": 3.0173964500427246, + "ce_orig": 0.5032864212989807, + "epoch": 0.9737579984182904, + "kl_loss": 0.22673267126083374, + "loss_ib": 0.0052847228944301605, + "step": 3386 + }, + { + "ce_ib": 5.301321983337402, + "ce_orig": 1.287934422492981, + "epoch": 0.9737579984182904, + "kl_loss": 0.1392768770456314, + "loss_ib": 0.006694091018289328, + "step": 3386 + }, + { + "ce_ib": 4.813733100891113, + "ce_orig": 0.9549645781517029, + "epoch": 0.9737579984182904, + "kl_loss": 0.12166082859039307, + "loss_ib": 0.0060303411446511745, + "step": 3386 + }, + { + "ce_ib": 2.457993984222412, + "ce_orig": 0.7173119187355042, + "epoch": 0.974045581997268, + "kl_loss": 0.15175700187683105, + "loss_ib": 0.003975564148277044, + "step": 3387 + }, + { + "ce_ib": 4.007168769836426, + "ce_orig": 0.9523072838783264, + "epoch": 0.974045581997268, + "kl_loss": 0.2802882790565491, + "loss_ib": 0.0068100509233772755, + "step": 3387 + }, + { + "ce_ib": 7.699909210205078, + "ce_orig": 1.4836623668670654, + "epoch": 0.974045581997268, + "kl_loss": 0.17145544290542603, + "loss_ib": 0.009414463303983212, + "step": 3387 + }, + { + "ce_ib": 4.380040168762207, + "ce_orig": 0.7042768001556396, + "epoch": 0.974045581997268, + "kl_loss": 0.31074559688568115, + "loss_ib": 0.007487495895475149, + "step": 3387 + }, + { + "ce_ib": 3.7172691822052, + "ce_orig": 0.7695116400718689, + "epoch": 0.9743331655762456, + "kl_loss": 0.18039920926094055, + "loss_ib": 0.005521261598914862, + "step": 3388 + }, + { + "ce_ib": 4.724152565002441, + "ce_orig": 1.0218024253845215, + "epoch": 0.9743331655762456, + "kl_loss": 0.10076863318681717, + "loss_ib": 0.005731838755309582, + "step": 3388 + }, + { + "ce_ib": 3.0517079830169678, + "ce_orig": 0.6455915570259094, + "epoch": 0.9743331655762456, + "kl_loss": 0.19510158896446228, + "loss_ib": 0.005002724006772041, + "step": 3388 + }, + { + "ce_ib": 3.4774599075317383, + "ce_orig": 0.6320598125457764, + "epoch": 0.9743331655762456, + "kl_loss": 0.16735947132110596, + "loss_ib": 0.005151054821908474, + "step": 3388 + }, + { + "ce_ib": 3.746098756790161, + "ce_orig": 1.024916172027588, + "epoch": 0.9746207491552232, + "kl_loss": 0.15447968244552612, + "loss_ib": 0.005290895700454712, + "step": 3389 + }, + { + "ce_ib": 2.736370325088501, + "ce_orig": 0.8275421261787415, + "epoch": 0.9746207491552232, + "kl_loss": 0.11455312371253967, + "loss_ib": 0.003881901502609253, + "step": 3389 + }, + { + "ce_ib": 4.025751113891602, + "ce_orig": 0.7536959648132324, + "epoch": 0.9746207491552232, + "kl_loss": 0.24058696627616882, + "loss_ib": 0.006431620568037033, + "step": 3389 + }, + { + "ce_ib": 7.31816291809082, + "ce_orig": 1.8474345207214355, + "epoch": 0.9746207491552232, + "kl_loss": 0.194832906126976, + "loss_ib": 0.00926649197936058, + "step": 3389 + }, + { + "epoch": 0.9749083327342009, + "grad_norm": 0.13618436455726624, + "learning_rate": 7.896068460551562e-06, + "loss": 0.8548, + "step": 3390 + }, + { + "ce_ib": 5.410468101501465, + "ce_orig": 1.136919379234314, + "epoch": 0.9749083327342009, + "kl_loss": 0.1693323850631714, + "loss_ib": 0.007103791460394859, + "step": 3390 + }, + { + "ce_ib": 5.731606960296631, + "ce_orig": 1.351380467414856, + "epoch": 0.9749083327342009, + "kl_loss": 0.21078535914421082, + "loss_ib": 0.007839459925889969, + "step": 3390 + }, + { + "ce_ib": 6.533336162567139, + "ce_orig": 1.851571798324585, + "epoch": 0.9749083327342009, + "kl_loss": 0.1635259985923767, + "loss_ib": 0.008168595843017101, + "step": 3390 + }, + { + "ce_ib": 4.610360145568848, + "ce_orig": 1.0146766901016235, + "epoch": 0.9749083327342009, + "kl_loss": 0.15394660830497742, + "loss_ib": 0.006149826105684042, + "step": 3390 + }, + { + "ce_ib": 2.9997358322143555, + "ce_orig": 0.6265425682067871, + "epoch": 0.9751959163131785, + "kl_loss": 0.22258195281028748, + "loss_ib": 0.0052255550399422646, + "step": 3391 + }, + { + "ce_ib": 3.70426344871521, + "ce_orig": 0.7267270684242249, + "epoch": 0.9751959163131785, + "kl_loss": 0.2594698667526245, + "loss_ib": 0.00629896204918623, + "step": 3391 + }, + { + "ce_ib": 2.598280191421509, + "ce_orig": 0.6154271960258484, + "epoch": 0.9751959163131785, + "kl_loss": 0.1383146047592163, + "loss_ib": 0.003981426358222961, + "step": 3391 + }, + { + "ce_ib": 4.652087688446045, + "ce_orig": 0.7761231064796448, + "epoch": 0.9751959163131785, + "kl_loss": 0.24249820411205292, + "loss_ib": 0.007077069953083992, + "step": 3391 + }, + { + "ce_ib": 5.056804180145264, + "ce_orig": 1.4419957399368286, + "epoch": 0.9754834998921561, + "kl_loss": 0.09356854856014252, + "loss_ib": 0.005992489866912365, + "step": 3392 + }, + { + "ce_ib": 1.315329670906067, + "ce_orig": 0.3392177224159241, + "epoch": 0.9754834998921561, + "kl_loss": 0.395469605922699, + "loss_ib": 0.005270025692880154, + "step": 3392 + }, + { + "ce_ib": 2.76008677482605, + "ce_orig": 0.4836275577545166, + "epoch": 0.9754834998921561, + "kl_loss": 0.2406359165906906, + "loss_ib": 0.005166445858776569, + "step": 3392 + }, + { + "ce_ib": 2.1726367473602295, + "ce_orig": 0.6408441662788391, + "epoch": 0.9754834998921561, + "kl_loss": 0.12070643901824951, + "loss_ib": 0.003379701403900981, + "step": 3392 + }, + { + "ce_ib": 4.170151233673096, + "ce_orig": 1.0611485242843628, + "epoch": 0.9757710834711338, + "kl_loss": 0.18313029408454895, + "loss_ib": 0.006001454312354326, + "step": 3393 + }, + { + "ce_ib": 1.7983235120773315, + "ce_orig": 0.6901389360427856, + "epoch": 0.9757710834711338, + "kl_loss": 0.1658233255147934, + "loss_ib": 0.0034565567038953304, + "step": 3393 + }, + { + "ce_ib": 4.531277179718018, + "ce_orig": 0.7966442704200745, + "epoch": 0.9757710834711338, + "kl_loss": 0.2134697139263153, + "loss_ib": 0.006665973924100399, + "step": 3393 + }, + { + "ce_ib": 0.7024407982826233, + "ce_orig": 0.17529116570949554, + "epoch": 0.9757710834711338, + "kl_loss": 0.24953562021255493, + "loss_ib": 0.0031977968756109476, + "step": 3393 + }, + { + "ce_ib": 4.176857948303223, + "ce_orig": 0.6368083953857422, + "epoch": 0.9760586670501115, + "kl_loss": 0.1886952817440033, + "loss_ib": 0.006063810549676418, + "step": 3394 + }, + { + "ce_ib": 2.1527974605560303, + "ce_orig": 0.4821227788925171, + "epoch": 0.9760586670501115, + "kl_loss": 0.13843920826911926, + "loss_ib": 0.0035371892154216766, + "step": 3394 + }, + { + "ce_ib": 3.0450644493103027, + "ce_orig": 0.7612169981002808, + "epoch": 0.9760586670501115, + "kl_loss": 0.13844256103038788, + "loss_ib": 0.004429489839822054, + "step": 3394 + }, + { + "ce_ib": 2.938256025314331, + "ce_orig": 0.7958825826644897, + "epoch": 0.9760586670501115, + "kl_loss": 0.14529477059841156, + "loss_ib": 0.004391203634440899, + "step": 3394 + }, + { + "epoch": 0.9763462506290891, + "grad_norm": 0.14222535490989685, + "learning_rate": 7.889738516588355e-06, + "loss": 0.8545, + "step": 3395 + }, + { + "ce_ib": 4.215273380279541, + "ce_orig": 0.6473325490951538, + "epoch": 0.9763462506290891, + "kl_loss": 0.19917839765548706, + "loss_ib": 0.006207057274878025, + "step": 3395 + }, + { + "ce_ib": 4.417902946472168, + "ce_orig": 0.9895395040512085, + "epoch": 0.9763462506290891, + "kl_loss": 0.15684649348258972, + "loss_ib": 0.00598636781796813, + "step": 3395 + }, + { + "ce_ib": 3.1024534702301025, + "ce_orig": 0.6193094849586487, + "epoch": 0.9763462506290891, + "kl_loss": 0.1755594164133072, + "loss_ib": 0.0048580477014184, + "step": 3395 + }, + { + "ce_ib": 2.7875912189483643, + "ce_orig": 0.8767540454864502, + "epoch": 0.9763462506290891, + "kl_loss": 0.15727072954177856, + "loss_ib": 0.004360298626124859, + "step": 3395 + }, + { + "ce_ib": 4.5966339111328125, + "ce_orig": 0.9812648892402649, + "epoch": 0.9766338342080667, + "kl_loss": 0.23788225650787354, + "loss_ib": 0.006975456140935421, + "step": 3396 + }, + { + "ce_ib": 4.607774257659912, + "ce_orig": 0.8800014853477478, + "epoch": 0.9766338342080667, + "kl_loss": 0.19901499152183533, + "loss_ib": 0.006597924046218395, + "step": 3396 + }, + { + "ce_ib": 3.965853691101074, + "ce_orig": 0.8489425182342529, + "epoch": 0.9766338342080667, + "kl_loss": 0.1815967708826065, + "loss_ib": 0.0057818214409053326, + "step": 3396 + }, + { + "ce_ib": 4.360556125640869, + "ce_orig": 1.241746425628662, + "epoch": 0.9766338342080667, + "kl_loss": 0.17900243401527405, + "loss_ib": 0.0061505804769694805, + "step": 3396 + }, + { + "ce_ib": 1.5148981809616089, + "ce_orig": 0.4476351737976074, + "epoch": 0.9769214177870443, + "kl_loss": 0.1190444827079773, + "loss_ib": 0.0027053430676460266, + "step": 3397 + }, + { + "ce_ib": 1.292661428451538, + "ce_orig": 0.288264662027359, + "epoch": 0.9769214177870443, + "kl_loss": 0.3437040448188782, + "loss_ib": 0.004729701671749353, + "step": 3397 + }, + { + "ce_ib": 4.700530052185059, + "ce_orig": 0.953300416469574, + "epoch": 0.9769214177870443, + "kl_loss": 0.15420275926589966, + "loss_ib": 0.006242557428777218, + "step": 3397 + }, + { + "ce_ib": 7.644336223602295, + "ce_orig": 1.4192290306091309, + "epoch": 0.9769214177870443, + "kl_loss": 0.14439821243286133, + "loss_ib": 0.00908831786364317, + "step": 3397 + }, + { + "ce_ib": 2.762955665588379, + "ce_orig": 0.7860487103462219, + "epoch": 0.977209001366022, + "kl_loss": 0.08292270451784134, + "loss_ib": 0.0035921826492995024, + "step": 3398 + }, + { + "ce_ib": 2.253643274307251, + "ce_orig": 0.5364568829536438, + "epoch": 0.977209001366022, + "kl_loss": 0.15241482853889465, + "loss_ib": 0.0037777915131300688, + "step": 3398 + }, + { + "ce_ib": 2.805814743041992, + "ce_orig": 0.5698195695877075, + "epoch": 0.977209001366022, + "kl_loss": 0.1327616572380066, + "loss_ib": 0.004133431240916252, + "step": 3398 + }, + { + "ce_ib": 4.1514763832092285, + "ce_orig": 0.8830550909042358, + "epoch": 0.977209001366022, + "kl_loss": 0.16704951226711273, + "loss_ib": 0.005821971222758293, + "step": 3398 + }, + { + "ce_ib": 4.162603855133057, + "ce_orig": 0.9497296214103699, + "epoch": 0.9774965849449997, + "kl_loss": 0.28530704975128174, + "loss_ib": 0.0070156739093363285, + "step": 3399 + }, + { + "ce_ib": 5.149123191833496, + "ce_orig": 1.1974029541015625, + "epoch": 0.9774965849449997, + "kl_loss": 0.32122719287872314, + "loss_ib": 0.0083613945171237, + "step": 3399 + }, + { + "ce_ib": 4.9888410568237305, + "ce_orig": 0.6846693754196167, + "epoch": 0.9774965849449997, + "kl_loss": 0.2650846242904663, + "loss_ib": 0.007639687042683363, + "step": 3399 + }, + { + "ce_ib": 4.3787336349487305, + "ce_orig": 1.2579338550567627, + "epoch": 0.9774965849449997, + "kl_loss": 0.21190515160560608, + "loss_ib": 0.0064977845177054405, + "step": 3399 + }, + { + "epoch": 0.9777841685239773, + "grad_norm": 0.12320379167795181, + "learning_rate": 7.883401610574338e-06, + "loss": 0.8356, + "step": 3400 + }, + { + "ce_ib": 5.150910377502441, + "ce_orig": 1.0649821758270264, + "epoch": 0.9777841685239773, + "kl_loss": 0.18784582614898682, + "loss_ib": 0.007029368542134762, + "step": 3400 + }, + { + "ce_ib": 5.491443157196045, + "ce_orig": 1.2651485204696655, + "epoch": 0.9777841685239773, + "kl_loss": 0.1542711853981018, + "loss_ib": 0.007034155074506998, + "step": 3400 + }, + { + "ce_ib": 6.810986042022705, + "ce_orig": 0.6852309107780457, + "epoch": 0.9777841685239773, + "kl_loss": 0.17126226425170898, + "loss_ib": 0.008523608557879925, + "step": 3400 + }, + { + "ce_ib": 2.7411696910858154, + "ce_orig": 0.5043026208877563, + "epoch": 0.9777841685239773, + "kl_loss": 0.2056465446949005, + "loss_ib": 0.00479763513430953, + "step": 3400 + }, + { + "ce_ib": 5.662820816040039, + "ce_orig": 1.1337542533874512, + "epoch": 0.9780717521029549, + "kl_loss": 0.17984016239643097, + "loss_ib": 0.007461222354322672, + "step": 3401 + }, + { + "ce_ib": 2.6017630100250244, + "ce_orig": 0.650318443775177, + "epoch": 0.9780717521029549, + "kl_loss": 0.14812131226062775, + "loss_ib": 0.004082975909113884, + "step": 3401 + }, + { + "ce_ib": 6.457510471343994, + "ce_orig": 1.6532646417617798, + "epoch": 0.9780717521029549, + "kl_loss": 0.16998326778411865, + "loss_ib": 0.008157342672348022, + "step": 3401 + }, + { + "ce_ib": 3.859694719314575, + "ce_orig": 0.7313605546951294, + "epoch": 0.9780717521029549, + "kl_loss": 0.29052093625068665, + "loss_ib": 0.0067649041302502155, + "step": 3401 + }, + { + "ce_ib": 5.177092552185059, + "ce_orig": 1.373183250427246, + "epoch": 0.9783593356819326, + "kl_loss": 0.19599243998527527, + "loss_ib": 0.007137016858905554, + "step": 3402 + }, + { + "ce_ib": 2.6227519512176514, + "ce_orig": 0.4978690445423126, + "epoch": 0.9783593356819326, + "kl_loss": 0.18379437923431396, + "loss_ib": 0.004460695665329695, + "step": 3402 + }, + { + "ce_ib": 6.757078647613525, + "ce_orig": 1.547440767288208, + "epoch": 0.9783593356819326, + "kl_loss": 0.2054741382598877, + "loss_ib": 0.008811820298433304, + "step": 3402 + }, + { + "ce_ib": 3.565350294113159, + "ce_orig": 1.0976275205612183, + "epoch": 0.9783593356819326, + "kl_loss": 0.13594964146614075, + "loss_ib": 0.004924846813082695, + "step": 3402 + }, + { + "ce_ib": 3.4920830726623535, + "ce_orig": 0.9094115495681763, + "epoch": 0.9786469192609102, + "kl_loss": 0.21762816607952118, + "loss_ib": 0.005668364465236664, + "step": 3403 + }, + { + "ce_ib": 3.2433388233184814, + "ce_orig": 0.710840106010437, + "epoch": 0.9786469192609102, + "kl_loss": 0.16845111548900604, + "loss_ib": 0.004927849862724543, + "step": 3403 + }, + { + "ce_ib": 4.916865825653076, + "ce_orig": 1.1687496900558472, + "epoch": 0.9786469192609102, + "kl_loss": 0.19762159883975983, + "loss_ib": 0.0068930815905332565, + "step": 3403 + }, + { + "ce_ib": 3.069805383682251, + "ce_orig": 0.35140955448150635, + "epoch": 0.9786469192609102, + "kl_loss": 0.2385689914226532, + "loss_ib": 0.005455494858324528, + "step": 3403 + }, + { + "ce_ib": 4.039005756378174, + "ce_orig": 0.9156465530395508, + "epoch": 0.9789345028398878, + "kl_loss": 0.2040882110595703, + "loss_ib": 0.006079888436943293, + "step": 3404 + }, + { + "ce_ib": 3.081186532974243, + "ce_orig": 0.7008799314498901, + "epoch": 0.9789345028398878, + "kl_loss": 0.13770267367362976, + "loss_ib": 0.004458213225007057, + "step": 3404 + }, + { + "ce_ib": 3.6173810958862305, + "ce_orig": 0.6762107014656067, + "epoch": 0.9789345028398878, + "kl_loss": 0.17035458981990814, + "loss_ib": 0.005320926662534475, + "step": 3404 + }, + { + "ce_ib": 4.6647233963012695, + "ce_orig": 0.8420403003692627, + "epoch": 0.9789345028398878, + "kl_loss": 0.18927742540836334, + "loss_ib": 0.006557497661560774, + "step": 3404 + }, + { + "epoch": 0.9792220864188654, + "grad_norm": 0.11818605661392212, + "learning_rate": 7.877057757776586e-06, + "loss": 0.849, + "step": 3405 + }, + { + "ce_ib": 3.148676633834839, + "ce_orig": 0.8221349716186523, + "epoch": 0.9792220864188654, + "kl_loss": 0.20044484734535217, + "loss_ib": 0.0051531256176531315, + "step": 3405 + }, + { + "ce_ib": 2.8703765869140625, + "ce_orig": 0.6788053512573242, + "epoch": 0.9792220864188654, + "kl_loss": 0.17389684915542603, + "loss_ib": 0.004609344992786646, + "step": 3405 + }, + { + "ce_ib": 6.005980014801025, + "ce_orig": 1.1921123266220093, + "epoch": 0.9792220864188654, + "kl_loss": 0.14553236961364746, + "loss_ib": 0.007461303845047951, + "step": 3405 + }, + { + "ce_ib": 3.4133353233337402, + "ce_orig": 0.6822628378868103, + "epoch": 0.9792220864188654, + "kl_loss": 0.13526397943496704, + "loss_ib": 0.004765975289046764, + "step": 3405 + }, + { + "ce_ib": 3.985776901245117, + "ce_orig": 0.8898341655731201, + "epoch": 0.9795096699978432, + "kl_loss": 0.10750067979097366, + "loss_ib": 0.005060783587396145, + "step": 3406 + }, + { + "ce_ib": 4.004836082458496, + "ce_orig": 0.5383054614067078, + "epoch": 0.9795096699978432, + "kl_loss": 0.15194761753082275, + "loss_ib": 0.005524312146008015, + "step": 3406 + }, + { + "ce_ib": 1.9988408088684082, + "ce_orig": 0.5730834603309631, + "epoch": 0.9795096699978432, + "kl_loss": 0.13099397718906403, + "loss_ib": 0.003308780724182725, + "step": 3406 + }, + { + "ce_ib": 5.766295909881592, + "ce_orig": 1.1069341897964478, + "epoch": 0.9795096699978432, + "kl_loss": 0.22097903490066528, + "loss_ib": 0.007976085878908634, + "step": 3406 + }, + { + "ce_ib": 6.353326320648193, + "ce_orig": 1.2692307233810425, + "epoch": 0.9797972535768208, + "kl_loss": 0.21439647674560547, + "loss_ib": 0.00849729124456644, + "step": 3407 + }, + { + "ce_ib": 2.9031059741973877, + "ce_orig": 0.726775586605072, + "epoch": 0.9797972535768208, + "kl_loss": 0.2151387333869934, + "loss_ib": 0.005054492969065905, + "step": 3407 + }, + { + "ce_ib": 1.0906070470809937, + "ce_orig": 0.24621285498142242, + "epoch": 0.9797972535768208, + "kl_loss": 0.2608253061771393, + "loss_ib": 0.0036988600622862577, + "step": 3407 + }, + { + "ce_ib": 2.486668109893799, + "ce_orig": 0.6514029502868652, + "epoch": 0.9797972535768208, + "kl_loss": 0.11037754267454147, + "loss_ib": 0.003590443404391408, + "step": 3407 + }, + { + "ce_ib": 2.4864749908447266, + "ce_orig": 0.6359842419624329, + "epoch": 0.9800848371557984, + "kl_loss": 0.17955172061920166, + "loss_ib": 0.004281992092728615, + "step": 3408 + }, + { + "ce_ib": 5.634780406951904, + "ce_orig": 1.2513819932937622, + "epoch": 0.9800848371557984, + "kl_loss": 0.14344395697116852, + "loss_ib": 0.007069219835102558, + "step": 3408 + }, + { + "ce_ib": 2.438685894012451, + "ce_orig": 0.5503337979316711, + "epoch": 0.9800848371557984, + "kl_loss": 0.12774620950222015, + "loss_ib": 0.0037161479704082012, + "step": 3408 + }, + { + "ce_ib": 3.798079013824463, + "ce_orig": 0.917182445526123, + "epoch": 0.9800848371557984, + "kl_loss": 0.15523630380630493, + "loss_ib": 0.005350441671907902, + "step": 3408 + }, + { + "ce_ib": 5.431910514831543, + "ce_orig": 1.5601218938827515, + "epoch": 0.980372420734776, + "kl_loss": 0.15038368105888367, + "loss_ib": 0.006935747805982828, + "step": 3409 + }, + { + "ce_ib": 3.8263673782348633, + "ce_orig": 1.1314396858215332, + "epoch": 0.980372420734776, + "kl_loss": 0.16416721045970917, + "loss_ib": 0.005468039307743311, + "step": 3409 + }, + { + "ce_ib": 6.425678730010986, + "ce_orig": 1.617762565612793, + "epoch": 0.980372420734776, + "kl_loss": 0.17114725708961487, + "loss_ib": 0.008137150667607784, + "step": 3409 + }, + { + "ce_ib": 1.271180510520935, + "ce_orig": 0.2720681130886078, + "epoch": 0.980372420734776, + "kl_loss": 0.39973753690719604, + "loss_ib": 0.005268555600196123, + "step": 3409 + }, + { + "epoch": 0.9806600043137537, + "grad_norm": 0.1948043704032898, + "learning_rate": 7.870706973478919e-06, + "loss": 0.8256, + "step": 3410 + }, + { + "ce_ib": 2.970771074295044, + "ce_orig": 0.5962566137313843, + "epoch": 0.9806600043137537, + "kl_loss": 0.2115197777748108, + "loss_ib": 0.005085968878120184, + "step": 3410 + }, + { + "ce_ib": 5.792418956756592, + "ce_orig": 0.8279144167900085, + "epoch": 0.9806600043137537, + "kl_loss": 0.22786056995391846, + "loss_ib": 0.008071024902164936, + "step": 3410 + }, + { + "ce_ib": 4.824738025665283, + "ce_orig": 0.9720006585121155, + "epoch": 0.9806600043137537, + "kl_loss": 0.26451554894447327, + "loss_ib": 0.007469893433153629, + "step": 3410 + }, + { + "ce_ib": 3.994586229324341, + "ce_orig": 0.8106865286827087, + "epoch": 0.9806600043137537, + "kl_loss": 0.1907758265733719, + "loss_ib": 0.005902344360947609, + "step": 3410 + }, + { + "ce_ib": 4.186345100402832, + "ce_orig": 0.9436593651771545, + "epoch": 0.9809475878927313, + "kl_loss": 0.1590348482131958, + "loss_ib": 0.0057766935788095, + "step": 3411 + }, + { + "ce_ib": 2.861416816711426, + "ce_orig": 0.3712025582790375, + "epoch": 0.9809475878927313, + "kl_loss": 0.23446184396743774, + "loss_ib": 0.005206035450100899, + "step": 3411 + }, + { + "ce_ib": 2.533237934112549, + "ce_orig": 0.7918297052383423, + "epoch": 0.9809475878927313, + "kl_loss": 0.20700335502624512, + "loss_ib": 0.004603271372616291, + "step": 3411 + }, + { + "ce_ib": 2.937781572341919, + "ce_orig": 0.8034105896949768, + "epoch": 0.9809475878927313, + "kl_loss": 0.13726751506328583, + "loss_ib": 0.004310456570237875, + "step": 3411 + }, + { + "ce_ib": 4.010529041290283, + "ce_orig": 0.5506054162979126, + "epoch": 0.9812351714717089, + "kl_loss": 0.24194425344467163, + "loss_ib": 0.006429971195757389, + "step": 3412 + }, + { + "ce_ib": 3.7684900760650635, + "ce_orig": 0.8020967841148376, + "epoch": 0.9812351714717089, + "kl_loss": 0.1542297899723053, + "loss_ib": 0.005310787819325924, + "step": 3412 + }, + { + "ce_ib": 3.128882646560669, + "ce_orig": 0.700315535068512, + "epoch": 0.9812351714717089, + "kl_loss": 0.18535861372947693, + "loss_ib": 0.004982468672096729, + "step": 3412 + }, + { + "ce_ib": 1.7198127508163452, + "ce_orig": 0.3811705708503723, + "epoch": 0.9812351714717089, + "kl_loss": 0.3579263687133789, + "loss_ib": 0.005299076437950134, + "step": 3412 + }, + { + "ce_ib": 3.903872013092041, + "ce_orig": 0.9258607625961304, + "epoch": 0.9815227550506866, + "kl_loss": 0.13087089359760284, + "loss_ib": 0.005212581250816584, + "step": 3413 + }, + { + "ce_ib": 5.506099700927734, + "ce_orig": 0.729184091091156, + "epoch": 0.9815227550506866, + "kl_loss": 0.22512567043304443, + "loss_ib": 0.0077573563903570175, + "step": 3413 + }, + { + "ce_ib": 2.0325937271118164, + "ce_orig": 0.513338029384613, + "epoch": 0.9815227550506866, + "kl_loss": 0.1256358027458191, + "loss_ib": 0.003288951702415943, + "step": 3413 + }, + { + "ce_ib": 3.515200614929199, + "ce_orig": 0.5904244780540466, + "epoch": 0.9815227550506866, + "kl_loss": 0.1786893606185913, + "loss_ib": 0.005302093923091888, + "step": 3413 + }, + { + "ce_ib": 2.600278854370117, + "ce_orig": 0.7142552733421326, + "epoch": 0.9818103386296643, + "kl_loss": 0.11530480533838272, + "loss_ib": 0.0037533268332481384, + "step": 3414 + }, + { + "ce_ib": 3.220609426498413, + "ce_orig": 0.8559975624084473, + "epoch": 0.9818103386296643, + "kl_loss": 0.10407093167304993, + "loss_ib": 0.0042613185942173, + "step": 3414 + }, + { + "ce_ib": 5.2722859382629395, + "ce_orig": 1.1403502225875854, + "epoch": 0.9818103386296643, + "kl_loss": 0.19252604246139526, + "loss_ib": 0.007197546306997538, + "step": 3414 + }, + { + "ce_ib": 3.5321130752563477, + "ce_orig": 1.022772192955017, + "epoch": 0.9818103386296643, + "kl_loss": 0.17381763458251953, + "loss_ib": 0.00527028925716877, + "step": 3414 + }, + { + "epoch": 0.9820979222086419, + "grad_norm": 0.12855443358421326, + "learning_rate": 7.864349272981849e-06, + "loss": 0.8227, + "step": 3415 + }, + { + "ce_ib": 2.500255823135376, + "ce_orig": 0.5489463210105896, + "epoch": 0.9820979222086419, + "kl_loss": 0.12116288393735886, + "loss_ib": 0.003711884841322899, + "step": 3415 + }, + { + "ce_ib": 2.067596197128296, + "ce_orig": 0.4920838475227356, + "epoch": 0.9820979222086419, + "kl_loss": 0.1303139328956604, + "loss_ib": 0.0033707355614751577, + "step": 3415 + }, + { + "ce_ib": 4.176887035369873, + "ce_orig": 1.136609673500061, + "epoch": 0.9820979222086419, + "kl_loss": 0.15957504510879517, + "loss_ib": 0.005772637203335762, + "step": 3415 + }, + { + "ce_ib": 2.5648186206817627, + "ce_orig": 0.6519291996955872, + "epoch": 0.9820979222086419, + "kl_loss": 0.16596375405788422, + "loss_ib": 0.004224455915391445, + "step": 3415 + }, + { + "ce_ib": 6.418355464935303, + "ce_orig": 1.5127941370010376, + "epoch": 0.9823855057876195, + "kl_loss": 0.3162001371383667, + "loss_ib": 0.009580357000231743, + "step": 3416 + }, + { + "ce_ib": 4.351707935333252, + "ce_orig": 0.8844999670982361, + "epoch": 0.9823855057876195, + "kl_loss": 0.13074412941932678, + "loss_ib": 0.005659149028360844, + "step": 3416 + }, + { + "ce_ib": 3.9574413299560547, + "ce_orig": 1.1608961820602417, + "epoch": 0.9823855057876195, + "kl_loss": 0.2526435852050781, + "loss_ib": 0.006483877077698708, + "step": 3416 + }, + { + "ce_ib": 3.9256319999694824, + "ce_orig": 0.8162941336631775, + "epoch": 0.9823855057876195, + "kl_loss": 0.12078717350959778, + "loss_ib": 0.005133503582328558, + "step": 3416 + }, + { + "ce_ib": 4.638798236846924, + "ce_orig": 1.1164242029190063, + "epoch": 0.9826730893665971, + "kl_loss": 0.15142133831977844, + "loss_ib": 0.006153011228889227, + "step": 3417 + }, + { + "ce_ib": 2.157910108566284, + "ce_orig": 0.6114092469215393, + "epoch": 0.9826730893665971, + "kl_loss": 0.16145017743110657, + "loss_ib": 0.003772411961108446, + "step": 3417 + }, + { + "ce_ib": 2.926138162612915, + "ce_orig": 0.8910624384880066, + "epoch": 0.9826730893665971, + "kl_loss": 0.16647234559059143, + "loss_ib": 0.00459086149930954, + "step": 3417 + }, + { + "ce_ib": 3.529407024383545, + "ce_orig": 0.6714034080505371, + "epoch": 0.9826730893665971, + "kl_loss": 0.10876938700675964, + "loss_ib": 0.004617101047188044, + "step": 3417 + }, + { + "ce_ib": 6.786965847015381, + "ce_orig": 1.6942065954208374, + "epoch": 0.9829606729455748, + "kl_loss": 0.5935918092727661, + "loss_ib": 0.012722883373498917, + "step": 3418 + }, + { + "ce_ib": 4.197144985198975, + "ce_orig": 0.5511111617088318, + "epoch": 0.9829606729455748, + "kl_loss": 0.24122095108032227, + "loss_ib": 0.006609354633837938, + "step": 3418 + }, + { + "ce_ib": 2.8625736236572266, + "ce_orig": 0.7469815611839294, + "epoch": 0.9829606729455748, + "kl_loss": 0.12336476892232895, + "loss_ib": 0.004096221178770065, + "step": 3418 + }, + { + "ce_ib": 4.985449314117432, + "ce_orig": 1.1814351081848145, + "epoch": 0.9829606729455748, + "kl_loss": 0.13557110726833344, + "loss_ib": 0.006341160275042057, + "step": 3418 + }, + { + "ce_ib": 3.2362735271453857, + "ce_orig": 0.8342568278312683, + "epoch": 0.9832482565245525, + "kl_loss": 0.14577153325080872, + "loss_ib": 0.004693988710641861, + "step": 3419 + }, + { + "ce_ib": 6.221692085266113, + "ce_orig": 1.3782672882080078, + "epoch": 0.9832482565245525, + "kl_loss": 0.18083852529525757, + "loss_ib": 0.008030077442526817, + "step": 3419 + }, + { + "ce_ib": 4.414186954498291, + "ce_orig": 1.1550620794296265, + "epoch": 0.9832482565245525, + "kl_loss": 0.3978153467178345, + "loss_ib": 0.008392340503633022, + "step": 3419 + }, + { + "ce_ib": 2.8852176666259766, + "ce_orig": 0.6951417326927185, + "epoch": 0.9832482565245525, + "kl_loss": 0.19396254420280457, + "loss_ib": 0.004824842792004347, + "step": 3419 + }, + { + "epoch": 0.9835358401035301, + "grad_norm": 0.14215655624866486, + "learning_rate": 7.857984671602547e-06, + "loss": 0.9402, + "step": 3420 + }, + { + "ce_ib": 5.479686737060547, + "ce_orig": 1.1060905456542969, + "epoch": 0.9835358401035301, + "kl_loss": 0.1736413836479187, + "loss_ib": 0.0072161005809903145, + "step": 3420 + }, + { + "ce_ib": 3.675795555114746, + "ce_orig": 0.9898388981819153, + "epoch": 0.9835358401035301, + "kl_loss": 0.12058541178703308, + "loss_ib": 0.004881649278104305, + "step": 3420 + }, + { + "ce_ib": 4.728831768035889, + "ce_orig": 1.1201549768447876, + "epoch": 0.9835358401035301, + "kl_loss": 0.20714059472084045, + "loss_ib": 0.006800237577408552, + "step": 3420 + }, + { + "ce_ib": 3.8550925254821777, + "ce_orig": 0.8426575660705566, + "epoch": 0.9835358401035301, + "kl_loss": 0.1471833884716034, + "loss_ib": 0.0053269267082214355, + "step": 3420 + }, + { + "ce_ib": 3.6790411472320557, + "ce_orig": 0.7487043738365173, + "epoch": 0.9838234236825077, + "kl_loss": 0.17478828132152557, + "loss_ib": 0.005426924210041761, + "step": 3421 + }, + { + "ce_ib": 4.720896244049072, + "ce_orig": 1.2956913709640503, + "epoch": 0.9838234236825077, + "kl_loss": 0.1746886670589447, + "loss_ib": 0.0064677828922867775, + "step": 3421 + }, + { + "ce_ib": 3.5500850677490234, + "ce_orig": 0.956774115562439, + "epoch": 0.9838234236825077, + "kl_loss": 0.13437381386756897, + "loss_ib": 0.0048938230611383915, + "step": 3421 + }, + { + "ce_ib": 7.2723588943481445, + "ce_orig": 1.316385269165039, + "epoch": 0.9838234236825077, + "kl_loss": 0.19496214389801025, + "loss_ib": 0.009221980348229408, + "step": 3421 + }, + { + "ce_ib": 2.079954147338867, + "ce_orig": 0.3182763159275055, + "epoch": 0.9841110072614854, + "kl_loss": 0.15670007467269897, + "loss_ib": 0.003646954894065857, + "step": 3422 + }, + { + "ce_ib": 5.328388214111328, + "ce_orig": 1.2647371292114258, + "epoch": 0.9841110072614854, + "kl_loss": 0.2220952808856964, + "loss_ib": 0.00754934037104249, + "step": 3422 + }, + { + "ce_ib": 3.939596652984619, + "ce_orig": 0.8408695459365845, + "epoch": 0.9841110072614854, + "kl_loss": 0.1479591578245163, + "loss_ib": 0.005419188179075718, + "step": 3422 + }, + { + "ce_ib": 4.471084117889404, + "ce_orig": 0.8735271692276001, + "epoch": 0.9841110072614854, + "kl_loss": 0.17771871387958527, + "loss_ib": 0.006248271558433771, + "step": 3422 + }, + { + "ce_ib": 2.250105857849121, + "ce_orig": 0.2973710596561432, + "epoch": 0.984398590840463, + "kl_loss": 0.19057923555374146, + "loss_ib": 0.004155898001044989, + "step": 3423 + }, + { + "ce_ib": 3.7952873706817627, + "ce_orig": 0.8925336003303528, + "epoch": 0.984398590840463, + "kl_loss": 0.21539895236492157, + "loss_ib": 0.005949276499450207, + "step": 3423 + }, + { + "ce_ib": 3.6343276500701904, + "ce_orig": 0.45468369126319885, + "epoch": 0.984398590840463, + "kl_loss": 0.15937767922878265, + "loss_ib": 0.005228104535490274, + "step": 3423 + }, + { + "ce_ib": 3.5942745208740234, + "ce_orig": 0.46511614322662354, + "epoch": 0.984398590840463, + "kl_loss": 0.3047160506248474, + "loss_ib": 0.006641434971243143, + "step": 3423 + }, + { + "ce_ib": 4.867744445800781, + "ce_orig": 1.3110123872756958, + "epoch": 0.9846861744194406, + "kl_loss": 0.18775075674057007, + "loss_ib": 0.006745251826941967, + "step": 3424 + }, + { + "ce_ib": 3.823261022567749, + "ce_orig": 0.5520159602165222, + "epoch": 0.9846861744194406, + "kl_loss": 0.16684623062610626, + "loss_ib": 0.00549172330647707, + "step": 3424 + }, + { + "ce_ib": 4.069196701049805, + "ce_orig": 1.0530626773834229, + "epoch": 0.9846861744194406, + "kl_loss": 0.24838632345199585, + "loss_ib": 0.006553059443831444, + "step": 3424 + }, + { + "ce_ib": 7.070786476135254, + "ce_orig": 0.8900722861289978, + "epoch": 0.9846861744194406, + "kl_loss": 0.21244072914123535, + "loss_ib": 0.009195193648338318, + "step": 3424 + }, + { + "epoch": 0.9849737579984182, + "grad_norm": 0.13362911343574524, + "learning_rate": 7.851613184674821e-06, + "loss": 0.8601, + "step": 3425 + }, + { + "ce_ib": 3.959319829940796, + "ce_orig": 0.9734428524971008, + "epoch": 0.9849737579984182, + "kl_loss": 0.24427232146263123, + "loss_ib": 0.006402043160051107, + "step": 3425 + }, + { + "ce_ib": 5.283565998077393, + "ce_orig": 1.2763714790344238, + "epoch": 0.9849737579984182, + "kl_loss": 0.1873166263103485, + "loss_ib": 0.007156732026487589, + "step": 3425 + }, + { + "ce_ib": 5.565418243408203, + "ce_orig": 1.3298746347427368, + "epoch": 0.9849737579984182, + "kl_loss": 0.24601656198501587, + "loss_ib": 0.008025583811104298, + "step": 3425 + }, + { + "ce_ib": 2.2490646839141846, + "ce_orig": 0.5303550958633423, + "epoch": 0.9849737579984182, + "kl_loss": 0.25229066610336304, + "loss_ib": 0.004771971143782139, + "step": 3425 + }, + { + "ce_ib": 7.459403038024902, + "ce_orig": 1.9408222436904907, + "epoch": 0.985261341577396, + "kl_loss": 0.19859284162521362, + "loss_ib": 0.009445331059396267, + "step": 3426 + }, + { + "ce_ib": 3.959585428237915, + "ce_orig": 0.873038649559021, + "epoch": 0.985261341577396, + "kl_loss": 0.174845889210701, + "loss_ib": 0.005708043929189444, + "step": 3426 + }, + { + "ce_ib": 2.474936008453369, + "ce_orig": 0.6404684782028198, + "epoch": 0.985261341577396, + "kl_loss": 0.1498228907585144, + "loss_ib": 0.003973165061324835, + "step": 3426 + }, + { + "ce_ib": 3.894033908843994, + "ce_orig": 0.7103426456451416, + "epoch": 0.985261341577396, + "kl_loss": 0.36888352036476135, + "loss_ib": 0.007582869380712509, + "step": 3426 + }, + { + "ce_ib": 3.5785205364227295, + "ce_orig": 0.9433950781822205, + "epoch": 0.9855489251563736, + "kl_loss": 0.24037708342075348, + "loss_ib": 0.005982291419059038, + "step": 3427 + }, + { + "ce_ib": 4.283274173736572, + "ce_orig": 1.0321413278579712, + "epoch": 0.9855489251563736, + "kl_loss": 0.15006671845912933, + "loss_ib": 0.005783941131085157, + "step": 3427 + }, + { + "ce_ib": 2.165343999862671, + "ce_orig": 0.4839995205402374, + "epoch": 0.9855489251563736, + "kl_loss": 0.18053556978702545, + "loss_ib": 0.003970699850469828, + "step": 3427 + }, + { + "ce_ib": 3.0618183612823486, + "ce_orig": 0.6312494277954102, + "epoch": 0.9855489251563736, + "kl_loss": 0.29033780097961426, + "loss_ib": 0.005965196527540684, + "step": 3427 + }, + { + "ce_ib": 4.720545768737793, + "ce_orig": 0.8039481043815613, + "epoch": 0.9858365087353512, + "kl_loss": 0.1694232076406479, + "loss_ib": 0.006414777599275112, + "step": 3428 + }, + { + "ce_ib": 2.763080596923828, + "ce_orig": 0.4906012713909149, + "epoch": 0.9858365087353512, + "kl_loss": 0.18354284763336182, + "loss_ib": 0.004598509054630995, + "step": 3428 + }, + { + "ce_ib": 5.389105796813965, + "ce_orig": 1.2411772012710571, + "epoch": 0.9858365087353512, + "kl_loss": 0.16058723628520966, + "loss_ib": 0.006994978059083223, + "step": 3428 + }, + { + "ce_ib": 9.738515853881836, + "ce_orig": 1.9260482788085938, + "epoch": 0.9858365087353512, + "kl_loss": 0.21591489017009735, + "loss_ib": 0.011897665448486805, + "step": 3428 + }, + { + "ce_ib": 5.421628475189209, + "ce_orig": 0.7417599558830261, + "epoch": 0.9861240923143288, + "kl_loss": 0.2413271814584732, + "loss_ib": 0.007834900170564651, + "step": 3429 + }, + { + "ce_ib": 2.9121289253234863, + "ce_orig": 0.8004123568534851, + "epoch": 0.9861240923143288, + "kl_loss": 0.14072248339653015, + "loss_ib": 0.004319353494793177, + "step": 3429 + }, + { + "ce_ib": 3.582531452178955, + "ce_orig": 0.9690291881561279, + "epoch": 0.9861240923143288, + "kl_loss": 0.2247290015220642, + "loss_ib": 0.005829821806401014, + "step": 3429 + }, + { + "ce_ib": 3.869741916656494, + "ce_orig": 0.6451637148857117, + "epoch": 0.9861240923143288, + "kl_loss": 0.1829209327697754, + "loss_ib": 0.005698951426893473, + "step": 3429 + }, + { + "epoch": 0.9864116758933065, + "grad_norm": 0.1281895488500595, + "learning_rate": 7.84523482754906e-06, + "loss": 0.8712, + "step": 3430 + }, + { + "ce_ib": 5.201231956481934, + "ce_orig": 1.074430227279663, + "epoch": 0.9864116758933065, + "kl_loss": 0.22707390785217285, + "loss_ib": 0.007471970748156309, + "step": 3430 + }, + { + "ce_ib": 3.9298295974731445, + "ce_orig": 0.680620014667511, + "epoch": 0.9864116758933065, + "kl_loss": 0.15220913290977478, + "loss_ib": 0.005451920907944441, + "step": 3430 + }, + { + "ce_ib": 4.675142288208008, + "ce_orig": 1.1221580505371094, + "epoch": 0.9864116758933065, + "kl_loss": 0.1496671587228775, + "loss_ib": 0.006171814166009426, + "step": 3430 + }, + { + "ce_ib": 4.924769878387451, + "ce_orig": 1.2895196676254272, + "epoch": 0.9864116758933065, + "kl_loss": 0.10675053298473358, + "loss_ib": 0.005992275197058916, + "step": 3430 + }, + { + "ce_ib": 5.047325611114502, + "ce_orig": 1.396250605583191, + "epoch": 0.9866992594722841, + "kl_loss": 0.22294571995735168, + "loss_ib": 0.007276782765984535, + "step": 3431 + }, + { + "ce_ib": 4.277408599853516, + "ce_orig": 0.9603812098503113, + "epoch": 0.9866992594722841, + "kl_loss": 0.15404027700424194, + "loss_ib": 0.005817811470478773, + "step": 3431 + }, + { + "ce_ib": 3.8581697940826416, + "ce_orig": 0.9961796402931213, + "epoch": 0.9866992594722841, + "kl_loss": 0.152266725897789, + "loss_ib": 0.005380837246775627, + "step": 3431 + }, + { + "ce_ib": 3.1314592361450195, + "ce_orig": 0.871813178062439, + "epoch": 0.9866992594722841, + "kl_loss": 0.15132541954517365, + "loss_ib": 0.004644713364541531, + "step": 3431 + }, + { + "ce_ib": 3.397204875946045, + "ce_orig": 0.7433147430419922, + "epoch": 0.9869868430512617, + "kl_loss": 0.198675736784935, + "loss_ib": 0.005383962299674749, + "step": 3432 + }, + { + "ce_ib": 4.3375115394592285, + "ce_orig": 0.8831318616867065, + "epoch": 0.9869868430512617, + "kl_loss": 0.1678323745727539, + "loss_ib": 0.006015835329890251, + "step": 3432 + }, + { + "ce_ib": 6.686400413513184, + "ce_orig": 0.4095657765865326, + "epoch": 0.9869868430512617, + "kl_loss": 0.6584827899932861, + "loss_ib": 0.013271229341626167, + "step": 3432 + }, + { + "ce_ib": 4.632791519165039, + "ce_orig": 1.0032193660736084, + "epoch": 0.9869868430512617, + "kl_loss": 0.14990699291229248, + "loss_ib": 0.006131860893219709, + "step": 3432 + }, + { + "ce_ib": 3.6553525924682617, + "ce_orig": 0.7279351949691772, + "epoch": 0.9872744266302395, + "kl_loss": 0.18262223899364471, + "loss_ib": 0.005481575150042772, + "step": 3433 + }, + { + "ce_ib": 3.096954822540283, + "ce_orig": 0.6978638768196106, + "epoch": 0.9872744266302395, + "kl_loss": 0.2755298316478729, + "loss_ib": 0.0058522531762719154, + "step": 3433 + }, + { + "ce_ib": 4.5218424797058105, + "ce_orig": 1.2452096939086914, + "epoch": 0.9872744266302395, + "kl_loss": 0.1315675973892212, + "loss_ib": 0.005837518256157637, + "step": 3433 + }, + { + "ce_ib": 3.1991798877716064, + "ce_orig": 0.9928109049797058, + "epoch": 0.9872744266302395, + "kl_loss": 0.13118982315063477, + "loss_ib": 0.004511078353971243, + "step": 3433 + }, + { + "ce_ib": 4.368243217468262, + "ce_orig": 0.9354481101036072, + "epoch": 0.9875620102092171, + "kl_loss": 0.14985519647598267, + "loss_ib": 0.0058667948469519615, + "step": 3434 + }, + { + "ce_ib": 3.2699718475341797, + "ce_orig": 0.49894773960113525, + "epoch": 0.9875620102092171, + "kl_loss": 0.15097999572753906, + "loss_ib": 0.004779771901667118, + "step": 3434 + }, + { + "ce_ib": 3.821390151977539, + "ce_orig": 0.8605843782424927, + "epoch": 0.9875620102092171, + "kl_loss": 0.09251387417316437, + "loss_ib": 0.004746528808027506, + "step": 3434 + }, + { + "ce_ib": 8.170073509216309, + "ce_orig": 1.6981322765350342, + "epoch": 0.9875620102092171, + "kl_loss": 0.15220212936401367, + "loss_ib": 0.009692094288766384, + "step": 3434 + }, + { + "epoch": 0.9878495937881947, + "grad_norm": 0.15913456678390503, + "learning_rate": 7.838849615592205e-06, + "loss": 0.8746, + "step": 3435 + }, + { + "ce_ib": 4.217980861663818, + "ce_orig": 0.7539312243461609, + "epoch": 0.9878495937881947, + "kl_loss": 0.23650529980659485, + "loss_ib": 0.006583033595234156, + "step": 3435 + }, + { + "ce_ib": 4.578693389892578, + "ce_orig": 0.9591953754425049, + "epoch": 0.9878495937881947, + "kl_loss": 0.20041564106941223, + "loss_ib": 0.006582850124686956, + "step": 3435 + }, + { + "ce_ib": 2.7944319248199463, + "ce_orig": 0.806319534778595, + "epoch": 0.9878495937881947, + "kl_loss": 0.11605003476142883, + "loss_ib": 0.0039549320936203, + "step": 3435 + }, + { + "ce_ib": 4.057459354400635, + "ce_orig": 0.8706860542297363, + "epoch": 0.9878495937881947, + "kl_loss": 0.1973263919353485, + "loss_ib": 0.006030723452568054, + "step": 3435 + }, + { + "ce_ib": 4.335592269897461, + "ce_orig": 1.0314421653747559, + "epoch": 0.9881371773671723, + "kl_loss": 0.21071362495422363, + "loss_ib": 0.006442728452384472, + "step": 3436 + }, + { + "ce_ib": 4.234057903289795, + "ce_orig": 0.9200159907341003, + "epoch": 0.9881371773671723, + "kl_loss": 0.17504474520683289, + "loss_ib": 0.005984505172818899, + "step": 3436 + }, + { + "ce_ib": 1.813873291015625, + "ce_orig": 0.49205154180526733, + "epoch": 0.9881371773671723, + "kl_loss": 0.1222650408744812, + "loss_ib": 0.0030365237034857273, + "step": 3436 + }, + { + "ce_ib": 4.326531887054443, + "ce_orig": 1.1090530157089233, + "epoch": 0.9881371773671723, + "kl_loss": 0.2935987710952759, + "loss_ib": 0.007262519560754299, + "step": 3436 + }, + { + "ce_ib": 4.066534996032715, + "ce_orig": 0.9139801263809204, + "epoch": 0.98842476094615, + "kl_loss": 0.15114793181419373, + "loss_ib": 0.005578014068305492, + "step": 3437 + }, + { + "ce_ib": 2.3589062690734863, + "ce_orig": 0.7318440079689026, + "epoch": 0.98842476094615, + "kl_loss": 0.08683557063341141, + "loss_ib": 0.0032272618263959885, + "step": 3437 + }, + { + "ce_ib": 5.194920063018799, + "ce_orig": 0.70222008228302, + "epoch": 0.98842476094615, + "kl_loss": 0.1237090528011322, + "loss_ib": 0.006432010792195797, + "step": 3437 + }, + { + "ce_ib": 2.7060632705688477, + "ce_orig": 0.28738707304000854, + "epoch": 0.98842476094615, + "kl_loss": 0.18296992778778076, + "loss_ib": 0.004535762593150139, + "step": 3437 + }, + { + "ce_ib": 4.547948360443115, + "ce_orig": 0.8635962605476379, + "epoch": 0.9887123445251276, + "kl_loss": 0.20548829436302185, + "loss_ib": 0.006602831184864044, + "step": 3438 + }, + { + "ce_ib": 5.341332912445068, + "ce_orig": 1.2810200452804565, + "epoch": 0.9887123445251276, + "kl_loss": 0.2399187982082367, + "loss_ib": 0.007740521337836981, + "step": 3438 + }, + { + "ce_ib": 2.3967504501342773, + "ce_orig": 0.7540445923805237, + "epoch": 0.9887123445251276, + "kl_loss": 0.12030159682035446, + "loss_ib": 0.003599766409024596, + "step": 3438 + }, + { + "ce_ib": 3.2674050331115723, + "ce_orig": 0.45364436507225037, + "epoch": 0.9887123445251276, + "kl_loss": 0.1599595844745636, + "loss_ib": 0.004867000970989466, + "step": 3438 + }, + { + "ce_ib": 4.119136810302734, + "ce_orig": 0.6131978631019592, + "epoch": 0.9889999281041053, + "kl_loss": 0.2164367288351059, + "loss_ib": 0.006283504422754049, + "step": 3439 + }, + { + "ce_ib": 3.53912353515625, + "ce_orig": 0.5077279210090637, + "epoch": 0.9889999281041053, + "kl_loss": 0.19106553494930267, + "loss_ib": 0.005449778866022825, + "step": 3439 + }, + { + "ce_ib": 6.03710412979126, + "ce_orig": 1.5442434549331665, + "epoch": 0.9889999281041053, + "kl_loss": 0.14707544445991516, + "loss_ib": 0.007507858332246542, + "step": 3439 + }, + { + "ce_ib": 2.607732057571411, + "ce_orig": 0.7450422644615173, + "epoch": 0.9889999281041053, + "kl_loss": 0.13679882884025574, + "loss_ib": 0.003975720144808292, + "step": 3439 + }, + { + "epoch": 0.9892875116830829, + "grad_norm": 0.13898856937885284, + "learning_rate": 7.832457564187715e-06, + "loss": 0.8609, + "step": 3440 + }, + { + "ce_ib": 3.1075663566589355, + "ce_orig": 0.7921412587165833, + "epoch": 0.9892875116830829, + "kl_loss": 0.20522885024547577, + "loss_ib": 0.005159854888916016, + "step": 3440 + }, + { + "ce_ib": 3.9263219833374023, + "ce_orig": 0.9389967322349548, + "epoch": 0.9892875116830829, + "kl_loss": 0.1793370544910431, + "loss_ib": 0.005719692446291447, + "step": 3440 + }, + { + "ce_ib": 3.3058881759643555, + "ce_orig": 0.7230337858200073, + "epoch": 0.9892875116830829, + "kl_loss": 0.15588286519050598, + "loss_ib": 0.004864716902375221, + "step": 3440 + }, + { + "ce_ib": 5.285728454589844, + "ce_orig": 1.1162158250808716, + "epoch": 0.9892875116830829, + "kl_loss": 0.26405081152915955, + "loss_ib": 0.007926235906779766, + "step": 3440 + }, + { + "ce_ib": 1.8290753364562988, + "ce_orig": 0.25276410579681396, + "epoch": 0.9895750952620606, + "kl_loss": 0.17504413425922394, + "loss_ib": 0.003579516662284732, + "step": 3441 + }, + { + "ce_ib": 3.2892091274261475, + "ce_orig": 0.6130251884460449, + "epoch": 0.9895750952620606, + "kl_loss": 0.1433694064617157, + "loss_ib": 0.004722903482615948, + "step": 3441 + }, + { + "ce_ib": 3.10075044631958, + "ce_orig": 0.5951710939407349, + "epoch": 0.9895750952620606, + "kl_loss": 0.17030732333660126, + "loss_ib": 0.00480382377281785, + "step": 3441 + }, + { + "ce_ib": 3.3187947273254395, + "ce_orig": 0.7678763270378113, + "epoch": 0.9895750952620606, + "kl_loss": 0.10151606798171997, + "loss_ib": 0.004333955235779285, + "step": 3441 + }, + { + "ce_ib": 5.563756465911865, + "ce_orig": 1.1164023876190186, + "epoch": 0.9898626788410382, + "kl_loss": 0.219301238656044, + "loss_ib": 0.007756768725812435, + "step": 3442 + }, + { + "ce_ib": 6.084249973297119, + "ce_orig": 1.3264037370681763, + "epoch": 0.9898626788410382, + "kl_loss": 0.10884329676628113, + "loss_ib": 0.0071726832538843155, + "step": 3442 + }, + { + "ce_ib": 3.6121273040771484, + "ce_orig": 0.6331543326377869, + "epoch": 0.9898626788410382, + "kl_loss": 0.15937454998493195, + "loss_ib": 0.005205872468650341, + "step": 3442 + }, + { + "ce_ib": 2.7369942665100098, + "ce_orig": 0.6320769786834717, + "epoch": 0.9898626788410382, + "kl_loss": 0.17476287484169006, + "loss_ib": 0.004484623204916716, + "step": 3442 + }, + { + "ce_ib": 3.8348844051361084, + "ce_orig": 0.8182182312011719, + "epoch": 0.9901502624200158, + "kl_loss": 0.16248485445976257, + "loss_ib": 0.0054597328417003155, + "step": 3443 + }, + { + "ce_ib": 4.710428237915039, + "ce_orig": 1.039255976676941, + "epoch": 0.9901502624200158, + "kl_loss": 0.19095289707183838, + "loss_ib": 0.0066199577413499355, + "step": 3443 + }, + { + "ce_ib": 4.289885520935059, + "ce_orig": 1.242127537727356, + "epoch": 0.9901502624200158, + "kl_loss": 0.17752906680107117, + "loss_ib": 0.006065175868570805, + "step": 3443 + }, + { + "ce_ib": 3.8698458671569824, + "ce_orig": 0.8930543661117554, + "epoch": 0.9901502624200158, + "kl_loss": 0.14210911095142365, + "loss_ib": 0.005290936678647995, + "step": 3443 + }, + { + "ce_ib": 5.814810276031494, + "ce_orig": 1.422218918800354, + "epoch": 0.9904378459989934, + "kl_loss": 0.24918290972709656, + "loss_ib": 0.008306639268994331, + "step": 3444 + }, + { + "ce_ib": 5.187445163726807, + "ce_orig": 0.9302061796188354, + "epoch": 0.9904378459989934, + "kl_loss": 0.14585013687610626, + "loss_ib": 0.0066459462977945805, + "step": 3444 + }, + { + "ce_ib": 8.313593864440918, + "ce_orig": 2.0146496295928955, + "epoch": 0.9904378459989934, + "kl_loss": 0.15409588813781738, + "loss_ib": 0.009854552336037159, + "step": 3444 + }, + { + "ce_ib": 2.8958535194396973, + "ce_orig": 0.6822757124900818, + "epoch": 0.9904378459989934, + "kl_loss": 0.16490834951400757, + "loss_ib": 0.004544937051832676, + "step": 3444 + }, + { + "epoch": 0.990725429577971, + "grad_norm": 0.1467466950416565, + "learning_rate": 7.826058688735527e-06, + "loss": 0.8829, + "step": 3445 + }, + { + "ce_ib": 3.8435325622558594, + "ce_orig": 0.9026134014129639, + "epoch": 0.990725429577971, + "kl_loss": 0.12494276463985443, + "loss_ib": 0.005092959851026535, + "step": 3445 + }, + { + "ce_ib": 2.197558879852295, + "ce_orig": 0.5451766848564148, + "epoch": 0.990725429577971, + "kl_loss": 0.2003113329410553, + "loss_ib": 0.004200672265142202, + "step": 3445 + }, + { + "ce_ib": 5.185354232788086, + "ce_orig": 1.4010822772979736, + "epoch": 0.990725429577971, + "kl_loss": 0.20386537909507751, + "loss_ib": 0.0072240075096488, + "step": 3445 + }, + { + "ce_ib": 6.2193145751953125, + "ce_orig": 1.249549150466919, + "epoch": 0.990725429577971, + "kl_loss": 0.17371061444282532, + "loss_ib": 0.007956421002745628, + "step": 3445 + }, + { + "ce_ib": 3.2630414962768555, + "ce_orig": 0.8684042692184448, + "epoch": 0.9910130131569488, + "kl_loss": 0.12668652832508087, + "loss_ib": 0.004529906902462244, + "step": 3446 + }, + { + "ce_ib": 4.9632697105407715, + "ce_orig": 0.9052724838256836, + "epoch": 0.9910130131569488, + "kl_loss": 0.24909010529518127, + "loss_ib": 0.007454170845448971, + "step": 3446 + }, + { + "ce_ib": 1.8064441680908203, + "ce_orig": 0.5324355363845825, + "epoch": 0.9910130131569488, + "kl_loss": 0.11364492028951645, + "loss_ib": 0.002942893421277404, + "step": 3446 + }, + { + "ce_ib": 4.3204874992370605, + "ce_orig": 1.4047967195510864, + "epoch": 0.9910130131569488, + "kl_loss": 0.2549469470977783, + "loss_ib": 0.0068699573166668415, + "step": 3446 + }, + { + "ce_ib": 1.5152736902236938, + "ce_orig": 0.3911103308200836, + "epoch": 0.9913005967359264, + "kl_loss": 0.11341501772403717, + "loss_ib": 0.0026494241319596767, + "step": 3447 + }, + { + "ce_ib": 4.155624866485596, + "ce_orig": 0.6201159954071045, + "epoch": 0.9913005967359264, + "kl_loss": 0.11841088533401489, + "loss_ib": 0.005339733324944973, + "step": 3447 + }, + { + "ce_ib": 5.389877796173096, + "ce_orig": 0.9210597276687622, + "epoch": 0.9913005967359264, + "kl_loss": 0.18263858556747437, + "loss_ib": 0.0072162640281021595, + "step": 3447 + }, + { + "ce_ib": 2.681873321533203, + "ce_orig": 0.6592261791229248, + "epoch": 0.9913005967359264, + "kl_loss": 0.2475636601448059, + "loss_ib": 0.005157509818673134, + "step": 3447 + }, + { + "ce_ib": 1.8794137239456177, + "ce_orig": 0.4702886641025543, + "epoch": 0.991588180314904, + "kl_loss": 0.1565881371498108, + "loss_ib": 0.0034452949184924364, + "step": 3448 + }, + { + "ce_ib": 3.314703941345215, + "ce_orig": 0.6801603436470032, + "epoch": 0.991588180314904, + "kl_loss": 0.13611814379692078, + "loss_ib": 0.004675885196775198, + "step": 3448 + }, + { + "ce_ib": 4.02594518661499, + "ce_orig": 0.9674837589263916, + "epoch": 0.991588180314904, + "kl_loss": 0.13922417163848877, + "loss_ib": 0.005418187472969294, + "step": 3448 + }, + { + "ce_ib": 6.642727851867676, + "ce_orig": 1.493666648864746, + "epoch": 0.991588180314904, + "kl_loss": 0.18198667466640472, + "loss_ib": 0.008462594822049141, + "step": 3448 + }, + { + "ce_ib": 5.262790203094482, + "ce_orig": 1.323262095451355, + "epoch": 0.9918757638938817, + "kl_loss": 0.24536873400211334, + "loss_ib": 0.007716477382928133, + "step": 3449 + }, + { + "ce_ib": 3.597590923309326, + "ce_orig": 0.9030640721321106, + "epoch": 0.9918757638938817, + "kl_loss": 0.26312556862831116, + "loss_ib": 0.0062288460321724415, + "step": 3449 + }, + { + "ce_ib": 5.998754978179932, + "ce_orig": 1.4180246591567993, + "epoch": 0.9918757638938817, + "kl_loss": 0.13515712320804596, + "loss_ib": 0.007350326050072908, + "step": 3449 + }, + { + "ce_ib": 1.821897029876709, + "ce_orig": 0.4635109305381775, + "epoch": 0.9918757638938817, + "kl_loss": 0.1647477149963379, + "loss_ib": 0.003469374030828476, + "step": 3449 + }, + { + "epoch": 0.9921633474728593, + "grad_norm": 0.13037680089473724, + "learning_rate": 7.819653004652016e-06, + "loss": 0.8726, + "step": 3450 + }, + { + "ce_ib": 6.536886692047119, + "ce_orig": 0.9013069272041321, + "epoch": 0.9921633474728593, + "kl_loss": 0.18917742371559143, + "loss_ib": 0.008428660221397877, + "step": 3450 + }, + { + "ce_ib": 2.731541872024536, + "ce_orig": 0.8654618263244629, + "epoch": 0.9921633474728593, + "kl_loss": 0.10865333676338196, + "loss_ib": 0.0038180751726031303, + "step": 3450 + }, + { + "ce_ib": 7.2074971199035645, + "ce_orig": 1.6236926317214966, + "epoch": 0.9921633474728593, + "kl_loss": 0.1387084424495697, + "loss_ib": 0.008594581857323647, + "step": 3450 + }, + { + "ce_ib": 4.272385597229004, + "ce_orig": 0.99873948097229, + "epoch": 0.9921633474728593, + "kl_loss": 0.20594806969165802, + "loss_ib": 0.006331866141408682, + "step": 3450 + }, + { + "ce_ib": 2.0292980670928955, + "ce_orig": 0.5533305406570435, + "epoch": 0.9924509310518369, + "kl_loss": 0.2869671583175659, + "loss_ib": 0.004898969549685717, + "step": 3451 + }, + { + "ce_ib": 3.377380609512329, + "ce_orig": 0.8912271857261658, + "epoch": 0.9924509310518369, + "kl_loss": 0.12317998707294464, + "loss_ib": 0.004609180614352226, + "step": 3451 + }, + { + "ce_ib": 2.3013720512390137, + "ce_orig": 0.4659023880958557, + "epoch": 0.9924509310518369, + "kl_loss": 0.1551608145236969, + "loss_ib": 0.0038529799785465, + "step": 3451 + }, + { + "ce_ib": 1.6815961599349976, + "ce_orig": 0.556525707244873, + "epoch": 0.9924509310518369, + "kl_loss": 0.11572620272636414, + "loss_ib": 0.0028388584032654762, + "step": 3451 + }, + { + "ce_ib": 3.4494431018829346, + "ce_orig": 1.033869981765747, + "epoch": 0.9927385146308145, + "kl_loss": 0.1327240765094757, + "loss_ib": 0.0047766841016709805, + "step": 3452 + }, + { + "ce_ib": 3.3012754917144775, + "ce_orig": 0.7601868510246277, + "epoch": 0.9927385146308145, + "kl_loss": 0.2058526575565338, + "loss_ib": 0.005359801929444075, + "step": 3452 + }, + { + "ce_ib": 3.0463452339172363, + "ce_orig": 0.625864565372467, + "epoch": 0.9927385146308145, + "kl_loss": 0.19520063698291779, + "loss_ib": 0.004998351912945509, + "step": 3452 + }, + { + "ce_ib": 3.8183600902557373, + "ce_orig": 0.869379997253418, + "epoch": 0.9927385146308145, + "kl_loss": 0.17697370052337646, + "loss_ib": 0.005588097497820854, + "step": 3452 + }, + { + "ce_ib": 5.333684921264648, + "ce_orig": 1.0928280353546143, + "epoch": 0.9930260982097923, + "kl_loss": 0.1588953733444214, + "loss_ib": 0.006922638975083828, + "step": 3453 + }, + { + "ce_ib": 3.171299457550049, + "ce_orig": 0.872671902179718, + "epoch": 0.9930260982097923, + "kl_loss": 0.16126951575279236, + "loss_ib": 0.004783994518220425, + "step": 3453 + }, + { + "ce_ib": 4.88795804977417, + "ce_orig": 0.8521097302436829, + "epoch": 0.9930260982097923, + "kl_loss": 0.22395113110542297, + "loss_ib": 0.007127469405531883, + "step": 3453 + }, + { + "ce_ib": 3.8633527755737305, + "ce_orig": 0.9919858574867249, + "epoch": 0.9930260982097923, + "kl_loss": 0.15233144164085388, + "loss_ib": 0.00538666732609272, + "step": 3453 + }, + { + "ce_ib": 3.596989631652832, + "ce_orig": 0.877449095249176, + "epoch": 0.9933136817887699, + "kl_loss": 0.1657228171825409, + "loss_ib": 0.005254217889159918, + "step": 3454 + }, + { + "ce_ib": 3.9303176403045654, + "ce_orig": 1.011705756187439, + "epoch": 0.9933136817887699, + "kl_loss": 0.1373642086982727, + "loss_ib": 0.0053039598278701305, + "step": 3454 + }, + { + "ce_ib": 3.699378728866577, + "ce_orig": 0.847196638584137, + "epoch": 0.9933136817887699, + "kl_loss": 0.17876744270324707, + "loss_ib": 0.0054870531894266605, + "step": 3454 + }, + { + "ce_ib": 2.503683090209961, + "ce_orig": 0.4780525267124176, + "epoch": 0.9933136817887699, + "kl_loss": 0.23386429250240326, + "loss_ib": 0.004842326045036316, + "step": 3454 + }, + { + "epoch": 0.9936012653677475, + "grad_norm": 0.12748871743679047, + "learning_rate": 7.813240527369958e-06, + "loss": 0.8746, + "step": 3455 + }, + { + "ce_ib": 1.4465570449829102, + "ce_orig": 0.4469841420650482, + "epoch": 0.9936012653677475, + "kl_loss": 0.16175779700279236, + "loss_ib": 0.003064134856685996, + "step": 3455 + }, + { + "ce_ib": 4.669737815856934, + "ce_orig": 1.3489315509796143, + "epoch": 0.9936012653677475, + "kl_loss": 0.26434648036956787, + "loss_ib": 0.007313202600926161, + "step": 3455 + }, + { + "ce_ib": 5.191930294036865, + "ce_orig": 1.3877254724502563, + "epoch": 0.9936012653677475, + "kl_loss": 0.12933751940727234, + "loss_ib": 0.006485305726528168, + "step": 3455 + }, + { + "ce_ib": 5.682889461517334, + "ce_orig": 1.2969274520874023, + "epoch": 0.9936012653677475, + "kl_loss": 0.193734809756279, + "loss_ib": 0.007620237767696381, + "step": 3455 + }, + { + "ce_ib": 2.6403400897979736, + "ce_orig": 0.7688305974006653, + "epoch": 0.9938888489467251, + "kl_loss": 0.13412922620773315, + "loss_ib": 0.0039816321805119514, + "step": 3456 + }, + { + "ce_ib": 1.9652568101882935, + "ce_orig": 0.5389902591705322, + "epoch": 0.9938888489467251, + "kl_loss": 0.19882071018218994, + "loss_ib": 0.003953463863581419, + "step": 3456 + }, + { + "ce_ib": 2.797337532043457, + "ce_orig": 0.5409803986549377, + "epoch": 0.9938888489467251, + "kl_loss": 0.21415463089942932, + "loss_ib": 0.0049388837069272995, + "step": 3456 + }, + { + "ce_ib": 2.7681522369384766, + "ce_orig": 0.6485786437988281, + "epoch": 0.9938888489467251, + "kl_loss": 0.1612543910741806, + "loss_ib": 0.004380696453154087, + "step": 3456 + }, + { + "ce_ib": 5.937419414520264, + "ce_orig": 1.5775456428527832, + "epoch": 0.9941764325257028, + "kl_loss": 0.15488120913505554, + "loss_ib": 0.007486231159418821, + "step": 3457 + }, + { + "ce_ib": 4.29871129989624, + "ce_orig": 0.9876276850700378, + "epoch": 0.9941764325257028, + "kl_loss": 0.20262327790260315, + "loss_ib": 0.00632494455203414, + "step": 3457 + }, + { + "ce_ib": 5.264851093292236, + "ce_orig": 1.4519188404083252, + "epoch": 0.9941764325257028, + "kl_loss": 0.21108362078666687, + "loss_ib": 0.00737568736076355, + "step": 3457 + }, + { + "ce_ib": 3.193880081176758, + "ce_orig": 0.5552566051483154, + "epoch": 0.9941764325257028, + "kl_loss": 0.27337658405303955, + "loss_ib": 0.005927646066993475, + "step": 3457 + }, + { + "ce_ib": 5.453924179077148, + "ce_orig": 1.3202298879623413, + "epoch": 0.9944640161046804, + "kl_loss": 0.2146538496017456, + "loss_ib": 0.00760046299546957, + "step": 3458 + }, + { + "ce_ib": 2.7229292392730713, + "ce_orig": 0.8413715362548828, + "epoch": 0.9944640161046804, + "kl_loss": 0.1136932447552681, + "loss_ib": 0.0038598617538809776, + "step": 3458 + }, + { + "ce_ib": 2.979402542114258, + "ce_orig": 0.5967450737953186, + "epoch": 0.9944640161046804, + "kl_loss": 0.10229052603244781, + "loss_ib": 0.004002307541668415, + "step": 3458 + }, + { + "ce_ib": 5.026146411895752, + "ce_orig": 0.9255256056785583, + "epoch": 0.9944640161046804, + "kl_loss": 0.15148408710956573, + "loss_ib": 0.006540987174957991, + "step": 3458 + }, + { + "ce_ib": 3.5963733196258545, + "ce_orig": 0.4438382685184479, + "epoch": 0.9947515996836581, + "kl_loss": 0.17698396742343903, + "loss_ib": 0.005366213154047728, + "step": 3459 + }, + { + "ce_ib": 4.1827216148376465, + "ce_orig": 1.030991792678833, + "epoch": 0.9947515996836581, + "kl_loss": 0.16807374358177185, + "loss_ib": 0.005863458849489689, + "step": 3459 + }, + { + "ce_ib": 2.0660173892974854, + "ce_orig": 0.587500274181366, + "epoch": 0.9947515996836581, + "kl_loss": 0.15890203416347504, + "loss_ib": 0.0036550378426909447, + "step": 3459 + }, + { + "ce_ib": 4.384653091430664, + "ce_orig": 1.179965615272522, + "epoch": 0.9947515996836581, + "kl_loss": 0.21607641875743866, + "loss_ib": 0.006545417010784149, + "step": 3459 + }, + { + "epoch": 0.9950391832626357, + "grad_norm": 0.12666022777557373, + "learning_rate": 7.806821272338504e-06, + "loss": 0.9253, + "step": 3460 + }, + { + "ce_ib": 4.113341808319092, + "ce_orig": 0.9752249121665955, + "epoch": 0.9950391832626357, + "kl_loss": 0.22252559661865234, + "loss_ib": 0.006338597275316715, + "step": 3460 + }, + { + "ce_ib": 2.850165843963623, + "ce_orig": 0.4755226671695709, + "epoch": 0.9950391832626357, + "kl_loss": 0.21279025077819824, + "loss_ib": 0.004978068172931671, + "step": 3460 + }, + { + "ce_ib": 3.7958827018737793, + "ce_orig": 1.0357553958892822, + "epoch": 0.9950391832626357, + "kl_loss": 0.11572849750518799, + "loss_ib": 0.004953167401254177, + "step": 3460 + }, + { + "ce_ib": 2.4415059089660645, + "ce_orig": 0.6742508411407471, + "epoch": 0.9950391832626357, + "kl_loss": 0.16550561785697937, + "loss_ib": 0.004096562042832375, + "step": 3460 + }, + { + "ce_ib": 4.326879024505615, + "ce_orig": 0.9998949766159058, + "epoch": 0.9953267668416134, + "kl_loss": 0.12285637855529785, + "loss_ib": 0.005555442534387112, + "step": 3461 + }, + { + "ce_ib": 4.994079113006592, + "ce_orig": 1.2230302095413208, + "epoch": 0.9953267668416134, + "kl_loss": 0.19051086902618408, + "loss_ib": 0.006899187341332436, + "step": 3461 + }, + { + "ce_ib": 3.4318859577178955, + "ce_orig": 0.7996953725814819, + "epoch": 0.9953267668416134, + "kl_loss": 0.23721951246261597, + "loss_ib": 0.005804080981761217, + "step": 3461 + }, + { + "ce_ib": 5.629469394683838, + "ce_orig": 1.4414242506027222, + "epoch": 0.9953267668416134, + "kl_loss": 0.19592253863811493, + "loss_ib": 0.007588695269078016, + "step": 3461 + }, + { + "ce_ib": 2.4543001651763916, + "ce_orig": 0.5183411240577698, + "epoch": 0.995614350420591, + "kl_loss": 0.14442607760429382, + "loss_ib": 0.0038985610008239746, + "step": 3462 + }, + { + "ce_ib": 2.463240146636963, + "ce_orig": 0.7511072754859924, + "epoch": 0.995614350420591, + "kl_loss": 0.13800132274627686, + "loss_ib": 0.003843253245577216, + "step": 3462 + }, + { + "ce_ib": 2.4005744457244873, + "ce_orig": 0.6667129993438721, + "epoch": 0.995614350420591, + "kl_loss": 0.12865601480007172, + "loss_ib": 0.003687134711071849, + "step": 3462 + }, + { + "ce_ib": 3.992079496383667, + "ce_orig": 0.9901688694953918, + "epoch": 0.995614350420591, + "kl_loss": 0.22179734706878662, + "loss_ib": 0.006210052873939276, + "step": 3462 + }, + { + "ce_ib": 3.774428129196167, + "ce_orig": 1.0067039728164673, + "epoch": 0.9959019339995686, + "kl_loss": 0.15604323148727417, + "loss_ib": 0.0053348601795732975, + "step": 3463 + }, + { + "ce_ib": 2.6253957748413086, + "ce_orig": 0.7201523184776306, + "epoch": 0.9959019339995686, + "kl_loss": 0.17666539549827576, + "loss_ib": 0.004392049740999937, + "step": 3463 + }, + { + "ce_ib": 4.269009590148926, + "ce_orig": 0.8085706233978271, + "epoch": 0.9959019339995686, + "kl_loss": 0.16591572761535645, + "loss_ib": 0.005928167141973972, + "step": 3463 + }, + { + "ce_ib": 3.13584041595459, + "ce_orig": 0.5527510046958923, + "epoch": 0.9959019339995686, + "kl_loss": 0.268107533454895, + "loss_ib": 0.00581691600382328, + "step": 3463 + }, + { + "ce_ib": 2.8763444423675537, + "ce_orig": 0.5401973128318787, + "epoch": 0.9961895175785462, + "kl_loss": 0.11613184213638306, + "loss_ib": 0.004037662874907255, + "step": 3464 + }, + { + "ce_ib": 3.116708993911743, + "ce_orig": 0.6132296323776245, + "epoch": 0.9961895175785462, + "kl_loss": 0.2286818027496338, + "loss_ib": 0.005403527058660984, + "step": 3464 + }, + { + "ce_ib": 4.486922740936279, + "ce_orig": 0.9912914037704468, + "epoch": 0.9961895175785462, + "kl_loss": 0.19478023052215576, + "loss_ib": 0.006434725597500801, + "step": 3464 + }, + { + "ce_ib": 5.939351558685303, + "ce_orig": 1.3783941268920898, + "epoch": 0.9961895175785462, + "kl_loss": 0.13446007668972015, + "loss_ib": 0.0072839525528252125, + "step": 3464 + }, + { + "epoch": 0.9964771011575239, + "grad_norm": 0.14151056110858917, + "learning_rate": 7.800395255023128e-06, + "loss": 0.8322, + "step": 3465 + }, + { + "ce_ib": 6.5561394691467285, + "ce_orig": 0.7792337536811829, + "epoch": 0.9964771011575239, + "kl_loss": 0.3103591799736023, + "loss_ib": 0.009659730829298496, + "step": 3465 + }, + { + "ce_ib": 5.302000999450684, + "ce_orig": 1.2608247995376587, + "epoch": 0.9964771011575239, + "kl_loss": 0.13942871987819672, + "loss_ib": 0.006696288473904133, + "step": 3465 + }, + { + "ce_ib": 7.383823871612549, + "ce_orig": 1.9292733669281006, + "epoch": 0.9964771011575239, + "kl_loss": 0.17299148440361023, + "loss_ib": 0.009113738313317299, + "step": 3465 + }, + { + "ce_ib": 2.3026130199432373, + "ce_orig": 0.23464596271514893, + "epoch": 0.9964771011575239, + "kl_loss": 0.16129623353481293, + "loss_ib": 0.003915575332939625, + "step": 3465 + }, + { + "ce_ib": 3.8605587482452393, + "ce_orig": 0.8774089217185974, + "epoch": 0.9967646847365016, + "kl_loss": 0.22623510658740997, + "loss_ib": 0.006122909486293793, + "step": 3466 + }, + { + "ce_ib": 2.674401044845581, + "ce_orig": 0.7107449173927307, + "epoch": 0.9967646847365016, + "kl_loss": 0.21000707149505615, + "loss_ib": 0.004774471744894981, + "step": 3466 + }, + { + "ce_ib": 6.212036609649658, + "ce_orig": 1.582992672920227, + "epoch": 0.9967646847365016, + "kl_loss": 0.14351125061511993, + "loss_ib": 0.007647148799151182, + "step": 3466 + }, + { + "ce_ib": 3.0927517414093018, + "ce_orig": 0.8043943047523499, + "epoch": 0.9967646847365016, + "kl_loss": 0.12014228105545044, + "loss_ib": 0.00429417472332716, + "step": 3466 + }, + { + "ce_ib": 3.42596173286438, + "ce_orig": 0.823205828666687, + "epoch": 0.9970522683154792, + "kl_loss": 0.24498970806598663, + "loss_ib": 0.005875858478248119, + "step": 3467 + }, + { + "ce_ib": 2.0546090602874756, + "ce_orig": 0.5143095850944519, + "epoch": 0.9970522683154792, + "kl_loss": 0.1831388771533966, + "loss_ib": 0.0038859976921230555, + "step": 3467 + }, + { + "ce_ib": 4.691566467285156, + "ce_orig": 0.9886443018913269, + "epoch": 0.9970522683154792, + "kl_loss": 0.24875423312187195, + "loss_ib": 0.007179108913987875, + "step": 3467 + }, + { + "ce_ib": 4.087890625, + "ce_orig": 0.8862422704696655, + "epoch": 0.9970522683154792, + "kl_loss": 0.1820201426744461, + "loss_ib": 0.005908092018216848, + "step": 3467 + }, + { + "ce_ib": 2.628171920776367, + "ce_orig": 0.7336425185203552, + "epoch": 0.9973398518944568, + "kl_loss": 0.14292946457862854, + "loss_ib": 0.004057466518133879, + "step": 3468 + }, + { + "ce_ib": 1.9774768352508545, + "ce_orig": 0.610469400882721, + "epoch": 0.9973398518944568, + "kl_loss": 0.09289410710334778, + "loss_ib": 0.0029064177069813013, + "step": 3468 + }, + { + "ce_ib": 3.7653801441192627, + "ce_orig": 0.8011384606361389, + "epoch": 0.9973398518944568, + "kl_loss": 0.13268481194972992, + "loss_ib": 0.005092227831482887, + "step": 3468 + }, + { + "ce_ib": 4.081719398498535, + "ce_orig": 0.9630115032196045, + "epoch": 0.9973398518944568, + "kl_loss": 0.21947702765464783, + "loss_ib": 0.006276489235460758, + "step": 3468 + }, + { + "ce_ib": 6.43645715713501, + "ce_orig": 1.5939087867736816, + "epoch": 0.9976274354734345, + "kl_loss": 0.19047951698303223, + "loss_ib": 0.008341251872479916, + "step": 3469 + }, + { + "ce_ib": 2.3696601390838623, + "ce_orig": 0.6022066473960876, + "epoch": 0.9976274354734345, + "kl_loss": 0.1435680091381073, + "loss_ib": 0.0038053400348871946, + "step": 3469 + }, + { + "ce_ib": 7.459146499633789, + "ce_orig": 1.7246568202972412, + "epoch": 0.9976274354734345, + "kl_loss": 0.1759602427482605, + "loss_ib": 0.009218748658895493, + "step": 3469 + }, + { + "ce_ib": 1.921897292137146, + "ce_orig": 0.7022322416305542, + "epoch": 0.9976274354734345, + "kl_loss": 0.07831156253814697, + "loss_ib": 0.002705013146623969, + "step": 3469 + }, + { + "epoch": 0.9979150190524121, + "grad_norm": 0.15020635724067688, + "learning_rate": 7.793962490905598e-06, + "loss": 0.8746, + "step": 3470 + }, + { + "ce_ib": 3.6671571731567383, + "ce_orig": 0.49304065108299255, + "epoch": 0.9979150190524121, + "kl_loss": 0.20855364203453064, + "loss_ib": 0.005752693396061659, + "step": 3470 + }, + { + "ce_ib": 3.9736437797546387, + "ce_orig": 1.010642170906067, + "epoch": 0.9979150190524121, + "kl_loss": 0.1092081367969513, + "loss_ib": 0.005065724719315767, + "step": 3470 + }, + { + "ce_ib": 3.8366360664367676, + "ce_orig": 0.9749507308006287, + "epoch": 0.9979150190524121, + "kl_loss": 0.17382538318634033, + "loss_ib": 0.005574890412390232, + "step": 3470 + }, + { + "ce_ib": 5.859660625457764, + "ce_orig": 1.3413589000701904, + "epoch": 0.9979150190524121, + "kl_loss": 0.163615420460701, + "loss_ib": 0.007495814468711615, + "step": 3470 + }, + { + "ce_ib": 2.1945607662200928, + "ce_orig": 0.686866283416748, + "epoch": 0.9982026026313897, + "kl_loss": 0.11930695921182632, + "loss_ib": 0.00338763021863997, + "step": 3471 + }, + { + "ce_ib": 5.78333044052124, + "ce_orig": 1.3355275392532349, + "epoch": 0.9982026026313897, + "kl_loss": 0.23626267910003662, + "loss_ib": 0.008145957253873348, + "step": 3471 + }, + { + "ce_ib": 2.8005943298339844, + "ce_orig": 0.806566596031189, + "epoch": 0.9982026026313897, + "kl_loss": 0.15048038959503174, + "loss_ib": 0.004305398091673851, + "step": 3471 + }, + { + "ce_ib": 4.827478885650635, + "ce_orig": 1.039472222328186, + "epoch": 0.9982026026313897, + "kl_loss": 0.29218918085098267, + "loss_ib": 0.00774937029927969, + "step": 3471 + }, + { + "ce_ib": 2.9184415340423584, + "ce_orig": 0.86175936460495, + "epoch": 0.9984901862103673, + "kl_loss": 0.16837841272354126, + "loss_ib": 0.004602225497364998, + "step": 3472 + }, + { + "ce_ib": 2.859739065170288, + "ce_orig": 0.6152028441429138, + "epoch": 0.9984901862103673, + "kl_loss": 0.1270846128463745, + "loss_ib": 0.004130585119128227, + "step": 3472 + }, + { + "ce_ib": 3.145430088043213, + "ce_orig": 0.8154630661010742, + "epoch": 0.9984901862103673, + "kl_loss": 0.15216520428657532, + "loss_ib": 0.004667081870138645, + "step": 3472 + }, + { + "ce_ib": 4.556081295013428, + "ce_orig": 1.085402011871338, + "epoch": 0.9984901862103673, + "kl_loss": 0.212199866771698, + "loss_ib": 0.006678079720586538, + "step": 3472 + }, + { + "ce_ib": 4.468810558319092, + "ce_orig": 1.469347596168518, + "epoch": 0.9987777697893451, + "kl_loss": 0.1255287528038025, + "loss_ib": 0.005724098067730665, + "step": 3473 + }, + { + "ce_ib": 2.887476921081543, + "ce_orig": 0.6350107789039612, + "epoch": 0.9987777697893451, + "kl_loss": 0.10351546108722687, + "loss_ib": 0.003922631498426199, + "step": 3473 + }, + { + "ce_ib": 2.2254433631896973, + "ce_orig": 0.3217471241950989, + "epoch": 0.9987777697893451, + "kl_loss": 0.23874399065971375, + "loss_ib": 0.00461288308724761, + "step": 3473 + }, + { + "ce_ib": 5.0712385177612305, + "ce_orig": 1.323119044303894, + "epoch": 0.9987777697893451, + "kl_loss": 0.16774895787239075, + "loss_ib": 0.006748728454113007, + "step": 3473 + }, + { + "ce_ib": 3.8824024200439453, + "ce_orig": 0.9220649003982544, + "epoch": 0.9990653533683227, + "kl_loss": 0.22060325741767883, + "loss_ib": 0.0060884347185492516, + "step": 3474 + }, + { + "ce_ib": 2.3531692028045654, + "ce_orig": 0.6252801418304443, + "epoch": 0.9990653533683227, + "kl_loss": 0.13665544986724854, + "loss_ib": 0.0037197237834334373, + "step": 3474 + }, + { + "ce_ib": 4.634824752807617, + "ce_orig": 0.9208062291145325, + "epoch": 0.9990653533683227, + "kl_loss": 0.21885812282562256, + "loss_ib": 0.006823406089097261, + "step": 3474 + }, + { + "ce_ib": 3.634901285171509, + "ce_orig": 0.6850876808166504, + "epoch": 0.9990653533683227, + "kl_loss": 0.18191230297088623, + "loss_ib": 0.005454023834317923, + "step": 3474 + }, + { + "epoch": 0.9993529369473003, + "grad_norm": 0.13302339613437653, + "learning_rate": 7.787522995483935e-06, + "loss": 0.8186, + "step": 3475 + }, + { + "ce_ib": 4.69920539855957, + "ce_orig": 1.0757464170455933, + "epoch": 0.9993529369473003, + "kl_loss": 0.22309571504592896, + "loss_ib": 0.006930162664502859, + "step": 3475 + }, + { + "ce_ib": 4.96550989151001, + "ce_orig": 1.0464799404144287, + "epoch": 0.9993529369473003, + "kl_loss": 0.2133435159921646, + "loss_ib": 0.007098945323377848, + "step": 3475 + }, + { + "ce_ib": 4.545261859893799, + "ce_orig": 1.1787376403808594, + "epoch": 0.9993529369473003, + "kl_loss": 0.16321563720703125, + "loss_ib": 0.0061774179339408875, + "step": 3475 + }, + { + "ce_ib": 5.281437397003174, + "ce_orig": 1.096032977104187, + "epoch": 0.9993529369473003, + "kl_loss": 0.14643113315105438, + "loss_ib": 0.006745749153196812, + "step": 3475 + }, + { + "ce_ib": 1.940627932548523, + "ce_orig": 0.5416520237922668, + "epoch": 0.9996405205262779, + "kl_loss": 0.3391854763031006, + "loss_ib": 0.0053324829787015915, + "step": 3476 + }, + { + "ce_ib": 5.286829948425293, + "ce_orig": 1.1707184314727783, + "epoch": 0.9996405205262779, + "kl_loss": 0.18222661316394806, + "loss_ib": 0.007109095808118582, + "step": 3476 + }, + { + "ce_ib": 4.143987655639648, + "ce_orig": 1.0021378993988037, + "epoch": 0.9996405205262779, + "kl_loss": 0.11702059209346771, + "loss_ib": 0.005314193200320005, + "step": 3476 + }, + { + "ce_ib": 4.3000569343566895, + "ce_orig": 0.5843205451965332, + "epoch": 0.9996405205262779, + "kl_loss": 0.17412996292114258, + "loss_ib": 0.006041356362402439, + "step": 3476 + }, + { + "ce_ib": 2.1547093391418457, + "ce_orig": 0.630151093006134, + "epoch": 0.9999281041052556, + "kl_loss": 0.1130114197731018, + "loss_ib": 0.00328482361510396, + "step": 3477 + }, + { + "ce_ib": 3.1630523204803467, + "ce_orig": 0.8977872729301453, + "epoch": 1.0, + "kl_loss": 0.13182643055915833, + "loss_ib": 0.004481316544115543, + "step": 3478 + }, + { + "ce_ib": 5.818431377410889, + "ce_orig": 1.0919588804244995, + "epoch": 1.0, + "kl_loss": 0.1187293753027916, + "loss_ib": 0.007005724590271711, + "step": 3478 + }, + { + "ce_ib": 2.0855801105499268, + "ce_orig": 0.4700067341327667, + "epoch": 1.0, + "kl_loss": 0.12993621826171875, + "loss_ib": 0.0033849424216896296, + "step": 3478 + }, + { + "ce_ib": 3.506413221359253, + "ce_orig": 0.756852924823761, + "epoch": 1.0, + "kl_loss": 0.1696353256702423, + "loss_ib": 0.005202766507863998, + "step": 3478 + }, + { + "ce_ib": 7.295567989349365, + "ce_orig": 1.6302684545516968, + "epoch": 1.0002875835789777, + "kl_loss": 0.16248719394207, + "loss_ib": 0.008920439518988132, + "step": 3479 + }, + { + "ce_ib": 3.3494558334350586, + "ce_orig": 1.1031216382980347, + "epoch": 1.0002875835789777, + "kl_loss": 0.1434016227722168, + "loss_ib": 0.004783472046256065, + "step": 3479 + }, + { + "ce_ib": 6.185150623321533, + "ce_orig": 1.0855741500854492, + "epoch": 1.0002875835789777, + "kl_loss": 0.21325618028640747, + "loss_ib": 0.00831771269440651, + "step": 3479 + }, + { + "ce_ib": 4.434020042419434, + "ce_orig": 1.1405545473098755, + "epoch": 1.0002875835789777, + "kl_loss": 0.20859144628047943, + "loss_ib": 0.0065199341624975204, + "step": 3479 + }, + { + "epoch": 1.0005751671579552, + "grad_norm": 0.13186699151992798, + "learning_rate": 7.781076784272377e-06, + "loss": 0.7822, + "step": 3480 + }, + { + "ce_ib": 3.625835657119751, + "ce_orig": 0.5929476022720337, + "epoch": 1.0005751671579552, + "kl_loss": 0.11495166271924973, + "loss_ib": 0.00477535231038928, + "step": 3480 + }, + { + "ce_ib": 2.485511541366577, + "ce_orig": 0.7004041075706482, + "epoch": 1.0005751671579552, + "kl_loss": 0.11045129597187042, + "loss_ib": 0.0035900245420634747, + "step": 3480 + }, + { + "ce_ib": 2.322873115539551, + "ce_orig": 0.5784850120544434, + "epoch": 1.0005751671579552, + "kl_loss": 0.13378319144248962, + "loss_ib": 0.00366070494055748, + "step": 3480 + }, + { + "ce_ib": 5.105598449707031, + "ce_orig": 1.2249782085418701, + "epoch": 1.0005751671579552, + "kl_loss": 0.1932123303413391, + "loss_ib": 0.007037721574306488, + "step": 3480 + }, + { + "ce_ib": 2.4824228286743164, + "ce_orig": 0.31237050890922546, + "epoch": 1.000862750736933, + "kl_loss": 0.36326590180397034, + "loss_ib": 0.00611508172005415, + "step": 3481 + }, + { + "ce_ib": 4.06863260269165, + "ce_orig": 0.8603116273880005, + "epoch": 1.000862750736933, + "kl_loss": 0.147389218211174, + "loss_ib": 0.005542525090277195, + "step": 3481 + }, + { + "ce_ib": 1.4630589485168457, + "ce_orig": 0.37195420265197754, + "epoch": 1.000862750736933, + "kl_loss": 0.12227332592010498, + "loss_ib": 0.002685792278498411, + "step": 3481 + }, + { + "ce_ib": 3.2062911987304688, + "ce_orig": 0.9018990397453308, + "epoch": 1.000862750736933, + "kl_loss": 0.1651451289653778, + "loss_ib": 0.004857742227613926, + "step": 3481 + }, + { + "ce_ib": 2.4478588104248047, + "ce_orig": 0.7610259056091309, + "epoch": 1.0011503343159105, + "kl_loss": 0.11114882677793503, + "loss_ib": 0.0035593470092862844, + "step": 3482 + }, + { + "ce_ib": 5.825453281402588, + "ce_orig": 0.9555022716522217, + "epoch": 1.0011503343159105, + "kl_loss": 0.22599023580551147, + "loss_ib": 0.008085355162620544, + "step": 3482 + }, + { + "ce_ib": 2.542078733444214, + "ce_orig": 0.7784476280212402, + "epoch": 1.0011503343159105, + "kl_loss": 0.10270215570926666, + "loss_ib": 0.0035691005177795887, + "step": 3482 + }, + { + "ce_ib": 2.9650964736938477, + "ce_orig": 0.8190074563026428, + "epoch": 1.0011503343159105, + "kl_loss": 0.16220183670520782, + "loss_ib": 0.004587114788591862, + "step": 3482 + }, + { + "ce_ib": 5.132469177246094, + "ce_orig": 1.0750268697738647, + "epoch": 1.0014379178948882, + "kl_loss": 0.23581503331661224, + "loss_ib": 0.007490620017051697, + "step": 3483 + }, + { + "ce_ib": 2.678689479827881, + "ce_orig": 0.9219682812690735, + "epoch": 1.0014379178948882, + "kl_loss": 0.13806882500648499, + "loss_ib": 0.00405937759205699, + "step": 3483 + }, + { + "ce_ib": 3.8321950435638428, + "ce_orig": 0.8164547681808472, + "epoch": 1.0014379178948882, + "kl_loss": 0.22466161847114563, + "loss_ib": 0.00607881136238575, + "step": 3483 + }, + { + "ce_ib": 2.9934937953948975, + "ce_orig": 0.7932628393173218, + "epoch": 1.0014379178948882, + "kl_loss": 0.14381693303585052, + "loss_ib": 0.004431663081049919, + "step": 3483 + }, + { + "ce_ib": 1.9486080408096313, + "ce_orig": 0.5349227786064148, + "epoch": 1.0017255014738657, + "kl_loss": 0.13324777781963348, + "loss_ib": 0.003281085751950741, + "step": 3484 + }, + { + "ce_ib": 4.370147705078125, + "ce_orig": 0.8517962694168091, + "epoch": 1.0017255014738657, + "kl_loss": 0.25617456436157227, + "loss_ib": 0.006931893527507782, + "step": 3484 + }, + { + "ce_ib": 6.328146934509277, + "ce_orig": 1.7351648807525635, + "epoch": 1.0017255014738657, + "kl_loss": 0.16969579458236694, + "loss_ib": 0.008025105111300945, + "step": 3484 + }, + { + "ce_ib": 6.359526634216309, + "ce_orig": 1.4365060329437256, + "epoch": 1.0017255014738657, + "kl_loss": 0.23070436716079712, + "loss_ib": 0.0086665702983737, + "step": 3484 + }, + { + "epoch": 1.0020130850528435, + "grad_norm": 0.12651699781417847, + "learning_rate": 7.774623872801344e-06, + "loss": 0.8761, + "step": 3485 + }, + { + "ce_ib": 3.5224177837371826, + "ce_orig": 0.6698968410491943, + "epoch": 1.0020130850528435, + "kl_loss": 0.09902495890855789, + "loss_ib": 0.004512667190283537, + "step": 3485 + }, + { + "ce_ib": 4.75855016708374, + "ce_orig": 0.6129909753799438, + "epoch": 1.0020130850528435, + "kl_loss": 0.2984992265701294, + "loss_ib": 0.007743542082607746, + "step": 3485 + }, + { + "ce_ib": 3.7922234535217285, + "ce_orig": 0.8725505471229553, + "epoch": 1.0020130850528435, + "kl_loss": 0.11907500773668289, + "loss_ib": 0.004982973448932171, + "step": 3485 + }, + { + "ce_ib": 4.8769307136535645, + "ce_orig": 1.0160753726959229, + "epoch": 1.0020130850528435, + "kl_loss": 0.16960881650447845, + "loss_ib": 0.006573019083589315, + "step": 3485 + }, + { + "ce_ib": 1.94500732421875, + "ce_orig": 0.5471546053886414, + "epoch": 1.0023006686318212, + "kl_loss": 0.15106846392154694, + "loss_ib": 0.0034556919708848, + "step": 3486 + }, + { + "ce_ib": 4.850595474243164, + "ce_orig": 1.413933277130127, + "epoch": 1.0023006686318212, + "kl_loss": 0.172381192445755, + "loss_ib": 0.006574407685548067, + "step": 3486 + }, + { + "ce_ib": 6.155509948730469, + "ce_orig": 1.5550260543823242, + "epoch": 1.0023006686318212, + "kl_loss": 0.2297593653202057, + "loss_ib": 0.00845310278236866, + "step": 3486 + }, + { + "ce_ib": 4.344824314117432, + "ce_orig": 1.0779612064361572, + "epoch": 1.0023006686318212, + "kl_loss": 0.17061690986156464, + "loss_ib": 0.006050993222743273, + "step": 3486 + }, + { + "ce_ib": 4.750270366668701, + "ce_orig": 0.8952432870864868, + "epoch": 1.0025882522107987, + "kl_loss": 0.13421449065208435, + "loss_ib": 0.006092415191233158, + "step": 3487 + }, + { + "ce_ib": 5.759513854980469, + "ce_orig": 1.0048786401748657, + "epoch": 1.0025882522107987, + "kl_loss": 0.17531238496303558, + "loss_ib": 0.007512637879699469, + "step": 3487 + }, + { + "ce_ib": 6.087596416473389, + "ce_orig": 1.3597161769866943, + "epoch": 1.0025882522107987, + "kl_loss": 0.18678018450737, + "loss_ib": 0.0079553984105587, + "step": 3487 + }, + { + "ce_ib": 5.5391693115234375, + "ce_orig": 1.0989854335784912, + "epoch": 1.0025882522107987, + "kl_loss": 0.16412527859210968, + "loss_ib": 0.0071804216131567955, + "step": 3487 + }, + { + "ce_ib": 1.5826603174209595, + "ce_orig": 0.442940354347229, + "epoch": 1.0028758357897765, + "kl_loss": 0.18180260062217712, + "loss_ib": 0.0034006861969828606, + "step": 3488 + }, + { + "ce_ib": 3.531106948852539, + "ce_orig": 0.34212449193000793, + "epoch": 1.0028758357897765, + "kl_loss": 0.18498805165290833, + "loss_ib": 0.00538098718971014, + "step": 3488 + }, + { + "ce_ib": 5.461633205413818, + "ce_orig": 1.2697652578353882, + "epoch": 1.0028758357897765, + "kl_loss": 0.15513956546783447, + "loss_ib": 0.007013028487563133, + "step": 3488 + }, + { + "ce_ib": 6.223939895629883, + "ce_orig": 1.1918922662734985, + "epoch": 1.0028758357897765, + "kl_loss": 0.17273598909378052, + "loss_ib": 0.007951299659907818, + "step": 3488 + }, + { + "ce_ib": 2.909649610519409, + "ce_orig": 0.7888212203979492, + "epoch": 1.003163419368754, + "kl_loss": 0.1474660336971283, + "loss_ib": 0.004384309984743595, + "step": 3489 + }, + { + "ce_ib": 3.9414010047912598, + "ce_orig": 0.8665730357170105, + "epoch": 1.003163419368754, + "kl_loss": 0.2722112834453583, + "loss_ib": 0.006663513835519552, + "step": 3489 + }, + { + "ce_ib": 5.010422229766846, + "ce_orig": 0.860166609287262, + "epoch": 1.003163419368754, + "kl_loss": 0.1478068232536316, + "loss_ib": 0.006488490384072065, + "step": 3489 + }, + { + "ce_ib": 1.5870436429977417, + "ce_orig": 0.34755033254623413, + "epoch": 1.003163419368754, + "kl_loss": 0.09428616613149643, + "loss_ib": 0.0025299054104834795, + "step": 3489 + }, + { + "epoch": 1.0034510029477317, + "grad_norm": 0.15467816591262817, + "learning_rate": 7.768164276617397e-06, + "loss": 0.8822, + "step": 3490 + }, + { + "ce_ib": 7.89870548248291, + "ce_orig": 1.492767572402954, + "epoch": 1.0034510029477317, + "kl_loss": 0.19756749272346497, + "loss_ib": 0.009874380193650723, + "step": 3490 + }, + { + "ce_ib": 4.685238361358643, + "ce_orig": 0.9978558421134949, + "epoch": 1.0034510029477317, + "kl_loss": 0.15783655643463135, + "loss_ib": 0.006263603921979666, + "step": 3490 + }, + { + "ce_ib": 5.177371501922607, + "ce_orig": 0.9371923208236694, + "epoch": 1.0034510029477317, + "kl_loss": 0.21675696969032288, + "loss_ib": 0.007344940677285194, + "step": 3490 + }, + { + "ce_ib": 3.2749745845794678, + "ce_orig": 0.4150541126728058, + "epoch": 1.0034510029477317, + "kl_loss": 0.15287135541439056, + "loss_ib": 0.004803687799721956, + "step": 3490 + }, + { + "ce_ib": 2.6457221508026123, + "ce_orig": 0.6731870770454407, + "epoch": 1.0037385865267092, + "kl_loss": 0.22591428458690643, + "loss_ib": 0.004904864821583033, + "step": 3491 + }, + { + "ce_ib": 3.9199717044830322, + "ce_orig": 1.0910745859146118, + "epoch": 1.0037385865267092, + "kl_loss": 0.1856561005115509, + "loss_ib": 0.005776532925665379, + "step": 3491 + }, + { + "ce_ib": 4.076353549957275, + "ce_orig": 1.3336201906204224, + "epoch": 1.0037385865267092, + "kl_loss": 0.16495181620121002, + "loss_ib": 0.00572587177157402, + "step": 3491 + }, + { + "ce_ib": 2.4820244312286377, + "ce_orig": 0.46208518743515015, + "epoch": 1.0037385865267092, + "kl_loss": 0.14289981126785278, + "loss_ib": 0.003911022562533617, + "step": 3491 + }, + { + "ce_ib": 1.9946311712265015, + "ce_orig": 0.6158233284950256, + "epoch": 1.004026170105687, + "kl_loss": 0.1352892816066742, + "loss_ib": 0.003347523743286729, + "step": 3492 + }, + { + "ce_ib": 5.302822589874268, + "ce_orig": 1.209622859954834, + "epoch": 1.004026170105687, + "kl_loss": 0.18744675815105438, + "loss_ib": 0.007177290040999651, + "step": 3492 + }, + { + "ce_ib": 4.017965316772461, + "ce_orig": 0.6675052046775818, + "epoch": 1.004026170105687, + "kl_loss": 0.2046528458595276, + "loss_ib": 0.006064493674784899, + "step": 3492 + }, + { + "ce_ib": 3.1823785305023193, + "ce_orig": 0.698852002620697, + "epoch": 1.004026170105687, + "kl_loss": 0.1617402732372284, + "loss_ib": 0.004799781367182732, + "step": 3492 + }, + { + "ce_ib": 5.301358222961426, + "ce_orig": 1.4515306949615479, + "epoch": 1.0043137536846647, + "kl_loss": 0.23749494552612305, + "loss_ib": 0.007676307577639818, + "step": 3493 + }, + { + "ce_ib": 3.8011207580566406, + "ce_orig": 0.7873406410217285, + "epoch": 1.0043137536846647, + "kl_loss": 0.17556609213352203, + "loss_ib": 0.005556781776249409, + "step": 3493 + }, + { + "ce_ib": 5.169979095458984, + "ce_orig": 1.301355242729187, + "epoch": 1.0043137536846647, + "kl_loss": 0.31832611560821533, + "loss_ib": 0.008353239856660366, + "step": 3493 + }, + { + "ce_ib": 2.8897290229797363, + "ce_orig": 0.7896963953971863, + "epoch": 1.0043137536846647, + "kl_loss": 0.11785001307725906, + "loss_ib": 0.004068228881806135, + "step": 3493 + }, + { + "ce_ib": 4.55419397354126, + "ce_orig": 1.3099943399429321, + "epoch": 1.0046013372636422, + "kl_loss": 0.21962419152259827, + "loss_ib": 0.006750435568392277, + "step": 3494 + }, + { + "ce_ib": 2.694993257522583, + "ce_orig": 0.566410481929779, + "epoch": 1.0046013372636422, + "kl_loss": 0.2940097749233246, + "loss_ib": 0.005635091103613377, + "step": 3494 + }, + { + "ce_ib": 4.1881489753723145, + "ce_orig": 1.2032076120376587, + "epoch": 1.0046013372636422, + "kl_loss": 0.14809882640838623, + "loss_ib": 0.005669136997312307, + "step": 3494 + }, + { + "ce_ib": 6.390718460083008, + "ce_orig": 1.6931493282318115, + "epoch": 1.0046013372636422, + "kl_loss": 0.1826152801513672, + "loss_ib": 0.00821687187999487, + "step": 3494 + }, + { + "epoch": 1.00488892084262, + "grad_norm": 0.14368940889835358, + "learning_rate": 7.761698011283202e-06, + "loss": 0.8905, + "step": 3495 + }, + { + "ce_ib": 4.209848880767822, + "ce_orig": 0.9591165781021118, + "epoch": 1.00488892084262, + "kl_loss": 0.21706005930900574, + "loss_ib": 0.006380449514836073, + "step": 3495 + }, + { + "ce_ib": 5.090513706207275, + "ce_orig": 1.0056202411651611, + "epoch": 1.00488892084262, + "kl_loss": 0.2178749442100525, + "loss_ib": 0.00726926326751709, + "step": 3495 + }, + { + "ce_ib": 1.895297646522522, + "ce_orig": 0.5002477765083313, + "epoch": 1.00488892084262, + "kl_loss": 0.16495177149772644, + "loss_ib": 0.003544815117493272, + "step": 3495 + }, + { + "ce_ib": 2.8698477745056152, + "ce_orig": 0.4312732517719269, + "epoch": 1.00488892084262, + "kl_loss": 0.11840376257896423, + "loss_ib": 0.004053885117173195, + "step": 3495 + }, + { + "ce_ib": 4.441551685333252, + "ce_orig": 1.2148934602737427, + "epoch": 1.0051765044215974, + "kl_loss": 0.21492572128772736, + "loss_ib": 0.00659080920740962, + "step": 3496 + }, + { + "ce_ib": 2.6679062843322754, + "ce_orig": 0.8540940880775452, + "epoch": 1.0051765044215974, + "kl_loss": 0.11938831210136414, + "loss_ib": 0.003861789358779788, + "step": 3496 + }, + { + "ce_ib": 3.226736307144165, + "ce_orig": 0.8625224232673645, + "epoch": 1.0051765044215974, + "kl_loss": 0.29670724272727966, + "loss_ib": 0.0061938087455928326, + "step": 3496 + }, + { + "ce_ib": 3.5471034049987793, + "ce_orig": 0.6420904994010925, + "epoch": 1.0051765044215974, + "kl_loss": 0.2594853639602661, + "loss_ib": 0.0061419568955898285, + "step": 3496 + }, + { + "ce_ib": 4.502467155456543, + "ce_orig": 0.8526267409324646, + "epoch": 1.0054640880005752, + "kl_loss": 0.1697007715702057, + "loss_ib": 0.006199474912136793, + "step": 3497 + }, + { + "ce_ib": 1.797929048538208, + "ce_orig": 0.3370576500892639, + "epoch": 1.0054640880005752, + "kl_loss": 0.20129162073135376, + "loss_ib": 0.0038108453154563904, + "step": 3497 + }, + { + "ce_ib": 6.185752868652344, + "ce_orig": 1.3359416723251343, + "epoch": 1.0054640880005752, + "kl_loss": 0.1970268189907074, + "loss_ib": 0.008156020194292068, + "step": 3497 + }, + { + "ce_ib": 4.188435077667236, + "ce_orig": 0.9745916128158569, + "epoch": 1.0054640880005752, + "kl_loss": 0.11232029646635056, + "loss_ib": 0.005311638116836548, + "step": 3497 + }, + { + "ce_ib": 6.7191057205200195, + "ce_orig": 1.6279001235961914, + "epoch": 1.0057516715795527, + "kl_loss": 0.13282804191112518, + "loss_ib": 0.008047386072576046, + "step": 3498 + }, + { + "ce_ib": 6.921193599700928, + "ce_orig": 1.5802857875823975, + "epoch": 1.0057516715795527, + "kl_loss": 0.22907835245132446, + "loss_ib": 0.009211977012455463, + "step": 3498 + }, + { + "ce_ib": 4.890466213226318, + "ce_orig": 1.079044222831726, + "epoch": 1.0057516715795527, + "kl_loss": 0.21056044101715088, + "loss_ib": 0.006996070500463247, + "step": 3498 + }, + { + "ce_ib": 4.040192127227783, + "ce_orig": 0.8839548826217651, + "epoch": 1.0057516715795527, + "kl_loss": 0.2103506475687027, + "loss_ib": 0.0061436984688043594, + "step": 3498 + }, + { + "ce_ib": 3.4370336532592773, + "ce_orig": 0.6880020499229431, + "epoch": 1.0060392551585304, + "kl_loss": 0.21278202533721924, + "loss_ib": 0.005564854014664888, + "step": 3499 + }, + { + "ce_ib": 4.419111728668213, + "ce_orig": 0.9960678219795227, + "epoch": 1.0060392551585304, + "kl_loss": 0.3226282000541687, + "loss_ib": 0.007645393256098032, + "step": 3499 + }, + { + "ce_ib": 4.969743251800537, + "ce_orig": 1.0044398307800293, + "epoch": 1.0060392551585304, + "kl_loss": 0.16144758462905884, + "loss_ib": 0.006584218703210354, + "step": 3499 + }, + { + "ce_ib": 3.6671924591064453, + "ce_orig": 1.0641981363296509, + "epoch": 1.0060392551585304, + "kl_loss": 0.14118437469005585, + "loss_ib": 0.005079036112874746, + "step": 3499 + }, + { + "epoch": 1.0063268387375082, + "grad_norm": 0.13675080239772797, + "learning_rate": 7.755225092377498e-06, + "loss": 0.893, + "step": 3500 + }, + { + "ce_ib": 3.2606067657470703, + "ce_orig": 0.9602817296981812, + "epoch": 1.0063268387375082, + "kl_loss": 0.16103802621364594, + "loss_ib": 0.00487098703160882, + "step": 3500 + }, + { + "ce_ib": 2.8438737392425537, + "ce_orig": 0.6526296138763428, + "epoch": 1.0063268387375082, + "kl_loss": 0.24187159538269043, + "loss_ib": 0.005262589547783136, + "step": 3500 + }, + { + "ce_ib": 2.10711407661438, + "ce_orig": 0.6721705198287964, + "epoch": 1.0063268387375082, + "kl_loss": 0.1644839495420456, + "loss_ib": 0.0037519533652812243, + "step": 3500 + }, + { + "ce_ib": 3.9539151191711426, + "ce_orig": 1.3795851469039917, + "epoch": 1.0063268387375082, + "kl_loss": 0.15855813026428223, + "loss_ib": 0.0055394964292645454, + "step": 3500 + }, + { + "ce_ib": 3.5250940322875977, + "ce_orig": 0.9683559536933899, + "epoch": 1.0066144223164857, + "kl_loss": 0.21789084374904633, + "loss_ib": 0.0057040024548769, + "step": 3501 + }, + { + "ce_ib": 3.450563669204712, + "ce_orig": 0.9774522185325623, + "epoch": 1.0066144223164857, + "kl_loss": 0.13248085975646973, + "loss_ib": 0.0047753723338246346, + "step": 3501 + }, + { + "ce_ib": 3.31304669380188, + "ce_orig": 0.40190157294273376, + "epoch": 1.0066144223164857, + "kl_loss": 0.24493631720542908, + "loss_ib": 0.005762409884482622, + "step": 3501 + }, + { + "ce_ib": 3.5270066261291504, + "ce_orig": 0.8784852623939514, + "epoch": 1.0066144223164857, + "kl_loss": 0.3194485604763031, + "loss_ib": 0.006721492391079664, + "step": 3501 + }, + { + "ce_ib": 2.6976003646850586, + "ce_orig": 0.6194900274276733, + "epoch": 1.0069020058954634, + "kl_loss": 0.15731291472911835, + "loss_ib": 0.0042707291431725025, + "step": 3502 + }, + { + "ce_ib": 6.075970649719238, + "ce_orig": 1.3461014032363892, + "epoch": 1.0069020058954634, + "kl_loss": 0.2861129939556122, + "loss_ib": 0.008937099948525429, + "step": 3502 + }, + { + "ce_ib": 5.395439147949219, + "ce_orig": 1.4285333156585693, + "epoch": 1.0069020058954634, + "kl_loss": 0.2147427350282669, + "loss_ib": 0.0075428662821650505, + "step": 3502 + }, + { + "ce_ib": 0.902597963809967, + "ce_orig": 0.1777905970811844, + "epoch": 1.0069020058954634, + "kl_loss": 0.35368865728378296, + "loss_ib": 0.00443948432803154, + "step": 3502 + }, + { + "ce_ib": 2.4976694583892822, + "ce_orig": 0.4437655508518219, + "epoch": 1.007189589474441, + "kl_loss": 0.22968482971191406, + "loss_ib": 0.004794517531991005, + "step": 3503 + }, + { + "ce_ib": 2.7253479957580566, + "ce_orig": 0.6044225692749023, + "epoch": 1.007189589474441, + "kl_loss": 0.15097011625766754, + "loss_ib": 0.004235049244016409, + "step": 3503 + }, + { + "ce_ib": 4.710489273071289, + "ce_orig": 1.4210374355316162, + "epoch": 1.007189589474441, + "kl_loss": 0.11218060553073883, + "loss_ib": 0.005832295399159193, + "step": 3503 + }, + { + "ce_ib": 3.3403425216674805, + "ce_orig": 0.907450795173645, + "epoch": 1.007189589474441, + "kl_loss": 0.14022335410118103, + "loss_ib": 0.004742576275020838, + "step": 3503 + }, + { + "ce_ib": 6.920816898345947, + "ce_orig": 1.6906507015228271, + "epoch": 1.0074771730534187, + "kl_loss": 0.264279842376709, + "loss_ib": 0.009563615545630455, + "step": 3504 + }, + { + "ce_ib": 4.403830528259277, + "ce_orig": 1.0081549882888794, + "epoch": 1.0074771730534187, + "kl_loss": 0.17841750383377075, + "loss_ib": 0.006188005208969116, + "step": 3504 + }, + { + "ce_ib": 3.5233302116394043, + "ce_orig": 0.7639614343643188, + "epoch": 1.0074771730534187, + "kl_loss": 0.17686615884304047, + "loss_ib": 0.005291991867125034, + "step": 3504 + }, + { + "ce_ib": 5.075497627258301, + "ce_orig": 1.0148032903671265, + "epoch": 1.0074771730534187, + "kl_loss": 0.17463727295398712, + "loss_ib": 0.006821869872510433, + "step": 3504 + }, + { + "epoch": 1.0077647566323964, + "grad_norm": 0.13733930885791779, + "learning_rate": 7.748745535495041e-06, + "loss": 0.8679, + "step": 3505 + }, + { + "ce_ib": 3.1189887523651123, + "ce_orig": 0.6923277378082275, + "epoch": 1.0077647566323964, + "kl_loss": 0.18045887351036072, + "loss_ib": 0.004923577420413494, + "step": 3505 + }, + { + "ce_ib": 3.0639307498931885, + "ce_orig": 0.9827346801757812, + "epoch": 1.0077647566323964, + "kl_loss": 0.10278497636318207, + "loss_ib": 0.004091780632734299, + "step": 3505 + }, + { + "ce_ib": 3.8426785469055176, + "ce_orig": 0.6628304719924927, + "epoch": 1.0077647566323964, + "kl_loss": 0.1590297818183899, + "loss_ib": 0.0054329764097929, + "step": 3505 + }, + { + "ce_ib": 2.8546903133392334, + "ce_orig": 0.5351853370666504, + "epoch": 1.0077647566323964, + "kl_loss": 0.23725007474422455, + "loss_ib": 0.005227190908044577, + "step": 3505 + }, + { + "ce_ib": 4.408975124359131, + "ce_orig": 0.9195221066474915, + "epoch": 1.008052340211374, + "kl_loss": 0.2096213847398758, + "loss_ib": 0.006505188997834921, + "step": 3506 + }, + { + "ce_ib": 4.316622734069824, + "ce_orig": 0.719659686088562, + "epoch": 1.008052340211374, + "kl_loss": 0.17784371972084045, + "loss_ib": 0.006095060147345066, + "step": 3506 + }, + { + "ce_ib": 4.257396697998047, + "ce_orig": 0.9724505543708801, + "epoch": 1.008052340211374, + "kl_loss": 0.2940319776535034, + "loss_ib": 0.007197716273367405, + "step": 3506 + }, + { + "ce_ib": 2.5281920433044434, + "ce_orig": 0.8573859333992004, + "epoch": 1.008052340211374, + "kl_loss": 0.1477336585521698, + "loss_ib": 0.004005528520792723, + "step": 3506 + }, + { + "ce_ib": 3.099801540374756, + "ce_orig": 0.509239673614502, + "epoch": 1.0083399237903516, + "kl_loss": 0.21110589802265167, + "loss_ib": 0.005210860166698694, + "step": 3507 + }, + { + "ce_ib": 2.531338930130005, + "ce_orig": 0.6958469748497009, + "epoch": 1.0083399237903516, + "kl_loss": 0.12001753598451614, + "loss_ib": 0.0037315140943974257, + "step": 3507 + }, + { + "ce_ib": 3.946995973587036, + "ce_orig": 0.7779883146286011, + "epoch": 1.0083399237903516, + "kl_loss": 0.1262994110584259, + "loss_ib": 0.005209989845752716, + "step": 3507 + }, + { + "ce_ib": 5.085470676422119, + "ce_orig": 1.3374439477920532, + "epoch": 1.0083399237903516, + "kl_loss": 0.13277754187583923, + "loss_ib": 0.006413246039301157, + "step": 3507 + }, + { + "ce_ib": 2.6973705291748047, + "ce_orig": 0.4699265658855438, + "epoch": 1.0086275073693292, + "kl_loss": 0.17655624449253082, + "loss_ib": 0.004462932702153921, + "step": 3508 + }, + { + "ce_ib": 2.8475446701049805, + "ce_orig": 0.5352252125740051, + "epoch": 1.0086275073693292, + "kl_loss": 0.13340699672698975, + "loss_ib": 0.004181614611297846, + "step": 3508 + }, + { + "ce_ib": 4.106596946716309, + "ce_orig": 0.9759645462036133, + "epoch": 1.0086275073693292, + "kl_loss": 0.1678076535463333, + "loss_ib": 0.0057846736162900925, + "step": 3508 + }, + { + "ce_ib": 1.8248687982559204, + "ce_orig": 0.216191366314888, + "epoch": 1.0086275073693292, + "kl_loss": 0.3092295229434967, + "loss_ib": 0.004917163867503405, + "step": 3508 + }, + { + "ce_ib": 5.3347954750061035, + "ce_orig": 1.5408998727798462, + "epoch": 1.008915090948307, + "kl_loss": 0.2060328722000122, + "loss_ib": 0.007395124062895775, + "step": 3509 + }, + { + "ce_ib": 3.3471970558166504, + "ce_orig": 0.9165314435958862, + "epoch": 1.008915090948307, + "kl_loss": 0.10835281759500504, + "loss_ib": 0.004430725239217281, + "step": 3509 + }, + { + "ce_ib": 5.4001545906066895, + "ce_orig": 1.1301573514938354, + "epoch": 1.008915090948307, + "kl_loss": 0.10362276434898376, + "loss_ib": 0.006436382420361042, + "step": 3509 + }, + { + "ce_ib": 4.770638942718506, + "ce_orig": 1.0691231489181519, + "epoch": 1.008915090948307, + "kl_loss": 0.1860886812210083, + "loss_ib": 0.006631525699049234, + "step": 3509 + }, + { + "epoch": 1.0092026745272844, + "grad_norm": 0.14577004313468933, + "learning_rate": 7.742259356246594e-06, + "loss": 0.8867, + "step": 3510 + }, + { + "ce_ib": 6.450588226318359, + "ce_orig": 1.6324363946914673, + "epoch": 1.0092026745272844, + "kl_loss": 0.12667205929756165, + "loss_ib": 0.007717309053987265, + "step": 3510 + }, + { + "ce_ib": 2.7431273460388184, + "ce_orig": 0.7779247760772705, + "epoch": 1.0092026745272844, + "kl_loss": 0.2892572283744812, + "loss_ib": 0.005635699722915888, + "step": 3510 + }, + { + "ce_ib": 4.02156400680542, + "ce_orig": 1.258167028427124, + "epoch": 1.0092026745272844, + "kl_loss": 0.11930489540100098, + "loss_ib": 0.005214612931013107, + "step": 3510 + }, + { + "ce_ib": 3.9405128955841064, + "ce_orig": 0.9362601041793823, + "epoch": 1.0092026745272844, + "kl_loss": 0.2713027596473694, + "loss_ib": 0.006653539836406708, + "step": 3510 + }, + { + "ce_ib": 3.6639671325683594, + "ce_orig": 1.0259058475494385, + "epoch": 1.0094902581062621, + "kl_loss": 0.18503917753696442, + "loss_ib": 0.005514358635991812, + "step": 3511 + }, + { + "ce_ib": 1.360128402709961, + "ce_orig": 0.2568129897117615, + "epoch": 1.0094902581062621, + "kl_loss": 0.3701571226119995, + "loss_ib": 0.005061699543148279, + "step": 3511 + }, + { + "ce_ib": 2.4185492992401123, + "ce_orig": 0.9097275137901306, + "epoch": 1.0094902581062621, + "kl_loss": 0.1256832778453827, + "loss_ib": 0.0036753821186721325, + "step": 3511 + }, + { + "ce_ib": 1.9326263666152954, + "ce_orig": 0.43786007165908813, + "epoch": 1.0094902581062621, + "kl_loss": 0.1634925901889801, + "loss_ib": 0.0035675521939992905, + "step": 3511 + }, + { + "ce_ib": 3.7465932369232178, + "ce_orig": 0.9513622522354126, + "epoch": 1.0097778416852399, + "kl_loss": 0.18225625157356262, + "loss_ib": 0.005569155793637037, + "step": 3512 + }, + { + "ce_ib": 5.209081649780273, + "ce_orig": 1.1775507926940918, + "epoch": 1.0097778416852399, + "kl_loss": 0.13149599730968475, + "loss_ib": 0.006524041760712862, + "step": 3512 + }, + { + "ce_ib": 2.6380324363708496, + "ce_orig": 0.4574350416660309, + "epoch": 1.0097778416852399, + "kl_loss": 0.10204917937517166, + "loss_ib": 0.003658524015918374, + "step": 3512 + }, + { + "ce_ib": 1.9610402584075928, + "ce_orig": 0.510748565196991, + "epoch": 1.0097778416852399, + "kl_loss": 0.16259227693080902, + "loss_ib": 0.00358696305193007, + "step": 3512 + }, + { + "ce_ib": 4.076666831970215, + "ce_orig": 0.635549008846283, + "epoch": 1.0100654252642174, + "kl_loss": 0.16767798364162445, + "loss_ib": 0.005753446836024523, + "step": 3513 + }, + { + "ce_ib": 3.430692434310913, + "ce_orig": 0.47577905654907227, + "epoch": 1.0100654252642174, + "kl_loss": 0.2617259621620178, + "loss_ib": 0.006047951988875866, + "step": 3513 + }, + { + "ce_ib": 2.487163543701172, + "ce_orig": 0.6380851864814758, + "epoch": 1.0100654252642174, + "kl_loss": 0.1517232358455658, + "loss_ib": 0.00400439603254199, + "step": 3513 + }, + { + "ce_ib": 2.4834401607513428, + "ce_orig": 0.7907217144966125, + "epoch": 1.0100654252642174, + "kl_loss": 0.12919339537620544, + "loss_ib": 0.0037753742653876543, + "step": 3513 + }, + { + "ce_ib": 2.9610915184020996, + "ce_orig": 0.7341518402099609, + "epoch": 1.0103530088431951, + "kl_loss": 0.19114209711551666, + "loss_ib": 0.00487251253798604, + "step": 3514 + }, + { + "ce_ib": 3.3883376121520996, + "ce_orig": 0.8065544366836548, + "epoch": 1.0103530088431951, + "kl_loss": 0.16749057173728943, + "loss_ib": 0.005063243210315704, + "step": 3514 + }, + { + "ce_ib": 3.7521965503692627, + "ce_orig": 0.9800960421562195, + "epoch": 1.0103530088431951, + "kl_loss": 0.1928364336490631, + "loss_ib": 0.005680561065673828, + "step": 3514 + }, + { + "ce_ib": 3.0694713592529297, + "ce_orig": 0.6562567949295044, + "epoch": 1.0103530088431951, + "kl_loss": 0.18012946844100952, + "loss_ib": 0.004870765842497349, + "step": 3514 + }, + { + "epoch": 1.0106405924221726, + "grad_norm": 0.14879579842090607, + "learning_rate": 7.735766570258865e-06, + "loss": 0.856, + "step": 3515 + }, + { + "ce_ib": 5.947872638702393, + "ce_orig": 1.1484135389328003, + "epoch": 1.0106405924221726, + "kl_loss": 0.13732227683067322, + "loss_ib": 0.00732109509408474, + "step": 3515 + }, + { + "ce_ib": 5.621669769287109, + "ce_orig": 1.3442559242248535, + "epoch": 1.0106405924221726, + "kl_loss": 0.14098823070526123, + "loss_ib": 0.007031552027910948, + "step": 3515 + }, + { + "ce_ib": 1.6219024658203125, + "ce_orig": 0.5460817813873291, + "epoch": 1.0106405924221726, + "kl_loss": 0.09682242572307587, + "loss_ib": 0.0025901265908032656, + "step": 3515 + }, + { + "ce_ib": 4.756532192230225, + "ce_orig": 1.1896774768829346, + "epoch": 1.0106405924221726, + "kl_loss": 0.173491969704628, + "loss_ib": 0.006491451524198055, + "step": 3515 + }, + { + "ce_ib": 2.491400957107544, + "ce_orig": 0.49514472484588623, + "epoch": 1.0109281760011504, + "kl_loss": 0.21987569332122803, + "loss_ib": 0.00469015771523118, + "step": 3516 + }, + { + "ce_ib": 3.136188268661499, + "ce_orig": 0.8663493394851685, + "epoch": 1.0109281760011504, + "kl_loss": 0.3028523325920105, + "loss_ib": 0.006164711434394121, + "step": 3516 + }, + { + "ce_ib": 2.451526165008545, + "ce_orig": 0.5936236381530762, + "epoch": 1.0109281760011504, + "kl_loss": 0.13672533631324768, + "loss_ib": 0.0038187794853001833, + "step": 3516 + }, + { + "ce_ib": 2.307114601135254, + "ce_orig": 0.6439756155014038, + "epoch": 1.0109281760011504, + "kl_loss": 0.21279263496398926, + "loss_ib": 0.0044350409880280495, + "step": 3516 + }, + { + "ce_ib": 1.5381479263305664, + "ce_orig": 0.24641485512256622, + "epoch": 1.0112157595801279, + "kl_loss": 0.4006437063217163, + "loss_ib": 0.005544584710150957, + "step": 3517 + }, + { + "ce_ib": 4.9762959480285645, + "ce_orig": 1.2736730575561523, + "epoch": 1.0112157595801279, + "kl_loss": 0.20641055703163147, + "loss_ib": 0.007040401455014944, + "step": 3517 + }, + { + "ce_ib": 1.9881020784378052, + "ce_orig": 0.42657342553138733, + "epoch": 1.0112157595801279, + "kl_loss": 0.43473148345947266, + "loss_ib": 0.006335416808724403, + "step": 3517 + }, + { + "ce_ib": 6.297873020172119, + "ce_orig": 1.4365630149841309, + "epoch": 1.0112157595801279, + "kl_loss": 0.17399026453495026, + "loss_ib": 0.008037775754928589, + "step": 3517 + }, + { + "ce_ib": 1.8827205896377563, + "ce_orig": 0.2458503395318985, + "epoch": 1.0115033431591056, + "kl_loss": 0.21874362230300903, + "loss_ib": 0.004070156719535589, + "step": 3518 + }, + { + "ce_ib": 2.2154605388641357, + "ce_orig": 0.6444699168205261, + "epoch": 1.0115033431591056, + "kl_loss": 0.1972644031047821, + "loss_ib": 0.004188104532659054, + "step": 3518 + }, + { + "ce_ib": 4.022366523742676, + "ce_orig": 1.199116826057434, + "epoch": 1.0115033431591056, + "kl_loss": 0.16272883117198944, + "loss_ib": 0.005649654660373926, + "step": 3518 + }, + { + "ce_ib": 3.432330846786499, + "ce_orig": 0.8961070775985718, + "epoch": 1.0115033431591056, + "kl_loss": 0.1469658762216568, + "loss_ib": 0.004901989363133907, + "step": 3518 + }, + { + "ce_ib": 5.812510967254639, + "ce_orig": 1.5281873941421509, + "epoch": 1.0117909267380834, + "kl_loss": 0.17605173587799072, + "loss_ib": 0.007573028095066547, + "step": 3519 + }, + { + "ce_ib": 5.159303665161133, + "ce_orig": 1.1930338144302368, + "epoch": 1.0117909267380834, + "kl_loss": 0.18322330713272095, + "loss_ib": 0.006991536356508732, + "step": 3519 + }, + { + "ce_ib": 4.742610931396484, + "ce_orig": 0.9991231560707092, + "epoch": 1.0117909267380834, + "kl_loss": 0.17283663153648376, + "loss_ib": 0.006470976863056421, + "step": 3519 + }, + { + "ce_ib": 2.906618595123291, + "ce_orig": 0.6406941413879395, + "epoch": 1.0117909267380834, + "kl_loss": 0.21158117055892944, + "loss_ib": 0.0050224303267896175, + "step": 3519 + }, + { + "epoch": 1.0120785103170609, + "grad_norm": 0.12588435411453247, + "learning_rate": 7.729267193174483e-06, + "loss": 0.8872, + "step": 3520 + }, + { + "ce_ib": 6.060263156890869, + "ce_orig": 1.718260407447815, + "epoch": 1.0120785103170609, + "kl_loss": 0.2244100570678711, + "loss_ib": 0.008304364047944546, + "step": 3520 + }, + { + "ce_ib": 4.2215776443481445, + "ce_orig": 0.9852325916290283, + "epoch": 1.0120785103170609, + "kl_loss": 0.16953285038471222, + "loss_ib": 0.005916906055063009, + "step": 3520 + }, + { + "ce_ib": 2.5868160724639893, + "ce_orig": 0.6165667176246643, + "epoch": 1.0120785103170609, + "kl_loss": 0.11500076949596405, + "loss_ib": 0.003736823797225952, + "step": 3520 + }, + { + "ce_ib": 3.93110728263855, + "ce_orig": 0.7874623537063599, + "epoch": 1.0120785103170609, + "kl_loss": 0.18796521425247192, + "loss_ib": 0.005810759030282497, + "step": 3520 + }, + { + "ce_ib": 3.3858299255371094, + "ce_orig": 0.9741607308387756, + "epoch": 1.0123660938960386, + "kl_loss": 0.1454700082540512, + "loss_ib": 0.00484052998945117, + "step": 3521 + }, + { + "ce_ib": 4.222060203552246, + "ce_orig": 0.9501309990882874, + "epoch": 1.0123660938960386, + "kl_loss": 0.1330530196428299, + "loss_ib": 0.005552590359002352, + "step": 3521 + }, + { + "ce_ib": 5.293367862701416, + "ce_orig": 1.1376569271087646, + "epoch": 1.0123660938960386, + "kl_loss": 0.16229671239852905, + "loss_ib": 0.006916334852576256, + "step": 3521 + }, + { + "ce_ib": 6.2598676681518555, + "ce_orig": 1.405616283416748, + "epoch": 1.0123660938960386, + "kl_loss": 0.252564400434494, + "loss_ib": 0.008785511367022991, + "step": 3521 + }, + { + "ce_ib": 1.4115909337997437, + "ce_orig": 0.4514542520046234, + "epoch": 1.0126536774750161, + "kl_loss": 0.09879161417484283, + "loss_ib": 0.002399507211521268, + "step": 3522 + }, + { + "ce_ib": 2.4839069843292236, + "ce_orig": 0.679517924785614, + "epoch": 1.0126536774750161, + "kl_loss": 0.13107316195964813, + "loss_ib": 0.003794638440012932, + "step": 3522 + }, + { + "ce_ib": 3.74723744392395, + "ce_orig": 0.37974220514297485, + "epoch": 1.0126536774750161, + "kl_loss": 0.23723189532756805, + "loss_ib": 0.00611955625936389, + "step": 3522 + }, + { + "ce_ib": 5.980181694030762, + "ce_orig": 1.6391539573669434, + "epoch": 1.0126536774750161, + "kl_loss": 0.20952509343624115, + "loss_ib": 0.008075432851910591, + "step": 3522 + }, + { + "ce_ib": 5.097780704498291, + "ce_orig": 0.931393027305603, + "epoch": 1.0129412610539938, + "kl_loss": 0.18320167064666748, + "loss_ib": 0.00692979758605361, + "step": 3523 + }, + { + "ce_ib": 3.6124515533447266, + "ce_orig": 0.950604259967804, + "epoch": 1.0129412610539938, + "kl_loss": 0.23692303895950317, + "loss_ib": 0.005981681868433952, + "step": 3523 + }, + { + "ce_ib": 2.9529237747192383, + "ce_orig": 0.7751789689064026, + "epoch": 1.0129412610539938, + "kl_loss": 0.22281616926193237, + "loss_ib": 0.00518108531832695, + "step": 3523 + }, + { + "ce_ib": 2.544431447982788, + "ce_orig": 0.8691092133522034, + "epoch": 1.0129412610539938, + "kl_loss": 0.10715223848819733, + "loss_ib": 0.003615953726693988, + "step": 3523 + }, + { + "ce_ib": 3.6189444065093994, + "ce_orig": 0.7075605392456055, + "epoch": 1.0132288446329714, + "kl_loss": 0.19129490852355957, + "loss_ib": 0.005531894043087959, + "step": 3524 + }, + { + "ce_ib": 1.5238289833068848, + "ce_orig": 0.2779885530471802, + "epoch": 1.0132288446329714, + "kl_loss": 0.2877485454082489, + "loss_ib": 0.0044013140723109245, + "step": 3524 + }, + { + "ce_ib": 3.0392282009124756, + "ce_orig": 0.5587323904037476, + "epoch": 1.0132288446329714, + "kl_loss": 0.20263931155204773, + "loss_ib": 0.005065620876848698, + "step": 3524 + }, + { + "ce_ib": 4.815839767456055, + "ce_orig": 1.1242668628692627, + "epoch": 1.0132288446329714, + "kl_loss": 0.13810880482196808, + "loss_ib": 0.00619692774489522, + "step": 3524 + }, + { + "epoch": 1.013516428211949, + "grad_norm": 0.13313309848308563, + "learning_rate": 7.722761240651957e-06, + "loss": 0.8727, + "step": 3525 + }, + { + "ce_ib": 4.569977283477783, + "ce_orig": 0.7489607334136963, + "epoch": 1.013516428211949, + "kl_loss": 0.1878330111503601, + "loss_ib": 0.006448307074606419, + "step": 3525 + }, + { + "ce_ib": 3.9684154987335205, + "ce_orig": 1.0106829404830933, + "epoch": 1.013516428211949, + "kl_loss": 0.20595583319664001, + "loss_ib": 0.006027973722666502, + "step": 3525 + }, + { + "ce_ib": 5.254972457885742, + "ce_orig": 1.1910887956619263, + "epoch": 1.013516428211949, + "kl_loss": 0.14705660939216614, + "loss_ib": 0.0067255389876663685, + "step": 3525 + }, + { + "ce_ib": 4.797619819641113, + "ce_orig": 1.0773025751113892, + "epoch": 1.013516428211949, + "kl_loss": 0.21702349185943604, + "loss_ib": 0.006967854220420122, + "step": 3525 + }, + { + "ce_ib": 6.541355133056641, + "ce_orig": 1.843928337097168, + "epoch": 1.0138040117909268, + "kl_loss": 0.2296145111322403, + "loss_ib": 0.008837499655783176, + "step": 3526 + }, + { + "ce_ib": 2.73077392578125, + "ce_orig": 0.6201620697975159, + "epoch": 1.0138040117909268, + "kl_loss": 0.11215860396623611, + "loss_ib": 0.00385235995054245, + "step": 3526 + }, + { + "ce_ib": 2.729257106781006, + "ce_orig": 0.47776225209236145, + "epoch": 1.0138040117909268, + "kl_loss": 0.2158774733543396, + "loss_ib": 0.004888031631708145, + "step": 3526 + }, + { + "ce_ib": 2.914834976196289, + "ce_orig": 0.6870064735412598, + "epoch": 1.0138040117909268, + "kl_loss": 0.2258988916873932, + "loss_ib": 0.005173823796212673, + "step": 3526 + }, + { + "ce_ib": 2.3610668182373047, + "ce_orig": 0.9242029786109924, + "epoch": 1.0140915953699043, + "kl_loss": 0.10489942133426666, + "loss_ib": 0.00341006089001894, + "step": 3527 + }, + { + "ce_ib": 3.591273784637451, + "ce_orig": 0.5749997496604919, + "epoch": 1.0140915953699043, + "kl_loss": 0.17017552256584167, + "loss_ib": 0.005293028894811869, + "step": 3527 + }, + { + "ce_ib": 4.501062870025635, + "ce_orig": 0.9971438646316528, + "epoch": 1.0140915953699043, + "kl_loss": 0.13553690910339355, + "loss_ib": 0.005856431555002928, + "step": 3527 + }, + { + "ce_ib": 3.3083205223083496, + "ce_orig": 0.8870781064033508, + "epoch": 1.0140915953699043, + "kl_loss": 0.18900823593139648, + "loss_ib": 0.005198402795940638, + "step": 3527 + }, + { + "ce_ib": 4.879922389984131, + "ce_orig": 1.234713077545166, + "epoch": 1.014379178948882, + "kl_loss": 0.12175256013870239, + "loss_ib": 0.006097447592765093, + "step": 3528 + }, + { + "ce_ib": 7.169898509979248, + "ce_orig": 1.9038039445877075, + "epoch": 1.014379178948882, + "kl_loss": 0.14886510372161865, + "loss_ib": 0.00865854974836111, + "step": 3528 + }, + { + "ce_ib": 5.314695358276367, + "ce_orig": 1.5288681983947754, + "epoch": 1.014379178948882, + "kl_loss": 0.14669150114059448, + "loss_ib": 0.006781610194593668, + "step": 3528 + }, + { + "ce_ib": 3.367692708969116, + "ce_orig": 0.7442722320556641, + "epoch": 1.014379178948882, + "kl_loss": 0.15190039575099945, + "loss_ib": 0.004886697046458721, + "step": 3528 + }, + { + "ce_ib": 3.518707275390625, + "ce_orig": 0.7653383612632751, + "epoch": 1.0146667625278596, + "kl_loss": 0.180987149477005, + "loss_ib": 0.005328578874468803, + "step": 3529 + }, + { + "ce_ib": 6.3469672203063965, + "ce_orig": 1.0414197444915771, + "epoch": 1.0146667625278596, + "kl_loss": 0.08589836955070496, + "loss_ib": 0.007205951027572155, + "step": 3529 + }, + { + "ce_ib": 3.247135877609253, + "ce_orig": 0.6639663577079773, + "epoch": 1.0146667625278596, + "kl_loss": 0.2452826201915741, + "loss_ib": 0.005699962377548218, + "step": 3529 + }, + { + "ce_ib": 3.1192591190338135, + "ce_orig": 0.7520073652267456, + "epoch": 1.0146667625278596, + "kl_loss": 0.20333580672740936, + "loss_ib": 0.0051526171155273914, + "step": 3529 + }, + { + "epoch": 1.0149543461068373, + "grad_norm": 0.17410211265087128, + "learning_rate": 7.716248728365636e-06, + "loss": 0.923, + "step": 3530 + }, + { + "ce_ib": 3.8937301635742188, + "ce_orig": 1.146498680114746, + "epoch": 1.0149543461068373, + "kl_loss": 0.10646432638168335, + "loss_ib": 0.004958373494446278, + "step": 3530 + }, + { + "ce_ib": 2.496673345565796, + "ce_orig": 0.8138883113861084, + "epoch": 1.0149543461068373, + "kl_loss": 0.09328639507293701, + "loss_ib": 0.00342953740619123, + "step": 3530 + }, + { + "ce_ib": 3.6721816062927246, + "ce_orig": 0.6872288584709167, + "epoch": 1.0149543461068373, + "kl_loss": 0.09813699126243591, + "loss_ib": 0.0046535516157746315, + "step": 3530 + }, + { + "ce_ib": 4.178457260131836, + "ce_orig": 0.6021036505699158, + "epoch": 1.0149543461068373, + "kl_loss": 0.19010615348815918, + "loss_ib": 0.00607951870188117, + "step": 3530 + }, + { + "ce_ib": 2.3693935871124268, + "ce_orig": 0.6228126287460327, + "epoch": 1.0152419296858148, + "kl_loss": 0.12221600115299225, + "loss_ib": 0.0035915535409003496, + "step": 3531 + }, + { + "ce_ib": 3.8788273334503174, + "ce_orig": 0.9170746207237244, + "epoch": 1.0152419296858148, + "kl_loss": 0.24714714288711548, + "loss_ib": 0.006350298877805471, + "step": 3531 + }, + { + "ce_ib": 5.691501617431641, + "ce_orig": 1.5551871061325073, + "epoch": 1.0152419296858148, + "kl_loss": 0.14774751663208008, + "loss_ib": 0.007168976590037346, + "step": 3531 + }, + { + "ce_ib": 2.3313064575195312, + "ce_orig": 0.673969566822052, + "epoch": 1.0152419296858148, + "kl_loss": 0.08396998792886734, + "loss_ib": 0.0031710064504295588, + "step": 3531 + }, + { + "ce_ib": 5.145344257354736, + "ce_orig": 1.6632359027862549, + "epoch": 1.0155295132647926, + "kl_loss": 0.15443505346775055, + "loss_ib": 0.006689694244414568, + "step": 3532 + }, + { + "ce_ib": 5.157540798187256, + "ce_orig": 0.6077222228050232, + "epoch": 1.0155295132647926, + "kl_loss": 0.21021151542663574, + "loss_ib": 0.007259656209498644, + "step": 3532 + }, + { + "ce_ib": 4.444430351257324, + "ce_orig": 1.3533413410186768, + "epoch": 1.0155295132647926, + "kl_loss": 0.1445184350013733, + "loss_ib": 0.005889614578336477, + "step": 3532 + }, + { + "ce_ib": 4.711503028869629, + "ce_orig": 1.295377492904663, + "epoch": 1.0155295132647926, + "kl_loss": 0.12699389457702637, + "loss_ib": 0.0059814415872097015, + "step": 3532 + }, + { + "ce_ib": 2.830479145050049, + "ce_orig": 0.702804446220398, + "epoch": 1.0158170968437703, + "kl_loss": 0.15993191301822662, + "loss_ib": 0.004429798107594252, + "step": 3533 + }, + { + "ce_ib": 2.4561097621917725, + "ce_orig": 0.7221253514289856, + "epoch": 1.0158170968437703, + "kl_loss": 0.1334155648946762, + "loss_ib": 0.003790265182033181, + "step": 3533 + }, + { + "ce_ib": 5.1258063316345215, + "ce_orig": 1.538278341293335, + "epoch": 1.0158170968437703, + "kl_loss": 0.22911491990089417, + "loss_ib": 0.007416955195367336, + "step": 3533 + }, + { + "ce_ib": 6.195032119750977, + "ce_orig": 1.4977798461914062, + "epoch": 1.0158170968437703, + "kl_loss": 0.14455285668373108, + "loss_ib": 0.007640560623258352, + "step": 3533 + }, + { + "ce_ib": 2.72860050201416, + "ce_orig": 0.7894951701164246, + "epoch": 1.0161046804227478, + "kl_loss": 0.1137482225894928, + "loss_ib": 0.0038660827558487654, + "step": 3534 + }, + { + "ce_ib": 1.965630292892456, + "ce_orig": 0.4568782448768616, + "epoch": 1.0161046804227478, + "kl_loss": 0.0995325893163681, + "loss_ib": 0.0029609559569507837, + "step": 3534 + }, + { + "ce_ib": 4.448879718780518, + "ce_orig": 1.015597939491272, + "epoch": 1.0161046804227478, + "kl_loss": 0.17691713571548462, + "loss_ib": 0.006218050606548786, + "step": 3534 + }, + { + "ce_ib": 4.879349231719971, + "ce_orig": 1.3173856735229492, + "epoch": 1.0161046804227478, + "kl_loss": 0.22101841866970062, + "loss_ib": 0.007089532911777496, + "step": 3534 + }, + { + "epoch": 1.0163922640017256, + "grad_norm": 0.1277073174715042, + "learning_rate": 7.709729672005672e-06, + "loss": 0.9194, + "step": 3535 + }, + { + "ce_ib": 2.145296812057495, + "ce_orig": 0.668163537979126, + "epoch": 1.0163922640017256, + "kl_loss": 0.16171512007713318, + "loss_ib": 0.003762447740882635, + "step": 3535 + }, + { + "ce_ib": 5.806839466094971, + "ce_orig": 1.320348858833313, + "epoch": 1.0163922640017256, + "kl_loss": 0.1451244205236435, + "loss_ib": 0.007258083671331406, + "step": 3535 + }, + { + "ce_ib": 1.844547152519226, + "ce_orig": 0.4010717570781708, + "epoch": 1.0163922640017256, + "kl_loss": 0.11101850867271423, + "loss_ib": 0.002954732161015272, + "step": 3535 + }, + { + "ce_ib": 2.9794907569885254, + "ce_orig": 0.6108062267303467, + "epoch": 1.0163922640017256, + "kl_loss": 0.14866960048675537, + "loss_ib": 0.004466186743229628, + "step": 3535 + }, + { + "ce_ib": 6.111087799072266, + "ce_orig": 0.9618916511535645, + "epoch": 1.016679847580703, + "kl_loss": 0.15441754460334778, + "loss_ib": 0.0076552629470825195, + "step": 3536 + }, + { + "ce_ib": 3.430002450942993, + "ce_orig": 1.0311148166656494, + "epoch": 1.016679847580703, + "kl_loss": 0.11520615965127945, + "loss_ib": 0.004582064226269722, + "step": 3536 + }, + { + "ce_ib": 3.660280704498291, + "ce_orig": 0.7318329811096191, + "epoch": 1.016679847580703, + "kl_loss": 0.19165396690368652, + "loss_ib": 0.005576820112764835, + "step": 3536 + }, + { + "ce_ib": 5.161524772644043, + "ce_orig": 0.8863758444786072, + "epoch": 1.016679847580703, + "kl_loss": 0.18218667805194855, + "loss_ib": 0.006983391474932432, + "step": 3536 + }, + { + "ce_ib": 4.7224578857421875, + "ce_orig": 1.1392300128936768, + "epoch": 1.0169674311596808, + "kl_loss": 0.17736133933067322, + "loss_ib": 0.006496070884168148, + "step": 3537 + }, + { + "ce_ib": 2.4367589950561523, + "ce_orig": 0.6989193558692932, + "epoch": 1.0169674311596808, + "kl_loss": 0.08991364389657974, + "loss_ib": 0.0033358954824507236, + "step": 3537 + }, + { + "ce_ib": 4.146738052368164, + "ce_orig": 0.935728907585144, + "epoch": 1.0169674311596808, + "kl_loss": 0.14807099103927612, + "loss_ib": 0.005627447739243507, + "step": 3537 + }, + { + "ce_ib": 2.0301053524017334, + "ce_orig": 0.501010000705719, + "epoch": 1.0169674311596808, + "kl_loss": 0.11460119485855103, + "loss_ib": 0.0031761175487190485, + "step": 3537 + }, + { + "ce_ib": 2.775479316711426, + "ce_orig": 0.7772588133811951, + "epoch": 1.0172550147386583, + "kl_loss": 0.170095294713974, + "loss_ib": 0.0044764322228729725, + "step": 3538 + }, + { + "ce_ib": 2.1525399684906006, + "ce_orig": 0.6783819198608398, + "epoch": 1.0172550147386583, + "kl_loss": 0.10453394055366516, + "loss_ib": 0.0031978790648281574, + "step": 3538 + }, + { + "ce_ib": 4.649052143096924, + "ce_orig": 1.0621262788772583, + "epoch": 1.0172550147386583, + "kl_loss": 0.20156130194664001, + "loss_ib": 0.006664665415883064, + "step": 3538 + }, + { + "ce_ib": 2.994239568710327, + "ce_orig": 0.6760426759719849, + "epoch": 1.0172550147386583, + "kl_loss": 0.1769196093082428, + "loss_ib": 0.004763435572385788, + "step": 3538 + }, + { + "ce_ib": 2.529639720916748, + "ce_orig": 0.7951051592826843, + "epoch": 1.017542598317636, + "kl_loss": 0.18949270248413086, + "loss_ib": 0.0044245668686926365, + "step": 3539 + }, + { + "ce_ib": 3.644326686859131, + "ce_orig": 0.7339462041854858, + "epoch": 1.017542598317636, + "kl_loss": 0.19096137583255768, + "loss_ib": 0.005553940311074257, + "step": 3539 + }, + { + "ce_ib": 3.305342674255371, + "ce_orig": 0.7808636426925659, + "epoch": 1.017542598317636, + "kl_loss": 0.19482143223285675, + "loss_ib": 0.005253556650131941, + "step": 3539 + }, + { + "ce_ib": 4.8005690574646, + "ce_orig": 1.1487505435943604, + "epoch": 1.017542598317636, + "kl_loss": 0.23721060156822205, + "loss_ib": 0.007172674871981144, + "step": 3539 + }, + { + "epoch": 1.0178301818966138, + "grad_norm": 0.12173061817884445, + "learning_rate": 7.703204087277989e-06, + "loss": 0.8955, + "step": 3540 + }, + { + "ce_ib": 3.664698362350464, + "ce_orig": 1.0121411085128784, + "epoch": 1.0178301818966138, + "kl_loss": 0.1563931703567505, + "loss_ib": 0.005228630267083645, + "step": 3540 + }, + { + "ce_ib": 3.5236105918884277, + "ce_orig": 0.8859958648681641, + "epoch": 1.0178301818966138, + "kl_loss": 0.3023838698863983, + "loss_ib": 0.00654744915664196, + "step": 3540 + }, + { + "ce_ib": 6.025217533111572, + "ce_orig": 1.340805172920227, + "epoch": 1.0178301818966138, + "kl_loss": 0.17567169666290283, + "loss_ib": 0.007781934458762407, + "step": 3540 + }, + { + "ce_ib": 3.7626280784606934, + "ce_orig": 0.8844795227050781, + "epoch": 1.0178301818966138, + "kl_loss": 0.1515142023563385, + "loss_ib": 0.005277770105749369, + "step": 3540 + }, + { + "ce_ib": 4.275973796844482, + "ce_orig": 1.0083049535751343, + "epoch": 1.0181177654755913, + "kl_loss": 0.16967149078845978, + "loss_ib": 0.005972688551992178, + "step": 3541 + }, + { + "ce_ib": 3.6189067363739014, + "ce_orig": 1.061126708984375, + "epoch": 1.0181177654755913, + "kl_loss": 0.1454392522573471, + "loss_ib": 0.005073299165815115, + "step": 3541 + }, + { + "ce_ib": 1.4571466445922852, + "ce_orig": 0.3093093931674957, + "epoch": 1.0181177654755913, + "kl_loss": 0.15888431668281555, + "loss_ib": 0.0030459898989647627, + "step": 3541 + }, + { + "ce_ib": 4.788170337677002, + "ce_orig": 1.1042059659957886, + "epoch": 1.0181177654755913, + "kl_loss": 0.14498469233512878, + "loss_ib": 0.00623801676556468, + "step": 3541 + }, + { + "ce_ib": 4.860157012939453, + "ce_orig": 1.4293642044067383, + "epoch": 1.018405349054569, + "kl_loss": 0.14283734560012817, + "loss_ib": 0.006288530770689249, + "step": 3542 + }, + { + "ce_ib": 3.29172945022583, + "ce_orig": 0.8577694296836853, + "epoch": 1.018405349054569, + "kl_loss": 0.12591665983200073, + "loss_ib": 0.004550896119326353, + "step": 3542 + }, + { + "ce_ib": 3.541612386703491, + "ce_orig": 0.8278687000274658, + "epoch": 1.018405349054569, + "kl_loss": 0.19538268446922302, + "loss_ib": 0.005495438817888498, + "step": 3542 + }, + { + "ce_ib": 4.240954399108887, + "ce_orig": 0.8372217416763306, + "epoch": 1.018405349054569, + "kl_loss": 0.17957067489624023, + "loss_ib": 0.006036661099642515, + "step": 3542 + }, + { + "ce_ib": 2.0702295303344727, + "ce_orig": 0.6433606743812561, + "epoch": 1.0186929326335465, + "kl_loss": 0.10073749721050262, + "loss_ib": 0.003077604342252016, + "step": 3543 + }, + { + "ce_ib": 2.7033066749572754, + "ce_orig": 0.6953257918357849, + "epoch": 1.0186929326335465, + "kl_loss": 0.23304928839206696, + "loss_ib": 0.005033799447119236, + "step": 3543 + }, + { + "ce_ib": 3.07843017578125, + "ce_orig": 0.7323834896087646, + "epoch": 1.0186929326335465, + "kl_loss": 0.1734258532524109, + "loss_ib": 0.0048126885667443275, + "step": 3543 + }, + { + "ce_ib": 2.643242359161377, + "ce_orig": 0.49459972977638245, + "epoch": 1.0186929326335465, + "kl_loss": 0.13474009931087494, + "loss_ib": 0.003990643657743931, + "step": 3543 + }, + { + "ce_ib": 3.0856821537017822, + "ce_orig": 0.7506402730941772, + "epoch": 1.0189805162125243, + "kl_loss": 0.164760023355484, + "loss_ib": 0.004733282141387463, + "step": 3544 + }, + { + "ce_ib": 4.663334846496582, + "ce_orig": 0.891984224319458, + "epoch": 1.0189805162125243, + "kl_loss": 0.12552645802497864, + "loss_ib": 0.00591859919950366, + "step": 3544 + }, + { + "ce_ib": 6.453037738800049, + "ce_orig": 1.6341174840927124, + "epoch": 1.0189805162125243, + "kl_loss": 0.19340744614601135, + "loss_ib": 0.008387112058699131, + "step": 3544 + }, + { + "ce_ib": 5.709354877471924, + "ce_orig": 1.1662095785140991, + "epoch": 1.0189805162125243, + "kl_loss": 0.20658820867538452, + "loss_ib": 0.00777523685246706, + "step": 3544 + }, + { + "epoch": 1.019268099791502, + "grad_norm": 0.14564312994480133, + "learning_rate": 7.69667198990423e-06, + "loss": 0.8861, + "step": 3545 + }, + { + "ce_ib": 3.7729225158691406, + "ce_orig": 0.9540063738822937, + "epoch": 1.019268099791502, + "kl_loss": 0.16325557231903076, + "loss_ib": 0.0054054781794548035, + "step": 3545 + }, + { + "ce_ib": 3.716003894805908, + "ce_orig": 0.9380910396575928, + "epoch": 1.019268099791502, + "kl_loss": 0.17094728350639343, + "loss_ib": 0.005425476934760809, + "step": 3545 + }, + { + "ce_ib": 3.7146878242492676, + "ce_orig": 1.1108921766281128, + "epoch": 1.019268099791502, + "kl_loss": 0.23762723803520203, + "loss_ib": 0.006090960465371609, + "step": 3545 + }, + { + "ce_ib": 1.9198137521743774, + "ce_orig": 0.5548996925354004, + "epoch": 1.019268099791502, + "kl_loss": 0.1541789174079895, + "loss_ib": 0.0034616028424352407, + "step": 3545 + }, + { + "ce_ib": 5.640294075012207, + "ce_orig": 1.317185878753662, + "epoch": 1.0195556833704795, + "kl_loss": 0.1939748376607895, + "loss_ib": 0.007580042351037264, + "step": 3546 + }, + { + "ce_ib": 6.172346115112305, + "ce_orig": 1.6367155313491821, + "epoch": 1.0195556833704795, + "kl_loss": 0.17050617933273315, + "loss_ib": 0.007877408526837826, + "step": 3546 + }, + { + "ce_ib": 5.251382827758789, + "ce_orig": 1.3680143356323242, + "epoch": 1.0195556833704795, + "kl_loss": 0.12590286135673523, + "loss_ib": 0.006510411389172077, + "step": 3546 + }, + { + "ce_ib": 2.6963634490966797, + "ce_orig": 0.7785568833351135, + "epoch": 1.0195556833704795, + "kl_loss": 0.18417319655418396, + "loss_ib": 0.004538095556199551, + "step": 3546 + }, + { + "ce_ib": 3.577726364135742, + "ce_orig": 0.3871666491031647, + "epoch": 1.0198432669494573, + "kl_loss": 0.144084632396698, + "loss_ib": 0.00501857278868556, + "step": 3547 + }, + { + "ce_ib": 2.9090816974639893, + "ce_orig": 0.6210928559303284, + "epoch": 1.0198432669494573, + "kl_loss": 0.22473274171352386, + "loss_ib": 0.005156408995389938, + "step": 3547 + }, + { + "ce_ib": 2.712545156478882, + "ce_orig": 0.8476703763008118, + "epoch": 1.0198432669494573, + "kl_loss": 0.175447016954422, + "loss_ib": 0.004467015154659748, + "step": 3547 + }, + { + "ce_ib": 2.758852005004883, + "ce_orig": 0.6455957293510437, + "epoch": 1.0198432669494573, + "kl_loss": 0.13981172442436218, + "loss_ib": 0.004156969022005796, + "step": 3547 + }, + { + "ce_ib": 4.834006309509277, + "ce_orig": 1.2932534217834473, + "epoch": 1.0201308505284348, + "kl_loss": 0.2291489839553833, + "loss_ib": 0.007125495932996273, + "step": 3548 + }, + { + "ce_ib": 3.37974214553833, + "ce_orig": 0.9940027594566345, + "epoch": 1.0201308505284348, + "kl_loss": 0.1429687887430191, + "loss_ib": 0.004809430334717035, + "step": 3548 + }, + { + "ce_ib": 4.725401401519775, + "ce_orig": 0.9322115182876587, + "epoch": 1.0201308505284348, + "kl_loss": 0.16857844591140747, + "loss_ib": 0.00641118548810482, + "step": 3548 + }, + { + "ce_ib": 3.6983659267425537, + "ce_orig": 0.8000825643539429, + "epoch": 1.0201308505284348, + "kl_loss": 0.24456726014614105, + "loss_ib": 0.006144038401544094, + "step": 3548 + }, + { + "ce_ib": 2.744896650314331, + "ce_orig": 0.6006035208702087, + "epoch": 1.0204184341074125, + "kl_loss": 0.19854231178760529, + "loss_ib": 0.004730320069938898, + "step": 3549 + }, + { + "ce_ib": 4.645377159118652, + "ce_orig": 1.1533117294311523, + "epoch": 1.0204184341074125, + "kl_loss": 0.19442087411880493, + "loss_ib": 0.0065895854495465755, + "step": 3549 + }, + { + "ce_ib": 2.829228162765503, + "ce_orig": 0.6289601922035217, + "epoch": 1.0204184341074125, + "kl_loss": 0.22333413362503052, + "loss_ib": 0.00506256939843297, + "step": 3549 + }, + { + "ce_ib": 4.23634147644043, + "ce_orig": 1.1944724321365356, + "epoch": 1.0204184341074125, + "kl_loss": 0.15062648057937622, + "loss_ib": 0.005742605775594711, + "step": 3549 + }, + { + "epoch": 1.02070601768639, + "grad_norm": 0.13472387194633484, + "learning_rate": 7.690133395621739e-06, + "loss": 0.8428, + "step": 3550 + }, + { + "ce_ib": 2.344845771789551, + "ce_orig": 0.7767351269721985, + "epoch": 1.02070601768639, + "kl_loss": 0.13195425271987915, + "loss_ib": 0.003664388321340084, + "step": 3550 + }, + { + "ce_ib": 2.179033041000366, + "ce_orig": 0.5169665813446045, + "epoch": 1.02070601768639, + "kl_loss": 0.16766181588172913, + "loss_ib": 0.0038556510116904974, + "step": 3550 + }, + { + "ce_ib": 2.5245563983917236, + "ce_orig": 0.7972135543823242, + "epoch": 1.02070601768639, + "kl_loss": 0.11200879514217377, + "loss_ib": 0.0036446445155888796, + "step": 3550 + }, + { + "ce_ib": 2.582815170288086, + "ce_orig": 0.6712902188301086, + "epoch": 1.02070601768639, + "kl_loss": 0.3056652545928955, + "loss_ib": 0.005639467854052782, + "step": 3550 + }, + { + "ce_ib": 5.727010250091553, + "ce_orig": 1.3970401287078857, + "epoch": 1.0209936012653678, + "kl_loss": 0.17081768810749054, + "loss_ib": 0.007435186766088009, + "step": 3551 + }, + { + "ce_ib": 2.302201271057129, + "ce_orig": 0.695151686668396, + "epoch": 1.0209936012653678, + "kl_loss": 0.1831519901752472, + "loss_ib": 0.0041337208822369576, + "step": 3551 + }, + { + "ce_ib": 5.98333215713501, + "ce_orig": 1.4875144958496094, + "epoch": 1.0209936012653678, + "kl_loss": 0.16588973999023438, + "loss_ib": 0.007642229553312063, + "step": 3551 + }, + { + "ce_ib": 4.37196159362793, + "ce_orig": 0.6612356901168823, + "epoch": 1.0209936012653678, + "kl_loss": 0.18764495849609375, + "loss_ib": 0.006248410791158676, + "step": 3551 + }, + { + "ce_ib": 5.0755696296691895, + "ce_orig": 1.284131646156311, + "epoch": 1.0212811848443455, + "kl_loss": 0.13844914734363556, + "loss_ib": 0.006460060831159353, + "step": 3552 + }, + { + "ce_ib": 1.7481211423873901, + "ce_orig": 0.3846924901008606, + "epoch": 1.0212811848443455, + "kl_loss": 0.12386985868215561, + "loss_ib": 0.00298681971617043, + "step": 3552 + }, + { + "ce_ib": 7.271521091461182, + "ce_orig": 0.9571646451950073, + "epoch": 1.0212811848443455, + "kl_loss": 0.22519594430923462, + "loss_ib": 0.009523480199277401, + "step": 3552 + }, + { + "ce_ib": 3.015791893005371, + "ce_orig": 0.7216377258300781, + "epoch": 1.0212811848443455, + "kl_loss": 0.19619616866111755, + "loss_ib": 0.004977753851562738, + "step": 3552 + }, + { + "ce_ib": 2.766540765762329, + "ce_orig": 0.8382845520973206, + "epoch": 1.021568768423323, + "kl_loss": 0.15667042136192322, + "loss_ib": 0.004333245102316141, + "step": 3553 + }, + { + "ce_ib": 5.680378437042236, + "ce_orig": 1.2193944454193115, + "epoch": 1.021568768423323, + "kl_loss": 0.16425058245658875, + "loss_ib": 0.007322884630411863, + "step": 3553 + }, + { + "ce_ib": 2.4299633502960205, + "ce_orig": 0.7257227301597595, + "epoch": 1.021568768423323, + "kl_loss": 0.26780951023101807, + "loss_ib": 0.005108058452606201, + "step": 3553 + }, + { + "ce_ib": 5.300495624542236, + "ce_orig": 1.0261390209197998, + "epoch": 1.021568768423323, + "kl_loss": 0.23362763226032257, + "loss_ib": 0.007636771537363529, + "step": 3553 + }, + { + "ce_ib": 3.705991506576538, + "ce_orig": 0.88468337059021, + "epoch": 1.0218563520023007, + "kl_loss": 0.1868116706609726, + "loss_ib": 0.005574108101427555, + "step": 3554 + }, + { + "ce_ib": 4.7819013595581055, + "ce_orig": 1.0766117572784424, + "epoch": 1.0218563520023007, + "kl_loss": 0.13164392113685608, + "loss_ib": 0.006098340731114149, + "step": 3554 + }, + { + "ce_ib": 3.302468776702881, + "ce_orig": 0.9236818552017212, + "epoch": 1.0218563520023007, + "kl_loss": 0.21548563241958618, + "loss_ib": 0.005457324907183647, + "step": 3554 + }, + { + "ce_ib": 4.144946575164795, + "ce_orig": 0.7967799305915833, + "epoch": 1.0218563520023007, + "kl_loss": 0.2355901449918747, + "loss_ib": 0.006500847637653351, + "step": 3554 + }, + { + "epoch": 1.0221439355812783, + "grad_norm": 0.13401827216148376, + "learning_rate": 7.683588320183503e-06, + "loss": 0.8874, + "step": 3555 + }, + { + "ce_ib": 5.3190412521362305, + "ce_orig": 1.3203554153442383, + "epoch": 1.0221439355812783, + "kl_loss": 0.1844039410352707, + "loss_ib": 0.007163080852478743, + "step": 3555 + }, + { + "ce_ib": 3.538116216659546, + "ce_orig": 0.8043896555900574, + "epoch": 1.0221439355812783, + "kl_loss": 0.11457914113998413, + "loss_ib": 0.004683907609432936, + "step": 3555 + }, + { + "ce_ib": 5.433038711547852, + "ce_orig": 1.2407373189926147, + "epoch": 1.0221439355812783, + "kl_loss": 0.18587413430213928, + "loss_ib": 0.007291780319064856, + "step": 3555 + }, + { + "ce_ib": 2.819953441619873, + "ce_orig": 0.9925054907798767, + "epoch": 1.0221439355812783, + "kl_loss": 0.1740136444568634, + "loss_ib": 0.004560090135782957, + "step": 3555 + }, + { + "ce_ib": 3.2069294452667236, + "ce_orig": 0.44566258788108826, + "epoch": 1.022431519160256, + "kl_loss": 0.1497569978237152, + "loss_ib": 0.004704499617218971, + "step": 3556 + }, + { + "ce_ib": 3.2776615619659424, + "ce_orig": 0.7681499719619751, + "epoch": 1.022431519160256, + "kl_loss": 0.2908075451850891, + "loss_ib": 0.006185736507177353, + "step": 3556 + }, + { + "ce_ib": 4.398695945739746, + "ce_orig": 0.6551598906517029, + "epoch": 1.022431519160256, + "kl_loss": 0.22090700268745422, + "loss_ib": 0.006607765797525644, + "step": 3556 + }, + { + "ce_ib": 4.12183952331543, + "ce_orig": 0.9148684144020081, + "epoch": 1.022431519160256, + "kl_loss": 0.1262260526418686, + "loss_ib": 0.0053841001354157925, + "step": 3556 + }, + { + "ce_ib": 0.7549005150794983, + "ce_orig": 0.217503160238266, + "epoch": 1.0227191027392335, + "kl_loss": 0.29697391390800476, + "loss_ib": 0.0037246395368129015, + "step": 3557 + }, + { + "ce_ib": 4.515927314758301, + "ce_orig": 1.2338817119598389, + "epoch": 1.0227191027392335, + "kl_loss": 0.15208227932453156, + "loss_ib": 0.006036750040948391, + "step": 3557 + }, + { + "ce_ib": 3.0932743549346924, + "ce_orig": 0.8213307857513428, + "epoch": 1.0227191027392335, + "kl_loss": 0.14908424019813538, + "loss_ib": 0.0045841168612241745, + "step": 3557 + }, + { + "ce_ib": 1.3813655376434326, + "ce_orig": 0.28699836134910583, + "epoch": 1.0227191027392335, + "kl_loss": 0.2905579209327698, + "loss_ib": 0.00428694486618042, + "step": 3557 + }, + { + "ce_ib": 3.3761448860168457, + "ce_orig": 1.0946680307388306, + "epoch": 1.0230066863182112, + "kl_loss": 0.12342475354671478, + "loss_ib": 0.004610392265021801, + "step": 3558 + }, + { + "ce_ib": 5.53902530670166, + "ce_orig": 1.2944347858428955, + "epoch": 1.0230066863182112, + "kl_loss": 0.18283504247665405, + "loss_ib": 0.007367375772446394, + "step": 3558 + }, + { + "ce_ib": 3.019558906555176, + "ce_orig": 0.9459133744239807, + "epoch": 1.0230066863182112, + "kl_loss": 0.16044744849205017, + "loss_ib": 0.004624033346772194, + "step": 3558 + }, + { + "ce_ib": 4.0231499671936035, + "ce_orig": 0.8064485788345337, + "epoch": 1.0230066863182112, + "kl_loss": 0.24305188655853271, + "loss_ib": 0.00645366869866848, + "step": 3558 + }, + { + "ce_ib": 4.0605692863464355, + "ce_orig": 0.9248575568199158, + "epoch": 1.023294269897189, + "kl_loss": 0.2189321666955948, + "loss_ib": 0.00624989066272974, + "step": 3559 + }, + { + "ce_ib": 2.43841814994812, + "ce_orig": 0.4447513818740845, + "epoch": 1.023294269897189, + "kl_loss": 0.1594141721725464, + "loss_ib": 0.00403255969285965, + "step": 3559 + }, + { + "ce_ib": 4.77946138381958, + "ce_orig": 0.9629703760147095, + "epoch": 1.023294269897189, + "kl_loss": 0.15404273569583893, + "loss_ib": 0.0063198888674378395, + "step": 3559 + }, + { + "ce_ib": 4.141188621520996, + "ce_orig": 0.9386536478996277, + "epoch": 1.023294269897189, + "kl_loss": 0.13965891301631927, + "loss_ib": 0.005537777207791805, + "step": 3559 + }, + { + "epoch": 1.0235818534761665, + "grad_norm": 0.13636243343353271, + "learning_rate": 7.67703677935813e-06, + "loss": 0.8736, + "step": 3560 + }, + { + "ce_ib": 1.8836454153060913, + "ce_orig": 0.5725927352905273, + "epoch": 1.0235818534761665, + "kl_loss": 0.11665846407413483, + "loss_ib": 0.0030502299778163433, + "step": 3560 + }, + { + "ce_ib": 4.252971649169922, + "ce_orig": 0.7153838872909546, + "epoch": 1.0235818534761665, + "kl_loss": 0.19907334446907043, + "loss_ib": 0.006243704818189144, + "step": 3560 + }, + { + "ce_ib": 2.062504529953003, + "ce_orig": 0.40146055817604065, + "epoch": 1.0235818534761665, + "kl_loss": 0.14185044169425964, + "loss_ib": 0.003481009043753147, + "step": 3560 + }, + { + "ce_ib": 4.061996936798096, + "ce_orig": 0.7397218346595764, + "epoch": 1.0235818534761665, + "kl_loss": 0.18927356600761414, + "loss_ib": 0.0059547326527535915, + "step": 3560 + }, + { + "ce_ib": 3.9903314113616943, + "ce_orig": 0.6988621354103088, + "epoch": 1.0238694370551442, + "kl_loss": 0.21265247464179993, + "loss_ib": 0.006116856355220079, + "step": 3561 + }, + { + "ce_ib": 4.098020076751709, + "ce_orig": 0.5702466368675232, + "epoch": 1.0238694370551442, + "kl_loss": 0.16309088468551636, + "loss_ib": 0.005728928837925196, + "step": 3561 + }, + { + "ce_ib": 3.7705578804016113, + "ce_orig": 1.032801866531372, + "epoch": 1.0238694370551442, + "kl_loss": 0.14415708184242249, + "loss_ib": 0.0052121286280453205, + "step": 3561 + }, + { + "ce_ib": 3.838831663131714, + "ce_orig": 0.7948209643363953, + "epoch": 1.0238694370551442, + "kl_loss": 0.15220695734024048, + "loss_ib": 0.005360901355743408, + "step": 3561 + }, + { + "ce_ib": 5.821230888366699, + "ce_orig": 1.4141626358032227, + "epoch": 1.0241570206341217, + "kl_loss": 0.1548716276884079, + "loss_ib": 0.007369947154074907, + "step": 3562 + }, + { + "ce_ib": 2.2505125999450684, + "ce_orig": 0.3376314342021942, + "epoch": 1.0241570206341217, + "kl_loss": 0.16671894490718842, + "loss_ib": 0.003917702008038759, + "step": 3562 + }, + { + "ce_ib": 3.233471393585205, + "ce_orig": 0.5013559460639954, + "epoch": 1.0241570206341217, + "kl_loss": 0.1231411024928093, + "loss_ib": 0.004464882425963879, + "step": 3562 + }, + { + "ce_ib": 2.1180944442749023, + "ce_orig": 0.24774692952632904, + "epoch": 1.0241570206341217, + "kl_loss": 0.18924841284751892, + "loss_ib": 0.004010578151792288, + "step": 3562 + }, + { + "ce_ib": 0.5853055119514465, + "ce_orig": 0.14948898553848267, + "epoch": 1.0244446042130995, + "kl_loss": 0.34144464135169983, + "loss_ib": 0.003999751526862383, + "step": 3563 + }, + { + "ce_ib": 2.4800498485565186, + "ce_orig": 0.7221322059631348, + "epoch": 1.0244446042130995, + "kl_loss": 0.12491056323051453, + "loss_ib": 0.003729155519977212, + "step": 3563 + }, + { + "ce_ib": 3.6526246070861816, + "ce_orig": 0.6231518983840942, + "epoch": 1.0244446042130995, + "kl_loss": 0.24929650127887726, + "loss_ib": 0.0061455899849534035, + "step": 3563 + }, + { + "ce_ib": 3.4142701625823975, + "ce_orig": 0.6718997955322266, + "epoch": 1.0244446042130995, + "kl_loss": 0.1652885377407074, + "loss_ib": 0.005067155230790377, + "step": 3563 + }, + { + "ce_ib": 5.041940689086914, + "ce_orig": 1.441393256187439, + "epoch": 1.024732187792077, + "kl_loss": 0.1458439826965332, + "loss_ib": 0.006500380579382181, + "step": 3564 + }, + { + "ce_ib": 3.7564237117767334, + "ce_orig": 0.730278491973877, + "epoch": 1.024732187792077, + "kl_loss": 0.2097810059785843, + "loss_ib": 0.0058542340993881226, + "step": 3564 + }, + { + "ce_ib": 2.5478994846343994, + "ce_orig": 0.7384859919548035, + "epoch": 1.024732187792077, + "kl_loss": 0.14610141515731812, + "loss_ib": 0.004008913412690163, + "step": 3564 + }, + { + "ce_ib": 2.3486950397491455, + "ce_orig": 0.6579396724700928, + "epoch": 1.024732187792077, + "kl_loss": 0.14637847244739532, + "loss_ib": 0.0038124797865748405, + "step": 3564 + }, + { + "epoch": 1.0250197713710547, + "grad_norm": 0.14574703574180603, + "learning_rate": 7.670478788929803e-06, + "loss": 0.833, + "step": 3565 + }, + { + "ce_ib": 3.4942543506622314, + "ce_orig": 0.4724753797054291, + "epoch": 1.0250197713710547, + "kl_loss": 0.2084721028804779, + "loss_ib": 0.005578975658863783, + "step": 3565 + }, + { + "ce_ib": 2.726609706878662, + "ce_orig": 0.6946508884429932, + "epoch": 1.0250197713710547, + "kl_loss": 0.10938698053359985, + "loss_ib": 0.0038204793818295, + "step": 3565 + }, + { + "ce_ib": 3.2694568634033203, + "ce_orig": 0.7791404128074646, + "epoch": 1.0250197713710547, + "kl_loss": 0.16441723704338074, + "loss_ib": 0.004913629032671452, + "step": 3565 + }, + { + "ce_ib": 2.3029088973999023, + "ce_orig": 0.6490177512168884, + "epoch": 1.0250197713710547, + "kl_loss": 0.1031242236495018, + "loss_ib": 0.003334151115268469, + "step": 3565 + }, + { + "ce_ib": 3.403209686279297, + "ce_orig": 0.7589555382728577, + "epoch": 1.0253073549500324, + "kl_loss": 0.1593720018863678, + "loss_ib": 0.004996929783374071, + "step": 3566 + }, + { + "ce_ib": 3.4721765518188477, + "ce_orig": 1.0446593761444092, + "epoch": 1.0253073549500324, + "kl_loss": 0.22656795382499695, + "loss_ib": 0.005737856030464172, + "step": 3566 + }, + { + "ce_ib": 3.4166488647460938, + "ce_orig": 0.40411290526390076, + "epoch": 1.0253073549500324, + "kl_loss": 0.22270962595939636, + "loss_ib": 0.005643744952976704, + "step": 3566 + }, + { + "ce_ib": 3.4814889430999756, + "ce_orig": 0.4799364507198334, + "epoch": 1.0253073549500324, + "kl_loss": 0.20836088061332703, + "loss_ib": 0.005565098021179438, + "step": 3566 + }, + { + "ce_ib": 3.738161087036133, + "ce_orig": 1.06916081905365, + "epoch": 1.02559493852901, + "kl_loss": 0.17451772093772888, + "loss_ib": 0.005483338143676519, + "step": 3567 + }, + { + "ce_ib": 3.300025701522827, + "ce_orig": 0.6971123218536377, + "epoch": 1.02559493852901, + "kl_loss": 0.17081120610237122, + "loss_ib": 0.005008137784898281, + "step": 3567 + }, + { + "ce_ib": 3.814629077911377, + "ce_orig": 0.783237636089325, + "epoch": 1.02559493852901, + "kl_loss": 0.2023596465587616, + "loss_ib": 0.00583822512999177, + "step": 3567 + }, + { + "ce_ib": 3.295074939727783, + "ce_orig": 0.6047444343566895, + "epoch": 1.02559493852901, + "kl_loss": 0.26220688223838806, + "loss_ib": 0.005917143542319536, + "step": 3567 + }, + { + "ce_ib": 5.815857410430908, + "ce_orig": 1.1306426525115967, + "epoch": 1.0258825221079877, + "kl_loss": 0.3660396337509155, + "loss_ib": 0.009476253762841225, + "step": 3568 + }, + { + "ce_ib": 5.304281711578369, + "ce_orig": 1.236378788948059, + "epoch": 1.0258825221079877, + "kl_loss": 0.13986220955848694, + "loss_ib": 0.006702903192490339, + "step": 3568 + }, + { + "ce_ib": 2.83135724067688, + "ce_orig": 0.6478382349014282, + "epoch": 1.0258825221079877, + "kl_loss": 0.2232898771762848, + "loss_ib": 0.005064256023615599, + "step": 3568 + }, + { + "ce_ib": 2.7747020721435547, + "ce_orig": 0.5721414089202881, + "epoch": 1.0258825221079877, + "kl_loss": 0.18130865693092346, + "loss_ib": 0.004587788600474596, + "step": 3568 + }, + { + "ce_ib": 2.931657552719116, + "ce_orig": 0.8471775650978088, + "epoch": 1.0261701056869652, + "kl_loss": 0.12483466416597366, + "loss_ib": 0.004180004354566336, + "step": 3569 + }, + { + "ce_ib": 1.7408854961395264, + "ce_orig": 0.4966096580028534, + "epoch": 1.0261701056869652, + "kl_loss": 0.1740100383758545, + "loss_ib": 0.0034809857606887817, + "step": 3569 + }, + { + "ce_ib": 5.4163384437561035, + "ce_orig": 1.2797764539718628, + "epoch": 1.0261701056869652, + "kl_loss": 0.1754576861858368, + "loss_ib": 0.007170915603637695, + "step": 3569 + }, + { + "ce_ib": 5.568865776062012, + "ce_orig": 1.2693753242492676, + "epoch": 1.0261701056869652, + "kl_loss": 0.14386223256587982, + "loss_ib": 0.007007488515228033, + "step": 3569 + }, + { + "epoch": 1.026457689265943, + "grad_norm": 0.15481750667095184, + "learning_rate": 7.663914364698241e-06, + "loss": 0.8006, + "step": 3570 + }, + { + "ce_ib": 4.684581279754639, + "ce_orig": 1.1793394088745117, + "epoch": 1.026457689265943, + "kl_loss": 0.15456141531467438, + "loss_ib": 0.006230195518583059, + "step": 3570 + }, + { + "ce_ib": 3.9316017627716064, + "ce_orig": 1.0096546411514282, + "epoch": 1.026457689265943, + "kl_loss": 0.13924744725227356, + "loss_ib": 0.005324076395481825, + "step": 3570 + }, + { + "ce_ib": 4.19818639755249, + "ce_orig": 1.007498860359192, + "epoch": 1.026457689265943, + "kl_loss": 0.09716513007879257, + "loss_ib": 0.005169837269932032, + "step": 3570 + }, + { + "ce_ib": 4.439659595489502, + "ce_orig": 0.9071674346923828, + "epoch": 1.026457689265943, + "kl_loss": 0.15564844012260437, + "loss_ib": 0.005996143911033869, + "step": 3570 + }, + { + "ce_ib": 3.469228744506836, + "ce_orig": 0.6971327066421509, + "epoch": 1.0267452728449205, + "kl_loss": 0.1314697563648224, + "loss_ib": 0.00478392606601119, + "step": 3571 + }, + { + "ce_ib": 5.2460808753967285, + "ce_orig": 0.6593372225761414, + "epoch": 1.0267452728449205, + "kl_loss": 0.3018879294395447, + "loss_ib": 0.008264959789812565, + "step": 3571 + }, + { + "ce_ib": 4.2574615478515625, + "ce_orig": 1.077628493309021, + "epoch": 1.0267452728449205, + "kl_loss": 0.13666090369224548, + "loss_ib": 0.005624070763587952, + "step": 3571 + }, + { + "ce_ib": 4.30481481552124, + "ce_orig": 1.152515172958374, + "epoch": 1.0267452728449205, + "kl_loss": 0.1901816725730896, + "loss_ib": 0.006206631660461426, + "step": 3571 + }, + { + "ce_ib": 5.789363861083984, + "ce_orig": 1.3910146951675415, + "epoch": 1.0270328564238982, + "kl_loss": 0.23146182298660278, + "loss_ib": 0.008103981614112854, + "step": 3572 + }, + { + "ce_ib": 4.9136810302734375, + "ce_orig": 1.2784343957901, + "epoch": 1.0270328564238982, + "kl_loss": 0.20876017212867737, + "loss_ib": 0.00700128311291337, + "step": 3572 + }, + { + "ce_ib": 2.74505877494812, + "ce_orig": 0.8600686192512512, + "epoch": 1.0270328564238982, + "kl_loss": 0.11101563274860382, + "loss_ib": 0.0038552151527255774, + "step": 3572 + }, + { + "ce_ib": 3.5076346397399902, + "ce_orig": 1.2754464149475098, + "epoch": 1.0270328564238982, + "kl_loss": 0.12729036808013916, + "loss_ib": 0.004780538380146027, + "step": 3572 + }, + { + "ce_ib": 7.02449893951416, + "ce_orig": 1.5592243671417236, + "epoch": 1.027320440002876, + "kl_loss": 0.23047158122062683, + "loss_ib": 0.009329214692115784, + "step": 3573 + }, + { + "ce_ib": 3.693525552749634, + "ce_orig": 1.1303540468215942, + "epoch": 1.027320440002876, + "kl_loss": 0.11344291269779205, + "loss_ib": 0.0048279548063874245, + "step": 3573 + }, + { + "ce_ib": 2.8311381340026855, + "ce_orig": 0.6611068844795227, + "epoch": 1.027320440002876, + "kl_loss": 0.15192574262619019, + "loss_ib": 0.004350395407527685, + "step": 3573 + }, + { + "ce_ib": 1.9369728565216064, + "ce_orig": 0.4443923532962799, + "epoch": 1.027320440002876, + "kl_loss": 0.22754089534282684, + "loss_ib": 0.004212381783872843, + "step": 3573 + }, + { + "ce_ib": 4.324404239654541, + "ce_orig": 0.7201270461082458, + "epoch": 1.0276080235818534, + "kl_loss": 0.1666584014892578, + "loss_ib": 0.0059909881092607975, + "step": 3574 + }, + { + "ce_ib": 2.6163835525512695, + "ce_orig": 0.5220433473587036, + "epoch": 1.0276080235818534, + "kl_loss": 0.20118609070777893, + "loss_ib": 0.004628244321793318, + "step": 3574 + }, + { + "ce_ib": 2.6262576580047607, + "ce_orig": 0.7285624742507935, + "epoch": 1.0276080235818534, + "kl_loss": 0.07863470166921616, + "loss_ib": 0.003412604797631502, + "step": 3574 + }, + { + "ce_ib": 2.0215556621551514, + "ce_orig": 0.4623289108276367, + "epoch": 1.0276080235818534, + "kl_loss": 0.156147763133049, + "loss_ib": 0.00358303333632648, + "step": 3574 + }, + { + "epoch": 1.0278956071608312, + "grad_norm": 0.14367720484733582, + "learning_rate": 7.657343522478666e-06, + "loss": 0.9071, + "step": 3575 + }, + { + "ce_ib": 2.367799997329712, + "ce_orig": 0.7768823504447937, + "epoch": 1.0278956071608312, + "kl_loss": 0.17501166462898254, + "loss_ib": 0.00411791680380702, + "step": 3575 + }, + { + "ce_ib": 4.1678466796875, + "ce_orig": 0.9290083050727844, + "epoch": 1.0278956071608312, + "kl_loss": 0.1940726637840271, + "loss_ib": 0.006108573637902737, + "step": 3575 + }, + { + "ce_ib": 5.8375701904296875, + "ce_orig": 1.5659857988357544, + "epoch": 1.0278956071608312, + "kl_loss": 0.12143594026565552, + "loss_ib": 0.007051929831504822, + "step": 3575 + }, + { + "ce_ib": 4.767032146453857, + "ce_orig": 1.2729154825210571, + "epoch": 1.0278956071608312, + "kl_loss": 0.1925138235092163, + "loss_ib": 0.006692170165479183, + "step": 3575 + }, + { + "ce_ib": 3.0743308067321777, + "ce_orig": 0.7151661515235901, + "epoch": 1.0281831907398087, + "kl_loss": 0.16070225834846497, + "loss_ib": 0.004681353457272053, + "step": 3576 + }, + { + "ce_ib": 2.6232314109802246, + "ce_orig": 0.7253584265708923, + "epoch": 1.0281831907398087, + "kl_loss": 0.14212913811206818, + "loss_ib": 0.0040445225313305855, + "step": 3576 + }, + { + "ce_ib": 4.521548748016357, + "ce_orig": 0.9446025490760803, + "epoch": 1.0281831907398087, + "kl_loss": 0.21381030976772308, + "loss_ib": 0.006659651640802622, + "step": 3576 + }, + { + "ce_ib": 3.370269536972046, + "ce_orig": 0.7937721014022827, + "epoch": 1.0281831907398087, + "kl_loss": 0.24521715939044952, + "loss_ib": 0.005822441074997187, + "step": 3576 + }, + { + "ce_ib": 4.694420337677002, + "ce_orig": 1.3098711967468262, + "epoch": 1.0284707743187864, + "kl_loss": 0.22214581072330475, + "loss_ib": 0.006915878504514694, + "step": 3577 + }, + { + "ce_ib": 4.648372650146484, + "ce_orig": 0.9450826644897461, + "epoch": 1.0284707743187864, + "kl_loss": 0.17117208242416382, + "loss_ib": 0.006360093131661415, + "step": 3577 + }, + { + "ce_ib": 4.483219146728516, + "ce_orig": 0.9785233736038208, + "epoch": 1.0284707743187864, + "kl_loss": 0.14928585290908813, + "loss_ib": 0.005976077169179916, + "step": 3577 + }, + { + "ce_ib": 2.435619354248047, + "ce_orig": 0.687466561794281, + "epoch": 1.0284707743187864, + "kl_loss": 0.1445489525794983, + "loss_ib": 0.0038811087142676115, + "step": 3577 + }, + { + "ce_ib": 2.1030609607696533, + "ce_orig": 0.6168878674507141, + "epoch": 1.028758357897764, + "kl_loss": 0.14353352785110474, + "loss_ib": 0.0035383962094783783, + "step": 3578 + }, + { + "ce_ib": 2.35880184173584, + "ce_orig": 0.6521413326263428, + "epoch": 1.028758357897764, + "kl_loss": 0.16368362307548523, + "loss_ib": 0.003995637875050306, + "step": 3578 + }, + { + "ce_ib": 3.47455096244812, + "ce_orig": 0.6910495162010193, + "epoch": 1.028758357897764, + "kl_loss": 0.16417807340621948, + "loss_ib": 0.0051163313910365105, + "step": 3578 + }, + { + "ce_ib": 2.942774534225464, + "ce_orig": 0.5404086709022522, + "epoch": 1.028758357897764, + "kl_loss": 0.5495249629020691, + "loss_ib": 0.008438024669885635, + "step": 3578 + }, + { + "ce_ib": 5.049183368682861, + "ce_orig": 1.090328574180603, + "epoch": 1.0290459414767417, + "kl_loss": 0.1507694125175476, + "loss_ib": 0.00655687740072608, + "step": 3579 + }, + { + "ce_ib": 6.413918972015381, + "ce_orig": 1.2282706499099731, + "epoch": 1.0290459414767417, + "kl_loss": 0.14826750755310059, + "loss_ib": 0.007896593771874905, + "step": 3579 + }, + { + "ce_ib": 1.4721908569335938, + "ce_orig": 0.17029470205307007, + "epoch": 1.0290459414767417, + "kl_loss": 0.11890940368175507, + "loss_ib": 0.0026612847577780485, + "step": 3579 + }, + { + "ce_ib": 4.702784061431885, + "ce_orig": 1.0344642400741577, + "epoch": 1.0290459414767417, + "kl_loss": 0.18314868211746216, + "loss_ib": 0.006534270942211151, + "step": 3579 + }, + { + "epoch": 1.0293335250557194, + "grad_norm": 0.15805251896381378, + "learning_rate": 7.650766278101762e-06, + "loss": 0.7745, + "step": 3580 + }, + { + "ce_ib": 3.8076834678649902, + "ce_orig": 0.744575023651123, + "epoch": 1.0293335250557194, + "kl_loss": 0.21281646192073822, + "loss_ib": 0.0059358482249081135, + "step": 3580 + }, + { + "ce_ib": 6.406406402587891, + "ce_orig": 1.1799380779266357, + "epoch": 1.0293335250557194, + "kl_loss": 0.15970022976398468, + "loss_ib": 0.008003409020602703, + "step": 3580 + }, + { + "ce_ib": 2.1985936164855957, + "ce_orig": 0.5938782691955566, + "epoch": 1.0293335250557194, + "kl_loss": 0.1539672166109085, + "loss_ib": 0.003738265484571457, + "step": 3580 + }, + { + "ce_ib": 3.6236820220947266, + "ce_orig": 0.8295383453369141, + "epoch": 1.0293335250557194, + "kl_loss": 0.16882243752479553, + "loss_ib": 0.005311906337738037, + "step": 3580 + }, + { + "ce_ib": 4.4730072021484375, + "ce_orig": 1.0211290121078491, + "epoch": 1.029621108634697, + "kl_loss": 0.14912523329257965, + "loss_ib": 0.005964259151369333, + "step": 3581 + }, + { + "ce_ib": 4.5884294509887695, + "ce_orig": 1.089942455291748, + "epoch": 1.029621108634697, + "kl_loss": 0.1319698691368103, + "loss_ib": 0.005908127874135971, + "step": 3581 + }, + { + "ce_ib": 4.095072269439697, + "ce_orig": 1.3554927110671997, + "epoch": 1.029621108634697, + "kl_loss": 0.1761060357093811, + "loss_ib": 0.005856132600456476, + "step": 3581 + }, + { + "ce_ib": 3.233750104904175, + "ce_orig": 0.6547194719314575, + "epoch": 1.029621108634697, + "kl_loss": 0.24470838904380798, + "loss_ib": 0.0056808339431881905, + "step": 3581 + }, + { + "ce_ib": 4.167361736297607, + "ce_orig": 0.7291533350944519, + "epoch": 1.0299086922136746, + "kl_loss": 0.17558789253234863, + "loss_ib": 0.0059232404455542564, + "step": 3582 + }, + { + "ce_ib": 6.73720645904541, + "ce_orig": 1.7032719850540161, + "epoch": 1.0299086922136746, + "kl_loss": 0.15487733483314514, + "loss_ib": 0.008285979740321636, + "step": 3582 + }, + { + "ce_ib": 3.6537673473358154, + "ce_orig": 0.9220221042633057, + "epoch": 1.0299086922136746, + "kl_loss": 0.16936811804771423, + "loss_ib": 0.005347448401153088, + "step": 3582 + }, + { + "ce_ib": 3.91314697265625, + "ce_orig": 0.6997650861740112, + "epoch": 1.0299086922136746, + "kl_loss": 0.07356799393892288, + "loss_ib": 0.004648827016353607, + "step": 3582 + }, + { + "ce_ib": 4.798478603363037, + "ce_orig": 1.340589165687561, + "epoch": 1.0301962757926522, + "kl_loss": 0.18110913038253784, + "loss_ib": 0.0066095697693526745, + "step": 3583 + }, + { + "ce_ib": 2.26645827293396, + "ce_orig": 0.67094486951828, + "epoch": 1.0301962757926522, + "kl_loss": 0.13256940245628357, + "loss_ib": 0.0035921521484851837, + "step": 3583 + }, + { + "ce_ib": 4.926717758178711, + "ce_orig": 1.3289254903793335, + "epoch": 1.0301962757926522, + "kl_loss": 0.3271638751029968, + "loss_ib": 0.008198356255888939, + "step": 3583 + }, + { + "ce_ib": 5.680703639984131, + "ce_orig": 1.1199407577514648, + "epoch": 1.0301962757926522, + "kl_loss": 0.1929539144039154, + "loss_ib": 0.00761024234816432, + "step": 3583 + }, + { + "ce_ib": 4.394170761108398, + "ce_orig": 0.9200822710990906, + "epoch": 1.03048385937163, + "kl_loss": 0.13463452458381653, + "loss_ib": 0.005740515887737274, + "step": 3584 + }, + { + "ce_ib": 6.152550220489502, + "ce_orig": 0.6629872918128967, + "epoch": 1.03048385937163, + "kl_loss": 0.17014604806900024, + "loss_ib": 0.007854010909795761, + "step": 3584 + }, + { + "ce_ib": 2.9387927055358887, + "ce_orig": 0.8070468306541443, + "epoch": 1.03048385937163, + "kl_loss": 0.17054513096809387, + "loss_ib": 0.004644243977963924, + "step": 3584 + }, + { + "ce_ib": 2.4993042945861816, + "ce_orig": 0.6782501339912415, + "epoch": 1.03048385937163, + "kl_loss": 0.13680613040924072, + "loss_ib": 0.003867365652695298, + "step": 3584 + }, + { + "epoch": 1.0307714429506074, + "grad_norm": 0.1946462243795395, + "learning_rate": 7.64418264741364e-06, + "loss": 0.88, + "step": 3585 + }, + { + "ce_ib": 2.262641191482544, + "ce_orig": 0.5617938041687012, + "epoch": 1.0307714429506074, + "kl_loss": 0.12841811776161194, + "loss_ib": 0.003546822350472212, + "step": 3585 + }, + { + "ce_ib": 2.7894558906555176, + "ce_orig": 0.3876591920852661, + "epoch": 1.0307714429506074, + "kl_loss": 0.14859238266944885, + "loss_ib": 0.004275379702448845, + "step": 3585 + }, + { + "ce_ib": 3.1040008068084717, + "ce_orig": 0.7984561324119568, + "epoch": 1.0307714429506074, + "kl_loss": 0.19806858897209167, + "loss_ib": 0.005084686912596226, + "step": 3585 + }, + { + "ce_ib": 2.965524911880493, + "ce_orig": 0.42288023233413696, + "epoch": 1.0307714429506074, + "kl_loss": 0.16768112778663635, + "loss_ib": 0.004642336163669825, + "step": 3585 + }, + { + "ce_ib": 1.7503032684326172, + "ce_orig": 0.31008610129356384, + "epoch": 1.0310590265295851, + "kl_loss": 0.10308778285980225, + "loss_ib": 0.002781181363388896, + "step": 3586 + }, + { + "ce_ib": 4.009672164916992, + "ce_orig": 1.2113614082336426, + "epoch": 1.0310590265295851, + "kl_loss": 0.14576566219329834, + "loss_ib": 0.005467328708618879, + "step": 3586 + }, + { + "ce_ib": 4.761468887329102, + "ce_orig": 1.4155570268630981, + "epoch": 1.0310590265295851, + "kl_loss": 0.17848393321037292, + "loss_ib": 0.006546308286488056, + "step": 3586 + }, + { + "ce_ib": 4.23127555847168, + "ce_orig": 0.8098082542419434, + "epoch": 1.0310590265295851, + "kl_loss": 0.21048732101917267, + "loss_ib": 0.006336148828268051, + "step": 3586 + }, + { + "ce_ib": 3.517646074295044, + "ce_orig": 0.9048468470573425, + "epoch": 1.0313466101085629, + "kl_loss": 0.10591025650501251, + "loss_ib": 0.004576748702675104, + "step": 3587 + }, + { + "ce_ib": 2.268552541732788, + "ce_orig": 0.6489800810813904, + "epoch": 1.0313466101085629, + "kl_loss": 0.11131232976913452, + "loss_ib": 0.0033816760405898094, + "step": 3587 + }, + { + "ce_ib": 4.350583076477051, + "ce_orig": 1.2737723588943481, + "epoch": 1.0313466101085629, + "kl_loss": 0.22696109116077423, + "loss_ib": 0.006620194297283888, + "step": 3587 + }, + { + "ce_ib": 3.480614185333252, + "ce_orig": 0.7025856375694275, + "epoch": 1.0313466101085629, + "kl_loss": 0.19712403416633606, + "loss_ib": 0.005451854318380356, + "step": 3587 + }, + { + "ce_ib": 2.046537160873413, + "ce_orig": 0.4440819025039673, + "epoch": 1.0316341936875404, + "kl_loss": 0.18937671184539795, + "loss_ib": 0.003940304275602102, + "step": 3588 + }, + { + "ce_ib": 2.2807297706604004, + "ce_orig": 0.6426832675933838, + "epoch": 1.0316341936875404, + "kl_loss": 0.1830071061849594, + "loss_ib": 0.004110801033675671, + "step": 3588 + }, + { + "ce_ib": 3.13688325881958, + "ce_orig": 0.7831436991691589, + "epoch": 1.0316341936875404, + "kl_loss": 0.1790444254875183, + "loss_ib": 0.004927327390760183, + "step": 3588 + }, + { + "ce_ib": 2.262878656387329, + "ce_orig": 0.43579232692718506, + "epoch": 1.0316341936875404, + "kl_loss": 0.19101402163505554, + "loss_ib": 0.004173018969595432, + "step": 3588 + }, + { + "ce_ib": 5.015303611755371, + "ce_orig": 1.3950706720352173, + "epoch": 1.0319217772665181, + "kl_loss": 0.1301266849040985, + "loss_ib": 0.006316570099443197, + "step": 3589 + }, + { + "ce_ib": 2.5038137435913086, + "ce_orig": 0.6567334532737732, + "epoch": 1.0319217772665181, + "kl_loss": 0.14117710292339325, + "loss_ib": 0.003915584646165371, + "step": 3589 + }, + { + "ce_ib": 1.7225391864776611, + "ce_orig": 0.5046120882034302, + "epoch": 1.0319217772665181, + "kl_loss": 0.1303379237651825, + "loss_ib": 0.003025918500497937, + "step": 3589 + }, + { + "ce_ib": 2.116655111312866, + "ce_orig": 0.47218793630599976, + "epoch": 1.0319217772665181, + "kl_loss": 0.2086268961429596, + "loss_ib": 0.004202924203127623, + "step": 3589 + }, + { + "epoch": 1.0322093608454956, + "grad_norm": 0.14866966009140015, + "learning_rate": 7.637592646275792e-06, + "loss": 0.8118, + "step": 3590 + }, + { + "ce_ib": 1.1532553434371948, + "ce_orig": 0.11553885787725449, + "epoch": 1.0322093608454956, + "kl_loss": 0.2174244523048401, + "loss_ib": 0.0033274998422712088, + "step": 3590 + }, + { + "ce_ib": 2.7792015075683594, + "ce_orig": 0.7278241515159607, + "epoch": 1.0322093608454956, + "kl_loss": 0.29544389247894287, + "loss_ib": 0.005733639933168888, + "step": 3590 + }, + { + "ce_ib": 3.073111057281494, + "ce_orig": 0.572763204574585, + "epoch": 1.0322093608454956, + "kl_loss": 0.2997177541255951, + "loss_ib": 0.006070288363844156, + "step": 3590 + }, + { + "ce_ib": 4.5265655517578125, + "ce_orig": 1.0133230686187744, + "epoch": 1.0322093608454956, + "kl_loss": 0.2285437136888504, + "loss_ib": 0.006812002509832382, + "step": 3590 + }, + { + "ce_ib": 3.989344835281372, + "ce_orig": 1.0400378704071045, + "epoch": 1.0324969444244734, + "kl_loss": 0.2018832266330719, + "loss_ib": 0.006008176598697901, + "step": 3591 + }, + { + "ce_ib": 4.853291034698486, + "ce_orig": 1.2258327007293701, + "epoch": 1.0324969444244734, + "kl_loss": 0.1964568793773651, + "loss_ib": 0.006817860063165426, + "step": 3591 + }, + { + "ce_ib": 5.12047004699707, + "ce_orig": 0.8044964075088501, + "epoch": 1.0324969444244734, + "kl_loss": 0.1417456865310669, + "loss_ib": 0.006537926848977804, + "step": 3591 + }, + { + "ce_ib": 3.211007833480835, + "ce_orig": 0.7879824042320251, + "epoch": 1.0324969444244734, + "kl_loss": 0.1325056254863739, + "loss_ib": 0.004536064341664314, + "step": 3591 + }, + { + "ce_ib": 5.793774127960205, + "ce_orig": 1.7257083654403687, + "epoch": 1.032784528003451, + "kl_loss": 0.20947518944740295, + "loss_ib": 0.00788852572441101, + "step": 3592 + }, + { + "ce_ib": 4.412404537200928, + "ce_orig": 0.8841373324394226, + "epoch": 1.032784528003451, + "kl_loss": 0.21082748472690582, + "loss_ib": 0.006520679220557213, + "step": 3592 + }, + { + "ce_ib": 3.0942201614379883, + "ce_orig": 0.5155574679374695, + "epoch": 1.032784528003451, + "kl_loss": 0.1363181471824646, + "loss_ib": 0.00445740157738328, + "step": 3592 + }, + { + "ce_ib": 2.9915356636047363, + "ce_orig": 0.7921051979064941, + "epoch": 1.032784528003451, + "kl_loss": 0.13952811062335968, + "loss_ib": 0.004386816639453173, + "step": 3592 + }, + { + "ce_ib": 1.794143557548523, + "ce_orig": 0.39192211627960205, + "epoch": 1.0330721115824286, + "kl_loss": 0.19036394357681274, + "loss_ib": 0.003697782987728715, + "step": 3593 + }, + { + "ce_ib": 4.015193462371826, + "ce_orig": 0.7751226425170898, + "epoch": 1.0330721115824286, + "kl_loss": 0.18080560863018036, + "loss_ib": 0.005823249462991953, + "step": 3593 + }, + { + "ce_ib": 4.676846027374268, + "ce_orig": 1.1917617321014404, + "epoch": 1.0330721115824286, + "kl_loss": 0.24173277616500854, + "loss_ib": 0.007094173226505518, + "step": 3593 + }, + { + "ce_ib": 4.084869384765625, + "ce_orig": 0.9402002692222595, + "epoch": 1.0330721115824286, + "kl_loss": 0.16982445120811462, + "loss_ib": 0.0057831136509776115, + "step": 3593 + }, + { + "ce_ib": 2.4548161029815674, + "ce_orig": 0.5282126069068909, + "epoch": 1.0333596951614064, + "kl_loss": 0.20231130719184875, + "loss_ib": 0.0044779288582503796, + "step": 3594 + }, + { + "ce_ib": 3.9081132411956787, + "ce_orig": 0.7748914957046509, + "epoch": 1.0333596951614064, + "kl_loss": 0.22273336350917816, + "loss_ib": 0.006135446950793266, + "step": 3594 + }, + { + "ce_ib": 2.9092109203338623, + "ce_orig": 0.6723921298980713, + "epoch": 1.0333596951614064, + "kl_loss": 0.07531952857971191, + "loss_ib": 0.0036624062340706587, + "step": 3594 + }, + { + "ce_ib": 3.60611629486084, + "ce_orig": 0.6374434232711792, + "epoch": 1.0333596951614064, + "kl_loss": 0.22367563843727112, + "loss_ib": 0.005842872895300388, + "step": 3594 + }, + { + "epoch": 1.0336472787403839, + "grad_norm": 0.15302863717079163, + "learning_rate": 7.63099629056506e-06, + "loss": 0.8467, + "step": 3595 + }, + { + "ce_ib": 2.812412977218628, + "ce_orig": 0.7333006858825684, + "epoch": 1.0336472787403839, + "kl_loss": 0.12292232364416122, + "loss_ib": 0.004041636362671852, + "step": 3595 + }, + { + "ce_ib": 3.554811954498291, + "ce_orig": 1.069047212600708, + "epoch": 1.0336472787403839, + "kl_loss": 0.2857212424278259, + "loss_ib": 0.0064120241440832615, + "step": 3595 + }, + { + "ce_ib": 4.191186428070068, + "ce_orig": 0.7377612590789795, + "epoch": 1.0336472787403839, + "kl_loss": 0.27299198508262634, + "loss_ib": 0.006921106483787298, + "step": 3595 + }, + { + "ce_ib": 5.97336483001709, + "ce_orig": 1.6930336952209473, + "epoch": 1.0336472787403839, + "kl_loss": 0.2521768808364868, + "loss_ib": 0.008495133370161057, + "step": 3595 + }, + { + "ce_ib": 1.5423794984817505, + "ce_orig": 0.5520162582397461, + "epoch": 1.0339348623193616, + "kl_loss": 0.12812669575214386, + "loss_ib": 0.0028236466459929943, + "step": 3596 + }, + { + "ce_ib": 5.790314197540283, + "ce_orig": 1.3324220180511475, + "epoch": 1.0339348623193616, + "kl_loss": 0.1921837031841278, + "loss_ib": 0.0077121504582464695, + "step": 3596 + }, + { + "ce_ib": 3.491847276687622, + "ce_orig": 1.0726336240768433, + "epoch": 1.0339348623193616, + "kl_loss": 0.1639743447303772, + "loss_ib": 0.0051315901800990105, + "step": 3596 + }, + { + "ce_ib": 5.219526290893555, + "ce_orig": 1.257354736328125, + "epoch": 1.0339348623193616, + "kl_loss": 0.1612309068441391, + "loss_ib": 0.006831835489720106, + "step": 3596 + }, + { + "ce_ib": 2.995332717895508, + "ce_orig": 0.4341762363910675, + "epoch": 1.0342224458983391, + "kl_loss": 0.08784681558609009, + "loss_ib": 0.0038738008588552475, + "step": 3597 + }, + { + "ce_ib": 4.13518762588501, + "ce_orig": 1.0229902267456055, + "epoch": 1.0342224458983391, + "kl_loss": 0.17407625913619995, + "loss_ib": 0.005875949747860432, + "step": 3597 + }, + { + "ce_ib": 4.02907657623291, + "ce_orig": 0.533166766166687, + "epoch": 1.0342224458983391, + "kl_loss": 0.22527018189430237, + "loss_ib": 0.006281778682023287, + "step": 3597 + }, + { + "ce_ib": 3.3100967407226562, + "ce_orig": 1.0533865690231323, + "epoch": 1.0342224458983391, + "kl_loss": 0.1127241998910904, + "loss_ib": 0.004437338560819626, + "step": 3597 + }, + { + "ce_ib": 5.001669883728027, + "ce_orig": 1.301680564880371, + "epoch": 1.0345100294773169, + "kl_loss": 0.11535670608282089, + "loss_ib": 0.006155236624181271, + "step": 3598 + }, + { + "ce_ib": 6.318354606628418, + "ce_orig": 1.53036630153656, + "epoch": 1.0345100294773169, + "kl_loss": 0.2000124156475067, + "loss_ib": 0.008318479172885418, + "step": 3598 + }, + { + "ce_ib": 6.281805992126465, + "ce_orig": 1.663295865058899, + "epoch": 1.0345100294773169, + "kl_loss": 0.21558260917663574, + "loss_ib": 0.008437632583081722, + "step": 3598 + }, + { + "ce_ib": 1.9627292156219482, + "ce_orig": 0.4293214976787567, + "epoch": 1.0345100294773169, + "kl_loss": 0.1754666566848755, + "loss_ib": 0.0037173954769968987, + "step": 3598 + }, + { + "ce_ib": 5.302921772003174, + "ce_orig": 1.11689293384552, + "epoch": 1.0347976130562946, + "kl_loss": 0.16721583902835846, + "loss_ib": 0.006975080352276564, + "step": 3599 + }, + { + "ce_ib": 4.342101573944092, + "ce_orig": 1.0900191068649292, + "epoch": 1.0347976130562946, + "kl_loss": 0.20977860689163208, + "loss_ib": 0.00643988698720932, + "step": 3599 + }, + { + "ce_ib": 3.994821071624756, + "ce_orig": 0.7957486510276794, + "epoch": 1.0347976130562946, + "kl_loss": 0.1865224540233612, + "loss_ib": 0.005860045552253723, + "step": 3599 + }, + { + "ce_ib": 2.415705442428589, + "ce_orig": 0.6470205783843994, + "epoch": 1.0347976130562946, + "kl_loss": 0.18088434636592865, + "loss_ib": 0.004224549047648907, + "step": 3599 + }, + { + "epoch": 1.035085196635272, + "grad_norm": 0.12967762351036072, + "learning_rate": 7.624393596173598e-06, + "loss": 0.9132, + "step": 3600 + }, + { + "ce_ib": 3.4628283977508545, + "ce_orig": 0.8276459574699402, + "epoch": 1.035085196635272, + "kl_loss": 0.18228301405906677, + "loss_ib": 0.005285658873617649, + "step": 3600 + }, + { + "ce_ib": 2.857104778289795, + "ce_orig": 0.5559479594230652, + "epoch": 1.035085196635272, + "kl_loss": 0.12155002355575562, + "loss_ib": 0.004072605166584253, + "step": 3600 + }, + { + "ce_ib": 4.408941745758057, + "ce_orig": 0.9570815563201904, + "epoch": 1.035085196635272, + "kl_loss": 0.1516711711883545, + "loss_ib": 0.005925653502345085, + "step": 3600 + }, + { + "ce_ib": 2.59881854057312, + "ce_orig": 0.9157952070236206, + "epoch": 1.035085196635272, + "kl_loss": 0.1189519464969635, + "loss_ib": 0.0037883378099650145, + "step": 3600 + }, + { + "ce_ib": 4.093374729156494, + "ce_orig": 0.787654459476471, + "epoch": 1.0353727802142498, + "kl_loss": 0.17648643255233765, + "loss_ib": 0.005858238786458969, + "step": 3601 + }, + { + "ce_ib": 3.1360411643981934, + "ce_orig": 0.6991316676139832, + "epoch": 1.0353727802142498, + "kl_loss": 0.2945239543914795, + "loss_ib": 0.006081280764192343, + "step": 3601 + }, + { + "ce_ib": 3.0603394508361816, + "ce_orig": 0.8960056304931641, + "epoch": 1.0353727802142498, + "kl_loss": 0.14941520988941193, + "loss_ib": 0.004554491490125656, + "step": 3601 + }, + { + "ce_ib": 2.3848178386688232, + "ce_orig": 0.6515876054763794, + "epoch": 1.0353727802142498, + "kl_loss": 0.29692184925079346, + "loss_ib": 0.00535403611138463, + "step": 3601 + }, + { + "ce_ib": 4.11014461517334, + "ce_orig": 0.5882928967475891, + "epoch": 1.0356603637932273, + "kl_loss": 0.20375986397266388, + "loss_ib": 0.0061477432027459145, + "step": 3602 + }, + { + "ce_ib": 4.800549507141113, + "ce_orig": 1.5553240776062012, + "epoch": 1.0356603637932273, + "kl_loss": 0.12218072265386581, + "loss_ib": 0.006022356450557709, + "step": 3602 + }, + { + "ce_ib": 3.6528377532958984, + "ce_orig": 0.7968841791152954, + "epoch": 1.0356603637932273, + "kl_loss": 0.1729726642370224, + "loss_ib": 0.0053825643844902515, + "step": 3602 + }, + { + "ce_ib": 5.5807061195373535, + "ce_orig": 0.8860375285148621, + "epoch": 1.0356603637932273, + "kl_loss": 0.16759277880191803, + "loss_ib": 0.007256633602082729, + "step": 3602 + }, + { + "ce_ib": 5.473237037658691, + "ce_orig": 1.4077732563018799, + "epoch": 1.035947947372205, + "kl_loss": 0.17615102231502533, + "loss_ib": 0.007234747055917978, + "step": 3603 + }, + { + "ce_ib": 3.9375784397125244, + "ce_orig": 1.0439776182174683, + "epoch": 1.035947947372205, + "kl_loss": 0.1601138710975647, + "loss_ib": 0.00553871737793088, + "step": 3603 + }, + { + "ce_ib": 3.1198110580444336, + "ce_orig": 0.6395612359046936, + "epoch": 1.035947947372205, + "kl_loss": 0.15717889368534088, + "loss_ib": 0.004691599868237972, + "step": 3603 + }, + { + "ce_ib": 2.8842549324035645, + "ce_orig": 0.8312889337539673, + "epoch": 1.035947947372205, + "kl_loss": 0.09544116258621216, + "loss_ib": 0.003838666481897235, + "step": 3603 + }, + { + "ce_ib": 2.4198458194732666, + "ce_orig": 0.6561986207962036, + "epoch": 1.0362355309511826, + "kl_loss": 0.1382230967283249, + "loss_ib": 0.003802076680585742, + "step": 3604 + }, + { + "ce_ib": 1.7114713191986084, + "ce_orig": 0.5035542845726013, + "epoch": 1.0362355309511826, + "kl_loss": 0.1926886886358261, + "loss_ib": 0.003638358088210225, + "step": 3604 + }, + { + "ce_ib": 2.9916653633117676, + "ce_orig": 0.4726111590862274, + "epoch": 1.0362355309511826, + "kl_loss": 0.11186091601848602, + "loss_ib": 0.004110274370759726, + "step": 3604 + }, + { + "ce_ib": 4.1728105545043945, + "ce_orig": 1.078545093536377, + "epoch": 1.0362355309511826, + "kl_loss": 0.41519302129745483, + "loss_ib": 0.008324741385877132, + "step": 3604 + }, + { + "epoch": 1.0365231145301603, + "grad_norm": 0.13502448797225952, + "learning_rate": 7.617784579008829e-06, + "loss": 0.8815, + "step": 3605 + }, + { + "ce_ib": 4.048615455627441, + "ce_orig": 1.1827644109725952, + "epoch": 1.0365231145301603, + "kl_loss": 0.10706819593906403, + "loss_ib": 0.0051192971877753735, + "step": 3605 + }, + { + "ce_ib": 1.8677819967269897, + "ce_orig": 0.6191987991333008, + "epoch": 1.0365231145301603, + "kl_loss": 0.08658070862293243, + "loss_ib": 0.0027335891500115395, + "step": 3605 + }, + { + "ce_ib": 1.9578477144241333, + "ce_orig": 0.6169666051864624, + "epoch": 1.0365231145301603, + "kl_loss": 0.10110248625278473, + "loss_ib": 0.0029688726644963026, + "step": 3605 + }, + { + "ce_ib": 4.584970474243164, + "ce_orig": 1.0211849212646484, + "epoch": 1.0365231145301603, + "kl_loss": 0.21760551631450653, + "loss_ib": 0.006761025637388229, + "step": 3605 + }, + { + "ce_ib": 3.7217605113983154, + "ce_orig": 0.7339946627616882, + "epoch": 1.036810698109138, + "kl_loss": 0.16484682261943817, + "loss_ib": 0.005370228551328182, + "step": 3606 + }, + { + "ce_ib": 2.942458391189575, + "ce_orig": 0.694808304309845, + "epoch": 1.036810698109138, + "kl_loss": 0.20153895020484924, + "loss_ib": 0.004957847762852907, + "step": 3606 + }, + { + "ce_ib": 2.7448441982269287, + "ce_orig": 0.6304602026939392, + "epoch": 1.036810698109138, + "kl_loss": 0.14717306196689606, + "loss_ib": 0.004216575063765049, + "step": 3606 + }, + { + "ce_ib": 4.546078681945801, + "ce_orig": 0.8463358879089355, + "epoch": 1.036810698109138, + "kl_loss": 0.2869971990585327, + "loss_ib": 0.007416050415486097, + "step": 3606 + }, + { + "ce_ib": 3.184821605682373, + "ce_orig": 0.723363995552063, + "epoch": 1.0370982816881156, + "kl_loss": 0.12441817671060562, + "loss_ib": 0.004429003223776817, + "step": 3607 + }, + { + "ce_ib": 3.7470359802246094, + "ce_orig": 0.8613805174827576, + "epoch": 1.0370982816881156, + "kl_loss": 0.16837556660175323, + "loss_ib": 0.0054307919926941395, + "step": 3607 + }, + { + "ce_ib": 4.162301063537598, + "ce_orig": 0.7162800431251526, + "epoch": 1.0370982816881156, + "kl_loss": 0.2555719017982483, + "loss_ib": 0.006718020420521498, + "step": 3607 + }, + { + "ce_ib": 2.930645704269409, + "ce_orig": 0.6275238990783691, + "epoch": 1.0370982816881156, + "kl_loss": 0.12478743493556976, + "loss_ib": 0.004178519826382399, + "step": 3607 + }, + { + "ce_ib": 2.1399786472320557, + "ce_orig": 0.6445143222808838, + "epoch": 1.0373858652670933, + "kl_loss": 0.14293500781059265, + "loss_ib": 0.0035693286918103695, + "step": 3608 + }, + { + "ce_ib": 2.390820264816284, + "ce_orig": 0.7960323095321655, + "epoch": 1.0373858652670933, + "kl_loss": 0.13938087224960327, + "loss_ib": 0.003784629050642252, + "step": 3608 + }, + { + "ce_ib": 6.625231742858887, + "ce_orig": 1.697510838508606, + "epoch": 1.0373858652670933, + "kl_loss": 0.2854168117046356, + "loss_ib": 0.009479399770498276, + "step": 3608 + }, + { + "ce_ib": 1.6207829713821411, + "ce_orig": 0.3134153485298157, + "epoch": 1.0373858652670933, + "kl_loss": 0.3790823221206665, + "loss_ib": 0.005411606281995773, + "step": 3608 + }, + { + "ce_ib": 3.7508723735809326, + "ce_orig": 1.180008053779602, + "epoch": 1.0376734488460708, + "kl_loss": 0.12446802854537964, + "loss_ib": 0.004995552357286215, + "step": 3609 + }, + { + "ce_ib": 1.8564928770065308, + "ce_orig": 0.5005015134811401, + "epoch": 1.0376734488460708, + "kl_loss": 0.10688474029302597, + "loss_ib": 0.002925340086221695, + "step": 3609 + }, + { + "ce_ib": 6.175436496734619, + "ce_orig": 1.024315595626831, + "epoch": 1.0376734488460708, + "kl_loss": 0.21099622547626495, + "loss_ib": 0.008285398595035076, + "step": 3609 + }, + { + "ce_ib": 2.3822133541107178, + "ce_orig": 0.7041109204292297, + "epoch": 1.0376734488460708, + "kl_loss": 0.12260394543409348, + "loss_ib": 0.0036082526203244925, + "step": 3609 + }, + { + "epoch": 1.0379610324250486, + "grad_norm": 0.1491369754076004, + "learning_rate": 7.611169254993407e-06, + "loss": 0.8434, + "step": 3610 + }, + { + "ce_ib": 2.7315685749053955, + "ce_orig": 0.717684268951416, + "epoch": 1.0379610324250486, + "kl_loss": 0.14781257510185242, + "loss_ib": 0.0042096939869225025, + "step": 3610 + }, + { + "ce_ib": 4.396188735961914, + "ce_orig": 1.0201442241668701, + "epoch": 1.0379610324250486, + "kl_loss": 0.12830573320388794, + "loss_ib": 0.005679246038198471, + "step": 3610 + }, + { + "ce_ib": 2.6831612586975098, + "ce_orig": 0.7254563570022583, + "epoch": 1.0379610324250486, + "kl_loss": 0.1655263602733612, + "loss_ib": 0.004338424652814865, + "step": 3610 + }, + { + "ce_ib": 5.895235061645508, + "ce_orig": 1.00615394115448, + "epoch": 1.0379610324250486, + "kl_loss": 0.2686675786972046, + "loss_ib": 0.008581910282373428, + "step": 3610 + }, + { + "ce_ib": 3.85986590385437, + "ce_orig": 0.7758864164352417, + "epoch": 1.038248616004026, + "kl_loss": 0.14058718085289001, + "loss_ib": 0.005265737883746624, + "step": 3611 + }, + { + "ce_ib": 5.276226997375488, + "ce_orig": 1.205343246459961, + "epoch": 1.038248616004026, + "kl_loss": 0.23336853086948395, + "loss_ib": 0.007609912194311619, + "step": 3611 + }, + { + "ce_ib": 5.545557498931885, + "ce_orig": 1.5514147281646729, + "epoch": 1.038248616004026, + "kl_loss": 0.18195724487304688, + "loss_ib": 0.007365129888057709, + "step": 3611 + }, + { + "ce_ib": 4.852670669555664, + "ce_orig": 1.1876533031463623, + "epoch": 1.038248616004026, + "kl_loss": 0.25097906589508057, + "loss_ib": 0.007362460717558861, + "step": 3611 + }, + { + "ce_ib": 2.3194727897644043, + "ce_orig": 0.6568465232849121, + "epoch": 1.0385361995830038, + "kl_loss": 0.14433136582374573, + "loss_ib": 0.003762786276638508, + "step": 3612 + }, + { + "ce_ib": 4.251423358917236, + "ce_orig": 0.5566037893295288, + "epoch": 1.0385361995830038, + "kl_loss": 0.20305600762367249, + "loss_ib": 0.006281983572989702, + "step": 3612 + }, + { + "ce_ib": 2.165109872817993, + "ce_orig": 0.45617061853408813, + "epoch": 1.0385361995830038, + "kl_loss": 0.11835142970085144, + "loss_ib": 0.003348624100908637, + "step": 3612 + }, + { + "ce_ib": 3.6414670944213867, + "ce_orig": 0.4939264953136444, + "epoch": 1.0385361995830038, + "kl_loss": 0.20213980972766876, + "loss_ib": 0.005662865005433559, + "step": 3612 + }, + { + "ce_ib": 2.0439205169677734, + "ce_orig": 0.5417172908782959, + "epoch": 1.0388237831619815, + "kl_loss": 0.21288509666919708, + "loss_ib": 0.004172771703451872, + "step": 3613 + }, + { + "ce_ib": 2.0960512161254883, + "ce_orig": 0.4091334640979767, + "epoch": 1.0388237831619815, + "kl_loss": 0.556781530380249, + "loss_ib": 0.00766386603936553, + "step": 3613 + }, + { + "ce_ib": 3.1814894676208496, + "ce_orig": 0.7695678472518921, + "epoch": 1.0388237831619815, + "kl_loss": 0.21987232565879822, + "loss_ib": 0.00538021232932806, + "step": 3613 + }, + { + "ce_ib": 2.1183717250823975, + "ce_orig": 0.43028947710990906, + "epoch": 1.0388237831619815, + "kl_loss": 0.1757846474647522, + "loss_ib": 0.003876218106597662, + "step": 3613 + }, + { + "ce_ib": 5.472299098968506, + "ce_orig": 1.4629154205322266, + "epoch": 1.039111366740959, + "kl_loss": 0.17165538668632507, + "loss_ib": 0.007188852876424789, + "step": 3614 + }, + { + "ce_ib": 2.3202004432678223, + "ce_orig": 0.6800296306610107, + "epoch": 1.039111366740959, + "kl_loss": 0.18153683841228485, + "loss_ib": 0.004135568626224995, + "step": 3614 + }, + { + "ce_ib": 5.109469413757324, + "ce_orig": 1.388855218887329, + "epoch": 1.039111366740959, + "kl_loss": 0.21794676780700684, + "loss_ib": 0.0072889369912445545, + "step": 3614 + }, + { + "ce_ib": 3.6422691345214844, + "ce_orig": 0.8242025971412659, + "epoch": 1.039111366740959, + "kl_loss": 0.14878730475902557, + "loss_ib": 0.005130141973495483, + "step": 3614 + }, + { + "epoch": 1.0393989503199368, + "grad_norm": 0.15638858079910278, + "learning_rate": 7.604547640065184e-06, + "loss": 0.8258, + "step": 3615 + }, + { + "ce_ib": 2.882620334625244, + "ce_orig": 0.7578316330909729, + "epoch": 1.0393989503199368, + "kl_loss": 0.17515845596790314, + "loss_ib": 0.004634204786270857, + "step": 3615 + }, + { + "ce_ib": 2.1130142211914062, + "ce_orig": 0.7095894813537598, + "epoch": 1.0393989503199368, + "kl_loss": 0.1190444603562355, + "loss_ib": 0.0033034589141607285, + "step": 3615 + }, + { + "ce_ib": 2.9355223178863525, + "ce_orig": 0.7038721442222595, + "epoch": 1.0393989503199368, + "kl_loss": 0.17161783576011658, + "loss_ib": 0.004651700612157583, + "step": 3615 + }, + { + "ce_ib": 3.8388209342956543, + "ce_orig": 0.913504421710968, + "epoch": 1.0393989503199368, + "kl_loss": 0.2154204547405243, + "loss_ib": 0.005993025377392769, + "step": 3615 + }, + { + "ce_ib": 4.703923225402832, + "ce_orig": 1.4594624042510986, + "epoch": 1.0396865338989143, + "kl_loss": 0.16126348078250885, + "loss_ib": 0.006316557992249727, + "step": 3616 + }, + { + "ce_ib": 4.017836093902588, + "ce_orig": 0.5394452810287476, + "epoch": 1.0396865338989143, + "kl_loss": 0.552027702331543, + "loss_ib": 0.009538112208247185, + "step": 3616 + }, + { + "ce_ib": 4.790243148803711, + "ce_orig": 0.6956639289855957, + "epoch": 1.0396865338989143, + "kl_loss": 0.17188173532485962, + "loss_ib": 0.006509060505777597, + "step": 3616 + }, + { + "ce_ib": 3.5600390434265137, + "ce_orig": 0.754811704158783, + "epoch": 1.0396865338989143, + "kl_loss": 0.16752338409423828, + "loss_ib": 0.0052352724596858025, + "step": 3616 + }, + { + "ce_ib": 3.080634117126465, + "ce_orig": 0.7435992360115051, + "epoch": 1.039974117477892, + "kl_loss": 0.20298761129379272, + "loss_ib": 0.005110510624945164, + "step": 3617 + }, + { + "ce_ib": 2.4158360958099365, + "ce_orig": 0.5419292449951172, + "epoch": 1.039974117477892, + "kl_loss": 0.23701860010623932, + "loss_ib": 0.0047860220074653625, + "step": 3617 + }, + { + "ce_ib": 7.775689125061035, + "ce_orig": 1.9299973249435425, + "epoch": 1.039974117477892, + "kl_loss": 0.24520811438560486, + "loss_ib": 0.010227770544588566, + "step": 3617 + }, + { + "ce_ib": 4.337613105773926, + "ce_orig": 0.852482259273529, + "epoch": 1.039974117477892, + "kl_loss": 0.16415086388587952, + "loss_ib": 0.005979121662676334, + "step": 3617 + }, + { + "ce_ib": 3.5451977252960205, + "ce_orig": 0.5601804256439209, + "epoch": 1.0402617010568695, + "kl_loss": 0.2633844017982483, + "loss_ib": 0.0061790416948497295, + "step": 3618 + }, + { + "ce_ib": 4.009729385375977, + "ce_orig": 0.6489201784133911, + "epoch": 1.0402617010568695, + "kl_loss": 0.23559534549713135, + "loss_ib": 0.006365682929754257, + "step": 3618 + }, + { + "ce_ib": 2.7005553245544434, + "ce_orig": 0.5096434950828552, + "epoch": 1.0402617010568695, + "kl_loss": 0.13151949644088745, + "loss_ib": 0.004015750251710415, + "step": 3618 + }, + { + "ce_ib": 2.5835700035095215, + "ce_orig": 0.827130138874054, + "epoch": 1.0402617010568695, + "kl_loss": 0.17797628045082092, + "loss_ib": 0.004363332875072956, + "step": 3618 + }, + { + "ce_ib": 3.245603084564209, + "ce_orig": 0.566447377204895, + "epoch": 1.0405492846358473, + "kl_loss": 0.21388252079486847, + "loss_ib": 0.005384428426623344, + "step": 3619 + }, + { + "ce_ib": 1.5501230955123901, + "ce_orig": 0.3459555208683014, + "epoch": 1.0405492846358473, + "kl_loss": 0.28266990184783936, + "loss_ib": 0.004376822151243687, + "step": 3619 + }, + { + "ce_ib": 3.1213860511779785, + "ce_orig": 0.7358925342559814, + "epoch": 1.0405492846358473, + "kl_loss": 0.12128187716007233, + "loss_ib": 0.004334204830229282, + "step": 3619 + }, + { + "ce_ib": 3.9275474548339844, + "ce_orig": 0.8516512513160706, + "epoch": 1.0405492846358473, + "kl_loss": 0.20438756048679352, + "loss_ib": 0.005971423350274563, + "step": 3619 + }, + { + "epoch": 1.040836868214825, + "grad_norm": 0.12959076464176178, + "learning_rate": 7.597919750177168e-06, + "loss": 0.8004, + "step": 3620 + }, + { + "ce_ib": 2.462653160095215, + "ce_orig": 0.5381348133087158, + "epoch": 1.040836868214825, + "kl_loss": 0.15974313020706177, + "loss_ib": 0.004060084465891123, + "step": 3620 + }, + { + "ce_ib": 2.5418903827667236, + "ce_orig": 0.7745182514190674, + "epoch": 1.040836868214825, + "kl_loss": 0.12679605185985565, + "loss_ib": 0.003809850662946701, + "step": 3620 + }, + { + "ce_ib": 5.938531398773193, + "ce_orig": 1.4969598054885864, + "epoch": 1.040836868214825, + "kl_loss": 0.19495350122451782, + "loss_ib": 0.007888066582381725, + "step": 3620 + }, + { + "ce_ib": 7.084684371948242, + "ce_orig": 2.0437211990356445, + "epoch": 1.040836868214825, + "kl_loss": 0.1831594556570053, + "loss_ib": 0.008916279301047325, + "step": 3620 + }, + { + "ce_ib": 5.959942817687988, + "ce_orig": 1.628035306930542, + "epoch": 1.0411244517938025, + "kl_loss": 0.3451482057571411, + "loss_ib": 0.009411424398422241, + "step": 3621 + }, + { + "ce_ib": 3.497147560119629, + "ce_orig": 0.7378325462341309, + "epoch": 1.0411244517938025, + "kl_loss": 0.18200626969337463, + "loss_ib": 0.005317209754139185, + "step": 3621 + }, + { + "ce_ib": 4.301889419555664, + "ce_orig": 0.95506352186203, + "epoch": 1.0411244517938025, + "kl_loss": 0.15268218517303467, + "loss_ib": 0.005828711204230785, + "step": 3621 + }, + { + "ce_ib": 3.030855417251587, + "ce_orig": 0.6844418048858643, + "epoch": 1.0411244517938025, + "kl_loss": 0.20166534185409546, + "loss_ib": 0.005047508515417576, + "step": 3621 + }, + { + "ce_ib": 4.5757365226745605, + "ce_orig": 1.2202825546264648, + "epoch": 1.0414120353727803, + "kl_loss": 0.1162467896938324, + "loss_ib": 0.005738204810768366, + "step": 3622 + }, + { + "ce_ib": 4.120796203613281, + "ce_orig": 1.083792805671692, + "epoch": 1.0414120353727803, + "kl_loss": 0.17977003753185272, + "loss_ib": 0.0059184967540204525, + "step": 3622 + }, + { + "ce_ib": 3.5085480213165283, + "ce_orig": 0.9704054594039917, + "epoch": 1.0414120353727803, + "kl_loss": 0.14843909442424774, + "loss_ib": 0.004992939066141844, + "step": 3622 + }, + { + "ce_ib": 2.6661605834960938, + "ce_orig": 0.7857835292816162, + "epoch": 1.0414120353727803, + "kl_loss": 0.11238855868577957, + "loss_ib": 0.003790046088397503, + "step": 3622 + }, + { + "ce_ib": 2.5775246620178223, + "ce_orig": 0.7487296462059021, + "epoch": 1.0416996189517578, + "kl_loss": 0.1084538996219635, + "loss_ib": 0.003662063740193844, + "step": 3623 + }, + { + "ce_ib": 2.3492140769958496, + "ce_orig": 0.4996260404586792, + "epoch": 1.0416996189517578, + "kl_loss": 0.29593074321746826, + "loss_ib": 0.005308521445840597, + "step": 3623 + }, + { + "ce_ib": 4.1429829597473145, + "ce_orig": 0.7898831367492676, + "epoch": 1.0416996189517578, + "kl_loss": 0.15875858068466187, + "loss_ib": 0.005730568431317806, + "step": 3623 + }, + { + "ce_ib": 4.604132652282715, + "ce_orig": 1.010408878326416, + "epoch": 1.0416996189517578, + "kl_loss": 0.13157567381858826, + "loss_ib": 0.005919889081269503, + "step": 3623 + }, + { + "ce_ib": 2.630185842514038, + "ce_orig": 0.573985755443573, + "epoch": 1.0419872025307355, + "kl_loss": 0.14164462685585022, + "loss_ib": 0.0040466319769620895, + "step": 3624 + }, + { + "ce_ib": 6.282369136810303, + "ce_orig": 1.4882782697677612, + "epoch": 1.0419872025307355, + "kl_loss": 0.16870152950286865, + "loss_ib": 0.007969385012984276, + "step": 3624 + }, + { + "ce_ib": 2.669609308242798, + "ce_orig": 0.7225914001464844, + "epoch": 1.0419872025307355, + "kl_loss": 0.281491219997406, + "loss_ib": 0.005484521389007568, + "step": 3624 + }, + { + "ce_ib": 2.424116849899292, + "ce_orig": 0.30327674746513367, + "epoch": 1.0419872025307355, + "kl_loss": 0.16122932732105255, + "loss_ib": 0.004036410246044397, + "step": 3624 + }, + { + "epoch": 1.0422747861097132, + "grad_norm": 0.20815162360668182, + "learning_rate": 7.591285601297484e-06, + "loss": 0.9243, + "step": 3625 + }, + { + "ce_ib": 2.712085485458374, + "ce_orig": 0.7460544109344482, + "epoch": 1.0422747861097132, + "kl_loss": 0.14227691292762756, + "loss_ib": 0.004134854767471552, + "step": 3625 + }, + { + "ce_ib": 2.7163689136505127, + "ce_orig": 0.34830254316329956, + "epoch": 1.0422747861097132, + "kl_loss": 0.19020061194896698, + "loss_ib": 0.004618375096470118, + "step": 3625 + }, + { + "ce_ib": 2.989821672439575, + "ce_orig": 0.803505539894104, + "epoch": 1.0422747861097132, + "kl_loss": 0.14246612787246704, + "loss_ib": 0.004414482973515987, + "step": 3625 + }, + { + "ce_ib": 3.5200440883636475, + "ce_orig": 0.891615092754364, + "epoch": 1.0422747861097132, + "kl_loss": 0.21167761087417603, + "loss_ib": 0.00563682010397315, + "step": 3625 + }, + { + "ce_ib": 1.7079401016235352, + "ce_orig": 0.35267722606658936, + "epoch": 1.0425623696886908, + "kl_loss": 0.2527768015861511, + "loss_ib": 0.004235708154737949, + "step": 3626 + }, + { + "ce_ib": 3.886392831802368, + "ce_orig": 0.5840628147125244, + "epoch": 1.0425623696886908, + "kl_loss": 0.14927546679973602, + "loss_ib": 0.005379147361963987, + "step": 3626 + }, + { + "ce_ib": 2.746835708618164, + "ce_orig": 0.9108607172966003, + "epoch": 1.0425623696886908, + "kl_loss": 0.15717166662216187, + "loss_ib": 0.00431855209171772, + "step": 3626 + }, + { + "ce_ib": 3.6818456649780273, + "ce_orig": 0.9757272601127625, + "epoch": 1.0425623696886908, + "kl_loss": 0.13273414969444275, + "loss_ib": 0.005009187385439873, + "step": 3626 + }, + { + "ce_ib": 4.338442325592041, + "ce_orig": 0.7041746377944946, + "epoch": 1.0428499532676685, + "kl_loss": 0.22683140635490417, + "loss_ib": 0.006606756243854761, + "step": 3627 + }, + { + "ce_ib": 6.851692199707031, + "ce_orig": 1.451658844947815, + "epoch": 1.0428499532676685, + "kl_loss": 0.17643345892429352, + "loss_ib": 0.008616026490926743, + "step": 3627 + }, + { + "ce_ib": 3.4792940616607666, + "ce_orig": 0.830245852470398, + "epoch": 1.0428499532676685, + "kl_loss": 0.23054462671279907, + "loss_ib": 0.0057847402058541775, + "step": 3627 + }, + { + "ce_ib": 3.680671453475952, + "ce_orig": 0.4621967673301697, + "epoch": 1.0428499532676685, + "kl_loss": 0.186819925904274, + "loss_ib": 0.0055488706566393375, + "step": 3627 + }, + { + "ce_ib": 5.548410892486572, + "ce_orig": 1.2636492252349854, + "epoch": 1.043137536846646, + "kl_loss": 0.2703660726547241, + "loss_ib": 0.008252071216702461, + "step": 3628 + }, + { + "ce_ib": 2.710136651992798, + "ce_orig": 0.8244372606277466, + "epoch": 1.043137536846646, + "kl_loss": 0.22755107283592224, + "loss_ib": 0.004985647276043892, + "step": 3628 + }, + { + "ce_ib": 4.535163879394531, + "ce_orig": 0.9291025996208191, + "epoch": 1.043137536846646, + "kl_loss": 0.16401462256908417, + "loss_ib": 0.006175309885293245, + "step": 3628 + }, + { + "ce_ib": 3.750535488128662, + "ce_orig": 0.9550915956497192, + "epoch": 1.043137536846646, + "kl_loss": 0.15689367055892944, + "loss_ib": 0.005319472402334213, + "step": 3628 + }, + { + "ce_ib": 3.8357839584350586, + "ce_orig": 1.1044423580169678, + "epoch": 1.0434251204256237, + "kl_loss": 0.1493375599384308, + "loss_ib": 0.005329159554094076, + "step": 3629 + }, + { + "ce_ib": 2.981382131576538, + "ce_orig": 0.8836249709129333, + "epoch": 1.0434251204256237, + "kl_loss": 0.24000728130340576, + "loss_ib": 0.0053814551793038845, + "step": 3629 + }, + { + "ce_ib": 6.234432220458984, + "ce_orig": 1.5813705921173096, + "epoch": 1.0434251204256237, + "kl_loss": 0.2593342065811157, + "loss_ib": 0.008827774785459042, + "step": 3629 + }, + { + "ce_ib": 4.437751770019531, + "ce_orig": 0.7867863774299622, + "epoch": 1.0434251204256237, + "kl_loss": 0.11472618579864502, + "loss_ib": 0.005585013423115015, + "step": 3629 + }, + { + "epoch": 1.0437127040046013, + "grad_norm": 0.1540500819683075, + "learning_rate": 7.584645209409337e-06, + "loss": 0.9046, + "step": 3630 + }, + { + "ce_ib": 5.842644691467285, + "ce_orig": 1.0410693883895874, + "epoch": 1.0437127040046013, + "kl_loss": 0.17605364322662354, + "loss_ib": 0.0076031810604035854, + "step": 3630 + }, + { + "ce_ib": 4.649238109588623, + "ce_orig": 0.8329716920852661, + "epoch": 1.0437127040046013, + "kl_loss": 0.3158702850341797, + "loss_ib": 0.007807941175997257, + "step": 3630 + }, + { + "ce_ib": 3.95737624168396, + "ce_orig": 1.2219078540802002, + "epoch": 1.0437127040046013, + "kl_loss": 0.1715141385793686, + "loss_ib": 0.005672517232596874, + "step": 3630 + }, + { + "ce_ib": 2.7763845920562744, + "ce_orig": 0.4179551601409912, + "epoch": 1.0437127040046013, + "kl_loss": 0.15858206152915955, + "loss_ib": 0.004362205043435097, + "step": 3630 + }, + { + "ce_ib": 5.342764377593994, + "ce_orig": 1.0745208263397217, + "epoch": 1.044000287583579, + "kl_loss": 0.13743595778942108, + "loss_ib": 0.00671712402254343, + "step": 3631 + }, + { + "ce_ib": 3.1137197017669678, + "ce_orig": 0.5281271934509277, + "epoch": 1.044000287583579, + "kl_loss": 0.2626608610153198, + "loss_ib": 0.005740328226238489, + "step": 3631 + }, + { + "ce_ib": 1.8216133117675781, + "ce_orig": 0.43622004985809326, + "epoch": 1.044000287583579, + "kl_loss": 0.12864601612091064, + "loss_ib": 0.003108073491603136, + "step": 3631 + }, + { + "ce_ib": 4.897391319274902, + "ce_orig": 1.2595247030258179, + "epoch": 1.044000287583579, + "kl_loss": 0.22733396291732788, + "loss_ib": 0.007170730736106634, + "step": 3631 + }, + { + "ce_ib": 1.7825614213943481, + "ce_orig": 0.4326312243938446, + "epoch": 1.0442878711625567, + "kl_loss": 0.14635181427001953, + "loss_ib": 0.0032460796646773815, + "step": 3632 + }, + { + "ce_ib": 3.703246831893921, + "ce_orig": 0.8255389332771301, + "epoch": 1.0442878711625567, + "kl_loss": 0.1498459279537201, + "loss_ib": 0.005201706197112799, + "step": 3632 + }, + { + "ce_ib": 3.897205114364624, + "ce_orig": 1.2661492824554443, + "epoch": 1.0442878711625567, + "kl_loss": 0.14135783910751343, + "loss_ib": 0.005310783162713051, + "step": 3632 + }, + { + "ce_ib": 4.69106388092041, + "ce_orig": 1.1649013757705688, + "epoch": 1.0442878711625567, + "kl_loss": 0.2018737494945526, + "loss_ib": 0.006709801498800516, + "step": 3632 + }, + { + "ce_ib": 2.2866406440734863, + "ce_orig": 0.6755345463752747, + "epoch": 1.0445754547415342, + "kl_loss": 0.0962848961353302, + "loss_ib": 0.003249489702284336, + "step": 3633 + }, + { + "ce_ib": 4.933104515075684, + "ce_orig": 1.2129642963409424, + "epoch": 1.0445754547415342, + "kl_loss": 0.17352411150932312, + "loss_ib": 0.006668346002697945, + "step": 3633 + }, + { + "ce_ib": 5.403744697570801, + "ce_orig": 1.2934716939926147, + "epoch": 1.0445754547415342, + "kl_loss": 0.16459909081459045, + "loss_ib": 0.007049735635519028, + "step": 3633 + }, + { + "ce_ib": 2.563067674636841, + "ce_orig": 0.7930237650871277, + "epoch": 1.0445754547415342, + "kl_loss": 0.19160419702529907, + "loss_ib": 0.004479109775274992, + "step": 3633 + }, + { + "ce_ib": 3.0220417976379395, + "ce_orig": 0.8682129383087158, + "epoch": 1.044863038320512, + "kl_loss": 0.18755224347114563, + "loss_ib": 0.004897564183920622, + "step": 3634 + }, + { + "ce_ib": 4.433304309844971, + "ce_orig": 0.9875518083572388, + "epoch": 1.044863038320512, + "kl_loss": 0.18602894246578217, + "loss_ib": 0.006293593440204859, + "step": 3634 + }, + { + "ce_ib": 3.5442140102386475, + "ce_orig": 1.0161057710647583, + "epoch": 1.044863038320512, + "kl_loss": 0.1601516306400299, + "loss_ib": 0.005145730450749397, + "step": 3634 + }, + { + "ce_ib": 6.27975606918335, + "ce_orig": 1.6442888975143433, + "epoch": 1.044863038320512, + "kl_loss": 0.11484621465206146, + "loss_ib": 0.007428218610584736, + "step": 3634 + }, + { + "epoch": 1.0451506218994895, + "grad_norm": 0.16535800695419312, + "learning_rate": 7.577998590510971e-06, + "loss": 0.9333, + "step": 3635 + }, + { + "ce_ib": 4.190597057342529, + "ce_orig": 1.1908214092254639, + "epoch": 1.0451506218994895, + "kl_loss": 0.1790948510169983, + "loss_ib": 0.005981545429676771, + "step": 3635 + }, + { + "ce_ib": 2.546995162963867, + "ce_orig": 0.5718316435813904, + "epoch": 1.0451506218994895, + "kl_loss": 0.22314020991325378, + "loss_ib": 0.004778397269546986, + "step": 3635 + }, + { + "ce_ib": 2.0473380088806152, + "ce_orig": 0.36546722054481506, + "epoch": 1.0451506218994895, + "kl_loss": 0.37687957286834717, + "loss_ib": 0.005816133692860603, + "step": 3635 + }, + { + "ce_ib": 4.396595001220703, + "ce_orig": 0.7302227020263672, + "epoch": 1.0451506218994895, + "kl_loss": 0.14302082359790802, + "loss_ib": 0.0058268033899366856, + "step": 3635 + }, + { + "ce_ib": 2.47580623626709, + "ce_orig": 0.471221923828125, + "epoch": 1.0454382054784672, + "kl_loss": 0.16024239361286163, + "loss_ib": 0.004078230354934931, + "step": 3636 + }, + { + "ce_ib": 4.29340934753418, + "ce_orig": 0.9533417820930481, + "epoch": 1.0454382054784672, + "kl_loss": 0.14176523685455322, + "loss_ib": 0.005711061879992485, + "step": 3636 + }, + { + "ce_ib": 5.872299671173096, + "ce_orig": 1.5054903030395508, + "epoch": 1.0454382054784672, + "kl_loss": 0.2366928905248642, + "loss_ib": 0.008239228278398514, + "step": 3636 + }, + { + "ce_ib": 2.972935676574707, + "ce_orig": 0.7803416848182678, + "epoch": 1.0454382054784672, + "kl_loss": 0.17738822102546692, + "loss_ib": 0.004746817983686924, + "step": 3636 + }, + { + "ce_ib": 3.1697707176208496, + "ce_orig": 0.7540121674537659, + "epoch": 1.0457257890574447, + "kl_loss": 0.11249092221260071, + "loss_ib": 0.004294679965823889, + "step": 3637 + }, + { + "ce_ib": 2.6926321983337402, + "ce_orig": 0.6414611339569092, + "epoch": 1.0457257890574447, + "kl_loss": 0.21527405083179474, + "loss_ib": 0.0048453728668391705, + "step": 3637 + }, + { + "ce_ib": 3.644171953201294, + "ce_orig": 0.9228086471557617, + "epoch": 1.0457257890574447, + "kl_loss": 0.20231541991233826, + "loss_ib": 0.005667326506227255, + "step": 3637 + }, + { + "ce_ib": 2.6471686363220215, + "ce_orig": 0.9196316599845886, + "epoch": 1.0457257890574447, + "kl_loss": 0.11517561227083206, + "loss_ib": 0.003798924619331956, + "step": 3637 + }, + { + "ce_ib": 4.196496486663818, + "ce_orig": 1.1326866149902344, + "epoch": 1.0460133726364225, + "kl_loss": 0.34048521518707275, + "loss_ib": 0.0076013486832380295, + "step": 3638 + }, + { + "ce_ib": 2.758058786392212, + "ce_orig": 0.7788483500480652, + "epoch": 1.0460133726364225, + "kl_loss": 0.10606838017702103, + "loss_ib": 0.0038187424652278423, + "step": 3638 + }, + { + "ce_ib": 3.269585371017456, + "ce_orig": 0.7073972821235657, + "epoch": 1.0460133726364225, + "kl_loss": 0.12271342426538467, + "loss_ib": 0.0044967192225158215, + "step": 3638 + }, + { + "ce_ib": 4.425405979156494, + "ce_orig": 1.2364073991775513, + "epoch": 1.0460133726364225, + "kl_loss": 0.23407959938049316, + "loss_ib": 0.0067662023939192295, + "step": 3638 + }, + { + "ce_ib": 4.200232982635498, + "ce_orig": 1.009204626083374, + "epoch": 1.0463009562154002, + "kl_loss": 0.24579167366027832, + "loss_ib": 0.006658149883151054, + "step": 3639 + }, + { + "ce_ib": 3.5464251041412354, + "ce_orig": 0.7897982597351074, + "epoch": 1.0463009562154002, + "kl_loss": 0.16875281929969788, + "loss_ib": 0.005233952775597572, + "step": 3639 + }, + { + "ce_ib": 2.884044647216797, + "ce_orig": 0.6853817105293274, + "epoch": 1.0463009562154002, + "kl_loss": 0.1290467530488968, + "loss_ib": 0.004174511879682541, + "step": 3639 + }, + { + "ce_ib": 3.4582173824310303, + "ce_orig": 0.7362773418426514, + "epoch": 1.0463009562154002, + "kl_loss": 0.2450040578842163, + "loss_ib": 0.00590825779363513, + "step": 3639 + }, + { + "epoch": 1.0465885397943777, + "grad_norm": 0.13650308549404144, + "learning_rate": 7.5713457606156335e-06, + "loss": 0.8126, + "step": 3640 + }, + { + "ce_ib": 3.395496129989624, + "ce_orig": 0.8318498134613037, + "epoch": 1.0465885397943777, + "kl_loss": 0.25143519043922424, + "loss_ib": 0.005909848026931286, + "step": 3640 + }, + { + "ce_ib": 1.6843596696853638, + "ce_orig": 0.47878947854042053, + "epoch": 1.0465885397943777, + "kl_loss": 0.13939352333545685, + "loss_ib": 0.0030782949179410934, + "step": 3640 + }, + { + "ce_ib": 5.477750301361084, + "ce_orig": 1.2539632320404053, + "epoch": 1.0465885397943777, + "kl_loss": 0.1675402820110321, + "loss_ib": 0.007153153419494629, + "step": 3640 + }, + { + "ce_ib": 4.401991367340088, + "ce_orig": 1.213215708732605, + "epoch": 1.0465885397943777, + "kl_loss": 0.17598670721054077, + "loss_ib": 0.006161858327686787, + "step": 3640 + }, + { + "ce_ib": 3.5679073333740234, + "ce_orig": 0.7359730005264282, + "epoch": 1.0468761233733554, + "kl_loss": 0.2841013967990875, + "loss_ib": 0.00640892144292593, + "step": 3641 + }, + { + "ce_ib": 4.632238864898682, + "ce_orig": 0.9937613606452942, + "epoch": 1.0468761233733554, + "kl_loss": 0.28972840309143066, + "loss_ib": 0.0075295232236385345, + "step": 3641 + }, + { + "ce_ib": 4.336673736572266, + "ce_orig": 0.6222395300865173, + "epoch": 1.0468761233733554, + "kl_loss": 0.21443256735801697, + "loss_ib": 0.006480999290943146, + "step": 3641 + }, + { + "ce_ib": 2.6383516788482666, + "ce_orig": 0.685802698135376, + "epoch": 1.0468761233733554, + "kl_loss": 0.1560121774673462, + "loss_ib": 0.004198473412543535, + "step": 3641 + }, + { + "ce_ib": 4.034322738647461, + "ce_orig": 0.9574639201164246, + "epoch": 1.047163706952333, + "kl_loss": 0.13734471797943115, + "loss_ib": 0.005407769698649645, + "step": 3642 + }, + { + "ce_ib": 4.317474842071533, + "ce_orig": 0.9096835851669312, + "epoch": 1.047163706952333, + "kl_loss": 0.11041382700204849, + "loss_ib": 0.005421613343060017, + "step": 3642 + }, + { + "ce_ib": 4.122335910797119, + "ce_orig": 0.8928641080856323, + "epoch": 1.047163706952333, + "kl_loss": 0.12910380959510803, + "loss_ib": 0.005413373466581106, + "step": 3642 + }, + { + "ce_ib": 4.476260185241699, + "ce_orig": 1.2396049499511719, + "epoch": 1.047163706952333, + "kl_loss": 0.13592801988124847, + "loss_ib": 0.005835540127009153, + "step": 3642 + }, + { + "ce_ib": 6.44442081451416, + "ce_orig": 1.4508390426635742, + "epoch": 1.0474512905313107, + "kl_loss": 0.38386571407318115, + "loss_ib": 0.01028307806700468, + "step": 3643 + }, + { + "ce_ib": 3.567962646484375, + "ce_orig": 0.5832754373550415, + "epoch": 1.0474512905313107, + "kl_loss": 0.20369604229927063, + "loss_ib": 0.005604922771453857, + "step": 3643 + }, + { + "ce_ib": 2.969413995742798, + "ce_orig": 0.6055132746696472, + "epoch": 1.0474512905313107, + "kl_loss": 0.23006682097911835, + "loss_ib": 0.005270082037895918, + "step": 3643 + }, + { + "ce_ib": 4.322815418243408, + "ce_orig": 0.5854563117027283, + "epoch": 1.0474512905313107, + "kl_loss": 0.2731701135635376, + "loss_ib": 0.007054516579955816, + "step": 3643 + }, + { + "ce_ib": 4.51125955581665, + "ce_orig": 1.3126628398895264, + "epoch": 1.0477388741102882, + "kl_loss": 0.14708144962787628, + "loss_ib": 0.0059820739552378654, + "step": 3644 + }, + { + "ce_ib": 2.513582706451416, + "ce_orig": 0.7776172161102295, + "epoch": 1.0477388741102882, + "kl_loss": 0.188429594039917, + "loss_ib": 0.004397878423333168, + "step": 3644 + }, + { + "ce_ib": 4.512980937957764, + "ce_orig": 1.0213351249694824, + "epoch": 1.0477388741102882, + "kl_loss": 0.1746775507926941, + "loss_ib": 0.006259756162762642, + "step": 3644 + }, + { + "ce_ib": 8.474693298339844, + "ce_orig": 1.662079095840454, + "epoch": 1.0477388741102882, + "kl_loss": 0.1068362444639206, + "loss_ib": 0.009543055668473244, + "step": 3644 + }, + { + "epoch": 1.048026457689266, + "grad_norm": 0.16855879127979279, + "learning_rate": 7.564686735751539e-06, + "loss": 0.8918, + "step": 3645 + }, + { + "ce_ib": 3.191296100616455, + "ce_orig": 0.8048452734947205, + "epoch": 1.048026457689266, + "kl_loss": 0.11767368018627167, + "loss_ib": 0.004368032794445753, + "step": 3645 + }, + { + "ce_ib": 3.38645076751709, + "ce_orig": 0.7553361058235168, + "epoch": 1.048026457689266, + "kl_loss": 0.1676540970802307, + "loss_ib": 0.005062991287559271, + "step": 3645 + }, + { + "ce_ib": 4.094128608703613, + "ce_orig": 0.8033127784729004, + "epoch": 1.048026457689266, + "kl_loss": 0.21471288800239563, + "loss_ib": 0.006241256836801767, + "step": 3645 + }, + { + "ce_ib": 5.130524635314941, + "ce_orig": 1.3894354104995728, + "epoch": 1.048026457689266, + "kl_loss": 0.20855078101158142, + "loss_ib": 0.0072160325944423676, + "step": 3645 + }, + { + "ce_ib": 6.059122085571289, + "ce_orig": 1.410886287689209, + "epoch": 1.0483140412682437, + "kl_loss": 0.2145063877105713, + "loss_ib": 0.008204185403883457, + "step": 3646 + }, + { + "ce_ib": 4.417462348937988, + "ce_orig": 0.8268661499023438, + "epoch": 1.0483140412682437, + "kl_loss": 0.2161180078983307, + "loss_ib": 0.006578641943633556, + "step": 3646 + }, + { + "ce_ib": 2.4948556423187256, + "ce_orig": 0.7888292670249939, + "epoch": 1.0483140412682437, + "kl_loss": 0.15164753794670105, + "loss_ib": 0.0040113311260938644, + "step": 3646 + }, + { + "ce_ib": 1.9931387901306152, + "ce_orig": 0.39639052748680115, + "epoch": 1.0483140412682437, + "kl_loss": 0.15995241701602936, + "loss_ib": 0.0035926627460867167, + "step": 3646 + }, + { + "ce_ib": 6.209994792938232, + "ce_orig": 1.580641508102417, + "epoch": 1.0486016248472212, + "kl_loss": 0.16830812394618988, + "loss_ib": 0.007893076166510582, + "step": 3647 + }, + { + "ce_ib": 4.859615325927734, + "ce_orig": 0.8296990990638733, + "epoch": 1.0486016248472212, + "kl_loss": 0.21996746957302094, + "loss_ib": 0.007059289608150721, + "step": 3647 + }, + { + "ce_ib": 2.12197208404541, + "ce_orig": 0.49452146887779236, + "epoch": 1.0486016248472212, + "kl_loss": 0.1204199343919754, + "loss_ib": 0.0033261715434491634, + "step": 3647 + }, + { + "ce_ib": 2.2985355854034424, + "ce_orig": 0.42606785893440247, + "epoch": 1.0486016248472212, + "kl_loss": 0.2014763057231903, + "loss_ib": 0.004313298501074314, + "step": 3647 + }, + { + "ce_ib": 4.264749050140381, + "ce_orig": 0.7416318655014038, + "epoch": 1.048889208426199, + "kl_loss": 0.24010959267616272, + "loss_ib": 0.006665844935923815, + "step": 3648 + }, + { + "ce_ib": 5.1598358154296875, + "ce_orig": 0.9242261052131653, + "epoch": 1.048889208426199, + "kl_loss": 0.17351293563842773, + "loss_ib": 0.006894965190440416, + "step": 3648 + }, + { + "ce_ib": 2.108557939529419, + "ce_orig": 0.4998347759246826, + "epoch": 1.048889208426199, + "kl_loss": 0.17239435017108917, + "loss_ib": 0.003832501359283924, + "step": 3648 + }, + { + "ce_ib": 5.263166904449463, + "ce_orig": 1.1565532684326172, + "epoch": 1.048889208426199, + "kl_loss": 0.09865748882293701, + "loss_ib": 0.006249741651117802, + "step": 3648 + }, + { + "ce_ib": 3.475234270095825, + "ce_orig": 0.7483117580413818, + "epoch": 1.0491767920051764, + "kl_loss": 0.24043124914169312, + "loss_ib": 0.005879546049982309, + "step": 3649 + }, + { + "ce_ib": 3.2479934692382812, + "ce_orig": 0.8109352588653564, + "epoch": 1.0491767920051764, + "kl_loss": 0.19714629650115967, + "loss_ib": 0.005219456274062395, + "step": 3649 + }, + { + "ce_ib": 3.9976344108581543, + "ce_orig": 0.4744523763656616, + "epoch": 1.0491767920051764, + "kl_loss": 0.2043483853340149, + "loss_ib": 0.0060411179438233376, + "step": 3649 + }, + { + "ce_ib": 2.3655288219451904, + "ce_orig": 0.5918286442756653, + "epoch": 1.0491767920051764, + "kl_loss": 0.15580394864082336, + "loss_ib": 0.003923567943274975, + "step": 3649 + }, + { + "epoch": 1.0494643755841542, + "grad_norm": 0.1358061581850052, + "learning_rate": 7.558021531961819e-06, + "loss": 0.8196, + "step": 3650 + }, + { + "ce_ib": 3.783759832382202, + "ce_orig": 0.5764878392219543, + "epoch": 1.0494643755841542, + "kl_loss": 0.20173436403274536, + "loss_ib": 0.005801103077828884, + "step": 3650 + }, + { + "ce_ib": 2.444382905960083, + "ce_orig": 0.7179930806159973, + "epoch": 1.0494643755841542, + "kl_loss": 0.13023144006729126, + "loss_ib": 0.003746697213500738, + "step": 3650 + }, + { + "ce_ib": 2.8490359783172607, + "ce_orig": 0.8728180527687073, + "epoch": 1.0494643755841542, + "kl_loss": 0.12996962666511536, + "loss_ib": 0.004148732405155897, + "step": 3650 + }, + { + "ce_ib": 2.715395212173462, + "ce_orig": 0.48538732528686523, + "epoch": 1.0494643755841542, + "kl_loss": 0.11621193587779999, + "loss_ib": 0.0038775145076215267, + "step": 3650 + }, + { + "ce_ib": 4.728611946105957, + "ce_orig": 1.2011038064956665, + "epoch": 1.0497519591631317, + "kl_loss": 0.23808269202709198, + "loss_ib": 0.00710943853482604, + "step": 3651 + }, + { + "ce_ib": 2.0460453033447266, + "ce_orig": 0.4280034601688385, + "epoch": 1.0497519591631317, + "kl_loss": 0.12261034548282623, + "loss_ib": 0.0032721487805247307, + "step": 3651 + }, + { + "ce_ib": 5.145914554595947, + "ce_orig": 1.498686671257019, + "epoch": 1.0497519591631317, + "kl_loss": 0.132607102394104, + "loss_ib": 0.00647198548540473, + "step": 3651 + }, + { + "ce_ib": 5.764682292938232, + "ce_orig": 1.4786797761917114, + "epoch": 1.0497519591631317, + "kl_loss": 0.18697041273117065, + "loss_ib": 0.007634386420249939, + "step": 3651 + }, + { + "ce_ib": 3.109361410140991, + "ce_orig": 1.0409600734710693, + "epoch": 1.0500395427421094, + "kl_loss": 0.16750073432922363, + "loss_ib": 0.00478436890989542, + "step": 3652 + }, + { + "ce_ib": 3.389986038208008, + "ce_orig": 0.4245002865791321, + "epoch": 1.0500395427421094, + "kl_loss": 0.4731106460094452, + "loss_ib": 0.00812109187245369, + "step": 3652 + }, + { + "ce_ib": 5.391926288604736, + "ce_orig": 0.879568338394165, + "epoch": 1.0500395427421094, + "kl_loss": 0.25637850165367126, + "loss_ib": 0.007955710403621197, + "step": 3652 + }, + { + "ce_ib": 2.7497990131378174, + "ce_orig": 0.6002593636512756, + "epoch": 1.0500395427421094, + "kl_loss": 0.15893355011940002, + "loss_ib": 0.004339134786278009, + "step": 3652 + }, + { + "ce_ib": 3.2649662494659424, + "ce_orig": 0.733517587184906, + "epoch": 1.0503271263210872, + "kl_loss": 0.1772102266550064, + "loss_ib": 0.005037068389356136, + "step": 3653 + }, + { + "ce_ib": 4.486011028289795, + "ce_orig": 1.124714970588684, + "epoch": 1.0503271263210872, + "kl_loss": 0.19106027483940125, + "loss_ib": 0.006396613549441099, + "step": 3653 + }, + { + "ce_ib": 4.221242904663086, + "ce_orig": 0.6171600222587585, + "epoch": 1.0503271263210872, + "kl_loss": 0.19761806726455688, + "loss_ib": 0.0061974236741662025, + "step": 3653 + }, + { + "ce_ib": 2.530144691467285, + "ce_orig": 0.6343508362770081, + "epoch": 1.0503271263210872, + "kl_loss": 0.21784080564975739, + "loss_ib": 0.004708552733063698, + "step": 3653 + }, + { + "ce_ib": 5.1651811599731445, + "ce_orig": 1.0724341869354248, + "epoch": 1.0506147099000647, + "kl_loss": 0.18962860107421875, + "loss_ib": 0.007061467040330172, + "step": 3654 + }, + { + "ce_ib": 4.315845012664795, + "ce_orig": 0.6727851629257202, + "epoch": 1.0506147099000647, + "kl_loss": 0.2645914554595947, + "loss_ib": 0.006961759179830551, + "step": 3654 + }, + { + "ce_ib": 2.888484477996826, + "ce_orig": 0.5693802833557129, + "epoch": 1.0506147099000647, + "kl_loss": 0.16229456663131714, + "loss_ib": 0.0045114303939044476, + "step": 3654 + }, + { + "ce_ib": 4.226630687713623, + "ce_orig": 1.1126383543014526, + "epoch": 1.0506147099000647, + "kl_loss": 0.1363505870103836, + "loss_ib": 0.005590136628597975, + "step": 3654 + }, + { + "epoch": 1.0509022934790424, + "grad_norm": 0.14915041625499725, + "learning_rate": 7.5513501653045e-06, + "loss": 0.8748, + "step": 3655 + }, + { + "ce_ib": 4.484577178955078, + "ce_orig": 0.958263635635376, + "epoch": 1.0509022934790424, + "kl_loss": 0.205714151263237, + "loss_ib": 0.006541718728840351, + "step": 3655 + }, + { + "ce_ib": 3.9671008586883545, + "ce_orig": 0.7834894061088562, + "epoch": 1.0509022934790424, + "kl_loss": 0.19156014919281006, + "loss_ib": 0.005882702302187681, + "step": 3655 + }, + { + "ce_ib": 3.947950601577759, + "ce_orig": 1.2176624536514282, + "epoch": 1.0509022934790424, + "kl_loss": 0.20073866844177246, + "loss_ib": 0.005955337081104517, + "step": 3655 + }, + { + "ce_ib": 4.346681118011475, + "ce_orig": 0.5782889723777771, + "epoch": 1.0509022934790424, + "kl_loss": 0.17666780948638916, + "loss_ib": 0.006113359238952398, + "step": 3655 + }, + { + "ce_ib": 2.3411405086517334, + "ce_orig": 0.579766571521759, + "epoch": 1.05118987705802, + "kl_loss": 0.10794223845005035, + "loss_ib": 0.0034205627162009478, + "step": 3656 + }, + { + "ce_ib": 3.254361867904663, + "ce_orig": 1.0977522134780884, + "epoch": 1.05118987705802, + "kl_loss": 0.13378283381462097, + "loss_ib": 0.004592190496623516, + "step": 3656 + }, + { + "ce_ib": 3.9162514209747314, + "ce_orig": 1.0232083797454834, + "epoch": 1.05118987705802, + "kl_loss": 0.18811175227165222, + "loss_ib": 0.005797368939965963, + "step": 3656 + }, + { + "ce_ib": 4.654444694519043, + "ce_orig": 0.8104403018951416, + "epoch": 1.05118987705802, + "kl_loss": 0.4213024377822876, + "loss_ib": 0.008867468684911728, + "step": 3656 + }, + { + "ce_ib": 3.021167755126953, + "ce_orig": 0.49939489364624023, + "epoch": 1.0514774606369977, + "kl_loss": 0.17054840922355652, + "loss_ib": 0.004726651590317488, + "step": 3657 + }, + { + "ce_ib": 3.787818193435669, + "ce_orig": 0.815047562122345, + "epoch": 1.0514774606369977, + "kl_loss": 0.16290947794914246, + "loss_ib": 0.005416912958025932, + "step": 3657 + }, + { + "ce_ib": 4.47672176361084, + "ce_orig": 0.7869374752044678, + "epoch": 1.0514774606369977, + "kl_loss": 0.20635706186294556, + "loss_ib": 0.0065402924083173275, + "step": 3657 + }, + { + "ce_ib": 3.3893775939941406, + "ce_orig": 0.6872161626815796, + "epoch": 1.0514774606369977, + "kl_loss": 0.28410905599594116, + "loss_ib": 0.006230467930436134, + "step": 3657 + }, + { + "ce_ib": 4.018857479095459, + "ce_orig": 1.1540534496307373, + "epoch": 1.0517650442159752, + "kl_loss": 0.1502683013677597, + "loss_ib": 0.005521540530025959, + "step": 3658 + }, + { + "ce_ib": 4.033104419708252, + "ce_orig": 0.9666170477867126, + "epoch": 1.0517650442159752, + "kl_loss": 0.2910327613353729, + "loss_ib": 0.0069434321485459805, + "step": 3658 + }, + { + "ce_ib": 2.895505666732788, + "ce_orig": 0.8245436549186707, + "epoch": 1.0517650442159752, + "kl_loss": 0.11025664210319519, + "loss_ib": 0.003998071886599064, + "step": 3658 + }, + { + "ce_ib": 5.690065860748291, + "ce_orig": 1.2002830505371094, + "epoch": 1.0517650442159752, + "kl_loss": 0.14604711532592773, + "loss_ib": 0.007150537334382534, + "step": 3658 + }, + { + "ce_ib": 2.928450584411621, + "ce_orig": 0.6104503273963928, + "epoch": 1.052052627794953, + "kl_loss": 0.22272107005119324, + "loss_ib": 0.005155661143362522, + "step": 3659 + }, + { + "ce_ib": 2.0911433696746826, + "ce_orig": 0.6088409423828125, + "epoch": 1.052052627794953, + "kl_loss": 0.18123486638069153, + "loss_ib": 0.00390349212102592, + "step": 3659 + }, + { + "ce_ib": 2.9036343097686768, + "ce_orig": 0.382577508687973, + "epoch": 1.052052627794953, + "kl_loss": 0.18446815013885498, + "loss_ib": 0.004748315550386906, + "step": 3659 + }, + { + "ce_ib": 2.976306200027466, + "ce_orig": 0.6005876064300537, + "epoch": 1.052052627794953, + "kl_loss": 0.20300476253032684, + "loss_ib": 0.005006353836506605, + "step": 3659 + }, + { + "epoch": 1.0523402113739306, + "grad_norm": 0.1288328766822815, + "learning_rate": 7.5446726518524505e-06, + "loss": 0.8342, + "step": 3660 + }, + { + "ce_ib": 3.4285662174224854, + "ce_orig": 0.3802841007709503, + "epoch": 1.0523402113739306, + "kl_loss": 0.13791224360466003, + "loss_ib": 0.004807688295841217, + "step": 3660 + }, + { + "ce_ib": 3.5525882244110107, + "ce_orig": 0.5247630476951599, + "epoch": 1.0523402113739306, + "kl_loss": 0.20709294080734253, + "loss_ib": 0.00562351755797863, + "step": 3660 + }, + { + "ce_ib": 3.489760637283325, + "ce_orig": 1.052417516708374, + "epoch": 1.0523402113739306, + "kl_loss": 0.20943352580070496, + "loss_ib": 0.0055840956047177315, + "step": 3660 + }, + { + "ce_ib": 2.951498508453369, + "ce_orig": 0.6806835532188416, + "epoch": 1.0523402113739306, + "kl_loss": 0.1659276783466339, + "loss_ib": 0.004610775504261255, + "step": 3660 + }, + { + "ce_ib": 2.990537166595459, + "ce_orig": 0.7935705184936523, + "epoch": 1.0526277949529081, + "kl_loss": 0.09791181236505508, + "loss_ib": 0.003969655372202396, + "step": 3661 + }, + { + "ce_ib": 5.979730129241943, + "ce_orig": 1.0827693939208984, + "epoch": 1.0526277949529081, + "kl_loss": 0.1798524409532547, + "loss_ib": 0.0077782548032701015, + "step": 3661 + }, + { + "ce_ib": 2.3259470462799072, + "ce_orig": 0.5647223591804504, + "epoch": 1.0526277949529081, + "kl_loss": 0.16256371140480042, + "loss_ib": 0.003951583988964558, + "step": 3661 + }, + { + "ce_ib": 4.572921276092529, + "ce_orig": 1.0801221132278442, + "epoch": 1.0526277949529081, + "kl_loss": 0.107068732380867, + "loss_ib": 0.0056436085142195225, + "step": 3661 + }, + { + "ce_ib": 2.4234421253204346, + "ce_orig": 0.7685949802398682, + "epoch": 1.0529153785318859, + "kl_loss": 0.14905259013175964, + "loss_ib": 0.003913968335837126, + "step": 3662 + }, + { + "ce_ib": 2.973115921020508, + "ce_orig": 0.7654421925544739, + "epoch": 1.0529153785318859, + "kl_loss": 0.12558986246585846, + "loss_ib": 0.004229014273732901, + "step": 3662 + }, + { + "ce_ib": 4.088954448699951, + "ce_orig": 0.8118377327919006, + "epoch": 1.0529153785318859, + "kl_loss": 0.12484700232744217, + "loss_ib": 0.005337424576282501, + "step": 3662 + }, + { + "ce_ib": 4.094456672668457, + "ce_orig": 1.0316412448883057, + "epoch": 1.0529153785318859, + "kl_loss": 0.20120923221111298, + "loss_ib": 0.006106548942625523, + "step": 3662 + }, + { + "ce_ib": 6.3299360275268555, + "ce_orig": 1.9272968769073486, + "epoch": 1.0532029621108634, + "kl_loss": 0.16518951952457428, + "loss_ib": 0.007981831207871437, + "step": 3663 + }, + { + "ce_ib": 4.595278739929199, + "ce_orig": 1.2720568180084229, + "epoch": 1.0532029621108634, + "kl_loss": 0.18439072370529175, + "loss_ib": 0.006439186166971922, + "step": 3663 + }, + { + "ce_ib": 3.2887582778930664, + "ce_orig": 0.7657110691070557, + "epoch": 1.0532029621108634, + "kl_loss": 0.16959357261657715, + "loss_ib": 0.004984694067388773, + "step": 3663 + }, + { + "ce_ib": 2.4246368408203125, + "ce_orig": 0.6449676156044006, + "epoch": 1.0532029621108634, + "kl_loss": 0.15297164022922516, + "loss_ib": 0.003954353276640177, + "step": 3663 + }, + { + "ce_ib": 6.614846229553223, + "ce_orig": 1.4874205589294434, + "epoch": 1.0534905456898411, + "kl_loss": 0.19017261266708374, + "loss_ib": 0.008516572415828705, + "step": 3664 + }, + { + "ce_ib": 3.0780539512634277, + "ce_orig": 0.4719565808773041, + "epoch": 1.0534905456898411, + "kl_loss": 0.23646312952041626, + "loss_ib": 0.005442684981971979, + "step": 3664 + }, + { + "ce_ib": 3.6830480098724365, + "ce_orig": 0.7670210003852844, + "epoch": 1.0534905456898411, + "kl_loss": 0.22790761291980743, + "loss_ib": 0.005962124560028315, + "step": 3664 + }, + { + "ce_ib": 3.1986196041107178, + "ce_orig": 1.0742480754852295, + "epoch": 1.0534905456898411, + "kl_loss": 0.08895249664783478, + "loss_ib": 0.004088144749403, + "step": 3664 + }, + { + "epoch": 1.0537781292688186, + "grad_norm": 0.1561271846294403, + "learning_rate": 7.537989007693353e-06, + "loss": 0.8539, + "step": 3665 + }, + { + "ce_ib": 5.2615227699279785, + "ce_orig": 1.214021921157837, + "epoch": 1.0537781292688186, + "kl_loss": 0.15386277437210083, + "loss_ib": 0.006800150498747826, + "step": 3665 + }, + { + "ce_ib": 1.749990701675415, + "ce_orig": 0.4375652074813843, + "epoch": 1.0537781292688186, + "kl_loss": 0.3477346897125244, + "loss_ib": 0.005227337591350079, + "step": 3665 + }, + { + "ce_ib": 2.8560149669647217, + "ce_orig": 0.6565316319465637, + "epoch": 1.0537781292688186, + "kl_loss": 0.13631781935691833, + "loss_ib": 0.004219193011522293, + "step": 3665 + }, + { + "ce_ib": 3.503366231918335, + "ce_orig": 0.8287521004676819, + "epoch": 1.0537781292688186, + "kl_loss": 0.1962416172027588, + "loss_ib": 0.005465782713145018, + "step": 3665 + }, + { + "ce_ib": 2.5796122550964355, + "ce_orig": 0.2764204442501068, + "epoch": 1.0540657128477964, + "kl_loss": 0.4810856282711029, + "loss_ib": 0.007390468381345272, + "step": 3666 + }, + { + "ce_ib": 5.191190242767334, + "ce_orig": 0.6752364635467529, + "epoch": 1.0540657128477964, + "kl_loss": 0.20066019892692566, + "loss_ib": 0.007197792176157236, + "step": 3666 + }, + { + "ce_ib": 2.8539810180664062, + "ce_orig": 0.42300093173980713, + "epoch": 1.0540657128477964, + "kl_loss": 0.17790299654006958, + "loss_ib": 0.0046330108307302, + "step": 3666 + }, + { + "ce_ib": 4.328786849975586, + "ce_orig": 1.1823993921279907, + "epoch": 1.0540657128477964, + "kl_loss": 0.16345414519309998, + "loss_ib": 0.005963328294456005, + "step": 3666 + }, + { + "ce_ib": 2.932837724685669, + "ce_orig": 0.7088654041290283, + "epoch": 1.0543532964267741, + "kl_loss": 0.22255365550518036, + "loss_ib": 0.005158374086022377, + "step": 3667 + }, + { + "ce_ib": 4.670055389404297, + "ce_orig": 1.2242509126663208, + "epoch": 1.0543532964267741, + "kl_loss": 0.28906944394111633, + "loss_ib": 0.007560750003904104, + "step": 3667 + }, + { + "ce_ib": 4.134488105773926, + "ce_orig": 1.0871033668518066, + "epoch": 1.0543532964267741, + "kl_loss": 0.16704882681369781, + "loss_ib": 0.005804975982755423, + "step": 3667 + }, + { + "ce_ib": 3.8865177631378174, + "ce_orig": 0.7936505675315857, + "epoch": 1.0543532964267741, + "kl_loss": 0.09320206195116043, + "loss_ib": 0.004818538203835487, + "step": 3667 + }, + { + "ce_ib": 2.5999863147735596, + "ce_orig": 0.5385754108428955, + "epoch": 1.0546408800057516, + "kl_loss": 0.17018002271652222, + "loss_ib": 0.004301786422729492, + "step": 3668 + }, + { + "ce_ib": 3.570164442062378, + "ce_orig": 0.8947532773017883, + "epoch": 1.0546408800057516, + "kl_loss": 0.2909697890281677, + "loss_ib": 0.00647986214607954, + "step": 3668 + }, + { + "ce_ib": 2.218505859375, + "ce_orig": 0.6904731392860413, + "epoch": 1.0546408800057516, + "kl_loss": 0.14625775814056396, + "loss_ib": 0.003681083209812641, + "step": 3668 + }, + { + "ce_ib": 2.5462045669555664, + "ce_orig": 0.6572183966636658, + "epoch": 1.0546408800057516, + "kl_loss": 0.16380852460861206, + "loss_ib": 0.004184289835393429, + "step": 3668 + }, + { + "ce_ib": 3.0181641578674316, + "ce_orig": 0.6620156764984131, + "epoch": 1.0549284635847294, + "kl_loss": 0.1616884171962738, + "loss_ib": 0.0046350485645234585, + "step": 3669 + }, + { + "ce_ib": 2.1865363121032715, + "ce_orig": 0.2011137753725052, + "epoch": 1.0549284635847294, + "kl_loss": 0.47028031945228577, + "loss_ib": 0.0068893395364284515, + "step": 3669 + }, + { + "ce_ib": 4.263367652893066, + "ce_orig": 1.1443966627120972, + "epoch": 1.0549284635847294, + "kl_loss": 0.15904130041599274, + "loss_ib": 0.005853780545294285, + "step": 3669 + }, + { + "ce_ib": 5.571567535400391, + "ce_orig": 1.1185753345489502, + "epoch": 1.0549284635847294, + "kl_loss": 0.21121034026145935, + "loss_ib": 0.007683670148253441, + "step": 3669 + }, + { + "epoch": 1.0552160471637069, + "grad_norm": 0.1627843677997589, + "learning_rate": 7.531299248929655e-06, + "loss": 0.911, + "step": 3670 + }, + { + "ce_ib": 3.884075164794922, + "ce_orig": 0.9354414343833923, + "epoch": 1.0552160471637069, + "kl_loss": 0.14610765874385834, + "loss_ib": 0.005345151759684086, + "step": 3670 + }, + { + "ce_ib": 3.8165600299835205, + "ce_orig": 0.8054796457290649, + "epoch": 1.0552160471637069, + "kl_loss": 0.1488015353679657, + "loss_ib": 0.005304575432091951, + "step": 3670 + }, + { + "ce_ib": 3.3471834659576416, + "ce_orig": 1.1032485961914062, + "epoch": 1.0552160471637069, + "kl_loss": 0.12236291915178299, + "loss_ib": 0.004570812452584505, + "step": 3670 + }, + { + "ce_ib": 4.874920845031738, + "ce_orig": 1.2780697345733643, + "epoch": 1.0552160471637069, + "kl_loss": 0.14564074575901031, + "loss_ib": 0.006331327836960554, + "step": 3670 + }, + { + "ce_ib": 1.9827913045883179, + "ce_orig": 0.4447243809700012, + "epoch": 1.0555036307426846, + "kl_loss": 0.14796938002109528, + "loss_ib": 0.0034624850377440453, + "step": 3671 + }, + { + "ce_ib": 5.853141784667969, + "ce_orig": 1.1368886232376099, + "epoch": 1.0555036307426846, + "kl_loss": 0.20474576950073242, + "loss_ib": 0.007900599390268326, + "step": 3671 + }, + { + "ce_ib": 4.735269069671631, + "ce_orig": 1.2431176900863647, + "epoch": 1.0555036307426846, + "kl_loss": 0.19459307193756104, + "loss_ib": 0.0066812001168727875, + "step": 3671 + }, + { + "ce_ib": 2.2825753688812256, + "ce_orig": 0.6823381185531616, + "epoch": 1.0555036307426846, + "kl_loss": 0.16657045483589172, + "loss_ib": 0.003948279656469822, + "step": 3671 + }, + { + "ce_ib": 3.7679684162139893, + "ce_orig": 0.6665271520614624, + "epoch": 1.0557912143216623, + "kl_loss": 0.19089853763580322, + "loss_ib": 0.005676953587681055, + "step": 3672 + }, + { + "ce_ib": 2.8504655361175537, + "ce_orig": 0.7240284085273743, + "epoch": 1.0557912143216623, + "kl_loss": 0.13494345545768738, + "loss_ib": 0.004199899733066559, + "step": 3672 + }, + { + "ce_ib": 5.115381717681885, + "ce_orig": 0.8858199715614319, + "epoch": 1.0557912143216623, + "kl_loss": 0.08228924125432968, + "loss_ib": 0.005938274320214987, + "step": 3672 + }, + { + "ce_ib": 2.9686315059661865, + "ce_orig": 0.8169926404953003, + "epoch": 1.0557912143216623, + "kl_loss": 0.17934741079807281, + "loss_ib": 0.004762105643749237, + "step": 3672 + }, + { + "ce_ib": 3.2347776889801025, + "ce_orig": 0.7942968606948853, + "epoch": 1.0560787979006399, + "kl_loss": 0.1140647679567337, + "loss_ib": 0.004375425633043051, + "step": 3673 + }, + { + "ce_ib": 2.1555659770965576, + "ce_orig": 0.3498593270778656, + "epoch": 1.0560787979006399, + "kl_loss": 0.4521310329437256, + "loss_ib": 0.006676876451820135, + "step": 3673 + }, + { + "ce_ib": 3.929121255874634, + "ce_orig": 0.8810097575187683, + "epoch": 1.0560787979006399, + "kl_loss": 0.17935678362846375, + "loss_ib": 0.005722688976675272, + "step": 3673 + }, + { + "ce_ib": 2.615010976791382, + "ce_orig": 0.6391500234603882, + "epoch": 1.0560787979006399, + "kl_loss": 0.16520866751670837, + "loss_ib": 0.0042670974507927895, + "step": 3673 + }, + { + "ce_ib": 4.71409797668457, + "ce_orig": 0.7449265122413635, + "epoch": 1.0563663814796176, + "kl_loss": 0.19181156158447266, + "loss_ib": 0.006632213480770588, + "step": 3674 + }, + { + "ce_ib": 2.2376630306243896, + "ce_orig": 0.777832567691803, + "epoch": 1.0563663814796176, + "kl_loss": 0.1563452184200287, + "loss_ib": 0.0038011150900274515, + "step": 3674 + }, + { + "ce_ib": 3.1496129035949707, + "ce_orig": 0.7763241529464722, + "epoch": 1.0563663814796176, + "kl_loss": 0.18061748147010803, + "loss_ib": 0.004955787677317858, + "step": 3674 + }, + { + "ce_ib": 2.9897749423980713, + "ce_orig": 0.7279019951820374, + "epoch": 1.0563663814796176, + "kl_loss": 0.13367974758148193, + "loss_ib": 0.004326572176069021, + "step": 3674 + }, + { + "epoch": 1.056653965058595, + "grad_norm": 0.15534932911396027, + "learning_rate": 7.524603391678541e-06, + "loss": 0.801, + "step": 3675 + }, + { + "ce_ib": 4.997219562530518, + "ce_orig": 1.117730975151062, + "epoch": 1.056653965058595, + "kl_loss": 0.26043668389320374, + "loss_ib": 0.007601586636155844, + "step": 3675 + }, + { + "ce_ib": 5.517655372619629, + "ce_orig": 1.5212445259094238, + "epoch": 1.056653965058595, + "kl_loss": 0.1910933256149292, + "loss_ib": 0.0074285888113081455, + "step": 3675 + }, + { + "ce_ib": 3.0307672023773193, + "ce_orig": 0.4883987009525299, + "epoch": 1.056653965058595, + "kl_loss": 0.5533731579780579, + "loss_ib": 0.008564498275518417, + "step": 3675 + }, + { + "ce_ib": 3.4226467609405518, + "ce_orig": 0.8376780152320862, + "epoch": 1.056653965058595, + "kl_loss": 0.3502628207206726, + "loss_ib": 0.006925275083631277, + "step": 3675 + }, + { + "ce_ib": 3.9327573776245117, + "ce_orig": 0.919771671295166, + "epoch": 1.0569415486375728, + "kl_loss": 0.18669675290584564, + "loss_ib": 0.005799724720418453, + "step": 3676 + }, + { + "ce_ib": 2.534510612487793, + "ce_orig": 0.38436028361320496, + "epoch": 1.0569415486375728, + "kl_loss": 0.18043819069862366, + "loss_ib": 0.004338892642408609, + "step": 3676 + }, + { + "ce_ib": 4.382792949676514, + "ce_orig": 0.963530421257019, + "epoch": 1.0569415486375728, + "kl_loss": 0.20600003004074097, + "loss_ib": 0.006442793644964695, + "step": 3676 + }, + { + "ce_ib": 5.772188663482666, + "ce_orig": 1.4440151453018188, + "epoch": 1.0569415486375728, + "kl_loss": 0.19773858785629272, + "loss_ib": 0.007749574724584818, + "step": 3676 + }, + { + "ce_ib": 4.7434401512146, + "ce_orig": 1.2355220317840576, + "epoch": 1.0572291322165503, + "kl_loss": 0.2748543322086334, + "loss_ib": 0.0074919830076396465, + "step": 3677 + }, + { + "ce_ib": 3.9197847843170166, + "ce_orig": 0.9352055788040161, + "epoch": 1.0572291322165503, + "kl_loss": 0.13340473175048828, + "loss_ib": 0.005253832321614027, + "step": 3677 + }, + { + "ce_ib": 1.9924434423446655, + "ce_orig": 0.569777250289917, + "epoch": 1.0572291322165503, + "kl_loss": 0.17184753715991974, + "loss_ib": 0.003710918826982379, + "step": 3677 + }, + { + "ce_ib": 4.2286882400512695, + "ce_orig": 0.9819920063018799, + "epoch": 1.0572291322165503, + "kl_loss": 0.20941062271595, + "loss_ib": 0.00632279459387064, + "step": 3677 + }, + { + "ce_ib": 4.698777675628662, + "ce_orig": 1.120662808418274, + "epoch": 1.057516715795528, + "kl_loss": 0.17015163600444794, + "loss_ib": 0.0064002941362559795, + "step": 3678 + }, + { + "ce_ib": 2.3446707725524902, + "ce_orig": 0.603595495223999, + "epoch": 1.057516715795528, + "kl_loss": 0.22233846783638, + "loss_ib": 0.004568055272102356, + "step": 3678 + }, + { + "ce_ib": 2.9904773235321045, + "ce_orig": 0.7604645490646362, + "epoch": 1.057516715795528, + "kl_loss": 0.1464906483888626, + "loss_ib": 0.004455383867025375, + "step": 3678 + }, + { + "ce_ib": 0.5192580223083496, + "ce_orig": 0.1266927570104599, + "epoch": 1.057516715795528, + "kl_loss": 0.33603590726852417, + "loss_ib": 0.003879616968333721, + "step": 3678 + }, + { + "ce_ib": 2.8981847763061523, + "ce_orig": 0.6768044233322144, + "epoch": 1.0578042993745058, + "kl_loss": 0.15956054627895355, + "loss_ib": 0.004493790213018656, + "step": 3679 + }, + { + "ce_ib": 4.8017497062683105, + "ce_orig": 1.4463919401168823, + "epoch": 1.0578042993745058, + "kl_loss": 0.16468201577663422, + "loss_ib": 0.00644856970757246, + "step": 3679 + }, + { + "ce_ib": 3.5584006309509277, + "ce_orig": 0.8178350329399109, + "epoch": 1.0578042993745058, + "kl_loss": 0.1949108988046646, + "loss_ib": 0.0055075096897780895, + "step": 3679 + }, + { + "ce_ib": 2.8990044593811035, + "ce_orig": 0.949175238609314, + "epoch": 1.0578042993745058, + "kl_loss": 0.14644522964954376, + "loss_ib": 0.00436345674097538, + "step": 3679 + }, + { + "epoch": 1.0580918829534833, + "grad_norm": 0.14049459993839264, + "learning_rate": 7.51790145207188e-06, + "loss": 0.8911, + "step": 3680 + }, + { + "ce_ib": 5.5241618156433105, + "ce_orig": 1.4779372215270996, + "epoch": 1.0580918829534833, + "kl_loss": 0.15050484240055084, + "loss_ib": 0.007029210217297077, + "step": 3680 + }, + { + "ce_ib": 5.1323370933532715, + "ce_orig": 1.1219252347946167, + "epoch": 1.0580918829534833, + "kl_loss": 0.15398098528385162, + "loss_ib": 0.00667214673012495, + "step": 3680 + }, + { + "ce_ib": 1.7217622995376587, + "ce_orig": 0.5149421095848083, + "epoch": 1.0580918829534833, + "kl_loss": 0.12367436289787292, + "loss_ib": 0.0029585056472569704, + "step": 3680 + }, + { + "ce_ib": 2.402034282684326, + "ce_orig": 0.6857285499572754, + "epoch": 1.0580918829534833, + "kl_loss": 0.13229554891586304, + "loss_ib": 0.003724989714100957, + "step": 3680 + }, + { + "ce_ib": 2.665780782699585, + "ce_orig": 0.6923161149024963, + "epoch": 1.058379466532461, + "kl_loss": 0.1015329509973526, + "loss_ib": 0.0036811099853366613, + "step": 3681 + }, + { + "ce_ib": 6.039740562438965, + "ce_orig": 1.8097176551818848, + "epoch": 1.058379466532461, + "kl_loss": 0.14761628210544586, + "loss_ib": 0.0075159030966460705, + "step": 3681 + }, + { + "ce_ib": 3.338006019592285, + "ce_orig": 0.5554002523422241, + "epoch": 1.058379466532461, + "kl_loss": 0.24084898829460144, + "loss_ib": 0.005746495444327593, + "step": 3681 + }, + { + "ce_ib": 4.07752799987793, + "ce_orig": 1.0298540592193604, + "epoch": 1.058379466532461, + "kl_loss": 0.1619451344013214, + "loss_ib": 0.005696979817003012, + "step": 3681 + }, + { + "ce_ib": 5.185752868652344, + "ce_orig": 1.176296591758728, + "epoch": 1.0586670501114386, + "kl_loss": 0.17130064964294434, + "loss_ib": 0.0068987593986094, + "step": 3682 + }, + { + "ce_ib": 5.314700603485107, + "ce_orig": 1.2748737335205078, + "epoch": 1.0586670501114386, + "kl_loss": 0.15476495027542114, + "loss_ib": 0.006862349808216095, + "step": 3682 + }, + { + "ce_ib": 4.912985324859619, + "ce_orig": 1.253124713897705, + "epoch": 1.0586670501114386, + "kl_loss": 0.18519341945648193, + "loss_ib": 0.006764919497072697, + "step": 3682 + }, + { + "ce_ib": 1.0998543500900269, + "ce_orig": 0.1931544840335846, + "epoch": 1.0586670501114386, + "kl_loss": 0.21073433756828308, + "loss_ib": 0.0032071976456791162, + "step": 3682 + }, + { + "ce_ib": 2.576472043991089, + "ce_orig": 0.7714435458183289, + "epoch": 1.0589546336904163, + "kl_loss": 0.16101746261119843, + "loss_ib": 0.004186647012829781, + "step": 3683 + }, + { + "ce_ib": 3.407372236251831, + "ce_orig": 0.9666805863380432, + "epoch": 1.0589546336904163, + "kl_loss": 0.17560362815856934, + "loss_ib": 0.005163408350199461, + "step": 3683 + }, + { + "ce_ib": 3.309410810470581, + "ce_orig": 0.7040050625801086, + "epoch": 1.0589546336904163, + "kl_loss": 0.11053380370140076, + "loss_ib": 0.00441474886611104, + "step": 3683 + }, + { + "ce_ib": 2.3899827003479004, + "ce_orig": 0.6193454265594482, + "epoch": 1.0589546336904163, + "kl_loss": 0.12261029332876205, + "loss_ib": 0.003616085508838296, + "step": 3683 + }, + { + "ce_ib": 7.958670616149902, + "ce_orig": 1.5983353853225708, + "epoch": 1.0592422172693938, + "kl_loss": 0.24636141955852509, + "loss_ib": 0.010422284714877605, + "step": 3684 + }, + { + "ce_ib": 4.2686662673950195, + "ce_orig": 1.0899204015731812, + "epoch": 1.0592422172693938, + "kl_loss": 0.1531868427991867, + "loss_ib": 0.005800534505397081, + "step": 3684 + }, + { + "ce_ib": 3.7840042114257812, + "ce_orig": 0.8435982465744019, + "epoch": 1.0592422172693938, + "kl_loss": 0.19485867023468018, + "loss_ib": 0.005732590798288584, + "step": 3684 + }, + { + "ce_ib": 3.3070735931396484, + "ce_orig": 0.7049264907836914, + "epoch": 1.0592422172693938, + "kl_loss": 0.15955977141857147, + "loss_ib": 0.004902671556919813, + "step": 3684 + }, + { + "epoch": 1.0595298008483716, + "grad_norm": 0.1478528082370758, + "learning_rate": 7.5111934462562075e-06, + "loss": 0.8919, + "step": 3685 + }, + { + "ce_ib": 2.7455737590789795, + "ce_orig": 0.5573349595069885, + "epoch": 1.0595298008483716, + "kl_loss": 0.1610005497932434, + "loss_ib": 0.004355579148977995, + "step": 3685 + }, + { + "ce_ib": 4.542871952056885, + "ce_orig": 0.8679085373878479, + "epoch": 1.0595298008483716, + "kl_loss": 0.14371255040168762, + "loss_ib": 0.00597999757155776, + "step": 3685 + }, + { + "ce_ib": 4.348957061767578, + "ce_orig": 1.1710125207901, + "epoch": 1.0595298008483716, + "kl_loss": 0.2444102168083191, + "loss_ib": 0.006793058943003416, + "step": 3685 + }, + { + "ce_ib": 3.1233103275299072, + "ce_orig": 0.35357439517974854, + "epoch": 1.0595298008483716, + "kl_loss": 0.17423522472381592, + "loss_ib": 0.004865662660449743, + "step": 3685 + }, + { + "ce_ib": 3.035691499710083, + "ce_orig": 0.5436885356903076, + "epoch": 1.0598173844273493, + "kl_loss": 0.19155588746070862, + "loss_ib": 0.004951250273734331, + "step": 3686 + }, + { + "ce_ib": 3.74594783782959, + "ce_orig": 0.9438298940658569, + "epoch": 1.0598173844273493, + "kl_loss": 0.1710679680109024, + "loss_ib": 0.00545662734657526, + "step": 3686 + }, + { + "ce_ib": 2.8025920391082764, + "ce_orig": 0.6864954233169556, + "epoch": 1.0598173844273493, + "kl_loss": 0.1938803493976593, + "loss_ib": 0.004741395823657513, + "step": 3686 + }, + { + "ce_ib": 5.56049919128418, + "ce_orig": 0.6094831228256226, + "epoch": 1.0598173844273493, + "kl_loss": 0.22036203742027283, + "loss_ib": 0.007764120120555162, + "step": 3686 + }, + { + "ce_ib": 3.643643379211426, + "ce_orig": 0.7725002765655518, + "epoch": 1.0601049680063268, + "kl_loss": 0.16785898804664612, + "loss_ib": 0.005322233308106661, + "step": 3687 + }, + { + "ce_ib": 4.132422924041748, + "ce_orig": 1.0658875703811646, + "epoch": 1.0601049680063268, + "kl_loss": 0.14150135219097137, + "loss_ib": 0.00554743641987443, + "step": 3687 + }, + { + "ce_ib": 2.5914649963378906, + "ce_orig": 0.616837203502655, + "epoch": 1.0601049680063268, + "kl_loss": 0.14594566822052002, + "loss_ib": 0.004050921648740768, + "step": 3687 + }, + { + "ce_ib": 3.6855249404907227, + "ce_orig": 0.9215061664581299, + "epoch": 1.0601049680063268, + "kl_loss": 0.12358341366052628, + "loss_ib": 0.00492135901004076, + "step": 3687 + }, + { + "ce_ib": 1.4801949262619019, + "ce_orig": 0.5133594274520874, + "epoch": 1.0603925515853045, + "kl_loss": 0.12368921935558319, + "loss_ib": 0.0027170872781425714, + "step": 3688 + }, + { + "ce_ib": 4.830127239227295, + "ce_orig": 1.1912782192230225, + "epoch": 1.0603925515853045, + "kl_loss": 0.18866291642189026, + "loss_ib": 0.0067167566157877445, + "step": 3688 + }, + { + "ce_ib": 3.1398677825927734, + "ce_orig": 0.7157926559448242, + "epoch": 1.0603925515853045, + "kl_loss": 0.22131526470184326, + "loss_ib": 0.005353020504117012, + "step": 3688 + }, + { + "ce_ib": 3.390157699584961, + "ce_orig": 0.4519965648651123, + "epoch": 1.0603925515853045, + "kl_loss": 0.33646199107170105, + "loss_ib": 0.006754777394235134, + "step": 3688 + }, + { + "ce_ib": 3.341928243637085, + "ce_orig": 0.6031345129013062, + "epoch": 1.060680135164282, + "kl_loss": 0.17215824127197266, + "loss_ib": 0.005063510965555906, + "step": 3689 + }, + { + "ce_ib": 3.945441484451294, + "ce_orig": 0.8025451898574829, + "epoch": 1.060680135164282, + "kl_loss": 0.49673911929130554, + "loss_ib": 0.008912832476198673, + "step": 3689 + }, + { + "ce_ib": 5.066859722137451, + "ce_orig": 1.1567542552947998, + "epoch": 1.060680135164282, + "kl_loss": 0.2045937180519104, + "loss_ib": 0.007112796884030104, + "step": 3689 + }, + { + "ce_ib": 5.158127784729004, + "ce_orig": 1.1374967098236084, + "epoch": 1.060680135164282, + "kl_loss": 0.25131097435951233, + "loss_ib": 0.007671237923204899, + "step": 3689 + }, + { + "epoch": 1.0609677187432598, + "grad_norm": 0.1310495287179947, + "learning_rate": 7.504479390392661e-06, + "loss": 0.8785, + "step": 3690 + }, + { + "ce_ib": 2.9238789081573486, + "ce_orig": 0.4679180383682251, + "epoch": 1.0609677187432598, + "kl_loss": 0.19534355401992798, + "loss_ib": 0.0048773144371807575, + "step": 3690 + }, + { + "ce_ib": 2.7790427207946777, + "ce_orig": 0.7568128108978271, + "epoch": 1.0609677187432598, + "kl_loss": 0.13463428616523743, + "loss_ib": 0.004125385545194149, + "step": 3690 + }, + { + "ce_ib": 3.8817903995513916, + "ce_orig": 0.9662749767303467, + "epoch": 1.0609677187432598, + "kl_loss": 0.18284469842910767, + "loss_ib": 0.005710236728191376, + "step": 3690 + }, + { + "ce_ib": 5.2029619216918945, + "ce_orig": 1.5192396640777588, + "epoch": 1.0609677187432598, + "kl_loss": 0.1885398030281067, + "loss_ib": 0.007088359910994768, + "step": 3690 + }, + { + "ce_ib": 4.515749454498291, + "ce_orig": 0.9273286461830139, + "epoch": 1.0612553023222373, + "kl_loss": 0.18474481999874115, + "loss_ib": 0.0063631972298026085, + "step": 3691 + }, + { + "ce_ib": 4.061568737030029, + "ce_orig": 0.9410290122032166, + "epoch": 1.0612553023222373, + "kl_loss": 0.119843028485775, + "loss_ib": 0.005259999074041843, + "step": 3691 + }, + { + "ce_ib": 2.2170639038085938, + "ce_orig": 0.5914264917373657, + "epoch": 1.0612553023222373, + "kl_loss": 0.16041827201843262, + "loss_ib": 0.0038212465588003397, + "step": 3691 + }, + { + "ce_ib": 4.2386603355407715, + "ce_orig": 0.8331001996994019, + "epoch": 1.0612553023222373, + "kl_loss": 0.19673389196395874, + "loss_ib": 0.006205999292433262, + "step": 3691 + }, + { + "ce_ib": 2.9592201709747314, + "ce_orig": 0.6114610433578491, + "epoch": 1.061542885901215, + "kl_loss": 0.17193330824375153, + "loss_ib": 0.004678553435951471, + "step": 3692 + }, + { + "ce_ib": 2.4067656993865967, + "ce_orig": 0.7256864309310913, + "epoch": 1.061542885901215, + "kl_loss": 0.1571124792098999, + "loss_ib": 0.003977890592068434, + "step": 3692 + }, + { + "ce_ib": 6.2569074630737305, + "ce_orig": 1.3243553638458252, + "epoch": 1.061542885901215, + "kl_loss": 0.27144622802734375, + "loss_ib": 0.008971369825303555, + "step": 3692 + }, + { + "ce_ib": 3.5608277320861816, + "ce_orig": 0.6854957938194275, + "epoch": 1.061542885901215, + "kl_loss": 0.13703645765781403, + "loss_ib": 0.004931192379444838, + "step": 3692 + }, + { + "ce_ib": 5.106947898864746, + "ce_orig": 1.2649797201156616, + "epoch": 1.0618304694801928, + "kl_loss": 0.1915692836046219, + "loss_ib": 0.00702264066785574, + "step": 3693 + }, + { + "ce_ib": 4.390749454498291, + "ce_orig": 0.5493226647377014, + "epoch": 1.0618304694801928, + "kl_loss": 0.2111842930316925, + "loss_ib": 0.006502592470496893, + "step": 3693 + }, + { + "ce_ib": 3.1958167552948, + "ce_orig": 0.8786832094192505, + "epoch": 1.0618304694801928, + "kl_loss": 0.14139321446418762, + "loss_ib": 0.004609748721122742, + "step": 3693 + }, + { + "ce_ib": 3.6738319396972656, + "ce_orig": 0.8969832062721252, + "epoch": 1.0618304694801928, + "kl_loss": 0.19099541008472443, + "loss_ib": 0.005583786405622959, + "step": 3693 + }, + { + "ce_ib": 0.9112034440040588, + "ce_orig": 0.16721832752227783, + "epoch": 1.0621180530591703, + "kl_loss": 0.3539714813232422, + "loss_ib": 0.004450918175280094, + "step": 3694 + }, + { + "ce_ib": 4.004332542419434, + "ce_orig": 0.5200830698013306, + "epoch": 1.0621180530591703, + "kl_loss": 0.18945257365703583, + "loss_ib": 0.005898857954889536, + "step": 3694 + }, + { + "ce_ib": 6.735487461090088, + "ce_orig": 1.507177710533142, + "epoch": 1.0621180530591703, + "kl_loss": 0.13034044206142426, + "loss_ib": 0.008038892410695553, + "step": 3694 + }, + { + "ce_ib": 3.4103894233703613, + "ce_orig": 0.5060885548591614, + "epoch": 1.0621180530591703, + "kl_loss": 0.12934651970863342, + "loss_ib": 0.00470385467633605, + "step": 3694 + }, + { + "epoch": 1.062405636638148, + "grad_norm": 0.151899516582489, + "learning_rate": 7.497759300656962e-06, + "loss": 0.8836, + "step": 3695 + }, + { + "ce_ib": 3.6043341159820557, + "ce_orig": 1.0010348558425903, + "epoch": 1.062405636638148, + "kl_loss": 0.16936270892620087, + "loss_ib": 0.005297961179167032, + "step": 3695 + }, + { + "ce_ib": 1.9660004377365112, + "ce_orig": 0.48255643248558044, + "epoch": 1.062405636638148, + "kl_loss": 0.1472361981868744, + "loss_ib": 0.0034383623860776424, + "step": 3695 + }, + { + "ce_ib": 4.738006591796875, + "ce_orig": 0.9430715441703796, + "epoch": 1.062405636638148, + "kl_loss": 0.2229290008544922, + "loss_ib": 0.0069672963581979275, + "step": 3695 + }, + { + "ce_ib": 4.659473896026611, + "ce_orig": 0.6939878463745117, + "epoch": 1.062405636638148, + "kl_loss": 0.16463631391525269, + "loss_ib": 0.006305837072432041, + "step": 3695 + }, + { + "ce_ib": 5.910250186920166, + "ce_orig": 1.7555197477340698, + "epoch": 1.0626932202171255, + "kl_loss": 0.18011833727359772, + "loss_ib": 0.007711433805525303, + "step": 3696 + }, + { + "ce_ib": 4.682783603668213, + "ce_orig": 0.7696719765663147, + "epoch": 1.0626932202171255, + "kl_loss": 0.13705085217952728, + "loss_ib": 0.006053292192518711, + "step": 3696 + }, + { + "ce_ib": 1.6102081537246704, + "ce_orig": 0.4021718502044678, + "epoch": 1.0626932202171255, + "kl_loss": 0.1551816165447235, + "loss_ib": 0.0031620243098586798, + "step": 3696 + }, + { + "ce_ib": 5.645593166351318, + "ce_orig": 1.3743034601211548, + "epoch": 1.0626932202171255, + "kl_loss": 0.17244279384613037, + "loss_ib": 0.007370021194219589, + "step": 3696 + }, + { + "ce_ib": 5.177109241485596, + "ce_orig": 1.3441236019134521, + "epoch": 1.0629808037961033, + "kl_loss": 0.1226460263133049, + "loss_ib": 0.006403569597750902, + "step": 3697 + }, + { + "ce_ib": 2.4121487140655518, + "ce_orig": 0.6399890780448914, + "epoch": 1.0629808037961033, + "kl_loss": 0.1101803183555603, + "loss_ib": 0.003513951785862446, + "step": 3697 + }, + { + "ce_ib": 2.638033390045166, + "ce_orig": 0.7149957418441772, + "epoch": 1.0629808037961033, + "kl_loss": 0.1802178919315338, + "loss_ib": 0.004440212156623602, + "step": 3697 + }, + { + "ce_ib": 2.942859649658203, + "ce_orig": 0.8138946890830994, + "epoch": 1.0629808037961033, + "kl_loss": 0.17197281122207642, + "loss_ib": 0.004662587773054838, + "step": 3697 + }, + { + "ce_ib": 3.036839246749878, + "ce_orig": 0.5388631224632263, + "epoch": 1.0632683873750808, + "kl_loss": 0.2468430995941162, + "loss_ib": 0.005505270324647427, + "step": 3698 + }, + { + "ce_ib": 3.5503334999084473, + "ce_orig": 0.8098311424255371, + "epoch": 1.0632683873750808, + "kl_loss": 0.12568077445030212, + "loss_ib": 0.0048071411438286304, + "step": 3698 + }, + { + "ce_ib": 1.9698079824447632, + "ce_orig": 0.22997687757015228, + "epoch": 1.0632683873750808, + "kl_loss": 0.11466223001480103, + "loss_ib": 0.0031164302490651608, + "step": 3698 + }, + { + "ce_ib": 6.259727954864502, + "ce_orig": 1.2740585803985596, + "epoch": 1.0632683873750808, + "kl_loss": 0.18469920754432678, + "loss_ib": 0.008106720633804798, + "step": 3698 + }, + { + "ce_ib": 4.166961669921875, + "ce_orig": 0.8993425369262695, + "epoch": 1.0635559709540585, + "kl_loss": 0.1679183542728424, + "loss_ib": 0.0058461450971663, + "step": 3699 + }, + { + "ce_ib": 2.5380873680114746, + "ce_orig": 0.5296589136123657, + "epoch": 1.0635559709540585, + "kl_loss": 0.4501151442527771, + "loss_ib": 0.007039238698780537, + "step": 3699 + }, + { + "ce_ib": 3.770843982696533, + "ce_orig": 0.7893391847610474, + "epoch": 1.0635559709540585, + "kl_loss": 0.14815130829811096, + "loss_ib": 0.005252357106655836, + "step": 3699 + }, + { + "ce_ib": 3.5759799480438232, + "ce_orig": 0.9459384083747864, + "epoch": 1.0635559709540585, + "kl_loss": 0.20043456554412842, + "loss_ib": 0.005580325610935688, + "step": 3699 + }, + { + "epoch": 1.0638435545330363, + "grad_norm": 0.15500128269195557, + "learning_rate": 7.4910331932393634e-06, + "loss": 0.8393, + "step": 3700 + }, + { + "ce_ib": 4.778918743133545, + "ce_orig": 0.9994522929191589, + "epoch": 1.0638435545330363, + "kl_loss": 0.20158636569976807, + "loss_ib": 0.006794782821089029, + "step": 3700 + }, + { + "ce_ib": 2.7159817218780518, + "ce_orig": 0.6610987186431885, + "epoch": 1.0638435545330363, + "kl_loss": 0.145278200507164, + "loss_ib": 0.004168763756752014, + "step": 3700 + }, + { + "ce_ib": 3.7015039920806885, + "ce_orig": 0.8277584314346313, + "epoch": 1.0638435545330363, + "kl_loss": 0.15062600374221802, + "loss_ib": 0.005207763984799385, + "step": 3700 + }, + { + "ce_ib": 3.1250245571136475, + "ce_orig": 0.787214457988739, + "epoch": 1.0638435545330363, + "kl_loss": 0.160666823387146, + "loss_ib": 0.004731692839413881, + "step": 3700 + }, + { + "ce_ib": 4.177247524261475, + "ce_orig": 1.0638734102249146, + "epoch": 1.0641311381120138, + "kl_loss": 0.1839217245578766, + "loss_ib": 0.006016464903950691, + "step": 3701 + }, + { + "ce_ib": 3.2266571521759033, + "ce_orig": 0.7379732728004456, + "epoch": 1.0641311381120138, + "kl_loss": 0.2790715992450714, + "loss_ib": 0.006017372943460941, + "step": 3701 + }, + { + "ce_ib": 2.2124972343444824, + "ce_orig": 0.46667566895484924, + "epoch": 1.0641311381120138, + "kl_loss": 0.3498693108558655, + "loss_ib": 0.005711190402507782, + "step": 3701 + }, + { + "ce_ib": 4.066230297088623, + "ce_orig": 0.892626166343689, + "epoch": 1.0641311381120138, + "kl_loss": 0.17087280750274658, + "loss_ib": 0.005774958059191704, + "step": 3701 + }, + { + "ce_ib": 5.208425521850586, + "ce_orig": 1.1765800714492798, + "epoch": 1.0644187216909915, + "kl_loss": 0.21838794648647308, + "loss_ib": 0.007392304949462414, + "step": 3702 + }, + { + "ce_ib": 2.724466562271118, + "ce_orig": 0.587043285369873, + "epoch": 1.0644187216909915, + "kl_loss": 0.20170822739601135, + "loss_ib": 0.00474154856055975, + "step": 3702 + }, + { + "ce_ib": 4.082377910614014, + "ce_orig": 0.939187228679657, + "epoch": 1.0644187216909915, + "kl_loss": 0.14251841604709625, + "loss_ib": 0.005507561843842268, + "step": 3702 + }, + { + "ce_ib": 2.733414649963379, + "ce_orig": 0.5924063920974731, + "epoch": 1.0644187216909915, + "kl_loss": 0.16123878955841064, + "loss_ib": 0.004345802590250969, + "step": 3702 + }, + { + "ce_ib": 3.903977155685425, + "ce_orig": 0.6755804419517517, + "epoch": 1.064706305269969, + "kl_loss": 0.1411074697971344, + "loss_ib": 0.005315052345395088, + "step": 3703 + }, + { + "ce_ib": 4.221944332122803, + "ce_orig": 0.5338942408561707, + "epoch": 1.064706305269969, + "kl_loss": 0.20219075679779053, + "loss_ib": 0.006243852432817221, + "step": 3703 + }, + { + "ce_ib": 4.256496906280518, + "ce_orig": 1.17726731300354, + "epoch": 1.064706305269969, + "kl_loss": 0.1455337405204773, + "loss_ib": 0.005711834412068129, + "step": 3703 + }, + { + "ce_ib": 4.755136489868164, + "ce_orig": 1.2656282186508179, + "epoch": 1.064706305269969, + "kl_loss": 0.10679720342159271, + "loss_ib": 0.005823108367621899, + "step": 3703 + }, + { + "ce_ib": 2.375526189804077, + "ce_orig": 0.5100335478782654, + "epoch": 1.0649938888489467, + "kl_loss": 0.1659403294324875, + "loss_ib": 0.004034929443150759, + "step": 3704 + }, + { + "ce_ib": 4.0178141593933105, + "ce_orig": 1.0090502500534058, + "epoch": 1.0649938888489467, + "kl_loss": 0.19018113613128662, + "loss_ib": 0.0059196255169808865, + "step": 3704 + }, + { + "ce_ib": 2.4320619106292725, + "ce_orig": 0.42505645751953125, + "epoch": 1.0649938888489467, + "kl_loss": 0.2367468774318695, + "loss_ib": 0.004799530375748873, + "step": 3704 + }, + { + "ce_ib": 2.4145896434783936, + "ce_orig": 0.7069486379623413, + "epoch": 1.0649938888489467, + "kl_loss": 0.15019333362579346, + "loss_ib": 0.003916522953659296, + "step": 3704 + }, + { + "epoch": 1.0652814724279245, + "grad_norm": 0.12550269067287445, + "learning_rate": 7.484301084344623e-06, + "loss": 0.8908, + "step": 3705 + }, + { + "ce_ib": 5.118179798126221, + "ce_orig": 1.4451483488082886, + "epoch": 1.0652814724279245, + "kl_loss": 0.16712385416030884, + "loss_ib": 0.006789418403059244, + "step": 3705 + }, + { + "ce_ib": 3.3694489002227783, + "ce_orig": 0.8704590201377869, + "epoch": 1.0652814724279245, + "kl_loss": 0.2857484221458435, + "loss_ib": 0.006226933095604181, + "step": 3705 + }, + { + "ce_ib": 2.5016560554504395, + "ce_orig": 0.5284770131111145, + "epoch": 1.0652814724279245, + "kl_loss": 0.11245612800121307, + "loss_ib": 0.003626217134296894, + "step": 3705 + }, + { + "ce_ib": 3.0213756561279297, + "ce_orig": 0.9000400900840759, + "epoch": 1.0652814724279245, + "kl_loss": 0.16212639212608337, + "loss_ib": 0.004642639774829149, + "step": 3705 + }, + { + "ce_ib": 2.633476495742798, + "ce_orig": 0.6260303854942322, + "epoch": 1.065569056006902, + "kl_loss": 0.13930010795593262, + "loss_ib": 0.004026477690786123, + "step": 3706 + }, + { + "ce_ib": 2.209679126739502, + "ce_orig": 0.7228484153747559, + "epoch": 1.065569056006902, + "kl_loss": 0.11391685903072357, + "loss_ib": 0.0033488476183265448, + "step": 3706 + }, + { + "ce_ib": 4.2225213050842285, + "ce_orig": 0.5714927315711975, + "epoch": 1.065569056006902, + "kl_loss": 0.2825547456741333, + "loss_ib": 0.007048069033771753, + "step": 3706 + }, + { + "ce_ib": 5.630242347717285, + "ce_orig": 1.218950629234314, + "epoch": 1.065569056006902, + "kl_loss": 0.12112987786531448, + "loss_ib": 0.006841540802270174, + "step": 3706 + }, + { + "ce_ib": 3.9330332279205322, + "ce_orig": 0.761711061000824, + "epoch": 1.0658566395858797, + "kl_loss": 0.20257733762264252, + "loss_ib": 0.005958806723356247, + "step": 3707 + }, + { + "ce_ib": 4.280385494232178, + "ce_orig": 1.1924551725387573, + "epoch": 1.0658566395858797, + "kl_loss": 0.12041287124156952, + "loss_ib": 0.0054845139384269714, + "step": 3707 + }, + { + "ce_ib": 2.027050733566284, + "ce_orig": 0.5619124174118042, + "epoch": 1.0658566395858797, + "kl_loss": 0.1607547104358673, + "loss_ib": 0.0036345976404845715, + "step": 3707 + }, + { + "ce_ib": 1.865201473236084, + "ce_orig": 0.499539315700531, + "epoch": 1.0658566395858797, + "kl_loss": 0.14882689714431763, + "loss_ib": 0.0033534704707562923, + "step": 3707 + }, + { + "ce_ib": 4.540215969085693, + "ce_orig": 1.3001269102096558, + "epoch": 1.0661442231648572, + "kl_loss": 0.18104293942451477, + "loss_ib": 0.006350645329803228, + "step": 3708 + }, + { + "ce_ib": 2.058560371398926, + "ce_orig": 0.677078902721405, + "epoch": 1.0661442231648572, + "kl_loss": 0.12644845247268677, + "loss_ib": 0.003323044627904892, + "step": 3708 + }, + { + "ce_ib": 3.5192177295684814, + "ce_orig": 1.0413888692855835, + "epoch": 1.0661442231648572, + "kl_loss": 0.2175392210483551, + "loss_ib": 0.005694609601050615, + "step": 3708 + }, + { + "ce_ib": 2.2182464599609375, + "ce_orig": 0.7181891798973083, + "epoch": 1.0661442231648572, + "kl_loss": 0.11512060463428497, + "loss_ib": 0.0033694521989673376, + "step": 3708 + }, + { + "ce_ib": 3.0800232887268066, + "ce_orig": 0.9507697820663452, + "epoch": 1.066431806743835, + "kl_loss": 0.14926713705062866, + "loss_ib": 0.004572694655507803, + "step": 3709 + }, + { + "ce_ib": 3.610227108001709, + "ce_orig": 0.8478972911834717, + "epoch": 1.066431806743835, + "kl_loss": 0.20096641778945923, + "loss_ib": 0.005619890987873077, + "step": 3709 + }, + { + "ce_ib": 3.8953564167022705, + "ce_orig": 0.8608444333076477, + "epoch": 1.066431806743835, + "kl_loss": 0.21551036834716797, + "loss_ib": 0.006050460040569305, + "step": 3709 + }, + { + "ce_ib": 3.819456100463867, + "ce_orig": 0.885513424873352, + "epoch": 1.066431806743835, + "kl_loss": 0.140447199344635, + "loss_ib": 0.005223928019404411, + "step": 3709 + }, + { + "epoch": 1.0667193903228125, + "grad_norm": 0.1719667911529541, + "learning_rate": 7.47756299019195e-06, + "loss": 0.8379, + "step": 3710 + }, + { + "ce_ib": 5.260735988616943, + "ce_orig": 1.059478998184204, + "epoch": 1.0667193903228125, + "kl_loss": 0.18703633546829224, + "loss_ib": 0.007131099700927734, + "step": 3710 + }, + { + "ce_ib": 4.534790515899658, + "ce_orig": 1.0030843019485474, + "epoch": 1.0667193903228125, + "kl_loss": 0.24453049898147583, + "loss_ib": 0.006980095524340868, + "step": 3710 + }, + { + "ce_ib": 4.050265312194824, + "ce_orig": 0.8596524000167847, + "epoch": 1.0667193903228125, + "kl_loss": 0.09052155911922455, + "loss_ib": 0.004955480806529522, + "step": 3710 + }, + { + "ce_ib": 5.510591983795166, + "ce_orig": 1.1974055767059326, + "epoch": 1.0667193903228125, + "kl_loss": 0.13296450674533844, + "loss_ib": 0.006840236485004425, + "step": 3710 + }, + { + "ce_ib": 3.027688980102539, + "ce_orig": 0.6722537875175476, + "epoch": 1.0670069739017902, + "kl_loss": 0.14315898716449738, + "loss_ib": 0.004459279123693705, + "step": 3711 + }, + { + "ce_ib": 2.1680877208709717, + "ce_orig": 0.4209827780723572, + "epoch": 1.0670069739017902, + "kl_loss": 0.12374297529459, + "loss_ib": 0.003405517665669322, + "step": 3711 + }, + { + "ce_ib": 3.3959567546844482, + "ce_orig": 0.9818472862243652, + "epoch": 1.0670069739017902, + "kl_loss": 0.13225382566452026, + "loss_ib": 0.004718495067209005, + "step": 3711 + }, + { + "ce_ib": 3.2759625911712646, + "ce_orig": 0.8068625926971436, + "epoch": 1.0670069739017902, + "kl_loss": 0.20411038398742676, + "loss_ib": 0.0053170667961239815, + "step": 3711 + }, + { + "ce_ib": 3.5776801109313965, + "ce_orig": 0.9297497868537903, + "epoch": 1.0672945574807677, + "kl_loss": 0.16034412384033203, + "loss_ib": 0.00518112163990736, + "step": 3712 + }, + { + "ce_ib": 3.753314256668091, + "ce_orig": 0.9375924468040466, + "epoch": 1.0672945574807677, + "kl_loss": 0.15747252106666565, + "loss_ib": 0.005328039638698101, + "step": 3712 + }, + { + "ce_ib": 4.250487327575684, + "ce_orig": 0.9639350771903992, + "epoch": 1.0672945574807677, + "kl_loss": 0.13167747855186462, + "loss_ib": 0.005567261949181557, + "step": 3712 + }, + { + "ce_ib": 2.4100289344787598, + "ce_orig": 0.8736687898635864, + "epoch": 1.0672945574807677, + "kl_loss": 0.12946180999279022, + "loss_ib": 0.0037046470679342747, + "step": 3712 + }, + { + "ce_ib": 3.6466338634490967, + "ce_orig": 1.1696511507034302, + "epoch": 1.0675821410597455, + "kl_loss": 0.1725209653377533, + "loss_ib": 0.0053718434646725655, + "step": 3713 + }, + { + "ce_ib": 4.05808162689209, + "ce_orig": 1.0506243705749512, + "epoch": 1.0675821410597455, + "kl_loss": 0.13913223147392273, + "loss_ib": 0.0054494040086865425, + "step": 3713 + }, + { + "ce_ib": 2.9922869205474854, + "ce_orig": 0.499217689037323, + "epoch": 1.0675821410597455, + "kl_loss": 0.17930519580841064, + "loss_ib": 0.004785338882356882, + "step": 3713 + }, + { + "ce_ib": 4.861281871795654, + "ce_orig": 1.3390487432479858, + "epoch": 1.0675821410597455, + "kl_loss": 0.34796422719955444, + "loss_ib": 0.008340924046933651, + "step": 3713 + }, + { + "ce_ib": 3.1168248653411865, + "ce_orig": 0.6709375977516174, + "epoch": 1.0678697246387232, + "kl_loss": 0.22619885206222534, + "loss_ib": 0.005378813482820988, + "step": 3714 + }, + { + "ce_ib": 4.8177809715271, + "ce_orig": 1.3416420221328735, + "epoch": 1.0678697246387232, + "kl_loss": 0.16334125399589539, + "loss_ib": 0.006451193708926439, + "step": 3714 + }, + { + "ce_ib": 4.565719127655029, + "ce_orig": 1.2114781141281128, + "epoch": 1.0678697246387232, + "kl_loss": 0.1873321235179901, + "loss_ib": 0.006439039949327707, + "step": 3714 + }, + { + "ce_ib": 6.669334888458252, + "ce_orig": 1.3278783559799194, + "epoch": 1.0678697246387232, + "kl_loss": 0.17720802128314972, + "loss_ib": 0.008441414684057236, + "step": 3714 + }, + { + "epoch": 1.0681573082177007, + "grad_norm": 0.14624179899692535, + "learning_rate": 7.47081892701498e-06, + "loss": 0.8473, + "step": 3715 + }, + { + "ce_ib": 3.18794322013855, + "ce_orig": 0.562911868095398, + "epoch": 1.0681573082177007, + "kl_loss": 0.14810492098331451, + "loss_ib": 0.004668992478400469, + "step": 3715 + }, + { + "ce_ib": 2.0367186069488525, + "ce_orig": 0.5378496050834656, + "epoch": 1.0681573082177007, + "kl_loss": 0.1552637368440628, + "loss_ib": 0.0035893558524549007, + "step": 3715 + }, + { + "ce_ib": 4.656725883483887, + "ce_orig": 0.9322917461395264, + "epoch": 1.0681573082177007, + "kl_loss": 0.17503516376018524, + "loss_ib": 0.006407076958566904, + "step": 3715 + }, + { + "ce_ib": 1.7854758501052856, + "ce_orig": 0.5900777578353882, + "epoch": 1.0681573082177007, + "kl_loss": 0.09219709783792496, + "loss_ib": 0.0027074466925114393, + "step": 3715 + }, + { + "ce_ib": 2.86862850189209, + "ce_orig": 0.6192647218704224, + "epoch": 1.0684448917966785, + "kl_loss": 0.132762610912323, + "loss_ib": 0.004196254536509514, + "step": 3716 + }, + { + "ce_ib": 4.545513153076172, + "ce_orig": 0.6427558660507202, + "epoch": 1.0684448917966785, + "kl_loss": 0.192307710647583, + "loss_ib": 0.006468590348958969, + "step": 3716 + }, + { + "ce_ib": 3.1794912815093994, + "ce_orig": 0.9390966892242432, + "epoch": 1.0684448917966785, + "kl_loss": 0.13539031147956848, + "loss_ib": 0.004533394705504179, + "step": 3716 + }, + { + "ce_ib": 2.3294785022735596, + "ce_orig": 0.6123664975166321, + "epoch": 1.0684448917966785, + "kl_loss": 0.17618127167224884, + "loss_ib": 0.004091291222721338, + "step": 3716 + }, + { + "ce_ib": 4.2207932472229, + "ce_orig": 0.70974200963974, + "epoch": 1.068732475375656, + "kl_loss": 0.2509409487247467, + "loss_ib": 0.006730202119797468, + "step": 3717 + }, + { + "ce_ib": 3.0159413814544678, + "ce_orig": 0.5711081027984619, + "epoch": 1.068732475375656, + "kl_loss": 0.17550361156463623, + "loss_ib": 0.004770977422595024, + "step": 3717 + }, + { + "ce_ib": 3.547727584838867, + "ce_orig": 0.6088070273399353, + "epoch": 1.068732475375656, + "kl_loss": 0.18078657984733582, + "loss_ib": 0.005355593282729387, + "step": 3717 + }, + { + "ce_ib": 8.114408493041992, + "ce_orig": 1.474561333656311, + "epoch": 1.068732475375656, + "kl_loss": 0.180198073387146, + "loss_ib": 0.009916389361023903, + "step": 3717 + }, + { + "ce_ib": 5.973731994628906, + "ce_orig": 1.5031574964523315, + "epoch": 1.0690200589546337, + "kl_loss": 0.16852083802223206, + "loss_ib": 0.007658940274268389, + "step": 3718 + }, + { + "ce_ib": 7.2756147384643555, + "ce_orig": 1.1818933486938477, + "epoch": 1.0690200589546337, + "kl_loss": 0.1544627845287323, + "loss_ib": 0.008820242248475552, + "step": 3718 + }, + { + "ce_ib": 4.467122554779053, + "ce_orig": 1.3135366439819336, + "epoch": 1.0690200589546337, + "kl_loss": 0.16123604774475098, + "loss_ib": 0.006079482845962048, + "step": 3718 + }, + { + "ce_ib": 2.4259557723999023, + "ce_orig": 0.7069649696350098, + "epoch": 1.0690200589546337, + "kl_loss": 0.15103277564048767, + "loss_ib": 0.0039362832903862, + "step": 3718 + }, + { + "ce_ib": 4.015134811401367, + "ce_orig": 1.0382119417190552, + "epoch": 1.0693076425336114, + "kl_loss": 0.13619187474250793, + "loss_ib": 0.0053770532831549644, + "step": 3719 + }, + { + "ce_ib": 2.2528138160705566, + "ce_orig": 0.6508844494819641, + "epoch": 1.0693076425336114, + "kl_loss": 0.17617098987102509, + "loss_ib": 0.004014523699879646, + "step": 3719 + }, + { + "ce_ib": 3.664442300796509, + "ce_orig": 0.9277706742286682, + "epoch": 1.0693076425336114, + "kl_loss": 0.20026648044586182, + "loss_ib": 0.005667107179760933, + "step": 3719 + }, + { + "ce_ib": 3.30765700340271, + "ce_orig": 0.7921573519706726, + "epoch": 1.0693076425336114, + "kl_loss": 0.1617443859577179, + "loss_ib": 0.004925100598484278, + "step": 3719 + }, + { + "epoch": 1.069595226112589, + "grad_norm": 0.14912568032741547, + "learning_rate": 7.464068911061726e-06, + "loss": 0.8459, + "step": 3720 + }, + { + "ce_ib": 3.719276189804077, + "ce_orig": 0.6527918577194214, + "epoch": 1.069595226112589, + "kl_loss": 0.17203204333782196, + "loss_ib": 0.005439596716314554, + "step": 3720 + }, + { + "ce_ib": 4.758241176605225, + "ce_orig": 1.410259485244751, + "epoch": 1.069595226112589, + "kl_loss": 0.11691072583198547, + "loss_ib": 0.005927348043769598, + "step": 3720 + }, + { + "ce_ib": 4.067825794219971, + "ce_orig": 1.2815332412719727, + "epoch": 1.069595226112589, + "kl_loss": 0.2957998514175415, + "loss_ib": 0.00702582485973835, + "step": 3720 + }, + { + "ce_ib": 3.164851188659668, + "ce_orig": 0.693679928779602, + "epoch": 1.069595226112589, + "kl_loss": 0.1295740306377411, + "loss_ib": 0.004460591357201338, + "step": 3720 + }, + { + "ce_ib": 5.243149280548096, + "ce_orig": 0.9881795048713684, + "epoch": 1.0698828096915667, + "kl_loss": 0.17245779931545258, + "loss_ib": 0.0069677275605499744, + "step": 3721 + }, + { + "ce_ib": 4.861390113830566, + "ce_orig": 1.3494722843170166, + "epoch": 1.0698828096915667, + "kl_loss": 0.15352170169353485, + "loss_ib": 0.006396607030183077, + "step": 3721 + }, + { + "ce_ib": 3.318009853363037, + "ce_orig": 0.7845771312713623, + "epoch": 1.0698828096915667, + "kl_loss": 0.1498042792081833, + "loss_ib": 0.004816052969545126, + "step": 3721 + }, + { + "ce_ib": 3.4691805839538574, + "ce_orig": 0.6194568276405334, + "epoch": 1.0698828096915667, + "kl_loss": 0.156476229429245, + "loss_ib": 0.005033942870795727, + "step": 3721 + }, + { + "ce_ib": 4.7561163902282715, + "ce_orig": 1.3490153551101685, + "epoch": 1.0701703932705442, + "kl_loss": 0.11363141983747482, + "loss_ib": 0.005892430432140827, + "step": 3722 + }, + { + "ce_ib": 3.7415127754211426, + "ce_orig": 1.1166605949401855, + "epoch": 1.0701703932705442, + "kl_loss": 0.16055342555046082, + "loss_ib": 0.005347046535462141, + "step": 3722 + }, + { + "ce_ib": 4.307348251342773, + "ce_orig": 0.9618034958839417, + "epoch": 1.0701703932705442, + "kl_loss": 0.11087056994438171, + "loss_ib": 0.0054160538129508495, + "step": 3722 + }, + { + "ce_ib": 4.17004919052124, + "ce_orig": 0.980363667011261, + "epoch": 1.0701703932705442, + "kl_loss": 0.1951722502708435, + "loss_ib": 0.006121771410107613, + "step": 3722 + }, + { + "ce_ib": 2.4833550453186035, + "ce_orig": 0.6003649234771729, + "epoch": 1.070457976849522, + "kl_loss": 0.12255828827619553, + "loss_ib": 0.003708937903866172, + "step": 3723 + }, + { + "ce_ib": 2.160731315612793, + "ce_orig": 0.6264808177947998, + "epoch": 1.070457976849522, + "kl_loss": 0.15898089110851288, + "loss_ib": 0.003750540316104889, + "step": 3723 + }, + { + "ce_ib": 3.2657384872436523, + "ce_orig": 0.7848908305168152, + "epoch": 1.070457976849522, + "kl_loss": 0.12426649034023285, + "loss_ib": 0.004508403595536947, + "step": 3723 + }, + { + "ce_ib": 2.1392927169799805, + "ce_orig": 0.638602614402771, + "epoch": 1.070457976849522, + "kl_loss": 0.12294578552246094, + "loss_ib": 0.0033687506802380085, + "step": 3723 + }, + { + "ce_ib": 3.483466625213623, + "ce_orig": 0.6339578032493591, + "epoch": 1.0707455604284994, + "kl_loss": 0.16238203644752502, + "loss_ib": 0.005107286851853132, + "step": 3724 + }, + { + "ce_ib": 4.384100437164307, + "ce_orig": 1.0595958232879639, + "epoch": 1.0707455604284994, + "kl_loss": 0.20988552272319794, + "loss_ib": 0.0064829555340111256, + "step": 3724 + }, + { + "ce_ib": 3.1299757957458496, + "ce_orig": 0.6225508451461792, + "epoch": 1.0707455604284994, + "kl_loss": 0.24547278881072998, + "loss_ib": 0.005584703758358955, + "step": 3724 + }, + { + "ce_ib": 2.820998430252075, + "ce_orig": 0.62814861536026, + "epoch": 1.0707455604284994, + "kl_loss": 0.16620710492134094, + "loss_ib": 0.00448306929320097, + "step": 3724 + }, + { + "epoch": 1.0710331440074772, + "grad_norm": 0.1317840814590454, + "learning_rate": 7.457312958594542e-06, + "loss": 0.9114, + "step": 3725 + }, + { + "ce_ib": 4.281130790710449, + "ce_orig": 1.0635687112808228, + "epoch": 1.0710331440074772, + "kl_loss": 0.11547626554965973, + "loss_ib": 0.005435893312096596, + "step": 3725 + }, + { + "ce_ib": 2.266824245452881, + "ce_orig": 0.5351181030273438, + "epoch": 1.0710331440074772, + "kl_loss": 0.18100407719612122, + "loss_ib": 0.0040768650360405445, + "step": 3725 + }, + { + "ce_ib": 2.3731954097747803, + "ce_orig": 0.7394400238990784, + "epoch": 1.0710331440074772, + "kl_loss": 0.17690148949623108, + "loss_ib": 0.004142210353165865, + "step": 3725 + }, + { + "ce_ib": 2.8128650188446045, + "ce_orig": 0.748473584651947, + "epoch": 1.0710331440074772, + "kl_loss": 0.1373133659362793, + "loss_ib": 0.004185998812317848, + "step": 3725 + }, + { + "ce_ib": 3.254580020904541, + "ce_orig": 1.0097484588623047, + "epoch": 1.071320727586455, + "kl_loss": 0.19590651988983154, + "loss_ib": 0.005213645752519369, + "step": 3726 + }, + { + "ce_ib": 4.503511905670166, + "ce_orig": 1.1042876243591309, + "epoch": 1.071320727586455, + "kl_loss": 0.18191620707511902, + "loss_ib": 0.006322673987597227, + "step": 3726 + }, + { + "ce_ib": 3.8823139667510986, + "ce_orig": 0.7791673541069031, + "epoch": 1.071320727586455, + "kl_loss": 0.16962754726409912, + "loss_ib": 0.0055785891599953175, + "step": 3726 + }, + { + "ce_ib": 3.0580248832702637, + "ce_orig": 0.3380444347858429, + "epoch": 1.071320727586455, + "kl_loss": 0.08166103065013885, + "loss_ib": 0.0038746350910514593, + "step": 3726 + }, + { + "ce_ib": 2.3592660427093506, + "ce_orig": 0.32270655035972595, + "epoch": 1.0716083111654324, + "kl_loss": 0.16891948878765106, + "loss_ib": 0.004048461094498634, + "step": 3727 + }, + { + "ce_ib": 3.4033148288726807, + "ce_orig": 0.8068895936012268, + "epoch": 1.0716083111654324, + "kl_loss": 0.16556182503700256, + "loss_ib": 0.005058933515101671, + "step": 3727 + }, + { + "ce_ib": 4.209056377410889, + "ce_orig": 0.9000297784805298, + "epoch": 1.0716083111654324, + "kl_loss": 0.10351846367120743, + "loss_ib": 0.0052442410960793495, + "step": 3727 + }, + { + "ce_ib": 4.851595401763916, + "ce_orig": 1.340894103050232, + "epoch": 1.0716083111654324, + "kl_loss": 0.15581746399402618, + "loss_ib": 0.006409769877791405, + "step": 3727 + }, + { + "ce_ib": 1.7660597562789917, + "ce_orig": 0.4548191726207733, + "epoch": 1.0718958947444102, + "kl_loss": 0.19472907483577728, + "loss_ib": 0.0037133505102247, + "step": 3728 + }, + { + "ce_ib": 2.52604603767395, + "ce_orig": 0.8813480734825134, + "epoch": 1.0718958947444102, + "kl_loss": 0.1272728145122528, + "loss_ib": 0.0037987742107361555, + "step": 3728 + }, + { + "ce_ib": 2.3976798057556152, + "ce_orig": 0.6399599313735962, + "epoch": 1.0718958947444102, + "kl_loss": 0.188241109251976, + "loss_ib": 0.004280090797692537, + "step": 3728 + }, + { + "ce_ib": 1.1509253978729248, + "ce_orig": 0.2824498414993286, + "epoch": 1.0718958947444102, + "kl_loss": 0.1417289674282074, + "loss_ib": 0.0025682151317596436, + "step": 3728 + }, + { + "ce_ib": 5.199373245239258, + "ce_orig": 1.4217621088027954, + "epoch": 1.0721834783233877, + "kl_loss": 0.2173912227153778, + "loss_ib": 0.007373285014182329, + "step": 3729 + }, + { + "ce_ib": 3.4210317134857178, + "ce_orig": 1.0366597175598145, + "epoch": 1.0721834783233877, + "kl_loss": 0.33511626720428467, + "loss_ib": 0.0067721945233643055, + "step": 3729 + }, + { + "ce_ib": 3.050776958465576, + "ce_orig": 0.6609514951705933, + "epoch": 1.0721834783233877, + "kl_loss": 0.11400363594293594, + "loss_ib": 0.004190813284367323, + "step": 3729 + }, + { + "ce_ib": 4.565648078918457, + "ce_orig": 1.108733892440796, + "epoch": 1.0721834783233877, + "kl_loss": 0.14765721559524536, + "loss_ib": 0.006042220629751682, + "step": 3729 + }, + { + "epoch": 1.0724710619023654, + "grad_norm": 0.15186622738838196, + "learning_rate": 7.450551085890087e-06, + "loss": 0.8034, + "step": 3730 + }, + { + "ce_ib": 3.824685573577881, + "ce_orig": 1.123464822769165, + "epoch": 1.0724710619023654, + "kl_loss": 0.16622145473957062, + "loss_ib": 0.005486899986863136, + "step": 3730 + }, + { + "ce_ib": 3.7652382850646973, + "ce_orig": 0.8051232099533081, + "epoch": 1.0724710619023654, + "kl_loss": 0.19116723537445068, + "loss_ib": 0.005676910746842623, + "step": 3730 + }, + { + "ce_ib": 1.8034517765045166, + "ce_orig": 0.4013340175151825, + "epoch": 1.0724710619023654, + "kl_loss": 0.152227520942688, + "loss_ib": 0.0033257268369197845, + "step": 3730 + }, + { + "ce_ib": 1.5726511478424072, + "ce_orig": 0.47619158029556274, + "epoch": 1.0724710619023654, + "kl_loss": 0.10280270874500275, + "loss_ib": 0.0026006782427430153, + "step": 3730 + }, + { + "ce_ib": 5.6483988761901855, + "ce_orig": 1.6925774812698364, + "epoch": 1.072758645481343, + "kl_loss": 0.16300982236862183, + "loss_ib": 0.007278496865183115, + "step": 3731 + }, + { + "ce_ib": 4.938516139984131, + "ce_orig": 1.378319263458252, + "epoch": 1.072758645481343, + "kl_loss": 0.17324420809745789, + "loss_ib": 0.006670958362519741, + "step": 3731 + }, + { + "ce_ib": 4.183876991271973, + "ce_orig": 1.1798186302185059, + "epoch": 1.072758645481343, + "kl_loss": 0.16576331853866577, + "loss_ib": 0.005841510370373726, + "step": 3731 + }, + { + "ce_ib": 5.169313907623291, + "ce_orig": 1.0768687725067139, + "epoch": 1.072758645481343, + "kl_loss": 0.1935052126646042, + "loss_ib": 0.007104366086423397, + "step": 3731 + }, + { + "ce_ib": 5.871129035949707, + "ce_orig": 1.5974903106689453, + "epoch": 1.0730462290603207, + "kl_loss": 0.1815464198589325, + "loss_ib": 0.007686593569815159, + "step": 3732 + }, + { + "ce_ib": 4.006288051605225, + "ce_orig": 1.0281482934951782, + "epoch": 1.0730462290603207, + "kl_loss": 0.1517786681652069, + "loss_ib": 0.005524075124412775, + "step": 3732 + }, + { + "ce_ib": 2.5567755699157715, + "ce_orig": 0.6443597674369812, + "epoch": 1.0730462290603207, + "kl_loss": 0.17792588472366333, + "loss_ib": 0.0043360344134271145, + "step": 3732 + }, + { + "ce_ib": 6.740059852600098, + "ce_orig": 1.8241442441940308, + "epoch": 1.0730462290603207, + "kl_loss": 0.22555091977119446, + "loss_ib": 0.008995568379759789, + "step": 3732 + }, + { + "ce_ib": 3.4432973861694336, + "ce_orig": 0.8689512610435486, + "epoch": 1.0733338126392984, + "kl_loss": 0.1212717741727829, + "loss_ib": 0.0046560149639844894, + "step": 3733 + }, + { + "ce_ib": 5.158864974975586, + "ce_orig": 1.0829886198043823, + "epoch": 1.0733338126392984, + "kl_loss": 0.20744477212429047, + "loss_ib": 0.007233312353491783, + "step": 3733 + }, + { + "ce_ib": 3.994035005569458, + "ce_orig": 0.8082367777824402, + "epoch": 1.0733338126392984, + "kl_loss": 0.24170130491256714, + "loss_ib": 0.0064110481180250645, + "step": 3733 + }, + { + "ce_ib": 4.455965518951416, + "ce_orig": 0.6740154027938843, + "epoch": 1.0733338126392984, + "kl_loss": 0.24952340126037598, + "loss_ib": 0.006951199844479561, + "step": 3733 + }, + { + "ce_ib": 2.603191375732422, + "ce_orig": 0.9228411912918091, + "epoch": 1.073621396218276, + "kl_loss": 0.17286387085914612, + "loss_ib": 0.004331829957664013, + "step": 3734 + }, + { + "ce_ib": 2.753980875015259, + "ce_orig": 1.0495631694793701, + "epoch": 1.073621396218276, + "kl_loss": 0.15434019267559052, + "loss_ib": 0.004297382663935423, + "step": 3734 + }, + { + "ce_ib": 5.0747551918029785, + "ce_orig": 1.076672911643982, + "epoch": 1.073621396218276, + "kl_loss": 0.18221068382263184, + "loss_ib": 0.006896862294524908, + "step": 3734 + }, + { + "ce_ib": 2.0437026023864746, + "ce_orig": 0.6518487930297852, + "epoch": 1.073621396218276, + "kl_loss": 0.11609610170125961, + "loss_ib": 0.0032046635169535875, + "step": 3734 + }, + { + "epoch": 1.0739089797972536, + "grad_norm": 0.14963595569133759, + "learning_rate": 7.443783309239281e-06, + "loss": 0.8922, + "step": 3735 + }, + { + "ce_ib": 4.167486667633057, + "ce_orig": 0.9556034207344055, + "epoch": 1.0739089797972536, + "kl_loss": 0.17742353677749634, + "loss_ib": 0.005941722076386213, + "step": 3735 + }, + { + "ce_ib": 3.2680282592773438, + "ce_orig": 0.8030023574829102, + "epoch": 1.0739089797972536, + "kl_loss": 0.13637873530387878, + "loss_ib": 0.004631815478205681, + "step": 3735 + }, + { + "ce_ib": 3.677713632583618, + "ce_orig": 0.6176819205284119, + "epoch": 1.0739089797972536, + "kl_loss": 0.15956559777259827, + "loss_ib": 0.0052733696065843105, + "step": 3735 + }, + { + "ce_ib": 1.5342609882354736, + "ce_orig": 0.45176228880882263, + "epoch": 1.0739089797972536, + "kl_loss": 0.2002454400062561, + "loss_ib": 0.003536715405061841, + "step": 3735 + }, + { + "ce_ib": 2.12273907661438, + "ce_orig": 0.5221719145774841, + "epoch": 1.0741965633762312, + "kl_loss": 0.21216361224651337, + "loss_ib": 0.004244375042617321, + "step": 3736 + }, + { + "ce_ib": 6.0865373611450195, + "ce_orig": 1.2128874063491821, + "epoch": 1.0741965633762312, + "kl_loss": 0.2139849066734314, + "loss_ib": 0.008226386271417141, + "step": 3736 + }, + { + "ce_ib": 2.534095525741577, + "ce_orig": 0.8255071043968201, + "epoch": 1.0741965633762312, + "kl_loss": 0.1575341522693634, + "loss_ib": 0.004109437111765146, + "step": 3736 + }, + { + "ce_ib": 2.7017886638641357, + "ce_orig": 0.4365274906158447, + "epoch": 1.0741965633762312, + "kl_loss": 0.20996053516864777, + "loss_ib": 0.004801394417881966, + "step": 3736 + }, + { + "ce_ib": 3.383924961090088, + "ce_orig": 0.8507804870605469, + "epoch": 1.0744841469552089, + "kl_loss": 0.12140373885631561, + "loss_ib": 0.0045979623682796955, + "step": 3737 + }, + { + "ce_ib": 2.438272714614868, + "ce_orig": 0.5031996965408325, + "epoch": 1.0744841469552089, + "kl_loss": 0.14995704591274261, + "loss_ib": 0.003937843255698681, + "step": 3737 + }, + { + "ce_ib": 3.8034908771514893, + "ce_orig": 0.5599406361579895, + "epoch": 1.0744841469552089, + "kl_loss": 0.23505078256130219, + "loss_ib": 0.006153998896479607, + "step": 3737 + }, + { + "ce_ib": 1.4543911218643188, + "ce_orig": 0.5487932562828064, + "epoch": 1.0744841469552089, + "kl_loss": 0.1364293247461319, + "loss_ib": 0.002818684559315443, + "step": 3737 + }, + { + "ce_ib": 3.1766602993011475, + "ce_orig": 0.5665106773376465, + "epoch": 1.0747717305341864, + "kl_loss": 0.16851089894771576, + "loss_ib": 0.004861768800765276, + "step": 3738 + }, + { + "ce_ib": 3.059136152267456, + "ce_orig": 0.6404756307601929, + "epoch": 1.0747717305341864, + "kl_loss": 0.11152429133653641, + "loss_ib": 0.004174379166215658, + "step": 3738 + }, + { + "ce_ib": 4.878083229064941, + "ce_orig": 1.1685447692871094, + "epoch": 1.0747717305341864, + "kl_loss": 0.14666232466697693, + "loss_ib": 0.0063447062857449055, + "step": 3738 + }, + { + "ce_ib": 5.188563346862793, + "ce_orig": 1.4459930658340454, + "epoch": 1.0747717305341864, + "kl_loss": 0.24425143003463745, + "loss_ib": 0.00763107743114233, + "step": 3738 + }, + { + "ce_ib": 2.0732409954071045, + "ce_orig": 0.45572757720947266, + "epoch": 1.0750593141131641, + "kl_loss": 0.11940574645996094, + "loss_ib": 0.0032672982197254896, + "step": 3739 + }, + { + "ce_ib": 6.575459957122803, + "ce_orig": 1.2656607627868652, + "epoch": 1.0750593141131641, + "kl_loss": 0.1548156589269638, + "loss_ib": 0.008123616687953472, + "step": 3739 + }, + { + "ce_ib": 3.0012991428375244, + "ce_orig": 0.5149077773094177, + "epoch": 1.0750593141131641, + "kl_loss": 0.20912249386310577, + "loss_ib": 0.005092523992061615, + "step": 3739 + }, + { + "ce_ib": 3.4914801120758057, + "ce_orig": 1.1215609312057495, + "epoch": 1.0750593141131641, + "kl_loss": 0.1715577244758606, + "loss_ib": 0.005207057110965252, + "step": 3739 + }, + { + "epoch": 1.0753468976921419, + "grad_norm": 0.14174722135066986, + "learning_rate": 7.437009644947268e-06, + "loss": 0.8078, + "step": 3740 + }, + { + "ce_ib": 3.970473527908325, + "ce_orig": 0.8658932447433472, + "epoch": 1.0753468976921419, + "kl_loss": 0.21603314578533173, + "loss_ib": 0.0061308047734200954, + "step": 3740 + }, + { + "ce_ib": 2.104583978652954, + "ce_orig": 0.7241290807723999, + "epoch": 1.0753468976921419, + "kl_loss": 0.11920376121997833, + "loss_ib": 0.0032966213766485453, + "step": 3740 + }, + { + "ce_ib": 2.0954787731170654, + "ce_orig": 0.6257402896881104, + "epoch": 1.0753468976921419, + "kl_loss": 0.1153230369091034, + "loss_ib": 0.0032487090211361647, + "step": 3740 + }, + { + "ce_ib": 2.2848169803619385, + "ce_orig": 0.5974085927009583, + "epoch": 1.0753468976921419, + "kl_loss": 0.1619931310415268, + "loss_ib": 0.0039047482423484325, + "step": 3740 + }, + { + "ce_ib": 4.8535895347595215, + "ce_orig": 1.2779291868209839, + "epoch": 1.0756344812711194, + "kl_loss": 0.12145496159791946, + "loss_ib": 0.006068138871341944, + "step": 3741 + }, + { + "ce_ib": 3.845597743988037, + "ce_orig": 1.0463539361953735, + "epoch": 1.0756344812711194, + "kl_loss": 0.15522384643554688, + "loss_ib": 0.005397835746407509, + "step": 3741 + }, + { + "ce_ib": 4.81752872467041, + "ce_orig": 1.2605096101760864, + "epoch": 1.0756344812711194, + "kl_loss": 0.23123911023139954, + "loss_ib": 0.007129919249564409, + "step": 3741 + }, + { + "ce_ib": 2.6558406352996826, + "ce_orig": 0.5727974772453308, + "epoch": 1.0756344812711194, + "kl_loss": 0.16883939504623413, + "loss_ib": 0.004344234708696604, + "step": 3741 + }, + { + "ce_ib": 4.222306251525879, + "ce_orig": 0.5861687064170837, + "epoch": 1.0759220648500971, + "kl_loss": 0.23236201703548431, + "loss_ib": 0.0065459259785711765, + "step": 3742 + }, + { + "ce_ib": 3.5993714332580566, + "ce_orig": 0.7586058974266052, + "epoch": 1.0759220648500971, + "kl_loss": 0.10951483994722366, + "loss_ib": 0.004694520030170679, + "step": 3742 + }, + { + "ce_ib": 1.7641324996948242, + "ce_orig": 0.6006073951721191, + "epoch": 1.0759220648500971, + "kl_loss": 0.12573842704296112, + "loss_ib": 0.003021516837179661, + "step": 3742 + }, + { + "ce_ib": 1.6661756038665771, + "ce_orig": 0.3507309556007385, + "epoch": 1.0759220648500971, + "kl_loss": 0.2034458965063095, + "loss_ib": 0.003700634464621544, + "step": 3742 + }, + { + "ce_ib": 2.568779468536377, + "ce_orig": 0.5849471688270569, + "epoch": 1.0762096484290746, + "kl_loss": 0.32036399841308594, + "loss_ib": 0.005772419273853302, + "step": 3743 + }, + { + "ce_ib": 4.422804355621338, + "ce_orig": 0.9071297645568848, + "epoch": 1.0762096484290746, + "kl_loss": 0.14905746281147003, + "loss_ib": 0.0059133791364729404, + "step": 3743 + }, + { + "ce_ib": 4.485646724700928, + "ce_orig": 1.1940875053405762, + "epoch": 1.0762096484290746, + "kl_loss": 0.15004044771194458, + "loss_ib": 0.005986051633954048, + "step": 3743 + }, + { + "ce_ib": 3.049429178237915, + "ce_orig": 0.7917040586471558, + "epoch": 1.0762096484290746, + "kl_loss": 0.1386037915945053, + "loss_ib": 0.004435467068105936, + "step": 3743 + }, + { + "ce_ib": 3.7706456184387207, + "ce_orig": 0.8468393087387085, + "epoch": 1.0764972320080524, + "kl_loss": 0.12969467043876648, + "loss_ib": 0.005067592021077871, + "step": 3744 + }, + { + "ce_ib": 5.51331901550293, + "ce_orig": 0.9564926624298096, + "epoch": 1.0764972320080524, + "kl_loss": 0.22828200459480286, + "loss_ib": 0.00779613945633173, + "step": 3744 + }, + { + "ce_ib": 4.334877014160156, + "ce_orig": 0.55409175157547, + "epoch": 1.0764972320080524, + "kl_loss": 0.21467211842536926, + "loss_ib": 0.006481598597019911, + "step": 3744 + }, + { + "ce_ib": 3.8121225833892822, + "ce_orig": 0.9367244839668274, + "epoch": 1.0764972320080524, + "kl_loss": 0.16977882385253906, + "loss_ib": 0.005509910639375448, + "step": 3744 + }, + { + "epoch": 1.0767848155870299, + "grad_norm": 0.19208325445652008, + "learning_rate": 7.430230109333378e-06, + "loss": 0.8391, + "step": 3745 + }, + { + "ce_ib": 3.4358747005462646, + "ce_orig": 0.8364486694335938, + "epoch": 1.0767848155870299, + "kl_loss": 0.1644746959209442, + "loss_ib": 0.005080621689558029, + "step": 3745 + }, + { + "ce_ib": 3.056734323501587, + "ce_orig": 0.6449509859085083, + "epoch": 1.0767848155870299, + "kl_loss": 0.15851329267024994, + "loss_ib": 0.004641867242753506, + "step": 3745 + }, + { + "ce_ib": 2.5604474544525146, + "ce_orig": 0.4076974391937256, + "epoch": 1.0767848155870299, + "kl_loss": 0.23345929384231567, + "loss_ib": 0.004895040299743414, + "step": 3745 + }, + { + "ce_ib": 2.4318511486053467, + "ce_orig": 0.658304750919342, + "epoch": 1.0767848155870299, + "kl_loss": 0.12007999420166016, + "loss_ib": 0.0036326509434729815, + "step": 3745 + }, + { + "ce_ib": 5.060855865478516, + "ce_orig": 1.1398104429244995, + "epoch": 1.0770723991660076, + "kl_loss": 0.35484176874160767, + "loss_ib": 0.008609273470938206, + "step": 3746 + }, + { + "ce_ib": 4.003320693969727, + "ce_orig": 1.0175096988677979, + "epoch": 1.0770723991660076, + "kl_loss": 0.22090712189674377, + "loss_ib": 0.0062123918905854225, + "step": 3746 + }, + { + "ce_ib": 3.634718418121338, + "ce_orig": 1.1163954734802246, + "epoch": 1.0770723991660076, + "kl_loss": 0.23367050290107727, + "loss_ib": 0.005971423350274563, + "step": 3746 + }, + { + "ce_ib": 4.002242088317871, + "ce_orig": 1.2383029460906982, + "epoch": 1.0770723991660076, + "kl_loss": 0.14026346802711487, + "loss_ib": 0.005404876545071602, + "step": 3746 + }, + { + "ce_ib": 2.323601484298706, + "ce_orig": 0.615875780582428, + "epoch": 1.0773599827449853, + "kl_loss": 0.14213432371616364, + "loss_ib": 0.003744944930076599, + "step": 3747 + }, + { + "ce_ib": 6.7141032218933105, + "ce_orig": 1.5892938375473022, + "epoch": 1.0773599827449853, + "kl_loss": 0.21541400253772736, + "loss_ib": 0.008868242613971233, + "step": 3747 + }, + { + "ce_ib": 2.0809850692749023, + "ce_orig": 0.48576071858406067, + "epoch": 1.0773599827449853, + "kl_loss": 0.13238520920276642, + "loss_ib": 0.0034048371016979218, + "step": 3747 + }, + { + "ce_ib": 4.5171051025390625, + "ce_orig": 1.2239423990249634, + "epoch": 1.0773599827449853, + "kl_loss": 0.24777835607528687, + "loss_ib": 0.00699488865211606, + "step": 3747 + }, + { + "ce_ib": 4.307330131530762, + "ce_orig": 0.9427669048309326, + "epoch": 1.0776475663239629, + "kl_loss": 0.2451464831829071, + "loss_ib": 0.006758794654160738, + "step": 3748 + }, + { + "ce_ib": 2.8961355686187744, + "ce_orig": 0.7648875713348389, + "epoch": 1.0776475663239629, + "kl_loss": 0.1616344153881073, + "loss_ib": 0.004512479994446039, + "step": 3748 + }, + { + "ce_ib": 2.6896190643310547, + "ce_orig": 0.8160403966903687, + "epoch": 1.0776475663239629, + "kl_loss": 0.1407220959663391, + "loss_ib": 0.004096840042620897, + "step": 3748 + }, + { + "ce_ib": 2.2828586101531982, + "ce_orig": 0.29321953654289246, + "epoch": 1.0776475663239629, + "kl_loss": 0.1668129563331604, + "loss_ib": 0.003950987942516804, + "step": 3748 + }, + { + "ce_ib": 3.610149621963501, + "ce_orig": 0.8609417676925659, + "epoch": 1.0779351499029406, + "kl_loss": 0.10406408458948135, + "loss_ib": 0.004650790244340897, + "step": 3749 + }, + { + "ce_ib": 4.913029670715332, + "ce_orig": 1.3209969997406006, + "epoch": 1.0779351499029406, + "kl_loss": 0.14666637778282166, + "loss_ib": 0.0063796937465667725, + "step": 3749 + }, + { + "ce_ib": 3.6720001697540283, + "ce_orig": 0.942032516002655, + "epoch": 1.0779351499029406, + "kl_loss": 0.1516609489917755, + "loss_ib": 0.0051886094734072685, + "step": 3749 + }, + { + "ce_ib": 3.7283413410186768, + "ce_orig": 0.8295676708221436, + "epoch": 1.0779351499029406, + "kl_loss": 0.12235347926616669, + "loss_ib": 0.004951876122504473, + "step": 3749 + }, + { + "epoch": 1.078222733481918, + "grad_norm": 0.14107149839401245, + "learning_rate": 7.4234447187310874e-06, + "loss": 0.8574, + "step": 3750 + }, + { + "ce_ib": 2.58244252204895, + "ce_orig": 0.792534351348877, + "epoch": 1.078222733481918, + "kl_loss": 0.13196676969528198, + "loss_ib": 0.0039021100383251905, + "step": 3750 + }, + { + "ce_ib": 4.694033622741699, + "ce_orig": 1.4371920824050903, + "epoch": 1.078222733481918, + "kl_loss": 0.11953768134117126, + "loss_ib": 0.005889410153031349, + "step": 3750 + }, + { + "ce_ib": 2.5898592472076416, + "ce_orig": 0.9036226272583008, + "epoch": 1.078222733481918, + "kl_loss": 0.11874093115329742, + "loss_ib": 0.0037772683426737785, + "step": 3750 + }, + { + "ce_ib": 1.9862854480743408, + "ce_orig": 0.49069899320602417, + "epoch": 1.078222733481918, + "kl_loss": 0.17562197148799896, + "loss_ib": 0.003742505330592394, + "step": 3750 + }, + { + "ce_ib": 6.950646877288818, + "ce_orig": 1.8841527700424194, + "epoch": 1.0785103170608958, + "kl_loss": 0.17842474579811096, + "loss_ib": 0.008734894916415215, + "step": 3751 + }, + { + "ce_ib": 2.8434908390045166, + "ce_orig": 0.27689868211746216, + "epoch": 1.0785103170608958, + "kl_loss": 0.2796757221221924, + "loss_ib": 0.0056402478367090225, + "step": 3751 + }, + { + "ce_ib": 2.569302797317505, + "ce_orig": 0.5602412819862366, + "epoch": 1.0785103170608958, + "kl_loss": 0.11242317408323288, + "loss_ib": 0.0036935347598046064, + "step": 3751 + }, + { + "ce_ib": 1.9265282154083252, + "ce_orig": 0.54500412940979, + "epoch": 1.0785103170608958, + "kl_loss": 0.10192340612411499, + "loss_ib": 0.002945762127637863, + "step": 3751 + }, + { + "ce_ib": 4.805276870727539, + "ce_orig": 1.4687062501907349, + "epoch": 1.0787979006398736, + "kl_loss": 0.41926831007003784, + "loss_ib": 0.008997960016131401, + "step": 3752 + }, + { + "ce_ib": 2.9557981491088867, + "ce_orig": 0.5874965786933899, + "epoch": 1.0787979006398736, + "kl_loss": 0.17945367097854614, + "loss_ib": 0.004750334657728672, + "step": 3752 + }, + { + "ce_ib": 3.534188747406006, + "ce_orig": 0.8383740186691284, + "epoch": 1.0787979006398736, + "kl_loss": 0.1259452849626541, + "loss_ib": 0.004793641623109579, + "step": 3752 + }, + { + "ce_ib": 3.14080810546875, + "ce_orig": 0.9367170333862305, + "epoch": 1.0787979006398736, + "kl_loss": 0.14205914735794067, + "loss_ib": 0.0045613995753228664, + "step": 3752 + }, + { + "ce_ib": 4.265194416046143, + "ce_orig": 0.9357134699821472, + "epoch": 1.079085484218851, + "kl_loss": 0.202067568898201, + "loss_ib": 0.006285869982093573, + "step": 3753 + }, + { + "ce_ib": 3.7512199878692627, + "ce_orig": 1.0388152599334717, + "epoch": 1.079085484218851, + "kl_loss": 0.17725631594657898, + "loss_ib": 0.005523782689124346, + "step": 3753 + }, + { + "ce_ib": 3.7924811840057373, + "ce_orig": 0.7622377872467041, + "epoch": 1.079085484218851, + "kl_loss": 0.17184817790985107, + "loss_ib": 0.005510963033884764, + "step": 3753 + }, + { + "ce_ib": 3.0370900630950928, + "ce_orig": 0.7715529203414917, + "epoch": 1.079085484218851, + "kl_loss": 0.24410668015480042, + "loss_ib": 0.005478156730532646, + "step": 3753 + }, + { + "ce_ib": 4.408514022827148, + "ce_orig": 0.5540867447853088, + "epoch": 1.0793730677978288, + "kl_loss": 0.2008415162563324, + "loss_ib": 0.006416929420083761, + "step": 3754 + }, + { + "ce_ib": 2.231966495513916, + "ce_orig": 0.7614129781723022, + "epoch": 1.0793730677978288, + "kl_loss": 0.0925077274441719, + "loss_ib": 0.003157043596729636, + "step": 3754 + }, + { + "ce_ib": 3.06249737739563, + "ce_orig": 0.7322280406951904, + "epoch": 1.0793730677978288, + "kl_loss": 0.13941016793251038, + "loss_ib": 0.004456598777323961, + "step": 3754 + }, + { + "ce_ib": 3.8391313552856445, + "ce_orig": 0.6454974412918091, + "epoch": 1.0793730677978288, + "kl_loss": 0.2014344483613968, + "loss_ib": 0.0058534760028123856, + "step": 3754 + }, + { + "epoch": 1.0796606513768063, + "grad_norm": 0.15002015233039856, + "learning_rate": 7.416653489487975e-06, + "loss": 0.8712, + "step": 3755 + }, + { + "ce_ib": 4.124406337738037, + "ce_orig": 0.9239826798439026, + "epoch": 1.0796606513768063, + "kl_loss": 0.21460513770580292, + "loss_ib": 0.006270457524806261, + "step": 3755 + }, + { + "ce_ib": 2.058802604675293, + "ce_orig": 0.40698498487472534, + "epoch": 1.0796606513768063, + "kl_loss": 0.15474113821983337, + "loss_ib": 0.003606213955208659, + "step": 3755 + }, + { + "ce_ib": 3.535560369491577, + "ce_orig": 0.6701453924179077, + "epoch": 1.0796606513768063, + "kl_loss": 0.20174308121204376, + "loss_ib": 0.005552990827709436, + "step": 3755 + }, + { + "ce_ib": 2.82183575630188, + "ce_orig": 0.6335659027099609, + "epoch": 1.0796606513768063, + "kl_loss": 0.10449241101741791, + "loss_ib": 0.0038667600601911545, + "step": 3755 + }, + { + "ce_ib": 5.773186206817627, + "ce_orig": 1.1581511497497559, + "epoch": 1.079948234955784, + "kl_loss": 0.18343651294708252, + "loss_ib": 0.007607550825923681, + "step": 3756 + }, + { + "ce_ib": 2.3071649074554443, + "ce_orig": 0.670931339263916, + "epoch": 1.079948234955784, + "kl_loss": 0.1976160854101181, + "loss_ib": 0.004283325746655464, + "step": 3756 + }, + { + "ce_ib": 4.517411708831787, + "ce_orig": 1.2218135595321655, + "epoch": 1.079948234955784, + "kl_loss": 0.1330997794866562, + "loss_ib": 0.005848409608006477, + "step": 3756 + }, + { + "ce_ib": 3.703843355178833, + "ce_orig": 0.8200523853302002, + "epoch": 1.079948234955784, + "kl_loss": 0.2153218686580658, + "loss_ib": 0.0058570620603859425, + "step": 3756 + }, + { + "ce_ib": 4.191783428192139, + "ce_orig": 1.0144847631454468, + "epoch": 1.0802358185347616, + "kl_loss": 0.18262426555156708, + "loss_ib": 0.006018025800585747, + "step": 3757 + }, + { + "ce_ib": 2.2818939685821533, + "ce_orig": 0.6116719841957092, + "epoch": 1.0802358185347616, + "kl_loss": 0.1244550496339798, + "loss_ib": 0.003526444314047694, + "step": 3757 + }, + { + "ce_ib": 5.6487321853637695, + "ce_orig": 1.6030627489089966, + "epoch": 1.0802358185347616, + "kl_loss": 0.3033791780471802, + "loss_ib": 0.008682523854076862, + "step": 3757 + }, + { + "ce_ib": 4.145698547363281, + "ce_orig": 0.9361017942428589, + "epoch": 1.0802358185347616, + "kl_loss": 0.17803281545639038, + "loss_ib": 0.005926026497036219, + "step": 3757 + }, + { + "ce_ib": 1.955072283744812, + "ce_orig": 0.5275178551673889, + "epoch": 1.0805234021137393, + "kl_loss": 0.13798558712005615, + "loss_ib": 0.003334928071126342, + "step": 3758 + }, + { + "ce_ib": 2.7056422233581543, + "ce_orig": 0.617914080619812, + "epoch": 1.0805234021137393, + "kl_loss": 0.17458483576774597, + "loss_ib": 0.004451490473002195, + "step": 3758 + }, + { + "ce_ib": 2.267575979232788, + "ce_orig": 0.5361946821212769, + "epoch": 1.0805234021137393, + "kl_loss": 0.15858912467956543, + "loss_ib": 0.003853467060253024, + "step": 3758 + }, + { + "ce_ib": 3.5920891761779785, + "ce_orig": 0.7587829828262329, + "epoch": 1.0805234021137393, + "kl_loss": 0.09633558988571167, + "loss_ib": 0.004555444698780775, + "step": 3758 + }, + { + "ce_ib": 5.243670463562012, + "ce_orig": 1.0490797758102417, + "epoch": 1.0808109856927168, + "kl_loss": 0.16641438007354736, + "loss_ib": 0.006907814182341099, + "step": 3759 + }, + { + "ce_ib": 5.049909591674805, + "ce_orig": 1.1059050559997559, + "epoch": 1.0808109856927168, + "kl_loss": 0.15449939668178558, + "loss_ib": 0.006594903767108917, + "step": 3759 + }, + { + "ce_ib": 2.0780043601989746, + "ce_orig": 0.708108127117157, + "epoch": 1.0808109856927168, + "kl_loss": 0.12373199313879013, + "loss_ib": 0.0033153241965919733, + "step": 3759 + }, + { + "ce_ib": 3.175727367401123, + "ce_orig": 0.8883457183837891, + "epoch": 1.0808109856927168, + "kl_loss": 0.10910047590732574, + "loss_ib": 0.00426673237234354, + "step": 3759 + }, + { + "epoch": 1.0810985692716946, + "grad_norm": 0.15130417048931122, + "learning_rate": 7.40985643796569e-06, + "loss": 0.829, + "step": 3760 + }, + { + "ce_ib": 2.3604137897491455, + "ce_orig": 0.4422089159488678, + "epoch": 1.0810985692716946, + "kl_loss": 0.1962357461452484, + "loss_ib": 0.0043227714486420155, + "step": 3760 + }, + { + "ce_ib": 5.984673500061035, + "ce_orig": 1.3034573793411255, + "epoch": 1.0810985692716946, + "kl_loss": 0.21715059876441956, + "loss_ib": 0.008156179450452328, + "step": 3760 + }, + { + "ce_ib": 3.24141526222229, + "ce_orig": 1.0804603099822998, + "epoch": 1.0810985692716946, + "kl_loss": 0.5496922731399536, + "loss_ib": 0.008738338015973568, + "step": 3760 + }, + { + "ce_ib": 2.1360738277435303, + "ce_orig": 0.581156313419342, + "epoch": 1.0810985692716946, + "kl_loss": 0.1107199490070343, + "loss_ib": 0.0032432733569294214, + "step": 3760 + }, + { + "ce_ib": 5.2911834716796875, + "ce_orig": 1.2237632274627686, + "epoch": 1.0813861528506723, + "kl_loss": 0.1781991869211197, + "loss_ib": 0.00707317516207695, + "step": 3761 + }, + { + "ce_ib": 3.8836636543273926, + "ce_orig": 1.219971776008606, + "epoch": 1.0813861528506723, + "kl_loss": 0.12883733212947845, + "loss_ib": 0.005172036588191986, + "step": 3761 + }, + { + "ce_ib": 4.675649166107178, + "ce_orig": 1.1741849184036255, + "epoch": 1.0813861528506723, + "kl_loss": 0.19299690425395966, + "loss_ib": 0.006605617702007294, + "step": 3761 + }, + { + "ce_ib": 6.234090805053711, + "ce_orig": 0.8414342999458313, + "epoch": 1.0813861528506723, + "kl_loss": 0.1576894372701645, + "loss_ib": 0.007810985203832388, + "step": 3761 + }, + { + "ce_ib": 1.6336205005645752, + "ce_orig": 0.41485410928726196, + "epoch": 1.0816737364296498, + "kl_loss": 0.1574731022119522, + "loss_ib": 0.003208351554349065, + "step": 3762 + }, + { + "ce_ib": 1.7182765007019043, + "ce_orig": 0.4531683921813965, + "epoch": 1.0816737364296498, + "kl_loss": 0.10799414664506912, + "loss_ib": 0.002798218047246337, + "step": 3762 + }, + { + "ce_ib": 2.6956098079681396, + "ce_orig": 0.7385463714599609, + "epoch": 1.0816737364296498, + "kl_loss": 0.10008708387613297, + "loss_ib": 0.003696480533108115, + "step": 3762 + }, + { + "ce_ib": 6.608338356018066, + "ce_orig": 0.8928188681602478, + "epoch": 1.0816737364296498, + "kl_loss": 0.17886415123939514, + "loss_ib": 0.008396979421377182, + "step": 3762 + }, + { + "ce_ib": 2.7838823795318604, + "ce_orig": 0.6868102550506592, + "epoch": 1.0819613200086275, + "kl_loss": 0.20942462980747223, + "loss_ib": 0.004878128878772259, + "step": 3763 + }, + { + "ce_ib": 3.386364459991455, + "ce_orig": 0.7712758183479309, + "epoch": 1.0819613200086275, + "kl_loss": 0.1388063132762909, + "loss_ib": 0.004774427507072687, + "step": 3763 + }, + { + "ce_ib": 2.5877902507781982, + "ce_orig": 0.7723021507263184, + "epoch": 1.0819613200086275, + "kl_loss": 0.1725207269191742, + "loss_ib": 0.004312997218221426, + "step": 3763 + }, + { + "ce_ib": 4.2484130859375, + "ce_orig": 1.052578091621399, + "epoch": 1.0819613200086275, + "kl_loss": 0.13783390820026398, + "loss_ib": 0.00562675204128027, + "step": 3763 + }, + { + "ce_ib": 6.242189884185791, + "ce_orig": 1.1005513668060303, + "epoch": 1.082248903587605, + "kl_loss": 0.1557055413722992, + "loss_ib": 0.007799244951456785, + "step": 3764 + }, + { + "ce_ib": 3.8938732147216797, + "ce_orig": 1.1308000087738037, + "epoch": 1.082248903587605, + "kl_loss": 0.14022615551948547, + "loss_ib": 0.005296134855598211, + "step": 3764 + }, + { + "ce_ib": 3.4904980659484863, + "ce_orig": 1.0988969802856445, + "epoch": 1.082248903587605, + "kl_loss": 0.11150441318750381, + "loss_ib": 0.004605541937053204, + "step": 3764 + }, + { + "ce_ib": 4.659563064575195, + "ce_orig": 1.0370512008666992, + "epoch": 1.082248903587605, + "kl_loss": 0.2013222873210907, + "loss_ib": 0.006672785617411137, + "step": 3764 + }, + { + "epoch": 1.0825364871665828, + "grad_norm": 0.1585654467344284, + "learning_rate": 7.403053580539905e-06, + "loss": 0.8748, + "step": 3765 + }, + { + "ce_ib": 2.83117938041687, + "ce_orig": 0.5553191900253296, + "epoch": 1.0825364871665828, + "kl_loss": 0.17750144004821777, + "loss_ib": 0.004606193862855434, + "step": 3765 + }, + { + "ce_ib": 3.761446475982666, + "ce_orig": 0.5805175304412842, + "epoch": 1.0825364871665828, + "kl_loss": 0.22996604442596436, + "loss_ib": 0.00606110692024231, + "step": 3765 + }, + { + "ce_ib": 4.319489002227783, + "ce_orig": 1.147831916809082, + "epoch": 1.0825364871665828, + "kl_loss": 0.17265768349170685, + "loss_ib": 0.006046065595000982, + "step": 3765 + }, + { + "ce_ib": 1.7522047758102417, + "ce_orig": 0.2881583273410797, + "epoch": 1.0825364871665828, + "kl_loss": 0.11767279356718063, + "loss_ib": 0.0029289324302226305, + "step": 3765 + }, + { + "ce_ib": 6.050912380218506, + "ce_orig": 1.3498355150222778, + "epoch": 1.0828240707455605, + "kl_loss": 0.1947890669107437, + "loss_ib": 0.007998802699148655, + "step": 3766 + }, + { + "ce_ib": 3.9716975688934326, + "ce_orig": 0.7918298840522766, + "epoch": 1.0828240707455605, + "kl_loss": 0.1992674171924591, + "loss_ib": 0.005964371841400862, + "step": 3766 + }, + { + "ce_ib": 3.8699355125427246, + "ce_orig": 0.7547906041145325, + "epoch": 1.0828240707455605, + "kl_loss": 0.20376171171665192, + "loss_ib": 0.005907552782446146, + "step": 3766 + }, + { + "ce_ib": 5.15234899520874, + "ce_orig": 1.5526479482650757, + "epoch": 1.0828240707455605, + "kl_loss": 0.17055965960025787, + "loss_ib": 0.006857945118099451, + "step": 3766 + }, + { + "ce_ib": 0.9986130595207214, + "ce_orig": 0.20030324161052704, + "epoch": 1.083111654324538, + "kl_loss": 0.34323155879974365, + "loss_ib": 0.004430928733199835, + "step": 3767 + }, + { + "ce_ib": 4.5821309089660645, + "ce_orig": 1.4237297773361206, + "epoch": 1.083111654324538, + "kl_loss": 0.18474194407463074, + "loss_ib": 0.00642955070361495, + "step": 3767 + }, + { + "ce_ib": 2.9438624382019043, + "ce_orig": 0.745573103427887, + "epoch": 1.083111654324538, + "kl_loss": 0.14439886808395386, + "loss_ib": 0.004387850873172283, + "step": 3767 + }, + { + "ce_ib": 2.76396107673645, + "ce_orig": 0.48543134331703186, + "epoch": 1.083111654324538, + "kl_loss": 0.15677255392074585, + "loss_ib": 0.004331686533987522, + "step": 3767 + }, + { + "ce_ib": 3.091195821762085, + "ce_orig": 0.4856996536254883, + "epoch": 1.0833992379035158, + "kl_loss": 0.16267861425876617, + "loss_ib": 0.004717981908470392, + "step": 3768 + }, + { + "ce_ib": 5.854247570037842, + "ce_orig": 1.4718576669692993, + "epoch": 1.0833992379035158, + "kl_loss": 0.19890952110290527, + "loss_ib": 0.00784334260970354, + "step": 3768 + }, + { + "ce_ib": 3.093486785888672, + "ce_orig": 0.6328637003898621, + "epoch": 1.0833992379035158, + "kl_loss": 0.19367417693138123, + "loss_ib": 0.005030228290706873, + "step": 3768 + }, + { + "ce_ib": 3.196294069290161, + "ce_orig": 0.8006542921066284, + "epoch": 1.0833992379035158, + "kl_loss": 0.2608145475387573, + "loss_ib": 0.005804439540952444, + "step": 3768 + }, + { + "ce_ib": 2.1278443336486816, + "ce_orig": 0.6105515956878662, + "epoch": 1.0836868214824933, + "kl_loss": 0.09391579777002335, + "loss_ib": 0.0030670021660625935, + "step": 3769 + }, + { + "ce_ib": 4.9398274421691895, + "ce_orig": 1.155955195426941, + "epoch": 1.0836868214824933, + "kl_loss": 0.1818097084760666, + "loss_ib": 0.00675792433321476, + "step": 3769 + }, + { + "ce_ib": 2.201167106628418, + "ce_orig": 0.5803961753845215, + "epoch": 1.0836868214824933, + "kl_loss": 0.12631316483020782, + "loss_ib": 0.003464298788458109, + "step": 3769 + }, + { + "ce_ib": 3.5951106548309326, + "ce_orig": 1.2172623872756958, + "epoch": 1.0836868214824933, + "kl_loss": 0.12665463984012604, + "loss_ib": 0.004861657042056322, + "step": 3769 + }, + { + "epoch": 1.083974405061471, + "grad_norm": 0.18099313974380493, + "learning_rate": 7.396244933600285e-06, + "loss": 0.8931, + "step": 3770 + }, + { + "ce_ib": 3.255861282348633, + "ce_orig": 0.8512479662895203, + "epoch": 1.083974405061471, + "kl_loss": 0.2094070315361023, + "loss_ib": 0.0053499317727983, + "step": 3770 + }, + { + "ce_ib": 2.8719582557678223, + "ce_orig": 0.8408315181732178, + "epoch": 1.083974405061471, + "kl_loss": 0.19613993167877197, + "loss_ib": 0.004833357408642769, + "step": 3770 + }, + { + "ce_ib": 2.4612371921539307, + "ce_orig": 0.4789428412914276, + "epoch": 1.083974405061471, + "kl_loss": 0.17800505459308624, + "loss_ib": 0.004241287708282471, + "step": 3770 + }, + { + "ce_ib": 5.3728413581848145, + "ce_orig": 1.3350553512573242, + "epoch": 1.083974405061471, + "kl_loss": 0.1536901295185089, + "loss_ib": 0.006909742020070553, + "step": 3770 + }, + { + "ce_ib": 5.73849630355835, + "ce_orig": 0.9650236964225769, + "epoch": 1.0842619886404485, + "kl_loss": 0.1861943155527115, + "loss_ib": 0.0076004392467439175, + "step": 3771 + }, + { + "ce_ib": 2.582465171813965, + "ce_orig": 0.671971321105957, + "epoch": 1.0842619886404485, + "kl_loss": 0.15145626664161682, + "loss_ib": 0.004097027704119682, + "step": 3771 + }, + { + "ce_ib": 4.108642101287842, + "ce_orig": 0.9632114768028259, + "epoch": 1.0842619886404485, + "kl_loss": 0.17023606598377228, + "loss_ib": 0.005811002571135759, + "step": 3771 + }, + { + "ce_ib": 5.785202980041504, + "ce_orig": 1.4514391422271729, + "epoch": 1.0842619886404485, + "kl_loss": 0.15749591588974, + "loss_ib": 0.0073601617477834225, + "step": 3771 + }, + { + "ce_ib": 4.182226181030273, + "ce_orig": 0.9014868140220642, + "epoch": 1.0845495722194263, + "kl_loss": 0.16131699085235596, + "loss_ib": 0.005795396398752928, + "step": 3772 + }, + { + "ce_ib": 2.007551431655884, + "ce_orig": 0.8391512036323547, + "epoch": 1.0845495722194263, + "kl_loss": 0.0999622493982315, + "loss_ib": 0.003007174003869295, + "step": 3772 + }, + { + "ce_ib": 5.157216548919678, + "ce_orig": 1.4348576068878174, + "epoch": 1.0845495722194263, + "kl_loss": 0.1767241358757019, + "loss_ib": 0.006924457848072052, + "step": 3772 + }, + { + "ce_ib": 7.751264572143555, + "ce_orig": 1.8635026216506958, + "epoch": 1.0845495722194263, + "kl_loss": 0.23340418934822083, + "loss_ib": 0.010085306130349636, + "step": 3772 + }, + { + "ce_ib": 2.8423798084259033, + "ce_orig": 0.9161777496337891, + "epoch": 1.084837155798404, + "kl_loss": 0.17654438316822052, + "loss_ib": 0.004607823211699724, + "step": 3773 + }, + { + "ce_ib": 4.088259220123291, + "ce_orig": 0.9913687109947205, + "epoch": 1.084837155798404, + "kl_loss": 0.1671236753463745, + "loss_ib": 0.00575949577614665, + "step": 3773 + }, + { + "ce_ib": 5.370787620544434, + "ce_orig": 0.8220592141151428, + "epoch": 1.084837155798404, + "kl_loss": 0.20335154235363007, + "loss_ib": 0.007404303178191185, + "step": 3773 + }, + { + "ce_ib": 6.120460033416748, + "ce_orig": 1.5401954650878906, + "epoch": 1.084837155798404, + "kl_loss": 0.20170417428016663, + "loss_ib": 0.00813750084489584, + "step": 3773 + }, + { + "ce_ib": 1.8806442022323608, + "ce_orig": 0.5767578482627869, + "epoch": 1.0851247393773815, + "kl_loss": 0.13503405451774597, + "loss_ib": 0.003230984788388014, + "step": 3774 + }, + { + "ce_ib": 3.5313284397125244, + "ce_orig": 1.0947511196136475, + "epoch": 1.0851247393773815, + "kl_loss": 0.10595063865184784, + "loss_ib": 0.004590834956616163, + "step": 3774 + }, + { + "ce_ib": 1.6668500900268555, + "ce_orig": 0.9857330322265625, + "epoch": 1.0851247393773815, + "kl_loss": 0.4986911416053772, + "loss_ib": 0.006653761491179466, + "step": 3774 + }, + { + "ce_ib": 5.891106128692627, + "ce_orig": 1.0311921834945679, + "epoch": 1.0851247393773815, + "kl_loss": 0.1900380551815033, + "loss_ib": 0.007791487034410238, + "step": 3774 + }, + { + "epoch": 1.0854123229563593, + "grad_norm": 0.20588375627994537, + "learning_rate": 7.389430513550438e-06, + "loss": 0.9031, + "step": 3775 + }, + { + "ce_ib": 2.760657548904419, + "ce_orig": 0.6043491959571838, + "epoch": 1.0854123229563593, + "kl_loss": 0.10278744250535965, + "loss_ib": 0.003788531990721822, + "step": 3775 + }, + { + "ce_ib": 5.087459564208984, + "ce_orig": 0.9740437269210815, + "epoch": 1.0854123229563593, + "kl_loss": 0.14465782046318054, + "loss_ib": 0.00653403764590621, + "step": 3775 + }, + { + "ce_ib": 2.9794087409973145, + "ce_orig": 0.8607528209686279, + "epoch": 1.0854123229563593, + "kl_loss": 0.22184520959854126, + "loss_ib": 0.005197861231863499, + "step": 3775 + }, + { + "ce_ib": 2.4120290279388428, + "ce_orig": 0.6582555174827576, + "epoch": 1.0854123229563593, + "kl_loss": 0.1739373505115509, + "loss_ib": 0.00415140250697732, + "step": 3775 + }, + { + "ce_ib": 5.3129072189331055, + "ce_orig": 1.251781702041626, + "epoch": 1.0856999065353368, + "kl_loss": 0.17126673460006714, + "loss_ib": 0.007025574333965778, + "step": 3776 + }, + { + "ce_ib": 1.5969635248184204, + "ce_orig": 0.45147019624710083, + "epoch": 1.0856999065353368, + "kl_loss": 0.27599889039993286, + "loss_ib": 0.0043569523841142654, + "step": 3776 + }, + { + "ce_ib": 4.306368350982666, + "ce_orig": 1.3252615928649902, + "epoch": 1.0856999065353368, + "kl_loss": 0.24614036083221436, + "loss_ib": 0.006767772138118744, + "step": 3776 + }, + { + "ce_ib": 4.397464752197266, + "ce_orig": 1.0870826244354248, + "epoch": 1.0856999065353368, + "kl_loss": 0.13599233329296112, + "loss_ib": 0.005757387727499008, + "step": 3776 + }, + { + "ce_ib": 3.74137806892395, + "ce_orig": 1.0400974750518799, + "epoch": 1.0859874901143145, + "kl_loss": 0.15225550532341003, + "loss_ib": 0.005263932980597019, + "step": 3777 + }, + { + "ce_ib": 3.2651898860931396, + "ce_orig": 0.8442232608795166, + "epoch": 1.0859874901143145, + "kl_loss": 0.20916229486465454, + "loss_ib": 0.005356812383979559, + "step": 3777 + }, + { + "ce_ib": 2.802668333053589, + "ce_orig": 0.6875386238098145, + "epoch": 1.0859874901143145, + "kl_loss": 0.19768153131008148, + "loss_ib": 0.004779483657330275, + "step": 3777 + }, + { + "ce_ib": 3.0318450927734375, + "ce_orig": 0.4373438060283661, + "epoch": 1.0859874901143145, + "kl_loss": 0.23739303648471832, + "loss_ib": 0.005405775271356106, + "step": 3777 + }, + { + "ce_ib": 6.711424350738525, + "ce_orig": 1.8425583839416504, + "epoch": 1.086275073693292, + "kl_loss": 0.24512895941734314, + "loss_ib": 0.009162714704871178, + "step": 3778 + }, + { + "ce_ib": 4.012652397155762, + "ce_orig": 0.8049220442771912, + "epoch": 1.086275073693292, + "kl_loss": 0.18133187294006348, + "loss_ib": 0.005825971253216267, + "step": 3778 + }, + { + "ce_ib": 1.649009346961975, + "ce_orig": 0.4991517961025238, + "epoch": 1.086275073693292, + "kl_loss": 0.13338741660118103, + "loss_ib": 0.002982883481308818, + "step": 3778 + }, + { + "ce_ib": 4.2566351890563965, + "ce_orig": 0.830081582069397, + "epoch": 1.086275073693292, + "kl_loss": 0.12675149738788605, + "loss_ib": 0.005524150095880032, + "step": 3778 + }, + { + "ce_ib": 2.116013288497925, + "ce_orig": 0.4127202332019806, + "epoch": 1.0865626572722697, + "kl_loss": 0.2818816006183624, + "loss_ib": 0.004934829194098711, + "step": 3779 + }, + { + "ce_ib": 5.832136631011963, + "ce_orig": 1.4509003162384033, + "epoch": 1.0865626572722697, + "kl_loss": 0.18448209762573242, + "loss_ib": 0.0076769576407969, + "step": 3779 + }, + { + "ce_ib": 2.6202452182769775, + "ce_orig": 0.589324951171875, + "epoch": 1.0865626572722697, + "kl_loss": 0.12847338616847992, + "loss_ib": 0.003904979210346937, + "step": 3779 + }, + { + "ce_ib": 3.4796032905578613, + "ce_orig": 0.5960392355918884, + "epoch": 1.0865626572722697, + "kl_loss": 0.3328614830970764, + "loss_ib": 0.006808217614889145, + "step": 3779 + }, + { + "epoch": 1.0868502408512475, + "grad_norm": 0.13409268856048584, + "learning_rate": 7.382610336807887e-06, + "loss": 0.9121, + "step": 3780 + }, + { + "ce_ib": 2.6283624172210693, + "ce_orig": 0.7633141875267029, + "epoch": 1.0868502408512475, + "kl_loss": 0.10517530143260956, + "loss_ib": 0.003680115332826972, + "step": 3780 + }, + { + "ce_ib": 4.966353893280029, + "ce_orig": 0.743020236492157, + "epoch": 1.0868502408512475, + "kl_loss": 0.4816039800643921, + "loss_ib": 0.009782394394278526, + "step": 3780 + }, + { + "ce_ib": 5.406623840332031, + "ce_orig": 1.5958969593048096, + "epoch": 1.0868502408512475, + "kl_loss": 0.16281463205814362, + "loss_ib": 0.007034770213067532, + "step": 3780 + }, + { + "ce_ib": 5.5648512840271, + "ce_orig": 1.208393931388855, + "epoch": 1.0868502408512475, + "kl_loss": 0.23848393559455872, + "loss_ib": 0.007949690334498882, + "step": 3780 + }, + { + "ce_ib": 4.2656731605529785, + "ce_orig": 1.1610361337661743, + "epoch": 1.087137824430225, + "kl_loss": 0.18351960182189941, + "loss_ib": 0.00610086927190423, + "step": 3781 + }, + { + "ce_ib": 2.6956987380981445, + "ce_orig": 0.6019343733787537, + "epoch": 1.087137824430225, + "kl_loss": 0.197714701294899, + "loss_ib": 0.004672845359891653, + "step": 3781 + }, + { + "ce_ib": 3.466078281402588, + "ce_orig": 0.6429448127746582, + "epoch": 1.087137824430225, + "kl_loss": 0.18007300794124603, + "loss_ib": 0.005266807973384857, + "step": 3781 + }, + { + "ce_ib": 4.980103015899658, + "ce_orig": 1.2327024936676025, + "epoch": 1.087137824430225, + "kl_loss": 0.15295220911502838, + "loss_ib": 0.006509625352919102, + "step": 3781 + }, + { + "ce_ib": 4.6514201164245605, + "ce_orig": 1.1536554098129272, + "epoch": 1.0874254080092027, + "kl_loss": 0.1879802942276001, + "loss_ib": 0.006531222723424435, + "step": 3782 + }, + { + "ce_ib": 2.7603719234466553, + "ce_orig": 0.5962186455726624, + "epoch": 1.0874254080092027, + "kl_loss": 0.15867537260055542, + "loss_ib": 0.00434712553396821, + "step": 3782 + }, + { + "ce_ib": 2.487295389175415, + "ce_orig": 0.44564104080200195, + "epoch": 1.0874254080092027, + "kl_loss": 0.22449778020381927, + "loss_ib": 0.004732273053377867, + "step": 3782 + }, + { + "ce_ib": 3.847682237625122, + "ce_orig": 0.4910390079021454, + "epoch": 1.0874254080092027, + "kl_loss": 0.18126800656318665, + "loss_ib": 0.00566036207601428, + "step": 3782 + }, + { + "ce_ib": 5.222685813903809, + "ce_orig": 1.1683218479156494, + "epoch": 1.0877129915881802, + "kl_loss": 0.1613704264163971, + "loss_ib": 0.006836390122771263, + "step": 3783 + }, + { + "ce_ib": 2.4890384674072266, + "ce_orig": 0.6417744755744934, + "epoch": 1.0877129915881802, + "kl_loss": 0.5651209950447083, + "loss_ib": 0.008140248246490955, + "step": 3783 + }, + { + "ce_ib": 4.193533420562744, + "ce_orig": 0.7462524175643921, + "epoch": 1.0877129915881802, + "kl_loss": 0.20505495369434357, + "loss_ib": 0.006244082935154438, + "step": 3783 + }, + { + "ce_ib": 5.2608747482299805, + "ce_orig": 1.403700351715088, + "epoch": 1.0877129915881802, + "kl_loss": 0.18028691411018372, + "loss_ib": 0.007063744124025106, + "step": 3783 + }, + { + "ce_ib": 3.7410950660705566, + "ce_orig": 0.9581707119941711, + "epoch": 1.088000575167158, + "kl_loss": 0.16720065474510193, + "loss_ib": 0.005413101986050606, + "step": 3784 + }, + { + "ce_ib": 3.8231818675994873, + "ce_orig": 0.7691629528999329, + "epoch": 1.088000575167158, + "kl_loss": 0.20327726006507874, + "loss_ib": 0.005855954717844725, + "step": 3784 + }, + { + "ce_ib": 5.082459926605225, + "ce_orig": 1.3260232210159302, + "epoch": 1.088000575167158, + "kl_loss": 0.11223748326301575, + "loss_ib": 0.0062048351392149925, + "step": 3784 + }, + { + "ce_ib": 5.2392778396606445, + "ce_orig": 1.3136769533157349, + "epoch": 1.088000575167158, + "kl_loss": 0.22067949175834656, + "loss_ib": 0.007446072995662689, + "step": 3784 + }, + { + "epoch": 1.0882881587461355, + "grad_norm": 0.14656592905521393, + "learning_rate": 7.375784419804018e-06, + "loss": 0.8218, + "step": 3785 + }, + { + "ce_ib": 2.753603458404541, + "ce_orig": 0.6730973124504089, + "epoch": 1.0882881587461355, + "kl_loss": 0.16148145496845245, + "loss_ib": 0.004368417896330357, + "step": 3785 + }, + { + "ce_ib": 3.1144120693206787, + "ce_orig": 0.902428388595581, + "epoch": 1.0882881587461355, + "kl_loss": 0.1297929883003235, + "loss_ib": 0.004412341862916946, + "step": 3785 + }, + { + "ce_ib": 2.8799171447753906, + "ce_orig": 0.4490915536880493, + "epoch": 1.0882881587461355, + "kl_loss": 0.17664355039596558, + "loss_ib": 0.004646352492272854, + "step": 3785 + }, + { + "ce_ib": 3.4119913578033447, + "ce_orig": 1.0593268871307373, + "epoch": 1.0882881587461355, + "kl_loss": 0.11400148272514343, + "loss_ib": 0.004552006255835295, + "step": 3785 + }, + { + "ce_ib": 5.9808349609375, + "ce_orig": 1.8477864265441895, + "epoch": 1.0885757423251132, + "kl_loss": 0.19030185043811798, + "loss_ib": 0.007883853279054165, + "step": 3786 + }, + { + "ce_ib": 3.797727108001709, + "ce_orig": 1.1733660697937012, + "epoch": 1.0885757423251132, + "kl_loss": 0.19315466284751892, + "loss_ib": 0.005729273427277803, + "step": 3786 + }, + { + "ce_ib": 3.8803510665893555, + "ce_orig": 1.2508198022842407, + "epoch": 1.0885757423251132, + "kl_loss": 0.1793135702610016, + "loss_ib": 0.005673487205058336, + "step": 3786 + }, + { + "ce_ib": 5.828835964202881, + "ce_orig": 1.439537525177002, + "epoch": 1.0885757423251132, + "kl_loss": 0.1488926112651825, + "loss_ib": 0.007317762356251478, + "step": 3786 + }, + { + "ce_ib": 4.025831699371338, + "ce_orig": 1.0181071758270264, + "epoch": 1.088863325904091, + "kl_loss": 0.17312151193618774, + "loss_ib": 0.005757046863436699, + "step": 3787 + }, + { + "ce_ib": 2.8795862197875977, + "ce_orig": 0.747184157371521, + "epoch": 1.088863325904091, + "kl_loss": 0.25961363315582275, + "loss_ib": 0.005475722253322601, + "step": 3787 + }, + { + "ce_ib": 5.991549968719482, + "ce_orig": 1.001234531402588, + "epoch": 1.088863325904091, + "kl_loss": 0.23861105740070343, + "loss_ib": 0.008377660065889359, + "step": 3787 + }, + { + "ce_ib": 4.077173709869385, + "ce_orig": 0.9553885459899902, + "epoch": 1.088863325904091, + "kl_loss": 0.11611989140510559, + "loss_ib": 0.00523837236687541, + "step": 3787 + }, + { + "ce_ib": 2.829453229904175, + "ce_orig": 0.6482143402099609, + "epoch": 1.0891509094830685, + "kl_loss": 0.20583447813987732, + "loss_ib": 0.004887797869741917, + "step": 3788 + }, + { + "ce_ib": 2.430006742477417, + "ce_orig": 0.4935525059700012, + "epoch": 1.0891509094830685, + "kl_loss": 0.16222432255744934, + "loss_ib": 0.00405224971473217, + "step": 3788 + }, + { + "ce_ib": 4.801497459411621, + "ce_orig": 1.4400296211242676, + "epoch": 1.0891509094830685, + "kl_loss": 0.13291612267494202, + "loss_ib": 0.0061306580901145935, + "step": 3788 + }, + { + "ce_ib": 2.612818717956543, + "ce_orig": 0.6540479063987732, + "epoch": 1.0891509094830685, + "kl_loss": 0.14328166842460632, + "loss_ib": 0.004045635461807251, + "step": 3788 + }, + { + "ce_ib": 3.94514536857605, + "ce_orig": 1.069631576538086, + "epoch": 1.0894384930620462, + "kl_loss": 0.15030115842819214, + "loss_ib": 0.005448156502097845, + "step": 3789 + }, + { + "ce_ib": 2.251190423965454, + "ce_orig": 0.6705069541931152, + "epoch": 1.0894384930620462, + "kl_loss": 0.0846218466758728, + "loss_ib": 0.0030974089168012142, + "step": 3789 + }, + { + "ce_ib": 5.498021602630615, + "ce_orig": 1.2853094339370728, + "epoch": 1.0894384930620462, + "kl_loss": 0.13670244812965393, + "loss_ib": 0.006865045987069607, + "step": 3789 + }, + { + "ce_ib": 3.2893033027648926, + "ce_orig": 0.7376151084899902, + "epoch": 1.0894384930620462, + "kl_loss": 0.2136763632297516, + "loss_ib": 0.005426066927611828, + "step": 3789 + }, + { + "epoch": 1.0897260766410237, + "grad_norm": 0.1494576781988144, + "learning_rate": 7.368952778984052e-06, + "loss": 0.8607, + "step": 3790 + }, + { + "ce_ib": 3.3915679454803467, + "ce_orig": 0.9119009375572205, + "epoch": 1.0897260766410237, + "kl_loss": 0.17033906280994415, + "loss_ib": 0.00509495846927166, + "step": 3790 + }, + { + "ce_ib": 1.6597801446914673, + "ce_orig": 0.4990796446800232, + "epoch": 1.0897260766410237, + "kl_loss": 0.11440080404281616, + "loss_ib": 0.0028037880547344685, + "step": 3790 + }, + { + "ce_ib": 5.154903411865234, + "ce_orig": 1.3984897136688232, + "epoch": 1.0897260766410237, + "kl_loss": 0.19113534688949585, + "loss_ib": 0.007066256832331419, + "step": 3790 + }, + { + "ce_ib": 7.182960510253906, + "ce_orig": 1.2292873859405518, + "epoch": 1.0897260766410237, + "kl_loss": 0.16312557458877563, + "loss_ib": 0.008814215660095215, + "step": 3790 + }, + { + "ce_ib": 2.3909730911254883, + "ce_orig": 0.45101919770240784, + "epoch": 1.0900136602200015, + "kl_loss": 0.1541544646024704, + "loss_ib": 0.003932517487555742, + "step": 3791 + }, + { + "ce_ib": 2.4186971187591553, + "ce_orig": 0.5826050639152527, + "epoch": 1.0900136602200015, + "kl_loss": 0.16130617260932922, + "loss_ib": 0.00403175875544548, + "step": 3791 + }, + { + "ce_ib": 3.4559268951416016, + "ce_orig": 0.8271056413650513, + "epoch": 1.0900136602200015, + "kl_loss": 0.19790276885032654, + "loss_ib": 0.005434954538941383, + "step": 3791 + }, + { + "ce_ib": 2.626046657562256, + "ce_orig": 0.6138795018196106, + "epoch": 1.0900136602200015, + "kl_loss": 0.1681705117225647, + "loss_ib": 0.004307752009481192, + "step": 3791 + }, + { + "ce_ib": 2.2141306400299072, + "ce_orig": 0.21259380877017975, + "epoch": 1.090301243798979, + "kl_loss": 0.13462136685848236, + "loss_ib": 0.003560344222933054, + "step": 3792 + }, + { + "ce_ib": 2.4797465801239014, + "ce_orig": 0.7472774982452393, + "epoch": 1.090301243798979, + "kl_loss": 0.13891850411891937, + "loss_ib": 0.0038689314387738705, + "step": 3792 + }, + { + "ce_ib": 4.445582389831543, + "ce_orig": 1.2077752351760864, + "epoch": 1.090301243798979, + "kl_loss": 0.17267659306526184, + "loss_ib": 0.006172348279505968, + "step": 3792 + }, + { + "ce_ib": 2.4929463863372803, + "ce_orig": 0.867741584777832, + "epoch": 1.090301243798979, + "kl_loss": 0.1297568678855896, + "loss_ib": 0.003790515009313822, + "step": 3792 + }, + { + "ce_ib": 6.100893974304199, + "ce_orig": 1.5487650632858276, + "epoch": 1.0905888273779567, + "kl_loss": 0.1300363838672638, + "loss_ib": 0.007401257753372192, + "step": 3793 + }, + { + "ce_ib": 6.270289897918701, + "ce_orig": 1.3469594717025757, + "epoch": 1.0905888273779567, + "kl_loss": 0.15118584036827087, + "loss_ib": 0.007782147731631994, + "step": 3793 + }, + { + "ce_ib": 6.871710777282715, + "ce_orig": 1.689699649810791, + "epoch": 1.0905888273779567, + "kl_loss": 0.1984419971704483, + "loss_ib": 0.008856130763888359, + "step": 3793 + }, + { + "ce_ib": 4.314858436584473, + "ce_orig": 0.5740727782249451, + "epoch": 1.0905888273779567, + "kl_loss": 0.19477403163909912, + "loss_ib": 0.006262599024921656, + "step": 3793 + }, + { + "ce_ib": 3.3253345489501953, + "ce_orig": 0.6329350471496582, + "epoch": 1.0908764109569344, + "kl_loss": 0.17244870960712433, + "loss_ib": 0.005049821455031633, + "step": 3794 + }, + { + "ce_ib": 3.4140121936798096, + "ce_orig": 0.8457044959068298, + "epoch": 1.0908764109569344, + "kl_loss": 0.23346492648124695, + "loss_ib": 0.0057486617006361485, + "step": 3794 + }, + { + "ce_ib": 4.566333293914795, + "ce_orig": 1.29512357711792, + "epoch": 1.0908764109569344, + "kl_loss": 0.17769183218479156, + "loss_ib": 0.006343251094222069, + "step": 3794 + }, + { + "ce_ib": 1.2281403541564941, + "ce_orig": 0.32986998558044434, + "epoch": 1.0908764109569344, + "kl_loss": 0.09700661897659302, + "loss_ib": 0.0021982064936310053, + "step": 3794 + }, + { + "epoch": 1.091163994535912, + "grad_norm": 0.13813412189483643, + "learning_rate": 7.362115430806994e-06, + "loss": 0.8381, + "step": 3795 + }, + { + "ce_ib": 4.772525310516357, + "ce_orig": 0.9355142116546631, + "epoch": 1.091163994535912, + "kl_loss": 0.23038767278194427, + "loss_ib": 0.007076402194797993, + "step": 3795 + }, + { + "ce_ib": 4.283401966094971, + "ce_orig": 1.0402547121047974, + "epoch": 1.091163994535912, + "kl_loss": 0.12336242944002151, + "loss_ib": 0.0055170259438455105, + "step": 3795 + }, + { + "ce_ib": 1.217177152633667, + "ce_orig": 0.3937131464481354, + "epoch": 1.091163994535912, + "kl_loss": 0.10618780553340912, + "loss_ib": 0.0022790550719946623, + "step": 3795 + }, + { + "ce_ib": 3.736787796020508, + "ce_orig": 0.576839804649353, + "epoch": 1.091163994535912, + "kl_loss": 0.21948683261871338, + "loss_ib": 0.005931656341999769, + "step": 3795 + }, + { + "ce_ib": 3.010023832321167, + "ce_orig": 0.7147043347358704, + "epoch": 1.0914515781148897, + "kl_loss": 0.1909191608428955, + "loss_ib": 0.004919215105473995, + "step": 3796 + }, + { + "ce_ib": 3.382622718811035, + "ce_orig": 0.81058269739151, + "epoch": 1.0914515781148897, + "kl_loss": 0.13315826654434204, + "loss_ib": 0.0047142053954303265, + "step": 3796 + }, + { + "ce_ib": 2.014786958694458, + "ce_orig": 0.5832642316818237, + "epoch": 1.0914515781148897, + "kl_loss": 0.1642099916934967, + "loss_ib": 0.0036568867508322, + "step": 3796 + }, + { + "ce_ib": 1.4610683917999268, + "ce_orig": 0.4413277506828308, + "epoch": 1.0914515781148897, + "kl_loss": 0.08810869604349136, + "loss_ib": 0.0023421552032232285, + "step": 3796 + }, + { + "ce_ib": 1.7479338645935059, + "ce_orig": 0.41405484080314636, + "epoch": 1.0917391616938672, + "kl_loss": 0.1297948956489563, + "loss_ib": 0.003045882796868682, + "step": 3797 + }, + { + "ce_ib": 5.363791465759277, + "ce_orig": 0.9853528141975403, + "epoch": 1.0917391616938672, + "kl_loss": 0.18760238587856293, + "loss_ib": 0.007239815313369036, + "step": 3797 + }, + { + "ce_ib": 6.440741539001465, + "ce_orig": 0.7967831492424011, + "epoch": 1.0917391616938672, + "kl_loss": 0.1953572928905487, + "loss_ib": 0.008394313976168633, + "step": 3797 + }, + { + "ce_ib": 2.8783156871795654, + "ce_orig": 0.6038665771484375, + "epoch": 1.0917391616938672, + "kl_loss": 0.1881435066461563, + "loss_ib": 0.004759750794619322, + "step": 3797 + }, + { + "ce_ib": 1.7972525358200073, + "ce_orig": 0.6987202763557434, + "epoch": 1.092026745272845, + "kl_loss": 0.11092875152826309, + "loss_ib": 0.0029065399430692196, + "step": 3798 + }, + { + "ce_ib": 2.4328620433807373, + "ce_orig": 0.7651538848876953, + "epoch": 1.092026745272845, + "kl_loss": 0.11498149484395981, + "loss_ib": 0.003582676872611046, + "step": 3798 + }, + { + "ce_ib": 3.8543667793273926, + "ce_orig": 0.8128284215927124, + "epoch": 1.092026745272845, + "kl_loss": 0.17181788384914398, + "loss_ib": 0.00557254534214735, + "step": 3798 + }, + { + "ce_ib": 3.733863115310669, + "ce_orig": 0.5844415426254272, + "epoch": 1.092026745272845, + "kl_loss": 0.2795768976211548, + "loss_ib": 0.006529632024466991, + "step": 3798 + }, + { + "ce_ib": 2.376267194747925, + "ce_orig": 0.77012699842453, + "epoch": 1.0923143288518227, + "kl_loss": 0.11457814276218414, + "loss_ib": 0.003522048471495509, + "step": 3799 + }, + { + "ce_ib": 4.102505207061768, + "ce_orig": 0.9015953540802002, + "epoch": 1.0923143288518227, + "kl_loss": 0.20204704999923706, + "loss_ib": 0.006122975144535303, + "step": 3799 + }, + { + "ce_ib": 3.2045233249664307, + "ce_orig": 0.8556556701660156, + "epoch": 1.0923143288518227, + "kl_loss": 0.14263826608657837, + "loss_ib": 0.004630906041711569, + "step": 3799 + }, + { + "ce_ib": 3.0889809131622314, + "ce_orig": 0.879136323928833, + "epoch": 1.0923143288518227, + "kl_loss": 0.19633561372756958, + "loss_ib": 0.00505233695730567, + "step": 3799 + }, + { + "epoch": 1.0926019124308002, + "grad_norm": 0.15909484028816223, + "learning_rate": 7.355272391745605e-06, + "loss": 0.8043, + "step": 3800 + }, + { + "ce_ib": 3.510735511779785, + "ce_orig": 1.0193068981170654, + "epoch": 1.0926019124308002, + "kl_loss": 0.13227315247058868, + "loss_ib": 0.004833466839045286, + "step": 3800 + }, + { + "ce_ib": 3.476825475692749, + "ce_orig": 1.147905945777893, + "epoch": 1.0926019124308002, + "kl_loss": 0.12124013900756836, + "loss_ib": 0.004689226858317852, + "step": 3800 + }, + { + "ce_ib": 3.210724353790283, + "ce_orig": 0.7334904074668884, + "epoch": 1.0926019124308002, + "kl_loss": 0.21804502606391907, + "loss_ib": 0.005391174461692572, + "step": 3800 + }, + { + "ce_ib": 4.287133693695068, + "ce_orig": 1.4149543046951294, + "epoch": 1.0926019124308002, + "kl_loss": 0.16286315023899078, + "loss_ib": 0.005915765184909105, + "step": 3800 + }, + { + "ce_ib": 1.1724491119384766, + "ce_orig": 0.2541761100292206, + "epoch": 1.092889496009778, + "kl_loss": 0.25344401597976685, + "loss_ib": 0.003706888994202018, + "step": 3801 + }, + { + "ce_ib": 4.435404300689697, + "ce_orig": 1.0700098276138306, + "epoch": 1.092889496009778, + "kl_loss": 0.1991366744041443, + "loss_ib": 0.006426770705729723, + "step": 3801 + }, + { + "ce_ib": 4.327320098876953, + "ce_orig": 0.9483024477958679, + "epoch": 1.092889496009778, + "kl_loss": 0.13703006505966187, + "loss_ib": 0.0056976210325956345, + "step": 3801 + }, + { + "ce_ib": 3.510612726211548, + "ce_orig": 0.7401843070983887, + "epoch": 1.092889496009778, + "kl_loss": 0.23226341605186462, + "loss_ib": 0.005833246745169163, + "step": 3801 + }, + { + "ce_ib": 1.678465723991394, + "ce_orig": 0.38102850317955017, + "epoch": 1.0931770795887554, + "kl_loss": 0.09406688064336777, + "loss_ib": 0.002619134495034814, + "step": 3802 + }, + { + "ce_ib": 5.993425369262695, + "ce_orig": 1.346732497215271, + "epoch": 1.0931770795887554, + "kl_loss": 0.21466007828712463, + "loss_ib": 0.008140026591718197, + "step": 3802 + }, + { + "ce_ib": 4.112119197845459, + "ce_orig": 0.8359326720237732, + "epoch": 1.0931770795887554, + "kl_loss": 0.16863113641738892, + "loss_ib": 0.005798430182039738, + "step": 3802 + }, + { + "ce_ib": 3.1721572875976562, + "ce_orig": 0.7078550457954407, + "epoch": 1.0931770795887554, + "kl_loss": 0.15611472725868225, + "loss_ib": 0.004733304493129253, + "step": 3802 + }, + { + "ce_ib": 4.486536979675293, + "ce_orig": 0.8469123244285583, + "epoch": 1.0934646631677332, + "kl_loss": 0.19788885116577148, + "loss_ib": 0.006465425249189138, + "step": 3803 + }, + { + "ce_ib": 3.5149009227752686, + "ce_orig": 0.8108443021774292, + "epoch": 1.0934646631677332, + "kl_loss": 0.14876484870910645, + "loss_ib": 0.005002549849450588, + "step": 3803 + }, + { + "ce_ib": 1.8476144075393677, + "ce_orig": 0.756817638874054, + "epoch": 1.0934646631677332, + "kl_loss": 0.1378476619720459, + "loss_ib": 0.003226091153919697, + "step": 3803 + }, + { + "ce_ib": 0.9337911605834961, + "ce_orig": 0.3330782949924469, + "epoch": 1.0934646631677332, + "kl_loss": 0.1161806508898735, + "loss_ib": 0.0020955975633114576, + "step": 3803 + }, + { + "ce_ib": 4.159041881561279, + "ce_orig": 1.0524909496307373, + "epoch": 1.0937522467467107, + "kl_loss": 0.1859319508075714, + "loss_ib": 0.006018361542373896, + "step": 3804 + }, + { + "ce_ib": 4.206572532653809, + "ce_orig": 1.0388917922973633, + "epoch": 1.0937522467467107, + "kl_loss": 0.16547319293022156, + "loss_ib": 0.005861304234713316, + "step": 3804 + }, + { + "ce_ib": 5.021503448486328, + "ce_orig": 1.0408337116241455, + "epoch": 1.0937522467467107, + "kl_loss": 0.15119564533233643, + "loss_ib": 0.006533459760248661, + "step": 3804 + }, + { + "ce_ib": 2.454841375350952, + "ce_orig": 0.4659830629825592, + "epoch": 1.0937522467467107, + "kl_loss": 0.217985600233078, + "loss_ib": 0.0046346974559128284, + "step": 3804 + }, + { + "epoch": 1.0940398303256884, + "grad_norm": 0.1352337747812271, + "learning_rate": 7.348423678286354e-06, + "loss": 0.8471, + "step": 3805 + }, + { + "ce_ib": 2.3786373138427734, + "ce_orig": 0.7033153176307678, + "epoch": 1.0940398303256884, + "kl_loss": 0.19415795803070068, + "loss_ib": 0.004320216830819845, + "step": 3805 + }, + { + "ce_ib": 1.5033239126205444, + "ce_orig": 0.24374279379844666, + "epoch": 1.0940398303256884, + "kl_loss": 0.17801907658576965, + "loss_ib": 0.003283514641225338, + "step": 3805 + }, + { + "ce_ib": 2.594611167907715, + "ce_orig": 0.9169961810112, + "epoch": 1.0940398303256884, + "kl_loss": 0.10581228882074356, + "loss_ib": 0.0036527339834719896, + "step": 3805 + }, + { + "ce_ib": 1.9996734857559204, + "ce_orig": 0.6255030035972595, + "epoch": 1.0940398303256884, + "kl_loss": 0.14188919961452484, + "loss_ib": 0.003418565262109041, + "step": 3805 + }, + { + "ce_ib": 3.0977349281311035, + "ce_orig": 0.7293618321418762, + "epoch": 1.0943274139046661, + "kl_loss": 0.28539010882377625, + "loss_ib": 0.005951635539531708, + "step": 3806 + }, + { + "ce_ib": 3.0656960010528564, + "ce_orig": 0.8021844029426575, + "epoch": 1.0943274139046661, + "kl_loss": 0.2583335041999817, + "loss_ib": 0.005649031139910221, + "step": 3806 + }, + { + "ce_ib": 2.9686877727508545, + "ce_orig": 0.7376910448074341, + "epoch": 1.0943274139046661, + "kl_loss": 0.14315173029899597, + "loss_ib": 0.0044002048671245575, + "step": 3806 + }, + { + "ce_ib": 3.4967992305755615, + "ce_orig": 0.4422488212585449, + "epoch": 1.0943274139046661, + "kl_loss": 0.1860661804676056, + "loss_ib": 0.005357461050152779, + "step": 3806 + }, + { + "ce_ib": 5.120697498321533, + "ce_orig": 1.1273343563079834, + "epoch": 1.0946149974836437, + "kl_loss": 0.2235177457332611, + "loss_ib": 0.00735587440431118, + "step": 3807 + }, + { + "ce_ib": 4.894208908081055, + "ce_orig": 1.2485394477844238, + "epoch": 1.0946149974836437, + "kl_loss": 0.19767212867736816, + "loss_ib": 0.006870930083096027, + "step": 3807 + }, + { + "ce_ib": 2.432408094406128, + "ce_orig": 0.526232898235321, + "epoch": 1.0946149974836437, + "kl_loss": 0.10686393082141876, + "loss_ib": 0.0035010473802685738, + "step": 3807 + }, + { + "ce_ib": 2.120229721069336, + "ce_orig": 0.6616792678833008, + "epoch": 1.0946149974836437, + "kl_loss": 0.2268231213092804, + "loss_ib": 0.004388460889458656, + "step": 3807 + }, + { + "ce_ib": 4.060461521148682, + "ce_orig": 1.2555043697357178, + "epoch": 1.0949025810626214, + "kl_loss": 0.19738709926605225, + "loss_ib": 0.006034332327544689, + "step": 3808 + }, + { + "ce_ib": 2.1478140354156494, + "ce_orig": 0.5537099838256836, + "epoch": 1.0949025810626214, + "kl_loss": 0.17313799262046814, + "loss_ib": 0.003879193915054202, + "step": 3808 + }, + { + "ce_ib": 3.258403778076172, + "ce_orig": 0.8989300727844238, + "epoch": 1.0949025810626214, + "kl_loss": 0.13662904500961304, + "loss_ib": 0.004624694120138884, + "step": 3808 + }, + { + "ce_ib": 2.355712652206421, + "ce_orig": 0.5872724652290344, + "epoch": 1.0949025810626214, + "kl_loss": 0.13585087656974792, + "loss_ib": 0.0037142212968319654, + "step": 3808 + }, + { + "ce_ib": 2.8481285572052, + "ce_orig": 0.7013823986053467, + "epoch": 1.095190164641599, + "kl_loss": 0.1531846821308136, + "loss_ib": 0.004379975609481335, + "step": 3809 + }, + { + "ce_ib": 1.9710023403167725, + "ce_orig": 0.5569084286689758, + "epoch": 1.095190164641599, + "kl_loss": 0.1210094690322876, + "loss_ib": 0.00318109686486423, + "step": 3809 + }, + { + "ce_ib": 2.9295942783355713, + "ce_orig": 0.7677730917930603, + "epoch": 1.095190164641599, + "kl_loss": 0.19676437973976135, + "loss_ib": 0.004897237755358219, + "step": 3809 + }, + { + "ce_ib": 4.43642520904541, + "ce_orig": 0.9632907509803772, + "epoch": 1.095190164641599, + "kl_loss": 0.16549943387508392, + "loss_ib": 0.006091419141739607, + "step": 3809 + }, + { + "epoch": 1.0954777482205766, + "grad_norm": 0.14837682247161865, + "learning_rate": 7.341569306929381e-06, + "loss": 0.8132, + "step": 3810 + }, + { + "ce_ib": 3.1500556468963623, + "ce_orig": 0.9244651794433594, + "epoch": 1.0954777482205766, + "kl_loss": 0.12191156297922134, + "loss_ib": 0.0043691713362932205, + "step": 3810 + }, + { + "ce_ib": 6.899845600128174, + "ce_orig": 1.1539027690887451, + "epoch": 1.0954777482205766, + "kl_loss": 0.147256001830101, + "loss_ib": 0.008372405543923378, + "step": 3810 + }, + { + "ce_ib": 5.939385890960693, + "ce_orig": 1.6178550720214844, + "epoch": 1.0954777482205766, + "kl_loss": 0.1698790192604065, + "loss_ib": 0.007638175971806049, + "step": 3810 + }, + { + "ce_ib": 2.0140981674194336, + "ce_orig": 0.46876972913742065, + "epoch": 1.0954777482205766, + "kl_loss": 0.1024370789527893, + "loss_ib": 0.0030384690035134554, + "step": 3810 + }, + { + "ce_ib": 3.2461612224578857, + "ce_orig": 0.6881197094917297, + "epoch": 1.0957653317995542, + "kl_loss": 0.16089197993278503, + "loss_ib": 0.004855080973356962, + "step": 3811 + }, + { + "ce_ib": 2.919726610183716, + "ce_orig": 0.34897056221961975, + "epoch": 1.0957653317995542, + "kl_loss": 0.32883980870246887, + "loss_ib": 0.006208124570548534, + "step": 3811 + }, + { + "ce_ib": 1.8372036218643188, + "ce_orig": 0.39027702808380127, + "epoch": 1.0957653317995542, + "kl_loss": 0.13759467005729675, + "loss_ib": 0.0032131504267454147, + "step": 3811 + }, + { + "ce_ib": 4.419013977050781, + "ce_orig": 0.8939924836158752, + "epoch": 1.0957653317995542, + "kl_loss": 0.311897337436676, + "loss_ib": 0.00753798708319664, + "step": 3811 + }, + { + "ce_ib": 2.8660519123077393, + "ce_orig": 0.8226084113121033, + "epoch": 1.096052915378532, + "kl_loss": 0.10645022243261337, + "loss_ib": 0.003930554259568453, + "step": 3812 + }, + { + "ce_ib": 3.554245948791504, + "ce_orig": 0.7777566313743591, + "epoch": 1.096052915378532, + "kl_loss": 0.21770545840263367, + "loss_ib": 0.005731300450861454, + "step": 3812 + }, + { + "ce_ib": 1.9700065851211548, + "ce_orig": 0.5795442461967468, + "epoch": 1.096052915378532, + "kl_loss": 0.15914508700370789, + "loss_ib": 0.003561457386240363, + "step": 3812 + }, + { + "ce_ib": 2.294074058532715, + "ce_orig": 0.5706264972686768, + "epoch": 1.096052915378532, + "kl_loss": 0.19766655564308167, + "loss_ib": 0.004270739387720823, + "step": 3812 + }, + { + "ce_ib": 3.253023862838745, + "ce_orig": 0.8102407455444336, + "epoch": 1.0963404989575096, + "kl_loss": 0.2626744508743286, + "loss_ib": 0.005879768170416355, + "step": 3813 + }, + { + "ce_ib": 2.3831136226654053, + "ce_orig": 0.7283698320388794, + "epoch": 1.0963404989575096, + "kl_loss": 0.10343147069215775, + "loss_ib": 0.0034174283500760794, + "step": 3813 + }, + { + "ce_ib": 2.995816469192505, + "ce_orig": 0.7507858276367188, + "epoch": 1.0963404989575096, + "kl_loss": 0.1698566973209381, + "loss_ib": 0.004694383125752211, + "step": 3813 + }, + { + "ce_ib": 4.658431529998779, + "ce_orig": 0.8944865465164185, + "epoch": 1.0963404989575096, + "kl_loss": 0.1692318618297577, + "loss_ib": 0.006350750103592873, + "step": 3813 + }, + { + "ce_ib": 3.869626998901367, + "ce_orig": 0.7516367435455322, + "epoch": 1.0966280825364871, + "kl_loss": 0.145860493183136, + "loss_ib": 0.005328231956809759, + "step": 3814 + }, + { + "ce_ib": 5.571839332580566, + "ce_orig": 1.4428988695144653, + "epoch": 1.0966280825364871, + "kl_loss": 0.1783178746700287, + "loss_ib": 0.007355017587542534, + "step": 3814 + }, + { + "ce_ib": 2.124284505844116, + "ce_orig": 0.5742679238319397, + "epoch": 1.0966280825364871, + "kl_loss": 0.13155633211135864, + "loss_ib": 0.003439847845584154, + "step": 3814 + }, + { + "ce_ib": 1.5289362668991089, + "ce_orig": 0.4540554881095886, + "epoch": 1.0966280825364871, + "kl_loss": 0.09569922089576721, + "loss_ib": 0.0024859285913407803, + "step": 3814 + }, + { + "epoch": 1.0969156661154649, + "grad_norm": 0.16093896329402924, + "learning_rate": 7.3347092941884575e-06, + "loss": 0.8271, + "step": 3815 + }, + { + "ce_ib": 2.549488067626953, + "ce_orig": 0.66519695520401, + "epoch": 1.0969156661154649, + "kl_loss": 0.15812164545059204, + "loss_ib": 0.004130704794079065, + "step": 3815 + }, + { + "ce_ib": 4.622992515563965, + "ce_orig": 0.8125013709068298, + "epoch": 1.0969156661154649, + "kl_loss": 0.19547820091247559, + "loss_ib": 0.006577773950994015, + "step": 3815 + }, + { + "ce_ib": 3.244433879852295, + "ce_orig": 0.7612243890762329, + "epoch": 1.0969156661154649, + "kl_loss": 0.34278133511543274, + "loss_ib": 0.006672247312963009, + "step": 3815 + }, + { + "ce_ib": 2.4178595542907715, + "ce_orig": 0.4131658375263214, + "epoch": 1.0969156661154649, + "kl_loss": 0.32557836174964905, + "loss_ib": 0.005673643201589584, + "step": 3815 + }, + { + "ce_ib": 4.602787017822266, + "ce_orig": 1.1442619562149048, + "epoch": 1.0972032496944424, + "kl_loss": 0.16123199462890625, + "loss_ib": 0.006215107161551714, + "step": 3816 + }, + { + "ce_ib": 1.9871107339859009, + "ce_orig": 0.44651901721954346, + "epoch": 1.0972032496944424, + "kl_loss": 0.18318985402584076, + "loss_ib": 0.0038190092891454697, + "step": 3816 + }, + { + "ce_ib": 4.785186767578125, + "ce_orig": 0.83150714635849, + "epoch": 1.0972032496944424, + "kl_loss": 0.1805146336555481, + "loss_ib": 0.0065903328359127045, + "step": 3816 + }, + { + "ce_ib": 3.970661163330078, + "ce_orig": 0.6715195178985596, + "epoch": 1.0972032496944424, + "kl_loss": 0.2106669843196869, + "loss_ib": 0.0060773310251533985, + "step": 3816 + }, + { + "ce_ib": 4.096429347991943, + "ce_orig": 1.136529803276062, + "epoch": 1.0974908332734201, + "kl_loss": 0.17534491419792175, + "loss_ib": 0.005849878769367933, + "step": 3817 + }, + { + "ce_ib": 4.759635925292969, + "ce_orig": 1.1024837493896484, + "epoch": 1.0974908332734201, + "kl_loss": 0.23611533641815186, + "loss_ib": 0.007120789028704166, + "step": 3817 + }, + { + "ce_ib": 4.464900016784668, + "ce_orig": 1.1459863185882568, + "epoch": 1.0974908332734201, + "kl_loss": 0.23193033039569855, + "loss_ib": 0.006784203462302685, + "step": 3817 + }, + { + "ce_ib": 2.150792360305786, + "ce_orig": 0.3082568049430847, + "epoch": 1.0974908332734201, + "kl_loss": 0.23014900088310242, + "loss_ib": 0.0044522820971906185, + "step": 3817 + }, + { + "ce_ib": 1.9851088523864746, + "ce_orig": 0.7212237119674683, + "epoch": 1.0977784168523976, + "kl_loss": 0.10880246013402939, + "loss_ib": 0.0030731335282325745, + "step": 3818 + }, + { + "ce_ib": 3.602862596511841, + "ce_orig": 0.9789955615997314, + "epoch": 1.0977784168523976, + "kl_loss": 0.0940154641866684, + "loss_ib": 0.004543017130345106, + "step": 3818 + }, + { + "ce_ib": 3.2626705169677734, + "ce_orig": 1.0870071649551392, + "epoch": 1.0977784168523976, + "kl_loss": 0.2062474936246872, + "loss_ib": 0.0053251455537974834, + "step": 3818 + }, + { + "ce_ib": 3.054359197616577, + "ce_orig": 0.7761207818984985, + "epoch": 1.0977784168523976, + "kl_loss": 0.1706654131412506, + "loss_ib": 0.004761013202369213, + "step": 3818 + }, + { + "ce_ib": 4.2034454345703125, + "ce_orig": 1.0408003330230713, + "epoch": 1.0980660004313754, + "kl_loss": 0.12166087329387665, + "loss_ib": 0.005420053843408823, + "step": 3819 + }, + { + "ce_ib": 2.7360544204711914, + "ce_orig": 0.6341186165809631, + "epoch": 1.0980660004313754, + "kl_loss": 0.1667538583278656, + "loss_ib": 0.004403593018651009, + "step": 3819 + }, + { + "ce_ib": 2.616565465927124, + "ce_orig": 0.7785348892211914, + "epoch": 1.0980660004313754, + "kl_loss": 0.15982438623905182, + "loss_ib": 0.004214809276163578, + "step": 3819 + }, + { + "ce_ib": 1.2637567520141602, + "ce_orig": 0.3287361264228821, + "epoch": 1.0980660004313754, + "kl_loss": 0.25435540080070496, + "loss_ib": 0.003807310713455081, + "step": 3819 + }, + { + "epoch": 1.098353584010353, + "grad_norm": 0.15101812779903412, + "learning_rate": 7.327843656590948e-06, + "loss": 0.8345, + "step": 3820 + }, + { + "ce_ib": 3.1052517890930176, + "ce_orig": 0.7262755632400513, + "epoch": 1.098353584010353, + "kl_loss": 0.1302770972251892, + "loss_ib": 0.004408022854477167, + "step": 3820 + }, + { + "ce_ib": 2.1856794357299805, + "ce_orig": 0.4758984446525574, + "epoch": 1.098353584010353, + "kl_loss": 0.32237738370895386, + "loss_ib": 0.00540945352986455, + "step": 3820 + }, + { + "ce_ib": 4.068053245544434, + "ce_orig": 1.2774821519851685, + "epoch": 1.098353584010353, + "kl_loss": 0.17934653162956238, + "loss_ib": 0.0058615184389054775, + "step": 3820 + }, + { + "ce_ib": 3.864989995956421, + "ce_orig": 0.8884190320968628, + "epoch": 1.098353584010353, + "kl_loss": 0.21468086540699005, + "loss_ib": 0.006011798977851868, + "step": 3820 + }, + { + "ce_ib": 1.4389941692352295, + "ce_orig": 0.404253214597702, + "epoch": 1.0986411675893306, + "kl_loss": 0.08377191424369812, + "loss_ib": 0.0022767132613807917, + "step": 3821 + }, + { + "ce_ib": 5.949143886566162, + "ce_orig": 1.3370838165283203, + "epoch": 1.0986411675893306, + "kl_loss": 0.17116372287273407, + "loss_ib": 0.007660781033337116, + "step": 3821 + }, + { + "ce_ib": 3.6075541973114014, + "ce_orig": 0.6797505617141724, + "epoch": 1.0986411675893306, + "kl_loss": 0.260285884141922, + "loss_ib": 0.006210412830114365, + "step": 3821 + }, + { + "ce_ib": 3.183039903640747, + "ce_orig": 0.9051612019538879, + "epoch": 1.0986411675893306, + "kl_loss": 0.13108620047569275, + "loss_ib": 0.00449390197172761, + "step": 3821 + }, + { + "ce_ib": 2.02805757522583, + "ce_orig": 0.7454923987388611, + "epoch": 1.0989287511683083, + "kl_loss": 0.14916476607322693, + "loss_ib": 0.0035197052638977766, + "step": 3822 + }, + { + "ce_ib": 3.5280892848968506, + "ce_orig": 1.0006369352340698, + "epoch": 1.0989287511683083, + "kl_loss": 0.0940721333026886, + "loss_ib": 0.004468810278922319, + "step": 3822 + }, + { + "ce_ib": 2.771066427230835, + "ce_orig": 0.7736677527427673, + "epoch": 1.0989287511683083, + "kl_loss": 0.1770014762878418, + "loss_ib": 0.0045410809107124805, + "step": 3822 + }, + { + "ce_ib": 5.617350101470947, + "ce_orig": 1.3774465322494507, + "epoch": 1.0989287511683083, + "kl_loss": 0.18190234899520874, + "loss_ib": 0.007436373736709356, + "step": 3822 + }, + { + "ce_ib": 3.4309146404266357, + "ce_orig": 0.514008104801178, + "epoch": 1.0992163347472859, + "kl_loss": 0.2674559950828552, + "loss_ib": 0.006105474196374416, + "step": 3823 + }, + { + "ce_ib": 2.2286598682403564, + "ce_orig": 0.47107061743736267, + "epoch": 1.0992163347472859, + "kl_loss": 0.17578253149986267, + "loss_ib": 0.003986484836786985, + "step": 3823 + }, + { + "ce_ib": 3.7400968074798584, + "ce_orig": 0.5670531988143921, + "epoch": 1.0992163347472859, + "kl_loss": 0.17180278897285461, + "loss_ib": 0.005458124913275242, + "step": 3823 + }, + { + "ce_ib": 3.8233461380004883, + "ce_orig": 0.8644112944602966, + "epoch": 1.0992163347472859, + "kl_loss": 0.16994816064834595, + "loss_ib": 0.005522828083485365, + "step": 3823 + }, + { + "ce_ib": 3.749424934387207, + "ce_orig": 0.8371354937553406, + "epoch": 1.0995039183262636, + "kl_loss": 0.22254666686058044, + "loss_ib": 0.005974891595542431, + "step": 3824 + }, + { + "ce_ib": 2.667902708053589, + "ce_orig": 0.6827396750450134, + "epoch": 1.0995039183262636, + "kl_loss": 0.196720153093338, + "loss_ib": 0.004635103978216648, + "step": 3824 + }, + { + "ce_ib": 2.696460247039795, + "ce_orig": 0.782950222492218, + "epoch": 1.0995039183262636, + "kl_loss": 0.22884529829025269, + "loss_ib": 0.004984912928193808, + "step": 3824 + }, + { + "ce_ib": 2.2358524799346924, + "ce_orig": 0.7004312872886658, + "epoch": 1.0995039183262636, + "kl_loss": 0.14321278035640717, + "loss_ib": 0.003667980432510376, + "step": 3824 + }, + { + "epoch": 1.099791501905241, + "grad_norm": 0.153550386428833, + "learning_rate": 7.320972410677766e-06, + "loss": 0.8708, + "step": 3825 + }, + { + "ce_ib": 5.059398174285889, + "ce_orig": 1.4568535089492798, + "epoch": 1.099791501905241, + "kl_loss": 0.13811245560646057, + "loss_ib": 0.006440522614866495, + "step": 3825 + }, + { + "ce_ib": 2.6097166538238525, + "ce_orig": 0.6937264800071716, + "epoch": 1.099791501905241, + "kl_loss": 0.16385263204574585, + "loss_ib": 0.004248242825269699, + "step": 3825 + }, + { + "ce_ib": 4.842064380645752, + "ce_orig": 0.9209445714950562, + "epoch": 1.099791501905241, + "kl_loss": 0.22719122469425201, + "loss_ib": 0.007113976404070854, + "step": 3825 + }, + { + "ce_ib": 3.368427038192749, + "ce_orig": 0.8958361148834229, + "epoch": 1.099791501905241, + "kl_loss": 0.1984843909740448, + "loss_ib": 0.005353270564228296, + "step": 3825 + }, + { + "ce_ib": 2.953347682952881, + "ce_orig": 0.5182512998580933, + "epoch": 1.1000790854842188, + "kl_loss": 0.17722409963607788, + "loss_ib": 0.0047255889512598515, + "step": 3826 + }, + { + "ce_ib": 2.9522438049316406, + "ce_orig": 0.912132740020752, + "epoch": 1.1000790854842188, + "kl_loss": 0.16131556034088135, + "loss_ib": 0.00456539960578084, + "step": 3826 + }, + { + "ce_ib": 2.8142247200012207, + "ce_orig": 0.6512658596038818, + "epoch": 1.1000790854842188, + "kl_loss": 0.11536979675292969, + "loss_ib": 0.003967922646552324, + "step": 3826 + }, + { + "ce_ib": 1.9953912496566772, + "ce_orig": 0.5535604953765869, + "epoch": 1.1000790854842188, + "kl_loss": 0.23223914206027985, + "loss_ib": 0.0043177823536098, + "step": 3826 + }, + { + "ce_ib": 2.3219332695007324, + "ce_orig": 0.6814286708831787, + "epoch": 1.1003666690631966, + "kl_loss": 0.16786889731884003, + "loss_ib": 0.004000622313469648, + "step": 3827 + }, + { + "ce_ib": 5.678603649139404, + "ce_orig": 1.399429440498352, + "epoch": 1.1003666690631966, + "kl_loss": 0.2787386476993561, + "loss_ib": 0.008465989492833614, + "step": 3827 + }, + { + "ce_ib": 2.8982994556427, + "ce_orig": 0.842406153678894, + "epoch": 1.1003666690631966, + "kl_loss": 0.20240098237991333, + "loss_ib": 0.004922308959066868, + "step": 3827 + }, + { + "ce_ib": 2.705486536026001, + "ce_orig": 0.5108453035354614, + "epoch": 1.1003666690631966, + "kl_loss": 0.18954527378082275, + "loss_ib": 0.004600939340889454, + "step": 3827 + }, + { + "ce_ib": 5.414114475250244, + "ce_orig": 1.2386281490325928, + "epoch": 1.100654252642174, + "kl_loss": 0.15053164958953857, + "loss_ib": 0.006919431034475565, + "step": 3828 + }, + { + "ce_ib": 6.594316482543945, + "ce_orig": 0.8303842544555664, + "epoch": 1.100654252642174, + "kl_loss": 0.16744211316108704, + "loss_ib": 0.008268737234175205, + "step": 3828 + }, + { + "ce_ib": 2.972649097442627, + "ce_orig": 0.7595054507255554, + "epoch": 1.100654252642174, + "kl_loss": 0.19815441966056824, + "loss_ib": 0.004954193253070116, + "step": 3828 + }, + { + "ce_ib": 4.878174781799316, + "ce_orig": 0.9602627754211426, + "epoch": 1.100654252642174, + "kl_loss": 0.1625174880027771, + "loss_ib": 0.006503349170088768, + "step": 3828 + }, + { + "ce_ib": 4.615379810333252, + "ce_orig": 1.2566007375717163, + "epoch": 1.1009418362211518, + "kl_loss": 0.1762034296989441, + "loss_ib": 0.006377414334565401, + "step": 3829 + }, + { + "ce_ib": 2.5846500396728516, + "ce_orig": 0.40856847167015076, + "epoch": 1.1009418362211518, + "kl_loss": 0.17269942164421082, + "loss_ib": 0.004311644472181797, + "step": 3829 + }, + { + "ce_ib": 3.2267751693725586, + "ce_orig": 0.7280082106590271, + "epoch": 1.1009418362211518, + "kl_loss": 0.25705355405807495, + "loss_ib": 0.00579731073230505, + "step": 3829 + }, + { + "ce_ib": 4.348836898803711, + "ce_orig": 0.9549144506454468, + "epoch": 1.1009418362211518, + "kl_loss": 0.19030123949050903, + "loss_ib": 0.006251848768442869, + "step": 3829 + }, + { + "epoch": 1.1012294198001293, + "grad_norm": 0.16580691933631897, + "learning_rate": 7.3140955730033355e-06, + "loss": 0.8223, + "step": 3830 + }, + { + "ce_ib": 4.3063859939575195, + "ce_orig": 0.7384281158447266, + "epoch": 1.1012294198001293, + "kl_loss": 0.16960611939430237, + "loss_ib": 0.006002447567880154, + "step": 3830 + }, + { + "ce_ib": 2.6779825687408447, + "ce_orig": 0.6657256484031677, + "epoch": 1.1012294198001293, + "kl_loss": 0.13845586776733398, + "loss_ib": 0.0040625412948429585, + "step": 3830 + }, + { + "ce_ib": 3.631525993347168, + "ce_orig": 0.742749035358429, + "epoch": 1.1012294198001293, + "kl_loss": 0.13220806419849396, + "loss_ib": 0.004953606519848108, + "step": 3830 + }, + { + "ce_ib": 2.855379581451416, + "ce_orig": 0.4880990982055664, + "epoch": 1.1012294198001293, + "kl_loss": 0.1211700364947319, + "loss_ib": 0.004067080095410347, + "step": 3830 + }, + { + "ce_ib": 1.8236457109451294, + "ce_orig": 0.5755290389060974, + "epoch": 1.101517003379107, + "kl_loss": 0.19096305966377258, + "loss_ib": 0.003733276156708598, + "step": 3831 + }, + { + "ce_ib": 2.715730667114258, + "ce_orig": 0.7935112714767456, + "epoch": 1.101517003379107, + "kl_loss": 0.16611355543136597, + "loss_ib": 0.004376865923404694, + "step": 3831 + }, + { + "ce_ib": 3.0239574909210205, + "ce_orig": 0.5757982134819031, + "epoch": 1.101517003379107, + "kl_loss": 0.1350008249282837, + "loss_ib": 0.004373965784907341, + "step": 3831 + }, + { + "ce_ib": 3.056189775466919, + "ce_orig": 0.9073668718338013, + "epoch": 1.101517003379107, + "kl_loss": 0.13662835955619812, + "loss_ib": 0.004422473255544901, + "step": 3831 + }, + { + "ce_ib": 1.8814265727996826, + "ce_orig": 0.35456448793411255, + "epoch": 1.1018045869580848, + "kl_loss": 0.14045152068138123, + "loss_ib": 0.003285941667854786, + "step": 3832 + }, + { + "ce_ib": 4.988076686859131, + "ce_orig": 1.035435676574707, + "epoch": 1.1018045869580848, + "kl_loss": 0.22385698556900024, + "loss_ib": 0.007226646412163973, + "step": 3832 + }, + { + "ce_ib": 4.495808124542236, + "ce_orig": 0.716457188129425, + "epoch": 1.1018045869580848, + "kl_loss": 0.1467728316783905, + "loss_ib": 0.005963536445051432, + "step": 3832 + }, + { + "ce_ib": 1.7908203601837158, + "ce_orig": 0.6901408433914185, + "epoch": 1.1018045869580848, + "kl_loss": 0.10515975207090378, + "loss_ib": 0.0028424179181456566, + "step": 3832 + }, + { + "ce_ib": 3.6785757541656494, + "ce_orig": 0.9397958517074585, + "epoch": 1.1020921705370623, + "kl_loss": 0.20891065895557404, + "loss_ib": 0.00576768210157752, + "step": 3833 + }, + { + "ce_ib": 2.9083240032196045, + "ce_orig": 0.3953090012073517, + "epoch": 1.1020921705370623, + "kl_loss": 0.24668243527412415, + "loss_ib": 0.005375147797167301, + "step": 3833 + }, + { + "ce_ib": 3.482262372970581, + "ce_orig": 0.8739322423934937, + "epoch": 1.1020921705370623, + "kl_loss": 0.140113964676857, + "loss_ib": 0.004883402027189732, + "step": 3833 + }, + { + "ce_ib": 4.363138198852539, + "ce_orig": 1.0044543743133545, + "epoch": 1.1020921705370623, + "kl_loss": 0.16296818852424622, + "loss_ib": 0.005992820020765066, + "step": 3833 + }, + { + "ce_ib": 5.589554786682129, + "ce_orig": 0.7818815112113953, + "epoch": 1.10237975411604, + "kl_loss": 0.19106879830360413, + "loss_ib": 0.007500242907553911, + "step": 3834 + }, + { + "ce_ib": 2.5936036109924316, + "ce_orig": 0.5074875950813293, + "epoch": 1.10237975411604, + "kl_loss": 0.13479077816009521, + "loss_ib": 0.003941511269658804, + "step": 3834 + }, + { + "ce_ib": 4.023245811462402, + "ce_orig": 1.141403317451477, + "epoch": 1.10237975411604, + "kl_loss": 0.15276707708835602, + "loss_ib": 0.005550916772335768, + "step": 3834 + }, + { + "ce_ib": 2.911449432373047, + "ce_orig": 0.6610931158065796, + "epoch": 1.10237975411604, + "kl_loss": 0.15254855155944824, + "loss_ib": 0.00443693483248353, + "step": 3834 + }, + { + "epoch": 1.1026673376950176, + "grad_norm": 0.12520357966423035, + "learning_rate": 7.307213160135558e-06, + "loss": 0.787, + "step": 3835 + }, + { + "ce_ib": 2.600886583328247, + "ce_orig": 0.491058886051178, + "epoch": 1.1026673376950176, + "kl_loss": 0.23289449512958527, + "loss_ib": 0.004929831717163324, + "step": 3835 + }, + { + "ce_ib": 2.219325065612793, + "ce_orig": 0.6295863389968872, + "epoch": 1.1026673376950176, + "kl_loss": 0.19539958238601685, + "loss_ib": 0.004173321183770895, + "step": 3835 + }, + { + "ce_ib": 2.872145652770996, + "ce_orig": 0.5340806841850281, + "epoch": 1.1026673376950176, + "kl_loss": 0.16951650381088257, + "loss_ib": 0.004567310679703951, + "step": 3835 + }, + { + "ce_ib": 2.5370571613311768, + "ce_orig": 0.7371517419815063, + "epoch": 1.1026673376950176, + "kl_loss": 0.11538409441709518, + "loss_ib": 0.0036908979527652264, + "step": 3835 + }, + { + "ce_ib": 4.442179203033447, + "ce_orig": 0.6439678072929382, + "epoch": 1.1029549212739953, + "kl_loss": 0.2175716757774353, + "loss_ib": 0.006617895793169737, + "step": 3836 + }, + { + "ce_ib": 3.979642152786255, + "ce_orig": 0.7221947908401489, + "epoch": 1.1029549212739953, + "kl_loss": 0.10057833790779114, + "loss_ib": 0.004985425155609846, + "step": 3836 + }, + { + "ce_ib": 3.0168020725250244, + "ce_orig": 0.7335060834884644, + "epoch": 1.1029549212739953, + "kl_loss": 0.16390575468540192, + "loss_ib": 0.004655859433114529, + "step": 3836 + }, + { + "ce_ib": 4.8302836418151855, + "ce_orig": 0.99137943983078, + "epoch": 1.1029549212739953, + "kl_loss": 0.16117730736732483, + "loss_ib": 0.006442056968808174, + "step": 3836 + }, + { + "ce_ib": 4.346684455871582, + "ce_orig": 1.044454574584961, + "epoch": 1.1032425048529728, + "kl_loss": 0.1464017629623413, + "loss_ib": 0.0058107017539441586, + "step": 3837 + }, + { + "ce_ib": 2.0355753898620605, + "ce_orig": 0.5881980061531067, + "epoch": 1.1032425048529728, + "kl_loss": 0.1863154172897339, + "loss_ib": 0.00389872957020998, + "step": 3837 + }, + { + "ce_ib": 5.053615570068359, + "ce_orig": 0.8143319487571716, + "epoch": 1.1032425048529728, + "kl_loss": 0.19692759215831757, + "loss_ib": 0.0070228916592895985, + "step": 3837 + }, + { + "ce_ib": 4.845906734466553, + "ce_orig": 1.091579556465149, + "epoch": 1.1032425048529728, + "kl_loss": 0.09501354396343231, + "loss_ib": 0.005796041805297136, + "step": 3837 + }, + { + "ce_ib": 3.9367246627807617, + "ce_orig": 1.0530844926834106, + "epoch": 1.1035300884319506, + "kl_loss": 0.09378275275230408, + "loss_ib": 0.004874552134424448, + "step": 3838 + }, + { + "ce_ib": 4.376399517059326, + "ce_orig": 0.9719102382659912, + "epoch": 1.1035300884319506, + "kl_loss": 0.3069656491279602, + "loss_ib": 0.007446055766195059, + "step": 3838 + }, + { + "ce_ib": 5.150167942047119, + "ce_orig": 1.0060268640518188, + "epoch": 1.1035300884319506, + "kl_loss": 0.21773168444633484, + "loss_ib": 0.0073274848982691765, + "step": 3838 + }, + { + "ce_ib": 4.8941264152526855, + "ce_orig": 0.9423146843910217, + "epoch": 1.1035300884319506, + "kl_loss": 0.21802395582199097, + "loss_ib": 0.007074365392327309, + "step": 3838 + }, + { + "ce_ib": 2.6089086532592773, + "ce_orig": 0.9696374535560608, + "epoch": 1.103817672010928, + "kl_loss": 0.15269477665424347, + "loss_ib": 0.004135856404900551, + "step": 3839 + }, + { + "ce_ib": 1.5067003965377808, + "ce_orig": 0.34351104497909546, + "epoch": 1.103817672010928, + "kl_loss": 0.22071349620819092, + "loss_ib": 0.003713835496455431, + "step": 3839 + }, + { + "ce_ib": 2.0637142658233643, + "ce_orig": 0.4688915014266968, + "epoch": 1.103817672010928, + "kl_loss": 0.156806081533432, + "loss_ib": 0.0036317750345915556, + "step": 3839 + }, + { + "ce_ib": 4.888476848602295, + "ce_orig": 0.673945963382721, + "epoch": 1.103817672010928, + "kl_loss": 0.10609680414199829, + "loss_ib": 0.005949444603174925, + "step": 3839 + }, + { + "epoch": 1.1041052555899058, + "grad_norm": 0.12976223230361938, + "learning_rate": 7.300325188655762e-06, + "loss": 0.8622, + "step": 3840 + }, + { + "ce_ib": 2.0734779834747314, + "ce_orig": 0.7371241450309753, + "epoch": 1.1041052555899058, + "kl_loss": 0.18738311529159546, + "loss_ib": 0.003947309218347073, + "step": 3840 + }, + { + "ce_ib": 2.905463457107544, + "ce_orig": 0.7481424808502197, + "epoch": 1.1041052555899058, + "kl_loss": 0.17621591687202454, + "loss_ib": 0.0046676225028932095, + "step": 3840 + }, + { + "ce_ib": 3.3095476627349854, + "ce_orig": 0.7339060306549072, + "epoch": 1.1041052555899058, + "kl_loss": 0.19899524748325348, + "loss_ib": 0.005299500189721584, + "step": 3840 + }, + { + "ce_ib": 2.5088906288146973, + "ce_orig": 0.5923645496368408, + "epoch": 1.1041052555899058, + "kl_loss": 0.1096985936164856, + "loss_ib": 0.0036058765836060047, + "step": 3840 + }, + { + "ce_ib": 2.767529010772705, + "ce_orig": 0.5936924815177917, + "epoch": 1.1043928391688835, + "kl_loss": 0.1611221730709076, + "loss_ib": 0.004378750920295715, + "step": 3841 + }, + { + "ce_ib": 3.3252179622650146, + "ce_orig": 0.5475689768791199, + "epoch": 1.1043928391688835, + "kl_loss": 0.1826860010623932, + "loss_ib": 0.005152077414095402, + "step": 3841 + }, + { + "ce_ib": 2.9810128211975098, + "ce_orig": 0.5119138360023499, + "epoch": 1.1043928391688835, + "kl_loss": 0.15599265694618225, + "loss_ib": 0.004540939349681139, + "step": 3841 + }, + { + "ce_ib": 5.2648420333862305, + "ce_orig": 1.048999309539795, + "epoch": 1.1043928391688835, + "kl_loss": 0.17182034254074097, + "loss_ib": 0.006983045022934675, + "step": 3841 + }, + { + "ce_ib": 1.8205058574676514, + "ce_orig": 0.33179599046707153, + "epoch": 1.104680422747861, + "kl_loss": 0.29276785254478455, + "loss_ib": 0.004748184233903885, + "step": 3842 + }, + { + "ce_ib": 2.332545280456543, + "ce_orig": 0.8114911913871765, + "epoch": 1.104680422747861, + "kl_loss": 0.5526843070983887, + "loss_ib": 0.007859388366341591, + "step": 3842 + }, + { + "ce_ib": 2.0062575340270996, + "ce_orig": 0.4947834312915802, + "epoch": 1.104680422747861, + "kl_loss": 0.1254926323890686, + "loss_ib": 0.003261184087023139, + "step": 3842 + }, + { + "ce_ib": 3.0962445735931396, + "ce_orig": 0.8504034280776978, + "epoch": 1.104680422747861, + "kl_loss": 0.14549092948436737, + "loss_ib": 0.004551153630018234, + "step": 3842 + }, + { + "ce_ib": 4.194700717926025, + "ce_orig": 0.8181700110435486, + "epoch": 1.1049680063268388, + "kl_loss": 0.33655664324760437, + "loss_ib": 0.007560267113149166, + "step": 3843 + }, + { + "ce_ib": 4.4072184562683105, + "ce_orig": 0.9970977902412415, + "epoch": 1.1049680063268388, + "kl_loss": 0.1664072871208191, + "loss_ib": 0.00607129093259573, + "step": 3843 + }, + { + "ce_ib": 1.846630334854126, + "ce_orig": 0.5550662279129028, + "epoch": 1.1049680063268388, + "kl_loss": 0.13076862692832947, + "loss_ib": 0.0031543164514005184, + "step": 3843 + }, + { + "ce_ib": 3.4546878337860107, + "ce_orig": 0.6510663628578186, + "epoch": 1.1049680063268388, + "kl_loss": 0.20980900526046753, + "loss_ib": 0.005552778020501137, + "step": 3843 + }, + { + "ce_ib": 4.954936504364014, + "ce_orig": 0.7583306431770325, + "epoch": 1.1052555899058163, + "kl_loss": 0.4390001595020294, + "loss_ib": 0.009344937279820442, + "step": 3844 + }, + { + "ce_ib": 2.5170371532440186, + "ce_orig": 0.6359765529632568, + "epoch": 1.1052555899058163, + "kl_loss": 0.15491828322410583, + "loss_ib": 0.00406622001901269, + "step": 3844 + }, + { + "ce_ib": 5.592846393585205, + "ce_orig": 1.0228822231292725, + "epoch": 1.1052555899058163, + "kl_loss": 0.14030253887176514, + "loss_ib": 0.006995871663093567, + "step": 3844 + }, + { + "ce_ib": 2.7377543449401855, + "ce_orig": 0.6413487195968628, + "epoch": 1.1052555899058163, + "kl_loss": 0.22331924736499786, + "loss_ib": 0.004970946814864874, + "step": 3844 + }, + { + "epoch": 1.105543173484794, + "grad_norm": 0.15800820291042328, + "learning_rate": 7.293431675158669e-06, + "loss": 0.8199, + "step": 3845 + }, + { + "ce_ib": 5.696290016174316, + "ce_orig": 1.0802072286605835, + "epoch": 1.105543173484794, + "kl_loss": 0.17777645587921143, + "loss_ib": 0.007474054582417011, + "step": 3845 + }, + { + "ce_ib": 3.5648252964019775, + "ce_orig": 0.8706973791122437, + "epoch": 1.105543173484794, + "kl_loss": 0.1282912641763687, + "loss_ib": 0.0048477379605174065, + "step": 3845 + }, + { + "ce_ib": 1.821610450744629, + "ce_orig": 0.5566112399101257, + "epoch": 1.105543173484794, + "kl_loss": 0.16240738332271576, + "loss_ib": 0.0034456842113286257, + "step": 3845 + }, + { + "ce_ib": 2.0744669437408447, + "ce_orig": 0.61597740650177, + "epoch": 1.105543173484794, + "kl_loss": 0.1789640188217163, + "loss_ib": 0.0038641069550067186, + "step": 3845 + }, + { + "ce_ib": 3.1820731163024902, + "ce_orig": 0.6557970643043518, + "epoch": 1.1058307570637718, + "kl_loss": 0.1451619267463684, + "loss_ib": 0.004633692558854818, + "step": 3846 + }, + { + "ce_ib": 3.5771119594573975, + "ce_orig": 0.8793586492538452, + "epoch": 1.1058307570637718, + "kl_loss": 0.11066000908613205, + "loss_ib": 0.0046837120316922665, + "step": 3846 + }, + { + "ce_ib": 3.9495465755462646, + "ce_orig": 1.1206910610198975, + "epoch": 1.1058307570637718, + "kl_loss": 0.15856169164180756, + "loss_ib": 0.00553516298532486, + "step": 3846 + }, + { + "ce_ib": 6.250165939331055, + "ce_orig": 1.6232621669769287, + "epoch": 1.1058307570637718, + "kl_loss": 0.2749707102775574, + "loss_ib": 0.008999872952699661, + "step": 3846 + }, + { + "ce_ib": 1.881723403930664, + "ce_orig": 0.49641990661621094, + "epoch": 1.1061183406427493, + "kl_loss": 0.20848916471004486, + "loss_ib": 0.003966615069657564, + "step": 3847 + }, + { + "ce_ib": 2.047736167907715, + "ce_orig": 0.3885999619960785, + "epoch": 1.1061183406427493, + "kl_loss": 0.1875177025794983, + "loss_ib": 0.003922912757843733, + "step": 3847 + }, + { + "ce_ib": 2.766779661178589, + "ce_orig": 0.49112775921821594, + "epoch": 1.1061183406427493, + "kl_loss": 0.1845327913761139, + "loss_ib": 0.004612107295542955, + "step": 3847 + }, + { + "ce_ib": 4.03750467300415, + "ce_orig": 1.0020259618759155, + "epoch": 1.1061183406427493, + "kl_loss": 0.1811739057302475, + "loss_ib": 0.005849244073033333, + "step": 3847 + }, + { + "ce_ib": 2.1504836082458496, + "ce_orig": 0.5151575207710266, + "epoch": 1.106405924221727, + "kl_loss": 0.11053891479969025, + "loss_ib": 0.003255872754380107, + "step": 3848 + }, + { + "ce_ib": 6.733302116394043, + "ce_orig": 1.2778698205947876, + "epoch": 1.106405924221727, + "kl_loss": 0.15041469037532806, + "loss_ib": 0.008237449452280998, + "step": 3848 + }, + { + "ce_ib": 3.712709903717041, + "ce_orig": 0.9912630319595337, + "epoch": 1.106405924221727, + "kl_loss": 0.1282046139240265, + "loss_ib": 0.004994756076484919, + "step": 3848 + }, + { + "ce_ib": 2.9396843910217285, + "ce_orig": 0.7018322348594666, + "epoch": 1.106405924221727, + "kl_loss": 0.1473536193370819, + "loss_ib": 0.004413220565766096, + "step": 3848 + }, + { + "ce_ib": 2.556424379348755, + "ce_orig": 0.47638702392578125, + "epoch": 1.1066935078007045, + "kl_loss": 0.125730499625206, + "loss_ib": 0.003813729155808687, + "step": 3849 + }, + { + "ce_ib": 4.22622537612915, + "ce_orig": 1.1821205615997314, + "epoch": 1.1066935078007045, + "kl_loss": 0.2876165807247162, + "loss_ib": 0.007102390751242638, + "step": 3849 + }, + { + "ce_ib": 3.146923303604126, + "ce_orig": 0.842265248298645, + "epoch": 1.1066935078007045, + "kl_loss": 0.26907920837402344, + "loss_ib": 0.005837715696543455, + "step": 3849 + }, + { + "ce_ib": 4.340806484222412, + "ce_orig": 1.165098786354065, + "epoch": 1.1066935078007045, + "kl_loss": 0.1466759741306305, + "loss_ib": 0.0058075664564967155, + "step": 3849 + }, + { + "epoch": 1.1069810913796823, + "grad_norm": 0.13852111995220184, + "learning_rate": 7.286532636252354e-06, + "loss": 0.8333, + "step": 3850 + }, + { + "ce_ib": 6.752663612365723, + "ce_orig": 1.7219890356063843, + "epoch": 1.1069810913796823, + "kl_loss": 0.17357975244522095, + "loss_ib": 0.008488461375236511, + "step": 3850 + }, + { + "ce_ib": 2.9015917778015137, + "ce_orig": 0.507719874382019, + "epoch": 1.1069810913796823, + "kl_loss": 0.1269322633743286, + "loss_ib": 0.004170914646238089, + "step": 3850 + }, + { + "ce_ib": 2.2131385803222656, + "ce_orig": 0.4457644522190094, + "epoch": 1.1069810913796823, + "kl_loss": 0.1573050618171692, + "loss_ib": 0.0037861892487853765, + "step": 3850 + }, + { + "ce_ib": 3.789783000946045, + "ce_orig": 0.7035828232765198, + "epoch": 1.1069810913796823, + "kl_loss": 0.16926296055316925, + "loss_ib": 0.005482412409037352, + "step": 3850 + }, + { + "ce_ib": 2.8627102375030518, + "ce_orig": 0.362336128950119, + "epoch": 1.1072686749586598, + "kl_loss": 0.24374090135097504, + "loss_ib": 0.005300119519233704, + "step": 3851 + }, + { + "ce_ib": 2.9451777935028076, + "ce_orig": 0.5352604389190674, + "epoch": 1.1072686749586598, + "kl_loss": 0.12273313105106354, + "loss_ib": 0.00417250907048583, + "step": 3851 + }, + { + "ce_ib": 1.8742448091506958, + "ce_orig": 0.5352945923805237, + "epoch": 1.1072686749586598, + "kl_loss": 0.14514365792274475, + "loss_ib": 0.0033256812021136284, + "step": 3851 + }, + { + "ce_ib": 2.997243881225586, + "ce_orig": 0.6050189733505249, + "epoch": 1.1072686749586598, + "kl_loss": 0.14950203895568848, + "loss_ib": 0.004492264240980148, + "step": 3851 + }, + { + "ce_ib": 4.088932991027832, + "ce_orig": 1.1073566675186157, + "epoch": 1.1075562585376375, + "kl_loss": 0.14280107617378235, + "loss_ib": 0.005516943987458944, + "step": 3852 + }, + { + "ce_ib": 3.7400026321411133, + "ce_orig": 0.9817709922790527, + "epoch": 1.1075562585376375, + "kl_loss": 0.17718914151191711, + "loss_ib": 0.005511893890798092, + "step": 3852 + }, + { + "ce_ib": 2.3268203735351562, + "ce_orig": 0.7182794809341431, + "epoch": 1.1075562585376375, + "kl_loss": 0.13267722725868225, + "loss_ib": 0.0036535924300551414, + "step": 3852 + }, + { + "ce_ib": 5.215790271759033, + "ce_orig": 1.2452930212020874, + "epoch": 1.1075562585376375, + "kl_loss": 0.20635275542736053, + "loss_ib": 0.0072793178260326385, + "step": 3852 + }, + { + "ce_ib": 4.668699741363525, + "ce_orig": 1.0588072538375854, + "epoch": 1.1078438421166152, + "kl_loss": 0.16677333414554596, + "loss_ib": 0.006336433347314596, + "step": 3853 + }, + { + "ce_ib": 3.5998754501342773, + "ce_orig": 1.1184412240982056, + "epoch": 1.1078438421166152, + "kl_loss": 0.15278729796409607, + "loss_ib": 0.005127748008817434, + "step": 3853 + }, + { + "ce_ib": 3.9370458126068115, + "ce_orig": 1.025899887084961, + "epoch": 1.1078438421166152, + "kl_loss": 0.17069348692893982, + "loss_ib": 0.005643980577588081, + "step": 3853 + }, + { + "ce_ib": 7.24123477935791, + "ce_orig": 1.6084328889846802, + "epoch": 1.1078438421166152, + "kl_loss": 0.118479885160923, + "loss_ib": 0.008426033891737461, + "step": 3853 + }, + { + "ce_ib": 3.6139557361602783, + "ce_orig": 0.8437954783439636, + "epoch": 1.1081314256955928, + "kl_loss": 0.2027057707309723, + "loss_ib": 0.0056410133838653564, + "step": 3854 + }, + { + "ce_ib": 2.0957016944885254, + "ce_orig": 0.3926735818386078, + "epoch": 1.1081314256955928, + "kl_loss": 0.33186668157577515, + "loss_ib": 0.005414368584752083, + "step": 3854 + }, + { + "ce_ib": 2.332768440246582, + "ce_orig": 0.6810956597328186, + "epoch": 1.1081314256955928, + "kl_loss": 0.12335310131311417, + "loss_ib": 0.003566299332305789, + "step": 3854 + }, + { + "ce_ib": 6.737444877624512, + "ce_orig": 1.98994779586792, + "epoch": 1.1081314256955928, + "kl_loss": 0.19074326753616333, + "loss_ib": 0.008644877932965755, + "step": 3854 + }, + { + "epoch": 1.1084190092745705, + "grad_norm": 0.1289617270231247, + "learning_rate": 7.279628088558202e-06, + "loss": 0.9169, + "step": 3855 + }, + { + "ce_ib": 1.4847725629806519, + "ce_orig": 0.4937531352043152, + "epoch": 1.1084190092745705, + "kl_loss": 0.09553499519824982, + "loss_ib": 0.0024401224218308926, + "step": 3855 + }, + { + "ce_ib": 2.956536054611206, + "ce_orig": 0.522971510887146, + "epoch": 1.1084190092745705, + "kl_loss": 0.2760862708091736, + "loss_ib": 0.005717399064451456, + "step": 3855 + }, + { + "ce_ib": 3.882793664932251, + "ce_orig": 1.1864640712738037, + "epoch": 1.1084190092745705, + "kl_loss": 0.3430764079093933, + "loss_ib": 0.007313557900488377, + "step": 3855 + }, + { + "ce_ib": 1.9292471408843994, + "ce_orig": 0.36298927664756775, + "epoch": 1.1084190092745705, + "kl_loss": 0.11174611002206802, + "loss_ib": 0.0030467084143310785, + "step": 3855 + }, + { + "ce_ib": 3.636406183242798, + "ce_orig": 0.8502426147460938, + "epoch": 1.108706592853548, + "kl_loss": 0.09097196906805038, + "loss_ib": 0.004546125885099173, + "step": 3856 + }, + { + "ce_ib": 5.368165969848633, + "ce_orig": 1.4093492031097412, + "epoch": 1.108706592853548, + "kl_loss": 0.1826288104057312, + "loss_ib": 0.007194453850388527, + "step": 3856 + }, + { + "ce_ib": 6.9724225997924805, + "ce_orig": 1.4637279510498047, + "epoch": 1.108706592853548, + "kl_loss": 0.12949222326278687, + "loss_ib": 0.008267344906926155, + "step": 3856 + }, + { + "ce_ib": 2.697934627532959, + "ce_orig": 0.6762184500694275, + "epoch": 1.108706592853548, + "kl_loss": 0.16239649057388306, + "loss_ib": 0.004321899730712175, + "step": 3856 + }, + { + "ce_ib": 3.7059576511383057, + "ce_orig": 0.8498059511184692, + "epoch": 1.1089941764325257, + "kl_loss": 0.13524581491947174, + "loss_ib": 0.005058416165411472, + "step": 3857 + }, + { + "ce_ib": 4.278809070587158, + "ce_orig": 0.9501226544380188, + "epoch": 1.1089941764325257, + "kl_loss": 0.14951765537261963, + "loss_ib": 0.005773985292762518, + "step": 3857 + }, + { + "ce_ib": 3.0467464923858643, + "ce_orig": 0.7291195392608643, + "epoch": 1.1089941764325257, + "kl_loss": 0.11764383316040039, + "loss_ib": 0.004223184660077095, + "step": 3857 + }, + { + "ce_ib": 4.420482635498047, + "ce_orig": 1.1415776014328003, + "epoch": 1.1089941764325257, + "kl_loss": 0.21346448361873627, + "loss_ib": 0.0065551274456083775, + "step": 3857 + }, + { + "ce_ib": 2.5268611907958984, + "ce_orig": 0.5060736536979675, + "epoch": 1.1092817600115032, + "kl_loss": 0.11323574185371399, + "loss_ib": 0.0036592185497283936, + "step": 3858 + }, + { + "ce_ib": 4.043040752410889, + "ce_orig": 0.39826762676239014, + "epoch": 1.1092817600115032, + "kl_loss": 0.21648576855659485, + "loss_ib": 0.00620789872482419, + "step": 3858 + }, + { + "ce_ib": 2.513256788253784, + "ce_orig": 0.6793027520179749, + "epoch": 1.1092817600115032, + "kl_loss": 0.08189038932323456, + "loss_ib": 0.003332160646095872, + "step": 3858 + }, + { + "ce_ib": 4.399348735809326, + "ce_orig": 1.2221975326538086, + "epoch": 1.1092817600115032, + "kl_loss": 0.1851048767566681, + "loss_ib": 0.006250397302210331, + "step": 3858 + }, + { + "ce_ib": 3.0161969661712646, + "ce_orig": 0.7363792061805725, + "epoch": 1.109569343590481, + "kl_loss": 0.196416974067688, + "loss_ib": 0.004980366677045822, + "step": 3859 + }, + { + "ce_ib": 5.272730350494385, + "ce_orig": 1.3288748264312744, + "epoch": 1.109569343590481, + "kl_loss": 0.14058977365493774, + "loss_ib": 0.006678628269582987, + "step": 3859 + }, + { + "ce_ib": 3.6856749057769775, + "ce_orig": 0.8047479391098022, + "epoch": 1.109569343590481, + "kl_loss": 0.14452412724494934, + "loss_ib": 0.005130915902554989, + "step": 3859 + }, + { + "ce_ib": 2.3452024459838867, + "ce_orig": 0.6172611117362976, + "epoch": 1.109569343590481, + "kl_loss": 0.3386082947254181, + "loss_ib": 0.005731285084038973, + "step": 3859 + }, + { + "epoch": 1.1098569271694587, + "grad_norm": 0.12936729192733765, + "learning_rate": 7.2727180487108725e-06, + "loss": 0.7964, + "step": 3860 + }, + { + "ce_ib": 2.669661521911621, + "ce_orig": 0.7878947257995605, + "epoch": 1.1098569271694587, + "kl_loss": 0.1185256838798523, + "loss_ib": 0.0038549182936549187, + "step": 3860 + }, + { + "ce_ib": 3.2135908603668213, + "ce_orig": 0.840018093585968, + "epoch": 1.1098569271694587, + "kl_loss": 0.16714942455291748, + "loss_ib": 0.004885084927082062, + "step": 3860 + }, + { + "ce_ib": 3.235281467437744, + "ce_orig": 0.7528116106987, + "epoch": 1.1098569271694587, + "kl_loss": 0.1751568764448166, + "loss_ib": 0.004986850079149008, + "step": 3860 + }, + { + "ce_ib": 1.5756034851074219, + "ce_orig": 0.4141422212123871, + "epoch": 1.1098569271694587, + "kl_loss": 0.11511504650115967, + "loss_ib": 0.002726753940805793, + "step": 3860 + }, + { + "ce_ib": 2.006010055541992, + "ce_orig": 0.7299872636795044, + "epoch": 1.1101445107484362, + "kl_loss": 0.1369653344154358, + "loss_ib": 0.003375663422048092, + "step": 3861 + }, + { + "ce_ib": 3.1419270038604736, + "ce_orig": 0.8836514353752136, + "epoch": 1.1101445107484362, + "kl_loss": 0.1138380616903305, + "loss_ib": 0.004280307795852423, + "step": 3861 + }, + { + "ce_ib": 3.952026844024658, + "ce_orig": 0.8774535059928894, + "epoch": 1.1101445107484362, + "kl_loss": 0.08998043835163116, + "loss_ib": 0.0048518311232328415, + "step": 3861 + }, + { + "ce_ib": 2.4251577854156494, + "ce_orig": 0.5135444402694702, + "epoch": 1.1101445107484362, + "kl_loss": 0.1803717315196991, + "loss_ib": 0.004228875041007996, + "step": 3861 + }, + { + "ce_ib": 3.036259174346924, + "ce_orig": 1.10435950756073, + "epoch": 1.110432094327414, + "kl_loss": 0.13238921761512756, + "loss_ib": 0.004360151011496782, + "step": 3862 + }, + { + "ce_ib": 2.2572968006134033, + "ce_orig": 0.5137426257133484, + "epoch": 1.110432094327414, + "kl_loss": 0.15597975254058838, + "loss_ib": 0.0038170944899320602, + "step": 3862 + }, + { + "ce_ib": 3.1617836952209473, + "ce_orig": 0.7773635983467102, + "epoch": 1.110432094327414, + "kl_loss": 0.16720423102378845, + "loss_ib": 0.0048338258638978004, + "step": 3862 + }, + { + "ce_ib": 3.5827796459198, + "ce_orig": 0.9740148782730103, + "epoch": 1.110432094327414, + "kl_loss": 0.14262303709983826, + "loss_ib": 0.005009009502828121, + "step": 3862 + }, + { + "ce_ib": 4.580894947052002, + "ce_orig": 0.996924102306366, + "epoch": 1.1107196779063915, + "kl_loss": 0.19428861141204834, + "loss_ib": 0.006523780524730682, + "step": 3863 + }, + { + "ce_ib": 3.629235029220581, + "ce_orig": 1.1625126600265503, + "epoch": 1.1107196779063915, + "kl_loss": 0.1447722613811493, + "loss_ib": 0.005076957866549492, + "step": 3863 + }, + { + "ce_ib": 2.4000308513641357, + "ce_orig": 0.6476693153381348, + "epoch": 1.1107196779063915, + "kl_loss": 0.08375473320484161, + "loss_ib": 0.003237578086555004, + "step": 3863 + }, + { + "ce_ib": 5.022688388824463, + "ce_orig": 1.1083382368087769, + "epoch": 1.1107196779063915, + "kl_loss": 0.23454216122627258, + "loss_ib": 0.007368110120296478, + "step": 3863 + }, + { + "ce_ib": 3.7690935134887695, + "ce_orig": 0.91131991147995, + "epoch": 1.1110072614853692, + "kl_loss": 0.18344952166080475, + "loss_ib": 0.005603588651865721, + "step": 3864 + }, + { + "ce_ib": 2.8577044010162354, + "ce_orig": 0.933471143245697, + "epoch": 1.1110072614853692, + "kl_loss": 0.1101096123456955, + "loss_ib": 0.003958800341933966, + "step": 3864 + }, + { + "ce_ib": 3.5338871479034424, + "ce_orig": 0.8842872977256775, + "epoch": 1.1110072614853692, + "kl_loss": 0.24897968769073486, + "loss_ib": 0.006023684050887823, + "step": 3864 + }, + { + "ce_ib": 3.298011541366577, + "ce_orig": 0.6407747268676758, + "epoch": 1.1110072614853692, + "kl_loss": 0.23584294319152832, + "loss_ib": 0.005656441207975149, + "step": 3864 + }, + { + "epoch": 1.1112948450643467, + "grad_norm": 0.12503477931022644, + "learning_rate": 7.265802533358255e-06, + "loss": 0.8005, + "step": 3865 + }, + { + "ce_ib": 2.7807676792144775, + "ce_orig": 0.6544929146766663, + "epoch": 1.1112948450643467, + "kl_loss": 0.15981684625148773, + "loss_ib": 0.004378936253488064, + "step": 3865 + }, + { + "ce_ib": 2.9346420764923096, + "ce_orig": 0.7639994621276855, + "epoch": 1.1112948450643467, + "kl_loss": 0.18505451083183289, + "loss_ib": 0.00478518707677722, + "step": 3865 + }, + { + "ce_ib": 3.2760276794433594, + "ce_orig": 0.6343129873275757, + "epoch": 1.1112948450643467, + "kl_loss": 0.20406100153923035, + "loss_ib": 0.005316637456417084, + "step": 3865 + }, + { + "ce_ib": 2.64369535446167, + "ce_orig": 0.7031075954437256, + "epoch": 1.1112948450643467, + "kl_loss": 0.14553411304950714, + "loss_ib": 0.004099036566913128, + "step": 3865 + }, + { + "ce_ib": 2.8026459217071533, + "ce_orig": 0.8262401819229126, + "epoch": 1.1115824286433245, + "kl_loss": 0.10280504077672958, + "loss_ib": 0.0038306962233036757, + "step": 3866 + }, + { + "ce_ib": 4.662644863128662, + "ce_orig": 1.3187991380691528, + "epoch": 1.1115824286433245, + "kl_loss": 0.14848628640174866, + "loss_ib": 0.0061475071124732494, + "step": 3866 + }, + { + "ce_ib": 2.9288570880889893, + "ce_orig": 0.5443669557571411, + "epoch": 1.1115824286433245, + "kl_loss": 0.16068057715892792, + "loss_ib": 0.004535662941634655, + "step": 3866 + }, + { + "ce_ib": 4.885924339294434, + "ce_orig": 0.5707956552505493, + "epoch": 1.1115824286433245, + "kl_loss": 0.1307169497013092, + "loss_ib": 0.006193093955516815, + "step": 3866 + }, + { + "ce_ib": 1.9985363483428955, + "ce_orig": 0.6237527132034302, + "epoch": 1.1118700122223022, + "kl_loss": 0.09090790152549744, + "loss_ib": 0.002907615154981613, + "step": 3867 + }, + { + "ce_ib": 4.608942031860352, + "ce_orig": 1.1325715780258179, + "epoch": 1.1118700122223022, + "kl_loss": 0.15532222390174866, + "loss_ib": 0.006162164267152548, + "step": 3867 + }, + { + "ce_ib": 3.173438787460327, + "ce_orig": 0.7749789953231812, + "epoch": 1.1118700122223022, + "kl_loss": 0.18682138621807098, + "loss_ib": 0.005041652824729681, + "step": 3867 + }, + { + "ce_ib": 2.6183841228485107, + "ce_orig": 0.710125744342804, + "epoch": 1.1118700122223022, + "kl_loss": 0.17228692770004272, + "loss_ib": 0.004341253079473972, + "step": 3867 + }, + { + "ce_ib": 3.0268194675445557, + "ce_orig": 0.7778295874595642, + "epoch": 1.1121575958012797, + "kl_loss": 0.14073646068572998, + "loss_ib": 0.004434184171259403, + "step": 3868 + }, + { + "ce_ib": 1.5156598091125488, + "ce_orig": 0.4390774071216583, + "epoch": 1.1121575958012797, + "kl_loss": 0.1252206414937973, + "loss_ib": 0.0027678662445396185, + "step": 3868 + }, + { + "ce_ib": 2.0370917320251465, + "ce_orig": 0.4470581114292145, + "epoch": 1.1121575958012797, + "kl_loss": 0.18715408444404602, + "loss_ib": 0.003908632323145866, + "step": 3868 + }, + { + "ce_ib": 5.610125541687012, + "ce_orig": 1.4580353498458862, + "epoch": 1.1121575958012797, + "kl_loss": 0.1992524266242981, + "loss_ib": 0.007602649740874767, + "step": 3868 + }, + { + "ce_ib": 1.8237416744232178, + "ce_orig": 0.6232101917266846, + "epoch": 1.1124451793802574, + "kl_loss": 0.13613170385360718, + "loss_ib": 0.0031850587110966444, + "step": 3869 + }, + { + "ce_ib": 1.5173838138580322, + "ce_orig": 0.41044551134109497, + "epoch": 1.1124451793802574, + "kl_loss": 0.12411466985940933, + "loss_ib": 0.002758530667051673, + "step": 3869 + }, + { + "ce_ib": 2.2141225337982178, + "ce_orig": 0.4427952170372009, + "epoch": 1.1124451793802574, + "kl_loss": 0.17780423164367676, + "loss_ib": 0.003992164507508278, + "step": 3869 + }, + { + "ce_ib": 5.264606475830078, + "ce_orig": 1.603646159172058, + "epoch": 1.1124451793802574, + "kl_loss": 0.17102551460266113, + "loss_ib": 0.006974861491471529, + "step": 3869 + }, + { + "epoch": 1.112732762959235, + "grad_norm": 0.14370043575763702, + "learning_rate": 7.258881559161431e-06, + "loss": 0.8913, + "step": 3870 + }, + { + "ce_ib": 2.7947897911071777, + "ce_orig": 0.7676835060119629, + "epoch": 1.112732762959235, + "kl_loss": 0.1683710515499115, + "loss_ib": 0.004478500224649906, + "step": 3870 + }, + { + "ce_ib": 5.168932914733887, + "ce_orig": 1.3821829557418823, + "epoch": 1.112732762959235, + "kl_loss": 0.23699940741062164, + "loss_ib": 0.007538927253335714, + "step": 3870 + }, + { + "ce_ib": 3.4552481174468994, + "ce_orig": 0.6133846044540405, + "epoch": 1.112732762959235, + "kl_loss": 0.16100074350833893, + "loss_ib": 0.005065255798399448, + "step": 3870 + }, + { + "ce_ib": 3.4246091842651367, + "ce_orig": 0.7020003199577332, + "epoch": 1.112732762959235, + "kl_loss": 0.1535404920578003, + "loss_ib": 0.004960014019161463, + "step": 3870 + }, + { + "ce_ib": 1.387373685836792, + "ce_orig": 0.42629769444465637, + "epoch": 1.1130203465382127, + "kl_loss": 0.17655116319656372, + "loss_ib": 0.0031528854742646217, + "step": 3871 + }, + { + "ce_ib": 3.3536431789398193, + "ce_orig": 0.6090146899223328, + "epoch": 1.1130203465382127, + "kl_loss": 0.2035546898841858, + "loss_ib": 0.005389189813286066, + "step": 3871 + }, + { + "ce_ib": 3.9575774669647217, + "ce_orig": 1.3215899467468262, + "epoch": 1.1130203465382127, + "kl_loss": 0.15654785931110382, + "loss_ib": 0.005523056257516146, + "step": 3871 + }, + { + "ce_ib": 6.393060207366943, + "ce_orig": 1.305364727973938, + "epoch": 1.1130203465382127, + "kl_loss": 0.19084888696670532, + "loss_ib": 0.008301548659801483, + "step": 3871 + }, + { + "ce_ib": 2.7807669639587402, + "ce_orig": 0.6099096536636353, + "epoch": 1.1133079301171902, + "kl_loss": 0.114634670317173, + "loss_ib": 0.003927113488316536, + "step": 3872 + }, + { + "ce_ib": 4.04807186126709, + "ce_orig": 0.5349976420402527, + "epoch": 1.1133079301171902, + "kl_loss": 0.15797321498394012, + "loss_ib": 0.0056278035044670105, + "step": 3872 + }, + { + "ce_ib": 6.111651420593262, + "ce_orig": 0.8930816650390625, + "epoch": 1.1133079301171902, + "kl_loss": 0.20370088517665863, + "loss_ib": 0.008148660883307457, + "step": 3872 + }, + { + "ce_ib": 4.007510185241699, + "ce_orig": 1.067991018295288, + "epoch": 1.1133079301171902, + "kl_loss": 0.16742944717407227, + "loss_ib": 0.00568180438131094, + "step": 3872 + }, + { + "ce_ib": 6.747045516967773, + "ce_orig": 1.5691161155700684, + "epoch": 1.113595513696168, + "kl_loss": 0.17550061643123627, + "loss_ib": 0.0085020512342453, + "step": 3873 + }, + { + "ce_ib": 2.389115333557129, + "ce_orig": 0.5356447696685791, + "epoch": 1.113595513696168, + "kl_loss": 0.18643641471862793, + "loss_ib": 0.004253479652106762, + "step": 3873 + }, + { + "ce_ib": 1.9015858173370361, + "ce_orig": 0.4541458487510681, + "epoch": 1.113595513696168, + "kl_loss": 0.19771550595760345, + "loss_ib": 0.0038787408266216516, + "step": 3873 + }, + { + "ce_ib": 4.5514302253723145, + "ce_orig": 1.1730653047561646, + "epoch": 1.113595513696168, + "kl_loss": 0.1697148084640503, + "loss_ib": 0.006248578429222107, + "step": 3873 + }, + { + "ce_ib": 3.6439671516418457, + "ce_orig": 1.255178451538086, + "epoch": 1.1138830972751457, + "kl_loss": 0.143203467130661, + "loss_ib": 0.005076001863926649, + "step": 3874 + }, + { + "ce_ib": 5.257030487060547, + "ce_orig": 1.1265039443969727, + "epoch": 1.1138830972751457, + "kl_loss": 0.11870913207530975, + "loss_ib": 0.006444122176617384, + "step": 3874 + }, + { + "ce_ib": 2.069780111312866, + "ce_orig": 0.4995917081832886, + "epoch": 1.1138830972751457, + "kl_loss": 0.1225445494055748, + "loss_ib": 0.0032952253241091967, + "step": 3874 + }, + { + "ce_ib": 5.173306941986084, + "ce_orig": 1.4320061206817627, + "epoch": 1.1138830972751457, + "kl_loss": 0.15384334325790405, + "loss_ib": 0.0067117405124008656, + "step": 3874 + }, + { + "epoch": 1.1141706808541232, + "grad_norm": 0.16463994979858398, + "learning_rate": 7.251955142794631e-06, + "loss": 0.8626, + "step": 3875 + }, + { + "ce_ib": 1.6319760084152222, + "ce_orig": 0.3935070037841797, + "epoch": 1.1141706808541232, + "kl_loss": 0.39574316143989563, + "loss_ib": 0.005589407403022051, + "step": 3875 + }, + { + "ce_ib": 3.475254774093628, + "ce_orig": 0.6213216781616211, + "epoch": 1.1141706808541232, + "kl_loss": 0.25312745571136475, + "loss_ib": 0.006006529089063406, + "step": 3875 + }, + { + "ce_ib": 2.3736743927001953, + "ce_orig": 0.7294175028800964, + "epoch": 1.1141706808541232, + "kl_loss": 0.1734602004289627, + "loss_ib": 0.004108276218175888, + "step": 3875 + }, + { + "ce_ib": 2.7434895038604736, + "ce_orig": 0.6447830200195312, + "epoch": 1.1141706808541232, + "kl_loss": 0.21121814846992493, + "loss_ib": 0.004855670966207981, + "step": 3875 + }, + { + "ce_ib": 3.5333173274993896, + "ce_orig": 1.1239356994628906, + "epoch": 1.114458264433101, + "kl_loss": 0.17591524124145508, + "loss_ib": 0.005292469635605812, + "step": 3876 + }, + { + "ce_ib": 3.781933307647705, + "ce_orig": 0.782207190990448, + "epoch": 1.114458264433101, + "kl_loss": 0.2920035719871521, + "loss_ib": 0.006701968610286713, + "step": 3876 + }, + { + "ce_ib": 5.225003719329834, + "ce_orig": 1.0327513217926025, + "epoch": 1.114458264433101, + "kl_loss": 0.2318674921989441, + "loss_ib": 0.0075436788611114025, + "step": 3876 + }, + { + "ce_ib": 3.7124598026275635, + "ce_orig": 1.0929021835327148, + "epoch": 1.114458264433101, + "kl_loss": 0.17144924402236938, + "loss_ib": 0.005426952149719, + "step": 3876 + }, + { + "ce_ib": 3.0415735244750977, + "ce_orig": 0.633447527885437, + "epoch": 1.1147458480120784, + "kl_loss": 0.17112042009830475, + "loss_ib": 0.004752777516841888, + "step": 3877 + }, + { + "ce_ib": 3.5140788555145264, + "ce_orig": 1.0439358949661255, + "epoch": 1.1147458480120784, + "kl_loss": 0.15610216557979584, + "loss_ib": 0.005075100343674421, + "step": 3877 + }, + { + "ce_ib": 4.331675052642822, + "ce_orig": 1.155199408531189, + "epoch": 1.1147458480120784, + "kl_loss": 0.16704991459846497, + "loss_ib": 0.006002174690365791, + "step": 3877 + }, + { + "ce_ib": 5.094804763793945, + "ce_orig": 1.0795152187347412, + "epoch": 1.1147458480120784, + "kl_loss": 0.19309009611606598, + "loss_ib": 0.007025705650448799, + "step": 3877 + }, + { + "ce_ib": 2.2447149753570557, + "ce_orig": 0.7703022360801697, + "epoch": 1.1150334315910562, + "kl_loss": 0.09107295423746109, + "loss_ib": 0.0031554445158690214, + "step": 3878 + }, + { + "ce_ib": 3.629782199859619, + "ce_orig": 0.8764386177062988, + "epoch": 1.1150334315910562, + "kl_loss": 0.20791304111480713, + "loss_ib": 0.0057089123874902725, + "step": 3878 + }, + { + "ce_ib": 2.605992078781128, + "ce_orig": 0.6724361777305603, + "epoch": 1.1150334315910562, + "kl_loss": 0.14945295453071594, + "loss_ib": 0.004100521560758352, + "step": 3878 + }, + { + "ce_ib": 4.58405065536499, + "ce_orig": 1.010097861289978, + "epoch": 1.1150334315910562, + "kl_loss": 0.18701303005218506, + "loss_ib": 0.006454180926084518, + "step": 3878 + }, + { + "ce_ib": 2.218503475189209, + "ce_orig": 0.7878470420837402, + "epoch": 1.115321015170034, + "kl_loss": 0.15574103593826294, + "loss_ib": 0.0037759137339890003, + "step": 3879 + }, + { + "ce_ib": 3.456605911254883, + "ce_orig": 0.8127186298370361, + "epoch": 1.115321015170034, + "kl_loss": 0.26198428869247437, + "loss_ib": 0.006076449062675238, + "step": 3879 + }, + { + "ce_ib": 1.7101961374282837, + "ce_orig": 0.5507689714431763, + "epoch": 1.115321015170034, + "kl_loss": 0.09043443948030472, + "loss_ib": 0.0026145405136048794, + "step": 3879 + }, + { + "ce_ib": 3.660219430923462, + "ce_orig": 0.7860355973243713, + "epoch": 1.115321015170034, + "kl_loss": 0.08312384784221649, + "loss_ib": 0.004491457715630531, + "step": 3879 + }, + { + "epoch": 1.1156085987490114, + "grad_norm": 0.17993725836277008, + "learning_rate": 7.245023300945203e-06, + "loss": 0.8071, + "step": 3880 + }, + { + "ce_ib": 4.401760578155518, + "ce_orig": 1.245847225189209, + "epoch": 1.1156085987490114, + "kl_loss": 0.17772535979747772, + "loss_ib": 0.006179014220833778, + "step": 3880 + }, + { + "ce_ib": 2.631458282470703, + "ce_orig": 0.6308755874633789, + "epoch": 1.1156085987490114, + "kl_loss": 0.11084204912185669, + "loss_ib": 0.0037398787681013346, + "step": 3880 + }, + { + "ce_ib": 3.6805896759033203, + "ce_orig": 0.7153421640396118, + "epoch": 1.1156085987490114, + "kl_loss": 0.21258217096328735, + "loss_ib": 0.005806411150842905, + "step": 3880 + }, + { + "ce_ib": 5.83021354675293, + "ce_orig": 1.3203721046447754, + "epoch": 1.1156085987490114, + "kl_loss": 0.10428863763809204, + "loss_ib": 0.006873099599033594, + "step": 3880 + }, + { + "ce_ib": 3.3292734622955322, + "ce_orig": 1.0242277383804321, + "epoch": 1.1158961823279892, + "kl_loss": 0.12083891034126282, + "loss_ib": 0.004537662491202354, + "step": 3881 + }, + { + "ce_ib": 2.9344849586486816, + "ce_orig": 0.6850403547286987, + "epoch": 1.1158961823279892, + "kl_loss": 0.09541739523410797, + "loss_ib": 0.0038886587135493755, + "step": 3881 + }, + { + "ce_ib": 1.773116946220398, + "ce_orig": 0.22849693894386292, + "epoch": 1.1158961823279892, + "kl_loss": 0.31118297576904297, + "loss_ib": 0.004884946625679731, + "step": 3881 + }, + { + "ce_ib": 2.220137357711792, + "ce_orig": 0.536333441734314, + "epoch": 1.1158961823279892, + "kl_loss": 0.1234571784734726, + "loss_ib": 0.0034547089599072933, + "step": 3881 + }, + { + "ce_ib": 4.2805609703063965, + "ce_orig": 1.280390739440918, + "epoch": 1.1161837659069667, + "kl_loss": 0.24758607149124146, + "loss_ib": 0.0067564211785793304, + "step": 3882 + }, + { + "ce_ib": 3.1708714962005615, + "ce_orig": 0.7812508344650269, + "epoch": 1.1161837659069667, + "kl_loss": 0.14769543707370758, + "loss_ib": 0.0046478258445858955, + "step": 3882 + }, + { + "ce_ib": 2.461294651031494, + "ce_orig": 0.7184666395187378, + "epoch": 1.1161837659069667, + "kl_loss": 0.1887463927268982, + "loss_ib": 0.004348758142441511, + "step": 3882 + }, + { + "ce_ib": 4.958606719970703, + "ce_orig": 0.9165938496589661, + "epoch": 1.1161837659069667, + "kl_loss": 0.15570291876792908, + "loss_ib": 0.006515635643154383, + "step": 3882 + }, + { + "ce_ib": 2.269193172454834, + "ce_orig": 0.7348117828369141, + "epoch": 1.1164713494859444, + "kl_loss": 0.1318635642528534, + "loss_ib": 0.003587828716263175, + "step": 3883 + }, + { + "ce_ib": 3.205700397491455, + "ce_orig": 0.6638290286064148, + "epoch": 1.1164713494859444, + "kl_loss": 0.19374218583106995, + "loss_ib": 0.005143122747540474, + "step": 3883 + }, + { + "ce_ib": 4.690025806427002, + "ce_orig": 1.2852402925491333, + "epoch": 1.1164713494859444, + "kl_loss": 0.23404738306999207, + "loss_ib": 0.007030499633401632, + "step": 3883 + }, + { + "ce_ib": 2.5370755195617676, + "ce_orig": 0.4780946671962738, + "epoch": 1.1164713494859444, + "kl_loss": 0.10591033846139908, + "loss_ib": 0.0035961787216365337, + "step": 3883 + }, + { + "ce_ib": 2.683588981628418, + "ce_orig": 0.8480387926101685, + "epoch": 1.116758933064922, + "kl_loss": 0.17028166353702545, + "loss_ib": 0.004386405926197767, + "step": 3884 + }, + { + "ce_ib": 4.292421340942383, + "ce_orig": 1.018481731414795, + "epoch": 1.116758933064922, + "kl_loss": 0.1824861764907837, + "loss_ib": 0.006117282900959253, + "step": 3884 + }, + { + "ce_ib": 4.699019908905029, + "ce_orig": 1.162957787513733, + "epoch": 1.116758933064922, + "kl_loss": 0.19520071148872375, + "loss_ib": 0.006651027128100395, + "step": 3884 + }, + { + "ce_ib": 3.562889575958252, + "ce_orig": 0.7410970330238342, + "epoch": 1.116758933064922, + "kl_loss": 0.17849209904670715, + "loss_ib": 0.005347810685634613, + "step": 3884 + }, + { + "epoch": 1.1170465166438996, + "grad_norm": 0.15766797959804535, + "learning_rate": 7.238086050313563e-06, + "loss": 0.8736, + "step": 3885 + }, + { + "ce_ib": 3.013265371322632, + "ce_orig": 0.48758548498153687, + "epoch": 1.1170465166438996, + "kl_loss": 0.24852776527404785, + "loss_ib": 0.005498542450368404, + "step": 3885 + }, + { + "ce_ib": 2.277421712875366, + "ce_orig": 0.48484864830970764, + "epoch": 1.1170465166438996, + "kl_loss": 0.07046712934970856, + "loss_ib": 0.002982093021273613, + "step": 3885 + }, + { + "ce_ib": 2.7153983116149902, + "ce_orig": 0.8392788767814636, + "epoch": 1.1170465166438996, + "kl_loss": 0.16428446769714355, + "loss_ib": 0.004358242731541395, + "step": 3885 + }, + { + "ce_ib": 3.3509104251861572, + "ce_orig": 0.9146230220794678, + "epoch": 1.1170465166438996, + "kl_loss": 0.26156580448150635, + "loss_ib": 0.005966568365693092, + "step": 3885 + }, + { + "ce_ib": 4.20045280456543, + "ce_orig": 0.8057242035865784, + "epoch": 1.1173341002228772, + "kl_loss": 0.1779530644416809, + "loss_ib": 0.005979983601719141, + "step": 3886 + }, + { + "ce_ib": 3.2627456188201904, + "ce_orig": 0.9798346757888794, + "epoch": 1.1173341002228772, + "kl_loss": 0.15070967376232147, + "loss_ib": 0.004769842606037855, + "step": 3886 + }, + { + "ce_ib": 2.513864278793335, + "ce_orig": 0.9114674925804138, + "epoch": 1.1173341002228772, + "kl_loss": 0.14213034510612488, + "loss_ib": 0.003935167565941811, + "step": 3886 + }, + { + "ce_ib": 3.339972496032715, + "ce_orig": 0.7504948973655701, + "epoch": 1.1173341002228772, + "kl_loss": 0.14241528511047363, + "loss_ib": 0.004764125216752291, + "step": 3886 + }, + { + "ce_ib": 5.990136623382568, + "ce_orig": 1.7487746477127075, + "epoch": 1.117621683801855, + "kl_loss": 0.17478153109550476, + "loss_ib": 0.007737952284514904, + "step": 3887 + }, + { + "ce_ib": 2.667290210723877, + "ce_orig": 0.5776377320289612, + "epoch": 1.117621683801855, + "kl_loss": 0.14646652340888977, + "loss_ib": 0.004131955560296774, + "step": 3887 + }, + { + "ce_ib": 2.3606672286987305, + "ce_orig": 0.4862622916698456, + "epoch": 1.117621683801855, + "kl_loss": 0.12690052390098572, + "loss_ib": 0.0036296723410487175, + "step": 3887 + }, + { + "ce_ib": 5.491314888000488, + "ce_orig": 1.4094147682189941, + "epoch": 1.117621683801855, + "kl_loss": 0.16256660223007202, + "loss_ib": 0.007116980850696564, + "step": 3887 + }, + { + "ce_ib": 2.530158519744873, + "ce_orig": 0.8097782731056213, + "epoch": 1.1179092673808326, + "kl_loss": 0.093706414103508, + "loss_ib": 0.0034672224428504705, + "step": 3888 + }, + { + "ce_ib": 3.167825937271118, + "ce_orig": 0.6772505640983582, + "epoch": 1.1179092673808326, + "kl_loss": 0.1681949645280838, + "loss_ib": 0.004849775228649378, + "step": 3888 + }, + { + "ce_ib": 3.9380247592926025, + "ce_orig": 1.027603030204773, + "epoch": 1.1179092673808326, + "kl_loss": 0.15080267190933228, + "loss_ib": 0.005446051247417927, + "step": 3888 + }, + { + "ce_ib": 3.3003835678100586, + "ce_orig": 0.955130934715271, + "epoch": 1.1179092673808326, + "kl_loss": 0.14683237671852112, + "loss_ib": 0.004768707323819399, + "step": 3888 + }, + { + "ce_ib": 2.3723227977752686, + "ce_orig": 0.7642905712127686, + "epoch": 1.1181968509598101, + "kl_loss": 0.14976415038108826, + "loss_ib": 0.0038699645083397627, + "step": 3889 + }, + { + "ce_ib": 4.269714832305908, + "ce_orig": 1.1314191818237305, + "epoch": 1.1181968509598101, + "kl_loss": 0.32250356674194336, + "loss_ib": 0.007494750432670116, + "step": 3889 + }, + { + "ce_ib": 5.189486026763916, + "ce_orig": 0.9625890254974365, + "epoch": 1.1181968509598101, + "kl_loss": 0.16041061282157898, + "loss_ib": 0.006793591659516096, + "step": 3889 + }, + { + "ce_ib": 2.219383716583252, + "ce_orig": 0.6350316405296326, + "epoch": 1.1181968509598101, + "kl_loss": 0.11036394536495209, + "loss_ib": 0.003323023207485676, + "step": 3889 + }, + { + "epoch": 1.1184844345387879, + "grad_norm": 0.15484581887722015, + "learning_rate": 7.231143407613157e-06, + "loss": 0.8389, + "step": 3890 + }, + { + "ce_ib": 3.2436578273773193, + "ce_orig": 0.7612813115119934, + "epoch": 1.1184844345387879, + "kl_loss": 0.25478053092956543, + "loss_ib": 0.005791462957859039, + "step": 3890 + }, + { + "ce_ib": 2.874842405319214, + "ce_orig": 0.8951433897018433, + "epoch": 1.1184844345387879, + "kl_loss": 0.07759752869606018, + "loss_ib": 0.0036508177872747183, + "step": 3890 + }, + { + "ce_ib": 0.6315916776657104, + "ce_orig": 0.11798714101314545, + "epoch": 1.1184844345387879, + "kl_loss": 0.34158268570899963, + "loss_ib": 0.004047418478876352, + "step": 3890 + }, + { + "ce_ib": 2.2567312717437744, + "ce_orig": 0.623487651348114, + "epoch": 1.1184844345387879, + "kl_loss": 0.11758051812648773, + "loss_ib": 0.0034325362648814917, + "step": 3890 + }, + { + "ce_ib": 1.7147516012191772, + "ce_orig": 0.4423825144767761, + "epoch": 1.1187720181177654, + "kl_loss": 0.17650368809700012, + "loss_ib": 0.00347978831268847, + "step": 3891 + }, + { + "ce_ib": 3.493532180786133, + "ce_orig": 0.9570955634117126, + "epoch": 1.1187720181177654, + "kl_loss": 0.27393415570259094, + "loss_ib": 0.006232873536646366, + "step": 3891 + }, + { + "ce_ib": 2.3492376804351807, + "ce_orig": 0.6106420159339905, + "epoch": 1.1187720181177654, + "kl_loss": 0.18609312176704407, + "loss_ib": 0.004210168495774269, + "step": 3891 + }, + { + "ce_ib": 1.8666319847106934, + "ce_orig": 0.4828732907772064, + "epoch": 1.1187720181177654, + "kl_loss": 0.18415136635303497, + "loss_ib": 0.0037081455811858177, + "step": 3891 + }, + { + "ce_ib": 3.3831820487976074, + "ce_orig": 1.169136643409729, + "epoch": 1.1190596016967431, + "kl_loss": 0.13988563418388367, + "loss_ib": 0.004782038275152445, + "step": 3892 + }, + { + "ce_ib": 4.063355445861816, + "ce_orig": 1.1291744709014893, + "epoch": 1.1190596016967431, + "kl_loss": 0.10443004220724106, + "loss_ib": 0.005107656121253967, + "step": 3892 + }, + { + "ce_ib": 2.2799370288848877, + "ce_orig": 0.3593161106109619, + "epoch": 1.1190596016967431, + "kl_loss": 0.37808743119239807, + "loss_ib": 0.006060811225324869, + "step": 3892 + }, + { + "ce_ib": 3.530177593231201, + "ce_orig": 0.7509982585906982, + "epoch": 1.1190596016967431, + "kl_loss": 0.16648618876934052, + "loss_ib": 0.005195039324462414, + "step": 3892 + }, + { + "ce_ib": 4.740960597991943, + "ce_orig": 1.260643720626831, + "epoch": 1.1193471852757209, + "kl_loss": 0.1973036676645279, + "loss_ib": 0.006713997106999159, + "step": 3893 + }, + { + "ce_ib": 1.5704811811447144, + "ce_orig": 0.286518931388855, + "epoch": 1.1193471852757209, + "kl_loss": 0.10343386232852936, + "loss_ib": 0.0026048198342323303, + "step": 3893 + }, + { + "ce_ib": 3.9387588500976562, + "ce_orig": 1.079353928565979, + "epoch": 1.1193471852757209, + "kl_loss": 0.17580050230026245, + "loss_ib": 0.005696763750165701, + "step": 3893 + }, + { + "ce_ib": 2.8244900703430176, + "ce_orig": 0.8244945406913757, + "epoch": 1.1193471852757209, + "kl_loss": 0.09012217819690704, + "loss_ib": 0.0037257117219269276, + "step": 3893 + }, + { + "ce_ib": 1.9803965091705322, + "ce_orig": 0.5189911127090454, + "epoch": 1.1196347688546984, + "kl_loss": 0.1077803298830986, + "loss_ib": 0.0030581997707486153, + "step": 3894 + }, + { + "ce_ib": 3.1688642501831055, + "ce_orig": 0.769285261631012, + "epoch": 1.1196347688546984, + "kl_loss": 0.2080923467874527, + "loss_ib": 0.005249787587672472, + "step": 3894 + }, + { + "ce_ib": 2.4600908756256104, + "ce_orig": 0.4925612211227417, + "epoch": 1.1196347688546984, + "kl_loss": 0.28955358266830444, + "loss_ib": 0.0053556268103420734, + "step": 3894 + }, + { + "ce_ib": 2.9858851432800293, + "ce_orig": 0.610729992389679, + "epoch": 1.1196347688546984, + "kl_loss": 0.20124658942222595, + "loss_ib": 0.004998350981622934, + "step": 3894 + }, + { + "epoch": 1.119922352433676, + "grad_norm": 0.19129516184329987, + "learning_rate": 7.224195389570422e-06, + "loss": 0.8364, + "step": 3895 + }, + { + "ce_ib": 4.370297431945801, + "ce_orig": 0.586155116558075, + "epoch": 1.119922352433676, + "kl_loss": 0.17775538563728333, + "loss_ib": 0.00614785123616457, + "step": 3895 + }, + { + "ce_ib": 3.150125741958618, + "ce_orig": 0.7367236018180847, + "epoch": 1.119922352433676, + "kl_loss": 0.1262507438659668, + "loss_ib": 0.004412632901221514, + "step": 3895 + }, + { + "ce_ib": 3.89457106590271, + "ce_orig": 1.0968189239501953, + "epoch": 1.119922352433676, + "kl_loss": 0.23648306727409363, + "loss_ib": 0.006259401794523001, + "step": 3895 + }, + { + "ce_ib": 3.642401933670044, + "ce_orig": 0.21623072028160095, + "epoch": 1.119922352433676, + "kl_loss": 0.24410267174243927, + "loss_ib": 0.006083428859710693, + "step": 3895 + }, + { + "ce_ib": 3.59063458442688, + "ce_orig": 1.107078194618225, + "epoch": 1.1202099360126536, + "kl_loss": 0.18323466181755066, + "loss_ib": 0.005422981455922127, + "step": 3896 + }, + { + "ce_ib": 3.095017433166504, + "ce_orig": 0.876768946647644, + "epoch": 1.1202099360126536, + "kl_loss": 0.11287128925323486, + "loss_ib": 0.004223729949444532, + "step": 3896 + }, + { + "ce_ib": 3.293532133102417, + "ce_orig": 0.8392409682273865, + "epoch": 1.1202099360126536, + "kl_loss": 0.18679985404014587, + "loss_ib": 0.005161530803889036, + "step": 3896 + }, + { + "ce_ib": 4.899506568908691, + "ce_orig": 1.2102162837982178, + "epoch": 1.1202099360126536, + "kl_loss": 0.19834277033805847, + "loss_ib": 0.006882934365421534, + "step": 3896 + }, + { + "ce_ib": 3.7859256267547607, + "ce_orig": 1.0373055934906006, + "epoch": 1.1204975195916314, + "kl_loss": 0.1632351577281952, + "loss_ib": 0.005418277345597744, + "step": 3897 + }, + { + "ce_ib": 2.3959033489227295, + "ce_orig": 0.6584265828132629, + "epoch": 1.1204975195916314, + "kl_loss": 0.2392854243516922, + "loss_ib": 0.004788757301867008, + "step": 3897 + }, + { + "ce_ib": 4.009730815887451, + "ce_orig": 1.1667742729187012, + "epoch": 1.1204975195916314, + "kl_loss": 0.20015007257461548, + "loss_ib": 0.0060112313367426395, + "step": 3897 + }, + { + "ce_ib": 3.257117986679077, + "ce_orig": 0.9383162260055542, + "epoch": 1.1204975195916314, + "kl_loss": 0.28447771072387695, + "loss_ib": 0.006101895589381456, + "step": 3897 + }, + { + "ce_ib": 1.9151158332824707, + "ce_orig": 0.5112556219100952, + "epoch": 1.1207851031706089, + "kl_loss": 0.1195400059223175, + "loss_ib": 0.003110516117885709, + "step": 3898 + }, + { + "ce_ib": 3.2708983421325684, + "ce_orig": 0.9643608331680298, + "epoch": 1.1207851031706089, + "kl_loss": 0.13047127425670624, + "loss_ib": 0.004575611092150211, + "step": 3898 + }, + { + "ce_ib": 2.9182422161102295, + "ce_orig": 0.7364851832389832, + "epoch": 1.1207851031706089, + "kl_loss": 0.16505864262580872, + "loss_ib": 0.004568828735500574, + "step": 3898 + }, + { + "ce_ib": 2.439845561981201, + "ce_orig": 0.2931959927082062, + "epoch": 1.1207851031706089, + "kl_loss": 0.20053806900978088, + "loss_ib": 0.004445225931704044, + "step": 3898 + }, + { + "ce_ib": 2.092294692993164, + "ce_orig": 0.3912540674209595, + "epoch": 1.1210726867495866, + "kl_loss": 0.1642082929611206, + "loss_ib": 0.003734377445653081, + "step": 3899 + }, + { + "ce_ib": 5.7115797996521, + "ce_orig": 1.147602915763855, + "epoch": 1.1210726867495866, + "kl_loss": 0.18894687294960022, + "loss_ib": 0.007601048797369003, + "step": 3899 + }, + { + "ce_ib": 4.2804274559021, + "ce_orig": 1.0410466194152832, + "epoch": 1.1210726867495866, + "kl_loss": 0.16688121855258942, + "loss_ib": 0.0059492397122085094, + "step": 3899 + }, + { + "ce_ib": 6.075872421264648, + "ce_orig": 1.7072817087173462, + "epoch": 1.1210726867495866, + "kl_loss": 0.1778014898300171, + "loss_ib": 0.007853887043893337, + "step": 3899 + }, + { + "epoch": 1.1213602703285643, + "grad_norm": 0.1608976572751999, + "learning_rate": 7.217242012924747e-06, + "loss": 0.912, + "step": 3900 + }, + { + "ce_ib": 6.003255367279053, + "ce_orig": 1.6652911901474, + "epoch": 1.1213602703285643, + "kl_loss": 0.1647726595401764, + "loss_ib": 0.0076509821228682995, + "step": 3900 + }, + { + "ce_ib": 4.012498378753662, + "ce_orig": 1.116263747215271, + "epoch": 1.1213602703285643, + "kl_loss": 0.12894372642040253, + "loss_ib": 0.005301935598254204, + "step": 3900 + }, + { + "ce_ib": 4.5587687492370605, + "ce_orig": 1.1458543539047241, + "epoch": 1.1213602703285643, + "kl_loss": 0.17804837226867676, + "loss_ib": 0.006339252460747957, + "step": 3900 + }, + { + "ce_ib": 4.720522880554199, + "ce_orig": 1.0010950565338135, + "epoch": 1.1213602703285643, + "kl_loss": 0.12987029552459717, + "loss_ib": 0.006019226275384426, + "step": 3900 + }, + { + "ce_ib": 5.234792232513428, + "ce_orig": 1.500022053718567, + "epoch": 1.1216478539075418, + "kl_loss": 0.14662998914718628, + "loss_ib": 0.006701091770082712, + "step": 3901 + }, + { + "ce_ib": 4.216038227081299, + "ce_orig": 0.5958921909332275, + "epoch": 1.1216478539075418, + "kl_loss": 0.1426365226507187, + "loss_ib": 0.00564240338280797, + "step": 3901 + }, + { + "ce_ib": 3.1852164268493652, + "ce_orig": 0.8171262741088867, + "epoch": 1.1216478539075418, + "kl_loss": 0.1537611484527588, + "loss_ib": 0.004722828045487404, + "step": 3901 + }, + { + "ce_ib": 3.0118918418884277, + "ce_orig": 0.6983984708786011, + "epoch": 1.1216478539075418, + "kl_loss": 0.1728784739971161, + "loss_ib": 0.004740676376968622, + "step": 3901 + }, + { + "ce_ib": 2.407073497772217, + "ce_orig": 0.5650877952575684, + "epoch": 1.1219354374865196, + "kl_loss": 0.12503546476364136, + "loss_ib": 0.0036574278492480516, + "step": 3902 + }, + { + "ce_ib": 2.1830146312713623, + "ce_orig": 0.6039525866508484, + "epoch": 1.1219354374865196, + "kl_loss": 0.1318235546350479, + "loss_ib": 0.0035012501757591963, + "step": 3902 + }, + { + "ce_ib": 3.322162389755249, + "ce_orig": 0.9453467130661011, + "epoch": 1.1219354374865196, + "kl_loss": 0.14356866478919983, + "loss_ib": 0.004757849033921957, + "step": 3902 + }, + { + "ce_ib": 5.490909099578857, + "ce_orig": 0.9463068246841431, + "epoch": 1.1219354374865196, + "kl_loss": 0.14651434123516083, + "loss_ib": 0.006956052500754595, + "step": 3902 + }, + { + "ce_ib": 3.1304776668548584, + "ce_orig": 0.6972318887710571, + "epoch": 1.122223021065497, + "kl_loss": 0.10704004764556885, + "loss_ib": 0.004200878087431192, + "step": 3903 + }, + { + "ce_ib": 2.6002748012542725, + "ce_orig": 0.530504047870636, + "epoch": 1.122223021065497, + "kl_loss": 0.17747792601585388, + "loss_ib": 0.004375054035335779, + "step": 3903 + }, + { + "ce_ib": 1.5882253646850586, + "ce_orig": 0.3083640933036804, + "epoch": 1.122223021065497, + "kl_loss": 0.3830462098121643, + "loss_ib": 0.005418687127530575, + "step": 3903 + }, + { + "ce_ib": 3.448637008666992, + "ce_orig": 0.788278341293335, + "epoch": 1.122223021065497, + "kl_loss": 0.12937742471694946, + "loss_ib": 0.004742411430925131, + "step": 3903 + }, + { + "ce_ib": 2.1706180572509766, + "ce_orig": 0.552302360534668, + "epoch": 1.1225106046444748, + "kl_loss": 0.10356026142835617, + "loss_ib": 0.003206220455467701, + "step": 3904 + }, + { + "ce_ib": 2.4105823040008545, + "ce_orig": 0.7447928190231323, + "epoch": 1.1225106046444748, + "kl_loss": 0.18438643217086792, + "loss_ib": 0.004254446364939213, + "step": 3904 + }, + { + "ce_ib": 2.159188985824585, + "ce_orig": 0.5095764398574829, + "epoch": 1.1225106046444748, + "kl_loss": 0.2563633322715759, + "loss_ib": 0.004722821991890669, + "step": 3904 + }, + { + "ce_ib": 3.1957907676696777, + "ce_orig": 0.5339562892913818, + "epoch": 1.1225106046444748, + "kl_loss": 0.19812768697738647, + "loss_ib": 0.005177067592740059, + "step": 3904 + }, + { + "epoch": 1.1227981882234523, + "grad_norm": 0.1477867066860199, + "learning_rate": 7.210283294428429e-06, + "loss": 0.8362, + "step": 3905 + }, + { + "ce_ib": 2.615391731262207, + "ce_orig": 0.8067449331283569, + "epoch": 1.1227981882234523, + "kl_loss": 0.20647671818733215, + "loss_ib": 0.004680159036070108, + "step": 3905 + }, + { + "ce_ib": 3.182997226715088, + "ce_orig": 0.697062075138092, + "epoch": 1.1227981882234523, + "kl_loss": 0.08115282654762268, + "loss_ib": 0.003994525410234928, + "step": 3905 + }, + { + "ce_ib": 4.081483364105225, + "ce_orig": 0.9609723687171936, + "epoch": 1.1227981882234523, + "kl_loss": 0.25758910179138184, + "loss_ib": 0.006657374557107687, + "step": 3905 + }, + { + "ce_ib": 3.144002914428711, + "ce_orig": 0.6479609608650208, + "epoch": 1.1227981882234523, + "kl_loss": 0.12077635526657104, + "loss_ib": 0.004351766314357519, + "step": 3905 + }, + { + "ce_ib": 5.079277515411377, + "ce_orig": 1.172739028930664, + "epoch": 1.12308577180243, + "kl_loss": 0.13145391643047333, + "loss_ib": 0.006393816787749529, + "step": 3906 + }, + { + "ce_ib": 2.3759031295776367, + "ce_orig": 0.6115002632141113, + "epoch": 1.12308577180243, + "kl_loss": 0.14475280046463013, + "loss_ib": 0.0038234309758991003, + "step": 3906 + }, + { + "ce_ib": 4.323451042175293, + "ce_orig": 0.8149005174636841, + "epoch": 1.12308577180243, + "kl_loss": 0.26662135124206543, + "loss_ib": 0.006989664863795042, + "step": 3906 + }, + { + "ce_ib": 3.726379871368408, + "ce_orig": 0.7750810980796814, + "epoch": 1.12308577180243, + "kl_loss": 0.15313975512981415, + "loss_ib": 0.005257777404040098, + "step": 3906 + }, + { + "ce_ib": 2.0905988216400146, + "ce_orig": 0.5460132956504822, + "epoch": 1.1233733553814078, + "kl_loss": 0.12403136491775513, + "loss_ib": 0.0033309124410152435, + "step": 3907 + }, + { + "ce_ib": 7.25443696975708, + "ce_orig": 1.9998605251312256, + "epoch": 1.1233733553814078, + "kl_loss": 0.21230116486549377, + "loss_ib": 0.00937744788825512, + "step": 3907 + }, + { + "ce_ib": 4.911956310272217, + "ce_orig": 1.2196341753005981, + "epoch": 1.1233733553814078, + "kl_loss": 0.1308179497718811, + "loss_ib": 0.006220135372132063, + "step": 3907 + }, + { + "ce_ib": 4.863163471221924, + "ce_orig": 1.5051047801971436, + "epoch": 1.1233733553814078, + "kl_loss": 0.2038959115743637, + "loss_ib": 0.006902122404426336, + "step": 3907 + }, + { + "ce_ib": 2.9448440074920654, + "ce_orig": 0.785817563533783, + "epoch": 1.1236609389603853, + "kl_loss": 0.11219623684883118, + "loss_ib": 0.00406680628657341, + "step": 3908 + }, + { + "ce_ib": 3.8461294174194336, + "ce_orig": 1.0844178199768066, + "epoch": 1.1236609389603853, + "kl_loss": 0.23116135597229004, + "loss_ib": 0.006157742813229561, + "step": 3908 + }, + { + "ce_ib": 4.387016773223877, + "ce_orig": 1.0858128070831299, + "epoch": 1.1236609389603853, + "kl_loss": 0.20102205872535706, + "loss_ib": 0.0063972375355660915, + "step": 3908 + }, + { + "ce_ib": 4.557604789733887, + "ce_orig": 0.8248985409736633, + "epoch": 1.1236609389603853, + "kl_loss": 0.23677019774913788, + "loss_ib": 0.006925306748598814, + "step": 3908 + }, + { + "ce_ib": 2.1995849609375, + "ce_orig": 0.5640749931335449, + "epoch": 1.123948522539363, + "kl_loss": 0.13117918372154236, + "loss_ib": 0.0035113769117742777, + "step": 3909 + }, + { + "ce_ib": 2.3246381282806396, + "ce_orig": 0.690838634967804, + "epoch": 1.123948522539363, + "kl_loss": 0.09453210979700089, + "loss_ib": 0.0032699592411518097, + "step": 3909 + }, + { + "ce_ib": 6.3327741622924805, + "ce_orig": 1.4982962608337402, + "epoch": 1.123948522539363, + "kl_loss": 0.14325785636901855, + "loss_ib": 0.007765352725982666, + "step": 3909 + }, + { + "ce_ib": 3.4240047931671143, + "ce_orig": 0.7149084806442261, + "epoch": 1.123948522539363, + "kl_loss": 0.22681814432144165, + "loss_ib": 0.005692186299711466, + "step": 3909 + }, + { + "epoch": 1.1242361061183406, + "grad_norm": 0.14144718647003174, + "learning_rate": 7.2033192508466385e-06, + "loss": 0.8509, + "step": 3910 + }, + { + "ce_ib": 3.021047592163086, + "ce_orig": 0.7998876571655273, + "epoch": 1.1242361061183406, + "kl_loss": 0.19146913290023804, + "loss_ib": 0.00493573909625411, + "step": 3910 + }, + { + "ce_ib": 1.9588532447814941, + "ce_orig": 0.5959948897361755, + "epoch": 1.1242361061183406, + "kl_loss": 0.12732572853565216, + "loss_ib": 0.003232110757380724, + "step": 3910 + }, + { + "ce_ib": 4.909201145172119, + "ce_orig": 1.1666345596313477, + "epoch": 1.1242361061183406, + "kl_loss": 0.14589916169643402, + "loss_ib": 0.006368192844092846, + "step": 3910 + }, + { + "ce_ib": 4.567474365234375, + "ce_orig": 1.03497314453125, + "epoch": 1.1242361061183406, + "kl_loss": 0.26364365220069885, + "loss_ib": 0.00720391096547246, + "step": 3910 + }, + { + "ce_ib": 2.399650812149048, + "ce_orig": 0.7017228603363037, + "epoch": 1.1245236896973183, + "kl_loss": 0.11320770531892776, + "loss_ib": 0.003531727707013488, + "step": 3911 + }, + { + "ce_ib": 2.6841084957122803, + "ce_orig": 0.6782998442649841, + "epoch": 1.1245236896973183, + "kl_loss": 0.1637490838766098, + "loss_ib": 0.004321599379181862, + "step": 3911 + }, + { + "ce_ib": 3.4873926639556885, + "ce_orig": 0.8773142099380493, + "epoch": 1.1245236896973183, + "kl_loss": 0.19231098890304565, + "loss_ib": 0.005410502664744854, + "step": 3911 + }, + { + "ce_ib": 2.050614356994629, + "ce_orig": 0.6871976256370544, + "epoch": 1.1245236896973183, + "kl_loss": 0.16276763379573822, + "loss_ib": 0.0036782908719033003, + "step": 3911 + }, + { + "ce_ib": 2.2253105640411377, + "ce_orig": 0.5046465992927551, + "epoch": 1.1248112732762958, + "kl_loss": 0.16757485270500183, + "loss_ib": 0.003901059040799737, + "step": 3912 + }, + { + "ce_ib": 6.236142158508301, + "ce_orig": 1.717012882232666, + "epoch": 1.1248112732762958, + "kl_loss": 0.18528765439987183, + "loss_ib": 0.008089018985629082, + "step": 3912 + }, + { + "ce_ib": 4.981034278869629, + "ce_orig": 1.0142427682876587, + "epoch": 1.1248112732762958, + "kl_loss": 0.24799764156341553, + "loss_ib": 0.007461010478436947, + "step": 3912 + }, + { + "ce_ib": 2.999871253967285, + "ce_orig": 0.652468740940094, + "epoch": 1.1248112732762958, + "kl_loss": 0.1751008927822113, + "loss_ib": 0.004750880412757397, + "step": 3912 + }, + { + "ce_ib": 3.6516494750976562, + "ce_orig": 0.7454939484596252, + "epoch": 1.1250988568552736, + "kl_loss": 0.217171311378479, + "loss_ib": 0.005823363084346056, + "step": 3913 + }, + { + "ce_ib": 3.5868747234344482, + "ce_orig": 0.9575347900390625, + "epoch": 1.1250988568552736, + "kl_loss": 0.18043753504753113, + "loss_ib": 0.005391250364482403, + "step": 3913 + }, + { + "ce_ib": 3.8754348754882812, + "ce_orig": 0.6058643460273743, + "epoch": 1.1250988568552736, + "kl_loss": 0.2735583782196045, + "loss_ib": 0.006611018441617489, + "step": 3913 + }, + { + "ce_ib": 5.431478023529053, + "ce_orig": 1.3114943504333496, + "epoch": 1.1250988568552736, + "kl_loss": 0.23468533158302307, + "loss_ib": 0.007778330706059933, + "step": 3913 + }, + { + "ce_ib": 1.2224266529083252, + "ce_orig": 0.4762253165245056, + "epoch": 1.1253864404342513, + "kl_loss": 0.08329948782920837, + "loss_ib": 0.002055421471595764, + "step": 3914 + }, + { + "ce_ib": 2.347153663635254, + "ce_orig": 0.4576431214809418, + "epoch": 1.1253864404342513, + "kl_loss": 0.20133672654628754, + "loss_ib": 0.004360520746558905, + "step": 3914 + }, + { + "ce_ib": 4.034048557281494, + "ce_orig": 1.2347776889801025, + "epoch": 1.1253864404342513, + "kl_loss": 0.2115517407655716, + "loss_ib": 0.006149566266685724, + "step": 3914 + }, + { + "ce_ib": 7.305660247802734, + "ce_orig": 1.4728444814682007, + "epoch": 1.1253864404342513, + "kl_loss": 0.13197582960128784, + "loss_ib": 0.008625417947769165, + "step": 3914 + }, + { + "epoch": 1.1256740240132288, + "grad_norm": 0.16155408322811127, + "learning_rate": 7.196349898957371e-06, + "loss": 0.8191, + "step": 3915 + }, + { + "ce_ib": 4.577680587768555, + "ce_orig": 1.2100872993469238, + "epoch": 1.1256740240132288, + "kl_loss": 0.16353589296340942, + "loss_ib": 0.00621303915977478, + "step": 3915 + }, + { + "ce_ib": 2.179487705230713, + "ce_orig": 0.502495527267456, + "epoch": 1.1256740240132288, + "kl_loss": 0.12903475761413574, + "loss_ib": 0.003469835501164198, + "step": 3915 + }, + { + "ce_ib": 2.9935150146484375, + "ce_orig": 0.5557957887649536, + "epoch": 1.1256740240132288, + "kl_loss": 0.19124148786067963, + "loss_ib": 0.004905930254608393, + "step": 3915 + }, + { + "ce_ib": 7.290437698364258, + "ce_orig": 2.01178240776062, + "epoch": 1.1256740240132288, + "kl_loss": 0.20778091251850128, + "loss_ib": 0.009368246421217918, + "step": 3915 + }, + { + "ce_ib": 3.740522861480713, + "ce_orig": 0.9745127558708191, + "epoch": 1.1259616075922065, + "kl_loss": 0.11577711999416351, + "loss_ib": 0.004898293875157833, + "step": 3916 + }, + { + "ce_ib": 2.2861335277557373, + "ce_orig": 0.5738935470581055, + "epoch": 1.1259616075922065, + "kl_loss": 0.16964969038963318, + "loss_ib": 0.003982630092650652, + "step": 3916 + }, + { + "ce_ib": 3.480639934539795, + "ce_orig": 1.0264935493469238, + "epoch": 1.1259616075922065, + "kl_loss": 0.14730088412761688, + "loss_ib": 0.004953648895025253, + "step": 3916 + }, + { + "ce_ib": 4.523638725280762, + "ce_orig": 1.2177841663360596, + "epoch": 1.1259616075922065, + "kl_loss": 0.17182397842407227, + "loss_ib": 0.006241878028959036, + "step": 3916 + }, + { + "ce_ib": 2.168255567550659, + "ce_orig": 0.4829767048358917, + "epoch": 1.126249191171184, + "kl_loss": 0.1713561713695526, + "loss_ib": 0.003881817450746894, + "step": 3917 + }, + { + "ce_ib": 3.0349035263061523, + "ce_orig": 0.8308660387992859, + "epoch": 1.126249191171184, + "kl_loss": 0.1407392919063568, + "loss_ib": 0.004442296456545591, + "step": 3917 + }, + { + "ce_ib": 6.100493431091309, + "ce_orig": 1.7410895824432373, + "epoch": 1.126249191171184, + "kl_loss": 0.20683500170707703, + "loss_ib": 0.008168843574821949, + "step": 3917 + }, + { + "ce_ib": 2.2865724563598633, + "ce_orig": 0.6834183931350708, + "epoch": 1.126249191171184, + "kl_loss": 0.13526302576065063, + "loss_ib": 0.0036392025649547577, + "step": 3917 + }, + { + "ce_ib": 4.78338098526001, + "ce_orig": 1.2571080923080444, + "epoch": 1.1265367747501618, + "kl_loss": 0.19595828652381897, + "loss_ib": 0.006742963567376137, + "step": 3918 + }, + { + "ce_ib": 3.7328648567199707, + "ce_orig": 1.063446283340454, + "epoch": 1.1265367747501618, + "kl_loss": 0.1327057182788849, + "loss_ib": 0.005059921648353338, + "step": 3918 + }, + { + "ce_ib": 2.7721338272094727, + "ce_orig": 0.698428750038147, + "epoch": 1.1265367747501618, + "kl_loss": 0.10215036571025848, + "loss_ib": 0.0037936377339065075, + "step": 3918 + }, + { + "ce_ib": 2.301809549331665, + "ce_orig": 0.5940304398536682, + "epoch": 1.1265367747501618, + "kl_loss": 0.18677254021167755, + "loss_ib": 0.004169534891843796, + "step": 3918 + }, + { + "ce_ib": 2.342808723449707, + "ce_orig": 0.5659165978431702, + "epoch": 1.1268243583291393, + "kl_loss": 0.19828486442565918, + "loss_ib": 0.004325657617300749, + "step": 3919 + }, + { + "ce_ib": 4.9279255867004395, + "ce_orig": 1.3636478185653687, + "epoch": 1.1268243583291393, + "kl_loss": 0.16542623937129974, + "loss_ib": 0.006582187954336405, + "step": 3919 + }, + { + "ce_ib": 3.356123685836792, + "ce_orig": 0.6430820226669312, + "epoch": 1.1268243583291393, + "kl_loss": 0.13441336154937744, + "loss_ib": 0.004700257442891598, + "step": 3919 + }, + { + "ce_ib": 3.026027202606201, + "ce_orig": 0.965754508972168, + "epoch": 1.1268243583291393, + "kl_loss": 0.10625293850898743, + "loss_ib": 0.004088556859642267, + "step": 3919 + }, + { + "epoch": 1.127111941908117, + "grad_norm": 0.15482580661773682, + "learning_rate": 7.189375255551413e-06, + "loss": 0.8914, + "step": 3920 + }, + { + "ce_ib": 2.271801710128784, + "ce_orig": 0.7534090876579285, + "epoch": 1.127111941908117, + "kl_loss": 0.1597038209438324, + "loss_ib": 0.003868839703500271, + "step": 3920 + }, + { + "ce_ib": 2.0981929302215576, + "ce_orig": 0.46718689799308777, + "epoch": 1.127111941908117, + "kl_loss": 0.13452588021755219, + "loss_ib": 0.0034434518311172724, + "step": 3920 + }, + { + "ce_ib": 3.3303678035736084, + "ce_orig": 0.7099117636680603, + "epoch": 1.127111941908117, + "kl_loss": 0.16214688122272491, + "loss_ib": 0.004951836541295052, + "step": 3920 + }, + { + "ce_ib": 6.147886276245117, + "ce_orig": 1.8196073770523071, + "epoch": 1.127111941908117, + "kl_loss": 0.15354053676128387, + "loss_ib": 0.007683292031288147, + "step": 3920 + }, + { + "ce_ib": 5.3269453048706055, + "ce_orig": 1.7425971031188965, + "epoch": 1.1273995254870948, + "kl_loss": 0.18375681340694427, + "loss_ib": 0.007164513226598501, + "step": 3921 + }, + { + "ce_ib": 4.496922492980957, + "ce_orig": 0.6614744663238525, + "epoch": 1.1273995254870948, + "kl_loss": 0.16038435697555542, + "loss_ib": 0.006100765895098448, + "step": 3921 + }, + { + "ce_ib": 4.1834516525268555, + "ce_orig": 0.9342001676559448, + "epoch": 1.1273995254870948, + "kl_loss": 0.17671358585357666, + "loss_ib": 0.005950587801635265, + "step": 3921 + }, + { + "ce_ib": 5.017003059387207, + "ce_orig": 1.2815635204315186, + "epoch": 1.1273995254870948, + "kl_loss": 0.1634865403175354, + "loss_ib": 0.00665186857804656, + "step": 3921 + }, + { + "ce_ib": 3.8450989723205566, + "ce_orig": 1.0091668367385864, + "epoch": 1.1276871090660723, + "kl_loss": 0.1326412707567215, + "loss_ib": 0.00517151178792119, + "step": 3922 + }, + { + "ce_ib": 3.846137523651123, + "ce_orig": 1.083936095237732, + "epoch": 1.1276871090660723, + "kl_loss": 0.16204410791397095, + "loss_ib": 0.0054665785282850266, + "step": 3922 + }, + { + "ce_ib": 3.685983896255493, + "ce_orig": 1.155444860458374, + "epoch": 1.1276871090660723, + "kl_loss": 0.14495787024497986, + "loss_ib": 0.0051355622708797455, + "step": 3922 + }, + { + "ce_ib": 5.162167072296143, + "ce_orig": 0.790565013885498, + "epoch": 1.1276871090660723, + "kl_loss": 0.21270909905433655, + "loss_ib": 0.007289258297532797, + "step": 3922 + }, + { + "ce_ib": 4.7928242683410645, + "ce_orig": 0.892193615436554, + "epoch": 1.12797469264505, + "kl_loss": 0.3416907787322998, + "loss_ib": 0.008209732361137867, + "step": 3923 + }, + { + "ce_ib": 3.517202377319336, + "ce_orig": 0.5857639908790588, + "epoch": 1.12797469264505, + "kl_loss": 0.18970339000225067, + "loss_ib": 0.0054142358712852, + "step": 3923 + }, + { + "ce_ib": 1.7249410152435303, + "ce_orig": 0.4902622699737549, + "epoch": 1.12797469264505, + "kl_loss": 0.15257146954536438, + "loss_ib": 0.003250655485317111, + "step": 3923 + }, + { + "ce_ib": 4.9885640144348145, + "ce_orig": 0.9747017621994019, + "epoch": 1.12797469264505, + "kl_loss": 0.17914237082004547, + "loss_ib": 0.006779987830668688, + "step": 3923 + }, + { + "ce_ib": 3.016972064971924, + "ce_orig": 0.4688359498977661, + "epoch": 1.1282622762240275, + "kl_loss": 0.2227034866809845, + "loss_ib": 0.005244006868451834, + "step": 3924 + }, + { + "ce_ib": 5.2412872314453125, + "ce_orig": 1.3346974849700928, + "epoch": 1.1282622762240275, + "kl_loss": 0.13742589950561523, + "loss_ib": 0.006615546066313982, + "step": 3924 + }, + { + "ce_ib": 4.233577728271484, + "ce_orig": 0.8743234276771545, + "epoch": 1.1282622762240275, + "kl_loss": 0.12143655866384506, + "loss_ib": 0.005447943229228258, + "step": 3924 + }, + { + "ce_ib": 5.654252052307129, + "ce_orig": 1.4633684158325195, + "epoch": 1.1282622762240275, + "kl_loss": 0.18312212824821472, + "loss_ib": 0.007485473062843084, + "step": 3924 + }, + { + "epoch": 1.1285498598030053, + "grad_norm": 0.16471154987812042, + "learning_rate": 7.1823953374323e-06, + "loss": 0.9497, + "step": 3925 + }, + { + "ce_ib": 3.5644218921661377, + "ce_orig": 0.7493558526039124, + "epoch": 1.1285498598030053, + "kl_loss": 0.13890311121940613, + "loss_ib": 0.004953452851623297, + "step": 3925 + }, + { + "ce_ib": 2.628946304321289, + "ce_orig": 0.5963564515113831, + "epoch": 1.1285498598030053, + "kl_loss": 0.13148090243339539, + "loss_ib": 0.00394375529140234, + "step": 3925 + }, + { + "ce_ib": 5.828822612762451, + "ce_orig": 1.4683473110198975, + "epoch": 1.1285498598030053, + "kl_loss": 0.19797445833683014, + "loss_ib": 0.007808567024767399, + "step": 3925 + }, + { + "ce_ib": 4.494402885437012, + "ce_orig": 0.9711695909500122, + "epoch": 1.1285498598030053, + "kl_loss": 0.23408962786197662, + "loss_ib": 0.006835299078375101, + "step": 3925 + }, + { + "ce_ib": 2.2560901641845703, + "ce_orig": 0.6621701717376709, + "epoch": 1.128837443381983, + "kl_loss": 0.13508649170398712, + "loss_ib": 0.0036069550551474094, + "step": 3926 + }, + { + "ce_ib": 4.576690673828125, + "ce_orig": 1.3473657369613647, + "epoch": 1.128837443381983, + "kl_loss": 0.143002450466156, + "loss_ib": 0.006006715353578329, + "step": 3926 + }, + { + "ce_ib": 4.686463832855225, + "ce_orig": 1.0733802318572998, + "epoch": 1.128837443381983, + "kl_loss": 0.1657063066959381, + "loss_ib": 0.006343526765704155, + "step": 3926 + }, + { + "ce_ib": 2.4780328273773193, + "ce_orig": 0.4085329473018646, + "epoch": 1.128837443381983, + "kl_loss": 0.21057181060314178, + "loss_ib": 0.004583750851452351, + "step": 3926 + }, + { + "ce_ib": 4.257236003875732, + "ce_orig": 1.380908727645874, + "epoch": 1.1291250269609605, + "kl_loss": 0.17949701845645905, + "loss_ib": 0.006052205804735422, + "step": 3927 + }, + { + "ce_ib": 2.433281898498535, + "ce_orig": 0.5757558345794678, + "epoch": 1.1291250269609605, + "kl_loss": 0.11554750055074692, + "loss_ib": 0.0035887567792087793, + "step": 3927 + }, + { + "ce_ib": 2.8225862979888916, + "ce_orig": 0.7966843843460083, + "epoch": 1.1291250269609605, + "kl_loss": 0.14049221575260162, + "loss_ib": 0.004227508325129747, + "step": 3927 + }, + { + "ce_ib": 3.6341283321380615, + "ce_orig": 1.0305780172348022, + "epoch": 1.1291250269609605, + "kl_loss": 0.14135198295116425, + "loss_ib": 0.005047647748142481, + "step": 3927 + }, + { + "ce_ib": 5.75945520401001, + "ce_orig": 1.1906988620758057, + "epoch": 1.1294126105399382, + "kl_loss": 0.24725589156150818, + "loss_ib": 0.008232014253735542, + "step": 3928 + }, + { + "ce_ib": 2.0522444248199463, + "ce_orig": 0.5964614748954773, + "epoch": 1.1294126105399382, + "kl_loss": 0.220136821269989, + "loss_ib": 0.004253612831234932, + "step": 3928 + }, + { + "ce_ib": 2.6558845043182373, + "ce_orig": 0.704624593257904, + "epoch": 1.1294126105399382, + "kl_loss": 0.18420419096946716, + "loss_ib": 0.004497926216572523, + "step": 3928 + }, + { + "ce_ib": 2.30659818649292, + "ce_orig": 0.679129421710968, + "epoch": 1.1294126105399382, + "kl_loss": 0.09467032551765442, + "loss_ib": 0.0032533013727515936, + "step": 3928 + }, + { + "ce_ib": 2.5816357135772705, + "ce_orig": 0.6062504053115845, + "epoch": 1.1297001941189158, + "kl_loss": 0.2999575138092041, + "loss_ib": 0.00558121083304286, + "step": 3929 + }, + { + "ce_ib": 3.4624359607696533, + "ce_orig": 0.7636180520057678, + "epoch": 1.1297001941189158, + "kl_loss": 0.21250425279140472, + "loss_ib": 0.005587478168308735, + "step": 3929 + }, + { + "ce_ib": 2.920445680618286, + "ce_orig": 0.5624911189079285, + "epoch": 1.1297001941189158, + "kl_loss": 0.24739287793636322, + "loss_ib": 0.00539437448605895, + "step": 3929 + }, + { + "ce_ib": 3.001713752746582, + "ce_orig": 0.8129487633705139, + "epoch": 1.1297001941189158, + "kl_loss": 0.22793027758598328, + "loss_ib": 0.005281016696244478, + "step": 3929 + }, + { + "epoch": 1.1299877776978935, + "grad_norm": 0.1501062959432602, + "learning_rate": 7.175410161416274e-06, + "loss": 0.8846, + "step": 3930 + }, + { + "ce_ib": 2.5575110912323, + "ce_orig": 0.47773921489715576, + "epoch": 1.1299877776978935, + "kl_loss": 0.13941392302513123, + "loss_ib": 0.003951650112867355, + "step": 3930 + }, + { + "ce_ib": 1.8114763498306274, + "ce_orig": 0.466582328081131, + "epoch": 1.1299877776978935, + "kl_loss": 0.23567518591880798, + "loss_ib": 0.00416822824627161, + "step": 3930 + }, + { + "ce_ib": 4.968475341796875, + "ce_orig": 1.3317527770996094, + "epoch": 1.1299877776978935, + "kl_loss": 0.18577146530151367, + "loss_ib": 0.006826189812272787, + "step": 3930 + }, + { + "ce_ib": 3.7395312786102295, + "ce_orig": 0.6355983018875122, + "epoch": 1.1299877776978935, + "kl_loss": 0.21227850019931793, + "loss_ib": 0.005862316582351923, + "step": 3930 + }, + { + "ce_ib": 3.089456558227539, + "ce_orig": 0.5560135841369629, + "epoch": 1.130275361276871, + "kl_loss": 0.2021721601486206, + "loss_ib": 0.005111177917569876, + "step": 3931 + }, + { + "ce_ib": 3.340226173400879, + "ce_orig": 0.9181346297264099, + "epoch": 1.130275361276871, + "kl_loss": 0.21557824313640594, + "loss_ib": 0.005496008321642876, + "step": 3931 + }, + { + "ce_ib": 2.4335012435913086, + "ce_orig": 0.3617241382598877, + "epoch": 1.130275361276871, + "kl_loss": 0.14242148399353027, + "loss_ib": 0.0038577162194997072, + "step": 3931 + }, + { + "ce_ib": 6.180643558502197, + "ce_orig": 1.535918951034546, + "epoch": 1.130275361276871, + "kl_loss": 0.20237436890602112, + "loss_ib": 0.008204387500882149, + "step": 3931 + }, + { + "ce_ib": 3.8368656635284424, + "ce_orig": 1.1630107164382935, + "epoch": 1.1305629448558487, + "kl_loss": 0.1132858619093895, + "loss_ib": 0.004969724453985691, + "step": 3932 + }, + { + "ce_ib": 2.5335965156555176, + "ce_orig": 0.5732558965682983, + "epoch": 1.1305629448558487, + "kl_loss": 0.2692064046859741, + "loss_ib": 0.005225660745054483, + "step": 3932 + }, + { + "ce_ib": 4.882571220397949, + "ce_orig": 0.7930552959442139, + "epoch": 1.1305629448558487, + "kl_loss": 0.25022488832473755, + "loss_ib": 0.007384819909930229, + "step": 3932 + }, + { + "ce_ib": 4.906162261962891, + "ce_orig": 1.4532570838928223, + "epoch": 1.1305629448558487, + "kl_loss": 0.14860224723815918, + "loss_ib": 0.006392184179276228, + "step": 3932 + }, + { + "ce_ib": 2.623049736022949, + "ce_orig": 0.6643214225769043, + "epoch": 1.1308505284348263, + "kl_loss": 0.13647249341011047, + "loss_ib": 0.003987774718552828, + "step": 3933 + }, + { + "ce_ib": 5.599141597747803, + "ce_orig": 0.9283562898635864, + "epoch": 1.1308505284348263, + "kl_loss": 0.16410042345523834, + "loss_ib": 0.007240145467221737, + "step": 3933 + }, + { + "ce_ib": 3.3963823318481445, + "ce_orig": 0.8568121790885925, + "epoch": 1.1308505284348263, + "kl_loss": 0.12884627282619476, + "loss_ib": 0.004684844985604286, + "step": 3933 + }, + { + "ce_ib": 4.217965126037598, + "ce_orig": 1.2743847370147705, + "epoch": 1.1308505284348263, + "kl_loss": 0.16369104385375977, + "loss_ib": 0.005854875780642033, + "step": 3933 + }, + { + "ce_ib": 2.4897263050079346, + "ce_orig": 0.7656515836715698, + "epoch": 1.131138112013804, + "kl_loss": 0.130302295088768, + "loss_ib": 0.0037927492521703243, + "step": 3934 + }, + { + "ce_ib": 4.1145429611206055, + "ce_orig": 0.9169427752494812, + "epoch": 1.131138112013804, + "kl_loss": 0.3119185268878937, + "loss_ib": 0.007233728189021349, + "step": 3934 + }, + { + "ce_ib": 1.6924455165863037, + "ce_orig": 0.5501957535743713, + "epoch": 1.131138112013804, + "kl_loss": 0.15759167075157166, + "loss_ib": 0.003268362255766988, + "step": 3934 + }, + { + "ce_ib": 3.5841739177703857, + "ce_orig": 0.8761894106864929, + "epoch": 1.131138112013804, + "kl_loss": 0.17440438270568848, + "loss_ib": 0.005328217521309853, + "step": 3934 + }, + { + "epoch": 1.1314256955927817, + "grad_norm": 0.16646841168403625, + "learning_rate": 7.168419744332246e-06, + "loss": 0.8665, + "step": 3935 + }, + { + "ce_ib": 2.1544506549835205, + "ce_orig": 0.4968049228191376, + "epoch": 1.1314256955927817, + "kl_loss": 0.16783252358436584, + "loss_ib": 0.003832775866612792, + "step": 3935 + }, + { + "ce_ib": 4.0544753074646, + "ce_orig": 0.9855047464370728, + "epoch": 1.1314256955927817, + "kl_loss": 0.1476750522851944, + "loss_ib": 0.005531225819140673, + "step": 3935 + }, + { + "ce_ib": 1.8294322490692139, + "ce_orig": 0.3518667221069336, + "epoch": 1.1314256955927817, + "kl_loss": 0.2880507707595825, + "loss_ib": 0.004709939938038588, + "step": 3935 + }, + { + "ce_ib": 3.8163204193115234, + "ce_orig": 1.0863152742385864, + "epoch": 1.1314256955927817, + "kl_loss": 0.1519961655139923, + "loss_ib": 0.005336282309144735, + "step": 3935 + }, + { + "ce_ib": 3.368488073348999, + "ce_orig": 1.0426287651062012, + "epoch": 1.1317132791717592, + "kl_loss": 0.12906566262245178, + "loss_ib": 0.004659144673496485, + "step": 3936 + }, + { + "ce_ib": 3.789778232574463, + "ce_orig": 0.7834030985832214, + "epoch": 1.1317132791717592, + "kl_loss": 0.16957668960094452, + "loss_ib": 0.005485545378178358, + "step": 3936 + }, + { + "ce_ib": 2.287806272506714, + "ce_orig": 0.499049574136734, + "epoch": 1.1317132791717592, + "kl_loss": 0.138102188706398, + "loss_ib": 0.003668827936053276, + "step": 3936 + }, + { + "ce_ib": 2.4008750915527344, + "ce_orig": 0.5741463899612427, + "epoch": 1.1317132791717592, + "kl_loss": 0.16980230808258057, + "loss_ib": 0.0040988982655107975, + "step": 3936 + }, + { + "ce_ib": 4.151252746582031, + "ce_orig": 1.3616970777511597, + "epoch": 1.132000862750737, + "kl_loss": 0.15465188026428223, + "loss_ib": 0.0056977709755301476, + "step": 3937 + }, + { + "ce_ib": 3.0266852378845215, + "ce_orig": 0.9588255286216736, + "epoch": 1.132000862750737, + "kl_loss": 0.18350031971931458, + "loss_ib": 0.004861688707023859, + "step": 3937 + }, + { + "ce_ib": 3.8832783699035645, + "ce_orig": 0.8390873670578003, + "epoch": 1.132000862750737, + "kl_loss": 0.12103740870952606, + "loss_ib": 0.005093652289360762, + "step": 3937 + }, + { + "ce_ib": 4.307651996612549, + "ce_orig": 1.2005656957626343, + "epoch": 1.132000862750737, + "kl_loss": 0.2036404311656952, + "loss_ib": 0.0063440557569265366, + "step": 3937 + }, + { + "ce_ib": 3.9671132564544678, + "ce_orig": 1.0011167526245117, + "epoch": 1.1322884463297145, + "kl_loss": 0.1887717843055725, + "loss_ib": 0.005854830611497164, + "step": 3938 + }, + { + "ce_ib": 4.3219499588012695, + "ce_orig": 0.9255284070968628, + "epoch": 1.1322884463297145, + "kl_loss": 0.16396217048168182, + "loss_ib": 0.00596157182008028, + "step": 3938 + }, + { + "ce_ib": 1.4166982173919678, + "ce_orig": 0.5473747849464417, + "epoch": 1.1322884463297145, + "kl_loss": 0.10804274678230286, + "loss_ib": 0.0024971256498247385, + "step": 3938 + }, + { + "ce_ib": 2.172814130783081, + "ce_orig": 0.5025962591171265, + "epoch": 1.1322884463297145, + "kl_loss": 0.5375306606292725, + "loss_ib": 0.007548120338469744, + "step": 3938 + }, + { + "ce_ib": 3.0946600437164307, + "ce_orig": 0.7585675120353699, + "epoch": 1.1325760299086922, + "kl_loss": 0.23077255487442017, + "loss_ib": 0.0054023852571845055, + "step": 3939 + }, + { + "ce_ib": 5.094891548156738, + "ce_orig": 1.270599126815796, + "epoch": 1.1325760299086922, + "kl_loss": 0.16909882426261902, + "loss_ib": 0.00678588030859828, + "step": 3939 + }, + { + "ce_ib": 3.951557159423828, + "ce_orig": 0.8357471823692322, + "epoch": 1.1325760299086922, + "kl_loss": 0.1964915245771408, + "loss_ib": 0.005916472524404526, + "step": 3939 + }, + { + "ce_ib": 4.877115726470947, + "ce_orig": 0.9461879134178162, + "epoch": 1.1325760299086922, + "kl_loss": 0.2542746663093567, + "loss_ib": 0.007419862784445286, + "step": 3939 + }, + { + "epoch": 1.13286361348767, + "grad_norm": 0.15561792254447937, + "learning_rate": 7.161424103021752e-06, + "loss": 0.8637, + "step": 3940 + }, + { + "ce_ib": 2.084304094314575, + "ce_orig": 0.5054313540458679, + "epoch": 1.13286361348767, + "kl_loss": 0.09016568958759308, + "loss_ib": 0.002985960803925991, + "step": 3940 + }, + { + "ce_ib": 2.8581795692443848, + "ce_orig": 0.7017081379890442, + "epoch": 1.13286361348767, + "kl_loss": 0.18873372673988342, + "loss_ib": 0.004745516460388899, + "step": 3940 + }, + { + "ce_ib": 4.840374946594238, + "ce_orig": 1.2606923580169678, + "epoch": 1.13286361348767, + "kl_loss": 0.1658419817686081, + "loss_ib": 0.006498794537037611, + "step": 3940 + }, + { + "ce_ib": 3.55851149559021, + "ce_orig": 0.8815019726753235, + "epoch": 1.13286361348767, + "kl_loss": 0.16218800842761993, + "loss_ib": 0.0051803914830088615, + "step": 3940 + }, + { + "ce_ib": 3.3232576847076416, + "ce_orig": 0.6078843474388123, + "epoch": 1.1331511970666475, + "kl_loss": 0.19999879598617554, + "loss_ib": 0.0053232451900839806, + "step": 3941 + }, + { + "ce_ib": 1.4864097833633423, + "ce_orig": 0.41929230093955994, + "epoch": 1.1331511970666475, + "kl_loss": 0.11717110127210617, + "loss_ib": 0.0026581205893307924, + "step": 3941 + }, + { + "ce_ib": 4.189865589141846, + "ce_orig": 1.3383543491363525, + "epoch": 1.1331511970666475, + "kl_loss": 0.2844778895378113, + "loss_ib": 0.0070346444845199585, + "step": 3941 + }, + { + "ce_ib": 4.204874038696289, + "ce_orig": 1.084024429321289, + "epoch": 1.1331511970666475, + "kl_loss": 0.14714480936527252, + "loss_ib": 0.0056763216853141785, + "step": 3941 + }, + { + "ce_ib": 3.8630144596099854, + "ce_orig": 1.075202465057373, + "epoch": 1.1334387806456252, + "kl_loss": 0.21860390901565552, + "loss_ib": 0.006049053743481636, + "step": 3942 + }, + { + "ce_ib": 3.0353047847747803, + "ce_orig": 0.8129207491874695, + "epoch": 1.1334387806456252, + "kl_loss": 0.14835971593856812, + "loss_ib": 0.0045189019292593, + "step": 3942 + }, + { + "ce_ib": 4.968287944793701, + "ce_orig": 1.2062890529632568, + "epoch": 1.1334387806456252, + "kl_loss": 0.21913135051727295, + "loss_ib": 0.007159601431339979, + "step": 3942 + }, + { + "ce_ib": 2.2414791584014893, + "ce_orig": 0.383223295211792, + "epoch": 1.1334387806456252, + "kl_loss": 0.22503218054771423, + "loss_ib": 0.004491800907999277, + "step": 3942 + }, + { + "ce_ib": 8.76853084564209, + "ce_orig": 1.6814963817596436, + "epoch": 1.1337263642246027, + "kl_loss": 0.21383796632289886, + "loss_ib": 0.01090691052377224, + "step": 3943 + }, + { + "ce_ib": 2.653535842895508, + "ce_orig": 0.3233628571033478, + "epoch": 1.1337263642246027, + "kl_loss": 0.1711069941520691, + "loss_ib": 0.004364605527371168, + "step": 3943 + }, + { + "ce_ib": 4.965793132781982, + "ce_orig": 1.0082075595855713, + "epoch": 1.1337263642246027, + "kl_loss": 0.11084896326065063, + "loss_ib": 0.006074282806366682, + "step": 3943 + }, + { + "ce_ib": 1.2546167373657227, + "ce_orig": 0.3332562744617462, + "epoch": 1.1337263642246027, + "kl_loss": 0.39717161655426025, + "loss_ib": 0.005226333159953356, + "step": 3943 + }, + { + "ce_ib": 5.296156883239746, + "ce_orig": 1.1792292594909668, + "epoch": 1.1340139478035804, + "kl_loss": 0.11729422956705093, + "loss_ib": 0.0064690993167459965, + "step": 3944 + }, + { + "ce_ib": 2.72933030128479, + "ce_orig": 0.41811424493789673, + "epoch": 1.1340139478035804, + "kl_loss": 0.178809255361557, + "loss_ib": 0.004517422989010811, + "step": 3944 + }, + { + "ce_ib": 3.7464263439178467, + "ce_orig": 0.9621150493621826, + "epoch": 1.1340139478035804, + "kl_loss": 0.18581074476242065, + "loss_ib": 0.0056045339442789555, + "step": 3944 + }, + { + "ce_ib": 3.3696675300598145, + "ce_orig": 0.7517979741096497, + "epoch": 1.1340139478035804, + "kl_loss": 0.13052591681480408, + "loss_ib": 0.004674926400184631, + "step": 3944 + }, + { + "epoch": 1.134301531382558, + "grad_norm": 0.1478007435798645, + "learning_rate": 7.154423254338917e-06, + "loss": 0.8811, + "step": 3945 + }, + { + "ce_ib": 2.434443712234497, + "ce_orig": 0.6130331158638, + "epoch": 1.134301531382558, + "kl_loss": 0.110547736287117, + "loss_ib": 0.0035399210173636675, + "step": 3945 + }, + { + "ce_ib": 1.8482686281204224, + "ce_orig": 0.44919753074645996, + "epoch": 1.134301531382558, + "kl_loss": 0.134618878364563, + "loss_ib": 0.0031944571528583765, + "step": 3945 + }, + { + "ce_ib": 3.375232458114624, + "ce_orig": 0.7486235499382019, + "epoch": 1.134301531382558, + "kl_loss": 0.12619046866893768, + "loss_ib": 0.004637137055397034, + "step": 3945 + }, + { + "ce_ib": 2.441567897796631, + "ce_orig": 0.4312115013599396, + "epoch": 1.134301531382558, + "kl_loss": 0.09213361144065857, + "loss_ib": 0.003362904069945216, + "step": 3945 + }, + { + "ce_ib": 6.544629096984863, + "ce_orig": 1.3626481294631958, + "epoch": 1.1345891149615357, + "kl_loss": 0.10686372220516205, + "loss_ib": 0.0076132663525640965, + "step": 3946 + }, + { + "ce_ib": 3.643867015838623, + "ce_orig": 0.6211862564086914, + "epoch": 1.1345891149615357, + "kl_loss": 0.10319922119379044, + "loss_ib": 0.004675859119743109, + "step": 3946 + }, + { + "ce_ib": 3.284735679626465, + "ce_orig": 0.7935373187065125, + "epoch": 1.1345891149615357, + "kl_loss": 0.23734751343727112, + "loss_ib": 0.005658210255205631, + "step": 3946 + }, + { + "ce_ib": 2.6121294498443604, + "ce_orig": 0.6312143206596375, + "epoch": 1.1345891149615357, + "kl_loss": 0.1428372859954834, + "loss_ib": 0.0040405020117759705, + "step": 3946 + }, + { + "ce_ib": 3.1537859439849854, + "ce_orig": 0.6054879426956177, + "epoch": 1.1348766985405134, + "kl_loss": 0.16438239812850952, + "loss_ib": 0.004797609988600016, + "step": 3947 + }, + { + "ce_ib": 1.4574843645095825, + "ce_orig": 0.29297611117362976, + "epoch": 1.1348766985405134, + "kl_loss": 0.15405258536338806, + "loss_ib": 0.002998010255396366, + "step": 3947 + }, + { + "ce_ib": 4.211612224578857, + "ce_orig": 0.8280377984046936, + "epoch": 1.1348766985405134, + "kl_loss": 0.15667778253555298, + "loss_ib": 0.005778389982879162, + "step": 3947 + }, + { + "ce_ib": 5.740856170654297, + "ce_orig": 1.6125743389129639, + "epoch": 1.1348766985405134, + "kl_loss": 0.2505548298358917, + "loss_ib": 0.0082464050501585, + "step": 3947 + }, + { + "ce_ib": 1.8398690223693848, + "ce_orig": 0.5506724715232849, + "epoch": 1.135164282119491, + "kl_loss": 0.1497347056865692, + "loss_ib": 0.0033372160978615284, + "step": 3948 + }, + { + "ce_ib": 3.193735361099243, + "ce_orig": 0.630372941493988, + "epoch": 1.135164282119491, + "kl_loss": 0.18358373641967773, + "loss_ib": 0.005029572639614344, + "step": 3948 + }, + { + "ce_ib": 2.287210702896118, + "ce_orig": 0.7697523832321167, + "epoch": 1.135164282119491, + "kl_loss": 0.1057003065943718, + "loss_ib": 0.003344213590025902, + "step": 3948 + }, + { + "ce_ib": 2.483257293701172, + "ce_orig": 0.5996534824371338, + "epoch": 1.135164282119491, + "kl_loss": 0.164032980799675, + "loss_ib": 0.004123586695641279, + "step": 3948 + }, + { + "ce_ib": 5.117855072021484, + "ce_orig": 1.3291051387786865, + "epoch": 1.1354518656984687, + "kl_loss": 0.24202454090118408, + "loss_ib": 0.007538100238889456, + "step": 3949 + }, + { + "ce_ib": 4.506450653076172, + "ce_orig": 1.0943173170089722, + "epoch": 1.1354518656984687, + "kl_loss": 0.17070172727108002, + "loss_ib": 0.006213467568159103, + "step": 3949 + }, + { + "ce_ib": 4.145391464233398, + "ce_orig": 0.8949288725852966, + "epoch": 1.1354518656984687, + "kl_loss": 0.2333044409751892, + "loss_ib": 0.006478435825556517, + "step": 3949 + }, + { + "ce_ib": 2.5293102264404297, + "ce_orig": 0.4672752916812897, + "epoch": 1.1354518656984687, + "kl_loss": 0.20316118001937866, + "loss_ib": 0.004560921806842089, + "step": 3949 + }, + { + "epoch": 1.1357394492774462, + "grad_norm": 0.14447224140167236, + "learning_rate": 7.147417215150411e-06, + "loss": 0.7789, + "step": 3950 + }, + { + "ce_ib": 2.3556950092315674, + "ce_orig": 0.7110204100608826, + "epoch": 1.1357394492774462, + "kl_loss": 0.14211209118366241, + "loss_ib": 0.003776815952733159, + "step": 3950 + }, + { + "ce_ib": 2.5128633975982666, + "ce_orig": 0.6597923636436462, + "epoch": 1.1357394492774462, + "kl_loss": 0.19100737571716309, + "loss_ib": 0.004422937054187059, + "step": 3950 + }, + { + "ce_ib": 4.0910491943359375, + "ce_orig": 0.9075561165809631, + "epoch": 1.1357394492774462, + "kl_loss": 0.20867669582366943, + "loss_ib": 0.006177816074341536, + "step": 3950 + }, + { + "ce_ib": 2.709026336669922, + "ce_orig": 0.7025415301322937, + "epoch": 1.1357394492774462, + "kl_loss": 0.09210596233606339, + "loss_ib": 0.0036300858482718468, + "step": 3950 + }, + { + "ce_ib": 2.533466339111328, + "ce_orig": 0.5490314364433289, + "epoch": 1.136027032856424, + "kl_loss": 0.17517095804214478, + "loss_ib": 0.004285175818949938, + "step": 3951 + }, + { + "ce_ib": 2.24950909614563, + "ce_orig": 0.6810106635093689, + "epoch": 1.136027032856424, + "kl_loss": 0.18254494667053223, + "loss_ib": 0.004074958618730307, + "step": 3951 + }, + { + "ce_ib": 3.242535352706909, + "ce_orig": 0.7987545132637024, + "epoch": 1.136027032856424, + "kl_loss": 0.1371174305677414, + "loss_ib": 0.004613709170371294, + "step": 3951 + }, + { + "ce_ib": 2.158376455307007, + "ce_orig": 0.3654884994029999, + "epoch": 1.136027032856424, + "kl_loss": 0.12785111367702484, + "loss_ib": 0.003436887403950095, + "step": 3951 + }, + { + "ce_ib": 5.294265270233154, + "ce_orig": 1.1528698205947876, + "epoch": 1.1363146164354014, + "kl_loss": 0.18265314400196075, + "loss_ib": 0.007120796479284763, + "step": 3952 + }, + { + "ce_ib": 2.0806217193603516, + "ce_orig": 0.5498995780944824, + "epoch": 1.1363146164354014, + "kl_loss": 0.10557901114225388, + "loss_ib": 0.003136411774903536, + "step": 3952 + }, + { + "ce_ib": 1.618688941001892, + "ce_orig": 0.41980597376823425, + "epoch": 1.1363146164354014, + "kl_loss": 0.3710266947746277, + "loss_ib": 0.005328955594450235, + "step": 3952 + }, + { + "ce_ib": 2.2425780296325684, + "ce_orig": 0.6569058895111084, + "epoch": 1.1363146164354014, + "kl_loss": 0.1827286034822464, + "loss_ib": 0.00406986428424716, + "step": 3952 + }, + { + "ce_ib": 2.1700682640075684, + "ce_orig": 0.6665628552436829, + "epoch": 1.1366022000143792, + "kl_loss": 0.1078953742980957, + "loss_ib": 0.0032490219455212355, + "step": 3953 + }, + { + "ce_ib": 2.5277562141418457, + "ce_orig": 0.6242167353630066, + "epoch": 1.1366022000143792, + "kl_loss": 0.15190985798835754, + "loss_ib": 0.00404685502871871, + "step": 3953 + }, + { + "ce_ib": 4.5379319190979, + "ce_orig": 0.7711993455886841, + "epoch": 1.1366022000143792, + "kl_loss": 0.22339297831058502, + "loss_ib": 0.0067718615755438805, + "step": 3953 + }, + { + "ce_ib": 2.026395082473755, + "ce_orig": 0.5062679648399353, + "epoch": 1.1366022000143792, + "kl_loss": 0.1005304604768753, + "loss_ib": 0.003031699685379863, + "step": 3953 + }, + { + "ce_ib": 3.3349320888519287, + "ce_orig": 0.8435352444648743, + "epoch": 1.136889783593357, + "kl_loss": 0.23689545691013336, + "loss_ib": 0.00570388650521636, + "step": 3954 + }, + { + "ce_ib": 4.0679030418396, + "ce_orig": 0.8826500773429871, + "epoch": 1.136889783593357, + "kl_loss": 0.16758006811141968, + "loss_ib": 0.005743703804910183, + "step": 3954 + }, + { + "ce_ib": 1.519096851348877, + "ce_orig": 0.42301133275032043, + "epoch": 1.136889783593357, + "kl_loss": 0.12458804249763489, + "loss_ib": 0.0027649770490825176, + "step": 3954 + }, + { + "ce_ib": 2.398289442062378, + "ce_orig": 0.34174463152885437, + "epoch": 1.136889783593357, + "kl_loss": 0.08772699534893036, + "loss_ib": 0.0032755595166236162, + "step": 3954 + }, + { + "epoch": 1.1371773671723344, + "grad_norm": 0.14777500927448273, + "learning_rate": 7.140406002335406e-06, + "loss": 0.7831, + "step": 3955 + }, + { + "ce_ib": 4.357746601104736, + "ce_orig": 1.2218849658966064, + "epoch": 1.1371773671723344, + "kl_loss": 0.18883481621742249, + "loss_ib": 0.0062460945919156075, + "step": 3955 + }, + { + "ce_ib": 2.7569878101348877, + "ce_orig": 0.6238943934440613, + "epoch": 1.1371773671723344, + "kl_loss": 0.13959428668022156, + "loss_ib": 0.004152930807322264, + "step": 3955 + }, + { + "ce_ib": 2.6747636795043945, + "ce_orig": 0.834793210029602, + "epoch": 1.1371773671723344, + "kl_loss": 0.1074371412396431, + "loss_ib": 0.003749134950339794, + "step": 3955 + }, + { + "ce_ib": 8.175277709960938, + "ce_orig": 2.1093032360076904, + "epoch": 1.1371773671723344, + "kl_loss": 0.24069154262542725, + "loss_ib": 0.010582192800939083, + "step": 3955 + }, + { + "ce_ib": 2.1967215538024902, + "ce_orig": 0.7374721169471741, + "epoch": 1.1374649507513122, + "kl_loss": 0.12179844826459885, + "loss_ib": 0.0034147058613598347, + "step": 3956 + }, + { + "ce_ib": 6.714356422424316, + "ce_orig": 0.22635719180107117, + "epoch": 1.1374649507513122, + "kl_loss": 0.25491881370544434, + "loss_ib": 0.009263544343411922, + "step": 3956 + }, + { + "ce_ib": 3.349722385406494, + "ce_orig": 0.6944134831428528, + "epoch": 1.1374649507513122, + "kl_loss": 0.11383035778999329, + "loss_ib": 0.004488025791943073, + "step": 3956 + }, + { + "ce_ib": 2.9387876987457275, + "ce_orig": 0.7464215755462646, + "epoch": 1.1374649507513122, + "kl_loss": 0.16432757675647736, + "loss_ib": 0.004582063294947147, + "step": 3956 + }, + { + "ce_ib": 4.11525297164917, + "ce_orig": 1.1383620500564575, + "epoch": 1.1377525343302897, + "kl_loss": 0.21401704847812653, + "loss_ib": 0.006255423184484243, + "step": 3957 + }, + { + "ce_ib": 2.0098624229431152, + "ce_orig": 0.3897136151790619, + "epoch": 1.1377525343302897, + "kl_loss": 0.22313007712364197, + "loss_ib": 0.004241162911057472, + "step": 3957 + }, + { + "ce_ib": 1.911595344543457, + "ce_orig": 0.5635653138160706, + "epoch": 1.1377525343302897, + "kl_loss": 0.1010681688785553, + "loss_ib": 0.002922276733443141, + "step": 3957 + }, + { + "ce_ib": 3.2000880241394043, + "ce_orig": 0.7938181161880493, + "epoch": 1.1377525343302897, + "kl_loss": 0.189833864569664, + "loss_ib": 0.005098426714539528, + "step": 3957 + }, + { + "ce_ib": 4.894150733947754, + "ce_orig": 1.346842885017395, + "epoch": 1.1380401179092674, + "kl_loss": 0.17699305713176727, + "loss_ib": 0.006664081010967493, + "step": 3958 + }, + { + "ce_ib": 3.4153385162353516, + "ce_orig": 0.7859764695167542, + "epoch": 1.1380401179092674, + "kl_loss": 0.09646211564540863, + "loss_ib": 0.004379959776997566, + "step": 3958 + }, + { + "ce_ib": 6.136423587799072, + "ce_orig": 1.5660810470581055, + "epoch": 1.1380401179092674, + "kl_loss": 0.15376363694667816, + "loss_ib": 0.007674060296267271, + "step": 3958 + }, + { + "ce_ib": 3.081261396408081, + "ce_orig": 0.9445293545722961, + "epoch": 1.1380401179092674, + "kl_loss": 0.13362720608711243, + "loss_ib": 0.00441753352060914, + "step": 3958 + }, + { + "ce_ib": 2.884019374847412, + "ce_orig": 0.6308121085166931, + "epoch": 1.1383277014882451, + "kl_loss": 0.20375660061836243, + "loss_ib": 0.004921585321426392, + "step": 3959 + }, + { + "ce_ib": 2.6486034393310547, + "ce_orig": 0.7703302502632141, + "epoch": 1.1383277014882451, + "kl_loss": 0.0845952183008194, + "loss_ib": 0.0034945558290928602, + "step": 3959 + }, + { + "ce_ib": 4.908681869506836, + "ce_orig": 1.3744056224822998, + "epoch": 1.1383277014882451, + "kl_loss": 0.18665534257888794, + "loss_ib": 0.006775234825909138, + "step": 3959 + }, + { + "ce_ib": 3.0225393772125244, + "ce_orig": 0.5909170508384705, + "epoch": 1.1383277014882451, + "kl_loss": 0.1730230748653412, + "loss_ib": 0.0047527700662612915, + "step": 3959 + }, + { + "epoch": 1.1386152850672226, + "grad_norm": 0.13614460825920105, + "learning_rate": 7.133389632785543e-06, + "loss": 0.8532, + "step": 3960 + }, + { + "ce_ib": 3.8495583534240723, + "ce_orig": 0.4449765980243683, + "epoch": 1.1386152850672226, + "kl_loss": 0.19828739762306213, + "loss_ib": 0.0058324323035776615, + "step": 3960 + }, + { + "ce_ib": 1.980159878730774, + "ce_orig": 0.6595031023025513, + "epoch": 1.1386152850672226, + "kl_loss": 0.12597273290157318, + "loss_ib": 0.003239887300878763, + "step": 3960 + }, + { + "ce_ib": 4.144323348999023, + "ce_orig": 1.071546196937561, + "epoch": 1.1386152850672226, + "kl_loss": 0.1584395468235016, + "loss_ib": 0.005728719290345907, + "step": 3960 + }, + { + "ce_ib": 3.0292556285858154, + "ce_orig": 0.4770449697971344, + "epoch": 1.1386152850672226, + "kl_loss": 0.17364226281642914, + "loss_ib": 0.004765677731484175, + "step": 3960 + }, + { + "ce_ib": 3.864044189453125, + "ce_orig": 0.754497766494751, + "epoch": 1.1389028686462004, + "kl_loss": 0.1982235461473465, + "loss_ib": 0.005846279673278332, + "step": 3961 + }, + { + "ce_ib": 2.4620018005371094, + "ce_orig": 0.622006893157959, + "epoch": 1.1389028686462004, + "kl_loss": 0.5614153146743774, + "loss_ib": 0.008076154626905918, + "step": 3961 + }, + { + "ce_ib": 3.38106632232666, + "ce_orig": 0.937616765499115, + "epoch": 1.1389028686462004, + "kl_loss": 0.09834205359220505, + "loss_ib": 0.004364486783742905, + "step": 3961 + }, + { + "ce_ib": 2.4767062664031982, + "ce_orig": 0.6508283019065857, + "epoch": 1.1389028686462004, + "kl_loss": 0.11492688208818436, + "loss_ib": 0.0036259752232581377, + "step": 3961 + }, + { + "ce_ib": 2.2644052505493164, + "ce_orig": 0.755958616733551, + "epoch": 1.139190452225178, + "kl_loss": 0.1259811669588089, + "loss_ib": 0.0035242168232798576, + "step": 3962 + }, + { + "ce_ib": 3.4859752655029297, + "ce_orig": 0.8864308595657349, + "epoch": 1.139190452225178, + "kl_loss": 0.12755998969078064, + "loss_ib": 0.0047615752555429935, + "step": 3962 + }, + { + "ce_ib": 2.705723524093628, + "ce_orig": 0.6130251884460449, + "epoch": 1.139190452225178, + "kl_loss": 0.0958942174911499, + "loss_ib": 0.003664665622636676, + "step": 3962 + }, + { + "ce_ib": 4.3483500480651855, + "ce_orig": 1.0511139631271362, + "epoch": 1.139190452225178, + "kl_loss": 0.1304931938648224, + "loss_ib": 0.0056532821618020535, + "step": 3962 + }, + { + "ce_ib": 4.079406261444092, + "ce_orig": 0.7396369576454163, + "epoch": 1.1394780358041556, + "kl_loss": 0.18848946690559387, + "loss_ib": 0.005964300595223904, + "step": 3963 + }, + { + "ce_ib": 3.905263662338257, + "ce_orig": 0.6118573546409607, + "epoch": 1.1394780358041556, + "kl_loss": 0.19238829612731934, + "loss_ib": 0.005829147063195705, + "step": 3963 + }, + { + "ce_ib": 3.4972481727600098, + "ce_orig": 0.9611272215843201, + "epoch": 1.1394780358041556, + "kl_loss": 0.2044627070426941, + "loss_ib": 0.005541875027120113, + "step": 3963 + }, + { + "ce_ib": 3.190891742706299, + "ce_orig": 0.4868339002132416, + "epoch": 1.1394780358041556, + "kl_loss": 0.34803926944732666, + "loss_ib": 0.0066712843254208565, + "step": 3963 + }, + { + "ce_ib": 3.1966049671173096, + "ce_orig": 0.5446838140487671, + "epoch": 1.1397656193831331, + "kl_loss": 0.13580290973186493, + "loss_ib": 0.004554633982479572, + "step": 3964 + }, + { + "ce_ib": 5.42843770980835, + "ce_orig": 1.3018745183944702, + "epoch": 1.1397656193831331, + "kl_loss": 0.16029638051986694, + "loss_ib": 0.00703140115365386, + "step": 3964 + }, + { + "ce_ib": 2.184948205947876, + "ce_orig": 0.4782488942146301, + "epoch": 1.1397656193831331, + "kl_loss": 0.13609766960144043, + "loss_ib": 0.00354592502117157, + "step": 3964 + }, + { + "ce_ib": 5.013199806213379, + "ce_orig": 1.1836659908294678, + "epoch": 1.1397656193831331, + "kl_loss": 0.12662720680236816, + "loss_ib": 0.006279471796005964, + "step": 3964 + }, + { + "epoch": 1.1400532029621109, + "grad_norm": 0.13285596668720245, + "learning_rate": 7.126368123404886e-06, + "loss": 0.8493, + "step": 3965 + }, + { + "ce_ib": 4.007973670959473, + "ce_orig": 0.8939663767814636, + "epoch": 1.1400532029621109, + "kl_loss": 0.17341703176498413, + "loss_ib": 0.005742143839597702, + "step": 3965 + }, + { + "ce_ib": 3.1025993824005127, + "ce_orig": 0.6191834211349487, + "epoch": 1.1400532029621109, + "kl_loss": 0.13755233585834503, + "loss_ib": 0.0044781225733459, + "step": 3965 + }, + { + "ce_ib": 3.053391218185425, + "ce_orig": 0.6772500872612, + "epoch": 1.1400532029621109, + "kl_loss": 0.17400357127189636, + "loss_ib": 0.00479342695325613, + "step": 3965 + }, + { + "ce_ib": 2.8401472568511963, + "ce_orig": 0.8410648107528687, + "epoch": 1.1400532029621109, + "kl_loss": 0.16153141856193542, + "loss_ib": 0.004455461632460356, + "step": 3965 + }, + { + "ce_ib": 2.3211028575897217, + "ce_orig": 0.7319461107254028, + "epoch": 1.1403407865410884, + "kl_loss": 0.13506260514259338, + "loss_ib": 0.0036717287730425596, + "step": 3966 + }, + { + "ce_ib": 4.5439910888671875, + "ce_orig": 0.9984484910964966, + "epoch": 1.1403407865410884, + "kl_loss": 0.22703063488006592, + "loss_ib": 0.006814297288656235, + "step": 3966 + }, + { + "ce_ib": 2.2607131004333496, + "ce_orig": 0.6571620106697083, + "epoch": 1.1403407865410884, + "kl_loss": 0.10281531512737274, + "loss_ib": 0.003288866253569722, + "step": 3966 + }, + { + "ce_ib": 2.6785876750946045, + "ce_orig": 0.7084869146347046, + "epoch": 1.1403407865410884, + "kl_loss": 0.11809416860342026, + "loss_ib": 0.0038595295045524836, + "step": 3966 + }, + { + "ce_ib": 2.3325133323669434, + "ce_orig": 0.6468417644500732, + "epoch": 1.1406283701200661, + "kl_loss": 0.10453815758228302, + "loss_ib": 0.003377894638106227, + "step": 3967 + }, + { + "ce_ib": 4.210821151733398, + "ce_orig": 0.8877782821655273, + "epoch": 1.1406283701200661, + "kl_loss": 0.14602433145046234, + "loss_ib": 0.005671063903719187, + "step": 3967 + }, + { + "ce_ib": 2.114932060241699, + "ce_orig": 0.5907494425773621, + "epoch": 1.1406283701200661, + "kl_loss": 0.10224303603172302, + "loss_ib": 0.0031373624224215746, + "step": 3967 + }, + { + "ce_ib": 4.600132942199707, + "ce_orig": 0.9577822685241699, + "epoch": 1.1406283701200661, + "kl_loss": 0.13846048712730408, + "loss_ib": 0.005984737537801266, + "step": 3967 + }, + { + "ce_ib": 3.3459384441375732, + "ce_orig": 0.7013920545578003, + "epoch": 1.1409159536990439, + "kl_loss": 0.19101521372795105, + "loss_ib": 0.0052560907788574696, + "step": 3968 + }, + { + "ce_ib": 4.107287883758545, + "ce_orig": 1.0887491703033447, + "epoch": 1.1409159536990439, + "kl_loss": 0.15626081824302673, + "loss_ib": 0.005669896025210619, + "step": 3968 + }, + { + "ce_ib": 4.147005558013916, + "ce_orig": 1.111958384513855, + "epoch": 1.1409159536990439, + "kl_loss": 0.18073001503944397, + "loss_ib": 0.005954306107014418, + "step": 3968 + }, + { + "ce_ib": 4.461649417877197, + "ce_orig": 0.8366529941558838, + "epoch": 1.1409159536990439, + "kl_loss": 0.23436036705970764, + "loss_ib": 0.0068052527494728565, + "step": 3968 + }, + { + "ce_ib": 3.3640782833099365, + "ce_orig": 0.8956195712089539, + "epoch": 1.1412035372780214, + "kl_loss": 0.16946834325790405, + "loss_ib": 0.005058761686086655, + "step": 3969 + }, + { + "ce_ib": 2.272580146789551, + "ce_orig": 0.6279340982437134, + "epoch": 1.1412035372780214, + "kl_loss": 0.11809703707695007, + "loss_ib": 0.003453550161793828, + "step": 3969 + }, + { + "ce_ib": 2.3030829429626465, + "ce_orig": 0.46347954869270325, + "epoch": 1.1412035372780214, + "kl_loss": 0.1590043604373932, + "loss_ib": 0.0038931265007704496, + "step": 3969 + }, + { + "ce_ib": 2.2129383087158203, + "ce_orig": 0.7061886191368103, + "epoch": 1.1412035372780214, + "kl_loss": 0.11991426348686218, + "loss_ib": 0.003412080928683281, + "step": 3969 + }, + { + "epoch": 1.141491120856999, + "grad_norm": 0.14635339379310608, + "learning_rate": 7.119341491109877e-06, + "loss": 0.7842, + "step": 3970 + }, + { + "ce_ib": 2.909095525741577, + "ce_orig": 0.6774166822433472, + "epoch": 1.141491120856999, + "kl_loss": 0.25738799571990967, + "loss_ib": 0.005482974927872419, + "step": 3970 + }, + { + "ce_ib": 6.261410713195801, + "ce_orig": 1.6050028800964355, + "epoch": 1.141491120856999, + "kl_loss": 0.17821750044822693, + "loss_ib": 0.008043586276471615, + "step": 3970 + }, + { + "ce_ib": 3.122385025024414, + "ce_orig": 0.9190919399261475, + "epoch": 1.141491120856999, + "kl_loss": 0.13158497214317322, + "loss_ib": 0.004438234958797693, + "step": 3970 + }, + { + "ce_ib": 1.9392725229263306, + "ce_orig": 0.6763073801994324, + "epoch": 1.141491120856999, + "kl_loss": 0.1156524047255516, + "loss_ib": 0.003095796564593911, + "step": 3970 + }, + { + "ce_ib": 5.36786413192749, + "ce_orig": 1.3190624713897705, + "epoch": 1.1417787044359766, + "kl_loss": 0.16553974151611328, + "loss_ib": 0.007023261860013008, + "step": 3971 + }, + { + "ce_ib": 4.039961814880371, + "ce_orig": 0.8604825735092163, + "epoch": 1.1417787044359766, + "kl_loss": 0.13985976576805115, + "loss_ib": 0.00543855968862772, + "step": 3971 + }, + { + "ce_ib": 2.77126145362854, + "ce_orig": 0.3843162953853607, + "epoch": 1.1417787044359766, + "kl_loss": 0.24427984654903412, + "loss_ib": 0.005214059725403786, + "step": 3971 + }, + { + "ce_ib": 2.630023241043091, + "ce_orig": 0.7668853402137756, + "epoch": 1.1417787044359766, + "kl_loss": 0.11257016658782959, + "loss_ib": 0.0037557249888777733, + "step": 3971 + }, + { + "ce_ib": 3.815776824951172, + "ce_orig": 0.7731479406356812, + "epoch": 1.1420662880149544, + "kl_loss": 0.14048068225383759, + "loss_ib": 0.005220583640038967, + "step": 3972 + }, + { + "ce_ib": 1.8756998777389526, + "ce_orig": 0.5346354246139526, + "epoch": 1.1420662880149544, + "kl_loss": 0.17259588837623596, + "loss_ib": 0.003601658856496215, + "step": 3972 + }, + { + "ce_ib": 4.5574469566345215, + "ce_orig": 1.2962828874588013, + "epoch": 1.1420662880149544, + "kl_loss": 0.23046818375587463, + "loss_ib": 0.006862128619104624, + "step": 3972 + }, + { + "ce_ib": 5.837299823760986, + "ce_orig": 1.1682629585266113, + "epoch": 1.1420662880149544, + "kl_loss": 0.12710988521575928, + "loss_ib": 0.007108398713171482, + "step": 3972 + }, + { + "ce_ib": 4.403580665588379, + "ce_orig": 1.0904475450515747, + "epoch": 1.142353871593932, + "kl_loss": 0.18194542825222015, + "loss_ib": 0.006223034579306841, + "step": 3973 + }, + { + "ce_ib": 3.940922498703003, + "ce_orig": 1.0393402576446533, + "epoch": 1.142353871593932, + "kl_loss": 0.1878204345703125, + "loss_ib": 0.0058191269636154175, + "step": 3973 + }, + { + "ce_ib": 2.554093360900879, + "ce_orig": 0.5893291234970093, + "epoch": 1.142353871593932, + "kl_loss": 0.11610471457242966, + "loss_ib": 0.003715140512213111, + "step": 3973 + }, + { + "ce_ib": 3.972177028656006, + "ce_orig": 0.45445525646209717, + "epoch": 1.142353871593932, + "kl_loss": 0.18388137221336365, + "loss_ib": 0.005810990929603577, + "step": 3973 + }, + { + "ce_ib": 1.6832345724105835, + "ce_orig": 0.4562261700630188, + "epoch": 1.1426414551729096, + "kl_loss": 0.3612767457962036, + "loss_ib": 0.005296002142131329, + "step": 3974 + }, + { + "ce_ib": 1.9191820621490479, + "ce_orig": 0.5457594990730286, + "epoch": 1.1426414551729096, + "kl_loss": 0.10741101205348969, + "loss_ib": 0.002993292175233364, + "step": 3974 + }, + { + "ce_ib": 4.685827732086182, + "ce_orig": 1.1877714395523071, + "epoch": 1.1426414551729096, + "kl_loss": 0.19255797564983368, + "loss_ib": 0.006611407268792391, + "step": 3974 + }, + { + "ce_ib": 3.0216147899627686, + "ce_orig": 0.743878960609436, + "epoch": 1.1426414551729096, + "kl_loss": 0.19174164533615112, + "loss_ib": 0.004939031321555376, + "step": 3974 + }, + { + "epoch": 1.1429290387518873, + "grad_norm": 0.14721724390983582, + "learning_rate": 7.112309752829305e-06, + "loss": 0.8985, + "step": 3975 + }, + { + "ce_ib": 3.7000484466552734, + "ce_orig": 0.7087836861610413, + "epoch": 1.1429290387518873, + "kl_loss": 0.14666679501533508, + "loss_ib": 0.005166716407984495, + "step": 3975 + }, + { + "ce_ib": 3.0129897594451904, + "ce_orig": 0.5871021747589111, + "epoch": 1.1429290387518873, + "kl_loss": 0.19251133501529694, + "loss_ib": 0.004938102792948484, + "step": 3975 + }, + { + "ce_ib": 3.8776445388793945, + "ce_orig": 0.8646072745323181, + "epoch": 1.1429290387518873, + "kl_loss": 0.19804823398590088, + "loss_ib": 0.005858126562088728, + "step": 3975 + }, + { + "ce_ib": 2.9675934314727783, + "ce_orig": 0.7142659425735474, + "epoch": 1.1429290387518873, + "kl_loss": 0.1800265610218048, + "loss_ib": 0.004767858888953924, + "step": 3975 + }, + { + "ce_ib": 3.589606523513794, + "ce_orig": 0.763541042804718, + "epoch": 1.1432166223308649, + "kl_loss": 0.20370829105377197, + "loss_ib": 0.005626689177006483, + "step": 3976 + }, + { + "ce_ib": 3.2271981239318848, + "ce_orig": 0.5356317758560181, + "epoch": 1.1432166223308649, + "kl_loss": 0.1791313886642456, + "loss_ib": 0.005018511787056923, + "step": 3976 + }, + { + "ce_ib": 3.496647834777832, + "ce_orig": 0.9802384376525879, + "epoch": 1.1432166223308649, + "kl_loss": 0.36506858468055725, + "loss_ib": 0.007147333584725857, + "step": 3976 + }, + { + "ce_ib": 3.130884885787964, + "ce_orig": 0.7588338851928711, + "epoch": 1.1432166223308649, + "kl_loss": 0.1640012264251709, + "loss_ib": 0.004770897328853607, + "step": 3976 + }, + { + "ce_ib": 3.971212148666382, + "ce_orig": 1.1279114484786987, + "epoch": 1.1435042059098426, + "kl_loss": 0.18337562680244446, + "loss_ib": 0.005804968532174826, + "step": 3977 + }, + { + "ce_ib": 2.682224750518799, + "ce_orig": 0.504176676273346, + "epoch": 1.1435042059098426, + "kl_loss": 0.20195814967155457, + "loss_ib": 0.004701806232333183, + "step": 3977 + }, + { + "ce_ib": 2.681668996810913, + "ce_orig": 0.7615689635276794, + "epoch": 1.1435042059098426, + "kl_loss": 0.17322933673858643, + "loss_ib": 0.004413962364196777, + "step": 3977 + }, + { + "ce_ib": 2.8303213119506836, + "ce_orig": 0.870517909526825, + "epoch": 1.1435042059098426, + "kl_loss": 0.17063161730766296, + "loss_ib": 0.00453663757070899, + "step": 3977 + }, + { + "ce_ib": 4.550454616546631, + "ce_orig": 1.0007975101470947, + "epoch": 1.14379178948882, + "kl_loss": 0.21666014194488525, + "loss_ib": 0.006717056035995483, + "step": 3978 + }, + { + "ce_ib": 2.0756914615631104, + "ce_orig": 0.7992413640022278, + "epoch": 1.14379178948882, + "kl_loss": 0.11270985007286072, + "loss_ib": 0.003202789928764105, + "step": 3978 + }, + { + "ce_ib": 4.770277500152588, + "ce_orig": 1.055168867111206, + "epoch": 1.14379178948882, + "kl_loss": 0.20247682929039001, + "loss_ib": 0.006795045454055071, + "step": 3978 + }, + { + "ce_ib": 2.1064651012420654, + "ce_orig": 0.718454122543335, + "epoch": 1.14379178948882, + "kl_loss": 0.11965135484933853, + "loss_ib": 0.0033029785845428705, + "step": 3978 + }, + { + "ce_ib": 3.427180528640747, + "ce_orig": 0.7573584318161011, + "epoch": 1.1440793730677978, + "kl_loss": 0.1621377319097519, + "loss_ib": 0.0050485581159591675, + "step": 3979 + }, + { + "ce_ib": 2.5107579231262207, + "ce_orig": 0.7284294962882996, + "epoch": 1.1440793730677978, + "kl_loss": 0.11635010689496994, + "loss_ib": 0.00367425917647779, + "step": 3979 + }, + { + "ce_ib": 2.4524898529052734, + "ce_orig": 0.6306232213973999, + "epoch": 1.1440793730677978, + "kl_loss": 0.09593386948108673, + "loss_ib": 0.00341182854026556, + "step": 3979 + }, + { + "ce_ib": 3.9762303829193115, + "ce_orig": 1.1401424407958984, + "epoch": 1.1440793730677978, + "kl_loss": 0.17922720313072205, + "loss_ib": 0.005768502131104469, + "step": 3979 + }, + { + "epoch": 1.1443669566467753, + "grad_norm": 0.15921704471111298, + "learning_rate": 7.1052729255042645e-06, + "loss": 0.845, + "step": 3980 + }, + { + "ce_ib": 3.3616251945495605, + "ce_orig": 0.8728147745132446, + "epoch": 1.1443669566467753, + "kl_loss": 0.1356954574584961, + "loss_ib": 0.004718579817563295, + "step": 3980 + }, + { + "ce_ib": 5.751689910888672, + "ce_orig": 1.431601881980896, + "epoch": 1.1443669566467753, + "kl_loss": 0.13765107095241547, + "loss_ib": 0.007128200959414244, + "step": 3980 + }, + { + "ce_ib": 3.119159698486328, + "ce_orig": 0.7377316951751709, + "epoch": 1.1443669566467753, + "kl_loss": 0.1376071274280548, + "loss_ib": 0.004495230969041586, + "step": 3980 + }, + { + "ce_ib": 3.7247138023376465, + "ce_orig": 1.1613155603408813, + "epoch": 1.1443669566467753, + "kl_loss": 0.15885049104690552, + "loss_ib": 0.00531321857124567, + "step": 3980 + }, + { + "ce_ib": 1.8996009826660156, + "ce_orig": 0.638944149017334, + "epoch": 1.144654540225753, + "kl_loss": 0.17393219470977783, + "loss_ib": 0.003638923168182373, + "step": 3981 + }, + { + "ce_ib": 4.595991134643555, + "ce_orig": 1.2655866146087646, + "epoch": 1.144654540225753, + "kl_loss": 0.2037709802389145, + "loss_ib": 0.006633700802922249, + "step": 3981 + }, + { + "ce_ib": 3.4474947452545166, + "ce_orig": 0.7992764115333557, + "epoch": 1.144654540225753, + "kl_loss": 0.07982450723648071, + "loss_ib": 0.00424573989585042, + "step": 3981 + }, + { + "ce_ib": 2.7407455444335938, + "ce_orig": 0.7313984036445618, + "epoch": 1.144654540225753, + "kl_loss": 0.15479502081871033, + "loss_ib": 0.004288695752620697, + "step": 3981 + }, + { + "ce_ib": 4.338034629821777, + "ce_orig": 0.6660109162330627, + "epoch": 1.1449421238047308, + "kl_loss": 0.21282610297203064, + "loss_ib": 0.006466295570135117, + "step": 3982 + }, + { + "ce_ib": 3.4672513008117676, + "ce_orig": 0.619175910949707, + "epoch": 1.1449421238047308, + "kl_loss": 0.1638975739479065, + "loss_ib": 0.005106227472424507, + "step": 3982 + }, + { + "ce_ib": 2.8594884872436523, + "ce_orig": 0.6661558151245117, + "epoch": 1.1449421238047308, + "kl_loss": 0.1210477352142334, + "loss_ib": 0.004069965798407793, + "step": 3982 + }, + { + "ce_ib": 4.262199401855469, + "ce_orig": 0.601740837097168, + "epoch": 1.1449421238047308, + "kl_loss": 0.1839599311351776, + "loss_ib": 0.006101798731833696, + "step": 3982 + }, + { + "ce_ib": 4.454256534576416, + "ce_orig": 1.0579811334609985, + "epoch": 1.1452297073837083, + "kl_loss": 0.15368813276290894, + "loss_ib": 0.005991138052195311, + "step": 3983 + }, + { + "ce_ib": 2.439730644226074, + "ce_orig": 0.46088939905166626, + "epoch": 1.1452297073837083, + "kl_loss": 0.19432881474494934, + "loss_ib": 0.004383018705993891, + "step": 3983 + }, + { + "ce_ib": 4.7663421630859375, + "ce_orig": 1.4090168476104736, + "epoch": 1.1452297073837083, + "kl_loss": 0.2662087380886078, + "loss_ib": 0.007428429555147886, + "step": 3983 + }, + { + "ce_ib": 2.959800958633423, + "ce_orig": 0.9504032731056213, + "epoch": 1.1452297073837083, + "kl_loss": 0.1596524715423584, + "loss_ib": 0.004556325729936361, + "step": 3983 + }, + { + "ce_ib": 5.924626350402832, + "ce_orig": 1.551084041595459, + "epoch": 1.145517290962686, + "kl_loss": 0.19014418125152588, + "loss_ib": 0.007826068438589573, + "step": 3984 + }, + { + "ce_ib": 2.441492795944214, + "ce_orig": 0.6716703772544861, + "epoch": 1.145517290962686, + "kl_loss": 0.09383763372898102, + "loss_ib": 0.0033798690419644117, + "step": 3984 + }, + { + "ce_ib": 4.996888160705566, + "ce_orig": 1.2122154235839844, + "epoch": 1.145517290962686, + "kl_loss": 0.2530690133571625, + "loss_ib": 0.00752757815644145, + "step": 3984 + }, + { + "ce_ib": 2.0337648391723633, + "ce_orig": 0.46066561341285706, + "epoch": 1.145517290962686, + "kl_loss": 0.19915540516376495, + "loss_ib": 0.004025318659842014, + "step": 3984 + }, + { + "epoch": 1.1458048745416636, + "grad_norm": 0.14736391603946686, + "learning_rate": 7.0982310260881e-06, + "loss": 0.8133, + "step": 3985 + }, + { + "ce_ib": 2.8700032234191895, + "ce_orig": 0.6739102602005005, + "epoch": 1.1458048745416636, + "kl_loss": 0.12241429835557938, + "loss_ib": 0.004094146192073822, + "step": 3985 + }, + { + "ce_ib": 4.6883721351623535, + "ce_orig": 0.9413602352142334, + "epoch": 1.1458048745416636, + "kl_loss": 0.24337348341941833, + "loss_ib": 0.007122106850147247, + "step": 3985 + }, + { + "ce_ib": 2.542787551879883, + "ce_orig": 0.8821794986724854, + "epoch": 1.1458048745416636, + "kl_loss": 0.1326761245727539, + "loss_ib": 0.003869548672810197, + "step": 3985 + }, + { + "ce_ib": 2.3461999893188477, + "ce_orig": 0.4950065314769745, + "epoch": 1.1458048745416636, + "kl_loss": 0.12812399864196777, + "loss_ib": 0.003627439960837364, + "step": 3985 + }, + { + "ce_ib": 5.006199359893799, + "ce_orig": 1.3852721452713013, + "epoch": 1.1460924581206413, + "kl_loss": 0.17100971937179565, + "loss_ib": 0.0067162965424358845, + "step": 3986 + }, + { + "ce_ib": 4.249136924743652, + "ce_orig": 1.2067066431045532, + "epoch": 1.1460924581206413, + "kl_loss": 0.20132428407669067, + "loss_ib": 0.006262379698455334, + "step": 3986 + }, + { + "ce_ib": 4.112875938415527, + "ce_orig": 0.9874603152275085, + "epoch": 1.1460924581206413, + "kl_loss": 0.18881213665008545, + "loss_ib": 0.006000997498631477, + "step": 3986 + }, + { + "ce_ib": 4.758562088012695, + "ce_orig": 1.132392168045044, + "epoch": 1.1460924581206413, + "kl_loss": 0.1586150825023651, + "loss_ib": 0.006344712805002928, + "step": 3986 + }, + { + "ce_ib": 2.1858174800872803, + "ce_orig": 0.7086425423622131, + "epoch": 1.146380041699619, + "kl_loss": 0.12258245050907135, + "loss_ib": 0.0034116420429199934, + "step": 3987 + }, + { + "ce_ib": 2.2151284217834473, + "ce_orig": 0.942326545715332, + "epoch": 1.146380041699619, + "kl_loss": 0.0835803672671318, + "loss_ib": 0.0030509319622069597, + "step": 3987 + }, + { + "ce_ib": 3.9029126167297363, + "ce_orig": 1.0786036252975464, + "epoch": 1.146380041699619, + "kl_loss": 0.15149374306201935, + "loss_ib": 0.0054178498685359955, + "step": 3987 + }, + { + "ce_ib": 5.200980186462402, + "ce_orig": 1.2767014503479004, + "epoch": 1.146380041699619, + "kl_loss": 0.18231046199798584, + "loss_ib": 0.007024084683507681, + "step": 3987 + }, + { + "ce_ib": 2.3451879024505615, + "ce_orig": 0.5906080603599548, + "epoch": 1.1466676252785966, + "kl_loss": 0.09696901589632034, + "loss_ib": 0.003314877860248089, + "step": 3988 + }, + { + "ce_ib": 3.4889116287231445, + "ce_orig": 0.47479575872421265, + "epoch": 1.1466676252785966, + "kl_loss": 0.24510447680950165, + "loss_ib": 0.005939956288784742, + "step": 3988 + }, + { + "ce_ib": 2.7915027141571045, + "ce_orig": 0.7410535216331482, + "epoch": 1.1466676252785966, + "kl_loss": 0.14186803996562958, + "loss_ib": 0.004210182931274176, + "step": 3988 + }, + { + "ce_ib": 5.629515171051025, + "ce_orig": 0.889417290687561, + "epoch": 1.1466676252785966, + "kl_loss": 0.12318829447031021, + "loss_ib": 0.006861397996544838, + "step": 3988 + }, + { + "ce_ib": 7.128272533416748, + "ce_orig": 1.8462059497833252, + "epoch": 1.1469552088575743, + "kl_loss": 0.18516822159290314, + "loss_ib": 0.00897995475679636, + "step": 3989 + }, + { + "ce_ib": 3.4354822635650635, + "ce_orig": 0.6765165328979492, + "epoch": 1.1469552088575743, + "kl_loss": 0.19237038493156433, + "loss_ib": 0.0053591858595609665, + "step": 3989 + }, + { + "ce_ib": 2.346036672592163, + "ce_orig": 0.6308354139328003, + "epoch": 1.1469552088575743, + "kl_loss": 0.1637895703315735, + "loss_ib": 0.003983932547271252, + "step": 3989 + }, + { + "ce_ib": 3.8877041339874268, + "ce_orig": 1.1838164329528809, + "epoch": 1.1469552088575743, + "kl_loss": 0.1818244457244873, + "loss_ib": 0.005705948919057846, + "step": 3989 + }, + { + "epoch": 1.1472427924365518, + "grad_norm": 0.16816674172878265, + "learning_rate": 7.091184071546384e-06, + "loss": 0.8772, + "step": 3990 + }, + { + "ce_ib": 5.700591087341309, + "ce_orig": 1.3760294914245605, + "epoch": 1.1472427924365518, + "kl_loss": 0.18145331740379333, + "loss_ib": 0.007515124045312405, + "step": 3990 + }, + { + "ce_ib": 3.1659858226776123, + "ce_orig": 0.8198292255401611, + "epoch": 1.1472427924365518, + "kl_loss": 0.19218041002750397, + "loss_ib": 0.005087789613753557, + "step": 3990 + }, + { + "ce_ib": 2.717588424682617, + "ce_orig": 0.7190279364585876, + "epoch": 1.1472427924365518, + "kl_loss": 0.28590068221092224, + "loss_ib": 0.005576594732701778, + "step": 3990 + }, + { + "ce_ib": 1.9653754234313965, + "ce_orig": 0.5348464250564575, + "epoch": 1.1472427924365518, + "kl_loss": 0.1917305886745453, + "loss_ib": 0.003882681019604206, + "step": 3990 + }, + { + "ce_ib": 2.9495582580566406, + "ce_orig": 0.6335250735282898, + "epoch": 1.1475303760155295, + "kl_loss": 0.14392885565757751, + "loss_ib": 0.004388846457004547, + "step": 3991 + }, + { + "ce_ib": 5.9010396003723145, + "ce_orig": 1.6513053178787231, + "epoch": 1.1475303760155295, + "kl_loss": 0.1628008484840393, + "loss_ib": 0.007529048249125481, + "step": 3991 + }, + { + "ce_ib": 2.9992332458496094, + "ce_orig": 0.530497133731842, + "epoch": 1.1475303760155295, + "kl_loss": 0.2056969255208969, + "loss_ib": 0.005056202411651611, + "step": 3991 + }, + { + "ce_ib": 1.7769259214401245, + "ce_orig": 0.5374500155448914, + "epoch": 1.1475303760155295, + "kl_loss": 0.21058881282806396, + "loss_ib": 0.0038828139659017324, + "step": 3991 + }, + { + "ce_ib": 4.059913635253906, + "ce_orig": 0.7427805066108704, + "epoch": 1.1478179595945073, + "kl_loss": 0.1602439433336258, + "loss_ib": 0.005662352778017521, + "step": 3992 + }, + { + "ce_ib": 1.845596432685852, + "ce_orig": 0.47821396589279175, + "epoch": 1.1478179595945073, + "kl_loss": 0.14131686091423035, + "loss_ib": 0.0032587649766355753, + "step": 3992 + }, + { + "ce_ib": 1.6507600545883179, + "ce_orig": 0.5653470754623413, + "epoch": 1.1478179595945073, + "kl_loss": 0.128033846616745, + "loss_ib": 0.002931098220869899, + "step": 3992 + }, + { + "ce_ib": 3.3238470554351807, + "ce_orig": 0.6705299615859985, + "epoch": 1.1478179595945073, + "kl_loss": 0.2346511334180832, + "loss_ib": 0.0056703584268689156, + "step": 3992 + }, + { + "ce_ib": 4.493586540222168, + "ce_orig": 0.927484929561615, + "epoch": 1.1481055431734848, + "kl_loss": 0.148048996925354, + "loss_ib": 0.005974076688289642, + "step": 3993 + }, + { + "ce_ib": 1.928099513053894, + "ce_orig": 0.5034859776496887, + "epoch": 1.1481055431734848, + "kl_loss": 0.1272585391998291, + "loss_ib": 0.0032006846740841866, + "step": 3993 + }, + { + "ce_ib": 4.078188419342041, + "ce_orig": 1.280963659286499, + "epoch": 1.1481055431734848, + "kl_loss": 0.1755148321390152, + "loss_ib": 0.005833336617797613, + "step": 3993 + }, + { + "ce_ib": 2.3082258701324463, + "ce_orig": 0.5215340256690979, + "epoch": 1.1481055431734848, + "kl_loss": 0.19251908361911774, + "loss_ib": 0.004233416635543108, + "step": 3993 + }, + { + "ce_ib": 3.0914664268493652, + "ce_orig": 0.7510054707527161, + "epoch": 1.1483931267524625, + "kl_loss": 0.10655469447374344, + "loss_ib": 0.004157013259828091, + "step": 3994 + }, + { + "ce_ib": 3.91859769821167, + "ce_orig": 0.672126054763794, + "epoch": 1.1483931267524625, + "kl_loss": 0.19409504532814026, + "loss_ib": 0.005859547760337591, + "step": 3994 + }, + { + "ce_ib": 5.82289981842041, + "ce_orig": 1.6816506385803223, + "epoch": 1.1483931267524625, + "kl_loss": 0.18577617406845093, + "loss_ib": 0.0076806615106761456, + "step": 3994 + }, + { + "ce_ib": 1.8850373029708862, + "ce_orig": 0.40045616030693054, + "epoch": 1.1483931267524625, + "kl_loss": 0.1451561450958252, + "loss_ib": 0.003336598863825202, + "step": 3994 + }, + { + "epoch": 1.14868071033144, + "grad_norm": 0.1525232046842575, + "learning_rate": 7.0841320788568655e-06, + "loss": 0.8465, + "step": 3995 + }, + { + "ce_ib": 6.438030242919922, + "ce_orig": 1.29721200466156, + "epoch": 1.14868071033144, + "kl_loss": 0.17681217193603516, + "loss_ib": 0.008206152357161045, + "step": 3995 + }, + { + "ce_ib": 2.5554873943328857, + "ce_orig": 0.7210001945495605, + "epoch": 1.14868071033144, + "kl_loss": 0.17445415258407593, + "loss_ib": 0.004300029017031193, + "step": 3995 + }, + { + "ce_ib": 3.4299113750457764, + "ce_orig": 0.5896413326263428, + "epoch": 1.14868071033144, + "kl_loss": 0.15589478611946106, + "loss_ib": 0.004988859407603741, + "step": 3995 + }, + { + "ce_ib": 3.045745611190796, + "ce_orig": 0.7522867918014526, + "epoch": 1.14868071033144, + "kl_loss": 0.17318376898765564, + "loss_ib": 0.004777583293616772, + "step": 3995 + }, + { + "ce_ib": 3.3739044666290283, + "ce_orig": 0.7004149556159973, + "epoch": 1.1489682939104178, + "kl_loss": 0.13346663117408752, + "loss_ib": 0.004708570893853903, + "step": 3996 + }, + { + "ce_ib": 3.1429309844970703, + "ce_orig": 0.774016261100769, + "epoch": 1.1489682939104178, + "kl_loss": 0.16503509879112244, + "loss_ib": 0.0047932821325957775, + "step": 3996 + }, + { + "ce_ib": 2.6717774868011475, + "ce_orig": 0.7973310351371765, + "epoch": 1.1489682939104178, + "kl_loss": 0.14384743571281433, + "loss_ib": 0.004110252019017935, + "step": 3996 + }, + { + "ce_ib": 1.9816889762878418, + "ce_orig": 0.5023980736732483, + "epoch": 1.1489682939104178, + "kl_loss": 0.10070103406906128, + "loss_ib": 0.002988699357956648, + "step": 3996 + }, + { + "ce_ib": 3.7633473873138428, + "ce_orig": 0.6361269950866699, + "epoch": 1.1492558774893953, + "kl_loss": 0.06812086701393127, + "loss_ib": 0.004444556310772896, + "step": 3997 + }, + { + "ce_ib": 2.3760108947753906, + "ce_orig": 0.6515633463859558, + "epoch": 1.1492558774893953, + "kl_loss": 0.1323801577091217, + "loss_ib": 0.0036998121067881584, + "step": 3997 + }, + { + "ce_ib": 2.2320594787597656, + "ce_orig": 0.6799426078796387, + "epoch": 1.1492558774893953, + "kl_loss": 0.13762269914150238, + "loss_ib": 0.0036082863807678223, + "step": 3997 + }, + { + "ce_ib": 2.3795673847198486, + "ce_orig": 0.5535420179367065, + "epoch": 1.1492558774893953, + "kl_loss": 0.13284356892108917, + "loss_ib": 0.0037080030888319016, + "step": 3997 + }, + { + "ce_ib": 2.5281989574432373, + "ce_orig": 0.6037531495094299, + "epoch": 1.149543461068373, + "kl_loss": 0.1269739270210266, + "loss_ib": 0.0037979381158947945, + "step": 3998 + }, + { + "ce_ib": 3.846893787384033, + "ce_orig": 1.238062858581543, + "epoch": 1.149543461068373, + "kl_loss": 0.12695097923278809, + "loss_ib": 0.005116403568536043, + "step": 3998 + }, + { + "ce_ib": 0.8737903237342834, + "ce_orig": 0.1798173487186432, + "epoch": 1.149543461068373, + "kl_loss": 0.3677876889705658, + "loss_ib": 0.004551667254418135, + "step": 3998 + }, + { + "ce_ib": 5.273362636566162, + "ce_orig": 0.8275889158248901, + "epoch": 1.149543461068373, + "kl_loss": 0.17092236876487732, + "loss_ib": 0.006982586346566677, + "step": 3998 + }, + { + "ce_ib": 4.091914176940918, + "ce_orig": 0.8124197721481323, + "epoch": 1.1498310446473505, + "kl_loss": 0.15847674012184143, + "loss_ib": 0.005676681641489267, + "step": 3999 + }, + { + "ce_ib": 3.8812198638916016, + "ce_orig": 0.7570636868476868, + "epoch": 1.1498310446473505, + "kl_loss": 0.16866913437843323, + "loss_ib": 0.005567911081016064, + "step": 3999 + }, + { + "ce_ib": 2.5389630794525146, + "ce_orig": 0.8085405230522156, + "epoch": 1.1498310446473505, + "kl_loss": 0.1251903772354126, + "loss_ib": 0.0037908670492470264, + "step": 3999 + }, + { + "ce_ib": 7.409590244293213, + "ce_orig": 1.521683692932129, + "epoch": 1.1498310446473505, + "kl_loss": 0.14488360285758972, + "loss_ib": 0.008858426474034786, + "step": 3999 + }, + { + "epoch": 1.1501186282263283, + "grad_norm": 0.13691478967666626, + "learning_rate": 7.0770750650094335e-06, + "loss": 0.794, + "step": 4000 + } + ], + "logging_steps": 5, + "max_steps": 10434, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}