diff --git "a/checkpoint-21431/trainer_state.json" "b/checkpoint-21431/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-21431/trainer_state.json" @@ -0,0 +1,25742 @@ +{ + "best_metric": 0.05617095157504082, + "best_model_checkpoint": "wav2vec2-base-pem123-32-960h-la/checkpoint-21431", + "epoch": 1.0, + "global_step": 21431, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.0000000000000001e-07, + "loss": 2.5681, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 2.0000000000000002e-07, + "loss": 3.2419, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 3.0000000000000004e-07, + "loss": 3.4492, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 4.0000000000000003e-07, + "loss": 3.6105, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 5.000000000000001e-07, + "loss": 3.2818, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 6.000000000000001e-07, + "loss": 3.3872, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 7.000000000000001e-07, + "loss": 3.4249, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-07, + "loss": 3.4935, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 9.000000000000001e-07, + "loss": 3.5878, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 3.3044, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.1e-06, + "loss": 2.1617, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 1.2000000000000002e-06, + "loss": 2.8407, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 1.3e-06, + "loss": 2.6913, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 1.4000000000000001e-06, + "loss": 3.1341, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 1.5e-06, + "loss": 2.6384, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 1.6000000000000001e-06, + "loss": 2.3875, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 1.7000000000000002e-06, + "loss": 2.6405, + "step": 85 + }, + { + "epoch": 0.0, + "learning_rate": 1.8000000000000001e-06, + "loss": 2.6826, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 1.9000000000000002e-06, + "loss": 2.6435, + "step": 95 + }, + { + "epoch": 0.0, + "learning_rate": 2.0000000000000003e-06, + "loss": 2.9612, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.1000000000000002e-06, + "loss": 1.6984, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 2.2e-06, + "loss": 1.6943, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 2.3000000000000004e-06, + "loss": 1.9857, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 2.4000000000000003e-06, + "loss": 2.0135, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 2.5e-06, + "loss": 1.9529, + "step": 125 + }, + { + "epoch": 0.01, + "learning_rate": 2.6e-06, + "loss": 2.1156, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 2.7000000000000004e-06, + "loss": 1.8468, + "step": 135 + }, + { + "epoch": 0.01, + "learning_rate": 2.8000000000000003e-06, + "loss": 1.9046, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 2.9e-06, + "loss": 2.0311, + "step": 145 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 2.0449, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 3.1000000000000004e-06, + "loss": 1.3589, + "step": 155 + }, + { + "epoch": 0.01, + "learning_rate": 3.2000000000000003e-06, + "loss": 1.2662, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 3.3000000000000006e-06, + "loss": 1.1506, + "step": 165 + }, + { + "epoch": 0.01, + "learning_rate": 3.4000000000000005e-06, + "loss": 1.3545, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 3.5e-06, + "loss": 1.4116, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 3.6000000000000003e-06, + "loss": 1.402, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 3.7e-06, + "loss": 1.5328, + "step": 185 + }, + { + "epoch": 0.01, + "learning_rate": 3.8000000000000005e-06, + "loss": 1.475, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 3.900000000000001e-06, + "loss": 1.6633, + "step": 195 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 2.138, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.1e-06, + "loss": 1.2597, + "step": 205 + }, + { + "epoch": 0.01, + "learning_rate": 4.2000000000000004e-06, + "loss": 1.0349, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 4.3e-06, + "loss": 1.0808, + "step": 215 + }, + { + "epoch": 0.01, + "learning_rate": 4.4e-06, + "loss": 1.044, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 4.5e-06, + "loss": 1.2246, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 4.600000000000001e-06, + "loss": 1.112, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 4.7e-06, + "loss": 1.3231, + "step": 235 + }, + { + "epoch": 0.01, + "learning_rate": 4.800000000000001e-06, + "loss": 1.5087, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 4.9000000000000005e-06, + "loss": 1.5727, + "step": 245 + }, + { + "epoch": 0.01, + "learning_rate": 5e-06, + "loss": 1.7501, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 5.1e-06, + "loss": 1.1905, + "step": 255 + }, + { + "epoch": 0.01, + "learning_rate": 5.2e-06, + "loss": 0.9608, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 5.300000000000001e-06, + "loss": 1.1615, + "step": 265 + }, + { + "epoch": 0.01, + "learning_rate": 5.400000000000001e-06, + "loss": 1.0021, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 5.500000000000001e-06, + "loss": 0.9685, + "step": 275 + }, + { + "epoch": 0.01, + "learning_rate": 5.600000000000001e-06, + "loss": 1.2904, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 5.7e-06, + "loss": 1.2705, + "step": 285 + }, + { + "epoch": 0.01, + "learning_rate": 5.8e-06, + "loss": 1.4223, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 5.9e-06, + "loss": 1.5338, + "step": 295 + }, + { + "epoch": 0.01, + "learning_rate": 6e-06, + "loss": 1.7456, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 6.1e-06, + "loss": 1.1443, + "step": 305 + }, + { + "epoch": 0.01, + "learning_rate": 6.200000000000001e-06, + "loss": 0.8596, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 6.300000000000001e-06, + "loss": 0.9641, + "step": 315 + }, + { + "epoch": 0.01, + "learning_rate": 6.4000000000000006e-06, + "loss": 0.9228, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 6.5000000000000004e-06, + "loss": 1.0683, + "step": 325 + }, + { + "epoch": 0.02, + "learning_rate": 6.600000000000001e-06, + "loss": 0.9611, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 6.700000000000001e-06, + "loss": 1.2087, + "step": 335 + }, + { + "epoch": 0.02, + "learning_rate": 6.800000000000001e-06, + "loss": 1.1336, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 6.9e-06, + "loss": 1.422, + "step": 345 + }, + { + "epoch": 0.02, + "learning_rate": 7e-06, + "loss": 1.8331, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 7.100000000000001e-06, + "loss": 1.0522, + "step": 355 + }, + { + "epoch": 0.02, + "learning_rate": 7.2000000000000005e-06, + "loss": 0.8421, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 7.3e-06, + "loss": 0.7651, + "step": 365 + }, + { + "epoch": 0.02, + "learning_rate": 7.4e-06, + "loss": 0.9904, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 7.500000000000001e-06, + "loss": 0.8832, + "step": 375 + }, + { + "epoch": 0.02, + "learning_rate": 7.600000000000001e-06, + "loss": 0.9663, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 7.7e-06, + "loss": 1.115, + "step": 385 + }, + { + "epoch": 0.02, + "learning_rate": 7.800000000000002e-06, + "loss": 1.1533, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 7.9e-06, + "loss": 1.2886, + "step": 395 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 2.0128, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 8.1e-06, + "loss": 0.9795, + "step": 405 + }, + { + "epoch": 0.02, + "learning_rate": 8.2e-06, + "loss": 0.7312, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 8.3e-06, + "loss": 0.7858, + "step": 415 + }, + { + "epoch": 0.02, + "learning_rate": 8.400000000000001e-06, + "loss": 0.8727, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 8.5e-06, + "loss": 0.8088, + "step": 425 + }, + { + "epoch": 0.02, + "learning_rate": 8.6e-06, + "loss": 0.9188, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 8.700000000000001e-06, + "loss": 1.1715, + "step": 435 + }, + { + "epoch": 0.02, + "learning_rate": 8.8e-06, + "loss": 1.3011, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 8.900000000000001e-06, + "loss": 1.3686, + "step": 445 + }, + { + "epoch": 0.02, + "learning_rate": 9e-06, + "loss": 1.5265, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 9.100000000000001e-06, + "loss": 0.8523, + "step": 455 + }, + { + "epoch": 0.02, + "learning_rate": 9.200000000000002e-06, + "loss": 0.6656, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 9.3e-06, + "loss": 0.8027, + "step": 465 + }, + { + "epoch": 0.02, + "learning_rate": 9.4e-06, + "loss": 0.6912, + "step": 470 + }, + { + "epoch": 0.02, + "learning_rate": 9.5e-06, + "loss": 0.8886, + "step": 475 + }, + { + "epoch": 0.02, + "learning_rate": 9.600000000000001e-06, + "loss": 0.8875, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 9.7e-06, + "loss": 0.9732, + "step": 485 + }, + { + "epoch": 0.02, + "learning_rate": 9.800000000000001e-06, + "loss": 1.051, + "step": 490 + }, + { + "epoch": 0.02, + "learning_rate": 9.9e-06, + "loss": 1.1972, + "step": 495 + }, + { + "epoch": 0.02, + "learning_rate": 1e-05, + "loss": 1.5824, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 1.0100000000000002e-05, + "loss": 0.9922, + "step": 505 + }, + { + "epoch": 0.02, + "learning_rate": 1.02e-05, + "loss": 0.6785, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 1.0300000000000001e-05, + "loss": 0.6366, + "step": 515 + }, + { + "epoch": 0.02, + "learning_rate": 1.04e-05, + "loss": 0.7643, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 1.0500000000000001e-05, + "loss": 0.8867, + "step": 525 + }, + { + "epoch": 0.02, + "learning_rate": 1.0600000000000002e-05, + "loss": 0.7378, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 1.0700000000000001e-05, + "loss": 0.8769, + "step": 535 + }, + { + "epoch": 0.03, + "learning_rate": 1.0800000000000002e-05, + "loss": 1.0011, + "step": 540 + }, + { + "epoch": 0.03, + "learning_rate": 1.0900000000000002e-05, + "loss": 1.1308, + "step": 545 + }, + { + "epoch": 0.03, + "learning_rate": 1.1000000000000001e-05, + "loss": 1.6817, + "step": 550 + }, + { + "epoch": 0.03, + "learning_rate": 1.1100000000000002e-05, + "loss": 0.8498, + "step": 555 + }, + { + "epoch": 0.03, + "learning_rate": 1.1200000000000001e-05, + "loss": 0.6052, + "step": 560 + }, + { + "epoch": 0.03, + "learning_rate": 1.13e-05, + "loss": 0.6782, + "step": 565 + }, + { + "epoch": 0.03, + "learning_rate": 1.14e-05, + "loss": 0.6536, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 1.15e-05, + "loss": 0.7006, + "step": 575 + }, + { + "epoch": 0.03, + "learning_rate": 1.16e-05, + "loss": 0.9329, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 1.17e-05, + "loss": 0.7805, + "step": 585 + }, + { + "epoch": 0.03, + "learning_rate": 1.18e-05, + "loss": 0.872, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 1.1900000000000001e-05, + "loss": 0.8867, + "step": 595 + }, + { + "epoch": 0.03, + "learning_rate": 1.2e-05, + "loss": 1.3013, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 1.2100000000000001e-05, + "loss": 0.9019, + "step": 605 + }, + { + "epoch": 0.03, + "learning_rate": 1.22e-05, + "loss": 0.5602, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 1.23e-05, + "loss": 0.632, + "step": 615 + }, + { + "epoch": 0.03, + "learning_rate": 1.2400000000000002e-05, + "loss": 0.6756, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 1.25e-05, + "loss": 0.7106, + "step": 625 + }, + { + "epoch": 0.03, + "learning_rate": 1.2600000000000001e-05, + "loss": 0.7885, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 1.27e-05, + "loss": 0.8643, + "step": 635 + }, + { + "epoch": 0.03, + "learning_rate": 1.2800000000000001e-05, + "loss": 0.953, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 1.2900000000000002e-05, + "loss": 1.2641, + "step": 645 + }, + { + "epoch": 0.03, + "learning_rate": 1.3000000000000001e-05, + "loss": 1.4198, + "step": 650 + }, + { + "epoch": 0.03, + "learning_rate": 1.3100000000000002e-05, + "loss": 0.82, + "step": 655 + }, + { + "epoch": 0.03, + "learning_rate": 1.3200000000000002e-05, + "loss": 0.6065, + "step": 660 + }, + { + "epoch": 0.03, + "learning_rate": 1.3300000000000001e-05, + "loss": 0.5797, + "step": 665 + }, + { + "epoch": 0.03, + "learning_rate": 1.3400000000000002e-05, + "loss": 0.7074, + "step": 670 + }, + { + "epoch": 0.03, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.5688, + "step": 675 + }, + { + "epoch": 0.03, + "learning_rate": 1.3600000000000002e-05, + "loss": 0.6267, + "step": 680 + }, + { + "epoch": 0.03, + "learning_rate": 1.3700000000000003e-05, + "loss": 0.9009, + "step": 685 + }, + { + "epoch": 0.03, + "learning_rate": 1.38e-05, + "loss": 0.86, + "step": 690 + }, + { + "epoch": 0.03, + "learning_rate": 1.39e-05, + "loss": 0.9638, + "step": 695 + }, + { + "epoch": 0.03, + "learning_rate": 1.4e-05, + "loss": 1.6872, + "step": 700 + }, + { + "epoch": 0.03, + "learning_rate": 1.41e-05, + "loss": 0.7373, + "step": 705 + }, + { + "epoch": 0.03, + "learning_rate": 1.4200000000000001e-05, + "loss": 0.5019, + "step": 710 + }, + { + "epoch": 0.03, + "learning_rate": 1.43e-05, + "loss": 0.6072, + "step": 715 + }, + { + "epoch": 0.03, + "learning_rate": 1.4400000000000001e-05, + "loss": 0.6512, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 1.45e-05, + "loss": 0.6652, + "step": 725 + }, + { + "epoch": 0.03, + "learning_rate": 1.46e-05, + "loss": 0.7577, + "step": 730 + }, + { + "epoch": 0.03, + "learning_rate": 1.4700000000000002e-05, + "loss": 1.0377, + "step": 735 + }, + { + "epoch": 0.03, + "learning_rate": 1.48e-05, + "loss": 0.8039, + "step": 740 + }, + { + "epoch": 0.03, + "learning_rate": 1.4900000000000001e-05, + "loss": 0.9909, + "step": 745 + }, + { + "epoch": 0.03, + "learning_rate": 1.5000000000000002e-05, + "loss": 1.2967, + "step": 750 + }, + { + "epoch": 0.04, + "learning_rate": 1.5100000000000001e-05, + "loss": 0.7075, + "step": 755 + }, + { + "epoch": 0.04, + "learning_rate": 1.5200000000000002e-05, + "loss": 0.535, + "step": 760 + }, + { + "epoch": 0.04, + "learning_rate": 1.5300000000000003e-05, + "loss": 0.5943, + "step": 765 + }, + { + "epoch": 0.04, + "learning_rate": 1.54e-05, + "loss": 0.7975, + "step": 770 + }, + { + "epoch": 0.04, + "learning_rate": 1.55e-05, + "loss": 0.606, + "step": 775 + }, + { + "epoch": 0.04, + "learning_rate": 1.5600000000000003e-05, + "loss": 0.748, + "step": 780 + }, + { + "epoch": 0.04, + "learning_rate": 1.5700000000000002e-05, + "loss": 0.714, + "step": 785 + }, + { + "epoch": 0.04, + "learning_rate": 1.58e-05, + "loss": 0.9922, + "step": 790 + }, + { + "epoch": 0.04, + "learning_rate": 1.5900000000000004e-05, + "loss": 0.9972, + "step": 795 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 1.2826, + "step": 800 + }, + { + "epoch": 0.04, + "learning_rate": 1.6100000000000002e-05, + "loss": 0.5911, + "step": 805 + }, + { + "epoch": 0.04, + "learning_rate": 1.62e-05, + "loss": 0.5172, + "step": 810 + }, + { + "epoch": 0.04, + "learning_rate": 1.63e-05, + "loss": 0.455, + "step": 815 + }, + { + "epoch": 0.04, + "learning_rate": 1.64e-05, + "loss": 0.6032, + "step": 820 + }, + { + "epoch": 0.04, + "learning_rate": 1.65e-05, + "loss": 0.6292, + "step": 825 + }, + { + "epoch": 0.04, + "learning_rate": 1.66e-05, + "loss": 0.658, + "step": 830 + }, + { + "epoch": 0.04, + "learning_rate": 1.67e-05, + "loss": 0.7158, + "step": 835 + }, + { + "epoch": 0.04, + "learning_rate": 1.6800000000000002e-05, + "loss": 0.8783, + "step": 840 + }, + { + "epoch": 0.04, + "learning_rate": 1.69e-05, + "loss": 0.9988, + "step": 845 + }, + { + "epoch": 0.04, + "learning_rate": 1.7e-05, + "loss": 1.1596, + "step": 850 + }, + { + "epoch": 0.04, + "learning_rate": 1.7100000000000002e-05, + "loss": 0.7145, + "step": 855 + }, + { + "epoch": 0.04, + "learning_rate": 1.72e-05, + "loss": 0.4501, + "step": 860 + }, + { + "epoch": 0.04, + "learning_rate": 1.73e-05, + "loss": 0.5174, + "step": 865 + }, + { + "epoch": 0.04, + "learning_rate": 1.7400000000000003e-05, + "loss": 0.5951, + "step": 870 + }, + { + "epoch": 0.04, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.7152, + "step": 875 + }, + { + "epoch": 0.04, + "learning_rate": 1.76e-05, + "loss": 0.6649, + "step": 880 + }, + { + "epoch": 0.04, + "learning_rate": 1.77e-05, + "loss": 0.6626, + "step": 885 + }, + { + "epoch": 0.04, + "learning_rate": 1.7800000000000002e-05, + "loss": 0.7571, + "step": 890 + }, + { + "epoch": 0.04, + "learning_rate": 1.79e-05, + "loss": 1.1449, + "step": 895 + }, + { + "epoch": 0.04, + "learning_rate": 1.8e-05, + "loss": 1.3764, + "step": 900 + }, + { + "epoch": 0.04, + "learning_rate": 1.8100000000000003e-05, + "loss": 0.7015, + "step": 905 + }, + { + "epoch": 0.04, + "learning_rate": 1.8200000000000002e-05, + "loss": 0.4139, + "step": 910 + }, + { + "epoch": 0.04, + "learning_rate": 1.83e-05, + "loss": 0.5709, + "step": 915 + }, + { + "epoch": 0.04, + "learning_rate": 1.8400000000000003e-05, + "loss": 0.4689, + "step": 920 + }, + { + "epoch": 0.04, + "learning_rate": 1.8500000000000002e-05, + "loss": 0.5459, + "step": 925 + }, + { + "epoch": 0.04, + "learning_rate": 1.86e-05, + "loss": 0.6105, + "step": 930 + }, + { + "epoch": 0.04, + "learning_rate": 1.8700000000000004e-05, + "loss": 0.7417, + "step": 935 + }, + { + "epoch": 0.04, + "learning_rate": 1.88e-05, + "loss": 0.7044, + "step": 940 + }, + { + "epoch": 0.04, + "learning_rate": 1.8900000000000002e-05, + "loss": 0.8603, + "step": 945 + }, + { + "epoch": 0.04, + "learning_rate": 1.9e-05, + "loss": 1.2372, + "step": 950 + }, + { + "epoch": 0.04, + "learning_rate": 1.91e-05, + "loss": 0.6774, + "step": 955 + }, + { + "epoch": 0.04, + "learning_rate": 1.9200000000000003e-05, + "loss": 0.3458, + "step": 960 + }, + { + "epoch": 0.05, + "learning_rate": 1.93e-05, + "loss": 0.4924, + "step": 965 + }, + { + "epoch": 0.05, + "learning_rate": 1.94e-05, + "loss": 0.4699, + "step": 970 + }, + { + "epoch": 0.05, + "learning_rate": 1.95e-05, + "loss": 0.5591, + "step": 975 + }, + { + "epoch": 0.05, + "learning_rate": 1.9600000000000002e-05, + "loss": 0.7706, + "step": 980 + }, + { + "epoch": 0.05, + "learning_rate": 1.97e-05, + "loss": 0.6164, + "step": 985 + }, + { + "epoch": 0.05, + "learning_rate": 1.98e-05, + "loss": 0.6035, + "step": 990 + }, + { + "epoch": 0.05, + "learning_rate": 1.9900000000000003e-05, + "loss": 0.8199, + "step": 995 + }, + { + "epoch": 0.05, + "learning_rate": 2e-05, + "loss": 1.3823, + "step": 1000 + }, + { + "epoch": 0.05, + "learning_rate": 1.999953119872486e-05, + "loss": 0.6102, + "step": 1005 + }, + { + "epoch": 0.05, + "learning_rate": 1.999906239744972e-05, + "loss": 0.3826, + "step": 1010 + }, + { + "epoch": 0.05, + "learning_rate": 1.999859359617458e-05, + "loss": 0.4675, + "step": 1015 + }, + { + "epoch": 0.05, + "learning_rate": 1.999812479489944e-05, + "loss": 0.4567, + "step": 1020 + }, + { + "epoch": 0.05, + "learning_rate": 1.9997655993624305e-05, + "loss": 0.6292, + "step": 1025 + }, + { + "epoch": 0.05, + "learning_rate": 1.9997187192349165e-05, + "loss": 0.6357, + "step": 1030 + }, + { + "epoch": 0.05, + "learning_rate": 1.9996718391074025e-05, + "loss": 0.666, + "step": 1035 + }, + { + "epoch": 0.05, + "learning_rate": 1.9996249589798888e-05, + "loss": 0.6685, + "step": 1040 + }, + { + "epoch": 0.05, + "learning_rate": 1.9995780788523748e-05, + "loss": 0.8939, + "step": 1045 + }, + { + "epoch": 0.05, + "learning_rate": 1.9995311987248608e-05, + "loss": 1.0384, + "step": 1050 + }, + { + "epoch": 0.05, + "learning_rate": 1.9994843185973468e-05, + "loss": 0.4313, + "step": 1055 + }, + { + "epoch": 0.05, + "learning_rate": 1.9994374384698328e-05, + "loss": 0.3909, + "step": 1060 + }, + { + "epoch": 0.05, + "learning_rate": 1.9993905583423188e-05, + "loss": 0.4668, + "step": 1065 + }, + { + "epoch": 0.05, + "learning_rate": 1.999343678214805e-05, + "loss": 0.4648, + "step": 1070 + }, + { + "epoch": 0.05, + "learning_rate": 1.999296798087291e-05, + "loss": 0.5502, + "step": 1075 + }, + { + "epoch": 0.05, + "learning_rate": 1.999249917959777e-05, + "loss": 0.5675, + "step": 1080 + }, + { + "epoch": 0.05, + "learning_rate": 1.999203037832263e-05, + "loss": 0.7048, + "step": 1085 + }, + { + "epoch": 0.05, + "learning_rate": 1.999156157704749e-05, + "loss": 0.7512, + "step": 1090 + }, + { + "epoch": 0.05, + "learning_rate": 1.999109277577235e-05, + "loss": 0.91, + "step": 1095 + }, + { + "epoch": 0.05, + "learning_rate": 1.999062397449721e-05, + "loss": 1.4923, + "step": 1100 + }, + { + "epoch": 0.05, + "learning_rate": 1.9990155173222074e-05, + "loss": 0.593, + "step": 1105 + }, + { + "epoch": 0.05, + "learning_rate": 1.9989686371946934e-05, + "loss": 0.412, + "step": 1110 + }, + { + "epoch": 0.05, + "learning_rate": 1.9989217570671794e-05, + "loss": 0.4295, + "step": 1115 + }, + { + "epoch": 0.05, + "learning_rate": 1.9988748769396657e-05, + "loss": 0.5324, + "step": 1120 + }, + { + "epoch": 0.05, + "learning_rate": 1.9988279968121517e-05, + "loss": 0.5146, + "step": 1125 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987811166846377e-05, + "loss": 0.6231, + "step": 1130 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987342365571237e-05, + "loss": 0.6736, + "step": 1135 + }, + { + "epoch": 0.05, + "learning_rate": 1.9986873564296097e-05, + "loss": 0.7737, + "step": 1140 + }, + { + "epoch": 0.05, + "learning_rate": 1.9986404763020957e-05, + "loss": 0.796, + "step": 1145 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985935961745817e-05, + "loss": 1.3116, + "step": 1150 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985467160470677e-05, + "loss": 0.5599, + "step": 1155 + }, + { + "epoch": 0.05, + "learning_rate": 1.9984998359195536e-05, + "loss": 0.3027, + "step": 1160 + }, + { + "epoch": 0.05, + "learning_rate": 1.99845295579204e-05, + "loss": 0.4646, + "step": 1165 + }, + { + "epoch": 0.05, + "learning_rate": 1.998406075664526e-05, + "loss": 0.5406, + "step": 1170 + }, + { + "epoch": 0.05, + "learning_rate": 1.998359195537012e-05, + "loss": 0.5461, + "step": 1175 + }, + { + "epoch": 0.06, + "learning_rate": 1.998312315409498e-05, + "loss": 0.6237, + "step": 1180 + }, + { + "epoch": 0.06, + "learning_rate": 1.9982654352819843e-05, + "loss": 0.6277, + "step": 1185 + }, + { + "epoch": 0.06, + "learning_rate": 1.9982185551544703e-05, + "loss": 0.6304, + "step": 1190 + }, + { + "epoch": 0.06, + "learning_rate": 1.9981716750269563e-05, + "loss": 0.8886, + "step": 1195 + }, + { + "epoch": 0.06, + "learning_rate": 1.9981247948994423e-05, + "loss": 1.1062, + "step": 1200 + }, + { + "epoch": 0.06, + "learning_rate": 1.9980779147719283e-05, + "loss": 0.597, + "step": 1205 + }, + { + "epoch": 0.06, + "learning_rate": 1.9980310346444146e-05, + "loss": 0.3313, + "step": 1210 + }, + { + "epoch": 0.06, + "learning_rate": 1.9979841545169006e-05, + "loss": 0.3761, + "step": 1215 + }, + { + "epoch": 0.06, + "learning_rate": 1.9979372743893866e-05, + "loss": 0.4524, + "step": 1220 + }, + { + "epoch": 0.06, + "learning_rate": 1.9978903942618726e-05, + "loss": 0.5068, + "step": 1225 + }, + { + "epoch": 0.06, + "learning_rate": 1.9978435141343586e-05, + "loss": 0.5899, + "step": 1230 + }, + { + "epoch": 0.06, + "learning_rate": 1.9977966340068446e-05, + "loss": 0.5973, + "step": 1235 + }, + { + "epoch": 0.06, + "learning_rate": 1.9977497538793306e-05, + "loss": 0.7986, + "step": 1240 + }, + { + "epoch": 0.06, + "learning_rate": 1.9977028737518165e-05, + "loss": 0.749, + "step": 1245 + }, + { + "epoch": 0.06, + "learning_rate": 1.997655993624303e-05, + "loss": 1.074, + "step": 1250 + }, + { + "epoch": 0.06, + "learning_rate": 1.997609113496789e-05, + "loss": 0.511, + "step": 1255 + }, + { + "epoch": 0.06, + "learning_rate": 1.997562233369275e-05, + "loss": 0.3073, + "step": 1260 + }, + { + "epoch": 0.06, + "learning_rate": 1.9975153532417612e-05, + "loss": 0.3776, + "step": 1265 + }, + { + "epoch": 0.06, + "learning_rate": 1.9974684731142472e-05, + "loss": 0.458, + "step": 1270 + }, + { + "epoch": 0.06, + "learning_rate": 1.9974215929867332e-05, + "loss": 0.4236, + "step": 1275 + }, + { + "epoch": 0.06, + "learning_rate": 1.9973747128592192e-05, + "loss": 0.53, + "step": 1280 + }, + { + "epoch": 0.06, + "learning_rate": 1.9973278327317052e-05, + "loss": 0.5216, + "step": 1285 + }, + { + "epoch": 0.06, + "learning_rate": 1.997280952604191e-05, + "loss": 0.8459, + "step": 1290 + }, + { + "epoch": 0.06, + "learning_rate": 1.997234072476677e-05, + "loss": 1.0025, + "step": 1295 + }, + { + "epoch": 0.06, + "learning_rate": 1.997187192349163e-05, + "loss": 1.0659, + "step": 1300 + }, + { + "epoch": 0.06, + "learning_rate": 1.9971403122216495e-05, + "loss": 0.5928, + "step": 1305 + }, + { + "epoch": 0.06, + "learning_rate": 1.9970934320941355e-05, + "loss": 0.2835, + "step": 1310 + }, + { + "epoch": 0.06, + "learning_rate": 1.9970465519666215e-05, + "loss": 0.4103, + "step": 1315 + }, + { + "epoch": 0.06, + "learning_rate": 1.9969996718391075e-05, + "loss": 0.424, + "step": 1320 + }, + { + "epoch": 0.06, + "learning_rate": 1.9969527917115935e-05, + "loss": 0.505, + "step": 1325 + }, + { + "epoch": 0.06, + "learning_rate": 1.9969059115840798e-05, + "loss": 0.5272, + "step": 1330 + }, + { + "epoch": 0.06, + "learning_rate": 1.9968590314565658e-05, + "loss": 0.5859, + "step": 1335 + }, + { + "epoch": 0.06, + "learning_rate": 1.9968121513290518e-05, + "loss": 0.7147, + "step": 1340 + }, + { + "epoch": 0.06, + "learning_rate": 1.9967652712015378e-05, + "loss": 0.6935, + "step": 1345 + }, + { + "epoch": 0.06, + "learning_rate": 1.996718391074024e-05, + "loss": 1.0448, + "step": 1350 + }, + { + "epoch": 0.06, + "learning_rate": 1.99667151094651e-05, + "loss": 0.5598, + "step": 1355 + }, + { + "epoch": 0.06, + "learning_rate": 1.996624630818996e-05, + "loss": 0.2887, + "step": 1360 + }, + { + "epoch": 0.06, + "learning_rate": 1.996577750691482e-05, + "loss": 0.3524, + "step": 1365 + }, + { + "epoch": 0.06, + "learning_rate": 1.996530870563968e-05, + "loss": 0.3648, + "step": 1370 + }, + { + "epoch": 0.06, + "learning_rate": 1.996483990436454e-05, + "loss": 0.5021, + "step": 1375 + }, + { + "epoch": 0.06, + "learning_rate": 1.99643711030894e-05, + "loss": 0.5777, + "step": 1380 + }, + { + "epoch": 0.06, + "learning_rate": 1.996390230181426e-05, + "loss": 0.5879, + "step": 1385 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963433500539124e-05, + "loss": 0.6235, + "step": 1390 + }, + { + "epoch": 0.07, + "learning_rate": 1.9962964699263984e-05, + "loss": 0.5928, + "step": 1395 + }, + { + "epoch": 0.07, + "learning_rate": 1.9962495897988844e-05, + "loss": 1.172, + "step": 1400 + }, + { + "epoch": 0.07, + "learning_rate": 1.9962027096713704e-05, + "loss": 0.5087, + "step": 1405 + }, + { + "epoch": 0.07, + "learning_rate": 1.9961558295438567e-05, + "loss": 0.3315, + "step": 1410 + }, + { + "epoch": 0.07, + "learning_rate": 1.9961089494163427e-05, + "loss": 0.405, + "step": 1415 + }, + { + "epoch": 0.07, + "learning_rate": 1.9960620692888287e-05, + "loss": 0.407, + "step": 1420 + }, + { + "epoch": 0.07, + "learning_rate": 1.9960151891613147e-05, + "loss": 0.5505, + "step": 1425 + }, + { + "epoch": 0.07, + "learning_rate": 1.9959683090338007e-05, + "loss": 0.5691, + "step": 1430 + }, + { + "epoch": 0.07, + "learning_rate": 1.9959214289062867e-05, + "loss": 0.502, + "step": 1435 + }, + { + "epoch": 0.07, + "learning_rate": 1.9958745487787727e-05, + "loss": 0.8324, + "step": 1440 + }, + { + "epoch": 0.07, + "learning_rate": 1.995827668651259e-05, + "loss": 0.6685, + "step": 1445 + }, + { + "epoch": 0.07, + "learning_rate": 1.995780788523745e-05, + "loss": 1.0913, + "step": 1450 + }, + { + "epoch": 0.07, + "learning_rate": 1.995733908396231e-05, + "loss": 0.4889, + "step": 1455 + }, + { + "epoch": 0.07, + "learning_rate": 1.995687028268717e-05, + "loss": 0.3823, + "step": 1460 + }, + { + "epoch": 0.07, + "learning_rate": 1.995640148141203e-05, + "loss": 0.4376, + "step": 1465 + }, + { + "epoch": 0.07, + "learning_rate": 1.9955932680136893e-05, + "loss": 0.4689, + "step": 1470 + }, + { + "epoch": 0.07, + "learning_rate": 1.9955463878861753e-05, + "loss": 0.4048, + "step": 1475 + }, + { + "epoch": 0.07, + "learning_rate": 1.9954995077586613e-05, + "loss": 0.5842, + "step": 1480 + }, + { + "epoch": 0.07, + "learning_rate": 1.9954526276311473e-05, + "loss": 0.5643, + "step": 1485 + }, + { + "epoch": 0.07, + "learning_rate": 1.9954057475036336e-05, + "loss": 0.8457, + "step": 1490 + }, + { + "epoch": 0.07, + "learning_rate": 1.9953588673761196e-05, + "loss": 0.5716, + "step": 1495 + }, + { + "epoch": 0.07, + "learning_rate": 1.9953119872486056e-05, + "loss": 1.1056, + "step": 1500 + }, + { + "epoch": 0.07, + "learning_rate": 1.9952651071210916e-05, + "loss": 0.4777, + "step": 1505 + }, + { + "epoch": 0.07, + "learning_rate": 1.9952182269935776e-05, + "loss": 0.277, + "step": 1510 + }, + { + "epoch": 0.07, + "learning_rate": 1.9951713468660636e-05, + "loss": 0.3154, + "step": 1515 + }, + { + "epoch": 0.07, + "learning_rate": 1.9951244667385496e-05, + "loss": 0.4537, + "step": 1520 + }, + { + "epoch": 0.07, + "learning_rate": 1.9950775866110356e-05, + "loss": 0.517, + "step": 1525 + }, + { + "epoch": 0.07, + "learning_rate": 1.9950307064835216e-05, + "loss": 0.5247, + "step": 1530 + }, + { + "epoch": 0.07, + "learning_rate": 1.994983826356008e-05, + "loss": 0.4861, + "step": 1535 + }, + { + "epoch": 0.07, + "learning_rate": 1.994936946228494e-05, + "loss": 0.5592, + "step": 1540 + }, + { + "epoch": 0.07, + "learning_rate": 1.99489006610098e-05, + "loss": 0.9036, + "step": 1545 + }, + { + "epoch": 0.07, + "learning_rate": 1.9948431859734662e-05, + "loss": 1.0261, + "step": 1550 + }, + { + "epoch": 0.07, + "learning_rate": 1.9947963058459522e-05, + "loss": 0.5694, + "step": 1555 + }, + { + "epoch": 0.07, + "learning_rate": 1.9947494257184382e-05, + "loss": 0.2699, + "step": 1560 + }, + { + "epoch": 0.07, + "learning_rate": 1.9947025455909242e-05, + "loss": 0.3443, + "step": 1565 + }, + { + "epoch": 0.07, + "learning_rate": 1.9946556654634102e-05, + "loss": 0.563, + "step": 1570 + }, + { + "epoch": 0.07, + "learning_rate": 1.9946087853358962e-05, + "loss": 0.3772, + "step": 1575 + }, + { + "epoch": 0.07, + "learning_rate": 1.9945619052083822e-05, + "loss": 0.5209, + "step": 1580 + }, + { + "epoch": 0.07, + "learning_rate": 1.9945150250808685e-05, + "loss": 0.4973, + "step": 1585 + }, + { + "epoch": 0.07, + "learning_rate": 1.9944681449533545e-05, + "loss": 0.4882, + "step": 1590 + }, + { + "epoch": 0.07, + "learning_rate": 1.9944212648258405e-05, + "loss": 0.7922, + "step": 1595 + }, + { + "epoch": 0.07, + "learning_rate": 1.9943743846983265e-05, + "loss": 0.7772, + "step": 1600 + }, + { + "epoch": 0.07, + "learning_rate": 1.9943275045708125e-05, + "loss": 0.555, + "step": 1605 + }, + { + "epoch": 0.08, + "learning_rate": 1.9942806244432985e-05, + "loss": 0.4321, + "step": 1610 + }, + { + "epoch": 0.08, + "learning_rate": 1.9942337443157848e-05, + "loss": 0.4047, + "step": 1615 + }, + { + "epoch": 0.08, + "learning_rate": 1.9941868641882708e-05, + "loss": 0.352, + "step": 1620 + }, + { + "epoch": 0.08, + "learning_rate": 1.9941399840607568e-05, + "loss": 0.3804, + "step": 1625 + }, + { + "epoch": 0.08, + "learning_rate": 1.994093103933243e-05, + "loss": 0.4649, + "step": 1630 + }, + { + "epoch": 0.08, + "learning_rate": 1.994046223805729e-05, + "loss": 0.5527, + "step": 1635 + }, + { + "epoch": 0.08, + "learning_rate": 1.993999343678215e-05, + "loss": 0.7, + "step": 1640 + }, + { + "epoch": 0.08, + "learning_rate": 1.993952463550701e-05, + "loss": 0.7865, + "step": 1645 + }, + { + "epoch": 0.08, + "learning_rate": 1.993905583423187e-05, + "loss": 1.0613, + "step": 1650 + }, + { + "epoch": 0.08, + "learning_rate": 1.993858703295673e-05, + "loss": 0.533, + "step": 1655 + }, + { + "epoch": 0.08, + "learning_rate": 1.993811823168159e-05, + "loss": 0.2746, + "step": 1660 + }, + { + "epoch": 0.08, + "learning_rate": 1.993764943040645e-05, + "loss": 0.3082, + "step": 1665 + }, + { + "epoch": 0.08, + "learning_rate": 1.993718062913131e-05, + "loss": 0.4, + "step": 1670 + }, + { + "epoch": 0.08, + "learning_rate": 1.9936711827856174e-05, + "loss": 0.4761, + "step": 1675 + }, + { + "epoch": 0.08, + "learning_rate": 1.9936243026581034e-05, + "loss": 0.4022, + "step": 1680 + }, + { + "epoch": 0.08, + "learning_rate": 1.9935774225305894e-05, + "loss": 0.6181, + "step": 1685 + }, + { + "epoch": 0.08, + "learning_rate": 1.9935305424030754e-05, + "loss": 0.6173, + "step": 1690 + }, + { + "epoch": 0.08, + "learning_rate": 1.9934836622755617e-05, + "loss": 0.6797, + "step": 1695 + }, + { + "epoch": 0.08, + "learning_rate": 1.9934367821480477e-05, + "loss": 1.1921, + "step": 1700 + }, + { + "epoch": 0.08, + "learning_rate": 1.9933899020205337e-05, + "loss": 0.5224, + "step": 1705 + }, + { + "epoch": 0.08, + "learning_rate": 1.9933430218930197e-05, + "loss": 0.3617, + "step": 1710 + }, + { + "epoch": 0.08, + "learning_rate": 1.9932961417655057e-05, + "loss": 0.3178, + "step": 1715 + }, + { + "epoch": 0.08, + "learning_rate": 1.993249261637992e-05, + "loss": 0.4298, + "step": 1720 + }, + { + "epoch": 0.08, + "learning_rate": 1.993202381510478e-05, + "loss": 0.4292, + "step": 1725 + }, + { + "epoch": 0.08, + "learning_rate": 1.993155501382964e-05, + "loss": 0.4192, + "step": 1730 + }, + { + "epoch": 0.08, + "learning_rate": 1.99310862125545e-05, + "loss": 0.5256, + "step": 1735 + }, + { + "epoch": 0.08, + "learning_rate": 1.993061741127936e-05, + "loss": 0.5433, + "step": 1740 + }, + { + "epoch": 0.08, + "learning_rate": 1.993014861000422e-05, + "loss": 0.7419, + "step": 1745 + }, + { + "epoch": 0.08, + "learning_rate": 1.992967980872908e-05, + "loss": 1.1098, + "step": 1750 + }, + { + "epoch": 0.08, + "learning_rate": 1.992921100745394e-05, + "loss": 0.4264, + "step": 1755 + }, + { + "epoch": 0.08, + "learning_rate": 1.9928742206178803e-05, + "loss": 0.2677, + "step": 1760 + }, + { + "epoch": 0.08, + "learning_rate": 1.9928273404903663e-05, + "loss": 0.3569, + "step": 1765 + }, + { + "epoch": 0.08, + "learning_rate": 1.9927804603628523e-05, + "loss": 0.3524, + "step": 1770 + }, + { + "epoch": 0.08, + "learning_rate": 1.9927335802353386e-05, + "loss": 0.469, + "step": 1775 + }, + { + "epoch": 0.08, + "learning_rate": 1.9926867001078246e-05, + "loss": 0.3399, + "step": 1780 + }, + { + "epoch": 0.08, + "learning_rate": 1.9926398199803106e-05, + "loss": 0.583, + "step": 1785 + }, + { + "epoch": 0.08, + "learning_rate": 1.9925929398527966e-05, + "loss": 0.5959, + "step": 1790 + }, + { + "epoch": 0.08, + "learning_rate": 1.9925460597252826e-05, + "loss": 0.7883, + "step": 1795 + }, + { + "epoch": 0.08, + "learning_rate": 1.9924991795977686e-05, + "loss": 0.8624, + "step": 1800 + }, + { + "epoch": 0.08, + "learning_rate": 1.9924522994702546e-05, + "loss": 0.4495, + "step": 1805 + }, + { + "epoch": 0.08, + "learning_rate": 1.9924054193427406e-05, + "loss": 0.2398, + "step": 1810 + }, + { + "epoch": 0.08, + "learning_rate": 1.992358539215227e-05, + "loss": 0.285, + "step": 1815 + }, + { + "epoch": 0.08, + "learning_rate": 1.992311659087713e-05, + "loss": 0.3657, + "step": 1820 + }, + { + "epoch": 0.09, + "learning_rate": 1.992264778960199e-05, + "loss": 0.4166, + "step": 1825 + }, + { + "epoch": 0.09, + "learning_rate": 1.992217898832685e-05, + "loss": 0.4724, + "step": 1830 + }, + { + "epoch": 0.09, + "learning_rate": 1.992171018705171e-05, + "loss": 0.5657, + "step": 1835 + }, + { + "epoch": 0.09, + "learning_rate": 1.9921241385776572e-05, + "loss": 0.4912, + "step": 1840 + }, + { + "epoch": 0.09, + "learning_rate": 1.9920772584501432e-05, + "loss": 0.8323, + "step": 1845 + }, + { + "epoch": 0.09, + "learning_rate": 1.9920303783226292e-05, + "loss": 1.2386, + "step": 1850 + }, + { + "epoch": 0.09, + "learning_rate": 1.9919834981951152e-05, + "loss": 0.481, + "step": 1855 + }, + { + "epoch": 0.09, + "learning_rate": 1.9919366180676015e-05, + "loss": 0.2647, + "step": 1860 + }, + { + "epoch": 0.09, + "learning_rate": 1.9918897379400875e-05, + "loss": 0.3251, + "step": 1865 + }, + { + "epoch": 0.09, + "learning_rate": 1.9918428578125735e-05, + "loss": 0.4379, + "step": 1870 + }, + { + "epoch": 0.09, + "learning_rate": 1.9917959776850595e-05, + "loss": 0.3494, + "step": 1875 + }, + { + "epoch": 0.09, + "learning_rate": 1.9917490975575455e-05, + "loss": 0.4675, + "step": 1880 + }, + { + "epoch": 0.09, + "learning_rate": 1.9917022174300315e-05, + "loss": 0.5453, + "step": 1885 + }, + { + "epoch": 0.09, + "learning_rate": 1.9916553373025175e-05, + "loss": 0.7557, + "step": 1890 + }, + { + "epoch": 0.09, + "learning_rate": 1.9916084571750035e-05, + "loss": 0.8384, + "step": 1895 + }, + { + "epoch": 0.09, + "learning_rate": 1.9915615770474898e-05, + "loss": 0.9389, + "step": 1900 + }, + { + "epoch": 0.09, + "learning_rate": 1.9915146969199758e-05, + "loss": 0.595, + "step": 1905 + }, + { + "epoch": 0.09, + "learning_rate": 1.9914678167924618e-05, + "loss": 0.2589, + "step": 1910 + }, + { + "epoch": 0.09, + "learning_rate": 1.9914209366649478e-05, + "loss": 0.2753, + "step": 1915 + }, + { + "epoch": 0.09, + "learning_rate": 1.991374056537434e-05, + "loss": 0.327, + "step": 1920 + }, + { + "epoch": 0.09, + "learning_rate": 1.99132717640992e-05, + "loss": 0.4068, + "step": 1925 + }, + { + "epoch": 0.09, + "learning_rate": 1.991280296282406e-05, + "loss": 0.3982, + "step": 1930 + }, + { + "epoch": 0.09, + "learning_rate": 1.991233416154892e-05, + "loss": 0.4792, + "step": 1935 + }, + { + "epoch": 0.09, + "learning_rate": 1.991186536027378e-05, + "loss": 0.5322, + "step": 1940 + }, + { + "epoch": 0.09, + "learning_rate": 1.991139655899864e-05, + "loss": 0.5524, + "step": 1945 + }, + { + "epoch": 0.09, + "learning_rate": 1.99109277577235e-05, + "loss": 0.7718, + "step": 1950 + }, + { + "epoch": 0.09, + "learning_rate": 1.9910458956448364e-05, + "loss": 0.4615, + "step": 1955 + }, + { + "epoch": 0.09, + "learning_rate": 1.9909990155173224e-05, + "loss": 0.23, + "step": 1960 + }, + { + "epoch": 0.09, + "learning_rate": 1.9909521353898084e-05, + "loss": 0.3236, + "step": 1965 + }, + { + "epoch": 0.09, + "learning_rate": 1.9909052552622944e-05, + "loss": 0.3613, + "step": 1970 + }, + { + "epoch": 0.09, + "learning_rate": 1.9908583751347804e-05, + "loss": 0.4707, + "step": 1975 + }, + { + "epoch": 0.09, + "learning_rate": 1.9908114950072667e-05, + "loss": 0.4895, + "step": 1980 + }, + { + "epoch": 0.09, + "learning_rate": 1.9907646148797527e-05, + "loss": 0.4998, + "step": 1985 + }, + { + "epoch": 0.09, + "learning_rate": 1.9907177347522387e-05, + "loss": 0.4744, + "step": 1990 + }, + { + "epoch": 0.09, + "learning_rate": 1.9906708546247247e-05, + "loss": 0.6763, + "step": 1995 + }, + { + "epoch": 0.09, + "learning_rate": 1.990623974497211e-05, + "loss": 0.8183, + "step": 2000 + }, + { + "epoch": 0.09, + "learning_rate": 1.990577094369697e-05, + "loss": 0.4009, + "step": 2005 + }, + { + "epoch": 0.09, + "learning_rate": 1.990530214242183e-05, + "loss": 0.2849, + "step": 2010 + }, + { + "epoch": 0.09, + "learning_rate": 1.990483334114669e-05, + "loss": 0.3281, + "step": 2015 + }, + { + "epoch": 0.09, + "learning_rate": 1.990436453987155e-05, + "loss": 0.2439, + "step": 2020 + }, + { + "epoch": 0.09, + "learning_rate": 1.990389573859641e-05, + "loss": 0.4259, + "step": 2025 + }, + { + "epoch": 0.09, + "learning_rate": 1.990342693732127e-05, + "loss": 0.4708, + "step": 2030 + }, + { + "epoch": 0.09, + "learning_rate": 1.990295813604613e-05, + "loss": 0.4431, + "step": 2035 + }, + { + "epoch": 0.1, + "learning_rate": 1.990248933477099e-05, + "loss": 0.6112, + "step": 2040 + }, + { + "epoch": 0.1, + "learning_rate": 1.9902020533495853e-05, + "loss": 0.7258, + "step": 2045 + }, + { + "epoch": 0.1, + "learning_rate": 1.9901551732220713e-05, + "loss": 1.1696, + "step": 2050 + }, + { + "epoch": 0.1, + "learning_rate": 1.9901082930945573e-05, + "loss": 0.4031, + "step": 2055 + }, + { + "epoch": 0.1, + "learning_rate": 1.9900614129670436e-05, + "loss": 0.2141, + "step": 2060 + }, + { + "epoch": 0.1, + "learning_rate": 1.9900145328395296e-05, + "loss": 0.2085, + "step": 2065 + }, + { + "epoch": 0.1, + "learning_rate": 1.9899676527120156e-05, + "loss": 0.3854, + "step": 2070 + }, + { + "epoch": 0.1, + "learning_rate": 1.9899207725845016e-05, + "loss": 0.447, + "step": 2075 + }, + { + "epoch": 0.1, + "learning_rate": 1.9898738924569876e-05, + "loss": 0.4033, + "step": 2080 + }, + { + "epoch": 0.1, + "learning_rate": 1.9898270123294736e-05, + "loss": 0.4619, + "step": 2085 + }, + { + "epoch": 0.1, + "learning_rate": 1.9897801322019596e-05, + "loss": 0.4808, + "step": 2090 + }, + { + "epoch": 0.1, + "learning_rate": 1.989733252074446e-05, + "loss": 0.7021, + "step": 2095 + }, + { + "epoch": 0.1, + "learning_rate": 1.989686371946932e-05, + "loss": 1.0386, + "step": 2100 + }, + { + "epoch": 0.1, + "learning_rate": 1.989639491819418e-05, + "loss": 0.4082, + "step": 2105 + }, + { + "epoch": 0.1, + "learning_rate": 1.989592611691904e-05, + "loss": 0.2731, + "step": 2110 + }, + { + "epoch": 0.1, + "learning_rate": 1.98954573156439e-05, + "loss": 0.3498, + "step": 2115 + }, + { + "epoch": 0.1, + "learning_rate": 1.989498851436876e-05, + "loss": 0.3212, + "step": 2120 + }, + { + "epoch": 0.1, + "learning_rate": 1.9894519713093622e-05, + "loss": 0.3869, + "step": 2125 + }, + { + "epoch": 0.1, + "learning_rate": 1.9894050911818482e-05, + "loss": 0.3923, + "step": 2130 + }, + { + "epoch": 0.1, + "learning_rate": 1.9893582110543342e-05, + "loss": 0.4625, + "step": 2135 + }, + { + "epoch": 0.1, + "learning_rate": 1.9893113309268205e-05, + "loss": 0.5782, + "step": 2140 + }, + { + "epoch": 0.1, + "learning_rate": 1.9892644507993065e-05, + "loss": 0.845, + "step": 2145 + }, + { + "epoch": 0.1, + "learning_rate": 1.9892175706717925e-05, + "loss": 1.0402, + "step": 2150 + }, + { + "epoch": 0.1, + "learning_rate": 1.9891706905442785e-05, + "loss": 0.4479, + "step": 2155 + }, + { + "epoch": 0.1, + "learning_rate": 1.9891238104167645e-05, + "loss": 0.2284, + "step": 2160 + }, + { + "epoch": 0.1, + "learning_rate": 1.9890769302892505e-05, + "loss": 0.2668, + "step": 2165 + }, + { + "epoch": 0.1, + "learning_rate": 1.9890300501617365e-05, + "loss": 0.3342, + "step": 2170 + }, + { + "epoch": 0.1, + "learning_rate": 1.9889831700342225e-05, + "loss": 0.4317, + "step": 2175 + }, + { + "epoch": 0.1, + "learning_rate": 1.9889362899067085e-05, + "loss": 0.4107, + "step": 2180 + }, + { + "epoch": 0.1, + "learning_rate": 1.9888894097791948e-05, + "loss": 0.3555, + "step": 2185 + }, + { + "epoch": 0.1, + "learning_rate": 1.9888425296516808e-05, + "loss": 0.5225, + "step": 2190 + }, + { + "epoch": 0.1, + "learning_rate": 1.9887956495241668e-05, + "loss": 0.9319, + "step": 2195 + }, + { + "epoch": 0.1, + "learning_rate": 1.9887487693966528e-05, + "loss": 0.9103, + "step": 2200 + }, + { + "epoch": 0.1, + "learning_rate": 1.988701889269139e-05, + "loss": 0.5407, + "step": 2205 + }, + { + "epoch": 0.1, + "learning_rate": 1.988655009141625e-05, + "loss": 0.2933, + "step": 2210 + }, + { + "epoch": 0.1, + "learning_rate": 1.988608129014111e-05, + "loss": 0.3158, + "step": 2215 + }, + { + "epoch": 0.1, + "learning_rate": 1.988561248886597e-05, + "loss": 0.3756, + "step": 2220 + }, + { + "epoch": 0.1, + "learning_rate": 1.988514368759083e-05, + "loss": 0.3151, + "step": 2225 + }, + { + "epoch": 0.1, + "learning_rate": 1.988467488631569e-05, + "loss": 0.3484, + "step": 2230 + }, + { + "epoch": 0.1, + "learning_rate": 1.9884206085040554e-05, + "loss": 0.514, + "step": 2235 + }, + { + "epoch": 0.1, + "learning_rate": 1.9883737283765414e-05, + "loss": 0.4266, + "step": 2240 + }, + { + "epoch": 0.1, + "learning_rate": 1.9883268482490274e-05, + "loss": 0.7312, + "step": 2245 + }, + { + "epoch": 0.1, + "learning_rate": 1.9882799681215134e-05, + "loss": 0.7081, + "step": 2250 + }, + { + "epoch": 0.11, + "learning_rate": 1.9882330879939994e-05, + "loss": 0.4765, + "step": 2255 + }, + { + "epoch": 0.11, + "learning_rate": 1.9881862078664854e-05, + "loss": 0.2093, + "step": 2260 + }, + { + "epoch": 0.11, + "learning_rate": 1.9881393277389714e-05, + "loss": 0.3321, + "step": 2265 + }, + { + "epoch": 0.11, + "learning_rate": 1.9880924476114577e-05, + "loss": 0.3887, + "step": 2270 + }, + { + "epoch": 0.11, + "learning_rate": 1.9880455674839437e-05, + "loss": 0.3139, + "step": 2275 + }, + { + "epoch": 0.11, + "learning_rate": 1.9879986873564297e-05, + "loss": 0.3986, + "step": 2280 + }, + { + "epoch": 0.11, + "learning_rate": 1.987951807228916e-05, + "loss": 0.502, + "step": 2285 + }, + { + "epoch": 0.11, + "learning_rate": 1.987904927101402e-05, + "loss": 0.4883, + "step": 2290 + }, + { + "epoch": 0.11, + "learning_rate": 1.987858046973888e-05, + "loss": 0.5289, + "step": 2295 + }, + { + "epoch": 0.11, + "learning_rate": 1.987811166846374e-05, + "loss": 1.2296, + "step": 2300 + }, + { + "epoch": 0.11, + "learning_rate": 1.98776428671886e-05, + "loss": 0.4979, + "step": 2305 + }, + { + "epoch": 0.11, + "learning_rate": 1.987717406591346e-05, + "loss": 0.1878, + "step": 2310 + }, + { + "epoch": 0.11, + "learning_rate": 1.987670526463832e-05, + "loss": 0.3966, + "step": 2315 + }, + { + "epoch": 0.11, + "learning_rate": 1.987623646336318e-05, + "loss": 0.331, + "step": 2320 + }, + { + "epoch": 0.11, + "learning_rate": 1.9875767662088043e-05, + "loss": 0.3299, + "step": 2325 + }, + { + "epoch": 0.11, + "learning_rate": 1.9875298860812903e-05, + "loss": 0.3091, + "step": 2330 + }, + { + "epoch": 0.11, + "learning_rate": 1.9874830059537763e-05, + "loss": 0.4212, + "step": 2335 + }, + { + "epoch": 0.11, + "learning_rate": 1.9874361258262623e-05, + "loss": 0.3994, + "step": 2340 + }, + { + "epoch": 0.11, + "learning_rate": 1.9873892456987483e-05, + "loss": 0.6617, + "step": 2345 + }, + { + "epoch": 0.11, + "learning_rate": 1.9873423655712346e-05, + "loss": 0.7071, + "step": 2350 + }, + { + "epoch": 0.11, + "learning_rate": 1.9872954854437206e-05, + "loss": 0.3459, + "step": 2355 + }, + { + "epoch": 0.11, + "learning_rate": 1.9872486053162066e-05, + "loss": 0.1769, + "step": 2360 + }, + { + "epoch": 0.11, + "learning_rate": 1.9872017251886926e-05, + "loss": 0.3423, + "step": 2365 + }, + { + "epoch": 0.11, + "learning_rate": 1.987154845061179e-05, + "loss": 0.4013, + "step": 2370 + }, + { + "epoch": 0.11, + "learning_rate": 1.987107964933665e-05, + "loss": 0.368, + "step": 2375 + }, + { + "epoch": 0.11, + "learning_rate": 1.987061084806151e-05, + "loss": 0.3948, + "step": 2380 + }, + { + "epoch": 0.11, + "learning_rate": 1.987014204678637e-05, + "loss": 0.3829, + "step": 2385 + }, + { + "epoch": 0.11, + "learning_rate": 1.986967324551123e-05, + "loss": 0.3998, + "step": 2390 + }, + { + "epoch": 0.11, + "learning_rate": 1.986920444423609e-05, + "loss": 0.5129, + "step": 2395 + }, + { + "epoch": 0.11, + "learning_rate": 1.986873564296095e-05, + "loss": 1.1603, + "step": 2400 + }, + { + "epoch": 0.11, + "learning_rate": 1.986826684168581e-05, + "loss": 0.4495, + "step": 2405 + }, + { + "epoch": 0.11, + "learning_rate": 1.9867798040410672e-05, + "loss": 0.2745, + "step": 2410 + }, + { + "epoch": 0.11, + "learning_rate": 1.9867329239135532e-05, + "loss": 0.3008, + "step": 2415 + }, + { + "epoch": 0.11, + "learning_rate": 1.9866860437860392e-05, + "loss": 0.3176, + "step": 2420 + }, + { + "epoch": 0.11, + "learning_rate": 1.9866391636585256e-05, + "loss": 0.3361, + "step": 2425 + }, + { + "epoch": 0.11, + "learning_rate": 1.9865922835310115e-05, + "loss": 0.4109, + "step": 2430 + }, + { + "epoch": 0.11, + "learning_rate": 1.9865454034034975e-05, + "loss": 0.4295, + "step": 2435 + }, + { + "epoch": 0.11, + "learning_rate": 1.9864985232759835e-05, + "loss": 0.6172, + "step": 2440 + }, + { + "epoch": 0.11, + "learning_rate": 1.9864516431484695e-05, + "loss": 0.5372, + "step": 2445 + }, + { + "epoch": 0.11, + "learning_rate": 1.9864047630209555e-05, + "loss": 1.0839, + "step": 2450 + }, + { + "epoch": 0.11, + "learning_rate": 1.9863578828934415e-05, + "loss": 0.5672, + "step": 2455 + }, + { + "epoch": 0.11, + "learning_rate": 1.9863110027659275e-05, + "loss": 0.2294, + "step": 2460 + }, + { + "epoch": 0.12, + "learning_rate": 1.986264122638414e-05, + "loss": 0.4235, + "step": 2465 + }, + { + "epoch": 0.12, + "learning_rate": 1.9862172425109e-05, + "loss": 0.3189, + "step": 2470 + }, + { + "epoch": 0.12, + "learning_rate": 1.9861703623833858e-05, + "loss": 0.2703, + "step": 2475 + }, + { + "epoch": 0.12, + "learning_rate": 1.9861234822558718e-05, + "loss": 0.4522, + "step": 2480 + }, + { + "epoch": 0.12, + "learning_rate": 1.9860766021283578e-05, + "loss": 0.648, + "step": 2485 + }, + { + "epoch": 0.12, + "learning_rate": 1.986029722000844e-05, + "loss": 0.4643, + "step": 2490 + }, + { + "epoch": 0.12, + "learning_rate": 1.98598284187333e-05, + "loss": 0.5613, + "step": 2495 + }, + { + "epoch": 0.12, + "learning_rate": 1.985935961745816e-05, + "loss": 0.9657, + "step": 2500 + }, + { + "epoch": 0.12, + "learning_rate": 1.985889081618302e-05, + "loss": 0.3885, + "step": 2505 + }, + { + "epoch": 0.12, + "learning_rate": 1.9858422014907885e-05, + "loss": 0.252, + "step": 2510 + }, + { + "epoch": 0.12, + "learning_rate": 1.9857953213632744e-05, + "loss": 0.3157, + "step": 2515 + }, + { + "epoch": 0.12, + "learning_rate": 1.9857484412357604e-05, + "loss": 0.331, + "step": 2520 + }, + { + "epoch": 0.12, + "learning_rate": 1.9857015611082464e-05, + "loss": 0.3348, + "step": 2525 + }, + { + "epoch": 0.12, + "learning_rate": 1.9856546809807324e-05, + "loss": 0.4366, + "step": 2530 + }, + { + "epoch": 0.12, + "learning_rate": 1.9856078008532184e-05, + "loss": 0.5167, + "step": 2535 + }, + { + "epoch": 0.12, + "learning_rate": 1.9855609207257044e-05, + "loss": 0.5484, + "step": 2540 + }, + { + "epoch": 0.12, + "learning_rate": 1.9855140405981904e-05, + "loss": 0.682, + "step": 2545 + }, + { + "epoch": 0.12, + "learning_rate": 1.9854671604706764e-05, + "loss": 0.9292, + "step": 2550 + }, + { + "epoch": 0.12, + "learning_rate": 1.9854202803431627e-05, + "loss": 0.2851, + "step": 2555 + }, + { + "epoch": 0.12, + "learning_rate": 1.9853734002156487e-05, + "loss": 0.319, + "step": 2560 + }, + { + "epoch": 0.12, + "learning_rate": 1.9853265200881347e-05, + "loss": 0.3735, + "step": 2565 + }, + { + "epoch": 0.12, + "learning_rate": 1.985279639960621e-05, + "loss": 0.3128, + "step": 2570 + }, + { + "epoch": 0.12, + "learning_rate": 1.985232759833107e-05, + "loss": 0.3411, + "step": 2575 + }, + { + "epoch": 0.12, + "learning_rate": 1.985185879705593e-05, + "loss": 0.4413, + "step": 2580 + }, + { + "epoch": 0.12, + "learning_rate": 1.985138999578079e-05, + "loss": 0.5069, + "step": 2585 + }, + { + "epoch": 0.12, + "learning_rate": 1.985092119450565e-05, + "loss": 0.6175, + "step": 2590 + }, + { + "epoch": 0.12, + "learning_rate": 1.985045239323051e-05, + "loss": 0.5128, + "step": 2595 + }, + { + "epoch": 0.12, + "learning_rate": 1.984998359195537e-05, + "loss": 0.7707, + "step": 2600 + }, + { + "epoch": 0.12, + "learning_rate": 1.9849514790680233e-05, + "loss": 0.388, + "step": 2605 + }, + { + "epoch": 0.12, + "learning_rate": 1.9849045989405093e-05, + "loss": 0.2559, + "step": 2610 + }, + { + "epoch": 0.12, + "learning_rate": 1.9848577188129953e-05, + "loss": 0.3089, + "step": 2615 + }, + { + "epoch": 0.12, + "learning_rate": 1.9848108386854813e-05, + "loss": 0.2978, + "step": 2620 + }, + { + "epoch": 0.12, + "learning_rate": 1.9847639585579673e-05, + "loss": 0.2533, + "step": 2625 + }, + { + "epoch": 0.12, + "learning_rate": 1.9847170784304533e-05, + "loss": 0.3758, + "step": 2630 + }, + { + "epoch": 0.12, + "learning_rate": 1.9846701983029396e-05, + "loss": 0.2824, + "step": 2635 + }, + { + "epoch": 0.12, + "learning_rate": 1.9846233181754256e-05, + "loss": 0.4697, + "step": 2640 + }, + { + "epoch": 0.12, + "learning_rate": 1.9845764380479116e-05, + "loss": 0.4773, + "step": 2645 + }, + { + "epoch": 0.12, + "learning_rate": 1.984529557920398e-05, + "loss": 0.9044, + "step": 2650 + }, + { + "epoch": 0.12, + "learning_rate": 1.984482677792884e-05, + "loss": 0.3903, + "step": 2655 + }, + { + "epoch": 0.12, + "learning_rate": 1.98443579766537e-05, + "loss": 0.268, + "step": 2660 + }, + { + "epoch": 0.12, + "learning_rate": 1.984388917537856e-05, + "loss": 0.205, + "step": 2665 + }, + { + "epoch": 0.12, + "learning_rate": 1.984342037410342e-05, + "loss": 0.3019, + "step": 2670 + }, + { + "epoch": 0.12, + "learning_rate": 1.984295157282828e-05, + "loss": 0.4408, + "step": 2675 + }, + { + "epoch": 0.13, + "learning_rate": 1.984248277155314e-05, + "loss": 0.3562, + "step": 2680 + }, + { + "epoch": 0.13, + "learning_rate": 1.9842013970278e-05, + "loss": 0.4502, + "step": 2685 + }, + { + "epoch": 0.13, + "learning_rate": 1.984154516900286e-05, + "loss": 0.6325, + "step": 2690 + }, + { + "epoch": 0.13, + "learning_rate": 1.9841076367727722e-05, + "loss": 0.734, + "step": 2695 + }, + { + "epoch": 0.13, + "learning_rate": 1.9840607566452582e-05, + "loss": 0.825, + "step": 2700 + }, + { + "epoch": 0.13, + "learning_rate": 1.9840138765177442e-05, + "loss": 0.4505, + "step": 2705 + }, + { + "epoch": 0.13, + "learning_rate": 1.9839669963902302e-05, + "loss": 0.2048, + "step": 2710 + }, + { + "epoch": 0.13, + "learning_rate": 1.9839201162627166e-05, + "loss": 0.2263, + "step": 2715 + }, + { + "epoch": 0.13, + "learning_rate": 1.9838732361352025e-05, + "loss": 0.2975, + "step": 2720 + }, + { + "epoch": 0.13, + "learning_rate": 1.9838263560076885e-05, + "loss": 0.3431, + "step": 2725 + }, + { + "epoch": 0.13, + "learning_rate": 1.9837794758801745e-05, + "loss": 0.4268, + "step": 2730 + }, + { + "epoch": 0.13, + "learning_rate": 1.9837325957526605e-05, + "loss": 0.4063, + "step": 2735 + }, + { + "epoch": 0.13, + "learning_rate": 1.9836857156251465e-05, + "loss": 0.4808, + "step": 2740 + }, + { + "epoch": 0.13, + "learning_rate": 1.983638835497633e-05, + "loss": 0.6248, + "step": 2745 + }, + { + "epoch": 0.13, + "learning_rate": 1.983591955370119e-05, + "loss": 0.9324, + "step": 2750 + }, + { + "epoch": 0.13, + "learning_rate": 1.983545075242605e-05, + "loss": 0.4698, + "step": 2755 + }, + { + "epoch": 0.13, + "learning_rate": 1.983498195115091e-05, + "loss": 0.2148, + "step": 2760 + }, + { + "epoch": 0.13, + "learning_rate": 1.9834513149875768e-05, + "loss": 0.2496, + "step": 2765 + }, + { + "epoch": 0.13, + "learning_rate": 1.9834044348600628e-05, + "loss": 0.3243, + "step": 2770 + }, + { + "epoch": 0.13, + "learning_rate": 1.983357554732549e-05, + "loss": 0.312, + "step": 2775 + }, + { + "epoch": 0.13, + "learning_rate": 1.983310674605035e-05, + "loss": 0.3524, + "step": 2780 + }, + { + "epoch": 0.13, + "learning_rate": 1.983263794477521e-05, + "loss": 0.4036, + "step": 2785 + }, + { + "epoch": 0.13, + "learning_rate": 1.983216914350007e-05, + "loss": 0.4246, + "step": 2790 + }, + { + "epoch": 0.13, + "learning_rate": 1.9831700342224935e-05, + "loss": 0.6055, + "step": 2795 + }, + { + "epoch": 0.13, + "learning_rate": 1.9831231540949795e-05, + "loss": 0.6449, + "step": 2800 + }, + { + "epoch": 0.13, + "learning_rate": 1.9830762739674655e-05, + "loss": 0.3477, + "step": 2805 + }, + { + "epoch": 0.13, + "learning_rate": 1.9830293938399514e-05, + "loss": 0.3349, + "step": 2810 + }, + { + "epoch": 0.13, + "learning_rate": 1.9829825137124374e-05, + "loss": 0.2735, + "step": 2815 + }, + { + "epoch": 0.13, + "learning_rate": 1.9829356335849234e-05, + "loss": 0.294, + "step": 2820 + }, + { + "epoch": 0.13, + "learning_rate": 1.9828887534574094e-05, + "loss": 0.3747, + "step": 2825 + }, + { + "epoch": 0.13, + "learning_rate": 1.9828418733298954e-05, + "loss": 0.3981, + "step": 2830 + }, + { + "epoch": 0.13, + "learning_rate": 1.9827949932023818e-05, + "loss": 0.5341, + "step": 2835 + }, + { + "epoch": 0.13, + "learning_rate": 1.9827481130748677e-05, + "loss": 0.4484, + "step": 2840 + }, + { + "epoch": 0.13, + "learning_rate": 1.9827012329473537e-05, + "loss": 0.6313, + "step": 2845 + }, + { + "epoch": 0.13, + "learning_rate": 1.9826543528198397e-05, + "loss": 0.8526, + "step": 2850 + }, + { + "epoch": 0.13, + "learning_rate": 1.982607472692326e-05, + "loss": 0.365, + "step": 2855 + }, + { + "epoch": 0.13, + "learning_rate": 1.982560592564812e-05, + "loss": 0.2652, + "step": 2860 + }, + { + "epoch": 0.13, + "learning_rate": 1.982513712437298e-05, + "loss": 0.239, + "step": 2865 + }, + { + "epoch": 0.13, + "learning_rate": 1.982466832309784e-05, + "loss": 0.3384, + "step": 2870 + }, + { + "epoch": 0.13, + "learning_rate": 1.98241995218227e-05, + "loss": 0.3417, + "step": 2875 + }, + { + "epoch": 0.13, + "learning_rate": 1.982373072054756e-05, + "loss": 0.348, + "step": 2880 + }, + { + "epoch": 0.13, + "learning_rate": 1.9823261919272424e-05, + "loss": 0.3826, + "step": 2885 + }, + { + "epoch": 0.13, + "learning_rate": 1.9822793117997284e-05, + "loss": 0.4936, + "step": 2890 + }, + { + "epoch": 0.14, + "learning_rate": 1.9822324316722143e-05, + "loss": 0.5807, + "step": 2895 + }, + { + "epoch": 0.14, + "learning_rate": 1.9821855515447003e-05, + "loss": 1.0115, + "step": 2900 + }, + { + "epoch": 0.14, + "learning_rate": 1.9821386714171863e-05, + "loss": 0.3353, + "step": 2905 + }, + { + "epoch": 0.14, + "learning_rate": 1.9820917912896723e-05, + "loss": 0.2495, + "step": 2910 + }, + { + "epoch": 0.14, + "learning_rate": 1.9820449111621583e-05, + "loss": 0.2535, + "step": 2915 + }, + { + "epoch": 0.14, + "learning_rate": 1.9819980310346447e-05, + "loss": 0.3094, + "step": 2920 + }, + { + "epoch": 0.14, + "learning_rate": 1.9819511509071306e-05, + "loss": 0.2988, + "step": 2925 + }, + { + "epoch": 0.14, + "learning_rate": 1.9819042707796166e-05, + "loss": 0.3186, + "step": 2930 + }, + { + "epoch": 0.14, + "learning_rate": 1.981857390652103e-05, + "loss": 0.4027, + "step": 2935 + }, + { + "epoch": 0.14, + "learning_rate": 1.981810510524589e-05, + "loss": 0.4593, + "step": 2940 + }, + { + "epoch": 0.14, + "learning_rate": 1.981763630397075e-05, + "loss": 0.6236, + "step": 2945 + }, + { + "epoch": 0.14, + "learning_rate": 1.981716750269561e-05, + "loss": 0.9071, + "step": 2950 + }, + { + "epoch": 0.14, + "learning_rate": 1.981669870142047e-05, + "loss": 0.4223, + "step": 2955 + }, + { + "epoch": 0.14, + "learning_rate": 1.981622990014533e-05, + "loss": 0.197, + "step": 2960 + }, + { + "epoch": 0.14, + "learning_rate": 1.981576109887019e-05, + "loss": 0.298, + "step": 2965 + }, + { + "epoch": 0.14, + "learning_rate": 1.981529229759505e-05, + "loss": 0.3526, + "step": 2970 + }, + { + "epoch": 0.14, + "learning_rate": 1.9814823496319913e-05, + "loss": 0.4274, + "step": 2975 + }, + { + "epoch": 0.14, + "learning_rate": 1.9814354695044773e-05, + "loss": 0.3132, + "step": 2980 + }, + { + "epoch": 0.14, + "learning_rate": 1.9813885893769632e-05, + "loss": 0.4685, + "step": 2985 + }, + { + "epoch": 0.14, + "learning_rate": 1.9813417092494492e-05, + "loss": 0.5327, + "step": 2990 + }, + { + "epoch": 0.14, + "learning_rate": 1.9812948291219352e-05, + "loss": 0.5887, + "step": 2995 + }, + { + "epoch": 0.14, + "learning_rate": 1.9812479489944216e-05, + "loss": 0.8516, + "step": 3000 + }, + { + "epoch": 0.14, + "learning_rate": 1.9812010688669076e-05, + "loss": 0.3981, + "step": 3005 + }, + { + "epoch": 0.14, + "learning_rate": 1.9811541887393936e-05, + "loss": 0.2334, + "step": 3010 + }, + { + "epoch": 0.14, + "learning_rate": 1.9811073086118795e-05, + "loss": 0.2776, + "step": 3015 + }, + { + "epoch": 0.14, + "learning_rate": 1.981060428484366e-05, + "loss": 0.2129, + "step": 3020 + }, + { + "epoch": 0.14, + "learning_rate": 1.981013548356852e-05, + "loss": 0.3171, + "step": 3025 + }, + { + "epoch": 0.14, + "learning_rate": 1.980966668229338e-05, + "loss": 0.3339, + "step": 3030 + }, + { + "epoch": 0.14, + "learning_rate": 1.980919788101824e-05, + "loss": 0.4108, + "step": 3035 + }, + { + "epoch": 0.14, + "learning_rate": 1.98087290797431e-05, + "loss": 0.4631, + "step": 3040 + }, + { + "epoch": 0.14, + "learning_rate": 1.980826027846796e-05, + "loss": 0.5093, + "step": 3045 + }, + { + "epoch": 0.14, + "learning_rate": 1.980779147719282e-05, + "loss": 0.8253, + "step": 3050 + }, + { + "epoch": 0.14, + "learning_rate": 1.980732267591768e-05, + "loss": 0.5317, + "step": 3055 + }, + { + "epoch": 0.14, + "learning_rate": 1.9806853874642538e-05, + "loss": 0.2176, + "step": 3060 + }, + { + "epoch": 0.14, + "learning_rate": 1.98063850733674e-05, + "loss": 0.2664, + "step": 3065 + }, + { + "epoch": 0.14, + "learning_rate": 1.980591627209226e-05, + "loss": 0.2988, + "step": 3070 + }, + { + "epoch": 0.14, + "learning_rate": 1.980544747081712e-05, + "loss": 0.3123, + "step": 3075 + }, + { + "epoch": 0.14, + "learning_rate": 1.9804978669541985e-05, + "loss": 0.3984, + "step": 3080 + }, + { + "epoch": 0.14, + "learning_rate": 1.9804509868266845e-05, + "loss": 0.4429, + "step": 3085 + }, + { + "epoch": 0.14, + "learning_rate": 1.9804041066991705e-05, + "loss": 0.4721, + "step": 3090 + }, + { + "epoch": 0.14, + "learning_rate": 1.9803572265716565e-05, + "loss": 0.608, + "step": 3095 + }, + { + "epoch": 0.14, + "learning_rate": 1.9803103464441424e-05, + "loss": 0.7198, + "step": 3100 + }, + { + "epoch": 0.14, + "learning_rate": 1.9802634663166284e-05, + "loss": 0.4533, + "step": 3105 + }, + { + "epoch": 0.15, + "learning_rate": 1.9802165861891144e-05, + "loss": 0.2521, + "step": 3110 + }, + { + "epoch": 0.15, + "learning_rate": 1.9801697060616008e-05, + "loss": 0.3518, + "step": 3115 + }, + { + "epoch": 0.15, + "learning_rate": 1.9801228259340868e-05, + "loss": 0.3765, + "step": 3120 + }, + { + "epoch": 0.15, + "learning_rate": 1.9800759458065728e-05, + "loss": 0.4222, + "step": 3125 + }, + { + "epoch": 0.15, + "learning_rate": 1.9800290656790587e-05, + "loss": 0.3578, + "step": 3130 + }, + { + "epoch": 0.15, + "learning_rate": 1.9799821855515447e-05, + "loss": 0.3973, + "step": 3135 + }, + { + "epoch": 0.15, + "learning_rate": 1.9799353054240307e-05, + "loss": 0.4282, + "step": 3140 + }, + { + "epoch": 0.15, + "learning_rate": 1.979888425296517e-05, + "loss": 0.5091, + "step": 3145 + }, + { + "epoch": 0.15, + "learning_rate": 1.979841545169003e-05, + "loss": 1.0688, + "step": 3150 + }, + { + "epoch": 0.15, + "learning_rate": 1.979794665041489e-05, + "loss": 0.4779, + "step": 3155 + }, + { + "epoch": 0.15, + "learning_rate": 1.9797477849139754e-05, + "loss": 0.2231, + "step": 3160 + }, + { + "epoch": 0.15, + "learning_rate": 1.9797009047864614e-05, + "loss": 0.2028, + "step": 3165 + }, + { + "epoch": 0.15, + "learning_rate": 1.9796540246589474e-05, + "loss": 0.2761, + "step": 3170 + }, + { + "epoch": 0.15, + "learning_rate": 1.9796071445314334e-05, + "loss": 0.4209, + "step": 3175 + }, + { + "epoch": 0.15, + "learning_rate": 1.9795602644039194e-05, + "loss": 0.3276, + "step": 3180 + }, + { + "epoch": 0.15, + "learning_rate": 1.9795133842764054e-05, + "loss": 0.4656, + "step": 3185 + }, + { + "epoch": 0.15, + "learning_rate": 1.9794665041488913e-05, + "loss": 0.4872, + "step": 3190 + }, + { + "epoch": 0.15, + "learning_rate": 1.9794196240213773e-05, + "loss": 0.573, + "step": 3195 + }, + { + "epoch": 0.15, + "learning_rate": 1.9793727438938633e-05, + "loss": 0.9714, + "step": 3200 + }, + { + "epoch": 0.15, + "learning_rate": 1.9793258637663497e-05, + "loss": 0.449, + "step": 3205 + }, + { + "epoch": 0.15, + "learning_rate": 1.9792789836388357e-05, + "loss": 0.2523, + "step": 3210 + }, + { + "epoch": 0.15, + "learning_rate": 1.9792321035113217e-05, + "loss": 0.2467, + "step": 3215 + }, + { + "epoch": 0.15, + "learning_rate": 1.9791852233838076e-05, + "loss": 0.2898, + "step": 3220 + }, + { + "epoch": 0.15, + "learning_rate": 1.979138343256294e-05, + "loss": 0.3189, + "step": 3225 + }, + { + "epoch": 0.15, + "learning_rate": 1.97909146312878e-05, + "loss": 0.4171, + "step": 3230 + }, + { + "epoch": 0.15, + "learning_rate": 1.979044583001266e-05, + "loss": 0.4367, + "step": 3235 + }, + { + "epoch": 0.15, + "learning_rate": 1.978997702873752e-05, + "loss": 0.4654, + "step": 3240 + }, + { + "epoch": 0.15, + "learning_rate": 1.978950822746238e-05, + "loss": 0.5939, + "step": 3245 + }, + { + "epoch": 0.15, + "learning_rate": 1.978903942618724e-05, + "loss": 1.0234, + "step": 3250 + }, + { + "epoch": 0.15, + "learning_rate": 1.9788570624912103e-05, + "loss": 0.4174, + "step": 3255 + }, + { + "epoch": 0.15, + "learning_rate": 1.9788101823636963e-05, + "loss": 0.1468, + "step": 3260 + }, + { + "epoch": 0.15, + "learning_rate": 1.9787633022361823e-05, + "loss": 0.2742, + "step": 3265 + }, + { + "epoch": 0.15, + "learning_rate": 1.9787164221086683e-05, + "loss": 0.3599, + "step": 3270 + }, + { + "epoch": 0.15, + "learning_rate": 1.9786695419811542e-05, + "loss": 0.2513, + "step": 3275 + }, + { + "epoch": 0.15, + "learning_rate": 1.9786226618536402e-05, + "loss": 0.2906, + "step": 3280 + }, + { + "epoch": 0.15, + "learning_rate": 1.9785757817261266e-05, + "loss": 0.4572, + "step": 3285 + }, + { + "epoch": 0.15, + "learning_rate": 1.9785289015986126e-05, + "loss": 0.3669, + "step": 3290 + }, + { + "epoch": 0.15, + "learning_rate": 1.9784820214710986e-05, + "loss": 0.5854, + "step": 3295 + }, + { + "epoch": 0.15, + "learning_rate": 1.9784351413435846e-05, + "loss": 0.8587, + "step": 3300 + }, + { + "epoch": 0.15, + "learning_rate": 1.978388261216071e-05, + "loss": 0.3697, + "step": 3305 + }, + { + "epoch": 0.15, + "learning_rate": 1.978341381088557e-05, + "loss": 0.1952, + "step": 3310 + }, + { + "epoch": 0.15, + "learning_rate": 1.978294500961043e-05, + "loss": 0.252, + "step": 3315 + }, + { + "epoch": 0.15, + "learning_rate": 1.978247620833529e-05, + "loss": 0.3353, + "step": 3320 + }, + { + "epoch": 0.16, + "learning_rate": 1.978200740706015e-05, + "loss": 0.3767, + "step": 3325 + }, + { + "epoch": 0.16, + "learning_rate": 1.978153860578501e-05, + "loss": 0.3057, + "step": 3330 + }, + { + "epoch": 0.16, + "learning_rate": 1.978106980450987e-05, + "loss": 0.4199, + "step": 3335 + }, + { + "epoch": 0.16, + "learning_rate": 1.978060100323473e-05, + "loss": 0.4672, + "step": 3340 + }, + { + "epoch": 0.16, + "learning_rate": 1.9780132201959592e-05, + "loss": 0.5891, + "step": 3345 + }, + { + "epoch": 0.16, + "learning_rate": 1.977966340068445e-05, + "loss": 1.0011, + "step": 3350 + }, + { + "epoch": 0.16, + "learning_rate": 1.977919459940931e-05, + "loss": 0.3955, + "step": 3355 + }, + { + "epoch": 0.16, + "learning_rate": 1.977872579813417e-05, + "loss": 0.2046, + "step": 3360 + }, + { + "epoch": 0.16, + "learning_rate": 1.9778256996859035e-05, + "loss": 0.1842, + "step": 3365 + }, + { + "epoch": 0.16, + "learning_rate": 1.9777788195583895e-05, + "loss": 0.3045, + "step": 3370 + }, + { + "epoch": 0.16, + "learning_rate": 1.9777319394308755e-05, + "loss": 0.3251, + "step": 3375 + }, + { + "epoch": 0.16, + "learning_rate": 1.9776850593033615e-05, + "loss": 0.4542, + "step": 3380 + }, + { + "epoch": 0.16, + "learning_rate": 1.9776381791758475e-05, + "loss": 0.428, + "step": 3385 + }, + { + "epoch": 0.16, + "learning_rate": 1.9775912990483335e-05, + "loss": 0.602, + "step": 3390 + }, + { + "epoch": 0.16, + "learning_rate": 1.9775444189208198e-05, + "loss": 0.472, + "step": 3395 + }, + { + "epoch": 0.16, + "learning_rate": 1.9774975387933058e-05, + "loss": 0.8672, + "step": 3400 + }, + { + "epoch": 0.16, + "learning_rate": 1.9774506586657918e-05, + "loss": 0.3989, + "step": 3405 + }, + { + "epoch": 0.16, + "learning_rate": 1.9774037785382778e-05, + "loss": 0.1915, + "step": 3410 + }, + { + "epoch": 0.16, + "learning_rate": 1.9773568984107638e-05, + "loss": 0.222, + "step": 3415 + }, + { + "epoch": 0.16, + "learning_rate": 1.9773100182832498e-05, + "loss": 0.2807, + "step": 3420 + }, + { + "epoch": 0.16, + "learning_rate": 1.9772631381557357e-05, + "loss": 0.369, + "step": 3425 + }, + { + "epoch": 0.16, + "learning_rate": 1.977216258028222e-05, + "loss": 0.4569, + "step": 3430 + }, + { + "epoch": 0.16, + "learning_rate": 1.977169377900708e-05, + "loss": 0.3986, + "step": 3435 + }, + { + "epoch": 0.16, + "learning_rate": 1.977122497773194e-05, + "loss": 0.5065, + "step": 3440 + }, + { + "epoch": 0.16, + "learning_rate": 1.9770756176456804e-05, + "loss": 0.5453, + "step": 3445 + }, + { + "epoch": 0.16, + "learning_rate": 1.9770287375181664e-05, + "loss": 0.879, + "step": 3450 + }, + { + "epoch": 0.16, + "learning_rate": 1.9769818573906524e-05, + "loss": 0.4329, + "step": 3455 + }, + { + "epoch": 0.16, + "learning_rate": 1.9769349772631384e-05, + "loss": 0.2126, + "step": 3460 + }, + { + "epoch": 0.16, + "learning_rate": 1.9768880971356244e-05, + "loss": 0.3154, + "step": 3465 + }, + { + "epoch": 0.16, + "learning_rate": 1.9768412170081104e-05, + "loss": 0.314, + "step": 3470 + }, + { + "epoch": 0.16, + "learning_rate": 1.9767943368805964e-05, + "loss": 0.3373, + "step": 3475 + }, + { + "epoch": 0.16, + "learning_rate": 1.9767474567530823e-05, + "loss": 0.2566, + "step": 3480 + }, + { + "epoch": 0.16, + "learning_rate": 1.9767005766255687e-05, + "loss": 0.4568, + "step": 3485 + }, + { + "epoch": 0.16, + "learning_rate": 1.9766536964980547e-05, + "loss": 0.3373, + "step": 3490 + }, + { + "epoch": 0.16, + "learning_rate": 1.9766068163705407e-05, + "loss": 0.5181, + "step": 3495 + }, + { + "epoch": 0.16, + "learning_rate": 1.9765599362430267e-05, + "loss": 0.8484, + "step": 3500 + }, + { + "epoch": 0.16, + "learning_rate": 1.9765130561155127e-05, + "loss": 0.3833, + "step": 3505 + }, + { + "epoch": 0.16, + "learning_rate": 1.976466175987999e-05, + "loss": 0.1497, + "step": 3510 + }, + { + "epoch": 0.16, + "learning_rate": 1.976419295860485e-05, + "loss": 0.235, + "step": 3515 + }, + { + "epoch": 0.16, + "learning_rate": 1.976372415732971e-05, + "loss": 0.2564, + "step": 3520 + }, + { + "epoch": 0.16, + "learning_rate": 1.976325535605457e-05, + "loss": 0.2755, + "step": 3525 + }, + { + "epoch": 0.16, + "learning_rate": 1.976278655477943e-05, + "loss": 0.416, + "step": 3530 + }, + { + "epoch": 0.16, + "learning_rate": 1.9762317753504293e-05, + "loss": 0.4785, + "step": 3535 + }, + { + "epoch": 0.17, + "learning_rate": 1.9761848952229153e-05, + "loss": 0.5485, + "step": 3540 + }, + { + "epoch": 0.17, + "learning_rate": 1.9761380150954013e-05, + "loss": 0.6251, + "step": 3545 + }, + { + "epoch": 0.17, + "learning_rate": 1.9760911349678873e-05, + "loss": 0.8421, + "step": 3550 + }, + { + "epoch": 0.17, + "learning_rate": 1.9760442548403733e-05, + "loss": 0.3436, + "step": 3555 + }, + { + "epoch": 0.17, + "learning_rate": 1.9759973747128593e-05, + "loss": 0.1759, + "step": 3560 + }, + { + "epoch": 0.17, + "learning_rate": 1.9759504945853453e-05, + "loss": 0.2988, + "step": 3565 + }, + { + "epoch": 0.17, + "learning_rate": 1.9759036144578312e-05, + "loss": 0.2328, + "step": 3570 + }, + { + "epoch": 0.17, + "learning_rate": 1.9758567343303176e-05, + "loss": 0.3053, + "step": 3575 + }, + { + "epoch": 0.17, + "learning_rate": 1.9758098542028036e-05, + "loss": 0.3479, + "step": 3580 + }, + { + "epoch": 0.17, + "learning_rate": 1.9757629740752896e-05, + "loss": 0.377, + "step": 3585 + }, + { + "epoch": 0.17, + "learning_rate": 1.975716093947776e-05, + "loss": 0.4114, + "step": 3590 + }, + { + "epoch": 0.17, + "learning_rate": 1.975669213820262e-05, + "loss": 0.488, + "step": 3595 + }, + { + "epoch": 0.17, + "learning_rate": 1.975622333692748e-05, + "loss": 0.5475, + "step": 3600 + }, + { + "epoch": 0.17, + "learning_rate": 1.975575453565234e-05, + "loss": 0.5718, + "step": 3605 + }, + { + "epoch": 0.17, + "learning_rate": 1.97552857343772e-05, + "loss": 0.1882, + "step": 3610 + }, + { + "epoch": 0.17, + "learning_rate": 1.975481693310206e-05, + "loss": 0.2593, + "step": 3615 + }, + { + "epoch": 0.17, + "learning_rate": 1.975434813182692e-05, + "loss": 0.2558, + "step": 3620 + }, + { + "epoch": 0.17, + "learning_rate": 1.9753879330551782e-05, + "loss": 0.2394, + "step": 3625 + }, + { + "epoch": 0.17, + "learning_rate": 1.9753410529276642e-05, + "loss": 0.4207, + "step": 3630 + }, + { + "epoch": 0.17, + "learning_rate": 1.9752941728001502e-05, + "loss": 0.4391, + "step": 3635 + }, + { + "epoch": 0.17, + "learning_rate": 1.9752472926726362e-05, + "loss": 0.7291, + "step": 3640 + }, + { + "epoch": 0.17, + "learning_rate": 1.975200412545122e-05, + "loss": 0.4494, + "step": 3645 + }, + { + "epoch": 0.17, + "learning_rate": 1.975153532417608e-05, + "loss": 0.7069, + "step": 3650 + }, + { + "epoch": 0.17, + "learning_rate": 1.9751066522900945e-05, + "loss": 0.3945, + "step": 3655 + }, + { + "epoch": 0.17, + "learning_rate": 1.9750597721625805e-05, + "loss": 0.2034, + "step": 3660 + }, + { + "epoch": 0.17, + "learning_rate": 1.9750128920350665e-05, + "loss": 0.2243, + "step": 3665 + }, + { + "epoch": 0.17, + "learning_rate": 1.9749660119075528e-05, + "loss": 0.2732, + "step": 3670 + }, + { + "epoch": 0.17, + "learning_rate": 1.9749191317800388e-05, + "loss": 0.3621, + "step": 3675 + }, + { + "epoch": 0.17, + "learning_rate": 1.9748722516525248e-05, + "loss": 0.3214, + "step": 3680 + }, + { + "epoch": 0.17, + "learning_rate": 1.9748253715250108e-05, + "loss": 0.5254, + "step": 3685 + }, + { + "epoch": 0.17, + "learning_rate": 1.9747784913974968e-05, + "loss": 0.5203, + "step": 3690 + }, + { + "epoch": 0.17, + "learning_rate": 1.9747316112699828e-05, + "loss": 0.7242, + "step": 3695 + }, + { + "epoch": 0.17, + "learning_rate": 1.9746847311424688e-05, + "loss": 0.83, + "step": 3700 + }, + { + "epoch": 0.17, + "learning_rate": 1.9746378510149548e-05, + "loss": 0.3907, + "step": 3705 + }, + { + "epoch": 0.17, + "learning_rate": 1.9745909708874408e-05, + "loss": 0.1778, + "step": 3710 + }, + { + "epoch": 0.17, + "learning_rate": 1.974544090759927e-05, + "loss": 0.2049, + "step": 3715 + }, + { + "epoch": 0.17, + "learning_rate": 1.974497210632413e-05, + "loss": 0.3739, + "step": 3720 + }, + { + "epoch": 0.17, + "learning_rate": 1.974450330504899e-05, + "loss": 0.3008, + "step": 3725 + }, + { + "epoch": 0.17, + "learning_rate": 1.974403450377385e-05, + "loss": 0.2455, + "step": 3730 + }, + { + "epoch": 0.17, + "learning_rate": 1.9743565702498714e-05, + "loss": 0.4178, + "step": 3735 + }, + { + "epoch": 0.17, + "learning_rate": 1.9743096901223574e-05, + "loss": 0.5493, + "step": 3740 + }, + { + "epoch": 0.17, + "learning_rate": 1.9742628099948434e-05, + "loss": 0.4286, + "step": 3745 + }, + { + "epoch": 0.17, + "learning_rate": 1.9742159298673294e-05, + "loss": 0.6876, + "step": 3750 + }, + { + "epoch": 0.18, + "learning_rate": 1.9741690497398154e-05, + "loss": 0.3451, + "step": 3755 + }, + { + "epoch": 0.18, + "learning_rate": 1.9741221696123014e-05, + "loss": 0.1612, + "step": 3760 + }, + { + "epoch": 0.18, + "learning_rate": 1.9740752894847877e-05, + "loss": 0.2758, + "step": 3765 + }, + { + "epoch": 0.18, + "learning_rate": 1.9740284093572737e-05, + "loss": 0.2768, + "step": 3770 + }, + { + "epoch": 0.18, + "learning_rate": 1.9739815292297597e-05, + "loss": 0.3298, + "step": 3775 + }, + { + "epoch": 0.18, + "learning_rate": 1.9739346491022457e-05, + "loss": 0.3231, + "step": 3780 + }, + { + "epoch": 0.18, + "learning_rate": 1.9738877689747317e-05, + "loss": 0.3359, + "step": 3785 + }, + { + "epoch": 0.18, + "learning_rate": 1.9738408888472177e-05, + "loss": 0.3687, + "step": 3790 + }, + { + "epoch": 0.18, + "learning_rate": 1.973794008719704e-05, + "loss": 0.5007, + "step": 3795 + }, + { + "epoch": 0.18, + "learning_rate": 1.97374712859219e-05, + "loss": 0.9462, + "step": 3800 + }, + { + "epoch": 0.18, + "learning_rate": 1.973700248464676e-05, + "loss": 0.4636, + "step": 3805 + }, + { + "epoch": 0.18, + "learning_rate": 1.973653368337162e-05, + "loss": 0.1924, + "step": 3810 + }, + { + "epoch": 0.18, + "learning_rate": 1.9736064882096483e-05, + "loss": 0.1923, + "step": 3815 + }, + { + "epoch": 0.18, + "learning_rate": 1.9735596080821343e-05, + "loss": 0.2264, + "step": 3820 + }, + { + "epoch": 0.18, + "learning_rate": 1.9735127279546203e-05, + "loss": 0.3736, + "step": 3825 + }, + { + "epoch": 0.18, + "learning_rate": 1.9734658478271063e-05, + "loss": 0.4152, + "step": 3830 + }, + { + "epoch": 0.18, + "learning_rate": 1.9734189676995923e-05, + "loss": 0.4174, + "step": 3835 + }, + { + "epoch": 0.18, + "learning_rate": 1.9733720875720783e-05, + "loss": 0.4885, + "step": 3840 + }, + { + "epoch": 0.18, + "learning_rate": 1.9733252074445643e-05, + "loss": 0.4311, + "step": 3845 + }, + { + "epoch": 0.18, + "learning_rate": 1.9732783273170503e-05, + "loss": 0.6807, + "step": 3850 + }, + { + "epoch": 0.18, + "learning_rate": 1.9732314471895363e-05, + "loss": 0.4367, + "step": 3855 + }, + { + "epoch": 0.18, + "learning_rate": 1.9731845670620226e-05, + "loss": 0.1888, + "step": 3860 + }, + { + "epoch": 0.18, + "learning_rate": 1.9731376869345086e-05, + "loss": 0.2237, + "step": 3865 + }, + { + "epoch": 0.18, + "learning_rate": 1.9730908068069946e-05, + "loss": 0.2441, + "step": 3870 + }, + { + "epoch": 0.18, + "learning_rate": 1.973043926679481e-05, + "loss": 0.3274, + "step": 3875 + }, + { + "epoch": 0.18, + "learning_rate": 1.972997046551967e-05, + "loss": 0.3156, + "step": 3880 + }, + { + "epoch": 0.18, + "learning_rate": 1.972950166424453e-05, + "loss": 0.3368, + "step": 3885 + }, + { + "epoch": 0.18, + "learning_rate": 1.972903286296939e-05, + "loss": 0.4301, + "step": 3890 + }, + { + "epoch": 0.18, + "learning_rate": 1.972856406169425e-05, + "loss": 0.6439, + "step": 3895 + }, + { + "epoch": 0.18, + "learning_rate": 1.972809526041911e-05, + "loss": 0.839, + "step": 3900 + }, + { + "epoch": 0.18, + "learning_rate": 1.9727626459143972e-05, + "loss": 0.4082, + "step": 3905 + }, + { + "epoch": 0.18, + "learning_rate": 1.9727157657868832e-05, + "loss": 0.1958, + "step": 3910 + }, + { + "epoch": 0.18, + "learning_rate": 1.9726688856593692e-05, + "loss": 0.1713, + "step": 3915 + }, + { + "epoch": 0.18, + "learning_rate": 1.9726220055318552e-05, + "loss": 0.2028, + "step": 3920 + }, + { + "epoch": 0.18, + "learning_rate": 1.9725751254043412e-05, + "loss": 0.2407, + "step": 3925 + }, + { + "epoch": 0.18, + "learning_rate": 1.9725282452768272e-05, + "loss": 0.4262, + "step": 3930 + }, + { + "epoch": 0.18, + "learning_rate": 1.972481365149313e-05, + "loss": 0.3567, + "step": 3935 + }, + { + "epoch": 0.18, + "learning_rate": 1.9724344850217995e-05, + "loss": 0.4333, + "step": 3940 + }, + { + "epoch": 0.18, + "learning_rate": 1.9723876048942855e-05, + "loss": 0.3769, + "step": 3945 + }, + { + "epoch": 0.18, + "learning_rate": 1.9723407247667715e-05, + "loss": 0.9714, + "step": 3950 + }, + { + "epoch": 0.18, + "learning_rate": 1.9722938446392578e-05, + "loss": 0.3421, + "step": 3955 + }, + { + "epoch": 0.18, + "learning_rate": 1.9722469645117438e-05, + "loss": 0.1904, + "step": 3960 + }, + { + "epoch": 0.19, + "learning_rate": 1.9722000843842298e-05, + "loss": 0.2153, + "step": 3965 + }, + { + "epoch": 0.19, + "learning_rate": 1.9721532042567158e-05, + "loss": 0.2275, + "step": 3970 + }, + { + "epoch": 0.19, + "learning_rate": 1.9721063241292018e-05, + "loss": 0.3917, + "step": 3975 + }, + { + "epoch": 0.19, + "learning_rate": 1.9720594440016878e-05, + "loss": 0.4462, + "step": 3980 + }, + { + "epoch": 0.19, + "learning_rate": 1.9720125638741738e-05, + "loss": 0.319, + "step": 3985 + }, + { + "epoch": 0.19, + "learning_rate": 1.9719656837466598e-05, + "loss": 0.2997, + "step": 3990 + }, + { + "epoch": 0.19, + "learning_rate": 1.971918803619146e-05, + "loss": 0.601, + "step": 3995 + }, + { + "epoch": 0.19, + "learning_rate": 1.971871923491632e-05, + "loss": 0.6658, + "step": 4000 + }, + { + "epoch": 0.19, + "learning_rate": 1.971825043364118e-05, + "loss": 0.3592, + "step": 4005 + }, + { + "epoch": 0.19, + "learning_rate": 1.971778163236604e-05, + "loss": 0.1744, + "step": 4010 + }, + { + "epoch": 0.19, + "learning_rate": 1.97173128310909e-05, + "loss": 0.2081, + "step": 4015 + }, + { + "epoch": 0.19, + "learning_rate": 1.9716844029815764e-05, + "loss": 0.2472, + "step": 4020 + }, + { + "epoch": 0.19, + "learning_rate": 1.9716375228540624e-05, + "loss": 0.2668, + "step": 4025 + }, + { + "epoch": 0.19, + "learning_rate": 1.9715906427265484e-05, + "loss": 0.2763, + "step": 4030 + }, + { + "epoch": 0.19, + "learning_rate": 1.9715437625990344e-05, + "loss": 0.3903, + "step": 4035 + }, + { + "epoch": 0.19, + "learning_rate": 1.9714968824715204e-05, + "loss": 0.4871, + "step": 4040 + }, + { + "epoch": 0.19, + "learning_rate": 1.9714500023440067e-05, + "loss": 0.6568, + "step": 4045 + }, + { + "epoch": 0.19, + "learning_rate": 1.9714031222164927e-05, + "loss": 0.7185, + "step": 4050 + }, + { + "epoch": 0.19, + "learning_rate": 1.9713562420889787e-05, + "loss": 0.416, + "step": 4055 + }, + { + "epoch": 0.19, + "learning_rate": 1.9713093619614647e-05, + "loss": 0.1623, + "step": 4060 + }, + { + "epoch": 0.19, + "learning_rate": 1.9712624818339507e-05, + "loss": 0.1905, + "step": 4065 + }, + { + "epoch": 0.19, + "learning_rate": 1.9712156017064367e-05, + "loss": 0.2243, + "step": 4070 + }, + { + "epoch": 0.19, + "learning_rate": 1.9711687215789227e-05, + "loss": 0.2595, + "step": 4075 + }, + { + "epoch": 0.19, + "learning_rate": 1.9711218414514087e-05, + "loss": 0.2343, + "step": 4080 + }, + { + "epoch": 0.19, + "learning_rate": 1.971074961323895e-05, + "loss": 0.3081, + "step": 4085 + }, + { + "epoch": 0.19, + "learning_rate": 1.971028081196381e-05, + "loss": 0.4051, + "step": 4090 + }, + { + "epoch": 0.19, + "learning_rate": 1.970981201068867e-05, + "loss": 0.4353, + "step": 4095 + }, + { + "epoch": 0.19, + "learning_rate": 1.9709343209413533e-05, + "loss": 0.8488, + "step": 4100 + }, + { + "epoch": 0.19, + "learning_rate": 1.9708874408138393e-05, + "loss": 0.3971, + "step": 4105 + }, + { + "epoch": 0.19, + "learning_rate": 1.9708405606863253e-05, + "loss": 0.1639, + "step": 4110 + }, + { + "epoch": 0.19, + "learning_rate": 1.9707936805588113e-05, + "loss": 0.1971, + "step": 4115 + }, + { + "epoch": 0.19, + "learning_rate": 1.9707468004312973e-05, + "loss": 0.1918, + "step": 4120 + }, + { + "epoch": 0.19, + "learning_rate": 1.9706999203037833e-05, + "loss": 0.2442, + "step": 4125 + }, + { + "epoch": 0.19, + "learning_rate": 1.9706530401762693e-05, + "loss": 0.3431, + "step": 4130 + }, + { + "epoch": 0.19, + "learning_rate": 1.9706061600487556e-05, + "loss": 0.4143, + "step": 4135 + }, + { + "epoch": 0.19, + "learning_rate": 1.9705592799212416e-05, + "loss": 0.563, + "step": 4140 + }, + { + "epoch": 0.19, + "learning_rate": 1.9705123997937276e-05, + "loss": 0.5536, + "step": 4145 + }, + { + "epoch": 0.19, + "learning_rate": 1.9704655196662136e-05, + "loss": 0.8869, + "step": 4150 + }, + { + "epoch": 0.19, + "learning_rate": 1.9704186395386996e-05, + "loss": 0.3938, + "step": 4155 + }, + { + "epoch": 0.19, + "learning_rate": 1.9703717594111856e-05, + "loss": 0.1912, + "step": 4160 + }, + { + "epoch": 0.19, + "learning_rate": 1.970324879283672e-05, + "loss": 0.2132, + "step": 4165 + }, + { + "epoch": 0.19, + "learning_rate": 1.970277999156158e-05, + "loss": 0.1632, + "step": 4170 + }, + { + "epoch": 0.19, + "learning_rate": 1.970231119028644e-05, + "loss": 0.2362, + "step": 4175 + }, + { + "epoch": 0.2, + "learning_rate": 1.97018423890113e-05, + "loss": 0.2287, + "step": 4180 + }, + { + "epoch": 0.2, + "learning_rate": 1.9701373587736162e-05, + "loss": 0.3168, + "step": 4185 + }, + { + "epoch": 0.2, + "learning_rate": 1.9700904786461022e-05, + "loss": 0.4876, + "step": 4190 + }, + { + "epoch": 0.2, + "learning_rate": 1.9700435985185882e-05, + "loss": 0.5353, + "step": 4195 + }, + { + "epoch": 0.2, + "learning_rate": 1.9699967183910742e-05, + "loss": 0.6848, + "step": 4200 + }, + { + "epoch": 0.2, + "learning_rate": 1.9699498382635602e-05, + "loss": 0.4614, + "step": 4205 + }, + { + "epoch": 0.2, + "learning_rate": 1.9699029581360462e-05, + "loss": 0.2455, + "step": 4210 + }, + { + "epoch": 0.2, + "learning_rate": 1.9698560780085322e-05, + "loss": 0.1861, + "step": 4215 + }, + { + "epoch": 0.2, + "learning_rate": 1.9698091978810182e-05, + "loss": 0.3312, + "step": 4220 + }, + { + "epoch": 0.2, + "learning_rate": 1.9697623177535045e-05, + "loss": 0.2629, + "step": 4225 + }, + { + "epoch": 0.2, + "learning_rate": 1.9697154376259905e-05, + "loss": 0.2331, + "step": 4230 + }, + { + "epoch": 0.2, + "learning_rate": 1.9696685574984765e-05, + "loss": 0.4012, + "step": 4235 + }, + { + "epoch": 0.2, + "learning_rate": 1.9696216773709628e-05, + "loss": 0.3765, + "step": 4240 + }, + { + "epoch": 0.2, + "learning_rate": 1.9695747972434488e-05, + "loss": 0.5471, + "step": 4245 + }, + { + "epoch": 0.2, + "learning_rate": 1.9695279171159348e-05, + "loss": 0.723, + "step": 4250 + }, + { + "epoch": 0.2, + "learning_rate": 1.9694810369884208e-05, + "loss": 0.4313, + "step": 4255 + }, + { + "epoch": 0.2, + "learning_rate": 1.9694341568609068e-05, + "loss": 0.13, + "step": 4260 + }, + { + "epoch": 0.2, + "learning_rate": 1.9693872767333928e-05, + "loss": 0.2133, + "step": 4265 + }, + { + "epoch": 0.2, + "learning_rate": 1.9693403966058788e-05, + "loss": 0.2618, + "step": 4270 + }, + { + "epoch": 0.2, + "learning_rate": 1.969293516478365e-05, + "loss": 0.2428, + "step": 4275 + }, + { + "epoch": 0.2, + "learning_rate": 1.969246636350851e-05, + "loss": 0.3027, + "step": 4280 + }, + { + "epoch": 0.2, + "learning_rate": 1.969199756223337e-05, + "loss": 0.3958, + "step": 4285 + }, + { + "epoch": 0.2, + "learning_rate": 1.969152876095823e-05, + "loss": 0.5407, + "step": 4290 + }, + { + "epoch": 0.2, + "learning_rate": 1.969105995968309e-05, + "loss": 0.5288, + "step": 4295 + }, + { + "epoch": 0.2, + "learning_rate": 1.969059115840795e-05, + "loss": 0.9622, + "step": 4300 + }, + { + "epoch": 0.2, + "learning_rate": 1.9690122357132814e-05, + "loss": 0.3357, + "step": 4305 + }, + { + "epoch": 0.2, + "learning_rate": 1.9689653555857674e-05, + "loss": 0.2422, + "step": 4310 + }, + { + "epoch": 0.2, + "learning_rate": 1.9689184754582534e-05, + "loss": 0.2317, + "step": 4315 + }, + { + "epoch": 0.2, + "learning_rate": 1.9688715953307397e-05, + "loss": 0.3139, + "step": 4320 + }, + { + "epoch": 0.2, + "learning_rate": 1.9688247152032257e-05, + "loss": 0.2611, + "step": 4325 + }, + { + "epoch": 0.2, + "learning_rate": 1.9687778350757117e-05, + "loss": 0.274, + "step": 4330 + }, + { + "epoch": 0.2, + "learning_rate": 1.9687309549481977e-05, + "loss": 0.4473, + "step": 4335 + }, + { + "epoch": 0.2, + "learning_rate": 1.9686840748206837e-05, + "loss": 0.3167, + "step": 4340 + }, + { + "epoch": 0.2, + "learning_rate": 1.9686371946931697e-05, + "loss": 0.5854, + "step": 4345 + }, + { + "epoch": 0.2, + "learning_rate": 1.9685903145656557e-05, + "loss": 0.8531, + "step": 4350 + }, + { + "epoch": 0.2, + "learning_rate": 1.9685434344381417e-05, + "loss": 0.3545, + "step": 4355 + }, + { + "epoch": 0.2, + "learning_rate": 1.9684965543106277e-05, + "loss": 0.1751, + "step": 4360 + }, + { + "epoch": 0.2, + "learning_rate": 1.9684496741831137e-05, + "loss": 0.2515, + "step": 4365 + }, + { + "epoch": 0.2, + "learning_rate": 1.9684027940556e-05, + "loss": 0.2662, + "step": 4370 + }, + { + "epoch": 0.2, + "learning_rate": 1.968355913928086e-05, + "loss": 0.2842, + "step": 4375 + }, + { + "epoch": 0.2, + "learning_rate": 1.968309033800572e-05, + "loss": 0.4317, + "step": 4380 + }, + { + "epoch": 0.2, + "learning_rate": 1.9682621536730583e-05, + "loss": 0.3946, + "step": 4385 + }, + { + "epoch": 0.2, + "learning_rate": 1.9682152735455443e-05, + "loss": 0.3546, + "step": 4390 + }, + { + "epoch": 0.21, + "learning_rate": 1.9681683934180303e-05, + "loss": 0.4726, + "step": 4395 + }, + { + "epoch": 0.21, + "learning_rate": 1.9681215132905163e-05, + "loss": 0.6882, + "step": 4400 + }, + { + "epoch": 0.21, + "learning_rate": 1.9680746331630023e-05, + "loss": 0.3417, + "step": 4405 + }, + { + "epoch": 0.21, + "learning_rate": 1.9680277530354883e-05, + "loss": 0.1884, + "step": 4410 + }, + { + "epoch": 0.21, + "learning_rate": 1.9679808729079746e-05, + "loss": 0.2584, + "step": 4415 + }, + { + "epoch": 0.21, + "learning_rate": 1.9679339927804606e-05, + "loss": 0.1648, + "step": 4420 + }, + { + "epoch": 0.21, + "learning_rate": 1.9678871126529466e-05, + "loss": 0.3163, + "step": 4425 + }, + { + "epoch": 0.21, + "learning_rate": 1.9678402325254326e-05, + "loss": 0.2756, + "step": 4430 + }, + { + "epoch": 0.21, + "learning_rate": 1.9677933523979186e-05, + "loss": 0.3836, + "step": 4435 + }, + { + "epoch": 0.21, + "learning_rate": 1.9677464722704046e-05, + "loss": 0.3857, + "step": 4440 + }, + { + "epoch": 0.21, + "learning_rate": 1.9676995921428906e-05, + "loss": 0.5504, + "step": 4445 + }, + { + "epoch": 0.21, + "learning_rate": 1.967652712015377e-05, + "loss": 0.796, + "step": 4450 + }, + { + "epoch": 0.21, + "learning_rate": 1.967605831887863e-05, + "loss": 0.4518, + "step": 4455 + }, + { + "epoch": 0.21, + "learning_rate": 1.967558951760349e-05, + "loss": 0.2493, + "step": 4460 + }, + { + "epoch": 0.21, + "learning_rate": 1.9675120716328352e-05, + "loss": 0.1496, + "step": 4465 + }, + { + "epoch": 0.21, + "learning_rate": 1.9674651915053212e-05, + "loss": 0.2788, + "step": 4470 + }, + { + "epoch": 0.21, + "learning_rate": 1.9674183113778072e-05, + "loss": 0.2969, + "step": 4475 + }, + { + "epoch": 0.21, + "learning_rate": 1.9673714312502932e-05, + "loss": 0.3378, + "step": 4480 + }, + { + "epoch": 0.21, + "learning_rate": 1.9673245511227792e-05, + "loss": 0.5223, + "step": 4485 + }, + { + "epoch": 0.21, + "learning_rate": 1.9672776709952652e-05, + "loss": 0.4544, + "step": 4490 + }, + { + "epoch": 0.21, + "learning_rate": 1.9672307908677512e-05, + "loss": 0.6628, + "step": 4495 + }, + { + "epoch": 0.21, + "learning_rate": 1.9671839107402372e-05, + "loss": 0.898, + "step": 4500 + }, + { + "epoch": 0.21, + "learning_rate": 1.9671370306127232e-05, + "loss": 0.3878, + "step": 4505 + }, + { + "epoch": 0.21, + "learning_rate": 1.9670901504852095e-05, + "loss": 0.2018, + "step": 4510 + }, + { + "epoch": 0.21, + "learning_rate": 1.9670432703576955e-05, + "loss": 0.2069, + "step": 4515 + }, + { + "epoch": 0.21, + "learning_rate": 1.9669963902301815e-05, + "loss": 0.242, + "step": 4520 + }, + { + "epoch": 0.21, + "learning_rate": 1.9669495101026675e-05, + "loss": 0.2806, + "step": 4525 + }, + { + "epoch": 0.21, + "learning_rate": 1.9669026299751538e-05, + "loss": 0.3503, + "step": 4530 + }, + { + "epoch": 0.21, + "learning_rate": 1.9668557498476398e-05, + "loss": 0.2483, + "step": 4535 + }, + { + "epoch": 0.21, + "learning_rate": 1.9668088697201258e-05, + "loss": 0.3632, + "step": 4540 + }, + { + "epoch": 0.21, + "learning_rate": 1.9667619895926118e-05, + "loss": 0.5749, + "step": 4545 + }, + { + "epoch": 0.21, + "learning_rate": 1.9667151094650978e-05, + "loss": 0.6014, + "step": 4550 + }, + { + "epoch": 0.21, + "learning_rate": 1.966668229337584e-05, + "loss": 0.3388, + "step": 4555 + }, + { + "epoch": 0.21, + "learning_rate": 1.96662134921007e-05, + "loss": 0.1989, + "step": 4560 + }, + { + "epoch": 0.21, + "learning_rate": 1.966574469082556e-05, + "loss": 0.2078, + "step": 4565 + }, + { + "epoch": 0.21, + "learning_rate": 1.966527588955042e-05, + "loss": 0.2163, + "step": 4570 + }, + { + "epoch": 0.21, + "learning_rate": 1.966480708827528e-05, + "loss": 0.2737, + "step": 4575 + }, + { + "epoch": 0.21, + "learning_rate": 1.966433828700014e-05, + "loss": 0.347, + "step": 4580 + }, + { + "epoch": 0.21, + "learning_rate": 1.9663869485725e-05, + "loss": 0.3929, + "step": 4585 + }, + { + "epoch": 0.21, + "learning_rate": 1.9663400684449864e-05, + "loss": 0.4723, + "step": 4590 + }, + { + "epoch": 0.21, + "learning_rate": 1.9662931883174724e-05, + "loss": 0.5059, + "step": 4595 + }, + { + "epoch": 0.21, + "learning_rate": 1.9662463081899584e-05, + "loss": 0.689, + "step": 4600 + }, + { + "epoch": 0.21, + "learning_rate": 1.9661994280624444e-05, + "loss": 0.374, + "step": 4605 + }, + { + "epoch": 0.22, + "learning_rate": 1.9661525479349307e-05, + "loss": 0.1667, + "step": 4610 + }, + { + "epoch": 0.22, + "learning_rate": 1.9661056678074167e-05, + "loss": 0.1707, + "step": 4615 + }, + { + "epoch": 0.22, + "learning_rate": 1.9660587876799027e-05, + "loss": 0.196, + "step": 4620 + }, + { + "epoch": 0.22, + "learning_rate": 1.9660119075523887e-05, + "loss": 0.3145, + "step": 4625 + }, + { + "epoch": 0.22, + "learning_rate": 1.9659650274248747e-05, + "loss": 0.3389, + "step": 4630 + }, + { + "epoch": 0.22, + "learning_rate": 1.9659181472973607e-05, + "loss": 0.4153, + "step": 4635 + }, + { + "epoch": 0.22, + "learning_rate": 1.9658712671698467e-05, + "loss": 0.3724, + "step": 4640 + }, + { + "epoch": 0.22, + "learning_rate": 1.965824387042333e-05, + "loss": 0.5458, + "step": 4645 + }, + { + "epoch": 0.22, + "learning_rate": 1.965777506914819e-05, + "loss": 0.8993, + "step": 4650 + }, + { + "epoch": 0.22, + "learning_rate": 1.965730626787305e-05, + "loss": 0.3525, + "step": 4655 + }, + { + "epoch": 0.22, + "learning_rate": 1.965683746659791e-05, + "loss": 0.2098, + "step": 4660 + }, + { + "epoch": 0.22, + "learning_rate": 1.965636866532277e-05, + "loss": 0.1881, + "step": 4665 + }, + { + "epoch": 0.22, + "learning_rate": 1.9655899864047633e-05, + "loss": 0.2259, + "step": 4670 + }, + { + "epoch": 0.22, + "learning_rate": 1.9655431062772493e-05, + "loss": 0.3217, + "step": 4675 + }, + { + "epoch": 0.22, + "learning_rate": 1.9654962261497353e-05, + "loss": 0.3272, + "step": 4680 + }, + { + "epoch": 0.22, + "learning_rate": 1.9654493460222213e-05, + "loss": 0.3857, + "step": 4685 + }, + { + "epoch": 0.22, + "learning_rate": 1.9654024658947073e-05, + "loss": 0.4295, + "step": 4690 + }, + { + "epoch": 0.22, + "learning_rate": 1.9653555857671936e-05, + "loss": 0.4549, + "step": 4695 + }, + { + "epoch": 0.22, + "learning_rate": 1.9653087056396796e-05, + "loss": 0.6043, + "step": 4700 + }, + { + "epoch": 0.22, + "learning_rate": 1.9652618255121656e-05, + "loss": 0.3951, + "step": 4705 + }, + { + "epoch": 0.22, + "learning_rate": 1.9652149453846516e-05, + "loss": 0.1837, + "step": 4710 + }, + { + "epoch": 0.22, + "learning_rate": 1.9651680652571376e-05, + "loss": 0.2177, + "step": 4715 + }, + { + "epoch": 0.22, + "learning_rate": 1.9651211851296236e-05, + "loss": 0.2921, + "step": 4720 + }, + { + "epoch": 0.22, + "learning_rate": 1.9650743050021096e-05, + "loss": 0.3297, + "step": 4725 + }, + { + "epoch": 0.22, + "learning_rate": 1.9650274248745956e-05, + "loss": 0.3355, + "step": 4730 + }, + { + "epoch": 0.22, + "learning_rate": 1.964980544747082e-05, + "loss": 0.4129, + "step": 4735 + }, + { + "epoch": 0.22, + "learning_rate": 1.964933664619568e-05, + "loss": 0.425, + "step": 4740 + }, + { + "epoch": 0.22, + "learning_rate": 1.964886784492054e-05, + "loss": 0.4326, + "step": 4745 + }, + { + "epoch": 0.22, + "learning_rate": 1.9648399043645402e-05, + "loss": 0.72, + "step": 4750 + }, + { + "epoch": 0.22, + "learning_rate": 1.9647930242370262e-05, + "loss": 0.4429, + "step": 4755 + }, + { + "epoch": 0.22, + "learning_rate": 1.9647461441095122e-05, + "loss": 0.2009, + "step": 4760 + }, + { + "epoch": 0.22, + "learning_rate": 1.9646992639819982e-05, + "loss": 0.195, + "step": 4765 + }, + { + "epoch": 0.22, + "learning_rate": 1.9646523838544842e-05, + "loss": 0.2269, + "step": 4770 + }, + { + "epoch": 0.22, + "learning_rate": 1.9646055037269702e-05, + "loss": 0.2516, + "step": 4775 + }, + { + "epoch": 0.22, + "learning_rate": 1.9645586235994562e-05, + "loss": 0.2693, + "step": 4780 + }, + { + "epoch": 0.22, + "learning_rate": 1.9645117434719425e-05, + "loss": 0.4334, + "step": 4785 + }, + { + "epoch": 0.22, + "learning_rate": 1.9644648633444285e-05, + "loss": 0.4747, + "step": 4790 + }, + { + "epoch": 0.22, + "learning_rate": 1.9644179832169145e-05, + "loss": 0.5062, + "step": 4795 + }, + { + "epoch": 0.22, + "learning_rate": 1.9643711030894005e-05, + "loss": 0.627, + "step": 4800 + }, + { + "epoch": 0.22, + "learning_rate": 1.9643242229618865e-05, + "loss": 0.2878, + "step": 4805 + }, + { + "epoch": 0.22, + "learning_rate": 1.9642773428343725e-05, + "loss": 0.1459, + "step": 4810 + }, + { + "epoch": 0.22, + "learning_rate": 1.964230462706859e-05, + "loss": 0.1581, + "step": 4815 + }, + { + "epoch": 0.22, + "learning_rate": 1.964183582579345e-05, + "loss": 0.2001, + "step": 4820 + }, + { + "epoch": 0.23, + "learning_rate": 1.9641367024518308e-05, + "loss": 0.2857, + "step": 4825 + }, + { + "epoch": 0.23, + "learning_rate": 1.9640898223243168e-05, + "loss": 0.2737, + "step": 4830 + }, + { + "epoch": 0.23, + "learning_rate": 1.964042942196803e-05, + "loss": 0.2742, + "step": 4835 + }, + { + "epoch": 0.23, + "learning_rate": 1.963996062069289e-05, + "loss": 0.4487, + "step": 4840 + }, + { + "epoch": 0.23, + "learning_rate": 1.963949181941775e-05, + "loss": 0.4665, + "step": 4845 + }, + { + "epoch": 0.23, + "learning_rate": 1.963902301814261e-05, + "loss": 0.8322, + "step": 4850 + }, + { + "epoch": 0.23, + "learning_rate": 1.963855421686747e-05, + "loss": 0.3451, + "step": 4855 + }, + { + "epoch": 0.23, + "learning_rate": 1.963808541559233e-05, + "loss": 0.1254, + "step": 4860 + }, + { + "epoch": 0.23, + "learning_rate": 1.963761661431719e-05, + "loss": 0.1569, + "step": 4865 + }, + { + "epoch": 0.23, + "learning_rate": 1.963714781304205e-05, + "loss": 0.2125, + "step": 4870 + }, + { + "epoch": 0.23, + "learning_rate": 1.963667901176691e-05, + "loss": 0.2344, + "step": 4875 + }, + { + "epoch": 0.23, + "learning_rate": 1.9636210210491774e-05, + "loss": 0.2472, + "step": 4880 + }, + { + "epoch": 0.23, + "learning_rate": 1.9635741409216634e-05, + "loss": 0.4773, + "step": 4885 + }, + { + "epoch": 0.23, + "learning_rate": 1.9635272607941494e-05, + "loss": 0.4222, + "step": 4890 + }, + { + "epoch": 0.23, + "learning_rate": 1.9634803806666358e-05, + "loss": 0.4088, + "step": 4895 + }, + { + "epoch": 0.23, + "learning_rate": 1.9634335005391217e-05, + "loss": 0.4062, + "step": 4900 + }, + { + "epoch": 0.23, + "learning_rate": 1.9633866204116077e-05, + "loss": 0.3291, + "step": 4905 + }, + { + "epoch": 0.23, + "learning_rate": 1.9633397402840937e-05, + "loss": 0.133, + "step": 4910 + }, + { + "epoch": 0.23, + "learning_rate": 1.9632928601565797e-05, + "loss": 0.2487, + "step": 4915 + }, + { + "epoch": 0.23, + "learning_rate": 1.9632459800290657e-05, + "loss": 0.1519, + "step": 4920 + }, + { + "epoch": 0.23, + "learning_rate": 1.963199099901552e-05, + "loss": 0.3122, + "step": 4925 + }, + { + "epoch": 0.23, + "learning_rate": 1.963152219774038e-05, + "loss": 0.4063, + "step": 4930 + }, + { + "epoch": 0.23, + "learning_rate": 1.963105339646524e-05, + "loss": 0.3803, + "step": 4935 + }, + { + "epoch": 0.23, + "learning_rate": 1.96305845951901e-05, + "loss": 0.4153, + "step": 4940 + }, + { + "epoch": 0.23, + "learning_rate": 1.963011579391496e-05, + "loss": 0.4193, + "step": 4945 + }, + { + "epoch": 0.23, + "learning_rate": 1.962964699263982e-05, + "loss": 0.88, + "step": 4950 + }, + { + "epoch": 0.23, + "learning_rate": 1.962917819136468e-05, + "loss": 0.4152, + "step": 4955 + }, + { + "epoch": 0.23, + "learning_rate": 1.9628709390089543e-05, + "loss": 0.1861, + "step": 4960 + }, + { + "epoch": 0.23, + "learning_rate": 1.9628240588814403e-05, + "loss": 0.1928, + "step": 4965 + }, + { + "epoch": 0.23, + "learning_rate": 1.9627771787539263e-05, + "loss": 0.3147, + "step": 4970 + }, + { + "epoch": 0.23, + "learning_rate": 1.9627302986264127e-05, + "loss": 0.2551, + "step": 4975 + }, + { + "epoch": 0.23, + "learning_rate": 1.9626834184988987e-05, + "loss": 0.3313, + "step": 4980 + }, + { + "epoch": 0.23, + "learning_rate": 1.9626365383713846e-05, + "loss": 0.4429, + "step": 4985 + }, + { + "epoch": 0.23, + "learning_rate": 1.9625896582438706e-05, + "loss": 0.4012, + "step": 4990 + }, + { + "epoch": 0.23, + "learning_rate": 1.9625427781163566e-05, + "loss": 0.5365, + "step": 4995 + }, + { + "epoch": 0.23, + "learning_rate": 1.9624958979888426e-05, + "loss": 0.7345, + "step": 5000 + }, + { + "epoch": 0.23, + "learning_rate": 1.9624490178613286e-05, + "loss": 0.3755, + "step": 5005 + }, + { + "epoch": 0.23, + "learning_rate": 1.9624021377338146e-05, + "loss": 0.3096, + "step": 5010 + }, + { + "epoch": 0.23, + "learning_rate": 1.9623552576063006e-05, + "loss": 0.1869, + "step": 5015 + }, + { + "epoch": 0.23, + "learning_rate": 1.962308377478787e-05, + "loss": 0.3091, + "step": 5020 + }, + { + "epoch": 0.23, + "learning_rate": 1.962261497351273e-05, + "loss": 0.3136, + "step": 5025 + }, + { + "epoch": 0.23, + "learning_rate": 1.962214617223759e-05, + "loss": 0.2676, + "step": 5030 + }, + { + "epoch": 0.23, + "learning_rate": 1.962167737096245e-05, + "loss": 0.3055, + "step": 5035 + }, + { + "epoch": 0.24, + "learning_rate": 1.9621208569687313e-05, + "loss": 0.3587, + "step": 5040 + }, + { + "epoch": 0.24, + "learning_rate": 1.9620739768412172e-05, + "loss": 0.4906, + "step": 5045 + }, + { + "epoch": 0.24, + "learning_rate": 1.9620270967137032e-05, + "loss": 0.7448, + "step": 5050 + }, + { + "epoch": 0.24, + "learning_rate": 1.9619802165861892e-05, + "loss": 0.3217, + "step": 5055 + }, + { + "epoch": 0.24, + "learning_rate": 1.9619333364586752e-05, + "loss": 0.1442, + "step": 5060 + }, + { + "epoch": 0.24, + "learning_rate": 1.9618864563311616e-05, + "loss": 0.2075, + "step": 5065 + }, + { + "epoch": 0.24, + "learning_rate": 1.9618395762036476e-05, + "loss": 0.2914, + "step": 5070 + }, + { + "epoch": 0.24, + "learning_rate": 1.9617926960761335e-05, + "loss": 0.3173, + "step": 5075 + }, + { + "epoch": 0.24, + "learning_rate": 1.9617458159486195e-05, + "loss": 0.367, + "step": 5080 + }, + { + "epoch": 0.24, + "learning_rate": 1.9616989358211055e-05, + "loss": 0.3039, + "step": 5085 + }, + { + "epoch": 0.24, + "learning_rate": 1.9616520556935915e-05, + "loss": 0.4082, + "step": 5090 + }, + { + "epoch": 0.24, + "learning_rate": 1.9616051755660775e-05, + "loss": 0.4816, + "step": 5095 + }, + { + "epoch": 0.24, + "learning_rate": 1.961558295438564e-05, + "loss": 0.8166, + "step": 5100 + }, + { + "epoch": 0.24, + "learning_rate": 1.96151141531105e-05, + "loss": 0.3055, + "step": 5105 + }, + { + "epoch": 0.24, + "learning_rate": 1.961464535183536e-05, + "loss": 0.1351, + "step": 5110 + }, + { + "epoch": 0.24, + "learning_rate": 1.9614176550560218e-05, + "loss": 0.2588, + "step": 5115 + }, + { + "epoch": 0.24, + "learning_rate": 1.961370774928508e-05, + "loss": 0.2485, + "step": 5120 + }, + { + "epoch": 0.24, + "learning_rate": 1.961323894800994e-05, + "loss": 0.3697, + "step": 5125 + }, + { + "epoch": 0.24, + "learning_rate": 1.96127701467348e-05, + "loss": 0.3124, + "step": 5130 + }, + { + "epoch": 0.24, + "learning_rate": 1.961230134545966e-05, + "loss": 0.2263, + "step": 5135 + }, + { + "epoch": 0.24, + "learning_rate": 1.961183254418452e-05, + "loss": 0.4653, + "step": 5140 + }, + { + "epoch": 0.24, + "learning_rate": 1.961136374290938e-05, + "loss": 0.4411, + "step": 5145 + }, + { + "epoch": 0.24, + "learning_rate": 1.961089494163424e-05, + "loss": 0.7592, + "step": 5150 + }, + { + "epoch": 0.24, + "learning_rate": 1.96104261403591e-05, + "loss": 0.3478, + "step": 5155 + }, + { + "epoch": 0.24, + "learning_rate": 1.9609957339083964e-05, + "loss": 0.2249, + "step": 5160 + }, + { + "epoch": 0.24, + "learning_rate": 1.9609488537808824e-05, + "loss": 0.164, + "step": 5165 + }, + { + "epoch": 0.24, + "learning_rate": 1.9609019736533684e-05, + "loss": 0.2617, + "step": 5170 + }, + { + "epoch": 0.24, + "learning_rate": 1.9608550935258544e-05, + "loss": 0.3287, + "step": 5175 + }, + { + "epoch": 0.24, + "learning_rate": 1.9608082133983408e-05, + "loss": 0.3823, + "step": 5180 + }, + { + "epoch": 0.24, + "learning_rate": 1.9607613332708268e-05, + "loss": 0.3231, + "step": 5185 + }, + { + "epoch": 0.24, + "learning_rate": 1.9607144531433127e-05, + "loss": 0.2896, + "step": 5190 + }, + { + "epoch": 0.24, + "learning_rate": 1.9606675730157987e-05, + "loss": 0.4749, + "step": 5195 + }, + { + "epoch": 0.24, + "learning_rate": 1.9606206928882847e-05, + "loss": 0.5387, + "step": 5200 + }, + { + "epoch": 0.24, + "learning_rate": 1.960573812760771e-05, + "loss": 0.2887, + "step": 5205 + }, + { + "epoch": 0.24, + "learning_rate": 1.960526932633257e-05, + "loss": 0.1177, + "step": 5210 + }, + { + "epoch": 0.24, + "learning_rate": 1.960480052505743e-05, + "loss": 0.2773, + "step": 5215 + }, + { + "epoch": 0.24, + "learning_rate": 1.960433172378229e-05, + "loss": 0.2782, + "step": 5220 + }, + { + "epoch": 0.24, + "learning_rate": 1.960386292250715e-05, + "loss": 0.317, + "step": 5225 + }, + { + "epoch": 0.24, + "learning_rate": 1.960339412123201e-05, + "loss": 0.2609, + "step": 5230 + }, + { + "epoch": 0.24, + "learning_rate": 1.960292531995687e-05, + "loss": 0.3552, + "step": 5235 + }, + { + "epoch": 0.24, + "learning_rate": 1.960245651868173e-05, + "loss": 0.3464, + "step": 5240 + }, + { + "epoch": 0.24, + "learning_rate": 1.9601987717406594e-05, + "loss": 0.5567, + "step": 5245 + }, + { + "epoch": 0.24, + "learning_rate": 1.9601518916131453e-05, + "loss": 0.6603, + "step": 5250 + }, + { + "epoch": 0.25, + "learning_rate": 1.9601050114856313e-05, + "loss": 0.4007, + "step": 5255 + }, + { + "epoch": 0.25, + "learning_rate": 1.9600581313581177e-05, + "loss": 0.1196, + "step": 5260 + }, + { + "epoch": 0.25, + "learning_rate": 1.9600112512306037e-05, + "loss": 0.1901, + "step": 5265 + }, + { + "epoch": 0.25, + "learning_rate": 1.9599643711030897e-05, + "loss": 0.2177, + "step": 5270 + }, + { + "epoch": 0.25, + "learning_rate": 1.9599174909755757e-05, + "loss": 0.2707, + "step": 5275 + }, + { + "epoch": 0.25, + "learning_rate": 1.9598706108480616e-05, + "loss": 0.2405, + "step": 5280 + }, + { + "epoch": 0.25, + "learning_rate": 1.9598237307205476e-05, + "loss": 0.249, + "step": 5285 + }, + { + "epoch": 0.25, + "learning_rate": 1.9597768505930336e-05, + "loss": 0.3312, + "step": 5290 + }, + { + "epoch": 0.25, + "learning_rate": 1.95972997046552e-05, + "loss": 0.4266, + "step": 5295 + }, + { + "epoch": 0.25, + "learning_rate": 1.959683090338006e-05, + "loss": 0.6443, + "step": 5300 + }, + { + "epoch": 0.25, + "learning_rate": 1.959636210210492e-05, + "loss": 0.373, + "step": 5305 + }, + { + "epoch": 0.25, + "learning_rate": 1.959589330082978e-05, + "loss": 0.1447, + "step": 5310 + }, + { + "epoch": 0.25, + "learning_rate": 1.959542449955464e-05, + "loss": 0.2002, + "step": 5315 + }, + { + "epoch": 0.25, + "learning_rate": 1.95949556982795e-05, + "loss": 0.1767, + "step": 5320 + }, + { + "epoch": 0.25, + "learning_rate": 1.9594486897004363e-05, + "loss": 0.2046, + "step": 5325 + }, + { + "epoch": 0.25, + "learning_rate": 1.9594018095729223e-05, + "loss": 0.2941, + "step": 5330 + }, + { + "epoch": 0.25, + "learning_rate": 1.9593549294454082e-05, + "loss": 0.2813, + "step": 5335 + }, + { + "epoch": 0.25, + "learning_rate": 1.9593080493178942e-05, + "loss": 0.3738, + "step": 5340 + }, + { + "epoch": 0.25, + "learning_rate": 1.9592611691903806e-05, + "loss": 0.6161, + "step": 5345 + }, + { + "epoch": 0.25, + "learning_rate": 1.9592142890628666e-05, + "loss": 0.8597, + "step": 5350 + }, + { + "epoch": 0.25, + "learning_rate": 1.9591674089353526e-05, + "loss": 0.3059, + "step": 5355 + }, + { + "epoch": 0.25, + "learning_rate": 1.9591205288078386e-05, + "loss": 0.1577, + "step": 5360 + }, + { + "epoch": 0.25, + "learning_rate": 1.9590736486803245e-05, + "loss": 0.2368, + "step": 5365 + }, + { + "epoch": 0.25, + "learning_rate": 1.9590267685528105e-05, + "loss": 0.2593, + "step": 5370 + }, + { + "epoch": 0.25, + "learning_rate": 1.9589798884252965e-05, + "loss": 0.3036, + "step": 5375 + }, + { + "epoch": 0.25, + "learning_rate": 1.9589330082977825e-05, + "loss": 0.3428, + "step": 5380 + }, + { + "epoch": 0.25, + "learning_rate": 1.9588861281702685e-05, + "loss": 0.3202, + "step": 5385 + }, + { + "epoch": 0.25, + "learning_rate": 1.958839248042755e-05, + "loss": 0.3252, + "step": 5390 + }, + { + "epoch": 0.25, + "learning_rate": 1.958792367915241e-05, + "loss": 0.4546, + "step": 5395 + }, + { + "epoch": 0.25, + "learning_rate": 1.958745487787727e-05, + "loss": 0.7591, + "step": 5400 + }, + { + "epoch": 0.25, + "learning_rate": 1.9586986076602132e-05, + "loss": 0.3765, + "step": 5405 + }, + { + "epoch": 0.25, + "learning_rate": 1.958651727532699e-05, + "loss": 0.1274, + "step": 5410 + }, + { + "epoch": 0.25, + "learning_rate": 1.958604847405185e-05, + "loss": 0.2058, + "step": 5415 + }, + { + "epoch": 0.25, + "learning_rate": 1.958557967277671e-05, + "loss": 0.3033, + "step": 5420 + }, + { + "epoch": 0.25, + "learning_rate": 1.958511087150157e-05, + "loss": 0.1879, + "step": 5425 + }, + { + "epoch": 0.25, + "learning_rate": 1.958464207022643e-05, + "loss": 0.3354, + "step": 5430 + }, + { + "epoch": 0.25, + "learning_rate": 1.9584173268951295e-05, + "loss": 0.312, + "step": 5435 + }, + { + "epoch": 0.25, + "learning_rate": 1.9583704467676155e-05, + "loss": 0.3897, + "step": 5440 + }, + { + "epoch": 0.25, + "learning_rate": 1.9583235666401015e-05, + "loss": 0.5298, + "step": 5445 + }, + { + "epoch": 0.25, + "learning_rate": 1.9582766865125875e-05, + "loss": 0.8369, + "step": 5450 + }, + { + "epoch": 0.25, + "learning_rate": 1.9582298063850734e-05, + "loss": 0.2992, + "step": 5455 + }, + { + "epoch": 0.25, + "learning_rate": 1.9581829262575594e-05, + "loss": 0.1595, + "step": 5460 + }, + { + "epoch": 0.26, + "learning_rate": 1.9581360461300454e-05, + "loss": 0.1288, + "step": 5465 + }, + { + "epoch": 0.26, + "learning_rate": 1.9580891660025318e-05, + "loss": 0.2173, + "step": 5470 + }, + { + "epoch": 0.26, + "learning_rate": 1.9580422858750178e-05, + "loss": 0.2598, + "step": 5475 + }, + { + "epoch": 0.26, + "learning_rate": 1.9579954057475038e-05, + "loss": 0.3008, + "step": 5480 + }, + { + "epoch": 0.26, + "learning_rate": 1.95794852561999e-05, + "loss": 0.4085, + "step": 5485 + }, + { + "epoch": 0.26, + "learning_rate": 1.957901645492476e-05, + "loss": 0.3574, + "step": 5490 + }, + { + "epoch": 0.26, + "learning_rate": 1.957854765364962e-05, + "loss": 0.5726, + "step": 5495 + }, + { + "epoch": 0.26, + "learning_rate": 1.957807885237448e-05, + "loss": 0.7328, + "step": 5500 + }, + { + "epoch": 0.26, + "learning_rate": 1.957761005109934e-05, + "loss": 0.3389, + "step": 5505 + }, + { + "epoch": 0.26, + "learning_rate": 1.95771412498242e-05, + "loss": 0.1435, + "step": 5510 + }, + { + "epoch": 0.26, + "learning_rate": 1.957667244854906e-05, + "loss": 0.1596, + "step": 5515 + }, + { + "epoch": 0.26, + "learning_rate": 1.957620364727392e-05, + "loss": 0.1642, + "step": 5520 + }, + { + "epoch": 0.26, + "learning_rate": 1.957573484599878e-05, + "loss": 0.2219, + "step": 5525 + }, + { + "epoch": 0.26, + "learning_rate": 1.9575266044723644e-05, + "loss": 0.2159, + "step": 5530 + }, + { + "epoch": 0.26, + "learning_rate": 1.9574797243448504e-05, + "loss": 0.2886, + "step": 5535 + }, + { + "epoch": 0.26, + "learning_rate": 1.9574328442173363e-05, + "loss": 0.2399, + "step": 5540 + }, + { + "epoch": 0.26, + "learning_rate": 1.9573859640898223e-05, + "loss": 0.3391, + "step": 5545 + }, + { + "epoch": 0.26, + "learning_rate": 1.9573390839623087e-05, + "loss": 0.7245, + "step": 5550 + }, + { + "epoch": 0.26, + "learning_rate": 1.9572922038347947e-05, + "loss": 0.3067, + "step": 5555 + }, + { + "epoch": 0.26, + "learning_rate": 1.9572453237072807e-05, + "loss": 0.0993, + "step": 5560 + }, + { + "epoch": 0.26, + "learning_rate": 1.9571984435797667e-05, + "loss": 0.24, + "step": 5565 + }, + { + "epoch": 0.26, + "learning_rate": 1.9571515634522526e-05, + "loss": 0.1977, + "step": 5570 + }, + { + "epoch": 0.26, + "learning_rate": 1.957104683324739e-05, + "loss": 0.2716, + "step": 5575 + }, + { + "epoch": 0.26, + "learning_rate": 1.957057803197225e-05, + "loss": 0.3497, + "step": 5580 + }, + { + "epoch": 0.26, + "learning_rate": 1.957010923069711e-05, + "loss": 0.2877, + "step": 5585 + }, + { + "epoch": 0.26, + "learning_rate": 1.956964042942197e-05, + "loss": 0.3118, + "step": 5590 + }, + { + "epoch": 0.26, + "learning_rate": 1.956917162814683e-05, + "loss": 0.4374, + "step": 5595 + }, + { + "epoch": 0.26, + "learning_rate": 1.956870282687169e-05, + "loss": 0.7082, + "step": 5600 + }, + { + "epoch": 0.26, + "learning_rate": 1.956823402559655e-05, + "loss": 0.2997, + "step": 5605 + }, + { + "epoch": 0.26, + "learning_rate": 1.9567765224321413e-05, + "loss": 0.1337, + "step": 5610 + }, + { + "epoch": 0.26, + "learning_rate": 1.9567296423046273e-05, + "loss": 0.1612, + "step": 5615 + }, + { + "epoch": 0.26, + "learning_rate": 1.9566827621771133e-05, + "loss": 0.1726, + "step": 5620 + }, + { + "epoch": 0.26, + "learning_rate": 1.9566358820495996e-05, + "loss": 0.2525, + "step": 5625 + }, + { + "epoch": 0.26, + "learning_rate": 1.9565890019220856e-05, + "loss": 0.2697, + "step": 5630 + }, + { + "epoch": 0.26, + "learning_rate": 1.9565421217945716e-05, + "loss": 0.3576, + "step": 5635 + }, + { + "epoch": 0.26, + "learning_rate": 1.9564952416670576e-05, + "loss": 0.3216, + "step": 5640 + }, + { + "epoch": 0.26, + "learning_rate": 1.9564483615395436e-05, + "loss": 0.4422, + "step": 5645 + }, + { + "epoch": 0.26, + "learning_rate": 1.9564014814120296e-05, + "loss": 0.6881, + "step": 5650 + }, + { + "epoch": 0.26, + "learning_rate": 1.9563546012845156e-05, + "loss": 0.341, + "step": 5655 + }, + { + "epoch": 0.26, + "learning_rate": 1.9563077211570015e-05, + "loss": 0.2462, + "step": 5660 + }, + { + "epoch": 0.26, + "learning_rate": 1.9562608410294875e-05, + "loss": 0.1833, + "step": 5665 + }, + { + "epoch": 0.26, + "learning_rate": 1.956213960901974e-05, + "loss": 0.1736, + "step": 5670 + }, + { + "epoch": 0.26, + "learning_rate": 1.95616708077446e-05, + "loss": 0.3108, + "step": 5675 + }, + { + "epoch": 0.27, + "learning_rate": 1.956120200646946e-05, + "loss": 0.1955, + "step": 5680 + }, + { + "epoch": 0.27, + "learning_rate": 1.956073320519432e-05, + "loss": 0.3466, + "step": 5685 + }, + { + "epoch": 0.27, + "learning_rate": 1.9560264403919182e-05, + "loss": 0.3074, + "step": 5690 + }, + { + "epoch": 0.27, + "learning_rate": 1.9559795602644042e-05, + "loss": 0.4682, + "step": 5695 + }, + { + "epoch": 0.27, + "learning_rate": 1.95593268013689e-05, + "loss": 0.6463, + "step": 5700 + }, + { + "epoch": 0.27, + "learning_rate": 1.955885800009376e-05, + "loss": 0.327, + "step": 5705 + }, + { + "epoch": 0.27, + "learning_rate": 1.955838919881862e-05, + "loss": 0.17, + "step": 5710 + }, + { + "epoch": 0.27, + "learning_rate": 1.9557920397543485e-05, + "loss": 0.1442, + "step": 5715 + }, + { + "epoch": 0.27, + "learning_rate": 1.9557451596268345e-05, + "loss": 0.2012, + "step": 5720 + }, + { + "epoch": 0.27, + "learning_rate": 1.9556982794993205e-05, + "loss": 0.2722, + "step": 5725 + }, + { + "epoch": 0.27, + "learning_rate": 1.9556513993718065e-05, + "loss": 0.2928, + "step": 5730 + }, + { + "epoch": 0.27, + "learning_rate": 1.9556045192442925e-05, + "loss": 0.3275, + "step": 5735 + }, + { + "epoch": 0.27, + "learning_rate": 1.9555576391167785e-05, + "loss": 0.462, + "step": 5740 + }, + { + "epoch": 0.27, + "learning_rate": 1.9555107589892644e-05, + "loss": 0.38, + "step": 5745 + }, + { + "epoch": 0.27, + "learning_rate": 1.9554638788617504e-05, + "loss": 0.7203, + "step": 5750 + }, + { + "epoch": 0.27, + "learning_rate": 1.9554169987342368e-05, + "loss": 0.3981, + "step": 5755 + }, + { + "epoch": 0.27, + "learning_rate": 1.9553701186067228e-05, + "loss": 0.1104, + "step": 5760 + }, + { + "epoch": 0.27, + "learning_rate": 1.9553232384792088e-05, + "loss": 0.2247, + "step": 5765 + }, + { + "epoch": 0.27, + "learning_rate": 1.955276358351695e-05, + "loss": 0.1969, + "step": 5770 + }, + { + "epoch": 0.27, + "learning_rate": 1.955229478224181e-05, + "loss": 0.2883, + "step": 5775 + }, + { + "epoch": 0.27, + "learning_rate": 1.955182598096667e-05, + "loss": 0.2871, + "step": 5780 + }, + { + "epoch": 0.27, + "learning_rate": 1.955135717969153e-05, + "loss": 0.2452, + "step": 5785 + }, + { + "epoch": 0.27, + "learning_rate": 1.955088837841639e-05, + "loss": 0.3934, + "step": 5790 + }, + { + "epoch": 0.27, + "learning_rate": 1.955041957714125e-05, + "loss": 0.4091, + "step": 5795 + }, + { + "epoch": 0.27, + "learning_rate": 1.954995077586611e-05, + "loss": 0.7059, + "step": 5800 + }, + { + "epoch": 0.27, + "learning_rate": 1.954948197459097e-05, + "loss": 0.3207, + "step": 5805 + }, + { + "epoch": 0.27, + "learning_rate": 1.9549013173315834e-05, + "loss": 0.1067, + "step": 5810 + }, + { + "epoch": 0.27, + "learning_rate": 1.9548544372040694e-05, + "loss": 0.1733, + "step": 5815 + }, + { + "epoch": 0.27, + "learning_rate": 1.9548075570765554e-05, + "loss": 0.1885, + "step": 5820 + }, + { + "epoch": 0.27, + "learning_rate": 1.9547606769490414e-05, + "loss": 0.1855, + "step": 5825 + }, + { + "epoch": 0.27, + "learning_rate": 1.9547137968215274e-05, + "loss": 0.2796, + "step": 5830 + }, + { + "epoch": 0.27, + "learning_rate": 1.9546669166940137e-05, + "loss": 0.3381, + "step": 5835 + }, + { + "epoch": 0.27, + "learning_rate": 1.9546200365664997e-05, + "loss": 0.4255, + "step": 5840 + }, + { + "epoch": 0.27, + "learning_rate": 1.9545731564389857e-05, + "loss": 0.3635, + "step": 5845 + }, + { + "epoch": 0.27, + "learning_rate": 1.9545262763114717e-05, + "loss": 0.6507, + "step": 5850 + }, + { + "epoch": 0.27, + "learning_rate": 1.954479396183958e-05, + "loss": 0.3462, + "step": 5855 + }, + { + "epoch": 0.27, + "learning_rate": 1.954432516056444e-05, + "loss": 0.1309, + "step": 5860 + }, + { + "epoch": 0.27, + "learning_rate": 1.95438563592893e-05, + "loss": 0.1158, + "step": 5865 + }, + { + "epoch": 0.27, + "learning_rate": 1.954338755801416e-05, + "loss": 0.1644, + "step": 5870 + }, + { + "epoch": 0.27, + "learning_rate": 1.954291875673902e-05, + "loss": 0.2895, + "step": 5875 + }, + { + "epoch": 0.27, + "learning_rate": 1.954244995546388e-05, + "loss": 0.3314, + "step": 5880 + }, + { + "epoch": 0.27, + "learning_rate": 1.954198115418874e-05, + "loss": 0.2619, + "step": 5885 + }, + { + "epoch": 0.27, + "learning_rate": 1.95415123529136e-05, + "loss": 0.5068, + "step": 5890 + }, + { + "epoch": 0.28, + "learning_rate": 1.954104355163846e-05, + "loss": 0.3092, + "step": 5895 + }, + { + "epoch": 0.28, + "learning_rate": 1.9540574750363323e-05, + "loss": 0.6608, + "step": 5900 + }, + { + "epoch": 0.28, + "learning_rate": 1.9540105949088183e-05, + "loss": 0.3003, + "step": 5905 + }, + { + "epoch": 0.28, + "learning_rate": 1.9539637147813043e-05, + "loss": 0.1948, + "step": 5910 + }, + { + "epoch": 0.28, + "learning_rate": 1.9539168346537906e-05, + "loss": 0.2106, + "step": 5915 + }, + { + "epoch": 0.28, + "learning_rate": 1.9538699545262766e-05, + "loss": 0.1636, + "step": 5920 + }, + { + "epoch": 0.28, + "learning_rate": 1.9538230743987626e-05, + "loss": 0.2327, + "step": 5925 + }, + { + "epoch": 0.28, + "learning_rate": 1.9537761942712486e-05, + "loss": 0.3433, + "step": 5930 + }, + { + "epoch": 0.28, + "learning_rate": 1.9537293141437346e-05, + "loss": 0.2788, + "step": 5935 + }, + { + "epoch": 0.28, + "learning_rate": 1.9536824340162206e-05, + "loss": 0.3844, + "step": 5940 + }, + { + "epoch": 0.28, + "learning_rate": 1.9536355538887066e-05, + "loss": 0.5153, + "step": 5945 + }, + { + "epoch": 0.28, + "learning_rate": 1.953588673761193e-05, + "loss": 0.8266, + "step": 5950 + }, + { + "epoch": 0.28, + "learning_rate": 1.953541793633679e-05, + "loss": 0.3474, + "step": 5955 + }, + { + "epoch": 0.28, + "learning_rate": 1.953494913506165e-05, + "loss": 0.1229, + "step": 5960 + }, + { + "epoch": 0.28, + "learning_rate": 1.953448033378651e-05, + "loss": 0.2005, + "step": 5965 + }, + { + "epoch": 0.28, + "learning_rate": 1.953401153251137e-05, + "loss": 0.1992, + "step": 5970 + }, + { + "epoch": 0.28, + "learning_rate": 1.9533542731236232e-05, + "loss": 0.2129, + "step": 5975 + }, + { + "epoch": 0.28, + "learning_rate": 1.9533073929961092e-05, + "loss": 0.2354, + "step": 5980 + }, + { + "epoch": 0.28, + "learning_rate": 1.9532605128685952e-05, + "loss": 0.3105, + "step": 5985 + }, + { + "epoch": 0.28, + "learning_rate": 1.9532136327410812e-05, + "loss": 0.3057, + "step": 5990 + }, + { + "epoch": 0.28, + "learning_rate": 1.9531667526135675e-05, + "loss": 0.4136, + "step": 5995 + }, + { + "epoch": 0.28, + "learning_rate": 1.9531198724860535e-05, + "loss": 0.6449, + "step": 6000 + }, + { + "epoch": 0.28, + "learning_rate": 1.9530729923585395e-05, + "loss": 0.2958, + "step": 6005 + }, + { + "epoch": 0.28, + "learning_rate": 1.9530261122310255e-05, + "loss": 0.1949, + "step": 6010 + }, + { + "epoch": 0.28, + "learning_rate": 1.9529792321035115e-05, + "loss": 0.2033, + "step": 6015 + }, + { + "epoch": 0.28, + "learning_rate": 1.9529323519759975e-05, + "loss": 0.1695, + "step": 6020 + }, + { + "epoch": 0.28, + "learning_rate": 1.9528854718484835e-05, + "loss": 0.2202, + "step": 6025 + }, + { + "epoch": 0.28, + "learning_rate": 1.9528385917209695e-05, + "loss": 0.4026, + "step": 6030 + }, + { + "epoch": 0.28, + "learning_rate": 1.9527917115934555e-05, + "loss": 0.2653, + "step": 6035 + }, + { + "epoch": 0.28, + "learning_rate": 1.9527448314659418e-05, + "loss": 0.4601, + "step": 6040 + }, + { + "epoch": 0.28, + "learning_rate": 1.9526979513384278e-05, + "loss": 0.4971, + "step": 6045 + }, + { + "epoch": 0.28, + "learning_rate": 1.9526510712109138e-05, + "loss": 0.6803, + "step": 6050 + }, + { + "epoch": 0.28, + "learning_rate": 1.9526041910834e-05, + "loss": 0.3679, + "step": 6055 + }, + { + "epoch": 0.28, + "learning_rate": 1.952557310955886e-05, + "loss": 0.0853, + "step": 6060 + }, + { + "epoch": 0.28, + "learning_rate": 1.952510430828372e-05, + "loss": 0.2384, + "step": 6065 + }, + { + "epoch": 0.28, + "learning_rate": 1.952463550700858e-05, + "loss": 0.1462, + "step": 6070 + }, + { + "epoch": 0.28, + "learning_rate": 1.952416670573344e-05, + "loss": 0.2165, + "step": 6075 + }, + { + "epoch": 0.28, + "learning_rate": 1.95236979044583e-05, + "loss": 0.2811, + "step": 6080 + }, + { + "epoch": 0.28, + "learning_rate": 1.9523229103183164e-05, + "loss": 0.3491, + "step": 6085 + }, + { + "epoch": 0.28, + "learning_rate": 1.9522760301908024e-05, + "loss": 0.4301, + "step": 6090 + }, + { + "epoch": 0.28, + "learning_rate": 1.9522291500632884e-05, + "loss": 0.4114, + "step": 6095 + }, + { + "epoch": 0.28, + "learning_rate": 1.9521822699357744e-05, + "loss": 0.5665, + "step": 6100 + }, + { + "epoch": 0.28, + "learning_rate": 1.9521353898082604e-05, + "loss": 0.3065, + "step": 6105 + }, + { + "epoch": 0.29, + "learning_rate": 1.9520885096807464e-05, + "loss": 0.1459, + "step": 6110 + }, + { + "epoch": 0.29, + "learning_rate": 1.9520416295532324e-05, + "loss": 0.1675, + "step": 6115 + }, + { + "epoch": 0.29, + "learning_rate": 1.9519947494257187e-05, + "loss": 0.1782, + "step": 6120 + }, + { + "epoch": 0.29, + "learning_rate": 1.9519478692982047e-05, + "loss": 0.2657, + "step": 6125 + }, + { + "epoch": 0.29, + "learning_rate": 1.9519009891706907e-05, + "loss": 0.2721, + "step": 6130 + }, + { + "epoch": 0.29, + "learning_rate": 1.951854109043177e-05, + "loss": 0.2553, + "step": 6135 + }, + { + "epoch": 0.29, + "learning_rate": 1.951807228915663e-05, + "loss": 0.3509, + "step": 6140 + }, + { + "epoch": 0.29, + "learning_rate": 1.951760348788149e-05, + "loss": 0.5497, + "step": 6145 + }, + { + "epoch": 0.29, + "learning_rate": 1.951713468660635e-05, + "loss": 0.8548, + "step": 6150 + }, + { + "epoch": 0.29, + "learning_rate": 1.951666588533121e-05, + "loss": 0.3924, + "step": 6155 + }, + { + "epoch": 0.29, + "learning_rate": 1.951619708405607e-05, + "loss": 0.2242, + "step": 6160 + }, + { + "epoch": 0.29, + "learning_rate": 1.951572828278093e-05, + "loss": 0.1475, + "step": 6165 + }, + { + "epoch": 0.29, + "learning_rate": 1.951525948150579e-05, + "loss": 0.2086, + "step": 6170 + }, + { + "epoch": 0.29, + "learning_rate": 1.951479068023065e-05, + "loss": 0.1798, + "step": 6175 + }, + { + "epoch": 0.29, + "learning_rate": 1.9514321878955513e-05, + "loss": 0.269, + "step": 6180 + }, + { + "epoch": 0.29, + "learning_rate": 1.9513853077680373e-05, + "loss": 0.2895, + "step": 6185 + }, + { + "epoch": 0.29, + "learning_rate": 1.9513384276405233e-05, + "loss": 0.3678, + "step": 6190 + }, + { + "epoch": 0.29, + "learning_rate": 1.9512915475130093e-05, + "loss": 0.3819, + "step": 6195 + }, + { + "epoch": 0.29, + "learning_rate": 1.9512446673854956e-05, + "loss": 0.4726, + "step": 6200 + }, + { + "epoch": 0.29, + "learning_rate": 1.9511977872579816e-05, + "loss": 0.309, + "step": 6205 + }, + { + "epoch": 0.29, + "learning_rate": 1.9511509071304676e-05, + "loss": 0.1438, + "step": 6210 + }, + { + "epoch": 0.29, + "learning_rate": 1.9511040270029536e-05, + "loss": 0.2037, + "step": 6215 + }, + { + "epoch": 0.29, + "learning_rate": 1.9510571468754396e-05, + "loss": 0.2466, + "step": 6220 + }, + { + "epoch": 0.29, + "learning_rate": 1.951010266747926e-05, + "loss": 0.2742, + "step": 6225 + }, + { + "epoch": 0.29, + "learning_rate": 1.950963386620412e-05, + "loss": 0.26, + "step": 6230 + }, + { + "epoch": 0.29, + "learning_rate": 1.950916506492898e-05, + "loss": 0.2899, + "step": 6235 + }, + { + "epoch": 0.29, + "learning_rate": 1.950869626365384e-05, + "loss": 0.4501, + "step": 6240 + }, + { + "epoch": 0.29, + "learning_rate": 1.95082274623787e-05, + "loss": 0.5071, + "step": 6245 + }, + { + "epoch": 0.29, + "learning_rate": 1.950775866110356e-05, + "loss": 0.6465, + "step": 6250 + }, + { + "epoch": 0.29, + "learning_rate": 1.950728985982842e-05, + "loss": 0.3001, + "step": 6255 + }, + { + "epoch": 0.29, + "learning_rate": 1.950682105855328e-05, + "loss": 0.136, + "step": 6260 + }, + { + "epoch": 0.29, + "learning_rate": 1.9506352257278142e-05, + "loss": 0.1448, + "step": 6265 + }, + { + "epoch": 0.29, + "learning_rate": 1.9505883456003002e-05, + "loss": 0.2306, + "step": 6270 + }, + { + "epoch": 0.29, + "learning_rate": 1.9505414654727862e-05, + "loss": 0.1849, + "step": 6275 + }, + { + "epoch": 0.29, + "learning_rate": 1.9504945853452725e-05, + "loss": 0.217, + "step": 6280 + }, + { + "epoch": 0.29, + "learning_rate": 1.9504477052177585e-05, + "loss": 0.3257, + "step": 6285 + }, + { + "epoch": 0.29, + "learning_rate": 1.9504008250902445e-05, + "loss": 0.4219, + "step": 6290 + }, + { + "epoch": 0.29, + "learning_rate": 1.9503539449627305e-05, + "loss": 0.4743, + "step": 6295 + }, + { + "epoch": 0.29, + "learning_rate": 1.9503070648352165e-05, + "loss": 0.6075, + "step": 6300 + }, + { + "epoch": 0.29, + "learning_rate": 1.9502601847077025e-05, + "loss": 0.3695, + "step": 6305 + }, + { + "epoch": 0.29, + "learning_rate": 1.9502133045801885e-05, + "loss": 0.1424, + "step": 6310 + }, + { + "epoch": 0.29, + "learning_rate": 1.9501664244526745e-05, + "loss": 0.2252, + "step": 6315 + }, + { + "epoch": 0.29, + "learning_rate": 1.9501195443251608e-05, + "loss": 0.2399, + "step": 6320 + }, + { + "epoch": 0.3, + "learning_rate": 1.9500726641976468e-05, + "loss": 0.247, + "step": 6325 + }, + { + "epoch": 0.3, + "learning_rate": 1.9500257840701328e-05, + "loss": 0.3604, + "step": 6330 + }, + { + "epoch": 0.3, + "learning_rate": 1.9499789039426188e-05, + "loss": 0.4467, + "step": 6335 + }, + { + "epoch": 0.3, + "learning_rate": 1.9499320238151048e-05, + "loss": 0.4687, + "step": 6340 + }, + { + "epoch": 0.3, + "learning_rate": 1.949885143687591e-05, + "loss": 0.3686, + "step": 6345 + }, + { + "epoch": 0.3, + "learning_rate": 1.949838263560077e-05, + "loss": 0.8263, + "step": 6350 + }, + { + "epoch": 0.3, + "learning_rate": 1.949791383432563e-05, + "loss": 0.3793, + "step": 6355 + }, + { + "epoch": 0.3, + "learning_rate": 1.949744503305049e-05, + "loss": 0.1265, + "step": 6360 + }, + { + "epoch": 0.3, + "learning_rate": 1.9496976231775354e-05, + "loss": 0.1601, + "step": 6365 + }, + { + "epoch": 0.3, + "learning_rate": 1.9496507430500214e-05, + "loss": 0.2208, + "step": 6370 + }, + { + "epoch": 0.3, + "learning_rate": 1.9496038629225074e-05, + "loss": 0.2892, + "step": 6375 + }, + { + "epoch": 0.3, + "learning_rate": 1.9495569827949934e-05, + "loss": 0.2399, + "step": 6380 + }, + { + "epoch": 0.3, + "learning_rate": 1.9495101026674794e-05, + "loss": 0.4323, + "step": 6385 + }, + { + "epoch": 0.3, + "learning_rate": 1.9494632225399654e-05, + "loss": 0.4213, + "step": 6390 + }, + { + "epoch": 0.3, + "learning_rate": 1.9494163424124514e-05, + "loss": 0.6043, + "step": 6395 + }, + { + "epoch": 0.3, + "learning_rate": 1.9493694622849374e-05, + "loss": 0.8731, + "step": 6400 + }, + { + "epoch": 0.3, + "learning_rate": 1.9493225821574237e-05, + "loss": 0.2651, + "step": 6405 + }, + { + "epoch": 0.3, + "learning_rate": 1.9492757020299097e-05, + "loss": 0.1896, + "step": 6410 + }, + { + "epoch": 0.3, + "learning_rate": 1.9492288219023957e-05, + "loss": 0.2098, + "step": 6415 + }, + { + "epoch": 0.3, + "learning_rate": 1.9491819417748817e-05, + "loss": 0.2555, + "step": 6420 + }, + { + "epoch": 0.3, + "learning_rate": 1.949135061647368e-05, + "loss": 0.2872, + "step": 6425 + }, + { + "epoch": 0.3, + "learning_rate": 1.949088181519854e-05, + "loss": 0.306, + "step": 6430 + }, + { + "epoch": 0.3, + "learning_rate": 1.94904130139234e-05, + "loss": 0.3411, + "step": 6435 + }, + { + "epoch": 0.3, + "learning_rate": 1.948994421264826e-05, + "loss": 0.3097, + "step": 6440 + }, + { + "epoch": 0.3, + "learning_rate": 1.948947541137312e-05, + "loss": 0.4879, + "step": 6445 + }, + { + "epoch": 0.3, + "learning_rate": 1.948900661009798e-05, + "loss": 0.6127, + "step": 6450 + }, + { + "epoch": 0.3, + "learning_rate": 1.948853780882284e-05, + "loss": 0.3599, + "step": 6455 + }, + { + "epoch": 0.3, + "learning_rate": 1.9488069007547703e-05, + "loss": 0.1394, + "step": 6460 + }, + { + "epoch": 0.3, + "learning_rate": 1.9487600206272563e-05, + "loss": 0.2307, + "step": 6465 + }, + { + "epoch": 0.3, + "learning_rate": 1.9487131404997423e-05, + "loss": 0.2525, + "step": 6470 + }, + { + "epoch": 0.3, + "learning_rate": 1.9486662603722283e-05, + "loss": 0.1769, + "step": 6475 + }, + { + "epoch": 0.3, + "learning_rate": 1.9486193802447143e-05, + "loss": 0.303, + "step": 6480 + }, + { + "epoch": 0.3, + "learning_rate": 1.9485725001172006e-05, + "loss": 0.3339, + "step": 6485 + }, + { + "epoch": 0.3, + "learning_rate": 1.9485256199896866e-05, + "loss": 0.3536, + "step": 6490 + }, + { + "epoch": 0.3, + "learning_rate": 1.9484787398621726e-05, + "loss": 0.4037, + "step": 6495 + }, + { + "epoch": 0.3, + "learning_rate": 1.9484318597346586e-05, + "loss": 0.5179, + "step": 6500 + }, + { + "epoch": 0.3, + "learning_rate": 1.948384979607145e-05, + "loss": 0.2462, + "step": 6505 + }, + { + "epoch": 0.3, + "learning_rate": 1.948338099479631e-05, + "loss": 0.1519, + "step": 6510 + }, + { + "epoch": 0.3, + "learning_rate": 1.948291219352117e-05, + "loss": 0.2413, + "step": 6515 + }, + { + "epoch": 0.3, + "learning_rate": 1.948244339224603e-05, + "loss": 0.1308, + "step": 6520 + }, + { + "epoch": 0.3, + "learning_rate": 1.948197459097089e-05, + "loss": 0.3107, + "step": 6525 + }, + { + "epoch": 0.3, + "learning_rate": 1.948150578969575e-05, + "loss": 0.3151, + "step": 6530 + }, + { + "epoch": 0.3, + "learning_rate": 1.948103698842061e-05, + "loss": 0.3446, + "step": 6535 + }, + { + "epoch": 0.31, + "learning_rate": 1.948056818714547e-05, + "loss": 0.3298, + "step": 6540 + }, + { + "epoch": 0.31, + "learning_rate": 1.948009938587033e-05, + "loss": 0.3325, + "step": 6545 + }, + { + "epoch": 0.31, + "learning_rate": 1.9479630584595192e-05, + "loss": 0.8412, + "step": 6550 + }, + { + "epoch": 0.31, + "learning_rate": 1.9479161783320052e-05, + "loss": 0.2412, + "step": 6555 + }, + { + "epoch": 0.31, + "learning_rate": 1.9478692982044912e-05, + "loss": 0.1557, + "step": 6560 + }, + { + "epoch": 0.31, + "learning_rate": 1.9478224180769775e-05, + "loss": 0.2324, + "step": 6565 + }, + { + "epoch": 0.31, + "learning_rate": 1.9477755379494635e-05, + "loss": 0.2358, + "step": 6570 + }, + { + "epoch": 0.31, + "learning_rate": 1.9477286578219495e-05, + "loss": 0.2939, + "step": 6575 + }, + { + "epoch": 0.31, + "learning_rate": 1.9476817776944355e-05, + "loss": 0.2902, + "step": 6580 + }, + { + "epoch": 0.31, + "learning_rate": 1.9476348975669215e-05, + "loss": 0.271, + "step": 6585 + }, + { + "epoch": 0.31, + "learning_rate": 1.9475880174394075e-05, + "loss": 0.2787, + "step": 6590 + }, + { + "epoch": 0.31, + "learning_rate": 1.9475411373118935e-05, + "loss": 0.4382, + "step": 6595 + }, + { + "epoch": 0.31, + "learning_rate": 1.9474942571843798e-05, + "loss": 0.5563, + "step": 6600 + }, + { + "epoch": 0.31, + "learning_rate": 1.9474473770568658e-05, + "loss": 0.3581, + "step": 6605 + }, + { + "epoch": 0.31, + "learning_rate": 1.9474004969293518e-05, + "loss": 0.1465, + "step": 6610 + }, + { + "epoch": 0.31, + "learning_rate": 1.9473536168018378e-05, + "loss": 0.1957, + "step": 6615 + }, + { + "epoch": 0.31, + "learning_rate": 1.9473067366743238e-05, + "loss": 0.2329, + "step": 6620 + }, + { + "epoch": 0.31, + "learning_rate": 1.9472598565468098e-05, + "loss": 0.2191, + "step": 6625 + }, + { + "epoch": 0.31, + "learning_rate": 1.947212976419296e-05, + "loss": 0.2638, + "step": 6630 + }, + { + "epoch": 0.31, + "learning_rate": 1.947166096291782e-05, + "loss": 0.3851, + "step": 6635 + }, + { + "epoch": 0.31, + "learning_rate": 1.947119216164268e-05, + "loss": 0.421, + "step": 6640 + }, + { + "epoch": 0.31, + "learning_rate": 1.9470723360367544e-05, + "loss": 0.3695, + "step": 6645 + }, + { + "epoch": 0.31, + "learning_rate": 1.9470254559092404e-05, + "loss": 0.7012, + "step": 6650 + }, + { + "epoch": 0.31, + "learning_rate": 1.9469785757817264e-05, + "loss": 0.3262, + "step": 6655 + }, + { + "epoch": 0.31, + "learning_rate": 1.9469316956542124e-05, + "loss": 0.0764, + "step": 6660 + }, + { + "epoch": 0.31, + "learning_rate": 1.9468848155266984e-05, + "loss": 0.2014, + "step": 6665 + }, + { + "epoch": 0.31, + "learning_rate": 1.9468379353991844e-05, + "loss": 0.2009, + "step": 6670 + }, + { + "epoch": 0.31, + "learning_rate": 1.9467910552716704e-05, + "loss": 0.2012, + "step": 6675 + }, + { + "epoch": 0.31, + "learning_rate": 1.9467441751441564e-05, + "loss": 0.2679, + "step": 6680 + }, + { + "epoch": 0.31, + "learning_rate": 1.9466972950166424e-05, + "loss": 0.2762, + "step": 6685 + }, + { + "epoch": 0.31, + "learning_rate": 1.9466504148891287e-05, + "loss": 0.4076, + "step": 6690 + }, + { + "epoch": 0.31, + "learning_rate": 1.9466035347616147e-05, + "loss": 0.3921, + "step": 6695 + }, + { + "epoch": 0.31, + "learning_rate": 1.9465566546341007e-05, + "loss": 0.6968, + "step": 6700 + }, + { + "epoch": 0.31, + "learning_rate": 1.9465097745065867e-05, + "loss": 0.3596, + "step": 6705 + }, + { + "epoch": 0.31, + "learning_rate": 1.946462894379073e-05, + "loss": 0.0913, + "step": 6710 + }, + { + "epoch": 0.31, + "learning_rate": 1.946416014251559e-05, + "loss": 0.1434, + "step": 6715 + }, + { + "epoch": 0.31, + "learning_rate": 1.946369134124045e-05, + "loss": 0.1815, + "step": 6720 + }, + { + "epoch": 0.31, + "learning_rate": 1.946322253996531e-05, + "loss": 0.2293, + "step": 6725 + }, + { + "epoch": 0.31, + "learning_rate": 1.946275373869017e-05, + "loss": 0.2578, + "step": 6730 + }, + { + "epoch": 0.31, + "learning_rate": 1.9462284937415033e-05, + "loss": 0.3253, + "step": 6735 + }, + { + "epoch": 0.31, + "learning_rate": 1.9461816136139893e-05, + "loss": 0.4554, + "step": 6740 + }, + { + "epoch": 0.31, + "learning_rate": 1.9461347334864753e-05, + "loss": 0.433, + "step": 6745 + }, + { + "epoch": 0.31, + "learning_rate": 1.9460878533589613e-05, + "loss": 0.7031, + "step": 6750 + }, + { + "epoch": 0.32, + "learning_rate": 1.9460409732314473e-05, + "loss": 0.3681, + "step": 6755 + }, + { + "epoch": 0.32, + "learning_rate": 1.9459940931039333e-05, + "loss": 0.1622, + "step": 6760 + }, + { + "epoch": 0.32, + "learning_rate": 1.9459472129764193e-05, + "loss": 0.1286, + "step": 6765 + }, + { + "epoch": 0.32, + "learning_rate": 1.9459003328489053e-05, + "loss": 0.2374, + "step": 6770 + }, + { + "epoch": 0.32, + "learning_rate": 1.9458534527213916e-05, + "loss": 0.2842, + "step": 6775 + }, + { + "epoch": 0.32, + "learning_rate": 1.9458065725938776e-05, + "loss": 0.2651, + "step": 6780 + }, + { + "epoch": 0.32, + "learning_rate": 1.9457596924663636e-05, + "loss": 0.402, + "step": 6785 + }, + { + "epoch": 0.32, + "learning_rate": 1.94571281233885e-05, + "loss": 0.3272, + "step": 6790 + }, + { + "epoch": 0.32, + "learning_rate": 1.945665932211336e-05, + "loss": 0.4942, + "step": 6795 + }, + { + "epoch": 0.32, + "learning_rate": 1.945619052083822e-05, + "loss": 0.571, + "step": 6800 + }, + { + "epoch": 0.32, + "learning_rate": 1.945572171956308e-05, + "loss": 0.3568, + "step": 6805 + }, + { + "epoch": 0.32, + "learning_rate": 1.945525291828794e-05, + "loss": 0.1872, + "step": 6810 + }, + { + "epoch": 0.32, + "learning_rate": 1.94547841170128e-05, + "loss": 0.2116, + "step": 6815 + }, + { + "epoch": 0.32, + "learning_rate": 1.945431531573766e-05, + "loss": 0.1498, + "step": 6820 + }, + { + "epoch": 0.32, + "learning_rate": 1.945384651446252e-05, + "loss": 0.2026, + "step": 6825 + }, + { + "epoch": 0.32, + "learning_rate": 1.9453377713187382e-05, + "loss": 0.2388, + "step": 6830 + }, + { + "epoch": 0.32, + "learning_rate": 1.9452908911912242e-05, + "loss": 0.3062, + "step": 6835 + }, + { + "epoch": 0.32, + "learning_rate": 1.9452440110637102e-05, + "loss": 0.4513, + "step": 6840 + }, + { + "epoch": 0.32, + "learning_rate": 1.9451971309361962e-05, + "loss": 0.4118, + "step": 6845 + }, + { + "epoch": 0.32, + "learning_rate": 1.9451502508086822e-05, + "loss": 0.5351, + "step": 6850 + }, + { + "epoch": 0.32, + "learning_rate": 1.9451033706811685e-05, + "loss": 0.2974, + "step": 6855 + }, + { + "epoch": 0.32, + "learning_rate": 1.9450564905536545e-05, + "loss": 0.1387, + "step": 6860 + }, + { + "epoch": 0.32, + "learning_rate": 1.9450096104261405e-05, + "loss": 0.1921, + "step": 6865 + }, + { + "epoch": 0.32, + "learning_rate": 1.9449627302986265e-05, + "loss": 0.2271, + "step": 6870 + }, + { + "epoch": 0.32, + "learning_rate": 1.944915850171113e-05, + "loss": 0.2264, + "step": 6875 + }, + { + "epoch": 0.32, + "learning_rate": 1.944868970043599e-05, + "loss": 0.3337, + "step": 6880 + }, + { + "epoch": 0.32, + "learning_rate": 1.9448220899160848e-05, + "loss": 0.34, + "step": 6885 + }, + { + "epoch": 0.32, + "learning_rate": 1.9447752097885708e-05, + "loss": 0.4425, + "step": 6890 + }, + { + "epoch": 0.32, + "learning_rate": 1.9447283296610568e-05, + "loss": 0.5421, + "step": 6895 + }, + { + "epoch": 0.32, + "learning_rate": 1.9446814495335428e-05, + "loss": 0.5687, + "step": 6900 + }, + { + "epoch": 0.32, + "learning_rate": 1.9446345694060288e-05, + "loss": 0.4261, + "step": 6905 + }, + { + "epoch": 0.32, + "learning_rate": 1.9445876892785148e-05, + "loss": 0.0878, + "step": 6910 + }, + { + "epoch": 0.32, + "learning_rate": 1.944540809151001e-05, + "loss": 0.1664, + "step": 6915 + }, + { + "epoch": 0.32, + "learning_rate": 1.944493929023487e-05, + "loss": 0.173, + "step": 6920 + }, + { + "epoch": 0.32, + "learning_rate": 1.944447048895973e-05, + "loss": 0.1802, + "step": 6925 + }, + { + "epoch": 0.32, + "learning_rate": 1.944400168768459e-05, + "loss": 0.3044, + "step": 6930 + }, + { + "epoch": 0.32, + "learning_rate": 1.9443532886409454e-05, + "loss": 0.2697, + "step": 6935 + }, + { + "epoch": 0.32, + "learning_rate": 1.9443064085134314e-05, + "loss": 0.3823, + "step": 6940 + }, + { + "epoch": 0.32, + "learning_rate": 1.9442595283859174e-05, + "loss": 0.5836, + "step": 6945 + }, + { + "epoch": 0.32, + "learning_rate": 1.9442126482584034e-05, + "loss": 0.8075, + "step": 6950 + }, + { + "epoch": 0.32, + "learning_rate": 1.9441657681308894e-05, + "loss": 0.3048, + "step": 6955 + }, + { + "epoch": 0.32, + "learning_rate": 1.9441188880033754e-05, + "loss": 0.178, + "step": 6960 + }, + { + "epoch": 0.32, + "learning_rate": 1.9440720078758614e-05, + "loss": 0.142, + "step": 6965 + }, + { + "epoch": 0.33, + "learning_rate": 1.9440251277483477e-05, + "loss": 0.1452, + "step": 6970 + }, + { + "epoch": 0.33, + "learning_rate": 1.9439782476208337e-05, + "loss": 0.2158, + "step": 6975 + }, + { + "epoch": 0.33, + "learning_rate": 1.9439313674933197e-05, + "loss": 0.2264, + "step": 6980 + }, + { + "epoch": 0.33, + "learning_rate": 1.9438844873658057e-05, + "loss": 0.3913, + "step": 6985 + }, + { + "epoch": 0.33, + "learning_rate": 1.9438376072382917e-05, + "loss": 0.3618, + "step": 6990 + }, + { + "epoch": 0.33, + "learning_rate": 1.943790727110778e-05, + "loss": 0.4981, + "step": 6995 + }, + { + "epoch": 0.33, + "learning_rate": 1.943743846983264e-05, + "loss": 0.5207, + "step": 7000 + }, + { + "epoch": 0.33, + "learning_rate": 1.94369696685575e-05, + "loss": 0.2584, + "step": 7005 + }, + { + "epoch": 0.33, + "learning_rate": 1.943650086728236e-05, + "loss": 0.1643, + "step": 7010 + }, + { + "epoch": 0.33, + "learning_rate": 1.9436032066007223e-05, + "loss": 0.162, + "step": 7015 + }, + { + "epoch": 0.33, + "learning_rate": 1.9435563264732083e-05, + "loss": 0.2558, + "step": 7020 + }, + { + "epoch": 0.33, + "learning_rate": 1.9435094463456943e-05, + "loss": 0.24, + "step": 7025 + }, + { + "epoch": 0.33, + "learning_rate": 1.9434625662181803e-05, + "loss": 0.2614, + "step": 7030 + }, + { + "epoch": 0.33, + "learning_rate": 1.9434156860906663e-05, + "loss": 0.2827, + "step": 7035 + }, + { + "epoch": 0.33, + "learning_rate": 1.9433688059631523e-05, + "loss": 0.313, + "step": 7040 + }, + { + "epoch": 0.33, + "learning_rate": 1.9433219258356383e-05, + "loss": 0.5073, + "step": 7045 + }, + { + "epoch": 0.33, + "learning_rate": 1.9432750457081243e-05, + "loss": 0.6594, + "step": 7050 + }, + { + "epoch": 0.33, + "learning_rate": 1.9432281655806103e-05, + "loss": 0.2966, + "step": 7055 + }, + { + "epoch": 0.33, + "learning_rate": 1.9431812854530966e-05, + "loss": 0.1413, + "step": 7060 + }, + { + "epoch": 0.33, + "learning_rate": 1.9431344053255826e-05, + "loss": 0.1812, + "step": 7065 + }, + { + "epoch": 0.33, + "learning_rate": 1.9430875251980686e-05, + "loss": 0.1497, + "step": 7070 + }, + { + "epoch": 0.33, + "learning_rate": 1.943040645070555e-05, + "loss": 0.1938, + "step": 7075 + }, + { + "epoch": 0.33, + "learning_rate": 1.942993764943041e-05, + "loss": 0.3134, + "step": 7080 + }, + { + "epoch": 0.33, + "learning_rate": 1.942946884815527e-05, + "loss": 0.2142, + "step": 7085 + }, + { + "epoch": 0.33, + "learning_rate": 1.942900004688013e-05, + "loss": 0.4456, + "step": 7090 + }, + { + "epoch": 0.33, + "learning_rate": 1.942853124560499e-05, + "loss": 0.3444, + "step": 7095 + }, + { + "epoch": 0.33, + "learning_rate": 1.942806244432985e-05, + "loss": 0.8402, + "step": 7100 + }, + { + "epoch": 0.33, + "learning_rate": 1.942759364305471e-05, + "loss": 0.3128, + "step": 7105 + }, + { + "epoch": 0.33, + "learning_rate": 1.9427124841779572e-05, + "loss": 0.1323, + "step": 7110 + }, + { + "epoch": 0.33, + "learning_rate": 1.9426656040504432e-05, + "loss": 0.1779, + "step": 7115 + }, + { + "epoch": 0.33, + "learning_rate": 1.9426187239229292e-05, + "loss": 0.1896, + "step": 7120 + }, + { + "epoch": 0.33, + "learning_rate": 1.9425718437954152e-05, + "loss": 0.2415, + "step": 7125 + }, + { + "epoch": 0.33, + "learning_rate": 1.9425249636679012e-05, + "loss": 0.2888, + "step": 7130 + }, + { + "epoch": 0.33, + "learning_rate": 1.9424780835403872e-05, + "loss": 0.2641, + "step": 7135 + }, + { + "epoch": 0.33, + "learning_rate": 1.9424312034128735e-05, + "loss": 0.3465, + "step": 7140 + }, + { + "epoch": 0.33, + "learning_rate": 1.9423843232853595e-05, + "loss": 0.4948, + "step": 7145 + }, + { + "epoch": 0.33, + "learning_rate": 1.9423374431578455e-05, + "loss": 0.6909, + "step": 7150 + }, + { + "epoch": 0.33, + "learning_rate": 1.942290563030332e-05, + "loss": 0.3321, + "step": 7155 + }, + { + "epoch": 0.33, + "learning_rate": 1.942243682902818e-05, + "loss": 0.1793, + "step": 7160 + }, + { + "epoch": 0.33, + "learning_rate": 1.942196802775304e-05, + "loss": 0.1224, + "step": 7165 + }, + { + "epoch": 0.33, + "learning_rate": 1.94214992264779e-05, + "loss": 0.2602, + "step": 7170 + }, + { + "epoch": 0.33, + "learning_rate": 1.9421030425202758e-05, + "loss": 0.1669, + "step": 7175 + }, + { + "epoch": 0.34, + "learning_rate": 1.9420561623927618e-05, + "loss": 0.1957, + "step": 7180 + }, + { + "epoch": 0.34, + "learning_rate": 1.9420092822652478e-05, + "loss": 0.3104, + "step": 7185 + }, + { + "epoch": 0.34, + "learning_rate": 1.9419624021377338e-05, + "loss": 0.2715, + "step": 7190 + }, + { + "epoch": 0.34, + "learning_rate": 1.9419155220102198e-05, + "loss": 0.3714, + "step": 7195 + }, + { + "epoch": 0.34, + "learning_rate": 1.941868641882706e-05, + "loss": 0.6902, + "step": 7200 + }, + { + "epoch": 0.34, + "learning_rate": 1.941821761755192e-05, + "loss": 0.2193, + "step": 7205 + }, + { + "epoch": 0.34, + "learning_rate": 1.941774881627678e-05, + "loss": 0.1142, + "step": 7210 + }, + { + "epoch": 0.34, + "learning_rate": 1.941728001500164e-05, + "loss": 0.1894, + "step": 7215 + }, + { + "epoch": 0.34, + "learning_rate": 1.9416811213726504e-05, + "loss": 0.2033, + "step": 7220 + }, + { + "epoch": 0.34, + "learning_rate": 1.9416342412451364e-05, + "loss": 0.251, + "step": 7225 + }, + { + "epoch": 0.34, + "learning_rate": 1.9415873611176224e-05, + "loss": 0.3118, + "step": 7230 + }, + { + "epoch": 0.34, + "learning_rate": 1.9415404809901084e-05, + "loss": 0.29, + "step": 7235 + }, + { + "epoch": 0.34, + "learning_rate": 1.9414936008625944e-05, + "loss": 0.331, + "step": 7240 + }, + { + "epoch": 0.34, + "learning_rate": 1.9414467207350804e-05, + "loss": 0.441, + "step": 7245 + }, + { + "epoch": 0.34, + "learning_rate": 1.9413998406075667e-05, + "loss": 0.6936, + "step": 7250 + }, + { + "epoch": 0.34, + "learning_rate": 1.9413529604800527e-05, + "loss": 0.2578, + "step": 7255 + }, + { + "epoch": 0.34, + "learning_rate": 1.9413060803525387e-05, + "loss": 0.1108, + "step": 7260 + }, + { + "epoch": 0.34, + "learning_rate": 1.9412592002250247e-05, + "loss": 0.1718, + "step": 7265 + }, + { + "epoch": 0.34, + "learning_rate": 1.9412123200975107e-05, + "loss": 0.2078, + "step": 7270 + }, + { + "epoch": 0.34, + "learning_rate": 1.9411654399699967e-05, + "loss": 0.237, + "step": 7275 + }, + { + "epoch": 0.34, + "learning_rate": 1.9411185598424827e-05, + "loss": 0.2431, + "step": 7280 + }, + { + "epoch": 0.34, + "learning_rate": 1.941071679714969e-05, + "loss": 0.2729, + "step": 7285 + }, + { + "epoch": 0.34, + "learning_rate": 1.941024799587455e-05, + "loss": 0.3672, + "step": 7290 + }, + { + "epoch": 0.34, + "learning_rate": 1.940977919459941e-05, + "loss": 0.5002, + "step": 7295 + }, + { + "epoch": 0.34, + "learning_rate": 1.9409310393324274e-05, + "loss": 0.7313, + "step": 7300 + }, + { + "epoch": 0.34, + "learning_rate": 1.9408841592049134e-05, + "loss": 0.328, + "step": 7305 + }, + { + "epoch": 0.34, + "learning_rate": 1.9408372790773993e-05, + "loss": 0.2048, + "step": 7310 + }, + { + "epoch": 0.34, + "learning_rate": 1.9407903989498853e-05, + "loss": 0.2032, + "step": 7315 + }, + { + "epoch": 0.34, + "learning_rate": 1.9407435188223713e-05, + "loss": 0.2235, + "step": 7320 + }, + { + "epoch": 0.34, + "learning_rate": 1.9406966386948573e-05, + "loss": 0.198, + "step": 7325 + }, + { + "epoch": 0.34, + "learning_rate": 1.9406497585673433e-05, + "loss": 0.2556, + "step": 7330 + }, + { + "epoch": 0.34, + "learning_rate": 1.9406028784398293e-05, + "loss": 0.358, + "step": 7335 + }, + { + "epoch": 0.34, + "learning_rate": 1.9405559983123156e-05, + "loss": 0.4997, + "step": 7340 + }, + { + "epoch": 0.34, + "learning_rate": 1.9405091181848016e-05, + "loss": 0.4498, + "step": 7345 + }, + { + "epoch": 0.34, + "learning_rate": 1.9404622380572876e-05, + "loss": 0.5937, + "step": 7350 + }, + { + "epoch": 0.34, + "learning_rate": 1.9404153579297736e-05, + "loss": 0.2874, + "step": 7355 + }, + { + "epoch": 0.34, + "learning_rate": 1.94036847780226e-05, + "loss": 0.1371, + "step": 7360 + }, + { + "epoch": 0.34, + "learning_rate": 1.940321597674746e-05, + "loss": 0.1512, + "step": 7365 + }, + { + "epoch": 0.34, + "learning_rate": 1.940274717547232e-05, + "loss": 0.1584, + "step": 7370 + }, + { + "epoch": 0.34, + "learning_rate": 1.940227837419718e-05, + "loss": 0.2193, + "step": 7375 + }, + { + "epoch": 0.34, + "learning_rate": 1.940180957292204e-05, + "loss": 0.2825, + "step": 7380 + }, + { + "epoch": 0.34, + "learning_rate": 1.9401340771646903e-05, + "loss": 0.3116, + "step": 7385 + }, + { + "epoch": 0.34, + "learning_rate": 1.9400871970371763e-05, + "loss": 0.3652, + "step": 7390 + }, + { + "epoch": 0.35, + "learning_rate": 1.9400403169096622e-05, + "loss": 0.4405, + "step": 7395 + }, + { + "epoch": 0.35, + "learning_rate": 1.9399934367821482e-05, + "loss": 0.6158, + "step": 7400 + }, + { + "epoch": 0.35, + "learning_rate": 1.9399465566546342e-05, + "loss": 0.3158, + "step": 7405 + }, + { + "epoch": 0.35, + "learning_rate": 1.9398996765271202e-05, + "loss": 0.0907, + "step": 7410 + }, + { + "epoch": 0.35, + "learning_rate": 1.9398527963996062e-05, + "loss": 0.3189, + "step": 7415 + }, + { + "epoch": 0.35, + "learning_rate": 1.9398059162720922e-05, + "loss": 0.17, + "step": 7420 + }, + { + "epoch": 0.35, + "learning_rate": 1.9397590361445785e-05, + "loss": 0.1948, + "step": 7425 + }, + { + "epoch": 0.35, + "learning_rate": 1.9397121560170645e-05, + "loss": 0.2213, + "step": 7430 + }, + { + "epoch": 0.35, + "learning_rate": 1.9396652758895505e-05, + "loss": 0.2708, + "step": 7435 + }, + { + "epoch": 0.35, + "learning_rate": 1.939618395762037e-05, + "loss": 0.3346, + "step": 7440 + }, + { + "epoch": 0.35, + "learning_rate": 1.939571515634523e-05, + "loss": 0.3433, + "step": 7445 + }, + { + "epoch": 0.35, + "learning_rate": 1.939524635507009e-05, + "loss": 0.6586, + "step": 7450 + }, + { + "epoch": 0.35, + "learning_rate": 1.939477755379495e-05, + "loss": 0.2602, + "step": 7455 + }, + { + "epoch": 0.35, + "learning_rate": 1.939430875251981e-05, + "loss": 0.1247, + "step": 7460 + }, + { + "epoch": 0.35, + "learning_rate": 1.939383995124467e-05, + "loss": 0.1999, + "step": 7465 + }, + { + "epoch": 0.35, + "learning_rate": 1.9393371149969528e-05, + "loss": 0.2082, + "step": 7470 + }, + { + "epoch": 0.35, + "learning_rate": 1.9392902348694388e-05, + "loss": 0.1804, + "step": 7475 + }, + { + "epoch": 0.35, + "learning_rate": 1.939243354741925e-05, + "loss": 0.3072, + "step": 7480 + }, + { + "epoch": 0.35, + "learning_rate": 1.939196474614411e-05, + "loss": 0.2737, + "step": 7485 + }, + { + "epoch": 0.35, + "learning_rate": 1.939149594486897e-05, + "loss": 0.2939, + "step": 7490 + }, + { + "epoch": 0.35, + "learning_rate": 1.939102714359383e-05, + "loss": 0.3612, + "step": 7495 + }, + { + "epoch": 0.35, + "learning_rate": 1.939055834231869e-05, + "loss": 0.591, + "step": 7500 + }, + { + "epoch": 0.35, + "learning_rate": 1.9390089541043555e-05, + "loss": 0.2915, + "step": 7505 + }, + { + "epoch": 0.35, + "learning_rate": 1.9389620739768415e-05, + "loss": 0.1465, + "step": 7510 + }, + { + "epoch": 0.35, + "learning_rate": 1.9389151938493274e-05, + "loss": 0.1969, + "step": 7515 + }, + { + "epoch": 0.35, + "learning_rate": 1.9388683137218134e-05, + "loss": 0.203, + "step": 7520 + }, + { + "epoch": 0.35, + "learning_rate": 1.9388214335942998e-05, + "loss": 0.1947, + "step": 7525 + }, + { + "epoch": 0.35, + "learning_rate": 1.9387745534667858e-05, + "loss": 0.2782, + "step": 7530 + }, + { + "epoch": 0.35, + "learning_rate": 1.9387276733392718e-05, + "loss": 0.2536, + "step": 7535 + }, + { + "epoch": 0.35, + "learning_rate": 1.9386807932117578e-05, + "loss": 0.2968, + "step": 7540 + }, + { + "epoch": 0.35, + "learning_rate": 1.9386339130842437e-05, + "loss": 0.4721, + "step": 7545 + }, + { + "epoch": 0.35, + "learning_rate": 1.9385870329567297e-05, + "loss": 0.6284, + "step": 7550 + }, + { + "epoch": 0.35, + "learning_rate": 1.9385401528292157e-05, + "loss": 0.2729, + "step": 7555 + }, + { + "epoch": 0.35, + "learning_rate": 1.9384932727017017e-05, + "loss": 0.1275, + "step": 7560 + }, + { + "epoch": 0.35, + "learning_rate": 1.9384463925741877e-05, + "loss": 0.2018, + "step": 7565 + }, + { + "epoch": 0.35, + "learning_rate": 1.938399512446674e-05, + "loss": 0.1743, + "step": 7570 + }, + { + "epoch": 0.35, + "learning_rate": 1.93835263231916e-05, + "loss": 0.354, + "step": 7575 + }, + { + "epoch": 0.35, + "learning_rate": 1.938305752191646e-05, + "loss": 0.1568, + "step": 7580 + }, + { + "epoch": 0.35, + "learning_rate": 1.9382588720641324e-05, + "loss": 0.2309, + "step": 7585 + }, + { + "epoch": 0.35, + "learning_rate": 1.9382119919366184e-05, + "loss": 0.3072, + "step": 7590 + }, + { + "epoch": 0.35, + "learning_rate": 1.9381651118091044e-05, + "loss": 0.3901, + "step": 7595 + }, + { + "epoch": 0.35, + "learning_rate": 1.9381182316815903e-05, + "loss": 0.6689, + "step": 7600 + }, + { + "epoch": 0.35, + "learning_rate": 1.9380713515540763e-05, + "loss": 0.3356, + "step": 7605 + }, + { + "epoch": 0.36, + "learning_rate": 1.9380244714265623e-05, + "loss": 0.0993, + "step": 7610 + }, + { + "epoch": 0.36, + "learning_rate": 1.9379775912990483e-05, + "loss": 0.1436, + "step": 7615 + }, + { + "epoch": 0.36, + "learning_rate": 1.9379307111715347e-05, + "loss": 0.1633, + "step": 7620 + }, + { + "epoch": 0.36, + "learning_rate": 1.9378838310440207e-05, + "loss": 0.2219, + "step": 7625 + }, + { + "epoch": 0.36, + "learning_rate": 1.9378369509165066e-05, + "loss": 0.1863, + "step": 7630 + }, + { + "epoch": 0.36, + "learning_rate": 1.9377900707889926e-05, + "loss": 0.382, + "step": 7635 + }, + { + "epoch": 0.36, + "learning_rate": 1.9377431906614786e-05, + "loss": 0.3374, + "step": 7640 + }, + { + "epoch": 0.36, + "learning_rate": 1.9376963105339646e-05, + "loss": 0.3538, + "step": 7645 + }, + { + "epoch": 0.36, + "learning_rate": 1.937649430406451e-05, + "loss": 0.5143, + "step": 7650 + }, + { + "epoch": 0.36, + "learning_rate": 1.937602550278937e-05, + "loss": 0.3543, + "step": 7655 + }, + { + "epoch": 0.36, + "learning_rate": 1.937555670151423e-05, + "loss": 0.1521, + "step": 7660 + }, + { + "epoch": 0.36, + "learning_rate": 1.9375087900239093e-05, + "loss": 0.2513, + "step": 7665 + }, + { + "epoch": 0.36, + "learning_rate": 1.9374619098963953e-05, + "loss": 0.1515, + "step": 7670 + }, + { + "epoch": 0.36, + "learning_rate": 1.9374150297688813e-05, + "loss": 0.2925, + "step": 7675 + }, + { + "epoch": 0.36, + "learning_rate": 1.9373681496413673e-05, + "loss": 0.3807, + "step": 7680 + }, + { + "epoch": 0.36, + "learning_rate": 1.9373212695138533e-05, + "loss": 0.381, + "step": 7685 + }, + { + "epoch": 0.36, + "learning_rate": 1.9372743893863392e-05, + "loss": 0.2791, + "step": 7690 + }, + { + "epoch": 0.36, + "learning_rate": 1.9372275092588252e-05, + "loss": 0.4091, + "step": 7695 + }, + { + "epoch": 0.36, + "learning_rate": 1.9371806291313112e-05, + "loss": 0.7086, + "step": 7700 + }, + { + "epoch": 0.36, + "learning_rate": 1.9371337490037972e-05, + "loss": 0.2184, + "step": 7705 + }, + { + "epoch": 0.36, + "learning_rate": 1.9370868688762836e-05, + "loss": 0.1839, + "step": 7710 + }, + { + "epoch": 0.36, + "learning_rate": 1.9370399887487696e-05, + "loss": 0.1395, + "step": 7715 + }, + { + "epoch": 0.36, + "learning_rate": 1.9369931086212555e-05, + "loss": 0.1088, + "step": 7720 + }, + { + "epoch": 0.36, + "learning_rate": 1.9369462284937415e-05, + "loss": 0.1739, + "step": 7725 + }, + { + "epoch": 0.36, + "learning_rate": 1.936899348366228e-05, + "loss": 0.2314, + "step": 7730 + }, + { + "epoch": 0.36, + "learning_rate": 1.936852468238714e-05, + "loss": 0.3432, + "step": 7735 + }, + { + "epoch": 0.36, + "learning_rate": 1.9368055881112e-05, + "loss": 0.3118, + "step": 7740 + }, + { + "epoch": 0.36, + "learning_rate": 1.936758707983686e-05, + "loss": 0.4382, + "step": 7745 + }, + { + "epoch": 0.36, + "learning_rate": 1.936711827856172e-05, + "loss": 0.4952, + "step": 7750 + }, + { + "epoch": 0.36, + "learning_rate": 1.936664947728658e-05, + "loss": 0.3685, + "step": 7755 + }, + { + "epoch": 0.36, + "learning_rate": 1.936618067601144e-05, + "loss": 0.13, + "step": 7760 + }, + { + "epoch": 0.36, + "learning_rate": 1.93657118747363e-05, + "loss": 0.1563, + "step": 7765 + }, + { + "epoch": 0.36, + "learning_rate": 1.936524307346116e-05, + "loss": 0.2089, + "step": 7770 + }, + { + "epoch": 0.36, + "learning_rate": 1.936477427218602e-05, + "loss": 0.2659, + "step": 7775 + }, + { + "epoch": 0.36, + "learning_rate": 1.936430547091088e-05, + "loss": 0.1983, + "step": 7780 + }, + { + "epoch": 0.36, + "learning_rate": 1.936383666963574e-05, + "loss": 0.2818, + "step": 7785 + }, + { + "epoch": 0.36, + "learning_rate": 1.9363367868360605e-05, + "loss": 0.418, + "step": 7790 + }, + { + "epoch": 0.36, + "learning_rate": 1.9362899067085465e-05, + "loss": 0.4263, + "step": 7795 + }, + { + "epoch": 0.36, + "learning_rate": 1.9362430265810325e-05, + "loss": 0.6848, + "step": 7800 + }, + { + "epoch": 0.36, + "learning_rate": 1.9361961464535184e-05, + "loss": 0.3445, + "step": 7805 + }, + { + "epoch": 0.36, + "learning_rate": 1.9361492663260048e-05, + "loss": 0.1231, + "step": 7810 + }, + { + "epoch": 0.36, + "learning_rate": 1.9361023861984908e-05, + "loss": 0.1814, + "step": 7815 + }, + { + "epoch": 0.36, + "learning_rate": 1.9360555060709768e-05, + "loss": 0.2129, + "step": 7820 + }, + { + "epoch": 0.37, + "learning_rate": 1.9360086259434628e-05, + "loss": 0.185, + "step": 7825 + }, + { + "epoch": 0.37, + "learning_rate": 1.9359617458159488e-05, + "loss": 0.3591, + "step": 7830 + }, + { + "epoch": 0.37, + "learning_rate": 1.9359148656884347e-05, + "loss": 0.4372, + "step": 7835 + }, + { + "epoch": 0.37, + "learning_rate": 1.9358679855609207e-05, + "loss": 0.3435, + "step": 7840 + }, + { + "epoch": 0.37, + "learning_rate": 1.9358211054334067e-05, + "loss": 0.3974, + "step": 7845 + }, + { + "epoch": 0.37, + "learning_rate": 1.935774225305893e-05, + "loss": 0.6837, + "step": 7850 + }, + { + "epoch": 0.37, + "learning_rate": 1.935727345178379e-05, + "loss": 0.3493, + "step": 7855 + }, + { + "epoch": 0.37, + "learning_rate": 1.935680465050865e-05, + "loss": 0.1234, + "step": 7860 + }, + { + "epoch": 0.37, + "learning_rate": 1.935633584923351e-05, + "loss": 0.1565, + "step": 7865 + }, + { + "epoch": 0.37, + "learning_rate": 1.9355867047958374e-05, + "loss": 0.1613, + "step": 7870 + }, + { + "epoch": 0.37, + "learning_rate": 1.9355398246683234e-05, + "loss": 0.2633, + "step": 7875 + }, + { + "epoch": 0.37, + "learning_rate": 1.9354929445408094e-05, + "loss": 0.1702, + "step": 7880 + }, + { + "epoch": 0.37, + "learning_rate": 1.9354460644132954e-05, + "loss": 0.3533, + "step": 7885 + }, + { + "epoch": 0.37, + "learning_rate": 1.9353991842857814e-05, + "loss": 0.2938, + "step": 7890 + }, + { + "epoch": 0.37, + "learning_rate": 1.9353523041582673e-05, + "loss": 0.3936, + "step": 7895 + }, + { + "epoch": 0.37, + "learning_rate": 1.9353054240307537e-05, + "loss": 0.5828, + "step": 7900 + }, + { + "epoch": 0.37, + "learning_rate": 1.9352585439032397e-05, + "loss": 0.2816, + "step": 7905 + }, + { + "epoch": 0.37, + "learning_rate": 1.9352116637757257e-05, + "loss": 0.1281, + "step": 7910 + }, + { + "epoch": 0.37, + "learning_rate": 1.9351647836482117e-05, + "loss": 0.1108, + "step": 7915 + }, + { + "epoch": 0.37, + "learning_rate": 1.9351179035206977e-05, + "loss": 0.1505, + "step": 7920 + }, + { + "epoch": 0.37, + "learning_rate": 1.9350710233931836e-05, + "loss": 0.2456, + "step": 7925 + }, + { + "epoch": 0.37, + "learning_rate": 1.9350241432656696e-05, + "loss": 0.2419, + "step": 7930 + }, + { + "epoch": 0.37, + "learning_rate": 1.934977263138156e-05, + "loss": 0.2355, + "step": 7935 + }, + { + "epoch": 0.37, + "learning_rate": 1.934930383010642e-05, + "loss": 0.3848, + "step": 7940 + }, + { + "epoch": 0.37, + "learning_rate": 1.934883502883128e-05, + "loss": 0.508, + "step": 7945 + }, + { + "epoch": 0.37, + "learning_rate": 1.9348366227556143e-05, + "loss": 0.688, + "step": 7950 + }, + { + "epoch": 0.37, + "learning_rate": 1.9347897426281003e-05, + "loss": 0.2389, + "step": 7955 + }, + { + "epoch": 0.37, + "learning_rate": 1.9347428625005863e-05, + "loss": 0.097, + "step": 7960 + }, + { + "epoch": 0.37, + "learning_rate": 1.9346959823730723e-05, + "loss": 0.1886, + "step": 7965 + }, + { + "epoch": 0.37, + "learning_rate": 1.9346491022455583e-05, + "loss": 0.2037, + "step": 7970 + }, + { + "epoch": 0.37, + "learning_rate": 1.9346022221180443e-05, + "loss": 0.1939, + "step": 7975 + }, + { + "epoch": 0.37, + "learning_rate": 1.9345553419905302e-05, + "loss": 0.2666, + "step": 7980 + }, + { + "epoch": 0.37, + "learning_rate": 1.9345084618630162e-05, + "loss": 0.2793, + "step": 7985 + }, + { + "epoch": 0.37, + "learning_rate": 1.9344615817355026e-05, + "loss": 0.3058, + "step": 7990 + }, + { + "epoch": 0.37, + "learning_rate": 1.9344147016079886e-05, + "loss": 0.3803, + "step": 7995 + }, + { + "epoch": 0.37, + "learning_rate": 1.9343678214804746e-05, + "loss": 0.7639, + "step": 8000 + }, + { + "epoch": 0.37, + "learning_rate": 1.9343209413529606e-05, + "loss": 0.3895, + "step": 8005 + }, + { + "epoch": 0.37, + "learning_rate": 1.9342740612254465e-05, + "loss": 0.075, + "step": 8010 + }, + { + "epoch": 0.37, + "learning_rate": 1.934227181097933e-05, + "loss": 0.1631, + "step": 8015 + }, + { + "epoch": 0.37, + "learning_rate": 1.934180300970419e-05, + "loss": 0.2211, + "step": 8020 + }, + { + "epoch": 0.37, + "learning_rate": 1.934133420842905e-05, + "loss": 0.1265, + "step": 8025 + }, + { + "epoch": 0.37, + "learning_rate": 1.934086540715391e-05, + "loss": 0.2052, + "step": 8030 + }, + { + "epoch": 0.37, + "learning_rate": 1.9340396605878772e-05, + "loss": 0.2435, + "step": 8035 + }, + { + "epoch": 0.38, + "learning_rate": 1.9339927804603632e-05, + "loss": 0.3957, + "step": 8040 + }, + { + "epoch": 0.38, + "learning_rate": 1.9339459003328492e-05, + "loss": 0.3356, + "step": 8045 + }, + { + "epoch": 0.38, + "learning_rate": 1.9338990202053352e-05, + "loss": 0.7917, + "step": 8050 + }, + { + "epoch": 0.38, + "learning_rate": 1.933852140077821e-05, + "loss": 0.3205, + "step": 8055 + }, + { + "epoch": 0.38, + "learning_rate": 1.933805259950307e-05, + "loss": 0.1124, + "step": 8060 + }, + { + "epoch": 0.38, + "learning_rate": 1.933758379822793e-05, + "loss": 0.1538, + "step": 8065 + }, + { + "epoch": 0.38, + "learning_rate": 1.933711499695279e-05, + "loss": 0.2043, + "step": 8070 + }, + { + "epoch": 0.38, + "learning_rate": 1.933664619567765e-05, + "loss": 0.1682, + "step": 8075 + }, + { + "epoch": 0.38, + "learning_rate": 1.9336177394402515e-05, + "loss": 0.2458, + "step": 8080 + }, + { + "epoch": 0.38, + "learning_rate": 1.9335708593127375e-05, + "loss": 0.402, + "step": 8085 + }, + { + "epoch": 0.38, + "learning_rate": 1.9335239791852235e-05, + "loss": 0.4126, + "step": 8090 + }, + { + "epoch": 0.38, + "learning_rate": 1.9334770990577098e-05, + "loss": 0.4594, + "step": 8095 + }, + { + "epoch": 0.38, + "learning_rate": 1.9334302189301958e-05, + "loss": 0.6692, + "step": 8100 + }, + { + "epoch": 0.38, + "learning_rate": 1.9333833388026818e-05, + "loss": 0.3356, + "step": 8105 + }, + { + "epoch": 0.38, + "learning_rate": 1.9333364586751678e-05, + "loss": 0.1183, + "step": 8110 + }, + { + "epoch": 0.38, + "learning_rate": 1.9332895785476538e-05, + "loss": 0.22, + "step": 8115 + }, + { + "epoch": 0.38, + "learning_rate": 1.9332426984201398e-05, + "loss": 0.1996, + "step": 8120 + }, + { + "epoch": 0.38, + "learning_rate": 1.9331958182926257e-05, + "loss": 0.273, + "step": 8125 + }, + { + "epoch": 0.38, + "learning_rate": 1.933148938165112e-05, + "loss": 0.2092, + "step": 8130 + }, + { + "epoch": 0.38, + "learning_rate": 1.933102058037598e-05, + "loss": 0.2448, + "step": 8135 + }, + { + "epoch": 0.38, + "learning_rate": 1.933055177910084e-05, + "loss": 0.2191, + "step": 8140 + }, + { + "epoch": 0.38, + "learning_rate": 1.93300829778257e-05, + "loss": 0.4742, + "step": 8145 + }, + { + "epoch": 0.38, + "learning_rate": 1.932961417655056e-05, + "loss": 0.8307, + "step": 8150 + }, + { + "epoch": 0.38, + "learning_rate": 1.932914537527542e-05, + "loss": 0.2751, + "step": 8155 + }, + { + "epoch": 0.38, + "learning_rate": 1.9328676574000284e-05, + "loss": 0.176, + "step": 8160 + }, + { + "epoch": 0.38, + "learning_rate": 1.9328207772725144e-05, + "loss": 0.1733, + "step": 8165 + }, + { + "epoch": 0.38, + "learning_rate": 1.9327738971450004e-05, + "loss": 0.2006, + "step": 8170 + }, + { + "epoch": 0.38, + "learning_rate": 1.9327270170174867e-05, + "loss": 0.2388, + "step": 8175 + }, + { + "epoch": 0.38, + "learning_rate": 1.9326801368899727e-05, + "loss": 0.3035, + "step": 8180 + }, + { + "epoch": 0.38, + "learning_rate": 1.9326332567624587e-05, + "loss": 0.2985, + "step": 8185 + }, + { + "epoch": 0.38, + "learning_rate": 1.9325863766349447e-05, + "loss": 0.3295, + "step": 8190 + }, + { + "epoch": 0.38, + "learning_rate": 1.9325394965074307e-05, + "loss": 0.4557, + "step": 8195 + }, + { + "epoch": 0.38, + "learning_rate": 1.9324926163799167e-05, + "loss": 0.5938, + "step": 8200 + }, + { + "epoch": 0.38, + "learning_rate": 1.9324457362524027e-05, + "loss": 0.3562, + "step": 8205 + }, + { + "epoch": 0.38, + "learning_rate": 1.9323988561248887e-05, + "loss": 0.1529, + "step": 8210 + }, + { + "epoch": 0.38, + "learning_rate": 1.9323519759973746e-05, + "loss": 0.1482, + "step": 8215 + }, + { + "epoch": 0.38, + "learning_rate": 1.932305095869861e-05, + "loss": 0.2249, + "step": 8220 + }, + { + "epoch": 0.38, + "learning_rate": 1.932258215742347e-05, + "loss": 0.2227, + "step": 8225 + }, + { + "epoch": 0.38, + "learning_rate": 1.932211335614833e-05, + "loss": 0.2618, + "step": 8230 + }, + { + "epoch": 0.38, + "learning_rate": 1.932164455487319e-05, + "loss": 0.2611, + "step": 8235 + }, + { + "epoch": 0.38, + "learning_rate": 1.9321175753598053e-05, + "loss": 0.2406, + "step": 8240 + }, + { + "epoch": 0.38, + "learning_rate": 1.9320706952322913e-05, + "loss": 0.3629, + "step": 8245 + }, + { + "epoch": 0.38, + "learning_rate": 1.9320238151047773e-05, + "loss": 0.4992, + "step": 8250 + }, + { + "epoch": 0.39, + "learning_rate": 1.9319769349772633e-05, + "loss": 0.3151, + "step": 8255 + }, + { + "epoch": 0.39, + "learning_rate": 1.9319300548497493e-05, + "loss": 0.1345, + "step": 8260 + }, + { + "epoch": 0.39, + "learning_rate": 1.9318831747222353e-05, + "loss": 0.1521, + "step": 8265 + }, + { + "epoch": 0.39, + "learning_rate": 1.9318362945947216e-05, + "loss": 0.1733, + "step": 8270 + }, + { + "epoch": 0.39, + "learning_rate": 1.9317894144672076e-05, + "loss": 0.2129, + "step": 8275 + }, + { + "epoch": 0.39, + "learning_rate": 1.9317425343396936e-05, + "loss": 0.2378, + "step": 8280 + }, + { + "epoch": 0.39, + "learning_rate": 1.9316956542121796e-05, + "loss": 0.2658, + "step": 8285 + }, + { + "epoch": 0.39, + "learning_rate": 1.9316487740846656e-05, + "loss": 0.287, + "step": 8290 + }, + { + "epoch": 0.39, + "learning_rate": 1.9316018939571516e-05, + "loss": 0.3836, + "step": 8295 + }, + { + "epoch": 0.39, + "learning_rate": 1.931555013829638e-05, + "loss": 0.6966, + "step": 8300 + }, + { + "epoch": 0.39, + "learning_rate": 1.931508133702124e-05, + "loss": 0.3483, + "step": 8305 + }, + { + "epoch": 0.39, + "learning_rate": 1.93146125357461e-05, + "loss": 0.1619, + "step": 8310 + }, + { + "epoch": 0.39, + "learning_rate": 1.931414373447096e-05, + "loss": 0.1387, + "step": 8315 + }, + { + "epoch": 0.39, + "learning_rate": 1.9313674933195822e-05, + "loss": 0.1587, + "step": 8320 + }, + { + "epoch": 0.39, + "learning_rate": 1.9313206131920682e-05, + "loss": 0.1447, + "step": 8325 + }, + { + "epoch": 0.39, + "learning_rate": 1.9312737330645542e-05, + "loss": 0.2997, + "step": 8330 + }, + { + "epoch": 0.39, + "learning_rate": 1.9312268529370402e-05, + "loss": 0.2873, + "step": 8335 + }, + { + "epoch": 0.39, + "learning_rate": 1.9311799728095262e-05, + "loss": 0.3927, + "step": 8340 + }, + { + "epoch": 0.39, + "learning_rate": 1.931133092682012e-05, + "loss": 0.3392, + "step": 8345 + }, + { + "epoch": 0.39, + "learning_rate": 1.931086212554498e-05, + "loss": 0.3994, + "step": 8350 + }, + { + "epoch": 0.39, + "learning_rate": 1.931039332426984e-05, + "loss": 0.2752, + "step": 8355 + }, + { + "epoch": 0.39, + "learning_rate": 1.9309924522994705e-05, + "loss": 0.1263, + "step": 8360 + }, + { + "epoch": 0.39, + "learning_rate": 1.9309455721719565e-05, + "loss": 0.1493, + "step": 8365 + }, + { + "epoch": 0.39, + "learning_rate": 1.9308986920444425e-05, + "loss": 0.19, + "step": 8370 + }, + { + "epoch": 0.39, + "learning_rate": 1.9308518119169285e-05, + "loss": 0.2094, + "step": 8375 + }, + { + "epoch": 0.39, + "learning_rate": 1.9308049317894148e-05, + "loss": 0.3174, + "step": 8380 + }, + { + "epoch": 0.39, + "learning_rate": 1.9307580516619008e-05, + "loss": 0.2557, + "step": 8385 + }, + { + "epoch": 0.39, + "learning_rate": 1.9307111715343868e-05, + "loss": 0.4019, + "step": 8390 + }, + { + "epoch": 0.39, + "learning_rate": 1.9306642914068728e-05, + "loss": 0.3886, + "step": 8395 + }, + { + "epoch": 0.39, + "learning_rate": 1.9306174112793588e-05, + "loss": 0.5243, + "step": 8400 + }, + { + "epoch": 0.39, + "learning_rate": 1.9305705311518448e-05, + "loss": 0.2447, + "step": 8405 + }, + { + "epoch": 0.39, + "learning_rate": 1.930523651024331e-05, + "loss": 0.1114, + "step": 8410 + }, + { + "epoch": 0.39, + "learning_rate": 1.930476770896817e-05, + "loss": 0.1295, + "step": 8415 + }, + { + "epoch": 0.39, + "learning_rate": 1.930429890769303e-05, + "loss": 0.2123, + "step": 8420 + }, + { + "epoch": 0.39, + "learning_rate": 1.930383010641789e-05, + "loss": 0.2388, + "step": 8425 + }, + { + "epoch": 0.39, + "learning_rate": 1.930336130514275e-05, + "loss": 0.194, + "step": 8430 + }, + { + "epoch": 0.39, + "learning_rate": 1.930289250386761e-05, + "loss": 0.2023, + "step": 8435 + }, + { + "epoch": 0.39, + "learning_rate": 1.930242370259247e-05, + "loss": 0.3366, + "step": 8440 + }, + { + "epoch": 0.39, + "learning_rate": 1.9301954901317334e-05, + "loss": 0.4688, + "step": 8445 + }, + { + "epoch": 0.39, + "learning_rate": 1.9301486100042194e-05, + "loss": 0.5275, + "step": 8450 + }, + { + "epoch": 0.39, + "learning_rate": 1.9301017298767054e-05, + "loss": 0.198, + "step": 8455 + }, + { + "epoch": 0.39, + "learning_rate": 1.9300548497491917e-05, + "loss": 0.1555, + "step": 8460 + }, + { + "epoch": 0.39, + "learning_rate": 1.9300079696216777e-05, + "loss": 0.1445, + "step": 8465 + }, + { + "epoch": 0.4, + "learning_rate": 1.9299610894941637e-05, + "loss": 0.1536, + "step": 8470 + }, + { + "epoch": 0.4, + "learning_rate": 1.9299142093666497e-05, + "loss": 0.2828, + "step": 8475 + }, + { + "epoch": 0.4, + "learning_rate": 1.9298673292391357e-05, + "loss": 0.2771, + "step": 8480 + }, + { + "epoch": 0.4, + "learning_rate": 1.9298204491116217e-05, + "loss": 0.2566, + "step": 8485 + }, + { + "epoch": 0.4, + "learning_rate": 1.9297735689841077e-05, + "loss": 0.2707, + "step": 8490 + }, + { + "epoch": 0.4, + "learning_rate": 1.9297266888565937e-05, + "loss": 0.3241, + "step": 8495 + }, + { + "epoch": 0.4, + "learning_rate": 1.92967980872908e-05, + "loss": 0.7655, + "step": 8500 + }, + { + "epoch": 0.4, + "learning_rate": 1.929632928601566e-05, + "loss": 0.2513, + "step": 8505 + }, + { + "epoch": 0.4, + "learning_rate": 1.929586048474052e-05, + "loss": 0.1115, + "step": 8510 + }, + { + "epoch": 0.4, + "learning_rate": 1.929539168346538e-05, + "loss": 0.1718, + "step": 8515 + }, + { + "epoch": 0.4, + "learning_rate": 1.929492288219024e-05, + "loss": 0.1965, + "step": 8520 + }, + { + "epoch": 0.4, + "learning_rate": 1.9294454080915103e-05, + "loss": 0.2988, + "step": 8525 + }, + { + "epoch": 0.4, + "learning_rate": 1.9293985279639963e-05, + "loss": 0.243, + "step": 8530 + }, + { + "epoch": 0.4, + "learning_rate": 1.9293516478364823e-05, + "loss": 0.2191, + "step": 8535 + }, + { + "epoch": 0.4, + "learning_rate": 1.9293047677089683e-05, + "loss": 0.3439, + "step": 8540 + }, + { + "epoch": 0.4, + "learning_rate": 1.9292578875814543e-05, + "loss": 0.422, + "step": 8545 + }, + { + "epoch": 0.4, + "learning_rate": 1.9292110074539406e-05, + "loss": 0.6352, + "step": 8550 + }, + { + "epoch": 0.4, + "learning_rate": 1.9291641273264266e-05, + "loss": 0.2785, + "step": 8555 + }, + { + "epoch": 0.4, + "learning_rate": 1.9291172471989126e-05, + "loss": 0.1242, + "step": 8560 + }, + { + "epoch": 0.4, + "learning_rate": 1.9290703670713986e-05, + "loss": 0.1312, + "step": 8565 + }, + { + "epoch": 0.4, + "learning_rate": 1.9290234869438846e-05, + "loss": 0.1535, + "step": 8570 + }, + { + "epoch": 0.4, + "learning_rate": 1.9289766068163706e-05, + "loss": 0.2025, + "step": 8575 + }, + { + "epoch": 0.4, + "learning_rate": 1.9289297266888566e-05, + "loss": 0.2068, + "step": 8580 + }, + { + "epoch": 0.4, + "learning_rate": 1.9288828465613426e-05, + "loss": 0.344, + "step": 8585 + }, + { + "epoch": 0.4, + "learning_rate": 1.928835966433829e-05, + "loss": 0.2893, + "step": 8590 + }, + { + "epoch": 0.4, + "learning_rate": 1.928789086306315e-05, + "loss": 0.5662, + "step": 8595 + }, + { + "epoch": 0.4, + "learning_rate": 1.928742206178801e-05, + "loss": 0.5428, + "step": 8600 + }, + { + "epoch": 0.4, + "learning_rate": 1.9286953260512872e-05, + "loss": 0.227, + "step": 8605 + }, + { + "epoch": 0.4, + "learning_rate": 1.9286484459237732e-05, + "loss": 0.1013, + "step": 8610 + }, + { + "epoch": 0.4, + "learning_rate": 1.9286015657962592e-05, + "loss": 0.1645, + "step": 8615 + }, + { + "epoch": 0.4, + "learning_rate": 1.9285546856687452e-05, + "loss": 0.1979, + "step": 8620 + }, + { + "epoch": 0.4, + "learning_rate": 1.9285078055412312e-05, + "loss": 0.2604, + "step": 8625 + }, + { + "epoch": 0.4, + "learning_rate": 1.9284609254137172e-05, + "loss": 0.2365, + "step": 8630 + }, + { + "epoch": 0.4, + "learning_rate": 1.9284140452862032e-05, + "loss": 0.2302, + "step": 8635 + }, + { + "epoch": 0.4, + "learning_rate": 1.9283671651586895e-05, + "loss": 0.2653, + "step": 8640 + }, + { + "epoch": 0.4, + "learning_rate": 1.9283202850311755e-05, + "loss": 0.3308, + "step": 8645 + }, + { + "epoch": 0.4, + "learning_rate": 1.9282734049036615e-05, + "loss": 0.5864, + "step": 8650 + }, + { + "epoch": 0.4, + "learning_rate": 1.9282265247761475e-05, + "loss": 0.3617, + "step": 8655 + }, + { + "epoch": 0.4, + "learning_rate": 1.9281796446486335e-05, + "loss": 0.1467, + "step": 8660 + }, + { + "epoch": 0.4, + "learning_rate": 1.9281327645211195e-05, + "loss": 0.1168, + "step": 8665 + }, + { + "epoch": 0.4, + "learning_rate": 1.9280858843936058e-05, + "loss": 0.2285, + "step": 8670 + }, + { + "epoch": 0.4, + "learning_rate": 1.9280390042660918e-05, + "loss": 0.2232, + "step": 8675 + }, + { + "epoch": 0.41, + "learning_rate": 1.9279921241385778e-05, + "loss": 0.1808, + "step": 8680 + }, + { + "epoch": 0.41, + "learning_rate": 1.927945244011064e-05, + "loss": 0.2971, + "step": 8685 + }, + { + "epoch": 0.41, + "learning_rate": 1.92789836388355e-05, + "loss": 0.2418, + "step": 8690 + }, + { + "epoch": 0.41, + "learning_rate": 1.927851483756036e-05, + "loss": 0.3725, + "step": 8695 + }, + { + "epoch": 0.41, + "learning_rate": 1.927804603628522e-05, + "loss": 0.6266, + "step": 8700 + }, + { + "epoch": 0.41, + "learning_rate": 1.927757723501008e-05, + "loss": 0.3189, + "step": 8705 + }, + { + "epoch": 0.41, + "learning_rate": 1.927710843373494e-05, + "loss": 0.0855, + "step": 8710 + }, + { + "epoch": 0.41, + "learning_rate": 1.92766396324598e-05, + "loss": 0.1166, + "step": 8715 + }, + { + "epoch": 0.41, + "learning_rate": 1.927617083118466e-05, + "loss": 0.2014, + "step": 8720 + }, + { + "epoch": 0.41, + "learning_rate": 1.927570202990952e-05, + "loss": 0.257, + "step": 8725 + }, + { + "epoch": 0.41, + "learning_rate": 1.9275233228634384e-05, + "loss": 0.3014, + "step": 8730 + }, + { + "epoch": 0.41, + "learning_rate": 1.9274764427359244e-05, + "loss": 0.2917, + "step": 8735 + }, + { + "epoch": 0.41, + "learning_rate": 1.9274295626084104e-05, + "loss": 0.3215, + "step": 8740 + }, + { + "epoch": 0.41, + "learning_rate": 1.9273826824808964e-05, + "loss": 0.3349, + "step": 8745 + }, + { + "epoch": 0.41, + "learning_rate": 1.9273358023533827e-05, + "loss": 0.5432, + "step": 8750 + }, + { + "epoch": 0.41, + "learning_rate": 1.9272889222258687e-05, + "loss": 0.3159, + "step": 8755 + }, + { + "epoch": 0.41, + "learning_rate": 1.9272420420983547e-05, + "loss": 0.1048, + "step": 8760 + }, + { + "epoch": 0.41, + "learning_rate": 1.9271951619708407e-05, + "loss": 0.1627, + "step": 8765 + }, + { + "epoch": 0.41, + "learning_rate": 1.9271482818433267e-05, + "loss": 0.1638, + "step": 8770 + }, + { + "epoch": 0.41, + "learning_rate": 1.9271014017158127e-05, + "loss": 0.1727, + "step": 8775 + }, + { + "epoch": 0.41, + "learning_rate": 1.927054521588299e-05, + "loss": 0.1882, + "step": 8780 + }, + { + "epoch": 0.41, + "learning_rate": 1.927007641460785e-05, + "loss": 0.214, + "step": 8785 + }, + { + "epoch": 0.41, + "learning_rate": 1.926960761333271e-05, + "loss": 0.2846, + "step": 8790 + }, + { + "epoch": 0.41, + "learning_rate": 1.926913881205757e-05, + "loss": 0.4149, + "step": 8795 + }, + { + "epoch": 0.41, + "learning_rate": 1.926867001078243e-05, + "loss": 0.5301, + "step": 8800 + }, + { + "epoch": 0.41, + "learning_rate": 1.926820120950729e-05, + "loss": 0.2375, + "step": 8805 + }, + { + "epoch": 0.41, + "learning_rate": 1.9267732408232153e-05, + "loss": 0.1747, + "step": 8810 + }, + { + "epoch": 0.41, + "learning_rate": 1.9267263606957013e-05, + "loss": 0.1291, + "step": 8815 + }, + { + "epoch": 0.41, + "learning_rate": 1.9266794805681873e-05, + "loss": 0.1782, + "step": 8820 + }, + { + "epoch": 0.41, + "learning_rate": 1.9266326004406736e-05, + "loss": 0.1866, + "step": 8825 + }, + { + "epoch": 0.41, + "learning_rate": 1.9265857203131596e-05, + "loss": 0.2627, + "step": 8830 + }, + { + "epoch": 0.41, + "learning_rate": 1.9265388401856456e-05, + "loss": 0.3373, + "step": 8835 + }, + { + "epoch": 0.41, + "learning_rate": 1.9264919600581316e-05, + "loss": 0.3501, + "step": 8840 + }, + { + "epoch": 0.41, + "learning_rate": 1.9264450799306176e-05, + "loss": 0.3043, + "step": 8845 + }, + { + "epoch": 0.41, + "learning_rate": 1.9263981998031036e-05, + "loss": 0.5005, + "step": 8850 + }, + { + "epoch": 0.41, + "learning_rate": 1.9263513196755896e-05, + "loss": 0.2497, + "step": 8855 + }, + { + "epoch": 0.41, + "learning_rate": 1.9263044395480756e-05, + "loss": 0.1362, + "step": 8860 + }, + { + "epoch": 0.41, + "learning_rate": 1.9262575594205616e-05, + "loss": 0.1199, + "step": 8865 + }, + { + "epoch": 0.41, + "learning_rate": 1.9262106792930476e-05, + "loss": 0.1285, + "step": 8870 + }, + { + "epoch": 0.41, + "learning_rate": 1.926163799165534e-05, + "loss": 0.1813, + "step": 8875 + }, + { + "epoch": 0.41, + "learning_rate": 1.92611691903802e-05, + "loss": 0.1687, + "step": 8880 + }, + { + "epoch": 0.41, + "learning_rate": 1.926070038910506e-05, + "loss": 0.2707, + "step": 8885 + }, + { + "epoch": 0.41, + "learning_rate": 1.9260231587829922e-05, + "loss": 0.3641, + "step": 8890 + }, + { + "epoch": 0.42, + "learning_rate": 1.9259762786554782e-05, + "loss": 0.4102, + "step": 8895 + }, + { + "epoch": 0.42, + "learning_rate": 1.9259293985279642e-05, + "loss": 0.5151, + "step": 8900 + }, + { + "epoch": 0.42, + "learning_rate": 1.9258825184004502e-05, + "loss": 0.3642, + "step": 8905 + }, + { + "epoch": 0.42, + "learning_rate": 1.9258356382729362e-05, + "loss": 0.102, + "step": 8910 + }, + { + "epoch": 0.42, + "learning_rate": 1.9257887581454222e-05, + "loss": 0.1298, + "step": 8915 + }, + { + "epoch": 0.42, + "learning_rate": 1.9257418780179085e-05, + "loss": 0.1646, + "step": 8920 + }, + { + "epoch": 0.42, + "learning_rate": 1.9256949978903945e-05, + "loss": 0.2703, + "step": 8925 + }, + { + "epoch": 0.42, + "learning_rate": 1.9256481177628805e-05, + "loss": 0.2339, + "step": 8930 + }, + { + "epoch": 0.42, + "learning_rate": 1.9256012376353665e-05, + "loss": 0.2508, + "step": 8935 + }, + { + "epoch": 0.42, + "learning_rate": 1.9255543575078525e-05, + "loss": 0.2826, + "step": 8940 + }, + { + "epoch": 0.42, + "learning_rate": 1.9255074773803385e-05, + "loss": 0.4002, + "step": 8945 + }, + { + "epoch": 0.42, + "learning_rate": 1.9254605972528245e-05, + "loss": 0.5675, + "step": 8950 + }, + { + "epoch": 0.42, + "learning_rate": 1.9254137171253108e-05, + "loss": 0.2835, + "step": 8955 + }, + { + "epoch": 0.42, + "learning_rate": 1.9253668369977968e-05, + "loss": 0.0916, + "step": 8960 + }, + { + "epoch": 0.42, + "learning_rate": 1.9253199568702828e-05, + "loss": 0.1194, + "step": 8965 + }, + { + "epoch": 0.42, + "learning_rate": 1.925273076742769e-05, + "loss": 0.1408, + "step": 8970 + }, + { + "epoch": 0.42, + "learning_rate": 1.925226196615255e-05, + "loss": 0.1439, + "step": 8975 + }, + { + "epoch": 0.42, + "learning_rate": 1.925179316487741e-05, + "loss": 0.2504, + "step": 8980 + }, + { + "epoch": 0.42, + "learning_rate": 1.925132436360227e-05, + "loss": 0.3225, + "step": 8985 + }, + { + "epoch": 0.42, + "learning_rate": 1.925085556232713e-05, + "loss": 0.3019, + "step": 8990 + }, + { + "epoch": 0.42, + "learning_rate": 1.925038676105199e-05, + "loss": 0.5128, + "step": 8995 + }, + { + "epoch": 0.42, + "learning_rate": 1.924991795977685e-05, + "loss": 0.744, + "step": 9000 + }, + { + "epoch": 0.42, + "learning_rate": 1.924944915850171e-05, + "loss": 0.3016, + "step": 9005 + }, + { + "epoch": 0.42, + "learning_rate": 1.9248980357226574e-05, + "loss": 0.1325, + "step": 9010 + }, + { + "epoch": 0.42, + "learning_rate": 1.9248511555951434e-05, + "loss": 0.125, + "step": 9015 + }, + { + "epoch": 0.42, + "learning_rate": 1.9248042754676294e-05, + "loss": 0.1749, + "step": 9020 + }, + { + "epoch": 0.42, + "learning_rate": 1.9247573953401154e-05, + "loss": 0.2229, + "step": 9025 + }, + { + "epoch": 0.42, + "learning_rate": 1.9247105152126014e-05, + "loss": 0.1848, + "step": 9030 + }, + { + "epoch": 0.42, + "learning_rate": 1.9246636350850877e-05, + "loss": 0.2923, + "step": 9035 + }, + { + "epoch": 0.42, + "learning_rate": 1.9246167549575737e-05, + "loss": 0.3329, + "step": 9040 + }, + { + "epoch": 0.42, + "learning_rate": 1.9245698748300597e-05, + "loss": 0.3743, + "step": 9045 + }, + { + "epoch": 0.42, + "learning_rate": 1.9245229947025457e-05, + "loss": 0.6587, + "step": 9050 + }, + { + "epoch": 0.42, + "learning_rate": 1.9244761145750317e-05, + "loss": 0.2159, + "step": 9055 + }, + { + "epoch": 0.42, + "learning_rate": 1.924429234447518e-05, + "loss": 0.1817, + "step": 9060 + }, + { + "epoch": 0.42, + "learning_rate": 1.924382354320004e-05, + "loss": 0.1316, + "step": 9065 + }, + { + "epoch": 0.42, + "learning_rate": 1.92433547419249e-05, + "loss": 0.1602, + "step": 9070 + }, + { + "epoch": 0.42, + "learning_rate": 1.924288594064976e-05, + "loss": 0.1972, + "step": 9075 + }, + { + "epoch": 0.42, + "learning_rate": 1.924241713937462e-05, + "loss": 0.2425, + "step": 9080 + }, + { + "epoch": 0.42, + "learning_rate": 1.924194833809948e-05, + "loss": 0.3043, + "step": 9085 + }, + { + "epoch": 0.42, + "learning_rate": 1.924147953682434e-05, + "loss": 0.3386, + "step": 9090 + }, + { + "epoch": 0.42, + "learning_rate": 1.92410107355492e-05, + "loss": 0.5037, + "step": 9095 + }, + { + "epoch": 0.42, + "learning_rate": 1.9240541934274063e-05, + "loss": 0.8734, + "step": 9100 + }, + { + "epoch": 0.42, + "learning_rate": 1.9240073132998923e-05, + "loss": 0.34, + "step": 9105 + }, + { + "epoch": 0.43, + "learning_rate": 1.9239604331723783e-05, + "loss": 0.0871, + "step": 9110 + }, + { + "epoch": 0.43, + "learning_rate": 1.9239135530448646e-05, + "loss": 0.1388, + "step": 9115 + }, + { + "epoch": 0.43, + "learning_rate": 1.9238666729173506e-05, + "loss": 0.1693, + "step": 9120 + }, + { + "epoch": 0.43, + "learning_rate": 1.9238197927898366e-05, + "loss": 0.2853, + "step": 9125 + }, + { + "epoch": 0.43, + "learning_rate": 1.9237729126623226e-05, + "loss": 0.313, + "step": 9130 + }, + { + "epoch": 0.43, + "learning_rate": 1.9237260325348086e-05, + "loss": 0.2605, + "step": 9135 + }, + { + "epoch": 0.43, + "learning_rate": 1.9236791524072946e-05, + "loss": 0.2986, + "step": 9140 + }, + { + "epoch": 0.43, + "learning_rate": 1.9236322722797806e-05, + "loss": 0.4476, + "step": 9145 + }, + { + "epoch": 0.43, + "learning_rate": 1.923585392152267e-05, + "loss": 0.6561, + "step": 9150 + }, + { + "epoch": 0.43, + "learning_rate": 1.923538512024753e-05, + "loss": 0.3252, + "step": 9155 + }, + { + "epoch": 0.43, + "learning_rate": 1.923491631897239e-05, + "loss": 0.0688, + "step": 9160 + }, + { + "epoch": 0.43, + "learning_rate": 1.923444751769725e-05, + "loss": 0.1332, + "step": 9165 + }, + { + "epoch": 0.43, + "learning_rate": 1.923397871642211e-05, + "loss": 0.1325, + "step": 9170 + }, + { + "epoch": 0.43, + "learning_rate": 1.9233509915146972e-05, + "loss": 0.127, + "step": 9175 + }, + { + "epoch": 0.43, + "learning_rate": 1.9233041113871832e-05, + "loss": 0.1931, + "step": 9180 + }, + { + "epoch": 0.43, + "learning_rate": 1.9232572312596692e-05, + "loss": 0.1826, + "step": 9185 + }, + { + "epoch": 0.43, + "learning_rate": 1.9232103511321552e-05, + "loss": 0.3144, + "step": 9190 + }, + { + "epoch": 0.43, + "learning_rate": 1.9231634710046412e-05, + "loss": 0.3538, + "step": 9195 + }, + { + "epoch": 0.43, + "learning_rate": 1.9231165908771275e-05, + "loss": 0.5988, + "step": 9200 + }, + { + "epoch": 0.43, + "learning_rate": 1.9230697107496135e-05, + "loss": 0.3227, + "step": 9205 + }, + { + "epoch": 0.43, + "learning_rate": 1.9230228306220995e-05, + "loss": 0.1147, + "step": 9210 + }, + { + "epoch": 0.43, + "learning_rate": 1.9229759504945855e-05, + "loss": 0.133, + "step": 9215 + }, + { + "epoch": 0.43, + "learning_rate": 1.9229290703670715e-05, + "loss": 0.2695, + "step": 9220 + }, + { + "epoch": 0.43, + "learning_rate": 1.9228821902395575e-05, + "loss": 0.2266, + "step": 9225 + }, + { + "epoch": 0.43, + "learning_rate": 1.9228353101120435e-05, + "loss": 0.216, + "step": 9230 + }, + { + "epoch": 0.43, + "learning_rate": 1.9227884299845295e-05, + "loss": 0.3179, + "step": 9235 + }, + { + "epoch": 0.43, + "learning_rate": 1.9227415498570158e-05, + "loss": 0.2371, + "step": 9240 + }, + { + "epoch": 0.43, + "learning_rate": 1.9226946697295018e-05, + "loss": 0.4098, + "step": 9245 + }, + { + "epoch": 0.43, + "learning_rate": 1.9226477896019878e-05, + "loss": 0.533, + "step": 9250 + }, + { + "epoch": 0.43, + "learning_rate": 1.922600909474474e-05, + "loss": 0.2309, + "step": 9255 + }, + { + "epoch": 0.43, + "learning_rate": 1.92255402934696e-05, + "loss": 0.1296, + "step": 9260 + }, + { + "epoch": 0.43, + "learning_rate": 1.922507149219446e-05, + "loss": 0.1396, + "step": 9265 + }, + { + "epoch": 0.43, + "learning_rate": 1.922460269091932e-05, + "loss": 0.2446, + "step": 9270 + }, + { + "epoch": 0.43, + "learning_rate": 1.922413388964418e-05, + "loss": 0.1899, + "step": 9275 + }, + { + "epoch": 0.43, + "learning_rate": 1.922366508836904e-05, + "loss": 0.285, + "step": 9280 + }, + { + "epoch": 0.43, + "learning_rate": 1.92231962870939e-05, + "loss": 0.28, + "step": 9285 + }, + { + "epoch": 0.43, + "learning_rate": 1.9222727485818764e-05, + "loss": 0.2611, + "step": 9290 + }, + { + "epoch": 0.43, + "learning_rate": 1.9222258684543624e-05, + "loss": 0.424, + "step": 9295 + }, + { + "epoch": 0.43, + "learning_rate": 1.9221789883268484e-05, + "loss": 0.615, + "step": 9300 + }, + { + "epoch": 0.43, + "learning_rate": 1.9221321081993344e-05, + "loss": 0.2705, + "step": 9305 + }, + { + "epoch": 0.43, + "learning_rate": 1.9220852280718204e-05, + "loss": 0.0816, + "step": 9310 + }, + { + "epoch": 0.43, + "learning_rate": 1.9220383479443064e-05, + "loss": 0.1157, + "step": 9315 + }, + { + "epoch": 0.43, + "learning_rate": 1.9219914678167927e-05, + "loss": 0.1803, + "step": 9320 + }, + { + "epoch": 0.44, + "learning_rate": 1.9219445876892787e-05, + "loss": 0.1273, + "step": 9325 + }, + { + "epoch": 0.44, + "learning_rate": 1.9218977075617647e-05, + "loss": 0.269, + "step": 9330 + }, + { + "epoch": 0.44, + "learning_rate": 1.921850827434251e-05, + "loss": 0.1694, + "step": 9335 + }, + { + "epoch": 0.44, + "learning_rate": 1.921803947306737e-05, + "loss": 0.303, + "step": 9340 + }, + { + "epoch": 0.44, + "learning_rate": 1.921757067179223e-05, + "loss": 0.4144, + "step": 9345 + }, + { + "epoch": 0.44, + "learning_rate": 1.921710187051709e-05, + "loss": 0.4884, + "step": 9350 + }, + { + "epoch": 0.44, + "learning_rate": 1.921663306924195e-05, + "loss": 0.2585, + "step": 9355 + }, + { + "epoch": 0.44, + "learning_rate": 1.921616426796681e-05, + "loss": 0.0678, + "step": 9360 + }, + { + "epoch": 0.44, + "learning_rate": 1.921569546669167e-05, + "loss": 0.1916, + "step": 9365 + }, + { + "epoch": 0.44, + "learning_rate": 1.921522666541653e-05, + "loss": 0.1597, + "step": 9370 + }, + { + "epoch": 0.44, + "learning_rate": 1.921475786414139e-05, + "loss": 0.2067, + "step": 9375 + }, + { + "epoch": 0.44, + "learning_rate": 1.921428906286625e-05, + "loss": 0.1931, + "step": 9380 + }, + { + "epoch": 0.44, + "learning_rate": 1.9213820261591113e-05, + "loss": 0.427, + "step": 9385 + }, + { + "epoch": 0.44, + "learning_rate": 1.9213351460315973e-05, + "loss": 0.3432, + "step": 9390 + }, + { + "epoch": 0.44, + "learning_rate": 1.9212882659040833e-05, + "loss": 0.3622, + "step": 9395 + }, + { + "epoch": 0.44, + "learning_rate": 1.9212413857765696e-05, + "loss": 0.4917, + "step": 9400 + }, + { + "epoch": 0.44, + "learning_rate": 1.9211945056490556e-05, + "loss": 0.2831, + "step": 9405 + }, + { + "epoch": 0.44, + "learning_rate": 1.9211476255215416e-05, + "loss": 0.1006, + "step": 9410 + }, + { + "epoch": 0.44, + "learning_rate": 1.9211007453940276e-05, + "loss": 0.1673, + "step": 9415 + }, + { + "epoch": 0.44, + "learning_rate": 1.9210538652665136e-05, + "loss": 0.2262, + "step": 9420 + }, + { + "epoch": 0.44, + "learning_rate": 1.9210069851389996e-05, + "loss": 0.156, + "step": 9425 + }, + { + "epoch": 0.44, + "learning_rate": 1.920960105011486e-05, + "loss": 0.1828, + "step": 9430 + }, + { + "epoch": 0.44, + "learning_rate": 1.920913224883972e-05, + "loss": 0.1871, + "step": 9435 + }, + { + "epoch": 0.44, + "learning_rate": 1.920866344756458e-05, + "loss": 0.3073, + "step": 9440 + }, + { + "epoch": 0.44, + "learning_rate": 1.920819464628944e-05, + "loss": 0.4646, + "step": 9445 + }, + { + "epoch": 0.44, + "learning_rate": 1.92077258450143e-05, + "loss": 0.5954, + "step": 9450 + }, + { + "epoch": 0.44, + "learning_rate": 1.920725704373916e-05, + "loss": 0.2833, + "step": 9455 + }, + { + "epoch": 0.44, + "learning_rate": 1.920678824246402e-05, + "loss": 0.1231, + "step": 9460 + }, + { + "epoch": 0.44, + "learning_rate": 1.9206319441188882e-05, + "loss": 0.1253, + "step": 9465 + }, + { + "epoch": 0.44, + "learning_rate": 1.9205850639913742e-05, + "loss": 0.1852, + "step": 9470 + }, + { + "epoch": 0.44, + "learning_rate": 1.9205381838638602e-05, + "loss": 0.2212, + "step": 9475 + }, + { + "epoch": 0.44, + "learning_rate": 1.9204913037363466e-05, + "loss": 0.1587, + "step": 9480 + }, + { + "epoch": 0.44, + "learning_rate": 1.9204444236088325e-05, + "loss": 0.3125, + "step": 9485 + }, + { + "epoch": 0.44, + "learning_rate": 1.9203975434813185e-05, + "loss": 0.3626, + "step": 9490 + }, + { + "epoch": 0.44, + "learning_rate": 1.9203506633538045e-05, + "loss": 0.4767, + "step": 9495 + }, + { + "epoch": 0.44, + "learning_rate": 1.9203037832262905e-05, + "loss": 0.6986, + "step": 9500 + }, + { + "epoch": 0.44, + "learning_rate": 1.9202569030987765e-05, + "loss": 0.2779, + "step": 9505 + }, + { + "epoch": 0.44, + "learning_rate": 1.9202100229712625e-05, + "loss": 0.095, + "step": 9510 + }, + { + "epoch": 0.44, + "learning_rate": 1.9201631428437485e-05, + "loss": 0.1261, + "step": 9515 + }, + { + "epoch": 0.44, + "learning_rate": 1.9201162627162345e-05, + "loss": 0.1483, + "step": 9520 + }, + { + "epoch": 0.44, + "learning_rate": 1.920069382588721e-05, + "loss": 0.1876, + "step": 9525 + }, + { + "epoch": 0.44, + "learning_rate": 1.9200225024612068e-05, + "loss": 0.236, + "step": 9530 + }, + { + "epoch": 0.44, + "learning_rate": 1.9199756223336928e-05, + "loss": 0.2127, + "step": 9535 + }, + { + "epoch": 0.45, + "learning_rate": 1.9199287422061788e-05, + "loss": 0.2471, + "step": 9540 + }, + { + "epoch": 0.45, + "learning_rate": 1.919881862078665e-05, + "loss": 0.4438, + "step": 9545 + }, + { + "epoch": 0.45, + "learning_rate": 1.919834981951151e-05, + "loss": 0.5967, + "step": 9550 + }, + { + "epoch": 0.45, + "learning_rate": 1.919788101823637e-05, + "loss": 0.3444, + "step": 9555 + }, + { + "epoch": 0.45, + "learning_rate": 1.919741221696123e-05, + "loss": 0.142, + "step": 9560 + }, + { + "epoch": 0.45, + "learning_rate": 1.919694341568609e-05, + "loss": 0.0761, + "step": 9565 + }, + { + "epoch": 0.45, + "learning_rate": 1.9196474614410955e-05, + "loss": 0.2179, + "step": 9570 + }, + { + "epoch": 0.45, + "learning_rate": 1.9196005813135814e-05, + "loss": 0.1785, + "step": 9575 + }, + { + "epoch": 0.45, + "learning_rate": 1.9195537011860674e-05, + "loss": 0.2536, + "step": 9580 + }, + { + "epoch": 0.45, + "learning_rate": 1.9195068210585534e-05, + "loss": 0.1959, + "step": 9585 + }, + { + "epoch": 0.45, + "learning_rate": 1.9194599409310394e-05, + "loss": 0.2637, + "step": 9590 + }, + { + "epoch": 0.45, + "learning_rate": 1.9194130608035254e-05, + "loss": 0.4235, + "step": 9595 + }, + { + "epoch": 0.45, + "learning_rate": 1.9193661806760114e-05, + "loss": 0.3954, + "step": 9600 + }, + { + "epoch": 0.45, + "learning_rate": 1.9193193005484977e-05, + "loss": 0.2325, + "step": 9605 + }, + { + "epoch": 0.45, + "learning_rate": 1.9192724204209837e-05, + "loss": 0.1367, + "step": 9610 + }, + { + "epoch": 0.45, + "learning_rate": 1.9192255402934697e-05, + "loss": 0.1429, + "step": 9615 + }, + { + "epoch": 0.45, + "learning_rate": 1.9191786601659557e-05, + "loss": 0.1687, + "step": 9620 + }, + { + "epoch": 0.45, + "learning_rate": 1.919131780038442e-05, + "loss": 0.263, + "step": 9625 + }, + { + "epoch": 0.45, + "learning_rate": 1.919084899910928e-05, + "loss": 0.2288, + "step": 9630 + }, + { + "epoch": 0.45, + "learning_rate": 1.919038019783414e-05, + "loss": 0.2932, + "step": 9635 + }, + { + "epoch": 0.45, + "learning_rate": 1.9189911396559e-05, + "loss": 0.5055, + "step": 9640 + }, + { + "epoch": 0.45, + "learning_rate": 1.918944259528386e-05, + "loss": 0.5668, + "step": 9645 + }, + { + "epoch": 0.45, + "learning_rate": 1.918897379400872e-05, + "loss": 0.5284, + "step": 9650 + }, + { + "epoch": 0.45, + "learning_rate": 1.918850499273358e-05, + "loss": 0.3792, + "step": 9655 + }, + { + "epoch": 0.45, + "learning_rate": 1.9188036191458443e-05, + "loss": 0.1393, + "step": 9660 + }, + { + "epoch": 0.45, + "learning_rate": 1.9187567390183303e-05, + "loss": 0.147, + "step": 9665 + }, + { + "epoch": 0.45, + "learning_rate": 1.9187098588908163e-05, + "loss": 0.1659, + "step": 9670 + }, + { + "epoch": 0.45, + "learning_rate": 1.9186629787633023e-05, + "loss": 0.1626, + "step": 9675 + }, + { + "epoch": 0.45, + "learning_rate": 1.9186160986357883e-05, + "loss": 0.1592, + "step": 9680 + }, + { + "epoch": 0.45, + "learning_rate": 1.9185692185082747e-05, + "loss": 0.2789, + "step": 9685 + }, + { + "epoch": 0.45, + "learning_rate": 1.9185223383807606e-05, + "loss": 0.2428, + "step": 9690 + }, + { + "epoch": 0.45, + "learning_rate": 1.9184754582532466e-05, + "loss": 0.3408, + "step": 9695 + }, + { + "epoch": 0.45, + "learning_rate": 1.9184285781257326e-05, + "loss": 0.6366, + "step": 9700 + }, + { + "epoch": 0.45, + "learning_rate": 1.9183816979982186e-05, + "loss": 0.2249, + "step": 9705 + }, + { + "epoch": 0.45, + "learning_rate": 1.918334817870705e-05, + "loss": 0.1138, + "step": 9710 + }, + { + "epoch": 0.45, + "learning_rate": 1.918287937743191e-05, + "loss": 0.1484, + "step": 9715 + }, + { + "epoch": 0.45, + "learning_rate": 1.918241057615677e-05, + "loss": 0.1745, + "step": 9720 + }, + { + "epoch": 0.45, + "learning_rate": 1.918194177488163e-05, + "loss": 0.1442, + "step": 9725 + }, + { + "epoch": 0.45, + "learning_rate": 1.918147297360649e-05, + "loss": 0.2449, + "step": 9730 + }, + { + "epoch": 0.45, + "learning_rate": 1.918100417233135e-05, + "loss": 0.2956, + "step": 9735 + }, + { + "epoch": 0.45, + "learning_rate": 1.918053537105621e-05, + "loss": 0.3274, + "step": 9740 + }, + { + "epoch": 0.45, + "learning_rate": 1.918006656978107e-05, + "loss": 0.3507, + "step": 9745 + }, + { + "epoch": 0.45, + "learning_rate": 1.9179597768505932e-05, + "loss": 0.6847, + "step": 9750 + }, + { + "epoch": 0.46, + "learning_rate": 1.9179128967230792e-05, + "loss": 0.2816, + "step": 9755 + }, + { + "epoch": 0.46, + "learning_rate": 1.9178660165955652e-05, + "loss": 0.1874, + "step": 9760 + }, + { + "epoch": 0.46, + "learning_rate": 1.9178191364680516e-05, + "loss": 0.1852, + "step": 9765 + }, + { + "epoch": 0.46, + "learning_rate": 1.9177722563405376e-05, + "loss": 0.1936, + "step": 9770 + }, + { + "epoch": 0.46, + "learning_rate": 1.9177253762130236e-05, + "loss": 0.148, + "step": 9775 + }, + { + "epoch": 0.46, + "learning_rate": 1.9176784960855095e-05, + "loss": 0.2847, + "step": 9780 + }, + { + "epoch": 0.46, + "learning_rate": 1.9176316159579955e-05, + "loss": 0.3086, + "step": 9785 + }, + { + "epoch": 0.46, + "learning_rate": 1.9175847358304815e-05, + "loss": 0.2333, + "step": 9790 + }, + { + "epoch": 0.46, + "learning_rate": 1.9175378557029675e-05, + "loss": 0.4765, + "step": 9795 + }, + { + "epoch": 0.46, + "learning_rate": 1.917490975575454e-05, + "loss": 0.5674, + "step": 9800 + }, + { + "epoch": 0.46, + "learning_rate": 1.91744409544794e-05, + "loss": 0.2181, + "step": 9805 + }, + { + "epoch": 0.46, + "learning_rate": 1.917397215320426e-05, + "loss": 0.065, + "step": 9810 + }, + { + "epoch": 0.46, + "learning_rate": 1.917350335192912e-05, + "loss": 0.1668, + "step": 9815 + }, + { + "epoch": 0.46, + "learning_rate": 1.9173034550653978e-05, + "loss": 0.1574, + "step": 9820 + }, + { + "epoch": 0.46, + "learning_rate": 1.9172565749378838e-05, + "loss": 0.1636, + "step": 9825 + }, + { + "epoch": 0.46, + "learning_rate": 1.91720969481037e-05, + "loss": 0.2269, + "step": 9830 + }, + { + "epoch": 0.46, + "learning_rate": 1.917162814682856e-05, + "loss": 0.2636, + "step": 9835 + }, + { + "epoch": 0.46, + "learning_rate": 1.917115934555342e-05, + "loss": 0.1976, + "step": 9840 + }, + { + "epoch": 0.46, + "learning_rate": 1.917069054427828e-05, + "loss": 0.4929, + "step": 9845 + }, + { + "epoch": 0.46, + "learning_rate": 1.9170221743003145e-05, + "loss": 0.6642, + "step": 9850 + }, + { + "epoch": 0.46, + "learning_rate": 1.9169752941728005e-05, + "loss": 0.2061, + "step": 9855 + }, + { + "epoch": 0.46, + "learning_rate": 1.9169284140452865e-05, + "loss": 0.106, + "step": 9860 + }, + { + "epoch": 0.46, + "learning_rate": 1.9168815339177724e-05, + "loss": 0.1755, + "step": 9865 + }, + { + "epoch": 0.46, + "learning_rate": 1.9168346537902584e-05, + "loss": 0.1511, + "step": 9870 + }, + { + "epoch": 0.46, + "learning_rate": 1.9167877736627444e-05, + "loss": 0.2493, + "step": 9875 + }, + { + "epoch": 0.46, + "learning_rate": 1.9167408935352304e-05, + "loss": 0.309, + "step": 9880 + }, + { + "epoch": 0.46, + "learning_rate": 1.9166940134077164e-05, + "loss": 0.2733, + "step": 9885 + }, + { + "epoch": 0.46, + "learning_rate": 1.9166471332802024e-05, + "loss": 0.288, + "step": 9890 + }, + { + "epoch": 0.46, + "learning_rate": 1.9166002531526887e-05, + "loss": 0.3824, + "step": 9895 + }, + { + "epoch": 0.46, + "learning_rate": 1.9165533730251747e-05, + "loss": 0.6902, + "step": 9900 + }, + { + "epoch": 0.46, + "learning_rate": 1.9165064928976607e-05, + "loss": 0.2577, + "step": 9905 + }, + { + "epoch": 0.46, + "learning_rate": 1.916459612770147e-05, + "loss": 0.1099, + "step": 9910 + }, + { + "epoch": 0.46, + "learning_rate": 1.916412732642633e-05, + "loss": 0.1004, + "step": 9915 + }, + { + "epoch": 0.46, + "learning_rate": 1.916365852515119e-05, + "loss": 0.1233, + "step": 9920 + }, + { + "epoch": 0.46, + "learning_rate": 1.916318972387605e-05, + "loss": 0.1185, + "step": 9925 + }, + { + "epoch": 0.46, + "learning_rate": 1.916272092260091e-05, + "loss": 0.2188, + "step": 9930 + }, + { + "epoch": 0.46, + "learning_rate": 1.916225212132577e-05, + "loss": 0.2539, + "step": 9935 + }, + { + "epoch": 0.46, + "learning_rate": 1.9161783320050634e-05, + "loss": 0.2565, + "step": 9940 + }, + { + "epoch": 0.46, + "learning_rate": 1.9161314518775494e-05, + "loss": 0.4717, + "step": 9945 + }, + { + "epoch": 0.46, + "learning_rate": 1.9160845717500354e-05, + "loss": 0.493, + "step": 9950 + }, + { + "epoch": 0.46, + "learning_rate": 1.9160376916225213e-05, + "loss": 0.2629, + "step": 9955 + }, + { + "epoch": 0.46, + "learning_rate": 1.9159908114950073e-05, + "loss": 0.07, + "step": 9960 + }, + { + "epoch": 0.46, + "learning_rate": 1.9159439313674933e-05, + "loss": 0.1622, + "step": 9965 + }, + { + "epoch": 0.47, + "learning_rate": 1.9158970512399793e-05, + "loss": 0.2093, + "step": 9970 + }, + { + "epoch": 0.47, + "learning_rate": 1.9158501711124657e-05, + "loss": 0.1833, + "step": 9975 + }, + { + "epoch": 0.47, + "learning_rate": 1.9158032909849516e-05, + "loss": 0.2507, + "step": 9980 + }, + { + "epoch": 0.47, + "learning_rate": 1.9157564108574376e-05, + "loss": 0.3487, + "step": 9985 + }, + { + "epoch": 0.47, + "learning_rate": 1.915709530729924e-05, + "loss": 0.3129, + "step": 9990 + }, + { + "epoch": 0.47, + "learning_rate": 1.91566265060241e-05, + "loss": 0.4036, + "step": 9995 + }, + { + "epoch": 0.47, + "learning_rate": 1.915615770474896e-05, + "loss": 0.5754, + "step": 10000 + }, + { + "epoch": 0.47, + "learning_rate": 1.915568890347382e-05, + "loss": 0.2006, + "step": 10005 + }, + { + "epoch": 0.47, + "learning_rate": 1.915522010219868e-05, + "loss": 0.1141, + "step": 10010 + }, + { + "epoch": 0.47, + "learning_rate": 1.915475130092354e-05, + "loss": 0.1682, + "step": 10015 + }, + { + "epoch": 0.47, + "learning_rate": 1.91542824996484e-05, + "loss": 0.1504, + "step": 10020 + }, + { + "epoch": 0.47, + "learning_rate": 1.915381369837326e-05, + "loss": 0.2555, + "step": 10025 + }, + { + "epoch": 0.47, + "learning_rate": 1.915334489709812e-05, + "loss": 0.2481, + "step": 10030 + }, + { + "epoch": 0.47, + "learning_rate": 1.9152876095822983e-05, + "loss": 0.1897, + "step": 10035 + }, + { + "epoch": 0.47, + "learning_rate": 1.9152407294547842e-05, + "loss": 0.2689, + "step": 10040 + }, + { + "epoch": 0.47, + "learning_rate": 1.9151938493272702e-05, + "loss": 0.4531, + "step": 10045 + }, + { + "epoch": 0.47, + "learning_rate": 1.9151469691997562e-05, + "loss": 0.6491, + "step": 10050 + }, + { + "epoch": 0.47, + "learning_rate": 1.9151000890722426e-05, + "loss": 0.2868, + "step": 10055 + }, + { + "epoch": 0.47, + "learning_rate": 1.9150532089447286e-05, + "loss": 0.0885, + "step": 10060 + }, + { + "epoch": 0.47, + "learning_rate": 1.9150063288172146e-05, + "loss": 0.1029, + "step": 10065 + }, + { + "epoch": 0.47, + "learning_rate": 1.9149594486897005e-05, + "loss": 0.1705, + "step": 10070 + }, + { + "epoch": 0.47, + "learning_rate": 1.9149125685621865e-05, + "loss": 0.2066, + "step": 10075 + }, + { + "epoch": 0.47, + "learning_rate": 1.914865688434673e-05, + "loss": 0.2518, + "step": 10080 + }, + { + "epoch": 0.47, + "learning_rate": 1.914818808307159e-05, + "loss": 0.1692, + "step": 10085 + }, + { + "epoch": 0.47, + "learning_rate": 1.914771928179645e-05, + "loss": 0.3126, + "step": 10090 + }, + { + "epoch": 0.47, + "learning_rate": 1.914725048052131e-05, + "loss": 0.5543, + "step": 10095 + }, + { + "epoch": 0.47, + "learning_rate": 1.914678167924617e-05, + "loss": 0.8418, + "step": 10100 + }, + { + "epoch": 0.47, + "learning_rate": 1.914631287797103e-05, + "loss": 0.2604, + "step": 10105 + }, + { + "epoch": 0.47, + "learning_rate": 1.914584407669589e-05, + "loss": 0.0586, + "step": 10110 + }, + { + "epoch": 0.47, + "learning_rate": 1.914537527542075e-05, + "loss": 0.1234, + "step": 10115 + }, + { + "epoch": 0.47, + "learning_rate": 1.914490647414561e-05, + "loss": 0.1425, + "step": 10120 + }, + { + "epoch": 0.47, + "learning_rate": 1.914443767287047e-05, + "loss": 0.2744, + "step": 10125 + }, + { + "epoch": 0.47, + "learning_rate": 1.914396887159533e-05, + "loss": 0.2341, + "step": 10130 + }, + { + "epoch": 0.47, + "learning_rate": 1.9143500070320195e-05, + "loss": 0.3112, + "step": 10135 + }, + { + "epoch": 0.47, + "learning_rate": 1.9143031269045055e-05, + "loss": 0.301, + "step": 10140 + }, + { + "epoch": 0.47, + "learning_rate": 1.9142562467769915e-05, + "loss": 0.4034, + "step": 10145 + }, + { + "epoch": 0.47, + "learning_rate": 1.9142093666494775e-05, + "loss": 0.4897, + "step": 10150 + }, + { + "epoch": 0.47, + "learning_rate": 1.9141624865219635e-05, + "loss": 0.2841, + "step": 10155 + }, + { + "epoch": 0.47, + "learning_rate": 1.9141156063944494e-05, + "loss": 0.1272, + "step": 10160 + }, + { + "epoch": 0.47, + "learning_rate": 1.9140687262669354e-05, + "loss": 0.095, + "step": 10165 + }, + { + "epoch": 0.47, + "learning_rate": 1.9140218461394214e-05, + "loss": 0.2431, + "step": 10170 + }, + { + "epoch": 0.47, + "learning_rate": 1.9139749660119078e-05, + "loss": 0.185, + "step": 10175 + }, + { + "epoch": 0.48, + "learning_rate": 1.9139280858843938e-05, + "loss": 0.1765, + "step": 10180 + }, + { + "epoch": 0.48, + "learning_rate": 1.9138812057568797e-05, + "loss": 0.1876, + "step": 10185 + }, + { + "epoch": 0.48, + "learning_rate": 1.9138343256293657e-05, + "loss": 0.205, + "step": 10190 + }, + { + "epoch": 0.48, + "learning_rate": 1.913787445501852e-05, + "loss": 0.2962, + "step": 10195 + }, + { + "epoch": 0.48, + "learning_rate": 1.913740565374338e-05, + "loss": 0.6509, + "step": 10200 + }, + { + "epoch": 0.48, + "learning_rate": 1.913693685246824e-05, + "loss": 0.2717, + "step": 10205 + }, + { + "epoch": 0.48, + "learning_rate": 1.91364680511931e-05, + "loss": 0.1069, + "step": 10210 + }, + { + "epoch": 0.48, + "learning_rate": 1.913599924991796e-05, + "loss": 0.2321, + "step": 10215 + }, + { + "epoch": 0.48, + "learning_rate": 1.9135530448642824e-05, + "loss": 0.1316, + "step": 10220 + }, + { + "epoch": 0.48, + "learning_rate": 1.9135061647367684e-05, + "loss": 0.1441, + "step": 10225 + }, + { + "epoch": 0.48, + "learning_rate": 1.9134592846092544e-05, + "loss": 0.2482, + "step": 10230 + }, + { + "epoch": 0.48, + "learning_rate": 1.9134124044817404e-05, + "loss": 0.3006, + "step": 10235 + }, + { + "epoch": 0.48, + "learning_rate": 1.9133655243542264e-05, + "loss": 0.4242, + "step": 10240 + }, + { + "epoch": 0.48, + "learning_rate": 1.9133186442267123e-05, + "loss": 0.3968, + "step": 10245 + }, + { + "epoch": 0.48, + "learning_rate": 1.9132717640991983e-05, + "loss": 0.5792, + "step": 10250 + }, + { + "epoch": 0.48, + "learning_rate": 1.9132248839716843e-05, + "loss": 0.3714, + "step": 10255 + }, + { + "epoch": 0.48, + "learning_rate": 1.9131780038441707e-05, + "loss": 0.1612, + "step": 10260 + }, + { + "epoch": 0.48, + "learning_rate": 1.9131311237166567e-05, + "loss": 0.1666, + "step": 10265 + }, + { + "epoch": 0.48, + "learning_rate": 1.9130842435891427e-05, + "loss": 0.2056, + "step": 10270 + }, + { + "epoch": 0.48, + "learning_rate": 1.913037363461629e-05, + "loss": 0.2004, + "step": 10275 + }, + { + "epoch": 0.48, + "learning_rate": 1.912990483334115e-05, + "loss": 0.2485, + "step": 10280 + }, + { + "epoch": 0.48, + "learning_rate": 1.912943603206601e-05, + "loss": 0.2626, + "step": 10285 + }, + { + "epoch": 0.48, + "learning_rate": 1.912896723079087e-05, + "loss": 0.3242, + "step": 10290 + }, + { + "epoch": 0.48, + "learning_rate": 1.912849842951573e-05, + "loss": 0.3341, + "step": 10295 + }, + { + "epoch": 0.48, + "learning_rate": 1.912802962824059e-05, + "loss": 0.5659, + "step": 10300 + }, + { + "epoch": 0.48, + "learning_rate": 1.912756082696545e-05, + "loss": 0.2671, + "step": 10305 + }, + { + "epoch": 0.48, + "learning_rate": 1.9127092025690313e-05, + "loss": 0.1295, + "step": 10310 + }, + { + "epoch": 0.48, + "learning_rate": 1.9126623224415173e-05, + "loss": 0.1669, + "step": 10315 + }, + { + "epoch": 0.48, + "learning_rate": 1.9126154423140033e-05, + "loss": 0.1559, + "step": 10320 + }, + { + "epoch": 0.48, + "learning_rate": 1.9125685621864893e-05, + "loss": 0.1648, + "step": 10325 + }, + { + "epoch": 0.48, + "learning_rate": 1.9125216820589753e-05, + "loss": 0.2274, + "step": 10330 + }, + { + "epoch": 0.48, + "learning_rate": 1.9124748019314612e-05, + "loss": 0.3051, + "step": 10335 + }, + { + "epoch": 0.48, + "learning_rate": 1.9124279218039476e-05, + "loss": 0.3135, + "step": 10340 + }, + { + "epoch": 0.48, + "learning_rate": 1.9123810416764336e-05, + "loss": 0.4214, + "step": 10345 + }, + { + "epoch": 0.48, + "learning_rate": 1.9123341615489196e-05, + "loss": 0.4696, + "step": 10350 + }, + { + "epoch": 0.48, + "learning_rate": 1.9122872814214056e-05, + "loss": 0.2314, + "step": 10355 + }, + { + "epoch": 0.48, + "learning_rate": 1.912240401293892e-05, + "loss": 0.0987, + "step": 10360 + }, + { + "epoch": 0.48, + "learning_rate": 1.912193521166378e-05, + "loss": 0.103, + "step": 10365 + }, + { + "epoch": 0.48, + "learning_rate": 1.912146641038864e-05, + "loss": 0.1721, + "step": 10370 + }, + { + "epoch": 0.48, + "learning_rate": 1.91209976091135e-05, + "loss": 0.1067, + "step": 10375 + }, + { + "epoch": 0.48, + "learning_rate": 1.912052880783836e-05, + "loss": 0.2847, + "step": 10380 + }, + { + "epoch": 0.48, + "learning_rate": 1.912006000656322e-05, + "loss": 0.2213, + "step": 10385 + }, + { + "epoch": 0.48, + "learning_rate": 1.911959120528808e-05, + "loss": 0.2882, + "step": 10390 + }, + { + "epoch": 0.49, + "learning_rate": 1.911912240401294e-05, + "loss": 0.4464, + "step": 10395 + }, + { + "epoch": 0.49, + "learning_rate": 1.91186536027378e-05, + "loss": 0.6176, + "step": 10400 + }, + { + "epoch": 0.49, + "learning_rate": 1.911818480146266e-05, + "loss": 0.294, + "step": 10405 + }, + { + "epoch": 0.49, + "learning_rate": 1.911771600018752e-05, + "loss": 0.1381, + "step": 10410 + }, + { + "epoch": 0.49, + "learning_rate": 1.911724719891238e-05, + "loss": 0.1054, + "step": 10415 + }, + { + "epoch": 0.49, + "learning_rate": 1.9116778397637245e-05, + "loss": 0.1755, + "step": 10420 + }, + { + "epoch": 0.49, + "learning_rate": 1.9116309596362105e-05, + "loss": 0.13, + "step": 10425 + }, + { + "epoch": 0.49, + "learning_rate": 1.9115840795086965e-05, + "loss": 0.1956, + "step": 10430 + }, + { + "epoch": 0.49, + "learning_rate": 1.9115371993811825e-05, + "loss": 0.272, + "step": 10435 + }, + { + "epoch": 0.49, + "learning_rate": 1.9114903192536685e-05, + "loss": 0.325, + "step": 10440 + }, + { + "epoch": 0.49, + "learning_rate": 1.9114434391261545e-05, + "loss": 0.4834, + "step": 10445 + }, + { + "epoch": 0.49, + "learning_rate": 1.9113965589986408e-05, + "loss": 0.4972, + "step": 10450 + }, + { + "epoch": 0.49, + "learning_rate": 1.9113496788711268e-05, + "loss": 0.31, + "step": 10455 + }, + { + "epoch": 0.49, + "learning_rate": 1.9113027987436128e-05, + "loss": 0.0929, + "step": 10460 + }, + { + "epoch": 0.49, + "learning_rate": 1.9112559186160988e-05, + "loss": 0.1527, + "step": 10465 + }, + { + "epoch": 0.49, + "learning_rate": 1.9112090384885848e-05, + "loss": 0.2049, + "step": 10470 + }, + { + "epoch": 0.49, + "learning_rate": 1.9111621583610708e-05, + "loss": 0.2407, + "step": 10475 + }, + { + "epoch": 0.49, + "learning_rate": 1.9111152782335567e-05, + "loss": 0.2558, + "step": 10480 + }, + { + "epoch": 0.49, + "learning_rate": 1.911068398106043e-05, + "loss": 0.2731, + "step": 10485 + }, + { + "epoch": 0.49, + "learning_rate": 1.911021517978529e-05, + "loss": 0.3283, + "step": 10490 + }, + { + "epoch": 0.49, + "learning_rate": 1.910974637851015e-05, + "loss": 0.4684, + "step": 10495 + }, + { + "epoch": 0.49, + "learning_rate": 1.9109277577235014e-05, + "loss": 0.4627, + "step": 10500 + }, + { + "epoch": 0.49, + "learning_rate": 1.9108808775959874e-05, + "loss": 0.2122, + "step": 10505 + }, + { + "epoch": 0.49, + "learning_rate": 1.9108339974684734e-05, + "loss": 0.1003, + "step": 10510 + }, + { + "epoch": 0.49, + "learning_rate": 1.9107871173409594e-05, + "loss": 0.1256, + "step": 10515 + }, + { + "epoch": 0.49, + "learning_rate": 1.9107402372134454e-05, + "loss": 0.0787, + "step": 10520 + }, + { + "epoch": 0.49, + "learning_rate": 1.9106933570859314e-05, + "loss": 0.1799, + "step": 10525 + }, + { + "epoch": 0.49, + "learning_rate": 1.9106464769584174e-05, + "loss": 0.2717, + "step": 10530 + }, + { + "epoch": 0.49, + "learning_rate": 1.9105995968309034e-05, + "loss": 0.2664, + "step": 10535 + }, + { + "epoch": 0.49, + "learning_rate": 1.9105527167033893e-05, + "loss": 0.3668, + "step": 10540 + }, + { + "epoch": 0.49, + "learning_rate": 1.9105058365758757e-05, + "loss": 0.3952, + "step": 10545 + }, + { + "epoch": 0.49, + "learning_rate": 1.9104589564483617e-05, + "loss": 0.5244, + "step": 10550 + }, + { + "epoch": 0.49, + "learning_rate": 1.9104120763208477e-05, + "loss": 0.257, + "step": 10555 + }, + { + "epoch": 0.49, + "learning_rate": 1.910365196193334e-05, + "loss": 0.1311, + "step": 10560 + }, + { + "epoch": 0.49, + "learning_rate": 1.91031831606582e-05, + "loss": 0.1379, + "step": 10565 + }, + { + "epoch": 0.49, + "learning_rate": 1.910271435938306e-05, + "loss": 0.2639, + "step": 10570 + }, + { + "epoch": 0.49, + "learning_rate": 1.910224555810792e-05, + "loss": 0.1975, + "step": 10575 + }, + { + "epoch": 0.49, + "learning_rate": 1.910177675683278e-05, + "loss": 0.1877, + "step": 10580 + }, + { + "epoch": 0.49, + "learning_rate": 1.910130795555764e-05, + "loss": 0.1974, + "step": 10585 + }, + { + "epoch": 0.49, + "learning_rate": 1.9100839154282503e-05, + "loss": 0.356, + "step": 10590 + }, + { + "epoch": 0.49, + "learning_rate": 1.9100370353007363e-05, + "loss": 0.3649, + "step": 10595 + }, + { + "epoch": 0.49, + "learning_rate": 1.9099901551732223e-05, + "loss": 0.6513, + "step": 10600 + }, + { + "epoch": 0.49, + "learning_rate": 1.9099432750457083e-05, + "loss": 0.219, + "step": 10605 + }, + { + "epoch": 0.5, + "learning_rate": 1.9098963949181943e-05, + "loss": 0.1712, + "step": 10610 + }, + { + "epoch": 0.5, + "learning_rate": 1.9098495147906803e-05, + "loss": 0.1444, + "step": 10615 + }, + { + "epoch": 0.5, + "learning_rate": 1.9098026346631663e-05, + "loss": 0.1519, + "step": 10620 + }, + { + "epoch": 0.5, + "learning_rate": 1.9097557545356526e-05, + "loss": 0.1516, + "step": 10625 + }, + { + "epoch": 0.5, + "learning_rate": 1.9097088744081386e-05, + "loss": 0.2533, + "step": 10630 + }, + { + "epoch": 0.5, + "learning_rate": 1.9096619942806246e-05, + "loss": 0.2046, + "step": 10635 + }, + { + "epoch": 0.5, + "learning_rate": 1.909615114153111e-05, + "loss": 0.2561, + "step": 10640 + }, + { + "epoch": 0.5, + "learning_rate": 1.909568234025597e-05, + "loss": 0.4102, + "step": 10645 + }, + { + "epoch": 0.5, + "learning_rate": 1.909521353898083e-05, + "loss": 0.4769, + "step": 10650 + }, + { + "epoch": 0.5, + "learning_rate": 1.909474473770569e-05, + "loss": 0.3088, + "step": 10655 + }, + { + "epoch": 0.5, + "learning_rate": 1.909427593643055e-05, + "loss": 0.1282, + "step": 10660 + }, + { + "epoch": 0.5, + "learning_rate": 1.909380713515541e-05, + "loss": 0.1414, + "step": 10665 + }, + { + "epoch": 0.5, + "learning_rate": 1.909333833388027e-05, + "loss": 0.1356, + "step": 10670 + }, + { + "epoch": 0.5, + "learning_rate": 1.909286953260513e-05, + "loss": 0.1508, + "step": 10675 + }, + { + "epoch": 0.5, + "learning_rate": 1.909240073132999e-05, + "loss": 0.2121, + "step": 10680 + }, + { + "epoch": 0.5, + "learning_rate": 1.9091931930054852e-05, + "loss": 0.2012, + "step": 10685 + }, + { + "epoch": 0.5, + "learning_rate": 1.9091463128779712e-05, + "loss": 0.4462, + "step": 10690 + }, + { + "epoch": 0.5, + "learning_rate": 1.9090994327504572e-05, + "loss": 0.3469, + "step": 10695 + }, + { + "epoch": 0.5, + "learning_rate": 1.909052552622943e-05, + "loss": 0.5109, + "step": 10700 + }, + { + "epoch": 0.5, + "learning_rate": 1.9090056724954295e-05, + "loss": 0.2304, + "step": 10705 + }, + { + "epoch": 0.5, + "learning_rate": 1.9089587923679155e-05, + "loss": 0.1118, + "step": 10710 + }, + { + "epoch": 0.5, + "learning_rate": 1.9089119122404015e-05, + "loss": 0.1731, + "step": 10715 + }, + { + "epoch": 0.5, + "learning_rate": 1.9088650321128875e-05, + "loss": 0.1422, + "step": 10720 + }, + { + "epoch": 0.5, + "learning_rate": 1.9088181519853735e-05, + "loss": 0.2179, + "step": 10725 + }, + { + "epoch": 0.5, + "learning_rate": 1.9087712718578598e-05, + "loss": 0.2427, + "step": 10730 + }, + { + "epoch": 0.5, + "learning_rate": 1.9087243917303458e-05, + "loss": 0.2411, + "step": 10735 + }, + { + "epoch": 0.5, + "learning_rate": 1.9086775116028318e-05, + "loss": 0.2579, + "step": 10740 + }, + { + "epoch": 0.5, + "learning_rate": 1.9086306314753178e-05, + "loss": 0.3481, + "step": 10745 + }, + { + "epoch": 0.5, + "learning_rate": 1.9085837513478038e-05, + "loss": 0.5099, + "step": 10750 + }, + { + "epoch": 0.5, + "learning_rate": 1.9085368712202898e-05, + "loss": 0.2352, + "step": 10755 + }, + { + "epoch": 0.5, + "learning_rate": 1.9084899910927758e-05, + "loss": 0.1045, + "step": 10760 + }, + { + "epoch": 0.5, + "learning_rate": 1.9084431109652618e-05, + "loss": 0.2044, + "step": 10765 + }, + { + "epoch": 0.5, + "learning_rate": 1.908396230837748e-05, + "loss": 0.1178, + "step": 10770 + }, + { + "epoch": 0.5, + "learning_rate": 1.908349350710234e-05, + "loss": 0.2247, + "step": 10775 + }, + { + "epoch": 0.5, + "learning_rate": 1.90830247058272e-05, + "loss": 0.1583, + "step": 10780 + }, + { + "epoch": 0.5, + "learning_rate": 1.9082555904552064e-05, + "loss": 0.2375, + "step": 10785 + }, + { + "epoch": 0.5, + "learning_rate": 1.9082087103276924e-05, + "loss": 0.3239, + "step": 10790 + }, + { + "epoch": 0.5, + "learning_rate": 1.9081618302001784e-05, + "loss": 0.3176, + "step": 10795 + }, + { + "epoch": 0.5, + "learning_rate": 1.9081149500726644e-05, + "loss": 0.5501, + "step": 10800 + }, + { + "epoch": 0.5, + "learning_rate": 1.9080680699451504e-05, + "loss": 0.2226, + "step": 10805 + }, + { + "epoch": 0.5, + "learning_rate": 1.9080211898176364e-05, + "loss": 0.085, + "step": 10810 + }, + { + "epoch": 0.5, + "learning_rate": 1.9079743096901224e-05, + "loss": 0.1627, + "step": 10815 + }, + { + "epoch": 0.5, + "learning_rate": 1.9079274295626084e-05, + "loss": 0.0776, + "step": 10820 + }, + { + "epoch": 0.51, + "learning_rate": 1.9078805494350947e-05, + "loss": 0.2069, + "step": 10825 + }, + { + "epoch": 0.51, + "learning_rate": 1.9078336693075807e-05, + "loss": 0.2966, + "step": 10830 + }, + { + "epoch": 0.51, + "learning_rate": 1.9077867891800667e-05, + "loss": 0.3082, + "step": 10835 + }, + { + "epoch": 0.51, + "learning_rate": 1.9077399090525527e-05, + "loss": 0.2768, + "step": 10840 + }, + { + "epoch": 0.51, + "learning_rate": 1.9076930289250387e-05, + "loss": 0.3287, + "step": 10845 + }, + { + "epoch": 0.51, + "learning_rate": 1.907646148797525e-05, + "loss": 0.6915, + "step": 10850 + }, + { + "epoch": 0.51, + "learning_rate": 1.907599268670011e-05, + "loss": 0.2431, + "step": 10855 + }, + { + "epoch": 0.51, + "learning_rate": 1.907552388542497e-05, + "loss": 0.1133, + "step": 10860 + }, + { + "epoch": 0.51, + "learning_rate": 1.907505508414983e-05, + "loss": 0.1717, + "step": 10865 + }, + { + "epoch": 0.51, + "learning_rate": 1.9074586282874693e-05, + "loss": 0.1157, + "step": 10870 + }, + { + "epoch": 0.51, + "learning_rate": 1.9074117481599553e-05, + "loss": 0.2148, + "step": 10875 + }, + { + "epoch": 0.51, + "learning_rate": 1.9073648680324413e-05, + "loss": 0.3085, + "step": 10880 + }, + { + "epoch": 0.51, + "learning_rate": 1.9073179879049273e-05, + "loss": 0.2192, + "step": 10885 + }, + { + "epoch": 0.51, + "learning_rate": 1.9072711077774133e-05, + "loss": 0.2851, + "step": 10890 + }, + { + "epoch": 0.51, + "learning_rate": 1.9072242276498993e-05, + "loss": 0.2993, + "step": 10895 + }, + { + "epoch": 0.51, + "learning_rate": 1.9071773475223853e-05, + "loss": 0.3583, + "step": 10900 + }, + { + "epoch": 0.51, + "learning_rate": 1.9071304673948713e-05, + "loss": 0.3118, + "step": 10905 + }, + { + "epoch": 0.51, + "learning_rate": 1.9070835872673576e-05, + "loss": 0.0893, + "step": 10910 + }, + { + "epoch": 0.51, + "learning_rate": 1.9070367071398436e-05, + "loss": 0.0958, + "step": 10915 + }, + { + "epoch": 0.51, + "learning_rate": 1.9069898270123296e-05, + "loss": 0.1406, + "step": 10920 + }, + { + "epoch": 0.51, + "learning_rate": 1.9069429468848156e-05, + "loss": 0.1843, + "step": 10925 + }, + { + "epoch": 0.51, + "learning_rate": 1.906896066757302e-05, + "loss": 0.2012, + "step": 10930 + }, + { + "epoch": 0.51, + "learning_rate": 1.906849186629788e-05, + "loss": 0.2509, + "step": 10935 + }, + { + "epoch": 0.51, + "learning_rate": 1.906802306502274e-05, + "loss": 0.3919, + "step": 10940 + }, + { + "epoch": 0.51, + "learning_rate": 1.90675542637476e-05, + "loss": 0.3983, + "step": 10945 + }, + { + "epoch": 0.51, + "learning_rate": 1.906708546247246e-05, + "loss": 0.5589, + "step": 10950 + }, + { + "epoch": 0.51, + "learning_rate": 1.906661666119732e-05, + "loss": 0.2583, + "step": 10955 + }, + { + "epoch": 0.51, + "learning_rate": 1.9066147859922182e-05, + "loss": 0.1028, + "step": 10960 + }, + { + "epoch": 0.51, + "learning_rate": 1.9065679058647042e-05, + "loss": 0.1177, + "step": 10965 + }, + { + "epoch": 0.51, + "learning_rate": 1.9065210257371902e-05, + "loss": 0.2102, + "step": 10970 + }, + { + "epoch": 0.51, + "learning_rate": 1.9064741456096762e-05, + "loss": 0.1336, + "step": 10975 + }, + { + "epoch": 0.51, + "learning_rate": 1.9064272654821622e-05, + "loss": 0.2197, + "step": 10980 + }, + { + "epoch": 0.51, + "learning_rate": 1.9063803853546482e-05, + "loss": 0.2254, + "step": 10985 + }, + { + "epoch": 0.51, + "learning_rate": 1.9063335052271345e-05, + "loss": 0.3272, + "step": 10990 + }, + { + "epoch": 0.51, + "learning_rate": 1.9062866250996205e-05, + "loss": 0.3669, + "step": 10995 + }, + { + "epoch": 0.51, + "learning_rate": 1.9062397449721065e-05, + "loss": 0.5502, + "step": 11000 + }, + { + "epoch": 0.51, + "learning_rate": 1.9061928648445925e-05, + "loss": 0.2126, + "step": 11005 + }, + { + "epoch": 0.51, + "learning_rate": 1.9061459847170788e-05, + "loss": 0.1083, + "step": 11010 + }, + { + "epoch": 0.51, + "learning_rate": 1.9060991045895648e-05, + "loss": 0.1021, + "step": 11015 + }, + { + "epoch": 0.51, + "learning_rate": 1.9060522244620508e-05, + "loss": 0.2316, + "step": 11020 + }, + { + "epoch": 0.51, + "learning_rate": 1.9060053443345368e-05, + "loss": 0.1626, + "step": 11025 + }, + { + "epoch": 0.51, + "learning_rate": 1.9059584642070228e-05, + "loss": 0.1842, + "step": 11030 + }, + { + "epoch": 0.51, + "learning_rate": 1.9059115840795088e-05, + "loss": 0.2737, + "step": 11035 + }, + { + "epoch": 0.52, + "learning_rate": 1.9058647039519948e-05, + "loss": 0.2535, + "step": 11040 + }, + { + "epoch": 0.52, + "learning_rate": 1.9058178238244808e-05, + "loss": 0.49, + "step": 11045 + }, + { + "epoch": 0.52, + "learning_rate": 1.9057709436969668e-05, + "loss": 0.7787, + "step": 11050 + }, + { + "epoch": 0.52, + "learning_rate": 1.905724063569453e-05, + "loss": 0.2668, + "step": 11055 + }, + { + "epoch": 0.52, + "learning_rate": 1.905677183441939e-05, + "loss": 0.097, + "step": 11060 + }, + { + "epoch": 0.52, + "learning_rate": 1.905630303314425e-05, + "loss": 0.0949, + "step": 11065 + }, + { + "epoch": 0.52, + "learning_rate": 1.9055834231869114e-05, + "loss": 0.1723, + "step": 11070 + }, + { + "epoch": 0.52, + "learning_rate": 1.9055365430593974e-05, + "loss": 0.2328, + "step": 11075 + }, + { + "epoch": 0.52, + "learning_rate": 1.9054896629318834e-05, + "loss": 0.2321, + "step": 11080 + }, + { + "epoch": 0.52, + "learning_rate": 1.9054427828043694e-05, + "loss": 0.3256, + "step": 11085 + }, + { + "epoch": 0.52, + "learning_rate": 1.9053959026768554e-05, + "loss": 0.331, + "step": 11090 + }, + { + "epoch": 0.52, + "learning_rate": 1.9053490225493414e-05, + "loss": 0.3254, + "step": 11095 + }, + { + "epoch": 0.52, + "learning_rate": 1.9053021424218277e-05, + "loss": 0.6373, + "step": 11100 + }, + { + "epoch": 0.52, + "learning_rate": 1.9052552622943137e-05, + "loss": 0.2815, + "step": 11105 + }, + { + "epoch": 0.52, + "learning_rate": 1.9052083821667997e-05, + "loss": 0.1203, + "step": 11110 + }, + { + "epoch": 0.52, + "learning_rate": 1.9051615020392857e-05, + "loss": 0.2007, + "step": 11115 + }, + { + "epoch": 0.52, + "learning_rate": 1.9051146219117717e-05, + "loss": 0.1206, + "step": 11120 + }, + { + "epoch": 0.52, + "learning_rate": 1.9050677417842577e-05, + "loss": 0.2319, + "step": 11125 + }, + { + "epoch": 0.52, + "learning_rate": 1.9050208616567437e-05, + "loss": 0.1269, + "step": 11130 + }, + { + "epoch": 0.52, + "learning_rate": 1.90497398152923e-05, + "loss": 0.2155, + "step": 11135 + }, + { + "epoch": 0.52, + "learning_rate": 1.904927101401716e-05, + "loss": 0.1693, + "step": 11140 + }, + { + "epoch": 0.52, + "learning_rate": 1.904880221274202e-05, + "loss": 0.4148, + "step": 11145 + }, + { + "epoch": 0.52, + "learning_rate": 1.9048333411466883e-05, + "loss": 0.6929, + "step": 11150 + }, + { + "epoch": 0.52, + "learning_rate": 1.9047864610191743e-05, + "loss": 0.2697, + "step": 11155 + }, + { + "epoch": 0.52, + "learning_rate": 1.9047395808916603e-05, + "loss": 0.115, + "step": 11160 + }, + { + "epoch": 0.52, + "learning_rate": 1.9046927007641463e-05, + "loss": 0.0827, + "step": 11165 + }, + { + "epoch": 0.52, + "learning_rate": 1.9046458206366323e-05, + "loss": 0.132, + "step": 11170 + }, + { + "epoch": 0.52, + "learning_rate": 1.9045989405091183e-05, + "loss": 0.2029, + "step": 11175 + }, + { + "epoch": 0.52, + "learning_rate": 1.9045520603816043e-05, + "loss": 0.1555, + "step": 11180 + }, + { + "epoch": 0.52, + "learning_rate": 1.9045051802540903e-05, + "loss": 0.2248, + "step": 11185 + }, + { + "epoch": 0.52, + "learning_rate": 1.9044583001265763e-05, + "loss": 0.3005, + "step": 11190 + }, + { + "epoch": 0.52, + "learning_rate": 1.9044114199990626e-05, + "loss": 0.335, + "step": 11195 + }, + { + "epoch": 0.52, + "learning_rate": 1.9043645398715486e-05, + "loss": 0.636, + "step": 11200 + }, + { + "epoch": 0.52, + "learning_rate": 1.9043176597440346e-05, + "loss": 0.2538, + "step": 11205 + }, + { + "epoch": 0.52, + "learning_rate": 1.9042707796165206e-05, + "loss": 0.1232, + "step": 11210 + }, + { + "epoch": 0.52, + "learning_rate": 1.904223899489007e-05, + "loss": 0.0881, + "step": 11215 + }, + { + "epoch": 0.52, + "learning_rate": 1.904177019361493e-05, + "loss": 0.1117, + "step": 11220 + }, + { + "epoch": 0.52, + "learning_rate": 1.904130139233979e-05, + "loss": 0.1845, + "step": 11225 + }, + { + "epoch": 0.52, + "learning_rate": 1.904083259106465e-05, + "loss": 0.2521, + "step": 11230 + }, + { + "epoch": 0.52, + "learning_rate": 1.904036378978951e-05, + "loss": 0.2299, + "step": 11235 + }, + { + "epoch": 0.52, + "learning_rate": 1.9039894988514372e-05, + "loss": 0.2237, + "step": 11240 + }, + { + "epoch": 0.52, + "learning_rate": 1.9039426187239232e-05, + "loss": 0.4258, + "step": 11245 + }, + { + "epoch": 0.52, + "learning_rate": 1.9038957385964092e-05, + "loss": 0.6981, + "step": 11250 + }, + { + "epoch": 0.53, + "learning_rate": 1.9038488584688952e-05, + "loss": 0.2667, + "step": 11255 + }, + { + "epoch": 0.53, + "learning_rate": 1.9038019783413812e-05, + "loss": 0.093, + "step": 11260 + }, + { + "epoch": 0.53, + "learning_rate": 1.9037550982138672e-05, + "loss": 0.1913, + "step": 11265 + }, + { + "epoch": 0.53, + "learning_rate": 1.9037082180863532e-05, + "loss": 0.1173, + "step": 11270 + }, + { + "epoch": 0.53, + "learning_rate": 1.9036613379588392e-05, + "loss": 0.2151, + "step": 11275 + }, + { + "epoch": 0.53, + "learning_rate": 1.9036144578313255e-05, + "loss": 0.1969, + "step": 11280 + }, + { + "epoch": 0.53, + "learning_rate": 1.9035675777038115e-05, + "loss": 0.2601, + "step": 11285 + }, + { + "epoch": 0.53, + "learning_rate": 1.9035206975762975e-05, + "loss": 0.2812, + "step": 11290 + }, + { + "epoch": 0.53, + "learning_rate": 1.9034738174487838e-05, + "loss": 0.3009, + "step": 11295 + }, + { + "epoch": 0.53, + "learning_rate": 1.9034269373212698e-05, + "loss": 0.5065, + "step": 11300 + }, + { + "epoch": 0.53, + "learning_rate": 1.9033800571937558e-05, + "loss": 0.2912, + "step": 11305 + }, + { + "epoch": 0.53, + "learning_rate": 1.9033331770662418e-05, + "loss": 0.0743, + "step": 11310 + }, + { + "epoch": 0.53, + "learning_rate": 1.9032862969387278e-05, + "loss": 0.1384, + "step": 11315 + }, + { + "epoch": 0.53, + "learning_rate": 1.9032394168112138e-05, + "loss": 0.1069, + "step": 11320 + }, + { + "epoch": 0.53, + "learning_rate": 1.9031925366836998e-05, + "loss": 0.1452, + "step": 11325 + }, + { + "epoch": 0.53, + "learning_rate": 1.9031456565561858e-05, + "loss": 0.162, + "step": 11330 + }, + { + "epoch": 0.53, + "learning_rate": 1.903098776428672e-05, + "loss": 0.2265, + "step": 11335 + }, + { + "epoch": 0.53, + "learning_rate": 1.903051896301158e-05, + "loss": 0.277, + "step": 11340 + }, + { + "epoch": 0.53, + "learning_rate": 1.903005016173644e-05, + "loss": 0.3428, + "step": 11345 + }, + { + "epoch": 0.53, + "learning_rate": 1.90295813604613e-05, + "loss": 0.4802, + "step": 11350 + }, + { + "epoch": 0.53, + "learning_rate": 1.902911255918616e-05, + "loss": 0.2706, + "step": 11355 + }, + { + "epoch": 0.53, + "learning_rate": 1.9028643757911024e-05, + "loss": 0.087, + "step": 11360 + }, + { + "epoch": 0.53, + "learning_rate": 1.9028174956635884e-05, + "loss": 0.1064, + "step": 11365 + }, + { + "epoch": 0.53, + "learning_rate": 1.9027706155360744e-05, + "loss": 0.0903, + "step": 11370 + }, + { + "epoch": 0.53, + "learning_rate": 1.9027237354085604e-05, + "loss": 0.2627, + "step": 11375 + }, + { + "epoch": 0.53, + "learning_rate": 1.9026768552810467e-05, + "loss": 0.1548, + "step": 11380 + }, + { + "epoch": 0.53, + "learning_rate": 1.9026299751535327e-05, + "loss": 0.2691, + "step": 11385 + }, + { + "epoch": 0.53, + "learning_rate": 1.9025830950260187e-05, + "loss": 0.2731, + "step": 11390 + }, + { + "epoch": 0.53, + "learning_rate": 1.9025362148985047e-05, + "loss": 0.3839, + "step": 11395 + }, + { + "epoch": 0.53, + "learning_rate": 1.9024893347709907e-05, + "loss": 0.6679, + "step": 11400 + }, + { + "epoch": 0.53, + "learning_rate": 1.9024424546434767e-05, + "loss": 0.3018, + "step": 11405 + }, + { + "epoch": 0.53, + "learning_rate": 1.9023955745159627e-05, + "loss": 0.1024, + "step": 11410 + }, + { + "epoch": 0.53, + "learning_rate": 1.9023486943884487e-05, + "loss": 0.1274, + "step": 11415 + }, + { + "epoch": 0.53, + "learning_rate": 1.902301814260935e-05, + "loss": 0.1693, + "step": 11420 + }, + { + "epoch": 0.53, + "learning_rate": 1.902254934133421e-05, + "loss": 0.1957, + "step": 11425 + }, + { + "epoch": 0.53, + "learning_rate": 1.902208054005907e-05, + "loss": 0.1639, + "step": 11430 + }, + { + "epoch": 0.53, + "learning_rate": 1.902161173878393e-05, + "loss": 0.2524, + "step": 11435 + }, + { + "epoch": 0.53, + "learning_rate": 1.9021142937508793e-05, + "loss": 0.4686, + "step": 11440 + }, + { + "epoch": 0.53, + "learning_rate": 1.9020674136233653e-05, + "loss": 0.4812, + "step": 11445 + }, + { + "epoch": 0.53, + "learning_rate": 1.9020205334958513e-05, + "loss": 0.4917, + "step": 11450 + }, + { + "epoch": 0.53, + "learning_rate": 1.9019736533683373e-05, + "loss": 0.2595, + "step": 11455 + }, + { + "epoch": 0.53, + "learning_rate": 1.9019267732408233e-05, + "loss": 0.0769, + "step": 11460 + }, + { + "epoch": 0.53, + "learning_rate": 1.9018798931133093e-05, + "loss": 0.1518, + "step": 11465 + }, + { + "epoch": 0.54, + "learning_rate": 1.9018330129857953e-05, + "loss": 0.1158, + "step": 11470 + }, + { + "epoch": 0.54, + "learning_rate": 1.9017861328582816e-05, + "loss": 0.1847, + "step": 11475 + }, + { + "epoch": 0.54, + "learning_rate": 1.9017392527307676e-05, + "loss": 0.1882, + "step": 11480 + }, + { + "epoch": 0.54, + "learning_rate": 1.9016923726032536e-05, + "loss": 0.2294, + "step": 11485 + }, + { + "epoch": 0.54, + "learning_rate": 1.9016454924757396e-05, + "loss": 0.371, + "step": 11490 + }, + { + "epoch": 0.54, + "learning_rate": 1.9015986123482256e-05, + "loss": 0.3808, + "step": 11495 + }, + { + "epoch": 0.54, + "learning_rate": 1.901551732220712e-05, + "loss": 0.5649, + "step": 11500 + }, + { + "epoch": 0.54, + "learning_rate": 1.901504852093198e-05, + "loss": 0.3712, + "step": 11505 + }, + { + "epoch": 0.54, + "learning_rate": 1.901457971965684e-05, + "loss": 0.1052, + "step": 11510 + }, + { + "epoch": 0.54, + "learning_rate": 1.90141109183817e-05, + "loss": 0.1216, + "step": 11515 + }, + { + "epoch": 0.54, + "learning_rate": 1.9013642117106562e-05, + "loss": 0.1673, + "step": 11520 + }, + { + "epoch": 0.54, + "learning_rate": 1.9013173315831422e-05, + "loss": 0.1775, + "step": 11525 + }, + { + "epoch": 0.54, + "learning_rate": 1.9012704514556282e-05, + "loss": 0.2124, + "step": 11530 + }, + { + "epoch": 0.54, + "learning_rate": 1.9012235713281142e-05, + "loss": 0.2195, + "step": 11535 + }, + { + "epoch": 0.54, + "learning_rate": 1.9011766912006002e-05, + "loss": 0.2557, + "step": 11540 + }, + { + "epoch": 0.54, + "learning_rate": 1.9011298110730862e-05, + "loss": 0.3398, + "step": 11545 + }, + { + "epoch": 0.54, + "learning_rate": 1.9010829309455722e-05, + "loss": 0.6744, + "step": 11550 + }, + { + "epoch": 0.54, + "learning_rate": 1.9010360508180582e-05, + "loss": 0.2358, + "step": 11555 + }, + { + "epoch": 0.54, + "learning_rate": 1.9009891706905442e-05, + "loss": 0.0978, + "step": 11560 + }, + { + "epoch": 0.54, + "learning_rate": 1.9009422905630305e-05, + "loss": 0.1722, + "step": 11565 + }, + { + "epoch": 0.54, + "learning_rate": 1.9008954104355165e-05, + "loss": 0.1497, + "step": 11570 + }, + { + "epoch": 0.54, + "learning_rate": 1.9008485303080025e-05, + "loss": 0.2723, + "step": 11575 + }, + { + "epoch": 0.54, + "learning_rate": 1.900801650180489e-05, + "loss": 0.1787, + "step": 11580 + }, + { + "epoch": 0.54, + "learning_rate": 1.900754770052975e-05, + "loss": 0.3611, + "step": 11585 + }, + { + "epoch": 0.54, + "learning_rate": 1.9007078899254608e-05, + "loss": 0.2891, + "step": 11590 + }, + { + "epoch": 0.54, + "learning_rate": 1.9006610097979468e-05, + "loss": 0.3841, + "step": 11595 + }, + { + "epoch": 0.54, + "learning_rate": 1.9006141296704328e-05, + "loss": 0.6541, + "step": 11600 + }, + { + "epoch": 0.54, + "learning_rate": 1.9005672495429188e-05, + "loss": 0.234, + "step": 11605 + }, + { + "epoch": 0.54, + "learning_rate": 1.900520369415405e-05, + "loss": 0.1288, + "step": 11610 + }, + { + "epoch": 0.54, + "learning_rate": 1.900473489287891e-05, + "loss": 0.1206, + "step": 11615 + }, + { + "epoch": 0.54, + "learning_rate": 1.900426609160377e-05, + "loss": 0.176, + "step": 11620 + }, + { + "epoch": 0.54, + "learning_rate": 1.900379729032863e-05, + "loss": 0.2343, + "step": 11625 + }, + { + "epoch": 0.54, + "learning_rate": 1.900332848905349e-05, + "loss": 0.2604, + "step": 11630 + }, + { + "epoch": 0.54, + "learning_rate": 1.900285968777835e-05, + "loss": 0.2572, + "step": 11635 + }, + { + "epoch": 0.54, + "learning_rate": 1.900239088650321e-05, + "loss": 0.2942, + "step": 11640 + }, + { + "epoch": 0.54, + "learning_rate": 1.9001922085228074e-05, + "loss": 0.4468, + "step": 11645 + }, + { + "epoch": 0.54, + "learning_rate": 1.9001453283952934e-05, + "loss": 0.7121, + "step": 11650 + }, + { + "epoch": 0.54, + "learning_rate": 1.9000984482677794e-05, + "loss": 0.2138, + "step": 11655 + }, + { + "epoch": 0.54, + "learning_rate": 1.9000515681402657e-05, + "loss": 0.1269, + "step": 11660 + }, + { + "epoch": 0.54, + "learning_rate": 1.9000046880127517e-05, + "loss": 0.1191, + "step": 11665 + }, + { + "epoch": 0.54, + "learning_rate": 1.8999578078852377e-05, + "loss": 0.2002, + "step": 11670 + }, + { + "epoch": 0.54, + "learning_rate": 1.8999109277577237e-05, + "loss": 0.1856, + "step": 11675 + }, + { + "epoch": 0.55, + "learning_rate": 1.8998640476302097e-05, + "loss": 0.1776, + "step": 11680 + }, + { + "epoch": 0.55, + "learning_rate": 1.8998171675026957e-05, + "loss": 0.3287, + "step": 11685 + }, + { + "epoch": 0.55, + "learning_rate": 1.8997702873751817e-05, + "loss": 0.2521, + "step": 11690 + }, + { + "epoch": 0.55, + "learning_rate": 1.8997234072476677e-05, + "loss": 0.2936, + "step": 11695 + }, + { + "epoch": 0.55, + "learning_rate": 1.8996765271201537e-05, + "loss": 0.5431, + "step": 11700 + }, + { + "epoch": 0.55, + "learning_rate": 1.89962964699264e-05, + "loss": 0.2955, + "step": 11705 + }, + { + "epoch": 0.55, + "learning_rate": 1.899582766865126e-05, + "loss": 0.067, + "step": 11710 + }, + { + "epoch": 0.55, + "learning_rate": 1.899535886737612e-05, + "loss": 0.1617, + "step": 11715 + }, + { + "epoch": 0.55, + "learning_rate": 1.899489006610098e-05, + "loss": 0.2038, + "step": 11720 + }, + { + "epoch": 0.55, + "learning_rate": 1.8994421264825843e-05, + "loss": 0.259, + "step": 11725 + }, + { + "epoch": 0.55, + "learning_rate": 1.8993952463550703e-05, + "loss": 0.18, + "step": 11730 + }, + { + "epoch": 0.55, + "learning_rate": 1.8993483662275563e-05, + "loss": 0.2509, + "step": 11735 + }, + { + "epoch": 0.55, + "learning_rate": 1.8993014861000423e-05, + "loss": 0.3614, + "step": 11740 + }, + { + "epoch": 0.55, + "learning_rate": 1.8992546059725283e-05, + "loss": 0.4022, + "step": 11745 + }, + { + "epoch": 0.55, + "learning_rate": 1.8992077258450146e-05, + "loss": 0.5694, + "step": 11750 + }, + { + "epoch": 0.55, + "learning_rate": 1.8991608457175006e-05, + "loss": 0.2532, + "step": 11755 + }, + { + "epoch": 0.55, + "learning_rate": 1.8991139655899866e-05, + "loss": 0.1058, + "step": 11760 + }, + { + "epoch": 0.55, + "learning_rate": 1.8990670854624726e-05, + "loss": 0.133, + "step": 11765 + }, + { + "epoch": 0.55, + "learning_rate": 1.8990202053349586e-05, + "loss": 0.1465, + "step": 11770 + }, + { + "epoch": 0.55, + "learning_rate": 1.8989733252074446e-05, + "loss": 0.1479, + "step": 11775 + }, + { + "epoch": 0.55, + "learning_rate": 1.8989264450799306e-05, + "loss": 0.2112, + "step": 11780 + }, + { + "epoch": 0.55, + "learning_rate": 1.8988795649524166e-05, + "loss": 0.229, + "step": 11785 + }, + { + "epoch": 0.55, + "learning_rate": 1.898832684824903e-05, + "loss": 0.2436, + "step": 11790 + }, + { + "epoch": 0.55, + "learning_rate": 1.898785804697389e-05, + "loss": 0.3649, + "step": 11795 + }, + { + "epoch": 0.55, + "learning_rate": 1.898738924569875e-05, + "loss": 0.7018, + "step": 11800 + }, + { + "epoch": 0.55, + "learning_rate": 1.8986920444423613e-05, + "loss": 0.255, + "step": 11805 + }, + { + "epoch": 0.55, + "learning_rate": 1.8986451643148472e-05, + "loss": 0.13, + "step": 11810 + }, + { + "epoch": 0.55, + "learning_rate": 1.8985982841873332e-05, + "loss": 0.1292, + "step": 11815 + }, + { + "epoch": 0.55, + "learning_rate": 1.8985514040598192e-05, + "loss": 0.1515, + "step": 11820 + }, + { + "epoch": 0.55, + "learning_rate": 1.8985045239323052e-05, + "loss": 0.1737, + "step": 11825 + }, + { + "epoch": 0.55, + "learning_rate": 1.8984576438047912e-05, + "loss": 0.1838, + "step": 11830 + }, + { + "epoch": 0.55, + "learning_rate": 1.8984107636772772e-05, + "loss": 0.2843, + "step": 11835 + }, + { + "epoch": 0.55, + "learning_rate": 1.8983638835497632e-05, + "loss": 0.309, + "step": 11840 + }, + { + "epoch": 0.55, + "learning_rate": 1.8983170034222495e-05, + "loss": 0.3991, + "step": 11845 + }, + { + "epoch": 0.55, + "learning_rate": 1.8982701232947355e-05, + "loss": 0.7293, + "step": 11850 + }, + { + "epoch": 0.55, + "learning_rate": 1.8982232431672215e-05, + "loss": 0.2043, + "step": 11855 + }, + { + "epoch": 0.55, + "learning_rate": 1.8981763630397075e-05, + "loss": 0.1122, + "step": 11860 + }, + { + "epoch": 0.55, + "learning_rate": 1.8981294829121935e-05, + "loss": 0.1696, + "step": 11865 + }, + { + "epoch": 0.55, + "learning_rate": 1.89808260278468e-05, + "loss": 0.1471, + "step": 11870 + }, + { + "epoch": 0.55, + "learning_rate": 1.898035722657166e-05, + "loss": 0.2066, + "step": 11875 + }, + { + "epoch": 0.55, + "learning_rate": 1.8979888425296518e-05, + "loss": 0.2047, + "step": 11880 + }, + { + "epoch": 0.55, + "learning_rate": 1.8979419624021378e-05, + "loss": 0.2603, + "step": 11885 + }, + { + "epoch": 0.55, + "learning_rate": 1.897895082274624e-05, + "loss": 0.3069, + "step": 11890 + }, + { + "epoch": 0.56, + "learning_rate": 1.89784820214711e-05, + "loss": 0.4019, + "step": 11895 + }, + { + "epoch": 0.56, + "learning_rate": 1.897801322019596e-05, + "loss": 0.5205, + "step": 11900 + }, + { + "epoch": 0.56, + "learning_rate": 1.897754441892082e-05, + "loss": 0.2621, + "step": 11905 + }, + { + "epoch": 0.56, + "learning_rate": 1.897707561764568e-05, + "loss": 0.1128, + "step": 11910 + }, + { + "epoch": 0.56, + "learning_rate": 1.897660681637054e-05, + "loss": 0.1462, + "step": 11915 + }, + { + "epoch": 0.56, + "learning_rate": 1.89761380150954e-05, + "loss": 0.1285, + "step": 11920 + }, + { + "epoch": 0.56, + "learning_rate": 1.897566921382026e-05, + "loss": 0.1582, + "step": 11925 + }, + { + "epoch": 0.56, + "learning_rate": 1.8975200412545124e-05, + "loss": 0.2148, + "step": 11930 + }, + { + "epoch": 0.56, + "learning_rate": 1.8974731611269984e-05, + "loss": 0.2385, + "step": 11935 + }, + { + "epoch": 0.56, + "learning_rate": 1.8974262809994844e-05, + "loss": 0.2819, + "step": 11940 + }, + { + "epoch": 0.56, + "learning_rate": 1.8973794008719704e-05, + "loss": 0.2586, + "step": 11945 + }, + { + "epoch": 0.56, + "learning_rate": 1.8973325207444568e-05, + "loss": 0.7001, + "step": 11950 + }, + { + "epoch": 0.56, + "learning_rate": 1.8972856406169427e-05, + "loss": 0.2394, + "step": 11955 + }, + { + "epoch": 0.56, + "learning_rate": 1.8972387604894287e-05, + "loss": 0.0715, + "step": 11960 + }, + { + "epoch": 0.56, + "learning_rate": 1.8971918803619147e-05, + "loss": 0.1559, + "step": 11965 + }, + { + "epoch": 0.56, + "learning_rate": 1.8971450002344007e-05, + "loss": 0.164, + "step": 11970 + }, + { + "epoch": 0.56, + "learning_rate": 1.8970981201068867e-05, + "loss": 0.1327, + "step": 11975 + }, + { + "epoch": 0.56, + "learning_rate": 1.8970512399793727e-05, + "loss": 0.2868, + "step": 11980 + }, + { + "epoch": 0.56, + "learning_rate": 1.897004359851859e-05, + "loss": 0.2215, + "step": 11985 + }, + { + "epoch": 0.56, + "learning_rate": 1.896957479724345e-05, + "loss": 0.3209, + "step": 11990 + }, + { + "epoch": 0.56, + "learning_rate": 1.896910599596831e-05, + "loss": 0.3683, + "step": 11995 + }, + { + "epoch": 0.56, + "learning_rate": 1.896863719469317e-05, + "loss": 0.6778, + "step": 12000 + }, + { + "epoch": 0.56, + "learning_rate": 1.896816839341803e-05, + "loss": 0.2626, + "step": 12005 + }, + { + "epoch": 0.56, + "learning_rate": 1.8967699592142894e-05, + "loss": 0.1251, + "step": 12010 + }, + { + "epoch": 0.56, + "learning_rate": 1.8967230790867753e-05, + "loss": 0.134, + "step": 12015 + }, + { + "epoch": 0.56, + "learning_rate": 1.8966761989592613e-05, + "loss": 0.1713, + "step": 12020 + }, + { + "epoch": 0.56, + "learning_rate": 1.8966293188317473e-05, + "loss": 0.1434, + "step": 12025 + }, + { + "epoch": 0.56, + "learning_rate": 1.8965824387042337e-05, + "loss": 0.2269, + "step": 12030 + }, + { + "epoch": 0.56, + "learning_rate": 1.8965355585767197e-05, + "loss": 0.1525, + "step": 12035 + }, + { + "epoch": 0.56, + "learning_rate": 1.8964886784492056e-05, + "loss": 0.2387, + "step": 12040 + }, + { + "epoch": 0.56, + "learning_rate": 1.8964417983216916e-05, + "loss": 0.3101, + "step": 12045 + }, + { + "epoch": 0.56, + "learning_rate": 1.8963949181941776e-05, + "loss": 0.5783, + "step": 12050 + }, + { + "epoch": 0.56, + "learning_rate": 1.8963480380666636e-05, + "loss": 0.2279, + "step": 12055 + }, + { + "epoch": 0.56, + "learning_rate": 1.8963011579391496e-05, + "loss": 0.1041, + "step": 12060 + }, + { + "epoch": 0.56, + "learning_rate": 1.8962542778116356e-05, + "loss": 0.1346, + "step": 12065 + }, + { + "epoch": 0.56, + "learning_rate": 1.8962073976841216e-05, + "loss": 0.1, + "step": 12070 + }, + { + "epoch": 0.56, + "learning_rate": 1.896160517556608e-05, + "loss": 0.1159, + "step": 12075 + }, + { + "epoch": 0.56, + "learning_rate": 1.896113637429094e-05, + "loss": 0.2763, + "step": 12080 + }, + { + "epoch": 0.56, + "learning_rate": 1.89606675730158e-05, + "loss": 0.2358, + "step": 12085 + }, + { + "epoch": 0.56, + "learning_rate": 1.8960198771740663e-05, + "loss": 0.3385, + "step": 12090 + }, + { + "epoch": 0.56, + "learning_rate": 1.8959729970465523e-05, + "loss": 0.3224, + "step": 12095 + }, + { + "epoch": 0.56, + "learning_rate": 1.8959261169190382e-05, + "loss": 0.6675, + "step": 12100 + }, + { + "epoch": 0.56, + "learning_rate": 1.8958792367915242e-05, + "loss": 0.2995, + "step": 12105 + }, + { + "epoch": 0.57, + "learning_rate": 1.8958323566640102e-05, + "loss": 0.0414, + "step": 12110 + }, + { + "epoch": 0.57, + "learning_rate": 1.8957854765364962e-05, + "loss": 0.112, + "step": 12115 + }, + { + "epoch": 0.57, + "learning_rate": 1.8957385964089822e-05, + "loss": 0.1313, + "step": 12120 + }, + { + "epoch": 0.57, + "learning_rate": 1.8956917162814686e-05, + "loss": 0.1316, + "step": 12125 + }, + { + "epoch": 0.57, + "learning_rate": 1.8956448361539545e-05, + "loss": 0.1418, + "step": 12130 + }, + { + "epoch": 0.57, + "learning_rate": 1.8955979560264405e-05, + "loss": 0.2418, + "step": 12135 + }, + { + "epoch": 0.57, + "learning_rate": 1.8955510758989265e-05, + "loss": 0.3003, + "step": 12140 + }, + { + "epoch": 0.57, + "learning_rate": 1.8955041957714125e-05, + "loss": 0.4785, + "step": 12145 + }, + { + "epoch": 0.57, + "learning_rate": 1.8954573156438985e-05, + "loss": 0.5624, + "step": 12150 + }, + { + "epoch": 0.57, + "learning_rate": 1.895410435516385e-05, + "loss": 0.2527, + "step": 12155 + }, + { + "epoch": 0.57, + "learning_rate": 1.895363555388871e-05, + "loss": 0.0963, + "step": 12160 + }, + { + "epoch": 0.57, + "learning_rate": 1.895316675261357e-05, + "loss": 0.0813, + "step": 12165 + }, + { + "epoch": 0.57, + "learning_rate": 1.8952697951338432e-05, + "loss": 0.1403, + "step": 12170 + }, + { + "epoch": 0.57, + "learning_rate": 1.895222915006329e-05, + "loss": 0.1686, + "step": 12175 + }, + { + "epoch": 0.57, + "learning_rate": 1.895176034878815e-05, + "loss": 0.1835, + "step": 12180 + }, + { + "epoch": 0.57, + "learning_rate": 1.895129154751301e-05, + "loss": 0.1565, + "step": 12185 + }, + { + "epoch": 0.57, + "learning_rate": 1.895082274623787e-05, + "loss": 0.383, + "step": 12190 + }, + { + "epoch": 0.57, + "learning_rate": 1.895035394496273e-05, + "loss": 0.375, + "step": 12195 + }, + { + "epoch": 0.57, + "learning_rate": 1.894988514368759e-05, + "loss": 0.8233, + "step": 12200 + }, + { + "epoch": 0.57, + "learning_rate": 1.894941634241245e-05, + "loss": 0.2411, + "step": 12205 + }, + { + "epoch": 0.57, + "learning_rate": 1.894894754113731e-05, + "loss": 0.0966, + "step": 12210 + }, + { + "epoch": 0.57, + "learning_rate": 1.8948478739862174e-05, + "loss": 0.1386, + "step": 12215 + }, + { + "epoch": 0.57, + "learning_rate": 1.8948009938587034e-05, + "loss": 0.2216, + "step": 12220 + }, + { + "epoch": 0.57, + "learning_rate": 1.8947541137311894e-05, + "loss": 0.181, + "step": 12225 + }, + { + "epoch": 0.57, + "learning_rate": 1.8947072336036754e-05, + "loss": 0.1556, + "step": 12230 + }, + { + "epoch": 0.57, + "learning_rate": 1.8946603534761618e-05, + "loss": 0.2458, + "step": 12235 + }, + { + "epoch": 0.57, + "learning_rate": 1.8946134733486478e-05, + "loss": 0.2611, + "step": 12240 + }, + { + "epoch": 0.57, + "learning_rate": 1.8945665932211337e-05, + "loss": 0.305, + "step": 12245 + }, + { + "epoch": 0.57, + "learning_rate": 1.8945197130936197e-05, + "loss": 0.5079, + "step": 12250 + }, + { + "epoch": 0.57, + "learning_rate": 1.8944728329661057e-05, + "loss": 0.2764, + "step": 12255 + }, + { + "epoch": 0.57, + "learning_rate": 1.894425952838592e-05, + "loss": 0.0815, + "step": 12260 + }, + { + "epoch": 0.57, + "learning_rate": 1.894379072711078e-05, + "loss": 0.0799, + "step": 12265 + }, + { + "epoch": 0.57, + "learning_rate": 1.894332192583564e-05, + "loss": 0.1726, + "step": 12270 + }, + { + "epoch": 0.57, + "learning_rate": 1.89428531245605e-05, + "loss": 0.1347, + "step": 12275 + }, + { + "epoch": 0.57, + "learning_rate": 1.894238432328536e-05, + "loss": 0.1555, + "step": 12280 + }, + { + "epoch": 0.57, + "learning_rate": 1.894191552201022e-05, + "loss": 0.2513, + "step": 12285 + }, + { + "epoch": 0.57, + "learning_rate": 1.894144672073508e-05, + "loss": 0.3376, + "step": 12290 + }, + { + "epoch": 0.57, + "learning_rate": 1.8940977919459944e-05, + "loss": 0.3014, + "step": 12295 + }, + { + "epoch": 0.57, + "learning_rate": 1.8940509118184804e-05, + "loss": 0.5171, + "step": 12300 + }, + { + "epoch": 0.57, + "learning_rate": 1.8940040316909663e-05, + "loss": 0.2599, + "step": 12305 + }, + { + "epoch": 0.57, + "learning_rate": 1.8939571515634523e-05, + "loss": 0.0997, + "step": 12310 + }, + { + "epoch": 0.57, + "learning_rate": 1.8939102714359387e-05, + "loss": 0.1469, + "step": 12315 + }, + { + "epoch": 0.57, + "learning_rate": 1.8938633913084247e-05, + "loss": 0.1405, + "step": 12320 + }, + { + "epoch": 0.58, + "learning_rate": 1.8938165111809107e-05, + "loss": 0.2392, + "step": 12325 + }, + { + "epoch": 0.58, + "learning_rate": 1.8937696310533967e-05, + "loss": 0.1594, + "step": 12330 + }, + { + "epoch": 0.58, + "learning_rate": 1.8937227509258826e-05, + "loss": 0.181, + "step": 12335 + }, + { + "epoch": 0.58, + "learning_rate": 1.8936758707983686e-05, + "loss": 0.2841, + "step": 12340 + }, + { + "epoch": 0.58, + "learning_rate": 1.8936289906708546e-05, + "loss": 0.3794, + "step": 12345 + }, + { + "epoch": 0.58, + "learning_rate": 1.8935821105433406e-05, + "loss": 0.4712, + "step": 12350 + }, + { + "epoch": 0.58, + "learning_rate": 1.893535230415827e-05, + "loss": 0.2406, + "step": 12355 + }, + { + "epoch": 0.58, + "learning_rate": 1.893488350288313e-05, + "loss": 0.1092, + "step": 12360 + }, + { + "epoch": 0.58, + "learning_rate": 1.893441470160799e-05, + "loss": 0.0954, + "step": 12365 + }, + { + "epoch": 0.58, + "learning_rate": 1.893394590033285e-05, + "loss": 0.1485, + "step": 12370 + }, + { + "epoch": 0.58, + "learning_rate": 1.8933477099057713e-05, + "loss": 0.2261, + "step": 12375 + }, + { + "epoch": 0.58, + "learning_rate": 1.8933008297782573e-05, + "loss": 0.2628, + "step": 12380 + }, + { + "epoch": 0.58, + "learning_rate": 1.8932539496507433e-05, + "loss": 0.2334, + "step": 12385 + }, + { + "epoch": 0.58, + "learning_rate": 1.8932070695232293e-05, + "loss": 0.2887, + "step": 12390 + }, + { + "epoch": 0.58, + "learning_rate": 1.8931601893957152e-05, + "loss": 0.2995, + "step": 12395 + }, + { + "epoch": 0.58, + "learning_rate": 1.8931133092682016e-05, + "loss": 0.6158, + "step": 12400 + }, + { + "epoch": 0.58, + "learning_rate": 1.8930664291406876e-05, + "loss": 0.2373, + "step": 12405 + }, + { + "epoch": 0.58, + "learning_rate": 1.8930195490131736e-05, + "loss": 0.078, + "step": 12410 + }, + { + "epoch": 0.58, + "learning_rate": 1.8929726688856596e-05, + "loss": 0.119, + "step": 12415 + }, + { + "epoch": 0.58, + "learning_rate": 1.8929257887581455e-05, + "loss": 0.0879, + "step": 12420 + }, + { + "epoch": 0.58, + "learning_rate": 1.8928789086306315e-05, + "loss": 0.1112, + "step": 12425 + }, + { + "epoch": 0.58, + "learning_rate": 1.8928320285031175e-05, + "loss": 0.1409, + "step": 12430 + }, + { + "epoch": 0.58, + "learning_rate": 1.8927851483756035e-05, + "loss": 0.2855, + "step": 12435 + }, + { + "epoch": 0.58, + "learning_rate": 1.89273826824809e-05, + "loss": 0.1745, + "step": 12440 + }, + { + "epoch": 0.58, + "learning_rate": 1.892691388120576e-05, + "loss": 0.344, + "step": 12445 + }, + { + "epoch": 0.58, + "learning_rate": 1.892644507993062e-05, + "loss": 0.4228, + "step": 12450 + }, + { + "epoch": 0.58, + "learning_rate": 1.8925976278655482e-05, + "loss": 0.2344, + "step": 12455 + }, + { + "epoch": 0.58, + "learning_rate": 1.8925507477380342e-05, + "loss": 0.0847, + "step": 12460 + }, + { + "epoch": 0.58, + "learning_rate": 1.89250386761052e-05, + "loss": 0.1617, + "step": 12465 + }, + { + "epoch": 0.58, + "learning_rate": 1.892456987483006e-05, + "loss": 0.1262, + "step": 12470 + }, + { + "epoch": 0.58, + "learning_rate": 1.892410107355492e-05, + "loss": 0.1367, + "step": 12475 + }, + { + "epoch": 0.58, + "learning_rate": 1.892363227227978e-05, + "loss": 0.2337, + "step": 12480 + }, + { + "epoch": 0.58, + "learning_rate": 1.892316347100464e-05, + "loss": 0.2775, + "step": 12485 + }, + { + "epoch": 0.58, + "learning_rate": 1.89226946697295e-05, + "loss": 0.3666, + "step": 12490 + }, + { + "epoch": 0.58, + "learning_rate": 1.8922225868454365e-05, + "loss": 0.3938, + "step": 12495 + }, + { + "epoch": 0.58, + "learning_rate": 1.8921757067179225e-05, + "loss": 0.6377, + "step": 12500 + }, + { + "epoch": 0.58, + "learning_rate": 1.8921288265904085e-05, + "loss": 0.2246, + "step": 12505 + }, + { + "epoch": 0.58, + "learning_rate": 1.8920819464628944e-05, + "loss": 0.074, + "step": 12510 + }, + { + "epoch": 0.58, + "learning_rate": 1.8920350663353804e-05, + "loss": 0.1685, + "step": 12515 + }, + { + "epoch": 0.58, + "learning_rate": 1.8919881862078668e-05, + "loss": 0.1246, + "step": 12520 + }, + { + "epoch": 0.58, + "learning_rate": 1.8919413060803528e-05, + "loss": 0.1052, + "step": 12525 + }, + { + "epoch": 0.58, + "learning_rate": 1.8918944259528388e-05, + "loss": 0.2717, + "step": 12530 + }, + { + "epoch": 0.58, + "learning_rate": 1.8918475458253248e-05, + "loss": 0.1854, + "step": 12535 + }, + { + "epoch": 0.59, + "learning_rate": 1.891800665697811e-05, + "loss": 0.3528, + "step": 12540 + }, + { + "epoch": 0.59, + "learning_rate": 1.891753785570297e-05, + "loss": 0.1803, + "step": 12545 + }, + { + "epoch": 0.59, + "learning_rate": 1.891706905442783e-05, + "loss": 0.5755, + "step": 12550 + }, + { + "epoch": 0.59, + "learning_rate": 1.891660025315269e-05, + "loss": 0.2829, + "step": 12555 + }, + { + "epoch": 0.59, + "learning_rate": 1.891613145187755e-05, + "loss": 0.0912, + "step": 12560 + }, + { + "epoch": 0.59, + "learning_rate": 1.891566265060241e-05, + "loss": 0.1055, + "step": 12565 + }, + { + "epoch": 0.59, + "learning_rate": 1.891519384932727e-05, + "loss": 0.1751, + "step": 12570 + }, + { + "epoch": 0.59, + "learning_rate": 1.891472504805213e-05, + "loss": 0.1399, + "step": 12575 + }, + { + "epoch": 0.59, + "learning_rate": 1.891425624677699e-05, + "loss": 0.1935, + "step": 12580 + }, + { + "epoch": 0.59, + "learning_rate": 1.8913787445501854e-05, + "loss": 0.2453, + "step": 12585 + }, + { + "epoch": 0.59, + "learning_rate": 1.8913318644226714e-05, + "loss": 0.1864, + "step": 12590 + }, + { + "epoch": 0.59, + "learning_rate": 1.8912849842951574e-05, + "loss": 0.3226, + "step": 12595 + }, + { + "epoch": 0.59, + "learning_rate": 1.8912381041676437e-05, + "loss": 0.3684, + "step": 12600 + }, + { + "epoch": 0.59, + "learning_rate": 1.8911912240401297e-05, + "loss": 0.2648, + "step": 12605 + }, + { + "epoch": 0.59, + "learning_rate": 1.8911443439126157e-05, + "loss": 0.061, + "step": 12610 + }, + { + "epoch": 0.59, + "learning_rate": 1.8910974637851017e-05, + "loss": 0.1064, + "step": 12615 + }, + { + "epoch": 0.59, + "learning_rate": 1.8910505836575877e-05, + "loss": 0.1771, + "step": 12620 + }, + { + "epoch": 0.59, + "learning_rate": 1.8910037035300736e-05, + "loss": 0.1259, + "step": 12625 + }, + { + "epoch": 0.59, + "learning_rate": 1.8909568234025596e-05, + "loss": 0.1778, + "step": 12630 + }, + { + "epoch": 0.59, + "learning_rate": 1.890909943275046e-05, + "loss": 0.1552, + "step": 12635 + }, + { + "epoch": 0.59, + "learning_rate": 1.890863063147532e-05, + "loss": 0.2755, + "step": 12640 + }, + { + "epoch": 0.59, + "learning_rate": 1.890816183020018e-05, + "loss": 0.3597, + "step": 12645 + }, + { + "epoch": 0.59, + "learning_rate": 1.890769302892504e-05, + "loss": 0.6109, + "step": 12650 + }, + { + "epoch": 0.59, + "learning_rate": 1.89072242276499e-05, + "loss": 0.2731, + "step": 12655 + }, + { + "epoch": 0.59, + "learning_rate": 1.890675542637476e-05, + "loss": 0.1214, + "step": 12660 + }, + { + "epoch": 0.59, + "learning_rate": 1.8906286625099623e-05, + "loss": 0.1173, + "step": 12665 + }, + { + "epoch": 0.59, + "learning_rate": 1.8905817823824483e-05, + "loss": 0.1032, + "step": 12670 + }, + { + "epoch": 0.59, + "learning_rate": 1.8905349022549343e-05, + "loss": 0.2092, + "step": 12675 + }, + { + "epoch": 0.59, + "learning_rate": 1.8904880221274206e-05, + "loss": 0.2052, + "step": 12680 + }, + { + "epoch": 0.59, + "learning_rate": 1.8904411419999066e-05, + "loss": 0.3043, + "step": 12685 + }, + { + "epoch": 0.59, + "learning_rate": 1.8903942618723926e-05, + "loss": 0.3043, + "step": 12690 + }, + { + "epoch": 0.59, + "learning_rate": 1.8903473817448786e-05, + "loss": 0.3598, + "step": 12695 + }, + { + "epoch": 0.59, + "learning_rate": 1.8903005016173646e-05, + "loss": 0.5739, + "step": 12700 + }, + { + "epoch": 0.59, + "learning_rate": 1.8902536214898506e-05, + "loss": 0.1942, + "step": 12705 + }, + { + "epoch": 0.59, + "learning_rate": 1.8902067413623366e-05, + "loss": 0.0969, + "step": 12710 + }, + { + "epoch": 0.59, + "learning_rate": 1.8901598612348225e-05, + "loss": 0.121, + "step": 12715 + }, + { + "epoch": 0.59, + "learning_rate": 1.8901129811073085e-05, + "loss": 0.1946, + "step": 12720 + }, + { + "epoch": 0.59, + "learning_rate": 1.890066100979795e-05, + "loss": 0.1054, + "step": 12725 + }, + { + "epoch": 0.59, + "learning_rate": 1.890019220852281e-05, + "loss": 0.2131, + "step": 12730 + }, + { + "epoch": 0.59, + "learning_rate": 1.889972340724767e-05, + "loss": 0.3432, + "step": 12735 + }, + { + "epoch": 0.59, + "learning_rate": 1.889925460597253e-05, + "loss": 0.3533, + "step": 12740 + }, + { + "epoch": 0.59, + "learning_rate": 1.8898785804697392e-05, + "loss": 0.2782, + "step": 12745 + }, + { + "epoch": 0.59, + "learning_rate": 1.8898317003422252e-05, + "loss": 1.0246, + "step": 12750 + }, + { + "epoch": 0.6, + "learning_rate": 1.8897848202147112e-05, + "loss": 0.2875, + "step": 12755 + }, + { + "epoch": 0.6, + "learning_rate": 1.889737940087197e-05, + "loss": 0.0689, + "step": 12760 + }, + { + "epoch": 0.6, + "learning_rate": 1.889691059959683e-05, + "loss": 0.1025, + "step": 12765 + }, + { + "epoch": 0.6, + "learning_rate": 1.889644179832169e-05, + "loss": 0.1406, + "step": 12770 + }, + { + "epoch": 0.6, + "learning_rate": 1.8895972997046555e-05, + "loss": 0.1504, + "step": 12775 + }, + { + "epoch": 0.6, + "learning_rate": 1.8895504195771415e-05, + "loss": 0.2003, + "step": 12780 + }, + { + "epoch": 0.6, + "learning_rate": 1.8895035394496275e-05, + "loss": 0.2294, + "step": 12785 + }, + { + "epoch": 0.6, + "learning_rate": 1.8894566593221135e-05, + "loss": 0.2264, + "step": 12790 + }, + { + "epoch": 0.6, + "learning_rate": 1.8894097791945995e-05, + "loss": 0.3517, + "step": 12795 + }, + { + "epoch": 0.6, + "learning_rate": 1.8893628990670854e-05, + "loss": 0.6386, + "step": 12800 + }, + { + "epoch": 0.6, + "learning_rate": 1.8893160189395718e-05, + "loss": 0.2141, + "step": 12805 + }, + { + "epoch": 0.6, + "learning_rate": 1.8892691388120578e-05, + "loss": 0.1371, + "step": 12810 + }, + { + "epoch": 0.6, + "learning_rate": 1.8892222586845438e-05, + "loss": 0.119, + "step": 12815 + }, + { + "epoch": 0.6, + "learning_rate": 1.8891753785570298e-05, + "loss": 0.1502, + "step": 12820 + }, + { + "epoch": 0.6, + "learning_rate": 1.889128498429516e-05, + "loss": 0.1665, + "step": 12825 + }, + { + "epoch": 0.6, + "learning_rate": 1.889081618302002e-05, + "loss": 0.1954, + "step": 12830 + }, + { + "epoch": 0.6, + "learning_rate": 1.889034738174488e-05, + "loss": 0.2543, + "step": 12835 + }, + { + "epoch": 0.6, + "learning_rate": 1.888987858046974e-05, + "loss": 0.2318, + "step": 12840 + }, + { + "epoch": 0.6, + "learning_rate": 1.88894097791946e-05, + "loss": 0.3885, + "step": 12845 + }, + { + "epoch": 0.6, + "learning_rate": 1.888894097791946e-05, + "loss": 0.4542, + "step": 12850 + }, + { + "epoch": 0.6, + "learning_rate": 1.888847217664432e-05, + "loss": 0.2372, + "step": 12855 + }, + { + "epoch": 0.6, + "learning_rate": 1.888800337536918e-05, + "loss": 0.1154, + "step": 12860 + }, + { + "epoch": 0.6, + "learning_rate": 1.8887534574094044e-05, + "loss": 0.0905, + "step": 12865 + }, + { + "epoch": 0.6, + "learning_rate": 1.8887065772818904e-05, + "loss": 0.1059, + "step": 12870 + }, + { + "epoch": 0.6, + "learning_rate": 1.8886596971543764e-05, + "loss": 0.2216, + "step": 12875 + }, + { + "epoch": 0.6, + "learning_rate": 1.8886128170268624e-05, + "loss": 0.1879, + "step": 12880 + }, + { + "epoch": 0.6, + "learning_rate": 1.8885659368993487e-05, + "loss": 0.2195, + "step": 12885 + }, + { + "epoch": 0.6, + "learning_rate": 1.8885190567718347e-05, + "loss": 0.3195, + "step": 12890 + }, + { + "epoch": 0.6, + "learning_rate": 1.8884721766443207e-05, + "loss": 0.2832, + "step": 12895 + }, + { + "epoch": 0.6, + "learning_rate": 1.8884252965168067e-05, + "loss": 0.4605, + "step": 12900 + }, + { + "epoch": 0.6, + "learning_rate": 1.8883784163892927e-05, + "loss": 0.2373, + "step": 12905 + }, + { + "epoch": 0.6, + "learning_rate": 1.888331536261779e-05, + "loss": 0.1402, + "step": 12910 + }, + { + "epoch": 0.6, + "learning_rate": 1.888284656134265e-05, + "loss": 0.0874, + "step": 12915 + }, + { + "epoch": 0.6, + "learning_rate": 1.888237776006751e-05, + "loss": 0.1415, + "step": 12920 + }, + { + "epoch": 0.6, + "learning_rate": 1.888190895879237e-05, + "loss": 0.2131, + "step": 12925 + }, + { + "epoch": 0.6, + "learning_rate": 1.888144015751723e-05, + "loss": 0.1925, + "step": 12930 + }, + { + "epoch": 0.6, + "learning_rate": 1.888097135624209e-05, + "loss": 0.1838, + "step": 12935 + }, + { + "epoch": 0.6, + "learning_rate": 1.888050255496695e-05, + "loss": 0.2667, + "step": 12940 + }, + { + "epoch": 0.6, + "learning_rate": 1.888003375369181e-05, + "loss": 0.4606, + "step": 12945 + }, + { + "epoch": 0.6, + "learning_rate": 1.8879564952416673e-05, + "loss": 0.593, + "step": 12950 + }, + { + "epoch": 0.6, + "learning_rate": 1.8879096151141533e-05, + "loss": 0.2165, + "step": 12955 + }, + { + "epoch": 0.6, + "learning_rate": 1.8878627349866393e-05, + "loss": 0.1231, + "step": 12960 + }, + { + "epoch": 0.6, + "learning_rate": 1.8878158548591256e-05, + "loss": 0.1449, + "step": 12965 + }, + { + "epoch": 0.61, + "learning_rate": 1.8877689747316116e-05, + "loss": 0.1908, + "step": 12970 + }, + { + "epoch": 0.61, + "learning_rate": 1.8877220946040976e-05, + "loss": 0.1959, + "step": 12975 + }, + { + "epoch": 0.61, + "learning_rate": 1.8876752144765836e-05, + "loss": 0.264, + "step": 12980 + }, + { + "epoch": 0.61, + "learning_rate": 1.8876283343490696e-05, + "loss": 0.2641, + "step": 12985 + }, + { + "epoch": 0.61, + "learning_rate": 1.8875814542215556e-05, + "loss": 0.2006, + "step": 12990 + }, + { + "epoch": 0.61, + "learning_rate": 1.8875345740940416e-05, + "loss": 0.3727, + "step": 12995 + }, + { + "epoch": 0.61, + "learning_rate": 1.8874876939665276e-05, + "loss": 0.6469, + "step": 13000 + }, + { + "epoch": 0.61, + "learning_rate": 1.887440813839014e-05, + "loss": 0.1506, + "step": 13005 + }, + { + "epoch": 0.61, + "learning_rate": 1.8873939337115e-05, + "loss": 0.1014, + "step": 13010 + }, + { + "epoch": 0.61, + "learning_rate": 1.887347053583986e-05, + "loss": 0.1381, + "step": 13015 + }, + { + "epoch": 0.61, + "learning_rate": 1.887300173456472e-05, + "loss": 0.1419, + "step": 13020 + }, + { + "epoch": 0.61, + "learning_rate": 1.887253293328958e-05, + "loss": 0.1822, + "step": 13025 + }, + { + "epoch": 0.61, + "learning_rate": 1.8872064132014442e-05, + "loss": 0.2727, + "step": 13030 + }, + { + "epoch": 0.61, + "learning_rate": 1.8871595330739302e-05, + "loss": 0.2228, + "step": 13035 + }, + { + "epoch": 0.61, + "learning_rate": 1.8871126529464162e-05, + "loss": 0.2866, + "step": 13040 + }, + { + "epoch": 0.61, + "learning_rate": 1.8870657728189022e-05, + "loss": 0.2962, + "step": 13045 + }, + { + "epoch": 0.61, + "learning_rate": 1.8870188926913885e-05, + "loss": 0.5063, + "step": 13050 + }, + { + "epoch": 0.61, + "learning_rate": 1.8869720125638745e-05, + "loss": 0.2182, + "step": 13055 + }, + { + "epoch": 0.61, + "learning_rate": 1.8869251324363605e-05, + "loss": 0.168, + "step": 13060 + }, + { + "epoch": 0.61, + "learning_rate": 1.8868782523088465e-05, + "loss": 0.0722, + "step": 13065 + }, + { + "epoch": 0.61, + "learning_rate": 1.8868313721813325e-05, + "loss": 0.1356, + "step": 13070 + }, + { + "epoch": 0.61, + "learning_rate": 1.8867844920538185e-05, + "loss": 0.2862, + "step": 13075 + }, + { + "epoch": 0.61, + "learning_rate": 1.8867376119263045e-05, + "loss": 0.1236, + "step": 13080 + }, + { + "epoch": 0.61, + "learning_rate": 1.8866907317987905e-05, + "loss": 0.2837, + "step": 13085 + }, + { + "epoch": 0.61, + "learning_rate": 1.8866438516712765e-05, + "loss": 0.329, + "step": 13090 + }, + { + "epoch": 0.61, + "learning_rate": 1.8865969715437628e-05, + "loss": 0.3582, + "step": 13095 + }, + { + "epoch": 0.61, + "learning_rate": 1.8865500914162488e-05, + "loss": 0.5645, + "step": 13100 + }, + { + "epoch": 0.61, + "learning_rate": 1.8865032112887348e-05, + "loss": 0.2567, + "step": 13105 + }, + { + "epoch": 0.61, + "learning_rate": 1.886456331161221e-05, + "loss": 0.1308, + "step": 13110 + }, + { + "epoch": 0.61, + "learning_rate": 1.886409451033707e-05, + "loss": 0.1187, + "step": 13115 + }, + { + "epoch": 0.61, + "learning_rate": 1.886362570906193e-05, + "loss": 0.1349, + "step": 13120 + }, + { + "epoch": 0.61, + "learning_rate": 1.886315690778679e-05, + "loss": 0.1611, + "step": 13125 + }, + { + "epoch": 0.61, + "learning_rate": 1.886268810651165e-05, + "loss": 0.1943, + "step": 13130 + }, + { + "epoch": 0.61, + "learning_rate": 1.886221930523651e-05, + "loss": 0.1838, + "step": 13135 + }, + { + "epoch": 0.61, + "learning_rate": 1.886175050396137e-05, + "loss": 0.2555, + "step": 13140 + }, + { + "epoch": 0.61, + "learning_rate": 1.8861281702686234e-05, + "loss": 0.3234, + "step": 13145 + }, + { + "epoch": 0.61, + "learning_rate": 1.8860812901411094e-05, + "loss": 0.5682, + "step": 13150 + }, + { + "epoch": 0.61, + "learning_rate": 1.8860344100135954e-05, + "loss": 0.2446, + "step": 13155 + }, + { + "epoch": 0.61, + "learning_rate": 1.8859875298860814e-05, + "loss": 0.1032, + "step": 13160 + }, + { + "epoch": 0.61, + "learning_rate": 1.8859406497585674e-05, + "loss": 0.0889, + "step": 13165 + }, + { + "epoch": 0.61, + "learning_rate": 1.8858937696310534e-05, + "loss": 0.2059, + "step": 13170 + }, + { + "epoch": 0.61, + "learning_rate": 1.8858468895035397e-05, + "loss": 0.1573, + "step": 13175 + }, + { + "epoch": 0.61, + "learning_rate": 1.8858000093760257e-05, + "loss": 0.1783, + "step": 13180 + }, + { + "epoch": 0.62, + "learning_rate": 1.8857531292485117e-05, + "loss": 0.2177, + "step": 13185 + }, + { + "epoch": 0.62, + "learning_rate": 1.885706249120998e-05, + "loss": 0.24, + "step": 13190 + }, + { + "epoch": 0.62, + "learning_rate": 1.885659368993484e-05, + "loss": 0.362, + "step": 13195 + }, + { + "epoch": 0.62, + "learning_rate": 1.88561248886597e-05, + "loss": 0.5683, + "step": 13200 + }, + { + "epoch": 0.62, + "learning_rate": 1.885565608738456e-05, + "loss": 0.254, + "step": 13205 + }, + { + "epoch": 0.62, + "learning_rate": 1.885518728610942e-05, + "loss": 0.1012, + "step": 13210 + }, + { + "epoch": 0.62, + "learning_rate": 1.885471848483428e-05, + "loss": 0.1314, + "step": 13215 + }, + { + "epoch": 0.62, + "learning_rate": 1.885424968355914e-05, + "loss": 0.1051, + "step": 13220 + }, + { + "epoch": 0.62, + "learning_rate": 1.8853780882284e-05, + "loss": 0.1469, + "step": 13225 + }, + { + "epoch": 0.62, + "learning_rate": 1.885331208100886e-05, + "loss": 0.1632, + "step": 13230 + }, + { + "epoch": 0.62, + "learning_rate": 1.8852843279733723e-05, + "loss": 0.2104, + "step": 13235 + }, + { + "epoch": 0.62, + "learning_rate": 1.8852374478458583e-05, + "loss": 0.3246, + "step": 13240 + }, + { + "epoch": 0.62, + "learning_rate": 1.8851905677183443e-05, + "loss": 0.213, + "step": 13245 + }, + { + "epoch": 0.62, + "learning_rate": 1.8851436875908303e-05, + "loss": 0.5547, + "step": 13250 + }, + { + "epoch": 0.62, + "learning_rate": 1.8850968074633166e-05, + "loss": 0.2415, + "step": 13255 + }, + { + "epoch": 0.62, + "learning_rate": 1.8850499273358026e-05, + "loss": 0.0852, + "step": 13260 + }, + { + "epoch": 0.62, + "learning_rate": 1.8850030472082886e-05, + "loss": 0.0872, + "step": 13265 + }, + { + "epoch": 0.62, + "learning_rate": 1.8849561670807746e-05, + "loss": 0.1391, + "step": 13270 + }, + { + "epoch": 0.62, + "learning_rate": 1.8849092869532606e-05, + "loss": 0.0998, + "step": 13275 + }, + { + "epoch": 0.62, + "learning_rate": 1.8848624068257466e-05, + "loss": 0.1616, + "step": 13280 + }, + { + "epoch": 0.62, + "learning_rate": 1.884815526698233e-05, + "loss": 0.1745, + "step": 13285 + }, + { + "epoch": 0.62, + "learning_rate": 1.884768646570719e-05, + "loss": 0.3633, + "step": 13290 + }, + { + "epoch": 0.62, + "learning_rate": 1.884721766443205e-05, + "loss": 0.4456, + "step": 13295 + }, + { + "epoch": 0.62, + "learning_rate": 1.884674886315691e-05, + "loss": 0.6131, + "step": 13300 + }, + { + "epoch": 0.62, + "learning_rate": 1.884628006188177e-05, + "loss": 0.2623, + "step": 13305 + }, + { + "epoch": 0.62, + "learning_rate": 1.884581126060663e-05, + "loss": 0.0944, + "step": 13310 + }, + { + "epoch": 0.62, + "learning_rate": 1.8845342459331492e-05, + "loss": 0.1549, + "step": 13315 + }, + { + "epoch": 0.62, + "learning_rate": 1.8844873658056352e-05, + "loss": 0.145, + "step": 13320 + }, + { + "epoch": 0.62, + "learning_rate": 1.8844404856781212e-05, + "loss": 0.1983, + "step": 13325 + }, + { + "epoch": 0.62, + "learning_rate": 1.8843936055506072e-05, + "loss": 0.263, + "step": 13330 + }, + { + "epoch": 0.62, + "learning_rate": 1.8843467254230935e-05, + "loss": 0.2859, + "step": 13335 + }, + { + "epoch": 0.62, + "learning_rate": 1.8842998452955795e-05, + "loss": 0.2764, + "step": 13340 + }, + { + "epoch": 0.62, + "learning_rate": 1.8842529651680655e-05, + "loss": 0.3232, + "step": 13345 + }, + { + "epoch": 0.62, + "learning_rate": 1.8842060850405515e-05, + "loss": 0.5258, + "step": 13350 + }, + { + "epoch": 0.62, + "learning_rate": 1.8841592049130375e-05, + "loss": 0.2526, + "step": 13355 + }, + { + "epoch": 0.62, + "learning_rate": 1.8841123247855235e-05, + "loss": 0.0839, + "step": 13360 + }, + { + "epoch": 0.62, + "learning_rate": 1.8840654446580095e-05, + "loss": 0.1898, + "step": 13365 + }, + { + "epoch": 0.62, + "learning_rate": 1.8840185645304955e-05, + "loss": 0.1795, + "step": 13370 + }, + { + "epoch": 0.62, + "learning_rate": 1.8839716844029818e-05, + "loss": 0.1649, + "step": 13375 + }, + { + "epoch": 0.62, + "learning_rate": 1.8839248042754678e-05, + "loss": 0.2422, + "step": 13380 + }, + { + "epoch": 0.62, + "learning_rate": 1.8838779241479538e-05, + "loss": 0.2588, + "step": 13385 + }, + { + "epoch": 0.62, + "learning_rate": 1.8838310440204398e-05, + "loss": 0.2336, + "step": 13390 + }, + { + "epoch": 0.63, + "learning_rate": 1.883784163892926e-05, + "loss": 0.3311, + "step": 13395 + }, + { + "epoch": 0.63, + "learning_rate": 1.883737283765412e-05, + "loss": 0.5475, + "step": 13400 + }, + { + "epoch": 0.63, + "learning_rate": 1.883690403637898e-05, + "loss": 0.3553, + "step": 13405 + }, + { + "epoch": 0.63, + "learning_rate": 1.883643523510384e-05, + "loss": 0.1288, + "step": 13410 + }, + { + "epoch": 0.63, + "learning_rate": 1.88359664338287e-05, + "loss": 0.1275, + "step": 13415 + }, + { + "epoch": 0.63, + "learning_rate": 1.883549763255356e-05, + "loss": 0.1297, + "step": 13420 + }, + { + "epoch": 0.63, + "learning_rate": 1.8835028831278424e-05, + "loss": 0.1836, + "step": 13425 + }, + { + "epoch": 0.63, + "learning_rate": 1.8834560030003284e-05, + "loss": 0.2246, + "step": 13430 + }, + { + "epoch": 0.63, + "learning_rate": 1.8834091228728144e-05, + "loss": 0.1759, + "step": 13435 + }, + { + "epoch": 0.63, + "learning_rate": 1.8833622427453004e-05, + "loss": 0.2417, + "step": 13440 + }, + { + "epoch": 0.63, + "learning_rate": 1.8833153626177864e-05, + "loss": 0.3051, + "step": 13445 + }, + { + "epoch": 0.63, + "learning_rate": 1.8832684824902724e-05, + "loss": 0.5476, + "step": 13450 + }, + { + "epoch": 0.63, + "learning_rate": 1.8832216023627584e-05, + "loss": 0.2013, + "step": 13455 + }, + { + "epoch": 0.63, + "learning_rate": 1.8831747222352447e-05, + "loss": 0.0828, + "step": 13460 + }, + { + "epoch": 0.63, + "learning_rate": 1.8831278421077307e-05, + "loss": 0.0529, + "step": 13465 + }, + { + "epoch": 0.63, + "learning_rate": 1.8830809619802167e-05, + "loss": 0.0868, + "step": 13470 + }, + { + "epoch": 0.63, + "learning_rate": 1.883034081852703e-05, + "loss": 0.1599, + "step": 13475 + }, + { + "epoch": 0.63, + "learning_rate": 1.882987201725189e-05, + "loss": 0.2012, + "step": 13480 + }, + { + "epoch": 0.63, + "learning_rate": 1.882940321597675e-05, + "loss": 0.2599, + "step": 13485 + }, + { + "epoch": 0.63, + "learning_rate": 1.882893441470161e-05, + "loss": 0.1924, + "step": 13490 + }, + { + "epoch": 0.63, + "learning_rate": 1.882846561342647e-05, + "loss": 0.4321, + "step": 13495 + }, + { + "epoch": 0.63, + "learning_rate": 1.882799681215133e-05, + "loss": 0.6084, + "step": 13500 + }, + { + "epoch": 0.63, + "learning_rate": 1.882752801087619e-05, + "loss": 0.1664, + "step": 13505 + }, + { + "epoch": 0.63, + "learning_rate": 1.882705920960105e-05, + "loss": 0.1051, + "step": 13510 + }, + { + "epoch": 0.63, + "learning_rate": 1.8826590408325913e-05, + "loss": 0.1545, + "step": 13515 + }, + { + "epoch": 0.63, + "learning_rate": 1.8826121607050773e-05, + "loss": 0.1605, + "step": 13520 + }, + { + "epoch": 0.63, + "learning_rate": 1.8825652805775633e-05, + "loss": 0.1598, + "step": 13525 + }, + { + "epoch": 0.63, + "learning_rate": 1.8825184004500493e-05, + "loss": 0.2838, + "step": 13530 + }, + { + "epoch": 0.63, + "learning_rate": 1.8824715203225353e-05, + "loss": 0.1525, + "step": 13535 + }, + { + "epoch": 0.63, + "learning_rate": 1.8824246401950216e-05, + "loss": 0.2664, + "step": 13540 + }, + { + "epoch": 0.63, + "learning_rate": 1.8823777600675076e-05, + "loss": 0.3345, + "step": 13545 + }, + { + "epoch": 0.63, + "learning_rate": 1.8823308799399936e-05, + "loss": 0.3239, + "step": 13550 + }, + { + "epoch": 0.63, + "learning_rate": 1.8822839998124796e-05, + "loss": 0.195, + "step": 13555 + }, + { + "epoch": 0.63, + "learning_rate": 1.882237119684966e-05, + "loss": 0.0748, + "step": 13560 + }, + { + "epoch": 0.63, + "learning_rate": 1.882190239557452e-05, + "loss": 0.1101, + "step": 13565 + }, + { + "epoch": 0.63, + "learning_rate": 1.882143359429938e-05, + "loss": 0.1743, + "step": 13570 + }, + { + "epoch": 0.63, + "learning_rate": 1.882096479302424e-05, + "loss": 0.2923, + "step": 13575 + }, + { + "epoch": 0.63, + "learning_rate": 1.88204959917491e-05, + "loss": 0.2246, + "step": 13580 + }, + { + "epoch": 0.63, + "learning_rate": 1.882002719047396e-05, + "loss": 0.3307, + "step": 13585 + }, + { + "epoch": 0.63, + "learning_rate": 1.881955838919882e-05, + "loss": 0.237, + "step": 13590 + }, + { + "epoch": 0.63, + "learning_rate": 1.881908958792368e-05, + "loss": 0.3441, + "step": 13595 + }, + { + "epoch": 0.63, + "learning_rate": 1.881862078664854e-05, + "loss": 0.6305, + "step": 13600 + }, + { + "epoch": 0.63, + "learning_rate": 1.8818151985373402e-05, + "loss": 0.234, + "step": 13605 + }, + { + "epoch": 0.64, + "learning_rate": 1.8817683184098262e-05, + "loss": 0.136, + "step": 13610 + }, + { + "epoch": 0.64, + "learning_rate": 1.8817214382823122e-05, + "loss": 0.1397, + "step": 13615 + }, + { + "epoch": 0.64, + "learning_rate": 1.8816745581547985e-05, + "loss": 0.1026, + "step": 13620 + }, + { + "epoch": 0.64, + "learning_rate": 1.8816276780272845e-05, + "loss": 0.2265, + "step": 13625 + }, + { + "epoch": 0.64, + "learning_rate": 1.8815807978997705e-05, + "loss": 0.2514, + "step": 13630 + }, + { + "epoch": 0.64, + "learning_rate": 1.8815339177722565e-05, + "loss": 0.1731, + "step": 13635 + }, + { + "epoch": 0.64, + "learning_rate": 1.8814870376447425e-05, + "loss": 0.1958, + "step": 13640 + }, + { + "epoch": 0.64, + "learning_rate": 1.8814401575172285e-05, + "loss": 0.3905, + "step": 13645 + }, + { + "epoch": 0.64, + "learning_rate": 1.8813932773897145e-05, + "loss": 0.573, + "step": 13650 + }, + { + "epoch": 0.64, + "learning_rate": 1.8813463972622008e-05, + "loss": 0.2444, + "step": 13655 + }, + { + "epoch": 0.64, + "learning_rate": 1.8812995171346868e-05, + "loss": 0.0534, + "step": 13660 + }, + { + "epoch": 0.64, + "learning_rate": 1.8812526370071728e-05, + "loss": 0.0949, + "step": 13665 + }, + { + "epoch": 0.64, + "learning_rate": 1.8812057568796588e-05, + "loss": 0.1562, + "step": 13670 + }, + { + "epoch": 0.64, + "learning_rate": 1.8811588767521448e-05, + "loss": 0.1673, + "step": 13675 + }, + { + "epoch": 0.64, + "learning_rate": 1.8811119966246308e-05, + "loss": 0.2063, + "step": 13680 + }, + { + "epoch": 0.64, + "learning_rate": 1.881065116497117e-05, + "loss": 0.1861, + "step": 13685 + }, + { + "epoch": 0.64, + "learning_rate": 1.881018236369603e-05, + "loss": 0.2188, + "step": 13690 + }, + { + "epoch": 0.64, + "learning_rate": 1.880971356242089e-05, + "loss": 0.3303, + "step": 13695 + }, + { + "epoch": 0.64, + "learning_rate": 1.8809244761145754e-05, + "loss": 0.6072, + "step": 13700 + }, + { + "epoch": 0.64, + "learning_rate": 1.8808775959870614e-05, + "loss": 0.2465, + "step": 13705 + }, + { + "epoch": 0.64, + "learning_rate": 1.8808307158595474e-05, + "loss": 0.0536, + "step": 13710 + }, + { + "epoch": 0.64, + "learning_rate": 1.8807838357320334e-05, + "loss": 0.0992, + "step": 13715 + }, + { + "epoch": 0.64, + "learning_rate": 1.8807369556045194e-05, + "loss": 0.1814, + "step": 13720 + }, + { + "epoch": 0.64, + "learning_rate": 1.8806900754770054e-05, + "loss": 0.1639, + "step": 13725 + }, + { + "epoch": 0.64, + "learning_rate": 1.8806431953494914e-05, + "loss": 0.174, + "step": 13730 + }, + { + "epoch": 0.64, + "learning_rate": 1.8805963152219774e-05, + "loss": 0.1975, + "step": 13735 + }, + { + "epoch": 0.64, + "learning_rate": 1.8805494350944634e-05, + "loss": 0.175, + "step": 13740 + }, + { + "epoch": 0.64, + "learning_rate": 1.8805025549669497e-05, + "loss": 0.3268, + "step": 13745 + }, + { + "epoch": 0.64, + "learning_rate": 1.8804556748394357e-05, + "loss": 0.582, + "step": 13750 + }, + { + "epoch": 0.64, + "learning_rate": 1.8804087947119217e-05, + "loss": 0.2182, + "step": 13755 + }, + { + "epoch": 0.64, + "learning_rate": 1.880361914584408e-05, + "loss": 0.093, + "step": 13760 + }, + { + "epoch": 0.64, + "learning_rate": 1.880315034456894e-05, + "loss": 0.1654, + "step": 13765 + }, + { + "epoch": 0.64, + "learning_rate": 1.88026815432938e-05, + "loss": 0.0881, + "step": 13770 + }, + { + "epoch": 0.64, + "learning_rate": 1.880221274201866e-05, + "loss": 0.1605, + "step": 13775 + }, + { + "epoch": 0.64, + "learning_rate": 1.880174394074352e-05, + "loss": 0.1614, + "step": 13780 + }, + { + "epoch": 0.64, + "learning_rate": 1.880127513946838e-05, + "loss": 0.1891, + "step": 13785 + }, + { + "epoch": 0.64, + "learning_rate": 1.880080633819324e-05, + "loss": 0.2944, + "step": 13790 + }, + { + "epoch": 0.64, + "learning_rate": 1.8800337536918103e-05, + "loss": 0.4841, + "step": 13795 + }, + { + "epoch": 0.64, + "learning_rate": 1.8799868735642963e-05, + "loss": 0.5165, + "step": 13800 + }, + { + "epoch": 0.64, + "learning_rate": 1.8799399934367823e-05, + "loss": 0.2663, + "step": 13805 + }, + { + "epoch": 0.64, + "learning_rate": 1.8798931133092683e-05, + "loss": 0.067, + "step": 13810 + }, + { + "epoch": 0.64, + "learning_rate": 1.8798462331817543e-05, + "loss": 0.1632, + "step": 13815 + }, + { + "epoch": 0.64, + "learning_rate": 1.8797993530542403e-05, + "loss": 0.0878, + "step": 13820 + }, + { + "epoch": 0.65, + "learning_rate": 1.8797524729267266e-05, + "loss": 0.1335, + "step": 13825 + }, + { + "epoch": 0.65, + "learning_rate": 1.8797055927992126e-05, + "loss": 0.2923, + "step": 13830 + }, + { + "epoch": 0.65, + "learning_rate": 1.8796587126716986e-05, + "loss": 0.2697, + "step": 13835 + }, + { + "epoch": 0.65, + "learning_rate": 1.879611832544185e-05, + "loss": 0.2409, + "step": 13840 + }, + { + "epoch": 0.65, + "learning_rate": 1.879564952416671e-05, + "loss": 0.3278, + "step": 13845 + }, + { + "epoch": 0.65, + "learning_rate": 1.879518072289157e-05, + "loss": 0.4615, + "step": 13850 + }, + { + "epoch": 0.65, + "learning_rate": 1.879471192161643e-05, + "loss": 0.2961, + "step": 13855 + }, + { + "epoch": 0.65, + "learning_rate": 1.879424312034129e-05, + "loss": 0.1467, + "step": 13860 + }, + { + "epoch": 0.65, + "learning_rate": 1.879377431906615e-05, + "loss": 0.0899, + "step": 13865 + }, + { + "epoch": 0.65, + "learning_rate": 1.879330551779101e-05, + "loss": 0.1482, + "step": 13870 + }, + { + "epoch": 0.65, + "learning_rate": 1.879283671651587e-05, + "loss": 0.1441, + "step": 13875 + }, + { + "epoch": 0.65, + "learning_rate": 1.879236791524073e-05, + "loss": 0.1818, + "step": 13880 + }, + { + "epoch": 0.65, + "learning_rate": 1.8791899113965592e-05, + "loss": 0.2261, + "step": 13885 + }, + { + "epoch": 0.65, + "learning_rate": 1.8791430312690452e-05, + "loss": 0.2463, + "step": 13890 + }, + { + "epoch": 0.65, + "learning_rate": 1.8790961511415312e-05, + "loss": 0.2855, + "step": 13895 + }, + { + "epoch": 0.65, + "learning_rate": 1.8790492710140172e-05, + "loss": 0.5662, + "step": 13900 + }, + { + "epoch": 0.65, + "learning_rate": 1.8790023908865035e-05, + "loss": 0.217, + "step": 13905 + }, + { + "epoch": 0.65, + "learning_rate": 1.8789555107589895e-05, + "loss": 0.1254, + "step": 13910 + }, + { + "epoch": 0.65, + "learning_rate": 1.8789086306314755e-05, + "loss": 0.1276, + "step": 13915 + }, + { + "epoch": 0.65, + "learning_rate": 1.8788617505039615e-05, + "loss": 0.1244, + "step": 13920 + }, + { + "epoch": 0.65, + "learning_rate": 1.8788148703764475e-05, + "loss": 0.1612, + "step": 13925 + }, + { + "epoch": 0.65, + "learning_rate": 1.8787679902489335e-05, + "loss": 0.2602, + "step": 13930 + }, + { + "epoch": 0.65, + "learning_rate": 1.87872111012142e-05, + "loss": 0.3629, + "step": 13935 + }, + { + "epoch": 0.65, + "learning_rate": 1.8786742299939058e-05, + "loss": 0.266, + "step": 13940 + }, + { + "epoch": 0.65, + "learning_rate": 1.8786273498663918e-05, + "loss": 0.3049, + "step": 13945 + }, + { + "epoch": 0.65, + "learning_rate": 1.8785804697388778e-05, + "loss": 0.5083, + "step": 13950 + }, + { + "epoch": 0.65, + "learning_rate": 1.8785335896113638e-05, + "loss": 0.2277, + "step": 13955 + }, + { + "epoch": 0.65, + "learning_rate": 1.8784867094838498e-05, + "loss": 0.1174, + "step": 13960 + }, + { + "epoch": 0.65, + "learning_rate": 1.8784398293563358e-05, + "loss": 0.0655, + "step": 13965 + }, + { + "epoch": 0.65, + "learning_rate": 1.878392949228822e-05, + "loss": 0.1587, + "step": 13970 + }, + { + "epoch": 0.65, + "learning_rate": 1.878346069101308e-05, + "loss": 0.1778, + "step": 13975 + }, + { + "epoch": 0.65, + "learning_rate": 1.878299188973794e-05, + "loss": 0.2082, + "step": 13980 + }, + { + "epoch": 0.65, + "learning_rate": 1.8782523088462804e-05, + "loss": 0.1945, + "step": 13985 + }, + { + "epoch": 0.65, + "learning_rate": 1.8782054287187664e-05, + "loss": 0.2287, + "step": 13990 + }, + { + "epoch": 0.65, + "learning_rate": 1.8781585485912524e-05, + "loss": 0.2369, + "step": 13995 + }, + { + "epoch": 0.65, + "learning_rate": 1.8781116684637384e-05, + "loss": 0.5127, + "step": 14000 + }, + { + "epoch": 0.65, + "learning_rate": 1.8780647883362244e-05, + "loss": 0.2929, + "step": 14005 + }, + { + "epoch": 0.65, + "learning_rate": 1.8780179082087104e-05, + "loss": 0.1198, + "step": 14010 + }, + { + "epoch": 0.65, + "learning_rate": 1.8779710280811964e-05, + "loss": 0.0986, + "step": 14015 + }, + { + "epoch": 0.65, + "learning_rate": 1.8779241479536824e-05, + "loss": 0.1393, + "step": 14020 + }, + { + "epoch": 0.65, + "learning_rate": 1.8778772678261687e-05, + "loss": 0.1961, + "step": 14025 + }, + { + "epoch": 0.65, + "learning_rate": 1.8778303876986547e-05, + "loss": 0.2312, + "step": 14030 + }, + { + "epoch": 0.65, + "learning_rate": 1.8777835075711407e-05, + "loss": 0.2327, + "step": 14035 + }, + { + "epoch": 0.66, + "learning_rate": 1.8777366274436267e-05, + "loss": 0.258, + "step": 14040 + }, + { + "epoch": 0.66, + "learning_rate": 1.8776897473161127e-05, + "loss": 0.4388, + "step": 14045 + }, + { + "epoch": 0.66, + "learning_rate": 1.877642867188599e-05, + "loss": 0.5567, + "step": 14050 + }, + { + "epoch": 0.66, + "learning_rate": 1.877595987061085e-05, + "loss": 0.222, + "step": 14055 + }, + { + "epoch": 0.66, + "learning_rate": 1.877549106933571e-05, + "loss": 0.0841, + "step": 14060 + }, + { + "epoch": 0.66, + "learning_rate": 1.877502226806057e-05, + "loss": 0.0847, + "step": 14065 + }, + { + "epoch": 0.66, + "learning_rate": 1.877455346678543e-05, + "loss": 0.181, + "step": 14070 + }, + { + "epoch": 0.66, + "learning_rate": 1.8774084665510293e-05, + "loss": 0.2742, + "step": 14075 + }, + { + "epoch": 0.66, + "learning_rate": 1.8773615864235153e-05, + "loss": 0.1208, + "step": 14080 + }, + { + "epoch": 0.66, + "learning_rate": 1.8773147062960013e-05, + "loss": 0.2701, + "step": 14085 + }, + { + "epoch": 0.66, + "learning_rate": 1.8772678261684873e-05, + "loss": 0.2529, + "step": 14090 + }, + { + "epoch": 0.66, + "learning_rate": 1.8772209460409733e-05, + "loss": 0.3087, + "step": 14095 + }, + { + "epoch": 0.66, + "learning_rate": 1.8771740659134593e-05, + "loss": 0.6273, + "step": 14100 + }, + { + "epoch": 0.66, + "learning_rate": 1.8771271857859453e-05, + "loss": 0.1981, + "step": 14105 + }, + { + "epoch": 0.66, + "learning_rate": 1.8770803056584316e-05, + "loss": 0.1016, + "step": 14110 + }, + { + "epoch": 0.66, + "learning_rate": 1.8770334255309176e-05, + "loss": 0.115, + "step": 14115 + }, + { + "epoch": 0.66, + "learning_rate": 1.8769865454034036e-05, + "loss": 0.07, + "step": 14120 + }, + { + "epoch": 0.66, + "learning_rate": 1.8769396652758896e-05, + "loss": 0.1167, + "step": 14125 + }, + { + "epoch": 0.66, + "learning_rate": 1.876892785148376e-05, + "loss": 0.2139, + "step": 14130 + }, + { + "epoch": 0.66, + "learning_rate": 1.876845905020862e-05, + "loss": 0.2074, + "step": 14135 + }, + { + "epoch": 0.66, + "learning_rate": 1.876799024893348e-05, + "loss": 0.173, + "step": 14140 + }, + { + "epoch": 0.66, + "learning_rate": 1.876752144765834e-05, + "loss": 0.3318, + "step": 14145 + }, + { + "epoch": 0.66, + "learning_rate": 1.87670526463832e-05, + "loss": 0.5468, + "step": 14150 + }, + { + "epoch": 0.66, + "learning_rate": 1.876658384510806e-05, + "loss": 0.242, + "step": 14155 + }, + { + "epoch": 0.66, + "learning_rate": 1.876611504383292e-05, + "loss": 0.0742, + "step": 14160 + }, + { + "epoch": 0.66, + "learning_rate": 1.8765646242557782e-05, + "loss": 0.0878, + "step": 14165 + }, + { + "epoch": 0.66, + "learning_rate": 1.8765177441282642e-05, + "loss": 0.157, + "step": 14170 + }, + { + "epoch": 0.66, + "learning_rate": 1.8764708640007502e-05, + "loss": 0.1693, + "step": 14175 + }, + { + "epoch": 0.66, + "learning_rate": 1.8764239838732362e-05, + "loss": 0.1899, + "step": 14180 + }, + { + "epoch": 0.66, + "learning_rate": 1.8763771037457222e-05, + "loss": 0.2461, + "step": 14185 + }, + { + "epoch": 0.66, + "learning_rate": 1.8763302236182085e-05, + "loss": 0.2921, + "step": 14190 + }, + { + "epoch": 0.66, + "learning_rate": 1.8762833434906945e-05, + "loss": 0.285, + "step": 14195 + }, + { + "epoch": 0.66, + "learning_rate": 1.8762364633631805e-05, + "loss": 0.4061, + "step": 14200 + }, + { + "epoch": 0.66, + "learning_rate": 1.8761895832356665e-05, + "loss": 0.2432, + "step": 14205 + }, + { + "epoch": 0.66, + "learning_rate": 1.876142703108153e-05, + "loss": 0.1719, + "step": 14210 + }, + { + "epoch": 0.66, + "learning_rate": 1.876095822980639e-05, + "loss": 0.1674, + "step": 14215 + }, + { + "epoch": 0.66, + "learning_rate": 1.876048942853125e-05, + "loss": 0.1117, + "step": 14220 + }, + { + "epoch": 0.66, + "learning_rate": 1.876002062725611e-05, + "loss": 0.1181, + "step": 14225 + }, + { + "epoch": 0.66, + "learning_rate": 1.875955182598097e-05, + "loss": 0.2447, + "step": 14230 + }, + { + "epoch": 0.66, + "learning_rate": 1.8759083024705828e-05, + "loss": 0.2381, + "step": 14235 + }, + { + "epoch": 0.66, + "learning_rate": 1.8758614223430688e-05, + "loss": 0.2788, + "step": 14240 + }, + { + "epoch": 0.66, + "learning_rate": 1.8758145422155548e-05, + "loss": 0.3321, + "step": 14245 + }, + { + "epoch": 0.66, + "learning_rate": 1.8757676620880408e-05, + "loss": 0.5291, + "step": 14250 + }, + { + "epoch": 0.67, + "learning_rate": 1.875720781960527e-05, + "loss": 0.1678, + "step": 14255 + }, + { + "epoch": 0.67, + "learning_rate": 1.875673901833013e-05, + "loss": 0.1219, + "step": 14260 + }, + { + "epoch": 0.67, + "learning_rate": 1.875627021705499e-05, + "loss": 0.0905, + "step": 14265 + }, + { + "epoch": 0.67, + "learning_rate": 1.8755801415779855e-05, + "loss": 0.2237, + "step": 14270 + }, + { + "epoch": 0.67, + "learning_rate": 1.8755332614504714e-05, + "loss": 0.1641, + "step": 14275 + }, + { + "epoch": 0.67, + "learning_rate": 1.8754863813229574e-05, + "loss": 0.2348, + "step": 14280 + }, + { + "epoch": 0.67, + "learning_rate": 1.8754395011954434e-05, + "loss": 0.1618, + "step": 14285 + }, + { + "epoch": 0.67, + "learning_rate": 1.8753926210679294e-05, + "loss": 0.2344, + "step": 14290 + }, + { + "epoch": 0.67, + "learning_rate": 1.8753457409404154e-05, + "loss": 0.3558, + "step": 14295 + }, + { + "epoch": 0.67, + "learning_rate": 1.8752988608129014e-05, + "loss": 0.456, + "step": 14300 + }, + { + "epoch": 0.67, + "learning_rate": 1.8752519806853877e-05, + "loss": 0.2746, + "step": 14305 + }, + { + "epoch": 0.67, + "learning_rate": 1.8752051005578737e-05, + "loss": 0.1039, + "step": 14310 + }, + { + "epoch": 0.67, + "learning_rate": 1.8751582204303597e-05, + "loss": 0.1487, + "step": 14315 + }, + { + "epoch": 0.67, + "learning_rate": 1.8751113403028457e-05, + "loss": 0.0682, + "step": 14320 + }, + { + "epoch": 0.67, + "learning_rate": 1.8750644601753317e-05, + "loss": 0.2572, + "step": 14325 + }, + { + "epoch": 0.67, + "learning_rate": 1.8750175800478177e-05, + "loss": 0.2256, + "step": 14330 + }, + { + "epoch": 0.67, + "learning_rate": 1.874970699920304e-05, + "loss": 0.2427, + "step": 14335 + }, + { + "epoch": 0.67, + "learning_rate": 1.87492381979279e-05, + "loss": 0.1959, + "step": 14340 + }, + { + "epoch": 0.67, + "learning_rate": 1.874876939665276e-05, + "loss": 0.3314, + "step": 14345 + }, + { + "epoch": 0.67, + "learning_rate": 1.8748300595377624e-05, + "loss": 0.7407, + "step": 14350 + }, + { + "epoch": 0.67, + "learning_rate": 1.8747831794102484e-05, + "loss": 0.1726, + "step": 14355 + }, + { + "epoch": 0.67, + "learning_rate": 1.8747362992827344e-05, + "loss": 0.1146, + "step": 14360 + }, + { + "epoch": 0.67, + "learning_rate": 1.8746894191552203e-05, + "loss": 0.0943, + "step": 14365 + }, + { + "epoch": 0.67, + "learning_rate": 1.8746425390277063e-05, + "loss": 0.143, + "step": 14370 + }, + { + "epoch": 0.67, + "learning_rate": 1.8745956589001923e-05, + "loss": 0.1744, + "step": 14375 + }, + { + "epoch": 0.67, + "learning_rate": 1.8745487787726783e-05, + "loss": 0.1772, + "step": 14380 + }, + { + "epoch": 0.67, + "learning_rate": 1.8745018986451643e-05, + "loss": 0.1932, + "step": 14385 + }, + { + "epoch": 0.67, + "learning_rate": 1.8744550185176503e-05, + "loss": 0.2547, + "step": 14390 + }, + { + "epoch": 0.67, + "learning_rate": 1.8744081383901363e-05, + "loss": 0.2008, + "step": 14395 + }, + { + "epoch": 0.67, + "learning_rate": 1.8743612582626226e-05, + "loss": 0.5431, + "step": 14400 + }, + { + "epoch": 0.67, + "learning_rate": 1.8743143781351086e-05, + "loss": 0.2268, + "step": 14405 + }, + { + "epoch": 0.67, + "learning_rate": 1.8742674980075946e-05, + "loss": 0.0781, + "step": 14410 + }, + { + "epoch": 0.67, + "learning_rate": 1.874220617880081e-05, + "loss": 0.1206, + "step": 14415 + }, + { + "epoch": 0.67, + "learning_rate": 1.874173737752567e-05, + "loss": 0.0975, + "step": 14420 + }, + { + "epoch": 0.67, + "learning_rate": 1.874126857625053e-05, + "loss": 0.193, + "step": 14425 + }, + { + "epoch": 0.67, + "learning_rate": 1.874079977497539e-05, + "loss": 0.1506, + "step": 14430 + }, + { + "epoch": 0.67, + "learning_rate": 1.874033097370025e-05, + "loss": 0.2245, + "step": 14435 + }, + { + "epoch": 0.67, + "learning_rate": 1.873986217242511e-05, + "loss": 0.2452, + "step": 14440 + }, + { + "epoch": 0.67, + "learning_rate": 1.8739393371149973e-05, + "loss": 0.3057, + "step": 14445 + }, + { + "epoch": 0.67, + "learning_rate": 1.8738924569874833e-05, + "loss": 0.7368, + "step": 14450 + }, + { + "epoch": 0.67, + "learning_rate": 1.8738455768599692e-05, + "loss": 0.2164, + "step": 14455 + }, + { + "epoch": 0.67, + "learning_rate": 1.8737986967324552e-05, + "loss": 0.0852, + "step": 14460 + }, + { + "epoch": 0.67, + "learning_rate": 1.8737518166049412e-05, + "loss": 0.1504, + "step": 14465 + }, + { + "epoch": 0.68, + "learning_rate": 1.8737049364774272e-05, + "loss": 0.2015, + "step": 14470 + }, + { + "epoch": 0.68, + "learning_rate": 1.8736580563499132e-05, + "loss": 0.1602, + "step": 14475 + }, + { + "epoch": 0.68, + "learning_rate": 1.8736111762223995e-05, + "loss": 0.2252, + "step": 14480 + }, + { + "epoch": 0.68, + "learning_rate": 1.8735642960948855e-05, + "loss": 0.1856, + "step": 14485 + }, + { + "epoch": 0.68, + "learning_rate": 1.8735174159673715e-05, + "loss": 0.2775, + "step": 14490 + }, + { + "epoch": 0.68, + "learning_rate": 1.873470535839858e-05, + "loss": 0.2782, + "step": 14495 + }, + { + "epoch": 0.68, + "learning_rate": 1.873423655712344e-05, + "loss": 0.5278, + "step": 14500 + }, + { + "epoch": 0.68, + "learning_rate": 1.87337677558483e-05, + "loss": 0.2166, + "step": 14505 + }, + { + "epoch": 0.68, + "learning_rate": 1.873329895457316e-05, + "loss": 0.1283, + "step": 14510 + }, + { + "epoch": 0.68, + "learning_rate": 1.873283015329802e-05, + "loss": 0.1561, + "step": 14515 + }, + { + "epoch": 0.68, + "learning_rate": 1.873236135202288e-05, + "loss": 0.1354, + "step": 14520 + }, + { + "epoch": 0.68, + "learning_rate": 1.8731892550747738e-05, + "loss": 0.1407, + "step": 14525 + }, + { + "epoch": 0.68, + "learning_rate": 1.8731423749472598e-05, + "loss": 0.2432, + "step": 14530 + }, + { + "epoch": 0.68, + "learning_rate": 1.8730954948197458e-05, + "loss": 0.2267, + "step": 14535 + }, + { + "epoch": 0.68, + "learning_rate": 1.873048614692232e-05, + "loss": 0.2128, + "step": 14540 + }, + { + "epoch": 0.68, + "learning_rate": 1.873001734564718e-05, + "loss": 0.3205, + "step": 14545 + }, + { + "epoch": 0.68, + "learning_rate": 1.872954854437204e-05, + "loss": 0.631, + "step": 14550 + }, + { + "epoch": 0.68, + "learning_rate": 1.87290797430969e-05, + "loss": 0.2592, + "step": 14555 + }, + { + "epoch": 0.68, + "learning_rate": 1.8728610941821765e-05, + "loss": 0.1638, + "step": 14560 + }, + { + "epoch": 0.68, + "learning_rate": 1.8728142140546625e-05, + "loss": 0.1215, + "step": 14565 + }, + { + "epoch": 0.68, + "learning_rate": 1.8727673339271484e-05, + "loss": 0.2601, + "step": 14570 + }, + { + "epoch": 0.68, + "learning_rate": 1.8727204537996344e-05, + "loss": 0.2216, + "step": 14575 + }, + { + "epoch": 0.68, + "learning_rate": 1.8726735736721204e-05, + "loss": 0.1664, + "step": 14580 + }, + { + "epoch": 0.68, + "learning_rate": 1.8726266935446068e-05, + "loss": 0.1903, + "step": 14585 + }, + { + "epoch": 0.68, + "learning_rate": 1.8725798134170928e-05, + "loss": 0.2214, + "step": 14590 + }, + { + "epoch": 0.68, + "learning_rate": 1.8725329332895788e-05, + "loss": 0.384, + "step": 14595 + }, + { + "epoch": 0.68, + "learning_rate": 1.8724860531620647e-05, + "loss": 0.6086, + "step": 14600 + }, + { + "epoch": 0.68, + "learning_rate": 1.8724391730345507e-05, + "loss": 0.1693, + "step": 14605 + }, + { + "epoch": 0.68, + "learning_rate": 1.8723922929070367e-05, + "loss": 0.0831, + "step": 14610 + }, + { + "epoch": 0.68, + "learning_rate": 1.8723454127795227e-05, + "loss": 0.1812, + "step": 14615 + }, + { + "epoch": 0.68, + "learning_rate": 1.872298532652009e-05, + "loss": 0.1165, + "step": 14620 + }, + { + "epoch": 0.68, + "learning_rate": 1.872251652524495e-05, + "loss": 0.1394, + "step": 14625 + }, + { + "epoch": 0.68, + "learning_rate": 1.872204772396981e-05, + "loss": 0.3595, + "step": 14630 + }, + { + "epoch": 0.68, + "learning_rate": 1.872157892269467e-05, + "loss": 0.2451, + "step": 14635 + }, + { + "epoch": 0.68, + "learning_rate": 1.8721110121419534e-05, + "loss": 0.2951, + "step": 14640 + }, + { + "epoch": 0.68, + "learning_rate": 1.8720641320144394e-05, + "loss": 0.4274, + "step": 14645 + }, + { + "epoch": 0.68, + "learning_rate": 1.8720172518869254e-05, + "loss": 0.5065, + "step": 14650 + }, + { + "epoch": 0.68, + "learning_rate": 1.8719703717594113e-05, + "loss": 0.2857, + "step": 14655 + }, + { + "epoch": 0.68, + "learning_rate": 1.8719234916318973e-05, + "loss": 0.0604, + "step": 14660 + }, + { + "epoch": 0.68, + "learning_rate": 1.8718766115043833e-05, + "loss": 0.1006, + "step": 14665 + }, + { + "epoch": 0.68, + "learning_rate": 1.8718297313768693e-05, + "loss": 0.1257, + "step": 14670 + }, + { + "epoch": 0.68, + "learning_rate": 1.8717828512493557e-05, + "loss": 0.2122, + "step": 14675 + }, + { + "epoch": 0.68, + "learning_rate": 1.8717359711218417e-05, + "loss": 0.2196, + "step": 14680 + }, + { + "epoch": 0.69, + "learning_rate": 1.8716890909943276e-05, + "loss": 0.2033, + "step": 14685 + }, + { + "epoch": 0.69, + "learning_rate": 1.8716422108668136e-05, + "loss": 0.2635, + "step": 14690 + }, + { + "epoch": 0.69, + "learning_rate": 1.8715953307392996e-05, + "loss": 0.3508, + "step": 14695 + }, + { + "epoch": 0.69, + "learning_rate": 1.871548450611786e-05, + "loss": 0.5574, + "step": 14700 + }, + { + "epoch": 0.69, + "learning_rate": 1.871501570484272e-05, + "loss": 0.177, + "step": 14705 + }, + { + "epoch": 0.69, + "learning_rate": 1.871454690356758e-05, + "loss": 0.0636, + "step": 14710 + }, + { + "epoch": 0.69, + "learning_rate": 1.871407810229244e-05, + "loss": 0.1045, + "step": 14715 + }, + { + "epoch": 0.69, + "learning_rate": 1.87136093010173e-05, + "loss": 0.2398, + "step": 14720 + }, + { + "epoch": 0.69, + "learning_rate": 1.8713140499742163e-05, + "loss": 0.169, + "step": 14725 + }, + { + "epoch": 0.69, + "learning_rate": 1.8712671698467023e-05, + "loss": 0.1389, + "step": 14730 + }, + { + "epoch": 0.69, + "learning_rate": 1.8712202897191883e-05, + "loss": 0.1491, + "step": 14735 + }, + { + "epoch": 0.69, + "learning_rate": 1.8711734095916743e-05, + "loss": 0.2459, + "step": 14740 + }, + { + "epoch": 0.69, + "learning_rate": 1.8711265294641602e-05, + "loss": 0.3591, + "step": 14745 + }, + { + "epoch": 0.69, + "learning_rate": 1.8710796493366462e-05, + "loss": 0.639, + "step": 14750 + }, + { + "epoch": 0.69, + "learning_rate": 1.8710327692091322e-05, + "loss": 0.1863, + "step": 14755 + }, + { + "epoch": 0.69, + "learning_rate": 1.8709858890816182e-05, + "loss": 0.113, + "step": 14760 + }, + { + "epoch": 0.69, + "learning_rate": 1.8709390089541046e-05, + "loss": 0.134, + "step": 14765 + }, + { + "epoch": 0.69, + "learning_rate": 1.8708921288265906e-05, + "loss": 0.1618, + "step": 14770 + }, + { + "epoch": 0.69, + "learning_rate": 1.8708452486990765e-05, + "loss": 0.1839, + "step": 14775 + }, + { + "epoch": 0.69, + "learning_rate": 1.870798368571563e-05, + "loss": 0.2527, + "step": 14780 + }, + { + "epoch": 0.69, + "learning_rate": 1.870751488444049e-05, + "loss": 0.2761, + "step": 14785 + }, + { + "epoch": 0.69, + "learning_rate": 1.870704608316535e-05, + "loss": 0.2639, + "step": 14790 + }, + { + "epoch": 0.69, + "learning_rate": 1.870657728189021e-05, + "loss": 0.3858, + "step": 14795 + }, + { + "epoch": 0.69, + "learning_rate": 1.870610848061507e-05, + "loss": 0.4675, + "step": 14800 + }, + { + "epoch": 0.69, + "learning_rate": 1.870563967933993e-05, + "loss": 0.2161, + "step": 14805 + }, + { + "epoch": 0.69, + "learning_rate": 1.870517087806479e-05, + "loss": 0.0642, + "step": 14810 + }, + { + "epoch": 0.69, + "learning_rate": 1.8704702076789652e-05, + "loss": 0.1011, + "step": 14815 + }, + { + "epoch": 0.69, + "learning_rate": 1.870423327551451e-05, + "loss": 0.1166, + "step": 14820 + }, + { + "epoch": 0.69, + "learning_rate": 1.870376447423937e-05, + "loss": 0.1504, + "step": 14825 + }, + { + "epoch": 0.69, + "learning_rate": 1.870329567296423e-05, + "loss": 0.2044, + "step": 14830 + }, + { + "epoch": 0.69, + "learning_rate": 1.870282687168909e-05, + "loss": 0.1632, + "step": 14835 + }, + { + "epoch": 0.69, + "learning_rate": 1.870235807041395e-05, + "loss": 0.3202, + "step": 14840 + }, + { + "epoch": 0.69, + "learning_rate": 1.8701889269138815e-05, + "loss": 0.3161, + "step": 14845 + }, + { + "epoch": 0.69, + "learning_rate": 1.8701420467863675e-05, + "loss": 0.5072, + "step": 14850 + }, + { + "epoch": 0.69, + "learning_rate": 1.8700951666588535e-05, + "loss": 0.1994, + "step": 14855 + }, + { + "epoch": 0.69, + "learning_rate": 1.8700482865313394e-05, + "loss": 0.1046, + "step": 14860 + }, + { + "epoch": 0.69, + "learning_rate": 1.8700014064038258e-05, + "loss": 0.1125, + "step": 14865 + }, + { + "epoch": 0.69, + "learning_rate": 1.8699545262763118e-05, + "loss": 0.1324, + "step": 14870 + }, + { + "epoch": 0.69, + "learning_rate": 1.8699076461487978e-05, + "loss": 0.1554, + "step": 14875 + }, + { + "epoch": 0.69, + "learning_rate": 1.8698607660212838e-05, + "loss": 0.1562, + "step": 14880 + }, + { + "epoch": 0.69, + "learning_rate": 1.8698138858937698e-05, + "loss": 0.1357, + "step": 14885 + }, + { + "epoch": 0.69, + "learning_rate": 1.8697670057662557e-05, + "loss": 0.1963, + "step": 14890 + }, + { + "epoch": 0.7, + "learning_rate": 1.8697201256387417e-05, + "loss": 0.2944, + "step": 14895 + }, + { + "epoch": 0.7, + "learning_rate": 1.8696732455112277e-05, + "loss": 0.5406, + "step": 14900 + }, + { + "epoch": 0.7, + "learning_rate": 1.8696263653837137e-05, + "loss": 0.225, + "step": 14905 + }, + { + "epoch": 0.7, + "learning_rate": 1.8695794852562e-05, + "loss": 0.115, + "step": 14910 + }, + { + "epoch": 0.7, + "learning_rate": 1.869532605128686e-05, + "loss": 0.0827, + "step": 14915 + }, + { + "epoch": 0.7, + "learning_rate": 1.869485725001172e-05, + "loss": 0.1133, + "step": 14920 + }, + { + "epoch": 0.7, + "learning_rate": 1.8694388448736584e-05, + "loss": 0.1307, + "step": 14925 + }, + { + "epoch": 0.7, + "learning_rate": 1.8693919647461444e-05, + "loss": 0.1646, + "step": 14930 + }, + { + "epoch": 0.7, + "learning_rate": 1.8693450846186304e-05, + "loss": 0.1663, + "step": 14935 + }, + { + "epoch": 0.7, + "learning_rate": 1.8692982044911164e-05, + "loss": 0.208, + "step": 14940 + }, + { + "epoch": 0.7, + "learning_rate": 1.8692513243636024e-05, + "loss": 0.3248, + "step": 14945 + }, + { + "epoch": 0.7, + "learning_rate": 1.8692044442360883e-05, + "loss": 0.539, + "step": 14950 + }, + { + "epoch": 0.7, + "learning_rate": 1.8691575641085747e-05, + "loss": 0.2796, + "step": 14955 + }, + { + "epoch": 0.7, + "learning_rate": 1.8691106839810607e-05, + "loss": 0.0703, + "step": 14960 + }, + { + "epoch": 0.7, + "learning_rate": 1.8690638038535467e-05, + "loss": 0.1278, + "step": 14965 + }, + { + "epoch": 0.7, + "learning_rate": 1.8690169237260327e-05, + "loss": 0.067, + "step": 14970 + }, + { + "epoch": 0.7, + "learning_rate": 1.8689700435985187e-05, + "loss": 0.192, + "step": 14975 + }, + { + "epoch": 0.7, + "learning_rate": 1.8689231634710046e-05, + "loss": 0.2147, + "step": 14980 + }, + { + "epoch": 0.7, + "learning_rate": 1.8688762833434906e-05, + "loss": 0.2016, + "step": 14985 + }, + { + "epoch": 0.7, + "learning_rate": 1.868829403215977e-05, + "loss": 0.2057, + "step": 14990 + }, + { + "epoch": 0.7, + "learning_rate": 1.868782523088463e-05, + "loss": 0.2402, + "step": 14995 + }, + { + "epoch": 0.7, + "learning_rate": 1.868735642960949e-05, + "loss": 0.5123, + "step": 15000 + }, + { + "epoch": 0.7, + "learning_rate": 1.8686887628334353e-05, + "loss": 0.2725, + "step": 15005 + }, + { + "epoch": 0.7, + "learning_rate": 1.8686418827059213e-05, + "loss": 0.0648, + "step": 15010 + }, + { + "epoch": 0.7, + "learning_rate": 1.8685950025784073e-05, + "loss": 0.1405, + "step": 15015 + }, + { + "epoch": 0.7, + "learning_rate": 1.8685481224508933e-05, + "loss": 0.0819, + "step": 15020 + }, + { + "epoch": 0.7, + "learning_rate": 1.8685012423233793e-05, + "loss": 0.1102, + "step": 15025 + }, + { + "epoch": 0.7, + "learning_rate": 1.8684543621958653e-05, + "loss": 0.2164, + "step": 15030 + }, + { + "epoch": 0.7, + "learning_rate": 1.8684074820683512e-05, + "loss": 0.2653, + "step": 15035 + }, + { + "epoch": 0.7, + "learning_rate": 1.8683606019408372e-05, + "loss": 0.338, + "step": 15040 + }, + { + "epoch": 0.7, + "learning_rate": 1.8683137218133232e-05, + "loss": 0.3184, + "step": 15045 + }, + { + "epoch": 0.7, + "learning_rate": 1.8682668416858096e-05, + "loss": 0.4224, + "step": 15050 + }, + { + "epoch": 0.7, + "learning_rate": 1.8682199615582956e-05, + "loss": 0.2217, + "step": 15055 + }, + { + "epoch": 0.7, + "learning_rate": 1.8681730814307816e-05, + "loss": 0.1586, + "step": 15060 + }, + { + "epoch": 0.7, + "learning_rate": 1.8681262013032675e-05, + "loss": 0.0931, + "step": 15065 + }, + { + "epoch": 0.7, + "learning_rate": 1.868079321175754e-05, + "loss": 0.187, + "step": 15070 + }, + { + "epoch": 0.7, + "learning_rate": 1.86803244104824e-05, + "loss": 0.1737, + "step": 15075 + }, + { + "epoch": 0.7, + "learning_rate": 1.867985560920726e-05, + "loss": 0.2213, + "step": 15080 + }, + { + "epoch": 0.7, + "learning_rate": 1.867938680793212e-05, + "loss": 0.1752, + "step": 15085 + }, + { + "epoch": 0.7, + "learning_rate": 1.867891800665698e-05, + "loss": 0.2712, + "step": 15090 + }, + { + "epoch": 0.7, + "learning_rate": 1.8678449205381842e-05, + "loss": 0.4204, + "step": 15095 + }, + { + "epoch": 0.7, + "learning_rate": 1.8677980404106702e-05, + "loss": 0.5598, + "step": 15100 + }, + { + "epoch": 0.7, + "learning_rate": 1.8677511602831562e-05, + "loss": 0.2795, + "step": 15105 + }, + { + "epoch": 0.71, + "learning_rate": 1.867704280155642e-05, + "loss": 0.0626, + "step": 15110 + }, + { + "epoch": 0.71, + "learning_rate": 1.867657400028128e-05, + "loss": 0.062, + "step": 15115 + }, + { + "epoch": 0.71, + "learning_rate": 1.867610519900614e-05, + "loss": 0.0722, + "step": 15120 + }, + { + "epoch": 0.71, + "learning_rate": 1.8675636397731e-05, + "loss": 0.1323, + "step": 15125 + }, + { + "epoch": 0.71, + "learning_rate": 1.8675167596455865e-05, + "loss": 0.2438, + "step": 15130 + }, + { + "epoch": 0.71, + "learning_rate": 1.8674698795180725e-05, + "loss": 0.1662, + "step": 15135 + }, + { + "epoch": 0.71, + "learning_rate": 1.8674229993905585e-05, + "loss": 0.3017, + "step": 15140 + }, + { + "epoch": 0.71, + "learning_rate": 1.8673761192630448e-05, + "loss": 0.2221, + "step": 15145 + }, + { + "epoch": 0.71, + "learning_rate": 1.8673292391355308e-05, + "loss": 0.4873, + "step": 15150 + }, + { + "epoch": 0.71, + "learning_rate": 1.8672823590080168e-05, + "loss": 0.2781, + "step": 15155 + }, + { + "epoch": 0.71, + "learning_rate": 1.8672354788805028e-05, + "loss": 0.0843, + "step": 15160 + }, + { + "epoch": 0.71, + "learning_rate": 1.8671885987529888e-05, + "loss": 0.1082, + "step": 15165 + }, + { + "epoch": 0.71, + "learning_rate": 1.8671417186254748e-05, + "loss": 0.1206, + "step": 15170 + }, + { + "epoch": 0.71, + "learning_rate": 1.8670948384979608e-05, + "loss": 0.1604, + "step": 15175 + }, + { + "epoch": 0.71, + "learning_rate": 1.8670479583704468e-05, + "loss": 0.1387, + "step": 15180 + }, + { + "epoch": 0.71, + "learning_rate": 1.8670010782429327e-05, + "loss": 0.256, + "step": 15185 + }, + { + "epoch": 0.71, + "learning_rate": 1.866954198115419e-05, + "loss": 0.2167, + "step": 15190 + }, + { + "epoch": 0.71, + "learning_rate": 1.866907317987905e-05, + "loss": 0.3426, + "step": 15195 + }, + { + "epoch": 0.71, + "learning_rate": 1.866860437860391e-05, + "loss": 0.5178, + "step": 15200 + }, + { + "epoch": 0.71, + "learning_rate": 1.866813557732877e-05, + "loss": 0.2076, + "step": 15205 + }, + { + "epoch": 0.71, + "learning_rate": 1.8667666776053634e-05, + "loss": 0.0497, + "step": 15210 + }, + { + "epoch": 0.71, + "learning_rate": 1.8667197974778494e-05, + "loss": 0.1064, + "step": 15215 + }, + { + "epoch": 0.71, + "learning_rate": 1.8666729173503354e-05, + "loss": 0.2208, + "step": 15220 + }, + { + "epoch": 0.71, + "learning_rate": 1.8666260372228214e-05, + "loss": 0.1425, + "step": 15225 + }, + { + "epoch": 0.71, + "learning_rate": 1.8665791570953074e-05, + "loss": 0.1764, + "step": 15230 + }, + { + "epoch": 0.71, + "learning_rate": 1.8665322769677937e-05, + "loss": 0.1408, + "step": 15235 + }, + { + "epoch": 0.71, + "learning_rate": 1.8664853968402797e-05, + "loss": 0.1988, + "step": 15240 + }, + { + "epoch": 0.71, + "learning_rate": 1.8664385167127657e-05, + "loss": 0.3279, + "step": 15245 + }, + { + "epoch": 0.71, + "learning_rate": 1.8663916365852517e-05, + "loss": 0.4128, + "step": 15250 + }, + { + "epoch": 0.71, + "learning_rate": 1.8663447564577377e-05, + "loss": 0.2538, + "step": 15255 + }, + { + "epoch": 0.71, + "learning_rate": 1.8662978763302237e-05, + "loss": 0.101, + "step": 15260 + }, + { + "epoch": 0.71, + "learning_rate": 1.8662509962027097e-05, + "loss": 0.1287, + "step": 15265 + }, + { + "epoch": 0.71, + "learning_rate": 1.8662041160751956e-05, + "loss": 0.1088, + "step": 15270 + }, + { + "epoch": 0.71, + "learning_rate": 1.866157235947682e-05, + "loss": 0.1514, + "step": 15275 + }, + { + "epoch": 0.71, + "learning_rate": 1.866110355820168e-05, + "loss": 0.2464, + "step": 15280 + }, + { + "epoch": 0.71, + "learning_rate": 1.866063475692654e-05, + "loss": 0.2969, + "step": 15285 + }, + { + "epoch": 0.71, + "learning_rate": 1.8660165955651403e-05, + "loss": 0.2988, + "step": 15290 + }, + { + "epoch": 0.71, + "learning_rate": 1.8659697154376263e-05, + "loss": 0.3855, + "step": 15295 + }, + { + "epoch": 0.71, + "learning_rate": 1.8659228353101123e-05, + "loss": 0.6949, + "step": 15300 + }, + { + "epoch": 0.71, + "learning_rate": 1.8658759551825983e-05, + "loss": 0.2056, + "step": 15305 + }, + { + "epoch": 0.71, + "learning_rate": 1.8658290750550843e-05, + "loss": 0.0848, + "step": 15310 + }, + { + "epoch": 0.71, + "learning_rate": 1.8657821949275703e-05, + "loss": 0.1188, + "step": 15315 + }, + { + "epoch": 0.71, + "learning_rate": 1.8657353148000563e-05, + "loss": 0.0881, + "step": 15320 + }, + { + "epoch": 0.72, + "learning_rate": 1.8656884346725426e-05, + "loss": 0.1742, + "step": 15325 + }, + { + "epoch": 0.72, + "learning_rate": 1.8656415545450286e-05, + "loss": 0.1222, + "step": 15330 + }, + { + "epoch": 0.72, + "learning_rate": 1.8655946744175146e-05, + "loss": 0.1678, + "step": 15335 + }, + { + "epoch": 0.72, + "learning_rate": 1.8655477942900006e-05, + "loss": 0.1992, + "step": 15340 + }, + { + "epoch": 0.72, + "learning_rate": 1.8655009141624866e-05, + "loss": 0.3601, + "step": 15345 + }, + { + "epoch": 0.72, + "learning_rate": 1.8654540340349726e-05, + "loss": 0.5375, + "step": 15350 + }, + { + "epoch": 0.72, + "learning_rate": 1.865407153907459e-05, + "loss": 0.2414, + "step": 15355 + }, + { + "epoch": 0.72, + "learning_rate": 1.865360273779945e-05, + "loss": 0.0649, + "step": 15360 + }, + { + "epoch": 0.72, + "learning_rate": 1.865313393652431e-05, + "loss": 0.0756, + "step": 15365 + }, + { + "epoch": 0.72, + "learning_rate": 1.865266513524917e-05, + "loss": 0.1562, + "step": 15370 + }, + { + "epoch": 0.72, + "learning_rate": 1.8652196333974032e-05, + "loss": 0.1215, + "step": 15375 + }, + { + "epoch": 0.72, + "learning_rate": 1.8651727532698892e-05, + "loss": 0.1505, + "step": 15380 + }, + { + "epoch": 0.72, + "learning_rate": 1.8651258731423752e-05, + "loss": 0.197, + "step": 15385 + }, + { + "epoch": 0.72, + "learning_rate": 1.8650789930148612e-05, + "loss": 0.2759, + "step": 15390 + }, + { + "epoch": 0.72, + "learning_rate": 1.8650321128873472e-05, + "loss": 0.2041, + "step": 15395 + }, + { + "epoch": 0.72, + "learning_rate": 1.8649852327598332e-05, + "loss": 0.4761, + "step": 15400 + }, + { + "epoch": 0.72, + "learning_rate": 1.864938352632319e-05, + "loss": 0.1988, + "step": 15405 + }, + { + "epoch": 0.72, + "learning_rate": 1.864891472504805e-05, + "loss": 0.1376, + "step": 15410 + }, + { + "epoch": 0.72, + "learning_rate": 1.864844592377291e-05, + "loss": 0.0872, + "step": 15415 + }, + { + "epoch": 0.72, + "learning_rate": 1.8647977122497775e-05, + "loss": 0.0863, + "step": 15420 + }, + { + "epoch": 0.72, + "learning_rate": 1.8647508321222635e-05, + "loss": 0.135, + "step": 15425 + }, + { + "epoch": 0.72, + "learning_rate": 1.8647039519947495e-05, + "loss": 0.0906, + "step": 15430 + }, + { + "epoch": 0.72, + "learning_rate": 1.8646570718672358e-05, + "loss": 0.2675, + "step": 15435 + }, + { + "epoch": 0.72, + "learning_rate": 1.8646101917397218e-05, + "loss": 0.1789, + "step": 15440 + }, + { + "epoch": 0.72, + "learning_rate": 1.8645633116122078e-05, + "loss": 0.3858, + "step": 15445 + }, + { + "epoch": 0.72, + "learning_rate": 1.8645164314846938e-05, + "loss": 0.6604, + "step": 15450 + }, + { + "epoch": 0.72, + "learning_rate": 1.8644695513571798e-05, + "loss": 0.1742, + "step": 15455 + }, + { + "epoch": 0.72, + "learning_rate": 1.8644226712296658e-05, + "loss": 0.1072, + "step": 15460 + }, + { + "epoch": 0.72, + "learning_rate": 1.864375791102152e-05, + "loss": 0.1201, + "step": 15465 + }, + { + "epoch": 0.72, + "learning_rate": 1.864328910974638e-05, + "loss": 0.1129, + "step": 15470 + }, + { + "epoch": 0.72, + "learning_rate": 1.864282030847124e-05, + "loss": 0.073, + "step": 15475 + }, + { + "epoch": 0.72, + "learning_rate": 1.86423515071961e-05, + "loss": 0.2668, + "step": 15480 + }, + { + "epoch": 0.72, + "learning_rate": 1.864188270592096e-05, + "loss": 0.2092, + "step": 15485 + }, + { + "epoch": 0.72, + "learning_rate": 1.864141390464582e-05, + "loss": 0.252, + "step": 15490 + }, + { + "epoch": 0.72, + "learning_rate": 1.8640945103370684e-05, + "loss": 0.2868, + "step": 15495 + }, + { + "epoch": 0.72, + "learning_rate": 1.8640476302095544e-05, + "loss": 0.6121, + "step": 15500 + }, + { + "epoch": 0.72, + "learning_rate": 1.8640007500820404e-05, + "loss": 0.1869, + "step": 15505 + }, + { + "epoch": 0.72, + "learning_rate": 1.8639538699545264e-05, + "loss": 0.0788, + "step": 15510 + }, + { + "epoch": 0.72, + "learning_rate": 1.8639069898270127e-05, + "loss": 0.0961, + "step": 15515 + }, + { + "epoch": 0.72, + "learning_rate": 1.8638601096994987e-05, + "loss": 0.1343, + "step": 15520 + }, + { + "epoch": 0.72, + "learning_rate": 1.8638132295719847e-05, + "loss": 0.1213, + "step": 15525 + }, + { + "epoch": 0.72, + "learning_rate": 1.8637663494444707e-05, + "loss": 0.2465, + "step": 15530 + }, + { + "epoch": 0.72, + "learning_rate": 1.8637194693169567e-05, + "loss": 0.2933, + "step": 15535 + }, + { + "epoch": 0.73, + "learning_rate": 1.8636725891894427e-05, + "loss": 0.205, + "step": 15540 + }, + { + "epoch": 0.73, + "learning_rate": 1.8636257090619287e-05, + "loss": 0.3036, + "step": 15545 + }, + { + "epoch": 0.73, + "learning_rate": 1.8635788289344147e-05, + "loss": 0.4355, + "step": 15550 + }, + { + "epoch": 0.73, + "learning_rate": 1.8635319488069007e-05, + "loss": 0.2686, + "step": 15555 + }, + { + "epoch": 0.73, + "learning_rate": 1.863485068679387e-05, + "loss": 0.0397, + "step": 15560 + }, + { + "epoch": 0.73, + "learning_rate": 1.863438188551873e-05, + "loss": 0.2227, + "step": 15565 + }, + { + "epoch": 0.73, + "learning_rate": 1.863391308424359e-05, + "loss": 0.1172, + "step": 15570 + }, + { + "epoch": 0.73, + "learning_rate": 1.8633444282968453e-05, + "loss": 0.1019, + "step": 15575 + }, + { + "epoch": 0.73, + "learning_rate": 1.8632975481693313e-05, + "loss": 0.184, + "step": 15580 + }, + { + "epoch": 0.73, + "learning_rate": 1.8632506680418173e-05, + "loss": 0.2066, + "step": 15585 + }, + { + "epoch": 0.73, + "learning_rate": 1.8632037879143033e-05, + "loss": 0.1915, + "step": 15590 + }, + { + "epoch": 0.73, + "learning_rate": 1.8631569077867893e-05, + "loss": 0.2898, + "step": 15595 + }, + { + "epoch": 0.73, + "learning_rate": 1.8631100276592753e-05, + "loss": 0.4485, + "step": 15600 + }, + { + "epoch": 0.73, + "learning_rate": 1.8630631475317616e-05, + "loss": 0.2207, + "step": 15605 + }, + { + "epoch": 0.73, + "learning_rate": 1.8630162674042476e-05, + "loss": 0.1243, + "step": 15610 + }, + { + "epoch": 0.73, + "learning_rate": 1.8629693872767336e-05, + "loss": 0.0872, + "step": 15615 + }, + { + "epoch": 0.73, + "learning_rate": 1.8629225071492196e-05, + "loss": 0.1143, + "step": 15620 + }, + { + "epoch": 0.73, + "learning_rate": 1.8628756270217056e-05, + "loss": 0.065, + "step": 15625 + }, + { + "epoch": 0.73, + "learning_rate": 1.8628287468941916e-05, + "loss": 0.2135, + "step": 15630 + }, + { + "epoch": 0.73, + "learning_rate": 1.8627818667666776e-05, + "loss": 0.2308, + "step": 15635 + }, + { + "epoch": 0.73, + "learning_rate": 1.862734986639164e-05, + "loss": 0.2845, + "step": 15640 + }, + { + "epoch": 0.73, + "learning_rate": 1.86268810651165e-05, + "loss": 0.3182, + "step": 15645 + }, + { + "epoch": 0.73, + "learning_rate": 1.862641226384136e-05, + "loss": 0.6225, + "step": 15650 + }, + { + "epoch": 0.73, + "learning_rate": 1.8625943462566222e-05, + "loss": 0.2644, + "step": 15655 + }, + { + "epoch": 0.73, + "learning_rate": 1.8625474661291082e-05, + "loss": 0.086, + "step": 15660 + }, + { + "epoch": 0.73, + "learning_rate": 1.8625005860015942e-05, + "loss": 0.157, + "step": 15665 + }, + { + "epoch": 0.73, + "learning_rate": 1.8624537058740802e-05, + "loss": 0.1244, + "step": 15670 + }, + { + "epoch": 0.73, + "learning_rate": 1.8624068257465662e-05, + "loss": 0.1208, + "step": 15675 + }, + { + "epoch": 0.73, + "learning_rate": 1.8623599456190522e-05, + "loss": 0.1678, + "step": 15680 + }, + { + "epoch": 0.73, + "learning_rate": 1.8623130654915382e-05, + "loss": 0.2024, + "step": 15685 + }, + { + "epoch": 0.73, + "learning_rate": 1.8622661853640242e-05, + "loss": 0.3057, + "step": 15690 + }, + { + "epoch": 0.73, + "learning_rate": 1.86221930523651e-05, + "loss": 0.2436, + "step": 15695 + }, + { + "epoch": 0.73, + "learning_rate": 1.8621724251089965e-05, + "loss": 0.4505, + "step": 15700 + }, + { + "epoch": 0.73, + "learning_rate": 1.8621255449814825e-05, + "loss": 0.2134, + "step": 15705 + }, + { + "epoch": 0.73, + "learning_rate": 1.8620786648539685e-05, + "loss": 0.0503, + "step": 15710 + }, + { + "epoch": 0.73, + "learning_rate": 1.8620317847264545e-05, + "loss": 0.103, + "step": 15715 + }, + { + "epoch": 0.73, + "learning_rate": 1.8619849045989408e-05, + "loss": 0.132, + "step": 15720 + }, + { + "epoch": 0.73, + "learning_rate": 1.8619380244714268e-05, + "loss": 0.2023, + "step": 15725 + }, + { + "epoch": 0.73, + "learning_rate": 1.8618911443439128e-05, + "loss": 0.1633, + "step": 15730 + }, + { + "epoch": 0.73, + "learning_rate": 1.8618442642163988e-05, + "loss": 0.2361, + "step": 15735 + }, + { + "epoch": 0.73, + "learning_rate": 1.8617973840888848e-05, + "loss": 0.2142, + "step": 15740 + }, + { + "epoch": 0.73, + "learning_rate": 1.861750503961371e-05, + "loss": 0.2203, + "step": 15745 + }, + { + "epoch": 0.73, + "learning_rate": 1.861703623833857e-05, + "loss": 0.4832, + "step": 15750 + }, + { + "epoch": 0.74, + "learning_rate": 1.861656743706343e-05, + "loss": 0.171, + "step": 15755 + }, + { + "epoch": 0.74, + "learning_rate": 1.861609863578829e-05, + "loss": 0.0748, + "step": 15760 + }, + { + "epoch": 0.74, + "learning_rate": 1.861562983451315e-05, + "loss": 0.0726, + "step": 15765 + }, + { + "epoch": 0.74, + "learning_rate": 1.861516103323801e-05, + "loss": 0.1165, + "step": 15770 + }, + { + "epoch": 0.74, + "learning_rate": 1.861469223196287e-05, + "loss": 0.1643, + "step": 15775 + }, + { + "epoch": 0.74, + "learning_rate": 1.861422343068773e-05, + "loss": 0.2114, + "step": 15780 + }, + { + "epoch": 0.74, + "learning_rate": 1.8613754629412594e-05, + "loss": 0.1382, + "step": 15785 + }, + { + "epoch": 0.74, + "learning_rate": 1.8613285828137454e-05, + "loss": 0.1504, + "step": 15790 + }, + { + "epoch": 0.74, + "learning_rate": 1.8612817026862314e-05, + "loss": 0.3677, + "step": 15795 + }, + { + "epoch": 0.74, + "learning_rate": 1.8612348225587177e-05, + "loss": 0.7503, + "step": 15800 + }, + { + "epoch": 0.74, + "learning_rate": 1.8611879424312037e-05, + "loss": 0.1473, + "step": 15805 + }, + { + "epoch": 0.74, + "learning_rate": 1.8611410623036897e-05, + "loss": 0.0413, + "step": 15810 + }, + { + "epoch": 0.74, + "learning_rate": 1.8610941821761757e-05, + "loss": 0.1276, + "step": 15815 + }, + { + "epoch": 0.74, + "learning_rate": 1.8610473020486617e-05, + "loss": 0.12, + "step": 15820 + }, + { + "epoch": 0.74, + "learning_rate": 1.8610004219211477e-05, + "loss": 0.1935, + "step": 15825 + }, + { + "epoch": 0.74, + "learning_rate": 1.8609535417936337e-05, + "loss": 0.1424, + "step": 15830 + }, + { + "epoch": 0.74, + "learning_rate": 1.8609066616661197e-05, + "loss": 0.2974, + "step": 15835 + }, + { + "epoch": 0.74, + "learning_rate": 1.860859781538606e-05, + "loss": 0.1485, + "step": 15840 + }, + { + "epoch": 0.74, + "learning_rate": 1.860812901411092e-05, + "loss": 0.3909, + "step": 15845 + }, + { + "epoch": 0.74, + "learning_rate": 1.860766021283578e-05, + "loss": 0.4972, + "step": 15850 + }, + { + "epoch": 0.74, + "learning_rate": 1.860719141156064e-05, + "loss": 0.2334, + "step": 15855 + }, + { + "epoch": 0.74, + "learning_rate": 1.86067226102855e-05, + "loss": 0.059, + "step": 15860 + }, + { + "epoch": 0.74, + "learning_rate": 1.8606253809010363e-05, + "loss": 0.0805, + "step": 15865 + }, + { + "epoch": 0.74, + "learning_rate": 1.8605785007735223e-05, + "loss": 0.1012, + "step": 15870 + }, + { + "epoch": 0.74, + "learning_rate": 1.8605316206460083e-05, + "loss": 0.0957, + "step": 15875 + }, + { + "epoch": 0.74, + "learning_rate": 1.8604847405184943e-05, + "loss": 0.1382, + "step": 15880 + }, + { + "epoch": 0.74, + "learning_rate": 1.8604378603909806e-05, + "loss": 0.2678, + "step": 15885 + }, + { + "epoch": 0.74, + "learning_rate": 1.8603909802634666e-05, + "loss": 0.2458, + "step": 15890 + }, + { + "epoch": 0.74, + "learning_rate": 1.8603441001359526e-05, + "loss": 0.3033, + "step": 15895 + }, + { + "epoch": 0.74, + "learning_rate": 1.8602972200084386e-05, + "loss": 0.5778, + "step": 15900 + }, + { + "epoch": 0.74, + "learning_rate": 1.8602503398809246e-05, + "loss": 0.2027, + "step": 15905 + }, + { + "epoch": 0.74, + "learning_rate": 1.8602034597534106e-05, + "loss": 0.0999, + "step": 15910 + }, + { + "epoch": 0.74, + "learning_rate": 1.8601565796258966e-05, + "loss": 0.0957, + "step": 15915 + }, + { + "epoch": 0.74, + "learning_rate": 1.8601096994983826e-05, + "loss": 0.2037, + "step": 15920 + }, + { + "epoch": 0.74, + "learning_rate": 1.860062819370869e-05, + "loss": 0.1079, + "step": 15925 + }, + { + "epoch": 0.74, + "learning_rate": 1.860015939243355e-05, + "loss": 0.1333, + "step": 15930 + }, + { + "epoch": 0.74, + "learning_rate": 1.859969059115841e-05, + "loss": 0.2305, + "step": 15935 + }, + { + "epoch": 0.74, + "learning_rate": 1.859922178988327e-05, + "loss": 0.2528, + "step": 15940 + }, + { + "epoch": 0.74, + "learning_rate": 1.8598752988608132e-05, + "loss": 0.3822, + "step": 15945 + }, + { + "epoch": 0.74, + "learning_rate": 1.8598284187332992e-05, + "loss": 0.5118, + "step": 15950 + }, + { + "epoch": 0.74, + "learning_rate": 1.8597815386057852e-05, + "loss": 0.3077, + "step": 15955 + }, + { + "epoch": 0.74, + "learning_rate": 1.8597346584782712e-05, + "loss": 0.0664, + "step": 15960 + }, + { + "epoch": 0.74, + "learning_rate": 1.8596877783507572e-05, + "loss": 0.1011, + "step": 15965 + }, + { + "epoch": 0.75, + "learning_rate": 1.8596408982232432e-05, + "loss": 0.1759, + "step": 15970 + }, + { + "epoch": 0.75, + "learning_rate": 1.8595940180957295e-05, + "loss": 0.1561, + "step": 15975 + }, + { + "epoch": 0.75, + "learning_rate": 1.8595471379682155e-05, + "loss": 0.1241, + "step": 15980 + }, + { + "epoch": 0.75, + "learning_rate": 1.8595002578407015e-05, + "loss": 0.1386, + "step": 15985 + }, + { + "epoch": 0.75, + "learning_rate": 1.8594533777131875e-05, + "loss": 0.2541, + "step": 15990 + }, + { + "epoch": 0.75, + "learning_rate": 1.8594064975856735e-05, + "loss": 0.2858, + "step": 15995 + }, + { + "epoch": 0.75, + "learning_rate": 1.8593596174581595e-05, + "loss": 0.552, + "step": 16000 + }, + { + "epoch": 0.75, + "learning_rate": 1.8593127373306458e-05, + "loss": 0.2186, + "step": 16005 + }, + { + "epoch": 0.75, + "learning_rate": 1.8592658572031318e-05, + "loss": 0.0791, + "step": 16010 + }, + { + "epoch": 0.75, + "learning_rate": 1.8592189770756178e-05, + "loss": 0.1378, + "step": 16015 + }, + { + "epoch": 0.75, + "learning_rate": 1.8591720969481038e-05, + "loss": 0.202, + "step": 16020 + }, + { + "epoch": 0.75, + "learning_rate": 1.85912521682059e-05, + "loss": 0.1665, + "step": 16025 + }, + { + "epoch": 0.75, + "learning_rate": 1.859078336693076e-05, + "loss": 0.2158, + "step": 16030 + }, + { + "epoch": 0.75, + "learning_rate": 1.859031456565562e-05, + "loss": 0.2452, + "step": 16035 + }, + { + "epoch": 0.75, + "learning_rate": 1.858984576438048e-05, + "loss": 0.1764, + "step": 16040 + }, + { + "epoch": 0.75, + "learning_rate": 1.858937696310534e-05, + "loss": 0.3921, + "step": 16045 + }, + { + "epoch": 0.75, + "learning_rate": 1.85889081618302e-05, + "loss": 0.5582, + "step": 16050 + }, + { + "epoch": 0.75, + "learning_rate": 1.858843936055506e-05, + "loss": 0.1606, + "step": 16055 + }, + { + "epoch": 0.75, + "learning_rate": 1.858797055927992e-05, + "loss": 0.089, + "step": 16060 + }, + { + "epoch": 0.75, + "learning_rate": 1.858750175800478e-05, + "loss": 0.0914, + "step": 16065 + }, + { + "epoch": 0.75, + "learning_rate": 1.8587032956729644e-05, + "loss": 0.1239, + "step": 16070 + }, + { + "epoch": 0.75, + "learning_rate": 1.8586564155454504e-05, + "loss": 0.1263, + "step": 16075 + }, + { + "epoch": 0.75, + "learning_rate": 1.8586095354179364e-05, + "loss": 0.1562, + "step": 16080 + }, + { + "epoch": 0.75, + "learning_rate": 1.8585626552904227e-05, + "loss": 0.3022, + "step": 16085 + }, + { + "epoch": 0.75, + "learning_rate": 1.8585157751629087e-05, + "loss": 0.2022, + "step": 16090 + }, + { + "epoch": 0.75, + "learning_rate": 1.8584688950353947e-05, + "loss": 0.3969, + "step": 16095 + }, + { + "epoch": 0.75, + "learning_rate": 1.8584220149078807e-05, + "loss": 0.5977, + "step": 16100 + }, + { + "epoch": 0.75, + "learning_rate": 1.8583751347803667e-05, + "loss": 0.2297, + "step": 16105 + }, + { + "epoch": 0.75, + "learning_rate": 1.8583282546528527e-05, + "loss": 0.0994, + "step": 16110 + }, + { + "epoch": 0.75, + "learning_rate": 1.858281374525339e-05, + "loss": 0.0737, + "step": 16115 + }, + { + "epoch": 0.75, + "learning_rate": 1.858234494397825e-05, + "loss": 0.097, + "step": 16120 + }, + { + "epoch": 0.75, + "learning_rate": 1.858187614270311e-05, + "loss": 0.1379, + "step": 16125 + }, + { + "epoch": 0.75, + "learning_rate": 1.858140734142797e-05, + "loss": 0.1508, + "step": 16130 + }, + { + "epoch": 0.75, + "learning_rate": 1.858093854015283e-05, + "loss": 0.2133, + "step": 16135 + }, + { + "epoch": 0.75, + "learning_rate": 1.858046973887769e-05, + "loss": 0.2874, + "step": 16140 + }, + { + "epoch": 0.75, + "learning_rate": 1.858000093760255e-05, + "loss": 0.2308, + "step": 16145 + }, + { + "epoch": 0.75, + "learning_rate": 1.8579532136327413e-05, + "loss": 0.5109, + "step": 16150 + }, + { + "epoch": 0.75, + "learning_rate": 1.8579063335052273e-05, + "loss": 0.2218, + "step": 16155 + }, + { + "epoch": 0.75, + "learning_rate": 1.8578594533777133e-05, + "loss": 0.1128, + "step": 16160 + }, + { + "epoch": 0.75, + "learning_rate": 1.8578125732501996e-05, + "loss": 0.1386, + "step": 16165 + }, + { + "epoch": 0.75, + "learning_rate": 1.8577656931226856e-05, + "loss": 0.1299, + "step": 16170 + }, + { + "epoch": 0.75, + "learning_rate": 1.8577188129951716e-05, + "loss": 0.1277, + "step": 16175 + }, + { + "epoch": 0.75, + "learning_rate": 1.8576719328676576e-05, + "loss": 0.201, + "step": 16180 + }, + { + "epoch": 0.76, + "learning_rate": 1.8576250527401436e-05, + "loss": 0.2323, + "step": 16185 + }, + { + "epoch": 0.76, + "learning_rate": 1.8575781726126296e-05, + "loss": 0.2772, + "step": 16190 + }, + { + "epoch": 0.76, + "learning_rate": 1.8575312924851156e-05, + "loss": 0.3258, + "step": 16195 + }, + { + "epoch": 0.76, + "learning_rate": 1.8574844123576016e-05, + "loss": 0.3383, + "step": 16200 + }, + { + "epoch": 0.76, + "learning_rate": 1.8574375322300876e-05, + "loss": 0.2396, + "step": 16205 + }, + { + "epoch": 0.76, + "learning_rate": 1.857390652102574e-05, + "loss": 0.0677, + "step": 16210 + }, + { + "epoch": 0.76, + "learning_rate": 1.85734377197506e-05, + "loss": 0.105, + "step": 16215 + }, + { + "epoch": 0.76, + "learning_rate": 1.857296891847546e-05, + "loss": 0.1804, + "step": 16220 + }, + { + "epoch": 0.76, + "learning_rate": 1.857250011720032e-05, + "loss": 0.1835, + "step": 16225 + }, + { + "epoch": 0.76, + "learning_rate": 1.8572031315925182e-05, + "loss": 0.1532, + "step": 16230 + }, + { + "epoch": 0.76, + "learning_rate": 1.8571562514650042e-05, + "loss": 0.2916, + "step": 16235 + }, + { + "epoch": 0.76, + "learning_rate": 1.8571093713374902e-05, + "loss": 0.3538, + "step": 16240 + }, + { + "epoch": 0.76, + "learning_rate": 1.8570624912099762e-05, + "loss": 0.3102, + "step": 16245 + }, + { + "epoch": 0.76, + "learning_rate": 1.8570156110824622e-05, + "loss": 0.5525, + "step": 16250 + }, + { + "epoch": 0.76, + "learning_rate": 1.8569687309549485e-05, + "loss": 0.2395, + "step": 16255 + }, + { + "epoch": 0.76, + "learning_rate": 1.8569218508274345e-05, + "loss": 0.0512, + "step": 16260 + }, + { + "epoch": 0.76, + "learning_rate": 1.8568749706999205e-05, + "loss": 0.0791, + "step": 16265 + }, + { + "epoch": 0.76, + "learning_rate": 1.8568280905724065e-05, + "loss": 0.139, + "step": 16270 + }, + { + "epoch": 0.76, + "learning_rate": 1.8567812104448925e-05, + "loss": 0.1549, + "step": 16275 + }, + { + "epoch": 0.76, + "learning_rate": 1.8567343303173785e-05, + "loss": 0.2231, + "step": 16280 + }, + { + "epoch": 0.76, + "learning_rate": 1.8566874501898645e-05, + "loss": 0.2823, + "step": 16285 + }, + { + "epoch": 0.76, + "learning_rate": 1.8566405700623505e-05, + "loss": 0.2111, + "step": 16290 + }, + { + "epoch": 0.76, + "learning_rate": 1.8565936899348368e-05, + "loss": 0.3525, + "step": 16295 + }, + { + "epoch": 0.76, + "learning_rate": 1.8565468098073228e-05, + "loss": 0.4608, + "step": 16300 + }, + { + "epoch": 0.76, + "learning_rate": 1.8564999296798088e-05, + "loss": 0.1706, + "step": 16305 + }, + { + "epoch": 0.76, + "learning_rate": 1.856453049552295e-05, + "loss": 0.1103, + "step": 16310 + }, + { + "epoch": 0.76, + "learning_rate": 1.856406169424781e-05, + "loss": 0.0674, + "step": 16315 + }, + { + "epoch": 0.76, + "learning_rate": 1.856359289297267e-05, + "loss": 0.0725, + "step": 16320 + }, + { + "epoch": 0.76, + "learning_rate": 1.856312409169753e-05, + "loss": 0.0853, + "step": 16325 + }, + { + "epoch": 0.76, + "learning_rate": 1.856265529042239e-05, + "loss": 0.2292, + "step": 16330 + }, + { + "epoch": 0.76, + "learning_rate": 1.856218648914725e-05, + "loss": 0.2606, + "step": 16335 + }, + { + "epoch": 0.76, + "learning_rate": 1.856171768787211e-05, + "loss": 0.2461, + "step": 16340 + }, + { + "epoch": 0.76, + "learning_rate": 1.856124888659697e-05, + "loss": 0.3852, + "step": 16345 + }, + { + "epoch": 0.76, + "learning_rate": 1.8560780085321834e-05, + "loss": 0.5247, + "step": 16350 + }, + { + "epoch": 0.76, + "learning_rate": 1.8560311284046694e-05, + "loss": 0.2266, + "step": 16355 + }, + { + "epoch": 0.76, + "learning_rate": 1.8559842482771554e-05, + "loss": 0.0598, + "step": 16360 + }, + { + "epoch": 0.76, + "learning_rate": 1.8559373681496414e-05, + "loss": 0.1304, + "step": 16365 + }, + { + "epoch": 0.76, + "learning_rate": 1.8558904880221274e-05, + "loss": 0.106, + "step": 16370 + }, + { + "epoch": 0.76, + "learning_rate": 1.8558436078946137e-05, + "loss": 0.1039, + "step": 16375 + }, + { + "epoch": 0.76, + "learning_rate": 1.8557967277670997e-05, + "loss": 0.1404, + "step": 16380 + }, + { + "epoch": 0.76, + "learning_rate": 1.8557498476395857e-05, + "loss": 0.2072, + "step": 16385 + }, + { + "epoch": 0.76, + "learning_rate": 1.8557029675120717e-05, + "loss": 0.2344, + "step": 16390 + }, + { + "epoch": 0.77, + "learning_rate": 1.855656087384558e-05, + "loss": 0.3281, + "step": 16395 + }, + { + "epoch": 0.77, + "learning_rate": 1.855609207257044e-05, + "loss": 0.5027, + "step": 16400 + }, + { + "epoch": 0.77, + "learning_rate": 1.85556232712953e-05, + "loss": 0.1777, + "step": 16405 + }, + { + "epoch": 0.77, + "learning_rate": 1.855515447002016e-05, + "loss": 0.0762, + "step": 16410 + }, + { + "epoch": 0.77, + "learning_rate": 1.855468566874502e-05, + "loss": 0.0743, + "step": 16415 + }, + { + "epoch": 0.77, + "learning_rate": 1.855421686746988e-05, + "loss": 0.0928, + "step": 16420 + }, + { + "epoch": 0.77, + "learning_rate": 1.855374806619474e-05, + "loss": 0.1213, + "step": 16425 + }, + { + "epoch": 0.77, + "learning_rate": 1.85532792649196e-05, + "loss": 0.1962, + "step": 16430 + }, + { + "epoch": 0.77, + "learning_rate": 1.8552810463644463e-05, + "loss": 0.2512, + "step": 16435 + }, + { + "epoch": 0.77, + "learning_rate": 1.8552341662369323e-05, + "loss": 0.2477, + "step": 16440 + }, + { + "epoch": 0.77, + "learning_rate": 1.8551872861094183e-05, + "loss": 0.2721, + "step": 16445 + }, + { + "epoch": 0.77, + "learning_rate": 1.8551404059819043e-05, + "loss": 0.4693, + "step": 16450 + }, + { + "epoch": 0.77, + "learning_rate": 1.8550935258543906e-05, + "loss": 0.2471, + "step": 16455 + }, + { + "epoch": 0.77, + "learning_rate": 1.8550466457268766e-05, + "loss": 0.0744, + "step": 16460 + }, + { + "epoch": 0.77, + "learning_rate": 1.8549997655993626e-05, + "loss": 0.0537, + "step": 16465 + }, + { + "epoch": 0.77, + "learning_rate": 1.8549528854718486e-05, + "loss": 0.1404, + "step": 16470 + }, + { + "epoch": 0.77, + "learning_rate": 1.8549060053443346e-05, + "loss": 0.0935, + "step": 16475 + }, + { + "epoch": 0.77, + "learning_rate": 1.8548591252168206e-05, + "loss": 0.1698, + "step": 16480 + }, + { + "epoch": 0.77, + "learning_rate": 1.8548122450893066e-05, + "loss": 0.2063, + "step": 16485 + }, + { + "epoch": 0.77, + "learning_rate": 1.854765364961793e-05, + "loss": 0.2454, + "step": 16490 + }, + { + "epoch": 0.77, + "learning_rate": 1.854718484834279e-05, + "loss": 0.3434, + "step": 16495 + }, + { + "epoch": 0.77, + "learning_rate": 1.854671604706765e-05, + "loss": 0.3791, + "step": 16500 + }, + { + "epoch": 0.77, + "learning_rate": 1.854624724579251e-05, + "loss": 0.2235, + "step": 16505 + }, + { + "epoch": 0.77, + "learning_rate": 1.854577844451737e-05, + "loss": 0.0541, + "step": 16510 + }, + { + "epoch": 0.77, + "learning_rate": 1.8545309643242232e-05, + "loss": 0.0699, + "step": 16515 + }, + { + "epoch": 0.77, + "learning_rate": 1.8544840841967092e-05, + "loss": 0.0983, + "step": 16520 + }, + { + "epoch": 0.77, + "learning_rate": 1.8544372040691952e-05, + "loss": 0.1524, + "step": 16525 + }, + { + "epoch": 0.77, + "learning_rate": 1.8543903239416812e-05, + "loss": 0.2209, + "step": 16530 + }, + { + "epoch": 0.77, + "learning_rate": 1.8543434438141676e-05, + "loss": 0.2845, + "step": 16535 + }, + { + "epoch": 0.77, + "learning_rate": 1.8542965636866535e-05, + "loss": 0.1833, + "step": 16540 + }, + { + "epoch": 0.77, + "learning_rate": 1.8542496835591395e-05, + "loss": 0.2518, + "step": 16545 + }, + { + "epoch": 0.77, + "learning_rate": 1.8542028034316255e-05, + "loss": 0.4805, + "step": 16550 + }, + { + "epoch": 0.77, + "learning_rate": 1.8541559233041115e-05, + "loss": 0.2084, + "step": 16555 + }, + { + "epoch": 0.77, + "learning_rate": 1.8541090431765975e-05, + "loss": 0.0975, + "step": 16560 + }, + { + "epoch": 0.77, + "learning_rate": 1.8540621630490835e-05, + "loss": 0.0754, + "step": 16565 + }, + { + "epoch": 0.77, + "learning_rate": 1.8540152829215695e-05, + "loss": 0.1748, + "step": 16570 + }, + { + "epoch": 0.77, + "learning_rate": 1.8539684027940555e-05, + "loss": 0.1479, + "step": 16575 + }, + { + "epoch": 0.77, + "learning_rate": 1.853921522666542e-05, + "loss": 0.1724, + "step": 16580 + }, + { + "epoch": 0.77, + "learning_rate": 1.8538746425390278e-05, + "loss": 0.1805, + "step": 16585 + }, + { + "epoch": 0.77, + "learning_rate": 1.8538277624115138e-05, + "loss": 0.233, + "step": 16590 + }, + { + "epoch": 0.77, + "learning_rate": 1.853780882284e-05, + "loss": 0.2265, + "step": 16595 + }, + { + "epoch": 0.77, + "learning_rate": 1.853734002156486e-05, + "loss": 0.4862, + "step": 16600 + }, + { + "epoch": 0.77, + "learning_rate": 1.853687122028972e-05, + "loss": 0.2111, + "step": 16605 + }, + { + "epoch": 0.78, + "learning_rate": 1.853640241901458e-05, + "loss": 0.0882, + "step": 16610 + }, + { + "epoch": 0.78, + "learning_rate": 1.853593361773944e-05, + "loss": 0.1286, + "step": 16615 + }, + { + "epoch": 0.78, + "learning_rate": 1.85354648164643e-05, + "loss": 0.0689, + "step": 16620 + }, + { + "epoch": 0.78, + "learning_rate": 1.8534996015189165e-05, + "loss": 0.0597, + "step": 16625 + }, + { + "epoch": 0.78, + "learning_rate": 1.8534527213914024e-05, + "loss": 0.1602, + "step": 16630 + }, + { + "epoch": 0.78, + "learning_rate": 1.8534058412638884e-05, + "loss": 0.2968, + "step": 16635 + }, + { + "epoch": 0.78, + "learning_rate": 1.8533589611363744e-05, + "loss": 0.3255, + "step": 16640 + }, + { + "epoch": 0.78, + "learning_rate": 1.8533120810088604e-05, + "loss": 0.3148, + "step": 16645 + }, + { + "epoch": 0.78, + "learning_rate": 1.8532652008813464e-05, + "loss": 0.4054, + "step": 16650 + }, + { + "epoch": 0.78, + "learning_rate": 1.8532183207538324e-05, + "loss": 0.2527, + "step": 16655 + }, + { + "epoch": 0.78, + "learning_rate": 1.8531714406263187e-05, + "loss": 0.0541, + "step": 16660 + }, + { + "epoch": 0.78, + "learning_rate": 1.8531245604988047e-05, + "loss": 0.1128, + "step": 16665 + }, + { + "epoch": 0.78, + "learning_rate": 1.8530776803712907e-05, + "loss": 0.1116, + "step": 16670 + }, + { + "epoch": 0.78, + "learning_rate": 1.853030800243777e-05, + "loss": 0.1365, + "step": 16675 + }, + { + "epoch": 0.78, + "learning_rate": 1.852983920116263e-05, + "loss": 0.1894, + "step": 16680 + }, + { + "epoch": 0.78, + "learning_rate": 1.852937039988749e-05, + "loss": 0.3003, + "step": 16685 + }, + { + "epoch": 0.78, + "learning_rate": 1.852890159861235e-05, + "loss": 0.1914, + "step": 16690 + }, + { + "epoch": 0.78, + "learning_rate": 1.852843279733721e-05, + "loss": 0.3021, + "step": 16695 + }, + { + "epoch": 0.78, + "learning_rate": 1.852796399606207e-05, + "loss": 0.3965, + "step": 16700 + }, + { + "epoch": 0.78, + "learning_rate": 1.852749519478693e-05, + "loss": 0.2011, + "step": 16705 + }, + { + "epoch": 0.78, + "learning_rate": 1.852702639351179e-05, + "loss": 0.0675, + "step": 16710 + }, + { + "epoch": 0.78, + "learning_rate": 1.852655759223665e-05, + "loss": 0.1059, + "step": 16715 + }, + { + "epoch": 0.78, + "learning_rate": 1.8526088790961513e-05, + "loss": 0.1146, + "step": 16720 + }, + { + "epoch": 0.78, + "learning_rate": 1.8525619989686373e-05, + "loss": 0.1235, + "step": 16725 + }, + { + "epoch": 0.78, + "learning_rate": 1.8525151188411233e-05, + "loss": 0.1721, + "step": 16730 + }, + { + "epoch": 0.78, + "learning_rate": 1.8524682387136093e-05, + "loss": 0.1825, + "step": 16735 + }, + { + "epoch": 0.78, + "learning_rate": 1.8524213585860957e-05, + "loss": 0.2841, + "step": 16740 + }, + { + "epoch": 0.78, + "learning_rate": 1.8523744784585816e-05, + "loss": 0.3064, + "step": 16745 + }, + { + "epoch": 0.78, + "learning_rate": 1.8523275983310676e-05, + "loss": 0.4229, + "step": 16750 + }, + { + "epoch": 0.78, + "learning_rate": 1.8522807182035536e-05, + "loss": 0.2078, + "step": 16755 + }, + { + "epoch": 0.78, + "learning_rate": 1.8522338380760396e-05, + "loss": 0.0681, + "step": 16760 + }, + { + "epoch": 0.78, + "learning_rate": 1.852186957948526e-05, + "loss": 0.1168, + "step": 16765 + }, + { + "epoch": 0.78, + "learning_rate": 1.852140077821012e-05, + "loss": 0.0938, + "step": 16770 + }, + { + "epoch": 0.78, + "learning_rate": 1.852093197693498e-05, + "loss": 0.1566, + "step": 16775 + }, + { + "epoch": 0.78, + "learning_rate": 1.852046317565984e-05, + "loss": 0.1815, + "step": 16780 + }, + { + "epoch": 0.78, + "learning_rate": 1.85199943743847e-05, + "loss": 0.2747, + "step": 16785 + }, + { + "epoch": 0.78, + "learning_rate": 1.851952557310956e-05, + "loss": 0.1572, + "step": 16790 + }, + { + "epoch": 0.78, + "learning_rate": 1.851905677183442e-05, + "loss": 0.4039, + "step": 16795 + }, + { + "epoch": 0.78, + "learning_rate": 1.851858797055928e-05, + "loss": 0.5064, + "step": 16800 + }, + { + "epoch": 0.78, + "learning_rate": 1.8518119169284142e-05, + "loss": 0.2327, + "step": 16805 + }, + { + "epoch": 0.78, + "learning_rate": 1.8517650368009002e-05, + "loss": 0.055, + "step": 16810 + }, + { + "epoch": 0.78, + "learning_rate": 1.8517181566733862e-05, + "loss": 0.0713, + "step": 16815 + }, + { + "epoch": 0.78, + "learning_rate": 1.8516712765458726e-05, + "loss": 0.1341, + "step": 16820 + }, + { + "epoch": 0.79, + "learning_rate": 1.8516243964183586e-05, + "loss": 0.2755, + "step": 16825 + }, + { + "epoch": 0.79, + "learning_rate": 1.8515775162908446e-05, + "loss": 0.1865, + "step": 16830 + }, + { + "epoch": 0.79, + "learning_rate": 1.8515306361633305e-05, + "loss": 0.2744, + "step": 16835 + }, + { + "epoch": 0.79, + "learning_rate": 1.8514837560358165e-05, + "loss": 0.1419, + "step": 16840 + }, + { + "epoch": 0.79, + "learning_rate": 1.8514368759083025e-05, + "loss": 0.3494, + "step": 16845 + }, + { + "epoch": 0.79, + "learning_rate": 1.8513899957807885e-05, + "loss": 0.4996, + "step": 16850 + }, + { + "epoch": 0.79, + "learning_rate": 1.8513431156532745e-05, + "loss": 0.1462, + "step": 16855 + }, + { + "epoch": 0.79, + "learning_rate": 1.851296235525761e-05, + "loss": 0.1193, + "step": 16860 + }, + { + "epoch": 0.79, + "learning_rate": 1.851249355398247e-05, + "loss": 0.1351, + "step": 16865 + }, + { + "epoch": 0.79, + "learning_rate": 1.851202475270733e-05, + "loss": 0.0935, + "step": 16870 + }, + { + "epoch": 0.79, + "learning_rate": 1.851155595143219e-05, + "loss": 0.1988, + "step": 16875 + }, + { + "epoch": 0.79, + "learning_rate": 1.8511087150157048e-05, + "loss": 0.1656, + "step": 16880 + }, + { + "epoch": 0.79, + "learning_rate": 1.851061834888191e-05, + "loss": 0.2114, + "step": 16885 + }, + { + "epoch": 0.79, + "learning_rate": 1.851014954760677e-05, + "loss": 0.2165, + "step": 16890 + }, + { + "epoch": 0.79, + "learning_rate": 1.850968074633163e-05, + "loss": 0.2662, + "step": 16895 + }, + { + "epoch": 0.79, + "learning_rate": 1.850921194505649e-05, + "loss": 0.694, + "step": 16900 + }, + { + "epoch": 0.79, + "learning_rate": 1.8508743143781355e-05, + "loss": 0.2216, + "step": 16905 + }, + { + "epoch": 0.79, + "learning_rate": 1.8508274342506215e-05, + "loss": 0.0824, + "step": 16910 + }, + { + "epoch": 0.79, + "learning_rate": 1.8507805541231075e-05, + "loss": 0.0962, + "step": 16915 + }, + { + "epoch": 0.79, + "learning_rate": 1.8507336739955934e-05, + "loss": 0.1082, + "step": 16920 + }, + { + "epoch": 0.79, + "learning_rate": 1.8506867938680794e-05, + "loss": 0.1578, + "step": 16925 + }, + { + "epoch": 0.79, + "learning_rate": 1.8506399137405654e-05, + "loss": 0.1898, + "step": 16930 + }, + { + "epoch": 0.79, + "learning_rate": 1.8505930336130514e-05, + "loss": 0.1983, + "step": 16935 + }, + { + "epoch": 0.79, + "learning_rate": 1.8505461534855374e-05, + "loss": 0.2282, + "step": 16940 + }, + { + "epoch": 0.79, + "learning_rate": 1.8504992733580238e-05, + "loss": 0.2615, + "step": 16945 + }, + { + "epoch": 0.79, + "learning_rate": 1.8504523932305097e-05, + "loss": 0.4631, + "step": 16950 + }, + { + "epoch": 0.79, + "learning_rate": 1.8504055131029957e-05, + "loss": 0.2476, + "step": 16955 + }, + { + "epoch": 0.79, + "learning_rate": 1.850358632975482e-05, + "loss": 0.0629, + "step": 16960 + }, + { + "epoch": 0.79, + "learning_rate": 1.850311752847968e-05, + "loss": 0.0986, + "step": 16965 + }, + { + "epoch": 0.79, + "learning_rate": 1.850264872720454e-05, + "loss": 0.1212, + "step": 16970 + }, + { + "epoch": 0.79, + "learning_rate": 1.85021799259294e-05, + "loss": 0.127, + "step": 16975 + }, + { + "epoch": 0.79, + "learning_rate": 1.850171112465426e-05, + "loss": 0.2048, + "step": 16980 + }, + { + "epoch": 0.79, + "learning_rate": 1.850124232337912e-05, + "loss": 0.2524, + "step": 16985 + }, + { + "epoch": 0.79, + "learning_rate": 1.850077352210398e-05, + "loss": 0.2069, + "step": 16990 + }, + { + "epoch": 0.79, + "learning_rate": 1.850030472082884e-05, + "loss": 0.2817, + "step": 16995 + }, + { + "epoch": 0.79, + "learning_rate": 1.8499835919553704e-05, + "loss": 0.5592, + "step": 17000 + }, + { + "epoch": 0.79, + "learning_rate": 1.8499367118278564e-05, + "loss": 0.2087, + "step": 17005 + }, + { + "epoch": 0.79, + "learning_rate": 1.8498898317003423e-05, + "loss": 0.1038, + "step": 17010 + }, + { + "epoch": 0.79, + "learning_rate": 1.8498429515728283e-05, + "loss": 0.1234, + "step": 17015 + }, + { + "epoch": 0.79, + "learning_rate": 1.8497960714453143e-05, + "loss": 0.1361, + "step": 17020 + }, + { + "epoch": 0.79, + "learning_rate": 1.8497491913178007e-05, + "loss": 0.1025, + "step": 17025 + }, + { + "epoch": 0.79, + "learning_rate": 1.8497023111902867e-05, + "loss": 0.1794, + "step": 17030 + }, + { + "epoch": 0.79, + "learning_rate": 1.8496554310627727e-05, + "loss": 0.2281, + "step": 17035 + }, + { + "epoch": 0.8, + "learning_rate": 1.8496085509352586e-05, + "loss": 0.2468, + "step": 17040 + }, + { + "epoch": 0.8, + "learning_rate": 1.849561670807745e-05, + "loss": 0.3502, + "step": 17045 + }, + { + "epoch": 0.8, + "learning_rate": 1.849514790680231e-05, + "loss": 0.5949, + "step": 17050 + }, + { + "epoch": 0.8, + "learning_rate": 1.849467910552717e-05, + "loss": 0.2099, + "step": 17055 + }, + { + "epoch": 0.8, + "learning_rate": 1.849421030425203e-05, + "loss": 0.0767, + "step": 17060 + }, + { + "epoch": 0.8, + "learning_rate": 1.849374150297689e-05, + "loss": 0.142, + "step": 17065 + }, + { + "epoch": 0.8, + "learning_rate": 1.849327270170175e-05, + "loss": 0.1833, + "step": 17070 + }, + { + "epoch": 0.8, + "learning_rate": 1.849280390042661e-05, + "loss": 0.0711, + "step": 17075 + }, + { + "epoch": 0.8, + "learning_rate": 1.849233509915147e-05, + "loss": 0.1719, + "step": 17080 + }, + { + "epoch": 0.8, + "learning_rate": 1.849186629787633e-05, + "loss": 0.25, + "step": 17085 + }, + { + "epoch": 0.8, + "learning_rate": 1.8491397496601193e-05, + "loss": 0.1838, + "step": 17090 + }, + { + "epoch": 0.8, + "learning_rate": 1.8490928695326052e-05, + "loss": 0.3021, + "step": 17095 + }, + { + "epoch": 0.8, + "learning_rate": 1.8490459894050912e-05, + "loss": 0.5295, + "step": 17100 + }, + { + "epoch": 0.8, + "learning_rate": 1.8489991092775776e-05, + "loss": 0.2923, + "step": 17105 + }, + { + "epoch": 0.8, + "learning_rate": 1.8489522291500636e-05, + "loss": 0.0491, + "step": 17110 + }, + { + "epoch": 0.8, + "learning_rate": 1.8489053490225496e-05, + "loss": 0.0952, + "step": 17115 + }, + { + "epoch": 0.8, + "learning_rate": 1.8488584688950356e-05, + "loss": 0.0852, + "step": 17120 + }, + { + "epoch": 0.8, + "learning_rate": 1.8488115887675215e-05, + "loss": 0.1166, + "step": 17125 + }, + { + "epoch": 0.8, + "learning_rate": 1.8487647086400075e-05, + "loss": 0.1835, + "step": 17130 + }, + { + "epoch": 0.8, + "learning_rate": 1.8487178285124935e-05, + "loss": 0.2004, + "step": 17135 + }, + { + "epoch": 0.8, + "learning_rate": 1.84867094838498e-05, + "loss": 0.2276, + "step": 17140 + }, + { + "epoch": 0.8, + "learning_rate": 1.848624068257466e-05, + "loss": 0.3723, + "step": 17145 + }, + { + "epoch": 0.8, + "learning_rate": 1.848577188129952e-05, + "loss": 0.4231, + "step": 17150 + }, + { + "epoch": 0.8, + "learning_rate": 1.848530308002438e-05, + "loss": 0.2236, + "step": 17155 + }, + { + "epoch": 0.8, + "learning_rate": 1.848483427874924e-05, + "loss": 0.0599, + "step": 17160 + }, + { + "epoch": 0.8, + "learning_rate": 1.84843654774741e-05, + "loss": 0.1565, + "step": 17165 + }, + { + "epoch": 0.8, + "learning_rate": 1.848389667619896e-05, + "loss": 0.1829, + "step": 17170 + }, + { + "epoch": 0.8, + "learning_rate": 1.848342787492382e-05, + "loss": 0.111, + "step": 17175 + }, + { + "epoch": 0.8, + "learning_rate": 1.848295907364868e-05, + "loss": 0.1442, + "step": 17180 + }, + { + "epoch": 0.8, + "learning_rate": 1.8482490272373545e-05, + "loss": 0.2144, + "step": 17185 + }, + { + "epoch": 0.8, + "learning_rate": 1.8482021471098405e-05, + "loss": 0.2541, + "step": 17190 + }, + { + "epoch": 0.8, + "learning_rate": 1.8481552669823265e-05, + "loss": 0.2623, + "step": 17195 + }, + { + "epoch": 0.8, + "learning_rate": 1.8481083868548125e-05, + "loss": 0.4617, + "step": 17200 + }, + { + "epoch": 0.8, + "learning_rate": 1.8480615067272985e-05, + "loss": 0.2354, + "step": 17205 + }, + { + "epoch": 0.8, + "learning_rate": 1.8480146265997845e-05, + "loss": 0.1225, + "step": 17210 + }, + { + "epoch": 0.8, + "learning_rate": 1.8479677464722704e-05, + "loss": 0.0805, + "step": 17215 + }, + { + "epoch": 0.8, + "learning_rate": 1.8479208663447564e-05, + "loss": 0.1008, + "step": 17220 + }, + { + "epoch": 0.8, + "learning_rate": 1.8478739862172424e-05, + "loss": 0.1341, + "step": 17225 + }, + { + "epoch": 0.8, + "learning_rate": 1.8478271060897288e-05, + "loss": 0.1171, + "step": 17230 + }, + { + "epoch": 0.8, + "learning_rate": 1.8477802259622148e-05, + "loss": 0.2465, + "step": 17235 + }, + { + "epoch": 0.8, + "learning_rate": 1.8477333458347008e-05, + "loss": 0.3368, + "step": 17240 + }, + { + "epoch": 0.8, + "learning_rate": 1.8476864657071867e-05, + "loss": 0.4202, + "step": 17245 + }, + { + "epoch": 0.8, + "learning_rate": 1.847639585579673e-05, + "loss": 0.4027, + "step": 17250 + }, + { + "epoch": 0.81, + "learning_rate": 1.847592705452159e-05, + "loss": 0.2201, + "step": 17255 + }, + { + "epoch": 0.81, + "learning_rate": 1.847545825324645e-05, + "loss": 0.0969, + "step": 17260 + }, + { + "epoch": 0.81, + "learning_rate": 1.847498945197131e-05, + "loss": 0.0765, + "step": 17265 + }, + { + "epoch": 0.81, + "learning_rate": 1.847452065069617e-05, + "loss": 0.198, + "step": 17270 + }, + { + "epoch": 0.81, + "learning_rate": 1.8474051849421034e-05, + "loss": 0.153, + "step": 17275 + }, + { + "epoch": 0.81, + "learning_rate": 1.8473583048145894e-05, + "loss": 0.1368, + "step": 17280 + }, + { + "epoch": 0.81, + "learning_rate": 1.8473114246870754e-05, + "loss": 0.1888, + "step": 17285 + }, + { + "epoch": 0.81, + "learning_rate": 1.8472645445595614e-05, + "loss": 0.1688, + "step": 17290 + }, + { + "epoch": 0.81, + "learning_rate": 1.8472176644320474e-05, + "loss": 0.2878, + "step": 17295 + }, + { + "epoch": 0.81, + "learning_rate": 1.8471707843045333e-05, + "loss": 0.5695, + "step": 17300 + }, + { + "epoch": 0.81, + "learning_rate": 1.8471239041770193e-05, + "loss": 0.2027, + "step": 17305 + }, + { + "epoch": 0.81, + "learning_rate": 1.8470770240495057e-05, + "loss": 0.0891, + "step": 17310 + }, + { + "epoch": 0.81, + "learning_rate": 1.8470301439219917e-05, + "loss": 0.109, + "step": 17315 + }, + { + "epoch": 0.81, + "learning_rate": 1.8469832637944777e-05, + "loss": 0.1004, + "step": 17320 + }, + { + "epoch": 0.81, + "learning_rate": 1.8469363836669637e-05, + "loss": 0.0984, + "step": 17325 + }, + { + "epoch": 0.81, + "learning_rate": 1.84688950353945e-05, + "loss": 0.19, + "step": 17330 + }, + { + "epoch": 0.81, + "learning_rate": 1.846842623411936e-05, + "loss": 0.1577, + "step": 17335 + }, + { + "epoch": 0.81, + "learning_rate": 1.846795743284422e-05, + "loss": 0.3391, + "step": 17340 + }, + { + "epoch": 0.81, + "learning_rate": 1.846748863156908e-05, + "loss": 0.2577, + "step": 17345 + }, + { + "epoch": 0.81, + "learning_rate": 1.846701983029394e-05, + "loss": 0.4126, + "step": 17350 + }, + { + "epoch": 0.81, + "learning_rate": 1.84665510290188e-05, + "loss": 0.1848, + "step": 17355 + }, + { + "epoch": 0.81, + "learning_rate": 1.846608222774366e-05, + "loss": 0.1064, + "step": 17360 + }, + { + "epoch": 0.81, + "learning_rate": 1.846561342646852e-05, + "loss": 0.1071, + "step": 17365 + }, + { + "epoch": 0.81, + "learning_rate": 1.8465144625193383e-05, + "loss": 0.1296, + "step": 17370 + }, + { + "epoch": 0.81, + "learning_rate": 1.8464675823918243e-05, + "loss": 0.1236, + "step": 17375 + }, + { + "epoch": 0.81, + "learning_rate": 1.8464207022643103e-05, + "loss": 0.3178, + "step": 17380 + }, + { + "epoch": 0.81, + "learning_rate": 1.8463738221367963e-05, + "loss": 0.3502, + "step": 17385 + }, + { + "epoch": 0.81, + "learning_rate": 1.8463269420092826e-05, + "loss": 0.3099, + "step": 17390 + }, + { + "epoch": 0.81, + "learning_rate": 1.8462800618817686e-05, + "loss": 0.2417, + "step": 17395 + }, + { + "epoch": 0.81, + "learning_rate": 1.8462331817542546e-05, + "loss": 0.4998, + "step": 17400 + }, + { + "epoch": 0.81, + "learning_rate": 1.8461863016267406e-05, + "loss": 0.2151, + "step": 17405 + }, + { + "epoch": 0.81, + "learning_rate": 1.8461394214992266e-05, + "loss": 0.0575, + "step": 17410 + }, + { + "epoch": 0.81, + "learning_rate": 1.846092541371713e-05, + "loss": 0.122, + "step": 17415 + }, + { + "epoch": 0.81, + "learning_rate": 1.846045661244199e-05, + "loss": 0.1077, + "step": 17420 + }, + { + "epoch": 0.81, + "learning_rate": 1.845998781116685e-05, + "loss": 0.1329, + "step": 17425 + }, + { + "epoch": 0.81, + "learning_rate": 1.845951900989171e-05, + "loss": 0.1299, + "step": 17430 + }, + { + "epoch": 0.81, + "learning_rate": 1.845905020861657e-05, + "loss": 0.2726, + "step": 17435 + }, + { + "epoch": 0.81, + "learning_rate": 1.845858140734143e-05, + "loss": 0.1639, + "step": 17440 + }, + { + "epoch": 0.81, + "learning_rate": 1.845811260606629e-05, + "loss": 0.4012, + "step": 17445 + }, + { + "epoch": 0.81, + "learning_rate": 1.845764380479115e-05, + "loss": 0.3916, + "step": 17450 + }, + { + "epoch": 0.81, + "learning_rate": 1.8457175003516012e-05, + "loss": 0.2054, + "step": 17455 + }, + { + "epoch": 0.81, + "learning_rate": 1.8456706202240872e-05, + "loss": 0.0669, + "step": 17460 + }, + { + "epoch": 0.81, + "learning_rate": 1.845623740096573e-05, + "loss": 0.1326, + "step": 17465 + }, + { + "epoch": 0.82, + "learning_rate": 1.8455768599690595e-05, + "loss": 0.1697, + "step": 17470 + }, + { + "epoch": 0.82, + "learning_rate": 1.8455299798415455e-05, + "loss": 0.1535, + "step": 17475 + }, + { + "epoch": 0.82, + "learning_rate": 1.8454830997140315e-05, + "loss": 0.1638, + "step": 17480 + }, + { + "epoch": 0.82, + "learning_rate": 1.8454362195865175e-05, + "loss": 0.1457, + "step": 17485 + }, + { + "epoch": 0.82, + "learning_rate": 1.8453893394590035e-05, + "loss": 0.2595, + "step": 17490 + }, + { + "epoch": 0.82, + "learning_rate": 1.8453424593314895e-05, + "loss": 0.2966, + "step": 17495 + }, + { + "epoch": 0.82, + "learning_rate": 1.8452955792039755e-05, + "loss": 0.6608, + "step": 17500 + }, + { + "epoch": 0.82, + "learning_rate": 1.8452486990764614e-05, + "loss": 0.1939, + "step": 17505 + }, + { + "epoch": 0.82, + "learning_rate": 1.8452018189489478e-05, + "loss": 0.1323, + "step": 17510 + }, + { + "epoch": 0.82, + "learning_rate": 1.8451549388214338e-05, + "loss": 0.1026, + "step": 17515 + }, + { + "epoch": 0.82, + "learning_rate": 1.8451080586939198e-05, + "loss": 0.1229, + "step": 17520 + }, + { + "epoch": 0.82, + "learning_rate": 1.8450611785664058e-05, + "loss": 0.0851, + "step": 17525 + }, + { + "epoch": 0.82, + "learning_rate": 1.8450142984388918e-05, + "loss": 0.1832, + "step": 17530 + }, + { + "epoch": 0.82, + "learning_rate": 1.844967418311378e-05, + "loss": 0.1868, + "step": 17535 + }, + { + "epoch": 0.82, + "learning_rate": 1.844920538183864e-05, + "loss": 0.2952, + "step": 17540 + }, + { + "epoch": 0.82, + "learning_rate": 1.84487365805635e-05, + "loss": 0.2594, + "step": 17545 + }, + { + "epoch": 0.82, + "learning_rate": 1.844826777928836e-05, + "loss": 0.3857, + "step": 17550 + }, + { + "epoch": 0.82, + "learning_rate": 1.8447798978013224e-05, + "loss": 0.2327, + "step": 17555 + }, + { + "epoch": 0.82, + "learning_rate": 1.8447330176738084e-05, + "loss": 0.0328, + "step": 17560 + }, + { + "epoch": 0.82, + "learning_rate": 1.8446861375462944e-05, + "loss": 0.0574, + "step": 17565 + }, + { + "epoch": 0.82, + "learning_rate": 1.8446392574187804e-05, + "loss": 0.1277, + "step": 17570 + }, + { + "epoch": 0.82, + "learning_rate": 1.8445923772912664e-05, + "loss": 0.2752, + "step": 17575 + }, + { + "epoch": 0.82, + "learning_rate": 1.8445454971637524e-05, + "loss": 0.1907, + "step": 17580 + }, + { + "epoch": 0.82, + "learning_rate": 1.8444986170362384e-05, + "loss": 0.2263, + "step": 17585 + }, + { + "epoch": 0.82, + "learning_rate": 1.8444517369087244e-05, + "loss": 0.2014, + "step": 17590 + }, + { + "epoch": 0.82, + "learning_rate": 1.8444048567812103e-05, + "loss": 0.3681, + "step": 17595 + }, + { + "epoch": 0.82, + "learning_rate": 1.8443579766536967e-05, + "loss": 0.53, + "step": 17600 + }, + { + "epoch": 0.82, + "learning_rate": 1.8443110965261827e-05, + "loss": 0.1759, + "step": 17605 + }, + { + "epoch": 0.82, + "learning_rate": 1.8442642163986687e-05, + "loss": 0.106, + "step": 17610 + }, + { + "epoch": 0.82, + "learning_rate": 1.844217336271155e-05, + "loss": 0.0909, + "step": 17615 + }, + { + "epoch": 0.82, + "learning_rate": 1.844170456143641e-05, + "loss": 0.0902, + "step": 17620 + }, + { + "epoch": 0.82, + "learning_rate": 1.844123576016127e-05, + "loss": 0.1365, + "step": 17625 + }, + { + "epoch": 0.82, + "learning_rate": 1.844076695888613e-05, + "loss": 0.1976, + "step": 17630 + }, + { + "epoch": 0.82, + "learning_rate": 1.844029815761099e-05, + "loss": 0.2058, + "step": 17635 + }, + { + "epoch": 0.82, + "learning_rate": 1.843982935633585e-05, + "loss": 0.1611, + "step": 17640 + }, + { + "epoch": 0.82, + "learning_rate": 1.843936055506071e-05, + "loss": 0.2566, + "step": 17645 + }, + { + "epoch": 0.82, + "learning_rate": 1.8438891753785573e-05, + "loss": 0.5291, + "step": 17650 + }, + { + "epoch": 0.82, + "learning_rate": 1.8438422952510433e-05, + "loss": 0.1834, + "step": 17655 + }, + { + "epoch": 0.82, + "learning_rate": 1.8437954151235293e-05, + "loss": 0.1088, + "step": 17660 + }, + { + "epoch": 0.82, + "learning_rate": 1.8437485349960153e-05, + "loss": 0.0704, + "step": 17665 + }, + { + "epoch": 0.82, + "learning_rate": 1.8437016548685013e-05, + "loss": 0.1562, + "step": 17670 + }, + { + "epoch": 0.82, + "learning_rate": 1.8436547747409873e-05, + "loss": 0.1251, + "step": 17675 + }, + { + "epoch": 0.82, + "learning_rate": 1.8436078946134736e-05, + "loss": 0.11, + "step": 17680 + }, + { + "epoch": 0.83, + "learning_rate": 1.8435610144859596e-05, + "loss": 0.1469, + "step": 17685 + }, + { + "epoch": 0.83, + "learning_rate": 1.8435141343584456e-05, + "loss": 0.5929, + "step": 17690 + }, + { + "epoch": 0.83, + "learning_rate": 1.843467254230932e-05, + "loss": 0.312, + "step": 17695 + }, + { + "epoch": 0.83, + "learning_rate": 1.843420374103418e-05, + "loss": 0.627, + "step": 17700 + }, + { + "epoch": 0.83, + "learning_rate": 1.843373493975904e-05, + "loss": 0.2812, + "step": 17705 + }, + { + "epoch": 0.83, + "learning_rate": 1.84332661384839e-05, + "loss": 0.05, + "step": 17710 + }, + { + "epoch": 0.83, + "learning_rate": 1.843279733720876e-05, + "loss": 0.0554, + "step": 17715 + }, + { + "epoch": 0.83, + "learning_rate": 1.843232853593362e-05, + "loss": 0.1176, + "step": 17720 + }, + { + "epoch": 0.83, + "learning_rate": 1.843185973465848e-05, + "loss": 0.1333, + "step": 17725 + }, + { + "epoch": 0.83, + "learning_rate": 1.843139093338334e-05, + "loss": 0.1865, + "step": 17730 + }, + { + "epoch": 0.83, + "learning_rate": 1.84309221321082e-05, + "loss": 0.2019, + "step": 17735 + }, + { + "epoch": 0.83, + "learning_rate": 1.8430453330833062e-05, + "loss": 0.1893, + "step": 17740 + }, + { + "epoch": 0.83, + "learning_rate": 1.8429984529557922e-05, + "loss": 0.1795, + "step": 17745 + }, + { + "epoch": 0.83, + "learning_rate": 1.8429515728282782e-05, + "loss": 0.4059, + "step": 17750 + }, + { + "epoch": 0.83, + "learning_rate": 1.842904692700764e-05, + "loss": 0.2472, + "step": 17755 + }, + { + "epoch": 0.83, + "learning_rate": 1.8428578125732505e-05, + "loss": 0.1022, + "step": 17760 + }, + { + "epoch": 0.83, + "learning_rate": 1.8428109324457365e-05, + "loss": 0.1209, + "step": 17765 + }, + { + "epoch": 0.83, + "learning_rate": 1.8427640523182225e-05, + "loss": 0.13, + "step": 17770 + }, + { + "epoch": 0.83, + "learning_rate": 1.8427171721907085e-05, + "loss": 0.1884, + "step": 17775 + }, + { + "epoch": 0.83, + "learning_rate": 1.8426702920631945e-05, + "loss": 0.2003, + "step": 17780 + }, + { + "epoch": 0.83, + "learning_rate": 1.8426234119356805e-05, + "loss": 0.1951, + "step": 17785 + }, + { + "epoch": 0.83, + "learning_rate": 1.8425765318081668e-05, + "loss": 0.1778, + "step": 17790 + }, + { + "epoch": 0.83, + "learning_rate": 1.8425296516806528e-05, + "loss": 0.3655, + "step": 17795 + }, + { + "epoch": 0.83, + "learning_rate": 1.8424827715531388e-05, + "loss": 0.4681, + "step": 17800 + }, + { + "epoch": 0.83, + "learning_rate": 1.8424358914256248e-05, + "loss": 0.1807, + "step": 17805 + }, + { + "epoch": 0.83, + "learning_rate": 1.8423890112981108e-05, + "loss": 0.041, + "step": 17810 + }, + { + "epoch": 0.83, + "learning_rate": 1.8423421311705968e-05, + "loss": 0.0738, + "step": 17815 + }, + { + "epoch": 0.83, + "learning_rate": 1.842295251043083e-05, + "loss": 0.1138, + "step": 17820 + }, + { + "epoch": 0.83, + "learning_rate": 1.842248370915569e-05, + "loss": 0.1336, + "step": 17825 + }, + { + "epoch": 0.83, + "learning_rate": 1.842201490788055e-05, + "loss": 0.2036, + "step": 17830 + }, + { + "epoch": 0.83, + "learning_rate": 1.842154610660541e-05, + "loss": 0.166, + "step": 17835 + }, + { + "epoch": 0.83, + "learning_rate": 1.8421077305330274e-05, + "loss": 0.1972, + "step": 17840 + }, + { + "epoch": 0.83, + "learning_rate": 1.8420608504055134e-05, + "loss": 0.2675, + "step": 17845 + }, + { + "epoch": 0.83, + "learning_rate": 1.8420139702779994e-05, + "loss": 0.431, + "step": 17850 + }, + { + "epoch": 0.83, + "learning_rate": 1.8419670901504854e-05, + "loss": 0.2293, + "step": 17855 + }, + { + "epoch": 0.83, + "learning_rate": 1.8419202100229714e-05, + "loss": 0.0781, + "step": 17860 + }, + { + "epoch": 0.83, + "learning_rate": 1.8418733298954574e-05, + "loss": 0.0761, + "step": 17865 + }, + { + "epoch": 0.83, + "learning_rate": 1.8418264497679434e-05, + "loss": 0.1226, + "step": 17870 + }, + { + "epoch": 0.83, + "learning_rate": 1.8417795696404294e-05, + "loss": 0.219, + "step": 17875 + }, + { + "epoch": 0.83, + "learning_rate": 1.8417326895129157e-05, + "loss": 0.2012, + "step": 17880 + }, + { + "epoch": 0.83, + "learning_rate": 1.8416858093854017e-05, + "loss": 0.159, + "step": 17885 + }, + { + "epoch": 0.83, + "learning_rate": 1.8416389292578877e-05, + "loss": 0.2031, + "step": 17890 + }, + { + "epoch": 0.84, + "learning_rate": 1.8415920491303737e-05, + "loss": 0.3211, + "step": 17895 + }, + { + "epoch": 0.84, + "learning_rate": 1.84154516900286e-05, + "loss": 0.5196, + "step": 17900 + }, + { + "epoch": 0.84, + "learning_rate": 1.841498288875346e-05, + "loss": 0.2361, + "step": 17905 + }, + { + "epoch": 0.84, + "learning_rate": 1.841451408747832e-05, + "loss": 0.2127, + "step": 17910 + }, + { + "epoch": 0.84, + "learning_rate": 1.841404528620318e-05, + "loss": 0.1421, + "step": 17915 + }, + { + "epoch": 0.84, + "learning_rate": 1.841357648492804e-05, + "loss": 0.1517, + "step": 17920 + }, + { + "epoch": 0.84, + "learning_rate": 1.8413107683652903e-05, + "loss": 0.138, + "step": 17925 + }, + { + "epoch": 0.84, + "learning_rate": 1.8412638882377763e-05, + "loss": 0.2045, + "step": 17930 + }, + { + "epoch": 0.84, + "learning_rate": 1.8412170081102623e-05, + "loss": 0.2729, + "step": 17935 + }, + { + "epoch": 0.84, + "learning_rate": 1.8411701279827483e-05, + "loss": 0.2523, + "step": 17940 + }, + { + "epoch": 0.84, + "learning_rate": 1.8411232478552343e-05, + "loss": 0.3385, + "step": 17945 + }, + { + "epoch": 0.84, + "learning_rate": 1.8410763677277203e-05, + "loss": 0.4665, + "step": 17950 + }, + { + "epoch": 0.84, + "learning_rate": 1.8410294876002063e-05, + "loss": 0.2603, + "step": 17955 + }, + { + "epoch": 0.84, + "learning_rate": 1.8409826074726923e-05, + "loss": 0.0925, + "step": 17960 + }, + { + "epoch": 0.84, + "learning_rate": 1.8409357273451786e-05, + "loss": 0.1406, + "step": 17965 + }, + { + "epoch": 0.84, + "learning_rate": 1.8408888472176646e-05, + "loss": 0.1379, + "step": 17970 + }, + { + "epoch": 0.84, + "learning_rate": 1.8408419670901506e-05, + "loss": 0.0881, + "step": 17975 + }, + { + "epoch": 0.84, + "learning_rate": 1.840795086962637e-05, + "loss": 0.158, + "step": 17980 + }, + { + "epoch": 0.84, + "learning_rate": 1.840748206835123e-05, + "loss": 0.2092, + "step": 17985 + }, + { + "epoch": 0.84, + "learning_rate": 1.840701326707609e-05, + "loss": 0.349, + "step": 17990 + }, + { + "epoch": 0.84, + "learning_rate": 1.840654446580095e-05, + "loss": 0.4693, + "step": 17995 + }, + { + "epoch": 0.84, + "learning_rate": 1.840607566452581e-05, + "loss": 0.4295, + "step": 18000 + }, + { + "epoch": 0.84, + "learning_rate": 1.840560686325067e-05, + "loss": 0.225, + "step": 18005 + }, + { + "epoch": 0.84, + "learning_rate": 1.840513806197553e-05, + "loss": 0.1049, + "step": 18010 + }, + { + "epoch": 0.84, + "learning_rate": 1.840466926070039e-05, + "loss": 0.1065, + "step": 18015 + }, + { + "epoch": 0.84, + "learning_rate": 1.8404200459425252e-05, + "loss": 0.1185, + "step": 18020 + }, + { + "epoch": 0.84, + "learning_rate": 1.8403731658150112e-05, + "loss": 0.2128, + "step": 18025 + }, + { + "epoch": 0.84, + "learning_rate": 1.8403262856874972e-05, + "loss": 0.1386, + "step": 18030 + }, + { + "epoch": 0.84, + "learning_rate": 1.8402794055599832e-05, + "loss": 0.2085, + "step": 18035 + }, + { + "epoch": 0.84, + "learning_rate": 1.8402325254324692e-05, + "loss": 0.2312, + "step": 18040 + }, + { + "epoch": 0.84, + "learning_rate": 1.8401856453049555e-05, + "loss": 0.3012, + "step": 18045 + }, + { + "epoch": 0.84, + "learning_rate": 1.8401387651774415e-05, + "loss": 0.5115, + "step": 18050 + }, + { + "epoch": 0.84, + "learning_rate": 1.8400918850499275e-05, + "loss": 0.2569, + "step": 18055 + }, + { + "epoch": 0.84, + "learning_rate": 1.8400450049224135e-05, + "loss": 0.1007, + "step": 18060 + }, + { + "epoch": 0.84, + "learning_rate": 1.8399981247948998e-05, + "loss": 0.1214, + "step": 18065 + }, + { + "epoch": 0.84, + "learning_rate": 1.8399512446673858e-05, + "loss": 0.1432, + "step": 18070 + }, + { + "epoch": 0.84, + "learning_rate": 1.8399043645398718e-05, + "loss": 0.1994, + "step": 18075 + }, + { + "epoch": 0.84, + "learning_rate": 1.8398574844123578e-05, + "loss": 0.1827, + "step": 18080 + }, + { + "epoch": 0.84, + "learning_rate": 1.8398106042848438e-05, + "loss": 0.2193, + "step": 18085 + }, + { + "epoch": 0.84, + "learning_rate": 1.8397637241573298e-05, + "loss": 0.2757, + "step": 18090 + }, + { + "epoch": 0.84, + "learning_rate": 1.8397168440298158e-05, + "loss": 0.2565, + "step": 18095 + }, + { + "epoch": 0.84, + "learning_rate": 1.8396699639023018e-05, + "loss": 0.4674, + "step": 18100 + }, + { + "epoch": 0.84, + "learning_rate": 1.8396230837747878e-05, + "loss": 0.198, + "step": 18105 + }, + { + "epoch": 0.85, + "learning_rate": 1.839576203647274e-05, + "loss": 0.0999, + "step": 18110 + }, + { + "epoch": 0.85, + "learning_rate": 1.83952932351976e-05, + "loss": 0.0993, + "step": 18115 + }, + { + "epoch": 0.85, + "learning_rate": 1.839482443392246e-05, + "loss": 0.1571, + "step": 18120 + }, + { + "epoch": 0.85, + "learning_rate": 1.8394355632647324e-05, + "loss": 0.0981, + "step": 18125 + }, + { + "epoch": 0.85, + "learning_rate": 1.8393886831372184e-05, + "loss": 0.1901, + "step": 18130 + }, + { + "epoch": 0.85, + "learning_rate": 1.8393418030097044e-05, + "loss": 0.248, + "step": 18135 + }, + { + "epoch": 0.85, + "learning_rate": 1.8392949228821904e-05, + "loss": 0.2168, + "step": 18140 + }, + { + "epoch": 0.85, + "learning_rate": 1.8392480427546764e-05, + "loss": 0.2339, + "step": 18145 + }, + { + "epoch": 0.85, + "learning_rate": 1.8392011626271624e-05, + "loss": 0.5106, + "step": 18150 + }, + { + "epoch": 0.85, + "learning_rate": 1.8391542824996484e-05, + "loss": 0.2561, + "step": 18155 + }, + { + "epoch": 0.85, + "learning_rate": 1.8391074023721347e-05, + "loss": 0.1187, + "step": 18160 + }, + { + "epoch": 0.85, + "learning_rate": 1.8390605222446207e-05, + "loss": 0.0982, + "step": 18165 + }, + { + "epoch": 0.85, + "learning_rate": 1.8390136421171067e-05, + "loss": 0.0904, + "step": 18170 + }, + { + "epoch": 0.85, + "learning_rate": 1.8389667619895927e-05, + "loss": 0.1419, + "step": 18175 + }, + { + "epoch": 0.85, + "learning_rate": 1.8389198818620787e-05, + "loss": 0.1005, + "step": 18180 + }, + { + "epoch": 0.85, + "learning_rate": 1.8388730017345647e-05, + "loss": 0.1154, + "step": 18185 + }, + { + "epoch": 0.85, + "learning_rate": 1.838826121607051e-05, + "loss": 0.2446, + "step": 18190 + }, + { + "epoch": 0.85, + "learning_rate": 1.838779241479537e-05, + "loss": 0.3814, + "step": 18195 + }, + { + "epoch": 0.85, + "learning_rate": 1.838732361352023e-05, + "loss": 0.4632, + "step": 18200 + }, + { + "epoch": 0.85, + "learning_rate": 1.8386854812245093e-05, + "loss": 0.1842, + "step": 18205 + }, + { + "epoch": 0.85, + "learning_rate": 1.8386386010969953e-05, + "loss": 0.1311, + "step": 18210 + }, + { + "epoch": 0.85, + "learning_rate": 1.8385917209694813e-05, + "loss": 0.1317, + "step": 18215 + }, + { + "epoch": 0.85, + "learning_rate": 1.8385448408419673e-05, + "loss": 0.1859, + "step": 18220 + }, + { + "epoch": 0.85, + "learning_rate": 1.8384979607144533e-05, + "loss": 0.0955, + "step": 18225 + }, + { + "epoch": 0.85, + "learning_rate": 1.8384510805869393e-05, + "loss": 0.1593, + "step": 18230 + }, + { + "epoch": 0.85, + "learning_rate": 1.8384042004594253e-05, + "loss": 0.2589, + "step": 18235 + }, + { + "epoch": 0.85, + "learning_rate": 1.8383573203319113e-05, + "loss": 0.3052, + "step": 18240 + }, + { + "epoch": 0.85, + "learning_rate": 1.8383104402043973e-05, + "loss": 0.2756, + "step": 18245 + }, + { + "epoch": 0.85, + "learning_rate": 1.8382635600768836e-05, + "loss": 0.4911, + "step": 18250 + }, + { + "epoch": 0.85, + "learning_rate": 1.8382166799493696e-05, + "loss": 0.1806, + "step": 18255 + }, + { + "epoch": 0.85, + "learning_rate": 1.8381697998218556e-05, + "loss": 0.0599, + "step": 18260 + }, + { + "epoch": 0.85, + "learning_rate": 1.8381229196943416e-05, + "loss": 0.0742, + "step": 18265 + }, + { + "epoch": 0.85, + "learning_rate": 1.838076039566828e-05, + "loss": 0.0986, + "step": 18270 + }, + { + "epoch": 0.85, + "learning_rate": 1.838029159439314e-05, + "loss": 0.201, + "step": 18275 + }, + { + "epoch": 0.85, + "learning_rate": 1.8379822793118e-05, + "loss": 0.2901, + "step": 18280 + }, + { + "epoch": 0.85, + "learning_rate": 1.837935399184286e-05, + "loss": 0.1967, + "step": 18285 + }, + { + "epoch": 0.85, + "learning_rate": 1.837888519056772e-05, + "loss": 0.2117, + "step": 18290 + }, + { + "epoch": 0.85, + "learning_rate": 1.837841638929258e-05, + "loss": 0.2381, + "step": 18295 + }, + { + "epoch": 0.85, + "learning_rate": 1.8377947588017442e-05, + "loss": 0.473, + "step": 18300 + }, + { + "epoch": 0.85, + "learning_rate": 1.8377478786742302e-05, + "loss": 0.2497, + "step": 18305 + }, + { + "epoch": 0.85, + "learning_rate": 1.8377009985467162e-05, + "loss": 0.0581, + "step": 18310 + }, + { + "epoch": 0.85, + "learning_rate": 1.8376541184192022e-05, + "loss": 0.0962, + "step": 18315 + }, + { + "epoch": 0.85, + "learning_rate": 1.8376072382916882e-05, + "loss": 0.089, + "step": 18320 + }, + { + "epoch": 0.86, + "learning_rate": 1.8375603581641742e-05, + "loss": 0.2265, + "step": 18325 + }, + { + "epoch": 0.86, + "learning_rate": 1.8375134780366605e-05, + "loss": 0.1076, + "step": 18330 + }, + { + "epoch": 0.86, + "learning_rate": 1.8374665979091465e-05, + "loss": 0.2302, + "step": 18335 + }, + { + "epoch": 0.86, + "learning_rate": 1.8374197177816325e-05, + "loss": 0.2914, + "step": 18340 + }, + { + "epoch": 0.86, + "learning_rate": 1.837372837654119e-05, + "loss": 0.2649, + "step": 18345 + }, + { + "epoch": 0.86, + "learning_rate": 1.8373259575266048e-05, + "loss": 0.7229, + "step": 18350 + }, + { + "epoch": 0.86, + "learning_rate": 1.8372790773990908e-05, + "loss": 0.2282, + "step": 18355 + }, + { + "epoch": 0.86, + "learning_rate": 1.8372321972715768e-05, + "loss": 0.0604, + "step": 18360 + }, + { + "epoch": 0.86, + "learning_rate": 1.8371853171440628e-05, + "loss": 0.1012, + "step": 18365 + }, + { + "epoch": 0.86, + "learning_rate": 1.8371384370165488e-05, + "loss": 0.1669, + "step": 18370 + }, + { + "epoch": 0.86, + "learning_rate": 1.8370915568890348e-05, + "loss": 0.1804, + "step": 18375 + }, + { + "epoch": 0.86, + "learning_rate": 1.8370446767615208e-05, + "loss": 0.202, + "step": 18380 + }, + { + "epoch": 0.86, + "learning_rate": 1.8369977966340068e-05, + "loss": 0.1953, + "step": 18385 + }, + { + "epoch": 0.86, + "learning_rate": 1.836950916506493e-05, + "loss": 0.2295, + "step": 18390 + }, + { + "epoch": 0.86, + "learning_rate": 1.836904036378979e-05, + "loss": 0.2345, + "step": 18395 + }, + { + "epoch": 0.86, + "learning_rate": 1.836857156251465e-05, + "loss": 0.4057, + "step": 18400 + }, + { + "epoch": 0.86, + "learning_rate": 1.836810276123951e-05, + "loss": 0.206, + "step": 18405 + }, + { + "epoch": 0.86, + "learning_rate": 1.8367633959964374e-05, + "loss": 0.0997, + "step": 18410 + }, + { + "epoch": 0.86, + "learning_rate": 1.8367165158689234e-05, + "loss": 0.0887, + "step": 18415 + }, + { + "epoch": 0.86, + "learning_rate": 1.8366696357414094e-05, + "loss": 0.1298, + "step": 18420 + }, + { + "epoch": 0.86, + "learning_rate": 1.8366227556138954e-05, + "loss": 0.198, + "step": 18425 + }, + { + "epoch": 0.86, + "learning_rate": 1.8365758754863814e-05, + "loss": 0.2134, + "step": 18430 + }, + { + "epoch": 0.86, + "learning_rate": 1.8365289953588674e-05, + "loss": 0.1678, + "step": 18435 + }, + { + "epoch": 0.86, + "learning_rate": 1.8364821152313537e-05, + "loss": 0.2678, + "step": 18440 + }, + { + "epoch": 0.86, + "learning_rate": 1.8364352351038397e-05, + "loss": 0.2635, + "step": 18445 + }, + { + "epoch": 0.86, + "learning_rate": 1.8363883549763257e-05, + "loss": 0.6666, + "step": 18450 + }, + { + "epoch": 0.86, + "learning_rate": 1.8363414748488117e-05, + "loss": 0.1784, + "step": 18455 + }, + { + "epoch": 0.86, + "learning_rate": 1.8362945947212977e-05, + "loss": 0.0755, + "step": 18460 + }, + { + "epoch": 0.86, + "learning_rate": 1.8362477145937837e-05, + "loss": 0.1487, + "step": 18465 + }, + { + "epoch": 0.86, + "learning_rate": 1.8362008344662697e-05, + "loss": 0.0882, + "step": 18470 + }, + { + "epoch": 0.86, + "learning_rate": 1.836153954338756e-05, + "loss": 0.1553, + "step": 18475 + }, + { + "epoch": 0.86, + "learning_rate": 1.836107074211242e-05, + "loss": 0.1412, + "step": 18480 + }, + { + "epoch": 0.86, + "learning_rate": 1.836060194083728e-05, + "loss": 0.2449, + "step": 18485 + }, + { + "epoch": 0.86, + "learning_rate": 1.8360133139562143e-05, + "loss": 0.217, + "step": 18490 + }, + { + "epoch": 0.86, + "learning_rate": 1.8359664338287003e-05, + "loss": 0.3949, + "step": 18495 + }, + { + "epoch": 0.86, + "learning_rate": 1.8359195537011863e-05, + "loss": 0.4204, + "step": 18500 + }, + { + "epoch": 0.86, + "learning_rate": 1.8358726735736723e-05, + "loss": 0.1945, + "step": 18505 + }, + { + "epoch": 0.86, + "learning_rate": 1.8358257934461583e-05, + "loss": 0.0726, + "step": 18510 + }, + { + "epoch": 0.86, + "learning_rate": 1.8357789133186443e-05, + "loss": 0.1058, + "step": 18515 + }, + { + "epoch": 0.86, + "learning_rate": 1.8357320331911303e-05, + "loss": 0.0942, + "step": 18520 + }, + { + "epoch": 0.86, + "learning_rate": 1.8356851530636163e-05, + "loss": 0.1328, + "step": 18525 + }, + { + "epoch": 0.86, + "learning_rate": 1.8356382729361026e-05, + "loss": 0.1709, + "step": 18530 + }, + { + "epoch": 0.86, + "learning_rate": 1.8355913928085886e-05, + "loss": 0.1998, + "step": 18535 + }, + { + "epoch": 0.87, + "learning_rate": 1.8355445126810746e-05, + "loss": 0.3097, + "step": 18540 + }, + { + "epoch": 0.87, + "learning_rate": 1.8354976325535606e-05, + "loss": 0.3424, + "step": 18545 + }, + { + "epoch": 0.87, + "learning_rate": 1.8354507524260466e-05, + "loss": 0.4817, + "step": 18550 + }, + { + "epoch": 0.87, + "learning_rate": 1.835403872298533e-05, + "loss": 0.1701, + "step": 18555 + }, + { + "epoch": 0.87, + "learning_rate": 1.835356992171019e-05, + "loss": 0.0983, + "step": 18560 + }, + { + "epoch": 0.87, + "learning_rate": 1.835310112043505e-05, + "loss": 0.1378, + "step": 18565 + }, + { + "epoch": 0.87, + "learning_rate": 1.835263231915991e-05, + "loss": 0.1254, + "step": 18570 + }, + { + "epoch": 0.87, + "learning_rate": 1.8352163517884772e-05, + "loss": 0.1184, + "step": 18575 + }, + { + "epoch": 0.87, + "learning_rate": 1.8351694716609632e-05, + "loss": 0.1757, + "step": 18580 + }, + { + "epoch": 0.87, + "learning_rate": 1.8351225915334492e-05, + "loss": 0.1283, + "step": 18585 + }, + { + "epoch": 0.87, + "learning_rate": 1.8350757114059352e-05, + "loss": 0.259, + "step": 18590 + }, + { + "epoch": 0.87, + "learning_rate": 1.8350288312784212e-05, + "loss": 0.2993, + "step": 18595 + }, + { + "epoch": 0.87, + "learning_rate": 1.8349819511509072e-05, + "loss": 0.695, + "step": 18600 + }, + { + "epoch": 0.87, + "learning_rate": 1.8349350710233932e-05, + "loss": 0.2788, + "step": 18605 + }, + { + "epoch": 0.87, + "learning_rate": 1.8348881908958792e-05, + "loss": 0.1188, + "step": 18610 + }, + { + "epoch": 0.87, + "learning_rate": 1.8348413107683652e-05, + "loss": 0.0826, + "step": 18615 + }, + { + "epoch": 0.87, + "learning_rate": 1.8347944306408515e-05, + "loss": 0.1032, + "step": 18620 + }, + { + "epoch": 0.87, + "learning_rate": 1.8347475505133375e-05, + "loss": 0.0864, + "step": 18625 + }, + { + "epoch": 0.87, + "learning_rate": 1.8347006703858235e-05, + "loss": 0.183, + "step": 18630 + }, + { + "epoch": 0.87, + "learning_rate": 1.83465379025831e-05, + "loss": 0.1417, + "step": 18635 + }, + { + "epoch": 0.87, + "learning_rate": 1.834606910130796e-05, + "loss": 0.179, + "step": 18640 + }, + { + "epoch": 0.87, + "learning_rate": 1.8345600300032818e-05, + "loss": 0.3551, + "step": 18645 + }, + { + "epoch": 0.87, + "learning_rate": 1.8345131498757678e-05, + "loss": 0.6553, + "step": 18650 + }, + { + "epoch": 0.87, + "learning_rate": 1.8344662697482538e-05, + "loss": 0.1851, + "step": 18655 + }, + { + "epoch": 0.87, + "learning_rate": 1.8344193896207398e-05, + "loss": 0.0574, + "step": 18660 + }, + { + "epoch": 0.87, + "learning_rate": 1.8343725094932258e-05, + "loss": 0.0999, + "step": 18665 + }, + { + "epoch": 0.87, + "learning_rate": 1.834325629365712e-05, + "loss": 0.1026, + "step": 18670 + }, + { + "epoch": 0.87, + "learning_rate": 1.834278749238198e-05, + "loss": 0.1747, + "step": 18675 + }, + { + "epoch": 0.87, + "learning_rate": 1.834231869110684e-05, + "loss": 0.1283, + "step": 18680 + }, + { + "epoch": 0.87, + "learning_rate": 1.83418498898317e-05, + "loss": 0.1925, + "step": 18685 + }, + { + "epoch": 0.87, + "learning_rate": 1.834138108855656e-05, + "loss": 0.2752, + "step": 18690 + }, + { + "epoch": 0.87, + "learning_rate": 1.8340912287281424e-05, + "loss": 0.2445, + "step": 18695 + }, + { + "epoch": 0.87, + "learning_rate": 1.8340443486006284e-05, + "loss": 0.4561, + "step": 18700 + }, + { + "epoch": 0.87, + "learning_rate": 1.8339974684731144e-05, + "loss": 0.2342, + "step": 18705 + }, + { + "epoch": 0.87, + "learning_rate": 1.8339505883456004e-05, + "loss": 0.051, + "step": 18710 + }, + { + "epoch": 0.87, + "learning_rate": 1.8339037082180868e-05, + "loss": 0.1113, + "step": 18715 + }, + { + "epoch": 0.87, + "learning_rate": 1.8338568280905727e-05, + "loss": 0.1012, + "step": 18720 + }, + { + "epoch": 0.87, + "learning_rate": 1.8338099479630587e-05, + "loss": 0.1449, + "step": 18725 + }, + { + "epoch": 0.87, + "learning_rate": 1.8337630678355447e-05, + "loss": 0.1451, + "step": 18730 + }, + { + "epoch": 0.87, + "learning_rate": 1.8337161877080307e-05, + "loss": 0.1471, + "step": 18735 + }, + { + "epoch": 0.87, + "learning_rate": 1.8336693075805167e-05, + "loss": 0.17, + "step": 18740 + }, + { + "epoch": 0.87, + "learning_rate": 1.8336224274530027e-05, + "loss": 0.1593, + "step": 18745 + }, + { + "epoch": 0.87, + "learning_rate": 1.8335755473254887e-05, + "loss": 0.4541, + "step": 18750 + }, + { + "epoch": 0.88, + "learning_rate": 1.8335286671979747e-05, + "loss": 0.2293, + "step": 18755 + }, + { + "epoch": 0.88, + "learning_rate": 1.833481787070461e-05, + "loss": 0.0878, + "step": 18760 + }, + { + "epoch": 0.88, + "learning_rate": 1.833434906942947e-05, + "loss": 0.1217, + "step": 18765 + }, + { + "epoch": 0.88, + "learning_rate": 1.833388026815433e-05, + "loss": 0.1412, + "step": 18770 + }, + { + "epoch": 0.88, + "learning_rate": 1.8333411466879193e-05, + "loss": 0.1524, + "step": 18775 + }, + { + "epoch": 0.88, + "learning_rate": 1.8332942665604053e-05, + "loss": 0.1379, + "step": 18780 + }, + { + "epoch": 0.88, + "learning_rate": 1.8332473864328913e-05, + "loss": 0.3588, + "step": 18785 + }, + { + "epoch": 0.88, + "learning_rate": 1.8332005063053773e-05, + "loss": 0.2402, + "step": 18790 + }, + { + "epoch": 0.88, + "learning_rate": 1.8331536261778633e-05, + "loss": 0.3659, + "step": 18795 + }, + { + "epoch": 0.88, + "learning_rate": 1.8331067460503493e-05, + "loss": 0.4519, + "step": 18800 + }, + { + "epoch": 0.88, + "learning_rate": 1.8330598659228353e-05, + "loss": 0.1818, + "step": 18805 + }, + { + "epoch": 0.88, + "learning_rate": 1.8330129857953216e-05, + "loss": 0.0785, + "step": 18810 + }, + { + "epoch": 0.88, + "learning_rate": 1.8329661056678076e-05, + "loss": 0.1177, + "step": 18815 + }, + { + "epoch": 0.88, + "learning_rate": 1.8329192255402936e-05, + "loss": 0.0967, + "step": 18820 + }, + { + "epoch": 0.88, + "learning_rate": 1.8328723454127796e-05, + "loss": 0.0934, + "step": 18825 + }, + { + "epoch": 0.88, + "learning_rate": 1.8328254652852656e-05, + "loss": 0.1312, + "step": 18830 + }, + { + "epoch": 0.88, + "learning_rate": 1.8327785851577516e-05, + "loss": 0.3729, + "step": 18835 + }, + { + "epoch": 0.88, + "learning_rate": 1.832731705030238e-05, + "loss": 0.1916, + "step": 18840 + }, + { + "epoch": 0.88, + "learning_rate": 1.832684824902724e-05, + "loss": 0.3668, + "step": 18845 + }, + { + "epoch": 0.88, + "learning_rate": 1.83263794477521e-05, + "loss": 0.5375, + "step": 18850 + }, + { + "epoch": 0.88, + "learning_rate": 1.8325910646476963e-05, + "loss": 0.2088, + "step": 18855 + }, + { + "epoch": 0.88, + "learning_rate": 1.8325441845201823e-05, + "loss": 0.0323, + "step": 18860 + }, + { + "epoch": 0.88, + "learning_rate": 1.8324973043926682e-05, + "loss": 0.1017, + "step": 18865 + }, + { + "epoch": 0.88, + "learning_rate": 1.8324504242651542e-05, + "loss": 0.0991, + "step": 18870 + }, + { + "epoch": 0.88, + "learning_rate": 1.8324035441376402e-05, + "loss": 0.1889, + "step": 18875 + }, + { + "epoch": 0.88, + "learning_rate": 1.8323566640101262e-05, + "loss": 0.0697, + "step": 18880 + }, + { + "epoch": 0.88, + "learning_rate": 1.8323097838826122e-05, + "loss": 0.2361, + "step": 18885 + }, + { + "epoch": 0.88, + "learning_rate": 1.8322629037550982e-05, + "loss": 0.2399, + "step": 18890 + }, + { + "epoch": 0.88, + "learning_rate": 1.8322160236275842e-05, + "loss": 0.3615, + "step": 18895 + }, + { + "epoch": 0.88, + "learning_rate": 1.8321691435000705e-05, + "loss": 0.45, + "step": 18900 + }, + { + "epoch": 0.88, + "learning_rate": 1.8321222633725565e-05, + "loss": 0.2541, + "step": 18905 + }, + { + "epoch": 0.88, + "learning_rate": 1.8320753832450425e-05, + "loss": 0.0357, + "step": 18910 + }, + { + "epoch": 0.88, + "learning_rate": 1.8320285031175285e-05, + "loss": 0.069, + "step": 18915 + }, + { + "epoch": 0.88, + "learning_rate": 1.831981622990015e-05, + "loss": 0.1533, + "step": 18920 + }, + { + "epoch": 0.88, + "learning_rate": 1.831934742862501e-05, + "loss": 0.0688, + "step": 18925 + }, + { + "epoch": 0.88, + "learning_rate": 1.831887862734987e-05, + "loss": 0.1047, + "step": 18930 + }, + { + "epoch": 0.88, + "learning_rate": 1.8318409826074728e-05, + "loss": 0.153, + "step": 18935 + }, + { + "epoch": 0.88, + "learning_rate": 1.8317941024799588e-05, + "loss": 0.2231, + "step": 18940 + }, + { + "epoch": 0.88, + "learning_rate": 1.8317472223524448e-05, + "loss": 0.4165, + "step": 18945 + }, + { + "epoch": 0.88, + "learning_rate": 1.831700342224931e-05, + "loss": 0.521, + "step": 18950 + }, + { + "epoch": 0.88, + "learning_rate": 1.831653462097417e-05, + "loss": 0.2484, + "step": 18955 + }, + { + "epoch": 0.88, + "learning_rate": 1.831606581969903e-05, + "loss": 0.1186, + "step": 18960 + }, + { + "epoch": 0.88, + "learning_rate": 1.831559701842389e-05, + "loss": 0.1492, + "step": 18965 + }, + { + "epoch": 0.89, + "learning_rate": 1.831512821714875e-05, + "loss": 0.1672, + "step": 18970 + }, + { + "epoch": 0.89, + "learning_rate": 1.831465941587361e-05, + "loss": 0.2043, + "step": 18975 + }, + { + "epoch": 0.89, + "learning_rate": 1.831419061459847e-05, + "loss": 0.1615, + "step": 18980 + }, + { + "epoch": 0.89, + "learning_rate": 1.8313721813323334e-05, + "loss": 0.1741, + "step": 18985 + }, + { + "epoch": 0.89, + "learning_rate": 1.8313253012048194e-05, + "loss": 0.1771, + "step": 18990 + }, + { + "epoch": 0.89, + "learning_rate": 1.8312784210773054e-05, + "loss": 0.2435, + "step": 18995 + }, + { + "epoch": 0.89, + "learning_rate": 1.8312315409497918e-05, + "loss": 0.4375, + "step": 19000 + }, + { + "epoch": 0.89, + "learning_rate": 1.8311846608222778e-05, + "loss": 0.1995, + "step": 19005 + }, + { + "epoch": 0.89, + "learning_rate": 1.8311377806947637e-05, + "loss": 0.0675, + "step": 19010 + }, + { + "epoch": 0.89, + "learning_rate": 1.8310909005672497e-05, + "loss": 0.1508, + "step": 19015 + }, + { + "epoch": 0.89, + "learning_rate": 1.8310440204397357e-05, + "loss": 0.1821, + "step": 19020 + }, + { + "epoch": 0.89, + "learning_rate": 1.8309971403122217e-05, + "loss": 0.2436, + "step": 19025 + }, + { + "epoch": 0.89, + "learning_rate": 1.8309502601847077e-05, + "loss": 0.1228, + "step": 19030 + }, + { + "epoch": 0.89, + "learning_rate": 1.8309033800571937e-05, + "loss": 0.2127, + "step": 19035 + }, + { + "epoch": 0.89, + "learning_rate": 1.83085649992968e-05, + "loss": 0.1937, + "step": 19040 + }, + { + "epoch": 0.89, + "learning_rate": 1.830809619802166e-05, + "loss": 0.254, + "step": 19045 + }, + { + "epoch": 0.89, + "learning_rate": 1.830762739674652e-05, + "loss": 0.5409, + "step": 19050 + }, + { + "epoch": 0.89, + "learning_rate": 1.830715859547138e-05, + "loss": 0.252, + "step": 19055 + }, + { + "epoch": 0.89, + "learning_rate": 1.830668979419624e-05, + "loss": 0.0875, + "step": 19060 + }, + { + "epoch": 0.89, + "learning_rate": 1.8306220992921104e-05, + "loss": 0.1311, + "step": 19065 + }, + { + "epoch": 0.89, + "learning_rate": 1.8305752191645963e-05, + "loss": 0.1158, + "step": 19070 + }, + { + "epoch": 0.89, + "learning_rate": 1.8305283390370823e-05, + "loss": 0.1872, + "step": 19075 + }, + { + "epoch": 0.89, + "learning_rate": 1.8304814589095683e-05, + "loss": 0.2072, + "step": 19080 + }, + { + "epoch": 0.89, + "learning_rate": 1.8304345787820543e-05, + "loss": 0.2102, + "step": 19085 + }, + { + "epoch": 0.89, + "learning_rate": 1.8303876986545407e-05, + "loss": 0.2013, + "step": 19090 + }, + { + "epoch": 0.89, + "learning_rate": 1.8303408185270267e-05, + "loss": 0.2112, + "step": 19095 + }, + { + "epoch": 0.89, + "learning_rate": 1.8302939383995126e-05, + "loss": 0.6385, + "step": 19100 + }, + { + "epoch": 0.89, + "learning_rate": 1.8302470582719986e-05, + "loss": 0.1278, + "step": 19105 + }, + { + "epoch": 0.89, + "learning_rate": 1.8302001781444846e-05, + "loss": 0.0642, + "step": 19110 + }, + { + "epoch": 0.89, + "learning_rate": 1.8301532980169706e-05, + "loss": 0.0807, + "step": 19115 + }, + { + "epoch": 0.89, + "learning_rate": 1.8301064178894566e-05, + "loss": 0.1014, + "step": 19120 + }, + { + "epoch": 0.89, + "learning_rate": 1.830059537761943e-05, + "loss": 0.15, + "step": 19125 + }, + { + "epoch": 0.89, + "learning_rate": 1.830012657634429e-05, + "loss": 0.1112, + "step": 19130 + }, + { + "epoch": 0.89, + "learning_rate": 1.829965777506915e-05, + "loss": 0.2095, + "step": 19135 + }, + { + "epoch": 0.89, + "learning_rate": 1.829918897379401e-05, + "loss": 0.1831, + "step": 19140 + }, + { + "epoch": 0.89, + "learning_rate": 1.8298720172518873e-05, + "loss": 0.3499, + "step": 19145 + }, + { + "epoch": 0.89, + "learning_rate": 1.8298251371243733e-05, + "loss": 0.5991, + "step": 19150 + }, + { + "epoch": 0.89, + "learning_rate": 1.8297782569968592e-05, + "loss": 0.1876, + "step": 19155 + }, + { + "epoch": 0.89, + "learning_rate": 1.8297313768693452e-05, + "loss": 0.133, + "step": 19160 + }, + { + "epoch": 0.89, + "learning_rate": 1.8296844967418312e-05, + "loss": 0.1359, + "step": 19165 + }, + { + "epoch": 0.89, + "learning_rate": 1.8296376166143172e-05, + "loss": 0.1115, + "step": 19170 + }, + { + "epoch": 0.89, + "learning_rate": 1.8295907364868032e-05, + "loss": 0.1205, + "step": 19175 + }, + { + "epoch": 0.89, + "learning_rate": 1.8295438563592896e-05, + "loss": 0.1723, + "step": 19180 + }, + { + "epoch": 0.9, + "learning_rate": 1.8294969762317755e-05, + "loss": 0.2365, + "step": 19185 + }, + { + "epoch": 0.9, + "learning_rate": 1.8294500961042615e-05, + "loss": 0.3599, + "step": 19190 + }, + { + "epoch": 0.9, + "learning_rate": 1.8294032159767475e-05, + "loss": 0.3155, + "step": 19195 + }, + { + "epoch": 0.9, + "learning_rate": 1.8293563358492335e-05, + "loss": 0.5404, + "step": 19200 + }, + { + "epoch": 0.9, + "learning_rate": 1.82930945572172e-05, + "loss": 0.24, + "step": 19205 + }, + { + "epoch": 0.9, + "learning_rate": 1.829262575594206e-05, + "loss": 0.1003, + "step": 19210 + }, + { + "epoch": 0.9, + "learning_rate": 1.829215695466692e-05, + "loss": 0.1443, + "step": 19215 + }, + { + "epoch": 0.9, + "learning_rate": 1.829168815339178e-05, + "loss": 0.1095, + "step": 19220 + }, + { + "epoch": 0.9, + "learning_rate": 1.8291219352116642e-05, + "loss": 0.133, + "step": 19225 + }, + { + "epoch": 0.9, + "learning_rate": 1.82907505508415e-05, + "loss": 0.1638, + "step": 19230 + }, + { + "epoch": 0.9, + "learning_rate": 1.829028174956636e-05, + "loss": 0.2031, + "step": 19235 + }, + { + "epoch": 0.9, + "learning_rate": 1.828981294829122e-05, + "loss": 0.3121, + "step": 19240 + }, + { + "epoch": 0.9, + "learning_rate": 1.828934414701608e-05, + "loss": 0.2641, + "step": 19245 + }, + { + "epoch": 0.9, + "learning_rate": 1.828887534574094e-05, + "loss": 0.3234, + "step": 19250 + }, + { + "epoch": 0.9, + "learning_rate": 1.82884065444658e-05, + "loss": 0.2048, + "step": 19255 + }, + { + "epoch": 0.9, + "learning_rate": 1.828793774319066e-05, + "loss": 0.0689, + "step": 19260 + }, + { + "epoch": 0.9, + "learning_rate": 1.828746894191552e-05, + "loss": 0.1006, + "step": 19265 + }, + { + "epoch": 0.9, + "learning_rate": 1.8287000140640385e-05, + "loss": 0.0911, + "step": 19270 + }, + { + "epoch": 0.9, + "learning_rate": 1.8286531339365244e-05, + "loss": 0.2028, + "step": 19275 + }, + { + "epoch": 0.9, + "learning_rate": 1.8286062538090104e-05, + "loss": 0.1203, + "step": 19280 + }, + { + "epoch": 0.9, + "learning_rate": 1.8285593736814968e-05, + "loss": 0.1986, + "step": 19285 + }, + { + "epoch": 0.9, + "learning_rate": 1.8285124935539828e-05, + "loss": 0.2303, + "step": 19290 + }, + { + "epoch": 0.9, + "learning_rate": 1.8284656134264688e-05, + "loss": 0.2265, + "step": 19295 + }, + { + "epoch": 0.9, + "learning_rate": 1.8284187332989548e-05, + "loss": 0.6002, + "step": 19300 + }, + { + "epoch": 0.9, + "learning_rate": 1.8283718531714407e-05, + "loss": 0.1912, + "step": 19305 + }, + { + "epoch": 0.9, + "learning_rate": 1.8283249730439267e-05, + "loss": 0.0834, + "step": 19310 + }, + { + "epoch": 0.9, + "learning_rate": 1.8282780929164127e-05, + "loss": 0.1226, + "step": 19315 + }, + { + "epoch": 0.9, + "learning_rate": 1.828231212788899e-05, + "loss": 0.201, + "step": 19320 + }, + { + "epoch": 0.9, + "learning_rate": 1.828184332661385e-05, + "loss": 0.0701, + "step": 19325 + }, + { + "epoch": 0.9, + "learning_rate": 1.828137452533871e-05, + "loss": 0.2523, + "step": 19330 + }, + { + "epoch": 0.9, + "learning_rate": 1.828090572406357e-05, + "loss": 0.1423, + "step": 19335 + }, + { + "epoch": 0.9, + "learning_rate": 1.828043692278843e-05, + "loss": 0.2959, + "step": 19340 + }, + { + "epoch": 0.9, + "learning_rate": 1.827996812151329e-05, + "loss": 0.3732, + "step": 19345 + }, + { + "epoch": 0.9, + "learning_rate": 1.8279499320238154e-05, + "loss": 0.4631, + "step": 19350 + }, + { + "epoch": 0.9, + "learning_rate": 1.8279030518963014e-05, + "loss": 0.1664, + "step": 19355 + }, + { + "epoch": 0.9, + "learning_rate": 1.8278561717687873e-05, + "loss": 0.1044, + "step": 19360 + }, + { + "epoch": 0.9, + "learning_rate": 1.8278092916412737e-05, + "loss": 0.1303, + "step": 19365 + }, + { + "epoch": 0.9, + "learning_rate": 1.8277624115137597e-05, + "loss": 0.1096, + "step": 19370 + }, + { + "epoch": 0.9, + "learning_rate": 1.8277155313862457e-05, + "loss": 0.1389, + "step": 19375 + }, + { + "epoch": 0.9, + "learning_rate": 1.8276686512587317e-05, + "loss": 0.1583, + "step": 19380 + }, + { + "epoch": 0.9, + "learning_rate": 1.8276217711312177e-05, + "loss": 0.2356, + "step": 19385 + }, + { + "epoch": 0.9, + "learning_rate": 1.8275748910037036e-05, + "loss": 0.2076, + "step": 19390 + }, + { + "epoch": 0.9, + "learning_rate": 1.8275280108761896e-05, + "loss": 0.2269, + "step": 19395 + }, + { + "epoch": 0.91, + "learning_rate": 1.8274811307486756e-05, + "loss": 0.3307, + "step": 19400 + }, + { + "epoch": 0.91, + "learning_rate": 1.8274342506211616e-05, + "loss": 0.2044, + "step": 19405 + }, + { + "epoch": 0.91, + "learning_rate": 1.8273873704936476e-05, + "loss": 0.0698, + "step": 19410 + }, + { + "epoch": 0.91, + "learning_rate": 1.827340490366134e-05, + "loss": 0.1467, + "step": 19415 + }, + { + "epoch": 0.91, + "learning_rate": 1.82729361023862e-05, + "loss": 0.1113, + "step": 19420 + }, + { + "epoch": 0.91, + "learning_rate": 1.827246730111106e-05, + "loss": 0.1657, + "step": 19425 + }, + { + "epoch": 0.91, + "learning_rate": 1.8271998499835923e-05, + "loss": 0.1277, + "step": 19430 + }, + { + "epoch": 0.91, + "learning_rate": 1.8271529698560783e-05, + "loss": 0.1894, + "step": 19435 + }, + { + "epoch": 0.91, + "learning_rate": 1.8271060897285643e-05, + "loss": 0.1985, + "step": 19440 + }, + { + "epoch": 0.91, + "learning_rate": 1.8270592096010503e-05, + "loss": 0.3031, + "step": 19445 + }, + { + "epoch": 0.91, + "learning_rate": 1.8270123294735362e-05, + "loss": 0.5417, + "step": 19450 + }, + { + "epoch": 0.91, + "learning_rate": 1.8269654493460222e-05, + "loss": 0.1623, + "step": 19455 + }, + { + "epoch": 0.91, + "learning_rate": 1.8269185692185086e-05, + "loss": 0.0407, + "step": 19460 + }, + { + "epoch": 0.91, + "learning_rate": 1.8268716890909946e-05, + "loss": 0.0865, + "step": 19465 + }, + { + "epoch": 0.91, + "learning_rate": 1.8268248089634806e-05, + "loss": 0.1004, + "step": 19470 + }, + { + "epoch": 0.91, + "learning_rate": 1.8267779288359666e-05, + "loss": 0.1137, + "step": 19475 + }, + { + "epoch": 0.91, + "learning_rate": 1.8267310487084525e-05, + "loss": 0.1354, + "step": 19480 + }, + { + "epoch": 0.91, + "learning_rate": 1.8266841685809385e-05, + "loss": 0.2164, + "step": 19485 + }, + { + "epoch": 0.91, + "learning_rate": 1.8266372884534245e-05, + "loss": 0.2859, + "step": 19490 + }, + { + "epoch": 0.91, + "learning_rate": 1.826590408325911e-05, + "loss": 0.3575, + "step": 19495 + }, + { + "epoch": 0.91, + "learning_rate": 1.826543528198397e-05, + "loss": 0.5411, + "step": 19500 + }, + { + "epoch": 0.91, + "learning_rate": 1.826496648070883e-05, + "loss": 0.2238, + "step": 19505 + }, + { + "epoch": 0.91, + "learning_rate": 1.8264497679433692e-05, + "loss": 0.0554, + "step": 19510 + }, + { + "epoch": 0.91, + "learning_rate": 1.8264028878158552e-05, + "loss": 0.1204, + "step": 19515 + }, + { + "epoch": 0.91, + "learning_rate": 1.826356007688341e-05, + "loss": 0.1493, + "step": 19520 + }, + { + "epoch": 0.91, + "learning_rate": 1.826309127560827e-05, + "loss": 0.1468, + "step": 19525 + }, + { + "epoch": 0.91, + "learning_rate": 1.826262247433313e-05, + "loss": 0.2244, + "step": 19530 + }, + { + "epoch": 0.91, + "learning_rate": 1.826215367305799e-05, + "loss": 0.1374, + "step": 19535 + }, + { + "epoch": 0.91, + "learning_rate": 1.826168487178285e-05, + "loss": 0.1748, + "step": 19540 + }, + { + "epoch": 0.91, + "learning_rate": 1.826121607050771e-05, + "loss": 0.2261, + "step": 19545 + }, + { + "epoch": 0.91, + "learning_rate": 1.8260747269232575e-05, + "loss": 0.4271, + "step": 19550 + }, + { + "epoch": 0.91, + "learning_rate": 1.8260278467957435e-05, + "loss": 0.1653, + "step": 19555 + }, + { + "epoch": 0.91, + "learning_rate": 1.8259809666682295e-05, + "loss": 0.0668, + "step": 19560 + }, + { + "epoch": 0.91, + "learning_rate": 1.8259340865407154e-05, + "loss": 0.1128, + "step": 19565 + }, + { + "epoch": 0.91, + "learning_rate": 1.8258872064132014e-05, + "loss": 0.3179, + "step": 19570 + }, + { + "epoch": 0.91, + "learning_rate": 1.8258403262856878e-05, + "loss": 0.1556, + "step": 19575 + }, + { + "epoch": 0.91, + "learning_rate": 1.8257934461581738e-05, + "loss": 0.2226, + "step": 19580 + }, + { + "epoch": 0.91, + "learning_rate": 1.8257465660306598e-05, + "loss": 0.2039, + "step": 19585 + }, + { + "epoch": 0.91, + "learning_rate": 1.8256996859031458e-05, + "loss": 0.3087, + "step": 19590 + }, + { + "epoch": 0.91, + "learning_rate": 1.8256528057756317e-05, + "loss": 0.3327, + "step": 19595 + }, + { + "epoch": 0.91, + "learning_rate": 1.825605925648118e-05, + "loss": 0.4217, + "step": 19600 + }, + { + "epoch": 0.91, + "learning_rate": 1.825559045520604e-05, + "loss": 0.1834, + "step": 19605 + }, + { + "epoch": 0.92, + "learning_rate": 1.82551216539309e-05, + "loss": 0.0819, + "step": 19610 + }, + { + "epoch": 0.92, + "learning_rate": 1.825465285265576e-05, + "loss": 0.1225, + "step": 19615 + }, + { + "epoch": 0.92, + "learning_rate": 1.825418405138062e-05, + "loss": 0.0862, + "step": 19620 + }, + { + "epoch": 0.92, + "learning_rate": 1.825371525010548e-05, + "loss": 0.2495, + "step": 19625 + }, + { + "epoch": 0.92, + "learning_rate": 1.825324644883034e-05, + "loss": 0.1874, + "step": 19630 + }, + { + "epoch": 0.92, + "learning_rate": 1.8252777647555204e-05, + "loss": 0.2202, + "step": 19635 + }, + { + "epoch": 0.92, + "learning_rate": 1.8252308846280064e-05, + "loss": 0.2002, + "step": 19640 + }, + { + "epoch": 0.92, + "learning_rate": 1.8251840045004924e-05, + "loss": 0.2528, + "step": 19645 + }, + { + "epoch": 0.92, + "learning_rate": 1.8251371243729784e-05, + "loss": 0.4702, + "step": 19650 + }, + { + "epoch": 0.92, + "learning_rate": 1.8250902442454647e-05, + "loss": 0.2313, + "step": 19655 + }, + { + "epoch": 0.92, + "learning_rate": 1.8250433641179507e-05, + "loss": 0.0705, + "step": 19660 + }, + { + "epoch": 0.92, + "learning_rate": 1.8249964839904367e-05, + "loss": 0.0752, + "step": 19665 + }, + { + "epoch": 0.92, + "learning_rate": 1.8249496038629227e-05, + "loss": 0.1123, + "step": 19670 + }, + { + "epoch": 0.92, + "learning_rate": 1.8249027237354087e-05, + "loss": 0.1416, + "step": 19675 + }, + { + "epoch": 0.92, + "learning_rate": 1.8248558436078947e-05, + "loss": 0.2429, + "step": 19680 + }, + { + "epoch": 0.92, + "learning_rate": 1.8248089634803806e-05, + "loss": 0.2266, + "step": 19685 + }, + { + "epoch": 0.92, + "learning_rate": 1.824762083352867e-05, + "loss": 0.2297, + "step": 19690 + }, + { + "epoch": 0.92, + "learning_rate": 1.824715203225353e-05, + "loss": 0.3014, + "step": 19695 + }, + { + "epoch": 0.92, + "learning_rate": 1.824668323097839e-05, + "loss": 0.4253, + "step": 19700 + }, + { + "epoch": 0.92, + "learning_rate": 1.824621442970325e-05, + "loss": 0.2434, + "step": 19705 + }, + { + "epoch": 0.92, + "learning_rate": 1.824574562842811e-05, + "loss": 0.0985, + "step": 19710 + }, + { + "epoch": 0.92, + "learning_rate": 1.8245276827152973e-05, + "loss": 0.1376, + "step": 19715 + }, + { + "epoch": 0.92, + "learning_rate": 1.8244808025877833e-05, + "loss": 0.1281, + "step": 19720 + }, + { + "epoch": 0.92, + "learning_rate": 1.8244339224602693e-05, + "loss": 0.155, + "step": 19725 + }, + { + "epoch": 0.92, + "learning_rate": 1.8243870423327553e-05, + "loss": 0.1448, + "step": 19730 + }, + { + "epoch": 0.92, + "learning_rate": 1.8243401622052413e-05, + "loss": 0.1762, + "step": 19735 + }, + { + "epoch": 0.92, + "learning_rate": 1.8242932820777276e-05, + "loss": 0.3514, + "step": 19740 + }, + { + "epoch": 0.92, + "learning_rate": 1.8242464019502136e-05, + "loss": 0.3415, + "step": 19745 + }, + { + "epoch": 0.92, + "learning_rate": 1.8241995218226996e-05, + "loss": 0.4549, + "step": 19750 + }, + { + "epoch": 0.92, + "learning_rate": 1.8241526416951856e-05, + "loss": 0.2531, + "step": 19755 + }, + { + "epoch": 0.92, + "learning_rate": 1.8241057615676716e-05, + "loss": 0.0812, + "step": 19760 + }, + { + "epoch": 0.92, + "learning_rate": 1.8240588814401576e-05, + "loss": 0.0823, + "step": 19765 + }, + { + "epoch": 0.92, + "learning_rate": 1.8240120013126435e-05, + "loss": 0.0879, + "step": 19770 + }, + { + "epoch": 0.92, + "learning_rate": 1.8239651211851295e-05, + "loss": 0.0641, + "step": 19775 + }, + { + "epoch": 0.92, + "learning_rate": 1.823918241057616e-05, + "loss": 0.1483, + "step": 19780 + }, + { + "epoch": 0.92, + "learning_rate": 1.823871360930102e-05, + "loss": 0.1822, + "step": 19785 + }, + { + "epoch": 0.92, + "learning_rate": 1.823824480802588e-05, + "loss": 0.2754, + "step": 19790 + }, + { + "epoch": 0.92, + "learning_rate": 1.8237776006750742e-05, + "loss": 0.3038, + "step": 19795 + }, + { + "epoch": 0.92, + "learning_rate": 1.8237307205475602e-05, + "loss": 0.3856, + "step": 19800 + }, + { + "epoch": 0.92, + "learning_rate": 1.8236838404200462e-05, + "loss": 0.2713, + "step": 19805 + }, + { + "epoch": 0.92, + "learning_rate": 1.8236369602925322e-05, + "loss": 0.0589, + "step": 19810 + }, + { + "epoch": 0.92, + "learning_rate": 1.823590080165018e-05, + "loss": 0.0968, + "step": 19815 + }, + { + "epoch": 0.92, + "learning_rate": 1.823543200037504e-05, + "loss": 0.0974, + "step": 19820 + }, + { + "epoch": 0.93, + "learning_rate": 1.82349631990999e-05, + "loss": 0.119, + "step": 19825 + }, + { + "epoch": 0.93, + "learning_rate": 1.8234494397824765e-05, + "loss": 0.1067, + "step": 19830 + }, + { + "epoch": 0.93, + "learning_rate": 1.8234025596549625e-05, + "loss": 0.1, + "step": 19835 + }, + { + "epoch": 0.93, + "learning_rate": 1.8233556795274485e-05, + "loss": 0.1671, + "step": 19840 + }, + { + "epoch": 0.93, + "learning_rate": 1.8233087993999345e-05, + "loss": 0.2918, + "step": 19845 + }, + { + "epoch": 0.93, + "learning_rate": 1.8232619192724205e-05, + "loss": 0.6152, + "step": 19850 + }, + { + "epoch": 0.93, + "learning_rate": 1.8232150391449065e-05, + "loss": 0.1955, + "step": 19855 + }, + { + "epoch": 0.93, + "learning_rate": 1.8231681590173928e-05, + "loss": 0.0708, + "step": 19860 + }, + { + "epoch": 0.93, + "learning_rate": 1.8231212788898788e-05, + "loss": 0.0908, + "step": 19865 + }, + { + "epoch": 0.93, + "learning_rate": 1.8230743987623648e-05, + "loss": 0.1555, + "step": 19870 + }, + { + "epoch": 0.93, + "learning_rate": 1.823027518634851e-05, + "loss": 0.1415, + "step": 19875 + }, + { + "epoch": 0.93, + "learning_rate": 1.822980638507337e-05, + "loss": 0.1305, + "step": 19880 + }, + { + "epoch": 0.93, + "learning_rate": 1.822933758379823e-05, + "loss": 0.2142, + "step": 19885 + }, + { + "epoch": 0.93, + "learning_rate": 1.822886878252309e-05, + "loss": 0.2684, + "step": 19890 + }, + { + "epoch": 0.93, + "learning_rate": 1.822839998124795e-05, + "loss": 0.2105, + "step": 19895 + }, + { + "epoch": 0.93, + "learning_rate": 1.822793117997281e-05, + "loss": 0.4126, + "step": 19900 + }, + { + "epoch": 0.93, + "learning_rate": 1.822746237869767e-05, + "loss": 0.2058, + "step": 19905 + }, + { + "epoch": 0.93, + "learning_rate": 1.822699357742253e-05, + "loss": 0.0673, + "step": 19910 + }, + { + "epoch": 0.93, + "learning_rate": 1.822652477614739e-05, + "loss": 0.1449, + "step": 19915 + }, + { + "epoch": 0.93, + "learning_rate": 1.822605597487225e-05, + "loss": 0.0786, + "step": 19920 + }, + { + "epoch": 0.93, + "learning_rate": 1.8225587173597114e-05, + "loss": 0.1939, + "step": 19925 + }, + { + "epoch": 0.93, + "learning_rate": 1.8225118372321974e-05, + "loss": 0.1068, + "step": 19930 + }, + { + "epoch": 0.93, + "learning_rate": 1.8224649571046834e-05, + "loss": 0.1644, + "step": 19935 + }, + { + "epoch": 0.93, + "learning_rate": 1.8224180769771697e-05, + "loss": 0.1876, + "step": 19940 + }, + { + "epoch": 0.93, + "learning_rate": 1.8223711968496557e-05, + "loss": 0.2146, + "step": 19945 + }, + { + "epoch": 0.93, + "learning_rate": 1.8223243167221417e-05, + "loss": 0.4408, + "step": 19950 + }, + { + "epoch": 0.93, + "learning_rate": 1.8222774365946277e-05, + "loss": 0.1771, + "step": 19955 + }, + { + "epoch": 0.93, + "learning_rate": 1.8222305564671137e-05, + "loss": 0.0614, + "step": 19960 + }, + { + "epoch": 0.93, + "learning_rate": 1.8221836763395997e-05, + "loss": 0.0667, + "step": 19965 + }, + { + "epoch": 0.93, + "learning_rate": 1.822136796212086e-05, + "loss": 0.1274, + "step": 19970 + }, + { + "epoch": 0.93, + "learning_rate": 1.822089916084572e-05, + "loss": 0.128, + "step": 19975 + }, + { + "epoch": 0.93, + "learning_rate": 1.822043035957058e-05, + "loss": 0.1434, + "step": 19980 + }, + { + "epoch": 0.93, + "learning_rate": 1.821996155829544e-05, + "loss": 0.2268, + "step": 19985 + }, + { + "epoch": 0.93, + "learning_rate": 1.82194927570203e-05, + "loss": 0.2602, + "step": 19990 + }, + { + "epoch": 0.93, + "learning_rate": 1.821902395574516e-05, + "loss": 0.2319, + "step": 19995 + }, + { + "epoch": 0.93, + "learning_rate": 1.821855515447002e-05, + "loss": 0.624, + "step": 20000 + }, + { + "epoch": 0.93, + "learning_rate": 1.8218086353194883e-05, + "loss": 0.1856, + "step": 20005 + }, + { + "epoch": 0.93, + "learning_rate": 1.8217617551919743e-05, + "loss": 0.0824, + "step": 20010 + }, + { + "epoch": 0.93, + "learning_rate": 1.8217148750644603e-05, + "loss": 0.0766, + "step": 20015 + }, + { + "epoch": 0.93, + "learning_rate": 1.8216679949369466e-05, + "loss": 0.1607, + "step": 20020 + }, + { + "epoch": 0.93, + "learning_rate": 1.8216211148094326e-05, + "loss": 0.1132, + "step": 20025 + }, + { + "epoch": 0.93, + "learning_rate": 1.8215742346819186e-05, + "loss": 0.1429, + "step": 20030 + }, + { + "epoch": 0.93, + "learning_rate": 1.8215273545544046e-05, + "loss": 0.1928, + "step": 20035 + }, + { + "epoch": 0.94, + "learning_rate": 1.8214804744268906e-05, + "loss": 0.2072, + "step": 20040 + }, + { + "epoch": 0.94, + "learning_rate": 1.8214335942993766e-05, + "loss": 0.3352, + "step": 20045 + }, + { + "epoch": 0.94, + "learning_rate": 1.8213867141718626e-05, + "loss": 0.5031, + "step": 20050 + }, + { + "epoch": 0.94, + "learning_rate": 1.8213398340443486e-05, + "loss": 0.2016, + "step": 20055 + }, + { + "epoch": 0.94, + "learning_rate": 1.8212929539168346e-05, + "loss": 0.0557, + "step": 20060 + }, + { + "epoch": 0.94, + "learning_rate": 1.821246073789321e-05, + "loss": 0.0958, + "step": 20065 + }, + { + "epoch": 0.94, + "learning_rate": 1.821199193661807e-05, + "loss": 0.0978, + "step": 20070 + }, + { + "epoch": 0.94, + "learning_rate": 1.821152313534293e-05, + "loss": 0.0867, + "step": 20075 + }, + { + "epoch": 0.94, + "learning_rate": 1.8211054334067792e-05, + "loss": 0.147, + "step": 20080 + }, + { + "epoch": 0.94, + "learning_rate": 1.8210585532792652e-05, + "loss": 0.1557, + "step": 20085 + }, + { + "epoch": 0.94, + "learning_rate": 1.8210116731517512e-05, + "loss": 0.2568, + "step": 20090 + }, + { + "epoch": 0.94, + "learning_rate": 1.8209647930242372e-05, + "loss": 0.3529, + "step": 20095 + }, + { + "epoch": 0.94, + "learning_rate": 1.8209179128967232e-05, + "loss": 0.491, + "step": 20100 + }, + { + "epoch": 0.94, + "learning_rate": 1.820871032769209e-05, + "loss": 0.1841, + "step": 20105 + }, + { + "epoch": 0.94, + "learning_rate": 1.8208241526416955e-05, + "loss": 0.033, + "step": 20110 + }, + { + "epoch": 0.94, + "learning_rate": 1.8207772725141815e-05, + "loss": 0.0934, + "step": 20115 + }, + { + "epoch": 0.94, + "learning_rate": 1.8207303923866675e-05, + "loss": 0.059, + "step": 20120 + }, + { + "epoch": 0.94, + "learning_rate": 1.8206835122591535e-05, + "loss": 0.1281, + "step": 20125 + }, + { + "epoch": 0.94, + "learning_rate": 1.8206366321316395e-05, + "loss": 0.1659, + "step": 20130 + }, + { + "epoch": 0.94, + "learning_rate": 1.8205897520041255e-05, + "loss": 0.1677, + "step": 20135 + }, + { + "epoch": 0.94, + "learning_rate": 1.8205428718766115e-05, + "loss": 0.27, + "step": 20140 + }, + { + "epoch": 0.94, + "learning_rate": 1.8204959917490978e-05, + "loss": 0.2848, + "step": 20145 + }, + { + "epoch": 0.94, + "learning_rate": 1.8204491116215838e-05, + "loss": 0.5694, + "step": 20150 + }, + { + "epoch": 0.94, + "learning_rate": 1.8204022314940698e-05, + "loss": 0.2125, + "step": 20155 + }, + { + "epoch": 0.94, + "learning_rate": 1.820355351366556e-05, + "loss": 0.0379, + "step": 20160 + }, + { + "epoch": 0.94, + "learning_rate": 1.820308471239042e-05, + "loss": 0.1238, + "step": 20165 + }, + { + "epoch": 0.94, + "learning_rate": 1.820261591111528e-05, + "loss": 0.1555, + "step": 20170 + }, + { + "epoch": 0.94, + "learning_rate": 1.820214710984014e-05, + "loss": 0.141, + "step": 20175 + }, + { + "epoch": 0.94, + "learning_rate": 1.8201678308565e-05, + "loss": 0.0992, + "step": 20180 + }, + { + "epoch": 0.94, + "learning_rate": 1.820120950728986e-05, + "loss": 0.1861, + "step": 20185 + }, + { + "epoch": 0.94, + "learning_rate": 1.820074070601472e-05, + "loss": 0.2341, + "step": 20190 + }, + { + "epoch": 0.94, + "learning_rate": 1.820027190473958e-05, + "loss": 0.2778, + "step": 20195 + }, + { + "epoch": 0.94, + "learning_rate": 1.8199803103464444e-05, + "loss": 0.4585, + "step": 20200 + }, + { + "epoch": 0.94, + "learning_rate": 1.8199334302189304e-05, + "loss": 0.193, + "step": 20205 + }, + { + "epoch": 0.94, + "learning_rate": 1.8198865500914164e-05, + "loss": 0.0783, + "step": 20210 + }, + { + "epoch": 0.94, + "learning_rate": 1.8198396699639024e-05, + "loss": 0.1657, + "step": 20215 + }, + { + "epoch": 0.94, + "learning_rate": 1.8197927898363884e-05, + "loss": 0.0852, + "step": 20220 + }, + { + "epoch": 0.94, + "learning_rate": 1.8197459097088747e-05, + "loss": 0.1167, + "step": 20225 + }, + { + "epoch": 0.94, + "learning_rate": 1.8196990295813607e-05, + "loss": 0.1107, + "step": 20230 + }, + { + "epoch": 0.94, + "learning_rate": 1.8196521494538467e-05, + "loss": 0.1642, + "step": 20235 + }, + { + "epoch": 0.94, + "learning_rate": 1.8196052693263327e-05, + "loss": 0.2792, + "step": 20240 + }, + { + "epoch": 0.94, + "learning_rate": 1.8195583891988187e-05, + "loss": 0.37, + "step": 20245 + }, + { + "epoch": 0.94, + "learning_rate": 1.819511509071305e-05, + "loss": 0.3121, + "step": 20250 + }, + { + "epoch": 0.95, + "learning_rate": 1.819464628943791e-05, + "loss": 0.2295, + "step": 20255 + }, + { + "epoch": 0.95, + "learning_rate": 1.819417748816277e-05, + "loss": 0.0714, + "step": 20260 + }, + { + "epoch": 0.95, + "learning_rate": 1.819370868688763e-05, + "loss": 0.1149, + "step": 20265 + }, + { + "epoch": 0.95, + "learning_rate": 1.819323988561249e-05, + "loss": 0.1149, + "step": 20270 + }, + { + "epoch": 0.95, + "learning_rate": 1.819277108433735e-05, + "loss": 0.1848, + "step": 20275 + }, + { + "epoch": 0.95, + "learning_rate": 1.819230228306221e-05, + "loss": 0.1652, + "step": 20280 + }, + { + "epoch": 0.95, + "learning_rate": 1.819183348178707e-05, + "loss": 0.1869, + "step": 20285 + }, + { + "epoch": 0.95, + "learning_rate": 1.8191364680511933e-05, + "loss": 0.2933, + "step": 20290 + }, + { + "epoch": 0.95, + "learning_rate": 1.8190895879236793e-05, + "loss": 0.2485, + "step": 20295 + }, + { + "epoch": 0.95, + "learning_rate": 1.8190427077961653e-05, + "loss": 0.5297, + "step": 20300 + }, + { + "epoch": 0.95, + "learning_rate": 1.8189958276686516e-05, + "loss": 0.2442, + "step": 20305 + }, + { + "epoch": 0.95, + "learning_rate": 1.8189489475411376e-05, + "loss": 0.0492, + "step": 20310 + }, + { + "epoch": 0.95, + "learning_rate": 1.8189020674136236e-05, + "loss": 0.1049, + "step": 20315 + }, + { + "epoch": 0.95, + "learning_rate": 1.8188551872861096e-05, + "loss": 0.1435, + "step": 20320 + }, + { + "epoch": 0.95, + "learning_rate": 1.8188083071585956e-05, + "loss": 0.1713, + "step": 20325 + }, + { + "epoch": 0.95, + "learning_rate": 1.8187614270310816e-05, + "loss": 0.1232, + "step": 20330 + }, + { + "epoch": 0.95, + "learning_rate": 1.8187145469035676e-05, + "loss": 0.1662, + "step": 20335 + }, + { + "epoch": 0.95, + "learning_rate": 1.818667666776054e-05, + "loss": 0.2596, + "step": 20340 + }, + { + "epoch": 0.95, + "learning_rate": 1.81862078664854e-05, + "loss": 0.285, + "step": 20345 + }, + { + "epoch": 0.95, + "learning_rate": 1.818573906521026e-05, + "loss": 0.3881, + "step": 20350 + }, + { + "epoch": 0.95, + "learning_rate": 1.818527026393512e-05, + "loss": 0.2469, + "step": 20355 + }, + { + "epoch": 0.95, + "learning_rate": 1.818480146265998e-05, + "loss": 0.0816, + "step": 20360 + }, + { + "epoch": 0.95, + "learning_rate": 1.818433266138484e-05, + "loss": 0.159, + "step": 20365 + }, + { + "epoch": 0.95, + "learning_rate": 1.8183863860109702e-05, + "loss": 0.0941, + "step": 20370 + }, + { + "epoch": 0.95, + "learning_rate": 1.8183395058834562e-05, + "loss": 0.0982, + "step": 20375 + }, + { + "epoch": 0.95, + "learning_rate": 1.8182926257559422e-05, + "loss": 0.1584, + "step": 20380 + }, + { + "epoch": 0.95, + "learning_rate": 1.8182457456284282e-05, + "loss": 0.1844, + "step": 20385 + }, + { + "epoch": 0.95, + "learning_rate": 1.8181988655009145e-05, + "loss": 0.2987, + "step": 20390 + }, + { + "epoch": 0.95, + "learning_rate": 1.8181519853734005e-05, + "loss": 0.2073, + "step": 20395 + }, + { + "epoch": 0.95, + "learning_rate": 1.8181051052458865e-05, + "loss": 0.4283, + "step": 20400 + }, + { + "epoch": 0.95, + "learning_rate": 1.8180582251183725e-05, + "loss": 0.1415, + "step": 20405 + }, + { + "epoch": 0.95, + "learning_rate": 1.8180113449908585e-05, + "loss": 0.041, + "step": 20410 + }, + { + "epoch": 0.95, + "learning_rate": 1.8179644648633445e-05, + "loss": 0.0686, + "step": 20415 + }, + { + "epoch": 0.95, + "learning_rate": 1.8179175847358305e-05, + "loss": 0.0919, + "step": 20420 + }, + { + "epoch": 0.95, + "learning_rate": 1.8178707046083165e-05, + "loss": 0.1709, + "step": 20425 + }, + { + "epoch": 0.95, + "learning_rate": 1.8178238244808028e-05, + "loss": 0.1945, + "step": 20430 + }, + { + "epoch": 0.95, + "learning_rate": 1.8177769443532888e-05, + "loss": 0.2593, + "step": 20435 + }, + { + "epoch": 0.95, + "learning_rate": 1.8177300642257748e-05, + "loss": 0.2522, + "step": 20440 + }, + { + "epoch": 0.95, + "learning_rate": 1.8176831840982608e-05, + "loss": 0.2546, + "step": 20445 + }, + { + "epoch": 0.95, + "learning_rate": 1.817636303970747e-05, + "loss": 0.4386, + "step": 20450 + }, + { + "epoch": 0.95, + "learning_rate": 1.817589423843233e-05, + "loss": 0.1648, + "step": 20455 + }, + { + "epoch": 0.95, + "learning_rate": 1.817542543715719e-05, + "loss": 0.1356, + "step": 20460 + }, + { + "epoch": 0.95, + "learning_rate": 1.817495663588205e-05, + "loss": 0.0529, + "step": 20465 + }, + { + "epoch": 0.96, + "learning_rate": 1.817448783460691e-05, + "loss": 0.1376, + "step": 20470 + }, + { + "epoch": 0.96, + "learning_rate": 1.817401903333177e-05, + "loss": 0.1926, + "step": 20475 + }, + { + "epoch": 0.96, + "learning_rate": 1.8173550232056634e-05, + "loss": 0.1433, + "step": 20480 + }, + { + "epoch": 0.96, + "learning_rate": 1.8173081430781494e-05, + "loss": 0.0786, + "step": 20485 + }, + { + "epoch": 0.96, + "learning_rate": 1.8172612629506354e-05, + "loss": 0.1545, + "step": 20490 + }, + { + "epoch": 0.96, + "learning_rate": 1.8172143828231214e-05, + "loss": 0.2676, + "step": 20495 + }, + { + "epoch": 0.96, + "learning_rate": 1.8171675026956074e-05, + "loss": 0.4635, + "step": 20500 + }, + { + "epoch": 0.96, + "learning_rate": 1.8171206225680934e-05, + "loss": 0.1845, + "step": 20505 + }, + { + "epoch": 0.96, + "learning_rate": 1.8170737424405797e-05, + "loss": 0.0334, + "step": 20510 + }, + { + "epoch": 0.96, + "learning_rate": 1.8170268623130657e-05, + "loss": 0.0685, + "step": 20515 + }, + { + "epoch": 0.96, + "learning_rate": 1.8169799821855517e-05, + "loss": 0.139, + "step": 20520 + }, + { + "epoch": 0.96, + "learning_rate": 1.8169331020580377e-05, + "loss": 0.1517, + "step": 20525 + }, + { + "epoch": 0.96, + "learning_rate": 1.816886221930524e-05, + "loss": 0.1412, + "step": 20530 + }, + { + "epoch": 0.96, + "learning_rate": 1.81683934180301e-05, + "loss": 0.1334, + "step": 20535 + }, + { + "epoch": 0.96, + "learning_rate": 1.816792461675496e-05, + "loss": 0.3197, + "step": 20540 + }, + { + "epoch": 0.96, + "learning_rate": 1.816745581547982e-05, + "loss": 0.3216, + "step": 20545 + }, + { + "epoch": 0.96, + "learning_rate": 1.816698701420468e-05, + "loss": 0.6545, + "step": 20550 + }, + { + "epoch": 0.96, + "learning_rate": 1.816651821292954e-05, + "loss": 0.1667, + "step": 20555 + }, + { + "epoch": 0.96, + "learning_rate": 1.81660494116544e-05, + "loss": 0.0827, + "step": 20560 + }, + { + "epoch": 0.96, + "learning_rate": 1.816558061037926e-05, + "loss": 0.0818, + "step": 20565 + }, + { + "epoch": 0.96, + "learning_rate": 1.816511180910412e-05, + "loss": 0.1156, + "step": 20570 + }, + { + "epoch": 0.96, + "learning_rate": 1.8164643007828983e-05, + "loss": 0.114, + "step": 20575 + }, + { + "epoch": 0.96, + "learning_rate": 1.8164174206553843e-05, + "loss": 0.1087, + "step": 20580 + }, + { + "epoch": 0.96, + "learning_rate": 1.8163705405278703e-05, + "loss": 0.1839, + "step": 20585 + }, + { + "epoch": 0.96, + "learning_rate": 1.8163236604003566e-05, + "loss": 0.3323, + "step": 20590 + }, + { + "epoch": 0.96, + "learning_rate": 1.8162767802728426e-05, + "loss": 0.3873, + "step": 20595 + }, + { + "epoch": 0.96, + "learning_rate": 1.8162299001453286e-05, + "loss": 0.45, + "step": 20600 + }, + { + "epoch": 0.96, + "learning_rate": 1.8161830200178146e-05, + "loss": 0.2414, + "step": 20605 + }, + { + "epoch": 0.96, + "learning_rate": 1.8161361398903006e-05, + "loss": 0.0611, + "step": 20610 + }, + { + "epoch": 0.96, + "learning_rate": 1.8160892597627866e-05, + "loss": 0.124, + "step": 20615 + }, + { + "epoch": 0.96, + "learning_rate": 1.816042379635273e-05, + "loss": 0.0909, + "step": 20620 + }, + { + "epoch": 0.96, + "learning_rate": 1.815995499507759e-05, + "loss": 0.1789, + "step": 20625 + }, + { + "epoch": 0.96, + "learning_rate": 1.815948619380245e-05, + "loss": 0.1668, + "step": 20630 + }, + { + "epoch": 0.96, + "learning_rate": 1.815901739252731e-05, + "loss": 0.1578, + "step": 20635 + }, + { + "epoch": 0.96, + "learning_rate": 1.815854859125217e-05, + "loss": 0.253, + "step": 20640 + }, + { + "epoch": 0.96, + "learning_rate": 1.815807978997703e-05, + "loss": 0.2484, + "step": 20645 + }, + { + "epoch": 0.96, + "learning_rate": 1.815761098870189e-05, + "loss": 0.4484, + "step": 20650 + }, + { + "epoch": 0.96, + "learning_rate": 1.8157142187426752e-05, + "loss": 0.1914, + "step": 20655 + }, + { + "epoch": 0.96, + "learning_rate": 1.8156673386151612e-05, + "loss": 0.1153, + "step": 20660 + }, + { + "epoch": 0.96, + "learning_rate": 1.8156204584876472e-05, + "loss": 0.1582, + "step": 20665 + }, + { + "epoch": 0.96, + "learning_rate": 1.8155735783601335e-05, + "loss": 0.1714, + "step": 20670 + }, + { + "epoch": 0.96, + "learning_rate": 1.8155266982326195e-05, + "loss": 0.1086, + "step": 20675 + }, + { + "epoch": 0.96, + "learning_rate": 1.8154798181051055e-05, + "loss": 0.1785, + "step": 20680 + }, + { + "epoch": 0.97, + "learning_rate": 1.8154329379775915e-05, + "loss": 0.1845, + "step": 20685 + }, + { + "epoch": 0.97, + "learning_rate": 1.8153860578500775e-05, + "loss": 0.3776, + "step": 20690 + }, + { + "epoch": 0.97, + "learning_rate": 1.8153391777225635e-05, + "loss": 0.1668, + "step": 20695 + }, + { + "epoch": 0.97, + "learning_rate": 1.8152922975950495e-05, + "loss": 0.5262, + "step": 20700 + }, + { + "epoch": 0.97, + "learning_rate": 1.8152454174675355e-05, + "loss": 0.1545, + "step": 20705 + }, + { + "epoch": 0.97, + "learning_rate": 1.8151985373400215e-05, + "loss": 0.1112, + "step": 20710 + }, + { + "epoch": 0.97, + "learning_rate": 1.8151516572125078e-05, + "loss": 0.0721, + "step": 20715 + }, + { + "epoch": 0.97, + "learning_rate": 1.8151047770849938e-05, + "loss": 0.1326, + "step": 20720 + }, + { + "epoch": 0.97, + "learning_rate": 1.8150578969574798e-05, + "loss": 0.1883, + "step": 20725 + }, + { + "epoch": 0.97, + "learning_rate": 1.8150110168299658e-05, + "loss": 0.0903, + "step": 20730 + }, + { + "epoch": 0.97, + "learning_rate": 1.814964136702452e-05, + "loss": 0.2431, + "step": 20735 + }, + { + "epoch": 0.97, + "learning_rate": 1.814917256574938e-05, + "loss": 0.301, + "step": 20740 + }, + { + "epoch": 0.97, + "learning_rate": 1.814870376447424e-05, + "loss": 0.3377, + "step": 20745 + }, + { + "epoch": 0.97, + "learning_rate": 1.81482349631991e-05, + "loss": 0.3813, + "step": 20750 + }, + { + "epoch": 0.97, + "learning_rate": 1.814776616192396e-05, + "loss": 0.154, + "step": 20755 + }, + { + "epoch": 0.97, + "learning_rate": 1.8147297360648824e-05, + "loss": 0.0773, + "step": 20760 + }, + { + "epoch": 0.97, + "learning_rate": 1.8146828559373684e-05, + "loss": 0.0761, + "step": 20765 + }, + { + "epoch": 0.97, + "learning_rate": 1.8146359758098544e-05, + "loss": 0.075, + "step": 20770 + }, + { + "epoch": 0.97, + "learning_rate": 1.8145890956823404e-05, + "loss": 0.2386, + "step": 20775 + }, + { + "epoch": 0.97, + "learning_rate": 1.8145422155548264e-05, + "loss": 0.1128, + "step": 20780 + }, + { + "epoch": 0.97, + "learning_rate": 1.8144953354273124e-05, + "loss": 0.1266, + "step": 20785 + }, + { + "epoch": 0.97, + "learning_rate": 1.8144484552997984e-05, + "loss": 0.2653, + "step": 20790 + }, + { + "epoch": 0.97, + "learning_rate": 1.8144015751722844e-05, + "loss": 0.2307, + "step": 20795 + }, + { + "epoch": 0.97, + "learning_rate": 1.8143546950447707e-05, + "loss": 0.5244, + "step": 20800 + }, + { + "epoch": 0.97, + "learning_rate": 1.8143078149172567e-05, + "loss": 0.2061, + "step": 20805 + }, + { + "epoch": 0.97, + "learning_rate": 1.8142609347897427e-05, + "loss": 0.0903, + "step": 20810 + }, + { + "epoch": 0.97, + "learning_rate": 1.814214054662229e-05, + "loss": 0.0667, + "step": 20815 + }, + { + "epoch": 0.97, + "learning_rate": 1.814167174534715e-05, + "loss": 0.069, + "step": 20820 + }, + { + "epoch": 0.97, + "learning_rate": 1.814120294407201e-05, + "loss": 0.0872, + "step": 20825 + }, + { + "epoch": 0.97, + "learning_rate": 1.814073414279687e-05, + "loss": 0.183, + "step": 20830 + }, + { + "epoch": 0.97, + "learning_rate": 1.814026534152173e-05, + "loss": 0.1693, + "step": 20835 + }, + { + "epoch": 0.97, + "learning_rate": 1.813979654024659e-05, + "loss": 0.2612, + "step": 20840 + }, + { + "epoch": 0.97, + "learning_rate": 1.813932773897145e-05, + "loss": 0.2822, + "step": 20845 + }, + { + "epoch": 0.97, + "learning_rate": 1.8138858937696313e-05, + "loss": 0.5785, + "step": 20850 + }, + { + "epoch": 0.97, + "learning_rate": 1.8138390136421173e-05, + "loss": 0.2277, + "step": 20855 + }, + { + "epoch": 0.97, + "learning_rate": 1.8137921335146033e-05, + "loss": 0.0781, + "step": 20860 + }, + { + "epoch": 0.97, + "learning_rate": 1.8137452533870893e-05, + "loss": 0.0593, + "step": 20865 + }, + { + "epoch": 0.97, + "learning_rate": 1.8136983732595753e-05, + "loss": 0.0789, + "step": 20870 + }, + { + "epoch": 0.97, + "learning_rate": 1.8136514931320613e-05, + "loss": 0.1482, + "step": 20875 + }, + { + "epoch": 0.97, + "learning_rate": 1.8136046130045476e-05, + "loss": 0.1335, + "step": 20880 + }, + { + "epoch": 0.97, + "learning_rate": 1.8135577328770336e-05, + "loss": 0.1823, + "step": 20885 + }, + { + "epoch": 0.97, + "learning_rate": 1.8135108527495196e-05, + "loss": 0.2789, + "step": 20890 + }, + { + "epoch": 0.97, + "learning_rate": 1.8134639726220056e-05, + "loss": 0.2018, + "step": 20895 + }, + { + "epoch": 0.98, + "learning_rate": 1.813417092494492e-05, + "loss": 0.4649, + "step": 20900 + }, + { + "epoch": 0.98, + "learning_rate": 1.813370212366978e-05, + "loss": 0.2208, + "step": 20905 + }, + { + "epoch": 0.98, + "learning_rate": 1.813323332239464e-05, + "loss": 0.0604, + "step": 20910 + }, + { + "epoch": 0.98, + "learning_rate": 1.81327645211195e-05, + "loss": 0.0778, + "step": 20915 + }, + { + "epoch": 0.98, + "learning_rate": 1.813229571984436e-05, + "loss": 0.1314, + "step": 20920 + }, + { + "epoch": 0.98, + "learning_rate": 1.813182691856922e-05, + "loss": 0.1132, + "step": 20925 + }, + { + "epoch": 0.98, + "learning_rate": 1.813135811729408e-05, + "loss": 0.1354, + "step": 20930 + }, + { + "epoch": 0.98, + "learning_rate": 1.813088931601894e-05, + "loss": 0.208, + "step": 20935 + }, + { + "epoch": 0.98, + "learning_rate": 1.8130420514743802e-05, + "loss": 0.26, + "step": 20940 + }, + { + "epoch": 0.98, + "learning_rate": 1.8129951713468662e-05, + "loss": 0.281, + "step": 20945 + }, + { + "epoch": 0.98, + "learning_rate": 1.8129482912193522e-05, + "loss": 0.4454, + "step": 20950 + }, + { + "epoch": 0.98, + "learning_rate": 1.8129014110918382e-05, + "loss": 0.2346, + "step": 20955 + }, + { + "epoch": 0.98, + "learning_rate": 1.8128545309643245e-05, + "loss": 0.0742, + "step": 20960 + }, + { + "epoch": 0.98, + "learning_rate": 1.8128076508368105e-05, + "loss": 0.104, + "step": 20965 + }, + { + "epoch": 0.98, + "learning_rate": 1.8127607707092965e-05, + "loss": 0.0568, + "step": 20970 + }, + { + "epoch": 0.98, + "learning_rate": 1.8127138905817825e-05, + "loss": 0.1337, + "step": 20975 + }, + { + "epoch": 0.98, + "learning_rate": 1.8126670104542685e-05, + "loss": 0.1325, + "step": 20980 + }, + { + "epoch": 0.98, + "learning_rate": 1.8126201303267545e-05, + "loss": 0.1653, + "step": 20985 + }, + { + "epoch": 0.98, + "learning_rate": 1.812573250199241e-05, + "loss": 0.2467, + "step": 20990 + }, + { + "epoch": 0.98, + "learning_rate": 1.8125263700717268e-05, + "loss": 0.3002, + "step": 20995 + }, + { + "epoch": 0.98, + "learning_rate": 1.8124794899442128e-05, + "loss": 0.3036, + "step": 21000 + }, + { + "epoch": 0.98, + "learning_rate": 1.8124326098166988e-05, + "loss": 0.2193, + "step": 21005 + }, + { + "epoch": 0.98, + "learning_rate": 1.8123857296891848e-05, + "loss": 0.0949, + "step": 21010 + }, + { + "epoch": 0.98, + "learning_rate": 1.8123388495616708e-05, + "loss": 0.0853, + "step": 21015 + }, + { + "epoch": 0.98, + "learning_rate": 1.812291969434157e-05, + "loss": 0.0995, + "step": 21020 + }, + { + "epoch": 0.98, + "learning_rate": 1.812245089306643e-05, + "loss": 0.138, + "step": 21025 + }, + { + "epoch": 0.98, + "learning_rate": 1.812198209179129e-05, + "loss": 0.1339, + "step": 21030 + }, + { + "epoch": 0.98, + "learning_rate": 1.812151329051615e-05, + "loss": 0.2001, + "step": 21035 + }, + { + "epoch": 0.98, + "learning_rate": 1.8121044489241014e-05, + "loss": 0.2148, + "step": 21040 + }, + { + "epoch": 0.98, + "learning_rate": 1.8120575687965874e-05, + "loss": 0.2632, + "step": 21045 + }, + { + "epoch": 0.98, + "learning_rate": 1.8120106886690734e-05, + "loss": 0.4485, + "step": 21050 + }, + { + "epoch": 0.98, + "learning_rate": 1.8119638085415594e-05, + "loss": 0.1182, + "step": 21055 + }, + { + "epoch": 0.98, + "learning_rate": 1.8119169284140454e-05, + "loss": 0.0904, + "step": 21060 + }, + { + "epoch": 0.98, + "learning_rate": 1.8118700482865314e-05, + "loss": 0.1157, + "step": 21065 + }, + { + "epoch": 0.98, + "learning_rate": 1.8118231681590174e-05, + "loss": 0.1715, + "step": 21070 + }, + { + "epoch": 0.98, + "learning_rate": 1.8117762880315034e-05, + "loss": 0.154, + "step": 21075 + }, + { + "epoch": 0.98, + "learning_rate": 1.8117294079039894e-05, + "loss": 0.2591, + "step": 21080 + }, + { + "epoch": 0.98, + "learning_rate": 1.8116825277764757e-05, + "loss": 0.2791, + "step": 21085 + }, + { + "epoch": 0.98, + "learning_rate": 1.8116356476489617e-05, + "loss": 0.2793, + "step": 21090 + }, + { + "epoch": 0.98, + "learning_rate": 1.8115887675214477e-05, + "loss": 0.2693, + "step": 21095 + }, + { + "epoch": 0.98, + "learning_rate": 1.811541887393934e-05, + "loss": 0.3771, + "step": 21100 + }, + { + "epoch": 0.98, + "learning_rate": 1.81149500726642e-05, + "loss": 0.1693, + "step": 21105 + }, + { + "epoch": 0.99, + "learning_rate": 1.811448127138906e-05, + "loss": 0.1137, + "step": 21110 + }, + { + "epoch": 0.99, + "learning_rate": 1.811401247011392e-05, + "loss": 0.0693, + "step": 21115 + }, + { + "epoch": 0.99, + "learning_rate": 1.811354366883878e-05, + "loss": 0.0632, + "step": 21120 + }, + { + "epoch": 0.99, + "learning_rate": 1.811307486756364e-05, + "loss": 0.1544, + "step": 21125 + }, + { + "epoch": 0.99, + "learning_rate": 1.8112606066288503e-05, + "loss": 0.2066, + "step": 21130 + }, + { + "epoch": 0.99, + "learning_rate": 1.8112137265013363e-05, + "loss": 0.2304, + "step": 21135 + }, + { + "epoch": 0.99, + "learning_rate": 1.8111668463738223e-05, + "loss": 0.3096, + "step": 21140 + }, + { + "epoch": 0.99, + "learning_rate": 1.8111199662463083e-05, + "loss": 0.302, + "step": 21145 + }, + { + "epoch": 0.99, + "learning_rate": 1.8110730861187943e-05, + "loss": 0.3194, + "step": 21150 + }, + { + "epoch": 0.99, + "learning_rate": 1.8110262059912803e-05, + "loss": 0.1765, + "step": 21155 + }, + { + "epoch": 0.99, + "learning_rate": 1.8109793258637663e-05, + "loss": 0.0763, + "step": 21160 + }, + { + "epoch": 0.99, + "learning_rate": 1.8109324457362526e-05, + "loss": 0.0571, + "step": 21165 + }, + { + "epoch": 0.99, + "learning_rate": 1.8108855656087386e-05, + "loss": 0.1146, + "step": 21170 + }, + { + "epoch": 0.99, + "learning_rate": 1.8108386854812246e-05, + "loss": 0.1685, + "step": 21175 + }, + { + "epoch": 0.99, + "learning_rate": 1.810791805353711e-05, + "loss": 0.1763, + "step": 21180 + }, + { + "epoch": 0.99, + "learning_rate": 1.810744925226197e-05, + "loss": 0.146, + "step": 21185 + }, + { + "epoch": 0.99, + "learning_rate": 1.810698045098683e-05, + "loss": 0.232, + "step": 21190 + }, + { + "epoch": 0.99, + "learning_rate": 1.810651164971169e-05, + "loss": 0.3637, + "step": 21195 + }, + { + "epoch": 0.99, + "learning_rate": 1.810604284843655e-05, + "loss": 0.4796, + "step": 21200 + }, + { + "epoch": 0.99, + "learning_rate": 1.810557404716141e-05, + "loss": 0.2576, + "step": 21205 + }, + { + "epoch": 0.99, + "learning_rate": 1.810510524588627e-05, + "loss": 0.0738, + "step": 21210 + }, + { + "epoch": 0.99, + "learning_rate": 1.810463644461113e-05, + "loss": 0.0893, + "step": 21215 + }, + { + "epoch": 0.99, + "learning_rate": 1.810416764333599e-05, + "loss": 0.1347, + "step": 21220 + }, + { + "epoch": 0.99, + "learning_rate": 1.8103698842060852e-05, + "loss": 0.1224, + "step": 21225 + }, + { + "epoch": 0.99, + "learning_rate": 1.8103230040785712e-05, + "loss": 0.1012, + "step": 21230 + }, + { + "epoch": 0.99, + "learning_rate": 1.8102761239510572e-05, + "loss": 0.1355, + "step": 21235 + }, + { + "epoch": 0.99, + "learning_rate": 1.8102292438235432e-05, + "loss": 0.2893, + "step": 21240 + }, + { + "epoch": 0.99, + "learning_rate": 1.8101823636960295e-05, + "loss": 0.2347, + "step": 21245 + }, + { + "epoch": 0.99, + "learning_rate": 1.8101354835685155e-05, + "loss": 0.4496, + "step": 21250 + }, + { + "epoch": 0.99, + "learning_rate": 1.8100886034410015e-05, + "loss": 0.2292, + "step": 21255 + }, + { + "epoch": 0.99, + "learning_rate": 1.8100417233134875e-05, + "loss": 0.1001, + "step": 21260 + }, + { + "epoch": 0.99, + "learning_rate": 1.8099948431859735e-05, + "loss": 0.0865, + "step": 21265 + }, + { + "epoch": 0.99, + "learning_rate": 1.80994796305846e-05, + "loss": 0.0803, + "step": 21270 + }, + { + "epoch": 0.99, + "learning_rate": 1.809901082930946e-05, + "loss": 0.1175, + "step": 21275 + }, + { + "epoch": 0.99, + "learning_rate": 1.809854202803432e-05, + "loss": 0.2219, + "step": 21280 + }, + { + "epoch": 0.99, + "learning_rate": 1.809807322675918e-05, + "loss": 0.2215, + "step": 21285 + }, + { + "epoch": 0.99, + "learning_rate": 1.8097604425484038e-05, + "loss": 0.221, + "step": 21290 + }, + { + "epoch": 0.99, + "learning_rate": 1.8097135624208898e-05, + "loss": 0.2907, + "step": 21295 + }, + { + "epoch": 0.99, + "learning_rate": 1.8096666822933758e-05, + "loss": 0.5423, + "step": 21300 + }, + { + "epoch": 0.99, + "learning_rate": 1.8096198021658618e-05, + "loss": 0.1497, + "step": 21305 + }, + { + "epoch": 0.99, + "learning_rate": 1.809572922038348e-05, + "loss": 0.1218, + "step": 21310 + }, + { + "epoch": 0.99, + "learning_rate": 1.809526041910834e-05, + "loss": 0.1345, + "step": 21315 + }, + { + "epoch": 0.99, + "learning_rate": 1.80947916178332e-05, + "loss": 0.0717, + "step": 21320 + }, + { + "epoch": 1.0, + "learning_rate": 1.8094322816558065e-05, + "loss": 0.2388, + "step": 21325 + }, + { + "epoch": 1.0, + "learning_rate": 1.8093854015282925e-05, + "loss": 0.1201, + "step": 21330 + }, + { + "epoch": 1.0, + "learning_rate": 1.8093385214007784e-05, + "loss": 0.1755, + "step": 21335 + }, + { + "epoch": 1.0, + "learning_rate": 1.8092916412732644e-05, + "loss": 0.2016, + "step": 21340 + }, + { + "epoch": 1.0, + "learning_rate": 1.8092447611457504e-05, + "loss": 0.2375, + "step": 21345 + }, + { + "epoch": 1.0, + "learning_rate": 1.8091978810182364e-05, + "loss": 0.6163, + "step": 21350 + }, + { + "epoch": 1.0, + "learning_rate": 1.8091510008907224e-05, + "loss": 0.1855, + "step": 21355 + }, + { + "epoch": 1.0, + "learning_rate": 1.8091041207632084e-05, + "loss": 0.1006, + "step": 21360 + }, + { + "epoch": 1.0, + "learning_rate": 1.8090572406356947e-05, + "loss": 0.0872, + "step": 21365 + }, + { + "epoch": 1.0, + "learning_rate": 1.8090103605081807e-05, + "loss": 0.0815, + "step": 21370 + }, + { + "epoch": 1.0, + "learning_rate": 1.8089634803806667e-05, + "loss": 0.1751, + "step": 21375 + }, + { + "epoch": 1.0, + "learning_rate": 1.8089166002531527e-05, + "loss": 0.1182, + "step": 21380 + }, + { + "epoch": 1.0, + "learning_rate": 1.8088697201256387e-05, + "loss": 0.2401, + "step": 21385 + }, + { + "epoch": 1.0, + "learning_rate": 1.808822839998125e-05, + "loss": 0.2828, + "step": 21390 + }, + { + "epoch": 1.0, + "learning_rate": 1.808775959870611e-05, + "loss": 0.2181, + "step": 21395 + }, + { + "epoch": 1.0, + "learning_rate": 1.808729079743097e-05, + "loss": 0.4759, + "step": 21400 + }, + { + "epoch": 1.0, + "learning_rate": 1.808682199615583e-05, + "loss": 0.0917, + "step": 21405 + }, + { + "epoch": 1.0, + "learning_rate": 1.8086353194880694e-05, + "loss": 0.0687, + "step": 21410 + }, + { + "epoch": 1.0, + "learning_rate": 1.8085884393605554e-05, + "loss": 0.0882, + "step": 21415 + }, + { + "epoch": 1.0, + "learning_rate": 1.8085415592330413e-05, + "loss": 0.1867, + "step": 21420 + }, + { + "epoch": 1.0, + "learning_rate": 1.8084946791055273e-05, + "loss": 0.2552, + "step": 21425 + }, + { + "epoch": 1.0, + "learning_rate": 1.8084477989780133e-05, + "loss": 0.4176, + "step": 21430 + }, + { + "epoch": 1.0, + "eval_cer": 0.017639530100105133, + "eval_loss": 0.05617095157504082, + "eval_runtime": 420.9463, + "eval_samples_per_second": 45.255, + "eval_steps_per_second": 11.315, + "eval_wer": 0.151395023974025, + "step": 21431 + } + ], + "max_steps": 214310, + "num_train_epochs": 10, + "total_flos": 2.574958852629261e+18, + "trial_name": null, + "trial_params": null +}