| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9664951741145456, |
| "eval_steps": 500, |
| "global_step": 185000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 5e-06, |
| "loss": 3.6198, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 1e-05, |
| "loss": 3.5109, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 1.5e-05, |
| "loss": 3.4948, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 2e-05, |
| "loss": 3.5552, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 2.5e-05, |
| "loss": 3.5109, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3e-05, |
| "loss": 3.513, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.5000000000000004e-05, |
| "loss": 3.5135, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4e-05, |
| "loss": 3.5552, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4999999999999996e-05, |
| "loss": 3.5392, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 5e-05, |
| "loss": 3.5638, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 5.5e-05, |
| "loss": 3.5414, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6e-05, |
| "loss": 3.5545, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6.500000000000001e-05, |
| "loss": 3.4934, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.000000000000001e-05, |
| "loss": 3.5182, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 7.5e-05, |
| "loss": 3.5703, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 8e-05, |
| "loss": 3.5242, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 8.5e-05, |
| "loss": 3.4979, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 8.999999999999999e-05, |
| "loss": 3.502, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 9.5e-05, |
| "loss": 3.5335, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.0001, |
| "loss": 3.5493, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.000105, |
| "loss": 3.5539, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.00011, |
| "loss": 3.5152, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 0.000115, |
| "loss": 3.5788, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00012, |
| "loss": 3.5338, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.000125, |
| "loss": 3.5295, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00013000000000000002, |
| "loss": 3.5262, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.000135, |
| "loss": 3.5277, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00014000000000000001, |
| "loss": 3.5143, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.000145, |
| "loss": 3.5899, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00015, |
| "loss": 3.5366, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.000155, |
| "loss": 3.4522, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 0.00016, |
| "loss": 3.531, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.000165, |
| "loss": 3.5378, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00017, |
| "loss": 3.5002, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.000175, |
| "loss": 3.4772, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00017999999999999998, |
| "loss": 3.5223, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.000185, |
| "loss": 3.5212, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019, |
| "loss": 3.5397, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00019500000000000002, |
| "loss": 3.5471, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.0002, |
| "loss": 3.5082, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.000205, |
| "loss": 3.5092, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 0.00021, |
| "loss": 3.4302, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.000215, |
| "loss": 3.5378, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00022, |
| "loss": 3.4796, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00022500000000000002, |
| "loss": 3.5122, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00023, |
| "loss": 3.5079, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.000235, |
| "loss": 3.484, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00024, |
| "loss": 3.5695, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.000245, |
| "loss": 3.5049, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.00025, |
| "loss": 3.4858, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 0.000255, |
| "loss": 3.4795, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00026000000000000003, |
| "loss": 3.5687, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00026500000000000004, |
| "loss": 3.4746, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00027, |
| "loss": 3.5224, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.000275, |
| "loss": 3.4772, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00028000000000000003, |
| "loss": 3.5545, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.000285, |
| "loss": 3.5292, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.00029, |
| "loss": 3.5156, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.000295, |
| "loss": 3.5265, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.0003, |
| "loss": 3.5554, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 0.000305, |
| "loss": 3.5095, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00031, |
| "loss": 3.5129, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.000315, |
| "loss": 3.546, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00032, |
| "loss": 3.5059, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00032500000000000004, |
| "loss": 3.4791, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00033, |
| "loss": 3.4911, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.000335, |
| "loss": 3.5105, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00034, |
| "loss": 3.4258, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.000345, |
| "loss": 3.5187, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 0.00035, |
| "loss": 3.5052, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.000355, |
| "loss": 3.4961, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00035999999999999997, |
| "loss": 3.5155, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.000365, |
| "loss": 3.537, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00037, |
| "loss": 3.4744, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.000375, |
| "loss": 3.4898, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00038, |
| "loss": 3.4827, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00038500000000000003, |
| "loss": 3.5292, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.00039000000000000005, |
| "loss": 3.5189, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 0.000395, |
| "loss": 3.4855, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.0004, |
| "loss": 3.5686, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00040500000000000003, |
| "loss": 3.5008, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00041, |
| "loss": 3.5276, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.000415, |
| "loss": 3.491, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00042, |
| "loss": 3.5629, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.000425, |
| "loss": 3.5554, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00043, |
| "loss": 3.5069, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.000435, |
| "loss": 3.5329, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00044, |
| "loss": 3.5153, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 0.00044500000000000003, |
| "loss": 3.5705, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00045000000000000004, |
| "loss": 3.4461, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.000455, |
| "loss": 3.5421, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00046, |
| "loss": 3.5423, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.000465, |
| "loss": 3.5614, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00047, |
| "loss": 3.4959, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.000475, |
| "loss": 3.5324, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00048, |
| "loss": 3.4574, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00048499999999999997, |
| "loss": 3.5164, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 0.00049, |
| "loss": 3.5273, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.000495, |
| "loss": 3.5234, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0005, |
| "loss": 3.5284, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0004999996112877375, |
| "loss": 3.5233, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0004999984451521587, |
| "loss": 3.4993, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0004999965015968901, |
| "loss": 3.5276, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0004999937806279752, |
| "loss": 3.5053, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0004999902822538758, |
| "loss": 3.486, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0004999860064854707, |
| "loss": 3.5093, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0004999809533360561, |
| "loss": 3.531, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 0.0004999751228213458, |
| "loss": 3.5188, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.000499968514959471, |
| "loss": 3.4891, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.00049996112977098, |
| "loss": 3.5318, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0004999529672788389, |
| "loss": 3.538, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0004999440275084302, |
| "loss": 3.5255, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.000499934310487554, |
| "loss": 3.563, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0004999238162464273, |
| "loss": 3.5304, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0004999125448176843, |
| "loss": 3.5348, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0004999004962363751, |
| "loss": 3.4948, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 0.0004998876705399677, |
| "loss": 3.5647, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.000499874067768346, |
| "loss": 3.5225, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0004998596879638106, |
| "loss": 3.4625, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.000499844531171078, |
| "loss": 3.466, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0004998285974372816, |
| "loss": 3.5515, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0004998118868119704, |
| "loss": 3.6037, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0004997943993471093, |
| "loss": 3.5103, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0004997761350970793, |
| "loss": 3.4917, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0004997570941186764, |
| "loss": 3.5306, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0004997372764711125, |
| "loss": 3.4867, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 0.0004997166822160145, |
| "loss": 3.449, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.0004996953114174239, |
| "loss": 3.5224, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.0004996731641417981, |
| "loss": 3.5221, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.000499650240458008, |
| "loss": 3.5179, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.0004996265404373395, |
| "loss": 3.485, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.0004996020641534924, |
| "loss": 3.5237, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.0004995768116825806, |
| "loss": 3.51, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.0004995507831031317, |
| "loss": 3.5003, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.0004995239784960868, |
| "loss": 3.5773, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.0004994963979447999, |
| "loss": 3.5475, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.0004994680415350384, |
| "loss": 3.5233, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.000499438909354982, |
| "loss": 3.5136, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.0004994090014952231, |
| "loss": 3.5589, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.0004993783180487662, |
| "loss": 3.4992, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.0004993468591110274, |
| "loss": 3.5307, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.0004993146247798345, |
| "loss": 3.5067, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.0004992816151554267, |
| "loss": 3.5534, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.0004992478303404537, |
| "loss": 3.4614, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 0.0004992132704399764, |
| "loss": 3.4507, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.0004991779355614653, |
| "loss": 3.5012, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.0004991418258148015, |
| "loss": 3.5332, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.0004991049413122752, |
| "loss": 3.5121, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.0004990672821685863, |
| "loss": 3.4945, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.0004990288485008431, |
| "loss": 3.5502, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.000498989640428563, |
| "loss": 3.5384, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.000498949658073671, |
| "loss": 3.5698, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.0004989089015605002, |
| "loss": 3.4467, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.0004988673710157913, |
| "loss": 3.5147, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 0.0004988250665686915, |
| "loss": 3.4889, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0004987819883507549, |
| "loss": 3.4569, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0004987381364959417, |
| "loss": 3.4654, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.000498693511140618, |
| "loss": 3.5525, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0004986481124235554, |
| "loss": 3.4769, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0004986019404859298, |
| "loss": 3.4723, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0004985549954713222, |
| "loss": 3.5086, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0004985072775257175, |
| "loss": 3.537, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0004984587867975039, |
| "loss": 3.4993, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 0.0004984095234374732, |
| "loss": 3.4976, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0004983594875988193, |
| "loss": 3.5206, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0004983086794371385, |
| "loss": 3.4844, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0004982570991104293, |
| "loss": 3.4813, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0004982047467790904, |
| "loss": 3.5309, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0004981516226059222, |
| "loss": 3.5115, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0004980977267561245, |
| "loss": 3.4775, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0004980430593972974, |
| "loss": 3.4671, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0004979876206994396, |
| "loss": 3.4842, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.0004979314108349489, |
| "loss": 3.462, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 0.000497874429978621, |
| "loss": 3.4734, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.0004978166783076492, |
| "loss": 3.4945, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.0004977581560016236, |
| "loss": 3.49, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.0004976988632425309, |
| "loss": 3.5377, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.0004976388002147538, |
| "loss": 3.5332, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.0004975779671050702, |
| "loss": 3.4532, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.0004975163641026527, |
| "loss": 3.5116, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.000497453991399068, |
| "loss": 3.4816, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.0004973908491882763, |
| "loss": 3.4983, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 0.000497326937666631, |
| "loss": 3.4687, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0004972622570328775, |
| "loss": 3.5534, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0004971968074881528, |
| "loss": 3.4703, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0004971305892359858, |
| "loss": 3.5117, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0004970636024822949, |
| "loss": 3.4832, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0004969958474353888, |
| "loss": 3.554, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0004969273243059651, |
| "loss": 3.4567, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0004968580333071101, |
| "loss": 3.5089, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.0004967879746542981, |
| "loss": 3.5264, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 0.00049671714856539, |
| "loss": 3.546, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0004966455552606338, |
| "loss": 3.5111, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0004965731949626629, |
| "loss": 3.4773, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0004965000678964962, |
| "loss": 3.4718, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0004964261742895367, |
| "loss": 3.4829, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0004963515143715711, |
| "loss": 3.5101, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0004962760883747694, |
| "loss": 3.4787, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0004961998965336835, |
| "loss": 3.5048, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0004961229390852471, |
| "loss": 3.5439, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0004960452162687747, |
| "loss": 3.4837, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 0.0004959667283259607, |
| "loss": 3.4976, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.0004958874755008788, |
| "loss": 3.4594, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.0004958074580399816, |
| "loss": 3.4317, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.0004957266761920991, |
| "loss": 3.4615, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.0004956451302084385, |
| "loss": 3.5461, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.0004955628203425832, |
| "loss": 3.5074, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.000495479746850492, |
| "loss": 3.5118, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.0004953959099904985, |
| "loss": 3.4543, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.0004953113100233098, |
| "loss": 3.5353, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 0.0004952259472120064, |
| "loss": 3.541, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0004951398218220408, |
| "loss": 3.5633, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0004950529341212371, |
| "loss": 3.4821, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0004949652843797897, |
| "loss": 3.5209, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0004948768728702628, |
| "loss": 3.5295, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0004947876998675897, |
| "loss": 3.4903, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0004946977656490713, |
| "loss": 3.5398, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0004946070704943761, |
| "loss": 3.5016, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0004945156146855383, |
| "loss": 3.4882, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0004944233985069581, |
| "loss": 3.4632, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 0.0004943304222454001, |
| "loss": 3.4802, |
| "step": 22100 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.0004942366861899921, |
| "loss": 3.4686, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.000494142190632225, |
| "loss": 3.4896, |
| "step": 22300 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.0004940469358659516, |
| "loss": 3.4763, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.0004939509221873854, |
| "loss": 3.5467, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.0004938541498951, |
| "loss": 3.5006, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.0004937566192900279, |
| "loss": 3.4922, |
| "step": 22700 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.00049365833067546, |
| "loss": 3.4747, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.000493559284357044, |
| "loss": 3.4843, |
| "step": 22900 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 0.0004934594806427843, |
| "loss": 3.5593, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.00049335891984304, |
| "loss": 3.5082, |
| "step": 23100 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.0004932576022705252, |
| "loss": 3.4836, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.0004931555282403066, |
| "loss": 3.472, |
| "step": 23300 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.0004930526980698039, |
| "loss": 3.4998, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.0004929491120787878, |
| "loss": 3.4831, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.0004928447705893794, |
| "loss": 3.4745, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.0004927396739260493, |
| "loss": 3.469, |
| "step": 23700 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.0004926338224156163, |
| "loss": 3.5138, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 0.0004925272163872468, |
| "loss": 3.4742, |
| "step": 23900 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0004924198561724532, |
| "loss": 3.4287, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0004923117421050934, |
| "loss": 3.4663, |
| "step": 24100 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0004922028745213696, |
| "loss": 3.4808, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0004920932537598269, |
| "loss": 3.5508, |
| "step": 24300 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0004919828801613532, |
| "loss": 3.5053, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0004918717540691766, |
| "loss": 3.5131, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.000491759875828866, |
| "loss": 3.4786, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0004916472457883287, |
| "loss": 3.5234, |
| "step": 24700 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0004915338642978103, |
| "loss": 3.5251, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 0.0004914197317098931, |
| "loss": 3.4751, |
| "step": 24900 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.0004913048483794948, |
| "loss": 3.5188, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.000491189214663868, |
| "loss": 3.4953, |
| "step": 25100 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.0004910728309225985, |
| "loss": 3.4985, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.0004909556975176047, |
| "loss": 3.4845, |
| "step": 25300 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.0004908378148131362, |
| "loss": 3.6007, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.0004907191831757724, |
| "loss": 3.481, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.0004905998029744222, |
| "loss": 3.5303, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.0004904796745803217, |
| "loss": 3.5077, |
| "step": 25700 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 0.0004903587983670339, |
| "loss": 3.481, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0004902371747104476, |
| "loss": 3.451, |
| "step": 25900 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0004901148039887756, |
| "loss": 3.4953, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0004899916865825537, |
| "loss": 3.458, |
| "step": 26100 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00048986782287464, |
| "loss": 3.4587, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0004897432132502132, |
| "loss": 3.491, |
| "step": 26300 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0004896178580967717, |
| "loss": 3.4245, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0004894917578041322, |
| "loss": 3.5217, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0004893649127644283, |
| "loss": 3.5185, |
| "step": 26600 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.00048923732337211, |
| "loss": 3.505, |
| "step": 26700 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0004891089900239418, |
| "loss": 3.4743, |
| "step": 26800 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.0004889799131190015, |
| "loss": 3.4964, |
| "step": 26900 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.0004888500930586793, |
| "loss": 3.5558, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.0004887195302466767, |
| "loss": 3.442, |
| "step": 27100 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.0004885882250890044, |
| "loss": 3.5129, |
| "step": 27200 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.0004884561779939817, |
| "loss": 3.5072, |
| "step": 27300 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.0004883233893722354, |
| "loss": 3.4971, |
| "step": 27400 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.000488189859636698, |
| "loss": 3.47, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.0004880555892026066, |
| "loss": 3.5349, |
| "step": 27600 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 0.0004879205784875017, |
| "loss": 3.4856, |
| "step": 27700 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0004877848279112259, |
| "loss": 3.5072, |
| "step": 27800 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00048764833789592254, |
| "loss": 3.4752, |
| "step": 27900 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0004875111088660343, |
| "loss": 3.4654, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.000487373141248302, |
| "loss": 3.5321, |
| "step": 28100 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0004872344354717634, |
| "loss": 3.4574, |
| "step": 28200 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0004870949919677515, |
| "loss": 3.4621, |
| "step": 28300 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00048695481116989357, |
| "loss": 3.532, |
| "step": 28400 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.00048681389351410955, |
| "loss": 3.5071, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 0.0004866722394386107, |
| "loss": 3.5263, |
| "step": 28600 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00048652984938389853, |
| "loss": 3.5198, |
| "step": 28700 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00048638672379276314, |
| "loss": 3.4399, |
| "step": 28800 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.0004862428631102819, |
| "loss": 3.4343, |
| "step": 28900 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.000486098267783818, |
| "loss": 3.4477, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00048595293826301936, |
| "loss": 3.4616, |
| "step": 29100 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.0004858068749998169, |
| "loss": 3.498, |
| "step": 29200 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.0004856600784484232, |
| "loss": 3.5423, |
| "step": 29300 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00048551254906533135, |
| "loss": 3.5488, |
| "step": 29400 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00048536428730931307, |
| "loss": 3.4823, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 0.00048521529364141776, |
| "loss": 3.4666, |
| "step": 29600 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.0004850655685249706, |
| "loss": 3.4553, |
| "step": 29700 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.0004849151124255716, |
| "loss": 3.4764, |
| "step": 29800 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.0004847639258110939, |
| "loss": 3.504, |
| "step": 29900 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.000484612009151682, |
| "loss": 3.5053, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.0004844593629197511, |
| "loss": 3.4995, |
| "step": 30100 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00048430598758998465, |
| "loss": 3.5613, |
| "step": 30200 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.00048415188363933384, |
| "loss": 3.4437, |
| "step": 30300 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.0004839970515470153, |
| "loss": 3.5437, |
| "step": 30400 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 0.0004838414917945101, |
| "loss": 3.4199, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00048368520486556215, |
| "loss": 3.5321, |
| "step": 30600 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00048352819124617666, |
| "loss": 3.5139, |
| "step": 30700 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00048337045142461845, |
| "loss": 3.5193, |
| "step": 30800 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.0004832119858914108, |
| "loss": 3.4716, |
| "step": 30900 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00048305279513933375, |
| "loss": 3.4594, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.0004828928796634224, |
| "loss": 3.5184, |
| "step": 31100 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.0004827322399609656, |
| "loss": 3.4635, |
| "step": 31200 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.0004825708765315044, |
| "loss": 3.5565, |
| "step": 31300 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.00048240878987683037, |
| "loss": 3.4901, |
| "step": 31400 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 0.0004822459805009839, |
| "loss": 3.5068, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.0004820824489102531, |
| "loss": 3.4434, |
| "step": 31600 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00048191819561317184, |
| "loss": 3.5135, |
| "step": 31700 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.0004817532211205184, |
| "loss": 3.5205, |
| "step": 31800 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00048158752594531346, |
| "loss": 3.5106, |
| "step": 31900 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.0004814211106028191, |
| "loss": 3.5037, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.00048125397561053676, |
| "loss": 3.5225, |
| "step": 32100 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.0004810861214882058, |
| "loss": 3.5481, |
| "step": 32200 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.0004809175487578019, |
| "loss": 3.5227, |
| "step": 32300 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 0.0004807482579435353, |
| "loss": 3.5191, |
| "step": 32400 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0004805782495718494, |
| "loss": 3.4667, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0004804075241714189, |
| "loss": 3.5535, |
| "step": 32600 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0004802360822731482, |
| "loss": 3.5245, |
| "step": 32700 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.00048006392441016986, |
| "loss": 3.4818, |
| "step": 32800 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0004798910511178429, |
| "loss": 3.5417, |
| "step": 32900 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.00047971746293375107, |
| "loss": 3.4636, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0004795431603977011, |
| "loss": 3.4918, |
| "step": 33100 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.00047936814405172143, |
| "loss": 3.4898, |
| "step": 33200 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 0.0004791924144400599, |
| "loss": 3.5102, |
| "step": 33300 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0004790159721091827, |
| "loss": 3.5459, |
| "step": 33400 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00047883881760777205, |
| "loss": 3.4848, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0004786609514867251, |
| "loss": 3.534, |
| "step": 33600 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00047848237429915175, |
| "loss": 3.5235, |
| "step": 33700 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00047830308660037305, |
| "loss": 3.5164, |
| "step": 33800 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0004781230889479198, |
| "loss": 3.5117, |
| "step": 33900 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0004779423819015302, |
| "loss": 3.5032, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0004777609660231486, |
| "loss": 3.4958, |
| "step": 34100 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.00047757884187692374, |
| "loss": 3.5169, |
| "step": 34200 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 0.0004773960100292066, |
| "loss": 3.5191, |
| "step": 34300 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.0004772124710485492, |
| "loss": 3.4945, |
| "step": 34400 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.0004770282255057022, |
| "loss": 3.5127, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.0004768432739736137, |
| "loss": 3.4561, |
| "step": 34600 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00047665761702742705, |
| "loss": 3.558, |
| "step": 34700 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.0004764712552444794, |
| "loss": 3.5893, |
| "step": 34800 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.0004762841892042995, |
| "loss": 3.5003, |
| "step": 34900 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00047609641948860636, |
| "loss": 3.4897, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.0004759079466813072, |
| "loss": 3.4982, |
| "step": 35100 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 0.00047571877136849537, |
| "loss": 3.524, |
| "step": 35200 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.000475528894138449, |
| "loss": 3.4562, |
| "step": 35300 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.0004753383155816291, |
| "loss": 3.5271, |
| "step": 35400 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00047514703629067726, |
| "loss": 3.4422, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.0004749550568604145, |
| "loss": 3.4279, |
| "step": 35600 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.0004747623778878387, |
| "loss": 3.4677, |
| "step": 35700 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.0004745689999721234, |
| "loss": 3.4405, |
| "step": 35800 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00047437492371461566, |
| "loss": 3.4902, |
| "step": 35900 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.0004741801497188339, |
| "loss": 3.4773, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.0004739846785904664, |
| "loss": 3.5532, |
| "step": 36100 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 0.00047378851093736945, |
| "loss": 3.4676, |
| "step": 36200 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0004735916473695653, |
| "loss": 3.4511, |
| "step": 36300 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.00047339408849924, |
| "loss": 3.5473, |
| "step": 36400 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0004731958349407421, |
| "loss": 3.5044, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0004729968873105804, |
| "loss": 3.5104, |
| "step": 36600 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0004727972462274219, |
| "loss": 3.4658, |
| "step": 36700 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.00047259691231209006, |
| "loss": 3.4728, |
| "step": 36800 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.0004723958861875629, |
| "loss": 3.4425, |
| "step": 36900 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.000472194168478971, |
| "loss": 3.4615, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 0.00047199175981359556, |
| "loss": 3.5654, |
| "step": 37100 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00047178866082086635, |
| "loss": 3.5196, |
| "step": 37200 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0004715848721323599, |
| "loss": 3.4618, |
| "step": 37300 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00047138039438179765, |
| "loss": 3.4837, |
| "step": 37400 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00047117522820504357, |
| "loss": 3.5105, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.00047096937424010246, |
| "loss": 3.5053, |
| "step": 37600 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0004707628331271182, |
| "loss": 3.5327, |
| "step": 37700 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0004705556055083711, |
| "loss": 3.4411, |
| "step": 37800 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0004703476920282766, |
| "loss": 3.5237, |
| "step": 37900 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0004701390933333829, |
| "loss": 3.4413, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 0.0004699298100723688, |
| "loss": 3.5936, |
| "step": 38100 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.0004697198428960422, |
| "loss": 3.5068, |
| "step": 38200 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00046950919245733756, |
| "loss": 3.5562, |
| "step": 38300 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.0004692978594113142, |
| "loss": 3.471, |
| "step": 38400 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.000469085844415154, |
| "loss": 3.4787, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.0004688731481281597, |
| "loss": 3.4779, |
| "step": 38600 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00046865977121175257, |
| "loss": 3.4712, |
| "step": 38700 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00046844571432947025, |
| "loss": 3.4615, |
| "step": 38800 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.00046823097814696515, |
| "loss": 3.4683, |
| "step": 38900 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 0.0004680155633320019, |
| "loss": 3.482, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.0004677994705544555, |
| "loss": 3.4946, |
| "step": 39100 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00046758270048630933, |
| "loss": 3.4712, |
| "step": 39200 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00046736525380165284, |
| "loss": 3.4971, |
| "step": 39300 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.0004671471311766796, |
| "loss": 3.4925, |
| "step": 39400 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.0004669283332896851, |
| "loss": 3.4788, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.0004667088608210647, |
| "loss": 3.4891, |
| "step": 39600 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.00046648871445331144, |
| "loss": 3.5367, |
| "step": 39700 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.0004662678948710142, |
| "loss": 3.4932, |
| "step": 39800 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.0004660464027608552, |
| "loss": 3.5584, |
| "step": 39900 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00046582423881160796, |
| "loss": 3.4568, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00046560140371413526, |
| "loss": 3.4778, |
| "step": 40100 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.0004653778981613871, |
| "loss": 3.4778, |
| "step": 40200 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.0004651537228483983, |
| "loss": 3.4521, |
| "step": 40300 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.0004649288784722862, |
| "loss": 3.4848, |
| "step": 40400 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00046470336573224913, |
| "loss": 3.4623, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.0004644771853295635, |
| "loss": 3.5117, |
| "step": 40600 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.00046425033796758207, |
| "loss": 3.4535, |
| "step": 40700 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.0004640228243517318, |
| "loss": 3.4737, |
| "step": 40800 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 0.0004637946451895113, |
| "loss": 3.4991, |
| "step": 40900 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0004635658011904887, |
| "loss": 3.517, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00046333629306629997, |
| "loss": 3.4767, |
| "step": 41100 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00046310612153064603, |
| "loss": 3.4917, |
| "step": 41200 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0004628752872992909, |
| "loss": 3.4907, |
| "step": 41300 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0004626437910900591, |
| "loss": 3.5002, |
| "step": 41400 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00046241163362283424, |
| "loss": 3.4782, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.0004621788156195559, |
| "loss": 3.48, |
| "step": 41600 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00046194533780421766, |
| "loss": 3.5048, |
| "step": 41700 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 0.00046171120090286516, |
| "loss": 3.4651, |
| "step": 41800 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.0004614764056435934, |
| "loss": 3.5113, |
| "step": 41900 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00046124095275654485, |
| "loss": 3.4631, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00046100484297390676, |
| "loss": 3.4961, |
| "step": 42100 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00046076807702990943, |
| "loss": 3.4688, |
| "step": 42200 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00046053065566082344, |
| "loss": 3.4649, |
| "step": 42300 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.0004602925796049574, |
| "loss": 3.5527, |
| "step": 42400 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.00046005384960265617, |
| "loss": 3.5142, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.0004598144663962979, |
| "loss": 3.4609, |
| "step": 42600 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.000459574430730292, |
| "loss": 3.5237, |
| "step": 42700 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 0.0004593337433510771, |
| "loss": 3.4829, |
| "step": 42800 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.0004590924050071182, |
| "loss": 3.5192, |
| "step": 42900 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00045885041644890467, |
| "loss": 3.4881, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00045860777842894796, |
| "loss": 3.5034, |
| "step": 43100 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00045836449170177896, |
| "loss": 3.486, |
| "step": 43200 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00045812055702394597, |
| "loss": 3.4628, |
| "step": 43300 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00045787597515401223, |
| "loss": 3.4878, |
| "step": 43400 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.0004576307468525535, |
| "loss": 3.4622, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.0004573848728821557, |
| "loss": 3.5293, |
| "step": 43600 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 0.00045713835400741274, |
| "loss": 3.4451, |
| "step": 43700 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00045689119099492383, |
| "loss": 3.5038, |
| "step": 43800 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00045664338461329137, |
| "loss": 3.4633, |
| "step": 43900 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.0004563949356331184, |
| "loss": 3.4536, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.0004561458448270062, |
| "loss": 3.5222, |
| "step": 44100 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.0004558961129695519, |
| "loss": 3.4848, |
| "step": 44200 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.0004556457408373464, |
| "loss": 3.5302, |
| "step": 44300 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.0004553947292089713, |
| "loss": 3.5057, |
| "step": 44400 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.000455143078864997, |
| "loss": 3.537, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 0.00045489079058798, |
| "loss": 3.4947, |
| "step": 44600 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00045463786516246086, |
| "loss": 3.546, |
| "step": 44700 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00045438430337496117, |
| "loss": 3.5723, |
| "step": 44800 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00045413010601398163, |
| "loss": 3.4514, |
| "step": 44900 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.0004538752738699992, |
| "loss": 3.4672, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.000453619807735465, |
| "loss": 3.4934, |
| "step": 45100 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00045336370840480143, |
| "loss": 3.4974, |
| "step": 45200 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00045310697667440026, |
| "loss": 3.4252, |
| "step": 45300 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.00045284961334261965, |
| "loss": 3.5194, |
| "step": 45400 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.0004525916192097818, |
| "loss": 3.4608, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 0.0004523329950781705, |
| "loss": 3.5033, |
| "step": 45600 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0004520737417520289, |
| "loss": 3.4757, |
| "step": 45700 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0004518138600375565, |
| "loss": 3.4889, |
| "step": 45800 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0004515533507429069, |
| "loss": 3.4402, |
| "step": 45900 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00045129221467818544, |
| "loss": 3.5092, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0004510304526554464, |
| "loss": 3.5367, |
| "step": 46100 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0004507680654886907, |
| "loss": 3.4865, |
| "step": 46200 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.0004505050539938632, |
| "loss": 3.5106, |
| "step": 46300 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.00045024141898885017, |
| "loss": 3.4592, |
| "step": 46400 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 0.000449977161293477, |
| "loss": 3.4813, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.0004497122817295053, |
| "loss": 3.4957, |
| "step": 46600 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00044944678112063046, |
| "loss": 3.4612, |
| "step": 46700 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00044918066029247936, |
| "loss": 3.5378, |
| "step": 46800 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00044891392007260735, |
| "loss": 3.5104, |
| "step": 46900 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.000448646561290496, |
| "loss": 3.4465, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.0004483785847775503, |
| "loss": 3.4633, |
| "step": 47100 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.0004481099913670965, |
| "loss": 3.4771, |
| "step": 47200 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.0004478407818943789, |
| "loss": 3.5111, |
| "step": 47300 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.0004475709571965578, |
| "loss": 3.4932, |
| "step": 47400 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 0.00044730051811270647, |
| "loss": 3.4843, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.0004470294654838087, |
| "loss": 3.4771, |
| "step": 47600 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.0004467578001527565, |
| "loss": 3.5115, |
| "step": 47700 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00044648552296434695, |
| "loss": 3.5195, |
| "step": 47800 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00044621263476528003, |
| "loss": 3.4532, |
| "step": 47900 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00044593913640415545, |
| "loss": 3.4574, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.0004456650287314707, |
| "loss": 3.5016, |
| "step": 48100 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00044539031259961784, |
| "loss": 3.4765, |
| "step": 48200 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00044511498886288105, |
| "loss": 3.465, |
| "step": 48300 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 0.00044483905837743417, |
| "loss": 3.5028, |
| "step": 48400 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00044456252200133757, |
| "loss": 3.5421, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.0004442853805945359, |
| "loss": 3.4714, |
| "step": 48600 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00044400763501885543, |
| "loss": 3.4677, |
| "step": 48700 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.0004437292861380009, |
| "loss": 3.535, |
| "step": 48800 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00044345033481755326, |
| "loss": 3.4449, |
| "step": 48900 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.000443170781924967, |
| "loss": 3.4953, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.0004428906283295672, |
| "loss": 3.4682, |
| "step": 49100 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00044260987490254695, |
| "loss": 3.4276, |
| "step": 49200 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 0.00044232852251696467, |
| "loss": 3.5311, |
| "step": 49300 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00044204657204774124, |
| "loss": 3.4406, |
| "step": 49400 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.0004417640243716576, |
| "loss": 3.5214, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.0004414808803673518, |
| "loss": 3.4624, |
| "step": 49600 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.000441197140915316, |
| "loss": 3.4879, |
| "step": 49700 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.0004409128068978944, |
| "loss": 3.52, |
| "step": 49800 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.0004406278791992798, |
| "loss": 3.5174, |
| "step": 49900 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00044034235870551156, |
| "loss": 3.4889, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.000440056246304472, |
| "loss": 3.4301, |
| "step": 50100 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.0004397695428858844, |
| "loss": 3.4761, |
| "step": 50200 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 0.00043948224934130985, |
| "loss": 3.4547, |
| "step": 50300 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00043919436656414445, |
| "loss": 3.4262, |
| "step": 50400 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.0004389058954496169, |
| "loss": 3.4494, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.0004386168368947851, |
| "loss": 3.5187, |
| "step": 50600 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.000438327191798534, |
| "loss": 3.4925, |
| "step": 50700 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.0004380369610615722, |
| "loss": 3.4945, |
| "step": 50800 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00043774614558643, |
| "loss": 3.4728, |
| "step": 50900 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.0004374547462774555, |
| "loss": 3.5043, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.00043716276404081266, |
| "loss": 3.465, |
| "step": 51100 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 0.0004368701997844781, |
| "loss": 3.455, |
| "step": 51200 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00043657705441823826, |
| "loss": 3.4398, |
| "step": 51300 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.0004362833288536867, |
| "loss": 3.4834, |
| "step": 51400 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.0004359890240042214, |
| "loss": 3.5072, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00043569414078504154, |
| "loss": 3.4757, |
| "step": 51600 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.0004353986801131448, |
| "loss": 3.4697, |
| "step": 51700 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00043510264290732474, |
| "loss": 3.5054, |
| "step": 51800 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.0004348060300881678, |
| "loss": 3.507, |
| "step": 51900 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00043450884257805014, |
| "loss": 3.504, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.0004342110813011352, |
| "loss": 3.5152, |
| "step": 52100 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 0.00043391274718337084, |
| "loss": 3.4792, |
| "step": 52200 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00043361384115248584, |
| "loss": 3.521, |
| "step": 52300 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0004333143641379879, |
| "loss": 3.4768, |
| "step": 52400 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00043301431707116014, |
| "loss": 3.4492, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0004327137008850582, |
| "loss": 3.4987, |
| "step": 52600 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0004324125165145077, |
| "loss": 3.4817, |
| "step": 52700 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00043211076489610135, |
| "loss": 3.4574, |
| "step": 52800 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0004318084469681952, |
| "loss": 3.4127, |
| "step": 52900 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00043150556367090704, |
| "loss": 3.463, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.00043120211594611235, |
| "loss": 3.5262, |
| "step": 53100 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00043089810473744195, |
| "loss": 3.5513, |
| "step": 53200 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.0004305935309902789, |
| "loss": 3.4956, |
| "step": 53300 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00043028839565175563, |
| "loss": 3.5124, |
| "step": 53400 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.000429982699670751, |
| "loss": 3.4991, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.0004296764439978871, |
| "loss": 3.5046, |
| "step": 53600 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.0004293696295855266, |
| "loss": 3.5114, |
| "step": 53700 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.0004290622573877698, |
| "loss": 3.5114, |
| "step": 53800 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.00042875432836045145, |
| "loss": 3.5104, |
| "step": 53900 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 0.0004284458434611378, |
| "loss": 3.4757, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.0004281368036491237, |
| "loss": 3.4637, |
| "step": 54100 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00042782720988542976, |
| "loss": 3.4404, |
| "step": 54200 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.0004275170631327991, |
| "loss": 3.5348, |
| "step": 54300 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.0004272063643556945, |
| "loss": 3.5154, |
| "step": 54400 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00042689511452029526, |
| "loss": 3.528, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.0004265833145944945, |
| "loss": 3.5509, |
| "step": 54600 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.00042627096554789584, |
| "loss": 3.5283, |
| "step": 54700 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.0004259580683518105, |
| "loss": 3.4573, |
| "step": 54800 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.0004256446239792543, |
| "loss": 3.5586, |
| "step": 54900 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 0.0004253306334049446, |
| "loss": 3.5568, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00042501609760529734, |
| "loss": 3.5057, |
| "step": 55100 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.0004247010175584239, |
| "loss": 3.5694, |
| "step": 55200 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.000424385394244128, |
| "loss": 3.4765, |
| "step": 55300 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.000424069228643903, |
| "loss": 3.5685, |
| "step": 55400 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00042375252174092824, |
| "loss": 3.5035, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.0004234352745200669, |
| "loss": 3.4847, |
| "step": 55600 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.00042311748796786174, |
| "loss": 3.4716, |
| "step": 55700 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.0004227991630725333, |
| "loss": 3.4406, |
| "step": 55800 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 0.0004224803008239757, |
| "loss": 3.4917, |
| "step": 55900 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00042216090221375426, |
| "loss": 3.5315, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.0004218409682351023, |
| "loss": 3.4636, |
| "step": 56100 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.000421520499882918, |
| "loss": 3.4861, |
| "step": 56200 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.0004211994981537609, |
| "loss": 3.5376, |
| "step": 56300 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00042087796404584977, |
| "loss": 3.5678, |
| "step": 56400 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00042055589855905846, |
| "loss": 3.5243, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00042023330269491346, |
| "loss": 3.5343, |
| "step": 56600 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.0004199101774565905, |
| "loss": 3.541, |
| "step": 56700 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00041958652384891146, |
| "loss": 3.4849, |
| "step": 56800 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 0.00041926234287834144, |
| "loss": 3.525, |
| "step": 56900 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00041893763555298527, |
| "loss": 3.5095, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00041861240288258483, |
| "loss": 3.4635, |
| "step": 57100 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.0004182866458785155, |
| "loss": 3.5302, |
| "step": 57200 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00041796036555378325, |
| "loss": 3.4834, |
| "step": 57300 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.0004176335629230213, |
| "loss": 3.4757, |
| "step": 57400 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00041730623900248717, |
| "loss": 3.5522, |
| "step": 57500 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.0004169783948100595, |
| "loss": 3.4441, |
| "step": 57600 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.0004166500313652347, |
| "loss": 3.5152, |
| "step": 57700 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 0.00041632114968912404, |
| "loss": 3.4957, |
| "step": 57800 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0004159917508044502, |
| "loss": 3.4784, |
| "step": 57900 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0004156618357355442, |
| "loss": 3.4779, |
| "step": 58000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.00041533140550834225, |
| "loss": 3.4575, |
| "step": 58100 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0004150004611503828, |
| "loss": 3.5332, |
| "step": 58200 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0004146690036908028, |
| "loss": 3.5279, |
| "step": 58300 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.00041433703416033485, |
| "loss": 3.4968, |
| "step": 58400 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.00041400455359130397, |
| "loss": 3.5489, |
| "step": 58500 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.00041367156301762444, |
| "loss": 3.5178, |
| "step": 58600 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 0.0004133380634747963, |
| "loss": 3.4902, |
| "step": 58700 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.0004130040559999025, |
| "loss": 3.4997, |
| "step": 58800 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.0004126695416316054, |
| "loss": 3.4763, |
| "step": 58900 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.0004123345214101438, |
| "loss": 3.4574, |
| "step": 59000 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.00041199899637732934, |
| "loss": 3.4456, |
| "step": 59100 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.00041166296757654366, |
| "loss": 3.4863, |
| "step": 59200 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.0004113264360527348, |
| "loss": 3.4747, |
| "step": 59300 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.0004109894028524143, |
| "loss": 3.4851, |
| "step": 59400 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.0004106518690236536, |
| "loss": 3.4395, |
| "step": 59500 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.000410313835616081, |
| "loss": 3.4848, |
| "step": 59600 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 0.0004099753036808783, |
| "loss": 3.4535, |
| "step": 59700 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00040963627427077775, |
| "loss": 3.4992, |
| "step": 59800 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00040929674844005843, |
| "loss": 3.4552, |
| "step": 59900 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00040895672724454305, |
| "loss": 3.5318, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.00040861621174159495, |
| "loss": 3.5415, |
| "step": 60100 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.0004082752029901146, |
| "loss": 3.5494, |
| "step": 60200 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.0004079337020505362, |
| "loss": 3.4648, |
| "step": 60300 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.0004075917099848245, |
| "loss": 3.5292, |
| "step": 60400 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.0004072492278564718, |
| "loss": 3.5001, |
| "step": 60500 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 0.0004069062567304939, |
| "loss": 3.4884, |
| "step": 60600 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00040656279767342765, |
| "loss": 3.4814, |
| "step": 60700 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.0004062188517533268, |
| "loss": 3.4705, |
| "step": 60800 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.0004058744200397595, |
| "loss": 3.4994, |
| "step": 60900 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00040552950360380434, |
| "loss": 3.4953, |
| "step": 61000 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.0004051841035180472, |
| "loss": 3.521, |
| "step": 61100 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.0004048382208565784, |
| "loss": 3.5164, |
| "step": 61200 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.0004044918566949882, |
| "loss": 3.507, |
| "step": 61300 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.00040414501211036486, |
| "loss": 3.4945, |
| "step": 61400 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.0004037976881812901, |
| "loss": 3.4484, |
| "step": 61500 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 0.0004034498859878367, |
| "loss": 3.4555, |
| "step": 61600 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00040310160661156437, |
| "loss": 3.4847, |
| "step": 61700 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00040275285113551676, |
| "loss": 3.4943, |
| "step": 61800 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.0004024036206442182, |
| "loss": 3.5139, |
| "step": 61900 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00040205391622367016, |
| "loss": 3.4618, |
| "step": 62000 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.0004017037389613476, |
| "loss": 3.4801, |
| "step": 62100 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.0004013530899461963, |
| "loss": 3.5376, |
| "step": 62200 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00040100197026862874, |
| "loss": 3.5269, |
| "step": 62300 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.0004006503810205211, |
| "loss": 3.4552, |
| "step": 62400 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 0.00040029832329520977, |
| "loss": 3.5446, |
| "step": 62500 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.0003999457981874881, |
| "loss": 3.5022, |
| "step": 62600 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.0003995928067936027, |
| "loss": 3.5138, |
| "step": 62700 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00039923935021125015, |
| "loss": 3.4631, |
| "step": 62800 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00039888542953957375, |
| "loss": 3.444, |
| "step": 62900 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00039853104587916005, |
| "loss": 3.5034, |
| "step": 63000 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.0003981762003320351, |
| "loss": 3.4181, |
| "step": 63100 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00039782089400166155, |
| "loss": 3.515, |
| "step": 63200 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.0003974651279929348, |
| "loss": 3.5136, |
| "step": 63300 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00039710890341217967, |
| "loss": 3.489, |
| "step": 63400 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 0.00039675222136714705, |
| "loss": 3.4723, |
| "step": 63500 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00039639508296701045, |
| "loss": 3.4656, |
| "step": 63600 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.0003960374893223625, |
| "loss": 3.5267, |
| "step": 63700 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.0003956794415452115, |
| "loss": 3.4808, |
| "step": 63800 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00039532094074897785, |
| "loss": 3.5218, |
| "step": 63900 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00039496198804849083, |
| "loss": 3.4797, |
| "step": 64000 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00039460258455998497, |
| "loss": 3.4921, |
| "step": 64100 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.0003942427314010967, |
| "loss": 3.4503, |
| "step": 64200 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.00039388242969086066, |
| "loss": 3.4776, |
| "step": 64300 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 0.0003935216805497063, |
| "loss": 3.4785, |
| "step": 64400 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00039316048509945457, |
| "loss": 3.4773, |
| "step": 64500 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00039279884446331436, |
| "loss": 3.4693, |
| "step": 64600 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00039243675976587876, |
| "loss": 3.4932, |
| "step": 64700 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00039207423213312204, |
| "loss": 3.4971, |
| "step": 64800 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00039171126269239555, |
| "loss": 3.4892, |
| "step": 64900 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.00039134785257242467, |
| "loss": 3.527, |
| "step": 65000 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.0003909840029033052, |
| "loss": 3.4535, |
| "step": 65100 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.0003906197148164997, |
| "loss": 3.5352, |
| "step": 65200 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 0.0003902549894448342, |
| "loss": 3.4764, |
| "step": 65300 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00038988982792249454, |
| "loss": 3.4837, |
| "step": 65400 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.0003895242313850228, |
| "loss": 3.4606, |
| "step": 65500 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00038915820096931364, |
| "loss": 3.4713, |
| "step": 65600 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00038879173781361146, |
| "loss": 3.4764, |
| "step": 65700 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00038842484305750587, |
| "loss": 3.4697, |
| "step": 65800 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00038805751784192876, |
| "loss": 3.4771, |
| "step": 65900 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.00038768976330915073, |
| "loss": 3.542, |
| "step": 66000 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.0003873215806027773, |
| "loss": 3.4594, |
| "step": 66100 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.0003869529708677456, |
| "loss": 3.5125, |
| "step": 66200 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.0003865839352503206, |
| "loss": 3.4936, |
| "step": 66300 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0003862144748980917, |
| "loss": 3.4315, |
| "step": 66400 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.00038584459095996883, |
| "loss": 3.5041, |
| "step": 66500 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0003854742845861796, |
| "loss": 3.4987, |
| "step": 66600 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.00038510355692826504, |
| "loss": 3.4537, |
| "step": 66700 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0003847324091390761, |
| "loss": 3.4199, |
| "step": 66800 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0003843608423727706, |
| "loss": 3.545, |
| "step": 66900 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0003839888577848086, |
| "loss": 3.4893, |
| "step": 67000 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.00038361645653195025, |
| "loss": 3.5127, |
| "step": 67100 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 0.0003832436397722509, |
| "loss": 3.4984, |
| "step": 67200 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00038287040866505806, |
| "loss": 3.4773, |
| "step": 67300 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00038249676437100775, |
| "loss": 3.5147, |
| "step": 67400 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00038212270805202113, |
| "loss": 3.4999, |
| "step": 67500 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00038174824087130023, |
| "loss": 3.4605, |
| "step": 67600 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.000381373363993325, |
| "loss": 3.5478, |
| "step": 67700 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00038099807858384935, |
| "loss": 3.5431, |
| "step": 67800 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.0003806223858098976, |
| "loss": 3.5196, |
| "step": 67900 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.0003802462868397609, |
| "loss": 3.4565, |
| "step": 68000 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00037986978284299346, |
| "loss": 3.479, |
| "step": 68100 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 0.00037949287499040895, |
| "loss": 3.4619, |
| "step": 68200 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00037911556445407725, |
| "loss": 3.5283, |
| "step": 68300 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00037873785240731994, |
| "loss": 3.4817, |
| "step": 68400 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.0003783597400247077, |
| "loss": 3.4796, |
| "step": 68500 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00037798122848205576, |
| "loss": 3.4814, |
| "step": 68600 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.0003776023189564206, |
| "loss": 3.5202, |
| "step": 68700 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.0003772230126260968, |
| "loss": 3.4558, |
| "step": 68800 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.00037684331067061225, |
| "loss": 3.5382, |
| "step": 68900 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.0003764632142707255, |
| "loss": 3.4725, |
| "step": 69000 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 0.0003760827246084216, |
| "loss": 3.5022, |
| "step": 69100 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.0003757018428669086, |
| "loss": 3.4569, |
| "step": 69200 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.0003753205702306135, |
| "loss": 3.4843, |
| "step": 69300 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00037493890788517937, |
| "loss": 3.5093, |
| "step": 69400 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.0003745568570174607, |
| "loss": 3.439, |
| "step": 69500 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00037417441881552036, |
| "loss": 3.5336, |
| "step": 69600 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.0003737915944686258, |
| "loss": 3.4631, |
| "step": 69700 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.00037340838516724514, |
| "loss": 3.4551, |
| "step": 69800 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.0003730247921030436, |
| "loss": 3.4739, |
| "step": 69900 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 0.0003726408164688797, |
| "loss": 3.448, |
| "step": 70000 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00037225645945880196, |
| "loss": 3.4937, |
| "step": 70100 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00037187172226804433, |
| "loss": 3.473, |
| "step": 70200 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00037148660609302367, |
| "loss": 3.5094, |
| "step": 70300 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00037110111213133475, |
| "loss": 3.5025, |
| "step": 70400 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.0003707152415817476, |
| "loss": 3.51, |
| "step": 70500 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.000370328995644203, |
| "loss": 3.5018, |
| "step": 70600 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.0003699423755198092, |
| "loss": 3.5345, |
| "step": 70700 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.0003695553824108381, |
| "loss": 3.5203, |
| "step": 70800 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.00036916801752072154, |
| "loss": 3.4331, |
| "step": 70900 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 0.0003687802820540473, |
| "loss": 3.4551, |
| "step": 71000 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0003683921772165556, |
| "loss": 3.4541, |
| "step": 71100 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0003680037042151353, |
| "loss": 3.4983, |
| "step": 71200 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.00036761486425782025, |
| "loss": 3.4602, |
| "step": 71300 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.00036722565855378534, |
| "loss": 3.5578, |
| "step": 71400 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0003668360883133426, |
| "loss": 3.4924, |
| "step": 71500 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.0003664461547479381, |
| "loss": 3.5228, |
| "step": 71600 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.00036605585907014727, |
| "loss": 3.4527, |
| "step": 71700 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.00036566520249367216, |
| "loss": 3.5135, |
| "step": 71800 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 0.00036527418623333655, |
| "loss": 3.4777, |
| "step": 71900 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00036488281150508293, |
| "loss": 3.4908, |
| "step": 72000 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.0003644910795259687, |
| "loss": 3.4532, |
| "step": 72100 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00036409899151416194, |
| "loss": 3.4572, |
| "step": 72200 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00036370654868893813, |
| "loss": 3.5133, |
| "step": 72300 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.0003633137522706758, |
| "loss": 3.528, |
| "step": 72400 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.0003629206034808534, |
| "loss": 3.5001, |
| "step": 72500 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00036252710354204486, |
| "loss": 3.5106, |
| "step": 72600 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.0003621332536779162, |
| "loss": 3.5027, |
| "step": 72700 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.0003617390551132216, |
| "loss": 3.4608, |
| "step": 72800 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 0.00036134450907379965, |
| "loss": 3.4917, |
| "step": 72900 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00036094961678656936, |
| "loss": 3.4839, |
| "step": 73000 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00036055437947952654, |
| "loss": 3.5027, |
| "step": 73100 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00036015879838173986, |
| "loss": 3.4551, |
| "step": 73200 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00035976287472334716, |
| "loss": 3.4427, |
| "step": 73300 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00035936660973555145, |
| "loss": 3.4986, |
| "step": 73400 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00035897000465061725, |
| "loss": 3.449, |
| "step": 73500 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.0003585730607018667, |
| "loss": 3.4778, |
| "step": 73600 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.00035817577912367537, |
| "loss": 3.5685, |
| "step": 73700 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 0.0003577781611514694, |
| "loss": 3.5013, |
| "step": 73800 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.0003573802080217203, |
| "loss": 3.4755, |
| "step": 73900 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.0003569819209719425, |
| "loss": 3.4971, |
| "step": 74000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.00035658330124068815, |
| "loss": 3.5367, |
| "step": 74100 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.0003561843500675445, |
| "loss": 3.4988, |
| "step": 74200 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.0003557850686931292, |
| "loss": 3.4971, |
| "step": 74300 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.00035538545835908674, |
| "loss": 3.471, |
| "step": 74400 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.00035498552030808464, |
| "loss": 3.4602, |
| "step": 74500 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.0003545852557838095, |
| "loss": 3.4899, |
| "step": 74600 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 0.0003541846660309631, |
| "loss": 3.4727, |
| "step": 74700 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.0003537837522952587, |
| "loss": 3.4743, |
| "step": 74800 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.00035338251582341703, |
| "loss": 3.4798, |
| "step": 74900 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.0003529809578631622, |
| "loss": 3.4574, |
| "step": 75000 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.00035257907966321846, |
| "loss": 3.4465, |
| "step": 75100 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.00035217688247330553, |
| "loss": 3.4908, |
| "step": 75200 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.0003517743675441353, |
| "loss": 3.4411, |
| "step": 75300 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.00035137153612740767, |
| "loss": 3.5287, |
| "step": 75400 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.0003509683894758068, |
| "loss": 3.4939, |
| "step": 75500 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.0003505649288429969, |
| "loss": 3.4693, |
| "step": 75600 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 0.00035016115548361886, |
| "loss": 3.5589, |
| "step": 75700 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.0003497570706532859, |
| "loss": 3.5431, |
| "step": 75800 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.0003493526756085799, |
| "loss": 3.5008, |
| "step": 75900 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.00034894797160704737, |
| "loss": 3.4913, |
| "step": 76000 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.0003485429599071954, |
| "loss": 3.5414, |
| "step": 76100 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.00034813764176848833, |
| "loss": 3.4656, |
| "step": 76200 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.000347732018451343, |
| "loss": 3.5164, |
| "step": 76300 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.00034732609121712566, |
| "loss": 3.5187, |
| "step": 76400 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.00034691986132814737, |
| "loss": 3.4221, |
| "step": 76500 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 0.0003465133300476604, |
| "loss": 3.4573, |
| "step": 76600 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00034610649863985434, |
| "loss": 3.4997, |
| "step": 76700 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.0003456993683698521, |
| "loss": 3.4785, |
| "step": 76800 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.0003452919405037057, |
| "loss": 3.461, |
| "step": 76900 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00034488421630839307, |
| "loss": 3.4799, |
| "step": 77000 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.0003444761970518133, |
| "loss": 3.4866, |
| "step": 77100 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.000344067884002783, |
| "loss": 3.4616, |
| "step": 77200 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.0003436592784310325, |
| "loss": 3.5271, |
| "step": 77300 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.00034325038160720186, |
| "loss": 3.5399, |
| "step": 77400 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.0003428411948028367, |
| "loss": 3.4931, |
| "step": 77500 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 0.0003424317192903844, |
| "loss": 3.481, |
| "step": 77600 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00034202195634319026, |
| "loss": 3.4759, |
| "step": 77700 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.0003416119072354933, |
| "loss": 3.4805, |
| "step": 77800 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.0003412015732424225, |
| "loss": 3.4803, |
| "step": 77900 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00034079095563999264, |
| "loss": 3.4571, |
| "step": 78000 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00034038005570510046, |
| "loss": 3.5024, |
| "step": 78100 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00033996887471552084, |
| "loss": 3.512, |
| "step": 78200 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00033955741394990234, |
| "loss": 3.4331, |
| "step": 78300 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00033914567468776394, |
| "loss": 3.4274, |
| "step": 78400 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 0.00033873365820949025, |
| "loss": 3.5295, |
| "step": 78500 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00033832136579632833, |
| "loss": 3.4613, |
| "step": 78600 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.0003379087987303829, |
| "loss": 3.459, |
| "step": 78700 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00033749595829461304, |
| "loss": 3.4423, |
| "step": 78800 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00033708284577282796, |
| "loss": 3.5483, |
| "step": 78900 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.0003366694624496828, |
| "loss": 3.4994, |
| "step": 79000 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.0003362558096106749, |
| "loss": 3.4706, |
| "step": 79100 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00033584188854213974, |
| "loss": 3.5044, |
| "step": 79200 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.00033542770053124696, |
| "loss": 3.455, |
| "step": 79300 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.000335013246865996, |
| "loss": 3.5131, |
| "step": 79400 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.0003345985288352129, |
| "loss": 3.5119, |
| "step": 79500 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.0003341835477285453, |
| "loss": 3.5121, |
| "step": 79600 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00033376830483645937, |
| "loss": 3.4693, |
| "step": 79700 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00033335280145023493, |
| "loss": 3.4531, |
| "step": 79800 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00033293703886196226, |
| "loss": 3.4548, |
| "step": 79900 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00033252101836453733, |
| "loss": 3.5033, |
| "step": 80000 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00033210474125165853, |
| "loss": 3.4889, |
| "step": 80100 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.0003316882088178217, |
| "loss": 3.4725, |
| "step": 80200 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.00033127142235831716, |
| "loss": 3.4618, |
| "step": 80300 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 0.0003308543831692249, |
| "loss": 3.4913, |
| "step": 80400 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.0003304370925474109, |
| "loss": 3.4637, |
| "step": 80500 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.0003300195517905231, |
| "loss": 3.4736, |
| "step": 80600 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.000329601762196987, |
| "loss": 3.4883, |
| "step": 80700 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.0003291837250660023, |
| "loss": 3.4966, |
| "step": 80800 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.0003287654416975382, |
| "loss": 3.4885, |
| "step": 80900 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.0003283469133923297, |
| "loss": 3.4405, |
| "step": 81000 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00032792814145187344, |
| "loss": 3.5012, |
| "step": 81100 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.00032750912717842385, |
| "loss": 3.4704, |
| "step": 81200 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 0.0003270898718749886, |
| "loss": 3.5264, |
| "step": 81300 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.0003266703768453253, |
| "loss": 3.4999, |
| "step": 81400 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00032625064339393686, |
| "loss": 3.5004, |
| "step": 81500 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.0003258306728260674, |
| "loss": 3.4246, |
| "step": 81600 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00032541046644769876, |
| "loss": 3.4751, |
| "step": 81700 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.0003249900255655459, |
| "loss": 3.5038, |
| "step": 81800 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00032456935148705303, |
| "loss": 3.5146, |
| "step": 81900 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.0003241484455203895, |
| "loss": 3.4961, |
| "step": 82000 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.0003237273089744458, |
| "loss": 3.4722, |
| "step": 82100 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.00032330594315882943, |
| "loss": 3.4567, |
| "step": 82200 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 0.000322884349383861, |
| "loss": 3.5115, |
| "step": 82300 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.0003224625289605696, |
| "loss": 3.4695, |
| "step": 82400 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00032204048320068964, |
| "loss": 3.4923, |
| "step": 82500 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.0003216182134166559, |
| "loss": 3.5214, |
| "step": 82600 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00032119572092160006, |
| "loss": 3.4668, |
| "step": 82700 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00032077300702934607, |
| "loss": 3.4876, |
| "step": 82800 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00032035007305440655, |
| "loss": 3.5109, |
| "step": 82900 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.00031992692031197853, |
| "loss": 3.4911, |
| "step": 83000 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.0003195035501179392, |
| "loss": 3.4561, |
| "step": 83100 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 0.0003190799637888423, |
| "loss": 3.4836, |
| "step": 83200 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.00031865616264191313, |
| "loss": 3.51, |
| "step": 83300 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.0003182321479950454, |
| "loss": 3.5249, |
| "step": 83400 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.0003178079211667967, |
| "loss": 3.4827, |
| "step": 83500 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.00031738348347638444, |
| "loss": 3.4877, |
| "step": 83600 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.0003169588362436816, |
| "loss": 3.438, |
| "step": 83700 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.0003165339807892129, |
| "loss": 3.412, |
| "step": 83800 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.00031610891843415046, |
| "loss": 3.4433, |
| "step": 83900 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.0003156836505003101, |
| "loss": 3.4302, |
| "step": 84000 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 0.0003152581783101465, |
| "loss": 3.44, |
| "step": 84100 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0003148325031867498, |
| "loss": 3.4679, |
| "step": 84200 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.00031440662645384115, |
| "loss": 3.5349, |
| "step": 84300 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0003139805494357685, |
| "loss": 3.4902, |
| "step": 84400 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.00031355427345750286, |
| "loss": 3.4661, |
| "step": 84500 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0003131277998446338, |
| "loss": 3.5095, |
| "step": 84600 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0003127011299233656, |
| "loss": 3.4287, |
| "step": 84700 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.00031227426502051267, |
| "loss": 3.5032, |
| "step": 84800 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0003118472064634961, |
| "loss": 3.4561, |
| "step": 84900 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.00031141995558033915, |
| "loss": 3.4984, |
| "step": 85000 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 0.0003109925136996631, |
| "loss": 3.4991, |
| "step": 85100 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.00031056488215068295, |
| "loss": 3.4838, |
| "step": 85200 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.00031013706226320386, |
| "loss": 3.5089, |
| "step": 85300 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.0003097090553676165, |
| "loss": 3.5005, |
| "step": 85400 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.0003092808627948931, |
| "loss": 3.4652, |
| "step": 85500 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.00030885248587658336, |
| "loss": 3.4901, |
| "step": 85600 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.0003084239259448101, |
| "loss": 3.5131, |
| "step": 85700 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.0003079951843322653, |
| "loss": 3.4778, |
| "step": 85800 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.0003075662623722059, |
| "loss": 3.4706, |
| "step": 85900 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 0.0003071371613984498, |
| "loss": 3.4887, |
| "step": 86000 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.0003067078827453715, |
| "loss": 3.4765, |
| "step": 86100 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00030627842774789797, |
| "loss": 3.5069, |
| "step": 86200 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.0003058487977415046, |
| "loss": 3.4825, |
| "step": 86300 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.0003054189940622109, |
| "loss": 3.45, |
| "step": 86400 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00030498901804657674, |
| "loss": 3.4677, |
| "step": 86500 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.0003045588710316976, |
| "loss": 3.4663, |
| "step": 86600 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00030412855435520093, |
| "loss": 3.507, |
| "step": 86700 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.0003036980693552415, |
| "loss": 3.461, |
| "step": 86800 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.0003032674173704979, |
| "loss": 3.4856, |
| "step": 86900 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 0.00030283659974016764, |
| "loss": 3.554, |
| "step": 87000 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.00030240561780396364, |
| "loss": 3.5279, |
| "step": 87100 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.00030197447290210945, |
| "loss": 3.4643, |
| "step": 87200 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.0003015431663753357, |
| "loss": 3.5135, |
| "step": 87300 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.0003011116995648753, |
| "loss": 3.5021, |
| "step": 87400 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.00030068007381245994, |
| "loss": 3.5225, |
| "step": 87500 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.0003002482904603153, |
| "loss": 3.4466, |
| "step": 87600 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.00029981635085115727, |
| "loss": 3.4411, |
| "step": 87700 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.00029938425632818766, |
| "loss": 3.4759, |
| "step": 87800 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 0.00029895200823508997, |
| "loss": 3.4777, |
| "step": 87900 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.0002985196079160252, |
| "loss": 3.4094, |
| "step": 88000 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.00029808705671562796, |
| "loss": 3.4614, |
| "step": 88100 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.00029765435597900187, |
| "loss": 3.4643, |
| "step": 88200 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.0002972215070517154, |
| "loss": 3.4988, |
| "step": 88300 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.00029678851127979826, |
| "loss": 3.4302, |
| "step": 88400 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.0002963553700097364, |
| "loss": 3.505, |
| "step": 88500 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.0002959220845884686, |
| "loss": 3.4842, |
| "step": 88600 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.0002954886563633815, |
| "loss": 3.4964, |
| "step": 88700 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.0002950550866823062, |
| "loss": 3.4869, |
| "step": 88800 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 0.00029462137689351337, |
| "loss": 3.4682, |
| "step": 88900 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.0002941875283457096, |
| "loss": 3.4562, |
| "step": 89000 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.00029375354238803293, |
| "loss": 3.4402, |
| "step": 89100 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.00029331942037004856, |
| "loss": 3.4809, |
| "step": 89200 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.00029288516364174506, |
| "loss": 3.4746, |
| "step": 89300 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.0002924507735535296, |
| "loss": 3.4674, |
| "step": 89400 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.0002920162514562243, |
| "loss": 3.5435, |
| "step": 89500 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.0002915815987010616, |
| "loss": 3.5372, |
| "step": 89600 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.0002911468166396805, |
| "loss": 3.4674, |
| "step": 89700 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 0.00029071190662412183, |
| "loss": 3.4573, |
| "step": 89800 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.0002902768700068245, |
| "loss": 3.4869, |
| "step": 89900 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.000289841708140621, |
| "loss": 3.521, |
| "step": 90000 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.0002894064223787334, |
| "loss": 3.4807, |
| "step": 90100 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.000288971014074769, |
| "loss": 3.4656, |
| "step": 90200 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.00028853548458271616, |
| "loss": 3.4716, |
| "step": 90300 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.00028809983525694016, |
| "loss": 3.5101, |
| "step": 90400 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.00028766406745217875, |
| "loss": 3.4879, |
| "step": 90500 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.0002872281825235385, |
| "loss": 3.5152, |
| "step": 90600 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 0.0002867921818264897, |
| "loss": 3.4802, |
| "step": 90700 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.00028635606671686297, |
| "loss": 3.4585, |
| "step": 90800 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.0002859198385508447, |
| "loss": 3.4754, |
| "step": 90900 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.00028548349868497266, |
| "loss": 3.4861, |
| "step": 91000 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.00028504704847613215, |
| "loss": 3.4643, |
| "step": 91100 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.00028461048928155166, |
| "loss": 3.4961, |
| "step": 91200 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.00028417382245879836, |
| "loss": 3.5433, |
| "step": 91300 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.00028373704936577427, |
| "loss": 3.5039, |
| "step": 91400 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.0002833001713607119, |
| "loss": 3.5384, |
| "step": 91500 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.00028286318980216986, |
| "loss": 3.4986, |
| "step": 91600 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 0.000282426106049029, |
| "loss": 3.4655, |
| "step": 91700 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.0002819889214604877, |
| "loss": 3.4605, |
| "step": 91800 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.0002815516373960582, |
| "loss": 3.5204, |
| "step": 91900 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.00028111425521556174, |
| "loss": 3.4644, |
| "step": 92000 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.000280676776279125, |
| "loss": 3.4554, |
| "step": 92100 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.00028023920194717534, |
| "loss": 3.4838, |
| "step": 92200 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.0002798015335804369, |
| "loss": 3.4925, |
| "step": 92300 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.00027936377253992594, |
| "loss": 3.5146, |
| "step": 92400 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.0002789259201869474, |
| "loss": 3.4192, |
| "step": 92500 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.00027848797788308983, |
| "loss": 3.4699, |
| "step": 92600 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.00027804994699022153, |
| "loss": 3.5011, |
| "step": 92700 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.00027761182887048633, |
| "loss": 3.4562, |
| "step": 92800 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.0002771736248862994, |
| "loss": 3.5495, |
| "step": 92900 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.00027673533640034276, |
| "loss": 3.4774, |
| "step": 93000 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.00027629696477556135, |
| "loss": 3.4738, |
| "step": 93100 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.00027585851137515855, |
| "loss": 3.4803, |
| "step": 93200 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.00027541997756259196, |
| "loss": 3.4923, |
| "step": 93300 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.00027498136470156955, |
| "loss": 3.4801, |
| "step": 93400 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.00027454267415604464, |
| "loss": 3.456, |
| "step": 93500 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 0.00027410390729021273, |
| "loss": 3.5121, |
| "step": 93600 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.000273665065468506, |
| "loss": 3.4838, |
| "step": 93700 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.0002732261500555901, |
| "loss": 3.4894, |
| "step": 93800 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.0002727871624163596, |
| "loss": 3.4759, |
| "step": 93900 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.0002723481039159334, |
| "loss": 3.541, |
| "step": 94000 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.0002719089759196509, |
| "loss": 3.467, |
| "step": 94100 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.0002714697797930675, |
| "loss": 3.4861, |
| "step": 94200 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.00027103051690195083, |
| "loss": 3.4222, |
| "step": 94300 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.0002705911886122757, |
| "loss": 3.4357, |
| "step": 94400 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 0.00027015179629022034, |
| "loss": 3.4971, |
| "step": 94500 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.0002697123413021624, |
| "loss": 3.4512, |
| "step": 94600 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00026927282501467423, |
| "loss": 3.4607, |
| "step": 94700 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00026883324879451863, |
| "loss": 3.4858, |
| "step": 94800 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00026839361400864505, |
| "loss": 3.495, |
| "step": 94900 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.000267953922024185, |
| "loss": 3.4637, |
| "step": 95000 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.0002675141742084477, |
| "loss": 3.4934, |
| "step": 95100 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.0002670743719289161, |
| "loss": 3.4604, |
| "step": 95200 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.00026663451655324253, |
| "loss": 3.5176, |
| "step": 95300 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 0.0002661946094492446, |
| "loss": 3.5227, |
| "step": 95400 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.0002657546519849003, |
| "loss": 3.5155, |
| "step": 95500 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.00026531464552834465, |
| "loss": 3.5119, |
| "step": 95600 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.0002648745914478649, |
| "loss": 3.5198, |
| "step": 95700 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.0002644344911118965, |
| "loss": 3.4797, |
| "step": 95800 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.00026399434588901836, |
| "loss": 3.4992, |
| "step": 95900 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.00026355415714794954, |
| "loss": 3.544, |
| "step": 96000 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.00026311392625754385, |
| "loss": 3.5146, |
| "step": 96100 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.0002626736545867867, |
| "loss": 3.4587, |
| "step": 96200 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.0002622333435047899, |
| "loss": 3.4642, |
| "step": 96300 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 0.000261792994380788, |
| "loss": 3.5103, |
| "step": 96400 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.000261352608584134, |
| "loss": 3.4636, |
| "step": 96500 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.0002609121874842945, |
| "loss": 3.4958, |
| "step": 96600 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.0002604717324508464, |
| "loss": 3.4457, |
| "step": 96700 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.00026003124485347184, |
| "loss": 3.48, |
| "step": 96800 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.00025959072606195424, |
| "loss": 3.5188, |
| "step": 96900 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.0002591501774461739, |
| "loss": 3.4497, |
| "step": 97000 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.00025870960037610417, |
| "loss": 3.5086, |
| "step": 97100 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.00025826899622180674, |
| "loss": 3.5139, |
| "step": 97200 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 0.0002578283663534275, |
| "loss": 3.4254, |
| "step": 97300 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00025738771214119224, |
| "loss": 3.5688, |
| "step": 97400 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00025694703495540255, |
| "loss": 3.4497, |
| "step": 97500 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00025650633616643143, |
| "loss": 3.4947, |
| "step": 97600 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00025606561714471915, |
| "loss": 3.4521, |
| "step": 97700 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00025562487926076877, |
| "loss": 3.4697, |
| "step": 97800 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.000255184123885142, |
| "loss": 3.4379, |
| "step": 97900 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.0002547433523884551, |
| "loss": 3.528, |
| "step": 98000 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.0002543025661413742, |
| "loss": 3.4807, |
| "step": 98100 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00025386176651461163, |
| "loss": 3.4575, |
| "step": 98200 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 0.00025342095487892097, |
| "loss": 3.4028, |
| "step": 98300 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.0002529801326050935, |
| "loss": 3.4966, |
| "step": 98400 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.00025253930106395337, |
| "loss": 3.4547, |
| "step": 98500 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.00025209846162635343, |
| "loss": 3.4785, |
| "step": 98600 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.00025165761566317134, |
| "loss": 3.4614, |
| "step": 98700 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.00025121676454530506, |
| "loss": 3.4976, |
| "step": 98800 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.0002507759096436684, |
| "loss": 3.49, |
| "step": 98900 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.00025033505232918696, |
| "loss": 3.4812, |
| "step": 99000 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.0002498941939727939, |
| "loss": 3.4858, |
| "step": 99100 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 0.0002494533359454257, |
| "loss": 3.4524, |
| "step": 99200 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00024901247961801767, |
| "loss": 3.4197, |
| "step": 99300 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00024857162636149983, |
| "loss": 3.4782, |
| "step": 99400 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00024813077754679285, |
| "loss": 3.5059, |
| "step": 99500 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00024768993454480335, |
| "loss": 3.5099, |
| "step": 99600 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00024724909872642, |
| "loss": 3.4788, |
| "step": 99700 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00024680827146250915, |
| "loss": 3.4652, |
| "step": 99800 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.0002463674541239104, |
| "loss": 3.4577, |
| "step": 99900 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.00024592664808143264, |
| "loss": 3.4207, |
| "step": 100000 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 0.0002454858547058497, |
| "loss": 3.4953, |
| "step": 100100 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.00024504507536789573, |
| "loss": 3.5048, |
| "step": 100200 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.0002446043114382615, |
| "loss": 3.4709, |
| "step": 100300 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.00024416356428758984, |
| "loss": 3.4563, |
| "step": 100400 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.0002437228352864711, |
| "loss": 3.4406, |
| "step": 100500 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.00024328212580543963, |
| "loss": 3.4617, |
| "step": 100600 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.0002428414372149687, |
| "loss": 3.5065, |
| "step": 100700 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.00024240077088546688, |
| "loss": 3.445, |
| "step": 100800 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.00024196012818727334, |
| "loss": 3.469, |
| "step": 100900 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.00024151951049065402, |
| "loss": 3.5099, |
| "step": 101000 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 0.00024107891916579674, |
| "loss": 3.5347, |
| "step": 101100 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.00024063835558280766, |
| "loss": 3.4266, |
| "step": 101200 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.00024019782111170637, |
| "loss": 3.51, |
| "step": 101300 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.00023975731712242216, |
| "loss": 3.5066, |
| "step": 101400 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.00023931684498478947, |
| "loss": 3.484, |
| "step": 101500 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.0002388764060685436, |
| "loss": 3.4547, |
| "step": 101600 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.0002384360017433167, |
| "loss": 3.4793, |
| "step": 101700 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.00023799563337863314, |
| "loss": 3.4553, |
| "step": 101800 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.0002375553023439056, |
| "loss": 3.498, |
| "step": 101900 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 0.00023711501000843078, |
| "loss": 3.4997, |
| "step": 102000 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.00023667475774138491, |
| "loss": 3.4864, |
| "step": 102100 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.0002362345469118195, |
| "loss": 3.4955, |
| "step": 102200 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.00023579437888865748, |
| "loss": 3.4623, |
| "step": 102300 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.0002353542550406884, |
| "loss": 3.491, |
| "step": 102400 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.00023491417673656456, |
| "loss": 3.4934, |
| "step": 102500 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.00023447414534479675, |
| "loss": 3.4448, |
| "step": 102600 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.0002340341622337496, |
| "loss": 3.5072, |
| "step": 102700 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.0002335942287716379, |
| "loss": 3.4392, |
| "step": 102800 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.00023315434632652162, |
| "loss": 3.5145, |
| "step": 102900 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 0.0002327145162663027, |
| "loss": 3.5082, |
| "step": 103000 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.0002322747399587197, |
| "loss": 3.4582, |
| "step": 103100 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.0002318350187713442, |
| "loss": 3.4514, |
| "step": 103200 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.0002313953540715763, |
| "loss": 3.4805, |
| "step": 103300 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.00023095574722664053, |
| "loss": 3.464, |
| "step": 103400 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.00023051619960358136, |
| "loss": 3.5032, |
| "step": 103500 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.00023007671256925928, |
| "loss": 3.4814, |
| "step": 103600 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.00022963728749034632, |
| "loss": 3.4803, |
| "step": 103700 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.0002291979257333217, |
| "loss": 3.5372, |
| "step": 103800 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 0.0002287586286644679, |
| "loss": 3.4892, |
| "step": 103900 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.0002283193976498662, |
| "loss": 3.4769, |
| "step": 104000 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.0002278802340553925, |
| "loss": 3.4673, |
| "step": 104100 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.00022744113924671287, |
| "loss": 3.4254, |
| "step": 104200 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.0002270021145892797, |
| "loss": 3.4444, |
| "step": 104300 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.00022656316144832708, |
| "loss": 3.4923, |
| "step": 104400 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.00022612428118886683, |
| "loss": 3.5175, |
| "step": 104500 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.00022568547517568395, |
| "loss": 3.4738, |
| "step": 104600 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.0002252467447733327, |
| "loss": 3.4461, |
| "step": 104700 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.00022480809134613227, |
| "loss": 3.4521, |
| "step": 104800 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00022436951625816228, |
| "loss": 3.4423, |
| "step": 104900 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00022393102087325884, |
| "loss": 3.5038, |
| "step": 105000 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.0002234926065550103, |
| "loss": 3.5472, |
| "step": 105100 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.0002230542746667528, |
| "loss": 3.4334, |
| "step": 105200 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.0002226160265715662, |
| "loss": 3.4684, |
| "step": 105300 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00022217786363226978, |
| "loss": 3.4708, |
| "step": 105400 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.0002217397872114179, |
| "loss": 3.4876, |
| "step": 105500 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00022130179867129606, |
| "loss": 3.4399, |
| "step": 105600 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00022086389937391634, |
| "loss": 3.4705, |
| "step": 105700 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 0.00022042609068101342, |
| "loss": 3.4645, |
| "step": 105800 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.00021998837395404013, |
| "loss": 3.4623, |
| "step": 105900 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.00021955075055416322, |
| "loss": 3.4495, |
| "step": 106000 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.00021911322184225957, |
| "loss": 3.4787, |
| "step": 106100 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.00021867578917891128, |
| "loss": 3.4702, |
| "step": 106200 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.00021823845392440183, |
| "loss": 3.4817, |
| "step": 106300 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.0002178012174387119, |
| "loss": 3.5035, |
| "step": 106400 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.00021736408108151496, |
| "loss": 3.4766, |
| "step": 106500 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.00021692704621217298, |
| "loss": 3.5066, |
| "step": 106600 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 0.00021649011418973266, |
| "loss": 3.5211, |
| "step": 106700 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.0002160532863729205, |
| "loss": 3.4887, |
| "step": 106800 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.00021561656412013924, |
| "loss": 3.516, |
| "step": 106900 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.00021517994878946314, |
| "loss": 3.4538, |
| "step": 107000 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.00021474344173863424, |
| "loss": 3.4798, |
| "step": 107100 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.00021430704432505755, |
| "loss": 3.497, |
| "step": 107200 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.00021387075790579735, |
| "loss": 3.5249, |
| "step": 107300 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.0002134345838375726, |
| "loss": 3.5129, |
| "step": 107400 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.00021299852347675302, |
| "loss": 3.5284, |
| "step": 107500 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.00021256257817935475, |
| "loss": 3.423, |
| "step": 107600 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 0.0002121267493010359, |
| "loss": 3.4436, |
| "step": 107700 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.0002116910381970929, |
| "loss": 3.4876, |
| "step": 107800 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00021125544622245553, |
| "loss": 3.4873, |
| "step": 107900 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00021081997473168344, |
| "loss": 3.4589, |
| "step": 108000 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.0002103846250789615, |
| "loss": 3.4876, |
| "step": 108100 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00020994939861809574, |
| "loss": 3.5278, |
| "step": 108200 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00020951429670250897, |
| "loss": 3.4383, |
| "step": 108300 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.0002090793206852369, |
| "loss": 3.4604, |
| "step": 108400 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.00020864447191892344, |
| "loss": 3.4604, |
| "step": 108500 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 0.0002082097517558171, |
| "loss": 3.4143, |
| "step": 108600 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.0002077751615477664, |
| "loss": 3.4799, |
| "step": 108700 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.00020734070264621557, |
| "loss": 3.5034, |
| "step": 108800 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.00020690637640220072, |
| "loss": 3.4856, |
| "step": 108900 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.00020647218416634512, |
| "loss": 3.4974, |
| "step": 109000 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.0002060381272888559, |
| "loss": 3.4974, |
| "step": 109100 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.00020560420711951866, |
| "loss": 3.4761, |
| "step": 109200 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.00020517042500769426, |
| "loss": 3.4425, |
| "step": 109300 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.00020473678230231398, |
| "loss": 3.4528, |
| "step": 109400 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 0.00020430328035187585, |
| "loss": 3.443, |
| "step": 109500 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.00020386992050443992, |
| "loss": 3.4533, |
| "step": 109600 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.00020343670410762456, |
| "loss": 3.4981, |
| "step": 109700 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.00020300363250860205, |
| "loss": 3.4316, |
| "step": 109800 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.00020257070705409415, |
| "loss": 3.4337, |
| "step": 109900 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.0002021379290903684, |
| "loss": 3.49, |
| "step": 110000 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.00020170529996323366, |
| "loss": 3.4601, |
| "step": 110100 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.00020127282101803593, |
| "loss": 3.4827, |
| "step": 110200 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.00020084049359965402, |
| "loss": 3.4761, |
| "step": 110300 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.00020040831905249586, |
| "loss": 3.4625, |
| "step": 110400 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 0.00019997629872049366, |
| "loss": 3.4822, |
| "step": 110500 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00019954443394710035, |
| "loss": 3.5124, |
| "step": 110600 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00019911272607528484, |
| "loss": 3.4623, |
| "step": 110700 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.0001986811764475284, |
| "loss": 3.4704, |
| "step": 110800 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00019824978640582013, |
| "loss": 3.5546, |
| "step": 110900 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00019781855729165265, |
| "loss": 3.4981, |
| "step": 111000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00019738749044601847, |
| "loss": 3.463, |
| "step": 111100 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.0001969565872094053, |
| "loss": 3.4255, |
| "step": 111200 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00019652584892179215, |
| "loss": 3.4782, |
| "step": 111300 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 0.00019609527692264497, |
| "loss": 3.4629, |
| "step": 111400 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00019566487255091278, |
| "loss": 3.4329, |
| "step": 111500 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00019523463714502315, |
| "loss": 3.4519, |
| "step": 111600 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00019480457204287837, |
| "loss": 3.4992, |
| "step": 111700 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.000194374678581851, |
| "loss": 3.5083, |
| "step": 111800 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00019394495809877996, |
| "loss": 3.5005, |
| "step": 111900 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.0001935154119299663, |
| "loss": 3.5134, |
| "step": 112000 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.0001930860414111687, |
| "loss": 3.4555, |
| "step": 112100 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00019265684787760006, |
| "loss": 3.4719, |
| "step": 112200 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.00019222783266392265, |
| "loss": 3.5005, |
| "step": 112300 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 0.0001917989971042443, |
| "loss": 3.4918, |
| "step": 112400 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00019137034253211403, |
| "loss": 3.4612, |
| "step": 112500 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00019094187028051825, |
| "loss": 3.4352, |
| "step": 112600 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00019051358168187618, |
| "loss": 3.4624, |
| "step": 112700 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00019008547806803622, |
| "loss": 3.4301, |
| "step": 112800 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00018965756077027118, |
| "loss": 3.4142, |
| "step": 112900 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00018922983111927484, |
| "loss": 3.4817, |
| "step": 113000 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00018880229044515711, |
| "loss": 3.4376, |
| "step": 113100 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00018837494007744064, |
| "loss": 3.4768, |
| "step": 113200 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 0.00018794778134505587, |
| "loss": 3.4391, |
| "step": 113300 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00018752081557633755, |
| "loss": 3.4482, |
| "step": 113400 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00018709404409902042, |
| "loss": 3.4682, |
| "step": 113500 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00018666746824023476, |
| "loss": 3.4296, |
| "step": 113600 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00018624108932650287, |
| "loss": 3.4653, |
| "step": 113700 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00018581490868373426, |
| "loss": 3.4661, |
| "step": 113800 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00018538892763722225, |
| "loss": 3.497, |
| "step": 113900 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00018496314751163917, |
| "loss": 3.4991, |
| "step": 114000 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.0001845375696310327, |
| "loss": 3.4538, |
| "step": 114100 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00018411219531882156, |
| "loss": 3.5194, |
| "step": 114200 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 0.00018368702589779154, |
| "loss": 3.472, |
| "step": 114300 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.00018326206269009106, |
| "loss": 3.4382, |
| "step": 114400 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.0001828373070172275, |
| "loss": 3.4555, |
| "step": 114500 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.0001824127602000626, |
| "loss": 3.4512, |
| "step": 114600 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.00018198842355880896, |
| "loss": 3.4754, |
| "step": 114700 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.0001815642984130254, |
| "loss": 3.424, |
| "step": 114800 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.00018114038608161299, |
| "loss": 3.4234, |
| "step": 114900 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.00018071668788281121, |
| "loss": 3.4654, |
| "step": 115000 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.00018029320513419334, |
| "loss": 3.4795, |
| "step": 115100 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 0.00017986993915266314, |
| "loss": 3.4652, |
| "step": 115200 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00017944689125444992, |
| "loss": 3.4731, |
| "step": 115300 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00017902406275510497, |
| "loss": 3.4734, |
| "step": 115400 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00017860145496949718, |
| "loss": 3.5131, |
| "step": 115500 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00017817906921180926, |
| "loss": 3.4612, |
| "step": 115600 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.0001777569067955333, |
| "loss": 3.466, |
| "step": 115700 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00017733496903346704, |
| "loss": 3.4542, |
| "step": 115800 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00017691325723770957, |
| "loss": 3.4701, |
| "step": 115900 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00017649177271965717, |
| "loss": 3.488, |
| "step": 116000 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 0.00017607051678999945, |
| "loss": 3.5502, |
| "step": 116100 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.0001756494907587152, |
| "loss": 3.4421, |
| "step": 116200 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.0001752286959350684, |
| "loss": 3.4264, |
| "step": 116300 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.00017480813362760373, |
| "loss": 3.4817, |
| "step": 116400 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.00017438780514414308, |
| "loss": 3.4756, |
| "step": 116500 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.0001739677117917811, |
| "loss": 3.4992, |
| "step": 116600 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.00017354785487688139, |
| "loss": 3.4952, |
| "step": 116700 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.000173128235705072, |
| "loss": 3.4887, |
| "step": 116800 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.00017270885558124204, |
| "loss": 3.4912, |
| "step": 116900 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.00017228971580953708, |
| "loss": 3.4863, |
| "step": 117000 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 0.0001718708176933551, |
| "loss": 3.4861, |
| "step": 117100 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.000171452162535343, |
| "loss": 3.5107, |
| "step": 117200 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00017103375163739183, |
| "loss": 3.4879, |
| "step": 117300 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00017061558630063317, |
| "loss": 3.4812, |
| "step": 117400 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00017019766782543495, |
| "loss": 3.4672, |
| "step": 117500 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00016977999751139754, |
| "loss": 3.4528, |
| "step": 117600 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.0001693625766573494, |
| "loss": 3.5106, |
| "step": 117700 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00016894540656134345, |
| "loss": 3.4909, |
| "step": 117800 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00016852848852065273, |
| "loss": 3.5071, |
| "step": 117900 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.00016811182383176643, |
| "loss": 3.5112, |
| "step": 118000 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00016769541379038595, |
| "loss": 3.4903, |
| "step": 118100 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.0001672792596914209, |
| "loss": 3.4839, |
| "step": 118200 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00016686336282898485, |
| "loss": 3.4802, |
| "step": 118300 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.0001664477244963914, |
| "loss": 3.4494, |
| "step": 118400 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.0001660323459861504, |
| "loss": 3.4921, |
| "step": 118500 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00016561722858996354, |
| "loss": 3.5153, |
| "step": 118600 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00016520237359872068, |
| "loss": 3.4712, |
| "step": 118700 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00016478778230249544, |
| "loss": 3.4254, |
| "step": 118800 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00016437345599054176, |
| "loss": 3.4655, |
| "step": 118900 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 0.00016395939595128926, |
| "loss": 3.4751, |
| "step": 119000 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.00016354560347233972, |
| "loss": 3.4887, |
| "step": 119100 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.0001631320798404627, |
| "loss": 3.4719, |
| "step": 119200 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.0001627188263415921, |
| "loss": 3.4599, |
| "step": 119300 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.00016230584426082134, |
| "loss": 3.4529, |
| "step": 119400 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.0001618931348824001, |
| "loss": 3.4712, |
| "step": 119500 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.00016148069948972995, |
| "loss": 3.4147, |
| "step": 119600 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.0001610685393653604, |
| "loss": 3.4697, |
| "step": 119700 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.00016065665579098503, |
| "loss": 3.522, |
| "step": 119800 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 0.0001602450500474374, |
| "loss": 3.5019, |
| "step": 119900 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00015983372341468716, |
| "loss": 3.5104, |
| "step": 120000 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00015942267717183588, |
| "loss": 3.48, |
| "step": 120100 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00015901191259711322, |
| "loss": 3.4844, |
| "step": 120200 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00015860143096787317, |
| "loss": 3.4789, |
| "step": 120300 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00015819123356058961, |
| "loss": 3.4535, |
| "step": 120400 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00015778132165085264, |
| "loss": 3.4639, |
| "step": 120500 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00015737169651336446, |
| "loss": 3.5013, |
| "step": 120600 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.0001569623594219357, |
| "loss": 3.4551, |
| "step": 120700 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 0.00015655331164948107, |
| "loss": 3.4434, |
| "step": 120800 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00015614455446801573, |
| "loss": 3.4543, |
| "step": 120900 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.000155736089148651, |
| "loss": 3.5137, |
| "step": 121000 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00015532791696159078, |
| "loss": 3.5108, |
| "step": 121100 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00015492003917612715, |
| "loss": 3.5088, |
| "step": 121200 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.0001545124570606372, |
| "loss": 3.4808, |
| "step": 121300 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.0001541051718825781, |
| "loss": 3.5011, |
| "step": 121400 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00015369818490848386, |
| "loss": 3.4427, |
| "step": 121500 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00015329149740396102, |
| "loss": 3.4482, |
| "step": 121600 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00015288511063368498, |
| "loss": 3.4824, |
| "step": 121700 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 0.00015247902586139583, |
| "loss": 3.4745, |
| "step": 121800 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00015207324434989472, |
| "loss": 3.5294, |
| "step": 121900 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00015166776736103964, |
| "loss": 3.4532, |
| "step": 122000 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00015126259615574157, |
| "loss": 3.4671, |
| "step": 122100 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00015085773199396054, |
| "loss": 3.4737, |
| "step": 122200 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00015045317613470206, |
| "loss": 3.4988, |
| "step": 122300 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00015004892983601264, |
| "loss": 3.4739, |
| "step": 122400 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.0001496449943549762, |
| "loss": 3.4934, |
| "step": 122500 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00014924137094771017, |
| "loss": 3.5086, |
| "step": 122600 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 0.00014883806086936146, |
| "loss": 3.4656, |
| "step": 122700 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00014843506537410274, |
| "loss": 3.4651, |
| "step": 122800 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00014803238571512817, |
| "loss": 3.4964, |
| "step": 122900 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00014763002314465, |
| "loss": 3.4865, |
| "step": 123000 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00014722797891389444, |
| "loss": 3.5156, |
| "step": 123100 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00014682625427309753, |
| "loss": 3.4795, |
| "step": 123200 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00014642485047150171, |
| "loss": 3.5341, |
| "step": 123300 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00014602376875735162, |
| "loss": 3.5358, |
| "step": 123400 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00014562301037789028, |
| "loss": 3.4616, |
| "step": 123500 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00014522257657935533, |
| "loss": 3.4804, |
| "step": 123600 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 0.00014482246860697486, |
| "loss": 3.4295, |
| "step": 123700 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00014442268770496392, |
| "loss": 3.5029, |
| "step": 123800 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00014402323511652045, |
| "loss": 3.4647, |
| "step": 123900 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.0001436241120838214, |
| "loss": 3.4547, |
| "step": 124000 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.000143225319848019, |
| "loss": 3.4493, |
| "step": 124100 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00014282685964923643, |
| "loss": 3.469, |
| "step": 124200 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00014242873272656486, |
| "loss": 3.4242, |
| "step": 124300 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.0001420309403180589, |
| "loss": 3.4473, |
| "step": 124400 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.0001416334836607326, |
| "loss": 3.5084, |
| "step": 124500 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 0.00014123636399055622, |
| "loss": 3.4462, |
| "step": 124600 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00014083958254245215, |
| "loss": 3.5331, |
| "step": 124700 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00014044314055029083, |
| "loss": 3.5069, |
| "step": 124800 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00014004703924688734, |
| "loss": 3.4798, |
| "step": 124900 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00013965127986399688, |
| "loss": 3.468, |
| "step": 125000 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.0001392558636323118, |
| "loss": 3.4743, |
| "step": 125100 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00013886079178145717, |
| "loss": 3.5451, |
| "step": 125200 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00013846606553998716, |
| "loss": 3.4554, |
| "step": 125300 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00013807168613538129, |
| "loss": 3.4819, |
| "step": 125400 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 0.00013767765479404036, |
| "loss": 3.4847, |
| "step": 125500 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00013728397274128293, |
| "loss": 3.4506, |
| "step": 125600 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00013689064120134132, |
| "loss": 3.4751, |
| "step": 125700 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.000136497661397358, |
| "loss": 3.5131, |
| "step": 125800 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00013610503455138123, |
| "loss": 3.4578, |
| "step": 125900 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00013571276188436215, |
| "loss": 3.4376, |
| "step": 126000 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00013532084461615035, |
| "loss": 3.4903, |
| "step": 126100 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00013492928396549014, |
| "loss": 3.4624, |
| "step": 126200 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00013453808115001698, |
| "loss": 3.468, |
| "step": 126300 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.00013414723738625352, |
| "loss": 3.4748, |
| "step": 126400 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 0.0001337567538896058, |
| "loss": 3.4878, |
| "step": 126500 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.0001333666318743598, |
| "loss": 3.4782, |
| "step": 126600 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00013297687255367697, |
| "loss": 3.5124, |
| "step": 126700 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00013258747713959125, |
| "loss": 3.4443, |
| "step": 126800 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00013219844684300475, |
| "loss": 3.4409, |
| "step": 126900 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00013180978287368435, |
| "loss": 3.4815, |
| "step": 127000 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00013142148644025765, |
| "loss": 3.4525, |
| "step": 127100 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00013103355875020923, |
| "loss": 3.4578, |
| "step": 127200 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.000130646001009877, |
| "loss": 3.4926, |
| "step": 127300 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 0.00013025881442444882, |
| "loss": 3.4594, |
| "step": 127400 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00012987200019795798, |
| "loss": 3.441, |
| "step": 127500 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00012948555953327983, |
| "loss": 3.45, |
| "step": 127600 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00012909949363212823, |
| "loss": 3.4696, |
| "step": 127700 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.0001287138036950516, |
| "loss": 3.5126, |
| "step": 127800 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00012832849092142917, |
| "loss": 3.4852, |
| "step": 127900 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.0001279435565094675, |
| "loss": 3.4656, |
| "step": 128000 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.0001275590016561961, |
| "loss": 3.4703, |
| "step": 128100 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00012717482755746467, |
| "loss": 3.469, |
| "step": 128200 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.00012679103540793864, |
| "loss": 3.4687, |
| "step": 128300 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 0.0001264076264010957, |
| "loss": 3.5109, |
| "step": 128400 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00012602460172922214, |
| "loss": 3.4951, |
| "step": 128500 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00012564196258340904, |
| "loss": 3.5326, |
| "step": 128600 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00012525971015354864, |
| "loss": 3.4683, |
| "step": 128700 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00012487784562833067, |
| "loss": 3.4843, |
| "step": 128800 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00012449637019523832, |
| "loss": 3.5223, |
| "step": 128900 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00012411528504054518, |
| "loss": 3.4825, |
| "step": 129000 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00012373459134931095, |
| "loss": 3.4544, |
| "step": 129100 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00012335429030537812, |
| "loss": 3.5195, |
| "step": 129200 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 0.00012297438309136812, |
| "loss": 3.4476, |
| "step": 129300 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.0001225948708886777, |
| "loss": 3.5221, |
| "step": 129400 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.0001222157548774752, |
| "loss": 3.4944, |
| "step": 129500 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00012183703623669704, |
| "loss": 3.554, |
| "step": 129600 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00012145871614404383, |
| "loss": 3.4857, |
| "step": 129700 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00012108079577597674, |
| "loss": 3.4504, |
| "step": 129800 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00012070327630771414, |
| "loss": 3.4739, |
| "step": 129900 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.0001203261589132275, |
| "loss": 3.4466, |
| "step": 130000 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00011994944476523817, |
| "loss": 3.4815, |
| "step": 130100 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 0.00011957313503521344, |
| "loss": 3.5155, |
| "step": 130200 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.0001191972308933627, |
| "loss": 3.4678, |
| "step": 130300 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.00011882173350863468, |
| "loss": 3.468, |
| "step": 130400 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.00011844664404871281, |
| "loss": 3.4707, |
| "step": 130500 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.00011807196368001192, |
| "loss": 3.52, |
| "step": 130600 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.0001176976935676749, |
| "loss": 3.5055, |
| "step": 130700 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.00011732383487556888, |
| "loss": 3.4498, |
| "step": 130800 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.00011695038876628145, |
| "loss": 3.4206, |
| "step": 130900 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.00011657735640111742, |
| "loss": 3.4776, |
| "step": 131000 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.0001162047389400946, |
| "loss": 3.4434, |
| "step": 131100 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.00011583253754194088, |
| "loss": 3.484, |
| "step": 131200 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00011546075336409018, |
| "loss": 3.4894, |
| "step": 131300 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00011508938756267933, |
| "loss": 3.483, |
| "step": 131400 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00011471844129254359, |
| "loss": 3.4863, |
| "step": 131500 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00011434791570721392, |
| "loss": 3.4921, |
| "step": 131600 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00011397781195891308, |
| "loss": 3.4906, |
| "step": 131700 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00011360813119855193, |
| "loss": 3.5254, |
| "step": 131800 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00011323887457572619, |
| "loss": 3.4589, |
| "step": 131900 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00011287004323871225, |
| "loss": 3.4672, |
| "step": 132000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 0.00011250163833446433, |
| "loss": 3.4552, |
| "step": 132100 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00011213366100861044, |
| "loss": 3.4868, |
| "step": 132200 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00011176611240544899, |
| "loss": 3.471, |
| "step": 132300 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00011139899366794517, |
| "loss": 3.4567, |
| "step": 132400 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.0001110323059377274, |
| "loss": 3.4666, |
| "step": 132500 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00011066605035508385, |
| "loss": 3.4836, |
| "step": 132600 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.0001103002280589589, |
| "loss": 3.4704, |
| "step": 132700 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00010993484018694921, |
| "loss": 3.4604, |
| "step": 132800 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00010956988787530092, |
| "loss": 3.4447, |
| "step": 132900 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.0001092053722589055, |
| "loss": 3.4769, |
| "step": 133000 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 0.00010884129447129648, |
| "loss": 3.4205, |
| "step": 133100 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00010847765564464593, |
| "loss": 3.4452, |
| "step": 133200 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00010811445690976068, |
| "loss": 3.4749, |
| "step": 133300 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00010775169939607913, |
| "loss": 3.4909, |
| "step": 133400 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00010738938423166778, |
| "loss": 3.4493, |
| "step": 133500 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00010702751254321744, |
| "loss": 3.4452, |
| "step": 133600 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00010666608545603962, |
| "loss": 3.5166, |
| "step": 133700 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00010630510409406355, |
| "loss": 3.513, |
| "step": 133800 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.00010594456957983229, |
| "loss": 3.4369, |
| "step": 133900 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 0.0001055844830344993, |
| "loss": 3.4093, |
| "step": 134000 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00010522484557782513, |
| "loss": 3.4877, |
| "step": 134100 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00010486565832817354, |
| "loss": 3.5383, |
| "step": 134200 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00010450692240250853, |
| "loss": 3.4348, |
| "step": 134300 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.0001041486389163904, |
| "loss": 3.5446, |
| "step": 134400 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00010379080898397289, |
| "loss": 3.4961, |
| "step": 134500 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00010343343371799885, |
| "loss": 3.4675, |
| "step": 134600 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.0001030765142297975, |
| "loss": 3.4036, |
| "step": 134700 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00010272005162928072, |
| "loss": 3.4214, |
| "step": 134800 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 0.00010236404702493967, |
| "loss": 3.5284, |
| "step": 134900 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.000102008501523841, |
| "loss": 3.4838, |
| "step": 135000 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.000101653416231624, |
| "loss": 3.4514, |
| "step": 135100 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.00010129879225249666, |
| "loss": 3.45, |
| "step": 135200 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.00010094463068923257, |
| "loss": 3.4556, |
| "step": 135300 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.00010059093264316724, |
| "loss": 3.4745, |
| "step": 135400 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 0.00010023769921419481, |
| "loss": 3.4483, |
| "step": 135500 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 9.98849315007646e-05, |
| "loss": 3.4857, |
| "step": 135600 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 9.953263059987772e-05, |
| "loss": 3.429, |
| "step": 135700 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 9.918079760708365e-05, |
| "loss": 3.4677, |
| "step": 135800 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 9.882943361647667e-05, |
| "loss": 3.516, |
| "step": 135900 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 9.847853972069277e-05, |
| "loss": 3.4223, |
| "step": 136000 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 9.812811701090599e-05, |
| "loss": 3.4708, |
| "step": 136100 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 9.777816657682523e-05, |
| "loss": 3.4884, |
| "step": 136200 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 9.742868950669076e-05, |
| "loss": 3.4627, |
| "step": 136300 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 9.707968688727047e-05, |
| "loss": 3.4592, |
| "step": 136400 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 9.673115980385744e-05, |
| "loss": 3.5064, |
| "step": 136500 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 9.638310934026567e-05, |
| "loss": 3.5205, |
| "step": 136600 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 9.603553657882686e-05, |
| "loss": 3.4942, |
| "step": 136700 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 9.56884426003874e-05, |
| "loss": 3.4722, |
| "step": 136800 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 9.534182848430484e-05, |
| "loss": 3.4912, |
| "step": 136900 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 9.49956953084444e-05, |
| "loss": 3.4864, |
| "step": 137000 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 9.465004414917586e-05, |
| "loss": 3.4623, |
| "step": 137100 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 9.430487608136981e-05, |
| "loss": 3.4253, |
| "step": 137200 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 9.39601921783948e-05, |
| "loss": 3.4765, |
| "step": 137300 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 9.36159935121136e-05, |
| "loss": 3.5379, |
| "step": 137400 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 9.32722811528805e-05, |
| "loss": 3.5132, |
| "step": 137500 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 9.292905616953681e-05, |
| "loss": 3.4339, |
| "step": 137600 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 9.258631962940875e-05, |
| "loss": 3.481, |
| "step": 137700 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 9.224407259830347e-05, |
| "loss": 3.425, |
| "step": 137800 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 9.190231614050592e-05, |
| "loss": 3.4525, |
| "step": 137900 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 9.156105131877559e-05, |
| "loss": 3.4889, |
| "step": 138000 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 9.122027919434287e-05, |
| "loss": 3.4895, |
| "step": 138100 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 9.088000082690629e-05, |
| "loss": 3.4183, |
| "step": 138200 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 9.05402172746288e-05, |
| "loss": 3.4894, |
| "step": 138300 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 9.020092959413473e-05, |
| "loss": 3.4553, |
| "step": 138400 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 8.986213884050629e-05, |
| "loss": 3.4826, |
| "step": 138500 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 8.952384606728045e-05, |
| "loss": 3.4792, |
| "step": 138600 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 8.918605232644564e-05, |
| "loss": 3.4568, |
| "step": 138700 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 8.884875866843844e-05, |
| "loss": 3.4889, |
| "step": 138800 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 8.851196614214016e-05, |
| "loss": 3.4883, |
| "step": 138900 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 8.817567579487399e-05, |
| "loss": 3.499, |
| "step": 139000 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 8.783988867240133e-05, |
| "loss": 3.4862, |
| "step": 139100 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 8.750460581891877e-05, |
| "loss": 3.4511, |
| "step": 139200 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 8.716982827705489e-05, |
| "loss": 3.4998, |
| "step": 139300 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 8.683555708786658e-05, |
| "loss": 3.4064, |
| "step": 139400 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 8.650179329083629e-05, |
| "loss": 3.4684, |
| "step": 139500 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 8.616853792386889e-05, |
| "loss": 3.4596, |
| "step": 139600 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 8.583579202328792e-05, |
| "loss": 3.4669, |
| "step": 139700 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 8.550355662383253e-05, |
| "loss": 3.5372, |
| "step": 139800 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 8.517183275865456e-05, |
| "loss": 3.5077, |
| "step": 139900 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 8.484062145931514e-05, |
| "loss": 3.4921, |
| "step": 140000 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 8.450992375578137e-05, |
| "loss": 3.4327, |
| "step": 140100 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 8.417974067642336e-05, |
| "loss": 3.5026, |
| "step": 140200 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 8.385007324801066e-05, |
| "loss": 3.4435, |
| "step": 140300 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 8.352092249570953e-05, |
| "loss": 3.4548, |
| "step": 140400 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 8.319228944307933e-05, |
| "loss": 3.4522, |
| "step": 140500 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 8.286417511206992e-05, |
| "loss": 3.4525, |
| "step": 140600 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 8.253658052301751e-05, |
| "loss": 3.4803, |
| "step": 140700 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 8.220950669464253e-05, |
| "loss": 3.4681, |
| "step": 140800 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 8.188295464404577e-05, |
| "loss": 3.5165, |
| "step": 140900 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 8.155692538670568e-05, |
| "loss": 3.4869, |
| "step": 141000 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 8.123141993647456e-05, |
| "loss": 3.4181, |
| "step": 141100 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 8.090643930557625e-05, |
| "loss": 3.4428, |
| "step": 141200 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 8.058198450460239e-05, |
| "loss": 3.4768, |
| "step": 141300 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 8.025805654250942e-05, |
| "loss": 3.4992, |
| "step": 141400 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 7.993465642661555e-05, |
| "loss": 3.4324, |
| "step": 141500 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 7.961178516259745e-05, |
| "loss": 3.4736, |
| "step": 141600 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 7.928944375448734e-05, |
| "loss": 3.4966, |
| "step": 141700 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 7.896763320466968e-05, |
| "loss": 3.4269, |
| "step": 141800 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 7.864635451387817e-05, |
| "loss": 3.5562, |
| "step": 141900 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 7.83256086811924e-05, |
| "loss": 3.4452, |
| "step": 142000 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 7.800539670403514e-05, |
| "loss": 3.465, |
| "step": 142100 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 7.7685719578169e-05, |
| "loss": 3.4184, |
| "step": 142200 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 7.73665782976933e-05, |
| "loss": 3.4772, |
| "step": 142300 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 7.704797385504117e-05, |
| "loss": 3.467, |
| "step": 142400 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 7.6729907240976e-05, |
| "loss": 3.4636, |
| "step": 142500 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 7.641237944458918e-05, |
| "loss": 3.4184, |
| "step": 142600 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 7.609539145329622e-05, |
| "loss": 3.4818, |
| "step": 142700 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 7.57789442528341e-05, |
| "loss": 3.4073, |
| "step": 142800 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 7.5463038827258e-05, |
| "loss": 3.4903, |
| "step": 142900 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 7.514767615893844e-05, |
| "loss": 3.482, |
| "step": 143000 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 7.483285722855815e-05, |
| "loss": 3.4642, |
| "step": 143100 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 7.4518583015109e-05, |
| "loss": 3.4445, |
| "step": 143200 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 7.420485449588878e-05, |
| "loss": 3.4651, |
| "step": 143300 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 7.389167264649855e-05, |
| "loss": 3.4787, |
| "step": 143400 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 7.357903844083924e-05, |
| "loss": 3.4666, |
| "step": 143500 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 7.326695285110906e-05, |
| "loss": 3.5029, |
| "step": 143600 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 7.295541684779975e-05, |
| "loss": 3.4557, |
| "step": 143700 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 7.264443139969432e-05, |
| "loss": 3.4807, |
| "step": 143800 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 7.23339974738636e-05, |
| "loss": 3.4719, |
| "step": 143900 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 7.202411603566339e-05, |
| "loss": 3.4985, |
| "step": 144000 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 7.171478804873152e-05, |
| "loss": 3.4518, |
| "step": 144100 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 7.140601447498443e-05, |
| "loss": 3.5169, |
| "step": 144200 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 7.109779627461482e-05, |
| "loss": 3.4492, |
| "step": 144300 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 7.079013440608827e-05, |
| "loss": 3.4762, |
| "step": 144400 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 7.048302982614026e-05, |
| "loss": 3.4617, |
| "step": 144500 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 7.017648348977335e-05, |
| "loss": 3.4682, |
| "step": 144600 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 6.98704963502541e-05, |
| "loss": 3.4709, |
| "step": 144700 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 6.95650693591101e-05, |
| "loss": 3.4152, |
| "step": 144800 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 6.926020346612722e-05, |
| "loss": 3.496, |
| "step": 144900 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 6.895589961934615e-05, |
| "loss": 3.4792, |
| "step": 145000 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 6.865215876506006e-05, |
| "loss": 3.4939, |
| "step": 145100 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 6.834898184781135e-05, |
| "loss": 3.4709, |
| "step": 145200 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 6.804636981038867e-05, |
| "loss": 3.4498, |
| "step": 145300 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 6.774432359382415e-05, |
| "loss": 3.4552, |
| "step": 145400 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 6.744284413739025e-05, |
| "loss": 3.3939, |
| "step": 145500 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 6.7141932378597e-05, |
| "loss": 3.449, |
| "step": 145600 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 6.684158925318931e-05, |
| "loss": 3.4837, |
| "step": 145700 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 6.654181569514362e-05, |
| "loss": 3.4736, |
| "step": 145800 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 6.624261263666504e-05, |
| "loss": 3.4677, |
| "step": 145900 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 6.594398100818483e-05, |
| "loss": 3.4708, |
| "step": 146000 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 6.564592173835718e-05, |
| "loss": 3.4526, |
| "step": 146100 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 6.534843575405647e-05, |
| "loss": 3.5345, |
| "step": 146200 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 6.505152398037433e-05, |
| "loss": 3.4945, |
| "step": 146300 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 6.475518734061667e-05, |
| "loss": 3.4661, |
| "step": 146400 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 6.445942675630095e-05, |
| "loss": 3.4289, |
| "step": 146500 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 6.416424314715327e-05, |
| "loss": 3.4531, |
| "step": 146600 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 6.38696374311058e-05, |
| "loss": 3.4917, |
| "step": 146700 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 6.357561052429305e-05, |
| "loss": 3.3964, |
| "step": 146800 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 6.328216334105014e-05, |
| "loss": 3.4736, |
| "step": 146900 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 6.29892967939091e-05, |
| "loss": 3.5101, |
| "step": 147000 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 6.269701179359663e-05, |
| "loss": 3.4323, |
| "step": 147100 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 6.240530924903065e-05, |
| "loss": 3.4316, |
| "step": 147200 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 6.211419006731808e-05, |
| "loss": 3.4749, |
| "step": 147300 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 6.182365515375172e-05, |
| "loss": 3.4766, |
| "step": 147400 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 6.153370541180739e-05, |
| "loss": 3.4461, |
| "step": 147500 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 6.124434174314131e-05, |
| "loss": 3.5406, |
| "step": 147600 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 6.0955565047587064e-05, |
| "loss": 3.5152, |
| "step": 147700 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 6.0667376223153075e-05, |
| "loss": 3.504, |
| "step": 147800 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 6.03797761660195e-05, |
| "loss": 3.466, |
| "step": 147900 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 6.009276577053582e-05, |
| "loss": 3.4596, |
| "step": 148000 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 5.9806345929217546e-05, |
| "loss": 3.4806, |
| "step": 148100 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 5.9520517532744015e-05, |
| "loss": 3.4958, |
| "step": 148200 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 5.923528146995519e-05, |
| "loss": 3.4599, |
| "step": 148300 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 5.895063862784916e-05, |
| "loss": 3.4378, |
| "step": 148400 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 5.8666589891579306e-05, |
| "loss": 3.4499, |
| "step": 148500 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 5.83831361444512e-05, |
| "loss": 3.4667, |
| "step": 148600 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 5.8100278267920665e-05, |
| "loss": 3.4887, |
| "step": 148700 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 5.781801714159021e-05, |
| "loss": 3.4164, |
| "step": 148800 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 5.7536353643206806e-05, |
| "loss": 3.4847, |
| "step": 148900 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 5.7255288648658754e-05, |
| "loss": 3.43, |
| "step": 149000 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 5.6974823031973405e-05, |
| "loss": 3.5193, |
| "step": 149100 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 5.669495766531413e-05, |
| "loss": 3.4507, |
| "step": 149200 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 5.6415693418977844e-05, |
| "loss": 3.494, |
| "step": 149300 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 5.613703116139185e-05, |
| "loss": 3.5268, |
| "step": 149400 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 5.5858971759111756e-05, |
| "loss": 3.4471, |
| "step": 149500 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 5.558151607681827e-05, |
| "loss": 3.4171, |
| "step": 149600 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 5.530466497731501e-05, |
| "loss": 3.4843, |
| "step": 149700 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 5.502841932152511e-05, |
| "loss": 3.4613, |
| "step": 149800 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 5.475277996848921e-05, |
| "loss": 3.5356, |
| "step": 149900 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 5.447774777536249e-05, |
| "loss": 3.4039, |
| "step": 150000 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 5.4203323597412066e-05, |
| "loss": 3.4498, |
| "step": 150100 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 5.392950828801435e-05, |
| "loss": 3.4847, |
| "step": 150200 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 5.3656302698652096e-05, |
| "loss": 3.4883, |
| "step": 150300 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 5.3383707678912345e-05, |
| "loss": 3.4473, |
| "step": 150400 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 5.311172407648327e-05, |
| "loss": 3.4113, |
| "step": 150500 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 5.2840352737151766e-05, |
| "loss": 3.4673, |
| "step": 150600 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 5.256959450480078e-05, |
| "loss": 3.4446, |
| "step": 150700 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 5.229945022140667e-05, |
| "loss": 3.4601, |
| "step": 150800 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 5.2029920727036605e-05, |
| "loss": 3.4191, |
| "step": 150900 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 5.1761006859846e-05, |
| "loss": 3.47, |
| "step": 151000 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 5.1492709456075675e-05, |
| "loss": 3.5108, |
| "step": 151100 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 5.1225029350049604e-05, |
| "loss": 3.4592, |
| "step": 151200 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 5.0957967374172134e-05, |
| "loss": 3.4277, |
| "step": 151300 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 5.069152435892535e-05, |
| "loss": 3.4706, |
| "step": 151400 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 5.042570113286668e-05, |
| "loss": 3.4567, |
| "step": 151500 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 5.016049852262591e-05, |
| "loss": 3.4528, |
| "step": 151600 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 4.989591735290328e-05, |
| "loss": 3.4114, |
| "step": 151700 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 4.9631958446466256e-05, |
| "loss": 3.5006, |
| "step": 151800 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 4.936862262414748e-05, |
| "loss": 3.4768, |
| "step": 151900 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 4.910591070484169e-05, |
| "loss": 3.4693, |
| "step": 152000 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 4.884382350550368e-05, |
| "loss": 3.447, |
| "step": 152100 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 4.8582361841145564e-05, |
| "loss": 3.4081, |
| "step": 152200 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 4.8321526524834156e-05, |
| "loss": 3.4792, |
| "step": 152300 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 4.806131836768862e-05, |
| "loss": 3.4483, |
| "step": 152400 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 4.780173817887765e-05, |
| "loss": 3.5346, |
| "step": 152500 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 4.7542786765617296e-05, |
| "loss": 3.4657, |
| "step": 152600 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 4.7284464933168235e-05, |
| "loss": 3.4891, |
| "step": 152700 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 4.702677348483358e-05, |
| "loss": 3.4926, |
| "step": 152800 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 4.6769713221955723e-05, |
| "loss": 3.4253, |
| "step": 152900 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 4.6513284943914547e-05, |
| "loss": 3.4591, |
| "step": 153000 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 4.625748944812458e-05, |
| "loss": 3.5055, |
| "step": 153100 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 4.600232753003267e-05, |
| "loss": 3.4436, |
| "step": 153200 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 4.574779998311518e-05, |
| "loss": 3.4526, |
| "step": 153300 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 4.549390759887606e-05, |
| "loss": 3.4926, |
| "step": 153400 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 4.524065116684392e-05, |
| "loss": 3.4372, |
| "step": 153500 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 4.498803147456987e-05, |
| "loss": 3.4741, |
| "step": 153600 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 4.4736049307624817e-05, |
| "loss": 3.4904, |
| "step": 153700 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 4.448470544959726e-05, |
| "loss": 3.4679, |
| "step": 153800 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 4.423400068209071e-05, |
| "loss": 3.4663, |
| "step": 153900 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 4.398393578472132e-05, |
| "loss": 3.5239, |
| "step": 154000 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 4.3734511535115486e-05, |
| "loss": 3.4588, |
| "step": 154100 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 4.348572870890718e-05, |
| "loss": 3.5164, |
| "step": 154200 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 4.323758807973596e-05, |
| "loss": 3.4214, |
| "step": 154300 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 4.299009041924426e-05, |
| "loss": 3.5172, |
| "step": 154400 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 4.274323649707509e-05, |
| "loss": 3.5102, |
| "step": 154500 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 4.249702708086972e-05, |
| "loss": 3.4565, |
| "step": 154600 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 4.225146293626486e-05, |
| "loss": 3.5431, |
| "step": 154700 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 4.2006544826891065e-05, |
| "loss": 3.4823, |
| "step": 154800 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 4.176227351436967e-05, |
| "loss": 3.4112, |
| "step": 154900 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 4.1518649758310766e-05, |
| "loss": 3.468, |
| "step": 155000 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 4.1275674316310574e-05, |
| "loss": 3.4689, |
| "step": 155100 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 4.103334794394939e-05, |
| "loss": 3.4525, |
| "step": 155200 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 4.079167139478909e-05, |
| "loss": 3.4909, |
| "step": 155300 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 4.055064542037087e-05, |
| "loss": 3.5342, |
| "step": 155400 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 4.03102707702126e-05, |
| "loss": 3.4856, |
| "step": 155500 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 4.007054819180692e-05, |
| "loss": 3.4777, |
| "step": 155600 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 3.983147843061863e-05, |
| "loss": 3.5254, |
| "step": 155700 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 3.9593062230082685e-05, |
| "loss": 3.4694, |
| "step": 155800 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 3.935530033160134e-05, |
| "loss": 3.4937, |
| "step": 155900 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 3.911819347454234e-05, |
| "loss": 3.4633, |
| "step": 156000 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 3.8881742396236455e-05, |
| "loss": 3.4595, |
| "step": 156100 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 3.8645947831975145e-05, |
| "loss": 3.4702, |
| "step": 156200 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 3.841081051500836e-05, |
| "loss": 3.4524, |
| "step": 156300 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 3.817633117654207e-05, |
| "loss": 3.4967, |
| "step": 156400 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 3.79425105457363e-05, |
| "loss": 3.4595, |
| "step": 156500 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 3.77093493497026e-05, |
| "loss": 3.4135, |
| "step": 156600 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 3.74768483135019e-05, |
| "loss": 3.4355, |
| "step": 156700 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 3.724500816014223e-05, |
| "loss": 3.4815, |
| "step": 156800 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 3.701382961057648e-05, |
| "loss": 3.4318, |
| "step": 156900 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 3.678331338370014e-05, |
| "loss": 3.5005, |
| "step": 157000 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 3.655346019634909e-05, |
| "loss": 3.4748, |
| "step": 157100 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 3.632427076329739e-05, |
| "loss": 3.4054, |
| "step": 157200 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 3.609574579725491e-05, |
| "loss": 3.4565, |
| "step": 157300 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 3.5867886008865315e-05, |
| "loss": 3.4485, |
| "step": 157400 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 3.564069210670379e-05, |
| "loss": 3.4623, |
| "step": 157500 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 3.541416479727483e-05, |
| "loss": 3.4946, |
| "step": 157600 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 3.518830478500978e-05, |
| "loss": 3.4598, |
| "step": 157700 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 3.49631127722653e-05, |
| "loss": 3.4863, |
| "step": 157800 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 3.473858945932046e-05, |
| "loss": 3.475, |
| "step": 157900 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 3.451473554437509e-05, |
| "loss": 3.4652, |
| "step": 158000 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 3.4291551723547146e-05, |
| "loss": 3.4687, |
| "step": 158100 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 3.4069038690871e-05, |
| "loss": 3.4325, |
| "step": 158200 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 3.384719713829498e-05, |
| "loss": 3.4336, |
| "step": 158300 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 3.362602775567935e-05, |
| "loss": 3.4393, |
| "step": 158400 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 3.340553123079421e-05, |
| "loss": 3.5204, |
| "step": 158500 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 3.3185708249317045e-05, |
| "loss": 3.4819, |
| "step": 158600 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 3.2966559494830934e-05, |
| "loss": 3.4414, |
| "step": 158700 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 3.2748085648822506e-05, |
| "loss": 3.4852, |
| "step": 158800 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 3.2530287390679426e-05, |
| "loss": 3.4863, |
| "step": 158900 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 3.2313165397688445e-05, |
| "loss": 3.4531, |
| "step": 159000 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 3.2096720345033445e-05, |
| "loss": 3.4858, |
| "step": 159100 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 3.188095290579318e-05, |
| "loss": 3.4303, |
| "step": 159200 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 3.166586375093935e-05, |
| "loss": 3.4155, |
| "step": 159300 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 3.145145354933415e-05, |
| "loss": 3.5033, |
| "step": 159400 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 3.123772296772862e-05, |
| "loss": 3.4965, |
| "step": 159500 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 3.102467267076037e-05, |
| "loss": 3.499, |
| "step": 159600 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 3.0812303320951475e-05, |
| "loss": 3.4457, |
| "step": 159700 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 3.0600615578706524e-05, |
| "loss": 3.4652, |
| "step": 159800 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 3.038961010231048e-05, |
| "loss": 3.4304, |
| "step": 159900 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 3.0179287547926676e-05, |
| "loss": 3.4779, |
| "step": 160000 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 2.996964856959475e-05, |
| "loss": 3.4483, |
| "step": 160100 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 2.976069381922869e-05, |
| "loss": 3.4724, |
| "step": 160200 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 2.955242394661456e-05, |
| "loss": 3.4281, |
| "step": 160300 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.9344839599408897e-05, |
| "loss": 3.5002, |
| "step": 160400 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.9137941423136305e-05, |
| "loss": 3.4879, |
| "step": 160500 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.8931730061187656e-05, |
| "loss": 3.4045, |
| "step": 160600 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.872620615481808e-05, |
| "loss": 3.4483, |
| "step": 160700 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.8521370343144752e-05, |
| "loss": 3.4465, |
| "step": 160800 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.8317223263145313e-05, |
| "loss": 3.4971, |
| "step": 160900 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.811376554965553e-05, |
| "loss": 3.4879, |
| "step": 161000 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.7910997835367548e-05, |
| "loss": 3.4802, |
| "step": 161100 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.7708920750827565e-05, |
| "loss": 3.4915, |
| "step": 161200 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.750753492443442e-05, |
| "loss": 3.4362, |
| "step": 161300 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.7306840982437215e-05, |
| "loss": 3.4877, |
| "step": 161400 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.710683954893356e-05, |
| "loss": 3.5149, |
| "step": 161500 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.690753124586745e-05, |
| "loss": 3.4533, |
| "step": 161600 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.6708916693027553e-05, |
| "loss": 3.4888, |
| "step": 161700 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.651099650804517e-05, |
| "loss": 3.4699, |
| "step": 161800 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.6313771306392453e-05, |
| "loss": 3.4967, |
| "step": 161900 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.6117241701380052e-05, |
| "loss": 3.4676, |
| "step": 162000 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.592140830415579e-05, |
| "loss": 3.484, |
| "step": 162100 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.5726271723702428e-05, |
| "loss": 3.4728, |
| "step": 162200 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.553183256683578e-05, |
| "loss": 3.4194, |
| "step": 162300 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.533809143820298e-05, |
| "loss": 3.482, |
| "step": 162400 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.5145048940280384e-05, |
| "loss": 3.4789, |
| "step": 162500 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.4952705673371877e-05, |
| "loss": 3.5145, |
| "step": 162600 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.4761062235607007e-05, |
| "loss": 3.4865, |
| "step": 162700 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.4570119222938945e-05, |
| "loss": 3.4171, |
| "step": 162800 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.4379877229142867e-05, |
| "loss": 3.4954, |
| "step": 162900 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.4190336845813928e-05, |
| "loss": 3.4762, |
| "step": 163000 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.400149866236548e-05, |
| "loss": 3.457, |
| "step": 163100 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.3813363266027262e-05, |
| "loss": 3.4829, |
| "step": 163200 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.3625931241843646e-05, |
| "loss": 3.443, |
| "step": 163300 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.3439203172671507e-05, |
| "loss": 3.4499, |
| "step": 163400 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.3253179639178806e-05, |
| "loss": 3.4199, |
| "step": 163500 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.306786121984261e-05, |
| "loss": 3.4471, |
| "step": 163600 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.2883248490947306e-05, |
| "loss": 3.434, |
| "step": 163700 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.2699342026582553e-05, |
| "loss": 3.4878, |
| "step": 163800 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.251614239864211e-05, |
| "loss": 3.5202, |
| "step": 163900 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.2333650176821485e-05, |
| "loss": 3.4719, |
| "step": 164000 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.21518659286164e-05, |
| "loss": 3.4925, |
| "step": 164100 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.1970790219320885e-05, |
| "loss": 3.4056, |
| "step": 164200 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.179042361202582e-05, |
| "loss": 3.48, |
| "step": 164300 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.161076666761688e-05, |
| "loss": 3.4169, |
| "step": 164400 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.1431819944772945e-05, |
| "loss": 3.4857, |
| "step": 164500 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.1253583999964298e-05, |
| "loss": 3.4628, |
| "step": 164600 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.107605938745086e-05, |
| "loss": 3.5136, |
| "step": 164700 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.0899246659280584e-05, |
| "loss": 3.3885, |
| "step": 164800 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.0723146365287743e-05, |
| "loss": 3.4974, |
| "step": 164900 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.0547759053091088e-05, |
| "loss": 3.4751, |
| "step": 165000 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.0373085268092144e-05, |
| "loss": 3.4691, |
| "step": 165100 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 2.0199125553473696e-05, |
| "loss": 3.4679, |
| "step": 165200 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 2.0025880450197902e-05, |
| "loss": 3.4301, |
| "step": 165300 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 1.9853350497004763e-05, |
| "loss": 3.4768, |
| "step": 165400 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 1.9681536230410386e-05, |
| "loss": 3.4745, |
| "step": 165500 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 1.951043818470516e-05, |
| "loss": 3.4939, |
| "step": 165600 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 1.9340056891952396e-05, |
| "loss": 3.4786, |
| "step": 165700 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 1.917039288198652e-05, |
| "loss": 3.4544, |
| "step": 165800 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 1.9001446682411355e-05, |
| "loss": 3.4916, |
| "step": 165900 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 1.8833218818598563e-05, |
| "loss": 3.4662, |
| "step": 166000 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.866570981368601e-05, |
| "loss": 3.4395, |
| "step": 166100 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.8498920188576187e-05, |
| "loss": 3.4923, |
| "step": 166200 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.833285046193442e-05, |
| "loss": 3.4653, |
| "step": 166300 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.816750115018742e-05, |
| "loss": 3.4962, |
| "step": 166400 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.800287276752166e-05, |
| "loss": 3.4816, |
| "step": 166500 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.783896582588168e-05, |
| "loss": 3.3854, |
| "step": 166600 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.767578083496857e-05, |
| "loss": 3.465, |
| "step": 166700 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.7513318302238486e-05, |
| "loss": 3.4746, |
| "step": 166800 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1.735157873290069e-05, |
| "loss": 3.4732, |
| "step": 166900 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 1.7190562629916557e-05, |
| "loss": 3.4785, |
| "step": 167000 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 1.7030270493997495e-05, |
| "loss": 3.4685, |
| "step": 167100 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 1.68707028236037e-05, |
| "loss": 3.4655, |
| "step": 167200 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 1.6711860114942383e-05, |
| "loss": 3.4566, |
| "step": 167300 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 1.6553742861966452e-05, |
| "loss": 3.4388, |
| "step": 167400 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 1.639635155637284e-05, |
| "loss": 3.502, |
| "step": 167500 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 1.623968668760101e-05, |
| "loss": 3.4313, |
| "step": 167600 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 1.6083748742831304e-05, |
| "loss": 3.4205, |
| "step": 167700 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 1.5928538206983755e-05, |
| "loss": 3.4836, |
| "step": 167800 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 1.577405556271619e-05, |
| "loss": 3.4634, |
| "step": 167900 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 1.5620301290423135e-05, |
| "loss": 3.4655, |
| "step": 168000 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 1.546727586823382e-05, |
| "loss": 3.4441, |
| "step": 168100 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 1.5314979772011155e-05, |
| "loss": 3.4556, |
| "step": 168200 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 1.5163413475350025e-05, |
| "loss": 3.5384, |
| "step": 168300 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 1.5012577449575848e-05, |
| "loss": 3.4572, |
| "step": 168400 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 1.4862472163743146e-05, |
| "loss": 3.4505, |
| "step": 168500 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 1.471309808463403e-05, |
| "loss": 3.4776, |
| "step": 168600 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 1.4564455676756766e-05, |
| "loss": 3.5107, |
| "step": 168700 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 1.4416545402344383e-05, |
| "loss": 3.5006, |
| "step": 168800 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.4269367721353205e-05, |
| "loss": 3.519, |
| "step": 168900 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.4122923091461348e-05, |
| "loss": 3.4775, |
| "step": 169000 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.3977211968067422e-05, |
| "loss": 3.4108, |
| "step": 169100 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.3832234804289023e-05, |
| "loss": 3.498, |
| "step": 169200 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.3687992050961356e-05, |
| "loss": 3.4988, |
| "step": 169300 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.3544484156635923e-05, |
| "loss": 3.4937, |
| "step": 169400 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.34017115675788e-05, |
| "loss": 3.47, |
| "step": 169500 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.3259674727769732e-05, |
| "loss": 3.4696, |
| "step": 169600 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.3118374078900376e-05, |
| "loss": 3.4722, |
| "step": 169700 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.297781006037313e-05, |
| "loss": 3.4794, |
| "step": 169800 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.2837983109299566e-05, |
| "loss": 3.5015, |
| "step": 169900 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.2698893660499394e-05, |
| "loss": 3.4986, |
| "step": 170000 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.2560542146498766e-05, |
| "loss": 3.4245, |
| "step": 170100 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.2422928997529142e-05, |
| "loss": 3.4625, |
| "step": 170200 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.2286054641525824e-05, |
| "loss": 3.4951, |
| "step": 170300 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.214991950412675e-05, |
| "loss": 3.4903, |
| "step": 170400 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.2014524008671118e-05, |
| "loss": 3.4828, |
| "step": 170500 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.1879868576198049e-05, |
| "loss": 3.4202, |
| "step": 170600 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.1745953625445283e-05, |
| "loss": 3.457, |
| "step": 170700 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.161277957284787e-05, |
| "loss": 3.4711, |
| "step": 170800 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.1480346832536847e-05, |
| "loss": 3.4668, |
| "step": 170900 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.1348655816338176e-05, |
| "loss": 3.4941, |
| "step": 171000 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.1217706933771165e-05, |
| "loss": 3.4354, |
| "step": 171100 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.1087500592047189e-05, |
| "loss": 3.4694, |
| "step": 171200 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.0958037196068693e-05, |
| "loss": 3.4025, |
| "step": 171300 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.0829317148427831e-05, |
| "loss": 3.5235, |
| "step": 171400 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.0701340849404995e-05, |
| "loss": 3.4876, |
| "step": 171500 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.0574108696967955e-05, |
| "loss": 3.5077, |
| "step": 171600 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1.0447621086770164e-05, |
| "loss": 3.4487, |
| "step": 171700 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1.0321878412149959e-05, |
| "loss": 3.5235, |
| "step": 171800 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1.019688106412911e-05, |
| "loss": 3.5167, |
| "step": 171900 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1.0072629431411629e-05, |
| "loss": 3.5023, |
| "step": 172000 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 9.949123900382578e-06, |
| "loss": 3.4944, |
| "step": 172100 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 9.826364855106851e-06, |
| "loss": 3.4764, |
| "step": 172200 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 9.704352677328055e-06, |
| "loss": 3.4143, |
| "step": 172300 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 9.583087746467212e-06, |
| "loss": 3.478, |
| "step": 172400 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 9.46257043962162e-06, |
| "loss": 3.4686, |
| "step": 172500 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 9.342801131563772e-06, |
| "loss": 3.4824, |
| "step": 172600 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 9.22378019473999e-06, |
| "loss": 3.456, |
| "step": 172700 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 9.105507999269513e-06, |
| "loss": 3.4789, |
| "step": 172800 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 8.98798491294317e-06, |
| "loss": 3.4921, |
| "step": 172900 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 8.871211301222205e-06, |
| "loss": 3.4355, |
| "step": 173000 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 8.755187527237362e-06, |
| "loss": 3.4613, |
| "step": 173100 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 8.639913951787537e-06, |
| "loss": 3.4982, |
| "step": 173200 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 8.525390933338761e-06, |
| "loss": 3.498, |
| "step": 173300 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 8.411618828022994e-06, |
| "loss": 3.4502, |
| "step": 173400 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 8.298597989637197e-06, |
| "loss": 3.4695, |
| "step": 173500 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 8.18632876964201e-06, |
| "loss": 3.4537, |
| "step": 173600 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 8.074811517160885e-06, |
| "loss": 3.476, |
| "step": 173700 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 7.964046578978701e-06, |
| "loss": 3.418, |
| "step": 173800 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 7.854034299541068e-06, |
| "loss": 3.4262, |
| "step": 173900 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 7.744775020952887e-06, |
| "loss": 3.4676, |
| "step": 174000 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 7.636269082977626e-06, |
| "loss": 3.44, |
| "step": 174100 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 7.528516823035902e-06, |
| "loss": 3.4154, |
| "step": 174200 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 7.4215185762047385e-06, |
| "loss": 3.4926, |
| "step": 174300 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 7.315274675216421e-06, |
| "loss": 3.4514, |
| "step": 174400 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 7.2097854504573626e-06, |
| "loss": 3.5559, |
| "step": 174500 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 7.105051229967241e-06, |
| "loss": 3.4448, |
| "step": 174600 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 7.001072339437836e-06, |
| "loss": 3.4527, |
| "step": 174700 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 6.897849102212083e-06, |
| "loss": 3.4917, |
| "step": 174800 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 6.795381839283133e-06, |
| "loss": 3.4435, |
| "step": 174900 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 6.693670869293206e-06, |
| "loss": 3.5062, |
| "step": 175000 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 6.592716508532742e-06, |
| "loss": 3.4505, |
| "step": 175100 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 6.492519070939312e-06, |
| "loss": 3.4699, |
| "step": 175200 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 6.393078868096674e-06, |
| "loss": 3.5317, |
| "step": 175300 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 6.294396209233888e-06, |
| "loss": 3.5148, |
| "step": 175400 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 6.196471401224202e-06, |
| "loss": 3.4562, |
| "step": 175500 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 6.0993047485841944e-06, |
| "loss": 3.4535, |
| "step": 175600 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 6.002896553472831e-06, |
| "loss": 3.4372, |
| "step": 175700 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 5.907247115690489e-06, |
| "loss": 3.4692, |
| "step": 175800 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 5.812356732678076e-06, |
| "loss": 3.4198, |
| "step": 175900 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 5.718225699515939e-06, |
| "loss": 3.4826, |
| "step": 176000 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 5.624854308923289e-06, |
| "loss": 3.4803, |
| "step": 176100 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 5.532242851256891e-06, |
| "loss": 3.4432, |
| "step": 176200 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 5.440391614510487e-06, |
| "loss": 3.4248, |
| "step": 176300 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 5.349300884313624e-06, |
| "loss": 3.4578, |
| "step": 176400 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 5.258970943930991e-06, |
| "loss": 3.4432, |
| "step": 176500 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 5.1694020742614474e-06, |
| "loss": 3.4696, |
| "step": 176600 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 5.080594553837109e-06, |
| "loss": 3.4922, |
| "step": 176700 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 4.992548658822593e-06, |
| "loss": 3.4488, |
| "step": 176800 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 4.905264663014025e-06, |
| "loss": 3.4413, |
| "step": 176900 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 4.818742837838258e-06, |
| "loss": 3.4904, |
| "step": 177000 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 4.732983452352096e-06, |
| "loss": 3.4597, |
| "step": 177100 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 4.647986773241353e-06, |
| "loss": 3.3803, |
| "step": 177200 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 4.563753064819959e-06, |
| "loss": 3.4865, |
| "step": 177300 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 4.480282589029383e-06, |
| "loss": 3.4853, |
| "step": 177400 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 4.397575605437576e-06, |
| "loss": 3.4592, |
| "step": 177500 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 4.315632371238304e-06, |
| "loss": 3.4212, |
| "step": 177600 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 4.234453141250288e-06, |
| "loss": 3.4781, |
| "step": 177700 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 4.154038167916402e-06, |
| "loss": 3.5055, |
| "step": 177800 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 4.074387701302973e-06, |
| "loss": 3.4421, |
| "step": 177900 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 3.995501989098843e-06, |
| "loss": 3.4785, |
| "step": 178000 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 3.9173812766148394e-06, |
| "loss": 3.4931, |
| "step": 178100 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 3.840025806782721e-06, |
| "loss": 3.4689, |
| "step": 178200 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 3.7634358201547035e-06, |
| "loss": 3.4653, |
| "step": 178300 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 3.6876115549024923e-06, |
| "loss": 3.4776, |
| "step": 178400 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 3.612553246816669e-06, |
| "loss": 3.4824, |
| "step": 178500 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 3.538261129305914e-06, |
| "loss": 3.4846, |
| "step": 178600 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 3.464735433396288e-06, |
| "loss": 3.4451, |
| "step": 178700 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 3.3919763877304777e-06, |
| "loss": 3.4596, |
| "step": 178800 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 3.3199842185671903e-06, |
| "loss": 3.4577, |
| "step": 178900 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 3.248759149780317e-06, |
| "loss": 3.457, |
| "step": 179000 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 3.1783014028582967e-06, |
| "loss": 3.4635, |
| "step": 179100 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 3.1086111969035048e-06, |
| "loss": 3.5187, |
| "step": 179200 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 3.0396887486313916e-06, |
| "loss": 3.4766, |
| "step": 179300 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 2.9715342723700133e-06, |
| "loss": 3.4628, |
| "step": 179400 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 2.9041479800591685e-06, |
| "loss": 3.4998, |
| "step": 179500 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 2.8375300812499007e-06, |
| "loss": 3.4767, |
| "step": 179600 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 2.771680783103747e-06, |
| "loss": 3.4474, |
| "step": 179700 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 2.706600290392186e-06, |
| "loss": 3.4676, |
| "step": 179800 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 2.642288805495885e-06, |
| "loss": 3.4556, |
| "step": 179900 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 2.5787465284041188e-06, |
| "loss": 3.4032, |
| "step": 180000 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 2.5159736567141876e-06, |
| "loss": 3.4813, |
| "step": 180100 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 2.4539703856308326e-06, |
| "loss": 3.4864, |
| "step": 180200 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 2.3927369079654313e-06, |
| "loss": 3.4783, |
| "step": 180300 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 2.332273414135666e-06, |
| "loss": 3.4934, |
| "step": 180400 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 2.2725800921647164e-06, |
| "loss": 3.4395, |
| "step": 180500 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 2.213657127680818e-06, |
| "loss": 3.4599, |
| "step": 180600 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 2.1555047039165944e-06, |
| "loss": 3.482, |
| "step": 180700 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 2.0981230017085017e-06, |
| "loss": 3.4425, |
| "step": 180800 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 2.0415121994963314e-06, |
| "loss": 3.4422, |
| "step": 180900 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.9856724733225695e-06, |
| "loss": 3.4536, |
| "step": 181000 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.9306039968319535e-06, |
| "loss": 3.5386, |
| "step": 181100 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.8763069412707778e-06, |
| "loss": 3.5005, |
| "step": 181200 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.822781475486507e-06, |
| "loss": 3.4209, |
| "step": 181300 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.7700277659271625e-06, |
| "loss": 3.4426, |
| "step": 181400 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.7180459766408806e-06, |
| "loss": 3.4718, |
| "step": 181500 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.6668362692753569e-06, |
| "loss": 3.4696, |
| "step": 181600 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.6163988030772347e-06, |
| "loss": 3.4379, |
| "step": 181700 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.5667337348918841e-06, |
| "loss": 3.4895, |
| "step": 181800 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.5178412191626524e-06, |
| "loss": 3.4887, |
| "step": 181900 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.469721407930502e-06, |
| "loss": 3.4375, |
| "step": 182000 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.4223744508334857e-06, |
| "loss": 3.4431, |
| "step": 182100 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.375800495106383e-06, |
| "loss": 3.4643, |
| "step": 182200 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.3299996855801189e-06, |
| "loss": 3.4656, |
| "step": 182300 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.2849721646814306e-06, |
| "loss": 3.4747, |
| "step": 182400 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.2407180724322565e-06, |
| "loss": 3.3942, |
| "step": 182500 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.1972375464494867e-06, |
| "loss": 3.475, |
| "step": 182600 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.154530721944408e-06, |
| "loss": 3.513, |
| "step": 182700 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.11259773172237e-06, |
| "loss": 3.3924, |
| "step": 182800 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.0714387061823427e-06, |
| "loss": 3.4743, |
| "step": 182900 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 1.031053773316415e-06, |
| "loss": 3.5169, |
| "step": 183000 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 9.914430587095735e-07, |
| "loss": 3.4972, |
| "step": 183100 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 9.52606685539148e-07, |
| "loss": 3.456, |
| "step": 183200 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 9.145447745745883e-07, |
| "loss": 3.4078, |
| "step": 183300 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 8.772574441768821e-07, |
| "loss": 3.4546, |
| "step": 183400 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 8.407448102984161e-07, |
| "loss": 3.4795, |
| "step": 183500 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 8.050069864824483e-07, |
| "loss": 3.4461, |
| "step": 183600 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 7.700440838628031e-07, |
| "loss": 3.4308, |
| "step": 183700 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 7.358562111635936e-07, |
| "loss": 3.4372, |
| "step": 183800 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 7.024434746987218e-07, |
| "loss": 3.4851, |
| "step": 183900 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 6.698059783717681e-07, |
| "loss": 3.4375, |
| "step": 184000 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 6.379438236754354e-07, |
| "loss": 3.4974, |
| "step": 184100 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 6.068571096914666e-07, |
| "loss": 3.4536, |
| "step": 184200 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 5.765459330901446e-07, |
| "loss": 3.4817, |
| "step": 184300 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 5.470103881300425e-07, |
| "loss": 3.4776, |
| "step": 184400 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 5.182505666578019e-07, |
| "loss": 3.44, |
| "step": 184500 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 4.902665581077991e-07, |
| "loss": 3.4449, |
| "step": 184600 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 4.630584495018408e-07, |
| "loss": 3.5173, |
| "step": 184700 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 4.366263254489411e-07, |
| "loss": 3.4473, |
| "step": 184800 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 4.109702681450167e-07, |
| "loss": 3.442, |
| "step": 184900 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 3.8609035737266486e-07, |
| "loss": 3.436, |
| "step": 185000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 188152, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 5000, |
| "total_flos": 3.0232783618595517e+24, |
| "train_batch_size": 6, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|