| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 100000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.001, |
| "grad_norm": 3.9445953369140625, |
| "learning_rate": 2.97e-05, |
| "loss": 6.7008, |
| "num_input_tokens_seen": 6553600, |
| "step": 100, |
| "train_runtime": 61.1942, |
| "train_tokens_per_second": 107095.166 |
| }, |
| { |
| "epoch": 0.002, |
| "grad_norm": 0.6828203797340393, |
| "learning_rate": 5.97e-05, |
| "loss": 3.3177, |
| "num_input_tokens_seen": 13107200, |
| "step": 200, |
| "train_runtime": 107.6856, |
| "train_tokens_per_second": 121717.274 |
| }, |
| { |
| "epoch": 0.003, |
| "grad_norm": 16.05720329284668, |
| "learning_rate": 8.969999999999998e-05, |
| "loss": 3.0024, |
| "num_input_tokens_seen": 19660800, |
| "step": 300, |
| "train_runtime": 154.3564, |
| "train_tokens_per_second": 127372.748 |
| }, |
| { |
| "epoch": 0.004, |
| "grad_norm": 13.74783706665039, |
| "learning_rate": 0.0001197, |
| "loss": 2.6797, |
| "num_input_tokens_seen": 26214400, |
| "step": 400, |
| "train_runtime": 200.698, |
| "train_tokens_per_second": 130616.167 |
| }, |
| { |
| "epoch": 0.005, |
| "grad_norm": 12.893468856811523, |
| "learning_rate": 0.00014969999999999998, |
| "loss": 2.4588, |
| "num_input_tokens_seen": 32768000, |
| "step": 500, |
| "train_runtime": 252.1632, |
| "train_tokens_per_second": 129947.566 |
| }, |
| { |
| "epoch": 0.006, |
| "grad_norm": 9.021939277648926, |
| "learning_rate": 0.00017969999999999998, |
| "loss": 2.276, |
| "num_input_tokens_seen": 39321600, |
| "step": 600, |
| "train_runtime": 299.2712, |
| "train_tokens_per_second": 131391.184 |
| }, |
| { |
| "epoch": 0.007, |
| "grad_norm": 8.669090270996094, |
| "learning_rate": 0.00020969999999999997, |
| "loss": 2.1203, |
| "num_input_tokens_seen": 45875200, |
| "step": 700, |
| "train_runtime": 346.3366, |
| "train_tokens_per_second": 132458.429 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 7.335177898406982, |
| "learning_rate": 0.0002397, |
| "loss": 1.9886, |
| "num_input_tokens_seen": 52428800, |
| "step": 800, |
| "train_runtime": 393.5299, |
| "train_tokens_per_second": 133226.965 |
| }, |
| { |
| "epoch": 0.009, |
| "grad_norm": 6.051175117492676, |
| "learning_rate": 0.0002697, |
| "loss": 1.9128, |
| "num_input_tokens_seen": 58982400, |
| "step": 900, |
| "train_runtime": 440.0136, |
| "train_tokens_per_second": 134046.765 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 5.503482818603516, |
| "learning_rate": 0.00029969999999999997, |
| "loss": 1.8296, |
| "num_input_tokens_seen": 65536000, |
| "step": 1000, |
| "train_runtime": 492.2662, |
| "train_tokens_per_second": 133131.222 |
| }, |
| { |
| "epoch": 0.011, |
| "grad_norm": 2.8459227085113525, |
| "learning_rate": 0.00029999925978027874, |
| "loss": 1.779, |
| "num_input_tokens_seen": 72089600, |
| "step": 1100, |
| "train_runtime": 538.0301, |
| "train_tokens_per_second": 133988.032 |
| }, |
| { |
| "epoch": 0.012, |
| "grad_norm": 2.292707920074463, |
| "learning_rate": 0.0002999970091452017, |
| "loss": 1.7037, |
| "num_input_tokens_seen": 78643200, |
| "step": 1200, |
| "train_runtime": 585.618, |
| "train_tokens_per_second": 134290.951 |
| }, |
| { |
| "epoch": 0.013, |
| "grad_norm": 3.362025737762451, |
| "learning_rate": 0.00029999324804190795, |
| "loss": 1.6688, |
| "num_input_tokens_seen": 85196800, |
| "step": 1300, |
| "train_runtime": 632.1008, |
| "train_tokens_per_second": 134783.565 |
| }, |
| { |
| "epoch": 0.014, |
| "grad_norm": 2.2756998538970947, |
| "learning_rate": 0.0002999879765082716, |
| "loss": 1.6397, |
| "num_input_tokens_seen": 91750400, |
| "step": 1400, |
| "train_runtime": 684.3545, |
| "train_tokens_per_second": 134068.525 |
| }, |
| { |
| "epoch": 0.015, |
| "grad_norm": 2.5730831623077393, |
| "learning_rate": 0.000299981194597377, |
| "loss": 1.605, |
| "num_input_tokens_seen": 98304000, |
| "step": 1500, |
| "train_runtime": 730.5087, |
| "train_tokens_per_second": 134569.247 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 1.7514433860778809, |
| "learning_rate": 0.0002999729023775179, |
| "loss": 1.5838, |
| "num_input_tokens_seen": 104857600, |
| "step": 1600, |
| "train_runtime": 781.9407, |
| "train_tokens_per_second": 134099.179 |
| }, |
| { |
| "epoch": 0.017, |
| "grad_norm": 1.8343929052352905, |
| "learning_rate": 0.0002999630999321969, |
| "loss": 1.6037, |
| "num_input_tokens_seen": 111411200, |
| "step": 1700, |
| "train_runtime": 824.7241, |
| "train_tokens_per_second": 135089.057 |
| }, |
| { |
| "epoch": 0.018, |
| "grad_norm": 1.5672227144241333, |
| "learning_rate": 0.00029995178736012443, |
| "loss": 1.5627, |
| "num_input_tokens_seen": 117964800, |
| "step": 1800, |
| "train_runtime": 871.9564, |
| "train_tokens_per_second": 135287.497 |
| }, |
| { |
| "epoch": 0.019, |
| "grad_norm": 1.6202061176300049, |
| "learning_rate": 0.0002999389647752181, |
| "loss": 1.5398, |
| "num_input_tokens_seen": 124518400, |
| "step": 1900, |
| "train_runtime": 923.402, |
| "train_tokens_per_second": 134847.439 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 1.5145666599273682, |
| "learning_rate": 0.00029992463230660104, |
| "loss": 1.5389, |
| "num_input_tokens_seen": 131072000, |
| "step": 2000, |
| "train_runtime": 968.9283, |
| "train_tokens_per_second": 135275.229 |
| }, |
| { |
| "epoch": 0.021, |
| "grad_norm": 1.0306257009506226, |
| "learning_rate": 0.00029990879009860117, |
| "loss": 1.5098, |
| "num_input_tokens_seen": 137625600, |
| "step": 2100, |
| "train_runtime": 1020.8371, |
| "train_tokens_per_second": 134816.412 |
| }, |
| { |
| "epoch": 0.022, |
| "grad_norm": 2.0710599422454834, |
| "learning_rate": 0.0002998914383107493, |
| "loss": 1.5081, |
| "num_input_tokens_seen": 144179200, |
| "step": 2200, |
| "train_runtime": 1067.2796, |
| "train_tokens_per_second": 135090.368 |
| }, |
| { |
| "epoch": 0.023, |
| "grad_norm": 1.4022581577301025, |
| "learning_rate": 0.0002998725771177778, |
| "loss": 1.521, |
| "num_input_tokens_seen": 150732800, |
| "step": 2300, |
| "train_runtime": 1114.7094, |
| "train_tokens_per_second": 135221.616 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 1.4328904151916504, |
| "learning_rate": 0.00029985220670961847, |
| "loss": 1.4855, |
| "num_input_tokens_seen": 157286400, |
| "step": 2400, |
| "train_runtime": 1160.6217, |
| "train_tokens_per_second": 135519.092 |
| }, |
| { |
| "epoch": 0.025, |
| "grad_norm": 1.3760366439819336, |
| "learning_rate": 0.0002998303272914014, |
| "loss": 1.4966, |
| "num_input_tokens_seen": 163840000, |
| "step": 2500, |
| "train_runtime": 1212.6489, |
| "train_tokens_per_second": 135109.18 |
| }, |
| { |
| "epoch": 0.026, |
| "grad_norm": 0.9530190825462341, |
| "learning_rate": 0.00029980693908345185, |
| "loss": 1.4795, |
| "num_input_tokens_seen": 170393600, |
| "step": 2600, |
| "train_runtime": 1258.3106, |
| "train_tokens_per_second": 135414.576 |
| }, |
| { |
| "epoch": 0.027, |
| "grad_norm": 0.8715839385986328, |
| "learning_rate": 0.00029978204232128895, |
| "loss": 1.4601, |
| "num_input_tokens_seen": 176947200, |
| "step": 2700, |
| "train_runtime": 1304.6837, |
| "train_tokens_per_second": 135624.597 |
| }, |
| { |
| "epoch": 0.028, |
| "grad_norm": 1.1879854202270508, |
| "learning_rate": 0.0002997556372556227, |
| "loss": 1.487, |
| "num_input_tokens_seen": 183500800, |
| "step": 2800, |
| "train_runtime": 1358.2195, |
| "train_tokens_per_second": 135103.938 |
| }, |
| { |
| "epoch": 0.029, |
| "grad_norm": 1.0949848890304565, |
| "learning_rate": 0.0002997277241523519, |
| "loss": 1.4658, |
| "num_input_tokens_seen": 190054400, |
| "step": 2900, |
| "train_runtime": 1404.4203, |
| "train_tokens_per_second": 135325.869 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 1.465809941291809, |
| "learning_rate": 0.00029969830329256125, |
| "loss": 1.4463, |
| "num_input_tokens_seen": 196608000, |
| "step": 3000, |
| "train_runtime": 1451.3838, |
| "train_tokens_per_second": 135462.45 |
| }, |
| { |
| "epoch": 0.031, |
| "grad_norm": 0.9500088095664978, |
| "learning_rate": 0.00029966737497251836, |
| "loss": 1.4533, |
| "num_input_tokens_seen": 203161600, |
| "step": 3100, |
| "train_runtime": 1496.7114, |
| "train_tokens_per_second": 135738.657 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 1.3393683433532715, |
| "learning_rate": 0.0002996349395036711, |
| "loss": 1.4402, |
| "num_input_tokens_seen": 209715200, |
| "step": 3200, |
| "train_runtime": 1549.2536, |
| "train_tokens_per_second": 135365.316 |
| }, |
| { |
| "epoch": 0.033, |
| "grad_norm": 0.7998270988464355, |
| "learning_rate": 0.00029960099721264435, |
| "loss": 1.4467, |
| "num_input_tokens_seen": 216268800, |
| "step": 3300, |
| "train_runtime": 1596.5035, |
| "train_tokens_per_second": 135464.03 |
| }, |
| { |
| "epoch": 0.034, |
| "grad_norm": 0.8441318273544312, |
| "learning_rate": 0.0002995655484412365, |
| "loss": 1.4353, |
| "num_input_tokens_seen": 222822400, |
| "step": 3400, |
| "train_runtime": 1642.6114, |
| "train_tokens_per_second": 135651.317 |
| }, |
| { |
| "epoch": 0.035, |
| "grad_norm": 0.7577129006385803, |
| "learning_rate": 0.00029952859354641636, |
| "loss": 1.4253, |
| "num_input_tokens_seen": 229376000, |
| "step": 3500, |
| "train_runtime": 1690.0779, |
| "train_tokens_per_second": 135719.187 |
| }, |
| { |
| "epoch": 0.036, |
| "grad_norm": 0.8359817862510681, |
| "learning_rate": 0.00029949013290031924, |
| "loss": 1.4348, |
| "num_input_tokens_seen": 235929600, |
| "step": 3600, |
| "train_runtime": 1736.0232, |
| "train_tokens_per_second": 135902.33 |
| }, |
| { |
| "epoch": 0.037, |
| "grad_norm": 0.7565376162528992, |
| "learning_rate": 0.00029945016689024353, |
| "loss": 1.4114, |
| "num_input_tokens_seen": 242483200, |
| "step": 3700, |
| "train_runtime": 1788.0113, |
| "train_tokens_per_second": 135616.148 |
| }, |
| { |
| "epoch": 0.038, |
| "grad_norm": 0.9537010788917542, |
| "learning_rate": 0.0002994086959186464, |
| "loss": 1.4134, |
| "num_input_tokens_seen": 249036800, |
| "step": 3800, |
| "train_runtime": 1835.9254, |
| "train_tokens_per_second": 135646.47 |
| }, |
| { |
| "epoch": 0.039, |
| "grad_norm": 0.8911266922950745, |
| "learning_rate": 0.00029936572040314014, |
| "loss": 1.4224, |
| "num_input_tokens_seen": 255590400, |
| "step": 3900, |
| "train_runtime": 1882.537, |
| "train_tokens_per_second": 135769.123 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.7832906246185303, |
| "learning_rate": 0.0002993212407764877, |
| "loss": 1.4177, |
| "num_input_tokens_seen": 262144000, |
| "step": 4000, |
| "train_runtime": 1928.8118, |
| "train_tokens_per_second": 135909.579 |
| }, |
| { |
| "epoch": 0.041, |
| "grad_norm": 0.8426671624183655, |
| "learning_rate": 0.00029927525748659834, |
| "loss": 1.4194, |
| "num_input_tokens_seen": 268697600, |
| "step": 4100, |
| "train_runtime": 1981.7143, |
| "train_tokens_per_second": 135588.467 |
| }, |
| { |
| "epoch": 0.042, |
| "grad_norm": 0.9675344824790955, |
| "learning_rate": 0.0002992277709965234, |
| "loss": 1.4059, |
| "num_input_tokens_seen": 275251200, |
| "step": 4200, |
| "train_runtime": 2027.927, |
| "train_tokens_per_second": 135730.33 |
| }, |
| { |
| "epoch": 0.043, |
| "grad_norm": 1.1866440773010254, |
| "learning_rate": 0.0002991787817844513, |
| "loss": 1.4065, |
| "num_input_tokens_seen": 281804800, |
| "step": 4300, |
| "train_runtime": 2074.708, |
| "train_tokens_per_second": 135828.659 |
| }, |
| { |
| "epoch": 0.044, |
| "grad_norm": 0.8417257070541382, |
| "learning_rate": 0.0002991282903437028, |
| "loss": 1.397, |
| "num_input_tokens_seen": 288358400, |
| "step": 4400, |
| "train_runtime": 2126.0513, |
| "train_tokens_per_second": 135630.972 |
| }, |
| { |
| "epoch": 0.045, |
| "grad_norm": 0.8226633071899414, |
| "learning_rate": 0.0002990762971827262, |
| "loss": 1.3996, |
| "num_input_tokens_seen": 294912000, |
| "step": 4500, |
| "train_runtime": 2172.3837, |
| "train_tokens_per_second": 135755.024 |
| }, |
| { |
| "epoch": 0.046, |
| "grad_norm": 0.8411224484443665, |
| "learning_rate": 0.00029902280282509197, |
| "loss": 1.4002, |
| "num_input_tokens_seen": 301465600, |
| "step": 4600, |
| "train_runtime": 2220.1775, |
| "train_tokens_per_second": 135784.456 |
| }, |
| { |
| "epoch": 0.047, |
| "grad_norm": 0.7082719802856445, |
| "learning_rate": 0.0002989678078094878, |
| "loss": 1.3804, |
| "num_input_tokens_seen": 308019200, |
| "step": 4700, |
| "train_runtime": 2266.6848, |
| "train_tokens_per_second": 135889.739 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 0.7628137469291687, |
| "learning_rate": 0.00029891131268971284, |
| "loss": 1.3795, |
| "num_input_tokens_seen": 314572800, |
| "step": 4800, |
| "train_runtime": 2318.5885, |
| "train_tokens_per_second": 135674.269 |
| }, |
| { |
| "epoch": 0.049, |
| "grad_norm": 0.7231079936027527, |
| "learning_rate": 0.0002988533180346723, |
| "loss": 1.3789, |
| "num_input_tokens_seen": 321126400, |
| "step": 4900, |
| "train_runtime": 2364.3453, |
| "train_tokens_per_second": 135820.432 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.7210503816604614, |
| "learning_rate": 0.0002987938244283717, |
| "loss": 1.3641, |
| "num_input_tokens_seen": 327680000, |
| "step": 5000, |
| "train_runtime": 2410.3286, |
| "train_tokens_per_second": 135948.267 |
| }, |
| { |
| "epoch": 0.051, |
| "grad_norm": 0.729364275932312, |
| "learning_rate": 0.00029873283246991105, |
| "loss": 1.3756, |
| "num_input_tokens_seen": 334233600, |
| "step": 5100, |
| "train_runtime": 2458.4762, |
| "train_tokens_per_second": 135951.532 |
| }, |
| { |
| "epoch": 0.052, |
| "grad_norm": 0.7513293027877808, |
| "learning_rate": 0.0002986703427734787, |
| "loss": 1.3778, |
| "num_input_tokens_seen": 340787200, |
| "step": 5200, |
| "train_runtime": 2506.9032, |
| "train_tokens_per_second": 135939.511 |
| }, |
| { |
| "epoch": 0.053, |
| "grad_norm": 0.7382386326789856, |
| "learning_rate": 0.00029860635596834517, |
| "loss": 1.3807, |
| "num_input_tokens_seen": 347340800, |
| "step": 5300, |
| "train_runtime": 2559.5035, |
| "train_tokens_per_second": 135706.321 |
| }, |
| { |
| "epoch": 0.054, |
| "grad_norm": 0.5869194269180298, |
| "learning_rate": 0.0002985408726988569, |
| "loss": 1.3695, |
| "num_input_tokens_seen": 353894400, |
| "step": 5400, |
| "train_runtime": 2605.4484, |
| "train_tokens_per_second": 135828.598 |
| }, |
| { |
| "epoch": 0.055, |
| "grad_norm": 0.7805973291397095, |
| "learning_rate": 0.0002984738936244296, |
| "loss": 1.3746, |
| "num_input_tokens_seen": 360448000, |
| "step": 5500, |
| "train_runtime": 2655.8515, |
| "train_tokens_per_second": 135718.431 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 0.6918448209762573, |
| "learning_rate": 0.0002984054194195419, |
| "loss": 1.3855, |
| "num_input_tokens_seen": 367001600, |
| "step": 5600, |
| "train_runtime": 2703.0299, |
| "train_tokens_per_second": 135774.155 |
| }, |
| { |
| "epoch": 0.057, |
| "grad_norm": 0.6129201054573059, |
| "learning_rate": 0.0002983354507737283, |
| "loss": 1.3816, |
| "num_input_tokens_seen": 373555200, |
| "step": 5700, |
| "train_runtime": 2750.071, |
| "train_tokens_per_second": 135834.747 |
| }, |
| { |
| "epoch": 0.058, |
| "grad_norm": 0.7457948923110962, |
| "learning_rate": 0.00029826398839157215, |
| "loss": 1.3748, |
| "num_input_tokens_seen": 380108800, |
| "step": 5800, |
| "train_runtime": 2795.4164, |
| "train_tokens_per_second": 135975.735 |
| }, |
| { |
| "epoch": 0.059, |
| "grad_norm": 0.6171481013298035, |
| "learning_rate": 0.000298191032992699, |
| "loss": 1.3725, |
| "num_input_tokens_seen": 386662400, |
| "step": 5900, |
| "train_runtime": 2842.5021, |
| "train_tokens_per_second": 136028.889 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.6233596205711365, |
| "learning_rate": 0.0002981165853117688, |
| "loss": 1.3624, |
| "num_input_tokens_seen": 393216000, |
| "step": 6000, |
| "train_runtime": 2892.8273, |
| "train_tokens_per_second": 135927.922 |
| }, |
| { |
| "epoch": 0.061, |
| "grad_norm": 0.5645745396614075, |
| "learning_rate": 0.000298040646098469, |
| "loss": 1.356, |
| "num_input_tokens_seen": 399769600, |
| "step": 6100, |
| "train_runtime": 2940.1153, |
| "train_tokens_per_second": 135970.721 |
| }, |
| { |
| "epoch": 0.062, |
| "grad_norm": 0.6580554246902466, |
| "learning_rate": 0.0002979632161175064, |
| "loss": 1.3627, |
| "num_input_tokens_seen": 406323200, |
| "step": 6200, |
| "train_runtime": 2986.9073, |
| "train_tokens_per_second": 136034.754 |
| }, |
| { |
| "epoch": 0.063, |
| "grad_norm": 0.6815545558929443, |
| "learning_rate": 0.0002978842961486003, |
| "loss": 1.3562, |
| "num_input_tokens_seen": 412876800, |
| "step": 6300, |
| "train_runtime": 3038.4238, |
| "train_tokens_per_second": 135885.191 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 0.9602898955345154, |
| "learning_rate": 0.0002978038869864738, |
| "loss": 1.3562, |
| "num_input_tokens_seen": 419430400, |
| "step": 6400, |
| "train_runtime": 3085.1228, |
| "train_tokens_per_second": 135952.578 |
| }, |
| { |
| "epoch": 0.065, |
| "grad_norm": 0.7086384892463684, |
| "learning_rate": 0.0002977219894408463, |
| "loss": 1.3579, |
| "num_input_tokens_seen": 425984000, |
| "step": 6500, |
| "train_runtime": 3130.8346, |
| "train_tokens_per_second": 136060.844 |
| }, |
| { |
| "epoch": 0.066, |
| "grad_norm": 0.5864439010620117, |
| "learning_rate": 0.0002976386043364251, |
| "loss": 1.3563, |
| "num_input_tokens_seen": 432537600, |
| "step": 6600, |
| "train_runtime": 3182.4893, |
| "train_tokens_per_second": 135911.72 |
| }, |
| { |
| "epoch": 0.067, |
| "grad_norm": 0.6041991114616394, |
| "learning_rate": 0.00029755373251289733, |
| "loss": 1.3753, |
| "num_input_tokens_seen": 439091200, |
| "step": 6700, |
| "train_runtime": 3229.4118, |
| "train_tokens_per_second": 135966.308 |
| }, |
| { |
| "epoch": 0.068, |
| "grad_norm": 0.7153160572052002, |
| "learning_rate": 0.0002974673748249213, |
| "loss": 1.3475, |
| "num_input_tokens_seen": 445644800, |
| "step": 6800, |
| "train_runtime": 3276.7034, |
| "train_tokens_per_second": 136004.008 |
| }, |
| { |
| "epoch": 0.069, |
| "grad_norm": 0.5409119725227356, |
| "learning_rate": 0.00029737953214211804, |
| "loss": 1.3464, |
| "num_input_tokens_seen": 452198400, |
| "step": 6900, |
| "train_runtime": 3324.3119, |
| "train_tokens_per_second": 136027.67 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.6369441151618958, |
| "learning_rate": 0.0002972902053490623, |
| "loss": 1.3546, |
| "num_input_tokens_seen": 458752000, |
| "step": 7000, |
| "train_runtime": 3370.6322, |
| "train_tokens_per_second": 136102.657 |
| }, |
| { |
| "epoch": 0.071, |
| "grad_norm": 0.8589248061180115, |
| "learning_rate": 0.00029719939534527393, |
| "loss": 1.3479, |
| "num_input_tokens_seen": 465305600, |
| "step": 7100, |
| "train_runtime": 3424.7139, |
| "train_tokens_per_second": 135867.0 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 0.8014613389968872, |
| "learning_rate": 0.00029710710304520866, |
| "loss": 1.3667, |
| "num_input_tokens_seen": 471859200, |
| "step": 7200, |
| "train_runtime": 3472.985, |
| "train_tokens_per_second": 135865.601 |
| }, |
| { |
| "epoch": 0.073, |
| "grad_norm": 0.5970280766487122, |
| "learning_rate": 0.00029701332937824885, |
| "loss": 1.3423, |
| "num_input_tokens_seen": 478412800, |
| "step": 7300, |
| "train_runtime": 3519.3052, |
| "train_tokens_per_second": 135939.558 |
| }, |
| { |
| "epoch": 0.074, |
| "grad_norm": 0.6963617205619812, |
| "learning_rate": 0.0002969180752886944, |
| "loss": 1.3443, |
| "num_input_tokens_seen": 484966400, |
| "step": 7400, |
| "train_runtime": 3565.8739, |
| "train_tokens_per_second": 136002.118 |
| }, |
| { |
| "epoch": 0.075, |
| "grad_norm": 0.5769393444061279, |
| "learning_rate": 0.0002968213417357529, |
| "loss": 1.3576, |
| "num_input_tokens_seen": 491520000, |
| "step": 7500, |
| "train_runtime": 3611.5043, |
| "train_tokens_per_second": 136098.411 |
| }, |
| { |
| "epoch": 0.076, |
| "grad_norm": 0.5492929816246033, |
| "learning_rate": 0.00029672312969353015, |
| "loss": 1.3422, |
| "num_input_tokens_seen": 498073600, |
| "step": 7600, |
| "train_runtime": 3664.3633, |
| "train_tokens_per_second": 135923.642 |
| }, |
| { |
| "epoch": 0.077, |
| "grad_norm": 0.8065637946128845, |
| "learning_rate": 0.00029662344015102027, |
| "loss": 1.3395, |
| "num_input_tokens_seen": 504627200, |
| "step": 7700, |
| "train_runtime": 3711.2689, |
| "train_tokens_per_second": 135971.608 |
| }, |
| { |
| "epoch": 0.078, |
| "grad_norm": 0.552871584892273, |
| "learning_rate": 0.00029652227411209594, |
| "loss": 1.3427, |
| "num_input_tokens_seen": 511180800, |
| "step": 7800, |
| "train_runtime": 3758.1209, |
| "train_tokens_per_second": 136020.319 |
| }, |
| { |
| "epoch": 0.079, |
| "grad_norm": 0.6378001570701599, |
| "learning_rate": 0.0002964196325954979, |
| "loss": 1.3339, |
| "num_input_tokens_seen": 517734400, |
| "step": 7900, |
| "train_runtime": 3804.2295, |
| "train_tokens_per_second": 136094.417 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.6196131706237793, |
| "learning_rate": 0.0002963155166348253, |
| "loss": 1.341, |
| "num_input_tokens_seen": 524288000, |
| "step": 8000, |
| "train_runtime": 3855.6562, |
| "train_tokens_per_second": 135978.93 |
| }, |
| { |
| "epoch": 0.081, |
| "grad_norm": 0.5841253399848938, |
| "learning_rate": 0.0002962099272785246, |
| "loss": 1.3366, |
| "num_input_tokens_seen": 530841600, |
| "step": 8100, |
| "train_runtime": 3903.5348, |
| "train_tokens_per_second": 135989.977 |
| }, |
| { |
| "epoch": 0.082, |
| "grad_norm": 0.5912770628929138, |
| "learning_rate": 0.0002961028655898794, |
| "loss": 1.3417, |
| "num_input_tokens_seen": 537395200, |
| "step": 8200, |
| "train_runtime": 3951.3698, |
| "train_tokens_per_second": 136002.255 |
| }, |
| { |
| "epoch": 0.083, |
| "grad_norm": 0.5480249524116516, |
| "learning_rate": 0.0002959943326469998, |
| "loss": 1.3419, |
| "num_input_tokens_seen": 543948800, |
| "step": 8300, |
| "train_runtime": 3997.3554, |
| "train_tokens_per_second": 136077.166 |
| }, |
| { |
| "epoch": 0.084, |
| "grad_norm": 0.49880343675613403, |
| "learning_rate": 0.0002958843295428112, |
| "loss": 1.3165, |
| "num_input_tokens_seen": 550502400, |
| "step": 8400, |
| "train_runtime": 4044.3967, |
| "train_tokens_per_second": 136114.838 |
| }, |
| { |
| "epoch": 0.085, |
| "grad_norm": 0.5670176148414612, |
| "learning_rate": 0.0002957728573850438, |
| "loss": 1.3314, |
| "num_input_tokens_seen": 557056000, |
| "step": 8500, |
| "train_runtime": 4095.7201, |
| "train_tokens_per_second": 136009.294 |
| }, |
| { |
| "epoch": 0.086, |
| "grad_norm": 2.3274426460266113, |
| "learning_rate": 0.0002956599172962209, |
| "loss": 1.3323, |
| "num_input_tokens_seen": 563609600, |
| "step": 8600, |
| "train_runtime": 4143.1443, |
| "train_tokens_per_second": 136034.268 |
| }, |
| { |
| "epoch": 0.087, |
| "grad_norm": 0.7660558819770813, |
| "learning_rate": 0.0002955455104136479, |
| "loss": 1.3382, |
| "num_input_tokens_seen": 570163200, |
| "step": 8700, |
| "train_runtime": 4190.7065, |
| "train_tokens_per_second": 136054.194 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 0.5114762783050537, |
| "learning_rate": 0.00029542963788940096, |
| "loss": 1.3252, |
| "num_input_tokens_seen": 576716800, |
| "step": 8800, |
| "train_runtime": 4237.8545, |
| "train_tokens_per_second": 136086.974 |
| }, |
| { |
| "epoch": 0.089, |
| "grad_norm": 0.6698548197746277, |
| "learning_rate": 0.00029531230089031505, |
| "loss": 1.3449, |
| "num_input_tokens_seen": 583270400, |
| "step": 8900, |
| "train_runtime": 4285.2299, |
| "train_tokens_per_second": 136111.81 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.5562598705291748, |
| "learning_rate": 0.0002951935005979724, |
| "loss": 1.3204, |
| "num_input_tokens_seen": 589824000, |
| "step": 9000, |
| "train_runtime": 4336.4907, |
| "train_tokens_per_second": 136014.126 |
| }, |
| { |
| "epoch": 0.091, |
| "grad_norm": 0.6327181458473206, |
| "learning_rate": 0.0002950732382086907, |
| "loss": 1.3178, |
| "num_input_tokens_seen": 596377600, |
| "step": 9100, |
| "train_runtime": 4383.0811, |
| "train_tokens_per_second": 136063.555 |
| }, |
| { |
| "epoch": 0.092, |
| "grad_norm": 0.6857426166534424, |
| "learning_rate": 0.0002949515149335108, |
| "loss": 1.3332, |
| "num_input_tokens_seen": 602931200, |
| "step": 9200, |
| "train_runtime": 4431.4231, |
| "train_tokens_per_second": 136058.142 |
| }, |
| { |
| "epoch": 0.093, |
| "grad_norm": 0.6040679812431335, |
| "learning_rate": 0.0002948283319981848, |
| "loss": 1.307, |
| "num_input_tokens_seen": 609484800, |
| "step": 9300, |
| "train_runtime": 4478.1663, |
| "train_tokens_per_second": 136101.423 |
| }, |
| { |
| "epoch": 0.094, |
| "grad_norm": 1.0060901641845703, |
| "learning_rate": 0.00029470369064316354, |
| "loss": 1.3108, |
| "num_input_tokens_seen": 616038400, |
| "step": 9400, |
| "train_runtime": 4524.7167, |
| "train_tokens_per_second": 136149.607 |
| }, |
| { |
| "epoch": 0.095, |
| "grad_norm": 0.504460871219635, |
| "learning_rate": 0.00029457759212358397, |
| "loss": 1.3169, |
| "num_input_tokens_seen": 622592000, |
| "step": 9500, |
| "train_runtime": 4575.869, |
| "train_tokens_per_second": 136059.84 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 0.5062097907066345, |
| "learning_rate": 0.00029445003770925686, |
| "loss": 1.3137, |
| "num_input_tokens_seen": 629145600, |
| "step": 9600, |
| "train_runtime": 4621.4422, |
| "train_tokens_per_second": 136136.203 |
| }, |
| { |
| "epoch": 0.097, |
| "grad_norm": 0.5388786792755127, |
| "learning_rate": 0.00029432102868465367, |
| "loss": 1.3128, |
| "num_input_tokens_seen": 635699200, |
| "step": 9700, |
| "train_runtime": 4668.6149, |
| "train_tokens_per_second": 136164.411 |
| }, |
| { |
| "epoch": 0.098, |
| "grad_norm": 0.5705980062484741, |
| "learning_rate": 0.0002941905663488939, |
| "loss": 1.3065, |
| "num_input_tokens_seen": 642252800, |
| "step": 9800, |
| "train_runtime": 4715.2389, |
| "train_tokens_per_second": 136207.903 |
| }, |
| { |
| "epoch": 0.099, |
| "grad_norm": 0.5500839352607727, |
| "learning_rate": 0.0002940586520157318, |
| "loss": 1.3222, |
| "num_input_tokens_seen": 648806400, |
| "step": 9900, |
| "train_runtime": 4767.1995, |
| "train_tokens_per_second": 136098.019 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.5740068554878235, |
| "learning_rate": 0.00029392528701354325, |
| "loss": 1.3173, |
| "num_input_tokens_seen": 655360000, |
| "step": 10000, |
| "train_runtime": 4814.2762, |
| "train_tokens_per_second": 136128.458 |
| }, |
| { |
| "epoch": 0.101, |
| "grad_norm": 0.47691279649734497, |
| "learning_rate": 0.00029379047268531243, |
| "loss": 1.3084, |
| "num_input_tokens_seen": 661913600, |
| "step": 10100, |
| "train_runtime": 4861.0919, |
| "train_tokens_per_second": 136165.622 |
| }, |
| { |
| "epoch": 0.102, |
| "grad_norm": 0.5993319153785706, |
| "learning_rate": 0.00029365421038861795, |
| "loss": 1.3299, |
| "num_input_tokens_seen": 668467200, |
| "step": 10200, |
| "train_runtime": 4908.6949, |
| "train_tokens_per_second": 136180.229 |
| }, |
| { |
| "epoch": 0.103, |
| "grad_norm": 0.556516170501709, |
| "learning_rate": 0.0002935165014956198, |
| "loss": 1.316, |
| "num_input_tokens_seen": 675020800, |
| "step": 10300, |
| "train_runtime": 4956.5309, |
| "train_tokens_per_second": 136188.156 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 0.6757346391677856, |
| "learning_rate": 0.0002933773473930448, |
| "loss": 1.3048, |
| "num_input_tokens_seen": 681574400, |
| "step": 10400, |
| "train_runtime": 5003.7965, |
| "train_tokens_per_second": 136211.454 |
| }, |
| { |
| "epoch": 0.105, |
| "grad_norm": 0.9610360860824585, |
| "learning_rate": 0.0002932367494821734, |
| "loss": 1.3043, |
| "num_input_tokens_seen": 688128000, |
| "step": 10500, |
| "train_runtime": 5050.8058, |
| "train_tokens_per_second": 136241.232 |
| }, |
| { |
| "epoch": 0.106, |
| "grad_norm": 0.5780071020126343, |
| "learning_rate": 0.00029309470917882497, |
| "loss": 1.3015, |
| "num_input_tokens_seen": 694681600, |
| "step": 10600, |
| "train_runtime": 5104.0171, |
| "train_tokens_per_second": 136104.873 |
| }, |
| { |
| "epoch": 0.107, |
| "grad_norm": 0.6387894749641418, |
| "learning_rate": 0.0002929512279133437, |
| "loss": 1.3342, |
| "num_input_tokens_seen": 701235200, |
| "step": 10700, |
| "train_runtime": 5151.2508, |
| "train_tokens_per_second": 136129.112 |
| }, |
| { |
| "epoch": 0.108, |
| "grad_norm": 0.48744165897369385, |
| "learning_rate": 0.0002928063071305844, |
| "loss": 1.2999, |
| "num_input_tokens_seen": 707788800, |
| "step": 10800, |
| "train_runtime": 5198.4813, |
| "train_tokens_per_second": 136152.995 |
| }, |
| { |
| "epoch": 0.109, |
| "grad_norm": 0.5223510265350342, |
| "learning_rate": 0.0002926599482898978, |
| "loss": 1.2996, |
| "num_input_tokens_seen": 714342400, |
| "step": 10900, |
| "train_runtime": 5244.0735, |
| "train_tokens_per_second": 136218.99 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.6020687222480774, |
| "learning_rate": 0.00029251215286511573, |
| "loss": 1.3029, |
| "num_input_tokens_seen": 720896000, |
| "step": 11000, |
| "train_runtime": 5291.0983, |
| "train_tokens_per_second": 136246.948 |
| }, |
| { |
| "epoch": 0.111, |
| "grad_norm": 0.5317751169204712, |
| "learning_rate": 0.00029236292234453647, |
| "loss": 1.316, |
| "num_input_tokens_seen": 727449600, |
| "step": 11100, |
| "train_runtime": 5342.4851, |
| "train_tokens_per_second": 136163.15 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 1.2369730472564697, |
| "learning_rate": 0.0002922122582309097, |
| "loss": 1.298, |
| "num_input_tokens_seen": 734003200, |
| "step": 11200, |
| "train_runtime": 5391.0041, |
| "train_tokens_per_second": 136153.338 |
| }, |
| { |
| "epoch": 0.113, |
| "grad_norm": 0.5294257998466492, |
| "learning_rate": 0.0002920601620414215, |
| "loss": 1.316, |
| "num_input_tokens_seen": 740556800, |
| "step": 11300, |
| "train_runtime": 5437.8422, |
| "train_tokens_per_second": 136185.784 |
| }, |
| { |
| "epoch": 0.114, |
| "grad_norm": 0.5318885445594788, |
| "learning_rate": 0.0002919066353076786, |
| "loss": 1.2993, |
| "num_input_tokens_seen": 747110400, |
| "step": 11400, |
| "train_runtime": 5484.1183, |
| "train_tokens_per_second": 136231.635 |
| }, |
| { |
| "epoch": 0.115, |
| "grad_norm": 0.5208443403244019, |
| "learning_rate": 0.00029175167957569366, |
| "loss": 1.3066, |
| "num_input_tokens_seen": 753664000, |
| "step": 11500, |
| "train_runtime": 5531.5155, |
| "train_tokens_per_second": 136249.099 |
| }, |
| { |
| "epoch": 0.116, |
| "grad_norm": 0.5068408250808716, |
| "learning_rate": 0.0002915952964058691, |
| "loss": 1.3041, |
| "num_input_tokens_seen": 760217600, |
| "step": 11600, |
| "train_runtime": 5578.6188, |
| "train_tokens_per_second": 136273.445 |
| }, |
| { |
| "epoch": 0.117, |
| "grad_norm": 0.6206523776054382, |
| "learning_rate": 0.00029143748737298173, |
| "loss": 1.3061, |
| "num_input_tokens_seen": 766771200, |
| "step": 11700, |
| "train_runtime": 5631.31, |
| "train_tokens_per_second": 136162.136 |
| }, |
| { |
| "epoch": 0.118, |
| "grad_norm": 0.5741725564002991, |
| "learning_rate": 0.00029127825406616677, |
| "loss": 1.3097, |
| "num_input_tokens_seen": 773324800, |
| "step": 11800, |
| "train_runtime": 5678.817, |
| "train_tokens_per_second": 136177.096 |
| }, |
| { |
| "epoch": 0.119, |
| "grad_norm": 0.5251154899597168, |
| "learning_rate": 0.0002911175980889019, |
| "loss": 1.3054, |
| "num_input_tokens_seen": 779878400, |
| "step": 11900, |
| "train_runtime": 5725.8659, |
| "train_tokens_per_second": 136202.701 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.4509083032608032, |
| "learning_rate": 0.00029095552105899095, |
| "loss": 1.301, |
| "num_input_tokens_seen": 786432000, |
| "step": 12000, |
| "train_runtime": 5772.0962, |
| "train_tokens_per_second": 136247.211 |
| }, |
| { |
| "epoch": 0.121, |
| "grad_norm": 0.4560108184814453, |
| "learning_rate": 0.0002907920246085478, |
| "loss": 1.2981, |
| "num_input_tokens_seen": 792985600, |
| "step": 12100, |
| "train_runtime": 5817.8977, |
| "train_tokens_per_second": 136301.056 |
| }, |
| { |
| "epoch": 0.122, |
| "grad_norm": 1.227121114730835, |
| "learning_rate": 0.00029062711038397996, |
| "loss": 1.302, |
| "num_input_tokens_seen": 799539200, |
| "step": 12200, |
| "train_runtime": 5870.3451, |
| "train_tokens_per_second": 136199.693 |
| }, |
| { |
| "epoch": 0.123, |
| "grad_norm": 0.4861258864402771, |
| "learning_rate": 0.00029046078004597175, |
| "loss": 1.318, |
| "num_input_tokens_seen": 806092800, |
| "step": 12300, |
| "train_runtime": 5916.8489, |
| "train_tokens_per_second": 136236.84 |
| }, |
| { |
| "epoch": 0.124, |
| "grad_norm": 0.9702387452125549, |
| "learning_rate": 0.00029029303526946796, |
| "loss": 1.2869, |
| "num_input_tokens_seen": 812646400, |
| "step": 12400, |
| "train_runtime": 5964.0243, |
| "train_tokens_per_second": 136258.063 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 0.4712119400501251, |
| "learning_rate": 0.0002901238777436565, |
| "loss": 1.2924, |
| "num_input_tokens_seen": 819200000, |
| "step": 12500, |
| "train_runtime": 6009.6089, |
| "train_tokens_per_second": 136315.026 |
| }, |
| { |
| "epoch": 0.126, |
| "grad_norm": 0.4670332372188568, |
| "learning_rate": 0.00028995330917195184, |
| "loss": 1.2942, |
| "num_input_tokens_seen": 825753600, |
| "step": 12600, |
| "train_runtime": 6061.3166, |
| "train_tokens_per_second": 136233.371 |
| }, |
| { |
| "epoch": 0.127, |
| "grad_norm": 0.4821685552597046, |
| "learning_rate": 0.00028978133127197765, |
| "loss": 1.2856, |
| "num_input_tokens_seen": 832307200, |
| "step": 12700, |
| "train_runtime": 6108.5206, |
| "train_tokens_per_second": 136253.481 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.5634518265724182, |
| "learning_rate": 0.0002896079457755493, |
| "loss": 1.2982, |
| "num_input_tokens_seen": 838860800, |
| "step": 12800, |
| "train_runtime": 6155.2503, |
| "train_tokens_per_second": 136283.785 |
| }, |
| { |
| "epoch": 0.129, |
| "grad_norm": 0.45673057436943054, |
| "learning_rate": 0.000289433154428657, |
| "loss": 1.2997, |
| "num_input_tokens_seen": 845414400, |
| "step": 12900, |
| "train_runtime": 6202.1106, |
| "train_tokens_per_second": 136310.758 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.4386661648750305, |
| "learning_rate": 0.0002892569589914476, |
| "loss": 1.2985, |
| "num_input_tokens_seen": 851968000, |
| "step": 13000, |
| "train_runtime": 6249.4681, |
| "train_tokens_per_second": 136326.482 |
| }, |
| { |
| "epoch": 0.131, |
| "grad_norm": 0.4749270975589752, |
| "learning_rate": 0.0002890793612382072, |
| "loss": 1.2946, |
| "num_input_tokens_seen": 858521600, |
| "step": 13100, |
| "train_runtime": 6301.6638, |
| "train_tokens_per_second": 136237.291 |
| }, |
| { |
| "epoch": 0.132, |
| "grad_norm": 0.5405780673027039, |
| "learning_rate": 0.0002889003629573432, |
| "loss": 1.2857, |
| "num_input_tokens_seen": 865075200, |
| "step": 13200, |
| "train_runtime": 6349.664, |
| "train_tokens_per_second": 136239.523 |
| }, |
| { |
| "epoch": 0.133, |
| "grad_norm": 0.4045722782611847, |
| "learning_rate": 0.00028871996595136626, |
| "loss": 1.3009, |
| "num_input_tokens_seen": 871628800, |
| "step": 13300, |
| "train_runtime": 6396.2349, |
| "train_tokens_per_second": 136272.169 |
| }, |
| { |
| "epoch": 0.134, |
| "grad_norm": 0.5851114392280579, |
| "learning_rate": 0.0002885381720368723, |
| "loss": 1.3026, |
| "num_input_tokens_seen": 878182400, |
| "step": 13400, |
| "train_runtime": 6442.8884, |
| "train_tokens_per_second": 136302.594 |
| }, |
| { |
| "epoch": 0.135, |
| "grad_norm": 0.5135608315467834, |
| "learning_rate": 0.000288354983044524, |
| "loss": 1.2778, |
| "num_input_tokens_seen": 884736000, |
| "step": 13500, |
| "train_runtime": 6489.2417, |
| "train_tokens_per_second": 136338.889 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 0.4828953742980957, |
| "learning_rate": 0.00028817040081903245, |
| "loss": 1.2864, |
| "num_input_tokens_seen": 891289600, |
| "step": 13600, |
| "train_runtime": 6540.9813, |
| "train_tokens_per_second": 136262.368 |
| }, |
| { |
| "epoch": 0.137, |
| "grad_norm": 0.5756350755691528, |
| "learning_rate": 0.00028798442721913867, |
| "loss": 1.2858, |
| "num_input_tokens_seen": 897843200, |
| "step": 13700, |
| "train_runtime": 6588.3179, |
| "train_tokens_per_second": 136278.063 |
| }, |
| { |
| "epoch": 0.138, |
| "grad_norm": 0.5231483578681946, |
| "learning_rate": 0.00028779706411759465, |
| "loss": 1.282, |
| "num_input_tokens_seen": 904396800, |
| "step": 13800, |
| "train_runtime": 6635.0521, |
| "train_tokens_per_second": 136305.909 |
| }, |
| { |
| "epoch": 0.139, |
| "grad_norm": 0.5475858449935913, |
| "learning_rate": 0.00028760831340114484, |
| "loss": 1.2797, |
| "num_input_tokens_seen": 910950400, |
| "step": 13900, |
| "train_runtime": 6681.4731, |
| "train_tokens_per_second": 136339.754 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.7064163684844971, |
| "learning_rate": 0.00028741817697050683, |
| "loss": 1.2927, |
| "num_input_tokens_seen": 917504000, |
| "step": 14000, |
| "train_runtime": 6730.4553, |
| "train_tokens_per_second": 136321.238 |
| }, |
| { |
| "epoch": 0.141, |
| "grad_norm": 0.5267386436462402, |
| "learning_rate": 0.00028722665674035233, |
| "loss": 1.2815, |
| "num_input_tokens_seen": 924057600, |
| "step": 14100, |
| "train_runtime": 6782.7717, |
| "train_tokens_per_second": 136235.987 |
| }, |
| { |
| "epoch": 0.142, |
| "grad_norm": 0.5816136598587036, |
| "learning_rate": 0.0002870337546392879, |
| "loss": 1.2983, |
| "num_input_tokens_seen": 930611200, |
| "step": 14200, |
| "train_runtime": 6829.7567, |
| "train_tokens_per_second": 136258.323 |
| }, |
| { |
| "epoch": 0.143, |
| "grad_norm": 0.4982451796531677, |
| "learning_rate": 0.00028683947260983576, |
| "loss": 1.3026, |
| "num_input_tokens_seen": 937164800, |
| "step": 14300, |
| "train_runtime": 6877.8163, |
| "train_tokens_per_second": 136259.063 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.49408379197120667, |
| "learning_rate": 0.00028664381260841356, |
| "loss": 1.2869, |
| "num_input_tokens_seen": 943718400, |
| "step": 14400, |
| "train_runtime": 6923.5994, |
| "train_tokens_per_second": 136304.593 |
| }, |
| { |
| "epoch": 0.145, |
| "grad_norm": 0.4885796904563904, |
| "learning_rate": 0.0002864467766053154, |
| "loss": 1.2768, |
| "num_input_tokens_seen": 950272000, |
| "step": 14500, |
| "train_runtime": 6969.9199, |
| "train_tokens_per_second": 136339.014 |
| }, |
| { |
| "epoch": 0.146, |
| "grad_norm": 0.5424348711967468, |
| "learning_rate": 0.00028624836658469165, |
| "loss": 1.2806, |
| "num_input_tokens_seen": 956825600, |
| "step": 14600, |
| "train_runtime": 7020.7829, |
| "train_tokens_per_second": 136284.743 |
| }, |
| { |
| "epoch": 0.147, |
| "grad_norm": 0.4333992898464203, |
| "learning_rate": 0.00028604858454452906, |
| "loss": 1.2776, |
| "num_input_tokens_seen": 963379200, |
| "step": 14700, |
| "train_runtime": 7066.7012, |
| "train_tokens_per_second": 136326.58 |
| }, |
| { |
| "epoch": 0.148, |
| "grad_norm": 1.3118066787719727, |
| "learning_rate": 0.00028584743249663057, |
| "loss": 1.3039, |
| "num_input_tokens_seen": 969932800, |
| "step": 14800, |
| "train_runtime": 7115.8691, |
| "train_tokens_per_second": 136305.6 |
| }, |
| { |
| "epoch": 0.149, |
| "grad_norm": 0.5320950150489807, |
| "learning_rate": 0.000285644912466595, |
| "loss": 1.2801, |
| "num_input_tokens_seen": 976486400, |
| "step": 14900, |
| "train_runtime": 7162.6662, |
| "train_tokens_per_second": 136330.016 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.6902542114257812, |
| "learning_rate": 0.00028544102649379684, |
| "loss": 1.2832, |
| "num_input_tokens_seen": 983040000, |
| "step": 15000, |
| "train_runtime": 7209.6657, |
| "train_tokens_per_second": 136350.29 |
| }, |
| { |
| "epoch": 0.151, |
| "grad_norm": 0.544683039188385, |
| "learning_rate": 0.00028523577663136556, |
| "loss": 1.2948, |
| "num_input_tokens_seen": 989593600, |
| "step": 15100, |
| "train_runtime": 7261.0326, |
| "train_tokens_per_second": 136288.275 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 0.500091552734375, |
| "learning_rate": 0.000285029164946165, |
| "loss": 1.2746, |
| "num_input_tokens_seen": 996147200, |
| "step": 15200, |
| "train_runtime": 7306.6445, |
| "train_tokens_per_second": 136334.427 |
| }, |
| { |
| "epoch": 0.153, |
| "grad_norm": 0.4995329678058624, |
| "learning_rate": 0.0002848211935187725, |
| "loss": 1.2893, |
| "num_input_tokens_seen": 1002700800, |
| "step": 15300, |
| "train_runtime": 7353.2711, |
| "train_tokens_per_second": 136361.19 |
| }, |
| { |
| "epoch": 0.154, |
| "grad_norm": 0.42985284328460693, |
| "learning_rate": 0.0002846118644434581, |
| "loss": 1.3077, |
| "num_input_tokens_seen": 1009254400, |
| "step": 15400, |
| "train_runtime": 7400.7889, |
| "train_tokens_per_second": 136371.192 |
| }, |
| { |
| "epoch": 0.155, |
| "grad_norm": 0.4847468137741089, |
| "learning_rate": 0.00028440117982816326, |
| "loss": 1.2723, |
| "num_input_tokens_seen": 1015808000, |
| "step": 15500, |
| "train_runtime": 7452.7433, |
| "train_tokens_per_second": 136299.877 |
| }, |
| { |
| "epoch": 0.156, |
| "grad_norm": 0.47867411375045776, |
| "learning_rate": 0.0002841891417944796, |
| "loss": 1.2754, |
| "num_input_tokens_seen": 1022361600, |
| "step": 15600, |
| "train_runtime": 7498.8195, |
| "train_tokens_per_second": 136336.339 |
| }, |
| { |
| "epoch": 0.157, |
| "grad_norm": 0.43365904688835144, |
| "learning_rate": 0.0002839757524776279, |
| "loss": 1.2737, |
| "num_input_tokens_seen": 1028915200, |
| "step": 15700, |
| "train_runtime": 7545.0284, |
| "train_tokens_per_second": 136369.957 |
| }, |
| { |
| "epoch": 0.158, |
| "grad_norm": 0.5739541053771973, |
| "learning_rate": 0.0002837610140264361, |
| "loss": 1.286, |
| "num_input_tokens_seen": 1035468800, |
| "step": 15800, |
| "train_runtime": 7597.8039, |
| "train_tokens_per_second": 136285.275 |
| }, |
| { |
| "epoch": 0.159, |
| "grad_norm": 0.4836307168006897, |
| "learning_rate": 0.0002835449286033182, |
| "loss": 1.2779, |
| "num_input_tokens_seen": 1042022400, |
| "step": 15900, |
| "train_runtime": 7643.6023, |
| "train_tokens_per_second": 136326.088 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.5712729692459106, |
| "learning_rate": 0.0002833274983842518, |
| "loss": 1.2702, |
| "num_input_tokens_seen": 1048576000, |
| "step": 16000, |
| "train_runtime": 7691.0096, |
| "train_tokens_per_second": 136337.887 |
| }, |
| { |
| "epoch": 0.161, |
| "grad_norm": 0.48568034172058105, |
| "learning_rate": 0.0002831087255587569, |
| "loss": 1.2696, |
| "num_input_tokens_seen": 1055129600, |
| "step": 16100, |
| "train_runtime": 7737.6132, |
| "train_tokens_per_second": 136363.705 |
| }, |
| { |
| "epoch": 0.162, |
| "grad_norm": 0.5240116715431213, |
| "learning_rate": 0.0002828886123298734, |
| "loss": 1.2636, |
| "num_input_tokens_seen": 1061683200, |
| "step": 16200, |
| "train_runtime": 7790.0975, |
| "train_tokens_per_second": 136286.253 |
| }, |
| { |
| "epoch": 0.163, |
| "grad_norm": 0.4505080580711365, |
| "learning_rate": 0.00028266716091413906, |
| "loss": 1.2679, |
| "num_input_tokens_seen": 1068236800, |
| "step": 16300, |
| "train_runtime": 7837.0156, |
| "train_tokens_per_second": 136306.581 |
| }, |
| { |
| "epoch": 0.164, |
| "grad_norm": 0.38184958696365356, |
| "learning_rate": 0.0002824443735415673, |
| "loss": 1.2801, |
| "num_input_tokens_seen": 1074790400, |
| "step": 16400, |
| "train_runtime": 7884.0198, |
| "train_tokens_per_second": 136325.178 |
| }, |
| { |
| "epoch": 0.165, |
| "grad_norm": 0.860382616519928, |
| "learning_rate": 0.0002822202524556243, |
| "loss": 1.2737, |
| "num_input_tokens_seen": 1081344000, |
| "step": 16500, |
| "train_runtime": 7930.486, |
| "train_tokens_per_second": 136352.803 |
| }, |
| { |
| "epoch": 0.166, |
| "grad_norm": 0.771594226360321, |
| "learning_rate": 0.00028199479991320695, |
| "loss": 1.2876, |
| "num_input_tokens_seen": 1087897600, |
| "step": 16600, |
| "train_runtime": 7977.0943, |
| "train_tokens_per_second": 136377.678 |
| }, |
| { |
| "epoch": 0.167, |
| "grad_norm": 0.4533759653568268, |
| "learning_rate": 0.00028176801818461994, |
| "loss": 1.2769, |
| "num_input_tokens_seen": 1094451200, |
| "step": 16700, |
| "train_runtime": 8024.6165, |
| "train_tokens_per_second": 136386.73 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 0.548772394657135, |
| "learning_rate": 0.00028153990955355273, |
| "loss": 1.2647, |
| "num_input_tokens_seen": 1101004800, |
| "step": 16800, |
| "train_runtime": 8077.0632, |
| "train_tokens_per_second": 136312.515 |
| }, |
| { |
| "epoch": 0.169, |
| "grad_norm": 0.5390068888664246, |
| "learning_rate": 0.00028131047631705665, |
| "loss": 1.2799, |
| "num_input_tokens_seen": 1107558400, |
| "step": 16900, |
| "train_runtime": 8123.3347, |
| "train_tokens_per_second": 136342.824 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.4429817795753479, |
| "learning_rate": 0.00028107972078552187, |
| "loss": 1.2727, |
| "num_input_tokens_seen": 1114112000, |
| "step": 17000, |
| "train_runtime": 8169.0719, |
| "train_tokens_per_second": 136381.71 |
| }, |
| { |
| "epoch": 0.171, |
| "grad_norm": 0.6212127208709717, |
| "learning_rate": 0.0002808476452826541, |
| "loss": 1.2743, |
| "num_input_tokens_seen": 1120665600, |
| "step": 17100, |
| "train_runtime": 8217.1136, |
| "train_tokens_per_second": 136381.904 |
| }, |
| { |
| "epoch": 0.172, |
| "grad_norm": 0.44569867849349976, |
| "learning_rate": 0.00028061425214545094, |
| "loss": 1.2628, |
| "num_input_tokens_seen": 1127219200, |
| "step": 17200, |
| "train_runtime": 8268.2495, |
| "train_tokens_per_second": 136331.057 |
| }, |
| { |
| "epoch": 0.173, |
| "grad_norm": 0.5025371313095093, |
| "learning_rate": 0.00028037954372417883, |
| "loss": 1.2651, |
| "num_input_tokens_seen": 1133772800, |
| "step": 17300, |
| "train_runtime": 8315.4333, |
| "train_tokens_per_second": 136345.607 |
| }, |
| { |
| "epoch": 0.174, |
| "grad_norm": 0.5257975459098816, |
| "learning_rate": 0.0002801435223823488, |
| "loss": 1.2701, |
| "num_input_tokens_seen": 1140326400, |
| "step": 17400, |
| "train_runtime": 8361.8666, |
| "train_tokens_per_second": 136372.23 |
| }, |
| { |
| "epoch": 0.175, |
| "grad_norm": 0.6858969926834106, |
| "learning_rate": 0.00027990619049669336, |
| "loss": 1.2759, |
| "num_input_tokens_seen": 1146880000, |
| "step": 17500, |
| "train_runtime": 8408.7431, |
| "train_tokens_per_second": 136391.371 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.5586578845977783, |
| "learning_rate": 0.00027966755045714177, |
| "loss": 1.2782, |
| "num_input_tokens_seen": 1153433600, |
| "step": 17600, |
| "train_runtime": 8455.5155, |
| "train_tokens_per_second": 136411.978 |
| }, |
| { |
| "epoch": 0.177, |
| "grad_norm": 0.583242654800415, |
| "learning_rate": 0.00027942760466679673, |
| "loss": 1.287, |
| "num_input_tokens_seen": 1159987200, |
| "step": 17700, |
| "train_runtime": 8508.2754, |
| "train_tokens_per_second": 136336.349 |
| }, |
| { |
| "epoch": 0.178, |
| "grad_norm": 0.5521747469902039, |
| "learning_rate": 0.00027918635554190956, |
| "loss": 1.2704, |
| "num_input_tokens_seen": 1166540800, |
| "step": 17800, |
| "train_runtime": 8555.5497, |
| "train_tokens_per_second": 136349.018 |
| }, |
| { |
| "epoch": 0.179, |
| "grad_norm": 0.6325215697288513, |
| "learning_rate": 0.00027894380551185636, |
| "loss": 1.2912, |
| "num_input_tokens_seen": 1173094400, |
| "step": 17900, |
| "train_runtime": 8602.3857, |
| "train_tokens_per_second": 136368.495 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.44643789529800415, |
| "learning_rate": 0.00027869995701911314, |
| "loss": 1.2762, |
| "num_input_tokens_seen": 1179648000, |
| "step": 18000, |
| "train_runtime": 8649.7648, |
| "train_tokens_per_second": 136379.2 |
| }, |
| { |
| "epoch": 0.181, |
| "grad_norm": 0.49556615948677063, |
| "learning_rate": 0.0002784548125192316, |
| "loss": 1.2577, |
| "num_input_tokens_seen": 1186201600, |
| "step": 18100, |
| "train_runtime": 8701.0558, |
| "train_tokens_per_second": 136328.467 |
| }, |
| { |
| "epoch": 0.182, |
| "grad_norm": 0.5336231589317322, |
| "learning_rate": 0.0002782083744808141, |
| "loss": 1.2629, |
| "num_input_tokens_seen": 1192755200, |
| "step": 18200, |
| "train_runtime": 8748.3794, |
| "train_tokens_per_second": 136340.131 |
| }, |
| { |
| "epoch": 0.183, |
| "grad_norm": 0.3993295431137085, |
| "learning_rate": 0.000277960645385489, |
| "loss": 1.2621, |
| "num_input_tokens_seen": 1199308800, |
| "step": 18300, |
| "train_runtime": 8795.9903, |
| "train_tokens_per_second": 136347.217 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 0.5608197450637817, |
| "learning_rate": 0.00027771162772788544, |
| "loss": 1.2746, |
| "num_input_tokens_seen": 1205862400, |
| "step": 18400, |
| "train_runtime": 8844.0918, |
| "train_tokens_per_second": 136346.663 |
| }, |
| { |
| "epoch": 0.185, |
| "grad_norm": 0.5299677848815918, |
| "learning_rate": 0.00027746132401560857, |
| "loss": 1.2608, |
| "num_input_tokens_seen": 1212416000, |
| "step": 18500, |
| "train_runtime": 8890.974, |
| "train_tokens_per_second": 136364.812 |
| }, |
| { |
| "epoch": 0.186, |
| "grad_norm": 0.5247559547424316, |
| "learning_rate": 0.0002772097367692139, |
| "loss": 1.2628, |
| "num_input_tokens_seen": 1218969600, |
| "step": 18600, |
| "train_runtime": 8937.3092, |
| "train_tokens_per_second": 136391.119 |
| }, |
| { |
| "epoch": 0.187, |
| "grad_norm": 0.4991471469402313, |
| "learning_rate": 0.00027695686852218226, |
| "loss": 1.2617, |
| "num_input_tokens_seen": 1225523200, |
| "step": 18700, |
| "train_runtime": 8984.1463, |
| "train_tokens_per_second": 136409.532 |
| }, |
| { |
| "epoch": 0.188, |
| "grad_norm": 0.4922790229320526, |
| "learning_rate": 0.00027670272182089416, |
| "loss": 1.277, |
| "num_input_tokens_seen": 1232076800, |
| "step": 18800, |
| "train_runtime": 9036.4876, |
| "train_tokens_per_second": 136344.656 |
| }, |
| { |
| "epoch": 0.189, |
| "grad_norm": 0.49377188086509705, |
| "learning_rate": 0.0002764472992246039, |
| "loss": 1.2767, |
| "num_input_tokens_seen": 1238630400, |
| "step": 18900, |
| "train_runtime": 9084.3866, |
| "train_tokens_per_second": 136347.169 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.6417357921600342, |
| "learning_rate": 0.0002761906033054143, |
| "loss": 1.2616, |
| "num_input_tokens_seen": 1245184000, |
| "step": 19000, |
| "train_runtime": 9130.7221, |
| "train_tokens_per_second": 136373.004 |
| }, |
| { |
| "epoch": 0.191, |
| "grad_norm": 0.44580140709877014, |
| "learning_rate": 0.00027593263664825045, |
| "loss": 1.2686, |
| "num_input_tokens_seen": 1251737600, |
| "step": 19100, |
| "train_runtime": 9176.6051, |
| "train_tokens_per_second": 136405.303 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.5867856740951538, |
| "learning_rate": 0.00027567340185083363, |
| "loss": 1.2638, |
| "num_input_tokens_seen": 1258291200, |
| "step": 19200, |
| "train_runtime": 9229.719, |
| "train_tokens_per_second": 136330.391 |
| }, |
| { |
| "epoch": 0.193, |
| "grad_norm": 0.4900195896625519, |
| "learning_rate": 0.00027541290152365537, |
| "loss": 1.263, |
| "num_input_tokens_seen": 1264844800, |
| "step": 19300, |
| "train_runtime": 9276.2421, |
| "train_tokens_per_second": 136353.147 |
| }, |
| { |
| "epoch": 0.194, |
| "grad_norm": 0.49572521448135376, |
| "learning_rate": 0.00027515113828995117, |
| "loss": 1.273, |
| "num_input_tokens_seen": 1271398400, |
| "step": 19400, |
| "train_runtime": 9323.5363, |
| "train_tokens_per_second": 136364.396 |
| }, |
| { |
| "epoch": 0.195, |
| "grad_norm": 0.440213680267334, |
| "learning_rate": 0.00027488811478567374, |
| "loss": 1.2657, |
| "num_input_tokens_seen": 1277952000, |
| "step": 19500, |
| "train_runtime": 9371.4717, |
| "train_tokens_per_second": 136366.201 |
| }, |
| { |
| "epoch": 0.196, |
| "grad_norm": 0.5604475736618042, |
| "learning_rate": 0.0002746238336594671, |
| "loss": 1.2619, |
| "num_input_tokens_seen": 1284505600, |
| "step": 19600, |
| "train_runtime": 9417.129, |
| "train_tokens_per_second": 136400.978 |
| }, |
| { |
| "epoch": 0.197, |
| "grad_norm": 0.45344123244285583, |
| "learning_rate": 0.00027435829757263894, |
| "loss": 1.2573, |
| "num_input_tokens_seen": 1291059200, |
| "step": 19700, |
| "train_runtime": 9468.5748, |
| "train_tokens_per_second": 136352.009 |
| }, |
| { |
| "epoch": 0.198, |
| "grad_norm": 0.7260287404060364, |
| "learning_rate": 0.0002740915091991349, |
| "loss": 1.2668, |
| "num_input_tokens_seen": 1297612800, |
| "step": 19800, |
| "train_runtime": 9515.3702, |
| "train_tokens_per_second": 136370.186 |
| }, |
| { |
| "epoch": 0.199, |
| "grad_norm": 0.47865310311317444, |
| "learning_rate": 0.0002738234712255109, |
| "loss": 1.2674, |
| "num_input_tokens_seen": 1304166400, |
| "step": 19900, |
| "train_runtime": 9562.0606, |
| "train_tokens_per_second": 136389.682 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.8422930240631104, |
| "learning_rate": 0.00027355418635090635, |
| "loss": 1.2671, |
| "num_input_tokens_seen": 1310720000, |
| "step": 20000, |
| "train_runtime": 9614.8867, |
| "train_tokens_per_second": 136321.939 |
| }, |
| { |
| "epoch": 0.201, |
| "grad_norm": 0.8500565886497498, |
| "learning_rate": 0.000273283657287017, |
| "loss": 1.2722, |
| "num_input_tokens_seen": 1317273600, |
| "step": 20100, |
| "train_runtime": 9662.5316, |
| "train_tokens_per_second": 136327.999 |
| }, |
| { |
| "epoch": 0.202, |
| "grad_norm": 0.4511219263076782, |
| "learning_rate": 0.00027301188675806745, |
| "loss": 1.257, |
| "num_input_tokens_seen": 1323827200, |
| "step": 20200, |
| "train_runtime": 9710.3614, |
| "train_tokens_per_second": 136331.404 |
| }, |
| { |
| "epoch": 0.203, |
| "grad_norm": 0.6040441393852234, |
| "learning_rate": 0.0002727388775007839, |
| "loss": 1.2787, |
| "num_input_tokens_seen": 1330380800, |
| "step": 20300, |
| "train_runtime": 9757.2415, |
| "train_tokens_per_second": 136348.045 |
| }, |
| { |
| "epoch": 0.204, |
| "grad_norm": 0.531548798084259, |
| "learning_rate": 0.0002724646322643666, |
| "loss": 1.2567, |
| "num_input_tokens_seen": 1336934400, |
| "step": 20400, |
| "train_runtime": 9803.907, |
| "train_tokens_per_second": 136367.512 |
| }, |
| { |
| "epoch": 0.205, |
| "grad_norm": 0.5128377079963684, |
| "learning_rate": 0.000272189153810462, |
| "loss": 1.2634, |
| "num_input_tokens_seen": 1343488000, |
| "step": 20500, |
| "train_runtime": 9849.6975, |
| "train_tokens_per_second": 136398.909 |
| }, |
| { |
| "epoch": 0.206, |
| "grad_norm": 0.5763120651245117, |
| "learning_rate": 0.0002719124449131351, |
| "loss": 1.2708, |
| "num_input_tokens_seen": 1350041600, |
| "step": 20600, |
| "train_runtime": 9902.5747, |
| "train_tokens_per_second": 136332.382 |
| }, |
| { |
| "epoch": 0.207, |
| "grad_norm": 0.5266316533088684, |
| "learning_rate": 0.00027163450835884144, |
| "loss": 1.2579, |
| "num_input_tokens_seen": 1356595200, |
| "step": 20700, |
| "train_runtime": 9950.4471, |
| "train_tokens_per_second": 136335.1 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.6279749274253845, |
| "learning_rate": 0.00027135534694639894, |
| "loss": 1.2566, |
| "num_input_tokens_seen": 1363148800, |
| "step": 20800, |
| "train_runtime": 9997.0613, |
| "train_tokens_per_second": 136354.951 |
| }, |
| { |
| "epoch": 0.209, |
| "grad_norm": 0.5421542525291443, |
| "learning_rate": 0.00027107496348696003, |
| "loss": 1.2687, |
| "num_input_tokens_seen": 1369702400, |
| "step": 20900, |
| "train_runtime": 10044.3146, |
| "train_tokens_per_second": 136365.939 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.5376498699188232, |
| "learning_rate": 0.00027079336080398296, |
| "loss": 1.2772, |
| "num_input_tokens_seen": 1376256000, |
| "step": 21000, |
| "train_runtime": 10090.6051, |
| "train_tokens_per_second": 136389.839 |
| }, |
| { |
| "epoch": 0.211, |
| "grad_norm": 0.41719597578048706, |
| "learning_rate": 0.00027051054173320366, |
| "loss": 1.2502, |
| "num_input_tokens_seen": 1382809600, |
| "step": 21100, |
| "train_runtime": 10143.3243, |
| "train_tokens_per_second": 136327.063 |
| }, |
| { |
| "epoch": 0.212, |
| "grad_norm": 0.4714694321155548, |
| "learning_rate": 0.000270226509122607, |
| "loss": 1.2537, |
| "num_input_tokens_seen": 1389363200, |
| "step": 21200, |
| "train_runtime": 10188.8874, |
| "train_tokens_per_second": 136360.639 |
| }, |
| { |
| "epoch": 0.213, |
| "grad_norm": 0.4616274833679199, |
| "learning_rate": 0.0002699412658323983, |
| "loss": 1.2571, |
| "num_input_tokens_seen": 1395916800, |
| "step": 21300, |
| "train_runtime": 10236.5378, |
| "train_tokens_per_second": 136366.107 |
| }, |
| { |
| "epoch": 0.214, |
| "grad_norm": 0.4215717911720276, |
| "learning_rate": 0.00026965481473497423, |
| "loss": 1.2687, |
| "num_input_tokens_seen": 1402470400, |
| "step": 21400, |
| "train_runtime": 10282.9404, |
| "train_tokens_per_second": 136388.071 |
| }, |
| { |
| "epoch": 0.215, |
| "grad_norm": 0.5976271033287048, |
| "learning_rate": 0.0002693671587148942, |
| "loss": 1.2573, |
| "num_input_tokens_seen": 1409024000, |
| "step": 21500, |
| "train_runtime": 10329.955, |
| "train_tokens_per_second": 136401.756 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 0.5200098752975464, |
| "learning_rate": 0.0002690783006688511, |
| "loss": 1.247, |
| "num_input_tokens_seen": 1415577600, |
| "step": 21600, |
| "train_runtime": 10382.0767, |
| "train_tokens_per_second": 136348.213 |
| }, |
| { |
| "epoch": 0.217, |
| "grad_norm": 0.8170623779296875, |
| "learning_rate": 0.0002687882435056423, |
| "loss": 1.2562, |
| "num_input_tokens_seen": 1422131200, |
| "step": 21700, |
| "train_runtime": 10429.827, |
| "train_tokens_per_second": 136352.329 |
| }, |
| { |
| "epoch": 0.218, |
| "grad_norm": 0.52497398853302, |
| "learning_rate": 0.0002684969901461402, |
| "loss": 1.2533, |
| "num_input_tokens_seen": 1428684800, |
| "step": 21800, |
| "train_runtime": 10476.8104, |
| "train_tokens_per_second": 136366.388 |
| }, |
| { |
| "epoch": 0.219, |
| "grad_norm": 0.4417087137699127, |
| "learning_rate": 0.000268204543523263, |
| "loss": 1.2721, |
| "num_input_tokens_seen": 1435238400, |
| "step": 21900, |
| "train_runtime": 10524.1028, |
| "train_tokens_per_second": 136376.319 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.5729189515113831, |
| "learning_rate": 0.0002679109065819447, |
| "loss": 1.2654, |
| "num_input_tokens_seen": 1441792000, |
| "step": 22000, |
| "train_runtime": 10572.3447, |
| "train_tokens_per_second": 136373.911 |
| }, |
| { |
| "epoch": 0.221, |
| "grad_norm": 0.5111753940582275, |
| "learning_rate": 0.0002676160822791062, |
| "loss": 1.2581, |
| "num_input_tokens_seen": 1448345600, |
| "step": 22100, |
| "train_runtime": 10619.3771, |
| "train_tokens_per_second": 136387.057 |
| }, |
| { |
| "epoch": 0.222, |
| "grad_norm": 0.4302677512168884, |
| "learning_rate": 0.00026732007358362496, |
| "loss": 1.2581, |
| "num_input_tokens_seen": 1454899200, |
| "step": 22200, |
| "train_runtime": 10666.0714, |
| "train_tokens_per_second": 136404.413 |
| }, |
| { |
| "epoch": 0.223, |
| "grad_norm": 3.9242477416992188, |
| "learning_rate": 0.0002670228834763052, |
| "loss": 1.2872, |
| "num_input_tokens_seen": 1461452800, |
| "step": 22300, |
| "train_runtime": 10719.3985, |
| "train_tokens_per_second": 136337.203 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.7662601470947266, |
| "learning_rate": 0.00026672451494984804, |
| "loss": 1.2602, |
| "num_input_tokens_seen": 1468006400, |
| "step": 22400, |
| "train_runtime": 10767.2807, |
| "train_tokens_per_second": 136339.568 |
| }, |
| { |
| "epoch": 0.225, |
| "grad_norm": 0.48544740676879883, |
| "learning_rate": 0.0002664249710088213, |
| "loss": 1.257, |
| "num_input_tokens_seen": 1474560000, |
| "step": 22500, |
| "train_runtime": 10813.982, |
| "train_tokens_per_second": 136356.802 |
| }, |
| { |
| "epoch": 0.226, |
| "grad_norm": 0.4495686888694763, |
| "learning_rate": 0.00026612425466962893, |
| "loss": 1.2552, |
| "num_input_tokens_seen": 1481113600, |
| "step": 22600, |
| "train_runtime": 10860.2948, |
| "train_tokens_per_second": 136378.766 |
| }, |
| { |
| "epoch": 0.227, |
| "grad_norm": 0.5733143091201782, |
| "learning_rate": 0.00026582236896048134, |
| "loss": 1.2403, |
| "num_input_tokens_seen": 1487667200, |
| "step": 22700, |
| "train_runtime": 10907.2107, |
| "train_tokens_per_second": 136393.001 |
| }, |
| { |
| "epoch": 0.228, |
| "grad_norm": 0.7318263649940491, |
| "learning_rate": 0.00026551931692136413, |
| "loss": 1.2468, |
| "num_input_tokens_seen": 1494220800, |
| "step": 22800, |
| "train_runtime": 10953.9499, |
| "train_tokens_per_second": 136409.315 |
| }, |
| { |
| "epoch": 0.229, |
| "grad_norm": 0.5192084312438965, |
| "learning_rate": 0.00026521510160400804, |
| "loss": 1.2458, |
| "num_input_tokens_seen": 1500774400, |
| "step": 22900, |
| "train_runtime": 11006.6198, |
| "train_tokens_per_second": 136351.98 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.4651305079460144, |
| "learning_rate": 0.00026490972607185793, |
| "loss": 1.2601, |
| "num_input_tokens_seen": 1507328000, |
| "step": 23000, |
| "train_runtime": 11053.8305, |
| "train_tokens_per_second": 136362.504 |
| }, |
| { |
| "epoch": 0.231, |
| "grad_norm": 0.5470275282859802, |
| "learning_rate": 0.0002646031934000421, |
| "loss": 1.2405, |
| "num_input_tokens_seen": 1513881600, |
| "step": 23100, |
| "train_runtime": 11099.6418, |
| "train_tokens_per_second": 136390.132 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 0.519235372543335, |
| "learning_rate": 0.00026429550667534095, |
| "loss": 1.2586, |
| "num_input_tokens_seen": 1520435200, |
| "step": 23200, |
| "train_runtime": 11152.1986, |
| "train_tokens_per_second": 136335.018 |
| }, |
| { |
| "epoch": 0.233, |
| "grad_norm": 0.4892626404762268, |
| "learning_rate": 0.0002639866689961565, |
| "loss": 1.2595, |
| "num_input_tokens_seen": 1526988800, |
| "step": 23300, |
| "train_runtime": 11199.2653, |
| "train_tokens_per_second": 136347.23 |
| }, |
| { |
| "epoch": 0.234, |
| "grad_norm": 0.4089221656322479, |
| "learning_rate": 0.00026367668347248083, |
| "loss": 1.2393, |
| "num_input_tokens_seen": 1533542400, |
| "step": 23400, |
| "train_runtime": 11247.6635, |
| "train_tokens_per_second": 136343.196 |
| }, |
| { |
| "epoch": 0.235, |
| "grad_norm": 0.467582106590271, |
| "learning_rate": 0.0002633655532258646, |
| "loss": 1.2534, |
| "num_input_tokens_seen": 1540096000, |
| "step": 23500, |
| "train_runtime": 11294.1646, |
| "train_tokens_per_second": 136362.099 |
| }, |
| { |
| "epoch": 0.236, |
| "grad_norm": 0.48117080330848694, |
| "learning_rate": 0.000263053281389386, |
| "loss": 1.2644, |
| "num_input_tokens_seen": 1546649600, |
| "step": 23600, |
| "train_runtime": 11340.9021, |
| "train_tokens_per_second": 136378.004 |
| }, |
| { |
| "epoch": 0.237, |
| "grad_norm": 0.4495629072189331, |
| "learning_rate": 0.0002627398711076189, |
| "loss": 1.2442, |
| "num_input_tokens_seen": 1553203200, |
| "step": 23700, |
| "train_runtime": 11387.7566, |
| "train_tokens_per_second": 136392.377 |
| }, |
| { |
| "epoch": 0.238, |
| "grad_norm": 0.4376384913921356, |
| "learning_rate": 0.0002624253255366014, |
| "loss": 1.2489, |
| "num_input_tokens_seen": 1559756800, |
| "step": 23800, |
| "train_runtime": 11439.8893, |
| "train_tokens_per_second": 136343.696 |
| }, |
| { |
| "epoch": 0.239, |
| "grad_norm": 0.4419648349285126, |
| "learning_rate": 0.0002621096478438039, |
| "loss": 1.2353, |
| "num_input_tokens_seen": 1566310400, |
| "step": 23900, |
| "train_runtime": 11486.001, |
| "train_tokens_per_second": 136366.904 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.669739305973053, |
| "learning_rate": 0.00026179284120809727, |
| "loss": 1.2528, |
| "num_input_tokens_seen": 1572864000, |
| "step": 24000, |
| "train_runtime": 11533.9608, |
| "train_tokens_per_second": 136368.072 |
| }, |
| { |
| "epoch": 0.241, |
| "grad_norm": 0.4047415554523468, |
| "learning_rate": 0.0002614749088197208, |
| "loss": 1.2679, |
| "num_input_tokens_seen": 1579417600, |
| "step": 24100, |
| "train_runtime": 11582.9583, |
| "train_tokens_per_second": 136357.013 |
| }, |
| { |
| "epoch": 0.242, |
| "grad_norm": 0.5224933624267578, |
| "learning_rate": 0.00026115585388025015, |
| "loss": 1.2425, |
| "num_input_tokens_seen": 1585971200, |
| "step": 24200, |
| "train_runtime": 11630.022, |
| "train_tokens_per_second": 136368.719 |
| }, |
| { |
| "epoch": 0.243, |
| "grad_norm": 0.5125856399536133, |
| "learning_rate": 0.00026083567960256493, |
| "loss": 1.2423, |
| "num_input_tokens_seen": 1592524800, |
| "step": 24300, |
| "train_runtime": 11677.13, |
| "train_tokens_per_second": 136379.813 |
| }, |
| { |
| "epoch": 0.244, |
| "grad_norm": 0.5344144701957703, |
| "learning_rate": 0.00026051438921081667, |
| "loss": 1.2431, |
| "num_input_tokens_seen": 1599078400, |
| "step": 24400, |
| "train_runtime": 11723.5349, |
| "train_tokens_per_second": 136398.997 |
| }, |
| { |
| "epoch": 0.245, |
| "grad_norm": 0.4386890232563019, |
| "learning_rate": 0.00026019198594039595, |
| "loss": 1.2426, |
| "num_input_tokens_seen": 1605632000, |
| "step": 24500, |
| "train_runtime": 11773.1296, |
| "train_tokens_per_second": 136381.069 |
| }, |
| { |
| "epoch": 0.246, |
| "grad_norm": 0.4986630082130432, |
| "learning_rate": 0.00025986847303790026, |
| "loss": 1.2531, |
| "num_input_tokens_seen": 1612185600, |
| "step": 24600, |
| "train_runtime": 11820.6579, |
| "train_tokens_per_second": 136387.13 |
| }, |
| { |
| "epoch": 0.247, |
| "grad_norm": 0.5271715521812439, |
| "learning_rate": 0.00025954385376110076, |
| "loss": 1.249, |
| "num_input_tokens_seen": 1618739200, |
| "step": 24700, |
| "train_runtime": 11867.4874, |
| "train_tokens_per_second": 136401.172 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 0.45263609290122986, |
| "learning_rate": 0.00025921813137891005, |
| "loss": 1.2507, |
| "num_input_tokens_seen": 1625292800, |
| "step": 24800, |
| "train_runtime": 11919.9131, |
| "train_tokens_per_second": 136351.061 |
| }, |
| { |
| "epoch": 0.249, |
| "grad_norm": 0.5932081937789917, |
| "learning_rate": 0.000258891309171349, |
| "loss": 1.2438, |
| "num_input_tokens_seen": 1631846400, |
| "step": 24900, |
| "train_runtime": 11962.6395, |
| "train_tokens_per_second": 136411.902 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.5539859533309937, |
| "learning_rate": 0.00025856339042951344, |
| "loss": 1.2548, |
| "num_input_tokens_seen": 1638400000, |
| "step": 25000, |
| "train_runtime": 12014.9411, |
| "train_tokens_per_second": 136363.548 |
| }, |
| { |
| "epoch": 0.251, |
| "grad_norm": 0.5236772298812866, |
| "learning_rate": 0.0002582343784555415, |
| "loss": 1.2386, |
| "num_input_tokens_seen": 1644953600, |
| "step": 25100, |
| "train_runtime": 12062.3997, |
| "train_tokens_per_second": 136370.344 |
| }, |
| { |
| "epoch": 0.252, |
| "grad_norm": 0.5913048982620239, |
| "learning_rate": 0.00025790427656258017, |
| "loss": 1.2354, |
| "num_input_tokens_seen": 1651507200, |
| "step": 25200, |
| "train_runtime": 12108.5333, |
| "train_tokens_per_second": 136392.01 |
| }, |
| { |
| "epoch": 0.253, |
| "grad_norm": 0.5929732322692871, |
| "learning_rate": 0.00025757308807475185, |
| "loss": 1.2582, |
| "num_input_tokens_seen": 1658060800, |
| "step": 25300, |
| "train_runtime": 12154.8252, |
| "train_tokens_per_second": 136411.736 |
| }, |
| { |
| "epoch": 0.254, |
| "grad_norm": 0.4542764723300934, |
| "learning_rate": 0.00025724081632712086, |
| "loss": 1.2488, |
| "num_input_tokens_seen": 1664614400, |
| "step": 25400, |
| "train_runtime": 12207.8935, |
| "train_tokens_per_second": 136355.58 |
| }, |
| { |
| "epoch": 0.255, |
| "grad_norm": 1.0848513841629028, |
| "learning_rate": 0.0002569074646656601, |
| "loss": 1.2375, |
| "num_input_tokens_seen": 1671168000, |
| "step": 25500, |
| "train_runtime": 12254.3162, |
| "train_tokens_per_second": 136373.827 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.5190780162811279, |
| "learning_rate": 0.00025657303644721695, |
| "loss": 1.236, |
| "num_input_tokens_seen": 1677721600, |
| "step": 25600, |
| "train_runtime": 12301.2378, |
| "train_tokens_per_second": 136386.405 |
| }, |
| { |
| "epoch": 0.257, |
| "grad_norm": 0.43418362736701965, |
| "learning_rate": 0.00025623753503948004, |
| "loss": 1.2484, |
| "num_input_tokens_seen": 1684275200, |
| "step": 25700, |
| "train_runtime": 12347.684, |
| "train_tokens_per_second": 136404.138 |
| }, |
| { |
| "epoch": 0.258, |
| "grad_norm": 0.4586409032344818, |
| "learning_rate": 0.00025590096382094475, |
| "loss": 1.2674, |
| "num_input_tokens_seen": 1690828800, |
| "step": 25800, |
| "train_runtime": 12394.5809, |
| "train_tokens_per_second": 136416.778 |
| }, |
| { |
| "epoch": 0.259, |
| "grad_norm": 0.5069702863693237, |
| "learning_rate": 0.00025556332618087945, |
| "loss": 1.2428, |
| "num_input_tokens_seen": 1697382400, |
| "step": 25900, |
| "train_runtime": 12447.2116, |
| "train_tokens_per_second": 136366.478 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.591788649559021, |
| "learning_rate": 0.00025522462551929155, |
| "loss": 1.2417, |
| "num_input_tokens_seen": 1703936000, |
| "step": 26000, |
| "train_runtime": 12492.8891, |
| "train_tokens_per_second": 136392.47 |
| }, |
| { |
| "epoch": 0.261, |
| "grad_norm": 0.6001791954040527, |
| "learning_rate": 0.00025488486524689283, |
| "loss": 1.2407, |
| "num_input_tokens_seen": 1710489600, |
| "step": 26100, |
| "train_runtime": 12539.4548, |
| "train_tokens_per_second": 136408.61 |
| }, |
| { |
| "epoch": 0.262, |
| "grad_norm": 0.47005897760391235, |
| "learning_rate": 0.00025454404878506555, |
| "loss": 1.2558, |
| "num_input_tokens_seen": 1717043200, |
| "step": 26200, |
| "train_runtime": 12587.1655, |
| "train_tokens_per_second": 136412.221 |
| }, |
| { |
| "epoch": 0.263, |
| "grad_norm": 0.42708972096443176, |
| "learning_rate": 0.0002542021795658276, |
| "loss": 1.2445, |
| "num_input_tokens_seen": 1723596800, |
| "step": 26300, |
| "train_runtime": 12634.1294, |
| "train_tokens_per_second": 136423.868 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 0.48100486397743225, |
| "learning_rate": 0.0002538592610317984, |
| "loss": 1.2416, |
| "num_input_tokens_seen": 1730150400, |
| "step": 26400, |
| "train_runtime": 12686.5075, |
| "train_tokens_per_second": 136377.202 |
| }, |
| { |
| "epoch": 0.265, |
| "grad_norm": 0.5689502954483032, |
| "learning_rate": 0.00025351529663616355, |
| "loss": 1.2476, |
| "num_input_tokens_seen": 1736704000, |
| "step": 26500, |
| "train_runtime": 12733.1403, |
| "train_tokens_per_second": 136392.435 |
| }, |
| { |
| "epoch": 0.266, |
| "grad_norm": 0.3999510705471039, |
| "learning_rate": 0.00025317028984264087, |
| "loss": 1.2507, |
| "num_input_tokens_seen": 1743257600, |
| "step": 26600, |
| "train_runtime": 12780.4326, |
| "train_tokens_per_second": 136400.515 |
| }, |
| { |
| "epoch": 0.267, |
| "grad_norm": 0.4349440336227417, |
| "learning_rate": 0.0002528242441254448, |
| "loss": 1.2359, |
| "num_input_tokens_seen": 1749811200, |
| "step": 26700, |
| "train_runtime": 12826.6298, |
| "train_tokens_per_second": 136420.184 |
| }, |
| { |
| "epoch": 0.268, |
| "grad_norm": 0.40468648076057434, |
| "learning_rate": 0.000252477162969252, |
| "loss": 1.2463, |
| "num_input_tokens_seen": 1756364800, |
| "step": 26800, |
| "train_runtime": 12873.4848, |
| "train_tokens_per_second": 136432.739 |
| }, |
| { |
| "epoch": 0.269, |
| "grad_norm": 0.5858653783798218, |
| "learning_rate": 0.00025212904986916584, |
| "loss": 1.2385, |
| "num_input_tokens_seen": 1762918400, |
| "step": 26900, |
| "train_runtime": 12926.2009, |
| "train_tokens_per_second": 136383.336 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.4621046483516693, |
| "learning_rate": 0.00025177990833068133, |
| "loss": 1.2366, |
| "num_input_tokens_seen": 1769472000, |
| "step": 27000, |
| "train_runtime": 12973.4952, |
| "train_tokens_per_second": 136391.31 |
| }, |
| { |
| "epoch": 0.271, |
| "grad_norm": 0.4884892404079437, |
| "learning_rate": 0.0002514297418696499, |
| "loss": 1.2436, |
| "num_input_tokens_seen": 1776025600, |
| "step": 27100, |
| "train_runtime": 13021.2871, |
| "train_tokens_per_second": 136394.013 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.5108981132507324, |
| "learning_rate": 0.0002510785540122439, |
| "loss": 1.2423, |
| "num_input_tokens_seen": 1782579200, |
| "step": 27200, |
| "train_runtime": 13068.0423, |
| "train_tokens_per_second": 136407.517 |
| }, |
| { |
| "epoch": 0.273, |
| "grad_norm": 0.3898067772388458, |
| "learning_rate": 0.0002507263482949212, |
| "loss": 1.2415, |
| "num_input_tokens_seen": 1789132800, |
| "step": 27300, |
| "train_runtime": 13113.8421, |
| "train_tokens_per_second": 136430.864 |
| }, |
| { |
| "epoch": 0.274, |
| "grad_norm": 0.5622383952140808, |
| "learning_rate": 0.0002503731282643894, |
| "loss": 1.2378, |
| "num_input_tokens_seen": 1795686400, |
| "step": 27400, |
| "train_runtime": 13161.1635, |
| "train_tokens_per_second": 136438.272 |
| }, |
| { |
| "epoch": 0.275, |
| "grad_norm": 0.7748796343803406, |
| "learning_rate": 0.0002500188974775704, |
| "loss": 1.248, |
| "num_input_tokens_seen": 1802240000, |
| "step": 27500, |
| "train_runtime": 13209.4471, |
| "train_tokens_per_second": 136435.688 |
| }, |
| { |
| "epoch": 0.276, |
| "grad_norm": 0.8867826461791992, |
| "learning_rate": 0.00024966365950156416, |
| "loss": 1.2409, |
| "num_input_tokens_seen": 1808793600, |
| "step": 27600, |
| "train_runtime": 13256.4066, |
| "train_tokens_per_second": 136446.751 |
| }, |
| { |
| "epoch": 0.277, |
| "grad_norm": 0.49997836351394653, |
| "learning_rate": 0.00024930741791361326, |
| "loss": 1.2382, |
| "num_input_tokens_seen": 1815347200, |
| "step": 27700, |
| "train_runtime": 13309.6196, |
| "train_tokens_per_second": 136393.62 |
| }, |
| { |
| "epoch": 0.278, |
| "grad_norm": 0.5048521161079407, |
| "learning_rate": 0.0002489501763010664, |
| "loss": 1.2351, |
| "num_input_tokens_seen": 1821900800, |
| "step": 27800, |
| "train_runtime": 13356.706, |
| "train_tokens_per_second": 136403.451 |
| }, |
| { |
| "epoch": 0.279, |
| "grad_norm": 0.5528578162193298, |
| "learning_rate": 0.00024859193826134285, |
| "loss": 1.2404, |
| "num_input_tokens_seen": 1828454400, |
| "step": 27900, |
| "train_runtime": 13405.5813, |
| "train_tokens_per_second": 136395.01 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.44376805424690247, |
| "learning_rate": 0.00024823270740189556, |
| "loss": 1.2461, |
| "num_input_tokens_seen": 1835008000, |
| "step": 28000, |
| "train_runtime": 13452.7686, |
| "train_tokens_per_second": 136403.743 |
| }, |
| { |
| "epoch": 0.281, |
| "grad_norm": 0.5072674751281738, |
| "learning_rate": 0.00024787248734017527, |
| "loss": 1.2301, |
| "num_input_tokens_seen": 1841561600, |
| "step": 28100, |
| "train_runtime": 13501.0413, |
| "train_tokens_per_second": 136401.449 |
| }, |
| { |
| "epoch": 0.282, |
| "grad_norm": 0.46835577487945557, |
| "learning_rate": 0.0002475112817035941, |
| "loss": 1.237, |
| "num_input_tokens_seen": 1848115200, |
| "step": 28200, |
| "train_runtime": 13547.4814, |
| "train_tokens_per_second": 136417.622 |
| }, |
| { |
| "epoch": 0.283, |
| "grad_norm": 0.4893036186695099, |
| "learning_rate": 0.0002471490941294887, |
| "loss": 1.2612, |
| "num_input_tokens_seen": 1854668800, |
| "step": 28300, |
| "train_runtime": 13593.9904, |
| "train_tokens_per_second": 136432.993 |
| }, |
| { |
| "epoch": 0.284, |
| "grad_norm": 0.66542649269104, |
| "learning_rate": 0.000246785928265084, |
| "loss": 1.2405, |
| "num_input_tokens_seen": 1861222400, |
| "step": 28400, |
| "train_runtime": 13646.3147, |
| "train_tokens_per_second": 136390.113 |
| }, |
| { |
| "epoch": 0.285, |
| "grad_norm": 0.669306755065918, |
| "learning_rate": 0.0002464217877674562, |
| "loss": 1.2409, |
| "num_input_tokens_seen": 1867776000, |
| "step": 28500, |
| "train_runtime": 13692.502, |
| "train_tokens_per_second": 136408.671 |
| }, |
| { |
| "epoch": 0.286, |
| "grad_norm": 0.43464845418930054, |
| "learning_rate": 0.0002460566763034961, |
| "loss": 1.2435, |
| "num_input_tokens_seen": 1874329600, |
| "step": 28600, |
| "train_runtime": 13738.7564, |
| "train_tokens_per_second": 136426.438 |
| }, |
| { |
| "epoch": 0.287, |
| "grad_norm": 0.5084187388420105, |
| "learning_rate": 0.00024569059754987196, |
| "loss": 1.2572, |
| "num_input_tokens_seen": 1880883200, |
| "step": 28700, |
| "train_runtime": 13785.6191, |
| "train_tokens_per_second": 136438.065 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.473603755235672, |
| "learning_rate": 0.00024532355519299296, |
| "loss": 1.2459, |
| "num_input_tokens_seen": 1887436800, |
| "step": 28800, |
| "train_runtime": 13838.5181, |
| "train_tokens_per_second": 136390.095 |
| }, |
| { |
| "epoch": 0.289, |
| "grad_norm": 0.493012011051178, |
| "learning_rate": 0.0002449555529289714, |
| "loss": 1.243, |
| "num_input_tokens_seen": 1893990400, |
| "step": 28900, |
| "train_runtime": 13886.1283, |
| "train_tokens_per_second": 136394.419 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.7421333193778992, |
| "learning_rate": 0.0002445865944635861, |
| "loss": 1.2455, |
| "num_input_tokens_seen": 1900544000, |
| "step": 29000, |
| "train_runtime": 13931.9406, |
| "train_tokens_per_second": 136416.315 |
| }, |
| { |
| "epoch": 0.291, |
| "grad_norm": 0.5027185678482056, |
| "learning_rate": 0.0002442166835122446, |
| "loss": 1.2686, |
| "num_input_tokens_seen": 1907097600, |
| "step": 29100, |
| "train_runtime": 13980.446, |
| "train_tokens_per_second": 136411.785 |
| }, |
| { |
| "epoch": 0.292, |
| "grad_norm": 0.48427557945251465, |
| "learning_rate": 0.00024384582379994614, |
| "loss": 1.2369, |
| "num_input_tokens_seen": 1913651200, |
| "step": 29200, |
| "train_runtime": 14028.0456, |
| "train_tokens_per_second": 136416.095 |
| }, |
| { |
| "epoch": 0.293, |
| "grad_norm": 0.6620755195617676, |
| "learning_rate": 0.00024347401906124388, |
| "loss": 1.2317, |
| "num_input_tokens_seen": 1920204800, |
| "step": 29300, |
| "train_runtime": 14074.3372, |
| "train_tokens_per_second": 136433.054 |
| }, |
| { |
| "epoch": 0.294, |
| "grad_norm": 0.5745883584022522, |
| "learning_rate": 0.0002431012730402075, |
| "loss": 1.2443, |
| "num_input_tokens_seen": 1926758400, |
| "step": 29400, |
| "train_runtime": 14125.645, |
| "train_tokens_per_second": 136401.446 |
| }, |
| { |
| "epoch": 0.295, |
| "grad_norm": 0.441680908203125, |
| "learning_rate": 0.00024272758949038517, |
| "loss": 1.2393, |
| "num_input_tokens_seen": 1933312000, |
| "step": 29500, |
| "train_runtime": 14172.5336, |
| "train_tokens_per_second": 136412.588 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 0.4417046904563904, |
| "learning_rate": 0.00024235297217476616, |
| "loss": 1.2371, |
| "num_input_tokens_seen": 1939865600, |
| "step": 29600, |
| "train_runtime": 14220.1572, |
| "train_tokens_per_second": 136416.608 |
| }, |
| { |
| "epoch": 0.297, |
| "grad_norm": 0.5888639688491821, |
| "learning_rate": 0.00024197742486574268, |
| "loss": 1.2344, |
| "num_input_tokens_seen": 1946419200, |
| "step": 29700, |
| "train_runtime": 14267.366, |
| "train_tokens_per_second": 136424.565 |
| }, |
| { |
| "epoch": 0.298, |
| "grad_norm": 0.4625283479690552, |
| "learning_rate": 0.0002416009513450719, |
| "loss": 1.2373, |
| "num_input_tokens_seen": 1952972800, |
| "step": 29800, |
| "train_runtime": 14318.8989, |
| "train_tokens_per_second": 136391.27 |
| }, |
| { |
| "epoch": 0.299, |
| "grad_norm": 0.47661375999450684, |
| "learning_rate": 0.00024122355540383806, |
| "loss": 1.2454, |
| "num_input_tokens_seen": 1959526400, |
| "step": 29900, |
| "train_runtime": 14365.8797, |
| "train_tokens_per_second": 136401.42 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.727032482624054, |
| "learning_rate": 0.00024084524084241405, |
| "loss": 1.2379, |
| "num_input_tokens_seen": 1966080000, |
| "step": 30000, |
| "train_runtime": 14415.1273, |
| "train_tokens_per_second": 136390.055 |
| }, |
| { |
| "epoch": 0.301, |
| "grad_norm": 0.45500555634498596, |
| "learning_rate": 0.00024046601147042332, |
| "loss": 1.2358, |
| "num_input_tokens_seen": 1972633600, |
| "step": 30100, |
| "train_runtime": 14461.5845, |
| "train_tokens_per_second": 136405.08 |
| }, |
| { |
| "epoch": 0.302, |
| "grad_norm": 0.44596830010414124, |
| "learning_rate": 0.0002400858711067015, |
| "loss": 1.2301, |
| "num_input_tokens_seen": 1979187200, |
| "step": 30200, |
| "train_runtime": 14508.0707, |
| "train_tokens_per_second": 136419.737 |
| }, |
| { |
| "epoch": 0.303, |
| "grad_norm": 0.4207491874694824, |
| "learning_rate": 0.00023970482357925772, |
| "loss": 1.2441, |
| "num_input_tokens_seen": 1985740800, |
| "step": 30300, |
| "train_runtime": 14555.5751, |
| "train_tokens_per_second": 136424.757 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.4833202064037323, |
| "learning_rate": 0.00023932287272523646, |
| "loss": 1.2351, |
| "num_input_tokens_seen": 1992294400, |
| "step": 30400, |
| "train_runtime": 14601.9546, |
| "train_tokens_per_second": 136440.255 |
| }, |
| { |
| "epoch": 0.305, |
| "grad_norm": 0.5268282294273376, |
| "learning_rate": 0.00023894002239087847, |
| "loss": 1.2384, |
| "num_input_tokens_seen": 1998848000, |
| "step": 30500, |
| "train_runtime": 14654.2539, |
| "train_tokens_per_second": 136400.53 |
| }, |
| { |
| "epoch": 0.306, |
| "grad_norm": 0.4639832377433777, |
| "learning_rate": 0.0002385562764314825, |
| "loss": 1.3007, |
| "num_input_tokens_seen": 2005401600, |
| "step": 30600, |
| "train_runtime": 14702.026, |
| "train_tokens_per_second": 136403.078 |
| }, |
| { |
| "epoch": 0.307, |
| "grad_norm": 0.526703953742981, |
| "learning_rate": 0.00023817163871136596, |
| "loss": 1.2481, |
| "num_input_tokens_seen": 2011955200, |
| "step": 30700, |
| "train_runtime": 14749.4458, |
| "train_tokens_per_second": 136408.868 |
| }, |
| { |
| "epoch": 0.308, |
| "grad_norm": 0.43404075503349304, |
| "learning_rate": 0.00023778611310382652, |
| "loss": 1.2273, |
| "num_input_tokens_seen": 2018508800, |
| "step": 30800, |
| "train_runtime": 14796.5936, |
| "train_tokens_per_second": 136417.128 |
| }, |
| { |
| "epoch": 0.309, |
| "grad_norm": 0.39956456422805786, |
| "learning_rate": 0.0002373997034911027, |
| "loss": 1.2275, |
| "num_input_tokens_seen": 2025062400, |
| "step": 30900, |
| "train_runtime": 14843.3887, |
| "train_tokens_per_second": 136428.578 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.46024298667907715, |
| "learning_rate": 0.00023701241376433506, |
| "loss": 1.2353, |
| "num_input_tokens_seen": 2031616000, |
| "step": 31000, |
| "train_runtime": 14890.8282, |
| "train_tokens_per_second": 136434.05 |
| }, |
| { |
| "epoch": 0.311, |
| "grad_norm": 0.38429203629493713, |
| "learning_rate": 0.0002366242478235268, |
| "loss": 1.2403, |
| "num_input_tokens_seen": 2038169600, |
| "step": 31100, |
| "train_runtime": 14937.8781, |
| "train_tokens_per_second": 136443.047 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 0.5401485562324524, |
| "learning_rate": 0.00023623520957750471, |
| "loss": 1.2273, |
| "num_input_tokens_seen": 2044723200, |
| "step": 31200, |
| "train_runtime": 14990.0842, |
| "train_tokens_per_second": 136405.051 |
| }, |
| { |
| "epoch": 0.313, |
| "grad_norm": 0.5360187888145447, |
| "learning_rate": 0.00023584530294387953, |
| "loss": 1.2312, |
| "num_input_tokens_seen": 2051276800, |
| "step": 31300, |
| "train_runtime": 15037.4257, |
| "train_tokens_per_second": 136411.434 |
| }, |
| { |
| "epoch": 0.314, |
| "grad_norm": 0.4468795359134674, |
| "learning_rate": 0.00023545453184900682, |
| "loss": 1.2383, |
| "num_input_tokens_seen": 2057830400, |
| "step": 31400, |
| "train_runtime": 15083.4771, |
| "train_tokens_per_second": 136429.444 |
| }, |
| { |
| "epoch": 0.315, |
| "grad_norm": 0.4575517177581787, |
| "learning_rate": 0.00023506290022794706, |
| "loss": 1.2354, |
| "num_input_tokens_seen": 2064384000, |
| "step": 31500, |
| "train_runtime": 15131.2692, |
| "train_tokens_per_second": 136431.648 |
| }, |
| { |
| "epoch": 0.316, |
| "grad_norm": 0.7983475923538208, |
| "learning_rate": 0.00023467041202442643, |
| "loss": 1.2309, |
| "num_input_tokens_seen": 2070937600, |
| "step": 31600, |
| "train_runtime": 15178.6218, |
| "train_tokens_per_second": 136437.789 |
| }, |
| { |
| "epoch": 0.317, |
| "grad_norm": 0.4316498339176178, |
| "learning_rate": 0.00023427707119079669, |
| "loss": 1.2462, |
| "num_input_tokens_seen": 2077491200, |
| "step": 31700, |
| "train_runtime": 15225.1881, |
| "train_tokens_per_second": 136450.938 |
| }, |
| { |
| "epoch": 0.318, |
| "grad_norm": 0.5765666365623474, |
| "learning_rate": 0.0002338828816879957, |
| "loss": 1.2367, |
| "num_input_tokens_seen": 2084044800, |
| "step": 31800, |
| "train_runtime": 15277.5735, |
| "train_tokens_per_second": 136412.029 |
| }, |
| { |
| "epoch": 0.319, |
| "grad_norm": 0.44825831055641174, |
| "learning_rate": 0.00023348784748550744, |
| "loss": 1.2354, |
| "num_input_tokens_seen": 2090598400, |
| "step": 31900, |
| "train_runtime": 15324.8285, |
| "train_tokens_per_second": 136419.04 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.5602436661720276, |
| "learning_rate": 0.00023309197256132184, |
| "loss": 1.2324, |
| "num_input_tokens_seen": 2097152000, |
| "step": 32000, |
| "train_runtime": 15371.4775, |
| "train_tokens_per_second": 136431.387 |
| }, |
| { |
| "epoch": 0.321, |
| "grad_norm": 0.4002476930618286, |
| "learning_rate": 0.00023269526090189505, |
| "loss": 1.2396, |
| "num_input_tokens_seen": 2103705600, |
| "step": 32100, |
| "train_runtime": 15419.2672, |
| "train_tokens_per_second": 136433.565 |
| }, |
| { |
| "epoch": 0.322, |
| "grad_norm": 0.4306688606739044, |
| "learning_rate": 0.00023229771650210907, |
| "loss": 1.2468, |
| "num_input_tokens_seen": 2110259200, |
| "step": 32200, |
| "train_runtime": 15466.1068, |
| "train_tokens_per_second": 136444.111 |
| }, |
| { |
| "epoch": 0.323, |
| "grad_norm": 0.584658145904541, |
| "learning_rate": 0.00023189934336523163, |
| "loss": 1.2459, |
| "num_input_tokens_seen": 2116812800, |
| "step": 32300, |
| "train_runtime": 15513.277, |
| "train_tokens_per_second": 136451.686 |
| }, |
| { |
| "epoch": 0.324, |
| "grad_norm": 0.4049496352672577, |
| "learning_rate": 0.00023150014550287574, |
| "loss": 1.2455, |
| "num_input_tokens_seen": 2123366400, |
| "step": 32400, |
| "train_runtime": 15565.7808, |
| "train_tokens_per_second": 136412.456 |
| }, |
| { |
| "epoch": 0.325, |
| "grad_norm": 0.45713433623313904, |
| "learning_rate": 0.00023110012693495943, |
| "loss": 1.2308, |
| "num_input_tokens_seen": 2129920000, |
| "step": 32500, |
| "train_runtime": 15610.6324, |
| "train_tokens_per_second": 136440.341 |
| }, |
| { |
| "epoch": 0.326, |
| "grad_norm": 0.5710960030555725, |
| "learning_rate": 0.00023069929168966527, |
| "loss": 1.2434, |
| "num_input_tokens_seen": 2136473600, |
| "step": 32600, |
| "train_runtime": 15657.7335, |
| "train_tokens_per_second": 136448.458 |
| }, |
| { |
| "epoch": 0.327, |
| "grad_norm": 0.5807371735572815, |
| "learning_rate": 0.0002302976438033997, |
| "loss": 1.2292, |
| "num_input_tokens_seen": 2143027200, |
| "step": 32700, |
| "train_runtime": 15710.1819, |
| "train_tokens_per_second": 136410.082 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 0.4462313652038574, |
| "learning_rate": 0.0002298951873207525, |
| "loss": 1.2427, |
| "num_input_tokens_seen": 2149580800, |
| "step": 32800, |
| "train_runtime": 15757.3708, |
| "train_tokens_per_second": 136417.479 |
| }, |
| { |
| "epoch": 0.329, |
| "grad_norm": 0.6099971532821655, |
| "learning_rate": 0.00022949192629445606, |
| "loss": 1.2313, |
| "num_input_tokens_seen": 2156134400, |
| "step": 32900, |
| "train_runtime": 15804.1823, |
| "train_tokens_per_second": 136428.089 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.8630947470664978, |
| "learning_rate": 0.0002290878647853443, |
| "loss": 1.247, |
| "num_input_tokens_seen": 2162688000, |
| "step": 33000, |
| "train_runtime": 15852.2039, |
| "train_tokens_per_second": 136428.223 |
| }, |
| { |
| "epoch": 0.331, |
| "grad_norm": 0.5154317021369934, |
| "learning_rate": 0.00022868300686231224, |
| "loss": 1.2246, |
| "num_input_tokens_seen": 2169241600, |
| "step": 33100, |
| "train_runtime": 15899.5617, |
| "train_tokens_per_second": 136434.05 |
| }, |
| { |
| "epoch": 0.332, |
| "grad_norm": 0.5033185482025146, |
| "learning_rate": 0.00022827735660227457, |
| "loss": 1.2271, |
| "num_input_tokens_seen": 2175795200, |
| "step": 33200, |
| "train_runtime": 15947.1716, |
| "train_tokens_per_second": 136437.686 |
| }, |
| { |
| "epoch": 0.333, |
| "grad_norm": 0.7760284543037415, |
| "learning_rate": 0.000227870918090125, |
| "loss": 1.2445, |
| "num_input_tokens_seen": 2182348800, |
| "step": 33300, |
| "train_runtime": 16000.1889, |
| "train_tokens_per_second": 136395.189 |
| }, |
| { |
| "epoch": 0.334, |
| "grad_norm": 0.5042400360107422, |
| "learning_rate": 0.00022746369541869476, |
| "loss": 1.223, |
| "num_input_tokens_seen": 2188902400, |
| "step": 33400, |
| "train_runtime": 16047.8873, |
| "train_tokens_per_second": 136398.166 |
| }, |
| { |
| "epoch": 0.335, |
| "grad_norm": 0.421273410320282, |
| "learning_rate": 0.00022705569268871163, |
| "loss": 1.2222, |
| "num_input_tokens_seen": 2195456000, |
| "step": 33500, |
| "train_runtime": 16094.6711, |
| "train_tokens_per_second": 136408.876 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.48292359709739685, |
| "learning_rate": 0.00022664691400875865, |
| "loss": 1.222, |
| "num_input_tokens_seen": 2202009600, |
| "step": 33600, |
| "train_runtime": 16143.6943, |
| "train_tokens_per_second": 136400.601 |
| }, |
| { |
| "epoch": 0.337, |
| "grad_norm": 0.4301004409790039, |
| "learning_rate": 0.00022623736349523254, |
| "loss": 1.2308, |
| "num_input_tokens_seen": 2208563200, |
| "step": 33700, |
| "train_runtime": 16189.7469, |
| "train_tokens_per_second": 136417.401 |
| }, |
| { |
| "epoch": 0.338, |
| "grad_norm": 0.6592893600463867, |
| "learning_rate": 0.00022582704527230238, |
| "loss": 1.2401, |
| "num_input_tokens_seen": 2215116800, |
| "step": 33800, |
| "train_runtime": 16235.6512, |
| "train_tokens_per_second": 136435.353 |
| }, |
| { |
| "epoch": 0.339, |
| "grad_norm": 0.6183221340179443, |
| "learning_rate": 0.0002254159634718682, |
| "loss": 1.2364, |
| "num_input_tokens_seen": 2221670400, |
| "step": 33900, |
| "train_runtime": 16283.1306, |
| "train_tokens_per_second": 136440.003 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.529971182346344, |
| "learning_rate": 0.00022500412223351915, |
| "loss": 1.2222, |
| "num_input_tokens_seen": 2228224000, |
| "step": 34000, |
| "train_runtime": 16330.1955, |
| "train_tokens_per_second": 136448.091 |
| }, |
| { |
| "epoch": 0.341, |
| "grad_norm": 0.41906896233558655, |
| "learning_rate": 0.0002245915257044919, |
| "loss": 1.2261, |
| "num_input_tokens_seen": 2234777600, |
| "step": 34100, |
| "train_runtime": 16381.7912, |
| "train_tokens_per_second": 136418.391 |
| }, |
| { |
| "epoch": 0.342, |
| "grad_norm": 0.4326164722442627, |
| "learning_rate": 0.00022417817803962892, |
| "loss": 1.2452, |
| "num_input_tokens_seen": 2241331200, |
| "step": 34200, |
| "train_runtime": 16429.3997, |
| "train_tokens_per_second": 136421.978 |
| }, |
| { |
| "epoch": 0.343, |
| "grad_norm": 0.8329346179962158, |
| "learning_rate": 0.0002237640834013366, |
| "loss": 1.2197, |
| "num_input_tokens_seen": 2247884800, |
| "step": 34300, |
| "train_runtime": 16476.2139, |
| "train_tokens_per_second": 136432.121 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 0.4649752378463745, |
| "learning_rate": 0.0002233492459595434, |
| "loss": 1.2255, |
| "num_input_tokens_seen": 2254438400, |
| "step": 34400, |
| "train_runtime": 16523.092, |
| "train_tokens_per_second": 136441.678 |
| }, |
| { |
| "epoch": 0.345, |
| "grad_norm": 0.5218563675880432, |
| "learning_rate": 0.00022293366989165772, |
| "loss": 1.2365, |
| "num_input_tokens_seen": 2260992000, |
| "step": 34500, |
| "train_runtime": 16575.1624, |
| "train_tokens_per_second": 136408.437 |
| }, |
| { |
| "epoch": 0.346, |
| "grad_norm": 0.8002403974533081, |
| "learning_rate": 0.00022251735938252587, |
| "loss": 1.2179, |
| "num_input_tokens_seen": 2267545600, |
| "step": 34600, |
| "train_runtime": 16622.274, |
| "train_tokens_per_second": 136416.088 |
| }, |
| { |
| "epoch": 0.347, |
| "grad_norm": 0.5648475289344788, |
| "learning_rate": 0.0002221003186243902, |
| "loss": 1.2301, |
| "num_input_tokens_seen": 2274099200, |
| "step": 34700, |
| "train_runtime": 16668.9107, |
| "train_tokens_per_second": 136427.583 |
| }, |
| { |
| "epoch": 0.348, |
| "grad_norm": 0.4631340801715851, |
| "learning_rate": 0.00022168255181684643, |
| "loss": 1.2292, |
| "num_input_tokens_seen": 2280652800, |
| "step": 34800, |
| "train_runtime": 16715.4649, |
| "train_tokens_per_second": 136439.687 |
| }, |
| { |
| "epoch": 0.349, |
| "grad_norm": 0.4492770731449127, |
| "learning_rate": 0.00022126406316680172, |
| "loss": 1.226, |
| "num_input_tokens_seen": 2287206400, |
| "step": 34900, |
| "train_runtime": 16761.744, |
| "train_tokens_per_second": 136453.963 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.5984812378883362, |
| "learning_rate": 0.00022084485688843208, |
| "loss": 1.2332, |
| "num_input_tokens_seen": 2293760000, |
| "step": 35000, |
| "train_runtime": 16816.4332, |
| "train_tokens_per_second": 136399.912 |
| }, |
| { |
| "epoch": 0.351, |
| "grad_norm": 0.6245887875556946, |
| "learning_rate": 0.00022042493720314003, |
| "loss": 1.2324, |
| "num_input_tokens_seen": 2300313600, |
| "step": 35100, |
| "train_runtime": 16864.2018, |
| "train_tokens_per_second": 136402.163 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.6719664335250854, |
| "learning_rate": 0.00022000430833951228, |
| "loss": 1.2272, |
| "num_input_tokens_seen": 2306867200, |
| "step": 35200, |
| "train_runtime": 16910.313, |
| "train_tokens_per_second": 136417.77 |
| }, |
| { |
| "epoch": 0.353, |
| "grad_norm": 0.43880173563957214, |
| "learning_rate": 0.00021958297453327673, |
| "loss": 1.2572, |
| "num_input_tokens_seen": 2313420800, |
| "step": 35300, |
| "train_runtime": 16958.9376, |
| "train_tokens_per_second": 136413.073 |
| }, |
| { |
| "epoch": 0.354, |
| "grad_norm": 0.6195557713508606, |
| "learning_rate": 0.00021916094002726012, |
| "loss": 1.2299, |
| "num_input_tokens_seen": 2319974400, |
| "step": 35400, |
| "train_runtime": 17005.9814, |
| "train_tokens_per_second": 136421.083 |
| }, |
| { |
| "epoch": 0.355, |
| "grad_norm": 0.5288188457489014, |
| "learning_rate": 0.00021873820907134534, |
| "loss": 1.2157, |
| "num_input_tokens_seen": 2326528000, |
| "step": 35500, |
| "train_runtime": 17053.3579, |
| "train_tokens_per_second": 136426.387 |
| }, |
| { |
| "epoch": 0.356, |
| "grad_norm": 0.4962466061115265, |
| "learning_rate": 0.0002183147859224283, |
| "loss": 1.2282, |
| "num_input_tokens_seen": 2333081600, |
| "step": 35600, |
| "train_runtime": 17099.0541, |
| "train_tokens_per_second": 136445.068 |
| }, |
| { |
| "epoch": 0.357, |
| "grad_norm": 0.4940129518508911, |
| "learning_rate": 0.00021789067484437544, |
| "loss": 1.2349, |
| "num_input_tokens_seen": 2339635200, |
| "step": 35700, |
| "train_runtime": 17146.892, |
| "train_tokens_per_second": 136446.605 |
| }, |
| { |
| "epoch": 0.358, |
| "grad_norm": 0.5929033160209656, |
| "learning_rate": 0.00021746588010798068, |
| "loss": 1.2368, |
| "num_input_tokens_seen": 2346188800, |
| "step": 35800, |
| "train_runtime": 17199.6266, |
| "train_tokens_per_second": 136409.287 |
| }, |
| { |
| "epoch": 0.359, |
| "grad_norm": 0.4825666546821594, |
| "learning_rate": 0.00021704040599092216, |
| "loss": 1.2215, |
| "num_input_tokens_seen": 2352742400, |
| "step": 35900, |
| "train_runtime": 17246.2748, |
| "train_tokens_per_second": 136420.324 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.4572449028491974, |
| "learning_rate": 0.00021661425677771965, |
| "loss": 1.2291, |
| "num_input_tokens_seen": 2359296000, |
| "step": 36000, |
| "train_runtime": 17292.1332, |
| "train_tokens_per_second": 136437.533 |
| }, |
| { |
| "epoch": 0.361, |
| "grad_norm": 0.467132568359375, |
| "learning_rate": 0.00021618743675969095, |
| "loss": 1.2295, |
| "num_input_tokens_seen": 2365849600, |
| "step": 36100, |
| "train_runtime": 17339.1599, |
| "train_tokens_per_second": 136445.457 |
| }, |
| { |
| "epoch": 0.362, |
| "grad_norm": 0.4863705635070801, |
| "learning_rate": 0.0002157599502349089, |
| "loss": 1.2154, |
| "num_input_tokens_seen": 2372403200, |
| "step": 36200, |
| "train_runtime": 17386.7454, |
| "train_tokens_per_second": 136448.952 |
| }, |
| { |
| "epoch": 0.363, |
| "grad_norm": 0.43923652172088623, |
| "learning_rate": 0.00021533180150815802, |
| "loss": 1.2268, |
| "num_input_tokens_seen": 2378956800, |
| "step": 36300, |
| "train_runtime": 17439.0785, |
| "train_tokens_per_second": 136415.282 |
| }, |
| { |
| "epoch": 0.364, |
| "grad_norm": 0.5028465390205383, |
| "learning_rate": 0.00021490299489089132, |
| "loss": 1.2293, |
| "num_input_tokens_seen": 2385510400, |
| "step": 36400, |
| "train_runtime": 17485.9662, |
| "train_tokens_per_second": 136424.283 |
| }, |
| { |
| "epoch": 0.365, |
| "grad_norm": 0.4366530478000641, |
| "learning_rate": 0.00021447353470118656, |
| "loss": 1.2276, |
| "num_input_tokens_seen": 2392064000, |
| "step": 36500, |
| "train_runtime": 17533.3809, |
| "train_tokens_per_second": 136429.136 |
| }, |
| { |
| "epoch": 0.366, |
| "grad_norm": 0.46415793895721436, |
| "learning_rate": 0.00021404342526370326, |
| "loss": 1.2227, |
| "num_input_tokens_seen": 2398617600, |
| "step": 36600, |
| "train_runtime": 17580.8443, |
| "train_tokens_per_second": 136433.584 |
| }, |
| { |
| "epoch": 0.367, |
| "grad_norm": 0.6382859349250793, |
| "learning_rate": 0.00021361267090963846, |
| "loss": 1.2212, |
| "num_input_tokens_seen": 2405171200, |
| "step": 36700, |
| "train_runtime": 17626.7905, |
| "train_tokens_per_second": 136449.753 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.6642177700996399, |
| "learning_rate": 0.0002131812759766839, |
| "loss": 1.2317, |
| "num_input_tokens_seen": 2411724800, |
| "step": 36800, |
| "train_runtime": 17679.381, |
| "train_tokens_per_second": 136414.55 |
| }, |
| { |
| "epoch": 0.369, |
| "grad_norm": 0.4071521461009979, |
| "learning_rate": 0.00021274924480898169, |
| "loss": 1.2262, |
| "num_input_tokens_seen": 2418278400, |
| "step": 36900, |
| "train_runtime": 17726.5473, |
| "train_tokens_per_second": 136421.288 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.5301467776298523, |
| "learning_rate": 0.00021231658175708087, |
| "loss": 1.2192, |
| "num_input_tokens_seen": 2424832000, |
| "step": 37000, |
| "train_runtime": 17772.7667, |
| "train_tokens_per_second": 136435.258 |
| }, |
| { |
| "epoch": 0.371, |
| "grad_norm": 0.5216257572174072, |
| "learning_rate": 0.00021188329117789357, |
| "loss": 1.213, |
| "num_input_tokens_seen": 2431385600, |
| "step": 37100, |
| "train_runtime": 17824.6083, |
| "train_tokens_per_second": 136406.116 |
| }, |
| { |
| "epoch": 0.372, |
| "grad_norm": 0.5098195672035217, |
| "learning_rate": 0.0002114493774346512, |
| "loss": 1.2311, |
| "num_input_tokens_seen": 2437939200, |
| "step": 37200, |
| "train_runtime": 17870.9901, |
| "train_tokens_per_second": 136418.81 |
| }, |
| { |
| "epoch": 0.373, |
| "grad_norm": 0.47295039892196655, |
| "learning_rate": 0.00021101484489686025, |
| "loss": 1.2211, |
| "num_input_tokens_seen": 2444492800, |
| "step": 37300, |
| "train_runtime": 17918.4906, |
| "train_tokens_per_second": 136422.919 |
| }, |
| { |
| "epoch": 0.374, |
| "grad_norm": 0.49752944707870483, |
| "learning_rate": 0.00021057969794025866, |
| "loss": 1.2292, |
| "num_input_tokens_seen": 2451046400, |
| "step": 37400, |
| "train_runtime": 17965.5373, |
| "train_tokens_per_second": 136430.453 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 0.9500930905342102, |
| "learning_rate": 0.00021014394094677128, |
| "loss": 1.2187, |
| "num_input_tokens_seen": 2457600000, |
| "step": 37500, |
| "train_runtime": 18012.267, |
| "train_tokens_per_second": 136440.349 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 0.4800110459327698, |
| "learning_rate": 0.00020970757830446633, |
| "loss": 1.2336, |
| "num_input_tokens_seen": 2464153600, |
| "step": 37600, |
| "train_runtime": 18059.6653, |
| "train_tokens_per_second": 136445.143 |
| }, |
| { |
| "epoch": 0.377, |
| "grad_norm": 0.48905813694000244, |
| "learning_rate": 0.00020927061440751072, |
| "loss": 1.2189, |
| "num_input_tokens_seen": 2470707200, |
| "step": 37700, |
| "train_runtime": 18111.7548, |
| "train_tokens_per_second": 136414.567 |
| }, |
| { |
| "epoch": 0.378, |
| "grad_norm": 0.593604564666748, |
| "learning_rate": 0.00020883305365612602, |
| "loss": 1.2178, |
| "num_input_tokens_seen": 2477260800, |
| "step": 37800, |
| "train_runtime": 18157.6424, |
| "train_tokens_per_second": 136430.751 |
| }, |
| { |
| "epoch": 0.379, |
| "grad_norm": 0.46399399638175964, |
| "learning_rate": 0.00020839490045654425, |
| "loss": 1.2141, |
| "num_input_tokens_seen": 2483814400, |
| "step": 37900, |
| "train_runtime": 18204.4326, |
| "train_tokens_per_second": 136440.089 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.5679593086242676, |
| "learning_rate": 0.00020795615922096313, |
| "loss": 1.2332, |
| "num_input_tokens_seen": 2490368000, |
| "step": 38000, |
| "train_runtime": 18252.6627, |
| "train_tokens_per_second": 136438.614 |
| }, |
| { |
| "epoch": 0.381, |
| "grad_norm": 0.48073315620422363, |
| "learning_rate": 0.00020751683436750207, |
| "loss": 1.2369, |
| "num_input_tokens_seen": 2496921600, |
| "step": 38100, |
| "train_runtime": 18300.6025, |
| "train_tokens_per_second": 136439.311 |
| }, |
| { |
| "epoch": 0.382, |
| "grad_norm": 0.4134567677974701, |
| "learning_rate": 0.00020707693032015752, |
| "loss": 1.2168, |
| "num_input_tokens_seen": 2503475200, |
| "step": 38200, |
| "train_runtime": 18351.6848, |
| "train_tokens_per_second": 136416.641 |
| }, |
| { |
| "epoch": 0.383, |
| "grad_norm": 0.4675845503807068, |
| "learning_rate": 0.00020663645150875834, |
| "loss": 1.2272, |
| "num_input_tokens_seen": 2510028800, |
| "step": 38300, |
| "train_runtime": 18398.2852, |
| "train_tokens_per_second": 136427.323 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.4632211923599243, |
| "learning_rate": 0.00020619540236892125, |
| "loss": 1.2444, |
| "num_input_tokens_seen": 2516582400, |
| "step": 38400, |
| "train_runtime": 18445.2271, |
| "train_tokens_per_second": 136435.425 |
| }, |
| { |
| "epoch": 0.385, |
| "grad_norm": 0.5543389916419983, |
| "learning_rate": 0.00020575378734200616, |
| "loss": 1.22, |
| "num_input_tokens_seen": 2523136000, |
| "step": 38500, |
| "train_runtime": 18492.3307, |
| "train_tokens_per_second": 136442.292 |
| }, |
| { |
| "epoch": 0.386, |
| "grad_norm": 0.5775281190872192, |
| "learning_rate": 0.0002053116108750715, |
| "loss": 1.2277, |
| "num_input_tokens_seen": 2529689600, |
| "step": 38600, |
| "train_runtime": 18544.2017, |
| "train_tokens_per_second": 136414.047 |
| }, |
| { |
| "epoch": 0.387, |
| "grad_norm": 0.5202789306640625, |
| "learning_rate": 0.0002048688774208294, |
| "loss": 1.2203, |
| "num_input_tokens_seen": 2536243200, |
| "step": 38700, |
| "train_runtime": 18591.8641, |
| "train_tokens_per_second": 136416.832 |
| }, |
| { |
| "epoch": 0.388, |
| "grad_norm": 0.44833704829216003, |
| "learning_rate": 0.0002044255914376009, |
| "loss": 1.2209, |
| "num_input_tokens_seen": 2542796800, |
| "step": 38800, |
| "train_runtime": 18637.8905, |
| "train_tokens_per_second": 136431.577 |
| }, |
| { |
| "epoch": 0.389, |
| "grad_norm": 0.5180789828300476, |
| "learning_rate": 0.00020398175738927082, |
| "loss": 1.2105, |
| "num_input_tokens_seen": 2549350400, |
| "step": 38900, |
| "train_runtime": 18684.0663, |
| "train_tokens_per_second": 136445.159 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.6083468794822693, |
| "learning_rate": 0.00020353737974524312, |
| "loss": 1.2136, |
| "num_input_tokens_seen": 2555904000, |
| "step": 39000, |
| "train_runtime": 18730.572, |
| "train_tokens_per_second": 136456.27 |
| }, |
| { |
| "epoch": 0.391, |
| "grad_norm": 0.39693883061408997, |
| "learning_rate": 0.00020309246298039584, |
| "loss": 1.2285, |
| "num_input_tokens_seen": 2562457600, |
| "step": 39100, |
| "train_runtime": 18784.1544, |
| "train_tokens_per_second": 136415.914 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 0.5166248679161072, |
| "learning_rate": 0.0002026470115750357, |
| "loss": 1.223, |
| "num_input_tokens_seen": 2569011200, |
| "step": 39200, |
| "train_runtime": 18830.687, |
| "train_tokens_per_second": 136426.844 |
| }, |
| { |
| "epoch": 0.393, |
| "grad_norm": 0.4967111051082611, |
| "learning_rate": 0.0002022010300148535, |
| "loss": 1.2163, |
| "num_input_tokens_seen": 2575564800, |
| "step": 39300, |
| "train_runtime": 18876.8963, |
| "train_tokens_per_second": 136440.057 |
| }, |
| { |
| "epoch": 0.394, |
| "grad_norm": 0.627816915512085, |
| "learning_rate": 0.0002017545227908786, |
| "loss": 1.2328, |
| "num_input_tokens_seen": 2582118400, |
| "step": 39400, |
| "train_runtime": 18923.6736, |
| "train_tokens_per_second": 136449.109 |
| }, |
| { |
| "epoch": 0.395, |
| "grad_norm": 0.489969938993454, |
| "learning_rate": 0.00020130749439943376, |
| "loss": 1.224, |
| "num_input_tokens_seen": 2588672000, |
| "step": 39500, |
| "train_runtime": 18970.0964, |
| "train_tokens_per_second": 136460.666 |
| }, |
| { |
| "epoch": 0.396, |
| "grad_norm": 0.6713995933532715, |
| "learning_rate": 0.00020085994934208998, |
| "loss": 1.2156, |
| "num_input_tokens_seen": 2595225600, |
| "step": 39600, |
| "train_runtime": 19023.1241, |
| "train_tokens_per_second": 136424.784 |
| }, |
| { |
| "epoch": 0.397, |
| "grad_norm": 0.4549367427825928, |
| "learning_rate": 0.00020041189212562094, |
| "loss": 1.2094, |
| "num_input_tokens_seen": 2601779200, |
| "step": 39700, |
| "train_runtime": 19070.6234, |
| "train_tokens_per_second": 136428.639 |
| }, |
| { |
| "epoch": 0.398, |
| "grad_norm": 0.47548773884773254, |
| "learning_rate": 0.0001999633272619579, |
| "loss": 1.2244, |
| "num_input_tokens_seen": 2608332800, |
| "step": 39800, |
| "train_runtime": 19117.4992, |
| "train_tokens_per_second": 136436.925 |
| }, |
| { |
| "epoch": 0.399, |
| "grad_norm": 0.46569159626960754, |
| "learning_rate": 0.00019951425926814404, |
| "loss": 1.2189, |
| "num_input_tokens_seen": 2614886400, |
| "step": 39900, |
| "train_runtime": 19164.3173, |
| "train_tokens_per_second": 136445.581 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5518438220024109, |
| "learning_rate": 0.00019906469266628904, |
| "loss": 1.2097, |
| "num_input_tokens_seen": 2621440000, |
| "step": 40000, |
| "train_runtime": 19211.1586, |
| "train_tokens_per_second": 136454.029 |
| }, |
| { |
| "epoch": 0.401, |
| "grad_norm": 0.4615115821361542, |
| "learning_rate": 0.0001986146319835236, |
| "loss": 1.2177, |
| "num_input_tokens_seen": 2627993600, |
| "step": 40100, |
| "train_runtime": 19263.5816, |
| "train_tokens_per_second": 136422.897 |
| }, |
| { |
| "epoch": 0.402, |
| "grad_norm": 0.4154411554336548, |
| "learning_rate": 0.00019816408175195383, |
| "loss": 1.2262, |
| "num_input_tokens_seen": 2634547200, |
| "step": 40200, |
| "train_runtime": 19310.6242, |
| "train_tokens_per_second": 136429.935 |
| }, |
| { |
| "epoch": 0.403, |
| "grad_norm": 0.48504838347435, |
| "learning_rate": 0.0001977130465086155, |
| "loss": 1.2205, |
| "num_input_tokens_seen": 2641100800, |
| "step": 40300, |
| "train_runtime": 19356.9428, |
| "train_tokens_per_second": 136442.042 |
| }, |
| { |
| "epoch": 0.404, |
| "grad_norm": 0.477006196975708, |
| "learning_rate": 0.0001972615307954286, |
| "loss": 1.2099, |
| "num_input_tokens_seen": 2647654400, |
| "step": 40400, |
| "train_runtime": 19403.4467, |
| "train_tokens_per_second": 136452.788 |
| }, |
| { |
| "epoch": 0.405, |
| "grad_norm": 0.46401214599609375, |
| "learning_rate": 0.00019680953915915124, |
| "loss": 1.2142, |
| "num_input_tokens_seen": 2654208000, |
| "step": 40500, |
| "train_runtime": 19456.0604, |
| "train_tokens_per_second": 136420.629 |
| }, |
| { |
| "epoch": 0.406, |
| "grad_norm": 0.4205267131328583, |
| "learning_rate": 0.00019635707615133427, |
| "loss": 1.2233, |
| "num_input_tokens_seen": 2660761600, |
| "step": 40600, |
| "train_runtime": 19503.129, |
| "train_tokens_per_second": 136427.422 |
| }, |
| { |
| "epoch": 0.407, |
| "grad_norm": 0.7298253178596497, |
| "learning_rate": 0.00019590414632827513, |
| "loss": 1.2143, |
| "num_input_tokens_seen": 2667315200, |
| "step": 40700, |
| "train_runtime": 19550.1113, |
| "train_tokens_per_second": 136434.783 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 0.47734642028808594, |
| "learning_rate": 0.00019545075425097204, |
| "loss": 1.222, |
| "num_input_tokens_seen": 2673868800, |
| "step": 40800, |
| "train_runtime": 19596.9887, |
| "train_tokens_per_second": 136442.84 |
| }, |
| { |
| "epoch": 0.409, |
| "grad_norm": 0.4535351097583771, |
| "learning_rate": 0.00019499690448507827, |
| "loss": 1.2373, |
| "num_input_tokens_seen": 2680422400, |
| "step": 40900, |
| "train_runtime": 19649.1805, |
| "train_tokens_per_second": 136413.954 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.572079062461853, |
| "learning_rate": 0.00019454260160085588, |
| "loss": 1.2125, |
| "num_input_tokens_seen": 2686976000, |
| "step": 41000, |
| "train_runtime": 19697.7854, |
| "train_tokens_per_second": 136410.056 |
| }, |
| { |
| "epoch": 0.411, |
| "grad_norm": 0.4487378001213074, |
| "learning_rate": 0.0001940878501731299, |
| "loss": 1.2124, |
| "num_input_tokens_seen": 2693529600, |
| "step": 41100, |
| "train_runtime": 19744.9135, |
| "train_tokens_per_second": 136416.379 |
| }, |
| { |
| "epoch": 0.412, |
| "grad_norm": 0.47419917583465576, |
| "learning_rate": 0.00019363265478124214, |
| "loss": 1.2037, |
| "num_input_tokens_seen": 2700083200, |
| "step": 41200, |
| "train_runtime": 19791.8314, |
| "train_tokens_per_second": 136424.121 |
| }, |
| { |
| "epoch": 0.413, |
| "grad_norm": 0.6295040845870972, |
| "learning_rate": 0.00019317702000900516, |
| "loss": 1.2246, |
| "num_input_tokens_seen": 2706636800, |
| "step": 41300, |
| "train_runtime": 19838.5236, |
| "train_tokens_per_second": 136433.379 |
| }, |
| { |
| "epoch": 0.414, |
| "grad_norm": 0.53326016664505, |
| "learning_rate": 0.000192720950444656, |
| "loss": 1.2192, |
| "num_input_tokens_seen": 2713190400, |
| "step": 41400, |
| "train_runtime": 19885.4264, |
| "train_tokens_per_second": 136441.147 |
| }, |
| { |
| "epoch": 0.415, |
| "grad_norm": 0.49727046489715576, |
| "learning_rate": 0.00019226445068081018, |
| "loss": 1.2279, |
| "num_input_tokens_seen": 2719744000, |
| "step": 41500, |
| "train_runtime": 19937.4737, |
| "train_tokens_per_second": 136413.672 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.47963398694992065, |
| "learning_rate": 0.00019180752531441523, |
| "loss": 1.2226, |
| "num_input_tokens_seen": 2726297600, |
| "step": 41600, |
| "train_runtime": 19984.6667, |
| "train_tokens_per_second": 136419.468 |
| }, |
| { |
| "epoch": 0.417, |
| "grad_norm": 0.4789304733276367, |
| "learning_rate": 0.00019135017894670456, |
| "loss": 1.2222, |
| "num_input_tokens_seen": 2732851200, |
| "step": 41700, |
| "train_runtime": 20032.7071, |
| "train_tokens_per_second": 136419.465 |
| }, |
| { |
| "epoch": 0.418, |
| "grad_norm": 0.6693325638771057, |
| "learning_rate": 0.0001908924161831509, |
| "loss": 1.2366, |
| "num_input_tokens_seen": 2739404800, |
| "step": 41800, |
| "train_runtime": 20078.7138, |
| "train_tokens_per_second": 136433.281 |
| }, |
| { |
| "epoch": 0.419, |
| "grad_norm": 0.41989439725875854, |
| "learning_rate": 0.0001904342416334203, |
| "loss": 1.2212, |
| "num_input_tokens_seen": 2745958400, |
| "step": 41900, |
| "train_runtime": 20125.0521, |
| "train_tokens_per_second": 136444.785 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.5444014072418213, |
| "learning_rate": 0.00018997565991132532, |
| "loss": 1.2164, |
| "num_input_tokens_seen": 2752512000, |
| "step": 42000, |
| "train_runtime": 20177.4596, |
| "train_tokens_per_second": 136415.191 |
| }, |
| { |
| "epoch": 0.421, |
| "grad_norm": 0.5790873169898987, |
| "learning_rate": 0.0001895166756347789, |
| "loss": 1.215, |
| "num_input_tokens_seen": 2759065600, |
| "step": 42100, |
| "train_runtime": 20224.878, |
| "train_tokens_per_second": 136419.394 |
| }, |
| { |
| "epoch": 0.422, |
| "grad_norm": 0.4666343927383423, |
| "learning_rate": 0.0001890572934257475, |
| "loss": 1.2229, |
| "num_input_tokens_seen": 2765619200, |
| "step": 42200, |
| "train_runtime": 20270.922, |
| "train_tokens_per_second": 136432.827 |
| }, |
| { |
| "epoch": 0.423, |
| "grad_norm": 0.4322357177734375, |
| "learning_rate": 0.00018859751791020497, |
| "loss": 1.2258, |
| "num_input_tokens_seen": 2772172800, |
| "step": 42300, |
| "train_runtime": 20317.4494, |
| "train_tokens_per_second": 136442.954 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 0.6240208148956299, |
| "learning_rate": 0.0001881373537180856, |
| "loss": 1.221, |
| "num_input_tokens_seen": 2778726400, |
| "step": 42400, |
| "train_runtime": 20364.5753, |
| "train_tokens_per_second": 136449.023 |
| }, |
| { |
| "epoch": 0.425, |
| "grad_norm": 0.5865579843521118, |
| "learning_rate": 0.00018767680548323766, |
| "loss": 1.2244, |
| "num_input_tokens_seen": 2785280000, |
| "step": 42500, |
| "train_runtime": 20417.9029, |
| "train_tokens_per_second": 136413.617 |
| }, |
| { |
| "epoch": 0.426, |
| "grad_norm": 0.5201649069786072, |
| "learning_rate": 0.0001872158778433768, |
| "loss": 1.2076, |
| "num_input_tokens_seen": 2791833600, |
| "step": 42600, |
| "train_runtime": 20464.7135, |
| "train_tokens_per_second": 136421.827 |
| }, |
| { |
| "epoch": 0.427, |
| "grad_norm": 0.5092735290527344, |
| "learning_rate": 0.0001867545754400392, |
| "loss": 1.2057, |
| "num_input_tokens_seen": 2798387200, |
| "step": 42700, |
| "train_runtime": 20511.0273, |
| "train_tokens_per_second": 136433.303 |
| }, |
| { |
| "epoch": 0.428, |
| "grad_norm": 0.4439486265182495, |
| "learning_rate": 0.000186292902918535, |
| "loss": 1.209, |
| "num_input_tokens_seen": 2804940800, |
| "step": 42800, |
| "train_runtime": 20558.3684, |
| "train_tokens_per_second": 136437.909 |
| }, |
| { |
| "epoch": 0.429, |
| "grad_norm": 0.4466177225112915, |
| "learning_rate": 0.00018583086492790136, |
| "loss": 1.218, |
| "num_input_tokens_seen": 2811494400, |
| "step": 42900, |
| "train_runtime": 20605.5543, |
| "train_tokens_per_second": 136443.522 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.5813594460487366, |
| "learning_rate": 0.00018536846612085566, |
| "loss": 1.2161, |
| "num_input_tokens_seen": 2818048000, |
| "step": 43000, |
| "train_runtime": 20658.6134, |
| "train_tokens_per_second": 136410.317 |
| }, |
| { |
| "epoch": 0.431, |
| "grad_norm": 0.49140629172325134, |
| "learning_rate": 0.00018490571115374878, |
| "loss": 1.227, |
| "num_input_tokens_seen": 2824601600, |
| "step": 43100, |
| "train_runtime": 20705.6255, |
| "train_tokens_per_second": 136417.11 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.4938826858997345, |
| "learning_rate": 0.00018444260468651816, |
| "loss": 1.2252, |
| "num_input_tokens_seen": 2831155200, |
| "step": 43200, |
| "train_runtime": 20752.3571, |
| "train_tokens_per_second": 136425.717 |
| }, |
| { |
| "epoch": 0.433, |
| "grad_norm": 0.5228791832923889, |
| "learning_rate": 0.00018397915138264068, |
| "loss": 1.2274, |
| "num_input_tokens_seen": 2837708800, |
| "step": 43300, |
| "train_runtime": 20799.4436, |
| "train_tokens_per_second": 136431.957 |
| }, |
| { |
| "epoch": 0.434, |
| "grad_norm": 0.46896296739578247, |
| "learning_rate": 0.00018351535590908606, |
| "loss": 1.2043, |
| "num_input_tokens_seen": 2844262400, |
| "step": 43400, |
| "train_runtime": 20845.6184, |
| "train_tokens_per_second": 136444.137 |
| }, |
| { |
| "epoch": 0.435, |
| "grad_norm": 0.4269004464149475, |
| "learning_rate": 0.00018305122293626948, |
| "loss": 1.2213, |
| "num_input_tokens_seen": 2850816000, |
| "step": 43500, |
| "train_runtime": 20897.7485, |
| "train_tokens_per_second": 136417.375 |
| }, |
| { |
| "epoch": 0.436, |
| "grad_norm": 0.6213890314102173, |
| "learning_rate": 0.00018258675713800492, |
| "loss": 1.2096, |
| "num_input_tokens_seen": 2857369600, |
| "step": 43600, |
| "train_runtime": 20944.9642, |
| "train_tokens_per_second": 136422.749 |
| }, |
| { |
| "epoch": 0.437, |
| "grad_norm": 0.4281384348869324, |
| "learning_rate": 0.00018212196319145773, |
| "loss": 1.2111, |
| "num_input_tokens_seen": 2863923200, |
| "step": 43700, |
| "train_runtime": 20992.0443, |
| "train_tokens_per_second": 136428.98 |
| }, |
| { |
| "epoch": 0.438, |
| "grad_norm": 1.044310212135315, |
| "learning_rate": 0.00018165684577709778, |
| "loss": 1.2142, |
| "num_input_tokens_seen": 2870476800, |
| "step": 43800, |
| "train_runtime": 21039.718, |
| "train_tokens_per_second": 136431.334 |
| }, |
| { |
| "epoch": 0.439, |
| "grad_norm": 0.445425808429718, |
| "learning_rate": 0.0001811914095786524, |
| "loss": 1.218, |
| "num_input_tokens_seen": 2877030400, |
| "step": 43900, |
| "train_runtime": 21088.215, |
| "train_tokens_per_second": 136428.351 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.43947216868400574, |
| "learning_rate": 0.0001807256592830588, |
| "loss": 1.2124, |
| "num_input_tokens_seen": 2883584000, |
| "step": 44000, |
| "train_runtime": 21136.0286, |
| "train_tokens_per_second": 136429.793 |
| }, |
| { |
| "epoch": 0.441, |
| "grad_norm": 0.5147203803062439, |
| "learning_rate": 0.00018025959958041732, |
| "loss": 1.2227, |
| "num_input_tokens_seen": 2890137600, |
| "step": 44100, |
| "train_runtime": 21182.9913, |
| "train_tokens_per_second": 136436.708 |
| }, |
| { |
| "epoch": 0.442, |
| "grad_norm": 0.473652184009552, |
| "learning_rate": 0.00017979323516394407, |
| "loss": 1.2277, |
| "num_input_tokens_seen": 2896691200, |
| "step": 44200, |
| "train_runtime": 21236.5796, |
| "train_tokens_per_second": 136401.024 |
| }, |
| { |
| "epoch": 0.443, |
| "grad_norm": 0.4356568157672882, |
| "learning_rate": 0.00017932657072992344, |
| "loss": 1.2018, |
| "num_input_tokens_seen": 2903244800, |
| "step": 44300, |
| "train_runtime": 21282.9387, |
| "train_tokens_per_second": 136411.838 |
| }, |
| { |
| "epoch": 0.444, |
| "grad_norm": 0.4458017647266388, |
| "learning_rate": 0.00017885961097766117, |
| "loss": 1.2124, |
| "num_input_tokens_seen": 2909798400, |
| "step": 44400, |
| "train_runtime": 21331.1223, |
| "train_tokens_per_second": 136410.938 |
| }, |
| { |
| "epoch": 0.445, |
| "grad_norm": 0.5065773725509644, |
| "learning_rate": 0.00017839236060943674, |
| "loss": 1.2262, |
| "num_input_tokens_seen": 2916352000, |
| "step": 44500, |
| "train_runtime": 21377.5493, |
| "train_tokens_per_second": 136421.25 |
| }, |
| { |
| "epoch": 0.446, |
| "grad_norm": 0.5424425601959229, |
| "learning_rate": 0.0001779248243304562, |
| "loss": 1.2171, |
| "num_input_tokens_seen": 2922905600, |
| "step": 44600, |
| "train_runtime": 21424.9021, |
| "train_tokens_per_second": 136425.622 |
| }, |
| { |
| "epoch": 0.447, |
| "grad_norm": 0.4595748484134674, |
| "learning_rate": 0.00017745700684880465, |
| "loss": 1.2039, |
| "num_input_tokens_seen": 2929459200, |
| "step": 44700, |
| "train_runtime": 21472.2167, |
| "train_tokens_per_second": 136430.218 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.5353960990905762, |
| "learning_rate": 0.000176988912875399, |
| "loss": 1.2075, |
| "num_input_tokens_seen": 2936012800, |
| "step": 44800, |
| "train_runtime": 21524.5148, |
| "train_tokens_per_second": 136403.205 |
| }, |
| { |
| "epoch": 0.449, |
| "grad_norm": 0.4949302673339844, |
| "learning_rate": 0.00017652054712394028, |
| "loss": 1.2174, |
| "num_input_tokens_seen": 2942566400, |
| "step": 44900, |
| "train_runtime": 21571.6626, |
| "train_tokens_per_second": 136408.883 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.5596060752868652, |
| "learning_rate": 0.0001760519143108665, |
| "loss": 1.2178, |
| "num_input_tokens_seen": 2949120000, |
| "step": 45000, |
| "train_runtime": 21618.3195, |
| "train_tokens_per_second": 136417.634 |
| }, |
| { |
| "epoch": 0.451, |
| "grad_norm": 0.5348083972930908, |
| "learning_rate": 0.00017558301915530483, |
| "loss": 1.215, |
| "num_input_tokens_seen": 2955673600, |
| "step": 45100, |
| "train_runtime": 21666.1069, |
| "train_tokens_per_second": 136419.229 |
| }, |
| { |
| "epoch": 0.452, |
| "grad_norm": 0.46748441457748413, |
| "learning_rate": 0.00017511386637902428, |
| "loss": 1.2104, |
| "num_input_tokens_seen": 2962227200, |
| "step": 45200, |
| "train_runtime": 21713.1957, |
| "train_tokens_per_second": 136425.206 |
| }, |
| { |
| "epoch": 0.453, |
| "grad_norm": 0.47188806533813477, |
| "learning_rate": 0.00017464446070638814, |
| "loss": 1.213, |
| "num_input_tokens_seen": 2968780800, |
| "step": 45300, |
| "train_runtime": 21760.1393, |
| "train_tokens_per_second": 136432.068 |
| }, |
| { |
| "epoch": 0.454, |
| "grad_norm": 0.5225762128829956, |
| "learning_rate": 0.00017417480686430622, |
| "loss": 1.2152, |
| "num_input_tokens_seen": 2975334400, |
| "step": 45400, |
| "train_runtime": 21812.7666, |
| "train_tokens_per_second": 136403.348 |
| }, |
| { |
| "epoch": 0.455, |
| "grad_norm": 0.5889186263084412, |
| "learning_rate": 0.00017370490958218765, |
| "loss": 1.2214, |
| "num_input_tokens_seen": 2981888000, |
| "step": 45500, |
| "train_runtime": 21859.0263, |
| "train_tokens_per_second": 136414.493 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 0.6613258719444275, |
| "learning_rate": 0.00017323477359189272, |
| "loss": 1.2334, |
| "num_input_tokens_seen": 2988441600, |
| "step": 45600, |
| "train_runtime": 21905.9003, |
| "train_tokens_per_second": 136421.766 |
| }, |
| { |
| "epoch": 0.457, |
| "grad_norm": 0.4657646715641022, |
| "learning_rate": 0.00017276440362768564, |
| "loss": 1.2132, |
| "num_input_tokens_seen": 2994995200, |
| "step": 45700, |
| "train_runtime": 21952.9851, |
| "train_tokens_per_second": 136427.697 |
| }, |
| { |
| "epoch": 0.458, |
| "grad_norm": 0.8410550355911255, |
| "learning_rate": 0.0001722938044261868, |
| "loss": 1.2073, |
| "num_input_tokens_seen": 3001548800, |
| "step": 45800, |
| "train_runtime": 22005.352, |
| "train_tokens_per_second": 136400.854 |
| }, |
| { |
| "epoch": 0.459, |
| "grad_norm": 0.7687750458717346, |
| "learning_rate": 0.0001718229807263249, |
| "loss": 1.2116, |
| "num_input_tokens_seen": 3008102400, |
| "step": 45900, |
| "train_runtime": 22051.2762, |
| "train_tokens_per_second": 136413.982 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.40700653195381165, |
| "learning_rate": 0.0001713519372692894, |
| "loss": 1.2082, |
| "num_input_tokens_seen": 3014656000, |
| "step": 46000, |
| "train_runtime": 22102.8898, |
| "train_tokens_per_second": 136391.939 |
| }, |
| { |
| "epoch": 0.461, |
| "grad_norm": 0.44239944219589233, |
| "learning_rate": 0.0001708806787984826, |
| "loss": 1.2177, |
| "num_input_tokens_seen": 3021209600, |
| "step": 46100, |
| "train_runtime": 22149.1222, |
| "train_tokens_per_second": 136403.13 |
| }, |
| { |
| "epoch": 0.462, |
| "grad_norm": 0.4981868267059326, |
| "learning_rate": 0.00017040921005947212, |
| "loss": 1.2073, |
| "num_input_tokens_seen": 3027763200, |
| "step": 46200, |
| "train_runtime": 22195.5009, |
| "train_tokens_per_second": 136413.376 |
| }, |
| { |
| "epoch": 0.463, |
| "grad_norm": 0.5651112198829651, |
| "learning_rate": 0.0001699375357999429, |
| "loss": 1.2098, |
| "num_input_tokens_seen": 3034316800, |
| "step": 46300, |
| "train_runtime": 22241.367, |
| "train_tokens_per_second": 136426.722 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 1.1314237117767334, |
| "learning_rate": 0.0001694656607696496, |
| "loss": 1.2335, |
| "num_input_tokens_seen": 3040870400, |
| "step": 46400, |
| "train_runtime": 22294.8896, |
| "train_tokens_per_second": 136393.158 |
| }, |
| { |
| "epoch": 0.465, |
| "grad_norm": 0.568980872631073, |
| "learning_rate": 0.0001689935897203684, |
| "loss": 1.2096, |
| "num_input_tokens_seen": 3047424000, |
| "step": 46500, |
| "train_runtime": 22342.7849, |
| "train_tokens_per_second": 136394.098 |
| }, |
| { |
| "epoch": 0.466, |
| "grad_norm": 0.7110226154327393, |
| "learning_rate": 0.0001685213274058496, |
| "loss": 1.2136, |
| "num_input_tokens_seen": 3053977600, |
| "step": 46600, |
| "train_runtime": 22393.3193, |
| "train_tokens_per_second": 136378.96 |
| }, |
| { |
| "epoch": 0.467, |
| "grad_norm": 0.5052018761634827, |
| "learning_rate": 0.00016804887858176944, |
| "loss": 1.2237, |
| "num_input_tokens_seen": 3060531200, |
| "step": 46700, |
| "train_runtime": 22441.2606, |
| "train_tokens_per_second": 136379.647 |
| }, |
| { |
| "epoch": 0.468, |
| "grad_norm": 0.4663156270980835, |
| "learning_rate": 0.00016757624800568238, |
| "loss": 1.2071, |
| "num_input_tokens_seen": 3067084800, |
| "step": 46800, |
| "train_runtime": 22487.9084, |
| "train_tokens_per_second": 136388.175 |
| }, |
| { |
| "epoch": 0.469, |
| "grad_norm": 0.5441033840179443, |
| "learning_rate": 0.00016710344043697301, |
| "loss": 1.2078, |
| "num_input_tokens_seen": 3073638400, |
| "step": 46900, |
| "train_runtime": 22534.6023, |
| "train_tokens_per_second": 136396.39 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.4578142464160919, |
| "learning_rate": 0.0001666304606368083, |
| "loss": 1.1956, |
| "num_input_tokens_seen": 3080192000, |
| "step": 47000, |
| "train_runtime": 22587.0441, |
| "train_tokens_per_second": 136369.858 |
| }, |
| { |
| "epoch": 0.471, |
| "grad_norm": 0.6252749562263489, |
| "learning_rate": 0.00016615731336808962, |
| "loss": 1.1911, |
| "num_input_tokens_seen": 3086745600, |
| "step": 47100, |
| "train_runtime": 22634.7186, |
| "train_tokens_per_second": 136372.166 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 0.45418813824653625, |
| "learning_rate": 0.0001656840033954047, |
| "loss": 1.22, |
| "num_input_tokens_seen": 3093299200, |
| "step": 47200, |
| "train_runtime": 22681.221, |
| "train_tokens_per_second": 136381.511 |
| }, |
| { |
| "epoch": 0.473, |
| "grad_norm": 0.55946284532547, |
| "learning_rate": 0.00016521053548497973, |
| "loss": 1.2073, |
| "num_input_tokens_seen": 3099852800, |
| "step": 47300, |
| "train_runtime": 22728.7635, |
| "train_tokens_per_second": 136384.577 |
| }, |
| { |
| "epoch": 0.474, |
| "grad_norm": 0.508859395980835, |
| "learning_rate": 0.0001647369144046313, |
| "loss": 1.1957, |
| "num_input_tokens_seen": 3106406400, |
| "step": 47400, |
| "train_runtime": 22775.8652, |
| "train_tokens_per_second": 136390.27 |
| }, |
| { |
| "epoch": 0.475, |
| "grad_norm": 0.5557622313499451, |
| "learning_rate": 0.00016426314492371842, |
| "loss": 1.1996, |
| "num_input_tokens_seen": 3112960000, |
| "step": 47500, |
| "train_runtime": 22823.5391, |
| "train_tokens_per_second": 136392.519 |
| }, |
| { |
| "epoch": 0.476, |
| "grad_norm": 0.5686858296394348, |
| "learning_rate": 0.0001637892318130945, |
| "loss": 1.201, |
| "num_input_tokens_seen": 3119513600, |
| "step": 47600, |
| "train_runtime": 22875.0526, |
| "train_tokens_per_second": 136371.866 |
| }, |
| { |
| "epoch": 0.477, |
| "grad_norm": 0.47568413615226746, |
| "learning_rate": 0.00016331517984505934, |
| "loss": 1.2132, |
| "num_input_tokens_seen": 3126067200, |
| "step": 47700, |
| "train_runtime": 22923.2754, |
| "train_tokens_per_second": 136370.878 |
| }, |
| { |
| "epoch": 0.478, |
| "grad_norm": 0.40612325072288513, |
| "learning_rate": 0.00016284099379331092, |
| "loss": 1.2085, |
| "num_input_tokens_seen": 3132620800, |
| "step": 47800, |
| "train_runtime": 22970.2831, |
| "train_tokens_per_second": 136377.109 |
| }, |
| { |
| "epoch": 0.479, |
| "grad_norm": 0.491755872964859, |
| "learning_rate": 0.00016236667843289759, |
| "loss": 1.206, |
| "num_input_tokens_seen": 3139174400, |
| "step": 47900, |
| "train_runtime": 23016.8676, |
| "train_tokens_per_second": 136385.821 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.2421867847442627, |
| "learning_rate": 0.00016189223854016973, |
| "loss": 1.1991, |
| "num_input_tokens_seen": 3145728000, |
| "step": 48000, |
| "train_runtime": 23070.1067, |
| "train_tokens_per_second": 136355.156 |
| }, |
| { |
| "epoch": 0.481, |
| "grad_norm": 0.44709935784339905, |
| "learning_rate": 0.00016141767889273182, |
| "loss": 1.1987, |
| "num_input_tokens_seen": 3152281600, |
| "step": 48100, |
| "train_runtime": 23117.6704, |
| "train_tokens_per_second": 136358.1 |
| }, |
| { |
| "epoch": 0.482, |
| "grad_norm": 0.6956078410148621, |
| "learning_rate": 0.00016094300426939417, |
| "loss": 1.206, |
| "num_input_tokens_seen": 3158835200, |
| "step": 48200, |
| "train_runtime": 23164.6084, |
| "train_tokens_per_second": 136364.714 |
| }, |
| { |
| "epoch": 0.483, |
| "grad_norm": 0.4756148159503937, |
| "learning_rate": 0.00016046821945012505, |
| "loss": 1.213, |
| "num_input_tokens_seen": 3165388800, |
| "step": 48300, |
| "train_runtime": 23212.4256, |
| "train_tokens_per_second": 136366.137 |
| }, |
| { |
| "epoch": 0.484, |
| "grad_norm": 0.4668136239051819, |
| "learning_rate": 0.00015999332921600226, |
| "loss": 1.2027, |
| "num_input_tokens_seen": 3171942400, |
| "step": 48400, |
| "train_runtime": 23260.1957, |
| "train_tokens_per_second": 136367.829 |
| }, |
| { |
| "epoch": 0.485, |
| "grad_norm": 0.48166415095329285, |
| "learning_rate": 0.00015951833834916532, |
| "loss": 1.1885, |
| "num_input_tokens_seen": 3178496000, |
| "step": 48500, |
| "train_runtime": 23308.4042, |
| "train_tokens_per_second": 136366.951 |
| }, |
| { |
| "epoch": 0.486, |
| "grad_norm": 1.4835230112075806, |
| "learning_rate": 0.00015904325163276672, |
| "loss": 1.2144, |
| "num_input_tokens_seen": 3185049600, |
| "step": 48600, |
| "train_runtime": 23355.0119, |
| "train_tokens_per_second": 136375.422 |
| }, |
| { |
| "epoch": 0.487, |
| "grad_norm": 0.47993043065071106, |
| "learning_rate": 0.00015856807385092466, |
| "loss": 1.2092, |
| "num_input_tokens_seen": 3191603200, |
| "step": 48700, |
| "train_runtime": 23408.2289, |
| "train_tokens_per_second": 136345.352 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 0.4617721736431122, |
| "learning_rate": 0.00015809280978867405, |
| "loss": 1.2079, |
| "num_input_tokens_seen": 3198156800, |
| "step": 48800, |
| "train_runtime": 23456.3091, |
| "train_tokens_per_second": 136345.27 |
| }, |
| { |
| "epoch": 0.489, |
| "grad_norm": 0.4698822796344757, |
| "learning_rate": 0.0001576174642319187, |
| "loss": 1.2221, |
| "num_input_tokens_seen": 3204710400, |
| "step": 48900, |
| "train_runtime": 23502.92, |
| "train_tokens_per_second": 136353.713 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.5454009771347046, |
| "learning_rate": 0.0001571420419673831, |
| "loss": 1.201, |
| "num_input_tokens_seen": 3211264000, |
| "step": 49000, |
| "train_runtime": 23550.5868, |
| "train_tokens_per_second": 136356.008 |
| }, |
| { |
| "epoch": 0.491, |
| "grad_norm": 0.9021556973457336, |
| "learning_rate": 0.0001566665477825642, |
| "loss": 1.2047, |
| "num_input_tokens_seen": 3217817600, |
| "step": 49100, |
| "train_runtime": 23597.4655, |
| "train_tokens_per_second": 136362.848 |
| }, |
| { |
| "epoch": 0.492, |
| "grad_norm": 0.4959240257740021, |
| "learning_rate": 0.0001561909864656831, |
| "loss": 1.2042, |
| "num_input_tokens_seen": 3224371200, |
| "step": 49200, |
| "train_runtime": 23650.6048, |
| "train_tokens_per_second": 136333.562 |
| }, |
| { |
| "epoch": 0.493, |
| "grad_norm": 0.554251492023468, |
| "learning_rate": 0.00015571536280563705, |
| "loss": 1.2163, |
| "num_input_tokens_seen": 3230924800, |
| "step": 49300, |
| "train_runtime": 23697.3685, |
| "train_tokens_per_second": 136341.079 |
| }, |
| { |
| "epoch": 0.494, |
| "grad_norm": 0.5000952482223511, |
| "learning_rate": 0.000155239681591951, |
| "loss": 1.2086, |
| "num_input_tokens_seen": 3237478400, |
| "step": 49400, |
| "train_runtime": 23745.4988, |
| "train_tokens_per_second": 136340.72 |
| }, |
| { |
| "epoch": 0.495, |
| "grad_norm": 0.7438832521438599, |
| "learning_rate": 0.00015476394761472953, |
| "loss": 1.1999, |
| "num_input_tokens_seen": 3244032000, |
| "step": 49500, |
| "train_runtime": 23793.3349, |
| "train_tokens_per_second": 136342.048 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.4872761368751526, |
| "learning_rate": 0.00015428816566460843, |
| "loss": 1.194, |
| "num_input_tokens_seen": 3250585600, |
| "step": 49600, |
| "train_runtime": 23839.649, |
| "train_tokens_per_second": 136352.074 |
| }, |
| { |
| "epoch": 0.497, |
| "grad_norm": 0.48635321855545044, |
| "learning_rate": 0.00015381234053270669, |
| "loss": 1.1957, |
| "num_input_tokens_seen": 3257139200, |
| "step": 49700, |
| "train_runtime": 23886.4418, |
| "train_tokens_per_second": 136359.33 |
| }, |
| { |
| "epoch": 0.498, |
| "grad_norm": 0.899361252784729, |
| "learning_rate": 0.0001533364770105781, |
| "loss": 1.201, |
| "num_input_tokens_seen": 3263692800, |
| "step": 49800, |
| "train_runtime": 23933.6337, |
| "train_tokens_per_second": 136364.283 |
| }, |
| { |
| "epoch": 0.499, |
| "grad_norm": 0.5460925698280334, |
| "learning_rate": 0.0001528605798901631, |
| "loss": 1.2086, |
| "num_input_tokens_seen": 3270246400, |
| "step": 49900, |
| "train_runtime": 23985.6033, |
| "train_tokens_per_second": 136342.053 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.4763907194137573, |
| "learning_rate": 0.00015238465396374027, |
| "loss": 1.1987, |
| "num_input_tokens_seen": 3276800000, |
| "step": 50000, |
| "train_runtime": 24033.0829, |
| "train_tokens_per_second": 136345.388 |
| }, |
| { |
| "epoch": 0.501, |
| "grad_norm": 0.4716530442237854, |
| "learning_rate": 0.00015190870402387858, |
| "loss": 1.2083, |
| "num_input_tokens_seen": 3283353600, |
| "step": 50100, |
| "train_runtime": 24080.0017, |
| "train_tokens_per_second": 136351.884 |
| }, |
| { |
| "epoch": 0.502, |
| "grad_norm": 0.65655517578125, |
| "learning_rate": 0.00015143273486338857, |
| "loss": 1.2026, |
| "num_input_tokens_seen": 3289907200, |
| "step": 50200, |
| "train_runtime": 24132.759, |
| "train_tokens_per_second": 136325.366 |
| }, |
| { |
| "epoch": 0.503, |
| "grad_norm": 0.494205117225647, |
| "learning_rate": 0.00015095675127527438, |
| "loss": 1.208, |
| "num_input_tokens_seen": 3296460800, |
| "step": 50300, |
| "train_runtime": 24179.9126, |
| "train_tokens_per_second": 136330.551 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 0.485307902097702, |
| "learning_rate": 0.00015048075805268547, |
| "loss": 1.1965, |
| "num_input_tokens_seen": 3303014400, |
| "step": 50400, |
| "train_runtime": 24227.2685, |
| "train_tokens_per_second": 136334.577 |
| }, |
| { |
| "epoch": 0.505, |
| "grad_norm": 0.4843132793903351, |
| "learning_rate": 0.00015000475998886825, |
| "loss": 1.2028, |
| "num_input_tokens_seen": 3309568000, |
| "step": 50500, |
| "train_runtime": 24274.7092, |
| "train_tokens_per_second": 136338.111 |
| }, |
| { |
| "epoch": 0.506, |
| "grad_norm": 0.4654887020587921, |
| "learning_rate": 0.00014952876187711804, |
| "loss": 1.2151, |
| "num_input_tokens_seen": 3316121600, |
| "step": 50600, |
| "train_runtime": 24321.273, |
| "train_tokens_per_second": 136346.547 |
| }, |
| { |
| "epoch": 0.507, |
| "grad_norm": 0.4625457525253296, |
| "learning_rate": 0.00014905276851073053, |
| "loss": 1.209, |
| "num_input_tokens_seen": 3322675200, |
| "step": 50700, |
| "train_runtime": 24374.7609, |
| "train_tokens_per_second": 136316.217 |
| }, |
| { |
| "epoch": 0.508, |
| "grad_norm": 0.527594268321991, |
| "learning_rate": 0.00014857678468295352, |
| "loss": 1.2043, |
| "num_input_tokens_seen": 3329228800, |
| "step": 50800, |
| "train_runtime": 24422.59, |
| "train_tokens_per_second": 136317.598 |
| }, |
| { |
| "epoch": 0.509, |
| "grad_norm": 0.4604775011539459, |
| "learning_rate": 0.00014810081518693902, |
| "loss": 1.1895, |
| "num_input_tokens_seen": 3335782400, |
| "step": 50900, |
| "train_runtime": 24468.7673, |
| "train_tokens_per_second": 136328.176 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.4973219335079193, |
| "learning_rate": 0.0001476248648156945, |
| "loss": 1.1977, |
| "num_input_tokens_seen": 3342336000, |
| "step": 51000, |
| "train_runtime": 24516.9703, |
| "train_tokens_per_second": 136327.448 |
| }, |
| { |
| "epoch": 0.511, |
| "grad_norm": 0.42552006244659424, |
| "learning_rate": 0.00014714893836203485, |
| "loss": 1.2109, |
| "num_input_tokens_seen": 3348889600, |
| "step": 51100, |
| "train_runtime": 24564.5614, |
| "train_tokens_per_second": 136330.12 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.5027197003364563, |
| "learning_rate": 0.0001466730406185343, |
| "loss": 1.1949, |
| "num_input_tokens_seen": 3355443200, |
| "step": 51200, |
| "train_runtime": 24611.9784, |
| "train_tokens_per_second": 136333.745 |
| }, |
| { |
| "epoch": 0.513, |
| "grad_norm": 0.6097121238708496, |
| "learning_rate": 0.0001461971763774778, |
| "loss": 1.2, |
| "num_input_tokens_seen": 3361996800, |
| "step": 51300, |
| "train_runtime": 24665.0046, |
| "train_tokens_per_second": 136306.352 |
| }, |
| { |
| "epoch": 0.514, |
| "grad_norm": 0.9953346848487854, |
| "learning_rate": 0.0001457213504308129, |
| "loss": 1.1919, |
| "num_input_tokens_seen": 3368550400, |
| "step": 51400, |
| "train_runtime": 24711.3817, |
| "train_tokens_per_second": 136315.745 |
| }, |
| { |
| "epoch": 0.515, |
| "grad_norm": 0.5582478642463684, |
| "learning_rate": 0.00014524556757010177, |
| "loss": 1.1924, |
| "num_input_tokens_seen": 3375104000, |
| "step": 51500, |
| "train_runtime": 24758.0554, |
| "train_tokens_per_second": 136323.469 |
| }, |
| { |
| "epoch": 0.516, |
| "grad_norm": 0.5084798336029053, |
| "learning_rate": 0.00014476983258647234, |
| "loss": 1.2068, |
| "num_input_tokens_seen": 3381657600, |
| "step": 51600, |
| "train_runtime": 24807.6959, |
| "train_tokens_per_second": 136314.86 |
| }, |
| { |
| "epoch": 0.517, |
| "grad_norm": 0.6907379627227783, |
| "learning_rate": 0.0001442941502705707, |
| "loss": 1.1945, |
| "num_input_tokens_seen": 3388211200, |
| "step": 51700, |
| "train_runtime": 24855.3849, |
| "train_tokens_per_second": 136316.988 |
| }, |
| { |
| "epoch": 0.518, |
| "grad_norm": 0.6037150025367737, |
| "learning_rate": 0.0001438185254125125, |
| "loss": 1.2053, |
| "num_input_tokens_seen": 3394764800, |
| "step": 51800, |
| "train_runtime": 24901.8712, |
| "train_tokens_per_second": 136325.691 |
| }, |
| { |
| "epoch": 0.519, |
| "grad_norm": 0.6816796064376831, |
| "learning_rate": 0.00014334296280183473, |
| "loss": 1.2019, |
| "num_input_tokens_seen": 3401318400, |
| "step": 51900, |
| "train_runtime": 24955.4949, |
| "train_tokens_per_second": 136295.37 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.5201036930084229, |
| "learning_rate": 0.00014286746722744768, |
| "loss": 1.206, |
| "num_input_tokens_seen": 3407872000, |
| "step": 52000, |
| "train_runtime": 25002.9753, |
| "train_tokens_per_second": 136298.659 |
| }, |
| { |
| "epoch": 0.521, |
| "grad_norm": 0.5104642510414124, |
| "learning_rate": 0.00014239204347758647, |
| "loss": 1.2029, |
| "num_input_tokens_seen": 3414425600, |
| "step": 52100, |
| "train_runtime": 25051.9745, |
| "train_tokens_per_second": 136293.672 |
| }, |
| { |
| "epoch": 0.522, |
| "grad_norm": 0.4965505003929138, |
| "learning_rate": 0.00014191669633976294, |
| "loss": 1.1961, |
| "num_input_tokens_seen": 3420979200, |
| "step": 52200, |
| "train_runtime": 25099.3949, |
| "train_tokens_per_second": 136297.278 |
| }, |
| { |
| "epoch": 0.523, |
| "grad_norm": 0.5390327572822571, |
| "learning_rate": 0.00014144143060071756, |
| "loss": 1.194, |
| "num_input_tokens_seen": 3427532800, |
| "step": 52300, |
| "train_runtime": 25146.6291, |
| "train_tokens_per_second": 136301.879 |
| }, |
| { |
| "epoch": 0.524, |
| "grad_norm": 2.647089719772339, |
| "learning_rate": 0.000140966251046371, |
| "loss": 1.2006, |
| "num_input_tokens_seen": 3434086400, |
| "step": 52400, |
| "train_runtime": 25194.2742, |
| "train_tokens_per_second": 136304.24 |
| }, |
| { |
| "epoch": 0.525, |
| "grad_norm": 0.46030643582344055, |
| "learning_rate": 0.0001404911624617761, |
| "loss": 1.2071, |
| "num_input_tokens_seen": 3440640000, |
| "step": 52500, |
| "train_runtime": 25247.7567, |
| "train_tokens_per_second": 136275.077 |
| }, |
| { |
| "epoch": 0.526, |
| "grad_norm": 0.487699031829834, |
| "learning_rate": 0.00014001616963106966, |
| "loss": 1.2046, |
| "num_input_tokens_seen": 3447193600, |
| "step": 52600, |
| "train_runtime": 25295.5054, |
| "train_tokens_per_second": 136276.921 |
| }, |
| { |
| "epoch": 0.527, |
| "grad_norm": 0.4782906472682953, |
| "learning_rate": 0.00013954127733742416, |
| "loss": 1.1891, |
| "num_input_tokens_seen": 3453747200, |
| "step": 52700, |
| "train_runtime": 25344.1317, |
| "train_tokens_per_second": 136274.039 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.595632016658783, |
| "learning_rate": 0.0001390664903629998, |
| "loss": 1.1867, |
| "num_input_tokens_seen": 3460300800, |
| "step": 52800, |
| "train_runtime": 25391.6777, |
| "train_tokens_per_second": 136276.966 |
| }, |
| { |
| "epoch": 0.529, |
| "grad_norm": 0.5201537609100342, |
| "learning_rate": 0.0001385918134888961, |
| "loss": 1.1955, |
| "num_input_tokens_seen": 3466854400, |
| "step": 52900, |
| "train_runtime": 25439.3874, |
| "train_tokens_per_second": 136279.005 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.4726644456386566, |
| "learning_rate": 0.00013811725149510387, |
| "loss": 1.206, |
| "num_input_tokens_seen": 3473408000, |
| "step": 53000, |
| "train_runtime": 25492.0415, |
| "train_tokens_per_second": 136254.603 |
| }, |
| { |
| "epoch": 0.531, |
| "grad_norm": 0.5846008062362671, |
| "learning_rate": 0.0001376428091604572, |
| "loss": 1.2117, |
| "num_input_tokens_seen": 3479961600, |
| "step": 53100, |
| "train_runtime": 25540.3083, |
| "train_tokens_per_second": 136253.704 |
| }, |
| { |
| "epoch": 0.532, |
| "grad_norm": 0.4758647382259369, |
| "learning_rate": 0.00013716849126258512, |
| "loss": 1.2042, |
| "num_input_tokens_seen": 3486515200, |
| "step": 53200, |
| "train_runtime": 25589.0853, |
| "train_tokens_per_second": 136250.091 |
| }, |
| { |
| "epoch": 0.533, |
| "grad_norm": 0.4607105255126953, |
| "learning_rate": 0.00013669430257786354, |
| "loss": 1.1992, |
| "num_input_tokens_seen": 3493068800, |
| "step": 53300, |
| "train_runtime": 25636.4376, |
| "train_tokens_per_second": 136254.063 |
| }, |
| { |
| "epoch": 0.534, |
| "grad_norm": 0.6885077357292175, |
| "learning_rate": 0.00013622024788136728, |
| "loss": 1.2006, |
| "num_input_tokens_seen": 3499622400, |
| "step": 53400, |
| "train_runtime": 25684.4816, |
| "train_tokens_per_second": 136254.352 |
| }, |
| { |
| "epoch": 0.535, |
| "grad_norm": 0.6578366160392761, |
| "learning_rate": 0.00013574633194682185, |
| "loss": 1.1948, |
| "num_input_tokens_seen": 3506176000, |
| "step": 53500, |
| "train_runtime": 25730.7322, |
| "train_tokens_per_second": 136264.136 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 0.4718693196773529, |
| "learning_rate": 0.0001352725595465555, |
| "loss": 1.2, |
| "num_input_tokens_seen": 3512729600, |
| "step": 53600, |
| "train_runtime": 25783.9922, |
| "train_tokens_per_second": 136236.839 |
| }, |
| { |
| "epoch": 0.537, |
| "grad_norm": 0.5561531186103821, |
| "learning_rate": 0.000134798935451451, |
| "loss": 1.2052, |
| "num_input_tokens_seen": 3519283200, |
| "step": 53700, |
| "train_runtime": 25832.8858, |
| "train_tokens_per_second": 136232.677 |
| }, |
| { |
| "epoch": 0.538, |
| "grad_norm": 0.5250628590583801, |
| "learning_rate": 0.00013432546443089768, |
| "loss": 1.2, |
| "num_input_tokens_seen": 3525836800, |
| "step": 53800, |
| "train_runtime": 25880.0084, |
| "train_tokens_per_second": 136237.854 |
| }, |
| { |
| "epoch": 0.539, |
| "grad_norm": 0.5457636117935181, |
| "learning_rate": 0.0001338521512527436, |
| "loss": 1.1944, |
| "num_input_tokens_seen": 3532390400, |
| "step": 53900, |
| "train_runtime": 25927.8228, |
| "train_tokens_per_second": 136239.376 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.4437522292137146, |
| "learning_rate": 0.00013337900068324712, |
| "loss": 1.1912, |
| "num_input_tokens_seen": 3538944000, |
| "step": 54000, |
| "train_runtime": 25975.6777, |
| "train_tokens_per_second": 136240.68 |
| }, |
| { |
| "epoch": 0.541, |
| "grad_norm": 0.5343025326728821, |
| "learning_rate": 0.00013290601748702918, |
| "loss": 1.188, |
| "num_input_tokens_seen": 3545497600, |
| "step": 54100, |
| "train_runtime": 26027.6243, |
| "train_tokens_per_second": 136220.562 |
| }, |
| { |
| "epoch": 0.542, |
| "grad_norm": 0.4907335042953491, |
| "learning_rate": 0.00013243320642702543, |
| "loss": 1.1909, |
| "num_input_tokens_seen": 3552051200, |
| "step": 54200, |
| "train_runtime": 26075.5648, |
| "train_tokens_per_second": 136221.448 |
| }, |
| { |
| "epoch": 0.543, |
| "grad_norm": 0.7268043160438538, |
| "learning_rate": 0.0001319605722644379, |
| "loss": 1.1911, |
| "num_input_tokens_seen": 3558604800, |
| "step": 54300, |
| "train_runtime": 26122.2114, |
| "train_tokens_per_second": 136229.079 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 1.3769776821136475, |
| "learning_rate": 0.0001314881197586874, |
| "loss": 1.224, |
| "num_input_tokens_seen": 3565158400, |
| "step": 54400, |
| "train_runtime": 26170.2324, |
| "train_tokens_per_second": 136229.528 |
| }, |
| { |
| "epoch": 0.545, |
| "grad_norm": 0.7141419649124146, |
| "learning_rate": 0.0001310158536673654, |
| "loss": 1.2025, |
| "num_input_tokens_seen": 3571712000, |
| "step": 54500, |
| "train_runtime": 26217.6992, |
| "train_tokens_per_second": 136232.854 |
| }, |
| { |
| "epoch": 0.546, |
| "grad_norm": 0.5124280452728271, |
| "learning_rate": 0.0001305437787461862, |
| "loss": 1.1972, |
| "num_input_tokens_seen": 3578265600, |
| "step": 54600, |
| "train_runtime": 26264.9719, |
| "train_tokens_per_second": 136237.176 |
| }, |
| { |
| "epoch": 0.547, |
| "grad_norm": 0.5609524250030518, |
| "learning_rate": 0.00013007189974893903, |
| "loss": 1.1924, |
| "num_input_tokens_seen": 3584819200, |
| "step": 54700, |
| "train_runtime": 26319.2824, |
| "train_tokens_per_second": 136205.051 |
| }, |
| { |
| "epoch": 0.548, |
| "grad_norm": 0.5220986604690552, |
| "learning_rate": 0.00012960022142744016, |
| "loss": 1.188, |
| "num_input_tokens_seen": 3591372800, |
| "step": 54800, |
| "train_runtime": 26367.119, |
| "train_tokens_per_second": 136206.493 |
| }, |
| { |
| "epoch": 0.549, |
| "grad_norm": 0.5159165263175964, |
| "learning_rate": 0.00012912874853148506, |
| "loss": 1.1891, |
| "num_input_tokens_seen": 3597926400, |
| "step": 54900, |
| "train_runtime": 26415.2651, |
| "train_tokens_per_second": 136206.333 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.5019519925117493, |
| "learning_rate": 0.00012865748580880053, |
| "loss": 1.1827, |
| "num_input_tokens_seen": 3604480000, |
| "step": 55000, |
| "train_runtime": 26462.5595, |
| "train_tokens_per_second": 136210.558 |
| }, |
| { |
| "epoch": 0.551, |
| "grad_norm": 0.5309172868728638, |
| "learning_rate": 0.0001281864380049969, |
| "loss": 1.1876, |
| "num_input_tokens_seen": 3611033600, |
| "step": 55100, |
| "train_runtime": 26514.9513, |
| "train_tokens_per_second": 136188.581 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 0.5431755781173706, |
| "learning_rate": 0.00012771560986352042, |
| "loss": 1.2038, |
| "num_input_tokens_seen": 3617587200, |
| "step": 55200, |
| "train_runtime": 26562.7975, |
| "train_tokens_per_second": 136189.993 |
| }, |
| { |
| "epoch": 0.553, |
| "grad_norm": 0.5063371658325195, |
| "learning_rate": 0.0001272450061256052, |
| "loss": 1.1837, |
| "num_input_tokens_seen": 3624140800, |
| "step": 55300, |
| "train_runtime": 26609.2594, |
| "train_tokens_per_second": 136198.484 |
| }, |
| { |
| "epoch": 0.554, |
| "grad_norm": 0.502314567565918, |
| "learning_rate": 0.00012677463153022565, |
| "loss": 1.1988, |
| "num_input_tokens_seen": 3630694400, |
| "step": 55400, |
| "train_runtime": 26655.8656, |
| "train_tokens_per_second": 136206.209 |
| }, |
| { |
| "epoch": 0.555, |
| "grad_norm": 0.5824739336967468, |
| "learning_rate": 0.0001263044908140488, |
| "loss": 1.1917, |
| "num_input_tokens_seen": 3637248000, |
| "step": 55500, |
| "train_runtime": 26707.6694, |
| "train_tokens_per_second": 136187.398 |
| }, |
| { |
| "epoch": 0.556, |
| "grad_norm": 0.5498598217964172, |
| "learning_rate": 0.00012583458871138632, |
| "loss": 1.1908, |
| "num_input_tokens_seen": 3643801600, |
| "step": 55600, |
| "train_runtime": 26755.8413, |
| "train_tokens_per_second": 136187.144 |
| }, |
| { |
| "epoch": 0.557, |
| "grad_norm": 0.5867239832878113, |
| "learning_rate": 0.00012536492995414723, |
| "loss": 1.193, |
| "num_input_tokens_seen": 3650355200, |
| "step": 55700, |
| "train_runtime": 26804.5182, |
| "train_tokens_per_second": 136184.324 |
| }, |
| { |
| "epoch": 0.558, |
| "grad_norm": 0.5584626197814941, |
| "learning_rate": 0.00012489551927179007, |
| "loss": 1.1833, |
| "num_input_tokens_seen": 3656908800, |
| "step": 55800, |
| "train_runtime": 26850.8981, |
| "train_tokens_per_second": 136193.165 |
| }, |
| { |
| "epoch": 0.559, |
| "grad_norm": 0.48578086495399475, |
| "learning_rate": 0.00012442636139127508, |
| "loss": 1.1919, |
| "num_input_tokens_seen": 3663462400, |
| "step": 55900, |
| "train_runtime": 26898.2376, |
| "train_tokens_per_second": 136197.116 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.5344805121421814, |
| "learning_rate": 0.00012395746103701695, |
| "loss": 1.1978, |
| "num_input_tokens_seen": 3670016000, |
| "step": 56000, |
| "train_runtime": 26951.1383, |
| "train_tokens_per_second": 136172.95 |
| }, |
| { |
| "epoch": 0.561, |
| "grad_norm": 0.5378079414367676, |
| "learning_rate": 0.00012348882293083708, |
| "loss": 1.192, |
| "num_input_tokens_seen": 3676569600, |
| "step": 56100, |
| "train_runtime": 26999.7429, |
| "train_tokens_per_second": 136170.541 |
| }, |
| { |
| "epoch": 0.562, |
| "grad_norm": 0.6195780038833618, |
| "learning_rate": 0.00012302045179191594, |
| "loss": 1.1919, |
| "num_input_tokens_seen": 3683123200, |
| "step": 56200, |
| "train_runtime": 27047.827, |
| "train_tokens_per_second": 136170.761 |
| }, |
| { |
| "epoch": 0.563, |
| "grad_norm": 0.5348559617996216, |
| "learning_rate": 0.00012255235233674572, |
| "loss": 1.1875, |
| "num_input_tokens_seen": 3689676800, |
| "step": 56300, |
| "train_runtime": 27094.1422, |
| "train_tokens_per_second": 136179.871 |
| }, |
| { |
| "epoch": 0.564, |
| "grad_norm": 0.48098888993263245, |
| "learning_rate": 0.00012208452927908278, |
| "loss": 1.1818, |
| "num_input_tokens_seen": 3696230400, |
| "step": 56400, |
| "train_runtime": 27141.6856, |
| "train_tokens_per_second": 136182.787 |
| }, |
| { |
| "epoch": 0.565, |
| "grad_norm": 0.585021436214447, |
| "learning_rate": 0.00012161698732990003, |
| "loss": 1.1887, |
| "num_input_tokens_seen": 3702784000, |
| "step": 56500, |
| "train_runtime": 27194.4825, |
| "train_tokens_per_second": 136159.385 |
| }, |
| { |
| "epoch": 0.566, |
| "grad_norm": 0.5269266963005066, |
| "learning_rate": 0.00012114973119733987, |
| "loss": 1.187, |
| "num_input_tokens_seen": 3709337600, |
| "step": 56600, |
| "train_runtime": 27242.6521, |
| "train_tokens_per_second": 136159.196 |
| }, |
| { |
| "epoch": 0.567, |
| "grad_norm": 0.5563040971755981, |
| "learning_rate": 0.00012068276558666616, |
| "loss": 1.1996, |
| "num_input_tokens_seen": 3715891200, |
| "step": 56700, |
| "train_runtime": 27290.3101, |
| "train_tokens_per_second": 136161.56 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 0.6131460666656494, |
| "learning_rate": 0.00012021609520021752, |
| "loss": 1.195, |
| "num_input_tokens_seen": 3722444800, |
| "step": 56800, |
| "train_runtime": 27337.7804, |
| "train_tokens_per_second": 136164.851 |
| }, |
| { |
| "epoch": 0.569, |
| "grad_norm": 0.5921023488044739, |
| "learning_rate": 0.00011974972473735957, |
| "loss": 1.2018, |
| "num_input_tokens_seen": 3728998400, |
| "step": 56900, |
| "train_runtime": 27384.9126, |
| "train_tokens_per_second": 136169.812 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.4582422375679016, |
| "learning_rate": 0.00011928365889443764, |
| "loss": 1.1914, |
| "num_input_tokens_seen": 3735552000, |
| "step": 57000, |
| "train_runtime": 27436.2125, |
| "train_tokens_per_second": 136154.07 |
| }, |
| { |
| "epoch": 0.571, |
| "grad_norm": 0.6521887183189392, |
| "learning_rate": 0.00011881790236472966, |
| "loss": 1.2041, |
| "num_input_tokens_seen": 3742105600, |
| "step": 57100, |
| "train_runtime": 27484.9505, |
| "train_tokens_per_second": 136151.076 |
| }, |
| { |
| "epoch": 0.572, |
| "grad_norm": 0.5971055030822754, |
| "learning_rate": 0.00011835245983839869, |
| "loss": 1.1992, |
| "num_input_tokens_seen": 3748659200, |
| "step": 57200, |
| "train_runtime": 27531.7756, |
| "train_tokens_per_second": 136157.553 |
| }, |
| { |
| "epoch": 0.573, |
| "grad_norm": 0.5187013745307922, |
| "learning_rate": 0.00011788733600244575, |
| "loss": 1.193, |
| "num_input_tokens_seen": 3755212800, |
| "step": 57300, |
| "train_runtime": 27579.3239, |
| "train_tokens_per_second": 136160.437 |
| }, |
| { |
| "epoch": 0.574, |
| "grad_norm": 0.5805628299713135, |
| "learning_rate": 0.00011742253554066278, |
| "loss": 1.1925, |
| "num_input_tokens_seen": 3761766400, |
| "step": 57400, |
| "train_runtime": 27633.4529, |
| "train_tokens_per_second": 136130.885 |
| }, |
| { |
| "epoch": 0.575, |
| "grad_norm": 0.5242844223976135, |
| "learning_rate": 0.00011695806313358523, |
| "loss": 1.1991, |
| "num_input_tokens_seen": 3768320000, |
| "step": 57500, |
| "train_runtime": 27681.3237, |
| "train_tokens_per_second": 136132.218 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.7652018666267395, |
| "learning_rate": 0.00011649392345844506, |
| "loss": 1.192, |
| "num_input_tokens_seen": 3774873600, |
| "step": 57600, |
| "train_runtime": 27728.8266, |
| "train_tokens_per_second": 136135.353 |
| }, |
| { |
| "epoch": 0.577, |
| "grad_norm": 0.5232011675834656, |
| "learning_rate": 0.00011603012118912372, |
| "loss": 1.2019, |
| "num_input_tokens_seen": 3781427200, |
| "step": 57700, |
| "train_runtime": 27778.1555, |
| "train_tokens_per_second": 136129.528 |
| }, |
| { |
| "epoch": 0.578, |
| "grad_norm": 0.5537053942680359, |
| "learning_rate": 0.00011556666099610485, |
| "loss": 1.1948, |
| "num_input_tokens_seen": 3787980800, |
| "step": 57800, |
| "train_runtime": 27824.9287, |
| "train_tokens_per_second": 136136.227 |
| }, |
| { |
| "epoch": 0.579, |
| "grad_norm": 0.6031852960586548, |
| "learning_rate": 0.00011510354754642745, |
| "loss": 1.1888, |
| "num_input_tokens_seen": 3794534400, |
| "step": 57900, |
| "train_runtime": 27872.2044, |
| "train_tokens_per_second": 136140.448 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.5748854875564575, |
| "learning_rate": 0.00011464078550363887, |
| "loss": 1.1921, |
| "num_input_tokens_seen": 3801088000, |
| "step": 58000, |
| "train_runtime": 27925.2055, |
| "train_tokens_per_second": 136116.742 |
| }, |
| { |
| "epoch": 0.581, |
| "grad_norm": 0.5586141347885132, |
| "learning_rate": 0.0001141783795277477, |
| "loss": 1.2024, |
| "num_input_tokens_seen": 3807641600, |
| "step": 58100, |
| "train_runtime": 27972.7534, |
| "train_tokens_per_second": 136119.657 |
| }, |
| { |
| "epoch": 0.582, |
| "grad_norm": 0.4893476366996765, |
| "learning_rate": 0.00011371633427517696, |
| "loss": 1.2034, |
| "num_input_tokens_seen": 3814195200, |
| "step": 58200, |
| "train_runtime": 28020.2529, |
| "train_tokens_per_second": 136122.797 |
| }, |
| { |
| "epoch": 0.583, |
| "grad_norm": 0.5007518529891968, |
| "learning_rate": 0.00011325465439871731, |
| "loss": 1.1885, |
| "num_input_tokens_seen": 3820748800, |
| "step": 58300, |
| "train_runtime": 28067.154, |
| "train_tokens_per_second": 136128.829 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 0.5260310769081116, |
| "learning_rate": 0.00011279334454747989, |
| "loss": 1.1931, |
| "num_input_tokens_seen": 3827302400, |
| "step": 58400, |
| "train_runtime": 28120.6157, |
| "train_tokens_per_second": 136103.08 |
| }, |
| { |
| "epoch": 0.585, |
| "grad_norm": 0.5364392399787903, |
| "learning_rate": 0.00011233240936684981, |
| "loss": 1.1928, |
| "num_input_tokens_seen": 3833856000, |
| "step": 58500, |
| "train_runtime": 28168.5149, |
| "train_tokens_per_second": 136104.3 |
| }, |
| { |
| "epoch": 0.586, |
| "grad_norm": 0.49333399534225464, |
| "learning_rate": 0.00011187185349843916, |
| "loss": 1.1935, |
| "num_input_tokens_seen": 3840409600, |
| "step": 58600, |
| "train_runtime": 28215.0596, |
| "train_tokens_per_second": 136112.05 |
| }, |
| { |
| "epoch": 0.587, |
| "grad_norm": 0.5711957216262817, |
| "learning_rate": 0.00011141168158004053, |
| "loss": 1.1812, |
| "num_input_tokens_seen": 3846963200, |
| "step": 58700, |
| "train_runtime": 28264.2863, |
| "train_tokens_per_second": 136106.858 |
| }, |
| { |
| "epoch": 0.588, |
| "grad_norm": 1.0157184600830078, |
| "learning_rate": 0.00011095189824557998, |
| "loss": 1.1929, |
| "num_input_tokens_seen": 3853516800, |
| "step": 58800, |
| "train_runtime": 28311.6057, |
| "train_tokens_per_second": 136110.853 |
| }, |
| { |
| "epoch": 0.589, |
| "grad_norm": 0.552700936794281, |
| "learning_rate": 0.00011049250812507054, |
| "loss": 1.1909, |
| "num_input_tokens_seen": 3860070400, |
| "step": 58900, |
| "train_runtime": 28359.0956, |
| "train_tokens_per_second": 136114.016 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.46860748529434204, |
| "learning_rate": 0.00011003351584456571, |
| "loss": 1.1972, |
| "num_input_tokens_seen": 3866624000, |
| "step": 59000, |
| "train_runtime": 28412.8978, |
| "train_tokens_per_second": 136086.929 |
| }, |
| { |
| "epoch": 0.591, |
| "grad_norm": 0.5399055480957031, |
| "learning_rate": 0.0001095749260261126, |
| "loss": 1.1895, |
| "num_input_tokens_seen": 3873177600, |
| "step": 59100, |
| "train_runtime": 28462.0603, |
| "train_tokens_per_second": 136082.123 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 0.49921005964279175, |
| "learning_rate": 0.00010911674328770559, |
| "loss": 1.1968, |
| "num_input_tokens_seen": 3879731200, |
| "step": 59200, |
| "train_runtime": 28510.9551, |
| "train_tokens_per_second": 136078.612 |
| }, |
| { |
| "epoch": 0.593, |
| "grad_norm": 0.5357686877250671, |
| "learning_rate": 0.00010865897224323979, |
| "loss": 1.1889, |
| "num_input_tokens_seen": 3886284800, |
| "step": 59300, |
| "train_runtime": 28558.3344, |
| "train_tokens_per_second": 136082.334 |
| }, |
| { |
| "epoch": 0.594, |
| "grad_norm": 0.5710283517837524, |
| "learning_rate": 0.00010820161750246453, |
| "loss": 1.1864, |
| "num_input_tokens_seen": 3892838400, |
| "step": 59400, |
| "train_runtime": 28606.454, |
| "train_tokens_per_second": 136082.522 |
| }, |
| { |
| "epoch": 0.595, |
| "grad_norm": 0.6333475112915039, |
| "learning_rate": 0.00010774468367093696, |
| "loss": 1.2009, |
| "num_input_tokens_seen": 3899392000, |
| "step": 59500, |
| "train_runtime": 28653.986, |
| "train_tokens_per_second": 136085.5 |
| }, |
| { |
| "epoch": 0.596, |
| "grad_norm": 0.5585243701934814, |
| "learning_rate": 0.00010728817534997573, |
| "loss": 1.1877, |
| "num_input_tokens_seen": 3905945600, |
| "step": 59600, |
| "train_runtime": 28701.832, |
| "train_tokens_per_second": 136086.979 |
| }, |
| { |
| "epoch": 0.597, |
| "grad_norm": 0.5805736184120178, |
| "learning_rate": 0.00010683209713661453, |
| "loss": 1.211, |
| "num_input_tokens_seen": 3912499200, |
| "step": 59700, |
| "train_runtime": 28751.7229, |
| "train_tokens_per_second": 136078.774 |
| }, |
| { |
| "epoch": 0.598, |
| "grad_norm": 0.5607670545578003, |
| "learning_rate": 0.00010637645362355589, |
| "loss": 1.196, |
| "num_input_tokens_seen": 3919052800, |
| "step": 59800, |
| "train_runtime": 28798.1873, |
| "train_tokens_per_second": 136086.788 |
| }, |
| { |
| "epoch": 0.599, |
| "grad_norm": 0.4962175488471985, |
| "learning_rate": 0.00010592124939912497, |
| "loss": 1.1889, |
| "num_input_tokens_seen": 3925606400, |
| "step": 59900, |
| "train_runtime": 28852.3337, |
| "train_tokens_per_second": 136058.54 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.6488810777664185, |
| "learning_rate": 0.00010546648904722326, |
| "loss": 1.1968, |
| "num_input_tokens_seen": 3932160000, |
| "step": 60000, |
| "train_runtime": 28898.713, |
| "train_tokens_per_second": 136066.959 |
| }, |
| { |
| "epoch": 0.601, |
| "grad_norm": 0.9370976686477661, |
| "learning_rate": 0.0001050121771472824, |
| "loss": 1.183, |
| "num_input_tokens_seen": 3938713600, |
| "step": 60100, |
| "train_runtime": 28946.5523, |
| "train_tokens_per_second": 136068.488 |
| }, |
| { |
| "epoch": 0.602, |
| "grad_norm": 0.5040610432624817, |
| "learning_rate": 0.0001045583182742182, |
| "loss": 1.2023, |
| "num_input_tokens_seen": 3945267200, |
| "step": 60200, |
| "train_runtime": 28994.2594, |
| "train_tokens_per_second": 136070.632 |
| }, |
| { |
| "epoch": 0.603, |
| "grad_norm": 0.5120612382888794, |
| "learning_rate": 0.00010410491699838448, |
| "loss": 1.1865, |
| "num_input_tokens_seen": 3951820800, |
| "step": 60300, |
| "train_runtime": 29042.095, |
| "train_tokens_per_second": 136072.167 |
| }, |
| { |
| "epoch": 0.604, |
| "grad_norm": 0.8983064889907837, |
| "learning_rate": 0.00010365197788552707, |
| "loss": 1.1734, |
| "num_input_tokens_seen": 3958374400, |
| "step": 60400, |
| "train_runtime": 29090.1772, |
| "train_tokens_per_second": 136072.543 |
| }, |
| { |
| "epoch": 0.605, |
| "grad_norm": 0.5155735015869141, |
| "learning_rate": 0.00010319950549673778, |
| "loss": 1.1923, |
| "num_input_tokens_seen": 3964928000, |
| "step": 60500, |
| "train_runtime": 29143.642, |
| "train_tokens_per_second": 136047.787 |
| }, |
| { |
| "epoch": 0.606, |
| "grad_norm": 1.5562913417816162, |
| "learning_rate": 0.00010274750438840855, |
| "loss": 1.1877, |
| "num_input_tokens_seen": 3971481600, |
| "step": 60600, |
| "train_runtime": 29191.8256, |
| "train_tokens_per_second": 136047.73 |
| }, |
| { |
| "epoch": 0.607, |
| "grad_norm": 0.5603190064430237, |
| "learning_rate": 0.00010229597911218554, |
| "loss": 1.1862, |
| "num_input_tokens_seen": 3978035200, |
| "step": 60700, |
| "train_runtime": 29240.4534, |
| "train_tokens_per_second": 136045.606 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.550956130027771, |
| "learning_rate": 0.00010184493421492324, |
| "loss": 1.1869, |
| "num_input_tokens_seen": 3984588800, |
| "step": 60800, |
| "train_runtime": 29287.1822, |
| "train_tokens_per_second": 136052.31 |
| }, |
| { |
| "epoch": 0.609, |
| "grad_norm": 0.5152813196182251, |
| "learning_rate": 0.0001013943742386388, |
| "loss": 1.1902, |
| "num_input_tokens_seen": 3991142400, |
| "step": 60900, |
| "train_runtime": 29335.0152, |
| "train_tokens_per_second": 136053.872 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.5258508324623108, |
| "learning_rate": 0.00010094430372046616, |
| "loss": 1.1843, |
| "num_input_tokens_seen": 3997696000, |
| "step": 61000, |
| "train_runtime": 29387.778, |
| "train_tokens_per_second": 136032.605 |
| }, |
| { |
| "epoch": 0.611, |
| "grad_norm": 0.5804030895233154, |
| "learning_rate": 0.0001004947271926104, |
| "loss": 1.1872, |
| "num_input_tokens_seen": 4004249600, |
| "step": 61100, |
| "train_runtime": 29435.5024, |
| "train_tokens_per_second": 136034.695 |
| }, |
| { |
| "epoch": 0.612, |
| "grad_norm": 0.5679774284362793, |
| "learning_rate": 0.00010004564918230222, |
| "loss": 1.1933, |
| "num_input_tokens_seen": 4010803200, |
| "step": 61200, |
| "train_runtime": 29483.504, |
| "train_tokens_per_second": 136035.5 |
| }, |
| { |
| "epoch": 0.613, |
| "grad_norm": 0.611191987991333, |
| "learning_rate": 9.959707421175217e-05, |
| "loss": 1.1926, |
| "num_input_tokens_seen": 4017356800, |
| "step": 61300, |
| "train_runtime": 29529.7223, |
| "train_tokens_per_second": 136044.517 |
| }, |
| { |
| "epoch": 0.614, |
| "grad_norm": 0.5725626945495605, |
| "learning_rate": 9.914900679810522e-05, |
| "loss": 1.1812, |
| "num_input_tokens_seen": 4023910400, |
| "step": 61400, |
| "train_runtime": 29577.4052, |
| "train_tokens_per_second": 136046.769 |
| }, |
| { |
| "epoch": 0.615, |
| "grad_norm": 0.6058773398399353, |
| "learning_rate": 9.870145145339529e-05, |
| "loss": 1.1904, |
| "num_input_tokens_seen": 4030464000, |
| "step": 61500, |
| "train_runtime": 29630.1636, |
| "train_tokens_per_second": 136025.708 |
| }, |
| { |
| "epoch": 0.616, |
| "grad_norm": 0.5151665806770325, |
| "learning_rate": 9.825441268449969e-05, |
| "loss": 1.1783, |
| "num_input_tokens_seen": 4037017600, |
| "step": 61600, |
| "train_runtime": 29677.4813, |
| "train_tokens_per_second": 136029.657 |
| }, |
| { |
| "epoch": 0.617, |
| "grad_norm": 0.5461622476577759, |
| "learning_rate": 9.780789499309391e-05, |
| "loss": 1.1825, |
| "num_input_tokens_seen": 4043571200, |
| "step": 61700, |
| "train_runtime": 29725.432, |
| "train_tokens_per_second": 136030.696 |
| }, |
| { |
| "epoch": 0.618, |
| "grad_norm": 0.8243169784545898, |
| "learning_rate": 9.736190287560608e-05, |
| "loss": 1.1933, |
| "num_input_tokens_seen": 4050124800, |
| "step": 61800, |
| "train_runtime": 29772.1739, |
| "train_tokens_per_second": 136037.255 |
| }, |
| { |
| "epoch": 0.619, |
| "grad_norm": 0.4877258539199829, |
| "learning_rate": 9.691644082317186e-05, |
| "loss": 1.1881, |
| "num_input_tokens_seen": 4056678400, |
| "step": 61900, |
| "train_runtime": 29825.721, |
| "train_tokens_per_second": 136012.752 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 0.5376379489898682, |
| "learning_rate": 9.647151332158926e-05, |
| "loss": 1.1812, |
| "num_input_tokens_seen": 4063232000, |
| "step": 62000, |
| "train_runtime": 29872.1612, |
| "train_tokens_per_second": 136020.691 |
| }, |
| { |
| "epoch": 0.621, |
| "grad_norm": 0.5128985643386841, |
| "learning_rate": 9.60271248512732e-05, |
| "loss": 1.1719, |
| "num_input_tokens_seen": 4069785600, |
| "step": 62100, |
| "train_runtime": 29919.8698, |
| "train_tokens_per_second": 136022.838 |
| }, |
| { |
| "epoch": 0.622, |
| "grad_norm": 0.6911051273345947, |
| "learning_rate": 9.558327988721068e-05, |
| "loss": 1.199, |
| "num_input_tokens_seen": 4076339200, |
| "step": 62200, |
| "train_runtime": 29967.7263, |
| "train_tokens_per_second": 136024.307 |
| }, |
| { |
| "epoch": 0.623, |
| "grad_norm": 0.5334423184394836, |
| "learning_rate": 9.513998289891559e-05, |
| "loss": 1.1922, |
| "num_input_tokens_seen": 4082892800, |
| "step": 62300, |
| "train_runtime": 30014.7483, |
| "train_tokens_per_second": 136029.553 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 0.47934290766716003, |
| "learning_rate": 9.469723835038361e-05, |
| "loss": 1.1864, |
| "num_input_tokens_seen": 4089446400, |
| "step": 62400, |
| "train_runtime": 30062.3944, |
| "train_tokens_per_second": 136031.959 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 0.6690011620521545, |
| "learning_rate": 9.42550507000475e-05, |
| "loss": 1.1887, |
| "num_input_tokens_seen": 4096000000, |
| "step": 62500, |
| "train_runtime": 30115.1503, |
| "train_tokens_per_second": 136011.275 |
| }, |
| { |
| "epoch": 0.626, |
| "grad_norm": 0.5379562973976135, |
| "learning_rate": 9.381342440073194e-05, |
| "loss": 1.1873, |
| "num_input_tokens_seen": 4102553600, |
| "step": 62600, |
| "train_runtime": 30162.8214, |
| "train_tokens_per_second": 136013.589 |
| }, |
| { |
| "epoch": 0.627, |
| "grad_norm": 0.5619449615478516, |
| "learning_rate": 9.337236389960886e-05, |
| "loss": 1.184, |
| "num_input_tokens_seen": 4109107200, |
| "step": 62700, |
| "train_runtime": 30211.3171, |
| "train_tokens_per_second": 136012.183 |
| }, |
| { |
| "epoch": 0.628, |
| "grad_norm": 0.9017994999885559, |
| "learning_rate": 9.293187363815265e-05, |
| "loss": 1.1869, |
| "num_input_tokens_seen": 4115660800, |
| "step": 62800, |
| "train_runtime": 30263.5761, |
| "train_tokens_per_second": 135993.869 |
| }, |
| { |
| "epoch": 0.629, |
| "grad_norm": 0.6502019762992859, |
| "learning_rate": 9.249195805209533e-05, |
| "loss": 1.1944, |
| "num_input_tokens_seen": 4122214400, |
| "step": 62900, |
| "train_runtime": 30310.6247, |
| "train_tokens_per_second": 135998.992 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 0.5749123096466064, |
| "learning_rate": 9.205262157138192e-05, |
| "loss": 1.1896, |
| "num_input_tokens_seen": 4128768000, |
| "step": 63000, |
| "train_runtime": 30359.0787, |
| "train_tokens_per_second": 135997.803 |
| }, |
| { |
| "epoch": 0.631, |
| "grad_norm": 0.4843611419200897, |
| "learning_rate": 9.161386862012601e-05, |
| "loss": 1.1932, |
| "num_input_tokens_seen": 4135321600, |
| "step": 63100, |
| "train_runtime": 30406.8492, |
| "train_tokens_per_second": 135999.675 |
| }, |
| { |
| "epoch": 0.632, |
| "grad_norm": 0.634504497051239, |
| "learning_rate": 9.11757036165649e-05, |
| "loss": 1.181, |
| "num_input_tokens_seen": 4141875200, |
| "step": 63200, |
| "train_runtime": 30453.794, |
| "train_tokens_per_second": 136005.228 |
| }, |
| { |
| "epoch": 0.633, |
| "grad_norm": 0.605948269367218, |
| "learning_rate": 9.073813097301521e-05, |
| "loss": 1.1742, |
| "num_input_tokens_seen": 4148428800, |
| "step": 63300, |
| "train_runtime": 30506.719, |
| "train_tokens_per_second": 135984.102 |
| }, |
| { |
| "epoch": 0.634, |
| "grad_norm": 0.5731847882270813, |
| "learning_rate": 9.030115509582883e-05, |
| "loss": 1.1809, |
| "num_input_tokens_seen": 4154982400, |
| "step": 63400, |
| "train_runtime": 30554.7018, |
| "train_tokens_per_second": 135985.042 |
| }, |
| { |
| "epoch": 0.635, |
| "grad_norm": 0.9707246422767639, |
| "learning_rate": 8.986478038534775e-05, |
| "loss": 1.1981, |
| "num_input_tokens_seen": 4161536000, |
| "step": 63500, |
| "train_runtime": 30602.1945, |
| "train_tokens_per_second": 135988.156 |
| }, |
| { |
| "epoch": 0.636, |
| "grad_norm": 0.7120965719223022, |
| "learning_rate": 8.942901123586059e-05, |
| "loss": 1.1816, |
| "num_input_tokens_seen": 4168089600, |
| "step": 63600, |
| "train_runtime": 30649.6499, |
| "train_tokens_per_second": 135991.426 |
| }, |
| { |
| "epoch": 0.637, |
| "grad_norm": 0.5136720538139343, |
| "learning_rate": 8.899385203555781e-05, |
| "loss": 1.177, |
| "num_input_tokens_seen": 4174643200, |
| "step": 63700, |
| "train_runtime": 30696.6221, |
| "train_tokens_per_second": 135996.827 |
| }, |
| { |
| "epoch": 0.638, |
| "grad_norm": 0.5284336805343628, |
| "learning_rate": 8.855930716648774e-05, |
| "loss": 1.184, |
| "num_input_tokens_seen": 4181196800, |
| "step": 63800, |
| "train_runtime": 30745.5123, |
| "train_tokens_per_second": 135993.727 |
| }, |
| { |
| "epoch": 0.639, |
| "grad_norm": 0.5269259810447693, |
| "learning_rate": 8.812538100451239e-05, |
| "loss": 1.2174, |
| "num_input_tokens_seen": 4187750400, |
| "step": 63900, |
| "train_runtime": 30792.1632, |
| "train_tokens_per_second": 136000.526 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.5354572534561157, |
| "learning_rate": 8.769207791926338e-05, |
| "loss": 1.1771, |
| "num_input_tokens_seen": 4194304000, |
| "step": 64000, |
| "train_runtime": 30846.5823, |
| "train_tokens_per_second": 135973.054 |
| }, |
| { |
| "epoch": 0.641, |
| "grad_norm": 0.7058772444725037, |
| "learning_rate": 8.725940227409797e-05, |
| "loss": 1.179, |
| "num_input_tokens_seen": 4200857600, |
| "step": 64100, |
| "train_runtime": 30893.4429, |
| "train_tokens_per_second": 135978.94 |
| }, |
| { |
| "epoch": 0.642, |
| "grad_norm": 0.5777366161346436, |
| "learning_rate": 8.682735842605509e-05, |
| "loss": 1.182, |
| "num_input_tokens_seen": 4207411200, |
| "step": 64200, |
| "train_runtime": 30940.3826, |
| "train_tokens_per_second": 135984.459 |
| }, |
| { |
| "epoch": 0.643, |
| "grad_norm": 0.5608710646629333, |
| "learning_rate": 8.639595072581158e-05, |
| "loss": 1.1904, |
| "num_input_tokens_seen": 4213964800, |
| "step": 64300, |
| "train_runtime": 30988.4894, |
| "train_tokens_per_second": 135984.841 |
| }, |
| { |
| "epoch": 0.644, |
| "grad_norm": 0.6048064231872559, |
| "learning_rate": 8.596518351763806e-05, |
| "loss": 1.1851, |
| "num_input_tokens_seen": 4220518400, |
| "step": 64400, |
| "train_runtime": 31041.3711, |
| "train_tokens_per_second": 135964.304 |
| }, |
| { |
| "epoch": 0.645, |
| "grad_norm": 0.47835734486579895, |
| "learning_rate": 8.553506113935561e-05, |
| "loss": 1.1803, |
| "num_input_tokens_seen": 4227072000, |
| "step": 64500, |
| "train_runtime": 31089.9624, |
| "train_tokens_per_second": 135962.596 |
| }, |
| { |
| "epoch": 0.646, |
| "grad_norm": 1.1150704622268677, |
| "learning_rate": 8.510558792229183e-05, |
| "loss": 1.1878, |
| "num_input_tokens_seen": 4233625600, |
| "step": 64600, |
| "train_runtime": 31137.4325, |
| "train_tokens_per_second": 135965.79 |
| }, |
| { |
| "epoch": 0.647, |
| "grad_norm": 0.6650880575180054, |
| "learning_rate": 8.467676819123716e-05, |
| "loss": 1.1951, |
| "num_input_tokens_seen": 4240179200, |
| "step": 64700, |
| "train_runtime": 31185.0957, |
| "train_tokens_per_second": 135968.132 |
| }, |
| { |
| "epoch": 0.648, |
| "grad_norm": 0.7750310897827148, |
| "learning_rate": 8.424860626440158e-05, |
| "loss": 1.1829, |
| "num_input_tokens_seen": 4246732800, |
| "step": 64800, |
| "train_runtime": 31237.5852, |
| "train_tokens_per_second": 135949.459 |
| }, |
| { |
| "epoch": 0.649, |
| "grad_norm": 0.595783531665802, |
| "learning_rate": 8.382110645337102e-05, |
| "loss": 1.1856, |
| "num_input_tokens_seen": 4253286400, |
| "step": 64900, |
| "train_runtime": 31285.0064, |
| "train_tokens_per_second": 135952.87 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.6093938946723938, |
| "learning_rate": 8.339427306306365e-05, |
| "loss": 1.1842, |
| "num_input_tokens_seen": 4259840000, |
| "step": 65000, |
| "train_runtime": 31332.1176, |
| "train_tokens_per_second": 135957.615 |
| }, |
| { |
| "epoch": 0.651, |
| "grad_norm": 0.6823499798774719, |
| "learning_rate": 8.296811039168716e-05, |
| "loss": 1.1818, |
| "num_input_tokens_seen": 4266393600, |
| "step": 65100, |
| "train_runtime": 31381.0925, |
| "train_tokens_per_second": 135954.273 |
| }, |
| { |
| "epoch": 0.652, |
| "grad_norm": 0.5052744746208191, |
| "learning_rate": 8.254262273069477e-05, |
| "loss": 1.2034, |
| "num_input_tokens_seen": 4272947200, |
| "step": 65200, |
| "train_runtime": 31428.8012, |
| "train_tokens_per_second": 135956.417 |
| }, |
| { |
| "epoch": 0.653, |
| "grad_norm": 0.5003641247749329, |
| "learning_rate": 8.211781436474263e-05, |
| "loss": 1.177, |
| "num_input_tokens_seen": 4279500800, |
| "step": 65300, |
| "train_runtime": 31476.0702, |
| "train_tokens_per_second": 135960.454 |
| }, |
| { |
| "epoch": 0.654, |
| "grad_norm": 0.5675527453422546, |
| "learning_rate": 8.169368957164613e-05, |
| "loss": 1.1707, |
| "num_input_tokens_seen": 4286054400, |
| "step": 65400, |
| "train_runtime": 31524.8831, |
| "train_tokens_per_second": 135957.82 |
| }, |
| { |
| "epoch": 0.655, |
| "grad_norm": 0.5109818577766418, |
| "learning_rate": 8.127025262233731e-05, |
| "loss": 1.187, |
| "num_input_tokens_seen": 4292608000, |
| "step": 65500, |
| "train_runtime": 31578.0721, |
| "train_tokens_per_second": 135936.354 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.6228885054588318, |
| "learning_rate": 8.084750778082159e-05, |
| "loss": 1.1944, |
| "num_input_tokens_seen": 4299161600, |
| "step": 65600, |
| "train_runtime": 31626.6624, |
| "train_tokens_per_second": 135934.723 |
| }, |
| { |
| "epoch": 0.657, |
| "grad_norm": 0.6139951348304749, |
| "learning_rate": 8.042545930413473e-05, |
| "loss": 1.1788, |
| "num_input_tokens_seen": 4305715200, |
| "step": 65700, |
| "train_runtime": 31673.2442, |
| "train_tokens_per_second": 135941.717 |
| }, |
| { |
| "epoch": 0.658, |
| "grad_norm": 0.6792371273040771, |
| "learning_rate": 8.000411144230025e-05, |
| "loss": 1.2019, |
| "num_input_tokens_seen": 4312268800, |
| "step": 65800, |
| "train_runtime": 31721.455, |
| "train_tokens_per_second": 135941.709 |
| }, |
| { |
| "epoch": 0.659, |
| "grad_norm": 0.546470582485199, |
| "learning_rate": 7.95834684382865e-05, |
| "loss": 1.1905, |
| "num_input_tokens_seen": 4318822400, |
| "step": 65900, |
| "train_runtime": 31770.1998, |
| "train_tokens_per_second": 135939.416 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.5273057818412781, |
| "learning_rate": 7.916353452796378e-05, |
| "loss": 1.1769, |
| "num_input_tokens_seen": 4325376000, |
| "step": 66000, |
| "train_runtime": 31818.123, |
| "train_tokens_per_second": 135940.64 |
| }, |
| { |
| "epoch": 0.661, |
| "grad_norm": 0.5213398933410645, |
| "learning_rate": 7.874431394006188e-05, |
| "loss": 1.1834, |
| "num_input_tokens_seen": 4331929600, |
| "step": 66100, |
| "train_runtime": 31870.8187, |
| "train_tokens_per_second": 135921.504 |
| }, |
| { |
| "epoch": 0.662, |
| "grad_norm": 0.5762707591056824, |
| "learning_rate": 7.832581089612762e-05, |
| "loss": 1.1875, |
| "num_input_tokens_seen": 4338483200, |
| "step": 66200, |
| "train_runtime": 31918.6258, |
| "train_tokens_per_second": 135923.245 |
| }, |
| { |
| "epoch": 0.663, |
| "grad_norm": 0.6153529286384583, |
| "learning_rate": 7.790802961048183e-05, |
| "loss": 1.1895, |
| "num_input_tokens_seen": 4345036800, |
| "step": 66300, |
| "train_runtime": 31967.5441, |
| "train_tokens_per_second": 135920.257 |
| }, |
| { |
| "epoch": 0.664, |
| "grad_norm": 0.6668293476104736, |
| "learning_rate": 7.749097429017749e-05, |
| "loss": 1.1835, |
| "num_input_tokens_seen": 4351590400, |
| "step": 66400, |
| "train_runtime": 32014.502, |
| "train_tokens_per_second": 135925.6 |
| }, |
| { |
| "epoch": 0.665, |
| "grad_norm": 0.49117180705070496, |
| "learning_rate": 7.70746491349571e-05, |
| "loss": 1.1762, |
| "num_input_tokens_seen": 4358144000, |
| "step": 66500, |
| "train_runtime": 32062.234, |
| "train_tokens_per_second": 135927.646 |
| }, |
| { |
| "epoch": 0.666, |
| "grad_norm": 0.5580335259437561, |
| "learning_rate": 7.665905833721025e-05, |
| "loss": 1.1751, |
| "num_input_tokens_seen": 4364697600, |
| "step": 66600, |
| "train_runtime": 32116.4057, |
| "train_tokens_per_second": 135902.431 |
| }, |
| { |
| "epoch": 0.667, |
| "grad_norm": 0.4941908121109009, |
| "learning_rate": 7.624420608193171e-05, |
| "loss": 1.1991, |
| "num_input_tokens_seen": 4371251200, |
| "step": 66700, |
| "train_runtime": 32164.7962, |
| "train_tokens_per_second": 135901.722 |
| }, |
| { |
| "epoch": 0.668, |
| "grad_norm": 0.5203377604484558, |
| "learning_rate": 7.583009654667912e-05, |
| "loss": 1.1892, |
| "num_input_tokens_seen": 4377804800, |
| "step": 66800, |
| "train_runtime": 32211.7614, |
| "train_tokens_per_second": 135907.029 |
| }, |
| { |
| "epoch": 0.669, |
| "grad_norm": 0.5924380421638489, |
| "learning_rate": 7.541673390153087e-05, |
| "loss": 1.1749, |
| "num_input_tokens_seen": 4384358400, |
| "step": 66900, |
| "train_runtime": 32259.5523, |
| "train_tokens_per_second": 135908.842 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 0.5180861353874207, |
| "learning_rate": 7.500412230904416e-05, |
| "loss": 1.1833, |
| "num_input_tokens_seen": 4390912000, |
| "step": 67000, |
| "train_runtime": 32305.7062, |
| "train_tokens_per_second": 135917.536 |
| }, |
| { |
| "epoch": 0.671, |
| "grad_norm": 0.5575404167175293, |
| "learning_rate": 7.459226592421318e-05, |
| "loss": 1.1908, |
| "num_input_tokens_seen": 4397465600, |
| "step": 67100, |
| "train_runtime": 32353.5616, |
| "train_tokens_per_second": 135919.058 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.519868016242981, |
| "learning_rate": 7.418116889442721e-05, |
| "loss": 1.191, |
| "num_input_tokens_seen": 4404019200, |
| "step": 67200, |
| "train_runtime": 32407.2129, |
| "train_tokens_per_second": 135896.265 |
| }, |
| { |
| "epoch": 0.673, |
| "grad_norm": 0.5036019086837769, |
| "learning_rate": 7.377083535942868e-05, |
| "loss": 1.1771, |
| "num_input_tokens_seen": 4410572800, |
| "step": 67300, |
| "train_runtime": 32454.4825, |
| "train_tokens_per_second": 135900.266 |
| }, |
| { |
| "epoch": 0.674, |
| "grad_norm": 0.5349675416946411, |
| "learning_rate": 7.336126945127178e-05, |
| "loss": 1.1834, |
| "num_input_tokens_seen": 4417126400, |
| "step": 67400, |
| "train_runtime": 32501.8427, |
| "train_tokens_per_second": 135903.876 |
| }, |
| { |
| "epoch": 0.675, |
| "grad_norm": 0.675538957118988, |
| "learning_rate": 7.29524752942807e-05, |
| "loss": 1.1852, |
| "num_input_tokens_seen": 4423680000, |
| "step": 67500, |
| "train_runtime": 32550.3797, |
| "train_tokens_per_second": 135902.562 |
| }, |
| { |
| "epoch": 0.676, |
| "grad_norm": 0.5116747617721558, |
| "learning_rate": 7.254445700500798e-05, |
| "loss": 1.1816, |
| "num_input_tokens_seen": 4430233600, |
| "step": 67600, |
| "train_runtime": 32598.0387, |
| "train_tokens_per_second": 135904.913 |
| }, |
| { |
| "epoch": 0.677, |
| "grad_norm": 0.5892815589904785, |
| "learning_rate": 7.213721869219329e-05, |
| "loss": 1.1827, |
| "num_input_tokens_seen": 4436787200, |
| "step": 67700, |
| "train_runtime": 32650.3715, |
| "train_tokens_per_second": 135887.802 |
| }, |
| { |
| "epoch": 0.678, |
| "grad_norm": 0.6862092614173889, |
| "learning_rate": 7.173076445672198e-05, |
| "loss": 1.1801, |
| "num_input_tokens_seen": 4443340800, |
| "step": 67800, |
| "train_runtime": 32698.6817, |
| "train_tokens_per_second": 135887.46 |
| }, |
| { |
| "epoch": 0.679, |
| "grad_norm": 0.8308249115943909, |
| "learning_rate": 7.132509839158359e-05, |
| "loss": 1.1887, |
| "num_input_tokens_seen": 4449894400, |
| "step": 67900, |
| "train_runtime": 32745.9782, |
| "train_tokens_per_second": 135891.326 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.5063105225563049, |
| "learning_rate": 7.092022458183096e-05, |
| "loss": 1.1949, |
| "num_input_tokens_seen": 4456448000, |
| "step": 68000, |
| "train_runtime": 32794.3077, |
| "train_tokens_per_second": 135890.9 |
| }, |
| { |
| "epoch": 0.681, |
| "grad_norm": 0.6090216040611267, |
| "learning_rate": 7.051614710453888e-05, |
| "loss": 1.1827, |
| "num_input_tokens_seen": 4463001600, |
| "step": 68100, |
| "train_runtime": 32841.7871, |
| "train_tokens_per_second": 135893.993 |
| }, |
| { |
| "epoch": 0.682, |
| "grad_norm": 0.5802315473556519, |
| "learning_rate": 7.011287002876296e-05, |
| "loss": 1.1808, |
| "num_input_tokens_seen": 4469555200, |
| "step": 68200, |
| "train_runtime": 32889.3297, |
| "train_tokens_per_second": 135896.816 |
| }, |
| { |
| "epoch": 0.683, |
| "grad_norm": 0.5431249141693115, |
| "learning_rate": 6.971039741549894e-05, |
| "loss": 1.1872, |
| "num_input_tokens_seen": 4476108800, |
| "step": 68300, |
| "train_runtime": 32943.0615, |
| "train_tokens_per_second": 135874.099 |
| }, |
| { |
| "epoch": 0.684, |
| "grad_norm": 0.8621413111686707, |
| "learning_rate": 6.930873331764162e-05, |
| "loss": 1.1776, |
| "num_input_tokens_seen": 4482662400, |
| "step": 68400, |
| "train_runtime": 32991.0019, |
| "train_tokens_per_second": 135875.304 |
| }, |
| { |
| "epoch": 0.685, |
| "grad_norm": 0.6102387309074402, |
| "learning_rate": 6.890788177994391e-05, |
| "loss": 1.18, |
| "num_input_tokens_seen": 4489216000, |
| "step": 68500, |
| "train_runtime": 33039.2288, |
| "train_tokens_per_second": 135875.326 |
| }, |
| { |
| "epoch": 0.686, |
| "grad_norm": 0.5266649723052979, |
| "learning_rate": 6.850784683897641e-05, |
| "loss": 1.1743, |
| "num_input_tokens_seen": 4495769600, |
| "step": 68600, |
| "train_runtime": 33086.8363, |
| "train_tokens_per_second": 135877.893 |
| }, |
| { |
| "epoch": 0.687, |
| "grad_norm": 0.5879511833190918, |
| "learning_rate": 6.810863252308653e-05, |
| "loss": 1.1803, |
| "num_input_tokens_seen": 4502323200, |
| "step": 68700, |
| "train_runtime": 33133.6328, |
| "train_tokens_per_second": 135883.778 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 0.5183672308921814, |
| "learning_rate": 6.771024285235792e-05, |
| "loss": 1.1834, |
| "num_input_tokens_seen": 4508876800, |
| "step": 68800, |
| "train_runtime": 33182.6281, |
| "train_tokens_per_second": 135880.642 |
| }, |
| { |
| "epoch": 0.689, |
| "grad_norm": 0.5091114640235901, |
| "learning_rate": 6.73126818385702e-05, |
| "loss": 1.1913, |
| "num_input_tokens_seen": 4515430400, |
| "step": 68900, |
| "train_runtime": 33236.4019, |
| "train_tokens_per_second": 135857.979 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.7696628570556641, |
| "learning_rate": 6.691595348515837e-05, |
| "loss": 1.1786, |
| "num_input_tokens_seen": 4521984000, |
| "step": 69000, |
| "train_runtime": 33285.7582, |
| "train_tokens_per_second": 135853.417 |
| }, |
| { |
| "epoch": 0.691, |
| "grad_norm": 0.5338857769966125, |
| "learning_rate": 6.65200617871726e-05, |
| "loss": 1.1832, |
| "num_input_tokens_seen": 4528537600, |
| "step": 69100, |
| "train_runtime": 33332.8826, |
| "train_tokens_per_second": 135857.965 |
| }, |
| { |
| "epoch": 0.692, |
| "grad_norm": 0.7705228328704834, |
| "learning_rate": 6.612501073123775e-05, |
| "loss": 1.1762, |
| "num_input_tokens_seen": 4535091200, |
| "step": 69200, |
| "train_runtime": 33380.8611, |
| "train_tokens_per_second": 135859.024 |
| }, |
| { |
| "epoch": 0.693, |
| "grad_norm": 0.5423911213874817, |
| "learning_rate": 6.573080429551368e-05, |
| "loss": 1.19, |
| "num_input_tokens_seen": 4541644800, |
| "step": 69300, |
| "train_runtime": 33429.7481, |
| "train_tokens_per_second": 135856.387 |
| }, |
| { |
| "epoch": 0.694, |
| "grad_norm": 0.5332856774330139, |
| "learning_rate": 6.533744644965482e-05, |
| "loss": 1.1753, |
| "num_input_tokens_seen": 4548198400, |
| "step": 69400, |
| "train_runtime": 33476.6955, |
| "train_tokens_per_second": 135861.629 |
| }, |
| { |
| "epoch": 0.695, |
| "grad_norm": 0.5862846970558167, |
| "learning_rate": 6.494494115477023e-05, |
| "loss": 1.1799, |
| "num_input_tokens_seen": 4554752000, |
| "step": 69500, |
| "train_runtime": 33523.7618, |
| "train_tokens_per_second": 135866.375 |
| }, |
| { |
| "epoch": 0.696, |
| "grad_norm": 0.658592164516449, |
| "learning_rate": 6.455329236338394e-05, |
| "loss": 1.1846, |
| "num_input_tokens_seen": 4561305600, |
| "step": 69600, |
| "train_runtime": 33571.8888, |
| "train_tokens_per_second": 135866.815 |
| }, |
| { |
| "epoch": 0.697, |
| "grad_norm": 0.5558256506919861, |
| "learning_rate": 6.416250401939496e-05, |
| "loss": 1.1873, |
| "num_input_tokens_seen": 4567859200, |
| "step": 69700, |
| "train_runtime": 33620.7189, |
| "train_tokens_per_second": 135864.412 |
| }, |
| { |
| "epoch": 0.698, |
| "grad_norm": 0.5283026099205017, |
| "learning_rate": 6.377258005803746e-05, |
| "loss": 1.1743, |
| "num_input_tokens_seen": 4574412800, |
| "step": 69800, |
| "train_runtime": 33674.4741, |
| "train_tokens_per_second": 135842.145 |
| }, |
| { |
| "epoch": 0.699, |
| "grad_norm": 0.802412211894989, |
| "learning_rate": 6.338352440584149e-05, |
| "loss": 1.1782, |
| "num_input_tokens_seen": 4580966400, |
| "step": 69900, |
| "train_runtime": 33722.7187, |
| "train_tokens_per_second": 135842.144 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.5585867762565613, |
| "learning_rate": 6.299534098059318e-05, |
| "loss": 1.1809, |
| "num_input_tokens_seen": 4587520000, |
| "step": 70000, |
| "train_runtime": 33770.2671, |
| "train_tokens_per_second": 135844.943 |
| }, |
| { |
| "epoch": 0.701, |
| "grad_norm": 0.6285941004753113, |
| "learning_rate": 6.260803369129522e-05, |
| "loss": 1.1807, |
| "num_input_tokens_seen": 4594073600, |
| "step": 70100, |
| "train_runtime": 33818.7011, |
| "train_tokens_per_second": 135844.176 |
| }, |
| { |
| "epoch": 0.702, |
| "grad_norm": 0.9580085277557373, |
| "learning_rate": 6.222160643812774e-05, |
| "loss": 1.1802, |
| "num_input_tokens_seen": 4600627200, |
| "step": 70200, |
| "train_runtime": 33866.618, |
| "train_tokens_per_second": 135845.487 |
| }, |
| { |
| "epoch": 0.703, |
| "grad_norm": 0.6520081162452698, |
| "learning_rate": 6.183606311240901e-05, |
| "loss": 1.1879, |
| "num_input_tokens_seen": 4607180800, |
| "step": 70300, |
| "train_runtime": 33915.2388, |
| "train_tokens_per_second": 135843.973 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.520710289478302, |
| "learning_rate": 6.145140759655585e-05, |
| "loss": 1.179, |
| "num_input_tokens_seen": 4613734400, |
| "step": 70400, |
| "train_runtime": 33968.6026, |
| "train_tokens_per_second": 135823.497 |
| }, |
| { |
| "epoch": 0.705, |
| "grad_norm": 0.5945906639099121, |
| "learning_rate": 6.10676437640451e-05, |
| "loss": 1.192, |
| "num_input_tokens_seen": 4620288000, |
| "step": 70500, |
| "train_runtime": 34016.7254, |
| "train_tokens_per_second": 135824.008 |
| }, |
| { |
| "epoch": 0.706, |
| "grad_norm": 0.5285692811012268, |
| "learning_rate": 6.068477547937436e-05, |
| "loss": 1.1855, |
| "num_input_tokens_seen": 4626841600, |
| "step": 70600, |
| "train_runtime": 34064.6033, |
| "train_tokens_per_second": 135825.495 |
| }, |
| { |
| "epoch": 0.707, |
| "grad_norm": 0.6492000222206116, |
| "learning_rate": 6.030280659802294e-05, |
| "loss": 1.192, |
| "num_input_tokens_seen": 4633395200, |
| "step": 70700, |
| "train_runtime": 34111.1694, |
| "train_tokens_per_second": 135832.2 |
| }, |
| { |
| "epoch": 0.708, |
| "grad_norm": 0.5521112084388733, |
| "learning_rate": 5.9921740966413204e-05, |
| "loss": 1.1781, |
| "num_input_tokens_seen": 4639948800, |
| "step": 70800, |
| "train_runtime": 34162.8893, |
| "train_tokens_per_second": 135818.395 |
| }, |
| { |
| "epoch": 0.709, |
| "grad_norm": 0.9012600183486938, |
| "learning_rate": 5.954158242187197e-05, |
| "loss": 1.1748, |
| "num_input_tokens_seen": 4646502400, |
| "step": 70900, |
| "train_runtime": 34211.5739, |
| "train_tokens_per_second": 135816.68 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.4976861774921417, |
| "learning_rate": 5.91623347925914e-05, |
| "loss": 1.1902, |
| "num_input_tokens_seen": 4653056000, |
| "step": 71000, |
| "train_runtime": 34258.4131, |
| "train_tokens_per_second": 135822.287 |
| }, |
| { |
| "epoch": 0.711, |
| "grad_norm": 0.5690837502479553, |
| "learning_rate": 5.8784001897590996e-05, |
| "loss": 1.1767, |
| "num_input_tokens_seen": 4659609600, |
| "step": 71100, |
| "train_runtime": 34307.7023, |
| "train_tokens_per_second": 135818.177 |
| }, |
| { |
| "epoch": 0.712, |
| "grad_norm": 0.5648302435874939, |
| "learning_rate": 5.840658754667877e-05, |
| "loss": 1.182, |
| "num_input_tokens_seen": 4666163200, |
| "step": 71200, |
| "train_runtime": 34355.8058, |
| "train_tokens_per_second": 135818.768 |
| }, |
| { |
| "epoch": 0.713, |
| "grad_norm": 0.5309351086616516, |
| "learning_rate": 5.8030095540413144e-05, |
| "loss": 1.1755, |
| "num_input_tokens_seen": 4672716800, |
| "step": 71300, |
| "train_runtime": 34402.7961, |
| "train_tokens_per_second": 135823.751 |
| }, |
| { |
| "epoch": 0.714, |
| "grad_norm": 1.0066486597061157, |
| "learning_rate": 5.7654529670064326e-05, |
| "loss": 1.2073, |
| "num_input_tokens_seen": 4679270400, |
| "step": 71400, |
| "train_runtime": 34458.8447, |
| "train_tokens_per_second": 135793.015 |
| }, |
| { |
| "epoch": 0.715, |
| "grad_norm": 0.625823974609375, |
| "learning_rate": 5.7279893717576485e-05, |
| "loss": 1.2012, |
| "num_input_tokens_seen": 4685824000, |
| "step": 71500, |
| "train_runtime": 34506.5957, |
| "train_tokens_per_second": 135795.024 |
| }, |
| { |
| "epoch": 0.716, |
| "grad_norm": 0.512055516242981, |
| "learning_rate": 5.690619145552958e-05, |
| "loss": 1.1702, |
| "num_input_tokens_seen": 4692377600, |
| "step": 71600, |
| "train_runtime": 34554.5393, |
| "train_tokens_per_second": 135796.271 |
| }, |
| { |
| "epoch": 0.717, |
| "grad_norm": 0.749454915523529, |
| "learning_rate": 5.6533426647101135e-05, |
| "loss": 1.1812, |
| "num_input_tokens_seen": 4698931200, |
| "step": 71700, |
| "train_runtime": 34601.4153, |
| "train_tokens_per_second": 135801.705 |
| }, |
| { |
| "epoch": 0.718, |
| "grad_norm": 0.5417782068252563, |
| "learning_rate": 5.6161603046028674e-05, |
| "loss": 1.1681, |
| "num_input_tokens_seen": 4705484800, |
| "step": 71800, |
| "train_runtime": 34650.0822, |
| "train_tokens_per_second": 135800.105 |
| }, |
| { |
| "epoch": 0.719, |
| "grad_norm": 0.7127480506896973, |
| "learning_rate": 5.579072439657179e-05, |
| "loss": 1.1946, |
| "num_input_tokens_seen": 4712038400, |
| "step": 71900, |
| "train_runtime": 34698.539, |
| "train_tokens_per_second": 135799.331 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.5434790253639221, |
| "learning_rate": 5.542079443347431e-05, |
| "loss": 1.1761, |
| "num_input_tokens_seen": 4718592000, |
| "step": 72000, |
| "train_runtime": 34745.7766, |
| "train_tokens_per_second": 135803.325 |
| }, |
| { |
| "epoch": 0.721, |
| "grad_norm": 0.5872786045074463, |
| "learning_rate": 5.505181688192682e-05, |
| "loss": 1.1758, |
| "num_input_tokens_seen": 4725145600, |
| "step": 72100, |
| "train_runtime": 34797.942, |
| "train_tokens_per_second": 135788.076 |
| }, |
| { |
| "epoch": 0.722, |
| "grad_norm": 0.5440493822097778, |
| "learning_rate": 5.468379545752925e-05, |
| "loss": 1.2086, |
| "num_input_tokens_seen": 4731699200, |
| "step": 72200, |
| "train_runtime": 34846.6082, |
| "train_tokens_per_second": 135786.507 |
| }, |
| { |
| "epoch": 0.723, |
| "grad_norm": 0.5699992775917053, |
| "learning_rate": 5.4316733866253166e-05, |
| "loss": 1.1705, |
| "num_input_tokens_seen": 4738252800, |
| "step": 72300, |
| "train_runtime": 34894.2941, |
| "train_tokens_per_second": 135788.756 |
| }, |
| { |
| "epoch": 0.724, |
| "grad_norm": 0.7067492604255676, |
| "learning_rate": 5.3950635804404754e-05, |
| "loss": 1.1788, |
| "num_input_tokens_seen": 4744806400, |
| "step": 72400, |
| "train_runtime": 34943.1279, |
| "train_tokens_per_second": 135786.539 |
| }, |
| { |
| "epoch": 0.725, |
| "grad_norm": 0.4926595389842987, |
| "learning_rate": 5.358550495858751e-05, |
| "loss": 1.1712, |
| "num_input_tokens_seen": 4751360000, |
| "step": 72500, |
| "train_runtime": 34988.8033, |
| "train_tokens_per_second": 135796.585 |
| }, |
| { |
| "epoch": 0.726, |
| "grad_norm": 0.6217764616012573, |
| "learning_rate": 5.322134500566487e-05, |
| "loss": 1.199, |
| "num_input_tokens_seen": 4757913600, |
| "step": 72600, |
| "train_runtime": 35043.098, |
| "train_tokens_per_second": 135773.201 |
| }, |
| { |
| "epoch": 0.727, |
| "grad_norm": 0.5704054236412048, |
| "learning_rate": 5.285815961272359e-05, |
| "loss": 1.1782, |
| "num_input_tokens_seen": 4764467200, |
| "step": 72700, |
| "train_runtime": 35090.0359, |
| "train_tokens_per_second": 135778.351 |
| }, |
| { |
| "epoch": 0.728, |
| "grad_norm": 0.6081520915031433, |
| "learning_rate": 5.249595243703658e-05, |
| "loss": 1.1679, |
| "num_input_tokens_seen": 4771020800, |
| "step": 72800, |
| "train_runtime": 35136.6254, |
| "train_tokens_per_second": 135784.833 |
| }, |
| { |
| "epoch": 0.729, |
| "grad_norm": 0.6235555410385132, |
| "learning_rate": 5.213472712602598e-05, |
| "loss": 1.1707, |
| "num_input_tokens_seen": 4777574400, |
| "step": 72900, |
| "train_runtime": 35185.4188, |
| "train_tokens_per_second": 135782.792 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.5777461528778076, |
| "learning_rate": 5.17744873172267e-05, |
| "loss": 1.1816, |
| "num_input_tokens_seen": 4784128000, |
| "step": 73000, |
| "train_runtime": 35238.2318, |
| "train_tokens_per_second": 135765.268 |
| }, |
| { |
| "epoch": 0.731, |
| "grad_norm": 0.569218635559082, |
| "learning_rate": 5.1415236638249694e-05, |
| "loss": 1.1757, |
| "num_input_tokens_seen": 4790681600, |
| "step": 73100, |
| "train_runtime": 35286.0257, |
| "train_tokens_per_second": 135767.106 |
| }, |
| { |
| "epoch": 0.732, |
| "grad_norm": 1.2679173946380615, |
| "learning_rate": 5.105697870674519e-05, |
| "loss": 1.1686, |
| "num_input_tokens_seen": 4797235200, |
| "step": 73200, |
| "train_runtime": 35333.5517, |
| "train_tokens_per_second": 135769.969 |
| }, |
| { |
| "epoch": 0.733, |
| "grad_norm": 0.5663115382194519, |
| "learning_rate": 5.069971713036664e-05, |
| "loss": 1.1699, |
| "num_input_tokens_seen": 4803788800, |
| "step": 73300, |
| "train_runtime": 35380.3642, |
| "train_tokens_per_second": 135775.561 |
| }, |
| { |
| "epoch": 0.734, |
| "grad_norm": 0.5404617190361023, |
| "learning_rate": 5.034345550673415e-05, |
| "loss": 1.1916, |
| "num_input_tokens_seen": 4810342400, |
| "step": 73400, |
| "train_runtime": 35434.8234, |
| "train_tokens_per_second": 135751.838 |
| }, |
| { |
| "epoch": 0.735, |
| "grad_norm": 0.7994534373283386, |
| "learning_rate": 4.998819742339835e-05, |
| "loss": 1.1842, |
| "num_input_tokens_seen": 4816896000, |
| "step": 73500, |
| "train_runtime": 35482.3263, |
| "train_tokens_per_second": 135754.797 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.6482565402984619, |
| "learning_rate": 4.963394645780411e-05, |
| "loss": 1.1789, |
| "num_input_tokens_seen": 4823449600, |
| "step": 73600, |
| "train_runtime": 35530.782, |
| "train_tokens_per_second": 135754.107 |
| }, |
| { |
| "epoch": 0.737, |
| "grad_norm": 0.5401994585990906, |
| "learning_rate": 4.928070617725482e-05, |
| "loss": 1.1832, |
| "num_input_tokens_seen": 4830003200, |
| "step": 73700, |
| "train_runtime": 35578.1016, |
| "train_tokens_per_second": 135757.755 |
| }, |
| { |
| "epoch": 0.738, |
| "grad_norm": 0.5170857906341553, |
| "learning_rate": 4.892848013887613e-05, |
| "loss": 1.1804, |
| "num_input_tokens_seen": 4836556800, |
| "step": 73800, |
| "train_runtime": 35625.1017, |
| "train_tokens_per_second": 135762.61 |
| }, |
| { |
| "epoch": 0.739, |
| "grad_norm": 0.5744811296463013, |
| "learning_rate": 4.857727188958031e-05, |
| "loss": 1.181, |
| "num_input_tokens_seen": 4843110400, |
| "step": 73900, |
| "train_runtime": 35672.7413, |
| "train_tokens_per_second": 135765.019 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.6613340377807617, |
| "learning_rate": 4.822708496603052e-05, |
| "loss": 1.1879, |
| "num_input_tokens_seen": 4849664000, |
| "step": 74000, |
| "train_runtime": 35721.0554, |
| "train_tokens_per_second": 135764.858 |
| }, |
| { |
| "epoch": 0.741, |
| "grad_norm": 0.5571849346160889, |
| "learning_rate": 4.7877922894605304e-05, |
| "loss": 1.1781, |
| "num_input_tokens_seen": 4856217600, |
| "step": 74100, |
| "train_runtime": 35771.1997, |
| "train_tokens_per_second": 135757.75 |
| }, |
| { |
| "epoch": 0.742, |
| "grad_norm": 0.6960323452949524, |
| "learning_rate": 4.752978919136273e-05, |
| "loss": 1.1702, |
| "num_input_tokens_seen": 4862771200, |
| "step": 74200, |
| "train_runtime": 35823.4168, |
| "train_tokens_per_second": 135742.808 |
| }, |
| { |
| "epoch": 0.743, |
| "grad_norm": 0.5823075175285339, |
| "learning_rate": 4.7182687362005337e-05, |
| "loss": 1.1762, |
| "num_input_tokens_seen": 4869324800, |
| "step": 74300, |
| "train_runtime": 35872.0393, |
| "train_tokens_per_second": 135741.511 |
| }, |
| { |
| "epoch": 0.744, |
| "grad_norm": 0.5310567021369934, |
| "learning_rate": 4.6836620901844794e-05, |
| "loss": 1.1737, |
| "num_input_tokens_seen": 4875878400, |
| "step": 74400, |
| "train_runtime": 35918.2124, |
| "train_tokens_per_second": 135749.473 |
| }, |
| { |
| "epoch": 0.745, |
| "grad_norm": 0.560118556022644, |
| "learning_rate": 4.64915932957664e-05, |
| "loss": 1.1746, |
| "num_input_tokens_seen": 4882432000, |
| "step": 74500, |
| "train_runtime": 35972.3831, |
| "train_tokens_per_second": 135727.232 |
| }, |
| { |
| "epoch": 0.746, |
| "grad_norm": 0.5729120969772339, |
| "learning_rate": 4.614760801819433e-05, |
| "loss": 1.1729, |
| "num_input_tokens_seen": 4888985600, |
| "step": 74600, |
| "train_runtime": 36018.4093, |
| "train_tokens_per_second": 135735.744 |
| }, |
| { |
| "epoch": 0.747, |
| "grad_norm": 0.5329717397689819, |
| "learning_rate": 4.58046685330566e-05, |
| "loss": 1.1969, |
| "num_input_tokens_seen": 4895539200, |
| "step": 74700, |
| "train_runtime": 36066.8487, |
| "train_tokens_per_second": 135735.152 |
| }, |
| { |
| "epoch": 0.748, |
| "grad_norm": 0.5714908838272095, |
| "learning_rate": 4.546277829374993e-05, |
| "loss": 1.172, |
| "num_input_tokens_seen": 4902092800, |
| "step": 74800, |
| "train_runtime": 36115.3648, |
| "train_tokens_per_second": 135734.273 |
| }, |
| { |
| "epoch": 0.749, |
| "grad_norm": 0.5672817826271057, |
| "learning_rate": 4.5121940743105246e-05, |
| "loss": 1.1813, |
| "num_input_tokens_seen": 4908646400, |
| "step": 74900, |
| "train_runtime": 36164.0493, |
| "train_tokens_per_second": 135732.765 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.5890370607376099, |
| "learning_rate": 4.478215931335295e-05, |
| "loss": 1.1667, |
| "num_input_tokens_seen": 4915200000, |
| "step": 75000, |
| "train_runtime": 36215.8524, |
| "train_tokens_per_second": 135719.572 |
| }, |
| { |
| "epoch": 0.751, |
| "grad_norm": 0.6215245127677917, |
| "learning_rate": 4.4443437426088205e-05, |
| "loss": 1.179, |
| "num_input_tokens_seen": 4921753600, |
| "step": 75100, |
| "train_runtime": 36264.1849, |
| "train_tokens_per_second": 135719.405 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 1.4719446897506714, |
| "learning_rate": 4.410577849223666e-05, |
| "loss": 1.1847, |
| "num_input_tokens_seen": 4928307200, |
| "step": 75200, |
| "train_runtime": 36312.9781, |
| "train_tokens_per_second": 135717.516 |
| }, |
| { |
| "epoch": 0.753, |
| "grad_norm": 1.3475043773651123, |
| "learning_rate": 4.376918591202006e-05, |
| "loss": 1.1745, |
| "num_input_tokens_seen": 4934860800, |
| "step": 75300, |
| "train_runtime": 36359.7761, |
| "train_tokens_per_second": 135723.08 |
| }, |
| { |
| "epoch": 0.754, |
| "grad_norm": 0.9558594822883606, |
| "learning_rate": 4.3433663074922046e-05, |
| "loss": 1.181, |
| "num_input_tokens_seen": 4941414400, |
| "step": 75400, |
| "train_runtime": 36406.8385, |
| "train_tokens_per_second": 135727.644 |
| }, |
| { |
| "epoch": 0.755, |
| "grad_norm": 0.5916360020637512, |
| "learning_rate": 4.309921335965367e-05, |
| "loss": 1.1706, |
| "num_input_tokens_seen": 4947968000, |
| "step": 75500, |
| "train_runtime": 36460.2599, |
| "train_tokens_per_second": 135708.522 |
| }, |
| { |
| "epoch": 0.756, |
| "grad_norm": 0.5985275506973267, |
| "learning_rate": 4.276584013411992e-05, |
| "loss": 1.1758, |
| "num_input_tokens_seen": 4954521600, |
| "step": 75600, |
| "train_runtime": 36507.6786, |
| "train_tokens_per_second": 135711.768 |
| }, |
| { |
| "epoch": 0.757, |
| "grad_norm": 0.5550095438957214, |
| "learning_rate": 4.243354675538555e-05, |
| "loss": 1.1705, |
| "num_input_tokens_seen": 4961075200, |
| "step": 75700, |
| "train_runtime": 36554.9962, |
| "train_tokens_per_second": 135715.38 |
| }, |
| { |
| "epoch": 0.758, |
| "grad_norm": 0.5496001243591309, |
| "learning_rate": 4.210233656964111e-05, |
| "loss": 1.1746, |
| "num_input_tokens_seen": 4967628800, |
| "step": 75800, |
| "train_runtime": 36602.3493, |
| "train_tokens_per_second": 135718.851 |
| }, |
| { |
| "epoch": 0.759, |
| "grad_norm": 0.570070743560791, |
| "learning_rate": 4.1772212912169516e-05, |
| "loss": 1.1771, |
| "num_input_tokens_seen": 4974182400, |
| "step": 75900, |
| "train_runtime": 36656.3482, |
| "train_tokens_per_second": 135697.707 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.7570028305053711, |
| "learning_rate": 4.14431791073124e-05, |
| "loss": 1.1756, |
| "num_input_tokens_seen": 4980736000, |
| "step": 76000, |
| "train_runtime": 36704.1036, |
| "train_tokens_per_second": 135699.704 |
| }, |
| { |
| "epoch": 0.761, |
| "grad_norm": 0.6243161559104919, |
| "learning_rate": 4.111523846843639e-05, |
| "loss": 1.1667, |
| "num_input_tokens_seen": 4987289600, |
| "step": 76100, |
| "train_runtime": 36753.037, |
| "train_tokens_per_second": 135697.347 |
| }, |
| { |
| "epoch": 0.762, |
| "grad_norm": 0.5531216263771057, |
| "learning_rate": 4.078839429790019e-05, |
| "loss": 1.1755, |
| "num_input_tokens_seen": 4993843200, |
| "step": 76200, |
| "train_runtime": 36800.3039, |
| "train_tokens_per_second": 135701.14 |
| }, |
| { |
| "epoch": 0.763, |
| "grad_norm": 0.5894837379455566, |
| "learning_rate": 4.046264988702097e-05, |
| "loss": 1.1778, |
| "num_input_tokens_seen": 5000396800, |
| "step": 76300, |
| "train_runtime": 36847.8696, |
| "train_tokens_per_second": 135703.824 |
| }, |
| { |
| "epoch": 0.764, |
| "grad_norm": 0.6210083365440369, |
| "learning_rate": 4.013800851604123e-05, |
| "loss": 1.1729, |
| "num_input_tokens_seen": 5006950400, |
| "step": 76400, |
| "train_runtime": 36901.2456, |
| "train_tokens_per_second": 135685.133 |
| }, |
| { |
| "epoch": 0.765, |
| "grad_norm": 0.5929700136184692, |
| "learning_rate": 3.981447345409606e-05, |
| "loss": 1.171, |
| "num_input_tokens_seen": 5013504000, |
| "step": 76500, |
| "train_runtime": 36949.2788, |
| "train_tokens_per_second": 135686.113 |
| }, |
| { |
| "epoch": 0.766, |
| "grad_norm": 0.5809143781661987, |
| "learning_rate": 3.949204795917995e-05, |
| "loss": 1.1775, |
| "num_input_tokens_seen": 5020057600, |
| "step": 76600, |
| "train_runtime": 36996.6957, |
| "train_tokens_per_second": 135689.35 |
| }, |
| { |
| "epoch": 0.767, |
| "grad_norm": 0.5398791432380676, |
| "learning_rate": 3.917073527811399e-05, |
| "loss": 1.1765, |
| "num_input_tokens_seen": 5026611200, |
| "step": 76700, |
| "train_runtime": 37044.9859, |
| "train_tokens_per_second": 135689.381 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.8559983372688293, |
| "learning_rate": 3.885053864651334e-05, |
| "loss": 1.1661, |
| "num_input_tokens_seen": 5033164800, |
| "step": 76800, |
| "train_runtime": 37092.5707, |
| "train_tokens_per_second": 135691.992 |
| }, |
| { |
| "epoch": 0.769, |
| "grad_norm": 1.0961577892303467, |
| "learning_rate": 3.8531461288754564e-05, |
| "loss": 1.1734, |
| "num_input_tokens_seen": 5039718400, |
| "step": 76900, |
| "train_runtime": 37145.642, |
| "train_tokens_per_second": 135674.554 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 0.5564078688621521, |
| "learning_rate": 3.821350641794305e-05, |
| "loss": 1.1783, |
| "num_input_tokens_seen": 5046272000, |
| "step": 77000, |
| "train_runtime": 37194.2194, |
| "train_tokens_per_second": 135673.556 |
| }, |
| { |
| "epoch": 0.771, |
| "grad_norm": 0.6036384701728821, |
| "learning_rate": 3.789667723588087e-05, |
| "loss": 1.1651, |
| "num_input_tokens_seen": 5052825600, |
| "step": 77100, |
| "train_runtime": 37242.6728, |
| "train_tokens_per_second": 135673.012 |
| }, |
| { |
| "epoch": 0.772, |
| "grad_norm": 1.4465519189834595, |
| "learning_rate": 3.758097693303431e-05, |
| "loss": 1.1783, |
| "num_input_tokens_seen": 5059379200, |
| "step": 77200, |
| "train_runtime": 37290.7014, |
| "train_tokens_per_second": 135674.015 |
| }, |
| { |
| "epoch": 0.773, |
| "grad_norm": 0.5566693544387817, |
| "learning_rate": 3.7266408688502005e-05, |
| "loss": 1.1751, |
| "num_input_tokens_seen": 5065932800, |
| "step": 77300, |
| "train_runtime": 37338.6452, |
| "train_tokens_per_second": 135675.324 |
| }, |
| { |
| "epoch": 0.774, |
| "grad_norm": 0.653806209564209, |
| "learning_rate": 3.695297566998256e-05, |
| "loss": 1.1709, |
| "num_input_tokens_seen": 5072486400, |
| "step": 77400, |
| "train_runtime": 37386.3122, |
| "train_tokens_per_second": 135677.634 |
| }, |
| { |
| "epoch": 0.775, |
| "grad_norm": 0.8704593777656555, |
| "learning_rate": 3.664068103374307e-05, |
| "loss": 1.1794, |
| "num_input_tokens_seen": 5079040000, |
| "step": 77500, |
| "train_runtime": 37436.1356, |
| "train_tokens_per_second": 135672.123 |
| }, |
| { |
| "epoch": 0.776, |
| "grad_norm": 0.6627979874610901, |
| "learning_rate": 3.63295279245871e-05, |
| "loss": 1.175, |
| "num_input_tokens_seen": 5085593600, |
| "step": 77600, |
| "train_runtime": 37484.0969, |
| "train_tokens_per_second": 135673.366 |
| }, |
| { |
| "epoch": 0.777, |
| "grad_norm": 0.6232652068138123, |
| "learning_rate": 3.601951947582291e-05, |
| "loss": 1.1665, |
| "num_input_tokens_seen": 5092147200, |
| "step": 77700, |
| "train_runtime": 37536.8508, |
| "train_tokens_per_second": 135657.283 |
| }, |
| { |
| "epoch": 0.778, |
| "grad_norm": 0.5873488187789917, |
| "learning_rate": 3.571065880923216e-05, |
| "loss": 1.1734, |
| "num_input_tokens_seen": 5098700800, |
| "step": 77800, |
| "train_runtime": 37584.0839, |
| "train_tokens_per_second": 135661.17 |
| }, |
| { |
| "epoch": 0.779, |
| "grad_norm": 0.56858891248703, |
| "learning_rate": 3.540294903503841e-05, |
| "loss": 1.1696, |
| "num_input_tokens_seen": 5105254400, |
| "step": 77900, |
| "train_runtime": 37631.6286, |
| "train_tokens_per_second": 135663.924 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 0.5939886569976807, |
| "learning_rate": 3.5096393251875566e-05, |
| "loss": 1.1784, |
| "num_input_tokens_seen": 5111808000, |
| "step": 78000, |
| "train_runtime": 37679.4424, |
| "train_tokens_per_second": 135665.702 |
| }, |
| { |
| "epoch": 0.781, |
| "grad_norm": 0.5839298367500305, |
| "learning_rate": 3.479099454675701e-05, |
| "loss": 1.1672, |
| "num_input_tokens_seen": 5118361600, |
| "step": 78100, |
| "train_runtime": 37733.7363, |
| "train_tokens_per_second": 135644.177 |
| }, |
| { |
| "epoch": 0.782, |
| "grad_norm": 0.6057742238044739, |
| "learning_rate": 3.448675599504434e-05, |
| "loss": 1.1767, |
| "num_input_tokens_seen": 5124915200, |
| "step": 78200, |
| "train_runtime": 37781.8162, |
| "train_tokens_per_second": 135645.019 |
| }, |
| { |
| "epoch": 0.783, |
| "grad_norm": 0.9875990748405457, |
| "learning_rate": 3.418368066041633e-05, |
| "loss": 1.1619, |
| "num_input_tokens_seen": 5131468800, |
| "step": 78300, |
| "train_runtime": 37829.8727, |
| "train_tokens_per_second": 135645.944 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 0.5806832313537598, |
| "learning_rate": 3.388177159483826e-05, |
| "loss": 1.1747, |
| "num_input_tokens_seen": 5138022400, |
| "step": 78400, |
| "train_runtime": 37877.7351, |
| "train_tokens_per_second": 135647.561 |
| }, |
| { |
| "epoch": 0.785, |
| "grad_norm": 0.7016937136650085, |
| "learning_rate": 3.3581031838531116e-05, |
| "loss": 1.1664, |
| "num_input_tokens_seen": 5144576000, |
| "step": 78500, |
| "train_runtime": 37924.0105, |
| "train_tokens_per_second": 135654.851 |
| }, |
| { |
| "epoch": 0.786, |
| "grad_norm": 0.7171750664710999, |
| "learning_rate": 3.328146441994084e-05, |
| "loss": 1.1905, |
| "num_input_tokens_seen": 5151129600, |
| "step": 78600, |
| "train_runtime": 37971.9481, |
| "train_tokens_per_second": 135656.184 |
| }, |
| { |
| "epoch": 0.787, |
| "grad_norm": 0.5550017356872559, |
| "learning_rate": 3.2983072355708026e-05, |
| "loss": 1.1741, |
| "num_input_tokens_seen": 5157683200, |
| "step": 78700, |
| "train_runtime": 38021.3003, |
| "train_tokens_per_second": 135652.467 |
| }, |
| { |
| "epoch": 0.788, |
| "grad_norm": 0.5833317637443542, |
| "learning_rate": 3.2685858650637486e-05, |
| "loss": 1.176, |
| "num_input_tokens_seen": 5164236800, |
| "step": 78800, |
| "train_runtime": 38074.1209, |
| "train_tokens_per_second": 135636.403 |
| }, |
| { |
| "epoch": 0.789, |
| "grad_norm": 0.9918714165687561, |
| "learning_rate": 3.238982629766793e-05, |
| "loss": 1.1653, |
| "num_input_tokens_seen": 5170790400, |
| "step": 78900, |
| "train_runtime": 38121.5575, |
| "train_tokens_per_second": 135639.537 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.2304959297180176, |
| "learning_rate": 3.209497827784177e-05, |
| "loss": 1.177, |
| "num_input_tokens_seen": 5177344000, |
| "step": 79000, |
| "train_runtime": 38168.8984, |
| "train_tokens_per_second": 135643.003 |
| }, |
| { |
| "epoch": 0.791, |
| "grad_norm": 0.5920888185501099, |
| "learning_rate": 3.1801317560275394e-05, |
| "loss": 1.1717, |
| "num_input_tokens_seen": 5183897600, |
| "step": 79100, |
| "train_runtime": 38223.2691, |
| "train_tokens_per_second": 135621.513 |
| }, |
| { |
| "epoch": 0.792, |
| "grad_norm": 0.5991621017456055, |
| "learning_rate": 3.150884710212895e-05, |
| "loss": 1.1933, |
| "num_input_tokens_seen": 5190451200, |
| "step": 79200, |
| "train_runtime": 38270.5225, |
| "train_tokens_per_second": 135625.303 |
| }, |
| { |
| "epoch": 0.793, |
| "grad_norm": 0.6007819175720215, |
| "learning_rate": 3.121756984857665e-05, |
| "loss": 1.1721, |
| "num_input_tokens_seen": 5197004800, |
| "step": 79300, |
| "train_runtime": 38316.5535, |
| "train_tokens_per_second": 135633.41 |
| }, |
| { |
| "epoch": 0.794, |
| "grad_norm": 0.6040635704994202, |
| "learning_rate": 3.092748873277725e-05, |
| "loss": 1.1784, |
| "num_input_tokens_seen": 5203558400, |
| "step": 79400, |
| "train_runtime": 38364.1371, |
| "train_tokens_per_second": 135636.008 |
| }, |
| { |
| "epoch": 0.795, |
| "grad_norm": 1.8925070762634277, |
| "learning_rate": 3.06386066758444e-05, |
| "loss": 1.179, |
| "num_input_tokens_seen": 5210112000, |
| "step": 79500, |
| "train_runtime": 38412.6561, |
| "train_tokens_per_second": 135635.297 |
| }, |
| { |
| "epoch": 0.796, |
| "grad_norm": 0.6026915311813354, |
| "learning_rate": 3.0350926586817127e-05, |
| "loss": 1.1706, |
| "num_input_tokens_seen": 5216665600, |
| "step": 79600, |
| "train_runtime": 38465.3514, |
| "train_tokens_per_second": 135619.861 |
| }, |
| { |
| "epoch": 0.797, |
| "grad_norm": 0.7981861233711243, |
| "learning_rate": 3.0064451362630765e-05, |
| "loss": 1.1796, |
| "num_input_tokens_seen": 5223219200, |
| "step": 79700, |
| "train_runtime": 38512.271, |
| "train_tokens_per_second": 135624.804 |
| }, |
| { |
| "epoch": 0.798, |
| "grad_norm": 1.3739973306655884, |
| "learning_rate": 2.9779183888087683e-05, |
| "loss": 1.1827, |
| "num_input_tokens_seen": 5229772800, |
| "step": 79800, |
| "train_runtime": 38560.5377, |
| "train_tokens_per_second": 135624.997 |
| }, |
| { |
| "epoch": 0.799, |
| "grad_norm": 0.7507041692733765, |
| "learning_rate": 2.9495127035828103e-05, |
| "loss": 1.164, |
| "num_input_tokens_seen": 5236326400, |
| "step": 79900, |
| "train_runtime": 38608.5419, |
| "train_tokens_per_second": 135626.111 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.5848426818847656, |
| "learning_rate": 2.921228366630144e-05, |
| "loss": 1.1746, |
| "num_input_tokens_seen": 5242880000, |
| "step": 80000, |
| "train_runtime": 38660.3487, |
| "train_tokens_per_second": 135613.883 |
| }, |
| { |
| "epoch": 0.801, |
| "grad_norm": 0.5851396322250366, |
| "learning_rate": 2.8930656627737276e-05, |
| "loss": 1.1999, |
| "num_input_tokens_seen": 5249433600, |
| "step": 80100, |
| "train_runtime": 38707.849, |
| "train_tokens_per_second": 135616.774 |
| }, |
| { |
| "epoch": 0.802, |
| "grad_norm": 0.5581755638122559, |
| "learning_rate": 2.8650248756116727e-05, |
| "loss": 1.1657, |
| "num_input_tokens_seen": 5255987200, |
| "step": 80200, |
| "train_runtime": 38755.0614, |
| "train_tokens_per_second": 135620.665 |
| }, |
| { |
| "epoch": 0.803, |
| "grad_norm": 0.8737390637397766, |
| "learning_rate": 2.8371062875143968e-05, |
| "loss": 1.168, |
| "num_input_tokens_seen": 5262540800, |
| "step": 80300, |
| "train_runtime": 38809.0814, |
| "train_tokens_per_second": 135600.757 |
| }, |
| { |
| "epoch": 0.804, |
| "grad_norm": 0.6018446683883667, |
| "learning_rate": 2.809310179621776e-05, |
| "loss": 1.1603, |
| "num_input_tokens_seen": 5269094400, |
| "step": 80400, |
| "train_runtime": 38856.5205, |
| "train_tokens_per_second": 135603.866 |
| }, |
| { |
| "epoch": 0.805, |
| "grad_norm": 0.5673835873603821, |
| "learning_rate": 2.781636831840303e-05, |
| "loss": 1.1748, |
| "num_input_tokens_seen": 5275648000, |
| "step": 80500, |
| "train_runtime": 38904.9212, |
| "train_tokens_per_second": 135603.616 |
| }, |
| { |
| "epoch": 0.806, |
| "grad_norm": 0.5929433703422546, |
| "learning_rate": 2.754086522840282e-05, |
| "loss": 1.1663, |
| "num_input_tokens_seen": 5282201600, |
| "step": 80600, |
| "train_runtime": 38952.3955, |
| "train_tokens_per_second": 135606.592 |
| }, |
| { |
| "epoch": 0.807, |
| "grad_norm": 0.555366039276123, |
| "learning_rate": 2.7266595300530204e-05, |
| "loss": 1.1665, |
| "num_input_tokens_seen": 5288755200, |
| "step": 80700, |
| "train_runtime": 39001.4372, |
| "train_tokens_per_second": 135604.11 |
| }, |
| { |
| "epoch": 0.808, |
| "grad_norm": 0.5364073514938354, |
| "learning_rate": 2.6993561296680342e-05, |
| "loss": 1.1687, |
| "num_input_tokens_seen": 5295308800, |
| "step": 80800, |
| "train_runtime": 39048.23, |
| "train_tokens_per_second": 135609.445 |
| }, |
| { |
| "epoch": 0.809, |
| "grad_norm": 0.9588598608970642, |
| "learning_rate": 2.672176596630258e-05, |
| "loss": 1.1831, |
| "num_input_tokens_seen": 5301862400, |
| "step": 80900, |
| "train_runtime": 39096.7929, |
| "train_tokens_per_second": 135608.627 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.6481744050979614, |
| "learning_rate": 2.6451212046372883e-05, |
| "loss": 1.1686, |
| "num_input_tokens_seen": 5308416000, |
| "step": 81000, |
| "train_runtime": 39152.1435, |
| "train_tokens_per_second": 135584.301 |
| }, |
| { |
| "epoch": 0.811, |
| "grad_norm": 0.5828465819358826, |
| "learning_rate": 2.6181902261366256e-05, |
| "loss": 1.1662, |
| "num_input_tokens_seen": 5314969600, |
| "step": 81100, |
| "train_runtime": 39199.715, |
| "train_tokens_per_second": 135586.945 |
| }, |
| { |
| "epoch": 0.812, |
| "grad_norm": 0.5715954899787903, |
| "learning_rate": 2.5913839323229195e-05, |
| "loss": 1.1623, |
| "num_input_tokens_seen": 5321523200, |
| "step": 81200, |
| "train_runtime": 39246.528, |
| "train_tokens_per_second": 135592.203 |
| }, |
| { |
| "epoch": 0.813, |
| "grad_norm": 0.8631576299667358, |
| "learning_rate": 2.564702593135253e-05, |
| "loss": 1.1896, |
| "num_input_tokens_seen": 5328076800, |
| "step": 81300, |
| "train_runtime": 39294.7731, |
| "train_tokens_per_second": 135592.507 |
| }, |
| { |
| "epoch": 0.814, |
| "grad_norm": 0.5882650017738342, |
| "learning_rate": 2.538146477254419e-05, |
| "loss": 1.1728, |
| "num_input_tokens_seen": 5334630400, |
| "step": 81400, |
| "train_runtime": 39341.8017, |
| "train_tokens_per_second": 135597.003 |
| }, |
| { |
| "epoch": 0.815, |
| "grad_norm": 0.5567020773887634, |
| "learning_rate": 2.5117158521002033e-05, |
| "loss": 1.1669, |
| "num_input_tokens_seen": 5341184000, |
| "step": 81500, |
| "train_runtime": 39389.3033, |
| "train_tokens_per_second": 135599.86 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 0.7412062883377075, |
| "learning_rate": 2.4854109838287116e-05, |
| "loss": 1.1629, |
| "num_input_tokens_seen": 5347737600, |
| "step": 81600, |
| "train_runtime": 39443.4282, |
| "train_tokens_per_second": 135579.939 |
| }, |
| { |
| "epoch": 0.817, |
| "grad_norm": 0.6353700757026672, |
| "learning_rate": 2.459232137329679e-05, |
| "loss": 1.1676, |
| "num_input_tokens_seen": 5354291200, |
| "step": 81700, |
| "train_runtime": 39490.3956, |
| "train_tokens_per_second": 135584.643 |
| }, |
| { |
| "epoch": 0.818, |
| "grad_norm": 0.6541226506233215, |
| "learning_rate": 2.4331795762237894e-05, |
| "loss": 1.1669, |
| "num_input_tokens_seen": 5360844800, |
| "step": 81800, |
| "train_runtime": 39539.3049, |
| "train_tokens_per_second": 135582.677 |
| }, |
| { |
| "epoch": 0.819, |
| "grad_norm": 0.684333086013794, |
| "learning_rate": 2.4072535628600514e-05, |
| "loss": 1.1623, |
| "num_input_tokens_seen": 5367398400, |
| "step": 81900, |
| "train_runtime": 39587.3713, |
| "train_tokens_per_second": 135583.602 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.5568915605545044, |
| "learning_rate": 2.3814543583131306e-05, |
| "loss": 1.1662, |
| "num_input_tokens_seen": 5373952000, |
| "step": 82000, |
| "train_runtime": 39636.1132, |
| "train_tokens_per_second": 135582.214 |
| }, |
| { |
| "epoch": 0.821, |
| "grad_norm": 0.6357592940330505, |
| "learning_rate": 2.3557822223807287e-05, |
| "loss": 1.1617, |
| "num_input_tokens_seen": 5380505600, |
| "step": 82100, |
| "train_runtime": 39683.9299, |
| "train_tokens_per_second": 135583.991 |
| }, |
| { |
| "epoch": 0.822, |
| "grad_norm": 0.6660736203193665, |
| "learning_rate": 2.3302374135809727e-05, |
| "loss": 1.1788, |
| "num_input_tokens_seen": 5387059200, |
| "step": 82200, |
| "train_runtime": 39731.7683, |
| "train_tokens_per_second": 135585.69 |
| }, |
| { |
| "epoch": 0.823, |
| "grad_norm": 0.6093869805335999, |
| "learning_rate": 2.304820189149798e-05, |
| "loss": 1.1823, |
| "num_input_tokens_seen": 5393612800, |
| "step": 82300, |
| "train_runtime": 39780.5498, |
| "train_tokens_per_second": 135584.169 |
| }, |
| { |
| "epoch": 0.824, |
| "grad_norm": 1.0343610048294067, |
| "learning_rate": 2.2795308050383787e-05, |
| "loss": 1.1942, |
| "num_input_tokens_seen": 5400166400, |
| "step": 82400, |
| "train_runtime": 39833.9775, |
| "train_tokens_per_second": 135566.839 |
| }, |
| { |
| "epoch": 0.825, |
| "grad_norm": 0.5363211035728455, |
| "learning_rate": 2.2543695159105248e-05, |
| "loss": 1.1659, |
| "num_input_tokens_seen": 5406720000, |
| "step": 82500, |
| "train_runtime": 39881.8503, |
| "train_tokens_per_second": 135568.434 |
| }, |
| { |
| "epoch": 0.826, |
| "grad_norm": 0.9732265472412109, |
| "learning_rate": 2.2293365751401443e-05, |
| "loss": 1.1757, |
| "num_input_tokens_seen": 5413273600, |
| "step": 82600, |
| "train_runtime": 39929.975, |
| "train_tokens_per_second": 135569.171 |
| }, |
| { |
| "epoch": 0.827, |
| "grad_norm": 0.5309200286865234, |
| "learning_rate": 2.2044322348086735e-05, |
| "loss": 1.1651, |
| "num_input_tokens_seen": 5419827200, |
| "step": 82700, |
| "train_runtime": 39978.229, |
| "train_tokens_per_second": 135569.467 |
| }, |
| { |
| "epoch": 0.828, |
| "grad_norm": 0.543769121170044, |
| "learning_rate": 2.1796567457025372e-05, |
| "loss": 1.1685, |
| "num_input_tokens_seen": 5426380800, |
| "step": 82800, |
| "train_runtime": 40026.0125, |
| "train_tokens_per_second": 135571.356 |
| }, |
| { |
| "epoch": 0.829, |
| "grad_norm": 0.5210631489753723, |
| "learning_rate": 2.15501035731064e-05, |
| "loss": 1.1778, |
| "num_input_tokens_seen": 5432934400, |
| "step": 82900, |
| "train_runtime": 40075.0654, |
| "train_tokens_per_second": 135568.947 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.3538480997085571, |
| "learning_rate": 2.1304933178218426e-05, |
| "loss": 1.1655, |
| "num_input_tokens_seen": 5439488000, |
| "step": 83000, |
| "train_runtime": 40123.2015, |
| "train_tokens_per_second": 135569.64 |
| }, |
| { |
| "epoch": 0.831, |
| "grad_norm": 1.2901802062988281, |
| "learning_rate": 2.1061058741224518e-05, |
| "loss": 1.1668, |
| "num_input_tokens_seen": 5446041600, |
| "step": 83100, |
| "train_runtime": 40170.8312, |
| "train_tokens_per_second": 135572.042 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.6960340142250061, |
| "learning_rate": 2.0818482717937596e-05, |
| "loss": 1.163, |
| "num_input_tokens_seen": 5452595200, |
| "step": 83200, |
| "train_runtime": 40225.6882, |
| "train_tokens_per_second": 135550.079 |
| }, |
| { |
| "epoch": 0.833, |
| "grad_norm": 0.537268340587616, |
| "learning_rate": 2.0577207551095552e-05, |
| "loss": 1.1689, |
| "num_input_tokens_seen": 5459148800, |
| "step": 83300, |
| "train_runtime": 40273.4785, |
| "train_tokens_per_second": 135551.956 |
| }, |
| { |
| "epoch": 0.834, |
| "grad_norm": 0.564239501953125, |
| "learning_rate": 2.0337235670336584e-05, |
| "loss": 1.1662, |
| "num_input_tokens_seen": 5465702400, |
| "step": 83400, |
| "train_runtime": 40320.9705, |
| "train_tokens_per_second": 135554.832 |
| }, |
| { |
| "epoch": 0.835, |
| "grad_norm": 0.520041823387146, |
| "learning_rate": 2.0098569492174887e-05, |
| "loss": 1.1642, |
| "num_input_tokens_seen": 5472256000, |
| "step": 83500, |
| "train_runtime": 40369.1468, |
| "train_tokens_per_second": 135555.404 |
| }, |
| { |
| "epoch": 0.836, |
| "grad_norm": 0.616112232208252, |
| "learning_rate": 1.9861211419976258e-05, |
| "loss": 1.1671, |
| "num_input_tokens_seen": 5478809600, |
| "step": 83600, |
| "train_runtime": 40416.0661, |
| "train_tokens_per_second": 135560.19 |
| }, |
| { |
| "epoch": 0.837, |
| "grad_norm": 1.3083754777908325, |
| "learning_rate": 1.962516384393377e-05, |
| "loss": 1.1778, |
| "num_input_tokens_seen": 5485363200, |
| "step": 83700, |
| "train_runtime": 40465.3405, |
| "train_tokens_per_second": 135557.075 |
| }, |
| { |
| "epoch": 0.838, |
| "grad_norm": 0.5721991062164307, |
| "learning_rate": 1.939042914104396e-05, |
| "loss": 1.179, |
| "num_input_tokens_seen": 5491916800, |
| "step": 83800, |
| "train_runtime": 40513.1256, |
| "train_tokens_per_second": 135558.951 |
| }, |
| { |
| "epoch": 0.839, |
| "grad_norm": 0.8014708161354065, |
| "learning_rate": 1.9157009675082702e-05, |
| "loss": 1.1698, |
| "num_input_tokens_seen": 5498470400, |
| "step": 83900, |
| "train_runtime": 40567.2253, |
| "train_tokens_per_second": 135539.721 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.7554424405097961, |
| "learning_rate": 1.8924907796581363e-05, |
| "loss": 1.1689, |
| "num_input_tokens_seen": 5505024000, |
| "step": 84000, |
| "train_runtime": 40615.2949, |
| "train_tokens_per_second": 135540.663 |
| }, |
| { |
| "epoch": 0.841, |
| "grad_norm": 0.6026338934898376, |
| "learning_rate": 1.869412584280329e-05, |
| "loss": 1.1727, |
| "num_input_tokens_seen": 5511577600, |
| "step": 84100, |
| "train_runtime": 40664.1179, |
| "train_tokens_per_second": 135539.091 |
| }, |
| { |
| "epoch": 0.842, |
| "grad_norm": 0.6569694876670837, |
| "learning_rate": 1.8464666137720208e-05, |
| "loss": 1.1717, |
| "num_input_tokens_seen": 5518131200, |
| "step": 84200, |
| "train_runtime": 40713.1869, |
| "train_tokens_per_second": 135536.705 |
| }, |
| { |
| "epoch": 0.843, |
| "grad_norm": 0.5886375904083252, |
| "learning_rate": 1.823653099198884e-05, |
| "loss": 1.1764, |
| "num_input_tokens_seen": 5524684800, |
| "step": 84300, |
| "train_runtime": 40759.1071, |
| "train_tokens_per_second": 135544.795 |
| }, |
| { |
| "epoch": 0.844, |
| "grad_norm": 0.6782867908477783, |
| "learning_rate": 1.800972270292749e-05, |
| "loss": 1.1637, |
| "num_input_tokens_seen": 5531238400, |
| "step": 84400, |
| "train_runtime": 40811.77, |
| "train_tokens_per_second": 135530.471 |
| }, |
| { |
| "epoch": 0.845, |
| "grad_norm": 0.6513829231262207, |
| "learning_rate": 1.778424355449317e-05, |
| "loss": 1.165, |
| "num_input_tokens_seen": 5537792000, |
| "step": 84500, |
| "train_runtime": 40858.6857, |
| "train_tokens_per_second": 135535.245 |
| }, |
| { |
| "epoch": 0.846, |
| "grad_norm": 0.6192531585693359, |
| "learning_rate": 1.756009581725841e-05, |
| "loss": 1.1589, |
| "num_input_tokens_seen": 5544345600, |
| "step": 84600, |
| "train_runtime": 40906.0609, |
| "train_tokens_per_second": 135538.487 |
| }, |
| { |
| "epoch": 0.847, |
| "grad_norm": 0.5640349388122559, |
| "learning_rate": 1.7337281748388387e-05, |
| "loss": 1.1653, |
| "num_input_tokens_seen": 5550899200, |
| "step": 84700, |
| "train_runtime": 40952.689, |
| "train_tokens_per_second": 135544.194 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 0.5606239438056946, |
| "learning_rate": 1.7115803591618312e-05, |
| "loss": 1.1734, |
| "num_input_tokens_seen": 5557452800, |
| "step": 84800, |
| "train_runtime": 41006.8402, |
| "train_tokens_per_second": 135525.019 |
| }, |
| { |
| "epoch": 0.849, |
| "grad_norm": 0.5700273513793945, |
| "learning_rate": 1.6895663577230816e-05, |
| "loss": 1.1755, |
| "num_input_tokens_seen": 5564006400, |
| "step": 84900, |
| "train_runtime": 41054.6511, |
| "train_tokens_per_second": 135526.822 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.7111489176750183, |
| "learning_rate": 1.667686392203333e-05, |
| "loss": 1.1673, |
| "num_input_tokens_seen": 5570560000, |
| "step": 85000, |
| "train_runtime": 41102.7763, |
| "train_tokens_per_second": 135527.585 |
| }, |
| { |
| "epoch": 0.851, |
| "grad_norm": 0.5908454060554504, |
| "learning_rate": 1.6459406829335996e-05, |
| "loss": 1.1767, |
| "num_input_tokens_seen": 5577113600, |
| "step": 85100, |
| "train_runtime": 41150.3215, |
| "train_tokens_per_second": 135530.256 |
| }, |
| { |
| "epoch": 0.852, |
| "grad_norm": 0.6215232610702515, |
| "learning_rate": 1.624329448892932e-05, |
| "loss": 1.171, |
| "num_input_tokens_seen": 5583667200, |
| "step": 85200, |
| "train_runtime": 41205.5284, |
| "train_tokens_per_second": 135507.72 |
| }, |
| { |
| "epoch": 0.853, |
| "grad_norm": 0.6203814744949341, |
| "learning_rate": 1.6028529077062163e-05, |
| "loss": 1.1591, |
| "num_input_tokens_seen": 5590220800, |
| "step": 85300, |
| "train_runtime": 41253.0291, |
| "train_tokens_per_second": 135510.553 |
| }, |
| { |
| "epoch": 0.854, |
| "grad_norm": 0.5267207026481628, |
| "learning_rate": 1.5815112756419805e-05, |
| "loss": 1.185, |
| "num_input_tokens_seen": 5596774400, |
| "step": 85400, |
| "train_runtime": 41301.2794, |
| "train_tokens_per_second": 135510.921 |
| }, |
| { |
| "epoch": 0.855, |
| "grad_norm": 0.5815737843513489, |
| "learning_rate": 1.5603047676102313e-05, |
| "loss": 1.173, |
| "num_input_tokens_seen": 5603328000, |
| "step": 85500, |
| "train_runtime": 41349.0127, |
| "train_tokens_per_second": 135512.982 |
| }, |
| { |
| "epoch": 0.856, |
| "grad_norm": 0.6342357397079468, |
| "learning_rate": 1.5392335971602638e-05, |
| "loss": 1.1568, |
| "num_input_tokens_seen": 5609881600, |
| "step": 85600, |
| "train_runtime": 41397.1556, |
| "train_tokens_per_second": 135513.697 |
| }, |
| { |
| "epoch": 0.857, |
| "grad_norm": 0.6623713970184326, |
| "learning_rate": 1.5182979764785258e-05, |
| "loss": 1.1649, |
| "num_input_tokens_seen": 5616435200, |
| "step": 85700, |
| "train_runtime": 41450.3243, |
| "train_tokens_per_second": 135497.98 |
| }, |
| { |
| "epoch": 0.858, |
| "grad_norm": 0.6217081546783447, |
| "learning_rate": 1.4974981163864896e-05, |
| "loss": 1.1772, |
| "num_input_tokens_seen": 5622988800, |
| "step": 85800, |
| "train_runtime": 41497.5379, |
| "train_tokens_per_second": 135501.745 |
| }, |
| { |
| "epoch": 0.859, |
| "grad_norm": 0.6180946826934814, |
| "learning_rate": 1.4768342263385192e-05, |
| "loss": 1.1601, |
| "num_input_tokens_seen": 5629542400, |
| "step": 85900, |
| "train_runtime": 41546.6611, |
| "train_tokens_per_second": 135499.274 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 0.5609486103057861, |
| "learning_rate": 1.4563065144197517e-05, |
| "loss": 1.1866, |
| "num_input_tokens_seen": 5636096000, |
| "step": 86000, |
| "train_runtime": 41594.2678, |
| "train_tokens_per_second": 135501.748 |
| }, |
| { |
| "epoch": 0.861, |
| "grad_norm": 0.5352550148963928, |
| "learning_rate": 1.4359151873440216e-05, |
| "loss": 1.1732, |
| "num_input_tokens_seen": 5642649600, |
| "step": 86100, |
| "train_runtime": 41640.8053, |
| "train_tokens_per_second": 135507.696 |
| }, |
| { |
| "epoch": 0.862, |
| "grad_norm": 0.5788577198982239, |
| "learning_rate": 1.415660450451767e-05, |
| "loss": 1.1785, |
| "num_input_tokens_seen": 5649203200, |
| "step": 86200, |
| "train_runtime": 41695.0254, |
| "train_tokens_per_second": 135488.662 |
| }, |
| { |
| "epoch": 0.863, |
| "grad_norm": 0.5672028064727783, |
| "learning_rate": 1.3955425077079595e-05, |
| "loss": 1.1692, |
| "num_input_tokens_seen": 5655756800, |
| "step": 86300, |
| "train_runtime": 41742.7936, |
| "train_tokens_per_second": 135490.615 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.577563464641571, |
| "learning_rate": 1.375561561700061e-05, |
| "loss": 1.1662, |
| "num_input_tokens_seen": 5662310400, |
| "step": 86400, |
| "train_runtime": 41789.652, |
| "train_tokens_per_second": 135495.515 |
| }, |
| { |
| "epoch": 0.865, |
| "grad_norm": 0.544994592666626, |
| "learning_rate": 1.3557178136359798e-05, |
| "loss": 1.1665, |
| "num_input_tokens_seen": 5668864000, |
| "step": 86500, |
| "train_runtime": 41842.8709, |
| "train_tokens_per_second": 135479.805 |
| }, |
| { |
| "epoch": 0.866, |
| "grad_norm": 0.5978608727455139, |
| "learning_rate": 1.3360114633420333e-05, |
| "loss": 1.1644, |
| "num_input_tokens_seen": 5675417600, |
| "step": 86600, |
| "train_runtime": 41891.5143, |
| "train_tokens_per_second": 135478.932 |
| }, |
| { |
| "epoch": 0.867, |
| "grad_norm": 0.6005887985229492, |
| "learning_rate": 1.3164427092609503e-05, |
| "loss": 1.1742, |
| "num_input_tokens_seen": 5681971200, |
| "step": 86700, |
| "train_runtime": 41939.4895, |
| "train_tokens_per_second": 135480.218 |
| }, |
| { |
| "epoch": 0.868, |
| "grad_norm": 0.5312247276306152, |
| "learning_rate": 1.2970117484498732e-05, |
| "loss": 1.1575, |
| "num_input_tokens_seen": 5688524800, |
| "step": 86800, |
| "train_runtime": 41987.1811, |
| "train_tokens_per_second": 135482.418 |
| }, |
| { |
| "epoch": 0.869, |
| "grad_norm": 0.9317598342895508, |
| "learning_rate": 1.2777187765783558e-05, |
| "loss": 1.1668, |
| "num_input_tokens_seen": 5695078400, |
| "step": 86900, |
| "train_runtime": 42034.5611, |
| "train_tokens_per_second": 135485.616 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.5501394271850586, |
| "learning_rate": 1.2585639879264103e-05, |
| "loss": 1.1741, |
| "num_input_tokens_seen": 5701632000, |
| "step": 87000, |
| "train_runtime": 42082.1201, |
| "train_tokens_per_second": 135488.231 |
| }, |
| { |
| "epoch": 0.871, |
| "grad_norm": 0.6144236326217651, |
| "learning_rate": 1.2395475753825518e-05, |
| "loss": 1.1665, |
| "num_input_tokens_seen": 5708185600, |
| "step": 87100, |
| "train_runtime": 42136.7086, |
| "train_tokens_per_second": 135468.236 |
| }, |
| { |
| "epoch": 0.872, |
| "grad_norm": 0.6324082612991333, |
| "learning_rate": 1.2206697304418367e-05, |
| "loss": 1.1523, |
| "num_input_tokens_seen": 5714739200, |
| "step": 87200, |
| "train_runtime": 42184.2095, |
| "train_tokens_per_second": 135471.051 |
| }, |
| { |
| "epoch": 0.873, |
| "grad_norm": 0.6486518979072571, |
| "learning_rate": 1.2019306432039594e-05, |
| "loss": 1.1872, |
| "num_input_tokens_seen": 5721292800, |
| "step": 87300, |
| "train_runtime": 42230.9222, |
| "train_tokens_per_second": 135476.388 |
| }, |
| { |
| "epoch": 0.874, |
| "grad_norm": 0.5755148530006409, |
| "learning_rate": 1.1833305023713153e-05, |
| "loss": 1.1963, |
| "num_input_tokens_seen": 5727846400, |
| "step": 87400, |
| "train_runtime": 42278.9901, |
| "train_tokens_per_second": 135477.37 |
| }, |
| { |
| "epoch": 0.875, |
| "grad_norm": 0.6408706307411194, |
| "learning_rate": 1.1648694952471205e-05, |
| "loss": 1.163, |
| "num_input_tokens_seen": 5734400000, |
| "step": 87500, |
| "train_runtime": 42326.8376, |
| "train_tokens_per_second": 135479.056 |
| }, |
| { |
| "epoch": 0.876, |
| "grad_norm": 0.6233325600624084, |
| "learning_rate": 1.1465478077335088e-05, |
| "loss": 1.1591, |
| "num_input_tokens_seen": 5740953600, |
| "step": 87600, |
| "train_runtime": 42379.4952, |
| "train_tokens_per_second": 135465.36 |
| }, |
| { |
| "epoch": 0.877, |
| "grad_norm": 0.8282228708267212, |
| "learning_rate": 1.1283656243296695e-05, |
| "loss": 1.1799, |
| "num_input_tokens_seen": 5747507200, |
| "step": 87700, |
| "train_runtime": 42427.8149, |
| "train_tokens_per_second": 135465.548 |
| }, |
| { |
| "epoch": 0.878, |
| "grad_norm": 0.7755045294761658, |
| "learning_rate": 1.1103231281299923e-05, |
| "loss": 1.1565, |
| "num_input_tokens_seen": 5754060800, |
| "step": 87800, |
| "train_runtime": 42474.6192, |
| "train_tokens_per_second": 135470.568 |
| }, |
| { |
| "epoch": 0.879, |
| "grad_norm": 0.6230588555335999, |
| "learning_rate": 1.0924205008222086e-05, |
| "loss": 1.1673, |
| "num_input_tokens_seen": 5760614400, |
| "step": 87900, |
| "train_runtime": 42522.6205, |
| "train_tokens_per_second": 135471.764 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.5966441035270691, |
| "learning_rate": 1.0746579226855768e-05, |
| "loss": 1.1628, |
| "num_input_tokens_seen": 5767168000, |
| "step": 88000, |
| "train_runtime": 42576.1454, |
| "train_tokens_per_second": 135455.381 |
| }, |
| { |
| "epoch": 0.881, |
| "grad_norm": 0.6604552865028381, |
| "learning_rate": 1.0570355725890678e-05, |
| "loss": 1.1769, |
| "num_input_tokens_seen": 5773721600, |
| "step": 88100, |
| "train_runtime": 42624.8502, |
| "train_tokens_per_second": 135454.355 |
| }, |
| { |
| "epoch": 0.882, |
| "grad_norm": 0.5727500319480896, |
| "learning_rate": 1.0395536279895428e-05, |
| "loss": 1.1571, |
| "num_input_tokens_seen": 5780275200, |
| "step": 88200, |
| "train_runtime": 42673.6883, |
| "train_tokens_per_second": 135452.909 |
| }, |
| { |
| "epoch": 0.883, |
| "grad_norm": 0.5748215317726135, |
| "learning_rate": 1.0222122649299952e-05, |
| "loss": 1.1666, |
| "num_input_tokens_seen": 5786828800, |
| "step": 88300, |
| "train_runtime": 42720.0242, |
| "train_tokens_per_second": 135459.399 |
| }, |
| { |
| "epoch": 0.884, |
| "grad_norm": 0.6671021580696106, |
| "learning_rate": 1.0050116580377593e-05, |
| "loss": 1.1887, |
| "num_input_tokens_seen": 5793382400, |
| "step": 88400, |
| "train_runtime": 42766.9841, |
| "train_tokens_per_second": 135463.899 |
| }, |
| { |
| "epoch": 0.885, |
| "grad_norm": 0.7352688908576965, |
| "learning_rate": 9.879519805227515e-06, |
| "loss": 1.173, |
| "num_input_tokens_seen": 5799936000, |
| "step": 88500, |
| "train_runtime": 42820.4689, |
| "train_tokens_per_second": 135447.746 |
| }, |
| { |
| "epoch": 0.886, |
| "grad_norm": 0.5779001712799072, |
| "learning_rate": 9.710334041757351e-06, |
| "loss": 1.1612, |
| "num_input_tokens_seen": 5806489600, |
| "step": 88600, |
| "train_runtime": 42866.8877, |
| "train_tokens_per_second": 135453.958 |
| }, |
| { |
| "epoch": 0.887, |
| "grad_norm": 0.7246189713478088, |
| "learning_rate": 9.542560993665932e-06, |
| "loss": 1.1926, |
| "num_input_tokens_seen": 5813043200, |
| "step": 88700, |
| "train_runtime": 42915.9912, |
| "train_tokens_per_second": 135451.682 |
| }, |
| { |
| "epoch": 0.888, |
| "grad_norm": 0.5459685921669006, |
| "learning_rate": 9.376202350425888e-06, |
| "loss": 1.1698, |
| "num_input_tokens_seen": 5819596800, |
| "step": 88800, |
| "train_runtime": 42964.4051, |
| "train_tokens_per_second": 135451.586 |
| }, |
| { |
| "epoch": 0.889, |
| "grad_norm": 0.5574699640274048, |
| "learning_rate": 9.211259787266972e-06, |
| "loss": 1.1627, |
| "num_input_tokens_seen": 5826150400, |
| "step": 88900, |
| "train_runtime": 43011.9797, |
| "train_tokens_per_second": 135454.133 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 0.5637386441230774, |
| "learning_rate": 9.047734965158966e-06, |
| "loss": 1.1659, |
| "num_input_tokens_seen": 5832704000, |
| "step": 89000, |
| "train_runtime": 43065.5789, |
| "train_tokens_per_second": 135437.724 |
| }, |
| { |
| "epoch": 0.891, |
| "grad_norm": 0.5420241951942444, |
| "learning_rate": 8.885629530794997e-06, |
| "loss": 1.1693, |
| "num_input_tokens_seen": 5839257600, |
| "step": 89100, |
| "train_runtime": 43113.8932, |
| "train_tokens_per_second": 135437.957 |
| }, |
| { |
| "epoch": 0.892, |
| "grad_norm": 0.5701260566711426, |
| "learning_rate": 8.724945116574983e-06, |
| "loss": 1.1592, |
| "num_input_tokens_seen": 5845811200, |
| "step": 89200, |
| "train_runtime": 43161.415, |
| "train_tokens_per_second": 135440.675 |
| }, |
| { |
| "epoch": 0.893, |
| "grad_norm": 0.5882892608642578, |
| "learning_rate": 8.565683340589185e-06, |
| "loss": 1.1601, |
| "num_input_tokens_seen": 5852364800, |
| "step": 89300, |
| "train_runtime": 43209.5307, |
| "train_tokens_per_second": 135441.527 |
| }, |
| { |
| "epoch": 0.894, |
| "grad_norm": 0.5708109736442566, |
| "learning_rate": 8.40784580660196e-06, |
| "loss": 1.1684, |
| "num_input_tokens_seen": 5858918400, |
| "step": 89400, |
| "train_runtime": 43257.3597, |
| "train_tokens_per_second": 135443.273 |
| }, |
| { |
| "epoch": 0.895, |
| "grad_norm": 0.5796698927879333, |
| "learning_rate": 8.251434104035465e-06, |
| "loss": 1.1753, |
| "num_input_tokens_seen": 5865472000, |
| "step": 89500, |
| "train_runtime": 43305.3116, |
| "train_tokens_per_second": 135444.632 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.9602819681167603, |
| "learning_rate": 8.09644980795383e-06, |
| "loss": 1.1672, |
| "num_input_tokens_seen": 5872025600, |
| "step": 89600, |
| "train_runtime": 43360.788, |
| "train_tokens_per_second": 135422.484 |
| }, |
| { |
| "epoch": 0.897, |
| "grad_norm": 0.6962534189224243, |
| "learning_rate": 7.942894479047252e-06, |
| "loss": 1.1622, |
| "num_input_tokens_seen": 5878579200, |
| "step": 89700, |
| "train_runtime": 43407.8503, |
| "train_tokens_per_second": 135426.637 |
| }, |
| { |
| "epoch": 0.898, |
| "grad_norm": 0.6292552351951599, |
| "learning_rate": 7.790769663616098e-06, |
| "loss": 1.1632, |
| "num_input_tokens_seen": 5885132800, |
| "step": 89800, |
| "train_runtime": 43455.9389, |
| "train_tokens_per_second": 135427.584 |
| }, |
| { |
| "epoch": 0.899, |
| "grad_norm": 0.5883670449256897, |
| "learning_rate": 7.64007689355563e-06, |
| "loss": 1.1632, |
| "num_input_tokens_seen": 5891686400, |
| "step": 89900, |
| "train_runtime": 43504.2315, |
| "train_tokens_per_second": 135427.893 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.8059070706367493, |
| "learning_rate": 7.490817686340361e-06, |
| "loss": 1.1728, |
| "num_input_tokens_seen": 5898240000, |
| "step": 90000, |
| "train_runtime": 43552.1457, |
| "train_tokens_per_second": 135429.378 |
| }, |
| { |
| "epoch": 0.901, |
| "grad_norm": 0.5949374437332153, |
| "learning_rate": 7.342993545008818e-06, |
| "loss": 1.1732, |
| "num_input_tokens_seen": 5904793600, |
| "step": 90100, |
| "train_runtime": 43599.6931, |
| "train_tokens_per_second": 135431.999 |
| }, |
| { |
| "epoch": 0.902, |
| "grad_norm": 0.6094557642936707, |
| "learning_rate": 7.196605958148505e-06, |
| "loss": 1.1713, |
| "num_input_tokens_seen": 5911347200, |
| "step": 90200, |
| "train_runtime": 43653.2541, |
| "train_tokens_per_second": 135415.957 |
| }, |
| { |
| "epoch": 0.903, |
| "grad_norm": 0.6275845170021057, |
| "learning_rate": 7.051656399880778e-06, |
| "loss": 1.1743, |
| "num_input_tokens_seen": 5917900800, |
| "step": 90300, |
| "train_runtime": 43702.1275, |
| "train_tokens_per_second": 135414.478 |
| }, |
| { |
| "epoch": 0.904, |
| "grad_norm": 0.7113337516784668, |
| "learning_rate": 6.9081463298460815e-06, |
| "loss": 1.162, |
| "num_input_tokens_seen": 5924454400, |
| "step": 90400, |
| "train_runtime": 43749.6704, |
| "train_tokens_per_second": 135417.121 |
| }, |
| { |
| "epoch": 0.905, |
| "grad_norm": 0.6237180233001709, |
| "learning_rate": 6.766077193189201e-06, |
| "loss": 1.159, |
| "num_input_tokens_seen": 5931008000, |
| "step": 90500, |
| "train_runtime": 43797.6522, |
| "train_tokens_per_second": 135418.4 |
| }, |
| { |
| "epoch": 0.906, |
| "grad_norm": 0.9803968667984009, |
| "learning_rate": 6.625450420544831e-06, |
| "loss": 1.1788, |
| "num_input_tokens_seen": 5937561600, |
| "step": 90600, |
| "train_runtime": 43846.1111, |
| "train_tokens_per_second": 135418.203 |
| }, |
| { |
| "epoch": 0.907, |
| "grad_norm": 0.5648267269134521, |
| "learning_rate": 6.486267428022967e-06, |
| "loss": 1.1581, |
| "num_input_tokens_seen": 5944115200, |
| "step": 90700, |
| "train_runtime": 43893.4216, |
| "train_tokens_per_second": 135421.55 |
| }, |
| { |
| "epoch": 0.908, |
| "grad_norm": 0.610898494720459, |
| "learning_rate": 6.34852961719477e-06, |
| "loss": 1.1557, |
| "num_input_tokens_seen": 5950668800, |
| "step": 90800, |
| "train_runtime": 43947.4481, |
| "train_tokens_per_second": 135404.194 |
| }, |
| { |
| "epoch": 0.909, |
| "grad_norm": 0.732876718044281, |
| "learning_rate": 6.212238375078521e-06, |
| "loss": 1.1683, |
| "num_input_tokens_seen": 5957222400, |
| "step": 90900, |
| "train_runtime": 43996.4271, |
| "train_tokens_per_second": 135402.413 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 0.5793011784553528, |
| "learning_rate": 6.077395074125491e-06, |
| "loss": 1.1747, |
| "num_input_tokens_seen": 5963776000, |
| "step": 91000, |
| "train_runtime": 44044.5112, |
| "train_tokens_per_second": 135403.387 |
| }, |
| { |
| "epoch": 0.911, |
| "grad_norm": 0.6567527651786804, |
| "learning_rate": 5.944001072206212e-06, |
| "loss": 1.1594, |
| "num_input_tokens_seen": 5970329600, |
| "step": 91100, |
| "train_runtime": 44091.43, |
| "train_tokens_per_second": 135407.938 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 0.6197203397750854, |
| "learning_rate": 5.812057712596807e-06, |
| "loss": 1.1504, |
| "num_input_tokens_seen": 5976883200, |
| "step": 91200, |
| "train_runtime": 44140.2623, |
| "train_tokens_per_second": 135406.608 |
| }, |
| { |
| "epoch": 0.913, |
| "grad_norm": 0.6190736889839172, |
| "learning_rate": 5.681566323965486e-06, |
| "loss": 1.1645, |
| "num_input_tokens_seen": 5983436800, |
| "step": 91300, |
| "train_runtime": 44194.3429, |
| "train_tokens_per_second": 135389.202 |
| }, |
| { |
| "epoch": 0.914, |
| "grad_norm": 0.5632036924362183, |
| "learning_rate": 5.552528220359004e-06, |
| "loss": 1.1691, |
| "num_input_tokens_seen": 5989990400, |
| "step": 91400, |
| "train_runtime": 44242.165, |
| "train_tokens_per_second": 135390.987 |
| }, |
| { |
| "epoch": 0.915, |
| "grad_norm": 0.6650084257125854, |
| "learning_rate": 5.424944701189704e-06, |
| "loss": 1.1587, |
| "num_input_tokens_seen": 5996544000, |
| "step": 91500, |
| "train_runtime": 44290.3253, |
| "train_tokens_per_second": 135391.735 |
| }, |
| { |
| "epoch": 0.916, |
| "grad_norm": 0.6665343642234802, |
| "learning_rate": 5.298817051222182e-06, |
| "loss": 1.16, |
| "num_input_tokens_seen": 6003097600, |
| "step": 91600, |
| "train_runtime": 44344.1461, |
| "train_tokens_per_second": 135375.199 |
| }, |
| { |
| "epoch": 0.917, |
| "grad_norm": 0.9934324026107788, |
| "learning_rate": 5.174146540560442e-06, |
| "loss": 1.186, |
| "num_input_tokens_seen": 6009651200, |
| "step": 91700, |
| "train_runtime": 44386.6411, |
| "train_tokens_per_second": 135393.241 |
| }, |
| { |
| "epoch": 0.918, |
| "grad_norm": 0.587840735912323, |
| "learning_rate": 5.050934424635195e-06, |
| "loss": 1.1685, |
| "num_input_tokens_seen": 6016204800, |
| "step": 91800, |
| "train_runtime": 44440.2445, |
| "train_tokens_per_second": 135377.401 |
| }, |
| { |
| "epoch": 0.919, |
| "grad_norm": 0.6308780312538147, |
| "learning_rate": 4.9291819441910465e-06, |
| "loss": 1.1593, |
| "num_input_tokens_seen": 6022758400, |
| "step": 91900, |
| "train_runtime": 44487.4748, |
| "train_tokens_per_second": 135380.99 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.6875436305999756, |
| "learning_rate": 4.808890325274129e-06, |
| "loss": 1.1686, |
| "num_input_tokens_seen": 6029312000, |
| "step": 92000, |
| "train_runtime": 44535.4396, |
| "train_tokens_per_second": 135382.339 |
| }, |
| { |
| "epoch": 0.921, |
| "grad_norm": 0.6450539231300354, |
| "learning_rate": 4.690060779219723e-06, |
| "loss": 1.1669, |
| "num_input_tokens_seen": 6035865600, |
| "step": 92100, |
| "train_runtime": 44583.0204, |
| "train_tokens_per_second": 135384.852 |
| }, |
| { |
| "epoch": 0.922, |
| "grad_norm": 1.0118526220321655, |
| "learning_rate": 4.572694502640023e-06, |
| "loss": 1.1601, |
| "num_input_tokens_seen": 6042419200, |
| "step": 92200, |
| "train_runtime": 44632.4327, |
| "train_tokens_per_second": 135381.803 |
| }, |
| { |
| "epoch": 0.923, |
| "grad_norm": 0.5630050897598267, |
| "learning_rate": 4.456792677412141e-06, |
| "loss": 1.164, |
| "num_input_tokens_seen": 6048972800, |
| "step": 92300, |
| "train_runtime": 44685.5287, |
| "train_tokens_per_second": 135367.6 |
| }, |
| { |
| "epoch": 0.924, |
| "grad_norm": 0.5819036364555359, |
| "learning_rate": 4.342356470666153e-06, |
| "loss": 1.177, |
| "num_input_tokens_seen": 6055526400, |
| "step": 92400, |
| "train_runtime": 44733.1102, |
| "train_tokens_per_second": 135370.118 |
| }, |
| { |
| "epoch": 0.925, |
| "grad_norm": 0.5852016806602478, |
| "learning_rate": 4.22938703477344e-06, |
| "loss": 1.1846, |
| "num_input_tokens_seen": 6062080000, |
| "step": 92500, |
| "train_runtime": 44781.2518, |
| "train_tokens_per_second": 135370.937 |
| }, |
| { |
| "epoch": 0.926, |
| "grad_norm": 0.7466326355934143, |
| "learning_rate": 4.117885507334884e-06, |
| "loss": 1.1564, |
| "num_input_tokens_seen": 6068633600, |
| "step": 92600, |
| "train_runtime": 44829.0669, |
| "train_tokens_per_second": 135372.739 |
| }, |
| { |
| "epoch": 0.927, |
| "grad_norm": 0.7777779698371887, |
| "learning_rate": 4.007853011169687e-06, |
| "loss": 1.1654, |
| "num_input_tokens_seen": 6075187200, |
| "step": 92700, |
| "train_runtime": 44882.4041, |
| "train_tokens_per_second": 135357.883 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 0.9159000515937805, |
| "learning_rate": 3.899290654303855e-06, |
| "loss": 1.1854, |
| "num_input_tokens_seen": 6081740800, |
| "step": 92800, |
| "train_runtime": 44929.6625, |
| "train_tokens_per_second": 135361.373 |
| }, |
| { |
| "epoch": 0.929, |
| "grad_norm": 0.5948230028152466, |
| "learning_rate": 3.7921995299591168e-06, |
| "loss": 1.1602, |
| "num_input_tokens_seen": 6088294400, |
| "step": 92900, |
| "train_runtime": 44977.4717, |
| "train_tokens_per_second": 135363.198 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 0.5999124646186829, |
| "learning_rate": 3.686580716541887e-06, |
| "loss": 1.1484, |
| "num_input_tokens_seen": 6094848000, |
| "step": 93000, |
| "train_runtime": 45026.2424, |
| "train_tokens_per_second": 135362.128 |
| }, |
| { |
| "epoch": 0.931, |
| "grad_norm": 0.6015925407409668, |
| "learning_rate": 3.582435277632456e-06, |
| "loss": 1.1638, |
| "num_input_tokens_seen": 6101401600, |
| "step": 93100, |
| "train_runtime": 45073.6825, |
| "train_tokens_per_second": 135365.057 |
| }, |
| { |
| "epoch": 0.932, |
| "grad_norm": 0.5493288040161133, |
| "learning_rate": 3.479764261974266e-06, |
| "loss": 1.1644, |
| "num_input_tokens_seen": 6107955200, |
| "step": 93200, |
| "train_runtime": 45131.734, |
| "train_tokens_per_second": 135336.152 |
| }, |
| { |
| "epoch": 0.933, |
| "grad_norm": 0.5847836136817932, |
| "learning_rate": 3.3785687034632523e-06, |
| "loss": 1.1528, |
| "num_input_tokens_seen": 6114508800, |
| "step": 93300, |
| "train_runtime": 45180.4411, |
| "train_tokens_per_second": 135335.305 |
| }, |
| { |
| "epoch": 0.934, |
| "grad_norm": 0.6086737513542175, |
| "learning_rate": 3.2788496211376024e-06, |
| "loss": 1.1525, |
| "num_input_tokens_seen": 6121062400, |
| "step": 93400, |
| "train_runtime": 45228.3556, |
| "train_tokens_per_second": 135336.833 |
| }, |
| { |
| "epoch": 0.935, |
| "grad_norm": 0.6097891330718994, |
| "learning_rate": 3.180608019167363e-06, |
| "loss": 1.1681, |
| "num_input_tokens_seen": 6127616000, |
| "step": 93500, |
| "train_runtime": 45275.6501, |
| "train_tokens_per_second": 135340.21 |
| }, |
| { |
| "epoch": 0.936, |
| "grad_norm": 0.5980057716369629, |
| "learning_rate": 3.0838448868443665e-06, |
| "loss": 1.1603, |
| "num_input_tokens_seen": 6134169600, |
| "step": 93600, |
| "train_runtime": 45322.6488, |
| "train_tokens_per_second": 135344.464 |
| }, |
| { |
| "epoch": 0.937, |
| "grad_norm": 0.7306444048881531, |
| "learning_rate": 2.988561198572287e-06, |
| "loss": 1.1702, |
| "num_input_tokens_seen": 6140723200, |
| "step": 93700, |
| "train_runtime": 45376.9708, |
| "train_tokens_per_second": 135326.865 |
| }, |
| { |
| "epoch": 0.938, |
| "grad_norm": 0.9187434911727905, |
| "learning_rate": 2.8947579138567987e-06, |
| "loss": 1.1654, |
| "num_input_tokens_seen": 6147276800, |
| "step": 93800, |
| "train_runtime": 45427.1088, |
| "train_tokens_per_second": 135321.771 |
| }, |
| { |
| "epoch": 0.939, |
| "grad_norm": 0.6403319835662842, |
| "learning_rate": 2.8024359772959525e-06, |
| "loss": 1.1581, |
| "num_input_tokens_seen": 6153830400, |
| "step": 93900, |
| "train_runtime": 45475.34, |
| "train_tokens_per_second": 135322.362 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.7088416218757629, |
| "learning_rate": 2.711596318570597e-06, |
| "loss": 1.1683, |
| "num_input_tokens_seen": 6160384000, |
| "step": 94000, |
| "train_runtime": 45523.8789, |
| "train_tokens_per_second": 135322.037 |
| }, |
| { |
| "epoch": 0.941, |
| "grad_norm": 0.6289553642272949, |
| "learning_rate": 2.6222398524351206e-06, |
| "loss": 1.1538, |
| "num_input_tokens_seen": 6166937600, |
| "step": 94100, |
| "train_runtime": 45571.6907, |
| "train_tokens_per_second": 135323.871 |
| }, |
| { |
| "epoch": 0.942, |
| "grad_norm": 0.8788822889328003, |
| "learning_rate": 2.5343674787081435e-06, |
| "loss": 1.1666, |
| "num_input_tokens_seen": 6173491200, |
| "step": 94200, |
| "train_runtime": 45621.3271, |
| "train_tokens_per_second": 135320.29 |
| }, |
| { |
| "epoch": 0.943, |
| "grad_norm": 0.575515866279602, |
| "learning_rate": 2.4479800822634565e-06, |
| "loss": 1.1685, |
| "num_input_tokens_seen": 6180044800, |
| "step": 94300, |
| "train_runtime": 45670.6842, |
| "train_tokens_per_second": 135317.543 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 0.5740439891815186, |
| "learning_rate": 2.3630785330212286e-06, |
| "loss": 1.1588, |
| "num_input_tokens_seen": 6186598400, |
| "step": 94400, |
| "train_runtime": 45717.875, |
| "train_tokens_per_second": 135321.215 |
| }, |
| { |
| "epoch": 0.945, |
| "grad_norm": 0.6576538681983948, |
| "learning_rate": 2.2796636859390815e-06, |
| "loss": 1.1492, |
| "num_input_tokens_seen": 6193152000, |
| "step": 94500, |
| "train_runtime": 45766.0209, |
| "train_tokens_per_second": 135322.055 |
| }, |
| { |
| "epoch": 0.946, |
| "grad_norm": 0.5781713128089905, |
| "learning_rate": 2.197736381003612e-06, |
| "loss": 1.1725, |
| "num_input_tokens_seen": 6199705600, |
| "step": 94600, |
| "train_runtime": 45819.6687, |
| "train_tokens_per_second": 135306.644 |
| }, |
| { |
| "epoch": 0.947, |
| "grad_norm": 0.6812490820884705, |
| "learning_rate": 2.1172974432218826e-06, |
| "loss": 1.1509, |
| "num_input_tokens_seen": 6206259200, |
| "step": 94700, |
| "train_runtime": 45866.8187, |
| "train_tokens_per_second": 135310.435 |
| }, |
| { |
| "epoch": 0.948, |
| "grad_norm": 0.8884466886520386, |
| "learning_rate": 2.0383476826130786e-06, |
| "loss": 1.157, |
| "num_input_tokens_seen": 6212812800, |
| "step": 94800, |
| "train_runtime": 45915.7744, |
| "train_tokens_per_second": 135308.897 |
| }, |
| { |
| "epoch": 0.949, |
| "grad_norm": 0.6096293926239014, |
| "learning_rate": 1.96088789420043e-06, |
| "loss": 1.1609, |
| "num_input_tokens_seen": 6219366400, |
| "step": 94900, |
| "train_runtime": 45963.3824, |
| "train_tokens_per_second": 135311.33 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.5762118697166443, |
| "learning_rate": 1.8849188580031539e-06, |
| "loss": 1.1621, |
| "num_input_tokens_seen": 6225920000, |
| "step": 95000, |
| "train_runtime": 46012.4538, |
| "train_tokens_per_second": 135309.454 |
| }, |
| { |
| "epoch": 0.951, |
| "grad_norm": 0.5296618938446045, |
| "learning_rate": 1.8104413390286066e-06, |
| "loss": 1.157, |
| "num_input_tokens_seen": 6232473600, |
| "step": 95100, |
| "train_runtime": 46059.2761, |
| "train_tokens_per_second": 135314.189 |
| }, |
| { |
| "epoch": 0.952, |
| "grad_norm": 0.6025533676147461, |
| "learning_rate": 1.7374560872645438e-06, |
| "loss": 1.1507, |
| "num_input_tokens_seen": 6239027200, |
| "step": 95200, |
| "train_runtime": 46113.68, |
| "train_tokens_per_second": 135296.667 |
| }, |
| { |
| "epoch": 0.953, |
| "grad_norm": 0.616148829460144, |
| "learning_rate": 1.6659638376716578e-06, |
| "loss": 1.1711, |
| "num_input_tokens_seen": 6245580800, |
| "step": 95300, |
| "train_runtime": 46162.0494, |
| "train_tokens_per_second": 135296.87 |
| }, |
| { |
| "epoch": 0.954, |
| "grad_norm": 0.6661262512207031, |
| "learning_rate": 1.5959653101761172e-06, |
| "loss": 1.1604, |
| "num_input_tokens_seen": 6252134400, |
| "step": 95400, |
| "train_runtime": 46208.848, |
| "train_tokens_per_second": 135301.672 |
| }, |
| { |
| "epoch": 0.955, |
| "grad_norm": 0.8173303604125977, |
| "learning_rate": 1.5274612096623063e-06, |
| "loss": 1.1498, |
| "num_input_tokens_seen": 6258688000, |
| "step": 95500, |
| "train_runtime": 46256.5159, |
| "train_tokens_per_second": 135303.922 |
| }, |
| { |
| "epoch": 0.956, |
| "grad_norm": 0.6189817786216736, |
| "learning_rate": 1.4604522259657635e-06, |
| "loss": 1.1602, |
| "num_input_tokens_seen": 6265241600, |
| "step": 95600, |
| "train_runtime": 46309.4141, |
| "train_tokens_per_second": 135290.885 |
| }, |
| { |
| "epoch": 0.957, |
| "grad_norm": 0.7523248195648193, |
| "learning_rate": 1.3949390338662047e-06, |
| "loss": 1.1655, |
| "num_input_tokens_seen": 6271795200, |
| "step": 95700, |
| "train_runtime": 46357.4405, |
| "train_tokens_per_second": 135292.094 |
| }, |
| { |
| "epoch": 0.958, |
| "grad_norm": 0.5935103297233582, |
| "learning_rate": 1.330922293080744e-06, |
| "loss": 1.1702, |
| "num_input_tokens_seen": 6278348800, |
| "step": 95800, |
| "train_runtime": 46406.0604, |
| "train_tokens_per_second": 135291.571 |
| }, |
| { |
| "epoch": 0.959, |
| "grad_norm": 0.8042653203010559, |
| "learning_rate": 1.2684026482572662e-06, |
| "loss": 1.1623, |
| "num_input_tokens_seen": 6284902400, |
| "step": 95900, |
| "train_runtime": 46454.8491, |
| "train_tokens_per_second": 135290.557 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.5935735106468201, |
| "learning_rate": 1.2073807289678993e-06, |
| "loss": 1.1441, |
| "num_input_tokens_seen": 6291456000, |
| "step": 96000, |
| "train_runtime": 46502.688, |
| "train_tokens_per_second": 135292.308 |
| }, |
| { |
| "epoch": 0.961, |
| "grad_norm": 0.5718377828598022, |
| "learning_rate": 1.147857149702669e-06, |
| "loss": 1.1618, |
| "num_input_tokens_seen": 6298009600, |
| "step": 96100, |
| "train_runtime": 46555.2337, |
| "train_tokens_per_second": 135280.378 |
| }, |
| { |
| "epoch": 0.962, |
| "grad_norm": 0.6801995635032654, |
| "learning_rate": 1.0898325098633697e-06, |
| "loss": 1.1479, |
| "num_input_tokens_seen": 6304563200, |
| "step": 96200, |
| "train_runtime": 46603.2751, |
| "train_tokens_per_second": 135281.548 |
| }, |
| { |
| "epoch": 0.963, |
| "grad_norm": 0.5564619898796082, |
| "learning_rate": 1.0333073937575043e-06, |
| "loss": 1.1582, |
| "num_input_tokens_seen": 6311116800, |
| "step": 96300, |
| "train_runtime": 46652.5681, |
| "train_tokens_per_second": 135279.087 |
| }, |
| { |
| "epoch": 0.964, |
| "grad_norm": 0.6501321792602539, |
| "learning_rate": 9.782823705923204e-07, |
| "loss": 1.1617, |
| "num_input_tokens_seen": 6317670400, |
| "step": 96400, |
| "train_runtime": 46700.1727, |
| "train_tokens_per_second": 135281.521 |
| }, |
| { |
| "epoch": 0.965, |
| "grad_norm": 0.6728459596633911, |
| "learning_rate": 9.247579944692162e-07, |
| "loss": 1.1592, |
| "num_input_tokens_seen": 6324224000, |
| "step": 96500, |
| "train_runtime": 46748.7553, |
| "train_tokens_per_second": 135281.12 |
| }, |
| { |
| "epoch": 0.966, |
| "grad_norm": 0.5893784761428833, |
| "learning_rate": 8.72734804378078e-07, |
| "loss": 1.1691, |
| "num_input_tokens_seen": 6330777600, |
| "step": 96600, |
| "train_runtime": 46801.015, |
| "train_tokens_per_second": 135270.092 |
| }, |
| { |
| "epoch": 0.967, |
| "grad_norm": 0.8625339269638062, |
| "learning_rate": 8.222133241918172e-07, |
| "loss": 1.1518, |
| "num_input_tokens_seen": 6337331200, |
| "step": 96700, |
| "train_runtime": 46847.2237, |
| "train_tokens_per_second": 135276.559 |
| }, |
| { |
| "epoch": 0.968, |
| "grad_norm": 0.6501858830451965, |
| "learning_rate": 7.731940626612088e-07, |
| "loss": 1.1693, |
| "num_input_tokens_seen": 6343884800, |
| "step": 96800, |
| "train_runtime": 46895.3712, |
| "train_tokens_per_second": 135277.419 |
| }, |
| { |
| "epoch": 0.969, |
| "grad_norm": 0.6575475335121155, |
| "learning_rate": 7.256775134096615e-07, |
| "loss": 1.1552, |
| "num_input_tokens_seen": 6350438400, |
| "step": 96900, |
| "train_runtime": 46942.8491, |
| "train_tokens_per_second": 135280.208 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 0.5287050604820251, |
| "learning_rate": 6.796641549283055e-07, |
| "loss": 1.1946, |
| "num_input_tokens_seen": 6356992000, |
| "step": 97000, |
| "train_runtime": 46991.8919, |
| "train_tokens_per_second": 135278.486 |
| }, |
| { |
| "epoch": 0.971, |
| "grad_norm": 0.568566083908081, |
| "learning_rate": 6.351544505711292e-07, |
| "loss": 1.1559, |
| "num_input_tokens_seen": 6363545600, |
| "step": 97100, |
| "train_runtime": 47040.0316, |
| "train_tokens_per_second": 135279.365 |
| }, |
| { |
| "epoch": 0.972, |
| "grad_norm": 0.9329395890235901, |
| "learning_rate": 5.921488485503833e-07, |
| "loss": 1.1603, |
| "num_input_tokens_seen": 6370099200, |
| "step": 97200, |
| "train_runtime": 47092.2725, |
| "train_tokens_per_second": 135268.46 |
| }, |
| { |
| "epoch": 0.973, |
| "grad_norm": 0.6256415843963623, |
| "learning_rate": 5.506477819319843e-07, |
| "loss": 1.1571, |
| "num_input_tokens_seen": 6376652800, |
| "step": 97300, |
| "train_runtime": 47139.4068, |
| "train_tokens_per_second": 135272.233 |
| }, |
| { |
| "epoch": 0.974, |
| "grad_norm": 0.7202081680297852, |
| "learning_rate": 5.106516686312345e-07, |
| "loss": 1.1638, |
| "num_input_tokens_seen": 6383206400, |
| "step": 97400, |
| "train_runtime": 47191.9059, |
| "train_tokens_per_second": 135260.619 |
| }, |
| { |
| "epoch": 0.975, |
| "grad_norm": 1.2700363397598267, |
| "learning_rate": 4.721609114085256e-07, |
| "loss": 1.1649, |
| "num_input_tokens_seen": 6389760000, |
| "step": 97500, |
| "train_runtime": 47240.0777, |
| "train_tokens_per_second": 135261.42 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 0.5555500388145447, |
| "learning_rate": 4.3517589786539186e-07, |
| "loss": 1.1505, |
| "num_input_tokens_seen": 6396313600, |
| "step": 97600, |
| "train_runtime": 47287.972, |
| "train_tokens_per_second": 135263.013 |
| }, |
| { |
| "epoch": 0.977, |
| "grad_norm": 0.6499391198158264, |
| "learning_rate": 3.996970004404798e-07, |
| "loss": 1.153, |
| "num_input_tokens_seen": 6402867200, |
| "step": 97700, |
| "train_runtime": 47335.8726, |
| "train_tokens_per_second": 135264.586 |
| }, |
| { |
| "epoch": 0.978, |
| "grad_norm": 0.6353591084480286, |
| "learning_rate": 3.657245764058847e-07, |
| "loss": 1.1621, |
| "num_input_tokens_seen": 6409420800, |
| "step": 97800, |
| "train_runtime": 47382.5196, |
| "train_tokens_per_second": 135269.733 |
| }, |
| { |
| "epoch": 0.979, |
| "grad_norm": 0.62052321434021, |
| "learning_rate": 3.3325896786355334e-07, |
| "loss": 1.1539, |
| "num_input_tokens_seen": 6415974400, |
| "step": 97900, |
| "train_runtime": 47435.6023, |
| "train_tokens_per_second": 135256.518 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.5979087352752686, |
| "learning_rate": 3.023005017418201e-07, |
| "loss": 1.1615, |
| "num_input_tokens_seen": 6422528000, |
| "step": 98000, |
| "train_runtime": 47484.0018, |
| "train_tokens_per_second": 135256.671 |
| }, |
| { |
| "epoch": 0.981, |
| "grad_norm": 1.0899096727371216, |
| "learning_rate": 2.7284948979205967e-07, |
| "loss": 1.166, |
| "num_input_tokens_seen": 6429081600, |
| "step": 98100, |
| "train_runtime": 47531.611, |
| "train_tokens_per_second": 135259.072 |
| }, |
| { |
| "epoch": 0.982, |
| "grad_norm": 0.6240010857582092, |
| "learning_rate": 2.449062285856729e-07, |
| "loss": 1.1565, |
| "num_input_tokens_seen": 6435635200, |
| "step": 98200, |
| "train_runtime": 47578.8884, |
| "train_tokens_per_second": 135262.412 |
| }, |
| { |
| "epoch": 0.983, |
| "grad_norm": 0.7941544651985168, |
| "learning_rate": 2.184709995109557e-07, |
| "loss": 1.1572, |
| "num_input_tokens_seen": 6442188800, |
| "step": 98300, |
| "train_runtime": 47627.3828, |
| "train_tokens_per_second": 135262.289 |
| }, |
| { |
| "epoch": 0.984, |
| "grad_norm": 0.5704551339149475, |
| "learning_rate": 1.9354406877038487e-07, |
| "loss": 1.1629, |
| "num_input_tokens_seen": 6448742400, |
| "step": 98400, |
| "train_runtime": 47679.6586, |
| "train_tokens_per_second": 135251.438 |
| }, |
| { |
| "epoch": 0.985, |
| "grad_norm": 0.5758212208747864, |
| "learning_rate": 1.7012568737788668e-07, |
| "loss": 1.1892, |
| "num_input_tokens_seen": 6455296000, |
| "step": 98500, |
| "train_runtime": 47728.7818, |
| "train_tokens_per_second": 135249.545 |
| }, |
| { |
| "epoch": 0.986, |
| "grad_norm": 0.5768951773643494, |
| "learning_rate": 1.4821609115630574e-07, |
| "loss": 1.1617, |
| "num_input_tokens_seen": 6461849600, |
| "step": 98600, |
| "train_runtime": 47775.3275, |
| "train_tokens_per_second": 135254.952 |
| }, |
| { |
| "epoch": 0.987, |
| "grad_norm": 0.5714033842086792, |
| "learning_rate": 1.278155007350068e-07, |
| "loss": 1.1712, |
| "num_input_tokens_seen": 6468403200, |
| "step": 98700, |
| "train_runtime": 47823.1467, |
| "train_tokens_per_second": 135256.746 |
| }, |
| { |
| "epoch": 0.988, |
| "grad_norm": 1.029975414276123, |
| "learning_rate": 1.089241215477099e-07, |
| "loss": 1.1621, |
| "num_input_tokens_seen": 6474956800, |
| "step": 98800, |
| "train_runtime": 47875.5087, |
| "train_tokens_per_second": 135245.702 |
| }, |
| { |
| "epoch": 0.989, |
| "grad_norm": 0.5554516315460205, |
| "learning_rate": 9.154214383042535e-08, |
| "loss": 1.1489, |
| "num_input_tokens_seen": 6481510400, |
| "step": 98900, |
| "train_runtime": 47923.8409, |
| "train_tokens_per_second": 135246.055 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 0.6340943574905396, |
| "learning_rate": 7.566974261945524e-08, |
| "loss": 1.1721, |
| "num_input_tokens_seen": 6488064000, |
| "step": 99000, |
| "train_runtime": 47972.1937, |
| "train_tokens_per_second": 135246.348 |
| }, |
| { |
| "epoch": 0.991, |
| "grad_norm": 0.582399845123291, |
| "learning_rate": 6.13070777496949e-08, |
| "loss": 1.1497, |
| "num_input_tokens_seen": 6494617600, |
| "step": 99100, |
| "train_runtime": 48020.3976, |
| "train_tokens_per_second": 135247.06 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 0.6133337020874023, |
| "learning_rate": 4.845429385303412e-08, |
| "loss": 1.1601, |
| "num_input_tokens_seen": 6501171200, |
| "step": 99200, |
| "train_runtime": 48068.6895, |
| "train_tokens_per_second": 135247.523 |
| }, |
| { |
| "epoch": 0.993, |
| "grad_norm": 0.5691381096839905, |
| "learning_rate": 3.711152035685838e-08, |
| "loss": 1.1571, |
| "num_input_tokens_seen": 6507724800, |
| "step": 99300, |
| "train_runtime": 48115.7967, |
| "train_tokens_per_second": 135251.315 |
| }, |
| { |
| "epoch": 0.994, |
| "grad_norm": 0.6613404750823975, |
| "learning_rate": 2.727887148278318e-08, |
| "loss": 1.1569, |
| "num_input_tokens_seen": 6514278400, |
| "step": 99400, |
| "train_runtime": 48169.6246, |
| "train_tokens_per_second": 135236.229 |
| }, |
| { |
| "epoch": 0.995, |
| "grad_norm": 0.5285235047340393, |
| "learning_rate": 1.8956446245455005e-08, |
| "loss": 1.1722, |
| "num_input_tokens_seen": 6520832000, |
| "step": 99500, |
| "train_runtime": 48217.4936, |
| "train_tokens_per_second": 135237.888 |
| }, |
| { |
| "epoch": 0.996, |
| "grad_norm": 0.8071156144142151, |
| "learning_rate": 1.2144328451618724e-08, |
| "loss": 1.1571, |
| "num_input_tokens_seen": 6527385600, |
| "step": 99600, |
| "train_runtime": 48264.7605, |
| "train_tokens_per_second": 135241.231 |
| }, |
| { |
| "epoch": 0.997, |
| "grad_norm": 0.5775815844535828, |
| "learning_rate": 6.84258669920168e-09, |
| "loss": 1.1634, |
| "num_input_tokens_seen": 6533939200, |
| "step": 99700, |
| "train_runtime": 48314.0709, |
| "train_tokens_per_second": 135238.846 |
| }, |
| { |
| "epoch": 0.998, |
| "grad_norm": 0.5299545526504517, |
| "learning_rate": 3.0512743767141524e-09, |
| "loss": 1.1563, |
| "num_input_tokens_seen": 6540492800, |
| "step": 99800, |
| "train_runtime": 48364.7142, |
| "train_tokens_per_second": 135232.74 |
| }, |
| { |
| "epoch": 0.999, |
| "grad_norm": 0.636650800704956, |
| "learning_rate": 7.70429662616534e-10, |
| "loss": 1.1653, |
| "num_input_tokens_seen": 6547046400, |
| "step": 99900, |
| "train_runtime": 48412.6126, |
| "train_tokens_per_second": 135234.313 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.5705932974815369, |
| "learning_rate": 7.552498626495208e-14, |
| "loss": 1.1814, |
| "num_input_tokens_seen": 6553600000, |
| "step": 100000, |
| "train_runtime": 48460.0302, |
| "train_tokens_per_second": 135237.225 |
| }, |
| { |
| "epoch": 1.0, |
| "num_input_tokens_seen": 6553600000, |
| "step": 100000, |
| "total_flos": 1.23866185728e+17, |
| "train_loss": 1.241861473388672, |
| "train_runtime": 48460.2218, |
| "train_samples_per_second": 528.268, |
| "train_steps_per_second": 2.064 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 100000, |
| "num_input_tokens_seen": 6553600000, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.23866185728e+17, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|