| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.61, |
| "eval_steps": 500, |
| "global_step": 61000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.001, |
| "grad_norm": 17.39519691467285, |
| "learning_rate": 2.97e-05, |
| "loss": 9.7941, |
| "num_input_tokens_seen": 6553600, |
| "step": 100, |
| "train_runtime": 74.0623, |
| "train_tokens_per_second": 88487.632 |
| }, |
| { |
| "epoch": 0.002, |
| "grad_norm": 10.212440490722656, |
| "learning_rate": 5.97e-05, |
| "loss": 1.0389, |
| "num_input_tokens_seen": 13107200, |
| "step": 200, |
| "train_runtime": 135.0365, |
| "train_tokens_per_second": 97064.126 |
| }, |
| { |
| "epoch": 0.003, |
| "grad_norm": 6.982235908508301, |
| "learning_rate": 8.969999999999998e-05, |
| "loss": 0.7951, |
| "num_input_tokens_seen": 19660800, |
| "step": 300, |
| "train_runtime": 196.4342, |
| "train_tokens_per_second": 100088.472 |
| }, |
| { |
| "epoch": 0.004, |
| "grad_norm": 2.089735507965088, |
| "learning_rate": 0.0001197, |
| "loss": 0.6341, |
| "num_input_tokens_seen": 26214400, |
| "step": 400, |
| "train_runtime": 257.5653, |
| "train_tokens_per_second": 101777.682 |
| }, |
| { |
| "epoch": 0.005, |
| "grad_norm": 2.6269969940185547, |
| "learning_rate": 0.00014969999999999998, |
| "loss": 0.5353, |
| "num_input_tokens_seen": 32768000, |
| "step": 500, |
| "train_runtime": 323.599, |
| "train_tokens_per_second": 101261.143 |
| }, |
| { |
| "epoch": 0.006, |
| "grad_norm": 0.9126470685005188, |
| "learning_rate": 0.00017969999999999998, |
| "loss": 0.4822, |
| "num_input_tokens_seen": 39321600, |
| "step": 600, |
| "train_runtime": 385.3073, |
| "train_tokens_per_second": 102052.566 |
| }, |
| { |
| "epoch": 0.007, |
| "grad_norm": 0.7452394366264343, |
| "learning_rate": 0.00020969999999999997, |
| "loss": 0.4534, |
| "num_input_tokens_seen": 45875200, |
| "step": 700, |
| "train_runtime": 447.534, |
| "train_tokens_per_second": 102506.63 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 0.6909123659133911, |
| "learning_rate": 0.0002397, |
| "loss": 0.4323, |
| "num_input_tokens_seen": 52428800, |
| "step": 800, |
| "train_runtime": 510.1043, |
| "train_tokens_per_second": 102780.558 |
| }, |
| { |
| "epoch": 0.009, |
| "grad_norm": 0.5689504146575928, |
| "learning_rate": 0.0002697, |
| "loss": 0.4262, |
| "num_input_tokens_seen": 58982400, |
| "step": 900, |
| "train_runtime": 571.3595, |
| "train_tokens_per_second": 103231.669 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.42208704352378845, |
| "learning_rate": 0.00029969999999999997, |
| "loss": 0.4158, |
| "num_input_tokens_seen": 65536000, |
| "step": 1000, |
| "train_runtime": 638.5123, |
| "train_tokens_per_second": 102638.586 |
| }, |
| { |
| "epoch": 0.011, |
| "grad_norm": 0.4542798399925232, |
| "learning_rate": 0.00029999925978027874, |
| "loss": 0.4127, |
| "num_input_tokens_seen": 72089600, |
| "step": 1100, |
| "train_runtime": 698.6527, |
| "train_tokens_per_second": 103183.742 |
| }, |
| { |
| "epoch": 0.012, |
| "grad_norm": 0.4086480736732483, |
| "learning_rate": 0.0002999970091452017, |
| "loss": 0.4018, |
| "num_input_tokens_seen": 78643200, |
| "step": 1200, |
| "train_runtime": 761.7182, |
| "train_tokens_per_second": 103244.485 |
| }, |
| { |
| "epoch": 0.013, |
| "grad_norm": 0.37623685598373413, |
| "learning_rate": 0.00029999324804190795, |
| "loss": 0.3969, |
| "num_input_tokens_seen": 85196800, |
| "step": 1300, |
| "train_runtime": 827.9033, |
| "train_tokens_per_second": 102906.7 |
| }, |
| { |
| "epoch": 0.014, |
| "grad_norm": 0.3346163332462311, |
| "learning_rate": 0.0002999879765082716, |
| "loss": 0.3906, |
| "num_input_tokens_seen": 91750400, |
| "step": 1400, |
| "train_runtime": 889.5401, |
| "train_tokens_per_second": 103143.635 |
| }, |
| { |
| "epoch": 0.015, |
| "grad_norm": 0.4093320369720459, |
| "learning_rate": 0.000299981194597377, |
| "loss": 0.3852, |
| "num_input_tokens_seen": 98304000, |
| "step": 1500, |
| "train_runtime": 950.9359, |
| "train_tokens_per_second": 103376.055 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 0.3808560371398926, |
| "learning_rate": 0.0002999729023775179, |
| "loss": 0.3819, |
| "num_input_tokens_seen": 104857600, |
| "step": 1600, |
| "train_runtime": 1017.4047, |
| "train_tokens_per_second": 103063.807 |
| }, |
| { |
| "epoch": 0.017, |
| "grad_norm": 0.3014701306819916, |
| "learning_rate": 0.0002999630999321969, |
| "loss": 0.387, |
| "num_input_tokens_seen": 111411200, |
| "step": 1700, |
| "train_runtime": 1075.027, |
| "train_tokens_per_second": 103635.721 |
| }, |
| { |
| "epoch": 0.018, |
| "grad_norm": 0.25073230266571045, |
| "learning_rate": 0.00029995178736012443, |
| "loss": 0.382, |
| "num_input_tokens_seen": 117964800, |
| "step": 1800, |
| "train_runtime": 1141.6684, |
| "train_tokens_per_second": 103326.671 |
| }, |
| { |
| "epoch": 0.019, |
| "grad_norm": 0.2569698989391327, |
| "learning_rate": 0.0002999389647752181, |
| "loss": 0.3745, |
| "num_input_tokens_seen": 124518400, |
| "step": 1900, |
| "train_runtime": 1202.9974, |
| "train_tokens_per_second": 103506.793 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.2895148694515228, |
| "learning_rate": 0.00029992463230660104, |
| "loss": 0.3747, |
| "num_input_tokens_seen": 131072000, |
| "step": 2000, |
| "train_runtime": 1271.272, |
| "train_tokens_per_second": 103103.035 |
| }, |
| { |
| "epoch": 0.021, |
| "grad_norm": 0.28352853655815125, |
| "learning_rate": 0.00029990879009860117, |
| "loss": 0.3701, |
| "num_input_tokens_seen": 137625600, |
| "step": 2100, |
| "train_runtime": 1335.8501, |
| "train_tokens_per_second": 103024.736 |
| }, |
| { |
| "epoch": 0.022, |
| "grad_norm": 0.2598542273044586, |
| "learning_rate": 0.0002998914383107493, |
| "loss": 0.3715, |
| "num_input_tokens_seen": 144179200, |
| "step": 2200, |
| "train_runtime": 1400.0516, |
| "train_tokens_per_second": 102981.347 |
| }, |
| { |
| "epoch": 0.023, |
| "grad_norm": 0.300857275724411, |
| "learning_rate": 0.0002998725771177778, |
| "loss": 0.3723, |
| "num_input_tokens_seen": 150732800, |
| "step": 2300, |
| "train_runtime": 1465.03, |
| "train_tokens_per_second": 102887.178 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 0.19827991724014282, |
| "learning_rate": 0.00029985220670961847, |
| "loss": 0.3654, |
| "num_input_tokens_seen": 157286400, |
| "step": 2400, |
| "train_runtime": 1534.4652, |
| "train_tokens_per_second": 102502.423 |
| }, |
| { |
| "epoch": 0.025, |
| "grad_norm": 0.36876365542411804, |
| "learning_rate": 0.0002998303272914014, |
| "loss": 0.368, |
| "num_input_tokens_seen": 163840000, |
| "step": 2500, |
| "train_runtime": 1598.5928, |
| "train_tokens_per_second": 102490.141 |
| }, |
| { |
| "epoch": 0.026, |
| "grad_norm": 0.23755036294460297, |
| "learning_rate": 0.00029980693908345185, |
| "loss": 0.3648, |
| "num_input_tokens_seen": 170393600, |
| "step": 2600, |
| "train_runtime": 1661.9675, |
| "train_tokens_per_second": 102525.227 |
| }, |
| { |
| "epoch": 0.027, |
| "grad_norm": 0.3921568691730499, |
| "learning_rate": 0.00029978204232128895, |
| "loss": 0.3633, |
| "num_input_tokens_seen": 176947200, |
| "step": 2700, |
| "train_runtime": 1731.9606, |
| "train_tokens_per_second": 102165.837 |
| }, |
| { |
| "epoch": 0.028, |
| "grad_norm": 0.1964094191789627, |
| "learning_rate": 0.0002997556372556227, |
| "loss": 0.365, |
| "num_input_tokens_seen": 183500800, |
| "step": 2800, |
| "train_runtime": 1796.4926, |
| "train_tokens_per_second": 102143.922 |
| }, |
| { |
| "epoch": 0.029, |
| "grad_norm": 0.2469199150800705, |
| "learning_rate": 0.0002997277241523519, |
| "loss": 0.364, |
| "num_input_tokens_seen": 190054400, |
| "step": 2900, |
| "train_runtime": 1860.3342, |
| "train_tokens_per_second": 102161.428 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.19437766075134277, |
| "learning_rate": 0.00029969830329256125, |
| "loss": 0.3574, |
| "num_input_tokens_seen": 196608000, |
| "step": 3000, |
| "train_runtime": 1924.7283, |
| "train_tokens_per_second": 102148.444 |
| }, |
| { |
| "epoch": 0.031, |
| "grad_norm": 0.23198598623275757, |
| "learning_rate": 0.00029966737497251836, |
| "loss": 0.3599, |
| "num_input_tokens_seen": 203161600, |
| "step": 3100, |
| "train_runtime": 1993.345, |
| "train_tokens_per_second": 101919.94 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 0.22857527434825897, |
| "learning_rate": 0.0002996349395036711, |
| "loss": 0.3579, |
| "num_input_tokens_seen": 209715200, |
| "step": 3200, |
| "train_runtime": 2057.8023, |
| "train_tokens_per_second": 101912.218 |
| }, |
| { |
| "epoch": 0.033, |
| "grad_norm": 0.24812710285186768, |
| "learning_rate": 0.00029960099721264435, |
| "loss": 0.3612, |
| "num_input_tokens_seen": 216268800, |
| "step": 3300, |
| "train_runtime": 2121.9536, |
| "train_tokens_per_second": 101919.666 |
| }, |
| { |
| "epoch": 0.034, |
| "grad_norm": 0.21982239186763763, |
| "learning_rate": 0.0002995655484412365, |
| "loss": 0.3554, |
| "num_input_tokens_seen": 222822400, |
| "step": 3400, |
| "train_runtime": 2186.6347, |
| "train_tokens_per_second": 101901.979 |
| }, |
| { |
| "epoch": 0.035, |
| "grad_norm": 0.3460980951786041, |
| "learning_rate": 0.00029952859354641636, |
| "loss": 0.3568, |
| "num_input_tokens_seen": 229376000, |
| "step": 3500, |
| "train_runtime": 2256.5384, |
| "train_tokens_per_second": 101649.502 |
| }, |
| { |
| "epoch": 0.036, |
| "grad_norm": 0.25577911734580994, |
| "learning_rate": 0.00029949013290031924, |
| "loss": 0.354, |
| "num_input_tokens_seen": 235929600, |
| "step": 3600, |
| "train_runtime": 2320.5776, |
| "train_tokens_per_second": 101668.483 |
| }, |
| { |
| "epoch": 0.037, |
| "grad_norm": 0.16108086705207825, |
| "learning_rate": 0.00029945016689024353, |
| "loss": 0.3509, |
| "num_input_tokens_seen": 242483200, |
| "step": 3700, |
| "train_runtime": 2383.8992, |
| "train_tokens_per_second": 101717.051 |
| }, |
| { |
| "epoch": 0.038, |
| "grad_norm": 0.2431662529706955, |
| "learning_rate": 0.0002994086959186464, |
| "loss": 0.3527, |
| "num_input_tokens_seen": 249036800, |
| "step": 3800, |
| "train_runtime": 2448.8427, |
| "train_tokens_per_second": 101695.71 |
| }, |
| { |
| "epoch": 0.039, |
| "grad_norm": 0.18574966490268707, |
| "learning_rate": 0.00029936572040314014, |
| "loss": 0.3546, |
| "num_input_tokens_seen": 255590400, |
| "step": 3900, |
| "train_runtime": 2518.1288, |
| "train_tokens_per_second": 101500.13 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.15902996063232422, |
| "learning_rate": 0.0002993212407764877, |
| "loss": 0.3519, |
| "num_input_tokens_seen": 262144000, |
| "step": 4000, |
| "train_runtime": 2581.8809, |
| "train_tokens_per_second": 101532.18 |
| }, |
| { |
| "epoch": 0.041, |
| "grad_norm": 0.21019065380096436, |
| "learning_rate": 0.00029927525748659834, |
| "loss": 0.3567, |
| "num_input_tokens_seen": 268697600, |
| "step": 4100, |
| "train_runtime": 2646.5068, |
| "train_tokens_per_second": 101529.154 |
| }, |
| { |
| "epoch": 0.042, |
| "grad_norm": 0.18648174405097961, |
| "learning_rate": 0.0002992277709965234, |
| "loss": 0.3512, |
| "num_input_tokens_seen": 275251200, |
| "step": 4200, |
| "train_runtime": 2710.4754, |
| "train_tokens_per_second": 101550.895 |
| }, |
| { |
| "epoch": 0.043, |
| "grad_norm": 0.21123889088630676, |
| "learning_rate": 0.0002991787817844513, |
| "loss": 0.3521, |
| "num_input_tokens_seen": 281804800, |
| "step": 4300, |
| "train_runtime": 2780.6173, |
| "train_tokens_per_second": 101346.13 |
| }, |
| { |
| "epoch": 0.044, |
| "grad_norm": 0.22183509171009064, |
| "learning_rate": 0.0002991282903437028, |
| "loss": 0.3486, |
| "num_input_tokens_seen": 288358400, |
| "step": 4400, |
| "train_runtime": 2843.584, |
| "train_tokens_per_second": 101406.674 |
| }, |
| { |
| "epoch": 0.045, |
| "grad_norm": 0.19213925302028656, |
| "learning_rate": 0.0002990762971827262, |
| "loss": 0.3481, |
| "num_input_tokens_seen": 294912000, |
| "step": 4500, |
| "train_runtime": 2906.5309, |
| "train_tokens_per_second": 101465.29 |
| }, |
| { |
| "epoch": 0.046, |
| "grad_norm": 0.16215530037879944, |
| "learning_rate": 0.00029902280282509197, |
| "loss": 0.3506, |
| "num_input_tokens_seen": 301465600, |
| "step": 4600, |
| "train_runtime": 2977.8135, |
| "train_tokens_per_second": 101237.232 |
| }, |
| { |
| "epoch": 0.047, |
| "grad_norm": 0.17120705544948578, |
| "learning_rate": 0.0002989678078094878, |
| "loss": 0.3433, |
| "num_input_tokens_seen": 308019200, |
| "step": 4700, |
| "train_runtime": 3040.7538, |
| "train_tokens_per_second": 101296.988 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 0.26389873027801514, |
| "learning_rate": 0.00029891131268971284, |
| "loss": 0.345, |
| "num_input_tokens_seen": 314572800, |
| "step": 4800, |
| "train_runtime": 3104.3446, |
| "train_tokens_per_second": 101333.081 |
| }, |
| { |
| "epoch": 0.049, |
| "grad_norm": 0.1639779806137085, |
| "learning_rate": 0.0002988533180346723, |
| "loss": 0.3431, |
| "num_input_tokens_seen": 321126400, |
| "step": 4900, |
| "train_runtime": 3172.6385, |
| "train_tokens_per_second": 101217.457 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.21486082673072815, |
| "learning_rate": 0.0002987938244283717, |
| "loss": 0.3413, |
| "num_input_tokens_seen": 327680000, |
| "step": 5000, |
| "train_runtime": 3237.5961, |
| "train_tokens_per_second": 101210.896 |
| }, |
| { |
| "epoch": 0.051, |
| "grad_norm": 0.20326170325279236, |
| "learning_rate": 0.00029873283246991105, |
| "loss": 0.3457, |
| "num_input_tokens_seen": 334233600, |
| "step": 5100, |
| "train_runtime": 3302.3096, |
| "train_tokens_per_second": 101212.074 |
| }, |
| { |
| "epoch": 0.052, |
| "grad_norm": 0.171161487698555, |
| "learning_rate": 0.0002986703427734787, |
| "loss": 0.345, |
| "num_input_tokens_seen": 340787200, |
| "step": 5200, |
| "train_runtime": 3367.4928, |
| "train_tokens_per_second": 101199.089 |
| }, |
| { |
| "epoch": 0.053, |
| "grad_norm": 0.19781792163848877, |
| "learning_rate": 0.00029860635596834517, |
| "loss": 0.3455, |
| "num_input_tokens_seen": 347340800, |
| "step": 5300, |
| "train_runtime": 3430.9148, |
| "train_tokens_per_second": 101238.538 |
| }, |
| { |
| "epoch": 0.054, |
| "grad_norm": 0.1795511543750763, |
| "learning_rate": 0.0002985408726988569, |
| "loss": 0.3439, |
| "num_input_tokens_seen": 353894400, |
| "step": 5400, |
| "train_runtime": 3498.4556, |
| "train_tokens_per_second": 101157.322 |
| }, |
| { |
| "epoch": 0.055, |
| "grad_norm": 0.1671728938817978, |
| "learning_rate": 0.0002984738936244296, |
| "loss": 0.3422, |
| "num_input_tokens_seen": 360448000, |
| "step": 5500, |
| "train_runtime": 3561.4394, |
| "train_tokens_per_second": 101208.516 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 0.17824003100395203, |
| "learning_rate": 0.0002984054194195419, |
| "loss": 0.3489, |
| "num_input_tokens_seen": 367001600, |
| "step": 5600, |
| "train_runtime": 3625.8956, |
| "train_tokens_per_second": 101216.814 |
| }, |
| { |
| "epoch": 0.057, |
| "grad_norm": 0.1654757708311081, |
| "learning_rate": 0.0002983354507737283, |
| "loss": 0.3463, |
| "num_input_tokens_seen": 373555200, |
| "step": 5700, |
| "train_runtime": 3690.173, |
| "train_tokens_per_second": 101229.725 |
| }, |
| { |
| "epoch": 0.058, |
| "grad_norm": 0.2033533751964569, |
| "learning_rate": 0.00029826398839157215, |
| "loss": 0.3462, |
| "num_input_tokens_seen": 380108800, |
| "step": 5800, |
| "train_runtime": 3759.2019, |
| "train_tokens_per_second": 101114.229 |
| }, |
| { |
| "epoch": 0.059, |
| "grad_norm": 0.19753150641918182, |
| "learning_rate": 0.000298191032992699, |
| "loss": 0.3436, |
| "num_input_tokens_seen": 386662400, |
| "step": 5900, |
| "train_runtime": 3822.1964, |
| "train_tokens_per_second": 101162.357 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.13978537917137146, |
| "learning_rate": 0.0002981165853117688, |
| "loss": 0.3393, |
| "num_input_tokens_seen": 393216000, |
| "step": 6000, |
| "train_runtime": 3890.9859, |
| "train_tokens_per_second": 101058.192 |
| }, |
| { |
| "epoch": 0.061, |
| "grad_norm": 0.28539636731147766, |
| "learning_rate": 0.000298040646098469, |
| "loss": 0.3419, |
| "num_input_tokens_seen": 399769600, |
| "step": 6100, |
| "train_runtime": 3955.42, |
| "train_tokens_per_second": 101068.813 |
| }, |
| { |
| "epoch": 0.062, |
| "grad_norm": 0.14195021986961365, |
| "learning_rate": 0.0002979632161175064, |
| "loss": 0.3408, |
| "num_input_tokens_seen": 406323200, |
| "step": 6200, |
| "train_runtime": 4019.3462, |
| "train_tokens_per_second": 101091.865 |
| }, |
| { |
| "epoch": 0.063, |
| "grad_norm": 0.26058393716812134, |
| "learning_rate": 0.0002978842961486003, |
| "loss": 0.3411, |
| "num_input_tokens_seen": 412876800, |
| "step": 6300, |
| "train_runtime": 4082.619, |
| "train_tokens_per_second": 101130.379 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 0.1645655333995819, |
| "learning_rate": 0.0002978038869864738, |
| "loss": 0.3392, |
| "num_input_tokens_seen": 419430400, |
| "step": 6400, |
| "train_runtime": 4152.2955, |
| "train_tokens_per_second": 101011.694 |
| }, |
| { |
| "epoch": 0.065, |
| "grad_norm": 0.1678280532360077, |
| "learning_rate": 0.0002977219894408463, |
| "loss": 0.338, |
| "num_input_tokens_seen": 425984000, |
| "step": 6500, |
| "train_runtime": 4215.8141, |
| "train_tokens_per_second": 101044.304 |
| }, |
| { |
| "epoch": 0.066, |
| "grad_norm": 0.19337573647499084, |
| "learning_rate": 0.0002976386043364251, |
| "loss": 0.3424, |
| "num_input_tokens_seen": 432537600, |
| "step": 6600, |
| "train_runtime": 4278.8465, |
| "train_tokens_per_second": 101087.432 |
| }, |
| { |
| "epoch": 0.067, |
| "grad_norm": 0.14295175671577454, |
| "learning_rate": 0.00029755373251289733, |
| "loss": 0.3443, |
| "num_input_tokens_seen": 439091200, |
| "step": 6700, |
| "train_runtime": 4348.6665, |
| "train_tokens_per_second": 100971.459 |
| }, |
| { |
| "epoch": 0.068, |
| "grad_norm": 0.22164900600910187, |
| "learning_rate": 0.0002974673748249213, |
| "loss": 0.339, |
| "num_input_tokens_seen": 445644800, |
| "step": 6800, |
| "train_runtime": 4413.12, |
| "train_tokens_per_second": 100981.799 |
| }, |
| { |
| "epoch": 0.069, |
| "grad_norm": 0.1831408590078354, |
| "learning_rate": 0.00029737953214211804, |
| "loss": 0.3398, |
| "num_input_tokens_seen": 452198400, |
| "step": 6900, |
| "train_runtime": 4477.6672, |
| "train_tokens_per_second": 100989.73 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.21329298615455627, |
| "learning_rate": 0.0002972902053490623, |
| "loss": 0.3372, |
| "num_input_tokens_seen": 458752000, |
| "step": 7000, |
| "train_runtime": 4541.4752, |
| "train_tokens_per_second": 101013.873 |
| }, |
| { |
| "epoch": 0.071, |
| "grad_norm": 0.16601704061031342, |
| "learning_rate": 0.00029719939534527393, |
| "loss": 0.3436, |
| "num_input_tokens_seen": 465305600, |
| "step": 7100, |
| "train_runtime": 4607.1943, |
| "train_tokens_per_second": 100995.436 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 0.2303948849439621, |
| "learning_rate": 0.00029710710304520866, |
| "loss": 0.339, |
| "num_input_tokens_seen": 471859200, |
| "step": 7200, |
| "train_runtime": 4672.0421, |
| "train_tokens_per_second": 100996.349 |
| }, |
| { |
| "epoch": 0.073, |
| "grad_norm": 0.21449029445648193, |
| "learning_rate": 0.00029701332937824885, |
| "loss": 0.336, |
| "num_input_tokens_seen": 478412800, |
| "step": 7300, |
| "train_runtime": 4742.0375, |
| "train_tokens_per_second": 100887.605 |
| }, |
| { |
| "epoch": 0.074, |
| "grad_norm": 0.1367533802986145, |
| "learning_rate": 0.0002969180752886944, |
| "loss": 0.3397, |
| "num_input_tokens_seen": 484966400, |
| "step": 7400, |
| "train_runtime": 4805.1341, |
| "train_tokens_per_second": 100926.716 |
| }, |
| { |
| "epoch": 0.075, |
| "grad_norm": 0.1852603256702423, |
| "learning_rate": 0.0002968213417357529, |
| "loss": 0.34, |
| "num_input_tokens_seen": 491520000, |
| "step": 7500, |
| "train_runtime": 4867.6611, |
| "train_tokens_per_second": 100976.628 |
| }, |
| { |
| "epoch": 0.076, |
| "grad_norm": 0.18590585887432098, |
| "learning_rate": 0.00029672312969353015, |
| "loss": 0.3375, |
| "num_input_tokens_seen": 498073600, |
| "step": 7600, |
| "train_runtime": 4938.9456, |
| "train_tokens_per_second": 100846.14 |
| }, |
| { |
| "epoch": 0.077, |
| "grad_norm": 0.17078232765197754, |
| "learning_rate": 0.00029662344015102027, |
| "loss": 0.3374, |
| "num_input_tokens_seen": 504627200, |
| "step": 7700, |
| "train_runtime": 5003.5948, |
| "train_tokens_per_second": 100852.931 |
| }, |
| { |
| "epoch": 0.078, |
| "grad_norm": 0.14574670791625977, |
| "learning_rate": 0.00029652227411209594, |
| "loss": 0.3369, |
| "num_input_tokens_seen": 511180800, |
| "step": 7800, |
| "train_runtime": 5067.2522, |
| "train_tokens_per_second": 100879.289 |
| }, |
| { |
| "epoch": 0.079, |
| "grad_norm": 0.1603483408689499, |
| "learning_rate": 0.0002964196325954979, |
| "loss": 0.3352, |
| "num_input_tokens_seen": 517734400, |
| "step": 7900, |
| "train_runtime": 5131.2908, |
| "train_tokens_per_second": 100897.497 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.16576310992240906, |
| "learning_rate": 0.0002963155166348253, |
| "loss": 0.3376, |
| "num_input_tokens_seen": 524288000, |
| "step": 8000, |
| "train_runtime": 5200.6662, |
| "train_tokens_per_second": 100811.699 |
| }, |
| { |
| "epoch": 0.081, |
| "grad_norm": 0.31833919882774353, |
| "learning_rate": 0.0002962099272785246, |
| "loss": 0.3382, |
| "num_input_tokens_seen": 530841600, |
| "step": 8100, |
| "train_runtime": 5266.7639, |
| "train_tokens_per_second": 100790.849 |
| }, |
| { |
| "epoch": 0.082, |
| "grad_norm": 0.14755409955978394, |
| "learning_rate": 0.0002961028655898794, |
| "loss": 0.3348, |
| "num_input_tokens_seen": 537395200, |
| "step": 8200, |
| "train_runtime": 5331.3948, |
| "train_tokens_per_second": 100798.238 |
| }, |
| { |
| "epoch": 0.083, |
| "grad_norm": 0.2060171663761139, |
| "learning_rate": 0.0002959943326469998, |
| "loss": 0.3338, |
| "num_input_tokens_seen": 543948800, |
| "step": 8300, |
| "train_runtime": 5395.0396, |
| "train_tokens_per_second": 100823.876 |
| }, |
| { |
| "epoch": 0.084, |
| "grad_norm": 0.16461625695228577, |
| "learning_rate": 0.0002958843295428112, |
| "loss": 0.3326, |
| "num_input_tokens_seen": 550502400, |
| "step": 8400, |
| "train_runtime": 5458.2259, |
| "train_tokens_per_second": 100857.387 |
| }, |
| { |
| "epoch": 0.085, |
| "grad_norm": 0.15455660223960876, |
| "learning_rate": 0.0002957728573850438, |
| "loss": 0.3339, |
| "num_input_tokens_seen": 557056000, |
| "step": 8500, |
| "train_runtime": 5527.7417, |
| "train_tokens_per_second": 100774.607 |
| }, |
| { |
| "epoch": 0.086, |
| "grad_norm": 0.17872081696987152, |
| "learning_rate": 0.0002956599172962209, |
| "loss": 0.3404, |
| "num_input_tokens_seen": 563609600, |
| "step": 8600, |
| "train_runtime": 5593.3318, |
| "train_tokens_per_second": 100764.557 |
| }, |
| { |
| "epoch": 0.087, |
| "grad_norm": 0.19022491574287415, |
| "learning_rate": 0.0002955455104136479, |
| "loss": 0.3329, |
| "num_input_tokens_seen": 570163200, |
| "step": 8700, |
| "train_runtime": 5659.0887, |
| "train_tokens_per_second": 100751.77 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 0.14710059762001038, |
| "learning_rate": 0.00029542963788940096, |
| "loss": 0.3323, |
| "num_input_tokens_seen": 576716800, |
| "step": 8800, |
| "train_runtime": 5722.168, |
| "train_tokens_per_second": 100786.415 |
| }, |
| { |
| "epoch": 0.089, |
| "grad_norm": 0.1998033970594406, |
| "learning_rate": 0.00029531230089031505, |
| "loss": 0.3378, |
| "num_input_tokens_seen": 583270400, |
| "step": 8900, |
| "train_runtime": 5787.7324, |
| "train_tokens_per_second": 100777.016 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.125193253159523, |
| "learning_rate": 0.0002951935005979724, |
| "loss": 0.3325, |
| "num_input_tokens_seen": 589824000, |
| "step": 9000, |
| "train_runtime": 5855.8455, |
| "train_tokens_per_second": 100723.968 |
| }, |
| { |
| "epoch": 0.091, |
| "grad_norm": 0.19552631676197052, |
| "learning_rate": 0.0002950732382086907, |
| "loss": 0.3316, |
| "num_input_tokens_seen": 596377600, |
| "step": 9100, |
| "train_runtime": 5921.9714, |
| "train_tokens_per_second": 100705.923 |
| }, |
| { |
| "epoch": 0.092, |
| "grad_norm": 0.16468137502670288, |
| "learning_rate": 0.0002949515149335108, |
| "loss": 0.3349, |
| "num_input_tokens_seen": 602931200, |
| "step": 9200, |
| "train_runtime": 5986.1243, |
| "train_tokens_per_second": 100721.464 |
| }, |
| { |
| "epoch": 0.093, |
| "grad_norm": 0.1658785343170166, |
| "learning_rate": 0.0002948283319981848, |
| "loss": 0.3281, |
| "num_input_tokens_seen": 609484800, |
| "step": 9300, |
| "train_runtime": 6050.7028, |
| "train_tokens_per_second": 100729.588 |
| }, |
| { |
| "epoch": 0.094, |
| "grad_norm": 0.16668474674224854, |
| "learning_rate": 0.00029470369064316354, |
| "loss": 0.3301, |
| "num_input_tokens_seen": 616038400, |
| "step": 9400, |
| "train_runtime": 6115.0892, |
| "train_tokens_per_second": 100740.706 |
| }, |
| { |
| "epoch": 0.095, |
| "grad_norm": 0.16522246599197388, |
| "learning_rate": 0.00029457759212358397, |
| "loss": 0.3305, |
| "num_input_tokens_seen": 622592000, |
| "step": 9500, |
| "train_runtime": 6183.2082, |
| "train_tokens_per_second": 100690.77 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 0.2229623645544052, |
| "learning_rate": 0.00029445003770925686, |
| "loss": 0.3289, |
| "num_input_tokens_seen": 629145600, |
| "step": 9600, |
| "train_runtime": 6247.5147, |
| "train_tokens_per_second": 100703.341 |
| }, |
| { |
| "epoch": 0.097, |
| "grad_norm": 0.16620689630508423, |
| "learning_rate": 0.00029432102868465367, |
| "loss": 0.3299, |
| "num_input_tokens_seen": 635699200, |
| "step": 9700, |
| "train_runtime": 6312.7504, |
| "train_tokens_per_second": 100700.829 |
| }, |
| { |
| "epoch": 0.098, |
| "grad_norm": 0.15970012545585632, |
| "learning_rate": 0.0002941905663488939, |
| "loss": 0.3292, |
| "num_input_tokens_seen": 642252800, |
| "step": 9800, |
| "train_runtime": 6382.1987, |
| "train_tokens_per_second": 100631.903 |
| }, |
| { |
| "epoch": 0.099, |
| "grad_norm": 0.14614014327526093, |
| "learning_rate": 0.0002940586520157318, |
| "loss": 0.3329, |
| "num_input_tokens_seen": 648806400, |
| "step": 9900, |
| "train_runtime": 6445.6924, |
| "train_tokens_per_second": 100657.362 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.16558828949928284, |
| "learning_rate": 0.00029392528701354325, |
| "loss": 0.3286, |
| "num_input_tokens_seen": 655360000, |
| "step": 10000, |
| "train_runtime": 6509.151, |
| "train_tokens_per_second": 100682.87 |
| }, |
| { |
| "epoch": 0.101, |
| "grad_norm": 0.1442118138074875, |
| "learning_rate": 0.00029379047268531243, |
| "loss": 0.3314, |
| "num_input_tokens_seen": 661913600, |
| "step": 10100, |
| "train_runtime": 6575.3071, |
| "train_tokens_per_second": 100666.568 |
| }, |
| { |
| "epoch": 0.102, |
| "grad_norm": 0.16007182002067566, |
| "learning_rate": 0.00029365421038861795, |
| "loss": 0.3326, |
| "num_input_tokens_seen": 668467200, |
| "step": 10200, |
| "train_runtime": 6639.6314, |
| "train_tokens_per_second": 100678.359 |
| }, |
| { |
| "epoch": 0.103, |
| "grad_norm": 0.1417239010334015, |
| "learning_rate": 0.0002935165014956198, |
| "loss": 0.3292, |
| "num_input_tokens_seen": 675020800, |
| "step": 10300, |
| "train_runtime": 6704.2875, |
| "train_tokens_per_second": 100684.942 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 0.20092202723026276, |
| "learning_rate": 0.0002933773473930448, |
| "loss": 0.3251, |
| "num_input_tokens_seen": 681574400, |
| "step": 10400, |
| "train_runtime": 6769.9733, |
| "train_tokens_per_second": 100676.083 |
| }, |
| { |
| "epoch": 0.105, |
| "grad_norm": 0.12387008965015411, |
| "learning_rate": 0.0002932367494821734, |
| "loss": 0.3302, |
| "num_input_tokens_seen": 688128000, |
| "step": 10500, |
| "train_runtime": 6840.7627, |
| "train_tokens_per_second": 100592.292 |
| }, |
| { |
| "epoch": 0.106, |
| "grad_norm": 0.17865417897701263, |
| "learning_rate": 0.00029309470917882497, |
| "loss": 0.328, |
| "num_input_tokens_seen": 694681600, |
| "step": 10600, |
| "train_runtime": 6905.9119, |
| "train_tokens_per_second": 100592.305 |
| }, |
| { |
| "epoch": 0.107, |
| "grad_norm": 0.14125974476337433, |
| "learning_rate": 0.0002929512279133437, |
| "loss": 0.3296, |
| "num_input_tokens_seen": 701235200, |
| "step": 10700, |
| "train_runtime": 6969.9941, |
| "train_tokens_per_second": 100607.718 |
| }, |
| { |
| "epoch": 0.108, |
| "grad_norm": 0.15725336968898773, |
| "learning_rate": 0.0002928063071305844, |
| "loss": 0.3279, |
| "num_input_tokens_seen": 707788800, |
| "step": 10800, |
| "train_runtime": 7032.9479, |
| "train_tokens_per_second": 100638.994 |
| }, |
| { |
| "epoch": 0.109, |
| "grad_norm": 0.15254800021648407, |
| "learning_rate": 0.0002926599482898978, |
| "loss": 0.3276, |
| "num_input_tokens_seen": 714342400, |
| "step": 10900, |
| "train_runtime": 7097.644, |
| "train_tokens_per_second": 100645.002 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.23630526661872864, |
| "learning_rate": 0.00029251215286511573, |
| "loss": 0.3278, |
| "num_input_tokens_seen": 720896000, |
| "step": 11000, |
| "train_runtime": 7167.7206, |
| "train_tokens_per_second": 100575.348 |
| }, |
| { |
| "epoch": 0.111, |
| "grad_norm": 0.14799726009368896, |
| "learning_rate": 0.00029236292234453647, |
| "loss": 0.3264, |
| "num_input_tokens_seen": 727449600, |
| "step": 11100, |
| "train_runtime": 7232.1207, |
| "train_tokens_per_second": 100585.932 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 0.17712198197841644, |
| "learning_rate": 0.0002922122582309097, |
| "loss": 0.3304, |
| "num_input_tokens_seen": 734003200, |
| "step": 11200, |
| "train_runtime": 7296.7016, |
| "train_tokens_per_second": 100593.835 |
| }, |
| { |
| "epoch": 0.113, |
| "grad_norm": 0.1620536595582962, |
| "learning_rate": 0.0002920601620414215, |
| "loss": 0.3266, |
| "num_input_tokens_seen": 740556800, |
| "step": 11300, |
| "train_runtime": 7359.3874, |
| "train_tokens_per_second": 100627.506 |
| }, |
| { |
| "epoch": 0.114, |
| "grad_norm": 0.1695978045463562, |
| "learning_rate": 0.0002919066353076786, |
| "loss": 0.3269, |
| "num_input_tokens_seen": 747110400, |
| "step": 11400, |
| "train_runtime": 7425.5624, |
| "train_tokens_per_second": 100613.308 |
| }, |
| { |
| "epoch": 0.115, |
| "grad_norm": 0.23728708922863007, |
| "learning_rate": 0.00029175167957569366, |
| "loss": 0.3269, |
| "num_input_tokens_seen": 753664000, |
| "step": 11500, |
| "train_runtime": 7489.1752, |
| "train_tokens_per_second": 100633.779 |
| }, |
| { |
| "epoch": 0.116, |
| "grad_norm": 0.14579418301582336, |
| "learning_rate": 0.0002915952964058691, |
| "loss": 0.3254, |
| "num_input_tokens_seen": 760217600, |
| "step": 11600, |
| "train_runtime": 7559.1466, |
| "train_tokens_per_second": 100569.237 |
| }, |
| { |
| "epoch": 0.117, |
| "grad_norm": 0.15569131076335907, |
| "learning_rate": 0.00029143748737298173, |
| "loss": 0.3309, |
| "num_input_tokens_seen": 766771200, |
| "step": 11700, |
| "train_runtime": 7625.7219, |
| "train_tokens_per_second": 100550.638 |
| }, |
| { |
| "epoch": 0.118, |
| "grad_norm": 0.15939873456954956, |
| "learning_rate": 0.00029127825406616677, |
| "loss": 0.3251, |
| "num_input_tokens_seen": 773324800, |
| "step": 11800, |
| "train_runtime": 7690.5664, |
| "train_tokens_per_second": 100554.987 |
| }, |
| { |
| "epoch": 0.119, |
| "grad_norm": 0.1355784833431244, |
| "learning_rate": 0.0002911175980889019, |
| "loss": 0.3287, |
| "num_input_tokens_seen": 779878400, |
| "step": 11900, |
| "train_runtime": 7753.5378, |
| "train_tokens_per_second": 100583.556 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.19504176080226898, |
| "learning_rate": 0.00029095552105899095, |
| "loss": 0.325, |
| "num_input_tokens_seen": 786432000, |
| "step": 12000, |
| "train_runtime": 7817.9364, |
| "train_tokens_per_second": 100593.297 |
| }, |
| { |
| "epoch": 0.121, |
| "grad_norm": 0.1594318449497223, |
| "learning_rate": 0.0002907920246085478, |
| "loss": 0.3242, |
| "num_input_tokens_seen": 792985600, |
| "step": 12100, |
| "train_runtime": 7887.1116, |
| "train_tokens_per_second": 100541.953 |
| }, |
| { |
| "epoch": 0.122, |
| "grad_norm": 0.15172167122364044, |
| "learning_rate": 0.00029062711038397996, |
| "loss": 0.3325, |
| "num_input_tokens_seen": 799539200, |
| "step": 12200, |
| "train_runtime": 7952.1371, |
| "train_tokens_per_second": 100543.94 |
| }, |
| { |
| "epoch": 0.123, |
| "grad_norm": 0.13253241777420044, |
| "learning_rate": 0.00029046078004597175, |
| "loss": 0.3239, |
| "num_input_tokens_seen": 806092800, |
| "step": 12300, |
| "train_runtime": 8016.3597, |
| "train_tokens_per_second": 100555.966 |
| }, |
| { |
| "epoch": 0.124, |
| "grad_norm": 0.2943899929523468, |
| "learning_rate": 0.00029029303526946796, |
| "loss": 0.3238, |
| "num_input_tokens_seen": 812646400, |
| "step": 12400, |
| "train_runtime": 8079.6597, |
| "train_tokens_per_second": 100579.286 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 0.1583172082901001, |
| "learning_rate": 0.0002901238777436565, |
| "loss": 0.3217, |
| "num_input_tokens_seen": 819200000, |
| "step": 12500, |
| "train_runtime": 8148.9297, |
| "train_tokens_per_second": 100528.539 |
| }, |
| { |
| "epoch": 0.126, |
| "grad_norm": 0.1598382592201233, |
| "learning_rate": 0.00028995330917195184, |
| "loss": 0.3245, |
| "num_input_tokens_seen": 825753600, |
| "step": 12600, |
| "train_runtime": 8213.0201, |
| "train_tokens_per_second": 100542.016 |
| }, |
| { |
| "epoch": 0.127, |
| "grad_norm": 0.13507018983364105, |
| "learning_rate": 0.00028978133127197765, |
| "loss": 0.3247, |
| "num_input_tokens_seen": 832307200, |
| "step": 12700, |
| "train_runtime": 8277.3925, |
| "train_tokens_per_second": 100551.859 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.1688830703496933, |
| "learning_rate": 0.0002896079457755493, |
| "loss": 0.3258, |
| "num_input_tokens_seen": 838860800, |
| "step": 12800, |
| "train_runtime": 8342.3491, |
| "train_tokens_per_second": 100554.507 |
| }, |
| { |
| "epoch": 0.129, |
| "grad_norm": 0.2753322422504425, |
| "learning_rate": 0.000289433154428657, |
| "loss": 0.3249, |
| "num_input_tokens_seen": 845414400, |
| "step": 12900, |
| "train_runtime": 8406.9898, |
| "train_tokens_per_second": 100560.892 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 0.20588786900043488, |
| "learning_rate": 0.0002892569589914476, |
| "loss": 0.3232, |
| "num_input_tokens_seen": 851968000, |
| "step": 13000, |
| "train_runtime": 8475.9626, |
| "train_tokens_per_second": 100515.781 |
| }, |
| { |
| "epoch": 0.131, |
| "grad_norm": 0.1462445855140686, |
| "learning_rate": 0.0002890793612382072, |
| "loss": 0.3239, |
| "num_input_tokens_seen": 858521600, |
| "step": 13100, |
| "train_runtime": 8539.9861, |
| "train_tokens_per_second": 100529.625 |
| }, |
| { |
| "epoch": 0.132, |
| "grad_norm": 0.11379440873861313, |
| "learning_rate": 0.0002889003629573432, |
| "loss": 0.3249, |
| "num_input_tokens_seen": 865075200, |
| "step": 13200, |
| "train_runtime": 8604.867, |
| "train_tokens_per_second": 100533.244 |
| }, |
| { |
| "epoch": 0.133, |
| "grad_norm": 0.12769202888011932, |
| "learning_rate": 0.00028871996595136626, |
| "loss": 0.327, |
| "num_input_tokens_seen": 871628800, |
| "step": 13300, |
| "train_runtime": 8669.3605, |
| "train_tokens_per_second": 100541.303 |
| }, |
| { |
| "epoch": 0.134, |
| "grad_norm": 0.14837151765823364, |
| "learning_rate": 0.0002885381720368723, |
| "loss": 0.321, |
| "num_input_tokens_seen": 878182400, |
| "step": 13400, |
| "train_runtime": 8738.2624, |
| "train_tokens_per_second": 100498.515 |
| }, |
| { |
| "epoch": 0.135, |
| "grad_norm": 0.1538904309272766, |
| "learning_rate": 0.000288354983044524, |
| "loss": 0.3207, |
| "num_input_tokens_seen": 884736000, |
| "step": 13500, |
| "train_runtime": 8802.2586, |
| "train_tokens_per_second": 100512.385 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 0.12802962958812714, |
| "learning_rate": 0.00028817040081903245, |
| "loss": 0.3241, |
| "num_input_tokens_seen": 891289600, |
| "step": 13600, |
| "train_runtime": 8866.1163, |
| "train_tokens_per_second": 100527.624 |
| }, |
| { |
| "epoch": 0.137, |
| "grad_norm": 0.35466450452804565, |
| "learning_rate": 0.00028798442721913867, |
| "loss": 0.3214, |
| "num_input_tokens_seen": 897843200, |
| "step": 13700, |
| "train_runtime": 8930.5828, |
| "train_tokens_per_second": 100535.79 |
| }, |
| { |
| "epoch": 0.138, |
| "grad_norm": 0.13867586851119995, |
| "learning_rate": 0.00028779706411759465, |
| "loss": 0.3199, |
| "num_input_tokens_seen": 904396800, |
| "step": 13800, |
| "train_runtime": 9001.3287, |
| "train_tokens_per_second": 100473.7 |
| }, |
| { |
| "epoch": 0.139, |
| "grad_norm": 0.2114623785018921, |
| "learning_rate": 0.00028760831340114484, |
| "loss": 0.3234, |
| "num_input_tokens_seen": 910950400, |
| "step": 13900, |
| "train_runtime": 9066.3163, |
| "train_tokens_per_second": 100476.353 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 0.14202618598937988, |
| "learning_rate": 0.00028741817697050683, |
| "loss": 0.3232, |
| "num_input_tokens_seen": 917504000, |
| "step": 14000, |
| "train_runtime": 9130.2003, |
| "train_tokens_per_second": 100491.114 |
| }, |
| { |
| "epoch": 0.141, |
| "grad_norm": 0.1686236560344696, |
| "learning_rate": 0.00028722665674035233, |
| "loss": 0.3203, |
| "num_input_tokens_seen": 924057600, |
| "step": 14100, |
| "train_runtime": 9195.1426, |
| "train_tokens_per_second": 100494.102 |
| }, |
| { |
| "epoch": 0.142, |
| "grad_norm": 0.14483292400836945, |
| "learning_rate": 0.0002870337546392879, |
| "loss": 0.3321, |
| "num_input_tokens_seen": 930611200, |
| "step": 14200, |
| "train_runtime": 9259.404, |
| "train_tokens_per_second": 100504.438 |
| }, |
| { |
| "epoch": 0.143, |
| "grad_norm": 0.12517394125461578, |
| "learning_rate": 0.00028683947260983576, |
| "loss": 0.3233, |
| "num_input_tokens_seen": 937164800, |
| "step": 14300, |
| "train_runtime": 9324.1454, |
| "train_tokens_per_second": 100509.458 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.24776680767536163, |
| "learning_rate": 0.00028664381260841356, |
| "loss": 0.3192, |
| "num_input_tokens_seen": 943718400, |
| "step": 14400, |
| "train_runtime": 9393.645, |
| "train_tokens_per_second": 100463.494 |
| }, |
| { |
| "epoch": 0.145, |
| "grad_norm": 0.4200928807258606, |
| "learning_rate": 0.0002864467766053154, |
| "loss": 0.321, |
| "num_input_tokens_seen": 950272000, |
| "step": 14500, |
| "train_runtime": 9456.5857, |
| "train_tokens_per_second": 100487.853 |
| }, |
| { |
| "epoch": 0.146, |
| "grad_norm": 0.14573471248149872, |
| "learning_rate": 0.00028624836658469165, |
| "loss": 0.3198, |
| "num_input_tokens_seen": 956825600, |
| "step": 14600, |
| "train_runtime": 9525.9633, |
| "train_tokens_per_second": 100443.973 |
| }, |
| { |
| "epoch": 0.147, |
| "grad_norm": 0.1546989232301712, |
| "learning_rate": 0.00028604858454452906, |
| "loss": 0.3267, |
| "num_input_tokens_seen": 963379200, |
| "step": 14700, |
| "train_runtime": 9585.7512, |
| "train_tokens_per_second": 100501.169 |
| }, |
| { |
| "epoch": 0.148, |
| "grad_norm": 0.172988623380661, |
| "learning_rate": 0.00028584743249663057, |
| "loss": 0.3222, |
| "num_input_tokens_seen": 969932800, |
| "step": 14800, |
| "train_runtime": 9650.7111, |
| "train_tokens_per_second": 100503.765 |
| }, |
| { |
| "epoch": 0.149, |
| "grad_norm": 0.19345735013484955, |
| "learning_rate": 0.000285644912466595, |
| "loss": 0.3194, |
| "num_input_tokens_seen": 976486400, |
| "step": 14900, |
| "train_runtime": 9721.1196, |
| "train_tokens_per_second": 100449.994 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.13317954540252686, |
| "learning_rate": 0.00028544102649379684, |
| "loss": 0.3236, |
| "num_input_tokens_seen": 983040000, |
| "step": 15000, |
| "train_runtime": 9784.7921, |
| "train_tokens_per_second": 100466.11 |
| }, |
| { |
| "epoch": 0.151, |
| "grad_norm": 0.17458604276180267, |
| "learning_rate": 0.00028523577663136556, |
| "loss": 0.3208, |
| "num_input_tokens_seen": 989593600, |
| "step": 15100, |
| "train_runtime": 9853.1273, |
| "train_tokens_per_second": 100434.468 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 0.1358109712600708, |
| "learning_rate": 0.000285029164946165, |
| "loss": 0.3237, |
| "num_input_tokens_seen": 996147200, |
| "step": 15200, |
| "train_runtime": 9917.7044, |
| "train_tokens_per_second": 100441.307 |
| }, |
| { |
| "epoch": 0.153, |
| "grad_norm": 0.16100633144378662, |
| "learning_rate": 0.0002848211935187725, |
| "loss": 0.3267, |
| "num_input_tokens_seen": 1002700800, |
| "step": 15300, |
| "train_runtime": 9982.8922, |
| "train_tokens_per_second": 100441.914 |
| }, |
| { |
| "epoch": 0.154, |
| "grad_norm": 0.20419622957706451, |
| "learning_rate": 0.0002846118644434581, |
| "loss": 0.3193, |
| "num_input_tokens_seen": 1009254400, |
| "step": 15400, |
| "train_runtime": 10046.3454, |
| "train_tokens_per_second": 100459.855 |
| }, |
| { |
| "epoch": 0.155, |
| "grad_norm": 0.17805695533752441, |
| "learning_rate": 0.00028440117982816326, |
| "loss": 0.3159, |
| "num_input_tokens_seen": 1015808000, |
| "step": 15500, |
| "train_runtime": 10110.0124, |
| "train_tokens_per_second": 100475.446 |
| }, |
| { |
| "epoch": 0.156, |
| "grad_norm": 0.17533563077449799, |
| "learning_rate": 0.0002841891417944796, |
| "loss": 0.3216, |
| "num_input_tokens_seen": 1022361600, |
| "step": 15600, |
| "train_runtime": 10178.7469, |
| "train_tokens_per_second": 100440.812 |
| }, |
| { |
| "epoch": 0.157, |
| "grad_norm": 0.13143610954284668, |
| "learning_rate": 0.0002839757524776279, |
| "loss": 0.3234, |
| "num_input_tokens_seen": 1028915200, |
| "step": 15700, |
| "train_runtime": 10243.1395, |
| "train_tokens_per_second": 100449.203 |
| }, |
| { |
| "epoch": 0.158, |
| "grad_norm": 0.13563373684883118, |
| "learning_rate": 0.0002837610140264361, |
| "loss": 0.3194, |
| "num_input_tokens_seen": 1035468800, |
| "step": 15800, |
| "train_runtime": 10307.5423, |
| "train_tokens_per_second": 100457.39 |
| }, |
| { |
| "epoch": 0.159, |
| "grad_norm": 0.14616088569164276, |
| "learning_rate": 0.0002835449286033182, |
| "loss": 0.3178, |
| "num_input_tokens_seen": 1042022400, |
| "step": 15900, |
| "train_runtime": 10378.0909, |
| "train_tokens_per_second": 100405.982 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.1539888232946396, |
| "learning_rate": 0.0002833274983842518, |
| "loss": 0.3156, |
| "num_input_tokens_seen": 1048576000, |
| "step": 16000, |
| "train_runtime": 10441.484, |
| "train_tokens_per_second": 100424.039 |
| }, |
| { |
| "epoch": 0.161, |
| "grad_norm": 0.15786372125148773, |
| "learning_rate": 0.0002831087255587569, |
| "loss": 0.318, |
| "num_input_tokens_seen": 1055129600, |
| "step": 16100, |
| "train_runtime": 10505.72, |
| "train_tokens_per_second": 100433.821 |
| }, |
| { |
| "epoch": 0.162, |
| "grad_norm": 0.14359760284423828, |
| "learning_rate": 0.0002828886123298734, |
| "loss": 0.3179, |
| "num_input_tokens_seen": 1061683200, |
| "step": 16200, |
| "train_runtime": 10570.7713, |
| "train_tokens_per_second": 100435.736 |
| }, |
| { |
| "epoch": 0.163, |
| "grad_norm": 0.1415397673845291, |
| "learning_rate": 0.00028266716091413906, |
| "loss": 0.32, |
| "num_input_tokens_seen": 1068236800, |
| "step": 16300, |
| "train_runtime": 10635.2645, |
| "train_tokens_per_second": 100442.899 |
| }, |
| { |
| "epoch": 0.164, |
| "grad_norm": 0.1199110895395279, |
| "learning_rate": 0.0002824443735415673, |
| "loss": 0.3188, |
| "num_input_tokens_seen": 1074790400, |
| "step": 16400, |
| "train_runtime": 10704.7074, |
| "train_tokens_per_second": 100403.529 |
| }, |
| { |
| "epoch": 0.165, |
| "grad_norm": 0.18369431793689728, |
| "learning_rate": 0.0002822202524556243, |
| "loss": 0.3208, |
| "num_input_tokens_seen": 1081344000, |
| "step": 16500, |
| "train_runtime": 10770.1863, |
| "train_tokens_per_second": 100401.606 |
| }, |
| { |
| "epoch": 0.166, |
| "grad_norm": 0.2615172266960144, |
| "learning_rate": 0.00028199479991320695, |
| "loss": 0.3224, |
| "num_input_tokens_seen": 1087897600, |
| "step": 16600, |
| "train_runtime": 10834.6749, |
| "train_tokens_per_second": 100408.883 |
| }, |
| { |
| "epoch": 0.167, |
| "grad_norm": 0.1250002384185791, |
| "learning_rate": 0.00028176801818461994, |
| "loss": 0.3171, |
| "num_input_tokens_seen": 1094451200, |
| "step": 16700, |
| "train_runtime": 10899.3075, |
| "train_tokens_per_second": 100414.747 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 0.14198775589466095, |
| "learning_rate": 0.00028153990955355273, |
| "loss": 0.3194, |
| "num_input_tokens_seen": 1101004800, |
| "step": 16800, |
| "train_runtime": 10964.3423, |
| "train_tokens_per_second": 100416.858 |
| }, |
| { |
| "epoch": 0.169, |
| "grad_norm": 0.14076939225196838, |
| "learning_rate": 0.00028131047631705665, |
| "loss": 0.3189, |
| "num_input_tokens_seen": 1107558400, |
| "step": 16900, |
| "train_runtime": 11033.6033, |
| "train_tokens_per_second": 100380.48 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 0.13334921002388, |
| "learning_rate": 0.00028107972078552187, |
| "loss": 0.3198, |
| "num_input_tokens_seen": 1114112000, |
| "step": 17000, |
| "train_runtime": 11098.612, |
| "train_tokens_per_second": 100383.003 |
| }, |
| { |
| "epoch": 0.171, |
| "grad_norm": 0.13615840673446655, |
| "learning_rate": 0.0002808476452826541, |
| "loss": 0.3168, |
| "num_input_tokens_seen": 1120665600, |
| "step": 17100, |
| "train_runtime": 11161.3832, |
| "train_tokens_per_second": 100405.62 |
| }, |
| { |
| "epoch": 0.172, |
| "grad_norm": 0.14747090637683868, |
| "learning_rate": 0.00028061425214545094, |
| "loss": 0.3163, |
| "num_input_tokens_seen": 1127219200, |
| "step": 17200, |
| "train_runtime": 11231.5954, |
| "train_tokens_per_second": 100361.45 |
| }, |
| { |
| "epoch": 0.173, |
| "grad_norm": 0.15957149863243103, |
| "learning_rate": 0.00028037954372417883, |
| "loss": 0.317, |
| "num_input_tokens_seen": 1133772800, |
| "step": 17300, |
| "train_runtime": 11295.5019, |
| "train_tokens_per_second": 100373.831 |
| }, |
| { |
| "epoch": 0.174, |
| "grad_norm": 0.20420241355895996, |
| "learning_rate": 0.0002801435223823488, |
| "loss": 0.3207, |
| "num_input_tokens_seen": 1140326400, |
| "step": 17400, |
| "train_runtime": 11360.8649, |
| "train_tokens_per_second": 100373.203 |
| }, |
| { |
| "epoch": 0.175, |
| "grad_norm": 0.20070046186447144, |
| "learning_rate": 0.00027990619049669336, |
| "loss": 0.3206, |
| "num_input_tokens_seen": 1146880000, |
| "step": 17500, |
| "train_runtime": 11424.854, |
| "train_tokens_per_second": 100384.652 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.13903649151325226, |
| "learning_rate": 0.00027966755045714177, |
| "loss": 0.3227, |
| "num_input_tokens_seen": 1153433600, |
| "step": 17600, |
| "train_runtime": 11488.6874, |
| "train_tokens_per_second": 100397.336 |
| }, |
| { |
| "epoch": 0.177, |
| "grad_norm": 0.15853877365589142, |
| "learning_rate": 0.00027942760466679673, |
| "loss": 0.3168, |
| "num_input_tokens_seen": 1159987200, |
| "step": 17700, |
| "train_runtime": 11559.2862, |
| "train_tokens_per_second": 100351.11 |
| }, |
| { |
| "epoch": 0.178, |
| "grad_norm": 0.14262589812278748, |
| "learning_rate": 0.00027918635554190956, |
| "loss": 0.3235, |
| "num_input_tokens_seen": 1166540800, |
| "step": 17800, |
| "train_runtime": 11622.4751, |
| "train_tokens_per_second": 100369.395 |
| }, |
| { |
| "epoch": 0.179, |
| "grad_norm": 0.14338357746601105, |
| "learning_rate": 0.00027894380551185636, |
| "loss": 0.3204, |
| "num_input_tokens_seen": 1173094400, |
| "step": 17900, |
| "train_runtime": 11687.9668, |
| "train_tokens_per_second": 100367.705 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.12374505400657654, |
| "learning_rate": 0.00027869995701911314, |
| "loss": 0.3156, |
| "num_input_tokens_seen": 1179648000, |
| "step": 18000, |
| "train_runtime": 11751.6619, |
| "train_tokens_per_second": 100381.377 |
| }, |
| { |
| "epoch": 0.181, |
| "grad_norm": 0.11708634346723557, |
| "learning_rate": 0.0002784548125192316, |
| "loss": 0.3145, |
| "num_input_tokens_seen": 1186201600, |
| "step": 18100, |
| "train_runtime": 11816.0633, |
| "train_tokens_per_second": 100388.9 |
| }, |
| { |
| "epoch": 0.182, |
| "grad_norm": 0.1318449079990387, |
| "learning_rate": 0.0002782083744808141, |
| "loss": 0.3159, |
| "num_input_tokens_seen": 1192755200, |
| "step": 18200, |
| "train_runtime": 11887.7736, |
| "train_tokens_per_second": 100334.616 |
| }, |
| { |
| "epoch": 0.183, |
| "grad_norm": 0.3383175730705261, |
| "learning_rate": 0.000277960645385489, |
| "loss": 0.3191, |
| "num_input_tokens_seen": 1199308800, |
| "step": 18300, |
| "train_runtime": 11953.3207, |
| "train_tokens_per_second": 100332.688 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 0.13779285550117493, |
| "learning_rate": 0.00027771162772788544, |
| "loss": 0.3168, |
| "num_input_tokens_seen": 1205862400, |
| "step": 18400, |
| "train_runtime": 12016.7432, |
| "train_tokens_per_second": 100348.521 |
| }, |
| { |
| "epoch": 0.185, |
| "grad_norm": 0.15161630511283875, |
| "learning_rate": 0.00027746132401560857, |
| "loss": 0.3146, |
| "num_input_tokens_seen": 1212416000, |
| "step": 18500, |
| "train_runtime": 12081.3443, |
| "train_tokens_per_second": 100354.395 |
| }, |
| { |
| "epoch": 0.186, |
| "grad_norm": 0.1523953378200531, |
| "learning_rate": 0.0002772097367692139, |
| "loss": 0.3172, |
| "num_input_tokens_seen": 1218969600, |
| "step": 18600, |
| "train_runtime": 12145.9663, |
| "train_tokens_per_second": 100360.035 |
| }, |
| { |
| "epoch": 0.187, |
| "grad_norm": 0.12802754342556, |
| "learning_rate": 0.00027695686852218226, |
| "loss": 0.3198, |
| "num_input_tokens_seen": 1225523200, |
| "step": 18700, |
| "train_runtime": 12215.5887, |
| "train_tokens_per_second": 100324.53 |
| }, |
| { |
| "epoch": 0.188, |
| "grad_norm": 0.13653679192066193, |
| "learning_rate": 0.00027670272182089416, |
| "loss": 0.319, |
| "num_input_tokens_seen": 1232076800, |
| "step": 18800, |
| "train_runtime": 12280.146, |
| "train_tokens_per_second": 100330.794 |
| }, |
| { |
| "epoch": 0.189, |
| "grad_norm": 0.15152159333229065, |
| "learning_rate": 0.0002764472992246039, |
| "loss": 0.3165, |
| "num_input_tokens_seen": 1238630400, |
| "step": 18900, |
| "train_runtime": 12344.6292, |
| "train_tokens_per_second": 100337.594 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.13211041688919067, |
| "learning_rate": 0.0002761906033054143, |
| "loss": 0.3161, |
| "num_input_tokens_seen": 1245184000, |
| "step": 19000, |
| "train_runtime": 12407.4556, |
| "train_tokens_per_second": 100357.724 |
| }, |
| { |
| "epoch": 0.191, |
| "grad_norm": 0.19933822751045227, |
| "learning_rate": 0.00027593263664825045, |
| "loss": 0.3173, |
| "num_input_tokens_seen": 1251737600, |
| "step": 19100, |
| "train_runtime": 12472.5241, |
| "train_tokens_per_second": 100359.606 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.1472938358783722, |
| "learning_rate": 0.00027567340185083363, |
| "loss": 0.3157, |
| "num_input_tokens_seen": 1258291200, |
| "step": 19200, |
| "train_runtime": 12542.0532, |
| "train_tokens_per_second": 100325.774 |
| }, |
| { |
| "epoch": 0.193, |
| "grad_norm": 0.1466071903705597, |
| "learning_rate": 0.00027541290152365537, |
| "loss": 0.3188, |
| "num_input_tokens_seen": 1264844800, |
| "step": 19300, |
| "train_runtime": 12606.5735, |
| "train_tokens_per_second": 100332.164 |
| }, |
| { |
| "epoch": 0.194, |
| "grad_norm": 0.1384386122226715, |
| "learning_rate": 0.00027515113828995117, |
| "loss": 0.318, |
| "num_input_tokens_seen": 1271398400, |
| "step": 19400, |
| "train_runtime": 12672.5058, |
| "train_tokens_per_second": 100327.309 |
| }, |
| { |
| "epoch": 0.195, |
| "grad_norm": 0.16287657618522644, |
| "learning_rate": 0.00027488811478567374, |
| "loss": 0.3153, |
| "num_input_tokens_seen": 1277952000, |
| "step": 19500, |
| "train_runtime": 12735.4985, |
| "train_tokens_per_second": 100345.66 |
| }, |
| { |
| "epoch": 0.196, |
| "grad_norm": 0.14955779910087585, |
| "learning_rate": 0.0002746238336594671, |
| "loss": 0.3144, |
| "num_input_tokens_seen": 1284505600, |
| "step": 19600, |
| "train_runtime": 12804.8911, |
| "train_tokens_per_second": 100313.669 |
| }, |
| { |
| "epoch": 0.197, |
| "grad_norm": 0.15176887810230255, |
| "learning_rate": 0.00027435829757263894, |
| "loss": 0.3172, |
| "num_input_tokens_seen": 1291059200, |
| "step": 19700, |
| "train_runtime": 12869.0984, |
| "train_tokens_per_second": 100322.428 |
| }, |
| { |
| "epoch": 0.198, |
| "grad_norm": 0.12215608358383179, |
| "learning_rate": 0.0002740915091991349, |
| "loss": 0.3182, |
| "num_input_tokens_seen": 1297612800, |
| "step": 19800, |
| "train_runtime": 12932.8746, |
| "train_tokens_per_second": 100334.446 |
| }, |
| { |
| "epoch": 0.199, |
| "grad_norm": 0.248954638838768, |
| "learning_rate": 0.0002738234712255109, |
| "loss": 0.3171, |
| "num_input_tokens_seen": 1304166400, |
| "step": 19900, |
| "train_runtime": 13003.7739, |
| "train_tokens_per_second": 100291.378 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.18855011463165283, |
| "learning_rate": 0.00027355418635090635, |
| "loss": 0.3181, |
| "num_input_tokens_seen": 1310720000, |
| "step": 20000, |
| "train_runtime": 13068.3505, |
| "train_tokens_per_second": 100297.279 |
| }, |
| { |
| "epoch": 0.201, |
| "grad_norm": 0.17624643445014954, |
| "learning_rate": 0.000273283657287017, |
| "loss": 0.3147, |
| "num_input_tokens_seen": 1317273600, |
| "step": 20100, |
| "train_runtime": 13133.7291, |
| "train_tokens_per_second": 100296.998 |
| }, |
| { |
| "epoch": 0.202, |
| "grad_norm": 0.12586164474487305, |
| "learning_rate": 0.00027301188675806745, |
| "loss": 0.3203, |
| "num_input_tokens_seen": 1323827200, |
| "step": 20200, |
| "train_runtime": 13197.5369, |
| "train_tokens_per_second": 100308.657 |
| }, |
| { |
| "epoch": 0.203, |
| "grad_norm": 0.13073797523975372, |
| "learning_rate": 0.0002727388775007839, |
| "loss": 0.3149, |
| "num_input_tokens_seen": 1330380800, |
| "step": 20300, |
| "train_runtime": 13261.8266, |
| "train_tokens_per_second": 100316.558 |
| }, |
| { |
| "epoch": 0.204, |
| "grad_norm": 0.12983232736587524, |
| "learning_rate": 0.0002724646322643666, |
| "loss": 0.3157, |
| "num_input_tokens_seen": 1336934400, |
| "step": 20400, |
| "train_runtime": 13325.295, |
| "train_tokens_per_second": 100330.567 |
| }, |
| { |
| "epoch": 0.205, |
| "grad_norm": 0.2400187999010086, |
| "learning_rate": 0.000272189153810462, |
| "loss": 0.3178, |
| "num_input_tokens_seen": 1343488000, |
| "step": 20500, |
| "train_runtime": 13395.2424, |
| "train_tokens_per_second": 100295.908 |
| }, |
| { |
| "epoch": 0.206, |
| "grad_norm": 0.11757266521453857, |
| "learning_rate": 0.0002719124449131351, |
| "loss": 0.3164, |
| "num_input_tokens_seen": 1350041600, |
| "step": 20600, |
| "train_runtime": 13459.4754, |
| "train_tokens_per_second": 100304.177 |
| }, |
| { |
| "epoch": 0.207, |
| "grad_norm": 0.1606636494398117, |
| "learning_rate": 0.00027163450835884144, |
| "loss": 0.3146, |
| "num_input_tokens_seen": 1356595200, |
| "step": 20700, |
| "train_runtime": 13524.1715, |
| "train_tokens_per_second": 100308.932 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.1295078545808792, |
| "learning_rate": 0.00027135534694639894, |
| "loss": 0.3175, |
| "num_input_tokens_seen": 1363148800, |
| "step": 20800, |
| "train_runtime": 13588.4538, |
| "train_tokens_per_second": 100316.697 |
| }, |
| { |
| "epoch": 0.209, |
| "grad_norm": 0.18409083783626556, |
| "learning_rate": 0.00027107496348696003, |
| "loss": 0.3189, |
| "num_input_tokens_seen": 1369702400, |
| "step": 20900, |
| "train_runtime": 13653.2417, |
| "train_tokens_per_second": 100320.673 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.12083840370178223, |
| "learning_rate": 0.00027079336080398296, |
| "loss": 0.3139, |
| "num_input_tokens_seen": 1376256000, |
| "step": 21000, |
| "train_runtime": 13723.0075, |
| "train_tokens_per_second": 100288.22 |
| }, |
| { |
| "epoch": 0.211, |
| "grad_norm": 0.16270384192466736, |
| "learning_rate": 0.00027051054173320366, |
| "loss": 0.3147, |
| "num_input_tokens_seen": 1382809600, |
| "step": 21100, |
| "train_runtime": 13787.7693, |
| "train_tokens_per_second": 100292.482 |
| }, |
| { |
| "epoch": 0.212, |
| "grad_norm": 0.12299864739179611, |
| "learning_rate": 0.000270226509122607, |
| "loss": 0.3137, |
| "num_input_tokens_seen": 1389363200, |
| "step": 21200, |
| "train_runtime": 13851.6298, |
| "train_tokens_per_second": 100303.229 |
| }, |
| { |
| "epoch": 0.213, |
| "grad_norm": 0.12248677760362625, |
| "learning_rate": 0.0002699412658323983, |
| "loss": 0.3177, |
| "num_input_tokens_seen": 1395916800, |
| "step": 21300, |
| "train_runtime": 13915.8434, |
| "train_tokens_per_second": 100311.333 |
| }, |
| { |
| "epoch": 0.214, |
| "grad_norm": 0.13090935349464417, |
| "learning_rate": 0.00026965481473497423, |
| "loss": 0.3146, |
| "num_input_tokens_seen": 1402470400, |
| "step": 21400, |
| "train_runtime": 13985.645, |
| "train_tokens_per_second": 100279.28 |
| }, |
| { |
| "epoch": 0.215, |
| "grad_norm": 0.1279245913028717, |
| "learning_rate": 0.0002693671587148942, |
| "loss": 0.3128, |
| "num_input_tokens_seen": 1409024000, |
| "step": 21500, |
| "train_runtime": 14050.4506, |
| "train_tokens_per_second": 100283.19 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 0.15504342317581177, |
| "learning_rate": 0.0002690783006688511, |
| "loss": 0.3145, |
| "num_input_tokens_seen": 1415577600, |
| "step": 21600, |
| "train_runtime": 14115.855, |
| "train_tokens_per_second": 100282.81 |
| }, |
| { |
| "epoch": 0.217, |
| "grad_norm": 0.1325046420097351, |
| "learning_rate": 0.0002687882435056423, |
| "loss": 0.3138, |
| "num_input_tokens_seen": 1422131200, |
| "step": 21700, |
| "train_runtime": 14179.61, |
| "train_tokens_per_second": 100294.098 |
| }, |
| { |
| "epoch": 0.218, |
| "grad_norm": 0.17374184727668762, |
| "learning_rate": 0.0002684969901461402, |
| "loss": 0.3179, |
| "num_input_tokens_seen": 1428684800, |
| "step": 21800, |
| "train_runtime": 14245.0199, |
| "train_tokens_per_second": 100293.633 |
| }, |
| { |
| "epoch": 0.219, |
| "grad_norm": 0.16908228397369385, |
| "learning_rate": 0.000268204543523263, |
| "loss": 0.3182, |
| "num_input_tokens_seen": 1435238400, |
| "step": 21900, |
| "train_runtime": 14310.1147, |
| "train_tokens_per_second": 100295.381 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 0.15052039921283722, |
| "learning_rate": 0.0002679109065819447, |
| "loss": 0.3148, |
| "num_input_tokens_seen": 1441792000, |
| "step": 22000, |
| "train_runtime": 14374.221, |
| "train_tokens_per_second": 100304.01 |
| }, |
| { |
| "epoch": 0.221, |
| "grad_norm": 0.1661474108695984, |
| "learning_rate": 0.0002676160822791062, |
| "loss": 0.3142, |
| "num_input_tokens_seen": 1448345600, |
| "step": 22100, |
| "train_runtime": 14445.9108, |
| "train_tokens_per_second": 100259.902 |
| }, |
| { |
| "epoch": 0.222, |
| "grad_norm": 0.16423378884792328, |
| "learning_rate": 0.00026732007358362496, |
| "loss": 0.323, |
| "num_input_tokens_seen": 1454899200, |
| "step": 22200, |
| "train_runtime": 14510.5733, |
| "train_tokens_per_second": 100264.763 |
| }, |
| { |
| "epoch": 0.223, |
| "grad_norm": 0.14868460595607758, |
| "learning_rate": 0.0002670228834763052, |
| "loss": 0.3155, |
| "num_input_tokens_seen": 1461452800, |
| "step": 22300, |
| "train_runtime": 14575.7382, |
| "train_tokens_per_second": 100266.126 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.1287386268377304, |
| "learning_rate": 0.00026672451494984804, |
| "loss": 0.3152, |
| "num_input_tokens_seen": 1468006400, |
| "step": 22400, |
| "train_runtime": 14639.7379, |
| "train_tokens_per_second": 100275.456 |
| }, |
| { |
| "epoch": 0.225, |
| "grad_norm": 0.14276720583438873, |
| "learning_rate": 0.0002664249710088213, |
| "loss": 0.3131, |
| "num_input_tokens_seen": 1474560000, |
| "step": 22500, |
| "train_runtime": 14703.588, |
| "train_tokens_per_second": 100285.726 |
| }, |
| { |
| "epoch": 0.226, |
| "grad_norm": 0.1419740915298462, |
| "learning_rate": 0.00026612425466962893, |
| "loss": 0.3112, |
| "num_input_tokens_seen": 1481113600, |
| "step": 22600, |
| "train_runtime": 14773.1939, |
| "train_tokens_per_second": 100256.83 |
| }, |
| { |
| "epoch": 0.227, |
| "grad_norm": 0.12067803740501404, |
| "learning_rate": 0.00026582236896048134, |
| "loss": 0.3122, |
| "num_input_tokens_seen": 1487667200, |
| "step": 22700, |
| "train_runtime": 14837.1829, |
| "train_tokens_per_second": 100266.15 |
| }, |
| { |
| "epoch": 0.228, |
| "grad_norm": 0.1338939219713211, |
| "learning_rate": 0.00026551931692136413, |
| "loss": 0.3128, |
| "num_input_tokens_seen": 1494220800, |
| "step": 22800, |
| "train_runtime": 14900.9562, |
| "train_tokens_per_second": 100276.84 |
| }, |
| { |
| "epoch": 0.229, |
| "grad_norm": 0.16754469275474548, |
| "learning_rate": 0.00026521510160400804, |
| "loss": 0.3133, |
| "num_input_tokens_seen": 1500774400, |
| "step": 22900, |
| "train_runtime": 14965.1238, |
| "train_tokens_per_second": 100284.797 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 0.12648451328277588, |
| "learning_rate": 0.00026490972607185793, |
| "loss": 0.311, |
| "num_input_tokens_seen": 1507328000, |
| "step": 23000, |
| "train_runtime": 15034.861, |
| "train_tokens_per_second": 100255.533 |
| }, |
| { |
| "epoch": 0.231, |
| "grad_norm": 0.12040221691131592, |
| "learning_rate": 0.0002646031934000421, |
| "loss": 0.3166, |
| "num_input_tokens_seen": 1513881600, |
| "step": 23100, |
| "train_runtime": 15099.2676, |
| "train_tokens_per_second": 100261.922 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 0.12486282736063004, |
| "learning_rate": 0.00026429550667534095, |
| "loss": 0.3151, |
| "num_input_tokens_seen": 1520435200, |
| "step": 23200, |
| "train_runtime": 15164.1184, |
| "train_tokens_per_second": 100265.321 |
| }, |
| { |
| "epoch": 0.233, |
| "grad_norm": 0.18211719393730164, |
| "learning_rate": 0.0002639866689961565, |
| "loss": 0.3117, |
| "num_input_tokens_seen": 1526988800, |
| "step": 23300, |
| "train_runtime": 15229.7058, |
| "train_tokens_per_second": 100263.841 |
| }, |
| { |
| "epoch": 0.234, |
| "grad_norm": 0.13128802180290222, |
| "learning_rate": 0.00026367668347248083, |
| "loss": 0.3125, |
| "num_input_tokens_seen": 1533542400, |
| "step": 23400, |
| "train_runtime": 15293.6404, |
| "train_tokens_per_second": 100273.209 |
| }, |
| { |
| "epoch": 0.235, |
| "grad_norm": 0.11493753641843796, |
| "learning_rate": 0.0002633655532258646, |
| "loss": 0.317, |
| "num_input_tokens_seen": 1540096000, |
| "step": 23500, |
| "train_runtime": 15365.113, |
| "train_tokens_per_second": 100233.301 |
| }, |
| { |
| "epoch": 0.236, |
| "grad_norm": 0.15309779345989227, |
| "learning_rate": 0.000263053281389386, |
| "loss": 0.3136, |
| "num_input_tokens_seen": 1546649600, |
| "step": 23600, |
| "train_runtime": 15428.6523, |
| "train_tokens_per_second": 100245.282 |
| }, |
| { |
| "epoch": 0.237, |
| "grad_norm": 0.15829730033874512, |
| "learning_rate": 0.0002627398711076189, |
| "loss": 0.3098, |
| "num_input_tokens_seen": 1553203200, |
| "step": 23700, |
| "train_runtime": 15493.1944, |
| "train_tokens_per_second": 100250.675 |
| }, |
| { |
| "epoch": 0.238, |
| "grad_norm": 0.13252806663513184, |
| "learning_rate": 0.0002624253255366014, |
| "loss": 0.3096, |
| "num_input_tokens_seen": 1559756800, |
| "step": 23800, |
| "train_runtime": 15556.5037, |
| "train_tokens_per_second": 100263.969 |
| }, |
| { |
| "epoch": 0.239, |
| "grad_norm": 0.18889528512954712, |
| "learning_rate": 0.0002621096478438039, |
| "loss": 0.3146, |
| "num_input_tokens_seen": 1566310400, |
| "step": 23900, |
| "train_runtime": 15621.7412, |
| "train_tokens_per_second": 100264.777 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.16285447776317596, |
| "learning_rate": 0.00026179284120809727, |
| "loss": 0.3168, |
| "num_input_tokens_seen": 1572864000, |
| "step": 24000, |
| "train_runtime": 15687.4424, |
| "train_tokens_per_second": 100262.615 |
| }, |
| { |
| "epoch": 0.241, |
| "grad_norm": 0.14852070808410645, |
| "learning_rate": 0.0002614749088197208, |
| "loss": 0.3115, |
| "num_input_tokens_seen": 1579417600, |
| "step": 24100, |
| "train_runtime": 15752.1472, |
| "train_tokens_per_second": 100266.813 |
| }, |
| { |
| "epoch": 0.242, |
| "grad_norm": 0.22735795378684998, |
| "learning_rate": 0.00026115585388025015, |
| "loss": 0.3099, |
| "num_input_tokens_seen": 1585971200, |
| "step": 24200, |
| "train_runtime": 15823.0117, |
| "train_tokens_per_second": 100231.943 |
| }, |
| { |
| "epoch": 0.243, |
| "grad_norm": 0.16086964309215546, |
| "learning_rate": 0.00026083567960256493, |
| "loss": 0.3107, |
| "num_input_tokens_seen": 1592524800, |
| "step": 24300, |
| "train_runtime": 15889.3517, |
| "train_tokens_per_second": 100225.914 |
| }, |
| { |
| "epoch": 0.244, |
| "grad_norm": 0.15085358917713165, |
| "learning_rate": 0.00026051438921081667, |
| "loss": 0.3112, |
| "num_input_tokens_seen": 1599078400, |
| "step": 24400, |
| "train_runtime": 15954.2137, |
| "train_tokens_per_second": 100229.22 |
| }, |
| { |
| "epoch": 0.245, |
| "grad_norm": 0.14889656007289886, |
| "learning_rate": 0.00026019198594039595, |
| "loss": 0.3147, |
| "num_input_tokens_seen": 1605632000, |
| "step": 24500, |
| "train_runtime": 16020.1883, |
| "train_tokens_per_second": 100225.539 |
| }, |
| { |
| "epoch": 0.246, |
| "grad_norm": 0.15055876970291138, |
| "learning_rate": 0.00025986847303790026, |
| "loss": 0.3125, |
| "num_input_tokens_seen": 1612185600, |
| "step": 24600, |
| "train_runtime": 16084.1346, |
| "train_tokens_per_second": 100234.525 |
| }, |
| { |
| "epoch": 0.247, |
| "grad_norm": 0.14507324993610382, |
| "learning_rate": 0.00025954385376110076, |
| "loss": 0.3115, |
| "num_input_tokens_seen": 1618739200, |
| "step": 24700, |
| "train_runtime": 16148.9618, |
| "train_tokens_per_second": 100237.973 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 0.1229107677936554, |
| "learning_rate": 0.00025921813137891005, |
| "loss": 0.3147, |
| "num_input_tokens_seen": 1625292800, |
| "step": 24800, |
| "train_runtime": 16214.7466, |
| "train_tokens_per_second": 100235.473 |
| }, |
| { |
| "epoch": 0.249, |
| "grad_norm": 0.1423114389181137, |
| "learning_rate": 0.000258891309171349, |
| "loss": 0.3127, |
| "num_input_tokens_seen": 1631846400, |
| "step": 24900, |
| "train_runtime": 16278.9968, |
| "train_tokens_per_second": 100242.442 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.15807275474071503, |
| "learning_rate": 0.00025856339042951344, |
| "loss": 0.3088, |
| "num_input_tokens_seen": 1638400000, |
| "step": 25000, |
| "train_runtime": 16343.5944, |
| "train_tokens_per_second": 100247.226 |
| }, |
| { |
| "epoch": 0.251, |
| "grad_norm": 0.15635885298252106, |
| "learning_rate": 0.0002582343784555415, |
| "loss": 0.3105, |
| "num_input_tokens_seen": 1644953600, |
| "step": 25100, |
| "train_runtime": 16414.1861, |
| "train_tokens_per_second": 100215.362 |
| }, |
| { |
| "epoch": 0.252, |
| "grad_norm": 0.13579483330249786, |
| "learning_rate": 0.00025790427656258017, |
| "loss": 0.3159, |
| "num_input_tokens_seen": 1651507200, |
| "step": 25200, |
| "train_runtime": 16478.0373, |
| "train_tokens_per_second": 100224.752 |
| }, |
| { |
| "epoch": 0.253, |
| "grad_norm": 0.14977572858333588, |
| "learning_rate": 0.00025757308807475185, |
| "loss": 0.3115, |
| "num_input_tokens_seen": 1658060800, |
| "step": 25300, |
| "train_runtime": 16542.7006, |
| "train_tokens_per_second": 100229.149 |
| }, |
| { |
| "epoch": 0.254, |
| "grad_norm": 0.1324361115694046, |
| "learning_rate": 0.00025724081632712086, |
| "loss": 0.3108, |
| "num_input_tokens_seen": 1664614400, |
| "step": 25400, |
| "train_runtime": 16607.2591, |
| "train_tokens_per_second": 100234.144 |
| }, |
| { |
| "epoch": 0.255, |
| "grad_norm": 0.12053392827510834, |
| "learning_rate": 0.0002569074646656601, |
| "loss": 0.3081, |
| "num_input_tokens_seen": 1671168000, |
| "step": 25500, |
| "train_runtime": 16676.4765, |
| "train_tokens_per_second": 100211.096 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.16214688122272491, |
| "learning_rate": 0.00025657303644721695, |
| "loss": 0.3154, |
| "num_input_tokens_seen": 1677721600, |
| "step": 25600, |
| "train_runtime": 16741.4269, |
| "train_tokens_per_second": 100213.776 |
| }, |
| { |
| "epoch": 0.257, |
| "grad_norm": 0.13730435073375702, |
| "learning_rate": 0.00025623753503948004, |
| "loss": 0.3159, |
| "num_input_tokens_seen": 1684275200, |
| "step": 25700, |
| "train_runtime": 16805.4849, |
| "train_tokens_per_second": 100221.755 |
| }, |
| { |
| "epoch": 0.258, |
| "grad_norm": 0.16218283772468567, |
| "learning_rate": 0.00025590096382094475, |
| "loss": 0.3111, |
| "num_input_tokens_seen": 1690828800, |
| "step": 25800, |
| "train_runtime": 16869.8548, |
| "train_tokens_per_second": 100227.821 |
| }, |
| { |
| "epoch": 0.259, |
| "grad_norm": 0.15016646683216095, |
| "learning_rate": 0.00025556332618087945, |
| "loss": 0.3106, |
| "num_input_tokens_seen": 1697382400, |
| "step": 25900, |
| "train_runtime": 16938.0105, |
| "train_tokens_per_second": 100211.439 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 0.1398506760597229, |
| "learning_rate": 0.00025522462551929155, |
| "loss": 0.313, |
| "num_input_tokens_seen": 1703936000, |
| "step": 26000, |
| "train_runtime": 17003.6995, |
| "train_tokens_per_second": 100209.722 |
| }, |
| { |
| "epoch": 0.261, |
| "grad_norm": 0.12380320578813553, |
| "learning_rate": 0.00025488486524689283, |
| "loss": 0.3133, |
| "num_input_tokens_seen": 1710489600, |
| "step": 26100, |
| "train_runtime": 17069.3522, |
| "train_tokens_per_second": 100208.232 |
| }, |
| { |
| "epoch": 0.262, |
| "grad_norm": 0.14536257088184357, |
| "learning_rate": 0.00025454404878506555, |
| "loss": 0.3115, |
| "num_input_tokens_seen": 1717043200, |
| "step": 26200, |
| "train_runtime": 17132.7395, |
| "train_tokens_per_second": 100220.003 |
| }, |
| { |
| "epoch": 0.263, |
| "grad_norm": 0.14442390203475952, |
| "learning_rate": 0.0002542021795658276, |
| "loss": 0.311, |
| "num_input_tokens_seen": 1723596800, |
| "step": 26300, |
| "train_runtime": 17196.4745, |
| "train_tokens_per_second": 100229.66 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 0.12595972418785095, |
| "learning_rate": 0.0002538592610317984, |
| "loss": 0.3118, |
| "num_input_tokens_seen": 1730150400, |
| "step": 26400, |
| "train_runtime": 17266.9358, |
| "train_tokens_per_second": 100200.199 |
| }, |
| { |
| "epoch": 0.265, |
| "grad_norm": 0.1587669402360916, |
| "learning_rate": 0.00025351529663616355, |
| "loss": 0.3132, |
| "num_input_tokens_seen": 1736704000, |
| "step": 26500, |
| "train_runtime": 17331.5833, |
| "train_tokens_per_second": 100204.578 |
| }, |
| { |
| "epoch": 0.266, |
| "grad_norm": 0.1406719982624054, |
| "learning_rate": 0.00025317028984264087, |
| "loss": 0.3099, |
| "num_input_tokens_seen": 1743257600, |
| "step": 26600, |
| "train_runtime": 17395.5945, |
| "train_tokens_per_second": 100212.591 |
| }, |
| { |
| "epoch": 0.267, |
| "grad_norm": 0.1677832007408142, |
| "learning_rate": 0.0002528242441254448, |
| "loss": 0.309, |
| "num_input_tokens_seen": 1749811200, |
| "step": 26700, |
| "train_runtime": 17459.1185, |
| "train_tokens_per_second": 100223.342 |
| }, |
| { |
| "epoch": 0.268, |
| "grad_norm": 0.13640043139457703, |
| "learning_rate": 0.000252477162969252, |
| "loss": 0.3112, |
| "num_input_tokens_seen": 1756364800, |
| "step": 26800, |
| "train_runtime": 17523.2088, |
| "train_tokens_per_second": 100230.775 |
| }, |
| { |
| "epoch": 0.269, |
| "grad_norm": 0.12981313467025757, |
| "learning_rate": 0.00025212904986916584, |
| "loss": 0.3124, |
| "num_input_tokens_seen": 1762918400, |
| "step": 26900, |
| "train_runtime": 17587.6922, |
| "train_tokens_per_second": 100235.914 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.14338868856430054, |
| "learning_rate": 0.00025177990833068133, |
| "loss": 0.3124, |
| "num_input_tokens_seen": 1769472000, |
| "step": 27000, |
| "train_runtime": 17658.758, |
| "train_tokens_per_second": 100203.649 |
| }, |
| { |
| "epoch": 0.271, |
| "grad_norm": 0.17518877983093262, |
| "learning_rate": 0.0002514297418696499, |
| "loss": 0.3076, |
| "num_input_tokens_seen": 1776025600, |
| "step": 27100, |
| "train_runtime": 17723.3886, |
| "train_tokens_per_second": 100208.016 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.1369880735874176, |
| "learning_rate": 0.0002510785540122439, |
| "loss": 0.3114, |
| "num_input_tokens_seen": 1782579200, |
| "step": 27200, |
| "train_runtime": 17786.611, |
| "train_tokens_per_second": 100220.283 |
| }, |
| { |
| "epoch": 0.273, |
| "grad_norm": 0.15111377835273743, |
| "learning_rate": 0.0002507263482949212, |
| "loss": 0.3144, |
| "num_input_tokens_seen": 1789132800, |
| "step": 27300, |
| "train_runtime": 17852.1418, |
| "train_tokens_per_second": 100219.504 |
| }, |
| { |
| "epoch": 0.274, |
| "grad_norm": 0.140447199344635, |
| "learning_rate": 0.0002503731282643894, |
| "loss": 0.3103, |
| "num_input_tokens_seen": 1795686400, |
| "step": 27400, |
| "train_runtime": 17917.1236, |
| "train_tokens_per_second": 100221.801 |
| }, |
| { |
| "epoch": 0.275, |
| "grad_norm": 0.1373315006494522, |
| "learning_rate": 0.0002500188974775704, |
| "loss": 0.3095, |
| "num_input_tokens_seen": 1802240000, |
| "step": 27500, |
| "train_runtime": 17981.4799, |
| "train_tokens_per_second": 100227.568 |
| }, |
| { |
| "epoch": 0.276, |
| "grad_norm": 0.1453147530555725, |
| "learning_rate": 0.00024966365950156416, |
| "loss": 0.3085, |
| "num_input_tokens_seen": 1808793600, |
| "step": 27600, |
| "train_runtime": 18052.109, |
| "train_tokens_per_second": 100198.464 |
| }, |
| { |
| "epoch": 0.277, |
| "grad_norm": 0.19097484648227692, |
| "learning_rate": 0.00024930741791361326, |
| "loss": 0.3128, |
| "num_input_tokens_seen": 1815347200, |
| "step": 27700, |
| "train_runtime": 18117.9773, |
| "train_tokens_per_second": 100195.909 |
| }, |
| { |
| "epoch": 0.278, |
| "grad_norm": 0.2222718745470047, |
| "learning_rate": 0.0002489501763010664, |
| "loss": 0.3107, |
| "num_input_tokens_seen": 1821900800, |
| "step": 27800, |
| "train_runtime": 18178.1946, |
| "train_tokens_per_second": 100224.519 |
| }, |
| { |
| "epoch": 0.279, |
| "grad_norm": 0.16960225999355316, |
| "learning_rate": 0.00024859193826134285, |
| "loss": 0.3093, |
| "num_input_tokens_seen": 1828454400, |
| "step": 27900, |
| "train_runtime": 18248.1866, |
| "train_tokens_per_second": 100199.238 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.15540289878845215, |
| "learning_rate": 0.00024823270740189556, |
| "loss": 0.3084, |
| "num_input_tokens_seen": 1835008000, |
| "step": 28000, |
| "train_runtime": 18313.0722, |
| "train_tokens_per_second": 100202.084 |
| }, |
| { |
| "epoch": 0.281, |
| "grad_norm": 0.1421203911304474, |
| "learning_rate": 0.00024787248734017527, |
| "loss": 0.3119, |
| "num_input_tokens_seen": 1841561600, |
| "step": 28100, |
| "train_runtime": 18377.039, |
| "train_tokens_per_second": 100209.919 |
| }, |
| { |
| "epoch": 0.282, |
| "grad_norm": 0.131204292178154, |
| "learning_rate": 0.0002475112817035941, |
| "loss": 0.3127, |
| "num_input_tokens_seen": 1848115200, |
| "step": 28200, |
| "train_runtime": 18441.4656, |
| "train_tokens_per_second": 100215.202 |
| }, |
| { |
| "epoch": 0.283, |
| "grad_norm": 0.1507508009672165, |
| "learning_rate": 0.0002471490941294887, |
| "loss": 0.3118, |
| "num_input_tokens_seen": 1854668800, |
| "step": 28300, |
| "train_runtime": 18511.3095, |
| "train_tokens_per_second": 100191.118 |
| }, |
| { |
| "epoch": 0.284, |
| "grad_norm": 0.12522923946380615, |
| "learning_rate": 0.000246785928265084, |
| "loss": 0.3104, |
| "num_input_tokens_seen": 1861222400, |
| "step": 28400, |
| "train_runtime": 18574.4697, |
| "train_tokens_per_second": 100203.259 |
| }, |
| { |
| "epoch": 0.285, |
| "grad_norm": 0.2087126076221466, |
| "learning_rate": 0.0002464217877674562, |
| "loss": 0.3132, |
| "num_input_tokens_seen": 1867776000, |
| "step": 28500, |
| "train_runtime": 18638.8332, |
| "train_tokens_per_second": 100208.848 |
| }, |
| { |
| "epoch": 0.286, |
| "grad_norm": 0.1495303064584732, |
| "learning_rate": 0.0002460566763034961, |
| "loss": 0.3159, |
| "num_input_tokens_seen": 1874329600, |
| "step": 28600, |
| "train_runtime": 18703.8924, |
| "train_tokens_per_second": 100210.671 |
| }, |
| { |
| "epoch": 0.287, |
| "grad_norm": 0.14563380181789398, |
| "learning_rate": 0.00024569059754987196, |
| "loss": 0.3116, |
| "num_input_tokens_seen": 1880883200, |
| "step": 28700, |
| "train_runtime": 18774.7813, |
| "train_tokens_per_second": 100181.364 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.12803615629673004, |
| "learning_rate": 0.00024532355519299296, |
| "loss": 0.3099, |
| "num_input_tokens_seen": 1887436800, |
| "step": 28800, |
| "train_runtime": 18838.435, |
| "train_tokens_per_second": 100190.743 |
| }, |
| { |
| "epoch": 0.289, |
| "grad_norm": 0.5618897676467896, |
| "learning_rate": 0.0002449555529289714, |
| "loss": 0.3129, |
| "num_input_tokens_seen": 1893990400, |
| "step": 28900, |
| "train_runtime": 18901.8999, |
| "train_tokens_per_second": 100201.06 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 0.15488959848880768, |
| "learning_rate": 0.0002445865944635861, |
| "loss": 0.3155, |
| "num_input_tokens_seen": 1900544000, |
| "step": 29000, |
| "train_runtime": 18967.9894, |
| "train_tokens_per_second": 100197.441 |
| }, |
| { |
| "epoch": 0.291, |
| "grad_norm": 0.13676992058753967, |
| "learning_rate": 0.0002442166835122446, |
| "loss": 0.3101, |
| "num_input_tokens_seen": 1907097600, |
| "step": 29100, |
| "train_runtime": 19031.1664, |
| "train_tokens_per_second": 100209.181 |
| }, |
| { |
| "epoch": 0.292, |
| "grad_norm": 0.11402736604213715, |
| "learning_rate": 0.00024384582379994614, |
| "loss": 0.3094, |
| "num_input_tokens_seen": 1913651200, |
| "step": 29200, |
| "train_runtime": 19096.1775, |
| "train_tokens_per_second": 100211.218 |
| }, |
| { |
| "epoch": 0.293, |
| "grad_norm": 0.1358448714017868, |
| "learning_rate": 0.00024347401906124388, |
| "loss": 0.309, |
| "num_input_tokens_seen": 1920204800, |
| "step": 29300, |
| "train_runtime": 19165.3098, |
| "train_tokens_per_second": 100191.691 |
| }, |
| { |
| "epoch": 0.294, |
| "grad_norm": 0.14608891308307648, |
| "learning_rate": 0.0002431012730402075, |
| "loss": 0.3119, |
| "num_input_tokens_seen": 1926758400, |
| "step": 29400, |
| "train_runtime": 19230.3069, |
| "train_tokens_per_second": 100193.845 |
| }, |
| { |
| "epoch": 0.295, |
| "grad_norm": 0.1501711755990982, |
| "learning_rate": 0.00024272758949038517, |
| "loss": 0.3091, |
| "num_input_tokens_seen": 1933312000, |
| "step": 29500, |
| "train_runtime": 19294.7627, |
| "train_tokens_per_second": 100198.796 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 0.1614496409893036, |
| "learning_rate": 0.00024235297217476616, |
| "loss": 0.3104, |
| "num_input_tokens_seen": 1939865600, |
| "step": 29600, |
| "train_runtime": 19364.7415, |
| "train_tokens_per_second": 100175.135 |
| }, |
| { |
| "epoch": 0.297, |
| "grad_norm": 0.11902807652950287, |
| "learning_rate": 0.00024197742486574268, |
| "loss": 0.3126, |
| "num_input_tokens_seen": 1946419200, |
| "step": 29700, |
| "train_runtime": 19429.1038, |
| "train_tokens_per_second": 100180.596 |
| }, |
| { |
| "epoch": 0.298, |
| "grad_norm": 0.12998123466968536, |
| "learning_rate": 0.0002416009513450719, |
| "loss": 0.3102, |
| "num_input_tokens_seen": 1952972800, |
| "step": 29800, |
| "train_runtime": 19494.2244, |
| "train_tokens_per_second": 100182.124 |
| }, |
| { |
| "epoch": 0.299, |
| "grad_norm": 0.2079559862613678, |
| "learning_rate": 0.00024122355540383806, |
| "loss": 0.311, |
| "num_input_tokens_seen": 1959526400, |
| "step": 29900, |
| "train_runtime": 19559.2072, |
| "train_tokens_per_second": 100184.347 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.15128397941589355, |
| "learning_rate": 0.00024084524084241405, |
| "loss": 0.3076, |
| "num_input_tokens_seen": 1966080000, |
| "step": 30000, |
| "train_runtime": 19623.3669, |
| "train_tokens_per_second": 100190.758 |
| }, |
| { |
| "epoch": 0.301, |
| "grad_norm": 0.13512304425239563, |
| "learning_rate": 0.00024046601147042332, |
| "loss": 0.3119, |
| "num_input_tokens_seen": 1972633600, |
| "step": 30100, |
| "train_runtime": 19688.91, |
| "train_tokens_per_second": 100190.086 |
| }, |
| { |
| "epoch": 0.302, |
| "grad_norm": 0.12716713547706604, |
| "learning_rate": 0.0002400858711067015, |
| "loss": 0.3093, |
| "num_input_tokens_seen": 1979187200, |
| "step": 30200, |
| "train_runtime": 19753.5863, |
| "train_tokens_per_second": 100193.816 |
| }, |
| { |
| "epoch": 0.303, |
| "grad_norm": 0.1301889717578888, |
| "learning_rate": 0.00023970482357925772, |
| "loss": 0.31, |
| "num_input_tokens_seen": 1985740800, |
| "step": 30300, |
| "train_runtime": 19823.6081, |
| "train_tokens_per_second": 100170.503 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.13871292769908905, |
| "learning_rate": 0.00023932287272523646, |
| "loss": 0.3084, |
| "num_input_tokens_seen": 1992294400, |
| "step": 30400, |
| "train_runtime": 19887.7656, |
| "train_tokens_per_second": 100176.885 |
| }, |
| { |
| "epoch": 0.305, |
| "grad_norm": 0.12449346482753754, |
| "learning_rate": 0.00023894002239087847, |
| "loss": 0.3276, |
| "num_input_tokens_seen": 1998848000, |
| "step": 30500, |
| "train_runtime": 19952.5714, |
| "train_tokens_per_second": 100179.97 |
| }, |
| { |
| "epoch": 0.306, |
| "grad_norm": 0.1523977369070053, |
| "learning_rate": 0.0002385562764314825, |
| "loss": 0.3097, |
| "num_input_tokens_seen": 2005401600, |
| "step": 30600, |
| "train_runtime": 20017.8352, |
| "train_tokens_per_second": 100180.743 |
| }, |
| { |
| "epoch": 0.307, |
| "grad_norm": 0.1439458280801773, |
| "learning_rate": 0.00023817163871136596, |
| "loss": 0.3048, |
| "num_input_tokens_seen": 2011955200, |
| "step": 30700, |
| "train_runtime": 20081.8889, |
| "train_tokens_per_second": 100187.548 |
| }, |
| { |
| "epoch": 0.308, |
| "grad_norm": 0.12756380438804626, |
| "learning_rate": 0.00023778611310382652, |
| "loss": 0.3075, |
| "num_input_tokens_seen": 2018508800, |
| "step": 30800, |
| "train_runtime": 20145.6107, |
| "train_tokens_per_second": 100195.96 |
| }, |
| { |
| "epoch": 0.309, |
| "grad_norm": 0.14607320725917816, |
| "learning_rate": 0.0002373997034911027, |
| "loss": 0.3139, |
| "num_input_tokens_seen": 2025062400, |
| "step": 30900, |
| "train_runtime": 20210.9796, |
| "train_tokens_per_second": 100196.153 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.12456675618886948, |
| "learning_rate": 0.00023701241376433506, |
| "loss": 0.3089, |
| "num_input_tokens_seen": 2031616000, |
| "step": 31000, |
| "train_runtime": 20281.0675, |
| "train_tokens_per_second": 100173.031 |
| }, |
| { |
| "epoch": 0.311, |
| "grad_norm": 0.13834626972675323, |
| "learning_rate": 0.0002366242478235268, |
| "loss": 0.3066, |
| "num_input_tokens_seen": 2038169600, |
| "step": 31100, |
| "train_runtime": 20346.0263, |
| "train_tokens_per_second": 100175.315 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 0.1534184068441391, |
| "learning_rate": 0.00023623520957750471, |
| "loss": 0.3082, |
| "num_input_tokens_seen": 2044723200, |
| "step": 31200, |
| "train_runtime": 20409.76, |
| "train_tokens_per_second": 100183.598 |
| }, |
| { |
| "epoch": 0.313, |
| "grad_norm": 0.12966671586036682, |
| "learning_rate": 0.00023584530294387953, |
| "loss": 0.3126, |
| "num_input_tokens_seen": 2051276800, |
| "step": 31300, |
| "train_runtime": 20475.6348, |
| "train_tokens_per_second": 100181.353 |
| }, |
| { |
| "epoch": 0.314, |
| "grad_norm": 0.14474999904632568, |
| "learning_rate": 0.00023545453184900682, |
| "loss": 0.3091, |
| "num_input_tokens_seen": 2057830400, |
| "step": 31400, |
| "train_runtime": 20539.196, |
| "train_tokens_per_second": 100190.407 |
| }, |
| { |
| "epoch": 0.315, |
| "grad_norm": 0.13208946585655212, |
| "learning_rate": 0.00023506290022794706, |
| "loss": 0.3095, |
| "num_input_tokens_seen": 2064384000, |
| "step": 31500, |
| "train_runtime": 20604.221, |
| "train_tokens_per_second": 100192.286 |
| }, |
| { |
| "epoch": 0.316, |
| "grad_norm": 0.15090374648571014, |
| "learning_rate": 0.00023467041202442643, |
| "loss": 0.3073, |
| "num_input_tokens_seen": 2070937600, |
| "step": 31600, |
| "train_runtime": 20674.5759, |
| "train_tokens_per_second": 100168.323 |
| }, |
| { |
| "epoch": 0.317, |
| "grad_norm": 0.18638543784618378, |
| "learning_rate": 0.00023427707119079669, |
| "loss": 0.312, |
| "num_input_tokens_seen": 2077491200, |
| "step": 31700, |
| "train_runtime": 20738.8671, |
| "train_tokens_per_second": 100173.804 |
| }, |
| { |
| "epoch": 0.318, |
| "grad_norm": 0.1385478377342224, |
| "learning_rate": 0.0002338828816879957, |
| "loss": 0.3095, |
| "num_input_tokens_seen": 2084044800, |
| "step": 31800, |
| "train_runtime": 20802.7906, |
| "train_tokens_per_second": 100181.021 |
| }, |
| { |
| "epoch": 0.319, |
| "grad_norm": 0.15265443921089172, |
| "learning_rate": 0.00023348784748550744, |
| "loss": 0.3103, |
| "num_input_tokens_seen": 2090598400, |
| "step": 31900, |
| "train_runtime": 20868.0311, |
| "train_tokens_per_second": 100181.871 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.15918248891830444, |
| "learning_rate": 0.00023309197256132184, |
| "loss": 0.3102, |
| "num_input_tokens_seen": 2097152000, |
| "step": 32000, |
| "train_runtime": 20937.8931, |
| "train_tokens_per_second": 100160.603 |
| }, |
| { |
| "epoch": 0.321, |
| "grad_norm": 0.14801020920276642, |
| "learning_rate": 0.00023269526090189505, |
| "loss": 0.3147, |
| "num_input_tokens_seen": 2103705600, |
| "step": 32100, |
| "train_runtime": 21002.9142, |
| "train_tokens_per_second": 100162.557 |
| }, |
| { |
| "epoch": 0.322, |
| "grad_norm": 0.18616679310798645, |
| "learning_rate": 0.00023229771650210907, |
| "loss": 0.3099, |
| "num_input_tokens_seen": 2110259200, |
| "step": 32200, |
| "train_runtime": 21067.872, |
| "train_tokens_per_second": 100164.801 |
| }, |
| { |
| "epoch": 0.323, |
| "grad_norm": 0.13931268453598022, |
| "learning_rate": 0.00023189934336523163, |
| "loss": 0.3115, |
| "num_input_tokens_seen": 2116812800, |
| "step": 32300, |
| "train_runtime": 21131.2256, |
| "train_tokens_per_second": 100174.634 |
| }, |
| { |
| "epoch": 0.324, |
| "grad_norm": 0.1734631061553955, |
| "learning_rate": 0.00023150014550287574, |
| "loss": 0.3112, |
| "num_input_tokens_seen": 2123366400, |
| "step": 32400, |
| "train_runtime": 21201.6285, |
| "train_tokens_per_second": 100151.099 |
| }, |
| { |
| "epoch": 0.325, |
| "grad_norm": 0.13876596093177795, |
| "learning_rate": 0.00023110012693495943, |
| "loss": 0.31, |
| "num_input_tokens_seen": 2129920000, |
| "step": 32500, |
| "train_runtime": 21265.8205, |
| "train_tokens_per_second": 100156.963 |
| }, |
| { |
| "epoch": 0.326, |
| "grad_norm": 0.20441171526908875, |
| "learning_rate": 0.00023069929168966527, |
| "loss": 0.3095, |
| "num_input_tokens_seen": 2136473600, |
| "step": 32600, |
| "train_runtime": 21329.6315, |
| "train_tokens_per_second": 100164.581 |
| }, |
| { |
| "epoch": 0.327, |
| "grad_norm": 0.12022672593593597, |
| "learning_rate": 0.0002302976438033997, |
| "loss": 0.3089, |
| "num_input_tokens_seen": 2143027200, |
| "step": 32700, |
| "train_runtime": 21394.0086, |
| "train_tokens_per_second": 100169.502 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 0.23158074915409088, |
| "learning_rate": 0.0002298951873207525, |
| "loss": 0.3121, |
| "num_input_tokens_seen": 2149580800, |
| "step": 32800, |
| "train_runtime": 21459.8938, |
| "train_tokens_per_second": 100167.355 |
| }, |
| { |
| "epoch": 0.329, |
| "grad_norm": 0.11978685855865479, |
| "learning_rate": 0.00022949192629445606, |
| "loss": 0.308, |
| "num_input_tokens_seen": 2156134400, |
| "step": 32900, |
| "train_runtime": 21524.2825, |
| "train_tokens_per_second": 100172.185 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 0.16882842779159546, |
| "learning_rate": 0.0002290878647853443, |
| "loss": 0.3076, |
| "num_input_tokens_seen": 2162688000, |
| "step": 33000, |
| "train_runtime": 21595.0222, |
| "train_tokens_per_second": 100147.524 |
| }, |
| { |
| "epoch": 0.331, |
| "grad_norm": 0.1368299126625061, |
| "learning_rate": 0.00022868300686231224, |
| "loss": 0.3078, |
| "num_input_tokens_seen": 2169241600, |
| "step": 33100, |
| "train_runtime": 21659.0361, |
| "train_tokens_per_second": 100154.115 |
| }, |
| { |
| "epoch": 0.332, |
| "grad_norm": 0.13301041722297668, |
| "learning_rate": 0.00022827735660227457, |
| "loss": 0.3103, |
| "num_input_tokens_seen": 2175795200, |
| "step": 33200, |
| "train_runtime": 21723.8934, |
| "train_tokens_per_second": 100156.779 |
| }, |
| { |
| "epoch": 0.333, |
| "grad_norm": 0.13545189797878265, |
| "learning_rate": 0.000227870918090125, |
| "loss": 0.3068, |
| "num_input_tokens_seen": 2182348800, |
| "step": 33300, |
| "train_runtime": 21788.4359, |
| "train_tokens_per_second": 100160.875 |
| }, |
| { |
| "epoch": 0.334, |
| "grad_norm": 0.2138141542673111, |
| "learning_rate": 0.00022746369541869476, |
| "loss": 0.3059, |
| "num_input_tokens_seen": 2188902400, |
| "step": 33400, |
| "train_runtime": 21853.4857, |
| "train_tokens_per_second": 100162.621 |
| }, |
| { |
| "epoch": 0.335, |
| "grad_norm": 0.1255991905927658, |
| "learning_rate": 0.00022705569268871163, |
| "loss": 0.3099, |
| "num_input_tokens_seen": 2195456000, |
| "step": 33500, |
| "train_runtime": 21918.1728, |
| "train_tokens_per_second": 100166.014 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.1330287754535675, |
| "learning_rate": 0.00022664691400875865, |
| "loss": 0.3093, |
| "num_input_tokens_seen": 2202009600, |
| "step": 33600, |
| "train_runtime": 21987.6743, |
| "train_tokens_per_second": 100147.454 |
| }, |
| { |
| "epoch": 0.337, |
| "grad_norm": 0.1321260631084442, |
| "learning_rate": 0.00022623736349523254, |
| "loss": 0.3109, |
| "num_input_tokens_seen": 2208563200, |
| "step": 33700, |
| "train_runtime": 22052.5483, |
| "train_tokens_per_second": 100150.022 |
| }, |
| { |
| "epoch": 0.338, |
| "grad_norm": 0.13865865767002106, |
| "learning_rate": 0.00022582704527230238, |
| "loss": 0.3068, |
| "num_input_tokens_seen": 2215116800, |
| "step": 33800, |
| "train_runtime": 22117.0958, |
| "train_tokens_per_second": 100154.054 |
| }, |
| { |
| "epoch": 0.339, |
| "grad_norm": 0.13597998023033142, |
| "learning_rate": 0.0002254159634718682, |
| "loss": 0.3061, |
| "num_input_tokens_seen": 2221670400, |
| "step": 33900, |
| "train_runtime": 22180.0605, |
| "train_tokens_per_second": 100165.209 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 0.14176584780216217, |
| "learning_rate": 0.00022500412223351915, |
| "loss": 0.3114, |
| "num_input_tokens_seen": 2228224000, |
| "step": 34000, |
| "train_runtime": 22251.2759, |
| "train_tokens_per_second": 100139.157 |
| }, |
| { |
| "epoch": 0.341, |
| "grad_norm": 0.13006241619586945, |
| "learning_rate": 0.0002245915257044919, |
| "loss": 0.3071, |
| "num_input_tokens_seen": 2234777600, |
| "step": 34100, |
| "train_runtime": 22315.7056, |
| "train_tokens_per_second": 100143.712 |
| }, |
| { |
| "epoch": 0.342, |
| "grad_norm": 0.186634823679924, |
| "learning_rate": 0.00022417817803962892, |
| "loss": 0.3032, |
| "num_input_tokens_seen": 2241331200, |
| "step": 34200, |
| "train_runtime": 22380.1064, |
| "train_tokens_per_second": 100148.371 |
| }, |
| { |
| "epoch": 0.343, |
| "grad_norm": 0.1767393946647644, |
| "learning_rate": 0.0002237640834013366, |
| "loss": 0.3085, |
| "num_input_tokens_seen": 2247884800, |
| "step": 34300, |
| "train_runtime": 22444.6012, |
| "train_tokens_per_second": 100152.584 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 0.15075454115867615, |
| "learning_rate": 0.0002233492459595434, |
| "loss": 0.3099, |
| "num_input_tokens_seen": 2254438400, |
| "step": 34400, |
| "train_runtime": 22509.6493, |
| "train_tokens_per_second": 100154.31 |
| }, |
| { |
| "epoch": 0.345, |
| "grad_norm": 0.15754783153533936, |
| "learning_rate": 0.00022293366989165772, |
| "loss": 0.307, |
| "num_input_tokens_seen": 2260992000, |
| "step": 34500, |
| "train_runtime": 22579.4848, |
| "train_tokens_per_second": 100134.791 |
| }, |
| { |
| "epoch": 0.346, |
| "grad_norm": 0.13372038304805756, |
| "learning_rate": 0.00022251735938252587, |
| "loss": 0.3066, |
| "num_input_tokens_seen": 2267545600, |
| "step": 34600, |
| "train_runtime": 22643.953, |
| "train_tokens_per_second": 100139.123 |
| }, |
| { |
| "epoch": 0.347, |
| "grad_norm": 0.17753738164901733, |
| "learning_rate": 0.0002221003186243902, |
| "loss": 0.3087, |
| "num_input_tokens_seen": 2274099200, |
| "step": 34700, |
| "train_runtime": 22708.6869, |
| "train_tokens_per_second": 100142.259 |
| }, |
| { |
| "epoch": 0.348, |
| "grad_norm": 0.1375788450241089, |
| "learning_rate": 0.00022168255181684643, |
| "loss": 0.3064, |
| "num_input_tokens_seen": 2280652800, |
| "step": 34800, |
| "train_runtime": 22774.2018, |
| "train_tokens_per_second": 100141.942 |
| }, |
| { |
| "epoch": 0.349, |
| "grad_norm": 0.14929898083209991, |
| "learning_rate": 0.00022126406316680172, |
| "loss": 0.3108, |
| "num_input_tokens_seen": 2287206400, |
| "step": 34900, |
| "train_runtime": 22839.776, |
| "train_tokens_per_second": 100141.367 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.15789327025413513, |
| "learning_rate": 0.00022084485688843208, |
| "loss": 0.3082, |
| "num_input_tokens_seen": 2293760000, |
| "step": 35000, |
| "train_runtime": 22904.3853, |
| "train_tokens_per_second": 100145.015 |
| }, |
| { |
| "epoch": 0.351, |
| "grad_norm": 0.1339723765850067, |
| "learning_rate": 0.00022042493720314003, |
| "loss": 0.3127, |
| "num_input_tokens_seen": 2300313600, |
| "step": 35100, |
| "train_runtime": 22968.8594, |
| "train_tokens_per_second": 100149.231 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.14159700274467468, |
| "learning_rate": 0.00022000430833951228, |
| "loss": 0.3096, |
| "num_input_tokens_seen": 2306867200, |
| "step": 35200, |
| "train_runtime": 23033.0283, |
| "train_tokens_per_second": 100154.751 |
| }, |
| { |
| "epoch": 0.353, |
| "grad_norm": 0.17289403080940247, |
| "learning_rate": 0.00021958297453327673, |
| "loss": 0.3058, |
| "num_input_tokens_seen": 2313420800, |
| "step": 35300, |
| "train_runtime": 23103.5037, |
| "train_tokens_per_second": 100132.899 |
| }, |
| { |
| "epoch": 0.354, |
| "grad_norm": 0.1353076845407486, |
| "learning_rate": 0.00021916094002726012, |
| "loss": 0.3048, |
| "num_input_tokens_seen": 2319974400, |
| "step": 35400, |
| "train_runtime": 23166.8292, |
| "train_tokens_per_second": 100142.077 |
| }, |
| { |
| "epoch": 0.355, |
| "grad_norm": 0.12303294241428375, |
| "learning_rate": 0.00021873820907134534, |
| "loss": 0.3102, |
| "num_input_tokens_seen": 2326528000, |
| "step": 35500, |
| "train_runtime": 23232.6655, |
| "train_tokens_per_second": 100140.382 |
| }, |
| { |
| "epoch": 0.356, |
| "grad_norm": 0.14765286445617676, |
| "learning_rate": 0.0002183147859224283, |
| "loss": 0.3106, |
| "num_input_tokens_seen": 2333081600, |
| "step": 35600, |
| "train_runtime": 23296.4196, |
| "train_tokens_per_second": 100147.647 |
| }, |
| { |
| "epoch": 0.357, |
| "grad_norm": 0.13833215832710266, |
| "learning_rate": 0.00021789067484437544, |
| "loss": 0.3055, |
| "num_input_tokens_seen": 2339635200, |
| "step": 35700, |
| "train_runtime": 23361.5704, |
| "train_tokens_per_second": 100148.884 |
| }, |
| { |
| "epoch": 0.358, |
| "grad_norm": 0.13157132267951965, |
| "learning_rate": 0.00021746588010798068, |
| "loss": 0.3081, |
| "num_input_tokens_seen": 2346188800, |
| "step": 35800, |
| "train_runtime": 23430.7927, |
| "train_tokens_per_second": 100132.711 |
| }, |
| { |
| "epoch": 0.359, |
| "grad_norm": 0.12913836538791656, |
| "learning_rate": 0.00021704040599092216, |
| "loss": 0.3094, |
| "num_input_tokens_seen": 2352742400, |
| "step": 35900, |
| "train_runtime": 23495.4052, |
| "train_tokens_per_second": 100136.277 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.13528013229370117, |
| "learning_rate": 0.00021661425677771965, |
| "loss": 0.3061, |
| "num_input_tokens_seen": 2359296000, |
| "step": 36000, |
| "train_runtime": 23559.8424, |
| "train_tokens_per_second": 100140.568 |
| }, |
| { |
| "epoch": 0.361, |
| "grad_norm": 0.15519119799137115, |
| "learning_rate": 0.00021618743675969095, |
| "loss": 0.3065, |
| "num_input_tokens_seen": 2365849600, |
| "step": 36100, |
| "train_runtime": 23624.7603, |
| "train_tokens_per_second": 100142.798 |
| }, |
| { |
| "epoch": 0.362, |
| "grad_norm": 0.14744772017002106, |
| "learning_rate": 0.0002157599502349089, |
| "loss": 0.3068, |
| "num_input_tokens_seen": 2372403200, |
| "step": 36200, |
| "train_runtime": 23688.8845, |
| "train_tokens_per_second": 100148.371 |
| }, |
| { |
| "epoch": 0.363, |
| "grad_norm": 0.13838911056518555, |
| "learning_rate": 0.00021533180150815802, |
| "loss": 0.3097, |
| "num_input_tokens_seen": 2378956800, |
| "step": 36300, |
| "train_runtime": 23759.9908, |
| "train_tokens_per_second": 100124.483 |
| }, |
| { |
| "epoch": 0.364, |
| "grad_norm": 0.12536117434501648, |
| "learning_rate": 0.00021490299489089132, |
| "loss": 0.3067, |
| "num_input_tokens_seen": 2385510400, |
| "step": 36400, |
| "train_runtime": 23823.7123, |
| "train_tokens_per_second": 100131.767 |
| }, |
| { |
| "epoch": 0.365, |
| "grad_norm": 0.14205192029476166, |
| "learning_rate": 0.00021447353470118656, |
| "loss": 0.3049, |
| "num_input_tokens_seen": 2392064000, |
| "step": 36500, |
| "train_runtime": 23887.5453, |
| "train_tokens_per_second": 100138.544 |
| }, |
| { |
| "epoch": 0.366, |
| "grad_norm": 0.11950815469026566, |
| "learning_rate": 0.00021404342526370326, |
| "loss": 0.3072, |
| "num_input_tokens_seen": 2398617600, |
| "step": 36600, |
| "train_runtime": 23951.3108, |
| "train_tokens_per_second": 100145.567 |
| }, |
| { |
| "epoch": 0.367, |
| "grad_norm": 0.1286599189043045, |
| "learning_rate": 0.00021361267090963846, |
| "loss": 0.3096, |
| "num_input_tokens_seen": 2405171200, |
| "step": 36700, |
| "train_runtime": 24016.5354, |
| "train_tokens_per_second": 100146.468 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.12663663923740387, |
| "learning_rate": 0.0002131812759766839, |
| "loss": 0.3054, |
| "num_input_tokens_seen": 2411724800, |
| "step": 36800, |
| "train_runtime": 24085.8974, |
| "train_tokens_per_second": 100130.162 |
| }, |
| { |
| "epoch": 0.369, |
| "grad_norm": 0.16495896875858307, |
| "learning_rate": 0.00021274924480898169, |
| "loss": 0.3037, |
| "num_input_tokens_seen": 2418278400, |
| "step": 36900, |
| "train_runtime": 24149.4634, |
| "train_tokens_per_second": 100137.977 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.13351881504058838, |
| "learning_rate": 0.00021231658175708087, |
| "loss": 0.309, |
| "num_input_tokens_seen": 2424832000, |
| "step": 37000, |
| "train_runtime": 24214.3635, |
| "train_tokens_per_second": 100140.233 |
| }, |
| { |
| "epoch": 0.371, |
| "grad_norm": 0.13137440383434296, |
| "learning_rate": 0.00021188329117789357, |
| "loss": 0.3061, |
| "num_input_tokens_seen": 2431385600, |
| "step": 37100, |
| "train_runtime": 24284.8537, |
| "train_tokens_per_second": 100119.426 |
| }, |
| { |
| "epoch": 0.372, |
| "grad_norm": 0.17069390416145325, |
| "learning_rate": 0.0002114493774346512, |
| "loss": 0.3075, |
| "num_input_tokens_seen": 2437939200, |
| "step": 37200, |
| "train_runtime": 24349.7441, |
| "train_tokens_per_second": 100121.759 |
| }, |
| { |
| "epoch": 0.373, |
| "grad_norm": 0.13554754853248596, |
| "learning_rate": 0.00021101484489686025, |
| "loss": 0.3056, |
| "num_input_tokens_seen": 2444492800, |
| "step": 37300, |
| "train_runtime": 24413.4106, |
| "train_tokens_per_second": 100129.099 |
| }, |
| { |
| "epoch": 0.374, |
| "grad_norm": 0.24161159992218018, |
| "learning_rate": 0.00021057969794025866, |
| "loss": 0.3084, |
| "num_input_tokens_seen": 2451046400, |
| "step": 37400, |
| "train_runtime": 24479.2787, |
| "train_tokens_per_second": 100127.395 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 0.11480960994958878, |
| "learning_rate": 0.00021014394094677128, |
| "loss": 0.3065, |
| "num_input_tokens_seen": 2457600000, |
| "step": 37500, |
| "train_runtime": 24543.1085, |
| "train_tokens_per_second": 100134.015 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 0.1333978921175003, |
| "learning_rate": 0.00020970757830446633, |
| "loss": 0.3047, |
| "num_input_tokens_seen": 2464153600, |
| "step": 37600, |
| "train_runtime": 24612.4036, |
| "train_tokens_per_second": 100118.365 |
| }, |
| { |
| "epoch": 0.377, |
| "grad_norm": 0.1306515485048294, |
| "learning_rate": 0.00020927061440751072, |
| "loss": 0.3039, |
| "num_input_tokens_seen": 2470707200, |
| "step": 37700, |
| "train_runtime": 24676.7406, |
| "train_tokens_per_second": 100122.915 |
| }, |
| { |
| "epoch": 0.378, |
| "grad_norm": 0.19177651405334473, |
| "learning_rate": 0.00020883305365612602, |
| "loss": 0.3091, |
| "num_input_tokens_seen": 2477260800, |
| "step": 37800, |
| "train_runtime": 24742.4612, |
| "train_tokens_per_second": 100121.842 |
| }, |
| { |
| "epoch": 0.379, |
| "grad_norm": 0.14794479310512543, |
| "learning_rate": 0.00020839490045654425, |
| "loss": 0.3103, |
| "num_input_tokens_seen": 2483814400, |
| "step": 37900, |
| "train_runtime": 24807.833, |
| "train_tokens_per_second": 100122.183 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 0.1391579508781433, |
| "learning_rate": 0.00020795615922096313, |
| "loss": 0.305, |
| "num_input_tokens_seen": 2490368000, |
| "step": 38000, |
| "train_runtime": 24871.0815, |
| "train_tokens_per_second": 100131.07 |
| }, |
| { |
| "epoch": 0.381, |
| "grad_norm": 0.14466038346290588, |
| "learning_rate": 0.00020751683436750207, |
| "loss": 0.3066, |
| "num_input_tokens_seen": 2496921600, |
| "step": 38100, |
| "train_runtime": 24941.5584, |
| "train_tokens_per_second": 100110.89 |
| }, |
| { |
| "epoch": 0.382, |
| "grad_norm": 0.14706650376319885, |
| "learning_rate": 0.00020707693032015752, |
| "loss": 0.3131, |
| "num_input_tokens_seen": 2503475200, |
| "step": 38200, |
| "train_runtime": 25006.658, |
| "train_tokens_per_second": 100112.346 |
| }, |
| { |
| "epoch": 0.383, |
| "grad_norm": 0.1455349326133728, |
| "learning_rate": 0.00020663645150875834, |
| "loss": 0.3058, |
| "num_input_tokens_seen": 2510028800, |
| "step": 38300, |
| "train_runtime": 25070.3473, |
| "train_tokens_per_second": 100119.427 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.13858123123645782, |
| "learning_rate": 0.00020619540236892125, |
| "loss": 0.3066, |
| "num_input_tokens_seen": 2516582400, |
| "step": 38400, |
| "train_runtime": 25135.6982, |
| "train_tokens_per_second": 100119.853 |
| }, |
| { |
| "epoch": 0.385, |
| "grad_norm": 0.17408473789691925, |
| "learning_rate": 0.00020575378734200616, |
| "loss": 0.3068, |
| "num_input_tokens_seen": 2523136000, |
| "step": 38500, |
| "train_runtime": 25206.1351, |
| "train_tokens_per_second": 100100.075 |
| }, |
| { |
| "epoch": 0.386, |
| "grad_norm": 0.12729153037071228, |
| "learning_rate": 0.0002053116108750715, |
| "loss": 0.3062, |
| "num_input_tokens_seen": 2529689600, |
| "step": 38600, |
| "train_runtime": 25270.823, |
| "train_tokens_per_second": 100103.174 |
| }, |
| { |
| "epoch": 0.387, |
| "grad_norm": 0.15452224016189575, |
| "learning_rate": 0.0002048688774208294, |
| "loss": 0.3029, |
| "num_input_tokens_seen": 2536243200, |
| "step": 38700, |
| "train_runtime": 25334.6018, |
| "train_tokens_per_second": 100109.851 |
| }, |
| { |
| "epoch": 0.388, |
| "grad_norm": 0.11749983578920364, |
| "learning_rate": 0.0002044255914376009, |
| "loss": 0.3055, |
| "num_input_tokens_seen": 2542796800, |
| "step": 38800, |
| "train_runtime": 25398.9456, |
| "train_tokens_per_second": 100114.266 |
| }, |
| { |
| "epoch": 0.389, |
| "grad_norm": 0.12558670341968536, |
| "learning_rate": 0.00020398175738927082, |
| "loss": 0.307, |
| "num_input_tokens_seen": 2549350400, |
| "step": 38900, |
| "train_runtime": 25469.3443, |
| "train_tokens_per_second": 100094.858 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.11652723699808121, |
| "learning_rate": 0.00020353737974524312, |
| "loss": 0.3059, |
| "num_input_tokens_seen": 2555904000, |
| "step": 39000, |
| "train_runtime": 25534.1962, |
| "train_tokens_per_second": 100097.296 |
| }, |
| { |
| "epoch": 0.391, |
| "grad_norm": 0.14530417323112488, |
| "learning_rate": 0.00020309246298039584, |
| "loss": 0.3043, |
| "num_input_tokens_seen": 2562457600, |
| "step": 39100, |
| "train_runtime": 25597.7668, |
| "train_tokens_per_second": 100104.733 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 0.2145591825246811, |
| "learning_rate": 0.0002026470115750357, |
| "loss": 0.3097, |
| "num_input_tokens_seen": 2569011200, |
| "step": 39200, |
| "train_runtime": 25662.2383, |
| "train_tokens_per_second": 100108.618 |
| }, |
| { |
| "epoch": 0.393, |
| "grad_norm": 0.13407446444034576, |
| "learning_rate": 0.0002022010300148535, |
| "loss": 0.3072, |
| "num_input_tokens_seen": 2575564800, |
| "step": 39300, |
| "train_runtime": 25726.7635, |
| "train_tokens_per_second": 100112.274 |
| }, |
| { |
| "epoch": 0.394, |
| "grad_norm": 0.20070548355579376, |
| "learning_rate": 0.0002017545227908786, |
| "loss": 0.3042, |
| "num_input_tokens_seen": 2582118400, |
| "step": 39400, |
| "train_runtime": 25798.3829, |
| "train_tokens_per_second": 100088.382 |
| }, |
| { |
| "epoch": 0.395, |
| "grad_norm": 0.12969562411308289, |
| "learning_rate": 0.00020130749439943376, |
| "loss": 0.3025, |
| "num_input_tokens_seen": 2588672000, |
| "step": 39500, |
| "train_runtime": 25861.9837, |
| "train_tokens_per_second": 100095.647 |
| }, |
| { |
| "epoch": 0.396, |
| "grad_norm": 0.22430787980556488, |
| "learning_rate": 0.00020085994934208998, |
| "loss": 0.3075, |
| "num_input_tokens_seen": 2595225600, |
| "step": 39600, |
| "train_runtime": 25927.1388, |
| "train_tokens_per_second": 100096.876 |
| }, |
| { |
| "epoch": 0.397, |
| "grad_norm": 0.1543964445590973, |
| "learning_rate": 0.00020041189212562094, |
| "loss": 0.3061, |
| "num_input_tokens_seen": 2601779200, |
| "step": 39700, |
| "train_runtime": 25990.8084, |
| "train_tokens_per_second": 100103.82 |
| }, |
| { |
| "epoch": 0.398, |
| "grad_norm": 0.17474599182605743, |
| "learning_rate": 0.0001999633272619579, |
| "loss": 0.3026, |
| "num_input_tokens_seen": 2608332800, |
| "step": 39800, |
| "train_runtime": 26055.1661, |
| "train_tokens_per_second": 100108.086 |
| }, |
| { |
| "epoch": 0.399, |
| "grad_norm": 0.12200487405061722, |
| "learning_rate": 0.00019951425926814404, |
| "loss": 0.3051, |
| "num_input_tokens_seen": 2614886400, |
| "step": 39900, |
| "train_runtime": 26125.5167, |
| "train_tokens_per_second": 100089.366 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.12909364700317383, |
| "learning_rate": 0.00019906469266628904, |
| "loss": 0.3083, |
| "num_input_tokens_seen": 2621440000, |
| "step": 40000, |
| "train_runtime": 26189.9855, |
| "train_tokens_per_second": 100093.221 |
| }, |
| { |
| "epoch": 0.401, |
| "grad_norm": 0.14507311582565308, |
| "learning_rate": 0.0001986146319835236, |
| "loss": 0.3063, |
| "num_input_tokens_seen": 2627993600, |
| "step": 40100, |
| "train_runtime": 26254.1189, |
| "train_tokens_per_second": 100098.335 |
| }, |
| { |
| "epoch": 0.402, |
| "grad_norm": 0.15015749633312225, |
| "learning_rate": 0.00019816408175195383, |
| "loss": 0.3024, |
| "num_input_tokens_seen": 2634547200, |
| "step": 40200, |
| "train_runtime": 26317.4656, |
| "train_tokens_per_second": 100106.417 |
| }, |
| { |
| "epoch": 0.403, |
| "grad_norm": 0.1793050467967987, |
| "learning_rate": 0.0001977130465086155, |
| "loss": 0.3058, |
| "num_input_tokens_seen": 2641100800, |
| "step": 40300, |
| "train_runtime": 26387.6285, |
| "train_tokens_per_second": 100088.6 |
| }, |
| { |
| "epoch": 0.404, |
| "grad_norm": 0.13494957983493805, |
| "learning_rate": 0.0001972615307954286, |
| "loss": 0.3058, |
| "num_input_tokens_seen": 2647654400, |
| "step": 40400, |
| "train_runtime": 26452.3646, |
| "train_tokens_per_second": 100091.407 |
| }, |
| { |
| "epoch": 0.405, |
| "grad_norm": 0.15225248038768768, |
| "learning_rate": 0.00019680953915915124, |
| "loss": 0.3032, |
| "num_input_tokens_seen": 2654208000, |
| "step": 40500, |
| "train_runtime": 26516.6796, |
| "train_tokens_per_second": 100095.79 |
| }, |
| { |
| "epoch": 0.406, |
| "grad_norm": 0.15482735633850098, |
| "learning_rate": 0.00019635707615133427, |
| "loss": 0.3061, |
| "num_input_tokens_seen": 2660761600, |
| "step": 40600, |
| "train_runtime": 26585.3848, |
| "train_tokens_per_second": 100083.622 |
| }, |
| { |
| "epoch": 0.407, |
| "grad_norm": 0.15725013613700867, |
| "learning_rate": 0.00019590414632827513, |
| "loss": 0.3101, |
| "num_input_tokens_seen": 2667315200, |
| "step": 40700, |
| "train_runtime": 26649.9092, |
| "train_tokens_per_second": 100087.215 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 0.16835036873817444, |
| "learning_rate": 0.00019545075425097204, |
| "loss": 0.3049, |
| "num_input_tokens_seen": 2673868800, |
| "step": 40800, |
| "train_runtime": 26714.9814, |
| "train_tokens_per_second": 100088.739 |
| }, |
| { |
| "epoch": 0.409, |
| "grad_norm": 0.167361319065094, |
| "learning_rate": 0.00019499690448507827, |
| "loss": 0.3027, |
| "num_input_tokens_seen": 2680422400, |
| "step": 40900, |
| "train_runtime": 26779.2716, |
| "train_tokens_per_second": 100093.178 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 0.1781291663646698, |
| "learning_rate": 0.00019454260160085588, |
| "loss": 0.3005, |
| "num_input_tokens_seen": 2686976000, |
| "step": 41000, |
| "train_runtime": 26843.9197, |
| "train_tokens_per_second": 100096.261 |
| }, |
| { |
| "epoch": 0.411, |
| "grad_norm": 0.1289975345134735, |
| "learning_rate": 0.0001940878501731299, |
| "loss": 0.3085, |
| "num_input_tokens_seen": 2693529600, |
| "step": 41100, |
| "train_runtime": 26914.2047, |
| "train_tokens_per_second": 100078.365 |
| }, |
| { |
| "epoch": 0.412, |
| "grad_norm": 0.12804220616817474, |
| "learning_rate": 0.00019363265478124214, |
| "loss": 0.3062, |
| "num_input_tokens_seen": 2700083200, |
| "step": 41200, |
| "train_runtime": 26979.3069, |
| "train_tokens_per_second": 100079.784 |
| }, |
| { |
| "epoch": 0.413, |
| "grad_norm": 0.14838483929634094, |
| "learning_rate": 0.00019317702000900516, |
| "loss": 0.3065, |
| "num_input_tokens_seen": 2706636800, |
| "step": 41300, |
| "train_runtime": 27043.7101, |
| "train_tokens_per_second": 100083.783 |
| }, |
| { |
| "epoch": 0.414, |
| "grad_norm": 0.3049434423446655, |
| "learning_rate": 0.000192720950444656, |
| "loss": 0.3075, |
| "num_input_tokens_seen": 2713190400, |
| "step": 41400, |
| "train_runtime": 27108.2869, |
| "train_tokens_per_second": 100087.121 |
| }, |
| { |
| "epoch": 0.415, |
| "grad_norm": 0.16474822163581848, |
| "learning_rate": 0.00019226445068081018, |
| "loss": 0.3087, |
| "num_input_tokens_seen": 2719744000, |
| "step": 41500, |
| "train_runtime": 27173.4382, |
| "train_tokens_per_second": 100088.328 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.18445253372192383, |
| "learning_rate": 0.00019180752531441523, |
| "loss": 0.3065, |
| "num_input_tokens_seen": 2726297600, |
| "step": 41600, |
| "train_runtime": 27237.7945, |
| "train_tokens_per_second": 100092.45 |
| }, |
| { |
| "epoch": 0.417, |
| "grad_norm": 0.1226682960987091, |
| "learning_rate": 0.00019135017894670456, |
| "loss": 0.3062, |
| "num_input_tokens_seen": 2732851200, |
| "step": 41700, |
| "train_runtime": 27307.5255, |
| "train_tokens_per_second": 100076.852 |
| }, |
| { |
| "epoch": 0.418, |
| "grad_norm": 0.12846247851848602, |
| "learning_rate": 0.0001908924161831509, |
| "loss": 0.3064, |
| "num_input_tokens_seen": 2739404800, |
| "step": 41800, |
| "train_runtime": 27371.4125, |
| "train_tokens_per_second": 100082.698 |
| }, |
| { |
| "epoch": 0.419, |
| "grad_norm": 0.14241133630275726, |
| "learning_rate": 0.0001904342416334203, |
| "loss": 0.3048, |
| "num_input_tokens_seen": 2745958400, |
| "step": 41900, |
| "train_runtime": 27436.5912, |
| "train_tokens_per_second": 100083.803 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.19496770203113556, |
| "learning_rate": 0.00018997565991132532, |
| "loss": 0.3046, |
| "num_input_tokens_seen": 2752512000, |
| "step": 42000, |
| "train_runtime": 27500.5131, |
| "train_tokens_per_second": 100089.478 |
| }, |
| { |
| "epoch": 0.421, |
| "grad_norm": 0.16859756410121918, |
| "learning_rate": 0.0001895166756347789, |
| "loss": 0.3082, |
| "num_input_tokens_seen": 2759065600, |
| "step": 42100, |
| "train_runtime": 27570.8932, |
| "train_tokens_per_second": 100071.68 |
| }, |
| { |
| "epoch": 0.422, |
| "grad_norm": 0.13300351798534393, |
| "learning_rate": 0.0001890572934257475, |
| "loss": 0.3065, |
| "num_input_tokens_seen": 2765619200, |
| "step": 42200, |
| "train_runtime": 27634.6434, |
| "train_tokens_per_second": 100077.977 |
| }, |
| { |
| "epoch": 0.423, |
| "grad_norm": 0.14460822939872742, |
| "learning_rate": 0.00018859751791020497, |
| "loss": 0.3055, |
| "num_input_tokens_seen": 2772172800, |
| "step": 42300, |
| "train_runtime": 27700.3395, |
| "train_tokens_per_second": 100077.214 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 0.1369091421365738, |
| "learning_rate": 0.0001881373537180856, |
| "loss": 0.3026, |
| "num_input_tokens_seen": 2778726400, |
| "step": 42400, |
| "train_runtime": 27764.0211, |
| "train_tokens_per_second": 100083.716 |
| }, |
| { |
| "epoch": 0.425, |
| "grad_norm": 0.15593157708644867, |
| "learning_rate": 0.00018767680548323766, |
| "loss": 0.3014, |
| "num_input_tokens_seen": 2785280000, |
| "step": 42500, |
| "train_runtime": 27828.3317, |
| "train_tokens_per_second": 100087.926 |
| }, |
| { |
| "epoch": 0.426, |
| "grad_norm": 0.18689674139022827, |
| "learning_rate": 0.0001872158778433768, |
| "loss": 0.3041, |
| "num_input_tokens_seen": 2791833600, |
| "step": 42600, |
| "train_runtime": 27897.9539, |
| "train_tokens_per_second": 100073.059 |
| }, |
| { |
| "epoch": 0.427, |
| "grad_norm": 0.1532142609357834, |
| "learning_rate": 0.0001867545754400392, |
| "loss": 0.3041, |
| "num_input_tokens_seen": 2798387200, |
| "step": 42700, |
| "train_runtime": 27964.2157, |
| "train_tokens_per_second": 100070.291 |
| }, |
| { |
| "epoch": 0.428, |
| "grad_norm": 0.12894967198371887, |
| "learning_rate": 0.000186292902918535, |
| "loss": 0.3047, |
| "num_input_tokens_seen": 2804940800, |
| "step": 42800, |
| "train_runtime": 28028.1798, |
| "train_tokens_per_second": 100075.739 |
| }, |
| { |
| "epoch": 0.429, |
| "grad_norm": 0.14526289701461792, |
| "learning_rate": 0.00018583086492790136, |
| "loss": 0.3097, |
| "num_input_tokens_seen": 2811494400, |
| "step": 42900, |
| "train_runtime": 28093.2724, |
| "train_tokens_per_second": 100077.142 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 0.15546266734600067, |
| "learning_rate": 0.00018536846612085566, |
| "loss": 0.3066, |
| "num_input_tokens_seen": 2818048000, |
| "step": 43000, |
| "train_runtime": 28157.8145, |
| "train_tokens_per_second": 100080.495 |
| }, |
| { |
| "epoch": 0.431, |
| "grad_norm": 0.16307438910007477, |
| "learning_rate": 0.00018490571115374878, |
| "loss": 0.3073, |
| "num_input_tokens_seen": 2824601600, |
| "step": 43100, |
| "train_runtime": 28227.9591, |
| "train_tokens_per_second": 100063.968 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.1360054761171341, |
| "learning_rate": 0.00018444260468651816, |
| "loss": 0.3013, |
| "num_input_tokens_seen": 2831155200, |
| "step": 43200, |
| "train_runtime": 28291.3921, |
| "train_tokens_per_second": 100071.258 |
| }, |
| { |
| "epoch": 0.433, |
| "grad_norm": 0.1404498666524887, |
| "learning_rate": 0.00018397915138264068, |
| "loss": 0.3066, |
| "num_input_tokens_seen": 2837708800, |
| "step": 43300, |
| "train_runtime": 28355.3195, |
| "train_tokens_per_second": 100076.771 |
| }, |
| { |
| "epoch": 0.434, |
| "grad_norm": 0.1926499307155609, |
| "learning_rate": 0.00018351535590908606, |
| "loss": 0.3012, |
| "num_input_tokens_seen": 2844262400, |
| "step": 43400, |
| "train_runtime": 28420.6726, |
| "train_tokens_per_second": 100077.237 |
| }, |
| { |
| "epoch": 0.435, |
| "grad_norm": 0.13713879883289337, |
| "learning_rate": 0.00018305122293626948, |
| "loss": 0.3029, |
| "num_input_tokens_seen": 2850816000, |
| "step": 43500, |
| "train_runtime": 28490.1826, |
| "train_tokens_per_second": 100063.1 |
| }, |
| { |
| "epoch": 0.436, |
| "grad_norm": 0.1541578322649002, |
| "learning_rate": 0.00018258675713800492, |
| "loss": 0.3061, |
| "num_input_tokens_seen": 2857369600, |
| "step": 43600, |
| "train_runtime": 28555.7903, |
| "train_tokens_per_second": 100062.704 |
| }, |
| { |
| "epoch": 0.437, |
| "grad_norm": 0.14117270708084106, |
| "learning_rate": 0.00018212196319145773, |
| "loss": 0.3053, |
| "num_input_tokens_seen": 2863923200, |
| "step": 43700, |
| "train_runtime": 28622.1811, |
| "train_tokens_per_second": 100059.572 |
| }, |
| { |
| "epoch": 0.438, |
| "grad_norm": 0.14943140745162964, |
| "learning_rate": 0.00018165684577709778, |
| "loss": 0.3043, |
| "num_input_tokens_seen": 2870476800, |
| "step": 43800, |
| "train_runtime": 28686.5648, |
| "train_tokens_per_second": 100063.455 |
| }, |
| { |
| "epoch": 0.439, |
| "grad_norm": 0.14043770730495453, |
| "learning_rate": 0.0001811914095786524, |
| "loss": 0.3048, |
| "num_input_tokens_seen": 2877030400, |
| "step": 43900, |
| "train_runtime": 28751.3532, |
| "train_tokens_per_second": 100065.913 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.17811591923236847, |
| "learning_rate": 0.0001807256592830588, |
| "loss": 0.3088, |
| "num_input_tokens_seen": 2883584000, |
| "step": 44000, |
| "train_runtime": 28815.5193, |
| "train_tokens_per_second": 100070.52 |
| }, |
| { |
| "epoch": 0.441, |
| "grad_norm": 0.14588113129138947, |
| "learning_rate": 0.00018025959958041732, |
| "loss": 0.3017, |
| "num_input_tokens_seen": 2890137600, |
| "step": 44100, |
| "train_runtime": 28880.019, |
| "train_tokens_per_second": 100073.951 |
| }, |
| { |
| "epoch": 0.442, |
| "grad_norm": 0.22986213862895966, |
| "learning_rate": 0.00017979323516394407, |
| "loss": 0.3049, |
| "num_input_tokens_seen": 2896691200, |
| "step": 44200, |
| "train_runtime": 28945.7871, |
| "train_tokens_per_second": 100072.981 |
| }, |
| { |
| "epoch": 0.443, |
| "grad_norm": 0.853501558303833, |
| "learning_rate": 0.00017932657072992344, |
| "loss": 0.3081, |
| "num_input_tokens_seen": 2903244800, |
| "step": 44300, |
| "train_runtime": 29016.3509, |
| "train_tokens_per_second": 100055.476 |
| }, |
| { |
| "epoch": 0.444, |
| "grad_norm": 0.15835335850715637, |
| "learning_rate": 0.00017885961097766117, |
| "loss": 0.3035, |
| "num_input_tokens_seen": 2909798400, |
| "step": 44400, |
| "train_runtime": 29079.9877, |
| "train_tokens_per_second": 100061.886 |
| }, |
| { |
| "epoch": 0.445, |
| "grad_norm": 0.25418880581855774, |
| "learning_rate": 0.00017839236060943674, |
| "loss": 0.3014, |
| "num_input_tokens_seen": 2916352000, |
| "step": 44500, |
| "train_runtime": 29144.3776, |
| "train_tokens_per_second": 100065.681 |
| }, |
| { |
| "epoch": 0.446, |
| "grad_norm": 0.14922253787517548, |
| "learning_rate": 0.0001779248243304562, |
| "loss": 0.3038, |
| "num_input_tokens_seen": 2922905600, |
| "step": 44600, |
| "train_runtime": 29208.2393, |
| "train_tokens_per_second": 100071.27 |
| }, |
| { |
| "epoch": 0.447, |
| "grad_norm": 0.14103923738002777, |
| "learning_rate": 0.00017745700684880465, |
| "loss": 0.3064, |
| "num_input_tokens_seen": 2929459200, |
| "step": 44700, |
| "train_runtime": 29273.1105, |
| "train_tokens_per_second": 100073.383 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.15813007950782776, |
| "learning_rate": 0.000176988912875399, |
| "loss": 0.3049, |
| "num_input_tokens_seen": 2936012800, |
| "step": 44800, |
| "train_runtime": 29342.9224, |
| "train_tokens_per_second": 100058.636 |
| }, |
| { |
| "epoch": 0.449, |
| "grad_norm": 0.1471075564622879, |
| "learning_rate": 0.00017652054712394028, |
| "loss": 0.3029, |
| "num_input_tokens_seen": 2942566400, |
| "step": 44900, |
| "train_runtime": 29408.1792, |
| "train_tokens_per_second": 100059.455 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.16910097002983093, |
| "learning_rate": 0.0001760519143108665, |
| "loss": 0.3026, |
| "num_input_tokens_seen": 2949120000, |
| "step": 45000, |
| "train_runtime": 29472.6802, |
| "train_tokens_per_second": 100062.837 |
| }, |
| { |
| "epoch": 0.451, |
| "grad_norm": 0.15087512135505676, |
| "learning_rate": 0.00017558301915530483, |
| "loss": 0.305, |
| "num_input_tokens_seen": 2955673600, |
| "step": 45100, |
| "train_runtime": 29537.0324, |
| "train_tokens_per_second": 100066.708 |
| }, |
| { |
| "epoch": 0.452, |
| "grad_norm": 0.16292531788349152, |
| "learning_rate": 0.00017511386637902428, |
| "loss": 0.305, |
| "num_input_tokens_seen": 2962227200, |
| "step": 45200, |
| "train_runtime": 29600.4356, |
| "train_tokens_per_second": 100073.77 |
| }, |
| { |
| "epoch": 0.453, |
| "grad_norm": 0.14504611492156982, |
| "learning_rate": 0.00017464446070638814, |
| "loss": 0.3061, |
| "num_input_tokens_seen": 2968780800, |
| "step": 45300, |
| "train_runtime": 29670.2849, |
| "train_tokens_per_second": 100059.06 |
| }, |
| { |
| "epoch": 0.454, |
| "grad_norm": 0.14068329334259033, |
| "learning_rate": 0.00017417480686430622, |
| "loss": 0.3096, |
| "num_input_tokens_seen": 2975334400, |
| "step": 45400, |
| "train_runtime": 29735.31, |
| "train_tokens_per_second": 100060.648 |
| }, |
| { |
| "epoch": 0.455, |
| "grad_norm": 0.139748677611351, |
| "learning_rate": 0.00017370490958218765, |
| "loss": 0.3027, |
| "num_input_tokens_seen": 2981888000, |
| "step": 45500, |
| "train_runtime": 29800.4491, |
| "train_tokens_per_second": 100061.848 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 0.1487821340560913, |
| "learning_rate": 0.00017323477359189272, |
| "loss": 0.3023, |
| "num_input_tokens_seen": 2988441600, |
| "step": 45600, |
| "train_runtime": 29869.053, |
| "train_tokens_per_second": 100051.434 |
| }, |
| { |
| "epoch": 0.457, |
| "grad_norm": 0.15015476942062378, |
| "learning_rate": 0.00017276440362768564, |
| "loss": 0.3028, |
| "num_input_tokens_seen": 2994995200, |
| "step": 45700, |
| "train_runtime": 29933.644, |
| "train_tokens_per_second": 100054.481 |
| }, |
| { |
| "epoch": 0.458, |
| "grad_norm": 0.1298416256904602, |
| "learning_rate": 0.0001722938044261868, |
| "loss": 0.3058, |
| "num_input_tokens_seen": 3001548800, |
| "step": 45800, |
| "train_runtime": 29997.6813, |
| "train_tokens_per_second": 100059.36 |
| }, |
| { |
| "epoch": 0.459, |
| "grad_norm": 0.1956530213356018, |
| "learning_rate": 0.0001718229807263249, |
| "loss": 0.3033, |
| "num_input_tokens_seen": 3008102400, |
| "step": 45900, |
| "train_runtime": 30067.1877, |
| "train_tokens_per_second": 100046.018 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.15267929434776306, |
| "learning_rate": 0.0001713519372692894, |
| "loss": 0.3028, |
| "num_input_tokens_seen": 3014656000, |
| "step": 46000, |
| "train_runtime": 30131.0143, |
| "train_tokens_per_second": 100051.594 |
| }, |
| { |
| "epoch": 0.461, |
| "grad_norm": 0.13846905529499054, |
| "learning_rate": 0.0001708806787984826, |
| "loss": 0.3036, |
| "num_input_tokens_seen": 3021209600, |
| "step": 46100, |
| "train_runtime": 30195.5066, |
| "train_tokens_per_second": 100054.94 |
| }, |
| { |
| "epoch": 0.462, |
| "grad_norm": 0.13704828917980194, |
| "learning_rate": 0.00017040921005947212, |
| "loss": 0.3094, |
| "num_input_tokens_seen": 3027763200, |
| "step": 46200, |
| "train_runtime": 30260.3523, |
| "train_tokens_per_second": 100057.104 |
| }, |
| { |
| "epoch": 0.463, |
| "grad_norm": 0.15288543701171875, |
| "learning_rate": 0.0001699375357999429, |
| "loss": 0.3014, |
| "num_input_tokens_seen": 3034316800, |
| "step": 46300, |
| "train_runtime": 30325.5675, |
| "train_tokens_per_second": 100058.039 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.19963988661766052, |
| "learning_rate": 0.0001694656607696496, |
| "loss": 0.3061, |
| "num_input_tokens_seen": 3040870400, |
| "step": 46400, |
| "train_runtime": 30399.8434, |
| "train_tokens_per_second": 100029.147 |
| }, |
| { |
| "epoch": 0.465, |
| "grad_norm": 0.14533430337905884, |
| "learning_rate": 0.0001689935897203684, |
| "loss": 0.3056, |
| "num_input_tokens_seen": 3047424000, |
| "step": 46500, |
| "train_runtime": 30464.3563, |
| "train_tokens_per_second": 100032.443 |
| }, |
| { |
| "epoch": 0.466, |
| "grad_norm": 0.14005503058433533, |
| "learning_rate": 0.0001685213274058496, |
| "loss": 0.3016, |
| "num_input_tokens_seen": 3053977600, |
| "step": 46600, |
| "train_runtime": 30528.7292, |
| "train_tokens_per_second": 100036.185 |
| }, |
| { |
| "epoch": 0.467, |
| "grad_norm": 0.17612388730049133, |
| "learning_rate": 0.00016804887858176944, |
| "loss": 0.3006, |
| "num_input_tokens_seen": 3060531200, |
| "step": 46700, |
| "train_runtime": 30592.7142, |
| "train_tokens_per_second": 100041.179 |
| }, |
| { |
| "epoch": 0.468, |
| "grad_norm": 0.13526348769664764, |
| "learning_rate": 0.00016757624800568238, |
| "loss": 0.3001, |
| "num_input_tokens_seen": 3067084800, |
| "step": 46800, |
| "train_runtime": 30656.5144, |
| "train_tokens_per_second": 100046.755 |
| }, |
| { |
| "epoch": 0.469, |
| "grad_norm": 0.6205772161483765, |
| "learning_rate": 0.00016710344043697301, |
| "loss": 0.3016, |
| "num_input_tokens_seen": 3073638400, |
| "step": 46900, |
| "train_runtime": 30727.0215, |
| "train_tokens_per_second": 100030.47 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.15328101813793182, |
| "learning_rate": 0.0001666304606368083, |
| "loss": 0.3049, |
| "num_input_tokens_seen": 3080192000, |
| "step": 47000, |
| "train_runtime": 30792.0203, |
| "train_tokens_per_second": 100032.15 |
| }, |
| { |
| "epoch": 0.471, |
| "grad_norm": 0.1804981380701065, |
| "learning_rate": 0.00016615731336808962, |
| "loss": 0.3008, |
| "num_input_tokens_seen": 3086745600, |
| "step": 47100, |
| "train_runtime": 30856.1119, |
| "train_tokens_per_second": 100036.764 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 0.1460595428943634, |
| "learning_rate": 0.0001656840033954047, |
| "loss": 0.2996, |
| "num_input_tokens_seen": 3093299200, |
| "step": 47200, |
| "train_runtime": 30922.3293, |
| "train_tokens_per_second": 100034.482 |
| }, |
| { |
| "epoch": 0.473, |
| "grad_norm": 0.17493313550949097, |
| "learning_rate": 0.00016521053548497973, |
| "loss": 0.3005, |
| "num_input_tokens_seen": 3099852800, |
| "step": 47300, |
| "train_runtime": 30985.6891, |
| "train_tokens_per_second": 100041.435 |
| }, |
| { |
| "epoch": 0.474, |
| "grad_norm": 0.11990969628095627, |
| "learning_rate": 0.0001647369144046313, |
| "loss": 0.2995, |
| "num_input_tokens_seen": 3106406400, |
| "step": 47400, |
| "train_runtime": 31056.5152, |
| "train_tokens_per_second": 100024.307 |
| }, |
| { |
| "epoch": 0.475, |
| "grad_norm": 0.15634778141975403, |
| "learning_rate": 0.00016426314492371842, |
| "loss": 0.3054, |
| "num_input_tokens_seen": 3112960000, |
| "step": 47500, |
| "train_runtime": 31121.0302, |
| "train_tokens_per_second": 100027.537 |
| }, |
| { |
| "epoch": 0.476, |
| "grad_norm": 0.14218732714653015, |
| "learning_rate": 0.0001637892318130945, |
| "loss": 0.3036, |
| "num_input_tokens_seen": 3119513600, |
| "step": 47600, |
| "train_runtime": 31185.6411, |
| "train_tokens_per_second": 100030.446 |
| }, |
| { |
| "epoch": 0.477, |
| "grad_norm": 0.147688090801239, |
| "learning_rate": 0.00016331517984505934, |
| "loss": 0.3003, |
| "num_input_tokens_seen": 3126067200, |
| "step": 47700, |
| "train_runtime": 31250.7507, |
| "train_tokens_per_second": 100031.748 |
| }, |
| { |
| "epoch": 0.478, |
| "grad_norm": 0.1728331595659256, |
| "learning_rate": 0.00016284099379331092, |
| "loss": 0.2997, |
| "num_input_tokens_seen": 3132620800, |
| "step": 47800, |
| "train_runtime": 31321.2751, |
| "train_tokens_per_second": 100015.749 |
| }, |
| { |
| "epoch": 0.479, |
| "grad_norm": 0.12835726141929626, |
| "learning_rate": 0.00016236667843289759, |
| "loss": 0.2989, |
| "num_input_tokens_seen": 3139174400, |
| "step": 47900, |
| "train_runtime": 31386.2974, |
| "train_tokens_per_second": 100017.353 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.13368946313858032, |
| "learning_rate": 0.00016189223854016973, |
| "loss": 0.3078, |
| "num_input_tokens_seen": 3145728000, |
| "step": 48000, |
| "train_runtime": 31451.659, |
| "train_tokens_per_second": 100017.872 |
| }, |
| { |
| "epoch": 0.481, |
| "grad_norm": 0.12727653980255127, |
| "learning_rate": 0.00016141767889273182, |
| "loss": 0.3017, |
| "num_input_tokens_seen": 3152281600, |
| "step": 48100, |
| "train_runtime": 31516.5086, |
| "train_tokens_per_second": 100020.013 |
| }, |
| { |
| "epoch": 0.482, |
| "grad_norm": 0.16222263872623444, |
| "learning_rate": 0.00016094300426939417, |
| "loss": 0.3009, |
| "num_input_tokens_seen": 3158835200, |
| "step": 48200, |
| "train_runtime": 31581.3453, |
| "train_tokens_per_second": 100022.186 |
| }, |
| { |
| "epoch": 0.483, |
| "grad_norm": 0.15287387371063232, |
| "learning_rate": 0.00016046821945012505, |
| "loss": 0.2975, |
| "num_input_tokens_seen": 3165388800, |
| "step": 48300, |
| "train_runtime": 31645.8484, |
| "train_tokens_per_second": 100025.405 |
| }, |
| { |
| "epoch": 0.484, |
| "grad_norm": 0.13035738468170166, |
| "learning_rate": 0.00015999332921600226, |
| "loss": 0.3046, |
| "num_input_tokens_seen": 3171942400, |
| "step": 48400, |
| "train_runtime": 31716.5254, |
| "train_tokens_per_second": 100009.139 |
| }, |
| { |
| "epoch": 0.485, |
| "grad_norm": 0.16508948802947998, |
| "learning_rate": 0.00015951833834916532, |
| "loss": 0.3061, |
| "num_input_tokens_seen": 3178496000, |
| "step": 48500, |
| "train_runtime": 31781.7614, |
| "train_tokens_per_second": 100010.064 |
| }, |
| { |
| "epoch": 0.486, |
| "grad_norm": 0.1543286293745041, |
| "learning_rate": 0.00015904325163276672, |
| "loss": 0.2995, |
| "num_input_tokens_seen": 3185049600, |
| "step": 48600, |
| "train_runtime": 31847.2029, |
| "train_tokens_per_second": 100010.34 |
| }, |
| { |
| "epoch": 0.487, |
| "grad_norm": 0.13470540940761566, |
| "learning_rate": 0.00015856807385092466, |
| "loss": 0.3067, |
| "num_input_tokens_seen": 3191603200, |
| "step": 48700, |
| "train_runtime": 31911.0411, |
| "train_tokens_per_second": 100015.64 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 0.15521059930324554, |
| "learning_rate": 0.00015809280978867405, |
| "loss": 0.3009, |
| "num_input_tokens_seen": 3198156800, |
| "step": 48800, |
| "train_runtime": 31975.3091, |
| "train_tokens_per_second": 100019.574 |
| }, |
| { |
| "epoch": 0.489, |
| "grad_norm": 0.16505663096904755, |
| "learning_rate": 0.0001576174642319187, |
| "loss": 0.3019, |
| "num_input_tokens_seen": 3204710400, |
| "step": 48900, |
| "train_runtime": 32039.3359, |
| "train_tokens_per_second": 100024.246 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 0.15701062977313995, |
| "learning_rate": 0.0001571420419673831, |
| "loss": 0.3025, |
| "num_input_tokens_seen": 3211264000, |
| "step": 49000, |
| "train_runtime": 32104.9123, |
| "train_tokens_per_second": 100024.07 |
| }, |
| { |
| "epoch": 0.491, |
| "grad_norm": 0.22376379370689392, |
| "learning_rate": 0.0001566665477825642, |
| "loss": 0.3035, |
| "num_input_tokens_seen": 3217817600, |
| "step": 49100, |
| "train_runtime": 32177.5739, |
| "train_tokens_per_second": 100001.871 |
| }, |
| { |
| "epoch": 0.492, |
| "grad_norm": 0.1716614067554474, |
| "learning_rate": 0.0001561909864656831, |
| "loss": 0.3046, |
| "num_input_tokens_seen": 3224371200, |
| "step": 49200, |
| "train_runtime": 32241.8903, |
| "train_tokens_per_second": 100005.65 |
| }, |
| { |
| "epoch": 0.493, |
| "grad_norm": 0.17557290196418762, |
| "learning_rate": 0.00015571536280563705, |
| "loss": 0.2987, |
| "num_input_tokens_seen": 3230924800, |
| "step": 49300, |
| "train_runtime": 32307.4373, |
| "train_tokens_per_second": 100005.605 |
| }, |
| { |
| "epoch": 0.494, |
| "grad_norm": 0.16884572803974152, |
| "learning_rate": 0.000155239681591951, |
| "loss": 0.2986, |
| "num_input_tokens_seen": 3237478400, |
| "step": 49400, |
| "train_runtime": 32371.4412, |
| "train_tokens_per_second": 100010.326 |
| }, |
| { |
| "epoch": 0.495, |
| "grad_norm": 0.15279650688171387, |
| "learning_rate": 0.00015476394761472953, |
| "loss": 0.2982, |
| "num_input_tokens_seen": 3244032000, |
| "step": 49500, |
| "train_runtime": 32436.5241, |
| "train_tokens_per_second": 100011.702 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.1866491436958313, |
| "learning_rate": 0.00015428816566460843, |
| "loss": 0.3038, |
| "num_input_tokens_seen": 3250585600, |
| "step": 49600, |
| "train_runtime": 32508.3167, |
| "train_tokens_per_second": 99992.43 |
| }, |
| { |
| "epoch": 0.497, |
| "grad_norm": 0.14084835350513458, |
| "learning_rate": 0.00015381234053270669, |
| "loss": 0.3027, |
| "num_input_tokens_seen": 3257139200, |
| "step": 49700, |
| "train_runtime": 32572.1194, |
| "train_tokens_per_second": 99997.767 |
| }, |
| { |
| "epoch": 0.498, |
| "grad_norm": 0.16111333668231964, |
| "learning_rate": 0.0001533364770105781, |
| "loss": 0.3015, |
| "num_input_tokens_seen": 3263692800, |
| "step": 49800, |
| "train_runtime": 32637.2501, |
| "train_tokens_per_second": 99999.013 |
| }, |
| { |
| "epoch": 0.499, |
| "grad_norm": 0.14655210077762604, |
| "learning_rate": 0.0001528605798901631, |
| "loss": 0.3012, |
| "num_input_tokens_seen": 3270246400, |
| "step": 49900, |
| "train_runtime": 32707.4201, |
| "train_tokens_per_second": 99984.847 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.1385914832353592, |
| "learning_rate": 0.00015238465396374027, |
| "loss": 0.3027, |
| "num_input_tokens_seen": 3276800000, |
| "step": 50000, |
| "train_runtime": 32772.7798, |
| "train_tokens_per_second": 99985.415 |
| }, |
| { |
| "epoch": 0.501, |
| "grad_norm": 0.1433262825012207, |
| "learning_rate": 0.00015190870402387858, |
| "loss": 0.3006, |
| "num_input_tokens_seen": 3283353600, |
| "step": 50100, |
| "train_runtime": 32837.3412, |
| "train_tokens_per_second": 99988.412 |
| }, |
| { |
| "epoch": 0.502, |
| "grad_norm": 0.15529057383537292, |
| "learning_rate": 0.00015143273486338857, |
| "loss": 0.2995, |
| "num_input_tokens_seen": 3289907200, |
| "step": 50200, |
| "train_runtime": 32902.1033, |
| "train_tokens_per_second": 99990.787 |
| }, |
| { |
| "epoch": 0.503, |
| "grad_norm": 0.1301671862602234, |
| "learning_rate": 0.00015095675127527438, |
| "loss": 0.3055, |
| "num_input_tokens_seen": 3296460800, |
| "step": 50300, |
| "train_runtime": 32967.0743, |
| "train_tokens_per_second": 99992.519 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 0.1454419493675232, |
| "learning_rate": 0.00015048075805268547, |
| "loss": 0.3036, |
| "num_input_tokens_seen": 3303014400, |
| "step": 50400, |
| "train_runtime": 33033.1243, |
| "train_tokens_per_second": 99990.978 |
| }, |
| { |
| "epoch": 0.505, |
| "grad_norm": 0.1473357379436493, |
| "learning_rate": 0.00015000475998886825, |
| "loss": 0.3018, |
| "num_input_tokens_seen": 3309568000, |
| "step": 50500, |
| "train_runtime": 33105.2406, |
| "train_tokens_per_second": 99971.121 |
| }, |
| { |
| "epoch": 0.506, |
| "grad_norm": 0.13996386528015137, |
| "learning_rate": 0.00014952876187711804, |
| "loss": 0.2974, |
| "num_input_tokens_seen": 3316121600, |
| "step": 50600, |
| "train_runtime": 33169.1198, |
| "train_tokens_per_second": 99976.171 |
| }, |
| { |
| "epoch": 0.507, |
| "grad_norm": 0.14000660181045532, |
| "learning_rate": 0.00014905276851073053, |
| "loss": 0.2992, |
| "num_input_tokens_seen": 3322675200, |
| "step": 50700, |
| "train_runtime": 33234.0005, |
| "train_tokens_per_second": 99978.19 |
| }, |
| { |
| "epoch": 0.508, |
| "grad_norm": 0.14661286771297455, |
| "learning_rate": 0.00014857678468295352, |
| "loss": 0.3045, |
| "num_input_tokens_seen": 3329228800, |
| "step": 50800, |
| "train_runtime": 33299.7758, |
| "train_tokens_per_second": 99977.514 |
| }, |
| { |
| "epoch": 0.509, |
| "grad_norm": 0.15111635625362396, |
| "learning_rate": 0.00014810081518693902, |
| "loss": 0.3006, |
| "num_input_tokens_seen": 3335782400, |
| "step": 50900, |
| "train_runtime": 33370.9097, |
| "train_tokens_per_second": 99960.787 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.12965109944343567, |
| "learning_rate": 0.0001476248648156945, |
| "loss": 0.2986, |
| "num_input_tokens_seen": 3342336000, |
| "step": 51000, |
| "train_runtime": 33435.7602, |
| "train_tokens_per_second": 99962.913 |
| }, |
| { |
| "epoch": 0.511, |
| "grad_norm": 0.13791891932487488, |
| "learning_rate": 0.00014714893836203485, |
| "loss": 0.2994, |
| "num_input_tokens_seen": 3348889600, |
| "step": 51100, |
| "train_runtime": 33500.2878, |
| "train_tokens_per_second": 99965.995 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.1420348435640335, |
| "learning_rate": 0.0001466730406185343, |
| "loss": 0.2996, |
| "num_input_tokens_seen": 3355443200, |
| "step": 51200, |
| "train_runtime": 33564.5521, |
| "train_tokens_per_second": 99969.849 |
| }, |
| { |
| "epoch": 0.513, |
| "grad_norm": 0.1938745528459549, |
| "learning_rate": 0.0001461971763774778, |
| "loss": 0.3007, |
| "num_input_tokens_seen": 3361996800, |
| "step": 51300, |
| "train_runtime": 33630.8004, |
| "train_tokens_per_second": 99967.79 |
| }, |
| { |
| "epoch": 0.514, |
| "grad_norm": 0.1449531763792038, |
| "learning_rate": 0.0001457213504308129, |
| "loss": 0.3011, |
| "num_input_tokens_seen": 3368550400, |
| "step": 51400, |
| "train_runtime": 33696.4447, |
| "train_tokens_per_second": 99967.532 |
| }, |
| { |
| "epoch": 0.515, |
| "grad_norm": 0.16473324596881866, |
| "learning_rate": 0.00014524556757010177, |
| "loss": 0.3005, |
| "num_input_tokens_seen": 3375104000, |
| "step": 51500, |
| "train_runtime": 33766.6492, |
| "train_tokens_per_second": 99953.773 |
| }, |
| { |
| "epoch": 0.516, |
| "grad_norm": 0.1542610377073288, |
| "learning_rate": 0.00014476983258647234, |
| "loss": 0.3012, |
| "num_input_tokens_seen": 3381657600, |
| "step": 51600, |
| "train_runtime": 33832.0917, |
| "train_tokens_per_second": 99954.139 |
| }, |
| { |
| "epoch": 0.517, |
| "grad_norm": 0.1388223022222519, |
| "learning_rate": 0.0001442941502705707, |
| "loss": 0.3031, |
| "num_input_tokens_seen": 3388211200, |
| "step": 51700, |
| "train_runtime": 33896.7212, |
| "train_tokens_per_second": 99956.901 |
| }, |
| { |
| "epoch": 0.518, |
| "grad_norm": 0.19452647864818573, |
| "learning_rate": 0.0001438185254125125, |
| "loss": 0.3011, |
| "num_input_tokens_seen": 3394764800, |
| "step": 51800, |
| "train_runtime": 33962.0557, |
| "train_tokens_per_second": 99957.577 |
| }, |
| { |
| "epoch": 0.519, |
| "grad_norm": 0.16043786704540253, |
| "learning_rate": 0.00014334296280183473, |
| "loss": 0.2997, |
| "num_input_tokens_seen": 3401318400, |
| "step": 51900, |
| "train_runtime": 34027.5551, |
| "train_tokens_per_second": 99957.766 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.19769923388957977, |
| "learning_rate": 0.00014286746722744768, |
| "loss": 0.3007, |
| "num_input_tokens_seen": 3407872000, |
| "step": 52000, |
| "train_runtime": 34098.2307, |
| "train_tokens_per_second": 99942.781 |
| }, |
| { |
| "epoch": 0.521, |
| "grad_norm": 0.1524592489004135, |
| "learning_rate": 0.00014239204347758647, |
| "loss": 0.299, |
| "num_input_tokens_seen": 3414425600, |
| "step": 52100, |
| "train_runtime": 34164.2522, |
| "train_tokens_per_second": 99941.47 |
| }, |
| { |
| "epoch": 0.522, |
| "grad_norm": 0.14221727848052979, |
| "learning_rate": 0.00014191669633976294, |
| "loss": 0.3029, |
| "num_input_tokens_seen": 3420979200, |
| "step": 52200, |
| "train_runtime": 34227.7165, |
| "train_tokens_per_second": 99947.632 |
| }, |
| { |
| "epoch": 0.523, |
| "grad_norm": 0.15958262979984283, |
| "learning_rate": 0.00014144143060071756, |
| "loss": 0.3005, |
| "num_input_tokens_seen": 3427532800, |
| "step": 52300, |
| "train_runtime": 34292.9446, |
| "train_tokens_per_second": 99948.629 |
| }, |
| { |
| "epoch": 0.524, |
| "grad_norm": 0.1545192301273346, |
| "learning_rate": 0.000140966251046371, |
| "loss": 0.3024, |
| "num_input_tokens_seen": 3434086400, |
| "step": 52400, |
| "train_runtime": 34357.5392, |
| "train_tokens_per_second": 99951.466 |
| }, |
| { |
| "epoch": 0.525, |
| "grad_norm": 0.14636173844337463, |
| "learning_rate": 0.0001404911624617761, |
| "loss": 0.2967, |
| "num_input_tokens_seen": 3440640000, |
| "step": 52500, |
| "train_runtime": 34423.9361, |
| "train_tokens_per_second": 99949.058 |
| }, |
| { |
| "epoch": 0.526, |
| "grad_norm": 0.26764926314353943, |
| "learning_rate": 0.00014001616963106966, |
| "loss": 0.2982, |
| "num_input_tokens_seen": 3447193600, |
| "step": 52600, |
| "train_runtime": 34489.4544, |
| "train_tokens_per_second": 99949.206 |
| }, |
| { |
| "epoch": 0.527, |
| "grad_norm": 0.20636320114135742, |
| "learning_rate": 0.00013954127733742416, |
| "loss": 0.3011, |
| "num_input_tokens_seen": 3453747200, |
| "step": 52700, |
| "train_runtime": 34559.9071, |
| "train_tokens_per_second": 99935.083 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.1523534059524536, |
| "learning_rate": 0.0001390664903629998, |
| "loss": 0.3042, |
| "num_input_tokens_seen": 3460300800, |
| "step": 52800, |
| "train_runtime": 34624.4507, |
| "train_tokens_per_second": 99938.071 |
| }, |
| { |
| "epoch": 0.529, |
| "grad_norm": 0.15213948488235474, |
| "learning_rate": 0.0001385918134888961, |
| "loss": 0.3024, |
| "num_input_tokens_seen": 3466854400, |
| "step": 52900, |
| "train_runtime": 34690.2273, |
| "train_tokens_per_second": 99937.495 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 0.14115960896015167, |
| "learning_rate": 0.00013811725149510387, |
| "loss": 0.2999, |
| "num_input_tokens_seen": 3473408000, |
| "step": 53000, |
| "train_runtime": 34756.5786, |
| "train_tokens_per_second": 99935.268 |
| }, |
| { |
| "epoch": 0.531, |
| "grad_norm": 0.16747893393039703, |
| "learning_rate": 0.0001376428091604572, |
| "loss": 0.3011, |
| "num_input_tokens_seen": 3479961600, |
| "step": 53100, |
| "train_runtime": 34823.0381, |
| "train_tokens_per_second": 99932.74 |
| }, |
| { |
| "epoch": 0.532, |
| "grad_norm": 0.1266140639781952, |
| "learning_rate": 0.00013716849126258512, |
| "loss": 0.2985, |
| "num_input_tokens_seen": 3486515200, |
| "step": 53200, |
| "train_runtime": 34892.7557, |
| "train_tokens_per_second": 99920.89 |
| }, |
| { |
| "epoch": 0.533, |
| "grad_norm": 0.14753171801567078, |
| "learning_rate": 0.00013669430257786354, |
| "loss": 0.2996, |
| "num_input_tokens_seen": 3493068800, |
| "step": 53300, |
| "train_runtime": 34957.0461, |
| "train_tokens_per_second": 99924.599 |
| }, |
| { |
| "epoch": 0.534, |
| "grad_norm": 0.2617182731628418, |
| "learning_rate": 0.00013622024788136728, |
| "loss": 0.3027, |
| "num_input_tokens_seen": 3499622400, |
| "step": 53400, |
| "train_runtime": 35022.8837, |
| "train_tokens_per_second": 99923.879 |
| }, |
| { |
| "epoch": 0.535, |
| "grad_norm": 0.17150761187076569, |
| "learning_rate": 0.00013574633194682185, |
| "loss": 0.3027, |
| "num_input_tokens_seen": 3506176000, |
| "step": 53500, |
| "train_runtime": 35088.2396, |
| "train_tokens_per_second": 99924.534 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 0.16566570103168488, |
| "learning_rate": 0.0001352725595465555, |
| "loss": 0.2999, |
| "num_input_tokens_seen": 3512729600, |
| "step": 53600, |
| "train_runtime": 35153.6189, |
| "train_tokens_per_second": 99925.12 |
| }, |
| { |
| "epoch": 0.537, |
| "grad_norm": 0.13577675819396973, |
| "learning_rate": 0.000134798935451451, |
| "loss": 0.2969, |
| "num_input_tokens_seen": 3519283200, |
| "step": 53700, |
| "train_runtime": 35225.0068, |
| "train_tokens_per_second": 99908.659 |
| }, |
| { |
| "epoch": 0.538, |
| "grad_norm": 0.20843537151813507, |
| "learning_rate": 0.00013432546443089768, |
| "loss": 0.2967, |
| "num_input_tokens_seen": 3525836800, |
| "step": 53800, |
| "train_runtime": 35288.0858, |
| "train_tokens_per_second": 99915.785 |
| }, |
| { |
| "epoch": 0.539, |
| "grad_norm": 0.15664201974868774, |
| "learning_rate": 0.0001338521512527436, |
| "loss": 0.3007, |
| "num_input_tokens_seen": 3532390400, |
| "step": 53900, |
| "train_runtime": 35353.7477, |
| "train_tokens_per_second": 99915.586 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 0.14205297827720642, |
| "learning_rate": 0.00013337900068324712, |
| "loss": 0.3001, |
| "num_input_tokens_seen": 3538944000, |
| "step": 54000, |
| "train_runtime": 35423.5891, |
| "train_tokens_per_second": 99903.598 |
| }, |
| { |
| "epoch": 0.541, |
| "grad_norm": 0.13229498267173767, |
| "learning_rate": 0.00013290601748702918, |
| "loss": 0.2931, |
| "num_input_tokens_seen": 3545497600, |
| "step": 54100, |
| "train_runtime": 35489.6646, |
| "train_tokens_per_second": 99902.257 |
| }, |
| { |
| "epoch": 0.542, |
| "grad_norm": 0.1380510926246643, |
| "learning_rate": 0.00013243320642702543, |
| "loss": 0.3116, |
| "num_input_tokens_seen": 3552051200, |
| "step": 54200, |
| "train_runtime": 35554.9224, |
| "train_tokens_per_second": 99903.219 |
| }, |
| { |
| "epoch": 0.543, |
| "grad_norm": 0.16735288500785828, |
| "learning_rate": 0.0001319605722644379, |
| "loss": 0.2998, |
| "num_input_tokens_seen": 3558604800, |
| "step": 54300, |
| "train_runtime": 35619.4728, |
| "train_tokens_per_second": 99906.161 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.17502574622631073, |
| "learning_rate": 0.0001314881197586874, |
| "loss": 0.3004, |
| "num_input_tokens_seen": 3565158400, |
| "step": 54400, |
| "train_runtime": 35685.8161, |
| "train_tokens_per_second": 99904.074 |
| }, |
| { |
| "epoch": 0.545, |
| "grad_norm": 0.14805424213409424, |
| "learning_rate": 0.0001310158536673654, |
| "loss": 0.2983, |
| "num_input_tokens_seen": 3571712000, |
| "step": 54500, |
| "train_runtime": 35750.1467, |
| "train_tokens_per_second": 99907.618 |
| }, |
| { |
| "epoch": 0.546, |
| "grad_norm": 0.1533045917749405, |
| "learning_rate": 0.0001305437787461862, |
| "loss": 0.2976, |
| "num_input_tokens_seen": 3578265600, |
| "step": 54600, |
| "train_runtime": 35816.4973, |
| "train_tokens_per_second": 99905.515 |
| }, |
| { |
| "epoch": 0.547, |
| "grad_norm": 0.18475773930549622, |
| "learning_rate": 0.00013007189974893903, |
| "loss": 0.2951, |
| "num_input_tokens_seen": 3584819200, |
| "step": 54700, |
| "train_runtime": 35886.6478, |
| "train_tokens_per_second": 99892.841 |
| }, |
| { |
| "epoch": 0.548, |
| "grad_norm": 0.13913068175315857, |
| "learning_rate": 0.00012960022142744016, |
| "loss": 0.297, |
| "num_input_tokens_seen": 3591372800, |
| "step": 54800, |
| "train_runtime": 35950.7798, |
| "train_tokens_per_second": 99896.937 |
| }, |
| { |
| "epoch": 0.549, |
| "grad_norm": 0.15448203682899475, |
| "learning_rate": 0.00012912874853148506, |
| "loss": 0.303, |
| "num_input_tokens_seen": 3597926400, |
| "step": 54900, |
| "train_runtime": 36015.8762, |
| "train_tokens_per_second": 99898.344 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.15416036546230316, |
| "learning_rate": 0.00012865748580880053, |
| "loss": 0.2979, |
| "num_input_tokens_seen": 3604480000, |
| "step": 55000, |
| "train_runtime": 36080.201, |
| "train_tokens_per_second": 99901.883 |
| }, |
| { |
| "epoch": 0.551, |
| "grad_norm": 0.14506150782108307, |
| "learning_rate": 0.0001281864380049969, |
| "loss": 0.2983, |
| "num_input_tokens_seen": 3611033600, |
| "step": 55100, |
| "train_runtime": 36150.2521, |
| "train_tokens_per_second": 99889.583 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 0.17357710003852844, |
| "learning_rate": 0.00012771560986352042, |
| "loss": 0.2986, |
| "num_input_tokens_seen": 3617587200, |
| "step": 55200, |
| "train_runtime": 36215.2659, |
| "train_tokens_per_second": 99891.223 |
| }, |
| { |
| "epoch": 0.553, |
| "grad_norm": 0.16711916029453278, |
| "learning_rate": 0.0001272450061256052, |
| "loss": 0.2979, |
| "num_input_tokens_seen": 3624140800, |
| "step": 55300, |
| "train_runtime": 36279.3222, |
| "train_tokens_per_second": 99895.494 |
| }, |
| { |
| "epoch": 0.554, |
| "grad_norm": 0.1502256691455841, |
| "learning_rate": 0.00012677463153022565, |
| "loss": 0.3007, |
| "num_input_tokens_seen": 3630694400, |
| "step": 55400, |
| "train_runtime": 36345.9552, |
| "train_tokens_per_second": 99892.667 |
| }, |
| { |
| "epoch": 0.555, |
| "grad_norm": 0.15480037033557892, |
| "learning_rate": 0.0001263044908140488, |
| "loss": 0.2975, |
| "num_input_tokens_seen": 3637248000, |
| "step": 55500, |
| "train_runtime": 36415.9598, |
| "train_tokens_per_second": 99880.602 |
| }, |
| { |
| "epoch": 0.556, |
| "grad_norm": 0.15693609416484833, |
| "learning_rate": 0.00012583458871138632, |
| "loss": 0.2978, |
| "num_input_tokens_seen": 3643801600, |
| "step": 55600, |
| "train_runtime": 36480.5541, |
| "train_tokens_per_second": 99883.395 |
| }, |
| { |
| "epoch": 0.557, |
| "grad_norm": 0.147445410490036, |
| "learning_rate": 0.00012536492995414723, |
| "loss": 0.2991, |
| "num_input_tokens_seen": 3650355200, |
| "step": 55700, |
| "train_runtime": 36545.2319, |
| "train_tokens_per_second": 99885.95 |
| }, |
| { |
| "epoch": 0.558, |
| "grad_norm": 0.13640980422496796, |
| "learning_rate": 0.00012489551927179007, |
| "loss": 0.2987, |
| "num_input_tokens_seen": 3656908800, |
| "step": 55800, |
| "train_runtime": 36611.0993, |
| "train_tokens_per_second": 99885.25 |
| }, |
| { |
| "epoch": 0.559, |
| "grad_norm": 0.14373840391635895, |
| "learning_rate": 0.00012442636139127508, |
| "loss": 0.3, |
| "num_input_tokens_seen": 3663462400, |
| "step": 55900, |
| "train_runtime": 36676.4606, |
| "train_tokens_per_second": 99885.931 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.14679211378097534, |
| "learning_rate": 0.00012395746103701695, |
| "loss": 0.2996, |
| "num_input_tokens_seen": 3670016000, |
| "step": 56000, |
| "train_runtime": 36748.2938, |
| "train_tokens_per_second": 99869.018 |
| }, |
| { |
| "epoch": 0.561, |
| "grad_norm": 0.15536077320575714, |
| "learning_rate": 0.00012348882293083708, |
| "loss": 0.2953, |
| "num_input_tokens_seen": 3676569600, |
| "step": 56100, |
| "train_runtime": 36813.4246, |
| "train_tokens_per_second": 99870.35 |
| }, |
| { |
| "epoch": 0.562, |
| "grad_norm": 0.16678054630756378, |
| "learning_rate": 0.00012302045179191594, |
| "loss": 0.2969, |
| "num_input_tokens_seen": 3683123200, |
| "step": 56200, |
| "train_runtime": 36877.8431, |
| "train_tokens_per_second": 99873.607 |
| }, |
| { |
| "epoch": 0.563, |
| "grad_norm": 0.15781697630882263, |
| "learning_rate": 0.00012255235233674572, |
| "loss": 0.2972, |
| "num_input_tokens_seen": 3689676800, |
| "step": 56300, |
| "train_runtime": 36943.2178, |
| "train_tokens_per_second": 99874.267 |
| }, |
| { |
| "epoch": 0.564, |
| "grad_norm": 0.13541863858699799, |
| "learning_rate": 0.00012208452927908278, |
| "loss": 0.302, |
| "num_input_tokens_seen": 3696230400, |
| "step": 56400, |
| "train_runtime": 37008.8029, |
| "train_tokens_per_second": 99874.357 |
| }, |
| { |
| "epoch": 0.565, |
| "grad_norm": 0.1400034874677658, |
| "learning_rate": 0.00012161698732990003, |
| "loss": 0.3, |
| "num_input_tokens_seen": 3702784000, |
| "step": 56500, |
| "train_runtime": 37078.9889, |
| "train_tokens_per_second": 99862.054 |
| }, |
| { |
| "epoch": 0.566, |
| "grad_norm": 0.1511828452348709, |
| "learning_rate": 0.00012114973119733987, |
| "loss": 0.3017, |
| "num_input_tokens_seen": 3709337600, |
| "step": 56600, |
| "train_runtime": 37144.0507, |
| "train_tokens_per_second": 99863.573 |
| }, |
| { |
| "epoch": 0.567, |
| "grad_norm": 0.15576902031898499, |
| "learning_rate": 0.00012068276558666616, |
| "loss": 0.2981, |
| "num_input_tokens_seen": 3715891200, |
| "step": 56700, |
| "train_runtime": 37206.97, |
| "train_tokens_per_second": 99870.836 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 0.24084219336509705, |
| "learning_rate": 0.00012021609520021752, |
| "loss": 0.3025, |
| "num_input_tokens_seen": 3722444800, |
| "step": 56800, |
| "train_runtime": 37278.1305, |
| "train_tokens_per_second": 99855.995 |
| }, |
| { |
| "epoch": 0.569, |
| "grad_norm": 0.16832643747329712, |
| "learning_rate": 0.00011974972473735957, |
| "loss": 0.301, |
| "num_input_tokens_seen": 3728998400, |
| "step": 56900, |
| "train_runtime": 37343.2452, |
| "train_tokens_per_second": 99857.374 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.18326181173324585, |
| "learning_rate": 0.00011928365889443764, |
| "loss": 0.2987, |
| "num_input_tokens_seen": 3735552000, |
| "step": 57000, |
| "train_runtime": 37407.594, |
| "train_tokens_per_second": 99860.793 |
| }, |
| { |
| "epoch": 0.571, |
| "grad_norm": 0.15526984632015228, |
| "learning_rate": 0.00011881790236472966, |
| "loss": 0.2991, |
| "num_input_tokens_seen": 3742105600, |
| "step": 57100, |
| "train_runtime": 37474.3952, |
| "train_tokens_per_second": 99857.665 |
| }, |
| { |
| "epoch": 0.572, |
| "grad_norm": 0.18177416920661926, |
| "learning_rate": 0.00011835245983839869, |
| "loss": 0.3002, |
| "num_input_tokens_seen": 3748659200, |
| "step": 57200, |
| "train_runtime": 37538.8922, |
| "train_tokens_per_second": 99860.677 |
| }, |
| { |
| "epoch": 0.573, |
| "grad_norm": 0.1915498822927475, |
| "learning_rate": 0.00011788733600244575, |
| "loss": 0.2986, |
| "num_input_tokens_seen": 3755212800, |
| "step": 57300, |
| "train_runtime": 37605.3867, |
| "train_tokens_per_second": 99858.375 |
| }, |
| { |
| "epoch": 0.574, |
| "grad_norm": 0.15175184607505798, |
| "learning_rate": 0.00011742253554066278, |
| "loss": 0.3015, |
| "num_input_tokens_seen": 3761766400, |
| "step": 57400, |
| "train_runtime": 37678.0051, |
| "train_tokens_per_second": 99839.851 |
| }, |
| { |
| "epoch": 0.575, |
| "grad_norm": 0.16369026899337769, |
| "learning_rate": 0.00011695806313358523, |
| "loss": 0.3003, |
| "num_input_tokens_seen": 3768320000, |
| "step": 57500, |
| "train_runtime": 37742.0245, |
| "train_tokens_per_second": 99844.141 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.16646848618984222, |
| "learning_rate": 0.00011649392345844506, |
| "loss": 0.2972, |
| "num_input_tokens_seen": 3774873600, |
| "step": 57600, |
| "train_runtime": 37807.5481, |
| "train_tokens_per_second": 99844.444 |
| }, |
| { |
| "epoch": 0.577, |
| "grad_norm": 0.14035099744796753, |
| "learning_rate": 0.00011603012118912372, |
| "loss": 0.2985, |
| "num_input_tokens_seen": 3781427200, |
| "step": 57700, |
| "train_runtime": 37871.8826, |
| "train_tokens_per_second": 99847.88 |
| }, |
| { |
| "epoch": 0.578, |
| "grad_norm": 0.14899714291095734, |
| "learning_rate": 0.00011556666099610485, |
| "loss": 0.3008, |
| "num_input_tokens_seen": 3787980800, |
| "step": 57800, |
| "train_runtime": 37943.2827, |
| "train_tokens_per_second": 99832.712 |
| }, |
| { |
| "epoch": 0.579, |
| "grad_norm": 0.15600667893886566, |
| "learning_rate": 0.00011510354754642745, |
| "loss": 0.303, |
| "num_input_tokens_seen": 3794534400, |
| "step": 57900, |
| "train_runtime": 38008.9332, |
| "train_tokens_per_second": 99832.699 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 0.1631072610616684, |
| "learning_rate": 0.00011464078550363887, |
| "loss": 0.2978, |
| "num_input_tokens_seen": 3801088000, |
| "step": 58000, |
| "train_runtime": 38073.7575, |
| "train_tokens_per_second": 99834.853 |
| }, |
| { |
| "epoch": 0.581, |
| "grad_norm": 0.1560899019241333, |
| "learning_rate": 0.0001141783795277477, |
| "loss": 0.299, |
| "num_input_tokens_seen": 3807641600, |
| "step": 58100, |
| "train_runtime": 38139.694, |
| "train_tokens_per_second": 99834.089 |
| }, |
| { |
| "epoch": 0.582, |
| "grad_norm": 0.1506076604127884, |
| "learning_rate": 0.00011371633427517696, |
| "loss": 0.2985, |
| "num_input_tokens_seen": 3814195200, |
| "step": 58200, |
| "train_runtime": 38209.9556, |
| "train_tokens_per_second": 99822.026 |
| }, |
| { |
| "epoch": 0.583, |
| "grad_norm": 0.16049940884113312, |
| "learning_rate": 0.00011325465439871731, |
| "loss": 0.2998, |
| "num_input_tokens_seen": 3820748800, |
| "step": 58300, |
| "train_runtime": 38274.5015, |
| "train_tokens_per_second": 99824.913 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 0.15604519844055176, |
| "learning_rate": 0.00011279334454747989, |
| "loss": 0.2969, |
| "num_input_tokens_seen": 3827302400, |
| "step": 58400, |
| "train_runtime": 38341.4547, |
| "train_tokens_per_second": 99821.523 |
| }, |
| { |
| "epoch": 0.585, |
| "grad_norm": 0.15963351726531982, |
| "learning_rate": 0.00011233240936684981, |
| "loss": 0.2988, |
| "num_input_tokens_seen": 3833856000, |
| "step": 58500, |
| "train_runtime": 38406.0222, |
| "train_tokens_per_second": 99824.345 |
| }, |
| { |
| "epoch": 0.586, |
| "grad_norm": 0.15443411469459534, |
| "learning_rate": 0.00011187185349843916, |
| "loss": 0.298, |
| "num_input_tokens_seen": 3840409600, |
| "step": 58600, |
| "train_runtime": 38472.0656, |
| "train_tokens_per_second": 99823.327 |
| }, |
| { |
| "epoch": 0.587, |
| "grad_norm": 0.15459220111370087, |
| "learning_rate": 0.00011141168158004053, |
| "loss": 0.3004, |
| "num_input_tokens_seen": 3846963200, |
| "step": 58700, |
| "train_runtime": 38542.0532, |
| "train_tokens_per_second": 99812.098 |
| }, |
| { |
| "epoch": 0.588, |
| "grad_norm": 0.16199928522109985, |
| "learning_rate": 0.00011095189824557998, |
| "loss": 0.2985, |
| "num_input_tokens_seen": 3853516800, |
| "step": 58800, |
| "train_runtime": 38609.4411, |
| "train_tokens_per_second": 99807.63 |
| }, |
| { |
| "epoch": 0.589, |
| "grad_norm": 0.2209610939025879, |
| "learning_rate": 0.00011049250812507054, |
| "loss": 0.3005, |
| "num_input_tokens_seen": 3860070400, |
| "step": 58900, |
| "train_runtime": 38675.4402, |
| "train_tokens_per_second": 99806.761 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 0.22285670042037964, |
| "learning_rate": 0.00011003351584456571, |
| "loss": 0.298, |
| "num_input_tokens_seen": 3866624000, |
| "step": 59000, |
| "train_runtime": 38740.3065, |
| "train_tokens_per_second": 99808.813 |
| }, |
| { |
| "epoch": 0.591, |
| "grad_norm": 0.2148812711238861, |
| "learning_rate": 0.0001095749260261126, |
| "loss": 0.2966, |
| "num_input_tokens_seen": 3873177600, |
| "step": 59100, |
| "train_runtime": 38806.3344, |
| "train_tokens_per_second": 99807.871 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 0.21284043788909912, |
| "learning_rate": 0.00010911674328770559, |
| "loss": 0.3009, |
| "num_input_tokens_seen": 3879731200, |
| "step": 59200, |
| "train_runtime": 38871.8466, |
| "train_tokens_per_second": 99808.256 |
| }, |
| { |
| "epoch": 0.593, |
| "grad_norm": 0.1655593365430832, |
| "learning_rate": 0.00010865897224323979, |
| "loss": 0.2981, |
| "num_input_tokens_seen": 3886284800, |
| "step": 59300, |
| "train_runtime": 38937.7196, |
| "train_tokens_per_second": 99807.714 |
| }, |
| { |
| "epoch": 0.594, |
| "grad_norm": 0.17153207957744598, |
| "learning_rate": 0.00010820161750246453, |
| "loss": 0.3042, |
| "num_input_tokens_seen": 3892838400, |
| "step": 59400, |
| "train_runtime": 39004.8582, |
| "train_tokens_per_second": 99803.937 |
| }, |
| { |
| "epoch": 0.595, |
| "grad_norm": 0.15362666547298431, |
| "learning_rate": 0.00010774468367093696, |
| "loss": 0.3001, |
| "num_input_tokens_seen": 3899392000, |
| "step": 59500, |
| "train_runtime": 39068.7475, |
| "train_tokens_per_second": 99808.472 |
| }, |
| { |
| "epoch": 0.596, |
| "grad_norm": 0.15481388568878174, |
| "learning_rate": 0.00010728817534997573, |
| "loss": 0.2973, |
| "num_input_tokens_seen": 3905945600, |
| "step": 59600, |
| "train_runtime": 39137.2916, |
| "train_tokens_per_second": 99801.122 |
| }, |
| { |
| "epoch": 0.597, |
| "grad_norm": 0.1292748749256134, |
| "learning_rate": 0.00010683209713661453, |
| "loss": 0.2993, |
| "num_input_tokens_seen": 3912499200, |
| "step": 59700, |
| "train_runtime": 39198.2818, |
| "train_tokens_per_second": 99813.028 |
| }, |
| { |
| "epoch": 0.598, |
| "grad_norm": 0.14853951334953308, |
| "learning_rate": 0.00010637645362355589, |
| "loss": 0.2967, |
| "num_input_tokens_seen": 3919052800, |
| "step": 59800, |
| "train_runtime": 39262.6162, |
| "train_tokens_per_second": 99816.395 |
| }, |
| { |
| "epoch": 0.599, |
| "grad_norm": 0.13745439052581787, |
| "learning_rate": 0.00010592124939912497, |
| "loss": 0.3023, |
| "num_input_tokens_seen": 3925606400, |
| "step": 59900, |
| "train_runtime": 39328.4755, |
| "train_tokens_per_second": 99815.88 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.14352121949195862, |
| "learning_rate": 0.00010546648904722326, |
| "loss": 0.2973, |
| "num_input_tokens_seen": 3932160000, |
| "step": 60000, |
| "train_runtime": 39393.6967, |
| "train_tokens_per_second": 99816.984 |
| }, |
| { |
| "epoch": 0.601, |
| "grad_norm": 0.16375063359737396, |
| "learning_rate": 0.0001050121771472824, |
| "loss": 0.2934, |
| "num_input_tokens_seen": 3938713600, |
| "step": 60100, |
| "train_runtime": 39465.7876, |
| "train_tokens_per_second": 99800.709 |
| }, |
| { |
| "epoch": 0.602, |
| "grad_norm": 0.144679456949234, |
| "learning_rate": 0.0001045583182742182, |
| "loss": 0.2983, |
| "num_input_tokens_seen": 3945267200, |
| "step": 60200, |
| "train_runtime": 39531.166, |
| "train_tokens_per_second": 99801.438 |
| }, |
| { |
| "epoch": 0.603, |
| "grad_norm": 0.33903974294662476, |
| "learning_rate": 0.00010410491699838448, |
| "loss": 0.2981, |
| "num_input_tokens_seen": 3951820800, |
| "step": 60300, |
| "train_runtime": 39596.8662, |
| "train_tokens_per_second": 99801.352 |
| }, |
| { |
| "epoch": 0.604, |
| "grad_norm": 0.1823410987854004, |
| "learning_rate": 0.00010365197788552707, |
| "loss": 0.2986, |
| "num_input_tokens_seen": 3958374400, |
| "step": 60400, |
| "train_runtime": 39664.1206, |
| "train_tokens_per_second": 99797.357 |
| }, |
| { |
| "epoch": 0.605, |
| "grad_norm": 0.18758277595043182, |
| "learning_rate": 0.00010319950549673778, |
| "loss": 0.2967, |
| "num_input_tokens_seen": 3964928000, |
| "step": 60500, |
| "train_runtime": 39728.4695, |
| "train_tokens_per_second": 99800.673 |
| }, |
| { |
| "epoch": 0.606, |
| "grad_norm": 0.173909991979599, |
| "learning_rate": 0.00010274750438840855, |
| "loss": 0.2981, |
| "num_input_tokens_seen": 3971481600, |
| "step": 60600, |
| "train_runtime": 39794.5098, |
| "train_tokens_per_second": 99799.737 |
| }, |
| { |
| "epoch": 0.607, |
| "grad_norm": 0.14504651725292206, |
| "learning_rate": 0.00010229597911218554, |
| "loss": 0.2967, |
| "num_input_tokens_seen": 3978035200, |
| "step": 60700, |
| "train_runtime": 39864.8024, |
| "train_tokens_per_second": 99788.158 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.1418026238679886, |
| "learning_rate": 0.00010184493421492324, |
| "loss": 0.2976, |
| "num_input_tokens_seen": 3984588800, |
| "step": 60800, |
| "train_runtime": 39931.2064, |
| "train_tokens_per_second": 99786.337 |
| }, |
| { |
| "epoch": 0.609, |
| "grad_norm": 0.18415790796279907, |
| "learning_rate": 0.0001013943742386388, |
| "loss": 0.2997, |
| "num_input_tokens_seen": 3991142400, |
| "step": 60900, |
| "train_runtime": 39996.7127, |
| "train_tokens_per_second": 99786.761 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.14107364416122437, |
| "learning_rate": 0.00010094430372046616, |
| "loss": 0.2979, |
| "num_input_tokens_seen": 3997696000, |
| "step": 61000, |
| "train_runtime": 40068.6157, |
| "train_tokens_per_second": 99771.253 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 100000, |
| "num_input_tokens_seen": 3997696000, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.5963643723776e+16, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|