| { |
| "best_global_step": 4875, |
| "best_metric": 4.976211071014404, |
| "best_model_checkpoint": "/gpfs/scratch/guoh/DNAFM/output/gencode_human_12.8k_12800/Gencode-BPE/checkpoint-4875", |
| "epoch": 1.3829787234042552, |
| "eval_steps": 125, |
| "global_step": 4875, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005673758865248227, |
| "grad_norm": 1275.0146484375, |
| "loss": 281.4781, |
| "lr": 2e-06, |
| "step": 2, |
| "tokens_trained": 0.000192256 |
| }, |
| { |
| "epoch": 0.0011347517730496454, |
| "grad_norm": 1437.579833984375, |
| "loss": 267.2211, |
| "lr": 6e-06, |
| "step": 4, |
| "tokens_trained": 0.000382024 |
| }, |
| { |
| "epoch": 0.001702127659574468, |
| "grad_norm": 1719.271484375, |
| "loss": 219.3822, |
| "lr": 1e-05, |
| "step": 6, |
| "tokens_trained": 0.00057072 |
| }, |
| { |
| "epoch": 0.0022695035460992908, |
| "grad_norm": 1444.94970703125, |
| "loss": 133.8172, |
| "lr": 1.4e-05, |
| "step": 8, |
| "tokens_trained": 0.000761336 |
| }, |
| { |
| "epoch": 0.0028368794326241137, |
| "grad_norm": 238.9689178466797, |
| "loss": 90.8177, |
| "lr": 1.8e-05, |
| "step": 10, |
| "tokens_trained": 0.000953248 |
| }, |
| { |
| "epoch": 0.003404255319148936, |
| "grad_norm": 158.53497314453125, |
| "loss": 84.6922, |
| "lr": 2.2e-05, |
| "step": 12, |
| "tokens_trained": 0.00114424 |
| }, |
| { |
| "epoch": 0.003971631205673759, |
| "grad_norm": 146.10595703125, |
| "loss": 76.7055, |
| "lr": 2.6e-05, |
| "step": 14, |
| "tokens_trained": 0.001334104 |
| }, |
| { |
| "epoch": 0.0045390070921985815, |
| "grad_norm": 140.69964599609375, |
| "loss": 67.9952, |
| "lr": 3e-05, |
| "step": 16, |
| "tokens_trained": 0.00152392 |
| }, |
| { |
| "epoch": 0.005106382978723404, |
| "grad_norm": 108.80303192138672, |
| "loss": 57.8088, |
| "lr": 3.4000000000000007e-05, |
| "step": 18, |
| "tokens_trained": 0.001713872 |
| }, |
| { |
| "epoch": 0.005673758865248227, |
| "grad_norm": 106.82334899902344, |
| "loss": 48.6585, |
| "lr": 3.8e-05, |
| "step": 20, |
| "tokens_trained": 0.001903976 |
| }, |
| { |
| "epoch": 0.00624113475177305, |
| "grad_norm": 93.58769989013672, |
| "loss": 41.7984, |
| "lr": 4.2000000000000004e-05, |
| "step": 22, |
| "tokens_trained": 0.002094288 |
| }, |
| { |
| "epoch": 0.006808510638297872, |
| "grad_norm": 87.5854721069336, |
| "loss": 37.6201, |
| "lr": 4.6e-05, |
| "step": 24, |
| "tokens_trained": 0.002282496 |
| }, |
| { |
| "epoch": 0.007375886524822695, |
| "grad_norm": 84.12794494628906, |
| "loss": 35.0091, |
| "lr": 5e-05, |
| "step": 26, |
| "tokens_trained": 0.00247068 |
| }, |
| { |
| "epoch": 0.007943262411347518, |
| "grad_norm": 79.77535247802734, |
| "loss": 33.2253, |
| "lr": 5.4e-05, |
| "step": 28, |
| "tokens_trained": 0.002662888 |
| }, |
| { |
| "epoch": 0.00851063829787234, |
| "grad_norm": 66.42157745361328, |
| "loss": 32.0682, |
| "lr": 5.800000000000001e-05, |
| "step": 30, |
| "tokens_trained": 0.002851968 |
| }, |
| { |
| "epoch": 0.009078014184397163, |
| "grad_norm": 87.52485656738281, |
| "loss": 30.893, |
| "lr": 6.2e-05, |
| "step": 32, |
| "tokens_trained": 0.003041384 |
| }, |
| { |
| "epoch": 0.009645390070921986, |
| "grad_norm": 58.33614730834961, |
| "loss": 30.0513, |
| "lr": 6.6e-05, |
| "step": 34, |
| "tokens_trained": 0.003232872 |
| }, |
| { |
| "epoch": 0.010212765957446808, |
| "grad_norm": 54.629329681396484, |
| "loss": 29.0115, |
| "lr": 7.000000000000001e-05, |
| "step": 36, |
| "tokens_trained": 0.003423824 |
| }, |
| { |
| "epoch": 0.01078014184397163, |
| "grad_norm": 52.79097366333008, |
| "loss": 28.2084, |
| "lr": 7.4e-05, |
| "step": 38, |
| "tokens_trained": 0.003613232 |
| }, |
| { |
| "epoch": 0.011347517730496455, |
| "grad_norm": 54.481224060058594, |
| "loss": 27.4345, |
| "lr": 7.8e-05, |
| "step": 40, |
| "tokens_trained": 0.003800952 |
| }, |
| { |
| "epoch": 0.011914893617021277, |
| "grad_norm": 58.7069091796875, |
| "loss": 26.5936, |
| "lr": 8.2e-05, |
| "step": 42, |
| "tokens_trained": 0.003991512 |
| }, |
| { |
| "epoch": 0.0124822695035461, |
| "grad_norm": 49.30760955810547, |
| "loss": 26.0608, |
| "lr": 8.599999999999999e-05, |
| "step": 44, |
| "tokens_trained": 0.004180648 |
| }, |
| { |
| "epoch": 0.013049645390070922, |
| "grad_norm": 61.902587890625, |
| "loss": 25.5363, |
| "lr": 8.999999999999999e-05, |
| "step": 46, |
| "tokens_trained": 0.00437148 |
| }, |
| { |
| "epoch": 0.013617021276595745, |
| "grad_norm": 46.76111602783203, |
| "loss": 24.9599, |
| "lr": 9.400000000000001e-05, |
| "step": 48, |
| "tokens_trained": 0.004559344 |
| }, |
| { |
| "epoch": 0.014184397163120567, |
| "grad_norm": 57.06416702270508, |
| "loss": 24.4087, |
| "lr": 9.800000000000001e-05, |
| "step": 50, |
| "tokens_trained": 0.004749256 |
| }, |
| { |
| "epoch": 0.01475177304964539, |
| "grad_norm": 44.798736572265625, |
| "loss": 24.1444, |
| "lr": 0.000102, |
| "step": 52, |
| "tokens_trained": 0.004940192 |
| }, |
| { |
| "epoch": 0.015319148936170212, |
| "grad_norm": 40.29296875, |
| "loss": 23.6011, |
| "lr": 0.000106, |
| "step": 54, |
| "tokens_trained": 0.005130304 |
| }, |
| { |
| "epoch": 0.015886524822695036, |
| "grad_norm": 38.75099563598633, |
| "loss": 23.1781, |
| "lr": 0.00011, |
| "step": 56, |
| "tokens_trained": 0.005322864 |
| }, |
| { |
| "epoch": 0.016453900709219857, |
| "grad_norm": 37.470706939697266, |
| "loss": 22.9136, |
| "lr": 0.000114, |
| "step": 58, |
| "tokens_trained": 0.00551392 |
| }, |
| { |
| "epoch": 0.01702127659574468, |
| "grad_norm": 35.1894645690918, |
| "loss": 22.6336, |
| "lr": 0.000118, |
| "step": 60, |
| "tokens_trained": 0.005703096 |
| }, |
| { |
| "epoch": 0.017588652482269502, |
| "grad_norm": 35.136573791503906, |
| "loss": 22.2998, |
| "lr": 0.000122, |
| "step": 62, |
| "tokens_trained": 0.005892448 |
| }, |
| { |
| "epoch": 0.018156028368794326, |
| "grad_norm": 38.05111312866211, |
| "loss": 21.9401, |
| "lr": 0.000126, |
| "step": 64, |
| "tokens_trained": 0.006081656 |
| }, |
| { |
| "epoch": 0.01872340425531915, |
| "grad_norm": 35.63850021362305, |
| "loss": 21.7206, |
| "lr": 0.00013000000000000002, |
| "step": 66, |
| "tokens_trained": 0.006273032 |
| }, |
| { |
| "epoch": 0.01929078014184397, |
| "grad_norm": 34.327667236328125, |
| "loss": 21.4051, |
| "lr": 0.000134, |
| "step": 68, |
| "tokens_trained": 0.00646304 |
| }, |
| { |
| "epoch": 0.019858156028368795, |
| "grad_norm": 31.457059860229492, |
| "loss": 21.0774, |
| "lr": 0.00013800000000000002, |
| "step": 70, |
| "tokens_trained": 0.006652832 |
| }, |
| { |
| "epoch": 0.020425531914893616, |
| "grad_norm": 34.91672897338867, |
| "loss": 20.8718, |
| "lr": 0.00014199999999999998, |
| "step": 72, |
| "tokens_trained": 0.006843512 |
| }, |
| { |
| "epoch": 0.02099290780141844, |
| "grad_norm": 27.959579467773438, |
| "loss": 20.6932, |
| "lr": 0.000146, |
| "step": 74, |
| "tokens_trained": 0.007033584 |
| }, |
| { |
| "epoch": 0.02156028368794326, |
| "grad_norm": 26.569866180419922, |
| "loss": 20.4072, |
| "lr": 0.00015, |
| "step": 76, |
| "tokens_trained": 0.007224032 |
| }, |
| { |
| "epoch": 0.022127659574468085, |
| "grad_norm": 28.009904861450195, |
| "loss": 20.2229, |
| "lr": 0.000154, |
| "step": 78, |
| "tokens_trained": 0.00741368 |
| }, |
| { |
| "epoch": 0.02269503546099291, |
| "grad_norm": 28.892959594726562, |
| "loss": 20.0528, |
| "lr": 0.000158, |
| "step": 80, |
| "tokens_trained": 0.00760416 |
| }, |
| { |
| "epoch": 0.02326241134751773, |
| "grad_norm": 31.58131980895996, |
| "loss": 19.8016, |
| "lr": 0.000162, |
| "step": 82, |
| "tokens_trained": 0.007793952 |
| }, |
| { |
| "epoch": 0.023829787234042554, |
| "grad_norm": 31.01254653930664, |
| "loss": 19.634, |
| "lr": 0.00016600000000000002, |
| "step": 84, |
| "tokens_trained": 0.007980792 |
| }, |
| { |
| "epoch": 0.024397163120567375, |
| "grad_norm": 28.732515335083008, |
| "loss": 19.3777, |
| "lr": 0.00017, |
| "step": 86, |
| "tokens_trained": 0.008171968 |
| }, |
| { |
| "epoch": 0.0249645390070922, |
| "grad_norm": 24.31264877319336, |
| "loss": 19.1346, |
| "lr": 0.000174, |
| "step": 88, |
| "tokens_trained": 0.008361632 |
| }, |
| { |
| "epoch": 0.02553191489361702, |
| "grad_norm": 26.557010650634766, |
| "loss": 19.0014, |
| "lr": 0.000178, |
| "step": 90, |
| "tokens_trained": 0.008552328 |
| }, |
| { |
| "epoch": 0.026099290780141844, |
| "grad_norm": 21.156103134155273, |
| "loss": 18.7032, |
| "lr": 0.000182, |
| "step": 92, |
| "tokens_trained": 0.008743136 |
| }, |
| { |
| "epoch": 0.02666666666666667, |
| "grad_norm": 25.7484188079834, |
| "loss": 18.4836, |
| "lr": 0.000186, |
| "step": 94, |
| "tokens_trained": 0.008932056 |
| }, |
| { |
| "epoch": 0.02723404255319149, |
| "grad_norm": 22.27949333190918, |
| "loss": 18.2233, |
| "lr": 0.00019, |
| "step": 96, |
| "tokens_trained": 0.009121608 |
| }, |
| { |
| "epoch": 0.027801418439716313, |
| "grad_norm": 24.9247989654541, |
| "loss": 17.9867, |
| "lr": 0.000194, |
| "step": 98, |
| "tokens_trained": 0.009311008 |
| }, |
| { |
| "epoch": 0.028368794326241134, |
| "grad_norm": 24.302066802978516, |
| "loss": 17.8016, |
| "lr": 0.00019800000000000002, |
| "step": 100, |
| "tokens_trained": 0.009501456 |
| }, |
| { |
| "epoch": 0.02893617021276596, |
| "grad_norm": 23.458459854125977, |
| "loss": 17.6295, |
| "lr": 0.000202, |
| "step": 102, |
| "tokens_trained": 0.009693952 |
| }, |
| { |
| "epoch": 0.02950354609929078, |
| "grad_norm": 24.092350006103516, |
| "loss": 17.4593, |
| "lr": 0.000206, |
| "step": 104, |
| "tokens_trained": 0.009883328 |
| }, |
| { |
| "epoch": 0.030070921985815603, |
| "grad_norm": 22.54726219177246, |
| "loss": 17.2141, |
| "lr": 0.00021, |
| "step": 106, |
| "tokens_trained": 0.01007316 |
| }, |
| { |
| "epoch": 0.030638297872340424, |
| "grad_norm": 21.334760665893555, |
| "loss": 17.044, |
| "lr": 0.000214, |
| "step": 108, |
| "tokens_trained": 0.010266504 |
| }, |
| { |
| "epoch": 0.031205673758865248, |
| "grad_norm": 20.584287643432617, |
| "loss": 16.8919, |
| "lr": 0.000218, |
| "step": 110, |
| "tokens_trained": 0.010455736 |
| }, |
| { |
| "epoch": 0.03177304964539007, |
| "grad_norm": 23.51676368713379, |
| "loss": 16.751, |
| "lr": 0.000222, |
| "step": 112, |
| "tokens_trained": 0.010645208 |
| }, |
| { |
| "epoch": 0.03234042553191489, |
| "grad_norm": 23.278276443481445, |
| "loss": 16.5997, |
| "lr": 0.00022600000000000002, |
| "step": 114, |
| "tokens_trained": 0.010838928 |
| }, |
| { |
| "epoch": 0.032907801418439714, |
| "grad_norm": 25.4830265045166, |
| "loss": 16.3416, |
| "lr": 0.00023, |
| "step": 116, |
| "tokens_trained": 0.011027792 |
| }, |
| { |
| "epoch": 0.03347517730496454, |
| "grad_norm": 29.442413330078125, |
| "loss": 16.24, |
| "lr": 0.00023400000000000002, |
| "step": 118, |
| "tokens_trained": 0.011217456 |
| }, |
| { |
| "epoch": 0.03404255319148936, |
| "grad_norm": 21.77578353881836, |
| "loss": 16.1922, |
| "lr": 0.00023799999999999998, |
| "step": 120, |
| "tokens_trained": 0.01140804 |
| }, |
| { |
| "epoch": 0.03460992907801418, |
| "grad_norm": 27.040719985961914, |
| "loss": 15.9059, |
| "lr": 0.000242, |
| "step": 122, |
| "tokens_trained": 0.011597816 |
| }, |
| { |
| "epoch": 0.035177304964539004, |
| "grad_norm": 24.74480628967285, |
| "loss": 15.7818, |
| "lr": 0.000246, |
| "step": 124, |
| "tokens_trained": 0.011785624 |
| }, |
| { |
| "epoch": 0.03546099290780142, |
| "eval_loss": 15.553059577941895, |
| "eval_runtime": 23.5485, |
| "step": 125, |
| "tokens_trained": 0.011880832 |
| }, |
| { |
| "epoch": 0.03574468085106383, |
| "grad_norm": 23.13482666015625, |
| "loss": 15.5739, |
| "lr": 0.00025, |
| "step": 126, |
| "tokens_trained": 0.011975976 |
| }, |
| { |
| "epoch": 0.03631205673758865, |
| "grad_norm": 22.8618106842041, |
| "loss": 15.4302, |
| "lr": 0.000254, |
| "step": 128, |
| "tokens_trained": 0.012166744 |
| }, |
| { |
| "epoch": 0.03687943262411347, |
| "grad_norm": 26.804859161376953, |
| "loss": 15.3623, |
| "lr": 0.00025800000000000004, |
| "step": 130, |
| "tokens_trained": 0.01235436 |
| }, |
| { |
| "epoch": 0.0374468085106383, |
| "grad_norm": 21.826601028442383, |
| "loss": 15.1465, |
| "lr": 0.000262, |
| "step": 132, |
| "tokens_trained": 0.012544976 |
| }, |
| { |
| "epoch": 0.03801418439716312, |
| "grad_norm": 39.447086334228516, |
| "loss": 15.0137, |
| "lr": 0.000266, |
| "step": 134, |
| "tokens_trained": 0.012736352 |
| }, |
| { |
| "epoch": 0.03858156028368794, |
| "grad_norm": 23.44275665283203, |
| "loss": 14.9355, |
| "lr": 0.00027, |
| "step": 136, |
| "tokens_trained": 0.012925008 |
| }, |
| { |
| "epoch": 0.03914893617021276, |
| "grad_norm": 21.631427764892578, |
| "loss": 14.6825, |
| "lr": 0.00027400000000000005, |
| "step": 138, |
| "tokens_trained": 0.013114672 |
| }, |
| { |
| "epoch": 0.03971631205673759, |
| "grad_norm": 23.674650192260742, |
| "loss": 14.5194, |
| "lr": 0.00027800000000000004, |
| "step": 140, |
| "tokens_trained": 0.013304016 |
| }, |
| { |
| "epoch": 0.04028368794326241, |
| "grad_norm": 23.974796295166016, |
| "loss": 14.4829, |
| "lr": 0.00028199999999999997, |
| "step": 142, |
| "tokens_trained": 0.013496696 |
| }, |
| { |
| "epoch": 0.04085106382978723, |
| "grad_norm": 26.112201690673828, |
| "loss": 14.3027, |
| "lr": 0.00028599999999999996, |
| "step": 144, |
| "tokens_trained": 0.013684816 |
| }, |
| { |
| "epoch": 0.04141843971631206, |
| "grad_norm": 20.67386817932129, |
| "loss": 14.1499, |
| "lr": 0.00029, |
| "step": 146, |
| "tokens_trained": 0.013874832 |
| }, |
| { |
| "epoch": 0.04198581560283688, |
| "grad_norm": 24.253408432006836, |
| "loss": 13.9378, |
| "lr": 0.000294, |
| "step": 148, |
| "tokens_trained": 0.014065056 |
| }, |
| { |
| "epoch": 0.0425531914893617, |
| "grad_norm": 35.716087341308594, |
| "loss": 14.0562, |
| "lr": 0.000298, |
| "step": 150, |
| "tokens_trained": 0.014256784 |
| }, |
| { |
| "epoch": 0.04312056737588652, |
| "grad_norm": 29.414331436157227, |
| "loss": 14.0462, |
| "lr": 0.000302, |
| "step": 152, |
| "tokens_trained": 0.014446312 |
| }, |
| { |
| "epoch": 0.04368794326241135, |
| "grad_norm": 30.687482833862305, |
| "loss": 13.7603, |
| "lr": 0.000306, |
| "step": 154, |
| "tokens_trained": 0.014639872 |
| }, |
| { |
| "epoch": 0.04425531914893617, |
| "grad_norm": 29.806455612182617, |
| "loss": 13.708, |
| "lr": 0.00031, |
| "step": 156, |
| "tokens_trained": 0.014831112 |
| }, |
| { |
| "epoch": 0.04482269503546099, |
| "grad_norm": 24.900897979736328, |
| "loss": 13.548, |
| "lr": 0.000314, |
| "step": 158, |
| "tokens_trained": 0.015021288 |
| }, |
| { |
| "epoch": 0.04539007092198582, |
| "grad_norm": 24.29252815246582, |
| "loss": 13.3119, |
| "lr": 0.00031800000000000003, |
| "step": 160, |
| "tokens_trained": 0.01521228 |
| }, |
| { |
| "epoch": 0.04595744680851064, |
| "grad_norm": 20.68342399597168, |
| "loss": 13.1829, |
| "lr": 0.000322, |
| "step": 162, |
| "tokens_trained": 0.015403688 |
| }, |
| { |
| "epoch": 0.04652482269503546, |
| "grad_norm": 20.822795867919922, |
| "loss": 12.9044, |
| "lr": 0.000326, |
| "step": 164, |
| "tokens_trained": 0.015593416 |
| }, |
| { |
| "epoch": 0.04709219858156028, |
| "grad_norm": 21.689916610717773, |
| "loss": 12.6862, |
| "lr": 0.00033, |
| "step": 166, |
| "tokens_trained": 0.015784408 |
| }, |
| { |
| "epoch": 0.04765957446808511, |
| "grad_norm": 17.873889923095703, |
| "loss": 12.5502, |
| "lr": 0.00033400000000000004, |
| "step": 168, |
| "tokens_trained": 0.0159744 |
| }, |
| { |
| "epoch": 0.04822695035460993, |
| "grad_norm": 18.951616287231445, |
| "loss": 12.308, |
| "lr": 0.00033800000000000003, |
| "step": 170, |
| "tokens_trained": 0.016163736 |
| }, |
| { |
| "epoch": 0.04879432624113475, |
| "grad_norm": 15.146363258361816, |
| "loss": 12.1558, |
| "lr": 0.000342, |
| "step": 172, |
| "tokens_trained": 0.016353832 |
| }, |
| { |
| "epoch": 0.04936170212765958, |
| "grad_norm": 18.336984634399414, |
| "loss": 12.0386, |
| "lr": 0.000346, |
| "step": 174, |
| "tokens_trained": 0.016545088 |
| }, |
| { |
| "epoch": 0.0499290780141844, |
| "grad_norm": 17.221126556396484, |
| "loss": 11.8791, |
| "lr": 0.00035, |
| "step": 176, |
| "tokens_trained": 0.016735704 |
| }, |
| { |
| "epoch": 0.05049645390070922, |
| "grad_norm": 19.362564086914062, |
| "loss": 11.7224, |
| "lr": 0.000354, |
| "step": 178, |
| "tokens_trained": 0.016927944 |
| }, |
| { |
| "epoch": 0.05106382978723404, |
| "grad_norm": 15.564507484436035, |
| "loss": 11.6448, |
| "lr": 0.000358, |
| "step": 180, |
| "tokens_trained": 0.017116096 |
| }, |
| { |
| "epoch": 0.05163120567375887, |
| "grad_norm": 20.711383819580078, |
| "loss": 11.4398, |
| "lr": 0.000362, |
| "step": 182, |
| "tokens_trained": 0.01730564 |
| }, |
| { |
| "epoch": 0.05219858156028369, |
| "grad_norm": 18.627403259277344, |
| "loss": 11.3377, |
| "lr": 0.000366, |
| "step": 184, |
| "tokens_trained": 0.017495864 |
| }, |
| { |
| "epoch": 0.05276595744680851, |
| "grad_norm": 15.00942325592041, |
| "loss": 11.1416, |
| "lr": 0.00037, |
| "step": 186, |
| "tokens_trained": 0.017686464 |
| }, |
| { |
| "epoch": 0.05333333333333334, |
| "grad_norm": 17.070598602294922, |
| "loss": 11.0148, |
| "lr": 0.000374, |
| "step": 188, |
| "tokens_trained": 0.017879488 |
| }, |
| { |
| "epoch": 0.05390070921985816, |
| "grad_norm": 16.101457595825195, |
| "loss": 10.8874, |
| "lr": 0.000378, |
| "step": 190, |
| "tokens_trained": 0.018068312 |
| }, |
| { |
| "epoch": 0.05446808510638298, |
| "grad_norm": 15.613334655761719, |
| "loss": 10.7055, |
| "lr": 0.000382, |
| "step": 192, |
| "tokens_trained": 0.018255752 |
| }, |
| { |
| "epoch": 0.0550354609929078, |
| "grad_norm": 17.671857833862305, |
| "loss": 10.5706, |
| "lr": 0.000386, |
| "step": 194, |
| "tokens_trained": 0.018447096 |
| }, |
| { |
| "epoch": 0.05560283687943263, |
| "grad_norm": 16.080909729003906, |
| "loss": 10.4476, |
| "lr": 0.00039000000000000005, |
| "step": 196, |
| "tokens_trained": 0.018637264 |
| }, |
| { |
| "epoch": 0.05617021276595745, |
| "grad_norm": 15.02849292755127, |
| "loss": 10.2962, |
| "lr": 0.00039400000000000004, |
| "step": 198, |
| "tokens_trained": 0.018827552 |
| }, |
| { |
| "epoch": 0.05673758865248227, |
| "grad_norm": 14.990167617797852, |
| "loss": 10.1912, |
| "lr": 0.000398, |
| "step": 200, |
| "tokens_trained": 0.019018 |
| }, |
| { |
| "epoch": 0.05730496453900709, |
| "grad_norm": 15.390633583068848, |
| "loss": 10.0442, |
| "lr": 0.000402, |
| "step": 202, |
| "tokens_trained": 0.019209864 |
| }, |
| { |
| "epoch": 0.05787234042553192, |
| "grad_norm": 16.871570587158203, |
| "loss": 9.9685, |
| "lr": 0.00040600000000000006, |
| "step": 204, |
| "tokens_trained": 0.019400176 |
| }, |
| { |
| "epoch": 0.05843971631205674, |
| "grad_norm": 20.16544532775879, |
| "loss": 9.8531, |
| "lr": 0.00041, |
| "step": 206, |
| "tokens_trained": 0.019589424 |
| }, |
| { |
| "epoch": 0.05900709219858156, |
| "grad_norm": 16.825023651123047, |
| "loss": 9.7777, |
| "lr": 0.000414, |
| "step": 208, |
| "tokens_trained": 0.019779112 |
| }, |
| { |
| "epoch": 0.059574468085106386, |
| "grad_norm": 16.43510627746582, |
| "loss": 9.6122, |
| "lr": 0.00041799999999999997, |
| "step": 210, |
| "tokens_trained": 0.019970048 |
| }, |
| { |
| "epoch": 0.060141843971631206, |
| "grad_norm": 17.340473175048828, |
| "loss": 9.4859, |
| "lr": 0.000422, |
| "step": 212, |
| "tokens_trained": 0.020160968 |
| }, |
| { |
| "epoch": 0.06070921985815603, |
| "grad_norm": 15.019119262695312, |
| "loss": 9.3656, |
| "lr": 0.000426, |
| "step": 214, |
| "tokens_trained": 0.020349664 |
| }, |
| { |
| "epoch": 0.06127659574468085, |
| "grad_norm": 13.379194259643555, |
| "loss": 9.2348, |
| "lr": 0.00043, |
| "step": 216, |
| "tokens_trained": 0.020538192 |
| }, |
| { |
| "epoch": 0.061843971631205676, |
| "grad_norm": 16.71472930908203, |
| "loss": 9.2258, |
| "lr": 0.00043400000000000003, |
| "step": 218, |
| "tokens_trained": 0.020728936 |
| }, |
| { |
| "epoch": 0.062411347517730496, |
| "grad_norm": 12.743139266967773, |
| "loss": 9.0569, |
| "lr": 0.000438, |
| "step": 220, |
| "tokens_trained": 0.020917472 |
| }, |
| { |
| "epoch": 0.06297872340425532, |
| "grad_norm": 15.739934921264648, |
| "loss": 8.9623, |
| "lr": 0.000442, |
| "step": 222, |
| "tokens_trained": 0.02110928 |
| }, |
| { |
| "epoch": 0.06354609929078014, |
| "grad_norm": 14.23620891571045, |
| "loss": 8.8201, |
| "lr": 0.000446, |
| "step": 224, |
| "tokens_trained": 0.021300168 |
| }, |
| { |
| "epoch": 0.06411347517730497, |
| "grad_norm": 13.005538940429688, |
| "loss": 8.7235, |
| "lr": 0.00045000000000000004, |
| "step": 226, |
| "tokens_trained": 0.021490272 |
| }, |
| { |
| "epoch": 0.06468085106382979, |
| "grad_norm": 17.17629051208496, |
| "loss": 8.6907, |
| "lr": 0.00045400000000000003, |
| "step": 228, |
| "tokens_trained": 0.021681552 |
| }, |
| { |
| "epoch": 0.06524822695035461, |
| "grad_norm": 14.430739402770996, |
| "loss": 8.6196, |
| "lr": 0.000458, |
| "step": 230, |
| "tokens_trained": 0.02187236 |
| }, |
| { |
| "epoch": 0.06581560283687943, |
| "grad_norm": 14.575714111328125, |
| "loss": 8.4741, |
| "lr": 0.000462, |
| "step": 232, |
| "tokens_trained": 0.022061976 |
| }, |
| { |
| "epoch": 0.06638297872340425, |
| "grad_norm": 13.892754554748535, |
| "loss": 8.4118, |
| "lr": 0.00046600000000000005, |
| "step": 234, |
| "tokens_trained": 0.022252008 |
| }, |
| { |
| "epoch": 0.06695035460992908, |
| "grad_norm": 11.58240795135498, |
| "loss": 8.2781, |
| "lr": 0.00047, |
| "step": 236, |
| "tokens_trained": 0.02244284 |
| }, |
| { |
| "epoch": 0.0675177304964539, |
| "grad_norm": 13.022644996643066, |
| "loss": 8.2139, |
| "lr": 0.000474, |
| "step": 238, |
| "tokens_trained": 0.022631152 |
| }, |
| { |
| "epoch": 0.06808510638297872, |
| "grad_norm": 11.844677925109863, |
| "loss": 8.1134, |
| "lr": 0.00047799999999999996, |
| "step": 240, |
| "tokens_trained": 0.022821096 |
| }, |
| { |
| "epoch": 0.06865248226950355, |
| "grad_norm": 13.878067016601562, |
| "loss": 8.0221, |
| "lr": 0.000482, |
| "step": 242, |
| "tokens_trained": 0.023011656 |
| }, |
| { |
| "epoch": 0.06921985815602837, |
| "grad_norm": 12.34648323059082, |
| "loss": 7.9755, |
| "lr": 0.000486, |
| "step": 244, |
| "tokens_trained": 0.023201 |
| }, |
| { |
| "epoch": 0.06978723404255319, |
| "grad_norm": 14.238297462463379, |
| "loss": 7.8969, |
| "lr": 0.00049, |
| "step": 246, |
| "tokens_trained": 0.023391128 |
| }, |
| { |
| "epoch": 0.07035460992907801, |
| "grad_norm": 14.386019706726074, |
| "loss": 7.8627, |
| "lr": 0.000494, |
| "step": 248, |
| "tokens_trained": 0.023581768 |
| }, |
| { |
| "epoch": 0.07092198581560284, |
| "grad_norm": 13.623086929321289, |
| "loss": 7.7568, |
| "lr": 0.000498, |
| "step": 250, |
| "tokens_trained": 0.023771248 |
| }, |
| { |
| "epoch": 0.07092198581560284, |
| "eval_loss": 7.70297384262085, |
| "eval_runtime": 21.3853, |
| "step": 250, |
| "tokens_trained": 0.023771248 |
| }, |
| { |
| "epoch": 0.07148936170212766, |
| "grad_norm": 14.347646713256836, |
| "loss": 7.6842, |
| "lr": 0.0005020000000000001, |
| "step": 252, |
| "tokens_trained": 0.023961056 |
| }, |
| { |
| "epoch": 0.07205673758865248, |
| "grad_norm": 12.5592041015625, |
| "loss": 7.6516, |
| "lr": 0.000506, |
| "step": 254, |
| "tokens_trained": 0.024150968 |
| }, |
| { |
| "epoch": 0.0726241134751773, |
| "grad_norm": 13.219141960144043, |
| "loss": 7.5789, |
| "lr": 0.00051, |
| "step": 256, |
| "tokens_trained": 0.024340072 |
| }, |
| { |
| "epoch": 0.07319148936170213, |
| "grad_norm": 12.654081344604492, |
| "loss": 7.5369, |
| "lr": 0.000514, |
| "step": 258, |
| "tokens_trained": 0.024529296 |
| }, |
| { |
| "epoch": 0.07375886524822695, |
| "grad_norm": 13.136971473693848, |
| "loss": 7.4949, |
| "lr": 0.000518, |
| "step": 260, |
| "tokens_trained": 0.024719688 |
| }, |
| { |
| "epoch": 0.07432624113475177, |
| "grad_norm": 12.680288314819336, |
| "loss": 7.3904, |
| "lr": 0.000522, |
| "step": 262, |
| "tokens_trained": 0.024909632 |
| }, |
| { |
| "epoch": 0.0748936170212766, |
| "grad_norm": 12.754518508911133, |
| "loss": 7.3514, |
| "lr": 0.000526, |
| "step": 264, |
| "tokens_trained": 0.025098416 |
| }, |
| { |
| "epoch": 0.07546099290780142, |
| "grad_norm": 13.22311019897461, |
| "loss": 7.2951, |
| "lr": 0.0005300000000000001, |
| "step": 266, |
| "tokens_trained": 0.025287344 |
| }, |
| { |
| "epoch": 0.07602836879432624, |
| "grad_norm": 12.11903190612793, |
| "loss": 7.2229, |
| "lr": 0.0005340000000000001, |
| "step": 268, |
| "tokens_trained": 0.025477152 |
| }, |
| { |
| "epoch": 0.07659574468085106, |
| "grad_norm": 13.771833419799805, |
| "loss": 7.1815, |
| "lr": 0.0005380000000000001, |
| "step": 270, |
| "tokens_trained": 0.025668288 |
| }, |
| { |
| "epoch": 0.07716312056737588, |
| "grad_norm": 11.756864547729492, |
| "loss": 7.1669, |
| "lr": 0.0005420000000000001, |
| "step": 272, |
| "tokens_trained": 0.025858528 |
| }, |
| { |
| "epoch": 0.0777304964539007, |
| "grad_norm": 13.613094329833984, |
| "loss": 7.1079, |
| "lr": 0.000546, |
| "step": 274, |
| "tokens_trained": 0.026048616 |
| }, |
| { |
| "epoch": 0.07829787234042553, |
| "grad_norm": 10.001923561096191, |
| "loss": 7.0508, |
| "lr": 0.00055, |
| "step": 276, |
| "tokens_trained": 0.026236944 |
| }, |
| { |
| "epoch": 0.07886524822695036, |
| "grad_norm": 14.262083053588867, |
| "loss": 6.9955, |
| "lr": 0.000554, |
| "step": 278, |
| "tokens_trained": 0.026426848 |
| }, |
| { |
| "epoch": 0.07943262411347518, |
| "grad_norm": 12.381136894226074, |
| "loss": 6.9831, |
| "lr": 0.000558, |
| "step": 280, |
| "tokens_trained": 0.026616784 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 9.815845489501953, |
| "loss": 6.917, |
| "lr": 0.0005620000000000001, |
| "step": 282, |
| "tokens_trained": 0.026805176 |
| }, |
| { |
| "epoch": 0.08056737588652482, |
| "grad_norm": 11.669997215270996, |
| "loss": 6.8999, |
| "lr": 0.000566, |
| "step": 284, |
| "tokens_trained": 0.02699488 |
| }, |
| { |
| "epoch": 0.08113475177304964, |
| "grad_norm": 12.770941734313965, |
| "loss": 6.8998, |
| "lr": 0.00057, |
| "step": 286, |
| "tokens_trained": 0.027185784 |
| }, |
| { |
| "epoch": 0.08170212765957446, |
| "grad_norm": 15.572457313537598, |
| "loss": 6.841, |
| "lr": 0.000574, |
| "step": 288, |
| "tokens_trained": 0.027375896 |
| }, |
| { |
| "epoch": 0.08226950354609928, |
| "grad_norm": 10.980833053588867, |
| "loss": 6.8545, |
| "lr": 0.000578, |
| "step": 290, |
| "tokens_trained": 0.02756588 |
| }, |
| { |
| "epoch": 0.08283687943262412, |
| "grad_norm": 11.678337097167969, |
| "loss": 6.7853, |
| "lr": 0.0005819999999999999, |
| "step": 292, |
| "tokens_trained": 0.02775456 |
| }, |
| { |
| "epoch": 0.08340425531914894, |
| "grad_norm": 9.77885913848877, |
| "loss": 6.7465, |
| "lr": 0.0005859999999999999, |
| "step": 294, |
| "tokens_trained": 0.027942856 |
| }, |
| { |
| "epoch": 0.08397163120567376, |
| "grad_norm": 13.62730884552002, |
| "loss": 6.7276, |
| "lr": 0.00059, |
| "step": 296, |
| "tokens_trained": 0.028133152 |
| }, |
| { |
| "epoch": 0.08453900709219858, |
| "grad_norm": 10.644404411315918, |
| "loss": 6.6802, |
| "lr": 0.000594, |
| "step": 298, |
| "tokens_trained": 0.028322192 |
| }, |
| { |
| "epoch": 0.0851063829787234, |
| "grad_norm": 11.130610466003418, |
| "loss": 6.6548, |
| "lr": 0.000598, |
| "step": 300, |
| "tokens_trained": 0.0285122 |
| }, |
| { |
| "epoch": 0.08567375886524822, |
| "grad_norm": 11.557455062866211, |
| "loss": 6.6155, |
| "lr": 0.000602, |
| "step": 302, |
| "tokens_trained": 0.028699792 |
| }, |
| { |
| "epoch": 0.08624113475177304, |
| "grad_norm": 9.276884078979492, |
| "loss": 6.5989, |
| "lr": 0.000606, |
| "step": 304, |
| "tokens_trained": 0.028889896 |
| }, |
| { |
| "epoch": 0.08680851063829788, |
| "grad_norm": 9.616179466247559, |
| "loss": 6.5773, |
| "lr": 0.00061, |
| "step": 306, |
| "tokens_trained": 0.029082272 |
| }, |
| { |
| "epoch": 0.0873758865248227, |
| "grad_norm": 10.575953483581543, |
| "loss": 6.5358, |
| "lr": 0.000614, |
| "step": 308, |
| "tokens_trained": 0.029273352 |
| }, |
| { |
| "epoch": 0.08794326241134752, |
| "grad_norm": 9.089850425720215, |
| "loss": 6.5088, |
| "lr": 0.0006180000000000001, |
| "step": 310, |
| "tokens_trained": 0.029463848 |
| }, |
| { |
| "epoch": 0.08851063829787234, |
| "grad_norm": 9.090002059936523, |
| "loss": 6.4849, |
| "lr": 0.000622, |
| "step": 312, |
| "tokens_trained": 0.029653272 |
| }, |
| { |
| "epoch": 0.08907801418439716, |
| "grad_norm": 12.038308143615723, |
| "loss": 6.4624, |
| "lr": 0.000626, |
| "step": 314, |
| "tokens_trained": 0.029841928 |
| }, |
| { |
| "epoch": 0.08964539007092198, |
| "grad_norm": 9.073866844177246, |
| "loss": 6.4515, |
| "lr": 0.00063, |
| "step": 316, |
| "tokens_trained": 0.030029808 |
| }, |
| { |
| "epoch": 0.0902127659574468, |
| "grad_norm": 8.727197647094727, |
| "loss": 6.43, |
| "lr": 0.000634, |
| "step": 318, |
| "tokens_trained": 0.030221288 |
| }, |
| { |
| "epoch": 0.09078014184397164, |
| "grad_norm": 14.558151245117188, |
| "loss": 6.4487, |
| "lr": 0.000638, |
| "step": 320, |
| "tokens_trained": 0.030410872 |
| }, |
| { |
| "epoch": 0.09134751773049646, |
| "grad_norm": 9.98914623260498, |
| "loss": 6.4279, |
| "lr": 0.000642, |
| "step": 322, |
| "tokens_trained": 0.030602376 |
| }, |
| { |
| "epoch": 0.09191489361702128, |
| "grad_norm": 10.395442962646484, |
| "loss": 6.4311, |
| "lr": 0.000646, |
| "step": 324, |
| "tokens_trained": 0.030792968 |
| }, |
| { |
| "epoch": 0.0924822695035461, |
| "grad_norm": 10.8250093460083, |
| "loss": 6.3726, |
| "lr": 0.0006500000000000001, |
| "step": 326, |
| "tokens_trained": 0.030982944 |
| }, |
| { |
| "epoch": 0.09304964539007092, |
| "grad_norm": 9.73416805267334, |
| "loss": 6.34, |
| "lr": 0.0006540000000000001, |
| "step": 328, |
| "tokens_trained": 0.031174928 |
| }, |
| { |
| "epoch": 0.09361702127659574, |
| "grad_norm": 8.596503257751465, |
| "loss": 6.3322, |
| "lr": 0.0006580000000000001, |
| "step": 330, |
| "tokens_trained": 0.031364288 |
| }, |
| { |
| "epoch": 0.09418439716312056, |
| "grad_norm": 8.49472427368164, |
| "loss": 6.3096, |
| "lr": 0.000662, |
| "step": 332, |
| "tokens_trained": 0.03155376 |
| }, |
| { |
| "epoch": 0.0947517730496454, |
| "grad_norm": 7.857503414154053, |
| "loss": 6.2368, |
| "lr": 0.000666, |
| "step": 334, |
| "tokens_trained": 0.031744368 |
| }, |
| { |
| "epoch": 0.09531914893617022, |
| "grad_norm": 9.007513999938965, |
| "loss": 6.198, |
| "lr": 0.00067, |
| "step": 336, |
| "tokens_trained": 0.031934136 |
| }, |
| { |
| "epoch": 0.09588652482269504, |
| "grad_norm": 8.185524940490723, |
| "loss": 6.2328, |
| "lr": 0.000674, |
| "step": 338, |
| "tokens_trained": 0.032124984 |
| }, |
| { |
| "epoch": 0.09645390070921986, |
| "grad_norm": 8.784396171569824, |
| "loss": 6.1945, |
| "lr": 0.0006780000000000001, |
| "step": 340, |
| "tokens_trained": 0.032316016 |
| }, |
| { |
| "epoch": 0.09702127659574468, |
| "grad_norm": 8.642311096191406, |
| "loss": 6.218, |
| "lr": 0.0006820000000000001, |
| "step": 342, |
| "tokens_trained": 0.032506224 |
| }, |
| { |
| "epoch": 0.0975886524822695, |
| "grad_norm": 8.493780136108398, |
| "loss": 6.194, |
| "lr": 0.0006860000000000001, |
| "step": 344, |
| "tokens_trained": 0.032696152 |
| }, |
| { |
| "epoch": 0.09815602836879432, |
| "grad_norm": 9.120508193969727, |
| "loss": 6.2241, |
| "lr": 0.00069, |
| "step": 346, |
| "tokens_trained": 0.032885688 |
| }, |
| { |
| "epoch": 0.09872340425531916, |
| "grad_norm": 9.34500503540039, |
| "loss": 6.1548, |
| "lr": 0.000694, |
| "step": 348, |
| "tokens_trained": 0.03307568 |
| }, |
| { |
| "epoch": 0.09929078014184398, |
| "grad_norm": 7.483356952667236, |
| "loss": 6.1282, |
| "lr": 0.0006979999999999999, |
| "step": 350, |
| "tokens_trained": 0.033267208 |
| }, |
| { |
| "epoch": 0.0998581560283688, |
| "grad_norm": 7.974069118499756, |
| "loss": 6.1032, |
| "lr": 0.0007019999999999999, |
| "step": 352, |
| "tokens_trained": 0.033458144 |
| }, |
| { |
| "epoch": 0.10042553191489362, |
| "grad_norm": 8.247384071350098, |
| "loss": 6.1698, |
| "lr": 0.0007059999999999999, |
| "step": 354, |
| "tokens_trained": 0.033650352 |
| }, |
| { |
| "epoch": 0.10099290780141844, |
| "grad_norm": 8.554885864257812, |
| "loss": 6.1429, |
| "lr": 0.00071, |
| "step": 356, |
| "tokens_trained": 0.033840232 |
| }, |
| { |
| "epoch": 0.10156028368794326, |
| "grad_norm": 7.209281921386719, |
| "loss": 6.0997, |
| "lr": 0.000714, |
| "step": 358, |
| "tokens_trained": 0.034030032 |
| }, |
| { |
| "epoch": 0.10212765957446808, |
| "grad_norm": 8.660383224487305, |
| "loss": 6.1497, |
| "lr": 0.000718, |
| "step": 360, |
| "tokens_trained": 0.034218592 |
| }, |
| { |
| "epoch": 0.10269503546099291, |
| "grad_norm": 9.382761001586914, |
| "loss": 6.0665, |
| "lr": 0.000722, |
| "step": 362, |
| "tokens_trained": 0.034408408 |
| }, |
| { |
| "epoch": 0.10326241134751774, |
| "grad_norm": 6.915714263916016, |
| "loss": 6.0636, |
| "lr": 0.000726, |
| "step": 364, |
| "tokens_trained": 0.034600016 |
| }, |
| { |
| "epoch": 0.10382978723404256, |
| "grad_norm": 7.8990631103515625, |
| "loss": 6.0975, |
| "lr": 0.00073, |
| "step": 366, |
| "tokens_trained": 0.034790792 |
| }, |
| { |
| "epoch": 0.10439716312056738, |
| "grad_norm": 8.859809875488281, |
| "loss": 6.0754, |
| "lr": 0.000734, |
| "step": 368, |
| "tokens_trained": 0.034981304 |
| }, |
| { |
| "epoch": 0.1049645390070922, |
| "grad_norm": 7.392801761627197, |
| "loss": 6.039, |
| "lr": 0.000738, |
| "step": 370, |
| "tokens_trained": 0.03516956 |
| }, |
| { |
| "epoch": 0.10553191489361702, |
| "grad_norm": 9.427324295043945, |
| "loss": 6.084, |
| "lr": 0.000742, |
| "step": 372, |
| "tokens_trained": 0.035358816 |
| }, |
| { |
| "epoch": 0.10609929078014184, |
| "grad_norm": 7.168910503387451, |
| "loss": 6.0498, |
| "lr": 0.000746, |
| "step": 374, |
| "tokens_trained": 0.035548016 |
| }, |
| { |
| "epoch": 0.10638297872340426, |
| "eval_loss": 6.038269996643066, |
| "eval_runtime": 21.3445, |
| "step": 375, |
| "tokens_trained": 0.035644104 |
| }, |
| { |
| "epoch": 0.10666666666666667, |
| "grad_norm": 7.899259567260742, |
| "loss": 6.0345, |
| "lr": 0.00075, |
| "step": 376, |
| "tokens_trained": 0.035739856 |
| }, |
| { |
| "epoch": 0.1072340425531915, |
| "grad_norm": 8.91533374786377, |
| "loss": 6.0386, |
| "lr": 0.000754, |
| "step": 378, |
| "tokens_trained": 0.035930264 |
| }, |
| { |
| "epoch": 0.10780141843971631, |
| "grad_norm": 6.998043060302734, |
| "loss": 6.0294, |
| "lr": 0.000758, |
| "step": 380, |
| "tokens_trained": 0.036119616 |
| }, |
| { |
| "epoch": 0.10836879432624114, |
| "grad_norm": 7.343894958496094, |
| "loss": 6.0116, |
| "lr": 0.000762, |
| "step": 382, |
| "tokens_trained": 0.036308416 |
| }, |
| { |
| "epoch": 0.10893617021276596, |
| "grad_norm": 8.182528495788574, |
| "loss": 5.9904, |
| "lr": 0.0007660000000000001, |
| "step": 384, |
| "tokens_trained": 0.036497264 |
| }, |
| { |
| "epoch": 0.10950354609929078, |
| "grad_norm": 7.927818775177002, |
| "loss": 6.0345, |
| "lr": 0.0007700000000000001, |
| "step": 386, |
| "tokens_trained": 0.036688192 |
| }, |
| { |
| "epoch": 0.1100709219858156, |
| "grad_norm": 8.07447338104248, |
| "loss": 5.9685, |
| "lr": 0.0007740000000000001, |
| "step": 388, |
| "tokens_trained": 0.036878256 |
| }, |
| { |
| "epoch": 0.11063829787234042, |
| "grad_norm": 7.281871318817139, |
| "loss": 6.0125, |
| "lr": 0.000778, |
| "step": 390, |
| "tokens_trained": 0.037068272 |
| }, |
| { |
| "epoch": 0.11120567375886525, |
| "grad_norm": 8.298929214477539, |
| "loss": 6.0071, |
| "lr": 0.000782, |
| "step": 392, |
| "tokens_trained": 0.037259464 |
| }, |
| { |
| "epoch": 0.11177304964539007, |
| "grad_norm": 7.546716690063477, |
| "loss": 5.9721, |
| "lr": 0.000786, |
| "step": 394, |
| "tokens_trained": 0.037449696 |
| }, |
| { |
| "epoch": 0.1123404255319149, |
| "grad_norm": 8.28548526763916, |
| "loss": 5.9819, |
| "lr": 0.00079, |
| "step": 396, |
| "tokens_trained": 0.037639672 |
| }, |
| { |
| "epoch": 0.11290780141843972, |
| "grad_norm": 7.064655303955078, |
| "loss": 5.9873, |
| "lr": 0.0007940000000000001, |
| "step": 398, |
| "tokens_trained": 0.03782712 |
| }, |
| { |
| "epoch": 0.11347517730496454, |
| "grad_norm": 7.743175506591797, |
| "loss": 5.9528, |
| "lr": 0.0007980000000000001, |
| "step": 400, |
| "tokens_trained": 0.03801792 |
| }, |
| { |
| "epoch": 0.11404255319148936, |
| "grad_norm": 7.00898551940918, |
| "loss": 5.9504, |
| "lr": 0.0008020000000000001, |
| "step": 402, |
| "tokens_trained": 0.038209176 |
| }, |
| { |
| "epoch": 0.11460992907801418, |
| "grad_norm": 7.9350409507751465, |
| "loss": 5.9555, |
| "lr": 0.0008060000000000001, |
| "step": 404, |
| "tokens_trained": 0.03839824 |
| }, |
| { |
| "epoch": 0.11517730496453901, |
| "grad_norm": 7.048569679260254, |
| "loss": 5.9787, |
| "lr": 0.0008100000000000001, |
| "step": 406, |
| "tokens_trained": 0.03858732 |
| }, |
| { |
| "epoch": 0.11574468085106383, |
| "grad_norm": 7.088194370269775, |
| "loss": 5.928, |
| "lr": 0.0008139999999999999, |
| "step": 408, |
| "tokens_trained": 0.038777712 |
| }, |
| { |
| "epoch": 0.11631205673758865, |
| "grad_norm": 8.230712890625, |
| "loss": 5.9716, |
| "lr": 0.0008179999999999999, |
| "step": 410, |
| "tokens_trained": 0.038969464 |
| }, |
| { |
| "epoch": 0.11687943262411347, |
| "grad_norm": 8.076972007751465, |
| "loss": 5.9624, |
| "lr": 0.0008219999999999999, |
| "step": 412, |
| "tokens_trained": 0.039162064 |
| }, |
| { |
| "epoch": 0.1174468085106383, |
| "grad_norm": 8.065289497375488, |
| "loss": 5.9937, |
| "lr": 0.000826, |
| "step": 414, |
| "tokens_trained": 0.039348688 |
| }, |
| { |
| "epoch": 0.11801418439716312, |
| "grad_norm": 6.393420696258545, |
| "loss": 5.9278, |
| "lr": 0.00083, |
| "step": 416, |
| "tokens_trained": 0.03953732 |
| }, |
| { |
| "epoch": 0.11858156028368794, |
| "grad_norm": 7.384702682495117, |
| "loss": 5.931, |
| "lr": 0.000834, |
| "step": 418, |
| "tokens_trained": 0.039729808 |
| }, |
| { |
| "epoch": 0.11914893617021277, |
| "grad_norm": 7.007425308227539, |
| "loss": 5.93, |
| "lr": 0.000838, |
| "step": 420, |
| "tokens_trained": 0.039921096 |
| }, |
| { |
| "epoch": 0.11971631205673759, |
| "grad_norm": 7.112692832946777, |
| "loss": 5.9625, |
| "lr": 0.000842, |
| "step": 422, |
| "tokens_trained": 0.040110856 |
| }, |
| { |
| "epoch": 0.12028368794326241, |
| "grad_norm": 8.484418869018555, |
| "loss": 5.9848, |
| "lr": 0.000846, |
| "step": 424, |
| "tokens_trained": 0.040300504 |
| }, |
| { |
| "epoch": 0.12085106382978723, |
| "grad_norm": 6.633459091186523, |
| "loss": 6.0226, |
| "lr": 0.00085, |
| "step": 426, |
| "tokens_trained": 0.04049056 |
| }, |
| { |
| "epoch": 0.12141843971631205, |
| "grad_norm": 7.796964168548584, |
| "loss": 5.9152, |
| "lr": 0.000854, |
| "step": 428, |
| "tokens_trained": 0.040680544 |
| }, |
| { |
| "epoch": 0.12198581560283688, |
| "grad_norm": 7.833578586578369, |
| "loss": 5.924, |
| "lr": 0.000858, |
| "step": 430, |
| "tokens_trained": 0.040873128 |
| }, |
| { |
| "epoch": 0.1225531914893617, |
| "grad_norm": 6.7470550537109375, |
| "loss": 5.9318, |
| "lr": 0.000862, |
| "step": 432, |
| "tokens_trained": 0.041063488 |
| }, |
| { |
| "epoch": 0.12312056737588653, |
| "grad_norm": 6.066318988800049, |
| "loss": 5.9569, |
| "lr": 0.000866, |
| "step": 434, |
| "tokens_trained": 0.041254368 |
| }, |
| { |
| "epoch": 0.12368794326241135, |
| "grad_norm": 6.753541469573975, |
| "loss": 5.8851, |
| "lr": 0.00087, |
| "step": 436, |
| "tokens_trained": 0.04144516 |
| }, |
| { |
| "epoch": 0.12425531914893617, |
| "grad_norm": 6.471331596374512, |
| "loss": 5.864, |
| "lr": 0.000874, |
| "step": 438, |
| "tokens_trained": 0.041636912 |
| }, |
| { |
| "epoch": 0.12482269503546099, |
| "grad_norm": 6.129056930541992, |
| "loss": 5.8965, |
| "lr": 0.000878, |
| "step": 440, |
| "tokens_trained": 0.041828104 |
| }, |
| { |
| "epoch": 0.1253900709219858, |
| "grad_norm": 6.478890895843506, |
| "loss": 5.8817, |
| "lr": 0.000882, |
| "step": 442, |
| "tokens_trained": 0.04201808 |
| }, |
| { |
| "epoch": 0.12595744680851065, |
| "grad_norm": 6.014713287353516, |
| "loss": 5.8268, |
| "lr": 0.0008860000000000001, |
| "step": 444, |
| "tokens_trained": 0.042207328 |
| }, |
| { |
| "epoch": 0.12652482269503545, |
| "grad_norm": 5.505755424499512, |
| "loss": 5.8684, |
| "lr": 0.0008900000000000001, |
| "step": 446, |
| "tokens_trained": 0.042398152 |
| }, |
| { |
| "epoch": 0.1270921985815603, |
| "grad_norm": 10.096606254577637, |
| "loss": 5.8608, |
| "lr": 0.000894, |
| "step": 448, |
| "tokens_trained": 0.042588984 |
| }, |
| { |
| "epoch": 0.1276595744680851, |
| "grad_norm": 6.388499736785889, |
| "loss": 5.8766, |
| "lr": 0.000898, |
| "step": 450, |
| "tokens_trained": 0.042778592 |
| }, |
| { |
| "epoch": 0.12822695035460993, |
| "grad_norm": 7.145125865936279, |
| "loss": 5.8571, |
| "lr": 0.000902, |
| "step": 452, |
| "tokens_trained": 0.042967176 |
| }, |
| { |
| "epoch": 0.12879432624113477, |
| "grad_norm": 6.826383113861084, |
| "loss": 5.8655, |
| "lr": 0.000906, |
| "step": 454, |
| "tokens_trained": 0.043158952 |
| }, |
| { |
| "epoch": 0.12936170212765957, |
| "grad_norm": 6.036892414093018, |
| "loss": 5.8775, |
| "lr": 0.00091, |
| "step": 456, |
| "tokens_trained": 0.043349288 |
| }, |
| { |
| "epoch": 0.1299290780141844, |
| "grad_norm": 6.36528205871582, |
| "loss": 5.8908, |
| "lr": 0.0009140000000000001, |
| "step": 458, |
| "tokens_trained": 0.043539888 |
| }, |
| { |
| "epoch": 0.13049645390070921, |
| "grad_norm": 6.317558288574219, |
| "loss": 5.8702, |
| "lr": 0.0009180000000000001, |
| "step": 460, |
| "tokens_trained": 0.04373232 |
| }, |
| { |
| "epoch": 0.13106382978723405, |
| "grad_norm": 6.427131175994873, |
| "loss": 5.8399, |
| "lr": 0.0009220000000000001, |
| "step": 462, |
| "tokens_trained": 0.043922744 |
| }, |
| { |
| "epoch": 0.13163120567375886, |
| "grad_norm": 5.666539669036865, |
| "loss": 5.7899, |
| "lr": 0.0009260000000000001, |
| "step": 464, |
| "tokens_trained": 0.044112888 |
| }, |
| { |
| "epoch": 0.1321985815602837, |
| "grad_norm": 5.241824150085449, |
| "loss": 5.8203, |
| "lr": 0.00093, |
| "step": 466, |
| "tokens_trained": 0.04430244 |
| }, |
| { |
| "epoch": 0.1327659574468085, |
| "grad_norm": 6.072646141052246, |
| "loss": 5.8367, |
| "lr": 0.000934, |
| "step": 468, |
| "tokens_trained": 0.044493528 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 6.414418697357178, |
| "loss": 5.8236, |
| "lr": 0.0009379999999999999, |
| "step": 470, |
| "tokens_trained": 0.044682328 |
| }, |
| { |
| "epoch": 0.13390070921985817, |
| "grad_norm": 6.958801746368408, |
| "loss": 5.8179, |
| "lr": 0.000942, |
| "step": 472, |
| "tokens_trained": 0.044874256 |
| }, |
| { |
| "epoch": 0.13446808510638297, |
| "grad_norm": 5.787843227386475, |
| "loss": 5.8478, |
| "lr": 0.000946, |
| "step": 474, |
| "tokens_trained": 0.045065616 |
| }, |
| { |
| "epoch": 0.1350354609929078, |
| "grad_norm": 5.5841240882873535, |
| "loss": 5.8307, |
| "lr": 0.00095, |
| "step": 476, |
| "tokens_trained": 0.045257024 |
| }, |
| { |
| "epoch": 0.13560283687943261, |
| "grad_norm": 6.607712745666504, |
| "loss": 5.8512, |
| "lr": 0.000954, |
| "step": 478, |
| "tokens_trained": 0.045446432 |
| }, |
| { |
| "epoch": 0.13617021276595745, |
| "grad_norm": 5.473597049713135, |
| "loss": 5.8174, |
| "lr": 0.000958, |
| "step": 480, |
| "tokens_trained": 0.045636392 |
| }, |
| { |
| "epoch": 0.13673758865248226, |
| "grad_norm": 5.435728549957275, |
| "loss": 5.8308, |
| "lr": 0.000962, |
| "step": 482, |
| "tokens_trained": 0.045823784 |
| }, |
| { |
| "epoch": 0.1373049645390071, |
| "grad_norm": 6.049300670623779, |
| "loss": 5.8293, |
| "lr": 0.000966, |
| "step": 484, |
| "tokens_trained": 0.046013408 |
| }, |
| { |
| "epoch": 0.13787234042553193, |
| "grad_norm": 6.311764717102051, |
| "loss": 5.8086, |
| "lr": 0.0009699999999999999, |
| "step": 486, |
| "tokens_trained": 0.046202528 |
| }, |
| { |
| "epoch": 0.13843971631205673, |
| "grad_norm": 5.886009216308594, |
| "loss": 5.7986, |
| "lr": 0.000974, |
| "step": 488, |
| "tokens_trained": 0.04639404 |
| }, |
| { |
| "epoch": 0.13900709219858157, |
| "grad_norm": 5.438202381134033, |
| "loss": 5.8473, |
| "lr": 0.000978, |
| "step": 490, |
| "tokens_trained": 0.046586512 |
| }, |
| { |
| "epoch": 0.13957446808510637, |
| "grad_norm": 5.08393669128418, |
| "loss": 5.7613, |
| "lr": 0.000982, |
| "step": 492, |
| "tokens_trained": 0.046777448 |
| }, |
| { |
| "epoch": 0.1401418439716312, |
| "grad_norm": 5.645389080047607, |
| "loss": 5.7723, |
| "lr": 0.0009860000000000001, |
| "step": 494, |
| "tokens_trained": 0.046966096 |
| }, |
| { |
| "epoch": 0.14070921985815601, |
| "grad_norm": 6.320916652679443, |
| "loss": 5.7772, |
| "lr": 0.00099, |
| "step": 496, |
| "tokens_trained": 0.047155152 |
| }, |
| { |
| "epoch": 0.14127659574468085, |
| "grad_norm": 5.573540210723877, |
| "loss": 5.7412, |
| "lr": 0.000994, |
| "step": 498, |
| "tokens_trained": 0.047345352 |
| }, |
| { |
| "epoch": 0.14184397163120568, |
| "grad_norm": 4.939594745635986, |
| "loss": 5.8208, |
| "lr": 0.000998, |
| "step": 500, |
| "tokens_trained": 0.047535016 |
| }, |
| { |
| "epoch": 0.14184397163120568, |
| "eval_loss": 5.799490928649902, |
| "eval_runtime": 20.8575, |
| "step": 500, |
| "tokens_trained": 0.047535016 |
| }, |
| { |
| "epoch": 0.1424113475177305, |
| "grad_norm": 5.805343151092529, |
| "loss": 5.7734, |
| "lr": 0.00099986013986014, |
| "step": 502, |
| "tokens_trained": 0.047724216 |
| }, |
| { |
| "epoch": 0.14297872340425533, |
| "grad_norm": 5.831176280975342, |
| "loss": 5.8044, |
| "lr": 0.0009995804195804196, |
| "step": 504, |
| "tokens_trained": 0.047914328 |
| }, |
| { |
| "epoch": 0.14354609929078013, |
| "grad_norm": 5.045091152191162, |
| "loss": 5.8133, |
| "lr": 0.0009993006993006994, |
| "step": 506, |
| "tokens_trained": 0.048105032 |
| }, |
| { |
| "epoch": 0.14411347517730497, |
| "grad_norm": 5.276819705963135, |
| "loss": 5.7555, |
| "lr": 0.000999020979020979, |
| "step": 508, |
| "tokens_trained": 0.048293104 |
| }, |
| { |
| "epoch": 0.14468085106382977, |
| "grad_norm": 5.710324287414551, |
| "loss": 5.7619, |
| "lr": 0.0009987412587412587, |
| "step": 510, |
| "tokens_trained": 0.048483888 |
| }, |
| { |
| "epoch": 0.1452482269503546, |
| "grad_norm": 4.9472527503967285, |
| "loss": 5.767, |
| "lr": 0.0009984615384615386, |
| "step": 512, |
| "tokens_trained": 0.04867336 |
| }, |
| { |
| "epoch": 0.14581560283687944, |
| "grad_norm": 5.410078525543213, |
| "loss": 5.7238, |
| "lr": 0.0009981818181818182, |
| "step": 514, |
| "tokens_trained": 0.048863104 |
| }, |
| { |
| "epoch": 0.14638297872340425, |
| "grad_norm": 6.025843143463135, |
| "loss": 5.7664, |
| "lr": 0.000997902097902098, |
| "step": 516, |
| "tokens_trained": 0.049053856 |
| }, |
| { |
| "epoch": 0.14695035460992908, |
| "grad_norm": 5.3211669921875, |
| "loss": 5.747, |
| "lr": 0.0009976223776223777, |
| "step": 518, |
| "tokens_trained": 0.049245104 |
| }, |
| { |
| "epoch": 0.1475177304964539, |
| "grad_norm": 6.059483051300049, |
| "loss": 5.7611, |
| "lr": 0.0009973426573426573, |
| "step": 520, |
| "tokens_trained": 0.049434368 |
| }, |
| { |
| "epoch": 0.14808510638297873, |
| "grad_norm": 5.362505912780762, |
| "loss": 5.7607, |
| "lr": 0.000997062937062937, |
| "step": 522, |
| "tokens_trained": 0.049622648 |
| }, |
| { |
| "epoch": 0.14865248226950353, |
| "grad_norm": 5.391371726989746, |
| "loss": 5.7857, |
| "lr": 0.0009967832167832168, |
| "step": 524, |
| "tokens_trained": 0.049812304 |
| }, |
| { |
| "epoch": 0.14921985815602837, |
| "grad_norm": 4.3839030265808105, |
| "loss": 5.7334, |
| "lr": 0.0009965034965034964, |
| "step": 526, |
| "tokens_trained": 0.05000356 |
| }, |
| { |
| "epoch": 0.1497872340425532, |
| "grad_norm": 5.008530616760254, |
| "loss": 5.7475, |
| "lr": 0.0009962237762237763, |
| "step": 528, |
| "tokens_trained": 0.050193304 |
| }, |
| { |
| "epoch": 0.150354609929078, |
| "grad_norm": 5.068671226501465, |
| "loss": 5.7866, |
| "lr": 0.000995944055944056, |
| "step": 530, |
| "tokens_trained": 0.050382856 |
| }, |
| { |
| "epoch": 0.15092198581560284, |
| "grad_norm": 5.399240493774414, |
| "loss": 5.6857, |
| "lr": 0.0009956643356643356, |
| "step": 532, |
| "tokens_trained": 0.050570864 |
| }, |
| { |
| "epoch": 0.15148936170212765, |
| "grad_norm": 5.689481735229492, |
| "loss": 5.7586, |
| "lr": 0.0009953846153846154, |
| "step": 534, |
| "tokens_trained": 0.050760384 |
| }, |
| { |
| "epoch": 0.15205673758865249, |
| "grad_norm": 4.652275562286377, |
| "loss": 5.7866, |
| "lr": 0.000995104895104895, |
| "step": 536, |
| "tokens_trained": 0.050952712 |
| }, |
| { |
| "epoch": 0.1526241134751773, |
| "grad_norm": 4.126920223236084, |
| "loss": 5.7261, |
| "lr": 0.000994825174825175, |
| "step": 538, |
| "tokens_trained": 0.051141656 |
| }, |
| { |
| "epoch": 0.15319148936170213, |
| "grad_norm": 4.233098030090332, |
| "loss": 5.6903, |
| "lr": 0.0009945454545454546, |
| "step": 540, |
| "tokens_trained": 0.051331256 |
| }, |
| { |
| "epoch": 0.15375886524822696, |
| "grad_norm": 4.271973133087158, |
| "loss": 5.7293, |
| "lr": 0.0009942657342657344, |
| "step": 542, |
| "tokens_trained": 0.051522072 |
| }, |
| { |
| "epoch": 0.15432624113475177, |
| "grad_norm": 4.653008937835693, |
| "loss": 5.7133, |
| "lr": 0.000993986013986014, |
| "step": 544, |
| "tokens_trained": 0.051711624 |
| }, |
| { |
| "epoch": 0.1548936170212766, |
| "grad_norm": 4.192624092102051, |
| "loss": 5.6876, |
| "lr": 0.0009937062937062937, |
| "step": 546, |
| "tokens_trained": 0.051901744 |
| }, |
| { |
| "epoch": 0.1554609929078014, |
| "grad_norm": 5.497848033905029, |
| "loss": 5.7378, |
| "lr": 0.0009934265734265735, |
| "step": 548, |
| "tokens_trained": 0.052092872 |
| }, |
| { |
| "epoch": 0.15602836879432624, |
| "grad_norm": 4.350259780883789, |
| "loss": 5.6533, |
| "lr": 0.0009931468531468532, |
| "step": 550, |
| "tokens_trained": 0.052281768 |
| }, |
| { |
| "epoch": 0.15659574468085105, |
| "grad_norm": 4.515641689300537, |
| "loss": 5.7492, |
| "lr": 0.000992867132867133, |
| "step": 552, |
| "tokens_trained": 0.052471848 |
| }, |
| { |
| "epoch": 0.15716312056737589, |
| "grad_norm": 4.628066539764404, |
| "loss": 5.7113, |
| "lr": 0.0009925874125874127, |
| "step": 554, |
| "tokens_trained": 0.052660168 |
| }, |
| { |
| "epoch": 0.15773049645390072, |
| "grad_norm": 4.8322930335998535, |
| "loss": 5.6696, |
| "lr": 0.0009923076923076923, |
| "step": 556, |
| "tokens_trained": 0.05284776 |
| }, |
| { |
| "epoch": 0.15829787234042553, |
| "grad_norm": 3.999706506729126, |
| "loss": 5.7296, |
| "lr": 0.000992027972027972, |
| "step": 558, |
| "tokens_trained": 0.053037344 |
| }, |
| { |
| "epoch": 0.15886524822695036, |
| "grad_norm": 4.332971572875977, |
| "loss": 5.7362, |
| "lr": 0.0009917482517482518, |
| "step": 560, |
| "tokens_trained": 0.053228168 |
| }, |
| { |
| "epoch": 0.15943262411347517, |
| "grad_norm": 4.500301361083984, |
| "loss": 5.6982, |
| "lr": 0.0009914685314685314, |
| "step": 562, |
| "tokens_trained": 0.05341856 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 4.721808910369873, |
| "loss": 5.7166, |
| "lr": 0.0009911888111888113, |
| "step": 564, |
| "tokens_trained": 0.053608824 |
| }, |
| { |
| "epoch": 0.1605673758865248, |
| "grad_norm": 5.265316009521484, |
| "loss": 5.7069, |
| "lr": 0.000990909090909091, |
| "step": 566, |
| "tokens_trained": 0.053799728 |
| }, |
| { |
| "epoch": 0.16113475177304964, |
| "grad_norm": 5.024131774902344, |
| "loss": 5.7113, |
| "lr": 0.0009906293706293705, |
| "step": 568, |
| "tokens_trained": 0.05398944 |
| }, |
| { |
| "epoch": 0.16170212765957448, |
| "grad_norm": 4.063276767730713, |
| "loss": 5.6251, |
| "lr": 0.0009903496503496504, |
| "step": 570, |
| "tokens_trained": 0.054176512 |
| }, |
| { |
| "epoch": 0.1622695035460993, |
| "grad_norm": 4.15974760055542, |
| "loss": 5.6912, |
| "lr": 0.00099006993006993, |
| "step": 572, |
| "tokens_trained": 0.054367072 |
| }, |
| { |
| "epoch": 0.16283687943262412, |
| "grad_norm": 4.338894844055176, |
| "loss": 5.6807, |
| "lr": 0.0009897902097902099, |
| "step": 574, |
| "tokens_trained": 0.054559184 |
| }, |
| { |
| "epoch": 0.16340425531914893, |
| "grad_norm": 5.535487174987793, |
| "loss": 5.6765, |
| "lr": 0.0009895104895104895, |
| "step": 576, |
| "tokens_trained": 0.054748904 |
| }, |
| { |
| "epoch": 0.16397163120567376, |
| "grad_norm": 4.379040241241455, |
| "loss": 5.6884, |
| "lr": 0.0009892307692307694, |
| "step": 578, |
| "tokens_trained": 0.054936136 |
| }, |
| { |
| "epoch": 0.16453900709219857, |
| "grad_norm": 4.746179103851318, |
| "loss": 5.6885, |
| "lr": 0.000988951048951049, |
| "step": 580, |
| "tokens_trained": 0.055125584 |
| }, |
| { |
| "epoch": 0.1651063829787234, |
| "grad_norm": 4.949806213378906, |
| "loss": 5.7061, |
| "lr": 0.0009886713286713286, |
| "step": 582, |
| "tokens_trained": 0.055314608 |
| }, |
| { |
| "epoch": 0.16567375886524824, |
| "grad_norm": 4.507448196411133, |
| "loss": 5.6339, |
| "lr": 0.0009883916083916085, |
| "step": 584, |
| "tokens_trained": 0.055503992 |
| }, |
| { |
| "epoch": 0.16624113475177305, |
| "grad_norm": 4.131013870239258, |
| "loss": 5.7122, |
| "lr": 0.0009881118881118881, |
| "step": 586, |
| "tokens_trained": 0.055693376 |
| }, |
| { |
| "epoch": 0.16680851063829788, |
| "grad_norm": 5.32897424697876, |
| "loss": 5.7192, |
| "lr": 0.000987832167832168, |
| "step": 588, |
| "tokens_trained": 0.05588452 |
| }, |
| { |
| "epoch": 0.1673758865248227, |
| "grad_norm": 4.166877746582031, |
| "loss": 5.6666, |
| "lr": 0.0009875524475524476, |
| "step": 590, |
| "tokens_trained": 0.056073936 |
| }, |
| { |
| "epoch": 0.16794326241134752, |
| "grad_norm": 4.393389701843262, |
| "loss": 5.6113, |
| "lr": 0.0009872727272727273, |
| "step": 592, |
| "tokens_trained": 0.056262224 |
| }, |
| { |
| "epoch": 0.16851063829787233, |
| "grad_norm": 4.466696739196777, |
| "loss": 5.6466, |
| "lr": 0.000986993006993007, |
| "step": 594, |
| "tokens_trained": 0.056454008 |
| }, |
| { |
| "epoch": 0.16907801418439716, |
| "grad_norm": 3.9413373470306396, |
| "loss": 5.6838, |
| "lr": 0.0009867132867132867, |
| "step": 596, |
| "tokens_trained": 0.05664444 |
| }, |
| { |
| "epoch": 0.169645390070922, |
| "grad_norm": 3.594649314880371, |
| "loss": 5.6684, |
| "lr": 0.0009864335664335664, |
| "step": 598, |
| "tokens_trained": 0.056833864 |
| }, |
| { |
| "epoch": 0.1702127659574468, |
| "grad_norm": 3.5969483852386475, |
| "loss": 5.6619, |
| "lr": 0.0009861538461538462, |
| "step": 600, |
| "tokens_trained": 0.05702332 |
| }, |
| { |
| "epoch": 0.17078014184397164, |
| "grad_norm": 3.845414638519287, |
| "loss": 5.5855, |
| "lr": 0.0009858741258741259, |
| "step": 602, |
| "tokens_trained": 0.057212776 |
| }, |
| { |
| "epoch": 0.17134751773049645, |
| "grad_norm": 3.9198834896087646, |
| "loss": 5.6551, |
| "lr": 0.0009855944055944055, |
| "step": 604, |
| "tokens_trained": 0.05740152 |
| }, |
| { |
| "epoch": 0.17191489361702128, |
| "grad_norm": 3.6764986515045166, |
| "loss": 5.6228, |
| "lr": 0.0009853146853146854, |
| "step": 606, |
| "tokens_trained": 0.057595616 |
| }, |
| { |
| "epoch": 0.1724822695035461, |
| "grad_norm": 3.8210043907165527, |
| "loss": 5.6557, |
| "lr": 0.000985034965034965, |
| "step": 608, |
| "tokens_trained": 0.057783968 |
| }, |
| { |
| "epoch": 0.17304964539007092, |
| "grad_norm": 3.893644094467163, |
| "loss": 5.6675, |
| "lr": 0.0009847552447552449, |
| "step": 610, |
| "tokens_trained": 0.057974832 |
| }, |
| { |
| "epoch": 0.17361702127659576, |
| "grad_norm": 3.280839681625366, |
| "loss": 5.6442, |
| "lr": 0.0009844755244755245, |
| "step": 612, |
| "tokens_trained": 0.058166272 |
| }, |
| { |
| "epoch": 0.17418439716312056, |
| "grad_norm": 3.4350404739379883, |
| "loss": 5.6555, |
| "lr": 0.0009841958041958043, |
| "step": 614, |
| "tokens_trained": 0.058356008 |
| }, |
| { |
| "epoch": 0.1747517730496454, |
| "grad_norm": 3.7700448036193848, |
| "loss": 5.6138, |
| "lr": 0.000983916083916084, |
| "step": 616, |
| "tokens_trained": 0.058546792 |
| }, |
| { |
| "epoch": 0.1753191489361702, |
| "grad_norm": 3.8182730674743652, |
| "loss": 5.6931, |
| "lr": 0.0009836363636363636, |
| "step": 618, |
| "tokens_trained": 0.058736296 |
| }, |
| { |
| "epoch": 0.17588652482269504, |
| "grad_norm": 3.9105372428894043, |
| "loss": 5.6431, |
| "lr": 0.0009833566433566435, |
| "step": 620, |
| "tokens_trained": 0.058927576 |
| }, |
| { |
| "epoch": 0.17645390070921985, |
| "grad_norm": 3.8897712230682373, |
| "loss": 5.6203, |
| "lr": 0.000983076923076923, |
| "step": 622, |
| "tokens_trained": 0.059118416 |
| }, |
| { |
| "epoch": 0.17702127659574468, |
| "grad_norm": 3.512194871902466, |
| "loss": 5.6292, |
| "lr": 0.000982797202797203, |
| "step": 624, |
| "tokens_trained": 0.059308568 |
| }, |
| { |
| "epoch": 0.1773049645390071, |
| "eval_loss": 5.630118370056152, |
| "eval_runtime": 21.1591, |
| "step": 625, |
| "tokens_trained": 0.059404056 |
| }, |
| { |
| "epoch": 0.17758865248226952, |
| "grad_norm": 2.990100383758545, |
| "loss": 5.622, |
| "lr": 0.0009825174825174826, |
| "step": 626, |
| "tokens_trained": 0.059499776 |
| }, |
| { |
| "epoch": 0.17815602836879432, |
| "grad_norm": 3.0487334728240967, |
| "loss": 5.6629, |
| "lr": 0.0009822377622377622, |
| "step": 628, |
| "tokens_trained": 0.059690208 |
| }, |
| { |
| "epoch": 0.17872340425531916, |
| "grad_norm": 3.6905510425567627, |
| "loss": 5.6345, |
| "lr": 0.0009819580419580419, |
| "step": 630, |
| "tokens_trained": 0.059881352 |
| }, |
| { |
| "epoch": 0.17929078014184396, |
| "grad_norm": 3.302255630493164, |
| "loss": 5.6733, |
| "lr": 0.0009816783216783217, |
| "step": 632, |
| "tokens_trained": 0.060071896 |
| }, |
| { |
| "epoch": 0.1798581560283688, |
| "grad_norm": 3.6833834648132324, |
| "loss": 5.5868, |
| "lr": 0.0009813986013986014, |
| "step": 634, |
| "tokens_trained": 0.060260504 |
| }, |
| { |
| "epoch": 0.1804255319148936, |
| "grad_norm": 3.1528804302215576, |
| "loss": 5.6128, |
| "lr": 0.0009811188811188812, |
| "step": 636, |
| "tokens_trained": 0.060450584 |
| }, |
| { |
| "epoch": 0.18099290780141844, |
| "grad_norm": 3.788860559463501, |
| "loss": 5.6235, |
| "lr": 0.0009808391608391608, |
| "step": 638, |
| "tokens_trained": 0.060640872 |
| }, |
| { |
| "epoch": 0.18156028368794327, |
| "grad_norm": 3.192462682723999, |
| "loss": 5.545, |
| "lr": 0.0009805594405594405, |
| "step": 640, |
| "tokens_trained": 0.060832776 |
| }, |
| { |
| "epoch": 0.18212765957446808, |
| "grad_norm": 3.505732774734497, |
| "loss": 5.5801, |
| "lr": 0.0009802797202797203, |
| "step": 642, |
| "tokens_trained": 0.06102204 |
| }, |
| { |
| "epoch": 0.18269503546099292, |
| "grad_norm": 3.9589102268218994, |
| "loss": 5.6091, |
| "lr": 0.00098, |
| "step": 644, |
| "tokens_trained": 0.061209744 |
| }, |
| { |
| "epoch": 0.18326241134751772, |
| "grad_norm": 3.4410059452056885, |
| "loss": 5.6279, |
| "lr": 0.0009797202797202798, |
| "step": 646, |
| "tokens_trained": 0.061400392 |
| }, |
| { |
| "epoch": 0.18382978723404256, |
| "grad_norm": 3.7746005058288574, |
| "loss": 5.6124, |
| "lr": 0.0009794405594405595, |
| "step": 648, |
| "tokens_trained": 0.061592232 |
| }, |
| { |
| "epoch": 0.18439716312056736, |
| "grad_norm": 3.75022292137146, |
| "loss": 5.5826, |
| "lr": 0.000979160839160839, |
| "step": 650, |
| "tokens_trained": 0.061781824 |
| }, |
| { |
| "epoch": 0.1849645390070922, |
| "grad_norm": 3.7629313468933105, |
| "loss": 5.555, |
| "lr": 0.000978881118881119, |
| "step": 652, |
| "tokens_trained": 0.061972744 |
| }, |
| { |
| "epoch": 0.18553191489361703, |
| "grad_norm": 4.5046820640563965, |
| "loss": 5.5972, |
| "lr": 0.0009786013986013986, |
| "step": 654, |
| "tokens_trained": 0.062163456 |
| }, |
| { |
| "epoch": 0.18609929078014184, |
| "grad_norm": 3.443138599395752, |
| "loss": 5.6061, |
| "lr": 0.0009783216783216782, |
| "step": 656, |
| "tokens_trained": 0.06235208 |
| }, |
| { |
| "epoch": 0.18666666666666668, |
| "grad_norm": 3.2661828994750977, |
| "loss": 5.5479, |
| "lr": 0.000978041958041958, |
| "step": 658, |
| "tokens_trained": 0.062544416 |
| }, |
| { |
| "epoch": 0.18723404255319148, |
| "grad_norm": 3.9571003913879395, |
| "loss": 5.6069, |
| "lr": 0.000977762237762238, |
| "step": 660, |
| "tokens_trained": 0.062733992 |
| }, |
| { |
| "epoch": 0.18780141843971632, |
| "grad_norm": 3.705880641937256, |
| "loss": 5.5915, |
| "lr": 0.0009774825174825176, |
| "step": 662, |
| "tokens_trained": 0.062922536 |
| }, |
| { |
| "epoch": 0.18836879432624112, |
| "grad_norm": 4.066433429718018, |
| "loss": 5.6031, |
| "lr": 0.0009772027972027972, |
| "step": 664, |
| "tokens_trained": 0.063114224 |
| }, |
| { |
| "epoch": 0.18893617021276596, |
| "grad_norm": 3.356651782989502, |
| "loss": 5.6045, |
| "lr": 0.0009769230769230768, |
| "step": 666, |
| "tokens_trained": 0.063304616 |
| }, |
| { |
| "epoch": 0.1895035460992908, |
| "grad_norm": 3.8084938526153564, |
| "loss": 5.6138, |
| "lr": 0.0009766433566433567, |
| "step": 668, |
| "tokens_trained": 0.06349476 |
| }, |
| { |
| "epoch": 0.1900709219858156, |
| "grad_norm": 4.282619953155518, |
| "loss": 5.5704, |
| "lr": 0.0009763636363636363, |
| "step": 670, |
| "tokens_trained": 0.063684848 |
| }, |
| { |
| "epoch": 0.19063829787234043, |
| "grad_norm": 3.045057773590088, |
| "loss": 5.6427, |
| "lr": 0.0009760839160839161, |
| "step": 672, |
| "tokens_trained": 0.063875192 |
| }, |
| { |
| "epoch": 0.19120567375886524, |
| "grad_norm": 3.360164165496826, |
| "loss": 5.5778, |
| "lr": 0.0009758041958041958, |
| "step": 674, |
| "tokens_trained": 0.06406636 |
| }, |
| { |
| "epoch": 0.19177304964539008, |
| "grad_norm": 3.5778472423553467, |
| "loss": 5.5389, |
| "lr": 0.0009755244755244756, |
| "step": 676, |
| "tokens_trained": 0.064254376 |
| }, |
| { |
| "epoch": 0.19234042553191488, |
| "grad_norm": 3.34869384765625, |
| "loss": 5.5894, |
| "lr": 0.0009752447552447553, |
| "step": 678, |
| "tokens_trained": 0.0644448 |
| }, |
| { |
| "epoch": 0.19290780141843972, |
| "grad_norm": 3.083582878112793, |
| "loss": 5.5776, |
| "lr": 0.0009749650349650349, |
| "step": 680, |
| "tokens_trained": 0.064633712 |
| }, |
| { |
| "epoch": 0.19347517730496455, |
| "grad_norm": 3.345973491668701, |
| "loss": 5.5987, |
| "lr": 0.0009746853146853148, |
| "step": 682, |
| "tokens_trained": 0.064824808 |
| }, |
| { |
| "epoch": 0.19404255319148936, |
| "grad_norm": 3.9262702465057373, |
| "loss": 5.64, |
| "lr": 0.0009744055944055944, |
| "step": 684, |
| "tokens_trained": 0.065016224 |
| }, |
| { |
| "epoch": 0.1946099290780142, |
| "grad_norm": 3.298543930053711, |
| "loss": 5.587, |
| "lr": 0.0009741258741258742, |
| "step": 686, |
| "tokens_trained": 0.065204216 |
| }, |
| { |
| "epoch": 0.195177304964539, |
| "grad_norm": 3.118626832962036, |
| "loss": 5.5864, |
| "lr": 0.0009738461538461538, |
| "step": 688, |
| "tokens_trained": 0.065393256 |
| }, |
| { |
| "epoch": 0.19574468085106383, |
| "grad_norm": 2.983548402786255, |
| "loss": 5.5506, |
| "lr": 0.0009735664335664336, |
| "step": 690, |
| "tokens_trained": 0.06558324 |
| }, |
| { |
| "epoch": 0.19631205673758864, |
| "grad_norm": 3.5204527378082275, |
| "loss": 5.5336, |
| "lr": 0.0009732867132867133, |
| "step": 692, |
| "tokens_trained": 0.065775624 |
| }, |
| { |
| "epoch": 0.19687943262411348, |
| "grad_norm": 3.138550281524658, |
| "loss": 5.5677, |
| "lr": 0.000973006993006993, |
| "step": 694, |
| "tokens_trained": 0.0659666 |
| }, |
| { |
| "epoch": 0.1974468085106383, |
| "grad_norm": 3.0961053371429443, |
| "loss": 5.5714, |
| "lr": 0.0009727272727272728, |
| "step": 696, |
| "tokens_trained": 0.066155512 |
| }, |
| { |
| "epoch": 0.19801418439716312, |
| "grad_norm": 3.4929685592651367, |
| "loss": 5.5829, |
| "lr": 0.0009724475524475524, |
| "step": 698, |
| "tokens_trained": 0.06634576 |
| }, |
| { |
| "epoch": 0.19858156028368795, |
| "grad_norm": 3.1820616722106934, |
| "loss": 5.6108, |
| "lr": 0.0009721678321678323, |
| "step": 700, |
| "tokens_trained": 0.066537016 |
| }, |
| { |
| "epoch": 0.19914893617021276, |
| "grad_norm": 3.4244654178619385, |
| "loss": 5.6025, |
| "lr": 0.0009718881118881119, |
| "step": 702, |
| "tokens_trained": 0.066727856 |
| }, |
| { |
| "epoch": 0.1997163120567376, |
| "grad_norm": 3.258605480194092, |
| "loss": 5.5581, |
| "lr": 0.0009716083916083917, |
| "step": 704, |
| "tokens_trained": 0.066916672 |
| }, |
| { |
| "epoch": 0.2002836879432624, |
| "grad_norm": 2.7159688472747803, |
| "loss": 5.5478, |
| "lr": 0.0009713286713286713, |
| "step": 706, |
| "tokens_trained": 0.067107704 |
| }, |
| { |
| "epoch": 0.20085106382978724, |
| "grad_norm": 3.1941912174224854, |
| "loss": 5.6126, |
| "lr": 0.000971048951048951, |
| "step": 708, |
| "tokens_trained": 0.067297896 |
| }, |
| { |
| "epoch": 0.20141843971631207, |
| "grad_norm": 3.20470929145813, |
| "loss": 5.5628, |
| "lr": 0.0009707692307692308, |
| "step": 710, |
| "tokens_trained": 0.06748608 |
| }, |
| { |
| "epoch": 0.20198581560283688, |
| "grad_norm": 3.6400153636932373, |
| "loss": 5.5758, |
| "lr": 0.0009704895104895105, |
| "step": 712, |
| "tokens_trained": 0.0676766 |
| }, |
| { |
| "epoch": 0.2025531914893617, |
| "grad_norm": 2.881639003753662, |
| "loss": 5.5512, |
| "lr": 0.0009702097902097903, |
| "step": 714, |
| "tokens_trained": 0.067865848 |
| }, |
| { |
| "epoch": 0.20312056737588652, |
| "grad_norm": 3.1113905906677246, |
| "loss": 5.5396, |
| "lr": 0.0009699300699300699, |
| "step": 716, |
| "tokens_trained": 0.068055368 |
| }, |
| { |
| "epoch": 0.20368794326241135, |
| "grad_norm": 3.135014772415161, |
| "loss": 5.5763, |
| "lr": 0.0009696503496503498, |
| "step": 718, |
| "tokens_trained": 0.068248544 |
| }, |
| { |
| "epoch": 0.20425531914893616, |
| "grad_norm": 3.1870718002319336, |
| "loss": 5.5903, |
| "lr": 0.0009693706293706294, |
| "step": 720, |
| "tokens_trained": 0.068436944 |
| }, |
| { |
| "epoch": 0.204822695035461, |
| "grad_norm": 3.125596523284912, |
| "loss": 5.6033, |
| "lr": 0.0009690909090909091, |
| "step": 722, |
| "tokens_trained": 0.06862548 |
| }, |
| { |
| "epoch": 0.20539007092198583, |
| "grad_norm": 2.897671699523926, |
| "loss": 5.5946, |
| "lr": 0.0009688111888111888, |
| "step": 724, |
| "tokens_trained": 0.068815232 |
| }, |
| { |
| "epoch": 0.20595744680851064, |
| "grad_norm": 2.855313539505005, |
| "loss": 5.5731, |
| "lr": 0.0009685314685314685, |
| "step": 726, |
| "tokens_trained": 0.06900692 |
| }, |
| { |
| "epoch": 0.20652482269503547, |
| "grad_norm": 2.7760672569274902, |
| "loss": 5.4949, |
| "lr": 0.0009682517482517483, |
| "step": 728, |
| "tokens_trained": 0.069195376 |
| }, |
| { |
| "epoch": 0.20709219858156028, |
| "grad_norm": 2.9300007820129395, |
| "loss": 5.5491, |
| "lr": 0.000967972027972028, |
| "step": 730, |
| "tokens_trained": 0.069385512 |
| }, |
| { |
| "epoch": 0.2076595744680851, |
| "grad_norm": 3.299860954284668, |
| "loss": 5.5405, |
| "lr": 0.0009676923076923078, |
| "step": 732, |
| "tokens_trained": 0.069573304 |
| }, |
| { |
| "epoch": 0.20822695035460992, |
| "grad_norm": 3.300189256668091, |
| "loss": 5.5797, |
| "lr": 0.0009674125874125874, |
| "step": 734, |
| "tokens_trained": 0.069764248 |
| }, |
| { |
| "epoch": 0.20879432624113475, |
| "grad_norm": 2.932995557785034, |
| "loss": 5.5556, |
| "lr": 0.0009671328671328672, |
| "step": 736, |
| "tokens_trained": 0.06995496 |
| }, |
| { |
| "epoch": 0.2093617021276596, |
| "grad_norm": 2.6711719036102295, |
| "loss": 5.48, |
| "lr": 0.0009668531468531469, |
| "step": 738, |
| "tokens_trained": 0.070142776 |
| }, |
| { |
| "epoch": 0.2099290780141844, |
| "grad_norm": 2.833314895629883, |
| "loss": 5.542, |
| "lr": 0.0009665734265734266, |
| "step": 740, |
| "tokens_trained": 0.070332064 |
| }, |
| { |
| "epoch": 0.21049645390070923, |
| "grad_norm": 2.899843215942383, |
| "loss": 5.5649, |
| "lr": 0.0009662937062937063, |
| "step": 742, |
| "tokens_trained": 0.070523448 |
| }, |
| { |
| "epoch": 0.21106382978723404, |
| "grad_norm": 2.96528697013855, |
| "loss": 5.5277, |
| "lr": 0.000966013986013986, |
| "step": 744, |
| "tokens_trained": 0.070713768 |
| }, |
| { |
| "epoch": 0.21163120567375887, |
| "grad_norm": 2.921109437942505, |
| "loss": 5.5646, |
| "lr": 0.0009657342657342657, |
| "step": 746, |
| "tokens_trained": 0.070905704 |
| }, |
| { |
| "epoch": 0.21219858156028368, |
| "grad_norm": 3.2725329399108887, |
| "loss": 5.4786, |
| "lr": 0.0009654545454545455, |
| "step": 748, |
| "tokens_trained": 0.071096008 |
| }, |
| { |
| "epoch": 0.2127659574468085, |
| "grad_norm": 2.8296804428100586, |
| "loss": 5.573, |
| "lr": 0.0009651748251748252, |
| "step": 750, |
| "tokens_trained": 0.07128828 |
| }, |
| { |
| "epoch": 0.2127659574468085, |
| "eval_loss": 5.535472869873047, |
| "eval_runtime": 21.0109, |
| "step": 750, |
| "tokens_trained": 0.07128828 |
| }, |
| { |
| "epoch": 0.21333333333333335, |
| "grad_norm": 3.0509591102600098, |
| "loss": 5.6037, |
| "lr": 0.0009648951048951049, |
| "step": 752, |
| "tokens_trained": 0.071479496 |
| }, |
| { |
| "epoch": 0.21390070921985815, |
| "grad_norm": 2.6773571968078613, |
| "loss": 5.5266, |
| "lr": 0.0009646153846153846, |
| "step": 754, |
| "tokens_trained": 0.071668568 |
| }, |
| { |
| "epoch": 0.214468085106383, |
| "grad_norm": 2.9600210189819336, |
| "loss": 5.5362, |
| "lr": 0.0009643356643356644, |
| "step": 756, |
| "tokens_trained": 0.071860552 |
| }, |
| { |
| "epoch": 0.2150354609929078, |
| "grad_norm": 2.6674885749816895, |
| "loss": 5.5388, |
| "lr": 0.0009640559440559441, |
| "step": 758, |
| "tokens_trained": 0.07204912 |
| }, |
| { |
| "epoch": 0.21560283687943263, |
| "grad_norm": 2.50179386138916, |
| "loss": 5.5027, |
| "lr": 0.0009637762237762237, |
| "step": 760, |
| "tokens_trained": 0.072239952 |
| }, |
| { |
| "epoch": 0.21617021276595744, |
| "grad_norm": 2.843411684036255, |
| "loss": 5.5221, |
| "lr": 0.0009634965034965035, |
| "step": 762, |
| "tokens_trained": 0.07243076 |
| }, |
| { |
| "epoch": 0.21673758865248227, |
| "grad_norm": 2.8686277866363525, |
| "loss": 5.4896, |
| "lr": 0.0009632167832167832, |
| "step": 764, |
| "tokens_trained": 0.072623272 |
| }, |
| { |
| "epoch": 0.2173049645390071, |
| "grad_norm": 2.611424684524536, |
| "loss": 5.5557, |
| "lr": 0.000962937062937063, |
| "step": 766, |
| "tokens_trained": 0.07281408 |
| }, |
| { |
| "epoch": 0.2178723404255319, |
| "grad_norm": 3.013145685195923, |
| "loss": 5.4964, |
| "lr": 0.0009626573426573427, |
| "step": 768, |
| "tokens_trained": 0.073005016 |
| }, |
| { |
| "epoch": 0.21843971631205675, |
| "grad_norm": 2.8682022094726562, |
| "loss": 5.5232, |
| "lr": 0.0009623776223776224, |
| "step": 770, |
| "tokens_trained": 0.07319652 |
| }, |
| { |
| "epoch": 0.21900709219858155, |
| "grad_norm": 2.6478466987609863, |
| "loss": 5.5517, |
| "lr": 0.0009620979020979021, |
| "step": 772, |
| "tokens_trained": 0.073387048 |
| }, |
| { |
| "epoch": 0.2195744680851064, |
| "grad_norm": 2.7273097038269043, |
| "loss": 5.5572, |
| "lr": 0.0009618181818181818, |
| "step": 774, |
| "tokens_trained": 0.073577424 |
| }, |
| { |
| "epoch": 0.2201418439716312, |
| "grad_norm": 3.104907751083374, |
| "loss": 5.5081, |
| "lr": 0.0009615384615384616, |
| "step": 776, |
| "tokens_trained": 0.073766712 |
| }, |
| { |
| "epoch": 0.22070921985815603, |
| "grad_norm": 2.9616432189941406, |
| "loss": 5.5059, |
| "lr": 0.0009612587412587412, |
| "step": 778, |
| "tokens_trained": 0.073956272 |
| }, |
| { |
| "epoch": 0.22127659574468084, |
| "grad_norm": 3.330319881439209, |
| "loss": 5.4811, |
| "lr": 0.000960979020979021, |
| "step": 780, |
| "tokens_trained": 0.074144008 |
| }, |
| { |
| "epoch": 0.22184397163120567, |
| "grad_norm": 2.964371919631958, |
| "loss": 5.4763, |
| "lr": 0.0009606993006993007, |
| "step": 782, |
| "tokens_trained": 0.074333888 |
| }, |
| { |
| "epoch": 0.2224113475177305, |
| "grad_norm": 3.13899827003479, |
| "loss": 5.5262, |
| "lr": 0.0009604195804195805, |
| "step": 784, |
| "tokens_trained": 0.074523584 |
| }, |
| { |
| "epoch": 0.2229787234042553, |
| "grad_norm": 3.2576637268066406, |
| "loss": 5.4983, |
| "lr": 0.0009601398601398602, |
| "step": 786, |
| "tokens_trained": 0.074714128 |
| }, |
| { |
| "epoch": 0.22354609929078015, |
| "grad_norm": 2.916149616241455, |
| "loss": 5.504, |
| "lr": 0.0009598601398601398, |
| "step": 788, |
| "tokens_trained": 0.074905104 |
| }, |
| { |
| "epoch": 0.22411347517730495, |
| "grad_norm": 2.842733144760132, |
| "loss": 5.4997, |
| "lr": 0.0009595804195804196, |
| "step": 790, |
| "tokens_trained": 0.075096328 |
| }, |
| { |
| "epoch": 0.2246808510638298, |
| "grad_norm": 2.880695104598999, |
| "loss": 5.5131, |
| "lr": 0.0009593006993006993, |
| "step": 792, |
| "tokens_trained": 0.075286104 |
| }, |
| { |
| "epoch": 0.2252482269503546, |
| "grad_norm": 2.620516300201416, |
| "loss": 5.5291, |
| "lr": 0.0009590209790209791, |
| "step": 794, |
| "tokens_trained": 0.075477392 |
| }, |
| { |
| "epoch": 0.22581560283687943, |
| "grad_norm": 2.622455358505249, |
| "loss": 5.5433, |
| "lr": 0.0009587412587412587, |
| "step": 796, |
| "tokens_trained": 0.0756682 |
| }, |
| { |
| "epoch": 0.22638297872340427, |
| "grad_norm": 2.532047986984253, |
| "loss": 5.5169, |
| "lr": 0.0009584615384615385, |
| "step": 798, |
| "tokens_trained": 0.075856528 |
| }, |
| { |
| "epoch": 0.22695035460992907, |
| "grad_norm": 2.628110885620117, |
| "loss": 5.5369, |
| "lr": 0.0009581818181818182, |
| "step": 800, |
| "tokens_trained": 0.076046256 |
| }, |
| { |
| "epoch": 0.2275177304964539, |
| "grad_norm": 2.376600980758667, |
| "loss": 5.4888, |
| "lr": 0.000957902097902098, |
| "step": 802, |
| "tokens_trained": 0.076236016 |
| }, |
| { |
| "epoch": 0.22808510638297871, |
| "grad_norm": 2.433666706085205, |
| "loss": 5.5044, |
| "lr": 0.0009576223776223777, |
| "step": 804, |
| "tokens_trained": 0.07642324 |
| }, |
| { |
| "epoch": 0.22865248226950355, |
| "grad_norm": 2.3850929737091064, |
| "loss": 5.4941, |
| "lr": 0.0009573426573426573, |
| "step": 806, |
| "tokens_trained": 0.07661376 |
| }, |
| { |
| "epoch": 0.22921985815602836, |
| "grad_norm": 2.4664969444274902, |
| "loss": 5.5257, |
| "lr": 0.0009570629370629371, |
| "step": 808, |
| "tokens_trained": 0.076804952 |
| }, |
| { |
| "epoch": 0.2297872340425532, |
| "grad_norm": 2.8514602184295654, |
| "loss": 5.5335, |
| "lr": 0.0009567832167832168, |
| "step": 810, |
| "tokens_trained": 0.076995064 |
| }, |
| { |
| "epoch": 0.23035460992907802, |
| "grad_norm": 2.508887767791748, |
| "loss": 5.5093, |
| "lr": 0.0009565034965034966, |
| "step": 812, |
| "tokens_trained": 0.077185344 |
| }, |
| { |
| "epoch": 0.23092198581560283, |
| "grad_norm": 2.5842514038085938, |
| "loss": 5.5246, |
| "lr": 0.0009562237762237762, |
| "step": 814, |
| "tokens_trained": 0.077375232 |
| }, |
| { |
| "epoch": 0.23148936170212767, |
| "grad_norm": 2.621562957763672, |
| "loss": 5.4948, |
| "lr": 0.0009559440559440559, |
| "step": 816, |
| "tokens_trained": 0.07756528 |
| }, |
| { |
| "epoch": 0.23205673758865247, |
| "grad_norm": 2.3230698108673096, |
| "loss": 5.5367, |
| "lr": 0.0009556643356643357, |
| "step": 818, |
| "tokens_trained": 0.077754936 |
| }, |
| { |
| "epoch": 0.2326241134751773, |
| "grad_norm": 2.728039264678955, |
| "loss": 5.4548, |
| "lr": 0.0009553846153846154, |
| "step": 820, |
| "tokens_trained": 0.077944056 |
| }, |
| { |
| "epoch": 0.23319148936170211, |
| "grad_norm": 2.786271333694458, |
| "loss": 5.4701, |
| "lr": 0.0009551048951048952, |
| "step": 822, |
| "tokens_trained": 0.07813272 |
| }, |
| { |
| "epoch": 0.23375886524822695, |
| "grad_norm": 2.449995517730713, |
| "loss": 5.5505, |
| "lr": 0.0009548251748251748, |
| "step": 824, |
| "tokens_trained": 0.078321888 |
| }, |
| { |
| "epoch": 0.23432624113475178, |
| "grad_norm": 2.394447088241577, |
| "loss": 5.4709, |
| "lr": 0.0009545454545454546, |
| "step": 826, |
| "tokens_trained": 0.078510288 |
| }, |
| { |
| "epoch": 0.2348936170212766, |
| "grad_norm": 2.5857675075531006, |
| "loss": 5.4986, |
| "lr": 0.0009542657342657343, |
| "step": 828, |
| "tokens_trained": 0.078698032 |
| }, |
| { |
| "epoch": 0.23546099290780143, |
| "grad_norm": 2.728743314743042, |
| "loss": 5.4983, |
| "lr": 0.000953986013986014, |
| "step": 830, |
| "tokens_trained": 0.078890608 |
| }, |
| { |
| "epoch": 0.23602836879432623, |
| "grad_norm": 2.3619866371154785, |
| "loss": 5.4985, |
| "lr": 0.0009537062937062937, |
| "step": 832, |
| "tokens_trained": 0.079081968 |
| }, |
| { |
| "epoch": 0.23659574468085107, |
| "grad_norm": 2.6265158653259277, |
| "loss": 5.5088, |
| "lr": 0.0009534265734265734, |
| "step": 834, |
| "tokens_trained": 0.079270712 |
| }, |
| { |
| "epoch": 0.23716312056737587, |
| "grad_norm": 2.3731281757354736, |
| "loss": 5.4682, |
| "lr": 0.0009531468531468532, |
| "step": 836, |
| "tokens_trained": 0.079459912 |
| }, |
| { |
| "epoch": 0.2377304964539007, |
| "grad_norm": 2.375283718109131, |
| "loss": 5.4278, |
| "lr": 0.0009528671328671329, |
| "step": 838, |
| "tokens_trained": 0.079649408 |
| }, |
| { |
| "epoch": 0.23829787234042554, |
| "grad_norm": 2.6856729984283447, |
| "loss": 5.5277, |
| "lr": 0.0009525874125874127, |
| "step": 840, |
| "tokens_trained": 0.079839552 |
| }, |
| { |
| "epoch": 0.23886524822695035, |
| "grad_norm": 2.5037410259246826, |
| "loss": 5.5022, |
| "lr": 0.0009523076923076923, |
| "step": 842, |
| "tokens_trained": 0.08002732 |
| }, |
| { |
| "epoch": 0.23943262411347518, |
| "grad_norm": 2.25175404548645, |
| "loss": 5.4918, |
| "lr": 0.000952027972027972, |
| "step": 844, |
| "tokens_trained": 0.080216416 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.3555264472961426, |
| "loss": 5.5134, |
| "lr": 0.0009517482517482518, |
| "step": 846, |
| "tokens_trained": 0.080406928 |
| }, |
| { |
| "epoch": 0.24056737588652483, |
| "grad_norm": 2.390998601913452, |
| "loss": 5.4721, |
| "lr": 0.0009514685314685315, |
| "step": 848, |
| "tokens_trained": 0.080596232 |
| }, |
| { |
| "epoch": 0.24113475177304963, |
| "grad_norm": 2.1585986614227295, |
| "loss": 5.4511, |
| "lr": 0.0009511888111888112, |
| "step": 850, |
| "tokens_trained": 0.080786848 |
| }, |
| { |
| "epoch": 0.24170212765957447, |
| "grad_norm": 2.7733986377716064, |
| "loss": 5.5269, |
| "lr": 0.0009509090909090909, |
| "step": 852, |
| "tokens_trained": 0.080978144 |
| }, |
| { |
| "epoch": 0.2422695035460993, |
| "grad_norm": 2.8021209239959717, |
| "loss": 5.4751, |
| "lr": 0.0009506293706293707, |
| "step": 854, |
| "tokens_trained": 0.081167712 |
| }, |
| { |
| "epoch": 0.2428368794326241, |
| "grad_norm": 2.5434224605560303, |
| "loss": 5.5154, |
| "lr": 0.0009503496503496504, |
| "step": 856, |
| "tokens_trained": 0.081357584 |
| }, |
| { |
| "epoch": 0.24340425531914894, |
| "grad_norm": 2.456421136856079, |
| "loss": 5.5459, |
| "lr": 0.0009500699300699301, |
| "step": 858, |
| "tokens_trained": 0.081545992 |
| }, |
| { |
| "epoch": 0.24397163120567375, |
| "grad_norm": 2.317312002182007, |
| "loss": 5.4644, |
| "lr": 0.0009497902097902098, |
| "step": 860, |
| "tokens_trained": 0.081735392 |
| }, |
| { |
| "epoch": 0.24453900709219858, |
| "grad_norm": 2.3580780029296875, |
| "loss": 5.4359, |
| "lr": 0.0009495104895104895, |
| "step": 862, |
| "tokens_trained": 0.081925608 |
| }, |
| { |
| "epoch": 0.2451063829787234, |
| "grad_norm": 2.6440224647521973, |
| "loss": 5.4757, |
| "lr": 0.0009492307692307693, |
| "step": 864, |
| "tokens_trained": 0.08211328 |
| }, |
| { |
| "epoch": 0.24567375886524823, |
| "grad_norm": 2.5468132495880127, |
| "loss": 5.4115, |
| "lr": 0.000948951048951049, |
| "step": 866, |
| "tokens_trained": 0.082303736 |
| }, |
| { |
| "epoch": 0.24624113475177306, |
| "grad_norm": 2.431992530822754, |
| "loss": 5.4655, |
| "lr": 0.0009486713286713286, |
| "step": 868, |
| "tokens_trained": 0.082492896 |
| }, |
| { |
| "epoch": 0.24680851063829787, |
| "grad_norm": 2.443335771560669, |
| "loss": 5.4684, |
| "lr": 0.0009483916083916084, |
| "step": 870, |
| "tokens_trained": 0.082684024 |
| }, |
| { |
| "epoch": 0.2473758865248227, |
| "grad_norm": 2.6467180252075195, |
| "loss": 5.5017, |
| "lr": 0.0009481118881118881, |
| "step": 872, |
| "tokens_trained": 0.08287444 |
| }, |
| { |
| "epoch": 0.2479432624113475, |
| "grad_norm": 2.6044974327087402, |
| "loss": 5.4637, |
| "lr": 0.0009478321678321679, |
| "step": 874, |
| "tokens_trained": 0.08306436 |
| }, |
| { |
| "epoch": 0.24822695035460993, |
| "eval_loss": 5.4816508293151855, |
| "eval_runtime": 20.9467, |
| "step": 875, |
| "tokens_trained": 0.083158888 |
| }, |
| { |
| "epoch": 0.24851063829787234, |
| "grad_norm": 2.6221189498901367, |
| "loss": 5.4785, |
| "lr": 0.0009475524475524476, |
| "step": 876, |
| "tokens_trained": 0.083253472 |
| }, |
| { |
| "epoch": 0.24907801418439715, |
| "grad_norm": 2.409327983856201, |
| "loss": 5.42, |
| "lr": 0.0009472727272727273, |
| "step": 878, |
| "tokens_trained": 0.08344528 |
| }, |
| { |
| "epoch": 0.24964539007092199, |
| "grad_norm": 2.2504723072052, |
| "loss": 5.399, |
| "lr": 0.000946993006993007, |
| "step": 880, |
| "tokens_trained": 0.083635752 |
| }, |
| { |
| "epoch": 0.2502127659574468, |
| "grad_norm": 2.3018665313720703, |
| "loss": 5.4512, |
| "lr": 0.0009467132867132868, |
| "step": 882, |
| "tokens_trained": 0.08382576 |
| }, |
| { |
| "epoch": 0.2507801418439716, |
| "grad_norm": 2.5774636268615723, |
| "loss": 5.4592, |
| "lr": 0.0009464335664335665, |
| "step": 884, |
| "tokens_trained": 0.084016232 |
| }, |
| { |
| "epoch": 0.25134751773049646, |
| "grad_norm": 2.614935874938965, |
| "loss": 5.4772, |
| "lr": 0.0009461538461538461, |
| "step": 886, |
| "tokens_trained": 0.084206992 |
| }, |
| { |
| "epoch": 0.2519148936170213, |
| "grad_norm": 2.4281506538391113, |
| "loss": 5.4972, |
| "lr": 0.0009458741258741259, |
| "step": 888, |
| "tokens_trained": 0.084395848 |
| }, |
| { |
| "epoch": 0.2524822695035461, |
| "grad_norm": 2.3668100833892822, |
| "loss": 5.4505, |
| "lr": 0.0009455944055944056, |
| "step": 890, |
| "tokens_trained": 0.084583704 |
| }, |
| { |
| "epoch": 0.2530496453900709, |
| "grad_norm": 2.1937146186828613, |
| "loss": 5.4981, |
| "lr": 0.0009453146853146854, |
| "step": 892, |
| "tokens_trained": 0.08477096 |
| }, |
| { |
| "epoch": 0.25361702127659574, |
| "grad_norm": 2.2917556762695312, |
| "loss": 5.4224, |
| "lr": 0.000945034965034965, |
| "step": 894, |
| "tokens_trained": 0.084961048 |
| }, |
| { |
| "epoch": 0.2541843971631206, |
| "grad_norm": 2.1254703998565674, |
| "loss": 5.4409, |
| "lr": 0.0009447552447552447, |
| "step": 896, |
| "tokens_trained": 0.085153256 |
| }, |
| { |
| "epoch": 0.2547517730496454, |
| "grad_norm": 2.267159938812256, |
| "loss": 5.4527, |
| "lr": 0.0009444755244755245, |
| "step": 898, |
| "tokens_trained": 0.085343128 |
| }, |
| { |
| "epoch": 0.2553191489361702, |
| "grad_norm": 2.1975555419921875, |
| "loss": 5.516, |
| "lr": 0.0009441958041958042, |
| "step": 900, |
| "tokens_trained": 0.085534024 |
| }, |
| { |
| "epoch": 0.255886524822695, |
| "grad_norm": 2.3459436893463135, |
| "loss": 5.4592, |
| "lr": 0.000943916083916084, |
| "step": 902, |
| "tokens_trained": 0.085725136 |
| }, |
| { |
| "epoch": 0.25645390070921986, |
| "grad_norm": 2.4788501262664795, |
| "loss": 5.3937, |
| "lr": 0.0009436363636363636, |
| "step": 904, |
| "tokens_trained": 0.08591548 |
| }, |
| { |
| "epoch": 0.2570212765957447, |
| "grad_norm": 2.415065288543701, |
| "loss": 5.3991, |
| "lr": 0.0009433566433566434, |
| "step": 906, |
| "tokens_trained": 0.086105008 |
| }, |
| { |
| "epoch": 0.25758865248226953, |
| "grad_norm": 2.1260058879852295, |
| "loss": 5.4122, |
| "lr": 0.0009430769230769231, |
| "step": 908, |
| "tokens_trained": 0.08629424 |
| }, |
| { |
| "epoch": 0.2581560283687943, |
| "grad_norm": 2.1759092807769775, |
| "loss": 5.4663, |
| "lr": 0.0009427972027972029, |
| "step": 910, |
| "tokens_trained": 0.086485784 |
| }, |
| { |
| "epoch": 0.25872340425531914, |
| "grad_norm": 2.3481245040893555, |
| "loss": 5.4398, |
| "lr": 0.0009425174825174825, |
| "step": 912, |
| "tokens_trained": 0.086676744 |
| }, |
| { |
| "epoch": 0.259290780141844, |
| "grad_norm": 2.312612533569336, |
| "loss": 5.4615, |
| "lr": 0.0009422377622377622, |
| "step": 914, |
| "tokens_trained": 0.086866424 |
| }, |
| { |
| "epoch": 0.2598581560283688, |
| "grad_norm": 2.4709548950195312, |
| "loss": 5.4062, |
| "lr": 0.000941958041958042, |
| "step": 916, |
| "tokens_trained": 0.087055824 |
| }, |
| { |
| "epoch": 0.2604255319148936, |
| "grad_norm": 2.3664543628692627, |
| "loss": 5.4696, |
| "lr": 0.0009416783216783217, |
| "step": 918, |
| "tokens_trained": 0.087244136 |
| }, |
| { |
| "epoch": 0.26099290780141843, |
| "grad_norm": 2.423687696456909, |
| "loss": 5.4762, |
| "lr": 0.0009413986013986015, |
| "step": 920, |
| "tokens_trained": 0.087432584 |
| }, |
| { |
| "epoch": 0.26156028368794326, |
| "grad_norm": 2.4002890586853027, |
| "loss": 5.4743, |
| "lr": 0.0009411188811188811, |
| "step": 922, |
| "tokens_trained": 0.087622248 |
| }, |
| { |
| "epoch": 0.2621276595744681, |
| "grad_norm": 2.107527494430542, |
| "loss": 5.4013, |
| "lr": 0.0009408391608391608, |
| "step": 924, |
| "tokens_trained": 0.087809888 |
| }, |
| { |
| "epoch": 0.26269503546099293, |
| "grad_norm": 2.05177640914917, |
| "loss": 5.4601, |
| "lr": 0.0009405594405594406, |
| "step": 926, |
| "tokens_trained": 0.088002704 |
| }, |
| { |
| "epoch": 0.2632624113475177, |
| "grad_norm": 2.303874969482422, |
| "loss": 5.456, |
| "lr": 0.0009402797202797203, |
| "step": 928, |
| "tokens_trained": 0.088191344 |
| }, |
| { |
| "epoch": 0.26382978723404255, |
| "grad_norm": 2.4369659423828125, |
| "loss": 5.4162, |
| "lr": 0.00094, |
| "step": 930, |
| "tokens_trained": 0.088380832 |
| }, |
| { |
| "epoch": 0.2643971631205674, |
| "grad_norm": 2.4750819206237793, |
| "loss": 5.455, |
| "lr": 0.0009397202797202797, |
| "step": 932, |
| "tokens_trained": 0.088569936 |
| }, |
| { |
| "epoch": 0.2649645390070922, |
| "grad_norm": 2.09557843208313, |
| "loss": 5.4273, |
| "lr": 0.0009394405594405595, |
| "step": 934, |
| "tokens_trained": 0.08876116 |
| }, |
| { |
| "epoch": 0.265531914893617, |
| "grad_norm": 2.0984373092651367, |
| "loss": 5.4342, |
| "lr": 0.0009391608391608392, |
| "step": 936, |
| "tokens_trained": 0.088951032 |
| }, |
| { |
| "epoch": 0.26609929078014183, |
| "grad_norm": 2.1150097846984863, |
| "loss": 5.4344, |
| "lr": 0.000938881118881119, |
| "step": 938, |
| "tokens_trained": 0.08914124 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 2.1577563285827637, |
| "loss": 5.455, |
| "lr": 0.0009386013986013986, |
| "step": 940, |
| "tokens_trained": 0.089330952 |
| }, |
| { |
| "epoch": 0.2672340425531915, |
| "grad_norm": 2.0483016967773438, |
| "loss": 5.413, |
| "lr": 0.0009383216783216783, |
| "step": 942, |
| "tokens_trained": 0.08952116 |
| }, |
| { |
| "epoch": 0.26780141843971633, |
| "grad_norm": 2.3116559982299805, |
| "loss": 5.455, |
| "lr": 0.0009380419580419581, |
| "step": 944, |
| "tokens_trained": 0.089712888 |
| }, |
| { |
| "epoch": 0.2683687943262411, |
| "grad_norm": 2.2459256649017334, |
| "loss": 5.3971, |
| "lr": 0.0009377622377622378, |
| "step": 946, |
| "tokens_trained": 0.089903936 |
| }, |
| { |
| "epoch": 0.26893617021276595, |
| "grad_norm": 2.3048787117004395, |
| "loss": 5.4454, |
| "lr": 0.0009374825174825175, |
| "step": 948, |
| "tokens_trained": 0.090095888 |
| }, |
| { |
| "epoch": 0.2695035460992908, |
| "grad_norm": 2.196735143661499, |
| "loss": 5.4101, |
| "lr": 0.0009372027972027972, |
| "step": 950, |
| "tokens_trained": 0.090287472 |
| }, |
| { |
| "epoch": 0.2700709219858156, |
| "grad_norm": 2.3908562660217285, |
| "loss": 5.4731, |
| "lr": 0.0009369230769230769, |
| "step": 952, |
| "tokens_trained": 0.090476568 |
| }, |
| { |
| "epoch": 0.27063829787234045, |
| "grad_norm": 2.154932975769043, |
| "loss": 5.4104, |
| "lr": 0.0009366433566433567, |
| "step": 954, |
| "tokens_trained": 0.090665592 |
| }, |
| { |
| "epoch": 0.27120567375886523, |
| "grad_norm": 2.340907096862793, |
| "loss": 5.3707, |
| "lr": 0.0009363636363636364, |
| "step": 956, |
| "tokens_trained": 0.090853232 |
| }, |
| { |
| "epoch": 0.27177304964539006, |
| "grad_norm": 2.1736438274383545, |
| "loss": 5.4484, |
| "lr": 0.0009360839160839161, |
| "step": 958, |
| "tokens_trained": 0.091043808 |
| }, |
| { |
| "epoch": 0.2723404255319149, |
| "grad_norm": 2.3518154621124268, |
| "loss": 5.4919, |
| "lr": 0.0009358041958041958, |
| "step": 960, |
| "tokens_trained": 0.09123384 |
| }, |
| { |
| "epoch": 0.27290780141843973, |
| "grad_norm": 2.6673426628112793, |
| "loss": 5.4008, |
| "lr": 0.0009355244755244755, |
| "step": 962, |
| "tokens_trained": 0.091422544 |
| }, |
| { |
| "epoch": 0.2734751773049645, |
| "grad_norm": 2.4755311012268066, |
| "loss": 5.4533, |
| "lr": 0.0009352447552447553, |
| "step": 964, |
| "tokens_trained": 0.09161544 |
| }, |
| { |
| "epoch": 0.27404255319148935, |
| "grad_norm": 2.338452100753784, |
| "loss": 5.4953, |
| "lr": 0.0009349650349650349, |
| "step": 966, |
| "tokens_trained": 0.091806344 |
| }, |
| { |
| "epoch": 0.2746099290780142, |
| "grad_norm": 2.170426845550537, |
| "loss": 5.4588, |
| "lr": 0.0009346853146853147, |
| "step": 968, |
| "tokens_trained": 0.091996648 |
| }, |
| { |
| "epoch": 0.275177304964539, |
| "grad_norm": 2.2587599754333496, |
| "loss": 5.4547, |
| "lr": 0.0009344055944055944, |
| "step": 970, |
| "tokens_trained": 0.09218848 |
| }, |
| { |
| "epoch": 0.27574468085106385, |
| "grad_norm": 2.0009043216705322, |
| "loss": 5.4116, |
| "lr": 0.0009341258741258742, |
| "step": 972, |
| "tokens_trained": 0.092377984 |
| }, |
| { |
| "epoch": 0.27631205673758863, |
| "grad_norm": 2.0617294311523438, |
| "loss": 5.4541, |
| "lr": 0.0009338461538461539, |
| "step": 974, |
| "tokens_trained": 0.092569472 |
| }, |
| { |
| "epoch": 0.27687943262411346, |
| "grad_norm": 2.059300661087036, |
| "loss": 5.4414, |
| "lr": 0.0009335664335664336, |
| "step": 976, |
| "tokens_trained": 0.092758496 |
| }, |
| { |
| "epoch": 0.2774468085106383, |
| "grad_norm": 2.2815263271331787, |
| "loss": 5.4435, |
| "lr": 0.0009332867132867133, |
| "step": 978, |
| "tokens_trained": 0.092950368 |
| }, |
| { |
| "epoch": 0.27801418439716313, |
| "grad_norm": 2.1770365238189697, |
| "loss": 5.4481, |
| "lr": 0.0009330069930069929, |
| "step": 980, |
| "tokens_trained": 0.093140552 |
| }, |
| { |
| "epoch": 0.27858156028368797, |
| "grad_norm": 2.0089797973632812, |
| "loss": 5.4117, |
| "lr": 0.0009327272727272728, |
| "step": 982, |
| "tokens_trained": 0.093332312 |
| }, |
| { |
| "epoch": 0.27914893617021275, |
| "grad_norm": 2.2188286781311035, |
| "loss": 5.4594, |
| "lr": 0.0009324475524475524, |
| "step": 984, |
| "tokens_trained": 0.093520792 |
| }, |
| { |
| "epoch": 0.2797163120567376, |
| "grad_norm": 2.310481548309326, |
| "loss": 5.393, |
| "lr": 0.0009321678321678322, |
| "step": 986, |
| "tokens_trained": 0.093710608 |
| }, |
| { |
| "epoch": 0.2802836879432624, |
| "grad_norm": 2.3832972049713135, |
| "loss": 5.4277, |
| "lr": 0.0009318881118881119, |
| "step": 988, |
| "tokens_trained": 0.093900952 |
| }, |
| { |
| "epoch": 0.28085106382978725, |
| "grad_norm": 2.011126756668091, |
| "loss": 5.4097, |
| "lr": 0.0009316083916083917, |
| "step": 990, |
| "tokens_trained": 0.094091 |
| }, |
| { |
| "epoch": 0.28141843971631203, |
| "grad_norm": 2.2632968425750732, |
| "loss": 5.4388, |
| "lr": 0.0009313286713286714, |
| "step": 992, |
| "tokens_trained": 0.094281216 |
| }, |
| { |
| "epoch": 0.28198581560283686, |
| "grad_norm": 2.3477587699890137, |
| "loss": 5.3728, |
| "lr": 0.000931048951048951, |
| "step": 994, |
| "tokens_trained": 0.094470264 |
| }, |
| { |
| "epoch": 0.2825531914893617, |
| "grad_norm": 2.486196756362915, |
| "loss": 5.414, |
| "lr": 0.0009307692307692308, |
| "step": 996, |
| "tokens_trained": 0.094662816 |
| }, |
| { |
| "epoch": 0.28312056737588653, |
| "grad_norm": 2.5286316871643066, |
| "loss": 5.4063, |
| "lr": 0.0009304895104895104, |
| "step": 998, |
| "tokens_trained": 0.094852896 |
| }, |
| { |
| "epoch": 0.28368794326241137, |
| "grad_norm": 2.712406635284424, |
| "loss": 5.4274, |
| "lr": 0.0009302097902097903, |
| "step": 1000, |
| "tokens_trained": 0.09504336 |
| }, |
| { |
| "epoch": 0.28368794326241137, |
| "eval_loss": 5.434450626373291, |
| "eval_runtime": 21.0388, |
| "step": 1000, |
| "tokens_trained": 0.09504336 |
| }, |
| { |
| "epoch": 0.28425531914893615, |
| "grad_norm": 2.245316505432129, |
| "loss": 5.3551, |
| "lr": 0.0009299300699300699, |
| "step": 1002, |
| "tokens_trained": 0.095233944 |
| }, |
| { |
| "epoch": 0.284822695035461, |
| "grad_norm": 2.335533618927002, |
| "loss": 5.4608, |
| "lr": 0.0009296503496503497, |
| "step": 1004, |
| "tokens_trained": 0.095423184 |
| }, |
| { |
| "epoch": 0.2853900709219858, |
| "grad_norm": 2.232128858566284, |
| "loss": 5.4374, |
| "lr": 0.0009293706293706294, |
| "step": 1006, |
| "tokens_trained": 0.095612672 |
| }, |
| { |
| "epoch": 0.28595744680851065, |
| "grad_norm": 2.148329257965088, |
| "loss": 5.463, |
| "lr": 0.0009290909090909091, |
| "step": 1008, |
| "tokens_trained": 0.095802784 |
| }, |
| { |
| "epoch": 0.2865248226950355, |
| "grad_norm": 1.9580810070037842, |
| "loss": 5.291, |
| "lr": 0.0009288111888111889, |
| "step": 1010, |
| "tokens_trained": 0.095990776 |
| }, |
| { |
| "epoch": 0.28709219858156027, |
| "grad_norm": 1.9873988628387451, |
| "loss": 5.4103, |
| "lr": 0.0009285314685314685, |
| "step": 1012, |
| "tokens_trained": 0.096180648 |
| }, |
| { |
| "epoch": 0.2876595744680851, |
| "grad_norm": 2.0297746658325195, |
| "loss": 5.4078, |
| "lr": 0.0009282517482517483, |
| "step": 1014, |
| "tokens_trained": 0.09637224 |
| }, |
| { |
| "epoch": 0.28822695035460993, |
| "grad_norm": 1.928497076034546, |
| "loss": 5.3162, |
| "lr": 0.0009279720279720279, |
| "step": 1016, |
| "tokens_trained": 0.096561744 |
| }, |
| { |
| "epoch": 0.28879432624113477, |
| "grad_norm": 2.1219675540924072, |
| "loss": 5.4358, |
| "lr": 0.0009276923076923078, |
| "step": 1018, |
| "tokens_trained": 0.096752296 |
| }, |
| { |
| "epoch": 0.28936170212765955, |
| "grad_norm": 2.0021066665649414, |
| "loss": 5.4232, |
| "lr": 0.0009274125874125874, |
| "step": 1020, |
| "tokens_trained": 0.096943856 |
| }, |
| { |
| "epoch": 0.2899290780141844, |
| "grad_norm": 1.9920068979263306, |
| "loss": 5.407, |
| "lr": 0.0009271328671328671, |
| "step": 1022, |
| "tokens_trained": 0.097133632 |
| }, |
| { |
| "epoch": 0.2904964539007092, |
| "grad_norm": 1.8810361623764038, |
| "loss": 5.4293, |
| "lr": 0.0009268531468531469, |
| "step": 1024, |
| "tokens_trained": 0.097325976 |
| }, |
| { |
| "epoch": 0.29106382978723405, |
| "grad_norm": 1.8560134172439575, |
| "loss": 5.4236, |
| "lr": 0.0009265734265734266, |
| "step": 1026, |
| "tokens_trained": 0.0975142 |
| }, |
| { |
| "epoch": 0.2916312056737589, |
| "grad_norm": 2.1735010147094727, |
| "loss": 5.4252, |
| "lr": 0.0009262937062937064, |
| "step": 1028, |
| "tokens_trained": 0.0977042 |
| }, |
| { |
| "epoch": 0.29219858156028367, |
| "grad_norm": 2.2467288970947266, |
| "loss": 5.3756, |
| "lr": 0.000926013986013986, |
| "step": 1030, |
| "tokens_trained": 0.097893376 |
| }, |
| { |
| "epoch": 0.2927659574468085, |
| "grad_norm": 1.9609313011169434, |
| "loss": 5.4091, |
| "lr": 0.0009257342657342658, |
| "step": 1032, |
| "tokens_trained": 0.0980824 |
| }, |
| { |
| "epoch": 0.29333333333333333, |
| "grad_norm": 2.116384267807007, |
| "loss": 5.4001, |
| "lr": 0.0009254545454545454, |
| "step": 1034, |
| "tokens_trained": 0.098271304 |
| }, |
| { |
| "epoch": 0.29390070921985817, |
| "grad_norm": 2.1869800090789795, |
| "loss": 5.4102, |
| "lr": 0.0009251748251748252, |
| "step": 1036, |
| "tokens_trained": 0.098461528 |
| }, |
| { |
| "epoch": 0.294468085106383, |
| "grad_norm": 2.2882192134857178, |
| "loss": 5.4723, |
| "lr": 0.0009248951048951049, |
| "step": 1038, |
| "tokens_trained": 0.09865268 |
| }, |
| { |
| "epoch": 0.2950354609929078, |
| "grad_norm": 2.1590888500213623, |
| "loss": 5.3523, |
| "lr": 0.0009246153846153846, |
| "step": 1040, |
| "tokens_trained": 0.098842688 |
| }, |
| { |
| "epoch": 0.2956028368794326, |
| "grad_norm": 2.284207582473755, |
| "loss": 5.4647, |
| "lr": 0.0009243356643356644, |
| "step": 1042, |
| "tokens_trained": 0.099031544 |
| }, |
| { |
| "epoch": 0.29617021276595745, |
| "grad_norm": 2.333207845687866, |
| "loss": 5.4655, |
| "lr": 0.0009240559440559441, |
| "step": 1044, |
| "tokens_trained": 0.09922264 |
| }, |
| { |
| "epoch": 0.2967375886524823, |
| "grad_norm": 2.357572555541992, |
| "loss": 5.3909, |
| "lr": 0.0009237762237762239, |
| "step": 1046, |
| "tokens_trained": 0.099411416 |
| }, |
| { |
| "epoch": 0.29730496453900707, |
| "grad_norm": 1.88053297996521, |
| "loss": 5.4119, |
| "lr": 0.0009234965034965035, |
| "step": 1048, |
| "tokens_trained": 0.099602112 |
| }, |
| { |
| "epoch": 0.2978723404255319, |
| "grad_norm": 1.8860585689544678, |
| "loss": 5.3867, |
| "lr": 0.0009232167832167832, |
| "step": 1050, |
| "tokens_trained": 0.099792552 |
| }, |
| { |
| "epoch": 0.29843971631205674, |
| "grad_norm": 2.000173568725586, |
| "loss": 5.3773, |
| "lr": 0.0009229370629370629, |
| "step": 1052, |
| "tokens_trained": 0.099981752 |
| }, |
| { |
| "epoch": 0.29900709219858157, |
| "grad_norm": 2.015394926071167, |
| "loss": 5.3936, |
| "lr": 0.0009226573426573427, |
| "step": 1054, |
| "tokens_trained": 0.10017428 |
| }, |
| { |
| "epoch": 0.2995744680851064, |
| "grad_norm": 2.0050301551818848, |
| "loss": 5.3653, |
| "lr": 0.0009223776223776224, |
| "step": 1056, |
| "tokens_trained": 0.100364544 |
| }, |
| { |
| "epoch": 0.3001418439716312, |
| "grad_norm": 1.7397475242614746, |
| "loss": 5.3224, |
| "lr": 0.0009220979020979021, |
| "step": 1058, |
| "tokens_trained": 0.100555648 |
| }, |
| { |
| "epoch": 0.300709219858156, |
| "grad_norm": 1.9808533191680908, |
| "loss": 5.3822, |
| "lr": 0.0009218181818181819, |
| "step": 1060, |
| "tokens_trained": 0.100744968 |
| }, |
| { |
| "epoch": 0.30127659574468085, |
| "grad_norm": 2.034329652786255, |
| "loss": 5.3961, |
| "lr": 0.0009215384615384616, |
| "step": 1062, |
| "tokens_trained": 0.100934176 |
| }, |
| { |
| "epoch": 0.3018439716312057, |
| "grad_norm": 2.1286778450012207, |
| "loss": 5.4061, |
| "lr": 0.0009212587412587413, |
| "step": 1064, |
| "tokens_trained": 0.101125216 |
| }, |
| { |
| "epoch": 0.3024113475177305, |
| "grad_norm": 2.131822347640991, |
| "loss": 5.3675, |
| "lr": 0.000920979020979021, |
| "step": 1066, |
| "tokens_trained": 0.101314504 |
| }, |
| { |
| "epoch": 0.3029787234042553, |
| "grad_norm": 2.162069320678711, |
| "loss": 5.4552, |
| "lr": 0.0009206993006993007, |
| "step": 1068, |
| "tokens_trained": 0.101503352 |
| }, |
| { |
| "epoch": 0.30354609929078014, |
| "grad_norm": 2.5730931758880615, |
| "loss": 5.3978, |
| "lr": 0.0009204195804195804, |
| "step": 1070, |
| "tokens_trained": 0.101691504 |
| }, |
| { |
| "epoch": 0.30411347517730497, |
| "grad_norm": 2.2053022384643555, |
| "loss": 5.3604, |
| "lr": 0.0009201398601398602, |
| "step": 1072, |
| "tokens_trained": 0.101883072 |
| }, |
| { |
| "epoch": 0.3046808510638298, |
| "grad_norm": 2.1578407287597656, |
| "loss": 5.4236, |
| "lr": 0.0009198601398601398, |
| "step": 1074, |
| "tokens_trained": 0.102075832 |
| }, |
| { |
| "epoch": 0.3052482269503546, |
| "grad_norm": 2.0061423778533936, |
| "loss": 5.3882, |
| "lr": 0.0009195804195804196, |
| "step": 1076, |
| "tokens_trained": 0.102266768 |
| }, |
| { |
| "epoch": 0.3058156028368794, |
| "grad_norm": 1.8915576934814453, |
| "loss": 5.3539, |
| "lr": 0.0009193006993006993, |
| "step": 1078, |
| "tokens_trained": 0.102457096 |
| }, |
| { |
| "epoch": 0.30638297872340425, |
| "grad_norm": 2.15053129196167, |
| "loss": 5.4222, |
| "lr": 0.0009190209790209791, |
| "step": 1080, |
| "tokens_trained": 0.102647544 |
| }, |
| { |
| "epoch": 0.3069503546099291, |
| "grad_norm": 2.241217851638794, |
| "loss": 5.4275, |
| "lr": 0.0009187412587412588, |
| "step": 1082, |
| "tokens_trained": 0.10283904 |
| }, |
| { |
| "epoch": 0.3075177304964539, |
| "grad_norm": 2.37854266166687, |
| "loss": 5.419, |
| "lr": 0.0009184615384615385, |
| "step": 1084, |
| "tokens_trained": 0.103028464 |
| }, |
| { |
| "epoch": 0.3080851063829787, |
| "grad_norm": 2.00118350982666, |
| "loss": 5.4225, |
| "lr": 0.0009181818181818182, |
| "step": 1086, |
| "tokens_trained": 0.10321896 |
| }, |
| { |
| "epoch": 0.30865248226950354, |
| "grad_norm": 2.2643723487854004, |
| "loss": 5.4487, |
| "lr": 0.0009179020979020978, |
| "step": 1088, |
| "tokens_trained": 0.103409256 |
| }, |
| { |
| "epoch": 0.30921985815602837, |
| "grad_norm": 2.4618585109710693, |
| "loss": 5.4211, |
| "lr": 0.0009176223776223777, |
| "step": 1090, |
| "tokens_trained": 0.103597272 |
| }, |
| { |
| "epoch": 0.3097872340425532, |
| "grad_norm": 2.141491174697876, |
| "loss": 5.3758, |
| "lr": 0.0009173426573426573, |
| "step": 1092, |
| "tokens_trained": 0.103786128 |
| }, |
| { |
| "epoch": 0.31035460992907804, |
| "grad_norm": 1.9777475595474243, |
| "loss": 5.4129, |
| "lr": 0.0009170629370629371, |
| "step": 1094, |
| "tokens_trained": 0.103974864 |
| }, |
| { |
| "epoch": 0.3109219858156028, |
| "grad_norm": 1.9153270721435547, |
| "loss": 5.3912, |
| "lr": 0.0009167832167832168, |
| "step": 1096, |
| "tokens_trained": 0.104163864 |
| }, |
| { |
| "epoch": 0.31148936170212765, |
| "grad_norm": 2.172558546066284, |
| "loss": 5.3425, |
| "lr": 0.0009165034965034966, |
| "step": 1098, |
| "tokens_trained": 0.104353136 |
| }, |
| { |
| "epoch": 0.3120567375886525, |
| "grad_norm": 2.049896717071533, |
| "loss": 5.3732, |
| "lr": 0.0009162237762237763, |
| "step": 1100, |
| "tokens_trained": 0.10454476 |
| }, |
| { |
| "epoch": 0.3126241134751773, |
| "grad_norm": 1.9415545463562012, |
| "loss": 5.3873, |
| "lr": 0.0009159440559440559, |
| "step": 1102, |
| "tokens_trained": 0.104734296 |
| }, |
| { |
| "epoch": 0.3131914893617021, |
| "grad_norm": 1.7280856370925903, |
| "loss": 5.3857, |
| "lr": 0.0009156643356643357, |
| "step": 1104, |
| "tokens_trained": 0.104925648 |
| }, |
| { |
| "epoch": 0.31375886524822694, |
| "grad_norm": 1.9120069742202759, |
| "loss": 5.3216, |
| "lr": 0.0009153846153846153, |
| "step": 1106, |
| "tokens_trained": 0.105115776 |
| }, |
| { |
| "epoch": 0.31432624113475177, |
| "grad_norm": 2.007101058959961, |
| "loss": 5.4074, |
| "lr": 0.0009151048951048952, |
| "step": 1108, |
| "tokens_trained": 0.105305656 |
| }, |
| { |
| "epoch": 0.3148936170212766, |
| "grad_norm": 1.9159268140792847, |
| "loss": 5.3625, |
| "lr": 0.0009148251748251748, |
| "step": 1110, |
| "tokens_trained": 0.105494632 |
| }, |
| { |
| "epoch": 0.31546099290780144, |
| "grad_norm": 1.9235239028930664, |
| "loss": 5.3362, |
| "lr": 0.0009145454545454546, |
| "step": 1112, |
| "tokens_trained": 0.105683536 |
| }, |
| { |
| "epoch": 0.3160283687943262, |
| "grad_norm": 1.8954299688339233, |
| "loss": 5.3531, |
| "lr": 0.0009142657342657343, |
| "step": 1114, |
| "tokens_trained": 0.105873176 |
| }, |
| { |
| "epoch": 0.31659574468085105, |
| "grad_norm": 2.026578426361084, |
| "loss": 5.408, |
| "lr": 0.000913986013986014, |
| "step": 1116, |
| "tokens_trained": 0.10606276 |
| }, |
| { |
| "epoch": 0.3171631205673759, |
| "grad_norm": 1.9014806747436523, |
| "loss": 5.363, |
| "lr": 0.0009137062937062938, |
| "step": 1118, |
| "tokens_trained": 0.106254616 |
| }, |
| { |
| "epoch": 0.3177304964539007, |
| "grad_norm": 1.849649429321289, |
| "loss": 5.3811, |
| "lr": 0.0009134265734265734, |
| "step": 1120, |
| "tokens_trained": 0.106445376 |
| }, |
| { |
| "epoch": 0.31829787234042556, |
| "grad_norm": 1.7405186891555786, |
| "loss": 5.3504, |
| "lr": 0.0009131468531468532, |
| "step": 1122, |
| "tokens_trained": 0.106636072 |
| }, |
| { |
| "epoch": 0.31886524822695034, |
| "grad_norm": 1.867285966873169, |
| "loss": 5.3675, |
| "lr": 0.0009128671328671328, |
| "step": 1124, |
| "tokens_trained": 0.106827896 |
| }, |
| { |
| "epoch": 0.3191489361702128, |
| "eval_loss": 5.3796281814575195, |
| "eval_runtime": 20.7444, |
| "step": 1125, |
| "tokens_trained": 0.106922416 |
| }, |
| { |
| "epoch": 0.31943262411347517, |
| "grad_norm": 1.8044356107711792, |
| "loss": 5.3717, |
| "lr": 0.0009125874125874127, |
| "step": 1126, |
| "tokens_trained": 0.107016056 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.6348600387573242, |
| "loss": 5.4676, |
| "lr": 0.0009123076923076923, |
| "step": 1128, |
| "tokens_trained": 0.107203992 |
| }, |
| { |
| "epoch": 0.32056737588652484, |
| "grad_norm": 1.7802475690841675, |
| "loss": 5.3304, |
| "lr": 0.000912027972027972, |
| "step": 1130, |
| "tokens_trained": 0.107394224 |
| }, |
| { |
| "epoch": 0.3211347517730496, |
| "grad_norm": 1.7695430517196655, |
| "loss": 5.3611, |
| "lr": 0.0009117482517482518, |
| "step": 1132, |
| "tokens_trained": 0.107583464 |
| }, |
| { |
| "epoch": 0.32170212765957445, |
| "grad_norm": 2.026853322982788, |
| "loss": 5.363, |
| "lr": 0.0009114685314685315, |
| "step": 1134, |
| "tokens_trained": 0.107776088 |
| }, |
| { |
| "epoch": 0.3222695035460993, |
| "grad_norm": 1.803208589553833, |
| "loss": 5.3801, |
| "lr": 0.0009111888111888113, |
| "step": 1136, |
| "tokens_trained": 0.107964416 |
| }, |
| { |
| "epoch": 0.3228368794326241, |
| "grad_norm": 1.812386155128479, |
| "loss": 5.3721, |
| "lr": 0.0009109090909090909, |
| "step": 1138, |
| "tokens_trained": 0.108153104 |
| }, |
| { |
| "epoch": 0.32340425531914896, |
| "grad_norm": 1.605839490890503, |
| "loss": 5.3339, |
| "lr": 0.0009106293706293707, |
| "step": 1140, |
| "tokens_trained": 0.108341408 |
| }, |
| { |
| "epoch": 0.32397163120567374, |
| "grad_norm": 1.7169313430786133, |
| "loss": 5.4051, |
| "lr": 0.0009103496503496503, |
| "step": 1142, |
| "tokens_trained": 0.108532312 |
| }, |
| { |
| "epoch": 0.3245390070921986, |
| "grad_norm": 2.0499444007873535, |
| "loss": 5.2992, |
| "lr": 0.0009100699300699301, |
| "step": 1144, |
| "tokens_trained": 0.108721864 |
| }, |
| { |
| "epoch": 0.3251063829787234, |
| "grad_norm": 1.988674521446228, |
| "loss": 5.3862, |
| "lr": 0.0009097902097902098, |
| "step": 1146, |
| "tokens_trained": 0.108912352 |
| }, |
| { |
| "epoch": 0.32567375886524824, |
| "grad_norm": 1.8733936548233032, |
| "loss": 5.3627, |
| "lr": 0.0009095104895104895, |
| "step": 1148, |
| "tokens_trained": 0.109101952 |
| }, |
| { |
| "epoch": 0.3262411347517731, |
| "grad_norm": 1.978704810142517, |
| "loss": 5.3668, |
| "lr": 0.0009092307692307692, |
| "step": 1150, |
| "tokens_trained": 0.109292712 |
| }, |
| { |
| "epoch": 0.32680851063829786, |
| "grad_norm": 1.9723341464996338, |
| "loss": 5.3545, |
| "lr": 0.000908951048951049, |
| "step": 1152, |
| "tokens_trained": 0.109484992 |
| }, |
| { |
| "epoch": 0.3273758865248227, |
| "grad_norm": 2.165728807449341, |
| "loss": 5.3731, |
| "lr": 0.0009086713286713288, |
| "step": 1154, |
| "tokens_trained": 0.109674336 |
| }, |
| { |
| "epoch": 0.3279432624113475, |
| "grad_norm": 1.9241019487380981, |
| "loss": 5.3456, |
| "lr": 0.0009083916083916084, |
| "step": 1156, |
| "tokens_trained": 0.109863368 |
| }, |
| { |
| "epoch": 0.32851063829787236, |
| "grad_norm": 1.9442275762557983, |
| "loss": 5.4065, |
| "lr": 0.0009081118881118881, |
| "step": 1158, |
| "tokens_trained": 0.110051744 |
| }, |
| { |
| "epoch": 0.32907801418439714, |
| "grad_norm": 1.7714000940322876, |
| "loss": 5.3888, |
| "lr": 0.0009078321678321678, |
| "step": 1160, |
| "tokens_trained": 0.11024344 |
| }, |
| { |
| "epoch": 0.329645390070922, |
| "grad_norm": 2.043646812438965, |
| "loss": 5.3835, |
| "lr": 0.0009075524475524476, |
| "step": 1162, |
| "tokens_trained": 0.11043488 |
| }, |
| { |
| "epoch": 0.3302127659574468, |
| "grad_norm": 1.837196946144104, |
| "loss": 5.3554, |
| "lr": 0.0009072727272727273, |
| "step": 1164, |
| "tokens_trained": 0.110626104 |
| }, |
| { |
| "epoch": 0.33078014184397164, |
| "grad_norm": 1.874135971069336, |
| "loss": 5.3457, |
| "lr": 0.000906993006993007, |
| "step": 1166, |
| "tokens_trained": 0.110814768 |
| }, |
| { |
| "epoch": 0.3313475177304965, |
| "grad_norm": 1.6493511199951172, |
| "loss": 5.3118, |
| "lr": 0.0009067132867132866, |
| "step": 1168, |
| "tokens_trained": 0.111004104 |
| }, |
| { |
| "epoch": 0.33191489361702126, |
| "grad_norm": 1.8386362791061401, |
| "loss": 5.3422, |
| "lr": 0.0009064335664335665, |
| "step": 1170, |
| "tokens_trained": 0.11119544 |
| }, |
| { |
| "epoch": 0.3324822695035461, |
| "grad_norm": 2.020859718322754, |
| "loss": 5.3565, |
| "lr": 0.0009061538461538462, |
| "step": 1172, |
| "tokens_trained": 0.111384384 |
| }, |
| { |
| "epoch": 0.3330496453900709, |
| "grad_norm": 2.049401044845581, |
| "loss": 5.3358, |
| "lr": 0.0009058741258741259, |
| "step": 1174, |
| "tokens_trained": 0.111573944 |
| }, |
| { |
| "epoch": 0.33361702127659576, |
| "grad_norm": 1.965345025062561, |
| "loss": 5.3431, |
| "lr": 0.0009055944055944056, |
| "step": 1176, |
| "tokens_trained": 0.111763504 |
| }, |
| { |
| "epoch": 0.3341843971631206, |
| "grad_norm": 1.9792066812515259, |
| "loss": 5.3579, |
| "lr": 0.0009053146853146853, |
| "step": 1178, |
| "tokens_trained": 0.111953664 |
| }, |
| { |
| "epoch": 0.3347517730496454, |
| "grad_norm": 1.7790883779525757, |
| "loss": 5.3499, |
| "lr": 0.0009050349650349651, |
| "step": 1180, |
| "tokens_trained": 0.11214324 |
| }, |
| { |
| "epoch": 0.3353191489361702, |
| "grad_norm": 1.6504682302474976, |
| "loss": 5.3415, |
| "lr": 0.0009047552447552448, |
| "step": 1182, |
| "tokens_trained": 0.112331256 |
| }, |
| { |
| "epoch": 0.33588652482269504, |
| "grad_norm": 1.9687312841415405, |
| "loss": 5.3565, |
| "lr": 0.0009044755244755245, |
| "step": 1184, |
| "tokens_trained": 0.11252208 |
| }, |
| { |
| "epoch": 0.3364539007092199, |
| "grad_norm": 1.7077507972717285, |
| "loss": 5.3568, |
| "lr": 0.0009041958041958041, |
| "step": 1186, |
| "tokens_trained": 0.112714272 |
| }, |
| { |
| "epoch": 0.33702127659574466, |
| "grad_norm": 1.6311697959899902, |
| "loss": 5.3345, |
| "lr": 0.000903916083916084, |
| "step": 1188, |
| "tokens_trained": 0.11290428 |
| }, |
| { |
| "epoch": 0.3375886524822695, |
| "grad_norm": 1.975233793258667, |
| "loss": 5.4161, |
| "lr": 0.0009036363636363637, |
| "step": 1190, |
| "tokens_trained": 0.113093984 |
| }, |
| { |
| "epoch": 0.3381560283687943, |
| "grad_norm": 1.7567362785339355, |
| "loss": 5.3481, |
| "lr": 0.0009033566433566434, |
| "step": 1192, |
| "tokens_trained": 0.113284904 |
| }, |
| { |
| "epoch": 0.33872340425531916, |
| "grad_norm": 2.121367931365967, |
| "loss": 5.3729, |
| "lr": 0.0009030769230769231, |
| "step": 1194, |
| "tokens_trained": 0.113477952 |
| }, |
| { |
| "epoch": 0.339290780141844, |
| "grad_norm": 2.143253803253174, |
| "loss": 5.3866, |
| "lr": 0.0009027972027972027, |
| "step": 1196, |
| "tokens_trained": 0.11366872 |
| }, |
| { |
| "epoch": 0.3398581560283688, |
| "grad_norm": 2.1118557453155518, |
| "loss": 5.3501, |
| "lr": 0.0009025174825174826, |
| "step": 1198, |
| "tokens_trained": 0.113861552 |
| }, |
| { |
| "epoch": 0.3404255319148936, |
| "grad_norm": 1.8132637739181519, |
| "loss": 5.3325, |
| "lr": 0.0009022377622377622, |
| "step": 1200, |
| "tokens_trained": 0.114051176 |
| }, |
| { |
| "epoch": 0.34099290780141844, |
| "grad_norm": 1.761227011680603, |
| "loss": 5.3629, |
| "lr": 0.000901958041958042, |
| "step": 1202, |
| "tokens_trained": 0.114240808 |
| }, |
| { |
| "epoch": 0.3415602836879433, |
| "grad_norm": 1.8358371257781982, |
| "loss": 5.3644, |
| "lr": 0.0009016783216783216, |
| "step": 1204, |
| "tokens_trained": 0.114430968 |
| }, |
| { |
| "epoch": 0.3421276595744681, |
| "grad_norm": 2.0768542289733887, |
| "loss": 5.3705, |
| "lr": 0.0009013986013986014, |
| "step": 1206, |
| "tokens_trained": 0.114620544 |
| }, |
| { |
| "epoch": 0.3426950354609929, |
| "grad_norm": 1.6928143501281738, |
| "loss": 5.2534, |
| "lr": 0.0009011188811188812, |
| "step": 1208, |
| "tokens_trained": 0.114811928 |
| }, |
| { |
| "epoch": 0.3432624113475177, |
| "grad_norm": 1.8634029626846313, |
| "loss": 5.3679, |
| "lr": 0.0009008391608391609, |
| "step": 1210, |
| "tokens_trained": 0.115002912 |
| }, |
| { |
| "epoch": 0.34382978723404256, |
| "grad_norm": 1.8048954010009766, |
| "loss": 5.3049, |
| "lr": 0.0009005594405594406, |
| "step": 1212, |
| "tokens_trained": 0.115192544 |
| }, |
| { |
| "epoch": 0.3443971631205674, |
| "grad_norm": 1.9170348644256592, |
| "loss": 5.2457, |
| "lr": 0.0009002797202797202, |
| "step": 1214, |
| "tokens_trained": 0.115383248 |
| }, |
| { |
| "epoch": 0.3449645390070922, |
| "grad_norm": 1.788751482963562, |
| "loss": 5.3678, |
| "lr": 0.0009000000000000001, |
| "step": 1216, |
| "tokens_trained": 0.115574304 |
| }, |
| { |
| "epoch": 0.345531914893617, |
| "grad_norm": 1.9751293659210205, |
| "loss": 5.3352, |
| "lr": 0.0008997202797202797, |
| "step": 1218, |
| "tokens_trained": 0.115766008 |
| }, |
| { |
| "epoch": 0.34609929078014184, |
| "grad_norm": 1.8202649354934692, |
| "loss": 5.37, |
| "lr": 0.0008994405594405595, |
| "step": 1220, |
| "tokens_trained": 0.11595804 |
| }, |
| { |
| "epoch": 0.3466666666666667, |
| "grad_norm": 1.656063199043274, |
| "loss": 5.3664, |
| "lr": 0.0008991608391608391, |
| "step": 1222, |
| "tokens_trained": 0.116146344 |
| }, |
| { |
| "epoch": 0.3472340425531915, |
| "grad_norm": 1.7509667873382568, |
| "loss": 5.3636, |
| "lr": 0.0008988811188811188, |
| "step": 1224, |
| "tokens_trained": 0.116334568 |
| }, |
| { |
| "epoch": 0.3478014184397163, |
| "grad_norm": 1.8556638956069946, |
| "loss": 5.3577, |
| "lr": 0.0008986013986013987, |
| "step": 1226, |
| "tokens_trained": 0.116525704 |
| }, |
| { |
| "epoch": 0.3483687943262411, |
| "grad_norm": 2.026033639907837, |
| "loss": 5.3657, |
| "lr": 0.0008983216783216783, |
| "step": 1228, |
| "tokens_trained": 0.116716032 |
| }, |
| { |
| "epoch": 0.34893617021276596, |
| "grad_norm": 1.6965924501419067, |
| "loss": 5.304, |
| "lr": 0.0008980419580419581, |
| "step": 1230, |
| "tokens_trained": 0.116904832 |
| }, |
| { |
| "epoch": 0.3495035460992908, |
| "grad_norm": 1.8144174814224243, |
| "loss": 5.3759, |
| "lr": 0.0008977622377622377, |
| "step": 1232, |
| "tokens_trained": 0.117095536 |
| }, |
| { |
| "epoch": 0.3500709219858156, |
| "grad_norm": 1.7229580879211426, |
| "loss": 5.3244, |
| "lr": 0.0008974825174825176, |
| "step": 1234, |
| "tokens_trained": 0.117285952 |
| }, |
| { |
| "epoch": 0.3506382978723404, |
| "grad_norm": 1.722578525543213, |
| "loss": 5.3442, |
| "lr": 0.0008972027972027972, |
| "step": 1236, |
| "tokens_trained": 0.117477488 |
| }, |
| { |
| "epoch": 0.35120567375886524, |
| "grad_norm": 1.8006796836853027, |
| "loss": 5.3624, |
| "lr": 0.000896923076923077, |
| "step": 1238, |
| "tokens_trained": 0.117667352 |
| }, |
| { |
| "epoch": 0.3517730496453901, |
| "grad_norm": 1.7172250747680664, |
| "loss": 5.3002, |
| "lr": 0.0008966433566433566, |
| "step": 1240, |
| "tokens_trained": 0.117856504 |
| }, |
| { |
| "epoch": 0.3523404255319149, |
| "grad_norm": 1.8281760215759277, |
| "loss": 5.3311, |
| "lr": 0.0008963636363636363, |
| "step": 1242, |
| "tokens_trained": 0.11804676 |
| }, |
| { |
| "epoch": 0.3529078014184397, |
| "grad_norm": 1.7666652202606201, |
| "loss": 5.3847, |
| "lr": 0.0008960839160839162, |
| "step": 1244, |
| "tokens_trained": 0.118235688 |
| }, |
| { |
| "epoch": 0.3534751773049645, |
| "grad_norm": 1.7723621129989624, |
| "loss": 5.3506, |
| "lr": 0.0008958041958041958, |
| "step": 1246, |
| "tokens_trained": 0.11842632 |
| }, |
| { |
| "epoch": 0.35404255319148936, |
| "grad_norm": 1.7779643535614014, |
| "loss": 5.3066, |
| "lr": 0.0008955244755244756, |
| "step": 1248, |
| "tokens_trained": 0.118616536 |
| }, |
| { |
| "epoch": 0.3546099290780142, |
| "grad_norm": 1.746245265007019, |
| "loss": 5.2993, |
| "lr": 0.0008952447552447552, |
| "step": 1250, |
| "tokens_trained": 0.118807672 |
| }, |
| { |
| "epoch": 0.3546099290780142, |
| "eval_loss": 5.34489107131958, |
| "eval_runtime": 21.0838, |
| "step": 1250, |
| "tokens_trained": 0.118807672 |
| }, |
| { |
| "epoch": 0.35517730496453903, |
| "grad_norm": 1.8439521789550781, |
| "loss": 5.3796, |
| "lr": 0.000894965034965035, |
| "step": 1252, |
| "tokens_trained": 0.118996672 |
| }, |
| { |
| "epoch": 0.3557446808510638, |
| "grad_norm": 1.7830157279968262, |
| "loss": 5.3435, |
| "lr": 0.0008946853146853147, |
| "step": 1254, |
| "tokens_trained": 0.119189544 |
| }, |
| { |
| "epoch": 0.35631205673758864, |
| "grad_norm": 1.6022379398345947, |
| "loss": 5.3772, |
| "lr": 0.0008944055944055944, |
| "step": 1256, |
| "tokens_trained": 0.119379312 |
| }, |
| { |
| "epoch": 0.3568794326241135, |
| "grad_norm": 1.6100343465805054, |
| "loss": 5.3411, |
| "lr": 0.0008941258741258741, |
| "step": 1258, |
| "tokens_trained": 0.119572072 |
| }, |
| { |
| "epoch": 0.3574468085106383, |
| "grad_norm": 1.7826210260391235, |
| "loss": 5.317, |
| "lr": 0.0008938461538461538, |
| "step": 1260, |
| "tokens_trained": 0.119761304 |
| }, |
| { |
| "epoch": 0.3580141843971631, |
| "grad_norm": 1.510432243347168, |
| "loss": 5.4018, |
| "lr": 0.0008935664335664337, |
| "step": 1262, |
| "tokens_trained": 0.11994984 |
| }, |
| { |
| "epoch": 0.35858156028368793, |
| "grad_norm": 1.7209227085113525, |
| "loss": 5.3651, |
| "lr": 0.0008932867132867133, |
| "step": 1264, |
| "tokens_trained": 0.120139368 |
| }, |
| { |
| "epoch": 0.35914893617021276, |
| "grad_norm": 1.7528654336929321, |
| "loss": 5.3329, |
| "lr": 0.000893006993006993, |
| "step": 1266, |
| "tokens_trained": 0.1203308 |
| }, |
| { |
| "epoch": 0.3597163120567376, |
| "grad_norm": 1.8427083492279053, |
| "loss": 5.3897, |
| "lr": 0.0008927272727272727, |
| "step": 1268, |
| "tokens_trained": 0.12052112 |
| }, |
| { |
| "epoch": 0.36028368794326243, |
| "grad_norm": 1.530527114868164, |
| "loss": 5.3407, |
| "lr": 0.0008924475524475525, |
| "step": 1270, |
| "tokens_trained": 0.120709456 |
| }, |
| { |
| "epoch": 0.3608510638297872, |
| "grad_norm": 1.5996145009994507, |
| "loss": 5.3697, |
| "lr": 0.0008921678321678322, |
| "step": 1272, |
| "tokens_trained": 0.12089976 |
| }, |
| { |
| "epoch": 0.36141843971631205, |
| "grad_norm": 1.5235425233840942, |
| "loss": 5.335, |
| "lr": 0.0008918881118881119, |
| "step": 1274, |
| "tokens_trained": 0.121089184 |
| }, |
| { |
| "epoch": 0.3619858156028369, |
| "grad_norm": 1.757206678390503, |
| "loss": 5.2983, |
| "lr": 0.0008916083916083916, |
| "step": 1276, |
| "tokens_trained": 0.1212798 |
| }, |
| { |
| "epoch": 0.3625531914893617, |
| "grad_norm": 1.5952467918395996, |
| "loss": 5.3593, |
| "lr": 0.0008913286713286713, |
| "step": 1278, |
| "tokens_trained": 0.121472816 |
| }, |
| { |
| "epoch": 0.36312056737588655, |
| "grad_norm": 1.6975666284561157, |
| "loss": 5.3867, |
| "lr": 0.0008910489510489512, |
| "step": 1280, |
| "tokens_trained": 0.121659944 |
| }, |
| { |
| "epoch": 0.36368794326241133, |
| "grad_norm": 1.8659151792526245, |
| "loss": 5.3032, |
| "lr": 0.0008907692307692308, |
| "step": 1282, |
| "tokens_trained": 0.121848552 |
| }, |
| { |
| "epoch": 0.36425531914893616, |
| "grad_norm": 1.8692409992218018, |
| "loss": 5.3643, |
| "lr": 0.0008904895104895105, |
| "step": 1284, |
| "tokens_trained": 0.12203916 |
| }, |
| { |
| "epoch": 0.364822695035461, |
| "grad_norm": 1.786490559577942, |
| "loss": 5.4001, |
| "lr": 0.0008902097902097902, |
| "step": 1286, |
| "tokens_trained": 0.122228464 |
| }, |
| { |
| "epoch": 0.36539007092198583, |
| "grad_norm": 1.6635786294937134, |
| "loss": 5.3158, |
| "lr": 0.00088993006993007, |
| "step": 1288, |
| "tokens_trained": 0.122419768 |
| }, |
| { |
| "epoch": 0.3659574468085106, |
| "grad_norm": 1.8413279056549072, |
| "loss": 5.315, |
| "lr": 0.0008896503496503497, |
| "step": 1290, |
| "tokens_trained": 0.122608512 |
| }, |
| { |
| "epoch": 0.36652482269503545, |
| "grad_norm": 1.802370548248291, |
| "loss": 5.3203, |
| "lr": 0.0008893706293706294, |
| "step": 1292, |
| "tokens_trained": 0.122795944 |
| }, |
| { |
| "epoch": 0.3670921985815603, |
| "grad_norm": 1.5968035459518433, |
| "loss": 5.3833, |
| "lr": 0.000889090909090909, |
| "step": 1294, |
| "tokens_trained": 0.1229842 |
| }, |
| { |
| "epoch": 0.3676595744680851, |
| "grad_norm": 1.8354761600494385, |
| "loss": 5.3365, |
| "lr": 0.0008888111888111888, |
| "step": 1296, |
| "tokens_trained": 0.123175336 |
| }, |
| { |
| "epoch": 0.36822695035460995, |
| "grad_norm": 1.925227403640747, |
| "loss": 5.3687, |
| "lr": 0.0008885314685314686, |
| "step": 1298, |
| "tokens_trained": 0.123366848 |
| }, |
| { |
| "epoch": 0.36879432624113473, |
| "grad_norm": 1.7477060556411743, |
| "loss": 5.4033, |
| "lr": 0.0008882517482517483, |
| "step": 1300, |
| "tokens_trained": 0.123556656 |
| }, |
| { |
| "epoch": 0.36936170212765956, |
| "grad_norm": 1.8925527334213257, |
| "loss": 5.2854, |
| "lr": 0.000887972027972028, |
| "step": 1302, |
| "tokens_trained": 0.12374612 |
| }, |
| { |
| "epoch": 0.3699290780141844, |
| "grad_norm": 1.8904681205749512, |
| "loss": 5.2903, |
| "lr": 0.0008876923076923077, |
| "step": 1304, |
| "tokens_trained": 0.123936192 |
| }, |
| { |
| "epoch": 0.37049645390070923, |
| "grad_norm": 1.9903556108474731, |
| "loss": 5.2994, |
| "lr": 0.0008874125874125875, |
| "step": 1306, |
| "tokens_trained": 0.124126112 |
| }, |
| { |
| "epoch": 0.37106382978723407, |
| "grad_norm": 2.014011859893799, |
| "loss": 5.353, |
| "lr": 0.0008871328671328671, |
| "step": 1308, |
| "tokens_trained": 0.124314592 |
| }, |
| { |
| "epoch": 0.37163120567375885, |
| "grad_norm": 1.9086287021636963, |
| "loss": 5.3924, |
| "lr": 0.0008868531468531469, |
| "step": 1310, |
| "tokens_trained": 0.124503496 |
| }, |
| { |
| "epoch": 0.3721985815602837, |
| "grad_norm": 1.8927134275436401, |
| "loss": 5.3098, |
| "lr": 0.0008865734265734265, |
| "step": 1312, |
| "tokens_trained": 0.124693296 |
| }, |
| { |
| "epoch": 0.3727659574468085, |
| "grad_norm": 1.850883960723877, |
| "loss": 5.356, |
| "lr": 0.0008862937062937063, |
| "step": 1314, |
| "tokens_trained": 0.124883528 |
| }, |
| { |
| "epoch": 0.37333333333333335, |
| "grad_norm": 1.813315510749817, |
| "loss": 5.3564, |
| "lr": 0.0008860139860139861, |
| "step": 1316, |
| "tokens_trained": 0.125072328 |
| }, |
| { |
| "epoch": 0.37390070921985813, |
| "grad_norm": 1.6776509284973145, |
| "loss": 5.3348, |
| "lr": 0.0008857342657342658, |
| "step": 1318, |
| "tokens_trained": 0.125263128 |
| }, |
| { |
| "epoch": 0.37446808510638296, |
| "grad_norm": 1.7775620222091675, |
| "loss": 5.298, |
| "lr": 0.0008854545454545455, |
| "step": 1320, |
| "tokens_trained": 0.125453944 |
| }, |
| { |
| "epoch": 0.3750354609929078, |
| "grad_norm": 1.6916086673736572, |
| "loss": 5.332, |
| "lr": 0.0008851748251748251, |
| "step": 1322, |
| "tokens_trained": 0.125644264 |
| }, |
| { |
| "epoch": 0.37560283687943263, |
| "grad_norm": 1.7182034254074097, |
| "loss": 5.3405, |
| "lr": 0.000884895104895105, |
| "step": 1324, |
| "tokens_trained": 0.125835256 |
| }, |
| { |
| "epoch": 0.37617021276595747, |
| "grad_norm": 1.690463662147522, |
| "loss": 5.355, |
| "lr": 0.0008846153846153846, |
| "step": 1326, |
| "tokens_trained": 0.126025952 |
| }, |
| { |
| "epoch": 0.37673758865248225, |
| "grad_norm": 1.7073352336883545, |
| "loss": 5.3304, |
| "lr": 0.0008843356643356644, |
| "step": 1328, |
| "tokens_trained": 0.126217456 |
| }, |
| { |
| "epoch": 0.3773049645390071, |
| "grad_norm": 1.6633049249649048, |
| "loss": 5.2724, |
| "lr": 0.000884055944055944, |
| "step": 1330, |
| "tokens_trained": 0.126407688 |
| }, |
| { |
| "epoch": 0.3778723404255319, |
| "grad_norm": 1.618843913078308, |
| "loss": 5.2952, |
| "lr": 0.0008837762237762238, |
| "step": 1332, |
| "tokens_trained": 0.126599504 |
| }, |
| { |
| "epoch": 0.37843971631205675, |
| "grad_norm": 1.7496757507324219, |
| "loss": 5.2846, |
| "lr": 0.0008834965034965036, |
| "step": 1334, |
| "tokens_trained": 0.126787648 |
| }, |
| { |
| "epoch": 0.3790070921985816, |
| "grad_norm": 1.7284750938415527, |
| "loss": 5.3229, |
| "lr": 0.0008832167832167832, |
| "step": 1336, |
| "tokens_trained": 0.126977568 |
| }, |
| { |
| "epoch": 0.37957446808510636, |
| "grad_norm": 1.55423903465271, |
| "loss": 5.3112, |
| "lr": 0.000882937062937063, |
| "step": 1338, |
| "tokens_trained": 0.12716944 |
| }, |
| { |
| "epoch": 0.3801418439716312, |
| "grad_norm": 1.5783073902130127, |
| "loss": 5.3002, |
| "lr": 0.0008826573426573426, |
| "step": 1340, |
| "tokens_trained": 0.127357296 |
| }, |
| { |
| "epoch": 0.38070921985815603, |
| "grad_norm": 1.6970964670181274, |
| "loss": 5.3003, |
| "lr": 0.0008823776223776225, |
| "step": 1342, |
| "tokens_trained": 0.127547112 |
| }, |
| { |
| "epoch": 0.38127659574468087, |
| "grad_norm": 1.8086830377578735, |
| "loss": 5.3018, |
| "lr": 0.0008820979020979021, |
| "step": 1344, |
| "tokens_trained": 0.12773616 |
| }, |
| { |
| "epoch": 0.38184397163120565, |
| "grad_norm": 1.6589199304580688, |
| "loss": 5.2903, |
| "lr": 0.0008818181818181819, |
| "step": 1346, |
| "tokens_trained": 0.127924704 |
| }, |
| { |
| "epoch": 0.3824113475177305, |
| "grad_norm": 1.6546344757080078, |
| "loss": 5.2639, |
| "lr": 0.0008815384615384615, |
| "step": 1348, |
| "tokens_trained": 0.128114848 |
| }, |
| { |
| "epoch": 0.3829787234042553, |
| "grad_norm": 1.6867282390594482, |
| "loss": 5.2713, |
| "lr": 0.0008812587412587412, |
| "step": 1350, |
| "tokens_trained": 0.12830572 |
| }, |
| { |
| "epoch": 0.38354609929078015, |
| "grad_norm": 1.6336158514022827, |
| "loss": 5.2688, |
| "lr": 0.0008809790209790211, |
| "step": 1352, |
| "tokens_trained": 0.128497336 |
| }, |
| { |
| "epoch": 0.384113475177305, |
| "grad_norm": 1.591659665107727, |
| "loss": 5.3073, |
| "lr": 0.0008806993006993007, |
| "step": 1354, |
| "tokens_trained": 0.128689088 |
| }, |
| { |
| "epoch": 0.38468085106382977, |
| "grad_norm": 1.6427522897720337, |
| "loss": 5.2649, |
| "lr": 0.0008804195804195805, |
| "step": 1356, |
| "tokens_trained": 0.128879208 |
| }, |
| { |
| "epoch": 0.3852482269503546, |
| "grad_norm": 1.693124771118164, |
| "loss": 5.334, |
| "lr": 0.0008801398601398601, |
| "step": 1358, |
| "tokens_trained": 0.129069376 |
| }, |
| { |
| "epoch": 0.38581560283687943, |
| "grad_norm": 1.6677430868148804, |
| "loss": 5.3091, |
| "lr": 0.00087986013986014, |
| "step": 1360, |
| "tokens_trained": 0.12925972 |
| }, |
| { |
| "epoch": 0.38638297872340427, |
| "grad_norm": 1.6829359531402588, |
| "loss": 5.3529, |
| "lr": 0.0008795804195804196, |
| "step": 1362, |
| "tokens_trained": 0.129449816 |
| }, |
| { |
| "epoch": 0.3869503546099291, |
| "grad_norm": 1.6984829902648926, |
| "loss": 5.2832, |
| "lr": 0.0008793006993006993, |
| "step": 1364, |
| "tokens_trained": 0.129638736 |
| }, |
| { |
| "epoch": 0.3875177304964539, |
| "grad_norm": 1.6351298093795776, |
| "loss": 5.3654, |
| "lr": 0.000879020979020979, |
| "step": 1366, |
| "tokens_trained": 0.129831512 |
| }, |
| { |
| "epoch": 0.3880851063829787, |
| "grad_norm": 1.588394045829773, |
| "loss": 5.3203, |
| "lr": 0.0008787412587412587, |
| "step": 1368, |
| "tokens_trained": 0.130021424 |
| }, |
| { |
| "epoch": 0.38865248226950355, |
| "grad_norm": 1.7608240842819214, |
| "loss": 5.3387, |
| "lr": 0.0008784615384615386, |
| "step": 1370, |
| "tokens_trained": 0.130211848 |
| }, |
| { |
| "epoch": 0.3892198581560284, |
| "grad_norm": 1.7742120027542114, |
| "loss": 5.3054, |
| "lr": 0.0008781818181818182, |
| "step": 1372, |
| "tokens_trained": 0.130400256 |
| }, |
| { |
| "epoch": 0.38978723404255317, |
| "grad_norm": 1.8751057386398315, |
| "loss": 5.3569, |
| "lr": 0.000877902097902098, |
| "step": 1374, |
| "tokens_trained": 0.130591616 |
| }, |
| { |
| "epoch": 0.3900709219858156, |
| "eval_loss": 5.315512180328369, |
| "eval_runtime": 20.9232, |
| "step": 1375, |
| "tokens_trained": 0.130685128 |
| }, |
| { |
| "epoch": 0.390354609929078, |
| "grad_norm": 1.8666746616363525, |
| "loss": 5.3088, |
| "lr": 0.0008776223776223776, |
| "step": 1376, |
| "tokens_trained": 0.130781056 |
| }, |
| { |
| "epoch": 0.39092198581560283, |
| "grad_norm": 1.8694190979003906, |
| "loss": 5.2391, |
| "lr": 0.0008773426573426574, |
| "step": 1378, |
| "tokens_trained": 0.130971152 |
| }, |
| { |
| "epoch": 0.39148936170212767, |
| "grad_norm": 2.0663323402404785, |
| "loss": 5.3497, |
| "lr": 0.0008770629370629371, |
| "step": 1380, |
| "tokens_trained": 0.131163224 |
| }, |
| { |
| "epoch": 0.3920567375886525, |
| "grad_norm": 1.956207275390625, |
| "loss": 5.3227, |
| "lr": 0.0008767832167832168, |
| "step": 1382, |
| "tokens_trained": 0.131353832 |
| }, |
| { |
| "epoch": 0.3926241134751773, |
| "grad_norm": 1.6816498041152954, |
| "loss": 5.2626, |
| "lr": 0.0008765034965034965, |
| "step": 1384, |
| "tokens_trained": 0.13154472 |
| }, |
| { |
| "epoch": 0.3931914893617021, |
| "grad_norm": 1.655116319656372, |
| "loss": 5.3334, |
| "lr": 0.0008762237762237762, |
| "step": 1386, |
| "tokens_trained": 0.131732128 |
| }, |
| { |
| "epoch": 0.39375886524822695, |
| "grad_norm": 1.6439241170883179, |
| "loss": 5.3038, |
| "lr": 0.0008759440559440561, |
| "step": 1388, |
| "tokens_trained": 0.131920728 |
| }, |
| { |
| "epoch": 0.3943262411347518, |
| "grad_norm": 1.5000464916229248, |
| "loss": 5.2798, |
| "lr": 0.0008756643356643357, |
| "step": 1390, |
| "tokens_trained": 0.1321094 |
| }, |
| { |
| "epoch": 0.3948936170212766, |
| "grad_norm": 1.7129333019256592, |
| "loss": 5.2918, |
| "lr": 0.0008753846153846154, |
| "step": 1392, |
| "tokens_trained": 0.132299832 |
| }, |
| { |
| "epoch": 0.3954609929078014, |
| "grad_norm": 1.7489241361618042, |
| "loss": 5.3551, |
| "lr": 0.0008751048951048951, |
| "step": 1394, |
| "tokens_trained": 0.13249016 |
| }, |
| { |
| "epoch": 0.39602836879432624, |
| "grad_norm": 1.6597840785980225, |
| "loss": 5.3718, |
| "lr": 0.0008748251748251749, |
| "step": 1396, |
| "tokens_trained": 0.132680568 |
| }, |
| { |
| "epoch": 0.39659574468085107, |
| "grad_norm": 1.8800175189971924, |
| "loss": 5.3578, |
| "lr": 0.0008745454545454546, |
| "step": 1398, |
| "tokens_trained": 0.132871296 |
| }, |
| { |
| "epoch": 0.3971631205673759, |
| "grad_norm": 1.8190884590148926, |
| "loss": 5.2714, |
| "lr": 0.0008742657342657343, |
| "step": 1400, |
| "tokens_trained": 0.133062288 |
| }, |
| { |
| "epoch": 0.3977304964539007, |
| "grad_norm": 1.602634310722351, |
| "loss": 5.2914, |
| "lr": 0.0008739860139860139, |
| "step": 1402, |
| "tokens_trained": 0.133252584 |
| }, |
| { |
| "epoch": 0.3982978723404255, |
| "grad_norm": 1.7363992929458618, |
| "loss": 5.3154, |
| "lr": 0.0008737062937062937, |
| "step": 1404, |
| "tokens_trained": 0.133444784 |
| }, |
| { |
| "epoch": 0.39886524822695035, |
| "grad_norm": 1.7578014135360718, |
| "loss": 5.3735, |
| "lr": 0.0008734265734265734, |
| "step": 1406, |
| "tokens_trained": 0.133636288 |
| }, |
| { |
| "epoch": 0.3994326241134752, |
| "grad_norm": 1.8847187757492065, |
| "loss": 5.3118, |
| "lr": 0.0008731468531468532, |
| "step": 1408, |
| "tokens_trained": 0.133825824 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.750780701637268, |
| "loss": 5.3101, |
| "lr": 0.0008728671328671329, |
| "step": 1410, |
| "tokens_trained": 0.134016688 |
| }, |
| { |
| "epoch": 0.4005673758865248, |
| "grad_norm": 1.6785613298416138, |
| "loss": 5.2823, |
| "lr": 0.0008725874125874126, |
| "step": 1412, |
| "tokens_trained": 0.134208992 |
| }, |
| { |
| "epoch": 0.40113475177304964, |
| "grad_norm": 1.7419382333755493, |
| "loss": 5.2388, |
| "lr": 0.0008723076923076924, |
| "step": 1414, |
| "tokens_trained": 0.134398376 |
| }, |
| { |
| "epoch": 0.40170212765957447, |
| "grad_norm": 1.6936920881271362, |
| "loss": 5.2824, |
| "lr": 0.000872027972027972, |
| "step": 1416, |
| "tokens_trained": 0.134589264 |
| }, |
| { |
| "epoch": 0.4022695035460993, |
| "grad_norm": 1.7408183813095093, |
| "loss": 5.2692, |
| "lr": 0.0008717482517482518, |
| "step": 1418, |
| "tokens_trained": 0.134776568 |
| }, |
| { |
| "epoch": 0.40283687943262414, |
| "grad_norm": 1.7089916467666626, |
| "loss": 5.2309, |
| "lr": 0.0008714685314685314, |
| "step": 1420, |
| "tokens_trained": 0.134967616 |
| }, |
| { |
| "epoch": 0.4034042553191489, |
| "grad_norm": 1.6850922107696533, |
| "loss": 5.3656, |
| "lr": 0.0008711888111888112, |
| "step": 1422, |
| "tokens_trained": 0.135158272 |
| }, |
| { |
| "epoch": 0.40397163120567375, |
| "grad_norm": 1.546431064605713, |
| "loss": 5.3455, |
| "lr": 0.0008709090909090909, |
| "step": 1424, |
| "tokens_trained": 0.135349512 |
| }, |
| { |
| "epoch": 0.4045390070921986, |
| "grad_norm": 1.3656421899795532, |
| "loss": 5.2842, |
| "lr": 0.0008706293706293707, |
| "step": 1426, |
| "tokens_trained": 0.135538512 |
| }, |
| { |
| "epoch": 0.4051063829787234, |
| "grad_norm": 1.5918062925338745, |
| "loss": 5.3243, |
| "lr": 0.0008703496503496504, |
| "step": 1428, |
| "tokens_trained": 0.13572968 |
| }, |
| { |
| "epoch": 0.4056737588652482, |
| "grad_norm": 1.563009262084961, |
| "loss": 5.2539, |
| "lr": 0.00087006993006993, |
| "step": 1430, |
| "tokens_trained": 0.135919568 |
| }, |
| { |
| "epoch": 0.40624113475177304, |
| "grad_norm": 1.6144121885299683, |
| "loss": 5.2844, |
| "lr": 0.0008697902097902099, |
| "step": 1432, |
| "tokens_trained": 0.136109304 |
| }, |
| { |
| "epoch": 0.40680851063829787, |
| "grad_norm": 1.5911130905151367, |
| "loss": 5.3205, |
| "lr": 0.0008695104895104895, |
| "step": 1434, |
| "tokens_trained": 0.136296696 |
| }, |
| { |
| "epoch": 0.4073758865248227, |
| "grad_norm": 1.60932457447052, |
| "loss": 5.3783, |
| "lr": 0.0008692307692307693, |
| "step": 1436, |
| "tokens_trained": 0.136484912 |
| }, |
| { |
| "epoch": 0.40794326241134754, |
| "grad_norm": 1.559644341468811, |
| "loss": 5.2785, |
| "lr": 0.0008689510489510489, |
| "step": 1438, |
| "tokens_trained": 0.136675736 |
| }, |
| { |
| "epoch": 0.4085106382978723, |
| "grad_norm": 1.5167043209075928, |
| "loss": 5.3224, |
| "lr": 0.0008686713286713287, |
| "step": 1440, |
| "tokens_trained": 0.136864928 |
| }, |
| { |
| "epoch": 0.40907801418439715, |
| "grad_norm": 1.5843397378921509, |
| "loss": 5.3075, |
| "lr": 0.0008683916083916084, |
| "step": 1442, |
| "tokens_trained": 0.137056688 |
| }, |
| { |
| "epoch": 0.409645390070922, |
| "grad_norm": 1.581120491027832, |
| "loss": 5.2863, |
| "lr": 0.0008681118881118881, |
| "step": 1444, |
| "tokens_trained": 0.137244664 |
| }, |
| { |
| "epoch": 0.4102127659574468, |
| "grad_norm": 1.6355490684509277, |
| "loss": 5.348, |
| "lr": 0.0008678321678321679, |
| "step": 1446, |
| "tokens_trained": 0.13743372 |
| }, |
| { |
| "epoch": 0.41078014184397166, |
| "grad_norm": 1.5543185472488403, |
| "loss": 5.3268, |
| "lr": 0.0008675524475524475, |
| "step": 1448, |
| "tokens_trained": 0.13762696 |
| }, |
| { |
| "epoch": 0.41134751773049644, |
| "grad_norm": 1.5313750505447388, |
| "loss": 5.2784, |
| "lr": 0.0008672727272727273, |
| "step": 1450, |
| "tokens_trained": 0.137817376 |
| }, |
| { |
| "epoch": 0.41191489361702127, |
| "grad_norm": 1.7918111085891724, |
| "loss": 5.3063, |
| "lr": 0.000866993006993007, |
| "step": 1452, |
| "tokens_trained": 0.138007944 |
| }, |
| { |
| "epoch": 0.4124822695035461, |
| "grad_norm": 1.5105966329574585, |
| "loss": 5.2432, |
| "lr": 0.0008667132867132868, |
| "step": 1454, |
| "tokens_trained": 0.138199776 |
| }, |
| { |
| "epoch": 0.41304964539007094, |
| "grad_norm": 1.4441865682601929, |
| "loss": 5.269, |
| "lr": 0.0008664335664335664, |
| "step": 1456, |
| "tokens_trained": 0.13839124 |
| }, |
| { |
| "epoch": 0.4136170212765957, |
| "grad_norm": 1.473544955253601, |
| "loss": 5.2377, |
| "lr": 0.0008661538461538461, |
| "step": 1458, |
| "tokens_trained": 0.138580704 |
| }, |
| { |
| "epoch": 0.41418439716312055, |
| "grad_norm": 1.6085572242736816, |
| "loss": 5.245, |
| "lr": 0.0008658741258741259, |
| "step": 1460, |
| "tokens_trained": 0.138770176 |
| }, |
| { |
| "epoch": 0.4147517730496454, |
| "grad_norm": 1.609894871711731, |
| "loss": 5.3124, |
| "lr": 0.0008655944055944056, |
| "step": 1462, |
| "tokens_trained": 0.138961656 |
| }, |
| { |
| "epoch": 0.4153191489361702, |
| "grad_norm": 1.6923688650131226, |
| "loss": 5.3099, |
| "lr": 0.0008653146853146854, |
| "step": 1464, |
| "tokens_trained": 0.139151128 |
| }, |
| { |
| "epoch": 0.41588652482269506, |
| "grad_norm": 1.7480796575546265, |
| "loss": 5.2608, |
| "lr": 0.000865034965034965, |
| "step": 1466, |
| "tokens_trained": 0.139341168 |
| }, |
| { |
| "epoch": 0.41645390070921984, |
| "grad_norm": 1.725832223892212, |
| "loss": 5.2863, |
| "lr": 0.0008647552447552448, |
| "step": 1468, |
| "tokens_trained": 0.139530448 |
| }, |
| { |
| "epoch": 0.41702127659574467, |
| "grad_norm": 1.7886406183242798, |
| "loss": 5.231, |
| "lr": 0.0008644755244755245, |
| "step": 1470, |
| "tokens_trained": 0.13972244 |
| }, |
| { |
| "epoch": 0.4175886524822695, |
| "grad_norm": 1.803231954574585, |
| "loss": 5.2428, |
| "lr": 0.0008641958041958042, |
| "step": 1472, |
| "tokens_trained": 0.139913136 |
| }, |
| { |
| "epoch": 0.41815602836879434, |
| "grad_norm": 1.5347254276275635, |
| "loss": 5.2215, |
| "lr": 0.0008639160839160839, |
| "step": 1474, |
| "tokens_trained": 0.140104072 |
| }, |
| { |
| "epoch": 0.4187234042553192, |
| "grad_norm": 1.4485915899276733, |
| "loss": 5.2364, |
| "lr": 0.0008636363636363636, |
| "step": 1476, |
| "tokens_trained": 0.140294312 |
| }, |
| { |
| "epoch": 0.41929078014184396, |
| "grad_norm": 1.6130446195602417, |
| "loss": 5.3088, |
| "lr": 0.0008633566433566434, |
| "step": 1478, |
| "tokens_trained": 0.140482968 |
| }, |
| { |
| "epoch": 0.4198581560283688, |
| "grad_norm": 1.5839030742645264, |
| "loss": 5.3215, |
| "lr": 0.0008630769230769231, |
| "step": 1480, |
| "tokens_trained": 0.140674208 |
| }, |
| { |
| "epoch": 0.4204255319148936, |
| "grad_norm": 1.7519373893737793, |
| "loss": 5.3331, |
| "lr": 0.0008627972027972029, |
| "step": 1482, |
| "tokens_trained": 0.140864408 |
| }, |
| { |
| "epoch": 0.42099290780141846, |
| "grad_norm": 1.6718385219573975, |
| "loss": 5.231, |
| "lr": 0.0008625174825174825, |
| "step": 1484, |
| "tokens_trained": 0.141054696 |
| }, |
| { |
| "epoch": 0.42156028368794324, |
| "grad_norm": 1.5733797550201416, |
| "loss": 5.2621, |
| "lr": 0.0008622377622377622, |
| "step": 1486, |
| "tokens_trained": 0.141245712 |
| }, |
| { |
| "epoch": 0.4221276595744681, |
| "grad_norm": 1.549985647201538, |
| "loss": 5.2574, |
| "lr": 0.000861958041958042, |
| "step": 1488, |
| "tokens_trained": 0.141434232 |
| }, |
| { |
| "epoch": 0.4226950354609929, |
| "grad_norm": 1.651908278465271, |
| "loss": 5.2953, |
| "lr": 0.0008616783216783217, |
| "step": 1490, |
| "tokens_trained": 0.141623936 |
| }, |
| { |
| "epoch": 0.42326241134751774, |
| "grad_norm": 1.5680350065231323, |
| "loss": 5.288, |
| "lr": 0.0008613986013986014, |
| "step": 1492, |
| "tokens_trained": 0.141813904 |
| }, |
| { |
| "epoch": 0.4238297872340426, |
| "grad_norm": 1.5155646800994873, |
| "loss": 5.2529, |
| "lr": 0.0008611188811188811, |
| "step": 1494, |
| "tokens_trained": 0.14200372 |
| }, |
| { |
| "epoch": 0.42439716312056736, |
| "grad_norm": 1.5949562788009644, |
| "loss": 5.3064, |
| "lr": 0.0008608391608391609, |
| "step": 1496, |
| "tokens_trained": 0.142194496 |
| }, |
| { |
| "epoch": 0.4249645390070922, |
| "grad_norm": 1.6359357833862305, |
| "loss": 5.3452, |
| "lr": 0.0008605594405594406, |
| "step": 1498, |
| "tokens_trained": 0.142384592 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "grad_norm": 1.648120403289795, |
| "loss": 5.3427, |
| "lr": 0.0008602797202797203, |
| "step": 1500, |
| "tokens_trained": 0.142573368 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "eval_loss": 5.282389163970947, |
| "eval_runtime": 20.5657, |
| "step": 1500, |
| "tokens_trained": 0.142573368 |
| }, |
| { |
| "epoch": 0.42609929078014186, |
| "grad_norm": 1.6313989162445068, |
| "loss": 5.2442, |
| "lr": 0.00086, |
| "step": 1502, |
| "tokens_trained": 0.142764584 |
| }, |
| { |
| "epoch": 0.4266666666666667, |
| "grad_norm": 1.447824239730835, |
| "loss": 5.2979, |
| "lr": 0.0008597202797202797, |
| "step": 1504, |
| "tokens_trained": 0.142953912 |
| }, |
| { |
| "epoch": 0.4272340425531915, |
| "grad_norm": 1.4285600185394287, |
| "loss": 5.317, |
| "lr": 0.0008594405594405595, |
| "step": 1506, |
| "tokens_trained": 0.143145944 |
| }, |
| { |
| "epoch": 0.4278014184397163, |
| "grad_norm": 1.4464077949523926, |
| "loss": 5.2746, |
| "lr": 0.0008591608391608392, |
| "step": 1508, |
| "tokens_trained": 0.1433374 |
| }, |
| { |
| "epoch": 0.42836879432624114, |
| "grad_norm": 1.3554625511169434, |
| "loss": 5.276, |
| "lr": 0.0008588811188811188, |
| "step": 1510, |
| "tokens_trained": 0.143529088 |
| }, |
| { |
| "epoch": 0.428936170212766, |
| "grad_norm": 1.4690148830413818, |
| "loss": 5.2976, |
| "lr": 0.0008586013986013986, |
| "step": 1512, |
| "tokens_trained": 0.1437192 |
| }, |
| { |
| "epoch": 0.42950354609929076, |
| "grad_norm": 1.4911222457885742, |
| "loss": 5.2727, |
| "lr": 0.0008583216783216783, |
| "step": 1514, |
| "tokens_trained": 0.143907728 |
| }, |
| { |
| "epoch": 0.4300709219858156, |
| "grad_norm": 1.5823880434036255, |
| "loss": 5.2481, |
| "lr": 0.0008580419580419581, |
| "step": 1516, |
| "tokens_trained": 0.144097048 |
| }, |
| { |
| "epoch": 0.4306382978723404, |
| "grad_norm": 1.486588716506958, |
| "loss": 5.2561, |
| "lr": 0.0008577622377622378, |
| "step": 1518, |
| "tokens_trained": 0.14428652 |
| }, |
| { |
| "epoch": 0.43120567375886526, |
| "grad_norm": 1.5762882232666016, |
| "loss": 5.267, |
| "lr": 0.0008574825174825175, |
| "step": 1520, |
| "tokens_trained": 0.144476848 |
| }, |
| { |
| "epoch": 0.4317730496453901, |
| "grad_norm": 1.6832828521728516, |
| "loss": 5.3329, |
| "lr": 0.0008572027972027972, |
| "step": 1522, |
| "tokens_trained": 0.144667568 |
| }, |
| { |
| "epoch": 0.4323404255319149, |
| "grad_norm": 1.7036137580871582, |
| "loss": 5.2326, |
| "lr": 0.000856923076923077, |
| "step": 1524, |
| "tokens_trained": 0.144860328 |
| }, |
| { |
| "epoch": 0.4329078014184397, |
| "grad_norm": 1.8102291822433472, |
| "loss": 5.251, |
| "lr": 0.0008566433566433567, |
| "step": 1526, |
| "tokens_trained": 0.1450528 |
| }, |
| { |
| "epoch": 0.43347517730496454, |
| "grad_norm": 1.667229413986206, |
| "loss": 5.2841, |
| "lr": 0.0008563636363636363, |
| "step": 1528, |
| "tokens_trained": 0.145240952 |
| }, |
| { |
| "epoch": 0.4340425531914894, |
| "grad_norm": 1.6709800958633423, |
| "loss": 5.2387, |
| "lr": 0.0008560839160839161, |
| "step": 1530, |
| "tokens_trained": 0.145431376 |
| }, |
| { |
| "epoch": 0.4346099290780142, |
| "grad_norm": 1.600885272026062, |
| "loss": 5.2179, |
| "lr": 0.0008558041958041958, |
| "step": 1532, |
| "tokens_trained": 0.145620184 |
| }, |
| { |
| "epoch": 0.435177304964539, |
| "grad_norm": 1.5783873796463013, |
| "loss": 5.2432, |
| "lr": 0.0008555244755244756, |
| "step": 1534, |
| "tokens_trained": 0.145810616 |
| }, |
| { |
| "epoch": 0.4357446808510638, |
| "grad_norm": 1.5059685707092285, |
| "loss": 5.2604, |
| "lr": 0.0008552447552447553, |
| "step": 1536, |
| "tokens_trained": 0.14600232 |
| }, |
| { |
| "epoch": 0.43631205673758866, |
| "grad_norm": 1.5880341529846191, |
| "loss": 5.249, |
| "lr": 0.000854965034965035, |
| "step": 1538, |
| "tokens_trained": 0.146192504 |
| }, |
| { |
| "epoch": 0.4368794326241135, |
| "grad_norm": 1.430004596710205, |
| "loss": 5.2668, |
| "lr": 0.0008546853146853147, |
| "step": 1540, |
| "tokens_trained": 0.146382264 |
| }, |
| { |
| "epoch": 0.4374468085106383, |
| "grad_norm": 1.4099256992340088, |
| "loss": 5.2839, |
| "lr": 0.0008544055944055944, |
| "step": 1542, |
| "tokens_trained": 0.146570432 |
| }, |
| { |
| "epoch": 0.4380141843971631, |
| "grad_norm": 1.3938827514648438, |
| "loss": 5.2534, |
| "lr": 0.0008541258741258742, |
| "step": 1544, |
| "tokens_trained": 0.146763736 |
| }, |
| { |
| "epoch": 0.43858156028368794, |
| "grad_norm": 1.4359923601150513, |
| "loss": 5.2202, |
| "lr": 0.0008538461538461538, |
| "step": 1546, |
| "tokens_trained": 0.146953944 |
| }, |
| { |
| "epoch": 0.4391489361702128, |
| "grad_norm": 1.5405043363571167, |
| "loss": 5.2613, |
| "lr": 0.0008535664335664336, |
| "step": 1548, |
| "tokens_trained": 0.147144664 |
| }, |
| { |
| "epoch": 0.4397163120567376, |
| "grad_norm": 1.6448051929473877, |
| "loss": 5.299, |
| "lr": 0.0008532867132867133, |
| "step": 1550, |
| "tokens_trained": 0.147335064 |
| }, |
| { |
| "epoch": 0.4402836879432624, |
| "grad_norm": 1.6528949737548828, |
| "loss": 5.3004, |
| "lr": 0.000853006993006993, |
| "step": 1552, |
| "tokens_trained": 0.147524088 |
| }, |
| { |
| "epoch": 0.4408510638297872, |
| "grad_norm": 1.637702226638794, |
| "loss": 5.2298, |
| "lr": 0.0008527272727272728, |
| "step": 1554, |
| "tokens_trained": 0.147716296 |
| }, |
| { |
| "epoch": 0.44141843971631206, |
| "grad_norm": 1.7230212688446045, |
| "loss": 5.2806, |
| "lr": 0.0008524475524475524, |
| "step": 1556, |
| "tokens_trained": 0.147905216 |
| }, |
| { |
| "epoch": 0.4419858156028369, |
| "grad_norm": 1.6216089725494385, |
| "loss": 5.3062, |
| "lr": 0.0008521678321678322, |
| "step": 1558, |
| "tokens_trained": 0.148092312 |
| }, |
| { |
| "epoch": 0.4425531914893617, |
| "grad_norm": 1.5734955072402954, |
| "loss": 5.2607, |
| "lr": 0.0008518881118881119, |
| "step": 1560, |
| "tokens_trained": 0.148282712 |
| }, |
| { |
| "epoch": 0.4431205673758865, |
| "grad_norm": 1.6687103509902954, |
| "loss": 5.2737, |
| "lr": 0.0008516083916083917, |
| "step": 1562, |
| "tokens_trained": 0.148474672 |
| }, |
| { |
| "epoch": 0.44368794326241134, |
| "grad_norm": 1.547277569770813, |
| "loss": 5.3183, |
| "lr": 0.0008513286713286713, |
| "step": 1564, |
| "tokens_trained": 0.148667824 |
| }, |
| { |
| "epoch": 0.4442553191489362, |
| "grad_norm": 1.3782074451446533, |
| "loss": 5.266, |
| "lr": 0.000851048951048951, |
| "step": 1566, |
| "tokens_trained": 0.14885704 |
| }, |
| { |
| "epoch": 0.444822695035461, |
| "grad_norm": 1.5648273229599, |
| "loss": 5.2954, |
| "lr": 0.0008507692307692308, |
| "step": 1568, |
| "tokens_trained": 0.14904804 |
| }, |
| { |
| "epoch": 0.4453900709219858, |
| "grad_norm": 1.5675908327102661, |
| "loss": 5.2897, |
| "lr": 0.0008504895104895105, |
| "step": 1570, |
| "tokens_trained": 0.149237048 |
| }, |
| { |
| "epoch": 0.4459574468085106, |
| "grad_norm": 1.5399287939071655, |
| "loss": 5.2993, |
| "lr": 0.0008502097902097903, |
| "step": 1572, |
| "tokens_trained": 0.149427328 |
| }, |
| { |
| "epoch": 0.44652482269503546, |
| "grad_norm": 1.7170253992080688, |
| "loss": 5.2756, |
| "lr": 0.0008499300699300699, |
| "step": 1574, |
| "tokens_trained": 0.149618448 |
| }, |
| { |
| "epoch": 0.4470921985815603, |
| "grad_norm": 1.5694142580032349, |
| "loss": 5.2294, |
| "lr": 0.0008496503496503497, |
| "step": 1576, |
| "tokens_trained": 0.149809416 |
| }, |
| { |
| "epoch": 0.44765957446808513, |
| "grad_norm": 1.5410487651824951, |
| "loss": 5.2392, |
| "lr": 0.0008493706293706294, |
| "step": 1578, |
| "tokens_trained": 0.149999608 |
| }, |
| { |
| "epoch": 0.4482269503546099, |
| "grad_norm": 1.5991896390914917, |
| "loss": 5.2569, |
| "lr": 0.0008490909090909091, |
| "step": 1580, |
| "tokens_trained": 0.150190224 |
| }, |
| { |
| "epoch": 0.44879432624113474, |
| "grad_norm": 1.5861775875091553, |
| "loss": 5.3151, |
| "lr": 0.0008488111888111888, |
| "step": 1582, |
| "tokens_trained": 0.150380592 |
| }, |
| { |
| "epoch": 0.4493617021276596, |
| "grad_norm": 1.530462622642517, |
| "loss": 5.3242, |
| "lr": 0.0008485314685314685, |
| "step": 1584, |
| "tokens_trained": 0.15056992 |
| }, |
| { |
| "epoch": 0.4499290780141844, |
| "grad_norm": 1.5658655166625977, |
| "loss": 5.2933, |
| "lr": 0.0008482517482517483, |
| "step": 1586, |
| "tokens_trained": 0.150760336 |
| }, |
| { |
| "epoch": 0.4504964539007092, |
| "grad_norm": 1.4187430143356323, |
| "loss": 5.2235, |
| "lr": 0.000847972027972028, |
| "step": 1588, |
| "tokens_trained": 0.150949088 |
| }, |
| { |
| "epoch": 0.451063829787234, |
| "grad_norm": 1.6921541690826416, |
| "loss": 5.2496, |
| "lr": 0.0008476923076923078, |
| "step": 1590, |
| "tokens_trained": 0.151140016 |
| }, |
| { |
| "epoch": 0.45163120567375886, |
| "grad_norm": 1.6049220561981201, |
| "loss": 5.2767, |
| "lr": 0.0008474125874125874, |
| "step": 1592, |
| "tokens_trained": 0.151330944 |
| }, |
| { |
| "epoch": 0.4521985815602837, |
| "grad_norm": 1.513168454170227, |
| "loss": 5.2904, |
| "lr": 0.0008471328671328671, |
| "step": 1594, |
| "tokens_trained": 0.151520152 |
| }, |
| { |
| "epoch": 0.45276595744680853, |
| "grad_norm": 1.5247087478637695, |
| "loss": 5.2391, |
| "lr": 0.0008468531468531469, |
| "step": 1596, |
| "tokens_trained": 0.151711592 |
| }, |
| { |
| "epoch": 0.4533333333333333, |
| "grad_norm": 1.5005898475646973, |
| "loss": 5.3025, |
| "lr": 0.0008465734265734266, |
| "step": 1598, |
| "tokens_trained": 0.151902736 |
| }, |
| { |
| "epoch": 0.45390070921985815, |
| "grad_norm": 1.3196156024932861, |
| "loss": 5.3025, |
| "lr": 0.0008462937062937063, |
| "step": 1600, |
| "tokens_trained": 0.152094032 |
| }, |
| { |
| "epoch": 0.454468085106383, |
| "grad_norm": 1.5037102699279785, |
| "loss": 5.2348, |
| "lr": 0.000846013986013986, |
| "step": 1602, |
| "tokens_trained": 0.15228336 |
| }, |
| { |
| "epoch": 0.4550354609929078, |
| "grad_norm": 1.404539942741394, |
| "loss": 5.2551, |
| "lr": 0.0008457342657342658, |
| "step": 1604, |
| "tokens_trained": 0.152474776 |
| }, |
| { |
| "epoch": 0.45560283687943265, |
| "grad_norm": 1.4784883260726929, |
| "loss": 5.2927, |
| "lr": 0.0008454545454545455, |
| "step": 1606, |
| "tokens_trained": 0.152663392 |
| }, |
| { |
| "epoch": 0.45617021276595743, |
| "grad_norm": 1.3743332624435425, |
| "loss": 5.2542, |
| "lr": 0.0008451748251748252, |
| "step": 1608, |
| "tokens_trained": 0.152852512 |
| }, |
| { |
| "epoch": 0.45673758865248226, |
| "grad_norm": 1.4161995649337769, |
| "loss": 5.2518, |
| "lr": 0.0008448951048951049, |
| "step": 1610, |
| "tokens_trained": 0.15304428 |
| }, |
| { |
| "epoch": 0.4573049645390071, |
| "grad_norm": 1.5045989751815796, |
| "loss": 5.2735, |
| "lr": 0.0008446153846153846, |
| "step": 1612, |
| "tokens_trained": 0.153234632 |
| }, |
| { |
| "epoch": 0.45787234042553193, |
| "grad_norm": 1.3695783615112305, |
| "loss": 5.2294, |
| "lr": 0.0008443356643356644, |
| "step": 1614, |
| "tokens_trained": 0.1534248 |
| }, |
| { |
| "epoch": 0.4584397163120567, |
| "grad_norm": 1.4551646709442139, |
| "loss": 5.2639, |
| "lr": 0.0008440559440559441, |
| "step": 1616, |
| "tokens_trained": 0.153614944 |
| }, |
| { |
| "epoch": 0.45900709219858155, |
| "grad_norm": 1.5018376111984253, |
| "loss": 5.2989, |
| "lr": 0.0008437762237762238, |
| "step": 1618, |
| "tokens_trained": 0.153803784 |
| }, |
| { |
| "epoch": 0.4595744680851064, |
| "grad_norm": 1.5295960903167725, |
| "loss": 5.33, |
| "lr": 0.0008434965034965035, |
| "step": 1620, |
| "tokens_trained": 0.153993752 |
| }, |
| { |
| "epoch": 0.4601418439716312, |
| "grad_norm": 1.417626142501831, |
| "loss": 5.2134, |
| "lr": 0.0008432167832167832, |
| "step": 1622, |
| "tokens_trained": 0.154184448 |
| }, |
| { |
| "epoch": 0.46070921985815605, |
| "grad_norm": 1.5715348720550537, |
| "loss": 5.2782, |
| "lr": 0.000842937062937063, |
| "step": 1624, |
| "tokens_trained": 0.154373632 |
| }, |
| { |
| "epoch": 0.46099290780141844, |
| "eval_loss": 5.266384601593018, |
| "eval_runtime": 21.0916, |
| "step": 1625, |
| "tokens_trained": 0.154468808 |
| }, |
| { |
| "epoch": 0.46127659574468083, |
| "grad_norm": 1.5504534244537354, |
| "loss": 5.2307, |
| "lr": 0.0008426573426573427, |
| "step": 1626, |
| "tokens_trained": 0.154564864 |
| }, |
| { |
| "epoch": 0.46184397163120566, |
| "grad_norm": 1.483108401298523, |
| "loss": 5.2578, |
| "lr": 0.0008423776223776224, |
| "step": 1628, |
| "tokens_trained": 0.154755312 |
| }, |
| { |
| "epoch": 0.4624113475177305, |
| "grad_norm": 1.5631264448165894, |
| "loss": 5.3291, |
| "lr": 0.0008420979020979021, |
| "step": 1630, |
| "tokens_trained": 0.154943736 |
| }, |
| { |
| "epoch": 0.46297872340425533, |
| "grad_norm": 1.4680705070495605, |
| "loss": 5.2256, |
| "lr": 0.0008418181818181819, |
| "step": 1632, |
| "tokens_trained": 0.15513452 |
| }, |
| { |
| "epoch": 0.46354609929078017, |
| "grad_norm": 1.468338966369629, |
| "loss": 5.2712, |
| "lr": 0.0008415384615384616, |
| "step": 1634, |
| "tokens_trained": 0.155325288 |
| }, |
| { |
| "epoch": 0.46411347517730495, |
| "grad_norm": 1.4557780027389526, |
| "loss": 5.2808, |
| "lr": 0.0008412587412587412, |
| "step": 1636, |
| "tokens_trained": 0.155515328 |
| }, |
| { |
| "epoch": 0.4646808510638298, |
| "grad_norm": 1.4534999132156372, |
| "loss": 5.2707, |
| "lr": 0.000840979020979021, |
| "step": 1638, |
| "tokens_trained": 0.155706752 |
| }, |
| { |
| "epoch": 0.4652482269503546, |
| "grad_norm": 1.4011393785476685, |
| "loss": 5.3028, |
| "lr": 0.0008406993006993006, |
| "step": 1640, |
| "tokens_trained": 0.155895336 |
| }, |
| { |
| "epoch": 0.46581560283687945, |
| "grad_norm": 1.307922601699829, |
| "loss": 5.2188, |
| "lr": 0.0008404195804195805, |
| "step": 1642, |
| "tokens_trained": 0.156085936 |
| }, |
| { |
| "epoch": 0.46638297872340423, |
| "grad_norm": 1.359922170639038, |
| "loss": 5.2863, |
| "lr": 0.0008401398601398602, |
| "step": 1644, |
| "tokens_trained": 0.15627636 |
| }, |
| { |
| "epoch": 0.46695035460992906, |
| "grad_norm": 1.6204577684402466, |
| "loss": 5.2877, |
| "lr": 0.0008398601398601399, |
| "step": 1646, |
| "tokens_trained": 0.156465192 |
| }, |
| { |
| "epoch": 0.4675177304964539, |
| "grad_norm": 1.7367322444915771, |
| "loss": 5.2501, |
| "lr": 0.0008395804195804196, |
| "step": 1648, |
| "tokens_trained": 0.15665336 |
| }, |
| { |
| "epoch": 0.46808510638297873, |
| "grad_norm": 1.7013088464736938, |
| "loss": 5.2522, |
| "lr": 0.0008393006993006993, |
| "step": 1650, |
| "tokens_trained": 0.156843128 |
| }, |
| { |
| "epoch": 0.46865248226950357, |
| "grad_norm": 1.6429578065872192, |
| "loss": 5.2978, |
| "lr": 0.0008390209790209791, |
| "step": 1652, |
| "tokens_trained": 0.157034328 |
| }, |
| { |
| "epoch": 0.46921985815602835, |
| "grad_norm": 1.527243733406067, |
| "loss": 5.2384, |
| "lr": 0.0008387412587412587, |
| "step": 1654, |
| "tokens_trained": 0.157222784 |
| }, |
| { |
| "epoch": 0.4697872340425532, |
| "grad_norm": 1.4792861938476562, |
| "loss": 5.2149, |
| "lr": 0.0008384615384615385, |
| "step": 1656, |
| "tokens_trained": 0.15741308 |
| }, |
| { |
| "epoch": 0.470354609929078, |
| "grad_norm": 1.4050098657608032, |
| "loss": 5.229, |
| "lr": 0.0008381818181818181, |
| "step": 1658, |
| "tokens_trained": 0.157603872 |
| }, |
| { |
| "epoch": 0.47092198581560285, |
| "grad_norm": 1.4799182415008545, |
| "loss": 5.2235, |
| "lr": 0.000837902097902098, |
| "step": 1660, |
| "tokens_trained": 0.157793352 |
| }, |
| { |
| "epoch": 0.4714893617021277, |
| "grad_norm": 1.4031378030776978, |
| "loss": 5.23, |
| "lr": 0.0008376223776223776, |
| "step": 1662, |
| "tokens_trained": 0.157984416 |
| }, |
| { |
| "epoch": 0.47205673758865246, |
| "grad_norm": 1.5775604248046875, |
| "loss": 5.2811, |
| "lr": 0.0008373426573426573, |
| "step": 1664, |
| "tokens_trained": 0.158176048 |
| }, |
| { |
| "epoch": 0.4726241134751773, |
| "grad_norm": 1.4855432510375977, |
| "loss": 5.2363, |
| "lr": 0.0008370629370629371, |
| "step": 1666, |
| "tokens_trained": 0.158368152 |
| }, |
| { |
| "epoch": 0.47319148936170213, |
| "grad_norm": 1.5609453916549683, |
| "loss": 5.2984, |
| "lr": 0.0008367832167832168, |
| "step": 1668, |
| "tokens_trained": 0.15855684 |
| }, |
| { |
| "epoch": 0.47375886524822697, |
| "grad_norm": 1.5052629709243774, |
| "loss": 5.213, |
| "lr": 0.0008365034965034966, |
| "step": 1670, |
| "tokens_trained": 0.15874712 |
| }, |
| { |
| "epoch": 0.47432624113475175, |
| "grad_norm": 1.5655242204666138, |
| "loss": 5.2551, |
| "lr": 0.0008362237762237762, |
| "step": 1672, |
| "tokens_trained": 0.158937104 |
| }, |
| { |
| "epoch": 0.4748936170212766, |
| "grad_norm": 1.301142930984497, |
| "loss": 5.1564, |
| "lr": 0.000835944055944056, |
| "step": 1674, |
| "tokens_trained": 0.159128096 |
| }, |
| { |
| "epoch": 0.4754609929078014, |
| "grad_norm": 1.5447527170181274, |
| "loss": 5.2547, |
| "lr": 0.0008356643356643356, |
| "step": 1676, |
| "tokens_trained": 0.159318968 |
| }, |
| { |
| "epoch": 0.47602836879432625, |
| "grad_norm": 1.638100266456604, |
| "loss": 5.2301, |
| "lr": 0.0008353846153846154, |
| "step": 1678, |
| "tokens_trained": 0.159508648 |
| }, |
| { |
| "epoch": 0.4765957446808511, |
| "grad_norm": 1.6203068494796753, |
| "loss": 5.2644, |
| "lr": 0.0008351048951048951, |
| "step": 1680, |
| "tokens_trained": 0.159698648 |
| }, |
| { |
| "epoch": 0.47716312056737586, |
| "grad_norm": 1.4097110033035278, |
| "loss": 5.2047, |
| "lr": 0.0008348251748251748, |
| "step": 1682, |
| "tokens_trained": 0.159887392 |
| }, |
| { |
| "epoch": 0.4777304964539007, |
| "grad_norm": 1.3377385139465332, |
| "loss": 5.2685, |
| "lr": 0.0008345454545454546, |
| "step": 1684, |
| "tokens_trained": 0.160076904 |
| }, |
| { |
| "epoch": 0.47829787234042553, |
| "grad_norm": 1.4079371690750122, |
| "loss": 5.2842, |
| "lr": 0.0008342657342657343, |
| "step": 1686, |
| "tokens_trained": 0.160266712 |
| }, |
| { |
| "epoch": 0.47886524822695037, |
| "grad_norm": 1.6039987802505493, |
| "loss": 5.2248, |
| "lr": 0.0008339860139860141, |
| "step": 1688, |
| "tokens_trained": 0.160455464 |
| }, |
| { |
| "epoch": 0.4794326241134752, |
| "grad_norm": 1.639218807220459, |
| "loss": 5.2007, |
| "lr": 0.0008337062937062937, |
| "step": 1690, |
| "tokens_trained": 0.16064472 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.8226710557937622, |
| "loss": 5.2427, |
| "lr": 0.0008334265734265734, |
| "step": 1692, |
| "tokens_trained": 0.160835192 |
| }, |
| { |
| "epoch": 0.4805673758865248, |
| "grad_norm": 1.6480419635772705, |
| "loss": 5.1944, |
| "lr": 0.0008331468531468531, |
| "step": 1694, |
| "tokens_trained": 0.161025272 |
| }, |
| { |
| "epoch": 0.48113475177304965, |
| "grad_norm": 1.666717290878296, |
| "loss": 5.2879, |
| "lr": 0.0008328671328671329, |
| "step": 1696, |
| "tokens_trained": 0.161214016 |
| }, |
| { |
| "epoch": 0.4817021276595745, |
| "grad_norm": 1.5092660188674927, |
| "loss": 5.2612, |
| "lr": 0.0008325874125874126, |
| "step": 1698, |
| "tokens_trained": 0.161405448 |
| }, |
| { |
| "epoch": 0.48226950354609927, |
| "grad_norm": 1.4042121171951294, |
| "loss": 5.2373, |
| "lr": 0.0008323076923076923, |
| "step": 1700, |
| "tokens_trained": 0.161595896 |
| }, |
| { |
| "epoch": 0.4828368794326241, |
| "grad_norm": 1.4937382936477661, |
| "loss": 5.2172, |
| "lr": 0.000832027972027972, |
| "step": 1702, |
| "tokens_trained": 0.161783904 |
| }, |
| { |
| "epoch": 0.48340425531914893, |
| "grad_norm": 1.4652959108352661, |
| "loss": 5.2704, |
| "lr": 0.0008317482517482518, |
| "step": 1704, |
| "tokens_trained": 0.161975888 |
| }, |
| { |
| "epoch": 0.48397163120567377, |
| "grad_norm": 1.3021745681762695, |
| "loss": 5.2672, |
| "lr": 0.0008314685314685315, |
| "step": 1706, |
| "tokens_trained": 0.162165808 |
| }, |
| { |
| "epoch": 0.4845390070921986, |
| "grad_norm": 1.3580701351165771, |
| "loss": 5.2467, |
| "lr": 0.0008311888111888112, |
| "step": 1708, |
| "tokens_trained": 0.162355152 |
| }, |
| { |
| "epoch": 0.4851063829787234, |
| "grad_norm": 1.480072259902954, |
| "loss": 5.2797, |
| "lr": 0.0008309090909090909, |
| "step": 1710, |
| "tokens_trained": 0.162544744 |
| }, |
| { |
| "epoch": 0.4856737588652482, |
| "grad_norm": 1.3532829284667969, |
| "loss": 5.2556, |
| "lr": 0.0008306293706293706, |
| "step": 1712, |
| "tokens_trained": 0.162734976 |
| }, |
| { |
| "epoch": 0.48624113475177305, |
| "grad_norm": 1.240332007408142, |
| "loss": 5.2153, |
| "lr": 0.0008303496503496504, |
| "step": 1714, |
| "tokens_trained": 0.162924992 |
| }, |
| { |
| "epoch": 0.4868085106382979, |
| "grad_norm": 1.4141086339950562, |
| "loss": 5.2056, |
| "lr": 0.00083006993006993, |
| "step": 1716, |
| "tokens_trained": 0.163114008 |
| }, |
| { |
| "epoch": 0.4873758865248227, |
| "grad_norm": 1.321721076965332, |
| "loss": 5.2223, |
| "lr": 0.0008297902097902098, |
| "step": 1718, |
| "tokens_trained": 0.163304416 |
| }, |
| { |
| "epoch": 0.4879432624113475, |
| "grad_norm": 1.5437248945236206, |
| "loss": 5.2727, |
| "lr": 0.0008295104895104895, |
| "step": 1720, |
| "tokens_trained": 0.163493816 |
| }, |
| { |
| "epoch": 0.48851063829787233, |
| "grad_norm": 1.7218859195709229, |
| "loss": 5.2323, |
| "lr": 0.0008292307692307693, |
| "step": 1722, |
| "tokens_trained": 0.163683984 |
| }, |
| { |
| "epoch": 0.48907801418439717, |
| "grad_norm": 1.5534045696258545, |
| "loss": 5.1983, |
| "lr": 0.000828951048951049, |
| "step": 1724, |
| "tokens_trained": 0.163874968 |
| }, |
| { |
| "epoch": 0.489645390070922, |
| "grad_norm": 1.3675404787063599, |
| "loss": 5.2086, |
| "lr": 0.0008286713286713287, |
| "step": 1726, |
| "tokens_trained": 0.164065152 |
| }, |
| { |
| "epoch": 0.4902127659574468, |
| "grad_norm": 1.5178970098495483, |
| "loss": 5.2529, |
| "lr": 0.0008283916083916084, |
| "step": 1728, |
| "tokens_trained": 0.164255952 |
| }, |
| { |
| "epoch": 0.4907801418439716, |
| "grad_norm": 1.4910545349121094, |
| "loss": 5.2931, |
| "lr": 0.000828111888111888, |
| "step": 1730, |
| "tokens_trained": 0.164447112 |
| }, |
| { |
| "epoch": 0.49134751773049645, |
| "grad_norm": 1.5647637844085693, |
| "loss": 5.2603, |
| "lr": 0.0008278321678321679, |
| "step": 1732, |
| "tokens_trained": 0.16463704 |
| }, |
| { |
| "epoch": 0.4919148936170213, |
| "grad_norm": 1.4607906341552734, |
| "loss": 5.2702, |
| "lr": 0.0008275524475524475, |
| "step": 1734, |
| "tokens_trained": 0.164827312 |
| }, |
| { |
| "epoch": 0.4924822695035461, |
| "grad_norm": 1.5806026458740234, |
| "loss": 5.2356, |
| "lr": 0.0008272727272727273, |
| "step": 1736, |
| "tokens_trained": 0.165015224 |
| }, |
| { |
| "epoch": 0.4930496453900709, |
| "grad_norm": 1.5417263507843018, |
| "loss": 5.262, |
| "lr": 0.000826993006993007, |
| "step": 1738, |
| "tokens_trained": 0.16520484 |
| }, |
| { |
| "epoch": 0.49361702127659574, |
| "grad_norm": 1.511680245399475, |
| "loss": 5.2634, |
| "lr": 0.0008267132867132868, |
| "step": 1740, |
| "tokens_trained": 0.165393064 |
| }, |
| { |
| "epoch": 0.49418439716312057, |
| "grad_norm": 1.4468717575073242, |
| "loss": 5.2452, |
| "lr": 0.0008264335664335665, |
| "step": 1742, |
| "tokens_trained": 0.165584472 |
| }, |
| { |
| "epoch": 0.4947517730496454, |
| "grad_norm": 1.423187017440796, |
| "loss": 5.2533, |
| "lr": 0.0008261538461538461, |
| "step": 1744, |
| "tokens_trained": 0.165773768 |
| }, |
| { |
| "epoch": 0.49531914893617024, |
| "grad_norm": 1.512462854385376, |
| "loss": 5.2152, |
| "lr": 0.0008258741258741259, |
| "step": 1746, |
| "tokens_trained": 0.165963456 |
| }, |
| { |
| "epoch": 0.495886524822695, |
| "grad_norm": 1.4620780944824219, |
| "loss": 5.2511, |
| "lr": 0.0008255944055944055, |
| "step": 1748, |
| "tokens_trained": 0.166152136 |
| }, |
| { |
| "epoch": 0.49645390070921985, |
| "grad_norm": 1.4943009614944458, |
| "loss": 5.2829, |
| "lr": 0.0008253146853146854, |
| "step": 1750, |
| "tokens_trained": 0.16634248 |
| }, |
| { |
| "epoch": 0.49645390070921985, |
| "eval_loss": 5.23966646194458, |
| "eval_runtime": 20.5954, |
| "step": 1750, |
| "tokens_trained": 0.16634248 |
| }, |
| { |
| "epoch": 0.4970212765957447, |
| "grad_norm": 1.6739267110824585, |
| "loss": 5.2306, |
| "lr": 0.000825034965034965, |
| "step": 1752, |
| "tokens_trained": 0.166532864 |
| }, |
| { |
| "epoch": 0.4975886524822695, |
| "grad_norm": 1.6125763654708862, |
| "loss": 5.2845, |
| "lr": 0.0008247552447552448, |
| "step": 1754, |
| "tokens_trained": 0.166722944 |
| }, |
| { |
| "epoch": 0.4981560283687943, |
| "grad_norm": 1.5872310400009155, |
| "loss": 5.2075, |
| "lr": 0.0008244755244755245, |
| "step": 1756, |
| "tokens_trained": 0.16691184 |
| }, |
| { |
| "epoch": 0.49872340425531914, |
| "grad_norm": 1.4396610260009766, |
| "loss": 5.2532, |
| "lr": 0.0008241958041958042, |
| "step": 1758, |
| "tokens_trained": 0.167101896 |
| }, |
| { |
| "epoch": 0.49929078014184397, |
| "grad_norm": 1.363879680633545, |
| "loss": 5.2252, |
| "lr": 0.000823916083916084, |
| "step": 1760, |
| "tokens_trained": 0.167289384 |
| }, |
| { |
| "epoch": 0.4998581560283688, |
| "grad_norm": 1.395561695098877, |
| "loss": 5.2097, |
| "lr": 0.0008236363636363636, |
| "step": 1762, |
| "tokens_trained": 0.167479424 |
| }, |
| { |
| "epoch": 0.5004255319148936, |
| "grad_norm": 1.413736343383789, |
| "loss": 5.2283, |
| "lr": 0.0008233566433566434, |
| "step": 1764, |
| "tokens_trained": 0.167668256 |
| }, |
| { |
| "epoch": 0.5009929078014185, |
| "grad_norm": 1.4240859746932983, |
| "loss": 5.2574, |
| "lr": 0.000823076923076923, |
| "step": 1766, |
| "tokens_trained": 0.167858616 |
| }, |
| { |
| "epoch": 0.5015602836879433, |
| "grad_norm": 1.437165379524231, |
| "loss": 5.2511, |
| "lr": 0.0008227972027972029, |
| "step": 1768, |
| "tokens_trained": 0.168048272 |
| }, |
| { |
| "epoch": 0.502127659574468, |
| "grad_norm": 1.458575963973999, |
| "loss": 5.2183, |
| "lr": 0.0008225174825174825, |
| "step": 1770, |
| "tokens_trained": 0.168240184 |
| }, |
| { |
| "epoch": 0.5026950354609929, |
| "grad_norm": 1.5224673748016357, |
| "loss": 5.259, |
| "lr": 0.0008222377622377622, |
| "step": 1772, |
| "tokens_trained": 0.168429536 |
| }, |
| { |
| "epoch": 0.5032624113475177, |
| "grad_norm": 1.578438401222229, |
| "loss": 5.2108, |
| "lr": 0.000821958041958042, |
| "step": 1774, |
| "tokens_trained": 0.168619312 |
| }, |
| { |
| "epoch": 0.5038297872340426, |
| "grad_norm": 1.4880632162094116, |
| "loss": 5.229, |
| "lr": 0.0008216783216783217, |
| "step": 1776, |
| "tokens_trained": 0.168808344 |
| }, |
| { |
| "epoch": 0.5043971631205674, |
| "grad_norm": 1.3741049766540527, |
| "loss": 5.2873, |
| "lr": 0.0008213986013986015, |
| "step": 1778, |
| "tokens_trained": 0.168999112 |
| }, |
| { |
| "epoch": 0.5049645390070922, |
| "grad_norm": 1.4396610260009766, |
| "loss": 5.3237, |
| "lr": 0.0008211188811188811, |
| "step": 1780, |
| "tokens_trained": 0.169189288 |
| }, |
| { |
| "epoch": 0.505531914893617, |
| "grad_norm": 1.4296880960464478, |
| "loss": 5.2228, |
| "lr": 0.0008208391608391609, |
| "step": 1782, |
| "tokens_trained": 0.16937864 |
| }, |
| { |
| "epoch": 0.5060992907801418, |
| "grad_norm": 1.5704258680343628, |
| "loss": 5.2569, |
| "lr": 0.0008205594405594405, |
| "step": 1784, |
| "tokens_trained": 0.169569024 |
| }, |
| { |
| "epoch": 0.5066666666666667, |
| "grad_norm": 1.458261489868164, |
| "loss": 5.1818, |
| "lr": 0.0008202797202797203, |
| "step": 1786, |
| "tokens_trained": 0.16975932 |
| }, |
| { |
| "epoch": 0.5072340425531915, |
| "grad_norm": 1.5307244062423706, |
| "loss": 5.2684, |
| "lr": 0.00082, |
| "step": 1788, |
| "tokens_trained": 0.169949064 |
| }, |
| { |
| "epoch": 0.5078014184397163, |
| "grad_norm": 1.3966363668441772, |
| "loss": 5.2125, |
| "lr": 0.0008197202797202797, |
| "step": 1790, |
| "tokens_trained": 0.170139352 |
| }, |
| { |
| "epoch": 0.5083687943262412, |
| "grad_norm": 1.4094839096069336, |
| "loss": 5.2518, |
| "lr": 0.0008194405594405595, |
| "step": 1792, |
| "tokens_trained": 0.170330336 |
| }, |
| { |
| "epoch": 0.5089361702127659, |
| "grad_norm": 1.266122817993164, |
| "loss": 5.2409, |
| "lr": 0.0008191608391608392, |
| "step": 1794, |
| "tokens_trained": 0.170521848 |
| }, |
| { |
| "epoch": 0.5095035460992908, |
| "grad_norm": 1.3079488277435303, |
| "loss": 5.182, |
| "lr": 0.000818881118881119, |
| "step": 1796, |
| "tokens_trained": 0.170710664 |
| }, |
| { |
| "epoch": 0.5100709219858156, |
| "grad_norm": 1.2961090803146362, |
| "loss": 5.2456, |
| "lr": 0.0008186013986013986, |
| "step": 1798, |
| "tokens_trained": 0.170900016 |
| }, |
| { |
| "epoch": 0.5106382978723404, |
| "grad_norm": 1.3402773141860962, |
| "loss": 5.1888, |
| "lr": 0.0008183216783216783, |
| "step": 1800, |
| "tokens_trained": 0.171089824 |
| }, |
| { |
| "epoch": 0.5112056737588653, |
| "grad_norm": 1.386769413948059, |
| "loss": 5.1715, |
| "lr": 0.000818041958041958, |
| "step": 1802, |
| "tokens_trained": 0.171279448 |
| }, |
| { |
| "epoch": 0.51177304964539, |
| "grad_norm": 1.4280421733856201, |
| "loss": 5.2131, |
| "lr": 0.0008177622377622378, |
| "step": 1804, |
| "tokens_trained": 0.17147048 |
| }, |
| { |
| "epoch": 0.512340425531915, |
| "grad_norm": 1.4805412292480469, |
| "loss": 5.2379, |
| "lr": 0.0008174825174825175, |
| "step": 1806, |
| "tokens_trained": 0.171662264 |
| }, |
| { |
| "epoch": 0.5129078014184397, |
| "grad_norm": 1.4608936309814453, |
| "loss": 5.2412, |
| "lr": 0.0008172027972027972, |
| "step": 1808, |
| "tokens_trained": 0.171853176 |
| }, |
| { |
| "epoch": 0.5134751773049645, |
| "grad_norm": 1.550136923789978, |
| "loss": 5.1828, |
| "lr": 0.000816923076923077, |
| "step": 1810, |
| "tokens_trained": 0.172043344 |
| }, |
| { |
| "epoch": 0.5140425531914894, |
| "grad_norm": 1.4756869077682495, |
| "loss": 5.199, |
| "lr": 0.0008166433566433567, |
| "step": 1812, |
| "tokens_trained": 0.172231952 |
| }, |
| { |
| "epoch": 0.5146099290780142, |
| "grad_norm": 1.4199044704437256, |
| "loss": 5.2074, |
| "lr": 0.0008163636363636364, |
| "step": 1814, |
| "tokens_trained": 0.172420376 |
| }, |
| { |
| "epoch": 0.5151773049645391, |
| "grad_norm": 1.3477959632873535, |
| "loss": 5.1672, |
| "lr": 0.0008160839160839161, |
| "step": 1816, |
| "tokens_trained": 0.172610248 |
| }, |
| { |
| "epoch": 0.5157446808510638, |
| "grad_norm": 1.3331218957901, |
| "loss": 5.2267, |
| "lr": 0.0008158041958041958, |
| "step": 1818, |
| "tokens_trained": 0.172799168 |
| }, |
| { |
| "epoch": 0.5163120567375886, |
| "grad_norm": 1.2391384840011597, |
| "loss": 5.2088, |
| "lr": 0.0008155244755244755, |
| "step": 1820, |
| "tokens_trained": 0.172989328 |
| }, |
| { |
| "epoch": 0.5168794326241135, |
| "grad_norm": 1.3377013206481934, |
| "loss": 5.2279, |
| "lr": 0.0008152447552447553, |
| "step": 1822, |
| "tokens_trained": 0.173179376 |
| }, |
| { |
| "epoch": 0.5174468085106383, |
| "grad_norm": 1.285628318786621, |
| "loss": 5.3006, |
| "lr": 0.000814965034965035, |
| "step": 1824, |
| "tokens_trained": 0.173370408 |
| }, |
| { |
| "epoch": 0.5180141843971631, |
| "grad_norm": 1.2010120153427124, |
| "loss": 5.2264, |
| "lr": 0.0008146853146853147, |
| "step": 1826, |
| "tokens_trained": 0.173561144 |
| }, |
| { |
| "epoch": 0.518581560283688, |
| "grad_norm": 1.2953096628189087, |
| "loss": 5.1879, |
| "lr": 0.0008144055944055944, |
| "step": 1828, |
| "tokens_trained": 0.173753592 |
| }, |
| { |
| "epoch": 0.5191489361702127, |
| "grad_norm": 1.256910800933838, |
| "loss": 5.2402, |
| "lr": 0.0008141258741258742, |
| "step": 1830, |
| "tokens_trained": 0.173943752 |
| }, |
| { |
| "epoch": 0.5197163120567376, |
| "grad_norm": 1.338755488395691, |
| "loss": 5.2556, |
| "lr": 0.0008138461538461539, |
| "step": 1832, |
| "tokens_trained": 0.174130504 |
| }, |
| { |
| "epoch": 0.5202836879432624, |
| "grad_norm": 1.380715012550354, |
| "loss": 5.2047, |
| "lr": 0.0008135664335664336, |
| "step": 1834, |
| "tokens_trained": 0.174322088 |
| }, |
| { |
| "epoch": 0.5208510638297872, |
| "grad_norm": 1.4989492893218994, |
| "loss": 5.1873, |
| "lr": 0.0008132867132867133, |
| "step": 1836, |
| "tokens_trained": 0.17451164 |
| }, |
| { |
| "epoch": 0.5214184397163121, |
| "grad_norm": 1.3239110708236694, |
| "loss": 5.202, |
| "lr": 0.000813006993006993, |
| "step": 1838, |
| "tokens_trained": 0.174701896 |
| }, |
| { |
| "epoch": 0.5219858156028369, |
| "grad_norm": 1.397745132446289, |
| "loss": 5.2259, |
| "lr": 0.0008127272727272728, |
| "step": 1840, |
| "tokens_trained": 0.174892336 |
| }, |
| { |
| "epoch": 0.5225531914893617, |
| "grad_norm": 1.3992305994033813, |
| "loss": 5.1771, |
| "lr": 0.0008124475524475524, |
| "step": 1842, |
| "tokens_trained": 0.17508276 |
| }, |
| { |
| "epoch": 0.5231205673758865, |
| "grad_norm": 1.38923180103302, |
| "loss": 5.1981, |
| "lr": 0.0008121678321678322, |
| "step": 1844, |
| "tokens_trained": 0.175273272 |
| }, |
| { |
| "epoch": 0.5236879432624113, |
| "grad_norm": 1.478642225265503, |
| "loss": 5.2533, |
| "lr": 0.0008118881118881119, |
| "step": 1846, |
| "tokens_trained": 0.175462352 |
| }, |
| { |
| "epoch": 0.5242553191489362, |
| "grad_norm": 1.332709789276123, |
| "loss": 5.2205, |
| "lr": 0.0008116083916083917, |
| "step": 1848, |
| "tokens_trained": 0.175648128 |
| }, |
| { |
| "epoch": 0.524822695035461, |
| "grad_norm": 1.4612590074539185, |
| "loss": 5.2207, |
| "lr": 0.0008113286713286714, |
| "step": 1850, |
| "tokens_trained": 0.175837712 |
| }, |
| { |
| "epoch": 0.5253900709219859, |
| "grad_norm": 1.4682700634002686, |
| "loss": 5.2576, |
| "lr": 0.000811048951048951, |
| "step": 1852, |
| "tokens_trained": 0.176029512 |
| }, |
| { |
| "epoch": 0.5259574468085106, |
| "grad_norm": 1.3380264043807983, |
| "loss": 5.2435, |
| "lr": 0.0008107692307692308, |
| "step": 1854, |
| "tokens_trained": 0.176220432 |
| }, |
| { |
| "epoch": 0.5265248226950354, |
| "grad_norm": 1.2452281713485718, |
| "loss": 5.2973, |
| "lr": 0.0008104895104895104, |
| "step": 1856, |
| "tokens_trained": 0.176412144 |
| }, |
| { |
| "epoch": 0.5270921985815603, |
| "grad_norm": 1.392592191696167, |
| "loss": 5.2028, |
| "lr": 0.0008102097902097903, |
| "step": 1858, |
| "tokens_trained": 0.17660144 |
| }, |
| { |
| "epoch": 0.5276595744680851, |
| "grad_norm": 1.4258657693862915, |
| "loss": 5.2342, |
| "lr": 0.0008099300699300699, |
| "step": 1860, |
| "tokens_trained": 0.176790424 |
| }, |
| { |
| "epoch": 0.52822695035461, |
| "grad_norm": 1.4627033472061157, |
| "loss": 5.1732, |
| "lr": 0.0008096503496503497, |
| "step": 1862, |
| "tokens_trained": 0.176983296 |
| }, |
| { |
| "epoch": 0.5287943262411348, |
| "grad_norm": 1.4448645114898682, |
| "loss": 5.2001, |
| "lr": 0.0008093706293706294, |
| "step": 1864, |
| "tokens_trained": 0.177174544 |
| }, |
| { |
| "epoch": 0.5293617021276595, |
| "grad_norm": 1.3879749774932861, |
| "loss": 5.1642, |
| "lr": 0.0008090909090909092, |
| "step": 1866, |
| "tokens_trained": 0.17736428 |
| }, |
| { |
| "epoch": 0.5299290780141844, |
| "grad_norm": 1.2791417837142944, |
| "loss": 5.1975, |
| "lr": 0.0008088111888111889, |
| "step": 1868, |
| "tokens_trained": 0.177553752 |
| }, |
| { |
| "epoch": 0.5304964539007092, |
| "grad_norm": 1.3620632886886597, |
| "loss": 5.1742, |
| "lr": 0.0008085314685314685, |
| "step": 1870, |
| "tokens_trained": 0.177746448 |
| }, |
| { |
| "epoch": 0.531063829787234, |
| "grad_norm": 1.2759565114974976, |
| "loss": 5.2076, |
| "lr": 0.0008082517482517483, |
| "step": 1872, |
| "tokens_trained": 0.177937888 |
| }, |
| { |
| "epoch": 0.5316312056737589, |
| "grad_norm": 1.3390915393829346, |
| "loss": 5.2387, |
| "lr": 0.0008079720279720279, |
| "step": 1874, |
| "tokens_trained": 0.178127776 |
| }, |
| { |
| "epoch": 0.5319148936170213, |
| "eval_loss": 5.228371620178223, |
| "eval_runtime": 20.9372, |
| "step": 1875, |
| "tokens_trained": 0.17822376 |
| }, |
| { |
| "epoch": 0.5321985815602837, |
| "grad_norm": 1.3872885704040527, |
| "loss": 5.2053, |
| "lr": 0.0008076923076923078, |
| "step": 1876, |
| "tokens_trained": 0.178318616 |
| }, |
| { |
| "epoch": 0.5327659574468085, |
| "grad_norm": 1.4238568544387817, |
| "loss": 5.2091, |
| "lr": 0.0008074125874125874, |
| "step": 1878, |
| "tokens_trained": 0.178509272 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 1.3352588415145874, |
| "loss": 5.2471, |
| "lr": 0.0008071328671328671, |
| "step": 1880, |
| "tokens_trained": 0.178698016 |
| }, |
| { |
| "epoch": 0.5339007092198581, |
| "grad_norm": 1.2931993007659912, |
| "loss": 5.2315, |
| "lr": 0.0008068531468531469, |
| "step": 1882, |
| "tokens_trained": 0.17888628 |
| }, |
| { |
| "epoch": 0.534468085106383, |
| "grad_norm": 1.3475919961929321, |
| "loss": 5.2337, |
| "lr": 0.0008065734265734265, |
| "step": 1884, |
| "tokens_trained": 0.179076944 |
| }, |
| { |
| "epoch": 0.5350354609929078, |
| "grad_norm": 1.3263812065124512, |
| "loss": 5.2017, |
| "lr": 0.0008062937062937064, |
| "step": 1886, |
| "tokens_trained": 0.179266128 |
| }, |
| { |
| "epoch": 0.5356028368794327, |
| "grad_norm": 1.3956594467163086, |
| "loss": 5.1907, |
| "lr": 0.000806013986013986, |
| "step": 1888, |
| "tokens_trained": 0.179454848 |
| }, |
| { |
| "epoch": 0.5361702127659574, |
| "grad_norm": 1.4399393796920776, |
| "loss": 5.216, |
| "lr": 0.0008057342657342658, |
| "step": 1890, |
| "tokens_trained": 0.179643992 |
| }, |
| { |
| "epoch": 0.5367375886524822, |
| "grad_norm": 1.278714656829834, |
| "loss": 5.1689, |
| "lr": 0.0008054545454545454, |
| "step": 1892, |
| "tokens_trained": 0.179831416 |
| }, |
| { |
| "epoch": 0.5373049645390071, |
| "grad_norm": 1.3517796993255615, |
| "loss": 5.1319, |
| "lr": 0.0008051748251748253, |
| "step": 1894, |
| "tokens_trained": 0.180022528 |
| }, |
| { |
| "epoch": 0.5378723404255319, |
| "grad_norm": 1.2710460424423218, |
| "loss": 5.1619, |
| "lr": 0.0008048951048951049, |
| "step": 1896, |
| "tokens_trained": 0.180212936 |
| }, |
| { |
| "epoch": 0.5384397163120568, |
| "grad_norm": 1.3603075742721558, |
| "loss": 5.1615, |
| "lr": 0.0008046153846153846, |
| "step": 1898, |
| "tokens_trained": 0.180404648 |
| }, |
| { |
| "epoch": 0.5390070921985816, |
| "grad_norm": 1.422122836112976, |
| "loss": 5.1801, |
| "lr": 0.0008043356643356644, |
| "step": 1900, |
| "tokens_trained": 0.18059388 |
| }, |
| { |
| "epoch": 0.5395744680851063, |
| "grad_norm": 1.4242218732833862, |
| "loss": 5.2367, |
| "lr": 0.000804055944055944, |
| "step": 1902, |
| "tokens_trained": 0.180783248 |
| }, |
| { |
| "epoch": 0.5401418439716312, |
| "grad_norm": 1.4476134777069092, |
| "loss": 5.252, |
| "lr": 0.0008037762237762239, |
| "step": 1904, |
| "tokens_trained": 0.180971152 |
| }, |
| { |
| "epoch": 0.540709219858156, |
| "grad_norm": 1.4724863767623901, |
| "loss": 5.2042, |
| "lr": 0.0008034965034965035, |
| "step": 1906, |
| "tokens_trained": 0.181159992 |
| }, |
| { |
| "epoch": 0.5412765957446809, |
| "grad_norm": 1.4014806747436523, |
| "loss": 5.2514, |
| "lr": 0.0008032167832167832, |
| "step": 1908, |
| "tokens_trained": 0.18135032 |
| }, |
| { |
| "epoch": 0.5418439716312057, |
| "grad_norm": 1.3511682748794556, |
| "loss": 5.2036, |
| "lr": 0.0008029370629370629, |
| "step": 1910, |
| "tokens_trained": 0.181540312 |
| }, |
| { |
| "epoch": 0.5424113475177305, |
| "grad_norm": 1.3011739253997803, |
| "loss": 5.24, |
| "lr": 0.0008026573426573427, |
| "step": 1912, |
| "tokens_trained": 0.181731104 |
| }, |
| { |
| "epoch": 0.5429787234042553, |
| "grad_norm": 1.2753015756607056, |
| "loss": 5.25, |
| "lr": 0.0008023776223776224, |
| "step": 1914, |
| "tokens_trained": 0.18192008 |
| }, |
| { |
| "epoch": 0.5435460992907801, |
| "grad_norm": 1.4685192108154297, |
| "loss": 5.1619, |
| "lr": 0.0008020979020979021, |
| "step": 1916, |
| "tokens_trained": 0.182110072 |
| }, |
| { |
| "epoch": 0.544113475177305, |
| "grad_norm": 1.4695900678634644, |
| "loss": 5.2626, |
| "lr": 0.0008018181818181818, |
| "step": 1918, |
| "tokens_trained": 0.182300224 |
| }, |
| { |
| "epoch": 0.5446808510638298, |
| "grad_norm": 1.4895613193511963, |
| "loss": 5.1766, |
| "lr": 0.0008015384615384615, |
| "step": 1920, |
| "tokens_trained": 0.182490712 |
| }, |
| { |
| "epoch": 0.5452482269503546, |
| "grad_norm": 1.3073184490203857, |
| "loss": 5.2281, |
| "lr": 0.0008012587412587414, |
| "step": 1922, |
| "tokens_trained": 0.182681168 |
| }, |
| { |
| "epoch": 0.5458156028368795, |
| "grad_norm": 1.2414125204086304, |
| "loss": 5.2099, |
| "lr": 0.000800979020979021, |
| "step": 1924, |
| "tokens_trained": 0.182870504 |
| }, |
| { |
| "epoch": 0.5463829787234042, |
| "grad_norm": 1.2407176494598389, |
| "loss": 5.1116, |
| "lr": 0.0008006993006993007, |
| "step": 1926, |
| "tokens_trained": 0.1830618 |
| }, |
| { |
| "epoch": 0.546950354609929, |
| "grad_norm": 1.4507744312286377, |
| "loss": 5.1658, |
| "lr": 0.0008004195804195804, |
| "step": 1928, |
| "tokens_trained": 0.183250072 |
| }, |
| { |
| "epoch": 0.5475177304964539, |
| "grad_norm": 1.348907232284546, |
| "loss": 5.231, |
| "lr": 0.0008001398601398602, |
| "step": 1930, |
| "tokens_trained": 0.18344004 |
| }, |
| { |
| "epoch": 0.5480851063829787, |
| "grad_norm": 1.4393324851989746, |
| "loss": 5.2393, |
| "lr": 0.0007998601398601399, |
| "step": 1932, |
| "tokens_trained": 0.183630032 |
| }, |
| { |
| "epoch": 0.5486524822695036, |
| "grad_norm": 1.3569602966308594, |
| "loss": 5.2068, |
| "lr": 0.0007995804195804196, |
| "step": 1934, |
| "tokens_trained": 0.183820816 |
| }, |
| { |
| "epoch": 0.5492198581560284, |
| "grad_norm": 1.362021803855896, |
| "loss": 5.1641, |
| "lr": 0.0007993006993006992, |
| "step": 1936, |
| "tokens_trained": 0.184009824 |
| }, |
| { |
| "epoch": 0.5497872340425531, |
| "grad_norm": 1.2926445007324219, |
| "loss": 5.1983, |
| "lr": 0.000799020979020979, |
| "step": 1938, |
| "tokens_trained": 0.184199544 |
| }, |
| { |
| "epoch": 0.550354609929078, |
| "grad_norm": 1.3065440654754639, |
| "loss": 5.3009, |
| "lr": 0.0007987412587412588, |
| "step": 1940, |
| "tokens_trained": 0.1843906 |
| }, |
| { |
| "epoch": 0.5509219858156028, |
| "grad_norm": 1.3288060426712036, |
| "loss": 5.2347, |
| "lr": 0.0007984615384615385, |
| "step": 1942, |
| "tokens_trained": 0.184580304 |
| }, |
| { |
| "epoch": 0.5514893617021277, |
| "grad_norm": 1.4742496013641357, |
| "loss": 5.1497, |
| "lr": 0.0007981818181818182, |
| "step": 1944, |
| "tokens_trained": 0.184771832 |
| }, |
| { |
| "epoch": 0.5520567375886525, |
| "grad_norm": 1.3907397985458374, |
| "loss": 5.2001, |
| "lr": 0.0007979020979020979, |
| "step": 1946, |
| "tokens_trained": 0.184963744 |
| }, |
| { |
| "epoch": 0.5526241134751773, |
| "grad_norm": 1.3324332237243652, |
| "loss": 5.2056, |
| "lr": 0.0007976223776223777, |
| "step": 1948, |
| "tokens_trained": 0.185152248 |
| }, |
| { |
| "epoch": 0.5531914893617021, |
| "grad_norm": 1.258155345916748, |
| "loss": 5.1999, |
| "lr": 0.0007973426573426573, |
| "step": 1950, |
| "tokens_trained": 0.18534196 |
| }, |
| { |
| "epoch": 0.5537588652482269, |
| "grad_norm": 1.3515956401824951, |
| "loss": 5.1988, |
| "lr": 0.0007970629370629371, |
| "step": 1952, |
| "tokens_trained": 0.18553156 |
| }, |
| { |
| "epoch": 0.5543262411347518, |
| "grad_norm": 1.535507321357727, |
| "loss": 5.2198, |
| "lr": 0.0007967832167832167, |
| "step": 1954, |
| "tokens_trained": 0.185719792 |
| }, |
| { |
| "epoch": 0.5548936170212766, |
| "grad_norm": 1.3124226331710815, |
| "loss": 5.1468, |
| "lr": 0.0007965034965034965, |
| "step": 1956, |
| "tokens_trained": 0.18591288 |
| }, |
| { |
| "epoch": 0.5554609929078014, |
| "grad_norm": 1.2720654010772705, |
| "loss": 5.1939, |
| "lr": 0.0007962237762237763, |
| "step": 1958, |
| "tokens_trained": 0.186102344 |
| }, |
| { |
| "epoch": 0.5560283687943263, |
| "grad_norm": 1.2731753587722778, |
| "loss": 5.2063, |
| "lr": 0.000795944055944056, |
| "step": 1960, |
| "tokens_trained": 0.186291976 |
| }, |
| { |
| "epoch": 0.556595744680851, |
| "grad_norm": 1.3020576238632202, |
| "loss": 5.266, |
| "lr": 0.0007956643356643357, |
| "step": 1962, |
| "tokens_trained": 0.186483504 |
| }, |
| { |
| "epoch": 0.5571631205673759, |
| "grad_norm": 1.300626277923584, |
| "loss": 5.2159, |
| "lr": 0.0007953846153846153, |
| "step": 1964, |
| "tokens_trained": 0.18667372 |
| }, |
| { |
| "epoch": 0.5577304964539007, |
| "grad_norm": 1.3075426816940308, |
| "loss": 5.2136, |
| "lr": 0.0007951048951048952, |
| "step": 1966, |
| "tokens_trained": 0.186864808 |
| }, |
| { |
| "epoch": 0.5582978723404255, |
| "grad_norm": 1.4623394012451172, |
| "loss": 5.2081, |
| "lr": 0.0007948251748251748, |
| "step": 1968, |
| "tokens_trained": 0.187056272 |
| }, |
| { |
| "epoch": 0.5588652482269504, |
| "grad_norm": 1.4950625896453857, |
| "loss": 5.1885, |
| "lr": 0.0007945454545454546, |
| "step": 1970, |
| "tokens_trained": 0.187244464 |
| }, |
| { |
| "epoch": 0.5594326241134752, |
| "grad_norm": 1.517152190208435, |
| "loss": 5.2558, |
| "lr": 0.0007942657342657342, |
| "step": 1972, |
| "tokens_trained": 0.187433216 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.4226372241973877, |
| "loss": 5.236, |
| "lr": 0.000793986013986014, |
| "step": 1974, |
| "tokens_trained": 0.187622632 |
| }, |
| { |
| "epoch": 0.5605673758865248, |
| "grad_norm": 1.3692735433578491, |
| "loss": 5.2089, |
| "lr": 0.0007937062937062938, |
| "step": 1976, |
| "tokens_trained": 0.18781324 |
| }, |
| { |
| "epoch": 0.5611347517730496, |
| "grad_norm": 1.3344841003417969, |
| "loss": 5.2052, |
| "lr": 0.0007934265734265734, |
| "step": 1978, |
| "tokens_trained": 0.188002488 |
| }, |
| { |
| "epoch": 0.5617021276595745, |
| "grad_norm": 1.3929632902145386, |
| "loss": 5.2353, |
| "lr": 0.0007931468531468532, |
| "step": 1980, |
| "tokens_trained": 0.188194712 |
| }, |
| { |
| "epoch": 0.5622695035460993, |
| "grad_norm": 1.3147000074386597, |
| "loss": 5.2071, |
| "lr": 0.0007928671328671328, |
| "step": 1982, |
| "tokens_trained": 0.188387056 |
| }, |
| { |
| "epoch": 0.5628368794326241, |
| "grad_norm": 1.351483702659607, |
| "loss": 5.2196, |
| "lr": 0.0007925874125874127, |
| "step": 1984, |
| "tokens_trained": 0.188579048 |
| }, |
| { |
| "epoch": 0.563404255319149, |
| "grad_norm": 1.3840581178665161, |
| "loss": 5.1889, |
| "lr": 0.0007923076923076923, |
| "step": 1986, |
| "tokens_trained": 0.18876896 |
| }, |
| { |
| "epoch": 0.5639716312056737, |
| "grad_norm": 1.3427214622497559, |
| "loss": 5.192, |
| "lr": 0.000792027972027972, |
| "step": 1988, |
| "tokens_trained": 0.18895832 |
| }, |
| { |
| "epoch": 0.5645390070921986, |
| "grad_norm": 1.2931344509124756, |
| "loss": 5.1942, |
| "lr": 0.0007917482517482517, |
| "step": 1990, |
| "tokens_trained": 0.18915036 |
| }, |
| { |
| "epoch": 0.5651063829787234, |
| "grad_norm": 1.2408664226531982, |
| "loss": 5.2014, |
| "lr": 0.0007914685314685314, |
| "step": 1992, |
| "tokens_trained": 0.189339784 |
| }, |
| { |
| "epoch": 0.5656737588652482, |
| "grad_norm": 1.342760682106018, |
| "loss": 5.2056, |
| "lr": 0.0007911888111888113, |
| "step": 1994, |
| "tokens_trained": 0.189530776 |
| }, |
| { |
| "epoch": 0.5662411347517731, |
| "grad_norm": 1.2647815942764282, |
| "loss": 5.2338, |
| "lr": 0.0007909090909090909, |
| "step": 1996, |
| "tokens_trained": 0.189720312 |
| }, |
| { |
| "epoch": 0.5668085106382978, |
| "grad_norm": 1.1956689357757568, |
| "loss": 5.1464, |
| "lr": 0.0007906293706293707, |
| "step": 1998, |
| "tokens_trained": 0.189909592 |
| }, |
| { |
| "epoch": 0.5673758865248227, |
| "grad_norm": 1.287185549736023, |
| "loss": 5.1919, |
| "lr": 0.0007903496503496503, |
| "step": 2000, |
| "tokens_trained": 0.190100544 |
| }, |
| { |
| "epoch": 0.5673758865248227, |
| "eval_loss": 5.208409309387207, |
| "eval_runtime": 21.1643, |
| "step": 2000, |
| "tokens_trained": 0.190100544 |
| }, |
| { |
| "epoch": 0.5679432624113475, |
| "grad_norm": 1.3409695625305176, |
| "loss": 5.1723, |
| "lr": 0.0007900699300699302, |
| "step": 2002, |
| "tokens_trained": 0.190291792 |
| }, |
| { |
| "epoch": 0.5685106382978723, |
| "grad_norm": 1.3951654434204102, |
| "loss": 5.243, |
| "lr": 0.0007897902097902098, |
| "step": 2004, |
| "tokens_trained": 0.190481864 |
| }, |
| { |
| "epoch": 0.5690780141843972, |
| "grad_norm": 1.2949507236480713, |
| "loss": 5.2248, |
| "lr": 0.0007895104895104895, |
| "step": 2006, |
| "tokens_trained": 0.19067228 |
| }, |
| { |
| "epoch": 0.569645390070922, |
| "grad_norm": 1.3585959672927856, |
| "loss": 5.1889, |
| "lr": 0.0007892307692307692, |
| "step": 2008, |
| "tokens_trained": 0.190860368 |
| }, |
| { |
| "epoch": 0.5702127659574469, |
| "grad_norm": 1.2834774255752563, |
| "loss": 5.2067, |
| "lr": 0.0007889510489510489, |
| "step": 2010, |
| "tokens_trained": 0.191051904 |
| }, |
| { |
| "epoch": 0.5707801418439716, |
| "grad_norm": 1.3544108867645264, |
| "loss": 5.2041, |
| "lr": 0.0007886713286713288, |
| "step": 2012, |
| "tokens_trained": 0.191242688 |
| }, |
| { |
| "epoch": 0.5713475177304964, |
| "grad_norm": 1.3536330461502075, |
| "loss": 5.2131, |
| "lr": 0.0007883916083916084, |
| "step": 2014, |
| "tokens_trained": 0.191431104 |
| }, |
| { |
| "epoch": 0.5719148936170213, |
| "grad_norm": 1.337441325187683, |
| "loss": 5.2036, |
| "lr": 0.0007881118881118882, |
| "step": 2016, |
| "tokens_trained": 0.19162204 |
| }, |
| { |
| "epoch": 0.5724822695035461, |
| "grad_norm": 1.4701579809188843, |
| "loss": 5.2049, |
| "lr": 0.0007878321678321678, |
| "step": 2018, |
| "tokens_trained": 0.191813352 |
| }, |
| { |
| "epoch": 0.573049645390071, |
| "grad_norm": 1.4354153871536255, |
| "loss": 5.2583, |
| "lr": 0.0007875524475524476, |
| "step": 2020, |
| "tokens_trained": 0.192004064 |
| }, |
| { |
| "epoch": 0.5736170212765958, |
| "grad_norm": 1.358913540840149, |
| "loss": 5.1961, |
| "lr": 0.0007872727272727273, |
| "step": 2022, |
| "tokens_trained": 0.192193232 |
| }, |
| { |
| "epoch": 0.5741843971631205, |
| "grad_norm": 1.3889496326446533, |
| "loss": 5.1755, |
| "lr": 0.000786993006993007, |
| "step": 2024, |
| "tokens_trained": 0.192385416 |
| }, |
| { |
| "epoch": 0.5747517730496454, |
| "grad_norm": 1.4138504266738892, |
| "loss": 5.2423, |
| "lr": 0.0007867132867132867, |
| "step": 2026, |
| "tokens_trained": 0.192575904 |
| }, |
| { |
| "epoch": 0.5753191489361702, |
| "grad_norm": 1.2651748657226562, |
| "loss": 5.1574, |
| "lr": 0.0007864335664335664, |
| "step": 2028, |
| "tokens_trained": 0.192765568 |
| }, |
| { |
| "epoch": 0.5758865248226951, |
| "grad_norm": 1.304296612739563, |
| "loss": 5.1978, |
| "lr": 0.0007861538461538463, |
| "step": 2030, |
| "tokens_trained": 0.192956176 |
| }, |
| { |
| "epoch": 0.5764539007092199, |
| "grad_norm": 1.2884007692337036, |
| "loss": 5.1945, |
| "lr": 0.0007858741258741259, |
| "step": 2032, |
| "tokens_trained": 0.193146208 |
| }, |
| { |
| "epoch": 0.5770212765957446, |
| "grad_norm": 1.4838171005249023, |
| "loss": 5.1348, |
| "lr": 0.0007855944055944056, |
| "step": 2034, |
| "tokens_trained": 0.193335664 |
| }, |
| { |
| "epoch": 0.5775886524822695, |
| "grad_norm": 1.456529974937439, |
| "loss": 5.2284, |
| "lr": 0.0007853146853146853, |
| "step": 2036, |
| "tokens_trained": 0.193525216 |
| }, |
| { |
| "epoch": 0.5781560283687943, |
| "grad_norm": 1.3471657037734985, |
| "loss": 5.2101, |
| "lr": 0.0007850349650349651, |
| "step": 2038, |
| "tokens_trained": 0.19371268 |
| }, |
| { |
| "epoch": 0.5787234042553191, |
| "grad_norm": 1.3996837139129639, |
| "loss": 5.1828, |
| "lr": 0.0007847552447552448, |
| "step": 2040, |
| "tokens_trained": 0.193903536 |
| }, |
| { |
| "epoch": 0.579290780141844, |
| "grad_norm": 1.4071470499038696, |
| "loss": 5.1724, |
| "lr": 0.0007844755244755245, |
| "step": 2042, |
| "tokens_trained": 0.194092384 |
| }, |
| { |
| "epoch": 0.5798581560283688, |
| "grad_norm": 1.4125159978866577, |
| "loss": 5.1602, |
| "lr": 0.0007841958041958041, |
| "step": 2044, |
| "tokens_trained": 0.19428356 |
| }, |
| { |
| "epoch": 0.5804255319148937, |
| "grad_norm": 1.3602298498153687, |
| "loss": 5.1904, |
| "lr": 0.0007839160839160839, |
| "step": 2046, |
| "tokens_trained": 0.194473352 |
| }, |
| { |
| "epoch": 0.5809929078014184, |
| "grad_norm": 1.2836074829101562, |
| "loss": 5.1648, |
| "lr": 0.0007836363636363637, |
| "step": 2048, |
| "tokens_trained": 0.194663624 |
| }, |
| { |
| "epoch": 0.5815602836879432, |
| "grad_norm": 1.306192398071289, |
| "loss": 5.2037, |
| "lr": 0.0007833566433566434, |
| "step": 2050, |
| "tokens_trained": 0.194854 |
| }, |
| { |
| "epoch": 0.5821276595744681, |
| "grad_norm": 1.3130674362182617, |
| "loss": 5.223, |
| "lr": 0.0007830769230769231, |
| "step": 2052, |
| "tokens_trained": 0.195044368 |
| }, |
| { |
| "epoch": 0.5826950354609929, |
| "grad_norm": 1.2337714433670044, |
| "loss": 5.1609, |
| "lr": 0.0007827972027972028, |
| "step": 2054, |
| "tokens_trained": 0.195237064 |
| }, |
| { |
| "epoch": 0.5832624113475178, |
| "grad_norm": 1.2249869108200073, |
| "loss": 5.1352, |
| "lr": 0.0007825174825174826, |
| "step": 2056, |
| "tokens_trained": 0.195425016 |
| }, |
| { |
| "epoch": 0.5838297872340426, |
| "grad_norm": 1.2610726356506348, |
| "loss": 5.2304, |
| "lr": 0.0007822377622377622, |
| "step": 2058, |
| "tokens_trained": 0.195614488 |
| }, |
| { |
| "epoch": 0.5843971631205673, |
| "grad_norm": 1.1917920112609863, |
| "loss": 5.1964, |
| "lr": 0.000781958041958042, |
| "step": 2060, |
| "tokens_trained": 0.19580392 |
| }, |
| { |
| "epoch": 0.5849645390070922, |
| "grad_norm": 1.2248187065124512, |
| "loss": 5.0901, |
| "lr": 0.0007816783216783216, |
| "step": 2062, |
| "tokens_trained": 0.195993096 |
| }, |
| { |
| "epoch": 0.585531914893617, |
| "grad_norm": 1.4138745069503784, |
| "loss": 5.1806, |
| "lr": 0.0007813986013986014, |
| "step": 2064, |
| "tokens_trained": 0.196183824 |
| }, |
| { |
| "epoch": 0.5860992907801419, |
| "grad_norm": 1.389195442199707, |
| "loss": 5.1813, |
| "lr": 0.0007811188811188812, |
| "step": 2066, |
| "tokens_trained": 0.196373912 |
| }, |
| { |
| "epoch": 0.5866666666666667, |
| "grad_norm": 1.2737247943878174, |
| "loss": 5.1935, |
| "lr": 0.0007808391608391609, |
| "step": 2068, |
| "tokens_trained": 0.196564696 |
| }, |
| { |
| "epoch": 0.5872340425531914, |
| "grad_norm": 1.443703293800354, |
| "loss": 5.2376, |
| "lr": 0.0007805594405594406, |
| "step": 2070, |
| "tokens_trained": 0.196754472 |
| }, |
| { |
| "epoch": 0.5878014184397163, |
| "grad_norm": 1.367251992225647, |
| "loss": 5.2505, |
| "lr": 0.0007802797202797202, |
| "step": 2072, |
| "tokens_trained": 0.196945288 |
| }, |
| { |
| "epoch": 0.5883687943262411, |
| "grad_norm": 1.4049919843673706, |
| "loss": 5.2155, |
| "lr": 0.0007800000000000001, |
| "step": 2074, |
| "tokens_trained": 0.197135328 |
| }, |
| { |
| "epoch": 0.588936170212766, |
| "grad_norm": 1.5119894742965698, |
| "loss": 5.189, |
| "lr": 0.0007797202797202797, |
| "step": 2076, |
| "tokens_trained": 0.197325152 |
| }, |
| { |
| "epoch": 0.5895035460992908, |
| "grad_norm": 1.349288821220398, |
| "loss": 5.1626, |
| "lr": 0.0007794405594405595, |
| "step": 2078, |
| "tokens_trained": 0.197514576 |
| }, |
| { |
| "epoch": 0.5900709219858156, |
| "grad_norm": 1.2594739198684692, |
| "loss": 5.2222, |
| "lr": 0.0007791608391608391, |
| "step": 2080, |
| "tokens_trained": 0.197705064 |
| }, |
| { |
| "epoch": 0.5906382978723405, |
| "grad_norm": 1.0747008323669434, |
| "loss": 5.1669, |
| "lr": 0.0007788811188811189, |
| "step": 2082, |
| "tokens_trained": 0.197895032 |
| }, |
| { |
| "epoch": 0.5912056737588652, |
| "grad_norm": 1.1089273691177368, |
| "loss": 5.1071, |
| "lr": 0.0007786013986013987, |
| "step": 2084, |
| "tokens_trained": 0.198085832 |
| }, |
| { |
| "epoch": 0.5917730496453901, |
| "grad_norm": 1.153296709060669, |
| "loss": 5.1483, |
| "lr": 0.0007783216783216783, |
| "step": 2086, |
| "tokens_trained": 0.198272104 |
| }, |
| { |
| "epoch": 0.5923404255319149, |
| "grad_norm": 1.1960811614990234, |
| "loss": 5.1703, |
| "lr": 0.0007780419580419581, |
| "step": 2088, |
| "tokens_trained": 0.198459976 |
| }, |
| { |
| "epoch": 0.5929078014184397, |
| "grad_norm": 1.073548674583435, |
| "loss": 5.2449, |
| "lr": 0.0007777622377622377, |
| "step": 2090, |
| "tokens_trained": 0.198648376 |
| }, |
| { |
| "epoch": 0.5934751773049646, |
| "grad_norm": 1.233362078666687, |
| "loss": 5.1987, |
| "lr": 0.0007774825174825176, |
| "step": 2092, |
| "tokens_trained": 0.198839144 |
| }, |
| { |
| "epoch": 0.5940425531914894, |
| "grad_norm": 1.3649506568908691, |
| "loss": 5.183, |
| "lr": 0.0007772027972027972, |
| "step": 2094, |
| "tokens_trained": 0.199029064 |
| }, |
| { |
| "epoch": 0.5946099290780141, |
| "grad_norm": 1.2620112895965576, |
| "loss": 5.1343, |
| "lr": 0.000776923076923077, |
| "step": 2096, |
| "tokens_trained": 0.199218376 |
| }, |
| { |
| "epoch": 0.595177304964539, |
| "grad_norm": 1.3836737871170044, |
| "loss": 5.248, |
| "lr": 0.0007766433566433566, |
| "step": 2098, |
| "tokens_trained": 0.199407736 |
| }, |
| { |
| "epoch": 0.5957446808510638, |
| "grad_norm": 1.3027995824813843, |
| "loss": 5.1813, |
| "lr": 0.0007763636363636363, |
| "step": 2100, |
| "tokens_trained": 0.199597888 |
| }, |
| { |
| "epoch": 0.5963120567375887, |
| "grad_norm": 1.2857698202133179, |
| "loss": 5.2111, |
| "lr": 0.0007760839160839162, |
| "step": 2102, |
| "tokens_trained": 0.19978852 |
| }, |
| { |
| "epoch": 0.5968794326241135, |
| "grad_norm": 1.3470538854599, |
| "loss": 5.1505, |
| "lr": 0.0007758041958041958, |
| "step": 2104, |
| "tokens_trained": 0.199978536 |
| }, |
| { |
| "epoch": 0.5974468085106382, |
| "grad_norm": 1.230573058128357, |
| "loss": 5.1222, |
| "lr": 0.0007755244755244756, |
| "step": 2106, |
| "tokens_trained": 0.200170024 |
| }, |
| { |
| "epoch": 0.5980141843971631, |
| "grad_norm": 1.2551500797271729, |
| "loss": 5.1297, |
| "lr": 0.0007752447552447552, |
| "step": 2108, |
| "tokens_trained": 0.20035992 |
| }, |
| { |
| "epoch": 0.5985815602836879, |
| "grad_norm": 1.2162272930145264, |
| "loss": 5.233, |
| "lr": 0.0007749650349650351, |
| "step": 2110, |
| "tokens_trained": 0.200548976 |
| }, |
| { |
| "epoch": 0.5991489361702128, |
| "grad_norm": 1.2617305517196655, |
| "loss": 5.2118, |
| "lr": 0.0007746853146853147, |
| "step": 2112, |
| "tokens_trained": 0.200740656 |
| }, |
| { |
| "epoch": 0.5997163120567376, |
| "grad_norm": 1.4057862758636475, |
| "loss": 5.2215, |
| "lr": 0.0007744055944055944, |
| "step": 2114, |
| "tokens_trained": 0.200930944 |
| }, |
| { |
| "epoch": 0.6002836879432624, |
| "grad_norm": 1.3729593753814697, |
| "loss": 5.1773, |
| "lr": 0.0007741258741258741, |
| "step": 2116, |
| "tokens_trained": 0.201122528 |
| }, |
| { |
| "epoch": 0.6008510638297873, |
| "grad_norm": 1.3300920724868774, |
| "loss": 5.1573, |
| "lr": 0.0007738461538461538, |
| "step": 2118, |
| "tokens_trained": 0.201310224 |
| }, |
| { |
| "epoch": 0.601418439716312, |
| "grad_norm": 1.33209228515625, |
| "loss": 5.1523, |
| "lr": 0.0007735664335664337, |
| "step": 2120, |
| "tokens_trained": 0.201499048 |
| }, |
| { |
| "epoch": 0.6019858156028369, |
| "grad_norm": 1.1407768726348877, |
| "loss": 5.1453, |
| "lr": 0.0007732867132867133, |
| "step": 2122, |
| "tokens_trained": 0.201688872 |
| }, |
| { |
| "epoch": 0.6025531914893617, |
| "grad_norm": 1.1250742673873901, |
| "loss": 5.173, |
| "lr": 0.0007730069930069931, |
| "step": 2124, |
| "tokens_trained": 0.201880504 |
| }, |
| { |
| "epoch": 0.6028368794326241, |
| "eval_loss": 5.190411567687988, |
| "eval_runtime": 20.812, |
| "step": 2125, |
| "tokens_trained": 0.201976984 |
| }, |
| { |
| "epoch": 0.6031205673758865, |
| "grad_norm": 1.2974287271499634, |
| "loss": 5.1878, |
| "lr": 0.0007727272727272727, |
| "step": 2126, |
| "tokens_trained": 0.20207104 |
| }, |
| { |
| "epoch": 0.6036879432624114, |
| "grad_norm": 1.251120924949646, |
| "loss": 5.203, |
| "lr": 0.0007724475524475525, |
| "step": 2128, |
| "tokens_trained": 0.202261848 |
| }, |
| { |
| "epoch": 0.6042553191489362, |
| "grad_norm": 1.3494654893875122, |
| "loss": 5.1981, |
| "lr": 0.0007721678321678322, |
| "step": 2130, |
| "tokens_trained": 0.202452936 |
| }, |
| { |
| "epoch": 0.604822695035461, |
| "grad_norm": 1.2586653232574463, |
| "loss": 5.1786, |
| "lr": 0.0007718881118881119, |
| "step": 2132, |
| "tokens_trained": 0.202642168 |
| }, |
| { |
| "epoch": 0.6053900709219858, |
| "grad_norm": 1.228868842124939, |
| "loss": 5.1651, |
| "lr": 0.0007716083916083916, |
| "step": 2134, |
| "tokens_trained": 0.202830528 |
| }, |
| { |
| "epoch": 0.6059574468085106, |
| "grad_norm": 1.25627863407135, |
| "loss": 5.2033, |
| "lr": 0.0007713286713286713, |
| "step": 2136, |
| "tokens_trained": 0.203022216 |
| }, |
| { |
| "epoch": 0.6065248226950355, |
| "grad_norm": 1.1568467617034912, |
| "loss": 5.1659, |
| "lr": 0.0007710489510489512, |
| "step": 2138, |
| "tokens_trained": 0.203211696 |
| }, |
| { |
| "epoch": 0.6070921985815603, |
| "grad_norm": 1.1502138376235962, |
| "loss": 5.1935, |
| "lr": 0.0007707692307692308, |
| "step": 2140, |
| "tokens_trained": 0.203403224 |
| }, |
| { |
| "epoch": 0.6076595744680852, |
| "grad_norm": 1.2491158246994019, |
| "loss": 5.1367, |
| "lr": 0.0007704895104895105, |
| "step": 2142, |
| "tokens_trained": 0.203594912 |
| }, |
| { |
| "epoch": 0.6082269503546099, |
| "grad_norm": 1.3012075424194336, |
| "loss": 5.1954, |
| "lr": 0.0007702097902097902, |
| "step": 2144, |
| "tokens_trained": 0.203787032 |
| }, |
| { |
| "epoch": 0.6087943262411347, |
| "grad_norm": 1.2956688404083252, |
| "loss": 5.2255, |
| "lr": 0.0007699300699300699, |
| "step": 2146, |
| "tokens_trained": 0.203979064 |
| }, |
| { |
| "epoch": 0.6093617021276596, |
| "grad_norm": 1.3562579154968262, |
| "loss": 5.2371, |
| "lr": 0.0007696503496503497, |
| "step": 2148, |
| "tokens_trained": 0.20416828 |
| }, |
| { |
| "epoch": 0.6099290780141844, |
| "grad_norm": 1.2726640701293945, |
| "loss": 5.154, |
| "lr": 0.0007693706293706294, |
| "step": 2150, |
| "tokens_trained": 0.20435532 |
| }, |
| { |
| "epoch": 0.6104964539007092, |
| "grad_norm": 1.1975597143173218, |
| "loss": 5.1559, |
| "lr": 0.000769090909090909, |
| "step": 2152, |
| "tokens_trained": 0.204545416 |
| }, |
| { |
| "epoch": 0.6110638297872341, |
| "grad_norm": 1.2840410470962524, |
| "loss": 5.2558, |
| "lr": 0.0007688111888111888, |
| "step": 2154, |
| "tokens_trained": 0.204734752 |
| }, |
| { |
| "epoch": 0.6116312056737588, |
| "grad_norm": 1.4807062149047852, |
| "loss": 5.229, |
| "lr": 0.0007685314685314686, |
| "step": 2156, |
| "tokens_trained": 0.204925432 |
| }, |
| { |
| "epoch": 0.6121985815602837, |
| "grad_norm": 1.3909307718276978, |
| "loss": 5.2128, |
| "lr": 0.0007682517482517483, |
| "step": 2158, |
| "tokens_trained": 0.205117624 |
| }, |
| { |
| "epoch": 0.6127659574468085, |
| "grad_norm": 1.3998613357543945, |
| "loss": 5.1344, |
| "lr": 0.000767972027972028, |
| "step": 2160, |
| "tokens_trained": 0.205309032 |
| }, |
| { |
| "epoch": 0.6133333333333333, |
| "grad_norm": 1.3821474313735962, |
| "loss": 5.2223, |
| "lr": 0.0007676923076923077, |
| "step": 2162, |
| "tokens_trained": 0.205498112 |
| }, |
| { |
| "epoch": 0.6139007092198582, |
| "grad_norm": 1.280150294303894, |
| "loss": 5.1357, |
| "lr": 0.0007674125874125874, |
| "step": 2164, |
| "tokens_trained": 0.205686112 |
| }, |
| { |
| "epoch": 0.614468085106383, |
| "grad_norm": 1.2361094951629639, |
| "loss": 5.1285, |
| "lr": 0.0007671328671328672, |
| "step": 2166, |
| "tokens_trained": 0.20587828 |
| }, |
| { |
| "epoch": 0.6150354609929078, |
| "grad_norm": 1.1495496034622192, |
| "loss": 5.1597, |
| "lr": 0.0007668531468531469, |
| "step": 2168, |
| "tokens_trained": 0.206068272 |
| }, |
| { |
| "epoch": 0.6156028368794326, |
| "grad_norm": 1.2377156019210815, |
| "loss": 5.1208, |
| "lr": 0.0007665734265734265, |
| "step": 2170, |
| "tokens_trained": 0.206257272 |
| }, |
| { |
| "epoch": 0.6161702127659574, |
| "grad_norm": 1.226664423942566, |
| "loss": 5.2143, |
| "lr": 0.0007662937062937063, |
| "step": 2172, |
| "tokens_trained": 0.206449824 |
| }, |
| { |
| "epoch": 0.6167375886524823, |
| "grad_norm": 1.1939537525177002, |
| "loss": 5.0847, |
| "lr": 0.000766013986013986, |
| "step": 2174, |
| "tokens_trained": 0.206636992 |
| }, |
| { |
| "epoch": 0.6173049645390071, |
| "grad_norm": 1.233585238456726, |
| "loss": 5.1647, |
| "lr": 0.0007657342657342658, |
| "step": 2176, |
| "tokens_trained": 0.206828288 |
| }, |
| { |
| "epoch": 0.617872340425532, |
| "grad_norm": 1.3282006978988647, |
| "loss": 5.1748, |
| "lr": 0.0007654545454545455, |
| "step": 2178, |
| "tokens_trained": 0.207019064 |
| }, |
| { |
| "epoch": 0.6184397163120567, |
| "grad_norm": 1.2299532890319824, |
| "loss": 5.248, |
| "lr": 0.0007651748251748251, |
| "step": 2180, |
| "tokens_trained": 0.20720844 |
| }, |
| { |
| "epoch": 0.6190070921985815, |
| "grad_norm": 1.279590129852295, |
| "loss": 5.1467, |
| "lr": 0.0007648951048951049, |
| "step": 2182, |
| "tokens_trained": 0.207398952 |
| }, |
| { |
| "epoch": 0.6195744680851064, |
| "grad_norm": 1.30775785446167, |
| "loss": 5.1981, |
| "lr": 0.0007646153846153846, |
| "step": 2184, |
| "tokens_trained": 0.207589224 |
| }, |
| { |
| "epoch": 0.6201418439716312, |
| "grad_norm": 1.2829056978225708, |
| "loss": 5.1976, |
| "lr": 0.0007643356643356644, |
| "step": 2186, |
| "tokens_trained": 0.20778024 |
| }, |
| { |
| "epoch": 0.6207092198581561, |
| "grad_norm": 1.2149474620819092, |
| "loss": 5.2186, |
| "lr": 0.000764055944055944, |
| "step": 2188, |
| "tokens_trained": 0.207969176 |
| }, |
| { |
| "epoch": 0.6212765957446809, |
| "grad_norm": 1.239912748336792, |
| "loss": 5.15, |
| "lr": 0.0007637762237762238, |
| "step": 2190, |
| "tokens_trained": 0.208159016 |
| }, |
| { |
| "epoch": 0.6218439716312056, |
| "grad_norm": 1.322252869606018, |
| "loss": 5.2447, |
| "lr": 0.0007634965034965035, |
| "step": 2192, |
| "tokens_trained": 0.2083502 |
| }, |
| { |
| "epoch": 0.6224113475177305, |
| "grad_norm": 1.1804618835449219, |
| "loss": 5.1924, |
| "lr": 0.0007632167832167833, |
| "step": 2194, |
| "tokens_trained": 0.208539616 |
| }, |
| { |
| "epoch": 0.6229787234042553, |
| "grad_norm": 1.2914003133773804, |
| "loss": 5.1559, |
| "lr": 0.000762937062937063, |
| "step": 2196, |
| "tokens_trained": 0.208731032 |
| }, |
| { |
| "epoch": 0.6235460992907801, |
| "grad_norm": 1.2175878286361694, |
| "loss": 5.1335, |
| "lr": 0.0007626573426573426, |
| "step": 2198, |
| "tokens_trained": 0.208923952 |
| }, |
| { |
| "epoch": 0.624113475177305, |
| "grad_norm": 1.2267946004867554, |
| "loss": 5.1697, |
| "lr": 0.0007623776223776224, |
| "step": 2200, |
| "tokens_trained": 0.20911168 |
| }, |
| { |
| "epoch": 0.6246808510638298, |
| "grad_norm": 1.2482635974884033, |
| "loss": 5.1986, |
| "lr": 0.0007620979020979021, |
| "step": 2202, |
| "tokens_trained": 0.209299504 |
| }, |
| { |
| "epoch": 0.6252482269503546, |
| "grad_norm": 1.3256076574325562, |
| "loss": 5.1955, |
| "lr": 0.0007618181818181819, |
| "step": 2204, |
| "tokens_trained": 0.20948936 |
| }, |
| { |
| "epoch": 0.6258156028368794, |
| "grad_norm": 1.205692172050476, |
| "loss": 5.1175, |
| "lr": 0.0007615384615384615, |
| "step": 2206, |
| "tokens_trained": 0.209678072 |
| }, |
| { |
| "epoch": 0.6263829787234042, |
| "grad_norm": 1.2371326684951782, |
| "loss": 5.1798, |
| "lr": 0.0007612587412587412, |
| "step": 2208, |
| "tokens_trained": 0.209868904 |
| }, |
| { |
| "epoch": 0.6269503546099291, |
| "grad_norm": 1.1657975912094116, |
| "loss": 5.159, |
| "lr": 0.000760979020979021, |
| "step": 2210, |
| "tokens_trained": 0.210060992 |
| }, |
| { |
| "epoch": 0.6275177304964539, |
| "grad_norm": 1.18202543258667, |
| "loss": 5.2157, |
| "lr": 0.0007606993006993007, |
| "step": 2212, |
| "tokens_trained": 0.210252096 |
| }, |
| { |
| "epoch": 0.6280851063829788, |
| "grad_norm": 1.220446228981018, |
| "loss": 5.1677, |
| "lr": 0.0007604195804195805, |
| "step": 2214, |
| "tokens_trained": 0.210444176 |
| }, |
| { |
| "epoch": 0.6286524822695035, |
| "grad_norm": 1.1070069074630737, |
| "loss": 5.1702, |
| "lr": 0.0007601398601398601, |
| "step": 2216, |
| "tokens_trained": 0.210633376 |
| }, |
| { |
| "epoch": 0.6292198581560283, |
| "grad_norm": 1.3031543493270874, |
| "loss": 5.2253, |
| "lr": 0.0007598601398601399, |
| "step": 2218, |
| "tokens_trained": 0.21082368 |
| }, |
| { |
| "epoch": 0.6297872340425532, |
| "grad_norm": 1.0999404191970825, |
| "loss": 5.1942, |
| "lr": 0.0007595804195804196, |
| "step": 2220, |
| "tokens_trained": 0.211013448 |
| }, |
| { |
| "epoch": 0.630354609929078, |
| "grad_norm": 1.2241060733795166, |
| "loss": 5.1408, |
| "lr": 0.0007593006993006993, |
| "step": 2222, |
| "tokens_trained": 0.211205176 |
| }, |
| { |
| "epoch": 0.6309219858156029, |
| "grad_norm": 1.3057242631912231, |
| "loss": 5.2234, |
| "lr": 0.000759020979020979, |
| "step": 2224, |
| "tokens_trained": 0.211396464 |
| }, |
| { |
| "epoch": 0.6314893617021277, |
| "grad_norm": 1.2667888402938843, |
| "loss": 5.1675, |
| "lr": 0.0007587412587412587, |
| "step": 2226, |
| "tokens_trained": 0.211587608 |
| }, |
| { |
| "epoch": 0.6320567375886524, |
| "grad_norm": 1.1653670072555542, |
| "loss": 5.2081, |
| "lr": 0.0007584615384615385, |
| "step": 2228, |
| "tokens_trained": 0.211779832 |
| }, |
| { |
| "epoch": 0.6326241134751773, |
| "grad_norm": 1.1786928176879883, |
| "loss": 5.1772, |
| "lr": 0.0007581818181818182, |
| "step": 2230, |
| "tokens_trained": 0.211971584 |
| }, |
| { |
| "epoch": 0.6331914893617021, |
| "grad_norm": 1.242872714996338, |
| "loss": 5.1378, |
| "lr": 0.000757902097902098, |
| "step": 2232, |
| "tokens_trained": 0.212161024 |
| }, |
| { |
| "epoch": 0.633758865248227, |
| "grad_norm": 1.2831401824951172, |
| "loss": 5.1488, |
| "lr": 0.0007576223776223776, |
| "step": 2234, |
| "tokens_trained": 0.21235084 |
| }, |
| { |
| "epoch": 0.6343262411347518, |
| "grad_norm": 1.269600510597229, |
| "loss": 5.1454, |
| "lr": 0.0007573426573426573, |
| "step": 2236, |
| "tokens_trained": 0.212539504 |
| }, |
| { |
| "epoch": 0.6348936170212766, |
| "grad_norm": 1.2224805355072021, |
| "loss": 5.1123, |
| "lr": 0.0007570629370629371, |
| "step": 2238, |
| "tokens_trained": 0.21272884 |
| }, |
| { |
| "epoch": 0.6354609929078014, |
| "grad_norm": 1.2404342889785767, |
| "loss": 5.2023, |
| "lr": 0.0007567832167832168, |
| "step": 2240, |
| "tokens_trained": 0.212920128 |
| }, |
| { |
| "epoch": 0.6360283687943262, |
| "grad_norm": 1.1551696062088013, |
| "loss": 5.1529, |
| "lr": 0.0007565034965034965, |
| "step": 2242, |
| "tokens_trained": 0.213110744 |
| }, |
| { |
| "epoch": 0.6365957446808511, |
| "grad_norm": 1.2342238426208496, |
| "loss": 5.182, |
| "lr": 0.0007562237762237762, |
| "step": 2244, |
| "tokens_trained": 0.213298584 |
| }, |
| { |
| "epoch": 0.6371631205673759, |
| "grad_norm": 1.2631146907806396, |
| "loss": 5.1442, |
| "lr": 0.000755944055944056, |
| "step": 2246, |
| "tokens_trained": 0.213488512 |
| }, |
| { |
| "epoch": 0.6377304964539007, |
| "grad_norm": 1.2031443119049072, |
| "loss": 5.1041, |
| "lr": 0.0007556643356643357, |
| "step": 2248, |
| "tokens_trained": 0.21367964 |
| }, |
| { |
| "epoch": 0.6382978723404256, |
| "grad_norm": 1.127889633178711, |
| "loss": 5.1889, |
| "lr": 0.0007553846153846154, |
| "step": 2250, |
| "tokens_trained": 0.213871584 |
| }, |
| { |
| "epoch": 0.6382978723404256, |
| "eval_loss": 5.1714253425598145, |
| "eval_runtime": 20.5005, |
| "step": 2250, |
| "tokens_trained": 0.213871584 |
| }, |
| { |
| "epoch": 0.6388652482269503, |
| "grad_norm": 1.1281750202178955, |
| "loss": 5.1039, |
| "lr": 0.0007551048951048951, |
| "step": 2252, |
| "tokens_trained": 0.214061624 |
| }, |
| { |
| "epoch": 0.6394326241134751, |
| "grad_norm": 1.1058608293533325, |
| "loss": 5.1562, |
| "lr": 0.0007548251748251748, |
| "step": 2254, |
| "tokens_trained": 0.214252024 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.0579496622085571, |
| "loss": 5.1476, |
| "lr": 0.0007545454545454546, |
| "step": 2256, |
| "tokens_trained": 0.214442624 |
| }, |
| { |
| "epoch": 0.6405673758865248, |
| "grad_norm": 1.1370742321014404, |
| "loss": 5.1948, |
| "lr": 0.0007542657342657343, |
| "step": 2258, |
| "tokens_trained": 0.214634016 |
| }, |
| { |
| "epoch": 0.6411347517730497, |
| "grad_norm": 1.1118457317352295, |
| "loss": 5.169, |
| "lr": 0.000753986013986014, |
| "step": 2260, |
| "tokens_trained": 0.214823368 |
| }, |
| { |
| "epoch": 0.6417021276595745, |
| "grad_norm": 1.039004921913147, |
| "loss": 5.1454, |
| "lr": 0.0007537062937062937, |
| "step": 2262, |
| "tokens_trained": 0.21501196 |
| }, |
| { |
| "epoch": 0.6422695035460992, |
| "grad_norm": 1.2534265518188477, |
| "loss": 5.1455, |
| "lr": 0.0007534265734265734, |
| "step": 2264, |
| "tokens_trained": 0.215200808 |
| }, |
| { |
| "epoch": 0.6428368794326241, |
| "grad_norm": 1.2437689304351807, |
| "loss": 5.1966, |
| "lr": 0.0007531468531468532, |
| "step": 2266, |
| "tokens_trained": 0.21539036 |
| }, |
| { |
| "epoch": 0.6434042553191489, |
| "grad_norm": 1.1795995235443115, |
| "loss": 5.1716, |
| "lr": 0.0007528671328671329, |
| "step": 2268, |
| "tokens_trained": 0.215582088 |
| }, |
| { |
| "epoch": 0.6439716312056738, |
| "grad_norm": 1.3241360187530518, |
| "loss": 5.1638, |
| "lr": 0.0007525874125874126, |
| "step": 2270, |
| "tokens_trained": 0.215771936 |
| }, |
| { |
| "epoch": 0.6445390070921986, |
| "grad_norm": 1.2526317834854126, |
| "loss": 5.1067, |
| "lr": 0.0007523076923076923, |
| "step": 2272, |
| "tokens_trained": 0.215960792 |
| }, |
| { |
| "epoch": 0.6451063829787234, |
| "grad_norm": 1.249042272567749, |
| "loss": 5.1466, |
| "lr": 0.0007520279720279721, |
| "step": 2274, |
| "tokens_trained": 0.216151448 |
| }, |
| { |
| "epoch": 0.6456737588652482, |
| "grad_norm": 1.1926413774490356, |
| "loss": 5.1886, |
| "lr": 0.0007517482517482518, |
| "step": 2276, |
| "tokens_trained": 0.216340368 |
| }, |
| { |
| "epoch": 0.646241134751773, |
| "grad_norm": 1.1615192890167236, |
| "loss": 5.1538, |
| "lr": 0.0007514685314685314, |
| "step": 2278, |
| "tokens_trained": 0.216531264 |
| }, |
| { |
| "epoch": 0.6468085106382979, |
| "grad_norm": 1.1265521049499512, |
| "loss": 5.1518, |
| "lr": 0.0007511888111888112, |
| "step": 2280, |
| "tokens_trained": 0.216722024 |
| }, |
| { |
| "epoch": 0.6473758865248227, |
| "grad_norm": 1.0598393678665161, |
| "loss": 5.1776, |
| "lr": 0.0007509090909090909, |
| "step": 2282, |
| "tokens_trained": 0.216913232 |
| }, |
| { |
| "epoch": 0.6479432624113475, |
| "grad_norm": 1.1727370023727417, |
| "loss": 5.2083, |
| "lr": 0.0007506293706293707, |
| "step": 2284, |
| "tokens_trained": 0.217103136 |
| }, |
| { |
| "epoch": 0.6485106382978724, |
| "grad_norm": 1.1411634683609009, |
| "loss": 5.182, |
| "lr": 0.0007503496503496504, |
| "step": 2286, |
| "tokens_trained": 0.21729368 |
| }, |
| { |
| "epoch": 0.6490780141843971, |
| "grad_norm": 1.2293574810028076, |
| "loss": 5.1725, |
| "lr": 0.00075006993006993, |
| "step": 2288, |
| "tokens_trained": 0.217485624 |
| }, |
| { |
| "epoch": 0.649645390070922, |
| "grad_norm": 1.3079198598861694, |
| "loss": 5.1531, |
| "lr": 0.0007497902097902098, |
| "step": 2290, |
| "tokens_trained": 0.217675192 |
| }, |
| { |
| "epoch": 0.6502127659574468, |
| "grad_norm": 1.1579710245132446, |
| "loss": 5.1162, |
| "lr": 0.0007495104895104895, |
| "step": 2292, |
| "tokens_trained": 0.2178658 |
| }, |
| { |
| "epoch": 0.6507801418439716, |
| "grad_norm": 1.1968539953231812, |
| "loss": 5.1652, |
| "lr": 0.0007492307692307693, |
| "step": 2294, |
| "tokens_trained": 0.218057984 |
| }, |
| { |
| "epoch": 0.6513475177304965, |
| "grad_norm": 1.3666965961456299, |
| "loss": 5.2035, |
| "lr": 0.0007489510489510489, |
| "step": 2296, |
| "tokens_trained": 0.218249704 |
| }, |
| { |
| "epoch": 0.6519148936170213, |
| "grad_norm": 1.3615487813949585, |
| "loss": 5.1704, |
| "lr": 0.0007486713286713287, |
| "step": 2298, |
| "tokens_trained": 0.218441792 |
| }, |
| { |
| "epoch": 0.6524822695035462, |
| "grad_norm": 1.2289810180664062, |
| "loss": 5.1683, |
| "lr": 0.0007483916083916084, |
| "step": 2300, |
| "tokens_trained": 0.218630624 |
| }, |
| { |
| "epoch": 0.6530496453900709, |
| "grad_norm": 1.1299561262130737, |
| "loss": 5.1672, |
| "lr": 0.0007481118881118882, |
| "step": 2302, |
| "tokens_trained": 0.218819928 |
| }, |
| { |
| "epoch": 0.6536170212765957, |
| "grad_norm": 1.186132550239563, |
| "loss": 5.1456, |
| "lr": 0.0007478321678321679, |
| "step": 2304, |
| "tokens_trained": 0.219008792 |
| }, |
| { |
| "epoch": 0.6541843971631206, |
| "grad_norm": 1.2106919288635254, |
| "loss": 5.1998, |
| "lr": 0.0007475524475524475, |
| "step": 2306, |
| "tokens_trained": 0.219198584 |
| }, |
| { |
| "epoch": 0.6547517730496454, |
| "grad_norm": 1.2485368251800537, |
| "loss": 5.1473, |
| "lr": 0.0007472727272727273, |
| "step": 2308, |
| "tokens_trained": 0.219386768 |
| }, |
| { |
| "epoch": 0.6553191489361702, |
| "grad_norm": 1.1855547428131104, |
| "loss": 5.1721, |
| "lr": 0.000746993006993007, |
| "step": 2310, |
| "tokens_trained": 0.219575904 |
| }, |
| { |
| "epoch": 0.655886524822695, |
| "grad_norm": 1.3077043294906616, |
| "loss": 5.1444, |
| "lr": 0.0007467132867132868, |
| "step": 2312, |
| "tokens_trained": 0.219767712 |
| }, |
| { |
| "epoch": 0.6564539007092198, |
| "grad_norm": 1.3514399528503418, |
| "loss": 5.198, |
| "lr": 0.0007464335664335664, |
| "step": 2314, |
| "tokens_trained": 0.219959384 |
| }, |
| { |
| "epoch": 0.6570212765957447, |
| "grad_norm": 1.0906041860580444, |
| "loss": 5.115, |
| "lr": 0.0007461538461538462, |
| "step": 2316, |
| "tokens_trained": 0.2201464 |
| }, |
| { |
| "epoch": 0.6575886524822695, |
| "grad_norm": 1.154425859451294, |
| "loss": 5.1186, |
| "lr": 0.0007458741258741259, |
| "step": 2318, |
| "tokens_trained": 0.220336992 |
| }, |
| { |
| "epoch": 0.6581560283687943, |
| "grad_norm": 1.1141375303268433, |
| "loss": 5.1709, |
| "lr": 0.0007455944055944056, |
| "step": 2320, |
| "tokens_trained": 0.220525928 |
| }, |
| { |
| "epoch": 0.6587234042553192, |
| "grad_norm": 1.0958452224731445, |
| "loss": 5.1641, |
| "lr": 0.0007453146853146854, |
| "step": 2322, |
| "tokens_trained": 0.220715056 |
| }, |
| { |
| "epoch": 0.659290780141844, |
| "grad_norm": 1.168017029762268, |
| "loss": 5.1666, |
| "lr": 0.000745034965034965, |
| "step": 2324, |
| "tokens_trained": 0.220905264 |
| }, |
| { |
| "epoch": 0.6598581560283688, |
| "grad_norm": 1.044488549232483, |
| "loss": 5.2079, |
| "lr": 0.0007447552447552448, |
| "step": 2326, |
| "tokens_trained": 0.221096736 |
| }, |
| { |
| "epoch": 0.6604255319148936, |
| "grad_norm": 1.2333874702453613, |
| "loss": 5.1166, |
| "lr": 0.0007444755244755245, |
| "step": 2328, |
| "tokens_trained": 0.221287184 |
| }, |
| { |
| "epoch": 0.6609929078014184, |
| "grad_norm": 1.1800497770309448, |
| "loss": 5.1561, |
| "lr": 0.0007441958041958043, |
| "step": 2330, |
| "tokens_trained": 0.221477312 |
| }, |
| { |
| "epoch": 0.6615602836879433, |
| "grad_norm": 1.118755578994751, |
| "loss": 5.1513, |
| "lr": 0.0007439160839160839, |
| "step": 2332, |
| "tokens_trained": 0.221665208 |
| }, |
| { |
| "epoch": 0.6621276595744681, |
| "grad_norm": 1.2018475532531738, |
| "loss": 5.1007, |
| "lr": 0.0007436363636363636, |
| "step": 2334, |
| "tokens_trained": 0.221855608 |
| }, |
| { |
| "epoch": 0.662695035460993, |
| "grad_norm": 1.1832036972045898, |
| "loss": 5.0944, |
| "lr": 0.0007433566433566433, |
| "step": 2336, |
| "tokens_trained": 0.222043856 |
| }, |
| { |
| "epoch": 0.6632624113475177, |
| "grad_norm": 1.3179196119308472, |
| "loss": 5.1645, |
| "lr": 0.0007430769230769231, |
| "step": 2338, |
| "tokens_trained": 0.222235728 |
| }, |
| { |
| "epoch": 0.6638297872340425, |
| "grad_norm": 1.1313154697418213, |
| "loss": 5.1733, |
| "lr": 0.0007427972027972029, |
| "step": 2340, |
| "tokens_trained": 0.222424688 |
| }, |
| { |
| "epoch": 0.6643971631205674, |
| "grad_norm": 1.2135043144226074, |
| "loss": 5.1291, |
| "lr": 0.0007425174825174825, |
| "step": 2342, |
| "tokens_trained": 0.222611952 |
| }, |
| { |
| "epoch": 0.6649645390070922, |
| "grad_norm": 1.2418344020843506, |
| "loss": 5.178, |
| "lr": 0.0007422377622377622, |
| "step": 2344, |
| "tokens_trained": 0.222803264 |
| }, |
| { |
| "epoch": 0.6655319148936171, |
| "grad_norm": 1.2896099090576172, |
| "loss": 5.1772, |
| "lr": 0.000741958041958042, |
| "step": 2346, |
| "tokens_trained": 0.22299108 |
| }, |
| { |
| "epoch": 0.6660992907801419, |
| "grad_norm": 1.150012731552124, |
| "loss": 5.1334, |
| "lr": 0.0007416783216783217, |
| "step": 2348, |
| "tokens_trained": 0.223182336 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 1.307721495628357, |
| "loss": 5.0898, |
| "lr": 0.0007413986013986014, |
| "step": 2350, |
| "tokens_trained": 0.223371664 |
| }, |
| { |
| "epoch": 0.6672340425531915, |
| "grad_norm": 1.2633092403411865, |
| "loss": 5.1344, |
| "lr": 0.0007411188811188811, |
| "step": 2352, |
| "tokens_trained": 0.223561984 |
| }, |
| { |
| "epoch": 0.6678014184397163, |
| "grad_norm": 1.1801539659500122, |
| "loss": 5.1242, |
| "lr": 0.0007408391608391608, |
| "step": 2354, |
| "tokens_trained": 0.223750344 |
| }, |
| { |
| "epoch": 0.6683687943262412, |
| "grad_norm": 1.1279330253601074, |
| "loss": 5.1348, |
| "lr": 0.0007405594405594406, |
| "step": 2356, |
| "tokens_trained": 0.223941528 |
| }, |
| { |
| "epoch": 0.668936170212766, |
| "grad_norm": 1.193912148475647, |
| "loss": 5.1823, |
| "lr": 0.0007402797202797204, |
| "step": 2358, |
| "tokens_trained": 0.224132064 |
| }, |
| { |
| "epoch": 0.6695035460992907, |
| "grad_norm": 1.1424062252044678, |
| "loss": 5.1452, |
| "lr": 0.00074, |
| "step": 2360, |
| "tokens_trained": 0.2243216 |
| }, |
| { |
| "epoch": 0.6700709219858156, |
| "grad_norm": 1.1543093919754028, |
| "loss": 5.1199, |
| "lr": 0.0007397202797202797, |
| "step": 2362, |
| "tokens_trained": 0.224509992 |
| }, |
| { |
| "epoch": 0.6706382978723404, |
| "grad_norm": 1.2291040420532227, |
| "loss": 5.0824, |
| "lr": 0.0007394405594405595, |
| "step": 2364, |
| "tokens_trained": 0.22470124 |
| }, |
| { |
| "epoch": 0.6712056737588652, |
| "grad_norm": 1.1839559078216553, |
| "loss": 5.1486, |
| "lr": 0.0007391608391608392, |
| "step": 2366, |
| "tokens_trained": 0.224893488 |
| }, |
| { |
| "epoch": 0.6717730496453901, |
| "grad_norm": 1.1374263763427734, |
| "loss": 5.1482, |
| "lr": 0.0007388811188811189, |
| "step": 2368, |
| "tokens_trained": 0.225083304 |
| }, |
| { |
| "epoch": 0.6723404255319149, |
| "grad_norm": 1.2041044235229492, |
| "loss": 5.1055, |
| "lr": 0.0007386013986013986, |
| "step": 2370, |
| "tokens_trained": 0.225273256 |
| }, |
| { |
| "epoch": 0.6729078014184398, |
| "grad_norm": 1.1405609846115112, |
| "loss": 5.1647, |
| "lr": 0.0007383216783216782, |
| "step": 2372, |
| "tokens_trained": 0.225461976 |
| }, |
| { |
| "epoch": 0.6734751773049645, |
| "grad_norm": 1.112979531288147, |
| "loss": 5.1232, |
| "lr": 0.0007380419580419581, |
| "step": 2374, |
| "tokens_trained": 0.225651248 |
| }, |
| { |
| "epoch": 0.6737588652482269, |
| "eval_loss": 5.160866737365723, |
| "eval_runtime": 20.3049, |
| "step": 2375, |
| "tokens_trained": 0.22574612 |
| }, |
| { |
| "epoch": 0.6740425531914893, |
| "grad_norm": 1.2868081331253052, |
| "loss": 5.1802, |
| "lr": 0.0007377622377622378, |
| "step": 2376, |
| "tokens_trained": 0.225840616 |
| }, |
| { |
| "epoch": 0.6746099290780142, |
| "grad_norm": 1.0904244184494019, |
| "loss": 5.1093, |
| "lr": 0.0007374825174825175, |
| "step": 2378, |
| "tokens_trained": 0.22602952 |
| }, |
| { |
| "epoch": 0.675177304964539, |
| "grad_norm": 1.182820200920105, |
| "loss": 5.1425, |
| "lr": 0.0007372027972027972, |
| "step": 2380, |
| "tokens_trained": 0.226219912 |
| }, |
| { |
| "epoch": 0.6757446808510639, |
| "grad_norm": 1.29615318775177, |
| "loss": 5.2044, |
| "lr": 0.000736923076923077, |
| "step": 2382, |
| "tokens_trained": 0.226409832 |
| }, |
| { |
| "epoch": 0.6763120567375887, |
| "grad_norm": 1.2440109252929688, |
| "loss": 5.1722, |
| "lr": 0.0007366433566433567, |
| "step": 2384, |
| "tokens_trained": 0.226600912 |
| }, |
| { |
| "epoch": 0.6768794326241134, |
| "grad_norm": 1.2176823616027832, |
| "loss": 5.1237, |
| "lr": 0.0007363636363636363, |
| "step": 2386, |
| "tokens_trained": 0.226788136 |
| }, |
| { |
| "epoch": 0.6774468085106383, |
| "grad_norm": 1.1725387573242188, |
| "loss": 5.1334, |
| "lr": 0.0007360839160839161, |
| "step": 2388, |
| "tokens_trained": 0.22697924 |
| }, |
| { |
| "epoch": 0.6780141843971631, |
| "grad_norm": 1.0678813457489014, |
| "loss": 5.1306, |
| "lr": 0.0007358041958041957, |
| "step": 2390, |
| "tokens_trained": 0.227169576 |
| }, |
| { |
| "epoch": 0.678581560283688, |
| "grad_norm": 1.1266731023788452, |
| "loss": 5.1956, |
| "lr": 0.0007355244755244756, |
| "step": 2392, |
| "tokens_trained": 0.227361776 |
| }, |
| { |
| "epoch": 0.6791489361702128, |
| "grad_norm": 1.2048848867416382, |
| "loss": 5.1599, |
| "lr": 0.0007352447552447553, |
| "step": 2394, |
| "tokens_trained": 0.227551768 |
| }, |
| { |
| "epoch": 0.6797163120567375, |
| "grad_norm": 1.2414182424545288, |
| "loss": 5.1836, |
| "lr": 0.000734965034965035, |
| "step": 2396, |
| "tokens_trained": 0.227743072 |
| }, |
| { |
| "epoch": 0.6802836879432624, |
| "grad_norm": 1.1587010622024536, |
| "loss": 5.1589, |
| "lr": 0.0007346853146853147, |
| "step": 2398, |
| "tokens_trained": 0.227933848 |
| }, |
| { |
| "epoch": 0.6808510638297872, |
| "grad_norm": 1.1487596035003662, |
| "loss": 5.1494, |
| "lr": 0.0007344055944055944, |
| "step": 2400, |
| "tokens_trained": 0.228122304 |
| }, |
| { |
| "epoch": 0.6814184397163121, |
| "grad_norm": 1.1008368730545044, |
| "loss": 5.1614, |
| "lr": 0.0007341258741258742, |
| "step": 2402, |
| "tokens_trained": 0.228311624 |
| }, |
| { |
| "epoch": 0.6819858156028369, |
| "grad_norm": 1.0571539402008057, |
| "loss": 5.1373, |
| "lr": 0.0007338461538461538, |
| "step": 2404, |
| "tokens_trained": 0.228501208 |
| }, |
| { |
| "epoch": 0.6825531914893617, |
| "grad_norm": 1.1685987710952759, |
| "loss": 5.1439, |
| "lr": 0.0007335664335664336, |
| "step": 2406, |
| "tokens_trained": 0.228691272 |
| }, |
| { |
| "epoch": 0.6831205673758866, |
| "grad_norm": 1.2319012880325317, |
| "loss": 5.1949, |
| "lr": 0.0007332867132867132, |
| "step": 2408, |
| "tokens_trained": 0.228881608 |
| }, |
| { |
| "epoch": 0.6836879432624113, |
| "grad_norm": 1.1806107759475708, |
| "loss": 5.1467, |
| "lr": 0.0007330069930069931, |
| "step": 2410, |
| "tokens_trained": 0.229073152 |
| }, |
| { |
| "epoch": 0.6842553191489362, |
| "grad_norm": 1.1616697311401367, |
| "loss": 5.1553, |
| "lr": 0.0007327272727272728, |
| "step": 2412, |
| "tokens_trained": 0.229263656 |
| }, |
| { |
| "epoch": 0.684822695035461, |
| "grad_norm": 1.143112063407898, |
| "loss": 5.091, |
| "lr": 0.0007324475524475524, |
| "step": 2414, |
| "tokens_trained": 0.229454224 |
| }, |
| { |
| "epoch": 0.6853900709219858, |
| "grad_norm": 1.2467398643493652, |
| "loss": 5.1778, |
| "lr": 0.0007321678321678322, |
| "step": 2416, |
| "tokens_trained": 0.22964568 |
| }, |
| { |
| "epoch": 0.6859574468085107, |
| "grad_norm": 1.1989973783493042, |
| "loss": 5.146, |
| "lr": 0.0007318881118881119, |
| "step": 2418, |
| "tokens_trained": 0.229836448 |
| }, |
| { |
| "epoch": 0.6865248226950355, |
| "grad_norm": 1.3296927213668823, |
| "loss": 5.1446, |
| "lr": 0.0007316083916083917, |
| "step": 2420, |
| "tokens_trained": 0.230027424 |
| }, |
| { |
| "epoch": 0.6870921985815602, |
| "grad_norm": 1.256990671157837, |
| "loss": 5.1396, |
| "lr": 0.0007313286713286713, |
| "step": 2422, |
| "tokens_trained": 0.23022012 |
| }, |
| { |
| "epoch": 0.6876595744680851, |
| "grad_norm": 1.1474595069885254, |
| "loss": 5.1263, |
| "lr": 0.0007310489510489511, |
| "step": 2424, |
| "tokens_trained": 0.230410232 |
| }, |
| { |
| "epoch": 0.6882269503546099, |
| "grad_norm": 1.2070049047470093, |
| "loss": 5.1169, |
| "lr": 0.0007307692307692307, |
| "step": 2426, |
| "tokens_trained": 0.230601056 |
| }, |
| { |
| "epoch": 0.6887943262411348, |
| "grad_norm": 1.2047003507614136, |
| "loss": 5.1146, |
| "lr": 0.0007304895104895105, |
| "step": 2428, |
| "tokens_trained": 0.230791056 |
| }, |
| { |
| "epoch": 0.6893617021276596, |
| "grad_norm": 1.3246855735778809, |
| "loss": 5.1864, |
| "lr": 0.0007302097902097902, |
| "step": 2430, |
| "tokens_trained": 0.230981904 |
| }, |
| { |
| "epoch": 0.6899290780141843, |
| "grad_norm": 1.2012712955474854, |
| "loss": 5.168, |
| "lr": 0.0007299300699300699, |
| "step": 2432, |
| "tokens_trained": 0.231170976 |
| }, |
| { |
| "epoch": 0.6904964539007092, |
| "grad_norm": 1.2258418798446655, |
| "loss": 5.14, |
| "lr": 0.0007296503496503497, |
| "step": 2434, |
| "tokens_trained": 0.231362024 |
| }, |
| { |
| "epoch": 0.691063829787234, |
| "grad_norm": 1.2767595052719116, |
| "loss": 5.1775, |
| "lr": 0.0007293706293706294, |
| "step": 2436, |
| "tokens_trained": 0.23155 |
| }, |
| { |
| "epoch": 0.6916312056737589, |
| "grad_norm": 1.204324722290039, |
| "loss": 5.1357, |
| "lr": 0.0007290909090909092, |
| "step": 2438, |
| "tokens_trained": 0.231739944 |
| }, |
| { |
| "epoch": 0.6921985815602837, |
| "grad_norm": 1.1876553297042847, |
| "loss": 5.1185, |
| "lr": 0.0007288111888111888, |
| "step": 2440, |
| "tokens_trained": 0.231930448 |
| }, |
| { |
| "epoch": 0.6927659574468085, |
| "grad_norm": 1.2512568235397339, |
| "loss": 5.1212, |
| "lr": 0.0007285314685314685, |
| "step": 2442, |
| "tokens_trained": 0.23212152 |
| }, |
| { |
| "epoch": 0.6933333333333334, |
| "grad_norm": 1.2961020469665527, |
| "loss": 5.0622, |
| "lr": 0.0007282517482517482, |
| "step": 2444, |
| "tokens_trained": 0.232310856 |
| }, |
| { |
| "epoch": 0.6939007092198581, |
| "grad_norm": 1.1042410135269165, |
| "loss": 5.1317, |
| "lr": 0.000727972027972028, |
| "step": 2446, |
| "tokens_trained": 0.232499144 |
| }, |
| { |
| "epoch": 0.694468085106383, |
| "grad_norm": 1.0408610105514526, |
| "loss": 5.1562, |
| "lr": 0.0007276923076923077, |
| "step": 2448, |
| "tokens_trained": 0.232689864 |
| }, |
| { |
| "epoch": 0.6950354609929078, |
| "grad_norm": 1.1109600067138672, |
| "loss": 5.1463, |
| "lr": 0.0007274125874125874, |
| "step": 2450, |
| "tokens_trained": 0.232878912 |
| }, |
| { |
| "epoch": 0.6956028368794326, |
| "grad_norm": 1.0867618322372437, |
| "loss": 5.105, |
| "lr": 0.0007271328671328672, |
| "step": 2452, |
| "tokens_trained": 0.233069416 |
| }, |
| { |
| "epoch": 0.6961702127659575, |
| "grad_norm": 1.0342003107070923, |
| "loss": 5.1431, |
| "lr": 0.0007268531468531469, |
| "step": 2454, |
| "tokens_trained": 0.233258552 |
| }, |
| { |
| "epoch": 0.6967375886524823, |
| "grad_norm": 1.2264306545257568, |
| "loss": 5.1646, |
| "lr": 0.0007265734265734266, |
| "step": 2456, |
| "tokens_trained": 0.233448464 |
| }, |
| { |
| "epoch": 0.6973049645390071, |
| "grad_norm": 1.1715648174285889, |
| "loss": 5.1194, |
| "lr": 0.0007262937062937063, |
| "step": 2458, |
| "tokens_trained": 0.23364024 |
| }, |
| { |
| "epoch": 0.6978723404255319, |
| "grad_norm": 1.05716872215271, |
| "loss": 5.09, |
| "lr": 0.000726013986013986, |
| "step": 2460, |
| "tokens_trained": 0.233829848 |
| }, |
| { |
| "epoch": 0.6984397163120567, |
| "grad_norm": 1.1329678297042847, |
| "loss": 5.1303, |
| "lr": 0.0007257342657342657, |
| "step": 2462, |
| "tokens_trained": 0.234021368 |
| }, |
| { |
| "epoch": 0.6990070921985816, |
| "grad_norm": 1.2084178924560547, |
| "loss": 5.1393, |
| "lr": 0.0007254545454545455, |
| "step": 2464, |
| "tokens_trained": 0.234210264 |
| }, |
| { |
| "epoch": 0.6995744680851064, |
| "grad_norm": 1.0744361877441406, |
| "loss": 5.1067, |
| "lr": 0.0007251748251748252, |
| "step": 2466, |
| "tokens_trained": 0.234399616 |
| }, |
| { |
| "epoch": 0.7001418439716312, |
| "grad_norm": 1.1711128950119019, |
| "loss": 5.1226, |
| "lr": 0.0007248951048951049, |
| "step": 2468, |
| "tokens_trained": 0.234589936 |
| }, |
| { |
| "epoch": 0.700709219858156, |
| "grad_norm": 1.2188383340835571, |
| "loss": 5.1139, |
| "lr": 0.0007246153846153846, |
| "step": 2470, |
| "tokens_trained": 0.234781376 |
| }, |
| { |
| "epoch": 0.7012765957446808, |
| "grad_norm": 1.1662676334381104, |
| "loss": 5.137, |
| "lr": 0.0007243356643356644, |
| "step": 2472, |
| "tokens_trained": 0.234972192 |
| }, |
| { |
| "epoch": 0.7018439716312057, |
| "grad_norm": 1.18717622756958, |
| "loss": 5.1665, |
| "lr": 0.0007240559440559441, |
| "step": 2474, |
| "tokens_trained": 0.235162472 |
| }, |
| { |
| "epoch": 0.7024113475177305, |
| "grad_norm": 1.1546517610549927, |
| "loss": 5.1503, |
| "lr": 0.0007237762237762238, |
| "step": 2476, |
| "tokens_trained": 0.23535256 |
| }, |
| { |
| "epoch": 0.7029787234042553, |
| "grad_norm": 1.0647573471069336, |
| "loss": 5.155, |
| "lr": 0.0007234965034965035, |
| "step": 2478, |
| "tokens_trained": 0.235543424 |
| }, |
| { |
| "epoch": 0.7035460992907802, |
| "grad_norm": 1.1157219409942627, |
| "loss": 5.1561, |
| "lr": 0.0007232167832167831, |
| "step": 2480, |
| "tokens_trained": 0.23573568 |
| }, |
| { |
| "epoch": 0.7041134751773049, |
| "grad_norm": 1.1972934007644653, |
| "loss": 5.1271, |
| "lr": 0.000722937062937063, |
| "step": 2482, |
| "tokens_trained": 0.235927072 |
| }, |
| { |
| "epoch": 0.7046808510638298, |
| "grad_norm": 1.0370620489120483, |
| "loss": 5.1016, |
| "lr": 0.0007226573426573426, |
| "step": 2484, |
| "tokens_trained": 0.236116528 |
| }, |
| { |
| "epoch": 0.7052482269503546, |
| "grad_norm": 1.1389620304107666, |
| "loss": 5.1422, |
| "lr": 0.0007223776223776224, |
| "step": 2486, |
| "tokens_trained": 0.236305864 |
| }, |
| { |
| "epoch": 0.7058156028368794, |
| "grad_norm": 1.1045559644699097, |
| "loss": 5.1434, |
| "lr": 0.0007220979020979021, |
| "step": 2488, |
| "tokens_trained": 0.236494224 |
| }, |
| { |
| "epoch": 0.7063829787234043, |
| "grad_norm": 1.1014395952224731, |
| "loss": 5.1462, |
| "lr": 0.0007218181818181819, |
| "step": 2490, |
| "tokens_trained": 0.236684376 |
| }, |
| { |
| "epoch": 0.706950354609929, |
| "grad_norm": 1.0460759401321411, |
| "loss": 5.126, |
| "lr": 0.0007215384615384616, |
| "step": 2492, |
| "tokens_trained": 0.236875272 |
| }, |
| { |
| "epoch": 0.707517730496454, |
| "grad_norm": 1.0848767757415771, |
| "loss": 5.1387, |
| "lr": 0.0007212587412587412, |
| "step": 2494, |
| "tokens_trained": 0.237065552 |
| }, |
| { |
| "epoch": 0.7080851063829787, |
| "grad_norm": 1.1626802682876587, |
| "loss": 5.1509, |
| "lr": 0.000720979020979021, |
| "step": 2496, |
| "tokens_trained": 0.237254944 |
| }, |
| { |
| "epoch": 0.7086524822695035, |
| "grad_norm": 1.1846860647201538, |
| "loss": 5.098, |
| "lr": 0.0007206993006993006, |
| "step": 2498, |
| "tokens_trained": 0.237444488 |
| }, |
| { |
| "epoch": 0.7092198581560284, |
| "grad_norm": 1.2549248933792114, |
| "loss": 5.1104, |
| "lr": 0.0007204195804195805, |
| "step": 2500, |
| "tokens_trained": 0.237633528 |
| }, |
| { |
| "epoch": 0.7092198581560284, |
| "eval_loss": 5.141824245452881, |
| "eval_runtime": 20.5081, |
| "step": 2500, |
| "tokens_trained": 0.237633528 |
| }, |
| { |
| "epoch": 0.7097872340425532, |
| "grad_norm": 1.19071626663208, |
| "loss": 5.2249, |
| "lr": 0.0007201398601398601, |
| "step": 2502, |
| "tokens_trained": 0.237823136 |
| }, |
| { |
| "epoch": 0.7103546099290781, |
| "grad_norm": 1.162804365158081, |
| "loss": 5.1099, |
| "lr": 0.0007198601398601399, |
| "step": 2504, |
| "tokens_trained": 0.238012752 |
| }, |
| { |
| "epoch": 0.7109219858156028, |
| "grad_norm": 1.0964027643203735, |
| "loss": 5.1015, |
| "lr": 0.0007195804195804196, |
| "step": 2506, |
| "tokens_trained": 0.238205472 |
| }, |
| { |
| "epoch": 0.7114893617021276, |
| "grad_norm": 1.0719815492630005, |
| "loss": 5.1425, |
| "lr": 0.0007193006993006994, |
| "step": 2508, |
| "tokens_trained": 0.238394848 |
| }, |
| { |
| "epoch": 0.7120567375886525, |
| "grad_norm": 1.1835323572158813, |
| "loss": 5.0744, |
| "lr": 0.0007190209790209791, |
| "step": 2510, |
| "tokens_trained": 0.238583408 |
| }, |
| { |
| "epoch": 0.7126241134751773, |
| "grad_norm": 1.0975273847579956, |
| "loss": 5.0346, |
| "lr": 0.0007187412587412587, |
| "step": 2512, |
| "tokens_trained": 0.238773544 |
| }, |
| { |
| "epoch": 0.7131914893617022, |
| "grad_norm": 1.1507470607757568, |
| "loss": 5.146, |
| "lr": 0.0007184615384615385, |
| "step": 2514, |
| "tokens_trained": 0.238962624 |
| }, |
| { |
| "epoch": 0.713758865248227, |
| "grad_norm": 1.1186292171478271, |
| "loss": 5.1934, |
| "lr": 0.0007181818181818181, |
| "step": 2516, |
| "tokens_trained": 0.239152848 |
| }, |
| { |
| "epoch": 0.7143262411347517, |
| "grad_norm": 1.0672920942306519, |
| "loss": 5.1488, |
| "lr": 0.000717902097902098, |
| "step": 2518, |
| "tokens_trained": 0.239344248 |
| }, |
| { |
| "epoch": 0.7148936170212766, |
| "grad_norm": 1.1226296424865723, |
| "loss": 5.0799, |
| "lr": 0.0007176223776223776, |
| "step": 2520, |
| "tokens_trained": 0.239535088 |
| }, |
| { |
| "epoch": 0.7154609929078014, |
| "grad_norm": 1.134265422821045, |
| "loss": 5.1677, |
| "lr": 0.0007173426573426573, |
| "step": 2522, |
| "tokens_trained": 0.23972356 |
| }, |
| { |
| "epoch": 0.7160283687943262, |
| "grad_norm": 1.1157846450805664, |
| "loss": 5.1576, |
| "lr": 0.0007170629370629371, |
| "step": 2524, |
| "tokens_trained": 0.239914104 |
| }, |
| { |
| "epoch": 0.7165957446808511, |
| "grad_norm": 1.096637487411499, |
| "loss": 5.1512, |
| "lr": 0.0007167832167832168, |
| "step": 2526, |
| "tokens_trained": 0.24010344 |
| }, |
| { |
| "epoch": 0.7171631205673759, |
| "grad_norm": 1.0092846155166626, |
| "loss": 5.0907, |
| "lr": 0.0007165034965034966, |
| "step": 2528, |
| "tokens_trained": 0.240294496 |
| }, |
| { |
| "epoch": 0.7177304964539007, |
| "grad_norm": 0.9926803112030029, |
| "loss": 5.112, |
| "lr": 0.0007162237762237762, |
| "step": 2530, |
| "tokens_trained": 0.240484752 |
| }, |
| { |
| "epoch": 0.7182978723404255, |
| "grad_norm": 1.031894326210022, |
| "loss": 5.13, |
| "lr": 0.000715944055944056, |
| "step": 2532, |
| "tokens_trained": 0.240674024 |
| }, |
| { |
| "epoch": 0.7188652482269503, |
| "grad_norm": 1.0606821775436401, |
| "loss": 5.1229, |
| "lr": 0.0007156643356643356, |
| "step": 2534, |
| "tokens_trained": 0.24086436 |
| }, |
| { |
| "epoch": 0.7194326241134752, |
| "grad_norm": 1.0486221313476562, |
| "loss": 5.1179, |
| "lr": 0.0007153846153846155, |
| "step": 2536, |
| "tokens_trained": 0.241052096 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.073940396308899, |
| "loss": 5.1147, |
| "lr": 0.0007151048951048951, |
| "step": 2538, |
| "tokens_trained": 0.241242064 |
| }, |
| { |
| "epoch": 0.7205673758865249, |
| "grad_norm": 1.0888422727584839, |
| "loss": 5.1442, |
| "lr": 0.0007148251748251748, |
| "step": 2540, |
| "tokens_trained": 0.241429472 |
| }, |
| { |
| "epoch": 0.7211347517730496, |
| "grad_norm": 1.0362575054168701, |
| "loss": 5.1482, |
| "lr": 0.0007145454545454546, |
| "step": 2542, |
| "tokens_trained": 0.241619464 |
| }, |
| { |
| "epoch": 0.7217021276595744, |
| "grad_norm": 1.020987629890442, |
| "loss": 5.1809, |
| "lr": 0.0007142657342657343, |
| "step": 2544, |
| "tokens_trained": 0.241810584 |
| }, |
| { |
| "epoch": 0.7222695035460993, |
| "grad_norm": 1.1145941019058228, |
| "loss": 5.07, |
| "lr": 0.0007139860139860141, |
| "step": 2546, |
| "tokens_trained": 0.242001336 |
| }, |
| { |
| "epoch": 0.7228368794326241, |
| "grad_norm": 1.114311933517456, |
| "loss": 5.1288, |
| "lr": 0.0007137062937062937, |
| "step": 2548, |
| "tokens_trained": 0.242191648 |
| }, |
| { |
| "epoch": 0.723404255319149, |
| "grad_norm": 1.2127752304077148, |
| "loss": 5.1414, |
| "lr": 0.0007134265734265734, |
| "step": 2550, |
| "tokens_trained": 0.2423814 |
| }, |
| { |
| "epoch": 0.7239716312056738, |
| "grad_norm": 1.2173429727554321, |
| "loss": 5.0843, |
| "lr": 0.0007131468531468531, |
| "step": 2552, |
| "tokens_trained": 0.242571344 |
| }, |
| { |
| "epoch": 0.7245390070921985, |
| "grad_norm": 1.269544005393982, |
| "loss": 5.0945, |
| "lr": 0.0007128671328671329, |
| "step": 2554, |
| "tokens_trained": 0.242760304 |
| }, |
| { |
| "epoch": 0.7251063829787234, |
| "grad_norm": 1.1891573667526245, |
| "loss": 5.1301, |
| "lr": 0.0007125874125874126, |
| "step": 2556, |
| "tokens_trained": 0.242950432 |
| }, |
| { |
| "epoch": 0.7256737588652482, |
| "grad_norm": 1.1826258897781372, |
| "loss": 5.1463, |
| "lr": 0.0007123076923076923, |
| "step": 2558, |
| "tokens_trained": 0.243140944 |
| }, |
| { |
| "epoch": 0.7262411347517731, |
| "grad_norm": 1.0478367805480957, |
| "loss": 5.1082, |
| "lr": 0.0007120279720279721, |
| "step": 2560, |
| "tokens_trained": 0.243331192 |
| }, |
| { |
| "epoch": 0.7268085106382979, |
| "grad_norm": 1.05866539478302, |
| "loss": 5.135, |
| "lr": 0.0007117482517482518, |
| "step": 2562, |
| "tokens_trained": 0.243519712 |
| }, |
| { |
| "epoch": 0.7273758865248227, |
| "grad_norm": 1.1300735473632812, |
| "loss": 5.0985, |
| "lr": 0.0007114685314685315, |
| "step": 2564, |
| "tokens_trained": 0.243710408 |
| }, |
| { |
| "epoch": 0.7279432624113475, |
| "grad_norm": 1.0662705898284912, |
| "loss": 5.1482, |
| "lr": 0.0007111888111888112, |
| "step": 2566, |
| "tokens_trained": 0.243899576 |
| }, |
| { |
| "epoch": 0.7285106382978723, |
| "grad_norm": 1.0905804634094238, |
| "loss": 5.103, |
| "lr": 0.0007109090909090909, |
| "step": 2568, |
| "tokens_trained": 0.244090984 |
| }, |
| { |
| "epoch": 0.7290780141843972, |
| "grad_norm": 1.2062023878097534, |
| "loss": 5.1318, |
| "lr": 0.0007106293706293706, |
| "step": 2570, |
| "tokens_trained": 0.244280584 |
| }, |
| { |
| "epoch": 0.729645390070922, |
| "grad_norm": 1.0444546937942505, |
| "loss": 5.144, |
| "lr": 0.0007103496503496504, |
| "step": 2572, |
| "tokens_trained": 0.244471384 |
| }, |
| { |
| "epoch": 0.7302127659574468, |
| "grad_norm": 1.0395665168762207, |
| "loss": 5.0944, |
| "lr": 0.0007100699300699301, |
| "step": 2574, |
| "tokens_trained": 0.24466056 |
| }, |
| { |
| "epoch": 0.7307801418439717, |
| "grad_norm": 1.0630977153778076, |
| "loss": 5.1038, |
| "lr": 0.0007097902097902098, |
| "step": 2576, |
| "tokens_trained": 0.2448524 |
| }, |
| { |
| "epoch": 0.7313475177304964, |
| "grad_norm": 1.1561299562454224, |
| "loss": 5.1544, |
| "lr": 0.0007095104895104895, |
| "step": 2578, |
| "tokens_trained": 0.245042104 |
| }, |
| { |
| "epoch": 0.7319148936170212, |
| "grad_norm": 1.1774277687072754, |
| "loss": 5.1366, |
| "lr": 0.0007092307692307692, |
| "step": 2580, |
| "tokens_trained": 0.245231832 |
| }, |
| { |
| "epoch": 0.7324822695035461, |
| "grad_norm": 1.2139825820922852, |
| "loss": 5.1195, |
| "lr": 0.000708951048951049, |
| "step": 2582, |
| "tokens_trained": 0.24542076 |
| }, |
| { |
| "epoch": 0.7330496453900709, |
| "grad_norm": 1.1340903043746948, |
| "loss": 5.1476, |
| "lr": 0.0007086713286713287, |
| "step": 2584, |
| "tokens_trained": 0.245613128 |
| }, |
| { |
| "epoch": 0.7336170212765958, |
| "grad_norm": 1.2109994888305664, |
| "loss": 5.1359, |
| "lr": 0.0007083916083916084, |
| "step": 2586, |
| "tokens_trained": 0.245803992 |
| }, |
| { |
| "epoch": 0.7341843971631206, |
| "grad_norm": 1.1087621450424194, |
| "loss": 5.1287, |
| "lr": 0.000708111888111888, |
| "step": 2588, |
| "tokens_trained": 0.245994816 |
| }, |
| { |
| "epoch": 0.7347517730496453, |
| "grad_norm": 1.206106424331665, |
| "loss": 5.1618, |
| "lr": 0.0007078321678321679, |
| "step": 2590, |
| "tokens_trained": 0.246183624 |
| }, |
| { |
| "epoch": 0.7353191489361702, |
| "grad_norm": 1.0370070934295654, |
| "loss": 5.1103, |
| "lr": 0.0007075524475524475, |
| "step": 2592, |
| "tokens_trained": 0.246375232 |
| }, |
| { |
| "epoch": 0.735886524822695, |
| "grad_norm": 0.9844968914985657, |
| "loss": 5.1266, |
| "lr": 0.0007072727272727273, |
| "step": 2594, |
| "tokens_trained": 0.246565048 |
| }, |
| { |
| "epoch": 0.7364539007092199, |
| "grad_norm": 1.0623670816421509, |
| "loss": 5.1341, |
| "lr": 0.000706993006993007, |
| "step": 2596, |
| "tokens_trained": 0.246754136 |
| }, |
| { |
| "epoch": 0.7370212765957447, |
| "grad_norm": 1.1878798007965088, |
| "loss": 5.1178, |
| "lr": 0.0007067132867132867, |
| "step": 2598, |
| "tokens_trained": 0.246944496 |
| }, |
| { |
| "epoch": 0.7375886524822695, |
| "grad_norm": 1.045849323272705, |
| "loss": 5.1151, |
| "lr": 0.0007064335664335665, |
| "step": 2600, |
| "tokens_trained": 0.247135616 |
| }, |
| { |
| "epoch": 0.7381560283687943, |
| "grad_norm": 1.1081782579421997, |
| "loss": 5.0699, |
| "lr": 0.0007061538461538462, |
| "step": 2602, |
| "tokens_trained": 0.247326864 |
| }, |
| { |
| "epoch": 0.7387234042553191, |
| "grad_norm": 1.0893741846084595, |
| "loss": 5.0967, |
| "lr": 0.0007058741258741259, |
| "step": 2604, |
| "tokens_trained": 0.247515736 |
| }, |
| { |
| "epoch": 0.739290780141844, |
| "grad_norm": 1.128481149673462, |
| "loss": 5.1136, |
| "lr": 0.0007055944055944055, |
| "step": 2606, |
| "tokens_trained": 0.24770688 |
| }, |
| { |
| "epoch": 0.7398581560283688, |
| "grad_norm": 1.0735145807266235, |
| "loss": 5.1127, |
| "lr": 0.0007053146853146854, |
| "step": 2608, |
| "tokens_trained": 0.247897584 |
| }, |
| { |
| "epoch": 0.7404255319148936, |
| "grad_norm": 1.0027481317520142, |
| "loss": 5.1157, |
| "lr": 0.000705034965034965, |
| "step": 2610, |
| "tokens_trained": 0.248088352 |
| }, |
| { |
| "epoch": 0.7409929078014185, |
| "grad_norm": 1.0782684087753296, |
| "loss": 5.1268, |
| "lr": 0.0007047552447552448, |
| "step": 2612, |
| "tokens_trained": 0.248277752 |
| }, |
| { |
| "epoch": 0.7415602836879432, |
| "grad_norm": 1.0961271524429321, |
| "loss": 5.1024, |
| "lr": 0.0007044755244755245, |
| "step": 2614, |
| "tokens_trained": 0.248466504 |
| }, |
| { |
| "epoch": 0.7421276595744681, |
| "grad_norm": 0.9727640151977539, |
| "loss": 5.067, |
| "lr": 0.0007041958041958041, |
| "step": 2616, |
| "tokens_trained": 0.248657896 |
| }, |
| { |
| "epoch": 0.7426950354609929, |
| "grad_norm": 0.9756829738616943, |
| "loss": 5.1326, |
| "lr": 0.000703916083916084, |
| "step": 2618, |
| "tokens_trained": 0.248849288 |
| }, |
| { |
| "epoch": 0.7432624113475177, |
| "grad_norm": 0.9990546703338623, |
| "loss": 5.2016, |
| "lr": 0.0007036363636363636, |
| "step": 2620, |
| "tokens_trained": 0.24903988 |
| }, |
| { |
| "epoch": 0.7438297872340426, |
| "grad_norm": 1.062199592590332, |
| "loss": 5.1517, |
| "lr": 0.0007033566433566434, |
| "step": 2622, |
| "tokens_trained": 0.24922872 |
| }, |
| { |
| "epoch": 0.7443971631205674, |
| "grad_norm": 1.138197422027588, |
| "loss": 5.1052, |
| "lr": 0.000703076923076923, |
| "step": 2624, |
| "tokens_trained": 0.249420464 |
| }, |
| { |
| "epoch": 0.7446808510638298, |
| "eval_loss": 5.127779960632324, |
| "eval_runtime": 20.9141, |
| "step": 2625, |
| "tokens_trained": 0.249516464 |
| }, |
| { |
| "epoch": 0.7449645390070923, |
| "grad_norm": 1.1704756021499634, |
| "loss": 5.1167, |
| "lr": 0.0007027972027972029, |
| "step": 2626, |
| "tokens_trained": 0.249612824 |
| }, |
| { |
| "epoch": 0.745531914893617, |
| "grad_norm": 1.067280888557434, |
| "loss": 5.0877, |
| "lr": 0.0007025174825174825, |
| "step": 2628, |
| "tokens_trained": 0.249801672 |
| }, |
| { |
| "epoch": 0.7460992907801418, |
| "grad_norm": 1.0734069347381592, |
| "loss": 5.091, |
| "lr": 0.0007022377622377623, |
| "step": 2630, |
| "tokens_trained": 0.249993136 |
| }, |
| { |
| "epoch": 0.7466666666666667, |
| "grad_norm": 1.0817586183547974, |
| "loss": 5.0894, |
| "lr": 0.000701958041958042, |
| "step": 2632, |
| "tokens_trained": 0.25018232 |
| }, |
| { |
| "epoch": 0.7472340425531915, |
| "grad_norm": 1.0738139152526855, |
| "loss": 5.1141, |
| "lr": 0.0007016783216783216, |
| "step": 2634, |
| "tokens_trained": 0.250373456 |
| }, |
| { |
| "epoch": 0.7478014184397163, |
| "grad_norm": 1.0292818546295166, |
| "loss": 5.0746, |
| "lr": 0.0007013986013986015, |
| "step": 2636, |
| "tokens_trained": 0.250563552 |
| }, |
| { |
| "epoch": 0.7483687943262411, |
| "grad_norm": 1.0308977365493774, |
| "loss": 5.1346, |
| "lr": 0.0007011188811188811, |
| "step": 2638, |
| "tokens_trained": 0.25075176 |
| }, |
| { |
| "epoch": 0.7489361702127659, |
| "grad_norm": 1.0287693738937378, |
| "loss": 5.1137, |
| "lr": 0.0007008391608391609, |
| "step": 2640, |
| "tokens_trained": 0.250939456 |
| }, |
| { |
| "epoch": 0.7495035460992908, |
| "grad_norm": 1.043565273284912, |
| "loss": 5.1251, |
| "lr": 0.0007005594405594405, |
| "step": 2642, |
| "tokens_trained": 0.251130456 |
| }, |
| { |
| "epoch": 0.7500709219858156, |
| "grad_norm": 1.0977740287780762, |
| "loss": 5.0959, |
| "lr": 0.0007002797202797204, |
| "step": 2644, |
| "tokens_trained": 0.251320016 |
| }, |
| { |
| "epoch": 0.7506382978723404, |
| "grad_norm": 1.0304359197616577, |
| "loss": 5.0893, |
| "lr": 0.0007, |
| "step": 2646, |
| "tokens_trained": 0.251509824 |
| }, |
| { |
| "epoch": 0.7512056737588653, |
| "grad_norm": 1.0331344604492188, |
| "loss": 5.1238, |
| "lr": 0.0006997202797202797, |
| "step": 2648, |
| "tokens_trained": 0.251700504 |
| }, |
| { |
| "epoch": 0.75177304964539, |
| "grad_norm": 1.0405573844909668, |
| "loss": 5.1301, |
| "lr": 0.0006994405594405595, |
| "step": 2650, |
| "tokens_trained": 0.251890936 |
| }, |
| { |
| "epoch": 0.7523404255319149, |
| "grad_norm": 1.0685805082321167, |
| "loss": 5.1354, |
| "lr": 0.0006991608391608391, |
| "step": 2652, |
| "tokens_trained": 0.252081296 |
| }, |
| { |
| "epoch": 0.7529078014184397, |
| "grad_norm": 1.0597950220108032, |
| "loss": 5.1229, |
| "lr": 0.000698881118881119, |
| "step": 2654, |
| "tokens_trained": 0.252270456 |
| }, |
| { |
| "epoch": 0.7534751773049645, |
| "grad_norm": 1.0094919204711914, |
| "loss": 5.1077, |
| "lr": 0.0006986013986013986, |
| "step": 2656, |
| "tokens_trained": 0.252459416 |
| }, |
| { |
| "epoch": 0.7540425531914894, |
| "grad_norm": 1.0850694179534912, |
| "loss": 5.0876, |
| "lr": 0.0006983216783216784, |
| "step": 2658, |
| "tokens_trained": 0.252649656 |
| }, |
| { |
| "epoch": 0.7546099290780142, |
| "grad_norm": 1.0182054042816162, |
| "loss": 5.0842, |
| "lr": 0.000698041958041958, |
| "step": 2660, |
| "tokens_trained": 0.252840488 |
| }, |
| { |
| "epoch": 0.755177304964539, |
| "grad_norm": 1.074000597000122, |
| "loss": 5.1387, |
| "lr": 0.0006977622377622378, |
| "step": 2662, |
| "tokens_trained": 0.253030672 |
| }, |
| { |
| "epoch": 0.7557446808510638, |
| "grad_norm": 1.1259658336639404, |
| "loss": 5.1334, |
| "lr": 0.0006974825174825175, |
| "step": 2664, |
| "tokens_trained": 0.253221976 |
| }, |
| { |
| "epoch": 0.7563120567375886, |
| "grad_norm": 1.0146551132202148, |
| "loss": 5.0995, |
| "lr": 0.0006972027972027972, |
| "step": 2666, |
| "tokens_trained": 0.253414352 |
| }, |
| { |
| "epoch": 0.7568794326241135, |
| "grad_norm": 1.1268185377120972, |
| "loss": 5.1201, |
| "lr": 0.000696923076923077, |
| "step": 2668, |
| "tokens_trained": 0.25360448 |
| }, |
| { |
| "epoch": 0.7574468085106383, |
| "grad_norm": 1.025431752204895, |
| "loss": 5.0546, |
| "lr": 0.0006966433566433566, |
| "step": 2670, |
| "tokens_trained": 0.253791368 |
| }, |
| { |
| "epoch": 0.7580141843971632, |
| "grad_norm": 1.108112096786499, |
| "loss": 5.0917, |
| "lr": 0.0006963636363636365, |
| "step": 2672, |
| "tokens_trained": 0.253982112 |
| }, |
| { |
| "epoch": 0.758581560283688, |
| "grad_norm": 1.1009857654571533, |
| "loss": 5.1447, |
| "lr": 0.0006960839160839161, |
| "step": 2674, |
| "tokens_trained": 0.254173328 |
| }, |
| { |
| "epoch": 0.7591489361702127, |
| "grad_norm": 1.0718492269515991, |
| "loss": 5.1093, |
| "lr": 0.0006958041958041958, |
| "step": 2676, |
| "tokens_trained": 0.254363624 |
| }, |
| { |
| "epoch": 0.7597163120567376, |
| "grad_norm": 1.0715916156768799, |
| "loss": 5.1287, |
| "lr": 0.0006955244755244755, |
| "step": 2678, |
| "tokens_trained": 0.25455316 |
| }, |
| { |
| "epoch": 0.7602836879432624, |
| "grad_norm": 1.0953240394592285, |
| "loss": 5.1031, |
| "lr": 0.0006952447552447553, |
| "step": 2680, |
| "tokens_trained": 0.254742424 |
| }, |
| { |
| "epoch": 0.7608510638297873, |
| "grad_norm": 1.0574376583099365, |
| "loss": 5.1316, |
| "lr": 0.000694965034965035, |
| "step": 2682, |
| "tokens_trained": 0.254933624 |
| }, |
| { |
| "epoch": 0.7614184397163121, |
| "grad_norm": 1.1887143850326538, |
| "loss": 5.1261, |
| "lr": 0.0006946853146853147, |
| "step": 2684, |
| "tokens_trained": 0.255124424 |
| }, |
| { |
| "epoch": 0.7619858156028368, |
| "grad_norm": 1.0359193086624146, |
| "loss": 5.1584, |
| "lr": 0.0006944055944055943, |
| "step": 2686, |
| "tokens_trained": 0.255314344 |
| }, |
| { |
| "epoch": 0.7625531914893617, |
| "grad_norm": 1.1207493543624878, |
| "loss": 5.1496, |
| "lr": 0.0006941258741258741, |
| "step": 2688, |
| "tokens_trained": 0.255503192 |
| }, |
| { |
| "epoch": 0.7631205673758865, |
| "grad_norm": 1.1609482765197754, |
| "loss": 5.1403, |
| "lr": 0.0006938461538461539, |
| "step": 2690, |
| "tokens_trained": 0.25569088 |
| }, |
| { |
| "epoch": 0.7636879432624113, |
| "grad_norm": 1.0204665660858154, |
| "loss": 5.0891, |
| "lr": 0.0006935664335664336, |
| "step": 2692, |
| "tokens_trained": 0.255880216 |
| }, |
| { |
| "epoch": 0.7642553191489362, |
| "grad_norm": 1.064090371131897, |
| "loss": 5.0507, |
| "lr": 0.0006932867132867133, |
| "step": 2694, |
| "tokens_trained": 0.256070744 |
| }, |
| { |
| "epoch": 0.764822695035461, |
| "grad_norm": 1.1102992296218872, |
| "loss": 5.062, |
| "lr": 0.000693006993006993, |
| "step": 2696, |
| "tokens_trained": 0.256261136 |
| }, |
| { |
| "epoch": 0.7653900709219859, |
| "grad_norm": 1.0316580533981323, |
| "loss": 5.0933, |
| "lr": 0.0006927272727272728, |
| "step": 2698, |
| "tokens_trained": 0.256452032 |
| }, |
| { |
| "epoch": 0.7659574468085106, |
| "grad_norm": 1.0681291818618774, |
| "loss": 5.088, |
| "lr": 0.0006924475524475524, |
| "step": 2700, |
| "tokens_trained": 0.25664152 |
| }, |
| { |
| "epoch": 0.7665248226950354, |
| "grad_norm": 1.1148093938827515, |
| "loss": 5.0389, |
| "lr": 0.0006921678321678322, |
| "step": 2702, |
| "tokens_trained": 0.25683068 |
| }, |
| { |
| "epoch": 0.7670921985815603, |
| "grad_norm": 1.0831029415130615, |
| "loss": 5.1181, |
| "lr": 0.0006918881118881118, |
| "step": 2704, |
| "tokens_trained": 0.257020752 |
| }, |
| { |
| "epoch": 0.7676595744680851, |
| "grad_norm": 1.0877745151519775, |
| "loss": 5.1822, |
| "lr": 0.0006916083916083916, |
| "step": 2706, |
| "tokens_trained": 0.257209136 |
| }, |
| { |
| "epoch": 0.76822695035461, |
| "grad_norm": 1.0823218822479248, |
| "loss": 5.0855, |
| "lr": 0.0006913286713286714, |
| "step": 2708, |
| "tokens_trained": 0.257398504 |
| }, |
| { |
| "epoch": 0.7687943262411348, |
| "grad_norm": 1.0309520959854126, |
| "loss": 5.141, |
| "lr": 0.0006910489510489511, |
| "step": 2710, |
| "tokens_trained": 0.257589568 |
| }, |
| { |
| "epoch": 0.7693617021276595, |
| "grad_norm": 1.0433647632598877, |
| "loss": 5.057, |
| "lr": 0.0006907692307692308, |
| "step": 2712, |
| "tokens_trained": 0.257781368 |
| }, |
| { |
| "epoch": 0.7699290780141844, |
| "grad_norm": 1.05474054813385, |
| "loss": 5.0639, |
| "lr": 0.0006904895104895104, |
| "step": 2714, |
| "tokens_trained": 0.25797212 |
| }, |
| { |
| "epoch": 0.7704964539007092, |
| "grad_norm": 1.0005548000335693, |
| "loss": 5.1155, |
| "lr": 0.0006902097902097903, |
| "step": 2716, |
| "tokens_trained": 0.25815968 |
| }, |
| { |
| "epoch": 0.7710638297872341, |
| "grad_norm": 0.9644413590431213, |
| "loss": 5.1092, |
| "lr": 0.0006899300699300699, |
| "step": 2718, |
| "tokens_trained": 0.258350192 |
| }, |
| { |
| "epoch": 0.7716312056737589, |
| "grad_norm": 1.0715434551239014, |
| "loss": 5.0827, |
| "lr": 0.0006896503496503497, |
| "step": 2720, |
| "tokens_trained": 0.258539872 |
| }, |
| { |
| "epoch": 0.7721985815602836, |
| "grad_norm": 1.0799431800842285, |
| "loss": 5.1489, |
| "lr": 0.0006893706293706293, |
| "step": 2722, |
| "tokens_trained": 0.258728696 |
| }, |
| { |
| "epoch": 0.7727659574468085, |
| "grad_norm": 1.0224812030792236, |
| "loss": 5.0897, |
| "lr": 0.0006890909090909091, |
| "step": 2724, |
| "tokens_trained": 0.258918368 |
| }, |
| { |
| "epoch": 0.7733333333333333, |
| "grad_norm": 1.2171430587768555, |
| "loss": 5.1283, |
| "lr": 0.0006888111888111889, |
| "step": 2726, |
| "tokens_trained": 0.259107072 |
| }, |
| { |
| "epoch": 0.7739007092198582, |
| "grad_norm": 1.0420043468475342, |
| "loss": 5.1325, |
| "lr": 0.0006885314685314685, |
| "step": 2728, |
| "tokens_trained": 0.259297744 |
| }, |
| { |
| "epoch": 0.774468085106383, |
| "grad_norm": 1.0326933860778809, |
| "loss": 5.1543, |
| "lr": 0.0006882517482517483, |
| "step": 2730, |
| "tokens_trained": 0.259486832 |
| }, |
| { |
| "epoch": 0.7750354609929078, |
| "grad_norm": 1.1191221475601196, |
| "loss": 5.1182, |
| "lr": 0.0006879720279720279, |
| "step": 2732, |
| "tokens_trained": 0.2596774 |
| }, |
| { |
| "epoch": 0.7756028368794327, |
| "grad_norm": 1.089678168296814, |
| "loss": 5.09, |
| "lr": 0.0006876923076923078, |
| "step": 2734, |
| "tokens_trained": 0.259868248 |
| }, |
| { |
| "epoch": 0.7761702127659574, |
| "grad_norm": 1.0944526195526123, |
| "loss": 5.1519, |
| "lr": 0.0006874125874125874, |
| "step": 2736, |
| "tokens_trained": 0.260056992 |
| }, |
| { |
| "epoch": 0.7767375886524823, |
| "grad_norm": 1.0774682760238647, |
| "loss": 5.0998, |
| "lr": 0.0006871328671328672, |
| "step": 2738, |
| "tokens_trained": 0.2602478 |
| }, |
| { |
| "epoch": 0.7773049645390071, |
| "grad_norm": 1.0795758962631226, |
| "loss": 5.1483, |
| "lr": 0.0006868531468531468, |
| "step": 2740, |
| "tokens_trained": 0.260435896 |
| }, |
| { |
| "epoch": 0.7778723404255319, |
| "grad_norm": 1.229885458946228, |
| "loss": 5.0991, |
| "lr": 0.0006865734265734265, |
| "step": 2742, |
| "tokens_trained": 0.260624176 |
| }, |
| { |
| "epoch": 0.7784397163120568, |
| "grad_norm": 1.2816888093948364, |
| "loss": 5.1131, |
| "lr": 0.0006862937062937064, |
| "step": 2744, |
| "tokens_trained": 0.26081632 |
| }, |
| { |
| "epoch": 0.7790070921985816, |
| "grad_norm": 1.127356767654419, |
| "loss": 5.0589, |
| "lr": 0.000686013986013986, |
| "step": 2746, |
| "tokens_trained": 0.261003088 |
| }, |
| { |
| "epoch": 0.7795744680851063, |
| "grad_norm": 1.073644995689392, |
| "loss": 5.1402, |
| "lr": 0.0006857342657342658, |
| "step": 2748, |
| "tokens_trained": 0.261192088 |
| }, |
| { |
| "epoch": 0.7801418439716312, |
| "grad_norm": 1.0892105102539062, |
| "loss": 5.1231, |
| "lr": 0.0006854545454545454, |
| "step": 2750, |
| "tokens_trained": 0.261381504 |
| }, |
| { |
| "epoch": 0.7801418439716312, |
| "eval_loss": 5.11714506149292, |
| "eval_runtime": 20.9289, |
| "step": 2750, |
| "tokens_trained": 0.261381504 |
| }, |
| { |
| "epoch": 0.780709219858156, |
| "grad_norm": 1.2366212606430054, |
| "loss": 5.079, |
| "lr": 0.0006851748251748253, |
| "step": 2752, |
| "tokens_trained": 0.261572936 |
| }, |
| { |
| "epoch": 0.7812765957446809, |
| "grad_norm": 1.2283895015716553, |
| "loss": 5.0414, |
| "lr": 0.0006848951048951049, |
| "step": 2754, |
| "tokens_trained": 0.26176184 |
| }, |
| { |
| "epoch": 0.7818439716312057, |
| "grad_norm": 1.2296546697616577, |
| "loss": 5.0758, |
| "lr": 0.0006846153846153846, |
| "step": 2756, |
| "tokens_trained": 0.261952224 |
| }, |
| { |
| "epoch": 0.7824113475177304, |
| "grad_norm": 1.1455234289169312, |
| "loss": 5.0903, |
| "lr": 0.0006843356643356643, |
| "step": 2758, |
| "tokens_trained": 0.262142128 |
| }, |
| { |
| "epoch": 0.7829787234042553, |
| "grad_norm": 0.9795711040496826, |
| "loss": 5.1101, |
| "lr": 0.000684055944055944, |
| "step": 2760, |
| "tokens_trained": 0.262331464 |
| }, |
| { |
| "epoch": 0.7835460992907801, |
| "grad_norm": 1.1363111734390259, |
| "loss": 5.0948, |
| "lr": 0.0006837762237762239, |
| "step": 2762, |
| "tokens_trained": 0.262523048 |
| }, |
| { |
| "epoch": 0.784113475177305, |
| "grad_norm": 1.0878827571868896, |
| "loss": 5.0942, |
| "lr": 0.0006834965034965035, |
| "step": 2764, |
| "tokens_trained": 0.26271264 |
| }, |
| { |
| "epoch": 0.7846808510638298, |
| "grad_norm": 1.1213501691818237, |
| "loss": 5.0863, |
| "lr": 0.0006832167832167833, |
| "step": 2766, |
| "tokens_trained": 0.262903952 |
| }, |
| { |
| "epoch": 0.7852482269503546, |
| "grad_norm": 1.1156904697418213, |
| "loss": 5.1835, |
| "lr": 0.0006829370629370629, |
| "step": 2768, |
| "tokens_trained": 0.2630932 |
| }, |
| { |
| "epoch": 0.7858156028368795, |
| "grad_norm": 1.2105063199996948, |
| "loss": 5.152, |
| "lr": 0.0006826573426573427, |
| "step": 2770, |
| "tokens_trained": 0.2632822 |
| }, |
| { |
| "epoch": 0.7863829787234042, |
| "grad_norm": 1.056512475013733, |
| "loss": 5.129, |
| "lr": 0.0006823776223776224, |
| "step": 2772, |
| "tokens_trained": 0.263471056 |
| }, |
| { |
| "epoch": 0.7869503546099291, |
| "grad_norm": 1.124480128288269, |
| "loss": 5.1122, |
| "lr": 0.0006820979020979021, |
| "step": 2774, |
| "tokens_trained": 0.26365952 |
| }, |
| { |
| "epoch": 0.7875177304964539, |
| "grad_norm": 1.1403707265853882, |
| "loss": 5.1283, |
| "lr": 0.0006818181818181818, |
| "step": 2776, |
| "tokens_trained": 0.263850128 |
| }, |
| { |
| "epoch": 0.7880851063829787, |
| "grad_norm": 1.0712953805923462, |
| "loss": 5.0901, |
| "lr": 0.0006815384615384615, |
| "step": 2778, |
| "tokens_trained": 0.264036944 |
| }, |
| { |
| "epoch": 0.7886524822695036, |
| "grad_norm": 1.1485860347747803, |
| "loss": 5.0673, |
| "lr": 0.0006812587412587414, |
| "step": 2780, |
| "tokens_trained": 0.2642284 |
| }, |
| { |
| "epoch": 0.7892198581560284, |
| "grad_norm": 1.144534945487976, |
| "loss": 5.0939, |
| "lr": 0.000680979020979021, |
| "step": 2782, |
| "tokens_trained": 0.264417248 |
| }, |
| { |
| "epoch": 0.7897872340425532, |
| "grad_norm": 1.0953861474990845, |
| "loss": 5.0998, |
| "lr": 0.0006806993006993007, |
| "step": 2784, |
| "tokens_trained": 0.264605776 |
| }, |
| { |
| "epoch": 0.790354609929078, |
| "grad_norm": 1.0519598722457886, |
| "loss": 5.1059, |
| "lr": 0.0006804195804195804, |
| "step": 2786, |
| "tokens_trained": 0.264795928 |
| }, |
| { |
| "epoch": 0.7909219858156028, |
| "grad_norm": 1.064609408378601, |
| "loss": 5.1017, |
| "lr": 0.0006801398601398602, |
| "step": 2788, |
| "tokens_trained": 0.264986 |
| }, |
| { |
| "epoch": 0.7914893617021277, |
| "grad_norm": 1.0485059022903442, |
| "loss": 5.0636, |
| "lr": 0.0006798601398601399, |
| "step": 2790, |
| "tokens_trained": 0.265176936 |
| }, |
| { |
| "epoch": 0.7920567375886525, |
| "grad_norm": 1.1277351379394531, |
| "loss": 5.0689, |
| "lr": 0.0006795804195804196, |
| "step": 2792, |
| "tokens_trained": 0.265366584 |
| }, |
| { |
| "epoch": 0.7926241134751772, |
| "grad_norm": 1.0692890882492065, |
| "loss": 5.0922, |
| "lr": 0.0006793006993006992, |
| "step": 2794, |
| "tokens_trained": 0.265557456 |
| }, |
| { |
| "epoch": 0.7931914893617021, |
| "grad_norm": 0.9836872220039368, |
| "loss": 5.0702, |
| "lr": 0.000679020979020979, |
| "step": 2796, |
| "tokens_trained": 0.265747056 |
| }, |
| { |
| "epoch": 0.7937588652482269, |
| "grad_norm": 1.0450890064239502, |
| "loss": 5.0778, |
| "lr": 0.0006787412587412588, |
| "step": 2798, |
| "tokens_trained": 0.265935536 |
| }, |
| { |
| "epoch": 0.7943262411347518, |
| "grad_norm": 1.017853856086731, |
| "loss": 5.1401, |
| "lr": 0.0006784615384615385, |
| "step": 2800, |
| "tokens_trained": 0.266124376 |
| }, |
| { |
| "epoch": 0.7948936170212766, |
| "grad_norm": 0.9698541760444641, |
| "loss": 5.0882, |
| "lr": 0.0006781818181818182, |
| "step": 2802, |
| "tokens_trained": 0.266312192 |
| }, |
| { |
| "epoch": 0.7954609929078014, |
| "grad_norm": 0.9696250557899475, |
| "loss": 5.1424, |
| "lr": 0.0006779020979020979, |
| "step": 2804, |
| "tokens_trained": 0.266503584 |
| }, |
| { |
| "epoch": 0.7960283687943263, |
| "grad_norm": 1.011576533317566, |
| "loss": 5.062, |
| "lr": 0.0006776223776223777, |
| "step": 2806, |
| "tokens_trained": 0.266693776 |
| }, |
| { |
| "epoch": 0.796595744680851, |
| "grad_norm": 0.9681981801986694, |
| "loss": 5.1343, |
| "lr": 0.0006773426573426574, |
| "step": 2808, |
| "tokens_trained": 0.26688324 |
| }, |
| { |
| "epoch": 0.7971631205673759, |
| "grad_norm": 0.9778586626052856, |
| "loss": 5.0619, |
| "lr": 0.0006770629370629371, |
| "step": 2810, |
| "tokens_trained": 0.267072 |
| }, |
| { |
| "epoch": 0.7977304964539007, |
| "grad_norm": 0.9624539613723755, |
| "loss": 5.0943, |
| "lr": 0.0006767832167832167, |
| "step": 2812, |
| "tokens_trained": 0.267260184 |
| }, |
| { |
| "epoch": 0.7982978723404255, |
| "grad_norm": 1.0591245889663696, |
| "loss": 5.101, |
| "lr": 0.0006765034965034965, |
| "step": 2814, |
| "tokens_trained": 0.267450632 |
| }, |
| { |
| "epoch": 0.7988652482269504, |
| "grad_norm": 1.0650452375411987, |
| "loss": 5.0754, |
| "lr": 0.0006762237762237763, |
| "step": 2816, |
| "tokens_trained": 0.267641848 |
| }, |
| { |
| "epoch": 0.7994326241134752, |
| "grad_norm": 1.0241055488586426, |
| "loss": 5.113, |
| "lr": 0.000675944055944056, |
| "step": 2818, |
| "tokens_trained": 0.267831232 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.9588684439659119, |
| "loss": 5.1124, |
| "lr": 0.0006756643356643357, |
| "step": 2820, |
| "tokens_trained": 0.268022016 |
| }, |
| { |
| "epoch": 0.8005673758865248, |
| "grad_norm": 1.0146323442459106, |
| "loss": 5.0773, |
| "lr": 0.0006753846153846153, |
| "step": 2822, |
| "tokens_trained": 0.268211504 |
| }, |
| { |
| "epoch": 0.8011347517730496, |
| "grad_norm": 1.040366530418396, |
| "loss": 5.0735, |
| "lr": 0.0006751048951048951, |
| "step": 2824, |
| "tokens_trained": 0.268400704 |
| }, |
| { |
| "epoch": 0.8017021276595745, |
| "grad_norm": 1.0419392585754395, |
| "loss": 5.1243, |
| "lr": 0.0006748251748251748, |
| "step": 2826, |
| "tokens_trained": 0.268592936 |
| }, |
| { |
| "epoch": 0.8022695035460993, |
| "grad_norm": 1.0807193517684937, |
| "loss": 5.0938, |
| "lr": 0.0006745454545454546, |
| "step": 2828, |
| "tokens_trained": 0.26878236 |
| }, |
| { |
| "epoch": 0.8028368794326242, |
| "grad_norm": 1.0357084274291992, |
| "loss": 5.138, |
| "lr": 0.0006742657342657342, |
| "step": 2830, |
| "tokens_trained": 0.268973808 |
| }, |
| { |
| "epoch": 0.8034042553191489, |
| "grad_norm": 1.0543837547302246, |
| "loss": 5.1219, |
| "lr": 0.000673986013986014, |
| "step": 2832, |
| "tokens_trained": 0.269163576 |
| }, |
| { |
| "epoch": 0.8039716312056737, |
| "grad_norm": 0.9575244188308716, |
| "loss": 5.0388, |
| "lr": 0.0006737062937062938, |
| "step": 2834, |
| "tokens_trained": 0.26935304 |
| }, |
| { |
| "epoch": 0.8045390070921986, |
| "grad_norm": 1.0559078454971313, |
| "loss": 5.1569, |
| "lr": 0.0006734265734265734, |
| "step": 2836, |
| "tokens_trained": 0.269542488 |
| }, |
| { |
| "epoch": 0.8051063829787234, |
| "grad_norm": 1.1365549564361572, |
| "loss": 5.1392, |
| "lr": 0.0006731468531468532, |
| "step": 2838, |
| "tokens_trained": 0.269732336 |
| }, |
| { |
| "epoch": 0.8056737588652483, |
| "grad_norm": 1.0022294521331787, |
| "loss": 5.1017, |
| "lr": 0.0006728671328671328, |
| "step": 2840, |
| "tokens_trained": 0.269922384 |
| }, |
| { |
| "epoch": 0.8062411347517731, |
| "grad_norm": 0.9790627360343933, |
| "loss": 5.1443, |
| "lr": 0.0006725874125874126, |
| "step": 2842, |
| "tokens_trained": 0.270111096 |
| }, |
| { |
| "epoch": 0.8068085106382978, |
| "grad_norm": 1.0328103303909302, |
| "loss": 5.087, |
| "lr": 0.0006723076923076923, |
| "step": 2844, |
| "tokens_trained": 0.27030036 |
| }, |
| { |
| "epoch": 0.8073758865248227, |
| "grad_norm": 1.0813841819763184, |
| "loss": 5.0995, |
| "lr": 0.0006720279720279721, |
| "step": 2846, |
| "tokens_trained": 0.270490936 |
| }, |
| { |
| "epoch": 0.8079432624113475, |
| "grad_norm": 1.1210085153579712, |
| "loss": 5.0929, |
| "lr": 0.0006717482517482517, |
| "step": 2848, |
| "tokens_trained": 0.27067904 |
| }, |
| { |
| "epoch": 0.8085106382978723, |
| "grad_norm": 1.10624361038208, |
| "loss": 5.0861, |
| "lr": 0.0006714685314685314, |
| "step": 2850, |
| "tokens_trained": 0.270869664 |
| }, |
| { |
| "epoch": 0.8090780141843972, |
| "grad_norm": 0.9984250664710999, |
| "loss": 5.1126, |
| "lr": 0.0006711888111888113, |
| "step": 2852, |
| "tokens_trained": 0.271059912 |
| }, |
| { |
| "epoch": 0.809645390070922, |
| "grad_norm": 1.0100075006484985, |
| "loss": 5.0131, |
| "lr": 0.0006709090909090909, |
| "step": 2854, |
| "tokens_trained": 0.271248128 |
| }, |
| { |
| "epoch": 0.8102127659574468, |
| "grad_norm": 1.0718857049942017, |
| "loss": 5.0978, |
| "lr": 0.0006706293706293707, |
| "step": 2856, |
| "tokens_trained": 0.271437752 |
| }, |
| { |
| "epoch": 0.8107801418439716, |
| "grad_norm": 1.0277525186538696, |
| "loss": 5.022, |
| "lr": 0.0006703496503496503, |
| "step": 2858, |
| "tokens_trained": 0.271627272 |
| }, |
| { |
| "epoch": 0.8113475177304964, |
| "grad_norm": 1.1056699752807617, |
| "loss": 5.1678, |
| "lr": 0.0006700699300699301, |
| "step": 2860, |
| "tokens_trained": 0.271815032 |
| }, |
| { |
| "epoch": 0.8119148936170213, |
| "grad_norm": 0.9853792190551758, |
| "loss": 5.1511, |
| "lr": 0.0006697902097902098, |
| "step": 2862, |
| "tokens_trained": 0.272004128 |
| }, |
| { |
| "epoch": 0.8124822695035461, |
| "grad_norm": 1.0207619667053223, |
| "loss": 5.0681, |
| "lr": 0.0006695104895104895, |
| "step": 2864, |
| "tokens_trained": 0.272193024 |
| }, |
| { |
| "epoch": 0.813049645390071, |
| "grad_norm": 1.0080488920211792, |
| "loss": 5.0712, |
| "lr": 0.0006692307692307692, |
| "step": 2866, |
| "tokens_trained": 0.27238368 |
| }, |
| { |
| "epoch": 0.8136170212765957, |
| "grad_norm": 1.1197504997253418, |
| "loss": 5.0545, |
| "lr": 0.0006689510489510489, |
| "step": 2868, |
| "tokens_trained": 0.272573416 |
| }, |
| { |
| "epoch": 0.8141843971631205, |
| "grad_norm": 1.0667881965637207, |
| "loss": 5.0492, |
| "lr": 0.0006686713286713288, |
| "step": 2870, |
| "tokens_trained": 0.272762152 |
| }, |
| { |
| "epoch": 0.8147517730496454, |
| "grad_norm": 1.0861417055130005, |
| "loss": 5.1656, |
| "lr": 0.0006683916083916084, |
| "step": 2872, |
| "tokens_trained": 0.272951384 |
| }, |
| { |
| "epoch": 0.8153191489361702, |
| "grad_norm": 0.9590932130813599, |
| "loss": 5.0987, |
| "lr": 0.0006681118881118882, |
| "step": 2874, |
| "tokens_trained": 0.273142368 |
| }, |
| { |
| "epoch": 0.8156028368794326, |
| "eval_loss": 5.098834037780762, |
| "eval_runtime": 21.1414, |
| "step": 2875, |
| "tokens_trained": 0.273239384 |
| }, |
| { |
| "epoch": 0.8158865248226951, |
| "grad_norm": 0.9762487411499023, |
| "loss": 5.0346, |
| "lr": 0.0006678321678321678, |
| "step": 2876, |
| "tokens_trained": 0.273334 |
| }, |
| { |
| "epoch": 0.8164539007092199, |
| "grad_norm": 1.059070110321045, |
| "loss": 5.0466, |
| "lr": 0.0006675524475524475, |
| "step": 2878, |
| "tokens_trained": 0.2735232 |
| }, |
| { |
| "epoch": 0.8170212765957446, |
| "grad_norm": 1.0384489297866821, |
| "loss": 5.0647, |
| "lr": 0.0006672727272727273, |
| "step": 2880, |
| "tokens_trained": 0.27371452 |
| }, |
| { |
| "epoch": 0.8175886524822695, |
| "grad_norm": 1.0188980102539062, |
| "loss": 5.135, |
| "lr": 0.000666993006993007, |
| "step": 2882, |
| "tokens_trained": 0.273903312 |
| }, |
| { |
| "epoch": 0.8181560283687943, |
| "grad_norm": 1.0437567234039307, |
| "loss": 5.1251, |
| "lr": 0.0006667132867132867, |
| "step": 2884, |
| "tokens_trained": 0.27409364 |
| }, |
| { |
| "epoch": 0.8187234042553192, |
| "grad_norm": 1.040148138999939, |
| "loss": 5.0829, |
| "lr": 0.0006664335664335664, |
| "step": 2886, |
| "tokens_trained": 0.274283392 |
| }, |
| { |
| "epoch": 0.819290780141844, |
| "grad_norm": 0.9796963930130005, |
| "loss": 5.1062, |
| "lr": 0.0006661538461538463, |
| "step": 2888, |
| "tokens_trained": 0.27447272 |
| }, |
| { |
| "epoch": 0.8198581560283688, |
| "grad_norm": 1.0791646242141724, |
| "loss": 5.0677, |
| "lr": 0.0006658741258741259, |
| "step": 2890, |
| "tokens_trained": 0.274661656 |
| }, |
| { |
| "epoch": 0.8204255319148936, |
| "grad_norm": 1.075614094734192, |
| "loss": 5.0932, |
| "lr": 0.0006655944055944056, |
| "step": 2892, |
| "tokens_trained": 0.274851216 |
| }, |
| { |
| "epoch": 0.8209929078014184, |
| "grad_norm": 1.0696609020233154, |
| "loss": 5.1614, |
| "lr": 0.0006653146853146853, |
| "step": 2894, |
| "tokens_trained": 0.275040176 |
| }, |
| { |
| "epoch": 0.8215602836879433, |
| "grad_norm": 1.0564289093017578, |
| "loss": 5.1165, |
| "lr": 0.000665034965034965, |
| "step": 2896, |
| "tokens_trained": 0.27522948 |
| }, |
| { |
| "epoch": 0.8221276595744681, |
| "grad_norm": 1.0135756731033325, |
| "loss": 5.1222, |
| "lr": 0.0006647552447552448, |
| "step": 2898, |
| "tokens_trained": 0.275419392 |
| }, |
| { |
| "epoch": 0.8226950354609929, |
| "grad_norm": 1.0177373886108398, |
| "loss": 5.1085, |
| "lr": 0.0006644755244755245, |
| "step": 2900, |
| "tokens_trained": 0.27561 |
| }, |
| { |
| "epoch": 0.8232624113475178, |
| "grad_norm": 0.9718354344367981, |
| "loss": 5.069, |
| "lr": 0.0006641958041958042, |
| "step": 2902, |
| "tokens_trained": 0.275800288 |
| }, |
| { |
| "epoch": 0.8238297872340425, |
| "grad_norm": 1.011567234992981, |
| "loss": 5.1668, |
| "lr": 0.0006639160839160839, |
| "step": 2904, |
| "tokens_trained": 0.275988672 |
| }, |
| { |
| "epoch": 0.8243971631205673, |
| "grad_norm": 1.0020220279693604, |
| "loss": 5.0616, |
| "lr": 0.0006636363636363638, |
| "step": 2906, |
| "tokens_trained": 0.276180112 |
| }, |
| { |
| "epoch": 0.8249645390070922, |
| "grad_norm": 0.9929330945014954, |
| "loss": 5.0723, |
| "lr": 0.0006633566433566434, |
| "step": 2908, |
| "tokens_trained": 0.276368136 |
| }, |
| { |
| "epoch": 0.825531914893617, |
| "grad_norm": 0.9768717885017395, |
| "loss": 5.0872, |
| "lr": 0.0006630769230769231, |
| "step": 2910, |
| "tokens_trained": 0.276557936 |
| }, |
| { |
| "epoch": 0.8260992907801419, |
| "grad_norm": 1.0068199634552002, |
| "loss": 5.1279, |
| "lr": 0.0006627972027972028, |
| "step": 2912, |
| "tokens_trained": 0.276748584 |
| }, |
| { |
| "epoch": 0.8266666666666667, |
| "grad_norm": 0.953273594379425, |
| "loss": 5.0422, |
| "lr": 0.0006625174825174825, |
| "step": 2914, |
| "tokens_trained": 0.276939168 |
| }, |
| { |
| "epoch": 0.8272340425531914, |
| "grad_norm": 0.9808285236358643, |
| "loss": 5.1278, |
| "lr": 0.0006622377622377623, |
| "step": 2916, |
| "tokens_trained": 0.277128728 |
| }, |
| { |
| "epoch": 0.8278014184397163, |
| "grad_norm": 0.9755997061729431, |
| "loss": 5.0661, |
| "lr": 0.000661958041958042, |
| "step": 2918, |
| "tokens_trained": 0.27731964 |
| }, |
| { |
| "epoch": 0.8283687943262411, |
| "grad_norm": 0.9573803544044495, |
| "loss": 5.0744, |
| "lr": 0.0006616783216783216, |
| "step": 2920, |
| "tokens_trained": 0.277508704 |
| }, |
| { |
| "epoch": 0.828936170212766, |
| "grad_norm": 1.1060761213302612, |
| "loss": 5.1124, |
| "lr": 0.0006613986013986014, |
| "step": 2922, |
| "tokens_trained": 0.277698576 |
| }, |
| { |
| "epoch": 0.8295035460992908, |
| "grad_norm": 1.1377017498016357, |
| "loss": 5.1375, |
| "lr": 0.0006611188811188812, |
| "step": 2924, |
| "tokens_trained": 0.277887456 |
| }, |
| { |
| "epoch": 0.8300709219858156, |
| "grad_norm": 1.0315862894058228, |
| "loss": 5.0839, |
| "lr": 0.0006608391608391609, |
| "step": 2926, |
| "tokens_trained": 0.278076232 |
| }, |
| { |
| "epoch": 0.8306382978723404, |
| "grad_norm": 0.9509685635566711, |
| "loss": 5.0986, |
| "lr": 0.0006605594405594406, |
| "step": 2928, |
| "tokens_trained": 0.278265168 |
| }, |
| { |
| "epoch": 0.8312056737588652, |
| "grad_norm": 0.9749555587768555, |
| "loss": 5.0292, |
| "lr": 0.0006602797202797203, |
| "step": 2930, |
| "tokens_trained": 0.27845612 |
| }, |
| { |
| "epoch": 0.8317730496453901, |
| "grad_norm": 0.9728718400001526, |
| "loss": 5.1113, |
| "lr": 0.00066, |
| "step": 2932, |
| "tokens_trained": 0.278647 |
| }, |
| { |
| "epoch": 0.8323404255319149, |
| "grad_norm": 0.8888244032859802, |
| "loss": 5.0698, |
| "lr": 0.0006597202797202797, |
| "step": 2934, |
| "tokens_trained": 0.278834704 |
| }, |
| { |
| "epoch": 0.8329078014184397, |
| "grad_norm": 0.9745096564292908, |
| "loss": 5.1356, |
| "lr": 0.0006594405594405595, |
| "step": 2936, |
| "tokens_trained": 0.27902504 |
| }, |
| { |
| "epoch": 0.8334751773049646, |
| "grad_norm": 1.023566484451294, |
| "loss": 5.0733, |
| "lr": 0.0006591608391608391, |
| "step": 2938, |
| "tokens_trained": 0.279214024 |
| }, |
| { |
| "epoch": 0.8340425531914893, |
| "grad_norm": 0.9370903968811035, |
| "loss": 5.108, |
| "lr": 0.0006588811188811189, |
| "step": 2940, |
| "tokens_trained": 0.279402336 |
| }, |
| { |
| "epoch": 0.8346099290780142, |
| "grad_norm": 1.037245750427246, |
| "loss": 5.1539, |
| "lr": 0.0006586013986013986, |
| "step": 2942, |
| "tokens_trained": 0.279594456 |
| }, |
| { |
| "epoch": 0.835177304964539, |
| "grad_norm": 1.1117267608642578, |
| "loss": 5.0984, |
| "lr": 0.0006583216783216784, |
| "step": 2944, |
| "tokens_trained": 0.279784736 |
| }, |
| { |
| "epoch": 0.8357446808510638, |
| "grad_norm": 1.0760383605957031, |
| "loss": 5.0798, |
| "lr": 0.0006580419580419581, |
| "step": 2946, |
| "tokens_trained": 0.279974272 |
| }, |
| { |
| "epoch": 0.8363120567375887, |
| "grad_norm": 1.0359710454940796, |
| "loss": 5.1052, |
| "lr": 0.0006577622377622377, |
| "step": 2948, |
| "tokens_trained": 0.280162576 |
| }, |
| { |
| "epoch": 0.8368794326241135, |
| "grad_norm": 1.0630141496658325, |
| "loss": 5.0561, |
| "lr": 0.0006574825174825175, |
| "step": 2950, |
| "tokens_trained": 0.280351752 |
| }, |
| { |
| "epoch": 0.8374468085106384, |
| "grad_norm": 1.0445481538772583, |
| "loss": 5.1009, |
| "lr": 0.0006572027972027972, |
| "step": 2952, |
| "tokens_trained": 0.280541392 |
| }, |
| { |
| "epoch": 0.8380141843971631, |
| "grad_norm": 1.0606142282485962, |
| "loss": 5.0109, |
| "lr": 0.000656923076923077, |
| "step": 2954, |
| "tokens_trained": 0.280732192 |
| }, |
| { |
| "epoch": 0.8385815602836879, |
| "grad_norm": 1.0462067127227783, |
| "loss": 5.1411, |
| "lr": 0.0006566433566433566, |
| "step": 2956, |
| "tokens_trained": 0.280922712 |
| }, |
| { |
| "epoch": 0.8391489361702128, |
| "grad_norm": 0.9841874241828918, |
| "loss": 5.0773, |
| "lr": 0.0006563636363636364, |
| "step": 2958, |
| "tokens_trained": 0.28111024 |
| }, |
| { |
| "epoch": 0.8397163120567376, |
| "grad_norm": 1.1026822328567505, |
| "loss": 5.1128, |
| "lr": 0.0006560839160839161, |
| "step": 2960, |
| "tokens_trained": 0.281302152 |
| }, |
| { |
| "epoch": 0.8402836879432624, |
| "grad_norm": 0.9562904834747314, |
| "loss": 5.0521, |
| "lr": 0.0006558041958041958, |
| "step": 2962, |
| "tokens_trained": 0.281490768 |
| }, |
| { |
| "epoch": 0.8408510638297872, |
| "grad_norm": 1.038006067276001, |
| "loss": 5.0931, |
| "lr": 0.0006555244755244756, |
| "step": 2964, |
| "tokens_trained": 0.281682552 |
| }, |
| { |
| "epoch": 0.841418439716312, |
| "grad_norm": 1.008678913116455, |
| "loss": 5.0728, |
| "lr": 0.0006552447552447552, |
| "step": 2966, |
| "tokens_trained": 0.281871816 |
| }, |
| { |
| "epoch": 0.8419858156028369, |
| "grad_norm": 0.9977920651435852, |
| "loss": 5.086, |
| "lr": 0.000654965034965035, |
| "step": 2968, |
| "tokens_trained": 0.2820618 |
| }, |
| { |
| "epoch": 0.8425531914893617, |
| "grad_norm": 0.9422287344932556, |
| "loss": 5.0844, |
| "lr": 0.0006546853146853147, |
| "step": 2970, |
| "tokens_trained": 0.282253032 |
| }, |
| { |
| "epoch": 0.8431205673758865, |
| "grad_norm": 1.0029969215393066, |
| "loss": 5.0928, |
| "lr": 0.0006544055944055945, |
| "step": 2972, |
| "tokens_trained": 0.282443296 |
| }, |
| { |
| "epoch": 0.8436879432624114, |
| "grad_norm": 1.0643123388290405, |
| "loss": 5.0988, |
| "lr": 0.0006541258741258741, |
| "step": 2974, |
| "tokens_trained": 0.282634024 |
| }, |
| { |
| "epoch": 0.8442553191489361, |
| "grad_norm": 1.0360649824142456, |
| "loss": 5.0634, |
| "lr": 0.0006538461538461538, |
| "step": 2976, |
| "tokens_trained": 0.282825768 |
| }, |
| { |
| "epoch": 0.844822695035461, |
| "grad_norm": 0.9609996676445007, |
| "loss": 5.1155, |
| "lr": 0.0006535664335664336, |
| "step": 2978, |
| "tokens_trained": 0.283016704 |
| }, |
| { |
| "epoch": 0.8453900709219858, |
| "grad_norm": 0.9547716379165649, |
| "loss": 5.0769, |
| "lr": 0.0006532867132867133, |
| "step": 2980, |
| "tokens_trained": 0.283205288 |
| }, |
| { |
| "epoch": 0.8459574468085106, |
| "grad_norm": 1.0286030769348145, |
| "loss": 5.0849, |
| "lr": 0.0006530069930069931, |
| "step": 2982, |
| "tokens_trained": 0.283395192 |
| }, |
| { |
| "epoch": 0.8465248226950355, |
| "grad_norm": 0.9071921706199646, |
| "loss": 5.0308, |
| "lr": 0.0006527272727272727, |
| "step": 2984, |
| "tokens_trained": 0.283587048 |
| }, |
| { |
| "epoch": 0.8470921985815603, |
| "grad_norm": 0.851090133190155, |
| "loss": 5.0601, |
| "lr": 0.0006524475524475524, |
| "step": 2986, |
| "tokens_trained": 0.28377872 |
| }, |
| { |
| "epoch": 0.8476595744680852, |
| "grad_norm": 0.946025550365448, |
| "loss": 5.0863, |
| "lr": 0.0006521678321678322, |
| "step": 2988, |
| "tokens_trained": 0.283968304 |
| }, |
| { |
| "epoch": 0.8482269503546099, |
| "grad_norm": 0.994915783405304, |
| "loss": 5.1034, |
| "lr": 0.0006518881118881119, |
| "step": 2990, |
| "tokens_trained": 0.284158704 |
| }, |
| { |
| "epoch": 0.8487943262411347, |
| "grad_norm": 0.9354639053344727, |
| "loss": 5.0749, |
| "lr": 0.0006516083916083916, |
| "step": 2992, |
| "tokens_trained": 0.284350032 |
| }, |
| { |
| "epoch": 0.8493617021276596, |
| "grad_norm": 0.9014646410942078, |
| "loss": 5.0753, |
| "lr": 0.0006513286713286713, |
| "step": 2994, |
| "tokens_trained": 0.284541136 |
| }, |
| { |
| "epoch": 0.8499290780141844, |
| "grad_norm": 0.9647039771080017, |
| "loss": 5.1391, |
| "lr": 0.0006510489510489511, |
| "step": 2996, |
| "tokens_trained": 0.28473112 |
| }, |
| { |
| "epoch": 0.8504964539007093, |
| "grad_norm": 0.9687992930412292, |
| "loss": 5.0058, |
| "lr": 0.0006507692307692308, |
| "step": 2998, |
| "tokens_trained": 0.284922608 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "grad_norm": 0.9827167987823486, |
| "loss": 5.0597, |
| "lr": 0.0006504895104895106, |
| "step": 3000, |
| "tokens_trained": 0.285112344 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "eval_loss": 5.092260837554932, |
| "eval_runtime": 20.8128, |
| "step": 3000, |
| "tokens_trained": 0.285112344 |
| }, |
| { |
| "epoch": 0.8516312056737588, |
| "grad_norm": 1.1164077520370483, |
| "loss": 4.9872, |
| "lr": 0.0006502097902097902, |
| "step": 3002, |
| "tokens_trained": 0.285299144 |
| }, |
| { |
| "epoch": 0.8521985815602837, |
| "grad_norm": 1.0835845470428467, |
| "loss": 4.999, |
| "lr": 0.0006499300699300699, |
| "step": 3004, |
| "tokens_trained": 0.28548968 |
| }, |
| { |
| "epoch": 0.8527659574468085, |
| "grad_norm": 1.135926365852356, |
| "loss": 5.1038, |
| "lr": 0.0006496503496503497, |
| "step": 3006, |
| "tokens_trained": 0.28568256 |
| }, |
| { |
| "epoch": 0.8533333333333334, |
| "grad_norm": 1.0743507146835327, |
| "loss": 5.0964, |
| "lr": 0.0006493706293706294, |
| "step": 3008, |
| "tokens_trained": 0.28587348 |
| }, |
| { |
| "epoch": 0.8539007092198582, |
| "grad_norm": 0.9776538014411926, |
| "loss": 5.0208, |
| "lr": 0.0006490909090909091, |
| "step": 3010, |
| "tokens_trained": 0.286061968 |
| }, |
| { |
| "epoch": 0.854468085106383, |
| "grad_norm": 0.9797994494438171, |
| "loss": 5.0238, |
| "lr": 0.0006488111888111888, |
| "step": 3012, |
| "tokens_trained": 0.28625252 |
| }, |
| { |
| "epoch": 0.8550354609929078, |
| "grad_norm": 0.8697059154510498, |
| "loss": 5.0017, |
| "lr": 0.0006485314685314685, |
| "step": 3014, |
| "tokens_trained": 0.286443872 |
| }, |
| { |
| "epoch": 0.8556028368794326, |
| "grad_norm": 0.9378446340560913, |
| "loss": 5.0856, |
| "lr": 0.0006482517482517483, |
| "step": 3016, |
| "tokens_trained": 0.286633232 |
| }, |
| { |
| "epoch": 0.8561702127659574, |
| "grad_norm": 0.9418164491653442, |
| "loss": 5.0637, |
| "lr": 0.000647972027972028, |
| "step": 3018, |
| "tokens_trained": 0.286824032 |
| }, |
| { |
| "epoch": 0.8567375886524823, |
| "grad_norm": 0.9479710459709167, |
| "loss": 5.0941, |
| "lr": 0.0006476923076923077, |
| "step": 3020, |
| "tokens_trained": 0.2870158 |
| }, |
| { |
| "epoch": 0.8573049645390071, |
| "grad_norm": 0.9716140627861023, |
| "loss": 5.1218, |
| "lr": 0.0006474125874125874, |
| "step": 3022, |
| "tokens_trained": 0.287206184 |
| }, |
| { |
| "epoch": 0.857872340425532, |
| "grad_norm": 0.9651079177856445, |
| "loss": 5.0137, |
| "lr": 0.0006471328671328672, |
| "step": 3024, |
| "tokens_trained": 0.287395568 |
| }, |
| { |
| "epoch": 0.8584397163120567, |
| "grad_norm": 1.0485713481903076, |
| "loss": 5.0713, |
| "lr": 0.0006468531468531469, |
| "step": 3026, |
| "tokens_trained": 0.28758644 |
| }, |
| { |
| "epoch": 0.8590070921985815, |
| "grad_norm": 1.0849828720092773, |
| "loss": 5.0241, |
| "lr": 0.0006465734265734265, |
| "step": 3028, |
| "tokens_trained": 0.287773088 |
| }, |
| { |
| "epoch": 0.8595744680851064, |
| "grad_norm": 1.0668689012527466, |
| "loss": 5.0694, |
| "lr": 0.0006462937062937063, |
| "step": 3030, |
| "tokens_trained": 0.287963544 |
| }, |
| { |
| "epoch": 0.8601418439716312, |
| "grad_norm": 0.9943816065788269, |
| "loss": 5.0807, |
| "lr": 0.0006460139860139859, |
| "step": 3032, |
| "tokens_trained": 0.288152376 |
| }, |
| { |
| "epoch": 0.8607092198581561, |
| "grad_norm": 1.104642629623413, |
| "loss": 5.1047, |
| "lr": 0.0006457342657342658, |
| "step": 3034, |
| "tokens_trained": 0.288343064 |
| }, |
| { |
| "epoch": 0.8612765957446809, |
| "grad_norm": 1.0915707349777222, |
| "loss": 5.1332, |
| "lr": 0.0006454545454545455, |
| "step": 3036, |
| "tokens_trained": 0.28853308 |
| }, |
| { |
| "epoch": 0.8618439716312056, |
| "grad_norm": 0.9935365319252014, |
| "loss": 5.0799, |
| "lr": 0.0006451748251748252, |
| "step": 3038, |
| "tokens_trained": 0.288726184 |
| }, |
| { |
| "epoch": 0.8624113475177305, |
| "grad_norm": 0.9564308524131775, |
| "loss": 5.0549, |
| "lr": 0.0006448951048951049, |
| "step": 3040, |
| "tokens_trained": 0.288916368 |
| }, |
| { |
| "epoch": 0.8629787234042553, |
| "grad_norm": 1.0183926820755005, |
| "loss": 5.1508, |
| "lr": 0.0006446153846153846, |
| "step": 3042, |
| "tokens_trained": 0.28910616 |
| }, |
| { |
| "epoch": 0.8635460992907802, |
| "grad_norm": 0.8167940974235535, |
| "loss": 5.1238, |
| "lr": 0.0006443356643356644, |
| "step": 3044, |
| "tokens_trained": 0.289295008 |
| }, |
| { |
| "epoch": 0.864113475177305, |
| "grad_norm": 0.981560468673706, |
| "loss": 5.0692, |
| "lr": 0.000644055944055944, |
| "step": 3046, |
| "tokens_trained": 0.289483192 |
| }, |
| { |
| "epoch": 0.8646808510638297, |
| "grad_norm": 0.9596647024154663, |
| "loss": 5.0557, |
| "lr": 0.0006437762237762238, |
| "step": 3048, |
| "tokens_trained": 0.289672528 |
| }, |
| { |
| "epoch": 0.8652482269503546, |
| "grad_norm": 0.9322229027748108, |
| "loss": 5.0769, |
| "lr": 0.0006434965034965034, |
| "step": 3050, |
| "tokens_trained": 0.28986108 |
| }, |
| { |
| "epoch": 0.8658156028368794, |
| "grad_norm": 0.94253009557724, |
| "loss": 5.0556, |
| "lr": 0.0006432167832167833, |
| "step": 3052, |
| "tokens_trained": 0.29005032 |
| }, |
| { |
| "epoch": 0.8663829787234043, |
| "grad_norm": 0.9793356657028198, |
| "loss": 5.0821, |
| "lr": 0.000642937062937063, |
| "step": 3054, |
| "tokens_trained": 0.290238496 |
| }, |
| { |
| "epoch": 0.8669503546099291, |
| "grad_norm": 1.0270706415176392, |
| "loss": 5.1137, |
| "lr": 0.0006426573426573426, |
| "step": 3056, |
| "tokens_trained": 0.290428552 |
| }, |
| { |
| "epoch": 0.8675177304964539, |
| "grad_norm": 1.0015908479690552, |
| "loss": 5.0426, |
| "lr": 0.0006423776223776224, |
| "step": 3058, |
| "tokens_trained": 0.290617592 |
| }, |
| { |
| "epoch": 0.8680851063829788, |
| "grad_norm": 1.1663475036621094, |
| "loss": 5.0152, |
| "lr": 0.0006420979020979021, |
| "step": 3060, |
| "tokens_trained": 0.290806784 |
| }, |
| { |
| "epoch": 0.8686524822695035, |
| "grad_norm": 1.1144863367080688, |
| "loss": 5.1324, |
| "lr": 0.0006418181818181819, |
| "step": 3062, |
| "tokens_trained": 0.290997672 |
| }, |
| { |
| "epoch": 0.8692198581560284, |
| "grad_norm": 1.086949110031128, |
| "loss": 5.0896, |
| "lr": 0.0006415384615384615, |
| "step": 3064, |
| "tokens_trained": 0.291187224 |
| }, |
| { |
| "epoch": 0.8697872340425532, |
| "grad_norm": 1.0380237102508545, |
| "loss": 5.0964, |
| "lr": 0.0006412587412587413, |
| "step": 3066, |
| "tokens_trained": 0.291378608 |
| }, |
| { |
| "epoch": 0.870354609929078, |
| "grad_norm": 0.9731833338737488, |
| "loss": 5.0113, |
| "lr": 0.0006409790209790209, |
| "step": 3068, |
| "tokens_trained": 0.291568064 |
| }, |
| { |
| "epoch": 0.8709219858156029, |
| "grad_norm": 0.9414166212081909, |
| "loss": 5.0396, |
| "lr": 0.0006406993006993007, |
| "step": 3070, |
| "tokens_trained": 0.291757936 |
| }, |
| { |
| "epoch": 0.8714893617021277, |
| "grad_norm": 1.0375349521636963, |
| "loss": 5.1187, |
| "lr": 0.0006404195804195805, |
| "step": 3072, |
| "tokens_trained": 0.291948704 |
| }, |
| { |
| "epoch": 0.8720567375886524, |
| "grad_norm": 0.9412112236022949, |
| "loss": 5.0955, |
| "lr": 0.0006401398601398601, |
| "step": 3074, |
| "tokens_trained": 0.292141128 |
| }, |
| { |
| "epoch": 0.8726241134751773, |
| "grad_norm": 0.9645117521286011, |
| "loss": 5.1278, |
| "lr": 0.0006398601398601399, |
| "step": 3076, |
| "tokens_trained": 0.292331704 |
| }, |
| { |
| "epoch": 0.8731914893617021, |
| "grad_norm": 0.9918674230575562, |
| "loss": 5.0726, |
| "lr": 0.0006395804195804196, |
| "step": 3078, |
| "tokens_trained": 0.292519984 |
| }, |
| { |
| "epoch": 0.873758865248227, |
| "grad_norm": 0.8824833035469055, |
| "loss": 5.1334, |
| "lr": 0.0006393006993006994, |
| "step": 3080, |
| "tokens_trained": 0.292712064 |
| }, |
| { |
| "epoch": 0.8743262411347518, |
| "grad_norm": 1.0651589632034302, |
| "loss": 5.0911, |
| "lr": 0.000639020979020979, |
| "step": 3082, |
| "tokens_trained": 0.292901816 |
| }, |
| { |
| "epoch": 0.8748936170212765, |
| "grad_norm": 1.0067808628082275, |
| "loss": 5.1345, |
| "lr": 0.0006387412587412587, |
| "step": 3084, |
| "tokens_trained": 0.293094064 |
| }, |
| { |
| "epoch": 0.8754609929078014, |
| "grad_norm": 0.8916751146316528, |
| "loss": 5.1117, |
| "lr": 0.0006384615384615384, |
| "step": 3086, |
| "tokens_trained": 0.293284272 |
| }, |
| { |
| "epoch": 0.8760283687943262, |
| "grad_norm": 1.0009779930114746, |
| "loss": 5.1143, |
| "lr": 0.0006381818181818182, |
| "step": 3088, |
| "tokens_trained": 0.293474352 |
| }, |
| { |
| "epoch": 0.8765957446808511, |
| "grad_norm": 1.0289413928985596, |
| "loss": 5.0551, |
| "lr": 0.000637902097902098, |
| "step": 3090, |
| "tokens_trained": 0.293661976 |
| }, |
| { |
| "epoch": 0.8771631205673759, |
| "grad_norm": 0.9375638961791992, |
| "loss": 5.0666, |
| "lr": 0.0006376223776223776, |
| "step": 3092, |
| "tokens_trained": 0.293851968 |
| }, |
| { |
| "epoch": 0.8777304964539007, |
| "grad_norm": 0.9490086436271667, |
| "loss": 5.0901, |
| "lr": 0.0006373426573426574, |
| "step": 3094, |
| "tokens_trained": 0.294041608 |
| }, |
| { |
| "epoch": 0.8782978723404256, |
| "grad_norm": 0.932090163230896, |
| "loss": 5.0783, |
| "lr": 0.0006370629370629371, |
| "step": 3096, |
| "tokens_trained": 0.29423028 |
| }, |
| { |
| "epoch": 0.8788652482269503, |
| "grad_norm": 0.9120060205459595, |
| "loss": 5.1065, |
| "lr": 0.0006367832167832168, |
| "step": 3098, |
| "tokens_trained": 0.294421528 |
| }, |
| { |
| "epoch": 0.8794326241134752, |
| "grad_norm": 0.8693923950195312, |
| "loss": 5.0689, |
| "lr": 0.0006365034965034965, |
| "step": 3100, |
| "tokens_trained": 0.294609832 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.834987223148346, |
| "loss": 5.0542, |
| "lr": 0.0006362237762237762, |
| "step": 3102, |
| "tokens_trained": 0.294799424 |
| }, |
| { |
| "epoch": 0.8805673758865248, |
| "grad_norm": 0.9196602702140808, |
| "loss": 5.0212, |
| "lr": 0.0006359440559440559, |
| "step": 3104, |
| "tokens_trained": 0.294990504 |
| }, |
| { |
| "epoch": 0.8811347517730497, |
| "grad_norm": 1.0392085313796997, |
| "loss": 5.0734, |
| "lr": 0.0006356643356643357, |
| "step": 3106, |
| "tokens_trained": 0.295181112 |
| }, |
| { |
| "epoch": 0.8817021276595745, |
| "grad_norm": 1.0879757404327393, |
| "loss": 5.0834, |
| "lr": 0.0006353846153846155, |
| "step": 3108, |
| "tokens_trained": 0.295371224 |
| }, |
| { |
| "epoch": 0.8822695035460993, |
| "grad_norm": 1.0321052074432373, |
| "loss": 5.1132, |
| "lr": 0.0006351048951048951, |
| "step": 3110, |
| "tokens_trained": 0.295563288 |
| }, |
| { |
| "epoch": 0.8828368794326241, |
| "grad_norm": 0.9930777549743652, |
| "loss": 5.0855, |
| "lr": 0.0006348251748251748, |
| "step": 3112, |
| "tokens_trained": 0.295753864 |
| }, |
| { |
| "epoch": 0.8834042553191489, |
| "grad_norm": 1.007925033569336, |
| "loss": 5.0728, |
| "lr": 0.0006345454545454546, |
| "step": 3114, |
| "tokens_trained": 0.29594536 |
| }, |
| { |
| "epoch": 0.8839716312056738, |
| "grad_norm": 1.0430697202682495, |
| "loss": 5.161, |
| "lr": 0.0006342657342657343, |
| "step": 3116, |
| "tokens_trained": 0.296135144 |
| }, |
| { |
| "epoch": 0.8845390070921986, |
| "grad_norm": 0.9607092142105103, |
| "loss": 5.0514, |
| "lr": 0.000633986013986014, |
| "step": 3118, |
| "tokens_trained": 0.296325736 |
| }, |
| { |
| "epoch": 0.8851063829787233, |
| "grad_norm": 1.0054502487182617, |
| "loss": 5.03, |
| "lr": 0.0006337062937062937, |
| "step": 3120, |
| "tokens_trained": 0.296514408 |
| }, |
| { |
| "epoch": 0.8856737588652482, |
| "grad_norm": 1.0535473823547363, |
| "loss": 5.1082, |
| "lr": 0.0006334265734265733, |
| "step": 3122, |
| "tokens_trained": 0.296702248 |
| }, |
| { |
| "epoch": 0.886241134751773, |
| "grad_norm": 0.9889388680458069, |
| "loss": 5.0583, |
| "lr": 0.0006331468531468532, |
| "step": 3124, |
| "tokens_trained": 0.296891656 |
| }, |
| { |
| "epoch": 0.8865248226950354, |
| "eval_loss": 5.07567024230957, |
| "eval_runtime": 20.7649, |
| "step": 3125, |
| "tokens_trained": 0.296985944 |
| }, |
| { |
| "epoch": 0.8868085106382979, |
| "grad_norm": 1.008825421333313, |
| "loss": 5.0698, |
| "lr": 0.0006328671328671329, |
| "step": 3126, |
| "tokens_trained": 0.297081752 |
| }, |
| { |
| "epoch": 0.8873758865248227, |
| "grad_norm": 0.9656203985214233, |
| "loss": 5.0915, |
| "lr": 0.0006325874125874126, |
| "step": 3128, |
| "tokens_trained": 0.297269568 |
| }, |
| { |
| "epoch": 0.8879432624113475, |
| "grad_norm": 0.9101914167404175, |
| "loss": 5.0821, |
| "lr": 0.0006323076923076923, |
| "step": 3130, |
| "tokens_trained": 0.297457936 |
| }, |
| { |
| "epoch": 0.8885106382978724, |
| "grad_norm": 1.03163480758667, |
| "loss": 5.07, |
| "lr": 0.0006320279720279721, |
| "step": 3132, |
| "tokens_trained": 0.297646944 |
| }, |
| { |
| "epoch": 0.8890780141843971, |
| "grad_norm": 1.0470985174179077, |
| "loss": 5.0165, |
| "lr": 0.0006317482517482518, |
| "step": 3134, |
| "tokens_trained": 0.29783736 |
| }, |
| { |
| "epoch": 0.889645390070922, |
| "grad_norm": 1.0149681568145752, |
| "loss": 5.0809, |
| "lr": 0.0006314685314685314, |
| "step": 3136, |
| "tokens_trained": 0.298027048 |
| }, |
| { |
| "epoch": 0.8902127659574468, |
| "grad_norm": 1.017217993736267, |
| "loss": 5.0323, |
| "lr": 0.0006311888111888112, |
| "step": 3138, |
| "tokens_trained": 0.298218432 |
| }, |
| { |
| "epoch": 0.8907801418439716, |
| "grad_norm": 1.0002187490463257, |
| "loss": 5.0818, |
| "lr": 0.0006309090909090908, |
| "step": 3140, |
| "tokens_trained": 0.29840768 |
| }, |
| { |
| "epoch": 0.8913475177304965, |
| "grad_norm": 0.9259346723556519, |
| "loss": 5.0863, |
| "lr": 0.0006306293706293707, |
| "step": 3142, |
| "tokens_trained": 0.298599384 |
| }, |
| { |
| "epoch": 0.8919148936170213, |
| "grad_norm": 0.9437862634658813, |
| "loss": 5.1282, |
| "lr": 0.0006303496503496504, |
| "step": 3144, |
| "tokens_trained": 0.298789024 |
| }, |
| { |
| "epoch": 0.8924822695035461, |
| "grad_norm": 0.9849722981452942, |
| "loss": 5.0658, |
| "lr": 0.0006300699300699301, |
| "step": 3146, |
| "tokens_trained": 0.298979648 |
| }, |
| { |
| "epoch": 0.8930496453900709, |
| "grad_norm": 1.1129319667816162, |
| "loss": 5.0663, |
| "lr": 0.0006297902097902098, |
| "step": 3148, |
| "tokens_trained": 0.299170416 |
| }, |
| { |
| "epoch": 0.8936170212765957, |
| "grad_norm": 1.101006031036377, |
| "loss": 5.0394, |
| "lr": 0.0006295104895104896, |
| "step": 3150, |
| "tokens_trained": 0.299361408 |
| }, |
| { |
| "epoch": 0.8941843971631206, |
| "grad_norm": 1.0711042881011963, |
| "loss": 5.0696, |
| "lr": 0.0006292307692307693, |
| "step": 3152, |
| "tokens_trained": 0.29955124 |
| }, |
| { |
| "epoch": 0.8947517730496454, |
| "grad_norm": 1.0250879526138306, |
| "loss": 5.0645, |
| "lr": 0.0006289510489510489, |
| "step": 3154, |
| "tokens_trained": 0.299742168 |
| }, |
| { |
| "epoch": 0.8953191489361703, |
| "grad_norm": 1.0772818326950073, |
| "loss": 5.06, |
| "lr": 0.0006286713286713287, |
| "step": 3156, |
| "tokens_trained": 0.299931536 |
| }, |
| { |
| "epoch": 0.895886524822695, |
| "grad_norm": 1.1049630641937256, |
| "loss": 5.0823, |
| "lr": 0.0006283916083916083, |
| "step": 3158, |
| "tokens_trained": 0.300121944 |
| }, |
| { |
| "epoch": 0.8964539007092198, |
| "grad_norm": 1.0998307466506958, |
| "loss": 5.0334, |
| "lr": 0.0006281118881118882, |
| "step": 3160, |
| "tokens_trained": 0.300311336 |
| }, |
| { |
| "epoch": 0.8970212765957447, |
| "grad_norm": 1.0796667337417603, |
| "loss": 5.1029, |
| "lr": 0.0006278321678321679, |
| "step": 3162, |
| "tokens_trained": 0.300499712 |
| }, |
| { |
| "epoch": 0.8975886524822695, |
| "grad_norm": 1.054004192352295, |
| "loss": 5.0425, |
| "lr": 0.0006275524475524475, |
| "step": 3164, |
| "tokens_trained": 0.300689128 |
| }, |
| { |
| "epoch": 0.8981560283687944, |
| "grad_norm": 0.9226939082145691, |
| "loss": 5.0712, |
| "lr": 0.0006272727272727273, |
| "step": 3166, |
| "tokens_trained": 0.300878016 |
| }, |
| { |
| "epoch": 0.8987234042553192, |
| "grad_norm": 0.8905312418937683, |
| "loss": 5.0948, |
| "lr": 0.000626993006993007, |
| "step": 3168, |
| "tokens_trained": 0.301067672 |
| }, |
| { |
| "epoch": 0.8992907801418439, |
| "grad_norm": 0.92462557554245, |
| "loss": 5.0488, |
| "lr": 0.0006267132867132868, |
| "step": 3170, |
| "tokens_trained": 0.30125668 |
| }, |
| { |
| "epoch": 0.8998581560283688, |
| "grad_norm": 0.911163866519928, |
| "loss": 5.0655, |
| "lr": 0.0006264335664335664, |
| "step": 3172, |
| "tokens_trained": 0.301447736 |
| }, |
| { |
| "epoch": 0.9004255319148936, |
| "grad_norm": 1.0041508674621582, |
| "loss": 5.1074, |
| "lr": 0.0006261538461538462, |
| "step": 3174, |
| "tokens_trained": 0.301636976 |
| }, |
| { |
| "epoch": 0.9009929078014184, |
| "grad_norm": 1.1221826076507568, |
| "loss": 5.1076, |
| "lr": 0.0006258741258741258, |
| "step": 3176, |
| "tokens_trained": 0.301831152 |
| }, |
| { |
| "epoch": 0.9015602836879433, |
| "grad_norm": 1.0674721002578735, |
| "loss": 5.1029, |
| "lr": 0.0006255944055944057, |
| "step": 3178, |
| "tokens_trained": 0.302021192 |
| }, |
| { |
| "epoch": 0.902127659574468, |
| "grad_norm": 0.9207814335823059, |
| "loss": 5.1237, |
| "lr": 0.0006253146853146854, |
| "step": 3180, |
| "tokens_trained": 0.302214408 |
| }, |
| { |
| "epoch": 0.902695035460993, |
| "grad_norm": 0.9445079565048218, |
| "loss": 5.0714, |
| "lr": 0.000625034965034965, |
| "step": 3182, |
| "tokens_trained": 0.302406056 |
| }, |
| { |
| "epoch": 0.9032624113475177, |
| "grad_norm": 0.930630624294281, |
| "loss": 5.0326, |
| "lr": 0.0006247552447552448, |
| "step": 3184, |
| "tokens_trained": 0.302596376 |
| }, |
| { |
| "epoch": 0.9038297872340425, |
| "grad_norm": 0.9014614224433899, |
| "loss": 5.0768, |
| "lr": 0.0006244755244755245, |
| "step": 3186, |
| "tokens_trained": 0.302787288 |
| }, |
| { |
| "epoch": 0.9043971631205674, |
| "grad_norm": 0.9306453466415405, |
| "loss": 5.139, |
| "lr": 0.0006241958041958043, |
| "step": 3188, |
| "tokens_trained": 0.302976344 |
| }, |
| { |
| "epoch": 0.9049645390070922, |
| "grad_norm": 0.9506230354309082, |
| "loss": 5.0932, |
| "lr": 0.0006239160839160839, |
| "step": 3190, |
| "tokens_trained": 0.303166928 |
| }, |
| { |
| "epoch": 0.9055319148936171, |
| "grad_norm": 0.8852939605712891, |
| "loss": 5.0761, |
| "lr": 0.0006236363636363636, |
| "step": 3192, |
| "tokens_trained": 0.303357176 |
| }, |
| { |
| "epoch": 0.9060992907801418, |
| "grad_norm": 0.9017012119293213, |
| "loss": 4.9965, |
| "lr": 0.0006233566433566433, |
| "step": 3194, |
| "tokens_trained": 0.303547344 |
| }, |
| { |
| "epoch": 0.9066666666666666, |
| "grad_norm": 0.8619939684867859, |
| "loss": 5.0892, |
| "lr": 0.0006230769230769231, |
| "step": 3196, |
| "tokens_trained": 0.303737392 |
| }, |
| { |
| "epoch": 0.9072340425531915, |
| "grad_norm": 0.8667910695075989, |
| "loss": 5.1222, |
| "lr": 0.0006227972027972028, |
| "step": 3198, |
| "tokens_trained": 0.303926592 |
| }, |
| { |
| "epoch": 0.9078014184397163, |
| "grad_norm": 0.9172303676605225, |
| "loss": 5.0891, |
| "lr": 0.0006225174825174825, |
| "step": 3200, |
| "tokens_trained": 0.304117744 |
| }, |
| { |
| "epoch": 0.9083687943262412, |
| "grad_norm": 0.9247593879699707, |
| "loss": 5.0528, |
| "lr": 0.0006222377622377623, |
| "step": 3202, |
| "tokens_trained": 0.304304792 |
| }, |
| { |
| "epoch": 0.908936170212766, |
| "grad_norm": 0.9245242476463318, |
| "loss": 5.027, |
| "lr": 0.000621958041958042, |
| "step": 3204, |
| "tokens_trained": 0.304496016 |
| }, |
| { |
| "epoch": 0.9095035460992907, |
| "grad_norm": 0.8890556693077087, |
| "loss": 5.0348, |
| "lr": 0.0006216783216783217, |
| "step": 3206, |
| "tokens_trained": 0.304685896 |
| }, |
| { |
| "epoch": 0.9100709219858156, |
| "grad_norm": 0.9343590140342712, |
| "loss": 5.103, |
| "lr": 0.0006213986013986014, |
| "step": 3208, |
| "tokens_trained": 0.304876864 |
| }, |
| { |
| "epoch": 0.9106382978723404, |
| "grad_norm": 0.9546332955360413, |
| "loss": 5.0456, |
| "lr": 0.0006211188811188811, |
| "step": 3210, |
| "tokens_trained": 0.305067744 |
| }, |
| { |
| "epoch": 0.9112056737588653, |
| "grad_norm": 0.9404990077018738, |
| "loss": 5.0357, |
| "lr": 0.0006208391608391608, |
| "step": 3212, |
| "tokens_trained": 0.305256552 |
| }, |
| { |
| "epoch": 0.9117730496453901, |
| "grad_norm": 0.8743602633476257, |
| "loss": 5.0564, |
| "lr": 0.0006205594405594406, |
| "step": 3214, |
| "tokens_trained": 0.305446568 |
| }, |
| { |
| "epoch": 0.9123404255319149, |
| "grad_norm": 0.9437069892883301, |
| "loss": 5.0703, |
| "lr": 0.0006202797202797203, |
| "step": 3216, |
| "tokens_trained": 0.305636344 |
| }, |
| { |
| "epoch": 0.9129078014184397, |
| "grad_norm": 0.970951497554779, |
| "loss": 5.0722, |
| "lr": 0.00062, |
| "step": 3218, |
| "tokens_trained": 0.305825936 |
| }, |
| { |
| "epoch": 0.9134751773049645, |
| "grad_norm": 0.9047942757606506, |
| "loss": 5.113, |
| "lr": 0.0006197202797202797, |
| "step": 3220, |
| "tokens_trained": 0.306016936 |
| }, |
| { |
| "epoch": 0.9140425531914894, |
| "grad_norm": 0.9751421213150024, |
| "loss": 5.0465, |
| "lr": 0.0006194405594405595, |
| "step": 3222, |
| "tokens_trained": 0.306207216 |
| }, |
| { |
| "epoch": 0.9146099290780142, |
| "grad_norm": 0.9317526817321777, |
| "loss": 5.0601, |
| "lr": 0.0006191608391608392, |
| "step": 3224, |
| "tokens_trained": 0.306396832 |
| }, |
| { |
| "epoch": 0.915177304964539, |
| "grad_norm": 0.9828630685806274, |
| "loss": 5.0857, |
| "lr": 0.0006188811188811189, |
| "step": 3226, |
| "tokens_trained": 0.30658724 |
| }, |
| { |
| "epoch": 0.9157446808510639, |
| "grad_norm": 0.9108901619911194, |
| "loss": 5.0525, |
| "lr": 0.0006186013986013986, |
| "step": 3228, |
| "tokens_trained": 0.30677856 |
| }, |
| { |
| "epoch": 0.9163120567375886, |
| "grad_norm": 0.8517162203788757, |
| "loss": 5.1157, |
| "lr": 0.0006183216783216783, |
| "step": 3230, |
| "tokens_trained": 0.3069698 |
| }, |
| { |
| "epoch": 0.9168794326241134, |
| "grad_norm": 0.9589570760726929, |
| "loss": 5.0823, |
| "lr": 0.0006180419580419581, |
| "step": 3232, |
| "tokens_trained": 0.307160952 |
| }, |
| { |
| "epoch": 0.9174468085106383, |
| "grad_norm": 1.0031661987304688, |
| "loss": 5.0808, |
| "lr": 0.0006177622377622377, |
| "step": 3234, |
| "tokens_trained": 0.307352776 |
| }, |
| { |
| "epoch": 0.9180141843971631, |
| "grad_norm": 0.9295787215232849, |
| "loss": 5.0699, |
| "lr": 0.0006174825174825175, |
| "step": 3236, |
| "tokens_trained": 0.3075432 |
| }, |
| { |
| "epoch": 0.918581560283688, |
| "grad_norm": 0.9967226982116699, |
| "loss": 5.0036, |
| "lr": 0.0006172027972027972, |
| "step": 3238, |
| "tokens_trained": 0.307735016 |
| }, |
| { |
| "epoch": 0.9191489361702128, |
| "grad_norm": 1.0219292640686035, |
| "loss": 5.1142, |
| "lr": 0.000616923076923077, |
| "step": 3240, |
| "tokens_trained": 0.307926624 |
| }, |
| { |
| "epoch": 0.9197163120567375, |
| "grad_norm": 1.0547230243682861, |
| "loss": 5.0429, |
| "lr": 0.0006166433566433567, |
| "step": 3242, |
| "tokens_trained": 0.30811696 |
| }, |
| { |
| "epoch": 0.9202836879432624, |
| "grad_norm": 1.0130624771118164, |
| "loss": 5.1345, |
| "lr": 0.0006163636363636364, |
| "step": 3244, |
| "tokens_trained": 0.30830848 |
| }, |
| { |
| "epoch": 0.9208510638297872, |
| "grad_norm": 0.8802092671394348, |
| "loss": 5.0404, |
| "lr": 0.0006160839160839161, |
| "step": 3246, |
| "tokens_trained": 0.308497688 |
| }, |
| { |
| "epoch": 0.9214184397163121, |
| "grad_norm": 0.970391571521759, |
| "loss": 5.0875, |
| "lr": 0.0006158041958041957, |
| "step": 3248, |
| "tokens_trained": 0.308686352 |
| }, |
| { |
| "epoch": 0.9219858156028369, |
| "grad_norm": 0.9314327239990234, |
| "loss": 5.0519, |
| "lr": 0.0006155244755244756, |
| "step": 3250, |
| "tokens_trained": 0.308875888 |
| }, |
| { |
| "epoch": 0.9219858156028369, |
| "eval_loss": 5.063432216644287, |
| "eval_runtime": 20.6963, |
| "step": 3250, |
| "tokens_trained": 0.308875888 |
| }, |
| { |
| "epoch": 0.9225531914893617, |
| "grad_norm": 0.875278890132904, |
| "loss": 4.9958, |
| "lr": 0.0006152447552447552, |
| "step": 3252, |
| "tokens_trained": 0.309068888 |
| }, |
| { |
| "epoch": 0.9231205673758865, |
| "grad_norm": 0.9115424156188965, |
| "loss": 4.9971, |
| "lr": 0.000614965034965035, |
| "step": 3254, |
| "tokens_trained": 0.309260656 |
| }, |
| { |
| "epoch": 0.9236879432624113, |
| "grad_norm": 0.9202569723129272, |
| "loss": 5.0103, |
| "lr": 0.0006146853146853147, |
| "step": 3256, |
| "tokens_trained": 0.309452672 |
| }, |
| { |
| "epoch": 0.9242553191489362, |
| "grad_norm": 0.9471083879470825, |
| "loss": 5.0429, |
| "lr": 0.0006144055944055945, |
| "step": 3258, |
| "tokens_trained": 0.30964252 |
| }, |
| { |
| "epoch": 0.924822695035461, |
| "grad_norm": 0.9518803954124451, |
| "loss": 5.0143, |
| "lr": 0.0006141258741258742, |
| "step": 3260, |
| "tokens_trained": 0.309831288 |
| }, |
| { |
| "epoch": 0.9253900709219858, |
| "grad_norm": 0.9274792671203613, |
| "loss": 5.0121, |
| "lr": 0.0006138461538461538, |
| "step": 3262, |
| "tokens_trained": 0.310021056 |
| }, |
| { |
| "epoch": 0.9259574468085107, |
| "grad_norm": 0.9414265751838684, |
| "loss": 5.1362, |
| "lr": 0.0006135664335664336, |
| "step": 3264, |
| "tokens_trained": 0.310210328 |
| }, |
| { |
| "epoch": 0.9265248226950354, |
| "grad_norm": 0.968233048915863, |
| "loss": 4.9792, |
| "lr": 0.0006132867132867132, |
| "step": 3266, |
| "tokens_trained": 0.310399616 |
| }, |
| { |
| "epoch": 0.9270921985815603, |
| "grad_norm": 0.9223787784576416, |
| "loss": 5.0543, |
| "lr": 0.0006130069930069931, |
| "step": 3268, |
| "tokens_trained": 0.310588952 |
| }, |
| { |
| "epoch": 0.9276595744680851, |
| "grad_norm": 0.9317581653594971, |
| "loss": 5.0053, |
| "lr": 0.0006127272727272727, |
| "step": 3270, |
| "tokens_trained": 0.310779576 |
| }, |
| { |
| "epoch": 0.9282269503546099, |
| "grad_norm": 0.8910759687423706, |
| "loss": 5.1044, |
| "lr": 0.0006124475524475525, |
| "step": 3272, |
| "tokens_trained": 0.310970096 |
| }, |
| { |
| "epoch": 0.9287943262411348, |
| "grad_norm": 0.8903452157974243, |
| "loss": 5.093, |
| "lr": 0.0006121678321678322, |
| "step": 3274, |
| "tokens_trained": 0.311158808 |
| }, |
| { |
| "epoch": 0.9293617021276596, |
| "grad_norm": 0.9635697603225708, |
| "loss": 5.0149, |
| "lr": 0.0006118881118881118, |
| "step": 3276, |
| "tokens_trained": 0.311348672 |
| }, |
| { |
| "epoch": 0.9299290780141845, |
| "grad_norm": 1.0122349262237549, |
| "loss": 5.0353, |
| "lr": 0.0006116083916083917, |
| "step": 3278, |
| "tokens_trained": 0.31153696 |
| }, |
| { |
| "epoch": 0.9304964539007092, |
| "grad_norm": 0.9734505414962769, |
| "loss": 5.0531, |
| "lr": 0.0006113286713286713, |
| "step": 3280, |
| "tokens_trained": 0.311728288 |
| }, |
| { |
| "epoch": 0.931063829787234, |
| "grad_norm": 0.9433160424232483, |
| "loss": 5.0234, |
| "lr": 0.0006110489510489511, |
| "step": 3282, |
| "tokens_trained": 0.311917352 |
| }, |
| { |
| "epoch": 0.9316312056737589, |
| "grad_norm": 0.9984011054039001, |
| "loss": 5.0355, |
| "lr": 0.0006107692307692307, |
| "step": 3284, |
| "tokens_trained": 0.312108504 |
| }, |
| { |
| "epoch": 0.9321985815602837, |
| "grad_norm": 1.0186588764190674, |
| "loss": 4.9903, |
| "lr": 0.0006104895104895106, |
| "step": 3286, |
| "tokens_trained": 0.312300216 |
| }, |
| { |
| "epoch": 0.9327659574468085, |
| "grad_norm": 0.984987735748291, |
| "loss": 5.0188, |
| "lr": 0.0006102097902097902, |
| "step": 3288, |
| "tokens_trained": 0.312490928 |
| }, |
| { |
| "epoch": 0.9333333333333333, |
| "grad_norm": 0.9382873773574829, |
| "loss": 5.0223, |
| "lr": 0.0006099300699300699, |
| "step": 3290, |
| "tokens_trained": 0.312681928 |
| }, |
| { |
| "epoch": 0.9339007092198581, |
| "grad_norm": 0.8770633339881897, |
| "loss": 5.05, |
| "lr": 0.0006096503496503497, |
| "step": 3292, |
| "tokens_trained": 0.312870072 |
| }, |
| { |
| "epoch": 0.934468085106383, |
| "grad_norm": 0.9703201055526733, |
| "loss": 5.0905, |
| "lr": 0.0006093706293706293, |
| "step": 3294, |
| "tokens_trained": 0.313060608 |
| }, |
| { |
| "epoch": 0.9350354609929078, |
| "grad_norm": 0.9052334427833557, |
| "loss": 5.0416, |
| "lr": 0.0006090909090909092, |
| "step": 3296, |
| "tokens_trained": 0.313251584 |
| }, |
| { |
| "epoch": 0.9356028368794326, |
| "grad_norm": 0.949390709400177, |
| "loss": 4.9757, |
| "lr": 0.0006088111888111888, |
| "step": 3298, |
| "tokens_trained": 0.313440784 |
| }, |
| { |
| "epoch": 0.9361702127659575, |
| "grad_norm": 0.9845399260520935, |
| "loss": 5.0403, |
| "lr": 0.0006085314685314686, |
| "step": 3300, |
| "tokens_trained": 0.313631088 |
| }, |
| { |
| "epoch": 0.9367375886524822, |
| "grad_norm": 0.921394407749176, |
| "loss": 5.0464, |
| "lr": 0.0006082517482517482, |
| "step": 3302, |
| "tokens_trained": 0.313821704 |
| }, |
| { |
| "epoch": 0.9373049645390071, |
| "grad_norm": 0.9639559984207153, |
| "loss": 5.0658, |
| "lr": 0.000607972027972028, |
| "step": 3304, |
| "tokens_trained": 0.314011048 |
| }, |
| { |
| "epoch": 0.9378723404255319, |
| "grad_norm": 0.9921663403511047, |
| "loss": 5.0469, |
| "lr": 0.0006076923076923077, |
| "step": 3306, |
| "tokens_trained": 0.314199264 |
| }, |
| { |
| "epoch": 0.9384397163120567, |
| "grad_norm": 0.9891427159309387, |
| "loss": 5.0784, |
| "lr": 0.0006074125874125874, |
| "step": 3308, |
| "tokens_trained": 0.314388688 |
| }, |
| { |
| "epoch": 0.9390070921985816, |
| "grad_norm": 0.966525673866272, |
| "loss": 5.0759, |
| "lr": 0.0006071328671328672, |
| "step": 3310, |
| "tokens_trained": 0.31457712 |
| }, |
| { |
| "epoch": 0.9395744680851064, |
| "grad_norm": 0.9262145757675171, |
| "loss": 4.9822, |
| "lr": 0.0006068531468531468, |
| "step": 3312, |
| "tokens_trained": 0.314768096 |
| }, |
| { |
| "epoch": 0.9401418439716313, |
| "grad_norm": 0.9138565063476562, |
| "loss": 5.059, |
| "lr": 0.0006065734265734267, |
| "step": 3314, |
| "tokens_trained": 0.314959 |
| }, |
| { |
| "epoch": 0.940709219858156, |
| "grad_norm": 0.9083120226860046, |
| "loss": 5.0523, |
| "lr": 0.0006062937062937063, |
| "step": 3316, |
| "tokens_trained": 0.315148288 |
| }, |
| { |
| "epoch": 0.9412765957446808, |
| "grad_norm": 0.9483383893966675, |
| "loss": 5.0821, |
| "lr": 0.000606013986013986, |
| "step": 3318, |
| "tokens_trained": 0.31533864 |
| }, |
| { |
| "epoch": 0.9418439716312057, |
| "grad_norm": 0.8716344833374023, |
| "loss": 5.1046, |
| "lr": 0.0006057342657342657, |
| "step": 3320, |
| "tokens_trained": 0.31552972 |
| }, |
| { |
| "epoch": 0.9424113475177305, |
| "grad_norm": 0.9287091493606567, |
| "loss": 5.0531, |
| "lr": 0.0006054545454545455, |
| "step": 3322, |
| "tokens_trained": 0.315720136 |
| }, |
| { |
| "epoch": 0.9429787234042554, |
| "grad_norm": 0.9560433030128479, |
| "loss": 5.087, |
| "lr": 0.0006051748251748252, |
| "step": 3324, |
| "tokens_trained": 0.315911024 |
| }, |
| { |
| "epoch": 0.9435460992907801, |
| "grad_norm": 0.8612940311431885, |
| "loss": 5.1338, |
| "lr": 0.0006048951048951049, |
| "step": 3326, |
| "tokens_trained": 0.316102368 |
| }, |
| { |
| "epoch": 0.9441134751773049, |
| "grad_norm": 1.0215116739273071, |
| "loss": 5.034, |
| "lr": 0.0006046153846153846, |
| "step": 3328, |
| "tokens_trained": 0.316292296 |
| }, |
| { |
| "epoch": 0.9446808510638298, |
| "grad_norm": 1.0019500255584717, |
| "loss": 5.101, |
| "lr": 0.0006043356643356643, |
| "step": 3330, |
| "tokens_trained": 0.31648088 |
| }, |
| { |
| "epoch": 0.9452482269503546, |
| "grad_norm": 0.9435174465179443, |
| "loss": 5.0476, |
| "lr": 0.0006040559440559441, |
| "step": 3332, |
| "tokens_trained": 0.316672936 |
| }, |
| { |
| "epoch": 0.9458156028368795, |
| "grad_norm": 0.9211596846580505, |
| "loss": 5.039, |
| "lr": 0.0006037762237762238, |
| "step": 3334, |
| "tokens_trained": 0.31686408 |
| }, |
| { |
| "epoch": 0.9463829787234043, |
| "grad_norm": 0.9332453608512878, |
| "loss": 5.0857, |
| "lr": 0.0006034965034965035, |
| "step": 3336, |
| "tokens_trained": 0.317053896 |
| }, |
| { |
| "epoch": 0.946950354609929, |
| "grad_norm": 0.8761624097824097, |
| "loss": 5.0614, |
| "lr": 0.0006032167832167832, |
| "step": 3338, |
| "tokens_trained": 0.317245016 |
| }, |
| { |
| "epoch": 0.9475177304964539, |
| "grad_norm": 0.9113523364067078, |
| "loss": 5.0609, |
| "lr": 0.000602937062937063, |
| "step": 3340, |
| "tokens_trained": 0.317433592 |
| }, |
| { |
| "epoch": 0.9480851063829787, |
| "grad_norm": 1.0509337186813354, |
| "loss": 4.9984, |
| "lr": 0.0006026573426573426, |
| "step": 3342, |
| "tokens_trained": 0.317623392 |
| }, |
| { |
| "epoch": 0.9486524822695035, |
| "grad_norm": 0.9496453404426575, |
| "loss": 5.0632, |
| "lr": 0.0006023776223776224, |
| "step": 3344, |
| "tokens_trained": 0.317814848 |
| }, |
| { |
| "epoch": 0.9492198581560284, |
| "grad_norm": 0.913977861404419, |
| "loss": 5.0816, |
| "lr": 0.0006020979020979021, |
| "step": 3346, |
| "tokens_trained": 0.318003232 |
| }, |
| { |
| "epoch": 0.9497872340425532, |
| "grad_norm": 0.9476690292358398, |
| "loss": 5.1321, |
| "lr": 0.0006018181818181818, |
| "step": 3348, |
| "tokens_trained": 0.31819216 |
| }, |
| { |
| "epoch": 0.950354609929078, |
| "grad_norm": 1.0221197605133057, |
| "loss": 5.0602, |
| "lr": 0.0006015384615384616, |
| "step": 3350, |
| "tokens_trained": 0.318379648 |
| }, |
| { |
| "epoch": 0.9509219858156028, |
| "grad_norm": 0.9944773316383362, |
| "loss": 5.0595, |
| "lr": 0.0006012587412587413, |
| "step": 3352, |
| "tokens_trained": 0.3185692 |
| }, |
| { |
| "epoch": 0.9514893617021276, |
| "grad_norm": 0.9641481041908264, |
| "loss": 5.0842, |
| "lr": 0.000600979020979021, |
| "step": 3354, |
| "tokens_trained": 0.318758464 |
| }, |
| { |
| "epoch": 0.9520567375886525, |
| "grad_norm": 0.8794710636138916, |
| "loss": 5.0385, |
| "lr": 0.0006006993006993006, |
| "step": 3356, |
| "tokens_trained": 0.318948528 |
| }, |
| { |
| "epoch": 0.9526241134751773, |
| "grad_norm": 0.9986928701400757, |
| "loss": 5.0325, |
| "lr": 0.0006004195804195805, |
| "step": 3358, |
| "tokens_trained": 0.319137168 |
| }, |
| { |
| "epoch": 0.9531914893617022, |
| "grad_norm": 0.9385401606559753, |
| "loss": 4.9957, |
| "lr": 0.0006001398601398601, |
| "step": 3360, |
| "tokens_trained": 0.319327992 |
| }, |
| { |
| "epoch": 0.953758865248227, |
| "grad_norm": 0.9591023921966553, |
| "loss": 5.0883, |
| "lr": 0.0005998601398601399, |
| "step": 3362, |
| "tokens_trained": 0.319518928 |
| }, |
| { |
| "epoch": 0.9543262411347517, |
| "grad_norm": 0.9454349279403687, |
| "loss": 5.0639, |
| "lr": 0.0005995804195804196, |
| "step": 3364, |
| "tokens_trained": 0.319711176 |
| }, |
| { |
| "epoch": 0.9548936170212766, |
| "grad_norm": 0.9882696270942688, |
| "loss": 5.0326, |
| "lr": 0.0005993006993006993, |
| "step": 3366, |
| "tokens_trained": 0.319901272 |
| }, |
| { |
| "epoch": 0.9554609929078014, |
| "grad_norm": 0.9254516959190369, |
| "loss": 5.0454, |
| "lr": 0.0005990209790209791, |
| "step": 3368, |
| "tokens_trained": 0.320091928 |
| }, |
| { |
| "epoch": 0.9560283687943263, |
| "grad_norm": 0.9193766117095947, |
| "loss": 4.9996, |
| "lr": 0.0005987412587412587, |
| "step": 3370, |
| "tokens_trained": 0.320282712 |
| }, |
| { |
| "epoch": 0.9565957446808511, |
| "grad_norm": 0.9373677372932434, |
| "loss": 5.1228, |
| "lr": 0.0005984615384615385, |
| "step": 3372, |
| "tokens_trained": 0.320472168 |
| }, |
| { |
| "epoch": 0.9571631205673758, |
| "grad_norm": 0.9842008352279663, |
| "loss": 5.0338, |
| "lr": 0.0005981818181818181, |
| "step": 3374, |
| "tokens_trained": 0.320662592 |
| }, |
| { |
| "epoch": 0.9574468085106383, |
| "eval_loss": 5.064303398132324, |
| "eval_runtime": 20.617, |
| "step": 3375, |
| "tokens_trained": 0.320758504 |
| }, |
| { |
| "epoch": 0.9577304964539007, |
| "grad_norm": 0.9934602379798889, |
| "loss": 5.0444, |
| "lr": 0.000597902097902098, |
| "step": 3376, |
| "tokens_trained": 0.320853552 |
| }, |
| { |
| "epoch": 0.9582978723404255, |
| "grad_norm": 0.9192136526107788, |
| "loss": 5.0502, |
| "lr": 0.0005976223776223776, |
| "step": 3378, |
| "tokens_trained": 0.321043072 |
| }, |
| { |
| "epoch": 0.9588652482269504, |
| "grad_norm": 0.9416385293006897, |
| "loss": 5.0676, |
| "lr": 0.0005973426573426574, |
| "step": 3380, |
| "tokens_trained": 0.321234024 |
| }, |
| { |
| "epoch": 0.9594326241134752, |
| "grad_norm": 0.87016761302948, |
| "loss": 5.0474, |
| "lr": 0.0005970629370629371, |
| "step": 3382, |
| "tokens_trained": 0.321423504 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.9421593546867371, |
| "loss": 5.0148, |
| "lr": 0.0005967832167832167, |
| "step": 3384, |
| "tokens_trained": 0.32161436 |
| }, |
| { |
| "epoch": 0.9605673758865249, |
| "grad_norm": 0.9040830135345459, |
| "loss": 5.0065, |
| "lr": 0.0005965034965034966, |
| "step": 3386, |
| "tokens_trained": 0.321804688 |
| }, |
| { |
| "epoch": 0.9611347517730496, |
| "grad_norm": 0.9497122764587402, |
| "loss": 5.0882, |
| "lr": 0.0005962237762237762, |
| "step": 3388, |
| "tokens_trained": 0.321994728 |
| }, |
| { |
| "epoch": 0.9617021276595744, |
| "grad_norm": 0.9700525999069214, |
| "loss": 5.0462, |
| "lr": 0.000595944055944056, |
| "step": 3390, |
| "tokens_trained": 0.322186 |
| }, |
| { |
| "epoch": 0.9622695035460993, |
| "grad_norm": 0.9304616451263428, |
| "loss": 5.0781, |
| "lr": 0.0005956643356643356, |
| "step": 3392, |
| "tokens_trained": 0.322376408 |
| }, |
| { |
| "epoch": 0.9628368794326241, |
| "grad_norm": 0.8804431557655334, |
| "loss": 5.1449, |
| "lr": 0.0005953846153846155, |
| "step": 3394, |
| "tokens_trained": 0.322566024 |
| }, |
| { |
| "epoch": 0.963404255319149, |
| "grad_norm": 0.8852412700653076, |
| "loss": 5.0602, |
| "lr": 0.0005951048951048951, |
| "step": 3396, |
| "tokens_trained": 0.322758272 |
| }, |
| { |
| "epoch": 0.9639716312056738, |
| "grad_norm": 1.015409231185913, |
| "loss": 5.0753, |
| "lr": 0.0005948251748251748, |
| "step": 3398, |
| "tokens_trained": 0.322948904 |
| }, |
| { |
| "epoch": 0.9645390070921985, |
| "grad_norm": 0.9504172205924988, |
| "loss": 5.1003, |
| "lr": 0.0005945454545454546, |
| "step": 3400, |
| "tokens_trained": 0.323140944 |
| }, |
| { |
| "epoch": 0.9651063829787234, |
| "grad_norm": 0.8708069920539856, |
| "loss": 5.0316, |
| "lr": 0.0005942657342657342, |
| "step": 3402, |
| "tokens_trained": 0.323331864 |
| }, |
| { |
| "epoch": 0.9656737588652482, |
| "grad_norm": 0.8804038166999817, |
| "loss": 5.038, |
| "lr": 0.0005939860139860141, |
| "step": 3404, |
| "tokens_trained": 0.323521296 |
| }, |
| { |
| "epoch": 0.9662411347517731, |
| "grad_norm": 0.901345431804657, |
| "loss": 5.1247, |
| "lr": 0.0005937062937062937, |
| "step": 3406, |
| "tokens_trained": 0.323713464 |
| }, |
| { |
| "epoch": 0.9668085106382979, |
| "grad_norm": 0.8839131593704224, |
| "loss": 5.058, |
| "lr": 0.0005934265734265735, |
| "step": 3408, |
| "tokens_trained": 0.323903208 |
| }, |
| { |
| "epoch": 0.9673758865248226, |
| "grad_norm": 0.9157027006149292, |
| "loss": 5.007, |
| "lr": 0.0005931468531468531, |
| "step": 3410, |
| "tokens_trained": 0.324091224 |
| }, |
| { |
| "epoch": 0.9679432624113475, |
| "grad_norm": 0.9776141047477722, |
| "loss": 5.0244, |
| "lr": 0.000592867132867133, |
| "step": 3412, |
| "tokens_trained": 0.324281696 |
| }, |
| { |
| "epoch": 0.9685106382978723, |
| "grad_norm": 0.8768822550773621, |
| "loss": 5.0321, |
| "lr": 0.0005925874125874126, |
| "step": 3414, |
| "tokens_trained": 0.324471136 |
| }, |
| { |
| "epoch": 0.9690780141843972, |
| "grad_norm": 0.7926638722419739, |
| "loss": 5.0433, |
| "lr": 0.0005923076923076923, |
| "step": 3416, |
| "tokens_trained": 0.324661816 |
| }, |
| { |
| "epoch": 0.969645390070922, |
| "grad_norm": 0.8630013465881348, |
| "loss": 5.0876, |
| "lr": 0.0005920279720279721, |
| "step": 3418, |
| "tokens_trained": 0.324852152 |
| }, |
| { |
| "epoch": 0.9702127659574468, |
| "grad_norm": 0.8769309520721436, |
| "loss": 5.0611, |
| "lr": 0.0005917482517482517, |
| "step": 3420, |
| "tokens_trained": 0.325042144 |
| }, |
| { |
| "epoch": 0.9707801418439717, |
| "grad_norm": 0.8933807611465454, |
| "loss": 5.0288, |
| "lr": 0.0005914685314685316, |
| "step": 3422, |
| "tokens_trained": 0.325232376 |
| }, |
| { |
| "epoch": 0.9713475177304964, |
| "grad_norm": 0.9544175267219543, |
| "loss": 5.0038, |
| "lr": 0.0005911888111888112, |
| "step": 3424, |
| "tokens_trained": 0.325423784 |
| }, |
| { |
| "epoch": 0.9719148936170213, |
| "grad_norm": 0.9057655930519104, |
| "loss": 5.0613, |
| "lr": 0.0005909090909090909, |
| "step": 3426, |
| "tokens_trained": 0.325614744 |
| }, |
| { |
| "epoch": 0.9724822695035461, |
| "grad_norm": 0.8956878781318665, |
| "loss": 5.0327, |
| "lr": 0.0005906293706293706, |
| "step": 3428, |
| "tokens_trained": 0.325803144 |
| }, |
| { |
| "epoch": 0.9730496453900709, |
| "grad_norm": 0.8879814147949219, |
| "loss": 5.0018, |
| "lr": 0.0005903496503496504, |
| "step": 3430, |
| "tokens_trained": 0.3259944 |
| }, |
| { |
| "epoch": 0.9736170212765958, |
| "grad_norm": 0.8801882863044739, |
| "loss": 5.125, |
| "lr": 0.0005900699300699301, |
| "step": 3432, |
| "tokens_trained": 0.326185928 |
| }, |
| { |
| "epoch": 0.9741843971631206, |
| "grad_norm": 0.8586528301239014, |
| "loss": 5.0299, |
| "lr": 0.0005897902097902098, |
| "step": 3434, |
| "tokens_trained": 0.326378416 |
| }, |
| { |
| "epoch": 0.9747517730496454, |
| "grad_norm": 0.8574861884117126, |
| "loss": 5.0743, |
| "lr": 0.0005895104895104896, |
| "step": 3436, |
| "tokens_trained": 0.326569616 |
| }, |
| { |
| "epoch": 0.9753191489361702, |
| "grad_norm": 0.8478572368621826, |
| "loss": 5.0547, |
| "lr": 0.0005892307692307692, |
| "step": 3438, |
| "tokens_trained": 0.326759744 |
| }, |
| { |
| "epoch": 0.975886524822695, |
| "grad_norm": 0.8645881414413452, |
| "loss": 5.0504, |
| "lr": 0.000588951048951049, |
| "step": 3440, |
| "tokens_trained": 0.3269478 |
| }, |
| { |
| "epoch": 0.9764539007092199, |
| "grad_norm": 0.8346559405326843, |
| "loss": 5.0472, |
| "lr": 0.0005886713286713287, |
| "step": 3442, |
| "tokens_trained": 0.32714012 |
| }, |
| { |
| "epoch": 0.9770212765957447, |
| "grad_norm": 0.8666026592254639, |
| "loss": 5.0557, |
| "lr": 0.0005883916083916084, |
| "step": 3444, |
| "tokens_trained": 0.327329992 |
| }, |
| { |
| "epoch": 0.9775886524822694, |
| "grad_norm": 0.9243910312652588, |
| "loss": 5.0326, |
| "lr": 0.0005881118881118881, |
| "step": 3446, |
| "tokens_trained": 0.327520664 |
| }, |
| { |
| "epoch": 0.9781560283687943, |
| "grad_norm": 0.8909792304039001, |
| "loss": 5.0948, |
| "lr": 0.0005878321678321679, |
| "step": 3448, |
| "tokens_trained": 0.327712056 |
| }, |
| { |
| "epoch": 0.9787234042553191, |
| "grad_norm": 0.8586627840995789, |
| "loss": 5.0587, |
| "lr": 0.0005875524475524476, |
| "step": 3450, |
| "tokens_trained": 0.327903456 |
| }, |
| { |
| "epoch": 0.979290780141844, |
| "grad_norm": 0.9551260471343994, |
| "loss": 5.0493, |
| "lr": 0.0005872727272727273, |
| "step": 3452, |
| "tokens_trained": 0.328093768 |
| }, |
| { |
| "epoch": 0.9798581560283688, |
| "grad_norm": 0.8501218557357788, |
| "loss": 5.0725, |
| "lr": 0.0005869930069930069, |
| "step": 3454, |
| "tokens_trained": 0.328281896 |
| }, |
| { |
| "epoch": 0.9804255319148936, |
| "grad_norm": 0.8573510646820068, |
| "loss": 5.057, |
| "lr": 0.0005867132867132867, |
| "step": 3456, |
| "tokens_trained": 0.32847448 |
| }, |
| { |
| "epoch": 0.9809929078014185, |
| "grad_norm": 0.8716034889221191, |
| "loss": 4.9833, |
| "lr": 0.0005864335664335665, |
| "step": 3458, |
| "tokens_trained": 0.328661304 |
| }, |
| { |
| "epoch": 0.9815602836879432, |
| "grad_norm": 0.8251221179962158, |
| "loss": 5.0059, |
| "lr": 0.0005861538461538462, |
| "step": 3460, |
| "tokens_trained": 0.328850496 |
| }, |
| { |
| "epoch": 0.9821276595744681, |
| "grad_norm": 0.8577293753623962, |
| "loss": 5.0385, |
| "lr": 0.0005858741258741259, |
| "step": 3462, |
| "tokens_trained": 0.329039896 |
| }, |
| { |
| "epoch": 0.9826950354609929, |
| "grad_norm": 0.9229962825775146, |
| "loss": 5.0115, |
| "lr": 0.0005855944055944055, |
| "step": 3464, |
| "tokens_trained": 0.329230472 |
| }, |
| { |
| "epoch": 0.9832624113475177, |
| "grad_norm": 0.8700546622276306, |
| "loss": 5.0319, |
| "lr": 0.0005853146853146854, |
| "step": 3466, |
| "tokens_trained": 0.32941888 |
| }, |
| { |
| "epoch": 0.9838297872340426, |
| "grad_norm": 0.8610907196998596, |
| "loss": 5.0327, |
| "lr": 0.000585034965034965, |
| "step": 3468, |
| "tokens_trained": 0.329611152 |
| }, |
| { |
| "epoch": 0.9843971631205674, |
| "grad_norm": 0.7971277236938477, |
| "loss": 5.0813, |
| "lr": 0.0005847552447552448, |
| "step": 3470, |
| "tokens_trained": 0.329800024 |
| }, |
| { |
| "epoch": 0.9849645390070922, |
| "grad_norm": 0.9169178009033203, |
| "loss": 4.9764, |
| "lr": 0.0005844755244755244, |
| "step": 3472, |
| "tokens_trained": 0.329991688 |
| }, |
| { |
| "epoch": 0.985531914893617, |
| "grad_norm": 0.9630699157714844, |
| "loss": 5.0263, |
| "lr": 0.0005841958041958042, |
| "step": 3474, |
| "tokens_trained": 0.33018312 |
| }, |
| { |
| "epoch": 0.9860992907801418, |
| "grad_norm": 0.9706154465675354, |
| "loss": 4.9928, |
| "lr": 0.000583916083916084, |
| "step": 3476, |
| "tokens_trained": 0.330372336 |
| }, |
| { |
| "epoch": 0.9866666666666667, |
| "grad_norm": 0.9754578471183777, |
| "loss": 5.0122, |
| "lr": 0.0005836363636363636, |
| "step": 3478, |
| "tokens_trained": 0.330564472 |
| }, |
| { |
| "epoch": 0.9872340425531915, |
| "grad_norm": 0.9906936287879944, |
| "loss": 5.0495, |
| "lr": 0.0005833566433566434, |
| "step": 3480, |
| "tokens_trained": 0.3307554 |
| }, |
| { |
| "epoch": 0.9878014184397164, |
| "grad_norm": 0.9739910960197449, |
| "loss": 4.9801, |
| "lr": 0.000583076923076923, |
| "step": 3482, |
| "tokens_trained": 0.330944608 |
| }, |
| { |
| "epoch": 0.9883687943262411, |
| "grad_norm": 1.0058059692382812, |
| "loss": 5.0974, |
| "lr": 0.0005827972027972029, |
| "step": 3484, |
| "tokens_trained": 0.331134752 |
| }, |
| { |
| "epoch": 0.9889361702127659, |
| "grad_norm": 1.0330032110214233, |
| "loss": 5.1054, |
| "lr": 0.0005825174825174825, |
| "step": 3486, |
| "tokens_trained": 0.331323744 |
| }, |
| { |
| "epoch": 0.9895035460992908, |
| "grad_norm": 0.9857019186019897, |
| "loss": 5.0417, |
| "lr": 0.0005822377622377623, |
| "step": 3488, |
| "tokens_trained": 0.33151316 |
| }, |
| { |
| "epoch": 0.9900709219858156, |
| "grad_norm": 0.8929789066314697, |
| "loss": 5.0753, |
| "lr": 0.0005819580419580419, |
| "step": 3490, |
| "tokens_trained": 0.331703136 |
| }, |
| { |
| "epoch": 0.9906382978723405, |
| "grad_norm": 0.9803673624992371, |
| "loss": 5.0748, |
| "lr": 0.0005816783216783216, |
| "step": 3492, |
| "tokens_trained": 0.331894376 |
| }, |
| { |
| "epoch": 0.9912056737588653, |
| "grad_norm": 1.0658507347106934, |
| "loss": 4.952, |
| "lr": 0.0005813986013986015, |
| "step": 3494, |
| "tokens_trained": 0.33208472 |
| }, |
| { |
| "epoch": 0.99177304964539, |
| "grad_norm": 0.9646208882331848, |
| "loss": 5.0638, |
| "lr": 0.0005811188811188811, |
| "step": 3496, |
| "tokens_trained": 0.332274704 |
| }, |
| { |
| "epoch": 0.9923404255319149, |
| "grad_norm": 0.9479737281799316, |
| "loss": 4.9608, |
| "lr": 0.0005808391608391609, |
| "step": 3498, |
| "tokens_trained": 0.332464656 |
| }, |
| { |
| "epoch": 0.9929078014184397, |
| "grad_norm": 0.9422057867050171, |
| "loss": 4.9805, |
| "lr": 0.0005805594405594405, |
| "step": 3500, |
| "tokens_trained": 0.332653056 |
| }, |
| { |
| "epoch": 0.9929078014184397, |
| "eval_loss": 5.051141738891602, |
| "eval_runtime": 20.5005, |
| "step": 3500, |
| "tokens_trained": 0.332653056 |
| }, |
| { |
| "epoch": 0.9934751773049645, |
| "grad_norm": 0.8606115579605103, |
| "loss": 5.014, |
| "lr": 0.0005802797202797204, |
| "step": 3502, |
| "tokens_trained": 0.33284184 |
| }, |
| { |
| "epoch": 0.9940425531914894, |
| "grad_norm": 0.9218055009841919, |
| "loss": 5.015, |
| "lr": 0.00058, |
| "step": 3504, |
| "tokens_trained": 0.333031504 |
| }, |
| { |
| "epoch": 0.9946099290780142, |
| "grad_norm": 0.8346299529075623, |
| "loss": 5.0793, |
| "lr": 0.0005797202797202797, |
| "step": 3506, |
| "tokens_trained": 0.333222184 |
| }, |
| { |
| "epoch": 0.995177304964539, |
| "grad_norm": 0.9426013231277466, |
| "loss": 5.0416, |
| "lr": 0.0005794405594405594, |
| "step": 3508, |
| "tokens_trained": 0.333413 |
| }, |
| { |
| "epoch": 0.9957446808510638, |
| "grad_norm": 0.973893940448761, |
| "loss": 5.0579, |
| "lr": 0.0005791608391608391, |
| "step": 3510, |
| "tokens_trained": 0.333602176 |
| }, |
| { |
| "epoch": 0.9963120567375886, |
| "grad_norm": 0.9642478823661804, |
| "loss": 5.1078, |
| "lr": 0.000578881118881119, |
| "step": 3512, |
| "tokens_trained": 0.333792992 |
| }, |
| { |
| "epoch": 0.9968794326241135, |
| "grad_norm": 0.9709126949310303, |
| "loss": 5.0379, |
| "lr": 0.0005786013986013986, |
| "step": 3514, |
| "tokens_trained": 0.333982568 |
| }, |
| { |
| "epoch": 0.9974468085106383, |
| "grad_norm": 0.9238979816436768, |
| "loss": 5.0391, |
| "lr": 0.0005783216783216784, |
| "step": 3516, |
| "tokens_trained": 0.334171688 |
| }, |
| { |
| "epoch": 0.9980141843971632, |
| "grad_norm": 0.884200930595398, |
| "loss": 5.0402, |
| "lr": 0.000578041958041958, |
| "step": 3518, |
| "tokens_trained": 0.334361968 |
| }, |
| { |
| "epoch": 0.9985815602836879, |
| "grad_norm": 0.9557647705078125, |
| "loss": 5.0816, |
| "lr": 0.0005777622377622377, |
| "step": 3520, |
| "tokens_trained": 0.3345518 |
| }, |
| { |
| "epoch": 0.9991489361702127, |
| "grad_norm": 0.963729202747345, |
| "loss": 5.0047, |
| "lr": 0.0005774825174825175, |
| "step": 3522, |
| "tokens_trained": 0.334743104 |
| }, |
| { |
| "epoch": 0.9997163120567376, |
| "grad_norm": 0.8432028889656067, |
| "loss": 5.0323, |
| "lr": 0.0005772027972027972, |
| "step": 3524, |
| "tokens_trained": 0.334932336 |
| }, |
| { |
| "epoch": 1.0002836879432624, |
| "grad_norm": 0.9493524432182312, |
| "loss": 5.0686, |
| "lr": 0.0005769230769230769, |
| "step": 3526, |
| "tokens_trained": 0.335119632 |
| }, |
| { |
| "epoch": 1.0008510638297872, |
| "grad_norm": 0.8715479969978333, |
| "loss": 4.9798, |
| "lr": 0.0005766433566433566, |
| "step": 3528, |
| "tokens_trained": 0.335308752 |
| }, |
| { |
| "epoch": 1.001418439716312, |
| "grad_norm": 0.9414225816726685, |
| "loss": 5.0294, |
| "lr": 0.0005763636363636365, |
| "step": 3530, |
| "tokens_trained": 0.335499976 |
| }, |
| { |
| "epoch": 1.001985815602837, |
| "grad_norm": 0.9580470323562622, |
| "loss": 5.0097, |
| "lr": 0.0005760839160839161, |
| "step": 3532, |
| "tokens_trained": 0.335687704 |
| }, |
| { |
| "epoch": 1.0025531914893617, |
| "grad_norm": 0.8775055408477783, |
| "loss": 5.047, |
| "lr": 0.0005758041958041958, |
| "step": 3534, |
| "tokens_trained": 0.335877328 |
| }, |
| { |
| "epoch": 1.0031205673758865, |
| "grad_norm": 0.8149566054344177, |
| "loss": 5.0598, |
| "lr": 0.0005755244755244755, |
| "step": 3536, |
| "tokens_trained": 0.33606848 |
| }, |
| { |
| "epoch": 1.0036879432624113, |
| "grad_norm": 0.8992729783058167, |
| "loss": 4.9875, |
| "lr": 0.0005752447552447552, |
| "step": 3538, |
| "tokens_trained": 0.336259808 |
| }, |
| { |
| "epoch": 1.004255319148936, |
| "grad_norm": 0.913520872592926, |
| "loss": 5.0254, |
| "lr": 0.000574965034965035, |
| "step": 3540, |
| "tokens_trained": 0.336449872 |
| }, |
| { |
| "epoch": 1.004822695035461, |
| "grad_norm": 0.9528400301933289, |
| "loss": 4.9949, |
| "lr": 0.0005746853146853147, |
| "step": 3542, |
| "tokens_trained": 0.336640192 |
| }, |
| { |
| "epoch": 1.0053900709219858, |
| "grad_norm": 0.933910071849823, |
| "loss": 5.0776, |
| "lr": 0.0005744055944055944, |
| "step": 3544, |
| "tokens_trained": 0.336829088 |
| }, |
| { |
| "epoch": 1.0059574468085106, |
| "grad_norm": 0.9097418785095215, |
| "loss": 5.0021, |
| "lr": 0.0005741258741258741, |
| "step": 3546, |
| "tokens_trained": 0.337021576 |
| }, |
| { |
| "epoch": 1.0065248226950354, |
| "grad_norm": 0.8718441724777222, |
| "loss": 5.0946, |
| "lr": 0.000573846153846154, |
| "step": 3548, |
| "tokens_trained": 0.337210208 |
| }, |
| { |
| "epoch": 1.0070921985815602, |
| "grad_norm": 0.887489378452301, |
| "loss": 4.9686, |
| "lr": 0.0005735664335664336, |
| "step": 3550, |
| "tokens_trained": 0.337401176 |
| }, |
| { |
| "epoch": 1.0076595744680852, |
| "grad_norm": 0.8851242065429688, |
| "loss": 5.0423, |
| "lr": 0.0005732867132867133, |
| "step": 3552, |
| "tokens_trained": 0.337589216 |
| }, |
| { |
| "epoch": 1.00822695035461, |
| "grad_norm": 0.8381972312927246, |
| "loss": 5.0645, |
| "lr": 0.000573006993006993, |
| "step": 3554, |
| "tokens_trained": 0.337777424 |
| }, |
| { |
| "epoch": 1.0087943262411347, |
| "grad_norm": 0.8307796716690063, |
| "loss": 5.036, |
| "lr": 0.0005727272727272727, |
| "step": 3556, |
| "tokens_trained": 0.337967088 |
| }, |
| { |
| "epoch": 1.0093617021276595, |
| "grad_norm": 0.9271431565284729, |
| "loss": 5.0384, |
| "lr": 0.0005724475524475525, |
| "step": 3558, |
| "tokens_trained": 0.33815904 |
| }, |
| { |
| "epoch": 1.0099290780141843, |
| "grad_norm": 0.9501886367797852, |
| "loss": 5.0929, |
| "lr": 0.0005721678321678322, |
| "step": 3560, |
| "tokens_trained": 0.338349184 |
| }, |
| { |
| "epoch": 1.0104964539007093, |
| "grad_norm": 0.9176658987998962, |
| "loss": 5.0721, |
| "lr": 0.0005718881118881118, |
| "step": 3562, |
| "tokens_trained": 0.338539664 |
| }, |
| { |
| "epoch": 1.011063829787234, |
| "grad_norm": 0.8755439519882202, |
| "loss": 5.0864, |
| "lr": 0.0005716083916083916, |
| "step": 3564, |
| "tokens_trained": 0.33872792 |
| }, |
| { |
| "epoch": 1.0116312056737589, |
| "grad_norm": 0.9178908467292786, |
| "loss": 5.035, |
| "lr": 0.0005713286713286714, |
| "step": 3566, |
| "tokens_trained": 0.33891592 |
| }, |
| { |
| "epoch": 1.0121985815602836, |
| "grad_norm": 0.9046779870986938, |
| "loss": 5.0286, |
| "lr": 0.0005710489510489511, |
| "step": 3568, |
| "tokens_trained": 0.3391062 |
| }, |
| { |
| "epoch": 1.0127659574468084, |
| "grad_norm": 0.8680547475814819, |
| "loss": 5.036, |
| "lr": 0.0005707692307692308, |
| "step": 3570, |
| "tokens_trained": 0.339295896 |
| }, |
| { |
| "epoch": 1.0133333333333334, |
| "grad_norm": 0.8271722793579102, |
| "loss": 5.0438, |
| "lr": 0.0005704895104895105, |
| "step": 3572, |
| "tokens_trained": 0.339487368 |
| }, |
| { |
| "epoch": 1.0139007092198582, |
| "grad_norm": 0.8582717180252075, |
| "loss": 5.1501, |
| "lr": 0.0005702097902097902, |
| "step": 3574, |
| "tokens_trained": 0.339678792 |
| }, |
| { |
| "epoch": 1.014468085106383, |
| "grad_norm": 0.9433448314666748, |
| "loss": 5.0575, |
| "lr": 0.0005699300699300699, |
| "step": 3576, |
| "tokens_trained": 0.33987056 |
| }, |
| { |
| "epoch": 1.0150354609929078, |
| "grad_norm": 0.8291800022125244, |
| "loss": 5.0284, |
| "lr": 0.0005696503496503497, |
| "step": 3578, |
| "tokens_trained": 0.340059304 |
| }, |
| { |
| "epoch": 1.0156028368794325, |
| "grad_norm": 0.8057491183280945, |
| "loss": 5.0737, |
| "lr": 0.0005693706293706293, |
| "step": 3580, |
| "tokens_trained": 0.34024912 |
| }, |
| { |
| "epoch": 1.0161702127659575, |
| "grad_norm": 0.8925788998603821, |
| "loss": 5.017, |
| "lr": 0.0005690909090909091, |
| "step": 3582, |
| "tokens_trained": 0.340439688 |
| }, |
| { |
| "epoch": 1.0167375886524823, |
| "grad_norm": 0.8613091707229614, |
| "loss": 5.0778, |
| "lr": 0.0005688111888111889, |
| "step": 3584, |
| "tokens_trained": 0.34063064 |
| }, |
| { |
| "epoch": 1.017304964539007, |
| "grad_norm": 0.9694734811782837, |
| "loss": 5.0831, |
| "lr": 0.0005685314685314686, |
| "step": 3586, |
| "tokens_trained": 0.340820944 |
| }, |
| { |
| "epoch": 1.0178723404255319, |
| "grad_norm": 0.9405204653739929, |
| "loss": 5.0819, |
| "lr": 0.0005682517482517483, |
| "step": 3588, |
| "tokens_trained": 0.341008368 |
| }, |
| { |
| "epoch": 1.0184397163120567, |
| "grad_norm": 0.9191365838050842, |
| "loss": 5.016, |
| "lr": 0.0005679720279720279, |
| "step": 3590, |
| "tokens_trained": 0.341198984 |
| }, |
| { |
| "epoch": 1.0190070921985817, |
| "grad_norm": 0.9363374710083008, |
| "loss": 5.0432, |
| "lr": 0.0005676923076923077, |
| "step": 3592, |
| "tokens_trained": 0.341391808 |
| }, |
| { |
| "epoch": 1.0195744680851064, |
| "grad_norm": 0.9394513368606567, |
| "loss": 5.0159, |
| "lr": 0.0005674125874125874, |
| "step": 3594, |
| "tokens_trained": 0.34158276 |
| }, |
| { |
| "epoch": 1.0201418439716312, |
| "grad_norm": 0.8832948803901672, |
| "loss": 5.1156, |
| "lr": 0.0005671328671328672, |
| "step": 3596, |
| "tokens_trained": 0.341772296 |
| }, |
| { |
| "epoch": 1.020709219858156, |
| "grad_norm": 0.8347297310829163, |
| "loss": 5.0666, |
| "lr": 0.0005668531468531468, |
| "step": 3598, |
| "tokens_trained": 0.341959528 |
| }, |
| { |
| "epoch": 1.0212765957446808, |
| "grad_norm": 0.8295504450798035, |
| "loss": 5.0179, |
| "lr": 0.0005665734265734265, |
| "step": 3600, |
| "tokens_trained": 0.342150464 |
| }, |
| { |
| "epoch": 1.0218439716312058, |
| "grad_norm": 0.9434390068054199, |
| "loss": 5.0127, |
| "lr": 0.0005662937062937064, |
| "step": 3602, |
| "tokens_trained": 0.342339448 |
| }, |
| { |
| "epoch": 1.0224113475177306, |
| "grad_norm": 0.9653499722480774, |
| "loss": 5.0665, |
| "lr": 0.000566013986013986, |
| "step": 3604, |
| "tokens_trained": 0.342530488 |
| }, |
| { |
| "epoch": 1.0229787234042553, |
| "grad_norm": 0.8737668991088867, |
| "loss": 5.0718, |
| "lr": 0.0005657342657342658, |
| "step": 3606, |
| "tokens_trained": 0.342719696 |
| }, |
| { |
| "epoch": 1.02354609929078, |
| "grad_norm": 0.8800668716430664, |
| "loss": 5.0302, |
| "lr": 0.0005654545454545454, |
| "step": 3608, |
| "tokens_trained": 0.342909824 |
| }, |
| { |
| "epoch": 1.0241134751773049, |
| "grad_norm": 0.904245913028717, |
| "loss": 5.0692, |
| "lr": 0.0005651748251748252, |
| "step": 3610, |
| "tokens_trained": 0.343098816 |
| }, |
| { |
| "epoch": 1.02468085106383, |
| "grad_norm": 0.8640607595443726, |
| "loss": 5.0146, |
| "lr": 0.0005648951048951049, |
| "step": 3612, |
| "tokens_trained": 0.343288344 |
| }, |
| { |
| "epoch": 1.0252482269503547, |
| "grad_norm": 0.9330228567123413, |
| "loss": 5.0123, |
| "lr": 0.0005646153846153847, |
| "step": 3614, |
| "tokens_trained": 0.34347712 |
| }, |
| { |
| "epoch": 1.0258156028368794, |
| "grad_norm": 0.8850971460342407, |
| "loss": 5.0718, |
| "lr": 0.0005643356643356643, |
| "step": 3616, |
| "tokens_trained": 0.343666264 |
| }, |
| { |
| "epoch": 1.0263829787234042, |
| "grad_norm": 0.9091493487358093, |
| "loss": 5.0508, |
| "lr": 0.000564055944055944, |
| "step": 3618, |
| "tokens_trained": 0.343854008 |
| }, |
| { |
| "epoch": 1.026950354609929, |
| "grad_norm": 0.8939360976219177, |
| "loss": 5.0492, |
| "lr": 0.0005637762237762239, |
| "step": 3620, |
| "tokens_trained": 0.344046368 |
| }, |
| { |
| "epoch": 1.027517730496454, |
| "grad_norm": 0.9629043340682983, |
| "loss": 5.0234, |
| "lr": 0.0005634965034965035, |
| "step": 3622, |
| "tokens_trained": 0.344236592 |
| }, |
| { |
| "epoch": 1.0280851063829788, |
| "grad_norm": 0.955611526966095, |
| "loss": 4.9878, |
| "lr": 0.0005632167832167833, |
| "step": 3624, |
| "tokens_trained": 0.344425704 |
| }, |
| { |
| "epoch": 1.0283687943262412, |
| "eval_loss": 5.0450639724731445, |
| "eval_runtime": 20.6963, |
| "step": 3625, |
| "tokens_trained": 0.344518808 |
| }, |
| { |
| "epoch": 1.0286524822695036, |
| "grad_norm": 0.9501426815986633, |
| "loss": 5.0039, |
| "lr": 0.0005629370629370629, |
| "step": 3626, |
| "tokens_trained": 0.344612688 |
| }, |
| { |
| "epoch": 1.0292198581560283, |
| "grad_norm": 0.9446471333503723, |
| "loss": 5.0306, |
| "lr": 0.0005626573426573426, |
| "step": 3628, |
| "tokens_trained": 0.344802448 |
| }, |
| { |
| "epoch": 1.0297872340425531, |
| "grad_norm": 0.9773867726325989, |
| "loss": 5.0016, |
| "lr": 0.0005623776223776224, |
| "step": 3630, |
| "tokens_trained": 0.344992872 |
| }, |
| { |
| "epoch": 1.030354609929078, |
| "grad_norm": 0.8802851438522339, |
| "loss": 5.0263, |
| "lr": 0.0005620979020979021, |
| "step": 3632, |
| "tokens_trained": 0.345182064 |
| }, |
| { |
| "epoch": 1.030921985815603, |
| "grad_norm": 0.9009132385253906, |
| "loss": 4.9681, |
| "lr": 0.0005618181818181818, |
| "step": 3634, |
| "tokens_trained": 0.345372888 |
| }, |
| { |
| "epoch": 1.0314893617021277, |
| "grad_norm": 0.9252756834030151, |
| "loss": 4.9491, |
| "lr": 0.0005615384615384615, |
| "step": 3636, |
| "tokens_trained": 0.345563088 |
| }, |
| { |
| "epoch": 1.0320567375886525, |
| "grad_norm": 0.9195572733879089, |
| "loss": 5.0525, |
| "lr": 0.0005612587412587414, |
| "step": 3638, |
| "tokens_trained": 0.345753928 |
| }, |
| { |
| "epoch": 1.0326241134751772, |
| "grad_norm": 0.8032271862030029, |
| "loss": 5.0535, |
| "lr": 0.000560979020979021, |
| "step": 3640, |
| "tokens_trained": 0.345945664 |
| }, |
| { |
| "epoch": 1.033191489361702, |
| "grad_norm": 0.7840321660041809, |
| "loss": 4.9713, |
| "lr": 0.0005606993006993008, |
| "step": 3642, |
| "tokens_trained": 0.346134096 |
| }, |
| { |
| "epoch": 1.033758865248227, |
| "grad_norm": 0.8394534587860107, |
| "loss": 5.0695, |
| "lr": 0.0005604195804195804, |
| "step": 3644, |
| "tokens_trained": 0.346325368 |
| }, |
| { |
| "epoch": 1.0343262411347518, |
| "grad_norm": 0.8543218374252319, |
| "loss": 4.9826, |
| "lr": 0.0005601398601398601, |
| "step": 3646, |
| "tokens_trained": 0.346515088 |
| }, |
| { |
| "epoch": 1.0348936170212766, |
| "grad_norm": 0.8483793139457703, |
| "loss": 4.9956, |
| "lr": 0.0005598601398601399, |
| "step": 3648, |
| "tokens_trained": 0.346705304 |
| }, |
| { |
| "epoch": 1.0354609929078014, |
| "grad_norm": 0.8377392888069153, |
| "loss": 4.9123, |
| "lr": 0.0005595804195804196, |
| "step": 3650, |
| "tokens_trained": 0.34689744 |
| }, |
| { |
| "epoch": 1.0360283687943261, |
| "grad_norm": 0.902778685092926, |
| "loss": 5.0771, |
| "lr": 0.0005593006993006993, |
| "step": 3652, |
| "tokens_trained": 0.347086984 |
| }, |
| { |
| "epoch": 1.0365957446808511, |
| "grad_norm": 0.915446937084198, |
| "loss": 5.0235, |
| "lr": 0.000559020979020979, |
| "step": 3654, |
| "tokens_trained": 0.347278816 |
| }, |
| { |
| "epoch": 1.037163120567376, |
| "grad_norm": 0.803059458732605, |
| "loss": 5.0255, |
| "lr": 0.0005587412587412589, |
| "step": 3656, |
| "tokens_trained": 0.347468136 |
| }, |
| { |
| "epoch": 1.0377304964539007, |
| "grad_norm": 0.9930711984634399, |
| "loss": 5.0759, |
| "lr": 0.0005584615384615385, |
| "step": 3658, |
| "tokens_trained": 0.347659624 |
| }, |
| { |
| "epoch": 1.0382978723404255, |
| "grad_norm": 0.9266470670700073, |
| "loss": 5.0732, |
| "lr": 0.0005581818181818182, |
| "step": 3660, |
| "tokens_trained": 0.347848536 |
| }, |
| { |
| "epoch": 1.0388652482269503, |
| "grad_norm": 0.8442680835723877, |
| "loss": 5.0594, |
| "lr": 0.0005579020979020979, |
| "step": 3662, |
| "tokens_trained": 0.348039968 |
| }, |
| { |
| "epoch": 1.0394326241134753, |
| "grad_norm": 0.8922600746154785, |
| "loss": 4.9876, |
| "lr": 0.0005576223776223776, |
| "step": 3664, |
| "tokens_trained": 0.348229432 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 0.8602802753448486, |
| "loss": 5.0332, |
| "lr": 0.0005573426573426574, |
| "step": 3666, |
| "tokens_trained": 0.348420184 |
| }, |
| { |
| "epoch": 1.0405673758865248, |
| "grad_norm": 0.8762813806533813, |
| "loss": 4.9641, |
| "lr": 0.0005570629370629371, |
| "step": 3668, |
| "tokens_trained": 0.348609504 |
| }, |
| { |
| "epoch": 1.0411347517730496, |
| "grad_norm": 0.8674803972244263, |
| "loss": 5.0701, |
| "lr": 0.0005567832167832167, |
| "step": 3670, |
| "tokens_trained": 0.348799384 |
| }, |
| { |
| "epoch": 1.0417021276595744, |
| "grad_norm": 0.8296146988868713, |
| "loss": 5.0432, |
| "lr": 0.0005565034965034965, |
| "step": 3672, |
| "tokens_trained": 0.34898944 |
| }, |
| { |
| "epoch": 1.0422695035460994, |
| "grad_norm": 0.7757400870323181, |
| "loss": 5.0742, |
| "lr": 0.0005562237762237763, |
| "step": 3674, |
| "tokens_trained": 0.349178752 |
| }, |
| { |
| "epoch": 1.0428368794326242, |
| "grad_norm": 0.8509469032287598, |
| "loss": 5.0754, |
| "lr": 0.000555944055944056, |
| "step": 3676, |
| "tokens_trained": 0.349369944 |
| }, |
| { |
| "epoch": 1.043404255319149, |
| "grad_norm": 0.8896392583847046, |
| "loss": 5.0305, |
| "lr": 0.0005556643356643357, |
| "step": 3678, |
| "tokens_trained": 0.3495604 |
| }, |
| { |
| "epoch": 1.0439716312056737, |
| "grad_norm": 0.8363154530525208, |
| "loss": 4.9969, |
| "lr": 0.0005553846153846154, |
| "step": 3680, |
| "tokens_trained": 0.349749488 |
| }, |
| { |
| "epoch": 1.0445390070921985, |
| "grad_norm": 0.8382596969604492, |
| "loss": 4.9747, |
| "lr": 0.0005551048951048951, |
| "step": 3682, |
| "tokens_trained": 0.349939408 |
| }, |
| { |
| "epoch": 1.0451063829787235, |
| "grad_norm": 0.9114118218421936, |
| "loss": 4.9993, |
| "lr": 0.0005548251748251748, |
| "step": 3684, |
| "tokens_trained": 0.350129704 |
| }, |
| { |
| "epoch": 1.0456737588652483, |
| "grad_norm": 0.8570284843444824, |
| "loss": 5.0509, |
| "lr": 0.0005545454545454546, |
| "step": 3686, |
| "tokens_trained": 0.350319608 |
| }, |
| { |
| "epoch": 1.046241134751773, |
| "grad_norm": 0.8100084066390991, |
| "loss": 4.9202, |
| "lr": 0.0005542657342657342, |
| "step": 3688, |
| "tokens_trained": 0.35051 |
| }, |
| { |
| "epoch": 1.0468085106382978, |
| "grad_norm": 0.9485516548156738, |
| "loss": 4.983, |
| "lr": 0.000553986013986014, |
| "step": 3690, |
| "tokens_trained": 0.350702976 |
| }, |
| { |
| "epoch": 1.0473758865248226, |
| "grad_norm": 0.9124506115913391, |
| "loss": 5.0354, |
| "lr": 0.0005537062937062938, |
| "step": 3692, |
| "tokens_trained": 0.350894824 |
| }, |
| { |
| "epoch": 1.0479432624113476, |
| "grad_norm": 0.9002963900566101, |
| "loss": 5.0171, |
| "lr": 0.0005534265734265735, |
| "step": 3694, |
| "tokens_trained": 0.351085672 |
| }, |
| { |
| "epoch": 1.0485106382978724, |
| "grad_norm": 0.8576133251190186, |
| "loss": 5.0411, |
| "lr": 0.0005531468531468532, |
| "step": 3696, |
| "tokens_trained": 0.351274576 |
| }, |
| { |
| "epoch": 1.0490780141843972, |
| "grad_norm": 0.8824317455291748, |
| "loss": 5.034, |
| "lr": 0.0005528671328671328, |
| "step": 3698, |
| "tokens_trained": 0.351465168 |
| }, |
| { |
| "epoch": 1.049645390070922, |
| "grad_norm": 0.9119421243667603, |
| "loss": 5.0423, |
| "lr": 0.0005525874125874126, |
| "step": 3700, |
| "tokens_trained": 0.35165588 |
| }, |
| { |
| "epoch": 1.0502127659574467, |
| "grad_norm": 0.8260598182678223, |
| "loss": 5.0213, |
| "lr": 0.0005523076923076923, |
| "step": 3702, |
| "tokens_trained": 0.351846456 |
| }, |
| { |
| "epoch": 1.0507801418439717, |
| "grad_norm": 0.9968200922012329, |
| "loss": 4.9728, |
| "lr": 0.0005520279720279721, |
| "step": 3704, |
| "tokens_trained": 0.352036312 |
| }, |
| { |
| "epoch": 1.0513475177304965, |
| "grad_norm": 0.9910591840744019, |
| "loss": 5.0692, |
| "lr": 0.0005517482517482517, |
| "step": 3706, |
| "tokens_trained": 0.352227032 |
| }, |
| { |
| "epoch": 1.0519148936170213, |
| "grad_norm": 0.8656545877456665, |
| "loss": 5.0201, |
| "lr": 0.0005514685314685315, |
| "step": 3708, |
| "tokens_trained": 0.35241624 |
| }, |
| { |
| "epoch": 1.052482269503546, |
| "grad_norm": 0.9561606049537659, |
| "loss": 5.055, |
| "lr": 0.0005511888111888111, |
| "step": 3710, |
| "tokens_trained": 0.352607936 |
| }, |
| { |
| "epoch": 1.0530496453900708, |
| "grad_norm": 0.9602318406105042, |
| "loss": 5.0372, |
| "lr": 0.0005509090909090909, |
| "step": 3712, |
| "tokens_trained": 0.352797584 |
| }, |
| { |
| "epoch": 1.0536170212765958, |
| "grad_norm": 0.9743978381156921, |
| "loss": 5.0101, |
| "lr": 0.0005506293706293707, |
| "step": 3714, |
| "tokens_trained": 0.352988184 |
| }, |
| { |
| "epoch": 1.0541843971631206, |
| "grad_norm": 0.9676964282989502, |
| "loss": 5.0518, |
| "lr": 0.0005503496503496503, |
| "step": 3716, |
| "tokens_trained": 0.353180088 |
| }, |
| { |
| "epoch": 1.0547517730496454, |
| "grad_norm": 0.8736178874969482, |
| "loss": 5.0278, |
| "lr": 0.0005500699300699301, |
| "step": 3718, |
| "tokens_trained": 0.353370808 |
| }, |
| { |
| "epoch": 1.0553191489361702, |
| "grad_norm": 0.8516846895217896, |
| "loss": 4.9893, |
| "lr": 0.0005497902097902098, |
| "step": 3720, |
| "tokens_trained": 0.35356052 |
| }, |
| { |
| "epoch": 1.055886524822695, |
| "grad_norm": 1.0038187503814697, |
| "loss": 5.0376, |
| "lr": 0.0005495104895104896, |
| "step": 3722, |
| "tokens_trained": 0.353752744 |
| }, |
| { |
| "epoch": 1.05645390070922, |
| "grad_norm": 0.9077925682067871, |
| "loss": 5.045, |
| "lr": 0.0005492307692307692, |
| "step": 3724, |
| "tokens_trained": 0.353944136 |
| }, |
| { |
| "epoch": 1.0570212765957447, |
| "grad_norm": 0.8750975728034973, |
| "loss": 5.0275, |
| "lr": 0.0005489510489510489, |
| "step": 3726, |
| "tokens_trained": 0.354135648 |
| }, |
| { |
| "epoch": 1.0575886524822695, |
| "grad_norm": 0.9059204459190369, |
| "loss": 5.0502, |
| "lr": 0.0005486713286713286, |
| "step": 3728, |
| "tokens_trained": 0.354325256 |
| }, |
| { |
| "epoch": 1.0581560283687943, |
| "grad_norm": 0.8883426189422607, |
| "loss": 5.0016, |
| "lr": 0.0005483916083916084, |
| "step": 3730, |
| "tokens_trained": 0.354517776 |
| }, |
| { |
| "epoch": 1.058723404255319, |
| "grad_norm": 0.911379873752594, |
| "loss": 5.0363, |
| "lr": 0.0005481118881118882, |
| "step": 3732, |
| "tokens_trained": 0.354706528 |
| }, |
| { |
| "epoch": 1.0592907801418439, |
| "grad_norm": 0.8956911563873291, |
| "loss": 5.0028, |
| "lr": 0.0005478321678321678, |
| "step": 3734, |
| "tokens_trained": 0.354896352 |
| }, |
| { |
| "epoch": 1.0598581560283689, |
| "grad_norm": 0.9133324027061462, |
| "loss": 5.0426, |
| "lr": 0.0005475524475524476, |
| "step": 3736, |
| "tokens_trained": 0.3550884 |
| }, |
| { |
| "epoch": 1.0604255319148936, |
| "grad_norm": 0.8321526050567627, |
| "loss": 4.9918, |
| "lr": 0.0005472727272727273, |
| "step": 3738, |
| "tokens_trained": 0.355277608 |
| }, |
| { |
| "epoch": 1.0609929078014184, |
| "grad_norm": 0.8607254028320312, |
| "loss": 5.021, |
| "lr": 0.000546993006993007, |
| "step": 3740, |
| "tokens_trained": 0.355467432 |
| }, |
| { |
| "epoch": 1.0615602836879432, |
| "grad_norm": 0.8457037806510925, |
| "loss": 5.037, |
| "lr": 0.0005467132867132867, |
| "step": 3742, |
| "tokens_trained": 0.355659088 |
| }, |
| { |
| "epoch": 1.0621276595744682, |
| "grad_norm": 0.9381092190742493, |
| "loss": 4.9878, |
| "lr": 0.0005464335664335664, |
| "step": 3744, |
| "tokens_trained": 0.35585168 |
| }, |
| { |
| "epoch": 1.062695035460993, |
| "grad_norm": 0.8678731918334961, |
| "loss": 5.0716, |
| "lr": 0.0005461538461538461, |
| "step": 3746, |
| "tokens_trained": 0.356040984 |
| }, |
| { |
| "epoch": 1.0632624113475178, |
| "grad_norm": 0.8570135235786438, |
| "loss": 5.0018, |
| "lr": 0.0005458741258741259, |
| "step": 3748, |
| "tokens_trained": 0.356230064 |
| }, |
| { |
| "epoch": 1.0638297872340425, |
| "grad_norm": 0.9624084234237671, |
| "loss": 5.0531, |
| "lr": 0.0005455944055944057, |
| "step": 3750, |
| "tokens_trained": 0.356419352 |
| }, |
| { |
| "epoch": 1.0638297872340425, |
| "eval_loss": 5.037150859832764, |
| "eval_runtime": 20.8153, |
| "step": 3750, |
| "tokens_trained": 0.356419352 |
| }, |
| { |
| "epoch": 1.0643971631205673, |
| "grad_norm": 0.9213569760322571, |
| "loss": 5.0228, |
| "lr": 0.0005453146853146853, |
| "step": 3752, |
| "tokens_trained": 0.356611128 |
| }, |
| { |
| "epoch": 1.064964539007092, |
| "grad_norm": 0.8769538998603821, |
| "loss": 5.0138, |
| "lr": 0.000545034965034965, |
| "step": 3754, |
| "tokens_trained": 0.356800248 |
| }, |
| { |
| "epoch": 1.065531914893617, |
| "grad_norm": 0.9480370879173279, |
| "loss": 5.056, |
| "lr": 0.0005447552447552448, |
| "step": 3756, |
| "tokens_trained": 0.35699148 |
| }, |
| { |
| "epoch": 1.0660992907801419, |
| "grad_norm": 0.8391848206520081, |
| "loss": 5.0256, |
| "lr": 0.0005444755244755245, |
| "step": 3758, |
| "tokens_trained": 0.357182168 |
| }, |
| { |
| "epoch": 1.0666666666666667, |
| "grad_norm": 0.85853111743927, |
| "loss": 5.0147, |
| "lr": 0.0005441958041958042, |
| "step": 3760, |
| "tokens_trained": 0.357373032 |
| }, |
| { |
| "epoch": 1.0672340425531914, |
| "grad_norm": 0.8469287753105164, |
| "loss": 4.9702, |
| "lr": 0.0005439160839160839, |
| "step": 3762, |
| "tokens_trained": 0.357562944 |
| }, |
| { |
| "epoch": 1.0678014184397162, |
| "grad_norm": 0.8880507349967957, |
| "loss": 5.0123, |
| "lr": 0.0005436363636363635, |
| "step": 3764, |
| "tokens_trained": 0.357753872 |
| }, |
| { |
| "epoch": 1.0683687943262412, |
| "grad_norm": 0.9235898852348328, |
| "loss": 4.9693, |
| "lr": 0.0005433566433566434, |
| "step": 3766, |
| "tokens_trained": 0.357944312 |
| }, |
| { |
| "epoch": 1.068936170212766, |
| "grad_norm": 0.8787907361984253, |
| "loss": 4.9987, |
| "lr": 0.0005430769230769231, |
| "step": 3768, |
| "tokens_trained": 0.35813388 |
| }, |
| { |
| "epoch": 1.0695035460992908, |
| "grad_norm": 0.8627321124076843, |
| "loss": 4.9938, |
| "lr": 0.0005427972027972028, |
| "step": 3770, |
| "tokens_trained": 0.35832436 |
| }, |
| { |
| "epoch": 1.0700709219858155, |
| "grad_norm": 0.8891534805297852, |
| "loss": 4.9982, |
| "lr": 0.0005425174825174825, |
| "step": 3772, |
| "tokens_trained": 0.35851672 |
| }, |
| { |
| "epoch": 1.0706382978723403, |
| "grad_norm": 0.947503387928009, |
| "loss": 5.0114, |
| "lr": 0.0005422377622377623, |
| "step": 3774, |
| "tokens_trained": 0.358705936 |
| }, |
| { |
| "epoch": 1.0712056737588653, |
| "grad_norm": 0.9056106805801392, |
| "loss": 5.0199, |
| "lr": 0.000541958041958042, |
| "step": 3776, |
| "tokens_trained": 0.358896904 |
| }, |
| { |
| "epoch": 1.07177304964539, |
| "grad_norm": 0.9422404766082764, |
| "loss": 5.0556, |
| "lr": 0.0005416783216783216, |
| "step": 3778, |
| "tokens_trained": 0.35908716 |
| }, |
| { |
| "epoch": 1.0723404255319149, |
| "grad_norm": 0.9013909101486206, |
| "loss": 5.0516, |
| "lr": 0.0005413986013986014, |
| "step": 3780, |
| "tokens_trained": 0.359276784 |
| }, |
| { |
| "epoch": 1.0729078014184397, |
| "grad_norm": 0.8561504483222961, |
| "loss": 4.973, |
| "lr": 0.000541118881118881, |
| "step": 3782, |
| "tokens_trained": 0.35946816 |
| }, |
| { |
| "epoch": 1.0734751773049644, |
| "grad_norm": 0.8561832308769226, |
| "loss": 5.053, |
| "lr": 0.0005408391608391609, |
| "step": 3784, |
| "tokens_trained": 0.3596616 |
| }, |
| { |
| "epoch": 1.0740425531914894, |
| "grad_norm": 0.7730107307434082, |
| "loss": 5.006, |
| "lr": 0.0005405594405594406, |
| "step": 3786, |
| "tokens_trained": 0.359853624 |
| }, |
| { |
| "epoch": 1.0746099290780142, |
| "grad_norm": 0.889777660369873, |
| "loss": 5.0275, |
| "lr": 0.0005402797202797203, |
| "step": 3788, |
| "tokens_trained": 0.360044616 |
| }, |
| { |
| "epoch": 1.075177304964539, |
| "grad_norm": 0.8839589357376099, |
| "loss": 5.0635, |
| "lr": 0.00054, |
| "step": 3790, |
| "tokens_trained": 0.36023484 |
| }, |
| { |
| "epoch": 1.0757446808510638, |
| "grad_norm": 0.8542807102203369, |
| "loss": 5.0161, |
| "lr": 0.0005397202797202798, |
| "step": 3792, |
| "tokens_trained": 0.3604262 |
| }, |
| { |
| "epoch": 1.0763120567375886, |
| "grad_norm": 0.8976609706878662, |
| "loss": 5.0693, |
| "lr": 0.0005394405594405595, |
| "step": 3794, |
| "tokens_trained": 0.360615912 |
| }, |
| { |
| "epoch": 1.0768794326241136, |
| "grad_norm": 0.8138758540153503, |
| "loss": 4.9589, |
| "lr": 0.0005391608391608391, |
| "step": 3796, |
| "tokens_trained": 0.360807648 |
| }, |
| { |
| "epoch": 1.0774468085106383, |
| "grad_norm": 0.8604118227958679, |
| "loss": 5.0311, |
| "lr": 0.0005388811188811189, |
| "step": 3798, |
| "tokens_trained": 0.360998824 |
| }, |
| { |
| "epoch": 1.0780141843971631, |
| "grad_norm": 0.8839350342750549, |
| "loss": 5.0355, |
| "lr": 0.0005386013986013985, |
| "step": 3800, |
| "tokens_trained": 0.36119052 |
| }, |
| { |
| "epoch": 1.078581560283688, |
| "grad_norm": 0.9019435048103333, |
| "loss": 4.9899, |
| "lr": 0.0005383216783216784, |
| "step": 3802, |
| "tokens_trained": 0.361380456 |
| }, |
| { |
| "epoch": 1.0791489361702127, |
| "grad_norm": 0.8486269116401672, |
| "loss": 5.033, |
| "lr": 0.0005380419580419581, |
| "step": 3804, |
| "tokens_trained": 0.361569216 |
| }, |
| { |
| "epoch": 1.0797163120567377, |
| "grad_norm": 0.8133941888809204, |
| "loss": 5.0129, |
| "lr": 0.0005377622377622377, |
| "step": 3806, |
| "tokens_trained": 0.361761648 |
| }, |
| { |
| "epoch": 1.0802836879432625, |
| "grad_norm": 0.8590590357780457, |
| "loss": 5.0938, |
| "lr": 0.0005374825174825175, |
| "step": 3808, |
| "tokens_trained": 0.361950784 |
| }, |
| { |
| "epoch": 1.0808510638297872, |
| "grad_norm": 0.8362659215927124, |
| "loss": 5.0397, |
| "lr": 0.0005372027972027972, |
| "step": 3810, |
| "tokens_trained": 0.3621414 |
| }, |
| { |
| "epoch": 1.081418439716312, |
| "grad_norm": 0.912358820438385, |
| "loss": 4.9804, |
| "lr": 0.000536923076923077, |
| "step": 3812, |
| "tokens_trained": 0.362330072 |
| }, |
| { |
| "epoch": 1.0819858156028368, |
| "grad_norm": 0.9518508911132812, |
| "loss": 4.9975, |
| "lr": 0.0005366433566433566, |
| "step": 3814, |
| "tokens_trained": 0.362521472 |
| }, |
| { |
| "epoch": 1.0825531914893618, |
| "grad_norm": 0.8164550065994263, |
| "loss": 4.9745, |
| "lr": 0.0005363636363636364, |
| "step": 3816, |
| "tokens_trained": 0.362710744 |
| }, |
| { |
| "epoch": 1.0831205673758866, |
| "grad_norm": 0.9007307887077332, |
| "loss": 4.9715, |
| "lr": 0.000536083916083916, |
| "step": 3818, |
| "tokens_trained": 0.362900624 |
| }, |
| { |
| "epoch": 1.0836879432624114, |
| "grad_norm": 0.8775385022163391, |
| "loss": 5.0296, |
| "lr": 0.0005358041958041959, |
| "step": 3820, |
| "tokens_trained": 0.36309048 |
| }, |
| { |
| "epoch": 1.0842553191489361, |
| "grad_norm": 0.7864426970481873, |
| "loss": 4.9837, |
| "lr": 0.0005355244755244756, |
| "step": 3822, |
| "tokens_trained": 0.363280088 |
| }, |
| { |
| "epoch": 1.084822695035461, |
| "grad_norm": 0.7757525444030762, |
| "loss": 5.0445, |
| "lr": 0.0005352447552447552, |
| "step": 3824, |
| "tokens_trained": 0.363470768 |
| }, |
| { |
| "epoch": 1.085390070921986, |
| "grad_norm": 0.7588837146759033, |
| "loss": 5.0431, |
| "lr": 0.000534965034965035, |
| "step": 3826, |
| "tokens_trained": 0.363661176 |
| }, |
| { |
| "epoch": 1.0859574468085107, |
| "grad_norm": 0.8844705820083618, |
| "loss": 5.0192, |
| "lr": 0.0005346853146853147, |
| "step": 3828, |
| "tokens_trained": 0.363852544 |
| }, |
| { |
| "epoch": 1.0865248226950355, |
| "grad_norm": 0.8446291089057922, |
| "loss": 5.0647, |
| "lr": 0.0005344055944055945, |
| "step": 3830, |
| "tokens_trained": 0.364044088 |
| }, |
| { |
| "epoch": 1.0870921985815603, |
| "grad_norm": 0.8611181974411011, |
| "loss": 5.0475, |
| "lr": 0.0005341258741258741, |
| "step": 3832, |
| "tokens_trained": 0.364234688 |
| }, |
| { |
| "epoch": 1.087659574468085, |
| "grad_norm": 0.8670753240585327, |
| "loss": 5.0243, |
| "lr": 0.0005338461538461538, |
| "step": 3834, |
| "tokens_trained": 0.364424096 |
| }, |
| { |
| "epoch": 1.0882269503546098, |
| "grad_norm": 0.8563777208328247, |
| "loss": 5.0512, |
| "lr": 0.0005335664335664335, |
| "step": 3836, |
| "tokens_trained": 0.364611896 |
| }, |
| { |
| "epoch": 1.0887943262411348, |
| "grad_norm": 0.849647581577301, |
| "loss": 5.0089, |
| "lr": 0.0005332867132867133, |
| "step": 3838, |
| "tokens_trained": 0.364800808 |
| }, |
| { |
| "epoch": 1.0893617021276596, |
| "grad_norm": 0.8674852252006531, |
| "loss": 5.0018, |
| "lr": 0.0005330069930069931, |
| "step": 3840, |
| "tokens_trained": 0.364993432 |
| }, |
| { |
| "epoch": 1.0899290780141844, |
| "grad_norm": 0.8642079830169678, |
| "loss": 4.9989, |
| "lr": 0.0005327272727272727, |
| "step": 3842, |
| "tokens_trained": 0.365182432 |
| }, |
| { |
| "epoch": 1.0904964539007092, |
| "grad_norm": 0.8550288081169128, |
| "loss": 4.9855, |
| "lr": 0.0005324475524475525, |
| "step": 3844, |
| "tokens_trained": 0.365372416 |
| }, |
| { |
| "epoch": 1.0910638297872342, |
| "grad_norm": 0.901297926902771, |
| "loss": 5.0342, |
| "lr": 0.0005321678321678322, |
| "step": 3846, |
| "tokens_trained": 0.365564576 |
| }, |
| { |
| "epoch": 1.091631205673759, |
| "grad_norm": 0.8426658511161804, |
| "loss": 5.0301, |
| "lr": 0.000531888111888112, |
| "step": 3848, |
| "tokens_trained": 0.36575708 |
| }, |
| { |
| "epoch": 1.0921985815602837, |
| "grad_norm": 0.8530165553092957, |
| "loss": 5.071, |
| "lr": 0.0005316083916083916, |
| "step": 3850, |
| "tokens_trained": 0.365947984 |
| }, |
| { |
| "epoch": 1.0927659574468085, |
| "grad_norm": 0.9010403156280518, |
| "loss": 5.0279, |
| "lr": 0.0005313286713286713, |
| "step": 3852, |
| "tokens_trained": 0.366136392 |
| }, |
| { |
| "epoch": 1.0933333333333333, |
| "grad_norm": 0.9402730464935303, |
| "loss": 4.9896, |
| "lr": 0.000531048951048951, |
| "step": 3854, |
| "tokens_trained": 0.36632536 |
| }, |
| { |
| "epoch": 1.093900709219858, |
| "grad_norm": 0.8633377552032471, |
| "loss": 5.0093, |
| "lr": 0.0005307692307692308, |
| "step": 3856, |
| "tokens_trained": 0.366515056 |
| }, |
| { |
| "epoch": 1.094468085106383, |
| "grad_norm": 0.8778465390205383, |
| "loss": 4.9574, |
| "lr": 0.0005304895104895106, |
| "step": 3858, |
| "tokens_trained": 0.366705328 |
| }, |
| { |
| "epoch": 1.0950354609929078, |
| "grad_norm": 0.8562993407249451, |
| "loss": 4.9938, |
| "lr": 0.0005302097902097902, |
| "step": 3860, |
| "tokens_trained": 0.36689668 |
| }, |
| { |
| "epoch": 1.0956028368794326, |
| "grad_norm": 0.8061450719833374, |
| "loss": 5.0132, |
| "lr": 0.0005299300699300699, |
| "step": 3862, |
| "tokens_trained": 0.367087104 |
| }, |
| { |
| "epoch": 1.0961702127659574, |
| "grad_norm": 0.9253963232040405, |
| "loss": 4.9672, |
| "lr": 0.0005296503496503497, |
| "step": 3864, |
| "tokens_trained": 0.36727676 |
| }, |
| { |
| "epoch": 1.0967375886524824, |
| "grad_norm": 0.8339546918869019, |
| "loss": 4.9757, |
| "lr": 0.0005293706293706294, |
| "step": 3866, |
| "tokens_trained": 0.367467928 |
| }, |
| { |
| "epoch": 1.0973049645390072, |
| "grad_norm": 0.7303675413131714, |
| "loss": 4.9613, |
| "lr": 0.0005290909090909091, |
| "step": 3868, |
| "tokens_trained": 0.36765876 |
| }, |
| { |
| "epoch": 1.097872340425532, |
| "grad_norm": 0.8417290449142456, |
| "loss": 5.0074, |
| "lr": 0.0005288111888111888, |
| "step": 3870, |
| "tokens_trained": 0.367848064 |
| }, |
| { |
| "epoch": 1.0984397163120567, |
| "grad_norm": 0.7773861289024353, |
| "loss": 4.9411, |
| "lr": 0.0005285314685314684, |
| "step": 3872, |
| "tokens_trained": 0.368038176 |
| }, |
| { |
| "epoch": 1.0990070921985815, |
| "grad_norm": 0.8101850152015686, |
| "loss": 5.0479, |
| "lr": 0.0005282517482517483, |
| "step": 3874, |
| "tokens_trained": 0.368228208 |
| }, |
| { |
| "epoch": 1.099290780141844, |
| "eval_loss": 5.027334690093994, |
| "eval_runtime": 20.6629, |
| "step": 3875, |
| "tokens_trained": 0.368324424 |
| }, |
| { |
| "epoch": 1.0995744680851063, |
| "grad_norm": 0.8131702542304993, |
| "loss": 5.045, |
| "lr": 0.000527972027972028, |
| "step": 3876, |
| "tokens_trained": 0.368421216 |
| }, |
| { |
| "epoch": 1.1001418439716313, |
| "grad_norm": 0.7819017171859741, |
| "loss": 5.0151, |
| "lr": 0.0005276923076923077, |
| "step": 3878, |
| "tokens_trained": 0.368612904 |
| }, |
| { |
| "epoch": 1.100709219858156, |
| "grad_norm": 0.8118953108787537, |
| "loss": 5.0233, |
| "lr": 0.0005274125874125874, |
| "step": 3880, |
| "tokens_trained": 0.368803144 |
| }, |
| { |
| "epoch": 1.1012765957446808, |
| "grad_norm": 0.8203917741775513, |
| "loss": 4.9401, |
| "lr": 0.0005271328671328672, |
| "step": 3882, |
| "tokens_trained": 0.368993072 |
| }, |
| { |
| "epoch": 1.1018439716312056, |
| "grad_norm": 0.8229494690895081, |
| "loss": 5.0605, |
| "lr": 0.0005268531468531469, |
| "step": 3884, |
| "tokens_trained": 0.36918396 |
| }, |
| { |
| "epoch": 1.1024113475177304, |
| "grad_norm": 0.7224509119987488, |
| "loss": 5.03, |
| "lr": 0.0005265734265734266, |
| "step": 3886, |
| "tokens_trained": 0.36937192 |
| }, |
| { |
| "epoch": 1.1029787234042554, |
| "grad_norm": 0.8122052550315857, |
| "loss": 5.0416, |
| "lr": 0.0005262937062937063, |
| "step": 3888, |
| "tokens_trained": 0.36956204 |
| }, |
| { |
| "epoch": 1.1035460992907802, |
| "grad_norm": 0.8190508484840393, |
| "loss": 5.0106, |
| "lr": 0.0005260139860139859, |
| "step": 3890, |
| "tokens_trained": 0.369753208 |
| }, |
| { |
| "epoch": 1.104113475177305, |
| "grad_norm": 0.845341682434082, |
| "loss": 5.0, |
| "lr": 0.0005257342657342658, |
| "step": 3892, |
| "tokens_trained": 0.36994372 |
| }, |
| { |
| "epoch": 1.1046808510638297, |
| "grad_norm": 0.9587157964706421, |
| "loss": 5.0319, |
| "lr": 0.0005254545454545455, |
| "step": 3894, |
| "tokens_trained": 0.370133776 |
| }, |
| { |
| "epoch": 1.1052482269503545, |
| "grad_norm": 0.8882042169570923, |
| "loss": 4.9617, |
| "lr": 0.0005251748251748252, |
| "step": 3896, |
| "tokens_trained": 0.370322504 |
| }, |
| { |
| "epoch": 1.1058156028368795, |
| "grad_norm": 0.879010021686554, |
| "loss": 4.9197, |
| "lr": 0.0005248951048951049, |
| "step": 3898, |
| "tokens_trained": 0.370514864 |
| }, |
| { |
| "epoch": 1.1063829787234043, |
| "grad_norm": 0.9890635013580322, |
| "loss": 5.0381, |
| "lr": 0.0005246153846153847, |
| "step": 3900, |
| "tokens_trained": 0.370706568 |
| }, |
| { |
| "epoch": 1.106950354609929, |
| "grad_norm": 0.8491361737251282, |
| "loss": 5.0187, |
| "lr": 0.0005243356643356644, |
| "step": 3902, |
| "tokens_trained": 0.370899112 |
| }, |
| { |
| "epoch": 1.1075177304964539, |
| "grad_norm": 0.8746361136436462, |
| "loss": 5.0972, |
| "lr": 0.000524055944055944, |
| "step": 3904, |
| "tokens_trained": 0.37108932 |
| }, |
| { |
| "epoch": 1.1080851063829786, |
| "grad_norm": 0.9623220562934875, |
| "loss": 5.0143, |
| "lr": 0.0005237762237762238, |
| "step": 3906, |
| "tokens_trained": 0.371276808 |
| }, |
| { |
| "epoch": 1.1086524822695036, |
| "grad_norm": 0.8145681023597717, |
| "loss": 5.0081, |
| "lr": 0.0005234965034965034, |
| "step": 3908, |
| "tokens_trained": 0.3714666 |
| }, |
| { |
| "epoch": 1.1092198581560284, |
| "grad_norm": 0.8862302899360657, |
| "loss": 5.0613, |
| "lr": 0.0005232167832167833, |
| "step": 3910, |
| "tokens_trained": 0.371654632 |
| }, |
| { |
| "epoch": 1.1097872340425532, |
| "grad_norm": 0.8897994160652161, |
| "loss": 5.0447, |
| "lr": 0.000522937062937063, |
| "step": 3912, |
| "tokens_trained": 0.37184496 |
| }, |
| { |
| "epoch": 1.110354609929078, |
| "grad_norm": 0.9659616947174072, |
| "loss": 5.0852, |
| "lr": 0.0005226573426573427, |
| "step": 3914, |
| "tokens_trained": 0.372034032 |
| }, |
| { |
| "epoch": 1.1109219858156028, |
| "grad_norm": 0.8457762002944946, |
| "loss": 4.9992, |
| "lr": 0.0005223776223776224, |
| "step": 3916, |
| "tokens_trained": 0.372224056 |
| }, |
| { |
| "epoch": 1.1114893617021278, |
| "grad_norm": 0.8297874927520752, |
| "loss": 5.0416, |
| "lr": 0.0005220979020979021, |
| "step": 3918, |
| "tokens_trained": 0.372413992 |
| }, |
| { |
| "epoch": 1.1120567375886525, |
| "grad_norm": 0.8436822295188904, |
| "loss": 5.0201, |
| "lr": 0.0005218181818181819, |
| "step": 3920, |
| "tokens_trained": 0.372604784 |
| }, |
| { |
| "epoch": 1.1126241134751773, |
| "grad_norm": 0.8133619427680969, |
| "loss": 5.0074, |
| "lr": 0.0005215384615384615, |
| "step": 3922, |
| "tokens_trained": 0.372796808 |
| }, |
| { |
| "epoch": 1.113191489361702, |
| "grad_norm": 0.7879509925842285, |
| "loss": 5.0536, |
| "lr": 0.0005212587412587413, |
| "step": 3924, |
| "tokens_trained": 0.372988416 |
| }, |
| { |
| "epoch": 1.1137588652482269, |
| "grad_norm": 0.8212776780128479, |
| "loss": 5.0259, |
| "lr": 0.0005209790209790209, |
| "step": 3926, |
| "tokens_trained": 0.373178784 |
| }, |
| { |
| "epoch": 1.1143262411347519, |
| "grad_norm": 0.8426427245140076, |
| "loss": 5.0017, |
| "lr": 0.0005206993006993008, |
| "step": 3928, |
| "tokens_trained": 0.373367992 |
| }, |
| { |
| "epoch": 1.1148936170212767, |
| "grad_norm": 0.8375839591026306, |
| "loss": 4.9984, |
| "lr": 0.0005204195804195805, |
| "step": 3930, |
| "tokens_trained": 0.373558848 |
| }, |
| { |
| "epoch": 1.1154609929078014, |
| "grad_norm": 0.907742440700531, |
| "loss": 5.0629, |
| "lr": 0.0005201398601398601, |
| "step": 3932, |
| "tokens_trained": 0.373748656 |
| }, |
| { |
| "epoch": 1.1160283687943262, |
| "grad_norm": 0.8619366884231567, |
| "loss": 4.9702, |
| "lr": 0.0005198601398601399, |
| "step": 3934, |
| "tokens_trained": 0.373937232 |
| }, |
| { |
| "epoch": 1.116595744680851, |
| "grad_norm": 0.8558400273323059, |
| "loss": 4.9833, |
| "lr": 0.0005195804195804196, |
| "step": 3936, |
| "tokens_trained": 0.3741268 |
| }, |
| { |
| "epoch": 1.117163120567376, |
| "grad_norm": 0.7772043347358704, |
| "loss": 5.0636, |
| "lr": 0.0005193006993006994, |
| "step": 3938, |
| "tokens_trained": 0.374315752 |
| }, |
| { |
| "epoch": 1.1177304964539008, |
| "grad_norm": 0.9044018387794495, |
| "loss": 5.0419, |
| "lr": 0.000519020979020979, |
| "step": 3940, |
| "tokens_trained": 0.374504464 |
| }, |
| { |
| "epoch": 1.1182978723404255, |
| "grad_norm": 0.8944953083992004, |
| "loss": 4.961, |
| "lr": 0.0005187412587412588, |
| "step": 3942, |
| "tokens_trained": 0.374695528 |
| }, |
| { |
| "epoch": 1.1188652482269503, |
| "grad_norm": 0.8230746984481812, |
| "loss": 5.0148, |
| "lr": 0.0005184615384615384, |
| "step": 3944, |
| "tokens_trained": 0.374886128 |
| }, |
| { |
| "epoch": 1.119432624113475, |
| "grad_norm": 0.7891346216201782, |
| "loss": 4.9601, |
| "lr": 0.0005181818181818182, |
| "step": 3946, |
| "tokens_trained": 0.375074408 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.8364359140396118, |
| "loss": 5.0317, |
| "lr": 0.000517902097902098, |
| "step": 3948, |
| "tokens_trained": 0.37526636 |
| }, |
| { |
| "epoch": 1.1205673758865249, |
| "grad_norm": 0.7932770252227783, |
| "loss": 4.9399, |
| "lr": 0.0005176223776223776, |
| "step": 3950, |
| "tokens_trained": 0.375455888 |
| }, |
| { |
| "epoch": 1.1211347517730497, |
| "grad_norm": 0.8276688456535339, |
| "loss": 4.9465, |
| "lr": 0.0005173426573426574, |
| "step": 3952, |
| "tokens_trained": 0.37564728 |
| }, |
| { |
| "epoch": 1.1217021276595744, |
| "grad_norm": 0.8242233991622925, |
| "loss": 5.0069, |
| "lr": 0.000517062937062937, |
| "step": 3954, |
| "tokens_trained": 0.375839296 |
| }, |
| { |
| "epoch": 1.1222695035460992, |
| "grad_norm": 0.8828895688056946, |
| "loss": 4.9488, |
| "lr": 0.0005167832167832169, |
| "step": 3956, |
| "tokens_trained": 0.376028744 |
| }, |
| { |
| "epoch": 1.122836879432624, |
| "grad_norm": 0.8730418682098389, |
| "loss": 4.9729, |
| "lr": 0.0005165034965034965, |
| "step": 3958, |
| "tokens_trained": 0.376217656 |
| }, |
| { |
| "epoch": 1.123404255319149, |
| "grad_norm": 0.7701008915901184, |
| "loss": 4.9922, |
| "lr": 0.0005162237762237762, |
| "step": 3960, |
| "tokens_trained": 0.3764066 |
| }, |
| { |
| "epoch": 1.1239716312056738, |
| "grad_norm": 0.8723980784416199, |
| "loss": 4.9452, |
| "lr": 0.0005159440559440559, |
| "step": 3962, |
| "tokens_trained": 0.376594952 |
| }, |
| { |
| "epoch": 1.1245390070921986, |
| "grad_norm": 0.9300636053085327, |
| "loss": 5.0595, |
| "lr": 0.0005156643356643357, |
| "step": 3964, |
| "tokens_trained": 0.376785256 |
| }, |
| { |
| "epoch": 1.1251063829787233, |
| "grad_norm": 0.8684858083724976, |
| "loss": 5.0372, |
| "lr": 0.0005153846153846154, |
| "step": 3966, |
| "tokens_trained": 0.376975 |
| }, |
| { |
| "epoch": 1.1256737588652483, |
| "grad_norm": 0.8335841298103333, |
| "loss": 5.0636, |
| "lr": 0.0005151048951048951, |
| "step": 3968, |
| "tokens_trained": 0.377164552 |
| }, |
| { |
| "epoch": 1.1262411347517731, |
| "grad_norm": 0.8454932570457458, |
| "loss": 4.9603, |
| "lr": 0.0005148251748251748, |
| "step": 3970, |
| "tokens_trained": 0.377353968 |
| }, |
| { |
| "epoch": 1.126808510638298, |
| "grad_norm": 0.8978991508483887, |
| "loss": 5.0161, |
| "lr": 0.0005145454545454545, |
| "step": 3972, |
| "tokens_trained": 0.377543664 |
| }, |
| { |
| "epoch": 1.1273758865248227, |
| "grad_norm": 0.863207995891571, |
| "loss": 4.9949, |
| "lr": 0.0005142657342657343, |
| "step": 3974, |
| "tokens_trained": 0.37773332 |
| }, |
| { |
| "epoch": 1.1279432624113475, |
| "grad_norm": 0.8614553213119507, |
| "loss": 4.9812, |
| "lr": 0.000513986013986014, |
| "step": 3976, |
| "tokens_trained": 0.377921272 |
| }, |
| { |
| "epoch": 1.1285106382978722, |
| "grad_norm": 0.8703583478927612, |
| "loss": 4.9823, |
| "lr": 0.0005137062937062937, |
| "step": 3978, |
| "tokens_trained": 0.378112584 |
| }, |
| { |
| "epoch": 1.1290780141843972, |
| "grad_norm": 0.7951223254203796, |
| "loss": 4.9732, |
| "lr": 0.0005134265734265734, |
| "step": 3980, |
| "tokens_trained": 0.378302088 |
| }, |
| { |
| "epoch": 1.129645390070922, |
| "grad_norm": 0.8486145734786987, |
| "loss": 4.9422, |
| "lr": 0.0005131468531468532, |
| "step": 3982, |
| "tokens_trained": 0.37849388 |
| }, |
| { |
| "epoch": 1.1302127659574468, |
| "grad_norm": 0.8054757714271545, |
| "loss": 4.9911, |
| "lr": 0.0005128671328671328, |
| "step": 3984, |
| "tokens_trained": 0.378682928 |
| }, |
| { |
| "epoch": 1.1307801418439716, |
| "grad_norm": 0.83322674036026, |
| "loss": 5.0289, |
| "lr": 0.0005125874125874126, |
| "step": 3986, |
| "tokens_trained": 0.378874296 |
| }, |
| { |
| "epoch": 1.1313475177304966, |
| "grad_norm": 0.8249304890632629, |
| "loss": 5.0455, |
| "lr": 0.0005123076923076923, |
| "step": 3988, |
| "tokens_trained": 0.379067408 |
| }, |
| { |
| "epoch": 1.1319148936170214, |
| "grad_norm": 0.8258949518203735, |
| "loss": 4.9703, |
| "lr": 0.000512027972027972, |
| "step": 3990, |
| "tokens_trained": 0.379255328 |
| }, |
| { |
| "epoch": 1.1324822695035461, |
| "grad_norm": 0.8535506725311279, |
| "loss": 5.0652, |
| "lr": 0.0005117482517482518, |
| "step": 3992, |
| "tokens_trained": 0.379446152 |
| }, |
| { |
| "epoch": 1.133049645390071, |
| "grad_norm": 0.8468305468559265, |
| "loss": 5.0071, |
| "lr": 0.0005114685314685315, |
| "step": 3994, |
| "tokens_trained": 0.379637664 |
| }, |
| { |
| "epoch": 1.1336170212765957, |
| "grad_norm": 0.8334465026855469, |
| "loss": 5.043, |
| "lr": 0.0005111888111888112, |
| "step": 3996, |
| "tokens_trained": 0.379829408 |
| }, |
| { |
| "epoch": 1.1341843971631205, |
| "grad_norm": 0.8690851926803589, |
| "loss": 4.9637, |
| "lr": 0.0005109090909090908, |
| "step": 3998, |
| "tokens_trained": 0.380021248 |
| }, |
| { |
| "epoch": 1.1347517730496455, |
| "grad_norm": 0.7997336983680725, |
| "loss": 5.0168, |
| "lr": 0.0005106293706293707, |
| "step": 4000, |
| "tokens_trained": 0.380212256 |
| }, |
| { |
| "epoch": 1.1347517730496455, |
| "eval_loss": 5.021827697753906, |
| "eval_runtime": 20.8538, |
| "step": 4000, |
| "tokens_trained": 0.380212256 |
| }, |
| { |
| "epoch": 1.1353191489361703, |
| "grad_norm": 0.8898105621337891, |
| "loss": 4.9954, |
| "lr": 0.0005103496503496503, |
| "step": 4002, |
| "tokens_trained": 0.380403128 |
| }, |
| { |
| "epoch": 1.135886524822695, |
| "grad_norm": 0.8997061848640442, |
| "loss": 5.0087, |
| "lr": 0.0005100699300699301, |
| "step": 4004, |
| "tokens_trained": 0.3805902 |
| }, |
| { |
| "epoch": 1.1364539007092198, |
| "grad_norm": 0.8276216387748718, |
| "loss": 4.9579, |
| "lr": 0.0005097902097902098, |
| "step": 4006, |
| "tokens_trained": 0.380778288 |
| }, |
| { |
| "epoch": 1.1370212765957446, |
| "grad_norm": 0.8275374174118042, |
| "loss": 4.973, |
| "lr": 0.0005095104895104895, |
| "step": 4008, |
| "tokens_trained": 0.38096896 |
| }, |
| { |
| "epoch": 1.1375886524822696, |
| "grad_norm": 0.881206214427948, |
| "loss": 5.0027, |
| "lr": 0.0005092307692307693, |
| "step": 4010, |
| "tokens_trained": 0.381159008 |
| }, |
| { |
| "epoch": 1.1381560283687944, |
| "grad_norm": 0.8062921762466431, |
| "loss": 4.9771, |
| "lr": 0.0005089510489510489, |
| "step": 4012, |
| "tokens_trained": 0.381350336 |
| }, |
| { |
| "epoch": 1.1387234042553191, |
| "grad_norm": 0.8482317924499512, |
| "loss": 4.972, |
| "lr": 0.0005086713286713287, |
| "step": 4014, |
| "tokens_trained": 0.381540512 |
| }, |
| { |
| "epoch": 1.139290780141844, |
| "grad_norm": 0.8180603981018066, |
| "loss": 5.0052, |
| "lr": 0.0005083916083916083, |
| "step": 4016, |
| "tokens_trained": 0.38173168 |
| }, |
| { |
| "epoch": 1.1398581560283687, |
| "grad_norm": 0.7816891670227051, |
| "loss": 4.9689, |
| "lr": 0.0005081118881118882, |
| "step": 4018, |
| "tokens_trained": 0.381922056 |
| }, |
| { |
| "epoch": 1.1404255319148937, |
| "grad_norm": 0.831451952457428, |
| "loss": 4.9931, |
| "lr": 0.0005078321678321678, |
| "step": 4020, |
| "tokens_trained": 0.382111816 |
| }, |
| { |
| "epoch": 1.1409929078014185, |
| "grad_norm": 0.8557744026184082, |
| "loss": 5.0101, |
| "lr": 0.0005075524475524476, |
| "step": 4022, |
| "tokens_trained": 0.38230276 |
| }, |
| { |
| "epoch": 1.1415602836879433, |
| "grad_norm": 0.8070439696311951, |
| "loss": 5.0457, |
| "lr": 0.0005072727272727273, |
| "step": 4024, |
| "tokens_trained": 0.38249052 |
| }, |
| { |
| "epoch": 1.142127659574468, |
| "grad_norm": 0.9021100401878357, |
| "loss": 4.9979, |
| "lr": 0.0005069930069930069, |
| "step": 4026, |
| "tokens_trained": 0.382679696 |
| }, |
| { |
| "epoch": 1.1426950354609928, |
| "grad_norm": 0.8565911650657654, |
| "loss": 4.9828, |
| "lr": 0.0005067132867132868, |
| "step": 4028, |
| "tokens_trained": 0.382869408 |
| }, |
| { |
| "epoch": 1.1432624113475178, |
| "grad_norm": 0.8522788286209106, |
| "loss": 5.0306, |
| "lr": 0.0005064335664335664, |
| "step": 4030, |
| "tokens_trained": 0.383058416 |
| }, |
| { |
| "epoch": 1.1438297872340426, |
| "grad_norm": 0.79361891746521, |
| "loss": 5.0027, |
| "lr": 0.0005061538461538462, |
| "step": 4032, |
| "tokens_trained": 0.383248504 |
| }, |
| { |
| "epoch": 1.1443971631205674, |
| "grad_norm": 0.8457452654838562, |
| "loss": 4.9762, |
| "lr": 0.0005058741258741258, |
| "step": 4034, |
| "tokens_trained": 0.383439016 |
| }, |
| { |
| "epoch": 1.1449645390070922, |
| "grad_norm": 0.9091781377792358, |
| "loss": 5.0534, |
| "lr": 0.0005055944055944057, |
| "step": 4036, |
| "tokens_trained": 0.383630552 |
| }, |
| { |
| "epoch": 1.145531914893617, |
| "grad_norm": 0.8448526263237, |
| "loss": 5.0068, |
| "lr": 0.0005053146853146853, |
| "step": 4038, |
| "tokens_trained": 0.383817712 |
| }, |
| { |
| "epoch": 1.1460992907801417, |
| "grad_norm": 0.7852639555931091, |
| "loss": 4.9615, |
| "lr": 0.000505034965034965, |
| "step": 4040, |
| "tokens_trained": 0.384008192 |
| }, |
| { |
| "epoch": 1.1466666666666667, |
| "grad_norm": 0.7787274122238159, |
| "loss": 5.0035, |
| "lr": 0.0005047552447552448, |
| "step": 4042, |
| "tokens_trained": 0.38419848 |
| }, |
| { |
| "epoch": 1.1472340425531915, |
| "grad_norm": 0.9463234543800354, |
| "loss": 5.0284, |
| "lr": 0.0005044755244755244, |
| "step": 4044, |
| "tokens_trained": 0.384390448 |
| }, |
| { |
| "epoch": 1.1478014184397163, |
| "grad_norm": 0.9096873998641968, |
| "loss": 5.0104, |
| "lr": 0.0005041958041958043, |
| "step": 4046, |
| "tokens_trained": 0.384578688 |
| }, |
| { |
| "epoch": 1.148368794326241, |
| "grad_norm": 0.8237007856369019, |
| "loss": 5.0225, |
| "lr": 0.0005039160839160839, |
| "step": 4048, |
| "tokens_trained": 0.384769368 |
| }, |
| { |
| "epoch": 1.148936170212766, |
| "grad_norm": 0.8391951322555542, |
| "loss": 4.9316, |
| "lr": 0.0005036363636363637, |
| "step": 4050, |
| "tokens_trained": 0.384959448 |
| }, |
| { |
| "epoch": 1.1495035460992908, |
| "grad_norm": 0.8555214405059814, |
| "loss": 5.0299, |
| "lr": 0.0005033566433566433, |
| "step": 4052, |
| "tokens_trained": 0.385148392 |
| }, |
| { |
| "epoch": 1.1500709219858156, |
| "grad_norm": 0.813484251499176, |
| "loss": 5.0792, |
| "lr": 0.0005030769230769231, |
| "step": 4054, |
| "tokens_trained": 0.385338144 |
| }, |
| { |
| "epoch": 1.1506382978723404, |
| "grad_norm": 0.8149204850196838, |
| "loss": 5.0607, |
| "lr": 0.0005027972027972028, |
| "step": 4056, |
| "tokens_trained": 0.385528776 |
| }, |
| { |
| "epoch": 1.1512056737588652, |
| "grad_norm": 0.8909300565719604, |
| "loss": 5.007, |
| "lr": 0.0005025174825174825, |
| "step": 4058, |
| "tokens_trained": 0.385717672 |
| }, |
| { |
| "epoch": 1.15177304964539, |
| "grad_norm": 0.8447635173797607, |
| "loss": 5.024, |
| "lr": 0.0005022377622377623, |
| "step": 4060, |
| "tokens_trained": 0.3859074 |
| }, |
| { |
| "epoch": 1.152340425531915, |
| "grad_norm": 0.8429125547409058, |
| "loss": 4.9871, |
| "lr": 0.0005019580419580419, |
| "step": 4062, |
| "tokens_trained": 0.386096712 |
| }, |
| { |
| "epoch": 1.1529078014184397, |
| "grad_norm": 0.8532034158706665, |
| "loss": 4.9807, |
| "lr": 0.0005016783216783218, |
| "step": 4064, |
| "tokens_trained": 0.386290392 |
| }, |
| { |
| "epoch": 1.1534751773049645, |
| "grad_norm": 0.8414303064346313, |
| "loss": 5.0426, |
| "lr": 0.0005013986013986014, |
| "step": 4066, |
| "tokens_trained": 0.386484048 |
| }, |
| { |
| "epoch": 1.1540425531914893, |
| "grad_norm": 0.8659424185752869, |
| "loss": 4.9572, |
| "lr": 0.0005011188811188811, |
| "step": 4068, |
| "tokens_trained": 0.386670896 |
| }, |
| { |
| "epoch": 1.1546099290780143, |
| "grad_norm": 0.8472128510475159, |
| "loss": 4.9993, |
| "lr": 0.0005008391608391608, |
| "step": 4070, |
| "tokens_trained": 0.38686096 |
| }, |
| { |
| "epoch": 1.155177304964539, |
| "grad_norm": 0.7704010009765625, |
| "loss": 5.0267, |
| "lr": 0.0005005594405594406, |
| "step": 4072, |
| "tokens_trained": 0.387052256 |
| }, |
| { |
| "epoch": 1.1557446808510639, |
| "grad_norm": 0.8503726720809937, |
| "loss": 4.953, |
| "lr": 0.0005002797202797203, |
| "step": 4074, |
| "tokens_trained": 0.387241648 |
| }, |
| { |
| "epoch": 1.1563120567375886, |
| "grad_norm": 0.8159539699554443, |
| "loss": 5.0096, |
| "lr": 0.0005, |
| "step": 4076, |
| "tokens_trained": 0.387432368 |
| }, |
| { |
| "epoch": 1.1568794326241134, |
| "grad_norm": 0.7673088312149048, |
| "loss": 4.9996, |
| "lr": 0.0004997202797202798, |
| "step": 4078, |
| "tokens_trained": 0.387620656 |
| }, |
| { |
| "epoch": 1.1574468085106382, |
| "grad_norm": 0.8308261036872864, |
| "loss": 5.0114, |
| "lr": 0.0004994405594405594, |
| "step": 4080, |
| "tokens_trained": 0.387809712 |
| }, |
| { |
| "epoch": 1.1580141843971632, |
| "grad_norm": 0.8294357657432556, |
| "loss": 5.0508, |
| "lr": 0.0004991608391608391, |
| "step": 4082, |
| "tokens_trained": 0.387999152 |
| }, |
| { |
| "epoch": 1.158581560283688, |
| "grad_norm": 0.8797832727432251, |
| "loss": 4.9784, |
| "lr": 0.0004988811188811189, |
| "step": 4084, |
| "tokens_trained": 0.3881876 |
| }, |
| { |
| "epoch": 1.1591489361702128, |
| "grad_norm": 0.8250353932380676, |
| "loss": 4.959, |
| "lr": 0.0004986013986013986, |
| "step": 4086, |
| "tokens_trained": 0.38837592 |
| }, |
| { |
| "epoch": 1.1597163120567375, |
| "grad_norm": 0.8896451592445374, |
| "loss": 5.0103, |
| "lr": 0.0004983216783216784, |
| "step": 4088, |
| "tokens_trained": 0.388565768 |
| }, |
| { |
| "epoch": 1.1602836879432625, |
| "grad_norm": 0.7970037460327148, |
| "loss": 5.0534, |
| "lr": 0.0004980419580419581, |
| "step": 4090, |
| "tokens_trained": 0.388755536 |
| }, |
| { |
| "epoch": 1.1608510638297873, |
| "grad_norm": 0.8623605966567993, |
| "loss": 4.986, |
| "lr": 0.0004977622377622378, |
| "step": 4092, |
| "tokens_trained": 0.388947 |
| }, |
| { |
| "epoch": 1.161418439716312, |
| "grad_norm": 0.8195328712463379, |
| "loss": 5.0193, |
| "lr": 0.0004974825174825175, |
| "step": 4094, |
| "tokens_trained": 0.38913532 |
| }, |
| { |
| "epoch": 1.1619858156028369, |
| "grad_norm": 0.8058289885520935, |
| "loss": 5.0001, |
| "lr": 0.0004972027972027972, |
| "step": 4096, |
| "tokens_trained": 0.389325904 |
| }, |
| { |
| "epoch": 1.1625531914893616, |
| "grad_norm": 0.8325840830802917, |
| "loss": 5.0711, |
| "lr": 0.0004969230769230769, |
| "step": 4098, |
| "tokens_trained": 0.3895166 |
| }, |
| { |
| "epoch": 1.1631205673758864, |
| "grad_norm": 0.8684342503547668, |
| "loss": 4.9548, |
| "lr": 0.0004966433566433566, |
| "step": 4100, |
| "tokens_trained": 0.389704048 |
| }, |
| { |
| "epoch": 1.1636879432624114, |
| "grad_norm": 0.891304612159729, |
| "loss": 4.9711, |
| "lr": 0.0004963636363636364, |
| "step": 4102, |
| "tokens_trained": 0.389893816 |
| }, |
| { |
| "epoch": 1.1642553191489362, |
| "grad_norm": 0.8750278353691101, |
| "loss": 5.0493, |
| "lr": 0.0004960839160839161, |
| "step": 4104, |
| "tokens_trained": 0.390082752 |
| }, |
| { |
| "epoch": 1.164822695035461, |
| "grad_norm": 0.8391188383102417, |
| "loss": 4.9804, |
| "lr": 0.0004958041958041959, |
| "step": 4106, |
| "tokens_trained": 0.390272096 |
| }, |
| { |
| "epoch": 1.1653900709219858, |
| "grad_norm": 0.8190635442733765, |
| "loss": 5.0121, |
| "lr": 0.0004955244755244756, |
| "step": 4108, |
| "tokens_trained": 0.390462024 |
| }, |
| { |
| "epoch": 1.1659574468085105, |
| "grad_norm": 0.7800264954566956, |
| "loss": 4.9819, |
| "lr": 0.0004952447552447552, |
| "step": 4110, |
| "tokens_trained": 0.390651968 |
| }, |
| { |
| "epoch": 1.1665248226950355, |
| "grad_norm": 0.8210972547531128, |
| "loss": 4.9929, |
| "lr": 0.000494965034965035, |
| "step": 4112, |
| "tokens_trained": 0.390842776 |
| }, |
| { |
| "epoch": 1.1670921985815603, |
| "grad_norm": 0.9442235827445984, |
| "loss": 5.0133, |
| "lr": 0.0004946853146853147, |
| "step": 4114, |
| "tokens_trained": 0.391031856 |
| }, |
| { |
| "epoch": 1.167659574468085, |
| "grad_norm": 0.8627631068229675, |
| "loss": 4.9587, |
| "lr": 0.0004944055944055944, |
| "step": 4116, |
| "tokens_trained": 0.391223288 |
| }, |
| { |
| "epoch": 1.1682269503546099, |
| "grad_norm": 0.7751641869544983, |
| "loss": 4.9934, |
| "lr": 0.0004941258741258741, |
| "step": 4118, |
| "tokens_trained": 0.391412784 |
| }, |
| { |
| "epoch": 1.1687943262411347, |
| "grad_norm": 0.8243580460548401, |
| "loss": 5.0126, |
| "lr": 0.0004938461538461538, |
| "step": 4120, |
| "tokens_trained": 0.391603056 |
| }, |
| { |
| "epoch": 1.1693617021276597, |
| "grad_norm": 0.8990906476974487, |
| "loss": 5.0234, |
| "lr": 0.0004935664335664336, |
| "step": 4122, |
| "tokens_trained": 0.391793368 |
| }, |
| { |
| "epoch": 1.1699290780141844, |
| "grad_norm": 0.8721649050712585, |
| "loss": 4.997, |
| "lr": 0.0004932867132867133, |
| "step": 4124, |
| "tokens_trained": 0.39198508 |
| }, |
| { |
| "epoch": 1.1702127659574468, |
| "eval_loss": 5.014278411865234, |
| "eval_runtime": 21.0162, |
| "step": 4125, |
| "tokens_trained": 0.392082752 |
| }, |
| { |
| "epoch": 1.1704964539007092, |
| "grad_norm": 0.7662192583084106, |
| "loss": 4.9791, |
| "lr": 0.0004930069930069931, |
| "step": 4126, |
| "tokens_trained": 0.392179088 |
| }, |
| { |
| "epoch": 1.171063829787234, |
| "grad_norm": 0.9081931710243225, |
| "loss": 4.9882, |
| "lr": 0.0004927272727272727, |
| "step": 4128, |
| "tokens_trained": 0.392369312 |
| }, |
| { |
| "epoch": 1.1716312056737588, |
| "grad_norm": 0.8503204584121704, |
| "loss": 5.0403, |
| "lr": 0.0004924475524475525, |
| "step": 4130, |
| "tokens_trained": 0.392557944 |
| }, |
| { |
| "epoch": 1.1721985815602838, |
| "grad_norm": 0.8676162362098694, |
| "loss": 5.0716, |
| "lr": 0.0004921678321678322, |
| "step": 4132, |
| "tokens_trained": 0.39274924 |
| }, |
| { |
| "epoch": 1.1727659574468086, |
| "grad_norm": 0.8527748584747314, |
| "loss": 5.0416, |
| "lr": 0.0004918881118881118, |
| "step": 4134, |
| "tokens_trained": 0.392939672 |
| }, |
| { |
| "epoch": 1.1733333333333333, |
| "grad_norm": 0.8113415241241455, |
| "loss": 5.0525, |
| "lr": 0.0004916083916083916, |
| "step": 4136, |
| "tokens_trained": 0.393131152 |
| }, |
| { |
| "epoch": 1.1739007092198581, |
| "grad_norm": 0.8555265665054321, |
| "loss": 5.0734, |
| "lr": 0.0004913286713286713, |
| "step": 4138, |
| "tokens_trained": 0.39332136 |
| }, |
| { |
| "epoch": 1.174468085106383, |
| "grad_norm": 0.9134076237678528, |
| "loss": 4.9742, |
| "lr": 0.0004910489510489511, |
| "step": 4140, |
| "tokens_trained": 0.393509376 |
| }, |
| { |
| "epoch": 1.1750354609929077, |
| "grad_norm": 0.8159533739089966, |
| "loss": 5.0728, |
| "lr": 0.0004907692307692308, |
| "step": 4142, |
| "tokens_trained": 0.393699616 |
| }, |
| { |
| "epoch": 1.1756028368794327, |
| "grad_norm": 0.8070579767227173, |
| "loss": 5.0032, |
| "lr": 0.0004904895104895106, |
| "step": 4144, |
| "tokens_trained": 0.393888176 |
| }, |
| { |
| "epoch": 1.1761702127659575, |
| "grad_norm": 0.8635644316673279, |
| "loss": 5.0564, |
| "lr": 0.0004902097902097902, |
| "step": 4146, |
| "tokens_trained": 0.39407804 |
| }, |
| { |
| "epoch": 1.1767375886524822, |
| "grad_norm": 0.8500214219093323, |
| "loss": 4.9698, |
| "lr": 0.00048993006993007, |
| "step": 4148, |
| "tokens_trained": 0.394268456 |
| }, |
| { |
| "epoch": 1.177304964539007, |
| "grad_norm": 0.8485430479049683, |
| "loss": 4.9751, |
| "lr": 0.0004896503496503497, |
| "step": 4150, |
| "tokens_trained": 0.394459912 |
| }, |
| { |
| "epoch": 1.177872340425532, |
| "grad_norm": 0.8265682458877563, |
| "loss": 4.9703, |
| "lr": 0.0004893706293706293, |
| "step": 4152, |
| "tokens_trained": 0.394650984 |
| }, |
| { |
| "epoch": 1.1784397163120568, |
| "grad_norm": 0.7867625951766968, |
| "loss": 4.8901, |
| "lr": 0.0004890909090909091, |
| "step": 4154, |
| "tokens_trained": 0.394843184 |
| }, |
| { |
| "epoch": 1.1790070921985816, |
| "grad_norm": 0.8666532635688782, |
| "loss": 4.9144, |
| "lr": 0.0004888111888111888, |
| "step": 4156, |
| "tokens_trained": 0.39503568 |
| }, |
| { |
| "epoch": 1.1795744680851064, |
| "grad_norm": 0.862920880317688, |
| "loss": 4.9529, |
| "lr": 0.0004885314685314686, |
| "step": 4158, |
| "tokens_trained": 0.395225424 |
| }, |
| { |
| "epoch": 1.1801418439716311, |
| "grad_norm": 0.810485303401947, |
| "loss": 5.0165, |
| "lr": 0.0004882517482517483, |
| "step": 4160, |
| "tokens_trained": 0.395415632 |
| }, |
| { |
| "epoch": 1.180709219858156, |
| "grad_norm": 0.7997188568115234, |
| "loss": 5.0197, |
| "lr": 0.000487972027972028, |
| "step": 4162, |
| "tokens_trained": 0.39560452 |
| }, |
| { |
| "epoch": 1.181276595744681, |
| "grad_norm": 0.8133664727210999, |
| "loss": 5.0056, |
| "lr": 0.0004876923076923077, |
| "step": 4164, |
| "tokens_trained": 0.395794008 |
| }, |
| { |
| "epoch": 1.1818439716312057, |
| "grad_norm": 0.8120067119598389, |
| "loss": 4.913, |
| "lr": 0.00048741258741258743, |
| "step": 4166, |
| "tokens_trained": 0.395983296 |
| }, |
| { |
| "epoch": 1.1824113475177305, |
| "grad_norm": 0.8434014320373535, |
| "loss": 4.9777, |
| "lr": 0.0004871328671328671, |
| "step": 4168, |
| "tokens_trained": 0.396175216 |
| }, |
| { |
| "epoch": 1.1829787234042553, |
| "grad_norm": 0.8452426195144653, |
| "loss": 4.9693, |
| "lr": 0.00048685314685314687, |
| "step": 4170, |
| "tokens_trained": 0.3963634 |
| }, |
| { |
| "epoch": 1.1835460992907803, |
| "grad_norm": 0.8733723759651184, |
| "loss": 4.9757, |
| "lr": 0.00048657342657342656, |
| "step": 4172, |
| "tokens_trained": 0.39655404 |
| }, |
| { |
| "epoch": 1.184113475177305, |
| "grad_norm": 0.8372209072113037, |
| "loss": 4.9725, |
| "lr": 0.0004862937062937063, |
| "step": 4174, |
| "tokens_trained": 0.396744688 |
| }, |
| { |
| "epoch": 1.1846808510638298, |
| "grad_norm": 0.7722007632255554, |
| "loss": 5.0234, |
| "lr": 0.000486013986013986, |
| "step": 4176, |
| "tokens_trained": 0.396935848 |
| }, |
| { |
| "epoch": 1.1852482269503546, |
| "grad_norm": 0.8685297966003418, |
| "loss": 4.9777, |
| "lr": 0.0004857342657342658, |
| "step": 4178, |
| "tokens_trained": 0.39712576 |
| }, |
| { |
| "epoch": 1.1858156028368794, |
| "grad_norm": 0.8083483576774597, |
| "loss": 4.973, |
| "lr": 0.0004854545454545455, |
| "step": 4180, |
| "tokens_trained": 0.397315672 |
| }, |
| { |
| "epoch": 1.1863829787234041, |
| "grad_norm": 0.8481479287147522, |
| "loss": 5.0308, |
| "lr": 0.00048517482517482517, |
| "step": 4182, |
| "tokens_trained": 0.39750464 |
| }, |
| { |
| "epoch": 1.1869503546099291, |
| "grad_norm": 0.7996193170547485, |
| "loss": 4.9251, |
| "lr": 0.0004848951048951049, |
| "step": 4184, |
| "tokens_trained": 0.397693584 |
| }, |
| { |
| "epoch": 1.187517730496454, |
| "grad_norm": 0.811189591884613, |
| "loss": 5.0092, |
| "lr": 0.0004846153846153846, |
| "step": 4186, |
| "tokens_trained": 0.397883352 |
| }, |
| { |
| "epoch": 1.1880851063829787, |
| "grad_norm": 0.9195986390113831, |
| "loss": 4.961, |
| "lr": 0.00048433566433566435, |
| "step": 4188, |
| "tokens_trained": 0.398073712 |
| }, |
| { |
| "epoch": 1.1886524822695035, |
| "grad_norm": 0.8444050550460815, |
| "loss": 4.9707, |
| "lr": 0.00048405594405594404, |
| "step": 4190, |
| "tokens_trained": 0.398265744 |
| }, |
| { |
| "epoch": 1.1892198581560285, |
| "grad_norm": 0.859663724899292, |
| "loss": 5.0202, |
| "lr": 0.0004837762237762238, |
| "step": 4192, |
| "tokens_trained": 0.39845568 |
| }, |
| { |
| "epoch": 1.1897872340425533, |
| "grad_norm": 0.8403055667877197, |
| "loss": 4.9831, |
| "lr": 0.0004834965034965035, |
| "step": 4194, |
| "tokens_trained": 0.398647696 |
| }, |
| { |
| "epoch": 1.190354609929078, |
| "grad_norm": 0.8377063870429993, |
| "loss": 5.0545, |
| "lr": 0.0004832167832167833, |
| "step": 4196, |
| "tokens_trained": 0.398838432 |
| }, |
| { |
| "epoch": 1.1909219858156028, |
| "grad_norm": 0.8102120161056519, |
| "loss": 5.0068, |
| "lr": 0.00048293706293706297, |
| "step": 4198, |
| "tokens_trained": 0.399027968 |
| }, |
| { |
| "epoch": 1.1914893617021276, |
| "grad_norm": 0.8520330190658569, |
| "loss": 5.0102, |
| "lr": 0.00048265734265734266, |
| "step": 4200, |
| "tokens_trained": 0.3992202 |
| }, |
| { |
| "epoch": 1.1920567375886524, |
| "grad_norm": 0.8204303979873657, |
| "loss": 5.0303, |
| "lr": 0.0004823776223776224, |
| "step": 4202, |
| "tokens_trained": 0.399411656 |
| }, |
| { |
| "epoch": 1.1926241134751774, |
| "grad_norm": 0.8569766879081726, |
| "loss": 5.0097, |
| "lr": 0.0004820979020979021, |
| "step": 4204, |
| "tokens_trained": 0.399602136 |
| }, |
| { |
| "epoch": 1.1931914893617022, |
| "grad_norm": 0.8269557952880859, |
| "loss": 4.9694, |
| "lr": 0.00048181818181818184, |
| "step": 4206, |
| "tokens_trained": 0.399793544 |
| }, |
| { |
| "epoch": 1.193758865248227, |
| "grad_norm": 0.9124187231063843, |
| "loss": 4.9506, |
| "lr": 0.0004815384615384615, |
| "step": 4208, |
| "tokens_trained": 0.399982856 |
| }, |
| { |
| "epoch": 1.1943262411347517, |
| "grad_norm": 0.8813201189041138, |
| "loss": 4.9989, |
| "lr": 0.00048125874125874127, |
| "step": 4210, |
| "tokens_trained": 0.400173184 |
| }, |
| { |
| "epoch": 1.1948936170212765, |
| "grad_norm": 0.8605351448059082, |
| "loss": 5.0437, |
| "lr": 0.00048097902097902096, |
| "step": 4212, |
| "tokens_trained": 0.400363824 |
| }, |
| { |
| "epoch": 1.1954609929078015, |
| "grad_norm": 0.8277431726455688, |
| "loss": 5.0283, |
| "lr": 0.00048069930069930076, |
| "step": 4214, |
| "tokens_trained": 0.400554648 |
| }, |
| { |
| "epoch": 1.1960283687943263, |
| "grad_norm": 0.828187108039856, |
| "loss": 5.0573, |
| "lr": 0.00048041958041958045, |
| "step": 4216, |
| "tokens_trained": 0.400746632 |
| }, |
| { |
| "epoch": 1.196595744680851, |
| "grad_norm": 0.8459845781326294, |
| "loss": 5.0734, |
| "lr": 0.00048013986013986014, |
| "step": 4218, |
| "tokens_trained": 0.400937568 |
| }, |
| { |
| "epoch": 1.1971631205673758, |
| "grad_norm": 0.7948288321495056, |
| "loss": 5.011, |
| "lr": 0.0004798601398601399, |
| "step": 4220, |
| "tokens_trained": 0.401127024 |
| }, |
| { |
| "epoch": 1.1977304964539006, |
| "grad_norm": 0.8868036866188049, |
| "loss": 5.0318, |
| "lr": 0.0004795804195804196, |
| "step": 4222, |
| "tokens_trained": 0.401318248 |
| }, |
| { |
| "epoch": 1.1982978723404256, |
| "grad_norm": 0.7660478353500366, |
| "loss": 5.0656, |
| "lr": 0.0004793006993006993, |
| "step": 4224, |
| "tokens_trained": 0.401506136 |
| }, |
| { |
| "epoch": 1.1988652482269504, |
| "grad_norm": 0.779299259185791, |
| "loss": 4.9907, |
| "lr": 0.000479020979020979, |
| "step": 4226, |
| "tokens_trained": 0.401696856 |
| }, |
| { |
| "epoch": 1.1994326241134752, |
| "grad_norm": 0.7903150916099548, |
| "loss": 4.9744, |
| "lr": 0.00047874125874125875, |
| "step": 4228, |
| "tokens_trained": 0.401885744 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.7829038500785828, |
| "loss": 4.9847, |
| "lr": 0.00047846153846153844, |
| "step": 4230, |
| "tokens_trained": 0.402075072 |
| }, |
| { |
| "epoch": 1.2005673758865247, |
| "grad_norm": 0.9025991559028625, |
| "loss": 4.9758, |
| "lr": 0.00047818181818181824, |
| "step": 4232, |
| "tokens_trained": 0.4022674 |
| }, |
| { |
| "epoch": 1.2011347517730497, |
| "grad_norm": 0.8891049027442932, |
| "loss": 4.9791, |
| "lr": 0.00047790209790209793, |
| "step": 4234, |
| "tokens_trained": 0.402459792 |
| }, |
| { |
| "epoch": 1.2017021276595745, |
| "grad_norm": 0.7566952109336853, |
| "loss": 5.0183, |
| "lr": 0.0004776223776223776, |
| "step": 4236, |
| "tokens_trained": 0.402649768 |
| }, |
| { |
| "epoch": 1.2022695035460993, |
| "grad_norm": 0.80048668384552, |
| "loss": 4.9493, |
| "lr": 0.00047734265734265737, |
| "step": 4238, |
| "tokens_trained": 0.4028382 |
| }, |
| { |
| "epoch": 1.202836879432624, |
| "grad_norm": 0.7540125250816345, |
| "loss": 4.9685, |
| "lr": 0.00047706293706293706, |
| "step": 4240, |
| "tokens_trained": 0.403028848 |
| }, |
| { |
| "epoch": 1.2034042553191489, |
| "grad_norm": 0.7707799673080444, |
| "loss": 4.984, |
| "lr": 0.0004767832167832168, |
| "step": 4242, |
| "tokens_trained": 0.40321844 |
| }, |
| { |
| "epoch": 1.2039716312056739, |
| "grad_norm": 0.7681775093078613, |
| "loss": 4.9807, |
| "lr": 0.0004765034965034965, |
| "step": 4244, |
| "tokens_trained": 0.40340716 |
| }, |
| { |
| "epoch": 1.2045390070921986, |
| "grad_norm": 0.7557908892631531, |
| "loss": 4.9912, |
| "lr": 0.00047622377622377624, |
| "step": 4246, |
| "tokens_trained": 0.403600152 |
| }, |
| { |
| "epoch": 1.2051063829787234, |
| "grad_norm": 0.822948694229126, |
| "loss": 5.0144, |
| "lr": 0.00047594405594405593, |
| "step": 4248, |
| "tokens_trained": 0.403788 |
| }, |
| { |
| "epoch": 1.2056737588652482, |
| "grad_norm": 0.7625008225440979, |
| "loss": 4.8949, |
| "lr": 0.00047566433566433573, |
| "step": 4250, |
| "tokens_trained": 0.40397872 |
| }, |
| { |
| "epoch": 1.2056737588652482, |
| "eval_loss": 5.00390625, |
| "eval_runtime": 20.2421, |
| "step": 4250, |
| "tokens_trained": 0.40397872 |
| }, |
| { |
| "epoch": 1.206241134751773, |
| "grad_norm": 0.7532864212989807, |
| "loss": 5.0128, |
| "lr": 0.0004753846153846154, |
| "step": 4252, |
| "tokens_trained": 0.404169384 |
| }, |
| { |
| "epoch": 1.206808510638298, |
| "grad_norm": 0.69386887550354, |
| "loss": 4.9849, |
| "lr": 0.0004751048951048951, |
| "step": 4254, |
| "tokens_trained": 0.40435968 |
| }, |
| { |
| "epoch": 1.2073758865248228, |
| "grad_norm": 0.7845306992530823, |
| "loss": 5.0254, |
| "lr": 0.00047482517482517485, |
| "step": 4256, |
| "tokens_trained": 0.404549424 |
| }, |
| { |
| "epoch": 1.2079432624113475, |
| "grad_norm": 0.8036428093910217, |
| "loss": 4.9676, |
| "lr": 0.00047454545454545454, |
| "step": 4258, |
| "tokens_trained": 0.404739344 |
| }, |
| { |
| "epoch": 1.2085106382978723, |
| "grad_norm": 0.8440237045288086, |
| "loss": 4.9965, |
| "lr": 0.0004742657342657343, |
| "step": 4260, |
| "tokens_trained": 0.40492952 |
| }, |
| { |
| "epoch": 1.209078014184397, |
| "grad_norm": 0.7936769127845764, |
| "loss": 5.0458, |
| "lr": 0.000473986013986014, |
| "step": 4262, |
| "tokens_trained": 0.405117144 |
| }, |
| { |
| "epoch": 1.2096453900709219, |
| "grad_norm": 0.8117086291313171, |
| "loss": 5.0196, |
| "lr": 0.0004737062937062937, |
| "step": 4264, |
| "tokens_trained": 0.405310184 |
| }, |
| { |
| "epoch": 1.2102127659574469, |
| "grad_norm": 0.7395413517951965, |
| "loss": 4.9655, |
| "lr": 0.0004734265734265734, |
| "step": 4266, |
| "tokens_trained": 0.405498272 |
| }, |
| { |
| "epoch": 1.2107801418439716, |
| "grad_norm": 0.8879559636116028, |
| "loss": 4.9637, |
| "lr": 0.0004731468531468531, |
| "step": 4268, |
| "tokens_trained": 0.4056908 |
| }, |
| { |
| "epoch": 1.2113475177304964, |
| "grad_norm": 0.8651279211044312, |
| "loss": 4.945, |
| "lr": 0.0004728671328671329, |
| "step": 4270, |
| "tokens_trained": 0.405879384 |
| }, |
| { |
| "epoch": 1.2119148936170212, |
| "grad_norm": 0.8421851992607117, |
| "loss": 4.9391, |
| "lr": 0.0004725874125874126, |
| "step": 4272, |
| "tokens_trained": 0.406071432 |
| }, |
| { |
| "epoch": 1.2124822695035462, |
| "grad_norm": 0.815262496471405, |
| "loss": 5.0465, |
| "lr": 0.00047230769230769234, |
| "step": 4274, |
| "tokens_trained": 0.406262776 |
| }, |
| { |
| "epoch": 1.213049645390071, |
| "grad_norm": 0.8042894005775452, |
| "loss": 4.8908, |
| "lr": 0.00047202797202797203, |
| "step": 4276, |
| "tokens_trained": 0.406452656 |
| }, |
| { |
| "epoch": 1.2136170212765958, |
| "grad_norm": 0.8514822721481323, |
| "loss": 4.9961, |
| "lr": 0.00047174825174825177, |
| "step": 4278, |
| "tokens_trained": 0.406642224 |
| }, |
| { |
| "epoch": 1.2141843971631205, |
| "grad_norm": 0.7532519102096558, |
| "loss": 4.9658, |
| "lr": 0.00047146853146853146, |
| "step": 4280, |
| "tokens_trained": 0.406830288 |
| }, |
| { |
| "epoch": 1.2147517730496453, |
| "grad_norm": 0.7978721261024475, |
| "loss": 4.9477, |
| "lr": 0.0004711888111888112, |
| "step": 4282, |
| "tokens_trained": 0.4070214 |
| }, |
| { |
| "epoch": 1.21531914893617, |
| "grad_norm": 0.8998175859451294, |
| "loss": 5.0531, |
| "lr": 0.0004709090909090909, |
| "step": 4284, |
| "tokens_trained": 0.407211064 |
| }, |
| { |
| "epoch": 1.215886524822695, |
| "grad_norm": 0.7281949520111084, |
| "loss": 4.9474, |
| "lr": 0.0004706293706293706, |
| "step": 4286, |
| "tokens_trained": 0.40740104 |
| }, |
| { |
| "epoch": 1.2164539007092199, |
| "grad_norm": 0.7590287923812866, |
| "loss": 5.0104, |
| "lr": 0.0004703496503496504, |
| "step": 4288, |
| "tokens_trained": 0.40759144 |
| }, |
| { |
| "epoch": 1.2170212765957447, |
| "grad_norm": 0.8452118039131165, |
| "loss": 5.024, |
| "lr": 0.0004700699300699301, |
| "step": 4290, |
| "tokens_trained": 0.407780576 |
| }, |
| { |
| "epoch": 1.2175886524822694, |
| "grad_norm": 0.8062863945960999, |
| "loss": 5.0099, |
| "lr": 0.0004697902097902098, |
| "step": 4292, |
| "tokens_trained": 0.407971808 |
| }, |
| { |
| "epoch": 1.2181560283687944, |
| "grad_norm": 0.8372058272361755, |
| "loss": 5.0832, |
| "lr": 0.0004695104895104895, |
| "step": 4294, |
| "tokens_trained": 0.408162104 |
| }, |
| { |
| "epoch": 1.2187234042553192, |
| "grad_norm": 0.7989845871925354, |
| "loss": 4.971, |
| "lr": 0.00046923076923076926, |
| "step": 4296, |
| "tokens_trained": 0.408351392 |
| }, |
| { |
| "epoch": 1.219290780141844, |
| "grad_norm": 0.7519237399101257, |
| "loss": 4.9739, |
| "lr": 0.00046895104895104895, |
| "step": 4298, |
| "tokens_trained": 0.408541056 |
| }, |
| { |
| "epoch": 1.2198581560283688, |
| "grad_norm": 0.769143283367157, |
| "loss": 4.9483, |
| "lr": 0.0004686713286713287, |
| "step": 4300, |
| "tokens_trained": 0.408731728 |
| }, |
| { |
| "epoch": 1.2204255319148936, |
| "grad_norm": 0.7855169177055359, |
| "loss": 5.0007, |
| "lr": 0.0004683916083916084, |
| "step": 4302, |
| "tokens_trained": 0.408921824 |
| }, |
| { |
| "epoch": 1.2209929078014183, |
| "grad_norm": 0.8531661629676819, |
| "loss": 5.018, |
| "lr": 0.00046811188811188807, |
| "step": 4304, |
| "tokens_trained": 0.409112528 |
| }, |
| { |
| "epoch": 1.2215602836879433, |
| "grad_norm": 0.8178502321243286, |
| "loss": 4.9869, |
| "lr": 0.00046783216783216787, |
| "step": 4306, |
| "tokens_trained": 0.40930284 |
| }, |
| { |
| "epoch": 1.2221276595744681, |
| "grad_norm": 0.7806143164634705, |
| "loss": 4.9561, |
| "lr": 0.00046755244755244756, |
| "step": 4308, |
| "tokens_trained": 0.409492304 |
| }, |
| { |
| "epoch": 1.222695035460993, |
| "grad_norm": 0.7506605982780457, |
| "loss": 4.937, |
| "lr": 0.0004672727272727273, |
| "step": 4310, |
| "tokens_trained": 0.409680208 |
| }, |
| { |
| "epoch": 1.2232624113475177, |
| "grad_norm": 0.8441674113273621, |
| "loss": 4.9137, |
| "lr": 0.000466993006993007, |
| "step": 4312, |
| "tokens_trained": 0.409869952 |
| }, |
| { |
| "epoch": 1.2238297872340427, |
| "grad_norm": 0.8911812901496887, |
| "loss": 5.0072, |
| "lr": 0.00046671328671328674, |
| "step": 4314, |
| "tokens_trained": 0.410058728 |
| }, |
| { |
| "epoch": 1.2243971631205675, |
| "grad_norm": 0.7732901573181152, |
| "loss": 4.9094, |
| "lr": 0.00046643356643356643, |
| "step": 4316, |
| "tokens_trained": 0.410249624 |
| }, |
| { |
| "epoch": 1.2249645390070922, |
| "grad_norm": 0.7372212409973145, |
| "loss": 4.9646, |
| "lr": 0.0004661538461538462, |
| "step": 4318, |
| "tokens_trained": 0.410440088 |
| }, |
| { |
| "epoch": 1.225531914893617, |
| "grad_norm": 0.8266177177429199, |
| "loss": 5.01, |
| "lr": 0.00046587412587412587, |
| "step": 4320, |
| "tokens_trained": 0.410630384 |
| }, |
| { |
| "epoch": 1.2260992907801418, |
| "grad_norm": 0.7471604347229004, |
| "loss": 4.9741, |
| "lr": 0.00046559440559440556, |
| "step": 4322, |
| "tokens_trained": 0.410819312 |
| }, |
| { |
| "epoch": 1.2266666666666666, |
| "grad_norm": 0.8529990911483765, |
| "loss": 5.0115, |
| "lr": 0.00046531468531468536, |
| "step": 4324, |
| "tokens_trained": 0.411007776 |
| }, |
| { |
| "epoch": 1.2272340425531916, |
| "grad_norm": 0.8250638246536255, |
| "loss": 4.9974, |
| "lr": 0.00046503496503496505, |
| "step": 4326, |
| "tokens_trained": 0.4111994 |
| }, |
| { |
| "epoch": 1.2278014184397164, |
| "grad_norm": 0.7049713730812073, |
| "loss": 4.9412, |
| "lr": 0.0004647552447552448, |
| "step": 4328, |
| "tokens_trained": 0.411387512 |
| }, |
| { |
| "epoch": 1.2283687943262411, |
| "grad_norm": 0.8164275884628296, |
| "loss": 4.9696, |
| "lr": 0.0004644755244755245, |
| "step": 4330, |
| "tokens_trained": 0.411579192 |
| }, |
| { |
| "epoch": 1.228936170212766, |
| "grad_norm": 0.786007821559906, |
| "loss": 4.9015, |
| "lr": 0.0004641958041958042, |
| "step": 4332, |
| "tokens_trained": 0.411769256 |
| }, |
| { |
| "epoch": 1.2295035460992907, |
| "grad_norm": 0.7956440448760986, |
| "loss": 4.9864, |
| "lr": 0.0004639160839160839, |
| "step": 4334, |
| "tokens_trained": 0.411958112 |
| }, |
| { |
| "epoch": 1.2300709219858157, |
| "grad_norm": 0.7968415021896362, |
| "loss": 5.0563, |
| "lr": 0.00046363636363636366, |
| "step": 4336, |
| "tokens_trained": 0.412148936 |
| }, |
| { |
| "epoch": 1.2306382978723405, |
| "grad_norm": 0.9666130542755127, |
| "loss": 4.9907, |
| "lr": 0.00046335664335664335, |
| "step": 4338, |
| "tokens_trained": 0.412337728 |
| }, |
| { |
| "epoch": 1.2312056737588652, |
| "grad_norm": 0.9147318005561829, |
| "loss": 5.0003, |
| "lr": 0.00046307692307692304, |
| "step": 4340, |
| "tokens_trained": 0.412527736 |
| }, |
| { |
| "epoch": 1.23177304964539, |
| "grad_norm": 0.7779629230499268, |
| "loss": 4.9392, |
| "lr": 0.00046279720279720284, |
| "step": 4342, |
| "tokens_trained": 0.412717944 |
| }, |
| { |
| "epoch": 1.2323404255319148, |
| "grad_norm": 0.8160842061042786, |
| "loss": 4.9644, |
| "lr": 0.00046251748251748253, |
| "step": 4344, |
| "tokens_trained": 0.412909288 |
| }, |
| { |
| "epoch": 1.2329078014184398, |
| "grad_norm": 0.8430790305137634, |
| "loss": 4.9472, |
| "lr": 0.0004622377622377623, |
| "step": 4346, |
| "tokens_trained": 0.413097912 |
| }, |
| { |
| "epoch": 1.2334751773049646, |
| "grad_norm": 0.8291404843330383, |
| "loss": 4.9647, |
| "lr": 0.00046195804195804196, |
| "step": 4348, |
| "tokens_trained": 0.413290272 |
| }, |
| { |
| "epoch": 1.2340425531914894, |
| "grad_norm": 0.8272704482078552, |
| "loss": 4.9685, |
| "lr": 0.0004616783216783217, |
| "step": 4350, |
| "tokens_trained": 0.41348152 |
| }, |
| { |
| "epoch": 1.2346099290780141, |
| "grad_norm": 0.7785531282424927, |
| "loss": 5.0172, |
| "lr": 0.0004613986013986014, |
| "step": 4352, |
| "tokens_trained": 0.413670184 |
| }, |
| { |
| "epoch": 1.235177304964539, |
| "grad_norm": 0.8512988090515137, |
| "loss": 4.9727, |
| "lr": 0.00046111888111888114, |
| "step": 4354, |
| "tokens_trained": 0.413860232 |
| }, |
| { |
| "epoch": 1.235744680851064, |
| "grad_norm": 0.7373901009559631, |
| "loss": 4.9092, |
| "lr": 0.00046083916083916083, |
| "step": 4356, |
| "tokens_trained": 0.414051312 |
| }, |
| { |
| "epoch": 1.2363120567375887, |
| "grad_norm": 0.7716902494430542, |
| "loss": 4.9456, |
| "lr": 0.0004605594405594405, |
| "step": 4358, |
| "tokens_trained": 0.414239448 |
| }, |
| { |
| "epoch": 1.2368794326241135, |
| "grad_norm": 0.8303737044334412, |
| "loss": 4.9656, |
| "lr": 0.0004602797202797203, |
| "step": 4360, |
| "tokens_trained": 0.414430488 |
| }, |
| { |
| "epoch": 1.2374468085106383, |
| "grad_norm": 0.850261926651001, |
| "loss": 4.9407, |
| "lr": 0.00046, |
| "step": 4362, |
| "tokens_trained": 0.414620536 |
| }, |
| { |
| "epoch": 1.238014184397163, |
| "grad_norm": 0.8391888737678528, |
| "loss": 4.9772, |
| "lr": 0.00045972027972027976, |
| "step": 4364, |
| "tokens_trained": 0.4148106 |
| }, |
| { |
| "epoch": 1.2385815602836878, |
| "grad_norm": 0.8289617300033569, |
| "loss": 5.0061, |
| "lr": 0.00045944055944055945, |
| "step": 4366, |
| "tokens_trained": 0.414998608 |
| }, |
| { |
| "epoch": 1.2391489361702128, |
| "grad_norm": 0.801800549030304, |
| "loss": 5.0436, |
| "lr": 0.0004591608391608392, |
| "step": 4368, |
| "tokens_trained": 0.415190568 |
| }, |
| { |
| "epoch": 1.2397163120567376, |
| "grad_norm": 0.8448522686958313, |
| "loss": 4.9398, |
| "lr": 0.0004588811188811189, |
| "step": 4370, |
| "tokens_trained": 0.415378536 |
| }, |
| { |
| "epoch": 1.2402836879432624, |
| "grad_norm": 0.8992466330528259, |
| "loss": 4.9277, |
| "lr": 0.0004586013986013986, |
| "step": 4372, |
| "tokens_trained": 0.4155704 |
| }, |
| { |
| "epoch": 1.2408510638297872, |
| "grad_norm": 0.8534346222877502, |
| "loss": 4.8933, |
| "lr": 0.0004583216783216783, |
| "step": 4374, |
| "tokens_trained": 0.41575984 |
| }, |
| { |
| "epoch": 1.2411347517730495, |
| "eval_loss": 4.997620582580566, |
| "eval_runtime": 20.4786, |
| "step": 4375, |
| "tokens_trained": 0.415855704 |
| }, |
| { |
| "epoch": 1.2414184397163122, |
| "grad_norm": 0.8547607064247131, |
| "loss": 5.0331, |
| "lr": 0.000458041958041958, |
| "step": 4376, |
| "tokens_trained": 0.415951704 |
| }, |
| { |
| "epoch": 1.241985815602837, |
| "grad_norm": 0.7995121479034424, |
| "loss": 4.9727, |
| "lr": 0.0004577622377622378, |
| "step": 4378, |
| "tokens_trained": 0.416142464 |
| }, |
| { |
| "epoch": 1.2425531914893617, |
| "grad_norm": 0.7953593730926514, |
| "loss": 5.054, |
| "lr": 0.0004574825174825175, |
| "step": 4380, |
| "tokens_trained": 0.416331184 |
| }, |
| { |
| "epoch": 1.2431205673758865, |
| "grad_norm": 0.8307169079780579, |
| "loss": 4.9694, |
| "lr": 0.00045720279720279724, |
| "step": 4382, |
| "tokens_trained": 0.416522688 |
| }, |
| { |
| "epoch": 1.2436879432624113, |
| "grad_norm": 0.8380933403968811, |
| "loss": 4.9432, |
| "lr": 0.00045692307692307693, |
| "step": 4384, |
| "tokens_trained": 0.416712408 |
| }, |
| { |
| "epoch": 1.244255319148936, |
| "grad_norm": 0.8354132771492004, |
| "loss": 4.9649, |
| "lr": 0.0004566433566433567, |
| "step": 4386, |
| "tokens_trained": 0.416902056 |
| }, |
| { |
| "epoch": 1.244822695035461, |
| "grad_norm": 0.8815358877182007, |
| "loss": 4.9998, |
| "lr": 0.00045636363636363637, |
| "step": 4388, |
| "tokens_trained": 0.417090856 |
| }, |
| { |
| "epoch": 1.2453900709219858, |
| "grad_norm": 0.8799077868461609, |
| "loss": 4.984, |
| "lr": 0.00045608391608391606, |
| "step": 4390, |
| "tokens_trained": 0.417281408 |
| }, |
| { |
| "epoch": 1.2459574468085106, |
| "grad_norm": 0.9041373133659363, |
| "loss": 4.9209, |
| "lr": 0.0004558041958041958, |
| "step": 4392, |
| "tokens_trained": 0.41747192 |
| }, |
| { |
| "epoch": 1.2465248226950354, |
| "grad_norm": 0.8234816193580627, |
| "loss": 5.022, |
| "lr": 0.0004555244755244755, |
| "step": 4394, |
| "tokens_trained": 0.41766064 |
| }, |
| { |
| "epoch": 1.2470921985815604, |
| "grad_norm": 0.8067740797996521, |
| "loss": 5.0255, |
| "lr": 0.00045524475524475524, |
| "step": 4396, |
| "tokens_trained": 0.417852816 |
| }, |
| { |
| "epoch": 1.2476595744680852, |
| "grad_norm": 0.812566876411438, |
| "loss": 4.9524, |
| "lr": 0.000454965034965035, |
| "step": 4398, |
| "tokens_trained": 0.418043848 |
| }, |
| { |
| "epoch": 1.24822695035461, |
| "grad_norm": 0.7977521419525146, |
| "loss": 5.023, |
| "lr": 0.0004546853146853147, |
| "step": 4400, |
| "tokens_trained": 0.418234224 |
| }, |
| { |
| "epoch": 1.2487943262411347, |
| "grad_norm": 0.7514439225196838, |
| "loss": 4.9909, |
| "lr": 0.0004544055944055944, |
| "step": 4402, |
| "tokens_trained": 0.418424576 |
| }, |
| { |
| "epoch": 1.2493617021276595, |
| "grad_norm": 0.7931577563285828, |
| "loss": 5.0128, |
| "lr": 0.00045412587412587416, |
| "step": 4404, |
| "tokens_trained": 0.418616776 |
| }, |
| { |
| "epoch": 1.2499290780141843, |
| "grad_norm": 0.787543773651123, |
| "loss": 4.9616, |
| "lr": 0.00045384615384615385, |
| "step": 4406, |
| "tokens_trained": 0.418805744 |
| }, |
| { |
| "epoch": 1.2504964539007093, |
| "grad_norm": 0.7384114861488342, |
| "loss": 5.0641, |
| "lr": 0.00045356643356643354, |
| "step": 4408, |
| "tokens_trained": 0.418997784 |
| }, |
| { |
| "epoch": 1.251063829787234, |
| "grad_norm": 0.8014666438102722, |
| "loss": 4.9652, |
| "lr": 0.0004532867132867133, |
| "step": 4410, |
| "tokens_trained": 0.419187464 |
| }, |
| { |
| "epoch": 1.2516312056737589, |
| "grad_norm": 0.7648611068725586, |
| "loss": 4.9813, |
| "lr": 0.000453006993006993, |
| "step": 4412, |
| "tokens_trained": 0.419376864 |
| }, |
| { |
| "epoch": 1.2521985815602836, |
| "grad_norm": 0.7647461891174316, |
| "loss": 5.0052, |
| "lr": 0.0004527272727272727, |
| "step": 4414, |
| "tokens_trained": 0.419568352 |
| }, |
| { |
| "epoch": 1.2527659574468086, |
| "grad_norm": 0.7152479887008667, |
| "loss": 4.9851, |
| "lr": 0.00045244755244755247, |
| "step": 4416, |
| "tokens_trained": 0.419759464 |
| }, |
| { |
| "epoch": 1.2533333333333334, |
| "grad_norm": 0.7977505326271057, |
| "loss": 5.0082, |
| "lr": 0.0004521678321678322, |
| "step": 4418, |
| "tokens_trained": 0.419951 |
| }, |
| { |
| "epoch": 1.2539007092198582, |
| "grad_norm": 0.7556982040405273, |
| "loss": 5.0207, |
| "lr": 0.0004518881118881119, |
| "step": 4420, |
| "tokens_trained": 0.420141312 |
| }, |
| { |
| "epoch": 1.254468085106383, |
| "grad_norm": 0.8059271574020386, |
| "loss": 5.0286, |
| "lr": 0.00045160839160839165, |
| "step": 4422, |
| "tokens_trained": 0.420330672 |
| }, |
| { |
| "epoch": 1.2550354609929077, |
| "grad_norm": 0.836380660533905, |
| "loss": 4.9406, |
| "lr": 0.00045132867132867134, |
| "step": 4424, |
| "tokens_trained": 0.420519952 |
| }, |
| { |
| "epoch": 1.2556028368794325, |
| "grad_norm": 0.7693254947662354, |
| "loss": 4.9533, |
| "lr": 0.000451048951048951, |
| "step": 4426, |
| "tokens_trained": 0.42070948 |
| }, |
| { |
| "epoch": 1.2561702127659575, |
| "grad_norm": 0.8241584897041321, |
| "loss": 5.0407, |
| "lr": 0.00045076923076923077, |
| "step": 4428, |
| "tokens_trained": 0.420899504 |
| }, |
| { |
| "epoch": 1.2567375886524823, |
| "grad_norm": 0.7866604328155518, |
| "loss": 4.9119, |
| "lr": 0.00045048951048951046, |
| "step": 4430, |
| "tokens_trained": 0.421088352 |
| }, |
| { |
| "epoch": 1.257304964539007, |
| "grad_norm": 0.8286674618721008, |
| "loss": 5.016, |
| "lr": 0.0004502097902097902, |
| "step": 4432, |
| "tokens_trained": 0.421277528 |
| }, |
| { |
| "epoch": 1.2578723404255319, |
| "grad_norm": 0.7921491265296936, |
| "loss": 4.9991, |
| "lr": 0.00044993006993006995, |
| "step": 4434, |
| "tokens_trained": 0.421468272 |
| }, |
| { |
| "epoch": 1.2584397163120569, |
| "grad_norm": 0.807640016078949, |
| "loss": 5.042, |
| "lr": 0.0004496503496503497, |
| "step": 4436, |
| "tokens_trained": 0.421658096 |
| }, |
| { |
| "epoch": 1.2590070921985816, |
| "grad_norm": 0.7414442896842957, |
| "loss": 4.9647, |
| "lr": 0.0004493706293706294, |
| "step": 4438, |
| "tokens_trained": 0.421849712 |
| }, |
| { |
| "epoch": 1.2595744680851064, |
| "grad_norm": 0.8236945867538452, |
| "loss": 4.9562, |
| "lr": 0.00044909090909090913, |
| "step": 4440, |
| "tokens_trained": 0.422038344 |
| }, |
| { |
| "epoch": 1.2601418439716312, |
| "grad_norm": 0.7859675884246826, |
| "loss": 4.9568, |
| "lr": 0.0004488111888111888, |
| "step": 4442, |
| "tokens_trained": 0.422227928 |
| }, |
| { |
| "epoch": 1.260709219858156, |
| "grad_norm": 0.7467136383056641, |
| "loss": 4.9543, |
| "lr": 0.0004485314685314685, |
| "step": 4444, |
| "tokens_trained": 0.422415664 |
| }, |
| { |
| "epoch": 1.2612765957446808, |
| "grad_norm": 0.711588978767395, |
| "loss": 5.0494, |
| "lr": 0.00044825174825174826, |
| "step": 4446, |
| "tokens_trained": 0.422606696 |
| }, |
| { |
| "epoch": 1.2618439716312055, |
| "grad_norm": 0.750599205493927, |
| "loss": 4.9878, |
| "lr": 0.00044797202797202795, |
| "step": 4448, |
| "tokens_trained": 0.422796416 |
| }, |
| { |
| "epoch": 1.2624113475177305, |
| "grad_norm": 0.7823654413223267, |
| "loss": 4.947, |
| "lr": 0.0004476923076923077, |
| "step": 4450, |
| "tokens_trained": 0.422986968 |
| }, |
| { |
| "epoch": 1.2629787234042553, |
| "grad_norm": 0.8101715445518494, |
| "loss": 4.925, |
| "lr": 0.00044741258741258744, |
| "step": 4452, |
| "tokens_trained": 0.423174384 |
| }, |
| { |
| "epoch": 1.26354609929078, |
| "grad_norm": 0.8134462237358093, |
| "loss": 5.051, |
| "lr": 0.0004471328671328672, |
| "step": 4454, |
| "tokens_trained": 0.42336536 |
| }, |
| { |
| "epoch": 1.264113475177305, |
| "grad_norm": 0.8446463942527771, |
| "loss": 4.9789, |
| "lr": 0.00044685314685314687, |
| "step": 4456, |
| "tokens_trained": 0.423556136 |
| }, |
| { |
| "epoch": 1.2646808510638299, |
| "grad_norm": 0.7812824845314026, |
| "loss": 4.9819, |
| "lr": 0.0004465734265734266, |
| "step": 4458, |
| "tokens_trained": 0.423745736 |
| }, |
| { |
| "epoch": 1.2652482269503547, |
| "grad_norm": 0.7645587921142578, |
| "loss": 4.9824, |
| "lr": 0.0004462937062937063, |
| "step": 4460, |
| "tokens_trained": 0.423935408 |
| }, |
| { |
| "epoch": 1.2658156028368794, |
| "grad_norm": 0.8110623955726624, |
| "loss": 4.9664, |
| "lr": 0.000446013986013986, |
| "step": 4462, |
| "tokens_trained": 0.424125264 |
| }, |
| { |
| "epoch": 1.2663829787234042, |
| "grad_norm": 0.7860397696495056, |
| "loss": 4.9871, |
| "lr": 0.00044573426573426574, |
| "step": 4464, |
| "tokens_trained": 0.424314544 |
| }, |
| { |
| "epoch": 1.266950354609929, |
| "grad_norm": 0.7764657735824585, |
| "loss": 5.0335, |
| "lr": 0.00044545454545454543, |
| "step": 4466, |
| "tokens_trained": 0.424502264 |
| }, |
| { |
| "epoch": 1.2675177304964538, |
| "grad_norm": 0.7725886702537537, |
| "loss": 4.9705, |
| "lr": 0.0004451748251748252, |
| "step": 4468, |
| "tokens_trained": 0.424691888 |
| }, |
| { |
| "epoch": 1.2680851063829788, |
| "grad_norm": 0.8336632251739502, |
| "loss": 5.0535, |
| "lr": 0.0004448951048951049, |
| "step": 4470, |
| "tokens_trained": 0.424880992 |
| }, |
| { |
| "epoch": 1.2686524822695036, |
| "grad_norm": 0.7934354543685913, |
| "loss": 5.0105, |
| "lr": 0.00044461538461538466, |
| "step": 4472, |
| "tokens_trained": 0.425069536 |
| }, |
| { |
| "epoch": 1.2692198581560283, |
| "grad_norm": 0.7649230360984802, |
| "loss": 4.978, |
| "lr": 0.00044433566433566435, |
| "step": 4474, |
| "tokens_trained": 0.425259168 |
| }, |
| { |
| "epoch": 1.2697872340425531, |
| "grad_norm": 0.7798753976821899, |
| "loss": 5.0526, |
| "lr": 0.0004440559440559441, |
| "step": 4476, |
| "tokens_trained": 0.425450064 |
| }, |
| { |
| "epoch": 1.2703546099290781, |
| "grad_norm": 0.7455066442489624, |
| "loss": 4.9914, |
| "lr": 0.0004437762237762238, |
| "step": 4478, |
| "tokens_trained": 0.42564068 |
| }, |
| { |
| "epoch": 1.270921985815603, |
| "grad_norm": 0.7951638698577881, |
| "loss": 5.0092, |
| "lr": 0.0004434965034965035, |
| "step": 4480, |
| "tokens_trained": 0.42583048 |
| }, |
| { |
| "epoch": 1.2714893617021277, |
| "grad_norm": 0.7585451602935791, |
| "loss": 5.016, |
| "lr": 0.0004432167832167832, |
| "step": 4482, |
| "tokens_trained": 0.42602172 |
| }, |
| { |
| "epoch": 1.2720567375886525, |
| "grad_norm": 0.8267669081687927, |
| "loss": 4.972, |
| "lr": 0.0004429370629370629, |
| "step": 4484, |
| "tokens_trained": 0.426212496 |
| }, |
| { |
| "epoch": 1.2726241134751772, |
| "grad_norm": 0.7738245129585266, |
| "loss": 5.0239, |
| "lr": 0.00044265734265734266, |
| "step": 4486, |
| "tokens_trained": 0.426401408 |
| }, |
| { |
| "epoch": 1.273191489361702, |
| "grad_norm": 0.9146332144737244, |
| "loss": 5.0361, |
| "lr": 0.0004423776223776224, |
| "step": 4488, |
| "tokens_trained": 0.426591056 |
| }, |
| { |
| "epoch": 1.273758865248227, |
| "grad_norm": 0.8278553485870361, |
| "loss": 4.9512, |
| "lr": 0.00044209790209790215, |
| "step": 4490, |
| "tokens_trained": 0.42678144 |
| }, |
| { |
| "epoch": 1.2743262411347518, |
| "grad_norm": 0.7594732046127319, |
| "loss": 4.9391, |
| "lr": 0.00044181818181818184, |
| "step": 4492, |
| "tokens_trained": 0.426971472 |
| }, |
| { |
| "epoch": 1.2748936170212766, |
| "grad_norm": 0.8350242376327515, |
| "loss": 4.9151, |
| "lr": 0.00044153846153846153, |
| "step": 4494, |
| "tokens_trained": 0.427161504 |
| }, |
| { |
| "epoch": 1.2754609929078013, |
| "grad_norm": 0.85927414894104, |
| "loss": 4.9303, |
| "lr": 0.0004412587412587413, |
| "step": 4496, |
| "tokens_trained": 0.427351448 |
| }, |
| { |
| "epoch": 1.2760283687943264, |
| "grad_norm": 0.8133000135421753, |
| "loss": 4.9668, |
| "lr": 0.00044097902097902096, |
| "step": 4498, |
| "tokens_trained": 0.427539384 |
| }, |
| { |
| "epoch": 1.2765957446808511, |
| "grad_norm": 0.7529495358467102, |
| "loss": 4.9364, |
| "lr": 0.0004406993006993007, |
| "step": 4500, |
| "tokens_trained": 0.427730552 |
| }, |
| { |
| "epoch": 1.2765957446808511, |
| "eval_loss": 4.999549388885498, |
| "eval_runtime": 20.6764, |
| "step": 4500, |
| "tokens_trained": 0.427730552 |
| }, |
| { |
| "epoch": 1.277163120567376, |
| "grad_norm": 0.7332281470298767, |
| "loss": 5.024, |
| "lr": 0.0004404195804195804, |
| "step": 4502, |
| "tokens_trained": 0.427922016 |
| }, |
| { |
| "epoch": 1.2777304964539007, |
| "grad_norm": 0.7735735774040222, |
| "loss": 4.9235, |
| "lr": 0.00044013986013986014, |
| "step": 4504, |
| "tokens_trained": 0.428112824 |
| }, |
| { |
| "epoch": 1.2782978723404255, |
| "grad_norm": 0.8075562119483948, |
| "loss": 5.0712, |
| "lr": 0.0004398601398601399, |
| "step": 4506, |
| "tokens_trained": 0.428306056 |
| }, |
| { |
| "epoch": 1.2788652482269502, |
| "grad_norm": 0.8019667863845825, |
| "loss": 4.9597, |
| "lr": 0.00043958041958041963, |
| "step": 4508, |
| "tokens_trained": 0.428496768 |
| }, |
| { |
| "epoch": 1.2794326241134752, |
| "grad_norm": 0.7908930778503418, |
| "loss": 4.9471, |
| "lr": 0.0004393006993006993, |
| "step": 4510, |
| "tokens_trained": 0.428685312 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 0.8128061890602112, |
| "loss": 4.9244, |
| "lr": 0.000439020979020979, |
| "step": 4512, |
| "tokens_trained": 0.428875184 |
| }, |
| { |
| "epoch": 1.2805673758865248, |
| "grad_norm": 0.7859349250793457, |
| "loss": 5.0096, |
| "lr": 0.00043874125874125876, |
| "step": 4514, |
| "tokens_trained": 0.429066688 |
| }, |
| { |
| "epoch": 1.2811347517730496, |
| "grad_norm": 0.7396280169487, |
| "loss": 4.9263, |
| "lr": 0.00043846153846153845, |
| "step": 4516, |
| "tokens_trained": 0.429254336 |
| }, |
| { |
| "epoch": 1.2817021276595746, |
| "grad_norm": 0.8057092428207397, |
| "loss": 4.9705, |
| "lr": 0.0004381818181818182, |
| "step": 4518, |
| "tokens_trained": 0.429446032 |
| }, |
| { |
| "epoch": 1.2822695035460994, |
| "grad_norm": 0.8460845351219177, |
| "loss": 4.9311, |
| "lr": 0.0004379020979020979, |
| "step": 4520, |
| "tokens_trained": 0.429636152 |
| }, |
| { |
| "epoch": 1.2828368794326241, |
| "grad_norm": 0.7627289891242981, |
| "loss": 4.9622, |
| "lr": 0.00043762237762237763, |
| "step": 4522, |
| "tokens_trained": 0.429825536 |
| }, |
| { |
| "epoch": 1.283404255319149, |
| "grad_norm": 0.7211505174636841, |
| "loss": 4.9851, |
| "lr": 0.0004373426573426573, |
| "step": 4524, |
| "tokens_trained": 0.430016616 |
| }, |
| { |
| "epoch": 1.2839716312056737, |
| "grad_norm": 0.7647969722747803, |
| "loss": 4.9708, |
| "lr": 0.0004370629370629371, |
| "step": 4526, |
| "tokens_trained": 0.430208336 |
| }, |
| { |
| "epoch": 1.2845390070921985, |
| "grad_norm": 0.7541454434394836, |
| "loss": 4.9404, |
| "lr": 0.0004367832167832168, |
| "step": 4528, |
| "tokens_trained": 0.430398968 |
| }, |
| { |
| "epoch": 1.2851063829787235, |
| "grad_norm": 0.7825188636779785, |
| "loss": 4.9741, |
| "lr": 0.0004365034965034965, |
| "step": 4530, |
| "tokens_trained": 0.430589112 |
| }, |
| { |
| "epoch": 1.2856737588652483, |
| "grad_norm": 0.7198429107666016, |
| "loss": 4.9745, |
| "lr": 0.00043622377622377624, |
| "step": 4532, |
| "tokens_trained": 0.43077964 |
| }, |
| { |
| "epoch": 1.286241134751773, |
| "grad_norm": 0.7174004912376404, |
| "loss": 5.037, |
| "lr": 0.00043594405594405593, |
| "step": 4534, |
| "tokens_trained": 0.43096964 |
| }, |
| { |
| "epoch": 1.2868085106382978, |
| "grad_norm": 0.7118927240371704, |
| "loss": 5.0456, |
| "lr": 0.0004356643356643357, |
| "step": 4536, |
| "tokens_trained": 0.431160024 |
| }, |
| { |
| "epoch": 1.2873758865248228, |
| "grad_norm": 0.7081615924835205, |
| "loss": 4.9763, |
| "lr": 0.00043538461538461537, |
| "step": 4538, |
| "tokens_trained": 0.431351344 |
| }, |
| { |
| "epoch": 1.2879432624113476, |
| "grad_norm": 0.7620618343353271, |
| "loss": 4.9863, |
| "lr": 0.0004351048951048951, |
| "step": 4540, |
| "tokens_trained": 0.43154232 |
| }, |
| { |
| "epoch": 1.2885106382978724, |
| "grad_norm": 0.8104450702667236, |
| "loss": 4.9903, |
| "lr": 0.0004348251748251748, |
| "step": 4542, |
| "tokens_trained": 0.431731592 |
| }, |
| { |
| "epoch": 1.2890780141843972, |
| "grad_norm": 0.7488150000572205, |
| "loss": 5.0189, |
| "lr": 0.0004345454545454546, |
| "step": 4544, |
| "tokens_trained": 0.431922608 |
| }, |
| { |
| "epoch": 1.289645390070922, |
| "grad_norm": 0.7956752181053162, |
| "loss": 4.9259, |
| "lr": 0.0004342657342657343, |
| "step": 4546, |
| "tokens_trained": 0.432113808 |
| }, |
| { |
| "epoch": 1.2902127659574467, |
| "grad_norm": 0.7799624800682068, |
| "loss": 5.0129, |
| "lr": 0.000433986013986014, |
| "step": 4548, |
| "tokens_trained": 0.432304088 |
| }, |
| { |
| "epoch": 1.2907801418439715, |
| "grad_norm": 0.792834997177124, |
| "loss": 5.0647, |
| "lr": 0.0004337062937062937, |
| "step": 4550, |
| "tokens_trained": 0.432493096 |
| }, |
| { |
| "epoch": 1.2913475177304965, |
| "grad_norm": 0.7479969263076782, |
| "loss": 4.9514, |
| "lr": 0.0004334265734265734, |
| "step": 4552, |
| "tokens_trained": 0.432680128 |
| }, |
| { |
| "epoch": 1.2919148936170213, |
| "grad_norm": 0.7381340861320496, |
| "loss": 4.9865, |
| "lr": 0.00043314685314685316, |
| "step": 4554, |
| "tokens_trained": 0.43287188 |
| }, |
| { |
| "epoch": 1.292482269503546, |
| "grad_norm": 0.7690939903259277, |
| "loss": 4.9704, |
| "lr": 0.00043286713286713285, |
| "step": 4556, |
| "tokens_trained": 0.43306148 |
| }, |
| { |
| "epoch": 1.293049645390071, |
| "grad_norm": 0.7883870005607605, |
| "loss": 4.9766, |
| "lr": 0.0004325874125874126, |
| "step": 4558, |
| "tokens_trained": 0.433252704 |
| }, |
| { |
| "epoch": 1.2936170212765958, |
| "grad_norm": 0.782208263874054, |
| "loss": 4.9967, |
| "lr": 0.0004323076923076923, |
| "step": 4560, |
| "tokens_trained": 0.433444272 |
| }, |
| { |
| "epoch": 1.2941843971631206, |
| "grad_norm": 0.7333335280418396, |
| "loss": 4.9237, |
| "lr": 0.0004320279720279721, |
| "step": 4562, |
| "tokens_trained": 0.433634264 |
| }, |
| { |
| "epoch": 1.2947517730496454, |
| "grad_norm": 0.7663769721984863, |
| "loss": 4.9961, |
| "lr": 0.0004317482517482518, |
| "step": 4564, |
| "tokens_trained": 0.433825632 |
| }, |
| { |
| "epoch": 1.2953191489361702, |
| "grad_norm": 0.75322026014328, |
| "loss": 4.9294, |
| "lr": 0.00043146853146853147, |
| "step": 4566, |
| "tokens_trained": 0.434015424 |
| }, |
| { |
| "epoch": 1.295886524822695, |
| "grad_norm": 0.7660694718360901, |
| "loss": 4.953, |
| "lr": 0.0004311888111888112, |
| "step": 4568, |
| "tokens_trained": 0.434208048 |
| }, |
| { |
| "epoch": 1.2964539007092197, |
| "grad_norm": 0.7548807859420776, |
| "loss": 4.9164, |
| "lr": 0.0004309090909090909, |
| "step": 4570, |
| "tokens_trained": 0.434397424 |
| }, |
| { |
| "epoch": 1.2970212765957447, |
| "grad_norm": 0.760160505771637, |
| "loss": 4.9748, |
| "lr": 0.00043062937062937065, |
| "step": 4572, |
| "tokens_trained": 0.434588752 |
| }, |
| { |
| "epoch": 1.2975886524822695, |
| "grad_norm": 0.8081098198890686, |
| "loss": 4.9596, |
| "lr": 0.00043034965034965034, |
| "step": 4574, |
| "tokens_trained": 0.434779696 |
| }, |
| { |
| "epoch": 1.2981560283687943, |
| "grad_norm": 0.7557078003883362, |
| "loss": 4.979, |
| "lr": 0.0004300699300699301, |
| "step": 4576, |
| "tokens_trained": 0.434971072 |
| }, |
| { |
| "epoch": 1.298723404255319, |
| "grad_norm": 0.7966912984848022, |
| "loss": 4.9257, |
| "lr": 0.00042979020979020977, |
| "step": 4578, |
| "tokens_trained": 0.435160496 |
| }, |
| { |
| "epoch": 1.299290780141844, |
| "grad_norm": 0.8104644417762756, |
| "loss": 4.9675, |
| "lr": 0.00042951048951048957, |
| "step": 4580, |
| "tokens_trained": 0.435349392 |
| }, |
| { |
| "epoch": 1.2998581560283688, |
| "grad_norm": 0.711733877658844, |
| "loss": 4.929, |
| "lr": 0.00042923076923076926, |
| "step": 4582, |
| "tokens_trained": 0.435539752 |
| }, |
| { |
| "epoch": 1.3004255319148936, |
| "grad_norm": 0.7435249090194702, |
| "loss": 5.0012, |
| "lr": 0.00042895104895104895, |
| "step": 4584, |
| "tokens_trained": 0.435730112 |
| }, |
| { |
| "epoch": 1.3009929078014184, |
| "grad_norm": 0.8262581825256348, |
| "loss": 4.9065, |
| "lr": 0.0004286713286713287, |
| "step": 4586, |
| "tokens_trained": 0.435918552 |
| }, |
| { |
| "epoch": 1.3015602836879432, |
| "grad_norm": 0.7614077925682068, |
| "loss": 5.022, |
| "lr": 0.0004283916083916084, |
| "step": 4588, |
| "tokens_trained": 0.43611052 |
| }, |
| { |
| "epoch": 1.302127659574468, |
| "grad_norm": 0.7792633175849915, |
| "loss": 4.9763, |
| "lr": 0.00042811188811188813, |
| "step": 4590, |
| "tokens_trained": 0.43629848 |
| }, |
| { |
| "epoch": 1.302695035460993, |
| "grad_norm": 0.748753011226654, |
| "loss": 4.9588, |
| "lr": 0.0004278321678321678, |
| "step": 4592, |
| "tokens_trained": 0.436487384 |
| }, |
| { |
| "epoch": 1.3032624113475177, |
| "grad_norm": 0.6770404577255249, |
| "loss": 5.0546, |
| "lr": 0.00042755244755244756, |
| "step": 4594, |
| "tokens_trained": 0.436677688 |
| }, |
| { |
| "epoch": 1.3038297872340425, |
| "grad_norm": 0.7595148682594299, |
| "loss": 4.9832, |
| "lr": 0.00042727272727272726, |
| "step": 4596, |
| "tokens_trained": 0.436866288 |
| }, |
| { |
| "epoch": 1.3043971631205673, |
| "grad_norm": 0.7239478230476379, |
| "loss": 4.9597, |
| "lr": 0.00042699300699300705, |
| "step": 4598, |
| "tokens_trained": 0.437057192 |
| }, |
| { |
| "epoch": 1.3049645390070923, |
| "grad_norm": 0.7907828092575073, |
| "loss": 5.0041, |
| "lr": 0.00042671328671328674, |
| "step": 4600, |
| "tokens_trained": 0.437247856 |
| }, |
| { |
| "epoch": 1.305531914893617, |
| "grad_norm": 0.6975818872451782, |
| "loss": 4.9256, |
| "lr": 0.00042643356643356643, |
| "step": 4602, |
| "tokens_trained": 0.43743696 |
| }, |
| { |
| "epoch": 1.3060992907801419, |
| "grad_norm": 0.7589024305343628, |
| "loss": 4.9781, |
| "lr": 0.0004261538461538462, |
| "step": 4604, |
| "tokens_trained": 0.437627408 |
| }, |
| { |
| "epoch": 1.3066666666666666, |
| "grad_norm": 0.7332574725151062, |
| "loss": 5.0012, |
| "lr": 0.00042587412587412587, |
| "step": 4606, |
| "tokens_trained": 0.43781732 |
| }, |
| { |
| "epoch": 1.3072340425531914, |
| "grad_norm": 0.8402982950210571, |
| "loss": 4.9202, |
| "lr": 0.0004255944055944056, |
| "step": 4608, |
| "tokens_trained": 0.438006368 |
| }, |
| { |
| "epoch": 1.3078014184397162, |
| "grad_norm": 0.8018138408660889, |
| "loss": 4.9518, |
| "lr": 0.0004253146853146853, |
| "step": 4610, |
| "tokens_trained": 0.438196728 |
| }, |
| { |
| "epoch": 1.3083687943262412, |
| "grad_norm": 0.8211417198181152, |
| "loss": 4.9916, |
| "lr": 0.00042503496503496505, |
| "step": 4612, |
| "tokens_trained": 0.43838568 |
| }, |
| { |
| "epoch": 1.308936170212766, |
| "grad_norm": 0.8054932355880737, |
| "loss": 4.9329, |
| "lr": 0.00042475524475524474, |
| "step": 4614, |
| "tokens_trained": 0.438577096 |
| }, |
| { |
| "epoch": 1.3095035460992908, |
| "grad_norm": 0.795623779296875, |
| "loss": 4.9572, |
| "lr": 0.0004244755244755245, |
| "step": 4616, |
| "tokens_trained": 0.438767032 |
| }, |
| { |
| "epoch": 1.3100709219858155, |
| "grad_norm": 0.7230743169784546, |
| "loss": 5.0013, |
| "lr": 0.00042419580419580423, |
| "step": 4618, |
| "tokens_trained": 0.438955216 |
| }, |
| { |
| "epoch": 1.3106382978723405, |
| "grad_norm": 0.7714941501617432, |
| "loss": 4.9493, |
| "lr": 0.0004239160839160839, |
| "step": 4620, |
| "tokens_trained": 0.439145848 |
| }, |
| { |
| "epoch": 1.3112056737588653, |
| "grad_norm": 0.7291305661201477, |
| "loss": 4.9792, |
| "lr": 0.00042363636363636366, |
| "step": 4622, |
| "tokens_trained": 0.439334768 |
| }, |
| { |
| "epoch": 1.31177304964539, |
| "grad_norm": 0.6893495321273804, |
| "loss": 4.9703, |
| "lr": 0.00042335664335664335, |
| "step": 4624, |
| "tokens_trained": 0.439524928 |
| }, |
| { |
| "epoch": 1.3120567375886525, |
| "eval_loss": 4.985546112060547, |
| "eval_runtime": 20.6802, |
| "step": 4625, |
| "tokens_trained": 0.439619056 |
| }, |
| { |
| "epoch": 1.3123404255319149, |
| "grad_norm": 0.7363048791885376, |
| "loss": 4.9635, |
| "lr": 0.0004230769230769231, |
| "step": 4626, |
| "tokens_trained": 0.439714232 |
| }, |
| { |
| "epoch": 1.3129078014184397, |
| "grad_norm": 0.7479920387268066, |
| "loss": 5.0308, |
| "lr": 0.0004227972027972028, |
| "step": 4628, |
| "tokens_trained": 0.439904056 |
| }, |
| { |
| "epoch": 1.3134751773049644, |
| "grad_norm": 0.7858623266220093, |
| "loss": 4.9504, |
| "lr": 0.00042251748251748253, |
| "step": 4630, |
| "tokens_trained": 0.440093304 |
| }, |
| { |
| "epoch": 1.3140425531914894, |
| "grad_norm": 0.7382465600967407, |
| "loss": 4.9397, |
| "lr": 0.0004222377622377622, |
| "step": 4632, |
| "tokens_trained": 0.440283584 |
| }, |
| { |
| "epoch": 1.3146099290780142, |
| "grad_norm": 0.7232691049575806, |
| "loss": 5.0304, |
| "lr": 0.00042195804195804197, |
| "step": 4634, |
| "tokens_trained": 0.440473064 |
| }, |
| { |
| "epoch": 1.315177304964539, |
| "grad_norm": 0.7827140092849731, |
| "loss": 5.0059, |
| "lr": 0.0004216783216783217, |
| "step": 4636, |
| "tokens_trained": 0.440664664 |
| }, |
| { |
| "epoch": 1.3157446808510638, |
| "grad_norm": 0.7799215316772461, |
| "loss": 4.9534, |
| "lr": 0.0004213986013986014, |
| "step": 4638, |
| "tokens_trained": 0.4408536 |
| }, |
| { |
| "epoch": 1.3163120567375888, |
| "grad_norm": 0.8065125346183777, |
| "loss": 4.99, |
| "lr": 0.00042111888111888115, |
| "step": 4640, |
| "tokens_trained": 0.441042616 |
| }, |
| { |
| "epoch": 1.3168794326241136, |
| "grad_norm": 0.7722545266151428, |
| "loss": 4.9687, |
| "lr": 0.00042083916083916084, |
| "step": 4642, |
| "tokens_trained": 0.441233296 |
| }, |
| { |
| "epoch": 1.3174468085106383, |
| "grad_norm": 0.7521271109580994, |
| "loss": 5.0357, |
| "lr": 0.0004205594405594406, |
| "step": 4644, |
| "tokens_trained": 0.441423976 |
| }, |
| { |
| "epoch": 1.3180141843971631, |
| "grad_norm": 0.7580513954162598, |
| "loss": 4.9353, |
| "lr": 0.00042027972027972027, |
| "step": 4646, |
| "tokens_trained": 0.441612488 |
| }, |
| { |
| "epoch": 1.318581560283688, |
| "grad_norm": 0.7603718638420105, |
| "loss": 5.0189, |
| "lr": 0.00042, |
| "step": 4648, |
| "tokens_trained": 0.441800944 |
| }, |
| { |
| "epoch": 1.3191489361702127, |
| "grad_norm": 0.7828201055526733, |
| "loss": 4.941, |
| "lr": 0.0004197202797202797, |
| "step": 4650, |
| "tokens_trained": 0.441990072 |
| }, |
| { |
| "epoch": 1.3197163120567375, |
| "grad_norm": 0.7227108478546143, |
| "loss": 4.9707, |
| "lr": 0.0004194405594405594, |
| "step": 4652, |
| "tokens_trained": 0.44218048 |
| }, |
| { |
| "epoch": 1.3202836879432625, |
| "grad_norm": 0.8121836185455322, |
| "loss": 4.91, |
| "lr": 0.0004191608391608392, |
| "step": 4654, |
| "tokens_trained": 0.442370728 |
| }, |
| { |
| "epoch": 1.3208510638297872, |
| "grad_norm": 0.6706936955451965, |
| "loss": 4.907, |
| "lr": 0.0004188811188811189, |
| "step": 4656, |
| "tokens_trained": 0.442560352 |
| }, |
| { |
| "epoch": 1.321418439716312, |
| "grad_norm": 0.7793337106704712, |
| "loss": 5.0206, |
| "lr": 0.00041860139860139863, |
| "step": 4658, |
| "tokens_trained": 0.442750192 |
| }, |
| { |
| "epoch": 1.321985815602837, |
| "grad_norm": 0.7981981039047241, |
| "loss": 5.0155, |
| "lr": 0.0004183216783216783, |
| "step": 4660, |
| "tokens_trained": 0.442940848 |
| }, |
| { |
| "epoch": 1.3225531914893618, |
| "grad_norm": 0.7972844243049622, |
| "loss": 4.9879, |
| "lr": 0.00041804195804195807, |
| "step": 4662, |
| "tokens_trained": 0.443128896 |
| }, |
| { |
| "epoch": 1.3231205673758866, |
| "grad_norm": 0.8017681241035461, |
| "loss": 4.9746, |
| "lr": 0.00041776223776223776, |
| "step": 4664, |
| "tokens_trained": 0.443320528 |
| }, |
| { |
| "epoch": 1.3236879432624113, |
| "grad_norm": 0.7505584955215454, |
| "loss": 4.9819, |
| "lr": 0.0004174825174825175, |
| "step": 4666, |
| "tokens_trained": 0.443510888 |
| }, |
| { |
| "epoch": 1.3242553191489361, |
| "grad_norm": 0.772155225276947, |
| "loss": 5.0783, |
| "lr": 0.0004172027972027972, |
| "step": 4668, |
| "tokens_trained": 0.443701856 |
| }, |
| { |
| "epoch": 1.324822695035461, |
| "grad_norm": 0.7051090598106384, |
| "loss": 4.9403, |
| "lr": 0.0004169230769230769, |
| "step": 4670, |
| "tokens_trained": 0.44389428 |
| }, |
| { |
| "epoch": 1.3253900709219857, |
| "grad_norm": 0.7992343902587891, |
| "loss": 4.9498, |
| "lr": 0.0004166433566433567, |
| "step": 4672, |
| "tokens_trained": 0.444087272 |
| }, |
| { |
| "epoch": 1.3259574468085107, |
| "grad_norm": 0.7696804404258728, |
| "loss": 5.0109, |
| "lr": 0.00041636363636363637, |
| "step": 4674, |
| "tokens_trained": 0.444274648 |
| }, |
| { |
| "epoch": 1.3265248226950355, |
| "grad_norm": 0.7982995510101318, |
| "loss": 4.9506, |
| "lr": 0.0004160839160839161, |
| "step": 4676, |
| "tokens_trained": 0.444464632 |
| }, |
| { |
| "epoch": 1.3270921985815602, |
| "grad_norm": 0.8207205533981323, |
| "loss": 4.9527, |
| "lr": 0.0004158041958041958, |
| "step": 4678, |
| "tokens_trained": 0.444655184 |
| }, |
| { |
| "epoch": 1.327659574468085, |
| "grad_norm": 0.7874724268913269, |
| "loss": 4.9924, |
| "lr": 0.00041552447552447555, |
| "step": 4680, |
| "tokens_trained": 0.444845096 |
| }, |
| { |
| "epoch": 1.32822695035461, |
| "grad_norm": 0.7951269149780273, |
| "loss": 5.0061, |
| "lr": 0.00041524475524475524, |
| "step": 4682, |
| "tokens_trained": 0.445034912 |
| }, |
| { |
| "epoch": 1.3287943262411348, |
| "grad_norm": 0.7952069640159607, |
| "loss": 5.029, |
| "lr": 0.000414965034965035, |
| "step": 4684, |
| "tokens_trained": 0.445224664 |
| }, |
| { |
| "epoch": 1.3293617021276596, |
| "grad_norm": 0.7753441333770752, |
| "loss": 5.0353, |
| "lr": 0.0004146853146853147, |
| "step": 4686, |
| "tokens_trained": 0.44541256 |
| }, |
| { |
| "epoch": 1.3299290780141844, |
| "grad_norm": 0.7112265229225159, |
| "loss": 4.9221, |
| "lr": 0.00041440559440559437, |
| "step": 4688, |
| "tokens_trained": 0.445604696 |
| }, |
| { |
| "epoch": 1.3304964539007091, |
| "grad_norm": 0.7774649262428284, |
| "loss": 5.0125, |
| "lr": 0.00041412587412587417, |
| "step": 4690, |
| "tokens_trained": 0.445794752 |
| }, |
| { |
| "epoch": 1.331063829787234, |
| "grad_norm": 0.8355589509010315, |
| "loss": 4.9665, |
| "lr": 0.00041384615384615386, |
| "step": 4692, |
| "tokens_trained": 0.44598544 |
| }, |
| { |
| "epoch": 1.331631205673759, |
| "grad_norm": 0.7191185355186462, |
| "loss": 4.9798, |
| "lr": 0.0004135664335664336, |
| "step": 4694, |
| "tokens_trained": 0.44617436 |
| }, |
| { |
| "epoch": 1.3321985815602837, |
| "grad_norm": 0.7386505007743835, |
| "loss": 4.9756, |
| "lr": 0.0004132867132867133, |
| "step": 4696, |
| "tokens_trained": 0.446363384 |
| }, |
| { |
| "epoch": 1.3327659574468085, |
| "grad_norm": 0.7661808133125305, |
| "loss": 4.9374, |
| "lr": 0.00041300699300699304, |
| "step": 4698, |
| "tokens_trained": 0.44655264 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.7530731558799744, |
| "loss": 4.9681, |
| "lr": 0.0004127272727272727, |
| "step": 4700, |
| "tokens_trained": 0.446743016 |
| }, |
| { |
| "epoch": 1.3339007092198583, |
| "grad_norm": 0.7512504458427429, |
| "loss": 4.9827, |
| "lr": 0.00041244755244755247, |
| "step": 4702, |
| "tokens_trained": 0.446932608 |
| }, |
| { |
| "epoch": 1.334468085106383, |
| "grad_norm": 0.7335140109062195, |
| "loss": 4.9586, |
| "lr": 0.00041216783216783216, |
| "step": 4704, |
| "tokens_trained": 0.447122208 |
| }, |
| { |
| "epoch": 1.3350354609929078, |
| "grad_norm": 0.7327559590339661, |
| "loss": 4.9666, |
| "lr": 0.00041188811188811185, |
| "step": 4706, |
| "tokens_trained": 0.447312824 |
| }, |
| { |
| "epoch": 1.3356028368794326, |
| "grad_norm": 0.7450160980224609, |
| "loss": 4.9197, |
| "lr": 0.00041160839160839165, |
| "step": 4708, |
| "tokens_trained": 0.447500672 |
| }, |
| { |
| "epoch": 1.3361702127659574, |
| "grad_norm": 0.6740980744361877, |
| "loss": 5.0133, |
| "lr": 0.00041132867132867134, |
| "step": 4710, |
| "tokens_trained": 0.447689552 |
| }, |
| { |
| "epoch": 1.3367375886524822, |
| "grad_norm": 0.7320116758346558, |
| "loss": 4.9751, |
| "lr": 0.0004110489510489511, |
| "step": 4712, |
| "tokens_trained": 0.447880128 |
| }, |
| { |
| "epoch": 1.3373049645390072, |
| "grad_norm": 0.7833261489868164, |
| "loss": 4.9285, |
| "lr": 0.0004107692307692308, |
| "step": 4714, |
| "tokens_trained": 0.448069496 |
| }, |
| { |
| "epoch": 1.337872340425532, |
| "grad_norm": 0.7570978999137878, |
| "loss": 5.0047, |
| "lr": 0.0004104895104895105, |
| "step": 4716, |
| "tokens_trained": 0.448258184 |
| }, |
| { |
| "epoch": 1.3384397163120567, |
| "grad_norm": 0.7320883274078369, |
| "loss": 4.9751, |
| "lr": 0.0004102097902097902, |
| "step": 4718, |
| "tokens_trained": 0.448449488 |
| }, |
| { |
| "epoch": 1.3390070921985815, |
| "grad_norm": 0.7385469675064087, |
| "loss": 4.9712, |
| "lr": 0.0004099300699300699, |
| "step": 4720, |
| "tokens_trained": 0.448638776 |
| }, |
| { |
| "epoch": 1.3395744680851065, |
| "grad_norm": 0.7620404958724976, |
| "loss": 4.8906, |
| "lr": 0.00040965034965034964, |
| "step": 4722, |
| "tokens_trained": 0.448830528 |
| }, |
| { |
| "epoch": 1.3401418439716313, |
| "grad_norm": 0.7389976382255554, |
| "loss": 4.9994, |
| "lr": 0.00040937062937062934, |
| "step": 4724, |
| "tokens_trained": 0.449018952 |
| }, |
| { |
| "epoch": 1.340709219858156, |
| "grad_norm": 0.7150964140892029, |
| "loss": 4.9244, |
| "lr": 0.00040909090909090913, |
| "step": 4726, |
| "tokens_trained": 0.44920784 |
| }, |
| { |
| "epoch": 1.3412765957446808, |
| "grad_norm": 0.7163580060005188, |
| "loss": 5.0069, |
| "lr": 0.0004088111888111888, |
| "step": 4728, |
| "tokens_trained": 0.449396696 |
| }, |
| { |
| "epoch": 1.3418439716312056, |
| "grad_norm": 0.7657668590545654, |
| "loss": 4.9322, |
| "lr": 0.00040853146853146857, |
| "step": 4730, |
| "tokens_trained": 0.449585568 |
| }, |
| { |
| "epoch": 1.3424113475177304, |
| "grad_norm": 0.7743586301803589, |
| "loss": 4.9691, |
| "lr": 0.00040825174825174826, |
| "step": 4732, |
| "tokens_trained": 0.44977396 |
| }, |
| { |
| "epoch": 1.3429787234042554, |
| "grad_norm": 0.8050113320350647, |
| "loss": 4.9514, |
| "lr": 0.000407972027972028, |
| "step": 4734, |
| "tokens_trained": 0.449964656 |
| }, |
| { |
| "epoch": 1.3435460992907802, |
| "grad_norm": 0.7641178965568542, |
| "loss": 4.8956, |
| "lr": 0.0004076923076923077, |
| "step": 4736, |
| "tokens_trained": 0.450154896 |
| }, |
| { |
| "epoch": 1.344113475177305, |
| "grad_norm": 0.8350791931152344, |
| "loss": 4.9625, |
| "lr": 0.0004074125874125874, |
| "step": 4738, |
| "tokens_trained": 0.450344528 |
| }, |
| { |
| "epoch": 1.3446808510638297, |
| "grad_norm": 0.7148427367210388, |
| "loss": 4.9657, |
| "lr": 0.00040713286713286713, |
| "step": 4740, |
| "tokens_trained": 0.450535416 |
| }, |
| { |
| "epoch": 1.3452482269503547, |
| "grad_norm": 0.7961207032203674, |
| "loss": 4.897, |
| "lr": 0.0004068531468531468, |
| "step": 4742, |
| "tokens_trained": 0.450724968 |
| }, |
| { |
| "epoch": 1.3458156028368795, |
| "grad_norm": 0.8115900754928589, |
| "loss": 4.9171, |
| "lr": 0.0004065734265734266, |
| "step": 4744, |
| "tokens_trained": 0.450916104 |
| }, |
| { |
| "epoch": 1.3463829787234043, |
| "grad_norm": 0.7608439326286316, |
| "loss": 4.9817, |
| "lr": 0.0004062937062937063, |
| "step": 4746, |
| "tokens_trained": 0.451107272 |
| }, |
| { |
| "epoch": 1.346950354609929, |
| "grad_norm": 0.7412408590316772, |
| "loss": 5.0053, |
| "lr": 0.00040601398601398605, |
| "step": 4748, |
| "tokens_trained": 0.451297408 |
| }, |
| { |
| "epoch": 1.3475177304964538, |
| "grad_norm": 0.7785027623176575, |
| "loss": 5.0091, |
| "lr": 0.00040573426573426574, |
| "step": 4750, |
| "tokens_trained": 0.451488272 |
| }, |
| { |
| "epoch": 1.3475177304964538, |
| "eval_loss": 4.9776997566223145, |
| "eval_runtime": 20.4142, |
| "step": 4750, |
| "tokens_trained": 0.451488272 |
| }, |
| { |
| "epoch": 1.3480851063829786, |
| "grad_norm": 0.7034481763839722, |
| "loss": 5.0312, |
| "lr": 0.0004054545454545455, |
| "step": 4752, |
| "tokens_trained": 0.451678048 |
| }, |
| { |
| "epoch": 1.3486524822695036, |
| "grad_norm": 0.8021607398986816, |
| "loss": 4.9923, |
| "lr": 0.0004051748251748252, |
| "step": 4754, |
| "tokens_trained": 0.451867816 |
| }, |
| { |
| "epoch": 1.3492198581560284, |
| "grad_norm": 0.7409330606460571, |
| "loss": 4.9429, |
| "lr": 0.00040489510489510487, |
| "step": 4756, |
| "tokens_trained": 0.45205644 |
| }, |
| { |
| "epoch": 1.3497872340425532, |
| "grad_norm": 0.6617271900177002, |
| "loss": 5.0044, |
| "lr": 0.0004046153846153846, |
| "step": 4758, |
| "tokens_trained": 0.452247464 |
| }, |
| { |
| "epoch": 1.350354609929078, |
| "grad_norm": 0.7742848992347717, |
| "loss": 4.9794, |
| "lr": 0.0004043356643356643, |
| "step": 4760, |
| "tokens_trained": 0.452437608 |
| }, |
| { |
| "epoch": 1.350921985815603, |
| "grad_norm": 0.7627806663513184, |
| "loss": 4.9562, |
| "lr": 0.0004040559440559441, |
| "step": 4762, |
| "tokens_trained": 0.452627568 |
| }, |
| { |
| "epoch": 1.3514893617021277, |
| "grad_norm": 0.8105679750442505, |
| "loss": 5.0514, |
| "lr": 0.0004037762237762238, |
| "step": 4764, |
| "tokens_trained": 0.452817176 |
| }, |
| { |
| "epoch": 1.3520567375886525, |
| "grad_norm": 0.7783811688423157, |
| "loss": 4.9414, |
| "lr": 0.00040349650349650354, |
| "step": 4766, |
| "tokens_trained": 0.4530078 |
| }, |
| { |
| "epoch": 1.3526241134751773, |
| "grad_norm": 0.7357584238052368, |
| "loss": 4.9184, |
| "lr": 0.00040321678321678323, |
| "step": 4768, |
| "tokens_trained": 0.453196856 |
| }, |
| { |
| "epoch": 1.353191489361702, |
| "grad_norm": 0.79344242811203, |
| "loss": 4.8904, |
| "lr": 0.00040293706293706297, |
| "step": 4770, |
| "tokens_trained": 0.4533878 |
| }, |
| { |
| "epoch": 1.3537588652482269, |
| "grad_norm": 0.7372890710830688, |
| "loss": 4.9378, |
| "lr": 0.00040265734265734266, |
| "step": 4772, |
| "tokens_trained": 0.4535766 |
| }, |
| { |
| "epoch": 1.3543262411347516, |
| "grad_norm": 0.7920981049537659, |
| "loss": 4.9701, |
| "lr": 0.00040237762237762235, |
| "step": 4774, |
| "tokens_trained": 0.45376792 |
| }, |
| { |
| "epoch": 1.3548936170212766, |
| "grad_norm": 0.7568764686584473, |
| "loss": 5.0008, |
| "lr": 0.0004020979020979021, |
| "step": 4776, |
| "tokens_trained": 0.453958072 |
| }, |
| { |
| "epoch": 1.3554609929078014, |
| "grad_norm": 0.7389140129089355, |
| "loss": 4.9886, |
| "lr": 0.0004018181818181818, |
| "step": 4778, |
| "tokens_trained": 0.454147016 |
| }, |
| { |
| "epoch": 1.3560283687943262, |
| "grad_norm": 0.7528326511383057, |
| "loss": 4.9669, |
| "lr": 0.00040153846153846153, |
| "step": 4780, |
| "tokens_trained": 0.454338392 |
| }, |
| { |
| "epoch": 1.3565957446808512, |
| "grad_norm": 0.7838888764381409, |
| "loss": 5.0034, |
| "lr": 0.0004012587412587413, |
| "step": 4782, |
| "tokens_trained": 0.45452652 |
| }, |
| { |
| "epoch": 1.357163120567376, |
| "grad_norm": 0.8001760244369507, |
| "loss": 4.969, |
| "lr": 0.000400979020979021, |
| "step": 4784, |
| "tokens_trained": 0.454714896 |
| }, |
| { |
| "epoch": 1.3577304964539008, |
| "grad_norm": 0.7670722007751465, |
| "loss": 5.0728, |
| "lr": 0.0004006993006993007, |
| "step": 4786, |
| "tokens_trained": 0.45490428 |
| }, |
| { |
| "epoch": 1.3582978723404255, |
| "grad_norm": 0.7396910786628723, |
| "loss": 4.9123, |
| "lr": 0.00040041958041958046, |
| "step": 4788, |
| "tokens_trained": 0.45509412 |
| }, |
| { |
| "epoch": 1.3588652482269503, |
| "grad_norm": 0.8072660565376282, |
| "loss": 4.9988, |
| "lr": 0.00040013986013986015, |
| "step": 4790, |
| "tokens_trained": 0.455283592 |
| }, |
| { |
| "epoch": 1.359432624113475, |
| "grad_norm": 0.7714769840240479, |
| "loss": 4.9984, |
| "lr": 0.00039986013986013984, |
| "step": 4792, |
| "tokens_trained": 0.455476456 |
| }, |
| { |
| "epoch": 1.3599999999999999, |
| "grad_norm": 0.749272882938385, |
| "loss": 5.02, |
| "lr": 0.0003995804195804196, |
| "step": 4794, |
| "tokens_trained": 0.455666568 |
| }, |
| { |
| "epoch": 1.3605673758865249, |
| "grad_norm": 0.9460277557373047, |
| "loss": 5.0115, |
| "lr": 0.00039930069930069927, |
| "step": 4796, |
| "tokens_trained": 0.455855472 |
| }, |
| { |
| "epoch": 1.3611347517730497, |
| "grad_norm": 0.8013962507247925, |
| "loss": 4.8935, |
| "lr": 0.000399020979020979, |
| "step": 4798, |
| "tokens_trained": 0.456043256 |
| }, |
| { |
| "epoch": 1.3617021276595744, |
| "grad_norm": 0.8356024026870728, |
| "loss": 5.004, |
| "lr": 0.00039874125874125876, |
| "step": 4800, |
| "tokens_trained": 0.456232728 |
| }, |
| { |
| "epoch": 1.3622695035460992, |
| "grad_norm": 0.7791249752044678, |
| "loss": 4.9025, |
| "lr": 0.0003984615384615385, |
| "step": 4802, |
| "tokens_trained": 0.456422672 |
| }, |
| { |
| "epoch": 1.3628368794326242, |
| "grad_norm": 0.7426172494888306, |
| "loss": 4.9706, |
| "lr": 0.0003981818181818182, |
| "step": 4804, |
| "tokens_trained": 0.456612888 |
| }, |
| { |
| "epoch": 1.363404255319149, |
| "grad_norm": 0.8252729773521423, |
| "loss": 4.9679, |
| "lr": 0.00039790209790209794, |
| "step": 4806, |
| "tokens_trained": 0.456802432 |
| }, |
| { |
| "epoch": 1.3639716312056738, |
| "grad_norm": 0.7870017290115356, |
| "loss": 4.9609, |
| "lr": 0.00039762237762237763, |
| "step": 4808, |
| "tokens_trained": 0.456990752 |
| }, |
| { |
| "epoch": 1.3645390070921986, |
| "grad_norm": 0.815733790397644, |
| "loss": 4.9634, |
| "lr": 0.0003973426573426573, |
| "step": 4810, |
| "tokens_trained": 0.457181528 |
| }, |
| { |
| "epoch": 1.3651063829787233, |
| "grad_norm": 0.6886212825775146, |
| "loss": 4.954, |
| "lr": 0.00039706293706293707, |
| "step": 4812, |
| "tokens_trained": 0.457370064 |
| }, |
| { |
| "epoch": 1.365673758865248, |
| "grad_norm": 0.7102149724960327, |
| "loss": 4.8986, |
| "lr": 0.00039678321678321676, |
| "step": 4814, |
| "tokens_trained": 0.457559112 |
| }, |
| { |
| "epoch": 1.3662411347517731, |
| "grad_norm": 0.7671045064926147, |
| "loss": 4.9504, |
| "lr": 0.0003965034965034965, |
| "step": 4816, |
| "tokens_trained": 0.457749888 |
| }, |
| { |
| "epoch": 1.366808510638298, |
| "grad_norm": 0.7828851938247681, |
| "loss": 4.9522, |
| "lr": 0.00039622377622377625, |
| "step": 4818, |
| "tokens_trained": 0.457939616 |
| }, |
| { |
| "epoch": 1.3673758865248227, |
| "grad_norm": 0.7570793628692627, |
| "loss": 4.9273, |
| "lr": 0.000395944055944056, |
| "step": 4820, |
| "tokens_trained": 0.458131776 |
| }, |
| { |
| "epoch": 1.3679432624113474, |
| "grad_norm": 0.7246227860450745, |
| "loss": 5.0342, |
| "lr": 0.0003956643356643357, |
| "step": 4822, |
| "tokens_trained": 0.458323576 |
| }, |
| { |
| "epoch": 1.3685106382978725, |
| "grad_norm": 0.7387742400169373, |
| "loss": 4.976, |
| "lr": 0.0003953846153846154, |
| "step": 4824, |
| "tokens_trained": 0.4585148 |
| }, |
| { |
| "epoch": 1.3690780141843972, |
| "grad_norm": 0.7457069158554077, |
| "loss": 5.0033, |
| "lr": 0.0003951048951048951, |
| "step": 4826, |
| "tokens_trained": 0.458706352 |
| }, |
| { |
| "epoch": 1.369645390070922, |
| "grad_norm": 0.721156895160675, |
| "loss": 4.9869, |
| "lr": 0.0003948251748251748, |
| "step": 4828, |
| "tokens_trained": 0.45889584 |
| }, |
| { |
| "epoch": 1.3702127659574468, |
| "grad_norm": 0.7440138459205627, |
| "loss": 4.9304, |
| "lr": 0.00039454545454545455, |
| "step": 4830, |
| "tokens_trained": 0.459084496 |
| }, |
| { |
| "epoch": 1.3707801418439716, |
| "grad_norm": 0.7051060199737549, |
| "loss": 4.9388, |
| "lr": 0.00039426573426573424, |
| "step": 4832, |
| "tokens_trained": 0.459273328 |
| }, |
| { |
| "epoch": 1.3713475177304963, |
| "grad_norm": 0.7923696637153625, |
| "loss": 4.9632, |
| "lr": 0.000393986013986014, |
| "step": 4834, |
| "tokens_trained": 0.459461936 |
| }, |
| { |
| "epoch": 1.3719148936170213, |
| "grad_norm": 0.7542476654052734, |
| "loss": 4.9849, |
| "lr": 0.00039370629370629373, |
| "step": 4836, |
| "tokens_trained": 0.459654296 |
| }, |
| { |
| "epoch": 1.3724822695035461, |
| "grad_norm": 0.6460102200508118, |
| "loss": 4.9345, |
| "lr": 0.0003934265734265735, |
| "step": 4838, |
| "tokens_trained": 0.459840832 |
| }, |
| { |
| "epoch": 1.373049645390071, |
| "grad_norm": 0.6898486614227295, |
| "loss": 4.9322, |
| "lr": 0.00039314685314685316, |
| "step": 4840, |
| "tokens_trained": 0.46003016 |
| }, |
| { |
| "epoch": 1.3736170212765957, |
| "grad_norm": 0.7820252776145935, |
| "loss": 4.9832, |
| "lr": 0.00039286713286713286, |
| "step": 4842, |
| "tokens_trained": 0.460220928 |
| }, |
| { |
| "epoch": 1.3741843971631207, |
| "grad_norm": 0.681734561920166, |
| "loss": 4.8975, |
| "lr": 0.0003925874125874126, |
| "step": 4844, |
| "tokens_trained": 0.460410064 |
| }, |
| { |
| "epoch": 1.3747517730496455, |
| "grad_norm": 0.7517859935760498, |
| "loss": 4.941, |
| "lr": 0.0003923076923076923, |
| "step": 4846, |
| "tokens_trained": 0.46059848 |
| }, |
| { |
| "epoch": 1.3753191489361702, |
| "grad_norm": 0.7375074625015259, |
| "loss": 4.9473, |
| "lr": 0.00039202797202797203, |
| "step": 4848, |
| "tokens_trained": 0.46078916 |
| }, |
| { |
| "epoch": 1.375886524822695, |
| "grad_norm": 0.728672444820404, |
| "loss": 4.962, |
| "lr": 0.0003917482517482517, |
| "step": 4850, |
| "tokens_trained": 0.460977672 |
| }, |
| { |
| "epoch": 1.3764539007092198, |
| "grad_norm": 0.7166595458984375, |
| "loss": 4.9366, |
| "lr": 0.00039146853146853147, |
| "step": 4852, |
| "tokens_trained": 0.461166912 |
| }, |
| { |
| "epoch": 1.3770212765957446, |
| "grad_norm": 0.7807113528251648, |
| "loss": 4.9279, |
| "lr": 0.0003911888111888112, |
| "step": 4854, |
| "tokens_trained": 0.46135684 |
| }, |
| { |
| "epoch": 1.3775886524822696, |
| "grad_norm": 0.7296082973480225, |
| "loss": 4.9246, |
| "lr": 0.00039090909090909096, |
| "step": 4856, |
| "tokens_trained": 0.461546944 |
| }, |
| { |
| "epoch": 1.3781560283687944, |
| "grad_norm": 0.7450242638587952, |
| "loss": 4.9474, |
| "lr": 0.00039062937062937065, |
| "step": 4858, |
| "tokens_trained": 0.461736576 |
| }, |
| { |
| "epoch": 1.3787234042553191, |
| "grad_norm": 0.6994244456291199, |
| "loss": 4.9334, |
| "lr": 0.00039034965034965034, |
| "step": 4860, |
| "tokens_trained": 0.461925424 |
| }, |
| { |
| "epoch": 1.379290780141844, |
| "grad_norm": 0.7981341481208801, |
| "loss": 4.9785, |
| "lr": 0.0003900699300699301, |
| "step": 4862, |
| "tokens_trained": 0.462115912 |
| }, |
| { |
| "epoch": 1.379858156028369, |
| "grad_norm": 0.6945004463195801, |
| "loss": 4.9581, |
| "lr": 0.0003897902097902098, |
| "step": 4864, |
| "tokens_trained": 0.462306424 |
| }, |
| { |
| "epoch": 1.3804255319148937, |
| "grad_norm": 0.7116626501083374, |
| "loss": 4.947, |
| "lr": 0.0003895104895104895, |
| "step": 4866, |
| "tokens_trained": 0.462497352 |
| }, |
| { |
| "epoch": 1.3809929078014185, |
| "grad_norm": 0.7096779346466064, |
| "loss": 4.956, |
| "lr": 0.0003892307692307692, |
| "step": 4868, |
| "tokens_trained": 0.462686872 |
| }, |
| { |
| "epoch": 1.3815602836879433, |
| "grad_norm": 0.6993130445480347, |
| "loss": 4.9038, |
| "lr": 0.00038895104895104895, |
| "step": 4870, |
| "tokens_trained": 0.462877712 |
| }, |
| { |
| "epoch": 1.382127659574468, |
| "grad_norm": 0.7118195295333862, |
| "loss": 4.9709, |
| "lr": 0.0003886713286713287, |
| "step": 4872, |
| "tokens_trained": 0.463069304 |
| }, |
| { |
| "epoch": 1.3826950354609928, |
| "grad_norm": 0.760608971118927, |
| "loss": 4.9574, |
| "lr": 0.00038839160839160844, |
| "step": 4874, |
| "tokens_trained": 0.463260616 |
| }, |
| { |
| "epoch": 1.3829787234042552, |
| "eval_loss": 4.976211071014404, |
| "eval_runtime": 20.5866, |
| "step": 4875, |
| "tokens_trained": 0.463356504 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 7650, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 125, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|