| { |
| "best_global_step": 2500, |
| "best_metric": 5.141824245452881, |
| "best_model_checkpoint": "/gpfs/scratch/guoh/DNAFM/output/gencode_human_12.8k_12800/Gencode-BPE/checkpoint-2500", |
| "epoch": 0.7092198581560284, |
| "eval_steps": 125, |
| "global_step": 2500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005673758865248227, |
| "grad_norm": 1275.0146484375, |
| "loss": 281.4781, |
| "lr": 2e-06, |
| "step": 2, |
| "tokens_trained": 0.000192256 |
| }, |
| { |
| "epoch": 0.0011347517730496454, |
| "grad_norm": 1437.579833984375, |
| "loss": 267.2211, |
| "lr": 6e-06, |
| "step": 4, |
| "tokens_trained": 0.000382024 |
| }, |
| { |
| "epoch": 0.001702127659574468, |
| "grad_norm": 1719.271484375, |
| "loss": 219.3822, |
| "lr": 1e-05, |
| "step": 6, |
| "tokens_trained": 0.00057072 |
| }, |
| { |
| "epoch": 0.0022695035460992908, |
| "grad_norm": 1444.94970703125, |
| "loss": 133.8172, |
| "lr": 1.4e-05, |
| "step": 8, |
| "tokens_trained": 0.000761336 |
| }, |
| { |
| "epoch": 0.0028368794326241137, |
| "grad_norm": 238.9689178466797, |
| "loss": 90.8177, |
| "lr": 1.8e-05, |
| "step": 10, |
| "tokens_trained": 0.000953248 |
| }, |
| { |
| "epoch": 0.003404255319148936, |
| "grad_norm": 158.53497314453125, |
| "loss": 84.6922, |
| "lr": 2.2e-05, |
| "step": 12, |
| "tokens_trained": 0.00114424 |
| }, |
| { |
| "epoch": 0.003971631205673759, |
| "grad_norm": 146.10595703125, |
| "loss": 76.7055, |
| "lr": 2.6e-05, |
| "step": 14, |
| "tokens_trained": 0.001334104 |
| }, |
| { |
| "epoch": 0.0045390070921985815, |
| "grad_norm": 140.69964599609375, |
| "loss": 67.9952, |
| "lr": 3e-05, |
| "step": 16, |
| "tokens_trained": 0.00152392 |
| }, |
| { |
| "epoch": 0.005106382978723404, |
| "grad_norm": 108.80303192138672, |
| "loss": 57.8088, |
| "lr": 3.4000000000000007e-05, |
| "step": 18, |
| "tokens_trained": 0.001713872 |
| }, |
| { |
| "epoch": 0.005673758865248227, |
| "grad_norm": 106.82334899902344, |
| "loss": 48.6585, |
| "lr": 3.8e-05, |
| "step": 20, |
| "tokens_trained": 0.001903976 |
| }, |
| { |
| "epoch": 0.00624113475177305, |
| "grad_norm": 93.58769989013672, |
| "loss": 41.7984, |
| "lr": 4.2000000000000004e-05, |
| "step": 22, |
| "tokens_trained": 0.002094288 |
| }, |
| { |
| "epoch": 0.006808510638297872, |
| "grad_norm": 87.5854721069336, |
| "loss": 37.6201, |
| "lr": 4.6e-05, |
| "step": 24, |
| "tokens_trained": 0.002282496 |
| }, |
| { |
| "epoch": 0.007375886524822695, |
| "grad_norm": 84.12794494628906, |
| "loss": 35.0091, |
| "lr": 5e-05, |
| "step": 26, |
| "tokens_trained": 0.00247068 |
| }, |
| { |
| "epoch": 0.007943262411347518, |
| "grad_norm": 79.77535247802734, |
| "loss": 33.2253, |
| "lr": 5.4e-05, |
| "step": 28, |
| "tokens_trained": 0.002662888 |
| }, |
| { |
| "epoch": 0.00851063829787234, |
| "grad_norm": 66.42157745361328, |
| "loss": 32.0682, |
| "lr": 5.800000000000001e-05, |
| "step": 30, |
| "tokens_trained": 0.002851968 |
| }, |
| { |
| "epoch": 0.009078014184397163, |
| "grad_norm": 87.52485656738281, |
| "loss": 30.893, |
| "lr": 6.2e-05, |
| "step": 32, |
| "tokens_trained": 0.003041384 |
| }, |
| { |
| "epoch": 0.009645390070921986, |
| "grad_norm": 58.33614730834961, |
| "loss": 30.0513, |
| "lr": 6.6e-05, |
| "step": 34, |
| "tokens_trained": 0.003232872 |
| }, |
| { |
| "epoch": 0.010212765957446808, |
| "grad_norm": 54.629329681396484, |
| "loss": 29.0115, |
| "lr": 7.000000000000001e-05, |
| "step": 36, |
| "tokens_trained": 0.003423824 |
| }, |
| { |
| "epoch": 0.01078014184397163, |
| "grad_norm": 52.79097366333008, |
| "loss": 28.2084, |
| "lr": 7.4e-05, |
| "step": 38, |
| "tokens_trained": 0.003613232 |
| }, |
| { |
| "epoch": 0.011347517730496455, |
| "grad_norm": 54.481224060058594, |
| "loss": 27.4345, |
| "lr": 7.8e-05, |
| "step": 40, |
| "tokens_trained": 0.003800952 |
| }, |
| { |
| "epoch": 0.011914893617021277, |
| "grad_norm": 58.7069091796875, |
| "loss": 26.5936, |
| "lr": 8.2e-05, |
| "step": 42, |
| "tokens_trained": 0.003991512 |
| }, |
| { |
| "epoch": 0.0124822695035461, |
| "grad_norm": 49.30760955810547, |
| "loss": 26.0608, |
| "lr": 8.599999999999999e-05, |
| "step": 44, |
| "tokens_trained": 0.004180648 |
| }, |
| { |
| "epoch": 0.013049645390070922, |
| "grad_norm": 61.902587890625, |
| "loss": 25.5363, |
| "lr": 8.999999999999999e-05, |
| "step": 46, |
| "tokens_trained": 0.00437148 |
| }, |
| { |
| "epoch": 0.013617021276595745, |
| "grad_norm": 46.76111602783203, |
| "loss": 24.9599, |
| "lr": 9.400000000000001e-05, |
| "step": 48, |
| "tokens_trained": 0.004559344 |
| }, |
| { |
| "epoch": 0.014184397163120567, |
| "grad_norm": 57.06416702270508, |
| "loss": 24.4087, |
| "lr": 9.800000000000001e-05, |
| "step": 50, |
| "tokens_trained": 0.004749256 |
| }, |
| { |
| "epoch": 0.01475177304964539, |
| "grad_norm": 44.798736572265625, |
| "loss": 24.1444, |
| "lr": 0.000102, |
| "step": 52, |
| "tokens_trained": 0.004940192 |
| }, |
| { |
| "epoch": 0.015319148936170212, |
| "grad_norm": 40.29296875, |
| "loss": 23.6011, |
| "lr": 0.000106, |
| "step": 54, |
| "tokens_trained": 0.005130304 |
| }, |
| { |
| "epoch": 0.015886524822695036, |
| "grad_norm": 38.75099563598633, |
| "loss": 23.1781, |
| "lr": 0.00011, |
| "step": 56, |
| "tokens_trained": 0.005322864 |
| }, |
| { |
| "epoch": 0.016453900709219857, |
| "grad_norm": 37.470706939697266, |
| "loss": 22.9136, |
| "lr": 0.000114, |
| "step": 58, |
| "tokens_trained": 0.00551392 |
| }, |
| { |
| "epoch": 0.01702127659574468, |
| "grad_norm": 35.1894645690918, |
| "loss": 22.6336, |
| "lr": 0.000118, |
| "step": 60, |
| "tokens_trained": 0.005703096 |
| }, |
| { |
| "epoch": 0.017588652482269502, |
| "grad_norm": 35.136573791503906, |
| "loss": 22.2998, |
| "lr": 0.000122, |
| "step": 62, |
| "tokens_trained": 0.005892448 |
| }, |
| { |
| "epoch": 0.018156028368794326, |
| "grad_norm": 38.05111312866211, |
| "loss": 21.9401, |
| "lr": 0.000126, |
| "step": 64, |
| "tokens_trained": 0.006081656 |
| }, |
| { |
| "epoch": 0.01872340425531915, |
| "grad_norm": 35.63850021362305, |
| "loss": 21.7206, |
| "lr": 0.00013000000000000002, |
| "step": 66, |
| "tokens_trained": 0.006273032 |
| }, |
| { |
| "epoch": 0.01929078014184397, |
| "grad_norm": 34.327667236328125, |
| "loss": 21.4051, |
| "lr": 0.000134, |
| "step": 68, |
| "tokens_trained": 0.00646304 |
| }, |
| { |
| "epoch": 0.019858156028368795, |
| "grad_norm": 31.457059860229492, |
| "loss": 21.0774, |
| "lr": 0.00013800000000000002, |
| "step": 70, |
| "tokens_trained": 0.006652832 |
| }, |
| { |
| "epoch": 0.020425531914893616, |
| "grad_norm": 34.91672897338867, |
| "loss": 20.8718, |
| "lr": 0.00014199999999999998, |
| "step": 72, |
| "tokens_trained": 0.006843512 |
| }, |
| { |
| "epoch": 0.02099290780141844, |
| "grad_norm": 27.959579467773438, |
| "loss": 20.6932, |
| "lr": 0.000146, |
| "step": 74, |
| "tokens_trained": 0.007033584 |
| }, |
| { |
| "epoch": 0.02156028368794326, |
| "grad_norm": 26.569866180419922, |
| "loss": 20.4072, |
| "lr": 0.00015, |
| "step": 76, |
| "tokens_trained": 0.007224032 |
| }, |
| { |
| "epoch": 0.022127659574468085, |
| "grad_norm": 28.009904861450195, |
| "loss": 20.2229, |
| "lr": 0.000154, |
| "step": 78, |
| "tokens_trained": 0.00741368 |
| }, |
| { |
| "epoch": 0.02269503546099291, |
| "grad_norm": 28.892959594726562, |
| "loss": 20.0528, |
| "lr": 0.000158, |
| "step": 80, |
| "tokens_trained": 0.00760416 |
| }, |
| { |
| "epoch": 0.02326241134751773, |
| "grad_norm": 31.58131980895996, |
| "loss": 19.8016, |
| "lr": 0.000162, |
| "step": 82, |
| "tokens_trained": 0.007793952 |
| }, |
| { |
| "epoch": 0.023829787234042554, |
| "grad_norm": 31.01254653930664, |
| "loss": 19.634, |
| "lr": 0.00016600000000000002, |
| "step": 84, |
| "tokens_trained": 0.007980792 |
| }, |
| { |
| "epoch": 0.024397163120567375, |
| "grad_norm": 28.732515335083008, |
| "loss": 19.3777, |
| "lr": 0.00017, |
| "step": 86, |
| "tokens_trained": 0.008171968 |
| }, |
| { |
| "epoch": 0.0249645390070922, |
| "grad_norm": 24.31264877319336, |
| "loss": 19.1346, |
| "lr": 0.000174, |
| "step": 88, |
| "tokens_trained": 0.008361632 |
| }, |
| { |
| "epoch": 0.02553191489361702, |
| "grad_norm": 26.557010650634766, |
| "loss": 19.0014, |
| "lr": 0.000178, |
| "step": 90, |
| "tokens_trained": 0.008552328 |
| }, |
| { |
| "epoch": 0.026099290780141844, |
| "grad_norm": 21.156103134155273, |
| "loss": 18.7032, |
| "lr": 0.000182, |
| "step": 92, |
| "tokens_trained": 0.008743136 |
| }, |
| { |
| "epoch": 0.02666666666666667, |
| "grad_norm": 25.7484188079834, |
| "loss": 18.4836, |
| "lr": 0.000186, |
| "step": 94, |
| "tokens_trained": 0.008932056 |
| }, |
| { |
| "epoch": 0.02723404255319149, |
| "grad_norm": 22.27949333190918, |
| "loss": 18.2233, |
| "lr": 0.00019, |
| "step": 96, |
| "tokens_trained": 0.009121608 |
| }, |
| { |
| "epoch": 0.027801418439716313, |
| "grad_norm": 24.9247989654541, |
| "loss": 17.9867, |
| "lr": 0.000194, |
| "step": 98, |
| "tokens_trained": 0.009311008 |
| }, |
| { |
| "epoch": 0.028368794326241134, |
| "grad_norm": 24.302066802978516, |
| "loss": 17.8016, |
| "lr": 0.00019800000000000002, |
| "step": 100, |
| "tokens_trained": 0.009501456 |
| }, |
| { |
| "epoch": 0.02893617021276596, |
| "grad_norm": 23.458459854125977, |
| "loss": 17.6295, |
| "lr": 0.000202, |
| "step": 102, |
| "tokens_trained": 0.009693952 |
| }, |
| { |
| "epoch": 0.02950354609929078, |
| "grad_norm": 24.092350006103516, |
| "loss": 17.4593, |
| "lr": 0.000206, |
| "step": 104, |
| "tokens_trained": 0.009883328 |
| }, |
| { |
| "epoch": 0.030070921985815603, |
| "grad_norm": 22.54726219177246, |
| "loss": 17.2141, |
| "lr": 0.00021, |
| "step": 106, |
| "tokens_trained": 0.01007316 |
| }, |
| { |
| "epoch": 0.030638297872340424, |
| "grad_norm": 21.334760665893555, |
| "loss": 17.044, |
| "lr": 0.000214, |
| "step": 108, |
| "tokens_trained": 0.010266504 |
| }, |
| { |
| "epoch": 0.031205673758865248, |
| "grad_norm": 20.584287643432617, |
| "loss": 16.8919, |
| "lr": 0.000218, |
| "step": 110, |
| "tokens_trained": 0.010455736 |
| }, |
| { |
| "epoch": 0.03177304964539007, |
| "grad_norm": 23.51676368713379, |
| "loss": 16.751, |
| "lr": 0.000222, |
| "step": 112, |
| "tokens_trained": 0.010645208 |
| }, |
| { |
| "epoch": 0.03234042553191489, |
| "grad_norm": 23.278276443481445, |
| "loss": 16.5997, |
| "lr": 0.00022600000000000002, |
| "step": 114, |
| "tokens_trained": 0.010838928 |
| }, |
| { |
| "epoch": 0.032907801418439714, |
| "grad_norm": 25.4830265045166, |
| "loss": 16.3416, |
| "lr": 0.00023, |
| "step": 116, |
| "tokens_trained": 0.011027792 |
| }, |
| { |
| "epoch": 0.03347517730496454, |
| "grad_norm": 29.442413330078125, |
| "loss": 16.24, |
| "lr": 0.00023400000000000002, |
| "step": 118, |
| "tokens_trained": 0.011217456 |
| }, |
| { |
| "epoch": 0.03404255319148936, |
| "grad_norm": 21.77578353881836, |
| "loss": 16.1922, |
| "lr": 0.00023799999999999998, |
| "step": 120, |
| "tokens_trained": 0.01140804 |
| }, |
| { |
| "epoch": 0.03460992907801418, |
| "grad_norm": 27.040719985961914, |
| "loss": 15.9059, |
| "lr": 0.000242, |
| "step": 122, |
| "tokens_trained": 0.011597816 |
| }, |
| { |
| "epoch": 0.035177304964539004, |
| "grad_norm": 24.74480628967285, |
| "loss": 15.7818, |
| "lr": 0.000246, |
| "step": 124, |
| "tokens_trained": 0.011785624 |
| }, |
| { |
| "epoch": 0.03546099290780142, |
| "eval_loss": 15.553059577941895, |
| "eval_runtime": 23.5485, |
| "step": 125, |
| "tokens_trained": 0.011880832 |
| }, |
| { |
| "epoch": 0.03574468085106383, |
| "grad_norm": 23.13482666015625, |
| "loss": 15.5739, |
| "lr": 0.00025, |
| "step": 126, |
| "tokens_trained": 0.011975976 |
| }, |
| { |
| "epoch": 0.03631205673758865, |
| "grad_norm": 22.8618106842041, |
| "loss": 15.4302, |
| "lr": 0.000254, |
| "step": 128, |
| "tokens_trained": 0.012166744 |
| }, |
| { |
| "epoch": 0.03687943262411347, |
| "grad_norm": 26.804859161376953, |
| "loss": 15.3623, |
| "lr": 0.00025800000000000004, |
| "step": 130, |
| "tokens_trained": 0.01235436 |
| }, |
| { |
| "epoch": 0.0374468085106383, |
| "grad_norm": 21.826601028442383, |
| "loss": 15.1465, |
| "lr": 0.000262, |
| "step": 132, |
| "tokens_trained": 0.012544976 |
| }, |
| { |
| "epoch": 0.03801418439716312, |
| "grad_norm": 39.447086334228516, |
| "loss": 15.0137, |
| "lr": 0.000266, |
| "step": 134, |
| "tokens_trained": 0.012736352 |
| }, |
| { |
| "epoch": 0.03858156028368794, |
| "grad_norm": 23.44275665283203, |
| "loss": 14.9355, |
| "lr": 0.00027, |
| "step": 136, |
| "tokens_trained": 0.012925008 |
| }, |
| { |
| "epoch": 0.03914893617021276, |
| "grad_norm": 21.631427764892578, |
| "loss": 14.6825, |
| "lr": 0.00027400000000000005, |
| "step": 138, |
| "tokens_trained": 0.013114672 |
| }, |
| { |
| "epoch": 0.03971631205673759, |
| "grad_norm": 23.674650192260742, |
| "loss": 14.5194, |
| "lr": 0.00027800000000000004, |
| "step": 140, |
| "tokens_trained": 0.013304016 |
| }, |
| { |
| "epoch": 0.04028368794326241, |
| "grad_norm": 23.974796295166016, |
| "loss": 14.4829, |
| "lr": 0.00028199999999999997, |
| "step": 142, |
| "tokens_trained": 0.013496696 |
| }, |
| { |
| "epoch": 0.04085106382978723, |
| "grad_norm": 26.112201690673828, |
| "loss": 14.3027, |
| "lr": 0.00028599999999999996, |
| "step": 144, |
| "tokens_trained": 0.013684816 |
| }, |
| { |
| "epoch": 0.04141843971631206, |
| "grad_norm": 20.67386817932129, |
| "loss": 14.1499, |
| "lr": 0.00029, |
| "step": 146, |
| "tokens_trained": 0.013874832 |
| }, |
| { |
| "epoch": 0.04198581560283688, |
| "grad_norm": 24.253408432006836, |
| "loss": 13.9378, |
| "lr": 0.000294, |
| "step": 148, |
| "tokens_trained": 0.014065056 |
| }, |
| { |
| "epoch": 0.0425531914893617, |
| "grad_norm": 35.716087341308594, |
| "loss": 14.0562, |
| "lr": 0.000298, |
| "step": 150, |
| "tokens_trained": 0.014256784 |
| }, |
| { |
| "epoch": 0.04312056737588652, |
| "grad_norm": 29.414331436157227, |
| "loss": 14.0462, |
| "lr": 0.000302, |
| "step": 152, |
| "tokens_trained": 0.014446312 |
| }, |
| { |
| "epoch": 0.04368794326241135, |
| "grad_norm": 30.687482833862305, |
| "loss": 13.7603, |
| "lr": 0.000306, |
| "step": 154, |
| "tokens_trained": 0.014639872 |
| }, |
| { |
| "epoch": 0.04425531914893617, |
| "grad_norm": 29.806455612182617, |
| "loss": 13.708, |
| "lr": 0.00031, |
| "step": 156, |
| "tokens_trained": 0.014831112 |
| }, |
| { |
| "epoch": 0.04482269503546099, |
| "grad_norm": 24.900897979736328, |
| "loss": 13.548, |
| "lr": 0.000314, |
| "step": 158, |
| "tokens_trained": 0.015021288 |
| }, |
| { |
| "epoch": 0.04539007092198582, |
| "grad_norm": 24.29252815246582, |
| "loss": 13.3119, |
| "lr": 0.00031800000000000003, |
| "step": 160, |
| "tokens_trained": 0.01521228 |
| }, |
| { |
| "epoch": 0.04595744680851064, |
| "grad_norm": 20.68342399597168, |
| "loss": 13.1829, |
| "lr": 0.000322, |
| "step": 162, |
| "tokens_trained": 0.015403688 |
| }, |
| { |
| "epoch": 0.04652482269503546, |
| "grad_norm": 20.822795867919922, |
| "loss": 12.9044, |
| "lr": 0.000326, |
| "step": 164, |
| "tokens_trained": 0.015593416 |
| }, |
| { |
| "epoch": 0.04709219858156028, |
| "grad_norm": 21.689916610717773, |
| "loss": 12.6862, |
| "lr": 0.00033, |
| "step": 166, |
| "tokens_trained": 0.015784408 |
| }, |
| { |
| "epoch": 0.04765957446808511, |
| "grad_norm": 17.873889923095703, |
| "loss": 12.5502, |
| "lr": 0.00033400000000000004, |
| "step": 168, |
| "tokens_trained": 0.0159744 |
| }, |
| { |
| "epoch": 0.04822695035460993, |
| "grad_norm": 18.951616287231445, |
| "loss": 12.308, |
| "lr": 0.00033800000000000003, |
| "step": 170, |
| "tokens_trained": 0.016163736 |
| }, |
| { |
| "epoch": 0.04879432624113475, |
| "grad_norm": 15.146363258361816, |
| "loss": 12.1558, |
| "lr": 0.000342, |
| "step": 172, |
| "tokens_trained": 0.016353832 |
| }, |
| { |
| "epoch": 0.04936170212765958, |
| "grad_norm": 18.336984634399414, |
| "loss": 12.0386, |
| "lr": 0.000346, |
| "step": 174, |
| "tokens_trained": 0.016545088 |
| }, |
| { |
| "epoch": 0.0499290780141844, |
| "grad_norm": 17.221126556396484, |
| "loss": 11.8791, |
| "lr": 0.00035, |
| "step": 176, |
| "tokens_trained": 0.016735704 |
| }, |
| { |
| "epoch": 0.05049645390070922, |
| "grad_norm": 19.362564086914062, |
| "loss": 11.7224, |
| "lr": 0.000354, |
| "step": 178, |
| "tokens_trained": 0.016927944 |
| }, |
| { |
| "epoch": 0.05106382978723404, |
| "grad_norm": 15.564507484436035, |
| "loss": 11.6448, |
| "lr": 0.000358, |
| "step": 180, |
| "tokens_trained": 0.017116096 |
| }, |
| { |
| "epoch": 0.05163120567375887, |
| "grad_norm": 20.711383819580078, |
| "loss": 11.4398, |
| "lr": 0.000362, |
| "step": 182, |
| "tokens_trained": 0.01730564 |
| }, |
| { |
| "epoch": 0.05219858156028369, |
| "grad_norm": 18.627403259277344, |
| "loss": 11.3377, |
| "lr": 0.000366, |
| "step": 184, |
| "tokens_trained": 0.017495864 |
| }, |
| { |
| "epoch": 0.05276595744680851, |
| "grad_norm": 15.00942325592041, |
| "loss": 11.1416, |
| "lr": 0.00037, |
| "step": 186, |
| "tokens_trained": 0.017686464 |
| }, |
| { |
| "epoch": 0.05333333333333334, |
| "grad_norm": 17.070598602294922, |
| "loss": 11.0148, |
| "lr": 0.000374, |
| "step": 188, |
| "tokens_trained": 0.017879488 |
| }, |
| { |
| "epoch": 0.05390070921985816, |
| "grad_norm": 16.101457595825195, |
| "loss": 10.8874, |
| "lr": 0.000378, |
| "step": 190, |
| "tokens_trained": 0.018068312 |
| }, |
| { |
| "epoch": 0.05446808510638298, |
| "grad_norm": 15.613334655761719, |
| "loss": 10.7055, |
| "lr": 0.000382, |
| "step": 192, |
| "tokens_trained": 0.018255752 |
| }, |
| { |
| "epoch": 0.0550354609929078, |
| "grad_norm": 17.671857833862305, |
| "loss": 10.5706, |
| "lr": 0.000386, |
| "step": 194, |
| "tokens_trained": 0.018447096 |
| }, |
| { |
| "epoch": 0.05560283687943263, |
| "grad_norm": 16.080909729003906, |
| "loss": 10.4476, |
| "lr": 0.00039000000000000005, |
| "step": 196, |
| "tokens_trained": 0.018637264 |
| }, |
| { |
| "epoch": 0.05617021276595745, |
| "grad_norm": 15.02849292755127, |
| "loss": 10.2962, |
| "lr": 0.00039400000000000004, |
| "step": 198, |
| "tokens_trained": 0.018827552 |
| }, |
| { |
| "epoch": 0.05673758865248227, |
| "grad_norm": 14.990167617797852, |
| "loss": 10.1912, |
| "lr": 0.000398, |
| "step": 200, |
| "tokens_trained": 0.019018 |
| }, |
| { |
| "epoch": 0.05730496453900709, |
| "grad_norm": 15.390633583068848, |
| "loss": 10.0442, |
| "lr": 0.000402, |
| "step": 202, |
| "tokens_trained": 0.019209864 |
| }, |
| { |
| "epoch": 0.05787234042553192, |
| "grad_norm": 16.871570587158203, |
| "loss": 9.9685, |
| "lr": 0.00040600000000000006, |
| "step": 204, |
| "tokens_trained": 0.019400176 |
| }, |
| { |
| "epoch": 0.05843971631205674, |
| "grad_norm": 20.16544532775879, |
| "loss": 9.8531, |
| "lr": 0.00041, |
| "step": 206, |
| "tokens_trained": 0.019589424 |
| }, |
| { |
| "epoch": 0.05900709219858156, |
| "grad_norm": 16.825023651123047, |
| "loss": 9.7777, |
| "lr": 0.000414, |
| "step": 208, |
| "tokens_trained": 0.019779112 |
| }, |
| { |
| "epoch": 0.059574468085106386, |
| "grad_norm": 16.43510627746582, |
| "loss": 9.6122, |
| "lr": 0.00041799999999999997, |
| "step": 210, |
| "tokens_trained": 0.019970048 |
| }, |
| { |
| "epoch": 0.060141843971631206, |
| "grad_norm": 17.340473175048828, |
| "loss": 9.4859, |
| "lr": 0.000422, |
| "step": 212, |
| "tokens_trained": 0.020160968 |
| }, |
| { |
| "epoch": 0.06070921985815603, |
| "grad_norm": 15.019119262695312, |
| "loss": 9.3656, |
| "lr": 0.000426, |
| "step": 214, |
| "tokens_trained": 0.020349664 |
| }, |
| { |
| "epoch": 0.06127659574468085, |
| "grad_norm": 13.379194259643555, |
| "loss": 9.2348, |
| "lr": 0.00043, |
| "step": 216, |
| "tokens_trained": 0.020538192 |
| }, |
| { |
| "epoch": 0.061843971631205676, |
| "grad_norm": 16.71472930908203, |
| "loss": 9.2258, |
| "lr": 0.00043400000000000003, |
| "step": 218, |
| "tokens_trained": 0.020728936 |
| }, |
| { |
| "epoch": 0.062411347517730496, |
| "grad_norm": 12.743139266967773, |
| "loss": 9.0569, |
| "lr": 0.000438, |
| "step": 220, |
| "tokens_trained": 0.020917472 |
| }, |
| { |
| "epoch": 0.06297872340425532, |
| "grad_norm": 15.739934921264648, |
| "loss": 8.9623, |
| "lr": 0.000442, |
| "step": 222, |
| "tokens_trained": 0.02110928 |
| }, |
| { |
| "epoch": 0.06354609929078014, |
| "grad_norm": 14.23620891571045, |
| "loss": 8.8201, |
| "lr": 0.000446, |
| "step": 224, |
| "tokens_trained": 0.021300168 |
| }, |
| { |
| "epoch": 0.06411347517730497, |
| "grad_norm": 13.005538940429688, |
| "loss": 8.7235, |
| "lr": 0.00045000000000000004, |
| "step": 226, |
| "tokens_trained": 0.021490272 |
| }, |
| { |
| "epoch": 0.06468085106382979, |
| "grad_norm": 17.17629051208496, |
| "loss": 8.6907, |
| "lr": 0.00045400000000000003, |
| "step": 228, |
| "tokens_trained": 0.021681552 |
| }, |
| { |
| "epoch": 0.06524822695035461, |
| "grad_norm": 14.430739402770996, |
| "loss": 8.6196, |
| "lr": 0.000458, |
| "step": 230, |
| "tokens_trained": 0.02187236 |
| }, |
| { |
| "epoch": 0.06581560283687943, |
| "grad_norm": 14.575714111328125, |
| "loss": 8.4741, |
| "lr": 0.000462, |
| "step": 232, |
| "tokens_trained": 0.022061976 |
| }, |
| { |
| "epoch": 0.06638297872340425, |
| "grad_norm": 13.892754554748535, |
| "loss": 8.4118, |
| "lr": 0.00046600000000000005, |
| "step": 234, |
| "tokens_trained": 0.022252008 |
| }, |
| { |
| "epoch": 0.06695035460992908, |
| "grad_norm": 11.58240795135498, |
| "loss": 8.2781, |
| "lr": 0.00047, |
| "step": 236, |
| "tokens_trained": 0.02244284 |
| }, |
| { |
| "epoch": 0.0675177304964539, |
| "grad_norm": 13.022644996643066, |
| "loss": 8.2139, |
| "lr": 0.000474, |
| "step": 238, |
| "tokens_trained": 0.022631152 |
| }, |
| { |
| "epoch": 0.06808510638297872, |
| "grad_norm": 11.844677925109863, |
| "loss": 8.1134, |
| "lr": 0.00047799999999999996, |
| "step": 240, |
| "tokens_trained": 0.022821096 |
| }, |
| { |
| "epoch": 0.06865248226950355, |
| "grad_norm": 13.878067016601562, |
| "loss": 8.0221, |
| "lr": 0.000482, |
| "step": 242, |
| "tokens_trained": 0.023011656 |
| }, |
| { |
| "epoch": 0.06921985815602837, |
| "grad_norm": 12.34648323059082, |
| "loss": 7.9755, |
| "lr": 0.000486, |
| "step": 244, |
| "tokens_trained": 0.023201 |
| }, |
| { |
| "epoch": 0.06978723404255319, |
| "grad_norm": 14.238297462463379, |
| "loss": 7.8969, |
| "lr": 0.00049, |
| "step": 246, |
| "tokens_trained": 0.023391128 |
| }, |
| { |
| "epoch": 0.07035460992907801, |
| "grad_norm": 14.386019706726074, |
| "loss": 7.8627, |
| "lr": 0.000494, |
| "step": 248, |
| "tokens_trained": 0.023581768 |
| }, |
| { |
| "epoch": 0.07092198581560284, |
| "grad_norm": 13.623086929321289, |
| "loss": 7.7568, |
| "lr": 0.000498, |
| "step": 250, |
| "tokens_trained": 0.023771248 |
| }, |
| { |
| "epoch": 0.07092198581560284, |
| "eval_loss": 7.70297384262085, |
| "eval_runtime": 21.3853, |
| "step": 250, |
| "tokens_trained": 0.023771248 |
| }, |
| { |
| "epoch": 0.07148936170212766, |
| "grad_norm": 14.347646713256836, |
| "loss": 7.6842, |
| "lr": 0.0005020000000000001, |
| "step": 252, |
| "tokens_trained": 0.023961056 |
| }, |
| { |
| "epoch": 0.07205673758865248, |
| "grad_norm": 12.5592041015625, |
| "loss": 7.6516, |
| "lr": 0.000506, |
| "step": 254, |
| "tokens_trained": 0.024150968 |
| }, |
| { |
| "epoch": 0.0726241134751773, |
| "grad_norm": 13.219141960144043, |
| "loss": 7.5789, |
| "lr": 0.00051, |
| "step": 256, |
| "tokens_trained": 0.024340072 |
| }, |
| { |
| "epoch": 0.07319148936170213, |
| "grad_norm": 12.654081344604492, |
| "loss": 7.5369, |
| "lr": 0.000514, |
| "step": 258, |
| "tokens_trained": 0.024529296 |
| }, |
| { |
| "epoch": 0.07375886524822695, |
| "grad_norm": 13.136971473693848, |
| "loss": 7.4949, |
| "lr": 0.000518, |
| "step": 260, |
| "tokens_trained": 0.024719688 |
| }, |
| { |
| "epoch": 0.07432624113475177, |
| "grad_norm": 12.680288314819336, |
| "loss": 7.3904, |
| "lr": 0.000522, |
| "step": 262, |
| "tokens_trained": 0.024909632 |
| }, |
| { |
| "epoch": 0.0748936170212766, |
| "grad_norm": 12.754518508911133, |
| "loss": 7.3514, |
| "lr": 0.000526, |
| "step": 264, |
| "tokens_trained": 0.025098416 |
| }, |
| { |
| "epoch": 0.07546099290780142, |
| "grad_norm": 13.22311019897461, |
| "loss": 7.2951, |
| "lr": 0.0005300000000000001, |
| "step": 266, |
| "tokens_trained": 0.025287344 |
| }, |
| { |
| "epoch": 0.07602836879432624, |
| "grad_norm": 12.11903190612793, |
| "loss": 7.2229, |
| "lr": 0.0005340000000000001, |
| "step": 268, |
| "tokens_trained": 0.025477152 |
| }, |
| { |
| "epoch": 0.07659574468085106, |
| "grad_norm": 13.771833419799805, |
| "loss": 7.1815, |
| "lr": 0.0005380000000000001, |
| "step": 270, |
| "tokens_trained": 0.025668288 |
| }, |
| { |
| "epoch": 0.07716312056737588, |
| "grad_norm": 11.756864547729492, |
| "loss": 7.1669, |
| "lr": 0.0005420000000000001, |
| "step": 272, |
| "tokens_trained": 0.025858528 |
| }, |
| { |
| "epoch": 0.0777304964539007, |
| "grad_norm": 13.613094329833984, |
| "loss": 7.1079, |
| "lr": 0.000546, |
| "step": 274, |
| "tokens_trained": 0.026048616 |
| }, |
| { |
| "epoch": 0.07829787234042553, |
| "grad_norm": 10.001923561096191, |
| "loss": 7.0508, |
| "lr": 0.00055, |
| "step": 276, |
| "tokens_trained": 0.026236944 |
| }, |
| { |
| "epoch": 0.07886524822695036, |
| "grad_norm": 14.262083053588867, |
| "loss": 6.9955, |
| "lr": 0.000554, |
| "step": 278, |
| "tokens_trained": 0.026426848 |
| }, |
| { |
| "epoch": 0.07943262411347518, |
| "grad_norm": 12.381136894226074, |
| "loss": 6.9831, |
| "lr": 0.000558, |
| "step": 280, |
| "tokens_trained": 0.026616784 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 9.815845489501953, |
| "loss": 6.917, |
| "lr": 0.0005620000000000001, |
| "step": 282, |
| "tokens_trained": 0.026805176 |
| }, |
| { |
| "epoch": 0.08056737588652482, |
| "grad_norm": 11.669997215270996, |
| "loss": 6.8999, |
| "lr": 0.000566, |
| "step": 284, |
| "tokens_trained": 0.02699488 |
| }, |
| { |
| "epoch": 0.08113475177304964, |
| "grad_norm": 12.770941734313965, |
| "loss": 6.8998, |
| "lr": 0.00057, |
| "step": 286, |
| "tokens_trained": 0.027185784 |
| }, |
| { |
| "epoch": 0.08170212765957446, |
| "grad_norm": 15.572457313537598, |
| "loss": 6.841, |
| "lr": 0.000574, |
| "step": 288, |
| "tokens_trained": 0.027375896 |
| }, |
| { |
| "epoch": 0.08226950354609928, |
| "grad_norm": 10.980833053588867, |
| "loss": 6.8545, |
| "lr": 0.000578, |
| "step": 290, |
| "tokens_trained": 0.02756588 |
| }, |
| { |
| "epoch": 0.08283687943262412, |
| "grad_norm": 11.678337097167969, |
| "loss": 6.7853, |
| "lr": 0.0005819999999999999, |
| "step": 292, |
| "tokens_trained": 0.02775456 |
| }, |
| { |
| "epoch": 0.08340425531914894, |
| "grad_norm": 9.77885913848877, |
| "loss": 6.7465, |
| "lr": 0.0005859999999999999, |
| "step": 294, |
| "tokens_trained": 0.027942856 |
| }, |
| { |
| "epoch": 0.08397163120567376, |
| "grad_norm": 13.62730884552002, |
| "loss": 6.7276, |
| "lr": 0.00059, |
| "step": 296, |
| "tokens_trained": 0.028133152 |
| }, |
| { |
| "epoch": 0.08453900709219858, |
| "grad_norm": 10.644404411315918, |
| "loss": 6.6802, |
| "lr": 0.000594, |
| "step": 298, |
| "tokens_trained": 0.028322192 |
| }, |
| { |
| "epoch": 0.0851063829787234, |
| "grad_norm": 11.130610466003418, |
| "loss": 6.6548, |
| "lr": 0.000598, |
| "step": 300, |
| "tokens_trained": 0.0285122 |
| }, |
| { |
| "epoch": 0.08567375886524822, |
| "grad_norm": 11.557455062866211, |
| "loss": 6.6155, |
| "lr": 0.000602, |
| "step": 302, |
| "tokens_trained": 0.028699792 |
| }, |
| { |
| "epoch": 0.08624113475177304, |
| "grad_norm": 9.276884078979492, |
| "loss": 6.5989, |
| "lr": 0.000606, |
| "step": 304, |
| "tokens_trained": 0.028889896 |
| }, |
| { |
| "epoch": 0.08680851063829788, |
| "grad_norm": 9.616179466247559, |
| "loss": 6.5773, |
| "lr": 0.00061, |
| "step": 306, |
| "tokens_trained": 0.029082272 |
| }, |
| { |
| "epoch": 0.0873758865248227, |
| "grad_norm": 10.575953483581543, |
| "loss": 6.5358, |
| "lr": 0.000614, |
| "step": 308, |
| "tokens_trained": 0.029273352 |
| }, |
| { |
| "epoch": 0.08794326241134752, |
| "grad_norm": 9.089850425720215, |
| "loss": 6.5088, |
| "lr": 0.0006180000000000001, |
| "step": 310, |
| "tokens_trained": 0.029463848 |
| }, |
| { |
| "epoch": 0.08851063829787234, |
| "grad_norm": 9.090002059936523, |
| "loss": 6.4849, |
| "lr": 0.000622, |
| "step": 312, |
| "tokens_trained": 0.029653272 |
| }, |
| { |
| "epoch": 0.08907801418439716, |
| "grad_norm": 12.038308143615723, |
| "loss": 6.4624, |
| "lr": 0.000626, |
| "step": 314, |
| "tokens_trained": 0.029841928 |
| }, |
| { |
| "epoch": 0.08964539007092198, |
| "grad_norm": 9.073866844177246, |
| "loss": 6.4515, |
| "lr": 0.00063, |
| "step": 316, |
| "tokens_trained": 0.030029808 |
| }, |
| { |
| "epoch": 0.0902127659574468, |
| "grad_norm": 8.727197647094727, |
| "loss": 6.43, |
| "lr": 0.000634, |
| "step": 318, |
| "tokens_trained": 0.030221288 |
| }, |
| { |
| "epoch": 0.09078014184397164, |
| "grad_norm": 14.558151245117188, |
| "loss": 6.4487, |
| "lr": 0.000638, |
| "step": 320, |
| "tokens_trained": 0.030410872 |
| }, |
| { |
| "epoch": 0.09134751773049646, |
| "grad_norm": 9.98914623260498, |
| "loss": 6.4279, |
| "lr": 0.000642, |
| "step": 322, |
| "tokens_trained": 0.030602376 |
| }, |
| { |
| "epoch": 0.09191489361702128, |
| "grad_norm": 10.395442962646484, |
| "loss": 6.4311, |
| "lr": 0.000646, |
| "step": 324, |
| "tokens_trained": 0.030792968 |
| }, |
| { |
| "epoch": 0.0924822695035461, |
| "grad_norm": 10.8250093460083, |
| "loss": 6.3726, |
| "lr": 0.0006500000000000001, |
| "step": 326, |
| "tokens_trained": 0.030982944 |
| }, |
| { |
| "epoch": 0.09304964539007092, |
| "grad_norm": 9.73416805267334, |
| "loss": 6.34, |
| "lr": 0.0006540000000000001, |
| "step": 328, |
| "tokens_trained": 0.031174928 |
| }, |
| { |
| "epoch": 0.09361702127659574, |
| "grad_norm": 8.596503257751465, |
| "loss": 6.3322, |
| "lr": 0.0006580000000000001, |
| "step": 330, |
| "tokens_trained": 0.031364288 |
| }, |
| { |
| "epoch": 0.09418439716312056, |
| "grad_norm": 8.49472427368164, |
| "loss": 6.3096, |
| "lr": 0.000662, |
| "step": 332, |
| "tokens_trained": 0.03155376 |
| }, |
| { |
| "epoch": 0.0947517730496454, |
| "grad_norm": 7.857503414154053, |
| "loss": 6.2368, |
| "lr": 0.000666, |
| "step": 334, |
| "tokens_trained": 0.031744368 |
| }, |
| { |
| "epoch": 0.09531914893617022, |
| "grad_norm": 9.007513999938965, |
| "loss": 6.198, |
| "lr": 0.00067, |
| "step": 336, |
| "tokens_trained": 0.031934136 |
| }, |
| { |
| "epoch": 0.09588652482269504, |
| "grad_norm": 8.185524940490723, |
| "loss": 6.2328, |
| "lr": 0.000674, |
| "step": 338, |
| "tokens_trained": 0.032124984 |
| }, |
| { |
| "epoch": 0.09645390070921986, |
| "grad_norm": 8.784396171569824, |
| "loss": 6.1945, |
| "lr": 0.0006780000000000001, |
| "step": 340, |
| "tokens_trained": 0.032316016 |
| }, |
| { |
| "epoch": 0.09702127659574468, |
| "grad_norm": 8.642311096191406, |
| "loss": 6.218, |
| "lr": 0.0006820000000000001, |
| "step": 342, |
| "tokens_trained": 0.032506224 |
| }, |
| { |
| "epoch": 0.0975886524822695, |
| "grad_norm": 8.493780136108398, |
| "loss": 6.194, |
| "lr": 0.0006860000000000001, |
| "step": 344, |
| "tokens_trained": 0.032696152 |
| }, |
| { |
| "epoch": 0.09815602836879432, |
| "grad_norm": 9.120508193969727, |
| "loss": 6.2241, |
| "lr": 0.00069, |
| "step": 346, |
| "tokens_trained": 0.032885688 |
| }, |
| { |
| "epoch": 0.09872340425531916, |
| "grad_norm": 9.34500503540039, |
| "loss": 6.1548, |
| "lr": 0.000694, |
| "step": 348, |
| "tokens_trained": 0.03307568 |
| }, |
| { |
| "epoch": 0.09929078014184398, |
| "grad_norm": 7.483356952667236, |
| "loss": 6.1282, |
| "lr": 0.0006979999999999999, |
| "step": 350, |
| "tokens_trained": 0.033267208 |
| }, |
| { |
| "epoch": 0.0998581560283688, |
| "grad_norm": 7.974069118499756, |
| "loss": 6.1032, |
| "lr": 0.0007019999999999999, |
| "step": 352, |
| "tokens_trained": 0.033458144 |
| }, |
| { |
| "epoch": 0.10042553191489362, |
| "grad_norm": 8.247384071350098, |
| "loss": 6.1698, |
| "lr": 0.0007059999999999999, |
| "step": 354, |
| "tokens_trained": 0.033650352 |
| }, |
| { |
| "epoch": 0.10099290780141844, |
| "grad_norm": 8.554885864257812, |
| "loss": 6.1429, |
| "lr": 0.00071, |
| "step": 356, |
| "tokens_trained": 0.033840232 |
| }, |
| { |
| "epoch": 0.10156028368794326, |
| "grad_norm": 7.209281921386719, |
| "loss": 6.0997, |
| "lr": 0.000714, |
| "step": 358, |
| "tokens_trained": 0.034030032 |
| }, |
| { |
| "epoch": 0.10212765957446808, |
| "grad_norm": 8.660383224487305, |
| "loss": 6.1497, |
| "lr": 0.000718, |
| "step": 360, |
| "tokens_trained": 0.034218592 |
| }, |
| { |
| "epoch": 0.10269503546099291, |
| "grad_norm": 9.382761001586914, |
| "loss": 6.0665, |
| "lr": 0.000722, |
| "step": 362, |
| "tokens_trained": 0.034408408 |
| }, |
| { |
| "epoch": 0.10326241134751774, |
| "grad_norm": 6.915714263916016, |
| "loss": 6.0636, |
| "lr": 0.000726, |
| "step": 364, |
| "tokens_trained": 0.034600016 |
| }, |
| { |
| "epoch": 0.10382978723404256, |
| "grad_norm": 7.8990631103515625, |
| "loss": 6.0975, |
| "lr": 0.00073, |
| "step": 366, |
| "tokens_trained": 0.034790792 |
| }, |
| { |
| "epoch": 0.10439716312056738, |
| "grad_norm": 8.859809875488281, |
| "loss": 6.0754, |
| "lr": 0.000734, |
| "step": 368, |
| "tokens_trained": 0.034981304 |
| }, |
| { |
| "epoch": 0.1049645390070922, |
| "grad_norm": 7.392801761627197, |
| "loss": 6.039, |
| "lr": 0.000738, |
| "step": 370, |
| "tokens_trained": 0.03516956 |
| }, |
| { |
| "epoch": 0.10553191489361702, |
| "grad_norm": 9.427324295043945, |
| "loss": 6.084, |
| "lr": 0.000742, |
| "step": 372, |
| "tokens_trained": 0.035358816 |
| }, |
| { |
| "epoch": 0.10609929078014184, |
| "grad_norm": 7.168910503387451, |
| "loss": 6.0498, |
| "lr": 0.000746, |
| "step": 374, |
| "tokens_trained": 0.035548016 |
| }, |
| { |
| "epoch": 0.10638297872340426, |
| "eval_loss": 6.038269996643066, |
| "eval_runtime": 21.3445, |
| "step": 375, |
| "tokens_trained": 0.035644104 |
| }, |
| { |
| "epoch": 0.10666666666666667, |
| "grad_norm": 7.899259567260742, |
| "loss": 6.0345, |
| "lr": 0.00075, |
| "step": 376, |
| "tokens_trained": 0.035739856 |
| }, |
| { |
| "epoch": 0.1072340425531915, |
| "grad_norm": 8.91533374786377, |
| "loss": 6.0386, |
| "lr": 0.000754, |
| "step": 378, |
| "tokens_trained": 0.035930264 |
| }, |
| { |
| "epoch": 0.10780141843971631, |
| "grad_norm": 6.998043060302734, |
| "loss": 6.0294, |
| "lr": 0.000758, |
| "step": 380, |
| "tokens_trained": 0.036119616 |
| }, |
| { |
| "epoch": 0.10836879432624114, |
| "grad_norm": 7.343894958496094, |
| "loss": 6.0116, |
| "lr": 0.000762, |
| "step": 382, |
| "tokens_trained": 0.036308416 |
| }, |
| { |
| "epoch": 0.10893617021276596, |
| "grad_norm": 8.182528495788574, |
| "loss": 5.9904, |
| "lr": 0.0007660000000000001, |
| "step": 384, |
| "tokens_trained": 0.036497264 |
| }, |
| { |
| "epoch": 0.10950354609929078, |
| "grad_norm": 7.927818775177002, |
| "loss": 6.0345, |
| "lr": 0.0007700000000000001, |
| "step": 386, |
| "tokens_trained": 0.036688192 |
| }, |
| { |
| "epoch": 0.1100709219858156, |
| "grad_norm": 8.07447338104248, |
| "loss": 5.9685, |
| "lr": 0.0007740000000000001, |
| "step": 388, |
| "tokens_trained": 0.036878256 |
| }, |
| { |
| "epoch": 0.11063829787234042, |
| "grad_norm": 7.281871318817139, |
| "loss": 6.0125, |
| "lr": 0.000778, |
| "step": 390, |
| "tokens_trained": 0.037068272 |
| }, |
| { |
| "epoch": 0.11120567375886525, |
| "grad_norm": 8.298929214477539, |
| "loss": 6.0071, |
| "lr": 0.000782, |
| "step": 392, |
| "tokens_trained": 0.037259464 |
| }, |
| { |
| "epoch": 0.11177304964539007, |
| "grad_norm": 7.546716690063477, |
| "loss": 5.9721, |
| "lr": 0.000786, |
| "step": 394, |
| "tokens_trained": 0.037449696 |
| }, |
| { |
| "epoch": 0.1123404255319149, |
| "grad_norm": 8.28548526763916, |
| "loss": 5.9819, |
| "lr": 0.00079, |
| "step": 396, |
| "tokens_trained": 0.037639672 |
| }, |
| { |
| "epoch": 0.11290780141843972, |
| "grad_norm": 7.064655303955078, |
| "loss": 5.9873, |
| "lr": 0.0007940000000000001, |
| "step": 398, |
| "tokens_trained": 0.03782712 |
| }, |
| { |
| "epoch": 0.11347517730496454, |
| "grad_norm": 7.743175506591797, |
| "loss": 5.9528, |
| "lr": 0.0007980000000000001, |
| "step": 400, |
| "tokens_trained": 0.03801792 |
| }, |
| { |
| "epoch": 0.11404255319148936, |
| "grad_norm": 7.00898551940918, |
| "loss": 5.9504, |
| "lr": 0.0008020000000000001, |
| "step": 402, |
| "tokens_trained": 0.038209176 |
| }, |
| { |
| "epoch": 0.11460992907801418, |
| "grad_norm": 7.9350409507751465, |
| "loss": 5.9555, |
| "lr": 0.0008060000000000001, |
| "step": 404, |
| "tokens_trained": 0.03839824 |
| }, |
| { |
| "epoch": 0.11517730496453901, |
| "grad_norm": 7.048569679260254, |
| "loss": 5.9787, |
| "lr": 0.0008100000000000001, |
| "step": 406, |
| "tokens_trained": 0.03858732 |
| }, |
| { |
| "epoch": 0.11574468085106383, |
| "grad_norm": 7.088194370269775, |
| "loss": 5.928, |
| "lr": 0.0008139999999999999, |
| "step": 408, |
| "tokens_trained": 0.038777712 |
| }, |
| { |
| "epoch": 0.11631205673758865, |
| "grad_norm": 8.230712890625, |
| "loss": 5.9716, |
| "lr": 0.0008179999999999999, |
| "step": 410, |
| "tokens_trained": 0.038969464 |
| }, |
| { |
| "epoch": 0.11687943262411347, |
| "grad_norm": 8.076972007751465, |
| "loss": 5.9624, |
| "lr": 0.0008219999999999999, |
| "step": 412, |
| "tokens_trained": 0.039162064 |
| }, |
| { |
| "epoch": 0.1174468085106383, |
| "grad_norm": 8.065289497375488, |
| "loss": 5.9937, |
| "lr": 0.000826, |
| "step": 414, |
| "tokens_trained": 0.039348688 |
| }, |
| { |
| "epoch": 0.11801418439716312, |
| "grad_norm": 6.393420696258545, |
| "loss": 5.9278, |
| "lr": 0.00083, |
| "step": 416, |
| "tokens_trained": 0.03953732 |
| }, |
| { |
| "epoch": 0.11858156028368794, |
| "grad_norm": 7.384702682495117, |
| "loss": 5.931, |
| "lr": 0.000834, |
| "step": 418, |
| "tokens_trained": 0.039729808 |
| }, |
| { |
| "epoch": 0.11914893617021277, |
| "grad_norm": 7.007425308227539, |
| "loss": 5.93, |
| "lr": 0.000838, |
| "step": 420, |
| "tokens_trained": 0.039921096 |
| }, |
| { |
| "epoch": 0.11971631205673759, |
| "grad_norm": 7.112692832946777, |
| "loss": 5.9625, |
| "lr": 0.000842, |
| "step": 422, |
| "tokens_trained": 0.040110856 |
| }, |
| { |
| "epoch": 0.12028368794326241, |
| "grad_norm": 8.484418869018555, |
| "loss": 5.9848, |
| "lr": 0.000846, |
| "step": 424, |
| "tokens_trained": 0.040300504 |
| }, |
| { |
| "epoch": 0.12085106382978723, |
| "grad_norm": 6.633459091186523, |
| "loss": 6.0226, |
| "lr": 0.00085, |
| "step": 426, |
| "tokens_trained": 0.04049056 |
| }, |
| { |
| "epoch": 0.12141843971631205, |
| "grad_norm": 7.796964168548584, |
| "loss": 5.9152, |
| "lr": 0.000854, |
| "step": 428, |
| "tokens_trained": 0.040680544 |
| }, |
| { |
| "epoch": 0.12198581560283688, |
| "grad_norm": 7.833578586578369, |
| "loss": 5.924, |
| "lr": 0.000858, |
| "step": 430, |
| "tokens_trained": 0.040873128 |
| }, |
| { |
| "epoch": 0.1225531914893617, |
| "grad_norm": 6.7470550537109375, |
| "loss": 5.9318, |
| "lr": 0.000862, |
| "step": 432, |
| "tokens_trained": 0.041063488 |
| }, |
| { |
| "epoch": 0.12312056737588653, |
| "grad_norm": 6.066318988800049, |
| "loss": 5.9569, |
| "lr": 0.000866, |
| "step": 434, |
| "tokens_trained": 0.041254368 |
| }, |
| { |
| "epoch": 0.12368794326241135, |
| "grad_norm": 6.753541469573975, |
| "loss": 5.8851, |
| "lr": 0.00087, |
| "step": 436, |
| "tokens_trained": 0.04144516 |
| }, |
| { |
| "epoch": 0.12425531914893617, |
| "grad_norm": 6.471331596374512, |
| "loss": 5.864, |
| "lr": 0.000874, |
| "step": 438, |
| "tokens_trained": 0.041636912 |
| }, |
| { |
| "epoch": 0.12482269503546099, |
| "grad_norm": 6.129056930541992, |
| "loss": 5.8965, |
| "lr": 0.000878, |
| "step": 440, |
| "tokens_trained": 0.041828104 |
| }, |
| { |
| "epoch": 0.1253900709219858, |
| "grad_norm": 6.478890895843506, |
| "loss": 5.8817, |
| "lr": 0.000882, |
| "step": 442, |
| "tokens_trained": 0.04201808 |
| }, |
| { |
| "epoch": 0.12595744680851065, |
| "grad_norm": 6.014713287353516, |
| "loss": 5.8268, |
| "lr": 0.0008860000000000001, |
| "step": 444, |
| "tokens_trained": 0.042207328 |
| }, |
| { |
| "epoch": 0.12652482269503545, |
| "grad_norm": 5.505755424499512, |
| "loss": 5.8684, |
| "lr": 0.0008900000000000001, |
| "step": 446, |
| "tokens_trained": 0.042398152 |
| }, |
| { |
| "epoch": 0.1270921985815603, |
| "grad_norm": 10.096606254577637, |
| "loss": 5.8608, |
| "lr": 0.000894, |
| "step": 448, |
| "tokens_trained": 0.042588984 |
| }, |
| { |
| "epoch": 0.1276595744680851, |
| "grad_norm": 6.388499736785889, |
| "loss": 5.8766, |
| "lr": 0.000898, |
| "step": 450, |
| "tokens_trained": 0.042778592 |
| }, |
| { |
| "epoch": 0.12822695035460993, |
| "grad_norm": 7.145125865936279, |
| "loss": 5.8571, |
| "lr": 0.000902, |
| "step": 452, |
| "tokens_trained": 0.042967176 |
| }, |
| { |
| "epoch": 0.12879432624113477, |
| "grad_norm": 6.826383113861084, |
| "loss": 5.8655, |
| "lr": 0.000906, |
| "step": 454, |
| "tokens_trained": 0.043158952 |
| }, |
| { |
| "epoch": 0.12936170212765957, |
| "grad_norm": 6.036892414093018, |
| "loss": 5.8775, |
| "lr": 0.00091, |
| "step": 456, |
| "tokens_trained": 0.043349288 |
| }, |
| { |
| "epoch": 0.1299290780141844, |
| "grad_norm": 6.36528205871582, |
| "loss": 5.8908, |
| "lr": 0.0009140000000000001, |
| "step": 458, |
| "tokens_trained": 0.043539888 |
| }, |
| { |
| "epoch": 0.13049645390070921, |
| "grad_norm": 6.317558288574219, |
| "loss": 5.8702, |
| "lr": 0.0009180000000000001, |
| "step": 460, |
| "tokens_trained": 0.04373232 |
| }, |
| { |
| "epoch": 0.13106382978723405, |
| "grad_norm": 6.427131175994873, |
| "loss": 5.8399, |
| "lr": 0.0009220000000000001, |
| "step": 462, |
| "tokens_trained": 0.043922744 |
| }, |
| { |
| "epoch": 0.13163120567375886, |
| "grad_norm": 5.666539669036865, |
| "loss": 5.7899, |
| "lr": 0.0009260000000000001, |
| "step": 464, |
| "tokens_trained": 0.044112888 |
| }, |
| { |
| "epoch": 0.1321985815602837, |
| "grad_norm": 5.241824150085449, |
| "loss": 5.8203, |
| "lr": 0.00093, |
| "step": 466, |
| "tokens_trained": 0.04430244 |
| }, |
| { |
| "epoch": 0.1327659574468085, |
| "grad_norm": 6.072646141052246, |
| "loss": 5.8367, |
| "lr": 0.000934, |
| "step": 468, |
| "tokens_trained": 0.044493528 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 6.414418697357178, |
| "loss": 5.8236, |
| "lr": 0.0009379999999999999, |
| "step": 470, |
| "tokens_trained": 0.044682328 |
| }, |
| { |
| "epoch": 0.13390070921985817, |
| "grad_norm": 6.958801746368408, |
| "loss": 5.8179, |
| "lr": 0.000942, |
| "step": 472, |
| "tokens_trained": 0.044874256 |
| }, |
| { |
| "epoch": 0.13446808510638297, |
| "grad_norm": 5.787843227386475, |
| "loss": 5.8478, |
| "lr": 0.000946, |
| "step": 474, |
| "tokens_trained": 0.045065616 |
| }, |
| { |
| "epoch": 0.1350354609929078, |
| "grad_norm": 5.5841240882873535, |
| "loss": 5.8307, |
| "lr": 0.00095, |
| "step": 476, |
| "tokens_trained": 0.045257024 |
| }, |
| { |
| "epoch": 0.13560283687943261, |
| "grad_norm": 6.607712745666504, |
| "loss": 5.8512, |
| "lr": 0.000954, |
| "step": 478, |
| "tokens_trained": 0.045446432 |
| }, |
| { |
| "epoch": 0.13617021276595745, |
| "grad_norm": 5.473597049713135, |
| "loss": 5.8174, |
| "lr": 0.000958, |
| "step": 480, |
| "tokens_trained": 0.045636392 |
| }, |
| { |
| "epoch": 0.13673758865248226, |
| "grad_norm": 5.435728549957275, |
| "loss": 5.8308, |
| "lr": 0.000962, |
| "step": 482, |
| "tokens_trained": 0.045823784 |
| }, |
| { |
| "epoch": 0.1373049645390071, |
| "grad_norm": 6.049300670623779, |
| "loss": 5.8293, |
| "lr": 0.000966, |
| "step": 484, |
| "tokens_trained": 0.046013408 |
| }, |
| { |
| "epoch": 0.13787234042553193, |
| "grad_norm": 6.311764717102051, |
| "loss": 5.8086, |
| "lr": 0.0009699999999999999, |
| "step": 486, |
| "tokens_trained": 0.046202528 |
| }, |
| { |
| "epoch": 0.13843971631205673, |
| "grad_norm": 5.886009216308594, |
| "loss": 5.7986, |
| "lr": 0.000974, |
| "step": 488, |
| "tokens_trained": 0.04639404 |
| }, |
| { |
| "epoch": 0.13900709219858157, |
| "grad_norm": 5.438202381134033, |
| "loss": 5.8473, |
| "lr": 0.000978, |
| "step": 490, |
| "tokens_trained": 0.046586512 |
| }, |
| { |
| "epoch": 0.13957446808510637, |
| "grad_norm": 5.08393669128418, |
| "loss": 5.7613, |
| "lr": 0.000982, |
| "step": 492, |
| "tokens_trained": 0.046777448 |
| }, |
| { |
| "epoch": 0.1401418439716312, |
| "grad_norm": 5.645389080047607, |
| "loss": 5.7723, |
| "lr": 0.0009860000000000001, |
| "step": 494, |
| "tokens_trained": 0.046966096 |
| }, |
| { |
| "epoch": 0.14070921985815601, |
| "grad_norm": 6.320916652679443, |
| "loss": 5.7772, |
| "lr": 0.00099, |
| "step": 496, |
| "tokens_trained": 0.047155152 |
| }, |
| { |
| "epoch": 0.14127659574468085, |
| "grad_norm": 5.573540210723877, |
| "loss": 5.7412, |
| "lr": 0.000994, |
| "step": 498, |
| "tokens_trained": 0.047345352 |
| }, |
| { |
| "epoch": 0.14184397163120568, |
| "grad_norm": 4.939594745635986, |
| "loss": 5.8208, |
| "lr": 0.000998, |
| "step": 500, |
| "tokens_trained": 0.047535016 |
| }, |
| { |
| "epoch": 0.14184397163120568, |
| "eval_loss": 5.799490928649902, |
| "eval_runtime": 20.8575, |
| "step": 500, |
| "tokens_trained": 0.047535016 |
| }, |
| { |
| "epoch": 0.1424113475177305, |
| "grad_norm": 5.805343151092529, |
| "loss": 5.7734, |
| "lr": 0.00099986013986014, |
| "step": 502, |
| "tokens_trained": 0.047724216 |
| }, |
| { |
| "epoch": 0.14297872340425533, |
| "grad_norm": 5.831176280975342, |
| "loss": 5.8044, |
| "lr": 0.0009995804195804196, |
| "step": 504, |
| "tokens_trained": 0.047914328 |
| }, |
| { |
| "epoch": 0.14354609929078013, |
| "grad_norm": 5.045091152191162, |
| "loss": 5.8133, |
| "lr": 0.0009993006993006994, |
| "step": 506, |
| "tokens_trained": 0.048105032 |
| }, |
| { |
| "epoch": 0.14411347517730497, |
| "grad_norm": 5.276819705963135, |
| "loss": 5.7555, |
| "lr": 0.000999020979020979, |
| "step": 508, |
| "tokens_trained": 0.048293104 |
| }, |
| { |
| "epoch": 0.14468085106382977, |
| "grad_norm": 5.710324287414551, |
| "loss": 5.7619, |
| "lr": 0.0009987412587412587, |
| "step": 510, |
| "tokens_trained": 0.048483888 |
| }, |
| { |
| "epoch": 0.1452482269503546, |
| "grad_norm": 4.9472527503967285, |
| "loss": 5.767, |
| "lr": 0.0009984615384615386, |
| "step": 512, |
| "tokens_trained": 0.04867336 |
| }, |
| { |
| "epoch": 0.14581560283687944, |
| "grad_norm": 5.410078525543213, |
| "loss": 5.7238, |
| "lr": 0.0009981818181818182, |
| "step": 514, |
| "tokens_trained": 0.048863104 |
| }, |
| { |
| "epoch": 0.14638297872340425, |
| "grad_norm": 6.025843143463135, |
| "loss": 5.7664, |
| "lr": 0.000997902097902098, |
| "step": 516, |
| "tokens_trained": 0.049053856 |
| }, |
| { |
| "epoch": 0.14695035460992908, |
| "grad_norm": 5.3211669921875, |
| "loss": 5.747, |
| "lr": 0.0009976223776223777, |
| "step": 518, |
| "tokens_trained": 0.049245104 |
| }, |
| { |
| "epoch": 0.1475177304964539, |
| "grad_norm": 6.059483051300049, |
| "loss": 5.7611, |
| "lr": 0.0009973426573426573, |
| "step": 520, |
| "tokens_trained": 0.049434368 |
| }, |
| { |
| "epoch": 0.14808510638297873, |
| "grad_norm": 5.362505912780762, |
| "loss": 5.7607, |
| "lr": 0.000997062937062937, |
| "step": 522, |
| "tokens_trained": 0.049622648 |
| }, |
| { |
| "epoch": 0.14865248226950353, |
| "grad_norm": 5.391371726989746, |
| "loss": 5.7857, |
| "lr": 0.0009967832167832168, |
| "step": 524, |
| "tokens_trained": 0.049812304 |
| }, |
| { |
| "epoch": 0.14921985815602837, |
| "grad_norm": 4.3839030265808105, |
| "loss": 5.7334, |
| "lr": 0.0009965034965034964, |
| "step": 526, |
| "tokens_trained": 0.05000356 |
| }, |
| { |
| "epoch": 0.1497872340425532, |
| "grad_norm": 5.008530616760254, |
| "loss": 5.7475, |
| "lr": 0.0009962237762237763, |
| "step": 528, |
| "tokens_trained": 0.050193304 |
| }, |
| { |
| "epoch": 0.150354609929078, |
| "grad_norm": 5.068671226501465, |
| "loss": 5.7866, |
| "lr": 0.000995944055944056, |
| "step": 530, |
| "tokens_trained": 0.050382856 |
| }, |
| { |
| "epoch": 0.15092198581560284, |
| "grad_norm": 5.399240493774414, |
| "loss": 5.6857, |
| "lr": 0.0009956643356643356, |
| "step": 532, |
| "tokens_trained": 0.050570864 |
| }, |
| { |
| "epoch": 0.15148936170212765, |
| "grad_norm": 5.689481735229492, |
| "loss": 5.7586, |
| "lr": 0.0009953846153846154, |
| "step": 534, |
| "tokens_trained": 0.050760384 |
| }, |
| { |
| "epoch": 0.15205673758865249, |
| "grad_norm": 4.652275562286377, |
| "loss": 5.7866, |
| "lr": 0.000995104895104895, |
| "step": 536, |
| "tokens_trained": 0.050952712 |
| }, |
| { |
| "epoch": 0.1526241134751773, |
| "grad_norm": 4.126920223236084, |
| "loss": 5.7261, |
| "lr": 0.000994825174825175, |
| "step": 538, |
| "tokens_trained": 0.051141656 |
| }, |
| { |
| "epoch": 0.15319148936170213, |
| "grad_norm": 4.233098030090332, |
| "loss": 5.6903, |
| "lr": 0.0009945454545454546, |
| "step": 540, |
| "tokens_trained": 0.051331256 |
| }, |
| { |
| "epoch": 0.15375886524822696, |
| "grad_norm": 4.271973133087158, |
| "loss": 5.7293, |
| "lr": 0.0009942657342657344, |
| "step": 542, |
| "tokens_trained": 0.051522072 |
| }, |
| { |
| "epoch": 0.15432624113475177, |
| "grad_norm": 4.653008937835693, |
| "loss": 5.7133, |
| "lr": 0.000993986013986014, |
| "step": 544, |
| "tokens_trained": 0.051711624 |
| }, |
| { |
| "epoch": 0.1548936170212766, |
| "grad_norm": 4.192624092102051, |
| "loss": 5.6876, |
| "lr": 0.0009937062937062937, |
| "step": 546, |
| "tokens_trained": 0.051901744 |
| }, |
| { |
| "epoch": 0.1554609929078014, |
| "grad_norm": 5.497848033905029, |
| "loss": 5.7378, |
| "lr": 0.0009934265734265735, |
| "step": 548, |
| "tokens_trained": 0.052092872 |
| }, |
| { |
| "epoch": 0.15602836879432624, |
| "grad_norm": 4.350259780883789, |
| "loss": 5.6533, |
| "lr": 0.0009931468531468532, |
| "step": 550, |
| "tokens_trained": 0.052281768 |
| }, |
| { |
| "epoch": 0.15659574468085105, |
| "grad_norm": 4.515641689300537, |
| "loss": 5.7492, |
| "lr": 0.000992867132867133, |
| "step": 552, |
| "tokens_trained": 0.052471848 |
| }, |
| { |
| "epoch": 0.15716312056737589, |
| "grad_norm": 4.628066539764404, |
| "loss": 5.7113, |
| "lr": 0.0009925874125874127, |
| "step": 554, |
| "tokens_trained": 0.052660168 |
| }, |
| { |
| "epoch": 0.15773049645390072, |
| "grad_norm": 4.8322930335998535, |
| "loss": 5.6696, |
| "lr": 0.0009923076923076923, |
| "step": 556, |
| "tokens_trained": 0.05284776 |
| }, |
| { |
| "epoch": 0.15829787234042553, |
| "grad_norm": 3.999706506729126, |
| "loss": 5.7296, |
| "lr": 0.000992027972027972, |
| "step": 558, |
| "tokens_trained": 0.053037344 |
| }, |
| { |
| "epoch": 0.15886524822695036, |
| "grad_norm": 4.332971572875977, |
| "loss": 5.7362, |
| "lr": 0.0009917482517482518, |
| "step": 560, |
| "tokens_trained": 0.053228168 |
| }, |
| { |
| "epoch": 0.15943262411347517, |
| "grad_norm": 4.500301361083984, |
| "loss": 5.6982, |
| "lr": 0.0009914685314685314, |
| "step": 562, |
| "tokens_trained": 0.05341856 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 4.721808910369873, |
| "loss": 5.7166, |
| "lr": 0.0009911888111888113, |
| "step": 564, |
| "tokens_trained": 0.053608824 |
| }, |
| { |
| "epoch": 0.1605673758865248, |
| "grad_norm": 5.265316009521484, |
| "loss": 5.7069, |
| "lr": 0.000990909090909091, |
| "step": 566, |
| "tokens_trained": 0.053799728 |
| }, |
| { |
| "epoch": 0.16113475177304964, |
| "grad_norm": 5.024131774902344, |
| "loss": 5.7113, |
| "lr": 0.0009906293706293705, |
| "step": 568, |
| "tokens_trained": 0.05398944 |
| }, |
| { |
| "epoch": 0.16170212765957448, |
| "grad_norm": 4.063276767730713, |
| "loss": 5.6251, |
| "lr": 0.0009903496503496504, |
| "step": 570, |
| "tokens_trained": 0.054176512 |
| }, |
| { |
| "epoch": 0.1622695035460993, |
| "grad_norm": 4.15974760055542, |
| "loss": 5.6912, |
| "lr": 0.00099006993006993, |
| "step": 572, |
| "tokens_trained": 0.054367072 |
| }, |
| { |
| "epoch": 0.16283687943262412, |
| "grad_norm": 4.338894844055176, |
| "loss": 5.6807, |
| "lr": 0.0009897902097902099, |
| "step": 574, |
| "tokens_trained": 0.054559184 |
| }, |
| { |
| "epoch": 0.16340425531914893, |
| "grad_norm": 5.535487174987793, |
| "loss": 5.6765, |
| "lr": 0.0009895104895104895, |
| "step": 576, |
| "tokens_trained": 0.054748904 |
| }, |
| { |
| "epoch": 0.16397163120567376, |
| "grad_norm": 4.379040241241455, |
| "loss": 5.6884, |
| "lr": 0.0009892307692307694, |
| "step": 578, |
| "tokens_trained": 0.054936136 |
| }, |
| { |
| "epoch": 0.16453900709219857, |
| "grad_norm": 4.746179103851318, |
| "loss": 5.6885, |
| "lr": 0.000988951048951049, |
| "step": 580, |
| "tokens_trained": 0.055125584 |
| }, |
| { |
| "epoch": 0.1651063829787234, |
| "grad_norm": 4.949806213378906, |
| "loss": 5.7061, |
| "lr": 0.0009886713286713286, |
| "step": 582, |
| "tokens_trained": 0.055314608 |
| }, |
| { |
| "epoch": 0.16567375886524824, |
| "grad_norm": 4.507448196411133, |
| "loss": 5.6339, |
| "lr": 0.0009883916083916085, |
| "step": 584, |
| "tokens_trained": 0.055503992 |
| }, |
| { |
| "epoch": 0.16624113475177305, |
| "grad_norm": 4.131013870239258, |
| "loss": 5.7122, |
| "lr": 0.0009881118881118881, |
| "step": 586, |
| "tokens_trained": 0.055693376 |
| }, |
| { |
| "epoch": 0.16680851063829788, |
| "grad_norm": 5.32897424697876, |
| "loss": 5.7192, |
| "lr": 0.000987832167832168, |
| "step": 588, |
| "tokens_trained": 0.05588452 |
| }, |
| { |
| "epoch": 0.1673758865248227, |
| "grad_norm": 4.166877746582031, |
| "loss": 5.6666, |
| "lr": 0.0009875524475524476, |
| "step": 590, |
| "tokens_trained": 0.056073936 |
| }, |
| { |
| "epoch": 0.16794326241134752, |
| "grad_norm": 4.393389701843262, |
| "loss": 5.6113, |
| "lr": 0.0009872727272727273, |
| "step": 592, |
| "tokens_trained": 0.056262224 |
| }, |
| { |
| "epoch": 0.16851063829787233, |
| "grad_norm": 4.466696739196777, |
| "loss": 5.6466, |
| "lr": 0.000986993006993007, |
| "step": 594, |
| "tokens_trained": 0.056454008 |
| }, |
| { |
| "epoch": 0.16907801418439716, |
| "grad_norm": 3.9413373470306396, |
| "loss": 5.6838, |
| "lr": 0.0009867132867132867, |
| "step": 596, |
| "tokens_trained": 0.05664444 |
| }, |
| { |
| "epoch": 0.169645390070922, |
| "grad_norm": 3.594649314880371, |
| "loss": 5.6684, |
| "lr": 0.0009864335664335664, |
| "step": 598, |
| "tokens_trained": 0.056833864 |
| }, |
| { |
| "epoch": 0.1702127659574468, |
| "grad_norm": 3.5969483852386475, |
| "loss": 5.6619, |
| "lr": 0.0009861538461538462, |
| "step": 600, |
| "tokens_trained": 0.05702332 |
| }, |
| { |
| "epoch": 0.17078014184397164, |
| "grad_norm": 3.845414638519287, |
| "loss": 5.5855, |
| "lr": 0.0009858741258741259, |
| "step": 602, |
| "tokens_trained": 0.057212776 |
| }, |
| { |
| "epoch": 0.17134751773049645, |
| "grad_norm": 3.9198834896087646, |
| "loss": 5.6551, |
| "lr": 0.0009855944055944055, |
| "step": 604, |
| "tokens_trained": 0.05740152 |
| }, |
| { |
| "epoch": 0.17191489361702128, |
| "grad_norm": 3.6764986515045166, |
| "loss": 5.6228, |
| "lr": 0.0009853146853146854, |
| "step": 606, |
| "tokens_trained": 0.057595616 |
| }, |
| { |
| "epoch": 0.1724822695035461, |
| "grad_norm": 3.8210043907165527, |
| "loss": 5.6557, |
| "lr": 0.000985034965034965, |
| "step": 608, |
| "tokens_trained": 0.057783968 |
| }, |
| { |
| "epoch": 0.17304964539007092, |
| "grad_norm": 3.893644094467163, |
| "loss": 5.6675, |
| "lr": 0.0009847552447552449, |
| "step": 610, |
| "tokens_trained": 0.057974832 |
| }, |
| { |
| "epoch": 0.17361702127659576, |
| "grad_norm": 3.280839681625366, |
| "loss": 5.6442, |
| "lr": 0.0009844755244755245, |
| "step": 612, |
| "tokens_trained": 0.058166272 |
| }, |
| { |
| "epoch": 0.17418439716312056, |
| "grad_norm": 3.4350404739379883, |
| "loss": 5.6555, |
| "lr": 0.0009841958041958043, |
| "step": 614, |
| "tokens_trained": 0.058356008 |
| }, |
| { |
| "epoch": 0.1747517730496454, |
| "grad_norm": 3.7700448036193848, |
| "loss": 5.6138, |
| "lr": 0.000983916083916084, |
| "step": 616, |
| "tokens_trained": 0.058546792 |
| }, |
| { |
| "epoch": 0.1753191489361702, |
| "grad_norm": 3.8182730674743652, |
| "loss": 5.6931, |
| "lr": 0.0009836363636363636, |
| "step": 618, |
| "tokens_trained": 0.058736296 |
| }, |
| { |
| "epoch": 0.17588652482269504, |
| "grad_norm": 3.9105372428894043, |
| "loss": 5.6431, |
| "lr": 0.0009833566433566435, |
| "step": 620, |
| "tokens_trained": 0.058927576 |
| }, |
| { |
| "epoch": 0.17645390070921985, |
| "grad_norm": 3.8897712230682373, |
| "loss": 5.6203, |
| "lr": 0.000983076923076923, |
| "step": 622, |
| "tokens_trained": 0.059118416 |
| }, |
| { |
| "epoch": 0.17702127659574468, |
| "grad_norm": 3.512194871902466, |
| "loss": 5.6292, |
| "lr": 0.000982797202797203, |
| "step": 624, |
| "tokens_trained": 0.059308568 |
| }, |
| { |
| "epoch": 0.1773049645390071, |
| "eval_loss": 5.630118370056152, |
| "eval_runtime": 21.1591, |
| "step": 625, |
| "tokens_trained": 0.059404056 |
| }, |
| { |
| "epoch": 0.17758865248226952, |
| "grad_norm": 2.990100383758545, |
| "loss": 5.622, |
| "lr": 0.0009825174825174826, |
| "step": 626, |
| "tokens_trained": 0.059499776 |
| }, |
| { |
| "epoch": 0.17815602836879432, |
| "grad_norm": 3.0487334728240967, |
| "loss": 5.6629, |
| "lr": 0.0009822377622377622, |
| "step": 628, |
| "tokens_trained": 0.059690208 |
| }, |
| { |
| "epoch": 0.17872340425531916, |
| "grad_norm": 3.6905510425567627, |
| "loss": 5.6345, |
| "lr": 0.0009819580419580419, |
| "step": 630, |
| "tokens_trained": 0.059881352 |
| }, |
| { |
| "epoch": 0.17929078014184396, |
| "grad_norm": 3.302255630493164, |
| "loss": 5.6733, |
| "lr": 0.0009816783216783217, |
| "step": 632, |
| "tokens_trained": 0.060071896 |
| }, |
| { |
| "epoch": 0.1798581560283688, |
| "grad_norm": 3.6833834648132324, |
| "loss": 5.5868, |
| "lr": 0.0009813986013986014, |
| "step": 634, |
| "tokens_trained": 0.060260504 |
| }, |
| { |
| "epoch": 0.1804255319148936, |
| "grad_norm": 3.1528804302215576, |
| "loss": 5.6128, |
| "lr": 0.0009811188811188812, |
| "step": 636, |
| "tokens_trained": 0.060450584 |
| }, |
| { |
| "epoch": 0.18099290780141844, |
| "grad_norm": 3.788860559463501, |
| "loss": 5.6235, |
| "lr": 0.0009808391608391608, |
| "step": 638, |
| "tokens_trained": 0.060640872 |
| }, |
| { |
| "epoch": 0.18156028368794327, |
| "grad_norm": 3.192462682723999, |
| "loss": 5.545, |
| "lr": 0.0009805594405594405, |
| "step": 640, |
| "tokens_trained": 0.060832776 |
| }, |
| { |
| "epoch": 0.18212765957446808, |
| "grad_norm": 3.505732774734497, |
| "loss": 5.5801, |
| "lr": 0.0009802797202797203, |
| "step": 642, |
| "tokens_trained": 0.06102204 |
| }, |
| { |
| "epoch": 0.18269503546099292, |
| "grad_norm": 3.9589102268218994, |
| "loss": 5.6091, |
| "lr": 0.00098, |
| "step": 644, |
| "tokens_trained": 0.061209744 |
| }, |
| { |
| "epoch": 0.18326241134751772, |
| "grad_norm": 3.4410059452056885, |
| "loss": 5.6279, |
| "lr": 0.0009797202797202798, |
| "step": 646, |
| "tokens_trained": 0.061400392 |
| }, |
| { |
| "epoch": 0.18382978723404256, |
| "grad_norm": 3.7746005058288574, |
| "loss": 5.6124, |
| "lr": 0.0009794405594405595, |
| "step": 648, |
| "tokens_trained": 0.061592232 |
| }, |
| { |
| "epoch": 0.18439716312056736, |
| "grad_norm": 3.75022292137146, |
| "loss": 5.5826, |
| "lr": 0.000979160839160839, |
| "step": 650, |
| "tokens_trained": 0.061781824 |
| }, |
| { |
| "epoch": 0.1849645390070922, |
| "grad_norm": 3.7629313468933105, |
| "loss": 5.555, |
| "lr": 0.000978881118881119, |
| "step": 652, |
| "tokens_trained": 0.061972744 |
| }, |
| { |
| "epoch": 0.18553191489361703, |
| "grad_norm": 4.5046820640563965, |
| "loss": 5.5972, |
| "lr": 0.0009786013986013986, |
| "step": 654, |
| "tokens_trained": 0.062163456 |
| }, |
| { |
| "epoch": 0.18609929078014184, |
| "grad_norm": 3.443138599395752, |
| "loss": 5.6061, |
| "lr": 0.0009783216783216782, |
| "step": 656, |
| "tokens_trained": 0.06235208 |
| }, |
| { |
| "epoch": 0.18666666666666668, |
| "grad_norm": 3.2661828994750977, |
| "loss": 5.5479, |
| "lr": 0.000978041958041958, |
| "step": 658, |
| "tokens_trained": 0.062544416 |
| }, |
| { |
| "epoch": 0.18723404255319148, |
| "grad_norm": 3.9571003913879395, |
| "loss": 5.6069, |
| "lr": 0.000977762237762238, |
| "step": 660, |
| "tokens_trained": 0.062733992 |
| }, |
| { |
| "epoch": 0.18780141843971632, |
| "grad_norm": 3.705880641937256, |
| "loss": 5.5915, |
| "lr": 0.0009774825174825176, |
| "step": 662, |
| "tokens_trained": 0.062922536 |
| }, |
| { |
| "epoch": 0.18836879432624112, |
| "grad_norm": 4.066433429718018, |
| "loss": 5.6031, |
| "lr": 0.0009772027972027972, |
| "step": 664, |
| "tokens_trained": 0.063114224 |
| }, |
| { |
| "epoch": 0.18893617021276596, |
| "grad_norm": 3.356651782989502, |
| "loss": 5.6045, |
| "lr": 0.0009769230769230768, |
| "step": 666, |
| "tokens_trained": 0.063304616 |
| }, |
| { |
| "epoch": 0.1895035460992908, |
| "grad_norm": 3.8084938526153564, |
| "loss": 5.6138, |
| "lr": 0.0009766433566433567, |
| "step": 668, |
| "tokens_trained": 0.06349476 |
| }, |
| { |
| "epoch": 0.1900709219858156, |
| "grad_norm": 4.282619953155518, |
| "loss": 5.5704, |
| "lr": 0.0009763636363636363, |
| "step": 670, |
| "tokens_trained": 0.063684848 |
| }, |
| { |
| "epoch": 0.19063829787234043, |
| "grad_norm": 3.045057773590088, |
| "loss": 5.6427, |
| "lr": 0.0009760839160839161, |
| "step": 672, |
| "tokens_trained": 0.063875192 |
| }, |
| { |
| "epoch": 0.19120567375886524, |
| "grad_norm": 3.360164165496826, |
| "loss": 5.5778, |
| "lr": 0.0009758041958041958, |
| "step": 674, |
| "tokens_trained": 0.06406636 |
| }, |
| { |
| "epoch": 0.19177304964539008, |
| "grad_norm": 3.5778472423553467, |
| "loss": 5.5389, |
| "lr": 0.0009755244755244756, |
| "step": 676, |
| "tokens_trained": 0.064254376 |
| }, |
| { |
| "epoch": 0.19234042553191488, |
| "grad_norm": 3.34869384765625, |
| "loss": 5.5894, |
| "lr": 0.0009752447552447553, |
| "step": 678, |
| "tokens_trained": 0.0644448 |
| }, |
| { |
| "epoch": 0.19290780141843972, |
| "grad_norm": 3.083582878112793, |
| "loss": 5.5776, |
| "lr": 0.0009749650349650349, |
| "step": 680, |
| "tokens_trained": 0.064633712 |
| }, |
| { |
| "epoch": 0.19347517730496455, |
| "grad_norm": 3.345973491668701, |
| "loss": 5.5987, |
| "lr": 0.0009746853146853148, |
| "step": 682, |
| "tokens_trained": 0.064824808 |
| }, |
| { |
| "epoch": 0.19404255319148936, |
| "grad_norm": 3.9262702465057373, |
| "loss": 5.64, |
| "lr": 0.0009744055944055944, |
| "step": 684, |
| "tokens_trained": 0.065016224 |
| }, |
| { |
| "epoch": 0.1946099290780142, |
| "grad_norm": 3.298543930053711, |
| "loss": 5.587, |
| "lr": 0.0009741258741258742, |
| "step": 686, |
| "tokens_trained": 0.065204216 |
| }, |
| { |
| "epoch": 0.195177304964539, |
| "grad_norm": 3.118626832962036, |
| "loss": 5.5864, |
| "lr": 0.0009738461538461538, |
| "step": 688, |
| "tokens_trained": 0.065393256 |
| }, |
| { |
| "epoch": 0.19574468085106383, |
| "grad_norm": 2.983548402786255, |
| "loss": 5.5506, |
| "lr": 0.0009735664335664336, |
| "step": 690, |
| "tokens_trained": 0.06558324 |
| }, |
| { |
| "epoch": 0.19631205673758864, |
| "grad_norm": 3.5204527378082275, |
| "loss": 5.5336, |
| "lr": 0.0009732867132867133, |
| "step": 692, |
| "tokens_trained": 0.065775624 |
| }, |
| { |
| "epoch": 0.19687943262411348, |
| "grad_norm": 3.138550281524658, |
| "loss": 5.5677, |
| "lr": 0.000973006993006993, |
| "step": 694, |
| "tokens_trained": 0.0659666 |
| }, |
| { |
| "epoch": 0.1974468085106383, |
| "grad_norm": 3.0961053371429443, |
| "loss": 5.5714, |
| "lr": 0.0009727272727272728, |
| "step": 696, |
| "tokens_trained": 0.066155512 |
| }, |
| { |
| "epoch": 0.19801418439716312, |
| "grad_norm": 3.4929685592651367, |
| "loss": 5.5829, |
| "lr": 0.0009724475524475524, |
| "step": 698, |
| "tokens_trained": 0.06634576 |
| }, |
| { |
| "epoch": 0.19858156028368795, |
| "grad_norm": 3.1820616722106934, |
| "loss": 5.6108, |
| "lr": 0.0009721678321678323, |
| "step": 700, |
| "tokens_trained": 0.066537016 |
| }, |
| { |
| "epoch": 0.19914893617021276, |
| "grad_norm": 3.4244654178619385, |
| "loss": 5.6025, |
| "lr": 0.0009718881118881119, |
| "step": 702, |
| "tokens_trained": 0.066727856 |
| }, |
| { |
| "epoch": 0.1997163120567376, |
| "grad_norm": 3.258605480194092, |
| "loss": 5.5581, |
| "lr": 0.0009716083916083917, |
| "step": 704, |
| "tokens_trained": 0.066916672 |
| }, |
| { |
| "epoch": 0.2002836879432624, |
| "grad_norm": 2.7159688472747803, |
| "loss": 5.5478, |
| "lr": 0.0009713286713286713, |
| "step": 706, |
| "tokens_trained": 0.067107704 |
| }, |
| { |
| "epoch": 0.20085106382978724, |
| "grad_norm": 3.1941912174224854, |
| "loss": 5.6126, |
| "lr": 0.000971048951048951, |
| "step": 708, |
| "tokens_trained": 0.067297896 |
| }, |
| { |
| "epoch": 0.20141843971631207, |
| "grad_norm": 3.20470929145813, |
| "loss": 5.5628, |
| "lr": 0.0009707692307692308, |
| "step": 710, |
| "tokens_trained": 0.06748608 |
| }, |
| { |
| "epoch": 0.20198581560283688, |
| "grad_norm": 3.6400153636932373, |
| "loss": 5.5758, |
| "lr": 0.0009704895104895105, |
| "step": 712, |
| "tokens_trained": 0.0676766 |
| }, |
| { |
| "epoch": 0.2025531914893617, |
| "grad_norm": 2.881639003753662, |
| "loss": 5.5512, |
| "lr": 0.0009702097902097903, |
| "step": 714, |
| "tokens_trained": 0.067865848 |
| }, |
| { |
| "epoch": 0.20312056737588652, |
| "grad_norm": 3.1113905906677246, |
| "loss": 5.5396, |
| "lr": 0.0009699300699300699, |
| "step": 716, |
| "tokens_trained": 0.068055368 |
| }, |
| { |
| "epoch": 0.20368794326241135, |
| "grad_norm": 3.135014772415161, |
| "loss": 5.5763, |
| "lr": 0.0009696503496503498, |
| "step": 718, |
| "tokens_trained": 0.068248544 |
| }, |
| { |
| "epoch": 0.20425531914893616, |
| "grad_norm": 3.1870718002319336, |
| "loss": 5.5903, |
| "lr": 0.0009693706293706294, |
| "step": 720, |
| "tokens_trained": 0.068436944 |
| }, |
| { |
| "epoch": 0.204822695035461, |
| "grad_norm": 3.125596523284912, |
| "loss": 5.6033, |
| "lr": 0.0009690909090909091, |
| "step": 722, |
| "tokens_trained": 0.06862548 |
| }, |
| { |
| "epoch": 0.20539007092198583, |
| "grad_norm": 2.897671699523926, |
| "loss": 5.5946, |
| "lr": 0.0009688111888111888, |
| "step": 724, |
| "tokens_trained": 0.068815232 |
| }, |
| { |
| "epoch": 0.20595744680851064, |
| "grad_norm": 2.855313539505005, |
| "loss": 5.5731, |
| "lr": 0.0009685314685314685, |
| "step": 726, |
| "tokens_trained": 0.06900692 |
| }, |
| { |
| "epoch": 0.20652482269503547, |
| "grad_norm": 2.7760672569274902, |
| "loss": 5.4949, |
| "lr": 0.0009682517482517483, |
| "step": 728, |
| "tokens_trained": 0.069195376 |
| }, |
| { |
| "epoch": 0.20709219858156028, |
| "grad_norm": 2.9300007820129395, |
| "loss": 5.5491, |
| "lr": 0.000967972027972028, |
| "step": 730, |
| "tokens_trained": 0.069385512 |
| }, |
| { |
| "epoch": 0.2076595744680851, |
| "grad_norm": 3.299860954284668, |
| "loss": 5.5405, |
| "lr": 0.0009676923076923078, |
| "step": 732, |
| "tokens_trained": 0.069573304 |
| }, |
| { |
| "epoch": 0.20822695035460992, |
| "grad_norm": 3.300189256668091, |
| "loss": 5.5797, |
| "lr": 0.0009674125874125874, |
| "step": 734, |
| "tokens_trained": 0.069764248 |
| }, |
| { |
| "epoch": 0.20879432624113475, |
| "grad_norm": 2.932995557785034, |
| "loss": 5.5556, |
| "lr": 0.0009671328671328672, |
| "step": 736, |
| "tokens_trained": 0.06995496 |
| }, |
| { |
| "epoch": 0.2093617021276596, |
| "grad_norm": 2.6711719036102295, |
| "loss": 5.48, |
| "lr": 0.0009668531468531469, |
| "step": 738, |
| "tokens_trained": 0.070142776 |
| }, |
| { |
| "epoch": 0.2099290780141844, |
| "grad_norm": 2.833314895629883, |
| "loss": 5.542, |
| "lr": 0.0009665734265734266, |
| "step": 740, |
| "tokens_trained": 0.070332064 |
| }, |
| { |
| "epoch": 0.21049645390070923, |
| "grad_norm": 2.899843215942383, |
| "loss": 5.5649, |
| "lr": 0.0009662937062937063, |
| "step": 742, |
| "tokens_trained": 0.070523448 |
| }, |
| { |
| "epoch": 0.21106382978723404, |
| "grad_norm": 2.96528697013855, |
| "loss": 5.5277, |
| "lr": 0.000966013986013986, |
| "step": 744, |
| "tokens_trained": 0.070713768 |
| }, |
| { |
| "epoch": 0.21163120567375887, |
| "grad_norm": 2.921109437942505, |
| "loss": 5.5646, |
| "lr": 0.0009657342657342657, |
| "step": 746, |
| "tokens_trained": 0.070905704 |
| }, |
| { |
| "epoch": 0.21219858156028368, |
| "grad_norm": 3.2725329399108887, |
| "loss": 5.4786, |
| "lr": 0.0009654545454545455, |
| "step": 748, |
| "tokens_trained": 0.071096008 |
| }, |
| { |
| "epoch": 0.2127659574468085, |
| "grad_norm": 2.8296804428100586, |
| "loss": 5.573, |
| "lr": 0.0009651748251748252, |
| "step": 750, |
| "tokens_trained": 0.07128828 |
| }, |
| { |
| "epoch": 0.2127659574468085, |
| "eval_loss": 5.535472869873047, |
| "eval_runtime": 21.0109, |
| "step": 750, |
| "tokens_trained": 0.07128828 |
| }, |
| { |
| "epoch": 0.21333333333333335, |
| "grad_norm": 3.0509591102600098, |
| "loss": 5.6037, |
| "lr": 0.0009648951048951049, |
| "step": 752, |
| "tokens_trained": 0.071479496 |
| }, |
| { |
| "epoch": 0.21390070921985815, |
| "grad_norm": 2.6773571968078613, |
| "loss": 5.5266, |
| "lr": 0.0009646153846153846, |
| "step": 754, |
| "tokens_trained": 0.071668568 |
| }, |
| { |
| "epoch": 0.214468085106383, |
| "grad_norm": 2.9600210189819336, |
| "loss": 5.5362, |
| "lr": 0.0009643356643356644, |
| "step": 756, |
| "tokens_trained": 0.071860552 |
| }, |
| { |
| "epoch": 0.2150354609929078, |
| "grad_norm": 2.6674885749816895, |
| "loss": 5.5388, |
| "lr": 0.0009640559440559441, |
| "step": 758, |
| "tokens_trained": 0.07204912 |
| }, |
| { |
| "epoch": 0.21560283687943263, |
| "grad_norm": 2.50179386138916, |
| "loss": 5.5027, |
| "lr": 0.0009637762237762237, |
| "step": 760, |
| "tokens_trained": 0.072239952 |
| }, |
| { |
| "epoch": 0.21617021276595744, |
| "grad_norm": 2.843411684036255, |
| "loss": 5.5221, |
| "lr": 0.0009634965034965035, |
| "step": 762, |
| "tokens_trained": 0.07243076 |
| }, |
| { |
| "epoch": 0.21673758865248227, |
| "grad_norm": 2.8686277866363525, |
| "loss": 5.4896, |
| "lr": 0.0009632167832167832, |
| "step": 764, |
| "tokens_trained": 0.072623272 |
| }, |
| { |
| "epoch": 0.2173049645390071, |
| "grad_norm": 2.611424684524536, |
| "loss": 5.5557, |
| "lr": 0.000962937062937063, |
| "step": 766, |
| "tokens_trained": 0.07281408 |
| }, |
| { |
| "epoch": 0.2178723404255319, |
| "grad_norm": 3.013145685195923, |
| "loss": 5.4964, |
| "lr": 0.0009626573426573427, |
| "step": 768, |
| "tokens_trained": 0.073005016 |
| }, |
| { |
| "epoch": 0.21843971631205675, |
| "grad_norm": 2.8682022094726562, |
| "loss": 5.5232, |
| "lr": 0.0009623776223776224, |
| "step": 770, |
| "tokens_trained": 0.07319652 |
| }, |
| { |
| "epoch": 0.21900709219858155, |
| "grad_norm": 2.6478466987609863, |
| "loss": 5.5517, |
| "lr": 0.0009620979020979021, |
| "step": 772, |
| "tokens_trained": 0.073387048 |
| }, |
| { |
| "epoch": 0.2195744680851064, |
| "grad_norm": 2.7273097038269043, |
| "loss": 5.5572, |
| "lr": 0.0009618181818181818, |
| "step": 774, |
| "tokens_trained": 0.073577424 |
| }, |
| { |
| "epoch": 0.2201418439716312, |
| "grad_norm": 3.104907751083374, |
| "loss": 5.5081, |
| "lr": 0.0009615384615384616, |
| "step": 776, |
| "tokens_trained": 0.073766712 |
| }, |
| { |
| "epoch": 0.22070921985815603, |
| "grad_norm": 2.9616432189941406, |
| "loss": 5.5059, |
| "lr": 0.0009612587412587412, |
| "step": 778, |
| "tokens_trained": 0.073956272 |
| }, |
| { |
| "epoch": 0.22127659574468084, |
| "grad_norm": 3.330319881439209, |
| "loss": 5.4811, |
| "lr": 0.000960979020979021, |
| "step": 780, |
| "tokens_trained": 0.074144008 |
| }, |
| { |
| "epoch": 0.22184397163120567, |
| "grad_norm": 2.964371919631958, |
| "loss": 5.4763, |
| "lr": 0.0009606993006993007, |
| "step": 782, |
| "tokens_trained": 0.074333888 |
| }, |
| { |
| "epoch": 0.2224113475177305, |
| "grad_norm": 3.13899827003479, |
| "loss": 5.5262, |
| "lr": 0.0009604195804195805, |
| "step": 784, |
| "tokens_trained": 0.074523584 |
| }, |
| { |
| "epoch": 0.2229787234042553, |
| "grad_norm": 3.2576637268066406, |
| "loss": 5.4983, |
| "lr": 0.0009601398601398602, |
| "step": 786, |
| "tokens_trained": 0.074714128 |
| }, |
| { |
| "epoch": 0.22354609929078015, |
| "grad_norm": 2.916149616241455, |
| "loss": 5.504, |
| "lr": 0.0009598601398601398, |
| "step": 788, |
| "tokens_trained": 0.074905104 |
| }, |
| { |
| "epoch": 0.22411347517730495, |
| "grad_norm": 2.842733144760132, |
| "loss": 5.4997, |
| "lr": 0.0009595804195804196, |
| "step": 790, |
| "tokens_trained": 0.075096328 |
| }, |
| { |
| "epoch": 0.2246808510638298, |
| "grad_norm": 2.880695104598999, |
| "loss": 5.5131, |
| "lr": 0.0009593006993006993, |
| "step": 792, |
| "tokens_trained": 0.075286104 |
| }, |
| { |
| "epoch": 0.2252482269503546, |
| "grad_norm": 2.620516300201416, |
| "loss": 5.5291, |
| "lr": 0.0009590209790209791, |
| "step": 794, |
| "tokens_trained": 0.075477392 |
| }, |
| { |
| "epoch": 0.22581560283687943, |
| "grad_norm": 2.622455358505249, |
| "loss": 5.5433, |
| "lr": 0.0009587412587412587, |
| "step": 796, |
| "tokens_trained": 0.0756682 |
| }, |
| { |
| "epoch": 0.22638297872340427, |
| "grad_norm": 2.532047986984253, |
| "loss": 5.5169, |
| "lr": 0.0009584615384615385, |
| "step": 798, |
| "tokens_trained": 0.075856528 |
| }, |
| { |
| "epoch": 0.22695035460992907, |
| "grad_norm": 2.628110885620117, |
| "loss": 5.5369, |
| "lr": 0.0009581818181818182, |
| "step": 800, |
| "tokens_trained": 0.076046256 |
| }, |
| { |
| "epoch": 0.2275177304964539, |
| "grad_norm": 2.376600980758667, |
| "loss": 5.4888, |
| "lr": 0.000957902097902098, |
| "step": 802, |
| "tokens_trained": 0.076236016 |
| }, |
| { |
| "epoch": 0.22808510638297871, |
| "grad_norm": 2.433666706085205, |
| "loss": 5.5044, |
| "lr": 0.0009576223776223777, |
| "step": 804, |
| "tokens_trained": 0.07642324 |
| }, |
| { |
| "epoch": 0.22865248226950355, |
| "grad_norm": 2.3850929737091064, |
| "loss": 5.4941, |
| "lr": 0.0009573426573426573, |
| "step": 806, |
| "tokens_trained": 0.07661376 |
| }, |
| { |
| "epoch": 0.22921985815602836, |
| "grad_norm": 2.4664969444274902, |
| "loss": 5.5257, |
| "lr": 0.0009570629370629371, |
| "step": 808, |
| "tokens_trained": 0.076804952 |
| }, |
| { |
| "epoch": 0.2297872340425532, |
| "grad_norm": 2.8514602184295654, |
| "loss": 5.5335, |
| "lr": 0.0009567832167832168, |
| "step": 810, |
| "tokens_trained": 0.076995064 |
| }, |
| { |
| "epoch": 0.23035460992907802, |
| "grad_norm": 2.508887767791748, |
| "loss": 5.5093, |
| "lr": 0.0009565034965034966, |
| "step": 812, |
| "tokens_trained": 0.077185344 |
| }, |
| { |
| "epoch": 0.23092198581560283, |
| "grad_norm": 2.5842514038085938, |
| "loss": 5.5246, |
| "lr": 0.0009562237762237762, |
| "step": 814, |
| "tokens_trained": 0.077375232 |
| }, |
| { |
| "epoch": 0.23148936170212767, |
| "grad_norm": 2.621562957763672, |
| "loss": 5.4948, |
| "lr": 0.0009559440559440559, |
| "step": 816, |
| "tokens_trained": 0.07756528 |
| }, |
| { |
| "epoch": 0.23205673758865247, |
| "grad_norm": 2.3230698108673096, |
| "loss": 5.5367, |
| "lr": 0.0009556643356643357, |
| "step": 818, |
| "tokens_trained": 0.077754936 |
| }, |
| { |
| "epoch": 0.2326241134751773, |
| "grad_norm": 2.728039264678955, |
| "loss": 5.4548, |
| "lr": 0.0009553846153846154, |
| "step": 820, |
| "tokens_trained": 0.077944056 |
| }, |
| { |
| "epoch": 0.23319148936170211, |
| "grad_norm": 2.786271333694458, |
| "loss": 5.4701, |
| "lr": 0.0009551048951048952, |
| "step": 822, |
| "tokens_trained": 0.07813272 |
| }, |
| { |
| "epoch": 0.23375886524822695, |
| "grad_norm": 2.449995517730713, |
| "loss": 5.5505, |
| "lr": 0.0009548251748251748, |
| "step": 824, |
| "tokens_trained": 0.078321888 |
| }, |
| { |
| "epoch": 0.23432624113475178, |
| "grad_norm": 2.394447088241577, |
| "loss": 5.4709, |
| "lr": 0.0009545454545454546, |
| "step": 826, |
| "tokens_trained": 0.078510288 |
| }, |
| { |
| "epoch": 0.2348936170212766, |
| "grad_norm": 2.5857675075531006, |
| "loss": 5.4986, |
| "lr": 0.0009542657342657343, |
| "step": 828, |
| "tokens_trained": 0.078698032 |
| }, |
| { |
| "epoch": 0.23546099290780143, |
| "grad_norm": 2.728743314743042, |
| "loss": 5.4983, |
| "lr": 0.000953986013986014, |
| "step": 830, |
| "tokens_trained": 0.078890608 |
| }, |
| { |
| "epoch": 0.23602836879432623, |
| "grad_norm": 2.3619866371154785, |
| "loss": 5.4985, |
| "lr": 0.0009537062937062937, |
| "step": 832, |
| "tokens_trained": 0.079081968 |
| }, |
| { |
| "epoch": 0.23659574468085107, |
| "grad_norm": 2.6265158653259277, |
| "loss": 5.5088, |
| "lr": 0.0009534265734265734, |
| "step": 834, |
| "tokens_trained": 0.079270712 |
| }, |
| { |
| "epoch": 0.23716312056737587, |
| "grad_norm": 2.3731281757354736, |
| "loss": 5.4682, |
| "lr": 0.0009531468531468532, |
| "step": 836, |
| "tokens_trained": 0.079459912 |
| }, |
| { |
| "epoch": 0.2377304964539007, |
| "grad_norm": 2.375283718109131, |
| "loss": 5.4278, |
| "lr": 0.0009528671328671329, |
| "step": 838, |
| "tokens_trained": 0.079649408 |
| }, |
| { |
| "epoch": 0.23829787234042554, |
| "grad_norm": 2.6856729984283447, |
| "loss": 5.5277, |
| "lr": 0.0009525874125874127, |
| "step": 840, |
| "tokens_trained": 0.079839552 |
| }, |
| { |
| "epoch": 0.23886524822695035, |
| "grad_norm": 2.5037410259246826, |
| "loss": 5.5022, |
| "lr": 0.0009523076923076923, |
| "step": 842, |
| "tokens_trained": 0.08002732 |
| }, |
| { |
| "epoch": 0.23943262411347518, |
| "grad_norm": 2.25175404548645, |
| "loss": 5.4918, |
| "lr": 0.000952027972027972, |
| "step": 844, |
| "tokens_trained": 0.080216416 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.3555264472961426, |
| "loss": 5.5134, |
| "lr": 0.0009517482517482518, |
| "step": 846, |
| "tokens_trained": 0.080406928 |
| }, |
| { |
| "epoch": 0.24056737588652483, |
| "grad_norm": 2.390998601913452, |
| "loss": 5.4721, |
| "lr": 0.0009514685314685315, |
| "step": 848, |
| "tokens_trained": 0.080596232 |
| }, |
| { |
| "epoch": 0.24113475177304963, |
| "grad_norm": 2.1585986614227295, |
| "loss": 5.4511, |
| "lr": 0.0009511888111888112, |
| "step": 850, |
| "tokens_trained": 0.080786848 |
| }, |
| { |
| "epoch": 0.24170212765957447, |
| "grad_norm": 2.7733986377716064, |
| "loss": 5.5269, |
| "lr": 0.0009509090909090909, |
| "step": 852, |
| "tokens_trained": 0.080978144 |
| }, |
| { |
| "epoch": 0.2422695035460993, |
| "grad_norm": 2.8021209239959717, |
| "loss": 5.4751, |
| "lr": 0.0009506293706293707, |
| "step": 854, |
| "tokens_trained": 0.081167712 |
| }, |
| { |
| "epoch": 0.2428368794326241, |
| "grad_norm": 2.5434224605560303, |
| "loss": 5.5154, |
| "lr": 0.0009503496503496504, |
| "step": 856, |
| "tokens_trained": 0.081357584 |
| }, |
| { |
| "epoch": 0.24340425531914894, |
| "grad_norm": 2.456421136856079, |
| "loss": 5.5459, |
| "lr": 0.0009500699300699301, |
| "step": 858, |
| "tokens_trained": 0.081545992 |
| }, |
| { |
| "epoch": 0.24397163120567375, |
| "grad_norm": 2.317312002182007, |
| "loss": 5.4644, |
| "lr": 0.0009497902097902098, |
| "step": 860, |
| "tokens_trained": 0.081735392 |
| }, |
| { |
| "epoch": 0.24453900709219858, |
| "grad_norm": 2.3580780029296875, |
| "loss": 5.4359, |
| "lr": 0.0009495104895104895, |
| "step": 862, |
| "tokens_trained": 0.081925608 |
| }, |
| { |
| "epoch": 0.2451063829787234, |
| "grad_norm": 2.6440224647521973, |
| "loss": 5.4757, |
| "lr": 0.0009492307692307693, |
| "step": 864, |
| "tokens_trained": 0.08211328 |
| }, |
| { |
| "epoch": 0.24567375886524823, |
| "grad_norm": 2.5468132495880127, |
| "loss": 5.4115, |
| "lr": 0.000948951048951049, |
| "step": 866, |
| "tokens_trained": 0.082303736 |
| }, |
| { |
| "epoch": 0.24624113475177306, |
| "grad_norm": 2.431992530822754, |
| "loss": 5.4655, |
| "lr": 0.0009486713286713286, |
| "step": 868, |
| "tokens_trained": 0.082492896 |
| }, |
| { |
| "epoch": 0.24680851063829787, |
| "grad_norm": 2.443335771560669, |
| "loss": 5.4684, |
| "lr": 0.0009483916083916084, |
| "step": 870, |
| "tokens_trained": 0.082684024 |
| }, |
| { |
| "epoch": 0.2473758865248227, |
| "grad_norm": 2.6467180252075195, |
| "loss": 5.5017, |
| "lr": 0.0009481118881118881, |
| "step": 872, |
| "tokens_trained": 0.08287444 |
| }, |
| { |
| "epoch": 0.2479432624113475, |
| "grad_norm": 2.6044974327087402, |
| "loss": 5.4637, |
| "lr": 0.0009478321678321679, |
| "step": 874, |
| "tokens_trained": 0.08306436 |
| }, |
| { |
| "epoch": 0.24822695035460993, |
| "eval_loss": 5.4816508293151855, |
| "eval_runtime": 20.9467, |
| "step": 875, |
| "tokens_trained": 0.083158888 |
| }, |
| { |
| "epoch": 0.24851063829787234, |
| "grad_norm": 2.6221189498901367, |
| "loss": 5.4785, |
| "lr": 0.0009475524475524476, |
| "step": 876, |
| "tokens_trained": 0.083253472 |
| }, |
| { |
| "epoch": 0.24907801418439715, |
| "grad_norm": 2.409327983856201, |
| "loss": 5.42, |
| "lr": 0.0009472727272727273, |
| "step": 878, |
| "tokens_trained": 0.08344528 |
| }, |
| { |
| "epoch": 0.24964539007092199, |
| "grad_norm": 2.2504723072052, |
| "loss": 5.399, |
| "lr": 0.000946993006993007, |
| "step": 880, |
| "tokens_trained": 0.083635752 |
| }, |
| { |
| "epoch": 0.2502127659574468, |
| "grad_norm": 2.3018665313720703, |
| "loss": 5.4512, |
| "lr": 0.0009467132867132868, |
| "step": 882, |
| "tokens_trained": 0.08382576 |
| }, |
| { |
| "epoch": 0.2507801418439716, |
| "grad_norm": 2.5774636268615723, |
| "loss": 5.4592, |
| "lr": 0.0009464335664335665, |
| "step": 884, |
| "tokens_trained": 0.084016232 |
| }, |
| { |
| "epoch": 0.25134751773049646, |
| "grad_norm": 2.614935874938965, |
| "loss": 5.4772, |
| "lr": 0.0009461538461538461, |
| "step": 886, |
| "tokens_trained": 0.084206992 |
| }, |
| { |
| "epoch": 0.2519148936170213, |
| "grad_norm": 2.4281506538391113, |
| "loss": 5.4972, |
| "lr": 0.0009458741258741259, |
| "step": 888, |
| "tokens_trained": 0.084395848 |
| }, |
| { |
| "epoch": 0.2524822695035461, |
| "grad_norm": 2.3668100833892822, |
| "loss": 5.4505, |
| "lr": 0.0009455944055944056, |
| "step": 890, |
| "tokens_trained": 0.084583704 |
| }, |
| { |
| "epoch": 0.2530496453900709, |
| "grad_norm": 2.1937146186828613, |
| "loss": 5.4981, |
| "lr": 0.0009453146853146854, |
| "step": 892, |
| "tokens_trained": 0.08477096 |
| }, |
| { |
| "epoch": 0.25361702127659574, |
| "grad_norm": 2.2917556762695312, |
| "loss": 5.4224, |
| "lr": 0.000945034965034965, |
| "step": 894, |
| "tokens_trained": 0.084961048 |
| }, |
| { |
| "epoch": 0.2541843971631206, |
| "grad_norm": 2.1254703998565674, |
| "loss": 5.4409, |
| "lr": 0.0009447552447552447, |
| "step": 896, |
| "tokens_trained": 0.085153256 |
| }, |
| { |
| "epoch": 0.2547517730496454, |
| "grad_norm": 2.267159938812256, |
| "loss": 5.4527, |
| "lr": 0.0009444755244755245, |
| "step": 898, |
| "tokens_trained": 0.085343128 |
| }, |
| { |
| "epoch": 0.2553191489361702, |
| "grad_norm": 2.1975555419921875, |
| "loss": 5.516, |
| "lr": 0.0009441958041958042, |
| "step": 900, |
| "tokens_trained": 0.085534024 |
| }, |
| { |
| "epoch": 0.255886524822695, |
| "grad_norm": 2.3459436893463135, |
| "loss": 5.4592, |
| "lr": 0.000943916083916084, |
| "step": 902, |
| "tokens_trained": 0.085725136 |
| }, |
| { |
| "epoch": 0.25645390070921986, |
| "grad_norm": 2.4788501262664795, |
| "loss": 5.3937, |
| "lr": 0.0009436363636363636, |
| "step": 904, |
| "tokens_trained": 0.08591548 |
| }, |
| { |
| "epoch": 0.2570212765957447, |
| "grad_norm": 2.415065288543701, |
| "loss": 5.3991, |
| "lr": 0.0009433566433566434, |
| "step": 906, |
| "tokens_trained": 0.086105008 |
| }, |
| { |
| "epoch": 0.25758865248226953, |
| "grad_norm": 2.1260058879852295, |
| "loss": 5.4122, |
| "lr": 0.0009430769230769231, |
| "step": 908, |
| "tokens_trained": 0.08629424 |
| }, |
| { |
| "epoch": 0.2581560283687943, |
| "grad_norm": 2.1759092807769775, |
| "loss": 5.4663, |
| "lr": 0.0009427972027972029, |
| "step": 910, |
| "tokens_trained": 0.086485784 |
| }, |
| { |
| "epoch": 0.25872340425531914, |
| "grad_norm": 2.3481245040893555, |
| "loss": 5.4398, |
| "lr": 0.0009425174825174825, |
| "step": 912, |
| "tokens_trained": 0.086676744 |
| }, |
| { |
| "epoch": 0.259290780141844, |
| "grad_norm": 2.312612533569336, |
| "loss": 5.4615, |
| "lr": 0.0009422377622377622, |
| "step": 914, |
| "tokens_trained": 0.086866424 |
| }, |
| { |
| "epoch": 0.2598581560283688, |
| "grad_norm": 2.4709548950195312, |
| "loss": 5.4062, |
| "lr": 0.000941958041958042, |
| "step": 916, |
| "tokens_trained": 0.087055824 |
| }, |
| { |
| "epoch": 0.2604255319148936, |
| "grad_norm": 2.3664543628692627, |
| "loss": 5.4696, |
| "lr": 0.0009416783216783217, |
| "step": 918, |
| "tokens_trained": 0.087244136 |
| }, |
| { |
| "epoch": 0.26099290780141843, |
| "grad_norm": 2.423687696456909, |
| "loss": 5.4762, |
| "lr": 0.0009413986013986015, |
| "step": 920, |
| "tokens_trained": 0.087432584 |
| }, |
| { |
| "epoch": 0.26156028368794326, |
| "grad_norm": 2.4002890586853027, |
| "loss": 5.4743, |
| "lr": 0.0009411188811188811, |
| "step": 922, |
| "tokens_trained": 0.087622248 |
| }, |
| { |
| "epoch": 0.2621276595744681, |
| "grad_norm": 2.107527494430542, |
| "loss": 5.4013, |
| "lr": 0.0009408391608391608, |
| "step": 924, |
| "tokens_trained": 0.087809888 |
| }, |
| { |
| "epoch": 0.26269503546099293, |
| "grad_norm": 2.05177640914917, |
| "loss": 5.4601, |
| "lr": 0.0009405594405594406, |
| "step": 926, |
| "tokens_trained": 0.088002704 |
| }, |
| { |
| "epoch": 0.2632624113475177, |
| "grad_norm": 2.303874969482422, |
| "loss": 5.456, |
| "lr": 0.0009402797202797203, |
| "step": 928, |
| "tokens_trained": 0.088191344 |
| }, |
| { |
| "epoch": 0.26382978723404255, |
| "grad_norm": 2.4369659423828125, |
| "loss": 5.4162, |
| "lr": 0.00094, |
| "step": 930, |
| "tokens_trained": 0.088380832 |
| }, |
| { |
| "epoch": 0.2643971631205674, |
| "grad_norm": 2.4750819206237793, |
| "loss": 5.455, |
| "lr": 0.0009397202797202797, |
| "step": 932, |
| "tokens_trained": 0.088569936 |
| }, |
| { |
| "epoch": 0.2649645390070922, |
| "grad_norm": 2.09557843208313, |
| "loss": 5.4273, |
| "lr": 0.0009394405594405595, |
| "step": 934, |
| "tokens_trained": 0.08876116 |
| }, |
| { |
| "epoch": 0.265531914893617, |
| "grad_norm": 2.0984373092651367, |
| "loss": 5.4342, |
| "lr": 0.0009391608391608392, |
| "step": 936, |
| "tokens_trained": 0.088951032 |
| }, |
| { |
| "epoch": 0.26609929078014183, |
| "grad_norm": 2.1150097846984863, |
| "loss": 5.4344, |
| "lr": 0.000938881118881119, |
| "step": 938, |
| "tokens_trained": 0.08914124 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 2.1577563285827637, |
| "loss": 5.455, |
| "lr": 0.0009386013986013986, |
| "step": 940, |
| "tokens_trained": 0.089330952 |
| }, |
| { |
| "epoch": 0.2672340425531915, |
| "grad_norm": 2.0483016967773438, |
| "loss": 5.413, |
| "lr": 0.0009383216783216783, |
| "step": 942, |
| "tokens_trained": 0.08952116 |
| }, |
| { |
| "epoch": 0.26780141843971633, |
| "grad_norm": 2.3116559982299805, |
| "loss": 5.455, |
| "lr": 0.0009380419580419581, |
| "step": 944, |
| "tokens_trained": 0.089712888 |
| }, |
| { |
| "epoch": 0.2683687943262411, |
| "grad_norm": 2.2459256649017334, |
| "loss": 5.3971, |
| "lr": 0.0009377622377622378, |
| "step": 946, |
| "tokens_trained": 0.089903936 |
| }, |
| { |
| "epoch": 0.26893617021276595, |
| "grad_norm": 2.3048787117004395, |
| "loss": 5.4454, |
| "lr": 0.0009374825174825175, |
| "step": 948, |
| "tokens_trained": 0.090095888 |
| }, |
| { |
| "epoch": 0.2695035460992908, |
| "grad_norm": 2.196735143661499, |
| "loss": 5.4101, |
| "lr": 0.0009372027972027972, |
| "step": 950, |
| "tokens_trained": 0.090287472 |
| }, |
| { |
| "epoch": 0.2700709219858156, |
| "grad_norm": 2.3908562660217285, |
| "loss": 5.4731, |
| "lr": 0.0009369230769230769, |
| "step": 952, |
| "tokens_trained": 0.090476568 |
| }, |
| { |
| "epoch": 0.27063829787234045, |
| "grad_norm": 2.154932975769043, |
| "loss": 5.4104, |
| "lr": 0.0009366433566433567, |
| "step": 954, |
| "tokens_trained": 0.090665592 |
| }, |
| { |
| "epoch": 0.27120567375886523, |
| "grad_norm": 2.340907096862793, |
| "loss": 5.3707, |
| "lr": 0.0009363636363636364, |
| "step": 956, |
| "tokens_trained": 0.090853232 |
| }, |
| { |
| "epoch": 0.27177304964539006, |
| "grad_norm": 2.1736438274383545, |
| "loss": 5.4484, |
| "lr": 0.0009360839160839161, |
| "step": 958, |
| "tokens_trained": 0.091043808 |
| }, |
| { |
| "epoch": 0.2723404255319149, |
| "grad_norm": 2.3518154621124268, |
| "loss": 5.4919, |
| "lr": 0.0009358041958041958, |
| "step": 960, |
| "tokens_trained": 0.09123384 |
| }, |
| { |
| "epoch": 0.27290780141843973, |
| "grad_norm": 2.6673426628112793, |
| "loss": 5.4008, |
| "lr": 0.0009355244755244755, |
| "step": 962, |
| "tokens_trained": 0.091422544 |
| }, |
| { |
| "epoch": 0.2734751773049645, |
| "grad_norm": 2.4755311012268066, |
| "loss": 5.4533, |
| "lr": 0.0009352447552447553, |
| "step": 964, |
| "tokens_trained": 0.09161544 |
| }, |
| { |
| "epoch": 0.27404255319148935, |
| "grad_norm": 2.338452100753784, |
| "loss": 5.4953, |
| "lr": 0.0009349650349650349, |
| "step": 966, |
| "tokens_trained": 0.091806344 |
| }, |
| { |
| "epoch": 0.2746099290780142, |
| "grad_norm": 2.170426845550537, |
| "loss": 5.4588, |
| "lr": 0.0009346853146853147, |
| "step": 968, |
| "tokens_trained": 0.091996648 |
| }, |
| { |
| "epoch": 0.275177304964539, |
| "grad_norm": 2.2587599754333496, |
| "loss": 5.4547, |
| "lr": 0.0009344055944055944, |
| "step": 970, |
| "tokens_trained": 0.09218848 |
| }, |
| { |
| "epoch": 0.27574468085106385, |
| "grad_norm": 2.0009043216705322, |
| "loss": 5.4116, |
| "lr": 0.0009341258741258742, |
| "step": 972, |
| "tokens_trained": 0.092377984 |
| }, |
| { |
| "epoch": 0.27631205673758863, |
| "grad_norm": 2.0617294311523438, |
| "loss": 5.4541, |
| "lr": 0.0009338461538461539, |
| "step": 974, |
| "tokens_trained": 0.092569472 |
| }, |
| { |
| "epoch": 0.27687943262411346, |
| "grad_norm": 2.059300661087036, |
| "loss": 5.4414, |
| "lr": 0.0009335664335664336, |
| "step": 976, |
| "tokens_trained": 0.092758496 |
| }, |
| { |
| "epoch": 0.2774468085106383, |
| "grad_norm": 2.2815263271331787, |
| "loss": 5.4435, |
| "lr": 0.0009332867132867133, |
| "step": 978, |
| "tokens_trained": 0.092950368 |
| }, |
| { |
| "epoch": 0.27801418439716313, |
| "grad_norm": 2.1770365238189697, |
| "loss": 5.4481, |
| "lr": 0.0009330069930069929, |
| "step": 980, |
| "tokens_trained": 0.093140552 |
| }, |
| { |
| "epoch": 0.27858156028368797, |
| "grad_norm": 2.0089797973632812, |
| "loss": 5.4117, |
| "lr": 0.0009327272727272728, |
| "step": 982, |
| "tokens_trained": 0.093332312 |
| }, |
| { |
| "epoch": 0.27914893617021275, |
| "grad_norm": 2.2188286781311035, |
| "loss": 5.4594, |
| "lr": 0.0009324475524475524, |
| "step": 984, |
| "tokens_trained": 0.093520792 |
| }, |
| { |
| "epoch": 0.2797163120567376, |
| "grad_norm": 2.310481548309326, |
| "loss": 5.393, |
| "lr": 0.0009321678321678322, |
| "step": 986, |
| "tokens_trained": 0.093710608 |
| }, |
| { |
| "epoch": 0.2802836879432624, |
| "grad_norm": 2.3832972049713135, |
| "loss": 5.4277, |
| "lr": 0.0009318881118881119, |
| "step": 988, |
| "tokens_trained": 0.093900952 |
| }, |
| { |
| "epoch": 0.28085106382978725, |
| "grad_norm": 2.011126756668091, |
| "loss": 5.4097, |
| "lr": 0.0009316083916083917, |
| "step": 990, |
| "tokens_trained": 0.094091 |
| }, |
| { |
| "epoch": 0.28141843971631203, |
| "grad_norm": 2.2632968425750732, |
| "loss": 5.4388, |
| "lr": 0.0009313286713286714, |
| "step": 992, |
| "tokens_trained": 0.094281216 |
| }, |
| { |
| "epoch": 0.28198581560283686, |
| "grad_norm": 2.3477587699890137, |
| "loss": 5.3728, |
| "lr": 0.000931048951048951, |
| "step": 994, |
| "tokens_trained": 0.094470264 |
| }, |
| { |
| "epoch": 0.2825531914893617, |
| "grad_norm": 2.486196756362915, |
| "loss": 5.414, |
| "lr": 0.0009307692307692308, |
| "step": 996, |
| "tokens_trained": 0.094662816 |
| }, |
| { |
| "epoch": 0.28312056737588653, |
| "grad_norm": 2.5286316871643066, |
| "loss": 5.4063, |
| "lr": 0.0009304895104895104, |
| "step": 998, |
| "tokens_trained": 0.094852896 |
| }, |
| { |
| "epoch": 0.28368794326241137, |
| "grad_norm": 2.712406635284424, |
| "loss": 5.4274, |
| "lr": 0.0009302097902097903, |
| "step": 1000, |
| "tokens_trained": 0.09504336 |
| }, |
| { |
| "epoch": 0.28368794326241137, |
| "eval_loss": 5.434450626373291, |
| "eval_runtime": 21.0388, |
| "step": 1000, |
| "tokens_trained": 0.09504336 |
| }, |
| { |
| "epoch": 0.28425531914893615, |
| "grad_norm": 2.245316505432129, |
| "loss": 5.3551, |
| "lr": 0.0009299300699300699, |
| "step": 1002, |
| "tokens_trained": 0.095233944 |
| }, |
| { |
| "epoch": 0.284822695035461, |
| "grad_norm": 2.335533618927002, |
| "loss": 5.4608, |
| "lr": 0.0009296503496503497, |
| "step": 1004, |
| "tokens_trained": 0.095423184 |
| }, |
| { |
| "epoch": 0.2853900709219858, |
| "grad_norm": 2.232128858566284, |
| "loss": 5.4374, |
| "lr": 0.0009293706293706294, |
| "step": 1006, |
| "tokens_trained": 0.095612672 |
| }, |
| { |
| "epoch": 0.28595744680851065, |
| "grad_norm": 2.148329257965088, |
| "loss": 5.463, |
| "lr": 0.0009290909090909091, |
| "step": 1008, |
| "tokens_trained": 0.095802784 |
| }, |
| { |
| "epoch": 0.2865248226950355, |
| "grad_norm": 1.9580810070037842, |
| "loss": 5.291, |
| "lr": 0.0009288111888111889, |
| "step": 1010, |
| "tokens_trained": 0.095990776 |
| }, |
| { |
| "epoch": 0.28709219858156027, |
| "grad_norm": 1.9873988628387451, |
| "loss": 5.4103, |
| "lr": 0.0009285314685314685, |
| "step": 1012, |
| "tokens_trained": 0.096180648 |
| }, |
| { |
| "epoch": 0.2876595744680851, |
| "grad_norm": 2.0297746658325195, |
| "loss": 5.4078, |
| "lr": 0.0009282517482517483, |
| "step": 1014, |
| "tokens_trained": 0.09637224 |
| }, |
| { |
| "epoch": 0.28822695035460993, |
| "grad_norm": 1.928497076034546, |
| "loss": 5.3162, |
| "lr": 0.0009279720279720279, |
| "step": 1016, |
| "tokens_trained": 0.096561744 |
| }, |
| { |
| "epoch": 0.28879432624113477, |
| "grad_norm": 2.1219675540924072, |
| "loss": 5.4358, |
| "lr": 0.0009276923076923078, |
| "step": 1018, |
| "tokens_trained": 0.096752296 |
| }, |
| { |
| "epoch": 0.28936170212765955, |
| "grad_norm": 2.0021066665649414, |
| "loss": 5.4232, |
| "lr": 0.0009274125874125874, |
| "step": 1020, |
| "tokens_trained": 0.096943856 |
| }, |
| { |
| "epoch": 0.2899290780141844, |
| "grad_norm": 1.9920068979263306, |
| "loss": 5.407, |
| "lr": 0.0009271328671328671, |
| "step": 1022, |
| "tokens_trained": 0.097133632 |
| }, |
| { |
| "epoch": 0.2904964539007092, |
| "grad_norm": 1.8810361623764038, |
| "loss": 5.4293, |
| "lr": 0.0009268531468531469, |
| "step": 1024, |
| "tokens_trained": 0.097325976 |
| }, |
| { |
| "epoch": 0.29106382978723405, |
| "grad_norm": 1.8560134172439575, |
| "loss": 5.4236, |
| "lr": 0.0009265734265734266, |
| "step": 1026, |
| "tokens_trained": 0.0975142 |
| }, |
| { |
| "epoch": 0.2916312056737589, |
| "grad_norm": 2.1735010147094727, |
| "loss": 5.4252, |
| "lr": 0.0009262937062937064, |
| "step": 1028, |
| "tokens_trained": 0.0977042 |
| }, |
| { |
| "epoch": 0.29219858156028367, |
| "grad_norm": 2.2467288970947266, |
| "loss": 5.3756, |
| "lr": 0.000926013986013986, |
| "step": 1030, |
| "tokens_trained": 0.097893376 |
| }, |
| { |
| "epoch": 0.2927659574468085, |
| "grad_norm": 1.9609313011169434, |
| "loss": 5.4091, |
| "lr": 0.0009257342657342658, |
| "step": 1032, |
| "tokens_trained": 0.0980824 |
| }, |
| { |
| "epoch": 0.29333333333333333, |
| "grad_norm": 2.116384267807007, |
| "loss": 5.4001, |
| "lr": 0.0009254545454545454, |
| "step": 1034, |
| "tokens_trained": 0.098271304 |
| }, |
| { |
| "epoch": 0.29390070921985817, |
| "grad_norm": 2.1869800090789795, |
| "loss": 5.4102, |
| "lr": 0.0009251748251748252, |
| "step": 1036, |
| "tokens_trained": 0.098461528 |
| }, |
| { |
| "epoch": 0.294468085106383, |
| "grad_norm": 2.2882192134857178, |
| "loss": 5.4723, |
| "lr": 0.0009248951048951049, |
| "step": 1038, |
| "tokens_trained": 0.09865268 |
| }, |
| { |
| "epoch": 0.2950354609929078, |
| "grad_norm": 2.1590888500213623, |
| "loss": 5.3523, |
| "lr": 0.0009246153846153846, |
| "step": 1040, |
| "tokens_trained": 0.098842688 |
| }, |
| { |
| "epoch": 0.2956028368794326, |
| "grad_norm": 2.284207582473755, |
| "loss": 5.4647, |
| "lr": 0.0009243356643356644, |
| "step": 1042, |
| "tokens_trained": 0.099031544 |
| }, |
| { |
| "epoch": 0.29617021276595745, |
| "grad_norm": 2.333207845687866, |
| "loss": 5.4655, |
| "lr": 0.0009240559440559441, |
| "step": 1044, |
| "tokens_trained": 0.09922264 |
| }, |
| { |
| "epoch": 0.2967375886524823, |
| "grad_norm": 2.357572555541992, |
| "loss": 5.3909, |
| "lr": 0.0009237762237762239, |
| "step": 1046, |
| "tokens_trained": 0.099411416 |
| }, |
| { |
| "epoch": 0.29730496453900707, |
| "grad_norm": 1.88053297996521, |
| "loss": 5.4119, |
| "lr": 0.0009234965034965035, |
| "step": 1048, |
| "tokens_trained": 0.099602112 |
| }, |
| { |
| "epoch": 0.2978723404255319, |
| "grad_norm": 1.8860585689544678, |
| "loss": 5.3867, |
| "lr": 0.0009232167832167832, |
| "step": 1050, |
| "tokens_trained": 0.099792552 |
| }, |
| { |
| "epoch": 0.29843971631205674, |
| "grad_norm": 2.000173568725586, |
| "loss": 5.3773, |
| "lr": 0.0009229370629370629, |
| "step": 1052, |
| "tokens_trained": 0.099981752 |
| }, |
| { |
| "epoch": 0.29900709219858157, |
| "grad_norm": 2.015394926071167, |
| "loss": 5.3936, |
| "lr": 0.0009226573426573427, |
| "step": 1054, |
| "tokens_trained": 0.10017428 |
| }, |
| { |
| "epoch": 0.2995744680851064, |
| "grad_norm": 2.0050301551818848, |
| "loss": 5.3653, |
| "lr": 0.0009223776223776224, |
| "step": 1056, |
| "tokens_trained": 0.100364544 |
| }, |
| { |
| "epoch": 0.3001418439716312, |
| "grad_norm": 1.7397475242614746, |
| "loss": 5.3224, |
| "lr": 0.0009220979020979021, |
| "step": 1058, |
| "tokens_trained": 0.100555648 |
| }, |
| { |
| "epoch": 0.300709219858156, |
| "grad_norm": 1.9808533191680908, |
| "loss": 5.3822, |
| "lr": 0.0009218181818181819, |
| "step": 1060, |
| "tokens_trained": 0.100744968 |
| }, |
| { |
| "epoch": 0.30127659574468085, |
| "grad_norm": 2.034329652786255, |
| "loss": 5.3961, |
| "lr": 0.0009215384615384616, |
| "step": 1062, |
| "tokens_trained": 0.100934176 |
| }, |
| { |
| "epoch": 0.3018439716312057, |
| "grad_norm": 2.1286778450012207, |
| "loss": 5.4061, |
| "lr": 0.0009212587412587413, |
| "step": 1064, |
| "tokens_trained": 0.101125216 |
| }, |
| { |
| "epoch": 0.3024113475177305, |
| "grad_norm": 2.131822347640991, |
| "loss": 5.3675, |
| "lr": 0.000920979020979021, |
| "step": 1066, |
| "tokens_trained": 0.101314504 |
| }, |
| { |
| "epoch": 0.3029787234042553, |
| "grad_norm": 2.162069320678711, |
| "loss": 5.4552, |
| "lr": 0.0009206993006993007, |
| "step": 1068, |
| "tokens_trained": 0.101503352 |
| }, |
| { |
| "epoch": 0.30354609929078014, |
| "grad_norm": 2.5730931758880615, |
| "loss": 5.3978, |
| "lr": 0.0009204195804195804, |
| "step": 1070, |
| "tokens_trained": 0.101691504 |
| }, |
| { |
| "epoch": 0.30411347517730497, |
| "grad_norm": 2.2053022384643555, |
| "loss": 5.3604, |
| "lr": 0.0009201398601398602, |
| "step": 1072, |
| "tokens_trained": 0.101883072 |
| }, |
| { |
| "epoch": 0.3046808510638298, |
| "grad_norm": 2.1578407287597656, |
| "loss": 5.4236, |
| "lr": 0.0009198601398601398, |
| "step": 1074, |
| "tokens_trained": 0.102075832 |
| }, |
| { |
| "epoch": 0.3052482269503546, |
| "grad_norm": 2.0061423778533936, |
| "loss": 5.3882, |
| "lr": 0.0009195804195804196, |
| "step": 1076, |
| "tokens_trained": 0.102266768 |
| }, |
| { |
| "epoch": 0.3058156028368794, |
| "grad_norm": 1.8915576934814453, |
| "loss": 5.3539, |
| "lr": 0.0009193006993006993, |
| "step": 1078, |
| "tokens_trained": 0.102457096 |
| }, |
| { |
| "epoch": 0.30638297872340425, |
| "grad_norm": 2.15053129196167, |
| "loss": 5.4222, |
| "lr": 0.0009190209790209791, |
| "step": 1080, |
| "tokens_trained": 0.102647544 |
| }, |
| { |
| "epoch": 0.3069503546099291, |
| "grad_norm": 2.241217851638794, |
| "loss": 5.4275, |
| "lr": 0.0009187412587412588, |
| "step": 1082, |
| "tokens_trained": 0.10283904 |
| }, |
| { |
| "epoch": 0.3075177304964539, |
| "grad_norm": 2.37854266166687, |
| "loss": 5.419, |
| "lr": 0.0009184615384615385, |
| "step": 1084, |
| "tokens_trained": 0.103028464 |
| }, |
| { |
| "epoch": 0.3080851063829787, |
| "grad_norm": 2.00118350982666, |
| "loss": 5.4225, |
| "lr": 0.0009181818181818182, |
| "step": 1086, |
| "tokens_trained": 0.10321896 |
| }, |
| { |
| "epoch": 0.30865248226950354, |
| "grad_norm": 2.2643723487854004, |
| "loss": 5.4487, |
| "lr": 0.0009179020979020978, |
| "step": 1088, |
| "tokens_trained": 0.103409256 |
| }, |
| { |
| "epoch": 0.30921985815602837, |
| "grad_norm": 2.4618585109710693, |
| "loss": 5.4211, |
| "lr": 0.0009176223776223777, |
| "step": 1090, |
| "tokens_trained": 0.103597272 |
| }, |
| { |
| "epoch": 0.3097872340425532, |
| "grad_norm": 2.141491174697876, |
| "loss": 5.3758, |
| "lr": 0.0009173426573426573, |
| "step": 1092, |
| "tokens_trained": 0.103786128 |
| }, |
| { |
| "epoch": 0.31035460992907804, |
| "grad_norm": 1.9777475595474243, |
| "loss": 5.4129, |
| "lr": 0.0009170629370629371, |
| "step": 1094, |
| "tokens_trained": 0.103974864 |
| }, |
| { |
| "epoch": 0.3109219858156028, |
| "grad_norm": 1.9153270721435547, |
| "loss": 5.3912, |
| "lr": 0.0009167832167832168, |
| "step": 1096, |
| "tokens_trained": 0.104163864 |
| }, |
| { |
| "epoch": 0.31148936170212765, |
| "grad_norm": 2.172558546066284, |
| "loss": 5.3425, |
| "lr": 0.0009165034965034966, |
| "step": 1098, |
| "tokens_trained": 0.104353136 |
| }, |
| { |
| "epoch": 0.3120567375886525, |
| "grad_norm": 2.049896717071533, |
| "loss": 5.3732, |
| "lr": 0.0009162237762237763, |
| "step": 1100, |
| "tokens_trained": 0.10454476 |
| }, |
| { |
| "epoch": 0.3126241134751773, |
| "grad_norm": 1.9415545463562012, |
| "loss": 5.3873, |
| "lr": 0.0009159440559440559, |
| "step": 1102, |
| "tokens_trained": 0.104734296 |
| }, |
| { |
| "epoch": 0.3131914893617021, |
| "grad_norm": 1.7280856370925903, |
| "loss": 5.3857, |
| "lr": 0.0009156643356643357, |
| "step": 1104, |
| "tokens_trained": 0.104925648 |
| }, |
| { |
| "epoch": 0.31375886524822694, |
| "grad_norm": 1.9120069742202759, |
| "loss": 5.3216, |
| "lr": 0.0009153846153846153, |
| "step": 1106, |
| "tokens_trained": 0.105115776 |
| }, |
| { |
| "epoch": 0.31432624113475177, |
| "grad_norm": 2.007101058959961, |
| "loss": 5.4074, |
| "lr": 0.0009151048951048952, |
| "step": 1108, |
| "tokens_trained": 0.105305656 |
| }, |
| { |
| "epoch": 0.3148936170212766, |
| "grad_norm": 1.9159268140792847, |
| "loss": 5.3625, |
| "lr": 0.0009148251748251748, |
| "step": 1110, |
| "tokens_trained": 0.105494632 |
| }, |
| { |
| "epoch": 0.31546099290780144, |
| "grad_norm": 1.9235239028930664, |
| "loss": 5.3362, |
| "lr": 0.0009145454545454546, |
| "step": 1112, |
| "tokens_trained": 0.105683536 |
| }, |
| { |
| "epoch": 0.3160283687943262, |
| "grad_norm": 1.8954299688339233, |
| "loss": 5.3531, |
| "lr": 0.0009142657342657343, |
| "step": 1114, |
| "tokens_trained": 0.105873176 |
| }, |
| { |
| "epoch": 0.31659574468085105, |
| "grad_norm": 2.026578426361084, |
| "loss": 5.408, |
| "lr": 0.000913986013986014, |
| "step": 1116, |
| "tokens_trained": 0.10606276 |
| }, |
| { |
| "epoch": 0.3171631205673759, |
| "grad_norm": 1.9014806747436523, |
| "loss": 5.363, |
| "lr": 0.0009137062937062938, |
| "step": 1118, |
| "tokens_trained": 0.106254616 |
| }, |
| { |
| "epoch": 0.3177304964539007, |
| "grad_norm": 1.849649429321289, |
| "loss": 5.3811, |
| "lr": 0.0009134265734265734, |
| "step": 1120, |
| "tokens_trained": 0.106445376 |
| }, |
| { |
| "epoch": 0.31829787234042556, |
| "grad_norm": 1.7405186891555786, |
| "loss": 5.3504, |
| "lr": 0.0009131468531468532, |
| "step": 1122, |
| "tokens_trained": 0.106636072 |
| }, |
| { |
| "epoch": 0.31886524822695034, |
| "grad_norm": 1.867285966873169, |
| "loss": 5.3675, |
| "lr": 0.0009128671328671328, |
| "step": 1124, |
| "tokens_trained": 0.106827896 |
| }, |
| { |
| "epoch": 0.3191489361702128, |
| "eval_loss": 5.3796281814575195, |
| "eval_runtime": 20.7444, |
| "step": 1125, |
| "tokens_trained": 0.106922416 |
| }, |
| { |
| "epoch": 0.31943262411347517, |
| "grad_norm": 1.8044356107711792, |
| "loss": 5.3717, |
| "lr": 0.0009125874125874127, |
| "step": 1126, |
| "tokens_trained": 0.107016056 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.6348600387573242, |
| "loss": 5.4676, |
| "lr": 0.0009123076923076923, |
| "step": 1128, |
| "tokens_trained": 0.107203992 |
| }, |
| { |
| "epoch": 0.32056737588652484, |
| "grad_norm": 1.7802475690841675, |
| "loss": 5.3304, |
| "lr": 0.000912027972027972, |
| "step": 1130, |
| "tokens_trained": 0.107394224 |
| }, |
| { |
| "epoch": 0.3211347517730496, |
| "grad_norm": 1.7695430517196655, |
| "loss": 5.3611, |
| "lr": 0.0009117482517482518, |
| "step": 1132, |
| "tokens_trained": 0.107583464 |
| }, |
| { |
| "epoch": 0.32170212765957445, |
| "grad_norm": 2.026853322982788, |
| "loss": 5.363, |
| "lr": 0.0009114685314685315, |
| "step": 1134, |
| "tokens_trained": 0.107776088 |
| }, |
| { |
| "epoch": 0.3222695035460993, |
| "grad_norm": 1.803208589553833, |
| "loss": 5.3801, |
| "lr": 0.0009111888111888113, |
| "step": 1136, |
| "tokens_trained": 0.107964416 |
| }, |
| { |
| "epoch": 0.3228368794326241, |
| "grad_norm": 1.812386155128479, |
| "loss": 5.3721, |
| "lr": 0.0009109090909090909, |
| "step": 1138, |
| "tokens_trained": 0.108153104 |
| }, |
| { |
| "epoch": 0.32340425531914896, |
| "grad_norm": 1.605839490890503, |
| "loss": 5.3339, |
| "lr": 0.0009106293706293707, |
| "step": 1140, |
| "tokens_trained": 0.108341408 |
| }, |
| { |
| "epoch": 0.32397163120567374, |
| "grad_norm": 1.7169313430786133, |
| "loss": 5.4051, |
| "lr": 0.0009103496503496503, |
| "step": 1142, |
| "tokens_trained": 0.108532312 |
| }, |
| { |
| "epoch": 0.3245390070921986, |
| "grad_norm": 2.0499444007873535, |
| "loss": 5.2992, |
| "lr": 0.0009100699300699301, |
| "step": 1144, |
| "tokens_trained": 0.108721864 |
| }, |
| { |
| "epoch": 0.3251063829787234, |
| "grad_norm": 1.988674521446228, |
| "loss": 5.3862, |
| "lr": 0.0009097902097902098, |
| "step": 1146, |
| "tokens_trained": 0.108912352 |
| }, |
| { |
| "epoch": 0.32567375886524824, |
| "grad_norm": 1.8733936548233032, |
| "loss": 5.3627, |
| "lr": 0.0009095104895104895, |
| "step": 1148, |
| "tokens_trained": 0.109101952 |
| }, |
| { |
| "epoch": 0.3262411347517731, |
| "grad_norm": 1.978704810142517, |
| "loss": 5.3668, |
| "lr": 0.0009092307692307692, |
| "step": 1150, |
| "tokens_trained": 0.109292712 |
| }, |
| { |
| "epoch": 0.32680851063829786, |
| "grad_norm": 1.9723341464996338, |
| "loss": 5.3545, |
| "lr": 0.000908951048951049, |
| "step": 1152, |
| "tokens_trained": 0.109484992 |
| }, |
| { |
| "epoch": 0.3273758865248227, |
| "grad_norm": 2.165728807449341, |
| "loss": 5.3731, |
| "lr": 0.0009086713286713288, |
| "step": 1154, |
| "tokens_trained": 0.109674336 |
| }, |
| { |
| "epoch": 0.3279432624113475, |
| "grad_norm": 1.9241019487380981, |
| "loss": 5.3456, |
| "lr": 0.0009083916083916084, |
| "step": 1156, |
| "tokens_trained": 0.109863368 |
| }, |
| { |
| "epoch": 0.32851063829787236, |
| "grad_norm": 1.9442275762557983, |
| "loss": 5.4065, |
| "lr": 0.0009081118881118881, |
| "step": 1158, |
| "tokens_trained": 0.110051744 |
| }, |
| { |
| "epoch": 0.32907801418439714, |
| "grad_norm": 1.7714000940322876, |
| "loss": 5.3888, |
| "lr": 0.0009078321678321678, |
| "step": 1160, |
| "tokens_trained": 0.11024344 |
| }, |
| { |
| "epoch": 0.329645390070922, |
| "grad_norm": 2.043646812438965, |
| "loss": 5.3835, |
| "lr": 0.0009075524475524476, |
| "step": 1162, |
| "tokens_trained": 0.11043488 |
| }, |
| { |
| "epoch": 0.3302127659574468, |
| "grad_norm": 1.837196946144104, |
| "loss": 5.3554, |
| "lr": 0.0009072727272727273, |
| "step": 1164, |
| "tokens_trained": 0.110626104 |
| }, |
| { |
| "epoch": 0.33078014184397164, |
| "grad_norm": 1.874135971069336, |
| "loss": 5.3457, |
| "lr": 0.000906993006993007, |
| "step": 1166, |
| "tokens_trained": 0.110814768 |
| }, |
| { |
| "epoch": 0.3313475177304965, |
| "grad_norm": 1.6493511199951172, |
| "loss": 5.3118, |
| "lr": 0.0009067132867132866, |
| "step": 1168, |
| "tokens_trained": 0.111004104 |
| }, |
| { |
| "epoch": 0.33191489361702126, |
| "grad_norm": 1.8386362791061401, |
| "loss": 5.3422, |
| "lr": 0.0009064335664335665, |
| "step": 1170, |
| "tokens_trained": 0.11119544 |
| }, |
| { |
| "epoch": 0.3324822695035461, |
| "grad_norm": 2.020859718322754, |
| "loss": 5.3565, |
| "lr": 0.0009061538461538462, |
| "step": 1172, |
| "tokens_trained": 0.111384384 |
| }, |
| { |
| "epoch": 0.3330496453900709, |
| "grad_norm": 2.049401044845581, |
| "loss": 5.3358, |
| "lr": 0.0009058741258741259, |
| "step": 1174, |
| "tokens_trained": 0.111573944 |
| }, |
| { |
| "epoch": 0.33361702127659576, |
| "grad_norm": 1.965345025062561, |
| "loss": 5.3431, |
| "lr": 0.0009055944055944056, |
| "step": 1176, |
| "tokens_trained": 0.111763504 |
| }, |
| { |
| "epoch": 0.3341843971631206, |
| "grad_norm": 1.9792066812515259, |
| "loss": 5.3579, |
| "lr": 0.0009053146853146853, |
| "step": 1178, |
| "tokens_trained": 0.111953664 |
| }, |
| { |
| "epoch": 0.3347517730496454, |
| "grad_norm": 1.7790883779525757, |
| "loss": 5.3499, |
| "lr": 0.0009050349650349651, |
| "step": 1180, |
| "tokens_trained": 0.11214324 |
| }, |
| { |
| "epoch": 0.3353191489361702, |
| "grad_norm": 1.6504682302474976, |
| "loss": 5.3415, |
| "lr": 0.0009047552447552448, |
| "step": 1182, |
| "tokens_trained": 0.112331256 |
| }, |
| { |
| "epoch": 0.33588652482269504, |
| "grad_norm": 1.9687312841415405, |
| "loss": 5.3565, |
| "lr": 0.0009044755244755245, |
| "step": 1184, |
| "tokens_trained": 0.11252208 |
| }, |
| { |
| "epoch": 0.3364539007092199, |
| "grad_norm": 1.7077507972717285, |
| "loss": 5.3568, |
| "lr": 0.0009041958041958041, |
| "step": 1186, |
| "tokens_trained": 0.112714272 |
| }, |
| { |
| "epoch": 0.33702127659574466, |
| "grad_norm": 1.6311697959899902, |
| "loss": 5.3345, |
| "lr": 0.000903916083916084, |
| "step": 1188, |
| "tokens_trained": 0.11290428 |
| }, |
| { |
| "epoch": 0.3375886524822695, |
| "grad_norm": 1.975233793258667, |
| "loss": 5.4161, |
| "lr": 0.0009036363636363637, |
| "step": 1190, |
| "tokens_trained": 0.113093984 |
| }, |
| { |
| "epoch": 0.3381560283687943, |
| "grad_norm": 1.7567362785339355, |
| "loss": 5.3481, |
| "lr": 0.0009033566433566434, |
| "step": 1192, |
| "tokens_trained": 0.113284904 |
| }, |
| { |
| "epoch": 0.33872340425531916, |
| "grad_norm": 2.121367931365967, |
| "loss": 5.3729, |
| "lr": 0.0009030769230769231, |
| "step": 1194, |
| "tokens_trained": 0.113477952 |
| }, |
| { |
| "epoch": 0.339290780141844, |
| "grad_norm": 2.143253803253174, |
| "loss": 5.3866, |
| "lr": 0.0009027972027972027, |
| "step": 1196, |
| "tokens_trained": 0.11366872 |
| }, |
| { |
| "epoch": 0.3398581560283688, |
| "grad_norm": 2.1118557453155518, |
| "loss": 5.3501, |
| "lr": 0.0009025174825174826, |
| "step": 1198, |
| "tokens_trained": 0.113861552 |
| }, |
| { |
| "epoch": 0.3404255319148936, |
| "grad_norm": 1.8132637739181519, |
| "loss": 5.3325, |
| "lr": 0.0009022377622377622, |
| "step": 1200, |
| "tokens_trained": 0.114051176 |
| }, |
| { |
| "epoch": 0.34099290780141844, |
| "grad_norm": 1.761227011680603, |
| "loss": 5.3629, |
| "lr": 0.000901958041958042, |
| "step": 1202, |
| "tokens_trained": 0.114240808 |
| }, |
| { |
| "epoch": 0.3415602836879433, |
| "grad_norm": 1.8358371257781982, |
| "loss": 5.3644, |
| "lr": 0.0009016783216783216, |
| "step": 1204, |
| "tokens_trained": 0.114430968 |
| }, |
| { |
| "epoch": 0.3421276595744681, |
| "grad_norm": 2.0768542289733887, |
| "loss": 5.3705, |
| "lr": 0.0009013986013986014, |
| "step": 1206, |
| "tokens_trained": 0.114620544 |
| }, |
| { |
| "epoch": 0.3426950354609929, |
| "grad_norm": 1.6928143501281738, |
| "loss": 5.2534, |
| "lr": 0.0009011188811188812, |
| "step": 1208, |
| "tokens_trained": 0.114811928 |
| }, |
| { |
| "epoch": 0.3432624113475177, |
| "grad_norm": 1.8634029626846313, |
| "loss": 5.3679, |
| "lr": 0.0009008391608391609, |
| "step": 1210, |
| "tokens_trained": 0.115002912 |
| }, |
| { |
| "epoch": 0.34382978723404256, |
| "grad_norm": 1.8048954010009766, |
| "loss": 5.3049, |
| "lr": 0.0009005594405594406, |
| "step": 1212, |
| "tokens_trained": 0.115192544 |
| }, |
| { |
| "epoch": 0.3443971631205674, |
| "grad_norm": 1.9170348644256592, |
| "loss": 5.2457, |
| "lr": 0.0009002797202797202, |
| "step": 1214, |
| "tokens_trained": 0.115383248 |
| }, |
| { |
| "epoch": 0.3449645390070922, |
| "grad_norm": 1.788751482963562, |
| "loss": 5.3678, |
| "lr": 0.0009000000000000001, |
| "step": 1216, |
| "tokens_trained": 0.115574304 |
| }, |
| { |
| "epoch": 0.345531914893617, |
| "grad_norm": 1.9751293659210205, |
| "loss": 5.3352, |
| "lr": 0.0008997202797202797, |
| "step": 1218, |
| "tokens_trained": 0.115766008 |
| }, |
| { |
| "epoch": 0.34609929078014184, |
| "grad_norm": 1.8202649354934692, |
| "loss": 5.37, |
| "lr": 0.0008994405594405595, |
| "step": 1220, |
| "tokens_trained": 0.11595804 |
| }, |
| { |
| "epoch": 0.3466666666666667, |
| "grad_norm": 1.656063199043274, |
| "loss": 5.3664, |
| "lr": 0.0008991608391608391, |
| "step": 1222, |
| "tokens_trained": 0.116146344 |
| }, |
| { |
| "epoch": 0.3472340425531915, |
| "grad_norm": 1.7509667873382568, |
| "loss": 5.3636, |
| "lr": 0.0008988811188811188, |
| "step": 1224, |
| "tokens_trained": 0.116334568 |
| }, |
| { |
| "epoch": 0.3478014184397163, |
| "grad_norm": 1.8556638956069946, |
| "loss": 5.3577, |
| "lr": 0.0008986013986013987, |
| "step": 1226, |
| "tokens_trained": 0.116525704 |
| }, |
| { |
| "epoch": 0.3483687943262411, |
| "grad_norm": 2.026033639907837, |
| "loss": 5.3657, |
| "lr": 0.0008983216783216783, |
| "step": 1228, |
| "tokens_trained": 0.116716032 |
| }, |
| { |
| "epoch": 0.34893617021276596, |
| "grad_norm": 1.6965924501419067, |
| "loss": 5.304, |
| "lr": 0.0008980419580419581, |
| "step": 1230, |
| "tokens_trained": 0.116904832 |
| }, |
| { |
| "epoch": 0.3495035460992908, |
| "grad_norm": 1.8144174814224243, |
| "loss": 5.3759, |
| "lr": 0.0008977622377622377, |
| "step": 1232, |
| "tokens_trained": 0.117095536 |
| }, |
| { |
| "epoch": 0.3500709219858156, |
| "grad_norm": 1.7229580879211426, |
| "loss": 5.3244, |
| "lr": 0.0008974825174825176, |
| "step": 1234, |
| "tokens_trained": 0.117285952 |
| }, |
| { |
| "epoch": 0.3506382978723404, |
| "grad_norm": 1.722578525543213, |
| "loss": 5.3442, |
| "lr": 0.0008972027972027972, |
| "step": 1236, |
| "tokens_trained": 0.117477488 |
| }, |
| { |
| "epoch": 0.35120567375886524, |
| "grad_norm": 1.8006796836853027, |
| "loss": 5.3624, |
| "lr": 0.000896923076923077, |
| "step": 1238, |
| "tokens_trained": 0.117667352 |
| }, |
| { |
| "epoch": 0.3517730496453901, |
| "grad_norm": 1.7172250747680664, |
| "loss": 5.3002, |
| "lr": 0.0008966433566433566, |
| "step": 1240, |
| "tokens_trained": 0.117856504 |
| }, |
| { |
| "epoch": 0.3523404255319149, |
| "grad_norm": 1.8281760215759277, |
| "loss": 5.3311, |
| "lr": 0.0008963636363636363, |
| "step": 1242, |
| "tokens_trained": 0.11804676 |
| }, |
| { |
| "epoch": 0.3529078014184397, |
| "grad_norm": 1.7666652202606201, |
| "loss": 5.3847, |
| "lr": 0.0008960839160839162, |
| "step": 1244, |
| "tokens_trained": 0.118235688 |
| }, |
| { |
| "epoch": 0.3534751773049645, |
| "grad_norm": 1.7723621129989624, |
| "loss": 5.3506, |
| "lr": 0.0008958041958041958, |
| "step": 1246, |
| "tokens_trained": 0.11842632 |
| }, |
| { |
| "epoch": 0.35404255319148936, |
| "grad_norm": 1.7779643535614014, |
| "loss": 5.3066, |
| "lr": 0.0008955244755244756, |
| "step": 1248, |
| "tokens_trained": 0.118616536 |
| }, |
| { |
| "epoch": 0.3546099290780142, |
| "grad_norm": 1.746245265007019, |
| "loss": 5.2993, |
| "lr": 0.0008952447552447552, |
| "step": 1250, |
| "tokens_trained": 0.118807672 |
| }, |
| { |
| "epoch": 0.3546099290780142, |
| "eval_loss": 5.34489107131958, |
| "eval_runtime": 21.0838, |
| "step": 1250, |
| "tokens_trained": 0.118807672 |
| }, |
| { |
| "epoch": 0.35517730496453903, |
| "grad_norm": 1.8439521789550781, |
| "loss": 5.3796, |
| "lr": 0.000894965034965035, |
| "step": 1252, |
| "tokens_trained": 0.118996672 |
| }, |
| { |
| "epoch": 0.3557446808510638, |
| "grad_norm": 1.7830157279968262, |
| "loss": 5.3435, |
| "lr": 0.0008946853146853147, |
| "step": 1254, |
| "tokens_trained": 0.119189544 |
| }, |
| { |
| "epoch": 0.35631205673758864, |
| "grad_norm": 1.6022379398345947, |
| "loss": 5.3772, |
| "lr": 0.0008944055944055944, |
| "step": 1256, |
| "tokens_trained": 0.119379312 |
| }, |
| { |
| "epoch": 0.3568794326241135, |
| "grad_norm": 1.6100343465805054, |
| "loss": 5.3411, |
| "lr": 0.0008941258741258741, |
| "step": 1258, |
| "tokens_trained": 0.119572072 |
| }, |
| { |
| "epoch": 0.3574468085106383, |
| "grad_norm": 1.7826210260391235, |
| "loss": 5.317, |
| "lr": 0.0008938461538461538, |
| "step": 1260, |
| "tokens_trained": 0.119761304 |
| }, |
| { |
| "epoch": 0.3580141843971631, |
| "grad_norm": 1.510432243347168, |
| "loss": 5.4018, |
| "lr": 0.0008935664335664337, |
| "step": 1262, |
| "tokens_trained": 0.11994984 |
| }, |
| { |
| "epoch": 0.35858156028368793, |
| "grad_norm": 1.7209227085113525, |
| "loss": 5.3651, |
| "lr": 0.0008932867132867133, |
| "step": 1264, |
| "tokens_trained": 0.120139368 |
| }, |
| { |
| "epoch": 0.35914893617021276, |
| "grad_norm": 1.7528654336929321, |
| "loss": 5.3329, |
| "lr": 0.000893006993006993, |
| "step": 1266, |
| "tokens_trained": 0.1203308 |
| }, |
| { |
| "epoch": 0.3597163120567376, |
| "grad_norm": 1.8427083492279053, |
| "loss": 5.3897, |
| "lr": 0.0008927272727272727, |
| "step": 1268, |
| "tokens_trained": 0.12052112 |
| }, |
| { |
| "epoch": 0.36028368794326243, |
| "grad_norm": 1.530527114868164, |
| "loss": 5.3407, |
| "lr": 0.0008924475524475525, |
| "step": 1270, |
| "tokens_trained": 0.120709456 |
| }, |
| { |
| "epoch": 0.3608510638297872, |
| "grad_norm": 1.5996145009994507, |
| "loss": 5.3697, |
| "lr": 0.0008921678321678322, |
| "step": 1272, |
| "tokens_trained": 0.12089976 |
| }, |
| { |
| "epoch": 0.36141843971631205, |
| "grad_norm": 1.5235425233840942, |
| "loss": 5.335, |
| "lr": 0.0008918881118881119, |
| "step": 1274, |
| "tokens_trained": 0.121089184 |
| }, |
| { |
| "epoch": 0.3619858156028369, |
| "grad_norm": 1.757206678390503, |
| "loss": 5.2983, |
| "lr": 0.0008916083916083916, |
| "step": 1276, |
| "tokens_trained": 0.1212798 |
| }, |
| { |
| "epoch": 0.3625531914893617, |
| "grad_norm": 1.5952467918395996, |
| "loss": 5.3593, |
| "lr": 0.0008913286713286713, |
| "step": 1278, |
| "tokens_trained": 0.121472816 |
| }, |
| { |
| "epoch": 0.36312056737588655, |
| "grad_norm": 1.6975666284561157, |
| "loss": 5.3867, |
| "lr": 0.0008910489510489512, |
| "step": 1280, |
| "tokens_trained": 0.121659944 |
| }, |
| { |
| "epoch": 0.36368794326241133, |
| "grad_norm": 1.8659151792526245, |
| "loss": 5.3032, |
| "lr": 0.0008907692307692308, |
| "step": 1282, |
| "tokens_trained": 0.121848552 |
| }, |
| { |
| "epoch": 0.36425531914893616, |
| "grad_norm": 1.8692409992218018, |
| "loss": 5.3643, |
| "lr": 0.0008904895104895105, |
| "step": 1284, |
| "tokens_trained": 0.12203916 |
| }, |
| { |
| "epoch": 0.364822695035461, |
| "grad_norm": 1.786490559577942, |
| "loss": 5.4001, |
| "lr": 0.0008902097902097902, |
| "step": 1286, |
| "tokens_trained": 0.122228464 |
| }, |
| { |
| "epoch": 0.36539007092198583, |
| "grad_norm": 1.6635786294937134, |
| "loss": 5.3158, |
| "lr": 0.00088993006993007, |
| "step": 1288, |
| "tokens_trained": 0.122419768 |
| }, |
| { |
| "epoch": 0.3659574468085106, |
| "grad_norm": 1.8413279056549072, |
| "loss": 5.315, |
| "lr": 0.0008896503496503497, |
| "step": 1290, |
| "tokens_trained": 0.122608512 |
| }, |
| { |
| "epoch": 0.36652482269503545, |
| "grad_norm": 1.802370548248291, |
| "loss": 5.3203, |
| "lr": 0.0008893706293706294, |
| "step": 1292, |
| "tokens_trained": 0.122795944 |
| }, |
| { |
| "epoch": 0.3670921985815603, |
| "grad_norm": 1.5968035459518433, |
| "loss": 5.3833, |
| "lr": 0.000889090909090909, |
| "step": 1294, |
| "tokens_trained": 0.1229842 |
| }, |
| { |
| "epoch": 0.3676595744680851, |
| "grad_norm": 1.8354761600494385, |
| "loss": 5.3365, |
| "lr": 0.0008888111888111888, |
| "step": 1296, |
| "tokens_trained": 0.123175336 |
| }, |
| { |
| "epoch": 0.36822695035460995, |
| "grad_norm": 1.925227403640747, |
| "loss": 5.3687, |
| "lr": 0.0008885314685314686, |
| "step": 1298, |
| "tokens_trained": 0.123366848 |
| }, |
| { |
| "epoch": 0.36879432624113473, |
| "grad_norm": 1.7477060556411743, |
| "loss": 5.4033, |
| "lr": 0.0008882517482517483, |
| "step": 1300, |
| "tokens_trained": 0.123556656 |
| }, |
| { |
| "epoch": 0.36936170212765956, |
| "grad_norm": 1.8925527334213257, |
| "loss": 5.2854, |
| "lr": 0.000887972027972028, |
| "step": 1302, |
| "tokens_trained": 0.12374612 |
| }, |
| { |
| "epoch": 0.3699290780141844, |
| "grad_norm": 1.8904681205749512, |
| "loss": 5.2903, |
| "lr": 0.0008876923076923077, |
| "step": 1304, |
| "tokens_trained": 0.123936192 |
| }, |
| { |
| "epoch": 0.37049645390070923, |
| "grad_norm": 1.9903556108474731, |
| "loss": 5.2994, |
| "lr": 0.0008874125874125875, |
| "step": 1306, |
| "tokens_trained": 0.124126112 |
| }, |
| { |
| "epoch": 0.37106382978723407, |
| "grad_norm": 2.014011859893799, |
| "loss": 5.353, |
| "lr": 0.0008871328671328671, |
| "step": 1308, |
| "tokens_trained": 0.124314592 |
| }, |
| { |
| "epoch": 0.37163120567375885, |
| "grad_norm": 1.9086287021636963, |
| "loss": 5.3924, |
| "lr": 0.0008868531468531469, |
| "step": 1310, |
| "tokens_trained": 0.124503496 |
| }, |
| { |
| "epoch": 0.3721985815602837, |
| "grad_norm": 1.8927134275436401, |
| "loss": 5.3098, |
| "lr": 0.0008865734265734265, |
| "step": 1312, |
| "tokens_trained": 0.124693296 |
| }, |
| { |
| "epoch": 0.3727659574468085, |
| "grad_norm": 1.850883960723877, |
| "loss": 5.356, |
| "lr": 0.0008862937062937063, |
| "step": 1314, |
| "tokens_trained": 0.124883528 |
| }, |
| { |
| "epoch": 0.37333333333333335, |
| "grad_norm": 1.813315510749817, |
| "loss": 5.3564, |
| "lr": 0.0008860139860139861, |
| "step": 1316, |
| "tokens_trained": 0.125072328 |
| }, |
| { |
| "epoch": 0.37390070921985813, |
| "grad_norm": 1.6776509284973145, |
| "loss": 5.3348, |
| "lr": 0.0008857342657342658, |
| "step": 1318, |
| "tokens_trained": 0.125263128 |
| }, |
| { |
| "epoch": 0.37446808510638296, |
| "grad_norm": 1.7775620222091675, |
| "loss": 5.298, |
| "lr": 0.0008854545454545455, |
| "step": 1320, |
| "tokens_trained": 0.125453944 |
| }, |
| { |
| "epoch": 0.3750354609929078, |
| "grad_norm": 1.6916086673736572, |
| "loss": 5.332, |
| "lr": 0.0008851748251748251, |
| "step": 1322, |
| "tokens_trained": 0.125644264 |
| }, |
| { |
| "epoch": 0.37560283687943263, |
| "grad_norm": 1.7182034254074097, |
| "loss": 5.3405, |
| "lr": 0.000884895104895105, |
| "step": 1324, |
| "tokens_trained": 0.125835256 |
| }, |
| { |
| "epoch": 0.37617021276595747, |
| "grad_norm": 1.690463662147522, |
| "loss": 5.355, |
| "lr": 0.0008846153846153846, |
| "step": 1326, |
| "tokens_trained": 0.126025952 |
| }, |
| { |
| "epoch": 0.37673758865248225, |
| "grad_norm": 1.7073352336883545, |
| "loss": 5.3304, |
| "lr": 0.0008843356643356644, |
| "step": 1328, |
| "tokens_trained": 0.126217456 |
| }, |
| { |
| "epoch": 0.3773049645390071, |
| "grad_norm": 1.6633049249649048, |
| "loss": 5.2724, |
| "lr": 0.000884055944055944, |
| "step": 1330, |
| "tokens_trained": 0.126407688 |
| }, |
| { |
| "epoch": 0.3778723404255319, |
| "grad_norm": 1.618843913078308, |
| "loss": 5.2952, |
| "lr": 0.0008837762237762238, |
| "step": 1332, |
| "tokens_trained": 0.126599504 |
| }, |
| { |
| "epoch": 0.37843971631205675, |
| "grad_norm": 1.7496757507324219, |
| "loss": 5.2846, |
| "lr": 0.0008834965034965036, |
| "step": 1334, |
| "tokens_trained": 0.126787648 |
| }, |
| { |
| "epoch": 0.3790070921985816, |
| "grad_norm": 1.7284750938415527, |
| "loss": 5.3229, |
| "lr": 0.0008832167832167832, |
| "step": 1336, |
| "tokens_trained": 0.126977568 |
| }, |
| { |
| "epoch": 0.37957446808510636, |
| "grad_norm": 1.55423903465271, |
| "loss": 5.3112, |
| "lr": 0.000882937062937063, |
| "step": 1338, |
| "tokens_trained": 0.12716944 |
| }, |
| { |
| "epoch": 0.3801418439716312, |
| "grad_norm": 1.5783073902130127, |
| "loss": 5.3002, |
| "lr": 0.0008826573426573426, |
| "step": 1340, |
| "tokens_trained": 0.127357296 |
| }, |
| { |
| "epoch": 0.38070921985815603, |
| "grad_norm": 1.6970964670181274, |
| "loss": 5.3003, |
| "lr": 0.0008823776223776225, |
| "step": 1342, |
| "tokens_trained": 0.127547112 |
| }, |
| { |
| "epoch": 0.38127659574468087, |
| "grad_norm": 1.8086830377578735, |
| "loss": 5.3018, |
| "lr": 0.0008820979020979021, |
| "step": 1344, |
| "tokens_trained": 0.12773616 |
| }, |
| { |
| "epoch": 0.38184397163120565, |
| "grad_norm": 1.6589199304580688, |
| "loss": 5.2903, |
| "lr": 0.0008818181818181819, |
| "step": 1346, |
| "tokens_trained": 0.127924704 |
| }, |
| { |
| "epoch": 0.3824113475177305, |
| "grad_norm": 1.6546344757080078, |
| "loss": 5.2639, |
| "lr": 0.0008815384615384615, |
| "step": 1348, |
| "tokens_trained": 0.128114848 |
| }, |
| { |
| "epoch": 0.3829787234042553, |
| "grad_norm": 1.6867282390594482, |
| "loss": 5.2713, |
| "lr": 0.0008812587412587412, |
| "step": 1350, |
| "tokens_trained": 0.12830572 |
| }, |
| { |
| "epoch": 0.38354609929078015, |
| "grad_norm": 1.6336158514022827, |
| "loss": 5.2688, |
| "lr": 0.0008809790209790211, |
| "step": 1352, |
| "tokens_trained": 0.128497336 |
| }, |
| { |
| "epoch": 0.384113475177305, |
| "grad_norm": 1.591659665107727, |
| "loss": 5.3073, |
| "lr": 0.0008806993006993007, |
| "step": 1354, |
| "tokens_trained": 0.128689088 |
| }, |
| { |
| "epoch": 0.38468085106382977, |
| "grad_norm": 1.6427522897720337, |
| "loss": 5.2649, |
| "lr": 0.0008804195804195805, |
| "step": 1356, |
| "tokens_trained": 0.128879208 |
| }, |
| { |
| "epoch": 0.3852482269503546, |
| "grad_norm": 1.693124771118164, |
| "loss": 5.334, |
| "lr": 0.0008801398601398601, |
| "step": 1358, |
| "tokens_trained": 0.129069376 |
| }, |
| { |
| "epoch": 0.38581560283687943, |
| "grad_norm": 1.6677430868148804, |
| "loss": 5.3091, |
| "lr": 0.00087986013986014, |
| "step": 1360, |
| "tokens_trained": 0.12925972 |
| }, |
| { |
| "epoch": 0.38638297872340427, |
| "grad_norm": 1.6829359531402588, |
| "loss": 5.3529, |
| "lr": 0.0008795804195804196, |
| "step": 1362, |
| "tokens_trained": 0.129449816 |
| }, |
| { |
| "epoch": 0.3869503546099291, |
| "grad_norm": 1.6984829902648926, |
| "loss": 5.2832, |
| "lr": 0.0008793006993006993, |
| "step": 1364, |
| "tokens_trained": 0.129638736 |
| }, |
| { |
| "epoch": 0.3875177304964539, |
| "grad_norm": 1.6351298093795776, |
| "loss": 5.3654, |
| "lr": 0.000879020979020979, |
| "step": 1366, |
| "tokens_trained": 0.129831512 |
| }, |
| { |
| "epoch": 0.3880851063829787, |
| "grad_norm": 1.588394045829773, |
| "loss": 5.3203, |
| "lr": 0.0008787412587412587, |
| "step": 1368, |
| "tokens_trained": 0.130021424 |
| }, |
| { |
| "epoch": 0.38865248226950355, |
| "grad_norm": 1.7608240842819214, |
| "loss": 5.3387, |
| "lr": 0.0008784615384615386, |
| "step": 1370, |
| "tokens_trained": 0.130211848 |
| }, |
| { |
| "epoch": 0.3892198581560284, |
| "grad_norm": 1.7742120027542114, |
| "loss": 5.3054, |
| "lr": 0.0008781818181818182, |
| "step": 1372, |
| "tokens_trained": 0.130400256 |
| }, |
| { |
| "epoch": 0.38978723404255317, |
| "grad_norm": 1.8751057386398315, |
| "loss": 5.3569, |
| "lr": 0.000877902097902098, |
| "step": 1374, |
| "tokens_trained": 0.130591616 |
| }, |
| { |
| "epoch": 0.3900709219858156, |
| "eval_loss": 5.315512180328369, |
| "eval_runtime": 20.9232, |
| "step": 1375, |
| "tokens_trained": 0.130685128 |
| }, |
| { |
| "epoch": 0.390354609929078, |
| "grad_norm": 1.8666746616363525, |
| "loss": 5.3088, |
| "lr": 0.0008776223776223776, |
| "step": 1376, |
| "tokens_trained": 0.130781056 |
| }, |
| { |
| "epoch": 0.39092198581560283, |
| "grad_norm": 1.8694190979003906, |
| "loss": 5.2391, |
| "lr": 0.0008773426573426574, |
| "step": 1378, |
| "tokens_trained": 0.130971152 |
| }, |
| { |
| "epoch": 0.39148936170212767, |
| "grad_norm": 2.0663323402404785, |
| "loss": 5.3497, |
| "lr": 0.0008770629370629371, |
| "step": 1380, |
| "tokens_trained": 0.131163224 |
| }, |
| { |
| "epoch": 0.3920567375886525, |
| "grad_norm": 1.956207275390625, |
| "loss": 5.3227, |
| "lr": 0.0008767832167832168, |
| "step": 1382, |
| "tokens_trained": 0.131353832 |
| }, |
| { |
| "epoch": 0.3926241134751773, |
| "grad_norm": 1.6816498041152954, |
| "loss": 5.2626, |
| "lr": 0.0008765034965034965, |
| "step": 1384, |
| "tokens_trained": 0.13154472 |
| }, |
| { |
| "epoch": 0.3931914893617021, |
| "grad_norm": 1.655116319656372, |
| "loss": 5.3334, |
| "lr": 0.0008762237762237762, |
| "step": 1386, |
| "tokens_trained": 0.131732128 |
| }, |
| { |
| "epoch": 0.39375886524822695, |
| "grad_norm": 1.6439241170883179, |
| "loss": 5.3038, |
| "lr": 0.0008759440559440561, |
| "step": 1388, |
| "tokens_trained": 0.131920728 |
| }, |
| { |
| "epoch": 0.3943262411347518, |
| "grad_norm": 1.5000464916229248, |
| "loss": 5.2798, |
| "lr": 0.0008756643356643357, |
| "step": 1390, |
| "tokens_trained": 0.1321094 |
| }, |
| { |
| "epoch": 0.3948936170212766, |
| "grad_norm": 1.7129333019256592, |
| "loss": 5.2918, |
| "lr": 0.0008753846153846154, |
| "step": 1392, |
| "tokens_trained": 0.132299832 |
| }, |
| { |
| "epoch": 0.3954609929078014, |
| "grad_norm": 1.7489241361618042, |
| "loss": 5.3551, |
| "lr": 0.0008751048951048951, |
| "step": 1394, |
| "tokens_trained": 0.13249016 |
| }, |
| { |
| "epoch": 0.39602836879432624, |
| "grad_norm": 1.6597840785980225, |
| "loss": 5.3718, |
| "lr": 0.0008748251748251749, |
| "step": 1396, |
| "tokens_trained": 0.132680568 |
| }, |
| { |
| "epoch": 0.39659574468085107, |
| "grad_norm": 1.8800175189971924, |
| "loss": 5.3578, |
| "lr": 0.0008745454545454546, |
| "step": 1398, |
| "tokens_trained": 0.132871296 |
| }, |
| { |
| "epoch": 0.3971631205673759, |
| "grad_norm": 1.8190884590148926, |
| "loss": 5.2714, |
| "lr": 0.0008742657342657343, |
| "step": 1400, |
| "tokens_trained": 0.133062288 |
| }, |
| { |
| "epoch": 0.3977304964539007, |
| "grad_norm": 1.602634310722351, |
| "loss": 5.2914, |
| "lr": 0.0008739860139860139, |
| "step": 1402, |
| "tokens_trained": 0.133252584 |
| }, |
| { |
| "epoch": 0.3982978723404255, |
| "grad_norm": 1.7363992929458618, |
| "loss": 5.3154, |
| "lr": 0.0008737062937062937, |
| "step": 1404, |
| "tokens_trained": 0.133444784 |
| }, |
| { |
| "epoch": 0.39886524822695035, |
| "grad_norm": 1.7578014135360718, |
| "loss": 5.3735, |
| "lr": 0.0008734265734265734, |
| "step": 1406, |
| "tokens_trained": 0.133636288 |
| }, |
| { |
| "epoch": 0.3994326241134752, |
| "grad_norm": 1.8847187757492065, |
| "loss": 5.3118, |
| "lr": 0.0008731468531468532, |
| "step": 1408, |
| "tokens_trained": 0.133825824 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.750780701637268, |
| "loss": 5.3101, |
| "lr": 0.0008728671328671329, |
| "step": 1410, |
| "tokens_trained": 0.134016688 |
| }, |
| { |
| "epoch": 0.4005673758865248, |
| "grad_norm": 1.6785613298416138, |
| "loss": 5.2823, |
| "lr": 0.0008725874125874126, |
| "step": 1412, |
| "tokens_trained": 0.134208992 |
| }, |
| { |
| "epoch": 0.40113475177304964, |
| "grad_norm": 1.7419382333755493, |
| "loss": 5.2388, |
| "lr": 0.0008723076923076924, |
| "step": 1414, |
| "tokens_trained": 0.134398376 |
| }, |
| { |
| "epoch": 0.40170212765957447, |
| "grad_norm": 1.6936920881271362, |
| "loss": 5.2824, |
| "lr": 0.000872027972027972, |
| "step": 1416, |
| "tokens_trained": 0.134589264 |
| }, |
| { |
| "epoch": 0.4022695035460993, |
| "grad_norm": 1.7408183813095093, |
| "loss": 5.2692, |
| "lr": 0.0008717482517482518, |
| "step": 1418, |
| "tokens_trained": 0.134776568 |
| }, |
| { |
| "epoch": 0.40283687943262414, |
| "grad_norm": 1.7089916467666626, |
| "loss": 5.2309, |
| "lr": 0.0008714685314685314, |
| "step": 1420, |
| "tokens_trained": 0.134967616 |
| }, |
| { |
| "epoch": 0.4034042553191489, |
| "grad_norm": 1.6850922107696533, |
| "loss": 5.3656, |
| "lr": 0.0008711888111888112, |
| "step": 1422, |
| "tokens_trained": 0.135158272 |
| }, |
| { |
| "epoch": 0.40397163120567375, |
| "grad_norm": 1.546431064605713, |
| "loss": 5.3455, |
| "lr": 0.0008709090909090909, |
| "step": 1424, |
| "tokens_trained": 0.135349512 |
| }, |
| { |
| "epoch": 0.4045390070921986, |
| "grad_norm": 1.3656421899795532, |
| "loss": 5.2842, |
| "lr": 0.0008706293706293707, |
| "step": 1426, |
| "tokens_trained": 0.135538512 |
| }, |
| { |
| "epoch": 0.4051063829787234, |
| "grad_norm": 1.5918062925338745, |
| "loss": 5.3243, |
| "lr": 0.0008703496503496504, |
| "step": 1428, |
| "tokens_trained": 0.13572968 |
| }, |
| { |
| "epoch": 0.4056737588652482, |
| "grad_norm": 1.563009262084961, |
| "loss": 5.2539, |
| "lr": 0.00087006993006993, |
| "step": 1430, |
| "tokens_trained": 0.135919568 |
| }, |
| { |
| "epoch": 0.40624113475177304, |
| "grad_norm": 1.6144121885299683, |
| "loss": 5.2844, |
| "lr": 0.0008697902097902099, |
| "step": 1432, |
| "tokens_trained": 0.136109304 |
| }, |
| { |
| "epoch": 0.40680851063829787, |
| "grad_norm": 1.5911130905151367, |
| "loss": 5.3205, |
| "lr": 0.0008695104895104895, |
| "step": 1434, |
| "tokens_trained": 0.136296696 |
| }, |
| { |
| "epoch": 0.4073758865248227, |
| "grad_norm": 1.60932457447052, |
| "loss": 5.3783, |
| "lr": 0.0008692307692307693, |
| "step": 1436, |
| "tokens_trained": 0.136484912 |
| }, |
| { |
| "epoch": 0.40794326241134754, |
| "grad_norm": 1.559644341468811, |
| "loss": 5.2785, |
| "lr": 0.0008689510489510489, |
| "step": 1438, |
| "tokens_trained": 0.136675736 |
| }, |
| { |
| "epoch": 0.4085106382978723, |
| "grad_norm": 1.5167043209075928, |
| "loss": 5.3224, |
| "lr": 0.0008686713286713287, |
| "step": 1440, |
| "tokens_trained": 0.136864928 |
| }, |
| { |
| "epoch": 0.40907801418439715, |
| "grad_norm": 1.5843397378921509, |
| "loss": 5.3075, |
| "lr": 0.0008683916083916084, |
| "step": 1442, |
| "tokens_trained": 0.137056688 |
| }, |
| { |
| "epoch": 0.409645390070922, |
| "grad_norm": 1.581120491027832, |
| "loss": 5.2863, |
| "lr": 0.0008681118881118881, |
| "step": 1444, |
| "tokens_trained": 0.137244664 |
| }, |
| { |
| "epoch": 0.4102127659574468, |
| "grad_norm": 1.6355490684509277, |
| "loss": 5.348, |
| "lr": 0.0008678321678321679, |
| "step": 1446, |
| "tokens_trained": 0.13743372 |
| }, |
| { |
| "epoch": 0.41078014184397166, |
| "grad_norm": 1.5543185472488403, |
| "loss": 5.3268, |
| "lr": 0.0008675524475524475, |
| "step": 1448, |
| "tokens_trained": 0.13762696 |
| }, |
| { |
| "epoch": 0.41134751773049644, |
| "grad_norm": 1.5313750505447388, |
| "loss": 5.2784, |
| "lr": 0.0008672727272727273, |
| "step": 1450, |
| "tokens_trained": 0.137817376 |
| }, |
| { |
| "epoch": 0.41191489361702127, |
| "grad_norm": 1.7918111085891724, |
| "loss": 5.3063, |
| "lr": 0.000866993006993007, |
| "step": 1452, |
| "tokens_trained": 0.138007944 |
| }, |
| { |
| "epoch": 0.4124822695035461, |
| "grad_norm": 1.5105966329574585, |
| "loss": 5.2432, |
| "lr": 0.0008667132867132868, |
| "step": 1454, |
| "tokens_trained": 0.138199776 |
| }, |
| { |
| "epoch": 0.41304964539007094, |
| "grad_norm": 1.4441865682601929, |
| "loss": 5.269, |
| "lr": 0.0008664335664335664, |
| "step": 1456, |
| "tokens_trained": 0.13839124 |
| }, |
| { |
| "epoch": 0.4136170212765957, |
| "grad_norm": 1.473544955253601, |
| "loss": 5.2377, |
| "lr": 0.0008661538461538461, |
| "step": 1458, |
| "tokens_trained": 0.138580704 |
| }, |
| { |
| "epoch": 0.41418439716312055, |
| "grad_norm": 1.6085572242736816, |
| "loss": 5.245, |
| "lr": 0.0008658741258741259, |
| "step": 1460, |
| "tokens_trained": 0.138770176 |
| }, |
| { |
| "epoch": 0.4147517730496454, |
| "grad_norm": 1.609894871711731, |
| "loss": 5.3124, |
| "lr": 0.0008655944055944056, |
| "step": 1462, |
| "tokens_trained": 0.138961656 |
| }, |
| { |
| "epoch": 0.4153191489361702, |
| "grad_norm": 1.6923688650131226, |
| "loss": 5.3099, |
| "lr": 0.0008653146853146854, |
| "step": 1464, |
| "tokens_trained": 0.139151128 |
| }, |
| { |
| "epoch": 0.41588652482269506, |
| "grad_norm": 1.7480796575546265, |
| "loss": 5.2608, |
| "lr": 0.000865034965034965, |
| "step": 1466, |
| "tokens_trained": 0.139341168 |
| }, |
| { |
| "epoch": 0.41645390070921984, |
| "grad_norm": 1.725832223892212, |
| "loss": 5.2863, |
| "lr": 0.0008647552447552448, |
| "step": 1468, |
| "tokens_trained": 0.139530448 |
| }, |
| { |
| "epoch": 0.41702127659574467, |
| "grad_norm": 1.7886406183242798, |
| "loss": 5.231, |
| "lr": 0.0008644755244755245, |
| "step": 1470, |
| "tokens_trained": 0.13972244 |
| }, |
| { |
| "epoch": 0.4175886524822695, |
| "grad_norm": 1.803231954574585, |
| "loss": 5.2428, |
| "lr": 0.0008641958041958042, |
| "step": 1472, |
| "tokens_trained": 0.139913136 |
| }, |
| { |
| "epoch": 0.41815602836879434, |
| "grad_norm": 1.5347254276275635, |
| "loss": 5.2215, |
| "lr": 0.0008639160839160839, |
| "step": 1474, |
| "tokens_trained": 0.140104072 |
| }, |
| { |
| "epoch": 0.4187234042553192, |
| "grad_norm": 1.4485915899276733, |
| "loss": 5.2364, |
| "lr": 0.0008636363636363636, |
| "step": 1476, |
| "tokens_trained": 0.140294312 |
| }, |
| { |
| "epoch": 0.41929078014184396, |
| "grad_norm": 1.6130446195602417, |
| "loss": 5.3088, |
| "lr": 0.0008633566433566434, |
| "step": 1478, |
| "tokens_trained": 0.140482968 |
| }, |
| { |
| "epoch": 0.4198581560283688, |
| "grad_norm": 1.5839030742645264, |
| "loss": 5.3215, |
| "lr": 0.0008630769230769231, |
| "step": 1480, |
| "tokens_trained": 0.140674208 |
| }, |
| { |
| "epoch": 0.4204255319148936, |
| "grad_norm": 1.7519373893737793, |
| "loss": 5.3331, |
| "lr": 0.0008627972027972029, |
| "step": 1482, |
| "tokens_trained": 0.140864408 |
| }, |
| { |
| "epoch": 0.42099290780141846, |
| "grad_norm": 1.6718385219573975, |
| "loss": 5.231, |
| "lr": 0.0008625174825174825, |
| "step": 1484, |
| "tokens_trained": 0.141054696 |
| }, |
| { |
| "epoch": 0.42156028368794324, |
| "grad_norm": 1.5733797550201416, |
| "loss": 5.2621, |
| "lr": 0.0008622377622377622, |
| "step": 1486, |
| "tokens_trained": 0.141245712 |
| }, |
| { |
| "epoch": 0.4221276595744681, |
| "grad_norm": 1.549985647201538, |
| "loss": 5.2574, |
| "lr": 0.000861958041958042, |
| "step": 1488, |
| "tokens_trained": 0.141434232 |
| }, |
| { |
| "epoch": 0.4226950354609929, |
| "grad_norm": 1.651908278465271, |
| "loss": 5.2953, |
| "lr": 0.0008616783216783217, |
| "step": 1490, |
| "tokens_trained": 0.141623936 |
| }, |
| { |
| "epoch": 0.42326241134751774, |
| "grad_norm": 1.5680350065231323, |
| "loss": 5.288, |
| "lr": 0.0008613986013986014, |
| "step": 1492, |
| "tokens_trained": 0.141813904 |
| }, |
| { |
| "epoch": 0.4238297872340426, |
| "grad_norm": 1.5155646800994873, |
| "loss": 5.2529, |
| "lr": 0.0008611188811188811, |
| "step": 1494, |
| "tokens_trained": 0.14200372 |
| }, |
| { |
| "epoch": 0.42439716312056736, |
| "grad_norm": 1.5949562788009644, |
| "loss": 5.3064, |
| "lr": 0.0008608391608391609, |
| "step": 1496, |
| "tokens_trained": 0.142194496 |
| }, |
| { |
| "epoch": 0.4249645390070922, |
| "grad_norm": 1.6359357833862305, |
| "loss": 5.3452, |
| "lr": 0.0008605594405594406, |
| "step": 1498, |
| "tokens_trained": 0.142384592 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "grad_norm": 1.648120403289795, |
| "loss": 5.3427, |
| "lr": 0.0008602797202797203, |
| "step": 1500, |
| "tokens_trained": 0.142573368 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "eval_loss": 5.282389163970947, |
| "eval_runtime": 20.5657, |
| "step": 1500, |
| "tokens_trained": 0.142573368 |
| }, |
| { |
| "epoch": 0.42609929078014186, |
| "grad_norm": 1.6313989162445068, |
| "loss": 5.2442, |
| "lr": 0.00086, |
| "step": 1502, |
| "tokens_trained": 0.142764584 |
| }, |
| { |
| "epoch": 0.4266666666666667, |
| "grad_norm": 1.447824239730835, |
| "loss": 5.2979, |
| "lr": 0.0008597202797202797, |
| "step": 1504, |
| "tokens_trained": 0.142953912 |
| }, |
| { |
| "epoch": 0.4272340425531915, |
| "grad_norm": 1.4285600185394287, |
| "loss": 5.317, |
| "lr": 0.0008594405594405595, |
| "step": 1506, |
| "tokens_trained": 0.143145944 |
| }, |
| { |
| "epoch": 0.4278014184397163, |
| "grad_norm": 1.4464077949523926, |
| "loss": 5.2746, |
| "lr": 0.0008591608391608392, |
| "step": 1508, |
| "tokens_trained": 0.1433374 |
| }, |
| { |
| "epoch": 0.42836879432624114, |
| "grad_norm": 1.3554625511169434, |
| "loss": 5.276, |
| "lr": 0.0008588811188811188, |
| "step": 1510, |
| "tokens_trained": 0.143529088 |
| }, |
| { |
| "epoch": 0.428936170212766, |
| "grad_norm": 1.4690148830413818, |
| "loss": 5.2976, |
| "lr": 0.0008586013986013986, |
| "step": 1512, |
| "tokens_trained": 0.1437192 |
| }, |
| { |
| "epoch": 0.42950354609929076, |
| "grad_norm": 1.4911222457885742, |
| "loss": 5.2727, |
| "lr": 0.0008583216783216783, |
| "step": 1514, |
| "tokens_trained": 0.143907728 |
| }, |
| { |
| "epoch": 0.4300709219858156, |
| "grad_norm": 1.5823880434036255, |
| "loss": 5.2481, |
| "lr": 0.0008580419580419581, |
| "step": 1516, |
| "tokens_trained": 0.144097048 |
| }, |
| { |
| "epoch": 0.4306382978723404, |
| "grad_norm": 1.486588716506958, |
| "loss": 5.2561, |
| "lr": 0.0008577622377622378, |
| "step": 1518, |
| "tokens_trained": 0.14428652 |
| }, |
| { |
| "epoch": 0.43120567375886526, |
| "grad_norm": 1.5762882232666016, |
| "loss": 5.267, |
| "lr": 0.0008574825174825175, |
| "step": 1520, |
| "tokens_trained": 0.144476848 |
| }, |
| { |
| "epoch": 0.4317730496453901, |
| "grad_norm": 1.6832828521728516, |
| "loss": 5.3329, |
| "lr": 0.0008572027972027972, |
| "step": 1522, |
| "tokens_trained": 0.144667568 |
| }, |
| { |
| "epoch": 0.4323404255319149, |
| "grad_norm": 1.7036137580871582, |
| "loss": 5.2326, |
| "lr": 0.000856923076923077, |
| "step": 1524, |
| "tokens_trained": 0.144860328 |
| }, |
| { |
| "epoch": 0.4329078014184397, |
| "grad_norm": 1.8102291822433472, |
| "loss": 5.251, |
| "lr": 0.0008566433566433567, |
| "step": 1526, |
| "tokens_trained": 0.1450528 |
| }, |
| { |
| "epoch": 0.43347517730496454, |
| "grad_norm": 1.667229413986206, |
| "loss": 5.2841, |
| "lr": 0.0008563636363636363, |
| "step": 1528, |
| "tokens_trained": 0.145240952 |
| }, |
| { |
| "epoch": 0.4340425531914894, |
| "grad_norm": 1.6709800958633423, |
| "loss": 5.2387, |
| "lr": 0.0008560839160839161, |
| "step": 1530, |
| "tokens_trained": 0.145431376 |
| }, |
| { |
| "epoch": 0.4346099290780142, |
| "grad_norm": 1.600885272026062, |
| "loss": 5.2179, |
| "lr": 0.0008558041958041958, |
| "step": 1532, |
| "tokens_trained": 0.145620184 |
| }, |
| { |
| "epoch": 0.435177304964539, |
| "grad_norm": 1.5783873796463013, |
| "loss": 5.2432, |
| "lr": 0.0008555244755244756, |
| "step": 1534, |
| "tokens_trained": 0.145810616 |
| }, |
| { |
| "epoch": 0.4357446808510638, |
| "grad_norm": 1.5059685707092285, |
| "loss": 5.2604, |
| "lr": 0.0008552447552447553, |
| "step": 1536, |
| "tokens_trained": 0.14600232 |
| }, |
| { |
| "epoch": 0.43631205673758866, |
| "grad_norm": 1.5880341529846191, |
| "loss": 5.249, |
| "lr": 0.000854965034965035, |
| "step": 1538, |
| "tokens_trained": 0.146192504 |
| }, |
| { |
| "epoch": 0.4368794326241135, |
| "grad_norm": 1.430004596710205, |
| "loss": 5.2668, |
| "lr": 0.0008546853146853147, |
| "step": 1540, |
| "tokens_trained": 0.146382264 |
| }, |
| { |
| "epoch": 0.4374468085106383, |
| "grad_norm": 1.4099256992340088, |
| "loss": 5.2839, |
| "lr": 0.0008544055944055944, |
| "step": 1542, |
| "tokens_trained": 0.146570432 |
| }, |
| { |
| "epoch": 0.4380141843971631, |
| "grad_norm": 1.3938827514648438, |
| "loss": 5.2534, |
| "lr": 0.0008541258741258742, |
| "step": 1544, |
| "tokens_trained": 0.146763736 |
| }, |
| { |
| "epoch": 0.43858156028368794, |
| "grad_norm": 1.4359923601150513, |
| "loss": 5.2202, |
| "lr": 0.0008538461538461538, |
| "step": 1546, |
| "tokens_trained": 0.146953944 |
| }, |
| { |
| "epoch": 0.4391489361702128, |
| "grad_norm": 1.5405043363571167, |
| "loss": 5.2613, |
| "lr": 0.0008535664335664336, |
| "step": 1548, |
| "tokens_trained": 0.147144664 |
| }, |
| { |
| "epoch": 0.4397163120567376, |
| "grad_norm": 1.6448051929473877, |
| "loss": 5.299, |
| "lr": 0.0008532867132867133, |
| "step": 1550, |
| "tokens_trained": 0.147335064 |
| }, |
| { |
| "epoch": 0.4402836879432624, |
| "grad_norm": 1.6528949737548828, |
| "loss": 5.3004, |
| "lr": 0.000853006993006993, |
| "step": 1552, |
| "tokens_trained": 0.147524088 |
| }, |
| { |
| "epoch": 0.4408510638297872, |
| "grad_norm": 1.637702226638794, |
| "loss": 5.2298, |
| "lr": 0.0008527272727272728, |
| "step": 1554, |
| "tokens_trained": 0.147716296 |
| }, |
| { |
| "epoch": 0.44141843971631206, |
| "grad_norm": 1.7230212688446045, |
| "loss": 5.2806, |
| "lr": 0.0008524475524475524, |
| "step": 1556, |
| "tokens_trained": 0.147905216 |
| }, |
| { |
| "epoch": 0.4419858156028369, |
| "grad_norm": 1.6216089725494385, |
| "loss": 5.3062, |
| "lr": 0.0008521678321678322, |
| "step": 1558, |
| "tokens_trained": 0.148092312 |
| }, |
| { |
| "epoch": 0.4425531914893617, |
| "grad_norm": 1.5734955072402954, |
| "loss": 5.2607, |
| "lr": 0.0008518881118881119, |
| "step": 1560, |
| "tokens_trained": 0.148282712 |
| }, |
| { |
| "epoch": 0.4431205673758865, |
| "grad_norm": 1.6687103509902954, |
| "loss": 5.2737, |
| "lr": 0.0008516083916083917, |
| "step": 1562, |
| "tokens_trained": 0.148474672 |
| }, |
| { |
| "epoch": 0.44368794326241134, |
| "grad_norm": 1.547277569770813, |
| "loss": 5.3183, |
| "lr": 0.0008513286713286713, |
| "step": 1564, |
| "tokens_trained": 0.148667824 |
| }, |
| { |
| "epoch": 0.4442553191489362, |
| "grad_norm": 1.3782074451446533, |
| "loss": 5.266, |
| "lr": 0.000851048951048951, |
| "step": 1566, |
| "tokens_trained": 0.14885704 |
| }, |
| { |
| "epoch": 0.444822695035461, |
| "grad_norm": 1.5648273229599, |
| "loss": 5.2954, |
| "lr": 0.0008507692307692308, |
| "step": 1568, |
| "tokens_trained": 0.14904804 |
| }, |
| { |
| "epoch": 0.4453900709219858, |
| "grad_norm": 1.5675908327102661, |
| "loss": 5.2897, |
| "lr": 0.0008504895104895105, |
| "step": 1570, |
| "tokens_trained": 0.149237048 |
| }, |
| { |
| "epoch": 0.4459574468085106, |
| "grad_norm": 1.5399287939071655, |
| "loss": 5.2993, |
| "lr": 0.0008502097902097903, |
| "step": 1572, |
| "tokens_trained": 0.149427328 |
| }, |
| { |
| "epoch": 0.44652482269503546, |
| "grad_norm": 1.7170253992080688, |
| "loss": 5.2756, |
| "lr": 0.0008499300699300699, |
| "step": 1574, |
| "tokens_trained": 0.149618448 |
| }, |
| { |
| "epoch": 0.4470921985815603, |
| "grad_norm": 1.5694142580032349, |
| "loss": 5.2294, |
| "lr": 0.0008496503496503497, |
| "step": 1576, |
| "tokens_trained": 0.149809416 |
| }, |
| { |
| "epoch": 0.44765957446808513, |
| "grad_norm": 1.5410487651824951, |
| "loss": 5.2392, |
| "lr": 0.0008493706293706294, |
| "step": 1578, |
| "tokens_trained": 0.149999608 |
| }, |
| { |
| "epoch": 0.4482269503546099, |
| "grad_norm": 1.5991896390914917, |
| "loss": 5.2569, |
| "lr": 0.0008490909090909091, |
| "step": 1580, |
| "tokens_trained": 0.150190224 |
| }, |
| { |
| "epoch": 0.44879432624113474, |
| "grad_norm": 1.5861775875091553, |
| "loss": 5.3151, |
| "lr": 0.0008488111888111888, |
| "step": 1582, |
| "tokens_trained": 0.150380592 |
| }, |
| { |
| "epoch": 0.4493617021276596, |
| "grad_norm": 1.530462622642517, |
| "loss": 5.3242, |
| "lr": 0.0008485314685314685, |
| "step": 1584, |
| "tokens_trained": 0.15056992 |
| }, |
| { |
| "epoch": 0.4499290780141844, |
| "grad_norm": 1.5658655166625977, |
| "loss": 5.2933, |
| "lr": 0.0008482517482517483, |
| "step": 1586, |
| "tokens_trained": 0.150760336 |
| }, |
| { |
| "epoch": 0.4504964539007092, |
| "grad_norm": 1.4187430143356323, |
| "loss": 5.2235, |
| "lr": 0.000847972027972028, |
| "step": 1588, |
| "tokens_trained": 0.150949088 |
| }, |
| { |
| "epoch": 0.451063829787234, |
| "grad_norm": 1.6921541690826416, |
| "loss": 5.2496, |
| "lr": 0.0008476923076923078, |
| "step": 1590, |
| "tokens_trained": 0.151140016 |
| }, |
| { |
| "epoch": 0.45163120567375886, |
| "grad_norm": 1.6049220561981201, |
| "loss": 5.2767, |
| "lr": 0.0008474125874125874, |
| "step": 1592, |
| "tokens_trained": 0.151330944 |
| }, |
| { |
| "epoch": 0.4521985815602837, |
| "grad_norm": 1.513168454170227, |
| "loss": 5.2904, |
| "lr": 0.0008471328671328671, |
| "step": 1594, |
| "tokens_trained": 0.151520152 |
| }, |
| { |
| "epoch": 0.45276595744680853, |
| "grad_norm": 1.5247087478637695, |
| "loss": 5.2391, |
| "lr": 0.0008468531468531469, |
| "step": 1596, |
| "tokens_trained": 0.151711592 |
| }, |
| { |
| "epoch": 0.4533333333333333, |
| "grad_norm": 1.5005898475646973, |
| "loss": 5.3025, |
| "lr": 0.0008465734265734266, |
| "step": 1598, |
| "tokens_trained": 0.151902736 |
| }, |
| { |
| "epoch": 0.45390070921985815, |
| "grad_norm": 1.3196156024932861, |
| "loss": 5.3025, |
| "lr": 0.0008462937062937063, |
| "step": 1600, |
| "tokens_trained": 0.152094032 |
| }, |
| { |
| "epoch": 0.454468085106383, |
| "grad_norm": 1.5037102699279785, |
| "loss": 5.2348, |
| "lr": 0.000846013986013986, |
| "step": 1602, |
| "tokens_trained": 0.15228336 |
| }, |
| { |
| "epoch": 0.4550354609929078, |
| "grad_norm": 1.404539942741394, |
| "loss": 5.2551, |
| "lr": 0.0008457342657342658, |
| "step": 1604, |
| "tokens_trained": 0.152474776 |
| }, |
| { |
| "epoch": 0.45560283687943265, |
| "grad_norm": 1.4784883260726929, |
| "loss": 5.2927, |
| "lr": 0.0008454545454545455, |
| "step": 1606, |
| "tokens_trained": 0.152663392 |
| }, |
| { |
| "epoch": 0.45617021276595743, |
| "grad_norm": 1.3743332624435425, |
| "loss": 5.2542, |
| "lr": 0.0008451748251748252, |
| "step": 1608, |
| "tokens_trained": 0.152852512 |
| }, |
| { |
| "epoch": 0.45673758865248226, |
| "grad_norm": 1.4161995649337769, |
| "loss": 5.2518, |
| "lr": 0.0008448951048951049, |
| "step": 1610, |
| "tokens_trained": 0.15304428 |
| }, |
| { |
| "epoch": 0.4573049645390071, |
| "grad_norm": 1.5045989751815796, |
| "loss": 5.2735, |
| "lr": 0.0008446153846153846, |
| "step": 1612, |
| "tokens_trained": 0.153234632 |
| }, |
| { |
| "epoch": 0.45787234042553193, |
| "grad_norm": 1.3695783615112305, |
| "loss": 5.2294, |
| "lr": 0.0008443356643356644, |
| "step": 1614, |
| "tokens_trained": 0.1534248 |
| }, |
| { |
| "epoch": 0.4584397163120567, |
| "grad_norm": 1.4551646709442139, |
| "loss": 5.2639, |
| "lr": 0.0008440559440559441, |
| "step": 1616, |
| "tokens_trained": 0.153614944 |
| }, |
| { |
| "epoch": 0.45900709219858155, |
| "grad_norm": 1.5018376111984253, |
| "loss": 5.2989, |
| "lr": 0.0008437762237762238, |
| "step": 1618, |
| "tokens_trained": 0.153803784 |
| }, |
| { |
| "epoch": 0.4595744680851064, |
| "grad_norm": 1.5295960903167725, |
| "loss": 5.33, |
| "lr": 0.0008434965034965035, |
| "step": 1620, |
| "tokens_trained": 0.153993752 |
| }, |
| { |
| "epoch": 0.4601418439716312, |
| "grad_norm": 1.417626142501831, |
| "loss": 5.2134, |
| "lr": 0.0008432167832167832, |
| "step": 1622, |
| "tokens_trained": 0.154184448 |
| }, |
| { |
| "epoch": 0.46070921985815605, |
| "grad_norm": 1.5715348720550537, |
| "loss": 5.2782, |
| "lr": 0.000842937062937063, |
| "step": 1624, |
| "tokens_trained": 0.154373632 |
| }, |
| { |
| "epoch": 0.46099290780141844, |
| "eval_loss": 5.266384601593018, |
| "eval_runtime": 21.0916, |
| "step": 1625, |
| "tokens_trained": 0.154468808 |
| }, |
| { |
| "epoch": 0.46127659574468083, |
| "grad_norm": 1.5504534244537354, |
| "loss": 5.2307, |
| "lr": 0.0008426573426573427, |
| "step": 1626, |
| "tokens_trained": 0.154564864 |
| }, |
| { |
| "epoch": 0.46184397163120566, |
| "grad_norm": 1.483108401298523, |
| "loss": 5.2578, |
| "lr": 0.0008423776223776224, |
| "step": 1628, |
| "tokens_trained": 0.154755312 |
| }, |
| { |
| "epoch": 0.4624113475177305, |
| "grad_norm": 1.5631264448165894, |
| "loss": 5.3291, |
| "lr": 0.0008420979020979021, |
| "step": 1630, |
| "tokens_trained": 0.154943736 |
| }, |
| { |
| "epoch": 0.46297872340425533, |
| "grad_norm": 1.4680705070495605, |
| "loss": 5.2256, |
| "lr": 0.0008418181818181819, |
| "step": 1632, |
| "tokens_trained": 0.15513452 |
| }, |
| { |
| "epoch": 0.46354609929078017, |
| "grad_norm": 1.468338966369629, |
| "loss": 5.2712, |
| "lr": 0.0008415384615384616, |
| "step": 1634, |
| "tokens_trained": 0.155325288 |
| }, |
| { |
| "epoch": 0.46411347517730495, |
| "grad_norm": 1.4557780027389526, |
| "loss": 5.2808, |
| "lr": 0.0008412587412587412, |
| "step": 1636, |
| "tokens_trained": 0.155515328 |
| }, |
| { |
| "epoch": 0.4646808510638298, |
| "grad_norm": 1.4534999132156372, |
| "loss": 5.2707, |
| "lr": 0.000840979020979021, |
| "step": 1638, |
| "tokens_trained": 0.155706752 |
| }, |
| { |
| "epoch": 0.4652482269503546, |
| "grad_norm": 1.4011393785476685, |
| "loss": 5.3028, |
| "lr": 0.0008406993006993006, |
| "step": 1640, |
| "tokens_trained": 0.155895336 |
| }, |
| { |
| "epoch": 0.46581560283687945, |
| "grad_norm": 1.307922601699829, |
| "loss": 5.2188, |
| "lr": 0.0008404195804195805, |
| "step": 1642, |
| "tokens_trained": 0.156085936 |
| }, |
| { |
| "epoch": 0.46638297872340423, |
| "grad_norm": 1.359922170639038, |
| "loss": 5.2863, |
| "lr": 0.0008401398601398602, |
| "step": 1644, |
| "tokens_trained": 0.15627636 |
| }, |
| { |
| "epoch": 0.46695035460992906, |
| "grad_norm": 1.6204577684402466, |
| "loss": 5.2877, |
| "lr": 0.0008398601398601399, |
| "step": 1646, |
| "tokens_trained": 0.156465192 |
| }, |
| { |
| "epoch": 0.4675177304964539, |
| "grad_norm": 1.7367322444915771, |
| "loss": 5.2501, |
| "lr": 0.0008395804195804196, |
| "step": 1648, |
| "tokens_trained": 0.15665336 |
| }, |
| { |
| "epoch": 0.46808510638297873, |
| "grad_norm": 1.7013088464736938, |
| "loss": 5.2522, |
| "lr": 0.0008393006993006993, |
| "step": 1650, |
| "tokens_trained": 0.156843128 |
| }, |
| { |
| "epoch": 0.46865248226950357, |
| "grad_norm": 1.6429578065872192, |
| "loss": 5.2978, |
| "lr": 0.0008390209790209791, |
| "step": 1652, |
| "tokens_trained": 0.157034328 |
| }, |
| { |
| "epoch": 0.46921985815602835, |
| "grad_norm": 1.527243733406067, |
| "loss": 5.2384, |
| "lr": 0.0008387412587412587, |
| "step": 1654, |
| "tokens_trained": 0.157222784 |
| }, |
| { |
| "epoch": 0.4697872340425532, |
| "grad_norm": 1.4792861938476562, |
| "loss": 5.2149, |
| "lr": 0.0008384615384615385, |
| "step": 1656, |
| "tokens_trained": 0.15741308 |
| }, |
| { |
| "epoch": 0.470354609929078, |
| "grad_norm": 1.4050098657608032, |
| "loss": 5.229, |
| "lr": 0.0008381818181818181, |
| "step": 1658, |
| "tokens_trained": 0.157603872 |
| }, |
| { |
| "epoch": 0.47092198581560285, |
| "grad_norm": 1.4799182415008545, |
| "loss": 5.2235, |
| "lr": 0.000837902097902098, |
| "step": 1660, |
| "tokens_trained": 0.157793352 |
| }, |
| { |
| "epoch": 0.4714893617021277, |
| "grad_norm": 1.4031378030776978, |
| "loss": 5.23, |
| "lr": 0.0008376223776223776, |
| "step": 1662, |
| "tokens_trained": 0.157984416 |
| }, |
| { |
| "epoch": 0.47205673758865246, |
| "grad_norm": 1.5775604248046875, |
| "loss": 5.2811, |
| "lr": 0.0008373426573426573, |
| "step": 1664, |
| "tokens_trained": 0.158176048 |
| }, |
| { |
| "epoch": 0.4726241134751773, |
| "grad_norm": 1.4855432510375977, |
| "loss": 5.2363, |
| "lr": 0.0008370629370629371, |
| "step": 1666, |
| "tokens_trained": 0.158368152 |
| }, |
| { |
| "epoch": 0.47319148936170213, |
| "grad_norm": 1.5609453916549683, |
| "loss": 5.2984, |
| "lr": 0.0008367832167832168, |
| "step": 1668, |
| "tokens_trained": 0.15855684 |
| }, |
| { |
| "epoch": 0.47375886524822697, |
| "grad_norm": 1.5052629709243774, |
| "loss": 5.213, |
| "lr": 0.0008365034965034966, |
| "step": 1670, |
| "tokens_trained": 0.15874712 |
| }, |
| { |
| "epoch": 0.47432624113475175, |
| "grad_norm": 1.5655242204666138, |
| "loss": 5.2551, |
| "lr": 0.0008362237762237762, |
| "step": 1672, |
| "tokens_trained": 0.158937104 |
| }, |
| { |
| "epoch": 0.4748936170212766, |
| "grad_norm": 1.301142930984497, |
| "loss": 5.1564, |
| "lr": 0.000835944055944056, |
| "step": 1674, |
| "tokens_trained": 0.159128096 |
| }, |
| { |
| "epoch": 0.4754609929078014, |
| "grad_norm": 1.5447527170181274, |
| "loss": 5.2547, |
| "lr": 0.0008356643356643356, |
| "step": 1676, |
| "tokens_trained": 0.159318968 |
| }, |
| { |
| "epoch": 0.47602836879432625, |
| "grad_norm": 1.638100266456604, |
| "loss": 5.2301, |
| "lr": 0.0008353846153846154, |
| "step": 1678, |
| "tokens_trained": 0.159508648 |
| }, |
| { |
| "epoch": 0.4765957446808511, |
| "grad_norm": 1.6203068494796753, |
| "loss": 5.2644, |
| "lr": 0.0008351048951048951, |
| "step": 1680, |
| "tokens_trained": 0.159698648 |
| }, |
| { |
| "epoch": 0.47716312056737586, |
| "grad_norm": 1.4097110033035278, |
| "loss": 5.2047, |
| "lr": 0.0008348251748251748, |
| "step": 1682, |
| "tokens_trained": 0.159887392 |
| }, |
| { |
| "epoch": 0.4777304964539007, |
| "grad_norm": 1.3377385139465332, |
| "loss": 5.2685, |
| "lr": 0.0008345454545454546, |
| "step": 1684, |
| "tokens_trained": 0.160076904 |
| }, |
| { |
| "epoch": 0.47829787234042553, |
| "grad_norm": 1.4079371690750122, |
| "loss": 5.2842, |
| "lr": 0.0008342657342657343, |
| "step": 1686, |
| "tokens_trained": 0.160266712 |
| }, |
| { |
| "epoch": 0.47886524822695037, |
| "grad_norm": 1.6039987802505493, |
| "loss": 5.2248, |
| "lr": 0.0008339860139860141, |
| "step": 1688, |
| "tokens_trained": 0.160455464 |
| }, |
| { |
| "epoch": 0.4794326241134752, |
| "grad_norm": 1.639218807220459, |
| "loss": 5.2007, |
| "lr": 0.0008337062937062937, |
| "step": 1690, |
| "tokens_trained": 0.16064472 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.8226710557937622, |
| "loss": 5.2427, |
| "lr": 0.0008334265734265734, |
| "step": 1692, |
| "tokens_trained": 0.160835192 |
| }, |
| { |
| "epoch": 0.4805673758865248, |
| "grad_norm": 1.6480419635772705, |
| "loss": 5.1944, |
| "lr": 0.0008331468531468531, |
| "step": 1694, |
| "tokens_trained": 0.161025272 |
| }, |
| { |
| "epoch": 0.48113475177304965, |
| "grad_norm": 1.666717290878296, |
| "loss": 5.2879, |
| "lr": 0.0008328671328671329, |
| "step": 1696, |
| "tokens_trained": 0.161214016 |
| }, |
| { |
| "epoch": 0.4817021276595745, |
| "grad_norm": 1.5092660188674927, |
| "loss": 5.2612, |
| "lr": 0.0008325874125874126, |
| "step": 1698, |
| "tokens_trained": 0.161405448 |
| }, |
| { |
| "epoch": 0.48226950354609927, |
| "grad_norm": 1.4042121171951294, |
| "loss": 5.2373, |
| "lr": 0.0008323076923076923, |
| "step": 1700, |
| "tokens_trained": 0.161595896 |
| }, |
| { |
| "epoch": 0.4828368794326241, |
| "grad_norm": 1.4937382936477661, |
| "loss": 5.2172, |
| "lr": 0.000832027972027972, |
| "step": 1702, |
| "tokens_trained": 0.161783904 |
| }, |
| { |
| "epoch": 0.48340425531914893, |
| "grad_norm": 1.4652959108352661, |
| "loss": 5.2704, |
| "lr": 0.0008317482517482518, |
| "step": 1704, |
| "tokens_trained": 0.161975888 |
| }, |
| { |
| "epoch": 0.48397163120567377, |
| "grad_norm": 1.3021745681762695, |
| "loss": 5.2672, |
| "lr": 0.0008314685314685315, |
| "step": 1706, |
| "tokens_trained": 0.162165808 |
| }, |
| { |
| "epoch": 0.4845390070921986, |
| "grad_norm": 1.3580701351165771, |
| "loss": 5.2467, |
| "lr": 0.0008311888111888112, |
| "step": 1708, |
| "tokens_trained": 0.162355152 |
| }, |
| { |
| "epoch": 0.4851063829787234, |
| "grad_norm": 1.480072259902954, |
| "loss": 5.2797, |
| "lr": 0.0008309090909090909, |
| "step": 1710, |
| "tokens_trained": 0.162544744 |
| }, |
| { |
| "epoch": 0.4856737588652482, |
| "grad_norm": 1.3532829284667969, |
| "loss": 5.2556, |
| "lr": 0.0008306293706293706, |
| "step": 1712, |
| "tokens_trained": 0.162734976 |
| }, |
| { |
| "epoch": 0.48624113475177305, |
| "grad_norm": 1.240332007408142, |
| "loss": 5.2153, |
| "lr": 0.0008303496503496504, |
| "step": 1714, |
| "tokens_trained": 0.162924992 |
| }, |
| { |
| "epoch": 0.4868085106382979, |
| "grad_norm": 1.4141086339950562, |
| "loss": 5.2056, |
| "lr": 0.00083006993006993, |
| "step": 1716, |
| "tokens_trained": 0.163114008 |
| }, |
| { |
| "epoch": 0.4873758865248227, |
| "grad_norm": 1.321721076965332, |
| "loss": 5.2223, |
| "lr": 0.0008297902097902098, |
| "step": 1718, |
| "tokens_trained": 0.163304416 |
| }, |
| { |
| "epoch": 0.4879432624113475, |
| "grad_norm": 1.5437248945236206, |
| "loss": 5.2727, |
| "lr": 0.0008295104895104895, |
| "step": 1720, |
| "tokens_trained": 0.163493816 |
| }, |
| { |
| "epoch": 0.48851063829787233, |
| "grad_norm": 1.7218859195709229, |
| "loss": 5.2323, |
| "lr": 0.0008292307692307693, |
| "step": 1722, |
| "tokens_trained": 0.163683984 |
| }, |
| { |
| "epoch": 0.48907801418439717, |
| "grad_norm": 1.5534045696258545, |
| "loss": 5.1983, |
| "lr": 0.000828951048951049, |
| "step": 1724, |
| "tokens_trained": 0.163874968 |
| }, |
| { |
| "epoch": 0.489645390070922, |
| "grad_norm": 1.3675404787063599, |
| "loss": 5.2086, |
| "lr": 0.0008286713286713287, |
| "step": 1726, |
| "tokens_trained": 0.164065152 |
| }, |
| { |
| "epoch": 0.4902127659574468, |
| "grad_norm": 1.5178970098495483, |
| "loss": 5.2529, |
| "lr": 0.0008283916083916084, |
| "step": 1728, |
| "tokens_trained": 0.164255952 |
| }, |
| { |
| "epoch": 0.4907801418439716, |
| "grad_norm": 1.4910545349121094, |
| "loss": 5.2931, |
| "lr": 0.000828111888111888, |
| "step": 1730, |
| "tokens_trained": 0.164447112 |
| }, |
| { |
| "epoch": 0.49134751773049645, |
| "grad_norm": 1.5647637844085693, |
| "loss": 5.2603, |
| "lr": 0.0008278321678321679, |
| "step": 1732, |
| "tokens_trained": 0.16463704 |
| }, |
| { |
| "epoch": 0.4919148936170213, |
| "grad_norm": 1.4607906341552734, |
| "loss": 5.2702, |
| "lr": 0.0008275524475524475, |
| "step": 1734, |
| "tokens_trained": 0.164827312 |
| }, |
| { |
| "epoch": 0.4924822695035461, |
| "grad_norm": 1.5806026458740234, |
| "loss": 5.2356, |
| "lr": 0.0008272727272727273, |
| "step": 1736, |
| "tokens_trained": 0.165015224 |
| }, |
| { |
| "epoch": 0.4930496453900709, |
| "grad_norm": 1.5417263507843018, |
| "loss": 5.262, |
| "lr": 0.000826993006993007, |
| "step": 1738, |
| "tokens_trained": 0.16520484 |
| }, |
| { |
| "epoch": 0.49361702127659574, |
| "grad_norm": 1.511680245399475, |
| "loss": 5.2634, |
| "lr": 0.0008267132867132868, |
| "step": 1740, |
| "tokens_trained": 0.165393064 |
| }, |
| { |
| "epoch": 0.49418439716312057, |
| "grad_norm": 1.4468717575073242, |
| "loss": 5.2452, |
| "lr": 0.0008264335664335665, |
| "step": 1742, |
| "tokens_trained": 0.165584472 |
| }, |
| { |
| "epoch": 0.4947517730496454, |
| "grad_norm": 1.423187017440796, |
| "loss": 5.2533, |
| "lr": 0.0008261538461538461, |
| "step": 1744, |
| "tokens_trained": 0.165773768 |
| }, |
| { |
| "epoch": 0.49531914893617024, |
| "grad_norm": 1.512462854385376, |
| "loss": 5.2152, |
| "lr": 0.0008258741258741259, |
| "step": 1746, |
| "tokens_trained": 0.165963456 |
| }, |
| { |
| "epoch": 0.495886524822695, |
| "grad_norm": 1.4620780944824219, |
| "loss": 5.2511, |
| "lr": 0.0008255944055944055, |
| "step": 1748, |
| "tokens_trained": 0.166152136 |
| }, |
| { |
| "epoch": 0.49645390070921985, |
| "grad_norm": 1.4943009614944458, |
| "loss": 5.2829, |
| "lr": 0.0008253146853146854, |
| "step": 1750, |
| "tokens_trained": 0.16634248 |
| }, |
| { |
| "epoch": 0.49645390070921985, |
| "eval_loss": 5.23966646194458, |
| "eval_runtime": 20.5954, |
| "step": 1750, |
| "tokens_trained": 0.16634248 |
| }, |
| { |
| "epoch": 0.4970212765957447, |
| "grad_norm": 1.6739267110824585, |
| "loss": 5.2306, |
| "lr": 0.000825034965034965, |
| "step": 1752, |
| "tokens_trained": 0.166532864 |
| }, |
| { |
| "epoch": 0.4975886524822695, |
| "grad_norm": 1.6125763654708862, |
| "loss": 5.2845, |
| "lr": 0.0008247552447552448, |
| "step": 1754, |
| "tokens_trained": 0.166722944 |
| }, |
| { |
| "epoch": 0.4981560283687943, |
| "grad_norm": 1.5872310400009155, |
| "loss": 5.2075, |
| "lr": 0.0008244755244755245, |
| "step": 1756, |
| "tokens_trained": 0.16691184 |
| }, |
| { |
| "epoch": 0.49872340425531914, |
| "grad_norm": 1.4396610260009766, |
| "loss": 5.2532, |
| "lr": 0.0008241958041958042, |
| "step": 1758, |
| "tokens_trained": 0.167101896 |
| }, |
| { |
| "epoch": 0.49929078014184397, |
| "grad_norm": 1.363879680633545, |
| "loss": 5.2252, |
| "lr": 0.000823916083916084, |
| "step": 1760, |
| "tokens_trained": 0.167289384 |
| }, |
| { |
| "epoch": 0.4998581560283688, |
| "grad_norm": 1.395561695098877, |
| "loss": 5.2097, |
| "lr": 0.0008236363636363636, |
| "step": 1762, |
| "tokens_trained": 0.167479424 |
| }, |
| { |
| "epoch": 0.5004255319148936, |
| "grad_norm": 1.413736343383789, |
| "loss": 5.2283, |
| "lr": 0.0008233566433566434, |
| "step": 1764, |
| "tokens_trained": 0.167668256 |
| }, |
| { |
| "epoch": 0.5009929078014185, |
| "grad_norm": 1.4240859746932983, |
| "loss": 5.2574, |
| "lr": 0.000823076923076923, |
| "step": 1766, |
| "tokens_trained": 0.167858616 |
| }, |
| { |
| "epoch": 0.5015602836879433, |
| "grad_norm": 1.437165379524231, |
| "loss": 5.2511, |
| "lr": 0.0008227972027972029, |
| "step": 1768, |
| "tokens_trained": 0.168048272 |
| }, |
| { |
| "epoch": 0.502127659574468, |
| "grad_norm": 1.458575963973999, |
| "loss": 5.2183, |
| "lr": 0.0008225174825174825, |
| "step": 1770, |
| "tokens_trained": 0.168240184 |
| }, |
| { |
| "epoch": 0.5026950354609929, |
| "grad_norm": 1.5224673748016357, |
| "loss": 5.259, |
| "lr": 0.0008222377622377622, |
| "step": 1772, |
| "tokens_trained": 0.168429536 |
| }, |
| { |
| "epoch": 0.5032624113475177, |
| "grad_norm": 1.578438401222229, |
| "loss": 5.2108, |
| "lr": 0.000821958041958042, |
| "step": 1774, |
| "tokens_trained": 0.168619312 |
| }, |
| { |
| "epoch": 0.5038297872340426, |
| "grad_norm": 1.4880632162094116, |
| "loss": 5.229, |
| "lr": 0.0008216783216783217, |
| "step": 1776, |
| "tokens_trained": 0.168808344 |
| }, |
| { |
| "epoch": 0.5043971631205674, |
| "grad_norm": 1.3741049766540527, |
| "loss": 5.2873, |
| "lr": 0.0008213986013986015, |
| "step": 1778, |
| "tokens_trained": 0.168999112 |
| }, |
| { |
| "epoch": 0.5049645390070922, |
| "grad_norm": 1.4396610260009766, |
| "loss": 5.3237, |
| "lr": 0.0008211188811188811, |
| "step": 1780, |
| "tokens_trained": 0.169189288 |
| }, |
| { |
| "epoch": 0.505531914893617, |
| "grad_norm": 1.4296880960464478, |
| "loss": 5.2228, |
| "lr": 0.0008208391608391609, |
| "step": 1782, |
| "tokens_trained": 0.16937864 |
| }, |
| { |
| "epoch": 0.5060992907801418, |
| "grad_norm": 1.5704258680343628, |
| "loss": 5.2569, |
| "lr": 0.0008205594405594405, |
| "step": 1784, |
| "tokens_trained": 0.169569024 |
| }, |
| { |
| "epoch": 0.5066666666666667, |
| "grad_norm": 1.458261489868164, |
| "loss": 5.1818, |
| "lr": 0.0008202797202797203, |
| "step": 1786, |
| "tokens_trained": 0.16975932 |
| }, |
| { |
| "epoch": 0.5072340425531915, |
| "grad_norm": 1.5307244062423706, |
| "loss": 5.2684, |
| "lr": 0.00082, |
| "step": 1788, |
| "tokens_trained": 0.169949064 |
| }, |
| { |
| "epoch": 0.5078014184397163, |
| "grad_norm": 1.3966363668441772, |
| "loss": 5.2125, |
| "lr": 0.0008197202797202797, |
| "step": 1790, |
| "tokens_trained": 0.170139352 |
| }, |
| { |
| "epoch": 0.5083687943262412, |
| "grad_norm": 1.4094839096069336, |
| "loss": 5.2518, |
| "lr": 0.0008194405594405595, |
| "step": 1792, |
| "tokens_trained": 0.170330336 |
| }, |
| { |
| "epoch": 0.5089361702127659, |
| "grad_norm": 1.266122817993164, |
| "loss": 5.2409, |
| "lr": 0.0008191608391608392, |
| "step": 1794, |
| "tokens_trained": 0.170521848 |
| }, |
| { |
| "epoch": 0.5095035460992908, |
| "grad_norm": 1.3079488277435303, |
| "loss": 5.182, |
| "lr": 0.000818881118881119, |
| "step": 1796, |
| "tokens_trained": 0.170710664 |
| }, |
| { |
| "epoch": 0.5100709219858156, |
| "grad_norm": 1.2961090803146362, |
| "loss": 5.2456, |
| "lr": 0.0008186013986013986, |
| "step": 1798, |
| "tokens_trained": 0.170900016 |
| }, |
| { |
| "epoch": 0.5106382978723404, |
| "grad_norm": 1.3402773141860962, |
| "loss": 5.1888, |
| "lr": 0.0008183216783216783, |
| "step": 1800, |
| "tokens_trained": 0.171089824 |
| }, |
| { |
| "epoch": 0.5112056737588653, |
| "grad_norm": 1.386769413948059, |
| "loss": 5.1715, |
| "lr": 0.000818041958041958, |
| "step": 1802, |
| "tokens_trained": 0.171279448 |
| }, |
| { |
| "epoch": 0.51177304964539, |
| "grad_norm": 1.4280421733856201, |
| "loss": 5.2131, |
| "lr": 0.0008177622377622378, |
| "step": 1804, |
| "tokens_trained": 0.17147048 |
| }, |
| { |
| "epoch": 0.512340425531915, |
| "grad_norm": 1.4805412292480469, |
| "loss": 5.2379, |
| "lr": 0.0008174825174825175, |
| "step": 1806, |
| "tokens_trained": 0.171662264 |
| }, |
| { |
| "epoch": 0.5129078014184397, |
| "grad_norm": 1.4608936309814453, |
| "loss": 5.2412, |
| "lr": 0.0008172027972027972, |
| "step": 1808, |
| "tokens_trained": 0.171853176 |
| }, |
| { |
| "epoch": 0.5134751773049645, |
| "grad_norm": 1.550136923789978, |
| "loss": 5.1828, |
| "lr": 0.000816923076923077, |
| "step": 1810, |
| "tokens_trained": 0.172043344 |
| }, |
| { |
| "epoch": 0.5140425531914894, |
| "grad_norm": 1.4756869077682495, |
| "loss": 5.199, |
| "lr": 0.0008166433566433567, |
| "step": 1812, |
| "tokens_trained": 0.172231952 |
| }, |
| { |
| "epoch": 0.5146099290780142, |
| "grad_norm": 1.4199044704437256, |
| "loss": 5.2074, |
| "lr": 0.0008163636363636364, |
| "step": 1814, |
| "tokens_trained": 0.172420376 |
| }, |
| { |
| "epoch": 0.5151773049645391, |
| "grad_norm": 1.3477959632873535, |
| "loss": 5.1672, |
| "lr": 0.0008160839160839161, |
| "step": 1816, |
| "tokens_trained": 0.172610248 |
| }, |
| { |
| "epoch": 0.5157446808510638, |
| "grad_norm": 1.3331218957901, |
| "loss": 5.2267, |
| "lr": 0.0008158041958041958, |
| "step": 1818, |
| "tokens_trained": 0.172799168 |
| }, |
| { |
| "epoch": 0.5163120567375886, |
| "grad_norm": 1.2391384840011597, |
| "loss": 5.2088, |
| "lr": 0.0008155244755244755, |
| "step": 1820, |
| "tokens_trained": 0.172989328 |
| }, |
| { |
| "epoch": 0.5168794326241135, |
| "grad_norm": 1.3377013206481934, |
| "loss": 5.2279, |
| "lr": 0.0008152447552447553, |
| "step": 1822, |
| "tokens_trained": 0.173179376 |
| }, |
| { |
| "epoch": 0.5174468085106383, |
| "grad_norm": 1.285628318786621, |
| "loss": 5.3006, |
| "lr": 0.000814965034965035, |
| "step": 1824, |
| "tokens_trained": 0.173370408 |
| }, |
| { |
| "epoch": 0.5180141843971631, |
| "grad_norm": 1.2010120153427124, |
| "loss": 5.2264, |
| "lr": 0.0008146853146853147, |
| "step": 1826, |
| "tokens_trained": 0.173561144 |
| }, |
| { |
| "epoch": 0.518581560283688, |
| "grad_norm": 1.2953096628189087, |
| "loss": 5.1879, |
| "lr": 0.0008144055944055944, |
| "step": 1828, |
| "tokens_trained": 0.173753592 |
| }, |
| { |
| "epoch": 0.5191489361702127, |
| "grad_norm": 1.256910800933838, |
| "loss": 5.2402, |
| "lr": 0.0008141258741258742, |
| "step": 1830, |
| "tokens_trained": 0.173943752 |
| }, |
| { |
| "epoch": 0.5197163120567376, |
| "grad_norm": 1.338755488395691, |
| "loss": 5.2556, |
| "lr": 0.0008138461538461539, |
| "step": 1832, |
| "tokens_trained": 0.174130504 |
| }, |
| { |
| "epoch": 0.5202836879432624, |
| "grad_norm": 1.380715012550354, |
| "loss": 5.2047, |
| "lr": 0.0008135664335664336, |
| "step": 1834, |
| "tokens_trained": 0.174322088 |
| }, |
| { |
| "epoch": 0.5208510638297872, |
| "grad_norm": 1.4989492893218994, |
| "loss": 5.1873, |
| "lr": 0.0008132867132867133, |
| "step": 1836, |
| "tokens_trained": 0.17451164 |
| }, |
| { |
| "epoch": 0.5214184397163121, |
| "grad_norm": 1.3239110708236694, |
| "loss": 5.202, |
| "lr": 0.000813006993006993, |
| "step": 1838, |
| "tokens_trained": 0.174701896 |
| }, |
| { |
| "epoch": 0.5219858156028369, |
| "grad_norm": 1.397745132446289, |
| "loss": 5.2259, |
| "lr": 0.0008127272727272728, |
| "step": 1840, |
| "tokens_trained": 0.174892336 |
| }, |
| { |
| "epoch": 0.5225531914893617, |
| "grad_norm": 1.3992305994033813, |
| "loss": 5.1771, |
| "lr": 0.0008124475524475524, |
| "step": 1842, |
| "tokens_trained": 0.17508276 |
| }, |
| { |
| "epoch": 0.5231205673758865, |
| "grad_norm": 1.38923180103302, |
| "loss": 5.1981, |
| "lr": 0.0008121678321678322, |
| "step": 1844, |
| "tokens_trained": 0.175273272 |
| }, |
| { |
| "epoch": 0.5236879432624113, |
| "grad_norm": 1.478642225265503, |
| "loss": 5.2533, |
| "lr": 0.0008118881118881119, |
| "step": 1846, |
| "tokens_trained": 0.175462352 |
| }, |
| { |
| "epoch": 0.5242553191489362, |
| "grad_norm": 1.332709789276123, |
| "loss": 5.2205, |
| "lr": 0.0008116083916083917, |
| "step": 1848, |
| "tokens_trained": 0.175648128 |
| }, |
| { |
| "epoch": 0.524822695035461, |
| "grad_norm": 1.4612590074539185, |
| "loss": 5.2207, |
| "lr": 0.0008113286713286714, |
| "step": 1850, |
| "tokens_trained": 0.175837712 |
| }, |
| { |
| "epoch": 0.5253900709219859, |
| "grad_norm": 1.4682700634002686, |
| "loss": 5.2576, |
| "lr": 0.000811048951048951, |
| "step": 1852, |
| "tokens_trained": 0.176029512 |
| }, |
| { |
| "epoch": 0.5259574468085106, |
| "grad_norm": 1.3380264043807983, |
| "loss": 5.2435, |
| "lr": 0.0008107692307692308, |
| "step": 1854, |
| "tokens_trained": 0.176220432 |
| }, |
| { |
| "epoch": 0.5265248226950354, |
| "grad_norm": 1.2452281713485718, |
| "loss": 5.2973, |
| "lr": 0.0008104895104895104, |
| "step": 1856, |
| "tokens_trained": 0.176412144 |
| }, |
| { |
| "epoch": 0.5270921985815603, |
| "grad_norm": 1.392592191696167, |
| "loss": 5.2028, |
| "lr": 0.0008102097902097903, |
| "step": 1858, |
| "tokens_trained": 0.17660144 |
| }, |
| { |
| "epoch": 0.5276595744680851, |
| "grad_norm": 1.4258657693862915, |
| "loss": 5.2342, |
| "lr": 0.0008099300699300699, |
| "step": 1860, |
| "tokens_trained": 0.176790424 |
| }, |
| { |
| "epoch": 0.52822695035461, |
| "grad_norm": 1.4627033472061157, |
| "loss": 5.1732, |
| "lr": 0.0008096503496503497, |
| "step": 1862, |
| "tokens_trained": 0.176983296 |
| }, |
| { |
| "epoch": 0.5287943262411348, |
| "grad_norm": 1.4448645114898682, |
| "loss": 5.2001, |
| "lr": 0.0008093706293706294, |
| "step": 1864, |
| "tokens_trained": 0.177174544 |
| }, |
| { |
| "epoch": 0.5293617021276595, |
| "grad_norm": 1.3879749774932861, |
| "loss": 5.1642, |
| "lr": 0.0008090909090909092, |
| "step": 1866, |
| "tokens_trained": 0.17736428 |
| }, |
| { |
| "epoch": 0.5299290780141844, |
| "grad_norm": 1.2791417837142944, |
| "loss": 5.1975, |
| "lr": 0.0008088111888111889, |
| "step": 1868, |
| "tokens_trained": 0.177553752 |
| }, |
| { |
| "epoch": 0.5304964539007092, |
| "grad_norm": 1.3620632886886597, |
| "loss": 5.1742, |
| "lr": 0.0008085314685314685, |
| "step": 1870, |
| "tokens_trained": 0.177746448 |
| }, |
| { |
| "epoch": 0.531063829787234, |
| "grad_norm": 1.2759565114974976, |
| "loss": 5.2076, |
| "lr": 0.0008082517482517483, |
| "step": 1872, |
| "tokens_trained": 0.177937888 |
| }, |
| { |
| "epoch": 0.5316312056737589, |
| "grad_norm": 1.3390915393829346, |
| "loss": 5.2387, |
| "lr": 0.0008079720279720279, |
| "step": 1874, |
| "tokens_trained": 0.178127776 |
| }, |
| { |
| "epoch": 0.5319148936170213, |
| "eval_loss": 5.228371620178223, |
| "eval_runtime": 20.9372, |
| "step": 1875, |
| "tokens_trained": 0.17822376 |
| }, |
| { |
| "epoch": 0.5321985815602837, |
| "grad_norm": 1.3872885704040527, |
| "loss": 5.2053, |
| "lr": 0.0008076923076923078, |
| "step": 1876, |
| "tokens_trained": 0.178318616 |
| }, |
| { |
| "epoch": 0.5327659574468085, |
| "grad_norm": 1.4238568544387817, |
| "loss": 5.2091, |
| "lr": 0.0008074125874125874, |
| "step": 1878, |
| "tokens_trained": 0.178509272 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 1.3352588415145874, |
| "loss": 5.2471, |
| "lr": 0.0008071328671328671, |
| "step": 1880, |
| "tokens_trained": 0.178698016 |
| }, |
| { |
| "epoch": 0.5339007092198581, |
| "grad_norm": 1.2931993007659912, |
| "loss": 5.2315, |
| "lr": 0.0008068531468531469, |
| "step": 1882, |
| "tokens_trained": 0.17888628 |
| }, |
| { |
| "epoch": 0.534468085106383, |
| "grad_norm": 1.3475919961929321, |
| "loss": 5.2337, |
| "lr": 0.0008065734265734265, |
| "step": 1884, |
| "tokens_trained": 0.179076944 |
| }, |
| { |
| "epoch": 0.5350354609929078, |
| "grad_norm": 1.3263812065124512, |
| "loss": 5.2017, |
| "lr": 0.0008062937062937064, |
| "step": 1886, |
| "tokens_trained": 0.179266128 |
| }, |
| { |
| "epoch": 0.5356028368794327, |
| "grad_norm": 1.3956594467163086, |
| "loss": 5.1907, |
| "lr": 0.000806013986013986, |
| "step": 1888, |
| "tokens_trained": 0.179454848 |
| }, |
| { |
| "epoch": 0.5361702127659574, |
| "grad_norm": 1.4399393796920776, |
| "loss": 5.216, |
| "lr": 0.0008057342657342658, |
| "step": 1890, |
| "tokens_trained": 0.179643992 |
| }, |
| { |
| "epoch": 0.5367375886524822, |
| "grad_norm": 1.278714656829834, |
| "loss": 5.1689, |
| "lr": 0.0008054545454545454, |
| "step": 1892, |
| "tokens_trained": 0.179831416 |
| }, |
| { |
| "epoch": 0.5373049645390071, |
| "grad_norm": 1.3517796993255615, |
| "loss": 5.1319, |
| "lr": 0.0008051748251748253, |
| "step": 1894, |
| "tokens_trained": 0.180022528 |
| }, |
| { |
| "epoch": 0.5378723404255319, |
| "grad_norm": 1.2710460424423218, |
| "loss": 5.1619, |
| "lr": 0.0008048951048951049, |
| "step": 1896, |
| "tokens_trained": 0.180212936 |
| }, |
| { |
| "epoch": 0.5384397163120568, |
| "grad_norm": 1.3603075742721558, |
| "loss": 5.1615, |
| "lr": 0.0008046153846153846, |
| "step": 1898, |
| "tokens_trained": 0.180404648 |
| }, |
| { |
| "epoch": 0.5390070921985816, |
| "grad_norm": 1.422122836112976, |
| "loss": 5.1801, |
| "lr": 0.0008043356643356644, |
| "step": 1900, |
| "tokens_trained": 0.18059388 |
| }, |
| { |
| "epoch": 0.5395744680851063, |
| "grad_norm": 1.4242218732833862, |
| "loss": 5.2367, |
| "lr": 0.000804055944055944, |
| "step": 1902, |
| "tokens_trained": 0.180783248 |
| }, |
| { |
| "epoch": 0.5401418439716312, |
| "grad_norm": 1.4476134777069092, |
| "loss": 5.252, |
| "lr": 0.0008037762237762239, |
| "step": 1904, |
| "tokens_trained": 0.180971152 |
| }, |
| { |
| "epoch": 0.540709219858156, |
| "grad_norm": 1.4724863767623901, |
| "loss": 5.2042, |
| "lr": 0.0008034965034965035, |
| "step": 1906, |
| "tokens_trained": 0.181159992 |
| }, |
| { |
| "epoch": 0.5412765957446809, |
| "grad_norm": 1.4014806747436523, |
| "loss": 5.2514, |
| "lr": 0.0008032167832167832, |
| "step": 1908, |
| "tokens_trained": 0.18135032 |
| }, |
| { |
| "epoch": 0.5418439716312057, |
| "grad_norm": 1.3511682748794556, |
| "loss": 5.2036, |
| "lr": 0.0008029370629370629, |
| "step": 1910, |
| "tokens_trained": 0.181540312 |
| }, |
| { |
| "epoch": 0.5424113475177305, |
| "grad_norm": 1.3011739253997803, |
| "loss": 5.24, |
| "lr": 0.0008026573426573427, |
| "step": 1912, |
| "tokens_trained": 0.181731104 |
| }, |
| { |
| "epoch": 0.5429787234042553, |
| "grad_norm": 1.2753015756607056, |
| "loss": 5.25, |
| "lr": 0.0008023776223776224, |
| "step": 1914, |
| "tokens_trained": 0.18192008 |
| }, |
| { |
| "epoch": 0.5435460992907801, |
| "grad_norm": 1.4685192108154297, |
| "loss": 5.1619, |
| "lr": 0.0008020979020979021, |
| "step": 1916, |
| "tokens_trained": 0.182110072 |
| }, |
| { |
| "epoch": 0.544113475177305, |
| "grad_norm": 1.4695900678634644, |
| "loss": 5.2626, |
| "lr": 0.0008018181818181818, |
| "step": 1918, |
| "tokens_trained": 0.182300224 |
| }, |
| { |
| "epoch": 0.5446808510638298, |
| "grad_norm": 1.4895613193511963, |
| "loss": 5.1766, |
| "lr": 0.0008015384615384615, |
| "step": 1920, |
| "tokens_trained": 0.182490712 |
| }, |
| { |
| "epoch": 0.5452482269503546, |
| "grad_norm": 1.3073184490203857, |
| "loss": 5.2281, |
| "lr": 0.0008012587412587414, |
| "step": 1922, |
| "tokens_trained": 0.182681168 |
| }, |
| { |
| "epoch": 0.5458156028368795, |
| "grad_norm": 1.2414125204086304, |
| "loss": 5.2099, |
| "lr": 0.000800979020979021, |
| "step": 1924, |
| "tokens_trained": 0.182870504 |
| }, |
| { |
| "epoch": 0.5463829787234042, |
| "grad_norm": 1.2407176494598389, |
| "loss": 5.1116, |
| "lr": 0.0008006993006993007, |
| "step": 1926, |
| "tokens_trained": 0.1830618 |
| }, |
| { |
| "epoch": 0.546950354609929, |
| "grad_norm": 1.4507744312286377, |
| "loss": 5.1658, |
| "lr": 0.0008004195804195804, |
| "step": 1928, |
| "tokens_trained": 0.183250072 |
| }, |
| { |
| "epoch": 0.5475177304964539, |
| "grad_norm": 1.348907232284546, |
| "loss": 5.231, |
| "lr": 0.0008001398601398602, |
| "step": 1930, |
| "tokens_trained": 0.18344004 |
| }, |
| { |
| "epoch": 0.5480851063829787, |
| "grad_norm": 1.4393324851989746, |
| "loss": 5.2393, |
| "lr": 0.0007998601398601399, |
| "step": 1932, |
| "tokens_trained": 0.183630032 |
| }, |
| { |
| "epoch": 0.5486524822695036, |
| "grad_norm": 1.3569602966308594, |
| "loss": 5.2068, |
| "lr": 0.0007995804195804196, |
| "step": 1934, |
| "tokens_trained": 0.183820816 |
| }, |
| { |
| "epoch": 0.5492198581560284, |
| "grad_norm": 1.362021803855896, |
| "loss": 5.1641, |
| "lr": 0.0007993006993006992, |
| "step": 1936, |
| "tokens_trained": 0.184009824 |
| }, |
| { |
| "epoch": 0.5497872340425531, |
| "grad_norm": 1.2926445007324219, |
| "loss": 5.1983, |
| "lr": 0.000799020979020979, |
| "step": 1938, |
| "tokens_trained": 0.184199544 |
| }, |
| { |
| "epoch": 0.550354609929078, |
| "grad_norm": 1.3065440654754639, |
| "loss": 5.3009, |
| "lr": 0.0007987412587412588, |
| "step": 1940, |
| "tokens_trained": 0.1843906 |
| }, |
| { |
| "epoch": 0.5509219858156028, |
| "grad_norm": 1.3288060426712036, |
| "loss": 5.2347, |
| "lr": 0.0007984615384615385, |
| "step": 1942, |
| "tokens_trained": 0.184580304 |
| }, |
| { |
| "epoch": 0.5514893617021277, |
| "grad_norm": 1.4742496013641357, |
| "loss": 5.1497, |
| "lr": 0.0007981818181818182, |
| "step": 1944, |
| "tokens_trained": 0.184771832 |
| }, |
| { |
| "epoch": 0.5520567375886525, |
| "grad_norm": 1.3907397985458374, |
| "loss": 5.2001, |
| "lr": 0.0007979020979020979, |
| "step": 1946, |
| "tokens_trained": 0.184963744 |
| }, |
| { |
| "epoch": 0.5526241134751773, |
| "grad_norm": 1.3324332237243652, |
| "loss": 5.2056, |
| "lr": 0.0007976223776223777, |
| "step": 1948, |
| "tokens_trained": 0.185152248 |
| }, |
| { |
| "epoch": 0.5531914893617021, |
| "grad_norm": 1.258155345916748, |
| "loss": 5.1999, |
| "lr": 0.0007973426573426573, |
| "step": 1950, |
| "tokens_trained": 0.18534196 |
| }, |
| { |
| "epoch": 0.5537588652482269, |
| "grad_norm": 1.3515956401824951, |
| "loss": 5.1988, |
| "lr": 0.0007970629370629371, |
| "step": 1952, |
| "tokens_trained": 0.18553156 |
| }, |
| { |
| "epoch": 0.5543262411347518, |
| "grad_norm": 1.535507321357727, |
| "loss": 5.2198, |
| "lr": 0.0007967832167832167, |
| "step": 1954, |
| "tokens_trained": 0.185719792 |
| }, |
| { |
| "epoch": 0.5548936170212766, |
| "grad_norm": 1.3124226331710815, |
| "loss": 5.1468, |
| "lr": 0.0007965034965034965, |
| "step": 1956, |
| "tokens_trained": 0.18591288 |
| }, |
| { |
| "epoch": 0.5554609929078014, |
| "grad_norm": 1.2720654010772705, |
| "loss": 5.1939, |
| "lr": 0.0007962237762237763, |
| "step": 1958, |
| "tokens_trained": 0.186102344 |
| }, |
| { |
| "epoch": 0.5560283687943263, |
| "grad_norm": 1.2731753587722778, |
| "loss": 5.2063, |
| "lr": 0.000795944055944056, |
| "step": 1960, |
| "tokens_trained": 0.186291976 |
| }, |
| { |
| "epoch": 0.556595744680851, |
| "grad_norm": 1.3020576238632202, |
| "loss": 5.266, |
| "lr": 0.0007956643356643357, |
| "step": 1962, |
| "tokens_trained": 0.186483504 |
| }, |
| { |
| "epoch": 0.5571631205673759, |
| "grad_norm": 1.300626277923584, |
| "loss": 5.2159, |
| "lr": 0.0007953846153846153, |
| "step": 1964, |
| "tokens_trained": 0.18667372 |
| }, |
| { |
| "epoch": 0.5577304964539007, |
| "grad_norm": 1.3075426816940308, |
| "loss": 5.2136, |
| "lr": 0.0007951048951048952, |
| "step": 1966, |
| "tokens_trained": 0.186864808 |
| }, |
| { |
| "epoch": 0.5582978723404255, |
| "grad_norm": 1.4623394012451172, |
| "loss": 5.2081, |
| "lr": 0.0007948251748251748, |
| "step": 1968, |
| "tokens_trained": 0.187056272 |
| }, |
| { |
| "epoch": 0.5588652482269504, |
| "grad_norm": 1.4950625896453857, |
| "loss": 5.1885, |
| "lr": 0.0007945454545454546, |
| "step": 1970, |
| "tokens_trained": 0.187244464 |
| }, |
| { |
| "epoch": 0.5594326241134752, |
| "grad_norm": 1.517152190208435, |
| "loss": 5.2558, |
| "lr": 0.0007942657342657342, |
| "step": 1972, |
| "tokens_trained": 0.187433216 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.4226372241973877, |
| "loss": 5.236, |
| "lr": 0.000793986013986014, |
| "step": 1974, |
| "tokens_trained": 0.187622632 |
| }, |
| { |
| "epoch": 0.5605673758865248, |
| "grad_norm": 1.3692735433578491, |
| "loss": 5.2089, |
| "lr": 0.0007937062937062938, |
| "step": 1976, |
| "tokens_trained": 0.18781324 |
| }, |
| { |
| "epoch": 0.5611347517730496, |
| "grad_norm": 1.3344841003417969, |
| "loss": 5.2052, |
| "lr": 0.0007934265734265734, |
| "step": 1978, |
| "tokens_trained": 0.188002488 |
| }, |
| { |
| "epoch": 0.5617021276595745, |
| "grad_norm": 1.3929632902145386, |
| "loss": 5.2353, |
| "lr": 0.0007931468531468532, |
| "step": 1980, |
| "tokens_trained": 0.188194712 |
| }, |
| { |
| "epoch": 0.5622695035460993, |
| "grad_norm": 1.3147000074386597, |
| "loss": 5.2071, |
| "lr": 0.0007928671328671328, |
| "step": 1982, |
| "tokens_trained": 0.188387056 |
| }, |
| { |
| "epoch": 0.5628368794326241, |
| "grad_norm": 1.351483702659607, |
| "loss": 5.2196, |
| "lr": 0.0007925874125874127, |
| "step": 1984, |
| "tokens_trained": 0.188579048 |
| }, |
| { |
| "epoch": 0.563404255319149, |
| "grad_norm": 1.3840581178665161, |
| "loss": 5.1889, |
| "lr": 0.0007923076923076923, |
| "step": 1986, |
| "tokens_trained": 0.18876896 |
| }, |
| { |
| "epoch": 0.5639716312056737, |
| "grad_norm": 1.3427214622497559, |
| "loss": 5.192, |
| "lr": 0.000792027972027972, |
| "step": 1988, |
| "tokens_trained": 0.18895832 |
| }, |
| { |
| "epoch": 0.5645390070921986, |
| "grad_norm": 1.2931344509124756, |
| "loss": 5.1942, |
| "lr": 0.0007917482517482517, |
| "step": 1990, |
| "tokens_trained": 0.18915036 |
| }, |
| { |
| "epoch": 0.5651063829787234, |
| "grad_norm": 1.2408664226531982, |
| "loss": 5.2014, |
| "lr": 0.0007914685314685314, |
| "step": 1992, |
| "tokens_trained": 0.189339784 |
| }, |
| { |
| "epoch": 0.5656737588652482, |
| "grad_norm": 1.342760682106018, |
| "loss": 5.2056, |
| "lr": 0.0007911888111888113, |
| "step": 1994, |
| "tokens_trained": 0.189530776 |
| }, |
| { |
| "epoch": 0.5662411347517731, |
| "grad_norm": 1.2647815942764282, |
| "loss": 5.2338, |
| "lr": 0.0007909090909090909, |
| "step": 1996, |
| "tokens_trained": 0.189720312 |
| }, |
| { |
| "epoch": 0.5668085106382978, |
| "grad_norm": 1.1956689357757568, |
| "loss": 5.1464, |
| "lr": 0.0007906293706293707, |
| "step": 1998, |
| "tokens_trained": 0.189909592 |
| }, |
| { |
| "epoch": 0.5673758865248227, |
| "grad_norm": 1.287185549736023, |
| "loss": 5.1919, |
| "lr": 0.0007903496503496503, |
| "step": 2000, |
| "tokens_trained": 0.190100544 |
| }, |
| { |
| "epoch": 0.5673758865248227, |
| "eval_loss": 5.208409309387207, |
| "eval_runtime": 21.1643, |
| "step": 2000, |
| "tokens_trained": 0.190100544 |
| }, |
| { |
| "epoch": 0.5679432624113475, |
| "grad_norm": 1.3409695625305176, |
| "loss": 5.1723, |
| "lr": 0.0007900699300699302, |
| "step": 2002, |
| "tokens_trained": 0.190291792 |
| }, |
| { |
| "epoch": 0.5685106382978723, |
| "grad_norm": 1.3951654434204102, |
| "loss": 5.243, |
| "lr": 0.0007897902097902098, |
| "step": 2004, |
| "tokens_trained": 0.190481864 |
| }, |
| { |
| "epoch": 0.5690780141843972, |
| "grad_norm": 1.2949507236480713, |
| "loss": 5.2248, |
| "lr": 0.0007895104895104895, |
| "step": 2006, |
| "tokens_trained": 0.19067228 |
| }, |
| { |
| "epoch": 0.569645390070922, |
| "grad_norm": 1.3585959672927856, |
| "loss": 5.1889, |
| "lr": 0.0007892307692307692, |
| "step": 2008, |
| "tokens_trained": 0.190860368 |
| }, |
| { |
| "epoch": 0.5702127659574469, |
| "grad_norm": 1.2834774255752563, |
| "loss": 5.2067, |
| "lr": 0.0007889510489510489, |
| "step": 2010, |
| "tokens_trained": 0.191051904 |
| }, |
| { |
| "epoch": 0.5707801418439716, |
| "grad_norm": 1.3544108867645264, |
| "loss": 5.2041, |
| "lr": 0.0007886713286713288, |
| "step": 2012, |
| "tokens_trained": 0.191242688 |
| }, |
| { |
| "epoch": 0.5713475177304964, |
| "grad_norm": 1.3536330461502075, |
| "loss": 5.2131, |
| "lr": 0.0007883916083916084, |
| "step": 2014, |
| "tokens_trained": 0.191431104 |
| }, |
| { |
| "epoch": 0.5719148936170213, |
| "grad_norm": 1.337441325187683, |
| "loss": 5.2036, |
| "lr": 0.0007881118881118882, |
| "step": 2016, |
| "tokens_trained": 0.19162204 |
| }, |
| { |
| "epoch": 0.5724822695035461, |
| "grad_norm": 1.4701579809188843, |
| "loss": 5.2049, |
| "lr": 0.0007878321678321678, |
| "step": 2018, |
| "tokens_trained": 0.191813352 |
| }, |
| { |
| "epoch": 0.573049645390071, |
| "grad_norm": 1.4354153871536255, |
| "loss": 5.2583, |
| "lr": 0.0007875524475524476, |
| "step": 2020, |
| "tokens_trained": 0.192004064 |
| }, |
| { |
| "epoch": 0.5736170212765958, |
| "grad_norm": 1.358913540840149, |
| "loss": 5.1961, |
| "lr": 0.0007872727272727273, |
| "step": 2022, |
| "tokens_trained": 0.192193232 |
| }, |
| { |
| "epoch": 0.5741843971631205, |
| "grad_norm": 1.3889496326446533, |
| "loss": 5.1755, |
| "lr": 0.000786993006993007, |
| "step": 2024, |
| "tokens_trained": 0.192385416 |
| }, |
| { |
| "epoch": 0.5747517730496454, |
| "grad_norm": 1.4138504266738892, |
| "loss": 5.2423, |
| "lr": 0.0007867132867132867, |
| "step": 2026, |
| "tokens_trained": 0.192575904 |
| }, |
| { |
| "epoch": 0.5753191489361702, |
| "grad_norm": 1.2651748657226562, |
| "loss": 5.1574, |
| "lr": 0.0007864335664335664, |
| "step": 2028, |
| "tokens_trained": 0.192765568 |
| }, |
| { |
| "epoch": 0.5758865248226951, |
| "grad_norm": 1.304296612739563, |
| "loss": 5.1978, |
| "lr": 0.0007861538461538463, |
| "step": 2030, |
| "tokens_trained": 0.192956176 |
| }, |
| { |
| "epoch": 0.5764539007092199, |
| "grad_norm": 1.2884007692337036, |
| "loss": 5.1945, |
| "lr": 0.0007858741258741259, |
| "step": 2032, |
| "tokens_trained": 0.193146208 |
| }, |
| { |
| "epoch": 0.5770212765957446, |
| "grad_norm": 1.4838171005249023, |
| "loss": 5.1348, |
| "lr": 0.0007855944055944056, |
| "step": 2034, |
| "tokens_trained": 0.193335664 |
| }, |
| { |
| "epoch": 0.5775886524822695, |
| "grad_norm": 1.456529974937439, |
| "loss": 5.2284, |
| "lr": 0.0007853146853146853, |
| "step": 2036, |
| "tokens_trained": 0.193525216 |
| }, |
| { |
| "epoch": 0.5781560283687943, |
| "grad_norm": 1.3471657037734985, |
| "loss": 5.2101, |
| "lr": 0.0007850349650349651, |
| "step": 2038, |
| "tokens_trained": 0.19371268 |
| }, |
| { |
| "epoch": 0.5787234042553191, |
| "grad_norm": 1.3996837139129639, |
| "loss": 5.1828, |
| "lr": 0.0007847552447552448, |
| "step": 2040, |
| "tokens_trained": 0.193903536 |
| }, |
| { |
| "epoch": 0.579290780141844, |
| "grad_norm": 1.4071470499038696, |
| "loss": 5.1724, |
| "lr": 0.0007844755244755245, |
| "step": 2042, |
| "tokens_trained": 0.194092384 |
| }, |
| { |
| "epoch": 0.5798581560283688, |
| "grad_norm": 1.4125159978866577, |
| "loss": 5.1602, |
| "lr": 0.0007841958041958041, |
| "step": 2044, |
| "tokens_trained": 0.19428356 |
| }, |
| { |
| "epoch": 0.5804255319148937, |
| "grad_norm": 1.3602298498153687, |
| "loss": 5.1904, |
| "lr": 0.0007839160839160839, |
| "step": 2046, |
| "tokens_trained": 0.194473352 |
| }, |
| { |
| "epoch": 0.5809929078014184, |
| "grad_norm": 1.2836074829101562, |
| "loss": 5.1648, |
| "lr": 0.0007836363636363637, |
| "step": 2048, |
| "tokens_trained": 0.194663624 |
| }, |
| { |
| "epoch": 0.5815602836879432, |
| "grad_norm": 1.306192398071289, |
| "loss": 5.2037, |
| "lr": 0.0007833566433566434, |
| "step": 2050, |
| "tokens_trained": 0.194854 |
| }, |
| { |
| "epoch": 0.5821276595744681, |
| "grad_norm": 1.3130674362182617, |
| "loss": 5.223, |
| "lr": 0.0007830769230769231, |
| "step": 2052, |
| "tokens_trained": 0.195044368 |
| }, |
| { |
| "epoch": 0.5826950354609929, |
| "grad_norm": 1.2337714433670044, |
| "loss": 5.1609, |
| "lr": 0.0007827972027972028, |
| "step": 2054, |
| "tokens_trained": 0.195237064 |
| }, |
| { |
| "epoch": 0.5832624113475178, |
| "grad_norm": 1.2249869108200073, |
| "loss": 5.1352, |
| "lr": 0.0007825174825174826, |
| "step": 2056, |
| "tokens_trained": 0.195425016 |
| }, |
| { |
| "epoch": 0.5838297872340426, |
| "grad_norm": 1.2610726356506348, |
| "loss": 5.2304, |
| "lr": 0.0007822377622377622, |
| "step": 2058, |
| "tokens_trained": 0.195614488 |
| }, |
| { |
| "epoch": 0.5843971631205673, |
| "grad_norm": 1.1917920112609863, |
| "loss": 5.1964, |
| "lr": 0.000781958041958042, |
| "step": 2060, |
| "tokens_trained": 0.19580392 |
| }, |
| { |
| "epoch": 0.5849645390070922, |
| "grad_norm": 1.2248187065124512, |
| "loss": 5.0901, |
| "lr": 0.0007816783216783216, |
| "step": 2062, |
| "tokens_trained": 0.195993096 |
| }, |
| { |
| "epoch": 0.585531914893617, |
| "grad_norm": 1.4138745069503784, |
| "loss": 5.1806, |
| "lr": 0.0007813986013986014, |
| "step": 2064, |
| "tokens_trained": 0.196183824 |
| }, |
| { |
| "epoch": 0.5860992907801419, |
| "grad_norm": 1.389195442199707, |
| "loss": 5.1813, |
| "lr": 0.0007811188811188812, |
| "step": 2066, |
| "tokens_trained": 0.196373912 |
| }, |
| { |
| "epoch": 0.5866666666666667, |
| "grad_norm": 1.2737247943878174, |
| "loss": 5.1935, |
| "lr": 0.0007808391608391609, |
| "step": 2068, |
| "tokens_trained": 0.196564696 |
| }, |
| { |
| "epoch": 0.5872340425531914, |
| "grad_norm": 1.443703293800354, |
| "loss": 5.2376, |
| "lr": 0.0007805594405594406, |
| "step": 2070, |
| "tokens_trained": 0.196754472 |
| }, |
| { |
| "epoch": 0.5878014184397163, |
| "grad_norm": 1.367251992225647, |
| "loss": 5.2505, |
| "lr": 0.0007802797202797202, |
| "step": 2072, |
| "tokens_trained": 0.196945288 |
| }, |
| { |
| "epoch": 0.5883687943262411, |
| "grad_norm": 1.4049919843673706, |
| "loss": 5.2155, |
| "lr": 0.0007800000000000001, |
| "step": 2074, |
| "tokens_trained": 0.197135328 |
| }, |
| { |
| "epoch": 0.588936170212766, |
| "grad_norm": 1.5119894742965698, |
| "loss": 5.189, |
| "lr": 0.0007797202797202797, |
| "step": 2076, |
| "tokens_trained": 0.197325152 |
| }, |
| { |
| "epoch": 0.5895035460992908, |
| "grad_norm": 1.349288821220398, |
| "loss": 5.1626, |
| "lr": 0.0007794405594405595, |
| "step": 2078, |
| "tokens_trained": 0.197514576 |
| }, |
| { |
| "epoch": 0.5900709219858156, |
| "grad_norm": 1.2594739198684692, |
| "loss": 5.2222, |
| "lr": 0.0007791608391608391, |
| "step": 2080, |
| "tokens_trained": 0.197705064 |
| }, |
| { |
| "epoch": 0.5906382978723405, |
| "grad_norm": 1.0747008323669434, |
| "loss": 5.1669, |
| "lr": 0.0007788811188811189, |
| "step": 2082, |
| "tokens_trained": 0.197895032 |
| }, |
| { |
| "epoch": 0.5912056737588652, |
| "grad_norm": 1.1089273691177368, |
| "loss": 5.1071, |
| "lr": 0.0007786013986013987, |
| "step": 2084, |
| "tokens_trained": 0.198085832 |
| }, |
| { |
| "epoch": 0.5917730496453901, |
| "grad_norm": 1.153296709060669, |
| "loss": 5.1483, |
| "lr": 0.0007783216783216783, |
| "step": 2086, |
| "tokens_trained": 0.198272104 |
| }, |
| { |
| "epoch": 0.5923404255319149, |
| "grad_norm": 1.1960811614990234, |
| "loss": 5.1703, |
| "lr": 0.0007780419580419581, |
| "step": 2088, |
| "tokens_trained": 0.198459976 |
| }, |
| { |
| "epoch": 0.5929078014184397, |
| "grad_norm": 1.073548674583435, |
| "loss": 5.2449, |
| "lr": 0.0007777622377622377, |
| "step": 2090, |
| "tokens_trained": 0.198648376 |
| }, |
| { |
| "epoch": 0.5934751773049646, |
| "grad_norm": 1.233362078666687, |
| "loss": 5.1987, |
| "lr": 0.0007774825174825176, |
| "step": 2092, |
| "tokens_trained": 0.198839144 |
| }, |
| { |
| "epoch": 0.5940425531914894, |
| "grad_norm": 1.3649506568908691, |
| "loss": 5.183, |
| "lr": 0.0007772027972027972, |
| "step": 2094, |
| "tokens_trained": 0.199029064 |
| }, |
| { |
| "epoch": 0.5946099290780141, |
| "grad_norm": 1.2620112895965576, |
| "loss": 5.1343, |
| "lr": 0.000776923076923077, |
| "step": 2096, |
| "tokens_trained": 0.199218376 |
| }, |
| { |
| "epoch": 0.595177304964539, |
| "grad_norm": 1.3836737871170044, |
| "loss": 5.248, |
| "lr": 0.0007766433566433566, |
| "step": 2098, |
| "tokens_trained": 0.199407736 |
| }, |
| { |
| "epoch": 0.5957446808510638, |
| "grad_norm": 1.3027995824813843, |
| "loss": 5.1813, |
| "lr": 0.0007763636363636363, |
| "step": 2100, |
| "tokens_trained": 0.199597888 |
| }, |
| { |
| "epoch": 0.5963120567375887, |
| "grad_norm": 1.2857698202133179, |
| "loss": 5.2111, |
| "lr": 0.0007760839160839162, |
| "step": 2102, |
| "tokens_trained": 0.19978852 |
| }, |
| { |
| "epoch": 0.5968794326241135, |
| "grad_norm": 1.3470538854599, |
| "loss": 5.1505, |
| "lr": 0.0007758041958041958, |
| "step": 2104, |
| "tokens_trained": 0.199978536 |
| }, |
| { |
| "epoch": 0.5974468085106382, |
| "grad_norm": 1.230573058128357, |
| "loss": 5.1222, |
| "lr": 0.0007755244755244756, |
| "step": 2106, |
| "tokens_trained": 0.200170024 |
| }, |
| { |
| "epoch": 0.5980141843971631, |
| "grad_norm": 1.2551500797271729, |
| "loss": 5.1297, |
| "lr": 0.0007752447552447552, |
| "step": 2108, |
| "tokens_trained": 0.20035992 |
| }, |
| { |
| "epoch": 0.5985815602836879, |
| "grad_norm": 1.2162272930145264, |
| "loss": 5.233, |
| "lr": 0.0007749650349650351, |
| "step": 2110, |
| "tokens_trained": 0.200548976 |
| }, |
| { |
| "epoch": 0.5991489361702128, |
| "grad_norm": 1.2617305517196655, |
| "loss": 5.2118, |
| "lr": 0.0007746853146853147, |
| "step": 2112, |
| "tokens_trained": 0.200740656 |
| }, |
| { |
| "epoch": 0.5997163120567376, |
| "grad_norm": 1.4057862758636475, |
| "loss": 5.2215, |
| "lr": 0.0007744055944055944, |
| "step": 2114, |
| "tokens_trained": 0.200930944 |
| }, |
| { |
| "epoch": 0.6002836879432624, |
| "grad_norm": 1.3729593753814697, |
| "loss": 5.1773, |
| "lr": 0.0007741258741258741, |
| "step": 2116, |
| "tokens_trained": 0.201122528 |
| }, |
| { |
| "epoch": 0.6008510638297873, |
| "grad_norm": 1.3300920724868774, |
| "loss": 5.1573, |
| "lr": 0.0007738461538461538, |
| "step": 2118, |
| "tokens_trained": 0.201310224 |
| }, |
| { |
| "epoch": 0.601418439716312, |
| "grad_norm": 1.33209228515625, |
| "loss": 5.1523, |
| "lr": 0.0007735664335664337, |
| "step": 2120, |
| "tokens_trained": 0.201499048 |
| }, |
| { |
| "epoch": 0.6019858156028369, |
| "grad_norm": 1.1407768726348877, |
| "loss": 5.1453, |
| "lr": 0.0007732867132867133, |
| "step": 2122, |
| "tokens_trained": 0.201688872 |
| }, |
| { |
| "epoch": 0.6025531914893617, |
| "grad_norm": 1.1250742673873901, |
| "loss": 5.173, |
| "lr": 0.0007730069930069931, |
| "step": 2124, |
| "tokens_trained": 0.201880504 |
| }, |
| { |
| "epoch": 0.6028368794326241, |
| "eval_loss": 5.190411567687988, |
| "eval_runtime": 20.812, |
| "step": 2125, |
| "tokens_trained": 0.201976984 |
| }, |
| { |
| "epoch": 0.6031205673758865, |
| "grad_norm": 1.2974287271499634, |
| "loss": 5.1878, |
| "lr": 0.0007727272727272727, |
| "step": 2126, |
| "tokens_trained": 0.20207104 |
| }, |
| { |
| "epoch": 0.6036879432624114, |
| "grad_norm": 1.251120924949646, |
| "loss": 5.203, |
| "lr": 0.0007724475524475525, |
| "step": 2128, |
| "tokens_trained": 0.202261848 |
| }, |
| { |
| "epoch": 0.6042553191489362, |
| "grad_norm": 1.3494654893875122, |
| "loss": 5.1981, |
| "lr": 0.0007721678321678322, |
| "step": 2130, |
| "tokens_trained": 0.202452936 |
| }, |
| { |
| "epoch": 0.604822695035461, |
| "grad_norm": 1.2586653232574463, |
| "loss": 5.1786, |
| "lr": 0.0007718881118881119, |
| "step": 2132, |
| "tokens_trained": 0.202642168 |
| }, |
| { |
| "epoch": 0.6053900709219858, |
| "grad_norm": 1.228868842124939, |
| "loss": 5.1651, |
| "lr": 0.0007716083916083916, |
| "step": 2134, |
| "tokens_trained": 0.202830528 |
| }, |
| { |
| "epoch": 0.6059574468085106, |
| "grad_norm": 1.25627863407135, |
| "loss": 5.2033, |
| "lr": 0.0007713286713286713, |
| "step": 2136, |
| "tokens_trained": 0.203022216 |
| }, |
| { |
| "epoch": 0.6065248226950355, |
| "grad_norm": 1.1568467617034912, |
| "loss": 5.1659, |
| "lr": 0.0007710489510489512, |
| "step": 2138, |
| "tokens_trained": 0.203211696 |
| }, |
| { |
| "epoch": 0.6070921985815603, |
| "grad_norm": 1.1502138376235962, |
| "loss": 5.1935, |
| "lr": 0.0007707692307692308, |
| "step": 2140, |
| "tokens_trained": 0.203403224 |
| }, |
| { |
| "epoch": 0.6076595744680852, |
| "grad_norm": 1.2491158246994019, |
| "loss": 5.1367, |
| "lr": 0.0007704895104895105, |
| "step": 2142, |
| "tokens_trained": 0.203594912 |
| }, |
| { |
| "epoch": 0.6082269503546099, |
| "grad_norm": 1.3012075424194336, |
| "loss": 5.1954, |
| "lr": 0.0007702097902097902, |
| "step": 2144, |
| "tokens_trained": 0.203787032 |
| }, |
| { |
| "epoch": 0.6087943262411347, |
| "grad_norm": 1.2956688404083252, |
| "loss": 5.2255, |
| "lr": 0.0007699300699300699, |
| "step": 2146, |
| "tokens_trained": 0.203979064 |
| }, |
| { |
| "epoch": 0.6093617021276596, |
| "grad_norm": 1.3562579154968262, |
| "loss": 5.2371, |
| "lr": 0.0007696503496503497, |
| "step": 2148, |
| "tokens_trained": 0.20416828 |
| }, |
| { |
| "epoch": 0.6099290780141844, |
| "grad_norm": 1.2726640701293945, |
| "loss": 5.154, |
| "lr": 0.0007693706293706294, |
| "step": 2150, |
| "tokens_trained": 0.20435532 |
| }, |
| { |
| "epoch": 0.6104964539007092, |
| "grad_norm": 1.1975597143173218, |
| "loss": 5.1559, |
| "lr": 0.000769090909090909, |
| "step": 2152, |
| "tokens_trained": 0.204545416 |
| }, |
| { |
| "epoch": 0.6110638297872341, |
| "grad_norm": 1.2840410470962524, |
| "loss": 5.2558, |
| "lr": 0.0007688111888111888, |
| "step": 2154, |
| "tokens_trained": 0.204734752 |
| }, |
| { |
| "epoch": 0.6116312056737588, |
| "grad_norm": 1.4807062149047852, |
| "loss": 5.229, |
| "lr": 0.0007685314685314686, |
| "step": 2156, |
| "tokens_trained": 0.204925432 |
| }, |
| { |
| "epoch": 0.6121985815602837, |
| "grad_norm": 1.3909307718276978, |
| "loss": 5.2128, |
| "lr": 0.0007682517482517483, |
| "step": 2158, |
| "tokens_trained": 0.205117624 |
| }, |
| { |
| "epoch": 0.6127659574468085, |
| "grad_norm": 1.3998613357543945, |
| "loss": 5.1344, |
| "lr": 0.000767972027972028, |
| "step": 2160, |
| "tokens_trained": 0.205309032 |
| }, |
| { |
| "epoch": 0.6133333333333333, |
| "grad_norm": 1.3821474313735962, |
| "loss": 5.2223, |
| "lr": 0.0007676923076923077, |
| "step": 2162, |
| "tokens_trained": 0.205498112 |
| }, |
| { |
| "epoch": 0.6139007092198582, |
| "grad_norm": 1.280150294303894, |
| "loss": 5.1357, |
| "lr": 0.0007674125874125874, |
| "step": 2164, |
| "tokens_trained": 0.205686112 |
| }, |
| { |
| "epoch": 0.614468085106383, |
| "grad_norm": 1.2361094951629639, |
| "loss": 5.1285, |
| "lr": 0.0007671328671328672, |
| "step": 2166, |
| "tokens_trained": 0.20587828 |
| }, |
| { |
| "epoch": 0.6150354609929078, |
| "grad_norm": 1.1495496034622192, |
| "loss": 5.1597, |
| "lr": 0.0007668531468531469, |
| "step": 2168, |
| "tokens_trained": 0.206068272 |
| }, |
| { |
| "epoch": 0.6156028368794326, |
| "grad_norm": 1.2377156019210815, |
| "loss": 5.1208, |
| "lr": 0.0007665734265734265, |
| "step": 2170, |
| "tokens_trained": 0.206257272 |
| }, |
| { |
| "epoch": 0.6161702127659574, |
| "grad_norm": 1.226664423942566, |
| "loss": 5.2143, |
| "lr": 0.0007662937062937063, |
| "step": 2172, |
| "tokens_trained": 0.206449824 |
| }, |
| { |
| "epoch": 0.6167375886524823, |
| "grad_norm": 1.1939537525177002, |
| "loss": 5.0847, |
| "lr": 0.000766013986013986, |
| "step": 2174, |
| "tokens_trained": 0.206636992 |
| }, |
| { |
| "epoch": 0.6173049645390071, |
| "grad_norm": 1.233585238456726, |
| "loss": 5.1647, |
| "lr": 0.0007657342657342658, |
| "step": 2176, |
| "tokens_trained": 0.206828288 |
| }, |
| { |
| "epoch": 0.617872340425532, |
| "grad_norm": 1.3282006978988647, |
| "loss": 5.1748, |
| "lr": 0.0007654545454545455, |
| "step": 2178, |
| "tokens_trained": 0.207019064 |
| }, |
| { |
| "epoch": 0.6184397163120567, |
| "grad_norm": 1.2299532890319824, |
| "loss": 5.248, |
| "lr": 0.0007651748251748251, |
| "step": 2180, |
| "tokens_trained": 0.20720844 |
| }, |
| { |
| "epoch": 0.6190070921985815, |
| "grad_norm": 1.279590129852295, |
| "loss": 5.1467, |
| "lr": 0.0007648951048951049, |
| "step": 2182, |
| "tokens_trained": 0.207398952 |
| }, |
| { |
| "epoch": 0.6195744680851064, |
| "grad_norm": 1.30775785446167, |
| "loss": 5.1981, |
| "lr": 0.0007646153846153846, |
| "step": 2184, |
| "tokens_trained": 0.207589224 |
| }, |
| { |
| "epoch": 0.6201418439716312, |
| "grad_norm": 1.2829056978225708, |
| "loss": 5.1976, |
| "lr": 0.0007643356643356644, |
| "step": 2186, |
| "tokens_trained": 0.20778024 |
| }, |
| { |
| "epoch": 0.6207092198581561, |
| "grad_norm": 1.2149474620819092, |
| "loss": 5.2186, |
| "lr": 0.000764055944055944, |
| "step": 2188, |
| "tokens_trained": 0.207969176 |
| }, |
| { |
| "epoch": 0.6212765957446809, |
| "grad_norm": 1.239912748336792, |
| "loss": 5.15, |
| "lr": 0.0007637762237762238, |
| "step": 2190, |
| "tokens_trained": 0.208159016 |
| }, |
| { |
| "epoch": 0.6218439716312056, |
| "grad_norm": 1.322252869606018, |
| "loss": 5.2447, |
| "lr": 0.0007634965034965035, |
| "step": 2192, |
| "tokens_trained": 0.2083502 |
| }, |
| { |
| "epoch": 0.6224113475177305, |
| "grad_norm": 1.1804618835449219, |
| "loss": 5.1924, |
| "lr": 0.0007632167832167833, |
| "step": 2194, |
| "tokens_trained": 0.208539616 |
| }, |
| { |
| "epoch": 0.6229787234042553, |
| "grad_norm": 1.2914003133773804, |
| "loss": 5.1559, |
| "lr": 0.000762937062937063, |
| "step": 2196, |
| "tokens_trained": 0.208731032 |
| }, |
| { |
| "epoch": 0.6235460992907801, |
| "grad_norm": 1.2175878286361694, |
| "loss": 5.1335, |
| "lr": 0.0007626573426573426, |
| "step": 2198, |
| "tokens_trained": 0.208923952 |
| }, |
| { |
| "epoch": 0.624113475177305, |
| "grad_norm": 1.2267946004867554, |
| "loss": 5.1697, |
| "lr": 0.0007623776223776224, |
| "step": 2200, |
| "tokens_trained": 0.20911168 |
| }, |
| { |
| "epoch": 0.6246808510638298, |
| "grad_norm": 1.2482635974884033, |
| "loss": 5.1986, |
| "lr": 0.0007620979020979021, |
| "step": 2202, |
| "tokens_trained": 0.209299504 |
| }, |
| { |
| "epoch": 0.6252482269503546, |
| "grad_norm": 1.3256076574325562, |
| "loss": 5.1955, |
| "lr": 0.0007618181818181819, |
| "step": 2204, |
| "tokens_trained": 0.20948936 |
| }, |
| { |
| "epoch": 0.6258156028368794, |
| "grad_norm": 1.205692172050476, |
| "loss": 5.1175, |
| "lr": 0.0007615384615384615, |
| "step": 2206, |
| "tokens_trained": 0.209678072 |
| }, |
| { |
| "epoch": 0.6263829787234042, |
| "grad_norm": 1.2371326684951782, |
| "loss": 5.1798, |
| "lr": 0.0007612587412587412, |
| "step": 2208, |
| "tokens_trained": 0.209868904 |
| }, |
| { |
| "epoch": 0.6269503546099291, |
| "grad_norm": 1.1657975912094116, |
| "loss": 5.159, |
| "lr": 0.000760979020979021, |
| "step": 2210, |
| "tokens_trained": 0.210060992 |
| }, |
| { |
| "epoch": 0.6275177304964539, |
| "grad_norm": 1.18202543258667, |
| "loss": 5.2157, |
| "lr": 0.0007606993006993007, |
| "step": 2212, |
| "tokens_trained": 0.210252096 |
| }, |
| { |
| "epoch": 0.6280851063829788, |
| "grad_norm": 1.220446228981018, |
| "loss": 5.1677, |
| "lr": 0.0007604195804195805, |
| "step": 2214, |
| "tokens_trained": 0.210444176 |
| }, |
| { |
| "epoch": 0.6286524822695035, |
| "grad_norm": 1.1070069074630737, |
| "loss": 5.1702, |
| "lr": 0.0007601398601398601, |
| "step": 2216, |
| "tokens_trained": 0.210633376 |
| }, |
| { |
| "epoch": 0.6292198581560283, |
| "grad_norm": 1.3031543493270874, |
| "loss": 5.2253, |
| "lr": 0.0007598601398601399, |
| "step": 2218, |
| "tokens_trained": 0.21082368 |
| }, |
| { |
| "epoch": 0.6297872340425532, |
| "grad_norm": 1.0999404191970825, |
| "loss": 5.1942, |
| "lr": 0.0007595804195804196, |
| "step": 2220, |
| "tokens_trained": 0.211013448 |
| }, |
| { |
| "epoch": 0.630354609929078, |
| "grad_norm": 1.2241060733795166, |
| "loss": 5.1408, |
| "lr": 0.0007593006993006993, |
| "step": 2222, |
| "tokens_trained": 0.211205176 |
| }, |
| { |
| "epoch": 0.6309219858156029, |
| "grad_norm": 1.3057242631912231, |
| "loss": 5.2234, |
| "lr": 0.000759020979020979, |
| "step": 2224, |
| "tokens_trained": 0.211396464 |
| }, |
| { |
| "epoch": 0.6314893617021277, |
| "grad_norm": 1.2667888402938843, |
| "loss": 5.1675, |
| "lr": 0.0007587412587412587, |
| "step": 2226, |
| "tokens_trained": 0.211587608 |
| }, |
| { |
| "epoch": 0.6320567375886524, |
| "grad_norm": 1.1653670072555542, |
| "loss": 5.2081, |
| "lr": 0.0007584615384615385, |
| "step": 2228, |
| "tokens_trained": 0.211779832 |
| }, |
| { |
| "epoch": 0.6326241134751773, |
| "grad_norm": 1.1786928176879883, |
| "loss": 5.1772, |
| "lr": 0.0007581818181818182, |
| "step": 2230, |
| "tokens_trained": 0.211971584 |
| }, |
| { |
| "epoch": 0.6331914893617021, |
| "grad_norm": 1.242872714996338, |
| "loss": 5.1378, |
| "lr": 0.000757902097902098, |
| "step": 2232, |
| "tokens_trained": 0.212161024 |
| }, |
| { |
| "epoch": 0.633758865248227, |
| "grad_norm": 1.2831401824951172, |
| "loss": 5.1488, |
| "lr": 0.0007576223776223776, |
| "step": 2234, |
| "tokens_trained": 0.21235084 |
| }, |
| { |
| "epoch": 0.6343262411347518, |
| "grad_norm": 1.269600510597229, |
| "loss": 5.1454, |
| "lr": 0.0007573426573426573, |
| "step": 2236, |
| "tokens_trained": 0.212539504 |
| }, |
| { |
| "epoch": 0.6348936170212766, |
| "grad_norm": 1.2224805355072021, |
| "loss": 5.1123, |
| "lr": 0.0007570629370629371, |
| "step": 2238, |
| "tokens_trained": 0.21272884 |
| }, |
| { |
| "epoch": 0.6354609929078014, |
| "grad_norm": 1.2404342889785767, |
| "loss": 5.2023, |
| "lr": 0.0007567832167832168, |
| "step": 2240, |
| "tokens_trained": 0.212920128 |
| }, |
| { |
| "epoch": 0.6360283687943262, |
| "grad_norm": 1.1551696062088013, |
| "loss": 5.1529, |
| "lr": 0.0007565034965034965, |
| "step": 2242, |
| "tokens_trained": 0.213110744 |
| }, |
| { |
| "epoch": 0.6365957446808511, |
| "grad_norm": 1.2342238426208496, |
| "loss": 5.182, |
| "lr": 0.0007562237762237762, |
| "step": 2244, |
| "tokens_trained": 0.213298584 |
| }, |
| { |
| "epoch": 0.6371631205673759, |
| "grad_norm": 1.2631146907806396, |
| "loss": 5.1442, |
| "lr": 0.000755944055944056, |
| "step": 2246, |
| "tokens_trained": 0.213488512 |
| }, |
| { |
| "epoch": 0.6377304964539007, |
| "grad_norm": 1.2031443119049072, |
| "loss": 5.1041, |
| "lr": 0.0007556643356643357, |
| "step": 2248, |
| "tokens_trained": 0.21367964 |
| }, |
| { |
| "epoch": 0.6382978723404256, |
| "grad_norm": 1.127889633178711, |
| "loss": 5.1889, |
| "lr": 0.0007553846153846154, |
| "step": 2250, |
| "tokens_trained": 0.213871584 |
| }, |
| { |
| "epoch": 0.6382978723404256, |
| "eval_loss": 5.1714253425598145, |
| "eval_runtime": 20.5005, |
| "step": 2250, |
| "tokens_trained": 0.213871584 |
| }, |
| { |
| "epoch": 0.6388652482269503, |
| "grad_norm": 1.1281750202178955, |
| "loss": 5.1039, |
| "lr": 0.0007551048951048951, |
| "step": 2252, |
| "tokens_trained": 0.214061624 |
| }, |
| { |
| "epoch": 0.6394326241134751, |
| "grad_norm": 1.1058608293533325, |
| "loss": 5.1562, |
| "lr": 0.0007548251748251748, |
| "step": 2254, |
| "tokens_trained": 0.214252024 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.0579496622085571, |
| "loss": 5.1476, |
| "lr": 0.0007545454545454546, |
| "step": 2256, |
| "tokens_trained": 0.214442624 |
| }, |
| { |
| "epoch": 0.6405673758865248, |
| "grad_norm": 1.1370742321014404, |
| "loss": 5.1948, |
| "lr": 0.0007542657342657343, |
| "step": 2258, |
| "tokens_trained": 0.214634016 |
| }, |
| { |
| "epoch": 0.6411347517730497, |
| "grad_norm": 1.1118457317352295, |
| "loss": 5.169, |
| "lr": 0.000753986013986014, |
| "step": 2260, |
| "tokens_trained": 0.214823368 |
| }, |
| { |
| "epoch": 0.6417021276595745, |
| "grad_norm": 1.039004921913147, |
| "loss": 5.1454, |
| "lr": 0.0007537062937062937, |
| "step": 2262, |
| "tokens_trained": 0.21501196 |
| }, |
| { |
| "epoch": 0.6422695035460992, |
| "grad_norm": 1.2534265518188477, |
| "loss": 5.1455, |
| "lr": 0.0007534265734265734, |
| "step": 2264, |
| "tokens_trained": 0.215200808 |
| }, |
| { |
| "epoch": 0.6428368794326241, |
| "grad_norm": 1.2437689304351807, |
| "loss": 5.1966, |
| "lr": 0.0007531468531468532, |
| "step": 2266, |
| "tokens_trained": 0.21539036 |
| }, |
| { |
| "epoch": 0.6434042553191489, |
| "grad_norm": 1.1795995235443115, |
| "loss": 5.1716, |
| "lr": 0.0007528671328671329, |
| "step": 2268, |
| "tokens_trained": 0.215582088 |
| }, |
| { |
| "epoch": 0.6439716312056738, |
| "grad_norm": 1.3241360187530518, |
| "loss": 5.1638, |
| "lr": 0.0007525874125874126, |
| "step": 2270, |
| "tokens_trained": 0.215771936 |
| }, |
| { |
| "epoch": 0.6445390070921986, |
| "grad_norm": 1.2526317834854126, |
| "loss": 5.1067, |
| "lr": 0.0007523076923076923, |
| "step": 2272, |
| "tokens_trained": 0.215960792 |
| }, |
| { |
| "epoch": 0.6451063829787234, |
| "grad_norm": 1.249042272567749, |
| "loss": 5.1466, |
| "lr": 0.0007520279720279721, |
| "step": 2274, |
| "tokens_trained": 0.216151448 |
| }, |
| { |
| "epoch": 0.6456737588652482, |
| "grad_norm": 1.1926413774490356, |
| "loss": 5.1886, |
| "lr": 0.0007517482517482518, |
| "step": 2276, |
| "tokens_trained": 0.216340368 |
| }, |
| { |
| "epoch": 0.646241134751773, |
| "grad_norm": 1.1615192890167236, |
| "loss": 5.1538, |
| "lr": 0.0007514685314685314, |
| "step": 2278, |
| "tokens_trained": 0.216531264 |
| }, |
| { |
| "epoch": 0.6468085106382979, |
| "grad_norm": 1.1265521049499512, |
| "loss": 5.1518, |
| "lr": 0.0007511888111888112, |
| "step": 2280, |
| "tokens_trained": 0.216722024 |
| }, |
| { |
| "epoch": 0.6473758865248227, |
| "grad_norm": 1.0598393678665161, |
| "loss": 5.1776, |
| "lr": 0.0007509090909090909, |
| "step": 2282, |
| "tokens_trained": 0.216913232 |
| }, |
| { |
| "epoch": 0.6479432624113475, |
| "grad_norm": 1.1727370023727417, |
| "loss": 5.2083, |
| "lr": 0.0007506293706293707, |
| "step": 2284, |
| "tokens_trained": 0.217103136 |
| }, |
| { |
| "epoch": 0.6485106382978724, |
| "grad_norm": 1.1411634683609009, |
| "loss": 5.182, |
| "lr": 0.0007503496503496504, |
| "step": 2286, |
| "tokens_trained": 0.21729368 |
| }, |
| { |
| "epoch": 0.6490780141843971, |
| "grad_norm": 1.2293574810028076, |
| "loss": 5.1725, |
| "lr": 0.00075006993006993, |
| "step": 2288, |
| "tokens_trained": 0.217485624 |
| }, |
| { |
| "epoch": 0.649645390070922, |
| "grad_norm": 1.3079198598861694, |
| "loss": 5.1531, |
| "lr": 0.0007497902097902098, |
| "step": 2290, |
| "tokens_trained": 0.217675192 |
| }, |
| { |
| "epoch": 0.6502127659574468, |
| "grad_norm": 1.1579710245132446, |
| "loss": 5.1162, |
| "lr": 0.0007495104895104895, |
| "step": 2292, |
| "tokens_trained": 0.2178658 |
| }, |
| { |
| "epoch": 0.6507801418439716, |
| "grad_norm": 1.1968539953231812, |
| "loss": 5.1652, |
| "lr": 0.0007492307692307693, |
| "step": 2294, |
| "tokens_trained": 0.218057984 |
| }, |
| { |
| "epoch": 0.6513475177304965, |
| "grad_norm": 1.3666965961456299, |
| "loss": 5.2035, |
| "lr": 0.0007489510489510489, |
| "step": 2296, |
| "tokens_trained": 0.218249704 |
| }, |
| { |
| "epoch": 0.6519148936170213, |
| "grad_norm": 1.3615487813949585, |
| "loss": 5.1704, |
| "lr": 0.0007486713286713287, |
| "step": 2298, |
| "tokens_trained": 0.218441792 |
| }, |
| { |
| "epoch": 0.6524822695035462, |
| "grad_norm": 1.2289810180664062, |
| "loss": 5.1683, |
| "lr": 0.0007483916083916084, |
| "step": 2300, |
| "tokens_trained": 0.218630624 |
| }, |
| { |
| "epoch": 0.6530496453900709, |
| "grad_norm": 1.1299561262130737, |
| "loss": 5.1672, |
| "lr": 0.0007481118881118882, |
| "step": 2302, |
| "tokens_trained": 0.218819928 |
| }, |
| { |
| "epoch": 0.6536170212765957, |
| "grad_norm": 1.186132550239563, |
| "loss": 5.1456, |
| "lr": 0.0007478321678321679, |
| "step": 2304, |
| "tokens_trained": 0.219008792 |
| }, |
| { |
| "epoch": 0.6541843971631206, |
| "grad_norm": 1.2106919288635254, |
| "loss": 5.1998, |
| "lr": 0.0007475524475524475, |
| "step": 2306, |
| "tokens_trained": 0.219198584 |
| }, |
| { |
| "epoch": 0.6547517730496454, |
| "grad_norm": 1.2485368251800537, |
| "loss": 5.1473, |
| "lr": 0.0007472727272727273, |
| "step": 2308, |
| "tokens_trained": 0.219386768 |
| }, |
| { |
| "epoch": 0.6553191489361702, |
| "grad_norm": 1.1855547428131104, |
| "loss": 5.1721, |
| "lr": 0.000746993006993007, |
| "step": 2310, |
| "tokens_trained": 0.219575904 |
| }, |
| { |
| "epoch": 0.655886524822695, |
| "grad_norm": 1.3077043294906616, |
| "loss": 5.1444, |
| "lr": 0.0007467132867132868, |
| "step": 2312, |
| "tokens_trained": 0.219767712 |
| }, |
| { |
| "epoch": 0.6564539007092198, |
| "grad_norm": 1.3514399528503418, |
| "loss": 5.198, |
| "lr": 0.0007464335664335664, |
| "step": 2314, |
| "tokens_trained": 0.219959384 |
| }, |
| { |
| "epoch": 0.6570212765957447, |
| "grad_norm": 1.0906041860580444, |
| "loss": 5.115, |
| "lr": 0.0007461538461538462, |
| "step": 2316, |
| "tokens_trained": 0.2201464 |
| }, |
| { |
| "epoch": 0.6575886524822695, |
| "grad_norm": 1.154425859451294, |
| "loss": 5.1186, |
| "lr": 0.0007458741258741259, |
| "step": 2318, |
| "tokens_trained": 0.220336992 |
| }, |
| { |
| "epoch": 0.6581560283687943, |
| "grad_norm": 1.1141375303268433, |
| "loss": 5.1709, |
| "lr": 0.0007455944055944056, |
| "step": 2320, |
| "tokens_trained": 0.220525928 |
| }, |
| { |
| "epoch": 0.6587234042553192, |
| "grad_norm": 1.0958452224731445, |
| "loss": 5.1641, |
| "lr": 0.0007453146853146854, |
| "step": 2322, |
| "tokens_trained": 0.220715056 |
| }, |
| { |
| "epoch": 0.659290780141844, |
| "grad_norm": 1.168017029762268, |
| "loss": 5.1666, |
| "lr": 0.000745034965034965, |
| "step": 2324, |
| "tokens_trained": 0.220905264 |
| }, |
| { |
| "epoch": 0.6598581560283688, |
| "grad_norm": 1.044488549232483, |
| "loss": 5.2079, |
| "lr": 0.0007447552447552448, |
| "step": 2326, |
| "tokens_trained": 0.221096736 |
| }, |
| { |
| "epoch": 0.6604255319148936, |
| "grad_norm": 1.2333874702453613, |
| "loss": 5.1166, |
| "lr": 0.0007444755244755245, |
| "step": 2328, |
| "tokens_trained": 0.221287184 |
| }, |
| { |
| "epoch": 0.6609929078014184, |
| "grad_norm": 1.1800497770309448, |
| "loss": 5.1561, |
| "lr": 0.0007441958041958043, |
| "step": 2330, |
| "tokens_trained": 0.221477312 |
| }, |
| { |
| "epoch": 0.6615602836879433, |
| "grad_norm": 1.118755578994751, |
| "loss": 5.1513, |
| "lr": 0.0007439160839160839, |
| "step": 2332, |
| "tokens_trained": 0.221665208 |
| }, |
| { |
| "epoch": 0.6621276595744681, |
| "grad_norm": 1.2018475532531738, |
| "loss": 5.1007, |
| "lr": 0.0007436363636363636, |
| "step": 2334, |
| "tokens_trained": 0.221855608 |
| }, |
| { |
| "epoch": 0.662695035460993, |
| "grad_norm": 1.1832036972045898, |
| "loss": 5.0944, |
| "lr": 0.0007433566433566433, |
| "step": 2336, |
| "tokens_trained": 0.222043856 |
| }, |
| { |
| "epoch": 0.6632624113475177, |
| "grad_norm": 1.3179196119308472, |
| "loss": 5.1645, |
| "lr": 0.0007430769230769231, |
| "step": 2338, |
| "tokens_trained": 0.222235728 |
| }, |
| { |
| "epoch": 0.6638297872340425, |
| "grad_norm": 1.1313154697418213, |
| "loss": 5.1733, |
| "lr": 0.0007427972027972029, |
| "step": 2340, |
| "tokens_trained": 0.222424688 |
| }, |
| { |
| "epoch": 0.6643971631205674, |
| "grad_norm": 1.2135043144226074, |
| "loss": 5.1291, |
| "lr": 0.0007425174825174825, |
| "step": 2342, |
| "tokens_trained": 0.222611952 |
| }, |
| { |
| "epoch": 0.6649645390070922, |
| "grad_norm": 1.2418344020843506, |
| "loss": 5.178, |
| "lr": 0.0007422377622377622, |
| "step": 2344, |
| "tokens_trained": 0.222803264 |
| }, |
| { |
| "epoch": 0.6655319148936171, |
| "grad_norm": 1.2896099090576172, |
| "loss": 5.1772, |
| "lr": 0.000741958041958042, |
| "step": 2346, |
| "tokens_trained": 0.22299108 |
| }, |
| { |
| "epoch": 0.6660992907801419, |
| "grad_norm": 1.150012731552124, |
| "loss": 5.1334, |
| "lr": 0.0007416783216783217, |
| "step": 2348, |
| "tokens_trained": 0.223182336 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 1.307721495628357, |
| "loss": 5.0898, |
| "lr": 0.0007413986013986014, |
| "step": 2350, |
| "tokens_trained": 0.223371664 |
| }, |
| { |
| "epoch": 0.6672340425531915, |
| "grad_norm": 1.2633092403411865, |
| "loss": 5.1344, |
| "lr": 0.0007411188811188811, |
| "step": 2352, |
| "tokens_trained": 0.223561984 |
| }, |
| { |
| "epoch": 0.6678014184397163, |
| "grad_norm": 1.1801539659500122, |
| "loss": 5.1242, |
| "lr": 0.0007408391608391608, |
| "step": 2354, |
| "tokens_trained": 0.223750344 |
| }, |
| { |
| "epoch": 0.6683687943262412, |
| "grad_norm": 1.1279330253601074, |
| "loss": 5.1348, |
| "lr": 0.0007405594405594406, |
| "step": 2356, |
| "tokens_trained": 0.223941528 |
| }, |
| { |
| "epoch": 0.668936170212766, |
| "grad_norm": 1.193912148475647, |
| "loss": 5.1823, |
| "lr": 0.0007402797202797204, |
| "step": 2358, |
| "tokens_trained": 0.224132064 |
| }, |
| { |
| "epoch": 0.6695035460992907, |
| "grad_norm": 1.1424062252044678, |
| "loss": 5.1452, |
| "lr": 0.00074, |
| "step": 2360, |
| "tokens_trained": 0.2243216 |
| }, |
| { |
| "epoch": 0.6700709219858156, |
| "grad_norm": 1.1543093919754028, |
| "loss": 5.1199, |
| "lr": 0.0007397202797202797, |
| "step": 2362, |
| "tokens_trained": 0.224509992 |
| }, |
| { |
| "epoch": 0.6706382978723404, |
| "grad_norm": 1.2291040420532227, |
| "loss": 5.0824, |
| "lr": 0.0007394405594405595, |
| "step": 2364, |
| "tokens_trained": 0.22470124 |
| }, |
| { |
| "epoch": 0.6712056737588652, |
| "grad_norm": 1.1839559078216553, |
| "loss": 5.1486, |
| "lr": 0.0007391608391608392, |
| "step": 2366, |
| "tokens_trained": 0.224893488 |
| }, |
| { |
| "epoch": 0.6717730496453901, |
| "grad_norm": 1.1374263763427734, |
| "loss": 5.1482, |
| "lr": 0.0007388811188811189, |
| "step": 2368, |
| "tokens_trained": 0.225083304 |
| }, |
| { |
| "epoch": 0.6723404255319149, |
| "grad_norm": 1.2041044235229492, |
| "loss": 5.1055, |
| "lr": 0.0007386013986013986, |
| "step": 2370, |
| "tokens_trained": 0.225273256 |
| }, |
| { |
| "epoch": 0.6729078014184398, |
| "grad_norm": 1.1405609846115112, |
| "loss": 5.1647, |
| "lr": 0.0007383216783216782, |
| "step": 2372, |
| "tokens_trained": 0.225461976 |
| }, |
| { |
| "epoch": 0.6734751773049645, |
| "grad_norm": 1.112979531288147, |
| "loss": 5.1232, |
| "lr": 0.0007380419580419581, |
| "step": 2374, |
| "tokens_trained": 0.225651248 |
| }, |
| { |
| "epoch": 0.6737588652482269, |
| "eval_loss": 5.160866737365723, |
| "eval_runtime": 20.3049, |
| "step": 2375, |
| "tokens_trained": 0.22574612 |
| }, |
| { |
| "epoch": 0.6740425531914893, |
| "grad_norm": 1.2868081331253052, |
| "loss": 5.1802, |
| "lr": 0.0007377622377622378, |
| "step": 2376, |
| "tokens_trained": 0.225840616 |
| }, |
| { |
| "epoch": 0.6746099290780142, |
| "grad_norm": 1.0904244184494019, |
| "loss": 5.1093, |
| "lr": 0.0007374825174825175, |
| "step": 2378, |
| "tokens_trained": 0.22602952 |
| }, |
| { |
| "epoch": 0.675177304964539, |
| "grad_norm": 1.182820200920105, |
| "loss": 5.1425, |
| "lr": 0.0007372027972027972, |
| "step": 2380, |
| "tokens_trained": 0.226219912 |
| }, |
| { |
| "epoch": 0.6757446808510639, |
| "grad_norm": 1.29615318775177, |
| "loss": 5.2044, |
| "lr": 0.000736923076923077, |
| "step": 2382, |
| "tokens_trained": 0.226409832 |
| }, |
| { |
| "epoch": 0.6763120567375887, |
| "grad_norm": 1.2440109252929688, |
| "loss": 5.1722, |
| "lr": 0.0007366433566433567, |
| "step": 2384, |
| "tokens_trained": 0.226600912 |
| }, |
| { |
| "epoch": 0.6768794326241134, |
| "grad_norm": 1.2176823616027832, |
| "loss": 5.1237, |
| "lr": 0.0007363636363636363, |
| "step": 2386, |
| "tokens_trained": 0.226788136 |
| }, |
| { |
| "epoch": 0.6774468085106383, |
| "grad_norm": 1.1725387573242188, |
| "loss": 5.1334, |
| "lr": 0.0007360839160839161, |
| "step": 2388, |
| "tokens_trained": 0.22697924 |
| }, |
| { |
| "epoch": 0.6780141843971631, |
| "grad_norm": 1.0678813457489014, |
| "loss": 5.1306, |
| "lr": 0.0007358041958041957, |
| "step": 2390, |
| "tokens_trained": 0.227169576 |
| }, |
| { |
| "epoch": 0.678581560283688, |
| "grad_norm": 1.1266731023788452, |
| "loss": 5.1956, |
| "lr": 0.0007355244755244756, |
| "step": 2392, |
| "tokens_trained": 0.227361776 |
| }, |
| { |
| "epoch": 0.6791489361702128, |
| "grad_norm": 1.2048848867416382, |
| "loss": 5.1599, |
| "lr": 0.0007352447552447553, |
| "step": 2394, |
| "tokens_trained": 0.227551768 |
| }, |
| { |
| "epoch": 0.6797163120567375, |
| "grad_norm": 1.2414182424545288, |
| "loss": 5.1836, |
| "lr": 0.000734965034965035, |
| "step": 2396, |
| "tokens_trained": 0.227743072 |
| }, |
| { |
| "epoch": 0.6802836879432624, |
| "grad_norm": 1.1587010622024536, |
| "loss": 5.1589, |
| "lr": 0.0007346853146853147, |
| "step": 2398, |
| "tokens_trained": 0.227933848 |
| }, |
| { |
| "epoch": 0.6808510638297872, |
| "grad_norm": 1.1487596035003662, |
| "loss": 5.1494, |
| "lr": 0.0007344055944055944, |
| "step": 2400, |
| "tokens_trained": 0.228122304 |
| }, |
| { |
| "epoch": 0.6814184397163121, |
| "grad_norm": 1.1008368730545044, |
| "loss": 5.1614, |
| "lr": 0.0007341258741258742, |
| "step": 2402, |
| "tokens_trained": 0.228311624 |
| }, |
| { |
| "epoch": 0.6819858156028369, |
| "grad_norm": 1.0571539402008057, |
| "loss": 5.1373, |
| "lr": 0.0007338461538461538, |
| "step": 2404, |
| "tokens_trained": 0.228501208 |
| }, |
| { |
| "epoch": 0.6825531914893617, |
| "grad_norm": 1.1685987710952759, |
| "loss": 5.1439, |
| "lr": 0.0007335664335664336, |
| "step": 2406, |
| "tokens_trained": 0.228691272 |
| }, |
| { |
| "epoch": 0.6831205673758866, |
| "grad_norm": 1.2319012880325317, |
| "loss": 5.1949, |
| "lr": 0.0007332867132867132, |
| "step": 2408, |
| "tokens_trained": 0.228881608 |
| }, |
| { |
| "epoch": 0.6836879432624113, |
| "grad_norm": 1.1806107759475708, |
| "loss": 5.1467, |
| "lr": 0.0007330069930069931, |
| "step": 2410, |
| "tokens_trained": 0.229073152 |
| }, |
| { |
| "epoch": 0.6842553191489362, |
| "grad_norm": 1.1616697311401367, |
| "loss": 5.1553, |
| "lr": 0.0007327272727272728, |
| "step": 2412, |
| "tokens_trained": 0.229263656 |
| }, |
| { |
| "epoch": 0.684822695035461, |
| "grad_norm": 1.143112063407898, |
| "loss": 5.091, |
| "lr": 0.0007324475524475524, |
| "step": 2414, |
| "tokens_trained": 0.229454224 |
| }, |
| { |
| "epoch": 0.6853900709219858, |
| "grad_norm": 1.2467398643493652, |
| "loss": 5.1778, |
| "lr": 0.0007321678321678322, |
| "step": 2416, |
| "tokens_trained": 0.22964568 |
| }, |
| { |
| "epoch": 0.6859574468085107, |
| "grad_norm": 1.1989973783493042, |
| "loss": 5.146, |
| "lr": 0.0007318881118881119, |
| "step": 2418, |
| "tokens_trained": 0.229836448 |
| }, |
| { |
| "epoch": 0.6865248226950355, |
| "grad_norm": 1.3296927213668823, |
| "loss": 5.1446, |
| "lr": 0.0007316083916083917, |
| "step": 2420, |
| "tokens_trained": 0.230027424 |
| }, |
| { |
| "epoch": 0.6870921985815602, |
| "grad_norm": 1.256990671157837, |
| "loss": 5.1396, |
| "lr": 0.0007313286713286713, |
| "step": 2422, |
| "tokens_trained": 0.23022012 |
| }, |
| { |
| "epoch": 0.6876595744680851, |
| "grad_norm": 1.1474595069885254, |
| "loss": 5.1263, |
| "lr": 0.0007310489510489511, |
| "step": 2424, |
| "tokens_trained": 0.230410232 |
| }, |
| { |
| "epoch": 0.6882269503546099, |
| "grad_norm": 1.2070049047470093, |
| "loss": 5.1169, |
| "lr": 0.0007307692307692307, |
| "step": 2426, |
| "tokens_trained": 0.230601056 |
| }, |
| { |
| "epoch": 0.6887943262411348, |
| "grad_norm": 1.2047003507614136, |
| "loss": 5.1146, |
| "lr": 0.0007304895104895105, |
| "step": 2428, |
| "tokens_trained": 0.230791056 |
| }, |
| { |
| "epoch": 0.6893617021276596, |
| "grad_norm": 1.3246855735778809, |
| "loss": 5.1864, |
| "lr": 0.0007302097902097902, |
| "step": 2430, |
| "tokens_trained": 0.230981904 |
| }, |
| { |
| "epoch": 0.6899290780141843, |
| "grad_norm": 1.2012712955474854, |
| "loss": 5.168, |
| "lr": 0.0007299300699300699, |
| "step": 2432, |
| "tokens_trained": 0.231170976 |
| }, |
| { |
| "epoch": 0.6904964539007092, |
| "grad_norm": 1.2258418798446655, |
| "loss": 5.14, |
| "lr": 0.0007296503496503497, |
| "step": 2434, |
| "tokens_trained": 0.231362024 |
| }, |
| { |
| "epoch": 0.691063829787234, |
| "grad_norm": 1.2767595052719116, |
| "loss": 5.1775, |
| "lr": 0.0007293706293706294, |
| "step": 2436, |
| "tokens_trained": 0.23155 |
| }, |
| { |
| "epoch": 0.6916312056737589, |
| "grad_norm": 1.204324722290039, |
| "loss": 5.1357, |
| "lr": 0.0007290909090909092, |
| "step": 2438, |
| "tokens_trained": 0.231739944 |
| }, |
| { |
| "epoch": 0.6921985815602837, |
| "grad_norm": 1.1876553297042847, |
| "loss": 5.1185, |
| "lr": 0.0007288111888111888, |
| "step": 2440, |
| "tokens_trained": 0.231930448 |
| }, |
| { |
| "epoch": 0.6927659574468085, |
| "grad_norm": 1.2512568235397339, |
| "loss": 5.1212, |
| "lr": 0.0007285314685314685, |
| "step": 2442, |
| "tokens_trained": 0.23212152 |
| }, |
| { |
| "epoch": 0.6933333333333334, |
| "grad_norm": 1.2961020469665527, |
| "loss": 5.0622, |
| "lr": 0.0007282517482517482, |
| "step": 2444, |
| "tokens_trained": 0.232310856 |
| }, |
| { |
| "epoch": 0.6939007092198581, |
| "grad_norm": 1.1042410135269165, |
| "loss": 5.1317, |
| "lr": 0.000727972027972028, |
| "step": 2446, |
| "tokens_trained": 0.232499144 |
| }, |
| { |
| "epoch": 0.694468085106383, |
| "grad_norm": 1.0408610105514526, |
| "loss": 5.1562, |
| "lr": 0.0007276923076923077, |
| "step": 2448, |
| "tokens_trained": 0.232689864 |
| }, |
| { |
| "epoch": 0.6950354609929078, |
| "grad_norm": 1.1109600067138672, |
| "loss": 5.1463, |
| "lr": 0.0007274125874125874, |
| "step": 2450, |
| "tokens_trained": 0.232878912 |
| }, |
| { |
| "epoch": 0.6956028368794326, |
| "grad_norm": 1.0867618322372437, |
| "loss": 5.105, |
| "lr": 0.0007271328671328672, |
| "step": 2452, |
| "tokens_trained": 0.233069416 |
| }, |
| { |
| "epoch": 0.6961702127659575, |
| "grad_norm": 1.0342003107070923, |
| "loss": 5.1431, |
| "lr": 0.0007268531468531469, |
| "step": 2454, |
| "tokens_trained": 0.233258552 |
| }, |
| { |
| "epoch": 0.6967375886524823, |
| "grad_norm": 1.2264306545257568, |
| "loss": 5.1646, |
| "lr": 0.0007265734265734266, |
| "step": 2456, |
| "tokens_trained": 0.233448464 |
| }, |
| { |
| "epoch": 0.6973049645390071, |
| "grad_norm": 1.1715648174285889, |
| "loss": 5.1194, |
| "lr": 0.0007262937062937063, |
| "step": 2458, |
| "tokens_trained": 0.23364024 |
| }, |
| { |
| "epoch": 0.6978723404255319, |
| "grad_norm": 1.05716872215271, |
| "loss": 5.09, |
| "lr": 0.000726013986013986, |
| "step": 2460, |
| "tokens_trained": 0.233829848 |
| }, |
| { |
| "epoch": 0.6984397163120567, |
| "grad_norm": 1.1329678297042847, |
| "loss": 5.1303, |
| "lr": 0.0007257342657342657, |
| "step": 2462, |
| "tokens_trained": 0.234021368 |
| }, |
| { |
| "epoch": 0.6990070921985816, |
| "grad_norm": 1.2084178924560547, |
| "loss": 5.1393, |
| "lr": 0.0007254545454545455, |
| "step": 2464, |
| "tokens_trained": 0.234210264 |
| }, |
| { |
| "epoch": 0.6995744680851064, |
| "grad_norm": 1.0744361877441406, |
| "loss": 5.1067, |
| "lr": 0.0007251748251748252, |
| "step": 2466, |
| "tokens_trained": 0.234399616 |
| }, |
| { |
| "epoch": 0.7001418439716312, |
| "grad_norm": 1.1711128950119019, |
| "loss": 5.1226, |
| "lr": 0.0007248951048951049, |
| "step": 2468, |
| "tokens_trained": 0.234589936 |
| }, |
| { |
| "epoch": 0.700709219858156, |
| "grad_norm": 1.2188383340835571, |
| "loss": 5.1139, |
| "lr": 0.0007246153846153846, |
| "step": 2470, |
| "tokens_trained": 0.234781376 |
| }, |
| { |
| "epoch": 0.7012765957446808, |
| "grad_norm": 1.1662676334381104, |
| "loss": 5.137, |
| "lr": 0.0007243356643356644, |
| "step": 2472, |
| "tokens_trained": 0.234972192 |
| }, |
| { |
| "epoch": 0.7018439716312057, |
| "grad_norm": 1.18717622756958, |
| "loss": 5.1665, |
| "lr": 0.0007240559440559441, |
| "step": 2474, |
| "tokens_trained": 0.235162472 |
| }, |
| { |
| "epoch": 0.7024113475177305, |
| "grad_norm": 1.1546517610549927, |
| "loss": 5.1503, |
| "lr": 0.0007237762237762238, |
| "step": 2476, |
| "tokens_trained": 0.23535256 |
| }, |
| { |
| "epoch": 0.7029787234042553, |
| "grad_norm": 1.0647573471069336, |
| "loss": 5.155, |
| "lr": 0.0007234965034965035, |
| "step": 2478, |
| "tokens_trained": 0.235543424 |
| }, |
| { |
| "epoch": 0.7035460992907802, |
| "grad_norm": 1.1157219409942627, |
| "loss": 5.1561, |
| "lr": 0.0007232167832167831, |
| "step": 2480, |
| "tokens_trained": 0.23573568 |
| }, |
| { |
| "epoch": 0.7041134751773049, |
| "grad_norm": 1.1972934007644653, |
| "loss": 5.1271, |
| "lr": 0.000722937062937063, |
| "step": 2482, |
| "tokens_trained": 0.235927072 |
| }, |
| { |
| "epoch": 0.7046808510638298, |
| "grad_norm": 1.0370620489120483, |
| "loss": 5.1016, |
| "lr": 0.0007226573426573426, |
| "step": 2484, |
| "tokens_trained": 0.236116528 |
| }, |
| { |
| "epoch": 0.7052482269503546, |
| "grad_norm": 1.1389620304107666, |
| "loss": 5.1422, |
| "lr": 0.0007223776223776224, |
| "step": 2486, |
| "tokens_trained": 0.236305864 |
| }, |
| { |
| "epoch": 0.7058156028368794, |
| "grad_norm": 1.1045559644699097, |
| "loss": 5.1434, |
| "lr": 0.0007220979020979021, |
| "step": 2488, |
| "tokens_trained": 0.236494224 |
| }, |
| { |
| "epoch": 0.7063829787234043, |
| "grad_norm": 1.1014395952224731, |
| "loss": 5.1462, |
| "lr": 0.0007218181818181819, |
| "step": 2490, |
| "tokens_trained": 0.236684376 |
| }, |
| { |
| "epoch": 0.706950354609929, |
| "grad_norm": 1.0460759401321411, |
| "loss": 5.126, |
| "lr": 0.0007215384615384616, |
| "step": 2492, |
| "tokens_trained": 0.236875272 |
| }, |
| { |
| "epoch": 0.707517730496454, |
| "grad_norm": 1.0848767757415771, |
| "loss": 5.1387, |
| "lr": 0.0007212587412587412, |
| "step": 2494, |
| "tokens_trained": 0.237065552 |
| }, |
| { |
| "epoch": 0.7080851063829787, |
| "grad_norm": 1.1626802682876587, |
| "loss": 5.1509, |
| "lr": 0.000720979020979021, |
| "step": 2496, |
| "tokens_trained": 0.237254944 |
| }, |
| { |
| "epoch": 0.7086524822695035, |
| "grad_norm": 1.1846860647201538, |
| "loss": 5.098, |
| "lr": 0.0007206993006993006, |
| "step": 2498, |
| "tokens_trained": 0.237444488 |
| }, |
| { |
| "epoch": 0.7092198581560284, |
| "grad_norm": 1.2549248933792114, |
| "loss": 5.1104, |
| "lr": 0.0007204195804195805, |
| "step": 2500, |
| "tokens_trained": 0.237633528 |
| }, |
| { |
| "epoch": 0.7092198581560284, |
| "eval_loss": 5.141824245452881, |
| "eval_runtime": 20.5081, |
| "step": 2500, |
| "tokens_trained": 0.237633528 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 7650, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 125, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|