| { |
| "best_global_step": 625, |
| "best_metric": 5.630118370056152, |
| "best_model_checkpoint": "/gpfs/scratch/guoh/DNAFM/output/gencode_human_12.8k_12800/Gencode-BPE/checkpoint-625", |
| "epoch": 0.1773049645390071, |
| "eval_steps": 125, |
| "global_step": 625, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005673758865248227, |
| "grad_norm": 1275.0146484375, |
| "loss": 281.4781, |
| "lr": 2e-06, |
| "step": 2, |
| "tokens_trained": 0.000192256 |
| }, |
| { |
| "epoch": 0.0011347517730496454, |
| "grad_norm": 1437.579833984375, |
| "loss": 267.2211, |
| "lr": 6e-06, |
| "step": 4, |
| "tokens_trained": 0.000382024 |
| }, |
| { |
| "epoch": 0.001702127659574468, |
| "grad_norm": 1719.271484375, |
| "loss": 219.3822, |
| "lr": 1e-05, |
| "step": 6, |
| "tokens_trained": 0.00057072 |
| }, |
| { |
| "epoch": 0.0022695035460992908, |
| "grad_norm": 1444.94970703125, |
| "loss": 133.8172, |
| "lr": 1.4e-05, |
| "step": 8, |
| "tokens_trained": 0.000761336 |
| }, |
| { |
| "epoch": 0.0028368794326241137, |
| "grad_norm": 238.9689178466797, |
| "loss": 90.8177, |
| "lr": 1.8e-05, |
| "step": 10, |
| "tokens_trained": 0.000953248 |
| }, |
| { |
| "epoch": 0.003404255319148936, |
| "grad_norm": 158.53497314453125, |
| "loss": 84.6922, |
| "lr": 2.2e-05, |
| "step": 12, |
| "tokens_trained": 0.00114424 |
| }, |
| { |
| "epoch": 0.003971631205673759, |
| "grad_norm": 146.10595703125, |
| "loss": 76.7055, |
| "lr": 2.6e-05, |
| "step": 14, |
| "tokens_trained": 0.001334104 |
| }, |
| { |
| "epoch": 0.0045390070921985815, |
| "grad_norm": 140.69964599609375, |
| "loss": 67.9952, |
| "lr": 3e-05, |
| "step": 16, |
| "tokens_trained": 0.00152392 |
| }, |
| { |
| "epoch": 0.005106382978723404, |
| "grad_norm": 108.80303192138672, |
| "loss": 57.8088, |
| "lr": 3.4000000000000007e-05, |
| "step": 18, |
| "tokens_trained": 0.001713872 |
| }, |
| { |
| "epoch": 0.005673758865248227, |
| "grad_norm": 106.82334899902344, |
| "loss": 48.6585, |
| "lr": 3.8e-05, |
| "step": 20, |
| "tokens_trained": 0.001903976 |
| }, |
| { |
| "epoch": 0.00624113475177305, |
| "grad_norm": 93.58769989013672, |
| "loss": 41.7984, |
| "lr": 4.2000000000000004e-05, |
| "step": 22, |
| "tokens_trained": 0.002094288 |
| }, |
| { |
| "epoch": 0.006808510638297872, |
| "grad_norm": 87.5854721069336, |
| "loss": 37.6201, |
| "lr": 4.6e-05, |
| "step": 24, |
| "tokens_trained": 0.002282496 |
| }, |
| { |
| "epoch": 0.007375886524822695, |
| "grad_norm": 84.12794494628906, |
| "loss": 35.0091, |
| "lr": 5e-05, |
| "step": 26, |
| "tokens_trained": 0.00247068 |
| }, |
| { |
| "epoch": 0.007943262411347518, |
| "grad_norm": 79.77535247802734, |
| "loss": 33.2253, |
| "lr": 5.4e-05, |
| "step": 28, |
| "tokens_trained": 0.002662888 |
| }, |
| { |
| "epoch": 0.00851063829787234, |
| "grad_norm": 66.42157745361328, |
| "loss": 32.0682, |
| "lr": 5.800000000000001e-05, |
| "step": 30, |
| "tokens_trained": 0.002851968 |
| }, |
| { |
| "epoch": 0.009078014184397163, |
| "grad_norm": 87.52485656738281, |
| "loss": 30.893, |
| "lr": 6.2e-05, |
| "step": 32, |
| "tokens_trained": 0.003041384 |
| }, |
| { |
| "epoch": 0.009645390070921986, |
| "grad_norm": 58.33614730834961, |
| "loss": 30.0513, |
| "lr": 6.6e-05, |
| "step": 34, |
| "tokens_trained": 0.003232872 |
| }, |
| { |
| "epoch": 0.010212765957446808, |
| "grad_norm": 54.629329681396484, |
| "loss": 29.0115, |
| "lr": 7.000000000000001e-05, |
| "step": 36, |
| "tokens_trained": 0.003423824 |
| }, |
| { |
| "epoch": 0.01078014184397163, |
| "grad_norm": 52.79097366333008, |
| "loss": 28.2084, |
| "lr": 7.4e-05, |
| "step": 38, |
| "tokens_trained": 0.003613232 |
| }, |
| { |
| "epoch": 0.011347517730496455, |
| "grad_norm": 54.481224060058594, |
| "loss": 27.4345, |
| "lr": 7.8e-05, |
| "step": 40, |
| "tokens_trained": 0.003800952 |
| }, |
| { |
| "epoch": 0.011914893617021277, |
| "grad_norm": 58.7069091796875, |
| "loss": 26.5936, |
| "lr": 8.2e-05, |
| "step": 42, |
| "tokens_trained": 0.003991512 |
| }, |
| { |
| "epoch": 0.0124822695035461, |
| "grad_norm": 49.30760955810547, |
| "loss": 26.0608, |
| "lr": 8.599999999999999e-05, |
| "step": 44, |
| "tokens_trained": 0.004180648 |
| }, |
| { |
| "epoch": 0.013049645390070922, |
| "grad_norm": 61.902587890625, |
| "loss": 25.5363, |
| "lr": 8.999999999999999e-05, |
| "step": 46, |
| "tokens_trained": 0.00437148 |
| }, |
| { |
| "epoch": 0.013617021276595745, |
| "grad_norm": 46.76111602783203, |
| "loss": 24.9599, |
| "lr": 9.400000000000001e-05, |
| "step": 48, |
| "tokens_trained": 0.004559344 |
| }, |
| { |
| "epoch": 0.014184397163120567, |
| "grad_norm": 57.06416702270508, |
| "loss": 24.4087, |
| "lr": 9.800000000000001e-05, |
| "step": 50, |
| "tokens_trained": 0.004749256 |
| }, |
| { |
| "epoch": 0.01475177304964539, |
| "grad_norm": 44.798736572265625, |
| "loss": 24.1444, |
| "lr": 0.000102, |
| "step": 52, |
| "tokens_trained": 0.004940192 |
| }, |
| { |
| "epoch": 0.015319148936170212, |
| "grad_norm": 40.29296875, |
| "loss": 23.6011, |
| "lr": 0.000106, |
| "step": 54, |
| "tokens_trained": 0.005130304 |
| }, |
| { |
| "epoch": 0.015886524822695036, |
| "grad_norm": 38.75099563598633, |
| "loss": 23.1781, |
| "lr": 0.00011, |
| "step": 56, |
| "tokens_trained": 0.005322864 |
| }, |
| { |
| "epoch": 0.016453900709219857, |
| "grad_norm": 37.470706939697266, |
| "loss": 22.9136, |
| "lr": 0.000114, |
| "step": 58, |
| "tokens_trained": 0.00551392 |
| }, |
| { |
| "epoch": 0.01702127659574468, |
| "grad_norm": 35.1894645690918, |
| "loss": 22.6336, |
| "lr": 0.000118, |
| "step": 60, |
| "tokens_trained": 0.005703096 |
| }, |
| { |
| "epoch": 0.017588652482269502, |
| "grad_norm": 35.136573791503906, |
| "loss": 22.2998, |
| "lr": 0.000122, |
| "step": 62, |
| "tokens_trained": 0.005892448 |
| }, |
| { |
| "epoch": 0.018156028368794326, |
| "grad_norm": 38.05111312866211, |
| "loss": 21.9401, |
| "lr": 0.000126, |
| "step": 64, |
| "tokens_trained": 0.006081656 |
| }, |
| { |
| "epoch": 0.01872340425531915, |
| "grad_norm": 35.63850021362305, |
| "loss": 21.7206, |
| "lr": 0.00013000000000000002, |
| "step": 66, |
| "tokens_trained": 0.006273032 |
| }, |
| { |
| "epoch": 0.01929078014184397, |
| "grad_norm": 34.327667236328125, |
| "loss": 21.4051, |
| "lr": 0.000134, |
| "step": 68, |
| "tokens_trained": 0.00646304 |
| }, |
| { |
| "epoch": 0.019858156028368795, |
| "grad_norm": 31.457059860229492, |
| "loss": 21.0774, |
| "lr": 0.00013800000000000002, |
| "step": 70, |
| "tokens_trained": 0.006652832 |
| }, |
| { |
| "epoch": 0.020425531914893616, |
| "grad_norm": 34.91672897338867, |
| "loss": 20.8718, |
| "lr": 0.00014199999999999998, |
| "step": 72, |
| "tokens_trained": 0.006843512 |
| }, |
| { |
| "epoch": 0.02099290780141844, |
| "grad_norm": 27.959579467773438, |
| "loss": 20.6932, |
| "lr": 0.000146, |
| "step": 74, |
| "tokens_trained": 0.007033584 |
| }, |
| { |
| "epoch": 0.02156028368794326, |
| "grad_norm": 26.569866180419922, |
| "loss": 20.4072, |
| "lr": 0.00015, |
| "step": 76, |
| "tokens_trained": 0.007224032 |
| }, |
| { |
| "epoch": 0.022127659574468085, |
| "grad_norm": 28.009904861450195, |
| "loss": 20.2229, |
| "lr": 0.000154, |
| "step": 78, |
| "tokens_trained": 0.00741368 |
| }, |
| { |
| "epoch": 0.02269503546099291, |
| "grad_norm": 28.892959594726562, |
| "loss": 20.0528, |
| "lr": 0.000158, |
| "step": 80, |
| "tokens_trained": 0.00760416 |
| }, |
| { |
| "epoch": 0.02326241134751773, |
| "grad_norm": 31.58131980895996, |
| "loss": 19.8016, |
| "lr": 0.000162, |
| "step": 82, |
| "tokens_trained": 0.007793952 |
| }, |
| { |
| "epoch": 0.023829787234042554, |
| "grad_norm": 31.01254653930664, |
| "loss": 19.634, |
| "lr": 0.00016600000000000002, |
| "step": 84, |
| "tokens_trained": 0.007980792 |
| }, |
| { |
| "epoch": 0.024397163120567375, |
| "grad_norm": 28.732515335083008, |
| "loss": 19.3777, |
| "lr": 0.00017, |
| "step": 86, |
| "tokens_trained": 0.008171968 |
| }, |
| { |
| "epoch": 0.0249645390070922, |
| "grad_norm": 24.31264877319336, |
| "loss": 19.1346, |
| "lr": 0.000174, |
| "step": 88, |
| "tokens_trained": 0.008361632 |
| }, |
| { |
| "epoch": 0.02553191489361702, |
| "grad_norm": 26.557010650634766, |
| "loss": 19.0014, |
| "lr": 0.000178, |
| "step": 90, |
| "tokens_trained": 0.008552328 |
| }, |
| { |
| "epoch": 0.026099290780141844, |
| "grad_norm": 21.156103134155273, |
| "loss": 18.7032, |
| "lr": 0.000182, |
| "step": 92, |
| "tokens_trained": 0.008743136 |
| }, |
| { |
| "epoch": 0.02666666666666667, |
| "grad_norm": 25.7484188079834, |
| "loss": 18.4836, |
| "lr": 0.000186, |
| "step": 94, |
| "tokens_trained": 0.008932056 |
| }, |
| { |
| "epoch": 0.02723404255319149, |
| "grad_norm": 22.27949333190918, |
| "loss": 18.2233, |
| "lr": 0.00019, |
| "step": 96, |
| "tokens_trained": 0.009121608 |
| }, |
| { |
| "epoch": 0.027801418439716313, |
| "grad_norm": 24.9247989654541, |
| "loss": 17.9867, |
| "lr": 0.000194, |
| "step": 98, |
| "tokens_trained": 0.009311008 |
| }, |
| { |
| "epoch": 0.028368794326241134, |
| "grad_norm": 24.302066802978516, |
| "loss": 17.8016, |
| "lr": 0.00019800000000000002, |
| "step": 100, |
| "tokens_trained": 0.009501456 |
| }, |
| { |
| "epoch": 0.02893617021276596, |
| "grad_norm": 23.458459854125977, |
| "loss": 17.6295, |
| "lr": 0.000202, |
| "step": 102, |
| "tokens_trained": 0.009693952 |
| }, |
| { |
| "epoch": 0.02950354609929078, |
| "grad_norm": 24.092350006103516, |
| "loss": 17.4593, |
| "lr": 0.000206, |
| "step": 104, |
| "tokens_trained": 0.009883328 |
| }, |
| { |
| "epoch": 0.030070921985815603, |
| "grad_norm": 22.54726219177246, |
| "loss": 17.2141, |
| "lr": 0.00021, |
| "step": 106, |
| "tokens_trained": 0.01007316 |
| }, |
| { |
| "epoch": 0.030638297872340424, |
| "grad_norm": 21.334760665893555, |
| "loss": 17.044, |
| "lr": 0.000214, |
| "step": 108, |
| "tokens_trained": 0.010266504 |
| }, |
| { |
| "epoch": 0.031205673758865248, |
| "grad_norm": 20.584287643432617, |
| "loss": 16.8919, |
| "lr": 0.000218, |
| "step": 110, |
| "tokens_trained": 0.010455736 |
| }, |
| { |
| "epoch": 0.03177304964539007, |
| "grad_norm": 23.51676368713379, |
| "loss": 16.751, |
| "lr": 0.000222, |
| "step": 112, |
| "tokens_trained": 0.010645208 |
| }, |
| { |
| "epoch": 0.03234042553191489, |
| "grad_norm": 23.278276443481445, |
| "loss": 16.5997, |
| "lr": 0.00022600000000000002, |
| "step": 114, |
| "tokens_trained": 0.010838928 |
| }, |
| { |
| "epoch": 0.032907801418439714, |
| "grad_norm": 25.4830265045166, |
| "loss": 16.3416, |
| "lr": 0.00023, |
| "step": 116, |
| "tokens_trained": 0.011027792 |
| }, |
| { |
| "epoch": 0.03347517730496454, |
| "grad_norm": 29.442413330078125, |
| "loss": 16.24, |
| "lr": 0.00023400000000000002, |
| "step": 118, |
| "tokens_trained": 0.011217456 |
| }, |
| { |
| "epoch": 0.03404255319148936, |
| "grad_norm": 21.77578353881836, |
| "loss": 16.1922, |
| "lr": 0.00023799999999999998, |
| "step": 120, |
| "tokens_trained": 0.01140804 |
| }, |
| { |
| "epoch": 0.03460992907801418, |
| "grad_norm": 27.040719985961914, |
| "loss": 15.9059, |
| "lr": 0.000242, |
| "step": 122, |
| "tokens_trained": 0.011597816 |
| }, |
| { |
| "epoch": 0.035177304964539004, |
| "grad_norm": 24.74480628967285, |
| "loss": 15.7818, |
| "lr": 0.000246, |
| "step": 124, |
| "tokens_trained": 0.011785624 |
| }, |
| { |
| "epoch": 0.03546099290780142, |
| "eval_loss": 15.553059577941895, |
| "eval_runtime": 23.5485, |
| "step": 125, |
| "tokens_trained": 0.011880832 |
| }, |
| { |
| "epoch": 0.03574468085106383, |
| "grad_norm": 23.13482666015625, |
| "loss": 15.5739, |
| "lr": 0.00025, |
| "step": 126, |
| "tokens_trained": 0.011975976 |
| }, |
| { |
| "epoch": 0.03631205673758865, |
| "grad_norm": 22.8618106842041, |
| "loss": 15.4302, |
| "lr": 0.000254, |
| "step": 128, |
| "tokens_trained": 0.012166744 |
| }, |
| { |
| "epoch": 0.03687943262411347, |
| "grad_norm": 26.804859161376953, |
| "loss": 15.3623, |
| "lr": 0.00025800000000000004, |
| "step": 130, |
| "tokens_trained": 0.01235436 |
| }, |
| { |
| "epoch": 0.0374468085106383, |
| "grad_norm": 21.826601028442383, |
| "loss": 15.1465, |
| "lr": 0.000262, |
| "step": 132, |
| "tokens_trained": 0.012544976 |
| }, |
| { |
| "epoch": 0.03801418439716312, |
| "grad_norm": 39.447086334228516, |
| "loss": 15.0137, |
| "lr": 0.000266, |
| "step": 134, |
| "tokens_trained": 0.012736352 |
| }, |
| { |
| "epoch": 0.03858156028368794, |
| "grad_norm": 23.44275665283203, |
| "loss": 14.9355, |
| "lr": 0.00027, |
| "step": 136, |
| "tokens_trained": 0.012925008 |
| }, |
| { |
| "epoch": 0.03914893617021276, |
| "grad_norm": 21.631427764892578, |
| "loss": 14.6825, |
| "lr": 0.00027400000000000005, |
| "step": 138, |
| "tokens_trained": 0.013114672 |
| }, |
| { |
| "epoch": 0.03971631205673759, |
| "grad_norm": 23.674650192260742, |
| "loss": 14.5194, |
| "lr": 0.00027800000000000004, |
| "step": 140, |
| "tokens_trained": 0.013304016 |
| }, |
| { |
| "epoch": 0.04028368794326241, |
| "grad_norm": 23.974796295166016, |
| "loss": 14.4829, |
| "lr": 0.00028199999999999997, |
| "step": 142, |
| "tokens_trained": 0.013496696 |
| }, |
| { |
| "epoch": 0.04085106382978723, |
| "grad_norm": 26.112201690673828, |
| "loss": 14.3027, |
| "lr": 0.00028599999999999996, |
| "step": 144, |
| "tokens_trained": 0.013684816 |
| }, |
| { |
| "epoch": 0.04141843971631206, |
| "grad_norm": 20.67386817932129, |
| "loss": 14.1499, |
| "lr": 0.00029, |
| "step": 146, |
| "tokens_trained": 0.013874832 |
| }, |
| { |
| "epoch": 0.04198581560283688, |
| "grad_norm": 24.253408432006836, |
| "loss": 13.9378, |
| "lr": 0.000294, |
| "step": 148, |
| "tokens_trained": 0.014065056 |
| }, |
| { |
| "epoch": 0.0425531914893617, |
| "grad_norm": 35.716087341308594, |
| "loss": 14.0562, |
| "lr": 0.000298, |
| "step": 150, |
| "tokens_trained": 0.014256784 |
| }, |
| { |
| "epoch": 0.04312056737588652, |
| "grad_norm": 29.414331436157227, |
| "loss": 14.0462, |
| "lr": 0.000302, |
| "step": 152, |
| "tokens_trained": 0.014446312 |
| }, |
| { |
| "epoch": 0.04368794326241135, |
| "grad_norm": 30.687482833862305, |
| "loss": 13.7603, |
| "lr": 0.000306, |
| "step": 154, |
| "tokens_trained": 0.014639872 |
| }, |
| { |
| "epoch": 0.04425531914893617, |
| "grad_norm": 29.806455612182617, |
| "loss": 13.708, |
| "lr": 0.00031, |
| "step": 156, |
| "tokens_trained": 0.014831112 |
| }, |
| { |
| "epoch": 0.04482269503546099, |
| "grad_norm": 24.900897979736328, |
| "loss": 13.548, |
| "lr": 0.000314, |
| "step": 158, |
| "tokens_trained": 0.015021288 |
| }, |
| { |
| "epoch": 0.04539007092198582, |
| "grad_norm": 24.29252815246582, |
| "loss": 13.3119, |
| "lr": 0.00031800000000000003, |
| "step": 160, |
| "tokens_trained": 0.01521228 |
| }, |
| { |
| "epoch": 0.04595744680851064, |
| "grad_norm": 20.68342399597168, |
| "loss": 13.1829, |
| "lr": 0.000322, |
| "step": 162, |
| "tokens_trained": 0.015403688 |
| }, |
| { |
| "epoch": 0.04652482269503546, |
| "grad_norm": 20.822795867919922, |
| "loss": 12.9044, |
| "lr": 0.000326, |
| "step": 164, |
| "tokens_trained": 0.015593416 |
| }, |
| { |
| "epoch": 0.04709219858156028, |
| "grad_norm": 21.689916610717773, |
| "loss": 12.6862, |
| "lr": 0.00033, |
| "step": 166, |
| "tokens_trained": 0.015784408 |
| }, |
| { |
| "epoch": 0.04765957446808511, |
| "grad_norm": 17.873889923095703, |
| "loss": 12.5502, |
| "lr": 0.00033400000000000004, |
| "step": 168, |
| "tokens_trained": 0.0159744 |
| }, |
| { |
| "epoch": 0.04822695035460993, |
| "grad_norm": 18.951616287231445, |
| "loss": 12.308, |
| "lr": 0.00033800000000000003, |
| "step": 170, |
| "tokens_trained": 0.016163736 |
| }, |
| { |
| "epoch": 0.04879432624113475, |
| "grad_norm": 15.146363258361816, |
| "loss": 12.1558, |
| "lr": 0.000342, |
| "step": 172, |
| "tokens_trained": 0.016353832 |
| }, |
| { |
| "epoch": 0.04936170212765958, |
| "grad_norm": 18.336984634399414, |
| "loss": 12.0386, |
| "lr": 0.000346, |
| "step": 174, |
| "tokens_trained": 0.016545088 |
| }, |
| { |
| "epoch": 0.0499290780141844, |
| "grad_norm": 17.221126556396484, |
| "loss": 11.8791, |
| "lr": 0.00035, |
| "step": 176, |
| "tokens_trained": 0.016735704 |
| }, |
| { |
| "epoch": 0.05049645390070922, |
| "grad_norm": 19.362564086914062, |
| "loss": 11.7224, |
| "lr": 0.000354, |
| "step": 178, |
| "tokens_trained": 0.016927944 |
| }, |
| { |
| "epoch": 0.05106382978723404, |
| "grad_norm": 15.564507484436035, |
| "loss": 11.6448, |
| "lr": 0.000358, |
| "step": 180, |
| "tokens_trained": 0.017116096 |
| }, |
| { |
| "epoch": 0.05163120567375887, |
| "grad_norm": 20.711383819580078, |
| "loss": 11.4398, |
| "lr": 0.000362, |
| "step": 182, |
| "tokens_trained": 0.01730564 |
| }, |
| { |
| "epoch": 0.05219858156028369, |
| "grad_norm": 18.627403259277344, |
| "loss": 11.3377, |
| "lr": 0.000366, |
| "step": 184, |
| "tokens_trained": 0.017495864 |
| }, |
| { |
| "epoch": 0.05276595744680851, |
| "grad_norm": 15.00942325592041, |
| "loss": 11.1416, |
| "lr": 0.00037, |
| "step": 186, |
| "tokens_trained": 0.017686464 |
| }, |
| { |
| "epoch": 0.05333333333333334, |
| "grad_norm": 17.070598602294922, |
| "loss": 11.0148, |
| "lr": 0.000374, |
| "step": 188, |
| "tokens_trained": 0.017879488 |
| }, |
| { |
| "epoch": 0.05390070921985816, |
| "grad_norm": 16.101457595825195, |
| "loss": 10.8874, |
| "lr": 0.000378, |
| "step": 190, |
| "tokens_trained": 0.018068312 |
| }, |
| { |
| "epoch": 0.05446808510638298, |
| "grad_norm": 15.613334655761719, |
| "loss": 10.7055, |
| "lr": 0.000382, |
| "step": 192, |
| "tokens_trained": 0.018255752 |
| }, |
| { |
| "epoch": 0.0550354609929078, |
| "grad_norm": 17.671857833862305, |
| "loss": 10.5706, |
| "lr": 0.000386, |
| "step": 194, |
| "tokens_trained": 0.018447096 |
| }, |
| { |
| "epoch": 0.05560283687943263, |
| "grad_norm": 16.080909729003906, |
| "loss": 10.4476, |
| "lr": 0.00039000000000000005, |
| "step": 196, |
| "tokens_trained": 0.018637264 |
| }, |
| { |
| "epoch": 0.05617021276595745, |
| "grad_norm": 15.02849292755127, |
| "loss": 10.2962, |
| "lr": 0.00039400000000000004, |
| "step": 198, |
| "tokens_trained": 0.018827552 |
| }, |
| { |
| "epoch": 0.05673758865248227, |
| "grad_norm": 14.990167617797852, |
| "loss": 10.1912, |
| "lr": 0.000398, |
| "step": 200, |
| "tokens_trained": 0.019018 |
| }, |
| { |
| "epoch": 0.05730496453900709, |
| "grad_norm": 15.390633583068848, |
| "loss": 10.0442, |
| "lr": 0.000402, |
| "step": 202, |
| "tokens_trained": 0.019209864 |
| }, |
| { |
| "epoch": 0.05787234042553192, |
| "grad_norm": 16.871570587158203, |
| "loss": 9.9685, |
| "lr": 0.00040600000000000006, |
| "step": 204, |
| "tokens_trained": 0.019400176 |
| }, |
| { |
| "epoch": 0.05843971631205674, |
| "grad_norm": 20.16544532775879, |
| "loss": 9.8531, |
| "lr": 0.00041, |
| "step": 206, |
| "tokens_trained": 0.019589424 |
| }, |
| { |
| "epoch": 0.05900709219858156, |
| "grad_norm": 16.825023651123047, |
| "loss": 9.7777, |
| "lr": 0.000414, |
| "step": 208, |
| "tokens_trained": 0.019779112 |
| }, |
| { |
| "epoch": 0.059574468085106386, |
| "grad_norm": 16.43510627746582, |
| "loss": 9.6122, |
| "lr": 0.00041799999999999997, |
| "step": 210, |
| "tokens_trained": 0.019970048 |
| }, |
| { |
| "epoch": 0.060141843971631206, |
| "grad_norm": 17.340473175048828, |
| "loss": 9.4859, |
| "lr": 0.000422, |
| "step": 212, |
| "tokens_trained": 0.020160968 |
| }, |
| { |
| "epoch": 0.06070921985815603, |
| "grad_norm": 15.019119262695312, |
| "loss": 9.3656, |
| "lr": 0.000426, |
| "step": 214, |
| "tokens_trained": 0.020349664 |
| }, |
| { |
| "epoch": 0.06127659574468085, |
| "grad_norm": 13.379194259643555, |
| "loss": 9.2348, |
| "lr": 0.00043, |
| "step": 216, |
| "tokens_trained": 0.020538192 |
| }, |
| { |
| "epoch": 0.061843971631205676, |
| "grad_norm": 16.71472930908203, |
| "loss": 9.2258, |
| "lr": 0.00043400000000000003, |
| "step": 218, |
| "tokens_trained": 0.020728936 |
| }, |
| { |
| "epoch": 0.062411347517730496, |
| "grad_norm": 12.743139266967773, |
| "loss": 9.0569, |
| "lr": 0.000438, |
| "step": 220, |
| "tokens_trained": 0.020917472 |
| }, |
| { |
| "epoch": 0.06297872340425532, |
| "grad_norm": 15.739934921264648, |
| "loss": 8.9623, |
| "lr": 0.000442, |
| "step": 222, |
| "tokens_trained": 0.02110928 |
| }, |
| { |
| "epoch": 0.06354609929078014, |
| "grad_norm": 14.23620891571045, |
| "loss": 8.8201, |
| "lr": 0.000446, |
| "step": 224, |
| "tokens_trained": 0.021300168 |
| }, |
| { |
| "epoch": 0.06411347517730497, |
| "grad_norm": 13.005538940429688, |
| "loss": 8.7235, |
| "lr": 0.00045000000000000004, |
| "step": 226, |
| "tokens_trained": 0.021490272 |
| }, |
| { |
| "epoch": 0.06468085106382979, |
| "grad_norm": 17.17629051208496, |
| "loss": 8.6907, |
| "lr": 0.00045400000000000003, |
| "step": 228, |
| "tokens_trained": 0.021681552 |
| }, |
| { |
| "epoch": 0.06524822695035461, |
| "grad_norm": 14.430739402770996, |
| "loss": 8.6196, |
| "lr": 0.000458, |
| "step": 230, |
| "tokens_trained": 0.02187236 |
| }, |
| { |
| "epoch": 0.06581560283687943, |
| "grad_norm": 14.575714111328125, |
| "loss": 8.4741, |
| "lr": 0.000462, |
| "step": 232, |
| "tokens_trained": 0.022061976 |
| }, |
| { |
| "epoch": 0.06638297872340425, |
| "grad_norm": 13.892754554748535, |
| "loss": 8.4118, |
| "lr": 0.00046600000000000005, |
| "step": 234, |
| "tokens_trained": 0.022252008 |
| }, |
| { |
| "epoch": 0.06695035460992908, |
| "grad_norm": 11.58240795135498, |
| "loss": 8.2781, |
| "lr": 0.00047, |
| "step": 236, |
| "tokens_trained": 0.02244284 |
| }, |
| { |
| "epoch": 0.0675177304964539, |
| "grad_norm": 13.022644996643066, |
| "loss": 8.2139, |
| "lr": 0.000474, |
| "step": 238, |
| "tokens_trained": 0.022631152 |
| }, |
| { |
| "epoch": 0.06808510638297872, |
| "grad_norm": 11.844677925109863, |
| "loss": 8.1134, |
| "lr": 0.00047799999999999996, |
| "step": 240, |
| "tokens_trained": 0.022821096 |
| }, |
| { |
| "epoch": 0.06865248226950355, |
| "grad_norm": 13.878067016601562, |
| "loss": 8.0221, |
| "lr": 0.000482, |
| "step": 242, |
| "tokens_trained": 0.023011656 |
| }, |
| { |
| "epoch": 0.06921985815602837, |
| "grad_norm": 12.34648323059082, |
| "loss": 7.9755, |
| "lr": 0.000486, |
| "step": 244, |
| "tokens_trained": 0.023201 |
| }, |
| { |
| "epoch": 0.06978723404255319, |
| "grad_norm": 14.238297462463379, |
| "loss": 7.8969, |
| "lr": 0.00049, |
| "step": 246, |
| "tokens_trained": 0.023391128 |
| }, |
| { |
| "epoch": 0.07035460992907801, |
| "grad_norm": 14.386019706726074, |
| "loss": 7.8627, |
| "lr": 0.000494, |
| "step": 248, |
| "tokens_trained": 0.023581768 |
| }, |
| { |
| "epoch": 0.07092198581560284, |
| "grad_norm": 13.623086929321289, |
| "loss": 7.7568, |
| "lr": 0.000498, |
| "step": 250, |
| "tokens_trained": 0.023771248 |
| }, |
| { |
| "epoch": 0.07092198581560284, |
| "eval_loss": 7.70297384262085, |
| "eval_runtime": 21.3853, |
| "step": 250, |
| "tokens_trained": 0.023771248 |
| }, |
| { |
| "epoch": 0.07148936170212766, |
| "grad_norm": 14.347646713256836, |
| "loss": 7.6842, |
| "lr": 0.0005020000000000001, |
| "step": 252, |
| "tokens_trained": 0.023961056 |
| }, |
| { |
| "epoch": 0.07205673758865248, |
| "grad_norm": 12.5592041015625, |
| "loss": 7.6516, |
| "lr": 0.000506, |
| "step": 254, |
| "tokens_trained": 0.024150968 |
| }, |
| { |
| "epoch": 0.0726241134751773, |
| "grad_norm": 13.219141960144043, |
| "loss": 7.5789, |
| "lr": 0.00051, |
| "step": 256, |
| "tokens_trained": 0.024340072 |
| }, |
| { |
| "epoch": 0.07319148936170213, |
| "grad_norm": 12.654081344604492, |
| "loss": 7.5369, |
| "lr": 0.000514, |
| "step": 258, |
| "tokens_trained": 0.024529296 |
| }, |
| { |
| "epoch": 0.07375886524822695, |
| "grad_norm": 13.136971473693848, |
| "loss": 7.4949, |
| "lr": 0.000518, |
| "step": 260, |
| "tokens_trained": 0.024719688 |
| }, |
| { |
| "epoch": 0.07432624113475177, |
| "grad_norm": 12.680288314819336, |
| "loss": 7.3904, |
| "lr": 0.000522, |
| "step": 262, |
| "tokens_trained": 0.024909632 |
| }, |
| { |
| "epoch": 0.0748936170212766, |
| "grad_norm": 12.754518508911133, |
| "loss": 7.3514, |
| "lr": 0.000526, |
| "step": 264, |
| "tokens_trained": 0.025098416 |
| }, |
| { |
| "epoch": 0.07546099290780142, |
| "grad_norm": 13.22311019897461, |
| "loss": 7.2951, |
| "lr": 0.0005300000000000001, |
| "step": 266, |
| "tokens_trained": 0.025287344 |
| }, |
| { |
| "epoch": 0.07602836879432624, |
| "grad_norm": 12.11903190612793, |
| "loss": 7.2229, |
| "lr": 0.0005340000000000001, |
| "step": 268, |
| "tokens_trained": 0.025477152 |
| }, |
| { |
| "epoch": 0.07659574468085106, |
| "grad_norm": 13.771833419799805, |
| "loss": 7.1815, |
| "lr": 0.0005380000000000001, |
| "step": 270, |
| "tokens_trained": 0.025668288 |
| }, |
| { |
| "epoch": 0.07716312056737588, |
| "grad_norm": 11.756864547729492, |
| "loss": 7.1669, |
| "lr": 0.0005420000000000001, |
| "step": 272, |
| "tokens_trained": 0.025858528 |
| }, |
| { |
| "epoch": 0.0777304964539007, |
| "grad_norm": 13.613094329833984, |
| "loss": 7.1079, |
| "lr": 0.000546, |
| "step": 274, |
| "tokens_trained": 0.026048616 |
| }, |
| { |
| "epoch": 0.07829787234042553, |
| "grad_norm": 10.001923561096191, |
| "loss": 7.0508, |
| "lr": 0.00055, |
| "step": 276, |
| "tokens_trained": 0.026236944 |
| }, |
| { |
| "epoch": 0.07886524822695036, |
| "grad_norm": 14.262083053588867, |
| "loss": 6.9955, |
| "lr": 0.000554, |
| "step": 278, |
| "tokens_trained": 0.026426848 |
| }, |
| { |
| "epoch": 0.07943262411347518, |
| "grad_norm": 12.381136894226074, |
| "loss": 6.9831, |
| "lr": 0.000558, |
| "step": 280, |
| "tokens_trained": 0.026616784 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 9.815845489501953, |
| "loss": 6.917, |
| "lr": 0.0005620000000000001, |
| "step": 282, |
| "tokens_trained": 0.026805176 |
| }, |
| { |
| "epoch": 0.08056737588652482, |
| "grad_norm": 11.669997215270996, |
| "loss": 6.8999, |
| "lr": 0.000566, |
| "step": 284, |
| "tokens_trained": 0.02699488 |
| }, |
| { |
| "epoch": 0.08113475177304964, |
| "grad_norm": 12.770941734313965, |
| "loss": 6.8998, |
| "lr": 0.00057, |
| "step": 286, |
| "tokens_trained": 0.027185784 |
| }, |
| { |
| "epoch": 0.08170212765957446, |
| "grad_norm": 15.572457313537598, |
| "loss": 6.841, |
| "lr": 0.000574, |
| "step": 288, |
| "tokens_trained": 0.027375896 |
| }, |
| { |
| "epoch": 0.08226950354609928, |
| "grad_norm": 10.980833053588867, |
| "loss": 6.8545, |
| "lr": 0.000578, |
| "step": 290, |
| "tokens_trained": 0.02756588 |
| }, |
| { |
| "epoch": 0.08283687943262412, |
| "grad_norm": 11.678337097167969, |
| "loss": 6.7853, |
| "lr": 0.0005819999999999999, |
| "step": 292, |
| "tokens_trained": 0.02775456 |
| }, |
| { |
| "epoch": 0.08340425531914894, |
| "grad_norm": 9.77885913848877, |
| "loss": 6.7465, |
| "lr": 0.0005859999999999999, |
| "step": 294, |
| "tokens_trained": 0.027942856 |
| }, |
| { |
| "epoch": 0.08397163120567376, |
| "grad_norm": 13.62730884552002, |
| "loss": 6.7276, |
| "lr": 0.00059, |
| "step": 296, |
| "tokens_trained": 0.028133152 |
| }, |
| { |
| "epoch": 0.08453900709219858, |
| "grad_norm": 10.644404411315918, |
| "loss": 6.6802, |
| "lr": 0.000594, |
| "step": 298, |
| "tokens_trained": 0.028322192 |
| }, |
| { |
| "epoch": 0.0851063829787234, |
| "grad_norm": 11.130610466003418, |
| "loss": 6.6548, |
| "lr": 0.000598, |
| "step": 300, |
| "tokens_trained": 0.0285122 |
| }, |
| { |
| "epoch": 0.08567375886524822, |
| "grad_norm": 11.557455062866211, |
| "loss": 6.6155, |
| "lr": 0.000602, |
| "step": 302, |
| "tokens_trained": 0.028699792 |
| }, |
| { |
| "epoch": 0.08624113475177304, |
| "grad_norm": 9.276884078979492, |
| "loss": 6.5989, |
| "lr": 0.000606, |
| "step": 304, |
| "tokens_trained": 0.028889896 |
| }, |
| { |
| "epoch": 0.08680851063829788, |
| "grad_norm": 9.616179466247559, |
| "loss": 6.5773, |
| "lr": 0.00061, |
| "step": 306, |
| "tokens_trained": 0.029082272 |
| }, |
| { |
| "epoch": 0.0873758865248227, |
| "grad_norm": 10.575953483581543, |
| "loss": 6.5358, |
| "lr": 0.000614, |
| "step": 308, |
| "tokens_trained": 0.029273352 |
| }, |
| { |
| "epoch": 0.08794326241134752, |
| "grad_norm": 9.089850425720215, |
| "loss": 6.5088, |
| "lr": 0.0006180000000000001, |
| "step": 310, |
| "tokens_trained": 0.029463848 |
| }, |
| { |
| "epoch": 0.08851063829787234, |
| "grad_norm": 9.090002059936523, |
| "loss": 6.4849, |
| "lr": 0.000622, |
| "step": 312, |
| "tokens_trained": 0.029653272 |
| }, |
| { |
| "epoch": 0.08907801418439716, |
| "grad_norm": 12.038308143615723, |
| "loss": 6.4624, |
| "lr": 0.000626, |
| "step": 314, |
| "tokens_trained": 0.029841928 |
| }, |
| { |
| "epoch": 0.08964539007092198, |
| "grad_norm": 9.073866844177246, |
| "loss": 6.4515, |
| "lr": 0.00063, |
| "step": 316, |
| "tokens_trained": 0.030029808 |
| }, |
| { |
| "epoch": 0.0902127659574468, |
| "grad_norm": 8.727197647094727, |
| "loss": 6.43, |
| "lr": 0.000634, |
| "step": 318, |
| "tokens_trained": 0.030221288 |
| }, |
| { |
| "epoch": 0.09078014184397164, |
| "grad_norm": 14.558151245117188, |
| "loss": 6.4487, |
| "lr": 0.000638, |
| "step": 320, |
| "tokens_trained": 0.030410872 |
| }, |
| { |
| "epoch": 0.09134751773049646, |
| "grad_norm": 9.98914623260498, |
| "loss": 6.4279, |
| "lr": 0.000642, |
| "step": 322, |
| "tokens_trained": 0.030602376 |
| }, |
| { |
| "epoch": 0.09191489361702128, |
| "grad_norm": 10.395442962646484, |
| "loss": 6.4311, |
| "lr": 0.000646, |
| "step": 324, |
| "tokens_trained": 0.030792968 |
| }, |
| { |
| "epoch": 0.0924822695035461, |
| "grad_norm": 10.8250093460083, |
| "loss": 6.3726, |
| "lr": 0.0006500000000000001, |
| "step": 326, |
| "tokens_trained": 0.030982944 |
| }, |
| { |
| "epoch": 0.09304964539007092, |
| "grad_norm": 9.73416805267334, |
| "loss": 6.34, |
| "lr": 0.0006540000000000001, |
| "step": 328, |
| "tokens_trained": 0.031174928 |
| }, |
| { |
| "epoch": 0.09361702127659574, |
| "grad_norm": 8.596503257751465, |
| "loss": 6.3322, |
| "lr": 0.0006580000000000001, |
| "step": 330, |
| "tokens_trained": 0.031364288 |
| }, |
| { |
| "epoch": 0.09418439716312056, |
| "grad_norm": 8.49472427368164, |
| "loss": 6.3096, |
| "lr": 0.000662, |
| "step": 332, |
| "tokens_trained": 0.03155376 |
| }, |
| { |
| "epoch": 0.0947517730496454, |
| "grad_norm": 7.857503414154053, |
| "loss": 6.2368, |
| "lr": 0.000666, |
| "step": 334, |
| "tokens_trained": 0.031744368 |
| }, |
| { |
| "epoch": 0.09531914893617022, |
| "grad_norm": 9.007513999938965, |
| "loss": 6.198, |
| "lr": 0.00067, |
| "step": 336, |
| "tokens_trained": 0.031934136 |
| }, |
| { |
| "epoch": 0.09588652482269504, |
| "grad_norm": 8.185524940490723, |
| "loss": 6.2328, |
| "lr": 0.000674, |
| "step": 338, |
| "tokens_trained": 0.032124984 |
| }, |
| { |
| "epoch": 0.09645390070921986, |
| "grad_norm": 8.784396171569824, |
| "loss": 6.1945, |
| "lr": 0.0006780000000000001, |
| "step": 340, |
| "tokens_trained": 0.032316016 |
| }, |
| { |
| "epoch": 0.09702127659574468, |
| "grad_norm": 8.642311096191406, |
| "loss": 6.218, |
| "lr": 0.0006820000000000001, |
| "step": 342, |
| "tokens_trained": 0.032506224 |
| }, |
| { |
| "epoch": 0.0975886524822695, |
| "grad_norm": 8.493780136108398, |
| "loss": 6.194, |
| "lr": 0.0006860000000000001, |
| "step": 344, |
| "tokens_trained": 0.032696152 |
| }, |
| { |
| "epoch": 0.09815602836879432, |
| "grad_norm": 9.120508193969727, |
| "loss": 6.2241, |
| "lr": 0.00069, |
| "step": 346, |
| "tokens_trained": 0.032885688 |
| }, |
| { |
| "epoch": 0.09872340425531916, |
| "grad_norm": 9.34500503540039, |
| "loss": 6.1548, |
| "lr": 0.000694, |
| "step": 348, |
| "tokens_trained": 0.03307568 |
| }, |
| { |
| "epoch": 0.09929078014184398, |
| "grad_norm": 7.483356952667236, |
| "loss": 6.1282, |
| "lr": 0.0006979999999999999, |
| "step": 350, |
| "tokens_trained": 0.033267208 |
| }, |
| { |
| "epoch": 0.0998581560283688, |
| "grad_norm": 7.974069118499756, |
| "loss": 6.1032, |
| "lr": 0.0007019999999999999, |
| "step": 352, |
| "tokens_trained": 0.033458144 |
| }, |
| { |
| "epoch": 0.10042553191489362, |
| "grad_norm": 8.247384071350098, |
| "loss": 6.1698, |
| "lr": 0.0007059999999999999, |
| "step": 354, |
| "tokens_trained": 0.033650352 |
| }, |
| { |
| "epoch": 0.10099290780141844, |
| "grad_norm": 8.554885864257812, |
| "loss": 6.1429, |
| "lr": 0.00071, |
| "step": 356, |
| "tokens_trained": 0.033840232 |
| }, |
| { |
| "epoch": 0.10156028368794326, |
| "grad_norm": 7.209281921386719, |
| "loss": 6.0997, |
| "lr": 0.000714, |
| "step": 358, |
| "tokens_trained": 0.034030032 |
| }, |
| { |
| "epoch": 0.10212765957446808, |
| "grad_norm": 8.660383224487305, |
| "loss": 6.1497, |
| "lr": 0.000718, |
| "step": 360, |
| "tokens_trained": 0.034218592 |
| }, |
| { |
| "epoch": 0.10269503546099291, |
| "grad_norm": 9.382761001586914, |
| "loss": 6.0665, |
| "lr": 0.000722, |
| "step": 362, |
| "tokens_trained": 0.034408408 |
| }, |
| { |
| "epoch": 0.10326241134751774, |
| "grad_norm": 6.915714263916016, |
| "loss": 6.0636, |
| "lr": 0.000726, |
| "step": 364, |
| "tokens_trained": 0.034600016 |
| }, |
| { |
| "epoch": 0.10382978723404256, |
| "grad_norm": 7.8990631103515625, |
| "loss": 6.0975, |
| "lr": 0.00073, |
| "step": 366, |
| "tokens_trained": 0.034790792 |
| }, |
| { |
| "epoch": 0.10439716312056738, |
| "grad_norm": 8.859809875488281, |
| "loss": 6.0754, |
| "lr": 0.000734, |
| "step": 368, |
| "tokens_trained": 0.034981304 |
| }, |
| { |
| "epoch": 0.1049645390070922, |
| "grad_norm": 7.392801761627197, |
| "loss": 6.039, |
| "lr": 0.000738, |
| "step": 370, |
| "tokens_trained": 0.03516956 |
| }, |
| { |
| "epoch": 0.10553191489361702, |
| "grad_norm": 9.427324295043945, |
| "loss": 6.084, |
| "lr": 0.000742, |
| "step": 372, |
| "tokens_trained": 0.035358816 |
| }, |
| { |
| "epoch": 0.10609929078014184, |
| "grad_norm": 7.168910503387451, |
| "loss": 6.0498, |
| "lr": 0.000746, |
| "step": 374, |
| "tokens_trained": 0.035548016 |
| }, |
| { |
| "epoch": 0.10638297872340426, |
| "eval_loss": 6.038269996643066, |
| "eval_runtime": 21.3445, |
| "step": 375, |
| "tokens_trained": 0.035644104 |
| }, |
| { |
| "epoch": 0.10666666666666667, |
| "grad_norm": 7.899259567260742, |
| "loss": 6.0345, |
| "lr": 0.00075, |
| "step": 376, |
| "tokens_trained": 0.035739856 |
| }, |
| { |
| "epoch": 0.1072340425531915, |
| "grad_norm": 8.91533374786377, |
| "loss": 6.0386, |
| "lr": 0.000754, |
| "step": 378, |
| "tokens_trained": 0.035930264 |
| }, |
| { |
| "epoch": 0.10780141843971631, |
| "grad_norm": 6.998043060302734, |
| "loss": 6.0294, |
| "lr": 0.000758, |
| "step": 380, |
| "tokens_trained": 0.036119616 |
| }, |
| { |
| "epoch": 0.10836879432624114, |
| "grad_norm": 7.343894958496094, |
| "loss": 6.0116, |
| "lr": 0.000762, |
| "step": 382, |
| "tokens_trained": 0.036308416 |
| }, |
| { |
| "epoch": 0.10893617021276596, |
| "grad_norm": 8.182528495788574, |
| "loss": 5.9904, |
| "lr": 0.0007660000000000001, |
| "step": 384, |
| "tokens_trained": 0.036497264 |
| }, |
| { |
| "epoch": 0.10950354609929078, |
| "grad_norm": 7.927818775177002, |
| "loss": 6.0345, |
| "lr": 0.0007700000000000001, |
| "step": 386, |
| "tokens_trained": 0.036688192 |
| }, |
| { |
| "epoch": 0.1100709219858156, |
| "grad_norm": 8.07447338104248, |
| "loss": 5.9685, |
| "lr": 0.0007740000000000001, |
| "step": 388, |
| "tokens_trained": 0.036878256 |
| }, |
| { |
| "epoch": 0.11063829787234042, |
| "grad_norm": 7.281871318817139, |
| "loss": 6.0125, |
| "lr": 0.000778, |
| "step": 390, |
| "tokens_trained": 0.037068272 |
| }, |
| { |
| "epoch": 0.11120567375886525, |
| "grad_norm": 8.298929214477539, |
| "loss": 6.0071, |
| "lr": 0.000782, |
| "step": 392, |
| "tokens_trained": 0.037259464 |
| }, |
| { |
| "epoch": 0.11177304964539007, |
| "grad_norm": 7.546716690063477, |
| "loss": 5.9721, |
| "lr": 0.000786, |
| "step": 394, |
| "tokens_trained": 0.037449696 |
| }, |
| { |
| "epoch": 0.1123404255319149, |
| "grad_norm": 8.28548526763916, |
| "loss": 5.9819, |
| "lr": 0.00079, |
| "step": 396, |
| "tokens_trained": 0.037639672 |
| }, |
| { |
| "epoch": 0.11290780141843972, |
| "grad_norm": 7.064655303955078, |
| "loss": 5.9873, |
| "lr": 0.0007940000000000001, |
| "step": 398, |
| "tokens_trained": 0.03782712 |
| }, |
| { |
| "epoch": 0.11347517730496454, |
| "grad_norm": 7.743175506591797, |
| "loss": 5.9528, |
| "lr": 0.0007980000000000001, |
| "step": 400, |
| "tokens_trained": 0.03801792 |
| }, |
| { |
| "epoch": 0.11404255319148936, |
| "grad_norm": 7.00898551940918, |
| "loss": 5.9504, |
| "lr": 0.0008020000000000001, |
| "step": 402, |
| "tokens_trained": 0.038209176 |
| }, |
| { |
| "epoch": 0.11460992907801418, |
| "grad_norm": 7.9350409507751465, |
| "loss": 5.9555, |
| "lr": 0.0008060000000000001, |
| "step": 404, |
| "tokens_trained": 0.03839824 |
| }, |
| { |
| "epoch": 0.11517730496453901, |
| "grad_norm": 7.048569679260254, |
| "loss": 5.9787, |
| "lr": 0.0008100000000000001, |
| "step": 406, |
| "tokens_trained": 0.03858732 |
| }, |
| { |
| "epoch": 0.11574468085106383, |
| "grad_norm": 7.088194370269775, |
| "loss": 5.928, |
| "lr": 0.0008139999999999999, |
| "step": 408, |
| "tokens_trained": 0.038777712 |
| }, |
| { |
| "epoch": 0.11631205673758865, |
| "grad_norm": 8.230712890625, |
| "loss": 5.9716, |
| "lr": 0.0008179999999999999, |
| "step": 410, |
| "tokens_trained": 0.038969464 |
| }, |
| { |
| "epoch": 0.11687943262411347, |
| "grad_norm": 8.076972007751465, |
| "loss": 5.9624, |
| "lr": 0.0008219999999999999, |
| "step": 412, |
| "tokens_trained": 0.039162064 |
| }, |
| { |
| "epoch": 0.1174468085106383, |
| "grad_norm": 8.065289497375488, |
| "loss": 5.9937, |
| "lr": 0.000826, |
| "step": 414, |
| "tokens_trained": 0.039348688 |
| }, |
| { |
| "epoch": 0.11801418439716312, |
| "grad_norm": 6.393420696258545, |
| "loss": 5.9278, |
| "lr": 0.00083, |
| "step": 416, |
| "tokens_trained": 0.03953732 |
| }, |
| { |
| "epoch": 0.11858156028368794, |
| "grad_norm": 7.384702682495117, |
| "loss": 5.931, |
| "lr": 0.000834, |
| "step": 418, |
| "tokens_trained": 0.039729808 |
| }, |
| { |
| "epoch": 0.11914893617021277, |
| "grad_norm": 7.007425308227539, |
| "loss": 5.93, |
| "lr": 0.000838, |
| "step": 420, |
| "tokens_trained": 0.039921096 |
| }, |
| { |
| "epoch": 0.11971631205673759, |
| "grad_norm": 7.112692832946777, |
| "loss": 5.9625, |
| "lr": 0.000842, |
| "step": 422, |
| "tokens_trained": 0.040110856 |
| }, |
| { |
| "epoch": 0.12028368794326241, |
| "grad_norm": 8.484418869018555, |
| "loss": 5.9848, |
| "lr": 0.000846, |
| "step": 424, |
| "tokens_trained": 0.040300504 |
| }, |
| { |
| "epoch": 0.12085106382978723, |
| "grad_norm": 6.633459091186523, |
| "loss": 6.0226, |
| "lr": 0.00085, |
| "step": 426, |
| "tokens_trained": 0.04049056 |
| }, |
| { |
| "epoch": 0.12141843971631205, |
| "grad_norm": 7.796964168548584, |
| "loss": 5.9152, |
| "lr": 0.000854, |
| "step": 428, |
| "tokens_trained": 0.040680544 |
| }, |
| { |
| "epoch": 0.12198581560283688, |
| "grad_norm": 7.833578586578369, |
| "loss": 5.924, |
| "lr": 0.000858, |
| "step": 430, |
| "tokens_trained": 0.040873128 |
| }, |
| { |
| "epoch": 0.1225531914893617, |
| "grad_norm": 6.7470550537109375, |
| "loss": 5.9318, |
| "lr": 0.000862, |
| "step": 432, |
| "tokens_trained": 0.041063488 |
| }, |
| { |
| "epoch": 0.12312056737588653, |
| "grad_norm": 6.066318988800049, |
| "loss": 5.9569, |
| "lr": 0.000866, |
| "step": 434, |
| "tokens_trained": 0.041254368 |
| }, |
| { |
| "epoch": 0.12368794326241135, |
| "grad_norm": 6.753541469573975, |
| "loss": 5.8851, |
| "lr": 0.00087, |
| "step": 436, |
| "tokens_trained": 0.04144516 |
| }, |
| { |
| "epoch": 0.12425531914893617, |
| "grad_norm": 6.471331596374512, |
| "loss": 5.864, |
| "lr": 0.000874, |
| "step": 438, |
| "tokens_trained": 0.041636912 |
| }, |
| { |
| "epoch": 0.12482269503546099, |
| "grad_norm": 6.129056930541992, |
| "loss": 5.8965, |
| "lr": 0.000878, |
| "step": 440, |
| "tokens_trained": 0.041828104 |
| }, |
| { |
| "epoch": 0.1253900709219858, |
| "grad_norm": 6.478890895843506, |
| "loss": 5.8817, |
| "lr": 0.000882, |
| "step": 442, |
| "tokens_trained": 0.04201808 |
| }, |
| { |
| "epoch": 0.12595744680851065, |
| "grad_norm": 6.014713287353516, |
| "loss": 5.8268, |
| "lr": 0.0008860000000000001, |
| "step": 444, |
| "tokens_trained": 0.042207328 |
| }, |
| { |
| "epoch": 0.12652482269503545, |
| "grad_norm": 5.505755424499512, |
| "loss": 5.8684, |
| "lr": 0.0008900000000000001, |
| "step": 446, |
| "tokens_trained": 0.042398152 |
| }, |
| { |
| "epoch": 0.1270921985815603, |
| "grad_norm": 10.096606254577637, |
| "loss": 5.8608, |
| "lr": 0.000894, |
| "step": 448, |
| "tokens_trained": 0.042588984 |
| }, |
| { |
| "epoch": 0.1276595744680851, |
| "grad_norm": 6.388499736785889, |
| "loss": 5.8766, |
| "lr": 0.000898, |
| "step": 450, |
| "tokens_trained": 0.042778592 |
| }, |
| { |
| "epoch": 0.12822695035460993, |
| "grad_norm": 7.145125865936279, |
| "loss": 5.8571, |
| "lr": 0.000902, |
| "step": 452, |
| "tokens_trained": 0.042967176 |
| }, |
| { |
| "epoch": 0.12879432624113477, |
| "grad_norm": 6.826383113861084, |
| "loss": 5.8655, |
| "lr": 0.000906, |
| "step": 454, |
| "tokens_trained": 0.043158952 |
| }, |
| { |
| "epoch": 0.12936170212765957, |
| "grad_norm": 6.036892414093018, |
| "loss": 5.8775, |
| "lr": 0.00091, |
| "step": 456, |
| "tokens_trained": 0.043349288 |
| }, |
| { |
| "epoch": 0.1299290780141844, |
| "grad_norm": 6.36528205871582, |
| "loss": 5.8908, |
| "lr": 0.0009140000000000001, |
| "step": 458, |
| "tokens_trained": 0.043539888 |
| }, |
| { |
| "epoch": 0.13049645390070921, |
| "grad_norm": 6.317558288574219, |
| "loss": 5.8702, |
| "lr": 0.0009180000000000001, |
| "step": 460, |
| "tokens_trained": 0.04373232 |
| }, |
| { |
| "epoch": 0.13106382978723405, |
| "grad_norm": 6.427131175994873, |
| "loss": 5.8399, |
| "lr": 0.0009220000000000001, |
| "step": 462, |
| "tokens_trained": 0.043922744 |
| }, |
| { |
| "epoch": 0.13163120567375886, |
| "grad_norm": 5.666539669036865, |
| "loss": 5.7899, |
| "lr": 0.0009260000000000001, |
| "step": 464, |
| "tokens_trained": 0.044112888 |
| }, |
| { |
| "epoch": 0.1321985815602837, |
| "grad_norm": 5.241824150085449, |
| "loss": 5.8203, |
| "lr": 0.00093, |
| "step": 466, |
| "tokens_trained": 0.04430244 |
| }, |
| { |
| "epoch": 0.1327659574468085, |
| "grad_norm": 6.072646141052246, |
| "loss": 5.8367, |
| "lr": 0.000934, |
| "step": 468, |
| "tokens_trained": 0.044493528 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 6.414418697357178, |
| "loss": 5.8236, |
| "lr": 0.0009379999999999999, |
| "step": 470, |
| "tokens_trained": 0.044682328 |
| }, |
| { |
| "epoch": 0.13390070921985817, |
| "grad_norm": 6.958801746368408, |
| "loss": 5.8179, |
| "lr": 0.000942, |
| "step": 472, |
| "tokens_trained": 0.044874256 |
| }, |
| { |
| "epoch": 0.13446808510638297, |
| "grad_norm": 5.787843227386475, |
| "loss": 5.8478, |
| "lr": 0.000946, |
| "step": 474, |
| "tokens_trained": 0.045065616 |
| }, |
| { |
| "epoch": 0.1350354609929078, |
| "grad_norm": 5.5841240882873535, |
| "loss": 5.8307, |
| "lr": 0.00095, |
| "step": 476, |
| "tokens_trained": 0.045257024 |
| }, |
| { |
| "epoch": 0.13560283687943261, |
| "grad_norm": 6.607712745666504, |
| "loss": 5.8512, |
| "lr": 0.000954, |
| "step": 478, |
| "tokens_trained": 0.045446432 |
| }, |
| { |
| "epoch": 0.13617021276595745, |
| "grad_norm": 5.473597049713135, |
| "loss": 5.8174, |
| "lr": 0.000958, |
| "step": 480, |
| "tokens_trained": 0.045636392 |
| }, |
| { |
| "epoch": 0.13673758865248226, |
| "grad_norm": 5.435728549957275, |
| "loss": 5.8308, |
| "lr": 0.000962, |
| "step": 482, |
| "tokens_trained": 0.045823784 |
| }, |
| { |
| "epoch": 0.1373049645390071, |
| "grad_norm": 6.049300670623779, |
| "loss": 5.8293, |
| "lr": 0.000966, |
| "step": 484, |
| "tokens_trained": 0.046013408 |
| }, |
| { |
| "epoch": 0.13787234042553193, |
| "grad_norm": 6.311764717102051, |
| "loss": 5.8086, |
| "lr": 0.0009699999999999999, |
| "step": 486, |
| "tokens_trained": 0.046202528 |
| }, |
| { |
| "epoch": 0.13843971631205673, |
| "grad_norm": 5.886009216308594, |
| "loss": 5.7986, |
| "lr": 0.000974, |
| "step": 488, |
| "tokens_trained": 0.04639404 |
| }, |
| { |
| "epoch": 0.13900709219858157, |
| "grad_norm": 5.438202381134033, |
| "loss": 5.8473, |
| "lr": 0.000978, |
| "step": 490, |
| "tokens_trained": 0.046586512 |
| }, |
| { |
| "epoch": 0.13957446808510637, |
| "grad_norm": 5.08393669128418, |
| "loss": 5.7613, |
| "lr": 0.000982, |
| "step": 492, |
| "tokens_trained": 0.046777448 |
| }, |
| { |
| "epoch": 0.1401418439716312, |
| "grad_norm": 5.645389080047607, |
| "loss": 5.7723, |
| "lr": 0.0009860000000000001, |
| "step": 494, |
| "tokens_trained": 0.046966096 |
| }, |
| { |
| "epoch": 0.14070921985815601, |
| "grad_norm": 6.320916652679443, |
| "loss": 5.7772, |
| "lr": 0.00099, |
| "step": 496, |
| "tokens_trained": 0.047155152 |
| }, |
| { |
| "epoch": 0.14127659574468085, |
| "grad_norm": 5.573540210723877, |
| "loss": 5.7412, |
| "lr": 0.000994, |
| "step": 498, |
| "tokens_trained": 0.047345352 |
| }, |
| { |
| "epoch": 0.14184397163120568, |
| "grad_norm": 4.939594745635986, |
| "loss": 5.8208, |
| "lr": 0.000998, |
| "step": 500, |
| "tokens_trained": 0.047535016 |
| }, |
| { |
| "epoch": 0.14184397163120568, |
| "eval_loss": 5.799490928649902, |
| "eval_runtime": 20.8575, |
| "step": 500, |
| "tokens_trained": 0.047535016 |
| }, |
| { |
| "epoch": 0.1424113475177305, |
| "grad_norm": 5.805343151092529, |
| "loss": 5.7734, |
| "lr": 0.00099986013986014, |
| "step": 502, |
| "tokens_trained": 0.047724216 |
| }, |
| { |
| "epoch": 0.14297872340425533, |
| "grad_norm": 5.831176280975342, |
| "loss": 5.8044, |
| "lr": 0.0009995804195804196, |
| "step": 504, |
| "tokens_trained": 0.047914328 |
| }, |
| { |
| "epoch": 0.14354609929078013, |
| "grad_norm": 5.045091152191162, |
| "loss": 5.8133, |
| "lr": 0.0009993006993006994, |
| "step": 506, |
| "tokens_trained": 0.048105032 |
| }, |
| { |
| "epoch": 0.14411347517730497, |
| "grad_norm": 5.276819705963135, |
| "loss": 5.7555, |
| "lr": 0.000999020979020979, |
| "step": 508, |
| "tokens_trained": 0.048293104 |
| }, |
| { |
| "epoch": 0.14468085106382977, |
| "grad_norm": 5.710324287414551, |
| "loss": 5.7619, |
| "lr": 0.0009987412587412587, |
| "step": 510, |
| "tokens_trained": 0.048483888 |
| }, |
| { |
| "epoch": 0.1452482269503546, |
| "grad_norm": 4.9472527503967285, |
| "loss": 5.767, |
| "lr": 0.0009984615384615386, |
| "step": 512, |
| "tokens_trained": 0.04867336 |
| }, |
| { |
| "epoch": 0.14581560283687944, |
| "grad_norm": 5.410078525543213, |
| "loss": 5.7238, |
| "lr": 0.0009981818181818182, |
| "step": 514, |
| "tokens_trained": 0.048863104 |
| }, |
| { |
| "epoch": 0.14638297872340425, |
| "grad_norm": 6.025843143463135, |
| "loss": 5.7664, |
| "lr": 0.000997902097902098, |
| "step": 516, |
| "tokens_trained": 0.049053856 |
| }, |
| { |
| "epoch": 0.14695035460992908, |
| "grad_norm": 5.3211669921875, |
| "loss": 5.747, |
| "lr": 0.0009976223776223777, |
| "step": 518, |
| "tokens_trained": 0.049245104 |
| }, |
| { |
| "epoch": 0.1475177304964539, |
| "grad_norm": 6.059483051300049, |
| "loss": 5.7611, |
| "lr": 0.0009973426573426573, |
| "step": 520, |
| "tokens_trained": 0.049434368 |
| }, |
| { |
| "epoch": 0.14808510638297873, |
| "grad_norm": 5.362505912780762, |
| "loss": 5.7607, |
| "lr": 0.000997062937062937, |
| "step": 522, |
| "tokens_trained": 0.049622648 |
| }, |
| { |
| "epoch": 0.14865248226950353, |
| "grad_norm": 5.391371726989746, |
| "loss": 5.7857, |
| "lr": 0.0009967832167832168, |
| "step": 524, |
| "tokens_trained": 0.049812304 |
| }, |
| { |
| "epoch": 0.14921985815602837, |
| "grad_norm": 4.3839030265808105, |
| "loss": 5.7334, |
| "lr": 0.0009965034965034964, |
| "step": 526, |
| "tokens_trained": 0.05000356 |
| }, |
| { |
| "epoch": 0.1497872340425532, |
| "grad_norm": 5.008530616760254, |
| "loss": 5.7475, |
| "lr": 0.0009962237762237763, |
| "step": 528, |
| "tokens_trained": 0.050193304 |
| }, |
| { |
| "epoch": 0.150354609929078, |
| "grad_norm": 5.068671226501465, |
| "loss": 5.7866, |
| "lr": 0.000995944055944056, |
| "step": 530, |
| "tokens_trained": 0.050382856 |
| }, |
| { |
| "epoch": 0.15092198581560284, |
| "grad_norm": 5.399240493774414, |
| "loss": 5.6857, |
| "lr": 0.0009956643356643356, |
| "step": 532, |
| "tokens_trained": 0.050570864 |
| }, |
| { |
| "epoch": 0.15148936170212765, |
| "grad_norm": 5.689481735229492, |
| "loss": 5.7586, |
| "lr": 0.0009953846153846154, |
| "step": 534, |
| "tokens_trained": 0.050760384 |
| }, |
| { |
| "epoch": 0.15205673758865249, |
| "grad_norm": 4.652275562286377, |
| "loss": 5.7866, |
| "lr": 0.000995104895104895, |
| "step": 536, |
| "tokens_trained": 0.050952712 |
| }, |
| { |
| "epoch": 0.1526241134751773, |
| "grad_norm": 4.126920223236084, |
| "loss": 5.7261, |
| "lr": 0.000994825174825175, |
| "step": 538, |
| "tokens_trained": 0.051141656 |
| }, |
| { |
| "epoch": 0.15319148936170213, |
| "grad_norm": 4.233098030090332, |
| "loss": 5.6903, |
| "lr": 0.0009945454545454546, |
| "step": 540, |
| "tokens_trained": 0.051331256 |
| }, |
| { |
| "epoch": 0.15375886524822696, |
| "grad_norm": 4.271973133087158, |
| "loss": 5.7293, |
| "lr": 0.0009942657342657344, |
| "step": 542, |
| "tokens_trained": 0.051522072 |
| }, |
| { |
| "epoch": 0.15432624113475177, |
| "grad_norm": 4.653008937835693, |
| "loss": 5.7133, |
| "lr": 0.000993986013986014, |
| "step": 544, |
| "tokens_trained": 0.051711624 |
| }, |
| { |
| "epoch": 0.1548936170212766, |
| "grad_norm": 4.192624092102051, |
| "loss": 5.6876, |
| "lr": 0.0009937062937062937, |
| "step": 546, |
| "tokens_trained": 0.051901744 |
| }, |
| { |
| "epoch": 0.1554609929078014, |
| "grad_norm": 5.497848033905029, |
| "loss": 5.7378, |
| "lr": 0.0009934265734265735, |
| "step": 548, |
| "tokens_trained": 0.052092872 |
| }, |
| { |
| "epoch": 0.15602836879432624, |
| "grad_norm": 4.350259780883789, |
| "loss": 5.6533, |
| "lr": 0.0009931468531468532, |
| "step": 550, |
| "tokens_trained": 0.052281768 |
| }, |
| { |
| "epoch": 0.15659574468085105, |
| "grad_norm": 4.515641689300537, |
| "loss": 5.7492, |
| "lr": 0.000992867132867133, |
| "step": 552, |
| "tokens_trained": 0.052471848 |
| }, |
| { |
| "epoch": 0.15716312056737589, |
| "grad_norm": 4.628066539764404, |
| "loss": 5.7113, |
| "lr": 0.0009925874125874127, |
| "step": 554, |
| "tokens_trained": 0.052660168 |
| }, |
| { |
| "epoch": 0.15773049645390072, |
| "grad_norm": 4.8322930335998535, |
| "loss": 5.6696, |
| "lr": 0.0009923076923076923, |
| "step": 556, |
| "tokens_trained": 0.05284776 |
| }, |
| { |
| "epoch": 0.15829787234042553, |
| "grad_norm": 3.999706506729126, |
| "loss": 5.7296, |
| "lr": 0.000992027972027972, |
| "step": 558, |
| "tokens_trained": 0.053037344 |
| }, |
| { |
| "epoch": 0.15886524822695036, |
| "grad_norm": 4.332971572875977, |
| "loss": 5.7362, |
| "lr": 0.0009917482517482518, |
| "step": 560, |
| "tokens_trained": 0.053228168 |
| }, |
| { |
| "epoch": 0.15943262411347517, |
| "grad_norm": 4.500301361083984, |
| "loss": 5.6982, |
| "lr": 0.0009914685314685314, |
| "step": 562, |
| "tokens_trained": 0.05341856 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 4.721808910369873, |
| "loss": 5.7166, |
| "lr": 0.0009911888111888113, |
| "step": 564, |
| "tokens_trained": 0.053608824 |
| }, |
| { |
| "epoch": 0.1605673758865248, |
| "grad_norm": 5.265316009521484, |
| "loss": 5.7069, |
| "lr": 0.000990909090909091, |
| "step": 566, |
| "tokens_trained": 0.053799728 |
| }, |
| { |
| "epoch": 0.16113475177304964, |
| "grad_norm": 5.024131774902344, |
| "loss": 5.7113, |
| "lr": 0.0009906293706293705, |
| "step": 568, |
| "tokens_trained": 0.05398944 |
| }, |
| { |
| "epoch": 0.16170212765957448, |
| "grad_norm": 4.063276767730713, |
| "loss": 5.6251, |
| "lr": 0.0009903496503496504, |
| "step": 570, |
| "tokens_trained": 0.054176512 |
| }, |
| { |
| "epoch": 0.1622695035460993, |
| "grad_norm": 4.15974760055542, |
| "loss": 5.6912, |
| "lr": 0.00099006993006993, |
| "step": 572, |
| "tokens_trained": 0.054367072 |
| }, |
| { |
| "epoch": 0.16283687943262412, |
| "grad_norm": 4.338894844055176, |
| "loss": 5.6807, |
| "lr": 0.0009897902097902099, |
| "step": 574, |
| "tokens_trained": 0.054559184 |
| }, |
| { |
| "epoch": 0.16340425531914893, |
| "grad_norm": 5.535487174987793, |
| "loss": 5.6765, |
| "lr": 0.0009895104895104895, |
| "step": 576, |
| "tokens_trained": 0.054748904 |
| }, |
| { |
| "epoch": 0.16397163120567376, |
| "grad_norm": 4.379040241241455, |
| "loss": 5.6884, |
| "lr": 0.0009892307692307694, |
| "step": 578, |
| "tokens_trained": 0.054936136 |
| }, |
| { |
| "epoch": 0.16453900709219857, |
| "grad_norm": 4.746179103851318, |
| "loss": 5.6885, |
| "lr": 0.000988951048951049, |
| "step": 580, |
| "tokens_trained": 0.055125584 |
| }, |
| { |
| "epoch": 0.1651063829787234, |
| "grad_norm": 4.949806213378906, |
| "loss": 5.7061, |
| "lr": 0.0009886713286713286, |
| "step": 582, |
| "tokens_trained": 0.055314608 |
| }, |
| { |
| "epoch": 0.16567375886524824, |
| "grad_norm": 4.507448196411133, |
| "loss": 5.6339, |
| "lr": 0.0009883916083916085, |
| "step": 584, |
| "tokens_trained": 0.055503992 |
| }, |
| { |
| "epoch": 0.16624113475177305, |
| "grad_norm": 4.131013870239258, |
| "loss": 5.7122, |
| "lr": 0.0009881118881118881, |
| "step": 586, |
| "tokens_trained": 0.055693376 |
| }, |
| { |
| "epoch": 0.16680851063829788, |
| "grad_norm": 5.32897424697876, |
| "loss": 5.7192, |
| "lr": 0.000987832167832168, |
| "step": 588, |
| "tokens_trained": 0.05588452 |
| }, |
| { |
| "epoch": 0.1673758865248227, |
| "grad_norm": 4.166877746582031, |
| "loss": 5.6666, |
| "lr": 0.0009875524475524476, |
| "step": 590, |
| "tokens_trained": 0.056073936 |
| }, |
| { |
| "epoch": 0.16794326241134752, |
| "grad_norm": 4.393389701843262, |
| "loss": 5.6113, |
| "lr": 0.0009872727272727273, |
| "step": 592, |
| "tokens_trained": 0.056262224 |
| }, |
| { |
| "epoch": 0.16851063829787233, |
| "grad_norm": 4.466696739196777, |
| "loss": 5.6466, |
| "lr": 0.000986993006993007, |
| "step": 594, |
| "tokens_trained": 0.056454008 |
| }, |
| { |
| "epoch": 0.16907801418439716, |
| "grad_norm": 3.9413373470306396, |
| "loss": 5.6838, |
| "lr": 0.0009867132867132867, |
| "step": 596, |
| "tokens_trained": 0.05664444 |
| }, |
| { |
| "epoch": 0.169645390070922, |
| "grad_norm": 3.594649314880371, |
| "loss": 5.6684, |
| "lr": 0.0009864335664335664, |
| "step": 598, |
| "tokens_trained": 0.056833864 |
| }, |
| { |
| "epoch": 0.1702127659574468, |
| "grad_norm": 3.5969483852386475, |
| "loss": 5.6619, |
| "lr": 0.0009861538461538462, |
| "step": 600, |
| "tokens_trained": 0.05702332 |
| }, |
| { |
| "epoch": 0.17078014184397164, |
| "grad_norm": 3.845414638519287, |
| "loss": 5.5855, |
| "lr": 0.0009858741258741259, |
| "step": 602, |
| "tokens_trained": 0.057212776 |
| }, |
| { |
| "epoch": 0.17134751773049645, |
| "grad_norm": 3.9198834896087646, |
| "loss": 5.6551, |
| "lr": 0.0009855944055944055, |
| "step": 604, |
| "tokens_trained": 0.05740152 |
| }, |
| { |
| "epoch": 0.17191489361702128, |
| "grad_norm": 3.6764986515045166, |
| "loss": 5.6228, |
| "lr": 0.0009853146853146854, |
| "step": 606, |
| "tokens_trained": 0.057595616 |
| }, |
| { |
| "epoch": 0.1724822695035461, |
| "grad_norm": 3.8210043907165527, |
| "loss": 5.6557, |
| "lr": 0.000985034965034965, |
| "step": 608, |
| "tokens_trained": 0.057783968 |
| }, |
| { |
| "epoch": 0.17304964539007092, |
| "grad_norm": 3.893644094467163, |
| "loss": 5.6675, |
| "lr": 0.0009847552447552449, |
| "step": 610, |
| "tokens_trained": 0.057974832 |
| }, |
| { |
| "epoch": 0.17361702127659576, |
| "grad_norm": 3.280839681625366, |
| "loss": 5.6442, |
| "lr": 0.0009844755244755245, |
| "step": 612, |
| "tokens_trained": 0.058166272 |
| }, |
| { |
| "epoch": 0.17418439716312056, |
| "grad_norm": 3.4350404739379883, |
| "loss": 5.6555, |
| "lr": 0.0009841958041958043, |
| "step": 614, |
| "tokens_trained": 0.058356008 |
| }, |
| { |
| "epoch": 0.1747517730496454, |
| "grad_norm": 3.7700448036193848, |
| "loss": 5.6138, |
| "lr": 0.000983916083916084, |
| "step": 616, |
| "tokens_trained": 0.058546792 |
| }, |
| { |
| "epoch": 0.1753191489361702, |
| "grad_norm": 3.8182730674743652, |
| "loss": 5.6931, |
| "lr": 0.0009836363636363636, |
| "step": 618, |
| "tokens_trained": 0.058736296 |
| }, |
| { |
| "epoch": 0.17588652482269504, |
| "grad_norm": 3.9105372428894043, |
| "loss": 5.6431, |
| "lr": 0.0009833566433566435, |
| "step": 620, |
| "tokens_trained": 0.058927576 |
| }, |
| { |
| "epoch": 0.17645390070921985, |
| "grad_norm": 3.8897712230682373, |
| "loss": 5.6203, |
| "lr": 0.000983076923076923, |
| "step": 622, |
| "tokens_trained": 0.059118416 |
| }, |
| { |
| "epoch": 0.17702127659574468, |
| "grad_norm": 3.512194871902466, |
| "loss": 5.6292, |
| "lr": 0.000982797202797203, |
| "step": 624, |
| "tokens_trained": 0.059308568 |
| }, |
| { |
| "epoch": 0.1773049645390071, |
| "eval_loss": 5.630118370056152, |
| "eval_runtime": 21.1591, |
| "step": 625, |
| "tokens_trained": 0.059404056 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 7650, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 125, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|