| { |
| "best_global_step": 375, |
| "best_metric": 6.038269996643066, |
| "best_model_checkpoint": "/gpfs/scratch/guoh/DNAFM/output/gencode_human_12.8k_12800/Gencode-BPE/checkpoint-375", |
| "epoch": 0.10638297872340426, |
| "eval_steps": 125, |
| "global_step": 375, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005673758865248227, |
| "grad_norm": 1275.0146484375, |
| "loss": 281.4781, |
| "lr": 2e-06, |
| "step": 2, |
| "tokens_trained": 0.000192256 |
| }, |
| { |
| "epoch": 0.0011347517730496454, |
| "grad_norm": 1437.579833984375, |
| "loss": 267.2211, |
| "lr": 6e-06, |
| "step": 4, |
| "tokens_trained": 0.000382024 |
| }, |
| { |
| "epoch": 0.001702127659574468, |
| "grad_norm": 1719.271484375, |
| "loss": 219.3822, |
| "lr": 1e-05, |
| "step": 6, |
| "tokens_trained": 0.00057072 |
| }, |
| { |
| "epoch": 0.0022695035460992908, |
| "grad_norm": 1444.94970703125, |
| "loss": 133.8172, |
| "lr": 1.4e-05, |
| "step": 8, |
| "tokens_trained": 0.000761336 |
| }, |
| { |
| "epoch": 0.0028368794326241137, |
| "grad_norm": 238.9689178466797, |
| "loss": 90.8177, |
| "lr": 1.8e-05, |
| "step": 10, |
| "tokens_trained": 0.000953248 |
| }, |
| { |
| "epoch": 0.003404255319148936, |
| "grad_norm": 158.53497314453125, |
| "loss": 84.6922, |
| "lr": 2.2e-05, |
| "step": 12, |
| "tokens_trained": 0.00114424 |
| }, |
| { |
| "epoch": 0.003971631205673759, |
| "grad_norm": 146.10595703125, |
| "loss": 76.7055, |
| "lr": 2.6e-05, |
| "step": 14, |
| "tokens_trained": 0.001334104 |
| }, |
| { |
| "epoch": 0.0045390070921985815, |
| "grad_norm": 140.69964599609375, |
| "loss": 67.9952, |
| "lr": 3e-05, |
| "step": 16, |
| "tokens_trained": 0.00152392 |
| }, |
| { |
| "epoch": 0.005106382978723404, |
| "grad_norm": 108.80303192138672, |
| "loss": 57.8088, |
| "lr": 3.4000000000000007e-05, |
| "step": 18, |
| "tokens_trained": 0.001713872 |
| }, |
| { |
| "epoch": 0.005673758865248227, |
| "grad_norm": 106.82334899902344, |
| "loss": 48.6585, |
| "lr": 3.8e-05, |
| "step": 20, |
| "tokens_trained": 0.001903976 |
| }, |
| { |
| "epoch": 0.00624113475177305, |
| "grad_norm": 93.58769989013672, |
| "loss": 41.7984, |
| "lr": 4.2000000000000004e-05, |
| "step": 22, |
| "tokens_trained": 0.002094288 |
| }, |
| { |
| "epoch": 0.006808510638297872, |
| "grad_norm": 87.5854721069336, |
| "loss": 37.6201, |
| "lr": 4.6e-05, |
| "step": 24, |
| "tokens_trained": 0.002282496 |
| }, |
| { |
| "epoch": 0.007375886524822695, |
| "grad_norm": 84.12794494628906, |
| "loss": 35.0091, |
| "lr": 5e-05, |
| "step": 26, |
| "tokens_trained": 0.00247068 |
| }, |
| { |
| "epoch": 0.007943262411347518, |
| "grad_norm": 79.77535247802734, |
| "loss": 33.2253, |
| "lr": 5.4e-05, |
| "step": 28, |
| "tokens_trained": 0.002662888 |
| }, |
| { |
| "epoch": 0.00851063829787234, |
| "grad_norm": 66.42157745361328, |
| "loss": 32.0682, |
| "lr": 5.800000000000001e-05, |
| "step": 30, |
| "tokens_trained": 0.002851968 |
| }, |
| { |
| "epoch": 0.009078014184397163, |
| "grad_norm": 87.52485656738281, |
| "loss": 30.893, |
| "lr": 6.2e-05, |
| "step": 32, |
| "tokens_trained": 0.003041384 |
| }, |
| { |
| "epoch": 0.009645390070921986, |
| "grad_norm": 58.33614730834961, |
| "loss": 30.0513, |
| "lr": 6.6e-05, |
| "step": 34, |
| "tokens_trained": 0.003232872 |
| }, |
| { |
| "epoch": 0.010212765957446808, |
| "grad_norm": 54.629329681396484, |
| "loss": 29.0115, |
| "lr": 7.000000000000001e-05, |
| "step": 36, |
| "tokens_trained": 0.003423824 |
| }, |
| { |
| "epoch": 0.01078014184397163, |
| "grad_norm": 52.79097366333008, |
| "loss": 28.2084, |
| "lr": 7.4e-05, |
| "step": 38, |
| "tokens_trained": 0.003613232 |
| }, |
| { |
| "epoch": 0.011347517730496455, |
| "grad_norm": 54.481224060058594, |
| "loss": 27.4345, |
| "lr": 7.8e-05, |
| "step": 40, |
| "tokens_trained": 0.003800952 |
| }, |
| { |
| "epoch": 0.011914893617021277, |
| "grad_norm": 58.7069091796875, |
| "loss": 26.5936, |
| "lr": 8.2e-05, |
| "step": 42, |
| "tokens_trained": 0.003991512 |
| }, |
| { |
| "epoch": 0.0124822695035461, |
| "grad_norm": 49.30760955810547, |
| "loss": 26.0608, |
| "lr": 8.599999999999999e-05, |
| "step": 44, |
| "tokens_trained": 0.004180648 |
| }, |
| { |
| "epoch": 0.013049645390070922, |
| "grad_norm": 61.902587890625, |
| "loss": 25.5363, |
| "lr": 8.999999999999999e-05, |
| "step": 46, |
| "tokens_trained": 0.00437148 |
| }, |
| { |
| "epoch": 0.013617021276595745, |
| "grad_norm": 46.76111602783203, |
| "loss": 24.9599, |
| "lr": 9.400000000000001e-05, |
| "step": 48, |
| "tokens_trained": 0.004559344 |
| }, |
| { |
| "epoch": 0.014184397163120567, |
| "grad_norm": 57.06416702270508, |
| "loss": 24.4087, |
| "lr": 9.800000000000001e-05, |
| "step": 50, |
| "tokens_trained": 0.004749256 |
| }, |
| { |
| "epoch": 0.01475177304964539, |
| "grad_norm": 44.798736572265625, |
| "loss": 24.1444, |
| "lr": 0.000102, |
| "step": 52, |
| "tokens_trained": 0.004940192 |
| }, |
| { |
| "epoch": 0.015319148936170212, |
| "grad_norm": 40.29296875, |
| "loss": 23.6011, |
| "lr": 0.000106, |
| "step": 54, |
| "tokens_trained": 0.005130304 |
| }, |
| { |
| "epoch": 0.015886524822695036, |
| "grad_norm": 38.75099563598633, |
| "loss": 23.1781, |
| "lr": 0.00011, |
| "step": 56, |
| "tokens_trained": 0.005322864 |
| }, |
| { |
| "epoch": 0.016453900709219857, |
| "grad_norm": 37.470706939697266, |
| "loss": 22.9136, |
| "lr": 0.000114, |
| "step": 58, |
| "tokens_trained": 0.00551392 |
| }, |
| { |
| "epoch": 0.01702127659574468, |
| "grad_norm": 35.1894645690918, |
| "loss": 22.6336, |
| "lr": 0.000118, |
| "step": 60, |
| "tokens_trained": 0.005703096 |
| }, |
| { |
| "epoch": 0.017588652482269502, |
| "grad_norm": 35.136573791503906, |
| "loss": 22.2998, |
| "lr": 0.000122, |
| "step": 62, |
| "tokens_trained": 0.005892448 |
| }, |
| { |
| "epoch": 0.018156028368794326, |
| "grad_norm": 38.05111312866211, |
| "loss": 21.9401, |
| "lr": 0.000126, |
| "step": 64, |
| "tokens_trained": 0.006081656 |
| }, |
| { |
| "epoch": 0.01872340425531915, |
| "grad_norm": 35.63850021362305, |
| "loss": 21.7206, |
| "lr": 0.00013000000000000002, |
| "step": 66, |
| "tokens_trained": 0.006273032 |
| }, |
| { |
| "epoch": 0.01929078014184397, |
| "grad_norm": 34.327667236328125, |
| "loss": 21.4051, |
| "lr": 0.000134, |
| "step": 68, |
| "tokens_trained": 0.00646304 |
| }, |
| { |
| "epoch": 0.019858156028368795, |
| "grad_norm": 31.457059860229492, |
| "loss": 21.0774, |
| "lr": 0.00013800000000000002, |
| "step": 70, |
| "tokens_trained": 0.006652832 |
| }, |
| { |
| "epoch": 0.020425531914893616, |
| "grad_norm": 34.91672897338867, |
| "loss": 20.8718, |
| "lr": 0.00014199999999999998, |
| "step": 72, |
| "tokens_trained": 0.006843512 |
| }, |
| { |
| "epoch": 0.02099290780141844, |
| "grad_norm": 27.959579467773438, |
| "loss": 20.6932, |
| "lr": 0.000146, |
| "step": 74, |
| "tokens_trained": 0.007033584 |
| }, |
| { |
| "epoch": 0.02156028368794326, |
| "grad_norm": 26.569866180419922, |
| "loss": 20.4072, |
| "lr": 0.00015, |
| "step": 76, |
| "tokens_trained": 0.007224032 |
| }, |
| { |
| "epoch": 0.022127659574468085, |
| "grad_norm": 28.009904861450195, |
| "loss": 20.2229, |
| "lr": 0.000154, |
| "step": 78, |
| "tokens_trained": 0.00741368 |
| }, |
| { |
| "epoch": 0.02269503546099291, |
| "grad_norm": 28.892959594726562, |
| "loss": 20.0528, |
| "lr": 0.000158, |
| "step": 80, |
| "tokens_trained": 0.00760416 |
| }, |
| { |
| "epoch": 0.02326241134751773, |
| "grad_norm": 31.58131980895996, |
| "loss": 19.8016, |
| "lr": 0.000162, |
| "step": 82, |
| "tokens_trained": 0.007793952 |
| }, |
| { |
| "epoch": 0.023829787234042554, |
| "grad_norm": 31.01254653930664, |
| "loss": 19.634, |
| "lr": 0.00016600000000000002, |
| "step": 84, |
| "tokens_trained": 0.007980792 |
| }, |
| { |
| "epoch": 0.024397163120567375, |
| "grad_norm": 28.732515335083008, |
| "loss": 19.3777, |
| "lr": 0.00017, |
| "step": 86, |
| "tokens_trained": 0.008171968 |
| }, |
| { |
| "epoch": 0.0249645390070922, |
| "grad_norm": 24.31264877319336, |
| "loss": 19.1346, |
| "lr": 0.000174, |
| "step": 88, |
| "tokens_trained": 0.008361632 |
| }, |
| { |
| "epoch": 0.02553191489361702, |
| "grad_norm": 26.557010650634766, |
| "loss": 19.0014, |
| "lr": 0.000178, |
| "step": 90, |
| "tokens_trained": 0.008552328 |
| }, |
| { |
| "epoch": 0.026099290780141844, |
| "grad_norm": 21.156103134155273, |
| "loss": 18.7032, |
| "lr": 0.000182, |
| "step": 92, |
| "tokens_trained": 0.008743136 |
| }, |
| { |
| "epoch": 0.02666666666666667, |
| "grad_norm": 25.7484188079834, |
| "loss": 18.4836, |
| "lr": 0.000186, |
| "step": 94, |
| "tokens_trained": 0.008932056 |
| }, |
| { |
| "epoch": 0.02723404255319149, |
| "grad_norm": 22.27949333190918, |
| "loss": 18.2233, |
| "lr": 0.00019, |
| "step": 96, |
| "tokens_trained": 0.009121608 |
| }, |
| { |
| "epoch": 0.027801418439716313, |
| "grad_norm": 24.9247989654541, |
| "loss": 17.9867, |
| "lr": 0.000194, |
| "step": 98, |
| "tokens_trained": 0.009311008 |
| }, |
| { |
| "epoch": 0.028368794326241134, |
| "grad_norm": 24.302066802978516, |
| "loss": 17.8016, |
| "lr": 0.00019800000000000002, |
| "step": 100, |
| "tokens_trained": 0.009501456 |
| }, |
| { |
| "epoch": 0.02893617021276596, |
| "grad_norm": 23.458459854125977, |
| "loss": 17.6295, |
| "lr": 0.000202, |
| "step": 102, |
| "tokens_trained": 0.009693952 |
| }, |
| { |
| "epoch": 0.02950354609929078, |
| "grad_norm": 24.092350006103516, |
| "loss": 17.4593, |
| "lr": 0.000206, |
| "step": 104, |
| "tokens_trained": 0.009883328 |
| }, |
| { |
| "epoch": 0.030070921985815603, |
| "grad_norm": 22.54726219177246, |
| "loss": 17.2141, |
| "lr": 0.00021, |
| "step": 106, |
| "tokens_trained": 0.01007316 |
| }, |
| { |
| "epoch": 0.030638297872340424, |
| "grad_norm": 21.334760665893555, |
| "loss": 17.044, |
| "lr": 0.000214, |
| "step": 108, |
| "tokens_trained": 0.010266504 |
| }, |
| { |
| "epoch": 0.031205673758865248, |
| "grad_norm": 20.584287643432617, |
| "loss": 16.8919, |
| "lr": 0.000218, |
| "step": 110, |
| "tokens_trained": 0.010455736 |
| }, |
| { |
| "epoch": 0.03177304964539007, |
| "grad_norm": 23.51676368713379, |
| "loss": 16.751, |
| "lr": 0.000222, |
| "step": 112, |
| "tokens_trained": 0.010645208 |
| }, |
| { |
| "epoch": 0.03234042553191489, |
| "grad_norm": 23.278276443481445, |
| "loss": 16.5997, |
| "lr": 0.00022600000000000002, |
| "step": 114, |
| "tokens_trained": 0.010838928 |
| }, |
| { |
| "epoch": 0.032907801418439714, |
| "grad_norm": 25.4830265045166, |
| "loss": 16.3416, |
| "lr": 0.00023, |
| "step": 116, |
| "tokens_trained": 0.011027792 |
| }, |
| { |
| "epoch": 0.03347517730496454, |
| "grad_norm": 29.442413330078125, |
| "loss": 16.24, |
| "lr": 0.00023400000000000002, |
| "step": 118, |
| "tokens_trained": 0.011217456 |
| }, |
| { |
| "epoch": 0.03404255319148936, |
| "grad_norm": 21.77578353881836, |
| "loss": 16.1922, |
| "lr": 0.00023799999999999998, |
| "step": 120, |
| "tokens_trained": 0.01140804 |
| }, |
| { |
| "epoch": 0.03460992907801418, |
| "grad_norm": 27.040719985961914, |
| "loss": 15.9059, |
| "lr": 0.000242, |
| "step": 122, |
| "tokens_trained": 0.011597816 |
| }, |
| { |
| "epoch": 0.035177304964539004, |
| "grad_norm": 24.74480628967285, |
| "loss": 15.7818, |
| "lr": 0.000246, |
| "step": 124, |
| "tokens_trained": 0.011785624 |
| }, |
| { |
| "epoch": 0.03546099290780142, |
| "eval_loss": 15.553059577941895, |
| "eval_runtime": 23.5485, |
| "step": 125, |
| "tokens_trained": 0.011880832 |
| }, |
| { |
| "epoch": 0.03574468085106383, |
| "grad_norm": 23.13482666015625, |
| "loss": 15.5739, |
| "lr": 0.00025, |
| "step": 126, |
| "tokens_trained": 0.011975976 |
| }, |
| { |
| "epoch": 0.03631205673758865, |
| "grad_norm": 22.8618106842041, |
| "loss": 15.4302, |
| "lr": 0.000254, |
| "step": 128, |
| "tokens_trained": 0.012166744 |
| }, |
| { |
| "epoch": 0.03687943262411347, |
| "grad_norm": 26.804859161376953, |
| "loss": 15.3623, |
| "lr": 0.00025800000000000004, |
| "step": 130, |
| "tokens_trained": 0.01235436 |
| }, |
| { |
| "epoch": 0.0374468085106383, |
| "grad_norm": 21.826601028442383, |
| "loss": 15.1465, |
| "lr": 0.000262, |
| "step": 132, |
| "tokens_trained": 0.012544976 |
| }, |
| { |
| "epoch": 0.03801418439716312, |
| "grad_norm": 39.447086334228516, |
| "loss": 15.0137, |
| "lr": 0.000266, |
| "step": 134, |
| "tokens_trained": 0.012736352 |
| }, |
| { |
| "epoch": 0.03858156028368794, |
| "grad_norm": 23.44275665283203, |
| "loss": 14.9355, |
| "lr": 0.00027, |
| "step": 136, |
| "tokens_trained": 0.012925008 |
| }, |
| { |
| "epoch": 0.03914893617021276, |
| "grad_norm": 21.631427764892578, |
| "loss": 14.6825, |
| "lr": 0.00027400000000000005, |
| "step": 138, |
| "tokens_trained": 0.013114672 |
| }, |
| { |
| "epoch": 0.03971631205673759, |
| "grad_norm": 23.674650192260742, |
| "loss": 14.5194, |
| "lr": 0.00027800000000000004, |
| "step": 140, |
| "tokens_trained": 0.013304016 |
| }, |
| { |
| "epoch": 0.04028368794326241, |
| "grad_norm": 23.974796295166016, |
| "loss": 14.4829, |
| "lr": 0.00028199999999999997, |
| "step": 142, |
| "tokens_trained": 0.013496696 |
| }, |
| { |
| "epoch": 0.04085106382978723, |
| "grad_norm": 26.112201690673828, |
| "loss": 14.3027, |
| "lr": 0.00028599999999999996, |
| "step": 144, |
| "tokens_trained": 0.013684816 |
| }, |
| { |
| "epoch": 0.04141843971631206, |
| "grad_norm": 20.67386817932129, |
| "loss": 14.1499, |
| "lr": 0.00029, |
| "step": 146, |
| "tokens_trained": 0.013874832 |
| }, |
| { |
| "epoch": 0.04198581560283688, |
| "grad_norm": 24.253408432006836, |
| "loss": 13.9378, |
| "lr": 0.000294, |
| "step": 148, |
| "tokens_trained": 0.014065056 |
| }, |
| { |
| "epoch": 0.0425531914893617, |
| "grad_norm": 35.716087341308594, |
| "loss": 14.0562, |
| "lr": 0.000298, |
| "step": 150, |
| "tokens_trained": 0.014256784 |
| }, |
| { |
| "epoch": 0.04312056737588652, |
| "grad_norm": 29.414331436157227, |
| "loss": 14.0462, |
| "lr": 0.000302, |
| "step": 152, |
| "tokens_trained": 0.014446312 |
| }, |
| { |
| "epoch": 0.04368794326241135, |
| "grad_norm": 30.687482833862305, |
| "loss": 13.7603, |
| "lr": 0.000306, |
| "step": 154, |
| "tokens_trained": 0.014639872 |
| }, |
| { |
| "epoch": 0.04425531914893617, |
| "grad_norm": 29.806455612182617, |
| "loss": 13.708, |
| "lr": 0.00031, |
| "step": 156, |
| "tokens_trained": 0.014831112 |
| }, |
| { |
| "epoch": 0.04482269503546099, |
| "grad_norm": 24.900897979736328, |
| "loss": 13.548, |
| "lr": 0.000314, |
| "step": 158, |
| "tokens_trained": 0.015021288 |
| }, |
| { |
| "epoch": 0.04539007092198582, |
| "grad_norm": 24.29252815246582, |
| "loss": 13.3119, |
| "lr": 0.00031800000000000003, |
| "step": 160, |
| "tokens_trained": 0.01521228 |
| }, |
| { |
| "epoch": 0.04595744680851064, |
| "grad_norm": 20.68342399597168, |
| "loss": 13.1829, |
| "lr": 0.000322, |
| "step": 162, |
| "tokens_trained": 0.015403688 |
| }, |
| { |
| "epoch": 0.04652482269503546, |
| "grad_norm": 20.822795867919922, |
| "loss": 12.9044, |
| "lr": 0.000326, |
| "step": 164, |
| "tokens_trained": 0.015593416 |
| }, |
| { |
| "epoch": 0.04709219858156028, |
| "grad_norm": 21.689916610717773, |
| "loss": 12.6862, |
| "lr": 0.00033, |
| "step": 166, |
| "tokens_trained": 0.015784408 |
| }, |
| { |
| "epoch": 0.04765957446808511, |
| "grad_norm": 17.873889923095703, |
| "loss": 12.5502, |
| "lr": 0.00033400000000000004, |
| "step": 168, |
| "tokens_trained": 0.0159744 |
| }, |
| { |
| "epoch": 0.04822695035460993, |
| "grad_norm": 18.951616287231445, |
| "loss": 12.308, |
| "lr": 0.00033800000000000003, |
| "step": 170, |
| "tokens_trained": 0.016163736 |
| }, |
| { |
| "epoch": 0.04879432624113475, |
| "grad_norm": 15.146363258361816, |
| "loss": 12.1558, |
| "lr": 0.000342, |
| "step": 172, |
| "tokens_trained": 0.016353832 |
| }, |
| { |
| "epoch": 0.04936170212765958, |
| "grad_norm": 18.336984634399414, |
| "loss": 12.0386, |
| "lr": 0.000346, |
| "step": 174, |
| "tokens_trained": 0.016545088 |
| }, |
| { |
| "epoch": 0.0499290780141844, |
| "grad_norm": 17.221126556396484, |
| "loss": 11.8791, |
| "lr": 0.00035, |
| "step": 176, |
| "tokens_trained": 0.016735704 |
| }, |
| { |
| "epoch": 0.05049645390070922, |
| "grad_norm": 19.362564086914062, |
| "loss": 11.7224, |
| "lr": 0.000354, |
| "step": 178, |
| "tokens_trained": 0.016927944 |
| }, |
| { |
| "epoch": 0.05106382978723404, |
| "grad_norm": 15.564507484436035, |
| "loss": 11.6448, |
| "lr": 0.000358, |
| "step": 180, |
| "tokens_trained": 0.017116096 |
| }, |
| { |
| "epoch": 0.05163120567375887, |
| "grad_norm": 20.711383819580078, |
| "loss": 11.4398, |
| "lr": 0.000362, |
| "step": 182, |
| "tokens_trained": 0.01730564 |
| }, |
| { |
| "epoch": 0.05219858156028369, |
| "grad_norm": 18.627403259277344, |
| "loss": 11.3377, |
| "lr": 0.000366, |
| "step": 184, |
| "tokens_trained": 0.017495864 |
| }, |
| { |
| "epoch": 0.05276595744680851, |
| "grad_norm": 15.00942325592041, |
| "loss": 11.1416, |
| "lr": 0.00037, |
| "step": 186, |
| "tokens_trained": 0.017686464 |
| }, |
| { |
| "epoch": 0.05333333333333334, |
| "grad_norm": 17.070598602294922, |
| "loss": 11.0148, |
| "lr": 0.000374, |
| "step": 188, |
| "tokens_trained": 0.017879488 |
| }, |
| { |
| "epoch": 0.05390070921985816, |
| "grad_norm": 16.101457595825195, |
| "loss": 10.8874, |
| "lr": 0.000378, |
| "step": 190, |
| "tokens_trained": 0.018068312 |
| }, |
| { |
| "epoch": 0.05446808510638298, |
| "grad_norm": 15.613334655761719, |
| "loss": 10.7055, |
| "lr": 0.000382, |
| "step": 192, |
| "tokens_trained": 0.018255752 |
| }, |
| { |
| "epoch": 0.0550354609929078, |
| "grad_norm": 17.671857833862305, |
| "loss": 10.5706, |
| "lr": 0.000386, |
| "step": 194, |
| "tokens_trained": 0.018447096 |
| }, |
| { |
| "epoch": 0.05560283687943263, |
| "grad_norm": 16.080909729003906, |
| "loss": 10.4476, |
| "lr": 0.00039000000000000005, |
| "step": 196, |
| "tokens_trained": 0.018637264 |
| }, |
| { |
| "epoch": 0.05617021276595745, |
| "grad_norm": 15.02849292755127, |
| "loss": 10.2962, |
| "lr": 0.00039400000000000004, |
| "step": 198, |
| "tokens_trained": 0.018827552 |
| }, |
| { |
| "epoch": 0.05673758865248227, |
| "grad_norm": 14.990167617797852, |
| "loss": 10.1912, |
| "lr": 0.000398, |
| "step": 200, |
| "tokens_trained": 0.019018 |
| }, |
| { |
| "epoch": 0.05730496453900709, |
| "grad_norm": 15.390633583068848, |
| "loss": 10.0442, |
| "lr": 0.000402, |
| "step": 202, |
| "tokens_trained": 0.019209864 |
| }, |
| { |
| "epoch": 0.05787234042553192, |
| "grad_norm": 16.871570587158203, |
| "loss": 9.9685, |
| "lr": 0.00040600000000000006, |
| "step": 204, |
| "tokens_trained": 0.019400176 |
| }, |
| { |
| "epoch": 0.05843971631205674, |
| "grad_norm": 20.16544532775879, |
| "loss": 9.8531, |
| "lr": 0.00041, |
| "step": 206, |
| "tokens_trained": 0.019589424 |
| }, |
| { |
| "epoch": 0.05900709219858156, |
| "grad_norm": 16.825023651123047, |
| "loss": 9.7777, |
| "lr": 0.000414, |
| "step": 208, |
| "tokens_trained": 0.019779112 |
| }, |
| { |
| "epoch": 0.059574468085106386, |
| "grad_norm": 16.43510627746582, |
| "loss": 9.6122, |
| "lr": 0.00041799999999999997, |
| "step": 210, |
| "tokens_trained": 0.019970048 |
| }, |
| { |
| "epoch": 0.060141843971631206, |
| "grad_norm": 17.340473175048828, |
| "loss": 9.4859, |
| "lr": 0.000422, |
| "step": 212, |
| "tokens_trained": 0.020160968 |
| }, |
| { |
| "epoch": 0.06070921985815603, |
| "grad_norm": 15.019119262695312, |
| "loss": 9.3656, |
| "lr": 0.000426, |
| "step": 214, |
| "tokens_trained": 0.020349664 |
| }, |
| { |
| "epoch": 0.06127659574468085, |
| "grad_norm": 13.379194259643555, |
| "loss": 9.2348, |
| "lr": 0.00043, |
| "step": 216, |
| "tokens_trained": 0.020538192 |
| }, |
| { |
| "epoch": 0.061843971631205676, |
| "grad_norm": 16.71472930908203, |
| "loss": 9.2258, |
| "lr": 0.00043400000000000003, |
| "step": 218, |
| "tokens_trained": 0.020728936 |
| }, |
| { |
| "epoch": 0.062411347517730496, |
| "grad_norm": 12.743139266967773, |
| "loss": 9.0569, |
| "lr": 0.000438, |
| "step": 220, |
| "tokens_trained": 0.020917472 |
| }, |
| { |
| "epoch": 0.06297872340425532, |
| "grad_norm": 15.739934921264648, |
| "loss": 8.9623, |
| "lr": 0.000442, |
| "step": 222, |
| "tokens_trained": 0.02110928 |
| }, |
| { |
| "epoch": 0.06354609929078014, |
| "grad_norm": 14.23620891571045, |
| "loss": 8.8201, |
| "lr": 0.000446, |
| "step": 224, |
| "tokens_trained": 0.021300168 |
| }, |
| { |
| "epoch": 0.06411347517730497, |
| "grad_norm": 13.005538940429688, |
| "loss": 8.7235, |
| "lr": 0.00045000000000000004, |
| "step": 226, |
| "tokens_trained": 0.021490272 |
| }, |
| { |
| "epoch": 0.06468085106382979, |
| "grad_norm": 17.17629051208496, |
| "loss": 8.6907, |
| "lr": 0.00045400000000000003, |
| "step": 228, |
| "tokens_trained": 0.021681552 |
| }, |
| { |
| "epoch": 0.06524822695035461, |
| "grad_norm": 14.430739402770996, |
| "loss": 8.6196, |
| "lr": 0.000458, |
| "step": 230, |
| "tokens_trained": 0.02187236 |
| }, |
| { |
| "epoch": 0.06581560283687943, |
| "grad_norm": 14.575714111328125, |
| "loss": 8.4741, |
| "lr": 0.000462, |
| "step": 232, |
| "tokens_trained": 0.022061976 |
| }, |
| { |
| "epoch": 0.06638297872340425, |
| "grad_norm": 13.892754554748535, |
| "loss": 8.4118, |
| "lr": 0.00046600000000000005, |
| "step": 234, |
| "tokens_trained": 0.022252008 |
| }, |
| { |
| "epoch": 0.06695035460992908, |
| "grad_norm": 11.58240795135498, |
| "loss": 8.2781, |
| "lr": 0.00047, |
| "step": 236, |
| "tokens_trained": 0.02244284 |
| }, |
| { |
| "epoch": 0.0675177304964539, |
| "grad_norm": 13.022644996643066, |
| "loss": 8.2139, |
| "lr": 0.000474, |
| "step": 238, |
| "tokens_trained": 0.022631152 |
| }, |
| { |
| "epoch": 0.06808510638297872, |
| "grad_norm": 11.844677925109863, |
| "loss": 8.1134, |
| "lr": 0.00047799999999999996, |
| "step": 240, |
| "tokens_trained": 0.022821096 |
| }, |
| { |
| "epoch": 0.06865248226950355, |
| "grad_norm": 13.878067016601562, |
| "loss": 8.0221, |
| "lr": 0.000482, |
| "step": 242, |
| "tokens_trained": 0.023011656 |
| }, |
| { |
| "epoch": 0.06921985815602837, |
| "grad_norm": 12.34648323059082, |
| "loss": 7.9755, |
| "lr": 0.000486, |
| "step": 244, |
| "tokens_trained": 0.023201 |
| }, |
| { |
| "epoch": 0.06978723404255319, |
| "grad_norm": 14.238297462463379, |
| "loss": 7.8969, |
| "lr": 0.00049, |
| "step": 246, |
| "tokens_trained": 0.023391128 |
| }, |
| { |
| "epoch": 0.07035460992907801, |
| "grad_norm": 14.386019706726074, |
| "loss": 7.8627, |
| "lr": 0.000494, |
| "step": 248, |
| "tokens_trained": 0.023581768 |
| }, |
| { |
| "epoch": 0.07092198581560284, |
| "grad_norm": 13.623086929321289, |
| "loss": 7.7568, |
| "lr": 0.000498, |
| "step": 250, |
| "tokens_trained": 0.023771248 |
| }, |
| { |
| "epoch": 0.07092198581560284, |
| "eval_loss": 7.70297384262085, |
| "eval_runtime": 21.3853, |
| "step": 250, |
| "tokens_trained": 0.023771248 |
| }, |
| { |
| "epoch": 0.07148936170212766, |
| "grad_norm": 14.347646713256836, |
| "loss": 7.6842, |
| "lr": 0.0005020000000000001, |
| "step": 252, |
| "tokens_trained": 0.023961056 |
| }, |
| { |
| "epoch": 0.07205673758865248, |
| "grad_norm": 12.5592041015625, |
| "loss": 7.6516, |
| "lr": 0.000506, |
| "step": 254, |
| "tokens_trained": 0.024150968 |
| }, |
| { |
| "epoch": 0.0726241134751773, |
| "grad_norm": 13.219141960144043, |
| "loss": 7.5789, |
| "lr": 0.00051, |
| "step": 256, |
| "tokens_trained": 0.024340072 |
| }, |
| { |
| "epoch": 0.07319148936170213, |
| "grad_norm": 12.654081344604492, |
| "loss": 7.5369, |
| "lr": 0.000514, |
| "step": 258, |
| "tokens_trained": 0.024529296 |
| }, |
| { |
| "epoch": 0.07375886524822695, |
| "grad_norm": 13.136971473693848, |
| "loss": 7.4949, |
| "lr": 0.000518, |
| "step": 260, |
| "tokens_trained": 0.024719688 |
| }, |
| { |
| "epoch": 0.07432624113475177, |
| "grad_norm": 12.680288314819336, |
| "loss": 7.3904, |
| "lr": 0.000522, |
| "step": 262, |
| "tokens_trained": 0.024909632 |
| }, |
| { |
| "epoch": 0.0748936170212766, |
| "grad_norm": 12.754518508911133, |
| "loss": 7.3514, |
| "lr": 0.000526, |
| "step": 264, |
| "tokens_trained": 0.025098416 |
| }, |
| { |
| "epoch": 0.07546099290780142, |
| "grad_norm": 13.22311019897461, |
| "loss": 7.2951, |
| "lr": 0.0005300000000000001, |
| "step": 266, |
| "tokens_trained": 0.025287344 |
| }, |
| { |
| "epoch": 0.07602836879432624, |
| "grad_norm": 12.11903190612793, |
| "loss": 7.2229, |
| "lr": 0.0005340000000000001, |
| "step": 268, |
| "tokens_trained": 0.025477152 |
| }, |
| { |
| "epoch": 0.07659574468085106, |
| "grad_norm": 13.771833419799805, |
| "loss": 7.1815, |
| "lr": 0.0005380000000000001, |
| "step": 270, |
| "tokens_trained": 0.025668288 |
| }, |
| { |
| "epoch": 0.07716312056737588, |
| "grad_norm": 11.756864547729492, |
| "loss": 7.1669, |
| "lr": 0.0005420000000000001, |
| "step": 272, |
| "tokens_trained": 0.025858528 |
| }, |
| { |
| "epoch": 0.0777304964539007, |
| "grad_norm": 13.613094329833984, |
| "loss": 7.1079, |
| "lr": 0.000546, |
| "step": 274, |
| "tokens_trained": 0.026048616 |
| }, |
| { |
| "epoch": 0.07829787234042553, |
| "grad_norm": 10.001923561096191, |
| "loss": 7.0508, |
| "lr": 0.00055, |
| "step": 276, |
| "tokens_trained": 0.026236944 |
| }, |
| { |
| "epoch": 0.07886524822695036, |
| "grad_norm": 14.262083053588867, |
| "loss": 6.9955, |
| "lr": 0.000554, |
| "step": 278, |
| "tokens_trained": 0.026426848 |
| }, |
| { |
| "epoch": 0.07943262411347518, |
| "grad_norm": 12.381136894226074, |
| "loss": 6.9831, |
| "lr": 0.000558, |
| "step": 280, |
| "tokens_trained": 0.026616784 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 9.815845489501953, |
| "loss": 6.917, |
| "lr": 0.0005620000000000001, |
| "step": 282, |
| "tokens_trained": 0.026805176 |
| }, |
| { |
| "epoch": 0.08056737588652482, |
| "grad_norm": 11.669997215270996, |
| "loss": 6.8999, |
| "lr": 0.000566, |
| "step": 284, |
| "tokens_trained": 0.02699488 |
| }, |
| { |
| "epoch": 0.08113475177304964, |
| "grad_norm": 12.770941734313965, |
| "loss": 6.8998, |
| "lr": 0.00057, |
| "step": 286, |
| "tokens_trained": 0.027185784 |
| }, |
| { |
| "epoch": 0.08170212765957446, |
| "grad_norm": 15.572457313537598, |
| "loss": 6.841, |
| "lr": 0.000574, |
| "step": 288, |
| "tokens_trained": 0.027375896 |
| }, |
| { |
| "epoch": 0.08226950354609928, |
| "grad_norm": 10.980833053588867, |
| "loss": 6.8545, |
| "lr": 0.000578, |
| "step": 290, |
| "tokens_trained": 0.02756588 |
| }, |
| { |
| "epoch": 0.08283687943262412, |
| "grad_norm": 11.678337097167969, |
| "loss": 6.7853, |
| "lr": 0.0005819999999999999, |
| "step": 292, |
| "tokens_trained": 0.02775456 |
| }, |
| { |
| "epoch": 0.08340425531914894, |
| "grad_norm": 9.77885913848877, |
| "loss": 6.7465, |
| "lr": 0.0005859999999999999, |
| "step": 294, |
| "tokens_trained": 0.027942856 |
| }, |
| { |
| "epoch": 0.08397163120567376, |
| "grad_norm": 13.62730884552002, |
| "loss": 6.7276, |
| "lr": 0.00059, |
| "step": 296, |
| "tokens_trained": 0.028133152 |
| }, |
| { |
| "epoch": 0.08453900709219858, |
| "grad_norm": 10.644404411315918, |
| "loss": 6.6802, |
| "lr": 0.000594, |
| "step": 298, |
| "tokens_trained": 0.028322192 |
| }, |
| { |
| "epoch": 0.0851063829787234, |
| "grad_norm": 11.130610466003418, |
| "loss": 6.6548, |
| "lr": 0.000598, |
| "step": 300, |
| "tokens_trained": 0.0285122 |
| }, |
| { |
| "epoch": 0.08567375886524822, |
| "grad_norm": 11.557455062866211, |
| "loss": 6.6155, |
| "lr": 0.000602, |
| "step": 302, |
| "tokens_trained": 0.028699792 |
| }, |
| { |
| "epoch": 0.08624113475177304, |
| "grad_norm": 9.276884078979492, |
| "loss": 6.5989, |
| "lr": 0.000606, |
| "step": 304, |
| "tokens_trained": 0.028889896 |
| }, |
| { |
| "epoch": 0.08680851063829788, |
| "grad_norm": 9.616179466247559, |
| "loss": 6.5773, |
| "lr": 0.00061, |
| "step": 306, |
| "tokens_trained": 0.029082272 |
| }, |
| { |
| "epoch": 0.0873758865248227, |
| "grad_norm": 10.575953483581543, |
| "loss": 6.5358, |
| "lr": 0.000614, |
| "step": 308, |
| "tokens_trained": 0.029273352 |
| }, |
| { |
| "epoch": 0.08794326241134752, |
| "grad_norm": 9.089850425720215, |
| "loss": 6.5088, |
| "lr": 0.0006180000000000001, |
| "step": 310, |
| "tokens_trained": 0.029463848 |
| }, |
| { |
| "epoch": 0.08851063829787234, |
| "grad_norm": 9.090002059936523, |
| "loss": 6.4849, |
| "lr": 0.000622, |
| "step": 312, |
| "tokens_trained": 0.029653272 |
| }, |
| { |
| "epoch": 0.08907801418439716, |
| "grad_norm": 12.038308143615723, |
| "loss": 6.4624, |
| "lr": 0.000626, |
| "step": 314, |
| "tokens_trained": 0.029841928 |
| }, |
| { |
| "epoch": 0.08964539007092198, |
| "grad_norm": 9.073866844177246, |
| "loss": 6.4515, |
| "lr": 0.00063, |
| "step": 316, |
| "tokens_trained": 0.030029808 |
| }, |
| { |
| "epoch": 0.0902127659574468, |
| "grad_norm": 8.727197647094727, |
| "loss": 6.43, |
| "lr": 0.000634, |
| "step": 318, |
| "tokens_trained": 0.030221288 |
| }, |
| { |
| "epoch": 0.09078014184397164, |
| "grad_norm": 14.558151245117188, |
| "loss": 6.4487, |
| "lr": 0.000638, |
| "step": 320, |
| "tokens_trained": 0.030410872 |
| }, |
| { |
| "epoch": 0.09134751773049646, |
| "grad_norm": 9.98914623260498, |
| "loss": 6.4279, |
| "lr": 0.000642, |
| "step": 322, |
| "tokens_trained": 0.030602376 |
| }, |
| { |
| "epoch": 0.09191489361702128, |
| "grad_norm": 10.395442962646484, |
| "loss": 6.4311, |
| "lr": 0.000646, |
| "step": 324, |
| "tokens_trained": 0.030792968 |
| }, |
| { |
| "epoch": 0.0924822695035461, |
| "grad_norm": 10.8250093460083, |
| "loss": 6.3726, |
| "lr": 0.0006500000000000001, |
| "step": 326, |
| "tokens_trained": 0.030982944 |
| }, |
| { |
| "epoch": 0.09304964539007092, |
| "grad_norm": 9.73416805267334, |
| "loss": 6.34, |
| "lr": 0.0006540000000000001, |
| "step": 328, |
| "tokens_trained": 0.031174928 |
| }, |
| { |
| "epoch": 0.09361702127659574, |
| "grad_norm": 8.596503257751465, |
| "loss": 6.3322, |
| "lr": 0.0006580000000000001, |
| "step": 330, |
| "tokens_trained": 0.031364288 |
| }, |
| { |
| "epoch": 0.09418439716312056, |
| "grad_norm": 8.49472427368164, |
| "loss": 6.3096, |
| "lr": 0.000662, |
| "step": 332, |
| "tokens_trained": 0.03155376 |
| }, |
| { |
| "epoch": 0.0947517730496454, |
| "grad_norm": 7.857503414154053, |
| "loss": 6.2368, |
| "lr": 0.000666, |
| "step": 334, |
| "tokens_trained": 0.031744368 |
| }, |
| { |
| "epoch": 0.09531914893617022, |
| "grad_norm": 9.007513999938965, |
| "loss": 6.198, |
| "lr": 0.00067, |
| "step": 336, |
| "tokens_trained": 0.031934136 |
| }, |
| { |
| "epoch": 0.09588652482269504, |
| "grad_norm": 8.185524940490723, |
| "loss": 6.2328, |
| "lr": 0.000674, |
| "step": 338, |
| "tokens_trained": 0.032124984 |
| }, |
| { |
| "epoch": 0.09645390070921986, |
| "grad_norm": 8.784396171569824, |
| "loss": 6.1945, |
| "lr": 0.0006780000000000001, |
| "step": 340, |
| "tokens_trained": 0.032316016 |
| }, |
| { |
| "epoch": 0.09702127659574468, |
| "grad_norm": 8.642311096191406, |
| "loss": 6.218, |
| "lr": 0.0006820000000000001, |
| "step": 342, |
| "tokens_trained": 0.032506224 |
| }, |
| { |
| "epoch": 0.0975886524822695, |
| "grad_norm": 8.493780136108398, |
| "loss": 6.194, |
| "lr": 0.0006860000000000001, |
| "step": 344, |
| "tokens_trained": 0.032696152 |
| }, |
| { |
| "epoch": 0.09815602836879432, |
| "grad_norm": 9.120508193969727, |
| "loss": 6.2241, |
| "lr": 0.00069, |
| "step": 346, |
| "tokens_trained": 0.032885688 |
| }, |
| { |
| "epoch": 0.09872340425531916, |
| "grad_norm": 9.34500503540039, |
| "loss": 6.1548, |
| "lr": 0.000694, |
| "step": 348, |
| "tokens_trained": 0.03307568 |
| }, |
| { |
| "epoch": 0.09929078014184398, |
| "grad_norm": 7.483356952667236, |
| "loss": 6.1282, |
| "lr": 0.0006979999999999999, |
| "step": 350, |
| "tokens_trained": 0.033267208 |
| }, |
| { |
| "epoch": 0.0998581560283688, |
| "grad_norm": 7.974069118499756, |
| "loss": 6.1032, |
| "lr": 0.0007019999999999999, |
| "step": 352, |
| "tokens_trained": 0.033458144 |
| }, |
| { |
| "epoch": 0.10042553191489362, |
| "grad_norm": 8.247384071350098, |
| "loss": 6.1698, |
| "lr": 0.0007059999999999999, |
| "step": 354, |
| "tokens_trained": 0.033650352 |
| }, |
| { |
| "epoch": 0.10099290780141844, |
| "grad_norm": 8.554885864257812, |
| "loss": 6.1429, |
| "lr": 0.00071, |
| "step": 356, |
| "tokens_trained": 0.033840232 |
| }, |
| { |
| "epoch": 0.10156028368794326, |
| "grad_norm": 7.209281921386719, |
| "loss": 6.0997, |
| "lr": 0.000714, |
| "step": 358, |
| "tokens_trained": 0.034030032 |
| }, |
| { |
| "epoch": 0.10212765957446808, |
| "grad_norm": 8.660383224487305, |
| "loss": 6.1497, |
| "lr": 0.000718, |
| "step": 360, |
| "tokens_trained": 0.034218592 |
| }, |
| { |
| "epoch": 0.10269503546099291, |
| "grad_norm": 9.382761001586914, |
| "loss": 6.0665, |
| "lr": 0.000722, |
| "step": 362, |
| "tokens_trained": 0.034408408 |
| }, |
| { |
| "epoch": 0.10326241134751774, |
| "grad_norm": 6.915714263916016, |
| "loss": 6.0636, |
| "lr": 0.000726, |
| "step": 364, |
| "tokens_trained": 0.034600016 |
| }, |
| { |
| "epoch": 0.10382978723404256, |
| "grad_norm": 7.8990631103515625, |
| "loss": 6.0975, |
| "lr": 0.00073, |
| "step": 366, |
| "tokens_trained": 0.034790792 |
| }, |
| { |
| "epoch": 0.10439716312056738, |
| "grad_norm": 8.859809875488281, |
| "loss": 6.0754, |
| "lr": 0.000734, |
| "step": 368, |
| "tokens_trained": 0.034981304 |
| }, |
| { |
| "epoch": 0.1049645390070922, |
| "grad_norm": 7.392801761627197, |
| "loss": 6.039, |
| "lr": 0.000738, |
| "step": 370, |
| "tokens_trained": 0.03516956 |
| }, |
| { |
| "epoch": 0.10553191489361702, |
| "grad_norm": 9.427324295043945, |
| "loss": 6.084, |
| "lr": 0.000742, |
| "step": 372, |
| "tokens_trained": 0.035358816 |
| }, |
| { |
| "epoch": 0.10609929078014184, |
| "grad_norm": 7.168910503387451, |
| "loss": 6.0498, |
| "lr": 0.000746, |
| "step": 374, |
| "tokens_trained": 0.035548016 |
| }, |
| { |
| "epoch": 0.10638297872340426, |
| "eval_loss": 6.038269996643066, |
| "eval_runtime": 21.3445, |
| "step": 375, |
| "tokens_trained": 0.035644104 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 7650, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 125, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|