{
  "best_global_step": 125,
  "best_metric": 15.553059577941895,
  "best_model_checkpoint": "/gpfs/scratch/guoh/DNAFM/output/gencode_human_12.8k_12800/Gencode-BPE/checkpoint-125",
  "epoch": 0.03546099290780142,
  "eval_steps": 125,
  "global_step": 125,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0005673758865248227,
      "grad_norm": 1275.0146484375,
      "loss": 281.4781,
      "lr": 2e-06,
      "step": 2,
      "tokens_trained": 0.000192256
    },
    {
      "epoch": 0.0011347517730496454,
      "grad_norm": 1437.579833984375,
      "loss": 267.2211,
      "lr": 6e-06,
      "step": 4,
      "tokens_trained": 0.000382024
    },
    {
      "epoch": 0.001702127659574468,
      "grad_norm": 1719.271484375,
      "loss": 219.3822,
      "lr": 1e-05,
      "step": 6,
      "tokens_trained": 0.00057072
    },
    {
      "epoch": 0.0022695035460992908,
      "grad_norm": 1444.94970703125,
      "loss": 133.8172,
      "lr": 1.4e-05,
      "step": 8,
      "tokens_trained": 0.000761336
    },
    {
      "epoch": 0.0028368794326241137,
      "grad_norm": 238.9689178466797,
      "loss": 90.8177,
      "lr": 1.8e-05,
      "step": 10,
      "tokens_trained": 0.000953248
    },
    {
      "epoch": 0.003404255319148936,
      "grad_norm": 158.53497314453125,
      "loss": 84.6922,
      "lr": 2.2e-05,
      "step": 12,
      "tokens_trained": 0.00114424
    },
    {
      "epoch": 0.003971631205673759,
      "grad_norm": 146.10595703125,
      "loss": 76.7055,
      "lr": 2.6e-05,
      "step": 14,
      "tokens_trained": 0.001334104
    },
    {
      "epoch": 0.0045390070921985815,
      "grad_norm": 140.69964599609375,
      "loss": 67.9952,
      "lr": 3e-05,
      "step": 16,
      "tokens_trained": 0.00152392
    },
    {
      "epoch": 0.005106382978723404,
      "grad_norm": 108.80303192138672,
      "loss": 57.8088,
      "lr": 3.4000000000000007e-05,
      "step": 18,
      "tokens_trained": 0.001713872
    },
    {
      "epoch": 0.005673758865248227,
      "grad_norm": 106.82334899902344,
      "loss": 48.6585,
      "lr": 3.8e-05,
      "step": 20,
      "tokens_trained": 0.001903976
    },
    {
      "epoch": 0.00624113475177305,
      "grad_norm": 93.58769989013672,
      "loss": 41.7984,
      "lr": 4.2000000000000004e-05,
      "step": 22,
      "tokens_trained": 0.002094288
    },
    {
      "epoch": 0.006808510638297872,
      "grad_norm": 87.5854721069336,
      "loss": 37.6201,
      "lr": 4.6e-05,
      "step": 24,
      "tokens_trained": 0.002282496
    },
    {
      "epoch": 0.007375886524822695,
      "grad_norm": 84.12794494628906,
      "loss": 35.0091,
      "lr": 5e-05,
      "step": 26,
      "tokens_trained": 0.00247068
    },
    {
      "epoch": 0.007943262411347518,
      "grad_norm": 79.77535247802734,
      "loss": 33.2253,
      "lr": 5.4e-05,
      "step": 28,
      "tokens_trained": 0.002662888
    },
    {
      "epoch": 0.00851063829787234,
      "grad_norm": 66.42157745361328,
      "loss": 32.0682,
      "lr": 5.800000000000001e-05,
      "step": 30,
      "tokens_trained": 0.002851968
    },
    {
      "epoch": 0.009078014184397163,
      "grad_norm": 87.52485656738281,
      "loss": 30.893,
      "lr": 6.2e-05,
      "step": 32,
      "tokens_trained": 0.003041384
    },
    {
      "epoch": 0.009645390070921986,
      "grad_norm": 58.33614730834961,
      "loss": 30.0513,
      "lr": 6.6e-05,
      "step": 34,
      "tokens_trained": 0.003232872
    },
    {
      "epoch": 0.010212765957446808,
      "grad_norm": 54.629329681396484,
      "loss": 29.0115,
      "lr": 7.000000000000001e-05,
      "step": 36,
      "tokens_trained": 0.003423824
    },
    {
      "epoch": 0.01078014184397163,
      "grad_norm": 52.79097366333008,
      "loss": 28.2084,
      "lr": 7.4e-05,
      "step": 38,
      "tokens_trained": 0.003613232
    },
    {
      "epoch": 0.011347517730496455,
      "grad_norm": 54.481224060058594,
      "loss": 27.4345,
      "lr": 7.8e-05,
      "step": 40,
      "tokens_trained": 0.003800952
    },
    {
      "epoch": 0.011914893617021277,
      "grad_norm": 58.7069091796875,
      "loss": 26.5936,
      "lr": 8.2e-05,
      "step": 42,
      "tokens_trained": 0.003991512
    },
    {
      "epoch": 0.0124822695035461,
      "grad_norm": 49.30760955810547,
      "loss": 26.0608,
      "lr": 8.599999999999999e-05,
      "step": 44,
      "tokens_trained": 0.004180648
    },
    {
      "epoch": 0.013049645390070922,
      "grad_norm": 61.902587890625,
      "loss": 25.5363,
      "lr": 8.999999999999999e-05,
      "step": 46,
      "tokens_trained": 0.00437148
    },
    {
      "epoch": 0.013617021276595745,
      "grad_norm": 46.76111602783203,
      "loss": 24.9599,
      "lr": 9.400000000000001e-05,
      "step": 48,
      "tokens_trained": 0.004559344
    },
    {
      "epoch": 0.014184397163120567,
      "grad_norm": 57.06416702270508,
      "loss": 24.4087,
      "lr": 9.800000000000001e-05,
      "step": 50,
      "tokens_trained": 0.004749256
    },
    {
      "epoch": 0.01475177304964539,
      "grad_norm": 44.798736572265625,
      "loss": 24.1444,
      "lr": 0.000102,
      "step": 52,
      "tokens_trained": 0.004940192
    },
    {
      "epoch": 0.015319148936170212,
      "grad_norm": 40.29296875,
      "loss": 23.6011,
      "lr": 0.000106,
      "step": 54,
      "tokens_trained": 0.005130304
    },
    {
      "epoch": 0.015886524822695036,
      "grad_norm": 38.75099563598633,
      "loss": 23.1781,
      "lr": 0.00011,
      "step": 56,
      "tokens_trained": 0.005322864
    },
    {
      "epoch": 0.016453900709219857,
      "grad_norm": 37.470706939697266,
      "loss": 22.9136,
      "lr": 0.000114,
      "step": 58,
      "tokens_trained": 0.00551392
    },
    {
      "epoch": 0.01702127659574468,
      "grad_norm": 35.1894645690918,
      "loss": 22.6336,
      "lr": 0.000118,
      "step": 60,
      "tokens_trained": 0.005703096
    },
    {
      "epoch": 0.017588652482269502,
      "grad_norm": 35.136573791503906,
      "loss": 22.2998,
      "lr": 0.000122,
      "step": 62,
      "tokens_trained": 0.005892448
    },
    {
      "epoch": 0.018156028368794326,
      "grad_norm": 38.05111312866211,
      "loss": 21.9401,
      "lr": 0.000126,
      "step": 64,
      "tokens_trained": 0.006081656
    },
    {
      "epoch": 0.01872340425531915,
      "grad_norm": 35.63850021362305,
      "loss": 21.7206,
      "lr": 0.00013000000000000002,
      "step": 66,
      "tokens_trained": 0.006273032
    },
    {
      "epoch": 0.01929078014184397,
      "grad_norm": 34.327667236328125,
      "loss": 21.4051,
      "lr": 0.000134,
      "step": 68,
      "tokens_trained": 0.00646304
    },
    {
      "epoch": 0.019858156028368795,
      "grad_norm": 31.457059860229492,
      "loss": 21.0774,
      "lr": 0.00013800000000000002,
      "step": 70,
      "tokens_trained": 0.006652832
    },
    {
      "epoch": 0.020425531914893616,
      "grad_norm": 34.91672897338867,
      "loss": 20.8718,
      "lr": 0.00014199999999999998,
      "step": 72,
      "tokens_trained": 0.006843512
    },
    {
      "epoch": 0.02099290780141844,
      "grad_norm": 27.959579467773438,
      "loss": 20.6932,
      "lr": 0.000146,
      "step": 74,
      "tokens_trained": 0.007033584
    },
    {
      "epoch": 0.02156028368794326,
      "grad_norm": 26.569866180419922,
      "loss": 20.4072,
      "lr": 0.00015,
      "step": 76,
      "tokens_trained": 0.007224032
    },
    {
      "epoch": 0.022127659574468085,
      "grad_norm": 28.009904861450195,
      "loss": 20.2229,
      "lr": 0.000154,
      "step": 78,
      "tokens_trained": 0.00741368
    },
    {
      "epoch": 0.02269503546099291,
      "grad_norm": 28.892959594726562,
      "loss": 20.0528,
      "lr": 0.000158,
      "step": 80,
      "tokens_trained": 0.00760416
    },
    {
      "epoch": 0.02326241134751773,
      "grad_norm": 31.58131980895996,
      "loss": 19.8016,
      "lr": 0.000162,
      "step": 82,
      "tokens_trained": 0.007793952
    },
    {
      "epoch": 0.023829787234042554,
      "grad_norm": 31.01254653930664,
      "loss": 19.634,
      "lr": 0.00016600000000000002,
      "step": 84,
      "tokens_trained": 0.007980792
    },
    {
      "epoch": 0.024397163120567375,
      "grad_norm": 28.732515335083008,
      "loss": 19.3777,
      "lr": 0.00017,
      "step": 86,
      "tokens_trained": 0.008171968
    },
    {
      "epoch": 0.0249645390070922,
      "grad_norm": 24.31264877319336,
      "loss": 19.1346,
      "lr": 0.000174,
      "step": 88,
      "tokens_trained": 0.008361632
    },
    {
      "epoch": 0.02553191489361702,
      "grad_norm": 26.557010650634766,
      "loss": 19.0014,
      "lr": 0.000178,
      "step": 90,
      "tokens_trained": 0.008552328
    },
    {
      "epoch": 0.026099290780141844,
      "grad_norm": 21.156103134155273,
      "loss": 18.7032,
      "lr": 0.000182,
      "step": 92,
      "tokens_trained": 0.008743136
    },
    {
      "epoch": 0.02666666666666667,
      "grad_norm": 25.7484188079834,
      "loss": 18.4836,
      "lr": 0.000186,
      "step": 94,
      "tokens_trained": 0.008932056
    },
    {
      "epoch": 0.02723404255319149,
      "grad_norm": 22.27949333190918,
      "loss": 18.2233,
      "lr": 0.00019,
      "step": 96,
      "tokens_trained": 0.009121608
    },
    {
      "epoch": 0.027801418439716313,
      "grad_norm": 24.9247989654541,
      "loss": 17.9867,
      "lr": 0.000194,
      "step": 98,
      "tokens_trained": 0.009311008
    },
    {
      "epoch": 0.028368794326241134,
      "grad_norm": 24.302066802978516,
      "loss": 17.8016,
      "lr": 0.00019800000000000002,
      "step": 100,
      "tokens_trained": 0.009501456
    },
    {
      "epoch": 0.02893617021276596,
      "grad_norm": 23.458459854125977,
      "loss": 17.6295,
      "lr": 0.000202,
      "step": 102,
      "tokens_trained": 0.009693952
    },
    {
      "epoch": 0.02950354609929078,
      "grad_norm": 24.092350006103516,
      "loss": 17.4593,
      "lr": 0.000206,
      "step": 104,
      "tokens_trained": 0.009883328
    },
    {
      "epoch": 0.030070921985815603,
      "grad_norm": 22.54726219177246,
      "loss": 17.2141,
      "lr": 0.00021,
      "step": 106,
      "tokens_trained": 0.01007316
    },
    {
      "epoch": 0.030638297872340424,
      "grad_norm": 21.334760665893555,
      "loss": 17.044,
      "lr": 0.000214,
      "step": 108,
      "tokens_trained": 0.010266504
    },
    {
      "epoch": 0.031205673758865248,
      "grad_norm": 20.584287643432617,
      "loss": 16.8919,
      "lr": 0.000218,
      "step": 110,
      "tokens_trained": 0.010455736
    },
    {
      "epoch": 0.03177304964539007,
      "grad_norm": 23.51676368713379,
      "loss": 16.751,
      "lr": 0.000222,
      "step": 112,
      "tokens_trained": 0.010645208
    },
    {
      "epoch": 0.03234042553191489,
      "grad_norm": 23.278276443481445,
      "loss": 16.5997,
      "lr": 0.00022600000000000002,
      "step": 114,
      "tokens_trained": 0.010838928
    },
    {
      "epoch": 0.032907801418439714,
      "grad_norm": 25.4830265045166,
      "loss": 16.3416,
      "lr": 0.00023,
      "step": 116,
      "tokens_trained": 0.011027792
    },
    {
      "epoch": 0.03347517730496454,
      "grad_norm": 29.442413330078125,
      "loss": 16.24,
      "lr": 0.00023400000000000002,
      "step": 118,
      "tokens_trained": 0.011217456
    },
    {
      "epoch": 0.03404255319148936,
      "grad_norm": 21.77578353881836,
      "loss": 16.1922,
      "lr": 0.00023799999999999998,
      "step": 120,
      "tokens_trained": 0.01140804
    },
    {
      "epoch": 0.03460992907801418,
      "grad_norm": 27.040719985961914,
      "loss": 15.9059,
      "lr": 0.000242,
      "step": 122,
      "tokens_trained": 0.011597816
    },
    {
      "epoch": 0.035177304964539004,
      "grad_norm": 24.74480628967285,
      "loss": 15.7818,
      "lr": 0.000246,
      "step": 124,
      "tokens_trained": 0.011785624
    },
    {
      "epoch": 0.03546099290780142,
      "eval_loss": 15.553059577941895,
      "eval_runtime": 23.5485,
      "step": 125,
      "tokens_trained": 0.011880832
    }
  ],
  "logging_steps": 2,
  "max_steps": 7650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 125,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}