{ "best_global_step": 125, "best_metric": 15.553059577941895, "best_model_checkpoint": "/gpfs/scratch/guoh/DNAFM/output/gencode_human_12.8k_12800/Gencode-BPE/checkpoint-125", "epoch": 0.03546099290780142, "eval_steps": 125, "global_step": 125, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005673758865248227, "grad_norm": 1275.0146484375, "loss": 281.4781, "lr": 2e-06, "step": 2, "tokens_trained": 0.000192256 }, { "epoch": 0.0011347517730496454, "grad_norm": 1437.579833984375, "loss": 267.2211, "lr": 6e-06, "step": 4, "tokens_trained": 0.000382024 }, { "epoch": 0.001702127659574468, "grad_norm": 1719.271484375, "loss": 219.3822, "lr": 1e-05, "step": 6, "tokens_trained": 0.00057072 }, { "epoch": 0.0022695035460992908, "grad_norm": 1444.94970703125, "loss": 133.8172, "lr": 1.4e-05, "step": 8, "tokens_trained": 0.000761336 }, { "epoch": 0.0028368794326241137, "grad_norm": 238.9689178466797, "loss": 90.8177, "lr": 1.8e-05, "step": 10, "tokens_trained": 0.000953248 }, { "epoch": 0.003404255319148936, "grad_norm": 158.53497314453125, "loss": 84.6922, "lr": 2.2e-05, "step": 12, "tokens_trained": 0.00114424 }, { "epoch": 0.003971631205673759, "grad_norm": 146.10595703125, "loss": 76.7055, "lr": 2.6e-05, "step": 14, "tokens_trained": 0.001334104 }, { "epoch": 0.0045390070921985815, "grad_norm": 140.69964599609375, "loss": 67.9952, "lr": 3e-05, "step": 16, "tokens_trained": 0.00152392 }, { "epoch": 0.005106382978723404, "grad_norm": 108.80303192138672, "loss": 57.8088, "lr": 3.4000000000000007e-05, "step": 18, "tokens_trained": 0.001713872 }, { "epoch": 0.005673758865248227, "grad_norm": 106.82334899902344, "loss": 48.6585, "lr": 3.8e-05, "step": 20, "tokens_trained": 0.001903976 }, { "epoch": 0.00624113475177305, "grad_norm": 93.58769989013672, "loss": 41.7984, "lr": 4.2000000000000004e-05, "step": 22, "tokens_trained": 0.002094288 }, { "epoch": 0.006808510638297872, "grad_norm": 87.5854721069336, "loss": 37.6201, "lr": 4.6e-05, "step": 24, "tokens_trained": 0.002282496 }, { "epoch": 0.007375886524822695, "grad_norm": 84.12794494628906, "loss": 35.0091, "lr": 5e-05, "step": 26, "tokens_trained": 0.00247068 }, { "epoch": 0.007943262411347518, "grad_norm": 79.77535247802734, "loss": 33.2253, "lr": 5.4e-05, "step": 28, "tokens_trained": 0.002662888 }, { "epoch": 0.00851063829787234, "grad_norm": 66.42157745361328, "loss": 32.0682, "lr": 5.800000000000001e-05, "step": 30, "tokens_trained": 0.002851968 }, { "epoch": 0.009078014184397163, "grad_norm": 87.52485656738281, "loss": 30.893, "lr": 6.2e-05, "step": 32, "tokens_trained": 0.003041384 }, { "epoch": 0.009645390070921986, "grad_norm": 58.33614730834961, "loss": 30.0513, "lr": 6.6e-05, "step": 34, "tokens_trained": 0.003232872 }, { "epoch": 0.010212765957446808, "grad_norm": 54.629329681396484, "loss": 29.0115, "lr": 7.000000000000001e-05, "step": 36, "tokens_trained": 0.003423824 }, { "epoch": 0.01078014184397163, "grad_norm": 52.79097366333008, "loss": 28.2084, "lr": 7.4e-05, "step": 38, "tokens_trained": 0.003613232 }, { "epoch": 0.011347517730496455, "grad_norm": 54.481224060058594, "loss": 27.4345, "lr": 7.8e-05, "step": 40, "tokens_trained": 0.003800952 }, { "epoch": 0.011914893617021277, "grad_norm": 58.7069091796875, "loss": 26.5936, "lr": 8.2e-05, "step": 42, "tokens_trained": 0.003991512 }, { "epoch": 0.0124822695035461, "grad_norm": 49.30760955810547, "loss": 26.0608, "lr": 8.599999999999999e-05, "step": 44, "tokens_trained": 0.004180648 }, { "epoch": 0.013049645390070922, "grad_norm": 61.902587890625, "loss": 25.5363, "lr": 8.999999999999999e-05, "step": 46, "tokens_trained": 0.00437148 }, { "epoch": 0.013617021276595745, "grad_norm": 46.76111602783203, "loss": 24.9599, "lr": 9.400000000000001e-05, "step": 48, "tokens_trained": 0.004559344 }, { "epoch": 0.014184397163120567, "grad_norm": 57.06416702270508, "loss": 24.4087, "lr": 9.800000000000001e-05, "step": 50, "tokens_trained": 0.004749256 }, { "epoch": 0.01475177304964539, "grad_norm": 44.798736572265625, "loss": 24.1444, "lr": 0.000102, "step": 52, "tokens_trained": 0.004940192 }, { "epoch": 0.015319148936170212, "grad_norm": 40.29296875, "loss": 23.6011, "lr": 0.000106, "step": 54, "tokens_trained": 0.005130304 }, { "epoch": 0.015886524822695036, "grad_norm": 38.75099563598633, "loss": 23.1781, "lr": 0.00011, "step": 56, "tokens_trained": 0.005322864 }, { "epoch": 0.016453900709219857, "grad_norm": 37.470706939697266, "loss": 22.9136, "lr": 0.000114, "step": 58, "tokens_trained": 0.00551392 }, { "epoch": 0.01702127659574468, "grad_norm": 35.1894645690918, "loss": 22.6336, "lr": 0.000118, "step": 60, "tokens_trained": 0.005703096 }, { "epoch": 0.017588652482269502, "grad_norm": 35.136573791503906, "loss": 22.2998, "lr": 0.000122, "step": 62, "tokens_trained": 0.005892448 }, { "epoch": 0.018156028368794326, "grad_norm": 38.05111312866211, "loss": 21.9401, "lr": 0.000126, "step": 64, "tokens_trained": 0.006081656 }, { "epoch": 0.01872340425531915, "grad_norm": 35.63850021362305, "loss": 21.7206, "lr": 0.00013000000000000002, "step": 66, "tokens_trained": 0.006273032 }, { "epoch": 0.01929078014184397, "grad_norm": 34.327667236328125, "loss": 21.4051, "lr": 0.000134, "step": 68, "tokens_trained": 0.00646304 }, { "epoch": 0.019858156028368795, "grad_norm": 31.457059860229492, "loss": 21.0774, "lr": 0.00013800000000000002, "step": 70, "tokens_trained": 0.006652832 }, { "epoch": 0.020425531914893616, "grad_norm": 34.91672897338867, "loss": 20.8718, "lr": 0.00014199999999999998, "step": 72, "tokens_trained": 0.006843512 }, { "epoch": 0.02099290780141844, "grad_norm": 27.959579467773438, "loss": 20.6932, "lr": 0.000146, "step": 74, "tokens_trained": 0.007033584 }, { "epoch": 0.02156028368794326, "grad_norm": 26.569866180419922, "loss": 20.4072, "lr": 0.00015, "step": 76, "tokens_trained": 0.007224032 }, { "epoch": 0.022127659574468085, "grad_norm": 28.009904861450195, "loss": 20.2229, "lr": 0.000154, "step": 78, "tokens_trained": 0.00741368 }, { "epoch": 0.02269503546099291, "grad_norm": 28.892959594726562, "loss": 20.0528, "lr": 0.000158, "step": 80, "tokens_trained": 0.00760416 }, { "epoch": 0.02326241134751773, "grad_norm": 31.58131980895996, "loss": 19.8016, "lr": 0.000162, "step": 82, "tokens_trained": 0.007793952 }, { "epoch": 0.023829787234042554, "grad_norm": 31.01254653930664, "loss": 19.634, "lr": 0.00016600000000000002, "step": 84, "tokens_trained": 0.007980792 }, { "epoch": 0.024397163120567375, "grad_norm": 28.732515335083008, "loss": 19.3777, "lr": 0.00017, "step": 86, "tokens_trained": 0.008171968 }, { "epoch": 0.0249645390070922, "grad_norm": 24.31264877319336, "loss": 19.1346, "lr": 0.000174, "step": 88, "tokens_trained": 0.008361632 }, { "epoch": 0.02553191489361702, "grad_norm": 26.557010650634766, "loss": 19.0014, "lr": 0.000178, "step": 90, "tokens_trained": 0.008552328 }, { "epoch": 0.026099290780141844, "grad_norm": 21.156103134155273, "loss": 18.7032, "lr": 0.000182, "step": 92, "tokens_trained": 0.008743136 }, { "epoch": 0.02666666666666667, "grad_norm": 25.7484188079834, "loss": 18.4836, "lr": 0.000186, "step": 94, "tokens_trained": 0.008932056 }, { "epoch": 0.02723404255319149, "grad_norm": 22.27949333190918, "loss": 18.2233, "lr": 0.00019, "step": 96, "tokens_trained": 0.009121608 }, { "epoch": 0.027801418439716313, "grad_norm": 24.9247989654541, "loss": 17.9867, "lr": 0.000194, "step": 98, "tokens_trained": 0.009311008 }, { "epoch": 0.028368794326241134, "grad_norm": 24.302066802978516, "loss": 17.8016, "lr": 0.00019800000000000002, "step": 100, "tokens_trained": 0.009501456 }, { "epoch": 0.02893617021276596, "grad_norm": 23.458459854125977, "loss": 17.6295, "lr": 0.000202, "step": 102, "tokens_trained": 0.009693952 }, { "epoch": 0.02950354609929078, "grad_norm": 24.092350006103516, "loss": 17.4593, "lr": 0.000206, "step": 104, "tokens_trained": 0.009883328 }, { "epoch": 0.030070921985815603, "grad_norm": 22.54726219177246, "loss": 17.2141, "lr": 0.00021, "step": 106, "tokens_trained": 0.01007316 }, { "epoch": 0.030638297872340424, "grad_norm": 21.334760665893555, "loss": 17.044, "lr": 0.000214, "step": 108, "tokens_trained": 0.010266504 }, { "epoch": 0.031205673758865248, "grad_norm": 20.584287643432617, "loss": 16.8919, "lr": 0.000218, "step": 110, "tokens_trained": 0.010455736 }, { "epoch": 0.03177304964539007, "grad_norm": 23.51676368713379, "loss": 16.751, "lr": 0.000222, "step": 112, "tokens_trained": 0.010645208 }, { "epoch": 0.03234042553191489, "grad_norm": 23.278276443481445, "loss": 16.5997, "lr": 0.00022600000000000002, "step": 114, "tokens_trained": 0.010838928 }, { "epoch": 0.032907801418439714, "grad_norm": 25.4830265045166, "loss": 16.3416, "lr": 0.00023, "step": 116, "tokens_trained": 0.011027792 }, { "epoch": 0.03347517730496454, "grad_norm": 29.442413330078125, "loss": 16.24, "lr": 0.00023400000000000002, "step": 118, "tokens_trained": 0.011217456 }, { "epoch": 0.03404255319148936, "grad_norm": 21.77578353881836, "loss": 16.1922, "lr": 0.00023799999999999998, "step": 120, "tokens_trained": 0.01140804 }, { "epoch": 0.03460992907801418, "grad_norm": 27.040719985961914, "loss": 15.9059, "lr": 0.000242, "step": 122, "tokens_trained": 0.011597816 }, { "epoch": 0.035177304964539004, "grad_norm": 24.74480628967285, "loss": 15.7818, "lr": 0.000246, "step": 124, "tokens_trained": 0.011785624 }, { "epoch": 0.03546099290780142, "eval_loss": 15.553059577941895, "eval_runtime": 23.5485, "step": 125, "tokens_trained": 0.011880832 } ], "logging_steps": 2, "max_steps": 7650, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 125, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }