{ "best_global_step": 900, "best_metric": 0.595556914806366, "best_model_checkpoint": "/content/drive/MyDrive/Colab Notebooks/AITF_baseqwen3_8B_V1.2/checkpoint-900", "epoch": 1.0, "eval_steps": 100, "global_step": 929, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.021531422419593594, "grad_norm": 0.38749924302101135, "learning_rate": 7.6e-05, "loss": 1.7456, "step": 20 }, { "epoch": 0.04306284483918719, "grad_norm": 0.23645007610321045, "learning_rate": 0.00015600000000000002, "loss": 1.2508, "step": 40 }, { "epoch": 0.06459426725878079, "grad_norm": 0.16630907356739044, "learning_rate": 0.0001999482703462211, "loss": 1.0123, "step": 60 }, { "epoch": 0.08612568967837438, "grad_norm": 0.1874742954969406, "learning_rate": 0.00019946334007549978, "loss": 0.9242, "step": 80 }, { "epoch": 0.10765711209796797, "grad_norm": 0.1982610672712326, "learning_rate": 0.0001984704140331751, "loss": 0.8479, "step": 100 }, { "epoch": 0.10765711209796797, "eval_loss": 0.7989935874938965, "eval_runtime": 368.247, "eval_samples_per_second": 10.105, "eval_steps_per_second": 2.528, "step": 100 }, { "epoch": 0.12918853451756157, "grad_norm": 0.2351531982421875, "learning_rate": 0.0001969745634568572, "loss": 0.8075, "step": 120 }, { "epoch": 0.15071995693715515, "grad_norm": 0.2675715982913971, "learning_rate": 0.00019498342820427794, "loss": 0.8, "step": 140 }, { "epoch": 0.17225137935674875, "grad_norm": 0.2564864754676819, "learning_rate": 0.00019250717773373462, "loss": 0.7552, "step": 160 }, { "epoch": 0.19378280177634236, "grad_norm": 0.28067538142204285, "learning_rate": 0.0001895584591649349, "loss": 0.7529, "step": 180 }, { "epoch": 0.21531422419593593, "grad_norm": 0.25266703963279724, "learning_rate": 0.00018615233268551643, "loss": 0.727, "step": 200 }, { "epoch": 0.21531422419593593, "eval_loss": 0.7019057869911194, "eval_runtime": 367.354, "eval_samples_per_second": 10.129, "eval_steps_per_second": 2.534, "step": 200 }, { "epoch": 0.23684564661552954, "grad_norm": 0.2786823809146881, "learning_rate": 0.00018230619463314266, "loss": 0.7363, "step": 220 }, { "epoch": 0.25837706903512314, "grad_norm": 0.2877826690673828, "learning_rate": 0.0001780396886460237, "loss": 0.7094, "step": 240 }, { "epoch": 0.27990849145471675, "grad_norm": 0.3062780499458313, "learning_rate": 0.00017337460533564845, "loss": 0.7013, "step": 260 }, { "epoch": 0.3014399138743103, "grad_norm": 0.29140764474868774, "learning_rate": 0.0001683347709941367, "loss": 0.6772, "step": 280 }, { "epoch": 0.3229713362939039, "grad_norm": 0.2650732398033142, "learning_rate": 0.00016294592590462316, "loss": 0.6896, "step": 300 }, { "epoch": 0.3229713362939039, "eval_loss": 0.6610061526298523, "eval_runtime": 367.3279, "eval_samples_per_second": 10.13, "eval_steps_per_second": 2.535, "step": 300 }, { "epoch": 0.3445027587134975, "grad_norm": 0.2866383194923401, "learning_rate": 0.00015723559287618728, "loss": 0.6685, "step": 320 }, { "epoch": 0.3660341811330911, "grad_norm": 0.2941460609436035, "learning_rate": 0.00015123293667476887, "loss": 0.6743, "step": 340 }, { "epoch": 0.3875656035526847, "grad_norm": 0.2664410471916199, "learning_rate": 0.00014496861506800758, "loss": 0.6755, "step": 360 }, { "epoch": 0.4090970259722783, "grad_norm": 0.2797100245952606, "learning_rate": 0.00013847462224477538, "loss": 0.6691, "step": 380 }, { "epoch": 0.43062844839187187, "grad_norm": 0.30962368845939636, "learning_rate": 0.00013178412540911457, "loss": 0.6658, "step": 400 }, { "epoch": 0.43062844839187187, "eval_loss": 0.6373162865638733, "eval_runtime": 367.4733, "eval_samples_per_second": 10.126, "eval_steps_per_second": 2.534, "step": 400 }, { "epoch": 0.45215987081146547, "grad_norm": 0.29752910137176514, "learning_rate": 0.00012493129538315788, "loss": 0.6587, "step": 420 }, { "epoch": 0.4736912932310591, "grad_norm": 0.304659903049469, "learning_rate": 0.00011795113208420208, "loss": 0.6466, "step": 440 }, { "epoch": 0.4952227156506527, "grad_norm": 0.3261611759662628, "learning_rate": 0.00011087928576728865, "loss": 0.6216, "step": 460 }, { "epoch": 0.5167541380702463, "grad_norm": 0.3342382311820984, "learning_rate": 0.00010375187494627098, "loss": 0.6435, "step": 480 }, { "epoch": 0.5382855604898399, "grad_norm": 0.2871527075767517, "learning_rate": 9.660530192331191e-05, "loss": 0.643, "step": 500 }, { "epoch": 0.5382855604898399, "eval_loss": 0.6207689642906189, "eval_runtime": 367.6602, "eval_samples_per_second": 10.121, "eval_steps_per_second": 2.532, "step": 500 }, { "epoch": 0.5598169829094335, "grad_norm": 0.3142107129096985, "learning_rate": 8.947606686897045e-05, "loss": 0.6355, "step": 520 }, { "epoch": 0.5813484053290271, "grad_norm": 0.3034207820892334, "learning_rate": 8.240058140243834e-05, "loss": 0.639, "step": 540 }, { "epoch": 0.6028798277486206, "grad_norm": 0.2917335033416748, "learning_rate": 7.541498262404125e-05, "loss": 0.6345, "step": 560 }, { "epoch": 0.6244112501682142, "grad_norm": 0.301921010017395, "learning_rate": 6.855494854980857e-05, "loss": 0.6361, "step": 580 }, { "epoch": 0.6459426725878078, "grad_norm": 0.2999766170978546, "learning_rate": 6.185551589075482e-05, "loss": 0.6266, "step": 600 }, { "epoch": 0.6459426725878078, "eval_loss": 0.6095116138458252, "eval_runtime": 369.6908, "eval_samples_per_second": 10.065, "eval_steps_per_second": 2.518, "step": 600 }, { "epoch": 0.6674740950074014, "grad_norm": 0.2857365310192108, "learning_rate": 5.535090110754131e-05, "loss": 0.6379, "step": 620 }, { "epoch": 0.689005517426995, "grad_norm": 0.2973370850086212, "learning_rate": 4.9074325654457446e-05, "loss": 0.6126, "step": 640 }, { "epoch": 0.7105369398465886, "grad_norm": 0.3231595754623413, "learning_rate": 4.305784630526416e-05, "loss": 0.6105, "step": 660 }, { "epoch": 0.7320683622661822, "grad_norm": 0.2923620939254761, "learning_rate": 3.7332191427488784e-05, "loss": 0.6266, "step": 680 }, { "epoch": 0.7535997846857758, "grad_norm": 0.32130059599876404, "learning_rate": 3.192660404137729e-05, "loss": 0.6088, "step": 700 }, { "epoch": 0.7535997846857758, "eval_loss": 0.6016086935997009, "eval_runtime": 367.7262, "eval_samples_per_second": 10.119, "eval_steps_per_second": 2.532, "step": 700 }, { "epoch": 0.7751312071053694, "grad_norm": 0.2963238060474396, "learning_rate": 2.6868692465060828e-05, "loss": 0.6256, "step": 720 }, { "epoch": 0.796662629524963, "grad_norm": 0.2954428195953369, "learning_rate": 2.2184289308744844e-05, "loss": 0.6201, "step": 740 }, { "epoch": 0.8181940519445566, "grad_norm": 0.3122366964817047, "learning_rate": 1.7897319538090962e-05, "loss": 0.6164, "step": 760 }, { "epoch": 0.8397254743641501, "grad_norm": 0.3299662470817566, "learning_rate": 1.402967828063897e-05, "loss": 0.6026, "step": 780 }, { "epoch": 0.8612568967837437, "grad_norm": 0.2979361414909363, "learning_rate": 1.0601118999356907e-05, "loss": 0.6103, "step": 800 }, { "epoch": 0.8612568967837437, "eval_loss": 0.596613347530365, "eval_runtime": 367.8439, "eval_samples_per_second": 10.116, "eval_steps_per_second": 2.531, "step": 800 }, { "epoch": 0.8827883192033373, "grad_norm": 0.3615986108779907, "learning_rate": 7.629152604458156e-06, "loss": 0.6052, "step": 820 }, { "epoch": 0.9043197416229309, "grad_norm": 0.31248706579208374, "learning_rate": 5.128958018758012e-06, "loss": 0.5928, "step": 840 }, { "epoch": 0.9258511640425245, "grad_norm": 0.3367156982421875, "learning_rate": 3.1133046533455947e-06, "loss": 0.6049, "step": 860 }, { "epoch": 0.9473825864621181, "grad_norm": 0.28718671202659607, "learning_rate": 1.592487189516212e-06, "loss": 0.6053, "step": 880 }, { "epoch": 0.9689140088817118, "grad_norm": 0.32894209027290344, "learning_rate": 5.742730000568908e-07, "loss": 0.6172, "step": 900 }, { "epoch": 0.9689140088817118, "eval_loss": 0.595556914806366, "eval_runtime": 367.6823, "eval_samples_per_second": 10.12, "eval_steps_per_second": 2.532, "step": 900 }, { "epoch": 0.9904454313013054, "grad_norm": 0.34396475553512573, "learning_rate": 6.386247842353754e-08, "loss": 0.6045, "step": 920 } ], "logging_steps": 20, "max_steps": 929, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2554944340455096e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }