| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9974025974025973, |
| "eval_steps": 500, |
| "global_step": 576, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05194805194805195, |
| "grad_norm": 6.558798236067014, |
| "learning_rate": 7.758620689655173e-07, |
| "loss": 1.1038, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1038961038961039, |
| "grad_norm": 2.674816595181998, |
| "learning_rate": 1.6379310344827587e-06, |
| "loss": 0.992, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.15584415584415584, |
| "grad_norm": 1.8226313744055744, |
| "learning_rate": 2.5e-06, |
| "loss": 0.84, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2077922077922078, |
| "grad_norm": 1.5584327305344476, |
| "learning_rate": 3.362068965517242e-06, |
| "loss": 0.7842, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2597402597402597, |
| "grad_norm": 1.5690300333908958, |
| "learning_rate": 4.224137931034483e-06, |
| "loss": 0.7462, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3116883116883117, |
| "grad_norm": 1.3587459505022679, |
| "learning_rate": 4.999954022123679e-06, |
| "loss": 0.7281, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 1.5619125769090973, |
| "learning_rate": 4.994438722989841e-06, |
| "loss": 0.7182, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.4155844155844156, |
| "grad_norm": 1.3120284796636292, |
| "learning_rate": 4.979751088147192e-06, |
| "loss": 0.71, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4675324675324675, |
| "grad_norm": 1.4094824038705405, |
| "learning_rate": 4.955945125704375e-06, |
| "loss": 0.7064, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5194805194805194, |
| "grad_norm": 1.274095106150924, |
| "learning_rate": 4.923108372900683e-06, |
| "loss": 0.677, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 1.4208094097198067, |
| "learning_rate": 4.881361574221648e-06, |
| "loss": 0.6915, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6233766233766234, |
| "grad_norm": 1.374753133285299, |
| "learning_rate": 4.830858237407799e-06, |
| "loss": 0.6861, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6753246753246753, |
| "grad_norm": 1.293455293244359, |
| "learning_rate": 4.771784068989186e-06, |
| "loss": 0.6975, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 1.3020469143910023, |
| "learning_rate": 4.7043562914212915e-06, |
| "loss": 0.6799, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.7792207792207793, |
| "grad_norm": 1.3651429159451645, |
| "learning_rate": 4.6288228443332786e-06, |
| "loss": 0.6913, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8311688311688312, |
| "grad_norm": 1.5410204906658607, |
| "learning_rate": 4.5454614728256995e-06, |
| "loss": 0.6805, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.8831168831168831, |
| "grad_norm": 1.3733140100266965, |
| "learning_rate": 4.454578706170075e-06, |
| "loss": 0.6643, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.935064935064935, |
| "grad_norm": 1.2861021481912802, |
| "learning_rate": 4.356508730665804e-06, |
| "loss": 0.658, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.987012987012987, |
| "grad_norm": 1.288753522105602, |
| "learning_rate": 4.251612160799017e-06, |
| "loss": 0.6886, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.0415584415584416, |
| "grad_norm": 1.2471057575436326, |
| "learning_rate": 4.140274713221985e-06, |
| "loss": 0.6175, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0935064935064935, |
| "grad_norm": 1.187620072173425, |
| "learning_rate": 4.022905788428984e-06, |
| "loss": 0.5921, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.1454545454545455, |
| "grad_norm": 1.206352433048565, |
| "learning_rate": 3.899936965343989e-06, |
| "loss": 0.6058, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.1974025974025975, |
| "grad_norm": 1.410833521608183, |
| "learning_rate": 3.7718204143557337e-06, |
| "loss": 0.6233, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.2493506493506494, |
| "grad_norm": 1.3100928026008483, |
| "learning_rate": 3.6390272346356225e-06, |
| "loss": 0.6183, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.3012987012987014, |
| "grad_norm": 1.2235762612191803, |
| "learning_rate": 3.5020457218523407e-06, |
| "loss": 0.5841, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.3532467532467534, |
| "grad_norm": 1.3015881612011941, |
| "learning_rate": 3.3613795726529795e-06, |
| "loss": 0.6015, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.4051948051948053, |
| "grad_norm": 1.2270855673243282, |
| "learning_rate": 3.2175460325130176e-06, |
| "loss": 0.5974, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.457142857142857, |
| "grad_norm": 1.299855240564656, |
| "learning_rate": 3.0710739937657035e-06, |
| "loss": 0.586, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.509090909090909, |
| "grad_norm": 1.2487072210315888, |
| "learning_rate": 2.9225020508046233e-06, |
| "loss": 0.6151, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.561038961038961, |
| "grad_norm": 1.2285734897854397, |
| "learning_rate": 2.7723765196106773e-06, |
| "loss": 0.5882, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.612987012987013, |
| "grad_norm": 1.3395108447307076, |
| "learning_rate": 2.621249428885908e-06, |
| "loss": 0.6164, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.664935064935065, |
| "grad_norm": 1.1702020402228575, |
| "learning_rate": 2.4696764901809926e-06, |
| "loss": 0.5999, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.716883116883117, |
| "grad_norm": 1.217176969741526, |
| "learning_rate": 2.3182150544804878e-06, |
| "loss": 0.588, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.7688311688311689, |
| "grad_norm": 1.2406628193012295, |
| "learning_rate": 2.1674220627596814e-06, |
| "loss": 0.5786, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.8207792207792208, |
| "grad_norm": 1.1903958246809216, |
| "learning_rate": 2.017851998049107e-06, |
| "loss": 0.5822, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.8727272727272726, |
| "grad_norm": 1.2667654877861587, |
| "learning_rate": 1.8700548465371877e-06, |
| "loss": 0.5837, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.9246753246753245, |
| "grad_norm": 1.2920373716993399, |
| "learning_rate": 1.7245740752082901e-06, |
| "loss": 0.5871, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.9766233766233765, |
| "grad_norm": 1.275603929027079, |
| "learning_rate": 1.5819446334526363e-06, |
| "loss": 0.5838, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.031168831168831, |
| "grad_norm": 1.133575867354361, |
| "learning_rate": 1.4426909859963716e-06, |
| "loss": 0.5697, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.083116883116883, |
| "grad_norm": 1.2321064809517404, |
| "learning_rate": 1.3073251843849503e-06, |
| "loss": 0.5431, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.135064935064935, |
| "grad_norm": 1.2519940232258298, |
| "learning_rate": 1.1763449841111906e-06, |
| "loss": 0.5016, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.187012987012987, |
| "grad_norm": 1.2387163111275854, |
| "learning_rate": 1.05023201431156e-06, |
| "loss": 0.5098, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.238961038961039, |
| "grad_norm": 1.19694862951574, |
| "learning_rate": 9.294500067608941e-07, |
| "loss": 0.5287, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.290909090909091, |
| "grad_norm": 1.2777524766342223, |
| "learning_rate": 8.144430906777756e-07, |
| "loss": 0.516, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.342857142857143, |
| "grad_norm": 1.1637303035803468, |
| "learning_rate": 7.056341596107299e-07, |
| "loss": 0.5143, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.394805194805195, |
| "grad_norm": 1.1933528178678503, |
| "learning_rate": 6.034233164104184e-07, |
| "loss": 0.5239, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.446753246753247, |
| "grad_norm": 1.2098669627294527, |
| "learning_rate": 5.081864020058125e-07, |
| "loss": 0.5196, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.498701298701299, |
| "grad_norm": 1.168439112620554, |
| "learning_rate": 4.20273613394232e-07, |
| "loss": 0.5272, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.5506493506493504, |
| "grad_norm": 1.1297680535945829, |
| "learning_rate": 3.400082159270418e-07, |
| "loss": 0.5169, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.602597402597403, |
| "grad_norm": 1.0857638524201654, |
| "learning_rate": 2.676853546260791e-07, |
| "loss": 0.5253, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.6545454545454543, |
| "grad_norm": 1.1477687512423902, |
| "learning_rate": 2.0357096890174482e-07, |
| "loss": 0.5277, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.7064935064935067, |
| "grad_norm": 1.1757736817804982, |
| "learning_rate": 1.4790081466345863e-07, |
| "loss": 0.5381, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.7584415584415583, |
| "grad_norm": 1.1528599201248226, |
| "learning_rate": 1.0087959741828607e-07, |
| "loss": 0.5028, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.8103896103896107, |
| "grad_norm": 1.241278074991346, |
| "learning_rate": 6.268021954544095e-08, |
| "loss": 0.525, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.862337662337662, |
| "grad_norm": 1.178930136102725, |
| "learning_rate": 3.3443144514516965e-08, |
| "loss": 0.5231, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.914285714285714, |
| "grad_norm": 1.1659672855787477, |
| "learning_rate": 1.3275880385284767e-08, |
| "loss": 0.5317, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.966233766233766, |
| "grad_norm": 1.2174438194181065, |
| "learning_rate": 2.252584488296461e-09, |
| "loss": 0.5093, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.9974025974025973, |
| "step": 576, |
| "total_flos": 465586638684160.0, |
| "train_loss": 0.6205948731965489, |
| "train_runtime": 15697.9454, |
| "train_samples_per_second": 1.177, |
| "train_steps_per_second": 0.037 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 576, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 465586638684160.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|