| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.4999036237471087, |
| "eval_steps": 5187, |
| "global_step": 5187, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00481881264456438, |
| "grad_norm": 1.6099064350128174, |
| "learning_rate": 6.294155427103405e-07, |
| "loss": 0.095972900390625, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.00963762528912876, |
| "grad_norm": 1.0794726610183716, |
| "learning_rate": 1.2716763005780348e-06, |
| "loss": 0.03401387691497803, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.014456437933693137, |
| "grad_norm": 0.9826000928878784, |
| "learning_rate": 1.9139370584457295e-06, |
| "loss": 0.021355185508728027, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.01927525057825752, |
| "grad_norm": 1.6832308769226074, |
| "learning_rate": 2.5561978163134233e-06, |
| "loss": 0.016741816997528077, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.024094063222821895, |
| "grad_norm": 1.745717167854309, |
| "learning_rate": 3.198458574181118e-06, |
| "loss": 0.014754180908203124, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.028912875867386275, |
| "grad_norm": 0.41400647163391113, |
| "learning_rate": 3.8407193320488126e-06, |
| "loss": 0.014074199199676514, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.03373168851195066, |
| "grad_norm": 0.7941911220550537, |
| "learning_rate": 4.482980089916507e-06, |
| "loss": 0.013227691650390625, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.03855050115651504, |
| "grad_norm": 0.5189383029937744, |
| "learning_rate": 5.125240847784201e-06, |
| "loss": 0.013188705444335938, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.04336931380107941, |
| "grad_norm": 1.6657729148864746, |
| "learning_rate": 5.767501605651895e-06, |
| "loss": 0.012030971050262452, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.04818812644564379, |
| "grad_norm": 0.32716143131256104, |
| "learning_rate": 6.4097623635195895e-06, |
| "loss": 0.012086995840072633, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05300693909020817, |
| "grad_norm": 0.3107227385044098, |
| "learning_rate": 7.052023121387284e-06, |
| "loss": 0.011651687622070313, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.05782575173477255, |
| "grad_norm": 0.6159315705299377, |
| "learning_rate": 7.694283879254977e-06, |
| "loss": 0.011140645742416381, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.06264456437933694, |
| "grad_norm": 2.1143224239349365, |
| "learning_rate": 8.336544637122673e-06, |
| "loss": 0.012795639038085938, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.06746337702390132, |
| "grad_norm": 2.545966863632202, |
| "learning_rate": 8.978805394990367e-06, |
| "loss": 0.013084233999252319, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.0722821896684657, |
| "grad_norm": 1.8246541023254395, |
| "learning_rate": 9.621066152858061e-06, |
| "loss": 0.012490972280502319, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.07710100231303008, |
| "grad_norm": 0.959894597530365, |
| "learning_rate": 1.0263326910725756e-05, |
| "loss": 0.011189931631088256, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.08191981495759446, |
| "grad_norm": 4.178706645965576, |
| "learning_rate": 1.090558766859345e-05, |
| "loss": 0.012091522216796874, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.08673862760215882, |
| "grad_norm": 0.7208101153373718, |
| "learning_rate": 1.1547848426461144e-05, |
| "loss": 0.012419841289520263, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.0915574402467232, |
| "grad_norm": 1.3128774166107178, |
| "learning_rate": 1.2190109184328838e-05, |
| "loss": 0.011686071157455444, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.09637625289128758, |
| "grad_norm": 1.2861932516098022, |
| "learning_rate": 1.2832369942196533e-05, |
| "loss": 0.011539828777313233, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.10119506553585196, |
| "grad_norm": 1.6700119972229004, |
| "learning_rate": 1.3474630700064227e-05, |
| "loss": 0.012701009511947631, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.10601387818041634, |
| "grad_norm": 1.6825134754180908, |
| "learning_rate": 1.4116891457931921e-05, |
| "loss": 0.0122796630859375, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.11083269082498072, |
| "grad_norm": 0.18270175158977509, |
| "learning_rate": 1.4759152215799615e-05, |
| "loss": 0.014598617553710938, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.1156515034695451, |
| "grad_norm": 2.148013114929199, |
| "learning_rate": 1.540141297366731e-05, |
| "loss": 0.012004268169403077, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.12047031611410948, |
| "grad_norm": 0.2672475278377533, |
| "learning_rate": 1.6043673731535007e-05, |
| "loss": 0.011746572256088257, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.12528912875867387, |
| "grad_norm": 1.4434212446212769, |
| "learning_rate": 1.66859344894027e-05, |
| "loss": 0.012035726308822632, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.13010794140323825, |
| "grad_norm": 0.4635275602340698, |
| "learning_rate": 1.7328195247270396e-05, |
| "loss": 0.012548294067382813, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.13492675404780263, |
| "grad_norm": 0.548039436340332, |
| "learning_rate": 1.7970456005138088e-05, |
| "loss": 0.012786407470703125, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.139745566692367, |
| "grad_norm": 0.7193094491958618, |
| "learning_rate": 1.8612716763005784e-05, |
| "loss": 0.011991348266601563, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.1445643793369314, |
| "grad_norm": 0.5346310138702393, |
| "learning_rate": 1.9254977520873477e-05, |
| "loss": 0.012789205312728882, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.14938319198149577, |
| "grad_norm": 0.40978488326072693, |
| "learning_rate": 1.9897238278741172e-05, |
| "loss": 0.012897975444793701, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.15420200462606015, |
| "grad_norm": 1.7101497650146484, |
| "learning_rate": 1.9999900451301277e-05, |
| "loss": 0.014085414409637452, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.15902081727062453, |
| "grad_norm": 0.2999866306781769, |
| "learning_rate": 1.9999522349843378e-05, |
| "loss": 0.01345428466796875, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.1638396299151889, |
| "grad_norm": 0.3239404857158661, |
| "learning_rate": 1.9998862094545145e-05, |
| "loss": 0.012794520854949951, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.16865844255975326, |
| "grad_norm": 0.1947634220123291, |
| "learning_rate": 1.999791970403682e-05, |
| "loss": 0.013333181142807007, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.17347725520431764, |
| "grad_norm": 0.8663123846054077, |
| "learning_rate": 1.9996695204909593e-05, |
| "loss": 0.012868322134017944, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.17829606784888202, |
| "grad_norm": 0.504265546798706, |
| "learning_rate": 1.9995188631714816e-05, |
| "loss": 0.013335164785385132, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.1831148804934464, |
| "grad_norm": 0.36884820461273193, |
| "learning_rate": 1.9993400026963072e-05, |
| "loss": 0.013257879018783569, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.18793369313801078, |
| "grad_norm": 0.6779909729957581, |
| "learning_rate": 1.9991329441122937e-05, |
| "loss": 0.012929306030273438, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.19275250578257516, |
| "grad_norm": 0.19276919960975647, |
| "learning_rate": 1.9988976932619574e-05, |
| "loss": 0.012471644878387452, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.19757131842713954, |
| "grad_norm": 2.5498015880584717, |
| "learning_rate": 1.9986342567833087e-05, |
| "loss": 0.013909963369369506, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.20239013107170392, |
| "grad_norm": 0.7871809005737305, |
| "learning_rate": 1.9983426421096636e-05, |
| "loss": 0.013362987041473389, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.2072089437162683, |
| "grad_norm": 0.5824525356292725, |
| "learning_rate": 1.9980228574694357e-05, |
| "loss": 0.012392985820770263, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.21202775636083268, |
| "grad_norm": 1.331589698791504, |
| "learning_rate": 1.9976749118859023e-05, |
| "loss": 0.012818679809570313, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.21684656900539706, |
| "grad_norm": 0.22093236446380615, |
| "learning_rate": 1.9972988151769507e-05, |
| "loss": 0.012426936626434326, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.22166538164996144, |
| "grad_norm": 1.7017446756362915, |
| "learning_rate": 1.9968945779548007e-05, |
| "loss": 0.013253505229949952, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.22648419429452582, |
| "grad_norm": 0.16596011817455292, |
| "learning_rate": 1.9964622116257056e-05, |
| "loss": 0.012612838745117188, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.2313030069390902, |
| "grad_norm": 0.2315380871295929, |
| "learning_rate": 1.99600172838963e-05, |
| "loss": 0.012747344970703125, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.23612181958365458, |
| "grad_norm": 0.3689921796321869, |
| "learning_rate": 1.9955131412399064e-05, |
| "loss": 0.012891719341278076, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.24094063222821896, |
| "grad_norm": 0.5337355732917786, |
| "learning_rate": 1.994996463962867e-05, |
| "loss": 0.012232768535614013, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.24575944487278334, |
| "grad_norm": 0.1625846028327942, |
| "learning_rate": 1.9944517111374558e-05, |
| "loss": 0.013072433471679688, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.25057825751734775, |
| "grad_norm": 0.7666917443275452, |
| "learning_rate": 1.9938788981348175e-05, |
| "loss": 0.011926066875457764, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.2553970701619121, |
| "grad_norm": 0.5212653279304504, |
| "learning_rate": 1.9932780411178628e-05, |
| "loss": 0.012572301626205444, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.2602158828064765, |
| "grad_norm": 1.3501203060150146, |
| "learning_rate": 1.9926491570408126e-05, |
| "loss": 0.012271144390106202, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.26503469545104086, |
| "grad_norm": 0.1507686972618103, |
| "learning_rate": 1.991992263648721e-05, |
| "loss": 0.013356069326400757, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.26985350809560527, |
| "grad_norm": 1.450133204460144, |
| "learning_rate": 1.9913073794769727e-05, |
| "loss": 0.012582473754882813, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.2746723207401696, |
| "grad_norm": 0.20175831019878387, |
| "learning_rate": 1.9905945238507597e-05, |
| "loss": 0.012466964721679687, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.279491133384734, |
| "grad_norm": 0.9705828428268433, |
| "learning_rate": 1.989853716884539e-05, |
| "loss": 0.012320556640625, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.2843099460292984, |
| "grad_norm": 0.29664674401283264, |
| "learning_rate": 1.9890849794814616e-05, |
| "loss": 0.012661590576171874, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.2891287586738628, |
| "grad_norm": 0.7184270620346069, |
| "learning_rate": 1.9882883333327844e-05, |
| "loss": 0.012468541860580445, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.29394757131842714, |
| "grad_norm": 1.2583141326904297, |
| "learning_rate": 1.987463800917259e-05, |
| "loss": 0.013047130107879638, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.29876638396299154, |
| "grad_norm": 0.3179360628128052, |
| "learning_rate": 1.986611405500495e-05, |
| "loss": 0.012662353515625, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.3035851966075559, |
| "grad_norm": 0.4840896427631378, |
| "learning_rate": 1.9857311711343047e-05, |
| "loss": 0.012455928325653075, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.3084040092521203, |
| "grad_norm": 0.8195740580558777, |
| "learning_rate": 1.984823122656026e-05, |
| "loss": 0.012638804912567138, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.31322282189668466, |
| "grad_norm": 0.1526927649974823, |
| "learning_rate": 1.9838872856878185e-05, |
| "loss": 0.01197858214378357, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.31804163454124906, |
| "grad_norm": 0.27761492133140564, |
| "learning_rate": 1.982923686635944e-05, |
| "loss": 0.012119649648666382, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.3228604471858134, |
| "grad_norm": 1.5864442586898804, |
| "learning_rate": 1.981932352690017e-05, |
| "loss": 0.012395553588867188, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.3276792598303778, |
| "grad_norm": 1.2578856945037842, |
| "learning_rate": 1.980913311822243e-05, |
| "loss": 0.01248263120651245, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.3324980724749422, |
| "grad_norm": 0.30967798829078674, |
| "learning_rate": 1.979866592786624e-05, |
| "loss": 0.012674357891082764, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.3373168851195065, |
| "grad_norm": 1.2549265623092651, |
| "learning_rate": 1.9787922251181513e-05, |
| "loss": 0.012856496572494507, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.34213569776407093, |
| "grad_norm": 0.8616346120834351, |
| "learning_rate": 1.977690239131968e-05, |
| "loss": 0.012417705059051513, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.3469545104086353, |
| "grad_norm": 0.27554193139076233, |
| "learning_rate": 1.976560665922518e-05, |
| "loss": 0.012434184551239014, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.3517733230531997, |
| "grad_norm": 0.6482635140419006, |
| "learning_rate": 1.9754035373626646e-05, |
| "loss": 0.012548320293426514, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.35659213569776405, |
| "grad_norm": 0.8016761541366577, |
| "learning_rate": 1.9742188861027957e-05, |
| "loss": 0.012607015371322632, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.36141094834232845, |
| "grad_norm": 0.3415991961956024, |
| "learning_rate": 1.9730067455698964e-05, |
| "loss": 0.012693126201629639, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.3662297609868928, |
| "grad_norm": 1.8076531887054443, |
| "learning_rate": 1.9717671499666125e-05, |
| "loss": 0.01259676694869995, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.3710485736314572, |
| "grad_norm": 0.1274661123752594, |
| "learning_rate": 1.97050013427028e-05, |
| "loss": 0.012547200918197632, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.37586738627602156, |
| "grad_norm": 0.20902210474014282, |
| "learning_rate": 1.9692057342319407e-05, |
| "loss": 0.0122760009765625, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.38068619892058597, |
| "grad_norm": 0.5315442085266113, |
| "learning_rate": 1.9678839863753336e-05, |
| "loss": 0.012463277578353882, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.3855050115651503, |
| "grad_norm": 1.2004033327102661, |
| "learning_rate": 1.966534927995864e-05, |
| "loss": 0.012470932006835937, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.39032382420971473, |
| "grad_norm": 0.1404499113559723, |
| "learning_rate": 1.9651585971595494e-05, |
| "loss": 0.012283198833465576, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.3951426368542791, |
| "grad_norm": 0.4698476791381836, |
| "learning_rate": 1.9637550327019488e-05, |
| "loss": 0.012424596548080445, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.3999614494988435, |
| "grad_norm": 0.11542811989784241, |
| "learning_rate": 1.9623242742270635e-05, |
| "loss": 0.012442626953125, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.40478026214340784, |
| "grad_norm": 0.309451699256897, |
| "learning_rate": 1.9608663621062222e-05, |
| "loss": 0.011946996450424194, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.40959907478797225, |
| "grad_norm": 0.17752103507518768, |
| "learning_rate": 1.9593813374769396e-05, |
| "loss": 0.012281291484832764, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.4144178874325366, |
| "grad_norm": 0.9642850160598755, |
| "learning_rate": 1.9578692422417578e-05, |
| "loss": 0.011960487365722656, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.419236700077101, |
| "grad_norm": 0.38358408212661743, |
| "learning_rate": 1.9563301190670625e-05, |
| "loss": 0.012084554433822631, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.42405551272166536, |
| "grad_norm": 0.3695308566093445, |
| "learning_rate": 1.954764011381879e-05, |
| "loss": 0.011912307739257812, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.42887432536622977, |
| "grad_norm": 0.1504460573196411, |
| "learning_rate": 1.9531709633766486e-05, |
| "loss": 0.0125970721244812, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.4336931380107941, |
| "grad_norm": 0.7031795382499695, |
| "learning_rate": 1.951551020001979e-05, |
| "loss": 0.011779887676239014, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.43851195065535853, |
| "grad_norm": 0.9349226355552673, |
| "learning_rate": 1.9499042269673785e-05, |
| "loss": 0.012475408315658569, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.4433307632999229, |
| "grad_norm": 1.299560308456421, |
| "learning_rate": 1.9482306307399642e-05, |
| "loss": 0.012063064575195313, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.4481495759444873, |
| "grad_norm": 0.3574764132499695, |
| "learning_rate": 1.9465302785431518e-05, |
| "loss": 0.012038066387176513, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.45296838858905164, |
| "grad_norm": 0.39971208572387695, |
| "learning_rate": 1.9448032183553237e-05, |
| "loss": 0.012331008911132812, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.45778720123361605, |
| "grad_norm": 0.7214897274971008, |
| "learning_rate": 1.9430494989084733e-05, |
| "loss": 0.012144622802734375, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.4626060138781804, |
| "grad_norm": 0.5722167491912842, |
| "learning_rate": 1.9412691696868327e-05, |
| "loss": 0.012291641235351562, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.4674248265227448, |
| "grad_norm": 0.193996861577034, |
| "learning_rate": 1.9394622809254735e-05, |
| "loss": 0.012149810791015625, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.47224363916730916, |
| "grad_norm": 0.7098087072372437, |
| "learning_rate": 1.9376288836088916e-05, |
| "loss": 0.012204360961914063, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.47706245181187357, |
| "grad_norm": 0.5333502292633057, |
| "learning_rate": 1.9357690294695673e-05, |
| "loss": 0.012274196147918701, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.4818812644564379, |
| "grad_norm": 0.13141588866710663, |
| "learning_rate": 1.9338827709865064e-05, |
| "loss": 0.012214864492416383, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.4867000771010023, |
| "grad_norm": 0.2113286256790161, |
| "learning_rate": 1.9319701613837577e-05, |
| "loss": 0.012197240591049194, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.4915188897455667, |
| "grad_norm": 0.7589179873466492, |
| "learning_rate": 1.9300312546289144e-05, |
| "loss": 0.012238616943359376, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.4963377023901311, |
| "grad_norm": 0.47019609808921814, |
| "learning_rate": 1.928066105431588e-05, |
| "loss": 0.012136790752410889, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.4999036237471087, |
| "eval_loss": 0.007605554535984993, |
| "eval_mae": 0.06400828063488007, |
| "eval_mse": 0.007605642545968294, |
| "eval_runtime": 4371.2139, |
| "eval_samples_per_second": 105.223, |
| "eval_spearman": 0.9365585006265831, |
| "eval_steps_per_second": 0.548, |
| "step": 5187 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 31128, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 5187, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.704276921907937e+19, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |