MemReranker-4B / trainer_state.json
kk04jy's picture
Upload 8 files
b2af1c8 verified
raw
history blame
20.6 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.4999036237471087,
"eval_steps": 5187,
"global_step": 5187,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00481881264456438,
"grad_norm": 1.6099064350128174,
"learning_rate": 6.294155427103405e-07,
"loss": 0.095972900390625,
"step": 50
},
{
"epoch": 0.00963762528912876,
"grad_norm": 1.0794726610183716,
"learning_rate": 1.2716763005780348e-06,
"loss": 0.03401387691497803,
"step": 100
},
{
"epoch": 0.014456437933693137,
"grad_norm": 0.9826000928878784,
"learning_rate": 1.9139370584457295e-06,
"loss": 0.021355185508728027,
"step": 150
},
{
"epoch": 0.01927525057825752,
"grad_norm": 1.6832308769226074,
"learning_rate": 2.5561978163134233e-06,
"loss": 0.016741816997528077,
"step": 200
},
{
"epoch": 0.024094063222821895,
"grad_norm": 1.745717167854309,
"learning_rate": 3.198458574181118e-06,
"loss": 0.014754180908203124,
"step": 250
},
{
"epoch": 0.028912875867386275,
"grad_norm": 0.41400647163391113,
"learning_rate": 3.8407193320488126e-06,
"loss": 0.014074199199676514,
"step": 300
},
{
"epoch": 0.03373168851195066,
"grad_norm": 0.7941911220550537,
"learning_rate": 4.482980089916507e-06,
"loss": 0.013227691650390625,
"step": 350
},
{
"epoch": 0.03855050115651504,
"grad_norm": 0.5189383029937744,
"learning_rate": 5.125240847784201e-06,
"loss": 0.013188705444335938,
"step": 400
},
{
"epoch": 0.04336931380107941,
"grad_norm": 1.6657729148864746,
"learning_rate": 5.767501605651895e-06,
"loss": 0.012030971050262452,
"step": 450
},
{
"epoch": 0.04818812644564379,
"grad_norm": 0.32716143131256104,
"learning_rate": 6.4097623635195895e-06,
"loss": 0.012086995840072633,
"step": 500
},
{
"epoch": 0.05300693909020817,
"grad_norm": 0.3107227385044098,
"learning_rate": 7.052023121387284e-06,
"loss": 0.011651687622070313,
"step": 550
},
{
"epoch": 0.05782575173477255,
"grad_norm": 0.6159315705299377,
"learning_rate": 7.694283879254977e-06,
"loss": 0.011140645742416381,
"step": 600
},
{
"epoch": 0.06264456437933694,
"grad_norm": 2.1143224239349365,
"learning_rate": 8.336544637122673e-06,
"loss": 0.012795639038085938,
"step": 650
},
{
"epoch": 0.06746337702390132,
"grad_norm": 2.545966863632202,
"learning_rate": 8.978805394990367e-06,
"loss": 0.013084233999252319,
"step": 700
},
{
"epoch": 0.0722821896684657,
"grad_norm": 1.8246541023254395,
"learning_rate": 9.621066152858061e-06,
"loss": 0.012490972280502319,
"step": 750
},
{
"epoch": 0.07710100231303008,
"grad_norm": 0.959894597530365,
"learning_rate": 1.0263326910725756e-05,
"loss": 0.011189931631088256,
"step": 800
},
{
"epoch": 0.08191981495759446,
"grad_norm": 4.178706645965576,
"learning_rate": 1.090558766859345e-05,
"loss": 0.012091522216796874,
"step": 850
},
{
"epoch": 0.08673862760215882,
"grad_norm": 0.7208101153373718,
"learning_rate": 1.1547848426461144e-05,
"loss": 0.012419841289520263,
"step": 900
},
{
"epoch": 0.0915574402467232,
"grad_norm": 1.3128774166107178,
"learning_rate": 1.2190109184328838e-05,
"loss": 0.011686071157455444,
"step": 950
},
{
"epoch": 0.09637625289128758,
"grad_norm": 1.2861932516098022,
"learning_rate": 1.2832369942196533e-05,
"loss": 0.011539828777313233,
"step": 1000
},
{
"epoch": 0.10119506553585196,
"grad_norm": 1.6700119972229004,
"learning_rate": 1.3474630700064227e-05,
"loss": 0.012701009511947631,
"step": 1050
},
{
"epoch": 0.10601387818041634,
"grad_norm": 1.6825134754180908,
"learning_rate": 1.4116891457931921e-05,
"loss": 0.0122796630859375,
"step": 1100
},
{
"epoch": 0.11083269082498072,
"grad_norm": 0.18270175158977509,
"learning_rate": 1.4759152215799615e-05,
"loss": 0.014598617553710938,
"step": 1150
},
{
"epoch": 0.1156515034695451,
"grad_norm": 2.148013114929199,
"learning_rate": 1.540141297366731e-05,
"loss": 0.012004268169403077,
"step": 1200
},
{
"epoch": 0.12047031611410948,
"grad_norm": 0.2672475278377533,
"learning_rate": 1.6043673731535007e-05,
"loss": 0.011746572256088257,
"step": 1250
},
{
"epoch": 0.12528912875867387,
"grad_norm": 1.4434212446212769,
"learning_rate": 1.66859344894027e-05,
"loss": 0.012035726308822632,
"step": 1300
},
{
"epoch": 0.13010794140323825,
"grad_norm": 0.4635275602340698,
"learning_rate": 1.7328195247270396e-05,
"loss": 0.012548294067382813,
"step": 1350
},
{
"epoch": 0.13492675404780263,
"grad_norm": 0.548039436340332,
"learning_rate": 1.7970456005138088e-05,
"loss": 0.012786407470703125,
"step": 1400
},
{
"epoch": 0.139745566692367,
"grad_norm": 0.7193094491958618,
"learning_rate": 1.8612716763005784e-05,
"loss": 0.011991348266601563,
"step": 1450
},
{
"epoch": 0.1445643793369314,
"grad_norm": 0.5346310138702393,
"learning_rate": 1.9254977520873477e-05,
"loss": 0.012789205312728882,
"step": 1500
},
{
"epoch": 0.14938319198149577,
"grad_norm": 0.40978488326072693,
"learning_rate": 1.9897238278741172e-05,
"loss": 0.012897975444793701,
"step": 1550
},
{
"epoch": 0.15420200462606015,
"grad_norm": 1.7101497650146484,
"learning_rate": 1.9999900451301277e-05,
"loss": 0.014085414409637452,
"step": 1600
},
{
"epoch": 0.15902081727062453,
"grad_norm": 0.2999866306781769,
"learning_rate": 1.9999522349843378e-05,
"loss": 0.01345428466796875,
"step": 1650
},
{
"epoch": 0.1638396299151889,
"grad_norm": 0.3239404857158661,
"learning_rate": 1.9998862094545145e-05,
"loss": 0.012794520854949951,
"step": 1700
},
{
"epoch": 0.16865844255975326,
"grad_norm": 0.1947634220123291,
"learning_rate": 1.999791970403682e-05,
"loss": 0.013333181142807007,
"step": 1750
},
{
"epoch": 0.17347725520431764,
"grad_norm": 0.8663123846054077,
"learning_rate": 1.9996695204909593e-05,
"loss": 0.012868322134017944,
"step": 1800
},
{
"epoch": 0.17829606784888202,
"grad_norm": 0.504265546798706,
"learning_rate": 1.9995188631714816e-05,
"loss": 0.013335164785385132,
"step": 1850
},
{
"epoch": 0.1831148804934464,
"grad_norm": 0.36884820461273193,
"learning_rate": 1.9993400026963072e-05,
"loss": 0.013257879018783569,
"step": 1900
},
{
"epoch": 0.18793369313801078,
"grad_norm": 0.6779909729957581,
"learning_rate": 1.9991329441122937e-05,
"loss": 0.012929306030273438,
"step": 1950
},
{
"epoch": 0.19275250578257516,
"grad_norm": 0.19276919960975647,
"learning_rate": 1.9988976932619574e-05,
"loss": 0.012471644878387452,
"step": 2000
},
{
"epoch": 0.19757131842713954,
"grad_norm": 2.5498015880584717,
"learning_rate": 1.9986342567833087e-05,
"loss": 0.013909963369369506,
"step": 2050
},
{
"epoch": 0.20239013107170392,
"grad_norm": 0.7871809005737305,
"learning_rate": 1.9983426421096636e-05,
"loss": 0.013362987041473389,
"step": 2100
},
{
"epoch": 0.2072089437162683,
"grad_norm": 0.5824525356292725,
"learning_rate": 1.9980228574694357e-05,
"loss": 0.012392985820770263,
"step": 2150
},
{
"epoch": 0.21202775636083268,
"grad_norm": 1.331589698791504,
"learning_rate": 1.9976749118859023e-05,
"loss": 0.012818679809570313,
"step": 2200
},
{
"epoch": 0.21684656900539706,
"grad_norm": 0.22093236446380615,
"learning_rate": 1.9972988151769507e-05,
"loss": 0.012426936626434326,
"step": 2250
},
{
"epoch": 0.22166538164996144,
"grad_norm": 1.7017446756362915,
"learning_rate": 1.9968945779548007e-05,
"loss": 0.013253505229949952,
"step": 2300
},
{
"epoch": 0.22648419429452582,
"grad_norm": 0.16596011817455292,
"learning_rate": 1.9964622116257056e-05,
"loss": 0.012612838745117188,
"step": 2350
},
{
"epoch": 0.2313030069390902,
"grad_norm": 0.2315380871295929,
"learning_rate": 1.99600172838963e-05,
"loss": 0.012747344970703125,
"step": 2400
},
{
"epoch": 0.23612181958365458,
"grad_norm": 0.3689921796321869,
"learning_rate": 1.9955131412399064e-05,
"loss": 0.012891719341278076,
"step": 2450
},
{
"epoch": 0.24094063222821896,
"grad_norm": 0.5337355732917786,
"learning_rate": 1.994996463962867e-05,
"loss": 0.012232768535614013,
"step": 2500
},
{
"epoch": 0.24575944487278334,
"grad_norm": 0.1625846028327942,
"learning_rate": 1.9944517111374558e-05,
"loss": 0.013072433471679688,
"step": 2550
},
{
"epoch": 0.25057825751734775,
"grad_norm": 0.7666917443275452,
"learning_rate": 1.9938788981348175e-05,
"loss": 0.011926066875457764,
"step": 2600
},
{
"epoch": 0.2553970701619121,
"grad_norm": 0.5212653279304504,
"learning_rate": 1.9932780411178628e-05,
"loss": 0.012572301626205444,
"step": 2650
},
{
"epoch": 0.2602158828064765,
"grad_norm": 1.3501203060150146,
"learning_rate": 1.9926491570408126e-05,
"loss": 0.012271144390106202,
"step": 2700
},
{
"epoch": 0.26503469545104086,
"grad_norm": 0.1507686972618103,
"learning_rate": 1.991992263648721e-05,
"loss": 0.013356069326400757,
"step": 2750
},
{
"epoch": 0.26985350809560527,
"grad_norm": 1.450133204460144,
"learning_rate": 1.9913073794769727e-05,
"loss": 0.012582473754882813,
"step": 2800
},
{
"epoch": 0.2746723207401696,
"grad_norm": 0.20175831019878387,
"learning_rate": 1.9905945238507597e-05,
"loss": 0.012466964721679687,
"step": 2850
},
{
"epoch": 0.279491133384734,
"grad_norm": 0.9705828428268433,
"learning_rate": 1.989853716884539e-05,
"loss": 0.012320556640625,
"step": 2900
},
{
"epoch": 0.2843099460292984,
"grad_norm": 0.29664674401283264,
"learning_rate": 1.9890849794814616e-05,
"loss": 0.012661590576171874,
"step": 2950
},
{
"epoch": 0.2891287586738628,
"grad_norm": 0.7184270620346069,
"learning_rate": 1.9882883333327844e-05,
"loss": 0.012468541860580445,
"step": 3000
},
{
"epoch": 0.29394757131842714,
"grad_norm": 1.2583141326904297,
"learning_rate": 1.987463800917259e-05,
"loss": 0.013047130107879638,
"step": 3050
},
{
"epoch": 0.29876638396299154,
"grad_norm": 0.3179360628128052,
"learning_rate": 1.986611405500495e-05,
"loss": 0.012662353515625,
"step": 3100
},
{
"epoch": 0.3035851966075559,
"grad_norm": 0.4840896427631378,
"learning_rate": 1.9857311711343047e-05,
"loss": 0.012455928325653075,
"step": 3150
},
{
"epoch": 0.3084040092521203,
"grad_norm": 0.8195740580558777,
"learning_rate": 1.984823122656026e-05,
"loss": 0.012638804912567138,
"step": 3200
},
{
"epoch": 0.31322282189668466,
"grad_norm": 0.1526927649974823,
"learning_rate": 1.9838872856878185e-05,
"loss": 0.01197858214378357,
"step": 3250
},
{
"epoch": 0.31804163454124906,
"grad_norm": 0.27761492133140564,
"learning_rate": 1.982923686635944e-05,
"loss": 0.012119649648666382,
"step": 3300
},
{
"epoch": 0.3228604471858134,
"grad_norm": 1.5864442586898804,
"learning_rate": 1.981932352690017e-05,
"loss": 0.012395553588867188,
"step": 3350
},
{
"epoch": 0.3276792598303778,
"grad_norm": 1.2578856945037842,
"learning_rate": 1.980913311822243e-05,
"loss": 0.01248263120651245,
"step": 3400
},
{
"epoch": 0.3324980724749422,
"grad_norm": 0.30967798829078674,
"learning_rate": 1.979866592786624e-05,
"loss": 0.012674357891082764,
"step": 3450
},
{
"epoch": 0.3373168851195065,
"grad_norm": 1.2549265623092651,
"learning_rate": 1.9787922251181513e-05,
"loss": 0.012856496572494507,
"step": 3500
},
{
"epoch": 0.34213569776407093,
"grad_norm": 0.8616346120834351,
"learning_rate": 1.977690239131968e-05,
"loss": 0.012417705059051513,
"step": 3550
},
{
"epoch": 0.3469545104086353,
"grad_norm": 0.27554193139076233,
"learning_rate": 1.976560665922518e-05,
"loss": 0.012434184551239014,
"step": 3600
},
{
"epoch": 0.3517733230531997,
"grad_norm": 0.6482635140419006,
"learning_rate": 1.9754035373626646e-05,
"loss": 0.012548320293426514,
"step": 3650
},
{
"epoch": 0.35659213569776405,
"grad_norm": 0.8016761541366577,
"learning_rate": 1.9742188861027957e-05,
"loss": 0.012607015371322632,
"step": 3700
},
{
"epoch": 0.36141094834232845,
"grad_norm": 0.3415991961956024,
"learning_rate": 1.9730067455698964e-05,
"loss": 0.012693126201629639,
"step": 3750
},
{
"epoch": 0.3662297609868928,
"grad_norm": 1.8076531887054443,
"learning_rate": 1.9717671499666125e-05,
"loss": 0.01259676694869995,
"step": 3800
},
{
"epoch": 0.3710485736314572,
"grad_norm": 0.1274661123752594,
"learning_rate": 1.97050013427028e-05,
"loss": 0.012547200918197632,
"step": 3850
},
{
"epoch": 0.37586738627602156,
"grad_norm": 0.20902210474014282,
"learning_rate": 1.9692057342319407e-05,
"loss": 0.0122760009765625,
"step": 3900
},
{
"epoch": 0.38068619892058597,
"grad_norm": 0.5315442085266113,
"learning_rate": 1.9678839863753336e-05,
"loss": 0.012463277578353882,
"step": 3950
},
{
"epoch": 0.3855050115651503,
"grad_norm": 1.2004033327102661,
"learning_rate": 1.966534927995864e-05,
"loss": 0.012470932006835937,
"step": 4000
},
{
"epoch": 0.39032382420971473,
"grad_norm": 0.1404499113559723,
"learning_rate": 1.9651585971595494e-05,
"loss": 0.012283198833465576,
"step": 4050
},
{
"epoch": 0.3951426368542791,
"grad_norm": 0.4698476791381836,
"learning_rate": 1.9637550327019488e-05,
"loss": 0.012424596548080445,
"step": 4100
},
{
"epoch": 0.3999614494988435,
"grad_norm": 0.11542811989784241,
"learning_rate": 1.9623242742270635e-05,
"loss": 0.012442626953125,
"step": 4150
},
{
"epoch": 0.40478026214340784,
"grad_norm": 0.309451699256897,
"learning_rate": 1.9608663621062222e-05,
"loss": 0.011946996450424194,
"step": 4200
},
{
"epoch": 0.40959907478797225,
"grad_norm": 0.17752103507518768,
"learning_rate": 1.9593813374769396e-05,
"loss": 0.012281291484832764,
"step": 4250
},
{
"epoch": 0.4144178874325366,
"grad_norm": 0.9642850160598755,
"learning_rate": 1.9578692422417578e-05,
"loss": 0.011960487365722656,
"step": 4300
},
{
"epoch": 0.419236700077101,
"grad_norm": 0.38358408212661743,
"learning_rate": 1.9563301190670625e-05,
"loss": 0.012084554433822631,
"step": 4350
},
{
"epoch": 0.42405551272166536,
"grad_norm": 0.3695308566093445,
"learning_rate": 1.954764011381879e-05,
"loss": 0.011912307739257812,
"step": 4400
},
{
"epoch": 0.42887432536622977,
"grad_norm": 0.1504460573196411,
"learning_rate": 1.9531709633766486e-05,
"loss": 0.0125970721244812,
"step": 4450
},
{
"epoch": 0.4336931380107941,
"grad_norm": 0.7031795382499695,
"learning_rate": 1.951551020001979e-05,
"loss": 0.011779887676239014,
"step": 4500
},
{
"epoch": 0.43851195065535853,
"grad_norm": 0.9349226355552673,
"learning_rate": 1.9499042269673785e-05,
"loss": 0.012475408315658569,
"step": 4550
},
{
"epoch": 0.4433307632999229,
"grad_norm": 1.299560308456421,
"learning_rate": 1.9482306307399642e-05,
"loss": 0.012063064575195313,
"step": 4600
},
{
"epoch": 0.4481495759444873,
"grad_norm": 0.3574764132499695,
"learning_rate": 1.9465302785431518e-05,
"loss": 0.012038066387176513,
"step": 4650
},
{
"epoch": 0.45296838858905164,
"grad_norm": 0.39971208572387695,
"learning_rate": 1.9448032183553237e-05,
"loss": 0.012331008911132812,
"step": 4700
},
{
"epoch": 0.45778720123361605,
"grad_norm": 0.7214897274971008,
"learning_rate": 1.9430494989084733e-05,
"loss": 0.012144622802734375,
"step": 4750
},
{
"epoch": 0.4626060138781804,
"grad_norm": 0.5722167491912842,
"learning_rate": 1.9412691696868327e-05,
"loss": 0.012291641235351562,
"step": 4800
},
{
"epoch": 0.4674248265227448,
"grad_norm": 0.193996861577034,
"learning_rate": 1.9394622809254735e-05,
"loss": 0.012149810791015625,
"step": 4850
},
{
"epoch": 0.47224363916730916,
"grad_norm": 0.7098087072372437,
"learning_rate": 1.9376288836088916e-05,
"loss": 0.012204360961914063,
"step": 4900
},
{
"epoch": 0.47706245181187357,
"grad_norm": 0.5333502292633057,
"learning_rate": 1.9357690294695673e-05,
"loss": 0.012274196147918701,
"step": 4950
},
{
"epoch": 0.4818812644564379,
"grad_norm": 0.13141588866710663,
"learning_rate": 1.9338827709865064e-05,
"loss": 0.012214864492416383,
"step": 5000
},
{
"epoch": 0.4867000771010023,
"grad_norm": 0.2113286256790161,
"learning_rate": 1.9319701613837577e-05,
"loss": 0.012197240591049194,
"step": 5050
},
{
"epoch": 0.4915188897455667,
"grad_norm": 0.7589179873466492,
"learning_rate": 1.9300312546289144e-05,
"loss": 0.012238616943359376,
"step": 5100
},
{
"epoch": 0.4963377023901311,
"grad_norm": 0.47019609808921814,
"learning_rate": 1.928066105431588e-05,
"loss": 0.012136790752410889,
"step": 5150
},
{
"epoch": 0.4999036237471087,
"eval_loss": 0.007605554535984993,
"eval_mae": 0.06400828063488007,
"eval_mse": 0.007605642545968294,
"eval_runtime": 4371.2139,
"eval_samples_per_second": 105.223,
"eval_spearman": 0.9365585006265831,
"eval_steps_per_second": 0.548,
"step": 5187
}
],
"logging_steps": 50,
"max_steps": 31128,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 5187,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.704276921907937e+19,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}