Text Classification
Transformers
Safetensors
sentence-transformers
English
Chinese
qwen3
text-generation
reranker
memory
agent
cross-encoder
text-embeddings-inference
Instructions to use IAAR-Shanghai/MemReranker-4B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use IAAR-Shanghai/MemReranker-4B with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="IAAR-Shanghai/MemReranker-4B")
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("IAAR-Shanghai/MemReranker-4B")
model = AutoModelForCausalLM.from_pretrained("IAAR-Shanghai/MemReranker-4B")
```

- sentence-transformers
How to use IAAR-Shanghai/MemReranker-4B with sentence-transformers:
```python
from sentence_transformers import CrossEncoder

model = CrossEncoder("IAAR-Shanghai/MemReranker-4B")

query = "Which planet is known as the Red Planet?"
passages = [
    "Venus is often called Earth's twin because of its similar size and proximity.",
    "Mars, known for its reddish appearance, is often referred to as the Red Planet.",
    "Jupiter, the largest planet in our solar system, has a prominent red spot.",
    "Saturn, famous for its rings, is sometimes mistaken for the Red Planet."
]

scores = model.predict([(query, passage) for passage in passages])
print(scores)
```

- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.4999036237471087, | |
| "eval_steps": 5187, | |
| "global_step": 5187, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00481881264456438, | |
| "grad_norm": 1.6099064350128174, | |
| "learning_rate": 6.294155427103405e-07, | |
| "loss": 0.095972900390625, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.00963762528912876, | |
| "grad_norm": 1.0794726610183716, | |
| "learning_rate": 1.2716763005780348e-06, | |
| "loss": 0.03401387691497803, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.014456437933693137, | |
| "grad_norm": 0.9826000928878784, | |
| "learning_rate": 1.9139370584457295e-06, | |
| "loss": 0.021355185508728027, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.01927525057825752, | |
| "grad_norm": 1.6832308769226074, | |
| "learning_rate": 2.5561978163134233e-06, | |
| "loss": 0.016741816997528077, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.024094063222821895, | |
| "grad_norm": 1.745717167854309, | |
| "learning_rate": 3.198458574181118e-06, | |
| "loss": 0.014754180908203124, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.028912875867386275, | |
| "grad_norm": 0.41400647163391113, | |
| "learning_rate": 3.8407193320488126e-06, | |
| "loss": 0.014074199199676514, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.03373168851195066, | |
| "grad_norm": 0.7941911220550537, | |
| "learning_rate": 4.482980089916507e-06, | |
| "loss": 0.013227691650390625, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.03855050115651504, | |
| "grad_norm": 0.5189383029937744, | |
| "learning_rate": 5.125240847784201e-06, | |
| "loss": 0.013188705444335938, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.04336931380107941, | |
| "grad_norm": 1.6657729148864746, | |
| "learning_rate": 5.767501605651895e-06, | |
| "loss": 0.012030971050262452, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.04818812644564379, | |
| "grad_norm": 0.32716143131256104, | |
| "learning_rate": 6.4097623635195895e-06, | |
| "loss": 0.012086995840072633, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.05300693909020817, | |
| "grad_norm": 0.3107227385044098, | |
| "learning_rate": 7.052023121387284e-06, | |
| "loss": 0.011651687622070313, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.05782575173477255, | |
| "grad_norm": 0.6159315705299377, | |
| "learning_rate": 7.694283879254977e-06, | |
| "loss": 0.011140645742416381, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.06264456437933694, | |
| "grad_norm": 2.1143224239349365, | |
| "learning_rate": 8.336544637122673e-06, | |
| "loss": 0.012795639038085938, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.06746337702390132, | |
| "grad_norm": 2.545966863632202, | |
| "learning_rate": 8.978805394990367e-06, | |
| "loss": 0.013084233999252319, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.0722821896684657, | |
| "grad_norm": 1.8246541023254395, | |
| "learning_rate": 9.621066152858061e-06, | |
| "loss": 0.012490972280502319, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.07710100231303008, | |
| "grad_norm": 0.959894597530365, | |
| "learning_rate": 1.0263326910725756e-05, | |
| "loss": 0.011189931631088256, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.08191981495759446, | |
| "grad_norm": 4.178706645965576, | |
| "learning_rate": 1.090558766859345e-05, | |
| "loss": 0.012091522216796874, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.08673862760215882, | |
| "grad_norm": 0.7208101153373718, | |
| "learning_rate": 1.1547848426461144e-05, | |
| "loss": 0.012419841289520263, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.0915574402467232, | |
| "grad_norm": 1.3128774166107178, | |
| "learning_rate": 1.2190109184328838e-05, | |
| "loss": 0.011686071157455444, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.09637625289128758, | |
| "grad_norm": 1.2861932516098022, | |
| "learning_rate": 1.2832369942196533e-05, | |
| "loss": 0.011539828777313233, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.10119506553585196, | |
| "grad_norm": 1.6700119972229004, | |
| "learning_rate": 1.3474630700064227e-05, | |
| "loss": 0.012701009511947631, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.10601387818041634, | |
| "grad_norm": 1.6825134754180908, | |
| "learning_rate": 1.4116891457931921e-05, | |
| "loss": 0.0122796630859375, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.11083269082498072, | |
| "grad_norm": 0.18270175158977509, | |
| "learning_rate": 1.4759152215799615e-05, | |
| "loss": 0.014598617553710938, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.1156515034695451, | |
| "grad_norm": 2.148013114929199, | |
| "learning_rate": 1.540141297366731e-05, | |
| "loss": 0.012004268169403077, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.12047031611410948, | |
| "grad_norm": 0.2672475278377533, | |
| "learning_rate": 1.6043673731535007e-05, | |
| "loss": 0.011746572256088257, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.12528912875867387, | |
| "grad_norm": 1.4434212446212769, | |
| "learning_rate": 1.66859344894027e-05, | |
| "loss": 0.012035726308822632, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.13010794140323825, | |
| "grad_norm": 0.4635275602340698, | |
| "learning_rate": 1.7328195247270396e-05, | |
| "loss": 0.012548294067382813, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.13492675404780263, | |
| "grad_norm": 0.548039436340332, | |
| "learning_rate": 1.7970456005138088e-05, | |
| "loss": 0.012786407470703125, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.139745566692367, | |
| "grad_norm": 0.7193094491958618, | |
| "learning_rate": 1.8612716763005784e-05, | |
| "loss": 0.011991348266601563, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.1445643793369314, | |
| "grad_norm": 0.5346310138702393, | |
| "learning_rate": 1.9254977520873477e-05, | |
| "loss": 0.012789205312728882, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.14938319198149577, | |
| "grad_norm": 0.40978488326072693, | |
| "learning_rate": 1.9897238278741172e-05, | |
| "loss": 0.012897975444793701, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.15420200462606015, | |
| "grad_norm": 1.7101497650146484, | |
| "learning_rate": 1.9999900451301277e-05, | |
| "loss": 0.014085414409637452, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.15902081727062453, | |
| "grad_norm": 0.2999866306781769, | |
| "learning_rate": 1.9999522349843378e-05, | |
| "loss": 0.01345428466796875, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.1638396299151889, | |
| "grad_norm": 0.3239404857158661, | |
| "learning_rate": 1.9998862094545145e-05, | |
| "loss": 0.012794520854949951, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.16865844255975326, | |
| "grad_norm": 0.1947634220123291, | |
| "learning_rate": 1.999791970403682e-05, | |
| "loss": 0.013333181142807007, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.17347725520431764, | |
| "grad_norm": 0.8663123846054077, | |
| "learning_rate": 1.9996695204909593e-05, | |
| "loss": 0.012868322134017944, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.17829606784888202, | |
| "grad_norm": 0.504265546798706, | |
| "learning_rate": 1.9995188631714816e-05, | |
| "loss": 0.013335164785385132, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.1831148804934464, | |
| "grad_norm": 0.36884820461273193, | |
| "learning_rate": 1.9993400026963072e-05, | |
| "loss": 0.013257879018783569, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.18793369313801078, | |
| "grad_norm": 0.6779909729957581, | |
| "learning_rate": 1.9991329441122937e-05, | |
| "loss": 0.012929306030273438, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.19275250578257516, | |
| "grad_norm": 0.19276919960975647, | |
| "learning_rate": 1.9988976932619574e-05, | |
| "loss": 0.012471644878387452, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.19757131842713954, | |
| "grad_norm": 2.5498015880584717, | |
| "learning_rate": 1.9986342567833087e-05, | |
| "loss": 0.013909963369369506, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.20239013107170392, | |
| "grad_norm": 0.7871809005737305, | |
| "learning_rate": 1.9983426421096636e-05, | |
| "loss": 0.013362987041473389, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.2072089437162683, | |
| "grad_norm": 0.5824525356292725, | |
| "learning_rate": 1.9980228574694357e-05, | |
| "loss": 0.012392985820770263, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.21202775636083268, | |
| "grad_norm": 1.331589698791504, | |
| "learning_rate": 1.9976749118859023e-05, | |
| "loss": 0.012818679809570313, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.21684656900539706, | |
| "grad_norm": 0.22093236446380615, | |
| "learning_rate": 1.9972988151769507e-05, | |
| "loss": 0.012426936626434326, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.22166538164996144, | |
| "grad_norm": 1.7017446756362915, | |
| "learning_rate": 1.9968945779548007e-05, | |
| "loss": 0.013253505229949952, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.22648419429452582, | |
| "grad_norm": 0.16596011817455292, | |
| "learning_rate": 1.9964622116257056e-05, | |
| "loss": 0.012612838745117188, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.2313030069390902, | |
| "grad_norm": 0.2315380871295929, | |
| "learning_rate": 1.99600172838963e-05, | |
| "loss": 0.012747344970703125, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.23612181958365458, | |
| "grad_norm": 0.3689921796321869, | |
| "learning_rate": 1.9955131412399064e-05, | |
| "loss": 0.012891719341278076, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.24094063222821896, | |
| "grad_norm": 0.5337355732917786, | |
| "learning_rate": 1.994996463962867e-05, | |
| "loss": 0.012232768535614013, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.24575944487278334, | |
| "grad_norm": 0.1625846028327942, | |
| "learning_rate": 1.9944517111374558e-05, | |
| "loss": 0.013072433471679688, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.25057825751734775, | |
| "grad_norm": 0.7666917443275452, | |
| "learning_rate": 1.9938788981348175e-05, | |
| "loss": 0.011926066875457764, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.2553970701619121, | |
| "grad_norm": 0.5212653279304504, | |
| "learning_rate": 1.9932780411178628e-05, | |
| "loss": 0.012572301626205444, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.2602158828064765, | |
| "grad_norm": 1.3501203060150146, | |
| "learning_rate": 1.9926491570408126e-05, | |
| "loss": 0.012271144390106202, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.26503469545104086, | |
| "grad_norm": 0.1507686972618103, | |
| "learning_rate": 1.991992263648721e-05, | |
| "loss": 0.013356069326400757, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.26985350809560527, | |
| "grad_norm": 1.450133204460144, | |
| "learning_rate": 1.9913073794769727e-05, | |
| "loss": 0.012582473754882813, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.2746723207401696, | |
| "grad_norm": 0.20175831019878387, | |
| "learning_rate": 1.9905945238507597e-05, | |
| "loss": 0.012466964721679687, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.279491133384734, | |
| "grad_norm": 0.9705828428268433, | |
| "learning_rate": 1.989853716884539e-05, | |
| "loss": 0.012320556640625, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.2843099460292984, | |
| "grad_norm": 0.29664674401283264, | |
| "learning_rate": 1.9890849794814616e-05, | |
| "loss": 0.012661590576171874, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.2891287586738628, | |
| "grad_norm": 0.7184270620346069, | |
| "learning_rate": 1.9882883333327844e-05, | |
| "loss": 0.012468541860580445, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.29394757131842714, | |
| "grad_norm": 1.2583141326904297, | |
| "learning_rate": 1.987463800917259e-05, | |
| "loss": 0.013047130107879638, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.29876638396299154, | |
| "grad_norm": 0.3179360628128052, | |
| "learning_rate": 1.986611405500495e-05, | |
| "loss": 0.012662353515625, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.3035851966075559, | |
| "grad_norm": 0.4840896427631378, | |
| "learning_rate": 1.9857311711343047e-05, | |
| "loss": 0.012455928325653075, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.3084040092521203, | |
| "grad_norm": 0.8195740580558777, | |
| "learning_rate": 1.984823122656026e-05, | |
| "loss": 0.012638804912567138, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.31322282189668466, | |
| "grad_norm": 0.1526927649974823, | |
| "learning_rate": 1.9838872856878185e-05, | |
| "loss": 0.01197858214378357, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.31804163454124906, | |
| "grad_norm": 0.27761492133140564, | |
| "learning_rate": 1.982923686635944e-05, | |
| "loss": 0.012119649648666382, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.3228604471858134, | |
| "grad_norm": 1.5864442586898804, | |
| "learning_rate": 1.981932352690017e-05, | |
| "loss": 0.012395553588867188, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.3276792598303778, | |
| "grad_norm": 1.2578856945037842, | |
| "learning_rate": 1.980913311822243e-05, | |
| "loss": 0.01248263120651245, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.3324980724749422, | |
| "grad_norm": 0.30967798829078674, | |
| "learning_rate": 1.979866592786624e-05, | |
| "loss": 0.012674357891082764, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.3373168851195065, | |
| "grad_norm": 1.2549265623092651, | |
| "learning_rate": 1.9787922251181513e-05, | |
| "loss": 0.012856496572494507, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.34213569776407093, | |
| "grad_norm": 0.8616346120834351, | |
| "learning_rate": 1.977690239131968e-05, | |
| "loss": 0.012417705059051513, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.3469545104086353, | |
| "grad_norm": 0.27554193139076233, | |
| "learning_rate": 1.976560665922518e-05, | |
| "loss": 0.012434184551239014, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.3517733230531997, | |
| "grad_norm": 0.6482635140419006, | |
| "learning_rate": 1.9754035373626646e-05, | |
| "loss": 0.012548320293426514, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.35659213569776405, | |
| "grad_norm": 0.8016761541366577, | |
| "learning_rate": 1.9742188861027957e-05, | |
| "loss": 0.012607015371322632, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.36141094834232845, | |
| "grad_norm": 0.3415991961956024, | |
| "learning_rate": 1.9730067455698964e-05, | |
| "loss": 0.012693126201629639, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.3662297609868928, | |
| "grad_norm": 1.8076531887054443, | |
| "learning_rate": 1.9717671499666125e-05, | |
| "loss": 0.01259676694869995, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.3710485736314572, | |
| "grad_norm": 0.1274661123752594, | |
| "learning_rate": 1.97050013427028e-05, | |
| "loss": 0.012547200918197632, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.37586738627602156, | |
| "grad_norm": 0.20902210474014282, | |
| "learning_rate": 1.9692057342319407e-05, | |
| "loss": 0.0122760009765625, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.38068619892058597, | |
| "grad_norm": 0.5315442085266113, | |
| "learning_rate": 1.9678839863753336e-05, | |
| "loss": 0.012463277578353882, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.3855050115651503, | |
| "grad_norm": 1.2004033327102661, | |
| "learning_rate": 1.966534927995864e-05, | |
| "loss": 0.012470932006835937, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.39032382420971473, | |
| "grad_norm": 0.1404499113559723, | |
| "learning_rate": 1.9651585971595494e-05, | |
| "loss": 0.012283198833465576, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.3951426368542791, | |
| "grad_norm": 0.4698476791381836, | |
| "learning_rate": 1.9637550327019488e-05, | |
| "loss": 0.012424596548080445, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.3999614494988435, | |
| "grad_norm": 0.11542811989784241, | |
| "learning_rate": 1.9623242742270635e-05, | |
| "loss": 0.012442626953125, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.40478026214340784, | |
| "grad_norm": 0.309451699256897, | |
| "learning_rate": 1.9608663621062222e-05, | |
| "loss": 0.011946996450424194, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.40959907478797225, | |
| "grad_norm": 0.17752103507518768, | |
| "learning_rate": 1.9593813374769396e-05, | |
| "loss": 0.012281291484832764, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.4144178874325366, | |
| "grad_norm": 0.9642850160598755, | |
| "learning_rate": 1.9578692422417578e-05, | |
| "loss": 0.011960487365722656, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.419236700077101, | |
| "grad_norm": 0.38358408212661743, | |
| "learning_rate": 1.9563301190670625e-05, | |
| "loss": 0.012084554433822631, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.42405551272166536, | |
| "grad_norm": 0.3695308566093445, | |
| "learning_rate": 1.954764011381879e-05, | |
| "loss": 0.011912307739257812, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.42887432536622977, | |
| "grad_norm": 0.1504460573196411, | |
| "learning_rate": 1.9531709633766486e-05, | |
| "loss": 0.0125970721244812, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.4336931380107941, | |
| "grad_norm": 0.7031795382499695, | |
| "learning_rate": 1.951551020001979e-05, | |
| "loss": 0.011779887676239014, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.43851195065535853, | |
| "grad_norm": 0.9349226355552673, | |
| "learning_rate": 1.9499042269673785e-05, | |
| "loss": 0.012475408315658569, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.4433307632999229, | |
| "grad_norm": 1.299560308456421, | |
| "learning_rate": 1.9482306307399642e-05, | |
| "loss": 0.012063064575195313, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.4481495759444873, | |
| "grad_norm": 0.3574764132499695, | |
| "learning_rate": 1.9465302785431518e-05, | |
| "loss": 0.012038066387176513, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.45296838858905164, | |
| "grad_norm": 0.39971208572387695, | |
| "learning_rate": 1.9448032183553237e-05, | |
| "loss": 0.012331008911132812, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.45778720123361605, | |
| "grad_norm": 0.7214897274971008, | |
| "learning_rate": 1.9430494989084733e-05, | |
| "loss": 0.012144622802734375, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.4626060138781804, | |
| "grad_norm": 0.5722167491912842, | |
| "learning_rate": 1.9412691696868327e-05, | |
| "loss": 0.012291641235351562, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.4674248265227448, | |
| "grad_norm": 0.193996861577034, | |
| "learning_rate": 1.9394622809254735e-05, | |
| "loss": 0.012149810791015625, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.47224363916730916, | |
| "grad_norm": 0.7098087072372437, | |
| "learning_rate": 1.9376288836088916e-05, | |
| "loss": 0.012204360961914063, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.47706245181187357, | |
| "grad_norm": 0.5333502292633057, | |
| "learning_rate": 1.9357690294695673e-05, | |
| "loss": 0.012274196147918701, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.4818812644564379, | |
| "grad_norm": 0.13141588866710663, | |
| "learning_rate": 1.9338827709865064e-05, | |
| "loss": 0.012214864492416383, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.4867000771010023, | |
| "grad_norm": 0.2113286256790161, | |
| "learning_rate": 1.9319701613837577e-05, | |
| "loss": 0.012197240591049194, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.4915188897455667, | |
| "grad_norm": 0.7589179873466492, | |
| "learning_rate": 1.9300312546289144e-05, | |
| "loss": 0.012238616943359376, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.4963377023901311, | |
| "grad_norm": 0.47019609808921814, | |
| "learning_rate": 1.928066105431588e-05, | |
| "loss": 0.012136790752410889, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.4999036237471087, | |
| "eval_loss": 0.007605554535984993, | |
| "eval_mae": 0.06400828063488007, | |
| "eval_mse": 0.007605642545968294, | |
| "eval_runtime": 4371.2139, | |
| "eval_samples_per_second": 105.223, | |
| "eval_spearman": 0.9365585006265831, | |
| "eval_steps_per_second": 0.548, | |
| "step": 5187 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 31128, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 5187, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.704276921907937e+19, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |