gemma4-e4b-kinetic3K_FT / trainer_state.json
bear7011's picture
Upload final LoRA adapter artifacts
38805cf verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1170,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.025682182985553772,
"grad_norm": 41.89281463623047,
"learning_rate": 5e-05,
"loss": 41.062384033203124,
"step": 10
},
{
"epoch": 0.051364365971107544,
"grad_norm": 19.25795555114746,
"learning_rate": 0.00010555555555555557,
"loss": 23.114556884765626,
"step": 20
},
{
"epoch": 0.07704654895666131,
"grad_norm": 15.17092514038086,
"learning_rate": 0.0001611111111111111,
"loss": 19.892947387695312,
"step": 30
},
{
"epoch": 0.10272873194221509,
"grad_norm": 12.6154146194458,
"learning_rate": 0.0001999965463076377,
"loss": 18.954251098632813,
"step": 40
},
{
"epoch": 0.12841091492776885,
"grad_norm": 13.695039749145508,
"learning_rate": 0.00019993515396856082,
"loss": 18.60211944580078,
"step": 50
},
{
"epoch": 0.15409309791332262,
"grad_norm": 13.03475570678711,
"learning_rate": 0.00019979706714271113,
"loss": 17.00911102294922,
"step": 60
},
{
"epoch": 0.1797752808988764,
"grad_norm": 12.657264709472656,
"learning_rate": 0.0001995823918037908,
"loss": 16.737196350097655,
"step": 70
},
{
"epoch": 0.20545746388443017,
"grad_norm": 9.450530052185059,
"learning_rate": 0.00019929129270278366,
"loss": 17.192404174804686,
"step": 80
},
{
"epoch": 0.23113964686998395,
"grad_norm": 11.038394927978516,
"learning_rate": 0.0001989239932415185,
"loss": 17.626002502441406,
"step": 90
},
{
"epoch": 0.2568218298555377,
"grad_norm": 10.112431526184082,
"learning_rate": 0.00019848077530122083,
"loss": 17.375982666015624,
"step": 100
},
{
"epoch": 0.2825040128410915,
"grad_norm": 10.951290130615234,
"learning_rate": 0.0001979619790261853,
"loss": 16.52630157470703,
"step": 110
},
{
"epoch": 0.30818619582664525,
"grad_norm": 13.097779273986816,
"learning_rate": 0.00019736800256273457,
"loss": 18.660000610351563,
"step": 120
},
{
"epoch": 0.33386837881219905,
"grad_norm": 10.489601135253906,
"learning_rate": 0.00019669930175366472,
"loss": 18.038082885742188,
"step": 130
},
{
"epoch": 0.3595505617977528,
"grad_norm": 12.037221908569336,
"learning_rate": 0.0001959563897884124,
"loss": 16.976956176757813,
"step": 140
},
{
"epoch": 0.3852327447833066,
"grad_norm": 13.149781227111816,
"learning_rate": 0.00019513983680921108,
"loss": 17.55983123779297,
"step": 150
},
{
"epoch": 0.41091492776886035,
"grad_norm": 12.796188354492188,
"learning_rate": 0.00019425026947353992,
"loss": 17.86356201171875,
"step": 160
},
{
"epoch": 0.43659711075441415,
"grad_norm": 14.400433540344238,
"learning_rate": 0.0001932883704732001,
"loss": 17.5066162109375,
"step": 170
},
{
"epoch": 0.4622792937399679,
"grad_norm": 16.03416633605957,
"learning_rate": 0.00019225487801038788,
"loss": 18.057034301757813,
"step": 180
},
{
"epoch": 0.48796147672552165,
"grad_norm": 11.53113842010498,
"learning_rate": 0.00019115058523116733,
"loss": 16.775308227539064,
"step": 190
},
{
"epoch": 0.5136436597110754,
"grad_norm": 12.079059600830078,
"learning_rate": 0.00018997633961677582,
"loss": 17.352998352050783,
"step": 200
},
{
"epoch": 0.5393258426966292,
"grad_norm": 11.70594310760498,
"learning_rate": 0.00018873304233323122,
"loss": 18.17870635986328,
"step": 210
},
{
"epoch": 0.565008025682183,
"grad_norm": 13.156736373901367,
"learning_rate": 0.00018742164753973855,
"loss": 16.766079711914063,
"step": 220
},
{
"epoch": 0.5906902086677368,
"grad_norm": 11.801448822021484,
"learning_rate": 0.0001860431616564278,
"loss": 17.98457489013672,
"step": 230
},
{
"epoch": 0.6163723916532905,
"grad_norm": 11.434338569641113,
"learning_rate": 0.0001845986425919841,
"loss": 17.011558532714844,
"step": 240
},
{
"epoch": 0.6420545746388443,
"grad_norm": 11.592355728149414,
"learning_rate": 0.00018308919893176396,
"loss": 18.073362731933592,
"step": 250
},
{
"epoch": 0.6677367576243981,
"grad_norm": 12.140549659729004,
"learning_rate": 0.00018151598908701947,
"loss": 17.267474365234374,
"step": 260
},
{
"epoch": 0.6934189406099518,
"grad_norm": 12.207261085510254,
"learning_rate": 0.00017988022040588416,
"loss": 17.970367431640625,
"step": 270
},
{
"epoch": 0.7191011235955056,
"grad_norm": 12.024649620056152,
"learning_rate": 0.000178183148246803,
"loss": 18.595138549804688,
"step": 280
},
{
"epoch": 0.7447833065810594,
"grad_norm": 10.953705787658691,
"learning_rate": 0.0001764260750151167,
"loss": 17.661044311523437,
"step": 290
},
{
"epoch": 0.7704654895666132,
"grad_norm": 12.161933898925781,
"learning_rate": 0.0001746103491635407,
"loss": 17.323040771484376,
"step": 300
},
{
"epoch": 0.7961476725521669,
"grad_norm": 14.1478271484375,
"learning_rate": 0.00017273736415730488,
"loss": 17.589439392089844,
"step": 310
},
{
"epoch": 0.8218298555377207,
"grad_norm": 11.344724655151367,
"learning_rate": 0.0001708085574047494,
"loss": 18.087261962890626,
"step": 320
},
{
"epoch": 0.8475120385232745,
"grad_norm": 11.49538803100586,
"learning_rate": 0.00016882540915419623,
"loss": 16.709014892578125,
"step": 330
},
{
"epoch": 0.8731942215088283,
"grad_norm": 11.975954055786133,
"learning_rate": 0.00016678944135794374,
"loss": 18.784584045410156,
"step": 340
},
{
"epoch": 0.898876404494382,
"grad_norm": 10.979130744934082,
"learning_rate": 0.00016470221650425582,
"loss": 17.035598754882812,
"step": 350
},
{
"epoch": 0.9245585874799358,
"grad_norm": 10.86307144165039,
"learning_rate": 0.00016256533641824177,
"loss": 17.912120056152343,
"step": 360
},
{
"epoch": 0.9502407704654896,
"grad_norm": 11.18655776977539,
"learning_rate": 0.00016038044103254775,
"loss": 16.4998046875,
"step": 370
},
{
"epoch": 0.9759229534510433,
"grad_norm": 11.321154594421387,
"learning_rate": 0.00015814920712880267,
"loss": 17.580471801757813,
"step": 380
},
{
"epoch": 1.0,
"grad_norm": 6.271613597869873,
"learning_rate": 0.0001558733470507847,
"loss": 15.328689575195312,
"step": 390
},
{
"epoch": 1.0256821829855538,
"grad_norm": 13.181764602661133,
"learning_rate": 0.00015355460739029586,
"loss": 15.265965270996094,
"step": 400
},
{
"epoch": 1.0513643659711076,
"grad_norm": 11.904574394226074,
"learning_rate": 0.00015119476764675305,
"loss": 14.833596801757812,
"step": 410
},
{
"epoch": 1.0770465489566614,
"grad_norm": 12.352519989013672,
"learning_rate": 0.0001487956388615247,
"loss": 13.833314514160156,
"step": 420
},
{
"epoch": 1.102728731942215,
"grad_norm": 15.478001594543457,
"learning_rate": 0.00014635906222806058,
"loss": 15.767561340332032,
"step": 430
},
{
"epoch": 1.1284109149277688,
"grad_norm": 15.335821151733398,
"learning_rate": 0.00014388690767888154,
"loss": 14.462684631347656,
"step": 440
},
{
"epoch": 1.1540930979133226,
"grad_norm": 16.043373107910156,
"learning_rate": 0.00014138107245051392,
"loss": 15.908651733398438,
"step": 450
},
{
"epoch": 1.1797752808988764,
"grad_norm": 12.852351188659668,
"learning_rate": 0.00013884347962746948,
"loss": 15.758128356933593,
"step": 460
},
{
"epoch": 1.2054574638844302,
"grad_norm": 15.320487022399902,
"learning_rate": 0.00013627607666638858,
"loss": 15.051063537597656,
"step": 470
},
{
"epoch": 1.231139646869984,
"grad_norm": 16.30264663696289,
"learning_rate": 0.00013368083390147913,
"loss": 14.599794006347656,
"step": 480
},
{
"epoch": 1.2568218298555376,
"grad_norm": 13.734286308288574,
"learning_rate": 0.00013105974303239838,
"loss": 14.052903747558593,
"step": 490
},
{
"epoch": 1.2825040128410916,
"grad_norm": 15.704442024230957,
"learning_rate": 0.0001284148155957379,
"loss": 13.530386352539063,
"step": 500
},
{
"epoch": 1.3081861958266452,
"grad_norm": 14.808961868286133,
"learning_rate": 0.00012574808142128477,
"loss": 14.240873718261719,
"step": 510
},
{
"epoch": 1.333868378812199,
"grad_norm": 13.389018058776855,
"learning_rate": 0.00012306158707424403,
"loss": 14.467668151855468,
"step": 520
},
{
"epoch": 1.3595505617977528,
"grad_norm": 16.077634811401367,
"learning_rate": 0.00012035739428461739,
"loss": 13.67303466796875,
"step": 530
},
{
"epoch": 1.3852327447833066,
"grad_norm": 14.300647735595703,
"learning_rate": 0.00011763757836494403,
"loss": 15.072747802734375,
"step": 540
},
{
"epoch": 1.4109149277688604,
"grad_norm": 12.59499454498291,
"learning_rate": 0.00011490422661761744,
"loss": 14.030448913574219,
"step": 550
},
{
"epoch": 1.4365971107544142,
"grad_norm": 17.819976806640625,
"learning_rate": 0.00011215943673300093,
"loss": 13.556326293945313,
"step": 560
},
{
"epoch": 1.462279293739968,
"grad_norm": 15.307595252990723,
"learning_rate": 0.00010940531517957073,
"loss": 15.464706420898438,
"step": 570
},
{
"epoch": 1.4879614767255216,
"grad_norm": 14.918625831604004,
"learning_rate": 0.00010664397558732244,
"loss": 14.040945434570313,
"step": 580
},
{
"epoch": 1.5136436597110754,
"grad_norm": 13.040802955627441,
"learning_rate": 0.0001038775371256817,
"loss": 14.671842956542969,
"step": 590
},
{
"epoch": 1.5393258426966292,
"grad_norm": 14.143183708190918,
"learning_rate": 0.00010110812287716327,
"loss": 14.198591613769532,
"step": 600
},
{
"epoch": 1.565008025682183,
"grad_norm": 16.373567581176758,
"learning_rate": 9.833785820802739e-05,
"loss": 14.670704650878907,
"step": 610
},
{
"epoch": 1.5906902086677368,
"grad_norm": 17.14202117919922,
"learning_rate": 9.556886913718317e-05,
"loss": 14.713813781738281,
"step": 620
},
{
"epoch": 1.6163723916532904,
"grad_norm": 13.428468704223633,
"learning_rate": 9.280328070459135e-05,
"loss": 14.256681823730469,
"step": 630
},
{
"epoch": 1.6420545746388444,
"grad_norm": 14.759570121765137,
"learning_rate": 9.004321534041835e-05,
"loss": 14.875436401367187,
"step": 640
},
{
"epoch": 1.667736757624398,
"grad_norm": 14.605072021484375,
"learning_rate": 8.729079123619286e-05,
"loss": 14.151382446289062,
"step": 650
},
{
"epoch": 1.6934189406099518,
"grad_norm": 15.300275802612305,
"learning_rate": 8.454812071921596e-05,
"loss": 14.209205627441406,
"step": 660
},
{
"epoch": 1.7191011235955056,
"grad_norm": 15.565445899963379,
"learning_rate": 8.181730863147093e-05,
"loss": 15.246949768066406,
"step": 670
},
{
"epoch": 1.7447833065810594,
"grad_norm": 13.463878631591797,
"learning_rate": 7.910045071427829e-05,
"loss": 14.081675720214843,
"step": 680
},
{
"epoch": 1.7704654895666132,
"grad_norm": 15.799697875976562,
"learning_rate": 7.63996319999347e-05,
"loss": 14.659947204589844,
"step": 690
},
{
"epoch": 1.7961476725521668,
"grad_norm": 15.421786308288574,
"learning_rate": 7.371692521157048e-05,
"loss": 13.642781066894532,
"step": 700
},
{
"epoch": 1.8218298555377208,
"grad_norm": 16.400951385498047,
"learning_rate": 7.10543891724537e-05,
"loss": 14.9028076171875,
"step": 710
},
{
"epoch": 1.8475120385232744,
"grad_norm": 16.549213409423828,
"learning_rate": 6.841406722596191e-05,
"loss": 14.747923278808594,
"step": 720
},
{
"epoch": 1.8731942215088284,
"grad_norm": 14.868678092956543,
"learning_rate": 6.579798566743314e-05,
"loss": 14.379522705078125,
"step": 730
},
{
"epoch": 1.898876404494382,
"grad_norm": 17.223217010498047,
"learning_rate": 6.320815218910101e-05,
"loss": 14.513031005859375,
"step": 740
},
{
"epoch": 1.9245585874799358,
"grad_norm": 15.178607940673828,
"learning_rate": 6.064655433930624e-05,
"loss": 14.75238494873047,
"step": 750
},
{
"epoch": 1.9502407704654896,
"grad_norm": 14.607568740844727,
"learning_rate": 5.8115157997167536e-05,
"loss": 14.215359497070313,
"step": 760
},
{
"epoch": 1.9759229534510432,
"grad_norm": 15.849374771118164,
"learning_rate": 5.561590586388221e-05,
"loss": 13.844842529296875,
"step": 770
},
{
"epoch": 2.0,
"grad_norm": 8.945368766784668,
"learning_rate": 5.315071597181504e-05,
"loss": 13.500308227539062,
"step": 780
},
{
"epoch": 2.0256821829855536,
"grad_norm": 14.872233390808105,
"learning_rate": 5.072148021251821e-05,
"loss": 10.96926498413086,
"step": 790
},
{
"epoch": 2.0513643659711076,
"grad_norm": 16.96340560913086,
"learning_rate": 4.833006288481371e-05,
"loss": 10.601210021972657,
"step": 800
},
{
"epoch": 2.077046548956661,
"grad_norm": 17.987470626831055,
"learning_rate": 4.597829926405075e-05,
"loss": 11.348848724365235,
"step": 810
},
{
"epoch": 2.102728731942215,
"grad_norm": 18.53151512145996,
"learning_rate": 4.3667994193637796e-05,
"loss": 10.507981109619141,
"step": 820
},
{
"epoch": 2.128410914927769,
"grad_norm": 23.948802947998047,
"learning_rate": 4.140092069992867e-05,
"loss": 11.215933227539063,
"step": 830
},
{
"epoch": 2.154093097913323,
"grad_norm": 21.3605899810791,
"learning_rate": 3.91788186315269e-05,
"loss": 10.393006896972656,
"step": 840
},
{
"epoch": 2.1797752808988764,
"grad_norm": 26.548303604125977,
"learning_rate": 3.7003393324051874e-05,
"loss": 10.384098052978516,
"step": 850
},
{
"epoch": 2.20545746388443,
"grad_norm": 23.979907989501953,
"learning_rate": 3.487631429139183e-05,
"loss": 10.094139862060548,
"step": 860
},
{
"epoch": 2.231139646869984,
"grad_norm": 18.00309944152832,
"learning_rate": 3.279921394444776e-05,
"loss": 10.624467468261718,
"step": 870
},
{
"epoch": 2.2568218298555376,
"grad_norm": 24.657569885253906,
"learning_rate": 3.077368633835205e-05,
"loss": 10.711078643798828,
"step": 880
},
{
"epoch": 2.2825040128410916,
"grad_norm": 22.551204681396484,
"learning_rate": 2.8801285949122593e-05,
"loss": 10.329103088378906,
"step": 890
},
{
"epoch": 2.308186195826645,
"grad_norm": 26.180330276489258,
"learning_rate": 2.688352648069198e-05,
"loss": 9.944695281982423,
"step": 900
},
{
"epoch": 2.333868378812199,
"grad_norm": 34.10860061645508,
"learning_rate": 2.502187970322657e-05,
"loss": 10.465196990966797,
"step": 910
},
{
"epoch": 2.359550561797753,
"grad_norm": 25.76052474975586,
"learning_rate": 2.321777432362764e-05,
"loss": 10.892754364013673,
"step": 920
},
{
"epoch": 2.3852327447833064,
"grad_norm": 25.387516021728516,
"learning_rate": 2.1472594889080756e-05,
"loss": 10.628679656982422,
"step": 930
},
{
"epoch": 2.4109149277688604,
"grad_norm": 22.750202178955078,
"learning_rate": 1.9787680724495617e-05,
"loss": 10.46801986694336,
"step": 940
},
{
"epoch": 2.436597110754414,
"grad_norm": 29.39733123779297,
"learning_rate": 1.8164324904650965e-05,
"loss": 11.32564697265625,
"step": 950
},
{
"epoch": 2.462279293739968,
"grad_norm": 23.36044692993164,
"learning_rate": 1.660377326183412e-05,
"loss": 10.110736083984374,
"step": 960
},
{
"epoch": 2.4879614767255216,
"grad_norm": 22.447628021240234,
"learning_rate": 1.5107223429736272e-05,
"loss": 10.582487487792969,
"step": 970
},
{
"epoch": 2.513643659711075,
"grad_norm": 21.37103271484375,
"learning_rate": 1.3675823924337506e-05,
"loss": 9.974002838134766,
"step": 980
},
{
"epoch": 2.539325842696629,
"grad_norm": 23.942567825317383,
"learning_rate": 1.2310673262486705e-05,
"loss": 10.401480102539063,
"step": 990
},
{
"epoch": 2.5650080256821832,
"grad_norm": 23.232219696044922,
"learning_rate": 1.1012819118853147e-05,
"loss": 10.403594970703125,
"step": 1000
},
{
"epoch": 2.590690208667737,
"grad_norm": 26.392332077026367,
"learning_rate": 9.783257521896227e-06,
"loss": 11.170610046386718,
"step": 1010
},
{
"epoch": 2.6163723916532904,
"grad_norm": 22.941421508789062,
"learning_rate": 8.62293208947107e-06,
"loss": 9.754792785644531,
"step": 1020
},
{
"epoch": 2.6420545746388444,
"grad_norm": 31.97374153137207,
"learning_rate": 7.532733304655848e-06,
"loss": 10.895748138427734,
"step": 1030
},
{
"epoch": 2.667736757624398,
"grad_norm": 22.15962791442871,
"learning_rate": 6.5134978323574066e-06,
"loss": 10.086806488037109,
"step": 1040
},
{
"epoch": 2.693418940609952,
"grad_norm": 23.121976852416992,
"learning_rate": 5.566007877218882e-06,
"loss": 9.806757354736328,
"step": 1050
},
{
"epoch": 2.7191011235955056,
"grad_norm": 22.89173698425293,
"learning_rate": 4.6909905833226966e-06,
"loss": 9.998442077636719,
"step": 1060
},
{
"epoch": 2.744783306581059,
"grad_norm": 28.151565551757812,
"learning_rate": 3.8891174761491735e-06,
"loss": 10.232617950439453,
"step": 1070
},
{
"epoch": 2.770465489566613,
"grad_norm": 27.67608070373535,
"learning_rate": 3.161003947219421e-06,
"loss": 11.071966552734375,
"step": 1080
},
{
"epoch": 2.796147672552167,
"grad_norm": 26.361303329467773,
"learning_rate": 2.5072087818176382e-06,
"loss": 11.048786926269532,
"step": 1090
},
{
"epoch": 2.821829855537721,
"grad_norm": 24.121335983276367,
"learning_rate": 1.928233730155604e-06,
"loss": 10.665110015869141,
"step": 1100
},
{
"epoch": 2.8475120385232744,
"grad_norm": 26.22313117980957,
"learning_rate": 1.4245231223081301e-06,
"loss": 11.044110107421876,
"step": 1110
},
{
"epoch": 2.8731942215088284,
"grad_norm": 26.499591827392578,
"learning_rate": 9.964635272153633e-07,
"loss": 10.147935485839843,
"step": 1120
},
{
"epoch": 2.898876404494382,
"grad_norm": 23.90386390686035,
"learning_rate": 6.443834560132534e-07,
"loss": 10.296646881103516,
"step": 1130
},
{
"epoch": 2.924558587479936,
"grad_norm": 24.210269927978516,
"learning_rate": 3.685531099202111e-07,
"loss": 10.592522430419923,
"step": 1140
},
{
"epoch": 2.9502407704654896,
"grad_norm": 28.532909393310547,
"learning_rate": 1.6918417287318245e-07,
"loss": 9.853338623046875,
"step": 1150
},
{
"epoch": 2.975922953451043,
"grad_norm": 27.218006134033203,
"learning_rate": 4.642964907235481e-08,
"loss": 10.533452606201172,
"step": 1160
},
{
"epoch": 3.0,
"grad_norm": 14.907074928283691,
"learning_rate": 3.837455592847761e-10,
"loss": 9.5769775390625,
"step": 1170
},
{
"epoch": 3.0,
"step": 1170,
"total_flos": 2.709259627140219e+17,
"train_loss": 14.446504472259782,
"train_runtime": 5026.5595,
"train_samples_per_second": 1.859,
"train_steps_per_second": 0.233
}
],
"logging_steps": 10,
"max_steps": 1170,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.709259627140219e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}