Image-Text-to-Text
PEFT
Safetensors
gemma
gemma4
lora
video-understanding
action-recognition
image-sequence
conversational
Instructions to use bear7011/gemma4-e4b-kinetic3K_FT with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use bear7011/gemma4-e4b-kinetic3K_FT with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("google/gemma-4-e4b-it")
model = PeftModel.from_pretrained(base_model, "bear7011/gemma4-e4b-kinetic3K_FT")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 1170, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.025682182985553772, | |
| "grad_norm": 41.89281463623047, | |
| "learning_rate": 5e-05, | |
| "loss": 41.062384033203124, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.051364365971107544, | |
| "grad_norm": 19.25795555114746, | |
| "learning_rate": 0.00010555555555555557, | |
| "loss": 23.114556884765626, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.07704654895666131, | |
| "grad_norm": 15.17092514038086, | |
| "learning_rate": 0.0001611111111111111, | |
| "loss": 19.892947387695312, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.10272873194221509, | |
| "grad_norm": 12.6154146194458, | |
| "learning_rate": 0.0001999965463076377, | |
| "loss": 18.954251098632813, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.12841091492776885, | |
| "grad_norm": 13.695039749145508, | |
| "learning_rate": 0.00019993515396856082, | |
| "loss": 18.60211944580078, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.15409309791332262, | |
| "grad_norm": 13.03475570678711, | |
| "learning_rate": 0.00019979706714271113, | |
| "loss": 17.00911102294922, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1797752808988764, | |
| "grad_norm": 12.657264709472656, | |
| "learning_rate": 0.0001995823918037908, | |
| "loss": 16.737196350097655, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.20545746388443017, | |
| "grad_norm": 9.450530052185059, | |
| "learning_rate": 0.00019929129270278366, | |
| "loss": 17.192404174804686, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.23113964686998395, | |
| "grad_norm": 11.038394927978516, | |
| "learning_rate": 0.0001989239932415185, | |
| "loss": 17.626002502441406, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.2568218298555377, | |
| "grad_norm": 10.112431526184082, | |
| "learning_rate": 0.00019848077530122083, | |
| "loss": 17.375982666015624, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2825040128410915, | |
| "grad_norm": 10.951290130615234, | |
| "learning_rate": 0.0001979619790261853, | |
| "loss": 16.52630157470703, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.30818619582664525, | |
| "grad_norm": 13.097779273986816, | |
| "learning_rate": 0.00019736800256273457, | |
| "loss": 18.660000610351563, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.33386837881219905, | |
| "grad_norm": 10.489601135253906, | |
| "learning_rate": 0.00019669930175366472, | |
| "loss": 18.038082885742188, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.3595505617977528, | |
| "grad_norm": 12.037221908569336, | |
| "learning_rate": 0.0001959563897884124, | |
| "loss": 16.976956176757813, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3852327447833066, | |
| "grad_norm": 13.149781227111816, | |
| "learning_rate": 0.00019513983680921108, | |
| "loss": 17.55983123779297, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.41091492776886035, | |
| "grad_norm": 12.796188354492188, | |
| "learning_rate": 0.00019425026947353992, | |
| "loss": 17.86356201171875, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.43659711075441415, | |
| "grad_norm": 14.400433540344238, | |
| "learning_rate": 0.0001932883704732001, | |
| "loss": 17.5066162109375, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.4622792937399679, | |
| "grad_norm": 16.03416633605957, | |
| "learning_rate": 0.00019225487801038788, | |
| "loss": 18.057034301757813, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.48796147672552165, | |
| "grad_norm": 11.53113842010498, | |
| "learning_rate": 0.00019115058523116733, | |
| "loss": 16.775308227539064, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.5136436597110754, | |
| "grad_norm": 12.079059600830078, | |
| "learning_rate": 0.00018997633961677582, | |
| "loss": 17.352998352050783, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5393258426966292, | |
| "grad_norm": 11.70594310760498, | |
| "learning_rate": 0.00018873304233323122, | |
| "loss": 18.17870635986328, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.565008025682183, | |
| "grad_norm": 13.156736373901367, | |
| "learning_rate": 0.00018742164753973855, | |
| "loss": 16.766079711914063, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5906902086677368, | |
| "grad_norm": 11.801448822021484, | |
| "learning_rate": 0.0001860431616564278, | |
| "loss": 17.98457489013672, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.6163723916532905, | |
| "grad_norm": 11.434338569641113, | |
| "learning_rate": 0.0001845986425919841, | |
| "loss": 17.011558532714844, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6420545746388443, | |
| "grad_norm": 11.592355728149414, | |
| "learning_rate": 0.00018308919893176396, | |
| "loss": 18.073362731933592, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6677367576243981, | |
| "grad_norm": 12.140549659729004, | |
| "learning_rate": 0.00018151598908701947, | |
| "loss": 17.267474365234374, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6934189406099518, | |
| "grad_norm": 12.207261085510254, | |
| "learning_rate": 0.00017988022040588416, | |
| "loss": 17.970367431640625, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.7191011235955056, | |
| "grad_norm": 12.024649620056152, | |
| "learning_rate": 0.000178183148246803, | |
| "loss": 18.595138549804688, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7447833065810594, | |
| "grad_norm": 10.953705787658691, | |
| "learning_rate": 0.0001764260750151167, | |
| "loss": 17.661044311523437, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.7704654895666132, | |
| "grad_norm": 12.161933898925781, | |
| "learning_rate": 0.0001746103491635407, | |
| "loss": 17.323040771484376, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7961476725521669, | |
| "grad_norm": 14.1478271484375, | |
| "learning_rate": 0.00017273736415730488, | |
| "loss": 17.589439392089844, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.8218298555377207, | |
| "grad_norm": 11.344724655151367, | |
| "learning_rate": 0.0001708085574047494, | |
| "loss": 18.087261962890626, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8475120385232745, | |
| "grad_norm": 11.49538803100586, | |
| "learning_rate": 0.00016882540915419623, | |
| "loss": 16.709014892578125, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.8731942215088283, | |
| "grad_norm": 11.975954055786133, | |
| "learning_rate": 0.00016678944135794374, | |
| "loss": 18.784584045410156, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.898876404494382, | |
| "grad_norm": 10.979130744934082, | |
| "learning_rate": 0.00016470221650425582, | |
| "loss": 17.035598754882812, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.9245585874799358, | |
| "grad_norm": 10.86307144165039, | |
| "learning_rate": 0.00016256533641824177, | |
| "loss": 17.912120056152343, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.9502407704654896, | |
| "grad_norm": 11.18655776977539, | |
| "learning_rate": 0.00016038044103254775, | |
| "loss": 16.4998046875, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.9759229534510433, | |
| "grad_norm": 11.321154594421387, | |
| "learning_rate": 0.00015814920712880267, | |
| "loss": 17.580471801757813, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 6.271613597869873, | |
| "learning_rate": 0.0001558733470507847, | |
| "loss": 15.328689575195312, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.0256821829855538, | |
| "grad_norm": 13.181764602661133, | |
| "learning_rate": 0.00015355460739029586, | |
| "loss": 15.265965270996094, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.0513643659711076, | |
| "grad_norm": 11.904574394226074, | |
| "learning_rate": 0.00015119476764675305, | |
| "loss": 14.833596801757812, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.0770465489566614, | |
| "grad_norm": 12.352519989013672, | |
| "learning_rate": 0.0001487956388615247, | |
| "loss": 13.833314514160156, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.102728731942215, | |
| "grad_norm": 15.478001594543457, | |
| "learning_rate": 0.00014635906222806058, | |
| "loss": 15.767561340332032, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.1284109149277688, | |
| "grad_norm": 15.335821151733398, | |
| "learning_rate": 0.00014388690767888154, | |
| "loss": 14.462684631347656, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.1540930979133226, | |
| "grad_norm": 16.043373107910156, | |
| "learning_rate": 0.00014138107245051392, | |
| "loss": 15.908651733398438, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.1797752808988764, | |
| "grad_norm": 12.852351188659668, | |
| "learning_rate": 0.00013884347962746948, | |
| "loss": 15.758128356933593, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.2054574638844302, | |
| "grad_norm": 15.320487022399902, | |
| "learning_rate": 0.00013627607666638858, | |
| "loss": 15.051063537597656, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.231139646869984, | |
| "grad_norm": 16.30264663696289, | |
| "learning_rate": 0.00013368083390147913, | |
| "loss": 14.599794006347656, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.2568218298555376, | |
| "grad_norm": 13.734286308288574, | |
| "learning_rate": 0.00013105974303239838, | |
| "loss": 14.052903747558593, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.2825040128410916, | |
| "grad_norm": 15.704442024230957, | |
| "learning_rate": 0.0001284148155957379, | |
| "loss": 13.530386352539063, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.3081861958266452, | |
| "grad_norm": 14.808961868286133, | |
| "learning_rate": 0.00012574808142128477, | |
| "loss": 14.240873718261719, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.333868378812199, | |
| "grad_norm": 13.389018058776855, | |
| "learning_rate": 0.00012306158707424403, | |
| "loss": 14.467668151855468, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.3595505617977528, | |
| "grad_norm": 16.077634811401367, | |
| "learning_rate": 0.00012035739428461739, | |
| "loss": 13.67303466796875, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.3852327447833066, | |
| "grad_norm": 14.300647735595703, | |
| "learning_rate": 0.00011763757836494403, | |
| "loss": 15.072747802734375, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.4109149277688604, | |
| "grad_norm": 12.59499454498291, | |
| "learning_rate": 0.00011490422661761744, | |
| "loss": 14.030448913574219, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.4365971107544142, | |
| "grad_norm": 17.819976806640625, | |
| "learning_rate": 0.00011215943673300093, | |
| "loss": 13.556326293945313, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.462279293739968, | |
| "grad_norm": 15.307595252990723, | |
| "learning_rate": 0.00010940531517957073, | |
| "loss": 15.464706420898438, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.4879614767255216, | |
| "grad_norm": 14.918625831604004, | |
| "learning_rate": 0.00010664397558732244, | |
| "loss": 14.040945434570313, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.5136436597110754, | |
| "grad_norm": 13.040802955627441, | |
| "learning_rate": 0.0001038775371256817, | |
| "loss": 14.671842956542969, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.5393258426966292, | |
| "grad_norm": 14.143183708190918, | |
| "learning_rate": 0.00010110812287716327, | |
| "loss": 14.198591613769532, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.565008025682183, | |
| "grad_norm": 16.373567581176758, | |
| "learning_rate": 9.833785820802739e-05, | |
| "loss": 14.670704650878907, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.5906902086677368, | |
| "grad_norm": 17.14202117919922, | |
| "learning_rate": 9.556886913718317e-05, | |
| "loss": 14.713813781738281, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.6163723916532904, | |
| "grad_norm": 13.428468704223633, | |
| "learning_rate": 9.280328070459135e-05, | |
| "loss": 14.256681823730469, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.6420545746388444, | |
| "grad_norm": 14.759570121765137, | |
| "learning_rate": 9.004321534041835e-05, | |
| "loss": 14.875436401367187, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.667736757624398, | |
| "grad_norm": 14.605072021484375, | |
| "learning_rate": 8.729079123619286e-05, | |
| "loss": 14.151382446289062, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.6934189406099518, | |
| "grad_norm": 15.300275802612305, | |
| "learning_rate": 8.454812071921596e-05, | |
| "loss": 14.209205627441406, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.7191011235955056, | |
| "grad_norm": 15.565445899963379, | |
| "learning_rate": 8.181730863147093e-05, | |
| "loss": 15.246949768066406, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.7447833065810594, | |
| "grad_norm": 13.463878631591797, | |
| "learning_rate": 7.910045071427829e-05, | |
| "loss": 14.081675720214843, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.7704654895666132, | |
| "grad_norm": 15.799697875976562, | |
| "learning_rate": 7.63996319999347e-05, | |
| "loss": 14.659947204589844, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.7961476725521668, | |
| "grad_norm": 15.421786308288574, | |
| "learning_rate": 7.371692521157048e-05, | |
| "loss": 13.642781066894532, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.8218298555377208, | |
| "grad_norm": 16.400951385498047, | |
| "learning_rate": 7.10543891724537e-05, | |
| "loss": 14.9028076171875, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.8475120385232744, | |
| "grad_norm": 16.549213409423828, | |
| "learning_rate": 6.841406722596191e-05, | |
| "loss": 14.747923278808594, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.8731942215088284, | |
| "grad_norm": 14.868678092956543, | |
| "learning_rate": 6.579798566743314e-05, | |
| "loss": 14.379522705078125, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.898876404494382, | |
| "grad_norm": 17.223217010498047, | |
| "learning_rate": 6.320815218910101e-05, | |
| "loss": 14.513031005859375, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.9245585874799358, | |
| "grad_norm": 15.178607940673828, | |
| "learning_rate": 6.064655433930624e-05, | |
| "loss": 14.75238494873047, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.9502407704654896, | |
| "grad_norm": 14.607568740844727, | |
| "learning_rate": 5.8115157997167536e-05, | |
| "loss": 14.215359497070313, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.9759229534510432, | |
| "grad_norm": 15.849374771118164, | |
| "learning_rate": 5.561590586388221e-05, | |
| "loss": 13.844842529296875, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 8.945368766784668, | |
| "learning_rate": 5.315071597181504e-05, | |
| "loss": 13.500308227539062, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.0256821829855536, | |
| "grad_norm": 14.872233390808105, | |
| "learning_rate": 5.072148021251821e-05, | |
| "loss": 10.96926498413086, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.0513643659711076, | |
| "grad_norm": 16.96340560913086, | |
| "learning_rate": 4.833006288481371e-05, | |
| "loss": 10.601210021972657, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.077046548956661, | |
| "grad_norm": 17.987470626831055, | |
| "learning_rate": 4.597829926405075e-05, | |
| "loss": 11.348848724365235, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.102728731942215, | |
| "grad_norm": 18.53151512145996, | |
| "learning_rate": 4.3667994193637796e-05, | |
| "loss": 10.507981109619141, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.128410914927769, | |
| "grad_norm": 23.948802947998047, | |
| "learning_rate": 4.140092069992867e-05, | |
| "loss": 11.215933227539063, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.154093097913323, | |
| "grad_norm": 21.3605899810791, | |
| "learning_rate": 3.91788186315269e-05, | |
| "loss": 10.393006896972656, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.1797752808988764, | |
| "grad_norm": 26.548303604125977, | |
| "learning_rate": 3.7003393324051874e-05, | |
| "loss": 10.384098052978516, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.20545746388443, | |
| "grad_norm": 23.979907989501953, | |
| "learning_rate": 3.487631429139183e-05, | |
| "loss": 10.094139862060548, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.231139646869984, | |
| "grad_norm": 18.00309944152832, | |
| "learning_rate": 3.279921394444776e-05, | |
| "loss": 10.624467468261718, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.2568218298555376, | |
| "grad_norm": 24.657569885253906, | |
| "learning_rate": 3.077368633835205e-05, | |
| "loss": 10.711078643798828, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.2825040128410916, | |
| "grad_norm": 22.551204681396484, | |
| "learning_rate": 2.8801285949122593e-05, | |
| "loss": 10.329103088378906, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.308186195826645, | |
| "grad_norm": 26.180330276489258, | |
| "learning_rate": 2.688352648069198e-05, | |
| "loss": 9.944695281982423, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.333868378812199, | |
| "grad_norm": 34.10860061645508, | |
| "learning_rate": 2.502187970322657e-05, | |
| "loss": 10.465196990966797, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.359550561797753, | |
| "grad_norm": 25.76052474975586, | |
| "learning_rate": 2.321777432362764e-05, | |
| "loss": 10.892754364013673, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.3852327447833064, | |
| "grad_norm": 25.387516021728516, | |
| "learning_rate": 2.1472594889080756e-05, | |
| "loss": 10.628679656982422, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.4109149277688604, | |
| "grad_norm": 22.750202178955078, | |
| "learning_rate": 1.9787680724495617e-05, | |
| "loss": 10.46801986694336, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.436597110754414, | |
| "grad_norm": 29.39733123779297, | |
| "learning_rate": 1.8164324904650965e-05, | |
| "loss": 11.32564697265625, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.462279293739968, | |
| "grad_norm": 23.36044692993164, | |
| "learning_rate": 1.660377326183412e-05, | |
| "loss": 10.110736083984374, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.4879614767255216, | |
| "grad_norm": 22.447628021240234, | |
| "learning_rate": 1.5107223429736272e-05, | |
| "loss": 10.582487487792969, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.513643659711075, | |
| "grad_norm": 21.37103271484375, | |
| "learning_rate": 1.3675823924337506e-05, | |
| "loss": 9.974002838134766, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.539325842696629, | |
| "grad_norm": 23.942567825317383, | |
| "learning_rate": 1.2310673262486705e-05, | |
| "loss": 10.401480102539063, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.5650080256821832, | |
| "grad_norm": 23.232219696044922, | |
| "learning_rate": 1.1012819118853147e-05, | |
| "loss": 10.403594970703125, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.590690208667737, | |
| "grad_norm": 26.392332077026367, | |
| "learning_rate": 9.783257521896227e-06, | |
| "loss": 11.170610046386718, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.6163723916532904, | |
| "grad_norm": 22.941421508789062, | |
| "learning_rate": 8.62293208947107e-06, | |
| "loss": 9.754792785644531, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.6420545746388444, | |
| "grad_norm": 31.97374153137207, | |
| "learning_rate": 7.532733304655848e-06, | |
| "loss": 10.895748138427734, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.667736757624398, | |
| "grad_norm": 22.15962791442871, | |
| "learning_rate": 6.5134978323574066e-06, | |
| "loss": 10.086806488037109, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.693418940609952, | |
| "grad_norm": 23.121976852416992, | |
| "learning_rate": 5.566007877218882e-06, | |
| "loss": 9.806757354736328, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.7191011235955056, | |
| "grad_norm": 22.89173698425293, | |
| "learning_rate": 4.6909905833226966e-06, | |
| "loss": 9.998442077636719, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.744783306581059, | |
| "grad_norm": 28.151565551757812, | |
| "learning_rate": 3.8891174761491735e-06, | |
| "loss": 10.232617950439453, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.770465489566613, | |
| "grad_norm": 27.67608070373535, | |
| "learning_rate": 3.161003947219421e-06, | |
| "loss": 11.071966552734375, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.796147672552167, | |
| "grad_norm": 26.361303329467773, | |
| "learning_rate": 2.5072087818176382e-06, | |
| "loss": 11.048786926269532, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.821829855537721, | |
| "grad_norm": 24.121335983276367, | |
| "learning_rate": 1.928233730155604e-06, | |
| "loss": 10.665110015869141, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.8475120385232744, | |
| "grad_norm": 26.22313117980957, | |
| "learning_rate": 1.4245231223081301e-06, | |
| "loss": 11.044110107421876, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.8731942215088284, | |
| "grad_norm": 26.499591827392578, | |
| "learning_rate": 9.964635272153633e-07, | |
| "loss": 10.147935485839843, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.898876404494382, | |
| "grad_norm": 23.90386390686035, | |
| "learning_rate": 6.443834560132534e-07, | |
| "loss": 10.296646881103516, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.924558587479936, | |
| "grad_norm": 24.210269927978516, | |
| "learning_rate": 3.685531099202111e-07, | |
| "loss": 10.592522430419923, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.9502407704654896, | |
| "grad_norm": 28.532909393310547, | |
| "learning_rate": 1.6918417287318245e-07, | |
| "loss": 9.853338623046875, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.975922953451043, | |
| "grad_norm": 27.218006134033203, | |
| "learning_rate": 4.642964907235481e-08, | |
| "loss": 10.533452606201172, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 14.907074928283691, | |
| "learning_rate": 3.837455592847761e-10, | |
| "loss": 9.5769775390625, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 1170, | |
| "total_flos": 2.709259627140219e+17, | |
| "train_loss": 14.446504472259782, | |
| "train_runtime": 5026.5595, | |
| "train_samples_per_second": 1.859, | |
| "train_steps_per_second": 0.233 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1170, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.709259627140219e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |