Image-Text-to-Text
PEFT
Safetensors
English
vision-language
blockchain-security
attack-detection
lora
agentic-economy
dogon
rocm
amd
conversational
Instructions for using Ibonon/imina_na_lora with libraries, inference providers, notebooks, and local apps. Follow the links below to get started.
- Libraries
- PEFT
How to use Ibonon/imina_na_lora with PEFT:
from peft import PeftModel
from transformers import Qwen2VLForConditionalGeneration

# Qwen/Qwen2-VL-2B-Instruct is a vision-language model, so the base is loaded
# with its conditional-generation class before attaching the LoRA adapter.
base_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
model = PeftModel.from_pretrained(base_model, "Ibonon/imina_na_lora")

A multimodal inference sketch follows the notebook list below.

- Notebooks
- Google Colab
- Kaggle
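Once the adapter is attached, the base model's processor can drive multimodal inference. The sketch below follows the usual Qwen2-VL chat-template pattern and assumes `model` from the PEFT snippet above; the image path `example.png` and the prompt text are placeholders, not part of this model card.

```python
# Minimal inference sketch (assumes `model` from the PEFT snippet above).
from PIL import Image
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")

# Build a chat-style prompt with one image placeholder and one text turn.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": "Describe this image."},
    ]}
]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)

image = Image.open("example.png")  # placeholder path
inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(model.device)

# Generate and decode only the newly produced tokens.
output_ids = model.generate(**inputs, max_new_tokens=128)
new_tokens = output_ids[:, inputs["input_ids"].shape[1]:]
print(processor.batch_decode(new_tokens, skip_special_tokens=True)[0])
```

The file reproduced below is the training state saved with the checkpoint (the trainer_state.json format written by transformers.Trainer).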
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.4,
  "eval_steps": 500,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.004, "grad_norm": 2.131298542022705, "learning_rate": 0.00019962000000000002, "loss": 2.0121, "mean_token_accuracy": 0.6703190118074417, "num_tokens": 2348.0, "step": 20 },
    { "epoch": 0.008, "grad_norm": 1.2890418767929077, "learning_rate": 0.00019922, "loss": 0.2751, "mean_token_accuracy": 0.9120438575744629, "num_tokens": 4697.0, "step": 40 },
    { "epoch": 0.012, "grad_norm": 0.8126867413520813, "learning_rate": 0.00019882, "loss": 0.1966, "mean_token_accuracy": 0.9203487157821655, "num_tokens": 7014.0, "step": 60 },
    { "epoch": 0.016, "grad_norm": 0.6051881313323975, "learning_rate": 0.00019842000000000001, "loss": 0.1851, "mean_token_accuracy": 0.9291799515485764, "num_tokens": 9327.0, "step": 80 },
    { "epoch": 0.02, "grad_norm": 0.6378348469734192, "learning_rate": 0.00019802, "loss": 0.1766, "mean_token_accuracy": 0.9286984890699387, "num_tokens": 11670.0, "step": 100 },
    { "epoch": 0.024, "grad_norm": 0.624138593673706, "learning_rate": 0.00019762, "loss": 0.1784, "mean_token_accuracy": 0.9264282643795013, "num_tokens": 14000.0, "step": 120 },
    { "epoch": 0.028, "grad_norm": 0.2111046463251114, "learning_rate": 0.00019722, "loss": 0.1702, "mean_token_accuracy": 0.9321970880031586, "num_tokens": 16329.0, "step": 140 },
    { "epoch": 0.032, "grad_norm": 0.5350440740585327, "learning_rate": 0.00019682, "loss": 0.171, "mean_token_accuracy": 0.9311463803052902, "num_tokens": 18667.0, "step": 160 },
    { "epoch": 0.036, "grad_norm": 0.19237647950649261, "learning_rate": 0.00019642, "loss": 0.167, "mean_token_accuracy": 0.9345656305551528, "num_tokens": 20985.0, "step": 180 },
    { "epoch": 0.04, "grad_norm": 0.28153756260871887, "learning_rate": 0.00019602, "loss": 0.1674, "mean_token_accuracy": 0.9329667061567306, "num_tokens": 23325.0, "step": 200 },
    { "epoch": 0.044, "grad_norm": 0.8545331954956055, "learning_rate": 0.00019562, "loss": 0.166, "mean_token_accuracy": 0.9344212204217911, "num_tokens": 25670.0, "step": 220 },
    { "epoch": 0.048, "grad_norm": 0.24941129982471466, "learning_rate": 0.00019522, "loss": 0.1661, "mean_token_accuracy": 0.934859549999237, "num_tokens": 28016.0, "step": 240 },
    { "epoch": 0.052, "grad_norm": 0.29549548029899597, "learning_rate": 0.00019482, "loss": 0.1707, "mean_token_accuracy": 0.9345517784357071, "num_tokens": 30345.0, "step": 260 },
    { "epoch": 0.056, "grad_norm": 0.20388178527355194, "learning_rate": 0.00019442, "loss": 0.1673, "mean_token_accuracy": 0.9353183209896088, "num_tokens": 32691.0, "step": 280 },
    { "epoch": 0.06, "grad_norm": 0.10762794315814972, "learning_rate": 0.00019402, "loss": 0.1642, "mean_token_accuracy": 0.9325390756130219, "num_tokens": 35030.0, "step": 300 },
    { "epoch": 0.064, "grad_norm": 0.07676753401756287, "learning_rate": 0.00019362, "loss": 0.1633, "mean_token_accuracy": 0.9348760217428207, "num_tokens": 37359.0, "step": 320 },
    { "epoch": 0.068, "grad_norm": 0.06781225651502609, "learning_rate": 0.00019322, "loss": 0.1589, "mean_token_accuracy": 0.936154904961586, "num_tokens": 39707.0, "step": 340 },
    { "epoch": 0.072, "grad_norm": 0.10010460019111633, "learning_rate": 0.00019282000000000001, "loss": 0.1583, "mean_token_accuracy": 0.9410124599933625, "num_tokens": 42071.0, "step": 360 },
    { "epoch": 0.076, "grad_norm": 0.07932794839143753, "learning_rate": 0.00019242, "loss": 0.1608, "mean_token_accuracy": 0.9380002528429031, "num_tokens": 44404.0, "step": 380 },
    { "epoch": 0.08, "grad_norm": 0.06678586453199387, "learning_rate": 0.00019202, "loss": 0.1633, "mean_token_accuracy": 0.9347729980945587, "num_tokens": 46715.0, "step": 400 },
    { "epoch": 0.084, "grad_norm": 0.05118393525481224, "learning_rate": 0.00019162, "loss": 0.1621, "mean_token_accuracy": 0.9356200367212295, "num_tokens": 49030.0, "step": 420 },
    { "epoch": 0.088, "grad_norm": 0.07563836127519608, "learning_rate": 0.00019122, "loss": 0.1603, "mean_token_accuracy": 0.9362942427396774, "num_tokens": 51366.0, "step": 440 },
    { "epoch": 0.092, "grad_norm": 0.053388580679893494, "learning_rate": 0.00019082, "loss": 0.1585, "mean_token_accuracy": 0.9377258807420731, "num_tokens": 53706.0, "step": 460 },
    { "epoch": 0.096, "grad_norm": 0.05659119412302971, "learning_rate": 0.00019042, "loss": 0.1575, "mean_token_accuracy": 0.937972965836525, "num_tokens": 56052.0, "step": 480 },
    { "epoch": 0.1, "grad_norm": 0.04934714362025261, "learning_rate": 0.00019002, "loss": 0.1606, "mean_token_accuracy": 0.9356311202049256, "num_tokens": 58374.0, "step": 500 },
    { "epoch": 0.104, "grad_norm": 0.05647804215550423, "learning_rate": 0.00018962000000000002, "loss": 0.1587, "mean_token_accuracy": 0.9353525519371033, "num_tokens": 60706.0, "step": 520 },
    { "epoch": 0.108, "grad_norm": 0.058523017913103104, "learning_rate": 0.00018922, "loss": 0.1595, "mean_token_accuracy": 0.9371987581253052, "num_tokens": 63021.0, "step": 540 },
    { "epoch": 0.112, "grad_norm": 0.03793497756123543, "learning_rate": 0.00018882000000000003, "loss": 0.1584, "mean_token_accuracy": 0.9341553807258606, "num_tokens": 65369.0, "step": 560 },
    { "epoch": 0.116, "grad_norm": 0.04743633046746254, "learning_rate": 0.00018842000000000002, "loss": 0.1586, "mean_token_accuracy": 0.936157900094986, "num_tokens": 67700.0, "step": 580 },
    { "epoch": 0.12, "grad_norm": 0.05463261529803276, "learning_rate": 0.00018802, "loss": 0.1578, "mean_token_accuracy": 0.937694975733757, "num_tokens": 70038.0, "step": 600 },
    { "epoch": 0.124, "grad_norm": 0.11279874294996262, "learning_rate": 0.00018762000000000002, "loss": 0.1584, "mean_token_accuracy": 0.9382745862007141, "num_tokens": 72367.0, "step": 620 },
    { "epoch": 0.128, "grad_norm": 0.050823234021663666, "learning_rate": 0.00018722, "loss": 0.1602, "mean_token_accuracy": 0.9388011395931244, "num_tokens": 74677.0, "step": 640 },
    { "epoch": 0.132, "grad_norm": 0.04983159899711609, "learning_rate": 0.00018682000000000003, "loss": 0.1584, "mean_token_accuracy": 0.9374112606048584, "num_tokens": 77014.0, "step": 660 },
    { "epoch": 0.136, "grad_norm": 0.05084273964166641, "learning_rate": 0.00018642000000000002, "loss": 0.1591, "mean_token_accuracy": 0.9387890577316285, "num_tokens": 79339.0, "step": 680 },
    { "epoch": 0.14, "grad_norm": 0.04936506226658821, "learning_rate": 0.00018602, "loss": 0.157, "mean_token_accuracy": 0.9359721839427948, "num_tokens": 81688.0, "step": 700 },
    { "epoch": 0.144, "grad_norm": 0.04119481146335602, "learning_rate": 0.00018562000000000003, "loss": 0.1584, "mean_token_accuracy": 0.9359663873910904, "num_tokens": 84025.0, "step": 720 },
    { "epoch": 0.148, "grad_norm": 0.03988514095544815, "learning_rate": 0.00018522000000000002, "loss": 0.1595, "mean_token_accuracy": 0.9372004926204681, "num_tokens": 86341.0, "step": 740 },
    { "epoch": 0.152, "grad_norm": 0.04381653666496277, "learning_rate": 0.00018482, "loss": 0.1591, "mean_token_accuracy": 0.9373360633850097, "num_tokens": 88665.0, "step": 760 },
    { "epoch": 0.156, "grad_norm": 0.03504428267478943, "learning_rate": 0.00018442000000000003, "loss": 0.1579, "mean_token_accuracy": 0.9403579801321029, "num_tokens": 90997.0, "step": 780 },
    { "epoch": 0.16, "grad_norm": 0.03745226562023163, "learning_rate": 0.00018402000000000002, "loss": 0.1584, "mean_token_accuracy": 0.9388439536094666, "num_tokens": 93322.0, "step": 800 },
    { "epoch": 0.164, "grad_norm": 0.033838506788015366, "learning_rate": 0.00018362, "loss": 0.1562, "mean_token_accuracy": 0.9360319077968597, "num_tokens": 95688.0, "step": 820 },
    { "epoch": 0.168, "grad_norm": 0.0314440056681633, "learning_rate": 0.00018322000000000002, "loss": 0.1596, "mean_token_accuracy": 0.9349893003702163, "num_tokens": 98005.0, "step": 840 },
    { "epoch": 0.172, "grad_norm": 0.06577116250991821, "learning_rate": 0.00018282000000000001, "loss": 0.1589, "mean_token_accuracy": 0.9364802747964859, "num_tokens": 100327.0, "step": 860 },
    { "epoch": 0.176, "grad_norm": 0.029735982418060303, "learning_rate": 0.00018242, "loss": 0.1576, "mean_token_accuracy": 0.9365677893161773, "num_tokens": 102671.0, "step": 880 },
    { "epoch": 0.18, "grad_norm": 0.03155644237995148, "learning_rate": 0.00018202000000000002, "loss": 0.1587, "mean_token_accuracy": 0.9387517213821411, "num_tokens": 104996.0, "step": 900 },
    { "epoch": 0.184, "grad_norm": 0.027411427348852158, "learning_rate": 0.00018162, "loss": 0.1581, "mean_token_accuracy": 0.9342827945947647, "num_tokens": 107337.0, "step": 920 },
    { "epoch": 0.188, "grad_norm": 0.024014495313167572, "learning_rate": 0.00018122, "loss": 0.1583, "mean_token_accuracy": 0.9355534523725509, "num_tokens": 109674.0, "step": 940 },
    { "epoch": 0.192, "grad_norm": 0.02990046516060829, "learning_rate": 0.00018082000000000002, "loss": 0.1577, "mean_token_accuracy": 0.9373938798904419, "num_tokens": 112015.0, "step": 960 },
    { "epoch": 0.196, "grad_norm": 0.020676879212260246, "learning_rate": 0.00018042, "loss": 0.1582, "mean_token_accuracy": 0.9363098949193954, "num_tokens": 114349.0, "step": 980 },
    { "epoch": 0.2, "grad_norm": 0.024556942284107208, "learning_rate": 0.00018002, "loss": 0.159, "mean_token_accuracy": 0.9355833351612091, "num_tokens": 116672.0, "step": 1000 },
    { "epoch": 0.204, "grad_norm": 0.22343556582927704, "learning_rate": 0.00017962000000000002, "loss": 0.157, "mean_token_accuracy": 0.9363467574119568, "num_tokens": 119023.0, "step": 1020 },
    { "epoch": 0.208, "grad_norm": 0.515048623085022, "learning_rate": 0.00017922, "loss": 0.1585, "mean_token_accuracy": 0.9374267637729645, "num_tokens": 121366.0, "step": 1040 },
    { "epoch": 0.212, "grad_norm": 0.2672664225101471, "learning_rate": 0.00017882, "loss": 0.1704, "mean_token_accuracy": 0.938492265343666, "num_tokens": 123719.0, "step": 1060 },
    { "epoch": 0.216, "grad_norm": 0.11909265071153641, "learning_rate": 0.00017842000000000002, "loss": 0.1752, "mean_token_accuracy": 0.9297878712415695, "num_tokens": 126053.0, "step": 1080 },
    { "epoch": 0.22, "grad_norm": 0.11977271735668182, "learning_rate": 0.00017802, "loss": 0.1652, "mean_token_accuracy": 0.9360411554574967, "num_tokens": 128379.0, "step": 1100 },
    { "epoch": 0.224, "grad_norm": 0.17722292244434357, "learning_rate": 0.00017762, "loss": 0.1697, "mean_token_accuracy": 0.9314894318580628, "num_tokens": 130700.0, "step": 1120 },
    { "epoch": 0.228, "grad_norm": 0.8375388979911804, "learning_rate": 0.00017722000000000001, "loss": 0.1895, "mean_token_accuracy": 0.9316652357578278, "num_tokens": 133026.0, "step": 1140 },
    { "epoch": 0.232, "grad_norm": 0.10569056123495102, "learning_rate": 0.00017682, "loss": 0.1679, "mean_token_accuracy": 0.9333775132894516, "num_tokens": 135371.0, "step": 1160 },
    { "epoch": 0.236, "grad_norm": 0.07626856118440628, "learning_rate": 0.00017642, "loss": 0.1613, "mean_token_accuracy": 0.9380175620317459, "num_tokens": 137695.0, "step": 1180 },
    { "epoch": 0.24, "grad_norm": 0.06852507591247559, "learning_rate": 0.00017602, "loss": 0.1693, "mean_token_accuracy": 0.9329774439334869, "num_tokens": 140007.0, "step": 1200 },
    { "epoch": 0.244, "grad_norm": 0.11691898107528687, "learning_rate": 0.00017562, "loss": 0.159, "mean_token_accuracy": 0.9360336065292358, "num_tokens": 142345.0, "step": 1220 },
    { "epoch": 0.248, "grad_norm": 0.10780195891857147, "learning_rate": 0.00017522000000000002, "loss": 0.1586, "mean_token_accuracy": 0.9376411676406861, "num_tokens": 144693.0, "step": 1240 },
    { "epoch": 0.252, "grad_norm": 0.07631397247314453, "learning_rate": 0.00017482, "loss": 0.1633, "mean_token_accuracy": 0.9390978574752807, "num_tokens": 147031.0, "step": 1260 },
    { "epoch": 0.256, "grad_norm": 0.05569858103990555, "learning_rate": 0.00017442, "loss": 0.1615, "mean_token_accuracy": 0.9400094121694564, "num_tokens": 149371.0, "step": 1280 },
    { "epoch": 0.26, "grad_norm": 0.04815123230218887, "learning_rate": 0.00017402000000000002, "loss": 0.1584, "mean_token_accuracy": 0.9370361328125, "num_tokens": 151716.0, "step": 1300 },
    { "epoch": 0.264, "grad_norm": 0.11904877424240112, "learning_rate": 0.00017362, "loss": 0.159, "mean_token_accuracy": 0.941026845574379, "num_tokens": 154046.0, "step": 1320 },
    { "epoch": 0.268, "grad_norm": 0.12382964044809341, "learning_rate": 0.00017322, "loss": 0.1594, "mean_token_accuracy": 0.9340965986251831, "num_tokens": 156382.0, "step": 1340 },
    { "epoch": 0.272, "grad_norm": 0.05523005872964859, "learning_rate": 0.00017282000000000002, "loss": 0.1593, "mean_token_accuracy": 0.9396583586931229, "num_tokens": 158705.0, "step": 1360 },
    { "epoch": 0.276, "grad_norm": 0.05591598525643349, "learning_rate": 0.00017242, "loss": 0.1592, "mean_token_accuracy": 0.9364501267671586, "num_tokens": 161025.0, "step": 1380 },
    { "epoch": 0.28, "grad_norm": 0.06780663877725601, "learning_rate": 0.00017202, "loss": 0.1618, "mean_token_accuracy": 0.936437115073204, "num_tokens": 163345.0, "step": 1400 },
    { "epoch": 0.284, "grad_norm": 0.03291817009449005, "learning_rate": 0.00017162000000000001, "loss": 0.1569, "mean_token_accuracy": 0.9380638599395752, "num_tokens": 165711.0, "step": 1420 },
    { "epoch": 0.288, "grad_norm": 0.04884820431470871, "learning_rate": 0.00017122, "loss": 0.1608, "mean_token_accuracy": 0.9370934247970581, "num_tokens": 168007.0, "step": 1440 },
    { "epoch": 0.292, "grad_norm": 0.04577581211924553, "learning_rate": 0.00017082, "loss": 0.1595, "mean_token_accuracy": 0.9374603897333145, "num_tokens": 170332.0, "step": 1460 },
    { "epoch": 0.296, "grad_norm": 0.03866467997431755, "learning_rate": 0.00017042, "loss": 0.1582, "mean_token_accuracy": 0.9355076909065246, "num_tokens": 172667.0, "step": 1480 },
    { "epoch": 0.3, "grad_norm": 0.06204424798488617, "learning_rate": 0.00017002, "loss": 0.1599, "mean_token_accuracy": 0.9372841835021972, "num_tokens": 174970.0, "step": 1500 },
    { "epoch": 0.304, "grad_norm": 0.03288702666759491, "learning_rate": 0.00016962, "loss": 0.1578, "mean_token_accuracy": 0.93585424721241, "num_tokens": 177317.0, "step": 1520 },
    { "epoch": 0.308, "grad_norm": 0.03605024516582489, "learning_rate": 0.00016922, "loss": 0.1597, "mean_token_accuracy": 0.9366346269845962, "num_tokens": 179629.0, "step": 1540 },
    { "epoch": 0.312, "grad_norm": 0.03328383341431618, "learning_rate": 0.00016882, "loss": 0.1582, "mean_token_accuracy": 0.9372009009122848, "num_tokens": 181960.0, "step": 1560 },
    { "epoch": 0.316, "grad_norm": 0.03522924706339836, "learning_rate": 0.00016842, "loss": 0.1572, "mean_token_accuracy": 0.9355730235576629, "num_tokens": 184314.0, "step": 1580 },
    { "epoch": 0.32, "grad_norm": 0.0317777544260025, "learning_rate": 0.00016802, "loss": 0.1572, "mean_token_accuracy": 0.9378984242677688, "num_tokens": 186658.0, "step": 1600 },
    { "epoch": 0.324, "grad_norm": 0.07111163437366486, "learning_rate": 0.00016762, "loss": 0.1613, "mean_token_accuracy": 0.9344337552785873, "num_tokens": 188974.0, "step": 1620 },
    { "epoch": 0.328, "grad_norm": 0.04765714704990387, "learning_rate": 0.00016722, "loss": 0.1608, "mean_token_accuracy": 0.9345141768455505, "num_tokens": 191276.0, "step": 1640 },
    { "epoch": 0.332, "grad_norm": 0.041960619390010834, "learning_rate": 0.00016682, "loss": 0.1594, "mean_token_accuracy": 0.9375987917184829, "num_tokens": 193597.0, "step": 1660 },
    { "epoch": 0.336, "grad_norm": 0.041757769882678986, "learning_rate": 0.00016642, "loss": 0.157, "mean_token_accuracy": 0.9367031455039978, "num_tokens": 195949.0, "step": 1680 },
    { "epoch": 0.34, "grad_norm": 0.05323236435651779, "learning_rate": 0.00016601999999999999, "loss": 0.1654, "mean_token_accuracy": 0.9359392642974853, "num_tokens": 198267.0, "step": 1700 },
    { "epoch": 0.344, "grad_norm": 0.08934314548969269, "learning_rate": 0.00016562, "loss": 0.1619, "mean_token_accuracy": 0.9331722050905228, "num_tokens": 200613.0, "step": 1720 },
    { "epoch": 0.348, "grad_norm": 0.033347178250551224, "learning_rate": 0.00016522, "loss": 0.1611, "mean_token_accuracy": 0.9317500472068787, "num_tokens": 202927.0, "step": 1740 },
    { "epoch": 0.352, "grad_norm": 0.03238425776362419, "learning_rate": 0.00016482, "loss": 0.1582, "mean_token_accuracy": 0.9358267247676849, "num_tokens": 205273.0, "step": 1760 },
    { "epoch": 0.356, "grad_norm": 0.03249628096818924, "learning_rate": 0.00016442000000000003, "loss": 0.1591, "mean_token_accuracy": 0.9367831707000732, "num_tokens": 207595.0, "step": 1780 },
    { "epoch": 0.36, "grad_norm": 0.034572117030620575, "learning_rate": 0.00016402000000000002, "loss": 0.1612, "mean_token_accuracy": 0.9356453567743301, "num_tokens": 209892.0, "step": 1800 },
    { "epoch": 0.364, "grad_norm": 0.04566624388098717, "learning_rate": 0.00016362, "loss": 0.1605, "mean_token_accuracy": 0.9359289228916168, "num_tokens": 212194.0, "step": 1820 },
    { "epoch": 0.368, "grad_norm": 0.029180865734815598, "learning_rate": 0.00016322000000000003, "loss": 0.1579, "mean_token_accuracy": 0.9378338158130646, "num_tokens": 214535.0, "step": 1840 },
    { "epoch": 0.372, "grad_norm": 0.04812979698181152, "learning_rate": 0.00016282000000000002, "loss": 0.1576, "mean_token_accuracy": 0.9391636937856674, "num_tokens": 216872.0, "step": 1860 },
    { "epoch": 0.376, "grad_norm": 0.06872449070215225, "learning_rate": 0.00016242, "loss": 0.1606, "mean_token_accuracy": 0.9389324098825454, "num_tokens": 219184.0, "step": 1880 },
    { "epoch": 0.38, "grad_norm": 0.05308040603995323, "learning_rate": 0.00016202000000000002, "loss": 0.1592, "mean_token_accuracy": 0.9385550439357757, "num_tokens": 221499.0, "step": 1900 },
    { "epoch": 0.384, "grad_norm": 0.13082465529441833, "learning_rate": 0.00016162000000000001, "loss": 0.1579, "mean_token_accuracy": 0.9380007416009903, "num_tokens": 223847.0, "step": 1920 },
    { "epoch": 0.388, "grad_norm": 0.03414028137922287, "learning_rate": 0.00016122, "loss": 0.1602, "mean_token_accuracy": 0.9369382321834564, "num_tokens": 226156.0, "step": 1940 },
    { "epoch": 0.392, "grad_norm": 0.04112333804368973, "learning_rate": 0.00016082000000000002, "loss": 0.1597, "mean_token_accuracy": 0.935492268204689, "num_tokens": 228475.0, "step": 1960 },
    { "epoch": 0.396, "grad_norm": 0.02955610118806362, "learning_rate": 0.00016042, "loss": 0.1591, "mean_token_accuracy": 0.93908212184906, "num_tokens": 230792.0, "step": 1980 },
    { "epoch": 0.4, "grad_norm": 0.024307863786816597, "learning_rate": 0.00016002, "loss": 0.1595, "mean_token_accuracy": 0.9393438696861267, "num_tokens": 233105.0, "step": 2000 }
  ],
  "logging_steps": 20,
  "max_steps": 10000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2823215531802624.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
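For a quick look at how the run above progressed (for example, how the loss and mean token accuracy evolve over the logged steps), the state can be parsed as ordinary JSON. A minimal sketch, assuming the file has been downloaded locally as trainer_state.json (the local filename is an assumption):

```python
# Minimal sketch: read the trainer state shown above and print the logged metrics.
# "trainer_state.json" is assumed to be a local copy of that file.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

print(f"global_step={state['global_step']}  max_steps={state['max_steps']}")
for entry in state["log_history"]:
    print(f"step {entry['step']:>5}  loss {entry['loss']:.4f}  "
          f"lr {entry['learning_rate']:.2e}  "
          f"token_acc {entry['mean_token_accuracy']:.4f}")
```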