Image-Text-to-Text
PEFT
Safetensors
English
vision-language
blockchain-security
attack-detection
lora
agentic-economy
dogon
rocm
amd
conversational
Instructions to use Ibonon/imina_na_lora with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use Ibonon/imina_na_lora with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
model = PeftModel.from_pretrained(base_model, "Ibonon/imina_na_lora")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 5000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004, | |
| "grad_norm": 2.131298542022705, | |
| "learning_rate": 0.00019962000000000002, | |
| "loss": 2.0121, | |
| "mean_token_accuracy": 0.6703190118074417, | |
| "num_tokens": 2348.0, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.008, | |
| "grad_norm": 1.2890418767929077, | |
| "learning_rate": 0.00019922, | |
| "loss": 0.2751, | |
| "mean_token_accuracy": 0.9120438575744629, | |
| "num_tokens": 4697.0, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.012, | |
| "grad_norm": 0.8126867413520813, | |
| "learning_rate": 0.00019882, | |
| "loss": 0.1966, | |
| "mean_token_accuracy": 0.9203487157821655, | |
| "num_tokens": 7014.0, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 0.6051881313323975, | |
| "learning_rate": 0.00019842000000000001, | |
| "loss": 0.1851, | |
| "mean_token_accuracy": 0.9291799515485764, | |
| "num_tokens": 9327.0, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.6378348469734192, | |
| "learning_rate": 0.00019802, | |
| "loss": 0.1766, | |
| "mean_token_accuracy": 0.9286984890699387, | |
| "num_tokens": 11670.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.024, | |
| "grad_norm": 0.624138593673706, | |
| "learning_rate": 0.00019762, | |
| "loss": 0.1784, | |
| "mean_token_accuracy": 0.9264282643795013, | |
| "num_tokens": 14000.0, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.028, | |
| "grad_norm": 0.2111046463251114, | |
| "learning_rate": 0.00019722, | |
| "loss": 0.1702, | |
| "mean_token_accuracy": 0.9321970880031586, | |
| "num_tokens": 16329.0, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 0.5350440740585327, | |
| "learning_rate": 0.00019682, | |
| "loss": 0.171, | |
| "mean_token_accuracy": 0.9311463803052902, | |
| "num_tokens": 18667.0, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.036, | |
| "grad_norm": 0.19237647950649261, | |
| "learning_rate": 0.00019642, | |
| "loss": 0.167, | |
| "mean_token_accuracy": 0.9345656305551528, | |
| "num_tokens": 20985.0, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.28153756260871887, | |
| "learning_rate": 0.00019602, | |
| "loss": 0.1674, | |
| "mean_token_accuracy": 0.9329667061567306, | |
| "num_tokens": 23325.0, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.044, | |
| "grad_norm": 0.8545331954956055, | |
| "learning_rate": 0.00019562, | |
| "loss": 0.166, | |
| "mean_token_accuracy": 0.9344212204217911, | |
| "num_tokens": 25670.0, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 0.24941129982471466, | |
| "learning_rate": 0.00019522, | |
| "loss": 0.1661, | |
| "mean_token_accuracy": 0.934859549999237, | |
| "num_tokens": 28016.0, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.052, | |
| "grad_norm": 0.29549548029899597, | |
| "learning_rate": 0.00019482, | |
| "loss": 0.1707, | |
| "mean_token_accuracy": 0.9345517784357071, | |
| "num_tokens": 30345.0, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.056, | |
| "grad_norm": 0.20388178527355194, | |
| "learning_rate": 0.00019442, | |
| "loss": 0.1673, | |
| "mean_token_accuracy": 0.9353183209896088, | |
| "num_tokens": 32691.0, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 0.10762794315814972, | |
| "learning_rate": 0.00019402, | |
| "loss": 0.1642, | |
| "mean_token_accuracy": 0.9325390756130219, | |
| "num_tokens": 35030.0, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 0.07676753401756287, | |
| "learning_rate": 0.00019362, | |
| "loss": 0.1633, | |
| "mean_token_accuracy": 0.9348760217428207, | |
| "num_tokens": 37359.0, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.068, | |
| "grad_norm": 0.06781225651502609, | |
| "learning_rate": 0.00019322, | |
| "loss": 0.1589, | |
| "mean_token_accuracy": 0.936154904961586, | |
| "num_tokens": 39707.0, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.072, | |
| "grad_norm": 0.10010460019111633, | |
| "learning_rate": 0.00019282000000000001, | |
| "loss": 0.1583, | |
| "mean_token_accuracy": 0.9410124599933625, | |
| "num_tokens": 42071.0, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.076, | |
| "grad_norm": 0.07932794839143753, | |
| "learning_rate": 0.00019242, | |
| "loss": 0.1608, | |
| "mean_token_accuracy": 0.9380002528429031, | |
| "num_tokens": 44404.0, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.06678586453199387, | |
| "learning_rate": 0.00019202, | |
| "loss": 0.1633, | |
| "mean_token_accuracy": 0.9347729980945587, | |
| "num_tokens": 46715.0, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.084, | |
| "grad_norm": 0.05118393525481224, | |
| "learning_rate": 0.00019162, | |
| "loss": 0.1621, | |
| "mean_token_accuracy": 0.9356200367212295, | |
| "num_tokens": 49030.0, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.088, | |
| "grad_norm": 0.07563836127519608, | |
| "learning_rate": 0.00019122, | |
| "loss": 0.1603, | |
| "mean_token_accuracy": 0.9362942427396774, | |
| "num_tokens": 51366.0, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.092, | |
| "grad_norm": 0.053388580679893494, | |
| "learning_rate": 0.00019082, | |
| "loss": 0.1585, | |
| "mean_token_accuracy": 0.9377258807420731, | |
| "num_tokens": 53706.0, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 0.05659119412302971, | |
| "learning_rate": 0.00019042, | |
| "loss": 0.1575, | |
| "mean_token_accuracy": 0.937972965836525, | |
| "num_tokens": 56052.0, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.04934714362025261, | |
| "learning_rate": 0.00019002, | |
| "loss": 0.1606, | |
| "mean_token_accuracy": 0.9356311202049256, | |
| "num_tokens": 58374.0, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.104, | |
| "grad_norm": 0.05647804215550423, | |
| "learning_rate": 0.00018962000000000002, | |
| "loss": 0.1587, | |
| "mean_token_accuracy": 0.9353525519371033, | |
| "num_tokens": 60706.0, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.108, | |
| "grad_norm": 0.058523017913103104, | |
| "learning_rate": 0.00018922, | |
| "loss": 0.1595, | |
| "mean_token_accuracy": 0.9371987581253052, | |
| "num_tokens": 63021.0, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 0.03793497756123543, | |
| "learning_rate": 0.00018882000000000003, | |
| "loss": 0.1584, | |
| "mean_token_accuracy": 0.9341553807258606, | |
| "num_tokens": 65369.0, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.116, | |
| "grad_norm": 0.04743633046746254, | |
| "learning_rate": 0.00018842000000000002, | |
| "loss": 0.1586, | |
| "mean_token_accuracy": 0.936157900094986, | |
| "num_tokens": 67700.0, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.05463261529803276, | |
| "learning_rate": 0.00018802, | |
| "loss": 0.1578, | |
| "mean_token_accuracy": 0.937694975733757, | |
| "num_tokens": 70038.0, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.124, | |
| "grad_norm": 0.11279874294996262, | |
| "learning_rate": 0.00018762000000000002, | |
| "loss": 0.1584, | |
| "mean_token_accuracy": 0.9382745862007141, | |
| "num_tokens": 72367.0, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.050823234021663666, | |
| "learning_rate": 0.00018722, | |
| "loss": 0.1602, | |
| "mean_token_accuracy": 0.9388011395931244, | |
| "num_tokens": 74677.0, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.132, | |
| "grad_norm": 0.04983159899711609, | |
| "learning_rate": 0.00018682000000000003, | |
| "loss": 0.1584, | |
| "mean_token_accuracy": 0.9374112606048584, | |
| "num_tokens": 77014.0, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.136, | |
| "grad_norm": 0.05084273964166641, | |
| "learning_rate": 0.00018642000000000002, | |
| "loss": 0.1591, | |
| "mean_token_accuracy": 0.9387890577316285, | |
| "num_tokens": 79339.0, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 0.04936506226658821, | |
| "learning_rate": 0.00018602, | |
| "loss": 0.157, | |
| "mean_token_accuracy": 0.9359721839427948, | |
| "num_tokens": 81688.0, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 0.04119481146335602, | |
| "learning_rate": 0.00018562000000000003, | |
| "loss": 0.1584, | |
| "mean_token_accuracy": 0.9359663873910904, | |
| "num_tokens": 84025.0, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.148, | |
| "grad_norm": 0.03988514095544815, | |
| "learning_rate": 0.00018522000000000002, | |
| "loss": 0.1595, | |
| "mean_token_accuracy": 0.9372004926204681, | |
| "num_tokens": 86341.0, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.152, | |
| "grad_norm": 0.04381653666496277, | |
| "learning_rate": 0.00018482, | |
| "loss": 0.1591, | |
| "mean_token_accuracy": 0.9373360633850097, | |
| "num_tokens": 88665.0, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.156, | |
| "grad_norm": 0.03504428267478943, | |
| "learning_rate": 0.00018442000000000003, | |
| "loss": 0.1579, | |
| "mean_token_accuracy": 0.9403579801321029, | |
| "num_tokens": 90997.0, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.03745226562023163, | |
| "learning_rate": 0.00018402000000000002, | |
| "loss": 0.1584, | |
| "mean_token_accuracy": 0.9388439536094666, | |
| "num_tokens": 93322.0, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.164, | |
| "grad_norm": 0.033838506788015366, | |
| "learning_rate": 0.00018362, | |
| "loss": 0.1562, | |
| "mean_token_accuracy": 0.9360319077968597, | |
| "num_tokens": 95688.0, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.168, | |
| "grad_norm": 0.0314440056681633, | |
| "learning_rate": 0.00018322000000000002, | |
| "loss": 0.1596, | |
| "mean_token_accuracy": 0.9349893003702163, | |
| "num_tokens": 98005.0, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.172, | |
| "grad_norm": 0.06577116250991821, | |
| "learning_rate": 0.00018282000000000001, | |
| "loss": 0.1589, | |
| "mean_token_accuracy": 0.9364802747964859, | |
| "num_tokens": 100327.0, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 0.029735982418060303, | |
| "learning_rate": 0.00018242, | |
| "loss": 0.1576, | |
| "mean_token_accuracy": 0.9365677893161773, | |
| "num_tokens": 102671.0, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 0.03155644237995148, | |
| "learning_rate": 0.00018202000000000002, | |
| "loss": 0.1587, | |
| "mean_token_accuracy": 0.9387517213821411, | |
| "num_tokens": 104996.0, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.184, | |
| "grad_norm": 0.027411427348852158, | |
| "learning_rate": 0.00018162, | |
| "loss": 0.1581, | |
| "mean_token_accuracy": 0.9342827945947647, | |
| "num_tokens": 107337.0, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.188, | |
| "grad_norm": 0.024014495313167572, | |
| "learning_rate": 0.00018122, | |
| "loss": 0.1583, | |
| "mean_token_accuracy": 0.9355534523725509, | |
| "num_tokens": 109674.0, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.02990046516060829, | |
| "learning_rate": 0.00018082000000000002, | |
| "loss": 0.1577, | |
| "mean_token_accuracy": 0.9373938798904419, | |
| "num_tokens": 112015.0, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.196, | |
| "grad_norm": 0.020676879212260246, | |
| "learning_rate": 0.00018042, | |
| "loss": 0.1582, | |
| "mean_token_accuracy": 0.9363098949193954, | |
| "num_tokens": 114349.0, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.024556942284107208, | |
| "learning_rate": 0.00018002, | |
| "loss": 0.159, | |
| "mean_token_accuracy": 0.9355833351612091, | |
| "num_tokens": 116672.0, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.204, | |
| "grad_norm": 0.22343556582927704, | |
| "learning_rate": 0.00017962000000000002, | |
| "loss": 0.157, | |
| "mean_token_accuracy": 0.9363467574119568, | |
| "num_tokens": 119023.0, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 0.515048623085022, | |
| "learning_rate": 0.00017922, | |
| "loss": 0.1585, | |
| "mean_token_accuracy": 0.9374267637729645, | |
| "num_tokens": 121366.0, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.212, | |
| "grad_norm": 0.2672664225101471, | |
| "learning_rate": 0.00017882, | |
| "loss": 0.1704, | |
| "mean_token_accuracy": 0.938492265343666, | |
| "num_tokens": 123719.0, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.216, | |
| "grad_norm": 0.11909265071153641, | |
| "learning_rate": 0.00017842000000000002, | |
| "loss": 0.1752, | |
| "mean_token_accuracy": 0.9297878712415695, | |
| "num_tokens": 126053.0, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 0.11977271735668182, | |
| "learning_rate": 0.00017802, | |
| "loss": 0.1652, | |
| "mean_token_accuracy": 0.9360411554574967, | |
| "num_tokens": 128379.0, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 0.17722292244434357, | |
| "learning_rate": 0.00017762, | |
| "loss": 0.1697, | |
| "mean_token_accuracy": 0.9314894318580628, | |
| "num_tokens": 130700.0, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.228, | |
| "grad_norm": 0.8375388979911804, | |
| "learning_rate": 0.00017722000000000001, | |
| "loss": 0.1895, | |
| "mean_token_accuracy": 0.9316652357578278, | |
| "num_tokens": 133026.0, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.232, | |
| "grad_norm": 0.10569056123495102, | |
| "learning_rate": 0.00017682, | |
| "loss": 0.1679, | |
| "mean_token_accuracy": 0.9333775132894516, | |
| "num_tokens": 135371.0, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.236, | |
| "grad_norm": 0.07626856118440628, | |
| "learning_rate": 0.00017642, | |
| "loss": 0.1613, | |
| "mean_token_accuracy": 0.9380175620317459, | |
| "num_tokens": 137695.0, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.06852507591247559, | |
| "learning_rate": 0.00017602, | |
| "loss": 0.1693, | |
| "mean_token_accuracy": 0.9329774439334869, | |
| "num_tokens": 140007.0, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.244, | |
| "grad_norm": 0.11691898107528687, | |
| "learning_rate": 0.00017562, | |
| "loss": 0.159, | |
| "mean_token_accuracy": 0.9360336065292358, | |
| "num_tokens": 142345.0, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.248, | |
| "grad_norm": 0.10780195891857147, | |
| "learning_rate": 0.00017522000000000002, | |
| "loss": 0.1586, | |
| "mean_token_accuracy": 0.9376411676406861, | |
| "num_tokens": 144693.0, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.252, | |
| "grad_norm": 0.07631397247314453, | |
| "learning_rate": 0.00017482, | |
| "loss": 0.1633, | |
| "mean_token_accuracy": 0.9390978574752807, | |
| "num_tokens": 147031.0, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 0.05569858103990555, | |
| "learning_rate": 0.00017442, | |
| "loss": 0.1615, | |
| "mean_token_accuracy": 0.9400094121694564, | |
| "num_tokens": 149371.0, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 0.04815123230218887, | |
| "learning_rate": 0.00017402000000000002, | |
| "loss": 0.1584, | |
| "mean_token_accuracy": 0.9370361328125, | |
| "num_tokens": 151716.0, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.264, | |
| "grad_norm": 0.11904877424240112, | |
| "learning_rate": 0.00017362, | |
| "loss": 0.159, | |
| "mean_token_accuracy": 0.941026845574379, | |
| "num_tokens": 154046.0, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.268, | |
| "grad_norm": 0.12382964044809341, | |
| "learning_rate": 0.00017322, | |
| "loss": 0.1594, | |
| "mean_token_accuracy": 0.9340965986251831, | |
| "num_tokens": 156382.0, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "grad_norm": 0.05523005872964859, | |
| "learning_rate": 0.00017282000000000002, | |
| "loss": 0.1593, | |
| "mean_token_accuracy": 0.9396583586931229, | |
| "num_tokens": 158705.0, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.276, | |
| "grad_norm": 0.05591598525643349, | |
| "learning_rate": 0.00017242, | |
| "loss": 0.1592, | |
| "mean_token_accuracy": 0.9364501267671586, | |
| "num_tokens": 161025.0, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 0.06780663877725601, | |
| "learning_rate": 0.00017202, | |
| "loss": 0.1618, | |
| "mean_token_accuracy": 0.936437115073204, | |
| "num_tokens": 163345.0, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.284, | |
| "grad_norm": 0.03291817009449005, | |
| "learning_rate": 0.00017162000000000001, | |
| "loss": 0.1569, | |
| "mean_token_accuracy": 0.9380638599395752, | |
| "num_tokens": 165711.0, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.04884820431470871, | |
| "learning_rate": 0.00017122, | |
| "loss": 0.1608, | |
| "mean_token_accuracy": 0.9370934247970581, | |
| "num_tokens": 168007.0, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.292, | |
| "grad_norm": 0.04577581211924553, | |
| "learning_rate": 0.00017082, | |
| "loss": 0.1595, | |
| "mean_token_accuracy": 0.9374603897333145, | |
| "num_tokens": 170332.0, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.296, | |
| "grad_norm": 0.03866467997431755, | |
| "learning_rate": 0.00017042, | |
| "loss": 0.1582, | |
| "mean_token_accuracy": 0.9355076909065246, | |
| "num_tokens": 172667.0, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 0.06204424798488617, | |
| "learning_rate": 0.00017002, | |
| "loss": 0.1599, | |
| "mean_token_accuracy": 0.9372841835021972, | |
| "num_tokens": 174970.0, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.304, | |
| "grad_norm": 0.03288702666759491, | |
| "learning_rate": 0.00016962, | |
| "loss": 0.1578, | |
| "mean_token_accuracy": 0.93585424721241, | |
| "num_tokens": 177317.0, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.308, | |
| "grad_norm": 0.03605024516582489, | |
| "learning_rate": 0.00016922, | |
| "loss": 0.1597, | |
| "mean_token_accuracy": 0.9366346269845962, | |
| "num_tokens": 179629.0, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.312, | |
| "grad_norm": 0.03328383341431618, | |
| "learning_rate": 0.00016882, | |
| "loss": 0.1582, | |
| "mean_token_accuracy": 0.9372009009122848, | |
| "num_tokens": 181960.0, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.316, | |
| "grad_norm": 0.03522924706339836, | |
| "learning_rate": 0.00016842, | |
| "loss": 0.1572, | |
| "mean_token_accuracy": 0.9355730235576629, | |
| "num_tokens": 184314.0, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.0317777544260025, | |
| "learning_rate": 0.00016802, | |
| "loss": 0.1572, | |
| "mean_token_accuracy": 0.9378984242677688, | |
| "num_tokens": 186658.0, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.324, | |
| "grad_norm": 0.07111163437366486, | |
| "learning_rate": 0.00016762, | |
| "loss": 0.1613, | |
| "mean_token_accuracy": 0.9344337552785873, | |
| "num_tokens": 188974.0, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.328, | |
| "grad_norm": 0.04765714704990387, | |
| "learning_rate": 0.00016722, | |
| "loss": 0.1608, | |
| "mean_token_accuracy": 0.9345141768455505, | |
| "num_tokens": 191276.0, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.332, | |
| "grad_norm": 0.041960619390010834, | |
| "learning_rate": 0.00016682, | |
| "loss": 0.1594, | |
| "mean_token_accuracy": 0.9375987917184829, | |
| "num_tokens": 193597.0, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.336, | |
| "grad_norm": 0.041757769882678986, | |
| "learning_rate": 0.00016642, | |
| "loss": 0.157, | |
| "mean_token_accuracy": 0.9367031455039978, | |
| "num_tokens": 195949.0, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 0.05323236435651779, | |
| "learning_rate": 0.00016601999999999999, | |
| "loss": 0.1654, | |
| "mean_token_accuracy": 0.9359392642974853, | |
| "num_tokens": 198267.0, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.344, | |
| "grad_norm": 0.08934314548969269, | |
| "learning_rate": 0.00016562, | |
| "loss": 0.1619, | |
| "mean_token_accuracy": 0.9331722050905228, | |
| "num_tokens": 200613.0, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.348, | |
| "grad_norm": 0.033347178250551224, | |
| "learning_rate": 0.00016522, | |
| "loss": 0.1611, | |
| "mean_token_accuracy": 0.9317500472068787, | |
| "num_tokens": 202927.0, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 0.03238425776362419, | |
| "learning_rate": 0.00016482, | |
| "loss": 0.1582, | |
| "mean_token_accuracy": 0.9358267247676849, | |
| "num_tokens": 205273.0, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.356, | |
| "grad_norm": 0.03249628096818924, | |
| "learning_rate": 0.00016442000000000003, | |
| "loss": 0.1591, | |
| "mean_token_accuracy": 0.9367831707000732, | |
| "num_tokens": 207595.0, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 0.034572117030620575, | |
| "learning_rate": 0.00016402000000000002, | |
| "loss": 0.1612, | |
| "mean_token_accuracy": 0.9356453567743301, | |
| "num_tokens": 209892.0, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.364, | |
| "grad_norm": 0.04566624388098717, | |
| "learning_rate": 0.00016362, | |
| "loss": 0.1605, | |
| "mean_token_accuracy": 0.9359289228916168, | |
| "num_tokens": 212194.0, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.368, | |
| "grad_norm": 0.029180865734815598, | |
| "learning_rate": 0.00016322000000000003, | |
| "loss": 0.1579, | |
| "mean_token_accuracy": 0.9378338158130646, | |
| "num_tokens": 214535.0, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.372, | |
| "grad_norm": 0.04812979698181152, | |
| "learning_rate": 0.00016282000000000002, | |
| "loss": 0.1576, | |
| "mean_token_accuracy": 0.9391636937856674, | |
| "num_tokens": 216872.0, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.376, | |
| "grad_norm": 0.06872449070215225, | |
| "learning_rate": 0.00016242, | |
| "loss": 0.1606, | |
| "mean_token_accuracy": 0.9389324098825454, | |
| "num_tokens": 219184.0, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 0.05308040603995323, | |
| "learning_rate": 0.00016202000000000002, | |
| "loss": 0.1592, | |
| "mean_token_accuracy": 0.9385550439357757, | |
| "num_tokens": 221499.0, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 0.13082465529441833, | |
| "learning_rate": 0.00016162000000000001, | |
| "loss": 0.1579, | |
| "mean_token_accuracy": 0.9380007416009903, | |
| "num_tokens": 223847.0, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.388, | |
| "grad_norm": 0.03414028137922287, | |
| "learning_rate": 0.00016122, | |
| "loss": 0.1602, | |
| "mean_token_accuracy": 0.9369382321834564, | |
| "num_tokens": 226156.0, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.392, | |
| "grad_norm": 0.04112333804368973, | |
| "learning_rate": 0.00016082000000000002, | |
| "loss": 0.1597, | |
| "mean_token_accuracy": 0.935492268204689, | |
| "num_tokens": 228475.0, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.396, | |
| "grad_norm": 0.02955610118806362, | |
| "learning_rate": 0.00016042, | |
| "loss": 0.1591, | |
| "mean_token_accuracy": 0.93908212184906, | |
| "num_tokens": 230792.0, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.024307863786816597, | |
| "learning_rate": 0.00016002, | |
| "loss": 0.1595, | |
| "mean_token_accuracy": 0.9393438696861267, | |
| "num_tokens": 233105.0, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.404, | |
| "grad_norm": 0.031049860641360283, | |
| "learning_rate": 0.00015962000000000002, | |
| "loss": 0.157, | |
| "mean_token_accuracy": 0.9350471049547195, | |
| "num_tokens": 235457.0, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.408, | |
| "grad_norm": 0.031201306730508804, | |
| "learning_rate": 0.00015922, | |
| "loss": 0.1597, | |
| "mean_token_accuracy": 0.9370091885328293, | |
| "num_tokens": 237770.0, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.412, | |
| "grad_norm": 0.03218454122543335, | |
| "learning_rate": 0.00015882, | |
| "loss": 0.159, | |
| "mean_token_accuracy": 0.9386389076709747, | |
| "num_tokens": 240090.0, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 0.03348623961210251, | |
| "learning_rate": 0.00015842000000000002, | |
| "loss": 0.1597, | |
| "mean_token_accuracy": 0.9362810254096985, | |
| "num_tokens": 242393.0, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 0.04007818177342415, | |
| "learning_rate": 0.00015802, | |
| "loss": 0.1581, | |
| "mean_token_accuracy": 0.9370237767696381, | |
| "num_tokens": 244733.0, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.424, | |
| "grad_norm": 0.03378809243440628, | |
| "learning_rate": 0.00015762, | |
| "loss": 0.1568, | |
| "mean_token_accuracy": 0.9377921044826507, | |
| "num_tokens": 247088.0, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.428, | |
| "grad_norm": 0.02798735350370407, | |
| "learning_rate": 0.00015722000000000002, | |
| "loss": 0.1586, | |
| "mean_token_accuracy": 0.9383140057325363, | |
| "num_tokens": 249415.0, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.432, | |
| "grad_norm": 0.02400992065668106, | |
| "learning_rate": 0.00015682, | |
| "loss": 0.1581, | |
| "mean_token_accuracy": 0.9389418184757232, | |
| "num_tokens": 251751.0, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.436, | |
| "grad_norm": 0.028334975242614746, | |
| "learning_rate": 0.00015642000000000002, | |
| "loss": 0.1578, | |
| "mean_token_accuracy": 0.936533722281456, | |
| "num_tokens": 254093.0, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 0.024794427677989006, | |
| "learning_rate": 0.00015602000000000001, | |
| "loss": 0.1587, | |
| "mean_token_accuracy": 0.9344067484140396, | |
| "num_tokens": 256422.0, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.444, | |
| "grad_norm": 0.024761928245425224, | |
| "learning_rate": 0.00015562, | |
| "loss": 0.1589, | |
| "mean_token_accuracy": 0.936401879787445, | |
| "num_tokens": 258747.0, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.023300737142562866, | |
| "learning_rate": 0.00015522000000000002, | |
| "loss": 0.1594, | |
| "mean_token_accuracy": 0.9409920126199722, | |
| "num_tokens": 261059.0, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.452, | |
| "grad_norm": 0.023498738184571266, | |
| "learning_rate": 0.00015482, | |
| "loss": 0.1595, | |
| "mean_token_accuracy": 0.9378518283367157, | |
| "num_tokens": 263369.0, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.456, | |
| "grad_norm": 0.020731788128614426, | |
| "learning_rate": 0.00015442, | |
| "loss": 0.1586, | |
| "mean_token_accuracy": 0.9395518034696579, | |
| "num_tokens": 265691.0, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 0.03587990626692772, | |
| "learning_rate": 0.00015402000000000002, | |
| "loss": 0.1587, | |
| "mean_token_accuracy": 0.9378482937812805, | |
| "num_tokens": 268016.0, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.464, | |
| "grad_norm": 0.03809090331196785, | |
| "learning_rate": 0.00015362, | |
| "loss": 0.1588, | |
| "mean_token_accuracy": 0.9367815405130386, | |
| "num_tokens": 270334.0, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.468, | |
| "grad_norm": 0.04313996061682701, | |
| "learning_rate": 0.00015322, | |
| "loss": 0.1593, | |
| "mean_token_accuracy": 0.9363488733768464, | |
| "num_tokens": 272651.0, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.472, | |
| "grad_norm": 0.033811088651418686, | |
| "learning_rate": 0.00015282000000000002, | |
| "loss": 0.1594, | |
| "mean_token_accuracy": 0.9361798793077469, | |
| "num_tokens": 274964.0, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.476, | |
| "grad_norm": 0.03164658322930336, | |
| "learning_rate": 0.00015242, | |
| "loss": 0.1598, | |
| "mean_token_accuracy": 0.9361578047275543, | |
| "num_tokens": 277276.0, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.030314739793539047, | |
| "learning_rate": 0.00015202, | |
| "loss": 0.1601, | |
| "mean_token_accuracy": 0.9364828914403915, | |
| "num_tokens": 279583.0, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.484, | |
| "grad_norm": 0.03359575197100639, | |
| "learning_rate": 0.00015162000000000002, | |
| "loss": 0.1577, | |
| "mean_token_accuracy": 0.9370669215917588, | |
| "num_tokens": 281927.0, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.488, | |
| "grad_norm": 0.025949697941541672, | |
| "learning_rate": 0.00015122, | |
| "loss": 0.1582, | |
| "mean_token_accuracy": 0.9364959686994553, | |
| "num_tokens": 284267.0, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.492, | |
| "grad_norm": 0.031149016693234444, | |
| "learning_rate": 0.00015082, | |
| "loss": 0.1589, | |
| "mean_token_accuracy": 0.9384445637464524, | |
| "num_tokens": 286585.0, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.496, | |
| "grad_norm": 0.031569018959999084, | |
| "learning_rate": 0.00015042, | |
| "loss": 0.1589, | |
| "mean_token_accuracy": 0.9359267175197601, | |
| "num_tokens": 288907.0, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.02912713773548603, | |
| "learning_rate": 0.00015002, | |
| "loss": 0.1582, | |
| "mean_token_accuracy": 0.9354292452335358, | |
| "num_tokens": 291240.0, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.504, | |
| "grad_norm": 0.029648004099726677, | |
| "learning_rate": 0.00014962, | |
| "loss": 0.1577, | |
| "mean_token_accuracy": 0.9371567130088806, | |
| "num_tokens": 293587.0, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.508, | |
| "grad_norm": 0.01994331367313862, | |
| "learning_rate": 0.00014922, | |
| "loss": 0.1565, | |
| "mean_token_accuracy": 0.9374659866094589, | |
| "num_tokens": 295947.0, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 0.022220291197299957, | |
| "learning_rate": 0.00014882, | |
| "loss": 0.1576, | |
| "mean_token_accuracy": 0.9352899432182312, | |
| "num_tokens": 298291.0, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.516, | |
| "grad_norm": 0.019389133900403976, | |
| "learning_rate": 0.00014842, | |
| "loss": 0.1579, | |
| "mean_token_accuracy": 0.9369660496711731, | |
| "num_tokens": 300630.0, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 0.025073856115341187, | |
| "learning_rate": 0.00014802, | |
| "loss": 0.1594, | |
| "mean_token_accuracy": 0.9384920775890351, | |
| "num_tokens": 302943.0, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.524, | |
| "grad_norm": 0.02601858787238598, | |
| "learning_rate": 0.00014762, | |
| "loss": 0.1589, | |
| "mean_token_accuracy": 0.9373117983341217, | |
| "num_tokens": 305265.0, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.528, | |
| "grad_norm": 0.0248605664819479, | |
| "learning_rate": 0.00014722, | |
| "loss": 0.1594, | |
| "mean_token_accuracy": 0.9345557481050492, | |
| "num_tokens": 307583.0, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.532, | |
| "grad_norm": 0.022037120535969734, | |
| "learning_rate": 0.00014682, | |
| "loss": 0.1596, | |
| "mean_token_accuracy": 0.9348031014204026, | |
| "num_tokens": 309896.0, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.536, | |
| "grad_norm": 0.03458873927593231, | |
| "learning_rate": 0.00014642, | |
| "loss": 0.1572, | |
| "mean_token_accuracy": 0.9376107037067414, | |
| "num_tokens": 312244.0, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 0.016396528109908104, | |
| "learning_rate": 0.00014602, | |
| "loss": 0.1582, | |
| "mean_token_accuracy": 0.9370677560567856, | |
| "num_tokens": 314578.0, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 0.01821085438132286, | |
| "learning_rate": 0.00014562, | |
| "loss": 0.1596, | |
| "mean_token_accuracy": 0.9343250393867493, | |
| "num_tokens": 316892.0, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.548, | |
| "grad_norm": 0.025619324296712875, | |
| "learning_rate": 0.00014522, | |
| "loss": 0.1591, | |
| "mean_token_accuracy": 0.9362540364265441, | |
| "num_tokens": 319213.0, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.552, | |
| "grad_norm": 0.01870078593492508, | |
| "learning_rate": 0.00014482, | |
| "loss": 0.1598, | |
| "mean_token_accuracy": 0.9338186293840408, | |
| "num_tokens": 321525.0, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.556, | |
| "grad_norm": 0.018730677664279938, | |
| "learning_rate": 0.00014442, | |
| "loss": 0.1566, | |
| "mean_token_accuracy": 0.9394799619913101, | |
| "num_tokens": 323880.0, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.019898803904652596, | |
| "learning_rate": 0.00014402, | |
| "loss": 0.1579, | |
| "mean_token_accuracy": 0.9384922862052918, | |
| "num_tokens": 326214.0, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.564, | |
| "grad_norm": 0.021964257583022118, | |
| "learning_rate": 0.00014362, | |
| "loss": 0.1588, | |
| "mean_token_accuracy": 0.9378354996442795, | |
| "num_tokens": 328536.0, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.568, | |
| "grad_norm": 0.02397042326629162, | |
| "learning_rate": 0.00014322, | |
| "loss": 0.157, | |
| "mean_token_accuracy": 0.9381829768419265, | |
| "num_tokens": 330888.0, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.572, | |
| "grad_norm": 0.017819812521338463, | |
| "learning_rate": 0.00014282, | |
| "loss": 0.1569, | |
| "mean_token_accuracy": 0.940555801987648, | |
| "num_tokens": 333234.0, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.019056344404816628, | |
| "learning_rate": 0.00014242, | |
| "loss": 0.1621, | |
| "mean_token_accuracy": 0.9321106940507888, | |
| "num_tokens": 335516.0, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 0.020357482135295868, | |
| "learning_rate": 0.00014202, | |
| "loss": 0.1586, | |
| "mean_token_accuracy": 0.9384677648544312, | |
| "num_tokens": 337843.0, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.584, | |
| "grad_norm": 0.02105647511780262, | |
| "learning_rate": 0.00014162, | |
| "loss": 0.1592, | |
| "mean_token_accuracy": 0.9367755681276322, | |
| "num_tokens": 340159.0, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.588, | |
| "grad_norm": 0.02058851346373558, | |
| "learning_rate": 0.00014122, | |
| "loss": 0.1583, | |
| "mean_token_accuracy": 0.9383471548557282, | |
| "num_tokens": 342487.0, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.592, | |
| "grad_norm": 0.022488698363304138, | |
| "learning_rate": 0.00014082, | |
| "loss": 0.1575, | |
| "mean_token_accuracy": 0.9383687317371369, | |
| "num_tokens": 344831.0, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.596, | |
| "grad_norm": 0.01753912679851055, | |
| "learning_rate": 0.00014042, | |
| "loss": 0.1583, | |
| "mean_token_accuracy": 0.9367416232824326, | |
| "num_tokens": 347163.0, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.01856599561870098, | |
| "learning_rate": 0.00014002, | |
| "loss": 0.156, | |
| "mean_token_accuracy": 0.9410459071397781, | |
| "num_tokens": 349528.0, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.604, | |
| "grad_norm": 0.018140017986297607, | |
| "learning_rate": 0.00013962000000000002, | |
| "loss": 0.1588, | |
| "mean_token_accuracy": 0.9355444282293319, | |
| "num_tokens": 351852.0, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 0.01846504583954811, | |
| "learning_rate": 0.00013922, | |
| "loss": 0.1592, | |
| "mean_token_accuracy": 0.9346548557281494, | |
| "num_tokens": 354175.0, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.612, | |
| "grad_norm": 0.020237931981682777, | |
| "learning_rate": 0.00013882000000000003, | |
| "loss": 0.1578, | |
| "mean_token_accuracy": 0.9351754993200302, | |
| "num_tokens": 356517.0, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.616, | |
| "grad_norm": 0.017988894134759903, | |
| "learning_rate": 0.00013842000000000002, | |
| "loss": 0.1596, | |
| "mean_token_accuracy": 0.9330779641866684, | |
| "num_tokens": 358833.0, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 0.01684187725186348, | |
| "learning_rate": 0.00013802, | |
| "loss": 0.1571, | |
| "mean_token_accuracy": 0.9366971403360367, | |
| "num_tokens": 361182.0, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.624, | |
| "grad_norm": 0.019796263426542282, | |
| "learning_rate": 0.00013762000000000003, | |
| "loss": 0.1594, | |
| "mean_token_accuracy": 0.937585511803627, | |
| "num_tokens": 363494.0, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.628, | |
| "grad_norm": 0.027366606518626213, | |
| "learning_rate": 0.00013722000000000002, | |
| "loss": 0.1579, | |
| "mean_token_accuracy": 0.9360651940107345, | |
| "num_tokens": 365832.0, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.632, | |
| "grad_norm": 0.01635519415140152, | |
| "learning_rate": 0.00013682, | |
| "loss": 0.1575, | |
| "mean_token_accuracy": 0.9379144310951233, | |
| "num_tokens": 368176.0, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.636, | |
| "grad_norm": 0.01604699157178402, | |
| "learning_rate": 0.00013642000000000003, | |
| "loss": 0.1597, | |
| "mean_token_accuracy": 0.9374749541282654, | |
| "num_tokens": 370489.0, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.01789054647088051, | |
| "learning_rate": 0.00013602000000000002, | |
| "loss": 0.1577, | |
| "mean_token_accuracy": 0.9346486628055573, | |
| "num_tokens": 372828.0, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.644, | |
| "grad_norm": 0.018880745396018028, | |
| "learning_rate": 0.00013562, | |
| "loss": 0.1591, | |
| "mean_token_accuracy": 0.9359114378690719, | |
| "num_tokens": 375150.0, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.648, | |
| "grad_norm": 0.017004717141389847, | |
| "learning_rate": 0.00013522000000000002, | |
| "loss": 0.1584, | |
| "mean_token_accuracy": 0.9370229512453079, | |
| "num_tokens": 377478.0, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.652, | |
| "grad_norm": 0.016740689054131508, | |
| "learning_rate": 0.00013482000000000001, | |
| "loss": 0.1581, | |
| "mean_token_accuracy": 0.9358916640281677, | |
| "num_tokens": 379814.0, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.656, | |
| "grad_norm": 0.015598557889461517, | |
| "learning_rate": 0.00013442, | |
| "loss": 0.157, | |
| "mean_token_accuracy": 0.9368194431066513, | |
| "num_tokens": 382165.0, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 0.017107274383306503, | |
| "learning_rate": 0.00013402000000000002, | |
| "loss": 0.158, | |
| "mean_token_accuracy": 0.9370757102966308, | |
| "num_tokens": 384499.0, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.664, | |
| "grad_norm": 0.019015047699213028, | |
| "learning_rate": 0.00013362, | |
| "loss": 0.1578, | |
| "mean_token_accuracy": 0.938113397359848, | |
| "num_tokens": 386835.0, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.668, | |
| "grad_norm": 0.02084503509104252, | |
| "learning_rate": 0.00013322, | |
| "loss": 0.1594, | |
| "mean_token_accuracy": 0.9364703267812728, | |
| "num_tokens": 389148.0, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 0.02378230169415474, | |
| "learning_rate": 0.00013282000000000002, | |
| "loss": 0.1598, | |
| "mean_token_accuracy": 0.9386055022478104, | |
| "num_tokens": 391454.0, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.676, | |
| "grad_norm": 0.014733811840415001, | |
| "learning_rate": 0.00013242, | |
| "loss": 0.1585, | |
| "mean_token_accuracy": 0.938000163435936, | |
| "num_tokens": 393786.0, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 0.015965940430760384, | |
| "learning_rate": 0.00013202, | |
| "loss": 0.1594, | |
| "mean_token_accuracy": 0.9366149872541427, | |
| "num_tokens": 396103.0, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.684, | |
| "grad_norm": 0.014542631804943085, | |
| "learning_rate": 0.00013162000000000002, | |
| "loss": 0.161, | |
| "mean_token_accuracy": 0.9346071958541871, | |
| "num_tokens": 398396.0, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.688, | |
| "grad_norm": 0.014583873562514782, | |
| "learning_rate": 0.00013122, | |
| "loss": 0.1587, | |
| "mean_token_accuracy": 0.9348091840744018, | |
| "num_tokens": 400725.0, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.692, | |
| "grad_norm": 0.017750371247529984, | |
| "learning_rate": 0.00013082, | |
| "loss": 0.1577, | |
| "mean_token_accuracy": 0.9374497979879379, | |
| "num_tokens": 403064.0, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.696, | |
| "grad_norm": 0.01569240354001522, | |
| "learning_rate": 0.00013042000000000002, | |
| "loss": 0.1566, | |
| "mean_token_accuracy": 0.938677328824997, | |
| "num_tokens": 405418.0, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 0.012828970327973366, | |
| "learning_rate": 0.00013002, | |
| "loss": 0.1579, | |
| "mean_token_accuracy": 0.9367502212524415, | |
| "num_tokens": 407754.0, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.01372049655765295, | |
| "learning_rate": 0.00012962, | |
| "loss": 0.1559, | |
| "mean_token_accuracy": 0.9384414672851562, | |
| "num_tokens": 410118.0, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.708, | |
| "grad_norm": 0.015669073909521103, | |
| "learning_rate": 0.00012922, | |
| "loss": 0.159, | |
| "mean_token_accuracy": 0.9381854623556137, | |
| "num_tokens": 412440.0, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.712, | |
| "grad_norm": 0.017576146870851517, | |
| "learning_rate": 0.00012882, | |
| "loss": 0.1585, | |
| "mean_token_accuracy": 0.9391775250434875, | |
| "num_tokens": 414766.0, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.716, | |
| "grad_norm": 0.02452530339360237, | |
| "learning_rate": 0.00012842, | |
| "loss": 0.1559, | |
| "mean_token_accuracy": 0.9388908207416534, | |
| "num_tokens": 417117.0, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.019447200000286102, | |
| "learning_rate": 0.00012802, | |
| "loss": 0.1602, | |
| "mean_token_accuracy": 0.936455848813057, | |
| "num_tokens": 419428.0, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.724, | |
| "grad_norm": 0.021487407386302948, | |
| "learning_rate": 0.00012762, | |
| "loss": 0.158, | |
| "mean_token_accuracy": 0.9349821031093597, | |
| "num_tokens": 421763.0, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.728, | |
| "grad_norm": 0.013334971852600574, | |
| "learning_rate": 0.00012722000000000002, | |
| "loss": 0.1566, | |
| "mean_token_accuracy": 0.9395059019327163, | |
| "num_tokens": 424115.0, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.732, | |
| "grad_norm": 0.023315824568271637, | |
| "learning_rate": 0.00012682, | |
| "loss": 0.1596, | |
| "mean_token_accuracy": 0.936040785908699, | |
| "num_tokens": 426426.0, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 0.021865224465727806, | |
| "learning_rate": 0.00012642, | |
| "loss": 0.1582, | |
| "mean_token_accuracy": 0.9385276228189469, | |
| "num_tokens": 428761.0, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 0.014885502867400646, | |
| "learning_rate": 0.00012602000000000002, | |
| "loss": 0.1579, | |
| "mean_token_accuracy": 0.9356612026691437, | |
| "num_tokens": 431102.0, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.744, | |
| "grad_norm": 0.020914755761623383, | |
| "learning_rate": 0.00012562, | |
| "loss": 0.1576, | |
| "mean_token_accuracy": 0.9347645163536071, | |
| "num_tokens": 433444.0, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.748, | |
| "grad_norm": 0.011823791079223156, | |
| "learning_rate": 0.00012522, | |
| "loss": 0.1587, | |
| "mean_token_accuracy": 0.93475821018219, | |
| "num_tokens": 435772.0, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.752, | |
| "grad_norm": 0.01868574135005474, | |
| "learning_rate": 0.00012482000000000001, | |
| "loss": 0.1616, | |
| "mean_token_accuracy": 0.9349653989076614, | |
| "num_tokens": 438056.0, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.756, | |
| "grad_norm": 0.013049358502030373, | |
| "learning_rate": 0.00012442, | |
| "loss": 0.1581, | |
| "mean_token_accuracy": 0.9381139695644378, | |
| "num_tokens": 440390.0, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.01722385175526142, | |
| "learning_rate": 0.00012402, | |
| "loss": 0.1568, | |
| "mean_token_accuracy": 0.9363266348838806, | |
| "num_tokens": 442741.0, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.764, | |
| "grad_norm": 0.014651446603238583, | |
| "learning_rate": 0.00012362, | |
| "loss": 0.1568, | |
| "mean_token_accuracy": 0.9367681205272674, | |
| "num_tokens": 445092.0, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 0.012667631730437279, | |
| "learning_rate": 0.00012322, | |
| "loss": 0.1583, | |
| "mean_token_accuracy": 0.9367698729038239, | |
| "num_tokens": 447426.0, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.772, | |
| "grad_norm": 0.017640771344304085, | |
| "learning_rate": 0.00012282, | |
| "loss": 0.1607, | |
| "mean_token_accuracy": 0.9369511902332306, | |
| "num_tokens": 449722.0, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.776, | |
| "grad_norm": 0.0181003175675869, | |
| "learning_rate": 0.00012242, | |
| "loss": 0.158, | |
| "mean_token_accuracy": 0.9366753160953522, | |
| "num_tokens": 452055.0, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 0.025726528838276863, | |
| "learning_rate": 0.00012202, | |
| "loss": 0.1575, | |
| "mean_token_accuracy": 0.937316569685936, | |
| "num_tokens": 454397.0, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.784, | |
| "grad_norm": 0.019859878346323967, | |
| "learning_rate": 0.00012162, | |
| "loss": 0.1601, | |
| "mean_token_accuracy": 0.9387007981538773, | |
| "num_tokens": 456702.0, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.788, | |
| "grad_norm": 0.013250220566987991, | |
| "learning_rate": 0.00012122, | |
| "loss": 0.1574, | |
| "mean_token_accuracy": 0.936699178814888, | |
| "num_tokens": 459047.0, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.792, | |
| "grad_norm": 0.013169913552701473, | |
| "learning_rate": 0.00012082, | |
| "loss": 0.1594, | |
| "mean_token_accuracy": 0.936829337477684, | |
| "num_tokens": 461362.0, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.796, | |
| "grad_norm": 0.012993971817195415, | |
| "learning_rate": 0.00012042, | |
| "loss": 0.1601, | |
| "mean_token_accuracy": 0.9353927552700043, | |
| "num_tokens": 463667.0, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.013329907320439816, | |
| "learning_rate": 0.00012001999999999999, | |
| "loss": 0.1585, | |
| "mean_token_accuracy": 0.9350113153457642, | |
| "num_tokens": 465998.0, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.804, | |
| "grad_norm": 0.01984967105090618, | |
| "learning_rate": 0.00011962, | |
| "loss": 0.1585, | |
| "mean_token_accuracy": 0.9361959755420685, | |
| "num_tokens": 468327.0, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.808, | |
| "grad_norm": 0.01188992615789175, | |
| "learning_rate": 0.00011922, | |
| "loss": 0.1574, | |
| "mean_token_accuracy": 0.9366391479969025, | |
| "num_tokens": 470674.0, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.812, | |
| "grad_norm": 0.011742761358618736, | |
| "learning_rate": 0.00011882, | |
| "loss": 0.1589, | |
| "mean_token_accuracy": 0.9355545520782471, | |
| "num_tokens": 472995.0, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.816, | |
| "grad_norm": 0.014725361950695515, | |
| "learning_rate": 0.00011842, | |
| "loss": 0.1579, | |
| "mean_token_accuracy": 0.9372966170310975, | |
| "num_tokens": 475328.0, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 0.013188617303967476, | |
| "learning_rate": 0.00011802, | |
| "loss": 0.1591, | |
| "mean_token_accuracy": 0.9364562898874282, | |
| "num_tokens": 477648.0, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.824, | |
| "grad_norm": 0.01250818558037281, | |
| "learning_rate": 0.00011762, | |
| "loss": 0.1599, | |
| "mean_token_accuracy": 0.9361166715621948, | |
| "num_tokens": 479955.0, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.828, | |
| "grad_norm": 0.012422624975442886, | |
| "learning_rate": 0.00011721999999999999, | |
| "loss": 0.1578, | |
| "mean_token_accuracy": 0.9352377116680145, | |
| "num_tokens": 482295.0, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 0.013321579433977604, | |
| "learning_rate": 0.00011682, | |
| "loss": 0.1585, | |
| "mean_token_accuracy": 0.9374675869941711, | |
| "num_tokens": 484619.0, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.836, | |
| "grad_norm": 0.010815759189426899, | |
| "learning_rate": 0.00011642, | |
| "loss": 0.158, | |
| "mean_token_accuracy": 0.937660351395607, | |
| "num_tokens": 486953.0, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.014364980161190033, | |
| "learning_rate": 0.00011601999999999999, | |
| "loss": 0.1612, | |
| "mean_token_accuracy": 0.9387158721685409, | |
| "num_tokens": 489241.0, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.844, | |
| "grad_norm": 0.01238577626645565, | |
| "learning_rate": 0.00011562, | |
| "loss": 0.1581, | |
| "mean_token_accuracy": 0.9387918084859848, | |
| "num_tokens": 491572.0, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.848, | |
| "grad_norm": 0.01074713934212923, | |
| "learning_rate": 0.00011522, | |
| "loss": 0.1579, | |
| "mean_token_accuracy": 0.9364918410778046, | |
| "num_tokens": 493911.0, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.852, | |
| "grad_norm": 0.018538950011134148, | |
| "learning_rate": 0.00011482000000000002, | |
| "loss": 0.1596, | |
| "mean_token_accuracy": 0.9368883103132248, | |
| "num_tokens": 496218.0, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.856, | |
| "grad_norm": 0.012234380468726158, | |
| "learning_rate": 0.00011442000000000002, | |
| "loss": 0.1563, | |
| "mean_token_accuracy": 0.9358471721410752, | |
| "num_tokens": 498579.0, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 0.012620363384485245, | |
| "learning_rate": 0.00011402000000000001, | |
| "loss": 0.1584, | |
| "mean_token_accuracy": 0.9370616048574447, | |
| "num_tokens": 500910.0, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 0.014111168682575226, | |
| "learning_rate": 0.00011362000000000001, | |
| "loss": 0.1599, | |
| "mean_token_accuracy": 0.9376936018466949, | |
| "num_tokens": 503215.0, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.868, | |
| "grad_norm": 0.01211662869900465, | |
| "learning_rate": 0.00011322000000000002, | |
| "loss": 0.1582, | |
| "mean_token_accuracy": 0.9375888526439666, | |
| "num_tokens": 505546.0, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.872, | |
| "grad_norm": 0.012831459753215313, | |
| "learning_rate": 0.00011282000000000002, | |
| "loss": 0.1603, | |
| "mean_token_accuracy": 0.9398003369569778, | |
| "num_tokens": 507842.0, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.876, | |
| "grad_norm": 0.018729697912931442, | |
| "learning_rate": 0.00011242000000000001, | |
| "loss": 0.1568, | |
| "mean_token_accuracy": 0.9386621713638306, | |
| "num_tokens": 510196.0, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.020225845277309418, | |
| "learning_rate": 0.00011202000000000002, | |
| "loss": 0.1586, | |
| "mean_token_accuracy": 0.9374362021684647, | |
| "num_tokens": 512521.0, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.884, | |
| "grad_norm": 0.03628004714846611, | |
| "learning_rate": 0.00011162000000000002, | |
| "loss": 0.1598, | |
| "mean_token_accuracy": 0.9386918365955352, | |
| "num_tokens": 514827.0, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.888, | |
| "grad_norm": 0.02236984297633171, | |
| "learning_rate": 0.00011122000000000001, | |
| "loss": 0.1574, | |
| "mean_token_accuracy": 0.9361942201852799, | |
| "num_tokens": 517171.0, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.892, | |
| "grad_norm": 0.015914643183350563, | |
| "learning_rate": 0.00011082000000000001, | |
| "loss": 0.1569, | |
| "mean_token_accuracy": 0.9363924354314804, | |
| "num_tokens": 519523.0, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 0.011004773899912834, | |
| "learning_rate": 0.00011042000000000002, | |
| "loss": 0.1575, | |
| "mean_token_accuracy": 0.9354314595460892, | |
| "num_tokens": 521871.0, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 0.01320959534496069, | |
| "learning_rate": 0.00011002000000000001, | |
| "loss": 0.1582, | |
| "mean_token_accuracy": 0.9367985218763352, | |
| "num_tokens": 524205.0, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.904, | |
| "grad_norm": 0.016331197693943977, | |
| "learning_rate": 0.00010962000000000001, | |
| "loss": 0.1577, | |
| "mean_token_accuracy": 0.9359941184520721, | |
| "num_tokens": 526544.0, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.908, | |
| "grad_norm": 0.011050857603549957, | |
| "learning_rate": 0.00010922000000000001, | |
| "loss": 0.1577, | |
| "mean_token_accuracy": 0.9339568525552749, | |
| "num_tokens": 528887.0, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.912, | |
| "grad_norm": 0.010212858207523823, | |
| "learning_rate": 0.00010882, | |
| "loss": 0.1573, | |
| "mean_token_accuracy": 0.9375766962766647, | |
| "num_tokens": 531234.0, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.916, | |
| "grad_norm": 0.02018802985548973, | |
| "learning_rate": 0.00010842000000000001, | |
| "loss": 0.1569, | |
| "mean_token_accuracy": 0.9405841529369354, | |
| "num_tokens": 533583.0, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 0.020224014297127724, | |
| "learning_rate": 0.00010802000000000001, | |
| "loss": 0.159, | |
| "mean_token_accuracy": 0.9367424637079239, | |
| "num_tokens": 535905.0, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.924, | |
| "grad_norm": 0.01311077456921339, | |
| "learning_rate": 0.00010762, | |
| "loss": 0.1604, | |
| "mean_token_accuracy": 0.9359079092741013, | |
| "num_tokens": 538208.0, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 0.012784361839294434, | |
| "learning_rate": 0.00010722000000000001, | |
| "loss": 0.1588, | |
| "mean_token_accuracy": 0.933400297164917, | |
| "num_tokens": 540532.0, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.932, | |
| "grad_norm": 0.01144441869109869, | |
| "learning_rate": 0.00010682000000000001, | |
| "loss": 0.1576, | |
| "mean_token_accuracy": 0.9386591941118241, | |
| "num_tokens": 542872.0, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.936, | |
| "grad_norm": 0.012654446065425873, | |
| "learning_rate": 0.00010642000000000001, | |
| "loss": 0.1582, | |
| "mean_token_accuracy": 0.9370446026325225, | |
| "num_tokens": 545204.0, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 0.011317115277051926, | |
| "learning_rate": 0.00010602, | |
| "loss": 0.1561, | |
| "mean_token_accuracy": 0.940504989027977, | |
| "num_tokens": 547566.0, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.944, | |
| "grad_norm": 0.010913815349340439, | |
| "learning_rate": 0.00010562000000000001, | |
| "loss": 0.1594, | |
| "mean_token_accuracy": 0.9370900303125381, | |
| "num_tokens": 549883.0, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.948, | |
| "grad_norm": 0.013113110326230526, | |
| "learning_rate": 0.00010522000000000001, | |
| "loss": 0.1591, | |
| "mean_token_accuracy": 0.9380158931016922, | |
| "num_tokens": 552201.0, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.952, | |
| "grad_norm": 0.011078396812081337, | |
| "learning_rate": 0.00010482, | |
| "loss": 0.1576, | |
| "mean_token_accuracy": 0.9394772380590439, | |
| "num_tokens": 554539.0, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.956, | |
| "grad_norm": 0.011660384945571423, | |
| "learning_rate": 0.00010442, | |
| "loss": 0.1601, | |
| "mean_token_accuracy": 0.9355088382959366, | |
| "num_tokens": 556846.0, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.011648285202682018, | |
| "learning_rate": 0.00010402000000000001, | |
| "loss": 0.1573, | |
| "mean_token_accuracy": 0.9378848969936371, | |
| "num_tokens": 559191.0, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.964, | |
| "grad_norm": 0.012828272767364979, | |
| "learning_rate": 0.00010362, | |
| "loss": 0.1566, | |
| "mean_token_accuracy": 0.936909893155098, | |
| "num_tokens": 561548.0, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.968, | |
| "grad_norm": 0.015896698459982872, | |
| "learning_rate": 0.00010322, | |
| "loss": 0.1596, | |
| "mean_token_accuracy": 0.9348096013069153, | |
| "num_tokens": 563862.0, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.972, | |
| "grad_norm": 0.016044626012444496, | |
| "learning_rate": 0.00010282000000000001, | |
| "loss": 0.1606, | |
| "mean_token_accuracy": 0.9357615917921066, | |
| "num_tokens": 566161.0, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.976, | |
| "grad_norm": 0.010811380110681057, | |
| "learning_rate": 0.00010242, | |
| "loss": 0.1576, | |
| "mean_token_accuracy": 0.9412378251552582, | |
| "num_tokens": 568500.0, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 0.01213027909398079, | |
| "learning_rate": 0.00010202, | |
| "loss": 0.1589, | |
| "mean_token_accuracy": 0.9359610795974731, | |
| "num_tokens": 570823.0, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.984, | |
| "grad_norm": 0.012461444362998009, | |
| "learning_rate": 0.00010162, | |
| "loss": 0.1578, | |
| "mean_token_accuracy": 0.9338973581790924, | |
| "num_tokens": 573165.0, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.988, | |
| "grad_norm": 0.009025659412145615, | |
| "learning_rate": 0.00010122000000000001, | |
| "loss": 0.1568, | |
| "mean_token_accuracy": 0.9369413673877716, | |
| "num_tokens": 575520.0, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 0.010953028686344624, | |
| "learning_rate": 0.00010082, | |
| "loss": 0.1591, | |
| "mean_token_accuracy": 0.9341312050819397, | |
| "num_tokens": 577840.0, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.996, | |
| "grad_norm": 0.00946497917175293, | |
| "learning_rate": 0.00010042, | |
| "loss": 0.1575, | |
| "mean_token_accuracy": 0.9408338129520416, | |
| "num_tokens": 580179.0, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.009777408093214035, | |
| "learning_rate": 0.00010002000000000001, | |
| "loss": 0.159, | |
| "mean_token_accuracy": 0.935635381937027, | |
| "num_tokens": 582500.0, | |
| "step": 5000 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 10000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7056874607118336.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |