| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 1560, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0032102728731942215, |
| "grad_norm": 2.4753810438494113, |
| "learning_rate": 1.4893617021276595e-07, |
| "loss": 0.7879232168197632, |
| "step": 1, |
| "token_acc": 0.7756388598457696 |
| }, |
| { |
| "epoch": 0.006420545746388443, |
| "grad_norm": 2.0688547647937128, |
| "learning_rate": 2.978723404255319e-07, |
| "loss": 0.8343099355697632, |
| "step": 2, |
| "token_acc": 0.7691886054104653 |
| }, |
| { |
| "epoch": 0.009630818619582664, |
| "grad_norm": 2.2647251246154494, |
| "learning_rate": 4.4680851063829783e-07, |
| "loss": 0.8496907949447632, |
| "step": 3, |
| "token_acc": 0.7638933733394063 |
| }, |
| { |
| "epoch": 0.012841091492776886, |
| "grad_norm": 2.309927904076833, |
| "learning_rate": 5.957446808510638e-07, |
| "loss": 0.84716796875, |
| "step": 4, |
| "token_acc": 0.7630275625571407 |
| }, |
| { |
| "epoch": 0.016051364365971106, |
| "grad_norm": 2.537086534556216, |
| "learning_rate": 7.446808510638298e-07, |
| "loss": 0.85986328125, |
| "step": 5, |
| "token_acc": 0.7569718906167684 |
| }, |
| { |
| "epoch": 0.019261637239165328, |
| "grad_norm": 2.4172209878112727, |
| "learning_rate": 8.936170212765957e-07, |
| "loss": 0.800048828125, |
| "step": 6, |
| "token_acc": 0.7704034280523686 |
| }, |
| { |
| "epoch": 0.02247191011235955, |
| "grad_norm": 2.2999894579971696, |
| "learning_rate": 1.0425531914893615e-06, |
| "loss": 0.8470052480697632, |
| "step": 7, |
| "token_acc": 0.7622465717309417 |
| }, |
| { |
| "epoch": 0.025682182985553772, |
| "grad_norm": 1.964783949379289, |
| "learning_rate": 1.1914893617021276e-06, |
| "loss": 0.83154296875, |
| "step": 8, |
| "token_acc": 0.76941155597467 |
| }, |
| { |
| "epoch": 0.028892455858747994, |
| "grad_norm": 1.9180672449757712, |
| "learning_rate": 1.3404255319148935e-06, |
| "loss": 0.7979329824447632, |
| "step": 9, |
| "token_acc": 0.7754702921919281 |
| }, |
| { |
| "epoch": 0.03210272873194221, |
| "grad_norm": 2.240827763658055, |
| "learning_rate": 1.4893617021276596e-06, |
| "loss": 0.8218180537223816, |
| "step": 10, |
| "token_acc": 0.7658292813448604 |
| }, |
| { |
| "epoch": 0.03531300160513644, |
| "grad_norm": 1.8783322418040913, |
| "learning_rate": 1.6382978723404255e-06, |
| "loss": 0.8649088740348816, |
| "step": 11, |
| "token_acc": 0.7554619804912056 |
| }, |
| { |
| "epoch": 0.038523274478330656, |
| "grad_norm": 1.3644736752145021, |
| "learning_rate": 1.7872340425531913e-06, |
| "loss": 0.7535807490348816, |
| "step": 12, |
| "token_acc": 0.7855808513669389 |
| }, |
| { |
| "epoch": 0.04173354735152488, |
| "grad_norm": 1.4208147986467878, |
| "learning_rate": 1.9361702127659576e-06, |
| "loss": 0.7366536855697632, |
| "step": 13, |
| "token_acc": 0.7883947769962283 |
| }, |
| { |
| "epoch": 0.0449438202247191, |
| "grad_norm": 1.421550488421433, |
| "learning_rate": 2.085106382978723e-06, |
| "loss": 0.742919921875, |
| "step": 14, |
| "token_acc": 0.7863493121733124 |
| }, |
| { |
| "epoch": 0.048154093097913325, |
| "grad_norm": 1.3022390396539427, |
| "learning_rate": 2.2340425531914894e-06, |
| "loss": 0.7919921875, |
| "step": 15, |
| "token_acc": 0.7746175480275829 |
| }, |
| { |
| "epoch": 0.051364365971107544, |
| "grad_norm": 1.1231071316042414, |
| "learning_rate": 2.3829787234042553e-06, |
| "loss": 0.7566325068473816, |
| "step": 16, |
| "token_acc": 0.7794038646429666 |
| }, |
| { |
| "epoch": 0.05457463884430177, |
| "grad_norm": 0.9333351622252422, |
| "learning_rate": 2.5319148936170216e-06, |
| "loss": 0.7769368886947632, |
| "step": 17, |
| "token_acc": 0.777008609403375 |
| }, |
| { |
| "epoch": 0.05778491171749599, |
| "grad_norm": 0.929802928146088, |
| "learning_rate": 2.680851063829787e-06, |
| "loss": 0.7835286855697632, |
| "step": 18, |
| "token_acc": 0.772856360370574 |
| }, |
| { |
| "epoch": 0.060995184590690206, |
| "grad_norm": 0.9066023006988848, |
| "learning_rate": 2.829787234042553e-06, |
| "loss": 0.7682291865348816, |
| "step": 19, |
| "token_acc": 0.7768975575262956 |
| }, |
| { |
| "epoch": 0.06420545746388442, |
| "grad_norm": 0.7225896156448947, |
| "learning_rate": 2.978723404255319e-06, |
| "loss": 0.7460123896598816, |
| "step": 20, |
| "token_acc": 0.7847207901601844 |
| }, |
| { |
| "epoch": 0.06741573033707865, |
| "grad_norm": 0.7109085666571867, |
| "learning_rate": 3.127659574468085e-06, |
| "loss": 0.7208659052848816, |
| "step": 21, |
| "token_acc": 0.7857597824218354 |
| }, |
| { |
| "epoch": 0.07062600321027288, |
| "grad_norm": 0.8150780472279927, |
| "learning_rate": 3.276595744680851e-06, |
| "loss": 0.7197265625, |
| "step": 22, |
| "token_acc": 0.7923169108386342 |
| }, |
| { |
| "epoch": 0.0738362760834671, |
| "grad_norm": 0.7753241464505493, |
| "learning_rate": 3.425531914893617e-06, |
| "loss": 0.6572672724723816, |
| "step": 23, |
| "token_acc": 0.8042959454201616 |
| }, |
| { |
| "epoch": 0.07704654895666131, |
| "grad_norm": 1.0500219315630146, |
| "learning_rate": 3.5744680851063827e-06, |
| "loss": 0.7864583730697632, |
| "step": 24, |
| "token_acc": 0.7682198177819095 |
| }, |
| { |
| "epoch": 0.08025682182985554, |
| "grad_norm": 1.0637751453192557, |
| "learning_rate": 3.723404255319149e-06, |
| "loss": 0.7196452021598816, |
| "step": 25, |
| "token_acc": 0.7878780656644498 |
| }, |
| { |
| "epoch": 0.08346709470304976, |
| "grad_norm": 1.122751448745593, |
| "learning_rate": 3.872340425531915e-06, |
| "loss": 0.763916015625, |
| "step": 26, |
| "token_acc": 0.7736624491344681 |
| }, |
| { |
| "epoch": 0.08667736757624397, |
| "grad_norm": 0.8175549093574106, |
| "learning_rate": 4.0212765957446816e-06, |
| "loss": 0.7271322011947632, |
| "step": 27, |
| "token_acc": 0.7838902363141947 |
| }, |
| { |
| "epoch": 0.0898876404494382, |
| "grad_norm": 0.8938896614780193, |
| "learning_rate": 4.170212765957446e-06, |
| "loss": 0.710205078125, |
| "step": 28, |
| "token_acc": 0.7874908688244899 |
| }, |
| { |
| "epoch": 0.09309791332263243, |
| "grad_norm": 0.7111591256473304, |
| "learning_rate": 4.3191489361702125e-06, |
| "loss": 0.680419921875, |
| "step": 29, |
| "token_acc": 0.7951899206740415 |
| }, |
| { |
| "epoch": 0.09630818619582665, |
| "grad_norm": 0.5553474125446014, |
| "learning_rate": 4.468085106382979e-06, |
| "loss": 0.715576171875, |
| "step": 30, |
| "token_acc": 0.78816810172129 |
| }, |
| { |
| "epoch": 0.09951845906902086, |
| "grad_norm": 0.6125224035794444, |
| "learning_rate": 4.617021276595744e-06, |
| "loss": 0.7332357168197632, |
| "step": 31, |
| "token_acc": 0.7818265572355082 |
| }, |
| { |
| "epoch": 0.10272873194221509, |
| "grad_norm": 0.5248667594940402, |
| "learning_rate": 4.7659574468085105e-06, |
| "loss": 0.6513671875, |
| "step": 32, |
| "token_acc": 0.8066042872298029 |
| }, |
| { |
| "epoch": 0.10593900481540931, |
| "grad_norm": 0.6190819259396853, |
| "learning_rate": 4.914893617021277e-06, |
| "loss": 0.7200521230697632, |
| "step": 33, |
| "token_acc": 0.7850674209358465 |
| }, |
| { |
| "epoch": 0.10914927768860354, |
| "grad_norm": 0.6451198104159461, |
| "learning_rate": 5.063829787234043e-06, |
| "loss": 0.6764323115348816, |
| "step": 34, |
| "token_acc": 0.7948570707957826 |
| }, |
| { |
| "epoch": 0.11235955056179775, |
| "grad_norm": 0.5786823323345861, |
| "learning_rate": 5.2127659574468086e-06, |
| "loss": 0.6573486328125, |
| "step": 35, |
| "token_acc": 0.7999205209200293 |
| }, |
| { |
| "epoch": 0.11556982343499198, |
| "grad_norm": 0.5871758694058516, |
| "learning_rate": 5.361702127659574e-06, |
| "loss": 0.7178548574447632, |
| "step": 36, |
| "token_acc": 0.7870625485393392 |
| }, |
| { |
| "epoch": 0.1187800963081862, |
| "grad_norm": 0.5369227527562553, |
| "learning_rate": 5.51063829787234e-06, |
| "loss": 0.6741536855697632, |
| "step": 37, |
| "token_acc": 0.795958329652928 |
| }, |
| { |
| "epoch": 0.12199036918138041, |
| "grad_norm": 0.42895258820168175, |
| "learning_rate": 5.659574468085106e-06, |
| "loss": 0.706787109375, |
| "step": 38, |
| "token_acc": 0.788144228221681 |
| }, |
| { |
| "epoch": 0.12520064205457465, |
| "grad_norm": 0.43361997762214843, |
| "learning_rate": 5.808510638297872e-06, |
| "loss": 0.7215983271598816, |
| "step": 39, |
| "token_acc": 0.7811117204862373 |
| }, |
| { |
| "epoch": 0.12841091492776885, |
| "grad_norm": 0.39452497235838196, |
| "learning_rate": 5.957446808510638e-06, |
| "loss": 0.6888021230697632, |
| "step": 40, |
| "token_acc": 0.7913482530959579 |
| }, |
| { |
| "epoch": 0.13162118780096307, |
| "grad_norm": 0.38163409173267143, |
| "learning_rate": 6.106382978723405e-06, |
| "loss": 0.7011312246322632, |
| "step": 41, |
| "token_acc": 0.7881589276009903 |
| }, |
| { |
| "epoch": 0.1348314606741573, |
| "grad_norm": 0.4436482274993076, |
| "learning_rate": 6.25531914893617e-06, |
| "loss": 0.6253255605697632, |
| "step": 42, |
| "token_acc": 0.8081617238255353 |
| }, |
| { |
| "epoch": 0.13804173354735153, |
| "grad_norm": 0.4375956803307934, |
| "learning_rate": 6.404255319148936e-06, |
| "loss": 0.6582845449447632, |
| "step": 43, |
| "token_acc": 0.7959535510226482 |
| }, |
| { |
| "epoch": 0.14125200642054575, |
| "grad_norm": 0.39330235188333057, |
| "learning_rate": 6.553191489361702e-06, |
| "loss": 0.6446126699447632, |
| "step": 44, |
| "token_acc": 0.8014317040118041 |
| }, |
| { |
| "epoch": 0.14446227929373998, |
| "grad_norm": 0.37194650846262567, |
| "learning_rate": 6.702127659574468e-06, |
| "loss": 0.6810709834098816, |
| "step": 45, |
| "token_acc": 0.792782252006574 |
| }, |
| { |
| "epoch": 0.1476725521669342, |
| "grad_norm": 0.31530649176304015, |
| "learning_rate": 6.851063829787234e-06, |
| "loss": 0.6333822011947632, |
| "step": 46, |
| "token_acc": 0.8059375415995946 |
| }, |
| { |
| "epoch": 0.1508828250401284, |
| "grad_norm": 0.31437722737003226, |
| "learning_rate": 7e-06, |
| "loss": 0.630126953125, |
| "step": 47, |
| "token_acc": 0.8044129458240332 |
| }, |
| { |
| "epoch": 0.15409309791332262, |
| "grad_norm": 0.274516902775598, |
| "learning_rate": 6.999992454990655e-06, |
| "loss": 0.6744791865348816, |
| "step": 48, |
| "token_acc": 0.7951283361151459 |
| }, |
| { |
| "epoch": 0.15730337078651685, |
| "grad_norm": 0.3108158442861642, |
| "learning_rate": 6.999969819995152e-06, |
| "loss": 0.6378580927848816, |
| "step": 49, |
| "token_acc": 0.8030468177870123 |
| }, |
| { |
| "epoch": 0.16051364365971107, |
| "grad_norm": 0.3116005863059165, |
| "learning_rate": 6.999932095111077e-06, |
| "loss": 0.6702067255973816, |
| "step": 50, |
| "token_acc": 0.7972457736243002 |
| }, |
| { |
| "epoch": 0.1637239165329053, |
| "grad_norm": 0.32992462397943517, |
| "learning_rate": 6.999879280501081e-06, |
| "loss": 0.572021484375, |
| "step": 51, |
| "token_acc": 0.823265086079219 |
| }, |
| { |
| "epoch": 0.16693418940609953, |
| "grad_norm": 0.3633482687659926, |
| "learning_rate": 6.999811376392871e-06, |
| "loss": 0.6148681640625, |
| "step": 52, |
| "token_acc": 0.809984832111816 |
| }, |
| { |
| "epoch": 0.17014446227929375, |
| "grad_norm": 0.30599321045708705, |
| "learning_rate": 6.999728383079208e-06, |
| "loss": 0.62841796875, |
| "step": 53, |
| "token_acc": 0.8064036729664908 |
| }, |
| { |
| "epoch": 0.17335473515248795, |
| "grad_norm": 0.25905438447963114, |
| "learning_rate": 6.999630300917915e-06, |
| "loss": 0.6336263418197632, |
| "step": 54, |
| "token_acc": 0.8045163559291338 |
| }, |
| { |
| "epoch": 0.17656500802568217, |
| "grad_norm": 0.2447879175964714, |
| "learning_rate": 6.999517130331867e-06, |
| "loss": 0.5997314453125, |
| "step": 55, |
| "token_acc": 0.8159714374370309 |
| }, |
| { |
| "epoch": 0.1797752808988764, |
| "grad_norm": 0.2974010229219283, |
| "learning_rate": 6.999388871808989e-06, |
| "loss": 0.6444498896598816, |
| "step": 56, |
| "token_acc": 0.8016444560621508 |
| }, |
| { |
| "epoch": 0.18298555377207062, |
| "grad_norm": 0.27346434417319015, |
| "learning_rate": 6.999245525902262e-06, |
| "loss": 0.6520182490348816, |
| "step": 57, |
| "token_acc": 0.8006354831734891 |
| }, |
| { |
| "epoch": 0.18619582664526485, |
| "grad_norm": 0.24593285011001234, |
| "learning_rate": 6.9990870932297095e-06, |
| "loss": 0.6388346552848816, |
| "step": 58, |
| "token_acc": 0.804055135767979 |
| }, |
| { |
| "epoch": 0.18940609951845908, |
| "grad_norm": 0.2748750065623674, |
| "learning_rate": 6.998913574474406e-06, |
| "loss": 0.65362548828125, |
| "step": 59, |
| "token_acc": 0.8014784291270444 |
| }, |
| { |
| "epoch": 0.1926163723916533, |
| "grad_norm": 0.2968798961595123, |
| "learning_rate": 6.998724970384465e-06, |
| "loss": 0.6461588740348816, |
| "step": 60, |
| "token_acc": 0.7991732757932588 |
| }, |
| { |
| "epoch": 0.1958266452648475, |
| "grad_norm": 0.23922264245631886, |
| "learning_rate": 6.998521281773041e-06, |
| "loss": 0.6253255605697632, |
| "step": 61, |
| "token_acc": 0.8057149527805801 |
| }, |
| { |
| "epoch": 0.19903691813804172, |
| "grad_norm": 0.22117484958176203, |
| "learning_rate": 6.998302509518322e-06, |
| "loss": 0.5834554433822632, |
| "step": 62, |
| "token_acc": 0.8194585867590003 |
| }, |
| { |
| "epoch": 0.20224719101123595, |
| "grad_norm": 0.2369306420451283, |
| "learning_rate": 6.998068654563534e-06, |
| "loss": 0.6092122793197632, |
| "step": 63, |
| "token_acc": 0.8114008548966387 |
| }, |
| { |
| "epoch": 0.20545746388443017, |
| "grad_norm": 0.2374452306327778, |
| "learning_rate": 6.997819717916924e-06, |
| "loss": 0.57958984375, |
| "step": 64, |
| "token_acc": 0.8179867806451919 |
| }, |
| { |
| "epoch": 0.2086677367576244, |
| "grad_norm": 0.24491102842825901, |
| "learning_rate": 6.997555700651767e-06, |
| "loss": 0.6301676630973816, |
| "step": 65, |
| "token_acc": 0.8041523895074651 |
| }, |
| { |
| "epoch": 0.21187800963081863, |
| "grad_norm": 0.2533635108541559, |
| "learning_rate": 6.997276603906356e-06, |
| "loss": 0.6229248046875, |
| "step": 66, |
| "token_acc": 0.8068026927120842 |
| }, |
| { |
| "epoch": 0.21508828250401285, |
| "grad_norm": 0.25272404960266603, |
| "learning_rate": 6.996982428883997e-06, |
| "loss": 0.6161295771598816, |
| "step": 67, |
| "token_acc": 0.8068630844776081 |
| }, |
| { |
| "epoch": 0.21829855537720708, |
| "grad_norm": 0.24472278025695055, |
| "learning_rate": 6.996673176853009e-06, |
| "loss": 0.64013671875, |
| "step": 68, |
| "token_acc": 0.8036322295114298 |
| }, |
| { |
| "epoch": 0.22150882825040127, |
| "grad_norm": 0.2854371340953023, |
| "learning_rate": 6.9963488491467085e-06, |
| "loss": 0.6022135615348816, |
| "step": 69, |
| "token_acc": 0.8134928438501238 |
| }, |
| { |
| "epoch": 0.2247191011235955, |
| "grad_norm": 0.25148079147298236, |
| "learning_rate": 6.996009447163415e-06, |
| "loss": 0.6416015625, |
| "step": 70, |
| "token_acc": 0.8002263049431778 |
| }, |
| { |
| "epoch": 0.22792937399678972, |
| "grad_norm": 0.3285179445881002, |
| "learning_rate": 6.995654972366437e-06, |
| "loss": 0.6038411855697632, |
| "step": 71, |
| "token_acc": 0.8102099753673148 |
| }, |
| { |
| "epoch": 0.23113964686998395, |
| "grad_norm": 0.27484980697196754, |
| "learning_rate": 6.995285426284069e-06, |
| "loss": 0.6334635615348816, |
| "step": 72, |
| "token_acc": 0.8013070425989607 |
| }, |
| { |
| "epoch": 0.23434991974317818, |
| "grad_norm": 0.22943171657225794, |
| "learning_rate": 6.994900810509586e-06, |
| "loss": 0.62158203125, |
| "step": 73, |
| "token_acc": 0.8064510438728842 |
| }, |
| { |
| "epoch": 0.2375601926163724, |
| "grad_norm": 0.22114969362856388, |
| "learning_rate": 6.994501126701231e-06, |
| "loss": 0.607666015625, |
| "step": 74, |
| "token_acc": 0.8105192034063484 |
| }, |
| { |
| "epoch": 0.24077046548956663, |
| "grad_norm": 0.21411939799484717, |
| "learning_rate": 6.994086376582216e-06, |
| "loss": 0.64404296875, |
| "step": 75, |
| "token_acc": 0.8001576820735192 |
| }, |
| { |
| "epoch": 0.24398073836276082, |
| "grad_norm": 0.30133931257007796, |
| "learning_rate": 6.993656561940708e-06, |
| "loss": 0.6025797724723816, |
| "step": 76, |
| "token_acc": 0.8135537136285721 |
| }, |
| { |
| "epoch": 0.24719101123595505, |
| "grad_norm": 0.23684167938690207, |
| "learning_rate": 6.993211684629825e-06, |
| "loss": 0.628662109375, |
| "step": 77, |
| "token_acc": 0.8000817371987206 |
| }, |
| { |
| "epoch": 0.2504012841091493, |
| "grad_norm": 0.22858724073281955, |
| "learning_rate": 6.992751746567627e-06, |
| "loss": 0.58447265625, |
| "step": 78, |
| "token_acc": 0.8176294504797399 |
| }, |
| { |
| "epoch": 0.2536115569823435, |
| "grad_norm": 0.2266475406878526, |
| "learning_rate": 6.9922767497371035e-06, |
| "loss": 0.6127523183822632, |
| "step": 79, |
| "token_acc": 0.8079507163086846 |
| }, |
| { |
| "epoch": 0.2568218298555377, |
| "grad_norm": 0.2103005674438925, |
| "learning_rate": 6.991786696186174e-06, |
| "loss": 0.5852457880973816, |
| "step": 80, |
| "token_acc": 0.8153684854626521 |
| }, |
| { |
| "epoch": 0.26003210272873195, |
| "grad_norm": 0.22723109993760274, |
| "learning_rate": 6.9912815880276726e-06, |
| "loss": 0.6097819209098816, |
| "step": 81, |
| "token_acc": 0.8103663148795159 |
| }, |
| { |
| "epoch": 0.26324237560192615, |
| "grad_norm": 0.29984159529039645, |
| "learning_rate": 6.990761427439339e-06, |
| "loss": 0.6161702871322632, |
| "step": 82, |
| "token_acc": 0.807772867606998 |
| }, |
| { |
| "epoch": 0.2664526484751204, |
| "grad_norm": 0.20165801770814, |
| "learning_rate": 6.990226216663812e-06, |
| "loss": 0.6199544668197632, |
| "step": 83, |
| "token_acc": 0.8052925761371553 |
| }, |
| { |
| "epoch": 0.2696629213483146, |
| "grad_norm": 0.20292966725426143, |
| "learning_rate": 6.989675958008616e-06, |
| "loss": 0.6083984375, |
| "step": 84, |
| "token_acc": 0.8112003737594581 |
| }, |
| { |
| "epoch": 0.27287319422150885, |
| "grad_norm": 0.23864143651314107, |
| "learning_rate": 6.9891106538461556e-06, |
| "loss": 0.6287435293197632, |
| "step": 85, |
| "token_acc": 0.8035761022475708 |
| }, |
| { |
| "epoch": 0.27608346709470305, |
| "grad_norm": 0.20165640241083152, |
| "learning_rate": 6.988530306613702e-06, |
| "loss": 0.562744140625, |
| "step": 86, |
| "token_acc": 0.8244872851494846 |
| }, |
| { |
| "epoch": 0.27929373996789725, |
| "grad_norm": 0.24403191645122582, |
| "learning_rate": 6.987934918813385e-06, |
| "loss": 0.6338704824447632, |
| "step": 87, |
| "token_acc": 0.7988280640081371 |
| }, |
| { |
| "epoch": 0.2825040128410915, |
| "grad_norm": 0.19545947759314103, |
| "learning_rate": 6.987324493012178e-06, |
| "loss": 0.5802001953125, |
| "step": 88, |
| "token_acc": 0.8199500271690323 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.2253804481388312, |
| "learning_rate": 6.986699031841892e-06, |
| "loss": 0.595947265625, |
| "step": 89, |
| "token_acc": 0.8126973130192251 |
| }, |
| { |
| "epoch": 0.28892455858747995, |
| "grad_norm": 0.21931866178043172, |
| "learning_rate": 6.986058537999162e-06, |
| "loss": 0.5707194209098816, |
| "step": 90, |
| "token_acc": 0.8200324964871083 |
| }, |
| { |
| "epoch": 0.29213483146067415, |
| "grad_norm": 0.20499347869349804, |
| "learning_rate": 6.9854030142454365e-06, |
| "loss": 0.6116536855697632, |
| "step": 91, |
| "token_acc": 0.8094465472802599 |
| }, |
| { |
| "epoch": 0.2953451043338684, |
| "grad_norm": 0.2250303266125255, |
| "learning_rate": 6.98473246340696e-06, |
| "loss": 0.599365234375, |
| "step": 92, |
| "token_acc": 0.811629323867537 |
| }, |
| { |
| "epoch": 0.2985553772070626, |
| "grad_norm": 0.2085257553005371, |
| "learning_rate": 6.98404688837477e-06, |
| "loss": 0.579345703125, |
| "step": 93, |
| "token_acc": 0.8167167856323254 |
| }, |
| { |
| "epoch": 0.3017656500802568, |
| "grad_norm": 0.2245820178674568, |
| "learning_rate": 6.983346292104677e-06, |
| "loss": 0.6009928584098816, |
| "step": 94, |
| "token_acc": 0.8108060415463085 |
| }, |
| { |
| "epoch": 0.30497592295345105, |
| "grad_norm": 0.23427956495661934, |
| "learning_rate": 6.982630677617255e-06, |
| "loss": 0.6172689199447632, |
| "step": 95, |
| "token_acc": 0.8067812881737619 |
| }, |
| { |
| "epoch": 0.30818619582664525, |
| "grad_norm": 0.2338919293838274, |
| "learning_rate": 6.98190004799783e-06, |
| "loss": 0.5944417715072632, |
| "step": 96, |
| "token_acc": 0.8127140785908364 |
| }, |
| { |
| "epoch": 0.3113964686998395, |
| "grad_norm": 0.24889797189768018, |
| "learning_rate": 6.981154406396462e-06, |
| "loss": 0.5865072011947632, |
| "step": 97, |
| "token_acc": 0.8161886338728965 |
| }, |
| { |
| "epoch": 0.3146067415730337, |
| "grad_norm": 0.20962406790387186, |
| "learning_rate": 6.980393756027937e-06, |
| "loss": 0.5, |
| "step": 98, |
| "token_acc": 0.8442178208592472 |
| }, |
| { |
| "epoch": 0.31781701444622795, |
| "grad_norm": 0.2289159905663413, |
| "learning_rate": 6.979618100171748e-06, |
| "loss": 0.5677490234375, |
| "step": 99, |
| "token_acc": 0.8194265030485797 |
| }, |
| { |
| "epoch": 0.32102728731942215, |
| "grad_norm": 0.24122198762820238, |
| "learning_rate": 6.978827442172083e-06, |
| "loss": 0.5986735224723816, |
| "step": 100, |
| "token_acc": 0.8116517932311837 |
| }, |
| { |
| "epoch": 0.32423756019261635, |
| "grad_norm": 0.22373246065875144, |
| "learning_rate": 6.978021785437813e-06, |
| "loss": 0.5494791865348816, |
| "step": 101, |
| "token_acc": 0.8246630012780631 |
| }, |
| { |
| "epoch": 0.3274478330658106, |
| "grad_norm": 0.23636873692563587, |
| "learning_rate": 6.9772011334424736e-06, |
| "loss": 0.633056640625, |
| "step": 102, |
| "token_acc": 0.8022693088313757 |
| }, |
| { |
| "epoch": 0.3306581059390048, |
| "grad_norm": 0.22038734453112976, |
| "learning_rate": 6.976365489724251e-06, |
| "loss": 0.5868327021598816, |
| "step": 103, |
| "token_acc": 0.8138595932069553 |
| }, |
| { |
| "epoch": 0.33386837881219905, |
| "grad_norm": 0.22408692701933014, |
| "learning_rate": 6.975514857885968e-06, |
| "loss": 0.5525309443473816, |
| "step": 104, |
| "token_acc": 0.8250399138957633 |
| }, |
| { |
| "epoch": 0.33707865168539325, |
| "grad_norm": 0.23880530373560283, |
| "learning_rate": 6.974649241595068e-06, |
| "loss": 0.5885009765625, |
| "step": 105, |
| "token_acc": 0.8135449856905108 |
| }, |
| { |
| "epoch": 0.3402889245585875, |
| "grad_norm": 0.2219089889348867, |
| "learning_rate": 6.973768644583598e-06, |
| "loss": 0.5853678584098816, |
| "step": 106, |
| "token_acc": 0.8156692860923206 |
| }, |
| { |
| "epoch": 0.3434991974317817, |
| "grad_norm": 0.24434844798833436, |
| "learning_rate": 6.972873070648195e-06, |
| "loss": 0.6019694209098816, |
| "step": 107, |
| "token_acc": 0.8092352417680213 |
| }, |
| { |
| "epoch": 0.3467094703049759, |
| "grad_norm": 0.24131505642984427, |
| "learning_rate": 6.971962523650066e-06, |
| "loss": 0.5548909902572632, |
| "step": 108, |
| "token_acc": 0.8241489043461055 |
| }, |
| { |
| "epoch": 0.34991974317817015, |
| "grad_norm": 0.21370848840195006, |
| "learning_rate": 6.971037007514973e-06, |
| "loss": 0.5423991084098816, |
| "step": 109, |
| "token_acc": 0.8294367863882048 |
| }, |
| { |
| "epoch": 0.35313001605136435, |
| "grad_norm": 0.21529534769229164, |
| "learning_rate": 6.970096526233219e-06, |
| "loss": 0.5777181386947632, |
| "step": 110, |
| "token_acc": 0.8161277117442256 |
| }, |
| { |
| "epoch": 0.3563402889245586, |
| "grad_norm": 0.2126697222990689, |
| "learning_rate": 6.9691410838596274e-06, |
| "loss": 0.539794921875, |
| "step": 111, |
| "token_acc": 0.8310754970924679 |
| }, |
| { |
| "epoch": 0.3595505617977528, |
| "grad_norm": 0.2099849961286063, |
| "learning_rate": 6.9681706845135235e-06, |
| "loss": 0.54931640625, |
| "step": 112, |
| "token_acc": 0.8269279755101857 |
| }, |
| { |
| "epoch": 0.36276083467094705, |
| "grad_norm": 0.19964465289795394, |
| "learning_rate": 6.96718533237872e-06, |
| "loss": 0.5533854365348816, |
| "step": 113, |
| "token_acc": 0.8252085016943435 |
| }, |
| { |
| "epoch": 0.36597110754414125, |
| "grad_norm": 0.22495124874675887, |
| "learning_rate": 6.9661850317035e-06, |
| "loss": 0.5847982168197632, |
| "step": 114, |
| "token_acc": 0.8134031162952646 |
| }, |
| { |
| "epoch": 0.36918138041733545, |
| "grad_norm": 0.21442270445925285, |
| "learning_rate": 6.96516978680059e-06, |
| "loss": 0.567138671875, |
| "step": 115, |
| "token_acc": 0.819186331796326 |
| }, |
| { |
| "epoch": 0.3723916532905297, |
| "grad_norm": 0.23733459205129076, |
| "learning_rate": 6.964139602047153e-06, |
| "loss": 0.552734375, |
| "step": 116, |
| "token_acc": 0.8262689282403223 |
| }, |
| { |
| "epoch": 0.3756019261637239, |
| "grad_norm": 0.22871584287933092, |
| "learning_rate": 6.963094481884764e-06, |
| "loss": 0.5907389521598816, |
| "step": 117, |
| "token_acc": 0.8136596935605276 |
| }, |
| { |
| "epoch": 0.37881219903691815, |
| "grad_norm": 0.27109815884075683, |
| "learning_rate": 6.962034430819388e-06, |
| "loss": 0.603759765625, |
| "step": 118, |
| "token_acc": 0.807743927578312 |
| }, |
| { |
| "epoch": 0.38202247191011235, |
| "grad_norm": 0.22684991921359934, |
| "learning_rate": 6.960959453421364e-06, |
| "loss": 0.5983480215072632, |
| "step": 119, |
| "token_acc": 0.8135154163800631 |
| }, |
| { |
| "epoch": 0.3852327447833066, |
| "grad_norm": 0.26065427109275385, |
| "learning_rate": 6.959869554325387e-06, |
| "loss": 0.6192220449447632, |
| "step": 120, |
| "token_acc": 0.8068024339349146 |
| }, |
| { |
| "epoch": 0.3884430176565008, |
| "grad_norm": 0.23060650144915434, |
| "learning_rate": 6.958764738230486e-06, |
| "loss": 0.59130859375, |
| "step": 121, |
| "token_acc": 0.8157852810433464 |
| }, |
| { |
| "epoch": 0.391653290529695, |
| "grad_norm": 0.23337115115264662, |
| "learning_rate": 6.957645009900002e-06, |
| "loss": 0.5838623046875, |
| "step": 122, |
| "token_acc": 0.8170180478634359 |
| }, |
| { |
| "epoch": 0.39486356340288925, |
| "grad_norm": 0.22749336370908144, |
| "learning_rate": 6.95651037416157e-06, |
| "loss": 0.542236328125, |
| "step": 123, |
| "token_acc": 0.8281357519840995 |
| }, |
| { |
| "epoch": 0.39807383627608345, |
| "grad_norm": 0.26777825600197724, |
| "learning_rate": 6.955360835907094e-06, |
| "loss": 0.5811361074447632, |
| "step": 124, |
| "token_acc": 0.8172125757480218 |
| }, |
| { |
| "epoch": 0.4012841091492777, |
| "grad_norm": 0.21125480157250714, |
| "learning_rate": 6.9541964000927365e-06, |
| "loss": 0.5238444209098816, |
| "step": 125, |
| "token_acc": 0.8307404356732712 |
| }, |
| { |
| "epoch": 0.4044943820224719, |
| "grad_norm": 0.28968235140244575, |
| "learning_rate": 6.953017071738884e-06, |
| "loss": 0.5297445058822632, |
| "step": 126, |
| "token_acc": 0.8288457580216044 |
| }, |
| { |
| "epoch": 0.40770465489566615, |
| "grad_norm": 0.22532405020503135, |
| "learning_rate": 6.951822855930132e-06, |
| "loss": 0.6102702021598816, |
| "step": 127, |
| "token_acc": 0.8099563264673997 |
| }, |
| { |
| "epoch": 0.41091492776886035, |
| "grad_norm": 0.24207770615521643, |
| "learning_rate": 6.950613757815262e-06, |
| "loss": 0.5835775136947632, |
| "step": 128, |
| "token_acc": 0.8142721823466087 |
| }, |
| { |
| "epoch": 0.41412520064205455, |
| "grad_norm": 0.2818806687612919, |
| "learning_rate": 6.949389782607224e-06, |
| "loss": 0.5836181640625, |
| "step": 129, |
| "token_acc": 0.8151410447642872 |
| }, |
| { |
| "epoch": 0.4173354735152488, |
| "grad_norm": 0.22555436718218647, |
| "learning_rate": 6.948150935583104e-06, |
| "loss": 0.5754801630973816, |
| "step": 130, |
| "token_acc": 0.8165735381099055 |
| }, |
| { |
| "epoch": 0.420545746388443, |
| "grad_norm": 0.22108665350881793, |
| "learning_rate": 6.946897222084108e-06, |
| "loss": 0.495849609375, |
| "step": 131, |
| "token_acc": 0.8415052834998561 |
| }, |
| { |
| "epoch": 0.42375601926163725, |
| "grad_norm": 0.24997735771174617, |
| "learning_rate": 6.945628647515542e-06, |
| "loss": 0.5591227412223816, |
| "step": 132, |
| "token_acc": 0.8239088365246904 |
| }, |
| { |
| "epoch": 0.42696629213483145, |
| "grad_norm": 0.24031477779236896, |
| "learning_rate": 6.944345217346779e-06, |
| "loss": 0.5630697011947632, |
| "step": 133, |
| "token_acc": 0.8211002632665639 |
| }, |
| { |
| "epoch": 0.4301765650080257, |
| "grad_norm": 0.22334587736808414, |
| "learning_rate": 6.943046937111243e-06, |
| "loss": 0.5450846552848816, |
| "step": 134, |
| "token_acc": 0.8270578092652001 |
| }, |
| { |
| "epoch": 0.4333868378812199, |
| "grad_norm": 0.2535913704396145, |
| "learning_rate": 6.9417338124063855e-06, |
| "loss": 0.5626220703125, |
| "step": 135, |
| "token_acc": 0.8230574363105163 |
| }, |
| { |
| "epoch": 0.43659711075441415, |
| "grad_norm": 0.2360783896662543, |
| "learning_rate": 6.940405848893657e-06, |
| "loss": 0.5987955927848816, |
| "step": 136, |
| "token_acc": 0.8105675443353312 |
| }, |
| { |
| "epoch": 0.43980738362760835, |
| "grad_norm": 0.227552351357898, |
| "learning_rate": 6.939063052298481e-06, |
| "loss": 0.5629069209098816, |
| "step": 137, |
| "token_acc": 0.8191981932274853 |
| }, |
| { |
| "epoch": 0.44301765650080255, |
| "grad_norm": 0.23651265721469195, |
| "learning_rate": 6.9377054284102395e-06, |
| "loss": 0.572021484375, |
| "step": 138, |
| "token_acc": 0.8184342833848558 |
| }, |
| { |
| "epoch": 0.4462279293739968, |
| "grad_norm": 0.23035871036509648, |
| "learning_rate": 6.936332983082238e-06, |
| "loss": 0.5441080927848816, |
| "step": 139, |
| "token_acc": 0.8275835179057841 |
| }, |
| { |
| "epoch": 0.449438202247191, |
| "grad_norm": 0.2241414851384322, |
| "learning_rate": 6.934945722231681e-06, |
| "loss": 0.5607503652572632, |
| "step": 140, |
| "token_acc": 0.8231049158892737 |
| }, |
| { |
| "epoch": 0.45264847512038525, |
| "grad_norm": 0.20522800991476106, |
| "learning_rate": 6.933543651839656e-06, |
| "loss": 0.4959309995174408, |
| "step": 141, |
| "token_acc": 0.8439340400471143 |
| }, |
| { |
| "epoch": 0.45585874799357945, |
| "grad_norm": 0.28087736110337097, |
| "learning_rate": 6.932126777951095e-06, |
| "loss": 0.5668131709098816, |
| "step": 142, |
| "token_acc": 0.8209694360085737 |
| }, |
| { |
| "epoch": 0.4590690208667737, |
| "grad_norm": 0.24579741713184028, |
| "learning_rate": 6.930695106674754e-06, |
| "loss": 0.6044921875, |
| "step": 143, |
| "token_acc": 0.8082383181728449 |
| }, |
| { |
| "epoch": 0.4622792937399679, |
| "grad_norm": 0.2186687760996141, |
| "learning_rate": 6.929248644183193e-06, |
| "loss": 0.5465494990348816, |
| "step": 144, |
| "token_acc": 0.8264947550565516 |
| }, |
| { |
| "epoch": 0.4654895666131621, |
| "grad_norm": 0.21880386821151576, |
| "learning_rate": 6.927787396712737e-06, |
| "loss": 0.556884765625, |
| "step": 145, |
| "token_acc": 0.8225188628651489 |
| }, |
| { |
| "epoch": 0.46869983948635635, |
| "grad_norm": 0.2181719869340069, |
| "learning_rate": 6.926311370563459e-06, |
| "loss": 0.5694987177848816, |
| "step": 146, |
| "token_acc": 0.8188421765901659 |
| }, |
| { |
| "epoch": 0.47191011235955055, |
| "grad_norm": 0.24663948215716094, |
| "learning_rate": 6.924820572099146e-06, |
| "loss": 0.5752360224723816, |
| "step": 147, |
| "token_acc": 0.8163516597284418 |
| }, |
| { |
| "epoch": 0.4751203852327448, |
| "grad_norm": 0.21405980314840437, |
| "learning_rate": 6.92331500774728e-06, |
| "loss": 0.5713704824447632, |
| "step": 148, |
| "token_acc": 0.8194312733495944 |
| }, |
| { |
| "epoch": 0.478330658105939, |
| "grad_norm": 0.2288108170237036, |
| "learning_rate": 6.921794683999001e-06, |
| "loss": 0.553466796875, |
| "step": 149, |
| "token_acc": 0.8241651875080771 |
| }, |
| { |
| "epoch": 0.48154093097913325, |
| "grad_norm": 0.22572327698038352, |
| "learning_rate": 6.920259607409083e-06, |
| "loss": 0.5987142324447632, |
| "step": 150, |
| "token_acc": 0.8091493502900298 |
| }, |
| { |
| "epoch": 0.48475120385232745, |
| "grad_norm": 0.23659179885041934, |
| "learning_rate": 6.918709784595909e-06, |
| "loss": 0.5702311396598816, |
| "step": 151, |
| "token_acc": 0.8184588832975306 |
| }, |
| { |
| "epoch": 0.48796147672552165, |
| "grad_norm": 0.2172415460157859, |
| "learning_rate": 6.917145222241438e-06, |
| "loss": 0.55322265625, |
| "step": 152, |
| "token_acc": 0.8255652742967593 |
| }, |
| { |
| "epoch": 0.4911717495987159, |
| "grad_norm": 0.2415770177141649, |
| "learning_rate": 6.915565927091175e-06, |
| "loss": 0.569091796875, |
| "step": 153, |
| "token_acc": 0.8180283259225865 |
| }, |
| { |
| "epoch": 0.4943820224719101, |
| "grad_norm": 0.21994415876302964, |
| "learning_rate": 6.913971905954148e-06, |
| "loss": 0.5682780146598816, |
| "step": 154, |
| "token_acc": 0.8188937695372782 |
| }, |
| { |
| "epoch": 0.49759229534510435, |
| "grad_norm": 0.22183078942159312, |
| "learning_rate": 6.912363165702875e-06, |
| "loss": 0.5708822011947632, |
| "step": 155, |
| "token_acc": 0.820008484414334 |
| }, |
| { |
| "epoch": 0.5008025682182986, |
| "grad_norm": 0.2301899962788731, |
| "learning_rate": 6.910739713273332e-06, |
| "loss": 0.5730794668197632, |
| "step": 156, |
| "token_acc": 0.8183678267873312 |
| }, |
| { |
| "epoch": 0.5040128410914928, |
| "grad_norm": 0.24344159329348375, |
| "learning_rate": 6.909101555664925e-06, |
| "loss": 0.52001953125, |
| "step": 157, |
| "token_acc": 0.8352379923752359 |
| }, |
| { |
| "epoch": 0.507223113964687, |
| "grad_norm": 0.24562350866351212, |
| "learning_rate": 6.907448699940466e-06, |
| "loss": 0.593017578125, |
| "step": 158, |
| "token_acc": 0.8103424645897165 |
| }, |
| { |
| "epoch": 0.5104333868378812, |
| "grad_norm": 0.23059316629978308, |
| "learning_rate": 6.90578115322613e-06, |
| "loss": 0.5559896230697632, |
| "step": 159, |
| "token_acc": 0.8221401735275833 |
| }, |
| { |
| "epoch": 0.5136436597110754, |
| "grad_norm": 0.19449172151921634, |
| "learning_rate": 6.904098922711437e-06, |
| "loss": 0.5581868886947632, |
| "step": 160, |
| "token_acc": 0.821773430889453 |
| }, |
| { |
| "epoch": 0.5168539325842697, |
| "grad_norm": 0.2583978315849545, |
| "learning_rate": 6.902402015649211e-06, |
| "loss": 0.595703125, |
| "step": 161, |
| "token_acc": 0.8105830700080301 |
| }, |
| { |
| "epoch": 0.5200642054574639, |
| "grad_norm": 0.22692930990663654, |
| "learning_rate": 6.900690439355556e-06, |
| "loss": 0.5867513418197632, |
| "step": 162, |
| "token_acc": 0.8146894100986661 |
| }, |
| { |
| "epoch": 0.5232744783306581, |
| "grad_norm": 0.28318555908117266, |
| "learning_rate": 6.898964201209819e-06, |
| "loss": 0.60546875, |
| "step": 163, |
| "token_acc": 0.8075459800056409 |
| }, |
| { |
| "epoch": 0.5264847512038523, |
| "grad_norm": 0.24970083895982967, |
| "learning_rate": 6.897223308654561e-06, |
| "loss": 0.5707194209098816, |
| "step": 164, |
| "token_acc": 0.8176165295145137 |
| }, |
| { |
| "epoch": 0.5296950240770465, |
| "grad_norm": 0.25340900406989025, |
| "learning_rate": 6.895467769195527e-06, |
| "loss": 0.548583984375, |
| "step": 165, |
| "token_acc": 0.825177801435191 |
| }, |
| { |
| "epoch": 0.5329052969502408, |
| "grad_norm": 0.32645258457685933, |
| "learning_rate": 6.8936975904016085e-06, |
| "loss": 0.5934244990348816, |
| "step": 166, |
| "token_acc": 0.8111397321354924 |
| }, |
| { |
| "epoch": 0.536115569823435, |
| "grad_norm": 0.2407030461325421, |
| "learning_rate": 6.891912779904814e-06, |
| "loss": 0.5758463740348816, |
| "step": 167, |
| "token_acc": 0.8148208275034046 |
| }, |
| { |
| "epoch": 0.5393258426966292, |
| "grad_norm": 0.22576411609848393, |
| "learning_rate": 6.8901133454002365e-06, |
| "loss": 0.6165364980697632, |
| "step": 168, |
| "token_acc": 0.8023810162399925 |
| }, |
| { |
| "epoch": 0.5425361155698234, |
| "grad_norm": 0.25395984417353595, |
| "learning_rate": 6.888299294646019e-06, |
| "loss": 0.5718587636947632, |
| "step": 169, |
| "token_acc": 0.81836172332396 |
| }, |
| { |
| "epoch": 0.5457463884430177, |
| "grad_norm": 0.2224281422448542, |
| "learning_rate": 6.8864706354633215e-06, |
| "loss": 0.546630859375, |
| "step": 170, |
| "token_acc": 0.8248056872037914 |
| }, |
| { |
| "epoch": 0.5489566613162119, |
| "grad_norm": 0.22587769476968814, |
| "learning_rate": 6.884627375736286e-06, |
| "loss": 0.5841471552848816, |
| "step": 171, |
| "token_acc": 0.8147992586599647 |
| }, |
| { |
| "epoch": 0.5521669341894061, |
| "grad_norm": 0.2170054892285852, |
| "learning_rate": 6.882769523412006e-06, |
| "loss": 0.5365804433822632, |
| "step": 172, |
| "token_acc": 0.828471935090668 |
| }, |
| { |
| "epoch": 0.5553772070626003, |
| "grad_norm": 0.23690655653269174, |
| "learning_rate": 6.88089708650049e-06, |
| "loss": 0.5306803584098816, |
| "step": 173, |
| "token_acc": 0.8298377031583641 |
| }, |
| { |
| "epoch": 0.5585874799357945, |
| "grad_norm": 0.2314215800076655, |
| "learning_rate": 6.879010073074624e-06, |
| "loss": 0.6065267324447632, |
| "step": 174, |
| "token_acc": 0.8071848070756561 |
| }, |
| { |
| "epoch": 0.5617977528089888, |
| "grad_norm": 0.22361031206178827, |
| "learning_rate": 6.8771084912701436e-06, |
| "loss": 0.5253092646598816, |
| "step": 175, |
| "token_acc": 0.8319281568387744 |
| }, |
| { |
| "epoch": 0.565008025682183, |
| "grad_norm": 0.22176935468268558, |
| "learning_rate": 6.8751923492855915e-06, |
| "loss": 0.5660807490348816, |
| "step": 176, |
| "token_acc": 0.8193021750652316 |
| }, |
| { |
| "epoch": 0.5682182985553772, |
| "grad_norm": 0.24144561116957808, |
| "learning_rate": 6.873261655382288e-06, |
| "loss": 0.5496826171875, |
| "step": 177, |
| "token_acc": 0.8220418344678725 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.21286758327817357, |
| "learning_rate": 6.8713164178842926e-06, |
| "loss": 0.5799967646598816, |
| "step": 178, |
| "token_acc": 0.8143100972023347 |
| }, |
| { |
| "epoch": 0.5746388443017657, |
| "grad_norm": 0.21782450812586338, |
| "learning_rate": 6.8693566451783665e-06, |
| "loss": 0.5998942255973816, |
| "step": 179, |
| "token_acc": 0.8102601393648907 |
| }, |
| { |
| "epoch": 0.5778491171749599, |
| "grad_norm": 0.2324683480162456, |
| "learning_rate": 6.867382345713942e-06, |
| "loss": 0.5740560293197632, |
| "step": 180, |
| "token_acc": 0.8173885189328248 |
| }, |
| { |
| "epoch": 0.5810593900481541, |
| "grad_norm": 0.20533388731698282, |
| "learning_rate": 6.86539352800308e-06, |
| "loss": 0.4949137568473816, |
| "step": 181, |
| "token_acc": 0.8424438091193526 |
| }, |
| { |
| "epoch": 0.5842696629213483, |
| "grad_norm": 0.20592976059215887, |
| "learning_rate": 6.8633902006204375e-06, |
| "loss": 0.5462239980697632, |
| "step": 182, |
| "token_acc": 0.8261973157867761 |
| }, |
| { |
| "epoch": 0.5874799357945425, |
| "grad_norm": 0.2113373512227851, |
| "learning_rate": 6.861372372203227e-06, |
| "loss": 0.5481771230697632, |
| "step": 183, |
| "token_acc": 0.8251484703826323 |
| }, |
| { |
| "epoch": 0.5906902086677368, |
| "grad_norm": 0.1933295692622174, |
| "learning_rate": 6.859340051451183e-06, |
| "loss": 0.5323079824447632, |
| "step": 184, |
| "token_acc": 0.8299652429059305 |
| }, |
| { |
| "epoch": 0.593900481540931, |
| "grad_norm": 0.23548376959220121, |
| "learning_rate": 6.857293247126522e-06, |
| "loss": 0.5689290761947632, |
| "step": 185, |
| "token_acc": 0.8176760652421292 |
| }, |
| { |
| "epoch": 0.5971107544141252, |
| "grad_norm": 0.20081126428895105, |
| "learning_rate": 6.855231968053906e-06, |
| "loss": 0.5013834834098816, |
| "step": 186, |
| "token_acc": 0.8375914147549965 |
| }, |
| { |
| "epoch": 0.6003210272873194, |
| "grad_norm": 0.20025396799297465, |
| "learning_rate": 6.8531562231204015e-06, |
| "loss": 0.511474609375, |
| "step": 187, |
| "token_acc": 0.8372179968386327 |
| }, |
| { |
| "epoch": 0.6035313001605136, |
| "grad_norm": 0.27351283743607113, |
| "learning_rate": 6.851066021275448e-06, |
| "loss": 0.5250651240348816, |
| "step": 188, |
| "token_acc": 0.8299763504644978 |
| }, |
| { |
| "epoch": 0.6067415730337079, |
| "grad_norm": 0.2426189973247391, |
| "learning_rate": 6.8489613715308116e-06, |
| "loss": 0.5476888418197632, |
| "step": 189, |
| "token_acc": 0.825513469139694 |
| }, |
| { |
| "epoch": 0.6099518459069021, |
| "grad_norm": 0.2400231078753033, |
| "learning_rate": 6.846842282960551e-06, |
| "loss": 0.5440673828125, |
| "step": 190, |
| "token_acc": 0.8253329204087952 |
| }, |
| { |
| "epoch": 0.6131621187800963, |
| "grad_norm": 0.25681990063139026, |
| "learning_rate": 6.8447087647009756e-06, |
| "loss": 0.5882975459098816, |
| "step": 191, |
| "token_acc": 0.8140240697564722 |
| }, |
| { |
| "epoch": 0.6163723916532905, |
| "grad_norm": 0.21533682569585577, |
| "learning_rate": 6.842560825950609e-06, |
| "loss": 0.5162353515625, |
| "step": 192, |
| "token_acc": 0.8349828905724699 |
| }, |
| { |
| "epoch": 0.6195826645264848, |
| "grad_norm": 0.224070847170404, |
| "learning_rate": 6.840398475970147e-06, |
| "loss": 0.5556640625, |
| "step": 193, |
| "token_acc": 0.8219916672593991 |
| }, |
| { |
| "epoch": 0.622792937399679, |
| "grad_norm": 0.20086320548268272, |
| "learning_rate": 6.838221724082419e-06, |
| "loss": 0.55078125, |
| "step": 194, |
| "token_acc": 0.8239912087816152 |
| }, |
| { |
| "epoch": 0.6260032102728732, |
| "grad_norm": 0.22337205031208743, |
| "learning_rate": 6.836030579672347e-06, |
| "loss": 0.5475260615348816, |
| "step": 195, |
| "token_acc": 0.8235302077295884 |
| }, |
| { |
| "epoch": 0.6292134831460674, |
| "grad_norm": 0.22668574083983906, |
| "learning_rate": 6.833825052186905e-06, |
| "loss": 0.5760091543197632, |
| "step": 196, |
| "token_acc": 0.8175243891128943 |
| }, |
| { |
| "epoch": 0.6324237560192616, |
| "grad_norm": 0.19877544092977742, |
| "learning_rate": 6.8316051511350786e-06, |
| "loss": 0.536376953125, |
| "step": 197, |
| "token_acc": 0.8281901004896317 |
| }, |
| { |
| "epoch": 0.6356340288924559, |
| "grad_norm": 0.20395951641675453, |
| "learning_rate": 6.8293708860878245e-06, |
| "loss": 0.5526530146598816, |
| "step": 198, |
| "token_acc": 0.8250786404619416 |
| }, |
| { |
| "epoch": 0.6388443017656501, |
| "grad_norm": 0.21289607589577456, |
| "learning_rate": 6.82712226667803e-06, |
| "loss": 0.548095703125, |
| "step": 199, |
| "token_acc": 0.8248148446845975 |
| }, |
| { |
| "epoch": 0.6420545746388443, |
| "grad_norm": 0.24152612355007852, |
| "learning_rate": 6.824859302600468e-06, |
| "loss": 0.5478109121322632, |
| "step": 200, |
| "token_acc": 0.8234885791051114 |
| }, |
| { |
| "epoch": 0.6452648475120385, |
| "grad_norm": 0.22741295294045016, |
| "learning_rate": 6.822582003611759e-06, |
| "loss": 0.54248046875, |
| "step": 201, |
| "token_acc": 0.8279165289983051 |
| }, |
| { |
| "epoch": 0.6484751203852327, |
| "grad_norm": 0.21542183542521456, |
| "learning_rate": 6.820290379530326e-06, |
| "loss": 0.5347900390625, |
| "step": 202, |
| "token_acc": 0.8269245689989603 |
| }, |
| { |
| "epoch": 0.651685393258427, |
| "grad_norm": 0.21563815091926725, |
| "learning_rate": 6.817984440236357e-06, |
| "loss": 0.5167643427848816, |
| "step": 203, |
| "token_acc": 0.8355193840088763 |
| }, |
| { |
| "epoch": 0.6548956661316212, |
| "grad_norm": 0.21492257068771026, |
| "learning_rate": 6.8156641956717535e-06, |
| "loss": 0.5599772334098816, |
| "step": 204, |
| "token_acc": 0.8229220720036458 |
| }, |
| { |
| "epoch": 0.6581059390048154, |
| "grad_norm": 0.21079929662710306, |
| "learning_rate": 6.8133296558401e-06, |
| "loss": 0.5957845449447632, |
| "step": 205, |
| "token_acc": 0.8099493419818274 |
| }, |
| { |
| "epoch": 0.6613162118780096, |
| "grad_norm": 0.28263826007405335, |
| "learning_rate": 6.81098083080661e-06, |
| "loss": 0.5642904043197632, |
| "step": 206, |
| "token_acc": 0.8202062771524219 |
| }, |
| { |
| "epoch": 0.6645264847512039, |
| "grad_norm": 0.23663061523679904, |
| "learning_rate": 6.808617730698085e-06, |
| "loss": 0.5949300527572632, |
| "step": 207, |
| "token_acc": 0.8093328906747198 |
| }, |
| { |
| "epoch": 0.6677367576243981, |
| "grad_norm": 0.21481492269788427, |
| "learning_rate": 6.806240365702877e-06, |
| "loss": 0.5576986074447632, |
| "step": 208, |
| "token_acc": 0.8225621783463226 |
| }, |
| { |
| "epoch": 0.6709470304975923, |
| "grad_norm": 0.2596134684640292, |
| "learning_rate": 6.803848746070839e-06, |
| "loss": 0.5392252802848816, |
| "step": 209, |
| "token_acc": 0.8259330772465088 |
| }, |
| { |
| "epoch": 0.6741573033707865, |
| "grad_norm": 0.24809961814052503, |
| "learning_rate": 6.801442882113278e-06, |
| "loss": 0.52099609375, |
| "step": 210, |
| "token_acc": 0.8323869477105627 |
| }, |
| { |
| "epoch": 0.6773675762439807, |
| "grad_norm": 0.2443118739139523, |
| "learning_rate": 6.79902278420292e-06, |
| "loss": 0.5388997793197632, |
| "step": 211, |
| "token_acc": 0.8263394986714556 |
| }, |
| { |
| "epoch": 0.680577849117175, |
| "grad_norm": 0.21098866976666678, |
| "learning_rate": 6.796588462773857e-06, |
| "loss": 0.5052286982536316, |
| "step": 212, |
| "token_acc": 0.8393440527484701 |
| }, |
| { |
| "epoch": 0.6837881219903692, |
| "grad_norm": 0.24102593312514728, |
| "learning_rate": 6.794139928321504e-06, |
| "loss": 0.552490234375, |
| "step": 213, |
| "token_acc": 0.820462355331019 |
| }, |
| { |
| "epoch": 0.6869983948635634, |
| "grad_norm": 0.19538302780441605, |
| "learning_rate": 6.791677191402555e-06, |
| "loss": 0.5192464590072632, |
| "step": 214, |
| "token_acc": 0.8333250621849146 |
| }, |
| { |
| "epoch": 0.6902086677367576, |
| "grad_norm": 0.21456432126279212, |
| "learning_rate": 6.789200262634939e-06, |
| "loss": 0.5003662109375, |
| "step": 215, |
| "token_acc": 0.8399372573500391 |
| }, |
| { |
| "epoch": 0.6934189406099518, |
| "grad_norm": 0.2153404027287284, |
| "learning_rate": 6.7867091526977696e-06, |
| "loss": 0.5514323115348816, |
| "step": 216, |
| "token_acc": 0.8233947895282249 |
| }, |
| { |
| "epoch": 0.6966292134831461, |
| "grad_norm": 0.21079426317177405, |
| "learning_rate": 6.784203872331302e-06, |
| "loss": 0.5425618886947632, |
| "step": 217, |
| "token_acc": 0.8267645216064796 |
| }, |
| { |
| "epoch": 0.6998394863563403, |
| "grad_norm": 0.22090843136929758, |
| "learning_rate": 6.7816844323368905e-06, |
| "loss": 0.5508829951286316, |
| "step": 218, |
| "token_acc": 0.8237542129810146 |
| }, |
| { |
| "epoch": 0.7030497592295345, |
| "grad_norm": 0.30110343371175113, |
| "learning_rate": 6.77915084357693e-06, |
| "loss": 0.546875, |
| "step": 219, |
| "token_acc": 0.8256972124018396 |
| }, |
| { |
| "epoch": 0.7062600321027287, |
| "grad_norm": 0.2404307206347699, |
| "learning_rate": 6.776603116974823e-06, |
| "loss": 0.599609375, |
| "step": 220, |
| "token_acc": 0.8071373867727709 |
| }, |
| { |
| "epoch": 0.709470304975923, |
| "grad_norm": 0.2704381577683076, |
| "learning_rate": 6.7740412635149225e-06, |
| "loss": 0.57293701171875, |
| "step": 221, |
| "token_acc": 0.8164267936370523 |
| }, |
| { |
| "epoch": 0.7126805778491172, |
| "grad_norm": 0.21919175912057806, |
| "learning_rate": 6.771465294242493e-06, |
| "loss": 0.5669759511947632, |
| "step": 222, |
| "token_acc": 0.8191622319226122 |
| }, |
| { |
| "epoch": 0.7158908507223114, |
| "grad_norm": 0.21425950203620117, |
| "learning_rate": 6.768875220263655e-06, |
| "loss": 0.559814453125, |
| "step": 223, |
| "token_acc": 0.8224226963471117 |
| }, |
| { |
| "epoch": 0.7191011235955056, |
| "grad_norm": 0.21223607679357182, |
| "learning_rate": 6.76627105274534e-06, |
| "loss": 0.5444743037223816, |
| "step": 224, |
| "token_acc": 0.8234702086507026 |
| }, |
| { |
| "epoch": 0.7223113964686998, |
| "grad_norm": 0.20942358002404907, |
| "learning_rate": 6.763652802915245e-06, |
| "loss": 0.5237223505973816, |
| "step": 225, |
| "token_acc": 0.8334582114515168 |
| }, |
| { |
| "epoch": 0.7255216693418941, |
| "grad_norm": 0.24723183006765018, |
| "learning_rate": 6.761020482061782e-06, |
| "loss": 0.56414794921875, |
| "step": 226, |
| "token_acc": 0.8203677260789879 |
| }, |
| { |
| "epoch": 0.7287319422150883, |
| "grad_norm": 0.25030847996600203, |
| "learning_rate": 6.758374101534027e-06, |
| "loss": 0.5577799677848816, |
| "step": 227, |
| "token_acc": 0.8203356490268309 |
| }, |
| { |
| "epoch": 0.7319422150882825, |
| "grad_norm": 0.3205086458960867, |
| "learning_rate": 6.755713672741676e-06, |
| "loss": 0.5999755859375, |
| "step": 228, |
| "token_acc": 0.8054511124060968 |
| }, |
| { |
| "epoch": 0.7351524879614767, |
| "grad_norm": 0.24550784670563833, |
| "learning_rate": 6.753039207154989e-06, |
| "loss": 0.57373046875, |
| "step": 229, |
| "token_acc": 0.8176859273836078 |
| }, |
| { |
| "epoch": 0.7383627608346709, |
| "grad_norm": 0.19001728402352852, |
| "learning_rate": 6.750350716304752e-06, |
| "loss": 0.5404459834098816, |
| "step": 230, |
| "token_acc": 0.8257092883215871 |
| }, |
| { |
| "epoch": 0.7415730337078652, |
| "grad_norm": 0.20564095580716163, |
| "learning_rate": 6.747648211782212e-06, |
| "loss": 0.5472005605697632, |
| "step": 231, |
| "token_acc": 0.8256463868333908 |
| }, |
| { |
| "epoch": 0.7447833065810594, |
| "grad_norm": 0.25323339440670445, |
| "learning_rate": 6.74493170523904e-06, |
| "loss": 0.5516764521598816, |
| "step": 232, |
| "token_acc": 0.8231926897249683 |
| }, |
| { |
| "epoch": 0.7479935794542536, |
| "grad_norm": 0.23805514044333884, |
| "learning_rate": 6.742201208387276e-06, |
| "loss": 0.52392578125, |
| "step": 233, |
| "token_acc": 0.8286431336834839 |
| }, |
| { |
| "epoch": 0.7512038523274478, |
| "grad_norm": 0.20890219888317152, |
| "learning_rate": 6.739456732999274e-06, |
| "loss": 0.5518392324447632, |
| "step": 234, |
| "token_acc": 0.8245726725848885 |
| }, |
| { |
| "epoch": 0.7544141252006421, |
| "grad_norm": 0.20338522734836773, |
| "learning_rate": 6.73669829090766e-06, |
| "loss": 0.53076171875, |
| "step": 235, |
| "token_acc": 0.8298385429058719 |
| }, |
| { |
| "epoch": 0.7576243980738363, |
| "grad_norm": 0.2643362744904908, |
| "learning_rate": 6.733925894005273e-06, |
| "loss": 0.5685221552848816, |
| "step": 236, |
| "token_acc": 0.8183341855576388 |
| }, |
| { |
| "epoch": 0.7608346709470305, |
| "grad_norm": 0.2287330654705364, |
| "learning_rate": 6.731139554245122e-06, |
| "loss": 0.5322672724723816, |
| "step": 237, |
| "token_acc": 0.8259961312934699 |
| }, |
| { |
| "epoch": 0.7640449438202247, |
| "grad_norm": 0.2275245612718834, |
| "learning_rate": 6.728339283640325e-06, |
| "loss": 0.4906412959098816, |
| "step": 238, |
| "token_acc": 0.8402331584922113 |
| }, |
| { |
| "epoch": 0.7672552166934189, |
| "grad_norm": 0.24729750718952795, |
| "learning_rate": 6.7255250942640625e-06, |
| "loss": 0.5785726308822632, |
| "step": 239, |
| "token_acc": 0.8139347537406585 |
| }, |
| { |
| "epoch": 0.7704654895666132, |
| "grad_norm": 0.22470429416037957, |
| "learning_rate": 6.722696998249527e-06, |
| "loss": 0.5373942255973816, |
| "step": 240, |
| "token_acc": 0.8278915767997669 |
| }, |
| { |
| "epoch": 0.7736757624398074, |
| "grad_norm": 0.21124522226813244, |
| "learning_rate": 6.719855007789868e-06, |
| "loss": 0.5575765371322632, |
| "step": 241, |
| "token_acc": 0.8219577750122666 |
| }, |
| { |
| "epoch": 0.7768860353130016, |
| "grad_norm": 0.22522506304223616, |
| "learning_rate": 6.71699913513814e-06, |
| "loss": 0.5452474355697632, |
| "step": 242, |
| "token_acc": 0.8278975153651517 |
| }, |
| { |
| "epoch": 0.7800963081861958, |
| "grad_norm": 0.21054417790079533, |
| "learning_rate": 6.714129392607248e-06, |
| "loss": 0.5528157949447632, |
| "step": 243, |
| "token_acc": 0.8216659366800207 |
| }, |
| { |
| "epoch": 0.78330658105939, |
| "grad_norm": 0.2168513347663671, |
| "learning_rate": 6.7112457925698985e-06, |
| "loss": 0.5669759511947632, |
| "step": 244, |
| "token_acc": 0.8197795876351085 |
| }, |
| { |
| "epoch": 0.7865168539325843, |
| "grad_norm": 0.271555617520745, |
| "learning_rate": 6.7083483474585395e-06, |
| "loss": 0.5638834834098816, |
| "step": 245, |
| "token_acc": 0.8143064734116244 |
| }, |
| { |
| "epoch": 0.7897271268057785, |
| "grad_norm": 0.21704184254450343, |
| "learning_rate": 6.705437069765319e-06, |
| "loss": 0.56658935546875, |
| "step": 246, |
| "token_acc": 0.817545748116254 |
| }, |
| { |
| "epoch": 0.7929373996789727, |
| "grad_norm": 0.22862825396828354, |
| "learning_rate": 6.702511972042014e-06, |
| "loss": 0.5729166865348816, |
| "step": 247, |
| "token_acc": 0.8182453879142039 |
| }, |
| { |
| "epoch": 0.7961476725521669, |
| "grad_norm": 0.2265701155951088, |
| "learning_rate": 6.6995730668999925e-06, |
| "loss": 0.5604655146598816, |
| "step": 248, |
| "token_acc": 0.8183653526421868 |
| }, |
| { |
| "epoch": 0.7993579454253612, |
| "grad_norm": 0.23197032599583667, |
| "learning_rate": 6.696620367010148e-06, |
| "loss": 0.5447591543197632, |
| "step": 249, |
| "token_acc": 0.8233240936035472 |
| }, |
| { |
| "epoch": 0.8025682182985554, |
| "grad_norm": 0.23312218134153492, |
| "learning_rate": 6.693653885102853e-06, |
| "loss": 0.5588786005973816, |
| "step": 250, |
| "token_acc": 0.819455498267914 |
| }, |
| { |
| "epoch": 0.8057784911717496, |
| "grad_norm": 0.2151929612075134, |
| "learning_rate": 6.690673633967896e-06, |
| "loss": 0.5681559443473816, |
| "step": 251, |
| "token_acc": 0.8187757678531563 |
| }, |
| { |
| "epoch": 0.8089887640449438, |
| "grad_norm": 0.21115778956555162, |
| "learning_rate": 6.687679626454435e-06, |
| "loss": 0.5596517324447632, |
| "step": 252, |
| "token_acc": 0.8215622369212267 |
| }, |
| { |
| "epoch": 0.812199036918138, |
| "grad_norm": 0.2174992394129607, |
| "learning_rate": 6.684671875470934e-06, |
| "loss": 0.5286458730697632, |
| "step": 253, |
| "token_acc": 0.8295877099816256 |
| }, |
| { |
| "epoch": 0.8154093097913323, |
| "grad_norm": 0.2983534969364284, |
| "learning_rate": 6.6816503939851136e-06, |
| "loss": 0.5350748896598816, |
| "step": 254, |
| "token_acc": 0.825895675553894 |
| }, |
| { |
| "epoch": 0.8186195826645265, |
| "grad_norm": 0.2085815626895177, |
| "learning_rate": 6.678615195023891e-06, |
| "loss": 0.5171305537223816, |
| "step": 255, |
| "token_acc": 0.8348302581547042 |
| }, |
| { |
| "epoch": 0.8218298555377207, |
| "grad_norm": 0.21559592718354698, |
| "learning_rate": 6.675566291673325e-06, |
| "loss": 0.5474853515625, |
| "step": 256, |
| "token_acc": 0.824765407128191 |
| }, |
| { |
| "epoch": 0.8250401284109149, |
| "grad_norm": 0.24128741129795617, |
| "learning_rate": 6.672503697078562e-06, |
| "loss": 0.5852864980697632, |
| "step": 257, |
| "token_acc": 0.8155020986063753 |
| }, |
| { |
| "epoch": 0.8282504012841091, |
| "grad_norm": 0.2293702550872797, |
| "learning_rate": 6.669427424443776e-06, |
| "loss": 0.5548502802848816, |
| "step": 258, |
| "token_acc": 0.8226406443632063 |
| }, |
| { |
| "epoch": 0.8314606741573034, |
| "grad_norm": 0.25674300678141526, |
| "learning_rate": 6.666337487032113e-06, |
| "loss": 0.5816243886947632, |
| "step": 259, |
| "token_acc": 0.8115385153930885 |
| }, |
| { |
| "epoch": 0.8346709470304976, |
| "grad_norm": 0.21850937742829685, |
| "learning_rate": 6.663233898165635e-06, |
| "loss": 0.5177409052848816, |
| "step": 260, |
| "token_acc": 0.8329880272119501 |
| }, |
| { |
| "epoch": 0.8378812199036918, |
| "grad_norm": 0.3843277677133425, |
| "learning_rate": 6.660116671225258e-06, |
| "loss": 0.5751953125, |
| "step": 261, |
| "token_acc": 0.8143645310289503 |
| }, |
| { |
| "epoch": 0.841091492776886, |
| "grad_norm": 0.21134853251628777, |
| "learning_rate": 6.656985819650703e-06, |
| "loss": 0.45391845703125, |
| "step": 262, |
| "token_acc": 0.8531797499880263 |
| }, |
| { |
| "epoch": 0.8443017656500803, |
| "grad_norm": 0.25633734763605176, |
| "learning_rate": 6.653841356940426e-06, |
| "loss": 0.5741373896598816, |
| "step": 263, |
| "token_acc": 0.8163117184245008 |
| }, |
| { |
| "epoch": 0.8475120385232745, |
| "grad_norm": 0.20414507237731114, |
| "learning_rate": 6.650683296651573e-06, |
| "loss": 0.5130208730697632, |
| "step": 264, |
| "token_acc": 0.8335838411053109 |
| }, |
| { |
| "epoch": 0.8507223113964687, |
| "grad_norm": 0.23368657687253075, |
| "learning_rate": 6.647511652399912e-06, |
| "loss": 0.576904296875, |
| "step": 265, |
| "token_acc": 0.8158995625019959 |
| }, |
| { |
| "epoch": 0.8539325842696629, |
| "grad_norm": 0.2823726018701455, |
| "learning_rate": 6.6443264378597775e-06, |
| "loss": 0.5238851308822632, |
| "step": 266, |
| "token_acc": 0.8303428680614855 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.2660958114189501, |
| "learning_rate": 6.641127666764009e-06, |
| "loss": 0.5625, |
| "step": 267, |
| "token_acc": 0.8188886633069231 |
| }, |
| { |
| "epoch": 0.8603531300160514, |
| "grad_norm": 0.23790729136692979, |
| "learning_rate": 6.6379153529038996e-06, |
| "loss": 0.5369466543197632, |
| "step": 268, |
| "token_acc": 0.8278342392587025 |
| }, |
| { |
| "epoch": 0.8635634028892456, |
| "grad_norm": 0.22080856072472654, |
| "learning_rate": 6.634689510129127e-06, |
| "loss": 0.4977213740348816, |
| "step": 269, |
| "token_acc": 0.8393823857083582 |
| }, |
| { |
| "epoch": 0.8667736757624398, |
| "grad_norm": 0.2412716818325412, |
| "learning_rate": 6.6314501523477e-06, |
| "loss": 0.5535482168197632, |
| "step": 270, |
| "token_acc": 0.8220067709239879 |
| }, |
| { |
| "epoch": 0.869983948635634, |
| "grad_norm": 0.2396795881940935, |
| "learning_rate": 6.628197293525894e-06, |
| "loss": 0.56494140625, |
| "step": 271, |
| "token_acc": 0.8181763386789053 |
| }, |
| { |
| "epoch": 0.8731942215088283, |
| "grad_norm": 0.26498431498454633, |
| "learning_rate": 6.624930947688197e-06, |
| "loss": 0.5183919668197632, |
| "step": 272, |
| "token_acc": 0.8312001392731633 |
| }, |
| { |
| "epoch": 0.8764044943820225, |
| "grad_norm": 0.1927504153360831, |
| "learning_rate": 6.6216511289172395e-06, |
| "loss": 0.5640462636947632, |
| "step": 273, |
| "token_acc": 0.8176485799109028 |
| }, |
| { |
| "epoch": 0.8796147672552167, |
| "grad_norm": 0.19474833122907118, |
| "learning_rate": 6.618357851353749e-06, |
| "loss": 0.4883219599723816, |
| "step": 274, |
| "token_acc": 0.844173214072538 |
| }, |
| { |
| "epoch": 0.8828250401284109, |
| "grad_norm": 0.24092291673702348, |
| "learning_rate": 6.615051129196469e-06, |
| "loss": 0.5417073965072632, |
| "step": 275, |
| "token_acc": 0.8236458084139298 |
| }, |
| { |
| "epoch": 0.8860353130016051, |
| "grad_norm": 0.21413177551118223, |
| "learning_rate": 6.611730976702116e-06, |
| "loss": 0.4571940302848816, |
| "step": 276, |
| "token_acc": 0.8514311032529894 |
| }, |
| { |
| "epoch": 0.8892455858747994, |
| "grad_norm": 0.20494712262771816, |
| "learning_rate": 6.608397408185307e-06, |
| "loss": 0.5249837636947632, |
| "step": 277, |
| "token_acc": 0.8307591432318229 |
| }, |
| { |
| "epoch": 0.8924558587479936, |
| "grad_norm": 0.227860155385193, |
| "learning_rate": 6.605050438018503e-06, |
| "loss": 0.5576171875, |
| "step": 278, |
| "token_acc": 0.8215625938536182 |
| }, |
| { |
| "epoch": 0.8956661316211878, |
| "grad_norm": 0.20272894805165137, |
| "learning_rate": 6.6016900806319444e-06, |
| "loss": 0.5760091543197632, |
| "step": 279, |
| "token_acc": 0.8148033546019078 |
| }, |
| { |
| "epoch": 0.898876404494382, |
| "grad_norm": 0.21292167264886414, |
| "learning_rate": 6.598316350513591e-06, |
| "loss": 0.5145263671875, |
| "step": 280, |
| "token_acc": 0.8337830790752344 |
| }, |
| { |
| "epoch": 0.9020866773675762, |
| "grad_norm": 0.2211511021106543, |
| "learning_rate": 6.594929262209055e-06, |
| "loss": 0.5369466543197632, |
| "step": 281, |
| "token_acc": 0.8276449799280647 |
| }, |
| { |
| "epoch": 0.9052969502407705, |
| "grad_norm": 0.2047148842889143, |
| "learning_rate": 6.591528830321546e-06, |
| "loss": 0.5138346552848816, |
| "step": 282, |
| "token_acc": 0.8341773490893749 |
| }, |
| { |
| "epoch": 0.9085072231139647, |
| "grad_norm": 0.24542746740430346, |
| "learning_rate": 6.5881150695118e-06, |
| "loss": 0.539306640625, |
| "step": 283, |
| "token_acc": 0.8257388871001344 |
| }, |
| { |
| "epoch": 0.9117174959871589, |
| "grad_norm": 0.23317430848356677, |
| "learning_rate": 6.5846879944980224e-06, |
| "loss": 0.5642904043197632, |
| "step": 284, |
| "token_acc": 0.8206747364141221 |
| }, |
| { |
| "epoch": 0.9149277688603531, |
| "grad_norm": 0.22878109419510925, |
| "learning_rate": 6.58124762005582e-06, |
| "loss": 0.5784912109375, |
| "step": 285, |
| "token_acc": 0.8153904333455296 |
| }, |
| { |
| "epoch": 0.9181380417335474, |
| "grad_norm": 0.23490487636437477, |
| "learning_rate": 6.577793961018139e-06, |
| "loss": 0.56982421875, |
| "step": 286, |
| "token_acc": 0.8179441732969294 |
| }, |
| { |
| "epoch": 0.9213483146067416, |
| "grad_norm": 0.2213625777811698, |
| "learning_rate": 6.574327032275203e-06, |
| "loss": 0.5587565302848816, |
| "step": 287, |
| "token_acc": 0.8212527035117336 |
| }, |
| { |
| "epoch": 0.9245585874799358, |
| "grad_norm": 0.23982512719414933, |
| "learning_rate": 6.570846848774445e-06, |
| "loss": 0.51416015625, |
| "step": 288, |
| "token_acc": 0.8342100757147085 |
| }, |
| { |
| "epoch": 0.92776886035313, |
| "grad_norm": 0.24863035113160709, |
| "learning_rate": 6.567353425520448e-06, |
| "loss": 0.5484212636947632, |
| "step": 289, |
| "token_acc": 0.823044297514873 |
| }, |
| { |
| "epoch": 0.9309791332263242, |
| "grad_norm": 0.24695733139934517, |
| "learning_rate": 6.563846777574875e-06, |
| "loss": 0.5726318359375, |
| "step": 290, |
| "token_acc": 0.8166731715915243 |
| }, |
| { |
| "epoch": 0.9341894060995185, |
| "grad_norm": 0.22335565967464163, |
| "learning_rate": 6.5603269200564055e-06, |
| "loss": 0.5408529043197632, |
| "step": 291, |
| "token_acc": 0.8273028089404346 |
| }, |
| { |
| "epoch": 0.9373996789727127, |
| "grad_norm": 0.18186030967506026, |
| "learning_rate": 6.556793868140674e-06, |
| "loss": 0.4903157651424408, |
| "step": 292, |
| "token_acc": 0.8423202288740971 |
| }, |
| { |
| "epoch": 0.9406099518459069, |
| "grad_norm": 0.22464903635996314, |
| "learning_rate": 6.5532476370602e-06, |
| "loss": 0.5187174677848816, |
| "step": 293, |
| "token_acc": 0.8333880337392103 |
| }, |
| { |
| "epoch": 0.9438202247191011, |
| "grad_norm": 0.2003623985055459, |
| "learning_rate": 6.549688242104324e-06, |
| "loss": 0.4782308042049408, |
| "step": 294, |
| "token_acc": 0.8428174821070171 |
| }, |
| { |
| "epoch": 0.9470304975922953, |
| "grad_norm": 0.3648452937041888, |
| "learning_rate": 6.546115698619143e-06, |
| "loss": 0.5069173574447632, |
| "step": 295, |
| "token_acc": 0.8375849390096171 |
| }, |
| { |
| "epoch": 0.9502407704654896, |
| "grad_norm": 0.2668698493414911, |
| "learning_rate": 6.54253002200744e-06, |
| "loss": 0.5812174677848816, |
| "step": 296, |
| "token_acc": 0.8119673669275481 |
| }, |
| { |
| "epoch": 0.9534510433386838, |
| "grad_norm": 0.23425175593147715, |
| "learning_rate": 6.538931227728625e-06, |
| "loss": 0.5435384511947632, |
| "step": 297, |
| "token_acc": 0.823641333000032 |
| }, |
| { |
| "epoch": 0.956661316211878, |
| "grad_norm": 0.21316620580353451, |
| "learning_rate": 6.535319331298662e-06, |
| "loss": 0.5441080927848816, |
| "step": 298, |
| "token_acc": 0.8267760735277975 |
| }, |
| { |
| "epoch": 0.9598715890850722, |
| "grad_norm": 0.24898603165299715, |
| "learning_rate": 6.531694348290001e-06, |
| "loss": 0.5554606318473816, |
| "step": 299, |
| "token_acc": 0.8209872341754991 |
| }, |
| { |
| "epoch": 0.9630818619582665, |
| "grad_norm": 0.2554315265827971, |
| "learning_rate": 6.528056294331519e-06, |
| "loss": 0.4842122495174408, |
| "step": 300, |
| "token_acc": 0.8440150411488848 |
| }, |
| { |
| "epoch": 0.9662921348314607, |
| "grad_norm": 0.22974814057192716, |
| "learning_rate": 6.524405185108444e-06, |
| "loss": 0.5605875849723816, |
| "step": 301, |
| "token_acc": 0.8200306844214243 |
| }, |
| { |
| "epoch": 0.9695024077046549, |
| "grad_norm": 0.2596599946392832, |
| "learning_rate": 6.520741036362294e-06, |
| "loss": 0.5032145380973816, |
| "step": 302, |
| "token_acc": 0.8374895144901848 |
| }, |
| { |
| "epoch": 0.9727126805778491, |
| "grad_norm": 0.2402742203312852, |
| "learning_rate": 6.517063863890802e-06, |
| "loss": 0.5245768427848816, |
| "step": 303, |
| "token_acc": 0.8304467561823956 |
| }, |
| { |
| "epoch": 0.9759229534510433, |
| "grad_norm": 0.23484754575093275, |
| "learning_rate": 6.513373683547856e-06, |
| "loss": 0.5390218496322632, |
| "step": 304, |
| "token_acc": 0.8251677096206059 |
| }, |
| { |
| "epoch": 0.9791332263242376, |
| "grad_norm": 0.22608927358046563, |
| "learning_rate": 6.509670511243424e-06, |
| "loss": 0.4981282651424408, |
| "step": 305, |
| "token_acc": 0.8377231819118646 |
| }, |
| { |
| "epoch": 0.9823434991974318, |
| "grad_norm": 0.19516925536917554, |
| "learning_rate": 6.505954362943486e-06, |
| "loss": 0.4888509213924408, |
| "step": 306, |
| "token_acc": 0.8426081587359 |
| }, |
| { |
| "epoch": 0.985553772070626, |
| "grad_norm": 0.2890322962206889, |
| "learning_rate": 6.502225254669973e-06, |
| "loss": 0.5541178584098816, |
| "step": 307, |
| "token_acc": 0.8245721970122185 |
| }, |
| { |
| "epoch": 0.9887640449438202, |
| "grad_norm": 0.2439684361938674, |
| "learning_rate": 6.498483202500689e-06, |
| "loss": 0.5735677480697632, |
| "step": 308, |
| "token_acc": 0.815568343528531 |
| }, |
| { |
| "epoch": 0.9919743178170144, |
| "grad_norm": 0.20662452726270084, |
| "learning_rate": 6.4947282225692425e-06, |
| "loss": 0.5161539912223816, |
| "step": 309, |
| "token_acc": 0.8335114537040171 |
| }, |
| { |
| "epoch": 0.9951845906902087, |
| "grad_norm": 0.30566444126626124, |
| "learning_rate": 6.490960331064983e-06, |
| "loss": 0.5284830927848816, |
| "step": 310, |
| "token_acc": 0.8267753724083531 |
| }, |
| { |
| "epoch": 0.9983948635634029, |
| "grad_norm": 0.20539089555886036, |
| "learning_rate": 6.487179544232924e-06, |
| "loss": 0.5338541865348816, |
| "step": 311, |
| "token_acc": 0.8280595721254926 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.26945301378123576, |
| "learning_rate": 6.48338587837368e-06, |
| "loss": 0.53369140625, |
| "step": 312, |
| "token_acc": 0.8281918169384378 |
| }, |
| { |
| "epoch": 1.0032102728731942, |
| "grad_norm": 0.2228766062119388, |
| "learning_rate": 6.47957934984339e-06, |
| "loss": 0.5152994990348816, |
| "step": 313, |
| "token_acc": 0.8322323489910715 |
| }, |
| { |
| "epoch": 1.0064205457463884, |
| "grad_norm": 0.2202353695498369, |
| "learning_rate": 6.4757599750536495e-06, |
| "loss": 0.5165609121322632, |
| "step": 314, |
| "token_acc": 0.8306170672021838 |
| }, |
| { |
| "epoch": 1.0096308186195826, |
| "grad_norm": 0.21944860025769794, |
| "learning_rate": 6.471927770471441e-06, |
| "loss": 0.506591796875, |
| "step": 315, |
| "token_acc": 0.8360700243853959 |
| }, |
| { |
| "epoch": 1.0128410914927768, |
| "grad_norm": 0.19852273162473266, |
| "learning_rate": 6.468082752619062e-06, |
| "loss": 0.4534912109375, |
| "step": 316, |
| "token_acc": 0.8498115059824072 |
| }, |
| { |
| "epoch": 1.0160513643659712, |
| "grad_norm": 0.1950395060592887, |
| "learning_rate": 6.464224938074051e-06, |
| "loss": 0.462646484375, |
| "step": 317, |
| "token_acc": 0.849263431737473 |
| }, |
| { |
| "epoch": 1.0192616372391654, |
| "grad_norm": 0.22619155870689964, |
| "learning_rate": 6.460354343469121e-06, |
| "loss": 0.5100911855697632, |
| "step": 318, |
| "token_acc": 0.8333788909396087 |
| }, |
| { |
| "epoch": 1.0224719101123596, |
| "grad_norm": 0.1966958888107114, |
| "learning_rate": 6.456470985492086e-06, |
| "loss": 0.4920247495174408, |
| "step": 319, |
| "token_acc": 0.8367696940757496 |
| }, |
| { |
| "epoch": 1.0256821829855538, |
| "grad_norm": 0.19779145399479958, |
| "learning_rate": 6.452574880885788e-06, |
| "loss": 0.520263671875, |
| "step": 320, |
| "token_acc": 0.8299368954007383 |
| }, |
| { |
| "epoch": 1.028892455858748, |
| "grad_norm": 0.21033363002221026, |
| "learning_rate": 6.4486660464480225e-06, |
| "loss": 0.50634765625, |
| "step": 321, |
| "token_acc": 0.8340756326797889 |
| }, |
| { |
| "epoch": 1.0321027287319422, |
| "grad_norm": 0.18963872381058564, |
| "learning_rate": 6.4447444990314716e-06, |
| "loss": 0.4986572265625, |
| "step": 322, |
| "token_acc": 0.8362821544068159 |
| }, |
| { |
| "epoch": 1.0353130016051364, |
| "grad_norm": 0.2209164173614081, |
| "learning_rate": 6.4408102555436264e-06, |
| "loss": 0.4700114130973816, |
| "step": 323, |
| "token_acc": 0.8459083202661721 |
| }, |
| { |
| "epoch": 1.0385232744783306, |
| "grad_norm": 0.33229018679196365, |
| "learning_rate": 6.436863332946721e-06, |
| "loss": 0.5262451171875, |
| "step": 324, |
| "token_acc": 0.8283396932868827 |
| }, |
| { |
| "epoch": 1.0417335473515248, |
| "grad_norm": 0.20218430124439193, |
| "learning_rate": 6.432903748257647e-06, |
| "loss": 0.5170491933822632, |
| "step": 325, |
| "token_acc": 0.8299572023876562 |
| }, |
| { |
| "epoch": 1.0449438202247192, |
| "grad_norm": 0.22146372328593175, |
| "learning_rate": 6.428931518547896e-06, |
| "loss": 0.5284830927848816, |
| "step": 326, |
| "token_acc": 0.828352607066829 |
| }, |
| { |
| "epoch": 1.0481540930979134, |
| "grad_norm": 0.22608426879553686, |
| "learning_rate": 6.424946660943472e-06, |
| "loss": 0.4761556088924408, |
| "step": 327, |
| "token_acc": 0.8440760758481691 |
| }, |
| { |
| "epoch": 1.0513643659711076, |
| "grad_norm": 0.22334666046146132, |
| "learning_rate": 6.420949192624826e-06, |
| "loss": 0.5010172724723816, |
| "step": 328, |
| "token_acc": 0.8354667860206632 |
| }, |
| { |
| "epoch": 1.0545746388443018, |
| "grad_norm": 0.21680735707121704, |
| "learning_rate": 6.416939130826778e-06, |
| "loss": 0.473876953125, |
| "step": 329, |
| "token_acc": 0.844303615954024 |
| }, |
| { |
| "epoch": 1.057784911717496, |
| "grad_norm": 0.24793959036723234, |
| "learning_rate": 6.412916492838444e-06, |
| "loss": 0.5398763418197632, |
| "step": 330, |
| "token_acc": 0.824620045732701 |
| }, |
| { |
| "epoch": 1.0609951845906902, |
| "grad_norm": 0.21297159564652285, |
| "learning_rate": 6.4088812960031625e-06, |
| "loss": 0.5343017578125, |
| "step": 331, |
| "token_acc": 0.8273545743431096 |
| }, |
| { |
| "epoch": 1.0642054574638844, |
| "grad_norm": 0.20927854637993462, |
| "learning_rate": 6.404833557718418e-06, |
| "loss": 0.4844157099723816, |
| "step": 332, |
| "token_acc": 0.8403367191581549 |
| }, |
| { |
| "epoch": 1.0674157303370786, |
| "grad_norm": 0.22252358860857668, |
| "learning_rate": 6.400773295435766e-06, |
| "loss": 0.5533854365348816, |
| "step": 333, |
| "token_acc": 0.8207041890009662 |
| }, |
| { |
| "epoch": 1.0706260032102728, |
| "grad_norm": 0.19982580110995563, |
| "learning_rate": 6.396700526660759e-06, |
| "loss": 0.4607340693473816, |
| "step": 334, |
| "token_acc": 0.8489513299287037 |
| }, |
| { |
| "epoch": 1.0738362760834672, |
| "grad_norm": 0.2175771128151161, |
| "learning_rate": 6.392615268952871e-06, |
| "loss": 0.494140625, |
| "step": 335, |
| "token_acc": 0.8397883541365987 |
| }, |
| { |
| "epoch": 1.0770465489566614, |
| "grad_norm": 0.24194777122457137, |
| "learning_rate": 6.388517539925422e-06, |
| "loss": 0.5191243886947632, |
| "step": 336, |
| "token_acc": 0.8303176430366812 |
| }, |
| { |
| "epoch": 1.0802568218298556, |
| "grad_norm": 0.21096680669019466, |
| "learning_rate": 6.384407357245495e-06, |
| "loss": 0.5267741084098816, |
| "step": 337, |
| "token_acc": 0.8252223007261563 |
| }, |
| { |
| "epoch": 1.0834670947030498, |
| "grad_norm": 0.23444580996675599, |
| "learning_rate": 6.380284738633876e-06, |
| "loss": 0.5298665761947632, |
| "step": 338, |
| "token_acc": 0.8276450107105229 |
| }, |
| { |
| "epoch": 1.086677367576244, |
| "grad_norm": 0.22588709190384268, |
| "learning_rate": 6.376149701864961e-06, |
| "loss": 0.4950765073299408, |
| "step": 339, |
| "token_acc": 0.8365316647599914 |
| }, |
| { |
| "epoch": 1.0898876404494382, |
| "grad_norm": 0.25313796690731927, |
| "learning_rate": 6.372002264766688e-06, |
| "loss": 0.5378011465072632, |
| "step": 340, |
| "token_acc": 0.8255440705674001 |
| }, |
| { |
| "epoch": 1.0930979133226324, |
| "grad_norm": 0.21913878331030762, |
| "learning_rate": 6.367842445220458e-06, |
| "loss": 0.537109375, |
| "step": 341, |
| "token_acc": 0.8250007981544305 |
| }, |
| { |
| "epoch": 1.0963081861958266, |
| "grad_norm": 0.22448672487090238, |
| "learning_rate": 6.363670261161057e-06, |
| "loss": 0.4685872495174408, |
| "step": 342, |
| "token_acc": 0.8437887567778065 |
| }, |
| { |
| "epoch": 1.0995184590690208, |
| "grad_norm": 0.19124533755087705, |
| "learning_rate": 6.359485730576581e-06, |
| "loss": 0.4889729917049408, |
| "step": 343, |
| "token_acc": 0.840408457946595 |
| }, |
| { |
| "epoch": 1.102728731942215, |
| "grad_norm": 0.2168277664403285, |
| "learning_rate": 6.355288871508358e-06, |
| "loss": 0.5118408203125, |
| "step": 344, |
| "token_acc": 0.8335477930980931 |
| }, |
| { |
| "epoch": 1.1059390048154094, |
| "grad_norm": 0.22068343255790987, |
| "learning_rate": 6.351079702050868e-06, |
| "loss": 0.5257161855697632, |
| "step": 345, |
| "token_acc": 0.8293478332683655 |
| }, |
| { |
| "epoch": 1.1091492776886036, |
| "grad_norm": 0.17736870716031858, |
| "learning_rate": 6.346858240351667e-06, |
| "loss": 0.4034017026424408, |
| "step": 346, |
| "token_acc": 0.8668660975357989 |
| }, |
| { |
| "epoch": 1.1123595505617978, |
| "grad_norm": 0.22649924176834732, |
| "learning_rate": 6.342624504611308e-06, |
| "loss": 0.4998779296875, |
| "step": 347, |
| "token_acc": 0.8358005764012703 |
| }, |
| { |
| "epoch": 1.115569823434992, |
| "grad_norm": 0.21562170580193807, |
| "learning_rate": 6.338378513083264e-06, |
| "loss": 0.5145670771598816, |
| "step": 348, |
| "token_acc": 0.8321265570415991 |
| }, |
| { |
| "epoch": 1.1187800963081862, |
| "grad_norm": 0.2002318871361869, |
| "learning_rate": 6.334120284073845e-06, |
| "loss": 0.5229899287223816, |
| "step": 349, |
| "token_acc": 0.8315942248719818 |
| }, |
| { |
| "epoch": 1.1219903691813804, |
| "grad_norm": 0.23677754906933055, |
| "learning_rate": 6.329849835942125e-06, |
| "loss": 0.5107828974723816, |
| "step": 350, |
| "token_acc": 0.8333928908270134 |
| }, |
| { |
| "epoch": 1.1252006420545746, |
| "grad_norm": 0.25803905866296484, |
| "learning_rate": 6.325567187099859e-06, |
| "loss": 0.5131022334098816, |
| "step": 351, |
| "token_acc": 0.8336294261255828 |
| }, |
| { |
| "epoch": 1.1284109149277688, |
| "grad_norm": 0.29486525496945437, |
| "learning_rate": 6.321272356011404e-06, |
| "loss": 0.4559326171875, |
| "step": 352, |
| "token_acc": 0.8493671388941911 |
| }, |
| { |
| "epoch": 1.131621187800963, |
| "grad_norm": 0.20294298663958116, |
| "learning_rate": 6.31696536119364e-06, |
| "loss": 0.51611328125, |
| "step": 353, |
| "token_acc": 0.8324970433783843 |
| }, |
| { |
| "epoch": 1.1348314606741572, |
| "grad_norm": 0.21077888101344128, |
| "learning_rate": 6.312646221215891e-06, |
| "loss": 0.5026448965072632, |
| "step": 354, |
| "token_acc": 0.8361661983842797 |
| }, |
| { |
| "epoch": 1.1380417335473516, |
| "grad_norm": 0.22827729326842286, |
| "learning_rate": 6.308314954699845e-06, |
| "loss": 0.5194091796875, |
| "step": 355, |
| "token_acc": 0.8296480882851123 |
| }, |
| { |
| "epoch": 1.1412520064205458, |
| "grad_norm": 0.2092342197897483, |
| "learning_rate": 6.303971580319469e-06, |
| "loss": 0.5154622793197632, |
| "step": 356, |
| "token_acc": 0.8319102496742222 |
| }, |
| { |
| "epoch": 1.14446227929374, |
| "grad_norm": 0.19947979635092572, |
| "learning_rate": 6.299616116800936e-06, |
| "loss": 0.4569498896598816, |
| "step": 357, |
| "token_acc": 0.848671552549395 |
| }, |
| { |
| "epoch": 1.1476725521669342, |
| "grad_norm": 0.23453848382593498, |
| "learning_rate": 6.295248582922538e-06, |
| "loss": 0.4909261167049408, |
| "step": 358, |
| "token_acc": 0.8384680608615377 |
| }, |
| { |
| "epoch": 1.1508828250401284, |
| "grad_norm": 0.954323915842282, |
| "learning_rate": 6.290868997514609e-06, |
| "loss": 0.5150553584098816, |
| "step": 359, |
| "token_acc": 0.8319439384775722 |
| }, |
| { |
| "epoch": 1.1540930979133226, |
| "grad_norm": 0.23039156354638235, |
| "learning_rate": 6.2864773794594435e-06, |
| "loss": 0.5189616084098816, |
| "step": 360, |
| "token_acc": 0.8291666480554228 |
| }, |
| { |
| "epoch": 1.1573033707865168, |
| "grad_norm": 0.2108602621989691, |
| "learning_rate": 6.28207374769121e-06, |
| "loss": 0.48828125, |
| "step": 361, |
| "token_acc": 0.8417036259188337 |
| }, |
| { |
| "epoch": 1.160513643659711, |
| "grad_norm": 0.2274478694052877, |
| "learning_rate": 6.277658121195879e-06, |
| "loss": 0.5316569209098816, |
| "step": 362, |
| "token_acc": 0.8250233183762744 |
| }, |
| { |
| "epoch": 1.1637239165329052, |
| "grad_norm": 0.19314602070313494, |
| "learning_rate": 6.273230519011129e-06, |
| "loss": 0.5065511465072632, |
| "step": 363, |
| "token_acc": 0.8337296713987661 |
| }, |
| { |
| "epoch": 1.1669341894060996, |
| "grad_norm": 0.19156656647439638, |
| "learning_rate": 6.2687909602262775e-06, |
| "loss": 0.4707845151424408, |
| "step": 364, |
| "token_acc": 0.8435817672766602 |
| }, |
| { |
| "epoch": 1.1701444622792938, |
| "grad_norm": 0.1983194701465737, |
| "learning_rate": 6.2643394639821855e-06, |
| "loss": 0.4794921875, |
| "step": 365, |
| "token_acc": 0.8451988430219622 |
| }, |
| { |
| "epoch": 1.173354735152488, |
| "grad_norm": 0.2140867347077317, |
| "learning_rate": 6.2598760494711865e-06, |
| "loss": 0.5105184316635132, |
| "step": 366, |
| "token_acc": 0.8330230414412061 |
| }, |
| { |
| "epoch": 1.1765650080256822, |
| "grad_norm": 0.21944917336007386, |
| "learning_rate": 6.255400735936998e-06, |
| "loss": 0.4921468198299408, |
| "step": 367, |
| "token_acc": 0.8382150994132302 |
| }, |
| { |
| "epoch": 1.1797752808988764, |
| "grad_norm": 0.23449231199354112, |
| "learning_rate": 6.250913542674637e-06, |
| "loss": 0.5512288808822632, |
| "step": 368, |
| "token_acc": 0.8213648698273107 |
| }, |
| { |
| "epoch": 1.1829855537720706, |
| "grad_norm": 0.2031330605981291, |
| "learning_rate": 6.246414489030342e-06, |
| "loss": 0.50830078125, |
| "step": 369, |
| "token_acc": 0.8335424810046952 |
| }, |
| { |
| "epoch": 1.1861958266452648, |
| "grad_norm": 0.23655728339205756, |
| "learning_rate": 6.241903594401484e-06, |
| "loss": 0.535888671875, |
| "step": 370, |
| "token_acc": 0.825756292557336 |
| }, |
| { |
| "epoch": 1.189406099518459, |
| "grad_norm": 2.107458585311116, |
| "learning_rate": 6.237380878236488e-06, |
| "loss": 0.481201171875, |
| "step": 371, |
| "token_acc": 0.8415841111341924 |
| }, |
| { |
| "epoch": 1.1926163723916532, |
| "grad_norm": 0.20710561438383962, |
| "learning_rate": 6.2328463600347465e-06, |
| "loss": 0.4458414912223816, |
| "step": 372, |
| "token_acc": 0.8520495554363945 |
| }, |
| { |
| "epoch": 1.1958266452648476, |
| "grad_norm": 0.20708739457868908, |
| "learning_rate": 6.228300059346533e-06, |
| "loss": 0.5088704824447632, |
| "step": 373, |
| "token_acc": 0.8342107871219757 |
| }, |
| { |
| "epoch": 1.1990369181380418, |
| "grad_norm": 0.20213338810098505, |
| "learning_rate": 6.223741995772923e-06, |
| "loss": 0.522705078125, |
| "step": 374, |
| "token_acc": 0.8303547302535117 |
| }, |
| { |
| "epoch": 1.202247191011236, |
| "grad_norm": 0.255102267938197, |
| "learning_rate": 6.219172188965709e-06, |
| "loss": 0.4881591796875, |
| "step": 375, |
| "token_acc": 0.8406651832875708 |
| }, |
| { |
| "epoch": 1.2054574638844302, |
| "grad_norm": 0.19269656731911167, |
| "learning_rate": 6.214590658627308e-06, |
| "loss": 0.4801228940486908, |
| "step": 376, |
| "token_acc": 0.8424494317329764 |
| }, |
| { |
| "epoch": 1.2086677367576244, |
| "grad_norm": 0.2078396922394465, |
| "learning_rate": 6.209997424510687e-06, |
| "loss": 0.4632975459098816, |
| "step": 377, |
| "token_acc": 0.8472165411231166 |
| }, |
| { |
| "epoch": 1.2118780096308186, |
| "grad_norm": 0.27323577905583074, |
| "learning_rate": 6.205392506419271e-06, |
| "loss": 0.4674479365348816, |
| "step": 378, |
| "token_acc": 0.847078453706124 |
| }, |
| { |
| "epoch": 1.2150882825040128, |
| "grad_norm": 0.2110978207882355, |
| "learning_rate": 6.2007759242068585e-06, |
| "loss": 0.4643961787223816, |
| "step": 379, |
| "token_acc": 0.8469605268875274 |
| }, |
| { |
| "epoch": 1.218298555377207, |
| "grad_norm": 0.22639346046683828, |
| "learning_rate": 6.196147697777541e-06, |
| "loss": 0.5506998896598816, |
| "step": 380, |
| "token_acc": 0.8201897189997169 |
| }, |
| { |
| "epoch": 1.2215088282504012, |
| "grad_norm": 0.2475511079473807, |
| "learning_rate": 6.191507847085608e-06, |
| "loss": 0.4781901240348816, |
| "step": 381, |
| "token_acc": 0.8447922066935496 |
| }, |
| { |
| "epoch": 1.2247191011235956, |
| "grad_norm": 0.24040115062575793, |
| "learning_rate": 6.186856392135472e-06, |
| "loss": 0.5130208730697632, |
| "step": 382, |
| "token_acc": 0.8315499466295303 |
| }, |
| { |
| "epoch": 1.2279293739967898, |
| "grad_norm": 0.1815363847276098, |
| "learning_rate": 6.182193352981573e-06, |
| "loss": 0.4622802734375, |
| "step": 383, |
| "token_acc": 0.8477780786218334 |
| }, |
| { |
| "epoch": 1.231139646869984, |
| "grad_norm": 0.20482340368559337, |
| "learning_rate": 6.177518749728295e-06, |
| "loss": 0.5057780146598816, |
| "step": 384, |
| "token_acc": 0.8352884439770376 |
| }, |
| { |
| "epoch": 1.2343499197431782, |
| "grad_norm": 0.19762870907244168, |
| "learning_rate": 6.172832602529881e-06, |
| "loss": 0.49853515625, |
| "step": 385, |
| "token_acc": 0.8363799989361367 |
| }, |
| { |
| "epoch": 1.2375601926163724, |
| "grad_norm": 0.2227216191441955, |
| "learning_rate": 6.168134931590346e-06, |
| "loss": 0.5113932490348816, |
| "step": 386, |
| "token_acc": 0.8322664894148927 |
| }, |
| { |
| "epoch": 1.2407704654895666, |
| "grad_norm": 0.20112034748752594, |
| "learning_rate": 6.163425757163387e-06, |
| "loss": 0.4695638120174408, |
| "step": 387, |
| "token_acc": 0.8444868529687521 |
| }, |
| { |
| "epoch": 1.2439807383627608, |
| "grad_norm": 0.22492795778104924, |
| "learning_rate": 6.158705099552299e-06, |
| "loss": 0.466064453125, |
| "step": 388, |
| "token_acc": 0.848569682910181 |
| }, |
| { |
| "epoch": 1.247191011235955, |
| "grad_norm": 0.18710929793776945, |
| "learning_rate": 6.153972979109884e-06, |
| "loss": 0.4784342646598816, |
| "step": 389, |
| "token_acc": 0.842638103523271 |
| }, |
| { |
| "epoch": 1.2504012841091492, |
| "grad_norm": 0.19948237667815355, |
| "learning_rate": 6.149229416238368e-06, |
| "loss": 0.4717610776424408, |
| "step": 390, |
| "token_acc": 0.8457017327855467 |
| }, |
| { |
| "epoch": 1.2536115569823436, |
| "grad_norm": 0.24721986040967323, |
| "learning_rate": 6.144474431389309e-06, |
| "loss": 0.5409342646598816, |
| "step": 391, |
| "token_acc": 0.8249820153645141 |
| }, |
| { |
| "epoch": 1.2568218298555376, |
| "grad_norm": 0.20262551772341117, |
| "learning_rate": 6.139708045063508e-06, |
| "loss": 0.537353515625, |
| "step": 392, |
| "token_acc": 0.8233082420552443 |
| }, |
| { |
| "epoch": 1.260032102728732, |
| "grad_norm": 0.20651343606655426, |
| "learning_rate": 6.134930277810927e-06, |
| "loss": 0.5173746943473816, |
| "step": 393, |
| "token_acc": 0.8314509609945887 |
| }, |
| { |
| "epoch": 1.2632423756019262, |
| "grad_norm": 0.21318989113773756, |
| "learning_rate": 6.1301411502305915e-06, |
| "loss": 0.4995931088924408, |
| "step": 394, |
| "token_acc": 0.836391847497132 |
| }, |
| { |
| "epoch": 1.2664526484751204, |
| "grad_norm": 0.19507151642627543, |
| "learning_rate": 6.1253406829705105e-06, |
| "loss": 0.4830729365348816, |
| "step": 395, |
| "token_acc": 0.8431513934296263 |
| }, |
| { |
| "epoch": 1.2696629213483146, |
| "grad_norm": 0.21904835008775547, |
| "learning_rate": 6.12052889672758e-06, |
| "loss": 0.5050048828125, |
| "step": 396, |
| "token_acc": 0.8356795815804924 |
| }, |
| { |
| "epoch": 1.2728731942215088, |
| "grad_norm": 0.2014526276910395, |
| "learning_rate": 6.115705812247499e-06, |
| "loss": 0.5210775136947632, |
| "step": 397, |
| "token_acc": 0.8303255017672488 |
| }, |
| { |
| "epoch": 1.276083467094703, |
| "grad_norm": 0.2212609849779949, |
| "learning_rate": 6.110871450324678e-06, |
| "loss": 0.4835612177848816, |
| "step": 398, |
| "token_acc": 0.8395820907570261 |
| }, |
| { |
| "epoch": 1.2792937399678972, |
| "grad_norm": 0.21543834649039986, |
| "learning_rate": 6.106025831802148e-06, |
| "loss": 0.4977620542049408, |
| "step": 399, |
| "token_acc": 0.8376060380030089 |
| }, |
| { |
| "epoch": 1.2825040128410916, |
| "grad_norm": 0.2240179499546737, |
| "learning_rate": 6.101168977571472e-06, |
| "loss": 0.5069173574447632, |
| "step": 400, |
| "token_acc": 0.8339093844348606 |
| }, |
| { |
| "epoch": 1.2857142857142856, |
| "grad_norm": 0.1957158009734539, |
| "learning_rate": 6.096300908572658e-06, |
| "loss": 0.515380859375, |
| "step": 401, |
| "token_acc": 0.8308960711150132 |
| }, |
| { |
| "epoch": 1.28892455858748, |
| "grad_norm": 0.18337021533975237, |
| "learning_rate": 6.091421645794063e-06, |
| "loss": 0.4827880859375, |
| "step": 402, |
| "token_acc": 0.8408876549431548 |
| }, |
| { |
| "epoch": 1.2921348314606742, |
| "grad_norm": 0.201454794503546, |
| "learning_rate": 6.086531210272307e-06, |
| "loss": 0.48809814453125, |
| "step": 403, |
| "token_acc": 0.8390145324144904 |
| }, |
| { |
| "epoch": 1.2953451043338684, |
| "grad_norm": 0.21590878833530364, |
| "learning_rate": 6.0816296230921774e-06, |
| "loss": 0.5361328125, |
| "step": 404, |
| "token_acc": 0.8257730768061761 |
| }, |
| { |
| "epoch": 1.2985553772070626, |
| "grad_norm": 0.18895177189761647, |
| "learning_rate": 6.076716905386546e-06, |
| "loss": 0.4814453125, |
| "step": 405, |
| "token_acc": 0.8436404513000572 |
| }, |
| { |
| "epoch": 1.3017656500802568, |
| "grad_norm": 0.19402420650392968, |
| "learning_rate": 6.071793078336268e-06, |
| "loss": 0.4874267578125, |
| "step": 406, |
| "token_acc": 0.8412702912442461 |
| }, |
| { |
| "epoch": 1.304975922953451, |
| "grad_norm": 0.19851857071944187, |
| "learning_rate": 6.0668581631701e-06, |
| "loss": 0.51611328125, |
| "step": 407, |
| "token_acc": 0.8293106083450718 |
| }, |
| { |
| "epoch": 1.3081861958266452, |
| "grad_norm": 0.18981501741141846, |
| "learning_rate": 6.061912181164602e-06, |
| "loss": 0.4933675229549408, |
| "step": 408, |
| "token_acc": 0.8380443881201454 |
| }, |
| { |
| "epoch": 1.3113964686998396, |
| "grad_norm": 0.26280017805298755, |
| "learning_rate": 6.056955153644048e-06, |
| "loss": 0.5085042715072632, |
| "step": 409, |
| "token_acc": 0.8320101659806012 |
| }, |
| { |
| "epoch": 1.3146067415730336, |
| "grad_norm": 0.20834143665165283, |
| "learning_rate": 6.051987101980336e-06, |
| "loss": 0.4640299677848816, |
| "step": 410, |
| "token_acc": 0.8469984896533973 |
| }, |
| { |
| "epoch": 1.317817014446228, |
| "grad_norm": 0.20377462902343046, |
| "learning_rate": 6.047008047592892e-06, |
| "loss": 0.4606119990348816, |
| "step": 411, |
| "token_acc": 0.8498952878946993 |
| }, |
| { |
| "epoch": 1.3210272873194222, |
| "grad_norm": 0.2256058408114048, |
| "learning_rate": 6.042018011948578e-06, |
| "loss": 0.5301920771598816, |
| "step": 412, |
| "token_acc": 0.8273486921025606 |
| }, |
| { |
| "epoch": 1.3242375601926164, |
| "grad_norm": 0.2039682659306468, |
| "learning_rate": 6.0370170165616056e-06, |
| "loss": 0.4898274838924408, |
| "step": 413, |
| "token_acc": 0.8401188260484118 |
| }, |
| { |
| "epoch": 1.3274478330658106, |
| "grad_norm": 0.19661549991262828, |
| "learning_rate": 6.0320050829934346e-06, |
| "loss": 0.501953125, |
| "step": 414, |
| "token_acc": 0.8351441080350566 |
| }, |
| { |
| "epoch": 1.3306581059390048, |
| "grad_norm": 0.2009643860247893, |
| "learning_rate": 6.026982232852684e-06, |
| "loss": 0.5013834834098816, |
| "step": 415, |
| "token_acc": 0.8359275770809191 |
| }, |
| { |
| "epoch": 1.333868378812199, |
| "grad_norm": 0.21808790105770282, |
| "learning_rate": 6.021948487795043e-06, |
| "loss": 0.4978841245174408, |
| "step": 416, |
| "token_acc": 0.8399703730053248 |
| }, |
| { |
| "epoch": 1.3370786516853932, |
| "grad_norm": 0.20526463177341095, |
| "learning_rate": 6.016903869523169e-06, |
| "loss": 0.5048828125, |
| "step": 417, |
| "token_acc": 0.834811573183773 |
| }, |
| { |
| "epoch": 1.3402889245585876, |
| "grad_norm": 0.21299152332644122, |
| "learning_rate": 6.011848399786601e-06, |
| "loss": 0.49658203125, |
| "step": 418, |
| "token_acc": 0.8379083588445921 |
| }, |
| { |
| "epoch": 1.3434991974317816, |
| "grad_norm": 0.24218971921492033, |
| "learning_rate": 6.0067821003816626e-06, |
| "loss": 0.5063883662223816, |
| "step": 419, |
| "token_acc": 0.8342907607753908 |
| }, |
| { |
| "epoch": 1.346709470304976, |
| "grad_norm": 0.23838925650407486, |
| "learning_rate": 6.0017049931513685e-06, |
| "loss": 0.4720052182674408, |
| "step": 420, |
| "token_acc": 0.8453532915568743 |
| }, |
| { |
| "epoch": 1.3499197431781702, |
| "grad_norm": 0.18940147678462618, |
| "learning_rate": 5.996617099985331e-06, |
| "loss": 0.4628499448299408, |
| "step": 421, |
| "token_acc": 0.8485520945983281 |
| }, |
| { |
| "epoch": 1.3531300160513644, |
| "grad_norm": 0.21754239145548293, |
| "learning_rate": 5.991518442819664e-06, |
| "loss": 0.503662109375, |
| "step": 422, |
| "token_acc": 0.8317651653677501 |
| }, |
| { |
| "epoch": 1.3563402889245586, |
| "grad_norm": 0.20677671370390927, |
| "learning_rate": 5.986409043636894e-06, |
| "loss": 0.4958903193473816, |
| "step": 423, |
| "token_acc": 0.8347628821195424 |
| }, |
| { |
| "epoch": 1.3595505617977528, |
| "grad_norm": 0.21125756075422883, |
| "learning_rate": 5.981288924465855e-06, |
| "loss": 0.4697265625, |
| "step": 424, |
| "token_acc": 0.8464578221460343 |
| }, |
| { |
| "epoch": 1.362760834670947, |
| "grad_norm": 0.1845227895272064, |
| "learning_rate": 5.9761581073816055e-06, |
| "loss": 0.4358724057674408, |
| "step": 425, |
| "token_acc": 0.856153648909988 |
| }, |
| { |
| "epoch": 1.3659711075441412, |
| "grad_norm": 0.22226226474729194, |
| "learning_rate": 5.971016614505321e-06, |
| "loss": 0.48388671875, |
| "step": 426, |
| "token_acc": 0.841594074255456 |
| }, |
| { |
| "epoch": 1.3691813804173354, |
| "grad_norm": 0.3627275381953577, |
| "learning_rate": 5.965864468004209e-06, |
| "loss": 0.5054525136947632, |
| "step": 427, |
| "token_acc": 0.8334647244765889 |
| }, |
| { |
| "epoch": 1.3723916532905296, |
| "grad_norm": 0.2446819517168817, |
| "learning_rate": 5.96070169009141e-06, |
| "loss": 0.5396932363510132, |
| "step": 428, |
| "token_acc": 0.8251210462343914 |
| }, |
| { |
| "epoch": 1.375601926163724, |
| "grad_norm": 0.18480486993834852, |
| "learning_rate": 5.955528303025899e-06, |
| "loss": 0.492919921875, |
| "step": 429, |
| "token_acc": 0.838200488309484 |
| }, |
| { |
| "epoch": 1.3788121990369182, |
| "grad_norm": 0.1996582980430958, |
| "learning_rate": 5.950344329112392e-06, |
| "loss": 0.4872233271598816, |
| "step": 430, |
| "token_acc": 0.8402870290077712 |
| }, |
| { |
| "epoch": 1.3820224719101124, |
| "grad_norm": 0.19915530342516288, |
| "learning_rate": 5.94514979070125e-06, |
| "loss": 0.4737955927848816, |
| "step": 431, |
| "token_acc": 0.8434756493995772 |
| }, |
| { |
| "epoch": 1.3852327447833066, |
| "grad_norm": 0.21306869814823742, |
| "learning_rate": 5.939944710188383e-06, |
| "loss": 0.4843343198299408, |
| "step": 432, |
| "token_acc": 0.840401274727809 |
| }, |
| { |
| "epoch": 1.3884430176565008, |
| "grad_norm": 0.21153154196928545, |
| "learning_rate": 5.934729110015151e-06, |
| "loss": 0.4997355341911316, |
| "step": 433, |
| "token_acc": 0.8352082065345394 |
| }, |
| { |
| "epoch": 1.391653290529695, |
| "grad_norm": 0.1948748437790448, |
| "learning_rate": 5.929503012668269e-06, |
| "loss": 0.4604085385799408, |
| "step": 434, |
| "token_acc": 0.8470840873900802 |
| }, |
| { |
| "epoch": 1.3948635634028892, |
| "grad_norm": 0.19686780321241873, |
| "learning_rate": 5.924266440679711e-06, |
| "loss": 0.4471028745174408, |
| "step": 435, |
| "token_acc": 0.8513346822951796 |
| }, |
| { |
| "epoch": 1.3980738362760834, |
| "grad_norm": 0.23488653128023249, |
| "learning_rate": 5.919019416626611e-06, |
| "loss": 0.4792887568473816, |
| "step": 436, |
| "token_acc": 0.8414461663137132 |
| }, |
| { |
| "epoch": 1.4012841091492776, |
| "grad_norm": 0.21251232363695088, |
| "learning_rate": 5.913761963131167e-06, |
| "loss": 0.5232747793197632, |
| "step": 437, |
| "token_acc": 0.8290875849854311 |
| }, |
| { |
| "epoch": 1.404494382022472, |
| "grad_norm": 0.22499625939133328, |
| "learning_rate": 5.908494102860541e-06, |
| "loss": 0.5137939453125, |
| "step": 438, |
| "token_acc": 0.8317246352356477 |
| }, |
| { |
| "epoch": 1.4077046548956662, |
| "grad_norm": 0.1950334456413267, |
| "learning_rate": 5.903215858526765e-06, |
| "loss": 0.5267741084098816, |
| "step": 439, |
| "token_acc": 0.8279046746612088 |
| }, |
| { |
| "epoch": 1.4109149277688604, |
| "grad_norm": 0.22516438572620065, |
| "learning_rate": 5.89792725288664e-06, |
| "loss": 0.4964192807674408, |
| "step": 440, |
| "token_acc": 0.8369873037845605 |
| }, |
| { |
| "epoch": 1.4141252006420546, |
| "grad_norm": 0.24835908416757446, |
| "learning_rate": 5.892628308741642e-06, |
| "loss": 0.515869140625, |
| "step": 441, |
| "token_acc": 0.828441323656562 |
| }, |
| { |
| "epoch": 1.4173354735152488, |
| "grad_norm": 0.20400606035164545, |
| "learning_rate": 5.8873190489378146e-06, |
| "loss": 0.5088704824447632, |
| "step": 442, |
| "token_acc": 0.8347255754640608 |
| }, |
| { |
| "epoch": 1.420545746388443, |
| "grad_norm": 0.1986845262037535, |
| "learning_rate": 5.881999496365684e-06, |
| "loss": 0.4674479365348816, |
| "step": 443, |
| "token_acc": 0.846090615198355 |
| }, |
| { |
| "epoch": 1.4237560192616372, |
| "grad_norm": 0.267957898844908, |
| "learning_rate": 5.876669673960148e-06, |
| "loss": 0.5269572138786316, |
| "step": 444, |
| "token_acc": 0.8272518904385817 |
| }, |
| { |
| "epoch": 1.4269662921348314, |
| "grad_norm": 0.1954436329565384, |
| "learning_rate": 5.871329604700384e-06, |
| "loss": 0.5074869990348816, |
| "step": 445, |
| "token_acc": 0.8332683584946571 |
| }, |
| { |
| "epoch": 1.4301765650080256, |
| "grad_norm": 0.2367051809395509, |
| "learning_rate": 5.865979311609748e-06, |
| "loss": 0.53369140625, |
| "step": 446, |
| "token_acc": 0.8254907318262998 |
| }, |
| { |
| "epoch": 1.43338683788122, |
| "grad_norm": 0.20668963218170677, |
| "learning_rate": 5.860618817755674e-06, |
| "loss": 0.4967448115348816, |
| "step": 447, |
| "token_acc": 0.8361870607490132 |
| }, |
| { |
| "epoch": 1.4365971107544142, |
| "grad_norm": 0.19895590716270828, |
| "learning_rate": 5.8552481462495785e-06, |
| "loss": 0.5145670771598816, |
| "step": 448, |
| "token_acc": 0.8321965107826509 |
| }, |
| { |
| "epoch": 1.4398073836276084, |
| "grad_norm": 0.30832311716577054, |
| "learning_rate": 5.849867320246756e-06, |
| "loss": 0.5150553584098816, |
| "step": 449, |
| "token_acc": 0.8315047127468582 |
| }, |
| { |
| "epoch": 1.4430176565008026, |
| "grad_norm": 0.20139986527570522, |
| "learning_rate": 5.844476362946282e-06, |
| "loss": 0.5021159052848816, |
| "step": 450, |
| "token_acc": 0.8357744885476138 |
| }, |
| { |
| "epoch": 1.4462279293739968, |
| "grad_norm": 0.20570084102187688, |
| "learning_rate": 5.8390752975909116e-06, |
| "loss": 0.4951171875, |
| "step": 451, |
| "token_acc": 0.8379211979893351 |
| }, |
| { |
| "epoch": 1.449438202247191, |
| "grad_norm": 0.35580604726544524, |
| "learning_rate": 5.833664147466983e-06, |
| "loss": 0.4720458984375, |
| "step": 452, |
| "token_acc": 0.8447361847752629 |
| }, |
| { |
| "epoch": 1.4526484751203852, |
| "grad_norm": 0.21944795964910477, |
| "learning_rate": 5.828242935904313e-06, |
| "loss": 0.51861572265625, |
| "step": 453, |
| "token_acc": 0.8304687110031447 |
| }, |
| { |
| "epoch": 1.4558587479935794, |
| "grad_norm": 0.216206526603621, |
| "learning_rate": 5.8228116862760936e-06, |
| "loss": 0.519775390625, |
| "step": 454, |
| "token_acc": 0.8273322676894389 |
| }, |
| { |
| "epoch": 1.4590690208667736, |
| "grad_norm": 0.23632585935268247, |
| "learning_rate": 5.8173704219988015e-06, |
| "loss": 0.518798828125, |
| "step": 455, |
| "token_acc": 0.8292118566043417 |
| }, |
| { |
| "epoch": 1.462279293739968, |
| "grad_norm": 0.2133500391661819, |
| "learning_rate": 5.811919166532087e-06, |
| "loss": 0.4931640625, |
| "step": 456, |
| "token_acc": 0.8363678333960607 |
| }, |
| { |
| "epoch": 1.465489566613162, |
| "grad_norm": 0.20470785515570017, |
| "learning_rate": 5.806457943378678e-06, |
| "loss": 0.4711100459098816, |
| "step": 457, |
| "token_acc": 0.8461975940270282 |
| }, |
| { |
| "epoch": 1.4686998394863564, |
| "grad_norm": 0.2195825179878936, |
| "learning_rate": 5.8009867760842776e-06, |
| "loss": 0.4962972104549408, |
| "step": 458, |
| "token_acc": 0.8377250322321346 |
| }, |
| { |
| "epoch": 1.4719101123595506, |
| "grad_norm": 0.2198457397287571, |
| "learning_rate": 5.795505688237461e-06, |
| "loss": 0.5032552480697632, |
| "step": 459, |
| "token_acc": 0.8350467062766113 |
| }, |
| { |
| "epoch": 1.4751203852327448, |
| "grad_norm": 0.2337449787419216, |
| "learning_rate": 5.790014703469577e-06, |
| "loss": 0.5010172724723816, |
| "step": 460, |
| "token_acc": 0.8338096031025574 |
| }, |
| { |
| "epoch": 1.478330658105939, |
| "grad_norm": 0.21231226916932114, |
| "learning_rate": 5.7845138454546445e-06, |
| "loss": 0.5118001699447632, |
| "step": 461, |
| "token_acc": 0.8346163924673687 |
| }, |
| { |
| "epoch": 1.4815409309791332, |
| "grad_norm": 0.2078743299374229, |
| "learning_rate": 5.779003137909246e-06, |
| "loss": 0.5108846426010132, |
| "step": 462, |
| "token_acc": 0.8348133684777426 |
| }, |
| { |
| "epoch": 1.4847512038523274, |
| "grad_norm": 0.2592011427768495, |
| "learning_rate": 5.773482604592436e-06, |
| "loss": 0.5184326171875, |
| "step": 463, |
| "token_acc": 0.8289103308278782 |
| }, |
| { |
| "epoch": 1.4879614767255216, |
| "grad_norm": 0.20072440357092186, |
| "learning_rate": 5.767952269305628e-06, |
| "loss": 0.52734375, |
| "step": 464, |
| "token_acc": 0.8298738313921822 |
| }, |
| { |
| "epoch": 1.491171749598716, |
| "grad_norm": 0.22554287682014237, |
| "learning_rate": 5.762412155892497e-06, |
| "loss": 0.5120443105697632, |
| "step": 465, |
| "token_acc": 0.8331116711856353 |
| }, |
| { |
| "epoch": 1.49438202247191, |
| "grad_norm": 0.19988828001667414, |
| "learning_rate": 5.756862288238876e-06, |
| "loss": 0.5431722402572632, |
| "step": 466, |
| "token_acc": 0.8247288141532609 |
| }, |
| { |
| "epoch": 1.4975922953451044, |
| "grad_norm": 0.21785806141984132, |
| "learning_rate": 5.751302690272653e-06, |
| "loss": 0.4822591245174408, |
| "step": 467, |
| "token_acc": 0.8409143683909689 |
| }, |
| { |
| "epoch": 1.5008025682182986, |
| "grad_norm": 0.2504631057534585, |
| "learning_rate": 5.745733385963666e-06, |
| "loss": 0.5161947011947632, |
| "step": 468, |
| "token_acc": 0.8309906856663261 |
| }, |
| { |
| "epoch": 1.5040128410914928, |
| "grad_norm": 0.22185281387691191, |
| "learning_rate": 5.740154399323604e-06, |
| "loss": 0.5100911855697632, |
| "step": 469, |
| "token_acc": 0.8328053877676979 |
| }, |
| { |
| "epoch": 1.507223113964687, |
| "grad_norm": 0.20799497094441494, |
| "learning_rate": 5.7345657544058975e-06, |
| "loss": 0.5203857421875, |
| "step": 470, |
| "token_acc": 0.8293026919927851 |
| }, |
| { |
| "epoch": 1.5104333868378812, |
| "grad_norm": 0.2731861663055686, |
| "learning_rate": 5.728967475305622e-06, |
| "loss": 0.5336100459098816, |
| "step": 471, |
| "token_acc": 0.8260466631908238 |
| }, |
| { |
| "epoch": 1.5136436597110754, |
| "grad_norm": 0.17070530437224793, |
| "learning_rate": 5.723359586159385e-06, |
| "loss": 0.4242350459098816, |
| "step": 472, |
| "token_acc": 0.8605290565725756 |
| }, |
| { |
| "epoch": 1.5168539325842696, |
| "grad_norm": 0.2212458940736086, |
| "learning_rate": 5.717742111145232e-06, |
| "loss": 0.52850341796875, |
| "step": 473, |
| "token_acc": 0.829130877534756 |
| }, |
| { |
| "epoch": 1.520064205457464, |
| "grad_norm": 0.2414877669947761, |
| "learning_rate": 5.7121150744825345e-06, |
| "loss": 0.5250651240348816, |
| "step": 474, |
| "token_acc": 0.8277093675602236 |
| }, |
| { |
| "epoch": 1.523274478330658, |
| "grad_norm": 0.24570889317680952, |
| "learning_rate": 5.70647850043189e-06, |
| "loss": 0.5235189199447632, |
| "step": 475, |
| "token_acc": 0.827997714766816 |
| }, |
| { |
| "epoch": 1.5264847512038524, |
| "grad_norm": 0.2245178646870182, |
| "learning_rate": 5.700832413295014e-06, |
| "loss": 0.5039469599723816, |
| "step": 476, |
| "token_acc": 0.8342653694641449 |
| }, |
| { |
| "epoch": 1.5296950240770464, |
| "grad_norm": 0.18979828763233803, |
| "learning_rate": 5.695176837414639e-06, |
| "loss": 0.44140625, |
| "step": 477, |
| "token_acc": 0.8553784055133313 |
| }, |
| { |
| "epoch": 1.5329052969502408, |
| "grad_norm": 0.26777629112744844, |
| "learning_rate": 5.689511797174406e-06, |
| "loss": 0.4769287109375, |
| "step": 478, |
| "token_acc": 0.8443998026767429 |
| }, |
| { |
| "epoch": 1.536115569823435, |
| "grad_norm": 0.20772215310261655, |
| "learning_rate": 5.68383731699876e-06, |
| "loss": 0.4756266474723816, |
| "step": 479, |
| "token_acc": 0.8454203175667018 |
| }, |
| { |
| "epoch": 1.5393258426966292, |
| "grad_norm": 0.20996237183201139, |
| "learning_rate": 5.678153421352851e-06, |
| "loss": 0.498779296875, |
| "step": 480, |
| "token_acc": 0.8373191847204661 |
| }, |
| { |
| "epoch": 1.5425361155698234, |
| "grad_norm": 0.22226152308127473, |
| "learning_rate": 5.672460134742417e-06, |
| "loss": 0.5416259765625, |
| "step": 481, |
| "token_acc": 0.8240733038205491 |
| }, |
| { |
| "epoch": 1.5457463884430176, |
| "grad_norm": 0.22278318566681762, |
| "learning_rate": 5.666757481713687e-06, |
| "loss": 0.5304362177848816, |
| "step": 482, |
| "token_acc": 0.8288821307241506 |
| }, |
| { |
| "epoch": 1.548956661316212, |
| "grad_norm": 0.18975820697676749, |
| "learning_rate": 5.661045486853273e-06, |
| "loss": 0.4460042417049408, |
| "step": 483, |
| "token_acc": 0.8508166917859834 |
| }, |
| { |
| "epoch": 1.552166934189406, |
| "grad_norm": 0.23953556254565397, |
| "learning_rate": 5.655324174788063e-06, |
| "loss": 0.4954427182674408, |
| "step": 484, |
| "token_acc": 0.8386156763424147 |
| }, |
| { |
| "epoch": 1.5553772070626004, |
| "grad_norm": 0.21308687243633692, |
| "learning_rate": 5.649593570185116e-06, |
| "loss": 0.5013021230697632, |
| "step": 485, |
| "token_acc": 0.8365669483628418 |
| }, |
| { |
| "epoch": 1.5585874799357944, |
| "grad_norm": 0.2591747558605951, |
| "learning_rate": 5.643853697751556e-06, |
| "loss": 0.5220947265625, |
| "step": 486, |
| "token_acc": 0.82841962791029 |
| }, |
| { |
| "epoch": 1.5617977528089888, |
| "grad_norm": 0.19126924879912047, |
| "learning_rate": 5.638104582234462e-06, |
| "loss": 0.4590657651424408, |
| "step": 487, |
| "token_acc": 0.8493991109615291 |
| }, |
| { |
| "epoch": 1.565008025682183, |
| "grad_norm": 0.21861579173115944, |
| "learning_rate": 5.6323462484207665e-06, |
| "loss": 0.5420736074447632, |
| "step": 488, |
| "token_acc": 0.8220674942647027 |
| }, |
| { |
| "epoch": 1.5682182985553772, |
| "grad_norm": 0.20998260867078428, |
| "learning_rate": 5.626578721137146e-06, |
| "loss": 0.4426676630973816, |
| "step": 489, |
| "token_acc": 0.8554470581902353 |
| }, |
| { |
| "epoch": 1.5714285714285714, |
| "grad_norm": 0.22131084892106537, |
| "learning_rate": 5.6208020252499125e-06, |
| "loss": 0.5141195058822632, |
| "step": 490, |
| "token_acc": 0.8318744388867513 |
| }, |
| { |
| "epoch": 1.5746388443017656, |
| "grad_norm": 0.20986941585535346, |
| "learning_rate": 5.6150161856649075e-06, |
| "loss": 0.4871826171875, |
| "step": 491, |
| "token_acc": 0.8383738855963362 |
| }, |
| { |
| "epoch": 1.57784911717496, |
| "grad_norm": 0.25274465913526994, |
| "learning_rate": 5.609221227327397e-06, |
| "loss": 0.4795735776424408, |
| "step": 492, |
| "token_acc": 0.8422403366168165 |
| }, |
| { |
| "epoch": 1.581059390048154, |
| "grad_norm": 0.2097105194028596, |
| "learning_rate": 5.603417175221961e-06, |
| "loss": 0.524169921875, |
| "step": 493, |
| "token_acc": 0.8295362334704794 |
| }, |
| { |
| "epoch": 1.5842696629213484, |
| "grad_norm": 0.20754286664850768, |
| "learning_rate": 5.597604054372387e-06, |
| "loss": 0.51318359375, |
| "step": 494, |
| "token_acc": 0.8333822870324183 |
| }, |
| { |
| "epoch": 1.5874799357945424, |
| "grad_norm": 0.21371753822542933, |
| "learning_rate": 5.59178188984156e-06, |
| "loss": 0.52392578125, |
| "step": 495, |
| "token_acc": 0.8291340676249795 |
| }, |
| { |
| "epoch": 1.5906902086677368, |
| "grad_norm": 0.24999506191109322, |
| "learning_rate": 5.585950706731359e-06, |
| "loss": 0.4842122495174408, |
| "step": 496, |
| "token_acc": 0.8399531895544081 |
| }, |
| { |
| "epoch": 1.593900481540931, |
| "grad_norm": 0.21426267112364675, |
| "learning_rate": 5.580110530182542e-06, |
| "loss": 0.5196126699447632, |
| "step": 497, |
| "token_acc": 0.8292234827081937 |
| }, |
| { |
| "epoch": 1.5971107544141252, |
| "grad_norm": 0.20205484433806986, |
| "learning_rate": 5.574261385374648e-06, |
| "loss": 0.4690958857536316, |
| "step": 498, |
| "token_acc": 0.8430140522879065 |
| }, |
| { |
| "epoch": 1.6003210272873194, |
| "grad_norm": 0.24302697861059383, |
| "learning_rate": 5.568403297525875e-06, |
| "loss": 0.496826171875, |
| "step": 499, |
| "token_acc": 0.8369248877989177 |
| }, |
| { |
| "epoch": 1.6035313001605136, |
| "grad_norm": 0.1950042091838039, |
| "learning_rate": 5.562536291892984e-06, |
| "loss": 0.4523112177848816, |
| "step": 500, |
| "token_acc": 0.8496571087889186 |
| }, |
| { |
| "epoch": 1.606741573033708, |
| "grad_norm": 0.2068802556309059, |
| "learning_rate": 5.556660393771181e-06, |
| "loss": 0.4870198667049408, |
| "step": 501, |
| "token_acc": 0.8402496542590332 |
| }, |
| { |
| "epoch": 1.609951845906902, |
| "grad_norm": 0.2243379981959925, |
| "learning_rate": 5.550775628494012e-06, |
| "loss": 0.5233561396598816, |
| "step": 502, |
| "token_acc": 0.8274344767729369 |
| }, |
| { |
| "epoch": 1.6131621187800964, |
| "grad_norm": 0.1927744421853587, |
| "learning_rate": 5.544882021433255e-06, |
| "loss": 0.5045573115348816, |
| "step": 503, |
| "token_acc": 0.8343204270724673 |
| }, |
| { |
| "epoch": 1.6163723916532904, |
| "grad_norm": 0.21113102526395744, |
| "learning_rate": 5.5389795979988046e-06, |
| "loss": 0.5104166865348816, |
| "step": 504, |
| "token_acc": 0.8325613633742619 |
| }, |
| { |
| "epoch": 1.6195826645264848, |
| "grad_norm": 0.20121076756883371, |
| "learning_rate": 5.533068383638573e-06, |
| "loss": 0.4603271484375, |
| "step": 505, |
| "token_acc": 0.8479993692296718 |
| }, |
| { |
| "epoch": 1.622792937399679, |
| "grad_norm": 0.1935855855657875, |
| "learning_rate": 5.5271484038383664e-06, |
| "loss": 0.4717203974723816, |
| "step": 506, |
| "token_acc": 0.8432898963659861 |
| }, |
| { |
| "epoch": 1.6260032102728732, |
| "grad_norm": 0.1924072294082364, |
| "learning_rate": 5.52121968412179e-06, |
| "loss": 0.5055745840072632, |
| "step": 507, |
| "token_acc": 0.8345738040983084 |
| }, |
| { |
| "epoch": 1.6292134831460674, |
| "grad_norm": 0.21261427100461092, |
| "learning_rate": 5.515282250050126e-06, |
| "loss": 0.5099284052848816, |
| "step": 508, |
| "token_acc": 0.8314976854366483 |
| }, |
| { |
| "epoch": 1.6324237560192616, |
| "grad_norm": 0.2025806990103089, |
| "learning_rate": 5.509336127222227e-06, |
| "loss": 0.4560343623161316, |
| "step": 509, |
| "token_acc": 0.8485463993320789 |
| }, |
| { |
| "epoch": 1.635634028892456, |
| "grad_norm": 0.19793694638195608, |
| "learning_rate": 5.50338134127441e-06, |
| "loss": 0.4739583432674408, |
| "step": 510, |
| "token_acc": 0.8435436655565424 |
| }, |
| { |
| "epoch": 1.63884430176565, |
| "grad_norm": 0.20239792327651074, |
| "learning_rate": 5.497417917880343e-06, |
| "loss": 0.4736328125, |
| "step": 511, |
| "token_acc": 0.8439130236170116 |
| }, |
| { |
| "epoch": 1.6420545746388444, |
| "grad_norm": 0.24438066987043003, |
| "learning_rate": 5.4914458827509284e-06, |
| "loss": 0.504638671875, |
| "step": 512, |
| "token_acc": 0.8329907322901383 |
| }, |
| { |
| "epoch": 1.6452648475120384, |
| "grad_norm": 0.20114343359896492, |
| "learning_rate": 5.485465261634202e-06, |
| "loss": 0.4823405146598816, |
| "step": 513, |
| "token_acc": 0.8400644581090766 |
| }, |
| { |
| "epoch": 1.6484751203852328, |
| "grad_norm": 0.19304136590878382, |
| "learning_rate": 5.4794760803152185e-06, |
| "loss": 0.4589436948299408, |
| "step": 514, |
| "token_acc": 0.8478929820632646 |
| }, |
| { |
| "epoch": 1.651685393258427, |
| "grad_norm": 0.20488526796239803, |
| "learning_rate": 5.473478364615935e-06, |
| "loss": 0.509521484375, |
| "step": 515, |
| "token_acc": 0.832314781042273 |
| }, |
| { |
| "epoch": 1.6548956661316212, |
| "grad_norm": 0.21491797606119356, |
| "learning_rate": 5.467472140395109e-06, |
| "loss": 0.5096029043197632, |
| "step": 516, |
| "token_acc": 0.8323196545778732 |
| }, |
| { |
| "epoch": 1.6581059390048154, |
| "grad_norm": 0.22279949178529096, |
| "learning_rate": 5.461457433548176e-06, |
| "loss": 0.509765625, |
| "step": 517, |
| "token_acc": 0.8327975491177504 |
| }, |
| { |
| "epoch": 1.6613162118780096, |
| "grad_norm": 0.28204149423499697, |
| "learning_rate": 5.455434270007149e-06, |
| "loss": 0.4602457880973816, |
| "step": 518, |
| "token_acc": 0.8462769014072337 |
| }, |
| { |
| "epoch": 1.664526484751204, |
| "grad_norm": 0.19857347221417296, |
| "learning_rate": 5.449402675740499e-06, |
| "loss": 0.4959716796875, |
| "step": 519, |
| "token_acc": 0.8394384067964914 |
| }, |
| { |
| "epoch": 1.667736757624398, |
| "grad_norm": 0.19651498000755524, |
| "learning_rate": 5.443362676753047e-06, |
| "loss": 0.5087077021598816, |
| "step": 520, |
| "token_acc": 0.8350759960516969 |
| }, |
| { |
| "epoch": 1.6709470304975924, |
| "grad_norm": 0.22185821418458507, |
| "learning_rate": 5.4373142990858475e-06, |
| "loss": 0.522705078125, |
| "step": 521, |
| "token_acc": 0.8278426661895878 |
| }, |
| { |
| "epoch": 1.6741573033707864, |
| "grad_norm": 0.21252124551024912, |
| "learning_rate": 5.4312575688160834e-06, |
| "loss": 0.4680989682674408, |
| "step": 522, |
| "token_acc": 0.8462134699192948 |
| }, |
| { |
| "epoch": 1.6773675762439808, |
| "grad_norm": 0.195019164160461, |
| "learning_rate": 5.4251925120569444e-06, |
| "loss": 0.4916178584098816, |
| "step": 523, |
| "token_acc": 0.838224431417199 |
| }, |
| { |
| "epoch": 1.680577849117175, |
| "grad_norm": 0.1954125028857243, |
| "learning_rate": 5.4191191549575235e-06, |
| "loss": 0.5023600459098816, |
| "step": 524, |
| "token_acc": 0.8380056429926596 |
| }, |
| { |
| "epoch": 1.6837881219903692, |
| "grad_norm": 0.20550599221336258, |
| "learning_rate": 5.4130375237027e-06, |
| "loss": 0.4671224057674408, |
| "step": 525, |
| "token_acc": 0.848831486823923 |
| }, |
| { |
| "epoch": 1.6869983948635634, |
| "grad_norm": 0.2248887463696617, |
| "learning_rate": 5.406947644513022e-06, |
| "loss": 0.5421549677848816, |
| "step": 526, |
| "token_acc": 0.8226678940196518 |
| }, |
| { |
| "epoch": 1.6902086677367576, |
| "grad_norm": 0.20967538841981723, |
| "learning_rate": 5.400849543644603e-06, |
| "loss": 0.4853108823299408, |
| "step": 527, |
| "token_acc": 0.8407424576984543 |
| }, |
| { |
| "epoch": 1.6934189406099518, |
| "grad_norm": 0.20696923631557104, |
| "learning_rate": 5.394743247389001e-06, |
| "loss": 0.5126139521598816, |
| "step": 528, |
| "token_acc": 0.8323176383810414 |
| }, |
| { |
| "epoch": 1.696629213483146, |
| "grad_norm": 0.1939717195751301, |
| "learning_rate": 5.388628782073109e-06, |
| "loss": 0.4879150390625, |
| "step": 529, |
| "token_acc": 0.8364967403475068 |
| }, |
| { |
| "epoch": 1.6998394863563404, |
| "grad_norm": 0.20816101945087143, |
| "learning_rate": 5.382506174059041e-06, |
| "loss": 0.5115153193473816, |
| "step": 530, |
| "token_acc": 0.8326466761482434 |
| }, |
| { |
| "epoch": 1.7030497592295344, |
| "grad_norm": 0.2124824253704799, |
| "learning_rate": 5.376375449744016e-06, |
| "loss": 0.5000407099723816, |
| "step": 531, |
| "token_acc": 0.8352673820710945 |
| }, |
| { |
| "epoch": 1.7062600321027288, |
| "grad_norm": 0.20749686944009402, |
| "learning_rate": 5.370236635560248e-06, |
| "loss": 0.5111491084098816, |
| "step": 532, |
| "token_acc": 0.8330238015681216 |
| }, |
| { |
| "epoch": 1.709470304975923, |
| "grad_norm": 0.21515083840345234, |
| "learning_rate": 5.364089757974825e-06, |
| "loss": 0.4954427182674408, |
| "step": 533, |
| "token_acc": 0.8367462041670953 |
| }, |
| { |
| "epoch": 1.7126805778491172, |
| "grad_norm": 0.20415546098643264, |
| "learning_rate": 5.357934843489607e-06, |
| "loss": 0.4798177182674408, |
| "step": 534, |
| "token_acc": 0.843754797148208 |
| }, |
| { |
| "epoch": 1.7158908507223114, |
| "grad_norm": 0.20982427849673252, |
| "learning_rate": 5.3517719186411e-06, |
| "loss": 0.5299072265625, |
| "step": 535, |
| "token_acc": 0.824616460691853 |
| }, |
| { |
| "epoch": 1.7191011235955056, |
| "grad_norm": 0.19471497160695844, |
| "learning_rate": 5.3456010100003475e-06, |
| "loss": 0.4965006709098816, |
| "step": 536, |
| "token_acc": 0.8370856785490932 |
| }, |
| { |
| "epoch": 1.7223113964686998, |
| "grad_norm": 0.21374619126322247, |
| "learning_rate": 5.339422144172813e-06, |
| "loss": 0.477294921875, |
| "step": 537, |
| "token_acc": 0.8420311047825308 |
| }, |
| { |
| "epoch": 1.725521669341894, |
| "grad_norm": 0.2174473719352107, |
| "learning_rate": 5.333235347798271e-06, |
| "loss": 0.5260416865348816, |
| "step": 538, |
| "token_acc": 0.8281320331805363 |
| }, |
| { |
| "epoch": 1.7287319422150884, |
| "grad_norm": 0.18649816958919183, |
| "learning_rate": 5.327040647550682e-06, |
| "loss": 0.5035807490348816, |
| "step": 539, |
| "token_acc": 0.8344767359437466 |
| }, |
| { |
| "epoch": 1.7319422150882824, |
| "grad_norm": 0.2090737717426926, |
| "learning_rate": 5.320838070138088e-06, |
| "loss": 0.470703125, |
| "step": 540, |
| "token_acc": 0.8442940369697607 |
| }, |
| { |
| "epoch": 1.7351524879614768, |
| "grad_norm": 0.19379465591646067, |
| "learning_rate": 5.3146276423024916e-06, |
| "loss": 0.4705810546875, |
| "step": 541, |
| "token_acc": 0.8456177731583734 |
| }, |
| { |
| "epoch": 1.7383627608346708, |
| "grad_norm": 0.2089526337613315, |
| "learning_rate": 5.308409390819741e-06, |
| "loss": 0.4715983271598816, |
| "step": 542, |
| "token_acc": 0.8441938895802779 |
| }, |
| { |
| "epoch": 1.7415730337078652, |
| "grad_norm": 0.18693608307321663, |
| "learning_rate": 5.30218334249942e-06, |
| "loss": 0.4944254755973816, |
| "step": 543, |
| "token_acc": 0.8356434935135585 |
| }, |
| { |
| "epoch": 1.7447833065810594, |
| "grad_norm": 0.20269334957465848, |
| "learning_rate": 5.295949524184719e-06, |
| "loss": 0.4752604365348816, |
| "step": 544, |
| "token_acc": 0.8436504266306717 |
| }, |
| { |
| "epoch": 1.7479935794542536, |
| "grad_norm": 0.2138523875202584, |
| "learning_rate": 5.289707962752339e-06, |
| "loss": 0.4871826171875, |
| "step": 545, |
| "token_acc": 0.8399199325842195 |
| }, |
| { |
| "epoch": 1.7512038523274478, |
| "grad_norm": 0.21718445041799453, |
| "learning_rate": 5.283458685112356e-06, |
| "loss": 0.5204671621322632, |
| "step": 546, |
| "token_acc": 0.8286762334730732 |
| }, |
| { |
| "epoch": 1.754414125200642, |
| "grad_norm": 0.1844341076843839, |
| "learning_rate": 5.277201718208119e-06, |
| "loss": 0.46923828125, |
| "step": 547, |
| "token_acc": 0.8450318961286986 |
| }, |
| { |
| "epoch": 1.7576243980738364, |
| "grad_norm": 0.22487524878241977, |
| "learning_rate": 5.2709370890161275e-06, |
| "loss": 0.4814860224723816, |
| "step": 548, |
| "token_acc": 0.8417112567809547 |
| }, |
| { |
| "epoch": 1.7608346709470304, |
| "grad_norm": 0.33064628612141134, |
| "learning_rate": 5.264664824545915e-06, |
| "loss": 0.4855143427848816, |
| "step": 549, |
| "token_acc": 0.8410220085334438 |
| }, |
| { |
| "epoch": 1.7640449438202248, |
| "grad_norm": 0.23119216688303074, |
| "learning_rate": 5.258384951839937e-06, |
| "loss": 0.5033366084098816, |
| "step": 550, |
| "token_acc": 0.8337788557407072 |
| }, |
| { |
| "epoch": 1.7672552166934188, |
| "grad_norm": 0.18103127956229184, |
| "learning_rate": 5.252097497973448e-06, |
| "loss": 0.4745280146598816, |
| "step": 551, |
| "token_acc": 0.8419503784039933 |
| }, |
| { |
| "epoch": 1.7704654895666132, |
| "grad_norm": 0.23833812707404017, |
| "learning_rate": 5.245802490054391e-06, |
| "loss": 0.5206705927848816, |
| "step": 552, |
| "token_acc": 0.8301833414810544 |
| }, |
| { |
| "epoch": 1.7736757624398074, |
| "grad_norm": 0.19785227845167025, |
| "learning_rate": 5.239499955223275e-06, |
| "loss": 0.5011393427848816, |
| "step": 553, |
| "token_acc": 0.8353842042141859 |
| }, |
| { |
| "epoch": 1.7768860353130016, |
| "grad_norm": 0.19304852583653132, |
| "learning_rate": 5.233189920653065e-06, |
| "loss": 0.473388671875, |
| "step": 554, |
| "token_acc": 0.8432906503448305 |
| }, |
| { |
| "epoch": 1.7800963081861958, |
| "grad_norm": 0.17681531991205873, |
| "learning_rate": 5.226872413549056e-06, |
| "loss": 0.4810384213924408, |
| "step": 555, |
| "token_acc": 0.8424199560655011 |
| }, |
| { |
| "epoch": 1.78330658105939, |
| "grad_norm": 0.222715883875662, |
| "learning_rate": 5.220547461148762e-06, |
| "loss": 0.5354411005973816, |
| "step": 556, |
| "token_acc": 0.8273922278443526 |
| }, |
| { |
| "epoch": 1.7865168539325844, |
| "grad_norm": 0.21206983063444448, |
| "learning_rate": 5.2142150907217994e-06, |
| "loss": 0.5223795771598816, |
| "step": 557, |
| "token_acc": 0.8301004175651783 |
| }, |
| { |
| "epoch": 1.7897271268057784, |
| "grad_norm": 0.22442095897752923, |
| "learning_rate": 5.207875329569763e-06, |
| "loss": 0.5176595449447632, |
| "step": 558, |
| "token_acc": 0.8284297882890461 |
| }, |
| { |
| "epoch": 1.7929373996789728, |
| "grad_norm": 0.1920913997085975, |
| "learning_rate": 5.201528205026115e-06, |
| "loss": 0.4936116635799408, |
| "step": 559, |
| "token_acc": 0.8377082151513588 |
| }, |
| { |
| "epoch": 1.7961476725521668, |
| "grad_norm": 0.20956077689588662, |
| "learning_rate": 5.195173744456062e-06, |
| "loss": 0.516357421875, |
| "step": 560, |
| "token_acc": 0.8303534127757773 |
| }, |
| { |
| "epoch": 1.7993579454253612, |
| "grad_norm": 0.19020157762826823, |
| "learning_rate": 5.188811975256443e-06, |
| "loss": 0.4615071713924408, |
| "step": 561, |
| "token_acc": 0.8485206394794728 |
| }, |
| { |
| "epoch": 1.8025682182985554, |
| "grad_norm": 0.23222755465752573, |
| "learning_rate": 5.182442924855604e-06, |
| "loss": 0.5149332880973816, |
| "step": 562, |
| "token_acc": 0.8293938407833624 |
| }, |
| { |
| "epoch": 1.8057784911717496, |
| "grad_norm": 0.20156009297422847, |
| "learning_rate": 5.176066620713284e-06, |
| "loss": 0.4759928584098816, |
| "step": 563, |
| "token_acc": 0.8466319757217003 |
| }, |
| { |
| "epoch": 1.8089887640449438, |
| "grad_norm": 0.22296232660664472, |
| "learning_rate": 5.169683090320499e-06, |
| "loss": 0.4822184443473816, |
| "step": 564, |
| "token_acc": 0.8399143768271938 |
| }, |
| { |
| "epoch": 1.812199036918138, |
| "grad_norm": 0.21622319508582735, |
| "learning_rate": 5.163292361199418e-06, |
| "loss": 0.5107828974723816, |
| "step": 565, |
| "token_acc": 0.8304115767143985 |
| }, |
| { |
| "epoch": 1.8154093097913324, |
| "grad_norm": 0.18927721817608498, |
| "learning_rate": 5.156894460903245e-06, |
| "loss": 0.463134765625, |
| "step": 566, |
| "token_acc": 0.8469723153690331 |
| }, |
| { |
| "epoch": 1.8186195826645264, |
| "grad_norm": 0.2353979649474993, |
| "learning_rate": 5.1504894170161064e-06, |
| "loss": 0.5115560293197632, |
| "step": 567, |
| "token_acc": 0.8329049258062717 |
| }, |
| { |
| "epoch": 1.8218298555377208, |
| "grad_norm": 0.21250723283268672, |
| "learning_rate": 5.144077257152926e-06, |
| "loss": 0.5072428584098816, |
| "step": 568, |
| "token_acc": 0.8336668242384212 |
| }, |
| { |
| "epoch": 1.8250401284109148, |
| "grad_norm": 0.1973486114398243, |
| "learning_rate": 5.137658008959306e-06, |
| "loss": 0.5082194209098816, |
| "step": 569, |
| "token_acc": 0.8336676845589859 |
| }, |
| { |
| "epoch": 1.8282504012841092, |
| "grad_norm": 0.1891113883288137, |
| "learning_rate": 5.131231700111412e-06, |
| "loss": 0.5130615234375, |
| "step": 570, |
| "token_acc": 0.8317067866491336 |
| }, |
| { |
| "epoch": 1.8314606741573034, |
| "grad_norm": 0.1811753985751082, |
| "learning_rate": 5.124798358315848e-06, |
| "loss": 0.4811198115348816, |
| "step": 571, |
| "token_acc": 0.8425846984605 |
| }, |
| { |
| "epoch": 1.8346709470304976, |
| "grad_norm": 0.2251090035119156, |
| "learning_rate": 5.118358011309543e-06, |
| "loss": 0.5309244990348816, |
| "step": 572, |
| "token_acc": 0.8256256709608158 |
| }, |
| { |
| "epoch": 1.8378812199036918, |
| "grad_norm": 0.21647657132954062, |
| "learning_rate": 5.1119106868596285e-06, |
| "loss": 0.5172526240348816, |
| "step": 573, |
| "token_acc": 0.8320323617707402 |
| }, |
| { |
| "epoch": 1.841091492776886, |
| "grad_norm": 0.21316393483658413, |
| "learning_rate": 5.105456412763317e-06, |
| "loss": 0.4793294370174408, |
| "step": 574, |
| "token_acc": 0.8424825576937821 |
| }, |
| { |
| "epoch": 1.8443017656500804, |
| "grad_norm": 0.19319232333509864, |
| "learning_rate": 5.0989952168477845e-06, |
| "loss": 0.5150553584098816, |
| "step": 575, |
| "token_acc": 0.8323303300826294 |
| }, |
| { |
| "epoch": 1.8475120385232744, |
| "grad_norm": 0.1928503515554526, |
| "learning_rate": 5.092527126970049e-06, |
| "loss": 0.4501546323299408, |
| "step": 576, |
| "token_acc": 0.8541077268114217 |
| }, |
| { |
| "epoch": 1.8507223113964688, |
| "grad_norm": 0.22284305361976314, |
| "learning_rate": 5.086052171016856e-06, |
| "loss": 0.5310465693473816, |
| "step": 577, |
| "token_acc": 0.8276428803018245 |
| }, |
| { |
| "epoch": 1.8539325842696628, |
| "grad_norm": 0.19548158623323453, |
| "learning_rate": 5.079570376904545e-06, |
| "loss": 0.4556071162223816, |
| "step": 578, |
| "token_acc": 0.8501566517150454 |
| }, |
| { |
| "epoch": 1.8571428571428572, |
| "grad_norm": 0.20558731330933078, |
| "learning_rate": 5.073081772578948e-06, |
| "loss": 0.4671224057674408, |
| "step": 579, |
| "token_acc": 0.8471051563225707 |
| }, |
| { |
| "epoch": 1.8603531300160514, |
| "grad_norm": 0.24311090611247288, |
| "learning_rate": 5.06658638601525e-06, |
| "loss": 0.5386556386947632, |
| "step": 580, |
| "token_acc": 0.82159393957155 |
| }, |
| { |
| "epoch": 1.8635634028892456, |
| "grad_norm": 0.20978509717502059, |
| "learning_rate": 5.060084245217884e-06, |
| "loss": 0.5139974355697632, |
| "step": 581, |
| "token_acc": 0.8326921636335957 |
| }, |
| { |
| "epoch": 1.8667736757624398, |
| "grad_norm": 0.2685738757132505, |
| "learning_rate": 5.0535753782203984e-06, |
| "loss": 0.502685546875, |
| "step": 582, |
| "token_acc": 0.8341369158210662 |
| }, |
| { |
| "epoch": 1.869983948635634, |
| "grad_norm": 0.22599843111261422, |
| "learning_rate": 5.047059813085343e-06, |
| "loss": 0.4997151792049408, |
| "step": 583, |
| "token_acc": 0.8349442492783665 |
| }, |
| { |
| "epoch": 1.8731942215088284, |
| "grad_norm": 0.2066873416116241, |
| "learning_rate": 5.040537577904148e-06, |
| "loss": 0.4751790463924408, |
| "step": 584, |
| "token_acc": 0.8440484294631824 |
| }, |
| { |
| "epoch": 1.8764044943820224, |
| "grad_norm": 0.20099590959736363, |
| "learning_rate": 5.034008700796996e-06, |
| "loss": 0.459716796875, |
| "step": 585, |
| "token_acc": 0.8476779365774649 |
| }, |
| { |
| "epoch": 1.8796147672552168, |
| "grad_norm": 0.21775156659306671, |
| "learning_rate": 5.027473209912714e-06, |
| "loss": 0.5242513418197632, |
| "step": 586, |
| "token_acc": 0.8305953601755901 |
| }, |
| { |
| "epoch": 1.8828250401284108, |
| "grad_norm": 0.1890495343970011, |
| "learning_rate": 5.020931133428634e-06, |
| "loss": 0.4951171875, |
| "step": 587, |
| "token_acc": 0.836662561631107 |
| }, |
| { |
| "epoch": 1.8860353130016052, |
| "grad_norm": 0.21527350209501137, |
| "learning_rate": 5.014382499550491e-06, |
| "loss": 0.53173828125, |
| "step": 588, |
| "token_acc": 0.8277492238314829 |
| }, |
| { |
| "epoch": 1.8892455858747994, |
| "grad_norm": 0.23227571055684113, |
| "learning_rate": 5.007827336512283e-06, |
| "loss": 0.4992268979549408, |
| "step": 589, |
| "token_acc": 0.8370985707887444 |
| }, |
| { |
| "epoch": 1.8924558587479936, |
| "grad_norm": 0.21042210745504783, |
| "learning_rate": 5.001265672576164e-06, |
| "loss": 0.5146484375, |
| "step": 590, |
| "token_acc": 0.8314888476811728 |
| }, |
| { |
| "epoch": 1.8956661316211878, |
| "grad_norm": 0.23265310716693266, |
| "learning_rate": 4.994697536032316e-06, |
| "loss": 0.46502685546875, |
| "step": 591, |
| "token_acc": 0.8469708939265049 |
| }, |
| { |
| "epoch": 1.898876404494382, |
| "grad_norm": 0.19709877781742627, |
| "learning_rate": 4.988122955198823e-06, |
| "loss": 0.526123046875, |
| "step": 592, |
| "token_acc": 0.8270824317198007 |
| }, |
| { |
| "epoch": 1.9020866773675762, |
| "grad_norm": 0.2112550438235136, |
| "learning_rate": 4.981541958421558e-06, |
| "loss": 0.4967854917049408, |
| "step": 593, |
| "token_acc": 0.8364650593827906 |
| }, |
| { |
| "epoch": 1.9052969502407704, |
| "grad_norm": 0.30968707063177286, |
| "learning_rate": 4.974954574074051e-06, |
| "loss": 0.4849446713924408, |
| "step": 594, |
| "token_acc": 0.840327030844899 |
| }, |
| { |
| "epoch": 1.9085072231139648, |
| "grad_norm": 0.17649448504083676, |
| "learning_rate": 4.9683608305573775e-06, |
| "loss": 0.4849853515625, |
| "step": 595, |
| "token_acc": 0.8408735274724193 |
| }, |
| { |
| "epoch": 1.9117174959871588, |
| "grad_norm": 0.1931223434545565, |
| "learning_rate": 4.961760756300024e-06, |
| "loss": 0.488037109375, |
| "step": 596, |
| "token_acc": 0.8391003302273251 |
| }, |
| { |
| "epoch": 1.9149277688603532, |
| "grad_norm": 0.19289562481645892, |
| "learning_rate": 4.955154379757776e-06, |
| "loss": 0.4826253354549408, |
| "step": 597, |
| "token_acc": 0.8411694360867358 |
| }, |
| { |
| "epoch": 1.9181380417335474, |
| "grad_norm": 0.2027631539141705, |
| "learning_rate": 4.94854172941359e-06, |
| "loss": 0.466064453125, |
| "step": 598, |
| "token_acc": 0.8464385225571056 |
| }, |
| { |
| "epoch": 1.9213483146067416, |
| "grad_norm": 0.19077233268267782, |
| "learning_rate": 4.94192283377747e-06, |
| "loss": 0.4860026240348816, |
| "step": 599, |
| "token_acc": 0.8392299481728827 |
| }, |
| { |
| "epoch": 1.9245585874799358, |
| "grad_norm": 0.42816646022917115, |
| "learning_rate": 4.935297721386346e-06, |
| "loss": 0.53564453125, |
| "step": 600, |
| "token_acc": 0.8237000288030063 |
| }, |
| { |
| "epoch": 1.92776886035313, |
| "grad_norm": 0.18538141827096788, |
| "learning_rate": 4.928666420803953e-06, |
| "loss": 0.5026448965072632, |
| "step": 601, |
| "token_acc": 0.8352271388377014 |
| }, |
| { |
| "epoch": 1.9309791332263242, |
| "grad_norm": 0.18836200455506089, |
| "learning_rate": 4.922028960620707e-06, |
| "loss": 0.4625651240348816, |
| "step": 602, |
| "token_acc": 0.8463538880118882 |
| }, |
| { |
| "epoch": 1.9341894060995184, |
| "grad_norm": 3.8210876490438364, |
| "learning_rate": 4.915385369453577e-06, |
| "loss": 0.4749755859375, |
| "step": 603, |
| "token_acc": 0.8578914327444173 |
| }, |
| { |
| "epoch": 1.9373996789727128, |
| "grad_norm": 0.18920287412411185, |
| "learning_rate": 4.908735675945967e-06, |
| "loss": 0.474853515625, |
| "step": 604, |
| "token_acc": 0.8450734160217078 |
| }, |
| { |
| "epoch": 1.9406099518459068, |
| "grad_norm": 0.18893026029493232, |
| "learning_rate": 4.902079908767593e-06, |
| "loss": 0.4932454526424408, |
| "step": 605, |
| "token_acc": 0.8372230383233746 |
| }, |
| { |
| "epoch": 1.9438202247191012, |
| "grad_norm": 0.19603551127341237, |
| "learning_rate": 4.895418096614352e-06, |
| "loss": 0.4828287959098816, |
| "step": 606, |
| "token_acc": 0.8403485501639586 |
| }, |
| { |
| "epoch": 1.9470304975922952, |
| "grad_norm": 0.19978754345494049, |
| "learning_rate": 4.888750268208213e-06, |
| "loss": 0.472900390625, |
| "step": 607, |
| "token_acc": 0.8435411698135478 |
| }, |
| { |
| "epoch": 1.9502407704654896, |
| "grad_norm": 0.16106835196772315, |
| "learning_rate": 4.88207645229707e-06, |
| "loss": 0.4534098505973816, |
| "step": 608, |
| "token_acc": 0.8508265038647486 |
| }, |
| { |
| "epoch": 1.9534510433386838, |
| "grad_norm": 0.20706264592958304, |
| "learning_rate": 4.8753966776546435e-06, |
| "loss": 0.5162353515625, |
| "step": 609, |
| "token_acc": 0.8308325069891165 |
| }, |
| { |
| "epoch": 1.956661316211878, |
| "grad_norm": 0.20059240987585883, |
| "learning_rate": 4.868710973080339e-06, |
| "loss": 0.4964599609375, |
| "step": 610, |
| "token_acc": 0.8357304231324441 |
| }, |
| { |
| "epoch": 1.9598715890850722, |
| "grad_norm": 0.1861954935618554, |
| "learning_rate": 4.862019367399132e-06, |
| "loss": 0.49462890625, |
| "step": 611, |
| "token_acc": 0.8372238527765166 |
| }, |
| { |
| "epoch": 1.9630818619582664, |
| "grad_norm": 0.18074506091497944, |
| "learning_rate": 4.855321889461436e-06, |
| "loss": 0.4484049677848816, |
| "step": 612, |
| "token_acc": 0.8514336315420921 |
| }, |
| { |
| "epoch": 1.9662921348314608, |
| "grad_norm": 0.19022034618725248, |
| "learning_rate": 4.848618568142984e-06, |
| "loss": 0.4744466245174408, |
| "step": 613, |
| "token_acc": 0.8430896562046518 |
| }, |
| { |
| "epoch": 1.9695024077046548, |
| "grad_norm": 0.2108594040760019, |
| "learning_rate": 4.841909432344706e-06, |
| "loss": 0.5034586787223816, |
| "step": 614, |
| "token_acc": 0.8326992650209126 |
| }, |
| { |
| "epoch": 1.9727126805778492, |
| "grad_norm": 0.21038379203970337, |
| "learning_rate": 4.8351945109925935e-06, |
| "loss": 0.485595703125, |
| "step": 615, |
| "token_acc": 0.8397556248498602 |
| }, |
| { |
| "epoch": 1.9759229534510432, |
| "grad_norm": 0.19789035644639225, |
| "learning_rate": 4.82847383303759e-06, |
| "loss": 0.4847412109375, |
| "step": 616, |
| "token_acc": 0.840059550300896 |
| }, |
| { |
| "epoch": 1.9791332263242376, |
| "grad_norm": 0.18719895439265477, |
| "learning_rate": 4.821747427455452e-06, |
| "loss": 0.5332845449447632, |
| "step": 617, |
| "token_acc": 0.8263378386364979 |
| }, |
| { |
| "epoch": 1.9823434991974318, |
| "grad_norm": 0.20232274397686473, |
| "learning_rate": 4.815015323246633e-06, |
| "loss": 0.488525390625, |
| "step": 618, |
| "token_acc": 0.8385997224780295 |
| }, |
| { |
| "epoch": 1.985553772070626, |
| "grad_norm": 0.204908880630734, |
| "learning_rate": 4.808277549436157e-06, |
| "loss": 0.5272623896598816, |
| "step": 619, |
| "token_acc": 0.8287716233266886 |
| }, |
| { |
| "epoch": 1.9887640449438202, |
| "grad_norm": 0.18923767196243826, |
| "learning_rate": 4.801534135073487e-06, |
| "loss": 0.4903971552848816, |
| "step": 620, |
| "token_acc": 0.8410040926815862 |
| }, |
| { |
| "epoch": 1.9919743178170144, |
| "grad_norm": 0.19604318807580265, |
| "learning_rate": 4.794785109232412e-06, |
| "loss": 0.4755045771598816, |
| "step": 621, |
| "token_acc": 0.8440249094547587 |
| }, |
| { |
| "epoch": 1.9951845906902088, |
| "grad_norm": 0.19360171069988216, |
| "learning_rate": 4.788030501010908e-06, |
| "loss": 0.4340413510799408, |
| "step": 622, |
| "token_acc": 0.8566109542706771 |
| }, |
| { |
| "epoch": 1.9983948635634028, |
| "grad_norm": 0.21169639803746398, |
| "learning_rate": 4.781270339531025e-06, |
| "loss": 0.4751790463924408, |
| "step": 623, |
| "token_acc": 0.8425490126991452 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.32969776523469907, |
| "learning_rate": 4.774504653938749e-06, |
| "loss": 0.5048828125, |
| "step": 624, |
| "token_acc": 0.835168437064428 |
| }, |
| { |
| "epoch": 2.0032102728731944, |
| "grad_norm": 0.2115590717349967, |
| "learning_rate": 4.767733473403889e-06, |
| "loss": 0.4543660581111908, |
| "step": 625, |
| "token_acc": 0.8494195117786585 |
| }, |
| { |
| "epoch": 2.0064205457463884, |
| "grad_norm": 0.20321529746345438, |
| "learning_rate": 4.760956827119941e-06, |
| "loss": 0.4671224057674408, |
| "step": 626, |
| "token_acc": 0.8450132819713727 |
| }, |
| { |
| "epoch": 2.009630818619583, |
| "grad_norm": 0.19687550291202977, |
| "learning_rate": 4.754174744303969e-06, |
| "loss": 0.4146728515625, |
| "step": 627, |
| "token_acc": 0.8618291589277594 |
| }, |
| { |
| "epoch": 2.012841091492777, |
| "grad_norm": 0.2523161476109027, |
| "learning_rate": 4.747387254196472e-06, |
| "loss": 0.4659017026424408, |
| "step": 628, |
| "token_acc": 0.8450511225888058 |
| }, |
| { |
| "epoch": 2.016051364365971, |
| "grad_norm": 0.2275898129138819, |
| "learning_rate": 4.740594386061269e-06, |
| "loss": 0.4815673828125, |
| "step": 629, |
| "token_acc": 0.8391848027771384 |
| }, |
| { |
| "epoch": 2.019261637239165, |
| "grad_norm": 0.20945775756940324, |
| "learning_rate": 4.733796169185358e-06, |
| "loss": 0.4023844599723816, |
| "step": 630, |
| "token_acc": 0.8670760098186089 |
| }, |
| { |
| "epoch": 2.0224719101123596, |
| "grad_norm": 0.18606275376579812, |
| "learning_rate": 4.726992632878804e-06, |
| "loss": 0.4026286005973816, |
| "step": 631, |
| "token_acc": 0.8687089536923019 |
| }, |
| { |
| "epoch": 2.0256821829855536, |
| "grad_norm": 0.24944135985122234, |
| "learning_rate": 4.7201838064746045e-06, |
| "loss": 0.4796549677848816, |
| "step": 632, |
| "token_acc": 0.8415990151837972 |
| }, |
| { |
| "epoch": 2.028892455858748, |
| "grad_norm": 0.21927512660431955, |
| "learning_rate": 4.713369719328564e-06, |
| "loss": 0.4185384213924408, |
| "step": 633, |
| "token_acc": 0.8599196770434493 |
| }, |
| { |
| "epoch": 2.0321027287319424, |
| "grad_norm": 0.20423909469957818, |
| "learning_rate": 4.706550400819168e-06, |
| "loss": 0.4786173701286316, |
| "step": 634, |
| "token_acc": 0.8406345834834711 |
| }, |
| { |
| "epoch": 2.0353130016051364, |
| "grad_norm": 0.2110739290103949, |
| "learning_rate": 4.699725880347459e-06, |
| "loss": 0.4677327573299408, |
| "step": 635, |
| "token_acc": 0.842199147099176 |
| }, |
| { |
| "epoch": 2.038523274478331, |
| "grad_norm": 0.2241130535375935, |
| "learning_rate": 4.692896187336904e-06, |
| "loss": 0.45947265625, |
| "step": 636, |
| "token_acc": 0.8468612098922518 |
| }, |
| { |
| "epoch": 2.041733547351525, |
| "grad_norm": 0.21080453370461158, |
| "learning_rate": 4.686061351233276e-06, |
| "loss": 0.4320882260799408, |
| "step": 637, |
| "token_acc": 0.8533629402120904 |
| }, |
| { |
| "epoch": 2.044943820224719, |
| "grad_norm": 0.32144418793695584, |
| "learning_rate": 4.6792214015045174e-06, |
| "loss": 0.4599609375, |
| "step": 638, |
| "token_acc": 0.8464602124837394 |
| }, |
| { |
| "epoch": 2.048154093097913, |
| "grad_norm": 0.2078479815708183, |
| "learning_rate": 4.672376367640618e-06, |
| "loss": 0.44677734375, |
| "step": 639, |
| "token_acc": 0.8507954992333089 |
| }, |
| { |
| "epoch": 2.0513643659711076, |
| "grad_norm": 0.19777970399487305, |
| "learning_rate": 4.6655262791534915e-06, |
| "loss": 0.447021484375, |
| "step": 640, |
| "token_acc": 0.8517660703332297 |
| }, |
| { |
| "epoch": 2.0545746388443016, |
| "grad_norm": 0.23410952163019927, |
| "learning_rate": 4.658671165576841e-06, |
| "loss": 0.48828125, |
| "step": 641, |
| "token_acc": 0.8380002042953089 |
| }, |
| { |
| "epoch": 2.057784911717496, |
| "grad_norm": 0.23586584692038676, |
| "learning_rate": 4.6518110564660345e-06, |
| "loss": 0.50079345703125, |
| "step": 642, |
| "token_acc": 0.8353889961913865 |
| }, |
| { |
| "epoch": 2.0609951845906904, |
| "grad_norm": 0.18434627206392934, |
| "learning_rate": 4.644945981397981e-06, |
| "loss": 0.4385986328125, |
| "step": 643, |
| "token_acc": 0.8545233421570331 |
| }, |
| { |
| "epoch": 2.0642054574638844, |
| "grad_norm": 0.24034833905211023, |
| "learning_rate": 4.6380759699709955e-06, |
| "loss": 0.4774169921875, |
| "step": 644, |
| "token_acc": 0.8411591494593856 |
| }, |
| { |
| "epoch": 2.067415730337079, |
| "grad_norm": 0.20241650168423786, |
| "learning_rate": 4.631201051804681e-06, |
| "loss": 0.4654134213924408, |
| "step": 645, |
| "token_acc": 0.8455160407197725 |
| }, |
| { |
| "epoch": 2.070626003210273, |
| "grad_norm": 0.2046900122680403, |
| "learning_rate": 4.6243212565397895e-06, |
| "loss": 0.473876953125, |
| "step": 646, |
| "token_acc": 0.8431059778612698 |
| }, |
| { |
| "epoch": 2.073836276083467, |
| "grad_norm": 0.2417909269891525, |
| "learning_rate": 4.6174366138381075e-06, |
| "loss": 0.5037841796875, |
| "step": 647, |
| "token_acc": 0.8344951828933971 |
| }, |
| { |
| "epoch": 2.077046548956661, |
| "grad_norm": 0.22303446814288203, |
| "learning_rate": 4.610547153382314e-06, |
| "loss": 0.4219563901424408, |
| "step": 648, |
| "token_acc": 0.8592609646904761 |
| }, |
| { |
| "epoch": 2.0802568218298556, |
| "grad_norm": 0.18113955215323252, |
| "learning_rate": 4.6036529048758625e-06, |
| "loss": 0.4479573667049408, |
| "step": 649, |
| "token_acc": 0.8503887564028392 |
| }, |
| { |
| "epoch": 2.0834670947030496, |
| "grad_norm": 0.20016249384220633, |
| "learning_rate": 4.596753898042852e-06, |
| "loss": 0.4075927734375, |
| "step": 650, |
| "token_acc": 0.8642372361853052 |
| }, |
| { |
| "epoch": 2.086677367576244, |
| "grad_norm": 0.2758564437294458, |
| "learning_rate": 4.589850162627892e-06, |
| "loss": 0.4864909052848816, |
| "step": 651, |
| "token_acc": 0.8406675969996761 |
| }, |
| { |
| "epoch": 2.0898876404494384, |
| "grad_norm": 0.20650339064451034, |
| "learning_rate": 4.582941728395984e-06, |
| "loss": 0.4553629755973816, |
| "step": 652, |
| "token_acc": 0.8495991193830745 |
| }, |
| { |
| "epoch": 2.0930979133226324, |
| "grad_norm": 0.2135335946405334, |
| "learning_rate": 4.5760286251323835e-06, |
| "loss": 0.4881998896598816, |
| "step": 653, |
| "token_acc": 0.838402452002496 |
| }, |
| { |
| "epoch": 2.096308186195827, |
| "grad_norm": 0.18863240697781536, |
| "learning_rate": 4.56911088264248e-06, |
| "loss": 0.4012858271598816, |
| "step": 654, |
| "token_acc": 0.8660901925722599 |
| }, |
| { |
| "epoch": 2.099518459069021, |
| "grad_norm": 0.1892004302680043, |
| "learning_rate": 4.562188530751662e-06, |
| "loss": 0.4156901240348816, |
| "step": 655, |
| "token_acc": 0.8613871908997803 |
| }, |
| { |
| "epoch": 2.102728731942215, |
| "grad_norm": 0.20815692597784538, |
| "learning_rate": 4.555261599305191e-06, |
| "loss": 0.4726969599723816, |
| "step": 656, |
| "token_acc": 0.8432399054351278 |
| }, |
| { |
| "epoch": 2.105939004815409, |
| "grad_norm": 0.2005084264907335, |
| "learning_rate": 4.548330118168078e-06, |
| "loss": 0.453369140625, |
| "step": 657, |
| "token_acc": 0.8475700262574954 |
| }, |
| { |
| "epoch": 2.1091492776886036, |
| "grad_norm": 0.1976983371473078, |
| "learning_rate": 4.5413941172249414e-06, |
| "loss": 0.47509765625, |
| "step": 658, |
| "token_acc": 0.8436715912866749 |
| }, |
| { |
| "epoch": 2.1123595505617976, |
| "grad_norm": 0.20121465339934608, |
| "learning_rate": 4.534453626379895e-06, |
| "loss": 0.4298502802848816, |
| "step": 659, |
| "token_acc": 0.8559626604434072 |
| }, |
| { |
| "epoch": 2.115569823434992, |
| "grad_norm": 0.2188682255969682, |
| "learning_rate": 4.527508675556402e-06, |
| "loss": 0.4773356318473816, |
| "step": 660, |
| "token_acc": 0.8424510479284684 |
| }, |
| { |
| "epoch": 2.1187800963081864, |
| "grad_norm": 0.196298873218584, |
| "learning_rate": 4.520559294697162e-06, |
| "loss": 0.4510905146598816, |
| "step": 661, |
| "token_acc": 0.8485553706957581 |
| }, |
| { |
| "epoch": 2.1219903691813804, |
| "grad_norm": 0.20127195747756338, |
| "learning_rate": 4.513605513763971e-06, |
| "loss": 0.441162109375, |
| "step": 662, |
| "token_acc": 0.8537049095552068 |
| }, |
| { |
| "epoch": 2.125200642054575, |
| "grad_norm": 0.20422156474354985, |
| "learning_rate": 4.5066473627375944e-06, |
| "loss": 0.430908203125, |
| "step": 663, |
| "token_acc": 0.8582405961292723 |
| }, |
| { |
| "epoch": 2.128410914927769, |
| "grad_norm": 0.19325151540226462, |
| "learning_rate": 4.499684871617642e-06, |
| "loss": 0.4597981870174408, |
| "step": 664, |
| "token_acc": 0.8459079539061464 |
| }, |
| { |
| "epoch": 2.131621187800963, |
| "grad_norm": 0.21773389555672684, |
| "learning_rate": 4.492718070422433e-06, |
| "loss": 0.4646809995174408, |
| "step": 665, |
| "token_acc": 0.846901257790134 |
| }, |
| { |
| "epoch": 2.134831460674157, |
| "grad_norm": 0.21424623915215318, |
| "learning_rate": 4.4857469891888724e-06, |
| "loss": 0.4659423828125, |
| "step": 666, |
| "token_acc": 0.8441213392519439 |
| }, |
| { |
| "epoch": 2.1380417335473516, |
| "grad_norm": 0.2009249890431782, |
| "learning_rate": 4.4787716579723136e-06, |
| "loss": 0.4574788510799408, |
| "step": 667, |
| "token_acc": 0.8479140475611946 |
| }, |
| { |
| "epoch": 2.1412520064205456, |
| "grad_norm": 0.1754109779980525, |
| "learning_rate": 4.471792106846437e-06, |
| "loss": 0.4366862177848816, |
| "step": 668, |
| "token_acc": 0.8545431885861233 |
| }, |
| { |
| "epoch": 2.14446227929374, |
| "grad_norm": 0.19503766537161335, |
| "learning_rate": 4.4648083659031164e-06, |
| "loss": 0.4374593198299408, |
| "step": 669, |
| "token_acc": 0.8539004313079454 |
| }, |
| { |
| "epoch": 2.1476725521669344, |
| "grad_norm": 0.20520643998076005, |
| "learning_rate": 4.45782046525229e-06, |
| "loss": 0.4506022334098816, |
| "step": 670, |
| "token_acc": 0.8490898021870575 |
| }, |
| { |
| "epoch": 2.1508828250401284, |
| "grad_norm": 0.2149478358921993, |
| "learning_rate": 4.450828435021828e-06, |
| "loss": 0.4812418818473816, |
| "step": 671, |
| "token_acc": 0.8411253322095418 |
| }, |
| { |
| "epoch": 2.154093097913323, |
| "grad_norm": 0.24205969670556346, |
| "learning_rate": 4.443832305357409e-06, |
| "loss": 0.4271240234375, |
| "step": 672, |
| "token_acc": 0.8578997251920647 |
| }, |
| { |
| "epoch": 2.157303370786517, |
| "grad_norm": 0.2004422310098208, |
| "learning_rate": 4.436832106422381e-06, |
| "loss": 0.4443766474723816, |
| "step": 673, |
| "token_acc": 0.8535244828617516 |
| }, |
| { |
| "epoch": 2.160513643659711, |
| "grad_norm": 0.21111911297117497, |
| "learning_rate": 4.429827868397641e-06, |
| "loss": 0.49969482421875, |
| "step": 674, |
| "token_acc": 0.835968496333625 |
| }, |
| { |
| "epoch": 2.163723916532905, |
| "grad_norm": 0.19869004633584814, |
| "learning_rate": 4.422819621481496e-06, |
| "loss": 0.4911295771598816, |
| "step": 675, |
| "token_acc": 0.8370750860720663 |
| }, |
| { |
| "epoch": 2.1669341894060996, |
| "grad_norm": 0.18351061549754683, |
| "learning_rate": 4.415807395889543e-06, |
| "loss": 0.4704183042049408, |
| "step": 676, |
| "token_acc": 0.8436283717276069 |
| }, |
| { |
| "epoch": 2.1701444622792936, |
| "grad_norm": 0.20781851776302881, |
| "learning_rate": 4.408791221854526e-06, |
| "loss": 0.4776204526424408, |
| "step": 677, |
| "token_acc": 0.8419597303153555 |
| }, |
| { |
| "epoch": 2.173354735152488, |
| "grad_norm": 0.19484554074620664, |
| "learning_rate": 4.401771129626217e-06, |
| "loss": 0.4590250849723816, |
| "step": 678, |
| "token_acc": 0.8474181663804423 |
| }, |
| { |
| "epoch": 2.176565008025682, |
| "grad_norm": 0.19215576220084854, |
| "learning_rate": 4.39474714947128e-06, |
| "loss": 0.4247233271598816, |
| "step": 679, |
| "token_acc": 0.857625869274352 |
| }, |
| { |
| "epoch": 2.1797752808988764, |
| "grad_norm": 0.19807829984933298, |
| "learning_rate": 4.38771931167314e-06, |
| "loss": 0.3979085385799408, |
| "step": 680, |
| "token_acc": 0.8667534544040275 |
| }, |
| { |
| "epoch": 2.182985553772071, |
| "grad_norm": 0.20487202093763737, |
| "learning_rate": 4.380687646531856e-06, |
| "loss": 0.4185791015625, |
| "step": 681, |
| "token_acc": 0.8599961127795507 |
| }, |
| { |
| "epoch": 2.186195826645265, |
| "grad_norm": 0.2158434149497215, |
| "learning_rate": 4.373652184363989e-06, |
| "loss": 0.4339803159236908, |
| "step": 682, |
| "token_acc": 0.8547810099872699 |
| }, |
| { |
| "epoch": 2.189406099518459, |
| "grad_norm": 0.19450239119508037, |
| "learning_rate": 4.366612955502466e-06, |
| "loss": 0.4608968198299408, |
| "step": 683, |
| "token_acc": 0.8457622285510433 |
| }, |
| { |
| "epoch": 2.192616372391653, |
| "grad_norm": 8.151322593196062, |
| "learning_rate": 4.35956999029646e-06, |
| "loss": 0.6525065302848816, |
| "step": 684, |
| "token_acc": 0.8290942820194461 |
| }, |
| { |
| "epoch": 2.1958266452648476, |
| "grad_norm": 0.2301432405581243, |
| "learning_rate": 4.352523319111249e-06, |
| "loss": 0.3920084834098816, |
| "step": 685, |
| "token_acc": 0.8705705882287679 |
| }, |
| { |
| "epoch": 2.1990369181380416, |
| "grad_norm": 0.19177589079146826, |
| "learning_rate": 4.34547297232809e-06, |
| "loss": 0.4669596552848816, |
| "step": 686, |
| "token_acc": 0.8443450342652492 |
| }, |
| { |
| "epoch": 2.202247191011236, |
| "grad_norm": 0.19407418830797563, |
| "learning_rate": 4.338418980344086e-06, |
| "loss": 0.44915771484375, |
| "step": 687, |
| "token_acc": 0.849480469239529 |
| }, |
| { |
| "epoch": 2.20545746388443, |
| "grad_norm": 0.21316814997568834, |
| "learning_rate": 4.331361373572058e-06, |
| "loss": 0.4639892578125, |
| "step": 688, |
| "token_acc": 0.8474408172948753 |
| }, |
| { |
| "epoch": 2.2086677367576244, |
| "grad_norm": 0.2011592865916337, |
| "learning_rate": 4.324300182440413e-06, |
| "loss": 0.4359130859375, |
| "step": 689, |
| "token_acc": 0.8551578038190392 |
| }, |
| { |
| "epoch": 2.211878009630819, |
| "grad_norm": 0.19686597083465757, |
| "learning_rate": 4.317235437393007e-06, |
| "loss": 0.4206950068473816, |
| "step": 690, |
| "token_acc": 0.8606955810759271 |
| }, |
| { |
| "epoch": 2.215088282504013, |
| "grad_norm": 0.1967845907073751, |
| "learning_rate": 4.310167168889025e-06, |
| "loss": 0.460693359375, |
| "step": 691, |
| "token_acc": 0.8455688388190843 |
| }, |
| { |
| "epoch": 2.218298555377207, |
| "grad_norm": 0.22090097209538617, |
| "learning_rate": 4.303095407402835e-06, |
| "loss": 0.4521484375, |
| "step": 692, |
| "token_acc": 0.8506517253734294 |
| }, |
| { |
| "epoch": 2.221508828250401, |
| "grad_norm": 0.21677968474594916, |
| "learning_rate": 4.296020183423873e-06, |
| "loss": 0.458984375, |
| "step": 693, |
| "token_acc": 0.8488590619141185 |
| }, |
| { |
| "epoch": 2.2247191011235956, |
| "grad_norm": 0.25446383871644274, |
| "learning_rate": 4.288941527456497e-06, |
| "loss": 0.47802734375, |
| "step": 694, |
| "token_acc": 0.8432776673814599 |
| }, |
| { |
| "epoch": 2.2279293739967896, |
| "grad_norm": 0.4021141479368582, |
| "learning_rate": 4.281859470019866e-06, |
| "loss": 0.4942220151424408, |
| "step": 695, |
| "token_acc": 0.8364409878451377 |
| }, |
| { |
| "epoch": 2.231139646869984, |
| "grad_norm": 0.21117603453855527, |
| "learning_rate": 4.274774041647802e-06, |
| "loss": 0.4697265625, |
| "step": 696, |
| "token_acc": 0.8429563959002171 |
| }, |
| { |
| "epoch": 2.234349919743178, |
| "grad_norm": 0.20475132687397243, |
| "learning_rate": 4.267685272888662e-06, |
| "loss": 0.4669596552848816, |
| "step": 697, |
| "token_acc": 0.8455216817521362 |
| }, |
| { |
| "epoch": 2.2375601926163724, |
| "grad_norm": 0.21314161795787429, |
| "learning_rate": 4.260593194305204e-06, |
| "loss": 0.4713541865348816, |
| "step": 698, |
| "token_acc": 0.8424913415995721 |
| }, |
| { |
| "epoch": 2.240770465489567, |
| "grad_norm": 0.20683152437768806, |
| "learning_rate": 4.253497836474453e-06, |
| "loss": 0.457763671875, |
| "step": 699, |
| "token_acc": 0.8489541017992153 |
| }, |
| { |
| "epoch": 2.243980738362761, |
| "grad_norm": 0.17876551858602532, |
| "learning_rate": 4.2463992299875805e-06, |
| "loss": 0.4122314453125, |
| "step": 700, |
| "token_acc": 0.8616135939943479 |
| }, |
| { |
| "epoch": 2.247191011235955, |
| "grad_norm": 0.19472552746773453, |
| "learning_rate": 4.239297405449754e-06, |
| "loss": 0.4484049677848816, |
| "step": 701, |
| "token_acc": 0.8514272976743756 |
| }, |
| { |
| "epoch": 2.250401284109149, |
| "grad_norm": 0.4749107558904206, |
| "learning_rate": 4.232192393480025e-06, |
| "loss": 0.449462890625, |
| "step": 702, |
| "token_acc": 0.8503469105115239 |
| }, |
| { |
| "epoch": 2.2536115569823436, |
| "grad_norm": 0.20430373824253736, |
| "learning_rate": 4.22508422471118e-06, |
| "loss": 0.4173991084098816, |
| "step": 703, |
| "token_acc": 0.8616719451442656 |
| }, |
| { |
| "epoch": 2.2568218298555376, |
| "grad_norm": 0.17014621877440333, |
| "learning_rate": 4.2179729297896215e-06, |
| "loss": 0.390655517578125, |
| "step": 704, |
| "token_acc": 0.8682284140866495 |
| }, |
| { |
| "epoch": 2.260032102728732, |
| "grad_norm": 0.2702584195216989, |
| "learning_rate": 4.210858539375225e-06, |
| "loss": 0.46490478515625, |
| "step": 705, |
| "token_acc": 0.8425552069903721 |
| }, |
| { |
| "epoch": 2.263242375601926, |
| "grad_norm": 0.18780220901872502, |
| "learning_rate": 4.203741084141217e-06, |
| "loss": 0.4297078549861908, |
| "step": 706, |
| "token_acc": 0.855920493511375 |
| }, |
| { |
| "epoch": 2.2664526484751204, |
| "grad_norm": 0.19912047965979465, |
| "learning_rate": 4.196620594774033e-06, |
| "loss": 0.4477742612361908, |
| "step": 707, |
| "token_acc": 0.8516944724579766 |
| }, |
| { |
| "epoch": 2.2696629213483144, |
| "grad_norm": 0.18743590774241778, |
| "learning_rate": 4.189497101973194e-06, |
| "loss": 0.4573567807674408, |
| "step": 708, |
| "token_acc": 0.8472192071315597 |
| }, |
| { |
| "epoch": 2.272873194221509, |
| "grad_norm": 0.196464694698763, |
| "learning_rate": 4.182370636451168e-06, |
| "loss": 0.470947265625, |
| "step": 709, |
| "token_acc": 0.8416422255356567 |
| }, |
| { |
| "epoch": 2.276083467094703, |
| "grad_norm": 0.2003085439880675, |
| "learning_rate": 4.175241228933239e-06, |
| "loss": 0.4788818359375, |
| "step": 710, |
| "token_acc": 0.840434185236257 |
| }, |
| { |
| "epoch": 2.279293739967897, |
| "grad_norm": 0.2707263062539891, |
| "learning_rate": 4.168108910157378e-06, |
| "loss": 0.4853515625, |
| "step": 711, |
| "token_acc": 0.8384725258346581 |
| }, |
| { |
| "epoch": 2.2825040128410916, |
| "grad_norm": 0.20944891376742802, |
| "learning_rate": 4.160973710874105e-06, |
| "loss": 0.4217529296875, |
| "step": 712, |
| "token_acc": 0.8596543309402678 |
| }, |
| { |
| "epoch": 2.2857142857142856, |
| "grad_norm": 0.21908698982662267, |
| "learning_rate": 4.153835661846362e-06, |
| "loss": 0.4789225459098816, |
| "step": 713, |
| "token_acc": 0.8408887490165224 |
| }, |
| { |
| "epoch": 2.28892455858748, |
| "grad_norm": 0.20629193977552543, |
| "learning_rate": 4.146694793849371e-06, |
| "loss": 0.4779866635799408, |
| "step": 714, |
| "token_acc": 0.8418222501838504 |
| }, |
| { |
| "epoch": 2.292134831460674, |
| "grad_norm": 0.3562937509884993, |
| "learning_rate": 4.139551137670518e-06, |
| "loss": 0.40576171875, |
| "step": 715, |
| "token_acc": 0.8659504454433432 |
| }, |
| { |
| "epoch": 2.2953451043338684, |
| "grad_norm": 0.20903340846333765, |
| "learning_rate": 4.132404724109203e-06, |
| "loss": 0.425048828125, |
| "step": 716, |
| "token_acc": 0.8585834387938246 |
| }, |
| { |
| "epoch": 2.2985553772070624, |
| "grad_norm": 0.20841632184926223, |
| "learning_rate": 4.125255583976713e-06, |
| "loss": 0.489990234375, |
| "step": 717, |
| "token_acc": 0.837642594100864 |
| }, |
| { |
| "epoch": 2.301765650080257, |
| "grad_norm": 0.19144223083979675, |
| "learning_rate": 4.118103748096096e-06, |
| "loss": 0.4186604917049408, |
| "step": 718, |
| "token_acc": 0.861624575018568 |
| }, |
| { |
| "epoch": 2.304975922953451, |
| "grad_norm": 0.18462822644417673, |
| "learning_rate": 4.110949247302018e-06, |
| "loss": 0.4283040463924408, |
| "step": 719, |
| "token_acc": 0.8584933901493453 |
| }, |
| { |
| "epoch": 2.308186195826645, |
| "grad_norm": 0.23122194639269375, |
| "learning_rate": 4.103792112440638e-06, |
| "loss": 0.4649251401424408, |
| "step": 720, |
| "token_acc": 0.8475111352464667 |
| }, |
| { |
| "epoch": 2.3113964686998396, |
| "grad_norm": 0.20138008057163814, |
| "learning_rate": 4.096632374369469e-06, |
| "loss": 0.471435546875, |
| "step": 721, |
| "token_acc": 0.8418223324087484 |
| }, |
| { |
| "epoch": 2.3146067415730336, |
| "grad_norm": 0.18998635568779773, |
| "learning_rate": 4.089470063957249e-06, |
| "loss": 0.4586588740348816, |
| "step": 722, |
| "token_acc": 0.8465928619671051 |
| }, |
| { |
| "epoch": 2.317817014446228, |
| "grad_norm": 0.20400593017285748, |
| "learning_rate": 4.082305212083804e-06, |
| "loss": 0.4299723505973816, |
| "step": 723, |
| "token_acc": 0.8568945784409309 |
| }, |
| { |
| "epoch": 2.321027287319422, |
| "grad_norm": 0.20762724598838142, |
| "learning_rate": 4.075137849639922e-06, |
| "loss": 0.4675700068473816, |
| "step": 724, |
| "token_acc": 0.8452025635686357 |
| }, |
| { |
| "epoch": 2.3242375601926164, |
| "grad_norm": 0.1880619188409803, |
| "learning_rate": 4.0679680075272115e-06, |
| "loss": 0.456787109375, |
| "step": 725, |
| "token_acc": 0.8487387689720907 |
| }, |
| { |
| "epoch": 2.3274478330658104, |
| "grad_norm": 0.19835165112775277, |
| "learning_rate": 4.060795716657973e-06, |
| "loss": 0.4452311396598816, |
| "step": 726, |
| "token_acc": 0.8512359014143344 |
| }, |
| { |
| "epoch": 2.330658105939005, |
| "grad_norm": 0.2121134079480636, |
| "learning_rate": 4.053621007955064e-06, |
| "loss": 0.4849446713924408, |
| "step": 727, |
| "token_acc": 0.8393768648219405 |
| }, |
| { |
| "epoch": 2.333868378812199, |
| "grad_norm": 0.22909333366098328, |
| "learning_rate": 4.046443912351768e-06, |
| "loss": 0.4340413510799408, |
| "step": 728, |
| "token_acc": 0.8557074860326944 |
| }, |
| { |
| "epoch": 2.337078651685393, |
| "grad_norm": 0.2203957460996992, |
| "learning_rate": 4.039264460791657e-06, |
| "loss": 0.4347737729549408, |
| "step": 729, |
| "token_acc": 0.8545071956839781 |
| }, |
| { |
| "epoch": 2.3402889245585876, |
| "grad_norm": 0.17220469104421543, |
| "learning_rate": 4.032082684228464e-06, |
| "loss": 0.458251953125, |
| "step": 730, |
| "token_acc": 0.8449290110193061 |
| }, |
| { |
| "epoch": 2.3434991974317816, |
| "grad_norm": 0.19203683386704218, |
| "learning_rate": 4.0248986136259406e-06, |
| "loss": 0.4306233823299408, |
| "step": 731, |
| "token_acc": 0.856137690285591 |
| }, |
| { |
| "epoch": 2.346709470304976, |
| "grad_norm": 0.2031216819897101, |
| "learning_rate": 4.017712279957736e-06, |
| "loss": 0.442138671875, |
| "step": 732, |
| "token_acc": 0.8552035462444415 |
| }, |
| { |
| "epoch": 2.34991974317817, |
| "grad_norm": 0.19608268201438736, |
| "learning_rate": 4.01052371420725e-06, |
| "loss": 0.4835408627986908, |
| "step": 733, |
| "token_acc": 0.8386815758131784 |
| }, |
| { |
| "epoch": 2.3531300160513644, |
| "grad_norm": 0.21101181910808853, |
| "learning_rate": 4.003332947367512e-06, |
| "loss": 0.4925944209098816, |
| "step": 734, |
| "token_acc": 0.8369294385129835 |
| }, |
| { |
| "epoch": 2.3563402889245584, |
| "grad_norm": 0.20991820512037176, |
| "learning_rate": 3.996140010441033e-06, |
| "loss": 0.4532063901424408, |
| "step": 735, |
| "token_acc": 0.849036176457023 |
| }, |
| { |
| "epoch": 2.359550561797753, |
| "grad_norm": 0.18535631476063846, |
| "learning_rate": 3.988944934439692e-06, |
| "loss": 0.501953125, |
| "step": 736, |
| "token_acc": 0.8317749768371802 |
| }, |
| { |
| "epoch": 2.362760834670947, |
| "grad_norm": 0.18506516727500621, |
| "learning_rate": 3.981747750384578e-06, |
| "loss": 0.4610188901424408, |
| "step": 737, |
| "token_acc": 0.845921960373089 |
| }, |
| { |
| "epoch": 2.365971107544141, |
| "grad_norm": 0.208056339714116, |
| "learning_rate": 3.974548489305876e-06, |
| "loss": 0.4741618037223816, |
| "step": 738, |
| "token_acc": 0.8420869682092947 |
| }, |
| { |
| "epoch": 2.3691813804173356, |
| "grad_norm": 0.20281908769650198, |
| "learning_rate": 3.9673471822427244e-06, |
| "loss": 0.4227702021598816, |
| "step": 739, |
| "token_acc": 0.8587991226400339 |
| }, |
| { |
| "epoch": 2.3723916532905296, |
| "grad_norm": 0.1996124616676339, |
| "learning_rate": 3.960143860243085e-06, |
| "loss": 0.4525553584098816, |
| "step": 740, |
| "token_acc": 0.848268109039626 |
| }, |
| { |
| "epoch": 2.375601926163724, |
| "grad_norm": 0.20996239997114738, |
| "learning_rate": 3.952938554363601e-06, |
| "loss": 0.4862060546875, |
| "step": 741, |
| "token_acc": 0.8388721388484253 |
| }, |
| { |
| "epoch": 2.378812199036918, |
| "grad_norm": 0.2013275896535193, |
| "learning_rate": 3.9457312956694736e-06, |
| "loss": 0.4324544370174408, |
| "step": 742, |
| "token_acc": 0.855948261073139 |
| }, |
| { |
| "epoch": 2.3820224719101124, |
| "grad_norm": 0.20345530910490214, |
| "learning_rate": 3.938522115234324e-06, |
| "loss": 0.486328125, |
| "step": 743, |
| "token_acc": 0.8371477100211938 |
| }, |
| { |
| "epoch": 2.3852327447833064, |
| "grad_norm": 0.20546084474227075, |
| "learning_rate": 3.931311044140055e-06, |
| "loss": 0.4718017578125, |
| "step": 744, |
| "token_acc": 0.8443781924795707 |
| }, |
| { |
| "epoch": 2.388443017656501, |
| "grad_norm": 0.21296384006021174, |
| "learning_rate": 3.924098113476726e-06, |
| "loss": 0.4545084834098816, |
| "step": 745, |
| "token_acc": 0.8494246157263653 |
| }, |
| { |
| "epoch": 2.391653290529695, |
| "grad_norm": 0.2221018040614752, |
| "learning_rate": 3.916883354342406e-06, |
| "loss": 0.5028483271598816, |
| "step": 746, |
| "token_acc": 0.8337560616098695 |
| }, |
| { |
| "epoch": 2.394863563402889, |
| "grad_norm": 0.3265950556384243, |
| "learning_rate": 3.9096667978430576e-06, |
| "loss": 0.4358724057674408, |
| "step": 747, |
| "token_acc": 0.8543744663599452 |
| }, |
| { |
| "epoch": 2.3980738362760836, |
| "grad_norm": 0.20055502563884153, |
| "learning_rate": 3.902448475092382e-06, |
| "loss": 0.4836018979549408, |
| "step": 748, |
| "token_acc": 0.8387875378488737 |
| }, |
| { |
| "epoch": 2.4012841091492776, |
| "grad_norm": 0.17420122955205622, |
| "learning_rate": 3.895228417211706e-06, |
| "loss": 0.420654296875, |
| "step": 749, |
| "token_acc": 0.8596989345596846 |
| }, |
| { |
| "epoch": 2.404494382022472, |
| "grad_norm": 0.19038474616744408, |
| "learning_rate": 3.888006655329828e-06, |
| "loss": 0.4512125849723816, |
| "step": 750, |
| "token_acc": 0.8501353108361196 |
| }, |
| { |
| "epoch": 2.407704654895666, |
| "grad_norm": 0.1809603835495357, |
| "learning_rate": 3.880783220582899e-06, |
| "loss": 0.40673828125, |
| "step": 751, |
| "token_acc": 0.865091380129019 |
| }, |
| { |
| "epoch": 2.4109149277688604, |
| "grad_norm": 0.19655293867885018, |
| "learning_rate": 3.87355814411428e-06, |
| "loss": 0.4643961787223816, |
| "step": 752, |
| "token_acc": 0.8461353315018977 |
| }, |
| { |
| "epoch": 2.4141252006420544, |
| "grad_norm": 0.21727278049897028, |
| "learning_rate": 3.86633145707441e-06, |
| "loss": 0.4464518427848816, |
| "step": 753, |
| "token_acc": 0.8512993522371843 |
| }, |
| { |
| "epoch": 2.417335473515249, |
| "grad_norm": 0.19141762810827992, |
| "learning_rate": 3.8591031906206735e-06, |
| "loss": 0.4381917417049408, |
| "step": 754, |
| "token_acc": 0.8541728933456112 |
| }, |
| { |
| "epoch": 2.420545746388443, |
| "grad_norm": 0.1989953727497561, |
| "learning_rate": 3.851873375917263e-06, |
| "loss": 0.4230143427848816, |
| "step": 755, |
| "token_acc": 0.8586672776916691 |
| }, |
| { |
| "epoch": 2.423756019261637, |
| "grad_norm": 0.23181582051230495, |
| "learning_rate": 3.8446420441350484e-06, |
| "loss": 0.4677327573299408, |
| "step": 756, |
| "token_acc": 0.8457994937242906 |
| }, |
| { |
| "epoch": 2.4269662921348316, |
| "grad_norm": 0.23951874435691276, |
| "learning_rate": 3.837409226451436e-06, |
| "loss": 0.4405517578125, |
| "step": 757, |
| "token_acc": 0.854444105779012 |
| }, |
| { |
| "epoch": 2.4301765650080256, |
| "grad_norm": 0.2528461108687528, |
| "learning_rate": 3.830174954050243e-06, |
| "loss": 0.460693359375, |
| "step": 758, |
| "token_acc": 0.8479104216428002 |
| }, |
| { |
| "epoch": 2.43338683788122, |
| "grad_norm": 0.1975669389803627, |
| "learning_rate": 3.822939258121557e-06, |
| "loss": 0.4350179135799408, |
| "step": 759, |
| "token_acc": 0.8552898624331037 |
| }, |
| { |
| "epoch": 2.436597110754414, |
| "grad_norm": 0.19115008055065782, |
| "learning_rate": 3.815702169861602e-06, |
| "loss": 0.436279296875, |
| "step": 760, |
| "token_acc": 0.854701305830316 |
| }, |
| { |
| "epoch": 2.4398073836276084, |
| "grad_norm": 0.19506918799904394, |
| "learning_rate": 3.808463720472607e-06, |
| "loss": 0.4281005859375, |
| "step": 761, |
| "token_acc": 0.8570146038416017 |
| }, |
| { |
| "epoch": 2.4430176565008024, |
| "grad_norm": 0.19842708334907894, |
| "learning_rate": 3.8012239411626655e-06, |
| "loss": 0.4752604365348816, |
| "step": 762, |
| "token_acc": 0.8403435924721215 |
| }, |
| { |
| "epoch": 2.446227929373997, |
| "grad_norm": 0.24844771648018837, |
| "learning_rate": 3.79398286314561e-06, |
| "loss": 0.4596354365348816, |
| "step": 763, |
| "token_acc": 0.8490057426109398 |
| }, |
| { |
| "epoch": 2.449438202247191, |
| "grad_norm": 0.19715040303854942, |
| "learning_rate": 3.7867405176408694e-06, |
| "loss": 0.4914957880973816, |
| "step": 764, |
| "token_acc": 0.8356615134323727 |
| }, |
| { |
| "epoch": 2.452648475120385, |
| "grad_norm": 0.2202478281163918, |
| "learning_rate": 3.7794969358733367e-06, |
| "loss": 0.4671224057674408, |
| "step": 765, |
| "token_acc": 0.8444183398261912 |
| }, |
| { |
| "epoch": 2.4558587479935796, |
| "grad_norm": 0.20459951635979012, |
| "learning_rate": 3.772252149073237e-06, |
| "loss": 0.4458821713924408, |
| "step": 766, |
| "token_acc": 0.8511224546199564 |
| }, |
| { |
| "epoch": 2.4590690208667736, |
| "grad_norm": 0.22450217665761618, |
| "learning_rate": 3.765006188475989e-06, |
| "loss": 0.4777018427848816, |
| "step": 767, |
| "token_acc": 0.8393996073719856 |
| }, |
| { |
| "epoch": 2.462279293739968, |
| "grad_norm": 0.19987629475264548, |
| "learning_rate": 3.7577590853220737e-06, |
| "loss": 0.463134765625, |
| "step": 768, |
| "token_acc": 0.8434089581091296 |
| }, |
| { |
| "epoch": 2.465489566613162, |
| "grad_norm": 0.20397409384425116, |
| "learning_rate": 3.7505108708568964e-06, |
| "loss": 0.438232421875, |
| "step": 769, |
| "token_acc": 0.8527221354299462 |
| }, |
| { |
| "epoch": 2.4686998394863564, |
| "grad_norm": 0.18616229570938478, |
| "learning_rate": 3.7432615763306564e-06, |
| "loss": 0.4168294370174408, |
| "step": 770, |
| "token_acc": 0.8615336290326948 |
| }, |
| { |
| "epoch": 2.4719101123595504, |
| "grad_norm": 0.19419952375362187, |
| "learning_rate": 3.736011232998206e-06, |
| "loss": 0.4766438901424408, |
| "step": 771, |
| "token_acc": 0.8408157756094587 |
| }, |
| { |
| "epoch": 2.475120385232745, |
| "grad_norm": 0.22402713438110902, |
| "learning_rate": 3.7287598721189225e-06, |
| "loss": 0.4800211787223816, |
| "step": 772, |
| "token_acc": 0.8409965346527944 |
| }, |
| { |
| "epoch": 2.478330658105939, |
| "grad_norm": 0.218272784858108, |
| "learning_rate": 3.721507524956569e-06, |
| "loss": 0.4397786557674408, |
| "step": 773, |
| "token_acc": 0.8515345114181647 |
| }, |
| { |
| "epoch": 2.481540930979133, |
| "grad_norm": 0.20590338299311356, |
| "learning_rate": 3.7142542227791597e-06, |
| "loss": 0.4883626401424408, |
| "step": 774, |
| "token_acc": 0.839136971273008 |
| }, |
| { |
| "epoch": 2.4847512038523276, |
| "grad_norm": 0.19709327674881771, |
| "learning_rate": 3.7069999968588315e-06, |
| "loss": 0.4468587338924408, |
| "step": 775, |
| "token_acc": 0.850644285251288 |
| }, |
| { |
| "epoch": 2.4879614767255216, |
| "grad_norm": 0.22022884628507708, |
| "learning_rate": 3.6997448784716943e-06, |
| "loss": 0.4592692255973816, |
| "step": 776, |
| "token_acc": 0.8474834727817374 |
| }, |
| { |
| "epoch": 2.491171749598716, |
| "grad_norm": 0.19724246193795616, |
| "learning_rate": 3.692488898897716e-06, |
| "loss": 0.4389241635799408, |
| "step": 777, |
| "token_acc": 0.8543228524690571 |
| }, |
| { |
| "epoch": 2.49438202247191, |
| "grad_norm": 0.1985194744702316, |
| "learning_rate": 3.6852320894205706e-06, |
| "loss": 0.4640299677848816, |
| "step": 778, |
| "token_acc": 0.8440977155286219 |
| }, |
| { |
| "epoch": 2.4975922953451044, |
| "grad_norm": 0.19701491228097626, |
| "learning_rate": 3.6779744813275153e-06, |
| "loss": 0.4671224057674408, |
| "step": 779, |
| "token_acc": 0.8440456139508346 |
| }, |
| { |
| "epoch": 2.5008025682182984, |
| "grad_norm": 0.18974099331427471, |
| "learning_rate": 3.670716105909243e-06, |
| "loss": 0.44677734375, |
| "step": 780, |
| "token_acc": 0.8511272770027192 |
| }, |
| { |
| "epoch": 2.504012841091493, |
| "grad_norm": 0.19515822626448606, |
| "learning_rate": 3.6634569944597646e-06, |
| "loss": 0.464599609375, |
| "step": 781, |
| "token_acc": 0.8460107523664183 |
| }, |
| { |
| "epoch": 2.5072231139646872, |
| "grad_norm": 0.1963284075241601, |
| "learning_rate": 3.656197178276256e-06, |
| "loss": 0.4373372495174408, |
| "step": 782, |
| "token_acc": 0.8526304538973646 |
| }, |
| { |
| "epoch": 2.510433386837881, |
| "grad_norm": 0.20458475147133778, |
| "learning_rate": 3.648936688658937e-06, |
| "loss": 0.48388671875, |
| "step": 783, |
| "token_acc": 0.8395201014301259 |
| }, |
| { |
| "epoch": 2.513643659711075, |
| "grad_norm": 0.1875839660932636, |
| "learning_rate": 3.641675556910928e-06, |
| "loss": 0.4583333432674408, |
| "step": 784, |
| "token_acc": 0.8477605477347625 |
| }, |
| { |
| "epoch": 2.5168539325842696, |
| "grad_norm": 0.20861108437682185, |
| "learning_rate": 3.634413814338117e-06, |
| "loss": 0.4627278745174408, |
| "step": 785, |
| "token_acc": 0.8475534760608696 |
| }, |
| { |
| "epoch": 2.520064205457464, |
| "grad_norm": 0.18447292570032636, |
| "learning_rate": 3.6271514922490315e-06, |
| "loss": 0.4423828125, |
| "step": 786, |
| "token_acc": 0.8558945701293097 |
| }, |
| { |
| "epoch": 2.523274478330658, |
| "grad_norm": 0.183868580124139, |
| "learning_rate": 3.619888621954688e-06, |
| "loss": 0.484619140625, |
| "step": 787, |
| "token_acc": 0.8412224592966723 |
| }, |
| { |
| "epoch": 2.5264847512038524, |
| "grad_norm": 0.2087754611682151, |
| "learning_rate": 3.612625234768476e-06, |
| "loss": 0.4520670771598816, |
| "step": 788, |
| "token_acc": 0.8494552689221115 |
| }, |
| { |
| "epoch": 2.5296950240770464, |
| "grad_norm": 0.1737629291121771, |
| "learning_rate": 3.6053613620060055e-06, |
| "loss": 0.4601237177848816, |
| "step": 789, |
| "token_acc": 0.8457257104020746 |
| }, |
| { |
| "epoch": 2.532905296950241, |
| "grad_norm": 0.17419155995660712, |
| "learning_rate": 3.5980970349849883e-06, |
| "loss": 0.404541015625, |
| "step": 790, |
| "token_acc": 0.8639127816961982 |
| }, |
| { |
| "epoch": 2.5361155698234352, |
| "grad_norm": 0.19772364419461955, |
| "learning_rate": 3.590832285025086e-06, |
| "loss": 0.45989990234375, |
| "step": 791, |
| "token_acc": 0.8458237277279737 |
| }, |
| { |
| "epoch": 2.539325842696629, |
| "grad_norm": 0.22027306794790522, |
| "learning_rate": 3.58356714344779e-06, |
| "loss": 0.437255859375, |
| "step": 792, |
| "token_acc": 0.8555278541953233 |
| }, |
| { |
| "epoch": 2.542536115569823, |
| "grad_norm": 0.3041945916167895, |
| "learning_rate": 3.576301641576279e-06, |
| "loss": 0.460693359375, |
| "step": 793, |
| "token_acc": 0.8462330337091692 |
| }, |
| { |
| "epoch": 2.5457463884430176, |
| "grad_norm": 0.17833110360469118, |
| "learning_rate": 3.5690358107352828e-06, |
| "loss": 0.4147135615348816, |
| "step": 794, |
| "token_acc": 0.8615169139768822 |
| }, |
| { |
| "epoch": 2.548956661316212, |
| "grad_norm": 0.18640330833188645, |
| "learning_rate": 3.5617696822509507e-06, |
| "loss": 0.4475911557674408, |
| "step": 795, |
| "token_acc": 0.8501959436814666 |
| }, |
| { |
| "epoch": 2.552166934189406, |
| "grad_norm": 0.1998174527481229, |
| "learning_rate": 3.5545032874507157e-06, |
| "loss": 0.4499104917049408, |
| "step": 796, |
| "token_acc": 0.8487911479756449 |
| }, |
| { |
| "epoch": 2.5553772070626004, |
| "grad_norm": 0.20594816291408258, |
| "learning_rate": 3.5472366576631594e-06, |
| "loss": 0.4593912959098816, |
| "step": 797, |
| "token_acc": 0.8464348456884555 |
| }, |
| { |
| "epoch": 2.5585874799357944, |
| "grad_norm": 0.18735094753253273, |
| "learning_rate": 3.539969824217874e-06, |
| "loss": 0.4056803584098816, |
| "step": 798, |
| "token_acc": 0.8645695718537573 |
| }, |
| { |
| "epoch": 2.561797752808989, |
| "grad_norm": 0.2318710491909044, |
| "learning_rate": 3.5327028184453347e-06, |
| "loss": 0.4589945673942566, |
| "step": 799, |
| "token_acc": 0.8473718998422008 |
| }, |
| { |
| "epoch": 2.5650080256821832, |
| "grad_norm": 0.20321706431944855, |
| "learning_rate": 3.525435671676754e-06, |
| "loss": 0.43487548828125, |
| "step": 800, |
| "token_acc": 0.8548923961390004 |
| }, |
| { |
| "epoch": 2.568218298555377, |
| "grad_norm": 0.19226811243347322, |
| "learning_rate": 3.518168415243957e-06, |
| "loss": 0.4361775815486908, |
| "step": 801, |
| "token_acc": 0.8546455004782437 |
| }, |
| { |
| "epoch": 2.571428571428571, |
| "grad_norm": 0.20749835339845354, |
| "learning_rate": 3.510901080479237e-06, |
| "loss": 0.4540202021598816, |
| "step": 802, |
| "token_acc": 0.8498480465746403 |
| }, |
| { |
| "epoch": 2.5746388443017656, |
| "grad_norm": 0.19982609438065352, |
| "learning_rate": 3.5036336987152294e-06, |
| "loss": 0.4513753354549408, |
| "step": 803, |
| "token_acc": 0.8490265800398946 |
| }, |
| { |
| "epoch": 2.57784911717496, |
| "grad_norm": 0.1904092351412821, |
| "learning_rate": 3.4963663012847697e-06, |
| "loss": 0.3978678584098816, |
| "step": 804, |
| "token_acc": 0.8680137044910768 |
| }, |
| { |
| "epoch": 2.581059390048154, |
| "grad_norm": 0.21102941528050104, |
| "learning_rate": 3.4890989195207632e-06, |
| "loss": 0.4449462890625, |
| "step": 805, |
| "token_acc": 0.8526128110191377 |
| }, |
| { |
| "epoch": 2.5842696629213484, |
| "grad_norm": 0.1838912778194567, |
| "learning_rate": 3.481831584756044e-06, |
| "loss": 0.4648641049861908, |
| "step": 806, |
| "token_acc": 0.8464704322688614 |
| }, |
| { |
| "epoch": 2.5874799357945424, |
| "grad_norm": 0.19920244205940324, |
| "learning_rate": 3.4745643283232463e-06, |
| "loss": 0.4841105341911316, |
| "step": 807, |
| "token_acc": 0.8388059850648198 |
| }, |
| { |
| "epoch": 2.590690208667737, |
| "grad_norm": 0.20205278122271417, |
| "learning_rate": 3.467297181554665e-06, |
| "loss": 0.4539388120174408, |
| "step": 808, |
| "token_acc": 0.850211181323957 |
| }, |
| { |
| "epoch": 2.5939004815409312, |
| "grad_norm": 0.2255035918908677, |
| "learning_rate": 3.4600301757821263e-06, |
| "loss": 0.4786784052848816, |
| "step": 809, |
| "token_acc": 0.8422286899694498 |
| }, |
| { |
| "epoch": 2.597110754414125, |
| "grad_norm": 0.20465355855215184, |
| "learning_rate": 3.452763342336842e-06, |
| "loss": 0.445556640625, |
| "step": 810, |
| "token_acc": 0.8522404470057361 |
| }, |
| { |
| "epoch": 2.600321027287319, |
| "grad_norm": 0.19109348036684573, |
| "learning_rate": 3.4454967125492846e-06, |
| "loss": 0.4539388120174408, |
| "step": 811, |
| "token_acc": 0.8483287395177225 |
| }, |
| { |
| "epoch": 2.6035313001605136, |
| "grad_norm": 0.23276755370704322, |
| "learning_rate": 3.4382303177490496e-06, |
| "loss": 0.4812825620174408, |
| "step": 812, |
| "token_acc": 0.840984396834211 |
| }, |
| { |
| "epoch": 2.606741573033708, |
| "grad_norm": 0.2098915307857685, |
| "learning_rate": 3.430964189264718e-06, |
| "loss": 0.4757487177848816, |
| "step": 813, |
| "token_acc": 0.8404143981582304 |
| }, |
| { |
| "epoch": 2.609951845906902, |
| "grad_norm": 0.19399775263086028, |
| "learning_rate": 3.423698358423722e-06, |
| "loss": 0.4791666865348816, |
| "step": 814, |
| "token_acc": 0.840336714916079 |
| }, |
| { |
| "epoch": 2.6131621187800964, |
| "grad_norm": 0.19057683090155922, |
| "learning_rate": 3.4164328565522094e-06, |
| "loss": 0.44775390625, |
| "step": 815, |
| "token_acc": 0.8495610915312901 |
| }, |
| { |
| "epoch": 2.6163723916532904, |
| "grad_norm": 0.2334463421118229, |
| "learning_rate": 3.409167714974914e-06, |
| "loss": 0.46875, |
| "step": 816, |
| "token_acc": 0.8432766126209109 |
| }, |
| { |
| "epoch": 2.619582664526485, |
| "grad_norm": 0.19364829944623, |
| "learning_rate": 3.401902965015013e-06, |
| "loss": 0.4464518427848816, |
| "step": 817, |
| "token_acc": 0.8517511283500472 |
| }, |
| { |
| "epoch": 2.6227929373996792, |
| "grad_norm": 0.20261948803065144, |
| "learning_rate": 3.394638637993994e-06, |
| "loss": 0.4549967646598816, |
| "step": 818, |
| "token_acc": 0.8495956798893742 |
| }, |
| { |
| "epoch": 2.626003210272873, |
| "grad_norm": 0.20631452948450796, |
| "learning_rate": 3.3873747652315244e-06, |
| "loss": 0.4468994140625, |
| "step": 819, |
| "token_acc": 0.851270244733375 |
| }, |
| { |
| "epoch": 2.629213483146067, |
| "grad_norm": 0.20133481061321648, |
| "learning_rate": 3.3801113780453125e-06, |
| "loss": 0.4449869990348816, |
| "step": 820, |
| "token_acc": 0.8522231643786884 |
| }, |
| { |
| "epoch": 2.6324237560192616, |
| "grad_norm": 0.1962973708838001, |
| "learning_rate": 3.3728485077509697e-06, |
| "loss": 0.4533284604549408, |
| "step": 821, |
| "token_acc": 0.8500484370255164 |
| }, |
| { |
| "epoch": 2.635634028892456, |
| "grad_norm": 0.17071948339566242, |
| "learning_rate": 3.3655861856618823e-06, |
| "loss": 0.3774007260799408, |
| "step": 822, |
| "token_acc": 0.8739059570666708 |
| }, |
| { |
| "epoch": 2.63884430176565, |
| "grad_norm": 0.19106041053421854, |
| "learning_rate": 3.3583244430890726e-06, |
| "loss": 0.4742838740348816, |
| "step": 823, |
| "token_acc": 0.8406814502542903 |
| }, |
| { |
| "epoch": 2.6420545746388444, |
| "grad_norm": 0.20086818900021164, |
| "learning_rate": 3.3510633113410633e-06, |
| "loss": 0.4076741635799408, |
| "step": 824, |
| "token_acc": 0.8653939506998632 |
| }, |
| { |
| "epoch": 2.6452648475120384, |
| "grad_norm": 0.16920033356636768, |
| "learning_rate": 3.343802821723743e-06, |
| "loss": 0.4669596552848816, |
| "step": 825, |
| "token_acc": 0.8433515081125119 |
| }, |
| { |
| "epoch": 2.648475120385233, |
| "grad_norm": 0.1995228870044419, |
| "learning_rate": 3.3365430055402357e-06, |
| "loss": 0.4744466245174408, |
| "step": 826, |
| "token_acc": 0.842604107498794 |
| }, |
| { |
| "epoch": 2.6516853932584272, |
| "grad_norm": 0.2049747416218833, |
| "learning_rate": 3.329283894090757e-06, |
| "loss": 0.4518229365348816, |
| "step": 827, |
| "token_acc": 0.849603820827889 |
| }, |
| { |
| "epoch": 2.654895666131621, |
| "grad_norm": 0.1956104974467398, |
| "learning_rate": 3.3220255186724863e-06, |
| "loss": 0.4283854365348816, |
| "step": 828, |
| "token_acc": 0.8578459150560697 |
| }, |
| { |
| "epoch": 2.658105939004815, |
| "grad_norm": 0.19695478096696453, |
| "learning_rate": 3.314767910579429e-06, |
| "loss": 0.4253743588924408, |
| "step": 829, |
| "token_acc": 0.8580703400546449 |
| }, |
| { |
| "epoch": 2.6613162118780096, |
| "grad_norm": 0.20303513871125345, |
| "learning_rate": 3.307511101102284e-06, |
| "loss": 0.4791666865348816, |
| "step": 830, |
| "token_acc": 0.8423049552177565 |
| }, |
| { |
| "epoch": 2.664526484751204, |
| "grad_norm": 0.19433971541630624, |
| "learning_rate": 3.3002551215283064e-06, |
| "loss": 0.4288330078125, |
| "step": 831, |
| "token_acc": 0.8565091912178404 |
| }, |
| { |
| "epoch": 2.667736757624398, |
| "grad_norm": 0.18358183780570975, |
| "learning_rate": 3.29300000314117e-06, |
| "loss": 0.457763671875, |
| "step": 832, |
| "token_acc": 0.8482874371407758 |
| }, |
| { |
| "epoch": 2.6709470304975924, |
| "grad_norm": 0.19470243762283243, |
| "learning_rate": 3.2857457772208398e-06, |
| "loss": 0.4110514521598816, |
| "step": 833, |
| "token_acc": 0.8618599939575965 |
| }, |
| { |
| "epoch": 2.6741573033707864, |
| "grad_norm": 0.18791430734811737, |
| "learning_rate": 3.278492475043431e-06, |
| "loss": 0.4446004331111908, |
| "step": 834, |
| "token_acc": 0.8527573789212444 |
| }, |
| { |
| "epoch": 2.677367576243981, |
| "grad_norm": 0.23214117649891247, |
| "learning_rate": 3.2712401278810783e-06, |
| "loss": 0.4806315302848816, |
| "step": 835, |
| "token_acc": 0.8403051280497658 |
| }, |
| { |
| "epoch": 2.6805778491171752, |
| "grad_norm": 0.18133293043714516, |
| "learning_rate": 3.2639887670017936e-06, |
| "loss": 0.476318359375, |
| "step": 836, |
| "token_acc": 0.8414659617161572 |
| }, |
| { |
| "epoch": 2.683788121990369, |
| "grad_norm": 0.18789082052914685, |
| "learning_rate": 3.2567384236693443e-06, |
| "loss": 0.458740234375, |
| "step": 837, |
| "token_acc": 0.8453170581614093 |
| }, |
| { |
| "epoch": 2.686998394863563, |
| "grad_norm": 0.19806392638633866, |
| "learning_rate": 3.249489129143104e-06, |
| "loss": 0.4776204526424408, |
| "step": 838, |
| "token_acc": 0.841645773855243 |
| }, |
| { |
| "epoch": 2.6902086677367576, |
| "grad_norm": 0.26211907964329023, |
| "learning_rate": 3.242240914677927e-06, |
| "loss": 0.4622395932674408, |
| "step": 839, |
| "token_acc": 0.8466941454653522 |
| }, |
| { |
| "epoch": 2.693418940609952, |
| "grad_norm": 0.2016656550776637, |
| "learning_rate": 3.234993811524011e-06, |
| "loss": 0.4531657099723816, |
| "step": 840, |
| "token_acc": 0.851422100408659 |
| }, |
| { |
| "epoch": 2.696629213483146, |
| "grad_norm": 0.19652448445502282, |
| "learning_rate": 3.227747850926763e-06, |
| "loss": 0.4515380859375, |
| "step": 841, |
| "token_acc": 0.8508233163907988 |
| }, |
| { |
| "epoch": 2.6998394863563404, |
| "grad_norm": 0.19941596229783648, |
| "learning_rate": 3.2205030641266645e-06, |
| "loss": 0.4519856870174408, |
| "step": 842, |
| "token_acc": 0.8473833129538995 |
| }, |
| { |
| "epoch": 2.7030497592295344, |
| "grad_norm": 0.22351326000827032, |
| "learning_rate": 3.213259482359131e-06, |
| "loss": 0.4437662959098816, |
| "step": 843, |
| "token_acc": 0.8541969557985216 |
| }, |
| { |
| "epoch": 2.706260032102729, |
| "grad_norm": 0.19902224213641, |
| "learning_rate": 3.20601713685439e-06, |
| "loss": 0.4616495966911316, |
| "step": 844, |
| "token_acc": 0.8445843509396473 |
| }, |
| { |
| "epoch": 2.7094703049759232, |
| "grad_norm": 0.18117357208751828, |
| "learning_rate": 3.198776058837335e-06, |
| "loss": 0.4361979365348816, |
| "step": 845, |
| "token_acc": 0.8555711350441413 |
| }, |
| { |
| "epoch": 2.712680577849117, |
| "grad_norm": 0.18226417638252332, |
| "learning_rate": 3.1915362795273947e-06, |
| "loss": 0.4409586787223816, |
| "step": 846, |
| "token_acc": 0.8520998316484529 |
| }, |
| { |
| "epoch": 2.715890850722311, |
| "grad_norm": 0.20695418343201752, |
| "learning_rate": 3.1842978301383973e-06, |
| "loss": 0.4451497495174408, |
| "step": 847, |
| "token_acc": 0.8534579934212124 |
| }, |
| { |
| "epoch": 2.7191011235955056, |
| "grad_norm": 0.20765131898028952, |
| "learning_rate": 3.1770607418784433e-06, |
| "loss": 0.4486897885799408, |
| "step": 848, |
| "token_acc": 0.851849861967661 |
| }, |
| { |
| "epoch": 2.7223113964687, |
| "grad_norm": 0.28670311782786223, |
| "learning_rate": 3.169825045949757e-06, |
| "loss": 0.4382731318473816, |
| "step": 849, |
| "token_acc": 0.8550369051504041 |
| }, |
| { |
| "epoch": 2.725521669341894, |
| "grad_norm": 0.19885506160440816, |
| "learning_rate": 3.162590773548564e-06, |
| "loss": 0.4499918818473816, |
| "step": 850, |
| "token_acc": 0.8506404746304723 |
| }, |
| { |
| "epoch": 2.7287319422150884, |
| "grad_norm": 0.6588682730611575, |
| "learning_rate": 3.1553579558649523e-06, |
| "loss": 0.4435628354549408, |
| "step": 851, |
| "token_acc": 0.850721246130347 |
| }, |
| { |
| "epoch": 2.7319422150882824, |
| "grad_norm": 0.21443528224992153, |
| "learning_rate": 3.1481266240827373e-06, |
| "loss": 0.4580892026424408, |
| "step": 852, |
| "token_acc": 0.8484892268236739 |
| }, |
| { |
| "epoch": 2.735152487961477, |
| "grad_norm": 0.18428221838796965, |
| "learning_rate": 3.1408968093793272e-06, |
| "loss": 0.4455973505973816, |
| "step": 853, |
| "token_acc": 0.8506541039353359 |
| }, |
| { |
| "epoch": 2.738362760834671, |
| "grad_norm": 0.21296547405294453, |
| "learning_rate": 3.1336685429255904e-06, |
| "loss": 0.4669596552848816, |
| "step": 854, |
| "token_acc": 0.844635929237041 |
| }, |
| { |
| "epoch": 2.741573033707865, |
| "grad_norm": 0.1970673523338949, |
| "learning_rate": 3.126441855885721e-06, |
| "loss": 0.4756673276424408, |
| "step": 855, |
| "token_acc": 0.8407943698334166 |
| }, |
| { |
| "epoch": 2.744783306581059, |
| "grad_norm": 0.18533116824438595, |
| "learning_rate": 3.1192167794171016e-06, |
| "loss": 0.4705810546875, |
| "step": 856, |
| "token_acc": 0.8437527360868543 |
| }, |
| { |
| "epoch": 2.7479935794542536, |
| "grad_norm": 0.20673945150207626, |
| "learning_rate": 3.111993344670173e-06, |
| "loss": 0.4440104365348816, |
| "step": 857, |
| "token_acc": 0.8527011318719799 |
| }, |
| { |
| "epoch": 2.751203852327448, |
| "grad_norm": 0.20432971411016607, |
| "learning_rate": 3.104771582788294e-06, |
| "loss": 0.4159749448299408, |
| "step": 858, |
| "token_acc": 0.8613609396227064 |
| }, |
| { |
| "epoch": 2.754414125200642, |
| "grad_norm": 0.20358212567304018, |
| "learning_rate": 3.0975515249076175e-06, |
| "loss": 0.47308349609375, |
| "step": 859, |
| "token_acc": 0.8437423208886284 |
| }, |
| { |
| "epoch": 2.7576243980738364, |
| "grad_norm": 0.23790587834419183, |
| "learning_rate": 3.0903332021569436e-06, |
| "loss": 0.4655354917049408, |
| "step": 860, |
| "token_acc": 0.8450927179357717 |
| }, |
| { |
| "epoch": 2.7608346709470304, |
| "grad_norm": 0.18307641448187867, |
| "learning_rate": 3.083116645657593e-06, |
| "loss": 0.43994140625, |
| "step": 861, |
| "token_acc": 0.8525941455216446 |
| }, |
| { |
| "epoch": 2.764044943820225, |
| "grad_norm": 0.1811725024850713, |
| "learning_rate": 3.075901886523275e-06, |
| "loss": 0.4523112177848816, |
| "step": 862, |
| "token_acc": 0.85006509693893 |
| }, |
| { |
| "epoch": 2.767255216693419, |
| "grad_norm": 0.1989735727923974, |
| "learning_rate": 3.068688955859945e-06, |
| "loss": 0.453857421875, |
| "step": 863, |
| "token_acc": 0.8496101461606868 |
| }, |
| { |
| "epoch": 2.770465489566613, |
| "grad_norm": 0.19077714887441913, |
| "learning_rate": 3.0614778847656763e-06, |
| "loss": 0.4422200620174408, |
| "step": 864, |
| "token_acc": 0.8537910905751728 |
| }, |
| { |
| "epoch": 2.773675762439807, |
| "grad_norm": 0.1977640130828424, |
| "learning_rate": 3.054268704330526e-06, |
| "loss": 0.4406535029411316, |
| "step": 865, |
| "token_acc": 0.8514452702525254 |
| }, |
| { |
| "epoch": 2.7768860353130016, |
| "grad_norm": 0.1808731306349855, |
| "learning_rate": 3.047061445636399e-06, |
| "loss": 0.4327799677848816, |
| "step": 866, |
| "token_acc": 0.8544081155438911 |
| }, |
| { |
| "epoch": 2.780096308186196, |
| "grad_norm": 0.1851079324071504, |
| "learning_rate": 3.039856139756916e-06, |
| "loss": 0.4536946713924408, |
| "step": 867, |
| "token_acc": 0.8493277191079274 |
| }, |
| { |
| "epoch": 2.78330658105939, |
| "grad_norm": 0.1925088846332903, |
| "learning_rate": 3.032652817757274e-06, |
| "loss": 0.44561767578125, |
| "step": 868, |
| "token_acc": 0.8513310539364469 |
| }, |
| { |
| "epoch": 2.7865168539325844, |
| "grad_norm": 0.17806624631080792, |
| "learning_rate": 3.0254515106941246e-06, |
| "loss": 0.4298909604549408, |
| "step": 869, |
| "token_acc": 0.8550168629794302 |
| }, |
| { |
| "epoch": 2.7897271268057784, |
| "grad_norm": 0.20298500989104132, |
| "learning_rate": 3.018252249615423e-06, |
| "loss": 0.4869384765625, |
| "step": 870, |
| "token_acc": 0.8397122828775442 |
| }, |
| { |
| "epoch": 2.792937399678973, |
| "grad_norm": 0.20918645966745378, |
| "learning_rate": 3.0110550655603096e-06, |
| "loss": 0.4236246943473816, |
| "step": 871, |
| "token_acc": 0.8599267937563877 |
| }, |
| { |
| "epoch": 2.796147672552167, |
| "grad_norm": 0.1949185958594448, |
| "learning_rate": 3.0038599895589657e-06, |
| "loss": 0.4535319209098816, |
| "step": 872, |
| "token_acc": 0.8489785408365976 |
| }, |
| { |
| "epoch": 2.799357945425361, |
| "grad_norm": 0.2025989275985742, |
| "learning_rate": 2.9966670526324888e-06, |
| "loss": 0.4698486328125, |
| "step": 873, |
| "token_acc": 0.8440375618917535 |
| }, |
| { |
| "epoch": 2.802568218298555, |
| "grad_norm": 0.17858666316037558, |
| "learning_rate": 2.9894762857927506e-06, |
| "loss": 0.4878743588924408, |
| "step": 874, |
| "token_acc": 0.8400573840707599 |
| }, |
| { |
| "epoch": 2.8057784911717496, |
| "grad_norm": 0.1916535937295437, |
| "learning_rate": 2.982287720042266e-06, |
| "loss": 0.437744140625, |
| "step": 875, |
| "token_acc": 0.8551442274926736 |
| }, |
| { |
| "epoch": 2.808988764044944, |
| "grad_norm": 0.1802093570224421, |
| "learning_rate": 2.9751013863740598e-06, |
| "loss": 0.4361165463924408, |
| "step": 876, |
| "token_acc": 0.8539694867975819 |
| }, |
| { |
| "epoch": 2.812199036918138, |
| "grad_norm": 0.18341640952769703, |
| "learning_rate": 2.9679173157715376e-06, |
| "loss": 0.4659423828125, |
| "step": 877, |
| "token_acc": 0.8473923909641549 |
| }, |
| { |
| "epoch": 2.8154093097913324, |
| "grad_norm": 0.19926651946578333, |
| "learning_rate": 2.960735539208344e-06, |
| "loss": 0.4440104365348816, |
| "step": 878, |
| "token_acc": 0.8531331105177661 |
| }, |
| { |
| "epoch": 2.8186195826645264, |
| "grad_norm": 0.18126871767280156, |
| "learning_rate": 2.953556087648232e-06, |
| "loss": 0.4615478515625, |
| "step": 879, |
| "token_acc": 0.846012540905167 |
| }, |
| { |
| "epoch": 2.821829855537721, |
| "grad_norm": 0.2048869147921809, |
| "learning_rate": 2.9463789920449363e-06, |
| "loss": 0.45703125, |
| "step": 880, |
| "token_acc": 0.848266982006728 |
| }, |
| { |
| "epoch": 2.825040128410915, |
| "grad_norm": 0.21465195556789202, |
| "learning_rate": 2.9392042833420274e-06, |
| "loss": 0.4917399287223816, |
| "step": 881, |
| "token_acc": 0.8380246428627718 |
| }, |
| { |
| "epoch": 2.828250401284109, |
| "grad_norm": 0.20000902524393988, |
| "learning_rate": 2.9320319924727893e-06, |
| "loss": 0.4508056640625, |
| "step": 882, |
| "token_acc": 0.8502047138063448 |
| }, |
| { |
| "epoch": 2.831460674157303, |
| "grad_norm": 0.1963068923956562, |
| "learning_rate": 2.924862150360078e-06, |
| "loss": 0.4275716245174408, |
| "step": 883, |
| "token_acc": 0.8565266918083189 |
| }, |
| { |
| "epoch": 2.8346709470304976, |
| "grad_norm": 0.193862012915289, |
| "learning_rate": 2.9176947879161956e-06, |
| "loss": 0.4766438901424408, |
| "step": 884, |
| "token_acc": 0.843147802741985 |
| }, |
| { |
| "epoch": 2.837881219903692, |
| "grad_norm": 0.17750407315632236, |
| "learning_rate": 2.9105299360427524e-06, |
| "loss": 0.4188639521598816, |
| "step": 885, |
| "token_acc": 0.8615091938541838 |
| }, |
| { |
| "epoch": 2.841091492776886, |
| "grad_norm": 0.20567900106548453, |
| "learning_rate": 2.903367625630531e-06, |
| "loss": 0.4632568359375, |
| "step": 886, |
| "token_acc": 0.8458096462571061 |
| }, |
| { |
| "epoch": 2.8443017656500804, |
| "grad_norm": 0.20028858738778038, |
| "learning_rate": 2.8962078875593617e-06, |
| "loss": 0.4229329526424408, |
| "step": 887, |
| "token_acc": 0.8594632037806461 |
| }, |
| { |
| "epoch": 2.8475120385232744, |
| "grad_norm": 0.19611757309044633, |
| "learning_rate": 2.889050752697982e-06, |
| "loss": 0.4334309995174408, |
| "step": 888, |
| "token_acc": 0.8550386869639498 |
| }, |
| { |
| "epoch": 2.850722311396469, |
| "grad_norm": 0.17221350932947482, |
| "learning_rate": 2.8818962519039052e-06, |
| "loss": 0.4227091670036316, |
| "step": 889, |
| "token_acc": 0.8588126879425745 |
| }, |
| { |
| "epoch": 2.853932584269663, |
| "grad_norm": 0.19068808416823477, |
| "learning_rate": 2.874744416023286e-06, |
| "loss": 0.443115234375, |
| "step": 890, |
| "token_acc": 0.8531313450132613 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 0.18792769221448774, |
| "learning_rate": 2.8675952758907976e-06, |
| "loss": 0.4404296875, |
| "step": 891, |
| "token_acc": 0.8532470114456258 |
| }, |
| { |
| "epoch": 2.860353130016051, |
| "grad_norm": 0.18774354635019833, |
| "learning_rate": 2.8604488623294816e-06, |
| "loss": 0.4722086787223816, |
| "step": 892, |
| "token_acc": 0.8409748913356431 |
| }, |
| { |
| "epoch": 2.8635634028892456, |
| "grad_norm": 0.17303731091005972, |
| "learning_rate": 2.8533052061506273e-06, |
| "loss": 0.4560546875, |
| "step": 893, |
| "token_acc": 0.8479154513444437 |
| }, |
| { |
| "epoch": 2.86677367576244, |
| "grad_norm": 0.20252387350469153, |
| "learning_rate": 2.8461643381536386e-06, |
| "loss": 0.4105224609375, |
| "step": 894, |
| "token_acc": 0.8620219153015483 |
| }, |
| { |
| "epoch": 2.869983948635634, |
| "grad_norm": 0.23064818047776917, |
| "learning_rate": 2.8390262891258956e-06, |
| "loss": 0.44384765625, |
| "step": 895, |
| "token_acc": 0.852279850676447 |
| }, |
| { |
| "epoch": 2.8731942215088284, |
| "grad_norm": 0.2039541978845269, |
| "learning_rate": 2.831891089842623e-06, |
| "loss": 0.4642741084098816, |
| "step": 896, |
| "token_acc": 0.8480350311624173 |
| }, |
| { |
| "epoch": 2.8764044943820224, |
| "grad_norm": 0.19298964547659045, |
| "learning_rate": 2.824758771066761e-06, |
| "loss": 0.47314453125, |
| "step": 897, |
| "token_acc": 0.8419148311180962 |
| }, |
| { |
| "epoch": 2.879614767255217, |
| "grad_norm": 0.22735774599571984, |
| "learning_rate": 2.817629363548833e-06, |
| "loss": 0.4599202573299408, |
| "step": 898, |
| "token_acc": 0.8460708646745579 |
| }, |
| { |
| "epoch": 2.882825040128411, |
| "grad_norm": 0.17385809855249318, |
| "learning_rate": 2.8105028980268066e-06, |
| "loss": 0.4387614130973816, |
| "step": 899, |
| "token_acc": 0.8537094497487598 |
| }, |
| { |
| "epoch": 2.886035313001605, |
| "grad_norm": 0.21561442777127388, |
| "learning_rate": 2.8033794052259683e-06, |
| "loss": 0.4597981870174408, |
| "step": 900, |
| "token_acc": 0.8463050783653008 |
| }, |
| { |
| "epoch": 2.889245585874799, |
| "grad_norm": 0.1925141857167567, |
| "learning_rate": 2.796258915858783e-06, |
| "loss": 0.4646809995174408, |
| "step": 901, |
| "token_acc": 0.8462298777070343 |
| }, |
| { |
| "epoch": 2.8924558587479936, |
| "grad_norm": 0.1967049507414003, |
| "learning_rate": 2.789141460624775e-06, |
| "loss": 0.4554036557674408, |
| "step": 902, |
| "token_acc": 0.8482254803972719 |
| }, |
| { |
| "epoch": 2.895666131621188, |
| "grad_norm": 0.19182500378328768, |
| "learning_rate": 2.782027070210379e-06, |
| "loss": 0.4325358271598816, |
| "step": 903, |
| "token_acc": 0.8540655882553516 |
| }, |
| { |
| "epoch": 2.898876404494382, |
| "grad_norm": 0.20492626814952483, |
| "learning_rate": 2.7749157752888192e-06, |
| "loss": 0.447021484375, |
| "step": 904, |
| "token_acc": 0.8528819674309748 |
| }, |
| { |
| "epoch": 2.902086677367576, |
| "grad_norm": 0.18973215032814825, |
| "learning_rate": 2.767807606519975e-06, |
| "loss": 0.4326985776424408, |
| "step": 905, |
| "token_acc": 0.8551310028184196 |
| }, |
| { |
| "epoch": 2.9052969502407704, |
| "grad_norm": 0.20870842711893867, |
| "learning_rate": 2.760702594550246e-06, |
| "loss": 0.4281412959098816, |
| "step": 906, |
| "token_acc": 0.8567996447685152 |
| }, |
| { |
| "epoch": 2.908507223113965, |
| "grad_norm": 0.21820108404227115, |
| "learning_rate": 2.753600770012421e-06, |
| "loss": 0.4617513120174408, |
| "step": 907, |
| "token_acc": 0.846852006172273 |
| }, |
| { |
| "epoch": 2.911717495987159, |
| "grad_norm": 0.2082233355287873, |
| "learning_rate": 2.7465021635255465e-06, |
| "loss": 0.4466145932674408, |
| "step": 908, |
| "token_acc": 0.8515980815438643 |
| }, |
| { |
| "epoch": 2.914927768860353, |
| "grad_norm": 0.20502239244120005, |
| "learning_rate": 2.739406805694797e-06, |
| "loss": 0.4312540888786316, |
| "step": 909, |
| "token_acc": 0.8561211161593144 |
| }, |
| { |
| "epoch": 2.918138041733547, |
| "grad_norm": 0.18589225273227217, |
| "learning_rate": 2.732314727111338e-06, |
| "loss": 0.457763671875, |
| "step": 910, |
| "token_acc": 0.8447814188569882 |
| }, |
| { |
| "epoch": 2.9213483146067416, |
| "grad_norm": 0.19325400267429202, |
| "learning_rate": 2.725225958352197e-06, |
| "loss": 0.4157511591911316, |
| "step": 911, |
| "token_acc": 0.8615710983757084 |
| }, |
| { |
| "epoch": 2.924558587479936, |
| "grad_norm": 0.24863658946228606, |
| "learning_rate": 2.7181405299801342e-06, |
| "loss": 0.441162109375, |
| "step": 912, |
| "token_acc": 0.8537013414150237 |
| }, |
| { |
| "epoch": 2.92776886035313, |
| "grad_norm": 0.1679392411358542, |
| "learning_rate": 2.7110584725435037e-06, |
| "loss": 0.3821207880973816, |
| "step": 913, |
| "token_acc": 0.8727992820526524 |
| }, |
| { |
| "epoch": 2.930979133226324, |
| "grad_norm": 0.17898720333894108, |
| "learning_rate": 2.703979816576128e-06, |
| "loss": 0.4577229917049408, |
| "step": 914, |
| "token_acc": 0.8485272656543374 |
| }, |
| { |
| "epoch": 2.9341894060995184, |
| "grad_norm": 0.20953372056682842, |
| "learning_rate": 2.6969045925971647e-06, |
| "loss": 0.4898274838924408, |
| "step": 915, |
| "token_acc": 0.8378350593275259 |
| }, |
| { |
| "epoch": 2.937399678972713, |
| "grad_norm": 0.18986984243575183, |
| "learning_rate": 2.689832831110976e-06, |
| "loss": 0.4823405146598816, |
| "step": 916, |
| "token_acc": 0.8401791302339995 |
| }, |
| { |
| "epoch": 2.940609951845907, |
| "grad_norm": 0.18376410703464807, |
| "learning_rate": 2.682764562606993e-06, |
| "loss": 0.4639892578125, |
| "step": 917, |
| "token_acc": 0.8443565602409577 |
| }, |
| { |
| "epoch": 2.943820224719101, |
| "grad_norm": 0.20423108351344285, |
| "learning_rate": 2.6756998175595865e-06, |
| "loss": 0.4396159052848816, |
| "step": 918, |
| "token_acc": 0.8543520731929763 |
| }, |
| { |
| "epoch": 2.947030497592295, |
| "grad_norm": 0.19283053616253365, |
| "learning_rate": 2.6686386264279417e-06, |
| "loss": 0.4716796875, |
| "step": 919, |
| "token_acc": 0.8447049600482436 |
| }, |
| { |
| "epoch": 2.9502407704654896, |
| "grad_norm": 0.17396974121526895, |
| "learning_rate": 2.6615810196559143e-06, |
| "loss": 0.419921875, |
| "step": 920, |
| "token_acc": 0.8606441665206895 |
| }, |
| { |
| "epoch": 2.953451043338684, |
| "grad_norm": 0.18122123451983121, |
| "learning_rate": 2.6545270276719115e-06, |
| "loss": 0.4344889521598816, |
| "step": 921, |
| "token_acc": 0.8557424324856561 |
| }, |
| { |
| "epoch": 2.956661316211878, |
| "grad_norm": 0.21164130487363986, |
| "learning_rate": 2.6474766808887508e-06, |
| "loss": 0.4502767026424408, |
| "step": 922, |
| "token_acc": 0.8507737208341514 |
| }, |
| { |
| "epoch": 2.959871589085072, |
| "grad_norm": 0.20693230256283227, |
| "learning_rate": 2.6404300097035397e-06, |
| "loss": 0.4093017578125, |
| "step": 923, |
| "token_acc": 0.8623301720111289 |
| }, |
| { |
| "epoch": 2.9630818619582664, |
| "grad_norm": 0.2128202281439714, |
| "learning_rate": 2.6333870444975333e-06, |
| "loss": 0.4647623896598816, |
| "step": 924, |
| "token_acc": 0.8452988006954382 |
| }, |
| { |
| "epoch": 2.966292134831461, |
| "grad_norm": 0.19432742708017048, |
| "learning_rate": 2.6263478156360117e-06, |
| "loss": 0.4721272885799408, |
| "step": 925, |
| "token_acc": 0.8423774782670488 |
| }, |
| { |
| "epoch": 2.969502407704655, |
| "grad_norm": 0.20592585126650714, |
| "learning_rate": 2.619312353468143e-06, |
| "loss": 0.4940185546875, |
| "step": 926, |
| "token_acc": 0.8377018770376724 |
| }, |
| { |
| "epoch": 2.972712680577849, |
| "grad_norm": 0.1833160802061042, |
| "learning_rate": 2.61228068832686e-06, |
| "loss": 0.4482015073299408, |
| "step": 927, |
| "token_acc": 0.8498228651561001 |
| }, |
| { |
| "epoch": 2.975922953451043, |
| "grad_norm": 0.21859060590959328, |
| "learning_rate": 2.605252850528721e-06, |
| "loss": 0.4634196162223816, |
| "step": 928, |
| "token_acc": 0.8474287448047855 |
| }, |
| { |
| "epoch": 2.9791332263242376, |
| "grad_norm": 0.32814155266427864, |
| "learning_rate": 2.5982288703737832e-06, |
| "loss": 0.4471842646598816, |
| "step": 929, |
| "token_acc": 0.8499125107252692 |
| }, |
| { |
| "epoch": 2.982343499197432, |
| "grad_norm": 0.18383434017442052, |
| "learning_rate": 2.5912087781454747e-06, |
| "loss": 0.4506022334098816, |
| "step": 930, |
| "token_acc": 0.8479707935620925 |
| }, |
| { |
| "epoch": 2.985553772070626, |
| "grad_norm": 0.21009008880803093, |
| "learning_rate": 2.584192604110458e-06, |
| "loss": 0.4718831479549408, |
| "step": 931, |
| "token_acc": 0.8435183994391784 |
| }, |
| { |
| "epoch": 2.98876404494382, |
| "grad_norm": 0.20869763364072635, |
| "learning_rate": 2.577180378518505e-06, |
| "loss": 0.4437255859375, |
| "step": 932, |
| "token_acc": 0.8529731354610436 |
| }, |
| { |
| "epoch": 2.9919743178170144, |
| "grad_norm": 0.1925184488565764, |
| "learning_rate": 2.5701721316023596e-06, |
| "loss": 0.4805094599723816, |
| "step": 933, |
| "token_acc": 0.8384617433930094 |
| }, |
| { |
| "epoch": 2.995184590690209, |
| "grad_norm": 0.19414097485510715, |
| "learning_rate": 2.56316789357762e-06, |
| "loss": 0.4686279296875, |
| "step": 934, |
| "token_acc": 0.8461598371896937 |
| }, |
| { |
| "epoch": 2.998394863563403, |
| "grad_norm": 0.20311846242856332, |
| "learning_rate": 2.556167694642592e-06, |
| "loss": 0.4471435546875, |
| "step": 935, |
| "token_acc": 0.8523729443628189 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.27709428039989725, |
| "learning_rate": 2.5491715649781713e-06, |
| "loss": 0.4915364682674408, |
| "step": 936, |
| "token_acc": 0.8348490575489441 |
| }, |
| { |
| "epoch": 3.0032102728731944, |
| "grad_norm": 0.19512840843235554, |
| "learning_rate": 2.54217953474771e-06, |
| "loss": 0.4064127802848816, |
| "step": 937, |
| "token_acc": 0.8645122771380337 |
| }, |
| { |
| "epoch": 3.0064205457463884, |
| "grad_norm": 0.19152822679354511, |
| "learning_rate": 2.5351916340968834e-06, |
| "loss": 0.430419921875, |
| "step": 938, |
| "token_acc": 0.8565615697709739 |
| }, |
| { |
| "epoch": 3.009630818619583, |
| "grad_norm": 0.19480721878283314, |
| "learning_rate": 2.5282078931535636e-06, |
| "loss": 0.4052734375, |
| "step": 939, |
| "token_acc": 0.865073050011375 |
| }, |
| { |
| "epoch": 3.012841091492777, |
| "grad_norm": 0.20094176240337724, |
| "learning_rate": 2.5212283420276868e-06, |
| "loss": 0.4458821713924408, |
| "step": 940, |
| "token_acc": 0.8485933197149823 |
| }, |
| { |
| "epoch": 3.016051364365971, |
| "grad_norm": 0.20026529443568822, |
| "learning_rate": 2.5142530108111283e-06, |
| "loss": 0.4403890073299408, |
| "step": 941, |
| "token_acc": 0.8525967790426874 |
| }, |
| { |
| "epoch": 3.019261637239165, |
| "grad_norm": 0.21284407108413642, |
| "learning_rate": 2.507281929577567e-06, |
| "loss": 0.4329833984375, |
| "step": 942, |
| "token_acc": 0.8550810846812996 |
| }, |
| { |
| "epoch": 3.0224719101123596, |
| "grad_norm": 0.18205057534794428, |
| "learning_rate": 2.5003151283823577e-06, |
| "loss": 0.4101766049861908, |
| "step": 943, |
| "token_acc": 0.8625629537300611 |
| }, |
| { |
| "epoch": 3.0256821829855536, |
| "grad_norm": 0.2183224419846251, |
| "learning_rate": 2.493352637262405e-06, |
| "loss": 0.4402262568473816, |
| "step": 944, |
| "token_acc": 0.8541160334907777 |
| }, |
| { |
| "epoch": 3.028892455858748, |
| "grad_norm": 0.19560530960918013, |
| "learning_rate": 2.48639448623603e-06, |
| "loss": 0.4363200068473816, |
| "step": 945, |
| "token_acc": 0.8543690262716656 |
| }, |
| { |
| "epoch": 3.0321027287319424, |
| "grad_norm": 0.20229017181299092, |
| "learning_rate": 2.4794407053028385e-06, |
| "loss": 0.4148763120174408, |
| "step": 946, |
| "token_acc": 0.8634134775874861 |
| }, |
| { |
| "epoch": 3.0353130016051364, |
| "grad_norm": 0.1807469477199333, |
| "learning_rate": 2.4724913244435983e-06, |
| "loss": 0.3597819209098816, |
| "step": 947, |
| "token_acc": 0.8800449186083527 |
| }, |
| { |
| "epoch": 3.038523274478331, |
| "grad_norm": 0.19730264955019094, |
| "learning_rate": 2.465546373620106e-06, |
| "loss": 0.4295145869255066, |
| "step": 948, |
| "token_acc": 0.8556339526610096 |
| }, |
| { |
| "epoch": 3.041733547351525, |
| "grad_norm": 0.203882798491726, |
| "learning_rate": 2.458605882775059e-06, |
| "loss": 0.4242350459098816, |
| "step": 949, |
| "token_acc": 0.8572224411495525 |
| }, |
| { |
| "epoch": 3.044943820224719, |
| "grad_norm": 0.20160987589550436, |
| "learning_rate": 2.4516698818319232e-06, |
| "loss": 0.4075114130973816, |
| "step": 950, |
| "token_acc": 0.8645283352488727 |
| }, |
| { |
| "epoch": 3.048154093097913, |
| "grad_norm": 0.1762688523749818, |
| "learning_rate": 2.444738400694808e-06, |
| "loss": 0.4170125424861908, |
| "step": 951, |
| "token_acc": 0.8620549639827875 |
| }, |
| { |
| "epoch": 3.0513643659711076, |
| "grad_norm": 0.2054457353905813, |
| "learning_rate": 2.4378114692483384e-06, |
| "loss": 0.4302571713924408, |
| "step": 952, |
| "token_acc": 0.8568869894067945 |
| }, |
| { |
| "epoch": 3.0545746388443016, |
| "grad_norm": 0.19065678215481155, |
| "learning_rate": 2.43088911735752e-06, |
| "loss": 0.4544677734375, |
| "step": 953, |
| "token_acc": 0.848384410373228 |
| }, |
| { |
| "epoch": 3.057784911717496, |
| "grad_norm": 0.1911349829751549, |
| "learning_rate": 2.4239713748676156e-06, |
| "loss": 0.4250895380973816, |
| "step": 954, |
| "token_acc": 0.8578417229285682 |
| }, |
| { |
| "epoch": 3.0609951845906904, |
| "grad_norm": 0.17086755774006357, |
| "learning_rate": 2.4170582716040163e-06, |
| "loss": 0.4202474057674408, |
| "step": 955, |
| "token_acc": 0.8604329880984048 |
| }, |
| { |
| "epoch": 3.0642054574638844, |
| "grad_norm": 0.19718572745924365, |
| "learning_rate": 2.4101498373721078e-06, |
| "loss": 0.4644775390625, |
| "step": 956, |
| "token_acc": 0.8432624399789125 |
| }, |
| { |
| "epoch": 3.067415730337079, |
| "grad_norm": 0.18969121035642866, |
| "learning_rate": 2.403246101957149e-06, |
| "loss": 0.4046630859375, |
| "step": 957, |
| "token_acc": 0.8642841551259974 |
| }, |
| { |
| "epoch": 3.070626003210273, |
| "grad_norm": 0.33889632940191633, |
| "learning_rate": 2.3963470951241374e-06, |
| "loss": 0.3937581479549408, |
| "step": 958, |
| "token_acc": 0.8685400647452663 |
| }, |
| { |
| "epoch": 3.073836276083467, |
| "grad_norm": 0.18441537435055547, |
| "learning_rate": 2.389452846617687e-06, |
| "loss": 0.4249267578125, |
| "step": 959, |
| "token_acc": 0.8557664988332394 |
| }, |
| { |
| "epoch": 3.077046548956661, |
| "grad_norm": 0.2157696044564956, |
| "learning_rate": 2.382563386161894e-06, |
| "loss": 0.4385172724723816, |
| "step": 960, |
| "token_acc": 0.8540134551100046 |
| }, |
| { |
| "epoch": 3.0802568218298556, |
| "grad_norm": 0.20472017309447713, |
| "learning_rate": 2.3756787434602096e-06, |
| "loss": 0.4079183042049408, |
| "step": 961, |
| "token_acc": 0.8627625265476284 |
| }, |
| { |
| "epoch": 3.0834670947030496, |
| "grad_norm": 0.22485005465461746, |
| "learning_rate": 2.3687989481953195e-06, |
| "loss": 0.4535319209098816, |
| "step": 962, |
| "token_acc": 0.8453375149611685 |
| }, |
| { |
| "epoch": 3.086677367576244, |
| "grad_norm": 0.1902880806662592, |
| "learning_rate": 2.3619240300290044e-06, |
| "loss": 0.4248250424861908, |
| "step": 963, |
| "token_acc": 0.8567758712111124 |
| }, |
| { |
| "epoch": 3.0898876404494384, |
| "grad_norm": 0.2626089363437338, |
| "learning_rate": 2.35505401860202e-06, |
| "loss": 0.4044189453125, |
| "step": 964, |
| "token_acc": 0.8657033443677186 |
| }, |
| { |
| "epoch": 3.0930979133226324, |
| "grad_norm": 0.19604226172297987, |
| "learning_rate": 2.348188943533965e-06, |
| "loss": 0.4424947202205658, |
| "step": 965, |
| "token_acc": 0.8500906036946045 |
| }, |
| { |
| "epoch": 3.096308186195827, |
| "grad_norm": 0.18951032806705173, |
| "learning_rate": 2.3413288344231596e-06, |
| "loss": 0.4405517578125, |
| "step": 966, |
| "token_acc": 0.8498719155442217 |
| }, |
| { |
| "epoch": 3.099518459069021, |
| "grad_norm": 0.26910487145254985, |
| "learning_rate": 2.334473720846509e-06, |
| "loss": 0.4261881709098816, |
| "step": 967, |
| "token_acc": 0.855647533546374 |
| }, |
| { |
| "epoch": 3.102728731942215, |
| "grad_norm": 0.2019726031413763, |
| "learning_rate": 2.3276236323593822e-06, |
| "loss": 0.4241536557674408, |
| "step": 968, |
| "token_acc": 0.8591222331565688 |
| }, |
| { |
| "epoch": 3.105939004815409, |
| "grad_norm": 0.20559129697543618, |
| "learning_rate": 2.3207785984954833e-06, |
| "loss": 0.4336751401424408, |
| "step": 969, |
| "token_acc": 0.8538620095006778 |
| }, |
| { |
| "epoch": 3.1091492776886036, |
| "grad_norm": 0.16203566424131216, |
| "learning_rate": 2.3139386487667245e-06, |
| "loss": 0.3148600459098816, |
| "step": 970, |
| "token_acc": 0.8947029334998015 |
| }, |
| { |
| "epoch": 3.1123595505617976, |
| "grad_norm": 0.19421525075904014, |
| "learning_rate": 2.307103812663096e-06, |
| "loss": 0.4341227412223816, |
| "step": 971, |
| "token_acc": 0.8553111018864443 |
| }, |
| { |
| "epoch": 3.115569823434992, |
| "grad_norm": 0.20517033907826865, |
| "learning_rate": 2.300274119652542e-06, |
| "loss": 0.4022013545036316, |
| "step": 972, |
| "token_acc": 0.8658842524777902 |
| }, |
| { |
| "epoch": 3.1187800963081864, |
| "grad_norm": 0.2040197534762892, |
| "learning_rate": 2.293449599180832e-06, |
| "loss": 0.464599609375, |
| "step": 973, |
| "token_acc": 0.8440537666160114 |
| }, |
| { |
| "epoch": 3.1219903691813804, |
| "grad_norm": 0.22646536031991613, |
| "learning_rate": 2.286630280671437e-06, |
| "loss": 0.42828369140625, |
| "step": 974, |
| "token_acc": 0.8590015726546195 |
| }, |
| { |
| "epoch": 3.125200642054575, |
| "grad_norm": 0.25340772779513127, |
| "learning_rate": 2.2798161935253967e-06, |
| "loss": 0.4293212890625, |
| "step": 975, |
| "token_acc": 0.8541718416925375 |
| }, |
| { |
| "epoch": 3.128410914927769, |
| "grad_norm": 0.2115180488720503, |
| "learning_rate": 2.2730073671211954e-06, |
| "loss": 0.4131673276424408, |
| "step": 976, |
| "token_acc": 0.8628617261205171 |
| }, |
| { |
| "epoch": 3.131621187800963, |
| "grad_norm": 0.19415733984357036, |
| "learning_rate": 2.2662038308146425e-06, |
| "loss": 0.4080810546875, |
| "step": 977, |
| "token_acc": 0.8640001032273604 |
| }, |
| { |
| "epoch": 3.134831460674157, |
| "grad_norm": 0.19776123553300654, |
| "learning_rate": 2.2594056139387326e-06, |
| "loss": 0.4088541865348816, |
| "step": 978, |
| "token_acc": 0.8627189376828179 |
| }, |
| { |
| "epoch": 3.1380417335473516, |
| "grad_norm": 0.19643692765873402, |
| "learning_rate": 2.2526127458035274e-06, |
| "loss": 0.4188232421875, |
| "step": 979, |
| "token_acc": 0.8607515414116894 |
| }, |
| { |
| "epoch": 3.1412520064205456, |
| "grad_norm": 0.2835352812737188, |
| "learning_rate": 2.245825255696032e-06, |
| "loss": 0.4149169921875, |
| "step": 980, |
| "token_acc": 0.8628155780149843 |
| }, |
| { |
| "epoch": 3.14446227929374, |
| "grad_norm": 0.19422023326342555, |
| "learning_rate": 2.2390431728800596e-06, |
| "loss": 0.4036458432674408, |
| "step": 981, |
| "token_acc": 0.8621452357882656 |
| }, |
| { |
| "epoch": 3.1476725521669344, |
| "grad_norm": 0.20035159839745434, |
| "learning_rate": 2.232266526596112e-06, |
| "loss": 0.4271647334098816, |
| "step": 982, |
| "token_acc": 0.8575271868100122 |
| }, |
| { |
| "epoch": 3.1508828250401284, |
| "grad_norm": 0.21219608624863834, |
| "learning_rate": 2.225495346061251e-06, |
| "loss": 0.4122314453125, |
| "step": 983, |
| "token_acc": 0.861788195878113 |
| }, |
| { |
| "epoch": 3.154093097913323, |
| "grad_norm": 0.19015710407463216, |
| "learning_rate": 2.218729660468976e-06, |
| "loss": 0.4386393427848816, |
| "step": 984, |
| "token_acc": 0.8533309039609425 |
| }, |
| { |
| "epoch": 3.157303370786517, |
| "grad_norm": 0.19189470990403354, |
| "learning_rate": 2.2119694989890917e-06, |
| "loss": 0.38525390625, |
| "step": 985, |
| "token_acc": 0.8706470303086263 |
| }, |
| { |
| "epoch": 3.160513643659711, |
| "grad_norm": 0.2180881731814661, |
| "learning_rate": 2.205214890767588e-06, |
| "loss": 0.4072062373161316, |
| "step": 986, |
| "token_acc": 0.863917869592352 |
| }, |
| { |
| "epoch": 3.163723916532905, |
| "grad_norm": 0.18266606436803548, |
| "learning_rate": 2.1984658649265122e-06, |
| "loss": 0.4138997495174408, |
| "step": 987, |
| "token_acc": 0.8617649696808686 |
| }, |
| { |
| "epoch": 3.1669341894060996, |
| "grad_norm": 0.19465676121423098, |
| "learning_rate": 2.1917224505638445e-06, |
| "loss": 0.4266764521598816, |
| "step": 988, |
| "token_acc": 0.8576549359584379 |
| }, |
| { |
| "epoch": 3.1701444622792936, |
| "grad_norm": 0.18927644434754856, |
| "learning_rate": 2.184984676753367e-06, |
| "loss": 0.4324951171875, |
| "step": 989, |
| "token_acc": 0.8562302060894557 |
| }, |
| { |
| "epoch": 3.173354735152488, |
| "grad_norm": 0.17236404705380387, |
| "learning_rate": 2.178252572544548e-06, |
| "loss": 0.41937255859375, |
| "step": 990, |
| "token_acc": 0.8578650493179596 |
| }, |
| { |
| "epoch": 3.176565008025682, |
| "grad_norm": 0.21801763116849704, |
| "learning_rate": 2.17152616696241e-06, |
| "loss": 0.4388834834098816, |
| "step": 991, |
| "token_acc": 0.8528569371329824 |
| }, |
| { |
| "epoch": 3.1797752808988764, |
| "grad_norm": 0.21130005789340922, |
| "learning_rate": 2.164805489007407e-06, |
| "loss": 0.4567057490348816, |
| "step": 992, |
| "token_acc": 0.8481650790407845 |
| }, |
| { |
| "epoch": 3.182985553772071, |
| "grad_norm": 0.19356572500109256, |
| "learning_rate": 2.1580905676552955e-06, |
| "loss": 0.4306844174861908, |
| "step": 993, |
| "token_acc": 0.8551083071577856 |
| }, |
| { |
| "epoch": 3.186195826645265, |
| "grad_norm": 0.1917648485899303, |
| "learning_rate": 2.151381431857016e-06, |
| "loss": 0.3867594599723816, |
| "step": 994, |
| "token_acc": 0.8688500205724047 |
| }, |
| { |
| "epoch": 3.189406099518459, |
| "grad_norm": 0.20898748869275882, |
| "learning_rate": 2.144678110538565e-06, |
| "loss": 0.4288737177848816, |
| "step": 995, |
| "token_acc": 0.8575603299412129 |
| }, |
| { |
| "epoch": 3.192616372391653, |
| "grad_norm": 0.1927262947095155, |
| "learning_rate": 2.137980632600869e-06, |
| "loss": 0.44091796875, |
| "step": 996, |
| "token_acc": 0.8518801725086996 |
| }, |
| { |
| "epoch": 3.1958266452648476, |
| "grad_norm": 0.20628044257936604, |
| "learning_rate": 2.1312890269196606e-06, |
| "loss": 0.4136962890625, |
| "step": 997, |
| "token_acc": 0.8620663241011797 |
| }, |
| { |
| "epoch": 3.1990369181380416, |
| "grad_norm": 0.18145359781735665, |
| "learning_rate": 2.1246033223453577e-06, |
| "loss": 0.4010009765625, |
| "step": 998, |
| "token_acc": 0.8663384327083898 |
| }, |
| { |
| "epoch": 3.202247191011236, |
| "grad_norm": 0.19250739862363292, |
| "learning_rate": 2.117923547702931e-06, |
| "loss": 0.4561360776424408, |
| "step": 999, |
| "token_acc": 0.8479883342024478 |
| }, |
| { |
| "epoch": 3.20545746388443, |
| "grad_norm": 0.21105422746513886, |
| "learning_rate": 2.111249731791789e-06, |
| "loss": 0.402587890625, |
| "step": 1000, |
| "token_acc": 0.865575555443972 |
| }, |
| { |
| "epoch": 3.2086677367576244, |
| "grad_norm": 0.19229547194042054, |
| "learning_rate": 2.1045819033856467e-06, |
| "loss": 0.4468587338924408, |
| "step": 1001, |
| "token_acc": 0.8522915598447114 |
| }, |
| { |
| "epoch": 3.211878009630819, |
| "grad_norm": 0.2212323711144648, |
| "learning_rate": 2.097920091232407e-06, |
| "loss": 0.4637858271598816, |
| "step": 1002, |
| "token_acc": 0.8459636819096726 |
| }, |
| { |
| "epoch": 3.215088282504013, |
| "grad_norm": 0.19460807788678874, |
| "learning_rate": 2.0912643240540335e-06, |
| "loss": 0.3863932490348816, |
| "step": 1003, |
| "token_acc": 0.8696600839923991 |
| }, |
| { |
| "epoch": 3.218298555377207, |
| "grad_norm": 0.1870363025497072, |
| "learning_rate": 2.0846146305464225e-06, |
| "loss": 0.3617960810661316, |
| "step": 1004, |
| "token_acc": 0.8792915105125078 |
| }, |
| { |
| "epoch": 3.221508828250401, |
| "grad_norm": 0.20513714397130442, |
| "learning_rate": 2.0779710393792932e-06, |
| "loss": 0.3919270932674408, |
| "step": 1005, |
| "token_acc": 0.8705517097544863 |
| }, |
| { |
| "epoch": 3.2247191011235956, |
| "grad_norm": 0.19359366136276182, |
| "learning_rate": 2.0713335791960465e-06, |
| "loss": 0.429443359375, |
| "step": 1006, |
| "token_acc": 0.857015650051013 |
| }, |
| { |
| "epoch": 3.2279293739967896, |
| "grad_norm": 0.19892443344289307, |
| "learning_rate": 2.0647022786136554e-06, |
| "loss": 0.3619384765625, |
| "step": 1007, |
| "token_acc": 0.8794119270801904 |
| }, |
| { |
| "epoch": 3.231139646869984, |
| "grad_norm": 0.22539113984089676, |
| "learning_rate": 2.0580771662225306e-06, |
| "loss": 0.4684651792049408, |
| "step": 1008, |
| "token_acc": 0.8445699764674204 |
| }, |
| { |
| "epoch": 3.234349919743178, |
| "grad_norm": 0.18846405767145527, |
| "learning_rate": 2.0514582705864104e-06, |
| "loss": 0.3821614682674408, |
| "step": 1009, |
| "token_acc": 0.8711276613950546 |
| }, |
| { |
| "epoch": 3.2375601926163724, |
| "grad_norm": 0.1917418511098612, |
| "learning_rate": 2.0448456202422237e-06, |
| "loss": 0.4560750424861908, |
| "step": 1010, |
| "token_acc": 0.8468040474533667 |
| }, |
| { |
| "epoch": 3.240770465489567, |
| "grad_norm": 0.1817089652687439, |
| "learning_rate": 2.038239243699975e-06, |
| "loss": 0.4158528745174408, |
| "step": 1011, |
| "token_acc": 0.8605414302163478 |
| }, |
| { |
| "epoch": 3.243980738362761, |
| "grad_norm": 0.18348380772382225, |
| "learning_rate": 2.0316391694426233e-06, |
| "loss": 0.3862508237361908, |
| "step": 1012, |
| "token_acc": 0.8690134339812778 |
| }, |
| { |
| "epoch": 3.247191011235955, |
| "grad_norm": 0.17828657081582217, |
| "learning_rate": 2.025045425925949e-06, |
| "loss": 0.4139607846736908, |
| "step": 1013, |
| "token_acc": 0.860817646908989 |
| }, |
| { |
| "epoch": 3.250401284109149, |
| "grad_norm": 0.1998903770169443, |
| "learning_rate": 2.0184580415784434e-06, |
| "loss": 0.4397786557674408, |
| "step": 1014, |
| "token_acc": 0.8526518065840059 |
| }, |
| { |
| "epoch": 3.2536115569823436, |
| "grad_norm": 0.20429705875890047, |
| "learning_rate": 2.011877044801176e-06, |
| "loss": 0.448974609375, |
| "step": 1015, |
| "token_acc": 0.8497649731919912 |
| }, |
| { |
| "epoch": 3.2568218298555376, |
| "grad_norm": 0.23203221634672286, |
| "learning_rate": 2.0053024639676837e-06, |
| "loss": 0.4231770932674408, |
| "step": 1016, |
| "token_acc": 0.858826049953659 |
| }, |
| { |
| "epoch": 3.260032102728732, |
| "grad_norm": 0.21003263011654322, |
| "learning_rate": 1.9987343274238364e-06, |
| "loss": 0.4134928584098816, |
| "step": 1017, |
| "token_acc": 0.8616680586797567 |
| }, |
| { |
| "epoch": 3.263242375601926, |
| "grad_norm": 0.18406406356467608, |
| "learning_rate": 1.9921726634877184e-06, |
| "loss": 0.4248860776424408, |
| "step": 1018, |
| "token_acc": 0.8571214968287904 |
| }, |
| { |
| "epoch": 3.2664526484751204, |
| "grad_norm": 0.17697575754028663, |
| "learning_rate": 1.9856175004495094e-06, |
| "loss": 0.4256998896598816, |
| "step": 1019, |
| "token_acc": 0.8561404893189583 |
| }, |
| { |
| "epoch": 3.2696629213483144, |
| "grad_norm": 0.2605367137069997, |
| "learning_rate": 1.9790688665713654e-06, |
| "loss": 0.4170735776424408, |
| "step": 1020, |
| "token_acc": 0.8610197515302906 |
| }, |
| { |
| "epoch": 3.272873194221509, |
| "grad_norm": 0.20227045376577477, |
| "learning_rate": 1.9725267900872873e-06, |
| "loss": 0.4197591245174408, |
| "step": 1021, |
| "token_acc": 0.8594213814186346 |
| }, |
| { |
| "epoch": 3.276083467094703, |
| "grad_norm": 0.21082038704435982, |
| "learning_rate": 1.965991299203003e-06, |
| "loss": 0.4347737729549408, |
| "step": 1022, |
| "token_acc": 0.8566319448464662 |
| }, |
| { |
| "epoch": 3.279293739967897, |
| "grad_norm": 0.1971525837293343, |
| "learning_rate": 1.9594624220958527e-06, |
| "loss": 0.4326985776424408, |
| "step": 1023, |
| "token_acc": 0.8559198751263897 |
| }, |
| { |
| "epoch": 3.2825040128410916, |
| "grad_norm": 0.22558923803886857, |
| "learning_rate": 1.952940186914657e-06, |
| "loss": 0.4354248046875, |
| "step": 1024, |
| "token_acc": 0.8549900900806426 |
| }, |
| { |
| "epoch": 3.2857142857142856, |
| "grad_norm": 0.20270403082450897, |
| "learning_rate": 1.946424621779602e-06, |
| "loss": 0.3877767026424408, |
| "step": 1025, |
| "token_acc": 0.8710687777985294 |
| }, |
| { |
| "epoch": 3.28892455858748, |
| "grad_norm": 0.18679692952044738, |
| "learning_rate": 1.9399157547821164e-06, |
| "loss": 0.4424235224723816, |
| "step": 1026, |
| "token_acc": 0.8517893098255529 |
| }, |
| { |
| "epoch": 3.292134831460674, |
| "grad_norm": 0.1810744270024889, |
| "learning_rate": 1.9334136139847496e-06, |
| "loss": 0.4027913510799408, |
| "step": 1027, |
| "token_acc": 0.8643299823981214 |
| }, |
| { |
| "epoch": 3.2953451043338684, |
| "grad_norm": 0.20370073172200107, |
| "learning_rate": 1.9269182274210527e-06, |
| "loss": 0.4059651792049408, |
| "step": 1028, |
| "token_acc": 0.8649818280909212 |
| }, |
| { |
| "epoch": 3.2985553772070624, |
| "grad_norm": 0.20792285403348992, |
| "learning_rate": 1.9204296230954554e-06, |
| "loss": 0.4032389521598816, |
| "step": 1029, |
| "token_acc": 0.865385416259151 |
| }, |
| { |
| "epoch": 3.301765650080257, |
| "grad_norm": 0.205865819325021, |
| "learning_rate": 1.913947828983146e-06, |
| "loss": 0.3989054560661316, |
| "step": 1030, |
| "token_acc": 0.8651247377965826 |
| }, |
| { |
| "epoch": 3.304975922953451, |
| "grad_norm": 0.18321717295258097, |
| "learning_rate": 1.907472873029951e-06, |
| "loss": 0.425048828125, |
| "step": 1031, |
| "token_acc": 0.8577393883418105 |
| }, |
| { |
| "epoch": 3.308186195826645, |
| "grad_norm": 0.20031557766613411, |
| "learning_rate": 1.9010047831522165e-06, |
| "loss": 0.4626871943473816, |
| "step": 1032, |
| "token_acc": 0.8463195523818052 |
| }, |
| { |
| "epoch": 3.3113964686998396, |
| "grad_norm": 0.18526980410321806, |
| "learning_rate": 1.8945435872366825e-06, |
| "loss": 0.3961588740348816, |
| "step": 1033, |
| "token_acc": 0.8664393756668253 |
| }, |
| { |
| "epoch": 3.3146067415730336, |
| "grad_norm": 0.18682164125358777, |
| "learning_rate": 1.8880893131403718e-06, |
| "loss": 0.454345703125, |
| "step": 1034, |
| "token_acc": 0.8475725916635419 |
| }, |
| { |
| "epoch": 3.317817014446228, |
| "grad_norm": 0.17088822277097934, |
| "learning_rate": 1.881641988690457e-06, |
| "loss": 0.3936360776424408, |
| "step": 1035, |
| "token_acc": 0.8675179845668575 |
| }, |
| { |
| "epoch": 3.321027287319422, |
| "grad_norm": 0.2650766352186625, |
| "learning_rate": 1.8752016416841512e-06, |
| "loss": 0.4297282099723816, |
| "step": 1036, |
| "token_acc": 0.856829096564697 |
| }, |
| { |
| "epoch": 3.3242375601926164, |
| "grad_norm": 0.19811218982637763, |
| "learning_rate": 1.8687682998885876e-06, |
| "loss": 0.3743693232536316, |
| "step": 1037, |
| "token_acc": 0.8732669791645157 |
| }, |
| { |
| "epoch": 3.3274478330658104, |
| "grad_norm": 0.1881599599845014, |
| "learning_rate": 1.8623419910406943e-06, |
| "loss": 0.4101969599723816, |
| "step": 1038, |
| "token_acc": 0.8626658730357066 |
| }, |
| { |
| "epoch": 3.330658105939005, |
| "grad_norm": 0.17212595163092956, |
| "learning_rate": 1.8559227428470747e-06, |
| "loss": 0.3761800229549408, |
| "step": 1039, |
| "token_acc": 0.8748992295470729 |
| }, |
| { |
| "epoch": 3.333868378812199, |
| "grad_norm": 0.18688649843976768, |
| "learning_rate": 1.8495105829838924e-06, |
| "loss": 0.431640625, |
| "step": 1040, |
| "token_acc": 0.8571577847439916 |
| }, |
| { |
| "epoch": 3.337078651685393, |
| "grad_norm": 0.1860904717139982, |
| "learning_rate": 1.8431055390967545e-06, |
| "loss": 0.4176839292049408, |
| "step": 1041, |
| "token_acc": 0.8614076828902081 |
| }, |
| { |
| "epoch": 3.3402889245585876, |
| "grad_norm": 0.17756907190261467, |
| "learning_rate": 1.8367076388005824e-06, |
| "loss": 0.3920491635799408, |
| "step": 1042, |
| "token_acc": 0.867667418755237 |
| }, |
| { |
| "epoch": 3.3434991974317816, |
| "grad_norm": 0.213774360292066, |
| "learning_rate": 1.8303169096795024e-06, |
| "loss": 0.4037882685661316, |
| "step": 1043, |
| "token_acc": 0.8641746530816108 |
| }, |
| { |
| "epoch": 3.346709470304976, |
| "grad_norm": 0.20945621378126955, |
| "learning_rate": 1.8239333792867157e-06, |
| "loss": 0.4090169370174408, |
| "step": 1044, |
| "token_acc": 0.8622145082550352 |
| }, |
| { |
| "epoch": 3.34991974317817, |
| "grad_norm": 0.22957551762719874, |
| "learning_rate": 1.8175570751443967e-06, |
| "loss": 0.4228108823299408, |
| "step": 1045, |
| "token_acc": 0.8606316225992386 |
| }, |
| { |
| "epoch": 3.3531300160513644, |
| "grad_norm": 0.2130972617406167, |
| "learning_rate": 1.8111880247435576e-06, |
| "loss": 0.441650390625, |
| "step": 1046, |
| "token_acc": 0.8529377749003533 |
| }, |
| { |
| "epoch": 3.3563402889245584, |
| "grad_norm": 0.21202748343191813, |
| "learning_rate": 1.8048262555439376e-06, |
| "loss": 0.42041015625, |
| "step": 1047, |
| "token_acc": 0.8588775990968166 |
| }, |
| { |
| "epoch": 3.359550561797753, |
| "grad_norm": 0.19406791196728218, |
| "learning_rate": 1.7984717949738856e-06, |
| "loss": 0.447998046875, |
| "step": 1048, |
| "token_acc": 0.8512821019043048 |
| }, |
| { |
| "epoch": 3.362760834670947, |
| "grad_norm": 0.20578464294018312, |
| "learning_rate": 1.7921246704302371e-06, |
| "loss": 0.420135498046875, |
| "step": 1049, |
| "token_acc": 0.8593331574316282 |
| }, |
| { |
| "epoch": 3.365971107544141, |
| "grad_norm": 0.1932854579791998, |
| "learning_rate": 1.785784909278201e-06, |
| "loss": 0.4093017578125, |
| "step": 1050, |
| "token_acc": 0.86227689958784 |
| }, |
| { |
| "epoch": 3.3691813804173356, |
| "grad_norm": 0.25070884029858714, |
| "learning_rate": 1.779452538851238e-06, |
| "loss": 0.4414876401424408, |
| "step": 1051, |
| "token_acc": 0.852587075660224 |
| }, |
| { |
| "epoch": 3.3723916532905296, |
| "grad_norm": 0.18868857617721213, |
| "learning_rate": 1.7731275864509448e-06, |
| "loss": 0.4228922724723816, |
| "step": 1052, |
| "token_acc": 0.8585962533972208 |
| }, |
| { |
| "epoch": 3.375601926163724, |
| "grad_norm": 0.18853519073810057, |
| "learning_rate": 1.7668100793469358e-06, |
| "loss": 0.4308268427848816, |
| "step": 1053, |
| "token_acc": 0.8560997968551486 |
| }, |
| { |
| "epoch": 3.378812199036918, |
| "grad_norm": 0.19465336749692538, |
| "learning_rate": 1.7605000447767236e-06, |
| "loss": 0.3777669370174408, |
| "step": 1054, |
| "token_acc": 0.8727723193027433 |
| }, |
| { |
| "epoch": 3.3820224719101124, |
| "grad_norm": 0.2399168093589425, |
| "learning_rate": 1.75419750994561e-06, |
| "loss": 0.4230143427848816, |
| "step": 1055, |
| "token_acc": 0.8576663638855595 |
| }, |
| { |
| "epoch": 3.3852327447833064, |
| "grad_norm": 0.19297225432494655, |
| "learning_rate": 1.7479025020265528e-06, |
| "loss": 0.4257405698299408, |
| "step": 1056, |
| "token_acc": 0.8584355198978895 |
| }, |
| { |
| "epoch": 3.388443017656501, |
| "grad_norm": 0.19770981928921164, |
| "learning_rate": 1.7416150481600637e-06, |
| "loss": 0.427001953125, |
| "step": 1057, |
| "token_acc": 0.8572976882953657 |
| }, |
| { |
| "epoch": 3.391653290529695, |
| "grad_norm": 0.1967714452508458, |
| "learning_rate": 1.7353351754540841e-06, |
| "loss": 0.4223226010799408, |
| "step": 1058, |
| "token_acc": 0.8588774508651131 |
| }, |
| { |
| "epoch": 3.394863563402889, |
| "grad_norm": 0.20156157565544705, |
| "learning_rate": 1.7290629109838722e-06, |
| "loss": 0.44744873046875, |
| "step": 1059, |
| "token_acc": 0.8510266465895987 |
| }, |
| { |
| "epoch": 3.3980738362760836, |
| "grad_norm": 0.29294148874106124, |
| "learning_rate": 1.7227982817918816e-06, |
| "loss": 0.4371337890625, |
| "step": 1060, |
| "token_acc": 0.8539906878706369 |
| }, |
| { |
| "epoch": 3.4012841091492776, |
| "grad_norm": 0.17675164494761733, |
| "learning_rate": 1.7165413148876447e-06, |
| "loss": 0.4316813349723816, |
| "step": 1061, |
| "token_acc": 0.8541013910639639 |
| }, |
| { |
| "epoch": 3.404494382022472, |
| "grad_norm": 0.1913527448471836, |
| "learning_rate": 1.7102920372476608e-06, |
| "loss": 0.4197184443473816, |
| "step": 1062, |
| "token_acc": 0.8606378569957566 |
| }, |
| { |
| "epoch": 3.407704654895666, |
| "grad_norm": 0.19051572266218328, |
| "learning_rate": 1.70405047581528e-06, |
| "loss": 0.3822021484375, |
| "step": 1063, |
| "token_acc": 0.8714002443917528 |
| }, |
| { |
| "epoch": 3.4109149277688604, |
| "grad_norm": 0.19422030281559116, |
| "learning_rate": 1.697816657500582e-06, |
| "loss": 0.4290771484375, |
| "step": 1064, |
| "token_acc": 0.8554621600932835 |
| }, |
| { |
| "epoch": 3.4141252006420544, |
| "grad_norm": 0.17356650516464564, |
| "learning_rate": 1.6915906091802583e-06, |
| "loss": 0.3907877802848816, |
| "step": 1065, |
| "token_acc": 0.869664393294144 |
| }, |
| { |
| "epoch": 3.417335473515249, |
| "grad_norm": 0.18506265465323435, |
| "learning_rate": 1.6853723576975085e-06, |
| "loss": 0.4239095151424408, |
| "step": 1066, |
| "token_acc": 0.8579896319427388 |
| }, |
| { |
| "epoch": 3.420545746388443, |
| "grad_norm": 0.18809580147202737, |
| "learning_rate": 1.6791619298619126e-06, |
| "loss": 0.4306233823299408, |
| "step": 1067, |
| "token_acc": 0.8561063060656751 |
| }, |
| { |
| "epoch": 3.423756019261637, |
| "grad_norm": 0.21513901135562274, |
| "learning_rate": 1.6729593524493186e-06, |
| "loss": 0.4053751826286316, |
| "step": 1068, |
| "token_acc": 0.8634594704413222 |
| }, |
| { |
| "epoch": 3.4269662921348316, |
| "grad_norm": 0.20250597705842183, |
| "learning_rate": 1.6667646522017295e-06, |
| "loss": 0.444091796875, |
| "step": 1069, |
| "token_acc": 0.8529030188498646 |
| }, |
| { |
| "epoch": 3.4301765650080256, |
| "grad_norm": 0.17578672679283108, |
| "learning_rate": 1.6605778558271862e-06, |
| "loss": 0.4008992612361908, |
| "step": 1070, |
| "token_acc": 0.8645487521749183 |
| }, |
| { |
| "epoch": 3.43338683788122, |
| "grad_norm": 0.183147867321015, |
| "learning_rate": 1.6543989899996526e-06, |
| "loss": 0.467529296875, |
| "step": 1071, |
| "token_acc": 0.8420635991977785 |
| }, |
| { |
| "epoch": 3.436597110754414, |
| "grad_norm": 0.18479237213228097, |
| "learning_rate": 1.6482280813588998e-06, |
| "loss": 0.4294026792049408, |
| "step": 1072, |
| "token_acc": 0.8548516566839212 |
| }, |
| { |
| "epoch": 3.4398073836276084, |
| "grad_norm": 0.200020323621944, |
| "learning_rate": 1.642065156510393e-06, |
| "loss": 0.4534912109375, |
| "step": 1073, |
| "token_acc": 0.849654429128104 |
| }, |
| { |
| "epoch": 3.4430176565008024, |
| "grad_norm": 0.2045486438267927, |
| "learning_rate": 1.6359102420251753e-06, |
| "loss": 0.4272868037223816, |
| "step": 1074, |
| "token_acc": 0.8583280572521158 |
| }, |
| { |
| "epoch": 3.446227929373997, |
| "grad_norm": 0.20198169472431834, |
| "learning_rate": 1.6297633644397536e-06, |
| "loss": 0.4178263545036316, |
| "step": 1075, |
| "token_acc": 0.860547363917803 |
| }, |
| { |
| "epoch": 3.449438202247191, |
| "grad_norm": 0.20642737825944937, |
| "learning_rate": 1.6236245502559828e-06, |
| "loss": 0.4259033203125, |
| "step": 1076, |
| "token_acc": 0.8581598032324395 |
| }, |
| { |
| "epoch": 3.452648475120385, |
| "grad_norm": 0.2186125005753993, |
| "learning_rate": 1.6174938259409593e-06, |
| "loss": 0.440673828125, |
| "step": 1077, |
| "token_acc": 0.8533212996389892 |
| }, |
| { |
| "epoch": 3.4558587479935796, |
| "grad_norm": 0.1996792453333026, |
| "learning_rate": 1.611371217926891e-06, |
| "loss": 0.4647623896598816, |
| "step": 1078, |
| "token_acc": 0.8443061839272337 |
| }, |
| { |
| "epoch": 3.4590690208667736, |
| "grad_norm": 0.18869326440153297, |
| "learning_rate": 1.6052567526109985e-06, |
| "loss": 0.4488525390625, |
| "step": 1079, |
| "token_acc": 0.8479692213321363 |
| }, |
| { |
| "epoch": 3.462279293739968, |
| "grad_norm": 0.17097828495515807, |
| "learning_rate": 1.5991504563553965e-06, |
| "loss": 0.3851521909236908, |
| "step": 1080, |
| "token_acc": 0.8694733632443263 |
| }, |
| { |
| "epoch": 3.465489566613162, |
| "grad_norm": 0.18433664031908198, |
| "learning_rate": 1.5930523554869788e-06, |
| "loss": 0.4507243037223816, |
| "step": 1081, |
| "token_acc": 0.8492495641690979 |
| }, |
| { |
| "epoch": 3.4686998394863564, |
| "grad_norm": 0.18108294483264578, |
| "learning_rate": 1.5869624762973012e-06, |
| "loss": 0.4585368037223816, |
| "step": 1082, |
| "token_acc": 0.8456435862523524 |
| }, |
| { |
| "epoch": 3.4719101123595504, |
| "grad_norm": 0.1862200238454039, |
| "learning_rate": 1.5808808450424756e-06, |
| "loss": 0.4508056640625, |
| "step": 1083, |
| "token_acc": 0.8489838434912815 |
| }, |
| { |
| "epoch": 3.475120385232745, |
| "grad_norm": 0.19007392226631, |
| "learning_rate": 1.5748074879430552e-06, |
| "loss": 0.4535319209098816, |
| "step": 1084, |
| "token_acc": 0.8487748765942239 |
| }, |
| { |
| "epoch": 3.478330658105939, |
| "grad_norm": 0.20476245510406388, |
| "learning_rate": 1.5687424311839173e-06, |
| "loss": 0.4156494140625, |
| "step": 1085, |
| "token_acc": 0.8628871056168218 |
| }, |
| { |
| "epoch": 3.481540930979133, |
| "grad_norm": 0.1815932832460928, |
| "learning_rate": 1.5626857009141536e-06, |
| "loss": 0.3939208984375, |
| "step": 1086, |
| "token_acc": 0.8679247922855052 |
| }, |
| { |
| "epoch": 3.4847512038523276, |
| "grad_norm": 0.1767455152787183, |
| "learning_rate": 1.5566373232469535e-06, |
| "loss": 0.4333903193473816, |
| "step": 1087, |
| "token_acc": 0.8548684500362472 |
| }, |
| { |
| "epoch": 3.4879614767255216, |
| "grad_norm": 0.22172730233556032, |
| "learning_rate": 1.5505973242595009e-06, |
| "loss": 0.400390625, |
| "step": 1088, |
| "token_acc": 0.8676109572690723 |
| }, |
| { |
| "epoch": 3.491171749598716, |
| "grad_norm": 0.19981305365613308, |
| "learning_rate": 1.5445657299928508e-06, |
| "loss": 0.4515787959098816, |
| "step": 1089, |
| "token_acc": 0.8486542162122311 |
| }, |
| { |
| "epoch": 3.49438202247191, |
| "grad_norm": 0.18951995793081028, |
| "learning_rate": 1.538542566451824e-06, |
| "loss": 0.4265950620174408, |
| "step": 1090, |
| "token_acc": 0.8581693972059831 |
| }, |
| { |
| "epoch": 3.4975922953451044, |
| "grad_norm": 0.20528684366503747, |
| "learning_rate": 1.5325278596048915e-06, |
| "loss": 0.4165852963924408, |
| "step": 1091, |
| "token_acc": 0.8622296703966629 |
| }, |
| { |
| "epoch": 3.5008025682182984, |
| "grad_norm": 0.18774317434356294, |
| "learning_rate": 1.5265216353840644e-06, |
| "loss": 0.4366455078125, |
| "step": 1092, |
| "token_acc": 0.8520290575983089 |
| }, |
| { |
| "epoch": 3.504012841091493, |
| "grad_norm": 0.18366926525043567, |
| "learning_rate": 1.5205239196847812e-06, |
| "loss": 0.4434000849723816, |
| "step": 1093, |
| "token_acc": 0.8506869031300036 |
| }, |
| { |
| "epoch": 3.5072231139646872, |
| "grad_norm": 0.19422266190957502, |
| "learning_rate": 1.5145347383657976e-06, |
| "loss": 0.4432576596736908, |
| "step": 1094, |
| "token_acc": 0.8515391064798179 |
| }, |
| { |
| "epoch": 3.510433386837881, |
| "grad_norm": 0.19262752744132836, |
| "learning_rate": 1.508554117249072e-06, |
| "loss": 0.4134928584098816, |
| "step": 1095, |
| "token_acc": 0.8611599095351802 |
| }, |
| { |
| "epoch": 3.513643659711075, |
| "grad_norm": 0.17738962252261778, |
| "learning_rate": 1.5025820821196583e-06, |
| "loss": 0.4323323667049408, |
| "step": 1096, |
| "token_acc": 0.8561772785419778 |
| }, |
| { |
| "epoch": 3.5168539325842696, |
| "grad_norm": 0.21144620803921835, |
| "learning_rate": 1.4966186587255889e-06, |
| "loss": 0.4349772334098816, |
| "step": 1097, |
| "token_acc": 0.8560637856538078 |
| }, |
| { |
| "epoch": 3.520064205457464, |
| "grad_norm": 0.18107969840908128, |
| "learning_rate": 1.4906638727777738e-06, |
| "loss": 0.4111124873161316, |
| "step": 1098, |
| "token_acc": 0.8620681322045094 |
| }, |
| { |
| "epoch": 3.523274478330658, |
| "grad_norm": 0.1930633849319264, |
| "learning_rate": 1.4847177499498753e-06, |
| "loss": 0.41156005859375, |
| "step": 1099, |
| "token_acc": 0.8612365749114891 |
| }, |
| { |
| "epoch": 3.5264847512038524, |
| "grad_norm": 0.20760349114225327, |
| "learning_rate": 1.4787803158782105e-06, |
| "loss": 0.4454752802848816, |
| "step": 1100, |
| "token_acc": 0.8515730061840415 |
| }, |
| { |
| "epoch": 3.5296950240770464, |
| "grad_norm": 0.20209657459754093, |
| "learning_rate": 1.4728515961616324e-06, |
| "loss": 0.4329020380973816, |
| "step": 1101, |
| "token_acc": 0.8555188046740311 |
| }, |
| { |
| "epoch": 3.532905296950241, |
| "grad_norm": 0.20337891262559785, |
| "learning_rate": 1.4669316163614273e-06, |
| "loss": 0.399169921875, |
| "step": 1102, |
| "token_acc": 0.8667258058203838 |
| }, |
| { |
| "epoch": 3.5361155698234352, |
| "grad_norm": 0.1858405661724601, |
| "learning_rate": 1.461020402001196e-06, |
| "loss": 0.4512125849723816, |
| "step": 1103, |
| "token_acc": 0.8481120263363716 |
| }, |
| { |
| "epoch": 3.539325842696629, |
| "grad_norm": 0.21039533480772285, |
| "learning_rate": 1.4551179785667453e-06, |
| "loss": 0.4616292417049408, |
| "step": 1104, |
| "token_acc": 0.8470263196161116 |
| }, |
| { |
| "epoch": 3.542536115569823, |
| "grad_norm": 0.21814246301992302, |
| "learning_rate": 1.449224371505988e-06, |
| "loss": 0.3966064453125, |
| "step": 1105, |
| "token_acc": 0.8673032108437989 |
| }, |
| { |
| "epoch": 3.5457463884430176, |
| "grad_norm": 0.1799367306449473, |
| "learning_rate": 1.443339606228819e-06, |
| "loss": 0.416748046875, |
| "step": 1106, |
| "token_acc": 0.861918932987615 |
| }, |
| { |
| "epoch": 3.548956661316212, |
| "grad_norm": 0.1978903281145319, |
| "learning_rate": 1.4374637081070172e-06, |
| "loss": 0.447998046875, |
| "step": 1107, |
| "token_acc": 0.8488240043187931 |
| }, |
| { |
| "epoch": 3.552166934189406, |
| "grad_norm": 0.20214133587068753, |
| "learning_rate": 1.4315967024741249e-06, |
| "loss": 0.4388427734375, |
| "step": 1108, |
| "token_acc": 0.8534245399592104 |
| }, |
| { |
| "epoch": 3.5553772070626004, |
| "grad_norm": 0.1712702137477399, |
| "learning_rate": 1.4257386146253524e-06, |
| "loss": 0.4484456479549408, |
| "step": 1109, |
| "token_acc": 0.8492724436033093 |
| }, |
| { |
| "epoch": 3.5585874799357944, |
| "grad_norm": 0.2248722413536391, |
| "learning_rate": 1.419889469817458e-06, |
| "loss": 0.4341227412223816, |
| "step": 1110, |
| "token_acc": 0.8557268722466961 |
| }, |
| { |
| "epoch": 3.561797752808989, |
| "grad_norm": 0.23382033976015443, |
| "learning_rate": 1.4140492932686423e-06, |
| "loss": 0.4353434443473816, |
| "step": 1111, |
| "token_acc": 0.8557752170517189 |
| }, |
| { |
| "epoch": 3.5650080256821832, |
| "grad_norm": 0.20087595501719435, |
| "learning_rate": 1.4082181101584404e-06, |
| "loss": 0.3586222529411316, |
| "step": 1112, |
| "token_acc": 0.8811013304106066 |
| }, |
| { |
| "epoch": 3.568218298555377, |
| "grad_norm": 0.17604647276989846, |
| "learning_rate": 1.4023959456276134e-06, |
| "loss": 0.4018758237361908, |
| "step": 1113, |
| "token_acc": 0.8666440619959501 |
| }, |
| { |
| "epoch": 3.571428571428571, |
| "grad_norm": 0.19157315090032623, |
| "learning_rate": 1.396582824778039e-06, |
| "loss": 0.4198405146598816, |
| "step": 1114, |
| "token_acc": 0.8591179259464835 |
| }, |
| { |
| "epoch": 3.5746388443017656, |
| "grad_norm": 0.20802221020070402, |
| "learning_rate": 1.390778772672603e-06, |
| "loss": 0.4651286005973816, |
| "step": 1115, |
| "token_acc": 0.8433386152010968 |
| }, |
| { |
| "epoch": 3.57784911717496, |
| "grad_norm": 0.18483138886919764, |
| "learning_rate": 1.3849838143350928e-06, |
| "loss": 0.42041015625, |
| "step": 1116, |
| "token_acc": 0.8592253803464793 |
| }, |
| { |
| "epoch": 3.581059390048154, |
| "grad_norm": 0.3119281222048741, |
| "learning_rate": 1.379197974750088e-06, |
| "loss": 0.4130859375, |
| "step": 1117, |
| "token_acc": 0.8610746129086024 |
| }, |
| { |
| "epoch": 3.5842696629213484, |
| "grad_norm": 0.2250280863661116, |
| "learning_rate": 1.3734212788628542e-06, |
| "loss": 0.4375407099723816, |
| "step": 1118, |
| "token_acc": 0.852986013309395 |
| }, |
| { |
| "epoch": 3.5874799357945424, |
| "grad_norm": 0.2319937697956126, |
| "learning_rate": 1.367653751579232e-06, |
| "loss": 0.4305623471736908, |
| "step": 1119, |
| "token_acc": 0.8549000532980083 |
| }, |
| { |
| "epoch": 3.590690208667737, |
| "grad_norm": 0.20874108633816135, |
| "learning_rate": 1.3618954177655385e-06, |
| "loss": 0.439453125, |
| "step": 1120, |
| "token_acc": 0.8523911679448383 |
| }, |
| { |
| "epoch": 3.5939004815409312, |
| "grad_norm": 0.18549311244906191, |
| "learning_rate": 1.3561463022484448e-06, |
| "loss": 0.4788004755973816, |
| "step": 1121, |
| "token_acc": 0.8400730047060446 |
| }, |
| { |
| "epoch": 3.597110754414125, |
| "grad_norm": 0.21571752979548245, |
| "learning_rate": 1.3504064298148833e-06, |
| "loss": 0.3993733823299408, |
| "step": 1122, |
| "token_acc": 0.8675419990525366 |
| }, |
| { |
| "epoch": 3.600321027287319, |
| "grad_norm": 0.20805103036182104, |
| "learning_rate": 1.3446758252119366e-06, |
| "loss": 0.4430135190486908, |
| "step": 1123, |
| "token_acc": 0.8532176419899573 |
| }, |
| { |
| "epoch": 3.6035313001605136, |
| "grad_norm": 0.18063209287797274, |
| "learning_rate": 1.3389545131467282e-06, |
| "loss": 0.3862711787223816, |
| "step": 1124, |
| "token_acc": 0.8700313564313901 |
| }, |
| { |
| "epoch": 3.606741573033708, |
| "grad_norm": 0.20079075250668693, |
| "learning_rate": 1.3332425182863144e-06, |
| "loss": 0.4552001953125, |
| "step": 1125, |
| "token_acc": 0.8473574577338565 |
| }, |
| { |
| "epoch": 3.609951845906902, |
| "grad_norm": 0.24814098522889355, |
| "learning_rate": 1.3275398652575832e-06, |
| "loss": 0.429931640625, |
| "step": 1126, |
| "token_acc": 0.8559153243433212 |
| }, |
| { |
| "epoch": 3.6131621187800964, |
| "grad_norm": 0.20186596614224114, |
| "learning_rate": 1.321846578647149e-06, |
| "loss": 0.4421793818473816, |
| "step": 1127, |
| "token_acc": 0.8515140875865225 |
| }, |
| { |
| "epoch": 3.6163723916532904, |
| "grad_norm": 0.1972261927521971, |
| "learning_rate": 1.3161626830012393e-06, |
| "loss": 0.3323161005973816, |
| "step": 1128, |
| "token_acc": 0.8880017935239064 |
| }, |
| { |
| "epoch": 3.619582664526485, |
| "grad_norm": 0.21275115071244285, |
| "learning_rate": 1.3104882028255943e-06, |
| "loss": 0.413818359375, |
| "step": 1129, |
| "token_acc": 0.8623194980291856 |
| }, |
| { |
| "epoch": 3.6227929373996792, |
| "grad_norm": 0.2484032770033825, |
| "learning_rate": 1.3048231625853613e-06, |
| "loss": 0.4005330502986908, |
| "step": 1130, |
| "token_acc": 0.865271191260239 |
| }, |
| { |
| "epoch": 3.626003210272873, |
| "grad_norm": 0.19869890039868315, |
| "learning_rate": 1.2991675867049857e-06, |
| "loss": 0.4834798276424408, |
| "step": 1131, |
| "token_acc": 0.8374210033176216 |
| }, |
| { |
| "epoch": 3.629213483146067, |
| "grad_norm": 0.1847331440731312, |
| "learning_rate": 1.29352149956811e-06, |
| "loss": 0.4325968623161316, |
| "step": 1132, |
| "token_acc": 0.8542738246013079 |
| }, |
| { |
| "epoch": 3.6324237560192616, |
| "grad_norm": 0.2072766407741738, |
| "learning_rate": 1.2878849255174652e-06, |
| "loss": 0.3939208984375, |
| "step": 1133, |
| "token_acc": 0.8672659842381567 |
| }, |
| { |
| "epoch": 3.635634028892456, |
| "grad_norm": 0.20737047250930485, |
| "learning_rate": 1.282257888854768e-06, |
| "loss": 0.4196370542049408, |
| "step": 1134, |
| "token_acc": 0.8583164933418487 |
| }, |
| { |
| "epoch": 3.63884430176565, |
| "grad_norm": 0.20254598573908703, |
| "learning_rate": 1.2766404138406151e-06, |
| "loss": 0.4584147334098816, |
| "step": 1135, |
| "token_acc": 0.845699420024143 |
| }, |
| { |
| "epoch": 3.6420545746388444, |
| "grad_norm": 0.18676784506103364, |
| "learning_rate": 1.271032524694379e-06, |
| "loss": 0.4593912959098816, |
| "step": 1136, |
| "token_acc": 0.8467912156242051 |
| }, |
| { |
| "epoch": 3.6452648475120384, |
| "grad_norm": 0.18404519693391058, |
| "learning_rate": 1.2654342455941026e-06, |
| "loss": 0.3817545771598816, |
| "step": 1137, |
| "token_acc": 0.8718617498601229 |
| }, |
| { |
| "epoch": 3.648475120385233, |
| "grad_norm": 0.19740877602255696, |
| "learning_rate": 1.2598456006763967e-06, |
| "loss": 0.4697265625, |
| "step": 1138, |
| "token_acc": 0.8445315531024508 |
| }, |
| { |
| "epoch": 3.6516853932584272, |
| "grad_norm": 0.18986912441857076, |
| "learning_rate": 1.2542666140363343e-06, |
| "loss": 0.4017333984375, |
| "step": 1139, |
| "token_acc": 0.863951752722064 |
| }, |
| { |
| "epoch": 3.654895666131621, |
| "grad_norm": 0.1789450935530543, |
| "learning_rate": 1.2486973097273469e-06, |
| "loss": 0.4638671875, |
| "step": 1140, |
| "token_acc": 0.8456421328521097 |
| }, |
| { |
| "epoch": 3.658105939004815, |
| "grad_norm": 0.22878218933401717, |
| "learning_rate": 1.2431377117611247e-06, |
| "loss": 0.4156901240348816, |
| "step": 1141, |
| "token_acc": 0.8583248996535044 |
| }, |
| { |
| "epoch": 3.6613162118780096, |
| "grad_norm": 0.19936716843844962, |
| "learning_rate": 1.2375878441075035e-06, |
| "loss": 0.4248860776424408, |
| "step": 1142, |
| "token_acc": 0.8554407218253489 |
| }, |
| { |
| "epoch": 3.664526484751204, |
| "grad_norm": 0.2154430937518813, |
| "learning_rate": 1.2320477306943728e-06, |
| "loss": 0.450439453125, |
| "step": 1143, |
| "token_acc": 0.8500447622106792 |
| }, |
| { |
| "epoch": 3.667736757624398, |
| "grad_norm": 0.19034491907180862, |
| "learning_rate": 1.2265173954075636e-06, |
| "loss": 0.4224446713924408, |
| "step": 1144, |
| "token_acc": 0.8556876330592085 |
| }, |
| { |
| "epoch": 3.6709470304975924, |
| "grad_norm": 0.17777679326467932, |
| "learning_rate": 1.2209968620907537e-06, |
| "loss": 0.3982747495174408, |
| "step": 1145, |
| "token_acc": 0.8657402270980548 |
| }, |
| { |
| "epoch": 3.6741573033707864, |
| "grad_norm": 0.18537100453971045, |
| "learning_rate": 1.2154861545453573e-06, |
| "loss": 0.4394938349723816, |
| "step": 1146, |
| "token_acc": 0.8517562914294342 |
| }, |
| { |
| "epoch": 3.677367576243981, |
| "grad_norm": 0.1815554961453709, |
| "learning_rate": 1.2099852965304223e-06, |
| "loss": 0.4091390073299408, |
| "step": 1147, |
| "token_acc": 0.8609376121760356 |
| }, |
| { |
| "epoch": 3.6805778491171752, |
| "grad_norm": 0.19774937138330145, |
| "learning_rate": 1.2044943117625385e-06, |
| "loss": 0.4618733823299408, |
| "step": 1148, |
| "token_acc": 0.844277955355182 |
| }, |
| { |
| "epoch": 3.683788121990369, |
| "grad_norm": 0.1776439903869948, |
| "learning_rate": 1.1990132239157223e-06, |
| "loss": 0.4086507260799408, |
| "step": 1149, |
| "token_acc": 0.8623496479124965 |
| }, |
| { |
| "epoch": 3.686998394863563, |
| "grad_norm": 0.18174880879649488, |
| "learning_rate": 1.193542056621323e-06, |
| "loss": 0.4283040463924408, |
| "step": 1150, |
| "token_acc": 0.8550191434654225 |
| }, |
| { |
| "epoch": 3.6902086677367576, |
| "grad_norm": 0.2481070438423436, |
| "learning_rate": 1.1880808334679128e-06, |
| "loss": 0.4292806088924408, |
| "step": 1151, |
| "token_acc": 0.8554810096320871 |
| }, |
| { |
| "epoch": 3.693418940609952, |
| "grad_norm": 0.20829243678595485, |
| "learning_rate": 1.1826295780011986e-06, |
| "loss": 0.4122721552848816, |
| "step": 1152, |
| "token_acc": 0.8618100241611001 |
| }, |
| { |
| "epoch": 3.696629213483146, |
| "grad_norm": 0.20267863009891, |
| "learning_rate": 1.1771883137239067e-06, |
| "loss": 0.4258219599723816, |
| "step": 1153, |
| "token_acc": 0.8578250897204026 |
| }, |
| { |
| "epoch": 3.6998394863563404, |
| "grad_norm": 0.1732365798885481, |
| "learning_rate": 1.171757064095688e-06, |
| "loss": 0.4292806088924408, |
| "step": 1154, |
| "token_acc": 0.8550376709009048 |
| }, |
| { |
| "epoch": 3.7030497592295344, |
| "grad_norm": 0.20397059894965575, |
| "learning_rate": 1.1663358525330169e-06, |
| "loss": 0.4227294921875, |
| "step": 1155, |
| "token_acc": 0.8581558816694186 |
| }, |
| { |
| "epoch": 3.706260032102729, |
| "grad_norm": 0.19318253491872506, |
| "learning_rate": 1.1609247024090888e-06, |
| "loss": 0.4373779296875, |
| "step": 1156, |
| "token_acc": 0.8549174782922965 |
| }, |
| { |
| "epoch": 3.7094703049759232, |
| "grad_norm": 0.2202135539105138, |
| "learning_rate": 1.1555236370537193e-06, |
| "loss": 0.3937581479549408, |
| "step": 1157, |
| "token_acc": 0.8677404738211112 |
| }, |
| { |
| "epoch": 3.712680577849117, |
| "grad_norm": 0.19454901193411642, |
| "learning_rate": 1.150132679753245e-06, |
| "loss": 0.4218343198299408, |
| "step": 1158, |
| "token_acc": 0.8581630168320942 |
| }, |
| { |
| "epoch": 3.715890850722311, |
| "grad_norm": 0.19007603665918127, |
| "learning_rate": 1.1447518537504223e-06, |
| "loss": 0.4286295771598816, |
| "step": 1159, |
| "token_acc": 0.8576809818327997 |
| }, |
| { |
| "epoch": 3.7191011235955056, |
| "grad_norm": 0.1864005885740874, |
| "learning_rate": 1.1393811822443264e-06, |
| "loss": 0.42724609375, |
| "step": 1160, |
| "token_acc": 0.8559439804490412 |
| }, |
| { |
| "epoch": 3.7223113964687, |
| "grad_norm": 0.2071776233361781, |
| "learning_rate": 1.134020688390253e-06, |
| "loss": 0.4482015073299408, |
| "step": 1161, |
| "token_acc": 0.8493717095311946 |
| }, |
| { |
| "epoch": 3.725521669341894, |
| "grad_norm": 0.18174949422424447, |
| "learning_rate": 1.1286703952996156e-06, |
| "loss": 0.4434000849723816, |
| "step": 1162, |
| "token_acc": 0.8515254746286025 |
| }, |
| { |
| "epoch": 3.7287319422150884, |
| "grad_norm": 0.21834567026723553, |
| "learning_rate": 1.1233303260398527e-06, |
| "loss": 0.4306437373161316, |
| "step": 1163, |
| "token_acc": 0.8558070632128901 |
| }, |
| { |
| "epoch": 3.7319422150882824, |
| "grad_norm": 0.19693327775941002, |
| "learning_rate": 1.1180005036343169e-06, |
| "loss": 0.4174397885799408, |
| "step": 1164, |
| "token_acc": 0.8600014018228531 |
| }, |
| { |
| "epoch": 3.735152487961477, |
| "grad_norm": 0.21766650753027825, |
| "learning_rate": 1.112680951062185e-06, |
| "loss": 0.480712890625, |
| "step": 1165, |
| "token_acc": 0.8373244659218679 |
| }, |
| { |
| "epoch": 3.738362760834671, |
| "grad_norm": 0.45324960611379955, |
| "learning_rate": 1.1073716912583585e-06, |
| "loss": 0.42547607421875, |
| "step": 1166, |
| "token_acc": 0.8576685244412483 |
| }, |
| { |
| "epoch": 3.741573033707865, |
| "grad_norm": 0.17298076771061435, |
| "learning_rate": 1.1020727471133605e-06, |
| "loss": 0.415771484375, |
| "step": 1167, |
| "token_acc": 0.8597448800837256 |
| }, |
| { |
| "epoch": 3.744783306581059, |
| "grad_norm": 0.23965119219664902, |
| "learning_rate": 1.0967841414732362e-06, |
| "loss": 0.4188639521598816, |
| "step": 1168, |
| "token_acc": 0.8588890963340573 |
| }, |
| { |
| "epoch": 3.7479935794542536, |
| "grad_norm": 0.23365722674624714, |
| "learning_rate": 1.0915058971394593e-06, |
| "loss": 0.4141032099723816, |
| "step": 1169, |
| "token_acc": 0.8631264796385842 |
| }, |
| { |
| "epoch": 3.751203852327448, |
| "grad_norm": 0.19910767380766714, |
| "learning_rate": 1.086238036868833e-06, |
| "loss": 0.4235026240348816, |
| "step": 1170, |
| "token_acc": 0.8583032988349921 |
| }, |
| { |
| "epoch": 3.754414125200642, |
| "grad_norm": 0.2076959812992001, |
| "learning_rate": 1.0809805833733883e-06, |
| "loss": 0.4021809995174408, |
| "step": 1171, |
| "token_acc": 0.864621498039401 |
| }, |
| { |
| "epoch": 3.7576243980738364, |
| "grad_norm": 0.2152311019271978, |
| "learning_rate": 1.0757335593202886e-06, |
| "loss": 0.4444173276424408, |
| "step": 1172, |
| "token_acc": 0.8518791374036445 |
| }, |
| { |
| "epoch": 3.7608346709470304, |
| "grad_norm": 0.18972984221535008, |
| "learning_rate": 1.0704969873317306e-06, |
| "loss": 0.4087321162223816, |
| "step": 1173, |
| "token_acc": 0.8632072158845779 |
| }, |
| { |
| "epoch": 3.764044943820225, |
| "grad_norm": 0.18348596001885115, |
| "learning_rate": 1.0652708899848494e-06, |
| "loss": 0.39996337890625, |
| "step": 1174, |
| "token_acc": 0.867081444511051 |
| }, |
| { |
| "epoch": 3.767255216693419, |
| "grad_norm": 0.18413587804123854, |
| "learning_rate": 1.0600552898116172e-06, |
| "loss": 0.4168701171875, |
| "step": 1175, |
| "token_acc": 0.8600618621035828 |
| }, |
| { |
| "epoch": 3.770465489566613, |
| "grad_norm": 0.18783556488467354, |
| "learning_rate": 1.05485020929875e-06, |
| "loss": 0.430908203125, |
| "step": 1176, |
| "token_acc": 0.8561088357397764 |
| }, |
| { |
| "epoch": 3.773675762439807, |
| "grad_norm": 0.19351334790591254, |
| "learning_rate": 1.0496556708876086e-06, |
| "loss": 0.4070841670036316, |
| "step": 1177, |
| "token_acc": 0.862757148366175 |
| }, |
| { |
| "epoch": 3.7768860353130016, |
| "grad_norm": 0.20338648261683012, |
| "learning_rate": 1.0444716969741018e-06, |
| "loss": 0.3898722529411316, |
| "step": 1178, |
| "token_acc": 0.8694972769663388 |
| }, |
| { |
| "epoch": 3.780096308186196, |
| "grad_norm": 0.2198683114658492, |
| "learning_rate": 1.0392983099085907e-06, |
| "loss": 0.4216715693473816, |
| "step": 1179, |
| "token_acc": 0.860980687212047 |
| }, |
| { |
| "epoch": 3.78330658105939, |
| "grad_norm": 0.197249833176062, |
| "learning_rate": 1.0341355319957916e-06, |
| "loss": 0.3811849057674408, |
| "step": 1180, |
| "token_acc": 0.8726937908278484 |
| }, |
| { |
| "epoch": 3.7865168539325844, |
| "grad_norm": 0.18020529721358267, |
| "learning_rate": 1.0289833854946801e-06, |
| "loss": 0.4131673276424408, |
| "step": 1181, |
| "token_acc": 0.8598117137770124 |
| }, |
| { |
| "epoch": 3.7897271268057784, |
| "grad_norm": 0.1751467317582751, |
| "learning_rate": 1.0238418926183956e-06, |
| "loss": 0.3791097104549408, |
| "step": 1182, |
| "token_acc": 0.8740450591380495 |
| }, |
| { |
| "epoch": 3.792937399678973, |
| "grad_norm": 0.19982818717924114, |
| "learning_rate": 1.0187110755341436e-06, |
| "loss": 0.4158935546875, |
| "step": 1183, |
| "token_acc": 0.8595560648576157 |
| }, |
| { |
| "epoch": 3.796147672552167, |
| "grad_norm": 0.1702957983130496, |
| "learning_rate": 1.0135909563631064e-06, |
| "loss": 0.3960774838924408, |
| "step": 1184, |
| "token_acc": 0.8663887873513991 |
| }, |
| { |
| "epoch": 3.799357945425361, |
| "grad_norm": 0.19815997110927433, |
| "learning_rate": 1.0084815571803357e-06, |
| "loss": 0.4346415400505066, |
| "step": 1185, |
| "token_acc": 0.8551757443018521 |
| }, |
| { |
| "epoch": 3.802568218298555, |
| "grad_norm": 0.19117230772053223, |
| "learning_rate": 1.0033829000146702e-06, |
| "loss": 0.4427490234375, |
| "step": 1186, |
| "token_acc": 0.8522398677275012 |
| }, |
| { |
| "epoch": 3.8057784911717496, |
| "grad_norm": 0.16156607508241005, |
| "learning_rate": 9.982950068486312e-07, |
| "loss": 0.3183797299861908, |
| "step": 1187, |
| "token_acc": 0.892503984314602 |
| }, |
| { |
| "epoch": 3.808988764044944, |
| "grad_norm": 0.19397666298086996, |
| "learning_rate": 9.93217899618337e-07, |
| "loss": 0.3892822265625, |
| "step": 1188, |
| "token_acc": 0.8689912031120274 |
| }, |
| { |
| "epoch": 3.812199036918138, |
| "grad_norm": 0.18867901164404668, |
| "learning_rate": 9.881516002133995e-07, |
| "loss": 0.4711507260799408, |
| "step": 1189, |
| "token_acc": 0.8411834094055081 |
| }, |
| { |
| "epoch": 3.8154093097913324, |
| "grad_norm": 0.19601761144473115, |
| "learning_rate": 9.8309613047683e-07, |
| "loss": 0.4140625, |
| "step": 1190, |
| "token_acc": 0.8603962006782715 |
| }, |
| { |
| "epoch": 3.8186195826645264, |
| "grad_norm": 0.18588347051992335, |
| "learning_rate": 9.780515122049564e-07, |
| "loss": 0.4267781674861908, |
| "step": 1191, |
| "token_acc": 0.8573543108369303 |
| }, |
| { |
| "epoch": 3.821829855537721, |
| "grad_norm": 0.184640472732058, |
| "learning_rate": 9.730177671473151e-07, |
| "loss": 0.4150594174861908, |
| "step": 1192, |
| "token_acc": 0.8593442234726605 |
| }, |
| { |
| "epoch": 3.825040128410915, |
| "grad_norm": 0.19597044202919392, |
| "learning_rate": 9.679949170065668e-07, |
| "loss": 0.4122721552848816, |
| "step": 1193, |
| "token_acc": 0.860177508284942 |
| }, |
| { |
| "epoch": 3.828250401284109, |
| "grad_norm": 0.16703914573084216, |
| "learning_rate": 9.629829834383947e-07, |
| "loss": 0.4218343198299408, |
| "step": 1194, |
| "token_acc": 0.8574244150518524 |
| }, |
| { |
| "epoch": 3.831460674157303, |
| "grad_norm": 0.1748554615572873, |
| "learning_rate": 9.579819880514217e-07, |
| "loss": 0.3974812924861908, |
| "step": 1195, |
| "token_acc": 0.8660061383591329 |
| }, |
| { |
| "epoch": 3.8346709470304976, |
| "grad_norm": 0.20295828023991147, |
| "learning_rate": 9.529919524071083e-07, |
| "loss": 0.3984782099723816, |
| "step": 1196, |
| "token_acc": 0.8674985290933711 |
| }, |
| { |
| "epoch": 3.837881219903692, |
| "grad_norm": 0.3432526102252035, |
| "learning_rate": 9.480128980196639e-07, |
| "loss": 0.4263916015625, |
| "step": 1197, |
| "token_acc": 0.8576995283116747 |
| }, |
| { |
| "epoch": 3.841091492776886, |
| "grad_norm": 0.20788825979486256, |
| "learning_rate": 9.430448463559517e-07, |
| "loss": 0.4392293393611908, |
| "step": 1198, |
| "token_acc": 0.8542843486838969 |
| }, |
| { |
| "epoch": 3.8443017656500804, |
| "grad_norm": 0.17981879755399513, |
| "learning_rate": 9.380878188353982e-07, |
| "loss": 0.4596761167049408, |
| "step": 1199, |
| "token_acc": 0.8479297106788458 |
| }, |
| { |
| "epoch": 3.8475120385232744, |
| "grad_norm": 0.20592159337948013, |
| "learning_rate": 9.331418368299001e-07, |
| "loss": 0.3804931640625, |
| "step": 1200, |
| "token_acc": 0.8733441746298368 |
| }, |
| { |
| "epoch": 3.850722311396469, |
| "grad_norm": 0.1836405567831981, |
| "learning_rate": 9.282069216637321e-07, |
| "loss": 0.4243571162223816, |
| "step": 1201, |
| "token_acc": 0.8580087289236278 |
| }, |
| { |
| "epoch": 3.853932584269663, |
| "grad_norm": 0.1912407427340455, |
| "learning_rate": 9.232830946134545e-07, |
| "loss": 0.4245198667049408, |
| "step": 1202, |
| "token_acc": 0.8562418907682475 |
| }, |
| { |
| "epoch": 3.857142857142857, |
| "grad_norm": 0.21147971576987462, |
| "learning_rate": 9.183703769078224e-07, |
| "loss": 0.4187825620174408, |
| "step": 1203, |
| "token_acc": 0.8594776104662264 |
| }, |
| { |
| "epoch": 3.860353130016051, |
| "grad_norm": 0.19363916582208013, |
| "learning_rate": 9.134687897276935e-07, |
| "loss": 0.4059651792049408, |
| "step": 1204, |
| "token_acc": 0.8651164117356721 |
| }, |
| { |
| "epoch": 3.8635634028892456, |
| "grad_norm": 0.1979326843110549, |
| "learning_rate": 9.085783542059362e-07, |
| "loss": 0.4379476010799408, |
| "step": 1205, |
| "token_acc": 0.8531043384119547 |
| }, |
| { |
| "epoch": 3.86677367576244, |
| "grad_norm": 0.20822780148569695, |
| "learning_rate": 9.036990914273424e-07, |
| "loss": 0.4237467646598816, |
| "step": 1206, |
| "token_acc": 0.8593451623169955 |
| }, |
| { |
| "epoch": 3.869983948635634, |
| "grad_norm": 0.216570254338795, |
| "learning_rate": 8.988310224285286e-07, |
| "loss": 0.4561360776424408, |
| "step": 1207, |
| "token_acc": 0.8473712875328652 |
| }, |
| { |
| "epoch": 3.8731942215088284, |
| "grad_norm": 0.24795318793900514, |
| "learning_rate": 8.939741681978527e-07, |
| "loss": 0.4324544370174408, |
| "step": 1208, |
| "token_acc": 0.8556788135891982 |
| }, |
| { |
| "epoch": 3.8764044943820224, |
| "grad_norm": 0.21213726196244703, |
| "learning_rate": 8.891285496753224e-07, |
| "loss": 0.42626953125, |
| "step": 1209, |
| "token_acc": 0.8567778242243094 |
| }, |
| { |
| "epoch": 3.879614767255217, |
| "grad_norm": 0.18405467114149734, |
| "learning_rate": 8.842941877525016e-07, |
| "loss": 0.3857015073299408, |
| "step": 1210, |
| "token_acc": 0.8689668297330032 |
| }, |
| { |
| "epoch": 3.882825040128411, |
| "grad_norm": 0.192615100166161, |
| "learning_rate": 8.794711032724204e-07, |
| "loss": 0.4140218198299408, |
| "step": 1211, |
| "token_acc": 0.8614851736081778 |
| }, |
| { |
| "epoch": 3.886035313001605, |
| "grad_norm": 0.19011937935493017, |
| "learning_rate": 8.746593170294891e-07, |
| "loss": 0.4407958984375, |
| "step": 1212, |
| "token_acc": 0.8529518626081146 |
| }, |
| { |
| "epoch": 3.889245585874799, |
| "grad_norm": 0.20036411640046875, |
| "learning_rate": 8.69858849769408e-07, |
| "loss": 0.4180094599723816, |
| "step": 1213, |
| "token_acc": 0.8602316017291542 |
| }, |
| { |
| "epoch": 3.8924558587479936, |
| "grad_norm": 0.43419652542817555, |
| "learning_rate": 8.650697221890728e-07, |
| "loss": 0.41845703125, |
| "step": 1214, |
| "token_acc": 0.8598270119786144 |
| }, |
| { |
| "epoch": 3.895666131621188, |
| "grad_norm": 0.18461668284934152, |
| "learning_rate": 8.602919549364914e-07, |
| "loss": 0.4590250849723816, |
| "step": 1215, |
| "token_acc": 0.8466133712323088 |
| }, |
| { |
| "epoch": 3.898876404494382, |
| "grad_norm": 0.19081795301141688, |
| "learning_rate": 8.55525568610691e-07, |
| "loss": 0.42529296875, |
| "step": 1216, |
| "token_acc": 0.8577144479993796 |
| }, |
| { |
| "epoch": 3.902086677367576, |
| "grad_norm": 0.19483729991308552, |
| "learning_rate": 8.507705837616316e-07, |
| "loss": 0.4462077021598816, |
| "step": 1217, |
| "token_acc": 0.8520786007770702 |
| }, |
| { |
| "epoch": 3.9052969502407704, |
| "grad_norm": 0.17692550192830483, |
| "learning_rate": 8.460270208901157e-07, |
| "loss": 0.4711100459098816, |
| "step": 1218, |
| "token_acc": 0.8430626746772136 |
| }, |
| { |
| "epoch": 3.908507223113965, |
| "grad_norm": 0.425992909344649, |
| "learning_rate": 8.412949004477013e-07, |
| "loss": 0.4374593198299408, |
| "step": 1219, |
| "token_acc": 0.8545313912557241 |
| }, |
| { |
| "epoch": 3.911717495987159, |
| "grad_norm": 0.18135473863521517, |
| "learning_rate": 8.36574242836613e-07, |
| "loss": 0.4267578125, |
| "step": 1220, |
| "token_acc": 0.857118550899144 |
| }, |
| { |
| "epoch": 3.914927768860353, |
| "grad_norm": 0.20475846877488454, |
| "learning_rate": 8.318650684096542e-07, |
| "loss": 0.3864339292049408, |
| "step": 1221, |
| "token_acc": 0.8697558260738323 |
| }, |
| { |
| "epoch": 3.918138041733547, |
| "grad_norm": 0.19525904130356977, |
| "learning_rate": 8.271673974701181e-07, |
| "loss": 0.3937174677848816, |
| "step": 1222, |
| "token_acc": 0.8689606579284498 |
| }, |
| { |
| "epoch": 3.9213483146067416, |
| "grad_norm": 0.18789724020524992, |
| "learning_rate": 8.224812502717055e-07, |
| "loss": 0.4049275815486908, |
| "step": 1223, |
| "token_acc": 0.8645857224509708 |
| }, |
| { |
| "epoch": 3.924558587479936, |
| "grad_norm": 0.1885665996962879, |
| "learning_rate": 8.178066470184274e-07, |
| "loss": 0.3941243588924408, |
| "step": 1224, |
| "token_acc": 0.8686516652726115 |
| }, |
| { |
| "epoch": 3.92776886035313, |
| "grad_norm": 0.1811780429895728, |
| "learning_rate": 8.13143607864528e-07, |
| "loss": 0.4338786005973816, |
| "step": 1225, |
| "token_acc": 0.8549170275370654 |
| }, |
| { |
| "epoch": 3.930979133226324, |
| "grad_norm": 0.19625335214307585, |
| "learning_rate": 8.084921529143908e-07, |
| "loss": 0.3824259638786316, |
| "step": 1226, |
| "token_acc": 0.8723617876476475 |
| }, |
| { |
| "epoch": 3.9341894060995184, |
| "grad_norm": 0.18965753363437576, |
| "learning_rate": 8.0385230222246e-07, |
| "loss": 0.4338786005973816, |
| "step": 1227, |
| "token_acc": 0.8552555443194048 |
| }, |
| { |
| "epoch": 3.937399678972713, |
| "grad_norm": 0.18550838689006327, |
| "learning_rate": 7.99224075793142e-07, |
| "loss": 0.4319661557674408, |
| "step": 1228, |
| "token_acc": 0.8545585791754989 |
| }, |
| { |
| "epoch": 3.940609951845907, |
| "grad_norm": 0.19022732749052057, |
| "learning_rate": 7.946074935807302e-07, |
| "loss": 0.4304606318473816, |
| "step": 1229, |
| "token_acc": 0.8549475072272658 |
| }, |
| { |
| "epoch": 3.943820224719101, |
| "grad_norm": 0.19115968352145013, |
| "learning_rate": 7.900025754893128e-07, |
| "loss": 0.4150797724723816, |
| "step": 1230, |
| "token_acc": 0.8610937896452516 |
| }, |
| { |
| "epoch": 3.947030497592295, |
| "grad_norm": 0.18621797333773915, |
| "learning_rate": 7.854093413726916e-07, |
| "loss": 0.4222005307674408, |
| "step": 1231, |
| "token_acc": 0.8602832744218125 |
| }, |
| { |
| "epoch": 3.9502407704654896, |
| "grad_norm": 0.17754732315506336, |
| "learning_rate": 7.808278110342917e-07, |
| "loss": 0.3959554135799408, |
| "step": 1232, |
| "token_acc": 0.8670500443540994 |
| }, |
| { |
| "epoch": 3.953451043338684, |
| "grad_norm": 0.19867554700988296, |
| "learning_rate": 7.76258004227076e-07, |
| "loss": 0.4458821713924408, |
| "step": 1233, |
| "token_acc": 0.8505125893374252 |
| }, |
| { |
| "epoch": 3.956661316211878, |
| "grad_norm": 0.21808630417891475, |
| "learning_rate": 7.716999406534674e-07, |
| "loss": 0.4442545771598816, |
| "step": 1234, |
| "token_acc": 0.8525282271934536 |
| }, |
| { |
| "epoch": 3.959871589085072, |
| "grad_norm": 0.1916423004733177, |
| "learning_rate": 7.671536399652543e-07, |
| "loss": 0.4322306513786316, |
| "step": 1235, |
| "token_acc": 0.8557067931446188 |
| }, |
| { |
| "epoch": 3.9630818619582664, |
| "grad_norm": 0.17649795985908107, |
| "learning_rate": 7.626191217635132e-07, |
| "loss": 0.4193522334098816, |
| "step": 1236, |
| "token_acc": 0.858990975699193 |
| }, |
| { |
| "epoch": 3.966292134831461, |
| "grad_norm": 0.1896890062451, |
| "learning_rate": 7.580964055985161e-07, |
| "loss": 0.4132080078125, |
| "step": 1237, |
| "token_acc": 0.8614007097683751 |
| }, |
| { |
| "epoch": 3.969502407704655, |
| "grad_norm": 0.20149925006058916, |
| "learning_rate": 7.535855109696586e-07, |
| "loss": 0.4415283203125, |
| "step": 1238, |
| "token_acc": 0.8525706179844579 |
| }, |
| { |
| "epoch": 3.972712680577849, |
| "grad_norm": 0.20316594264377988, |
| "learning_rate": 7.49086457325363e-07, |
| "loss": 0.4009602963924408, |
| "step": 1239, |
| "token_acc": 0.8664792315556717 |
| }, |
| { |
| "epoch": 3.975922953451043, |
| "grad_norm": 0.20270725393588832, |
| "learning_rate": 7.44599264063002e-07, |
| "loss": 0.4393310546875, |
| "step": 1240, |
| "token_acc": 0.8533326569378449 |
| }, |
| { |
| "epoch": 3.9791332263242376, |
| "grad_norm": 0.17646688963320326, |
| "learning_rate": 7.401239505288131e-07, |
| "loss": 0.4202474057674408, |
| "step": 1241, |
| "token_acc": 0.8593622787548933 |
| }, |
| { |
| "epoch": 3.982343499197432, |
| "grad_norm": 0.18633841996323106, |
| "learning_rate": 7.356605360178147e-07, |
| "loss": 0.4261067807674408, |
| "step": 1242, |
| "token_acc": 0.8577198930194864 |
| }, |
| { |
| "epoch": 3.985553772070626, |
| "grad_norm": 0.18117890155614608, |
| "learning_rate": 7.312090397737231e-07, |
| "loss": 0.4084879755973816, |
| "step": 1243, |
| "token_acc": 0.8637038960977068 |
| }, |
| { |
| "epoch": 3.98876404494382, |
| "grad_norm": 0.19441876582248407, |
| "learning_rate": 7.267694809888707e-07, |
| "loss": 0.4258219599723816, |
| "step": 1244, |
| "token_acc": 0.8575173994011962 |
| }, |
| { |
| "epoch": 3.9919743178170144, |
| "grad_norm": 0.21274611864180318, |
| "learning_rate": 7.223418788041214e-07, |
| "loss": 0.4119059443473816, |
| "step": 1245, |
| "token_acc": 0.8628398705941747 |
| }, |
| { |
| "epoch": 3.995184590690209, |
| "grad_norm": 0.20675182669422074, |
| "learning_rate": 7.179262523087899e-07, |
| "loss": 0.46435546875, |
| "step": 1246, |
| "token_acc": 0.8461486321611582 |
| }, |
| { |
| "epoch": 3.998394863563403, |
| "grad_norm": 0.18832618517471383, |
| "learning_rate": 7.135226205405573e-07, |
| "loss": 0.4163411557674408, |
| "step": 1247, |
| "token_acc": 0.8615401559705481 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.27131761596680815, |
| "learning_rate": 7.091310024853904e-07, |
| "loss": 0.4319661557674408, |
| "step": 1248, |
| "token_acc": 0.8548751265570262 |
| }, |
| { |
| "epoch": 4.003210272873194, |
| "grad_norm": 0.211277310082995, |
| "learning_rate": 7.04751417077463e-07, |
| "loss": 0.3932088315486908, |
| "step": 1249, |
| "token_acc": 0.8676984176763316 |
| }, |
| { |
| "epoch": 4.006420545746389, |
| "grad_norm": 0.19452299161645747, |
| "learning_rate": 7.003838831990654e-07, |
| "loss": 0.3834025263786316, |
| "step": 1250, |
| "token_acc": 0.8716185002637179 |
| }, |
| { |
| "epoch": 4.009630818619582, |
| "grad_norm": 0.18002877066507167, |
| "learning_rate": 6.960284196805311e-07, |
| "loss": 0.4623616635799408, |
| "step": 1251, |
| "token_acc": 0.8435193660275556 |
| }, |
| { |
| "epoch": 4.012841091492777, |
| "grad_norm": 0.18782300122295928, |
| "learning_rate": 6.916850453001553e-07, |
| "loss": 0.4035237729549408, |
| "step": 1252, |
| "token_acc": 0.8641802933682129 |
| }, |
| { |
| "epoch": 4.016051364365971, |
| "grad_norm": 0.1864350065242437, |
| "learning_rate": 6.873537787841092e-07, |
| "loss": 0.4337565302848816, |
| "step": 1253, |
| "token_acc": 0.8550117239586191 |
| }, |
| { |
| "epoch": 4.019261637239166, |
| "grad_norm": 0.18476754648253124, |
| "learning_rate": 6.830346388063606e-07, |
| "loss": 0.44610595703125, |
| "step": 1254, |
| "token_acc": 0.8506658750185543 |
| }, |
| { |
| "epoch": 4.022471910112359, |
| "grad_norm": 0.18612140337980987, |
| "learning_rate": 6.787276439885962e-07, |
| "loss": 0.4018147885799408, |
| "step": 1255, |
| "token_acc": 0.8653541351935264 |
| }, |
| { |
| "epoch": 4.025682182985554, |
| "grad_norm": 0.2266902701351228, |
| "learning_rate": 6.744328129001411e-07, |
| "loss": 0.4254353940486908, |
| "step": 1256, |
| "token_acc": 0.8567928136612766 |
| }, |
| { |
| "epoch": 4.028892455858748, |
| "grad_norm": 0.19151032858380448, |
| "learning_rate": 6.701501640578749e-07, |
| "loss": 0.4202067255973816, |
| "step": 1257, |
| "token_acc": 0.8581763091032826 |
| }, |
| { |
| "epoch": 4.032102728731942, |
| "grad_norm": 0.19253982319972482, |
| "learning_rate": 6.65879715926155e-07, |
| "loss": 0.427001953125, |
| "step": 1258, |
| "token_acc": 0.8558299365735071 |
| }, |
| { |
| "epoch": 4.035313001605137, |
| "grad_norm": 0.20714121082879816, |
| "learning_rate": 6.616214869167364e-07, |
| "loss": 0.4225260615348816, |
| "step": 1259, |
| "token_acc": 0.857597815037414 |
| }, |
| { |
| "epoch": 4.03852327447833, |
| "grad_norm": 0.188984662915425, |
| "learning_rate": 6.573754953886914e-07, |
| "loss": 0.4147135615348816, |
| "step": 1260, |
| "token_acc": 0.861747953533917 |
| }, |
| { |
| "epoch": 4.041733547351525, |
| "grad_norm": 0.1809464007645627, |
| "learning_rate": 6.531417596483331e-07, |
| "loss": 0.4534098505973816, |
| "step": 1261, |
| "token_acc": 0.8476468626880285 |
| }, |
| { |
| "epoch": 4.044943820224719, |
| "grad_norm": 0.22937913994865552, |
| "learning_rate": 6.489202979491323e-07, |
| "loss": 0.4196370542049408, |
| "step": 1262, |
| "token_acc": 0.8600287993230477 |
| }, |
| { |
| "epoch": 4.048154093097914, |
| "grad_norm": 0.20729747118528538, |
| "learning_rate": 6.447111284916422e-07, |
| "loss": 0.3974609375, |
| "step": 1263, |
| "token_acc": 0.8688817547202342 |
| }, |
| { |
| "epoch": 4.051364365971107, |
| "grad_norm": 0.18473772975637504, |
| "learning_rate": 6.405142694234194e-07, |
| "loss": 0.376220703125, |
| "step": 1264, |
| "token_acc": 0.8740485875668322 |
| }, |
| { |
| "epoch": 4.054574638844302, |
| "grad_norm": 0.18264189060046002, |
| "learning_rate": 6.363297388389433e-07, |
| "loss": 0.4558512568473816, |
| "step": 1265, |
| "token_acc": 0.8466898374159838 |
| }, |
| { |
| "epoch": 4.057784911717496, |
| "grad_norm": 0.1736263742905703, |
| "learning_rate": 6.321575547795431e-07, |
| "loss": 0.4064534604549408, |
| "step": 1266, |
| "token_acc": 0.8641185236155824 |
| }, |
| { |
| "epoch": 4.06099518459069, |
| "grad_norm": 0.20439738580390066, |
| "learning_rate": 6.279977352333124e-07, |
| "loss": 0.385498046875, |
| "step": 1267, |
| "token_acc": 0.8698766189625405 |
| }, |
| { |
| "epoch": 4.064205457463885, |
| "grad_norm": 0.16700348386684835, |
| "learning_rate": 6.238502981350388e-07, |
| "loss": 0.4293619990348816, |
| "step": 1268, |
| "token_acc": 0.854447191015641 |
| }, |
| { |
| "epoch": 4.067415730337078, |
| "grad_norm": 0.19162768232809374, |
| "learning_rate": 6.197152613661231e-07, |
| "loss": 0.4134928584098816, |
| "step": 1269, |
| "token_acc": 0.8621758337761007 |
| }, |
| { |
| "epoch": 4.070626003210273, |
| "grad_norm": 0.17159317224373907, |
| "learning_rate": 6.155926427545048e-07, |
| "loss": 0.4090779721736908, |
| "step": 1270, |
| "token_acc": 0.8615923279862137 |
| }, |
| { |
| "epoch": 4.073836276083467, |
| "grad_norm": 0.18400403175015234, |
| "learning_rate": 6.114824600745797e-07, |
| "loss": 0.41064453125, |
| "step": 1271, |
| "token_acc": 0.8617532043587804 |
| }, |
| { |
| "epoch": 4.077046548956662, |
| "grad_norm": 0.19234784803990349, |
| "learning_rate": 6.07384731047129e-07, |
| "loss": 0.4007975459098816, |
| "step": 1272, |
| "token_acc": 0.8649410469740091 |
| }, |
| { |
| "epoch": 4.080256821829855, |
| "grad_norm": 0.18526287274208925, |
| "learning_rate": 6.032994733392405e-07, |
| "loss": 0.3879598081111908, |
| "step": 1273, |
| "token_acc": 0.869797958265236 |
| }, |
| { |
| "epoch": 4.08346709470305, |
| "grad_norm": 0.20014484746202704, |
| "learning_rate": 5.99226704564234e-07, |
| "loss": 0.4639485776424408, |
| "step": 1274, |
| "token_acc": 0.8447181048034629 |
| }, |
| { |
| "epoch": 4.086677367576244, |
| "grad_norm": 0.1863915286208777, |
| "learning_rate": 5.951664422815826e-07, |
| "loss": 0.409912109375, |
| "step": 1275, |
| "token_acc": 0.8622634658441072 |
| }, |
| { |
| "epoch": 4.089887640449438, |
| "grad_norm": 0.18608349962296986, |
| "learning_rate": 5.911187039968373e-07, |
| "loss": 0.4313151240348816, |
| "step": 1276, |
| "token_acc": 0.8556820057995549 |
| }, |
| { |
| "epoch": 4.093097913322633, |
| "grad_norm": 0.19141297490867865, |
| "learning_rate": 5.870835071615557e-07, |
| "loss": 0.388214111328125, |
| "step": 1277, |
| "token_acc": 0.869686242075975 |
| }, |
| { |
| "epoch": 4.096308186195826, |
| "grad_norm": 0.1899764844237124, |
| "learning_rate": 5.83060869173222e-07, |
| "loss": 0.4187418818473816, |
| "step": 1278, |
| "token_acc": 0.8578907762758216 |
| }, |
| { |
| "epoch": 4.099518459069021, |
| "grad_norm": 0.1850950058073246, |
| "learning_rate": 5.790508073751745e-07, |
| "loss": 0.4100341796875, |
| "step": 1279, |
| "token_acc": 0.8624161154309232 |
| }, |
| { |
| "epoch": 4.102728731942215, |
| "grad_norm": 0.2287332501505277, |
| "learning_rate": 5.750533390565272e-07, |
| "loss": 0.3760172724723816, |
| "step": 1280, |
| "token_acc": 0.8740483573664101 |
| }, |
| { |
| "epoch": 4.10593900481541, |
| "grad_norm": 0.19641243226746088, |
| "learning_rate": 5.710684814521035e-07, |
| "loss": 0.4447428584098816, |
| "step": 1281, |
| "token_acc": 0.849857485304379 |
| }, |
| { |
| "epoch": 4.109149277688603, |
| "grad_norm": 0.189976985915476, |
| "learning_rate": 5.670962517423525e-07, |
| "loss": 0.4637451171875, |
| "step": 1282, |
| "token_acc": 0.8427576539156075 |
| }, |
| { |
| "epoch": 4.112359550561798, |
| "grad_norm": 0.1906013835451214, |
| "learning_rate": 5.631366670532798e-07, |
| "loss": 0.3881022334098816, |
| "step": 1283, |
| "token_acc": 0.8686223344216135 |
| }, |
| { |
| "epoch": 4.115569823434992, |
| "grad_norm": 0.19301371581474472, |
| "learning_rate": 5.591897444563736e-07, |
| "loss": 0.4277750849723816, |
| "step": 1284, |
| "token_acc": 0.8561654968599927 |
| }, |
| { |
| "epoch": 4.118780096308186, |
| "grad_norm": 0.18155881845410735, |
| "learning_rate": 5.552555009685293e-07, |
| "loss": 0.4273885190486908, |
| "step": 1285, |
| "token_acc": 0.8550006421686092 |
| }, |
| { |
| "epoch": 4.121990369181381, |
| "grad_norm": 0.19461825352865922, |
| "learning_rate": 5.513339535519781e-07, |
| "loss": 0.37548828125, |
| "step": 1286, |
| "token_acc": 0.8735851601449866 |
| }, |
| { |
| "epoch": 4.125200642054574, |
| "grad_norm": 0.1850682483956793, |
| "learning_rate": 5.474251191142121e-07, |
| "loss": 0.3612467646598816, |
| "step": 1287, |
| "token_acc": 0.879464152848493 |
| }, |
| { |
| "epoch": 4.128410914927769, |
| "grad_norm": 0.18187017146954892, |
| "learning_rate": 5.435290145079132e-07, |
| "loss": 0.3720296323299408, |
| "step": 1288, |
| "token_acc": 0.8751402982064638 |
| }, |
| { |
| "epoch": 4.131621187800963, |
| "grad_norm": 0.18969987814168832, |
| "learning_rate": 5.396456565308787e-07, |
| "loss": 0.397216796875, |
| "step": 1289, |
| "token_acc": 0.8655026671998424 |
| }, |
| { |
| "epoch": 4.134831460674158, |
| "grad_norm": 0.19480607739692923, |
| "learning_rate": 5.35775061925949e-07, |
| "loss": 0.3852946162223816, |
| "step": 1290, |
| "token_acc": 0.8710818106339356 |
| }, |
| { |
| "epoch": 4.138041733547351, |
| "grad_norm": 0.19043422055610776, |
| "learning_rate": 5.319172473809382e-07, |
| "loss": 0.404541015625, |
| "step": 1291, |
| "token_acc": 0.8630114619051036 |
| }, |
| { |
| "epoch": 4.141252006420546, |
| "grad_norm": 0.18711730373563912, |
| "learning_rate": 5.280722295285595e-07, |
| "loss": 0.4005126953125, |
| "step": 1292, |
| "token_acc": 0.8656115364475748 |
| }, |
| { |
| "epoch": 4.14446227929374, |
| "grad_norm": 0.18535051535270514, |
| "learning_rate": 5.24240024946351e-07, |
| "loss": 0.4049886167049408, |
| "step": 1293, |
| "token_acc": 0.8628473494046731 |
| }, |
| { |
| "epoch": 4.147672552166934, |
| "grad_norm": 0.21974202967212095, |
| "learning_rate": 5.204206501566099e-07, |
| "loss": 0.42431640625, |
| "step": 1294, |
| "token_acc": 0.8570635265956126 |
| }, |
| { |
| "epoch": 4.150882825040128, |
| "grad_norm": 0.18477324438232298, |
| "learning_rate": 5.166141216263194e-07, |
| "loss": 0.4139811396598816, |
| "step": 1295, |
| "token_acc": 0.8617092496576585 |
| }, |
| { |
| "epoch": 4.154093097913322, |
| "grad_norm": 0.1648615941744021, |
| "learning_rate": 5.128204557670763e-07, |
| "loss": 0.4064534604549408, |
| "step": 1296, |
| "token_acc": 0.8613643514903288 |
| }, |
| { |
| "epoch": 4.157303370786517, |
| "grad_norm": 0.1951050217780076, |
| "learning_rate": 5.090396689350181e-07, |
| "loss": 0.4375813901424408, |
| "step": 1297, |
| "token_acc": 0.8542707776890804 |
| }, |
| { |
| "epoch": 4.160513643659711, |
| "grad_norm": 0.1986719886463106, |
| "learning_rate": 5.052717774307574e-07, |
| "loss": 0.4270426630973816, |
| "step": 1298, |
| "token_acc": 0.8555437054366243 |
| }, |
| { |
| "epoch": 4.163723916532906, |
| "grad_norm": 0.17380420268148986, |
| "learning_rate": 5.015167974993112e-07, |
| "loss": 0.337158203125, |
| "step": 1299, |
| "token_acc": 0.8873299549549549 |
| }, |
| { |
| "epoch": 4.166934189406099, |
| "grad_norm": 0.1902791869258638, |
| "learning_rate": 4.977747453300264e-07, |
| "loss": 0.39013671875, |
| "step": 1300, |
| "token_acc": 0.8680480170890513 |
| }, |
| { |
| "epoch": 4.170144462279294, |
| "grad_norm": 0.19729112488192352, |
| "learning_rate": 4.940456370565138e-07, |
| "loss": 0.3991902768611908, |
| "step": 1301, |
| "token_acc": 0.8665999383150703 |
| }, |
| { |
| "epoch": 4.173354735152488, |
| "grad_norm": 0.18428870178583523, |
| "learning_rate": 4.903294887565769e-07, |
| "loss": 0.4064534604549408, |
| "step": 1302, |
| "token_acc": 0.8632890489370607 |
| }, |
| { |
| "epoch": 4.176565008025682, |
| "grad_norm": 0.18397492412651675, |
| "learning_rate": 4.86626316452144e-07, |
| "loss": 0.4265543818473816, |
| "step": 1303, |
| "token_acc": 0.8573058678742547 |
| }, |
| { |
| "epoch": 4.179775280898877, |
| "grad_norm": 0.26311590341567126, |
| "learning_rate": 4.829361361091972e-07, |
| "loss": 0.3391927182674408, |
| "step": 1304, |
| "token_acc": 0.8873305688832924 |
| }, |
| { |
| "epoch": 4.18298555377207, |
| "grad_norm": 0.17443450600993915, |
| "learning_rate": 4.792589636377056e-07, |
| "loss": 0.4116618037223816, |
| "step": 1305, |
| "token_acc": 0.8612885944354927 |
| }, |
| { |
| "epoch": 4.186195826645265, |
| "grad_norm": 0.20438381798059105, |
| "learning_rate": 4.755948148915554e-07, |
| "loss": 0.398681640625, |
| "step": 1306, |
| "token_acc": 0.8670303756225186 |
| }, |
| { |
| "epoch": 4.189406099518459, |
| "grad_norm": 0.18502705799301722, |
| "learning_rate": 4.7194370566848097e-07, |
| "loss": 0.4068196713924408, |
| "step": 1307, |
| "token_acc": 0.8630395206028574 |
| }, |
| { |
| "epoch": 4.192616372391654, |
| "grad_norm": 0.18958605233060205, |
| "learning_rate": 4.683056517099986e-07, |
| "loss": 0.4548746943473816, |
| "step": 1308, |
| "token_acc": 0.8456803796560372 |
| }, |
| { |
| "epoch": 4.195826645264847, |
| "grad_norm": 0.21726464831762202, |
| "learning_rate": 4.6468066870133904e-07, |
| "loss": 0.3997802734375, |
| "step": 1309, |
| "token_acc": 0.8656721717315794 |
| }, |
| { |
| "epoch": 4.199036918138042, |
| "grad_norm": 0.19156875723094768, |
| "learning_rate": 4.610687722713753e-07, |
| "loss": 0.4374593198299408, |
| "step": 1310, |
| "token_acc": 0.8513498633256577 |
| }, |
| { |
| "epoch": 4.202247191011236, |
| "grad_norm": 0.18441342108706746, |
| "learning_rate": 4.574699779925604e-07, |
| "loss": 0.4659017026424408, |
| "step": 1311, |
| "token_acc": 0.8423244482044691 |
| }, |
| { |
| "epoch": 4.20545746388443, |
| "grad_norm": 0.19162538923812916, |
| "learning_rate": 4.538843013808577e-07, |
| "loss": 0.4342448115348816, |
| "step": 1312, |
| "token_acc": 0.8543519985629906 |
| }, |
| { |
| "epoch": 4.208667736757624, |
| "grad_norm": 0.16005559784512735, |
| "learning_rate": 4.503117578956767e-07, |
| "loss": 0.4018147885799408, |
| "step": 1313, |
| "token_acc": 0.8647396556289849 |
| }, |
| { |
| "epoch": 4.211878009630818, |
| "grad_norm": 0.18564687246334158, |
| "learning_rate": 4.467523629398009e-07, |
| "loss": 0.451904296875, |
| "step": 1314, |
| "token_acc": 0.8479268243087531 |
| }, |
| { |
| "epoch": 4.215088282504013, |
| "grad_norm": 0.19836548645783045, |
| "learning_rate": 4.432061318593257e-07, |
| "loss": 0.3826090693473816, |
| "step": 1315, |
| "token_acc": 0.8713426809813936 |
| }, |
| { |
| "epoch": 4.218298555377207, |
| "grad_norm": 0.17610950259048228, |
| "learning_rate": 4.3967307994359414e-07, |
| "loss": 0.4306640625, |
| "step": 1316, |
| "token_acc": 0.8540626922541564 |
| }, |
| { |
| "epoch": 4.221508828250402, |
| "grad_norm": 0.30184321987677176, |
| "learning_rate": 4.361532224251251e-07, |
| "loss": 0.3817138671875, |
| "step": 1317, |
| "token_acc": 0.8715654018295902 |
| }, |
| { |
| "epoch": 4.224719101123595, |
| "grad_norm": 0.1992537562440866, |
| "learning_rate": 4.3264657447955243e-07, |
| "loss": 0.40087890625, |
| "step": 1318, |
| "token_acc": 0.866381413593607 |
| }, |
| { |
| "epoch": 4.22792937399679, |
| "grad_norm": 0.1819451245164796, |
| "learning_rate": 4.2915315122555434e-07, |
| "loss": 0.4207763671875, |
| "step": 1319, |
| "token_acc": 0.8575846667951931 |
| }, |
| { |
| "epoch": 4.231139646869984, |
| "grad_norm": 0.19547967012422543, |
| "learning_rate": 4.256729677247972e-07, |
| "loss": 0.40771484375, |
| "step": 1320, |
| "token_acc": 0.8633353803433637 |
| }, |
| { |
| "epoch": 4.234349919743178, |
| "grad_norm": 0.199687286811994, |
| "learning_rate": 4.2220603898186126e-07, |
| "loss": 0.4063720703125, |
| "step": 1321, |
| "token_acc": 0.8648657558814221 |
| }, |
| { |
| "epoch": 4.237560192616373, |
| "grad_norm": 0.2227566496132186, |
| "learning_rate": 4.1875237994418113e-07, |
| "loss": 0.4142252802848816, |
| "step": 1322, |
| "token_acc": 0.8610649940844372 |
| }, |
| { |
| "epoch": 4.240770465489566, |
| "grad_norm": 0.217719196604295, |
| "learning_rate": 4.1531200550197745e-07, |
| "loss": 0.4201151728630066, |
| "step": 1323, |
| "token_acc": 0.8603017061548934 |
| }, |
| { |
| "epoch": 4.243980738362761, |
| "grad_norm": 0.19340551330810762, |
| "learning_rate": 4.118849304881995e-07, |
| "loss": 0.4350382685661316, |
| "step": 1324, |
| "token_acc": 0.8524961221250393 |
| }, |
| { |
| "epoch": 4.247191011235955, |
| "grad_norm": 0.18788995064724226, |
| "learning_rate": 4.084711696784538e-07, |
| "loss": 0.4066569209098816, |
| "step": 1325, |
| "token_acc": 0.864263793463344 |
| }, |
| { |
| "epoch": 4.25040128410915, |
| "grad_norm": 0.17846806815698746, |
| "learning_rate": 4.0507073779094485e-07, |
| "loss": 0.369384765625, |
| "step": 1326, |
| "token_acc": 0.8754548796945046 |
| }, |
| { |
| "epoch": 4.253611556982343, |
| "grad_norm": 0.22897066984574038, |
| "learning_rate": 4.0168364948640966e-07, |
| "loss": 0.4083251953125, |
| "step": 1327, |
| "token_acc": 0.8632289416846652 |
| }, |
| { |
| "epoch": 4.256821829855538, |
| "grad_norm": 0.19357435650404023, |
| "learning_rate": 3.9830991936805577e-07, |
| "loss": 0.3915202021598816, |
| "step": 1328, |
| "token_acc": 0.8685719589159757 |
| }, |
| { |
| "epoch": 4.260032102728732, |
| "grad_norm": 0.20243731055282949, |
| "learning_rate": 3.949495619814973e-07, |
| "loss": 0.4164225459098816, |
| "step": 1329, |
| "token_acc": 0.8601415282856744 |
| }, |
| { |
| "epoch": 4.263242375601926, |
| "grad_norm": 0.17523671564463278, |
| "learning_rate": 3.916025918146934e-07, |
| "loss": 0.3895263671875, |
| "step": 1330, |
| "token_acc": 0.8680372892236324 |
| }, |
| { |
| "epoch": 4.26645264847512, |
| "grad_norm": 0.18796892300648876, |
| "learning_rate": 3.8826902329788484e-07, |
| "loss": 0.4241740107536316, |
| "step": 1331, |
| "token_acc": 0.8579220936572364 |
| }, |
| { |
| "epoch": 4.269662921348314, |
| "grad_norm": 0.2064109356932018, |
| "learning_rate": 3.8494887080353166e-07, |
| "loss": 0.4365031123161316, |
| "step": 1332, |
| "token_acc": 0.8546197429602375 |
| }, |
| { |
| "epoch": 4.272873194221509, |
| "grad_norm": 0.18203455865795215, |
| "learning_rate": 3.816421486462513e-07, |
| "loss": 0.3876953125, |
| "step": 1333, |
| "token_acc": 0.8693844281300428 |
| }, |
| { |
| "epoch": 4.276083467094703, |
| "grad_norm": 0.213574817895332, |
| "learning_rate": 3.783488710827593e-07, |
| "loss": 0.3750813901424408, |
| "step": 1334, |
| "token_acc": 0.873373327290794 |
| }, |
| { |
| "epoch": 4.279293739967898, |
| "grad_norm": 0.2140942980497027, |
| "learning_rate": 3.75069052311804e-07, |
| "loss": 0.4498087763786316, |
| "step": 1335, |
| "token_acc": 0.847457992476908 |
| }, |
| { |
| "epoch": 4.282504012841091, |
| "grad_norm": 0.19578800561907392, |
| "learning_rate": 3.718027064741062e-07, |
| "loss": 0.39044189453125, |
| "step": 1336, |
| "token_acc": 0.869082210694379 |
| }, |
| { |
| "epoch": 4.285714285714286, |
| "grad_norm": 0.22104067865914637, |
| "learning_rate": 3.6854984765229984e-07, |
| "loss": 0.408935546875, |
| "step": 1337, |
| "token_acc": 0.8625050650133584 |
| }, |
| { |
| "epoch": 4.28892455858748, |
| "grad_norm": 0.2028154694754138, |
| "learning_rate": 3.6531048987087264e-07, |
| "loss": 0.4321696162223816, |
| "step": 1338, |
| "token_acc": 0.8558052507913844 |
| }, |
| { |
| "epoch": 4.292134831460674, |
| "grad_norm": 0.16476898952580152, |
| "learning_rate": 3.620846470961007e-07, |
| "loss": 0.4442138671875, |
| "step": 1339, |
| "token_acc": 0.8504904439728512 |
| }, |
| { |
| "epoch": 4.295345104333869, |
| "grad_norm": 0.20213718293160277, |
| "learning_rate": 3.5887233323599124e-07, |
| "loss": 0.3688761591911316, |
| "step": 1340, |
| "token_acc": 0.8757740050097411 |
| }, |
| { |
| "epoch": 4.298555377207062, |
| "grad_norm": 0.1890008889185189, |
| "learning_rate": 3.556735621402229e-07, |
| "loss": 0.4384969174861908, |
| "step": 1341, |
| "token_acc": 0.8527271306309512 |
| }, |
| { |
| "epoch": 4.301765650080257, |
| "grad_norm": 0.17718855075211482, |
| "learning_rate": 3.5248834760008757e-07, |
| "loss": 0.42041015625, |
| "step": 1342, |
| "token_acc": 0.8577206231880415 |
| }, |
| { |
| "epoch": 4.304975922953451, |
| "grad_norm": 0.21645292762759322, |
| "learning_rate": 3.493167033484262e-07, |
| "loss": 0.39642333984375, |
| "step": 1343, |
| "token_acc": 0.8662993750318829 |
| }, |
| { |
| "epoch": 4.308186195826646, |
| "grad_norm": 0.19869971515422113, |
| "learning_rate": 3.4615864305957355e-07, |
| "loss": 0.421630859375, |
| "step": 1344, |
| "token_acc": 0.8597636831455622 |
| }, |
| { |
| "epoch": 4.311396468699839, |
| "grad_norm": 0.6629400351496374, |
| "learning_rate": 3.430141803492977e-07, |
| "loss": 0.4062907099723816, |
| "step": 1345, |
| "token_acc": 0.8629788139687634 |
| }, |
| { |
| "epoch": 4.314606741573034, |
| "grad_norm": 0.17736584722766163, |
| "learning_rate": 3.398833287747417e-07, |
| "loss": 0.4195963740348816, |
| "step": 1346, |
| "token_acc": 0.859551744597776 |
| }, |
| { |
| "epoch": 4.317817014446228, |
| "grad_norm": 0.20646363440762722, |
| "learning_rate": 3.367661018343655e-07, |
| "loss": 0.389892578125, |
| "step": 1347, |
| "token_acc": 0.8695244463289735 |
| }, |
| { |
| "epoch": 4.321027287319422, |
| "grad_norm": 0.19356832119573253, |
| "learning_rate": 3.3366251296788696e-07, |
| "loss": 0.4179280698299408, |
| "step": 1348, |
| "token_acc": 0.859493141500395 |
| }, |
| { |
| "epoch": 4.324237560192616, |
| "grad_norm": 0.17699602920724225, |
| "learning_rate": 3.3057257555622425e-07, |
| "loss": 0.3675944209098816, |
| "step": 1349, |
| "token_acc": 0.8763813748365427 |
| }, |
| { |
| "epoch": 4.32744783306581, |
| "grad_norm": 0.1814342308311311, |
| "learning_rate": 3.274963029214385e-07, |
| "loss": 0.3459879755973816, |
| "step": 1350, |
| "token_acc": 0.8825021382326544 |
| }, |
| { |
| "epoch": 4.330658105939005, |
| "grad_norm": 0.2027288765185631, |
| "learning_rate": 3.2443370832667525e-07, |
| "loss": 0.3924560546875, |
| "step": 1351, |
| "token_acc": 0.8671193230773763 |
| }, |
| { |
| "epoch": 4.333868378812199, |
| "grad_norm": 0.19985007877208522, |
| "learning_rate": 3.2138480497611007e-07, |
| "loss": 0.3663737177848816, |
| "step": 1352, |
| "token_acc": 0.8762456313156786 |
| }, |
| { |
| "epoch": 4.337078651685394, |
| "grad_norm": 0.20362923197268062, |
| "learning_rate": 3.1834960601488696e-07, |
| "loss": 0.3803914487361908, |
| "step": 1353, |
| "token_acc": 0.871517642182498 |
| }, |
| { |
| "epoch": 4.340288924558587, |
| "grad_norm": 0.19443326332011304, |
| "learning_rate": 3.1532812452906623e-07, |
| "loss": 0.4407552182674408, |
| "step": 1354, |
| "token_acc": 0.853539740537423 |
| }, |
| { |
| "epoch": 4.343499197431782, |
| "grad_norm": 0.21761872939676885, |
| "learning_rate": 3.123203735455647e-07, |
| "loss": 0.4045817255973816, |
| "step": 1355, |
| "token_acc": 0.8657207443102927 |
| }, |
| { |
| "epoch": 4.346709470304976, |
| "grad_norm": 0.20338509759310694, |
| "learning_rate": 3.0932636603210396e-07, |
| "loss": 0.3558756709098816, |
| "step": 1356, |
| "token_acc": 0.8807724789605788 |
| }, |
| { |
| "epoch": 4.34991974317817, |
| "grad_norm": 0.1818871067962439, |
| "learning_rate": 3.0634611489714747e-07, |
| "loss": 0.4090169370174408, |
| "step": 1357, |
| "token_acc": 0.8621424332609016 |
| }, |
| { |
| "epoch": 4.353130016051364, |
| "grad_norm": 0.19834895457950263, |
| "learning_rate": 3.0337963298985143e-07, |
| "loss": 0.3870036005973816, |
| "step": 1358, |
| "token_acc": 0.8697542308742986 |
| }, |
| { |
| "epoch": 4.356340288924558, |
| "grad_norm": 0.19239094391841294, |
| "learning_rate": 3.0042693310000774e-07, |
| "loss": 0.4254557490348816, |
| "step": 1359, |
| "token_acc": 0.8584187137021286 |
| }, |
| { |
| "epoch": 4.359550561797753, |
| "grad_norm": 0.19741820606832636, |
| "learning_rate": 2.9748802795798573e-07, |
| "loss": 0.4197591245174408, |
| "step": 1360, |
| "token_acc": 0.8586211261004959 |
| }, |
| { |
| "epoch": 4.362760834670947, |
| "grad_norm": 0.17549385904831483, |
| "learning_rate": 2.9456293023468175e-07, |
| "loss": 0.3958333432674408, |
| "step": 1361, |
| "token_acc": 0.8666089202823778 |
| }, |
| { |
| "epoch": 4.365971107544142, |
| "grad_norm": 0.18418482505957448, |
| "learning_rate": 2.916516525414597e-07, |
| "loss": 0.3685709834098816, |
| "step": 1362, |
| "token_acc": 0.8757923270088352 |
| }, |
| { |
| "epoch": 4.369181380417335, |
| "grad_norm": 0.17301330514887706, |
| "learning_rate": 2.887542074301019e-07, |
| "loss": 0.4075927734375, |
| "step": 1363, |
| "token_acc": 0.8605788261905724 |
| }, |
| { |
| "epoch": 4.37239165329053, |
| "grad_norm": 0.19604758020776158, |
| "learning_rate": 2.8587060739275174e-07, |
| "loss": 0.408935546875, |
| "step": 1364, |
| "token_acc": 0.8637168977001795 |
| }, |
| { |
| "epoch": 4.375601926163724, |
| "grad_norm": 0.18596677995931085, |
| "learning_rate": 2.830008648618606e-07, |
| "loss": 0.384521484375, |
| "step": 1365, |
| "token_acc": 0.8701020161208599 |
| }, |
| { |
| "epoch": 4.378812199036918, |
| "grad_norm": 0.18061133688689154, |
| "learning_rate": 2.801449922101314e-07, |
| "loss": 0.3860066831111908, |
| "step": 1366, |
| "token_acc": 0.8702969224355789 |
| }, |
| { |
| "epoch": 4.382022471910112, |
| "grad_norm": 0.16828215003829966, |
| "learning_rate": 2.7730300175047263e-07, |
| "loss": 0.3342081904411316, |
| "step": 1367, |
| "token_acc": 0.8873373657943231 |
| }, |
| { |
| "epoch": 4.385232744783306, |
| "grad_norm": 0.35555500278869245, |
| "learning_rate": 2.744749057359378e-07, |
| "loss": 0.4285888671875, |
| "step": 1368, |
| "token_acc": 0.8555756341438024 |
| }, |
| { |
| "epoch": 4.388443017656501, |
| "grad_norm": 0.18514465832240257, |
| "learning_rate": 2.716607163596759e-07, |
| "loss": 0.387451171875, |
| "step": 1369, |
| "token_acc": 0.8703410803035158 |
| }, |
| { |
| "epoch": 4.391653290529695, |
| "grad_norm": 0.1937482638050718, |
| "learning_rate": 2.688604457548783e-07, |
| "loss": 0.4518229365348816, |
| "step": 1370, |
| "token_acc": 0.8489506475979381 |
| }, |
| { |
| "epoch": 4.39486356340289, |
| "grad_norm": 0.22216734997918833, |
| "learning_rate": 2.660741059947267e-07, |
| "loss": 0.4341634213924408, |
| "step": 1371, |
| "token_acc": 0.8556407722211241 |
| }, |
| { |
| "epoch": 4.398073836276083, |
| "grad_norm": 0.17854562511660027, |
| "learning_rate": 2.6330170909234055e-07, |
| "loss": 0.3616740107536316, |
| "step": 1372, |
| "token_acc": 0.878413306698194 |
| }, |
| { |
| "epoch": 4.401284109149278, |
| "grad_norm": 0.15704979914554196, |
| "learning_rate": 2.605432670007265e-07, |
| "loss": 0.360595703125, |
| "step": 1373, |
| "token_acc": 0.8790725725260599 |
| }, |
| { |
| "epoch": 4.404494382022472, |
| "grad_norm": 0.18433319937167036, |
| "learning_rate": 2.5779879161272474e-07, |
| "loss": 0.44140625, |
| "step": 1374, |
| "token_acc": 0.8535901582776324 |
| }, |
| { |
| "epoch": 4.407704654895666, |
| "grad_norm": 0.25076313067208356, |
| "learning_rate": 2.550682947609599e-07, |
| "loss": 0.4233601987361908, |
| "step": 1375, |
| "token_acc": 0.8588385113791485 |
| }, |
| { |
| "epoch": 4.41091492776886, |
| "grad_norm": 0.22379555574253152, |
| "learning_rate": 2.5235178821778793e-07, |
| "loss": 0.44354248046875, |
| "step": 1376, |
| "token_acc": 0.8516841003543864 |
| }, |
| { |
| "epoch": 4.414125200642054, |
| "grad_norm": 0.1936509284000272, |
| "learning_rate": 2.496492836952486e-07, |
| "loss": 0.4043172299861908, |
| "step": 1377, |
| "token_acc": 0.8650482422621322 |
| }, |
| { |
| "epoch": 4.417335473515249, |
| "grad_norm": 0.18093605782753522, |
| "learning_rate": 2.469607928450114e-07, |
| "loss": 0.3992106318473816, |
| "step": 1378, |
| "token_acc": 0.8653276993296248 |
| }, |
| { |
| "epoch": 4.420545746388443, |
| "grad_norm": 0.17557936684703238, |
| "learning_rate": 2.442863272583258e-07, |
| "loss": 0.4185791015625, |
| "step": 1379, |
| "token_acc": 0.8599879260834397 |
| }, |
| { |
| "epoch": 4.423756019261638, |
| "grad_norm": 0.1928260334885516, |
| "learning_rate": 2.4162589846597307e-07, |
| "loss": 0.4438883662223816, |
| "step": 1380, |
| "token_acc": 0.8516120924151115 |
| }, |
| { |
| "epoch": 4.426966292134831, |
| "grad_norm": 0.19447542899409975, |
| "learning_rate": 2.389795179382183e-07, |
| "loss": 0.4063720703125, |
| "step": 1381, |
| "token_acc": 0.8649005377133958 |
| }, |
| { |
| "epoch": 4.430176565008026, |
| "grad_norm": 0.1961732042045321, |
| "learning_rate": 2.3634719708475504e-07, |
| "loss": 0.4538167417049408, |
| "step": 1382, |
| "token_acc": 0.8484574145451315 |
| }, |
| { |
| "epoch": 4.43338683788122, |
| "grad_norm": 0.2123227934041795, |
| "learning_rate": 2.3372894725465985e-07, |
| "loss": 0.4500325620174408, |
| "step": 1383, |
| "token_acc": 0.8502840199626592 |
| }, |
| { |
| "epoch": 4.436597110754414, |
| "grad_norm": 0.18582801640426513, |
| "learning_rate": 2.3112477973634532e-07, |
| "loss": 0.3983357846736908, |
| "step": 1384, |
| "token_acc": 0.865070239292316 |
| }, |
| { |
| "epoch": 4.439807383627608, |
| "grad_norm": 0.19852931167320112, |
| "learning_rate": 2.2853470575750666e-07, |
| "loss": 0.4342448115348816, |
| "step": 1385, |
| "token_acc": 0.8552494168787108 |
| }, |
| { |
| "epoch": 4.443017656500802, |
| "grad_norm": 0.2054451058675366, |
| "learning_rate": 2.2595873648507686e-07, |
| "loss": 0.4345296323299408, |
| "step": 1386, |
| "token_acc": 0.854400060575443 |
| }, |
| { |
| "epoch": 4.446227929373997, |
| "grad_norm": 0.20872486334243198, |
| "learning_rate": 2.2339688302517752e-07, |
| "loss": 0.3849284052848816, |
| "step": 1387, |
| "token_acc": 0.8710310516021074 |
| }, |
| { |
| "epoch": 4.449438202247191, |
| "grad_norm": 0.1677447267888503, |
| "learning_rate": 2.208491564230704e-07, |
| "loss": 0.3639323115348816, |
| "step": 1388, |
| "token_acc": 0.8765660123119159 |
| }, |
| { |
| "epoch": 4.452648475120386, |
| "grad_norm": 0.20084403258571848, |
| "learning_rate": 2.1831556766310999e-07, |
| "loss": 0.4356689453125, |
| "step": 1389, |
| "token_acc": 0.8543227886379977 |
| }, |
| { |
| "epoch": 4.455858747993579, |
| "grad_norm": 0.19999448020787874, |
| "learning_rate": 2.1579612766869688e-07, |
| "loss": 0.3855387568473816, |
| "step": 1390, |
| "token_acc": 0.8703626579023295 |
| }, |
| { |
| "epoch": 4.459069020866774, |
| "grad_norm": 0.17428186503514684, |
| "learning_rate": 2.132908473022303e-07, |
| "loss": 0.3995158076286316, |
| "step": 1391, |
| "token_acc": 0.867400816167333 |
| }, |
| { |
| "epoch": 4.462279293739968, |
| "grad_norm": 0.19426057533907357, |
| "learning_rate": 2.1079973736506118e-07, |
| "loss": 0.3830973505973816, |
| "step": 1392, |
| "token_acc": 0.872240673210398 |
| }, |
| { |
| "epoch": 4.465489566613162, |
| "grad_norm": 0.18324896369450797, |
| "learning_rate": 2.0832280859744473e-07, |
| "loss": 0.4040934443473816, |
| "step": 1393, |
| "token_acc": 0.8639917176570526 |
| }, |
| { |
| "epoch": 4.468699839486356, |
| "grad_norm": 0.1958613880239227, |
| "learning_rate": 2.058600716784957e-07, |
| "loss": 0.4177653193473816, |
| "step": 1394, |
| "token_acc": 0.8590019981339825 |
| }, |
| { |
| "epoch": 4.47191011235955, |
| "grad_norm": 0.20201865980897998, |
| "learning_rate": 2.034115372261433e-07, |
| "loss": 0.4073486328125, |
| "step": 1395, |
| "token_acc": 0.8629729587765791 |
| }, |
| { |
| "epoch": 4.475120385232745, |
| "grad_norm": 0.219232387300634, |
| "learning_rate": 2.0097721579707965e-07, |
| "loss": 0.4217122495174408, |
| "step": 1396, |
| "token_acc": 0.8575014442075773 |
| }, |
| { |
| "epoch": 4.478330658105939, |
| "grad_norm": 0.2053024503342439, |
| "learning_rate": 1.985571178867216e-07, |
| "loss": 0.4247233271598816, |
| "step": 1397, |
| "token_acc": 0.8595181614995406 |
| }, |
| { |
| "epoch": 4.481540930979134, |
| "grad_norm": 0.197725254363412, |
| "learning_rate": 1.9615125392916088e-07, |
| "loss": 0.4213460385799408, |
| "step": 1398, |
| "token_acc": 0.8587392006416064 |
| }, |
| { |
| "epoch": 4.484751203852327, |
| "grad_norm": 0.18897462231141934, |
| "learning_rate": 1.9375963429712278e-07, |
| "loss": 0.4065144956111908, |
| "step": 1399, |
| "token_acc": 0.8645349504146544 |
| }, |
| { |
| "epoch": 4.487961476725522, |
| "grad_norm": 0.2470233814439173, |
| "learning_rate": 1.9138226930191543e-07, |
| "loss": 0.3607584834098816, |
| "step": 1400, |
| "token_acc": 0.8788034346501037 |
| }, |
| { |
| "epoch": 4.491171749598716, |
| "grad_norm": 0.21359818787913157, |
| "learning_rate": 1.8901916919339063e-07, |
| "loss": 0.4027913510799408, |
| "step": 1401, |
| "token_acc": 0.8638582562514848 |
| }, |
| { |
| "epoch": 4.49438202247191, |
| "grad_norm": 0.1851844159684862, |
| "learning_rate": 1.866703441598999e-07, |
| "loss": 0.4230143427848816, |
| "step": 1402, |
| "token_acc": 0.8582203402573039 |
| }, |
| { |
| "epoch": 4.497592295345104, |
| "grad_norm": 0.2157643414774233, |
| "learning_rate": 1.8433580432824604e-07, |
| "loss": 0.443603515625, |
| "step": 1403, |
| "token_acc": 0.8509721741439272 |
| }, |
| { |
| "epoch": 4.500802568218298, |
| "grad_norm": 0.18862363130606546, |
| "learning_rate": 1.8201555976364443e-07, |
| "loss": 0.4197998046875, |
| "step": 1404, |
| "token_acc": 0.8586452996070447 |
| }, |
| { |
| "epoch": 4.504012841091493, |
| "grad_norm": 0.21602876440113533, |
| "learning_rate": 1.7970962046967388e-07, |
| "loss": 0.4269612729549408, |
| "step": 1405, |
| "token_acc": 0.8580807203352152 |
| }, |
| { |
| "epoch": 4.507223113964687, |
| "grad_norm": 0.19575645391056692, |
| "learning_rate": 1.7741799638824157e-07, |
| "loss": 0.4242960810661316, |
| "step": 1406, |
| "token_acc": 0.8554766200266856 |
| }, |
| { |
| "epoch": 4.510433386837882, |
| "grad_norm": 0.18945812539306195, |
| "learning_rate": 1.7514069739953219e-07, |
| "loss": 0.373779296875, |
| "step": 1407, |
| "token_acc": 0.8747324228171732 |
| }, |
| { |
| "epoch": 4.513643659711075, |
| "grad_norm": 0.19167885218390776, |
| "learning_rate": 1.728777333219698e-07, |
| "loss": 0.447265625, |
| "step": 1408, |
| "token_acc": 0.8509208269492929 |
| }, |
| { |
| "epoch": 4.51685393258427, |
| "grad_norm": 0.18022233494326212, |
| "learning_rate": 1.7062911391217515e-07, |
| "loss": 0.4439290463924408, |
| "step": 1409, |
| "token_acc": 0.849044687255004 |
| }, |
| { |
| "epoch": 4.520064205457464, |
| "grad_norm": 0.23067829941613088, |
| "learning_rate": 1.6839484886492133e-07, |
| "loss": 0.3721517026424408, |
| "step": 1410, |
| "token_acc": 0.8760147029100293 |
| }, |
| { |
| "epoch": 4.523274478330658, |
| "grad_norm": 0.1756868996664974, |
| "learning_rate": 1.6617494781309534e-07, |
| "loss": 0.389892578125, |
| "step": 1411, |
| "token_acc": 0.8671128158562844 |
| }, |
| { |
| "epoch": 4.526484751203852, |
| "grad_norm": 0.20804654536137415, |
| "learning_rate": 1.6396942032765293e-07, |
| "loss": 0.4468587338924408, |
| "step": 1412, |
| "token_acc": 0.850228639546784 |
| }, |
| { |
| "epoch": 4.529695024077046, |
| "grad_norm": 0.21308168454138018, |
| "learning_rate": 1.617782759175807e-07, |
| "loss": 0.4160970151424408, |
| "step": 1413, |
| "token_acc": 0.8622860673783908 |
| }, |
| { |
| "epoch": 4.532905296950241, |
| "grad_norm": 0.22297079035303088, |
| "learning_rate": 1.5960152402985277e-07, |
| "loss": 0.3673502802848816, |
| "step": 1414, |
| "token_acc": 0.8770901721910811 |
| }, |
| { |
| "epoch": 4.536115569823435, |
| "grad_norm": 0.18659098268766408, |
| "learning_rate": 1.574391740493913e-07, |
| "loss": 0.4529622495174408, |
| "step": 1415, |
| "token_acc": 0.8480984391282939 |
| }, |
| { |
| "epoch": 4.539325842696629, |
| "grad_norm": 0.197242571565292, |
| "learning_rate": 1.5529123529902472e-07, |
| "loss": 0.4353841245174408, |
| "step": 1416, |
| "token_acc": 0.8548159161410306 |
| }, |
| { |
| "epoch": 4.542536115569823, |
| "grad_norm": 0.20715809917824884, |
| "learning_rate": 1.5315771703944953e-07, |
| "loss": 0.43798828125, |
| "step": 1417, |
| "token_acc": 0.8528016169399727 |
| }, |
| { |
| "epoch": 4.545746388443018, |
| "grad_norm": 0.18474227890442782, |
| "learning_rate": 1.5103862846918847e-07, |
| "loss": 0.4179891049861908, |
| "step": 1418, |
| "token_acc": 0.8599016224268302 |
| }, |
| { |
| "epoch": 4.548956661316212, |
| "grad_norm": 0.20946972284990092, |
| "learning_rate": 1.4893397872455183e-07, |
| "loss": 0.3983154296875, |
| "step": 1419, |
| "token_acc": 0.8677837933098281 |
| }, |
| { |
| "epoch": 4.552166934189406, |
| "grad_norm": 0.19986064963725622, |
| "learning_rate": 1.468437768795981e-07, |
| "loss": 0.4313151240348816, |
| "step": 1420, |
| "token_acc": 0.8547023739216093 |
| }, |
| { |
| "epoch": 4.5553772070626, |
| "grad_norm": 0.18486103454766173, |
| "learning_rate": 1.4476803194609477e-07, |
| "loss": 0.38690185546875, |
| "step": 1421, |
| "token_acc": 0.869865230167784 |
| }, |
| { |
| "epoch": 4.558587479935794, |
| "grad_norm": 0.19289911775635488, |
| "learning_rate": 1.4270675287347833e-07, |
| "loss": 0.435546875, |
| "step": 1422, |
| "token_acc": 0.8535203141238655 |
| }, |
| { |
| "epoch": 4.561797752808989, |
| "grad_norm": 0.18956544459287866, |
| "learning_rate": 1.4065994854881654e-07, |
| "loss": 0.4341227412223816, |
| "step": 1423, |
| "token_acc": 0.8545348884673308 |
| }, |
| { |
| "epoch": 4.565008025682183, |
| "grad_norm": 0.17367270405131094, |
| "learning_rate": 1.3862762779677262e-07, |
| "loss": 0.4180094599723816, |
| "step": 1424, |
| "token_acc": 0.8567365363850598 |
| }, |
| { |
| "epoch": 4.568218298555378, |
| "grad_norm": 0.20824227581500948, |
| "learning_rate": 1.3660979937956268e-07, |
| "loss": 0.4293619990348816, |
| "step": 1425, |
| "token_acc": 0.8565126394525634 |
| }, |
| { |
| "epoch": 4.571428571428571, |
| "grad_norm": 0.19559983755189972, |
| "learning_rate": 1.3460647199691945e-07, |
| "loss": 0.4120280146598816, |
| "step": 1426, |
| "token_acc": 0.8629014499737051 |
| }, |
| { |
| "epoch": 4.574638844301766, |
| "grad_norm": 0.20180565953660315, |
| "learning_rate": 1.3261765428605766e-07, |
| "loss": 0.3741862177848816, |
| "step": 1427, |
| "token_acc": 0.8753082725636212 |
| }, |
| { |
| "epoch": 4.57784911717496, |
| "grad_norm": 0.20104038941301108, |
| "learning_rate": 1.3064335482163337e-07, |
| "loss": 0.4160970151424408, |
| "step": 1428, |
| "token_acc": 0.8594041300507503 |
| }, |
| { |
| "epoch": 4.581059390048154, |
| "grad_norm": 0.1965890577139458, |
| "learning_rate": 1.2868358211570812e-07, |
| "loss": 0.4365234375, |
| "step": 1429, |
| "token_acc": 0.8519271262041217 |
| }, |
| { |
| "epoch": 4.584269662921348, |
| "grad_norm": 0.19053080611967452, |
| "learning_rate": 1.267383446177121e-07, |
| "loss": 0.361114501953125, |
| "step": 1430, |
| "token_acc": 0.878369035008151 |
| }, |
| { |
| "epoch": 4.587479935794542, |
| "grad_norm": 0.6296605966778805, |
| "learning_rate": 1.248076507144087e-07, |
| "loss": 0.4423421323299408, |
| "step": 1431, |
| "token_acc": 0.8508115231639366 |
| }, |
| { |
| "epoch": 4.590690208667737, |
| "grad_norm": 0.17643731780551508, |
| "learning_rate": 1.2289150872985642e-07, |
| "loss": 0.3570760190486908, |
| "step": 1432, |
| "token_acc": 0.8799221630383898 |
| }, |
| { |
| "epoch": 4.593900481540931, |
| "grad_norm": 0.1920351337084536, |
| "learning_rate": 1.2098992692537563e-07, |
| "loss": 0.3970947265625, |
| "step": 1433, |
| "token_acc": 0.8656649371161228 |
| }, |
| { |
| "epoch": 4.597110754414125, |
| "grad_norm": 0.24155602493913697, |
| "learning_rate": 1.1910291349951024e-07, |
| "loss": 0.401123046875, |
| "step": 1434, |
| "token_acc": 0.8663016423800989 |
| }, |
| { |
| "epoch": 4.600321027287319, |
| "grad_norm": 0.23493455092849216, |
| "learning_rate": 1.1723047658799368e-07, |
| "loss": 0.3884684443473816, |
| "step": 1435, |
| "token_acc": 0.8696389403108131 |
| }, |
| { |
| "epoch": 4.603531300160514, |
| "grad_norm": 0.20660259029932984, |
| "learning_rate": 1.1537262426371425e-07, |
| "loss": 0.3963419795036316, |
| "step": 1436, |
| "token_acc": 0.8671111703086193 |
| }, |
| { |
| "epoch": 4.606741573033708, |
| "grad_norm": 0.1878147356962549, |
| "learning_rate": 1.1352936453667861e-07, |
| "loss": 0.4197591245174408, |
| "step": 1437, |
| "token_acc": 0.8595295977028828 |
| }, |
| { |
| "epoch": 4.609951845906902, |
| "grad_norm": 0.19082198521134558, |
| "learning_rate": 1.1170070535398108e-07, |
| "loss": 0.4320882260799408, |
| "step": 1438, |
| "token_acc": 0.8556060993004294 |
| }, |
| { |
| "epoch": 4.613162118780096, |
| "grad_norm": 0.18879001901055564, |
| "learning_rate": 1.098866545997636e-07, |
| "loss": 0.4278564453125, |
| "step": 1439, |
| "token_acc": 0.8558024296675192 |
| }, |
| { |
| "epoch": 4.61637239165329, |
| "grad_norm": 0.18946714623578712, |
| "learning_rate": 1.0808722009518584e-07, |
| "loss": 0.3866373896598816, |
| "step": 1440, |
| "token_acc": 0.8710574365640688 |
| }, |
| { |
| "epoch": 4.619582664526485, |
| "grad_norm": 0.1939962029433925, |
| "learning_rate": 1.0630240959839137e-07, |
| "loss": 0.4243571162223816, |
| "step": 1441, |
| "token_acc": 0.8579994341180615 |
| }, |
| { |
| "epoch": 4.622792937399679, |
| "grad_norm": 0.21957470785072045, |
| "learning_rate": 1.0453223080447272e-07, |
| "loss": 0.4058024287223816, |
| "step": 1442, |
| "token_acc": 0.864173404097019 |
| }, |
| { |
| "epoch": 4.626003210272874, |
| "grad_norm": 0.19758763574672267, |
| "learning_rate": 1.0277669134543866e-07, |
| "loss": 0.3876953125, |
| "step": 1443, |
| "token_acc": 0.869858307509244 |
| }, |
| { |
| "epoch": 4.629213483146067, |
| "grad_norm": 0.20067056305259742, |
| "learning_rate": 1.0103579879018088e-07, |
| "loss": 0.3555908203125, |
| "step": 1444, |
| "token_acc": 0.8820659184604286 |
| }, |
| { |
| "epoch": 4.632423756019262, |
| "grad_norm": 0.23677955722305155, |
| "learning_rate": 9.930956064444363e-08, |
| "loss": 0.4253336787223816, |
| "step": 1445, |
| "token_acc": 0.857207984790406 |
| }, |
| { |
| "epoch": 4.635634028892456, |
| "grad_norm": 0.20852218454016413, |
| "learning_rate": 9.759798435078798e-08, |
| "loss": 0.3988037109375, |
| "step": 1446, |
| "token_acc": 0.8677262375703081 |
| }, |
| { |
| "epoch": 4.63884430176565, |
| "grad_norm": 0.20420025529095903, |
| "learning_rate": 9.590107728856268e-08, |
| "loss": 0.4260661005973816, |
| "step": 1447, |
| "token_acc": 0.8579336664926283 |
| }, |
| { |
| "epoch": 4.642054574638844, |
| "grad_norm": 0.23411856679995743, |
| "learning_rate": 9.421884677386915e-08, |
| "loss": 0.4188639521598816, |
| "step": 1448, |
| "token_acc": 0.859142508633147 |
| }, |
| { |
| "epoch": 4.645264847512038, |
| "grad_norm": 0.18200197390792874, |
| "learning_rate": 9.255130005953398e-08, |
| "loss": 0.3849690854549408, |
| "step": 1449, |
| "token_acc": 0.872107766818619 |
| }, |
| { |
| "epoch": 4.648475120385233, |
| "grad_norm": 0.2916389218672418, |
| "learning_rate": 9.089844433507426e-08, |
| "loss": 0.4009196162223816, |
| "step": 1450, |
| "token_acc": 0.8648391516003826 |
| }, |
| { |
| "epoch": 4.651685393258427, |
| "grad_norm": 0.17308443268439605, |
| "learning_rate": 8.926028672666886e-08, |
| "loss": 0.383544921875, |
| "step": 1451, |
| "token_acc": 0.871333844537722 |
| }, |
| { |
| "epoch": 4.654895666131621, |
| "grad_norm": 0.17301234315676522, |
| "learning_rate": 8.763683429712498e-08, |
| "loss": 0.3865559995174408, |
| "step": 1452, |
| "token_acc": 0.8685990988114892 |
| }, |
| { |
| "epoch": 4.658105939004815, |
| "grad_norm": 0.280043466935383, |
| "learning_rate": 8.602809404585143e-08, |
| "loss": 0.4070638120174408, |
| "step": 1453, |
| "token_acc": 0.8629206897298851 |
| }, |
| { |
| "epoch": 4.66131621187801, |
| "grad_norm": 0.21422537153162374, |
| "learning_rate": 8.44340729088251e-08, |
| "loss": 0.4311930537223816, |
| "step": 1454, |
| "token_acc": 0.8536791894362028 |
| }, |
| { |
| "epoch": 4.664526484751204, |
| "grad_norm": 0.17201229972776902, |
| "learning_rate": 8.285477775856264e-08, |
| "loss": 0.3789469599723816, |
| "step": 1455, |
| "token_acc": 0.8721962958676278 |
| }, |
| { |
| "epoch": 4.667736757624398, |
| "grad_norm": 0.20234057546368325, |
| "learning_rate": 8.129021540409099e-08, |
| "loss": 0.43701171875, |
| "step": 1456, |
| "token_acc": 0.8541992043359913 |
| }, |
| { |
| "epoch": 4.670947030497592, |
| "grad_norm": 0.22317333643394066, |
| "learning_rate": 7.974039259091692e-08, |
| "loss": 0.3818766474723816, |
| "step": 1457, |
| "token_acc": 0.8724333764763185 |
| }, |
| { |
| "epoch": 4.674157303370786, |
| "grad_norm": 0.19956761588146485, |
| "learning_rate": 7.820531600099962e-08, |
| "loss": 0.4542236328125, |
| "step": 1458, |
| "token_acc": 0.8496136963053602 |
| }, |
| { |
| "epoch": 4.677367576243981, |
| "grad_norm": 0.2720616063396832, |
| "learning_rate": 7.668499225272025e-08, |
| "loss": 0.4219563901424408, |
| "step": 1459, |
| "token_acc": 0.8568100054002388 |
| }, |
| { |
| "epoch": 4.680577849117175, |
| "grad_norm": 0.1929782285557914, |
| "learning_rate": 7.517942790085363e-08, |
| "loss": 0.4552815854549408, |
| "step": 1460, |
| "token_acc": 0.8469568147492945 |
| }, |
| { |
| "epoch": 4.68378812199037, |
| "grad_norm": 0.20352188963079326, |
| "learning_rate": 7.368862943654147e-08, |
| "loss": 0.384765625, |
| "step": 1461, |
| "token_acc": 0.8714652683423914 |
| }, |
| { |
| "epoch": 4.686998394863563, |
| "grad_norm": 0.19969466438102046, |
| "learning_rate": 7.221260328726276e-08, |
| "loss": 0.4564616084098816, |
| "step": 1462, |
| "token_acc": 0.8487703460161007 |
| }, |
| { |
| "epoch": 4.690208667736758, |
| "grad_norm": 0.17978501403580038, |
| "learning_rate": 7.075135581680658e-08, |
| "loss": 0.4076334834098816, |
| "step": 1463, |
| "token_acc": 0.8603003744043567 |
| }, |
| { |
| "epoch": 4.693418940609952, |
| "grad_norm": 0.20366337402097498, |
| "learning_rate": 6.930489332524536e-08, |
| "loss": 0.4206136167049408, |
| "step": 1464, |
| "token_acc": 0.8593547259114949 |
| }, |
| { |
| "epoch": 4.696629213483146, |
| "grad_norm": 0.19419110557592836, |
| "learning_rate": 6.787322204890527e-08, |
| "loss": 0.4336954951286316, |
| "step": 1465, |
| "token_acc": 0.8534309255558069 |
| }, |
| { |
| "epoch": 4.69983948635634, |
| "grad_norm": 0.18294404853503618, |
| "learning_rate": 6.645634816034335e-08, |
| "loss": 0.4189046323299408, |
| "step": 1466, |
| "token_acc": 0.8585958978222711 |
| }, |
| { |
| "epoch": 4.703049759229534, |
| "grad_norm": 0.17335992283179122, |
| "learning_rate": 6.50542777683179e-08, |
| "loss": 0.4037272334098816, |
| "step": 1467, |
| "token_acc": 0.8626201549093779 |
| }, |
| { |
| "epoch": 4.706260032102729, |
| "grad_norm": 0.1795614119830208, |
| "learning_rate": 6.366701691776256e-08, |
| "loss": 0.4220377802848816, |
| "step": 1468, |
| "token_acc": 0.8588724081963655 |
| }, |
| { |
| "epoch": 4.709470304975923, |
| "grad_norm": 0.20472933364890156, |
| "learning_rate": 6.229457158976014e-08, |
| "loss": 0.4786784052848816, |
| "step": 1469, |
| "token_acc": 0.8412095888868124 |
| }, |
| { |
| "epoch": 4.712680577849117, |
| "grad_norm": 0.18039371374750934, |
| "learning_rate": 6.09369477015187e-08, |
| "loss": 0.3718668818473816, |
| "step": 1470, |
| "token_acc": 0.8749293821269497 |
| }, |
| { |
| "epoch": 4.715890850722311, |
| "grad_norm": 0.32958483208060935, |
| "learning_rate": 5.959415110634375e-08, |
| "loss": 0.3703206479549408, |
| "step": 1471, |
| "token_acc": 0.8735466357782282 |
| }, |
| { |
| "epoch": 4.719101123595506, |
| "grad_norm": 0.19713307989540768, |
| "learning_rate": 5.826618759361396e-08, |
| "loss": 0.4169515073299408, |
| "step": 1472, |
| "token_acc": 0.8602492479587451 |
| }, |
| { |
| "epoch": 4.7223113964687, |
| "grad_norm": 0.19385974784241175, |
| "learning_rate": 5.6953062888756566e-08, |
| "loss": 0.4407552182674408, |
| "step": 1473, |
| "token_acc": 0.8521287513030279 |
| }, |
| { |
| "epoch": 4.725521669341894, |
| "grad_norm": 0.19774134819733538, |
| "learning_rate": 5.565478265322138e-08, |
| "loss": 0.3862508237361908, |
| "step": 1474, |
| "token_acc": 0.8715619692168759 |
| }, |
| { |
| "epoch": 4.728731942215088, |
| "grad_norm": 0.19218443693933007, |
| "learning_rate": 5.4371352484458235e-08, |
| "loss": 0.3851521909236908, |
| "step": 1475, |
| "token_acc": 0.8701733830356677 |
| }, |
| { |
| "epoch": 4.731942215088282, |
| "grad_norm": 0.19666432540868903, |
| "learning_rate": 5.310277791589174e-08, |
| "loss": 0.4151204526424408, |
| "step": 1476, |
| "token_acc": 0.8608209992893119 |
| }, |
| { |
| "epoch": 4.735152487961477, |
| "grad_norm": 0.18416564772277194, |
| "learning_rate": 5.1849064416896796e-08, |
| "loss": 0.4468994140625, |
| "step": 1477, |
| "token_acc": 0.8500327093928715 |
| }, |
| { |
| "epoch": 4.738362760834671, |
| "grad_norm": 0.19335434520437636, |
| "learning_rate": 5.061021739277605e-08, |
| "loss": 0.4487711787223816, |
| "step": 1478, |
| "token_acc": 0.848170593264962 |
| }, |
| { |
| "epoch": 4.741573033707866, |
| "grad_norm": 0.20958484707121441, |
| "learning_rate": 4.9386242184737364e-08, |
| "loss": 0.4549560546875, |
| "step": 1479, |
| "token_acc": 0.8482572892840267 |
| }, |
| { |
| "epoch": 4.744783306581059, |
| "grad_norm": 0.23289234156230862, |
| "learning_rate": 4.817714406986856e-08, |
| "loss": 0.4112345576286316, |
| "step": 1480, |
| "token_acc": 0.8634959645826338 |
| }, |
| { |
| "epoch": 4.747993579454254, |
| "grad_norm": 0.18402731735735614, |
| "learning_rate": 4.698292826111644e-08, |
| "loss": 0.3708903193473816, |
| "step": 1481, |
| "token_acc": 0.8732122154116143 |
| }, |
| { |
| "epoch": 4.751203852327448, |
| "grad_norm": 0.2018874354019744, |
| "learning_rate": 4.580359990726307e-08, |
| "loss": 0.4150390625, |
| "step": 1482, |
| "token_acc": 0.8606020526688325 |
| }, |
| { |
| "epoch": 4.754414125200642, |
| "grad_norm": 0.1861877841024952, |
| "learning_rate": 4.4639164092905194e-08, |
| "loss": 0.412841796875, |
| "step": 1483, |
| "token_acc": 0.861394849559571 |
| }, |
| { |
| "epoch": 4.757624398073836, |
| "grad_norm": 0.20083510848897834, |
| "learning_rate": 4.3489625838430524e-08, |
| "loss": 0.4234822690486908, |
| "step": 1484, |
| "token_acc": 0.8593256868624384 |
| }, |
| { |
| "epoch": 4.76083467094703, |
| "grad_norm": 0.18492708213928652, |
| "learning_rate": 4.235499009999794e-08, |
| "loss": 0.3972371518611908, |
| "step": 1485, |
| "token_acc": 0.8659506220432518 |
| }, |
| { |
| "epoch": 4.764044943820225, |
| "grad_norm": 0.24624452749715423, |
| "learning_rate": 4.1235261769513364e-08, |
| "loss": 0.4518229365348816, |
| "step": 1486, |
| "token_acc": 0.848350926099052 |
| }, |
| { |
| "epoch": 4.767255216693419, |
| "grad_norm": 0.20175500356945403, |
| "learning_rate": 4.0130445674612326e-08, |
| "loss": 0.4331461787223816, |
| "step": 1487, |
| "token_acc": 0.8546658464127549 |
| }, |
| { |
| "epoch": 4.770465489566613, |
| "grad_norm": 0.17764076834579673, |
| "learning_rate": 3.9040546578635814e-08, |
| "loss": 0.411865234375, |
| "step": 1488, |
| "token_acc": 0.8623808613308531 |
| }, |
| { |
| "epoch": 4.773675762439807, |
| "grad_norm": 0.1781652695765274, |
| "learning_rate": 3.796556918061245e-08, |
| "loss": 0.408203125, |
| "step": 1489, |
| "token_acc": 0.86270329295958 |
| }, |
| { |
| "epoch": 4.776886035313002, |
| "grad_norm": 0.28325044353823575, |
| "learning_rate": 3.69055181152359e-08, |
| "loss": 0.4093017578125, |
| "step": 1490, |
| "token_acc": 0.8637463671884823 |
| }, |
| { |
| "epoch": 4.780096308186196, |
| "grad_norm": 0.21238301054517855, |
| "learning_rate": 3.586039795284629e-08, |
| "loss": 0.3710530698299408, |
| "step": 1491, |
| "token_acc": 0.8759641921780346 |
| }, |
| { |
| "epoch": 4.78330658105939, |
| "grad_norm": 0.17753991981401762, |
| "learning_rate": 3.483021319940993e-08, |
| "loss": 0.3804118037223816, |
| "step": 1492, |
| "token_acc": 0.870852992790632 |
| }, |
| { |
| "epoch": 4.786516853932584, |
| "grad_norm": 0.17697812521163442, |
| "learning_rate": 3.381496829650032e-08, |
| "loss": 0.4010416865348816, |
| "step": 1493, |
| "token_acc": 0.8652478551294941 |
| }, |
| { |
| "epoch": 4.789727126805778, |
| "grad_norm": 0.17608131878830596, |
| "learning_rate": 3.28146676212791e-08, |
| "loss": 0.4404703974723816, |
| "step": 1494, |
| "token_acc": 0.8498514163405005 |
| }, |
| { |
| "epoch": 4.792937399678973, |
| "grad_norm": 0.20871930874623706, |
| "learning_rate": 3.182931548647622e-08, |
| "loss": 0.4266357421875, |
| "step": 1495, |
| "token_acc": 0.8558437590199414 |
| }, |
| { |
| "epoch": 4.796147672552167, |
| "grad_norm": 0.19722909141928582, |
| "learning_rate": 3.085891614037245e-08, |
| "loss": 0.4145914912223816, |
| "step": 1496, |
| "token_acc": 0.8616536435520983 |
| }, |
| { |
| "epoch": 4.799357945425362, |
| "grad_norm": 0.20580969760791265, |
| "learning_rate": 2.9903473766780376e-08, |
| "loss": 0.385986328125, |
| "step": 1497, |
| "token_acc": 0.8712499849602349 |
| }, |
| { |
| "epoch": 4.802568218298555, |
| "grad_norm": 0.17137359713107464, |
| "learning_rate": 2.896299248502687e-08, |
| "loss": 0.3893229365348816, |
| "step": 1498, |
| "token_acc": 0.8688513441804728 |
| }, |
| { |
| "epoch": 4.80577849117175, |
| "grad_norm": 0.18970792203275755, |
| "learning_rate": 2.8037476349934474e-08, |
| "loss": 0.4296875, |
| "step": 1499, |
| "token_acc": 0.8569213598383156 |
| }, |
| { |
| "epoch": 4.808988764044944, |
| "grad_norm": 0.19649667842047547, |
| "learning_rate": 2.7126929351804662e-08, |
| "loss": 0.4334309995174408, |
| "step": 1500, |
| "token_acc": 0.8548346521520377 |
| }, |
| { |
| "epoch": 4.8121990369181376, |
| "grad_norm": 0.17153631748781512, |
| "learning_rate": 2.6231355416401148e-08, |
| "loss": 0.4295857846736908, |
| "step": 1501, |
| "token_acc": 0.8533903553607919 |
| }, |
| { |
| "epoch": 4.815409309791332, |
| "grad_norm": 0.2067960920586377, |
| "learning_rate": 2.5350758404931617e-08, |
| "loss": 0.4170328974723816, |
| "step": 1502, |
| "token_acc": 0.8612176379149875 |
| }, |
| { |
| "epoch": 4.818619582664526, |
| "grad_norm": 0.1776631162311648, |
| "learning_rate": 2.4485142114032187e-08, |
| "loss": 0.3851725459098816, |
| "step": 1503, |
| "token_acc": 0.8703423965016557 |
| }, |
| { |
| "epoch": 4.821829855537721, |
| "grad_norm": 0.1793386927959324, |
| "learning_rate": 2.363451027574953e-08, |
| "loss": 0.3998616635799408, |
| "step": 1504, |
| "token_acc": 0.8650888541318452 |
| }, |
| { |
| "epoch": 4.825040128410915, |
| "grad_norm": 0.17069429441349365, |
| "learning_rate": 2.2798866557526888e-08, |
| "loss": 0.3791911005973816, |
| "step": 1505, |
| "token_acc": 0.8722738939382442 |
| }, |
| { |
| "epoch": 4.828250401284109, |
| "grad_norm": 0.17633776655076083, |
| "learning_rate": 2.197821456218696e-08, |
| "loss": 0.3572896420955658, |
| "step": 1506, |
| "token_acc": 0.8801751024701113 |
| }, |
| { |
| "epoch": 4.831460674157303, |
| "grad_norm": 0.22903235280353987, |
| "learning_rate": 2.117255782791716e-08, |
| "loss": 0.4180501401424408, |
| "step": 1507, |
| "token_acc": 0.8608980437218196 |
| }, |
| { |
| "epoch": 4.834670947030498, |
| "grad_norm": 0.17214558829297427, |
| "learning_rate": 2.0381899828252504e-08, |
| "loss": 0.4264323115348816, |
| "step": 1508, |
| "token_acc": 0.8573647249971219 |
| }, |
| { |
| "epoch": 4.837881219903692, |
| "grad_norm": 0.17332641221650316, |
| "learning_rate": 1.9606243972063175e-08, |
| "loss": 0.4503580927848816, |
| "step": 1509, |
| "token_acc": 0.8480119506732333 |
| }, |
| { |
| "epoch": 4.841091492776886, |
| "grad_norm": 0.20349974006143934, |
| "learning_rate": 1.8845593603537436e-08, |
| "loss": 0.4302571713924408, |
| "step": 1510, |
| "token_acc": 0.8569897104456586 |
| }, |
| { |
| "epoch": 4.84430176565008, |
| "grad_norm": 0.23142774813924935, |
| "learning_rate": 1.809995200217035e-08, |
| "loss": 0.4117431640625, |
| "step": 1511, |
| "token_acc": 0.8611640929603565 |
| }, |
| { |
| "epoch": 4.847512038523274, |
| "grad_norm": 0.2264181651137007, |
| "learning_rate": 1.7369322382744746e-08, |
| "loss": 0.4283447265625, |
| "step": 1512, |
| "token_acc": 0.8577302561930976 |
| }, |
| { |
| "epoch": 4.850722311396469, |
| "grad_norm": 0.1927470595102296, |
| "learning_rate": 1.6653707895323444e-08, |
| "loss": 0.4225260615348816, |
| "step": 1513, |
| "token_acc": 0.8582438450309673 |
| }, |
| { |
| "epoch": 4.853932584269663, |
| "grad_norm": 0.20210916477600854, |
| "learning_rate": 1.595311162523022e-08, |
| "loss": 0.3760783076286316, |
| "step": 1514, |
| "token_acc": 0.8735678850105595 |
| }, |
| { |
| "epoch": 4.857142857142857, |
| "grad_norm": 0.1975673822042727, |
| "learning_rate": 1.5267536593039698e-08, |
| "loss": 0.3906657099723816, |
| "step": 1515, |
| "token_acc": 0.870322058087935 |
| }, |
| { |
| "epoch": 4.860353130016051, |
| "grad_norm": 0.24991527614051293, |
| "learning_rate": 1.4596985754563363e-08, |
| "loss": 0.4143269956111908, |
| "step": 1516, |
| "token_acc": 0.862886973900068 |
| }, |
| { |
| "epoch": 4.863563402889246, |
| "grad_norm": 0.18917055000975244, |
| "learning_rate": 1.3941462000837124e-08, |
| "loss": 0.4033203125, |
| "step": 1517, |
| "token_acc": 0.8647954232777991 |
| }, |
| { |
| "epoch": 4.86677367576244, |
| "grad_norm": 0.18869019026368106, |
| "learning_rate": 1.3300968158107717e-08, |
| "loss": 0.4206136167049408, |
| "step": 1518, |
| "token_acc": 0.8589735573207866 |
| }, |
| { |
| "epoch": 4.8699839486356336, |
| "grad_norm": 0.1784602865105331, |
| "learning_rate": 1.2675506987822216e-08, |
| "loss": 0.4408366084098816, |
| "step": 1519, |
| "token_acc": 0.852177246209995 |
| }, |
| { |
| "epoch": 4.873194221508828, |
| "grad_norm": 0.18978019341405125, |
| "learning_rate": 1.206508118661559e-08, |
| "loss": 0.4346110224723816, |
| "step": 1520, |
| "token_acc": 0.8547129687397841 |
| }, |
| { |
| "epoch": 4.876404494382022, |
| "grad_norm": 0.18810412184264527, |
| "learning_rate": 1.1469693386297885e-08, |
| "loss": 0.398681640625, |
| "step": 1521, |
| "token_acc": 0.8660221011534742 |
| }, |
| { |
| "epoch": 4.879614767255217, |
| "grad_norm": 0.1950421054563736, |
| "learning_rate": 1.0889346153844515e-08, |
| "loss": 0.394287109375, |
| "step": 1522, |
| "token_acc": 0.8688735336654346 |
| }, |
| { |
| "epoch": 4.882825040128411, |
| "grad_norm": 0.18702049234084817, |
| "learning_rate": 1.0324041991383814e-08, |
| "loss": 0.406494140625, |
| "step": 1523, |
| "token_acc": 0.8623502341858371 |
| }, |
| { |
| "epoch": 4.886035313001605, |
| "grad_norm": 0.21819621396045558, |
| "learning_rate": 9.773783336188114e-09, |
| "loss": 0.40478515625, |
| "step": 1524, |
| "token_acc": 0.8655094446934141 |
| }, |
| { |
| "epoch": 4.889245585874799, |
| "grad_norm": 0.19955785487879174, |
| "learning_rate": 9.238572560660129e-09, |
| "loss": 0.4108479917049408, |
| "step": 1525, |
| "token_acc": 0.8620435119485541 |
| }, |
| { |
| "epoch": 4.892455858747994, |
| "grad_norm": 0.18647782080518002, |
| "learning_rate": 8.718411972326757e-09, |
| "loss": 0.4232991635799408, |
| "step": 1526, |
| "token_acc": 0.8586699978124964 |
| }, |
| { |
| "epoch": 4.895666131621188, |
| "grad_norm": 0.1800512728032429, |
| "learning_rate": 8.213303813825068e-09, |
| "loss": 0.3704020380973816, |
| "step": 1527, |
| "token_acc": 0.8743921676948102 |
| }, |
| { |
| "epoch": 4.898876404494382, |
| "grad_norm": 0.22287140390515817, |
| "learning_rate": 7.723250262896497e-09, |
| "loss": 0.3697102963924408, |
| "step": 1528, |
| "token_acc": 0.8767438855396187 |
| }, |
| { |
| "epoch": 4.902086677367576, |
| "grad_norm": 0.1748776167395626, |
| "learning_rate": 7.248253432374007e-09, |
| "loss": 0.4410807490348816, |
| "step": 1529, |
| "token_acc": 0.8521788539708903 |
| }, |
| { |
| "epoch": 4.90529695024077, |
| "grad_norm": 0.19350528949432078, |
| "learning_rate": 6.788315370174713e-09, |
| "loss": 0.4403076171875, |
| "step": 1530, |
| "token_acc": 0.8539682000022666 |
| }, |
| { |
| "epoch": 4.908507223113965, |
| "grad_norm": 0.1766161126773489, |
| "learning_rate": 6.343438059291717e-09, |
| "loss": 0.388671875, |
| "step": 1531, |
| "token_acc": 0.8677562657105857 |
| }, |
| { |
| "epoch": 4.911717495987159, |
| "grad_norm": 0.18770179884828989, |
| "learning_rate": 5.913623417784008e-09, |
| "loss": 0.3328043818473816, |
| "step": 1532, |
| "token_acc": 0.8878665562077332 |
| }, |
| { |
| "epoch": 4.914927768860353, |
| "grad_norm": 0.2087284223936544, |
| "learning_rate": 5.49887329876908e-09, |
| "loss": 0.445068359375, |
| "step": 1533, |
| "token_acc": 0.851431813180593 |
| }, |
| { |
| "epoch": 4.918138041733547, |
| "grad_norm": 0.29371025063288597, |
| "learning_rate": 5.0991894904143795e-09, |
| "loss": 0.3640950620174408, |
| "step": 1534, |
| "token_acc": 0.8780885164228094 |
| }, |
| { |
| "epoch": 4.921348314606742, |
| "grad_norm": 0.19461732027423637, |
| "learning_rate": 4.714573715930703e-09, |
| "loss": 0.4474284052848816, |
| "step": 1535, |
| "token_acc": 0.8507598477321591 |
| }, |
| { |
| "epoch": 4.924558587479936, |
| "grad_norm": 0.17865477768308327, |
| "learning_rate": 4.34502763356287e-09, |
| "loss": 0.4408772885799408, |
| "step": 1536, |
| "token_acc": 0.8527419721633549 |
| }, |
| { |
| "epoch": 4.9277688603531296, |
| "grad_norm": 0.18898947607595706, |
| "learning_rate": 3.990552836585059e-09, |
| "loss": 0.3997599482536316, |
| "step": 1537, |
| "token_acc": 0.8667962943740363 |
| }, |
| { |
| "epoch": 4.930979133226324, |
| "grad_norm": 0.18329780463853837, |
| "learning_rate": 3.651150853291485e-09, |
| "loss": 0.3968505859375, |
| "step": 1538, |
| "token_acc": 0.8657463388056985 |
| }, |
| { |
| "epoch": 4.934189406099518, |
| "grad_norm": 0.1884122370512164, |
| "learning_rate": 3.3268231469913423e-09, |
| "loss": 0.3992919921875, |
| "step": 1539, |
| "token_acc": 0.8665628475564735 |
| }, |
| { |
| "epoch": 4.937399678972713, |
| "grad_norm": 0.18874464868375143, |
| "learning_rate": 3.017571116002593e-09, |
| "loss": 0.3417561948299408, |
| "step": 1540, |
| "token_acc": 0.8858301255573328 |
| }, |
| { |
| "epoch": 4.940609951845907, |
| "grad_norm": 0.17451525373232735, |
| "learning_rate": 2.723396093644581e-09, |
| "loss": 0.357666015625, |
| "step": 1541, |
| "token_acc": 0.8792415830543884 |
| }, |
| { |
| "epoch": 4.943820224719101, |
| "grad_norm": 0.19102676574494215, |
| "learning_rate": 2.44429934823337e-09, |
| "loss": 0.4149983823299408, |
| "step": 1542, |
| "token_acc": 0.8621060995517145 |
| }, |
| { |
| "epoch": 4.947030497592295, |
| "grad_norm": 0.24015820065262727, |
| "learning_rate": 2.1802820830763012e-09, |
| "loss": 0.4070638120174408, |
| "step": 1543, |
| "token_acc": 0.8646235807125959 |
| }, |
| { |
| "epoch": 4.95024077046549, |
| "grad_norm": 0.1838007188654105, |
| "learning_rate": 1.9313454364661698e-09, |
| "loss": 0.4093424677848816, |
| "step": 1544, |
| "token_acc": 0.8628462854637304 |
| }, |
| { |
| "epoch": 4.953451043338684, |
| "grad_norm": 0.19924252505425089, |
| "learning_rate": 1.6974904816773328e-09, |
| "loss": 0.40667724609375, |
| "step": 1545, |
| "token_acc": 0.8624877265573008 |
| }, |
| { |
| "epoch": 4.956661316211878, |
| "grad_norm": 0.2092848808914314, |
| "learning_rate": 1.4787182269594967e-09, |
| "loss": 0.4183756709098816, |
| "step": 1546, |
| "token_acc": 0.8607182153873112 |
| }, |
| { |
| "epoch": 4.959871589085072, |
| "grad_norm": 0.19043936673199025, |
| "learning_rate": 1.275029615534995e-09, |
| "loss": 0.3769124448299408, |
| "step": 1547, |
| "token_acc": 0.8732620497102043 |
| }, |
| { |
| "epoch": 4.963081861958266, |
| "grad_norm": 0.19103057854328268, |
| "learning_rate": 1.0864255255941257e-09, |
| "loss": 0.39697265625, |
| "step": 1548, |
| "token_acc": 0.8661367649301114 |
| }, |
| { |
| "epoch": 4.966292134831461, |
| "grad_norm": 0.17564224410576695, |
| "learning_rate": 9.129067702901006e-10, |
| "loss": 0.386474609375, |
| "step": 1549, |
| "token_acc": 0.8696541885592229 |
| }, |
| { |
| "epoch": 4.969502407704655, |
| "grad_norm": 0.1817798033003913, |
| "learning_rate": 7.544740977382669e-10, |
| "loss": 0.4063313901424408, |
| "step": 1550, |
| "token_acc": 0.8644004938785294 |
| }, |
| { |
| "epoch": 4.972712680577849, |
| "grad_norm": 0.18250215981034876, |
| "learning_rate": 6.11128191010668e-10, |
| "loss": 0.3870442807674408, |
| "step": 1551, |
| "token_acc": 0.8708193163813337 |
| }, |
| { |
| "epoch": 4.975922953451043, |
| "grad_norm": 0.18730655534724885, |
| "learning_rate": 4.828696681333233e-10, |
| "loss": 0.3853759765625, |
| "step": 1552, |
| "token_acc": 0.8718526354510426 |
| }, |
| { |
| "epoch": 4.979133226324238, |
| "grad_norm": 0.1679151678172249, |
| "learning_rate": 3.696990820842849e-10, |
| "loss": 0.3643595576286316, |
| "step": 1553, |
| "token_acc": 0.8772456253978552 |
| }, |
| { |
| "epoch": 4.982343499197432, |
| "grad_norm": 0.20075363139655922, |
| "learning_rate": 2.716169207916952e-10, |
| "loss": 0.4112142026424408, |
| "step": 1554, |
| "token_acc": 0.8639921075248038 |
| }, |
| { |
| "epoch": 4.9855537720706256, |
| "grad_norm": 0.19697059102789963, |
| "learning_rate": 1.886236071295122e-10, |
| "loss": 0.3818359375, |
| "step": 1555, |
| "token_acc": 0.8715292497469914 |
| }, |
| { |
| "epoch": 4.98876404494382, |
| "grad_norm": 0.21720741450497374, |
| "learning_rate": 1.207194989186755e-10, |
| "loss": 0.377685546875, |
| "step": 1556, |
| "token_acc": 0.8723909723400645 |
| }, |
| { |
| "epoch": 4.991974317817014, |
| "grad_norm": 0.1837173297906277, |
| "learning_rate": 6.790488892283176e-11, |
| "loss": 0.3875732421875, |
| "step": 1557, |
| "token_acc": 0.8704183097200379 |
| }, |
| { |
| "epoch": 4.995184590690209, |
| "grad_norm": 0.21153622842687272, |
| "learning_rate": 3.01800048487233e-11, |
| "loss": 0.4175618588924408, |
| "step": 1558, |
| "token_acc": 0.8587473362060063 |
| }, |
| { |
| "epoch": 4.998394863563403, |
| "grad_norm": 0.19093467860237212, |
| "learning_rate": 7.545009344633868e-12, |
| "loss": 0.392578125, |
| "step": 1559, |
| "token_acc": 0.8685876987099844 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.3700371401403032, |
| "learning_rate": 0.0, |
| "loss": 0.412109375, |
| "step": 1560, |
| "token_acc": 0.8622697000631958 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1560, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 300, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5902748949715354e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|