| { |
| "best_global_step": 3300, |
| "best_metric": 0.32621017, |
| "best_model_checkpoint": "/mnt/shared-storage-user/mineru4s/jcwang/VPLT/outputs/checkpoints/29_lr2e-5_bs128_e1_VLT_TT_vp_ib09_1m_full/v0-20251204-195443/checkpoint-3300", |
| "epoch": 1.0, |
| "eval_steps": 100, |
| "global_step": 7806, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00012810658467845247, |
| "grad_norm": 61.75, |
| "learning_rate": 5.115089514066497e-08, |
| "loss": 1.7527258396148682, |
| "step": 1, |
| "token_acc": 0.6929900475984422 |
| }, |
| { |
| "epoch": 0.0006405329233922624, |
| "grad_norm": 73.0, |
| "learning_rate": 2.5575447570332484e-07, |
| "loss": 1.7571117877960205, |
| "step": 5, |
| "token_acc": 0.6861388813834098 |
| }, |
| { |
| "epoch": 0.0012810658467845247, |
| "grad_norm": 58.5, |
| "learning_rate": 5.115089514066497e-07, |
| "loss": 1.7617622375488282, |
| "step": 10, |
| "token_acc": 0.6896253352946267 |
| }, |
| { |
| "epoch": 0.001921598770176787, |
| "grad_norm": 71.0, |
| "learning_rate": 7.672634271099745e-07, |
| "loss": 1.7400665283203125, |
| "step": 15, |
| "token_acc": 0.6863151530854601 |
| }, |
| { |
| "epoch": 0.0025621316935690495, |
| "grad_norm": 53.5, |
| "learning_rate": 1.0230179028132994e-06, |
| "loss": 1.7050804138183593, |
| "step": 20, |
| "token_acc": 0.6901353798396137 |
| }, |
| { |
| "epoch": 0.003202664616961312, |
| "grad_norm": 54.25, |
| "learning_rate": 1.2787723785166241e-06, |
| "loss": 1.6199134826660155, |
| "step": 25, |
| "token_acc": 0.7034818581401362 |
| }, |
| { |
| "epoch": 0.003843197540353574, |
| "grad_norm": 47.25, |
| "learning_rate": 1.534526854219949e-06, |
| "loss": 1.4921659469604491, |
| "step": 30, |
| "token_acc": 0.7096607764736231 |
| }, |
| { |
| "epoch": 0.004483730463745837, |
| "grad_norm": 43.25, |
| "learning_rate": 1.7902813299232737e-06, |
| "loss": 1.3593175888061524, |
| "step": 35, |
| "token_acc": 0.71506187745246 |
| }, |
| { |
| "epoch": 0.005124263387138099, |
| "grad_norm": 30.25, |
| "learning_rate": 2.0460358056265987e-06, |
| "loss": 1.1928886413574218, |
| "step": 40, |
| "token_acc": 0.7095462606514122 |
| }, |
| { |
| "epoch": 0.005764796310530361, |
| "grad_norm": 61.0, |
| "learning_rate": 2.3017902813299235e-06, |
| "loss": 1.000108528137207, |
| "step": 45, |
| "token_acc": 0.6974425102084677 |
| }, |
| { |
| "epoch": 0.006405329233922624, |
| "grad_norm": 47.75, |
| "learning_rate": 2.5575447570332483e-06, |
| "loss": 0.8234397888183593, |
| "step": 50, |
| "token_acc": 0.7172779381976468 |
| }, |
| { |
| "epoch": 0.007045862157314886, |
| "grad_norm": 15.0625, |
| "learning_rate": 2.813299232736573e-06, |
| "loss": 0.6950876235961914, |
| "step": 55, |
| "token_acc": 0.7412205198829402 |
| }, |
| { |
| "epoch": 0.007686395080707148, |
| "grad_norm": 9.6875, |
| "learning_rate": 3.069053708439898e-06, |
| "loss": 0.6445055961608886, |
| "step": 60, |
| "token_acc": 0.7602104627593048 |
| }, |
| { |
| "epoch": 0.00832692800409941, |
| "grad_norm": 4.46875, |
| "learning_rate": 3.3248081841432226e-06, |
| "loss": 0.6182816982269287, |
| "step": 65, |
| "token_acc": 0.7633587786259542 |
| }, |
| { |
| "epoch": 0.008967460927491674, |
| "grad_norm": 4.21875, |
| "learning_rate": 3.5805626598465474e-06, |
| "loss": 0.6172842979431152, |
| "step": 70, |
| "token_acc": 0.7656499417576255 |
| }, |
| { |
| "epoch": 0.009607993850883935, |
| "grad_norm": 3.359375, |
| "learning_rate": 3.836317135549873e-06, |
| "loss": 0.6099654197692871, |
| "step": 75, |
| "token_acc": 0.7649229712912501 |
| }, |
| { |
| "epoch": 0.010248526774276198, |
| "grad_norm": 3.265625, |
| "learning_rate": 4.092071611253197e-06, |
| "loss": 0.6063261985778808, |
| "step": 80, |
| "token_acc": 0.7644379511859746 |
| }, |
| { |
| "epoch": 0.01088905969766846, |
| "grad_norm": 6.34375, |
| "learning_rate": 4.347826086956522e-06, |
| "loss": 0.5800480842590332, |
| "step": 85, |
| "token_acc": 0.7750280729031701 |
| }, |
| { |
| "epoch": 0.011529592621060722, |
| "grad_norm": 4.03125, |
| "learning_rate": 4.603580562659847e-06, |
| "loss": 0.5782370567321777, |
| "step": 90, |
| "token_acc": 0.7780650721827193 |
| }, |
| { |
| "epoch": 0.012170125544452985, |
| "grad_norm": 21.125, |
| "learning_rate": 4.859335038363172e-06, |
| "loss": 0.5692886352539063, |
| "step": 95, |
| "token_acc": 0.7785625080745877 |
| }, |
| { |
| "epoch": 0.012810658467845248, |
| "grad_norm": 4.03125, |
| "learning_rate": 5.1150895140664966e-06, |
| "loss": 0.5636235237121582, |
| "step": 100, |
| "token_acc": 0.7817540539378037 |
| }, |
| { |
| "epoch": 0.012810658467845248, |
| "eval_loss": 0.5616942644119263, |
| "eval_runtime": 109.4288, |
| "eval_samples_per_second": 91.384, |
| "eval_steps_per_second": 11.423, |
| "eval_token_acc": 0.782156125595894, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.013451191391237509, |
| "grad_norm": 4.34375, |
| "learning_rate": 5.370843989769821e-06, |
| "loss": 0.5511586189270019, |
| "step": 105, |
| "token_acc": 0.7842739323805 |
| }, |
| { |
| "epoch": 0.014091724314629772, |
| "grad_norm": 7.34375, |
| "learning_rate": 5.626598465473146e-06, |
| "loss": 0.5501980781555176, |
| "step": 110, |
| "token_acc": 0.7883570504527814 |
| }, |
| { |
| "epoch": 0.014732257238022035, |
| "grad_norm": 4.0, |
| "learning_rate": 5.882352941176471e-06, |
| "loss": 0.5424150466918946, |
| "step": 115, |
| "token_acc": 0.7874757908327954 |
| }, |
| { |
| "epoch": 0.015372790161414296, |
| "grad_norm": 4.9375, |
| "learning_rate": 6.138107416879796e-06, |
| "loss": 0.5345050811767578, |
| "step": 120, |
| "token_acc": 0.7919321508524195 |
| }, |
| { |
| "epoch": 0.01601332308480656, |
| "grad_norm": 3.9375, |
| "learning_rate": 6.3938618925831205e-06, |
| "loss": 0.5287456512451172, |
| "step": 125, |
| "token_acc": 0.7929382311045884 |
| }, |
| { |
| "epoch": 0.01665385600819882, |
| "grad_norm": 4.1875, |
| "learning_rate": 6.649616368286445e-06, |
| "loss": 0.5286868572235107, |
| "step": 130, |
| "token_acc": 0.7934768540489235 |
| }, |
| { |
| "epoch": 0.017294388931591083, |
| "grad_norm": 6.25, |
| "learning_rate": 6.90537084398977e-06, |
| "loss": 0.5210060119628906, |
| "step": 135, |
| "token_acc": 0.7963354171157577 |
| }, |
| { |
| "epoch": 0.017934921854983348, |
| "grad_norm": 5.0625, |
| "learning_rate": 7.161125319693095e-06, |
| "loss": 0.5186363697052002, |
| "step": 140, |
| "token_acc": 0.7987665502221072 |
| }, |
| { |
| "epoch": 0.01857545477837561, |
| "grad_norm": 6.09375, |
| "learning_rate": 7.41687979539642e-06, |
| "loss": 0.5118862152099609, |
| "step": 145, |
| "token_acc": 0.7989200863930885 |
| }, |
| { |
| "epoch": 0.01921598770176787, |
| "grad_norm": 6.15625, |
| "learning_rate": 7.672634271099745e-06, |
| "loss": 0.5256869316101074, |
| "step": 150, |
| "token_acc": 0.795885056483828 |
| }, |
| { |
| "epoch": 0.019856520625160134, |
| "grad_norm": 4.25, |
| "learning_rate": 7.92838874680307e-06, |
| "loss": 0.5057379722595214, |
| "step": 155, |
| "token_acc": 0.8006816514948876 |
| }, |
| { |
| "epoch": 0.020497053548552396, |
| "grad_norm": 9.9375, |
| "learning_rate": 8.184143222506395e-06, |
| "loss": 0.49903292655944825, |
| "step": 160, |
| "token_acc": 0.8046234796860174 |
| }, |
| { |
| "epoch": 0.021137586471944657, |
| "grad_norm": 6.78125, |
| "learning_rate": 8.43989769820972e-06, |
| "loss": 0.5005066871643067, |
| "step": 165, |
| "token_acc": 0.8053670973596647 |
| }, |
| { |
| "epoch": 0.02177811939533692, |
| "grad_norm": 5.46875, |
| "learning_rate": 8.695652173913044e-06, |
| "loss": 0.4884012222290039, |
| "step": 170, |
| "token_acc": 0.8094040079812613 |
| }, |
| { |
| "epoch": 0.022418652318729183, |
| "grad_norm": 7.90625, |
| "learning_rate": 8.95140664961637e-06, |
| "loss": 0.4938325881958008, |
| "step": 175, |
| "token_acc": 0.806672997237569 |
| }, |
| { |
| "epoch": 0.023059185242121444, |
| "grad_norm": 9.9375, |
| "learning_rate": 9.207161125319694e-06, |
| "loss": 0.5015275478363037, |
| "step": 180, |
| "token_acc": 0.8040328474998926 |
| }, |
| { |
| "epoch": 0.02369971816551371, |
| "grad_norm": 6.59375, |
| "learning_rate": 9.462915601023019e-06, |
| "loss": 0.4769923686981201, |
| "step": 185, |
| "token_acc": 0.8167363295557375 |
| }, |
| { |
| "epoch": 0.02434025108890597, |
| "grad_norm": 8.875, |
| "learning_rate": 9.718670076726344e-06, |
| "loss": 0.48226518630981446, |
| "step": 190, |
| "token_acc": 0.8120698554714384 |
| }, |
| { |
| "epoch": 0.02498078401229823, |
| "grad_norm": 5.875, |
| "learning_rate": 9.974424552429668e-06, |
| "loss": 0.48815107345581055, |
| "step": 195, |
| "token_acc": 0.8090380890897353 |
| }, |
| { |
| "epoch": 0.025621316935690495, |
| "grad_norm": 8.1875, |
| "learning_rate": 1.0230179028132993e-05, |
| "loss": 0.4772751808166504, |
| "step": 200, |
| "token_acc": 0.816347690845466 |
| }, |
| { |
| "epoch": 0.025621316935690495, |
| "eval_loss": 0.47741714119911194, |
| "eval_runtime": 103.9123, |
| "eval_samples_per_second": 96.235, |
| "eval_steps_per_second": 12.029, |
| "eval_token_acc": 0.8152050539557392, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.026261849859082757, |
| "grad_norm": 8.9375, |
| "learning_rate": 1.0485933503836318e-05, |
| "loss": 0.47005443572998046, |
| "step": 205, |
| "token_acc": 0.8176898432764742 |
| }, |
| { |
| "epoch": 0.026902382782475018, |
| "grad_norm": 10.0625, |
| "learning_rate": 1.0741687979539643e-05, |
| "loss": 0.466142463684082, |
| "step": 210, |
| "token_acc": 0.818608860541286 |
| }, |
| { |
| "epoch": 0.027542915705867282, |
| "grad_norm": 7.875, |
| "learning_rate": 1.0997442455242967e-05, |
| "loss": 0.4647233963012695, |
| "step": 215, |
| "token_acc": 0.8217214883881551 |
| }, |
| { |
| "epoch": 0.028183448629259544, |
| "grad_norm": 23.125, |
| "learning_rate": 1.1253196930946292e-05, |
| "loss": 0.46241116523742676, |
| "step": 220, |
| "token_acc": 0.8234608913240433 |
| }, |
| { |
| "epoch": 0.028823981552651805, |
| "grad_norm": 10.9375, |
| "learning_rate": 1.1508951406649617e-05, |
| "loss": 0.4518951416015625, |
| "step": 225, |
| "token_acc": 0.8269572375546986 |
| }, |
| { |
| "epoch": 0.02946451447604407, |
| "grad_norm": 16.875, |
| "learning_rate": 1.1764705882352942e-05, |
| "loss": 0.44137048721313477, |
| "step": 230, |
| "token_acc": 0.8298306556665219 |
| }, |
| { |
| "epoch": 0.03010504739943633, |
| "grad_norm": 12.4375, |
| "learning_rate": 1.2020460358056267e-05, |
| "loss": 0.453232479095459, |
| "step": 235, |
| "token_acc": 0.8256595964821521 |
| }, |
| { |
| "epoch": 0.030745580322828592, |
| "grad_norm": 8.0, |
| "learning_rate": 1.2276214833759591e-05, |
| "loss": 0.4504352569580078, |
| "step": 240, |
| "token_acc": 0.8276514337302782 |
| }, |
| { |
| "epoch": 0.031386113246220856, |
| "grad_norm": 16.25, |
| "learning_rate": 1.2531969309462916e-05, |
| "loss": 0.43747830390930176, |
| "step": 245, |
| "token_acc": 0.829782636878268 |
| }, |
| { |
| "epoch": 0.03202664616961312, |
| "grad_norm": 6.96875, |
| "learning_rate": 1.2787723785166241e-05, |
| "loss": 0.4497882843017578, |
| "step": 250, |
| "token_acc": 0.8272813524236674 |
| }, |
| { |
| "epoch": 0.03266717909300538, |
| "grad_norm": 7.25, |
| "learning_rate": 1.3043478260869566e-05, |
| "loss": 0.4439809322357178, |
| "step": 255, |
| "token_acc": 0.8280041258380608 |
| }, |
| { |
| "epoch": 0.03330771201639764, |
| "grad_norm": 12.375, |
| "learning_rate": 1.329923273657289e-05, |
| "loss": 0.4415604591369629, |
| "step": 260, |
| "token_acc": 0.8288237828522189 |
| }, |
| { |
| "epoch": 0.03394824493978991, |
| "grad_norm": 9.1875, |
| "learning_rate": 1.3554987212276215e-05, |
| "loss": 0.4370439529418945, |
| "step": 265, |
| "token_acc": 0.8311521132804119 |
| }, |
| { |
| "epoch": 0.034588777863182166, |
| "grad_norm": 8.5625, |
| "learning_rate": 1.381074168797954e-05, |
| "loss": 0.4249903678894043, |
| "step": 270, |
| "token_acc": 0.8351054633471646 |
| }, |
| { |
| "epoch": 0.03522931078657443, |
| "grad_norm": 7.65625, |
| "learning_rate": 1.4066496163682865e-05, |
| "loss": 0.42871723175048826, |
| "step": 275, |
| "token_acc": 0.8337790045717243 |
| }, |
| { |
| "epoch": 0.035869843709966695, |
| "grad_norm": 17.25, |
| "learning_rate": 1.432225063938619e-05, |
| "loss": 0.42778358459472654, |
| "step": 280, |
| "token_acc": 0.8345190359160092 |
| }, |
| { |
| "epoch": 0.03651037663335895, |
| "grad_norm": 5.40625, |
| "learning_rate": 1.4578005115089514e-05, |
| "loss": 0.42468814849853515, |
| "step": 285, |
| "token_acc": 0.8370123979437557 |
| }, |
| { |
| "epoch": 0.03715090955675122, |
| "grad_norm": 9.3125, |
| "learning_rate": 1.483375959079284e-05, |
| "loss": 0.42493228912353515, |
| "step": 290, |
| "token_acc": 0.8359849954727719 |
| }, |
| { |
| "epoch": 0.03779144248014348, |
| "grad_norm": 52.25, |
| "learning_rate": 1.5089514066496164e-05, |
| "loss": 0.42238712310791016, |
| "step": 295, |
| "token_acc": 0.8344741486934435 |
| }, |
| { |
| "epoch": 0.03843197540353574, |
| "grad_norm": 12.4375, |
| "learning_rate": 1.534526854219949e-05, |
| "loss": 0.4189589023590088, |
| "step": 300, |
| "token_acc": 0.8357866481946489 |
| }, |
| { |
| "epoch": 0.03843197540353574, |
| "eval_loss": 0.42732954025268555, |
| "eval_runtime": 102.1151, |
| "eval_samples_per_second": 97.929, |
| "eval_steps_per_second": 12.241, |
| "eval_token_acc": 0.8347027589681691, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.039072508326928004, |
| "grad_norm": 8.375, |
| "learning_rate": 1.5601023017902815e-05, |
| "loss": 0.4307071685791016, |
| "step": 305, |
| "token_acc": 0.8350831713112984 |
| }, |
| { |
| "epoch": 0.03971304125032027, |
| "grad_norm": 9.25, |
| "learning_rate": 1.585677749360614e-05, |
| "loss": 0.41645016670227053, |
| "step": 310, |
| "token_acc": 0.8390616240458838 |
| }, |
| { |
| "epoch": 0.04035357417371253, |
| "grad_norm": 239.0, |
| "learning_rate": 1.6112531969309465e-05, |
| "loss": 0.42703800201416015, |
| "step": 315, |
| "token_acc": 0.8354931760451199 |
| }, |
| { |
| "epoch": 0.04099410709710479, |
| "grad_norm": 10.6875, |
| "learning_rate": 1.636828644501279e-05, |
| "loss": 0.41779098510742185, |
| "step": 320, |
| "token_acc": 0.8380400467067423 |
| }, |
| { |
| "epoch": 0.041634640020497056, |
| "grad_norm": 7.03125, |
| "learning_rate": 1.6624040920716114e-05, |
| "loss": 0.418929386138916, |
| "step": 325, |
| "token_acc": 0.8373385012919896 |
| }, |
| { |
| "epoch": 0.042275172943889314, |
| "grad_norm": 10.0, |
| "learning_rate": 1.687979539641944e-05, |
| "loss": 0.4039362907409668, |
| "step": 330, |
| "token_acc": 0.8435945139099208 |
| }, |
| { |
| "epoch": 0.04291570586728158, |
| "grad_norm": 8.375, |
| "learning_rate": 1.7135549872122764e-05, |
| "loss": 0.4099921226501465, |
| "step": 335, |
| "token_acc": 0.8431601226728866 |
| }, |
| { |
| "epoch": 0.04355623879067384, |
| "grad_norm": 13.5, |
| "learning_rate": 1.739130434782609e-05, |
| "loss": 0.3967348575592041, |
| "step": 340, |
| "token_acc": 0.8471802400898177 |
| }, |
| { |
| "epoch": 0.0441967717140661, |
| "grad_norm": 26.0, |
| "learning_rate": 1.7647058823529414e-05, |
| "loss": 0.4075979709625244, |
| "step": 345, |
| "token_acc": 0.8455312553814363 |
| }, |
| { |
| "epoch": 0.044837304637458365, |
| "grad_norm": 11.4375, |
| "learning_rate": 1.790281329923274e-05, |
| "loss": 0.4026969909667969, |
| "step": 350, |
| "token_acc": 0.8446702255898054 |
| }, |
| { |
| "epoch": 0.04547783756085063, |
| "grad_norm": 7.28125, |
| "learning_rate": 1.8158567774936063e-05, |
| "loss": 0.4052872657775879, |
| "step": 355, |
| "token_acc": 0.8408119196717772 |
| }, |
| { |
| "epoch": 0.04611837048424289, |
| "grad_norm": 7.0, |
| "learning_rate": 1.8414322250639388e-05, |
| "loss": 0.3986091136932373, |
| "step": 360, |
| "token_acc": 0.8454004142216086 |
| }, |
| { |
| "epoch": 0.04675890340763515, |
| "grad_norm": 7.0625, |
| "learning_rate": 1.8670076726342713e-05, |
| "loss": 0.4026648044586182, |
| "step": 365, |
| "token_acc": 0.844288421778084 |
| }, |
| { |
| "epoch": 0.04739943633102742, |
| "grad_norm": 8.9375, |
| "learning_rate": 1.8925831202046038e-05, |
| "loss": 0.38652160167694094, |
| "step": 370, |
| "token_acc": 0.851685393258427 |
| }, |
| { |
| "epoch": 0.048039969254419675, |
| "grad_norm": 35.0, |
| "learning_rate": 1.9181585677749362e-05, |
| "loss": 0.3907599687576294, |
| "step": 375, |
| "token_acc": 0.8490174010908147 |
| }, |
| { |
| "epoch": 0.04868050217781194, |
| "grad_norm": 8.25, |
| "learning_rate": 1.9437340153452687e-05, |
| "loss": 0.39287233352661133, |
| "step": 380, |
| "token_acc": 0.8511720096518441 |
| }, |
| { |
| "epoch": 0.049321035101204204, |
| "grad_norm": 12.25, |
| "learning_rate": 1.9693094629156012e-05, |
| "loss": 0.3889561653137207, |
| "step": 385, |
| "token_acc": 0.848631170510886 |
| }, |
| { |
| "epoch": 0.04996156802459646, |
| "grad_norm": 8.9375, |
| "learning_rate": 1.9948849104859337e-05, |
| "loss": 0.38450467586517334, |
| "step": 390, |
| "token_acc": 0.8524625544956188 |
| }, |
| { |
| "epoch": 0.050602100947988726, |
| "grad_norm": 11.3125, |
| "learning_rate": 1.999998563957419e-05, |
| "loss": 0.39484443664550783, |
| "step": 395, |
| "token_acc": 0.8477848646785037 |
| }, |
| { |
| "epoch": 0.05124263387138099, |
| "grad_norm": 14.5, |
| "learning_rate": 1.9999927300415016e-05, |
| "loss": 0.3870053768157959, |
| "step": 400, |
| "token_acc": 0.8513297986982198 |
| }, |
| { |
| "epoch": 0.05124263387138099, |
| "eval_loss": 0.40171390771865845, |
| "eval_runtime": 107.3275, |
| "eval_samples_per_second": 93.173, |
| "eval_steps_per_second": 11.647, |
| "eval_token_acc": 0.8490457391853209, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.05188316679477325, |
| "grad_norm": 7.28125, |
| "learning_rate": 1.9999824085257465e-05, |
| "loss": 0.3954286575317383, |
| "step": 405, |
| "token_acc": 0.8484640466792518 |
| }, |
| { |
| "epoch": 0.05252369971816551, |
| "grad_norm": 8.9375, |
| "learning_rate": 1.9999675994564737e-05, |
| "loss": 0.3951833248138428, |
| "step": 410, |
| "token_acc": 0.8472842901340667 |
| }, |
| { |
| "epoch": 0.05316423264155778, |
| "grad_norm": 8.5625, |
| "learning_rate": 1.99994830290014e-05, |
| "loss": 0.3877572536468506, |
| "step": 415, |
| "token_acc": 0.8527802903045183 |
| }, |
| { |
| "epoch": 0.053804765564950036, |
| "grad_norm": 6.3125, |
| "learning_rate": 1.999924518943342e-05, |
| "loss": 0.3790754318237305, |
| "step": 420, |
| "token_acc": 0.8536078906385188 |
| }, |
| { |
| "epoch": 0.0544452984883423, |
| "grad_norm": 7.21875, |
| "learning_rate": 1.999896247692813e-05, |
| "loss": 0.37895793914794923, |
| "step": 425, |
| "token_acc": 0.8564549713690786 |
| }, |
| { |
| "epoch": 0.055085831411734565, |
| "grad_norm": 7.625, |
| "learning_rate": 1.999863489275424e-05, |
| "loss": 0.3699763774871826, |
| "step": 430, |
| "token_acc": 0.8588138812154696 |
| }, |
| { |
| "epoch": 0.05572636433512682, |
| "grad_norm": 6.1875, |
| "learning_rate": 1.9998262438381828e-05, |
| "loss": 0.3807647228240967, |
| "step": 435, |
| "token_acc": 0.8515137160329013 |
| }, |
| { |
| "epoch": 0.05636689725851909, |
| "grad_norm": 4.875, |
| "learning_rate": 1.9997845115482334e-05, |
| "loss": 0.37743220329284666, |
| "step": 440, |
| "token_acc": 0.853497694064911 |
| }, |
| { |
| "epoch": 0.05700743018191135, |
| "grad_norm": 11.0, |
| "learning_rate": 1.9997382925928544e-05, |
| "loss": 0.36346435546875, |
| "step": 445, |
| "token_acc": 0.8598952244880288 |
| }, |
| { |
| "epoch": 0.05764796310530361, |
| "grad_norm": 6.0625, |
| "learning_rate": 1.99968758717946e-05, |
| "loss": 0.36632614135742186, |
| "step": 450, |
| "token_acc": 0.8593689131281652 |
| }, |
| { |
| "epoch": 0.058288496028695874, |
| "grad_norm": 22.375, |
| "learning_rate": 1.9996323955355972e-05, |
| "loss": 0.38116629123687745, |
| "step": 455, |
| "token_acc": 0.8530322580645161 |
| }, |
| { |
| "epoch": 0.05892902895208814, |
| "grad_norm": 8.9375, |
| "learning_rate": 1.9995727179089463e-05, |
| "loss": 0.3787653684616089, |
| "step": 460, |
| "token_acc": 0.8553301683211049 |
| }, |
| { |
| "epoch": 0.0595695618754804, |
| "grad_norm": 5.71875, |
| "learning_rate": 1.9995085545673177e-05, |
| "loss": 0.37586026191711425, |
| "step": 465, |
| "token_acc": 0.8558023415977961 |
| }, |
| { |
| "epoch": 0.06021009479887266, |
| "grad_norm": 8.5625, |
| "learning_rate": 1.9994399057986537e-05, |
| "loss": 0.36600193977355955, |
| "step": 470, |
| "token_acc": 0.8605023127134397 |
| }, |
| { |
| "epoch": 0.060850627722264926, |
| "grad_norm": 21.25, |
| "learning_rate": 1.9993667719110245e-05, |
| "loss": 0.37952864170074463, |
| "step": 475, |
| "token_acc": 0.8555727954486683 |
| }, |
| { |
| "epoch": 0.061491160645657184, |
| "grad_norm": 16.0, |
| "learning_rate": 1.9992891532326277e-05, |
| "loss": 0.379518985748291, |
| "step": 480, |
| "token_acc": 0.8553931082071851 |
| }, |
| { |
| "epoch": 0.06213169356904945, |
| "grad_norm": 7.0, |
| "learning_rate": 1.9992070501117877e-05, |
| "loss": 0.3733321189880371, |
| "step": 485, |
| "token_acc": 0.8571675153188919 |
| }, |
| { |
| "epoch": 0.06277222649244171, |
| "grad_norm": 11.5, |
| "learning_rate": 1.9991204629169534e-05, |
| "loss": 0.36601009368896487, |
| "step": 490, |
| "token_acc": 0.8613763013521103 |
| }, |
| { |
| "epoch": 0.06341275941583398, |
| "grad_norm": 6.59375, |
| "learning_rate": 1.9990293920366957e-05, |
| "loss": 0.3734764814376831, |
| "step": 495, |
| "token_acc": 0.8549542551355084 |
| }, |
| { |
| "epoch": 0.06405329233922624, |
| "grad_norm": 446.0, |
| "learning_rate": 1.998933837879708e-05, |
| "loss": 0.3742378234863281, |
| "step": 500, |
| "token_acc": 0.8572477856988551 |
| }, |
| { |
| "epoch": 0.06405329233922624, |
| "eval_loss": 0.3812016248703003, |
| "eval_runtime": 102.75, |
| "eval_samples_per_second": 97.324, |
| "eval_steps_per_second": 12.165, |
| "eval_token_acc": 0.8565535875445017, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.06469382526261849, |
| "grad_norm": 9.875, |
| "learning_rate": 1.998833800874802e-05, |
| "loss": 0.377778148651123, |
| "step": 505, |
| "token_acc": 0.8549496860755139 |
| }, |
| { |
| "epoch": 0.06533435818601076, |
| "grad_norm": 8.5, |
| "learning_rate": 1.9987292814709064e-05, |
| "loss": 0.36702466011047363, |
| "step": 510, |
| "token_acc": 0.8602703052808843 |
| }, |
| { |
| "epoch": 0.06597489110940302, |
| "grad_norm": 8.3125, |
| "learning_rate": 1.9986202801370665e-05, |
| "loss": 0.3668750047683716, |
| "step": 515, |
| "token_acc": 0.8610121474868009 |
| }, |
| { |
| "epoch": 0.06661542403279529, |
| "grad_norm": 5.5, |
| "learning_rate": 1.9985067973624402e-05, |
| "loss": 0.368256139755249, |
| "step": 520, |
| "token_acc": 0.8598641210870313 |
| }, |
| { |
| "epoch": 0.06725595695618755, |
| "grad_norm": 6.1875, |
| "learning_rate": 1.9983888336562962e-05, |
| "loss": 0.3637028694152832, |
| "step": 525, |
| "token_acc": 0.8616238543952498 |
| }, |
| { |
| "epoch": 0.06789648987957982, |
| "grad_norm": 8.8125, |
| "learning_rate": 1.9982663895480125e-05, |
| "loss": 0.3575170040130615, |
| "step": 530, |
| "token_acc": 0.8624665342430262 |
| }, |
| { |
| "epoch": 0.06853702280297207, |
| "grad_norm": 4.46875, |
| "learning_rate": 1.9981394655870728e-05, |
| "loss": 0.3676267147064209, |
| "step": 535, |
| "token_acc": 0.8586340206185566 |
| }, |
| { |
| "epoch": 0.06917755572636433, |
| "grad_norm": 5.28125, |
| "learning_rate": 1.998008062343066e-05, |
| "loss": 0.3628795385360718, |
| "step": 540, |
| "token_acc": 0.8599716507022894 |
| }, |
| { |
| "epoch": 0.0698180886497566, |
| "grad_norm": 4.40625, |
| "learning_rate": 1.9978721804056806e-05, |
| "loss": 0.351765513420105, |
| "step": 545, |
| "token_acc": 0.8640237603305785 |
| }, |
| { |
| "epoch": 0.07045862157314886, |
| "grad_norm": 7.4375, |
| "learning_rate": 1.9977318203847056e-05, |
| "loss": 0.35065426826477053, |
| "step": 550, |
| "token_acc": 0.864151596435061 |
| }, |
| { |
| "epoch": 0.07109915449654113, |
| "grad_norm": 5.53125, |
| "learning_rate": 1.9975869829100248e-05, |
| "loss": 0.3636244535446167, |
| "step": 555, |
| "token_acc": 0.8597329888027563 |
| }, |
| { |
| "epoch": 0.07173968741993339, |
| "grad_norm": 5.34375, |
| "learning_rate": 1.9974376686316158e-05, |
| "loss": 0.3594621181488037, |
| "step": 560, |
| "token_acc": 0.8631669907107367 |
| }, |
| { |
| "epoch": 0.07238022034332564, |
| "grad_norm": 4.78125, |
| "learning_rate": 1.9972838782195455e-05, |
| "loss": 0.36011404991149903, |
| "step": 565, |
| "token_acc": 0.8637501078609026 |
| }, |
| { |
| "epoch": 0.0730207532667179, |
| "grad_norm": 4.90625, |
| "learning_rate": 1.99712561236397e-05, |
| "loss": 0.36135361194610593, |
| "step": 570, |
| "token_acc": 0.8631270470608515 |
| }, |
| { |
| "epoch": 0.07366128619011017, |
| "grad_norm": 13.3125, |
| "learning_rate": 1.9969628717751267e-05, |
| "loss": 0.3561633825302124, |
| "step": 575, |
| "token_acc": 0.8632323755285184 |
| }, |
| { |
| "epoch": 0.07430181911350243, |
| "grad_norm": 4.59375, |
| "learning_rate": 1.9967956571833375e-05, |
| "loss": 0.347505521774292, |
| "step": 580, |
| "token_acc": 0.8662591581046517 |
| }, |
| { |
| "epoch": 0.0749423520368947, |
| "grad_norm": 5.90625, |
| "learning_rate": 1.9966239693389982e-05, |
| "loss": 0.3540546417236328, |
| "step": 585, |
| "token_acc": 0.8638228055783429 |
| }, |
| { |
| "epoch": 0.07558288496028696, |
| "grad_norm": 6.28125, |
| "learning_rate": 1.9964478090125815e-05, |
| "loss": 0.33905773162841796, |
| "step": 590, |
| "token_acc": 0.8724406047516199 |
| }, |
| { |
| "epoch": 0.07622341788367921, |
| "grad_norm": 5.125, |
| "learning_rate": 1.9962671769946303e-05, |
| "loss": 0.3554720401763916, |
| "step": 595, |
| "token_acc": 0.8650370115338268 |
| }, |
| { |
| "epoch": 0.07686395080707148, |
| "grad_norm": 16.25, |
| "learning_rate": 1.9960820740957546e-05, |
| "loss": 0.3572436094284058, |
| "step": 600, |
| "token_acc": 0.8659878327652414 |
| }, |
| { |
| "epoch": 0.07686395080707148, |
| "eval_loss": 0.36843228340148926, |
| "eval_runtime": 102.1933, |
| "eval_samples_per_second": 97.854, |
| "eval_steps_per_second": 12.232, |
| "eval_token_acc": 0.8618771835602482, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.07750448373046374, |
| "grad_norm": 5.59375, |
| "learning_rate": 1.9958925011466283e-05, |
| "loss": 0.3591419458389282, |
| "step": 605, |
| "token_acc": 0.8607949646490775 |
| }, |
| { |
| "epoch": 0.07814501665385601, |
| "grad_norm": 7.90625, |
| "learning_rate": 1.9956984589979846e-05, |
| "loss": 0.3471505165100098, |
| "step": 610, |
| "token_acc": 0.8661698828394211 |
| }, |
| { |
| "epoch": 0.07878554957724827, |
| "grad_norm": 6.96875, |
| "learning_rate": 1.9954999485206143e-05, |
| "loss": 0.34771528244018557, |
| "step": 615, |
| "token_acc": 0.8679774069762428 |
| }, |
| { |
| "epoch": 0.07942608250064054, |
| "grad_norm": 15.4375, |
| "learning_rate": 1.9952969706053585e-05, |
| "loss": 0.35360991954803467, |
| "step": 620, |
| "token_acc": 0.8646970023722235 |
| }, |
| { |
| "epoch": 0.08006661542403279, |
| "grad_norm": 5.59375, |
| "learning_rate": 1.995089526163108e-05, |
| "loss": 0.3377622127532959, |
| "step": 625, |
| "token_acc": 0.8723486808070356 |
| }, |
| { |
| "epoch": 0.08070714834742505, |
| "grad_norm": 4.25, |
| "learning_rate": 1.994877616124797e-05, |
| "loss": 0.3453701019287109, |
| "step": 630, |
| "token_acc": 0.8684722042244396 |
| }, |
| { |
| "epoch": 0.08134768127081732, |
| "grad_norm": 10.5625, |
| "learning_rate": 1.9946612414414003e-05, |
| "loss": 0.35302703380584716, |
| "step": 635, |
| "token_acc": 0.8653087478559177 |
| }, |
| { |
| "epoch": 0.08198821419420958, |
| "grad_norm": 18.875, |
| "learning_rate": 1.9944404030839273e-05, |
| "loss": 0.3411895513534546, |
| "step": 640, |
| "token_acc": 0.8682500752979648 |
| }, |
| { |
| "epoch": 0.08262874711760185, |
| "grad_norm": 7.09375, |
| "learning_rate": 1.99421510204342e-05, |
| "loss": 0.34150052070617676, |
| "step": 645, |
| "token_acc": 0.8713020295837633 |
| }, |
| { |
| "epoch": 0.08326928004099411, |
| "grad_norm": 4.09375, |
| "learning_rate": 1.993985339330946e-05, |
| "loss": 0.3460074424743652, |
| "step": 650, |
| "token_acc": 0.8699866454142076 |
| }, |
| { |
| "epoch": 0.08390981296438636, |
| "grad_norm": 6.5, |
| "learning_rate": 1.993751115977596e-05, |
| "loss": 0.338161039352417, |
| "step": 655, |
| "token_acc": 0.8716904276985743 |
| }, |
| { |
| "epoch": 0.08455034588777863, |
| "grad_norm": 4.59375, |
| "learning_rate": 1.993512433034479e-05, |
| "loss": 0.34201898574829104, |
| "step": 660, |
| "token_acc": 0.8709761050857711 |
| }, |
| { |
| "epoch": 0.08519087881117089, |
| "grad_norm": 4.53125, |
| "learning_rate": 1.993269291572716e-05, |
| "loss": 0.33865838050842284, |
| "step": 665, |
| "token_acc": 0.8712311015118791 |
| }, |
| { |
| "epoch": 0.08583141173456316, |
| "grad_norm": 5.40625, |
| "learning_rate": 1.9930216926834366e-05, |
| "loss": 0.3336113691329956, |
| "step": 670, |
| "token_acc": 0.8717793270688088 |
| }, |
| { |
| "epoch": 0.08647194465795542, |
| "grad_norm": 6.3125, |
| "learning_rate": 1.992769637477773e-05, |
| "loss": 0.3334836959838867, |
| "step": 675, |
| "token_acc": 0.8705119896305897 |
| }, |
| { |
| "epoch": 0.08711247758134769, |
| "grad_norm": 6.21875, |
| "learning_rate": 1.9925131270868568e-05, |
| "loss": 0.34505319595336914, |
| "step": 680, |
| "token_acc": 0.8677537009225488 |
| }, |
| { |
| "epoch": 0.08775301050473995, |
| "grad_norm": 8.375, |
| "learning_rate": 1.9922521626618127e-05, |
| "loss": 0.34624500274658204, |
| "step": 685, |
| "token_acc": 0.8689443941158708 |
| }, |
| { |
| "epoch": 0.0883935434281322, |
| "grad_norm": 7.125, |
| "learning_rate": 1.9919867453737524e-05, |
| "loss": 0.34455955028533936, |
| "step": 690, |
| "token_acc": 0.8656286291883521 |
| }, |
| { |
| "epoch": 0.08903407635152447, |
| "grad_norm": 6.0, |
| "learning_rate": 1.9917168764137718e-05, |
| "loss": 0.3470313549041748, |
| "step": 695, |
| "token_acc": 0.8690256366212908 |
| }, |
| { |
| "epoch": 0.08967460927491673, |
| "grad_norm": 4.6875, |
| "learning_rate": 1.991442556992943e-05, |
| "loss": 0.3429619312286377, |
| "step": 700, |
| "token_acc": 0.8695521102497846 |
| }, |
| { |
| "epoch": 0.08967460927491673, |
| "eval_loss": 0.35823777318000793, |
| "eval_runtime": 102.8638, |
| "eval_samples_per_second": 97.216, |
| "eval_steps_per_second": 12.152, |
| "eval_token_acc": 0.8654677732806972, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.090315142198309, |
| "grad_norm": 5.3125, |
| "learning_rate": 1.9911637883423115e-05, |
| "loss": 0.3434779167175293, |
| "step": 705, |
| "token_acc": 0.8690055962117951 |
| }, |
| { |
| "epoch": 0.09095567512170126, |
| "grad_norm": 4.84375, |
| "learning_rate": 1.9908805717128876e-05, |
| "loss": 0.3410910129547119, |
| "step": 710, |
| "token_acc": 0.8684765305683432 |
| }, |
| { |
| "epoch": 0.09159620804509352, |
| "grad_norm": 4.25, |
| "learning_rate": 1.9905929083756442e-05, |
| "loss": 0.34179034233093264, |
| "step": 715, |
| "token_acc": 0.8675887624956912 |
| }, |
| { |
| "epoch": 0.09223674096848578, |
| "grad_norm": 10.5, |
| "learning_rate": 1.990300799621508e-05, |
| "loss": 0.34283981323242185, |
| "step": 720, |
| "token_acc": 0.8702763191873978 |
| }, |
| { |
| "epoch": 0.09287727389187804, |
| "grad_norm": 5.5625, |
| "learning_rate": 1.9900042467613562e-05, |
| "loss": 0.34240546226501467, |
| "step": 725, |
| "token_acc": 0.8690830636461705 |
| }, |
| { |
| "epoch": 0.0935178068152703, |
| "grad_norm": 5.34375, |
| "learning_rate": 1.9897032511260092e-05, |
| "loss": 0.34098148345947266, |
| "step": 730, |
| "token_acc": 0.8700211452984077 |
| }, |
| { |
| "epoch": 0.09415833973866257, |
| "grad_norm": 3.890625, |
| "learning_rate": 1.989397814066224e-05, |
| "loss": 0.32979321479797363, |
| "step": 735, |
| "token_acc": 0.8732680105322226 |
| }, |
| { |
| "epoch": 0.09479887266205483, |
| "grad_norm": 3.6875, |
| "learning_rate": 1.9890879369526907e-05, |
| "loss": 0.33590106964111327, |
| "step": 740, |
| "token_acc": 0.868538938662991 |
| }, |
| { |
| "epoch": 0.0954394055854471, |
| "grad_norm": 4.3125, |
| "learning_rate": 1.9887736211760237e-05, |
| "loss": 0.33802223205566406, |
| "step": 745, |
| "token_acc": 0.8701846511427711 |
| }, |
| { |
| "epoch": 0.09607993850883935, |
| "grad_norm": 8.125, |
| "learning_rate": 1.9884548681467565e-05, |
| "loss": 0.3298491477966309, |
| "step": 750, |
| "token_acc": 0.8728583142721505 |
| }, |
| { |
| "epoch": 0.09672047143223161, |
| "grad_norm": 8.1875, |
| "learning_rate": 1.9881316792953352e-05, |
| "loss": 0.34202146530151367, |
| "step": 755, |
| "token_acc": 0.8698966408268733 |
| }, |
| { |
| "epoch": 0.09736100435562388, |
| "grad_norm": 4.6875, |
| "learning_rate": 1.987804056072113e-05, |
| "loss": 0.3250537872314453, |
| "step": 760, |
| "token_acc": 0.8748375920311823 |
| }, |
| { |
| "epoch": 0.09800153727901614, |
| "grad_norm": 6.0, |
| "learning_rate": 1.987471999947343e-05, |
| "loss": 0.34002318382263186, |
| "step": 765, |
| "token_acc": 0.8696007571846498 |
| }, |
| { |
| "epoch": 0.09864207020240841, |
| "grad_norm": 6.71875, |
| "learning_rate": 1.9871355124111704e-05, |
| "loss": 0.3327933311462402, |
| "step": 770, |
| "token_acc": 0.8724304715840387 |
| }, |
| { |
| "epoch": 0.09928260312580067, |
| "grad_norm": 5.125, |
| "learning_rate": 1.986794594973627e-05, |
| "loss": 0.334125280380249, |
| "step": 775, |
| "token_acc": 0.8736641716782763 |
| }, |
| { |
| "epoch": 0.09992313604919292, |
| "grad_norm": 5.375, |
| "learning_rate": 1.986449249164626e-05, |
| "loss": 0.33797569274902345, |
| "step": 780, |
| "token_acc": 0.870381508850318 |
| }, |
| { |
| "epoch": 0.10056366897258519, |
| "grad_norm": 5.875, |
| "learning_rate": 1.986099476533953e-05, |
| "loss": 0.337173318862915, |
| "step": 785, |
| "token_acc": 0.8695614640883977 |
| }, |
| { |
| "epoch": 0.10120420189597745, |
| "grad_norm": 4.1875, |
| "learning_rate": 1.9857452786512575e-05, |
| "loss": 0.31865544319152833, |
| "step": 790, |
| "token_acc": 0.8768464370803553 |
| }, |
| { |
| "epoch": 0.10184473481936972, |
| "grad_norm": 3.640625, |
| "learning_rate": 1.98538665710605e-05, |
| "loss": 0.3357419013977051, |
| "step": 795, |
| "token_acc": 0.8707342295760083 |
| }, |
| { |
| "epoch": 0.10248526774276198, |
| "grad_norm": 8.375, |
| "learning_rate": 1.985023613507692e-05, |
| "loss": 0.3254246711730957, |
| "step": 800, |
| "token_acc": 0.8745312702038706 |
| }, |
| { |
| "epoch": 0.10248526774276198, |
| "eval_loss": 0.3556683361530304, |
| "eval_runtime": 102.7565, |
| "eval_samples_per_second": 97.317, |
| "eval_steps_per_second": 12.165, |
| "eval_token_acc": 0.866935015032307, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.10312580066615425, |
| "grad_norm": 5.09375, |
| "learning_rate": 1.9846561494853904e-05, |
| "loss": 0.33404462337493895, |
| "step": 805, |
| "token_acc": 0.8705380237972065 |
| }, |
| { |
| "epoch": 0.1037663335895465, |
| "grad_norm": 3.53125, |
| "learning_rate": 1.9842842666881885e-05, |
| "loss": 0.3421541690826416, |
| "step": 810, |
| "token_acc": 0.8684289705566302 |
| }, |
| { |
| "epoch": 0.10440686651293876, |
| "grad_norm": 4.59375, |
| "learning_rate": 1.983907966784959e-05, |
| "loss": 0.3375978469848633, |
| "step": 815, |
| "token_acc": 0.869295677630446 |
| }, |
| { |
| "epoch": 0.10504739943633103, |
| "grad_norm": 9.0625, |
| "learning_rate": 1.9835272514643978e-05, |
| "loss": 0.3273109674453735, |
| "step": 820, |
| "token_acc": 0.8760344827586207 |
| }, |
| { |
| "epoch": 0.10568793235972329, |
| "grad_norm": 6.875, |
| "learning_rate": 1.9831421224350156e-05, |
| "loss": 0.3292600154876709, |
| "step": 825, |
| "token_acc": 0.874255631310952 |
| }, |
| { |
| "epoch": 0.10632846528311556, |
| "grad_norm": 11.5, |
| "learning_rate": 1.98275258142513e-05, |
| "loss": 0.32775206565856935, |
| "step": 830, |
| "token_acc": 0.8719834817395793 |
| }, |
| { |
| "epoch": 0.10696899820650782, |
| "grad_norm": 4.0, |
| "learning_rate": 1.9823586301828572e-05, |
| "loss": 0.3248668909072876, |
| "step": 835, |
| "token_acc": 0.876129143795652 |
| }, |
| { |
| "epoch": 0.10760953112990007, |
| "grad_norm": 16.25, |
| "learning_rate": 1.9819602704761066e-05, |
| "loss": 0.3292513132095337, |
| "step": 840, |
| "token_acc": 0.8749297722459917 |
| }, |
| { |
| "epoch": 0.10825006405329234, |
| "grad_norm": 8.0, |
| "learning_rate": 1.9815575040925693e-05, |
| "loss": 0.3171013116836548, |
| "step": 845, |
| "token_acc": 0.8782070696145027 |
| }, |
| { |
| "epoch": 0.1088905969766846, |
| "grad_norm": 2.96875, |
| "learning_rate": 1.9811503328397133e-05, |
| "loss": 0.319035267829895, |
| "step": 850, |
| "token_acc": 0.8752912747044101 |
| }, |
| { |
| "epoch": 0.10953112990007687, |
| "grad_norm": 5.9375, |
| "learning_rate": 1.9807387585447734e-05, |
| "loss": 0.32436022758483884, |
| "step": 855, |
| "token_acc": 0.876150555291474 |
| }, |
| { |
| "epoch": 0.11017166282346913, |
| "grad_norm": 4.5625, |
| "learning_rate": 1.9803227830547437e-05, |
| "loss": 0.33043532371520995, |
| "step": 860, |
| "token_acc": 0.8730488173995763 |
| }, |
| { |
| "epoch": 0.1108121957468614, |
| "grad_norm": 4.71875, |
| "learning_rate": 1.9799024082363692e-05, |
| "loss": 0.3189000129699707, |
| "step": 865, |
| "token_acc": 0.8785994905668523 |
| }, |
| { |
| "epoch": 0.11145272867025365, |
| "grad_norm": 5.65625, |
| "learning_rate": 1.9794776359761378e-05, |
| "loss": 0.32372350692749025, |
| "step": 870, |
| "token_acc": 0.8751831107281344 |
| }, |
| { |
| "epoch": 0.11209326159364591, |
| "grad_norm": 4.125, |
| "learning_rate": 1.9790484681802707e-05, |
| "loss": 0.3230480670928955, |
| "step": 875, |
| "token_acc": 0.8766306695464363 |
| }, |
| { |
| "epoch": 0.11273379451703817, |
| "grad_norm": 4.8125, |
| "learning_rate": 1.9786149067747163e-05, |
| "loss": 0.32105169296264646, |
| "step": 880, |
| "token_acc": 0.8791981030394481 |
| }, |
| { |
| "epoch": 0.11337432744043044, |
| "grad_norm": 5.0625, |
| "learning_rate": 1.9781769537051384e-05, |
| "loss": 0.3278522968292236, |
| "step": 885, |
| "token_acc": 0.8761810259286423 |
| }, |
| { |
| "epoch": 0.1140148603638227, |
| "grad_norm": 31.375, |
| "learning_rate": 1.9777346109369088e-05, |
| "loss": 0.3238049030303955, |
| "step": 890, |
| "token_acc": 0.8749892120479849 |
| }, |
| { |
| "epoch": 0.11465539328721497, |
| "grad_norm": 4.9375, |
| "learning_rate": 1.9772878804551e-05, |
| "loss": 0.33077249526977537, |
| "step": 895, |
| "token_acc": 0.8735443802294488 |
| }, |
| { |
| "epoch": 0.11529592621060722, |
| "grad_norm": 3.703125, |
| "learning_rate": 1.9768367642644742e-05, |
| "loss": 0.32166156768798826, |
| "step": 900, |
| "token_acc": 0.8777322698857513 |
| }, |
| { |
| "epoch": 0.11529592621060722, |
| "eval_loss": 0.34898844361305237, |
| "eval_runtime": 102.6281, |
| "eval_samples_per_second": 97.439, |
| "eval_steps_per_second": 12.18, |
| "eval_token_acc": 0.8700798954659461, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.11593645913399948, |
| "grad_norm": 5.3125, |
| "learning_rate": 1.9763812643894743e-05, |
| "loss": 0.32224602699279786, |
| "step": 905, |
| "token_acc": 0.8758474759252062 |
| }, |
| { |
| "epoch": 0.11657699205739175, |
| "grad_norm": 4.3125, |
| "learning_rate": 1.975921382874217e-05, |
| "loss": 0.32654175758361814, |
| "step": 910, |
| "token_acc": 0.8768330968047133 |
| }, |
| { |
| "epoch": 0.11721752498078401, |
| "grad_norm": 3.375, |
| "learning_rate": 1.9754571217824815e-05, |
| "loss": 0.3332622528076172, |
| "step": 915, |
| "token_acc": 0.8733866804336603 |
| }, |
| { |
| "epoch": 0.11785805790417628, |
| "grad_norm": 3.53125, |
| "learning_rate": 1.974988483197701e-05, |
| "loss": 0.3266796112060547, |
| "step": 920, |
| "token_acc": 0.873924638678596 |
| }, |
| { |
| "epoch": 0.11849859082756854, |
| "grad_norm": 2.890625, |
| "learning_rate": 1.9745154692229524e-05, |
| "loss": 0.3260995388031006, |
| "step": 925, |
| "token_acc": 0.8767960079153403 |
| }, |
| { |
| "epoch": 0.1191391237509608, |
| "grad_norm": 5.4375, |
| "learning_rate": 1.9740380819809498e-05, |
| "loss": 0.3234872817993164, |
| "step": 930, |
| "token_acc": 0.8770484733482836 |
| }, |
| { |
| "epoch": 0.11977965667435306, |
| "grad_norm": 4.34375, |
| "learning_rate": 1.9735563236140307e-05, |
| "loss": 0.3268592119216919, |
| "step": 935, |
| "token_acc": 0.8750322747224374 |
| }, |
| { |
| "epoch": 0.12042018959774532, |
| "grad_norm": 5.71875, |
| "learning_rate": 1.9730701962841504e-05, |
| "loss": 0.3228474140167236, |
| "step": 940, |
| "token_acc": 0.8774657593246619 |
| }, |
| { |
| "epoch": 0.12106072252113759, |
| "grad_norm": 10.625, |
| "learning_rate": 1.9725797021728687e-05, |
| "loss": 0.32127084732055666, |
| "step": 945, |
| "token_acc": 0.8768540876164195 |
| }, |
| { |
| "epoch": 0.12170125544452985, |
| "grad_norm": 4.3125, |
| "learning_rate": 1.9720848434813437e-05, |
| "loss": 0.3093282222747803, |
| "step": 950, |
| "token_acc": 0.8818652849740932 |
| }, |
| { |
| "epoch": 0.12234178836792212, |
| "grad_norm": 3.640625, |
| "learning_rate": 1.9715856224303193e-05, |
| "loss": 0.3240875244140625, |
| "step": 955, |
| "token_acc": 0.8766339869281046 |
| }, |
| { |
| "epoch": 0.12298232129131437, |
| "grad_norm": 10.125, |
| "learning_rate": 1.9710820412601156e-05, |
| "loss": 0.31369385719299314, |
| "step": 960, |
| "token_acc": 0.8786259212964959 |
| }, |
| { |
| "epoch": 0.12362285421470663, |
| "grad_norm": 4.5, |
| "learning_rate": 1.97057410223062e-05, |
| "loss": 0.3160251617431641, |
| "step": 965, |
| "token_acc": 0.8773067116124292 |
| }, |
| { |
| "epoch": 0.1242633871380989, |
| "grad_norm": 6.1875, |
| "learning_rate": 1.9700618076212767e-05, |
| "loss": 0.32041115760803224, |
| "step": 970, |
| "token_acc": 0.876734235207676 |
| }, |
| { |
| "epoch": 0.12490392006149116, |
| "grad_norm": 6.8125, |
| "learning_rate": 1.969545159731075e-05, |
| "loss": 0.3188473224639893, |
| "step": 975, |
| "token_acc": 0.8775827114696113 |
| }, |
| { |
| "epoch": 0.12554445298488343, |
| "grad_norm": 5.75, |
| "learning_rate": 1.9690241608785404e-05, |
| "loss": 0.31864352226257325, |
| "step": 980, |
| "token_acc": 0.8768779140044898 |
| }, |
| { |
| "epoch": 0.12618498590827568, |
| "grad_norm": 6.03125, |
| "learning_rate": 1.9684988134017254e-05, |
| "loss": 0.32373876571655275, |
| "step": 985, |
| "token_acc": 0.8762113968212948 |
| }, |
| { |
| "epoch": 0.12682551883166795, |
| "grad_norm": 7.625, |
| "learning_rate": 1.9679691196581957e-05, |
| "loss": 0.3241652727127075, |
| "step": 990, |
| "token_acc": 0.8755324183625177 |
| }, |
| { |
| "epoch": 0.1274660517550602, |
| "grad_norm": 5.0625, |
| "learning_rate": 1.9674350820250222e-05, |
| "loss": 0.31421942710876466, |
| "step": 995, |
| "token_acc": 0.8811829816672432 |
| }, |
| { |
| "epoch": 0.12810658467845248, |
| "grad_norm": 4.25, |
| "learning_rate": 1.9668967028987694e-05, |
| "loss": 0.3193212985992432, |
| "step": 1000, |
| "token_acc": 0.8778954619822612 |
| }, |
| { |
| "epoch": 0.12810658467845248, |
| "eval_loss": 0.34447741508483887, |
| "eval_runtime": 102.4624, |
| "eval_samples_per_second": 97.597, |
| "eval_steps_per_second": 12.2, |
| "eval_token_acc": 0.8711014279307462, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.12874711760184473, |
| "grad_norm": 8.5, |
| "learning_rate": 1.966353984695485e-05, |
| "loss": 0.3204664707183838, |
| "step": 1005, |
| "token_acc": 0.8778589649357948 |
| }, |
| { |
| "epoch": 0.12938765052523699, |
| "grad_norm": 3.140625, |
| "learning_rate": 1.965806929850689e-05, |
| "loss": 0.3205430030822754, |
| "step": 1010, |
| "token_acc": 0.877480437508106 |
| }, |
| { |
| "epoch": 0.13002818344862926, |
| "grad_norm": 5.15625, |
| "learning_rate": 1.9652555408193623e-05, |
| "loss": 0.31981477737426756, |
| "step": 1015, |
| "token_acc": 0.8765867722363269 |
| }, |
| { |
| "epoch": 0.13066871637202152, |
| "grad_norm": 5.5, |
| "learning_rate": 1.9646998200759366e-05, |
| "loss": 0.31712310314178466, |
| "step": 1020, |
| "token_acc": 0.8808243342081487 |
| }, |
| { |
| "epoch": 0.1313092492954138, |
| "grad_norm": 5.21875, |
| "learning_rate": 1.9641397701142818e-05, |
| "loss": 0.3185598850250244, |
| "step": 1025, |
| "token_acc": 0.8790852498703096 |
| }, |
| { |
| "epoch": 0.13194978221880604, |
| "grad_norm": 4.625, |
| "learning_rate": 1.9635753934476963e-05, |
| "loss": 0.31679530143737794, |
| "step": 1030, |
| "token_acc": 0.8782927355033412 |
| }, |
| { |
| "epoch": 0.13259031514219832, |
| "grad_norm": 4.15625, |
| "learning_rate": 1.963006692608896e-05, |
| "loss": 0.3205821990966797, |
| "step": 1035, |
| "token_acc": 0.8770858420781008 |
| }, |
| { |
| "epoch": 0.13323084806559057, |
| "grad_norm": 2.640625, |
| "learning_rate": 1.9624336701500005e-05, |
| "loss": 0.3191715717315674, |
| "step": 1040, |
| "token_acc": 0.8761724464331813 |
| }, |
| { |
| "epoch": 0.13387138098898282, |
| "grad_norm": 5.78125, |
| "learning_rate": 1.9618563286425236e-05, |
| "loss": 0.3229659080505371, |
| "step": 1045, |
| "token_acc": 0.8782109398609852 |
| }, |
| { |
| "epoch": 0.1345119139123751, |
| "grad_norm": 3.796875, |
| "learning_rate": 1.9612746706773627e-05, |
| "loss": 0.3189516067504883, |
| "step": 1050, |
| "token_acc": 0.8770367809136881 |
| }, |
| { |
| "epoch": 0.13515244683576735, |
| "grad_norm": 3.453125, |
| "learning_rate": 1.9606886988647846e-05, |
| "loss": 0.31815266609191895, |
| "step": 1055, |
| "token_acc": 0.8782769920662298 |
| }, |
| { |
| "epoch": 0.13579297975915963, |
| "grad_norm": 2.75, |
| "learning_rate": 1.9600984158344153e-05, |
| "loss": 0.3152862548828125, |
| "step": 1060, |
| "token_acc": 0.8782785291448818 |
| }, |
| { |
| "epoch": 0.13643351268255188, |
| "grad_norm": 8.625, |
| "learning_rate": 1.9595038242352283e-05, |
| "loss": 0.31676223278045657, |
| "step": 1065, |
| "token_acc": 0.8806634129486459 |
| }, |
| { |
| "epoch": 0.13707404560594413, |
| "grad_norm": 12.3125, |
| "learning_rate": 1.958904926735532e-05, |
| "loss": 0.31054699420928955, |
| "step": 1070, |
| "token_acc": 0.8809544356230583 |
| }, |
| { |
| "epoch": 0.1377145785293364, |
| "grad_norm": 7.09375, |
| "learning_rate": 1.958301726022958e-05, |
| "loss": 0.30883467197418213, |
| "step": 1075, |
| "token_acc": 0.8819924033149171 |
| }, |
| { |
| "epoch": 0.13835511145272866, |
| "grad_norm": 4.46875, |
| "learning_rate": 1.9576942248044505e-05, |
| "loss": 0.31630141735076905, |
| "step": 1080, |
| "token_acc": 0.8776055124892335 |
| }, |
| { |
| "epoch": 0.13899564437612094, |
| "grad_norm": 4.65625, |
| "learning_rate": 1.95708242580625e-05, |
| "loss": 0.30964021682739257, |
| "step": 1085, |
| "token_acc": 0.8792420327304048 |
| }, |
| { |
| "epoch": 0.1396361772995132, |
| "grad_norm": 4.65625, |
| "learning_rate": 1.956466331773887e-05, |
| "loss": 0.3171691417694092, |
| "step": 1090, |
| "token_acc": 0.8775633293124246 |
| }, |
| { |
| "epoch": 0.14027671022290547, |
| "grad_norm": 3.5625, |
| "learning_rate": 1.9558459454721642e-05, |
| "loss": 0.31899094581604004, |
| "step": 1095, |
| "token_acc": 0.878079188341 |
| }, |
| { |
| "epoch": 0.14091724314629772, |
| "grad_norm": 3.234375, |
| "learning_rate": 1.955221269685148e-05, |
| "loss": 0.31024134159088135, |
| "step": 1100, |
| "token_acc": 0.8816972001382648 |
| }, |
| { |
| "epoch": 0.14091724314629772, |
| "eval_loss": 0.3438940942287445, |
| "eval_runtime": 103.1967, |
| "eval_samples_per_second": 96.902, |
| "eval_steps_per_second": 12.113, |
| "eval_token_acc": 0.8718599642325219, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.14155777606968997, |
| "grad_norm": 4.5, |
| "learning_rate": 1.9545923072161534e-05, |
| "loss": 0.31498963832855226, |
| "step": 1105, |
| "token_acc": 0.8790587219343696 |
| }, |
| { |
| "epoch": 0.14219830899308225, |
| "grad_norm": 3.53125, |
| "learning_rate": 1.9539590608877326e-05, |
| "loss": 0.3086799144744873, |
| "step": 1110, |
| "token_acc": 0.8817727625118442 |
| }, |
| { |
| "epoch": 0.1428388419164745, |
| "grad_norm": 6.46875, |
| "learning_rate": 1.9533215335416623e-05, |
| "loss": 0.3052536487579346, |
| "step": 1115, |
| "token_acc": 0.8838970651519064 |
| }, |
| { |
| "epoch": 0.14347937483986678, |
| "grad_norm": 4.125, |
| "learning_rate": 1.9526797280389314e-05, |
| "loss": 0.3200625658035278, |
| "step": 1120, |
| "token_acc": 0.8772527377770113 |
| }, |
| { |
| "epoch": 0.14411990776325903, |
| "grad_norm": 3.5625, |
| "learning_rate": 1.952033647259727e-05, |
| "loss": 0.3077129364013672, |
| "step": 1125, |
| "token_acc": 0.8831737581039887 |
| }, |
| { |
| "epoch": 0.14476044068665128, |
| "grad_norm": 10.125, |
| "learning_rate": 1.951383294103422e-05, |
| "loss": 0.31737732887268066, |
| "step": 1130, |
| "token_acc": 0.8796427497960411 |
| }, |
| { |
| "epoch": 0.14540097361004356, |
| "grad_norm": 4.40625, |
| "learning_rate": 1.9507286714885623e-05, |
| "loss": 0.31585164070129396, |
| "step": 1135, |
| "token_acc": 0.8788506342221072 |
| }, |
| { |
| "epoch": 0.1460415065334358, |
| "grad_norm": 3.34375, |
| "learning_rate": 1.9500697823528538e-05, |
| "loss": 0.32686147689819334, |
| "step": 1140, |
| "token_acc": 0.8751074806534824 |
| }, |
| { |
| "epoch": 0.1466820394568281, |
| "grad_norm": 4.9375, |
| "learning_rate": 1.9494066296531484e-05, |
| "loss": 0.3137520790100098, |
| "step": 1145, |
| "token_acc": 0.8789589348041539 |
| }, |
| { |
| "epoch": 0.14732257238022034, |
| "grad_norm": 4.59375, |
| "learning_rate": 1.948739216365432e-05, |
| "loss": 0.30913615226745605, |
| "step": 1150, |
| "token_acc": 0.8794063079777366 |
| }, |
| { |
| "epoch": 0.14796310530361262, |
| "grad_norm": 3.5625, |
| "learning_rate": 1.9480675454848103e-05, |
| "loss": 0.3166754722595215, |
| "step": 1155, |
| "token_acc": 0.8766857684518936 |
| }, |
| { |
| "epoch": 0.14860363822700487, |
| "grad_norm": 3.46875, |
| "learning_rate": 1.947391620025495e-05, |
| "loss": 0.3093476057052612, |
| "step": 1160, |
| "token_acc": 0.8816090465708489 |
| }, |
| { |
| "epoch": 0.14924417115039712, |
| "grad_norm": 3.453125, |
| "learning_rate": 1.9467114430207916e-05, |
| "loss": 0.30673789978027344, |
| "step": 1165, |
| "token_acc": 0.8819172300934499 |
| }, |
| { |
| "epoch": 0.1498847040737894, |
| "grad_norm": 4.78125, |
| "learning_rate": 1.9460270175230834e-05, |
| "loss": 0.314839768409729, |
| "step": 1170, |
| "token_acc": 0.8800895239734872 |
| }, |
| { |
| "epoch": 0.15052523699718165, |
| "grad_norm": 5.21875, |
| "learning_rate": 1.9453383466038218e-05, |
| "loss": 0.3102754592895508, |
| "step": 1175, |
| "token_acc": 0.8781979498664829 |
| }, |
| { |
| "epoch": 0.15116576992057393, |
| "grad_norm": 2.515625, |
| "learning_rate": 1.944645433353508e-05, |
| "loss": 0.30159687995910645, |
| "step": 1180, |
| "token_acc": 0.8842528536838464 |
| }, |
| { |
| "epoch": 0.15180630284396618, |
| "grad_norm": 5.96875, |
| "learning_rate": 1.9439482808816823e-05, |
| "loss": 0.31150016784667967, |
| "step": 1185, |
| "token_acc": 0.8806630308755958 |
| }, |
| { |
| "epoch": 0.15244683576735843, |
| "grad_norm": 2.828125, |
| "learning_rate": 1.9432468923169086e-05, |
| "loss": 0.3075159311294556, |
| "step": 1190, |
| "token_acc": 0.8814693313765269 |
| }, |
| { |
| "epoch": 0.1530873686907507, |
| "grad_norm": 6.21875, |
| "learning_rate": 1.9425412708067612e-05, |
| "loss": 0.3062115669250488, |
| "step": 1195, |
| "token_acc": 0.8820224719101124 |
| }, |
| { |
| "epoch": 0.15372790161414296, |
| "grad_norm": 4.28125, |
| "learning_rate": 1.94183141951781e-05, |
| "loss": 0.305180287361145, |
| "step": 1200, |
| "token_acc": 0.8842530282637954 |
| }, |
| { |
| "epoch": 0.15372790161414296, |
| "eval_loss": 0.3408718407154083, |
| "eval_runtime": 103.7033, |
| "eval_samples_per_second": 96.429, |
| "eval_steps_per_second": 12.054, |
| "eval_token_acc": 0.8721810963894779, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.15436843453753524, |
| "grad_norm": 3.9375, |
| "learning_rate": 1.9411173416356065e-05, |
| "loss": 0.30832886695861816, |
| "step": 1205, |
| "token_acc": 0.8802281368821293 |
| }, |
| { |
| "epoch": 0.1550089674609275, |
| "grad_norm": 5.15625, |
| "learning_rate": 1.9403990403646702e-05, |
| "loss": 0.3051230192184448, |
| "step": 1210, |
| "token_acc": 0.8818798910458732 |
| }, |
| { |
| "epoch": 0.15564950038431977, |
| "grad_norm": 4.40625, |
| "learning_rate": 1.9396765189284726e-05, |
| "loss": 0.3141745090484619, |
| "step": 1215, |
| "token_acc": 0.8782784418264683 |
| }, |
| { |
| "epoch": 0.15629003330771202, |
| "grad_norm": 4.34375, |
| "learning_rate": 1.938949780569425e-05, |
| "loss": 0.3076632976531982, |
| "step": 1220, |
| "token_acc": 0.8809513532179393 |
| }, |
| { |
| "epoch": 0.15693056623110427, |
| "grad_norm": 4.1875, |
| "learning_rate": 1.9382188285488612e-05, |
| "loss": 0.30438895225524903, |
| "step": 1225, |
| "token_acc": 0.8833096682586807 |
| }, |
| { |
| "epoch": 0.15757109915449655, |
| "grad_norm": 4.59375, |
| "learning_rate": 1.9374836661470263e-05, |
| "loss": 0.30989761352539064, |
| "step": 1230, |
| "token_acc": 0.8809441615603694 |
| }, |
| { |
| "epoch": 0.1582116320778888, |
| "grad_norm": 4.34375, |
| "learning_rate": 1.9367442966630583e-05, |
| "loss": 0.3067667484283447, |
| "step": 1235, |
| "token_acc": 0.8825937096079276 |
| }, |
| { |
| "epoch": 0.15885216500128108, |
| "grad_norm": 7.46875, |
| "learning_rate": 1.9360007234149756e-05, |
| "loss": 0.29884748458862304, |
| "step": 1240, |
| "token_acc": 0.8846866250269223 |
| }, |
| { |
| "epoch": 0.15949269792467333, |
| "grad_norm": 6.75, |
| "learning_rate": 1.9352529497396623e-05, |
| "loss": 0.3064408779144287, |
| "step": 1245, |
| "token_acc": 0.8819226300615345 |
| }, |
| { |
| "epoch": 0.16013323084806558, |
| "grad_norm": 8.625, |
| "learning_rate": 1.9345009789928507e-05, |
| "loss": 0.3079418182373047, |
| "step": 1250, |
| "token_acc": 0.8822084303077321 |
| }, |
| { |
| "epoch": 0.16077376377145786, |
| "grad_norm": 6.25, |
| "learning_rate": 1.9337448145491106e-05, |
| "loss": 0.3048593044281006, |
| "step": 1255, |
| "token_acc": 0.8844390623648474 |
| }, |
| { |
| "epoch": 0.1614142966948501, |
| "grad_norm": 4.59375, |
| "learning_rate": 1.9329844598018288e-05, |
| "loss": 0.31249561309814455, |
| "step": 1260, |
| "token_acc": 0.8813588549749957 |
| }, |
| { |
| "epoch": 0.16205482961824239, |
| "grad_norm": 10.125, |
| "learning_rate": 1.9322199181631985e-05, |
| "loss": 0.30511524677276614, |
| "step": 1265, |
| "token_acc": 0.8825485961123111 |
| }, |
| { |
| "epoch": 0.16269536254163464, |
| "grad_norm": 4.3125, |
| "learning_rate": 1.9314511930642017e-05, |
| "loss": 0.30724005699157714, |
| "step": 1270, |
| "token_acc": 0.8831095955453878 |
| }, |
| { |
| "epoch": 0.16333589546502691, |
| "grad_norm": 14.375, |
| "learning_rate": 1.930678287954594e-05, |
| "loss": 0.30521693229675295, |
| "step": 1275, |
| "token_acc": 0.8835102252135646 |
| }, |
| { |
| "epoch": 0.16397642838841917, |
| "grad_norm": 4.6875, |
| "learning_rate": 1.9299012063028893e-05, |
| "loss": 0.2963773250579834, |
| "step": 1280, |
| "token_acc": 0.8844027981690993 |
| }, |
| { |
| "epoch": 0.16461696131181142, |
| "grad_norm": 2.796875, |
| "learning_rate": 1.9291199515963445e-05, |
| "loss": 0.30706090927124025, |
| "step": 1285, |
| "token_acc": 0.8825376344086021 |
| }, |
| { |
| "epoch": 0.1652574942352037, |
| "grad_norm": 4.3125, |
| "learning_rate": 1.9283345273409434e-05, |
| "loss": 0.2986742496490479, |
| "step": 1290, |
| "token_acc": 0.8834094237229736 |
| }, |
| { |
| "epoch": 0.16589802715859595, |
| "grad_norm": 26.875, |
| "learning_rate": 1.927544937061382e-05, |
| "loss": 0.30177807807922363, |
| "step": 1295, |
| "token_acc": 0.8840617188173433 |
| }, |
| { |
| "epoch": 0.16653856008198822, |
| "grad_norm": 5.25, |
| "learning_rate": 1.9267511843010508e-05, |
| "loss": 0.3020944356918335, |
| "step": 1300, |
| "token_acc": 0.8823529411764706 |
| }, |
| { |
| "epoch": 0.16653856008198822, |
| "eval_loss": 0.34260889887809753, |
| "eval_runtime": 102.5088, |
| "eval_samples_per_second": 97.553, |
| "eval_steps_per_second": 12.194, |
| "eval_token_acc": 0.8718101333805803, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.16717909300538047, |
| "grad_norm": 3.734375, |
| "learning_rate": 1.925953272622021e-05, |
| "loss": 0.30364112854003905, |
| "step": 1305, |
| "token_acc": 0.8823174931129476 |
| }, |
| { |
| "epoch": 0.16781962592877273, |
| "grad_norm": 3.609375, |
| "learning_rate": 1.9251512056050257e-05, |
| "loss": 0.3062715768814087, |
| "step": 1310, |
| "token_acc": 0.8822084303077321 |
| }, |
| { |
| "epoch": 0.168460158852165, |
| "grad_norm": 6.09375, |
| "learning_rate": 1.9243449868494482e-05, |
| "loss": 0.3047629356384277, |
| "step": 1315, |
| "token_acc": 0.8817324665260258 |
| }, |
| { |
| "epoch": 0.16910069177555725, |
| "grad_norm": 4.34375, |
| "learning_rate": 1.9235346199733013e-05, |
| "loss": 0.30484819412231445, |
| "step": 1320, |
| "token_acc": 0.8817362039953403 |
| }, |
| { |
| "epoch": 0.16974122469894953, |
| "grad_norm": 3.234375, |
| "learning_rate": 1.9227201086132138e-05, |
| "loss": 0.29434518814086913, |
| "step": 1325, |
| "token_acc": 0.8883073063113726 |
| }, |
| { |
| "epoch": 0.17038175762234178, |
| "grad_norm": 3.25, |
| "learning_rate": 1.9219014564244135e-05, |
| "loss": 0.30238900184631345, |
| "step": 1330, |
| "token_acc": 0.8828684914946896 |
| }, |
| { |
| "epoch": 0.17102229054573406, |
| "grad_norm": 5.9375, |
| "learning_rate": 1.9210786670807103e-05, |
| "loss": 0.30103113651275637, |
| "step": 1335, |
| "token_acc": 0.8840491860062348 |
| }, |
| { |
| "epoch": 0.1716628234691263, |
| "grad_norm": 2.84375, |
| "learning_rate": 1.9202517442744804e-05, |
| "loss": 0.3020737409591675, |
| "step": 1340, |
| "token_acc": 0.8843631342768381 |
| }, |
| { |
| "epoch": 0.17230335639251856, |
| "grad_norm": 3.828125, |
| "learning_rate": 1.9194206917166496e-05, |
| "loss": 0.30103378295898436, |
| "step": 1345, |
| "token_acc": 0.8852183650615901 |
| }, |
| { |
| "epoch": 0.17294388931591084, |
| "grad_norm": 13.9375, |
| "learning_rate": 1.9185855131366762e-05, |
| "loss": 0.3041229248046875, |
| "step": 1350, |
| "token_acc": 0.884064226519337 |
| }, |
| { |
| "epoch": 0.1735844222393031, |
| "grad_norm": 20.25, |
| "learning_rate": 1.9177462122825344e-05, |
| "loss": 0.308376407623291, |
| "step": 1355, |
| "token_acc": 0.8811586706323549 |
| }, |
| { |
| "epoch": 0.17422495516269537, |
| "grad_norm": 3.390625, |
| "learning_rate": 1.9169027929206987e-05, |
| "loss": 0.3022352695465088, |
| "step": 1360, |
| "token_acc": 0.8822768434670116 |
| }, |
| { |
| "epoch": 0.17486548808608762, |
| "grad_norm": 6.5625, |
| "learning_rate": 1.916055258836125e-05, |
| "loss": 0.3043084621429443, |
| "step": 1365, |
| "token_acc": 0.8829251495717299 |
| }, |
| { |
| "epoch": 0.1755060210094799, |
| "grad_norm": 3.3125, |
| "learning_rate": 1.9152036138322345e-05, |
| "loss": 0.30508239269256593, |
| "step": 1370, |
| "token_acc": 0.882540092007395 |
| }, |
| { |
| "epoch": 0.17614655393287215, |
| "grad_norm": 4.1875, |
| "learning_rate": 1.9143478617308966e-05, |
| "loss": 0.3004749059677124, |
| "step": 1375, |
| "token_acc": 0.8839621418384546 |
| }, |
| { |
| "epoch": 0.1767870868562644, |
| "grad_norm": 6.125, |
| "learning_rate": 1.913488006372413e-05, |
| "loss": 0.3041959762573242, |
| "step": 1380, |
| "token_acc": 0.8817213611568101 |
| }, |
| { |
| "epoch": 0.17742761977965668, |
| "grad_norm": 11.625, |
| "learning_rate": 1.912624051615498e-05, |
| "loss": 0.3068222522735596, |
| "step": 1385, |
| "token_acc": 0.8823149463893554 |
| }, |
| { |
| "epoch": 0.17806815270304893, |
| "grad_norm": 6.84375, |
| "learning_rate": 1.9117560013372633e-05, |
| "loss": 0.29890620708465576, |
| "step": 1390, |
| "token_acc": 0.885037126575721 |
| }, |
| { |
| "epoch": 0.1787086856264412, |
| "grad_norm": 3.359375, |
| "learning_rate": 1.9108838594331997e-05, |
| "loss": 0.308072566986084, |
| "step": 1395, |
| "token_acc": 0.8794112583921501 |
| }, |
| { |
| "epoch": 0.17934921854983346, |
| "grad_norm": 6.3125, |
| "learning_rate": 1.9100076298171587e-05, |
| "loss": 0.29462456703186035, |
| "step": 1400, |
| "token_acc": 0.885492563052382 |
| }, |
| { |
| "epoch": 0.17934921854983346, |
| "eval_loss": 0.3468918800354004, |
| "eval_runtime": 107.9248, |
| "eval_samples_per_second": 92.657, |
| "eval_steps_per_second": 11.582, |
| "eval_token_acc": 0.8731887869509609, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.1799897514732257, |
| "grad_norm": 7.59375, |
| "learning_rate": 1.9091273164213374e-05, |
| "loss": 0.29882164001464845, |
| "step": 1405, |
| "token_acc": 0.8843217597584645 |
| }, |
| { |
| "epoch": 0.180630284396618, |
| "grad_norm": 3.078125, |
| "learning_rate": 1.9082429231962586e-05, |
| "loss": 0.29425759315490724, |
| "step": 1410, |
| "token_acc": 0.8862321968062149 |
| }, |
| { |
| "epoch": 0.18127081732001024, |
| "grad_norm": 6.0625, |
| "learning_rate": 1.9073544541107544e-05, |
| "loss": 0.2935910701751709, |
| "step": 1415, |
| "token_acc": 0.8873945945945946 |
| }, |
| { |
| "epoch": 0.18191135024340252, |
| "grad_norm": 3.3125, |
| "learning_rate": 1.906461913151947e-05, |
| "loss": 0.29699931144714353, |
| "step": 1420, |
| "token_acc": 0.8851322249978465 |
| }, |
| { |
| "epoch": 0.18255188316679477, |
| "grad_norm": 3.515625, |
| "learning_rate": 1.9055653043252324e-05, |
| "loss": 0.2873663902282715, |
| "step": 1425, |
| "token_acc": 0.8880069025021571 |
| }, |
| { |
| "epoch": 0.18319241609018705, |
| "grad_norm": 4.3125, |
| "learning_rate": 1.9046646316542613e-05, |
| "loss": 0.3060638904571533, |
| "step": 1430, |
| "token_acc": 0.8829251495717299 |
| }, |
| { |
| "epoch": 0.1838329490135793, |
| "grad_norm": 107.5, |
| "learning_rate": 1.9037598991809225e-05, |
| "loss": 0.3029170513153076, |
| "step": 1435, |
| "token_acc": 0.8842598563996732 |
| }, |
| { |
| "epoch": 0.18447348193697155, |
| "grad_norm": 3.984375, |
| "learning_rate": 1.9028511109653212e-05, |
| "loss": 0.29811413288116456, |
| "step": 1440, |
| "token_acc": 0.884185544768069 |
| }, |
| { |
| "epoch": 0.18511401486036383, |
| "grad_norm": 3.671875, |
| "learning_rate": 1.9019382710857663e-05, |
| "loss": 0.291642951965332, |
| "step": 1445, |
| "token_acc": 0.888984918542846 |
| }, |
| { |
| "epoch": 0.18575454778375608, |
| "grad_norm": 5.84375, |
| "learning_rate": 1.901021383638747e-05, |
| "loss": 0.29584593772888185, |
| "step": 1450, |
| "token_acc": 0.8839431769263882 |
| }, |
| { |
| "epoch": 0.18639508070714836, |
| "grad_norm": 5.125, |
| "learning_rate": 1.900100452738917e-05, |
| "loss": 0.29843716621398925, |
| "step": 1455, |
| "token_acc": 0.8849805783340526 |
| }, |
| { |
| "epoch": 0.1870356136305406, |
| "grad_norm": 4.28125, |
| "learning_rate": 1.899175482519077e-05, |
| "loss": 0.3069281578063965, |
| "step": 1460, |
| "token_acc": 0.8841452917886039 |
| }, |
| { |
| "epoch": 0.18767614655393286, |
| "grad_norm": 3.875, |
| "learning_rate": 1.898246477130152e-05, |
| "loss": 0.304925274848938, |
| "step": 1465, |
| "token_acc": 0.8836258819480296 |
| }, |
| { |
| "epoch": 0.18831667947732514, |
| "grad_norm": 3.21875, |
| "learning_rate": 1.8973134407411768e-05, |
| "loss": 0.29193341732025146, |
| "step": 1470, |
| "token_acc": 0.8880477570619025 |
| }, |
| { |
| "epoch": 0.1889572124007174, |
| "grad_norm": 3.84375, |
| "learning_rate": 1.8963763775392766e-05, |
| "loss": 0.2908176898956299, |
| "step": 1475, |
| "token_acc": 0.8886537381764782 |
| }, |
| { |
| "epoch": 0.18959774532410967, |
| "grad_norm": 5.75, |
| "learning_rate": 1.895435291729646e-05, |
| "loss": 0.2873049259185791, |
| "step": 1480, |
| "token_acc": 0.8900970369086814 |
| }, |
| { |
| "epoch": 0.19023827824750192, |
| "grad_norm": 13.0625, |
| "learning_rate": 1.8944901875355325e-05, |
| "loss": 0.29516000747680665, |
| "step": 1485, |
| "token_acc": 0.8862587849782262 |
| }, |
| { |
| "epoch": 0.1908788111708942, |
| "grad_norm": 4.71875, |
| "learning_rate": 1.8935410691982163e-05, |
| "loss": 0.2935316562652588, |
| "step": 1490, |
| "token_acc": 0.8873926367128491 |
| }, |
| { |
| "epoch": 0.19151934409428645, |
| "grad_norm": 4.625, |
| "learning_rate": 1.8925879409769915e-05, |
| "loss": 0.293272590637207, |
| "step": 1495, |
| "token_acc": 0.8866402002243894 |
| }, |
| { |
| "epoch": 0.1921598770176787, |
| "grad_norm": 7.3125, |
| "learning_rate": 1.8916308071491474e-05, |
| "loss": 0.28766617774963377, |
| "step": 1500, |
| "token_acc": 0.8866454279318065 |
| }, |
| { |
| "epoch": 0.1921598770176787, |
| "eval_loss": 0.33948463201522827, |
| "eval_runtime": 105.2784, |
| "eval_samples_per_second": 94.986, |
| "eval_steps_per_second": 11.873, |
| "eval_token_acc": 0.8743791906362293, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.19280040994107098, |
| "grad_norm": 6.8125, |
| "learning_rate": 1.8906696720099492e-05, |
| "loss": 0.2923029899597168, |
| "step": 1505, |
| "token_acc": 0.8879117089153302 |
| }, |
| { |
| "epoch": 0.19344094286446323, |
| "grad_norm": 3.578125, |
| "learning_rate": 1.8897045398726176e-05, |
| "loss": 0.29394724369049074, |
| "step": 1510, |
| "token_acc": 0.8863872082973207 |
| }, |
| { |
| "epoch": 0.1940814757878555, |
| "grad_norm": 4.28125, |
| "learning_rate": 1.8887354150683108e-05, |
| "loss": 0.2944790840148926, |
| "step": 1515, |
| "token_acc": 0.8862324459377962 |
| }, |
| { |
| "epoch": 0.19472200871124776, |
| "grad_norm": 3.296875, |
| "learning_rate": 1.8877623019461053e-05, |
| "loss": 0.292703914642334, |
| "step": 1520, |
| "token_acc": 0.8874542715730579 |
| }, |
| { |
| "epoch": 0.19536254163464, |
| "grad_norm": 5.34375, |
| "learning_rate": 1.886785204872975e-05, |
| "loss": 0.28728461265563965, |
| "step": 1525, |
| "token_acc": 0.8864913949667041 |
| }, |
| { |
| "epoch": 0.1960030745580323, |
| "grad_norm": 11.875, |
| "learning_rate": 1.885804128233772e-05, |
| "loss": 0.29347355365753175, |
| "step": 1530, |
| "token_acc": 0.8853349426674714 |
| }, |
| { |
| "epoch": 0.19664360748142454, |
| "grad_norm": 38.0, |
| "learning_rate": 1.884819076431208e-05, |
| "loss": 0.2958747148513794, |
| "step": 1535, |
| "token_acc": 0.88622210690192 |
| }, |
| { |
| "epoch": 0.19728414040481682, |
| "grad_norm": 4.1875, |
| "learning_rate": 1.8838300538858338e-05, |
| "loss": 0.29049015045166016, |
| "step": 1540, |
| "token_acc": 0.887807818150508 |
| }, |
| { |
| "epoch": 0.19792467332820907, |
| "grad_norm": 5.125, |
| "learning_rate": 1.8828370650360183e-05, |
| "loss": 0.29225118160247804, |
| "step": 1545, |
| "token_acc": 0.8890085278663106 |
| }, |
| { |
| "epoch": 0.19856520625160134, |
| "grad_norm": 5.71875, |
| "learning_rate": 1.8818401143379312e-05, |
| "loss": 0.2903005599975586, |
| "step": 1550, |
| "token_acc": 0.8862319467266281 |
| }, |
| { |
| "epoch": 0.1992057391749936, |
| "grad_norm": 4.6875, |
| "learning_rate": 1.8808392062655206e-05, |
| "loss": 0.2934314966201782, |
| "step": 1555, |
| "token_acc": 0.8883467898622684 |
| }, |
| { |
| "epoch": 0.19984627209838585, |
| "grad_norm": 5.3125, |
| "learning_rate": 1.8798343453104937e-05, |
| "loss": 0.2941242218017578, |
| "step": 1560, |
| "token_acc": 0.887303361127826 |
| }, |
| { |
| "epoch": 0.20048680502177813, |
| "grad_norm": 7.1875, |
| "learning_rate": 1.8788255359822975e-05, |
| "loss": 0.30154945850372317, |
| "step": 1565, |
| "token_acc": 0.8849016823716708 |
| }, |
| { |
| "epoch": 0.20112733794517038, |
| "grad_norm": 5.1875, |
| "learning_rate": 1.8778127828080978e-05, |
| "loss": 0.3002612590789795, |
| "step": 1570, |
| "token_acc": 0.8851415297255442 |
| }, |
| { |
| "epoch": 0.20176787086856265, |
| "grad_norm": 3.484375, |
| "learning_rate": 1.8767960903327575e-05, |
| "loss": 0.28886966705322265, |
| "step": 1575, |
| "token_acc": 0.8898769695661558 |
| }, |
| { |
| "epoch": 0.2024084037919549, |
| "grad_norm": 4.125, |
| "learning_rate": 1.87577546311882e-05, |
| "loss": 0.29037837982177733, |
| "step": 1580, |
| "token_acc": 0.8892447522181346 |
| }, |
| { |
| "epoch": 0.20304893671534716, |
| "grad_norm": 3.8125, |
| "learning_rate": 1.8747509057464844e-05, |
| "loss": 0.2931870937347412, |
| "step": 1585, |
| "token_acc": 0.8877136936625799 |
| }, |
| { |
| "epoch": 0.20368946963873943, |
| "grad_norm": 6.96875, |
| "learning_rate": 1.8737224228135883e-05, |
| "loss": 0.29495954513549805, |
| "step": 1590, |
| "token_acc": 0.8842078167231542 |
| }, |
| { |
| "epoch": 0.20433000256213169, |
| "grad_norm": 3.5625, |
| "learning_rate": 1.872690018935584e-05, |
| "loss": 0.30391521453857423, |
| "step": 1595, |
| "token_acc": 0.882081572820557 |
| }, |
| { |
| "epoch": 0.20497053548552396, |
| "grad_norm": 3.265625, |
| "learning_rate": 1.8716536987455216e-05, |
| "loss": 0.292464280128479, |
| "step": 1600, |
| "token_acc": 0.8882241542566928 |
| }, |
| { |
| "epoch": 0.20497053548552396, |
| "eval_loss": 0.3390945494174957, |
| "eval_runtime": 103.2443, |
| "eval_samples_per_second": 96.858, |
| "eval_steps_per_second": 12.107, |
| "eval_token_acc": 0.8747529220257902, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.20561106840891621, |
| "grad_norm": 4.25, |
| "learning_rate": 1.870613466894025e-05, |
| "loss": 0.29375975131988524, |
| "step": 1605, |
| "token_acc": 0.8863264779278892 |
| }, |
| { |
| "epoch": 0.2062516013323085, |
| "grad_norm": 6.8125, |
| "learning_rate": 1.8695693280492723e-05, |
| "loss": 0.29436321258544923, |
| "step": 1610, |
| "token_acc": 0.8866014901589215 |
| }, |
| { |
| "epoch": 0.20689213425570074, |
| "grad_norm": 3.875, |
| "learning_rate": 1.8685212868969747e-05, |
| "loss": 0.2911177635192871, |
| "step": 1615, |
| "token_acc": 0.8861411643482741 |
| }, |
| { |
| "epoch": 0.207532667179093, |
| "grad_norm": 3.125, |
| "learning_rate": 1.867469348140356e-05, |
| "loss": 0.2982187747955322, |
| "step": 1620, |
| "token_acc": 0.8837629642380685 |
| }, |
| { |
| "epoch": 0.20817320010248527, |
| "grad_norm": 3.25, |
| "learning_rate": 1.8664135165001305e-05, |
| "loss": 0.28237018585205076, |
| "step": 1625, |
| "token_acc": 0.8903340102838871 |
| }, |
| { |
| "epoch": 0.20881373302587752, |
| "grad_norm": 4.65625, |
| "learning_rate": 1.865353796714483e-05, |
| "loss": 0.2914335012435913, |
| "step": 1630, |
| "token_acc": 0.8859261186264308 |
| }, |
| { |
| "epoch": 0.2094542659492698, |
| "grad_norm": 3.953125, |
| "learning_rate": 1.8642901935390457e-05, |
| "loss": 0.2944057464599609, |
| "step": 1635, |
| "token_acc": 0.8854714864981451 |
| }, |
| { |
| "epoch": 0.21009479887266205, |
| "grad_norm": 7.78125, |
| "learning_rate": 1.8632227117468794e-05, |
| "loss": 0.2919133186340332, |
| "step": 1640, |
| "token_acc": 0.8885867219200553 |
| }, |
| { |
| "epoch": 0.2107353317960543, |
| "grad_norm": 5.3125, |
| "learning_rate": 1.86215135612845e-05, |
| "loss": 0.29367570877075194, |
| "step": 1645, |
| "token_acc": 0.8866028091576822 |
| }, |
| { |
| "epoch": 0.21137586471944658, |
| "grad_norm": 2.65625, |
| "learning_rate": 1.8610761314916067e-05, |
| "loss": 0.29374768733978274, |
| "step": 1650, |
| "token_acc": 0.8863107047356164 |
| }, |
| { |
| "epoch": 0.21201639764283883, |
| "grad_norm": 4.4375, |
| "learning_rate": 1.859997042661564e-05, |
| "loss": 0.286625599861145, |
| "step": 1655, |
| "token_acc": 0.8884670147128619 |
| }, |
| { |
| "epoch": 0.2126569305662311, |
| "grad_norm": 2.703125, |
| "learning_rate": 1.858914094480875e-05, |
| "loss": 0.2876077651977539, |
| "step": 1660, |
| "token_acc": 0.8886684208256049 |
| }, |
| { |
| "epoch": 0.21329746348962336, |
| "grad_norm": 5.5625, |
| "learning_rate": 1.8578272918094134e-05, |
| "loss": 0.2962442398071289, |
| "step": 1665, |
| "token_acc": 0.885100138121547 |
| }, |
| { |
| "epoch": 0.21393799641301564, |
| "grad_norm": 10.125, |
| "learning_rate": 1.85673663952435e-05, |
| "loss": 0.2934115886688232, |
| "step": 1670, |
| "token_acc": 0.8867126833477136 |
| }, |
| { |
| "epoch": 0.2145785293364079, |
| "grad_norm": 18.25, |
| "learning_rate": 1.855642142520132e-05, |
| "loss": 0.2942723274230957, |
| "step": 1675, |
| "token_acc": 0.8857585939519256 |
| }, |
| { |
| "epoch": 0.21521906225980014, |
| "grad_norm": 4.78125, |
| "learning_rate": 1.8545438057084587e-05, |
| "loss": 0.29166316986083984, |
| "step": 1680, |
| "token_acc": 0.8873196855834845 |
| }, |
| { |
| "epoch": 0.21585959518319242, |
| "grad_norm": 5.5625, |
| "learning_rate": 1.8534416340182625e-05, |
| "loss": 0.29405913352966306, |
| "step": 1685, |
| "token_acc": 0.8858929269245799 |
| }, |
| { |
| "epoch": 0.21650012810658467, |
| "grad_norm": 3.734375, |
| "learning_rate": 1.852335632395685e-05, |
| "loss": 0.2922976493835449, |
| "step": 1690, |
| "token_acc": 0.8865123084868308 |
| }, |
| { |
| "epoch": 0.21714066102997695, |
| "grad_norm": 3.765625, |
| "learning_rate": 1.851225805804055e-05, |
| "loss": 0.2907034158706665, |
| "step": 1695, |
| "token_acc": 0.8852600656644203 |
| }, |
| { |
| "epoch": 0.2177811939533692, |
| "grad_norm": 2.84375, |
| "learning_rate": 1.850112159223866e-05, |
| "loss": 0.29575324058532715, |
| "step": 1700, |
| "token_acc": 0.8855988654432937 |
| }, |
| { |
| "epoch": 0.2177811939533692, |
| "eval_loss": 0.3360104560852051, |
| "eval_runtime": 105.7607, |
| "eval_samples_per_second": 94.553, |
| "eval_steps_per_second": 11.819, |
| "eval_token_acc": 0.8750740541827463, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.21842172687676145, |
| "grad_norm": 4.46875, |
| "learning_rate": 1.848994697652755e-05, |
| "loss": 0.2958073139190674, |
| "step": 1705, |
| "token_acc": 0.884866163349348 |
| }, |
| { |
| "epoch": 0.21906225980015373, |
| "grad_norm": 6.5, |
| "learning_rate": 1.8478734261054785e-05, |
| "loss": 0.29183714389801024, |
| "step": 1710, |
| "token_acc": 0.8865205384880911 |
| }, |
| { |
| "epoch": 0.21970279272354598, |
| "grad_norm": 3.109375, |
| "learning_rate": 1.8467483496138913e-05, |
| "loss": 0.29586522579193114, |
| "step": 1715, |
| "token_acc": 0.8874040865591861 |
| }, |
| { |
| "epoch": 0.22034332564693826, |
| "grad_norm": 3.03125, |
| "learning_rate": 1.8456194732269227e-05, |
| "loss": 0.298976993560791, |
| "step": 1720, |
| "token_acc": 0.8849873210985516 |
| }, |
| { |
| "epoch": 0.2209838585703305, |
| "grad_norm": 5.25, |
| "learning_rate": 1.8444868020105556e-05, |
| "loss": 0.28900148868560793, |
| "step": 1725, |
| "token_acc": 0.8885448916408669 |
| }, |
| { |
| "epoch": 0.2216243914937228, |
| "grad_norm": 100.5, |
| "learning_rate": 1.8433503410478018e-05, |
| "loss": 0.2942624092102051, |
| "step": 1730, |
| "token_acc": 0.8865779615036817 |
| }, |
| { |
| "epoch": 0.22226492441711504, |
| "grad_norm": 3.40625, |
| "learning_rate": 1.8422100954386805e-05, |
| "loss": 0.2904630184173584, |
| "step": 1735, |
| "token_acc": 0.8851269649334945 |
| }, |
| { |
| "epoch": 0.2229054573405073, |
| "grad_norm": 3.75, |
| "learning_rate": 1.841066070300195e-05, |
| "loss": 0.2874864101409912, |
| "step": 1740, |
| "token_acc": 0.8891145585756882 |
| }, |
| { |
| "epoch": 0.22354599026389957, |
| "grad_norm": 5.75, |
| "learning_rate": 1.8399182707663097e-05, |
| "loss": 0.28712892532348633, |
| "step": 1745, |
| "token_acc": 0.8877060885369337 |
| }, |
| { |
| "epoch": 0.22418652318729182, |
| "grad_norm": 4.71875, |
| "learning_rate": 1.8387667019879267e-05, |
| "loss": 0.29011356830596924, |
| "step": 1750, |
| "token_acc": 0.8868864532339817 |
| }, |
| { |
| "epoch": 0.2248270561106841, |
| "grad_norm": 4.0, |
| "learning_rate": 1.8376113691328638e-05, |
| "loss": 0.2822575569152832, |
| "step": 1755, |
| "token_acc": 0.8909889352908253 |
| }, |
| { |
| "epoch": 0.22546758903407635, |
| "grad_norm": 2.828125, |
| "learning_rate": 1.83645227738583e-05, |
| "loss": 0.28959126472473146, |
| "step": 1760, |
| "token_acc": 0.8877797943133696 |
| }, |
| { |
| "epoch": 0.22610812195746863, |
| "grad_norm": 6.1875, |
| "learning_rate": 1.8352894319484028e-05, |
| "loss": 0.29406278133392333, |
| "step": 1765, |
| "token_acc": 0.8860137145814465 |
| }, |
| { |
| "epoch": 0.22674865488086088, |
| "grad_norm": 5.1875, |
| "learning_rate": 1.834122838039006e-05, |
| "loss": 0.293654203414917, |
| "step": 1770, |
| "token_acc": 0.8864692718195903 |
| }, |
| { |
| "epoch": 0.22738918780425313, |
| "grad_norm": 6.0, |
| "learning_rate": 1.8329525008928835e-05, |
| "loss": 0.28885598182678224, |
| "step": 1775, |
| "token_acc": 0.8875652811946998 |
| }, |
| { |
| "epoch": 0.2280297207276454, |
| "grad_norm": 3.140625, |
| "learning_rate": 1.8317784257620784e-05, |
| "loss": 0.286731481552124, |
| "step": 1780, |
| "token_acc": 0.8878766945859597 |
| }, |
| { |
| "epoch": 0.22867025365103766, |
| "grad_norm": 2.875, |
| "learning_rate": 1.830600617915409e-05, |
| "loss": 0.2842278003692627, |
| "step": 1785, |
| "token_acc": 0.8887885045603167 |
| }, |
| { |
| "epoch": 0.22931078657442994, |
| "grad_norm": 5.53125, |
| "learning_rate": 1.829419082638443e-05, |
| "loss": 0.2927645206451416, |
| "step": 1790, |
| "token_acc": 0.8868731372294933 |
| }, |
| { |
| "epoch": 0.2299513194978222, |
| "grad_norm": 2.453125, |
| "learning_rate": 1.828233825233477e-05, |
| "loss": 0.28806371688842775, |
| "step": 1795, |
| "token_acc": 0.8892287806979751 |
| }, |
| { |
| "epoch": 0.23059185242121444, |
| "grad_norm": 7.71875, |
| "learning_rate": 1.827044851019511e-05, |
| "loss": 0.27867727279663085, |
| "step": 1800, |
| "token_acc": 0.8907344926031664 |
| }, |
| { |
| "epoch": 0.23059185242121444, |
| "eval_loss": 0.3403998911380768, |
| "eval_runtime": 102.8463, |
| "eval_samples_per_second": 97.232, |
| "eval_steps_per_second": 12.154, |
| "eval_token_acc": 0.8755059215662391, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.23123238534460672, |
| "grad_norm": 3.0625, |
| "learning_rate": 1.8258521653322234e-05, |
| "loss": 0.29278562068939207, |
| "step": 1805, |
| "token_acc": 0.886281276962899 |
| }, |
| { |
| "epoch": 0.23187291826799897, |
| "grad_norm": 13.375, |
| "learning_rate": 1.8246557735239497e-05, |
| "loss": 0.28790295124053955, |
| "step": 1810, |
| "token_acc": 0.889273356401384 |
| }, |
| { |
| "epoch": 0.23251345119139125, |
| "grad_norm": 19.125, |
| "learning_rate": 1.8234556809636567e-05, |
| "loss": 0.2872922897338867, |
| "step": 1815, |
| "token_acc": 0.890285369947919 |
| }, |
| { |
| "epoch": 0.2331539841147835, |
| "grad_norm": 8.375, |
| "learning_rate": 1.8222518930369188e-05, |
| "loss": 0.29638094902038575, |
| "step": 1820, |
| "token_acc": 0.8846751229614289 |
| }, |
| { |
| "epoch": 0.23379451703817578, |
| "grad_norm": 6.46875, |
| "learning_rate": 1.8210444151458935e-05, |
| "loss": 0.2879481792449951, |
| "step": 1825, |
| "token_acc": 0.8891678933240973 |
| }, |
| { |
| "epoch": 0.23443504996156803, |
| "grad_norm": 3.734375, |
| "learning_rate": 1.819833252709298e-05, |
| "loss": 0.2846549034118652, |
| "step": 1830, |
| "token_acc": 0.8897593732512591 |
| }, |
| { |
| "epoch": 0.23507558288496028, |
| "grad_norm": 2.859375, |
| "learning_rate": 1.818618411162384e-05, |
| "loss": 0.2873443841934204, |
| "step": 1835, |
| "token_acc": 0.8899616395845007 |
| }, |
| { |
| "epoch": 0.23571611580835256, |
| "grad_norm": 6.125, |
| "learning_rate": 1.817399895956914e-05, |
| "loss": 0.2880409240722656, |
| "step": 1840, |
| "token_acc": 0.889992689115383 |
| }, |
| { |
| "epoch": 0.2363566487317448, |
| "grad_norm": 2.90625, |
| "learning_rate": 1.816177712561136e-05, |
| "loss": 0.28801445960998534, |
| "step": 1845, |
| "token_acc": 0.8874516544907607 |
| }, |
| { |
| "epoch": 0.23699718165513708, |
| "grad_norm": 3.625, |
| "learning_rate": 1.8149518664597604e-05, |
| "loss": 0.2893885850906372, |
| "step": 1850, |
| "token_acc": 0.887526974536038 |
| }, |
| { |
| "epoch": 0.23763771457852934, |
| "grad_norm": 3.421875, |
| "learning_rate": 1.8137223631539335e-05, |
| "loss": 0.28786296844482423, |
| "step": 1855, |
| "token_acc": 0.8883566373209045 |
| }, |
| { |
| "epoch": 0.2382782475019216, |
| "grad_norm": 3.59375, |
| "learning_rate": 1.8124892081612148e-05, |
| "loss": 0.2903712511062622, |
| "step": 1860, |
| "token_acc": 0.8874086807047701 |
| }, |
| { |
| "epoch": 0.23891878042531386, |
| "grad_norm": 5.625, |
| "learning_rate": 1.8112524070155503e-05, |
| "loss": 0.2792266607284546, |
| "step": 1865, |
| "token_acc": 0.8924651889125059 |
| }, |
| { |
| "epoch": 0.23955931334870612, |
| "grad_norm": 6.5625, |
| "learning_rate": 1.8100119652672488e-05, |
| "loss": 0.28893446922302246, |
| "step": 1870, |
| "token_acc": 0.8882462122847153 |
| }, |
| { |
| "epoch": 0.2401998462720984, |
| "grad_norm": 4.46875, |
| "learning_rate": 1.8087678884829573e-05, |
| "loss": 0.28021440505981443, |
| "step": 1875, |
| "token_acc": 0.8914177335229967 |
| }, |
| { |
| "epoch": 0.24084037919549064, |
| "grad_norm": 7.53125, |
| "learning_rate": 1.8075201822456353e-05, |
| "loss": 0.287343430519104, |
| "step": 1880, |
| "token_acc": 0.8879555440682347 |
| }, |
| { |
| "epoch": 0.24148091211888292, |
| "grad_norm": 3.875, |
| "learning_rate": 1.8062688521545294e-05, |
| "loss": 0.2859031677246094, |
| "step": 1885, |
| "token_acc": 0.8882400927396849 |
| }, |
| { |
| "epoch": 0.24212144504227517, |
| "grad_norm": 3.765625, |
| "learning_rate": 1.805013903825149e-05, |
| "loss": 0.2897067070007324, |
| "step": 1890, |
| "token_acc": 0.8861764071598016 |
| }, |
| { |
| "epoch": 0.24276197796566742, |
| "grad_norm": 3.3125, |
| "learning_rate": 1.803755342889242e-05, |
| "loss": 0.28570735454559326, |
| "step": 1895, |
| "token_acc": 0.8883235598482235 |
| }, |
| { |
| "epoch": 0.2434025108890597, |
| "grad_norm": 3.546875, |
| "learning_rate": 1.802493174994766e-05, |
| "loss": 0.2846654176712036, |
| "step": 1900, |
| "token_acc": 0.8901407234740568 |
| }, |
| { |
| "epoch": 0.2434025108890597, |
| "eval_loss": 0.3387065827846527, |
| "eval_runtime": 103.0506, |
| "eval_samples_per_second": 97.04, |
| "eval_steps_per_second": 12.13, |
| "eval_token_acc": 0.8753702709137318, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.24404304381245195, |
| "grad_norm": 2.6875, |
| "learning_rate": 1.8012274058058673e-05, |
| "loss": 0.276248574256897, |
| "step": 1905, |
| "token_acc": 0.8937481149554052 |
| }, |
| { |
| "epoch": 0.24468357673584423, |
| "grad_norm": 4.96875, |
| "learning_rate": 1.799958041002853e-05, |
| "loss": 0.2819145679473877, |
| "step": 1910, |
| "token_acc": 0.8917169974115617 |
| }, |
| { |
| "epoch": 0.24532410965923648, |
| "grad_norm": 5.15625, |
| "learning_rate": 1.7986850862821654e-05, |
| "loss": 0.28824849128723146, |
| "step": 1915, |
| "token_acc": 0.8893915891072044 |
| }, |
| { |
| "epoch": 0.24596464258262873, |
| "grad_norm": 3.375, |
| "learning_rate": 1.797408547356357e-05, |
| "loss": 0.28600053787231444, |
| "step": 1920, |
| "token_acc": 0.8900150959672202 |
| }, |
| { |
| "epoch": 0.246605175506021, |
| "grad_norm": 16.875, |
| "learning_rate": 1.7961284299540666e-05, |
| "loss": 0.2812356948852539, |
| "step": 1925, |
| "token_acc": 0.8895660442600276 |
| }, |
| { |
| "epoch": 0.24724570842941326, |
| "grad_norm": 4.09375, |
| "learning_rate": 1.7948447398199893e-05, |
| "loss": 0.2775670051574707, |
| "step": 1930, |
| "token_acc": 0.892983822129942 |
| }, |
| { |
| "epoch": 0.24788624135280554, |
| "grad_norm": 3.21875, |
| "learning_rate": 1.7935574827148554e-05, |
| "loss": 0.28611729145050047, |
| "step": 1935, |
| "token_acc": 0.8871710951294087 |
| }, |
| { |
| "epoch": 0.2485267742761978, |
| "grad_norm": 4.09375, |
| "learning_rate": 1.7922666644154015e-05, |
| "loss": 0.2785792827606201, |
| "step": 1940, |
| "token_acc": 0.8903730601305494 |
| }, |
| { |
| "epoch": 0.24916730719959007, |
| "grad_norm": 2.78125, |
| "learning_rate": 1.7909722907143456e-05, |
| "loss": 0.28144145011901855, |
| "step": 1945, |
| "token_acc": 0.8897014540135937 |
| }, |
| { |
| "epoch": 0.24980784012298232, |
| "grad_norm": 2.859375, |
| "learning_rate": 1.789674367420361e-05, |
| "loss": 0.2729172706604004, |
| "step": 1950, |
| "token_acc": 0.892122991881154 |
| }, |
| { |
| "epoch": 0.2504483730463746, |
| "grad_norm": 3.9375, |
| "learning_rate": 1.788372900358051e-05, |
| "loss": 0.28403098583221437, |
| "step": 1955, |
| "token_acc": 0.8906849433165223 |
| }, |
| { |
| "epoch": 0.25108890596976685, |
| "grad_norm": 4.0, |
| "learning_rate": 1.7870678953679208e-05, |
| "loss": 0.28090338706970214, |
| "step": 1960, |
| "token_acc": 0.8915574335977924 |
| }, |
| { |
| "epoch": 0.25172943889315913, |
| "grad_norm": 3.015625, |
| "learning_rate": 1.7857593583063533e-05, |
| "loss": 0.2826396942138672, |
| "step": 1965, |
| "token_acc": 0.8903665961397297 |
| }, |
| { |
| "epoch": 0.25236997181655135, |
| "grad_norm": 6.8125, |
| "learning_rate": 1.784447295045582e-05, |
| "loss": 0.28316607475280764, |
| "step": 1970, |
| "token_acc": 0.8914384300194091 |
| }, |
| { |
| "epoch": 0.25301050473994363, |
| "grad_norm": 2.96875, |
| "learning_rate": 1.7831317114736647e-05, |
| "loss": 0.27657251358032225, |
| "step": 1975, |
| "token_acc": 0.8923183631003988 |
| }, |
| { |
| "epoch": 0.2536510376633359, |
| "grad_norm": 3.265625, |
| "learning_rate": 1.7818126134944565e-05, |
| "loss": 0.2740725040435791, |
| "step": 1980, |
| "token_acc": 0.8953624065349872 |
| }, |
| { |
| "epoch": 0.25429157058672813, |
| "grad_norm": 2.890625, |
| "learning_rate": 1.7804900070275853e-05, |
| "loss": 0.2742879867553711, |
| "step": 1985, |
| "token_acc": 0.8926875593542261 |
| }, |
| { |
| "epoch": 0.2549321035101204, |
| "grad_norm": 3.625, |
| "learning_rate": 1.7791638980084217e-05, |
| "loss": 0.2816567897796631, |
| "step": 1990, |
| "token_acc": 0.8898188093183779 |
| }, |
| { |
| "epoch": 0.2555726364335127, |
| "grad_norm": 4.71875, |
| "learning_rate": 1.777834292388056e-05, |
| "loss": 0.28623175621032715, |
| "step": 1995, |
| "token_acc": 0.8866232702504634 |
| }, |
| { |
| "epoch": 0.25621316935690497, |
| "grad_norm": 3.46875, |
| "learning_rate": 1.7765011961332695e-05, |
| "loss": 0.287227988243103, |
| "step": 2000, |
| "token_acc": 0.8884247696150203 |
| }, |
| { |
| "epoch": 0.25621316935690497, |
| "eval_loss": 0.3332171142101288, |
| "eval_runtime": 103.5637, |
| "eval_samples_per_second": 96.559, |
| "eval_steps_per_second": 12.07, |
| "eval_token_acc": 0.8768624280913123, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.2568537022802972, |
| "grad_norm": 7.625, |
| "learning_rate": 1.7751646152265086e-05, |
| "loss": 0.2728090763092041, |
| "step": 2005, |
| "token_acc": 0.8931116389548693 |
| }, |
| { |
| "epoch": 0.25749423520368947, |
| "grad_norm": 3.453125, |
| "learning_rate": 1.7738245556658566e-05, |
| "loss": 0.28285210132598876, |
| "step": 2010, |
| "token_acc": 0.8916788698423637 |
| }, |
| { |
| "epoch": 0.25813476812708175, |
| "grad_norm": 5.4375, |
| "learning_rate": 1.7724810234650086e-05, |
| "loss": 0.2836940050125122, |
| "step": 2015, |
| "token_acc": 0.8885819123677963 |
| }, |
| { |
| "epoch": 0.25877530105047397, |
| "grad_norm": 4.5, |
| "learning_rate": 1.7711340246532433e-05, |
| "loss": 0.28231005668640136, |
| "step": 2020, |
| "token_acc": 0.8901254726710209 |
| }, |
| { |
| "epoch": 0.25941583397386625, |
| "grad_norm": 4.34375, |
| "learning_rate": 1.769783565275396e-05, |
| "loss": 0.27600274085998533, |
| "step": 2025, |
| "token_acc": 0.8926168707952389 |
| }, |
| { |
| "epoch": 0.26005636689725853, |
| "grad_norm": 2.875, |
| "learning_rate": 1.768429651391833e-05, |
| "loss": 0.28248867988586424, |
| "step": 2030, |
| "token_acc": 0.8890473720608575 |
| }, |
| { |
| "epoch": 0.2606968998206508, |
| "grad_norm": 3.8125, |
| "learning_rate": 1.767072289078421e-05, |
| "loss": 0.2763254642486572, |
| "step": 2035, |
| "token_acc": 0.8923250173250173 |
| }, |
| { |
| "epoch": 0.26133743274404303, |
| "grad_norm": 27.5, |
| "learning_rate": 1.7657114844265036e-05, |
| "loss": 0.2861664056777954, |
| "step": 2040, |
| "token_acc": 0.8899861997584958 |
| }, |
| { |
| "epoch": 0.2619779656674353, |
| "grad_norm": 5.46875, |
| "learning_rate": 1.764347243542872e-05, |
| "loss": 0.278385591506958, |
| "step": 2045, |
| "token_acc": 0.8922844175491679 |
| }, |
| { |
| "epoch": 0.2626184985908276, |
| "grad_norm": 7.46875, |
| "learning_rate": 1.7629795725497382e-05, |
| "loss": 0.28106253147125243, |
| "step": 2050, |
| "token_acc": 0.890844918865407 |
| }, |
| { |
| "epoch": 0.2632590315142198, |
| "grad_norm": 3.34375, |
| "learning_rate": 1.7616084775847064e-05, |
| "loss": 0.2838444709777832, |
| "step": 2055, |
| "token_acc": 0.8908127665073007 |
| }, |
| { |
| "epoch": 0.2638995644376121, |
| "grad_norm": 3.484375, |
| "learning_rate": 1.760233964800747e-05, |
| "loss": 0.27664880752563475, |
| "step": 2060, |
| "token_acc": 0.8921712169494445 |
| }, |
| { |
| "epoch": 0.26454009736100437, |
| "grad_norm": 4.03125, |
| "learning_rate": 1.7588560403661686e-05, |
| "loss": 0.2756629228591919, |
| "step": 2065, |
| "token_acc": 0.8935702272629263 |
| }, |
| { |
| "epoch": 0.26518063028439665, |
| "grad_norm": 25.5, |
| "learning_rate": 1.7574747104645894e-05, |
| "loss": 0.28539879322052003, |
| "step": 2070, |
| "token_acc": 0.890754132231405 |
| }, |
| { |
| "epoch": 0.26582116320778887, |
| "grad_norm": 3.3125, |
| "learning_rate": 1.7560899812949097e-05, |
| "loss": 0.28184425830841064, |
| "step": 2075, |
| "token_acc": 0.8913267940113577 |
| }, |
| { |
| "epoch": 0.26646169613118115, |
| "grad_norm": 3.375, |
| "learning_rate": 1.7547018590712862e-05, |
| "loss": 0.2689033508300781, |
| "step": 2080, |
| "token_acc": 0.895397489539749 |
| }, |
| { |
| "epoch": 0.2671022290545734, |
| "grad_norm": 4.59375, |
| "learning_rate": 1.7533103500231002e-05, |
| "loss": 0.2777507543563843, |
| "step": 2085, |
| "token_acc": 0.892097198843282 |
| }, |
| { |
| "epoch": 0.26774276197796565, |
| "grad_norm": 4.28125, |
| "learning_rate": 1.7519154603949332e-05, |
| "loss": 0.2816345691680908, |
| "step": 2090, |
| "token_acc": 0.889937781591933 |
| }, |
| { |
| "epoch": 0.2683832949013579, |
| "grad_norm": 4.9375, |
| "learning_rate": 1.750517196446538e-05, |
| "loss": 0.27451438903808595, |
| "step": 2095, |
| "token_acc": 0.8933247200689061 |
| }, |
| { |
| "epoch": 0.2690238278247502, |
| "grad_norm": 3.828125, |
| "learning_rate": 1.749115564452808e-05, |
| "loss": 0.28593323230743406, |
| "step": 2100, |
| "token_acc": 0.8909067435555365 |
| }, |
| { |
| "epoch": 0.2690238278247502, |
| "eval_loss": 0.3338375389575958, |
| "eval_runtime": 104.1317, |
| "eval_samples_per_second": 96.032, |
| "eval_steps_per_second": 12.004, |
| "eval_token_acc": 0.8768458178073317, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.26966436074814243, |
| "grad_norm": 2.921875, |
| "learning_rate": 1.747710570703753e-05, |
| "loss": 0.28068857192993163, |
| "step": 2105, |
| "token_acc": 0.8920412834132228 |
| }, |
| { |
| "epoch": 0.2703048936715347, |
| "grad_norm": 3.734375, |
| "learning_rate": 1.7463022215044686e-05, |
| "loss": 0.2719306945800781, |
| "step": 2110, |
| "token_acc": 0.8959989630141721 |
| }, |
| { |
| "epoch": 0.270945426594927, |
| "grad_norm": 3.609375, |
| "learning_rate": 1.7448905231751086e-05, |
| "loss": 0.27764501571655276, |
| "step": 2115, |
| "token_acc": 0.8928232144399344 |
| }, |
| { |
| "epoch": 0.27158595951831926, |
| "grad_norm": 2.59375, |
| "learning_rate": 1.743475482050856e-05, |
| "loss": 0.27620573043823243, |
| "step": 2120, |
| "token_acc": 0.8925562707910313 |
| }, |
| { |
| "epoch": 0.2722264924417115, |
| "grad_norm": 8.0, |
| "learning_rate": 1.7420571044818954e-05, |
| "loss": 0.27559990882873536, |
| "step": 2125, |
| "token_acc": 0.8930252645217016 |
| }, |
| { |
| "epoch": 0.27286702536510377, |
| "grad_norm": 2.71875, |
| "learning_rate": 1.7406353968333837e-05, |
| "loss": 0.2709467887878418, |
| "step": 2130, |
| "token_acc": 0.8944920546057448 |
| }, |
| { |
| "epoch": 0.27350755828849604, |
| "grad_norm": 3.796875, |
| "learning_rate": 1.7392103654854223e-05, |
| "loss": 0.27666122913360597, |
| "step": 2135, |
| "token_acc": 0.8931367037149159 |
| }, |
| { |
| "epoch": 0.27414809121188827, |
| "grad_norm": 12.5, |
| "learning_rate": 1.7377820168330285e-05, |
| "loss": 0.28263001441955565, |
| "step": 2140, |
| "token_acc": 0.8904274533413606 |
| }, |
| { |
| "epoch": 0.27478862413528055, |
| "grad_norm": 2.40625, |
| "learning_rate": 1.7363503572861066e-05, |
| "loss": 0.2721690654754639, |
| "step": 2145, |
| "token_acc": 0.8954508143603923 |
| }, |
| { |
| "epoch": 0.2754291570586728, |
| "grad_norm": 3.265625, |
| "learning_rate": 1.734915393269417e-05, |
| "loss": 0.28317282199859617, |
| "step": 2150, |
| "token_acc": 0.8910895342842413 |
| }, |
| { |
| "epoch": 0.2760696899820651, |
| "grad_norm": 16.75, |
| "learning_rate": 1.733477131222552e-05, |
| "loss": 0.27765982151031493, |
| "step": 2155, |
| "token_acc": 0.8932398123843539 |
| }, |
| { |
| "epoch": 0.2767102229054573, |
| "grad_norm": 4.90625, |
| "learning_rate": 1.7320355775999024e-05, |
| "loss": 0.2786709785461426, |
| "step": 2160, |
| "token_acc": 0.8914231613375221 |
| }, |
| { |
| "epoch": 0.2773507558288496, |
| "grad_norm": 6.21875, |
| "learning_rate": 1.7305907388706312e-05, |
| "loss": 0.28313846588134767, |
| "step": 2165, |
| "token_acc": 0.8894102453723634 |
| }, |
| { |
| "epoch": 0.2779912887522419, |
| "grad_norm": 4.96875, |
| "learning_rate": 1.7291426215186436e-05, |
| "loss": 0.27286443710327146, |
| "step": 2170, |
| "token_acc": 0.892789455547898 |
| }, |
| { |
| "epoch": 0.2786318216756341, |
| "grad_norm": 3.828125, |
| "learning_rate": 1.7276912320425584e-05, |
| "loss": 0.270449161529541, |
| "step": 2175, |
| "token_acc": 0.8942574600971548 |
| }, |
| { |
| "epoch": 0.2792723545990264, |
| "grad_norm": 3.84375, |
| "learning_rate": 1.726236576955678e-05, |
| "loss": 0.26513283252716063, |
| "step": 2180, |
| "token_acc": 0.8964592970472526 |
| }, |
| { |
| "epoch": 0.27991288752241866, |
| "grad_norm": 4.4375, |
| "learning_rate": 1.7247786627859594e-05, |
| "loss": 0.2790388822555542, |
| "step": 2185, |
| "token_acc": 0.8934532002752925 |
| }, |
| { |
| "epoch": 0.28055342044581094, |
| "grad_norm": 4.5625, |
| "learning_rate": 1.7233174960759855e-05, |
| "loss": 0.2737919807434082, |
| "step": 2190, |
| "token_acc": 0.8916871152438762 |
| }, |
| { |
| "epoch": 0.28119395336920316, |
| "grad_norm": 4.03125, |
| "learning_rate": 1.721853083382936e-05, |
| "loss": 0.2736166715621948, |
| "step": 2195, |
| "token_acc": 0.8931646005509641 |
| }, |
| { |
| "epoch": 0.28183448629259544, |
| "grad_norm": 2.703125, |
| "learning_rate": 1.7203854312785565e-05, |
| "loss": 0.26971442699432374, |
| "step": 2200, |
| "token_acc": 0.8943965517241379 |
| }, |
| { |
| "epoch": 0.28183448629259544, |
| "eval_loss": 0.33342963457107544, |
| "eval_runtime": 105.1061, |
| "eval_samples_per_second": 95.142, |
| "eval_steps_per_second": 11.893, |
| "eval_token_acc": 0.8773164425201123, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.2824750192159877, |
| "grad_norm": 4.46875, |
| "learning_rate": 1.7189145463491303e-05, |
| "loss": 0.271907377243042, |
| "step": 2205, |
| "token_acc": 0.8919140136992203 |
| }, |
| { |
| "epoch": 0.28311555213937994, |
| "grad_norm": 3.109375, |
| "learning_rate": 1.7174404351954485e-05, |
| "loss": 0.2717395782470703, |
| "step": 2210, |
| "token_acc": 0.8932424268576853 |
| }, |
| { |
| "epoch": 0.2837560850627722, |
| "grad_norm": 4.375, |
| "learning_rate": 1.7159631044327798e-05, |
| "loss": 0.26909971237182617, |
| "step": 2215, |
| "token_acc": 0.8958585509251253 |
| }, |
| { |
| "epoch": 0.2843966179861645, |
| "grad_norm": 9.0, |
| "learning_rate": 1.714482560690842e-05, |
| "loss": 0.2807865858078003, |
| "step": 2220, |
| "token_acc": 0.8910201273008773 |
| }, |
| { |
| "epoch": 0.2850371509095567, |
| "grad_norm": 4.71875, |
| "learning_rate": 1.7129988106137715e-05, |
| "loss": 0.2830962657928467, |
| "step": 2225, |
| "token_acc": 0.8901977644024076 |
| }, |
| { |
| "epoch": 0.285677683832949, |
| "grad_norm": 5.28125, |
| "learning_rate": 1.7115118608600925e-05, |
| "loss": 0.2666552782058716, |
| "step": 2230, |
| "token_acc": 0.8958189058171745 |
| }, |
| { |
| "epoch": 0.2863182167563413, |
| "grad_norm": 6.09375, |
| "learning_rate": 1.7100217181026898e-05, |
| "loss": 0.2754360198974609, |
| "step": 2235, |
| "token_acc": 0.8915355351893481 |
| }, |
| { |
| "epoch": 0.28695874967973356, |
| "grad_norm": 3.921875, |
| "learning_rate": 1.708528389028776e-05, |
| "loss": 0.2791600227355957, |
| "step": 2240, |
| "token_acc": 0.8892330193143201 |
| }, |
| { |
| "epoch": 0.2875992826031258, |
| "grad_norm": 5.71875, |
| "learning_rate": 1.707031880339863e-05, |
| "loss": 0.27956390380859375, |
| "step": 2245, |
| "token_acc": 0.8920872595843552 |
| }, |
| { |
| "epoch": 0.28823981552651806, |
| "grad_norm": 3.328125, |
| "learning_rate": 1.705532198751732e-05, |
| "loss": 0.27212765216827395, |
| "step": 2250, |
| "token_acc": 0.8932557638439992 |
| }, |
| { |
| "epoch": 0.28888034844991034, |
| "grad_norm": 8.1875, |
| "learning_rate": 1.7040293509944027e-05, |
| "loss": 0.27141647338867186, |
| "step": 2255, |
| "token_acc": 0.8947846000950611 |
| }, |
| { |
| "epoch": 0.28952088137330256, |
| "grad_norm": 3.5625, |
| "learning_rate": 1.7025233438121037e-05, |
| "loss": 0.27087936401367185, |
| "step": 2260, |
| "token_acc": 0.8955649693092418 |
| }, |
| { |
| "epoch": 0.29016141429669484, |
| "grad_norm": 3.59375, |
| "learning_rate": 1.7010141839632417e-05, |
| "loss": 0.27354631423950193, |
| "step": 2265, |
| "token_acc": 0.8951717573764818 |
| }, |
| { |
| "epoch": 0.2908019472200871, |
| "grad_norm": 3.0625, |
| "learning_rate": 1.699501878220371e-05, |
| "loss": 0.27661924362182616, |
| "step": 2270, |
| "token_acc": 0.8932264736297828 |
| }, |
| { |
| "epoch": 0.2914424801434794, |
| "grad_norm": 2.46875, |
| "learning_rate": 1.6979864333701645e-05, |
| "loss": 0.271943473815918, |
| "step": 2275, |
| "token_acc": 0.8923408845738943 |
| }, |
| { |
| "epoch": 0.2920830130668716, |
| "grad_norm": 9.75, |
| "learning_rate": 1.6964678562133815e-05, |
| "loss": 0.27072222232818605, |
| "step": 2280, |
| "token_acc": 0.8939870012482245 |
| }, |
| { |
| "epoch": 0.2927235459902639, |
| "grad_norm": 2.90625, |
| "learning_rate": 1.6949461535648377e-05, |
| "loss": 0.26898555755615233, |
| "step": 2285, |
| "token_acc": 0.8956255128039038 |
| }, |
| { |
| "epoch": 0.2933640789136562, |
| "grad_norm": 3.390625, |
| "learning_rate": 1.6934213322533758e-05, |
| "loss": 0.27256574630737307, |
| "step": 2290, |
| "token_acc": 0.8912772451743262 |
| }, |
| { |
| "epoch": 0.2940046118370484, |
| "grad_norm": 28.0, |
| "learning_rate": 1.6918933991218333e-05, |
| "loss": 0.28531837463378906, |
| "step": 2295, |
| "token_acc": 0.8895293813989503 |
| }, |
| { |
| "epoch": 0.2946451447604407, |
| "grad_norm": 8.8125, |
| "learning_rate": 1.6903623610270127e-05, |
| "loss": 0.28380842208862306, |
| "step": 2300, |
| "token_acc": 0.8899965475573969 |
| }, |
| { |
| "epoch": 0.2946451447604407, |
| "eval_loss": 0.32995760440826416, |
| "eval_runtime": 105.2266, |
| "eval_samples_per_second": 95.033, |
| "eval_steps_per_second": 11.879, |
| "eval_token_acc": 0.8770423728344342, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.29528567768383296, |
| "grad_norm": 33.5, |
| "learning_rate": 1.6888282248396498e-05, |
| "loss": 0.2725163459777832, |
| "step": 2305, |
| "token_acc": 0.8936618507051943 |
| }, |
| { |
| "epoch": 0.29592621060722524, |
| "grad_norm": 3.609375, |
| "learning_rate": 1.6872909974443847e-05, |
| "loss": 0.2721263885498047, |
| "step": 2310, |
| "token_acc": 0.8947889750215332 |
| }, |
| { |
| "epoch": 0.29656674353061746, |
| "grad_norm": 4.4375, |
| "learning_rate": 1.685750685739728e-05, |
| "loss": 0.27622146606445314, |
| "step": 2315, |
| "token_acc": 0.8931007685001295 |
| }, |
| { |
| "epoch": 0.29720727645400974, |
| "grad_norm": 2.859375, |
| "learning_rate": 1.6842072966380333e-05, |
| "loss": 0.274534273147583, |
| "step": 2320, |
| "token_acc": 0.8931854473263994 |
| }, |
| { |
| "epoch": 0.297847809377402, |
| "grad_norm": 3.375, |
| "learning_rate": 1.682660837065463e-05, |
| "loss": 0.2757120132446289, |
| "step": 2325, |
| "token_acc": 0.8937096079276174 |
| }, |
| { |
| "epoch": 0.29848834230079424, |
| "grad_norm": 3.1875, |
| "learning_rate": 1.6811113139619596e-05, |
| "loss": 0.276756739616394, |
| "step": 2330, |
| "token_acc": 0.8923500559332244 |
| }, |
| { |
| "epoch": 0.2991288752241865, |
| "grad_norm": 3.125, |
| "learning_rate": 1.6795587342812137e-05, |
| "loss": 0.2754298448562622, |
| "step": 2335, |
| "token_acc": 0.8925148925148925 |
| }, |
| { |
| "epoch": 0.2997694081475788, |
| "grad_norm": 3.484375, |
| "learning_rate": 1.6780031049906317e-05, |
| "loss": 0.2664804935455322, |
| "step": 2340, |
| "token_acc": 0.8968724315438854 |
| }, |
| { |
| "epoch": 0.300409941070971, |
| "grad_norm": 2.640625, |
| "learning_rate": 1.6764444330713062e-05, |
| "loss": 0.2691181182861328, |
| "step": 2345, |
| "token_acc": 0.8944920546057448 |
| }, |
| { |
| "epoch": 0.3010504739943633, |
| "grad_norm": 3.65625, |
| "learning_rate": 1.674882725517984e-05, |
| "loss": 0.27036800384521487, |
| "step": 2350, |
| "token_acc": 0.8949255020513928 |
| }, |
| { |
| "epoch": 0.3016910069177556, |
| "grad_norm": 2.578125, |
| "learning_rate": 1.6733179893390342e-05, |
| "loss": 0.2797673463821411, |
| "step": 2355, |
| "token_acc": 0.8923209292320929 |
| }, |
| { |
| "epoch": 0.30233153984114786, |
| "grad_norm": 13.1875, |
| "learning_rate": 1.671750231556419e-05, |
| "loss": 0.2723313093185425, |
| "step": 2360, |
| "token_acc": 0.894600767009954 |
| }, |
| { |
| "epoch": 0.3029720727645401, |
| "grad_norm": 5.21875, |
| "learning_rate": 1.6701794592056572e-05, |
| "loss": 0.26928038597106935, |
| "step": 2365, |
| "token_acc": 0.8950982509177283 |
| }, |
| { |
| "epoch": 0.30361260568793236, |
| "grad_norm": 2.703125, |
| "learning_rate": 1.6686056793357993e-05, |
| "loss": 0.27132067680358884, |
| "step": 2370, |
| "token_acc": 0.8939844120053395 |
| }, |
| { |
| "epoch": 0.30425313861132464, |
| "grad_norm": 3.28125, |
| "learning_rate": 1.6670288990093904e-05, |
| "loss": 0.2636139392852783, |
| "step": 2375, |
| "token_acc": 0.8964771990490599 |
| }, |
| { |
| "epoch": 0.30489367153471686, |
| "grad_norm": 2.375, |
| "learning_rate": 1.665449125302441e-05, |
| "loss": 0.2698176860809326, |
| "step": 2380, |
| "token_acc": 0.8951257453979777 |
| }, |
| { |
| "epoch": 0.30553420445810914, |
| "grad_norm": 4.96875, |
| "learning_rate": 1.663866365304395e-05, |
| "loss": 0.27487883567810056, |
| "step": 2385, |
| "token_acc": 0.8932954398656504 |
| }, |
| { |
| "epoch": 0.3061747373815014, |
| "grad_norm": 2.6875, |
| "learning_rate": 1.6622806261180975e-05, |
| "loss": 0.27799344062805176, |
| "step": 2390, |
| "token_acc": 0.8914015477214101 |
| }, |
| { |
| "epoch": 0.3068152703048937, |
| "grad_norm": 6.71875, |
| "learning_rate": 1.660691914859763e-05, |
| "loss": 0.2766709566116333, |
| "step": 2395, |
| "token_acc": 0.8933596431022649 |
| }, |
| { |
| "epoch": 0.3074558032282859, |
| "grad_norm": 3.03125, |
| "learning_rate": 1.659100238658944e-05, |
| "loss": 0.2766282081604004, |
| "step": 2400, |
| "token_acc": 0.8921505237294711 |
| }, |
| { |
| "epoch": 0.3074558032282859, |
| "eval_loss": 0.33286821842193604, |
| "eval_runtime": 102.9833, |
| "eval_samples_per_second": 97.103, |
| "eval_steps_per_second": 12.138, |
| "eval_token_acc": 0.8770562147377513, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.3080963361516782, |
| "grad_norm": 2.765625, |
| "learning_rate": 1.6575056046584982e-05, |
| "loss": 0.2664001703262329, |
| "step": 2405, |
| "token_acc": 0.8966157299490632 |
| }, |
| { |
| "epoch": 0.3087368690750705, |
| "grad_norm": 2.921875, |
| "learning_rate": 1.6559080200145565e-05, |
| "loss": 0.2731971740722656, |
| "step": 2410, |
| "token_acc": 0.8921074184232078 |
| }, |
| { |
| "epoch": 0.3093774019984627, |
| "grad_norm": 3.796875, |
| "learning_rate": 1.6543074918964923e-05, |
| "loss": 0.27004868984222413, |
| "step": 2415, |
| "token_acc": 0.8929048954065129 |
| }, |
| { |
| "epoch": 0.310017934921855, |
| "grad_norm": 2.71875, |
| "learning_rate": 1.652704027486887e-05, |
| "loss": 0.27138233184814453, |
| "step": 2420, |
| "token_acc": 0.8945260347129506 |
| }, |
| { |
| "epoch": 0.31065846784524725, |
| "grad_norm": 3.078125, |
| "learning_rate": 1.6510976339814998e-05, |
| "loss": 0.27827138900756837, |
| "step": 2425, |
| "token_acc": 0.8920770324415148 |
| }, |
| { |
| "epoch": 0.31129900076863953, |
| "grad_norm": 3.578125, |
| "learning_rate": 1.6494883185892345e-05, |
| "loss": 0.268726110458374, |
| "step": 2430, |
| "token_acc": 0.8919771764502464 |
| }, |
| { |
| "epoch": 0.31193953369203176, |
| "grad_norm": 6.4375, |
| "learning_rate": 1.647876088532107e-05, |
| "loss": 0.27257063388824465, |
| "step": 2435, |
| "token_acc": 0.8935447825339522 |
| }, |
| { |
| "epoch": 0.31258006661542403, |
| "grad_norm": 3.578125, |
| "learning_rate": 1.6462609510452126e-05, |
| "loss": 0.27083382606506345, |
| "step": 2440, |
| "token_acc": 0.8954088212535466 |
| }, |
| { |
| "epoch": 0.3132205995388163, |
| "grad_norm": 3.078125, |
| "learning_rate": 1.6446429133766955e-05, |
| "loss": 0.2705575942993164, |
| "step": 2445, |
| "token_acc": 0.8939740326963723 |
| }, |
| { |
| "epoch": 0.31386113246220854, |
| "grad_norm": 2.6875, |
| "learning_rate": 1.6430219827877137e-05, |
| "loss": 0.27445831298828127, |
| "step": 2450, |
| "token_acc": 0.892545649838883 |
| }, |
| { |
| "epoch": 0.3145016653856008, |
| "grad_norm": 2.78125, |
| "learning_rate": 1.641398166552408e-05, |
| "loss": 0.26441996097564696, |
| "step": 2455, |
| "token_acc": 0.8965159505489755 |
| }, |
| { |
| "epoch": 0.3151421983089931, |
| "grad_norm": 4.3125, |
| "learning_rate": 1.6397714719578692e-05, |
| "loss": 0.2621718406677246, |
| "step": 2460, |
| "token_acc": 0.8974180950314602 |
| }, |
| { |
| "epoch": 0.31578273123238537, |
| "grad_norm": 4.96875, |
| "learning_rate": 1.6381419063041044e-05, |
| "loss": 0.2664108991622925, |
| "step": 2465, |
| "token_acc": 0.8976900534390623 |
| }, |
| { |
| "epoch": 0.3164232641557776, |
| "grad_norm": 3.171875, |
| "learning_rate": 1.636509476904005e-05, |
| "loss": 0.26438174247741697, |
| "step": 2470, |
| "token_acc": 0.8964934333145508 |
| }, |
| { |
| "epoch": 0.3170637970791699, |
| "grad_norm": 3.390625, |
| "learning_rate": 1.634874191083315e-05, |
| "loss": 0.2664673328399658, |
| "step": 2475, |
| "token_acc": 0.8969490355154706 |
| }, |
| { |
| "epoch": 0.31770433000256215, |
| "grad_norm": 7.3125, |
| "learning_rate": 1.6332360561805953e-05, |
| "loss": 0.2602536678314209, |
| "step": 2480, |
| "token_acc": 0.8977645434144658 |
| }, |
| { |
| "epoch": 0.3183448629259544, |
| "grad_norm": 3.0625, |
| "learning_rate": 1.631595079547194e-05, |
| "loss": 0.26571226119995117, |
| "step": 2485, |
| "token_acc": 0.8940942154485625 |
| }, |
| { |
| "epoch": 0.31898539584934665, |
| "grad_norm": 2.953125, |
| "learning_rate": 1.6299512685472104e-05, |
| "loss": 0.2715281009674072, |
| "step": 2490, |
| "token_acc": 0.8944671689989235 |
| }, |
| { |
| "epoch": 0.31962592877273893, |
| "grad_norm": 2.375, |
| "learning_rate": 1.6283046305574646e-05, |
| "loss": 0.26947875022888185, |
| "step": 2495, |
| "token_acc": 0.8938190607734806 |
| }, |
| { |
| "epoch": 0.32026646169613116, |
| "grad_norm": 2.65625, |
| "learning_rate": 1.6266551729674625e-05, |
| "loss": 0.26917757987976076, |
| "step": 2500, |
| "token_acc": 0.8948387096774194 |
| }, |
| { |
| "epoch": 0.32026646169613116, |
| "eval_loss": 0.33583664894104004, |
| "eval_runtime": 101.9417, |
| "eval_samples_per_second": 98.095, |
| "eval_steps_per_second": 12.262, |
| "eval_token_acc": 0.8780445266345903, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.32090699461952343, |
| "grad_norm": 2.8125, |
| "learning_rate": 1.6250029031793637e-05, |
| "loss": 0.26485161781311034, |
| "step": 2505, |
| "token_acc": 0.8968752690023242 |
| }, |
| { |
| "epoch": 0.3215475275429157, |
| "grad_norm": 3.515625, |
| "learning_rate": 1.623347828607948e-05, |
| "loss": 0.27197585105895994, |
| "step": 2510, |
| "token_acc": 0.8942249763318703 |
| }, |
| { |
| "epoch": 0.322188060466308, |
| "grad_norm": 3.53125, |
| "learning_rate": 1.621689956680581e-05, |
| "loss": 0.26804704666137696, |
| "step": 2515, |
| "token_acc": 0.8950142573230796 |
| }, |
| { |
| "epoch": 0.3228285933897002, |
| "grad_norm": 3.390625, |
| "learning_rate": 1.6200292948371826e-05, |
| "loss": 0.27621660232543943, |
| "step": 2520, |
| "token_acc": 0.8921281543364051 |
| }, |
| { |
| "epoch": 0.3234691263130925, |
| "grad_norm": 7.625, |
| "learning_rate": 1.6183658505301937e-05, |
| "loss": 0.270648455619812, |
| "step": 2525, |
| "token_acc": 0.8940274727640701 |
| }, |
| { |
| "epoch": 0.32410965923648477, |
| "grad_norm": 3.9375, |
| "learning_rate": 1.6166996312245403e-05, |
| "loss": 0.2624387502670288, |
| "step": 2530, |
| "token_acc": 0.8973961027763407 |
| }, |
| { |
| "epoch": 0.324750192159877, |
| "grad_norm": 18.0, |
| "learning_rate": 1.6150306443976026e-05, |
| "loss": 0.270206356048584, |
| "step": 2535, |
| "token_acc": 0.8947050707140394 |
| }, |
| { |
| "epoch": 0.32539072508326927, |
| "grad_norm": 7.5625, |
| "learning_rate": 1.6133588975391793e-05, |
| "loss": 0.26768012046813966, |
| "step": 2540, |
| "token_acc": 0.8949362728212195 |
| }, |
| { |
| "epoch": 0.32603125800666155, |
| "grad_norm": 2.53125, |
| "learning_rate": 1.6116843981514568e-05, |
| "loss": 0.265167760848999, |
| "step": 2545, |
| "token_acc": 0.895794614686433 |
| }, |
| { |
| "epoch": 0.32667179093005383, |
| "grad_norm": 2.34375, |
| "learning_rate": 1.6100071537489726e-05, |
| "loss": 0.2654293060302734, |
| "step": 2550, |
| "token_acc": 0.8955745341614907 |
| }, |
| { |
| "epoch": 0.32731232385344605, |
| "grad_norm": 4.71875, |
| "learning_rate": 1.6083271718585828e-05, |
| "loss": 0.2678376197814941, |
| "step": 2555, |
| "token_acc": 0.8955519229114687 |
| }, |
| { |
| "epoch": 0.32795285677683833, |
| "grad_norm": 4.8125, |
| "learning_rate": 1.606644460019429e-05, |
| "loss": 0.2697244644165039, |
| "step": 2560, |
| "token_acc": 0.8963868911760906 |
| }, |
| { |
| "epoch": 0.3285933897002306, |
| "grad_norm": 3.390625, |
| "learning_rate": 1.604959025782904e-05, |
| "loss": 0.2643167972564697, |
| "step": 2565, |
| "token_acc": 0.8960286936606024 |
| }, |
| { |
| "epoch": 0.32923392262362283, |
| "grad_norm": 4.0625, |
| "learning_rate": 1.6032708767126158e-05, |
| "loss": 0.2669541835784912, |
| "step": 2570, |
| "token_acc": 0.8956862407439298 |
| }, |
| { |
| "epoch": 0.3298744555470151, |
| "grad_norm": 7.125, |
| "learning_rate": 1.601580020384358e-05, |
| "loss": 0.27081780433654784, |
| "step": 2575, |
| "token_acc": 0.8950130095403296 |
| }, |
| { |
| "epoch": 0.3305149884704074, |
| "grad_norm": 6.9375, |
| "learning_rate": 1.5998864643860723e-05, |
| "loss": 0.25800356864929197, |
| "step": 2580, |
| "token_acc": 0.8975890576981345 |
| }, |
| { |
| "epoch": 0.33115552139379967, |
| "grad_norm": 3.078125, |
| "learning_rate": 1.5981902163178152e-05, |
| "loss": 0.26956448554992674, |
| "step": 2585, |
| "token_acc": 0.8931310867878997 |
| }, |
| { |
| "epoch": 0.3317960543171919, |
| "grad_norm": 2.34375, |
| "learning_rate": 1.596491283791725e-05, |
| "loss": 0.26112003326416017, |
| "step": 2590, |
| "token_acc": 0.8994970516822755 |
| }, |
| { |
| "epoch": 0.33243658724058417, |
| "grad_norm": 3.265625, |
| "learning_rate": 1.594789674431986e-05, |
| "loss": 0.27035064697265626, |
| "step": 2595, |
| "token_acc": 0.8960434445306439 |
| }, |
| { |
| "epoch": 0.33307712016397645, |
| "grad_norm": 3.140625, |
| "learning_rate": 1.593085395874796e-05, |
| "loss": 0.27758283615112306, |
| "step": 2600, |
| "token_acc": 0.8915802607236587 |
| }, |
| { |
| "epoch": 0.33307712016397645, |
| "eval_loss": 0.3308471143245697, |
| "eval_runtime": 103.8949, |
| "eval_samples_per_second": 96.251, |
| "eval_steps_per_second": 12.031, |
| "eval_token_acc": 0.8782355449003659, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.33371765308736867, |
| "grad_norm": 3.203125, |
| "learning_rate": 1.5913784557683304e-05, |
| "loss": 0.2707799196243286, |
| "step": 2605, |
| "token_acc": 0.8923612603705455 |
| }, |
| { |
| "epoch": 0.33435818601076095, |
| "grad_norm": 2.609375, |
| "learning_rate": 1.5896688617727095e-05, |
| "loss": 0.2663607120513916, |
| "step": 2610, |
| "token_acc": 0.8958297432362571 |
| }, |
| { |
| "epoch": 0.33499871893415323, |
| "grad_norm": 2.859375, |
| "learning_rate": 1.5879566215599623e-05, |
| "loss": 0.2679924488067627, |
| "step": 2615, |
| "token_acc": 0.8955616958811847 |
| }, |
| { |
| "epoch": 0.33563925185754545, |
| "grad_norm": 3.046875, |
| "learning_rate": 1.5862417428139938e-05, |
| "loss": 0.268009090423584, |
| "step": 2620, |
| "token_acc": 0.8962800309997416 |
| }, |
| { |
| "epoch": 0.33627978478093773, |
| "grad_norm": 3.234375, |
| "learning_rate": 1.5845242332305496e-05, |
| "loss": 0.257326078414917, |
| "step": 2625, |
| "token_acc": 0.8986603284356093 |
| }, |
| { |
| "epoch": 0.33692031770433, |
| "grad_norm": 3.078125, |
| "learning_rate": 1.5828041005171818e-05, |
| "loss": 0.2634852647781372, |
| "step": 2630, |
| "token_acc": 0.8965665605369589 |
| }, |
| { |
| "epoch": 0.3375608506277223, |
| "grad_norm": 3.453125, |
| "learning_rate": 1.581081352393213e-05, |
| "loss": 0.2582373857498169, |
| "step": 2635, |
| "token_acc": 0.8991817398794143 |
| }, |
| { |
| "epoch": 0.3382013835511145, |
| "grad_norm": 3.6875, |
| "learning_rate": 1.5793559965897042e-05, |
| "loss": 0.27222495079040526, |
| "step": 2640, |
| "token_acc": 0.891765924391507 |
| }, |
| { |
| "epoch": 0.3388419164745068, |
| "grad_norm": 2.703125, |
| "learning_rate": 1.577628040849418e-05, |
| "loss": 0.2661598205566406, |
| "step": 2645, |
| "token_acc": 0.8968578940562907 |
| }, |
| { |
| "epoch": 0.33948244939789907, |
| "grad_norm": 5.53125, |
| "learning_rate": 1.5758974929267844e-05, |
| "loss": 0.2645248889923096, |
| "step": 2650, |
| "token_acc": 0.899624563069089 |
| }, |
| { |
| "epoch": 0.3401229823212913, |
| "grad_norm": 3.359375, |
| "learning_rate": 1.574164360587867e-05, |
| "loss": 0.2611443281173706, |
| "step": 2655, |
| "token_acc": 0.8988632925616977 |
| }, |
| { |
| "epoch": 0.34076351524468357, |
| "grad_norm": 2.328125, |
| "learning_rate": 1.572428651610326e-05, |
| "loss": 0.27028732299804686, |
| "step": 2660, |
| "token_acc": 0.8950970685721665 |
| }, |
| { |
| "epoch": 0.34140404816807585, |
| "grad_norm": 2.609375, |
| "learning_rate": 1.570690373783386e-05, |
| "loss": 0.2680711269378662, |
| "step": 2665, |
| "token_acc": 0.8941867495897037 |
| }, |
| { |
| "epoch": 0.3420445810914681, |
| "grad_norm": 2.796875, |
| "learning_rate": 1.5689495349077984e-05, |
| "loss": 0.2609850406646729, |
| "step": 2670, |
| "token_acc": 0.8973326405126872 |
| }, |
| { |
| "epoch": 0.34268511401486035, |
| "grad_norm": 2.984375, |
| "learning_rate": 1.5672061427958086e-05, |
| "loss": 0.26308517456054686, |
| "step": 2675, |
| "token_acc": 0.896771416272062 |
| }, |
| { |
| "epoch": 0.3433256469382526, |
| "grad_norm": 2.875, |
| "learning_rate": 1.5654602052711202e-05, |
| "loss": 0.27320644855499265, |
| "step": 2680, |
| "token_acc": 0.8943637769567833 |
| }, |
| { |
| "epoch": 0.3439661798616449, |
| "grad_norm": 2.515625, |
| "learning_rate": 1.563711730168858e-05, |
| "loss": 0.26294333934783937, |
| "step": 2685, |
| "token_acc": 0.8980288752485519 |
| }, |
| { |
| "epoch": 0.34460671278503713, |
| "grad_norm": 2.5, |
| "learning_rate": 1.5619607253355365e-05, |
| "loss": 0.2679460048675537, |
| "step": 2690, |
| "token_acc": 0.8959211771792445 |
| }, |
| { |
| "epoch": 0.3452472457084294, |
| "grad_norm": 2.625, |
| "learning_rate": 1.5602071986290214e-05, |
| "loss": 0.2540433883666992, |
| "step": 2695, |
| "token_acc": 0.8991190188288133 |
| }, |
| { |
| "epoch": 0.3458877786318217, |
| "grad_norm": 3.734375, |
| "learning_rate": 1.558451157918496e-05, |
| "loss": 0.26918482780456543, |
| "step": 2700, |
| "token_acc": 0.8953965852604279 |
| }, |
| { |
| "epoch": 0.3458877786318217, |
| "eval_loss": 0.33141687512397766, |
| "eval_runtime": 102.8827, |
| "eval_samples_per_second": 97.198, |
| "eval_steps_per_second": 12.15, |
| "eval_token_acc": 0.8783047544169513, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.34652831155521396, |
| "grad_norm": 2.8125, |
| "learning_rate": 1.556692611084426e-05, |
| "loss": 0.2630035400390625, |
| "step": 2705, |
| "token_acc": 0.8983968711049986 |
| }, |
| { |
| "epoch": 0.3471688444786062, |
| "grad_norm": 5.09375, |
| "learning_rate": 1.554931566018523e-05, |
| "loss": 0.26360278129577636, |
| "step": 2710, |
| "token_acc": 0.8968615649183147 |
| }, |
| { |
| "epoch": 0.34780937740199847, |
| "grad_norm": 2.671875, |
| "learning_rate": 1.55316803062371e-05, |
| "loss": 0.25818705558776855, |
| "step": 2715, |
| "token_acc": 0.8981868297514967 |
| }, |
| { |
| "epoch": 0.34844991032539074, |
| "grad_norm": 2.921875, |
| "learning_rate": 1.5514020128140854e-05, |
| "loss": 0.26247010231018064, |
| "step": 2720, |
| "token_acc": 0.8978712401965009 |
| }, |
| { |
| "epoch": 0.34909044324878297, |
| "grad_norm": 3.3125, |
| "learning_rate": 1.5496335205148888e-05, |
| "loss": 0.26362130641937254, |
| "step": 2725, |
| "token_acc": 0.8962732651034244 |
| }, |
| { |
| "epoch": 0.34973097617217525, |
| "grad_norm": 2.328125, |
| "learning_rate": 1.547862561662463e-05, |
| "loss": 0.26531424522399905, |
| "step": 2730, |
| "token_acc": 0.8975365632684758 |
| }, |
| { |
| "epoch": 0.3503715090955675, |
| "grad_norm": 5.3125, |
| "learning_rate": 1.546089144204221e-05, |
| "loss": 0.2550010919570923, |
| "step": 2735, |
| "token_acc": 0.9006299620296859 |
| }, |
| { |
| "epoch": 0.3510120420189598, |
| "grad_norm": 2.890625, |
| "learning_rate": 1.5443132760986077e-05, |
| "loss": 0.25297343730926514, |
| "step": 2740, |
| "token_acc": 0.9003972023141352 |
| }, |
| { |
| "epoch": 0.351652574942352, |
| "grad_norm": 3.421875, |
| "learning_rate": 1.5425349653150674e-05, |
| "loss": 0.2688558578491211, |
| "step": 2745, |
| "token_acc": 0.8946644309729567 |
| }, |
| { |
| "epoch": 0.3522931078657443, |
| "grad_norm": 3.765625, |
| "learning_rate": 1.5407542198340045e-05, |
| "loss": 0.25696539878845215, |
| "step": 2750, |
| "token_acc": 0.8994601597927013 |
| }, |
| { |
| "epoch": 0.3529336407891366, |
| "grad_norm": 2.671875, |
| "learning_rate": 1.538971047646751e-05, |
| "loss": 0.2645355224609375, |
| "step": 2755, |
| "token_acc": 0.8972590932597828 |
| }, |
| { |
| "epoch": 0.3535741737125288, |
| "grad_norm": 3.09375, |
| "learning_rate": 1.537185456755528e-05, |
| "loss": 0.2609572410583496, |
| "step": 2760, |
| "token_acc": 0.8978304088512404 |
| }, |
| { |
| "epoch": 0.3542147066359211, |
| "grad_norm": 3.28125, |
| "learning_rate": 1.5353974551734102e-05, |
| "loss": 0.25736873149871825, |
| "step": 2765, |
| "token_acc": 0.9001164445594514 |
| }, |
| { |
| "epoch": 0.35485523955931336, |
| "grad_norm": 2.890625, |
| "learning_rate": 1.533607050924293e-05, |
| "loss": 0.2622791290283203, |
| "step": 2770, |
| "token_acc": 0.8979319258940112 |
| }, |
| { |
| "epoch": 0.3554957724827056, |
| "grad_norm": 3.375, |
| "learning_rate": 1.531814252042852e-05, |
| "loss": 0.2560434818267822, |
| "step": 2775, |
| "token_acc": 0.9002417335750669 |
| }, |
| { |
| "epoch": 0.35613630540609786, |
| "grad_norm": 3.109375, |
| "learning_rate": 1.5300190665745097e-05, |
| "loss": 0.26474769115448, |
| "step": 2780, |
| "token_acc": 0.8953059298034841 |
| }, |
| { |
| "epoch": 0.35677683832949014, |
| "grad_norm": 7.5625, |
| "learning_rate": 1.5282215025753984e-05, |
| "loss": 0.2650959014892578, |
| "step": 2785, |
| "token_acc": 0.8951942520328701 |
| }, |
| { |
| "epoch": 0.3574173712528824, |
| "grad_norm": 2.71875, |
| "learning_rate": 1.526421568112325e-05, |
| "loss": 0.26280429363250735, |
| "step": 2790, |
| "token_acc": 0.8963790945578525 |
| }, |
| { |
| "epoch": 0.35805790417627464, |
| "grad_norm": 8.0625, |
| "learning_rate": 1.5246192712627341e-05, |
| "loss": 0.2684659957885742, |
| "step": 2795, |
| "token_acc": 0.8949045957703406 |
| }, |
| { |
| "epoch": 0.3586984370996669, |
| "grad_norm": 3.390625, |
| "learning_rate": 1.522814620114671e-05, |
| "loss": 0.2673259019851685, |
| "step": 2800, |
| "token_acc": 0.8951397849462366 |
| }, |
| { |
| "epoch": 0.3586984370996669, |
| "eval_loss": 0.3319157361984253, |
| "eval_runtime": 102.1149, |
| "eval_samples_per_second": 97.929, |
| "eval_steps_per_second": 12.241, |
| "eval_token_acc": 0.8786978644711563, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.3593389700230592, |
| "grad_norm": 2.734375, |
| "learning_rate": 1.5210076227667467e-05, |
| "loss": 0.26007418632507323, |
| "step": 2805, |
| "token_acc": 0.898749460974558 |
| }, |
| { |
| "epoch": 0.3599795029464514, |
| "grad_norm": 3.328125, |
| "learning_rate": 1.5191982873281016e-05, |
| "loss": 0.2620399951934814, |
| "step": 2810, |
| "token_acc": 0.8979195441988951 |
| }, |
| { |
| "epoch": 0.3606200358698437, |
| "grad_norm": 3.203125, |
| "learning_rate": 1.5173866219183681e-05, |
| "loss": 0.2614466667175293, |
| "step": 2815, |
| "token_acc": 0.8992734192543897 |
| }, |
| { |
| "epoch": 0.361260568793236, |
| "grad_norm": 2.5, |
| "learning_rate": 1.5155726346676342e-05, |
| "loss": 0.2509075880050659, |
| "step": 2820, |
| "token_acc": 0.9027616216449097 |
| }, |
| { |
| "epoch": 0.36190110171662826, |
| "grad_norm": 2.78125, |
| "learning_rate": 1.5137563337164088e-05, |
| "loss": 0.26183514595031737, |
| "step": 2825, |
| "token_acc": 0.8969877438287589 |
| }, |
| { |
| "epoch": 0.3625416346400205, |
| "grad_norm": 2.84375, |
| "learning_rate": 1.5119377272155821e-05, |
| "loss": 0.2658205032348633, |
| "step": 2830, |
| "token_acc": 0.8951929295106704 |
| }, |
| { |
| "epoch": 0.36318216756341276, |
| "grad_norm": 3.359375, |
| "learning_rate": 1.5101168233263925e-05, |
| "loss": 0.25493884086608887, |
| "step": 2835, |
| "token_acc": 0.9017814778070138 |
| }, |
| { |
| "epoch": 0.36382270048680504, |
| "grad_norm": 4.125, |
| "learning_rate": 1.508293630220387e-05, |
| "loss": 0.2533620119094849, |
| "step": 2840, |
| "token_acc": 0.8996077417130048 |
| }, |
| { |
| "epoch": 0.36446323341019726, |
| "grad_norm": 3.5, |
| "learning_rate": 1.506468156079386e-05, |
| "loss": 0.2602185010910034, |
| "step": 2845, |
| "token_acc": 0.8991640093079376 |
| }, |
| { |
| "epoch": 0.36510376633358954, |
| "grad_norm": 2.90625, |
| "learning_rate": 1.5046404090954467e-05, |
| "loss": 0.26317653656005857, |
| "step": 2850, |
| "token_acc": 0.8983934186156696 |
| }, |
| { |
| "epoch": 0.3657442992569818, |
| "grad_norm": 3.9375, |
| "learning_rate": 1.5028103974708259e-05, |
| "loss": 0.2617523670196533, |
| "step": 2855, |
| "token_acc": 0.8986605796976614 |
| }, |
| { |
| "epoch": 0.3663848321803741, |
| "grad_norm": 3.609375, |
| "learning_rate": 1.5009781294179431e-05, |
| "loss": 0.2595290899276733, |
| "step": 2860, |
| "token_acc": 0.8996727523251808 |
| }, |
| { |
| "epoch": 0.3670253651037663, |
| "grad_norm": 7.65625, |
| "learning_rate": 1.4991436131593438e-05, |
| "loss": 0.2566396236419678, |
| "step": 2865, |
| "token_acc": 0.8992037873897138 |
| }, |
| { |
| "epoch": 0.3676658980271586, |
| "grad_norm": 3.953125, |
| "learning_rate": 1.4973068569276627e-05, |
| "loss": 0.2593822479248047, |
| "step": 2870, |
| "token_acc": 0.8983920334526017 |
| }, |
| { |
| "epoch": 0.3683064309505509, |
| "grad_norm": 3.71875, |
| "learning_rate": 1.495467868965587e-05, |
| "loss": 0.25176091194152833, |
| "step": 2875, |
| "token_acc": 0.8993063035891249 |
| }, |
| { |
| "epoch": 0.3689469638739431, |
| "grad_norm": 5.6875, |
| "learning_rate": 1.4936266575258184e-05, |
| "loss": 0.26164243221282957, |
| "step": 2880, |
| "token_acc": 0.8975323047668439 |
| }, |
| { |
| "epoch": 0.3695874967973354, |
| "grad_norm": 8.1875, |
| "learning_rate": 1.4917832308710374e-05, |
| "loss": 0.2630914211273193, |
| "step": 2885, |
| "token_acc": 0.897822806639362 |
| }, |
| { |
| "epoch": 0.37022802972072766, |
| "grad_norm": 2.890625, |
| "learning_rate": 1.489937597273865e-05, |
| "loss": 0.26312851905822754, |
| "step": 2890, |
| "token_acc": 0.8980260322386001 |
| }, |
| { |
| "epoch": 0.3708685626441199, |
| "grad_norm": 2.71875, |
| "learning_rate": 1.4880897650168269e-05, |
| "loss": 0.26306843757629395, |
| "step": 2895, |
| "token_acc": 0.8972188633615478 |
| }, |
| { |
| "epoch": 0.37150909556751216, |
| "grad_norm": 6.25, |
| "learning_rate": 1.4862397423923148e-05, |
| "loss": 0.2542487382888794, |
| "step": 2900, |
| "token_acc": 0.9007903943333477 |
| }, |
| { |
| "epoch": 0.37150909556751216, |
| "eval_loss": 0.3293861448764801, |
| "eval_runtime": 103.439, |
| "eval_samples_per_second": 96.675, |
| "eval_steps_per_second": 12.084, |
| "eval_token_acc": 0.8789276400662197, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.37214962849090444, |
| "grad_norm": 5.3125, |
| "learning_rate": 1.48438753770255e-05, |
| "loss": 0.2586365222930908, |
| "step": 2905, |
| "token_acc": 0.8990952175786299 |
| }, |
| { |
| "epoch": 0.3727901614142967, |
| "grad_norm": 3.40625, |
| "learning_rate": 1.4825331592595471e-05, |
| "loss": 0.25507054328918455, |
| "step": 2910, |
| "token_acc": 0.8996947418203706 |
| }, |
| { |
| "epoch": 0.37343069433768894, |
| "grad_norm": 3.796875, |
| "learning_rate": 1.480676615385074e-05, |
| "loss": 0.25874695777893064, |
| "step": 2915, |
| "token_acc": 0.8979389016117184 |
| }, |
| { |
| "epoch": 0.3740712272610812, |
| "grad_norm": 3.03125, |
| "learning_rate": 1.4788179144106187e-05, |
| "loss": 0.2610326766967773, |
| "step": 2920, |
| "token_acc": 0.8965872504829363 |
| }, |
| { |
| "epoch": 0.3747117601844735, |
| "grad_norm": 2.96875, |
| "learning_rate": 1.4769570646773469e-05, |
| "loss": 0.25159320831298826, |
| "step": 2925, |
| "token_acc": 0.9015691868758916 |
| }, |
| { |
| "epoch": 0.3753522931078657, |
| "grad_norm": 3.0, |
| "learning_rate": 1.4750940745360683e-05, |
| "loss": 0.2555972099304199, |
| "step": 2930, |
| "token_acc": 0.9030378872874774 |
| }, |
| { |
| "epoch": 0.375992826031258, |
| "grad_norm": 3.359375, |
| "learning_rate": 1.4732289523471983e-05, |
| "loss": 0.25429458618164064, |
| "step": 2935, |
| "token_acc": 0.901710690739863 |
| }, |
| { |
| "epoch": 0.3766333589546503, |
| "grad_norm": 3.015625, |
| "learning_rate": 1.47136170648072e-05, |
| "loss": 0.260566258430481, |
| "step": 2940, |
| "token_acc": 0.8977762454749181 |
| }, |
| { |
| "epoch": 0.37727389187804256, |
| "grad_norm": 4.4375, |
| "learning_rate": 1.469492345316146e-05, |
| "loss": 0.2575147390365601, |
| "step": 2945, |
| "token_acc": 0.9003219575016098 |
| }, |
| { |
| "epoch": 0.3779144248014348, |
| "grad_norm": 3.078125, |
| "learning_rate": 1.4676208772424825e-05, |
| "loss": 0.26031718254089353, |
| "step": 2950, |
| "token_acc": 0.8967789165446559 |
| }, |
| { |
| "epoch": 0.37855495772482706, |
| "grad_norm": 3.9375, |
| "learning_rate": 1.4657473106581903e-05, |
| "loss": 0.2566239356994629, |
| "step": 2955, |
| "token_acc": 0.900069096562446 |
| }, |
| { |
| "epoch": 0.37919549064821934, |
| "grad_norm": 24.5, |
| "learning_rate": 1.4638716539711477e-05, |
| "loss": 0.26539459228515627, |
| "step": 2960, |
| "token_acc": 0.8971825516676015 |
| }, |
| { |
| "epoch": 0.37983602357161156, |
| "grad_norm": 2.796875, |
| "learning_rate": 1.4619939155986122e-05, |
| "loss": 0.2547321081161499, |
| "step": 2965, |
| "token_acc": 0.9001380977041257 |
| }, |
| { |
| "epoch": 0.38047655649500384, |
| "grad_norm": 3.53125, |
| "learning_rate": 1.4601141039671837e-05, |
| "loss": 0.26095755100250245, |
| "step": 2970, |
| "token_acc": 0.8989492800622649 |
| }, |
| { |
| "epoch": 0.3811170894183961, |
| "grad_norm": 2.65625, |
| "learning_rate": 1.4582322275127663e-05, |
| "loss": 0.2595865726470947, |
| "step": 2975, |
| "token_acc": 0.8997066689673022 |
| }, |
| { |
| "epoch": 0.3817576223417884, |
| "grad_norm": 3.0625, |
| "learning_rate": 1.4563482946805291e-05, |
| "loss": 0.2566410541534424, |
| "step": 2980, |
| "token_acc": 0.8992691315563198 |
| }, |
| { |
| "epoch": 0.3823981552651806, |
| "grad_norm": 3.734375, |
| "learning_rate": 1.4544623139248707e-05, |
| "loss": 0.26386113166809083, |
| "step": 2985, |
| "token_acc": 0.8982700748773561 |
| }, |
| { |
| "epoch": 0.3830386881885729, |
| "grad_norm": 4.15625, |
| "learning_rate": 1.4525742937093797e-05, |
| "loss": 0.2548778533935547, |
| "step": 2990, |
| "token_acc": 0.8996550237171195 |
| }, |
| { |
| "epoch": 0.3836792211119652, |
| "grad_norm": 2.84375, |
| "learning_rate": 1.4506842425067963e-05, |
| "loss": 0.2560065746307373, |
| "step": 2995, |
| "token_acc": 0.8996068944662836 |
| }, |
| { |
| "epoch": 0.3843197540353574, |
| "grad_norm": 3.84375, |
| "learning_rate": 1.4487921687989763e-05, |
| "loss": 0.2564894676208496, |
| "step": 3000, |
| "token_acc": 0.8991150442477877 |
| }, |
| { |
| "epoch": 0.3843197540353574, |
| "eval_loss": 0.33271661400794983, |
| "eval_runtime": 102.5035, |
| "eval_samples_per_second": 97.558, |
| "eval_steps_per_second": 12.195, |
| "eval_token_acc": 0.879677871226005, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.3849602869587497, |
| "grad_norm": 3.09375, |
| "learning_rate": 1.4468980810768507e-05, |
| "loss": 0.2549588203430176, |
| "step": 3005, |
| "token_acc": 0.9006631071305546 |
| }, |
| { |
| "epoch": 0.38560081988214195, |
| "grad_norm": 2.296875, |
| "learning_rate": 1.4450019878403894e-05, |
| "loss": 0.256690239906311, |
| "step": 3010, |
| "token_acc": 0.9002636014001123 |
| }, |
| { |
| "epoch": 0.3862413528055342, |
| "grad_norm": 6.46875, |
| "learning_rate": 1.4431038975985616e-05, |
| "loss": 0.2593832969665527, |
| "step": 3015, |
| "token_acc": 0.8996518973741888 |
| }, |
| { |
| "epoch": 0.38688188572892646, |
| "grad_norm": 5.46875, |
| "learning_rate": 1.441203818869299e-05, |
| "loss": 0.26778130531311034, |
| "step": 3020, |
| "token_acc": 0.8993683667769519 |
| }, |
| { |
| "epoch": 0.38752241865231873, |
| "grad_norm": 3.15625, |
| "learning_rate": 1.4393017601794558e-05, |
| "loss": 0.25722360610961914, |
| "step": 3025, |
| "token_acc": 0.8998792687133494 |
| }, |
| { |
| "epoch": 0.388162951575711, |
| "grad_norm": 3.671875, |
| "learning_rate": 1.4373977300647735e-05, |
| "loss": 0.25923748016357423, |
| "step": 3030, |
| "token_acc": 0.8972777082704172 |
| }, |
| { |
| "epoch": 0.38880348449910324, |
| "grad_norm": 3.234375, |
| "learning_rate": 1.4354917370698388e-05, |
| "loss": 0.24125347137451172, |
| "step": 3035, |
| "token_acc": 0.9047619047619048 |
| }, |
| { |
| "epoch": 0.3894440174224955, |
| "grad_norm": 2.59375, |
| "learning_rate": 1.4335837897480475e-05, |
| "loss": 0.26301088333129885, |
| "step": 3040, |
| "token_acc": 0.89577136945558 |
| }, |
| { |
| "epoch": 0.3900845503458878, |
| "grad_norm": 5.1875, |
| "learning_rate": 1.4316738966615665e-05, |
| "loss": 0.25510516166687014, |
| "step": 3045, |
| "token_acc": 0.9006117525417887 |
| }, |
| { |
| "epoch": 0.39072508326928, |
| "grad_norm": 4.0, |
| "learning_rate": 1.4297620663812934e-05, |
| "loss": 0.26404881477355957, |
| "step": 3050, |
| "token_acc": 0.8973620897061351 |
| }, |
| { |
| "epoch": 0.3913656161926723, |
| "grad_norm": 2.921875, |
| "learning_rate": 1.4278483074868206e-05, |
| "loss": 0.2587254524230957, |
| "step": 3055, |
| "token_acc": 0.8988434317279476 |
| }, |
| { |
| "epoch": 0.3920061491160646, |
| "grad_norm": 2.625, |
| "learning_rate": 1.4259326285663942e-05, |
| "loss": 0.2552812576293945, |
| "step": 3060, |
| "token_acc": 0.9003674086881348 |
| }, |
| { |
| "epoch": 0.39264668203945685, |
| "grad_norm": 2.859375, |
| "learning_rate": 1.4240150382168766e-05, |
| "loss": 0.2574739933013916, |
| "step": 3065, |
| "token_acc": 0.9006264852019875 |
| }, |
| { |
| "epoch": 0.3932872149628491, |
| "grad_norm": 3.109375, |
| "learning_rate": 1.4220955450437097e-05, |
| "loss": 0.2653143644332886, |
| "step": 3070, |
| "token_acc": 0.8960445153776474 |
| }, |
| { |
| "epoch": 0.39392774788624135, |
| "grad_norm": 3.125, |
| "learning_rate": 1.4201741576608724e-05, |
| "loss": 0.2522631883621216, |
| "step": 3075, |
| "token_acc": 0.9006473888649115 |
| }, |
| { |
| "epoch": 0.39456828080963363, |
| "grad_norm": 2.75, |
| "learning_rate": 1.4182508846908456e-05, |
| "loss": 0.25041637420654295, |
| "step": 3080, |
| "token_acc": 0.9022227988237329 |
| }, |
| { |
| "epoch": 0.39520881373302585, |
| "grad_norm": 2.359375, |
| "learning_rate": 1.4163257347645711e-05, |
| "loss": 0.25646038055419923, |
| "step": 3085, |
| "token_acc": 0.8983489244298831 |
| }, |
| { |
| "epoch": 0.39584934665641813, |
| "grad_norm": 3.46875, |
| "learning_rate": 1.4143987165214146e-05, |
| "loss": 0.2523691654205322, |
| "step": 3090, |
| "token_acc": 0.9009485036164407 |
| }, |
| { |
| "epoch": 0.3964898795798104, |
| "grad_norm": 3.40625, |
| "learning_rate": 1.4124698386091256e-05, |
| "loss": 0.2536661148071289, |
| "step": 3095, |
| "token_acc": 0.8998623537508603 |
| }, |
| { |
| "epoch": 0.3971304125032027, |
| "grad_norm": 3.046875, |
| "learning_rate": 1.4105391096837988e-05, |
| "loss": 0.25694756507873534, |
| "step": 3100, |
| "token_acc": 0.8986861942709455 |
| }, |
| { |
| "epoch": 0.3971304125032027, |
| "eval_loss": 0.3274412453174591, |
| "eval_runtime": 105.1272, |
| "eval_samples_per_second": 95.123, |
| "eval_steps_per_second": 11.89, |
| "eval_token_acc": 0.880165106222766, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.3977709454265949, |
| "grad_norm": 2.90625, |
| "learning_rate": 1.4086065384098367e-05, |
| "loss": 0.2536616802215576, |
| "step": 3105, |
| "token_acc": 0.9024840983324738 |
| }, |
| { |
| "epoch": 0.3984114783499872, |
| "grad_norm": 2.65625, |
| "learning_rate": 1.4066721334599084e-05, |
| "loss": 0.2547293663024902, |
| "step": 3110, |
| "token_acc": 0.8989598169968492 |
| }, |
| { |
| "epoch": 0.39905201127337947, |
| "grad_norm": 2.4375, |
| "learning_rate": 1.4047359035149126e-05, |
| "loss": 0.24942498207092284, |
| "step": 3115, |
| "token_acc": 0.9021344624956792 |
| }, |
| { |
| "epoch": 0.3996925441967717, |
| "grad_norm": 2.953125, |
| "learning_rate": 1.4027978572639375e-05, |
| "loss": 0.25708999633789065, |
| "step": 3120, |
| "token_acc": 0.900335801618736 |
| }, |
| { |
| "epoch": 0.40033307712016397, |
| "grad_norm": 3.25, |
| "learning_rate": 1.4008580034042226e-05, |
| "loss": 0.254312539100647, |
| "step": 3125, |
| "token_acc": 0.9001466528640442 |
| }, |
| { |
| "epoch": 0.40097361004355625, |
| "grad_norm": 4.21875, |
| "learning_rate": 1.3989163506411187e-05, |
| "loss": 0.25107884407043457, |
| "step": 3130, |
| "token_acc": 0.902516670251667 |
| }, |
| { |
| "epoch": 0.40161414296694853, |
| "grad_norm": 2.953125, |
| "learning_rate": 1.39697290768805e-05, |
| "loss": 0.24998018741607667, |
| "step": 3135, |
| "token_acc": 0.9032967981358418 |
| }, |
| { |
| "epoch": 0.40225467589034075, |
| "grad_norm": 2.75, |
| "learning_rate": 1.3950276832664745e-05, |
| "loss": 0.2500455856323242, |
| "step": 3140, |
| "token_acc": 0.9015105740181268 |
| }, |
| { |
| "epoch": 0.40289520881373303, |
| "grad_norm": 4.25, |
| "learning_rate": 1.3930806861058438e-05, |
| "loss": 0.25563080310821534, |
| "step": 3145, |
| "token_acc": 0.8991560454702032 |
| }, |
| { |
| "epoch": 0.4035357417371253, |
| "grad_norm": 3.0, |
| "learning_rate": 1.3911319249435657e-05, |
| "loss": 0.25497581958770754, |
| "step": 3150, |
| "token_acc": 0.8996980155306299 |
| }, |
| { |
| "epoch": 0.40417627466051753, |
| "grad_norm": 2.796875, |
| "learning_rate": 1.3891814085249644e-05, |
| "loss": 0.25732955932617185, |
| "step": 3155, |
| "token_acc": 0.8988880268942333 |
| }, |
| { |
| "epoch": 0.4048168075839098, |
| "grad_norm": 8.375, |
| "learning_rate": 1.3872291456032405e-05, |
| "loss": 0.2536874294281006, |
| "step": 3160, |
| "token_acc": 0.9003486720330592 |
| }, |
| { |
| "epoch": 0.4054573405073021, |
| "grad_norm": 2.6875, |
| "learning_rate": 1.3852751449394324e-05, |
| "loss": 0.2530160427093506, |
| "step": 3165, |
| "token_acc": 0.9021668892430965 |
| }, |
| { |
| "epoch": 0.4060978734306943, |
| "grad_norm": 3.109375, |
| "learning_rate": 1.383319415302377e-05, |
| "loss": 0.2553149938583374, |
| "step": 3170, |
| "token_acc": 0.8996287342427906 |
| }, |
| { |
| "epoch": 0.4067384063540866, |
| "grad_norm": 4.40625, |
| "learning_rate": 1.3813619654686703e-05, |
| "loss": 0.25406613349914553, |
| "step": 3175, |
| "token_acc": 0.9016216216216216 |
| }, |
| { |
| "epoch": 0.40737893927747887, |
| "grad_norm": 2.859375, |
| "learning_rate": 1.3794028042226273e-05, |
| "loss": 0.2548455476760864, |
| "step": 3180, |
| "token_acc": 0.9005142832447384 |
| }, |
| { |
| "epoch": 0.40801947220087115, |
| "grad_norm": 3.15625, |
| "learning_rate": 1.3774419403562437e-05, |
| "loss": 0.2509315013885498, |
| "step": 3185, |
| "token_acc": 0.9003025064822817 |
| }, |
| { |
| "epoch": 0.40866000512426337, |
| "grad_norm": 2.609375, |
| "learning_rate": 1.3754793826691565e-05, |
| "loss": 0.2544880390167236, |
| "step": 3190, |
| "token_acc": 0.8993057052913019 |
| }, |
| { |
| "epoch": 0.40930053804765565, |
| "grad_norm": 2.546875, |
| "learning_rate": 1.3735151399686024e-05, |
| "loss": 0.25415782928466796, |
| "step": 3195, |
| "token_acc": 0.9004563850856798 |
| }, |
| { |
| "epoch": 0.4099410709710479, |
| "grad_norm": 2.734375, |
| "learning_rate": 1.371549221069381e-05, |
| "loss": 0.252706241607666, |
| "step": 3200, |
| "token_acc": 0.9001977984176126 |
| }, |
| { |
| "epoch": 0.4099410709710479, |
| "eval_loss": 0.3304235339164734, |
| "eval_runtime": 103.4877, |
| "eval_samples_per_second": 96.63, |
| "eval_steps_per_second": 12.079, |
| "eval_token_acc": 0.8798495108271368, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.41058160389444015, |
| "grad_norm": 3.171875, |
| "learning_rate": 1.369581634793814e-05, |
| "loss": 0.24554102420806884, |
| "step": 3205, |
| "token_acc": 0.9046261500583128 |
| }, |
| { |
| "epoch": 0.41122213681783243, |
| "grad_norm": 3.15625, |
| "learning_rate": 1.367612389971705e-05, |
| "loss": 0.25843195915222167, |
| "step": 3210, |
| "token_acc": 0.8992231333621061 |
| }, |
| { |
| "epoch": 0.4118626697412247, |
| "grad_norm": 2.703125, |
| "learning_rate": 1.3656414954403015e-05, |
| "loss": 0.2526721477508545, |
| "step": 3215, |
| "token_acc": 0.9009551673694174 |
| }, |
| { |
| "epoch": 0.412503202664617, |
| "grad_norm": 4.03125, |
| "learning_rate": 1.3636689600442535e-05, |
| "loss": 0.2488550662994385, |
| "step": 3220, |
| "token_acc": 0.9032662773091067 |
| }, |
| { |
| "epoch": 0.4131437355880092, |
| "grad_norm": 8.6875, |
| "learning_rate": 1.3616947926355748e-05, |
| "loss": 0.2410456657409668, |
| "step": 3225, |
| "token_acc": 0.9047186932849365 |
| }, |
| { |
| "epoch": 0.4137842685114015, |
| "grad_norm": 3.6875, |
| "learning_rate": 1.3597190020736032e-05, |
| "loss": 0.25398988723754884, |
| "step": 3230, |
| "token_acc": 0.9014248202832422 |
| }, |
| { |
| "epoch": 0.41442480143479377, |
| "grad_norm": 4.0625, |
| "learning_rate": 1.3577415972249608e-05, |
| "loss": 0.24551260471343994, |
| "step": 3235, |
| "token_acc": 0.9037839237174785 |
| }, |
| { |
| "epoch": 0.415065334358186, |
| "grad_norm": 4.125, |
| "learning_rate": 1.3557625869635136e-05, |
| "loss": 0.2562254905700684, |
| "step": 3240, |
| "token_acc": 0.8990086206896551 |
| }, |
| { |
| "epoch": 0.41570586728157827, |
| "grad_norm": 2.890625, |
| "learning_rate": 1.3537819801703323e-05, |
| "loss": 0.2528964996337891, |
| "step": 3245, |
| "token_acc": 0.9020725388601036 |
| }, |
| { |
| "epoch": 0.41634640020497055, |
| "grad_norm": 70.0, |
| "learning_rate": 1.3517997857336522e-05, |
| "loss": 0.2532426595687866, |
| "step": 3250, |
| "token_acc": 0.8992752998015702 |
| }, |
| { |
| "epoch": 0.4169869331283628, |
| "grad_norm": 3.03125, |
| "learning_rate": 1.3498160125488336e-05, |
| "loss": 0.248179292678833, |
| "step": 3255, |
| "token_acc": 0.9034928848641656 |
| }, |
| { |
| "epoch": 0.41762746605175505, |
| "grad_norm": 2.703125, |
| "learning_rate": 1.3478306695183212e-05, |
| "loss": 0.25196003913879395, |
| "step": 3260, |
| "token_acc": 0.9024979507312654 |
| }, |
| { |
| "epoch": 0.4182679989751473, |
| "grad_norm": 3.75, |
| "learning_rate": 1.3458437655516048e-05, |
| "loss": 0.2540182590484619, |
| "step": 3265, |
| "token_acc": 0.9011801322785631 |
| }, |
| { |
| "epoch": 0.4189085318985396, |
| "grad_norm": 4.6875, |
| "learning_rate": 1.3438553095651794e-05, |
| "loss": 0.24988923072814942, |
| "step": 3270, |
| "token_acc": 0.9028460543337645 |
| }, |
| { |
| "epoch": 0.4195490648219318, |
| "grad_norm": 3.3125, |
| "learning_rate": 1.3418653104825044e-05, |
| "loss": 0.25744991302490233, |
| "step": 3275, |
| "token_acc": 0.8989907702924178 |
| }, |
| { |
| "epoch": 0.4201895977453241, |
| "grad_norm": 2.515625, |
| "learning_rate": 1.3398737772339643e-05, |
| "loss": 0.25907082557678224, |
| "step": 3280, |
| "token_acc": 0.8988532405617833 |
| }, |
| { |
| "epoch": 0.4208301306687164, |
| "grad_norm": 2.59375, |
| "learning_rate": 1.3378807187568288e-05, |
| "loss": 0.2617329597473145, |
| "step": 3285, |
| "token_acc": 0.8974062165058949 |
| }, |
| { |
| "epoch": 0.4214706635921086, |
| "grad_norm": 2.34375, |
| "learning_rate": 1.335886143995211e-05, |
| "loss": 0.25168476104736326, |
| "step": 3290, |
| "token_acc": 0.9027303015879884 |
| }, |
| { |
| "epoch": 0.4221111965155009, |
| "grad_norm": 3.484375, |
| "learning_rate": 1.3338900619000299e-05, |
| "loss": 0.25457475185394285, |
| "step": 3295, |
| "token_acc": 0.9008958566629339 |
| }, |
| { |
| "epoch": 0.42275172943889316, |
| "grad_norm": 3.25, |
| "learning_rate": 1.3318924814289682e-05, |
| "loss": 0.25605058670043945, |
| "step": 3300, |
| "token_acc": 0.9003745640849012 |
| }, |
| { |
| "epoch": 0.42275172943889316, |
| "eval_loss": 0.3262101709842682, |
| "eval_runtime": 103.038, |
| "eval_samples_per_second": 97.052, |
| "eval_steps_per_second": 12.131, |
| "eval_token_acc": 0.8801623378421026, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.42339226236228544, |
| "grad_norm": 2.9375, |
| "learning_rate": 1.329893411546433e-05, |
| "loss": 0.2533790826797485, |
| "step": 3305, |
| "token_acc": 0.9021087584630644 |
| }, |
| { |
| "epoch": 0.42403279528567767, |
| "grad_norm": 3.109375, |
| "learning_rate": 1.327892861223515e-05, |
| "loss": 0.26516075134277345, |
| "step": 3310, |
| "token_acc": 0.8975274134594711 |
| }, |
| { |
| "epoch": 0.42467332820906994, |
| "grad_norm": 2.828125, |
| "learning_rate": 1.3258908394379492e-05, |
| "loss": 0.24489293098449708, |
| "step": 3315, |
| "token_acc": 0.9024179620034543 |
| }, |
| { |
| "epoch": 0.4253138611324622, |
| "grad_norm": 2.671875, |
| "learning_rate": 1.323887355174073e-05, |
| "loss": 0.2507158279418945, |
| "step": 3320, |
| "token_acc": 0.9005565382458259 |
| }, |
| { |
| "epoch": 0.42595439405585445, |
| "grad_norm": 2.859375, |
| "learning_rate": 1.3218824174227876e-05, |
| "loss": 0.2552894353866577, |
| "step": 3325, |
| "token_acc": 0.899343072002766 |
| }, |
| { |
| "epoch": 0.4265949269792467, |
| "grad_norm": 4.6875, |
| "learning_rate": 1.3198760351815165e-05, |
| "loss": 0.25093369483947753, |
| "step": 3330, |
| "token_acc": 0.901463793773479 |
| }, |
| { |
| "epoch": 0.427235459902639, |
| "grad_norm": 6.78125, |
| "learning_rate": 1.3178682174541664e-05, |
| "loss": 0.25160994529724123, |
| "step": 3335, |
| "token_acc": 0.9026560138199093 |
| }, |
| { |
| "epoch": 0.4278759928260313, |
| "grad_norm": 3.1875, |
| "learning_rate": 1.3158589732510847e-05, |
| "loss": 0.25160062313079834, |
| "step": 3340, |
| "token_acc": 0.9015646492434664 |
| }, |
| { |
| "epoch": 0.4285165257494235, |
| "grad_norm": 3.015625, |
| "learning_rate": 1.3138483115890214e-05, |
| "loss": 0.24968068599700927, |
| "step": 3345, |
| "token_acc": 0.9020596346087556 |
| }, |
| { |
| "epoch": 0.4291570586728158, |
| "grad_norm": 2.40625, |
| "learning_rate": 1.3118362414910869e-05, |
| "loss": 0.25055222511291503, |
| "step": 3350, |
| "token_acc": 0.902113891285591 |
| }, |
| { |
| "epoch": 0.42979759159620806, |
| "grad_norm": 3.21875, |
| "learning_rate": 1.3098227719867117e-05, |
| "loss": 0.23631854057312013, |
| "step": 3355, |
| "token_acc": 0.9082366187424216 |
| }, |
| { |
| "epoch": 0.4304381245196003, |
| "grad_norm": 3.125, |
| "learning_rate": 1.3078079121116074e-05, |
| "loss": 0.2557328939437866, |
| "step": 3360, |
| "token_acc": 0.9006379860332787 |
| }, |
| { |
| "epoch": 0.43107865744299256, |
| "grad_norm": 2.828125, |
| "learning_rate": 1.305791670907725e-05, |
| "loss": 0.2555293083190918, |
| "step": 3365, |
| "token_acc": 0.9005658502872446 |
| }, |
| { |
| "epoch": 0.43171919036638484, |
| "grad_norm": 2.5625, |
| "learning_rate": 1.3037740574232134e-05, |
| "loss": 0.25120766162872316, |
| "step": 3370, |
| "token_acc": 0.9023821853961678 |
| }, |
| { |
| "epoch": 0.4323597232897771, |
| "grad_norm": 3.09375, |
| "learning_rate": 1.3017550807123806e-05, |
| "loss": 0.2534923553466797, |
| "step": 3375, |
| "token_acc": 0.9000387780602352 |
| }, |
| { |
| "epoch": 0.43300025621316934, |
| "grad_norm": 3.109375, |
| "learning_rate": 1.2997347498356519e-05, |
| "loss": 0.24217534065246582, |
| "step": 3380, |
| "token_acc": 0.9059788473990935 |
| }, |
| { |
| "epoch": 0.4336407891365616, |
| "grad_norm": 2.296875, |
| "learning_rate": 1.2977130738595298e-05, |
| "loss": 0.2505367279052734, |
| "step": 3385, |
| "token_acc": 0.9020945800043187 |
| }, |
| { |
| "epoch": 0.4342813220599539, |
| "grad_norm": 2.65625, |
| "learning_rate": 1.2956900618565532e-05, |
| "loss": 0.24520423412322997, |
| "step": 3390, |
| "token_acc": 0.9031252705393472 |
| }, |
| { |
| "epoch": 0.4349218549833461, |
| "grad_norm": 3.640625, |
| "learning_rate": 1.293665722905256e-05, |
| "loss": 0.2532040596008301, |
| "step": 3395, |
| "token_acc": 0.9023980712102295 |
| }, |
| { |
| "epoch": 0.4355623879067384, |
| "grad_norm": 3.03125, |
| "learning_rate": 1.2916400660901276e-05, |
| "loss": 0.24737958908081054, |
| "step": 3400, |
| "token_acc": 0.9031688624817016 |
| }, |
| { |
| "epoch": 0.4355623879067384, |
| "eval_loss": 0.3326387107372284, |
| "eval_runtime": 103.1507, |
| "eval_samples_per_second": 96.945, |
| "eval_steps_per_second": 12.118, |
| "eval_token_acc": 0.8803395142045611, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.4362029208301307, |
| "grad_norm": 2.265625, |
| "learning_rate": 1.2896131005015717e-05, |
| "loss": 0.25047874450683594, |
| "step": 3405, |
| "token_acc": 0.9031589849818746 |
| }, |
| { |
| "epoch": 0.4368434537535229, |
| "grad_norm": 2.515625, |
| "learning_rate": 1.2875848352358644e-05, |
| "loss": 0.25389971733093264, |
| "step": 3410, |
| "token_acc": 0.9008509541000516 |
| }, |
| { |
| "epoch": 0.4374839866769152, |
| "grad_norm": 3.875, |
| "learning_rate": 1.2855552793951146e-05, |
| "loss": 0.2464221954345703, |
| "step": 3415, |
| "token_acc": 0.9023832138848114 |
| }, |
| { |
| "epoch": 0.43812451960030746, |
| "grad_norm": 3.6875, |
| "learning_rate": 1.2835244420872232e-05, |
| "loss": 0.25347232818603516, |
| "step": 3420, |
| "token_acc": 0.900116044182748 |
| }, |
| { |
| "epoch": 0.43876505252369974, |
| "grad_norm": 2.546875, |
| "learning_rate": 1.2814923324258416e-05, |
| "loss": 0.2549131393432617, |
| "step": 3425, |
| "token_acc": 0.9009849886016603 |
| }, |
| { |
| "epoch": 0.43940558544709196, |
| "grad_norm": 2.9375, |
| "learning_rate": 1.2794589595303316e-05, |
| "loss": 0.24712648391723632, |
| "step": 3430, |
| "token_acc": 0.9032174364296834 |
| }, |
| { |
| "epoch": 0.44004611837048424, |
| "grad_norm": 3.140625, |
| "learning_rate": 1.277424332525723e-05, |
| "loss": 0.24843959808349608, |
| "step": 3435, |
| "token_acc": 0.9036415534988322 |
| }, |
| { |
| "epoch": 0.4406866512938765, |
| "grad_norm": 9.1875, |
| "learning_rate": 1.2753884605426736e-05, |
| "loss": 0.24894342422485352, |
| "step": 3440, |
| "token_acc": 0.9017941861468127 |
| }, |
| { |
| "epoch": 0.44132718421726874, |
| "grad_norm": 2.953125, |
| "learning_rate": 1.273351352717429e-05, |
| "loss": 0.24595353603363038, |
| "step": 3445, |
| "token_acc": 0.9049160516207001 |
| }, |
| { |
| "epoch": 0.441967717140661, |
| "grad_norm": 2.921875, |
| "learning_rate": 1.2713130181917806e-05, |
| "loss": 0.25805752277374266, |
| "step": 3450, |
| "token_acc": 0.8997292302402544 |
| }, |
| { |
| "epoch": 0.4426082500640533, |
| "grad_norm": 3.546875, |
| "learning_rate": 1.269273466113024e-05, |
| "loss": 0.2535008430480957, |
| "step": 3455, |
| "token_acc": 0.9011762685670129 |
| }, |
| { |
| "epoch": 0.4432487829874456, |
| "grad_norm": 3.671875, |
| "learning_rate": 1.2672327056339198e-05, |
| "loss": 0.24500885009765624, |
| "step": 3460, |
| "token_acc": 0.905705264068515 |
| }, |
| { |
| "epoch": 0.4438893159108378, |
| "grad_norm": 4.28125, |
| "learning_rate": 1.2651907459126512e-05, |
| "loss": 0.25068912506103513, |
| "step": 3465, |
| "token_acc": 0.9028803385585352 |
| }, |
| { |
| "epoch": 0.4445298488342301, |
| "grad_norm": 2.828125, |
| "learning_rate": 1.2631475961127822e-05, |
| "loss": 0.2502088785171509, |
| "step": 3470, |
| "token_acc": 0.9028352292312996 |
| }, |
| { |
| "epoch": 0.44517038175762236, |
| "grad_norm": 4.375, |
| "learning_rate": 1.2611032654032185e-05, |
| "loss": 0.2501903295516968, |
| "step": 3475, |
| "token_acc": 0.901529554096094 |
| }, |
| { |
| "epoch": 0.4458109146810146, |
| "grad_norm": 4.125, |
| "learning_rate": 1.2590577629581648e-05, |
| "loss": 0.25160870552062986, |
| "step": 3480, |
| "token_acc": 0.9006165653429914 |
| }, |
| { |
| "epoch": 0.44645144760440686, |
| "grad_norm": 10.75, |
| "learning_rate": 1.2570110979570846e-05, |
| "loss": 0.2540600299835205, |
| "step": 3485, |
| "token_acc": 0.9013576215844646 |
| }, |
| { |
| "epoch": 0.44709198052779914, |
| "grad_norm": 2.625, |
| "learning_rate": 1.2549632795846582e-05, |
| "loss": 0.25437102317810056, |
| "step": 3490, |
| "token_acc": 0.9012979172955026 |
| }, |
| { |
| "epoch": 0.4477325134511914, |
| "grad_norm": 2.609375, |
| "learning_rate": 1.2529143170307418e-05, |
| "loss": 0.25037708282470705, |
| "step": 3495, |
| "token_acc": 0.90243692783771 |
| }, |
| { |
| "epoch": 0.44837304637458364, |
| "grad_norm": 2.984375, |
| "learning_rate": 1.250864219490326e-05, |
| "loss": 0.2490053653717041, |
| "step": 3500, |
| "token_acc": 0.9017818527809013 |
| }, |
| { |
| "epoch": 0.44837304637458364, |
| "eval_loss": 0.3326828181743622, |
| "eval_runtime": 103.6038, |
| "eval_samples_per_second": 96.522, |
| "eval_steps_per_second": 12.065, |
| "eval_token_acc": 0.8804225656244636, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.4490135792979759, |
| "grad_norm": 3.390625, |
| "learning_rate": 1.248812996163495e-05, |
| "loss": 0.24862072467803956, |
| "step": 3505, |
| "token_acc": 0.9011164274322169 |
| }, |
| { |
| "epoch": 0.4496541122213682, |
| "grad_norm": 2.71875, |
| "learning_rate": 1.2467606562553858e-05, |
| "loss": 0.25421929359436035, |
| "step": 3510, |
| "token_acc": 0.9018134715025907 |
| }, |
| { |
| "epoch": 0.4502946451447604, |
| "grad_norm": 5.625, |
| "learning_rate": 1.244707208976145e-05, |
| "loss": 0.24651005268096923, |
| "step": 3515, |
| "token_acc": 0.903206343733839 |
| }, |
| { |
| "epoch": 0.4509351780681527, |
| "grad_norm": 2.859375, |
| "learning_rate": 1.2426526635408896e-05, |
| "loss": 0.24950928688049318, |
| "step": 3520, |
| "token_acc": 0.9035152037955575 |
| }, |
| { |
| "epoch": 0.451575710991545, |
| "grad_norm": 4.875, |
| "learning_rate": 1.240597029169664e-05, |
| "loss": 0.25514960289001465, |
| "step": 3525, |
| "token_acc": 0.8999569336778639 |
| }, |
| { |
| "epoch": 0.45221624391493725, |
| "grad_norm": 2.484375, |
| "learning_rate": 1.2385403150874003e-05, |
| "loss": 0.24595193862915038, |
| "step": 3530, |
| "token_acc": 0.904110774556469 |
| }, |
| { |
| "epoch": 0.4528567768383295, |
| "grad_norm": 2.265625, |
| "learning_rate": 1.2364825305238748e-05, |
| "loss": 0.24859437942504883, |
| "step": 3535, |
| "token_acc": 0.9019379844961241 |
| }, |
| { |
| "epoch": 0.45349730976172176, |
| "grad_norm": 3.328125, |
| "learning_rate": 1.2344236847136683e-05, |
| "loss": 0.25172064304351804, |
| "step": 3540, |
| "token_acc": 0.9005772378736969 |
| }, |
| { |
| "epoch": 0.45413784268511403, |
| "grad_norm": 3.140625, |
| "learning_rate": 1.2323637868961247e-05, |
| "loss": 0.2530811309814453, |
| "step": 3545, |
| "token_acc": 0.900356943190126 |
| }, |
| { |
| "epoch": 0.45477837560850626, |
| "grad_norm": 2.78125, |
| "learning_rate": 1.2303028463153081e-05, |
| "loss": 0.25023765563964845, |
| "step": 3550, |
| "token_acc": 0.9036663650855198 |
| }, |
| { |
| "epoch": 0.45541890853189854, |
| "grad_norm": 3.375, |
| "learning_rate": 1.2282408722199623e-05, |
| "loss": 0.2615813732147217, |
| "step": 3555, |
| "token_acc": 0.8973312132021144 |
| }, |
| { |
| "epoch": 0.4560594414552908, |
| "grad_norm": 4.625, |
| "learning_rate": 1.2261778738634688e-05, |
| "loss": 0.24770092964172363, |
| "step": 3560, |
| "token_acc": 0.9046594673665189 |
| }, |
| { |
| "epoch": 0.45669997437868304, |
| "grad_norm": 3.09375, |
| "learning_rate": 1.2241138605038065e-05, |
| "loss": 0.2506240367889404, |
| "step": 3565, |
| "token_acc": 0.9026666666666666 |
| }, |
| { |
| "epoch": 0.4573405073020753, |
| "grad_norm": 5.75, |
| "learning_rate": 1.2220488414035088e-05, |
| "loss": 0.24530596733093263, |
| "step": 3570, |
| "token_acc": 0.9045893719806763 |
| }, |
| { |
| "epoch": 0.4579810402254676, |
| "grad_norm": 6.125, |
| "learning_rate": 1.2199828258296219e-05, |
| "loss": 0.24243788719177245, |
| "step": 3575, |
| "token_acc": 0.9063712388999051 |
| }, |
| { |
| "epoch": 0.4586215731488599, |
| "grad_norm": 3.359375, |
| "learning_rate": 1.2179158230536648e-05, |
| "loss": 0.25044434070587157, |
| "step": 3580, |
| "token_acc": 0.9025926405245676 |
| }, |
| { |
| "epoch": 0.4592621060722521, |
| "grad_norm": 2.734375, |
| "learning_rate": 1.215847842351586e-05, |
| "loss": 0.23762269020080568, |
| "step": 3585, |
| "token_acc": 0.907480400225235 |
| }, |
| { |
| "epoch": 0.4599026389956444, |
| "grad_norm": 4.25, |
| "learning_rate": 1.213778893003723e-05, |
| "loss": 0.24655213356018066, |
| "step": 3590, |
| "token_acc": 0.9028251024369204 |
| }, |
| { |
| "epoch": 0.46054317191903665, |
| "grad_norm": 2.984375, |
| "learning_rate": 1.2117089842947602e-05, |
| "loss": 0.2541653633117676, |
| "step": 3595, |
| "token_acc": 0.9008563191187229 |
| }, |
| { |
| "epoch": 0.4611837048424289, |
| "grad_norm": 4.25, |
| "learning_rate": 1.2096381255136869e-05, |
| "loss": 0.25534510612487793, |
| "step": 3600, |
| "token_acc": 0.9020348587619129 |
| }, |
| { |
| "epoch": 0.4611837048424289, |
| "eval_loss": 0.33049342036247253, |
| "eval_runtime": 103.5925, |
| "eval_samples_per_second": 96.532, |
| "eval_steps_per_second": 12.067, |
| "eval_token_acc": 0.881139576216288, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.46182423776582116, |
| "grad_norm": 2.90625, |
| "learning_rate": 1.207566325953756e-05, |
| "loss": 0.2503223896026611, |
| "step": 3605, |
| "token_acc": 0.9033789323781988 |
| }, |
| { |
| "epoch": 0.46246477068921343, |
| "grad_norm": 4.21875, |
| "learning_rate": 1.2054935949124429e-05, |
| "loss": 0.2458160400390625, |
| "step": 3610, |
| "token_acc": 0.9040593589577671 |
| }, |
| { |
| "epoch": 0.4631053036126057, |
| "grad_norm": 3.625, |
| "learning_rate": 1.2034199416914026e-05, |
| "loss": 0.2477043390274048, |
| "step": 3615, |
| "token_acc": 0.9040510807196169 |
| }, |
| { |
| "epoch": 0.46374583653599794, |
| "grad_norm": 3.21875, |
| "learning_rate": 1.2013453755964282e-05, |
| "loss": 0.24677414894104005, |
| "step": 3620, |
| "token_acc": 0.9042521044679473 |
| }, |
| { |
| "epoch": 0.4643863694593902, |
| "grad_norm": 2.890625, |
| "learning_rate": 1.1992699059374103e-05, |
| "loss": 0.24625577926635742, |
| "step": 3625, |
| "token_acc": 0.9028618364170845 |
| }, |
| { |
| "epoch": 0.4650269023827825, |
| "grad_norm": 3.0, |
| "learning_rate": 1.197193542028294e-05, |
| "loss": 0.2436453342437744, |
| "step": 3630, |
| "token_acc": 0.9060716139076285 |
| }, |
| { |
| "epoch": 0.4656674353061747, |
| "grad_norm": 3.25, |
| "learning_rate": 1.1951162931870367e-05, |
| "loss": 0.24116950035095214, |
| "step": 3635, |
| "token_acc": 0.9068125053828267 |
| }, |
| { |
| "epoch": 0.466307968229567, |
| "grad_norm": 5.09375, |
| "learning_rate": 1.1930381687355685e-05, |
| "loss": 0.25627937316894533, |
| "step": 3640, |
| "token_acc": 0.899607910724288 |
| }, |
| { |
| "epoch": 0.46694850115295927, |
| "grad_norm": 4.34375, |
| "learning_rate": 1.190959177999748e-05, |
| "loss": 0.23881065845489502, |
| "step": 3645, |
| "token_acc": 0.9060243048047398 |
| }, |
| { |
| "epoch": 0.46758903407635155, |
| "grad_norm": 2.59375, |
| "learning_rate": 1.1888793303093211e-05, |
| "loss": 0.24708976745605468, |
| "step": 3650, |
| "token_acc": 0.9032951905704207 |
| }, |
| { |
| "epoch": 0.4682295669997438, |
| "grad_norm": 3.53125, |
| "learning_rate": 1.18679863499788e-05, |
| "loss": 0.2470097541809082, |
| "step": 3655, |
| "token_acc": 0.9035228009509402 |
| }, |
| { |
| "epoch": 0.46887009992313605, |
| "grad_norm": 2.5625, |
| "learning_rate": 1.1847171014028207e-05, |
| "loss": 0.24061377048492433, |
| "step": 3660, |
| "token_acc": 0.9055094274346999 |
| }, |
| { |
| "epoch": 0.46951063284652833, |
| "grad_norm": 2.421875, |
| "learning_rate": 1.1826347388653005e-05, |
| "loss": 0.24855940341949462, |
| "step": 3665, |
| "token_acc": 0.9045786674737343 |
| }, |
| { |
| "epoch": 0.47015116576992055, |
| "grad_norm": 2.984375, |
| "learning_rate": 1.180551556730198e-05, |
| "loss": 0.2487732172012329, |
| "step": 3670, |
| "token_acc": 0.9030577088716624 |
| }, |
| { |
| "epoch": 0.47079169869331283, |
| "grad_norm": 2.453125, |
| "learning_rate": 1.1784675643460682e-05, |
| "loss": 0.24545960426330565, |
| "step": 3675, |
| "token_acc": 0.9027664242997891 |
| }, |
| { |
| "epoch": 0.4714322316167051, |
| "grad_norm": 3.0625, |
| "learning_rate": 1.176382771065103e-05, |
| "loss": 0.23899221420288086, |
| "step": 3680, |
| "token_acc": 0.906933437269932 |
| }, |
| { |
| "epoch": 0.47207276454009733, |
| "grad_norm": 6.9375, |
| "learning_rate": 1.1742971862430888e-05, |
| "loss": 0.25500404834747314, |
| "step": 3685, |
| "token_acc": 0.9016767964136385 |
| }, |
| { |
| "epoch": 0.4727132974634896, |
| "grad_norm": 4.53125, |
| "learning_rate": 1.1722108192393635e-05, |
| "loss": 0.24634737968444825, |
| "step": 3690, |
| "token_acc": 0.9017565007749269 |
| }, |
| { |
| "epoch": 0.4733538303868819, |
| "grad_norm": 2.796875, |
| "learning_rate": 1.1701236794167753e-05, |
| "loss": 0.2485578775405884, |
| "step": 3695, |
| "token_acc": 0.9019531419167641 |
| }, |
| { |
| "epoch": 0.47399436331027417, |
| "grad_norm": 3.265625, |
| "learning_rate": 1.168035776141641e-05, |
| "loss": 0.24262137413024903, |
| "step": 3700, |
| "token_acc": 0.9076976212062341 |
| }, |
| { |
| "epoch": 0.47399436331027417, |
| "eval_loss": 0.3315788805484772, |
| "eval_runtime": 103.5836, |
| "eval_samples_per_second": 96.54, |
| "eval_steps_per_second": 12.068, |
| "eval_token_acc": 0.8810066939444441, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.4746348962336664, |
| "grad_norm": 4.34375, |
| "learning_rate": 1.165947118783703e-05, |
| "loss": 0.24236671924591063, |
| "step": 3705, |
| "token_acc": 0.906829352418729 |
| }, |
| { |
| "epoch": 0.47527542915705867, |
| "grad_norm": 3.375, |
| "learning_rate": 1.1638577167160874e-05, |
| "loss": 0.24883639812469482, |
| "step": 3710, |
| "token_acc": 0.9045120220224526 |
| }, |
| { |
| "epoch": 0.47591596208045095, |
| "grad_norm": 3.375, |
| "learning_rate": 1.1617675793152631e-05, |
| "loss": 0.2473759651184082, |
| "step": 3715, |
| "token_acc": 0.9040358358099668 |
| }, |
| { |
| "epoch": 0.4765564950038432, |
| "grad_norm": 4.34375, |
| "learning_rate": 1.1596767159609988e-05, |
| "loss": 0.2524222135543823, |
| "step": 3720, |
| "token_acc": 0.9003019844693702 |
| }, |
| { |
| "epoch": 0.47719702792723545, |
| "grad_norm": 2.796875, |
| "learning_rate": 1.1575851360363201e-05, |
| "loss": 0.2499473810195923, |
| "step": 3725, |
| "token_acc": 0.9026130307718755 |
| }, |
| { |
| "epoch": 0.47783756085062773, |
| "grad_norm": 3.8125, |
| "learning_rate": 1.1554928489274697e-05, |
| "loss": 0.24831125736236573, |
| "step": 3730, |
| "token_acc": 0.9022294772112204 |
| }, |
| { |
| "epoch": 0.47847809377402, |
| "grad_norm": 2.796875, |
| "learning_rate": 1.1533998640238626e-05, |
| "loss": 0.2451251983642578, |
| "step": 3735, |
| "token_acc": 0.9032967032967033 |
| }, |
| { |
| "epoch": 0.47911862669741223, |
| "grad_norm": 2.90625, |
| "learning_rate": 1.1513061907180462e-05, |
| "loss": 0.2445608615875244, |
| "step": 3740, |
| "token_acc": 0.9054381711118809 |
| }, |
| { |
| "epoch": 0.4797591596208045, |
| "grad_norm": 2.65625, |
| "learning_rate": 1.1492118384056565e-05, |
| "loss": 0.2464083194732666, |
| "step": 3745, |
| "token_acc": 0.9023748976337227 |
| }, |
| { |
| "epoch": 0.4803996925441968, |
| "grad_norm": 3.5625, |
| "learning_rate": 1.1471168164853769e-05, |
| "loss": 0.24423737525939943, |
| "step": 3750, |
| "token_acc": 0.9049323100801931 |
| }, |
| { |
| "epoch": 0.481040225467589, |
| "grad_norm": 4.34375, |
| "learning_rate": 1.1450211343588962e-05, |
| "loss": 0.24666328430175782, |
| "step": 3755, |
| "token_acc": 0.905542815109688 |
| }, |
| { |
| "epoch": 0.4816807583909813, |
| "grad_norm": 2.28125, |
| "learning_rate": 1.142924801430865e-05, |
| "loss": 0.24257378578186034, |
| "step": 3760, |
| "token_acc": 0.9064537017051586 |
| }, |
| { |
| "epoch": 0.48232129131437357, |
| "grad_norm": 4.25, |
| "learning_rate": 1.1408278271088555e-05, |
| "loss": 0.24482569694519044, |
| "step": 3765, |
| "token_acc": 0.9051044583243593 |
| }, |
| { |
| "epoch": 0.48296182423776585, |
| "grad_norm": 4.75, |
| "learning_rate": 1.1387302208033173e-05, |
| "loss": 0.24971480369567872, |
| "step": 3770, |
| "token_acc": 0.9026384883570783 |
| }, |
| { |
| "epoch": 0.48360235716115807, |
| "grad_norm": 4.09375, |
| "learning_rate": 1.1366319919275368e-05, |
| "loss": 0.24797563552856444, |
| "step": 3775, |
| "token_acc": 0.9017930438539642 |
| }, |
| { |
| "epoch": 0.48424289008455035, |
| "grad_norm": 3.328125, |
| "learning_rate": 1.1345331498975938e-05, |
| "loss": 0.24426445960998536, |
| "step": 3780, |
| "token_acc": 0.9049935428325441 |
| }, |
| { |
| "epoch": 0.4848834230079426, |
| "grad_norm": 3.765625, |
| "learning_rate": 1.1324337041323204e-05, |
| "loss": 0.25280845165252686, |
| "step": 3785, |
| "token_acc": 0.9023151734228418 |
| }, |
| { |
| "epoch": 0.48552395593133485, |
| "grad_norm": 2.71875, |
| "learning_rate": 1.1303336640532567e-05, |
| "loss": 0.24581615924835204, |
| "step": 3790, |
| "token_acc": 0.9035905403072674 |
| }, |
| { |
| "epoch": 0.48616448885472713, |
| "grad_norm": 2.4375, |
| "learning_rate": 1.1282330390846117e-05, |
| "loss": 0.24577610492706298, |
| "step": 3795, |
| "token_acc": 0.9040096680909836 |
| }, |
| { |
| "epoch": 0.4868050217781194, |
| "grad_norm": 2.921875, |
| "learning_rate": 1.1261318386532177e-05, |
| "loss": 0.2388829231262207, |
| "step": 3800, |
| "token_acc": 0.9051091740743937 |
| }, |
| { |
| "epoch": 0.4868050217781194, |
| "eval_loss": 0.3326900601387024, |
| "eval_runtime": 103.7176, |
| "eval_samples_per_second": 96.416, |
| "eval_steps_per_second": 12.052, |
| "eval_token_acc": 0.8808295175819856, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.4874455547015117, |
| "grad_norm": 3.0625, |
| "learning_rate": 1.12403007218849e-05, |
| "loss": 0.2488114833831787, |
| "step": 3805, |
| "token_acc": 0.9019819043515725 |
| }, |
| { |
| "epoch": 0.4880860876249039, |
| "grad_norm": 3.546875, |
| "learning_rate": 1.121927749122384e-05, |
| "loss": 0.24407386779785156, |
| "step": 3810, |
| "token_acc": 0.9046487514041303 |
| }, |
| { |
| "epoch": 0.4887266205482962, |
| "grad_norm": 3.4375, |
| "learning_rate": 1.1198248788893531e-05, |
| "loss": 0.23504881858825682, |
| "step": 3815, |
| "token_acc": 0.908643467748899 |
| }, |
| { |
| "epoch": 0.48936715347168847, |
| "grad_norm": 2.890625, |
| "learning_rate": 1.117721470926306e-05, |
| "loss": 0.240411376953125, |
| "step": 3820, |
| "token_acc": 0.9068462401795735 |
| }, |
| { |
| "epoch": 0.4900076863950807, |
| "grad_norm": 6.03125, |
| "learning_rate": 1.1156175346725644e-05, |
| "loss": 0.23961906433105468, |
| "step": 3825, |
| "token_acc": 0.9053354053354054 |
| }, |
| { |
| "epoch": 0.49064821931847297, |
| "grad_norm": 3.453125, |
| "learning_rate": 1.113513079569821e-05, |
| "loss": 0.24782803058624267, |
| "step": 3830, |
| "token_acc": 0.9041679212009118 |
| }, |
| { |
| "epoch": 0.49128875224186525, |
| "grad_norm": 2.625, |
| "learning_rate": 1.1114081150620968e-05, |
| "loss": 0.24206724166870117, |
| "step": 3835, |
| "token_acc": 0.9066114275850164 |
| }, |
| { |
| "epoch": 0.49192928516525747, |
| "grad_norm": 3.03125, |
| "learning_rate": 1.1093026505956989e-05, |
| "loss": 0.24786317348480225, |
| "step": 3840, |
| "token_acc": 0.9004302925989673 |
| }, |
| { |
| "epoch": 0.49256981808864975, |
| "grad_norm": 3.421875, |
| "learning_rate": 1.107196695619178e-05, |
| "loss": 0.2436119556427002, |
| "step": 3845, |
| "token_acc": 0.9046715265496269 |
| }, |
| { |
| "epoch": 0.493210351012042, |
| "grad_norm": 3.328125, |
| "learning_rate": 1.105090259583286e-05, |
| "loss": 0.25207488536834716, |
| "step": 3850, |
| "token_acc": 0.901183050118305 |
| }, |
| { |
| "epoch": 0.4938508839354343, |
| "grad_norm": 2.65625, |
| "learning_rate": 1.1029833519409337e-05, |
| "loss": 0.24722940921783448, |
| "step": 3855, |
| "token_acc": 0.9046530682651257 |
| }, |
| { |
| "epoch": 0.4944914168588265, |
| "grad_norm": 2.6875, |
| "learning_rate": 1.100875982147148e-05, |
| "loss": 0.24187374114990234, |
| "step": 3860, |
| "token_acc": 0.906386286109072 |
| }, |
| { |
| "epoch": 0.4951319497822188, |
| "grad_norm": 7.78125, |
| "learning_rate": 1.09876815965903e-05, |
| "loss": 0.25092015266418455, |
| "step": 3865, |
| "token_acc": 0.9019010427841909 |
| }, |
| { |
| "epoch": 0.4957724827056111, |
| "grad_norm": 2.71875, |
| "learning_rate": 1.096659893935713e-05, |
| "loss": 0.2399623155593872, |
| "step": 3870, |
| "token_acc": 0.9047742492979045 |
| }, |
| { |
| "epoch": 0.4964130156290033, |
| "grad_norm": 2.734375, |
| "learning_rate": 1.0945511944383178e-05, |
| "loss": 0.24817066192626952, |
| "step": 3875, |
| "token_acc": 0.9023097474791002 |
| }, |
| { |
| "epoch": 0.4970535485523956, |
| "grad_norm": 32.75, |
| "learning_rate": 1.0924420706299131e-05, |
| "loss": 0.23887484073638915, |
| "step": 3880, |
| "token_acc": 0.9074281287657083 |
| }, |
| { |
| "epoch": 0.49769408147578786, |
| "grad_norm": 3.078125, |
| "learning_rate": 1.0903325319754717e-05, |
| "loss": 0.24414317607879638, |
| "step": 3885, |
| "token_acc": 0.9059984419631265 |
| }, |
| { |
| "epoch": 0.49833461439918014, |
| "grad_norm": 6.90625, |
| "learning_rate": 1.0882225879418272e-05, |
| "loss": 0.2399660110473633, |
| "step": 3890, |
| "token_acc": 0.9039769303606783 |
| }, |
| { |
| "epoch": 0.49897514732257237, |
| "grad_norm": 6.09375, |
| "learning_rate": 1.086112247997633e-05, |
| "loss": 0.24311909675598145, |
| "step": 3895, |
| "token_acc": 0.9053466029037956 |
| }, |
| { |
| "epoch": 0.49961568024596464, |
| "grad_norm": 2.84375, |
| "learning_rate": 1.0840015216133195e-05, |
| "loss": 0.24150404930114747, |
| "step": 3900, |
| "token_acc": 0.9062136674848211 |
| }, |
| { |
| "epoch": 0.49961568024596464, |
| "eval_loss": 0.3318501114845276, |
| "eval_runtime": 103.7178, |
| "eval_samples_per_second": 96.415, |
| "eval_steps_per_second": 12.052, |
| "eval_token_acc": 0.880832285962649, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.5002562131693569, |
| "grad_norm": 3.078125, |
| "learning_rate": 1.0818904182610505e-05, |
| "loss": 0.23810501098632814, |
| "step": 3905, |
| "token_acc": 0.9054380664652568 |
| }, |
| { |
| "epoch": 0.5008967460927491, |
| "grad_norm": 3.03125, |
| "learning_rate": 1.0797789474146825e-05, |
| "loss": 0.24517326354980468, |
| "step": 3910, |
| "token_acc": 0.9039823773324119 |
| }, |
| { |
| "epoch": 0.5015372790161414, |
| "grad_norm": 2.828125, |
| "learning_rate": 1.07766711854972e-05, |
| "loss": 0.24150619506835938, |
| "step": 3915, |
| "token_acc": 0.9048644922228446 |
| }, |
| { |
| "epoch": 0.5021778119395337, |
| "grad_norm": 3.671875, |
| "learning_rate": 1.0755549411432754e-05, |
| "loss": 0.24119091033935547, |
| "step": 3920, |
| "token_acc": 0.9053400155534433 |
| }, |
| { |
| "epoch": 0.502818344862926, |
| "grad_norm": 3.578125, |
| "learning_rate": 1.0734424246740238e-05, |
| "loss": 0.24077696800231935, |
| "step": 3925, |
| "token_acc": 0.9054638194864701 |
| }, |
| { |
| "epoch": 0.5034588777863183, |
| "grad_norm": 4.03125, |
| "learning_rate": 1.0713295786221634e-05, |
| "loss": 0.24392437934875488, |
| "step": 3930, |
| "token_acc": 0.9047639614736751 |
| }, |
| { |
| "epoch": 0.5040994107097104, |
| "grad_norm": 3.328125, |
| "learning_rate": 1.0692164124693703e-05, |
| "loss": 0.23980698585510254, |
| "step": 3935, |
| "token_acc": 0.905363456066224 |
| }, |
| { |
| "epoch": 0.5047399436331027, |
| "grad_norm": 2.765625, |
| "learning_rate": 1.067102935698758e-05, |
| "loss": 0.23611803054809571, |
| "step": 3940, |
| "token_acc": 0.9073914169760815 |
| }, |
| { |
| "epoch": 0.505380476556495, |
| "grad_norm": 3.40625, |
| "learning_rate": 1.064989157794833e-05, |
| "loss": 0.2380206823348999, |
| "step": 3945, |
| "token_acc": 0.9074281287657083 |
| }, |
| { |
| "epoch": 0.5060210094798873, |
| "grad_norm": 3.6875, |
| "learning_rate": 1.0628750882434537e-05, |
| "loss": 0.2411219596862793, |
| "step": 3950, |
| "token_acc": 0.9061099620820406 |
| }, |
| { |
| "epoch": 0.5066615424032795, |
| "grad_norm": 3.171875, |
| "learning_rate": 1.0607607365317874e-05, |
| "loss": 0.23887009620666505, |
| "step": 3955, |
| "token_acc": 0.906588215083319 |
| }, |
| { |
| "epoch": 0.5073020753266718, |
| "grad_norm": 4.625, |
| "learning_rate": 1.0586461121482672e-05, |
| "loss": 0.2420198917388916, |
| "step": 3960, |
| "token_acc": 0.9068667497957604 |
| }, |
| { |
| "epoch": 0.5079426082500641, |
| "grad_norm": 2.65625, |
| "learning_rate": 1.0565312245825505e-05, |
| "loss": 0.2432565689086914, |
| "step": 3965, |
| "token_acc": 0.905863065706027 |
| }, |
| { |
| "epoch": 0.5085831411734563, |
| "grad_norm": 2.8125, |
| "learning_rate": 1.0544160833254752e-05, |
| "loss": 0.2371816873550415, |
| "step": 3970, |
| "token_acc": 0.9089968976215098 |
| }, |
| { |
| "epoch": 0.5092236740968485, |
| "grad_norm": 3.6875, |
| "learning_rate": 1.052300697869018e-05, |
| "loss": 0.2434596061706543, |
| "step": 3975, |
| "token_acc": 0.9051765010128874 |
| }, |
| { |
| "epoch": 0.5098642070202408, |
| "grad_norm": 25.25, |
| "learning_rate": 1.0501850777062512e-05, |
| "loss": 0.24385199546813965, |
| "step": 3980, |
| "token_acc": 0.9046000258186669 |
| }, |
| { |
| "epoch": 0.5105047399436331, |
| "grad_norm": 3.578125, |
| "learning_rate": 1.0480692323313007e-05, |
| "loss": 0.23917775154113768, |
| "step": 3985, |
| "token_acc": 0.9061435654235827 |
| }, |
| { |
| "epoch": 0.5111452728670254, |
| "grad_norm": 3.546875, |
| "learning_rate": 1.0459531712393025e-05, |
| "loss": 0.2387022018432617, |
| "step": 3990, |
| "token_acc": 0.9047331145275522 |
| }, |
| { |
| "epoch": 0.5117858057904177, |
| "grad_norm": 2.515625, |
| "learning_rate": 1.0438369039263614e-05, |
| "loss": 0.24243345260620117, |
| "step": 3995, |
| "token_acc": 0.9045505472722571 |
| }, |
| { |
| "epoch": 0.5124263387138099, |
| "grad_norm": 3.078125, |
| "learning_rate": 1.0417204398895072e-05, |
| "loss": 0.23408794403076172, |
| "step": 4000, |
| "token_acc": 0.9087918271936279 |
| }, |
| { |
| "epoch": 0.5124263387138099, |
| "eval_loss": 0.3293675482273102, |
| "eval_runtime": 102.8807, |
| "eval_samples_per_second": 97.2, |
| "eval_steps_per_second": 12.15, |
| "eval_token_acc": 0.8810675983190392, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.5130668716372021, |
| "grad_norm": 6.1875, |
| "learning_rate": 1.039603788626653e-05, |
| "loss": 0.2410355567932129, |
| "step": 4005, |
| "token_acc": 0.9065908013276435 |
| }, |
| { |
| "epoch": 0.5137074045605944, |
| "grad_norm": 3.0, |
| "learning_rate": 1.0374869596365508e-05, |
| "loss": 0.2497018337249756, |
| "step": 4010, |
| "token_acc": 0.900746973469563 |
| }, |
| { |
| "epoch": 0.5143479374839867, |
| "grad_norm": 6.78125, |
| "learning_rate": 1.035369962418752e-05, |
| "loss": 0.24223339557647705, |
| "step": 4015, |
| "token_acc": 0.9042599153201417 |
| }, |
| { |
| "epoch": 0.5149884704073789, |
| "grad_norm": 4.375, |
| "learning_rate": 1.0332528064735614e-05, |
| "loss": 0.24308998584747316, |
| "step": 4020, |
| "token_acc": 0.905254091300603 |
| }, |
| { |
| "epoch": 0.5156290033307712, |
| "grad_norm": 2.78125, |
| "learning_rate": 1.031135501301997e-05, |
| "loss": 0.24722557067871093, |
| "step": 4025, |
| "token_acc": 0.902273805928291 |
| }, |
| { |
| "epoch": 0.5162695362541635, |
| "grad_norm": 3.4375, |
| "learning_rate": 1.0290180564057461e-05, |
| "loss": 0.23717832565307617, |
| "step": 4030, |
| "token_acc": 0.9058447172747709 |
| }, |
| { |
| "epoch": 0.5169100691775558, |
| "grad_norm": 4.03125, |
| "learning_rate": 1.0269004812871236e-05, |
| "loss": 0.23974413871765138, |
| "step": 4035, |
| "token_acc": 0.906494960806271 |
| }, |
| { |
| "epoch": 0.5175506021009479, |
| "grad_norm": 2.921875, |
| "learning_rate": 1.024782785449028e-05, |
| "loss": 0.25091626644134524, |
| "step": 4040, |
| "token_acc": 0.9013481980814104 |
| }, |
| { |
| "epoch": 0.5181911350243402, |
| "grad_norm": 3.0, |
| "learning_rate": 1.0226649783948997e-05, |
| "loss": 0.2415644645690918, |
| "step": 4045, |
| "token_acc": 0.9057377049180327 |
| }, |
| { |
| "epoch": 0.5188316679477325, |
| "grad_norm": 4.875, |
| "learning_rate": 1.0205470696286787e-05, |
| "loss": 0.24452197551727295, |
| "step": 4050, |
| "token_acc": 0.9055036791600327 |
| }, |
| { |
| "epoch": 0.5194722008711248, |
| "grad_norm": 4.25, |
| "learning_rate": 1.0184290686547611e-05, |
| "loss": 0.23984365463256835, |
| "step": 4055, |
| "token_acc": 0.9044206527370057 |
| }, |
| { |
| "epoch": 0.5201127337945171, |
| "grad_norm": 6.65625, |
| "learning_rate": 1.0163109849779567e-05, |
| "loss": 0.24106016159057617, |
| "step": 4060, |
| "token_acc": 0.9063618718999353 |
| }, |
| { |
| "epoch": 0.5207532667179093, |
| "grad_norm": 3.46875, |
| "learning_rate": 1.0141928281034468e-05, |
| "loss": 0.2418668746948242, |
| "step": 4065, |
| "token_acc": 0.9044542086671836 |
| }, |
| { |
| "epoch": 0.5213937996413016, |
| "grad_norm": 2.9375, |
| "learning_rate": 1.0120746075367406e-05, |
| "loss": 0.2402285099029541, |
| "step": 4070, |
| "token_acc": 0.9048873154304464 |
| }, |
| { |
| "epoch": 0.5220343325646938, |
| "grad_norm": 2.90625, |
| "learning_rate": 1.0099563327836338e-05, |
| "loss": 0.23992910385131835, |
| "step": 4075, |
| "token_acc": 0.906600034464932 |
| }, |
| { |
| "epoch": 0.5226748654880861, |
| "grad_norm": 3.515625, |
| "learning_rate": 1.0078380133501646e-05, |
| "loss": 0.24107756614685058, |
| "step": 4080, |
| "token_acc": 0.9048888506686159 |
| }, |
| { |
| "epoch": 0.5233153984114783, |
| "grad_norm": 3.078125, |
| "learning_rate": 1.0057196587425721e-05, |
| "loss": 0.24715356826782225, |
| "step": 4085, |
| "token_acc": 0.904106634457769 |
| }, |
| { |
| "epoch": 0.5239559313348706, |
| "grad_norm": 3.703125, |
| "learning_rate": 1.0036012784672538e-05, |
| "loss": 0.24057602882385254, |
| "step": 4090, |
| "token_acc": 0.9058342303552207 |
| }, |
| { |
| "epoch": 0.5245964642582629, |
| "grad_norm": 6.59375, |
| "learning_rate": 1.001482882030721e-05, |
| "loss": 0.2389677047729492, |
| "step": 4095, |
| "token_acc": 0.9081055404413352 |
| }, |
| { |
| "epoch": 0.5252369971816552, |
| "grad_norm": 2.53125, |
| "learning_rate": 9.99364478939559e-06, |
| "loss": 0.24011881351470948, |
| "step": 4100, |
| "token_acc": 0.9068454177084231 |
| }, |
| { |
| "epoch": 0.5252369971816552, |
| "eval_loss": 0.33330872654914856, |
| "eval_runtime": 102.9347, |
| "eval_samples_per_second": 97.149, |
| "eval_steps_per_second": 12.144, |
| "eval_token_acc": 0.8811091240289904, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.5258775301050475, |
| "grad_norm": 9.6875, |
| "learning_rate": 9.972460787003814e-06, |
| "loss": 0.24392313957214357, |
| "step": 4105, |
| "token_acc": 0.9049197262514527 |
| }, |
| { |
| "epoch": 0.5265180630284396, |
| "grad_norm": 3.078125, |
| "learning_rate": 9.95127690819791e-06, |
| "loss": 0.24266266822814941, |
| "step": 4110, |
| "token_acc": 0.9032661151327128 |
| }, |
| { |
| "epoch": 0.5271585959518319, |
| "grad_norm": 2.75, |
| "learning_rate": 9.93009324804333e-06, |
| "loss": 0.24082543849945068, |
| "step": 4115, |
| "token_acc": 0.9040199707325471 |
| }, |
| { |
| "epoch": 0.5277991288752242, |
| "grad_norm": 3.921875, |
| "learning_rate": 9.908909901604563e-06, |
| "loss": 0.24310965538024903, |
| "step": 4120, |
| "token_acc": 0.9044476079547905 |
| }, |
| { |
| "epoch": 0.5284396617986165, |
| "grad_norm": 2.453125, |
| "learning_rate": 9.887726963944676e-06, |
| "loss": 0.2375312328338623, |
| "step": 4125, |
| "token_acc": 0.9068424681144432 |
| }, |
| { |
| "epoch": 0.5290801947220087, |
| "grad_norm": 2.90625, |
| "learning_rate": 9.86654453012491e-06, |
| "loss": 0.23663816452026368, |
| "step": 4130, |
| "token_acc": 0.9086728274545534 |
| }, |
| { |
| "epoch": 0.529720727645401, |
| "grad_norm": 2.65625, |
| "learning_rate": 9.845362695204245e-06, |
| "loss": 0.24443821907043456, |
| "step": 4135, |
| "token_acc": 0.9042672413793104 |
| }, |
| { |
| "epoch": 0.5303612605687933, |
| "grad_norm": 4.65625, |
| "learning_rate": 9.824181554238965e-06, |
| "loss": 0.23482506275177, |
| "step": 4140, |
| "token_acc": 0.9089650996842971 |
| }, |
| { |
| "epoch": 0.5310017934921855, |
| "grad_norm": 17.5, |
| "learning_rate": 9.803001202282254e-06, |
| "loss": 0.24258599281311036, |
| "step": 4145, |
| "token_acc": 0.9058409510321446 |
| }, |
| { |
| "epoch": 0.5316423264155777, |
| "grad_norm": 2.890625, |
| "learning_rate": 9.781821734383741e-06, |
| "loss": 0.2373753547668457, |
| "step": 4150, |
| "token_acc": 0.9077843280691941 |
| }, |
| { |
| "epoch": 0.53228285933897, |
| "grad_norm": 3.46875, |
| "learning_rate": 9.760643245589096e-06, |
| "loss": 0.23887972831726073, |
| "step": 4155, |
| "token_acc": 0.9065009065009065 |
| }, |
| { |
| "epoch": 0.5329233922623623, |
| "grad_norm": 3.875, |
| "learning_rate": 9.73946583093959e-06, |
| "loss": 0.2412872791290283, |
| "step": 4160, |
| "token_acc": 0.9053228996474332 |
| }, |
| { |
| "epoch": 0.5335639251857546, |
| "grad_norm": 2.90625, |
| "learning_rate": 9.718289585471683e-06, |
| "loss": 0.23278658390045165, |
| "step": 4165, |
| "token_acc": 0.9080731969860064 |
| }, |
| { |
| "epoch": 0.5342044581091469, |
| "grad_norm": 3.015625, |
| "learning_rate": 9.697114604216573e-06, |
| "loss": 0.24164493083953859, |
| "step": 4170, |
| "token_acc": 0.9067511639937921 |
| }, |
| { |
| "epoch": 0.534844991032539, |
| "grad_norm": 3.109375, |
| "learning_rate": 9.6759409821998e-06, |
| "loss": 0.23885555267333985, |
| "step": 4175, |
| "token_acc": 0.9060220159723721 |
| }, |
| { |
| "epoch": 0.5354855239559313, |
| "grad_norm": 2.875, |
| "learning_rate": 9.65476881444079e-06, |
| "loss": 0.23392415046691895, |
| "step": 4180, |
| "token_acc": 0.9085397540273688 |
| }, |
| { |
| "epoch": 0.5361260568793236, |
| "grad_norm": 3.40625, |
| "learning_rate": 9.633598195952461e-06, |
| "loss": 0.23441019058227539, |
| "step": 4185, |
| "token_acc": 0.908675799086758 |
| }, |
| { |
| "epoch": 0.5367665898027159, |
| "grad_norm": 2.84375, |
| "learning_rate": 9.612429221740761e-06, |
| "loss": 0.23697328567504883, |
| "step": 4190, |
| "token_acc": 0.9047413793103448 |
| }, |
| { |
| "epoch": 0.5374071227261081, |
| "grad_norm": 3.15625, |
| "learning_rate": 9.591261986804264e-06, |
| "loss": 0.24399030208587646, |
| "step": 4195, |
| "token_acc": 0.905359667537809 |
| }, |
| { |
| "epoch": 0.5380476556495004, |
| "grad_norm": 3.25, |
| "learning_rate": 9.570096586133748e-06, |
| "loss": 0.23835985660552977, |
| "step": 4200, |
| "token_acc": 0.9053366669540478 |
| }, |
| { |
| "epoch": 0.5380476556495004, |
| "eval_loss": 0.32872986793518066, |
| "eval_runtime": 104.0894, |
| "eval_samples_per_second": 96.071, |
| "eval_steps_per_second": 12.009, |
| "eval_token_acc": 0.8815603700771271, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.5386881885728927, |
| "grad_norm": 2.625, |
| "learning_rate": 9.548933114711742e-06, |
| "loss": 0.2371370553970337, |
| "step": 4205, |
| "token_acc": 0.908296379213846 |
| }, |
| { |
| "epoch": 0.5393287214962849, |
| "grad_norm": 3.03125, |
| "learning_rate": 9.527771667512138e-06, |
| "loss": 0.2396193265914917, |
| "step": 4210, |
| "token_acc": 0.9050984786450028 |
| }, |
| { |
| "epoch": 0.5399692544196771, |
| "grad_norm": 3.171875, |
| "learning_rate": 9.506612339499725e-06, |
| "loss": 0.23920049667358398, |
| "step": 4215, |
| "token_acc": 0.9055294573977503 |
| }, |
| { |
| "epoch": 0.5406097873430694, |
| "grad_norm": 2.90625, |
| "learning_rate": 9.485455225629798e-06, |
| "loss": 0.23707218170166017, |
| "step": 4220, |
| "token_acc": 0.9071637426900585 |
| }, |
| { |
| "epoch": 0.5412503202664617, |
| "grad_norm": 3.046875, |
| "learning_rate": 9.464300420847698e-06, |
| "loss": 0.2316804885864258, |
| "step": 4225, |
| "token_acc": 0.9091498185588388 |
| }, |
| { |
| "epoch": 0.541890853189854, |
| "grad_norm": 3.28125, |
| "learning_rate": 9.443148020088426e-06, |
| "loss": 0.24402837753295897, |
| "step": 4230, |
| "token_acc": 0.9042589878437797 |
| }, |
| { |
| "epoch": 0.5425313861132463, |
| "grad_norm": 2.625, |
| "learning_rate": 9.421998118276169e-06, |
| "loss": 0.2375174045562744, |
| "step": 4235, |
| "token_acc": 0.9062986675895484 |
| }, |
| { |
| "epoch": 0.5431719190366385, |
| "grad_norm": 4.625, |
| "learning_rate": 9.400850810323925e-06, |
| "loss": 0.24248833656311036, |
| "step": 4240, |
| "token_acc": 0.9057444415606887 |
| }, |
| { |
| "epoch": 0.5438124519600307, |
| "grad_norm": 3.875, |
| "learning_rate": 9.379706191133033e-06, |
| "loss": 0.24248261451721193, |
| "step": 4245, |
| "token_acc": 0.9062149331031506 |
| }, |
| { |
| "epoch": 0.544452984883423, |
| "grad_norm": 3.96875, |
| "learning_rate": 9.358564355592775e-06, |
| "loss": 0.23543434143066405, |
| "step": 4250, |
| "token_acc": 0.9084744298548721 |
| }, |
| { |
| "epoch": 0.5450935178068153, |
| "grad_norm": 3.203125, |
| "learning_rate": 9.337425398579932e-06, |
| "loss": 0.23771212100982667, |
| "step": 4255, |
| "token_acc": 0.9078099493083598 |
| }, |
| { |
| "epoch": 0.5457340507302075, |
| "grad_norm": 3.515625, |
| "learning_rate": 9.316289414958379e-06, |
| "loss": 0.2383446216583252, |
| "step": 4260, |
| "token_acc": 0.9065444799861675 |
| }, |
| { |
| "epoch": 0.5463745836535998, |
| "grad_norm": 4.625, |
| "learning_rate": 9.295156499578647e-06, |
| "loss": 0.24645309448242186, |
| "step": 4265, |
| "token_acc": 0.9030008180135187 |
| }, |
| { |
| "epoch": 0.5470151165769921, |
| "grad_norm": 3.03125, |
| "learning_rate": 9.274026747277487e-06, |
| "loss": 0.23401763439178466, |
| "step": 4270, |
| "token_acc": 0.9084355033672941 |
| }, |
| { |
| "epoch": 0.5476556495003844, |
| "grad_norm": 3.15625, |
| "learning_rate": 9.252900252877464e-06, |
| "loss": 0.24168498516082765, |
| "step": 4275, |
| "token_acc": 0.9051816813588476 |
| }, |
| { |
| "epoch": 0.5482961824237765, |
| "grad_norm": 3.296875, |
| "learning_rate": 9.231777111186514e-06, |
| "loss": 0.23365185260772706, |
| "step": 4280, |
| "token_acc": 0.9065501055079453 |
| }, |
| { |
| "epoch": 0.5489367153471688, |
| "grad_norm": 3.140625, |
| "learning_rate": 9.210657416997543e-06, |
| "loss": 0.2374626636505127, |
| "step": 4285, |
| "token_acc": 0.9064794816414686 |
| }, |
| { |
| "epoch": 0.5495772482705611, |
| "grad_norm": 3.296875, |
| "learning_rate": 9.189541265087966e-06, |
| "loss": 0.23222618103027343, |
| "step": 4290, |
| "token_acc": 0.9083916688272405 |
| }, |
| { |
| "epoch": 0.5502177811939534, |
| "grad_norm": 3.671875, |
| "learning_rate": 9.168428750219323e-06, |
| "loss": 0.24052739143371582, |
| "step": 4295, |
| "token_acc": 0.9065121508259102 |
| }, |
| { |
| "epoch": 0.5508583141173456, |
| "grad_norm": 4.28125, |
| "learning_rate": 9.14731996713681e-06, |
| "loss": 0.2399202346801758, |
| "step": 4300, |
| "token_acc": 0.9062012818292049 |
| }, |
| { |
| "epoch": 0.5508583141173456, |
| "eval_loss": 0.32973331212997437, |
| "eval_runtime": 104.5664, |
| "eval_samples_per_second": 95.633, |
| "eval_steps_per_second": 11.954, |
| "eval_token_acc": 0.8813555099080344, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.5514988470407379, |
| "grad_norm": 4.5625, |
| "learning_rate": 9.126215010568896e-06, |
| "loss": 0.2345888137817383, |
| "step": 4305, |
| "token_acc": 0.9077134986225895 |
| }, |
| { |
| "epoch": 0.5521393799641302, |
| "grad_norm": 3.6875, |
| "learning_rate": 9.105113975226865e-06, |
| "loss": 0.24162061214447023, |
| "step": 4310, |
| "token_acc": 0.9073946393174179 |
| }, |
| { |
| "epoch": 0.5527799128875224, |
| "grad_norm": 2.5625, |
| "learning_rate": 9.08401695580441e-06, |
| "loss": 0.2340158700942993, |
| "step": 4315, |
| "token_acc": 0.9100293711126468 |
| }, |
| { |
| "epoch": 0.5534204458109147, |
| "grad_norm": 3.546875, |
| "learning_rate": 9.062924046977194e-06, |
| "loss": 0.23286752700805663, |
| "step": 4320, |
| "token_acc": 0.9091850828729282 |
| }, |
| { |
| "epoch": 0.5540609787343069, |
| "grad_norm": 3.515625, |
| "learning_rate": 9.041835343402445e-06, |
| "loss": 0.23487985134124756, |
| "step": 4325, |
| "token_acc": 0.9075176937683411 |
| }, |
| { |
| "epoch": 0.5547015116576992, |
| "grad_norm": 2.734375, |
| "learning_rate": 9.020750939718518e-06, |
| "loss": 0.2381136178970337, |
| "step": 4330, |
| "token_acc": 0.9072947672662957 |
| }, |
| { |
| "epoch": 0.5553420445810915, |
| "grad_norm": 3.234375, |
| "learning_rate": 8.999670930544459e-06, |
| "loss": 0.2352077007293701, |
| "step": 4335, |
| "token_acc": 0.9067487855655795 |
| }, |
| { |
| "epoch": 0.5559825775044838, |
| "grad_norm": 3.625, |
| "learning_rate": 8.978595410479609e-06, |
| "loss": 0.2357017993927002, |
| "step": 4340, |
| "token_acc": 0.9078482104355325 |
| }, |
| { |
| "epoch": 0.556623110427876, |
| "grad_norm": 2.65625, |
| "learning_rate": 8.957524474103146e-06, |
| "loss": 0.2459559679031372, |
| "step": 4345, |
| "token_acc": 0.9045916609235011 |
| }, |
| { |
| "epoch": 0.5572636433512682, |
| "grad_norm": 3.21875, |
| "learning_rate": 8.936458215973698e-06, |
| "loss": 0.23383736610412598, |
| "step": 4350, |
| "token_acc": 0.9094742276912486 |
| }, |
| { |
| "epoch": 0.5579041762746605, |
| "grad_norm": 2.734375, |
| "learning_rate": 8.915396730628882e-06, |
| "loss": 0.24825828075408934, |
| "step": 4355, |
| "token_acc": 0.9029686759446767 |
| }, |
| { |
| "epoch": 0.5585447091980528, |
| "grad_norm": 3.25, |
| "learning_rate": 8.894340112584909e-06, |
| "loss": 0.23654026985168458, |
| "step": 4360, |
| "token_acc": 0.9066741350338231 |
| }, |
| { |
| "epoch": 0.559185242121445, |
| "grad_norm": 3.5, |
| "learning_rate": 8.873288456336138e-06, |
| "loss": 0.23700532913208008, |
| "step": 4365, |
| "token_acc": 0.9077705469120373 |
| }, |
| { |
| "epoch": 0.5598257750448373, |
| "grad_norm": 3.765625, |
| "learning_rate": 8.852241856354669e-06, |
| "loss": 0.23611578941345215, |
| "step": 4370, |
| "token_acc": 0.9087608592298051 |
| }, |
| { |
| "epoch": 0.5604663079682296, |
| "grad_norm": 4.5625, |
| "learning_rate": 8.831200407089897e-06, |
| "loss": 0.24070956707000732, |
| "step": 4375, |
| "token_acc": 0.9068157385508493 |
| }, |
| { |
| "epoch": 0.5611068408916219, |
| "grad_norm": 2.453125, |
| "learning_rate": 8.810164202968123e-06, |
| "loss": 0.2372671604156494, |
| "step": 4380, |
| "token_acc": 0.9063080980587142 |
| }, |
| { |
| "epoch": 0.561747373815014, |
| "grad_norm": 2.875, |
| "learning_rate": 8.789133338392099e-06, |
| "loss": 0.2328266382217407, |
| "step": 4385, |
| "token_acc": 0.9102890972732379 |
| }, |
| { |
| "epoch": 0.5623879067384063, |
| "grad_norm": 2.984375, |
| "learning_rate": 8.76810790774061e-06, |
| "loss": 0.2348611831665039, |
| "step": 4390, |
| "token_acc": 0.9081421424874936 |
| }, |
| { |
| "epoch": 0.5630284396617986, |
| "grad_norm": 3.59375, |
| "learning_rate": 8.747088005368068e-06, |
| "loss": 0.2405010223388672, |
| "step": 4395, |
| "token_acc": 0.9055416702576919 |
| }, |
| { |
| "epoch": 0.5636689725851909, |
| "grad_norm": 2.921875, |
| "learning_rate": 8.726073725604061e-06, |
| "loss": 0.2389441728591919, |
| "step": 4400, |
| "token_acc": 0.9059644888812274 |
| }, |
| { |
| "epoch": 0.5636689725851909, |
| "eval_loss": 0.328523188829422, |
| "eval_runtime": 102.5856, |
| "eval_samples_per_second": 97.48, |
| "eval_steps_per_second": 12.185, |
| "eval_token_acc": 0.8815963590257515, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.5643095055085832, |
| "grad_norm": 2.90625, |
| "learning_rate": 8.705065162752961e-06, |
| "loss": 0.24200544357299805, |
| "step": 4405, |
| "token_acc": 0.9059365448361961 |
| }, |
| { |
| "epoch": 0.5649500384319754, |
| "grad_norm": 5.03125, |
| "learning_rate": 8.68406241109347e-06, |
| "loss": 0.24370207786560058, |
| "step": 4410, |
| "token_acc": 0.904322319040635 |
| }, |
| { |
| "epoch": 0.5655905713553676, |
| "grad_norm": 2.65625, |
| "learning_rate": 8.663065564878223e-06, |
| "loss": 0.2380732536315918, |
| "step": 4415, |
| "token_acc": 0.9064141196728368 |
| }, |
| { |
| "epoch": 0.5662311042787599, |
| "grad_norm": 3.421875, |
| "learning_rate": 8.642074718333345e-06, |
| "loss": 0.2384279727935791, |
| "step": 4420, |
| "token_acc": 0.9063185513355413 |
| }, |
| { |
| "epoch": 0.5668716372021522, |
| "grad_norm": 4.1875, |
| "learning_rate": 8.621089965658046e-06, |
| "loss": 0.23173861503601073, |
| "step": 4425, |
| "token_acc": 0.9093538222471619 |
| }, |
| { |
| "epoch": 0.5675121701255444, |
| "grad_norm": 2.859375, |
| "learning_rate": 8.600111401024177e-06, |
| "loss": 0.2245471954345703, |
| "step": 4430, |
| "token_acc": 0.9126788570440496 |
| }, |
| { |
| "epoch": 0.5681527030489367, |
| "grad_norm": 2.75, |
| "learning_rate": 8.57913911857583e-06, |
| "loss": 0.23515353202819825, |
| "step": 4435, |
| "token_acc": 0.9106183959812922 |
| }, |
| { |
| "epoch": 0.568793235972329, |
| "grad_norm": 2.75, |
| "learning_rate": 8.558173212428895e-06, |
| "loss": 0.23450264930725098, |
| "step": 4440, |
| "token_acc": 0.9078670050324745 |
| }, |
| { |
| "epoch": 0.5694337688957213, |
| "grad_norm": 12.875, |
| "learning_rate": 8.537213776670656e-06, |
| "loss": 0.23401873111724852, |
| "step": 4445, |
| "token_acc": 0.9095069510404974 |
| }, |
| { |
| "epoch": 0.5700743018191134, |
| "grad_norm": 5.25, |
| "learning_rate": 8.516260905359364e-06, |
| "loss": 0.23944463729858398, |
| "step": 4450, |
| "token_acc": 0.9062594106259411 |
| }, |
| { |
| "epoch": 0.5707148347425057, |
| "grad_norm": 3.34375, |
| "learning_rate": 8.495314692523795e-06, |
| "loss": 0.23881077766418457, |
| "step": 4455, |
| "token_acc": 0.90836533068726 |
| }, |
| { |
| "epoch": 0.571355367665898, |
| "grad_norm": 2.609375, |
| "learning_rate": 8.47437523216286e-06, |
| "loss": 0.23443114757537842, |
| "step": 4460, |
| "token_acc": 0.9087260486794407 |
| }, |
| { |
| "epoch": 0.5719959005892903, |
| "grad_norm": 6.125, |
| "learning_rate": 8.453442618245155e-06, |
| "loss": 0.24273183345794677, |
| "step": 4465, |
| "token_acc": 0.9040428010527678 |
| }, |
| { |
| "epoch": 0.5726364335126826, |
| "grad_norm": 2.8125, |
| "learning_rate": 8.432516944708565e-06, |
| "loss": 0.23376893997192383, |
| "step": 4470, |
| "token_acc": 0.9095967220185465 |
| }, |
| { |
| "epoch": 0.5732769664360748, |
| "grad_norm": 2.953125, |
| "learning_rate": 8.411598305459812e-06, |
| "loss": 0.23575949668884277, |
| "step": 4475, |
| "token_acc": 0.9096040329182644 |
| }, |
| { |
| "epoch": 0.5739174993594671, |
| "grad_norm": 3.625, |
| "learning_rate": 8.390686794374072e-06, |
| "loss": 0.24351611137390136, |
| "step": 4480, |
| "token_acc": 0.905288150226635 |
| }, |
| { |
| "epoch": 0.5745580322828593, |
| "grad_norm": 4.84375, |
| "learning_rate": 8.369782505294511e-06, |
| "loss": 0.2270632266998291, |
| "step": 4485, |
| "token_acc": 0.9119619294830197 |
| }, |
| { |
| "epoch": 0.5751985652062516, |
| "grad_norm": 3.15625, |
| "learning_rate": 8.348885532031904e-06, |
| "loss": 0.23725566864013672, |
| "step": 4490, |
| "token_acc": 0.9062796515138446 |
| }, |
| { |
| "epoch": 0.5758390981296438, |
| "grad_norm": 2.9375, |
| "learning_rate": 8.327995968364178e-06, |
| "loss": 0.23767762184143065, |
| "step": 4495, |
| "token_acc": 0.907425097698654 |
| }, |
| { |
| "epoch": 0.5764796310530361, |
| "grad_norm": 3.03125, |
| "learning_rate": 8.307113908036024e-06, |
| "loss": 0.24003219604492188, |
| "step": 4500, |
| "token_acc": 0.9065685894954187 |
| }, |
| { |
| "epoch": 0.5764796310530361, |
| "eval_loss": 0.32945460081100464, |
| "eval_runtime": 103.1647, |
| "eval_samples_per_second": 96.932, |
| "eval_steps_per_second": 12.117, |
| "eval_token_acc": 0.881673873684327, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.5771201639764284, |
| "grad_norm": 2.921875, |
| "learning_rate": 8.286239444758448e-06, |
| "loss": 0.225927734375, |
| "step": 4505, |
| "token_acc": 0.9105997573236263 |
| }, |
| { |
| "epoch": 0.5777606968998207, |
| "grad_norm": 2.53125, |
| "learning_rate": 8.265372672208375e-06, |
| "loss": 0.24204869270324708, |
| "step": 4510, |
| "token_acc": 0.907190275023709 |
| }, |
| { |
| "epoch": 0.578401229823213, |
| "grad_norm": 3.109375, |
| "learning_rate": 8.244513684028208e-06, |
| "loss": 0.23642313480377197, |
| "step": 4515, |
| "token_acc": 0.9095570492933471 |
| }, |
| { |
| "epoch": 0.5790417627466051, |
| "grad_norm": 5.25, |
| "learning_rate": 8.223662573825418e-06, |
| "loss": 0.23264212608337403, |
| "step": 4520, |
| "token_acc": 0.9079717630853994 |
| }, |
| { |
| "epoch": 0.5796822956699974, |
| "grad_norm": 2.640625, |
| "learning_rate": 8.202819435172129e-06, |
| "loss": 0.22397842407226562, |
| "step": 4525, |
| "token_acc": 0.9115804932832275 |
| }, |
| { |
| "epoch": 0.5803228285933897, |
| "grad_norm": 3.8125, |
| "learning_rate": 8.181984361604677e-06, |
| "loss": 0.24578235149383545, |
| "step": 4530, |
| "token_acc": 0.9068775316728432 |
| }, |
| { |
| "epoch": 0.580963361516782, |
| "grad_norm": 3.234375, |
| "learning_rate": 8.161157446623227e-06, |
| "loss": 0.23125510215759276, |
| "step": 4535, |
| "token_acc": 0.9093144656801415 |
| }, |
| { |
| "epoch": 0.5816038944401742, |
| "grad_norm": 3.125, |
| "learning_rate": 8.140338783691308e-06, |
| "loss": 0.2348803997039795, |
| "step": 4540, |
| "token_acc": 0.9077382239716251 |
| }, |
| { |
| "epoch": 0.5822444273635665, |
| "grad_norm": 3.125, |
| "learning_rate": 8.119528466235434e-06, |
| "loss": 0.22919659614562987, |
| "step": 4545, |
| "token_acc": 0.9098924731182796 |
| }, |
| { |
| "epoch": 0.5828849602869588, |
| "grad_norm": 18.5, |
| "learning_rate": 8.098726587644659e-06, |
| "loss": 0.23590612411499023, |
| "step": 4550, |
| "token_acc": 0.9070518339934561 |
| }, |
| { |
| "epoch": 0.583525493210351, |
| "grad_norm": 3.359375, |
| "learning_rate": 8.07793324127017e-06, |
| "loss": 0.23590869903564454, |
| "step": 4555, |
| "token_acc": 0.9086673281849951 |
| }, |
| { |
| "epoch": 0.5841660261337432, |
| "grad_norm": 12.1875, |
| "learning_rate": 8.05714852042486e-06, |
| "loss": 0.23389995098114014, |
| "step": 4560, |
| "token_acc": 0.9086344946981173 |
| }, |
| { |
| "epoch": 0.5848065590571355, |
| "grad_norm": 3.53125, |
| "learning_rate": 8.036372518382922e-06, |
| "loss": 0.2384809970855713, |
| "step": 4565, |
| "token_acc": 0.9059391015978293 |
| }, |
| { |
| "epoch": 0.5854470919805278, |
| "grad_norm": 3.84375, |
| "learning_rate": 8.015605328379407e-06, |
| "loss": 0.23714299201965333, |
| "step": 4570, |
| "token_acc": 0.9076115033580162 |
| }, |
| { |
| "epoch": 0.5860876249039201, |
| "grad_norm": 2.609375, |
| "learning_rate": 7.994847043609844e-06, |
| "loss": 0.23302805423736572, |
| "step": 4575, |
| "token_acc": 0.9086178721940311 |
| }, |
| { |
| "epoch": 0.5867281578273124, |
| "grad_norm": 4.59375, |
| "learning_rate": 7.974097757229781e-06, |
| "loss": 0.23717694282531737, |
| "step": 4580, |
| "token_acc": 0.9076014314663907 |
| }, |
| { |
| "epoch": 0.5873686907507046, |
| "grad_norm": 3.0, |
| "learning_rate": 7.953357562354384e-06, |
| "loss": 0.23976330757141112, |
| "step": 4585, |
| "token_acc": 0.9052459298819882 |
| }, |
| { |
| "epoch": 0.5880092236740968, |
| "grad_norm": 5.625, |
| "learning_rate": 7.932626552058032e-06, |
| "loss": 0.23990461826324463, |
| "step": 4590, |
| "token_acc": 0.9076478454039598 |
| }, |
| { |
| "epoch": 0.5886497565974891, |
| "grad_norm": 2.78125, |
| "learning_rate": 7.911904819373873e-06, |
| "loss": 0.23198351860046387, |
| "step": 4595, |
| "token_acc": 0.9084361252479944 |
| }, |
| { |
| "epoch": 0.5892902895208814, |
| "grad_norm": 3.515625, |
| "learning_rate": 7.891192457293433e-06, |
| "loss": 0.2373666524887085, |
| "step": 4600, |
| "token_acc": 0.9076724137931035 |
| }, |
| { |
| "epoch": 0.5892902895208814, |
| "eval_loss": 0.3317316174507141, |
| "eval_runtime": 102.7383, |
| "eval_samples_per_second": 97.335, |
| "eval_steps_per_second": 12.167, |
| "eval_token_acc": 0.8819451749893418, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.5899308224442736, |
| "grad_norm": 2.734375, |
| "learning_rate": 7.870489558766178e-06, |
| "loss": 0.23856124877929688, |
| "step": 4605, |
| "token_acc": 0.9061164587559891 |
| }, |
| { |
| "epoch": 0.5905713553676659, |
| "grad_norm": 5.84375, |
| "learning_rate": 7.84979621669911e-06, |
| "loss": 0.23322293758392335, |
| "step": 4610, |
| "token_acc": 0.9075289241927128 |
| }, |
| { |
| "epoch": 0.5912118882910582, |
| "grad_norm": 2.546875, |
| "learning_rate": 7.829112523956335e-06, |
| "loss": 0.23959455490112305, |
| "step": 4615, |
| "token_acc": 0.9058361730578441 |
| }, |
| { |
| "epoch": 0.5918524212144505, |
| "grad_norm": 3.0, |
| "learning_rate": 7.808438573358674e-06, |
| "loss": 0.2323786735534668, |
| "step": 4620, |
| "token_acc": 0.9092474599621146 |
| }, |
| { |
| "epoch": 0.5924929541378426, |
| "grad_norm": 2.890625, |
| "learning_rate": 7.787774457683209e-06, |
| "loss": 0.23595137596130372, |
| "step": 4625, |
| "token_acc": 0.9091104889080336 |
| }, |
| { |
| "epoch": 0.5931334870612349, |
| "grad_norm": 4.25, |
| "learning_rate": 7.767120269662905e-06, |
| "loss": 0.2342782974243164, |
| "step": 4630, |
| "token_acc": 0.9082426127527217 |
| }, |
| { |
| "epoch": 0.5937740199846272, |
| "grad_norm": 3.59375, |
| "learning_rate": 7.746476101986164e-06, |
| "loss": 0.2340677261352539, |
| "step": 4635, |
| "token_acc": 0.9087346024636058 |
| }, |
| { |
| "epoch": 0.5944145529080195, |
| "grad_norm": 3.0625, |
| "learning_rate": 7.725842047296419e-06, |
| "loss": 0.23336553573608398, |
| "step": 4640, |
| "token_acc": 0.9081429560401523 |
| }, |
| { |
| "epoch": 0.5950550858314118, |
| "grad_norm": 3.203125, |
| "learning_rate": 7.70521819819173e-06, |
| "loss": 0.24391114711761475, |
| "step": 4645, |
| "token_acc": 0.9032941379906623 |
| }, |
| { |
| "epoch": 0.595695618754804, |
| "grad_norm": 4.625, |
| "learning_rate": 7.684604647224345e-06, |
| "loss": 0.23319551944732667, |
| "step": 4650, |
| "token_acc": 0.9080335989661856 |
| }, |
| { |
| "epoch": 0.5963361516781963, |
| "grad_norm": 2.9375, |
| "learning_rate": 7.66400148690031e-06, |
| "loss": 0.22878189086914064, |
| "step": 4655, |
| "token_acc": 0.9093414875748309 |
| }, |
| { |
| "epoch": 0.5969766846015885, |
| "grad_norm": 2.859375, |
| "learning_rate": 7.643408809679034e-06, |
| "loss": 0.2268078327178955, |
| "step": 4660, |
| "token_acc": 0.9114099182844049 |
| }, |
| { |
| "epoch": 0.5976172175249808, |
| "grad_norm": 3.25, |
| "learning_rate": 7.622826707972883e-06, |
| "loss": 0.23129682540893554, |
| "step": 4665, |
| "token_acc": 0.9086430423509075 |
| }, |
| { |
| "epoch": 0.598257750448373, |
| "grad_norm": 2.703125, |
| "learning_rate": 7.602255274146767e-06, |
| "loss": 0.2353008508682251, |
| "step": 4670, |
| "token_acc": 0.9068710222106767 |
| }, |
| { |
| "epoch": 0.5988982833717653, |
| "grad_norm": 2.875, |
| "learning_rate": 7.58169460051772e-06, |
| "loss": 0.2389591693878174, |
| "step": 4675, |
| "token_acc": 0.9058991190188288 |
| }, |
| { |
| "epoch": 0.5995388162951576, |
| "grad_norm": 3.078125, |
| "learning_rate": 7.561144779354483e-06, |
| "loss": 0.23087067604064943, |
| "step": 4680, |
| "token_acc": 0.9091809064692463 |
| }, |
| { |
| "epoch": 0.6001793492185499, |
| "grad_norm": 5.78125, |
| "learning_rate": 7.540605902877108e-06, |
| "loss": 0.2390049457550049, |
| "step": 4685, |
| "token_acc": 0.9069245380763253 |
| }, |
| { |
| "epoch": 0.600819882141942, |
| "grad_norm": 2.9375, |
| "learning_rate": 7.520078063256517e-06, |
| "loss": 0.23379735946655272, |
| "step": 4690, |
| "token_acc": 0.9081875135018362 |
| }, |
| { |
| "epoch": 0.6014604150653343, |
| "grad_norm": 3.5, |
| "learning_rate": 7.4995613526141156e-06, |
| "loss": 0.2288158893585205, |
| "step": 4695, |
| "token_acc": 0.9082493403123243 |
| }, |
| { |
| "epoch": 0.6021009479887266, |
| "grad_norm": 3.515625, |
| "learning_rate": 7.47905586302136e-06, |
| "loss": 0.23635220527648926, |
| "step": 4700, |
| "token_acc": 0.908278174159718 |
| }, |
| { |
| "epoch": 0.6021009479887266, |
| "eval_loss": 0.33331820368766785, |
| "eval_runtime": 103.2322, |
| "eval_samples_per_second": 96.869, |
| "eval_steps_per_second": 12.109, |
| "eval_token_acc": 0.8817569251042295, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.6027414809121189, |
| "grad_norm": 3.03125, |
| "learning_rate": 7.458561686499345e-06, |
| "loss": 0.22352910041809082, |
| "step": 4705, |
| "token_acc": 0.9113029146426093 |
| }, |
| { |
| "epoch": 0.6033820138355112, |
| "grad_norm": 18.75, |
| "learning_rate": 7.438078915018409e-06, |
| "loss": 0.22866015434265136, |
| "step": 4710, |
| "token_acc": 0.9103150625809237 |
| }, |
| { |
| "epoch": 0.6040225467589034, |
| "grad_norm": 2.953125, |
| "learning_rate": 7.417607640497697e-06, |
| "loss": 0.23653111457824708, |
| "step": 4715, |
| "token_acc": 0.9067353067353068 |
| }, |
| { |
| "epoch": 0.6046630796822957, |
| "grad_norm": 4.40625, |
| "learning_rate": 7.397147954804771e-06, |
| "loss": 0.23970022201538085, |
| "step": 4720, |
| "token_acc": 0.9069306076680899 |
| }, |
| { |
| "epoch": 0.6053036126056879, |
| "grad_norm": 5.3125, |
| "learning_rate": 7.376699949755176e-06, |
| "loss": 0.2359128475189209, |
| "step": 4725, |
| "token_acc": 0.9068213176957571 |
| }, |
| { |
| "epoch": 0.6059441455290802, |
| "grad_norm": 3.3125, |
| "learning_rate": 7.356263717112047e-06, |
| "loss": 0.23450722694396972, |
| "step": 4730, |
| "token_acc": 0.9093179469514295 |
| }, |
| { |
| "epoch": 0.6065846784524724, |
| "grad_norm": 3.03125, |
| "learning_rate": 7.335839348585676e-06, |
| "loss": 0.23415303230285645, |
| "step": 4735, |
| "token_acc": 0.9075492812257898 |
| }, |
| { |
| "epoch": 0.6072252113758647, |
| "grad_norm": 2.921875, |
| "learning_rate": 7.315426935833135e-06, |
| "loss": 0.22811522483825683, |
| "step": 4740, |
| "token_acc": 0.9106996417627001 |
| }, |
| { |
| "epoch": 0.607865744299257, |
| "grad_norm": 3.453125, |
| "learning_rate": 7.29502657045782e-06, |
| "loss": 0.23572731018066406, |
| "step": 4745, |
| "token_acc": 0.9084832017941088 |
| }, |
| { |
| "epoch": 0.6085062772226493, |
| "grad_norm": 2.921875, |
| "learning_rate": 7.274638344009079e-06, |
| "loss": 0.22873611450195314, |
| "step": 4750, |
| "token_acc": 0.9101705895055063 |
| }, |
| { |
| "epoch": 0.6091468101460416, |
| "grad_norm": 2.953125, |
| "learning_rate": 7.254262347981777e-06, |
| "loss": 0.23314647674560546, |
| "step": 4755, |
| "token_acc": 0.9090830933241628 |
| }, |
| { |
| "epoch": 0.6097873430694337, |
| "grad_norm": 3.078125, |
| "learning_rate": 7.233898673815891e-06, |
| "loss": 0.2401879072189331, |
| "step": 4760, |
| "token_acc": 0.9065476960213802 |
| }, |
| { |
| "epoch": 0.610427875992826, |
| "grad_norm": 4.625, |
| "learning_rate": 7.213547412896116e-06, |
| "loss": 0.23366336822509765, |
| "step": 4765, |
| "token_acc": 0.9075221619760737 |
| }, |
| { |
| "epoch": 0.6110684089162183, |
| "grad_norm": 2.921875, |
| "learning_rate": 7.193208656551419e-06, |
| "loss": 0.22110800743103026, |
| "step": 4770, |
| "token_acc": 0.9112323547241707 |
| }, |
| { |
| "epoch": 0.6117089418396106, |
| "grad_norm": 3.328125, |
| "learning_rate": 7.172882496054675e-06, |
| "loss": 0.22980756759643556, |
| "step": 4775, |
| "token_acc": 0.9108833830587625 |
| }, |
| { |
| "epoch": 0.6123494747630028, |
| "grad_norm": 8.6875, |
| "learning_rate": 7.152569022622213e-06, |
| "loss": 0.238081693649292, |
| "step": 4780, |
| "token_acc": 0.9061473283762753 |
| }, |
| { |
| "epoch": 0.6129900076863951, |
| "grad_norm": 3.03125, |
| "learning_rate": 7.1322683274134405e-06, |
| "loss": 0.23080739974975586, |
| "step": 4785, |
| "token_acc": 0.9094276239286792 |
| }, |
| { |
| "epoch": 0.6136305406097874, |
| "grad_norm": 3.65625, |
| "learning_rate": 7.111980501530413e-06, |
| "loss": 0.23122069835662842, |
| "step": 4790, |
| "token_acc": 0.9078907501190322 |
| }, |
| { |
| "epoch": 0.6142710735331796, |
| "grad_norm": 3.03125, |
| "learning_rate": 7.091705636017443e-06, |
| "loss": 0.23598337173461914, |
| "step": 4795, |
| "token_acc": 0.9072490063936409 |
| }, |
| { |
| "epoch": 0.6149116064565718, |
| "grad_norm": 5.1875, |
| "learning_rate": 7.071443821860664e-06, |
| "loss": 0.23058700561523438, |
| "step": 4800, |
| "token_acc": 0.9092871637666767 |
| }, |
| { |
| "epoch": 0.6149116064565718, |
| "eval_loss": 0.33330273628234863, |
| "eval_runtime": 103.0939, |
| "eval_samples_per_second": 96.999, |
| "eval_steps_per_second": 12.125, |
| "eval_token_acc": 0.882147266777771, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.6155521393799641, |
| "grad_norm": 3.4375, |
| "learning_rate": 7.051195149987662e-06, |
| "loss": 0.23541276454925536, |
| "step": 4805, |
| "token_acc": 0.9070878340577527 |
| }, |
| { |
| "epoch": 0.6161926723033564, |
| "grad_norm": 3.578125, |
| "learning_rate": 7.030959711267026e-06, |
| "loss": 0.24111108779907225, |
| "step": 4810, |
| "token_acc": 0.9057868281995103 |
| }, |
| { |
| "epoch": 0.6168332052267487, |
| "grad_norm": 2.75, |
| "learning_rate": 7.010737596507975e-06, |
| "loss": 0.2280668020248413, |
| "step": 4815, |
| "token_acc": 0.9101400414937759 |
| }, |
| { |
| "epoch": 0.617473738150141, |
| "grad_norm": 3.171875, |
| "learning_rate": 6.990528896459922e-06, |
| "loss": 0.23039546012878417, |
| "step": 4820, |
| "token_acc": 0.9103489771359807 |
| }, |
| { |
| "epoch": 0.6181142710735332, |
| "grad_norm": 3.328125, |
| "learning_rate": 6.9703337018120845e-06, |
| "loss": 0.233514666557312, |
| "step": 4825, |
| "token_acc": 0.9083129058616093 |
| }, |
| { |
| "epoch": 0.6187548039969254, |
| "grad_norm": 4.4375, |
| "learning_rate": 6.9501521031930816e-06, |
| "loss": 0.23697190284729003, |
| "step": 4830, |
| "token_acc": 0.9069807427785419 |
| }, |
| { |
| "epoch": 0.6193953369203177, |
| "grad_norm": 2.953125, |
| "learning_rate": 6.9299841911705e-06, |
| "loss": 0.23227353096008302, |
| "step": 4835, |
| "token_acc": 0.9092987147416545 |
| }, |
| { |
| "epoch": 0.62003586984371, |
| "grad_norm": 3.5625, |
| "learning_rate": 6.909830056250527e-06, |
| "loss": 0.23467817306518554, |
| "step": 4840, |
| "token_acc": 0.909126180109497 |
| }, |
| { |
| "epoch": 0.6206764027671022, |
| "grad_norm": 3.125, |
| "learning_rate": 6.889689788877505e-06, |
| "loss": 0.22795021533966064, |
| "step": 4845, |
| "token_acc": 0.9109432333261386 |
| }, |
| { |
| "epoch": 0.6213169356904945, |
| "grad_norm": 2.609375, |
| "learning_rate": 6.869563479433555e-06, |
| "loss": 0.23201088905334472, |
| "step": 4850, |
| "token_acc": 0.9089618990281242 |
| }, |
| { |
| "epoch": 0.6219574686138868, |
| "grad_norm": 2.875, |
| "learning_rate": 6.849451218238152e-06, |
| "loss": 0.23549177646636962, |
| "step": 4855, |
| "token_acc": 0.9081416921948483 |
| }, |
| { |
| "epoch": 0.6225980015372791, |
| "grad_norm": 2.75, |
| "learning_rate": 6.82935309554774e-06, |
| "loss": 0.22994422912597656, |
| "step": 4860, |
| "token_acc": 0.9110630942091617 |
| }, |
| { |
| "epoch": 0.6232385344606712, |
| "grad_norm": 3.03125, |
| "learning_rate": 6.8092692015552984e-06, |
| "loss": 0.22758188247680664, |
| "step": 4865, |
| "token_acc": 0.9083592938733126 |
| }, |
| { |
| "epoch": 0.6238790673840635, |
| "grad_norm": 3.375, |
| "learning_rate": 6.789199626389971e-06, |
| "loss": 0.22297306060791017, |
| "step": 4870, |
| "token_acc": 0.913397067093481 |
| }, |
| { |
| "epoch": 0.6245196003074558, |
| "grad_norm": 3.5, |
| "learning_rate": 6.7691444601166255e-06, |
| "loss": 0.2313997268676758, |
| "step": 4875, |
| "token_acc": 0.9092045160734293 |
| }, |
| { |
| "epoch": 0.6251601332308481, |
| "grad_norm": 4.125, |
| "learning_rate": 6.749103792735481e-06, |
| "loss": 0.236191987991333, |
| "step": 4880, |
| "token_acc": 0.9096788100883811 |
| }, |
| { |
| "epoch": 0.6258006661542403, |
| "grad_norm": 2.984375, |
| "learning_rate": 6.729077714181692e-06, |
| "loss": 0.2335993766784668, |
| "step": 4885, |
| "token_acc": 0.9090713486530683 |
| }, |
| { |
| "epoch": 0.6264411990776326, |
| "grad_norm": 2.671875, |
| "learning_rate": 6.709066314324929e-06, |
| "loss": 0.23459205627441407, |
| "step": 4890, |
| "token_acc": 0.9073938032064301 |
| }, |
| { |
| "epoch": 0.6270817320010249, |
| "grad_norm": 3.359375, |
| "learning_rate": 6.689069682969009e-06, |
| "loss": 0.2288151502609253, |
| "step": 4895, |
| "token_acc": 0.9099611901681759 |
| }, |
| { |
| "epoch": 0.6277222649244171, |
| "grad_norm": 3.203125, |
| "learning_rate": 6.669087909851459e-06, |
| "loss": 0.23342595100402833, |
| "step": 4900, |
| "token_acc": 0.9083329752030599 |
| }, |
| { |
| "epoch": 0.6277222649244171, |
| "eval_loss": 0.33611026406288147, |
| "eval_runtime": 102.7976, |
| "eval_samples_per_second": 97.278, |
| "eval_steps_per_second": 12.16, |
| "eval_token_acc": 0.8819313330860247, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.6283627978478094, |
| "grad_norm": 9.75, |
| "learning_rate": 6.649121084643133e-06, |
| "loss": 0.2269826889038086, |
| "step": 4905, |
| "token_acc": 0.9081998359169221 |
| }, |
| { |
| "epoch": 0.6290033307712016, |
| "grad_norm": 3.265625, |
| "learning_rate": 6.629169296947804e-06, |
| "loss": 0.2403498649597168, |
| "step": 4910, |
| "token_acc": 0.9052165312002752 |
| }, |
| { |
| "epoch": 0.6296438636945939, |
| "grad_norm": 3.234375, |
| "learning_rate": 6.6092326363017635e-06, |
| "loss": 0.23246257305145263, |
| "step": 4915, |
| "token_acc": 0.9084171289875174 |
| }, |
| { |
| "epoch": 0.6302843966179862, |
| "grad_norm": 3.109375, |
| "learning_rate": 6.589311192173414e-06, |
| "loss": 0.228167724609375, |
| "step": 4920, |
| "token_acc": 0.910772955213557 |
| }, |
| { |
| "epoch": 0.6309249295413785, |
| "grad_norm": 9.5, |
| "learning_rate": 6.5694050539628805e-06, |
| "loss": 0.2342754125595093, |
| "step": 4925, |
| "token_acc": 0.9082616179001721 |
| }, |
| { |
| "epoch": 0.6315654624647707, |
| "grad_norm": 3.15625, |
| "learning_rate": 6.549514311001587e-06, |
| "loss": 0.23288652896881104, |
| "step": 4930, |
| "token_acc": 0.9084992673045427 |
| }, |
| { |
| "epoch": 0.6322059953881629, |
| "grad_norm": 7.25, |
| "learning_rate": 6.529639052551886e-06, |
| "loss": 0.23185653686523439, |
| "step": 4935, |
| "token_acc": 0.911041091160221 |
| }, |
| { |
| "epoch": 0.6328465283115552, |
| "grad_norm": 3.1875, |
| "learning_rate": 6.509779367806625e-06, |
| "loss": 0.23133904933929444, |
| "step": 4940, |
| "token_acc": 0.9111350884764782 |
| }, |
| { |
| "epoch": 0.6334870612349475, |
| "grad_norm": 4.3125, |
| "learning_rate": 6.489935345888774e-06, |
| "loss": 0.22587313652038574, |
| "step": 4945, |
| "token_acc": 0.9098948272161408 |
| }, |
| { |
| "epoch": 0.6341275941583397, |
| "grad_norm": 3.109375, |
| "learning_rate": 6.470107075851011e-06, |
| "loss": 0.2315220832824707, |
| "step": 4950, |
| "token_acc": 0.9086874084288546 |
| }, |
| { |
| "epoch": 0.634768127081732, |
| "grad_norm": 2.640625, |
| "learning_rate": 6.450294646675319e-06, |
| "loss": 0.22459986209869384, |
| "step": 4955, |
| "token_acc": 0.9098201578470695 |
| }, |
| { |
| "epoch": 0.6354086600051243, |
| "grad_norm": 2.796875, |
| "learning_rate": 6.430498147272607e-06, |
| "loss": 0.2365894317626953, |
| "step": 4960, |
| "token_acc": 0.9069646344109351 |
| }, |
| { |
| "epoch": 0.6360491929285165, |
| "grad_norm": 7.15625, |
| "learning_rate": 6.41071766648228e-06, |
| "loss": 0.2363147735595703, |
| "step": 4965, |
| "token_acc": 0.9058361730578441 |
| }, |
| { |
| "epoch": 0.6366897258519087, |
| "grad_norm": 2.875, |
| "learning_rate": 6.390953293071871e-06, |
| "loss": 0.22636122703552247, |
| "step": 4970, |
| "token_acc": 0.9104109944249967 |
| }, |
| { |
| "epoch": 0.637330258775301, |
| "grad_norm": 3.265625, |
| "learning_rate": 6.371205115736618e-06, |
| "loss": 0.22853505611419678, |
| "step": 4975, |
| "token_acc": 0.9105326667815894 |
| }, |
| { |
| "epoch": 0.6379707916986933, |
| "grad_norm": 4.125, |
| "learning_rate": 6.351473223099089e-06, |
| "loss": 0.23797154426574707, |
| "step": 4980, |
| "token_acc": 0.9067096774193548 |
| }, |
| { |
| "epoch": 0.6386113246220856, |
| "grad_norm": 2.78125, |
| "learning_rate": 6.33175770370876e-06, |
| "loss": 0.23546228408813477, |
| "step": 4985, |
| "token_acc": 0.9074697754749568 |
| }, |
| { |
| "epoch": 0.6392518575454779, |
| "grad_norm": 5.90625, |
| "learning_rate": 6.3120586460416454e-06, |
| "loss": 0.22152302265167237, |
| "step": 4990, |
| "token_acc": 0.9129477772982305 |
| }, |
| { |
| "epoch": 0.6398923904688701, |
| "grad_norm": 3.0625, |
| "learning_rate": 6.292376138499865e-06, |
| "loss": 0.23244686126708985, |
| "step": 4995, |
| "token_acc": 0.9084896688856229 |
| }, |
| { |
| "epoch": 0.6405329233922623, |
| "grad_norm": 4.4375, |
| "learning_rate": 6.272710269411286e-06, |
| "loss": 0.2383200168609619, |
| "step": 5000, |
| "token_acc": 0.9069365908404196 |
| }, |
| { |
| "epoch": 0.6405329233922623, |
| "eval_loss": 0.3339642584323883, |
| "eval_runtime": 102.7379, |
| "eval_samples_per_second": 97.335, |
| "eval_steps_per_second": 12.167, |
| "eval_token_acc": 0.8819368698473515, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.6411734563156546, |
| "grad_norm": 3.203125, |
| "learning_rate": 6.2530611270290935e-06, |
| "loss": 0.22576665878295898, |
| "step": 5005, |
| "token_acc": 0.9115372986048119 |
| }, |
| { |
| "epoch": 0.6418139892390469, |
| "grad_norm": 3.078125, |
| "learning_rate": 6.23342879953142e-06, |
| "loss": 0.23447873592376708, |
| "step": 5010, |
| "token_acc": 0.9083354860931715 |
| }, |
| { |
| "epoch": 0.6424545221624391, |
| "grad_norm": 3.375, |
| "learning_rate": 6.2138133750209425e-06, |
| "loss": 0.223459792137146, |
| "step": 5015, |
| "token_acc": 0.9112359550561798 |
| }, |
| { |
| "epoch": 0.6430950550858314, |
| "grad_norm": 2.828125, |
| "learning_rate": 6.19421494152447e-06, |
| "loss": 0.22827987670898436, |
| "step": 5020, |
| "token_acc": 0.9120494424755813 |
| }, |
| { |
| "epoch": 0.6437355880092237, |
| "grad_norm": 3.40625, |
| "learning_rate": 6.174633586992569e-06, |
| "loss": 0.22968311309814454, |
| "step": 5025, |
| "token_acc": 0.9102702236779727 |
| }, |
| { |
| "epoch": 0.644376120932616, |
| "grad_norm": 3.859375, |
| "learning_rate": 6.155069399299163e-06, |
| "loss": 0.23479781150817872, |
| "step": 5030, |
| "token_acc": 0.9084179721122396 |
| }, |
| { |
| "epoch": 0.6450166538560081, |
| "grad_norm": 2.890625, |
| "learning_rate": 6.1355224662411375e-06, |
| "loss": 0.2318052291870117, |
| "step": 5035, |
| "token_acc": 0.909961603175288 |
| }, |
| { |
| "epoch": 0.6456571867794004, |
| "grad_norm": 3.703125, |
| "learning_rate": 6.115992875537937e-06, |
| "loss": 0.23980298042297363, |
| "step": 5040, |
| "token_acc": 0.9071697134707637 |
| }, |
| { |
| "epoch": 0.6462977197027927, |
| "grad_norm": 3.40625, |
| "learning_rate": 6.096480714831197e-06, |
| "loss": 0.22896120548248292, |
| "step": 5045, |
| "token_acc": 0.9098480925254617 |
| }, |
| { |
| "epoch": 0.646938252626185, |
| "grad_norm": 3.015625, |
| "learning_rate": 6.076986071684313e-06, |
| "loss": 0.22948775291442872, |
| "step": 5050, |
| "token_acc": 0.9118307426597582 |
| }, |
| { |
| "epoch": 0.6475787855495773, |
| "grad_norm": 2.9375, |
| "learning_rate": 6.057509033582087e-06, |
| "loss": 0.23411431312561035, |
| "step": 5055, |
| "token_acc": 0.9089658138034831 |
| }, |
| { |
| "epoch": 0.6482193184729695, |
| "grad_norm": 3.234375, |
| "learning_rate": 6.038049687930303e-06, |
| "loss": 0.22734377384185792, |
| "step": 5060, |
| "token_acc": 0.9120784583620096 |
| }, |
| { |
| "epoch": 0.6488598513963618, |
| "grad_norm": 3.078125, |
| "learning_rate": 6.018608122055352e-06, |
| "loss": 0.21841344833374024, |
| "step": 5065, |
| "token_acc": 0.9142105036033314 |
| }, |
| { |
| "epoch": 0.649500384319754, |
| "grad_norm": 3.21875, |
| "learning_rate": 5.9991844232038385e-06, |
| "loss": 0.23631734848022462, |
| "step": 5070, |
| "token_acc": 0.9079987900263602 |
| }, |
| { |
| "epoch": 0.6501409172431463, |
| "grad_norm": 5.9375, |
| "learning_rate": 5.9797786785421806e-06, |
| "loss": 0.22841448783874513, |
| "step": 5075, |
| "token_acc": 0.9127186352839559 |
| }, |
| { |
| "epoch": 0.6507814501665385, |
| "grad_norm": 3.265625, |
| "learning_rate": 5.960390975156234e-06, |
| "loss": 0.2350531816482544, |
| "step": 5080, |
| "token_acc": 0.9096751930293749 |
| }, |
| { |
| "epoch": 0.6514219830899308, |
| "grad_norm": 3.15625, |
| "learning_rate": 5.94102140005088e-06, |
| "loss": 0.23367114067077638, |
| "step": 5085, |
| "token_acc": 0.9095145631067961 |
| }, |
| { |
| "epoch": 0.6520625160133231, |
| "grad_norm": 3.234375, |
| "learning_rate": 5.921670040149655e-06, |
| "loss": 0.2327101230621338, |
| "step": 5090, |
| "token_acc": 0.9080370942812983 |
| }, |
| { |
| "epoch": 0.6527030489367154, |
| "grad_norm": 4.0, |
| "learning_rate": 5.902336982294346e-06, |
| "loss": 0.22089247703552245, |
| "step": 5095, |
| "token_acc": 0.9123602296766394 |
| }, |
| { |
| "epoch": 0.6533435818601077, |
| "grad_norm": 5.75, |
| "learning_rate": 5.88302231324462e-06, |
| "loss": 0.23006696701049806, |
| "step": 5100, |
| "token_acc": 0.9100451710045171 |
| }, |
| { |
| "epoch": 0.6533435818601077, |
| "eval_loss": 0.3327247202396393, |
| "eval_runtime": 103.2522, |
| "eval_samples_per_second": 96.85, |
| "eval_steps_per_second": 12.106, |
| "eval_token_acc": 0.8824545570314101, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.6539841147834998, |
| "grad_norm": 3.515625, |
| "learning_rate": 5.863726119677602e-06, |
| "loss": 0.23648326396942138, |
| "step": 5105, |
| "token_acc": 0.9088638125592213 |
| }, |
| { |
| "epoch": 0.6546246477068921, |
| "grad_norm": 2.65625, |
| "learning_rate": 5.844448488187526e-06, |
| "loss": 0.22581000328063966, |
| "step": 5110, |
| "token_acc": 0.9106820331985895 |
| }, |
| { |
| "epoch": 0.6552651806302844, |
| "grad_norm": 2.9375, |
| "learning_rate": 5.825189505285308e-06, |
| "loss": 0.2255998134613037, |
| "step": 5115, |
| "token_acc": 0.9123876210235131 |
| }, |
| { |
| "epoch": 0.6559057135536767, |
| "grad_norm": 5.0, |
| "learning_rate": 5.805949257398195e-06, |
| "loss": 0.23895587921142578, |
| "step": 5120, |
| "token_acc": 0.9072859041982932 |
| }, |
| { |
| "epoch": 0.6565462464770689, |
| "grad_norm": 2.9375, |
| "learning_rate": 5.786727830869337e-06, |
| "loss": 0.2289639711380005, |
| "step": 5125, |
| "token_acc": 0.9104838361603868 |
| }, |
| { |
| "epoch": 0.6571867794004612, |
| "grad_norm": 16.75, |
| "learning_rate": 5.767525311957441e-06, |
| "loss": 0.22975871562957764, |
| "step": 5130, |
| "token_acc": 0.9101954341058457 |
| }, |
| { |
| "epoch": 0.6578273123238535, |
| "grad_norm": 12.4375, |
| "learning_rate": 5.748341786836353e-06, |
| "loss": 0.23110666275024414, |
| "step": 5135, |
| "token_acc": 0.910392569978931 |
| }, |
| { |
| "epoch": 0.6584678452472457, |
| "grad_norm": 2.65625, |
| "learning_rate": 5.729177341594674e-06, |
| "loss": 0.23442704677581788, |
| "step": 5140, |
| "token_acc": 0.9096712966957122 |
| }, |
| { |
| "epoch": 0.6591083781706379, |
| "grad_norm": 2.828125, |
| "learning_rate": 5.710032062235404e-06, |
| "loss": 0.23014814853668214, |
| "step": 5145, |
| "token_acc": 0.9096832657288341 |
| }, |
| { |
| "epoch": 0.6597489110940302, |
| "grad_norm": 3.03125, |
| "learning_rate": 5.690906034675505e-06, |
| "loss": 0.2316150188446045, |
| "step": 5150, |
| "token_acc": 0.9095728632386535 |
| }, |
| { |
| "epoch": 0.6603894440174225, |
| "grad_norm": 2.796875, |
| "learning_rate": 5.671799344745577e-06, |
| "loss": 0.22539763450622557, |
| "step": 5155, |
| "token_acc": 0.9121212121212121 |
| }, |
| { |
| "epoch": 0.6610299769408148, |
| "grad_norm": 2.484375, |
| "learning_rate": 5.652712078189408e-06, |
| "loss": 0.23087406158447266, |
| "step": 5160, |
| "token_acc": 0.9089225734217552 |
| }, |
| { |
| "epoch": 0.6616705098642071, |
| "grad_norm": 3.453125, |
| "learning_rate": 5.633644320663638e-06, |
| "loss": 0.2334925651550293, |
| "step": 5165, |
| "token_acc": 0.9076453650780008 |
| }, |
| { |
| "epoch": 0.6623110427875993, |
| "grad_norm": 2.96875, |
| "learning_rate": 5.614596157737357e-06, |
| "loss": 0.22363200187683105, |
| "step": 5170, |
| "token_acc": 0.9122427805637212 |
| }, |
| { |
| "epoch": 0.6629515757109915, |
| "grad_norm": 3.640625, |
| "learning_rate": 5.5955676748917195e-06, |
| "loss": 0.2343050003051758, |
| "step": 5175, |
| "token_acc": 0.9070922598479613 |
| }, |
| { |
| "epoch": 0.6635921086343838, |
| "grad_norm": 3.484375, |
| "learning_rate": 5.57655895751956e-06, |
| "loss": 0.23191659450531005, |
| "step": 5180, |
| "token_acc": 0.9086604683195593 |
| }, |
| { |
| "epoch": 0.6642326415577761, |
| "grad_norm": 2.9375, |
| "learning_rate": 5.557570090925019e-06, |
| "loss": 0.22515347003936767, |
| "step": 5185, |
| "token_acc": 0.9112387698686939 |
| }, |
| { |
| "epoch": 0.6648731744811683, |
| "grad_norm": 3.828125, |
| "learning_rate": 5.538601160323147e-06, |
| "loss": 0.23082191944122316, |
| "step": 5190, |
| "token_acc": 0.9089143103820418 |
| }, |
| { |
| "epoch": 0.6655137074045606, |
| "grad_norm": 3.8125, |
| "learning_rate": 5.519652250839537e-06, |
| "loss": 0.22431740760803223, |
| "step": 5195, |
| "token_acc": 0.912159537272845 |
| }, |
| { |
| "epoch": 0.6661542403279529, |
| "grad_norm": 3.234375, |
| "learning_rate": 5.500723447509925e-06, |
| "loss": 0.23847784996032714, |
| "step": 5200, |
| "token_acc": 0.9073309241094476 |
| }, |
| { |
| "epoch": 0.6661542403279529, |
| "eval_loss": 0.3346344828605652, |
| "eval_runtime": 103.736, |
| "eval_samples_per_second": 96.399, |
| "eval_steps_per_second": 12.05, |
| "eval_token_acc": 0.8817237045362686, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.6667947732513452, |
| "grad_norm": 3.265625, |
| "learning_rate": 5.48181483527983e-06, |
| "loss": 0.24246997833251954, |
| "step": 5205, |
| "token_acc": 0.9048132493746226 |
| }, |
| { |
| "epoch": 0.6674353061747373, |
| "grad_norm": 2.96875, |
| "learning_rate": 5.462926499004148e-06, |
| "loss": 0.23247838020324707, |
| "step": 5210, |
| "token_acc": 0.9081055404413352 |
| }, |
| { |
| "epoch": 0.6680758390981296, |
| "grad_norm": 4.0, |
| "learning_rate": 5.4440585234467935e-06, |
| "loss": 0.2290191411972046, |
| "step": 5215, |
| "token_acc": 0.9098120365580272 |
| }, |
| { |
| "epoch": 0.6687163720215219, |
| "grad_norm": 2.625, |
| "learning_rate": 5.425210993280306e-06, |
| "loss": 0.22439243793487548, |
| "step": 5220, |
| "token_acc": 0.9100359509680773 |
| }, |
| { |
| "epoch": 0.6693569049449142, |
| "grad_norm": 5.3125, |
| "learning_rate": 5.406383993085471e-06, |
| "loss": 0.22781476974487305, |
| "step": 5225, |
| "token_acc": 0.910941475826972 |
| }, |
| { |
| "epoch": 0.6699974378683065, |
| "grad_norm": 2.828125, |
| "learning_rate": 5.387577607350951e-06, |
| "loss": 0.2305924892425537, |
| "step": 5230, |
| "token_acc": 0.9094285837688421 |
| }, |
| { |
| "epoch": 0.6706379707916987, |
| "grad_norm": 4.125, |
| "learning_rate": 5.368791920472884e-06, |
| "loss": 0.2318443775177002, |
| "step": 5235, |
| "token_acc": 0.9093386392144989 |
| }, |
| { |
| "epoch": 0.6712785037150909, |
| "grad_norm": 3.84375, |
| "learning_rate": 5.35002701675454e-06, |
| "loss": 0.2296751022338867, |
| "step": 5240, |
| "token_acc": 0.9120812882114872 |
| }, |
| { |
| "epoch": 0.6719190366384832, |
| "grad_norm": 2.734375, |
| "learning_rate": 5.331282980405896e-06, |
| "loss": 0.2311159610748291, |
| "step": 5245, |
| "token_acc": 0.9103889922547704 |
| }, |
| { |
| "epoch": 0.6725595695618755, |
| "grad_norm": 4.03125, |
| "learning_rate": 5.3125598955433145e-06, |
| "loss": 0.23089895248413086, |
| "step": 5250, |
| "token_acc": 0.909507544640927 |
| }, |
| { |
| "epoch": 0.6732001024852677, |
| "grad_norm": 2.84375, |
| "learning_rate": 5.293857846189108e-06, |
| "loss": 0.23441662788391113, |
| "step": 5255, |
| "token_acc": 0.9084364357460016 |
| }, |
| { |
| "epoch": 0.67384063540866, |
| "grad_norm": 2.96875, |
| "learning_rate": 5.275176916271197e-06, |
| "loss": 0.2311511754989624, |
| "step": 5260, |
| "token_acc": 0.9103763417683322 |
| }, |
| { |
| "epoch": 0.6744811683320523, |
| "grad_norm": 2.734375, |
| "learning_rate": 5.256517189622742e-06, |
| "loss": 0.23376543521881105, |
| "step": 5265, |
| "token_acc": 0.9086750107898144 |
| }, |
| { |
| "epoch": 0.6751217012554446, |
| "grad_norm": 2.609375, |
| "learning_rate": 5.237878749981724e-06, |
| "loss": 0.22374820709228516, |
| "step": 5270, |
| "token_acc": 0.912248865845755 |
| }, |
| { |
| "epoch": 0.6757622341788367, |
| "grad_norm": 3.859375, |
| "learning_rate": 5.219261680990624e-06, |
| "loss": 0.22372374534606934, |
| "step": 5275, |
| "token_acc": 0.9098982583204 |
| }, |
| { |
| "epoch": 0.676402767102229, |
| "grad_norm": 3.84375, |
| "learning_rate": 5.200666066195993e-06, |
| "loss": 0.22683911323547362, |
| "step": 5280, |
| "token_acc": 0.9123762590239053 |
| }, |
| { |
| "epoch": 0.6770433000256213, |
| "grad_norm": 3.046875, |
| "learning_rate": 5.182091989048121e-06, |
| "loss": 0.22960472106933594, |
| "step": 5285, |
| "token_acc": 0.9087181700474752 |
| }, |
| { |
| "epoch": 0.6776838329490136, |
| "grad_norm": 3.21875, |
| "learning_rate": 5.163539532900639e-06, |
| "loss": 0.23558075428009034, |
| "step": 5290, |
| "token_acc": 0.9076750989502668 |
| }, |
| { |
| "epoch": 0.6783243658724059, |
| "grad_norm": 9.5, |
| "learning_rate": 5.14500878101015e-06, |
| "loss": 0.23191981315612792, |
| "step": 5295, |
| "token_acc": 0.9099460625674218 |
| }, |
| { |
| "epoch": 0.6789648987957981, |
| "grad_norm": 2.765625, |
| "learning_rate": 5.126499816535861e-06, |
| "loss": 0.22278881072998047, |
| "step": 5300, |
| "token_acc": 0.9129082426127527 |
| }, |
| { |
| "epoch": 0.6789648987957981, |
| "eval_loss": 0.3326459527015686, |
| "eval_runtime": 102.5695, |
| "eval_samples_per_second": 97.495, |
| "eval_steps_per_second": 12.187, |
| "eval_token_acc": 0.8820392999318979, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.6796054317191904, |
| "grad_norm": 5.0, |
| "learning_rate": 5.108012722539199e-06, |
| "loss": 0.22774300575256348, |
| "step": 5305, |
| "token_acc": 0.910229284511421 |
| }, |
| { |
| "epoch": 0.6802459646425826, |
| "grad_norm": 4.5, |
| "learning_rate": 5.0895475819834474e-06, |
| "loss": 0.23403663635253907, |
| "step": 5310, |
| "token_acc": 0.9082355973707952 |
| }, |
| { |
| "epoch": 0.6808864975659749, |
| "grad_norm": 4.25, |
| "learning_rate": 5.071104477733372e-06, |
| "loss": 0.23252689838409424, |
| "step": 5315, |
| "token_acc": 0.9085381630012936 |
| }, |
| { |
| "epoch": 0.6815270304893671, |
| "grad_norm": 2.96875, |
| "learning_rate": 5.052683492554844e-06, |
| "loss": 0.23012104034423828, |
| "step": 5320, |
| "token_acc": 0.9094350987394054 |
| }, |
| { |
| "epoch": 0.6821675634127594, |
| "grad_norm": 5.03125, |
| "learning_rate": 5.034284709114476e-06, |
| "loss": 0.2321260929107666, |
| "step": 5325, |
| "token_acc": 0.9089814695386732 |
| }, |
| { |
| "epoch": 0.6828080963361517, |
| "grad_norm": 7.90625, |
| "learning_rate": 5.0159082099792465e-06, |
| "loss": 0.22481832504272461, |
| "step": 5330, |
| "token_acc": 0.9132952973720608 |
| }, |
| { |
| "epoch": 0.683448629259544, |
| "grad_norm": 2.890625, |
| "learning_rate": 4.997554077616128e-06, |
| "loss": 0.2297644853591919, |
| "step": 5335, |
| "token_acc": 0.9089265731255918 |
| }, |
| { |
| "epoch": 0.6840891621829363, |
| "grad_norm": 3.34375, |
| "learning_rate": 4.979222394391721e-06, |
| "loss": 0.22588052749633789, |
| "step": 5340, |
| "token_acc": 0.911449325492909 |
| }, |
| { |
| "epoch": 0.6847296951063284, |
| "grad_norm": 3.515625, |
| "learning_rate": 4.960913242571882e-06, |
| "loss": 0.22864861488342286, |
| "step": 5345, |
| "token_acc": 0.9100142014890046 |
| }, |
| { |
| "epoch": 0.6853702280297207, |
| "grad_norm": 2.953125, |
| "learning_rate": 4.9426267043213594e-06, |
| "loss": 0.23536896705627441, |
| "step": 5350, |
| "token_acc": 0.9080583865952668 |
| }, |
| { |
| "epoch": 0.686010760953113, |
| "grad_norm": 2.96875, |
| "learning_rate": 4.924362861703405e-06, |
| "loss": 0.22937750816345215, |
| "step": 5355, |
| "token_acc": 0.9104786545924968 |
| }, |
| { |
| "epoch": 0.6866512938765053, |
| "grad_norm": 2.953125, |
| "learning_rate": 4.906121796679445e-06, |
| "loss": 0.2339865207672119, |
| "step": 5360, |
| "token_acc": 0.9082639996551278 |
| }, |
| { |
| "epoch": 0.6872918267998975, |
| "grad_norm": 3.046875, |
| "learning_rate": 4.887903591108663e-06, |
| "loss": 0.23555207252502441, |
| "step": 5365, |
| "token_acc": 0.9080790717662226 |
| }, |
| { |
| "epoch": 0.6879323597232898, |
| "grad_norm": 3.546875, |
| "learning_rate": 4.869708326747681e-06, |
| "loss": 0.2278905391693115, |
| "step": 5370, |
| "token_acc": 0.9106388481765669 |
| }, |
| { |
| "epoch": 0.6885728926466821, |
| "grad_norm": 3.671875, |
| "learning_rate": 4.8515360852501496e-06, |
| "loss": 0.22571067810058593, |
| "step": 5375, |
| "token_acc": 0.9102165846923808 |
| }, |
| { |
| "epoch": 0.6892134255700743, |
| "grad_norm": 6.8125, |
| "learning_rate": 4.833386948166409e-06, |
| "loss": 0.23547790050506592, |
| "step": 5380, |
| "token_acc": 0.9070218543902755 |
| }, |
| { |
| "epoch": 0.6898539584934665, |
| "grad_norm": 6.1875, |
| "learning_rate": 4.815260996943126e-06, |
| "loss": 0.23141322135925294, |
| "step": 5385, |
| "token_acc": 0.9082988267770876 |
| }, |
| { |
| "epoch": 0.6904944914168588, |
| "grad_norm": 4.125, |
| "learning_rate": 4.797158312922895e-06, |
| "loss": 0.2272815227508545, |
| "step": 5390, |
| "token_acc": 0.9105712070302404 |
| }, |
| { |
| "epoch": 0.6911350243402511, |
| "grad_norm": 3.671875, |
| "learning_rate": 4.779078977343922e-06, |
| "loss": 0.22905888557434081, |
| "step": 5395, |
| "token_acc": 0.9104548394050442 |
| }, |
| { |
| "epoch": 0.6917755572636434, |
| "grad_norm": 3.46875, |
| "learning_rate": 4.761023071339608e-06, |
| "loss": 0.22437114715576173, |
| "step": 5400, |
| "token_acc": 0.9122693567856527 |
| }, |
| { |
| "epoch": 0.6917755572636434, |
| "eval_loss": 0.3339126706123352, |
| "eval_runtime": 103.3301, |
| "eval_samples_per_second": 96.777, |
| "eval_steps_per_second": 12.097, |
| "eval_token_acc": 0.8821528035390979, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.6924160901870356, |
| "grad_norm": 7.34375, |
| "learning_rate": 4.742990675938228e-06, |
| "loss": 0.22792973518371581, |
| "step": 5405, |
| "token_acc": 0.9097371822490306 |
| }, |
| { |
| "epoch": 0.6930566231104279, |
| "grad_norm": 3.109375, |
| "learning_rate": 4.724981872062545e-06, |
| "loss": 0.22467894554138185, |
| "step": 5410, |
| "token_acc": 0.9115411195577056 |
| }, |
| { |
| "epoch": 0.6936971560338201, |
| "grad_norm": 4.875, |
| "learning_rate": 4.706996740529453e-06, |
| "loss": 0.22711763381958008, |
| "step": 5415, |
| "token_acc": 0.9103552206673843 |
| }, |
| { |
| "epoch": 0.6943376889572124, |
| "grad_norm": 3.546875, |
| "learning_rate": 4.689035362049609e-06, |
| "loss": 0.22871413230895996, |
| "step": 5420, |
| "token_acc": 0.9115798536375377 |
| }, |
| { |
| "epoch": 0.6949782218806047, |
| "grad_norm": 3.203125, |
| "learning_rate": 4.6710978172270794e-06, |
| "loss": 0.22836050987243653, |
| "step": 5425, |
| "token_acc": 0.9107814729922588 |
| }, |
| { |
| "epoch": 0.6956187548039969, |
| "grad_norm": 3.265625, |
| "learning_rate": 4.653184186558975e-06, |
| "loss": 0.22787034511566162, |
| "step": 5430, |
| "token_acc": 0.9093102408340873 |
| }, |
| { |
| "epoch": 0.6962592877273892, |
| "grad_norm": 5.4375, |
| "learning_rate": 4.635294550435086e-06, |
| "loss": 0.21838183403015138, |
| "step": 5435, |
| "token_acc": 0.9151326592342927 |
| }, |
| { |
| "epoch": 0.6968998206507815, |
| "grad_norm": 3.875, |
| "learning_rate": 4.617428989137517e-06, |
| "loss": 0.2287057876586914, |
| "step": 5440, |
| "token_acc": 0.9102171191781413 |
| }, |
| { |
| "epoch": 0.6975403535741738, |
| "grad_norm": 4.0625, |
| "learning_rate": 4.599587582840349e-06, |
| "loss": 0.23020198345184326, |
| "step": 5445, |
| "token_acc": 0.9096006210644354 |
| }, |
| { |
| "epoch": 0.6981808864975659, |
| "grad_norm": 2.8125, |
| "learning_rate": 4.581770411609254e-06, |
| "loss": 0.22472758293151857, |
| "step": 5450, |
| "token_acc": 0.9107643229727982 |
| }, |
| { |
| "epoch": 0.6988214194209582, |
| "grad_norm": 3.546875, |
| "learning_rate": 4.563977555401148e-06, |
| "loss": 0.22312564849853517, |
| "step": 5455, |
| "token_acc": 0.9104580812445981 |
| }, |
| { |
| "epoch": 0.6994619523443505, |
| "grad_norm": 4.21875, |
| "learning_rate": 4.546209094063829e-06, |
| "loss": 0.23030247688293456, |
| "step": 5460, |
| "token_acc": 0.9101837837837837 |
| }, |
| { |
| "epoch": 0.7001024852677428, |
| "grad_norm": 3.359375, |
| "learning_rate": 4.528465107335621e-06, |
| "loss": 0.22946977615356445, |
| "step": 5465, |
| "token_acc": 0.9094075156935248 |
| }, |
| { |
| "epoch": 0.700743018191135, |
| "grad_norm": 4.90625, |
| "learning_rate": 4.5107456748450206e-06, |
| "loss": 0.23560161590576173, |
| "step": 5470, |
| "token_acc": 0.9065336143490043 |
| }, |
| { |
| "epoch": 0.7013835511145273, |
| "grad_norm": 3.3125, |
| "learning_rate": 4.4930508761103145e-06, |
| "loss": 0.23189268112182618, |
| "step": 5475, |
| "token_acc": 0.909141750914175 |
| }, |
| { |
| "epoch": 0.7020240840379196, |
| "grad_norm": 5.46875, |
| "learning_rate": 4.475380790539272e-06, |
| "loss": 0.2286592483520508, |
| "step": 5480, |
| "token_acc": 0.9110030970406057 |
| }, |
| { |
| "epoch": 0.7026646169613118, |
| "grad_norm": 3.328125, |
| "learning_rate": 4.457735497428728e-06, |
| "loss": 0.22808377742767333, |
| "step": 5485, |
| "token_acc": 0.9114810810810811 |
| }, |
| { |
| "epoch": 0.703305149884704, |
| "grad_norm": 3.484375, |
| "learning_rate": 4.4401150759642875e-06, |
| "loss": 0.22788479328155517, |
| "step": 5490, |
| "token_acc": 0.9103065034297126 |
| }, |
| { |
| "epoch": 0.7039456828080963, |
| "grad_norm": 3.71875, |
| "learning_rate": 4.422519605219914e-06, |
| "loss": 0.23326406478881836, |
| "step": 5495, |
| "token_acc": 0.9091496232508073 |
| }, |
| { |
| "epoch": 0.7045862157314886, |
| "grad_norm": 3.828125, |
| "learning_rate": 4.404949164157617e-06, |
| "loss": 0.23126420974731446, |
| "step": 5500, |
| "token_acc": 0.9086606720302887 |
| }, |
| { |
| "epoch": 0.7045862157314886, |
| "eval_loss": 0.3346463441848755, |
| "eval_runtime": 103.293, |
| "eval_samples_per_second": 96.812, |
| "eval_steps_per_second": 12.101, |
| "eval_token_acc": 0.8824379467474296, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.7052267486548809, |
| "grad_norm": 3.03125, |
| "learning_rate": 4.387403831627079e-06, |
| "loss": 0.22369828224182128, |
| "step": 5505, |
| "token_acc": 0.9125932956555503 |
| }, |
| { |
| "epoch": 0.7058672815782732, |
| "grad_norm": 3.46875, |
| "learning_rate": 4.3698836863653005e-06, |
| "loss": 0.23041157722473143, |
| "step": 5510, |
| "token_acc": 0.9091222179172586 |
| }, |
| { |
| "epoch": 0.7065078145016653, |
| "grad_norm": 14.625, |
| "learning_rate": 4.352388806996263e-06, |
| "loss": 0.2362978458404541, |
| "step": 5515, |
| "token_acc": 0.9075854931217388 |
| }, |
| { |
| "epoch": 0.7071483474250576, |
| "grad_norm": 2.578125, |
| "learning_rate": 4.334919272030547e-06, |
| "loss": 0.23041419982910155, |
| "step": 5520, |
| "token_acc": 0.909314147854558 |
| }, |
| { |
| "epoch": 0.7077888803484499, |
| "grad_norm": 2.96875, |
| "learning_rate": 4.317475159865005e-06, |
| "loss": 0.22999229431152343, |
| "step": 5525, |
| "token_acc": 0.9102149842746974 |
| }, |
| { |
| "epoch": 0.7084294132718422, |
| "grad_norm": 3.0625, |
| "learning_rate": 4.300056548782404e-06, |
| "loss": 0.22720894813537598, |
| "step": 5530, |
| "token_acc": 0.9110218883564305 |
| }, |
| { |
| "epoch": 0.7090699461952344, |
| "grad_norm": 2.96875, |
| "learning_rate": 4.282663516951068e-06, |
| "loss": 0.23367710113525392, |
| "step": 5535, |
| "token_acc": 0.9050570260383043 |
| }, |
| { |
| "epoch": 0.7097104791186267, |
| "grad_norm": 3.0, |
| "learning_rate": 4.265296142424529e-06, |
| "loss": 0.22929010391235352, |
| "step": 5540, |
| "token_acc": 0.9078120967048094 |
| }, |
| { |
| "epoch": 0.710351012042019, |
| "grad_norm": 4.03125, |
| "learning_rate": 4.247954503141183e-06, |
| "loss": 0.2340301513671875, |
| "step": 5545, |
| "token_acc": 0.9078794652452392 |
| }, |
| { |
| "epoch": 0.7109915449654112, |
| "grad_norm": 5.09375, |
| "learning_rate": 4.230638676923932e-06, |
| "loss": 0.2315293788909912, |
| "step": 5550, |
| "token_acc": 0.9097634408602151 |
| }, |
| { |
| "epoch": 0.7116320778888034, |
| "grad_norm": 3.5625, |
| "learning_rate": 4.213348741479847e-06, |
| "loss": 0.22180113792419434, |
| "step": 5555, |
| "token_acc": 0.9152183311716385 |
| }, |
| { |
| "epoch": 0.7122726108121957, |
| "grad_norm": 4.53125, |
| "learning_rate": 4.196084774399788e-06, |
| "loss": 0.2180586576461792, |
| "step": 5560, |
| "token_acc": 0.913914992671782 |
| }, |
| { |
| "epoch": 0.712913143735588, |
| "grad_norm": 11.0, |
| "learning_rate": 4.1788468531581065e-06, |
| "loss": 0.23073256015777588, |
| "step": 5565, |
| "token_acc": 0.9099750408813151 |
| }, |
| { |
| "epoch": 0.7135536766589803, |
| "grad_norm": 5.15625, |
| "learning_rate": 4.161635055112254e-06, |
| "loss": 0.2295978307723999, |
| "step": 5570, |
| "token_acc": 0.9100305784056161 |
| }, |
| { |
| "epoch": 0.7141942095823726, |
| "grad_norm": 2.84375, |
| "learning_rate": 4.1444494575024555e-06, |
| "loss": 0.23021929264068602, |
| "step": 5575, |
| "token_acc": 0.9086412318809411 |
| }, |
| { |
| "epoch": 0.7148347425057648, |
| "grad_norm": 3.078125, |
| "learning_rate": 4.1272901374513555e-06, |
| "loss": 0.23160245418548583, |
| "step": 5580, |
| "token_acc": 0.9093453919035315 |
| }, |
| { |
| "epoch": 0.715475275429157, |
| "grad_norm": 3.34375, |
| "learning_rate": 4.110157171963674e-06, |
| "loss": 0.22630250453948975, |
| "step": 5585, |
| "token_acc": 0.9112262521588946 |
| }, |
| { |
| "epoch": 0.7161158083525493, |
| "grad_norm": 5.09375, |
| "learning_rate": 4.093050637925871e-06, |
| "loss": 0.22265501022338868, |
| "step": 5590, |
| "token_acc": 0.9139391854113802 |
| }, |
| { |
| "epoch": 0.7167563412759416, |
| "grad_norm": 3.546875, |
| "learning_rate": 4.07597061210577e-06, |
| "loss": 0.22470180988311766, |
| "step": 5595, |
| "token_acc": 0.911774876804703 |
| }, |
| { |
| "epoch": 0.7173968741993338, |
| "grad_norm": 2.953125, |
| "learning_rate": 4.0589171711522626e-06, |
| "loss": 0.238523530960083, |
| "step": 5600, |
| "token_acc": 0.9068379005240099 |
| }, |
| { |
| "epoch": 0.7173968741993338, |
| "eval_loss": 0.33452367782592773, |
| "eval_runtime": 102.4933, |
| "eval_samples_per_second": 97.567, |
| "eval_steps_per_second": 12.196, |
| "eval_token_acc": 0.882180487345732, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.7180374071227261, |
| "grad_norm": 2.859375, |
| "learning_rate": 4.0418903915949125e-06, |
| "loss": 0.22467451095581054, |
| "step": 5605, |
| "token_acc": 0.9131561892417369 |
| }, |
| { |
| "epoch": 0.7186779400461184, |
| "grad_norm": 4.03125, |
| "learning_rate": 4.0248903498436624e-06, |
| "loss": 0.22909164428710938, |
| "step": 5610, |
| "token_acc": 0.9093295464325674 |
| }, |
| { |
| "epoch": 0.7193184729695107, |
| "grad_norm": 3.171875, |
| "learning_rate": 4.007917122188438e-06, |
| "loss": 0.22771682739257812, |
| "step": 5615, |
| "token_acc": 0.9116303129580137 |
| }, |
| { |
| "epoch": 0.7199590058929028, |
| "grad_norm": 2.921875, |
| "learning_rate": 3.990970784798854e-06, |
| "loss": 0.23022587299346925, |
| "step": 5620, |
| "token_acc": 0.9101649768001375 |
| }, |
| { |
| "epoch": 0.7205995388162951, |
| "grad_norm": 3.28125, |
| "learning_rate": 3.974051413723842e-06, |
| "loss": 0.23161954879760743, |
| "step": 5625, |
| "token_acc": 0.9094234079173839 |
| }, |
| { |
| "epoch": 0.7212400717396874, |
| "grad_norm": 4.90625, |
| "learning_rate": 3.957159084891318e-06, |
| "loss": 0.23545317649841307, |
| "step": 5630, |
| "token_acc": 0.908342315154128 |
| }, |
| { |
| "epoch": 0.7218806046630797, |
| "grad_norm": 3.890625, |
| "learning_rate": 3.940293874107854e-06, |
| "loss": 0.2253598690032959, |
| "step": 5635, |
| "token_acc": 0.9121528376746593 |
| }, |
| { |
| "epoch": 0.722521137586472, |
| "grad_norm": 3.546875, |
| "learning_rate": 3.923455857058311e-06, |
| "loss": 0.2275296449661255, |
| "step": 5640, |
| "token_acc": 0.9096149709614971 |
| }, |
| { |
| "epoch": 0.7231616705098642, |
| "grad_norm": 2.828125, |
| "learning_rate": 3.906645109305521e-06, |
| "loss": 0.23534011840820312, |
| "step": 5645, |
| "token_acc": 0.908305245873304 |
| }, |
| { |
| "epoch": 0.7238022034332565, |
| "grad_norm": 3.0625, |
| "learning_rate": 3.88986170628994e-06, |
| "loss": 0.23898892402648925, |
| "step": 5650, |
| "token_acc": 0.9062513444908145 |
| }, |
| { |
| "epoch": 0.7244427363566487, |
| "grad_norm": 3.109375, |
| "learning_rate": 3.873105723329317e-06, |
| "loss": 0.23146333694458007, |
| "step": 5655, |
| "token_acc": 0.908890330953926 |
| }, |
| { |
| "epoch": 0.725083269280041, |
| "grad_norm": 5.375, |
| "learning_rate": 3.856377235618341e-06, |
| "loss": 0.24037771224975585, |
| "step": 5660, |
| "token_acc": 0.9058757646247954 |
| }, |
| { |
| "epoch": 0.7257238022034332, |
| "grad_norm": 5.6875, |
| "learning_rate": 3.839676318228319e-06, |
| "loss": 0.2313528299331665, |
| "step": 5665, |
| "token_acc": 0.9094474614257392 |
| }, |
| { |
| "epoch": 0.7263643351268255, |
| "grad_norm": 3.90625, |
| "learning_rate": 3.823003046106828e-06, |
| "loss": 0.23002188205718993, |
| "step": 5670, |
| "token_acc": 0.9098721713594748 |
| }, |
| { |
| "epoch": 0.7270048680502178, |
| "grad_norm": 3.421875, |
| "learning_rate": 3.8063574940773907e-06, |
| "loss": 0.2305138111114502, |
| "step": 5675, |
| "token_acc": 0.9112459129237652 |
| }, |
| { |
| "epoch": 0.7276454009736101, |
| "grad_norm": 7.6875, |
| "learning_rate": 3.789739736839114e-06, |
| "loss": 0.2200489044189453, |
| "step": 5680, |
| "token_acc": 0.9114310270734852 |
| }, |
| { |
| "epoch": 0.7282859338970024, |
| "grad_norm": 3.109375, |
| "learning_rate": 3.773149848966401e-06, |
| "loss": 0.22987012863159179, |
| "step": 5685, |
| "token_acc": 0.9078311172509066 |
| }, |
| { |
| "epoch": 0.7289264668203945, |
| "grad_norm": 3.15625, |
| "learning_rate": 3.7565879049085562e-06, |
| "loss": 0.22706859111785888, |
| "step": 5690, |
| "token_acc": 0.9124580573001807 |
| }, |
| { |
| "epoch": 0.7295669997437868, |
| "grad_norm": 3.859375, |
| "learning_rate": 3.7400539789895074e-06, |
| "loss": 0.23126349449157715, |
| "step": 5695, |
| "token_acc": 0.9095687389599759 |
| }, |
| { |
| "epoch": 0.7302075326671791, |
| "grad_norm": 3.125, |
| "learning_rate": 3.7235481454074373e-06, |
| "loss": 0.2237870693206787, |
| "step": 5700, |
| "token_acc": 0.9137209201950882 |
| }, |
| { |
| "epoch": 0.7302075326671791, |
| "eval_loss": 0.33403199911117554, |
| "eval_runtime": 103.444, |
| "eval_samples_per_second": 96.671, |
| "eval_steps_per_second": 12.084, |
| "eval_token_acc": 0.8826123547292247, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.7308480655905714, |
| "grad_norm": 3.34375, |
| "learning_rate": 3.70707047823445e-06, |
| "loss": 0.22239408493041993, |
| "step": 5705, |
| "token_acc": 0.9109814094249892 |
| }, |
| { |
| "epoch": 0.7314885985139636, |
| "grad_norm": 3.09375, |
| "learning_rate": 3.6906210514162744e-06, |
| "loss": 0.23225040435791017, |
| "step": 5710, |
| "token_acc": 0.9099184606756116 |
| }, |
| { |
| "epoch": 0.7321291314373559, |
| "grad_norm": 3.671875, |
| "learning_rate": 3.6741999387718773e-06, |
| "loss": 0.2249077320098877, |
| "step": 5715, |
| "token_acc": 0.9111187815506753 |
| }, |
| { |
| "epoch": 0.7327696643607482, |
| "grad_norm": 3.953125, |
| "learning_rate": 3.657807213993192e-06, |
| "loss": 0.2272716522216797, |
| "step": 5720, |
| "token_acc": 0.9099200345796412 |
| }, |
| { |
| "epoch": 0.7334101972841404, |
| "grad_norm": 2.875, |
| "learning_rate": 3.641442950644728e-06, |
| "loss": 0.22431583404541017, |
| "step": 5725, |
| "token_acc": 0.910762060930353 |
| }, |
| { |
| "epoch": 0.7340507302075326, |
| "grad_norm": 2.875, |
| "learning_rate": 3.6251072221632978e-06, |
| "loss": 0.2208378553390503, |
| "step": 5730, |
| "token_acc": 0.9123809523809524 |
| }, |
| { |
| "epoch": 0.7346912631309249, |
| "grad_norm": 4.09375, |
| "learning_rate": 3.608800101857637e-06, |
| "loss": 0.22057173252105713, |
| "step": 5735, |
| "token_acc": 0.9117215168005528 |
| }, |
| { |
| "epoch": 0.7353317960543172, |
| "grad_norm": 9.25, |
| "learning_rate": 3.5925216629081116e-06, |
| "loss": 0.2260368824005127, |
| "step": 5740, |
| "token_acc": 0.910849706997587 |
| }, |
| { |
| "epoch": 0.7359723289777095, |
| "grad_norm": 3.015625, |
| "learning_rate": 3.5762719783663724e-06, |
| "loss": 0.22467224597930907, |
| "step": 5745, |
| "token_acc": 0.9124238891048063 |
| }, |
| { |
| "epoch": 0.7366128619011018, |
| "grad_norm": 3.015625, |
| "learning_rate": 3.5600511211550283e-06, |
| "loss": 0.2277822256088257, |
| "step": 5750, |
| "token_acc": 0.9120300427331981 |
| }, |
| { |
| "epoch": 0.7372533948244939, |
| "grad_norm": 2.6875, |
| "learning_rate": 3.5438591640673346e-06, |
| "loss": 0.21924290657043458, |
| "step": 5755, |
| "token_acc": 0.9147353856796956 |
| }, |
| { |
| "epoch": 0.7378939277478862, |
| "grad_norm": 2.8125, |
| "learning_rate": 3.527696179766833e-06, |
| "loss": 0.229719877243042, |
| "step": 5760, |
| "token_acc": 0.9090753057283845 |
| }, |
| { |
| "epoch": 0.7385344606712785, |
| "grad_norm": 4.75, |
| "learning_rate": 3.5115622407870607e-06, |
| "loss": 0.22485427856445311, |
| "step": 5765, |
| "token_acc": 0.9123470045093408 |
| }, |
| { |
| "epoch": 0.7391749935946708, |
| "grad_norm": 3.6875, |
| "learning_rate": 3.495457419531206e-06, |
| "loss": 0.2279944896697998, |
| "step": 5770, |
| "token_acc": 0.9102829537612146 |
| }, |
| { |
| "epoch": 0.739815526518063, |
| "grad_norm": 3.53125, |
| "learning_rate": 3.4793817882717863e-06, |
| "loss": 0.22675998210906984, |
| "step": 5775, |
| "token_acc": 0.9114967836636014 |
| }, |
| { |
| "epoch": 0.7404560594414553, |
| "grad_norm": 2.90625, |
| "learning_rate": 3.463335419150328e-06, |
| "loss": 0.23147711753845215, |
| "step": 5780, |
| "token_acc": 0.908021712907117 |
| }, |
| { |
| "epoch": 0.7410965923648476, |
| "grad_norm": 4.8125, |
| "learning_rate": 3.4473183841770364e-06, |
| "loss": 0.22812228202819823, |
| "step": 5785, |
| "token_acc": 0.9115860226636219 |
| }, |
| { |
| "epoch": 0.7417371252882398, |
| "grad_norm": 3.421875, |
| "learning_rate": 3.4313307552304785e-06, |
| "loss": 0.22540197372436524, |
| "step": 5790, |
| "token_acc": 0.9123684664481628 |
| }, |
| { |
| "epoch": 0.742377658211632, |
| "grad_norm": 4.25, |
| "learning_rate": 3.4153726040572612e-06, |
| "loss": 0.23054356575012208, |
| "step": 5795, |
| "token_acc": 0.90987696808053 |
| }, |
| { |
| "epoch": 0.7430181911350243, |
| "grad_norm": 3.125, |
| "learning_rate": 3.3994440022716902e-06, |
| "loss": 0.2308722972869873, |
| "step": 5800, |
| "token_acc": 0.9087033288833384 |
| }, |
| { |
| "epoch": 0.7430181911350243, |
| "eval_loss": 0.33602866530418396, |
| "eval_runtime": 106.6156, |
| "eval_samples_per_second": 93.795, |
| "eval_steps_per_second": 11.724, |
| "eval_token_acc": 0.8821970976297124, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.7436587240584166, |
| "grad_norm": 2.765625, |
| "learning_rate": 3.3835450213554887e-06, |
| "loss": 0.23508167266845703, |
| "step": 5805, |
| "token_acc": 0.9071099435855475 |
| }, |
| { |
| "epoch": 0.7442992569818089, |
| "grad_norm": 2.6875, |
| "learning_rate": 3.3676757326574293e-06, |
| "loss": 0.2318406581878662, |
| "step": 5810, |
| "token_acc": 0.9082233589820745 |
| }, |
| { |
| "epoch": 0.7449397899052012, |
| "grad_norm": 3.421875, |
| "learning_rate": 3.351836207393054e-06, |
| "loss": 0.2296595573425293, |
| "step": 5815, |
| "token_acc": 0.9098555100280353 |
| }, |
| { |
| "epoch": 0.7455803228285934, |
| "grad_norm": 3.453125, |
| "learning_rate": 3.3360265166443316e-06, |
| "loss": 0.2280057430267334, |
| "step": 5820, |
| "token_acc": 0.9113049486138699 |
| }, |
| { |
| "epoch": 0.7462208557519856, |
| "grad_norm": 4.875, |
| "learning_rate": 3.3202467313593345e-06, |
| "loss": 0.22925915718078613, |
| "step": 5825, |
| "token_acc": 0.9111722005068511 |
| }, |
| { |
| "epoch": 0.7468613886753779, |
| "grad_norm": 3.0, |
| "learning_rate": 3.304496922351952e-06, |
| "loss": 0.22095022201538086, |
| "step": 5830, |
| "token_acc": 0.9120665861652579 |
| }, |
| { |
| "epoch": 0.7475019215987702, |
| "grad_norm": 3.0625, |
| "learning_rate": 3.2887771603015237e-06, |
| "loss": 0.22771050930023193, |
| "step": 5835, |
| "token_acc": 0.9096476473886229 |
| }, |
| { |
| "epoch": 0.7481424545221624, |
| "grad_norm": 10.0625, |
| "learning_rate": 3.273087515752579e-06, |
| "loss": 0.23041772842407227, |
| "step": 5840, |
| "token_acc": 0.9116174693595719 |
| }, |
| { |
| "epoch": 0.7487829874455547, |
| "grad_norm": 3.75, |
| "learning_rate": 3.2574280591144623e-06, |
| "loss": 0.22076497077941895, |
| "step": 5845, |
| "token_acc": 0.9142450633784599 |
| }, |
| { |
| "epoch": 0.749423520368947, |
| "grad_norm": 3.953125, |
| "learning_rate": 3.2417988606610738e-06, |
| "loss": 0.2274242639541626, |
| "step": 5850, |
| "token_acc": 0.9110910575394268 |
| }, |
| { |
| "epoch": 0.7500640532923393, |
| "grad_norm": 3.71875, |
| "learning_rate": 3.2261999905304996e-06, |
| "loss": 0.23234589099884034, |
| "step": 5855, |
| "token_acc": 0.9092159559834938 |
| }, |
| { |
| "epoch": 0.7507045862157314, |
| "grad_norm": 2.90625, |
| "learning_rate": 3.2106315187247417e-06, |
| "loss": 0.2272249221801758, |
| "step": 5860, |
| "token_acc": 0.9115452624315349 |
| }, |
| { |
| "epoch": 0.7513451191391237, |
| "grad_norm": 3.421875, |
| "learning_rate": 3.1950935151093778e-06, |
| "loss": 0.23643298149108888, |
| "step": 5865, |
| "token_acc": 0.9063857235003225 |
| }, |
| { |
| "epoch": 0.751985652062516, |
| "grad_norm": 3.34375, |
| "learning_rate": 3.179586049413257e-06, |
| "loss": 0.23007550239562988, |
| "step": 5870, |
| "token_acc": 0.9103555536354603 |
| }, |
| { |
| "epoch": 0.7526261849859083, |
| "grad_norm": 3.46875, |
| "learning_rate": 3.164109191228187e-06, |
| "loss": 0.22181496620178223, |
| "step": 5875, |
| "token_acc": 0.912943921195887 |
| }, |
| { |
| "epoch": 0.7532667179093006, |
| "grad_norm": 2.703125, |
| "learning_rate": 3.148663010008618e-06, |
| "loss": 0.22368183135986328, |
| "step": 5880, |
| "token_acc": 0.9129305868097628 |
| }, |
| { |
| "epoch": 0.7539072508326928, |
| "grad_norm": 3.046875, |
| "learning_rate": 3.1332475750713352e-06, |
| "loss": 0.23119454383850097, |
| "step": 5885, |
| "token_acc": 0.9087276008766275 |
| }, |
| { |
| "epoch": 0.7545477837560851, |
| "grad_norm": 3.859375, |
| "learning_rate": 3.1178629555951446e-06, |
| "loss": 0.2248836040496826, |
| "step": 5890, |
| "token_acc": 0.9119996545619413 |
| }, |
| { |
| "epoch": 0.7551883166794773, |
| "grad_norm": 3.671875, |
| "learning_rate": 3.1025092206205642e-06, |
| "loss": 0.22220723628997802, |
| "step": 5895, |
| "token_acc": 0.9144963780614005 |
| }, |
| { |
| "epoch": 0.7558288496028696, |
| "grad_norm": 5.96875, |
| "learning_rate": 3.087186439049512e-06, |
| "loss": 0.23192427158355713, |
| "step": 5900, |
| "token_acc": 0.9082466248172671 |
| }, |
| { |
| "epoch": 0.7558288496028696, |
| "eval_loss": 0.3338736891746521, |
| "eval_runtime": 103.6094, |
| "eval_samples_per_second": 96.516, |
| "eval_steps_per_second": 12.065, |
| "eval_token_acc": 0.8824739356960539, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.7564693825262618, |
| "grad_norm": 5.3125, |
| "learning_rate": 3.0718946796450012e-06, |
| "loss": 0.23041715621948242, |
| "step": 5905, |
| "token_acc": 0.9090869902577808 |
| }, |
| { |
| "epoch": 0.7571099154496541, |
| "grad_norm": 2.6875, |
| "learning_rate": 3.056634011030828e-06, |
| "loss": 0.23436269760131836, |
| "step": 5910, |
| "token_acc": 0.9085108217642494 |
| }, |
| { |
| "epoch": 0.7577504483730464, |
| "grad_norm": 2.96875, |
| "learning_rate": 3.0414045016912673e-06, |
| "loss": 0.22445986270904542, |
| "step": 5915, |
| "token_acc": 0.9102779573367809 |
| }, |
| { |
| "epoch": 0.7583909812964387, |
| "grad_norm": 2.90625, |
| "learning_rate": 3.0262062199707486e-06, |
| "loss": 0.22754263877868652, |
| "step": 5920, |
| "token_acc": 0.910606582801999 |
| }, |
| { |
| "epoch": 0.759031514219831, |
| "grad_norm": 3.515625, |
| "learning_rate": 3.0110392340735892e-06, |
| "loss": 0.2298940658569336, |
| "step": 5925, |
| "token_acc": 0.9089810539035864 |
| }, |
| { |
| "epoch": 0.7596720471432231, |
| "grad_norm": 3.875, |
| "learning_rate": 2.995903612063634e-06, |
| "loss": 0.22265000343322755, |
| "step": 5930, |
| "token_acc": 0.9121761658031088 |
| }, |
| { |
| "epoch": 0.7603125800666154, |
| "grad_norm": 3.09375, |
| "learning_rate": 2.9807994218640035e-06, |
| "loss": 0.22582578659057617, |
| "step": 5935, |
| "token_acc": 0.9107952827335954 |
| }, |
| { |
| "epoch": 0.7609531129900077, |
| "grad_norm": 2.859375, |
| "learning_rate": 2.965726731256743e-06, |
| "loss": 0.23047933578491211, |
| "step": 5940, |
| "token_acc": 0.9101230304338441 |
| }, |
| { |
| "epoch": 0.7615936459134, |
| "grad_norm": 3.15625, |
| "learning_rate": 2.9506856078825473e-06, |
| "loss": 0.22990131378173828, |
| "step": 5945, |
| "token_acc": 0.9090360926867086 |
| }, |
| { |
| "epoch": 0.7622341788367922, |
| "grad_norm": 2.6875, |
| "learning_rate": 2.9356761192404616e-06, |
| "loss": 0.23607187271118163, |
| "step": 5950, |
| "token_acc": 0.905852417302799 |
| }, |
| { |
| "epoch": 0.7628747117601845, |
| "grad_norm": 3.515625, |
| "learning_rate": 2.9206983326875393e-06, |
| "loss": 0.22556428909301757, |
| "step": 5955, |
| "token_acc": 0.9103385965667082 |
| }, |
| { |
| "epoch": 0.7635152446835768, |
| "grad_norm": 2.671875, |
| "learning_rate": 2.905752315438596e-06, |
| "loss": 0.22193589210510253, |
| "step": 5960, |
| "token_acc": 0.9134333505776858 |
| }, |
| { |
| "epoch": 0.764155777606969, |
| "grad_norm": 8.5625, |
| "learning_rate": 2.8908381345658497e-06, |
| "loss": 0.22921185493469237, |
| "step": 5965, |
| "token_acc": 0.9122292224044187 |
| }, |
| { |
| "epoch": 0.7647963105303612, |
| "grad_norm": 3.03125, |
| "learning_rate": 2.875955856998677e-06, |
| "loss": 0.2280503749847412, |
| "step": 5970, |
| "token_acc": 0.9099413692015865 |
| }, |
| { |
| "epoch": 0.7654368434537535, |
| "grad_norm": 3.3125, |
| "learning_rate": 2.8611055495232585e-06, |
| "loss": 0.2285156488418579, |
| "step": 5975, |
| "token_acc": 0.9090477833362084 |
| }, |
| { |
| "epoch": 0.7660773763771458, |
| "grad_norm": 3.328125, |
| "learning_rate": 2.8462872787823213e-06, |
| "loss": 0.22320642471313476, |
| "step": 5980, |
| "token_acc": 0.9124087591240876 |
| }, |
| { |
| "epoch": 0.7667179093005381, |
| "grad_norm": 2.65625, |
| "learning_rate": 2.831501111274816e-06, |
| "loss": 0.23166375160217284, |
| "step": 5985, |
| "token_acc": 0.9094241966788872 |
| }, |
| { |
| "epoch": 0.7673584422239303, |
| "grad_norm": 2.6875, |
| "learning_rate": 2.81674711335563e-06, |
| "loss": 0.22401225566864014, |
| "step": 5990, |
| "token_acc": 0.9130397385171168 |
| }, |
| { |
| "epoch": 0.7679989751473226, |
| "grad_norm": 3.671875, |
| "learning_rate": 2.8020253512352814e-06, |
| "loss": 0.23468830585479736, |
| "step": 5995, |
| "token_acc": 0.9090440165061898 |
| }, |
| { |
| "epoch": 0.7686395080707148, |
| "grad_norm": 4.40625, |
| "learning_rate": 2.7873358909796287e-06, |
| "loss": 0.2302248954772949, |
| "step": 6000, |
| "token_acc": 0.9099036841877942 |
| }, |
| { |
| "epoch": 0.7686395080707148, |
| "eval_loss": 0.334545373916626, |
| "eval_runtime": 103.0056, |
| "eval_samples_per_second": 97.082, |
| "eval_steps_per_second": 12.135, |
| "eval_token_acc": 0.8822773806689514, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.7692800409941071, |
| "grad_norm": 2.984375, |
| "learning_rate": 2.7726787985095717e-06, |
| "loss": 0.23136000633239745, |
| "step": 6005, |
| "token_acc": 0.9077314256162731 |
| }, |
| { |
| "epoch": 0.7699205739174994, |
| "grad_norm": 3.890625, |
| "learning_rate": 2.7580541396007523e-06, |
| "loss": 0.22109587192535402, |
| "step": 6010, |
| "token_acc": 0.9138497449641221 |
| }, |
| { |
| "epoch": 0.7705611068408916, |
| "grad_norm": 3.125, |
| "learning_rate": 2.743461979883265e-06, |
| "loss": 0.2210922956466675, |
| "step": 6015, |
| "token_acc": 0.913861557051614 |
| }, |
| { |
| "epoch": 0.7712016397642839, |
| "grad_norm": 3.328125, |
| "learning_rate": 2.728902384841361e-06, |
| "loss": 0.22745194435119628, |
| "step": 6020, |
| "token_acc": 0.9132437785240679 |
| }, |
| { |
| "epoch": 0.7718421726876762, |
| "grad_norm": 3.328125, |
| "learning_rate": 2.71437541981315e-06, |
| "loss": 0.2273806095123291, |
| "step": 6025, |
| "token_acc": 0.9103077254142458 |
| }, |
| { |
| "epoch": 0.7724827056110684, |
| "grad_norm": 3.546875, |
| "learning_rate": 2.699881149990313e-06, |
| "loss": 0.2318946361541748, |
| "step": 6030, |
| "token_acc": 0.9076658793214516 |
| }, |
| { |
| "epoch": 0.7731232385344606, |
| "grad_norm": 2.8125, |
| "learning_rate": 2.6854196404178077e-06, |
| "loss": 0.22452447414398194, |
| "step": 6035, |
| "token_acc": 0.9115502437761573 |
| }, |
| { |
| "epoch": 0.7737637714578529, |
| "grad_norm": 10.4375, |
| "learning_rate": 2.6709909559935652e-06, |
| "loss": 0.23456428050994874, |
| "step": 6040, |
| "token_acc": 0.9073548387096774 |
| }, |
| { |
| "epoch": 0.7744043043812452, |
| "grad_norm": 4.3125, |
| "learning_rate": 2.6565951614682316e-06, |
| "loss": 0.22777628898620605, |
| "step": 6045, |
| "token_acc": 0.9110881364693719 |
| }, |
| { |
| "epoch": 0.7750448373046375, |
| "grad_norm": 13.1875, |
| "learning_rate": 2.6422323214448275e-06, |
| "loss": 0.2248152017593384, |
| "step": 6050, |
| "token_acc": 0.9121726395589249 |
| }, |
| { |
| "epoch": 0.7756853702280297, |
| "grad_norm": 2.953125, |
| "learning_rate": 2.6279025003785132e-06, |
| "loss": 0.2368108034133911, |
| "step": 6055, |
| "token_acc": 0.9071379369726192 |
| }, |
| { |
| "epoch": 0.776325903151422, |
| "grad_norm": 4.0625, |
| "learning_rate": 2.6136057625762503e-06, |
| "loss": 0.22743830680847169, |
| "step": 6060, |
| "token_acc": 0.9096523429064997 |
| }, |
| { |
| "epoch": 0.7769664360748142, |
| "grad_norm": 3.09375, |
| "learning_rate": 2.5993421721965416e-06, |
| "loss": 0.22994532585144042, |
| "step": 6065, |
| "token_acc": 0.9099663183349167 |
| }, |
| { |
| "epoch": 0.7776069689982065, |
| "grad_norm": 2.90625, |
| "learning_rate": 2.58511179324915e-06, |
| "loss": 0.22852482795715331, |
| "step": 6070, |
| "token_acc": 0.9125949585635359 |
| }, |
| { |
| "epoch": 0.7782475019215987, |
| "grad_norm": 2.703125, |
| "learning_rate": 2.5709146895947713e-06, |
| "loss": 0.23030381202697753, |
| "step": 6075, |
| "token_acc": 0.9101974108640488 |
| }, |
| { |
| "epoch": 0.778888034844991, |
| "grad_norm": 2.96875, |
| "learning_rate": 2.556750924944802e-06, |
| "loss": 0.22189459800720215, |
| "step": 6080, |
| "token_acc": 0.9149477863122465 |
| }, |
| { |
| "epoch": 0.7795285677683833, |
| "grad_norm": 3.5625, |
| "learning_rate": 2.5426205628610046e-06, |
| "loss": 0.22595663070678712, |
| "step": 6085, |
| "token_acc": 0.911052608864529 |
| }, |
| { |
| "epoch": 0.7801691006917756, |
| "grad_norm": 3.734375, |
| "learning_rate": 2.5285236667552503e-06, |
| "loss": 0.22210302352905273, |
| "step": 6090, |
| "token_acc": 0.9138005344366865 |
| }, |
| { |
| "epoch": 0.7808096336151679, |
| "grad_norm": 3.1875, |
| "learning_rate": 2.5144602998892308e-06, |
| "loss": 0.22484986782073973, |
| "step": 6095, |
| "token_acc": 0.9116413781178403 |
| }, |
| { |
| "epoch": 0.78145016653856, |
| "grad_norm": 3.578125, |
| "learning_rate": 2.500430525374167e-06, |
| "loss": 0.2381572961807251, |
| "step": 6100, |
| "token_acc": 0.9064785339413233 |
| }, |
| { |
| "epoch": 0.78145016653856, |
| "eval_loss": 0.3342524766921997, |
| "eval_runtime": 102.5935, |
| "eval_samples_per_second": 97.472, |
| "eval_steps_per_second": 12.184, |
| "eval_token_acc": 0.8827729208077028, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.7820906994619523, |
| "grad_norm": 4.0625, |
| "learning_rate": 2.486434406170529e-06, |
| "loss": 0.23040971755981446, |
| "step": 6105, |
| "token_acc": 0.9120883863450002 |
| }, |
| { |
| "epoch": 0.7827312323853446, |
| "grad_norm": 4.15625, |
| "learning_rate": 2.472472005087758e-06, |
| "loss": 0.23743114471435547, |
| "step": 6110, |
| "token_acc": 0.9058546000428909 |
| }, |
| { |
| "epoch": 0.7833717653087369, |
| "grad_norm": 2.515625, |
| "learning_rate": 2.4585433847839757e-06, |
| "loss": 0.2203622817993164, |
| "step": 6115, |
| "token_acc": 0.913583977208961 |
| }, |
| { |
| "epoch": 0.7840122982321291, |
| "grad_norm": 3.28125, |
| "learning_rate": 2.444648607765713e-06, |
| "loss": 0.2203676223754883, |
| "step": 6120, |
| "token_acc": 0.9142634112494037 |
| }, |
| { |
| "epoch": 0.7846528311555214, |
| "grad_norm": 3.703125, |
| "learning_rate": 2.430787736387621e-06, |
| "loss": 0.2319796562194824, |
| "step": 6125, |
| "token_acc": 0.9104683790200734 |
| }, |
| { |
| "epoch": 0.7852933640789137, |
| "grad_norm": 3.578125, |
| "learning_rate": 2.4169608328521966e-06, |
| "loss": 0.22085697650909425, |
| "step": 6130, |
| "token_acc": 0.9123194047928022 |
| }, |
| { |
| "epoch": 0.7859338970023059, |
| "grad_norm": 3.203125, |
| "learning_rate": 2.4031679592095014e-06, |
| "loss": 0.22805500030517578, |
| "step": 6135, |
| "token_acc": 0.911108238538435 |
| }, |
| { |
| "epoch": 0.7865744299256981, |
| "grad_norm": 3.046875, |
| "learning_rate": 2.3894091773568818e-06, |
| "loss": 0.22629399299621583, |
| "step": 6140, |
| "token_acc": 0.9122247597707588 |
| }, |
| { |
| "epoch": 0.7872149628490904, |
| "grad_norm": 3.6875, |
| "learning_rate": 2.3756845490386947e-06, |
| "loss": 0.22798571586608887, |
| "step": 6145, |
| "token_acc": 0.9104625171939478 |
| }, |
| { |
| "epoch": 0.7878554957724827, |
| "grad_norm": 4.15625, |
| "learning_rate": 2.3619941358460263e-06, |
| "loss": 0.23149216175079346, |
| "step": 6150, |
| "token_acc": 0.9089227327482361 |
| }, |
| { |
| "epoch": 0.788496028695875, |
| "grad_norm": 3.296875, |
| "learning_rate": 2.3483379992164245e-06, |
| "loss": 0.23463683128356932, |
| "step": 6155, |
| "token_acc": 0.9076479697178252 |
| }, |
| { |
| "epoch": 0.7891365616192673, |
| "grad_norm": 2.953125, |
| "learning_rate": 2.334716200433601e-06, |
| "loss": 0.2272404193878174, |
| "step": 6160, |
| "token_acc": 0.9092513668259503 |
| }, |
| { |
| "epoch": 0.7897770945426595, |
| "grad_norm": 3.546875, |
| "learning_rate": 2.3211288006271936e-06, |
| "loss": 0.22353928089141845, |
| "step": 6165, |
| "token_acc": 0.9137633666781649 |
| }, |
| { |
| "epoch": 0.7904176274660517, |
| "grad_norm": 2.921875, |
| "learning_rate": 2.3075758607724486e-06, |
| "loss": 0.22103281021118165, |
| "step": 6170, |
| "token_acc": 0.9112282824790389 |
| }, |
| { |
| "epoch": 0.791058160389444, |
| "grad_norm": 3.046875, |
| "learning_rate": 2.2940574416899895e-06, |
| "loss": 0.22877752780914307, |
| "step": 6175, |
| "token_acc": 0.9084078248477782 |
| }, |
| { |
| "epoch": 0.7916986933128363, |
| "grad_norm": 2.875, |
| "learning_rate": 2.280573604045504e-06, |
| "loss": 0.229004168510437, |
| "step": 6180, |
| "token_acc": 0.9116313220748931 |
| }, |
| { |
| "epoch": 0.7923392262362285, |
| "grad_norm": 2.984375, |
| "learning_rate": 2.2671244083495026e-06, |
| "loss": 0.22659940719604493, |
| "step": 6185, |
| "token_acc": 0.9117138908085695 |
| }, |
| { |
| "epoch": 0.7929797591596208, |
| "grad_norm": 3.203125, |
| "learning_rate": 2.253709914957032e-06, |
| "loss": 0.2304908275604248, |
| "step": 6190, |
| "token_acc": 0.9093925032313658 |
| }, |
| { |
| "epoch": 0.7936202920830131, |
| "grad_norm": 3.75, |
| "learning_rate": 2.2403301840674062e-06, |
| "loss": 0.23479413986206055, |
| "step": 6195, |
| "token_acc": 0.9068654915312675 |
| }, |
| { |
| "epoch": 0.7942608250064054, |
| "grad_norm": 3.9375, |
| "learning_rate": 2.2269852757239473e-06, |
| "loss": 0.22974464893341065, |
| "step": 6200, |
| "token_acc": 0.9106674125392659 |
| }, |
| { |
| "epoch": 0.7942608250064054, |
| "eval_loss": 0.33391064405441284, |
| "eval_runtime": 103.739, |
| "eval_samples_per_second": 96.396, |
| "eval_steps_per_second": 12.049, |
| "eval_token_acc": 0.8825237665479955, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.7949013579297975, |
| "grad_norm": 5.34375, |
| "learning_rate": 2.2136752498136924e-06, |
| "loss": 0.232399320602417, |
| "step": 6205, |
| "token_acc": 0.9098353590207741 |
| }, |
| { |
| "epoch": 0.7955418908531898, |
| "grad_norm": 7.78125, |
| "learning_rate": 2.200400166067147e-06, |
| "loss": 0.22328581809997558, |
| "step": 6210, |
| "token_acc": 0.9126268076840061 |
| }, |
| { |
| "epoch": 0.7961824237765821, |
| "grad_norm": 2.78125, |
| "learning_rate": 2.1871600840580087e-06, |
| "loss": 0.22782430648803711, |
| "step": 6215, |
| "token_acc": 0.9107918620155706 |
| }, |
| { |
| "epoch": 0.7968229566999744, |
| "grad_norm": 7.3125, |
| "learning_rate": 2.1739550632028995e-06, |
| "loss": 0.22463743686676024, |
| "step": 6220, |
| "token_acc": 0.9123086872170727 |
| }, |
| { |
| "epoch": 0.7974634896233667, |
| "grad_norm": 3.171875, |
| "learning_rate": 2.160785162761099e-06, |
| "loss": 0.22946014404296874, |
| "step": 6225, |
| "token_acc": 0.9104503339797457 |
| }, |
| { |
| "epoch": 0.7981040225467589, |
| "grad_norm": 3.234375, |
| "learning_rate": 2.1476504418342803e-06, |
| "loss": 0.22696642875671386, |
| "step": 6230, |
| "token_acc": 0.9110344827586206 |
| }, |
| { |
| "epoch": 0.7987445554701512, |
| "grad_norm": 3.359375, |
| "learning_rate": 2.1345509593662426e-06, |
| "loss": 0.2333219289779663, |
| "step": 6235, |
| "token_acc": 0.9078953042128411 |
| }, |
| { |
| "epoch": 0.7993850883935434, |
| "grad_norm": 4.6875, |
| "learning_rate": 2.1214867741426505e-06, |
| "loss": 0.2281118631362915, |
| "step": 6240, |
| "token_acc": 0.9095139607032058 |
| }, |
| { |
| "epoch": 0.8000256213169357, |
| "grad_norm": 11.375, |
| "learning_rate": 2.108457944790764e-06, |
| "loss": 0.22590672969818115, |
| "step": 6245, |
| "token_acc": 0.9117824773413897 |
| }, |
| { |
| "epoch": 0.8006661542403279, |
| "grad_norm": 2.90625, |
| "learning_rate": 2.095464529779182e-06, |
| "loss": 0.22068183422088622, |
| "step": 6250, |
| "token_acc": 0.9125511302475781 |
| }, |
| { |
| "epoch": 0.8013066871637202, |
| "grad_norm": 3.3125, |
| "learning_rate": 2.0825065874175744e-06, |
| "loss": 0.2325758457183838, |
| "step": 6255, |
| "token_acc": 0.9100626770842277 |
| }, |
| { |
| "epoch": 0.8019472200871125, |
| "grad_norm": 4.125, |
| "learning_rate": 2.069584175856424e-06, |
| "loss": 0.22739195823669434, |
| "step": 6260, |
| "token_acc": 0.9109864018994173 |
| }, |
| { |
| "epoch": 0.8025877530105048, |
| "grad_norm": 4.09375, |
| "learning_rate": 2.056697353086765e-06, |
| "loss": 0.22868261337280274, |
| "step": 6265, |
| "token_acc": 0.9094489893087477 |
| }, |
| { |
| "epoch": 0.8032282859338971, |
| "grad_norm": 3.546875, |
| "learning_rate": 2.0438461769399207e-06, |
| "loss": 0.23165996074676515, |
| "step": 6270, |
| "token_acc": 0.908895110919664 |
| }, |
| { |
| "epoch": 0.8038688188572892, |
| "grad_norm": 3.015625, |
| "learning_rate": 2.031030705087251e-06, |
| "loss": 0.2177964687347412, |
| "step": 6275, |
| "token_acc": 0.9145767686795874 |
| }, |
| { |
| "epoch": 0.8045093517806815, |
| "grad_norm": 3.328125, |
| "learning_rate": 2.0182509950398732e-06, |
| "loss": 0.2247143268585205, |
| "step": 6280, |
| "token_acc": 0.9119068162208801 |
| }, |
| { |
| "epoch": 0.8051498847040738, |
| "grad_norm": 10.4375, |
| "learning_rate": 2.005507104148441e-06, |
| "loss": 0.22496967315673827, |
| "step": 6285, |
| "token_acc": 0.9104271735850683 |
| }, |
| { |
| "epoch": 0.8057904176274661, |
| "grad_norm": 2.984375, |
| "learning_rate": 1.9927990896028416e-06, |
| "loss": 0.22278683185577391, |
| "step": 6290, |
| "token_acc": 0.9130923555863023 |
| }, |
| { |
| "epoch": 0.8064309505508583, |
| "grad_norm": 3.890625, |
| "learning_rate": 1.9801270084319847e-06, |
| "loss": 0.22296977043151855, |
| "step": 6295, |
| "token_acc": 0.9139506811519228 |
| }, |
| { |
| "epoch": 0.8070714834742506, |
| "grad_norm": 5.84375, |
| "learning_rate": 1.967490917503504e-06, |
| "loss": 0.2246922492980957, |
| "step": 6300, |
| "token_acc": 0.9113056226284926 |
| }, |
| { |
| "epoch": 0.8070714834742506, |
| "eval_loss": 0.33457258343696594, |
| "eval_runtime": 102.1762, |
| "eval_samples_per_second": 97.87, |
| "eval_steps_per_second": 12.234, |
| "eval_token_acc": 0.8825071562640149, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.8077120163976428, |
| "grad_norm": 2.59375, |
| "learning_rate": 1.954890873523535e-06, |
| "loss": 0.22967491149902344, |
| "step": 6305, |
| "token_acc": 0.9100146387668991 |
| }, |
| { |
| "epoch": 0.8083525493210351, |
| "grad_norm": 3.421875, |
| "learning_rate": 1.9423269330364446e-06, |
| "loss": 0.23272688388824464, |
| "step": 6310, |
| "token_acc": 0.9100631361937894 |
| }, |
| { |
| "epoch": 0.8089930822444273, |
| "grad_norm": 3.734375, |
| "learning_rate": 1.929799152424576e-06, |
| "loss": 0.22082395553588868, |
| "step": 6315, |
| "token_acc": 0.9147945323616921 |
| }, |
| { |
| "epoch": 0.8096336151678196, |
| "grad_norm": 2.96875, |
| "learning_rate": 1.917307587908013e-06, |
| "loss": 0.22631459236145018, |
| "step": 6320, |
| "token_acc": 0.910735097336729 |
| }, |
| { |
| "epoch": 0.8102741480912119, |
| "grad_norm": 3.578125, |
| "learning_rate": 1.9048522955442973e-06, |
| "loss": 0.22592225074768066, |
| "step": 6325, |
| "token_acc": 0.9107575233483224 |
| }, |
| { |
| "epoch": 0.8109146810146042, |
| "grad_norm": 2.875, |
| "learning_rate": 1.8924333312282072e-06, |
| "loss": 0.22494149208068848, |
| "step": 6330, |
| "token_acc": 0.9138549272043893 |
| }, |
| { |
| "epoch": 0.8115552139379965, |
| "grad_norm": 4.09375, |
| "learning_rate": 1.880050750691489e-06, |
| "loss": 0.23039345741271972, |
| "step": 6335, |
| "token_acc": 0.9105968858131488 |
| }, |
| { |
| "epoch": 0.8121957468613886, |
| "grad_norm": 3.109375, |
| "learning_rate": 1.867704609502613e-06, |
| "loss": 0.22507119178771973, |
| "step": 6340, |
| "token_acc": 0.9115109155233411 |
| }, |
| { |
| "epoch": 0.8128362797847809, |
| "grad_norm": 3.4375, |
| "learning_rate": 1.8553949630665246e-06, |
| "loss": 0.23071153163909913, |
| "step": 6345, |
| "token_acc": 0.9095776837378012 |
| }, |
| { |
| "epoch": 0.8134768127081732, |
| "grad_norm": 5.34375, |
| "learning_rate": 1.843121866624391e-06, |
| "loss": 0.22440800666809083, |
| "step": 6350, |
| "token_acc": 0.9121522693997072 |
| }, |
| { |
| "epoch": 0.8141173456315655, |
| "grad_norm": 3.796875, |
| "learning_rate": 1.8308853752533595e-06, |
| "loss": 0.22544093132019044, |
| "step": 6355, |
| "token_acc": 0.9106696543997242 |
| }, |
| { |
| "epoch": 0.8147578785549577, |
| "grad_norm": 2.34375, |
| "learning_rate": 1.8186855438663042e-06, |
| "loss": 0.2227323532104492, |
| "step": 6360, |
| "token_acc": 0.9120110525861325 |
| }, |
| { |
| "epoch": 0.81539841147835, |
| "grad_norm": 2.78125, |
| "learning_rate": 1.8065224272115866e-06, |
| "loss": 0.22800102233886718, |
| "step": 6365, |
| "token_acc": 0.9097010109701011 |
| }, |
| { |
| "epoch": 0.8160389444017423, |
| "grad_norm": 3.734375, |
| "learning_rate": 1.7943960798728056e-06, |
| "loss": 0.22401859760284423, |
| "step": 6370, |
| "token_acc": 0.9109385113268609 |
| }, |
| { |
| "epoch": 0.8166794773251345, |
| "grad_norm": 3.328125, |
| "learning_rate": 1.7823065562685437e-06, |
| "loss": 0.23256373405456543, |
| "step": 6375, |
| "token_acc": 0.9090204520990313 |
| }, |
| { |
| "epoch": 0.8173200102485267, |
| "grad_norm": 2.9375, |
| "learning_rate": 1.7702539106521467e-06, |
| "loss": 0.22349081039428711, |
| "step": 6380, |
| "token_acc": 0.9125831820931639 |
| }, |
| { |
| "epoch": 0.817960543171919, |
| "grad_norm": 3.265625, |
| "learning_rate": 1.7582381971114548e-06, |
| "loss": 0.23039307594299316, |
| "step": 6385, |
| "token_acc": 0.9086009915930158 |
| }, |
| { |
| "epoch": 0.8186010760953113, |
| "grad_norm": 3.703125, |
| "learning_rate": 1.7462594695685763e-06, |
| "loss": 0.22513654232025146, |
| "step": 6390, |
| "token_acc": 0.9117127975549911 |
| }, |
| { |
| "epoch": 0.8192416090187036, |
| "grad_norm": 3.8125, |
| "learning_rate": 1.7343177817796397e-06, |
| "loss": 0.2271491050720215, |
| "step": 6395, |
| "token_acc": 0.9126460569999569 |
| }, |
| { |
| "epoch": 0.8198821419420959, |
| "grad_norm": 5.0, |
| "learning_rate": 1.7224131873345417e-06, |
| "loss": 0.2326582908630371, |
| "step": 6400, |
| "token_acc": 0.9083699681061977 |
| }, |
| { |
| "epoch": 0.8198821419420959, |
| "eval_loss": 0.334193617105484, |
| "eval_runtime": 106.2327, |
| "eval_samples_per_second": 94.133, |
| "eval_steps_per_second": 11.767, |
| "eval_token_acc": 0.8823493585662003, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.8205226748654881, |
| "grad_norm": 3.75, |
| "learning_rate": 1.7105457396567383e-06, |
| "loss": 0.2375797748565674, |
| "step": 6405, |
| "token_acc": 0.9064124038998411 |
| }, |
| { |
| "epoch": 0.8211632077888803, |
| "grad_norm": 3.9375, |
| "learning_rate": 1.6987154920029625e-06, |
| "loss": 0.22246260643005372, |
| "step": 6410, |
| "token_acc": 0.9119291304721768 |
| }, |
| { |
| "epoch": 0.8218037407122726, |
| "grad_norm": 3.203125, |
| "learning_rate": 1.6869224974630283e-06, |
| "loss": 0.23738515377044678, |
| "step": 6415, |
| "token_acc": 0.9083624143724958 |
| }, |
| { |
| "epoch": 0.8224442736356649, |
| "grad_norm": 3.765625, |
| "learning_rate": 1.675166808959552e-06, |
| "loss": 0.23724117279052734, |
| "step": 6420, |
| "token_acc": 0.9078502673796791 |
| }, |
| { |
| "epoch": 0.8230848065590571, |
| "grad_norm": 3.203125, |
| "learning_rate": 1.6634484792477468e-06, |
| "loss": 0.23424534797668456, |
| "step": 6425, |
| "token_acc": 0.910155913515376 |
| }, |
| { |
| "epoch": 0.8237253394824494, |
| "grad_norm": 3.578125, |
| "learning_rate": 1.6517675609151683e-06, |
| "loss": 0.23035151958465577, |
| "step": 6430, |
| "token_acc": 0.9098056155507559 |
| }, |
| { |
| "epoch": 0.8243658724058417, |
| "grad_norm": 2.796875, |
| "learning_rate": 1.6401241063814854e-06, |
| "loss": 0.22955503463745117, |
| "step": 6435, |
| "token_acc": 0.9094276239286792 |
| }, |
| { |
| "epoch": 0.825006405329234, |
| "grad_norm": 3.125, |
| "learning_rate": 1.6285181678982432e-06, |
| "loss": 0.2227609395980835, |
| "step": 6440, |
| "token_acc": 0.9112541026083952 |
| }, |
| { |
| "epoch": 0.8256469382526261, |
| "grad_norm": 2.6875, |
| "learning_rate": 1.6169497975486282e-06, |
| "loss": 0.22880702018737792, |
| "step": 6445, |
| "token_acc": 0.9112530754953166 |
| }, |
| { |
| "epoch": 0.8262874711760184, |
| "grad_norm": 2.96875, |
| "learning_rate": 1.605419047247232e-06, |
| "loss": 0.2208636999130249, |
| "step": 6450, |
| "token_acc": 0.9133209711501142 |
| }, |
| { |
| "epoch": 0.8269280040994107, |
| "grad_norm": 2.875, |
| "learning_rate": 1.5939259687398279e-06, |
| "loss": 0.22008955478668213, |
| "step": 6455, |
| "token_acc": 0.9133350640359986 |
| }, |
| { |
| "epoch": 0.827568537022803, |
| "grad_norm": 2.90625, |
| "learning_rate": 1.5824706136031255e-06, |
| "loss": 0.22201809883117676, |
| "step": 6460, |
| "token_acc": 0.9131427094996124 |
| }, |
| { |
| "epoch": 0.8282090699461953, |
| "grad_norm": 7.78125, |
| "learning_rate": 1.5710530332445484e-06, |
| "loss": 0.22498104572296143, |
| "step": 6465, |
| "token_acc": 0.9109814094249892 |
| }, |
| { |
| "epoch": 0.8288496028695875, |
| "grad_norm": 5.0625, |
| "learning_rate": 1.559673278902002e-06, |
| "loss": 0.23075518608093262, |
| "step": 6470, |
| "token_acc": 0.909899408539481 |
| }, |
| { |
| "epoch": 0.8294901357929798, |
| "grad_norm": 2.734375, |
| "learning_rate": 1.5483314016436402e-06, |
| "loss": 0.23160152435302733, |
| "step": 6475, |
| "token_acc": 0.9085847468600284 |
| }, |
| { |
| "epoch": 0.830130668716372, |
| "grad_norm": 2.78125, |
| "learning_rate": 1.537027452367641e-06, |
| "loss": 0.2284604549407959, |
| "step": 6480, |
| "token_acc": 0.9090869865377977 |
| }, |
| { |
| "epoch": 0.8307712016397643, |
| "grad_norm": 2.84375, |
| "learning_rate": 1.5257614818019716e-06, |
| "loss": 0.22905595302581788, |
| "step": 6485, |
| "token_acc": 0.9103867022650934 |
| }, |
| { |
| "epoch": 0.8314117345631565, |
| "grad_norm": 4.53125, |
| "learning_rate": 1.5145335405041728e-06, |
| "loss": 0.23354558944702147, |
| "step": 6490, |
| "token_acc": 0.9073544698544699 |
| }, |
| { |
| "epoch": 0.8320522674865488, |
| "grad_norm": 4.25, |
| "learning_rate": 1.50334367886111e-06, |
| "loss": 0.2220928192138672, |
| "step": 6495, |
| "token_acc": 0.9137692440754195 |
| }, |
| { |
| "epoch": 0.8326928004099411, |
| "grad_norm": 2.671875, |
| "learning_rate": 1.4921919470887758e-06, |
| "loss": 0.22195751667022706, |
| "step": 6500, |
| "token_acc": 0.9108550636749545 |
| }, |
| { |
| "epoch": 0.8326928004099411, |
| "eval_loss": 0.3345060646533966, |
| "eval_runtime": 102.629, |
| "eval_samples_per_second": 97.438, |
| "eval_steps_per_second": 12.18, |
| "eval_token_acc": 0.8823133696175759, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 3.21875, |
| "learning_rate": 1.4810783952320417e-06, |
| "loss": 0.2198798656463623, |
| "step": 6505, |
| "token_acc": 0.91326310335895 |
| }, |
| { |
| "epoch": 0.8339738662567256, |
| "grad_norm": 2.96875, |
| "learning_rate": 1.4700030731644444e-06, |
| "loss": 0.22199637889862062, |
| "step": 6510, |
| "token_acc": 0.9108222490931076 |
| }, |
| { |
| "epoch": 0.8346143991801178, |
| "grad_norm": 3.59375, |
| "learning_rate": 1.4589660305879615e-06, |
| "loss": 0.22134122848510743, |
| "step": 6515, |
| "token_acc": 0.9125576981148354 |
| }, |
| { |
| "epoch": 0.8352549321035101, |
| "grad_norm": 3.203125, |
| "learning_rate": 1.4479673170327745e-06, |
| "loss": 0.22954387664794923, |
| "step": 6520, |
| "token_acc": 0.9111398405516052 |
| }, |
| { |
| "epoch": 0.8358954650269024, |
| "grad_norm": 4.3125, |
| "learning_rate": 1.4370069818570787e-06, |
| "loss": 0.22780919075012207, |
| "step": 6525, |
| "token_acc": 0.9098325276243094 |
| }, |
| { |
| "epoch": 0.8365359979502947, |
| "grad_norm": 3.453125, |
| "learning_rate": 1.4260850742468202e-06, |
| "loss": 0.22985472679138183, |
| "step": 6530, |
| "token_acc": 0.909024211298606 |
| }, |
| { |
| "epoch": 0.8371765308736869, |
| "grad_norm": 3.3125, |
| "learning_rate": 1.4152016432155158e-06, |
| "loss": 0.22617745399475098, |
| "step": 6535, |
| "token_acc": 0.9123404622283546 |
| }, |
| { |
| "epoch": 0.8378170637970792, |
| "grad_norm": 2.9375, |
| "learning_rate": 1.4043567376039956e-06, |
| "loss": 0.22737021446228028, |
| "step": 6540, |
| "token_acc": 0.910641053313188 |
| }, |
| { |
| "epoch": 0.8384575967204715, |
| "grad_norm": 3.515625, |
| "learning_rate": 1.393550406080213e-06, |
| "loss": 0.22855916023254394, |
| "step": 6545, |
| "token_acc": 0.9108817204301075 |
| }, |
| { |
| "epoch": 0.8390981296438637, |
| "grad_norm": 4.625, |
| "learning_rate": 1.3827826971390135e-06, |
| "loss": 0.21400003433227538, |
| "step": 6550, |
| "token_acc": 0.9176013805004314 |
| }, |
| { |
| "epoch": 0.8397386625672559, |
| "grad_norm": 5.09375, |
| "learning_rate": 1.372053659101915e-06, |
| "loss": 0.22439954280853272, |
| "step": 6555, |
| "token_acc": 0.9112418357195381 |
| }, |
| { |
| "epoch": 0.8403791954906482, |
| "grad_norm": 3.515625, |
| "learning_rate": 1.361363340116899e-06, |
| "loss": 0.22323524951934814, |
| "step": 6560, |
| "token_acc": 0.9125355634106388 |
| }, |
| { |
| "epoch": 0.8410197284140405, |
| "grad_norm": 3.0625, |
| "learning_rate": 1.3507117881581866e-06, |
| "loss": 0.2269625186920166, |
| "step": 6565, |
| "token_acc": 0.9102128574500108 |
| }, |
| { |
| "epoch": 0.8416602613374328, |
| "grad_norm": 3.125, |
| "learning_rate": 1.3400990510260282e-06, |
| "loss": 0.21720943450927735, |
| "step": 6570, |
| "token_acc": 0.9142647249470637 |
| }, |
| { |
| "epoch": 0.842300794260825, |
| "grad_norm": 3.015625, |
| "learning_rate": 1.3295251763464877e-06, |
| "loss": 0.22070887088775634, |
| "step": 6575, |
| "token_acc": 0.91288746703558 |
| }, |
| { |
| "epoch": 0.8429413271842172, |
| "grad_norm": 3.109375, |
| "learning_rate": 1.3189902115712294e-06, |
| "loss": 0.23354511260986327, |
| "step": 6580, |
| "token_acc": 0.9081190159288995 |
| }, |
| { |
| "epoch": 0.8435818601076095, |
| "grad_norm": 2.953125, |
| "learning_rate": 1.3084942039773018e-06, |
| "loss": 0.22521576881408692, |
| "step": 6585, |
| "token_acc": 0.9107534747622531 |
| }, |
| { |
| "epoch": 0.8442223930310018, |
| "grad_norm": 3.0625, |
| "learning_rate": 1.2980372006669296e-06, |
| "loss": 0.2297739267349243, |
| "step": 6590, |
| "token_acc": 0.9092908902691511 |
| }, |
| { |
| "epoch": 0.844862925954394, |
| "grad_norm": 4.15625, |
| "learning_rate": 1.287619248567301e-06, |
| "loss": 0.22501018047332763, |
| "step": 6595, |
| "token_acc": 0.9111034393475165 |
| }, |
| { |
| "epoch": 0.8455034588777863, |
| "grad_norm": 4.0625, |
| "learning_rate": 1.2772403944303556e-06, |
| "loss": 0.23542351722717286, |
| "step": 6600, |
| "token_acc": 0.9083812301621343 |
| }, |
| { |
| "epoch": 0.8455034588777863, |
| "eval_loss": 0.33506593108177185, |
| "eval_runtime": 103.0867, |
| "eval_samples_per_second": 97.006, |
| "eval_steps_per_second": 12.126, |
| "eval_token_acc": 0.882360432088854, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.8461439918011786, |
| "grad_norm": 2.53125, |
| "learning_rate": 1.266900684832576e-06, |
| "loss": 0.22258315086364747, |
| "step": 6605, |
| "token_acc": 0.9125866597769453 |
| }, |
| { |
| "epoch": 0.8467845247245709, |
| "grad_norm": 3.3125, |
| "learning_rate": 1.2566001661747807e-06, |
| "loss": 0.22833826541900634, |
| "step": 6610, |
| "token_acc": 0.9116735537190083 |
| }, |
| { |
| "epoch": 0.847425057647963, |
| "grad_norm": 3.265625, |
| "learning_rate": 1.2463388846819058e-06, |
| "loss": 0.23099522590637206, |
| "step": 6615, |
| "token_acc": 0.91005291005291 |
| }, |
| { |
| "epoch": 0.8480655905713553, |
| "grad_norm": 2.90625, |
| "learning_rate": 1.2361168864028183e-06, |
| "loss": 0.2343848466873169, |
| "step": 6620, |
| "token_acc": 0.9084549356223176 |
| }, |
| { |
| "epoch": 0.8487061234947476, |
| "grad_norm": 2.890625, |
| "learning_rate": 1.225934217210083e-06, |
| "loss": 0.22270684242248534, |
| "step": 6625, |
| "token_acc": 0.9118066047917116 |
| }, |
| { |
| "epoch": 0.8493466564181399, |
| "grad_norm": 4.0, |
| "learning_rate": 1.2157909227997822e-06, |
| "loss": 0.22519948482513427, |
| "step": 6630, |
| "token_acc": 0.9111332783970161 |
| }, |
| { |
| "epoch": 0.8499871893415322, |
| "grad_norm": 2.71875, |
| "learning_rate": 1.205687048691293e-06, |
| "loss": 0.2298964500427246, |
| "step": 6635, |
| "token_acc": 0.9109686303197212 |
| }, |
| { |
| "epoch": 0.8506277222649244, |
| "grad_norm": 6.0625, |
| "learning_rate": 1.1956226402270821e-06, |
| "loss": 0.22732067108154297, |
| "step": 6640, |
| "token_acc": 0.9125354411891056 |
| }, |
| { |
| "epoch": 0.8512682551883167, |
| "grad_norm": 3.953125, |
| "learning_rate": 1.1855977425725252e-06, |
| "loss": 0.23059117794036865, |
| "step": 6645, |
| "token_acc": 0.9109086197961651 |
| }, |
| { |
| "epoch": 0.8519087881117089, |
| "grad_norm": 3.40625, |
| "learning_rate": 1.1756124007156699e-06, |
| "loss": 0.23375325202941893, |
| "step": 6650, |
| "token_acc": 0.9093841389987958 |
| }, |
| { |
| "epoch": 0.8525493210351012, |
| "grad_norm": 3.609375, |
| "learning_rate": 1.1656666594670673e-06, |
| "loss": 0.22103147506713866, |
| "step": 6655, |
| "token_acc": 0.9112014180104622 |
| }, |
| { |
| "epoch": 0.8531898539584934, |
| "grad_norm": 37.75, |
| "learning_rate": 1.1557605634595437e-06, |
| "loss": 0.2286379814147949, |
| "step": 6660, |
| "token_acc": 0.9100968783638321 |
| }, |
| { |
| "epoch": 0.8538303868818857, |
| "grad_norm": 3.1875, |
| "learning_rate": 1.1458941571480198e-06, |
| "loss": 0.22343990802764893, |
| "step": 6665, |
| "token_acc": 0.911326860841424 |
| }, |
| { |
| "epoch": 0.854470919805278, |
| "grad_norm": 2.921875, |
| "learning_rate": 1.136067484809299e-06, |
| "loss": 0.22610747814178467, |
| "step": 6670, |
| "token_acc": 0.9108187134502924 |
| }, |
| { |
| "epoch": 0.8551114527286703, |
| "grad_norm": 3.1875, |
| "learning_rate": 1.126280590541876e-06, |
| "loss": 0.2264204740524292, |
| "step": 6675, |
| "token_acc": 0.9111684958037444 |
| }, |
| { |
| "epoch": 0.8557519856520626, |
| "grad_norm": 4.15625, |
| "learning_rate": 1.1165335182657365e-06, |
| "loss": 0.23050973415374756, |
| "step": 6680, |
| "token_acc": 0.9092639868460906 |
| }, |
| { |
| "epoch": 0.8563925185754547, |
| "grad_norm": 2.671875, |
| "learning_rate": 1.1068263117221568e-06, |
| "loss": 0.2229710578918457, |
| "step": 6685, |
| "token_acc": 0.9126929378287488 |
| }, |
| { |
| "epoch": 0.857033051498847, |
| "grad_norm": 4.09375, |
| "learning_rate": 1.0971590144735122e-06, |
| "loss": 0.22901148796081544, |
| "step": 6690, |
| "token_acc": 0.9086844368013758 |
| }, |
| { |
| "epoch": 0.8576735844222393, |
| "grad_norm": 3.09375, |
| "learning_rate": 1.0875316699030802e-06, |
| "loss": 0.22709619998931885, |
| "step": 6695, |
| "token_acc": 0.9104509880226574 |
| }, |
| { |
| "epoch": 0.8583141173456316, |
| "grad_norm": 3.171875, |
| "learning_rate": 1.0779443212148444e-06, |
| "loss": 0.2268310546875, |
| "step": 6700, |
| "token_acc": 0.9107427341227126 |
| }, |
| { |
| "epoch": 0.8583141173456316, |
| "eval_loss": 0.3350731432437897, |
| "eval_runtime": 104.1893, |
| "eval_samples_per_second": 95.979, |
| "eval_steps_per_second": 11.997, |
| "eval_token_acc": 0.8825652922579467, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.8589546502690238, |
| "grad_norm": 3.1875, |
| "learning_rate": 1.0683970114333032e-06, |
| "loss": 0.22931032180786132, |
| "step": 6705, |
| "token_acc": 0.9111389236545682 |
| }, |
| { |
| "epoch": 0.8595951831924161, |
| "grad_norm": 5.25, |
| "learning_rate": 1.0588897834032718e-06, |
| "loss": 0.2266333818435669, |
| "step": 6710, |
| "token_acc": 0.9122988654501532 |
| }, |
| { |
| "epoch": 0.8602357161158084, |
| "grad_norm": 4.28125, |
| "learning_rate": 1.0494226797896978e-06, |
| "loss": 0.22155840396881105, |
| "step": 6715, |
| "token_acc": 0.9117913343392973 |
| }, |
| { |
| "epoch": 0.8608762490392006, |
| "grad_norm": 2.859375, |
| "learning_rate": 1.0399957430774598e-06, |
| "loss": 0.23419654369354248, |
| "step": 6720, |
| "token_acc": 0.9080201906898485 |
| }, |
| { |
| "epoch": 0.8615167819625928, |
| "grad_norm": 3.875, |
| "learning_rate": 1.030609015571188e-06, |
| "loss": 0.23095030784606935, |
| "step": 6725, |
| "token_acc": 0.9089889579020014 |
| }, |
| { |
| "epoch": 0.8621573148859851, |
| "grad_norm": 3.265625, |
| "learning_rate": 1.021262539395066e-06, |
| "loss": 0.2203512191772461, |
| "step": 6730, |
| "token_acc": 0.9137580554474287 |
| }, |
| { |
| "epoch": 0.8627978478093774, |
| "grad_norm": 3.0, |
| "learning_rate": 1.0119563564926372e-06, |
| "loss": 0.22832462787628174, |
| "step": 6735, |
| "token_acc": 0.9118601531738133 |
| }, |
| { |
| "epoch": 0.8634383807327697, |
| "grad_norm": 4.90625, |
| "learning_rate": 1.0026905086266392e-06, |
| "loss": 0.22600264549255372, |
| "step": 6740, |
| "token_acc": 0.9125167076273013 |
| }, |
| { |
| "epoch": 0.864078913656162, |
| "grad_norm": 3.046875, |
| "learning_rate": 9.934650373787823e-07, |
| "loss": 0.22522459030151368, |
| "step": 6745, |
| "token_acc": 0.9116609294320138 |
| }, |
| { |
| "epoch": 0.8647194465795542, |
| "grad_norm": 3.421875, |
| "learning_rate": 9.842799841495986e-07, |
| "loss": 0.22795772552490234, |
| "step": 6750, |
| "token_acc": 0.9116564948275115 |
| }, |
| { |
| "epoch": 0.8653599795029464, |
| "grad_norm": 4.75, |
| "learning_rate": 9.751353901582294e-07, |
| "loss": 0.22496397495269777, |
| "step": 6755, |
| "token_acc": 0.9126722718210973 |
| }, |
| { |
| "epoch": 0.8660005124263387, |
| "grad_norm": 2.765625, |
| "learning_rate": 9.660312964422469e-07, |
| "loss": 0.2258981943130493, |
| "step": 6760, |
| "token_acc": 0.9107835531419706 |
| }, |
| { |
| "epoch": 0.866641045349731, |
| "grad_norm": 3.734375, |
| "learning_rate": 9.569677438574842e-07, |
| "loss": 0.22349743843078612, |
| "step": 6765, |
| "token_acc": 0.912551306977749 |
| }, |
| { |
| "epoch": 0.8672815782731232, |
| "grad_norm": 5.46875, |
| "learning_rate": 9.479447730778268e-07, |
| "loss": 0.22322914600372315, |
| "step": 6770, |
| "token_acc": 0.911628910463862 |
| }, |
| { |
| "epoch": 0.8679221111965155, |
| "grad_norm": 2.984375, |
| "learning_rate": 9.389624245950601e-07, |
| "loss": 0.217413330078125, |
| "step": 6775, |
| "token_acc": 0.9126826316244488 |
| }, |
| { |
| "epoch": 0.8685626441199078, |
| "grad_norm": 3.15625, |
| "learning_rate": 9.300207387186555e-07, |
| "loss": 0.237738037109375, |
| "step": 6780, |
| "token_acc": 0.9056814760655456 |
| }, |
| { |
| "epoch": 0.8692031770433001, |
| "grad_norm": 3.96875, |
| "learning_rate": 9.211197555756157e-07, |
| "loss": 0.22690942287445068, |
| "step": 6785, |
| "token_acc": 0.9100542775911088 |
| }, |
| { |
| "epoch": 0.8698437099666922, |
| "grad_norm": 4.40625, |
| "learning_rate": 9.122595151102809e-07, |
| "loss": 0.23275787830352784, |
| "step": 6790, |
| "token_acc": 0.9072804862278546 |
| }, |
| { |
| "epoch": 0.8704842428900845, |
| "grad_norm": 4.65625, |
| "learning_rate": 9.034400570841551e-07, |
| "loss": 0.22703733444213867, |
| "step": 6795, |
| "token_acc": 0.9107196692364012 |
| }, |
| { |
| "epoch": 0.8711247758134768, |
| "grad_norm": 3.0, |
| "learning_rate": 8.946614210757221e-07, |
| "loss": 0.22760224342346191, |
| "step": 6800, |
| "token_acc": 0.9104903571737438 |
| }, |
| { |
| "epoch": 0.8711247758134768, |
| "eval_loss": 0.33460694551467896, |
| "eval_runtime": 104.7122, |
| "eval_samples_per_second": 95.5, |
| "eval_steps_per_second": 11.937, |
| "eval_token_acc": 0.8824462518894198, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.8717653087368691, |
| "grad_norm": 3.1875, |
| "learning_rate": 8.859236464802756e-07, |
| "loss": 0.22689156532287597, |
| "step": 6805, |
| "token_acc": 0.9119266844505637 |
| }, |
| { |
| "epoch": 0.8724058416602614, |
| "grad_norm": 2.84375, |
| "learning_rate": 8.772267725097361e-07, |
| "loss": 0.23056597709655763, |
| "step": 6810, |
| "token_acc": 0.9115866839602248 |
| }, |
| { |
| "epoch": 0.8730463745836536, |
| "grad_norm": 3.375, |
| "learning_rate": 8.685708381924784e-07, |
| "loss": 0.23043975830078126, |
| "step": 6815, |
| "token_acc": 0.9112482202183199 |
| }, |
| { |
| "epoch": 0.8736869075070458, |
| "grad_norm": 2.828125, |
| "learning_rate": 8.599558823731524e-07, |
| "loss": 0.22515459060668946, |
| "step": 6820, |
| "token_acc": 0.9119896305897602 |
| }, |
| { |
| "epoch": 0.8743274404304381, |
| "grad_norm": 4.8125, |
| "learning_rate": 8.513819437125148e-07, |
| "loss": 0.2265780448913574, |
| "step": 6825, |
| "token_acc": 0.9123948304276173 |
| }, |
| { |
| "epoch": 0.8749679733538304, |
| "grad_norm": 3.21875, |
| "learning_rate": 8.428490606872519e-07, |
| "loss": 0.22168455123901368, |
| "step": 6830, |
| "token_acc": 0.9113792656026989 |
| }, |
| { |
| "epoch": 0.8756085062772226, |
| "grad_norm": 2.90625, |
| "learning_rate": 8.343572715898041e-07, |
| "loss": 0.2171454668045044, |
| "step": 6835, |
| "token_acc": 0.914162535029101 |
| }, |
| { |
| "epoch": 0.8762490392006149, |
| "grad_norm": 3.109375, |
| "learning_rate": 8.259066145282024e-07, |
| "loss": 0.21893837451934814, |
| "step": 6840, |
| "token_acc": 0.9133895738697815 |
| }, |
| { |
| "epoch": 0.8768895721240072, |
| "grad_norm": 3.953125, |
| "learning_rate": 8.17497127425888e-07, |
| "loss": 0.22579605579376222, |
| "step": 6845, |
| "token_acc": 0.9109958954417801 |
| }, |
| { |
| "epoch": 0.8775301050473995, |
| "grad_norm": 2.90625, |
| "learning_rate": 8.091288480215509e-07, |
| "loss": 0.2259922981262207, |
| "step": 6850, |
| "token_acc": 0.9134802754081324 |
| }, |
| { |
| "epoch": 0.8781706379707916, |
| "grad_norm": 2.921875, |
| "learning_rate": 8.008018138689477e-07, |
| "loss": 0.23148341178894044, |
| "step": 6855, |
| "token_acc": 0.9080638206123329 |
| }, |
| { |
| "epoch": 0.8788111708941839, |
| "grad_norm": 2.75, |
| "learning_rate": 7.925160623367534e-07, |
| "loss": 0.22035045623779298, |
| "step": 6860, |
| "token_acc": 0.9124632924512005 |
| }, |
| { |
| "epoch": 0.8794517038175762, |
| "grad_norm": 3.125, |
| "learning_rate": 7.842716306083709e-07, |
| "loss": 0.22205777168273927, |
| "step": 6865, |
| "token_acc": 0.9132595729968018 |
| }, |
| { |
| "epoch": 0.8800922367409685, |
| "grad_norm": 2.640625, |
| "learning_rate": 7.760685556817837e-07, |
| "loss": 0.22817633152008057, |
| "step": 6870, |
| "token_acc": 0.908126751455055 |
| }, |
| { |
| "epoch": 0.8807327696643608, |
| "grad_norm": 5.34375, |
| "learning_rate": 7.679068743693741e-07, |
| "loss": 0.2194456100463867, |
| "step": 6875, |
| "token_acc": 0.914544352044352 |
| }, |
| { |
| "epoch": 0.881373302587753, |
| "grad_norm": 3.546875, |
| "learning_rate": 7.59786623297768e-07, |
| "loss": 0.22601814270019532, |
| "step": 6880, |
| "token_acc": 0.911353032659409 |
| }, |
| { |
| "epoch": 0.8820138355111453, |
| "grad_norm": 3.0, |
| "learning_rate": 7.517078389076715e-07, |
| "loss": 0.23260602951049805, |
| "step": 6885, |
| "token_acc": 0.9088834345261163 |
| }, |
| { |
| "epoch": 0.8826543684345375, |
| "grad_norm": 2.765625, |
| "learning_rate": 7.43670557453694e-07, |
| "loss": 0.22155818939208985, |
| "step": 6890, |
| "token_acc": 0.9120893334483056 |
| }, |
| { |
| "epoch": 0.8832949013579298, |
| "grad_norm": 56.0, |
| "learning_rate": 7.35674815004207e-07, |
| "loss": 0.23186612129211426, |
| "step": 6895, |
| "token_acc": 0.9093760742523204 |
| }, |
| { |
| "epoch": 0.883935434281322, |
| "grad_norm": 3.671875, |
| "learning_rate": 7.277206474411591e-07, |
| "loss": 0.22928218841552733, |
| "step": 6900, |
| "token_acc": 0.9102180604326527 |
| }, |
| { |
| "epoch": 0.883935434281322, |
| "eval_loss": 0.3345526456832886, |
| "eval_runtime": 103.6333, |
| "eval_samples_per_second": 96.494, |
| "eval_steps_per_second": 12.062, |
| "eval_token_acc": 0.8824102629407954, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.8845759672047143, |
| "grad_norm": 3.9375, |
| "learning_rate": 7.198080904599314e-07, |
| "loss": 0.22185420989990234, |
| "step": 6905, |
| "token_acc": 0.9122216468151217 |
| }, |
| { |
| "epoch": 0.8852165001281066, |
| "grad_norm": 3.5, |
| "learning_rate": 7.119371795691732e-07, |
| "loss": 0.22938218116760253, |
| "step": 6910, |
| "token_acc": 0.9106750053914169 |
| }, |
| { |
| "epoch": 0.8858570330514989, |
| "grad_norm": 3.078125, |
| "learning_rate": 7.041079500906389e-07, |
| "loss": 0.22525992393493652, |
| "step": 6915, |
| "token_acc": 0.9117697816895332 |
| }, |
| { |
| "epoch": 0.8864975659748912, |
| "grad_norm": 3.015625, |
| "learning_rate": 6.963204371590327e-07, |
| "loss": 0.22642955780029297, |
| "step": 6920, |
| "token_acc": 0.9109674639086404 |
| }, |
| { |
| "epoch": 0.8871380988982833, |
| "grad_norm": 10.5, |
| "learning_rate": 6.885746757218504e-07, |
| "loss": 0.2312746524810791, |
| "step": 6925, |
| "token_acc": 0.9084555651423641 |
| }, |
| { |
| "epoch": 0.8877786318216756, |
| "grad_norm": 4.15625, |
| "learning_rate": 6.808707005392234e-07, |
| "loss": 0.22308661937713622, |
| "step": 6930, |
| "token_acc": 0.9129004329004329 |
| }, |
| { |
| "epoch": 0.8884191647450679, |
| "grad_norm": 4.21875, |
| "learning_rate": 6.73208546183759e-07, |
| "loss": 0.23537328243255615, |
| "step": 6935, |
| "token_acc": 0.9080825451418745 |
| }, |
| { |
| "epoch": 0.8890596976684602, |
| "grad_norm": 2.828125, |
| "learning_rate": 6.655882470403918e-07, |
| "loss": 0.22550048828125, |
| "step": 6940, |
| "token_acc": 0.9109734436598362 |
| }, |
| { |
| "epoch": 0.8897002305918524, |
| "grad_norm": 2.734375, |
| "learning_rate": 6.580098373062227e-07, |
| "loss": 0.21899161338806153, |
| "step": 6945, |
| "token_acc": 0.9117697816895332 |
| }, |
| { |
| "epoch": 0.8903407635152447, |
| "grad_norm": 2.453125, |
| "learning_rate": 6.504733509903693e-07, |
| "loss": 0.22932813167572022, |
| "step": 6950, |
| "token_acc": 0.9097964815453604 |
| }, |
| { |
| "epoch": 0.890981296438637, |
| "grad_norm": 4.28125, |
| "learning_rate": 6.429788219138111e-07, |
| "loss": 0.22290611267089844, |
| "step": 6955, |
| "token_acc": 0.9123820195664354 |
| }, |
| { |
| "epoch": 0.8916218293620292, |
| "grad_norm": 4.53125, |
| "learning_rate": 6.355262837092424e-07, |
| "loss": 0.2280646324157715, |
| "step": 6960, |
| "token_acc": 0.909892094063024 |
| }, |
| { |
| "epoch": 0.8922623622854214, |
| "grad_norm": 3.609375, |
| "learning_rate": 6.281157698209139e-07, |
| "loss": 0.23290627002716063, |
| "step": 6965, |
| "token_acc": 0.9101938603687233 |
| }, |
| { |
| "epoch": 0.8929028952088137, |
| "grad_norm": 4.15625, |
| "learning_rate": 6.207473135044905e-07, |
| "loss": 0.22637267112731935, |
| "step": 6970, |
| "token_acc": 0.9106436069523318 |
| }, |
| { |
| "epoch": 0.893543428132206, |
| "grad_norm": 2.84375, |
| "learning_rate": 6.134209478268904e-07, |
| "loss": 0.22555007934570312, |
| "step": 6975, |
| "token_acc": 0.9121092067866678 |
| }, |
| { |
| "epoch": 0.8941839610555983, |
| "grad_norm": 3.59375, |
| "learning_rate": 6.061367056661582e-07, |
| "loss": 0.2194199800491333, |
| "step": 6980, |
| "token_acc": 0.914880720439884 |
| }, |
| { |
| "epoch": 0.8948244939789906, |
| "grad_norm": 3.34375, |
| "learning_rate": 5.988946197112866e-07, |
| "loss": 0.22179160118103028, |
| "step": 6985, |
| "token_acc": 0.9131431041936878 |
| }, |
| { |
| "epoch": 0.8954650269023828, |
| "grad_norm": 2.546875, |
| "learning_rate": 5.916947224621039e-07, |
| "loss": 0.2265388011932373, |
| "step": 6990, |
| "token_acc": 0.9122412824612194 |
| }, |
| { |
| "epoch": 0.896105559825775, |
| "grad_norm": 3.109375, |
| "learning_rate": 5.845370462290978e-07, |
| "loss": 0.22730591297149658, |
| "step": 6995, |
| "token_acc": 0.909079168281028 |
| }, |
| { |
| "epoch": 0.8967460927491673, |
| "grad_norm": 3.15625, |
| "learning_rate": 5.774216231332875e-07, |
| "loss": 0.22771029472351073, |
| "step": 7000, |
| "token_acc": 0.9113219754151392 |
| }, |
| { |
| "epoch": 0.8967460927491673, |
| "eval_loss": 0.33467555046081543, |
| "eval_runtime": 103.9504, |
| "eval_samples_per_second": 96.2, |
| "eval_steps_per_second": 12.025, |
| "eval_token_acc": 0.8823936526568149, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.8973866256725596, |
| "grad_norm": 3.375, |
| "learning_rate": 5.703484851060825e-07, |
| "loss": 0.23281164169311525, |
| "step": 7005, |
| "token_acc": 0.9091221882929766 |
| }, |
| { |
| "epoch": 0.8980271585959518, |
| "grad_norm": 3.078125, |
| "learning_rate": 5.633176638891191e-07, |
| "loss": 0.2271268367767334, |
| "step": 7010, |
| "token_acc": 0.9112791702679343 |
| }, |
| { |
| "epoch": 0.8986676915193441, |
| "grad_norm": 4.0625, |
| "learning_rate": 5.563291910341462e-07, |
| "loss": 0.22890748977661132, |
| "step": 7015, |
| "token_acc": 0.9119872731963196 |
| }, |
| { |
| "epoch": 0.8993082244427364, |
| "grad_norm": 3.109375, |
| "learning_rate": 5.493830979028569e-07, |
| "loss": 0.22680349349975587, |
| "step": 7020, |
| "token_acc": 0.913206895061995 |
| }, |
| { |
| "epoch": 0.8999487573661287, |
| "grad_norm": 23.25, |
| "learning_rate": 5.424794156667645e-07, |
| "loss": 0.22985119819641114, |
| "step": 7025, |
| "token_acc": 0.9109837054918527 |
| }, |
| { |
| "epoch": 0.9005892902895208, |
| "grad_norm": 4.3125, |
| "learning_rate": 5.356181753070588e-07, |
| "loss": 0.22275919914245607, |
| "step": 7030, |
| "token_acc": 0.9113749190589251 |
| }, |
| { |
| "epoch": 0.9012298232129131, |
| "grad_norm": 3.203125, |
| "learning_rate": 5.287994076144643e-07, |
| "loss": 0.22965426445007325, |
| "step": 7035, |
| "token_acc": 0.9091612903225806 |
| }, |
| { |
| "epoch": 0.9018703561363054, |
| "grad_norm": 5.8125, |
| "learning_rate": 5.220231431891032e-07, |
| "loss": 0.2193136692047119, |
| "step": 7040, |
| "token_acc": 0.9128340853870184 |
| }, |
| { |
| "epoch": 0.9025108890596977, |
| "grad_norm": 19.625, |
| "learning_rate": 5.152894124403618e-07, |
| "loss": 0.2251948356628418, |
| "step": 7045, |
| "token_acc": 0.9117292456079917 |
| }, |
| { |
| "epoch": 0.90315142198309, |
| "grad_norm": 3.84375, |
| "learning_rate": 5.085982455867477e-07, |
| "loss": 0.22256324291229249, |
| "step": 7050, |
| "token_acc": 0.9116405307599518 |
| }, |
| { |
| "epoch": 0.9037919549064822, |
| "grad_norm": 4.1875, |
| "learning_rate": 5.019496726557571e-07, |
| "loss": 0.23459949493408203, |
| "step": 7055, |
| "token_acc": 0.9083365578915689 |
| }, |
| { |
| "epoch": 0.9044324878298745, |
| "grad_norm": 5.3125, |
| "learning_rate": 4.953437234837444e-07, |
| "loss": 0.22082552909851075, |
| "step": 7060, |
| "token_acc": 0.9143460643158893 |
| }, |
| { |
| "epoch": 0.9050730207532667, |
| "grad_norm": 2.859375, |
| "learning_rate": 4.887804277157803e-07, |
| "loss": 0.228281831741333, |
| "step": 7065, |
| "token_acc": 0.9109963417258446 |
| }, |
| { |
| "epoch": 0.905713553676659, |
| "grad_norm": 2.671875, |
| "learning_rate": 4.822598148055235e-07, |
| "loss": 0.2322796106338501, |
| "step": 7070, |
| "token_acc": 0.9108313211452225 |
| }, |
| { |
| "epoch": 0.9063540866000512, |
| "grad_norm": 3.078125, |
| "learning_rate": 4.757819140150888e-07, |
| "loss": 0.23224186897277832, |
| "step": 7075, |
| "token_acc": 0.9088714544357273 |
| }, |
| { |
| "epoch": 0.9069946195234435, |
| "grad_norm": 2.484375, |
| "learning_rate": 4.693467544149133e-07, |
| "loss": 0.21920361518859863, |
| "step": 7080, |
| "token_acc": 0.9131602894657018 |
| }, |
| { |
| "epoch": 0.9076351524468358, |
| "grad_norm": 3.484375, |
| "learning_rate": 4.629543648836288e-07, |
| "loss": 0.21608197689056396, |
| "step": 7085, |
| "token_acc": 0.9152600757836721 |
| }, |
| { |
| "epoch": 0.9082756853702281, |
| "grad_norm": 3.203125, |
| "learning_rate": 4.566047741079316e-07, |
| "loss": 0.2328326940536499, |
| "step": 7090, |
| "token_acc": 0.9090478037846459 |
| }, |
| { |
| "epoch": 0.9089162182936202, |
| "grad_norm": 3.515625, |
| "learning_rate": 4.5029801058244726e-07, |
| "loss": 0.23201301097869872, |
| "step": 7095, |
| "token_acc": 0.9082667817828621 |
| }, |
| { |
| "epoch": 0.9095567512170125, |
| "grad_norm": 3.234375, |
| "learning_rate": 4.4403410260961733e-07, |
| "loss": 0.22749040126800538, |
| "step": 7100, |
| "token_acc": 0.9113321799307958 |
| }, |
| { |
| "epoch": 0.9095567512170125, |
| "eval_loss": 0.3351185917854309, |
| "eval_runtime": 103.2381, |
| "eval_samples_per_second": 96.863, |
| "eval_steps_per_second": 12.108, |
| "eval_token_acc": 0.8822164762943564, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.9101972841404048, |
| "grad_norm": 4.125, |
| "learning_rate": 4.3781307829955375e-07, |
| "loss": 0.22915854454040527, |
| "step": 7105, |
| "token_acc": 0.9114854122803249 |
| }, |
| { |
| "epoch": 0.9108378170637971, |
| "grad_norm": 2.9375, |
| "learning_rate": 4.3163496556993143e-07, |
| "loss": 0.22949614524841308, |
| "step": 7110, |
| "token_acc": 0.9098265398355787 |
| }, |
| { |
| "epoch": 0.9114783499871894, |
| "grad_norm": 4.65625, |
| "learning_rate": 4.2549979214584703e-07, |
| "loss": 0.234299373626709, |
| "step": 7115, |
| "token_acc": 0.9077444835579978 |
| }, |
| { |
| "epoch": 0.9121188829105816, |
| "grad_norm": 2.78125, |
| "learning_rate": 4.194075855597046e-07, |
| "loss": 0.21983301639556885, |
| "step": 7120, |
| "token_acc": 0.9148798481384012 |
| }, |
| { |
| "epoch": 0.9127594158339739, |
| "grad_norm": 2.59375, |
| "learning_rate": 4.133583731510893e-07, |
| "loss": 0.23418021202087402, |
| "step": 7125, |
| "token_acc": 0.9072267311345191 |
| }, |
| { |
| "epoch": 0.9133999487573661, |
| "grad_norm": 2.75, |
| "learning_rate": 4.073521820666393e-07, |
| "loss": 0.22026586532592773, |
| "step": 7130, |
| "token_acc": 0.9122671141517147 |
| }, |
| { |
| "epoch": 0.9140404816807584, |
| "grad_norm": 4.09375, |
| "learning_rate": 4.0138903925993957e-07, |
| "loss": 0.22925994396209717, |
| "step": 7135, |
| "token_acc": 0.910432351043235 |
| }, |
| { |
| "epoch": 0.9146810146041506, |
| "grad_norm": 3.140625, |
| "learning_rate": 3.954689714913762e-07, |
| "loss": 0.22760000228881835, |
| "step": 7140, |
| "token_acc": 0.911580763424628 |
| }, |
| { |
| "epoch": 0.9153215475275429, |
| "grad_norm": 3.9375, |
| "learning_rate": 3.895920053280422e-07, |
| "loss": 0.22435307502746582, |
| "step": 7145, |
| "token_acc": 0.9124141209004882 |
| }, |
| { |
| "epoch": 0.9159620804509352, |
| "grad_norm": 2.9375, |
| "learning_rate": 3.837581671435997e-07, |
| "loss": 0.2232006549835205, |
| "step": 7150, |
| "token_acc": 0.911838464199239 |
| }, |
| { |
| "epoch": 0.9166026133743275, |
| "grad_norm": 3.671875, |
| "learning_rate": 3.779674831181701e-07, |
| "loss": 0.2235502243041992, |
| "step": 7155, |
| "token_acc": 0.9120366369999136 |
| }, |
| { |
| "epoch": 0.9172431462977197, |
| "grad_norm": 2.71875, |
| "learning_rate": 3.722199792382164e-07, |
| "loss": 0.22374234199523926, |
| "step": 7160, |
| "token_acc": 0.9131279129984464 |
| }, |
| { |
| "epoch": 0.9178836792211119, |
| "grad_norm": 3.828125, |
| "learning_rate": 3.665156812964221e-07, |
| "loss": 0.22843289375305176, |
| "step": 7165, |
| "token_acc": 0.9109647990360616 |
| }, |
| { |
| "epoch": 0.9185242121445042, |
| "grad_norm": 2.609375, |
| "learning_rate": 3.608546148915804e-07, |
| "loss": 0.22373640537261963, |
| "step": 7170, |
| "token_acc": 0.9105750592289468 |
| }, |
| { |
| "epoch": 0.9191647450678965, |
| "grad_norm": 3.453125, |
| "learning_rate": 3.552368054284772e-07, |
| "loss": 0.21737513542175294, |
| "step": 7175, |
| "token_acc": 0.9161251191404558 |
| }, |
| { |
| "epoch": 0.9198052779912887, |
| "grad_norm": 3.515625, |
| "learning_rate": 3.496622781177761e-07, |
| "loss": 0.22703731060028076, |
| "step": 7180, |
| "token_acc": 0.9097332931190486 |
| }, |
| { |
| "epoch": 0.920445810914681, |
| "grad_norm": 3.984375, |
| "learning_rate": 3.441310579759072e-07, |
| "loss": 0.22722623348236085, |
| "step": 7185, |
| "token_acc": 0.9093100331425128 |
| }, |
| { |
| "epoch": 0.9210863438380733, |
| "grad_norm": 3.234375, |
| "learning_rate": 3.386431698249526e-07, |
| "loss": 0.2288762092590332, |
| "step": 7190, |
| "token_acc": 0.9103326439158911 |
| }, |
| { |
| "epoch": 0.9217268767614656, |
| "grad_norm": 3.484375, |
| "learning_rate": 3.3319863829253895e-07, |
| "loss": 0.22250890731811523, |
| "step": 7195, |
| "token_acc": 0.9124709527498064 |
| }, |
| { |
| "epoch": 0.9223674096848578, |
| "grad_norm": 3.390625, |
| "learning_rate": 3.277974878117207e-07, |
| "loss": 0.22609634399414064, |
| "step": 7200, |
| "token_acc": 0.9101618122977346 |
| }, |
| { |
| "epoch": 0.9223674096848578, |
| "eval_loss": 0.3345736861228943, |
| "eval_runtime": 102.7358, |
| "eval_samples_per_second": 97.337, |
| "eval_steps_per_second": 12.167, |
| "eval_token_acc": 0.8823410534242101, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.92300794260825, |
| "grad_norm": 3.390625, |
| "learning_rate": 3.2243974262087805e-07, |
| "loss": 0.2214569091796875, |
| "step": 7205, |
| "token_acc": 0.9122594594594594 |
| }, |
| { |
| "epoch": 0.9236484755316423, |
| "grad_norm": 3.125, |
| "learning_rate": 3.171254267636015e-07, |
| "loss": 0.23588757514953612, |
| "step": 7210, |
| "token_acc": 0.9061101549053356 |
| }, |
| { |
| "epoch": 0.9242890084550346, |
| "grad_norm": 5.15625, |
| "learning_rate": 3.1185456408858505e-07, |
| "loss": 0.22405190467834474, |
| "step": 7215, |
| "token_acc": 0.9125026992010364 |
| }, |
| { |
| "epoch": 0.9249295413784269, |
| "grad_norm": 2.921875, |
| "learning_rate": 3.0662717824952894e-07, |
| "loss": 0.22633728981018067, |
| "step": 7220, |
| "token_acc": 0.9114834596829773 |
| }, |
| { |
| "epoch": 0.9255700743018191, |
| "grad_norm": 2.890625, |
| "learning_rate": 3.014432927050126e-07, |
| "loss": 0.22840723991394044, |
| "step": 7225, |
| "token_acc": 0.910606582801999 |
| }, |
| { |
| "epoch": 0.9262106072252114, |
| "grad_norm": 4.09375, |
| "learning_rate": 2.9630293071841397e-07, |
| "loss": 0.22615447044372558, |
| "step": 7230, |
| "token_acc": 0.9125701943844492 |
| }, |
| { |
| "epoch": 0.9268511401486036, |
| "grad_norm": 8.75, |
| "learning_rate": 2.912061153577872e-07, |
| "loss": 0.22545180320739747, |
| "step": 7235, |
| "token_acc": 0.9107838891294933 |
| }, |
| { |
| "epoch": 0.9274916730719959, |
| "grad_norm": 3.375, |
| "learning_rate": 2.861528694957649e-07, |
| "loss": 0.22807738780975342, |
| "step": 7240, |
| "token_acc": 0.9106092073381793 |
| }, |
| { |
| "epoch": 0.9281322059953881, |
| "grad_norm": 3.140625, |
| "learning_rate": 2.8114321580945846e-07, |
| "loss": 0.23368797302246094, |
| "step": 7245, |
| "token_acc": 0.9072138340431023 |
| }, |
| { |
| "epoch": 0.9287727389187804, |
| "grad_norm": 3.359375, |
| "learning_rate": 2.761771767803512e-07, |
| "loss": 0.2348182201385498, |
| "step": 7250, |
| "token_acc": 0.9079960428405522 |
| }, |
| { |
| "epoch": 0.9294132718421727, |
| "grad_norm": 11.25, |
| "learning_rate": 2.71254774694204e-07, |
| "loss": 0.22567691802978515, |
| "step": 7255, |
| "token_acc": 0.9117279965569185 |
| }, |
| { |
| "epoch": 0.930053804765565, |
| "grad_norm": 2.546875, |
| "learning_rate": 2.6637603164094584e-07, |
| "loss": 0.2227564811706543, |
| "step": 7260, |
| "token_acc": 0.9113984055160526 |
| }, |
| { |
| "epoch": 0.9306943376889573, |
| "grad_norm": 12.5625, |
| "learning_rate": 2.615409695145832e-07, |
| "loss": 0.22351694107055664, |
| "step": 7265, |
| "token_acc": 0.9124437910757524 |
| }, |
| { |
| "epoch": 0.9313348706123494, |
| "grad_norm": 3.125, |
| "learning_rate": 2.567496100130973e-07, |
| "loss": 0.22547354698181152, |
| "step": 7270, |
| "token_acc": 0.9113066735688711 |
| }, |
| { |
| "epoch": 0.9319754035357417, |
| "grad_norm": 3.453125, |
| "learning_rate": 2.5200197463834843e-07, |
| "loss": 0.23171014785766603, |
| "step": 7275, |
| "token_acc": 0.9080668134144763 |
| }, |
| { |
| "epoch": 0.932615936459134, |
| "grad_norm": 9.375, |
| "learning_rate": 2.472980846959794e-07, |
| "loss": 0.22420947551727294, |
| "step": 7280, |
| "token_acc": 0.9112663303582977 |
| }, |
| { |
| "epoch": 0.9332564693825263, |
| "grad_norm": 9.25, |
| "learning_rate": 2.4263796129532e-07, |
| "loss": 0.22904155254364014, |
| "step": 7285, |
| "token_acc": 0.9106152457113376 |
| }, |
| { |
| "epoch": 0.9338970023059185, |
| "grad_norm": 4.625, |
| "learning_rate": 2.3802162534929063e-07, |
| "loss": 0.22856383323669432, |
| "step": 7290, |
| "token_acc": 0.9091379087501615 |
| }, |
| { |
| "epoch": 0.9345375352293108, |
| "grad_norm": 3.203125, |
| "learning_rate": 2.33449097574312e-07, |
| "loss": 0.23378937244415282, |
| "step": 7295, |
| "token_acc": 0.90822689545435 |
| }, |
| { |
| "epoch": 0.9351780681527031, |
| "grad_norm": 2.71875, |
| "learning_rate": 2.2892039849020552e-07, |
| "loss": 0.22789459228515624, |
| "step": 7300, |
| "token_acc": 0.9115628641719539 |
| }, |
| { |
| "epoch": 0.9351780681527031, |
| "eval_loss": 0.3358408808708191, |
| "eval_runtime": 109.8432, |
| "eval_samples_per_second": 91.039, |
| "eval_steps_per_second": 11.38, |
| "eval_token_acc": 0.882407494560132, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.9358186010760953, |
| "grad_norm": 4.15625, |
| "learning_rate": 2.2443554842011107e-07, |
| "loss": 0.22101092338562012, |
| "step": 7305, |
| "token_acc": 0.9120034542314335 |
| }, |
| { |
| "epoch": 0.9364591339994875, |
| "grad_norm": 2.796875, |
| "learning_rate": 2.199945674903836e-07, |
| "loss": 0.22407989501953124, |
| "step": 7310, |
| "token_acc": 0.9132890651948948 |
| }, |
| { |
| "epoch": 0.9370996669228798, |
| "grad_norm": 3.25, |
| "learning_rate": 2.155974756305157e-07, |
| "loss": 0.22648565769195556, |
| "step": 7315, |
| "token_acc": 0.9100392258286996 |
| }, |
| { |
| "epoch": 0.9377401998462721, |
| "grad_norm": 3.484375, |
| "learning_rate": 2.112442925730407e-07, |
| "loss": 0.2312589168548584, |
| "step": 7320, |
| "token_acc": 0.9074082027056045 |
| }, |
| { |
| "epoch": 0.9383807327696644, |
| "grad_norm": 3.171875, |
| "learning_rate": 2.0693503785344294e-07, |
| "loss": 0.2254408359527588, |
| "step": 7325, |
| "token_acc": 0.9107181367263317 |
| }, |
| { |
| "epoch": 0.9390212656930567, |
| "grad_norm": 4.40625, |
| "learning_rate": 2.0266973081007335e-07, |
| "loss": 0.22427871227264404, |
| "step": 7330, |
| "token_acc": 0.9129628831314394 |
| }, |
| { |
| "epoch": 0.9396617986164489, |
| "grad_norm": 2.53125, |
| "learning_rate": 1.9844839058406174e-07, |
| "loss": 0.23152542114257812, |
| "step": 7335, |
| "token_acc": 0.9090241796200346 |
| }, |
| { |
| "epoch": 0.9403023315398411, |
| "grad_norm": 3.28125, |
| "learning_rate": 1.9427103611923458e-07, |
| "loss": 0.23547761440277098, |
| "step": 7340, |
| "token_acc": 0.9070280082987552 |
| }, |
| { |
| "epoch": 0.9409428644632334, |
| "grad_norm": 3.71875, |
| "learning_rate": 1.9013768616201856e-07, |
| "loss": 0.23559412956237794, |
| "step": 7345, |
| "token_acc": 0.9068175949040199 |
| }, |
| { |
| "epoch": 0.9415833973866257, |
| "grad_norm": 6.5, |
| "learning_rate": 1.860483592613749e-07, |
| "loss": 0.23159332275390626, |
| "step": 7350, |
| "token_acc": 0.9084540336098337 |
| }, |
| { |
| "epoch": 0.9422239303100179, |
| "grad_norm": 3.40625, |
| "learning_rate": 1.8200307376869396e-07, |
| "loss": 0.228605318069458, |
| "step": 7355, |
| "token_acc": 0.9085082587749483 |
| }, |
| { |
| "epoch": 0.9428644632334102, |
| "grad_norm": 3.234375, |
| "learning_rate": 1.7800184783773433e-07, |
| "loss": 0.22635889053344727, |
| "step": 7360, |
| "token_acc": 0.9109769247358206 |
| }, |
| { |
| "epoch": 0.9435049961568025, |
| "grad_norm": 3.484375, |
| "learning_rate": 1.7404469942452597e-07, |
| "loss": 0.21885204315185547, |
| "step": 7365, |
| "token_acc": 0.9139455635595048 |
| }, |
| { |
| "epoch": 0.9441455290801947, |
| "grad_norm": 3.234375, |
| "learning_rate": 1.7013164628729483e-07, |
| "loss": 0.22530250549316405, |
| "step": 7370, |
| "token_acc": 0.9120013769955678 |
| }, |
| { |
| "epoch": 0.944786062003587, |
| "grad_norm": 3.421875, |
| "learning_rate": 1.6626270598638972e-07, |
| "loss": 0.23129520416259766, |
| "step": 7375, |
| "token_acc": 0.9073307460112118 |
| }, |
| { |
| "epoch": 0.9454265949269792, |
| "grad_norm": 3.359375, |
| "learning_rate": 1.624378958841888e-07, |
| "loss": 0.23236556053161622, |
| "step": 7380, |
| "token_acc": 0.9091888166113815 |
| }, |
| { |
| "epoch": 0.9460671278503715, |
| "grad_norm": 4.34375, |
| "learning_rate": 1.5865723314503535e-07, |
| "loss": 0.2195420265197754, |
| "step": 7385, |
| "token_acc": 0.9148448976064979 |
| }, |
| { |
| "epoch": 0.9467076607737638, |
| "grad_norm": 3.5625, |
| "learning_rate": 1.5492073473515334e-07, |
| "loss": 0.2299337387084961, |
| "step": 7390, |
| "token_acc": 0.9092899459088177 |
| }, |
| { |
| "epoch": 0.9473481936971561, |
| "grad_norm": 3.296875, |
| "learning_rate": 1.5122841742257533e-07, |
| "loss": 0.2305469512939453, |
| "step": 7395, |
| "token_acc": 0.9094121703154628 |
| }, |
| { |
| "epoch": 0.9479887266205483, |
| "grad_norm": 3.0, |
| "learning_rate": 1.475802977770646e-07, |
| "loss": 0.2353046417236328, |
| "step": 7400, |
| "token_acc": 0.9065898637321068 |
| }, |
| { |
| "epoch": 0.9479887266205483, |
| "eval_loss": 0.3345721662044525, |
| "eval_runtime": 102.9085, |
| "eval_samples_per_second": 97.174, |
| "eval_steps_per_second": 12.147, |
| "eval_token_acc": 0.8821223513518003, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.9486292595439405, |
| "grad_norm": 3.3125, |
| "learning_rate": 1.43976392170041e-07, |
| "loss": 0.22667450904846193, |
| "step": 7405, |
| "token_acc": 0.9105691056910569 |
| }, |
| { |
| "epoch": 0.9492697924673328, |
| "grad_norm": 3.65625, |
| "learning_rate": 1.404167167745074e-07, |
| "loss": 0.23315582275390626, |
| "step": 7410, |
| "token_acc": 0.9090674355553643 |
| }, |
| { |
| "epoch": 0.9499103253907251, |
| "grad_norm": 4.09375, |
| "learning_rate": 1.3690128756498e-07, |
| "loss": 0.2316906452178955, |
| "step": 7415, |
| "token_acc": 0.9079900017238407 |
| }, |
| { |
| "epoch": 0.9505508583141173, |
| "grad_norm": 3.5, |
| "learning_rate": 1.3343012031741155e-07, |
| "loss": 0.22472708225250243, |
| "step": 7420, |
| "token_acc": 0.9114065659825309 |
| }, |
| { |
| "epoch": 0.9511913912375096, |
| "grad_norm": 3.453125, |
| "learning_rate": 1.30003230609127e-07, |
| "loss": 0.22815487384796143, |
| "step": 7425, |
| "token_acc": 0.9114841828147253 |
| }, |
| { |
| "epoch": 0.9518319241609019, |
| "grad_norm": 3.375, |
| "learning_rate": 1.266206338187448e-07, |
| "loss": 0.2252589225769043, |
| "step": 7430, |
| "token_acc": 0.911378744712078 |
| }, |
| { |
| "epoch": 0.9524724570842942, |
| "grad_norm": 4.03125, |
| "learning_rate": 1.2328234512611893e-07, |
| "loss": 0.23881807327270507, |
| "step": 7435, |
| "token_acc": 0.9063224808865218 |
| }, |
| { |
| "epoch": 0.9531129900076863, |
| "grad_norm": 3.765625, |
| "learning_rate": 1.1998837951226027e-07, |
| "loss": 0.2236201286315918, |
| "step": 7440, |
| "token_acc": 0.9124207256568445 |
| }, |
| { |
| "epoch": 0.9537535229310786, |
| "grad_norm": 3.671875, |
| "learning_rate": 1.1673875175927773e-07, |
| "loss": 0.22488207817077638, |
| "step": 7445, |
| "token_acc": 0.9117697816895332 |
| }, |
| { |
| "epoch": 0.9543940558544709, |
| "grad_norm": 15.125, |
| "learning_rate": 1.1353347645030488e-07, |
| "loss": 0.23006877899169922, |
| "step": 7450, |
| "token_acc": 0.9091143483305402 |
| }, |
| { |
| "epoch": 0.9550345887778632, |
| "grad_norm": 3.90625, |
| "learning_rate": 1.1037256796943896e-07, |
| "loss": 0.23117449283599853, |
| "step": 7455, |
| "token_acc": 0.9106395825246906 |
| }, |
| { |
| "epoch": 0.9556751217012555, |
| "grad_norm": 3.453125, |
| "learning_rate": 1.072560405016776e-07, |
| "loss": 0.22410707473754882, |
| "step": 7460, |
| "token_acc": 0.913257805067889 |
| }, |
| { |
| "epoch": 0.9563156546246477, |
| "grad_norm": 3.1875, |
| "learning_rate": 1.0418390803284772e-07, |
| "loss": 0.22124795913696288, |
| "step": 7465, |
| "token_acc": 0.9126255442044916 |
| }, |
| { |
| "epoch": 0.95695618754804, |
| "grad_norm": 3.0, |
| "learning_rate": 1.0115618434955233e-07, |
| "loss": 0.22695465087890626, |
| "step": 7470, |
| "token_acc": 0.9115334773218142 |
| }, |
| { |
| "epoch": 0.9575967204714322, |
| "grad_norm": 2.796875, |
| "learning_rate": 9.817288303910267e-08, |
| "loss": 0.22336146831512452, |
| "step": 7475, |
| "token_acc": 0.9113754903228587 |
| }, |
| { |
| "epoch": 0.9582372533948245, |
| "grad_norm": 2.828125, |
| "learning_rate": 9.523401748945837e-08, |
| "loss": 0.22532784938812256, |
| "step": 7480, |
| "token_acc": 0.9119910089046425 |
| }, |
| { |
| "epoch": 0.9588777863182167, |
| "grad_norm": 5.875, |
| "learning_rate": 9.233960088916749e-08, |
| "loss": 0.23188343048095703, |
| "step": 7485, |
| "token_acc": 0.9097377954114197 |
| }, |
| { |
| "epoch": 0.959518319241609, |
| "grad_norm": 12.875, |
| "learning_rate": 8.948964622730761e-08, |
| "loss": 0.22753703594207764, |
| "step": 7490, |
| "token_acc": 0.911497176359012 |
| }, |
| { |
| "epoch": 0.9601588521650013, |
| "grad_norm": 4.84375, |
| "learning_rate": 8.668416629342813e-08, |
| "loss": 0.23263895511627197, |
| "step": 7495, |
| "token_acc": 0.9096037898363479 |
| }, |
| { |
| "epoch": 0.9607993850883936, |
| "grad_norm": 3.40625, |
| "learning_rate": 8.392317367749259e-08, |
| "loss": 0.23171706199645997, |
| "step": 7500, |
| "token_acc": 0.9093455125166962 |
| }, |
| { |
| "epoch": 0.9607993850883936, |
| "eval_loss": 0.3347827196121216, |
| "eval_runtime": 102.5334, |
| "eval_samples_per_second": 97.529, |
| "eval_steps_per_second": 12.191, |
| "eval_token_acc": 0.8825099246446784, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.9614399180117859, |
| "grad_norm": 2.796875, |
| "learning_rate": 8.120668076982085e-08, |
| "loss": 0.23077220916748048, |
| "step": 7505, |
| "token_acc": 0.9088870682592385 |
| }, |
| { |
| "epoch": 0.962080450935178, |
| "grad_norm": 3.296875, |
| "learning_rate": 7.853469976103367e-08, |
| "loss": 0.2174984931945801, |
| "step": 7510, |
| "token_acc": 0.9130472325360505 |
| }, |
| { |
| "epoch": 0.9627209838585703, |
| "grad_norm": 2.546875, |
| "learning_rate": 7.590724264200044e-08, |
| "loss": 0.2254131555557251, |
| "step": 7515, |
| "token_acc": 0.9099638305201516 |
| }, |
| { |
| "epoch": 0.9633615167819626, |
| "grad_norm": 2.828125, |
| "learning_rate": 7.332432120378263e-08, |
| "loss": 0.21682121753692626, |
| "step": 7520, |
| "token_acc": 0.9132147340172272 |
| }, |
| { |
| "epoch": 0.9640020497053549, |
| "grad_norm": 4.53125, |
| "learning_rate": 7.07859470375838e-08, |
| "loss": 0.22123939990997316, |
| "step": 7525, |
| "token_acc": 0.913344287814581 |
| }, |
| { |
| "epoch": 0.9646425826287471, |
| "grad_norm": 4.0, |
| "learning_rate": 6.829213153469294e-08, |
| "loss": 0.2257563591003418, |
| "step": 7530, |
| "token_acc": 0.9111034244206156 |
| }, |
| { |
| "epoch": 0.9652831155521394, |
| "grad_norm": 3.25, |
| "learning_rate": 6.584288588643795e-08, |
| "loss": 0.21516809463500977, |
| "step": 7535, |
| "token_acc": 0.9163821788168186 |
| }, |
| { |
| "epoch": 0.9659236484755317, |
| "grad_norm": 5.46875, |
| "learning_rate": 6.343822108413111e-08, |
| "loss": 0.23532419204711913, |
| "step": 7540, |
| "token_acc": 0.9062943071965628 |
| }, |
| { |
| "epoch": 0.9665641813989239, |
| "grad_norm": 3.515625, |
| "learning_rate": 6.10781479190281e-08, |
| "loss": 0.22491927146911622, |
| "step": 7545, |
| "token_acc": 0.9103519579545944 |
| }, |
| { |
| "epoch": 0.9672047143223161, |
| "grad_norm": 3.078125, |
| "learning_rate": 5.8762676982265785e-08, |
| "loss": 0.23122644424438477, |
| "step": 7550, |
| "token_acc": 0.9087346024636058 |
| }, |
| { |
| "epoch": 0.9678452472457084, |
| "grad_norm": 2.75, |
| "learning_rate": 5.649181866483e-08, |
| "loss": 0.22680530548095704, |
| "step": 7555, |
| "token_acc": 0.911484593837535 |
| }, |
| { |
| "epoch": 0.9684857801691007, |
| "grad_norm": 4.34375, |
| "learning_rate": 5.426558315749675e-08, |
| "loss": 0.22133951187133788, |
| "step": 7560, |
| "token_acc": 0.9124602287384986 |
| }, |
| { |
| "epoch": 0.969126313092493, |
| "grad_norm": 4.71875, |
| "learning_rate": 5.208398045079222e-08, |
| "loss": 0.2312103033065796, |
| "step": 7565, |
| "token_acc": 0.9101176672678862 |
| }, |
| { |
| "epoch": 0.9697668460158853, |
| "grad_norm": 3.078125, |
| "learning_rate": 4.994702033494947e-08, |
| "loss": 0.22229225635528566, |
| "step": 7570, |
| "token_acc": 0.9136234136234136 |
| }, |
| { |
| "epoch": 0.9704073789392775, |
| "grad_norm": 3.125, |
| "learning_rate": 4.785471239985851e-08, |
| "loss": 0.2287161111831665, |
| "step": 7575, |
| "token_acc": 0.9087663454920853 |
| }, |
| { |
| "epoch": 0.9710479118626697, |
| "grad_norm": 2.75, |
| "learning_rate": 4.5807066035028494e-08, |
| "loss": 0.226922607421875, |
| "step": 7580, |
| "token_acc": 0.9117697816895332 |
| }, |
| { |
| "epoch": 0.971688444786062, |
| "grad_norm": 7.21875, |
| "learning_rate": 4.3804090429543366e-08, |
| "loss": 0.23184614181518554, |
| "step": 7585, |
| "token_acc": 0.9090673798636874 |
| }, |
| { |
| "epoch": 0.9723289777094543, |
| "grad_norm": 3.640625, |
| "learning_rate": 4.184579457202298e-08, |
| "loss": 0.22905006408691406, |
| "step": 7590, |
| "token_acc": 0.9108255451713395 |
| }, |
| { |
| "epoch": 0.9729695106328465, |
| "grad_norm": 3.046875, |
| "learning_rate": 3.993218725057868e-08, |
| "loss": 0.22608802318572999, |
| "step": 7595, |
| "token_acc": 0.911466643667256 |
| }, |
| { |
| "epoch": 0.9736100435562388, |
| "grad_norm": 3.140625, |
| "learning_rate": 3.806327705277557e-08, |
| "loss": 0.23126821517944335, |
| "step": 7600, |
| "token_acc": 0.9085889305897071 |
| }, |
| { |
| "epoch": 0.9736100435562388, |
| "eval_loss": 0.33461424708366394, |
| "eval_runtime": 103.0674, |
| "eval_samples_per_second": 97.024, |
| "eval_steps_per_second": 12.128, |
| "eval_token_acc": 0.8822912225722686, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.9742505764796311, |
| "grad_norm": 2.890625, |
| "learning_rate": 3.6239072365596984e-08, |
| "loss": 0.23053784370422364, |
| "step": 7605, |
| "token_acc": 0.9106689064047896 |
| }, |
| { |
| "epoch": 0.9748911094030234, |
| "grad_norm": 3.484375, |
| "learning_rate": 3.4459581375403395e-08, |
| "loss": 0.22285847663879393, |
| "step": 7610, |
| "token_acc": 0.9107173725151253 |
| }, |
| { |
| "epoch": 0.9755316423264155, |
| "grad_norm": 3.390625, |
| "learning_rate": 3.2724812067895795e-08, |
| "loss": 0.22678759098052978, |
| "step": 7615, |
| "token_acc": 0.9103671706263499 |
| }, |
| { |
| "epoch": 0.9761721752498078, |
| "grad_norm": 3.125, |
| "learning_rate": 3.103477222808016e-08, |
| "loss": 0.22554678916931153, |
| "step": 7620, |
| "token_acc": 0.9114635094845094 |
| }, |
| { |
| "epoch": 0.9768127081732001, |
| "grad_norm": 3.171875, |
| "learning_rate": 2.938946944023635e-08, |
| "loss": 0.2290804386138916, |
| "step": 7625, |
| "token_acc": 0.9116782006920415 |
| }, |
| { |
| "epoch": 0.9774532410965924, |
| "grad_norm": 2.96875, |
| "learning_rate": 2.7788911087877067e-08, |
| "loss": 0.22720465660095215, |
| "step": 7630, |
| "token_acc": 0.910641229921192 |
| }, |
| { |
| "epoch": 0.9780937740199847, |
| "grad_norm": 3.40625, |
| "learning_rate": 2.6233104353720063e-08, |
| "loss": 0.22470946311950685, |
| "step": 7635, |
| "token_acc": 0.9122565074987071 |
| }, |
| { |
| "epoch": 0.9787343069433769, |
| "grad_norm": 3.4375, |
| "learning_rate": 2.4722056219654843e-08, |
| "loss": 0.22875847816467285, |
| "step": 7640, |
| "token_acc": 0.9108098773959592 |
| }, |
| { |
| "epoch": 0.9793748398667691, |
| "grad_norm": 3.609375, |
| "learning_rate": 2.3255773466708266e-08, |
| "loss": 0.2208240509033203, |
| "step": 7645, |
| "token_acc": 0.9132234969378072 |
| }, |
| { |
| "epoch": 0.9800153727901614, |
| "grad_norm": 4.75, |
| "learning_rate": 2.1834262675021202e-08, |
| "loss": 0.2248084306716919, |
| "step": 7650, |
| "token_acc": 0.9121955410455775 |
| }, |
| { |
| "epoch": 0.9806559057135537, |
| "grad_norm": 3.625, |
| "learning_rate": 2.0457530223809695e-08, |
| "loss": 0.2183553695678711, |
| "step": 7655, |
| "token_acc": 0.9145100069013112 |
| }, |
| { |
| "epoch": 0.9812964386369459, |
| "grad_norm": 12.8125, |
| "learning_rate": 1.912558229134387e-08, |
| "loss": 0.22381486892700195, |
| "step": 7660, |
| "token_acc": 0.9117545822218398 |
| }, |
| { |
| "epoch": 0.9819369715603382, |
| "grad_norm": 2.734375, |
| "learning_rate": 1.7838424854915714e-08, |
| "loss": 0.22946505546569823, |
| "step": 7665, |
| "token_acc": 0.9092513668259503 |
| }, |
| { |
| "epoch": 0.9825775044837305, |
| "grad_norm": 3.125, |
| "learning_rate": 1.659606369081468e-08, |
| "loss": 0.23253355026245118, |
| "step": 7670, |
| "token_acc": 0.9086559186136736 |
| }, |
| { |
| "epoch": 0.9832180374071228, |
| "grad_norm": 4.375, |
| "learning_rate": 1.5398504374302124e-08, |
| "loss": 0.21708984375, |
| "step": 7675, |
| "token_acc": 0.9138436341694681 |
| }, |
| { |
| "epoch": 0.9838585703305149, |
| "grad_norm": 6.65625, |
| "learning_rate": 1.424575227958358e-08, |
| "loss": 0.2197282314300537, |
| "step": 7680, |
| "token_acc": 0.9130565972671236 |
| }, |
| { |
| "epoch": 0.9844991032539072, |
| "grad_norm": 3.65625, |
| "learning_rate": 1.3137812579785415e-08, |
| "loss": 0.22876739501953125, |
| "step": 7685, |
| "token_acc": 0.9095221666379162 |
| }, |
| { |
| "epoch": 0.9851396361772995, |
| "grad_norm": 2.953125, |
| "learning_rate": 1.2074690246937081e-08, |
| "loss": 0.22080717086791993, |
| "step": 7690, |
| "token_acc": 0.9124427942319316 |
| }, |
| { |
| "epoch": 0.9857801691006918, |
| "grad_norm": 5.46875, |
| "learning_rate": 1.1056390051936705e-08, |
| "loss": 0.23291680812835694, |
| "step": 7695, |
| "token_acc": 0.9067782067782068 |
| }, |
| { |
| "epoch": 0.986420702024084, |
| "grad_norm": 3.1875, |
| "learning_rate": 1.008291656454441e-08, |
| "loss": 0.22717700004577637, |
| "step": 7700, |
| "token_acc": 0.9099401610056395 |
| }, |
| { |
| "epoch": 0.986420702024084, |
| "eval_loss": 0.33420565724372864, |
| "eval_runtime": 102.458, |
| "eval_samples_per_second": 97.601, |
| "eval_steps_per_second": 12.2, |
| "eval_token_acc": 0.8827535421430588, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.9870612349474763, |
| "grad_norm": 2.84375, |
| "learning_rate": 9.154274153351239e-09, |
| "loss": 0.2262244701385498, |
| "step": 7705, |
| "token_acc": 0.9095655175389394 |
| }, |
| { |
| "epoch": 0.9877017678708686, |
| "grad_norm": 2.875, |
| "learning_rate": 8.270466985761393e-09, |
| "loss": 0.22812366485595703, |
| "step": 7710, |
| "token_acc": 0.9118508311084317 |
| }, |
| { |
| "epoch": 0.9883423007942608, |
| "grad_norm": 2.875, |
| "learning_rate": 7.431499027976685e-09, |
| "loss": 0.2235403537750244, |
| "step": 7715, |
| "token_acc": 0.9136387118994317 |
| }, |
| { |
| "epoch": 0.988982833717653, |
| "grad_norm": 2.84375, |
| "learning_rate": 6.637374044978772e-09, |
| "loss": 0.2251359224319458, |
| "step": 7720, |
| "token_acc": 0.9123222748815166 |
| }, |
| { |
| "epoch": 0.9896233666410453, |
| "grad_norm": 3.171875, |
| "learning_rate": 5.88809560050696e-09, |
| "loss": 0.22147438526153565, |
| "step": 7725, |
| "token_acc": 0.9122496546961326 |
| }, |
| { |
| "epoch": 0.9902638995644376, |
| "grad_norm": 11.375, |
| "learning_rate": 5.1836670570493135e-09, |
| "loss": 0.23077549934387206, |
| "step": 7730, |
| "token_acc": 0.9092400690846286 |
| }, |
| { |
| "epoch": 0.9909044324878299, |
| "grad_norm": 3.09375, |
| "learning_rate": 4.524091575819345e-09, |
| "loss": 0.23009955883026123, |
| "step": 7735, |
| "token_acc": 0.9085662603901977 |
| }, |
| { |
| "epoch": 0.9915449654112222, |
| "grad_norm": 3.125, |
| "learning_rate": 3.9093721167526854e-09, |
| "loss": 0.2305884838104248, |
| "step": 7740, |
| "token_acc": 0.9105272196462305 |
| }, |
| { |
| "epoch": 0.9921854983346144, |
| "grad_norm": 2.859375, |
| "learning_rate": 3.339511438481546e-09, |
| "loss": 0.23010706901550293, |
| "step": 7745, |
| "token_acc": 0.9100301334481274 |
| }, |
| { |
| "epoch": 0.9928260312580066, |
| "grad_norm": 2.671875, |
| "learning_rate": 2.8145120983336106e-09, |
| "loss": 0.23720641136169435, |
| "step": 7750, |
| "token_acc": 0.9077843280691941 |
| }, |
| { |
| "epoch": 0.9934665641813989, |
| "grad_norm": 3.515625, |
| "learning_rate": 2.334376452310938e-09, |
| "loss": 0.2344132900238037, |
| "step": 7755, |
| "token_acc": 0.9084367459496725 |
| }, |
| { |
| "epoch": 0.9941070971047912, |
| "grad_norm": 3.03125, |
| "learning_rate": 1.899106655087746e-09, |
| "loss": 0.23155610561370848, |
| "step": 7760, |
| "token_acc": 0.9105244966732913 |
| }, |
| { |
| "epoch": 0.9947476300281834, |
| "grad_norm": 2.8125, |
| "learning_rate": 1.5087046599926435e-09, |
| "loss": 0.22453222274780274, |
| "step": 7765, |
| "token_acc": 0.9133017649591046 |
| }, |
| { |
| "epoch": 0.9953881629515757, |
| "grad_norm": 2.984375, |
| "learning_rate": 1.1631722190086348e-09, |
| "loss": 0.22471303939819337, |
| "step": 7770, |
| "token_acc": 0.9115940774092995 |
| }, |
| { |
| "epoch": 0.996028695874968, |
| "grad_norm": 3.28125, |
| "learning_rate": 8.625108827564621e-10, |
| "loss": 0.22228624820709228, |
| "step": 7775, |
| "token_acc": 0.9125803251822142 |
| }, |
| { |
| "epoch": 0.9966692287983603, |
| "grad_norm": 4.40625, |
| "learning_rate": 6.067220004946084e-10, |
| "loss": 0.226347017288208, |
| "step": 7780, |
| "token_acc": 0.9129628831314394 |
| }, |
| { |
| "epoch": 0.9973097617217525, |
| "grad_norm": 2.671875, |
| "learning_rate": 3.958067201093041e-10, |
| "loss": 0.2226627826690674, |
| "step": 7785, |
| "token_acc": 0.91291213533575 |
| }, |
| { |
| "epoch": 0.9979502946451447, |
| "grad_norm": 5.34375, |
| "learning_rate": 2.297659881111969e-10, |
| "loss": 0.22344522476196288, |
| "step": 7790, |
| "token_acc": 0.9124097007223942 |
| }, |
| { |
| "epoch": 0.998590827568537, |
| "grad_norm": 2.765625, |
| "learning_rate": 1.0860054962980038e-10, |
| "loss": 0.22511889934539794, |
| "step": 7795, |
| "token_acc": 0.9111601540525337 |
| }, |
| { |
| "epoch": 0.9992313604919293, |
| "grad_norm": 2.859375, |
| "learning_rate": 3.23109484112738e-11, |
| "loss": 0.22252602577209474, |
| "step": 7800, |
| "token_acc": 0.9127169127169127 |
| }, |
| { |
| "epoch": 0.9992313604919293, |
| "eval_loss": 0.3345825672149658, |
| "eval_runtime": 103.1214, |
| "eval_samples_per_second": 96.973, |
| "eval_steps_per_second": 12.122, |
| "eval_token_acc": 0.8827258583364247, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.9998718934153216, |
| "grad_norm": 7.1875, |
| "learning_rate": 8.975268150912541e-13, |
| "loss": 0.22932782173156738, |
| "step": 7805, |
| "token_acc": 0.9116207163102293 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.33455130457878113, |
| "eval_runtime": 101.6743, |
| "eval_samples_per_second": 98.353, |
| "eval_steps_per_second": 12.294, |
| "eval_token_acc": 0.8823687372308442, |
| "step": 7806 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 7806, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.2803424419153183e+19, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|