| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9971862689926843, |
| "eval_steps": 500, |
| "global_step": 2664, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0011254924029262803, |
| "grad_norm": 52.80103639717517, |
| "learning_rate": 1.8726591760299626e-07, |
| "loss": 11.1109, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0022509848058525606, |
| "grad_norm": 54.14343134228205, |
| "learning_rate": 3.7453183520599253e-07, |
| "loss": 11.1518, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0033764772087788407, |
| "grad_norm": 55.956204989803545, |
| "learning_rate": 5.617977528089887e-07, |
| "loss": 11.0032, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.004501969611705121, |
| "grad_norm": 52.83455219346365, |
| "learning_rate": 7.490636704119851e-07, |
| "loss": 10.9916, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.005627462014631401, |
| "grad_norm": 55.14267600803135, |
| "learning_rate": 9.363295880149814e-07, |
| "loss": 11.0876, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.006752954417557681, |
| "grad_norm": 54.398858725586614, |
| "learning_rate": 1.1235955056179775e-06, |
| "loss": 11.1606, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.007878446820483961, |
| "grad_norm": 54.32209978082585, |
| "learning_rate": 1.310861423220974e-06, |
| "loss": 11.0376, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.009003939223410242, |
| "grad_norm": 58.95322793439154, |
| "learning_rate": 1.4981273408239701e-06, |
| "loss": 10.7967, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.010129431626336522, |
| "grad_norm": 61.239998437535036, |
| "learning_rate": 1.6853932584269663e-06, |
| "loss": 10.6462, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.011254924029262802, |
| "grad_norm": 61.97431021564583, |
| "learning_rate": 1.8726591760299627e-06, |
| "loss": 10.7333, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.012380416432189083, |
| "grad_norm": 81.14048682798334, |
| "learning_rate": 2.0599250936329587e-06, |
| "loss": 9.5671, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.013505908835115363, |
| "grad_norm": 86.6613892998916, |
| "learning_rate": 2.247191011235955e-06, |
| "loss": 9.3288, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.014631401238041642, |
| "grad_norm": 97.56062675082107, |
| "learning_rate": 2.4344569288389516e-06, |
| "loss": 8.8569, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.015756893640967922, |
| "grad_norm": 99.72668638976701, |
| "learning_rate": 2.621722846441948e-06, |
| "loss": 8.8229, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.016882386043894203, |
| "grad_norm": 68.62441737325906, |
| "learning_rate": 2.808988764044944e-06, |
| "loss": 4.106, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.018007878446820485, |
| "grad_norm": 60.37331191202543, |
| "learning_rate": 2.9962546816479402e-06, |
| "loss": 3.608, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.019133370849746763, |
| "grad_norm": 48.17406200608835, |
| "learning_rate": 3.1835205992509364e-06, |
| "loss": 3.0803, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.020258863252673044, |
| "grad_norm": 36.4342554407753, |
| "learning_rate": 3.3707865168539327e-06, |
| "loss": 2.5875, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.021384355655599326, |
| "grad_norm": 31.524091079255932, |
| "learning_rate": 3.558052434456929e-06, |
| "loss": 2.33, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.022509848058525603, |
| "grad_norm": 6.74346485385345, |
| "learning_rate": 3.7453183520599255e-06, |
| "loss": 1.3777, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.023635340461451885, |
| "grad_norm": 5.1275756648686786, |
| "learning_rate": 3.932584269662922e-06, |
| "loss": 1.3449, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.024760832864378166, |
| "grad_norm": 4.059089851221617, |
| "learning_rate": 4.1198501872659175e-06, |
| "loss": 1.248, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.025886325267304444, |
| "grad_norm": 3.461699822238443, |
| "learning_rate": 4.307116104868914e-06, |
| "loss": 1.2294, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.027011817670230726, |
| "grad_norm": 2.6194967883079197, |
| "learning_rate": 4.49438202247191e-06, |
| "loss": 1.1185, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.028137310073157007, |
| "grad_norm": 2.1602870716248015, |
| "learning_rate": 4.6816479400749066e-06, |
| "loss": 1.1193, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.029262802476083285, |
| "grad_norm": 1.8135748254982695, |
| "learning_rate": 4.868913857677903e-06, |
| "loss": 1.0589, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.030388294879009566, |
| "grad_norm": 1.321133306398482, |
| "learning_rate": 5.056179775280899e-06, |
| "loss": 0.9617, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.031513787281935844, |
| "grad_norm": 31.284838936554156, |
| "learning_rate": 5.243445692883896e-06, |
| "loss": 0.9937, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.032639279684862126, |
| "grad_norm": 1.8832749187420972, |
| "learning_rate": 5.430711610486891e-06, |
| "loss": 0.9144, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.03376477208778841, |
| "grad_norm": 1.4607030575064903, |
| "learning_rate": 5.617977528089888e-06, |
| "loss": 0.8985, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03489026449071469, |
| "grad_norm": 1.0370555540895343, |
| "learning_rate": 5.805243445692885e-06, |
| "loss": 0.8404, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.03601575689364097, |
| "grad_norm": 0.9098468238765742, |
| "learning_rate": 5.9925093632958805e-06, |
| "loss": 0.8352, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.03714124929656725, |
| "grad_norm": 0.8661052907885602, |
| "learning_rate": 6.179775280898876e-06, |
| "loss": 0.8258, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.038266741699493526, |
| "grad_norm": 0.7740808609488935, |
| "learning_rate": 6.367041198501873e-06, |
| "loss": 0.8324, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.03939223410241981, |
| "grad_norm": 0.7782713659204045, |
| "learning_rate": 6.554307116104869e-06, |
| "loss": 0.7588, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.04051772650534609, |
| "grad_norm": 0.6841020945645767, |
| "learning_rate": 6.741573033707865e-06, |
| "loss": 0.7682, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.04164321890827237, |
| "grad_norm": 0.6004121622938939, |
| "learning_rate": 6.928838951310862e-06, |
| "loss": 0.7549, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.04276871131119865, |
| "grad_norm": 0.6229597097596257, |
| "learning_rate": 7.116104868913858e-06, |
| "loss": 0.7376, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.04389420371412493, |
| "grad_norm": 0.7141033532392286, |
| "learning_rate": 7.303370786516854e-06, |
| "loss": 0.7535, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.04501969611705121, |
| "grad_norm": 0.5725153155935927, |
| "learning_rate": 7.490636704119851e-06, |
| "loss": 0.7185, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04614518851997749, |
| "grad_norm": 0.5549438958370185, |
| "learning_rate": 7.677902621722846e-06, |
| "loss": 0.7518, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.04727068092290377, |
| "grad_norm": 0.4660101265627369, |
| "learning_rate": 7.865168539325843e-06, |
| "loss": 0.6787, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.04839617332583005, |
| "grad_norm": 0.4908539170309294, |
| "learning_rate": 8.05243445692884e-06, |
| "loss": 0.7032, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.04952166572875633, |
| "grad_norm": 0.48924522260651016, |
| "learning_rate": 8.239700374531835e-06, |
| "loss": 0.6803, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.050647158131682614, |
| "grad_norm": 0.475140896111031, |
| "learning_rate": 8.426966292134832e-06, |
| "loss": 0.6475, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.05177265053460889, |
| "grad_norm": 0.4644093059355716, |
| "learning_rate": 8.614232209737828e-06, |
| "loss": 0.7013, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.05289814293753517, |
| "grad_norm": 0.40301032630352857, |
| "learning_rate": 8.801498127340826e-06, |
| "loss": 0.6463, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.05402363534046145, |
| "grad_norm": 0.43480363638927505, |
| "learning_rate": 8.98876404494382e-06, |
| "loss": 0.6775, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.05514912774338773, |
| "grad_norm": 0.43971181451177166, |
| "learning_rate": 9.176029962546817e-06, |
| "loss": 0.7007, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.056274620146314014, |
| "grad_norm": 0.41896418177510275, |
| "learning_rate": 9.363295880149813e-06, |
| "loss": 0.6468, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.057400112549240295, |
| "grad_norm": 0.4149971177588748, |
| "learning_rate": 9.550561797752809e-06, |
| "loss": 0.628, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.05852560495216657, |
| "grad_norm": 0.37242192155253623, |
| "learning_rate": 9.737827715355806e-06, |
| "loss": 0.652, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.05965109735509285, |
| "grad_norm": 0.327485240758468, |
| "learning_rate": 9.925093632958802e-06, |
| "loss": 0.6334, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.06077658975801913, |
| "grad_norm": 0.36141343502753065, |
| "learning_rate": 1.0112359550561798e-05, |
| "loss": 0.6259, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.061902082160945414, |
| "grad_norm": 0.38897211704559004, |
| "learning_rate": 1.0299625468164795e-05, |
| "loss": 0.6226, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.06302757456387169, |
| "grad_norm": 0.36207952707026725, |
| "learning_rate": 1.0486891385767791e-05, |
| "loss": 0.6289, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.06415306696679797, |
| "grad_norm": 0.28595916020001694, |
| "learning_rate": 1.0674157303370787e-05, |
| "loss": 0.6149, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.06527855936972425, |
| "grad_norm": 0.29359683815567633, |
| "learning_rate": 1.0861423220973783e-05, |
| "loss": 0.5888, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.06640405177265053, |
| "grad_norm": 0.3228509053817298, |
| "learning_rate": 1.104868913857678e-05, |
| "loss": 0.6328, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.06752954417557681, |
| "grad_norm": 0.3068303518794903, |
| "learning_rate": 1.1235955056179776e-05, |
| "loss": 0.571, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0686550365785031, |
| "grad_norm": 0.3231501093567655, |
| "learning_rate": 1.1423220973782772e-05, |
| "loss": 0.5728, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.06978052898142938, |
| "grad_norm": 0.2827526067701556, |
| "learning_rate": 1.161048689138577e-05, |
| "loss": 0.5919, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.07090602138435566, |
| "grad_norm": 0.3490733036925077, |
| "learning_rate": 1.1797752808988765e-05, |
| "loss": 0.6319, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.07203151378728194, |
| "grad_norm": 0.36049201575238243, |
| "learning_rate": 1.1985018726591761e-05, |
| "loss": 0.6065, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.07315700619020822, |
| "grad_norm": 0.2817612900392732, |
| "learning_rate": 1.2172284644194758e-05, |
| "loss": 0.6022, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.0742824985931345, |
| "grad_norm": 0.27300283931060443, |
| "learning_rate": 1.2359550561797752e-05, |
| "loss": 0.5783, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.07540799099606077, |
| "grad_norm": 0.3421112627990278, |
| "learning_rate": 1.254681647940075e-05, |
| "loss": 0.576, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.07653348339898705, |
| "grad_norm": 0.33598705329341366, |
| "learning_rate": 1.2734082397003746e-05, |
| "loss": 0.5835, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.07765897580191333, |
| "grad_norm": 0.27960476280957486, |
| "learning_rate": 1.2921348314606743e-05, |
| "loss": 0.5987, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.07878446820483961, |
| "grad_norm": 0.2965350125129841, |
| "learning_rate": 1.3108614232209737e-05, |
| "loss": 0.6026, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0799099606077659, |
| "grad_norm": 0.3122772390396813, |
| "learning_rate": 1.3295880149812733e-05, |
| "loss": 0.574, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.08103545301069218, |
| "grad_norm": 0.3021816040434541, |
| "learning_rate": 1.348314606741573e-05, |
| "loss": 0.5771, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.08216094541361846, |
| "grad_norm": 0.2831578746374877, |
| "learning_rate": 1.3670411985018728e-05, |
| "loss": 0.5675, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.08328643781654474, |
| "grad_norm": 0.32441513984635667, |
| "learning_rate": 1.3857677902621724e-05, |
| "loss": 0.5652, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.08441193021947102, |
| "grad_norm": 0.31509832756589373, |
| "learning_rate": 1.4044943820224721e-05, |
| "loss": 0.5725, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0855374226223973, |
| "grad_norm": 0.3068003737845105, |
| "learning_rate": 1.4232209737827715e-05, |
| "loss": 0.5921, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.08666291502532358, |
| "grad_norm": 0.28569121242288503, |
| "learning_rate": 1.4419475655430711e-05, |
| "loss": 0.5517, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.08778840742824986, |
| "grad_norm": 0.30318713510099926, |
| "learning_rate": 1.4606741573033709e-05, |
| "loss": 0.5786, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.08891389983117615, |
| "grad_norm": 0.32791686866753017, |
| "learning_rate": 1.4794007490636705e-05, |
| "loss": 0.5835, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.09003939223410241, |
| "grad_norm": 0.34541735995694495, |
| "learning_rate": 1.4981273408239702e-05, |
| "loss": 0.6003, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0911648846370287, |
| "grad_norm": 0.24219057822403553, |
| "learning_rate": 1.5168539325842698e-05, |
| "loss": 0.5634, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.09229037703995498, |
| "grad_norm": 0.3066124460269189, |
| "learning_rate": 1.5355805243445692e-05, |
| "loss": 0.5385, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.09341586944288126, |
| "grad_norm": 0.36004311246679105, |
| "learning_rate": 1.554307116104869e-05, |
| "loss": 0.542, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.09454136184580754, |
| "grad_norm": 0.277294813524559, |
| "learning_rate": 1.5730337078651687e-05, |
| "loss": 0.5467, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.09566685424873382, |
| "grad_norm": 0.2742529403377881, |
| "learning_rate": 1.591760299625468e-05, |
| "loss": 0.5337, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.0967923466516601, |
| "grad_norm": 0.37776459034853405, |
| "learning_rate": 1.610486891385768e-05, |
| "loss": 0.5392, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.09791783905458638, |
| "grad_norm": 0.29713498839858976, |
| "learning_rate": 1.6292134831460676e-05, |
| "loss": 0.5513, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.09904333145751266, |
| "grad_norm": 0.2677802103514856, |
| "learning_rate": 1.647940074906367e-05, |
| "loss": 0.5435, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.10016882386043895, |
| "grad_norm": 0.3282651538789268, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.5556, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.10129431626336523, |
| "grad_norm": 0.2903898300830952, |
| "learning_rate": 1.6853932584269665e-05, |
| "loss": 0.5182, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.10241980866629151, |
| "grad_norm": 0.32940772248776146, |
| "learning_rate": 1.704119850187266e-05, |
| "loss": 0.5651, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.10354530106921778, |
| "grad_norm": 0.29877064796568714, |
| "learning_rate": 1.7228464419475657e-05, |
| "loss": 0.5232, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.10467079347214406, |
| "grad_norm": 0.3033306544759112, |
| "learning_rate": 1.7415730337078654e-05, |
| "loss": 0.5415, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.10579628587507034, |
| "grad_norm": 0.298699393195244, |
| "learning_rate": 1.760299625468165e-05, |
| "loss": 0.5351, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.10692177827799662, |
| "grad_norm": 0.27344653088956217, |
| "learning_rate": 1.7790262172284646e-05, |
| "loss": 0.5401, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.1080472706809229, |
| "grad_norm": 0.2901283593528549, |
| "learning_rate": 1.797752808988764e-05, |
| "loss": 0.5548, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.10917276308384918, |
| "grad_norm": 0.2955399438690073, |
| "learning_rate": 1.8164794007490637e-05, |
| "loss": 0.5336, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.11029825548677546, |
| "grad_norm": 0.3044365394746884, |
| "learning_rate": 1.8352059925093635e-05, |
| "loss": 0.5095, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.11142374788970175, |
| "grad_norm": 0.26929920330702195, |
| "learning_rate": 1.8539325842696632e-05, |
| "loss": 0.5569, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.11254924029262803, |
| "grad_norm": 0.36727845819131605, |
| "learning_rate": 1.8726591760299626e-05, |
| "loss": 0.5818, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.11367473269555431, |
| "grad_norm": 0.2836581651837986, |
| "learning_rate": 1.891385767790262e-05, |
| "loss": 0.5373, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.11480022509848059, |
| "grad_norm": 0.29593257115280464, |
| "learning_rate": 1.9101123595505618e-05, |
| "loss": 0.5131, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.11592571750140687, |
| "grad_norm": 0.29964886160890525, |
| "learning_rate": 1.9288389513108615e-05, |
| "loss": 0.5044, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.11705120990433314, |
| "grad_norm": 0.30009105696644967, |
| "learning_rate": 1.9475655430711613e-05, |
| "loss": 0.536, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.11817670230725942, |
| "grad_norm": 0.29291717707624504, |
| "learning_rate": 1.9662921348314607e-05, |
| "loss": 0.5505, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.1193021947101857, |
| "grad_norm": 0.3294836067555843, |
| "learning_rate": 1.9850187265917604e-05, |
| "loss": 0.5505, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.12042768711311198, |
| "grad_norm": 0.29401137621422074, |
| "learning_rate": 2.00374531835206e-05, |
| "loss": 0.528, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.12155317951603826, |
| "grad_norm": 0.3030811720009754, |
| "learning_rate": 2.0224719101123596e-05, |
| "loss": 0.538, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.12267867191896455, |
| "grad_norm": 0.32674282662604665, |
| "learning_rate": 2.0411985018726593e-05, |
| "loss": 0.541, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.12380416432189083, |
| "grad_norm": 0.30319983351235286, |
| "learning_rate": 2.059925093632959e-05, |
| "loss": 0.504, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.12492965672481711, |
| "grad_norm": 0.3402565154469349, |
| "learning_rate": 2.0786516853932585e-05, |
| "loss": 0.5251, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.12605514912774338, |
| "grad_norm": 0.31872601282001034, |
| "learning_rate": 2.0973782771535582e-05, |
| "loss": 0.5286, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.12718064153066966, |
| "grad_norm": 0.34754536732763297, |
| "learning_rate": 2.1161048689138577e-05, |
| "loss": 0.5235, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.12830613393359594, |
| "grad_norm": 0.30998860710868686, |
| "learning_rate": 2.1348314606741574e-05, |
| "loss": 0.525, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.12943162633652222, |
| "grad_norm": 0.32990918540472725, |
| "learning_rate": 2.153558052434457e-05, |
| "loss": 0.5265, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.1305571187394485, |
| "grad_norm": 0.3423710738146026, |
| "learning_rate": 2.1722846441947566e-05, |
| "loss": 0.5338, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.13168261114237478, |
| "grad_norm": 0.2872199647047314, |
| "learning_rate": 2.1910112359550563e-05, |
| "loss": 0.5299, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.13280810354530106, |
| "grad_norm": 0.3317448545714626, |
| "learning_rate": 2.209737827715356e-05, |
| "loss": 0.4959, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.13393359594822735, |
| "grad_norm": 0.31417498563521173, |
| "learning_rate": 2.2284644194756555e-05, |
| "loss": 0.52, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.13505908835115363, |
| "grad_norm": 0.3645759776734259, |
| "learning_rate": 2.2471910112359552e-05, |
| "loss": 0.5296, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1361845807540799, |
| "grad_norm": 0.3180662213331512, |
| "learning_rate": 2.2659176029962546e-05, |
| "loss": 0.5063, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.1373100731570062, |
| "grad_norm": 0.3716923342200342, |
| "learning_rate": 2.2846441947565544e-05, |
| "loss": 0.5046, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.13843556555993247, |
| "grad_norm": 0.39150702044794555, |
| "learning_rate": 2.303370786516854e-05, |
| "loss": 0.4959, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.13956105796285875, |
| "grad_norm": 0.3713739740316023, |
| "learning_rate": 2.322097378277154e-05, |
| "loss": 0.5015, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.14068655036578503, |
| "grad_norm": 0.355150041365192, |
| "learning_rate": 2.3408239700374533e-05, |
| "loss": 0.5029, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.14181204276871132, |
| "grad_norm": 0.47357406433732624, |
| "learning_rate": 2.359550561797753e-05, |
| "loss": 0.519, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.1429375351716376, |
| "grad_norm": 0.35841513558308474, |
| "learning_rate": 2.3782771535580524e-05, |
| "loss": 0.517, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.14406302757456388, |
| "grad_norm": 0.32127121635614614, |
| "learning_rate": 2.3970037453183522e-05, |
| "loss": 0.5068, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.14518851997749016, |
| "grad_norm": 0.41380038534756397, |
| "learning_rate": 2.415730337078652e-05, |
| "loss": 0.53, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.14631401238041644, |
| "grad_norm": 0.3342860962607464, |
| "learning_rate": 2.4344569288389517e-05, |
| "loss": 0.5131, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.14743950478334272, |
| "grad_norm": 0.328086226882181, |
| "learning_rate": 2.453183520599251e-05, |
| "loss": 0.5359, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.148564997186269, |
| "grad_norm": 0.3980527154392636, |
| "learning_rate": 2.4719101123595505e-05, |
| "loss": 0.4915, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.14969048958919529, |
| "grad_norm": 0.3664150255854856, |
| "learning_rate": 2.4906367041198502e-05, |
| "loss": 0.5239, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.15081598199212154, |
| "grad_norm": 0.36032405515932203, |
| "learning_rate": 2.50936329588015e-05, |
| "loss": 0.5085, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.15194147439504782, |
| "grad_norm": 0.4406027959320581, |
| "learning_rate": 2.5280898876404497e-05, |
| "loss": 0.5126, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.1530669667979741, |
| "grad_norm": 0.344695754779841, |
| "learning_rate": 2.546816479400749e-05, |
| "loss": 0.5122, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.15419245920090038, |
| "grad_norm": 0.3726483933183008, |
| "learning_rate": 2.565543071161049e-05, |
| "loss": 0.4905, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.15531795160382666, |
| "grad_norm": 0.3449312763960655, |
| "learning_rate": 2.5842696629213486e-05, |
| "loss": 0.4987, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.15644344400675295, |
| "grad_norm": 0.35328970504291957, |
| "learning_rate": 2.6029962546816484e-05, |
| "loss": 0.5054, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.15756893640967923, |
| "grad_norm": 0.3700337092111675, |
| "learning_rate": 2.6217228464419475e-05, |
| "loss": 0.509, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.1586944288126055, |
| "grad_norm": 0.301320056673764, |
| "learning_rate": 2.6404494382022472e-05, |
| "loss": 0.4958, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.1598199212155318, |
| "grad_norm": 0.4191378953980472, |
| "learning_rate": 2.6591760299625466e-05, |
| "loss": 0.5387, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.16094541361845807, |
| "grad_norm": 0.3880541184543602, |
| "learning_rate": 2.6779026217228464e-05, |
| "loss": 0.5227, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.16207090602138435, |
| "grad_norm": 0.39927231059272483, |
| "learning_rate": 2.696629213483146e-05, |
| "loss": 0.5237, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.16319639842431063, |
| "grad_norm": 0.3961271339819255, |
| "learning_rate": 2.715355805243446e-05, |
| "loss": 0.5194, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.16432189082723692, |
| "grad_norm": 0.4376696251019293, |
| "learning_rate": 2.7340823970037456e-05, |
| "loss": 0.5178, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.1654473832301632, |
| "grad_norm": 0.44058938921182966, |
| "learning_rate": 2.752808988764045e-05, |
| "loss": 0.4998, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.16657287563308948, |
| "grad_norm": 0.35261095257281155, |
| "learning_rate": 2.7715355805243448e-05, |
| "loss": 0.499, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.16769836803601576, |
| "grad_norm": 0.5218533410763981, |
| "learning_rate": 2.7902621722846445e-05, |
| "loss": 0.5273, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.16882386043894204, |
| "grad_norm": 0.4737891842741366, |
| "learning_rate": 2.8089887640449443e-05, |
| "loss": 0.5003, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.16994935284186832, |
| "grad_norm": 0.392922001496729, |
| "learning_rate": 2.8277153558052437e-05, |
| "loss": 0.5016, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.1710748452447946, |
| "grad_norm": 0.5302514501231146, |
| "learning_rate": 2.846441947565543e-05, |
| "loss": 0.5172, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.17220033764772089, |
| "grad_norm": 0.49803115823639127, |
| "learning_rate": 2.8651685393258425e-05, |
| "loss": 0.4946, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.17332583005064717, |
| "grad_norm": 0.4128451133760804, |
| "learning_rate": 2.8838951310861422e-05, |
| "loss": 0.5232, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.17445132245357345, |
| "grad_norm": 0.6316627266885098, |
| "learning_rate": 2.902621722846442e-05, |
| "loss": 0.5059, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.17557681485649973, |
| "grad_norm": 0.5295204042861669, |
| "learning_rate": 2.9213483146067417e-05, |
| "loss": 0.5243, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.176702307259426, |
| "grad_norm": 0.45607245497823934, |
| "learning_rate": 2.940074906367041e-05, |
| "loss": 0.4821, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.1778277996623523, |
| "grad_norm": 0.6021144875229769, |
| "learning_rate": 2.958801498127341e-05, |
| "loss": 0.5103, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.17895329206527855, |
| "grad_norm": 0.48529780373586173, |
| "learning_rate": 2.9775280898876406e-05, |
| "loss": 0.4922, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.18007878446820483, |
| "grad_norm": 0.4250055623471545, |
| "learning_rate": 2.9962546816479404e-05, |
| "loss": 0.4904, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1812042768711311, |
| "grad_norm": 0.6512919492582171, |
| "learning_rate": 3.01498127340824e-05, |
| "loss": 0.5145, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.1823297692740574, |
| "grad_norm": 0.45356537836570343, |
| "learning_rate": 3.0337078651685396e-05, |
| "loss": 0.489, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.18345526167698367, |
| "grad_norm": 0.4587778769232854, |
| "learning_rate": 3.052434456928839e-05, |
| "loss": 0.5031, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.18458075407990995, |
| "grad_norm": 0.5209259547751122, |
| "learning_rate": 3.0711610486891384e-05, |
| "loss": 0.5122, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.18570624648283623, |
| "grad_norm": 0.3205075873383086, |
| "learning_rate": 3.089887640449438e-05, |
| "loss": 0.484, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.18683173888576252, |
| "grad_norm": 0.44421922243323253, |
| "learning_rate": 3.108614232209738e-05, |
| "loss": 0.4885, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.1879572312886888, |
| "grad_norm": 0.38376560722257824, |
| "learning_rate": 3.1273408239700376e-05, |
| "loss": 0.5137, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.18908272369161508, |
| "grad_norm": 0.33178548545336345, |
| "learning_rate": 3.1460674157303374e-05, |
| "loss": 0.5214, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.19020821609454136, |
| "grad_norm": 0.3543354285220051, |
| "learning_rate": 3.164794007490637e-05, |
| "loss": 0.4652, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.19133370849746764, |
| "grad_norm": 0.34821873695435235, |
| "learning_rate": 3.183520599250936e-05, |
| "loss": 0.4695, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.19245920090039392, |
| "grad_norm": 0.346452239854666, |
| "learning_rate": 3.202247191011236e-05, |
| "loss": 0.4891, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.1935846933033202, |
| "grad_norm": 0.4398933317388218, |
| "learning_rate": 3.220973782771536e-05, |
| "loss": 0.4911, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.19471018570624649, |
| "grad_norm": 0.3624677233826849, |
| "learning_rate": 3.2397003745318354e-05, |
| "loss": 0.4912, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.19583567810917277, |
| "grad_norm": 0.3699640798637125, |
| "learning_rate": 3.258426966292135e-05, |
| "loss": 0.5004, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.19696117051209905, |
| "grad_norm": 0.41958584077529965, |
| "learning_rate": 3.277153558052435e-05, |
| "loss": 0.4752, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.19808666291502533, |
| "grad_norm": 0.42502324118725465, |
| "learning_rate": 3.295880149812734e-05, |
| "loss": 0.4922, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.1992121553179516, |
| "grad_norm": 0.36517865445954867, |
| "learning_rate": 3.314606741573034e-05, |
| "loss": 0.5048, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.2003376477208779, |
| "grad_norm": 0.41574946856579004, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.4821, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.20146314012380417, |
| "grad_norm": 0.4378349227779501, |
| "learning_rate": 3.352059925093633e-05, |
| "loss": 0.4933, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.20258863252673046, |
| "grad_norm": 0.4776232193190497, |
| "learning_rate": 3.370786516853933e-05, |
| "loss": 0.4751, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.20371412492965674, |
| "grad_norm": 0.43848154415790724, |
| "learning_rate": 3.389513108614232e-05, |
| "loss": 0.4807, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.20483961733258302, |
| "grad_norm": 0.5845165253893854, |
| "learning_rate": 3.408239700374532e-05, |
| "loss": 0.5009, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.20596510973550927, |
| "grad_norm": 0.4082870415882244, |
| "learning_rate": 3.4269662921348316e-05, |
| "loss": 0.4979, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.20709060213843555, |
| "grad_norm": 0.4718495231442118, |
| "learning_rate": 3.445692883895131e-05, |
| "loss": 0.5038, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.20821609454136183, |
| "grad_norm": 0.473455230356082, |
| "learning_rate": 3.464419475655431e-05, |
| "loss": 0.4833, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.20934158694428812, |
| "grad_norm": 0.46125301819415737, |
| "learning_rate": 3.483146067415731e-05, |
| "loss": 0.4889, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.2104670793472144, |
| "grad_norm": 0.4432990112364735, |
| "learning_rate": 3.5018726591760305e-05, |
| "loss": 0.4807, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.21159257175014068, |
| "grad_norm": 0.5322159815450351, |
| "learning_rate": 3.52059925093633e-05, |
| "loss": 0.5219, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.21271806415306696, |
| "grad_norm": 0.3936101857237881, |
| "learning_rate": 3.5393258426966294e-05, |
| "loss": 0.4922, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.21384355655599324, |
| "grad_norm": 0.53345431181708, |
| "learning_rate": 3.558052434456929e-05, |
| "loss": 0.4897, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.21496904895891952, |
| "grad_norm": 0.6142180117371377, |
| "learning_rate": 3.576779026217228e-05, |
| "loss": 0.493, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.2160945413618458, |
| "grad_norm": 0.41631870227046885, |
| "learning_rate": 3.595505617977528e-05, |
| "loss": 0.4893, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.21722003376477209, |
| "grad_norm": 0.4305104245523169, |
| "learning_rate": 3.614232209737828e-05, |
| "loss": 0.4866, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.21834552616769837, |
| "grad_norm": 0.5169903617970245, |
| "learning_rate": 3.6329588014981274e-05, |
| "loss": 0.4901, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.21947101857062465, |
| "grad_norm": 0.3860200825591215, |
| "learning_rate": 3.651685393258427e-05, |
| "loss": 0.4549, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.22059651097355093, |
| "grad_norm": 0.5230520554579277, |
| "learning_rate": 3.670411985018727e-05, |
| "loss": 0.4724, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.2217220033764772, |
| "grad_norm": 0.39548431249473126, |
| "learning_rate": 3.689138576779027e-05, |
| "loss": 0.4911, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.2228474957794035, |
| "grad_norm": 0.48800271319592975, |
| "learning_rate": 3.7078651685393264e-05, |
| "loss": 0.4572, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.22397298818232977, |
| "grad_norm": 0.41978987611240903, |
| "learning_rate": 3.726591760299626e-05, |
| "loss": 0.4829, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.22509848058525606, |
| "grad_norm": 0.5469472170008755, |
| "learning_rate": 3.745318352059925e-05, |
| "loss": 0.5274, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.22622397298818234, |
| "grad_norm": 0.3918679709299485, |
| "learning_rate": 3.764044943820225e-05, |
| "loss": 0.4766, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.22734946539110862, |
| "grad_norm": 0.4611366578168398, |
| "learning_rate": 3.782771535580524e-05, |
| "loss": 0.4533, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.2284749577940349, |
| "grad_norm": 0.35525042824778863, |
| "learning_rate": 3.801498127340824e-05, |
| "loss": 0.4801, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.22960045019696118, |
| "grad_norm": 0.39795327337608555, |
| "learning_rate": 3.8202247191011236e-05, |
| "loss": 0.4796, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.23072594259988746, |
| "grad_norm": 0.40314796565206873, |
| "learning_rate": 3.838951310861423e-05, |
| "loss": 0.4746, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.23185143500281374, |
| "grad_norm": 0.6186856651894296, |
| "learning_rate": 3.857677902621723e-05, |
| "loss": 0.483, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.23297692740574, |
| "grad_norm": 0.6262230101782875, |
| "learning_rate": 3.876404494382023e-05, |
| "loss": 0.5026, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.23410241980866628, |
| "grad_norm": 0.6885063622065476, |
| "learning_rate": 3.8951310861423226e-05, |
| "loss": 0.4961, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.23522791221159256, |
| "grad_norm": 0.46434262483818484, |
| "learning_rate": 3.913857677902622e-05, |
| "loss": 0.4824, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.23635340461451884, |
| "grad_norm": 0.5876521011749303, |
| "learning_rate": 3.9325842696629214e-05, |
| "loss": 0.4799, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.23747889701744512, |
| "grad_norm": 0.5679577524617186, |
| "learning_rate": 3.951310861423221e-05, |
| "loss": 0.4976, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.2386043894203714, |
| "grad_norm": 0.4948818608996542, |
| "learning_rate": 3.970037453183521e-05, |
| "loss": 0.4689, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.23972988182329769, |
| "grad_norm": 0.5366944392366912, |
| "learning_rate": 3.98876404494382e-05, |
| "loss": 0.5052, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.24085537422622397, |
| "grad_norm": 0.46091893449282645, |
| "learning_rate": 4.00749063670412e-05, |
| "loss": 0.504, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.24198086662915025, |
| "grad_norm": 0.6227113043840353, |
| "learning_rate": 4.0262172284644194e-05, |
| "loss": 0.4947, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.24310635903207653, |
| "grad_norm": 0.37975248168226977, |
| "learning_rate": 4.044943820224719e-05, |
| "loss": 0.4681, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.2442318514350028, |
| "grad_norm": 0.6602796166859184, |
| "learning_rate": 4.063670411985019e-05, |
| "loss": 0.4809, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.2453573438379291, |
| "grad_norm": 0.46707379726848597, |
| "learning_rate": 4.082397003745319e-05, |
| "loss": 0.4586, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.24648283624085537, |
| "grad_norm": 0.58153678436466, |
| "learning_rate": 4.1011235955056184e-05, |
| "loss": 0.508, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.24760832864378166, |
| "grad_norm": 0.6402167998756934, |
| "learning_rate": 4.119850187265918e-05, |
| "loss": 0.5039, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.24873382104670794, |
| "grad_norm": 0.5794603595581886, |
| "learning_rate": 4.138576779026217e-05, |
| "loss": 0.4653, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.24985931344963422, |
| "grad_norm": 0.5230659913502629, |
| "learning_rate": 4.157303370786517e-05, |
| "loss": 0.5038, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.2509848058525605, |
| "grad_norm": 0.6799656883961334, |
| "learning_rate": 4.176029962546817e-05, |
| "loss": 0.4898, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.25211029825548675, |
| "grad_norm": 0.6621661466046944, |
| "learning_rate": 4.1947565543071165e-05, |
| "loss": 0.4844, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.25323579065841306, |
| "grad_norm": 0.5616167950816823, |
| "learning_rate": 4.2134831460674156e-05, |
| "loss": 0.488, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.2543612830613393, |
| "grad_norm": 0.6575771702191943, |
| "learning_rate": 4.232209737827715e-05, |
| "loss": 0.4791, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.2554867754642656, |
| "grad_norm": 0.5488369093936996, |
| "learning_rate": 4.250936329588015e-05, |
| "loss": 0.5005, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.2566122678671919, |
| "grad_norm": 0.6144786136277036, |
| "learning_rate": 4.269662921348315e-05, |
| "loss": 0.4663, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.2577377602701182, |
| "grad_norm": 0.600777544617447, |
| "learning_rate": 4.2883895131086146e-05, |
| "loss": 0.4786, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.25886325267304444, |
| "grad_norm": 0.8023306491917337, |
| "learning_rate": 4.307116104868914e-05, |
| "loss": 0.4861, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.25998874507597075, |
| "grad_norm": 0.5046450430408941, |
| "learning_rate": 4.3258426966292134e-05, |
| "loss": 0.4727, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.261114237478897, |
| "grad_norm": 0.7089229481882726, |
| "learning_rate": 4.344569288389513e-05, |
| "loss": 0.4907, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.2622397298818233, |
| "grad_norm": 0.7578774630778147, |
| "learning_rate": 4.363295880149813e-05, |
| "loss": 0.4568, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.26336522228474957, |
| "grad_norm": 0.43469225376554593, |
| "learning_rate": 4.3820224719101126e-05, |
| "loss": 0.4882, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.2644907146876759, |
| "grad_norm": 0.7064451875198454, |
| "learning_rate": 4.4007490636704124e-05, |
| "loss": 0.4777, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.26561620709060213, |
| "grad_norm": 0.5236443057755517, |
| "learning_rate": 4.419475655430712e-05, |
| "loss": 0.4541, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.26674169949352844, |
| "grad_norm": 0.49987222413307647, |
| "learning_rate": 4.438202247191011e-05, |
| "loss": 0.4813, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.2678671918964547, |
| "grad_norm": 0.4887524110978959, |
| "learning_rate": 4.456928838951311e-05, |
| "loss": 0.4904, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.268992684299381, |
| "grad_norm": 0.43081504689209343, |
| "learning_rate": 4.475655430711611e-05, |
| "loss": 0.4828, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.27011817670230726, |
| "grad_norm": 0.4927726763052006, |
| "learning_rate": 4.4943820224719104e-05, |
| "loss": 0.4661, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.27124366910523356, |
| "grad_norm": 0.48347879754332407, |
| "learning_rate": 4.51310861423221e-05, |
| "loss": 0.4698, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.2723691615081598, |
| "grad_norm": 0.4867564616696551, |
| "learning_rate": 4.531835205992509e-05, |
| "loss": 0.49, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.2734946539110861, |
| "grad_norm": 0.5374635091852026, |
| "learning_rate": 4.550561797752809e-05, |
| "loss": 0.4559, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.2746201463140124, |
| "grad_norm": 0.4772645954109197, |
| "learning_rate": 4.569288389513109e-05, |
| "loss": 0.4717, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.27574563871693863, |
| "grad_norm": 0.4347548915476938, |
| "learning_rate": 4.5880149812734085e-05, |
| "loss": 0.4532, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.27687113111986494, |
| "grad_norm": 0.4759288672038084, |
| "learning_rate": 4.606741573033708e-05, |
| "loss": 0.4945, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.2779966235227912, |
| "grad_norm": 0.5792894788506712, |
| "learning_rate": 4.625468164794008e-05, |
| "loss": 0.4983, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.2791221159257175, |
| "grad_norm": 0.3980178738338264, |
| "learning_rate": 4.644194756554308e-05, |
| "loss": 0.4827, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.28024760832864376, |
| "grad_norm": 0.5408869118408165, |
| "learning_rate": 4.662921348314607e-05, |
| "loss": 0.4528, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.28137310073157007, |
| "grad_norm": 0.5689847204055498, |
| "learning_rate": 4.6816479400749066e-05, |
| "loss": 0.4598, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2824985931344963, |
| "grad_norm": 0.5617698646408457, |
| "learning_rate": 4.700374531835206e-05, |
| "loss": 0.5022, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.28362408553742263, |
| "grad_norm": 0.45230255865587565, |
| "learning_rate": 4.719101123595506e-05, |
| "loss": 0.47, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.2847495779403489, |
| "grad_norm": 0.42374590684633967, |
| "learning_rate": 4.737827715355805e-05, |
| "loss": 0.466, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.2858750703432752, |
| "grad_norm": 0.4180084231861174, |
| "learning_rate": 4.756554307116105e-05, |
| "loss": 0.4747, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.28700056274620145, |
| "grad_norm": 0.4787386782007702, |
| "learning_rate": 4.7752808988764046e-05, |
| "loss": 0.4611, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.28812605514912776, |
| "grad_norm": 0.40084601864669134, |
| "learning_rate": 4.7940074906367044e-05, |
| "loss": 0.4577, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.289251547552054, |
| "grad_norm": 0.5597034903024327, |
| "learning_rate": 4.812734082397004e-05, |
| "loss": 0.4843, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.2903770399549803, |
| "grad_norm": 0.4389515417232518, |
| "learning_rate": 4.831460674157304e-05, |
| "loss": 0.4667, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.2915025323579066, |
| "grad_norm": 0.5153267551952044, |
| "learning_rate": 4.8501872659176036e-05, |
| "loss": 0.4543, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.2926280247608329, |
| "grad_norm": 0.4414645886637002, |
| "learning_rate": 4.8689138576779034e-05, |
| "loss": 0.4687, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.29375351716375914, |
| "grad_norm": 0.5441323388581608, |
| "learning_rate": 4.8876404494382024e-05, |
| "loss": 0.4904, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.29487900956668545, |
| "grad_norm": 0.47357188882841866, |
| "learning_rate": 4.906367041198502e-05, |
| "loss": 0.4646, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.2960045019696117, |
| "grad_norm": 0.43582942744547837, |
| "learning_rate": 4.925093632958801e-05, |
| "loss": 0.4747, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.297129994372538, |
| "grad_norm": 0.4659511261837298, |
| "learning_rate": 4.943820224719101e-05, |
| "loss": 0.4613, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.29825548677546426, |
| "grad_norm": 0.4561502948161637, |
| "learning_rate": 4.962546816479401e-05, |
| "loss": 0.4691, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.29938097917839057, |
| "grad_norm": 0.4541481932977169, |
| "learning_rate": 4.9812734082397005e-05, |
| "loss": 0.4661, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.3005064715813168, |
| "grad_norm": 0.47547350717861037, |
| "learning_rate": 5e-05, |
| "loss": 0.4678, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.3016319639842431, |
| "grad_norm": 0.406819603933647, |
| "learning_rate": 4.997914059240717e-05, |
| "loss": 0.4381, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.3027574563871694, |
| "grad_norm": 0.4460099057965906, |
| "learning_rate": 4.9958281184814356e-05, |
| "loss": 0.468, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.30388294879009564, |
| "grad_norm": 0.5142189061381415, |
| "learning_rate": 4.9937421777221527e-05, |
| "loss": 0.4714, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.30500844119302195, |
| "grad_norm": 0.5665935978369622, |
| "learning_rate": 4.9916562369628704e-05, |
| "loss": 0.4818, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.3061339335959482, |
| "grad_norm": 0.47186870063336683, |
| "learning_rate": 4.989570296203588e-05, |
| "loss": 0.4793, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.3072594259988745, |
| "grad_norm": 0.457167782453592, |
| "learning_rate": 4.987484355444306e-05, |
| "loss": 0.4404, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.30838491840180077, |
| "grad_norm": 0.5127134764884651, |
| "learning_rate": 4.985398414685023e-05, |
| "loss": 0.465, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.3095104108047271, |
| "grad_norm": 0.6532795598287193, |
| "learning_rate": 4.983312473925741e-05, |
| "loss": 0.518, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.31063590320765333, |
| "grad_norm": 0.6930677368699536, |
| "learning_rate": 4.981226533166458e-05, |
| "loss": 0.4866, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.31176139561057964, |
| "grad_norm": 0.5061640947852638, |
| "learning_rate": 4.979140592407176e-05, |
| "loss": 0.4609, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.3128868880135059, |
| "grad_norm": 0.5257849117764034, |
| "learning_rate": 4.9770546516478936e-05, |
| "loss": 0.457, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.3140123804164322, |
| "grad_norm": 0.4321326701858845, |
| "learning_rate": 4.974968710888611e-05, |
| "loss": 0.4454, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.31513787281935846, |
| "grad_norm": 0.38723391299657206, |
| "learning_rate": 4.972882770129328e-05, |
| "loss": 0.4643, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.31626336522228476, |
| "grad_norm": 0.5298180060499573, |
| "learning_rate": 4.970796829370046e-05, |
| "loss": 0.4746, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.317388857625211, |
| "grad_norm": 0.4061129177315826, |
| "learning_rate": 4.968710888610764e-05, |
| "loss": 0.4483, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.3185143500281373, |
| "grad_norm": 0.44266904421962466, |
| "learning_rate": 4.9666249478514814e-05, |
| "loss": 0.4462, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.3196398424310636, |
| "grad_norm": 0.5723516835538393, |
| "learning_rate": 4.964539007092199e-05, |
| "loss": 0.4912, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.3207653348339899, |
| "grad_norm": 0.4268190071809671, |
| "learning_rate": 4.962453066332917e-05, |
| "loss": 0.4747, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.32189082723691614, |
| "grad_norm": 0.5119510744401135, |
| "learning_rate": 4.960367125573634e-05, |
| "loss": 0.4822, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.32301631963984245, |
| "grad_norm": 0.6845433911954164, |
| "learning_rate": 4.9582811848143515e-05, |
| "loss": 0.4784, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.3241418120427687, |
| "grad_norm": 0.5512193560031954, |
| "learning_rate": 4.956195244055069e-05, |
| "loss": 0.4652, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.325267304445695, |
| "grad_norm": 0.44176750937437276, |
| "learning_rate": 4.954109303295786e-05, |
| "loss": 0.4591, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.32639279684862127, |
| "grad_norm": 0.4802436465499188, |
| "learning_rate": 4.952023362536504e-05, |
| "loss": 0.4646, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3275182892515476, |
| "grad_norm": 0.5130069834143126, |
| "learning_rate": 4.9499374217772216e-05, |
| "loss": 0.4632, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.32864378165447383, |
| "grad_norm": 0.3492902519652929, |
| "learning_rate": 4.947851481017939e-05, |
| "loss": 0.4479, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.3297692740574001, |
| "grad_norm": 0.44213997370659125, |
| "learning_rate": 4.945765540258657e-05, |
| "loss": 0.5066, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.3308947664603264, |
| "grad_norm": 0.42377286032567374, |
| "learning_rate": 4.943679599499375e-05, |
| "loss": 0.4302, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.33202025886325265, |
| "grad_norm": 0.394814830036649, |
| "learning_rate": 4.941593658740092e-05, |
| "loss": 0.4767, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.33314575126617896, |
| "grad_norm": 0.4318002819787604, |
| "learning_rate": 4.9395077179808094e-05, |
| "loss": 0.4672, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.3342712436691052, |
| "grad_norm": 0.4312528420970919, |
| "learning_rate": 4.937421777221527e-05, |
| "loss": 0.4301, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.3353967360720315, |
| "grad_norm": 0.4506557238069578, |
| "learning_rate": 4.935335836462245e-05, |
| "loss": 0.4799, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.3365222284749578, |
| "grad_norm": 0.4035767985622815, |
| "learning_rate": 4.933249895702962e-05, |
| "loss": 0.4685, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.3376477208778841, |
| "grad_norm": 0.3974620013501897, |
| "learning_rate": 4.93116395494368e-05, |
| "loss": 0.4597, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.33877321328081034, |
| "grad_norm": 0.4262423662726608, |
| "learning_rate": 4.929078014184397e-05, |
| "loss": 0.5115, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.33989870568373665, |
| "grad_norm": 0.46574622767948337, |
| "learning_rate": 4.926992073425115e-05, |
| "loss": 0.4633, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.3410241980866629, |
| "grad_norm": 0.3662096934434811, |
| "learning_rate": 4.9249061326658326e-05, |
| "loss": 0.459, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.3421496904895892, |
| "grad_norm": 0.4345723771900289, |
| "learning_rate": 4.92282019190655e-05, |
| "loss": 0.4912, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.34327518289251546, |
| "grad_norm": 0.43786190381782847, |
| "learning_rate": 4.9207342511472674e-05, |
| "loss": 0.4433, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.34440067529544177, |
| "grad_norm": 0.502359362326431, |
| "learning_rate": 4.918648310387986e-05, |
| "loss": 0.4656, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.345526167698368, |
| "grad_norm": 0.36663337654610195, |
| "learning_rate": 4.916562369628703e-05, |
| "loss": 0.4902, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.34665166010129433, |
| "grad_norm": 0.5430348378224088, |
| "learning_rate": 4.9144764288694205e-05, |
| "loss": 0.4762, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.3477771525042206, |
| "grad_norm": 0.5126322468885988, |
| "learning_rate": 4.912390488110138e-05, |
| "loss": 0.4537, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.3489026449071469, |
| "grad_norm": 0.5095172872961993, |
| "learning_rate": 4.910304547350855e-05, |
| "loss": 0.4741, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.35002813731007315, |
| "grad_norm": 0.48284683021884783, |
| "learning_rate": 4.908218606591573e-05, |
| "loss": 0.4726, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.35115362971299946, |
| "grad_norm": 0.489802898549214, |
| "learning_rate": 4.9061326658322906e-05, |
| "loss": 0.4686, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.3522791221159257, |
| "grad_norm": 0.39249156296212534, |
| "learning_rate": 4.904046725073008e-05, |
| "loss": 0.4695, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.353404614518852, |
| "grad_norm": 0.5323916362107045, |
| "learning_rate": 4.901960784313725e-05, |
| "loss": 0.4965, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.3545301069217783, |
| "grad_norm": 0.4533239040835354, |
| "learning_rate": 4.899874843554444e-05, |
| "loss": 0.4707, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.3556555993247046, |
| "grad_norm": 0.4206387502485126, |
| "learning_rate": 4.897788902795161e-05, |
| "loss": 0.4579, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.35678109172763084, |
| "grad_norm": 0.6269190751003352, |
| "learning_rate": 4.8957029620358784e-05, |
| "loss": 0.4693, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.3579065841305571, |
| "grad_norm": 0.5737738257462925, |
| "learning_rate": 4.893617021276596e-05, |
| "loss": 0.4618, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.3590320765334834, |
| "grad_norm": 0.4441915442589318, |
| "learning_rate": 4.891531080517314e-05, |
| "loss": 0.4873, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.36015756893640966, |
| "grad_norm": 0.4262860610042779, |
| "learning_rate": 4.889445139758031e-05, |
| "loss": 0.4878, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.36128306133933596, |
| "grad_norm": 0.4858584345693206, |
| "learning_rate": 4.8873591989987485e-05, |
| "loss": 0.4669, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.3624085537422622, |
| "grad_norm": 0.3534387563975198, |
| "learning_rate": 4.885273258239466e-05, |
| "loss": 0.4811, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.3635340461451885, |
| "grad_norm": 0.5271950734928447, |
| "learning_rate": 4.883187317480184e-05, |
| "loss": 0.4711, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.3646595385481148, |
| "grad_norm": 0.409281575876073, |
| "learning_rate": 4.8811013767209016e-05, |
| "loss": 0.4659, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.3657850309510411, |
| "grad_norm": 0.539894161794808, |
| "learning_rate": 4.879015435961619e-05, |
| "loss": 0.4617, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.36691052335396734, |
| "grad_norm": 0.5540340872215955, |
| "learning_rate": 4.876929495202336e-05, |
| "loss": 0.4488, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.36803601575689365, |
| "grad_norm": 0.5450024514176801, |
| "learning_rate": 4.874843554443054e-05, |
| "loss": 0.4443, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.3691615081598199, |
| "grad_norm": 0.4022020669388415, |
| "learning_rate": 4.872757613683772e-05, |
| "loss": 0.4447, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.3702870005627462, |
| "grad_norm": 0.5015493685184448, |
| "learning_rate": 4.8706716729244894e-05, |
| "loss": 0.4805, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.37141249296567247, |
| "grad_norm": 0.5205564906682338, |
| "learning_rate": 4.8685857321652064e-05, |
| "loss": 0.4727, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.3725379853685988, |
| "grad_norm": 0.48440539535018856, |
| "learning_rate": 4.866499791405924e-05, |
| "loss": 0.4789, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.37366347777152503, |
| "grad_norm": 0.5749114846228484, |
| "learning_rate": 4.864413850646642e-05, |
| "loss": 0.4452, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.37478897017445134, |
| "grad_norm": 0.5347894114064871, |
| "learning_rate": 4.8623279098873595e-05, |
| "loss": 0.466, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.3759144625773776, |
| "grad_norm": 0.5219184880333937, |
| "learning_rate": 4.860241969128077e-05, |
| "loss": 0.4297, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.3770399549803039, |
| "grad_norm": 0.4540713981584441, |
| "learning_rate": 4.858156028368794e-05, |
| "loss": 0.4716, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.37816544738323016, |
| "grad_norm": 0.65356051165203, |
| "learning_rate": 4.856070087609512e-05, |
| "loss": 0.4709, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.37929093978615647, |
| "grad_norm": 0.4310409303487337, |
| "learning_rate": 4.8539841468502296e-05, |
| "loss": 0.4931, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.3804164321890827, |
| "grad_norm": 0.7081733344034011, |
| "learning_rate": 4.851898206090947e-05, |
| "loss": 0.4787, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.38154192459200903, |
| "grad_norm": 0.4155006654801174, |
| "learning_rate": 4.8498122653316644e-05, |
| "loss": 0.4547, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.3826674169949353, |
| "grad_norm": 0.6789073369172818, |
| "learning_rate": 4.847726324572383e-05, |
| "loss": 0.4526, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.38379290939786154, |
| "grad_norm": 0.4700941252868989, |
| "learning_rate": 4.8456403838131e-05, |
| "loss": 0.4585, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.38491840180078785, |
| "grad_norm": 0.5721349237605509, |
| "learning_rate": 4.8435544430538175e-05, |
| "loss": 0.4305, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.3860438942037141, |
| "grad_norm": 0.6679337639014323, |
| "learning_rate": 4.841468502294535e-05, |
| "loss": 0.4727, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.3871693866066404, |
| "grad_norm": 0.6136310708197658, |
| "learning_rate": 4.839382561535253e-05, |
| "loss": 0.4632, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.38829487900956666, |
| "grad_norm": 0.560884620312814, |
| "learning_rate": 4.83729662077597e-05, |
| "loss": 0.4438, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.38942037141249297, |
| "grad_norm": 0.6098279474363337, |
| "learning_rate": 4.835210680016688e-05, |
| "loss": 0.4407, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.3905458638154192, |
| "grad_norm": 0.5154661466104475, |
| "learning_rate": 4.833124739257405e-05, |
| "loss": 0.446, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.39167135621834553, |
| "grad_norm": 0.7038644519573083, |
| "learning_rate": 4.831038798498123e-05, |
| "loss": 0.4646, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.3927968486212718, |
| "grad_norm": 0.5207244024602116, |
| "learning_rate": 4.828952857738841e-05, |
| "loss": 0.4476, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.3939223410241981, |
| "grad_norm": 0.5084794430652734, |
| "learning_rate": 4.8268669169795584e-05, |
| "loss": 0.4478, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.39504783342712435, |
| "grad_norm": 0.5120708559114392, |
| "learning_rate": 4.8247809762202754e-05, |
| "loss": 0.4622, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.39617332583005066, |
| "grad_norm": 0.6265345182325885, |
| "learning_rate": 4.822695035460993e-05, |
| "loss": 0.4706, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.3972988182329769, |
| "grad_norm": 0.5750184584832099, |
| "learning_rate": 4.820609094701711e-05, |
| "loss": 0.4882, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.3984243106359032, |
| "grad_norm": 0.5490268536187386, |
| "learning_rate": 4.818523153942428e-05, |
| "loss": 0.4601, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.3995498030388295, |
| "grad_norm": 0.5317594975111523, |
| "learning_rate": 4.816437213183146e-05, |
| "loss": 0.4587, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.4006752954417558, |
| "grad_norm": 0.49454469908724474, |
| "learning_rate": 4.814351272423863e-05, |
| "loss": 0.4559, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.40180078784468204, |
| "grad_norm": 0.5764930655203424, |
| "learning_rate": 4.812265331664581e-05, |
| "loss": 0.4677, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.40292628024760835, |
| "grad_norm": 0.4254928567571142, |
| "learning_rate": 4.8101793909052986e-05, |
| "loss": 0.4468, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.4040517726505346, |
| "grad_norm": 0.5563885643276592, |
| "learning_rate": 4.808093450146016e-05, |
| "loss": 0.4333, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.4051772650534609, |
| "grad_norm": 0.4150167393933345, |
| "learning_rate": 4.806007509386733e-05, |
| "loss": 0.4515, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.40630275745638716, |
| "grad_norm": 0.4751390092870927, |
| "learning_rate": 4.803921568627452e-05, |
| "loss": 0.4776, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.4074282498593135, |
| "grad_norm": 0.43506496230734293, |
| "learning_rate": 4.801835627868169e-05, |
| "loss": 0.4533, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.4085537422622397, |
| "grad_norm": 0.4488704280500811, |
| "learning_rate": 4.7997496871088864e-05, |
| "loss": 0.4555, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.40967923466516604, |
| "grad_norm": 0.4888805568606264, |
| "learning_rate": 4.797663746349604e-05, |
| "loss": 0.4529, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.4108047270680923, |
| "grad_norm": 0.47826665202586255, |
| "learning_rate": 4.795577805590322e-05, |
| "loss": 0.4565, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.41193021947101854, |
| "grad_norm": 0.427518205471488, |
| "learning_rate": 4.793491864831039e-05, |
| "loss": 0.4622, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.41305571187394485, |
| "grad_norm": 0.4657645019409657, |
| "learning_rate": 4.7914059240717565e-05, |
| "loss": 0.4731, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.4141812042768711, |
| "grad_norm": 0.4980798737202691, |
| "learning_rate": 4.789319983312474e-05, |
| "loss": 0.4496, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.4153066966797974, |
| "grad_norm": 0.4106122018359277, |
| "learning_rate": 4.787234042553192e-05, |
| "loss": 0.4537, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.41643218908272367, |
| "grad_norm": 0.46050479994884624, |
| "learning_rate": 4.785148101793909e-05, |
| "loss": 0.4434, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.41755768148565, |
| "grad_norm": 0.5221350337150601, |
| "learning_rate": 4.783062161034627e-05, |
| "loss": 0.458, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.41868317388857623, |
| "grad_norm": 0.44101630868691777, |
| "learning_rate": 4.780976220275344e-05, |
| "loss": 0.4614, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.41980866629150254, |
| "grad_norm": 0.5767546396305836, |
| "learning_rate": 4.778890279516062e-05, |
| "loss": 0.4829, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.4209341586944288, |
| "grad_norm": 0.47996101159798066, |
| "learning_rate": 4.77680433875678e-05, |
| "loss": 0.4569, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.4220596510973551, |
| "grad_norm": 0.5033820590275159, |
| "learning_rate": 4.774718397997497e-05, |
| "loss": 0.453, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.42318514350028136, |
| "grad_norm": 0.4218904797929267, |
| "learning_rate": 4.7726324572382145e-05, |
| "loss": 0.4936, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.42431063590320767, |
| "grad_norm": 0.41845341895601695, |
| "learning_rate": 4.770546516478932e-05, |
| "loss": 0.4371, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.4254361283061339, |
| "grad_norm": 0.38945322748226296, |
| "learning_rate": 4.76846057571965e-05, |
| "loss": 0.4391, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.42656162070906023, |
| "grad_norm": 0.4161185110299941, |
| "learning_rate": 4.766374634960367e-05, |
| "loss": 0.4485, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.4276871131119865, |
| "grad_norm": 0.3864477310593941, |
| "learning_rate": 4.764288694201085e-05, |
| "loss": 0.4543, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.4288126055149128, |
| "grad_norm": 0.48714053443872946, |
| "learning_rate": 4.762202753441802e-05, |
| "loss": 0.4809, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.42993809791783905, |
| "grad_norm": 0.44027599652018634, |
| "learning_rate": 4.76011681268252e-05, |
| "loss": 0.4664, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.43106359032076536, |
| "grad_norm": 0.46260149439173776, |
| "learning_rate": 4.758030871923238e-05, |
| "loss": 0.4358, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.4321890827236916, |
| "grad_norm": 0.37340303441017136, |
| "learning_rate": 4.7559449311639554e-05, |
| "loss": 0.4444, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.4333145751266179, |
| "grad_norm": 0.4414988549473453, |
| "learning_rate": 4.7538589904046724e-05, |
| "loss": 0.4338, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.43444006752954417, |
| "grad_norm": 0.4002550060272223, |
| "learning_rate": 4.751773049645391e-05, |
| "loss": 0.4494, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.4355655599324705, |
| "grad_norm": 0.4158146887262931, |
| "learning_rate": 4.749687108886108e-05, |
| "loss": 0.4454, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.43669105233539673, |
| "grad_norm": 0.35977608941282263, |
| "learning_rate": 4.7476011681268255e-05, |
| "loss": 0.4533, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.43781654473832304, |
| "grad_norm": 0.4764697673218214, |
| "learning_rate": 4.745515227367543e-05, |
| "loss": 0.4835, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.4389420371412493, |
| "grad_norm": 0.35081968018481574, |
| "learning_rate": 4.743429286608261e-05, |
| "loss": 0.4579, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.44006752954417555, |
| "grad_norm": 0.4170219497175011, |
| "learning_rate": 4.741343345848978e-05, |
| "loss": 0.4398, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.44119302194710186, |
| "grad_norm": 0.44456892241843987, |
| "learning_rate": 4.739257405089696e-05, |
| "loss": 0.4805, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.4423185143500281, |
| "grad_norm": 0.3810686961824828, |
| "learning_rate": 4.737171464330413e-05, |
| "loss": 0.4687, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.4434440067529544, |
| "grad_norm": 0.4459516182156416, |
| "learning_rate": 4.73508552357113e-05, |
| "loss": 0.4441, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.4445694991558807, |
| "grad_norm": 0.36574072948327524, |
| "learning_rate": 4.732999582811849e-05, |
| "loss": 0.4759, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.445694991558807, |
| "grad_norm": 0.46519670122225776, |
| "learning_rate": 4.730913642052566e-05, |
| "loss": 0.4622, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.44682048396173324, |
| "grad_norm": 0.3782284810519757, |
| "learning_rate": 4.7288277012932834e-05, |
| "loss": 0.4518, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.44794597636465955, |
| "grad_norm": 0.4321697226823169, |
| "learning_rate": 4.726741760534001e-05, |
| "loss": 0.4213, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.4490714687675858, |
| "grad_norm": 0.3846389059595841, |
| "learning_rate": 4.724655819774719e-05, |
| "loss": 0.4473, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.4501969611705121, |
| "grad_norm": 0.4148349323542458, |
| "learning_rate": 4.722569879015436e-05, |
| "loss": 0.447, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.45132245357343836, |
| "grad_norm": 0.3987423433461808, |
| "learning_rate": 4.720483938256154e-05, |
| "loss": 0.4428, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.4524479459763647, |
| "grad_norm": 0.42246987876628445, |
| "learning_rate": 4.718397997496871e-05, |
| "loss": 0.4456, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.4535734383792909, |
| "grad_norm": 0.4060448399568812, |
| "learning_rate": 4.716312056737589e-05, |
| "loss": 0.4734, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.45469893078221724, |
| "grad_norm": 0.38939419691921573, |
| "learning_rate": 4.7142261159783066e-05, |
| "loss": 0.4506, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.4558244231851435, |
| "grad_norm": 0.39441558158161155, |
| "learning_rate": 4.712140175219024e-05, |
| "loss": 0.4611, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.4569499155880698, |
| "grad_norm": 0.37043790127930454, |
| "learning_rate": 4.7100542344597413e-05, |
| "loss": 0.4446, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.45807540799099605, |
| "grad_norm": 0.39081323070794516, |
| "learning_rate": 4.707968293700459e-05, |
| "loss": 0.4586, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.45920090039392236, |
| "grad_norm": 0.38815613346341743, |
| "learning_rate": 4.705882352941177e-05, |
| "loss": 0.4731, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.4603263927968486, |
| "grad_norm": 0.4081757852974463, |
| "learning_rate": 4.7037964121818944e-05, |
| "loss": 0.4392, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.4614518851997749, |
| "grad_norm": 0.3789626983185206, |
| "learning_rate": 4.7017104714226115e-05, |
| "loss": 0.4561, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.4625773776027012, |
| "grad_norm": 0.5000455667230893, |
| "learning_rate": 4.69962453066333e-05, |
| "loss": 0.4642, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.4637028700056275, |
| "grad_norm": 0.3422337438589666, |
| "learning_rate": 4.697538589904047e-05, |
| "loss": 0.4592, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.46482836240855374, |
| "grad_norm": 0.5638947084171662, |
| "learning_rate": 4.6954526491447646e-05, |
| "loss": 0.451, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.46595385481148, |
| "grad_norm": 0.38536737227105394, |
| "learning_rate": 4.693366708385482e-05, |
| "loss": 0.4386, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.4670793472144063, |
| "grad_norm": 0.46615900085704925, |
| "learning_rate": 4.691280767626199e-05, |
| "loss": 0.4572, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.46820483961733256, |
| "grad_norm": 0.45954601145736806, |
| "learning_rate": 4.689194826866917e-05, |
| "loss": 0.4745, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.46933033202025887, |
| "grad_norm": 0.3925870159696147, |
| "learning_rate": 4.687108886107635e-05, |
| "loss": 0.4383, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.4704558244231851, |
| "grad_norm": 0.4232172013685177, |
| "learning_rate": 4.6850229453483524e-05, |
| "loss": 0.4455, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.47158131682611143, |
| "grad_norm": 0.4709258500108095, |
| "learning_rate": 4.6829370045890694e-05, |
| "loss": 0.4329, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.4727068092290377, |
| "grad_norm": 0.5478084541084817, |
| "learning_rate": 4.680851063829788e-05, |
| "loss": 0.4778, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.473832301631964, |
| "grad_norm": 0.39060968027365583, |
| "learning_rate": 4.678765123070505e-05, |
| "loss": 0.446, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.47495779403489025, |
| "grad_norm": 0.43252322301543766, |
| "learning_rate": 4.6766791823112225e-05, |
| "loss": 0.4606, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.47608328643781656, |
| "grad_norm": 0.48537861169690405, |
| "learning_rate": 4.67459324155194e-05, |
| "loss": 0.4845, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.4772087788407428, |
| "grad_norm": 0.34601404275255593, |
| "learning_rate": 4.672507300792658e-05, |
| "loss": 0.4357, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.4783342712436691, |
| "grad_norm": 0.42339913946057167, |
| "learning_rate": 4.670421360033375e-05, |
| "loss": 0.4421, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.47945976364659537, |
| "grad_norm": 0.39857659754496044, |
| "learning_rate": 4.668335419274093e-05, |
| "loss": 0.4448, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.4805852560495217, |
| "grad_norm": 0.38982322860737306, |
| "learning_rate": 4.66624947851481e-05, |
| "loss": 0.4449, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.48171074845244793, |
| "grad_norm": 0.4167533082716713, |
| "learning_rate": 4.664163537755528e-05, |
| "loss": 0.4538, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.48283624085537424, |
| "grad_norm": 0.38396785885791673, |
| "learning_rate": 4.662077596996246e-05, |
| "loss": 0.4665, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.4839617332583005, |
| "grad_norm": 0.4460443959564988, |
| "learning_rate": 4.6599916562369634e-05, |
| "loss": 0.4695, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.4850872256612268, |
| "grad_norm": 0.4307496077176856, |
| "learning_rate": 4.6579057154776804e-05, |
| "loss": 0.479, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.48621271806415306, |
| "grad_norm": 0.4703944597323029, |
| "learning_rate": 4.655819774718399e-05, |
| "loss": 0.4616, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.48733821046707937, |
| "grad_norm": 0.4532939669627873, |
| "learning_rate": 4.653733833959116e-05, |
| "loss": 0.4386, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.4884637028700056, |
| "grad_norm": 0.38992923384312006, |
| "learning_rate": 4.651647893199833e-05, |
| "loss": 0.4403, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.48958919527293193, |
| "grad_norm": 0.41316331078388, |
| "learning_rate": 4.649561952440551e-05, |
| "loss": 0.431, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.4907146876758582, |
| "grad_norm": 0.36589748301197256, |
| "learning_rate": 4.647476011681268e-05, |
| "loss": 0.4487, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.4918401800787845, |
| "grad_norm": 0.4790306414346754, |
| "learning_rate": 4.645390070921986e-05, |
| "loss": 0.465, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.49296567248171075, |
| "grad_norm": 0.3884802942940033, |
| "learning_rate": 4.6433041301627036e-05, |
| "loss": 0.4488, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.494091164884637, |
| "grad_norm": 0.43931802766911515, |
| "learning_rate": 4.641218189403421e-05, |
| "loss": 0.4844, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.4952166572875633, |
| "grad_norm": 0.35209964530255544, |
| "learning_rate": 4.6391322486441383e-05, |
| "loss": 0.4442, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.49634214969048956, |
| "grad_norm": 0.38004709563408534, |
| "learning_rate": 4.637046307884857e-05, |
| "loss": 0.4753, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.4974676420934159, |
| "grad_norm": 0.3409798351543027, |
| "learning_rate": 4.634960367125574e-05, |
| "loss": 0.4342, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.4985931344963421, |
| "grad_norm": 0.39326837683822974, |
| "learning_rate": 4.6328744263662914e-05, |
| "loss": 0.4539, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.49971862689926844, |
| "grad_norm": 0.34187980768631865, |
| "learning_rate": 4.630788485607009e-05, |
| "loss": 0.4551, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.5008441193021947, |
| "grad_norm": 0.3788406979843315, |
| "learning_rate": 4.628702544847727e-05, |
| "loss": 0.4536, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.501969611705121, |
| "grad_norm": 0.37725539074157804, |
| "learning_rate": 4.626616604088444e-05, |
| "loss": 0.4391, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.5030951041080473, |
| "grad_norm": 0.3294085025027009, |
| "learning_rate": 4.6245306633291616e-05, |
| "loss": 0.4702, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.5042205965109735, |
| "grad_norm": 0.33534954747479645, |
| "learning_rate": 4.622444722569879e-05, |
| "loss": 0.437, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.5053460889138999, |
| "grad_norm": 0.4077362372846078, |
| "learning_rate": 4.620358781810597e-05, |
| "loss": 0.4335, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.5064715813168261, |
| "grad_norm": 0.39599735165417416, |
| "learning_rate": 4.618272841051314e-05, |
| "loss": 0.4428, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5075970737197524, |
| "grad_norm": 0.3481976377046397, |
| "learning_rate": 4.6161869002920323e-05, |
| "loss": 0.4344, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.5087225661226786, |
| "grad_norm": 0.35576918023033427, |
| "learning_rate": 4.6141009595327494e-05, |
| "loss": 0.4343, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.509848058525605, |
| "grad_norm": 0.4458349270928396, |
| "learning_rate": 4.612015018773467e-05, |
| "loss": 0.4418, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.5109735509285313, |
| "grad_norm": 0.39940538094885114, |
| "learning_rate": 4.609929078014185e-05, |
| "loss": 0.4466, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.5120990433314575, |
| "grad_norm": 0.401792941347298, |
| "learning_rate": 4.607843137254902e-05, |
| "loss": 0.4317, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.5132245357343838, |
| "grad_norm": 0.3570336540962956, |
| "learning_rate": 4.6057571964956195e-05, |
| "loss": 0.4116, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.5143500281373101, |
| "grad_norm": 0.4132726931400482, |
| "learning_rate": 4.603671255736337e-05, |
| "loss": 0.4451, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.5154755205402364, |
| "grad_norm": 0.3450781738437834, |
| "learning_rate": 4.601585314977055e-05, |
| "loss": 0.4245, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.5166010129431626, |
| "grad_norm": 0.4044295769667828, |
| "learning_rate": 4.599499374217772e-05, |
| "loss": 0.4261, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.5177265053460889, |
| "grad_norm": 0.4460134360979799, |
| "learning_rate": 4.59741343345849e-05, |
| "loss": 0.4569, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5188519977490152, |
| "grad_norm": 0.35709408492200145, |
| "learning_rate": 4.595327492699207e-05, |
| "loss": 0.447, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.5199774901519415, |
| "grad_norm": 0.49622852545171614, |
| "learning_rate": 4.593241551939925e-05, |
| "loss": 0.4482, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.5211029825548678, |
| "grad_norm": 0.43774205674931815, |
| "learning_rate": 4.591155611180643e-05, |
| "loss": 0.4447, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.522228474957794, |
| "grad_norm": 0.4071440395299347, |
| "learning_rate": 4.5890696704213604e-05, |
| "loss": 0.428, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.5233539673607203, |
| "grad_norm": 0.42816990064501337, |
| "learning_rate": 4.5869837296620774e-05, |
| "loss": 0.4264, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.5244794597636466, |
| "grad_norm": 0.4003821811209746, |
| "learning_rate": 4.584897788902796e-05, |
| "loss": 0.4502, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.5256049521665729, |
| "grad_norm": 0.4306225774416482, |
| "learning_rate": 4.582811848143513e-05, |
| "loss": 0.448, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.5267304445694991, |
| "grad_norm": 0.438000243330178, |
| "learning_rate": 4.5807259073842305e-05, |
| "loss": 0.4461, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.5278559369724254, |
| "grad_norm": 0.5459912564891531, |
| "learning_rate": 4.578639966624948e-05, |
| "loss": 0.4454, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.5289814293753518, |
| "grad_norm": 0.3937540607846447, |
| "learning_rate": 4.576554025865666e-05, |
| "loss": 0.4511, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.530106921778278, |
| "grad_norm": 0.5255901368328048, |
| "learning_rate": 4.574468085106383e-05, |
| "loss": 0.4656, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.5312324141812043, |
| "grad_norm": 0.37290888598540667, |
| "learning_rate": 4.572382144347101e-05, |
| "loss": 0.4346, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.5323579065841305, |
| "grad_norm": 0.5151271720318875, |
| "learning_rate": 4.570296203587818e-05, |
| "loss": 0.4386, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.5334833989870569, |
| "grad_norm": 0.5196455814853196, |
| "learning_rate": 4.568210262828536e-05, |
| "loss": 0.4281, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.5346088913899831, |
| "grad_norm": 0.5516694216088329, |
| "learning_rate": 4.566124322069254e-05, |
| "loss": 0.4678, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.5357343837929094, |
| "grad_norm": 0.40935239231865317, |
| "learning_rate": 4.564038381309971e-05, |
| "loss": 0.4392, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.5368598761958356, |
| "grad_norm": 0.4232251188780467, |
| "learning_rate": 4.5619524405506884e-05, |
| "loss": 0.4541, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.537985368598762, |
| "grad_norm": 0.47065714592515695, |
| "learning_rate": 4.559866499791406e-05, |
| "loss": 0.4573, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.5391108610016883, |
| "grad_norm": 0.45139662325934604, |
| "learning_rate": 4.557780559032124e-05, |
| "loss": 0.466, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.5402363534046145, |
| "grad_norm": 0.43277954798040297, |
| "learning_rate": 4.555694618272841e-05, |
| "loss": 0.4395, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5413618458075408, |
| "grad_norm": 0.3937402339467187, |
| "learning_rate": 4.553608677513559e-05, |
| "loss": 0.4598, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.5424873382104671, |
| "grad_norm": 0.43149504891962365, |
| "learning_rate": 4.551522736754276e-05, |
| "loss": 0.4292, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.5436128306133934, |
| "grad_norm": 0.3833426447527127, |
| "learning_rate": 4.549436795994994e-05, |
| "loss": 0.4462, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.5447383230163196, |
| "grad_norm": 0.5753891830767618, |
| "learning_rate": 4.5473508552357116e-05, |
| "loss": 0.4674, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.5458638154192459, |
| "grad_norm": 0.37095342775133894, |
| "learning_rate": 4.5452649144764293e-05, |
| "loss": 0.4502, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.5469893078221723, |
| "grad_norm": 0.44452090514956777, |
| "learning_rate": 4.5431789737171464e-05, |
| "loss": 0.4195, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.5481148002250985, |
| "grad_norm": 0.39266318915026655, |
| "learning_rate": 4.541093032957864e-05, |
| "loss": 0.4308, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.5492402926280248, |
| "grad_norm": 0.38579575811998595, |
| "learning_rate": 4.539007092198582e-05, |
| "loss": 0.4732, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.550365785030951, |
| "grad_norm": 0.3927716846528752, |
| "learning_rate": 4.5369211514392995e-05, |
| "loss": 0.4554, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.5514912774338773, |
| "grad_norm": 0.3518677438969378, |
| "learning_rate": 4.5348352106800165e-05, |
| "loss": 0.435, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.5526167698368036, |
| "grad_norm": 0.3989470078182982, |
| "learning_rate": 4.532749269920735e-05, |
| "loss": 0.4581, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.5537422622397299, |
| "grad_norm": 0.31132596342015495, |
| "learning_rate": 4.530663329161452e-05, |
| "loss": 0.4166, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.5548677546426561, |
| "grad_norm": 0.42773832735938333, |
| "learning_rate": 4.5285773884021696e-05, |
| "loss": 0.4498, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.5559932470455824, |
| "grad_norm": 0.3337455720428052, |
| "learning_rate": 4.526491447642887e-05, |
| "loss": 0.4959, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.5571187394485088, |
| "grad_norm": 0.3784028479466481, |
| "learning_rate": 4.524405506883605e-05, |
| "loss": 0.4528, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.558244231851435, |
| "grad_norm": 0.3649036934635355, |
| "learning_rate": 4.522319566124322e-05, |
| "loss": 0.4382, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.5593697242543613, |
| "grad_norm": 0.37624738124672374, |
| "learning_rate": 4.52023362536504e-05, |
| "loss": 0.444, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.5604952166572875, |
| "grad_norm": 0.41375280657115326, |
| "learning_rate": 4.5181476846057574e-05, |
| "loss": 0.445, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.5616207090602139, |
| "grad_norm": 0.4473059694404265, |
| "learning_rate": 4.5160617438464744e-05, |
| "loss": 0.425, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.5627462014631401, |
| "grad_norm": 0.37225084914483775, |
| "learning_rate": 4.513975803087193e-05, |
| "loss": 0.441, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5638716938660664, |
| "grad_norm": 0.3940588853331884, |
| "learning_rate": 4.51188986232791e-05, |
| "loss": 0.4466, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.5649971862689926, |
| "grad_norm": 0.3470104737718654, |
| "learning_rate": 4.5098039215686275e-05, |
| "loss": 0.4474, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.566122678671919, |
| "grad_norm": 0.4164834773144051, |
| "learning_rate": 4.507717980809345e-05, |
| "loss": 0.4442, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.5672481710748453, |
| "grad_norm": 0.3652420299854053, |
| "learning_rate": 4.505632040050063e-05, |
| "loss": 0.4436, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.5683736634777715, |
| "grad_norm": 0.4103075119748004, |
| "learning_rate": 4.50354609929078e-05, |
| "loss": 0.4459, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.5694991558806978, |
| "grad_norm": 0.39102170524673335, |
| "learning_rate": 4.501460158531498e-05, |
| "loss": 0.4268, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.5706246482836241, |
| "grad_norm": 0.4942727267066722, |
| "learning_rate": 4.499374217772215e-05, |
| "loss": 0.4722, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.5717501406865504, |
| "grad_norm": 0.3465319015459766, |
| "learning_rate": 4.497288277012933e-05, |
| "loss": 0.4408, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.5728756330894766, |
| "grad_norm": 0.4074806411985911, |
| "learning_rate": 4.495202336253651e-05, |
| "loss": 0.4212, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.5740011254924029, |
| "grad_norm": 0.38192085376045243, |
| "learning_rate": 4.4931163954943684e-05, |
| "loss": 0.41, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5751266178953293, |
| "grad_norm": 0.3702590158057979, |
| "learning_rate": 4.4910304547350854e-05, |
| "loss": 0.4255, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.5762521102982555, |
| "grad_norm": 0.3436403538534127, |
| "learning_rate": 4.488944513975804e-05, |
| "loss": 0.4538, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.5773776027011818, |
| "grad_norm": 0.3877342893162592, |
| "learning_rate": 4.486858573216521e-05, |
| "loss": 0.4182, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.578503095104108, |
| "grad_norm": 0.3460201187876074, |
| "learning_rate": 4.4847726324572385e-05, |
| "loss": 0.4432, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.5796285875070343, |
| "grad_norm": 0.34511398785310915, |
| "learning_rate": 4.482686691697956e-05, |
| "loss": 0.4469, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.5807540799099606, |
| "grad_norm": 0.4258487344474797, |
| "learning_rate": 4.480600750938674e-05, |
| "loss": 0.4583, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.5818795723128869, |
| "grad_norm": 0.36803297271961477, |
| "learning_rate": 4.478514810179391e-05, |
| "loss": 0.4428, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.5830050647158131, |
| "grad_norm": 0.46401852203645827, |
| "learning_rate": 4.4764288694201086e-05, |
| "loss": 0.4456, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.5841305571187394, |
| "grad_norm": 0.39205048802946624, |
| "learning_rate": 4.4743429286608263e-05, |
| "loss": 0.441, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.5852560495216658, |
| "grad_norm": 0.39757611365031714, |
| "learning_rate": 4.4722569879015434e-05, |
| "loss": 0.4428, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.586381541924592, |
| "grad_norm": 0.3647536671953435, |
| "learning_rate": 4.470171047142262e-05, |
| "loss": 0.4513, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.5875070343275183, |
| "grad_norm": 0.39429072510874175, |
| "learning_rate": 4.468085106382979e-05, |
| "loss": 0.4132, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.5886325267304445, |
| "grad_norm": 0.40901642747342404, |
| "learning_rate": 4.4659991656236965e-05, |
| "loss": 0.4481, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.5897580191333709, |
| "grad_norm": 0.3992749524524198, |
| "learning_rate": 4.463913224864414e-05, |
| "loss": 0.4468, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.5908835115362971, |
| "grad_norm": 0.4722275927889856, |
| "learning_rate": 4.461827284105132e-05, |
| "loss": 0.4428, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.5920090039392234, |
| "grad_norm": 0.42866183958875864, |
| "learning_rate": 4.459741343345849e-05, |
| "loss": 0.4194, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.5931344963421497, |
| "grad_norm": 0.38204868156886707, |
| "learning_rate": 4.4576554025865666e-05, |
| "loss": 0.4402, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.594259988745076, |
| "grad_norm": 0.35148215802167393, |
| "learning_rate": 4.455569461827284e-05, |
| "loss": 0.4542, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.5953854811480023, |
| "grad_norm": 0.40153400690617524, |
| "learning_rate": 4.453483521068002e-05, |
| "loss": 0.4102, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.5965109735509285, |
| "grad_norm": 0.46986899886821576, |
| "learning_rate": 4.45139758030872e-05, |
| "loss": 0.4436, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.5976364659538548, |
| "grad_norm": 0.35390475462960685, |
| "learning_rate": 4.4493116395494374e-05, |
| "loss": 0.4398, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.5987619583567811, |
| "grad_norm": 0.4482185977258061, |
| "learning_rate": 4.4472256987901544e-05, |
| "loss": 0.4326, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.5998874507597074, |
| "grad_norm": 0.44232865264761434, |
| "learning_rate": 4.445139758030872e-05, |
| "loss": 0.4325, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.6010129431626337, |
| "grad_norm": 0.4183843016810463, |
| "learning_rate": 4.44305381727159e-05, |
| "loss": 0.4553, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.6021384355655599, |
| "grad_norm": 0.4242250812536985, |
| "learning_rate": 4.4409678765123075e-05, |
| "loss": 0.4232, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.6032639279684862, |
| "grad_norm": 0.3888142076123292, |
| "learning_rate": 4.4388819357530245e-05, |
| "loss": 0.4241, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.6043894203714125, |
| "grad_norm": 0.40486855004609845, |
| "learning_rate": 4.436795994993743e-05, |
| "loss": 0.4191, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.6055149127743388, |
| "grad_norm": 0.47154131963320084, |
| "learning_rate": 4.43471005423446e-05, |
| "loss": 0.4595, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.606640405177265, |
| "grad_norm": 0.38490507840256083, |
| "learning_rate": 4.432624113475177e-05, |
| "loss": 0.4199, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.6077658975801913, |
| "grad_norm": 0.46096486506497264, |
| "learning_rate": 4.430538172715895e-05, |
| "loss": 0.4448, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.6088913899831176, |
| "grad_norm": 0.4947895759240074, |
| "learning_rate": 4.428452231956612e-05, |
| "loss": 0.4342, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.6100168823860439, |
| "grad_norm": 0.3829854963511767, |
| "learning_rate": 4.42636629119733e-05, |
| "loss": 0.4186, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.6111423747889702, |
| "grad_norm": 0.6245507343869451, |
| "learning_rate": 4.424280350438048e-05, |
| "loss": 0.441, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.6122678671918964, |
| "grad_norm": 0.5300235385565563, |
| "learning_rate": 4.4221944096787654e-05, |
| "loss": 0.4375, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.6133933595948228, |
| "grad_norm": 0.4930881980261961, |
| "learning_rate": 4.4201084689194824e-05, |
| "loss": 0.4621, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.614518851997749, |
| "grad_norm": 0.5638424830870375, |
| "learning_rate": 4.418022528160201e-05, |
| "loss": 0.4411, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.6156443444006753, |
| "grad_norm": 0.3716115037856444, |
| "learning_rate": 4.415936587400918e-05, |
| "loss": 0.4528, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.6167698368036015, |
| "grad_norm": 0.5223401927324024, |
| "learning_rate": 4.4138506466416355e-05, |
| "loss": 0.4327, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.6178953292065279, |
| "grad_norm": 0.37311721165933265, |
| "learning_rate": 4.411764705882353e-05, |
| "loss": 0.4058, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.6190208216094542, |
| "grad_norm": 0.532332931429002, |
| "learning_rate": 4.409678765123071e-05, |
| "loss": 0.4445, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6201463140123804, |
| "grad_norm": 0.5059754011866813, |
| "learning_rate": 4.407592824363788e-05, |
| "loss": 0.4257, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.6212718064153067, |
| "grad_norm": 0.4904818838015066, |
| "learning_rate": 4.405506883604506e-05, |
| "loss": 0.4353, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.622397298818233, |
| "grad_norm": 0.6200335434273374, |
| "learning_rate": 4.4034209428452233e-05, |
| "loss": 0.4416, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.6235227912211593, |
| "grad_norm": 0.3199203022808196, |
| "learning_rate": 4.401335002085941e-05, |
| "loss": 0.4355, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.6246482836240855, |
| "grad_norm": 0.5681807784529108, |
| "learning_rate": 4.399249061326659e-05, |
| "loss": 0.431, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.6257737760270118, |
| "grad_norm": 0.3995337627796738, |
| "learning_rate": 4.3971631205673764e-05, |
| "loss": 0.4312, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.6268992684299382, |
| "grad_norm": 0.5466993132659691, |
| "learning_rate": 4.3950771798080935e-05, |
| "loss": 0.4311, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.6280247608328644, |
| "grad_norm": 0.5670240814298136, |
| "learning_rate": 4.392991239048811e-05, |
| "loss": 0.4564, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.6291502532357907, |
| "grad_norm": 0.47107566738859724, |
| "learning_rate": 4.390905298289529e-05, |
| "loss": 0.4436, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.6302757456387169, |
| "grad_norm": 0.5380491861675493, |
| "learning_rate": 4.388819357530246e-05, |
| "loss": 0.4024, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.6314012380416432, |
| "grad_norm": 0.37407644137036594, |
| "learning_rate": 4.386733416770964e-05, |
| "loss": 0.4276, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.6325267304445695, |
| "grad_norm": 0.5179459476960132, |
| "learning_rate": 4.384647476011681e-05, |
| "loss": 0.4231, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.6336522228474958, |
| "grad_norm": 0.3832305989594554, |
| "learning_rate": 4.382561535252399e-05, |
| "loss": 0.4254, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.634777715250422, |
| "grad_norm": 0.48824132268901227, |
| "learning_rate": 4.380475594493117e-05, |
| "loss": 0.4414, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.6359032076533483, |
| "grad_norm": 0.45846104242587143, |
| "learning_rate": 4.3783896537338344e-05, |
| "loss": 0.4374, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.6370287000562747, |
| "grad_norm": 0.5017380646906237, |
| "learning_rate": 4.3763037129745514e-05, |
| "loss": 0.4478, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.6381541924592009, |
| "grad_norm": 0.4706523687823463, |
| "learning_rate": 4.374217772215269e-05, |
| "loss": 0.4393, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.6392796848621272, |
| "grad_norm": 0.43746034371341663, |
| "learning_rate": 4.372131831455987e-05, |
| "loss": 0.4289, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.6404051772650534, |
| "grad_norm": 0.4971311348473273, |
| "learning_rate": 4.3700458906967045e-05, |
| "loss": 0.4632, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.6415306696679798, |
| "grad_norm": 0.32424868625443787, |
| "learning_rate": 4.367959949937422e-05, |
| "loss": 0.4439, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.642656162070906, |
| "grad_norm": 0.5530000470387829, |
| "learning_rate": 4.36587400917814e-05, |
| "loss": 0.4438, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.6437816544738323, |
| "grad_norm": 0.3619983421314401, |
| "learning_rate": 4.363788068418857e-05, |
| "loss": 0.4193, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.6449071468767585, |
| "grad_norm": 0.46202193194933755, |
| "learning_rate": 4.3617021276595746e-05, |
| "loss": 0.4308, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.6460326392796849, |
| "grad_norm": 0.4798799400653708, |
| "learning_rate": 4.359616186900292e-05, |
| "loss": 0.4072, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.6471581316826112, |
| "grad_norm": 0.42761886423074474, |
| "learning_rate": 4.35753024614101e-05, |
| "loss": 0.4357, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.6482836240855374, |
| "grad_norm": 0.4906300910854437, |
| "learning_rate": 4.355444305381727e-05, |
| "loss": 0.441, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.6494091164884637, |
| "grad_norm": 0.4312074811449326, |
| "learning_rate": 4.3533583646224454e-05, |
| "loss": 0.468, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.65053460889139, |
| "grad_norm": 0.4999437976070137, |
| "learning_rate": 4.3512724238631624e-05, |
| "loss": 0.4442, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.6516601012943163, |
| "grad_norm": 0.45200142374904256, |
| "learning_rate": 4.34918648310388e-05, |
| "loss": 0.4351, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.6527855936972425, |
| "grad_norm": 0.4481417460480344, |
| "learning_rate": 4.347100542344598e-05, |
| "loss": 0.4351, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.6539110861001688, |
| "grad_norm": 0.416680484799885, |
| "learning_rate": 4.345014601585315e-05, |
| "loss": 0.4726, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.6550365785030952, |
| "grad_norm": 0.45466741269285743, |
| "learning_rate": 4.3429286608260325e-05, |
| "loss": 0.4445, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.6561620709060214, |
| "grad_norm": 0.3767132482639794, |
| "learning_rate": 4.34084272006675e-05, |
| "loss": 0.4494, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.6572875633089477, |
| "grad_norm": 0.4045713565741537, |
| "learning_rate": 4.338756779307468e-05, |
| "loss": 0.4478, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.6584130557118739, |
| "grad_norm": 0.41406546702832436, |
| "learning_rate": 4.336670838548185e-05, |
| "loss": 0.4296, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.6595385481148002, |
| "grad_norm": 0.45192122020443987, |
| "learning_rate": 4.334584897788903e-05, |
| "loss": 0.4548, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.6606640405177265, |
| "grad_norm": 0.42522165235824544, |
| "learning_rate": 4.3324989570296203e-05, |
| "loss": 0.4545, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.6617895329206528, |
| "grad_norm": 0.4025019554306989, |
| "learning_rate": 4.330413016270338e-05, |
| "loss": 0.427, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.662915025323579, |
| "grad_norm": 0.40092550396367915, |
| "learning_rate": 4.328327075511056e-05, |
| "loss": 0.4357, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.6640405177265053, |
| "grad_norm": 0.4029073566780126, |
| "learning_rate": 4.3262411347517734e-05, |
| "loss": 0.4437, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.6651660101294317, |
| "grad_norm": 0.3754421567640776, |
| "learning_rate": 4.3241551939924905e-05, |
| "loss": 0.4488, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.6662915025323579, |
| "grad_norm": 0.4093131149759515, |
| "learning_rate": 4.322069253233209e-05, |
| "loss": 0.4296, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.6674169949352842, |
| "grad_norm": 0.37396980661829454, |
| "learning_rate": 4.319983312473926e-05, |
| "loss": 0.4135, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.6685424873382104, |
| "grad_norm": 0.39676170583430237, |
| "learning_rate": 4.3178973717146436e-05, |
| "loss": 0.4407, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.6696679797411368, |
| "grad_norm": 0.3324304115520877, |
| "learning_rate": 4.315811430955361e-05, |
| "loss": 0.4272, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.670793472144063, |
| "grad_norm": 0.41321744590045745, |
| "learning_rate": 4.313725490196079e-05, |
| "loss": 0.4535, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.6719189645469893, |
| "grad_norm": 0.37423186701221084, |
| "learning_rate": 4.311639549436796e-05, |
| "loss": 0.4243, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.6730444569499155, |
| "grad_norm": 0.34707644350816663, |
| "learning_rate": 4.309553608677514e-05, |
| "loss": 0.4224, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.6741699493528419, |
| "grad_norm": 0.39162388441219653, |
| "learning_rate": 4.3074676679182314e-05, |
| "loss": 0.4117, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.6752954417557682, |
| "grad_norm": 0.3757134091896751, |
| "learning_rate": 4.305381727158949e-05, |
| "loss": 0.4372, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6764209341586944, |
| "grad_norm": 0.486157183762819, |
| "learning_rate": 4.303295786399667e-05, |
| "loss": 0.4487, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.6775464265616207, |
| "grad_norm": 0.34615222028756854, |
| "learning_rate": 4.301209845640384e-05, |
| "loss": 0.438, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.678671918964547, |
| "grad_norm": 0.4148015924613456, |
| "learning_rate": 4.2991239048811015e-05, |
| "loss": 0.4545, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.6797974113674733, |
| "grad_norm": 0.3870669252002002, |
| "learning_rate": 4.297037964121819e-05, |
| "loss": 0.4078, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.6809229037703995, |
| "grad_norm": 0.31630147919989027, |
| "learning_rate": 4.294952023362537e-05, |
| "loss": 0.4179, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.6820483961733258, |
| "grad_norm": 0.4078672238404797, |
| "learning_rate": 4.292866082603254e-05, |
| "loss": 0.4363, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.6831738885762522, |
| "grad_norm": 0.38181818903469905, |
| "learning_rate": 4.2907801418439716e-05, |
| "loss": 0.4387, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.6842993809791784, |
| "grad_norm": 0.40887483819289494, |
| "learning_rate": 4.288694201084689e-05, |
| "loss": 0.4279, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.6854248733821047, |
| "grad_norm": 0.45835023477255316, |
| "learning_rate": 4.286608260325407e-05, |
| "loss": 0.4553, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.6865503657850309, |
| "grad_norm": 0.4496240755238681, |
| "learning_rate": 4.284522319566125e-05, |
| "loss": 0.4511, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.6876758581879572, |
| "grad_norm": 0.47923459811565877, |
| "learning_rate": 4.2824363788068424e-05, |
| "loss": 0.4494, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.6888013505908835, |
| "grad_norm": 0.4563499971704832, |
| "learning_rate": 4.2803504380475594e-05, |
| "loss": 0.4498, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.6899268429938098, |
| "grad_norm": 0.4658484510143094, |
| "learning_rate": 4.278264497288277e-05, |
| "loss": 0.446, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.691052335396736, |
| "grad_norm": 0.40099697936257683, |
| "learning_rate": 4.276178556528995e-05, |
| "loss": 0.4138, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.6921778277996623, |
| "grad_norm": 0.40681610293383885, |
| "learning_rate": 4.2740926157697125e-05, |
| "loss": 0.4428, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.6933033202025887, |
| "grad_norm": 0.492856289321406, |
| "learning_rate": 4.2720066750104295e-05, |
| "loss": 0.429, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.6944288126055149, |
| "grad_norm": 0.40198116454411964, |
| "learning_rate": 4.269920734251148e-05, |
| "loss": 0.4319, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.6955543050084412, |
| "grad_norm": 0.4049661414838683, |
| "learning_rate": 4.267834793491865e-05, |
| "loss": 0.4371, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.6966797974113674, |
| "grad_norm": 0.4200912676835283, |
| "learning_rate": 4.2657488527325826e-05, |
| "loss": 0.4273, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.6978052898142938, |
| "grad_norm": 0.3579260644405867, |
| "learning_rate": 4.2636629119733e-05, |
| "loss": 0.436, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.69893078221722, |
| "grad_norm": 0.41261145773033614, |
| "learning_rate": 4.261576971214018e-05, |
| "loss": 0.4355, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.7000562746201463, |
| "grad_norm": 0.38195673870959623, |
| "learning_rate": 4.259491030454735e-05, |
| "loss": 0.4407, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.7011817670230726, |
| "grad_norm": 0.47251318617526117, |
| "learning_rate": 4.257405089695453e-05, |
| "loss": 0.4434, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.7023072594259989, |
| "grad_norm": 0.413024502756469, |
| "learning_rate": 4.2553191489361704e-05, |
| "loss": 0.4228, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.7034327518289252, |
| "grad_norm": 0.4129659836054336, |
| "learning_rate": 4.2532332081768875e-05, |
| "loss": 0.4298, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.7045582442318514, |
| "grad_norm": 0.4371192692750543, |
| "learning_rate": 4.251147267417606e-05, |
| "loss": 0.422, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.7056837366347777, |
| "grad_norm": 0.3209464880480147, |
| "learning_rate": 4.249061326658323e-05, |
| "loss": 0.4159, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.706809229037704, |
| "grad_norm": 0.38213551742408286, |
| "learning_rate": 4.2469753858990406e-05, |
| "loss": 0.4651, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.7079347214406303, |
| "grad_norm": 0.37077014672780895, |
| "learning_rate": 4.244889445139758e-05, |
| "loss": 0.428, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.7090602138435566, |
| "grad_norm": 0.37388919361570394, |
| "learning_rate": 4.242803504380476e-05, |
| "loss": 0.4487, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.7101857062464828, |
| "grad_norm": 0.355919224811824, |
| "learning_rate": 4.240717563621193e-05, |
| "loss": 0.4273, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.7113111986494092, |
| "grad_norm": 0.3479874917806637, |
| "learning_rate": 4.2386316228619114e-05, |
| "loss": 0.4298, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.7124366910523354, |
| "grad_norm": 0.39097161117850043, |
| "learning_rate": 4.2365456821026284e-05, |
| "loss": 0.4251, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.7135621834552617, |
| "grad_norm": 0.39131656322095426, |
| "learning_rate": 4.234459741343346e-05, |
| "loss": 0.4261, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.7146876758581879, |
| "grad_norm": 1.9337556498338822, |
| "learning_rate": 4.232373800584064e-05, |
| "loss": 0.4553, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.7158131682611142, |
| "grad_norm": 0.7715880476594418, |
| "learning_rate": 4.2302878598247815e-05, |
| "loss": 0.4349, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.7169386606640406, |
| "grad_norm": 0.4198490504250616, |
| "learning_rate": 4.2282019190654985e-05, |
| "loss": 0.4427, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.7180641530669668, |
| "grad_norm": 0.6436591462942758, |
| "learning_rate": 4.226115978306216e-05, |
| "loss": 0.4428, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.7191896454698931, |
| "grad_norm": 0.46958357266306217, |
| "learning_rate": 4.224030037546934e-05, |
| "loss": 0.4096, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.7203151378728193, |
| "grad_norm": 0.5409557375822074, |
| "learning_rate": 4.2219440967876516e-05, |
| "loss": 0.4165, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.7214406302757457, |
| "grad_norm": 0.505386305383113, |
| "learning_rate": 4.219858156028369e-05, |
| "loss": 0.4232, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.7225661226786719, |
| "grad_norm": 0.47036754544713516, |
| "learning_rate": 4.217772215269087e-05, |
| "loss": 0.4187, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.7236916150815982, |
| "grad_norm": 0.5935180204625328, |
| "learning_rate": 4.215686274509804e-05, |
| "loss": 0.4326, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.7248171074845244, |
| "grad_norm": 0.37111793924942255, |
| "learning_rate": 4.213600333750522e-05, |
| "loss": 0.4235, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.7259425998874508, |
| "grad_norm": 0.6111195959607152, |
| "learning_rate": 4.2115143929912394e-05, |
| "loss": 0.4254, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.727068092290377, |
| "grad_norm": 0.35910288955770575, |
| "learning_rate": 4.2094284522319564e-05, |
| "loss": 0.4244, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.7281935846933033, |
| "grad_norm": 0.4804262191052388, |
| "learning_rate": 4.207342511472674e-05, |
| "loss": 0.4357, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.7293190770962296, |
| "grad_norm": 0.43546853881795533, |
| "learning_rate": 4.205256570713392e-05, |
| "loss": 0.4471, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.7304445694991559, |
| "grad_norm": 0.36651215549293115, |
| "learning_rate": 4.2031706299541095e-05, |
| "loss": 0.4359, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.7315700619020822, |
| "grad_norm": 0.5416106337436408, |
| "learning_rate": 4.201084689194827e-05, |
| "loss": 0.4514, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7326955543050084, |
| "grad_norm": 0.37666903051702594, |
| "learning_rate": 4.198998748435545e-05, |
| "loss": 0.4423, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.7338210467079347, |
| "grad_norm": 0.44989927473315283, |
| "learning_rate": 4.196912807676262e-05, |
| "loss": 0.4268, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.734946539110861, |
| "grad_norm": 0.3864335324626091, |
| "learning_rate": 4.1948268669169796e-05, |
| "loss": 0.4494, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.7360720315137873, |
| "grad_norm": 0.4000593109156678, |
| "learning_rate": 4.192740926157697e-05, |
| "loss": 0.4434, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.7371975239167136, |
| "grad_norm": 0.423242298419072, |
| "learning_rate": 4.190654985398415e-05, |
| "loss": 0.4328, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.7383230163196398, |
| "grad_norm": 0.44706912801056875, |
| "learning_rate": 4.188569044639132e-05, |
| "loss": 0.4254, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.7394485087225662, |
| "grad_norm": 0.5086338570156853, |
| "learning_rate": 4.1864831038798504e-05, |
| "loss": 0.4425, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.7405740011254924, |
| "grad_norm": 0.4676027167307538, |
| "learning_rate": 4.1843971631205674e-05, |
| "loss": 0.4491, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.7416994935284187, |
| "grad_norm": 0.46458396727329027, |
| "learning_rate": 4.182311222361285e-05, |
| "loss": 0.4015, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.7428249859313449, |
| "grad_norm": 0.390783744931949, |
| "learning_rate": 4.180225281602003e-05, |
| "loss": 0.4184, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7439504783342712, |
| "grad_norm": 0.44526805252316143, |
| "learning_rate": 4.1781393408427205e-05, |
| "loss": 0.4035, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.7450759707371976, |
| "grad_norm": 0.4217385671488669, |
| "learning_rate": 4.1760534000834376e-05, |
| "loss": 0.4332, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.7462014631401238, |
| "grad_norm": 0.44487860783732935, |
| "learning_rate": 4.173967459324156e-05, |
| "loss": 0.4266, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.7473269555430501, |
| "grad_norm": 0.4296879305918086, |
| "learning_rate": 4.171881518564873e-05, |
| "loss": 0.4205, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.7484524479459763, |
| "grad_norm": 0.4948881491751457, |
| "learning_rate": 4.16979557780559e-05, |
| "loss": 0.4447, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.7495779403489027, |
| "grad_norm": 0.41381310448412767, |
| "learning_rate": 4.1677096370463084e-05, |
| "loss": 0.435, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.7507034327518289, |
| "grad_norm": 0.4138662471855155, |
| "learning_rate": 4.1656236962870254e-05, |
| "loss": 0.4351, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.7518289251547552, |
| "grad_norm": 0.3869476402415003, |
| "learning_rate": 4.163537755527743e-05, |
| "loss": 0.4319, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.7529544175576814, |
| "grad_norm": 0.4882528682989917, |
| "learning_rate": 4.161451814768461e-05, |
| "loss": 0.4123, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.7540799099606078, |
| "grad_norm": 0.3739890771080639, |
| "learning_rate": 4.1593658740091785e-05, |
| "loss": 0.4268, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7552054023635341, |
| "grad_norm": 0.5032273771625602, |
| "learning_rate": 4.1572799332498955e-05, |
| "loss": 0.4404, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.7563308947664603, |
| "grad_norm": 0.38387128180956526, |
| "learning_rate": 4.155193992490614e-05, |
| "loss": 0.4505, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.7574563871693866, |
| "grad_norm": 0.4995032503495298, |
| "learning_rate": 4.153108051731331e-05, |
| "loss": 0.4211, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.7585818795723129, |
| "grad_norm": 0.46352751067691306, |
| "learning_rate": 4.1510221109720486e-05, |
| "loss": 0.4253, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.7597073719752392, |
| "grad_norm": 0.4661239773263893, |
| "learning_rate": 4.148936170212766e-05, |
| "loss": 0.4533, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.7608328643781654, |
| "grad_norm": 0.42916960855475605, |
| "learning_rate": 4.146850229453484e-05, |
| "loss": 0.4333, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.7619583567810917, |
| "grad_norm": 0.40989406943220275, |
| "learning_rate": 4.144764288694201e-05, |
| "loss": 0.4413, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.7630838491840181, |
| "grad_norm": 0.7522787094637527, |
| "learning_rate": 4.1426783479349194e-05, |
| "loss": 0.4522, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.7642093415869443, |
| "grad_norm": 0.4277705459587538, |
| "learning_rate": 4.1405924071756364e-05, |
| "loss": 0.4348, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.7653348339898706, |
| "grad_norm": 0.4684118417529332, |
| "learning_rate": 4.138506466416354e-05, |
| "loss": 0.422, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.7664603263927968, |
| "grad_norm": 0.5197963821538139, |
| "learning_rate": 4.136420525657072e-05, |
| "loss": 0.4299, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.7675858187957231, |
| "grad_norm": 0.5235576475586984, |
| "learning_rate": 4.1343345848977895e-05, |
| "loss": 0.438, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.7687113111986494, |
| "grad_norm": 0.46712550772065836, |
| "learning_rate": 4.1322486441385065e-05, |
| "loss": 0.4344, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.7698368036015757, |
| "grad_norm": 0.3222703692853798, |
| "learning_rate": 4.130162703379224e-05, |
| "loss": 0.4263, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.770962296004502, |
| "grad_norm": 0.5188367404561216, |
| "learning_rate": 4.128076762619942e-05, |
| "loss": 0.425, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.7720877884074282, |
| "grad_norm": 0.5386961427613608, |
| "learning_rate": 4.125990821860659e-05, |
| "loss": 0.4276, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.7732132808103546, |
| "grad_norm": 0.42911439453279915, |
| "learning_rate": 4.1239048811013766e-05, |
| "loss": 0.4309, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.7743387732132808, |
| "grad_norm": 0.5088405648165022, |
| "learning_rate": 4.121818940342094e-05, |
| "loss": 0.4493, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.7754642656162071, |
| "grad_norm": 0.3815644681020926, |
| "learning_rate": 4.119732999582812e-05, |
| "loss": 0.4077, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.7765897580191333, |
| "grad_norm": 0.4840279343366164, |
| "learning_rate": 4.11764705882353e-05, |
| "loss": 0.4172, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.7777152504220597, |
| "grad_norm": 0.333716982007624, |
| "learning_rate": 4.1155611180642474e-05, |
| "loss": 0.42, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.7788407428249859, |
| "grad_norm": 0.5086503847022227, |
| "learning_rate": 4.1134751773049644e-05, |
| "loss": 0.4388, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.7799662352279122, |
| "grad_norm": 0.5138077690790301, |
| "learning_rate": 4.111389236545682e-05, |
| "loss": 0.4472, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.7810917276308385, |
| "grad_norm": 0.5073604041295958, |
| "learning_rate": 4.1093032957864e-05, |
| "loss": 0.4319, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.7822172200337648, |
| "grad_norm": 0.5070487690193936, |
| "learning_rate": 4.1072173550271175e-05, |
| "loss": 0.4406, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.7833427124366911, |
| "grad_norm": 0.39744464693598625, |
| "learning_rate": 4.1051314142678346e-05, |
| "loss": 0.434, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.7844682048396173, |
| "grad_norm": 0.4541031658226192, |
| "learning_rate": 4.103045473508553e-05, |
| "loss": 0.4454, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.7855936972425436, |
| "grad_norm": 0.3491750229319607, |
| "learning_rate": 4.10095953274927e-05, |
| "loss": 0.4332, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.7867191896454699, |
| "grad_norm": 0.4022760008208042, |
| "learning_rate": 4.0988735919899877e-05, |
| "loss": 0.4296, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.7878446820483962, |
| "grad_norm": 0.34684627806001544, |
| "learning_rate": 4.0967876512307054e-05, |
| "loss": 0.4331, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.7889701744513224, |
| "grad_norm": 0.4050405845879203, |
| "learning_rate": 4.094701710471423e-05, |
| "loss": 0.4464, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.7900956668542487, |
| "grad_norm": 0.36395612381945763, |
| "learning_rate": 4.09261576971214e-05, |
| "loss": 0.4444, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.7912211592571751, |
| "grad_norm": 0.398848237592344, |
| "learning_rate": 4.0905298289528585e-05, |
| "loss": 0.4288, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.7923466516601013, |
| "grad_norm": 0.40745644685078164, |
| "learning_rate": 4.0884438881935755e-05, |
| "loss": 0.4329, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.7934721440630276, |
| "grad_norm": 0.3547156716364725, |
| "learning_rate": 4.0863579474342925e-05, |
| "loss": 0.4191, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.7945976364659538, |
| "grad_norm": 0.377680056161795, |
| "learning_rate": 4.084272006675011e-05, |
| "loss": 0.4376, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.7957231288688801, |
| "grad_norm": 0.4073180644016936, |
| "learning_rate": 4.082186065915728e-05, |
| "loss": 0.4559, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.7968486212718064, |
| "grad_norm": 0.45186446852813356, |
| "learning_rate": 4.0801001251564456e-05, |
| "loss": 0.4277, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.7979741136747327, |
| "grad_norm": 0.36933911451661233, |
| "learning_rate": 4.078014184397163e-05, |
| "loss": 0.45, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.799099606077659, |
| "grad_norm": 0.35833391487238614, |
| "learning_rate": 4.075928243637881e-05, |
| "loss": 0.4403, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.8002250984805852, |
| "grad_norm": 0.3901982149558614, |
| "learning_rate": 4.073842302878598e-05, |
| "loss": 0.4216, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.8013505908835116, |
| "grad_norm": 0.40940384251834244, |
| "learning_rate": 4.0717563621193164e-05, |
| "loss": 0.4021, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.8024760832864378, |
| "grad_norm": 0.42919683308516116, |
| "learning_rate": 4.0696704213600334e-05, |
| "loss": 0.4376, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.8036015756893641, |
| "grad_norm": 0.4073165345943137, |
| "learning_rate": 4.067584480600751e-05, |
| "loss": 0.4153, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.8047270680922903, |
| "grad_norm": 0.4178501498334503, |
| "learning_rate": 4.065498539841469e-05, |
| "loss": 0.414, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.8058525604952167, |
| "grad_norm": 0.4403993787350139, |
| "learning_rate": 4.0634125990821865e-05, |
| "loss": 0.4399, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.806978052898143, |
| "grad_norm": 0.4114972670954794, |
| "learning_rate": 4.0613266583229035e-05, |
| "loss": 0.439, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.8081035453010692, |
| "grad_norm": 0.407394844667869, |
| "learning_rate": 4.059240717563622e-05, |
| "loss": 0.4123, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.8092290377039955, |
| "grad_norm": 0.39800729593005324, |
| "learning_rate": 4.057154776804339e-05, |
| "loss": 0.4236, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.8103545301069218, |
| "grad_norm": 0.4287708410386054, |
| "learning_rate": 4.0550688360450566e-05, |
| "loss": 0.4256, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.8114800225098481, |
| "grad_norm": 0.4016484816358628, |
| "learning_rate": 4.052982895285774e-05, |
| "loss": 0.4281, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.8126055149127743, |
| "grad_norm": 0.3719724351542615, |
| "learning_rate": 4.050896954526492e-05, |
| "loss": 0.4077, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.8137310073157006, |
| "grad_norm": 0.4023100055568255, |
| "learning_rate": 4.048811013767209e-05, |
| "loss": 0.4461, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.814856499718627, |
| "grad_norm": 0.4117093051704328, |
| "learning_rate": 4.046725073007927e-05, |
| "loss": 0.4175, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.8159819921215532, |
| "grad_norm": 0.34286385689334, |
| "learning_rate": 4.0446391322486444e-05, |
| "loss": 0.4356, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.8171074845244795, |
| "grad_norm": 0.35591813739094097, |
| "learning_rate": 4.0425531914893614e-05, |
| "loss": 0.4434, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.8182329769274057, |
| "grad_norm": 0.43567208149763015, |
| "learning_rate": 4.040467250730079e-05, |
| "loss": 0.437, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.8193584693303321, |
| "grad_norm": 0.3799825439351934, |
| "learning_rate": 4.038381309970797e-05, |
| "loss": 0.4248, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.8204839617332583, |
| "grad_norm": 0.38216998051723755, |
| "learning_rate": 4.0362953692115145e-05, |
| "loss": 0.4253, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.8216094541361846, |
| "grad_norm": 0.39231774228135824, |
| "learning_rate": 4.034209428452232e-05, |
| "loss": 0.4223, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.8227349465391108, |
| "grad_norm": 0.4102144130938295, |
| "learning_rate": 4.03212348769295e-05, |
| "loss": 0.4348, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.8238604389420371, |
| "grad_norm": 0.37115430835787877, |
| "learning_rate": 4.030037546933667e-05, |
| "loss": 0.409, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.8249859313449635, |
| "grad_norm": 0.40499256266698164, |
| "learning_rate": 4.0279516061743847e-05, |
| "loss": 0.4089, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.8261114237478897, |
| "grad_norm": 0.4916550738089128, |
| "learning_rate": 4.0258656654151024e-05, |
| "loss": 0.4272, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.827236916150816, |
| "grad_norm": 0.3681620907401364, |
| "learning_rate": 4.02377972465582e-05, |
| "loss": 0.4446, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.8283624085537422, |
| "grad_norm": 0.4795384990908562, |
| "learning_rate": 4.021693783896537e-05, |
| "loss": 0.4357, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.8294879009566686, |
| "grad_norm": 0.3684736183097587, |
| "learning_rate": 4.0196078431372555e-05, |
| "loss": 0.4119, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.8306133933595948, |
| "grad_norm": 0.43877380657382786, |
| "learning_rate": 4.0175219023779725e-05, |
| "loss": 0.4403, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.8317388857625211, |
| "grad_norm": 0.37814204050253025, |
| "learning_rate": 4.01543596161869e-05, |
| "loss": 0.434, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.8328643781654473, |
| "grad_norm": 0.45099287248352765, |
| "learning_rate": 4.013350020859408e-05, |
| "loss": 0.4149, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.8339898705683737, |
| "grad_norm": 0.34915848381393966, |
| "learning_rate": 4.0112640801001256e-05, |
| "loss": 0.4136, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.8351153629713, |
| "grad_norm": 0.5037598255538088, |
| "learning_rate": 4.0091781393408426e-05, |
| "loss": 0.4398, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.8362408553742262, |
| "grad_norm": 0.3612809802844246, |
| "learning_rate": 4.007092198581561e-05, |
| "loss": 0.4162, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.8373663477771525, |
| "grad_norm": 0.3979488796549818, |
| "learning_rate": 4.005006257822278e-05, |
| "loss": 0.3981, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.8384918401800788, |
| "grad_norm": 0.4440135625243805, |
| "learning_rate": 4.002920317062996e-05, |
| "loss": 0.429, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.8396173325830051, |
| "grad_norm": 0.3448234757480279, |
| "learning_rate": 4.0008343763037134e-05, |
| "loss": 0.4324, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.8407428249859313, |
| "grad_norm": 0.4775835287224156, |
| "learning_rate": 3.9987484355444304e-05, |
| "loss": 0.4249, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.8418683173888576, |
| "grad_norm": 0.3566220202478078, |
| "learning_rate": 3.996662494785148e-05, |
| "loss": 0.4211, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.842993809791784, |
| "grad_norm": 0.5285144169481172, |
| "learning_rate": 3.994576554025866e-05, |
| "loss": 0.4168, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.8441193021947102, |
| "grad_norm": 0.33354278924631714, |
| "learning_rate": 3.9924906132665835e-05, |
| "loss": 0.4261, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8452447945976365, |
| "grad_norm": 0.3372581050524173, |
| "learning_rate": 3.9904046725073005e-05, |
| "loss": 0.4269, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.8463702870005627, |
| "grad_norm": 0.3146244454332402, |
| "learning_rate": 3.988318731748019e-05, |
| "loss": 0.4204, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.8474957794034891, |
| "grad_norm": 0.3706478564537626, |
| "learning_rate": 3.986232790988736e-05, |
| "loss": 0.4521, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.8486212718064153, |
| "grad_norm": 0.34630012288221157, |
| "learning_rate": 3.9841468502294536e-05, |
| "loss": 0.4142, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.8497467642093416, |
| "grad_norm": 0.36373433568245944, |
| "learning_rate": 3.982060909470171e-05, |
| "loss": 0.4252, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.8508722566122678, |
| "grad_norm": 0.3554211790643752, |
| "learning_rate": 3.979974968710889e-05, |
| "loss": 0.4474, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.8519977490151941, |
| "grad_norm": 0.30960141279598913, |
| "learning_rate": 3.977889027951606e-05, |
| "loss": 0.4167, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.8531232414181205, |
| "grad_norm": 0.37614788680975125, |
| "learning_rate": 3.9758030871923244e-05, |
| "loss": 0.4505, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.8542487338210467, |
| "grad_norm": 0.3938651785575828, |
| "learning_rate": 3.9737171464330414e-05, |
| "loss": 0.4349, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.855374226223973, |
| "grad_norm": 0.3460524380953148, |
| "learning_rate": 3.971631205673759e-05, |
| "loss": 0.4396, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.8564997186268992, |
| "grad_norm": 0.430535629585179, |
| "learning_rate": 3.969545264914477e-05, |
| "loss": 0.4154, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.8576252110298256, |
| "grad_norm": 0.34446139273212933, |
| "learning_rate": 3.9674593241551945e-05, |
| "loss": 0.3931, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.8587507034327518, |
| "grad_norm": 0.42192087717244775, |
| "learning_rate": 3.9653733833959115e-05, |
| "loss": 0.4261, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.8598761958356781, |
| "grad_norm": 0.40550449281201056, |
| "learning_rate": 3.963287442636629e-05, |
| "loss": 0.4569, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.8610016882386043, |
| "grad_norm": 0.3566914781532168, |
| "learning_rate": 3.961201501877347e-05, |
| "loss": 0.4141, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.8621271806415307, |
| "grad_norm": 0.3843475406384751, |
| "learning_rate": 3.9591155611180646e-05, |
| "loss": 0.4332, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.863252673044457, |
| "grad_norm": 0.3366748222918633, |
| "learning_rate": 3.9570296203587817e-05, |
| "loss": 0.4123, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.8643781654473832, |
| "grad_norm": 0.41416075386046697, |
| "learning_rate": 3.9549436795994994e-05, |
| "loss": 0.426, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.8655036578503095, |
| "grad_norm": 0.3752366359688814, |
| "learning_rate": 3.952857738840217e-05, |
| "loss": 0.4402, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.8666291502532358, |
| "grad_norm": 0.37688991154499113, |
| "learning_rate": 3.950771798080935e-05, |
| "loss": 0.4244, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.8677546426561621, |
| "grad_norm": 0.42637480636595876, |
| "learning_rate": 3.9486858573216525e-05, |
| "loss": 0.438, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.8688801350590883, |
| "grad_norm": 0.3568635983835573, |
| "learning_rate": 3.9465999165623695e-05, |
| "loss": 0.424, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.8700056274620146, |
| "grad_norm": 0.38797711927011286, |
| "learning_rate": 3.944513975803087e-05, |
| "loss": 0.4321, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.871131119864941, |
| "grad_norm": 0.3904824359653345, |
| "learning_rate": 3.942428035043805e-05, |
| "loss": 0.4161, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.8722566122678672, |
| "grad_norm": 0.47345678909928446, |
| "learning_rate": 3.9403420942845226e-05, |
| "loss": 0.4503, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.8733821046707935, |
| "grad_norm": 0.37497244213020403, |
| "learning_rate": 3.9382561535252396e-05, |
| "loss": 0.4255, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.8745075970737197, |
| "grad_norm": 0.4047268746098847, |
| "learning_rate": 3.936170212765958e-05, |
| "loss": 0.4327, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.8756330894766461, |
| "grad_norm": 0.3834914449330313, |
| "learning_rate": 3.934084272006675e-05, |
| "loss": 0.4079, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.8767585818795723, |
| "grad_norm": 0.43021072579406455, |
| "learning_rate": 3.931998331247393e-05, |
| "loss": 0.4143, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.8778840742824986, |
| "grad_norm": 0.3793510230856374, |
| "learning_rate": 3.9299123904881104e-05, |
| "loss": 0.431, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.8790095666854248, |
| "grad_norm": 0.37164807483969337, |
| "learning_rate": 3.927826449728828e-05, |
| "loss": 0.4341, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.8801350590883511, |
| "grad_norm": 0.3807695648271021, |
| "learning_rate": 3.925740508969545e-05, |
| "loss": 0.4096, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.8812605514912775, |
| "grad_norm": 0.3502384590348891, |
| "learning_rate": 3.9236545682102635e-05, |
| "loss": 0.4117, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.8823860438942037, |
| "grad_norm": 0.41955082958695283, |
| "learning_rate": 3.9215686274509805e-05, |
| "loss": 0.4179, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.88351153629713, |
| "grad_norm": 0.3435394433133878, |
| "learning_rate": 3.919482686691698e-05, |
| "loss": 0.4215, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.8846370287000562, |
| "grad_norm": 0.44230838156044133, |
| "learning_rate": 3.917396745932416e-05, |
| "loss": 0.436, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.8857625211029826, |
| "grad_norm": 0.3248857597519066, |
| "learning_rate": 3.9153108051731336e-05, |
| "loss": 0.41, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.8868880135059088, |
| "grad_norm": 0.48949666561437843, |
| "learning_rate": 3.9132248644138506e-05, |
| "loss": 0.4348, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.8880135059088351, |
| "grad_norm": 0.32922044316292487, |
| "learning_rate": 3.911138923654568e-05, |
| "loss": 0.4084, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.8891389983117614, |
| "grad_norm": 0.4097616554209572, |
| "learning_rate": 3.909052982895286e-05, |
| "loss": 0.4127, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.8902644907146877, |
| "grad_norm": 0.3847502404740843, |
| "learning_rate": 3.906967042136003e-05, |
| "loss": 0.4322, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.891389983117614, |
| "grad_norm": 0.39373480839252734, |
| "learning_rate": 3.9048811013767214e-05, |
| "loss": 0.4085, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.8925154755205402, |
| "grad_norm": 0.4665639076471283, |
| "learning_rate": 3.9027951606174384e-05, |
| "loss": 0.4281, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.8936409679234665, |
| "grad_norm": 0.32986547499650304, |
| "learning_rate": 3.900709219858156e-05, |
| "loss": 0.4261, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.8947664603263928, |
| "grad_norm": 0.484689789318943, |
| "learning_rate": 3.898623279098874e-05, |
| "loss": 0.4236, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.8958919527293191, |
| "grad_norm": 0.3386378151140954, |
| "learning_rate": 3.8965373383395915e-05, |
| "loss": 0.4117, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.8970174451322454, |
| "grad_norm": 0.4810985090257936, |
| "learning_rate": 3.8944513975803085e-05, |
| "loss": 0.4199, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.8981429375351716, |
| "grad_norm": 0.34069553000131625, |
| "learning_rate": 3.892365456821027e-05, |
| "loss": 0.4279, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.899268429938098, |
| "grad_norm": 0.39752219752724677, |
| "learning_rate": 3.890279516061744e-05, |
| "loss": 0.4172, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.9003939223410242, |
| "grad_norm": 0.39022425914879927, |
| "learning_rate": 3.8881935753024616e-05, |
| "loss": 0.3978, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.9015194147439505, |
| "grad_norm": 0.3458579209805956, |
| "learning_rate": 3.8861076345431793e-05, |
| "loss": 0.4135, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.9026449071468767, |
| "grad_norm": 0.4407202352189913, |
| "learning_rate": 3.884021693783897e-05, |
| "loss": 0.4154, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.9037703995498031, |
| "grad_norm": 0.47173587942543654, |
| "learning_rate": 3.881935753024614e-05, |
| "loss": 0.4572, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.9048958919527293, |
| "grad_norm": 0.5188592329216469, |
| "learning_rate": 3.879849812265332e-05, |
| "loss": 0.42, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.9060213843556556, |
| "grad_norm": 0.35403721006820305, |
| "learning_rate": 3.8777638715060495e-05, |
| "loss": 0.3938, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.9071468767585819, |
| "grad_norm": 0.4545974955129778, |
| "learning_rate": 3.875677930746767e-05, |
| "loss": 0.4285, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.9082723691615081, |
| "grad_norm": 0.38332622486859297, |
| "learning_rate": 3.873591989987485e-05, |
| "loss": 0.4324, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.9093978615644345, |
| "grad_norm": 0.4520007540189171, |
| "learning_rate": 3.8715060492282026e-05, |
| "loss": 0.4305, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.9105233539673607, |
| "grad_norm": 0.4148933007482292, |
| "learning_rate": 3.8694201084689196e-05, |
| "loss": 0.4218, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.911648846370287, |
| "grad_norm": 0.35859066112911336, |
| "learning_rate": 3.867334167709637e-05, |
| "loss": 0.4395, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.9127743387732132, |
| "grad_norm": 0.3384384473732276, |
| "learning_rate": 3.865248226950355e-05, |
| "loss": 0.4331, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.9138998311761396, |
| "grad_norm": 0.3212850763014723, |
| "learning_rate": 3.863162286191072e-05, |
| "loss": 0.4371, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.9150253235790659, |
| "grad_norm": 0.34283786993488946, |
| "learning_rate": 3.86107634543179e-05, |
| "loss": 0.4213, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.9161508159819921, |
| "grad_norm": 0.3578410656828841, |
| "learning_rate": 3.8589904046725074e-05, |
| "loss": 0.4264, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.9172763083849184, |
| "grad_norm": 0.33865929644502085, |
| "learning_rate": 3.856904463913225e-05, |
| "loss": 0.4242, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.9184018007878447, |
| "grad_norm": 0.3392167511998851, |
| "learning_rate": 3.854818523153942e-05, |
| "loss": 0.435, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.919527293190771, |
| "grad_norm": 0.4361222901548229, |
| "learning_rate": 3.8527325823946605e-05, |
| "loss": 0.4351, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.9206527855936972, |
| "grad_norm": 0.38626109347018045, |
| "learning_rate": 3.8506466416353775e-05, |
| "loss": 0.4413, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.9217782779966235, |
| "grad_norm": 0.376739528222001, |
| "learning_rate": 3.848560700876095e-05, |
| "loss": 0.4162, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.9229037703995498, |
| "grad_norm": 0.38666458978007023, |
| "learning_rate": 3.846474760116813e-05, |
| "loss": 0.4308, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.9240292628024761, |
| "grad_norm": 0.49211116299516156, |
| "learning_rate": 3.8443888193575306e-05, |
| "loss": 0.4319, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.9251547552054024, |
| "grad_norm": 0.35408915013798653, |
| "learning_rate": 3.8423028785982476e-05, |
| "loss": 0.4095, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.9262802476083286, |
| "grad_norm": 0.4801831963166499, |
| "learning_rate": 3.840216937838966e-05, |
| "loss": 0.4357, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.927405740011255, |
| "grad_norm": 0.355137877995065, |
| "learning_rate": 3.838130997079683e-05, |
| "loss": 0.4089, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.9285312324141812, |
| "grad_norm": 0.39619886118735476, |
| "learning_rate": 3.836045056320401e-05, |
| "loss": 0.4275, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.9296567248171075, |
| "grad_norm": 0.4149029728443111, |
| "learning_rate": 3.8339591155611184e-05, |
| "loss": 0.4271, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.9307822172200337, |
| "grad_norm": 0.3576650599906339, |
| "learning_rate": 3.831873174801836e-05, |
| "loss": 0.4132, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.93190770962296, |
| "grad_norm": 0.3906733425105834, |
| "learning_rate": 3.829787234042553e-05, |
| "loss": 0.4344, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.9330332020258864, |
| "grad_norm": 0.3593657860758568, |
| "learning_rate": 3.8277012932832715e-05, |
| "loss": 0.4149, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.9341586944288126, |
| "grad_norm": 0.3817439606842503, |
| "learning_rate": 3.8256153525239885e-05, |
| "loss": 0.4069, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.9352841868317389, |
| "grad_norm": 0.3973105618276613, |
| "learning_rate": 3.8235294117647055e-05, |
| "loss": 0.4324, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.9364096792346651, |
| "grad_norm": 0.366999411331023, |
| "learning_rate": 3.821443471005424e-05, |
| "loss": 0.4222, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.9375351716375915, |
| "grad_norm": 0.3464567606261278, |
| "learning_rate": 3.819357530246141e-05, |
| "loss": 0.4246, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.9386606640405177, |
| "grad_norm": 0.4438404878074898, |
| "learning_rate": 3.8172715894868586e-05, |
| "loss": 0.4143, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.939786156443444, |
| "grad_norm": 0.3129033931624516, |
| "learning_rate": 3.8151856487275763e-05, |
| "loss": 0.4307, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.9409116488463702, |
| "grad_norm": 0.4970325181275813, |
| "learning_rate": 3.813099707968294e-05, |
| "loss": 0.4212, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.9420371412492966, |
| "grad_norm": 0.3998089884639407, |
| "learning_rate": 3.811013767209011e-05, |
| "loss": 0.4208, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.9431626336522229, |
| "grad_norm": 0.40099412686189473, |
| "learning_rate": 3.8089278264497294e-05, |
| "loss": 0.422, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.9442881260551491, |
| "grad_norm": 0.5330625499056586, |
| "learning_rate": 3.8068418856904465e-05, |
| "loss": 0.4132, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.9454136184580754, |
| "grad_norm": 0.3036089387603486, |
| "learning_rate": 3.804755944931164e-05, |
| "loss": 0.3962, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.9465391108610017, |
| "grad_norm": 0.5044966493127429, |
| "learning_rate": 3.802670004171882e-05, |
| "loss": 0.4228, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.947664603263928, |
| "grad_norm": 0.3885120765788415, |
| "learning_rate": 3.8005840634125996e-05, |
| "loss": 0.4228, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.9487900956668542, |
| "grad_norm": 0.3285021365606724, |
| "learning_rate": 3.7984981226533166e-05, |
| "loss": 0.4287, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.9499155880697805, |
| "grad_norm": 0.3980934273222264, |
| "learning_rate": 3.796412181894034e-05, |
| "loss": 0.409, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.9510410804727069, |
| "grad_norm": 0.31490077533714694, |
| "learning_rate": 3.794326241134752e-05, |
| "loss": 0.4364, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.9521665728756331, |
| "grad_norm": 0.35959204795616695, |
| "learning_rate": 3.79224030037547e-05, |
| "loss": 0.4283, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.9532920652785594, |
| "grad_norm": 0.4126210300706387, |
| "learning_rate": 3.7901543596161874e-05, |
| "loss": 0.4197, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.9544175576814856, |
| "grad_norm": 0.3222558281528205, |
| "learning_rate": 3.788068418856905e-05, |
| "loss": 0.4277, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.955543050084412, |
| "grad_norm": 0.41145117521139, |
| "learning_rate": 3.785982478097622e-05, |
| "loss": 0.4257, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.9566685424873382, |
| "grad_norm": 0.351379058945545, |
| "learning_rate": 3.78389653733834e-05, |
| "loss": 0.4181, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9577940348902645, |
| "grad_norm": 0.31029125166165955, |
| "learning_rate": 3.7818105965790575e-05, |
| "loss": 0.4297, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.9589195272931907, |
| "grad_norm": 0.3285912637236667, |
| "learning_rate": 3.7797246558197745e-05, |
| "loss": 0.4138, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.960045019696117, |
| "grad_norm": 0.4036547080190634, |
| "learning_rate": 3.777638715060492e-05, |
| "loss": 0.4366, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.9611705120990434, |
| "grad_norm": 0.3686324816741614, |
| "learning_rate": 3.77555277430121e-05, |
| "loss": 0.4337, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.9622960045019696, |
| "grad_norm": 0.48923825142344834, |
| "learning_rate": 3.7734668335419276e-05, |
| "loss": 0.4343, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.9634214969048959, |
| "grad_norm": 0.3013034390380091, |
| "learning_rate": 3.7713808927826446e-05, |
| "loss": 0.4216, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.9645469893078221, |
| "grad_norm": 0.4352224768520518, |
| "learning_rate": 3.769294952023363e-05, |
| "loss": 0.3941, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.9656724817107485, |
| "grad_norm": 0.3513506338023819, |
| "learning_rate": 3.76720901126408e-05, |
| "loss": 0.4166, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.9667979741136747, |
| "grad_norm": 0.40350199029637573, |
| "learning_rate": 3.765123070504798e-05, |
| "loss": 0.413, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.967923466516601, |
| "grad_norm": 0.362061939817286, |
| "learning_rate": 3.7630371297455154e-05, |
| "loss": 0.411, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.9690489589195272, |
| "grad_norm": 0.3399440337787816, |
| "learning_rate": 3.760951188986233e-05, |
| "loss": 0.4392, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.9701744513224536, |
| "grad_norm": 0.37743255775219053, |
| "learning_rate": 3.75886524822695e-05, |
| "loss": 0.4433, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.9712999437253799, |
| "grad_norm": 0.3638973275713123, |
| "learning_rate": 3.7567793074676685e-05, |
| "loss": 0.4203, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.9724254361283061, |
| "grad_norm": 0.3277233424581398, |
| "learning_rate": 3.7546933667083855e-05, |
| "loss": 0.398, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.9735509285312324, |
| "grad_norm": 0.3141565988423171, |
| "learning_rate": 3.752607425949103e-05, |
| "loss": 0.3959, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.9746764209341587, |
| "grad_norm": 0.35936283889585385, |
| "learning_rate": 3.750521485189821e-05, |
| "loss": 0.432, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.975801913337085, |
| "grad_norm": 0.31770357894398493, |
| "learning_rate": 3.7484355444305386e-05, |
| "loss": 0.431, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.9769274057400112, |
| "grad_norm": 0.3123167816580969, |
| "learning_rate": 3.7463496036712556e-05, |
| "loss": 0.4025, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.9780528981429375, |
| "grad_norm": 0.3692934723238402, |
| "learning_rate": 3.744263662911974e-05, |
| "loss": 0.4221, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.9791783905458639, |
| "grad_norm": 0.354161129420181, |
| "learning_rate": 3.742177722152691e-05, |
| "loss": 0.4172, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.9803038829487901, |
| "grad_norm": 0.36175776122206693, |
| "learning_rate": 3.740091781393409e-05, |
| "loss": 0.4247, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.9814293753517164, |
| "grad_norm": 0.33883677517413535, |
| "learning_rate": 3.7380058406341264e-05, |
| "loss": 0.4068, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.9825548677546426, |
| "grad_norm": 0.42954345350848233, |
| "learning_rate": 3.7359198998748435e-05, |
| "loss": 0.422, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.983680360157569, |
| "grad_norm": 0.3555531618337076, |
| "learning_rate": 3.733833959115561e-05, |
| "loss": 0.4117, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.9848058525604952, |
| "grad_norm": 0.3137468970852999, |
| "learning_rate": 3.731748018356279e-05, |
| "loss": 0.4044, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.9859313449634215, |
| "grad_norm": 0.32456521220251544, |
| "learning_rate": 3.7296620775969966e-05, |
| "loss": 0.4133, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.9870568373663478, |
| "grad_norm": 0.31014819015532874, |
| "learning_rate": 3.7275761368377136e-05, |
| "loss": 0.4141, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.988182329769274, |
| "grad_norm": 0.32436938468507787, |
| "learning_rate": 3.725490196078432e-05, |
| "loss": 0.4095, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.9893078221722004, |
| "grad_norm": 0.33188432959790465, |
| "learning_rate": 3.723404255319149e-05, |
| "loss": 0.4029, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.9904333145751266, |
| "grad_norm": 0.3654774295033461, |
| "learning_rate": 3.721318314559867e-05, |
| "loss": 0.4255, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.9915588069780529, |
| "grad_norm": 0.366785518306503, |
| "learning_rate": 3.7192323738005844e-05, |
| "loss": 0.4449, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.9926842993809791, |
| "grad_norm": 0.29826436924819194, |
| "learning_rate": 3.717146433041302e-05, |
| "loss": 0.4048, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.9938097917839055, |
| "grad_norm": 0.3152150195685499, |
| "learning_rate": 3.715060492282019e-05, |
| "loss": 0.4156, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.9949352841868317, |
| "grad_norm": 0.3790269660933605, |
| "learning_rate": 3.712974551522737e-05, |
| "loss": 0.4258, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.996060776589758, |
| "grad_norm": 0.3234490218718985, |
| "learning_rate": 3.7108886107634545e-05, |
| "loss": 0.4225, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.9971862689926843, |
| "grad_norm": 0.3692387676152948, |
| "learning_rate": 3.708802670004172e-05, |
| "loss": 0.4193, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.9983117613956106, |
| "grad_norm": 0.3548734847141469, |
| "learning_rate": 3.70671672924489e-05, |
| "loss": 0.4223, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.9994372537985369, |
| "grad_norm": 0.41390225631408695, |
| "learning_rate": 3.7046307884856076e-05, |
| "loss": 0.4223, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.41390225631408695, |
| "learning_rate": 3.7025448477263246e-05, |
| "loss": 0.43, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.0011254924029263, |
| "grad_norm": 0.5580430959772991, |
| "learning_rate": 3.700458906967042e-05, |
| "loss": 0.3606, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.0022509848058525, |
| "grad_norm": 0.38786233021722444, |
| "learning_rate": 3.69837296620776e-05, |
| "loss": 0.3516, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.0033764772087788, |
| "grad_norm": 0.3607061049938279, |
| "learning_rate": 3.696287025448478e-05, |
| "loss": 0.3586, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.004501969611705, |
| "grad_norm": 0.30722252549464857, |
| "learning_rate": 3.694201084689195e-05, |
| "loss": 0.3566, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.0056274620146315, |
| "grad_norm": 0.4162073977345431, |
| "learning_rate": 3.6921151439299124e-05, |
| "loss": 0.3517, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.0067529544175577, |
| "grad_norm": 0.3477012359425953, |
| "learning_rate": 3.69002920317063e-05, |
| "loss": 0.356, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.007878446820484, |
| "grad_norm": 0.34334848103470345, |
| "learning_rate": 3.687943262411347e-05, |
| "loss": 0.3529, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.0090039392234103, |
| "grad_norm": 0.3303966213040783, |
| "learning_rate": 3.6858573216520655e-05, |
| "loss": 0.3775, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.0101294316263365, |
| "grad_norm": 0.41641276263804705, |
| "learning_rate": 3.6837713808927825e-05, |
| "loss": 0.3482, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.0112549240292628, |
| "grad_norm": 0.3475211971953784, |
| "learning_rate": 3.6816854401335e-05, |
| "loss": 0.3469, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.012380416432189, |
| "grad_norm": 0.3630650367930452, |
| "learning_rate": 3.679599499374218e-05, |
| "loss": 0.3733, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.0135059088351153, |
| "grad_norm": 0.33917879336611284, |
| "learning_rate": 3.6775135586149356e-05, |
| "loss": 0.3613, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.0146314012380417, |
| "grad_norm": 0.3916615454670656, |
| "learning_rate": 3.6754276178556526e-05, |
| "loss": 0.3642, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.015756893640968, |
| "grad_norm": 0.387700709428207, |
| "learning_rate": 3.673341677096371e-05, |
| "loss": 0.337, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.0168823860438942, |
| "grad_norm": 0.311008874794384, |
| "learning_rate": 3.671255736337088e-05, |
| "loss": 0.3464, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.0180078784468205, |
| "grad_norm": 0.34204508328431077, |
| "learning_rate": 3.669169795577806e-05, |
| "loss": 0.3493, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.0191333708497468, |
| "grad_norm": 0.35056912513693533, |
| "learning_rate": 3.6670838548185234e-05, |
| "loss": 0.3847, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.020258863252673, |
| "grad_norm": 0.3603063090886555, |
| "learning_rate": 3.664997914059241e-05, |
| "loss": 0.3696, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.0213843556555993, |
| "grad_norm": 0.3406429440812445, |
| "learning_rate": 3.662911973299958e-05, |
| "loss": 0.3585, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.0225098480585255, |
| "grad_norm": 0.4146200559571759, |
| "learning_rate": 3.6608260325406765e-05, |
| "loss": 0.3617, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.023635340461452, |
| "grad_norm": 0.30025908743312035, |
| "learning_rate": 3.6587400917813936e-05, |
| "loss": 0.3416, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.0247608328643782, |
| "grad_norm": 0.4720811356812383, |
| "learning_rate": 3.656654151022111e-05, |
| "loss": 0.3533, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.0258863252673045, |
| "grad_norm": 0.29184795311941897, |
| "learning_rate": 3.654568210262829e-05, |
| "loss": 0.3493, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.0270118176702308, |
| "grad_norm": 0.385289462825186, |
| "learning_rate": 3.6524822695035466e-05, |
| "loss": 0.393, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.028137310073157, |
| "grad_norm": 0.3107082501520784, |
| "learning_rate": 3.650396328744264e-05, |
| "loss": 0.3677, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.0292628024760833, |
| "grad_norm": 0.2892635060197107, |
| "learning_rate": 3.6483103879849814e-05, |
| "loss": 0.3484, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.0303882948790095, |
| "grad_norm": 0.37383301152112214, |
| "learning_rate": 3.646224447225699e-05, |
| "loss": 0.3741, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.0315137872819358, |
| "grad_norm": 0.32042127431190587, |
| "learning_rate": 3.644138506466416e-05, |
| "loss": 0.3453, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.032639279684862, |
| "grad_norm": 0.3227805251806716, |
| "learning_rate": 3.6420525657071345e-05, |
| "loss": 0.347, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.0337647720877885, |
| "grad_norm": 0.33975552005827825, |
| "learning_rate": 3.6399666249478515e-05, |
| "loss": 0.342, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.0348902644907148, |
| "grad_norm": 0.3053184721102955, |
| "learning_rate": 3.637880684188569e-05, |
| "loss": 0.368, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.036015756893641, |
| "grad_norm": 0.4171758873578538, |
| "learning_rate": 3.635794743429287e-05, |
| "loss": 0.3506, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.0371412492965673, |
| "grad_norm": 0.35788110167643483, |
| "learning_rate": 3.6337088026700046e-05, |
| "loss": 0.3678, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.0382667416994935, |
| "grad_norm": 0.40422162482455976, |
| "learning_rate": 3.6316228619107216e-05, |
| "loss": 0.3841, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.0393922341024198, |
| "grad_norm": 0.42302051382729106, |
| "learning_rate": 3.629536921151439e-05, |
| "loss": 0.3609, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.040517726505346, |
| "grad_norm": 0.3002900676912074, |
| "learning_rate": 3.627450980392157e-05, |
| "loss": 0.3764, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.0416432189082723, |
| "grad_norm": 0.4216178632940728, |
| "learning_rate": 3.625365039632875e-05, |
| "loss": 0.3525, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.0427687113111987, |
| "grad_norm": 0.36722403261101394, |
| "learning_rate": 3.6232790988735924e-05, |
| "loss": 0.3651, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.043894203714125, |
| "grad_norm": 0.37487765396444256, |
| "learning_rate": 3.62119315811431e-05, |
| "loss": 0.3732, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.0450196961170513, |
| "grad_norm": 0.40248279158053446, |
| "learning_rate": 3.619107217355027e-05, |
| "loss": 0.3514, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.0461451885199775, |
| "grad_norm": 0.34487298402942634, |
| "learning_rate": 3.617021276595745e-05, |
| "loss": 0.3453, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.0472706809229038, |
| "grad_norm": 0.35894348708147356, |
| "learning_rate": 3.6149353358364625e-05, |
| "loss": 0.3445, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.04839617332583, |
| "grad_norm": 0.46543989700724425, |
| "learning_rate": 3.61284939507718e-05, |
| "loss": 0.3554, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.0495216657287563, |
| "grad_norm": 0.32251577447042856, |
| "learning_rate": 3.610763454317897e-05, |
| "loss": 0.3571, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.0506471581316825, |
| "grad_norm": 0.3539766683535758, |
| "learning_rate": 3.608677513558615e-05, |
| "loss": 0.3291, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.051772650534609, |
| "grad_norm": 0.34471085249350447, |
| "learning_rate": 3.6065915727993326e-05, |
| "loss": 0.3764, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.0528981429375353, |
| "grad_norm": 0.33468302525089494, |
| "learning_rate": 3.6045056320400496e-05, |
| "loss": 0.3479, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.0540236353404615, |
| "grad_norm": 0.36538591134232934, |
| "learning_rate": 3.602419691280768e-05, |
| "loss": 0.3642, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.0551491277433878, |
| "grad_norm": 0.35282922968280045, |
| "learning_rate": 3.600333750521485e-05, |
| "loss": 0.3446, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.056274620146314, |
| "grad_norm": 0.35478764255979334, |
| "learning_rate": 3.598247809762203e-05, |
| "loss": 0.3752, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.0574001125492403, |
| "grad_norm": 0.3565613451966995, |
| "learning_rate": 3.5961618690029204e-05, |
| "loss": 0.362, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.0585256049521665, |
| "grad_norm": 0.33132722259601055, |
| "learning_rate": 3.594075928243638e-05, |
| "loss": 0.3559, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.0596510973550928, |
| "grad_norm": 0.34347700089780575, |
| "learning_rate": 3.591989987484355e-05, |
| "loss": 0.3641, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.060776589758019, |
| "grad_norm": 0.2772476546624268, |
| "learning_rate": 3.5899040467250735e-05, |
| "loss": 0.3433, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.0619020821609455, |
| "grad_norm": 0.36078868188752466, |
| "learning_rate": 3.5878181059657906e-05, |
| "loss": 0.36, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.0630275745638718, |
| "grad_norm": 0.2927763816273808, |
| "learning_rate": 3.585732165206508e-05, |
| "loss": 0.3757, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.064153066966798, |
| "grad_norm": 0.31067799008966573, |
| "learning_rate": 3.583646224447226e-05, |
| "loss": 0.3375, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.0652785593697243, |
| "grad_norm": 0.30786259543828726, |
| "learning_rate": 3.5815602836879437e-05, |
| "loss": 0.3691, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.0664040517726505, |
| "grad_norm": 0.34927488285962766, |
| "learning_rate": 3.579474342928661e-05, |
| "loss": 0.3512, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.0675295441755768, |
| "grad_norm": 0.3134128528998366, |
| "learning_rate": 3.577388402169379e-05, |
| "loss": 0.3684, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.068655036578503, |
| "grad_norm": 0.3684381541500359, |
| "learning_rate": 3.575302461410096e-05, |
| "loss": 0.3635, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.0697805289814293, |
| "grad_norm": 0.3071501276127385, |
| "learning_rate": 3.573216520650814e-05, |
| "loss": 0.3629, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.0709060213843558, |
| "grad_norm": 0.3650935121688607, |
| "learning_rate": 3.5711305798915315e-05, |
| "loss": 0.352, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.072031513787282, |
| "grad_norm": 0.3004157301630184, |
| "learning_rate": 3.569044639132249e-05, |
| "loss": 0.3627, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.0731570061902083, |
| "grad_norm": 0.3588467213474463, |
| "learning_rate": 3.566958698372966e-05, |
| "loss": 0.378, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.0742824985931345, |
| "grad_norm": 0.38695693104692636, |
| "learning_rate": 3.564872757613684e-05, |
| "loss": 0.3558, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.0754079909960608, |
| "grad_norm": 0.30329694533620805, |
| "learning_rate": 3.5627868168544016e-05, |
| "loss": 0.3841, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.076533483398987, |
| "grad_norm": 0.34905611952609783, |
| "learning_rate": 3.5607008760951186e-05, |
| "loss": 0.3689, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.0776589758019133, |
| "grad_norm": 0.28800778538826344, |
| "learning_rate": 3.558614935335837e-05, |
| "loss": 0.3543, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.0787844682048395, |
| "grad_norm": 0.3746527261236155, |
| "learning_rate": 3.556528994576554e-05, |
| "loss": 0.366, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.079909960607766, |
| "grad_norm": 0.32663591501026235, |
| "learning_rate": 3.554443053817272e-05, |
| "loss": 0.3499, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.0810354530106923, |
| "grad_norm": 0.3328189109583666, |
| "learning_rate": 3.5523571130579894e-05, |
| "loss": 0.353, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.0821609454136185, |
| "grad_norm": 0.31964664375303664, |
| "learning_rate": 3.550271172298707e-05, |
| "loss": 0.3672, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.0832864378165448, |
| "grad_norm": 0.36918332363958006, |
| "learning_rate": 3.548185231539424e-05, |
| "loss": 0.3798, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.084411930219471, |
| "grad_norm": 0.3254223917013834, |
| "learning_rate": 3.546099290780142e-05, |
| "loss": 0.3559, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.0855374226223973, |
| "grad_norm": 0.3008814703536633, |
| "learning_rate": 3.5440133500208595e-05, |
| "loss": 0.3609, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.0866629150253235, |
| "grad_norm": 0.35240736109329646, |
| "learning_rate": 3.541927409261577e-05, |
| "loss": 0.3777, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.0877884074282498, |
| "grad_norm": 0.3869312281732699, |
| "learning_rate": 3.539841468502295e-05, |
| "loss": 0.3724, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.088913899831176, |
| "grad_norm": 0.30726021570614737, |
| "learning_rate": 3.5377555277430126e-05, |
| "loss": 0.3531, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.0900393922341025, |
| "grad_norm": 0.34236583353183286, |
| "learning_rate": 3.5356695869837296e-05, |
| "loss": 0.3608, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.0911648846370288, |
| "grad_norm": 0.2916866803109591, |
| "learning_rate": 3.533583646224447e-05, |
| "loss": 0.3624, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.092290377039955, |
| "grad_norm": 0.3145203080926422, |
| "learning_rate": 3.531497705465165e-05, |
| "loss": 0.3684, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.0934158694428813, |
| "grad_norm": 0.2873541218671502, |
| "learning_rate": 3.529411764705883e-05, |
| "loss": 0.3617, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.0945413618458075, |
| "grad_norm": 0.3506652103429166, |
| "learning_rate": 3.5273258239466e-05, |
| "loss": 0.3583, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.0956668542487338, |
| "grad_norm": 0.3025123158669694, |
| "learning_rate": 3.525239883187318e-05, |
| "loss": 0.3472, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.09679234665166, |
| "grad_norm": 0.2899074126357094, |
| "learning_rate": 3.523153942428035e-05, |
| "loss": 0.3675, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.0979178390545863, |
| "grad_norm": 0.3150990472406033, |
| "learning_rate": 3.521068001668753e-05, |
| "loss": 0.3636, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.0990433314575128, |
| "grad_norm": 0.35489391655027186, |
| "learning_rate": 3.5189820609094705e-05, |
| "loss": 0.3384, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.100168823860439, |
| "grad_norm": 0.3041199542435297, |
| "learning_rate": 3.5168961201501876e-05, |
| "loss": 0.3571, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.1012943162633653, |
| "grad_norm": 0.31637443077212757, |
| "learning_rate": 3.514810179390905e-05, |
| "loss": 0.3703, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.1024198086662915, |
| "grad_norm": 0.33113581691565325, |
| "learning_rate": 3.512724238631623e-05, |
| "loss": 0.35, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.1035453010692178, |
| "grad_norm": 0.3300457711599469, |
| "learning_rate": 3.5106382978723407e-05, |
| "loss": 0.3485, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.104670793472144, |
| "grad_norm": 0.37342013448224476, |
| "learning_rate": 3.508552357113058e-05, |
| "loss": 0.3543, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.1057962858750703, |
| "grad_norm": 0.36084265787497494, |
| "learning_rate": 3.506466416353776e-05, |
| "loss": 0.3499, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.1069217782779965, |
| "grad_norm": 0.36650053348727774, |
| "learning_rate": 3.504380475594493e-05, |
| "loss": 0.3727, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.108047270680923, |
| "grad_norm": 0.38335191540233127, |
| "learning_rate": 3.502294534835211e-05, |
| "loss": 0.3557, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.1091727630838493, |
| "grad_norm": 0.36320976195356514, |
| "learning_rate": 3.5002085940759285e-05, |
| "loss": 0.382, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.1102982554867755, |
| "grad_norm": 0.38636958474248506, |
| "learning_rate": 3.498122653316646e-05, |
| "loss": 0.3402, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.1114237478897018, |
| "grad_norm": 0.38017701551768956, |
| "learning_rate": 3.496036712557363e-05, |
| "loss": 0.3742, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.112549240292628, |
| "grad_norm": 0.3198258149962093, |
| "learning_rate": 3.4939507717980816e-05, |
| "loss": 0.3432, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.1136747326955543, |
| "grad_norm": 0.38060186204014107, |
| "learning_rate": 3.4918648310387986e-05, |
| "loss": 0.364, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.1148002250984805, |
| "grad_norm": 0.3522538503310745, |
| "learning_rate": 3.489778890279516e-05, |
| "loss": 0.3862, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.1159257175014068, |
| "grad_norm": 0.34893950721299544, |
| "learning_rate": 3.487692949520234e-05, |
| "loss": 0.3674, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.117051209904333, |
| "grad_norm": 0.3145664530999275, |
| "learning_rate": 3.485607008760952e-05, |
| "loss": 0.3623, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.1181767023072595, |
| "grad_norm": 0.38231007603706296, |
| "learning_rate": 3.483521068001669e-05, |
| "loss": 0.3513, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.1193021947101858, |
| "grad_norm": 0.29574406471189, |
| "learning_rate": 3.481435127242387e-05, |
| "loss": 0.3686, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.120427687113112, |
| "grad_norm": 0.3786384191919254, |
| "learning_rate": 3.479349186483104e-05, |
| "loss": 0.3496, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.1215531795160383, |
| "grad_norm": 0.27933782961377807, |
| "learning_rate": 3.477263245723821e-05, |
| "loss": 0.3865, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.1226786719189645, |
| "grad_norm": 0.3796958540762593, |
| "learning_rate": 3.4751773049645395e-05, |
| "loss": 0.3701, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.1238041643218908, |
| "grad_norm": 0.31019085193512064, |
| "learning_rate": 3.4730913642052565e-05, |
| "loss": 0.3544, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.124929656724817, |
| "grad_norm": 0.3894747761447629, |
| "learning_rate": 3.471005423445974e-05, |
| "loss": 0.3613, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.1260551491277433, |
| "grad_norm": 0.3848999285142024, |
| "learning_rate": 3.468919482686692e-05, |
| "loss": 0.3572, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.1271806415306695, |
| "grad_norm": 0.4075083886945119, |
| "learning_rate": 3.4668335419274096e-05, |
| "loss": 0.3534, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.128306133933596, |
| "grad_norm": 0.4244922841249029, |
| "learning_rate": 3.4647476011681266e-05, |
| "loss": 0.3857, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.1294316263365223, |
| "grad_norm": 0.3575947287049676, |
| "learning_rate": 3.462661660408844e-05, |
| "loss": 0.3494, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.1305571187394485, |
| "grad_norm": 0.3920246518678635, |
| "learning_rate": 3.460575719649562e-05, |
| "loss": 0.3693, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.1316826111423748, |
| "grad_norm": 0.3065280136400847, |
| "learning_rate": 3.45848977889028e-05, |
| "loss": 0.3352, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.132808103545301, |
| "grad_norm": 0.38525744406438595, |
| "learning_rate": 3.4564038381309974e-05, |
| "loss": 0.353, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.1339335959482273, |
| "grad_norm": 0.47272322177864035, |
| "learning_rate": 3.454317897371715e-05, |
| "loss": 0.3673, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.1350590883511535, |
| "grad_norm": 0.3327944075995892, |
| "learning_rate": 3.452231956612432e-05, |
| "loss": 0.3523, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.13618458075408, |
| "grad_norm": 0.42906579303424525, |
| "learning_rate": 3.45014601585315e-05, |
| "loss": 0.3577, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.1373100731570063, |
| "grad_norm": 0.31630743768076713, |
| "learning_rate": 3.4480600750938675e-05, |
| "loss": 0.3571, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.1384355655599325, |
| "grad_norm": 0.41005007736044136, |
| "learning_rate": 3.445974134334585e-05, |
| "loss": 0.362, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.1395610579628588, |
| "grad_norm": 0.3846148750924408, |
| "learning_rate": 3.443888193575302e-05, |
| "loss": 0.3554, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.140686550365785, |
| "grad_norm": 0.39499988480138304, |
| "learning_rate": 3.4418022528160206e-05, |
| "loss": 0.367, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.1418120427687113, |
| "grad_norm": 0.35657946077097175, |
| "learning_rate": 3.4397163120567377e-05, |
| "loss": 0.3694, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.1429375351716375, |
| "grad_norm": 0.3728438143327632, |
| "learning_rate": 3.4376303712974554e-05, |
| "loss": 0.3713, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.1440630275745638, |
| "grad_norm": 0.34659822653002426, |
| "learning_rate": 3.435544430538173e-05, |
| "loss": 0.3584, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.14518851997749, |
| "grad_norm": 0.3828982028856398, |
| "learning_rate": 3.43345848977889e-05, |
| "loss": 0.357, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.1463140123804165, |
| "grad_norm": 0.35840428604352054, |
| "learning_rate": 3.431372549019608e-05, |
| "loss": 0.3658, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.1474395047833428, |
| "grad_norm": 0.3642341763560189, |
| "learning_rate": 3.4292866082603255e-05, |
| "loss": 0.3768, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.148564997186269, |
| "grad_norm": 0.47028026081900165, |
| "learning_rate": 3.427200667501043e-05, |
| "loss": 0.3448, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.1496904895891953, |
| "grad_norm": 0.33137638092807364, |
| "learning_rate": 3.42511472674176e-05, |
| "loss": 0.3855, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.1508159819921215, |
| "grad_norm": 0.4049631157313659, |
| "learning_rate": 3.4230287859824786e-05, |
| "loss": 0.3801, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.1519414743950478, |
| "grad_norm": 0.3829633936239526, |
| "learning_rate": 3.4209428452231956e-05, |
| "loss": 0.3791, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.153066966797974, |
| "grad_norm": 0.42759635786809663, |
| "learning_rate": 3.418856904463913e-05, |
| "loss": 0.3676, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.1541924592009003, |
| "grad_norm": 0.3728776125817692, |
| "learning_rate": 3.416770963704631e-05, |
| "loss": 0.3622, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.1553179516038266, |
| "grad_norm": 0.39380341402257635, |
| "learning_rate": 3.414685022945349e-05, |
| "loss": 0.3785, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.156443444006753, |
| "grad_norm": 0.32076593702973827, |
| "learning_rate": 3.412599082186066e-05, |
| "loss": 0.3745, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.1575689364096793, |
| "grad_norm": 0.32908758752319733, |
| "learning_rate": 3.410513141426784e-05, |
| "loss": 0.3496, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.1586944288126055, |
| "grad_norm": 0.41768970871312155, |
| "learning_rate": 3.408427200667501e-05, |
| "loss": 0.3575, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.1598199212155318, |
| "grad_norm": 0.3106359891104045, |
| "learning_rate": 3.406341259908219e-05, |
| "loss": 0.3516, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.160945413618458, |
| "grad_norm": 0.3870701068020313, |
| "learning_rate": 3.4042553191489365e-05, |
| "loss": 0.3356, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.1620709060213843, |
| "grad_norm": 0.38611106269123546, |
| "learning_rate": 3.402169378389654e-05, |
| "loss": 0.3469, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.1631963984243106, |
| "grad_norm": 0.3255124156021805, |
| "learning_rate": 3.400083437630371e-05, |
| "loss": 0.3722, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.164321890827237, |
| "grad_norm": 0.32836642792719567, |
| "learning_rate": 3.3979974968710896e-05, |
| "loss": 0.3544, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.1654473832301633, |
| "grad_norm": 0.3805911934596958, |
| "learning_rate": 3.3959115561118066e-05, |
| "loss": 0.3982, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.1665728756330895, |
| "grad_norm": 0.3368162160417577, |
| "learning_rate": 3.393825615352524e-05, |
| "loss": 0.3679, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.1676983680360158, |
| "grad_norm": 0.31363563073754847, |
| "learning_rate": 3.391739674593242e-05, |
| "loss": 0.3529, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.168823860438942, |
| "grad_norm": 0.34006739877010494, |
| "learning_rate": 3.389653733833959e-05, |
| "loss": 0.3463, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.1699493528418683, |
| "grad_norm": 0.3100061821836274, |
| "learning_rate": 3.387567793074677e-05, |
| "loss": 0.3381, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.1710748452447945, |
| "grad_norm": 0.3065807803890228, |
| "learning_rate": 3.3854818523153944e-05, |
| "loss": 0.3651, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.1722003376477208, |
| "grad_norm": 0.32611882573130585, |
| "learning_rate": 3.383395911556112e-05, |
| "loss": 0.3529, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.173325830050647, |
| "grad_norm": 0.28895452201759864, |
| "learning_rate": 3.381309970796829e-05, |
| "loss": 0.3307, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.1744513224535735, |
| "grad_norm": 0.31616663311663623, |
| "learning_rate": 3.379224030037547e-05, |
| "loss": 0.3615, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.1755768148564998, |
| "grad_norm": 0.2999011173077538, |
| "learning_rate": 3.3771380892782645e-05, |
| "loss": 0.3527, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.176702307259426, |
| "grad_norm": 0.28604936736274933, |
| "learning_rate": 3.375052148518982e-05, |
| "loss": 0.361, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.1778277996623523, |
| "grad_norm": 0.3028269137775988, |
| "learning_rate": 3.3729662077597e-05, |
| "loss": 0.3668, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.1789532920652785, |
| "grad_norm": 0.36698195409495143, |
| "learning_rate": 3.3708802670004176e-05, |
| "loss": 0.352, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.1800787844682048, |
| "grad_norm": 0.2951939270230831, |
| "learning_rate": 3.3687943262411347e-05, |
| "loss": 0.3533, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.181204276871131, |
| "grad_norm": 0.4064761843327334, |
| "learning_rate": 3.3667083854818524e-05, |
| "loss": 0.3601, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.1823297692740573, |
| "grad_norm": 0.325934767924338, |
| "learning_rate": 3.36462244472257e-05, |
| "loss": 0.366, |
| "step": 1051 |
| }, |
| { |
| "epoch": 1.1834552616769836, |
| "grad_norm": 0.3444374492643726, |
| "learning_rate": 3.362536503963288e-05, |
| "loss": 0.3591, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.18458075407991, |
| "grad_norm": 0.3902013079098464, |
| "learning_rate": 3.360450563204005e-05, |
| "loss": 0.3609, |
| "step": 1053 |
| }, |
| { |
| "epoch": 1.1857062464828363, |
| "grad_norm": 0.3552567977795283, |
| "learning_rate": 3.358364622444723e-05, |
| "loss": 0.3824, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.1868317388857625, |
| "grad_norm": 0.5473634143542325, |
| "learning_rate": 3.35627868168544e-05, |
| "loss": 0.344, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.1879572312886888, |
| "grad_norm": 0.31822857141954713, |
| "learning_rate": 3.354192740926158e-05, |
| "loss": 0.34, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.189082723691615, |
| "grad_norm": 0.35648383062484057, |
| "learning_rate": 3.3521068001668756e-05, |
| "loss": 0.3664, |
| "step": 1057 |
| }, |
| { |
| "epoch": 1.1902082160945413, |
| "grad_norm": 0.3533726981414865, |
| "learning_rate": 3.350020859407593e-05, |
| "loss": 0.3643, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.1913337084974676, |
| "grad_norm": 0.38846901904691766, |
| "learning_rate": 3.34793491864831e-05, |
| "loss": 0.364, |
| "step": 1059 |
| }, |
| { |
| "epoch": 1.192459200900394, |
| "grad_norm": 0.32829805282614477, |
| "learning_rate": 3.345848977889028e-05, |
| "loss": 0.3505, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.1935846933033203, |
| "grad_norm": 0.3371243132688832, |
| "learning_rate": 3.343763037129746e-05, |
| "loss": 0.3706, |
| "step": 1061 |
| }, |
| { |
| "epoch": 1.1947101857062465, |
| "grad_norm": 0.29390329610439453, |
| "learning_rate": 3.341677096370463e-05, |
| "loss": 0.3513, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.1958356781091728, |
| "grad_norm": 0.3589333659631211, |
| "learning_rate": 3.339591155611181e-05, |
| "loss": 0.364, |
| "step": 1063 |
| }, |
| { |
| "epoch": 1.196961170512099, |
| "grad_norm": 0.3025901807833534, |
| "learning_rate": 3.337505214851898e-05, |
| "loss": 0.3716, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.1980866629150253, |
| "grad_norm": 0.2990903113895738, |
| "learning_rate": 3.335419274092616e-05, |
| "loss": 0.3703, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.1992121553179516, |
| "grad_norm": 0.3084522992492389, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.3423, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.2003376477208778, |
| "grad_norm": 0.2833543979726358, |
| "learning_rate": 3.331247392574051e-05, |
| "loss": 0.3794, |
| "step": 1067 |
| }, |
| { |
| "epoch": 1.201463140123804, |
| "grad_norm": 0.3502254927161911, |
| "learning_rate": 3.329161451814768e-05, |
| "loss": 0.3588, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.2025886325267305, |
| "grad_norm": 0.2824861573083505, |
| "learning_rate": 3.3270755110554866e-05, |
| "loss": 0.3375, |
| "step": 1069 |
| }, |
| { |
| "epoch": 1.2037141249296568, |
| "grad_norm": 0.32275485870283527, |
| "learning_rate": 3.3249895702962036e-05, |
| "loss": 0.3659, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.204839617332583, |
| "grad_norm": 0.2831771258197277, |
| "learning_rate": 3.322903629536921e-05, |
| "loss": 0.3608, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.2059651097355093, |
| "grad_norm": 0.4099026461053303, |
| "learning_rate": 3.320817688777639e-05, |
| "loss": 0.3657, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.2070906021384356, |
| "grad_norm": 0.2988459528156424, |
| "learning_rate": 3.318731748018357e-05, |
| "loss": 0.3612, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.2082160945413618, |
| "grad_norm": 0.3285103143387034, |
| "learning_rate": 3.316645807259074e-05, |
| "loss": 0.3243, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.209341586944288, |
| "grad_norm": 0.3140866233456728, |
| "learning_rate": 3.314559866499792e-05, |
| "loss": 0.3605, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.2104670793472143, |
| "grad_norm": 0.3136304362130377, |
| "learning_rate": 3.312473925740509e-05, |
| "loss": 0.3518, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.2115925717501406, |
| "grad_norm": 0.37704920383565566, |
| "learning_rate": 3.310387984981227e-05, |
| "loss": 0.3505, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.212718064153067, |
| "grad_norm": 0.3386484501892276, |
| "learning_rate": 3.3083020442219445e-05, |
| "loss": 0.3581, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.2138435565559933, |
| "grad_norm": 0.2756287445653643, |
| "learning_rate": 3.306216103462662e-05, |
| "loss": 0.3508, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.2149690489589196, |
| "grad_norm": 0.3727446701400803, |
| "learning_rate": 3.304130162703379e-05, |
| "loss": 0.3637, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.2160945413618458, |
| "grad_norm": 0.34205106067470487, |
| "learning_rate": 3.302044221944097e-05, |
| "loss": 0.3719, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.217220033764772, |
| "grad_norm": 0.277943785807938, |
| "learning_rate": 3.2999582811848146e-05, |
| "loss": 0.3663, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.2183455261676983, |
| "grad_norm": 0.32778887865165535, |
| "learning_rate": 3.2978723404255317e-05, |
| "loss": 0.3788, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.2194710185706246, |
| "grad_norm": 0.35850613973050943, |
| "learning_rate": 3.2957863996662494e-05, |
| "loss": 0.3516, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.220596510973551, |
| "grad_norm": 0.32265446214334986, |
| "learning_rate": 3.293700458906967e-05, |
| "loss": 0.3637, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.2217220033764773, |
| "grad_norm": 0.32337082624436203, |
| "learning_rate": 3.291614518147685e-05, |
| "loss": 0.353, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.2228474957794035, |
| "grad_norm": 0.3566976026538077, |
| "learning_rate": 3.2895285773884024e-05, |
| "loss": 0.3702, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.2239729881823298, |
| "grad_norm": 0.3602820801303339, |
| "learning_rate": 3.28744263662912e-05, |
| "loss": 0.3478, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.225098480585256, |
| "grad_norm": 0.3167430696040855, |
| "learning_rate": 3.285356695869837e-05, |
| "loss": 0.3659, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.2262239729881823, |
| "grad_norm": 0.3018055329469023, |
| "learning_rate": 3.283270755110555e-05, |
| "loss": 0.3588, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.2273494653911086, |
| "grad_norm": 0.3230175863991661, |
| "learning_rate": 3.2811848143512726e-05, |
| "loss": 0.3843, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.2284749577940348, |
| "grad_norm": 0.33437635395778137, |
| "learning_rate": 3.27909887359199e-05, |
| "loss": 0.3767, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.229600450196961, |
| "grad_norm": 0.2981951069574055, |
| "learning_rate": 3.277012932832707e-05, |
| "loss": 0.3595, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.2307259425998875, |
| "grad_norm": 0.3380111260459277, |
| "learning_rate": 3.2749269920734257e-05, |
| "loss": 0.3709, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.2318514350028138, |
| "grad_norm": 0.3105746941727841, |
| "learning_rate": 3.272841051314143e-05, |
| "loss": 0.3683, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.23297692740574, |
| "grad_norm": 0.30933018515689015, |
| "learning_rate": 3.2707551105548604e-05, |
| "loss": 0.3592, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.2341024198086663, |
| "grad_norm": 0.3281758704709362, |
| "learning_rate": 3.268669169795578e-05, |
| "loss": 0.3606, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.2352279122115926, |
| "grad_norm": 0.27019773248184603, |
| "learning_rate": 3.266583229036296e-05, |
| "loss": 0.3761, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.2363534046145188, |
| "grad_norm": 0.3336597777910512, |
| "learning_rate": 3.264497288277013e-05, |
| "loss": 0.3959, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.237478897017445, |
| "grad_norm": 0.31907940795687006, |
| "learning_rate": 3.262411347517731e-05, |
| "loss": 0.3615, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.2386043894203713, |
| "grad_norm": 0.3307050654215005, |
| "learning_rate": 3.260325406758448e-05, |
| "loss": 0.3705, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.2397298818232976, |
| "grad_norm": 0.31539392998297533, |
| "learning_rate": 3.258239465999165e-05, |
| "loss": 0.3495, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.240855374226224, |
| "grad_norm": 0.33795761226174353, |
| "learning_rate": 3.2561535252398836e-05, |
| "loss": 0.3268, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.2419808666291503, |
| "grad_norm": 0.3027820655749601, |
| "learning_rate": 3.2540675844806006e-05, |
| "loss": 0.3408, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.2431063590320766, |
| "grad_norm": 0.3679583282318403, |
| "learning_rate": 3.251981643721318e-05, |
| "loss": 0.3677, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.2442318514350028, |
| "grad_norm": 0.3613263755121883, |
| "learning_rate": 3.249895702962036e-05, |
| "loss": 0.3829, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.245357343837929, |
| "grad_norm": 0.24750292423243272, |
| "learning_rate": 3.247809762202754e-05, |
| "loss": 0.3482, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.2464828362408553, |
| "grad_norm": 0.3522480210531916, |
| "learning_rate": 3.245723821443471e-05, |
| "loss": 0.3625, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.2476083286437816, |
| "grad_norm": 0.31652713432842655, |
| "learning_rate": 3.243637880684189e-05, |
| "loss": 0.3671, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.248733821046708, |
| "grad_norm": 0.2933496822923214, |
| "learning_rate": 3.241551939924906e-05, |
| "loss": 0.3497, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.2498593134496343, |
| "grad_norm": 0.32050895251241057, |
| "learning_rate": 3.239465999165624e-05, |
| "loss": 0.3716, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.2509848058525606, |
| "grad_norm": 0.3121935413715743, |
| "learning_rate": 3.2373800584063415e-05, |
| "loss": 0.3771, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.2521102982554868, |
| "grad_norm": 0.32265588805772627, |
| "learning_rate": 3.235294117647059e-05, |
| "loss": 0.3439, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.253235790658413, |
| "grad_norm": 0.3064712619565091, |
| "learning_rate": 3.233208176887776e-05, |
| "loss": 0.3546, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.2543612830613393, |
| "grad_norm": 0.2996084699077036, |
| "learning_rate": 3.2311222361284946e-05, |
| "loss": 0.3833, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.2554867754642656, |
| "grad_norm": 0.3107489193677045, |
| "learning_rate": 3.2290362953692116e-05, |
| "loss": 0.364, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.2566122678671918, |
| "grad_norm": 0.29187918061969403, |
| "learning_rate": 3.226950354609929e-05, |
| "loss": 0.3504, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.257737760270118, |
| "grad_norm": 0.2941379965996245, |
| "learning_rate": 3.224864413850647e-05, |
| "loss": 0.3392, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.2588632526730446, |
| "grad_norm": 0.25115923486308955, |
| "learning_rate": 3.222778473091365e-05, |
| "loss": 0.3876, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.2599887450759708, |
| "grad_norm": 0.3252010811279875, |
| "learning_rate": 3.220692532332082e-05, |
| "loss": 0.3375, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.261114237478897, |
| "grad_norm": 0.29814156629055977, |
| "learning_rate": 3.2186065915727994e-05, |
| "loss": 0.3401, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.2622397298818233, |
| "grad_norm": 0.31902570430326976, |
| "learning_rate": 3.216520650813517e-05, |
| "loss": 0.3732, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.2633652222847496, |
| "grad_norm": 0.3010703802720578, |
| "learning_rate": 3.214434710054234e-05, |
| "loss": 0.358, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.2644907146876758, |
| "grad_norm": 0.32852710550779146, |
| "learning_rate": 3.2123487692949525e-05, |
| "loss": 0.3525, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.265616207090602, |
| "grad_norm": 0.32212180119638056, |
| "learning_rate": 3.2102628285356696e-05, |
| "loss": 0.3749, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.2667416994935286, |
| "grad_norm": 0.378384113691716, |
| "learning_rate": 3.208176887776387e-05, |
| "loss": 0.3669, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.2678671918964546, |
| "grad_norm": 0.31165403587755924, |
| "learning_rate": 3.206090947017105e-05, |
| "loss": 0.3559, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.268992684299381, |
| "grad_norm": 0.3679615615830758, |
| "learning_rate": 3.2040050062578227e-05, |
| "loss": 0.3623, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.2701181767023073, |
| "grad_norm": 0.3467806488910905, |
| "learning_rate": 3.20191906549854e-05, |
| "loss": 0.3771, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.2712436691052336, |
| "grad_norm": 1.9333645104311041, |
| "learning_rate": 3.1998331247392574e-05, |
| "loss": 0.3809, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.2723691615081598, |
| "grad_norm": 0.4215069325465578, |
| "learning_rate": 3.197747183979975e-05, |
| "loss": 0.3489, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.273494653911086, |
| "grad_norm": 0.2879811482225369, |
| "learning_rate": 3.195661243220693e-05, |
| "loss": 0.3627, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.2746201463140123, |
| "grad_norm": 0.4477759704739148, |
| "learning_rate": 3.19357530246141e-05, |
| "loss": 0.3623, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.2757456387169386, |
| "grad_norm": 0.3424164269682256, |
| "learning_rate": 3.191489361702128e-05, |
| "loss": 0.3476, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.276871131119865, |
| "grad_norm": 0.32862691867356353, |
| "learning_rate": 3.189403420942845e-05, |
| "loss": 0.3649, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.277996623522791, |
| "grad_norm": 0.3209270264744574, |
| "learning_rate": 3.187317480183563e-05, |
| "loss": 0.3535, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.2791221159257176, |
| "grad_norm": 0.3565891148820592, |
| "learning_rate": 3.1852315394242806e-05, |
| "loss": 0.3443, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.2802476083286438, |
| "grad_norm": 0.28408074419058515, |
| "learning_rate": 3.183145598664998e-05, |
| "loss": 0.369, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.28137310073157, |
| "grad_norm": 0.3637840011075196, |
| "learning_rate": 3.181059657905715e-05, |
| "loss": 0.3608, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.2824985931344963, |
| "grad_norm": 0.3595209908718878, |
| "learning_rate": 3.178973717146434e-05, |
| "loss": 0.3493, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.2836240855374226, |
| "grad_norm": 0.26496883846043384, |
| "learning_rate": 3.176887776387151e-05, |
| "loss": 0.3633, |
| "step": 1141 |
| }, |
| { |
| "epoch": 1.2847495779403488, |
| "grad_norm": 0.3336179309407727, |
| "learning_rate": 3.1748018356278684e-05, |
| "loss": 0.3662, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.285875070343275, |
| "grad_norm": 0.32668676414933834, |
| "learning_rate": 3.172715894868586e-05, |
| "loss": 0.3671, |
| "step": 1143 |
| }, |
| { |
| "epoch": 1.2870005627462016, |
| "grad_norm": 0.31252062188747054, |
| "learning_rate": 3.170629954109303e-05, |
| "loss": 0.3647, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.2881260551491278, |
| "grad_norm": 0.31744497936057164, |
| "learning_rate": 3.168544013350021e-05, |
| "loss": 0.3622, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.289251547552054, |
| "grad_norm": 0.2862050055745393, |
| "learning_rate": 3.1664580725907385e-05, |
| "loss": 0.3883, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.2903770399549803, |
| "grad_norm": 0.30021118499678395, |
| "learning_rate": 3.164372131831456e-05, |
| "loss": 0.3579, |
| "step": 1147 |
| }, |
| { |
| "epoch": 1.2915025323579066, |
| "grad_norm": 0.2910467656286127, |
| "learning_rate": 3.162286191072173e-05, |
| "loss": 0.3534, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.2926280247608328, |
| "grad_norm": 0.28678455388133556, |
| "learning_rate": 3.1602002503128916e-05, |
| "loss": 0.3497, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.293753517163759, |
| "grad_norm": 0.27836486011517614, |
| "learning_rate": 3.1581143095536086e-05, |
| "loss": 0.3443, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.2948790095666856, |
| "grad_norm": 0.30812952315806486, |
| "learning_rate": 3.156028368794326e-05, |
| "loss": 0.3893, |
| "step": 1151 |
| }, |
| { |
| "epoch": 1.2960045019696116, |
| "grad_norm": 0.2874885069684301, |
| "learning_rate": 3.153942428035044e-05, |
| "loss": 0.3685, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.297129994372538, |
| "grad_norm": 0.3347706854010768, |
| "learning_rate": 3.151856487275762e-05, |
| "loss": 0.3927, |
| "step": 1153 |
| }, |
| { |
| "epoch": 1.2982554867754643, |
| "grad_norm": 0.32176469835749927, |
| "learning_rate": 3.149770546516479e-05, |
| "loss": 0.38, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.2993809791783906, |
| "grad_norm": 0.2898256632538439, |
| "learning_rate": 3.147684605757197e-05, |
| "loss": 0.3616, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.3005064715813168, |
| "grad_norm": 0.33352792276895776, |
| "learning_rate": 3.145598664997914e-05, |
| "loss": 0.3748, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.301631963984243, |
| "grad_norm": 0.3099488701941323, |
| "learning_rate": 3.143512724238632e-05, |
| "loss": 0.372, |
| "step": 1157 |
| }, |
| { |
| "epoch": 1.3027574563871693, |
| "grad_norm": 0.30363398725151736, |
| "learning_rate": 3.1414267834793495e-05, |
| "loss": 0.3701, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.3038829487900956, |
| "grad_norm": 0.28447681088914367, |
| "learning_rate": 3.139340842720067e-05, |
| "loss": 0.3774, |
| "step": 1159 |
| }, |
| { |
| "epoch": 1.305008441193022, |
| "grad_norm": 0.3003448198337203, |
| "learning_rate": 3.137254901960784e-05, |
| "loss": 0.3598, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.306133933595948, |
| "grad_norm": 0.3379084483923677, |
| "learning_rate": 3.135168961201502e-05, |
| "loss": 0.3708, |
| "step": 1161 |
| }, |
| { |
| "epoch": 1.3072594259988746, |
| "grad_norm": 0.28091894310377574, |
| "learning_rate": 3.13308302044222e-05, |
| "loss": 0.3718, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.3083849184018008, |
| "grad_norm": 0.33666696805419777, |
| "learning_rate": 3.1309970796829374e-05, |
| "loss": 0.3521, |
| "step": 1163 |
| }, |
| { |
| "epoch": 1.309510410804727, |
| "grad_norm": 0.2784271381389026, |
| "learning_rate": 3.128911138923655e-05, |
| "loss": 0.365, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.3106359032076533, |
| "grad_norm": 0.32996125555463496, |
| "learning_rate": 3.126825198164372e-05, |
| "loss": 0.3663, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.3117613956105796, |
| "grad_norm": 0.29339874231665497, |
| "learning_rate": 3.12473925740509e-05, |
| "loss": 0.3579, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.3128868880135058, |
| "grad_norm": 0.27539689603551204, |
| "learning_rate": 3.1226533166458075e-05, |
| "loss": 0.3428, |
| "step": 1167 |
| }, |
| { |
| "epoch": 1.314012380416432, |
| "grad_norm": 0.30586051604779685, |
| "learning_rate": 3.120567375886525e-05, |
| "loss": 0.3742, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.3151378728193586, |
| "grad_norm": 0.318506663490862, |
| "learning_rate": 3.118481435127242e-05, |
| "loss": 0.3555, |
| "step": 1169 |
| }, |
| { |
| "epoch": 1.3162633652222848, |
| "grad_norm": 0.32193633165736774, |
| "learning_rate": 3.11639549436796e-05, |
| "loss": 0.3798, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.317388857625211, |
| "grad_norm": 0.26006223637970205, |
| "learning_rate": 3.1143095536086776e-05, |
| "loss": 0.3688, |
| "step": 1171 |
| }, |
| { |
| "epoch": 1.3185143500281373, |
| "grad_norm": 0.3091644393869938, |
| "learning_rate": 3.112223612849395e-05, |
| "loss": 0.344, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.3196398424310636, |
| "grad_norm": 0.2950119209666744, |
| "learning_rate": 3.110137672090112e-05, |
| "loss": 0.3807, |
| "step": 1173 |
| }, |
| { |
| "epoch": 1.3207653348339898, |
| "grad_norm": 0.2975497460062189, |
| "learning_rate": 3.108051731330831e-05, |
| "loss": 0.3581, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.321890827236916, |
| "grad_norm": 0.3248317526622501, |
| "learning_rate": 3.105965790571548e-05, |
| "loss": 0.3455, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.3230163196398426, |
| "grad_norm": 0.28753503994393237, |
| "learning_rate": 3.1038798498122654e-05, |
| "loss": 0.3526, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.3241418120427686, |
| "grad_norm": 0.309321520103074, |
| "learning_rate": 3.101793909052983e-05, |
| "loss": 0.3671, |
| "step": 1177 |
| }, |
| { |
| "epoch": 1.325267304445695, |
| "grad_norm": 0.31093843252643993, |
| "learning_rate": 3.099707968293701e-05, |
| "loss": 0.3797, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.3263927968486213, |
| "grad_norm": 0.2941320554481767, |
| "learning_rate": 3.097622027534418e-05, |
| "loss": 0.3656, |
| "step": 1179 |
| }, |
| { |
| "epoch": 1.3275182892515476, |
| "grad_norm": 0.33353760439258306, |
| "learning_rate": 3.095536086775136e-05, |
| "loss": 0.3659, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.3286437816544738, |
| "grad_norm": 0.2569769588199655, |
| "learning_rate": 3.093450146015853e-05, |
| "loss": 0.3842, |
| "step": 1181 |
| }, |
| { |
| "epoch": 1.3297692740574, |
| "grad_norm": 0.3330169931726158, |
| "learning_rate": 3.091364205256571e-05, |
| "loss": 0.3643, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.3308947664603263, |
| "grad_norm": 0.2876950022849873, |
| "learning_rate": 3.0892782644972886e-05, |
| "loss": 0.357, |
| "step": 1183 |
| }, |
| { |
| "epoch": 1.3320202588632526, |
| "grad_norm": 0.31915621662192034, |
| "learning_rate": 3.087192323738006e-05, |
| "loss": 0.3817, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.333145751266179, |
| "grad_norm": 0.31039603557721346, |
| "learning_rate": 3.085106382978723e-05, |
| "loss": 0.3705, |
| "step": 1185 |
| }, |
| { |
| "epoch": 1.334271243669105, |
| "grad_norm": 0.35493105743167, |
| "learning_rate": 3.083020442219441e-05, |
| "loss": 0.375, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.3353967360720316, |
| "grad_norm": 0.2887229895411605, |
| "learning_rate": 3.080934501460159e-05, |
| "loss": 0.3705, |
| "step": 1187 |
| }, |
| { |
| "epoch": 1.3365222284749578, |
| "grad_norm": 0.33105501034844814, |
| "learning_rate": 3.078848560700876e-05, |
| "loss": 0.3517, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.337647720877884, |
| "grad_norm": 0.296938807012652, |
| "learning_rate": 3.076762619941594e-05, |
| "loss": 0.3569, |
| "step": 1189 |
| }, |
| { |
| "epoch": 1.3387732132808103, |
| "grad_norm": 0.2631825890999598, |
| "learning_rate": 3.074676679182311e-05, |
| "loss": 0.3719, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.3398987056837366, |
| "grad_norm": 0.3302060927278823, |
| "learning_rate": 3.072590738423029e-05, |
| "loss": 0.357, |
| "step": 1191 |
| }, |
| { |
| "epoch": 1.3410241980866628, |
| "grad_norm": 0.284964127664028, |
| "learning_rate": 3.0705047976637465e-05, |
| "loss": 0.3637, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.342149690489589, |
| "grad_norm": 0.3450486482153124, |
| "learning_rate": 3.068418856904464e-05, |
| "loss": 0.3545, |
| "step": 1193 |
| }, |
| { |
| "epoch": 1.3432751828925156, |
| "grad_norm": 0.3097272198664404, |
| "learning_rate": 3.066332916145181e-05, |
| "loss": 0.3615, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.3444006752954418, |
| "grad_norm": 0.31428949130893125, |
| "learning_rate": 3.0642469753858996e-05, |
| "loss": 0.3577, |
| "step": 1195 |
| }, |
| { |
| "epoch": 1.345526167698368, |
| "grad_norm": 0.3459630485656923, |
| "learning_rate": 3.062161034626617e-05, |
| "loss": 0.38, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.3466516601012943, |
| "grad_norm": 0.34840227455144135, |
| "learning_rate": 3.0600750938673344e-05, |
| "loss": 0.3731, |
| "step": 1197 |
| }, |
| { |
| "epoch": 1.3477771525042206, |
| "grad_norm": 0.36198967619880806, |
| "learning_rate": 3.057989153108052e-05, |
| "loss": 0.3637, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.3489026449071468, |
| "grad_norm": 0.36980762481338214, |
| "learning_rate": 3.05590321234877e-05, |
| "loss": 0.3545, |
| "step": 1199 |
| }, |
| { |
| "epoch": 1.350028137310073, |
| "grad_norm": 0.34738553940185973, |
| "learning_rate": 3.053817271589487e-05, |
| "loss": 0.358, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.3511536297129996, |
| "grad_norm": 0.28899221690961746, |
| "learning_rate": 3.0517313308302048e-05, |
| "loss": 0.3692, |
| "step": 1201 |
| }, |
| { |
| "epoch": 1.3522791221159256, |
| "grad_norm": 0.348414484092682, |
| "learning_rate": 3.0496453900709222e-05, |
| "loss": 0.3562, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.353404614518852, |
| "grad_norm": 0.28821763744716605, |
| "learning_rate": 3.04755944931164e-05, |
| "loss": 0.3623, |
| "step": 1203 |
| }, |
| { |
| "epoch": 1.3545301069217783, |
| "grad_norm": 0.34701713808150375, |
| "learning_rate": 3.0454735085523572e-05, |
| "loss": 0.3667, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.3556555993247046, |
| "grad_norm": 0.34731368442368854, |
| "learning_rate": 3.0433875677930746e-05, |
| "loss": 0.3767, |
| "step": 1205 |
| }, |
| { |
| "epoch": 1.3567810917276308, |
| "grad_norm": 0.2873367344993991, |
| "learning_rate": 3.0413016270337923e-05, |
| "loss": 0.3594, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.357906584130557, |
| "grad_norm": 0.33455871764963324, |
| "learning_rate": 3.0392156862745097e-05, |
| "loss": 0.3649, |
| "step": 1207 |
| }, |
| { |
| "epoch": 1.3590320765334833, |
| "grad_norm": 0.3444401979121362, |
| "learning_rate": 3.0371297455152277e-05, |
| "loss": 0.3656, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.3601575689364096, |
| "grad_norm": 0.2899964778052406, |
| "learning_rate": 3.0350438047559447e-05, |
| "loss": 0.3457, |
| "step": 1209 |
| }, |
| { |
| "epoch": 1.361283061339336, |
| "grad_norm": 0.33907151317470086, |
| "learning_rate": 3.0329578639966627e-05, |
| "loss": 0.3627, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.362408553742262, |
| "grad_norm": 0.3044719983267248, |
| "learning_rate": 3.03087192323738e-05, |
| "loss": 0.3654, |
| "step": 1211 |
| }, |
| { |
| "epoch": 1.3635340461451886, |
| "grad_norm": 0.3161562860694256, |
| "learning_rate": 3.0287859824780978e-05, |
| "loss": 0.342, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.3646595385481148, |
| "grad_norm": 0.3068022604762919, |
| "learning_rate": 3.026700041718815e-05, |
| "loss": 0.3539, |
| "step": 1213 |
| }, |
| { |
| "epoch": 1.365785030951041, |
| "grad_norm": 0.2850677267934718, |
| "learning_rate": 3.024614100959533e-05, |
| "loss": 0.3725, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.3669105233539673, |
| "grad_norm": 0.3160554970609396, |
| "learning_rate": 3.0225281602002502e-05, |
| "loss": 0.3868, |
| "step": 1215 |
| }, |
| { |
| "epoch": 1.3680360157568936, |
| "grad_norm": 0.3373572652606873, |
| "learning_rate": 3.0204422194409683e-05, |
| "loss": 0.3711, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.3691615081598199, |
| "grad_norm": 0.27352074329674897, |
| "learning_rate": 3.0183562786816856e-05, |
| "loss": 0.3593, |
| "step": 1217 |
| }, |
| { |
| "epoch": 1.370287000562746, |
| "grad_norm": 0.32866686300985715, |
| "learning_rate": 3.0162703379224033e-05, |
| "loss": 0.3547, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.3714124929656726, |
| "grad_norm": 0.28785334578687116, |
| "learning_rate": 3.0141843971631207e-05, |
| "loss": 0.3703, |
| "step": 1219 |
| }, |
| { |
| "epoch": 1.3725379853685988, |
| "grad_norm": 0.3336610653280944, |
| "learning_rate": 3.0120984564038384e-05, |
| "loss": 0.3594, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.373663477771525, |
| "grad_norm": 0.3252899566864213, |
| "learning_rate": 3.0100125156445557e-05, |
| "loss": 0.3666, |
| "step": 1221 |
| }, |
| { |
| "epoch": 1.3747889701744513, |
| "grad_norm": 0.34272700759634595, |
| "learning_rate": 3.0079265748852738e-05, |
| "loss": 0.3673, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.3759144625773776, |
| "grad_norm": 0.2839456991740852, |
| "learning_rate": 3.0058406341259908e-05, |
| "loss": 0.3731, |
| "step": 1223 |
| }, |
| { |
| "epoch": 1.3770399549803038, |
| "grad_norm": 0.34144486456169987, |
| "learning_rate": 3.0037546933667088e-05, |
| "loss": 0.3535, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.37816544738323, |
| "grad_norm": 0.3510452096605386, |
| "learning_rate": 3.0016687526074262e-05, |
| "loss": 0.3783, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.3792909397861566, |
| "grad_norm": 0.3219709429564443, |
| "learning_rate": 2.9995828118481435e-05, |
| "loss": 0.3653, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.3804164321890826, |
| "grad_norm": 0.2922536824083754, |
| "learning_rate": 2.9974968710888612e-05, |
| "loss": 0.3487, |
| "step": 1227 |
| }, |
| { |
| "epoch": 1.381541924592009, |
| "grad_norm": 0.3146465080311366, |
| "learning_rate": 2.9954109303295786e-05, |
| "loss": 0.3582, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.3826674169949353, |
| "grad_norm": 0.3202141542926466, |
| "learning_rate": 2.9933249895702963e-05, |
| "loss": 0.3767, |
| "step": 1229 |
| }, |
| { |
| "epoch": 1.3837929093978616, |
| "grad_norm": 0.3126664207698992, |
| "learning_rate": 2.9912390488110137e-05, |
| "loss": 0.3861, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.3849184018007878, |
| "grad_norm": 0.3006754510665695, |
| "learning_rate": 2.9891531080517317e-05, |
| "loss": 0.3826, |
| "step": 1231 |
| }, |
| { |
| "epoch": 1.386043894203714, |
| "grad_norm": 0.34183231562741717, |
| "learning_rate": 2.9870671672924487e-05, |
| "loss": 0.3384, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.3871693866066404, |
| "grad_norm": 0.2981637621431096, |
| "learning_rate": 2.9849812265331668e-05, |
| "loss": 0.3644, |
| "step": 1233 |
| }, |
| { |
| "epoch": 1.3882948790095666, |
| "grad_norm": 0.32927113911951866, |
| "learning_rate": 2.982895285773884e-05, |
| "loss": 0.37, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.389420371412493, |
| "grad_norm": 0.3516964621170918, |
| "learning_rate": 2.9808093450146018e-05, |
| "loss": 0.3734, |
| "step": 1235 |
| }, |
| { |
| "epoch": 1.3905458638154191, |
| "grad_norm": 0.28294383540669815, |
| "learning_rate": 2.9787234042553192e-05, |
| "loss": 0.3545, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.3916713562183456, |
| "grad_norm": 0.36437808290704293, |
| "learning_rate": 2.976637463496037e-05, |
| "loss": 0.3626, |
| "step": 1237 |
| }, |
| { |
| "epoch": 1.3927968486212718, |
| "grad_norm": 0.31704230935830585, |
| "learning_rate": 2.9745515227367542e-05, |
| "loss": 0.3536, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.393922341024198, |
| "grad_norm": 0.3234586646771036, |
| "learning_rate": 2.9724655819774723e-05, |
| "loss": 0.3373, |
| "step": 1239 |
| }, |
| { |
| "epoch": 1.3950478334271244, |
| "grad_norm": 0.3108185575862165, |
| "learning_rate": 2.9703796412181893e-05, |
| "loss": 0.3685, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.3961733258300506, |
| "grad_norm": 0.3752154178147501, |
| "learning_rate": 2.9682937004589073e-05, |
| "loss": 0.3676, |
| "step": 1241 |
| }, |
| { |
| "epoch": 1.3972988182329769, |
| "grad_norm": 0.2884249248162915, |
| "learning_rate": 2.9662077596996247e-05, |
| "loss": 0.3806, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.3984243106359031, |
| "grad_norm": 0.34853899665658195, |
| "learning_rate": 2.9641218189403424e-05, |
| "loss": 0.35, |
| "step": 1243 |
| }, |
| { |
| "epoch": 1.3995498030388296, |
| "grad_norm": 0.38250560263603123, |
| "learning_rate": 2.9620358781810597e-05, |
| "loss": 0.3671, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.4006752954417558, |
| "grad_norm": 0.31599487532627424, |
| "learning_rate": 2.9599499374217778e-05, |
| "loss": 0.3833, |
| "step": 1245 |
| }, |
| { |
| "epoch": 1.401800787844682, |
| "grad_norm": 0.3500438918178945, |
| "learning_rate": 2.9578639966624948e-05, |
| "loss": 0.365, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.4029262802476083, |
| "grad_norm": 0.41585464581458353, |
| "learning_rate": 2.955778055903212e-05, |
| "loss": 0.3918, |
| "step": 1247 |
| }, |
| { |
| "epoch": 1.4040517726505346, |
| "grad_norm": 0.314777082319376, |
| "learning_rate": 2.9536921151439302e-05, |
| "loss": 0.3657, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.4051772650534609, |
| "grad_norm": 0.3941954750192581, |
| "learning_rate": 2.9516061743846472e-05, |
| "loss": 0.3472, |
| "step": 1249 |
| }, |
| { |
| "epoch": 1.406302757456387, |
| "grad_norm": 0.40917669615827007, |
| "learning_rate": 2.9495202336253653e-05, |
| "loss": 0.3646, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.4074282498593136, |
| "grad_norm": 0.32821878108438296, |
| "learning_rate": 2.9474342928660826e-05, |
| "loss": 0.3636, |
| "step": 1251 |
| }, |
| { |
| "epoch": 1.4085537422622396, |
| "grad_norm": 0.4247196049076011, |
| "learning_rate": 2.9453483521068003e-05, |
| "loss": 0.3412, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.409679234665166, |
| "grad_norm": 0.3851171427422802, |
| "learning_rate": 2.9432624113475177e-05, |
| "loss": 0.3819, |
| "step": 1253 |
| }, |
| { |
| "epoch": 1.4108047270680923, |
| "grad_norm": 0.3469070969772743, |
| "learning_rate": 2.9411764705882354e-05, |
| "loss": 0.3616, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.4119302194710186, |
| "grad_norm": 0.444050908424801, |
| "learning_rate": 2.9390905298289527e-05, |
| "loss": 0.3565, |
| "step": 1255 |
| }, |
| { |
| "epoch": 1.4130557118739449, |
| "grad_norm": 0.2879248830093118, |
| "learning_rate": 2.9370045890696708e-05, |
| "loss": 0.3601, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.414181204276871, |
| "grad_norm": 0.4483867224495224, |
| "learning_rate": 2.934918648310388e-05, |
| "loss": 0.3682, |
| "step": 1257 |
| }, |
| { |
| "epoch": 1.4153066966797974, |
| "grad_norm": 0.3000179145995747, |
| "learning_rate": 2.9328327075511058e-05, |
| "loss": 0.3636, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.4164321890827236, |
| "grad_norm": 0.38516280700639255, |
| "learning_rate": 2.9307467667918232e-05, |
| "loss": 0.3641, |
| "step": 1259 |
| }, |
| { |
| "epoch": 1.41755768148565, |
| "grad_norm": 0.3233722496864056, |
| "learning_rate": 2.928660826032541e-05, |
| "loss": 0.353, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.4186831738885761, |
| "grad_norm": 0.35206171266037123, |
| "learning_rate": 2.9265748852732582e-05, |
| "loss": 0.3715, |
| "step": 1261 |
| }, |
| { |
| "epoch": 1.4198086662915026, |
| "grad_norm": 0.3416243699378654, |
| "learning_rate": 2.9244889445139763e-05, |
| "loss": 0.374, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.4209341586944289, |
| "grad_norm": 0.354667895796559, |
| "learning_rate": 2.9224030037546933e-05, |
| "loss": 0.3587, |
| "step": 1263 |
| }, |
| { |
| "epoch": 1.422059651097355, |
| "grad_norm": 0.35090199403773553, |
| "learning_rate": 2.9203170629954113e-05, |
| "loss": 0.3679, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.4231851435002814, |
| "grad_norm": 0.37870594240912114, |
| "learning_rate": 2.9182311222361287e-05, |
| "loss": 0.3497, |
| "step": 1265 |
| }, |
| { |
| "epoch": 1.4243106359032076, |
| "grad_norm": 0.3281629547838438, |
| "learning_rate": 2.9161451814768464e-05, |
| "loss": 0.3457, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.4254361283061339, |
| "grad_norm": 0.35899900559464903, |
| "learning_rate": 2.9140592407175638e-05, |
| "loss": 0.3627, |
| "step": 1267 |
| }, |
| { |
| "epoch": 1.4265616207090601, |
| "grad_norm": 0.30995421196825085, |
| "learning_rate": 2.911973299958281e-05, |
| "loss": 0.3939, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.4276871131119866, |
| "grad_norm": 0.3707647704012859, |
| "learning_rate": 2.9098873591989988e-05, |
| "loss": 0.3683, |
| "step": 1269 |
| }, |
| { |
| "epoch": 1.4288126055149128, |
| "grad_norm": 0.3493957956050667, |
| "learning_rate": 2.9078014184397162e-05, |
| "loss": 0.3922, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.429938097917839, |
| "grad_norm": 0.3392405036440076, |
| "learning_rate": 2.9057154776804342e-05, |
| "loss": 0.3675, |
| "step": 1271 |
| }, |
| { |
| "epoch": 1.4310635903207654, |
| "grad_norm": 0.42908164729503306, |
| "learning_rate": 2.9036295369211512e-05, |
| "loss": 0.3497, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.4321890827236916, |
| "grad_norm": 0.3217604733902604, |
| "learning_rate": 2.9015435961618693e-05, |
| "loss": 0.3419, |
| "step": 1273 |
| }, |
| { |
| "epoch": 1.4333145751266179, |
| "grad_norm": 0.285513498828739, |
| "learning_rate": 2.8994576554025866e-05, |
| "loss": 0.3525, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.4344400675295441, |
| "grad_norm": 0.31722207811402275, |
| "learning_rate": 2.8973717146433043e-05, |
| "loss": 0.3537, |
| "step": 1275 |
| }, |
| { |
| "epoch": 1.4355655599324706, |
| "grad_norm": 0.3425240574013163, |
| "learning_rate": 2.8952857738840217e-05, |
| "loss": 0.3536, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.4366910523353966, |
| "grad_norm": 0.37106807604623343, |
| "learning_rate": 2.8931998331247394e-05, |
| "loss": 0.3495, |
| "step": 1277 |
| }, |
| { |
| "epoch": 1.437816544738323, |
| "grad_norm": 0.28250215963205166, |
| "learning_rate": 2.8911138923654567e-05, |
| "loss": 0.3481, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.4389420371412494, |
| "grad_norm": 0.3549294685688371, |
| "learning_rate": 2.8890279516061748e-05, |
| "loss": 0.376, |
| "step": 1279 |
| }, |
| { |
| "epoch": 1.4400675295441756, |
| "grad_norm": 0.30171989887856726, |
| "learning_rate": 2.886942010846892e-05, |
| "loss": 0.3755, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.4411930219471019, |
| "grad_norm": 0.3139221398315422, |
| "learning_rate": 2.88485607008761e-05, |
| "loss": 0.3513, |
| "step": 1281 |
| }, |
| { |
| "epoch": 1.4423185143500281, |
| "grad_norm": 0.3174768026324268, |
| "learning_rate": 2.8827701293283272e-05, |
| "loss": 0.3569, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.4434440067529544, |
| "grad_norm": 0.31543813617403643, |
| "learning_rate": 2.880684188569045e-05, |
| "loss": 0.3551, |
| "step": 1283 |
| }, |
| { |
| "epoch": 1.4445694991558806, |
| "grad_norm": 0.34405513448928665, |
| "learning_rate": 2.8785982478097623e-05, |
| "loss": 0.3615, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.445694991558807, |
| "grad_norm": 0.3314207471763474, |
| "learning_rate": 2.8765123070504803e-05, |
| "loss": 0.364, |
| "step": 1285 |
| }, |
| { |
| "epoch": 1.4468204839617331, |
| "grad_norm": 0.30011672219405916, |
| "learning_rate": 2.8744263662911973e-05, |
| "loss": 0.3541, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.4479459763646596, |
| "grad_norm": 0.36939176440073757, |
| "learning_rate": 2.8723404255319154e-05, |
| "loss": 0.3466, |
| "step": 1287 |
| }, |
| { |
| "epoch": 1.4490714687675859, |
| "grad_norm": 0.38877177781745204, |
| "learning_rate": 2.8702544847726327e-05, |
| "loss": 0.3786, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.4501969611705121, |
| "grad_norm": 0.3409728207807626, |
| "learning_rate": 2.8681685440133497e-05, |
| "loss": 0.3791, |
| "step": 1289 |
| }, |
| { |
| "epoch": 1.4513224535734384, |
| "grad_norm": 0.42817414825924877, |
| "learning_rate": 2.8660826032540678e-05, |
| "loss": 0.3491, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.4524479459763646, |
| "grad_norm": 0.35198856950809654, |
| "learning_rate": 2.863996662494785e-05, |
| "loss": 0.3679, |
| "step": 1291 |
| }, |
| { |
| "epoch": 1.4535734383792909, |
| "grad_norm": 0.32113222683338927, |
| "learning_rate": 2.861910721735503e-05, |
| "loss": 0.3467, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.4546989307822171, |
| "grad_norm": 0.321401722942131, |
| "learning_rate": 2.8598247809762202e-05, |
| "loss": 0.3565, |
| "step": 1293 |
| }, |
| { |
| "epoch": 1.4558244231851436, |
| "grad_norm": 0.3506970283170861, |
| "learning_rate": 2.857738840216938e-05, |
| "loss": 0.3691, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.4569499155880699, |
| "grad_norm": 0.3366249220650027, |
| "learning_rate": 2.8556528994576552e-05, |
| "loss": 0.3679, |
| "step": 1295 |
| }, |
| { |
| "epoch": 1.458075407990996, |
| "grad_norm": 0.3863825955582127, |
| "learning_rate": 2.8535669586983733e-05, |
| "loss": 0.3656, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.4592009003939224, |
| "grad_norm": 0.3294265653501134, |
| "learning_rate": 2.8514810179390906e-05, |
| "loss": 0.363, |
| "step": 1297 |
| }, |
| { |
| "epoch": 1.4603263927968486, |
| "grad_norm": 0.3400753421715317, |
| "learning_rate": 2.8493950771798083e-05, |
| "loss": 0.3677, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.4614518851997749, |
| "grad_norm": 0.34166651813969356, |
| "learning_rate": 2.8473091364205257e-05, |
| "loss": 0.3975, |
| "step": 1299 |
| }, |
| { |
| "epoch": 1.4625773776027011, |
| "grad_norm": 0.32157145849641844, |
| "learning_rate": 2.8452231956612434e-05, |
| "loss": 0.3599, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4637028700056276, |
| "grad_norm": 0.29318800149996266, |
| "learning_rate": 2.8431372549019608e-05, |
| "loss": 0.3639, |
| "step": 1301 |
| }, |
| { |
| "epoch": 1.4648283624085536, |
| "grad_norm": 0.3275747590035082, |
| "learning_rate": 2.8410513141426788e-05, |
| "loss": 0.3405, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.46595385481148, |
| "grad_norm": 0.2669444641392893, |
| "learning_rate": 2.8389653733833958e-05, |
| "loss": 0.3556, |
| "step": 1303 |
| }, |
| { |
| "epoch": 1.4670793472144064, |
| "grad_norm": 0.2984242508636962, |
| "learning_rate": 2.836879432624114e-05, |
| "loss": 0.3475, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.4682048396173326, |
| "grad_norm": 0.34309575853919444, |
| "learning_rate": 2.8347934918648312e-05, |
| "loss": 0.3236, |
| "step": 1305 |
| }, |
| { |
| "epoch": 1.4693303320202589, |
| "grad_norm": 0.3386670126038393, |
| "learning_rate": 2.832707551105549e-05, |
| "loss": 0.3692, |
| "step": 1306 |
| }, |
| { |
| "epoch": 1.4704558244231851, |
| "grad_norm": 0.3041272096720939, |
| "learning_rate": 2.8306216103462663e-05, |
| "loss": 0.3672, |
| "step": 1307 |
| }, |
| { |
| "epoch": 1.4715813168261114, |
| "grad_norm": 0.4280504900617411, |
| "learning_rate": 2.828535669586984e-05, |
| "loss": 0.3532, |
| "step": 1308 |
| }, |
| { |
| "epoch": 1.4727068092290376, |
| "grad_norm": 0.28299915352373894, |
| "learning_rate": 2.8264497288277013e-05, |
| "loss": 0.3389, |
| "step": 1309 |
| }, |
| { |
| "epoch": 1.473832301631964, |
| "grad_norm": 0.33312026594342037, |
| "learning_rate": 2.8243637880684187e-05, |
| "loss": 0.3711, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.4749577940348901, |
| "grad_norm": 0.3324677079496402, |
| "learning_rate": 2.8222778473091367e-05, |
| "loss": 0.3637, |
| "step": 1311 |
| }, |
| { |
| "epoch": 1.4760832864378166, |
| "grad_norm": 0.3180122895020907, |
| "learning_rate": 2.8201919065498537e-05, |
| "loss": 0.3681, |
| "step": 1312 |
| }, |
| { |
| "epoch": 1.4772087788407429, |
| "grad_norm": 0.3454913512736821, |
| "learning_rate": 2.8181059657905718e-05, |
| "loss": 0.3788, |
| "step": 1313 |
| }, |
| { |
| "epoch": 1.4783342712436691, |
| "grad_norm": 0.2954651043434191, |
| "learning_rate": 2.816020025031289e-05, |
| "loss": 0.3629, |
| "step": 1314 |
| }, |
| { |
| "epoch": 1.4794597636465954, |
| "grad_norm": 0.31225437993089217, |
| "learning_rate": 2.813934084272007e-05, |
| "loss": 0.3483, |
| "step": 1315 |
| }, |
| { |
| "epoch": 1.4805852560495216, |
| "grad_norm": 0.2931420257748457, |
| "learning_rate": 2.8118481435127242e-05, |
| "loss": 0.3443, |
| "step": 1316 |
| }, |
| { |
| "epoch": 1.4817107484524479, |
| "grad_norm": 0.3077463608704642, |
| "learning_rate": 2.809762202753442e-05, |
| "loss": 0.3562, |
| "step": 1317 |
| }, |
| { |
| "epoch": 1.4828362408553741, |
| "grad_norm": 0.2868052518006215, |
| "learning_rate": 2.8076762619941593e-05, |
| "loss": 0.3532, |
| "step": 1318 |
| }, |
| { |
| "epoch": 1.4839617332583006, |
| "grad_norm": 0.28223423866564457, |
| "learning_rate": 2.8055903212348773e-05, |
| "loss": 0.3334, |
| "step": 1319 |
| }, |
| { |
| "epoch": 1.4850872256612269, |
| "grad_norm": 0.2934968437108151, |
| "learning_rate": 2.8035043804755947e-05, |
| "loss": 0.3609, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.4862127180641531, |
| "grad_norm": 0.3726867856164999, |
| "learning_rate": 2.8014184397163124e-05, |
| "loss": 0.3658, |
| "step": 1321 |
| }, |
| { |
| "epoch": 1.4873382104670794, |
| "grad_norm": 0.31940065928357514, |
| "learning_rate": 2.7993324989570297e-05, |
| "loss": 0.3752, |
| "step": 1322 |
| }, |
| { |
| "epoch": 1.4884637028700056, |
| "grad_norm": 0.343528935258811, |
| "learning_rate": 2.7972465581977474e-05, |
| "loss": 0.3689, |
| "step": 1323 |
| }, |
| { |
| "epoch": 1.4895891952729319, |
| "grad_norm": 0.29324201562634045, |
| "learning_rate": 2.7951606174384648e-05, |
| "loss": 0.3701, |
| "step": 1324 |
| }, |
| { |
| "epoch": 1.4907146876758581, |
| "grad_norm": 0.307447149562183, |
| "learning_rate": 2.7930746766791828e-05, |
| "loss": 0.3623, |
| "step": 1325 |
| }, |
| { |
| "epoch": 1.4918401800787846, |
| "grad_norm": 0.3370769636245937, |
| "learning_rate": 2.7909887359199e-05, |
| "loss": 0.3599, |
| "step": 1326 |
| }, |
| { |
| "epoch": 1.4929656724817106, |
| "grad_norm": 0.2871673492029565, |
| "learning_rate": 2.788902795160618e-05, |
| "loss": 0.3719, |
| "step": 1327 |
| }, |
| { |
| "epoch": 1.4940911648846371, |
| "grad_norm": 0.36895913560450455, |
| "learning_rate": 2.7868168544013352e-05, |
| "loss": 0.3678, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.4952166572875634, |
| "grad_norm": 0.30425325809005394, |
| "learning_rate": 2.784730913642053e-05, |
| "loss": 0.3654, |
| "step": 1329 |
| }, |
| { |
| "epoch": 1.4963421496904896, |
| "grad_norm": 0.3331261517980334, |
| "learning_rate": 2.7826449728827703e-05, |
| "loss": 0.3693, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.4974676420934159, |
| "grad_norm": 0.2798679646502201, |
| "learning_rate": 2.7805590321234876e-05, |
| "loss": 0.3709, |
| "step": 1331 |
| }, |
| { |
| "epoch": 1.4985931344963421, |
| "grad_norm": 0.37466709997642333, |
| "learning_rate": 2.7784730913642053e-05, |
| "loss": 0.363, |
| "step": 1332 |
| }, |
| { |
| "epoch": 1.4997186268992684, |
| "grad_norm": 0.35357276036020097, |
| "learning_rate": 2.7763871506049227e-05, |
| "loss": 0.3683, |
| "step": 1333 |
| }, |
| { |
| "epoch": 1.5008441193021946, |
| "grad_norm": 0.3354334941577856, |
| "learning_rate": 2.7743012098456404e-05, |
| "loss": 0.3438, |
| "step": 1334 |
| }, |
| { |
| "epoch": 1.501969611705121, |
| "grad_norm": 0.4101041564365979, |
| "learning_rate": 2.7722152690863578e-05, |
| "loss": 0.3835, |
| "step": 1335 |
| }, |
| { |
| "epoch": 1.5030951041080471, |
| "grad_norm": 0.36255802157025624, |
| "learning_rate": 2.7701293283270758e-05, |
| "loss": 0.3477, |
| "step": 1336 |
| }, |
| { |
| "epoch": 1.5042205965109736, |
| "grad_norm": 0.4061869558301693, |
| "learning_rate": 2.768043387567793e-05, |
| "loss": 0.3688, |
| "step": 1337 |
| }, |
| { |
| "epoch": 1.5053460889138999, |
| "grad_norm": 0.41388849066334216, |
| "learning_rate": 2.765957446808511e-05, |
| "loss": 0.3672, |
| "step": 1338 |
| }, |
| { |
| "epoch": 1.5064715813168261, |
| "grad_norm": 0.330932817498522, |
| "learning_rate": 2.7638715060492282e-05, |
| "loss": 0.3643, |
| "step": 1339 |
| }, |
| { |
| "epoch": 1.5075970737197524, |
| "grad_norm": 0.3399783880240679, |
| "learning_rate": 2.761785565289946e-05, |
| "loss": 0.3529, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.5087225661226786, |
| "grad_norm": 0.28341696113530734, |
| "learning_rate": 2.7596996245306633e-05, |
| "loss": 0.3496, |
| "step": 1341 |
| }, |
| { |
| "epoch": 1.509848058525605, |
| "grad_norm": 0.334967765352759, |
| "learning_rate": 2.7576136837713813e-05, |
| "loss": 0.3415, |
| "step": 1342 |
| }, |
| { |
| "epoch": 1.5109735509285311, |
| "grad_norm": 0.2668604468971969, |
| "learning_rate": 2.7555277430120983e-05, |
| "loss": 0.3634, |
| "step": 1343 |
| }, |
| { |
| "epoch": 1.5120990433314576, |
| "grad_norm": 0.31156721053710673, |
| "learning_rate": 2.7534418022528164e-05, |
| "loss": 0.3641, |
| "step": 1344 |
| }, |
| { |
| "epoch": 1.5132245357343836, |
| "grad_norm": 0.29754957914675184, |
| "learning_rate": 2.7513558614935337e-05, |
| "loss": 0.3785, |
| "step": 1345 |
| }, |
| { |
| "epoch": 1.5143500281373101, |
| "grad_norm": 0.2872566093068787, |
| "learning_rate": 2.7492699207342514e-05, |
| "loss": 0.3623, |
| "step": 1346 |
| }, |
| { |
| "epoch": 1.5154755205402364, |
| "grad_norm": 0.3526852777204813, |
| "learning_rate": 2.7471839799749688e-05, |
| "loss": 0.3723, |
| "step": 1347 |
| }, |
| { |
| "epoch": 1.5166010129431626, |
| "grad_norm": 0.31241125025784733, |
| "learning_rate": 2.7450980392156865e-05, |
| "loss": 0.3507, |
| "step": 1348 |
| }, |
| { |
| "epoch": 1.5177265053460889, |
| "grad_norm": 0.3508625079587985, |
| "learning_rate": 2.743012098456404e-05, |
| "loss": 0.3608, |
| "step": 1349 |
| }, |
| { |
| "epoch": 1.5188519977490151, |
| "grad_norm": 0.32157619105794166, |
| "learning_rate": 2.740926157697122e-05, |
| "loss": 0.3581, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.5199774901519416, |
| "grad_norm": 0.3494380418250329, |
| "learning_rate": 2.7388402169378392e-05, |
| "loss": 0.3609, |
| "step": 1351 |
| }, |
| { |
| "epoch": 1.5211029825548676, |
| "grad_norm": 0.3055065567786005, |
| "learning_rate": 2.7367542761785563e-05, |
| "loss": 0.3672, |
| "step": 1352 |
| }, |
| { |
| "epoch": 1.5222284749577941, |
| "grad_norm": 0.3950982220672214, |
| "learning_rate": 2.7346683354192743e-05, |
| "loss": 0.376, |
| "step": 1353 |
| }, |
| { |
| "epoch": 1.5233539673607202, |
| "grad_norm": 0.27852848240062467, |
| "learning_rate": 2.7325823946599917e-05, |
| "loss": 0.3485, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.5244794597636466, |
| "grad_norm": 0.3737867565000807, |
| "learning_rate": 2.7304964539007094e-05, |
| "loss": 0.3664, |
| "step": 1355 |
| }, |
| { |
| "epoch": 1.5256049521665729, |
| "grad_norm": 0.3119606266731146, |
| "learning_rate": 2.7284105131414267e-05, |
| "loss": 0.3619, |
| "step": 1356 |
| }, |
| { |
| "epoch": 1.5267304445694991, |
| "grad_norm": 0.33933519597699924, |
| "learning_rate": 2.7263245723821444e-05, |
| "loss": 0.4008, |
| "step": 1357 |
| }, |
| { |
| "epoch": 1.5278559369724254, |
| "grad_norm": 0.3275255812573412, |
| "learning_rate": 2.7242386316228618e-05, |
| "loss": 0.3702, |
| "step": 1358 |
| }, |
| { |
| "epoch": 1.5289814293753516, |
| "grad_norm": 0.3747569415524062, |
| "learning_rate": 2.7221526908635798e-05, |
| "loss": 0.3553, |
| "step": 1359 |
| }, |
| { |
| "epoch": 1.5301069217782781, |
| "grad_norm": 0.30992658499062065, |
| "learning_rate": 2.7200667501042972e-05, |
| "loss": 0.3556, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.5312324141812041, |
| "grad_norm": 0.36837860346575607, |
| "learning_rate": 2.717980809345015e-05, |
| "loss": 0.3536, |
| "step": 1361 |
| }, |
| { |
| "epoch": 1.5323579065841306, |
| "grad_norm": 0.3422637051978812, |
| "learning_rate": 2.7158948685857322e-05, |
| "loss": 0.3676, |
| "step": 1362 |
| }, |
| { |
| "epoch": 1.5334833989870569, |
| "grad_norm": 0.2882475832928599, |
| "learning_rate": 2.71380892782645e-05, |
| "loss": 0.3714, |
| "step": 1363 |
| }, |
| { |
| "epoch": 1.5346088913899831, |
| "grad_norm": 0.4680385431354928, |
| "learning_rate": 2.7117229870671673e-05, |
| "loss": 0.3728, |
| "step": 1364 |
| }, |
| { |
| "epoch": 1.5357343837929094, |
| "grad_norm": 0.28687340173500175, |
| "learning_rate": 2.7096370463078853e-05, |
| "loss": 0.3677, |
| "step": 1365 |
| }, |
| { |
| "epoch": 1.5368598761958356, |
| "grad_norm": 0.3168437934125687, |
| "learning_rate": 2.7075511055486023e-05, |
| "loss": 0.3516, |
| "step": 1366 |
| }, |
| { |
| "epoch": 1.5379853685987621, |
| "grad_norm": 0.34254703558592353, |
| "learning_rate": 2.7054651647893204e-05, |
| "loss": 0.3733, |
| "step": 1367 |
| }, |
| { |
| "epoch": 1.5391108610016881, |
| "grad_norm": 0.32210383347863225, |
| "learning_rate": 2.7033792240300377e-05, |
| "loss": 0.3657, |
| "step": 1368 |
| }, |
| { |
| "epoch": 1.5402363534046146, |
| "grad_norm": 0.2951642244458056, |
| "learning_rate": 2.7012932832707554e-05, |
| "loss": 0.3624, |
| "step": 1369 |
| }, |
| { |
| "epoch": 1.5413618458075407, |
| "grad_norm": 0.32973184204270484, |
| "learning_rate": 2.6992073425114728e-05, |
| "loss": 0.3466, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.5424873382104671, |
| "grad_norm": 0.32937201569972335, |
| "learning_rate": 2.6971214017521905e-05, |
| "loss": 0.3609, |
| "step": 1371 |
| }, |
| { |
| "epoch": 1.5436128306133934, |
| "grad_norm": 0.294240889891016, |
| "learning_rate": 2.695035460992908e-05, |
| "loss": 0.3528, |
| "step": 1372 |
| }, |
| { |
| "epoch": 1.5447383230163196, |
| "grad_norm": 0.38730632898384704, |
| "learning_rate": 2.6929495202336252e-05, |
| "loss": 0.3592, |
| "step": 1373 |
| }, |
| { |
| "epoch": 1.545863815419246, |
| "grad_norm": 0.265405748658469, |
| "learning_rate": 2.6908635794743433e-05, |
| "loss": 0.3523, |
| "step": 1374 |
| }, |
| { |
| "epoch": 1.5469893078221721, |
| "grad_norm": 0.3090293159321234, |
| "learning_rate": 2.6887776387150603e-05, |
| "loss": 0.373, |
| "step": 1375 |
| }, |
| { |
| "epoch": 1.5481148002250986, |
| "grad_norm": 0.33125373511524786, |
| "learning_rate": 2.6866916979557783e-05, |
| "loss": 0.3376, |
| "step": 1376 |
| }, |
| { |
| "epoch": 1.5492402926280247, |
| "grad_norm": 0.3859675477375762, |
| "learning_rate": 2.6846057571964957e-05, |
| "loss": 0.3595, |
| "step": 1377 |
| }, |
| { |
| "epoch": 1.5503657850309511, |
| "grad_norm": 0.2702204865287381, |
| "learning_rate": 2.6825198164372134e-05, |
| "loss": 0.3526, |
| "step": 1378 |
| }, |
| { |
| "epoch": 1.5514912774338772, |
| "grad_norm": 0.4216493180934553, |
| "learning_rate": 2.6804338756779307e-05, |
| "loss": 0.3634, |
| "step": 1379 |
| }, |
| { |
| "epoch": 1.5526167698368036, |
| "grad_norm": 0.3402054598291514, |
| "learning_rate": 2.6783479349186484e-05, |
| "loss": 0.3814, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.5537422622397299, |
| "grad_norm": 0.3634322127130347, |
| "learning_rate": 2.6762619941593658e-05, |
| "loss": 0.3739, |
| "step": 1381 |
| }, |
| { |
| "epoch": 1.5548677546426561, |
| "grad_norm": 0.4033902015465824, |
| "learning_rate": 2.6741760534000838e-05, |
| "loss": 0.3731, |
| "step": 1382 |
| }, |
| { |
| "epoch": 1.5559932470455824, |
| "grad_norm": 0.40808104649969373, |
| "learning_rate": 2.672090112640801e-05, |
| "loss": 0.3522, |
| "step": 1383 |
| }, |
| { |
| "epoch": 1.5571187394485086, |
| "grad_norm": 0.39575554889808146, |
| "learning_rate": 2.670004171881519e-05, |
| "loss": 0.3466, |
| "step": 1384 |
| }, |
| { |
| "epoch": 1.5582442318514351, |
| "grad_norm": 0.33725542522344404, |
| "learning_rate": 2.6679182311222362e-05, |
| "loss": 0.3594, |
| "step": 1385 |
| }, |
| { |
| "epoch": 1.5593697242543612, |
| "grad_norm": 0.3562002474404248, |
| "learning_rate": 2.665832290362954e-05, |
| "loss": 0.3766, |
| "step": 1386 |
| }, |
| { |
| "epoch": 1.5604952166572876, |
| "grad_norm": 0.2792679981992388, |
| "learning_rate": 2.6637463496036713e-05, |
| "loss": 0.354, |
| "step": 1387 |
| }, |
| { |
| "epoch": 1.5616207090602139, |
| "grad_norm": 0.3631975807941906, |
| "learning_rate": 2.661660408844389e-05, |
| "loss": 0.3628, |
| "step": 1388 |
| }, |
| { |
| "epoch": 1.5627462014631401, |
| "grad_norm": 0.2922697632757867, |
| "learning_rate": 2.6595744680851064e-05, |
| "loss": 0.3786, |
| "step": 1389 |
| }, |
| { |
| "epoch": 1.5638716938660664, |
| "grad_norm": 0.3930094259783832, |
| "learning_rate": 2.6574885273258244e-05, |
| "loss": 0.349, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.5649971862689926, |
| "grad_norm": 0.2753952015092564, |
| "learning_rate": 2.6554025865665418e-05, |
| "loss": 0.3606, |
| "step": 1391 |
| }, |
| { |
| "epoch": 1.5661226786719191, |
| "grad_norm": 0.323233762383296, |
| "learning_rate": 2.6533166458072595e-05, |
| "loss": 0.3584, |
| "step": 1392 |
| }, |
| { |
| "epoch": 1.5672481710748452, |
| "grad_norm": 0.3065899573190829, |
| "learning_rate": 2.6512307050479768e-05, |
| "loss": 0.3569, |
| "step": 1393 |
| }, |
| { |
| "epoch": 1.5683736634777716, |
| "grad_norm": 0.29359629957776534, |
| "learning_rate": 2.6491447642886942e-05, |
| "loss": 0.3721, |
| "step": 1394 |
| }, |
| { |
| "epoch": 1.5694991558806977, |
| "grad_norm": 0.3453639950913077, |
| "learning_rate": 2.647058823529412e-05, |
| "loss": 0.3674, |
| "step": 1395 |
| }, |
| { |
| "epoch": 1.5706246482836241, |
| "grad_norm": 0.29618728974968406, |
| "learning_rate": 2.6449728827701292e-05, |
| "loss": 0.3728, |
| "step": 1396 |
| }, |
| { |
| "epoch": 1.5717501406865504, |
| "grad_norm": 0.4022340400841394, |
| "learning_rate": 2.642886942010847e-05, |
| "loss": 0.3599, |
| "step": 1397 |
| }, |
| { |
| "epoch": 1.5728756330894766, |
| "grad_norm": 0.34040909178052503, |
| "learning_rate": 2.6408010012515643e-05, |
| "loss": 0.3452, |
| "step": 1398 |
| }, |
| { |
| "epoch": 1.574001125492403, |
| "grad_norm": 0.39633565400793064, |
| "learning_rate": 2.6387150604922823e-05, |
| "loss": 0.3638, |
| "step": 1399 |
| }, |
| { |
| "epoch": 1.5751266178953292, |
| "grad_norm": 0.3469815814003443, |
| "learning_rate": 2.6366291197329997e-05, |
| "loss": 0.3617, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.5762521102982556, |
| "grad_norm": 0.3858237301262129, |
| "learning_rate": 2.6345431789737174e-05, |
| "loss": 0.3592, |
| "step": 1401 |
| }, |
| { |
| "epoch": 1.5773776027011817, |
| "grad_norm": 0.36968305499637627, |
| "learning_rate": 2.6324572382144347e-05, |
| "loss": 0.3506, |
| "step": 1402 |
| }, |
| { |
| "epoch": 1.5785030951041081, |
| "grad_norm": 0.3505404658131974, |
| "learning_rate": 2.6303712974551524e-05, |
| "loss": 0.3686, |
| "step": 1403 |
| }, |
| { |
| "epoch": 1.5796285875070342, |
| "grad_norm": 0.33758728331020843, |
| "learning_rate": 2.6282853566958698e-05, |
| "loss": 0.3527, |
| "step": 1404 |
| }, |
| { |
| "epoch": 1.5807540799099606, |
| "grad_norm": 0.3435492065868497, |
| "learning_rate": 2.626199415936588e-05, |
| "loss": 0.3475, |
| "step": 1405 |
| }, |
| { |
| "epoch": 1.581879572312887, |
| "grad_norm": 0.3490084416143491, |
| "learning_rate": 2.624113475177305e-05, |
| "loss": 0.3607, |
| "step": 1406 |
| }, |
| { |
| "epoch": 1.5830050647158131, |
| "grad_norm": 0.31414180653905893, |
| "learning_rate": 2.622027534418023e-05, |
| "loss": 0.3504, |
| "step": 1407 |
| }, |
| { |
| "epoch": 1.5841305571187394, |
| "grad_norm": 0.3599821696826535, |
| "learning_rate": 2.6199415936587403e-05, |
| "loss": 0.3615, |
| "step": 1408 |
| }, |
| { |
| "epoch": 1.5852560495216657, |
| "grad_norm": 0.42310764019699615, |
| "learning_rate": 2.617855652899458e-05, |
| "loss": 0.3724, |
| "step": 1409 |
| }, |
| { |
| "epoch": 1.5863815419245921, |
| "grad_norm": 0.2833525199592301, |
| "learning_rate": 2.6157697121401753e-05, |
| "loss": 0.3617, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.5875070343275182, |
| "grad_norm": 0.3619653752728842, |
| "learning_rate": 2.613683771380893e-05, |
| "loss": 0.3535, |
| "step": 1411 |
| }, |
| { |
| "epoch": 1.5886325267304446, |
| "grad_norm": 0.31893555877641494, |
| "learning_rate": 2.6115978306216104e-05, |
| "loss": 0.3739, |
| "step": 1412 |
| }, |
| { |
| "epoch": 1.589758019133371, |
| "grad_norm": 0.367002811604332, |
| "learning_rate": 2.6095118898623284e-05, |
| "loss": 0.3553, |
| "step": 1413 |
| }, |
| { |
| "epoch": 1.5908835115362971, |
| "grad_norm": 0.27151097727860346, |
| "learning_rate": 2.6074259491030458e-05, |
| "loss": 0.3347, |
| "step": 1414 |
| }, |
| { |
| "epoch": 1.5920090039392234, |
| "grad_norm": 0.3131896896726996, |
| "learning_rate": 2.6053400083437628e-05, |
| "loss": 0.3546, |
| "step": 1415 |
| }, |
| { |
| "epoch": 1.5931344963421497, |
| "grad_norm": 0.36676987492115576, |
| "learning_rate": 2.6032540675844808e-05, |
| "loss": 0.3675, |
| "step": 1416 |
| }, |
| { |
| "epoch": 1.5942599887450761, |
| "grad_norm": 0.2950227483896426, |
| "learning_rate": 2.6011681268251982e-05, |
| "loss": 0.3648, |
| "step": 1417 |
| }, |
| { |
| "epoch": 1.5953854811480022, |
| "grad_norm": 0.34344487884738795, |
| "learning_rate": 2.599082186065916e-05, |
| "loss": 0.3597, |
| "step": 1418 |
| }, |
| { |
| "epoch": 1.5965109735509286, |
| "grad_norm": 0.320230789996618, |
| "learning_rate": 2.5969962453066332e-05, |
| "loss": 0.3524, |
| "step": 1419 |
| }, |
| { |
| "epoch": 1.5976364659538547, |
| "grad_norm": 0.32035648740276107, |
| "learning_rate": 2.594910304547351e-05, |
| "loss": 0.3595, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.5987619583567811, |
| "grad_norm": 0.2888199453121108, |
| "learning_rate": 2.5928243637880683e-05, |
| "loss": 0.3862, |
| "step": 1421 |
| }, |
| { |
| "epoch": 1.5998874507597074, |
| "grad_norm": 0.32236255339509834, |
| "learning_rate": 2.5907384230287863e-05, |
| "loss": 0.3476, |
| "step": 1422 |
| }, |
| { |
| "epoch": 1.6010129431626337, |
| "grad_norm": 0.3203989927659959, |
| "learning_rate": 2.5886524822695034e-05, |
| "loss": 0.3702, |
| "step": 1423 |
| }, |
| { |
| "epoch": 1.60213843556556, |
| "grad_norm": 0.2911113101367755, |
| "learning_rate": 2.5865665415102214e-05, |
| "loss": 0.3688, |
| "step": 1424 |
| }, |
| { |
| "epoch": 1.6032639279684862, |
| "grad_norm": 0.35071227735634586, |
| "learning_rate": 2.5844806007509388e-05, |
| "loss": 0.3808, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.6043894203714126, |
| "grad_norm": 1.359117007518664, |
| "learning_rate": 2.5823946599916565e-05, |
| "loss": 0.3551, |
| "step": 1426 |
| }, |
| { |
| "epoch": 1.6055149127743387, |
| "grad_norm": 0.33498969187479993, |
| "learning_rate": 2.5803087192323738e-05, |
| "loss": 0.3593, |
| "step": 1427 |
| }, |
| { |
| "epoch": 1.6066404051772651, |
| "grad_norm": 0.30337597464705507, |
| "learning_rate": 2.5782227784730915e-05, |
| "loss": 0.3674, |
| "step": 1428 |
| }, |
| { |
| "epoch": 1.6077658975801912, |
| "grad_norm": 0.3207844519265783, |
| "learning_rate": 2.576136837713809e-05, |
| "loss": 0.3557, |
| "step": 1429 |
| }, |
| { |
| "epoch": 1.6088913899831176, |
| "grad_norm": 0.3185723538525886, |
| "learning_rate": 2.574050896954527e-05, |
| "loss": 0.3633, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.610016882386044, |
| "grad_norm": 0.3110802343229136, |
| "learning_rate": 2.5719649561952443e-05, |
| "loss": 0.3621, |
| "step": 1431 |
| }, |
| { |
| "epoch": 1.6111423747889702, |
| "grad_norm": 0.39120392030901746, |
| "learning_rate": 2.569879015435962e-05, |
| "loss": 0.3718, |
| "step": 1432 |
| }, |
| { |
| "epoch": 1.6122678671918964, |
| "grad_norm": 0.3044483498179327, |
| "learning_rate": 2.5677930746766793e-05, |
| "loss": 0.3525, |
| "step": 1433 |
| }, |
| { |
| "epoch": 1.6133933595948227, |
| "grad_norm": 0.36593260259263516, |
| "learning_rate": 2.565707133917397e-05, |
| "loss": 0.3724, |
| "step": 1434 |
| }, |
| { |
| "epoch": 1.6145188519977491, |
| "grad_norm": 0.34991456432334755, |
| "learning_rate": 2.5636211931581144e-05, |
| "loss": 0.3682, |
| "step": 1435 |
| }, |
| { |
| "epoch": 1.6156443444006752, |
| "grad_norm": 0.32304123149901537, |
| "learning_rate": 2.5615352523988317e-05, |
| "loss": 0.3496, |
| "step": 1436 |
| }, |
| { |
| "epoch": 1.6167698368036016, |
| "grad_norm": 0.34708749419764806, |
| "learning_rate": 2.5594493116395494e-05, |
| "loss": 0.3913, |
| "step": 1437 |
| }, |
| { |
| "epoch": 1.617895329206528, |
| "grad_norm": 0.32488187134050506, |
| "learning_rate": 2.5573633708802668e-05, |
| "loss": 0.3469, |
| "step": 1438 |
| }, |
| { |
| "epoch": 1.6190208216094542, |
| "grad_norm": 0.31694764933224345, |
| "learning_rate": 2.555277430120985e-05, |
| "loss": 0.3903, |
| "step": 1439 |
| }, |
| { |
| "epoch": 1.6201463140123804, |
| "grad_norm": 0.2966648293508749, |
| "learning_rate": 2.5531914893617022e-05, |
| "loss": 0.3434, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.6212718064153067, |
| "grad_norm": 0.3130351777750274, |
| "learning_rate": 2.55110554860242e-05, |
| "loss": 0.3642, |
| "step": 1441 |
| }, |
| { |
| "epoch": 1.6223972988182331, |
| "grad_norm": 0.288157295810494, |
| "learning_rate": 2.5490196078431373e-05, |
| "loss": 0.3515, |
| "step": 1442 |
| }, |
| { |
| "epoch": 1.6235227912211592, |
| "grad_norm": 0.34698217632629985, |
| "learning_rate": 2.546933667083855e-05, |
| "loss": 0.3733, |
| "step": 1443 |
| }, |
| { |
| "epoch": 1.6246482836240856, |
| "grad_norm": 0.2724092253095966, |
| "learning_rate": 2.5448477263245723e-05, |
| "loss": 0.3497, |
| "step": 1444 |
| }, |
| { |
| "epoch": 1.6257737760270117, |
| "grad_norm": 0.24953001796720836, |
| "learning_rate": 2.5427617855652904e-05, |
| "loss": 0.3573, |
| "step": 1445 |
| }, |
| { |
| "epoch": 1.6268992684299382, |
| "grad_norm": 0.299260486745094, |
| "learning_rate": 2.5406758448060074e-05, |
| "loss": 0.3873, |
| "step": 1446 |
| }, |
| { |
| "epoch": 1.6280247608328644, |
| "grad_norm": 0.26925589680552175, |
| "learning_rate": 2.5385899040467254e-05, |
| "loss": 0.3508, |
| "step": 1447 |
| }, |
| { |
| "epoch": 1.6291502532357907, |
| "grad_norm": 0.29454604423730374, |
| "learning_rate": 2.5365039632874428e-05, |
| "loss": 0.3591, |
| "step": 1448 |
| }, |
| { |
| "epoch": 1.630275745638717, |
| "grad_norm": 0.27324874812018735, |
| "learning_rate": 2.5344180225281605e-05, |
| "loss": 0.3625, |
| "step": 1449 |
| }, |
| { |
| "epoch": 1.6314012380416432, |
| "grad_norm": 0.27258225073759196, |
| "learning_rate": 2.5323320817688778e-05, |
| "loss": 0.3554, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.6325267304445696, |
| "grad_norm": 0.3035610321463261, |
| "learning_rate": 2.5302461410095955e-05, |
| "loss": 0.3535, |
| "step": 1451 |
| }, |
| { |
| "epoch": 1.6336522228474957, |
| "grad_norm": 0.3628567082505913, |
| "learning_rate": 2.528160200250313e-05, |
| "loss": 0.358, |
| "step": 1452 |
| }, |
| { |
| "epoch": 1.6347777152504221, |
| "grad_norm": 0.26138414055253223, |
| "learning_rate": 2.526074259491031e-05, |
| "loss": 0.3664, |
| "step": 1453 |
| }, |
| { |
| "epoch": 1.6359032076533482, |
| "grad_norm": 0.3503328861643792, |
| "learning_rate": 2.5239883187317483e-05, |
| "loss": 0.3377, |
| "step": 1454 |
| }, |
| { |
| "epoch": 1.6370287000562747, |
| "grad_norm": 0.2673845892434079, |
| "learning_rate": 2.521902377972466e-05, |
| "loss": 0.3474, |
| "step": 1455 |
| }, |
| { |
| "epoch": 1.638154192459201, |
| "grad_norm": 0.27470271868463625, |
| "learning_rate": 2.5198164372131833e-05, |
| "loss": 0.3639, |
| "step": 1456 |
| }, |
| { |
| "epoch": 1.6392796848621272, |
| "grad_norm": 0.3112867744755204, |
| "learning_rate": 2.5177304964539007e-05, |
| "loss": 0.3662, |
| "step": 1457 |
| }, |
| { |
| "epoch": 1.6404051772650534, |
| "grad_norm": 0.29872249045203997, |
| "learning_rate": 2.5156445556946184e-05, |
| "loss": 0.3569, |
| "step": 1458 |
| }, |
| { |
| "epoch": 1.6415306696679797, |
| "grad_norm": 0.2950030580824579, |
| "learning_rate": 2.5135586149353358e-05, |
| "loss": 0.3877, |
| "step": 1459 |
| }, |
| { |
| "epoch": 1.6426561620709061, |
| "grad_norm": 0.30740378724405815, |
| "learning_rate": 2.5114726741760535e-05, |
| "loss": 0.368, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.6437816544738322, |
| "grad_norm": 0.43735074719358724, |
| "learning_rate": 2.5093867334167708e-05, |
| "loss": 0.3862, |
| "step": 1461 |
| }, |
| { |
| "epoch": 1.6449071468767587, |
| "grad_norm": 0.344358904604338, |
| "learning_rate": 2.507300792657489e-05, |
| "loss": 0.3556, |
| "step": 1462 |
| }, |
| { |
| "epoch": 1.646032639279685, |
| "grad_norm": 0.3606518532796079, |
| "learning_rate": 2.505214851898206e-05, |
| "loss": 0.3506, |
| "step": 1463 |
| }, |
| { |
| "epoch": 1.6471581316826112, |
| "grad_norm": 0.26793935225288906, |
| "learning_rate": 2.503128911138924e-05, |
| "loss": 0.3644, |
| "step": 1464 |
| }, |
| { |
| "epoch": 1.6482836240855374, |
| "grad_norm": 0.36553458630391006, |
| "learning_rate": 2.5010429703796413e-05, |
| "loss": 0.3786, |
| "step": 1465 |
| }, |
| { |
| "epoch": 1.6494091164884637, |
| "grad_norm": 0.3032742387012001, |
| "learning_rate": 2.4989570296203586e-05, |
| "loss": 0.3606, |
| "step": 1466 |
| }, |
| { |
| "epoch": 1.6505346088913901, |
| "grad_norm": 0.2573644911193979, |
| "learning_rate": 2.4968710888610763e-05, |
| "loss": 0.3758, |
| "step": 1467 |
| }, |
| { |
| "epoch": 1.6516601012943162, |
| "grad_norm": 0.3260439897844004, |
| "learning_rate": 2.494785148101794e-05, |
| "loss": 0.3701, |
| "step": 1468 |
| }, |
| { |
| "epoch": 1.6527855936972426, |
| "grad_norm": 0.2791366230994869, |
| "learning_rate": 2.4926992073425114e-05, |
| "loss": 0.3608, |
| "step": 1469 |
| }, |
| { |
| "epoch": 1.6539110861001687, |
| "grad_norm": 0.28073773442639216, |
| "learning_rate": 2.490613266583229e-05, |
| "loss": 0.3552, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.6550365785030952, |
| "grad_norm": 0.2751936808067321, |
| "learning_rate": 2.4885273258239468e-05, |
| "loss": 0.3551, |
| "step": 1471 |
| }, |
| { |
| "epoch": 1.6561620709060214, |
| "grad_norm": 0.31105318511449315, |
| "learning_rate": 2.486441385064664e-05, |
| "loss": 0.3846, |
| "step": 1472 |
| }, |
| { |
| "epoch": 1.6572875633089477, |
| "grad_norm": 0.2779436567942526, |
| "learning_rate": 2.484355444305382e-05, |
| "loss": 0.342, |
| "step": 1473 |
| }, |
| { |
| "epoch": 1.658413055711874, |
| "grad_norm": 0.260118994793512, |
| "learning_rate": 2.4822695035460995e-05, |
| "loss": 0.3416, |
| "step": 1474 |
| }, |
| { |
| "epoch": 1.6595385481148002, |
| "grad_norm": 0.30797304765243294, |
| "learning_rate": 2.480183562786817e-05, |
| "loss": 0.3649, |
| "step": 1475 |
| }, |
| { |
| "epoch": 1.6606640405177266, |
| "grad_norm": 0.27879300341701935, |
| "learning_rate": 2.4780976220275346e-05, |
| "loss": 0.3577, |
| "step": 1476 |
| }, |
| { |
| "epoch": 1.6617895329206527, |
| "grad_norm": 0.2618302523010228, |
| "learning_rate": 2.476011681268252e-05, |
| "loss": 0.373, |
| "step": 1477 |
| }, |
| { |
| "epoch": 1.6629150253235792, |
| "grad_norm": 0.2691572921484226, |
| "learning_rate": 2.4739257405089697e-05, |
| "loss": 0.3382, |
| "step": 1478 |
| }, |
| { |
| "epoch": 1.6640405177265052, |
| "grad_norm": 0.3021887597303646, |
| "learning_rate": 2.4718397997496874e-05, |
| "loss": 0.3561, |
| "step": 1479 |
| }, |
| { |
| "epoch": 1.6651660101294317, |
| "grad_norm": 0.29571070245395525, |
| "learning_rate": 2.4697538589904047e-05, |
| "loss": 0.3666, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.666291502532358, |
| "grad_norm": 0.3060388532862541, |
| "learning_rate": 2.4676679182311224e-05, |
| "loss": 0.3574, |
| "step": 1481 |
| }, |
| { |
| "epoch": 1.6674169949352842, |
| "grad_norm": 0.262863158327581, |
| "learning_rate": 2.46558197747184e-05, |
| "loss": 0.3515, |
| "step": 1482 |
| }, |
| { |
| "epoch": 1.6685424873382104, |
| "grad_norm": 0.26211725924142215, |
| "learning_rate": 2.4634960367125575e-05, |
| "loss": 0.3673, |
| "step": 1483 |
| }, |
| { |
| "epoch": 1.6696679797411367, |
| "grad_norm": 0.27559909119280296, |
| "learning_rate": 2.461410095953275e-05, |
| "loss": 0.3699, |
| "step": 1484 |
| }, |
| { |
| "epoch": 1.6707934721440632, |
| "grad_norm": 0.3286258665699544, |
| "learning_rate": 2.459324155193993e-05, |
| "loss": 0.3547, |
| "step": 1485 |
| }, |
| { |
| "epoch": 1.6719189645469892, |
| "grad_norm": 0.28394671282033973, |
| "learning_rate": 2.4572382144347102e-05, |
| "loss": 0.3699, |
| "step": 1486 |
| }, |
| { |
| "epoch": 1.6730444569499157, |
| "grad_norm": 0.28904710622589413, |
| "learning_rate": 2.4551522736754276e-05, |
| "loss": 0.3498, |
| "step": 1487 |
| }, |
| { |
| "epoch": 1.674169949352842, |
| "grad_norm": 0.3427205931479807, |
| "learning_rate": 2.4530663329161453e-05, |
| "loss": 0.3501, |
| "step": 1488 |
| }, |
| { |
| "epoch": 1.6752954417557682, |
| "grad_norm": 0.29275848448510483, |
| "learning_rate": 2.4509803921568626e-05, |
| "loss": 0.3436, |
| "step": 1489 |
| }, |
| { |
| "epoch": 1.6764209341586944, |
| "grad_norm": 0.281092401245526, |
| "learning_rate": 2.4488944513975803e-05, |
| "loss": 0.3618, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.6775464265616207, |
| "grad_norm": 0.3028349680860752, |
| "learning_rate": 2.446808510638298e-05, |
| "loss": 0.36, |
| "step": 1491 |
| }, |
| { |
| "epoch": 1.6786719189645471, |
| "grad_norm": 0.2740774488090583, |
| "learning_rate": 2.4447225698790154e-05, |
| "loss": 0.3761, |
| "step": 1492 |
| }, |
| { |
| "epoch": 1.6797974113674732, |
| "grad_norm": 0.3565931288895132, |
| "learning_rate": 2.442636629119733e-05, |
| "loss": 0.3656, |
| "step": 1493 |
| }, |
| { |
| "epoch": 1.6809229037703997, |
| "grad_norm": 0.30994295777900555, |
| "learning_rate": 2.4405506883604508e-05, |
| "loss": 0.3709, |
| "step": 1494 |
| }, |
| { |
| "epoch": 1.6820483961733257, |
| "grad_norm": 0.2770746159082683, |
| "learning_rate": 2.438464747601168e-05, |
| "loss": 0.3668, |
| "step": 1495 |
| }, |
| { |
| "epoch": 1.6831738885762522, |
| "grad_norm": 0.28851250362528635, |
| "learning_rate": 2.436378806841886e-05, |
| "loss": 0.3375, |
| "step": 1496 |
| }, |
| { |
| "epoch": 1.6842993809791784, |
| "grad_norm": 0.30502905628031945, |
| "learning_rate": 2.4342928660826032e-05, |
| "loss": 0.372, |
| "step": 1497 |
| }, |
| { |
| "epoch": 1.6854248733821047, |
| "grad_norm": 0.27606414842777804, |
| "learning_rate": 2.432206925323321e-05, |
| "loss": 0.3667, |
| "step": 1498 |
| }, |
| { |
| "epoch": 1.686550365785031, |
| "grad_norm": 0.2807248595539354, |
| "learning_rate": 2.4301209845640386e-05, |
| "loss": 0.3372, |
| "step": 1499 |
| }, |
| { |
| "epoch": 1.6876758581879572, |
| "grad_norm": 0.3189944644623768, |
| "learning_rate": 2.428035043804756e-05, |
| "loss": 0.3556, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.6888013505908837, |
| "grad_norm": 0.3542996839432631, |
| "learning_rate": 2.4259491030454737e-05, |
| "loss": 0.3657, |
| "step": 1501 |
| }, |
| { |
| "epoch": 1.6899268429938097, |
| "grad_norm": 0.26759487532851395, |
| "learning_rate": 2.4238631622861914e-05, |
| "loss": 0.3537, |
| "step": 1502 |
| }, |
| { |
| "epoch": 1.6910523353967362, |
| "grad_norm": 0.32892071648122306, |
| "learning_rate": 2.4217772215269087e-05, |
| "loss": 0.3679, |
| "step": 1503 |
| }, |
| { |
| "epoch": 1.6921778277996622, |
| "grad_norm": 0.27325117871239496, |
| "learning_rate": 2.4196912807676264e-05, |
| "loss": 0.3636, |
| "step": 1504 |
| }, |
| { |
| "epoch": 1.6933033202025887, |
| "grad_norm": 0.31473981377419813, |
| "learning_rate": 2.417605340008344e-05, |
| "loss": 0.3546, |
| "step": 1505 |
| }, |
| { |
| "epoch": 1.694428812605515, |
| "grad_norm": 0.6213973467005295, |
| "learning_rate": 2.4155193992490615e-05, |
| "loss": 0.3569, |
| "step": 1506 |
| }, |
| { |
| "epoch": 1.6955543050084412, |
| "grad_norm": 0.29664784115736215, |
| "learning_rate": 2.4134334584897792e-05, |
| "loss": 0.3541, |
| "step": 1507 |
| }, |
| { |
| "epoch": 1.6966797974113674, |
| "grad_norm": 0.30075562806982764, |
| "learning_rate": 2.4113475177304965e-05, |
| "loss": 0.3442, |
| "step": 1508 |
| }, |
| { |
| "epoch": 1.6978052898142937, |
| "grad_norm": 0.2798816842619607, |
| "learning_rate": 2.409261576971214e-05, |
| "loss": 0.3723, |
| "step": 1509 |
| }, |
| { |
| "epoch": 1.6989307822172202, |
| "grad_norm": 0.3125716574597028, |
| "learning_rate": 2.4071756362119316e-05, |
| "loss": 0.3534, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.7000562746201462, |
| "grad_norm": 0.2695382685076537, |
| "learning_rate": 2.4050896954526493e-05, |
| "loss": 0.3501, |
| "step": 1511 |
| }, |
| { |
| "epoch": 1.7011817670230727, |
| "grad_norm": 0.30428973956664224, |
| "learning_rate": 2.4030037546933667e-05, |
| "loss": 0.361, |
| "step": 1512 |
| }, |
| { |
| "epoch": 1.702307259425999, |
| "grad_norm": 0.2954859753709326, |
| "learning_rate": 2.4009178139340844e-05, |
| "loss": 0.348, |
| "step": 1513 |
| }, |
| { |
| "epoch": 1.7034327518289252, |
| "grad_norm": 0.2535522065599469, |
| "learning_rate": 2.398831873174802e-05, |
| "loss": 0.3448, |
| "step": 1514 |
| }, |
| { |
| "epoch": 1.7045582442318514, |
| "grad_norm": 0.2877878849798194, |
| "learning_rate": 2.3967459324155194e-05, |
| "loss": 0.3615, |
| "step": 1515 |
| }, |
| { |
| "epoch": 1.7056837366347777, |
| "grad_norm": 0.2679693175700858, |
| "learning_rate": 2.394659991656237e-05, |
| "loss": 0.3575, |
| "step": 1516 |
| }, |
| { |
| "epoch": 1.7068092290377042, |
| "grad_norm": 0.270042339489181, |
| "learning_rate": 2.3925740508969545e-05, |
| "loss": 0.3612, |
| "step": 1517 |
| }, |
| { |
| "epoch": 1.7079347214406302, |
| "grad_norm": 0.3277570559960174, |
| "learning_rate": 2.390488110137672e-05, |
| "loss": 0.3539, |
| "step": 1518 |
| }, |
| { |
| "epoch": 1.7090602138435567, |
| "grad_norm": 0.273010908537002, |
| "learning_rate": 2.38840216937839e-05, |
| "loss": 0.3813, |
| "step": 1519 |
| }, |
| { |
| "epoch": 1.7101857062464827, |
| "grad_norm": 0.3163418829636289, |
| "learning_rate": 2.3863162286191072e-05, |
| "loss": 0.367, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.7113111986494092, |
| "grad_norm": 0.2790546740132572, |
| "learning_rate": 2.384230287859825e-05, |
| "loss": 0.3616, |
| "step": 1521 |
| }, |
| { |
| "epoch": 1.7124366910523354, |
| "grad_norm": 0.3794647847000264, |
| "learning_rate": 2.3821443471005426e-05, |
| "loss": 0.3264, |
| "step": 1522 |
| }, |
| { |
| "epoch": 1.7135621834552617, |
| "grad_norm": 0.27180490681435693, |
| "learning_rate": 2.38005840634126e-05, |
| "loss": 0.3667, |
| "step": 1523 |
| }, |
| { |
| "epoch": 1.714687675858188, |
| "grad_norm": 0.3192761006046379, |
| "learning_rate": 2.3779724655819777e-05, |
| "loss": 0.3516, |
| "step": 1524 |
| }, |
| { |
| "epoch": 1.7158131682611142, |
| "grad_norm": 0.29770507590073336, |
| "learning_rate": 2.3758865248226954e-05, |
| "loss": 0.3616, |
| "step": 1525 |
| }, |
| { |
| "epoch": 1.7169386606640407, |
| "grad_norm": 0.3198828879152863, |
| "learning_rate": 2.3738005840634127e-05, |
| "loss": 0.3529, |
| "step": 1526 |
| }, |
| { |
| "epoch": 1.7180641530669667, |
| "grad_norm": 0.3090153579359256, |
| "learning_rate": 2.3717146433041304e-05, |
| "loss": 0.3526, |
| "step": 1527 |
| }, |
| { |
| "epoch": 1.7191896454698932, |
| "grad_norm": 0.3212232642206978, |
| "learning_rate": 2.369628702544848e-05, |
| "loss": 0.3607, |
| "step": 1528 |
| }, |
| { |
| "epoch": 1.7203151378728192, |
| "grad_norm": 0.30043128684782044, |
| "learning_rate": 2.367542761785565e-05, |
| "loss": 0.352, |
| "step": 1529 |
| }, |
| { |
| "epoch": 1.7214406302757457, |
| "grad_norm": 0.29295625581516677, |
| "learning_rate": 2.365456821026283e-05, |
| "loss": 0.3523, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.722566122678672, |
| "grad_norm": 0.3148385769404437, |
| "learning_rate": 2.3633708802670006e-05, |
| "loss": 0.3428, |
| "step": 1531 |
| }, |
| { |
| "epoch": 1.7236916150815982, |
| "grad_norm": 0.2809729795961225, |
| "learning_rate": 2.361284939507718e-05, |
| "loss": 0.3501, |
| "step": 1532 |
| }, |
| { |
| "epoch": 1.7248171074845244, |
| "grad_norm": 0.26779520094077724, |
| "learning_rate": 2.3591989987484356e-05, |
| "loss": 0.3692, |
| "step": 1533 |
| }, |
| { |
| "epoch": 1.7259425998874507, |
| "grad_norm": 0.34366805506707354, |
| "learning_rate": 2.3571130579891533e-05, |
| "loss": 0.3487, |
| "step": 1534 |
| }, |
| { |
| "epoch": 1.7270680922903772, |
| "grad_norm": 0.31386821776914015, |
| "learning_rate": 2.3550271172298707e-05, |
| "loss": 0.3738, |
| "step": 1535 |
| }, |
| { |
| "epoch": 1.7281935846933032, |
| "grad_norm": 0.27129248750888196, |
| "learning_rate": 2.3529411764705884e-05, |
| "loss": 0.347, |
| "step": 1536 |
| }, |
| { |
| "epoch": 1.7293190770962297, |
| "grad_norm": 0.30115138922829, |
| "learning_rate": 2.3508552357113057e-05, |
| "loss": 0.3644, |
| "step": 1537 |
| }, |
| { |
| "epoch": 1.730444569499156, |
| "grad_norm": 0.33400000355343035, |
| "learning_rate": 2.3487692949520234e-05, |
| "loss": 0.3735, |
| "step": 1538 |
| }, |
| { |
| "epoch": 1.7315700619020822, |
| "grad_norm": 0.2817713843812286, |
| "learning_rate": 2.346683354192741e-05, |
| "loss": 0.3536, |
| "step": 1539 |
| }, |
| { |
| "epoch": 1.7326955543050084, |
| "grad_norm": 0.2536336062497862, |
| "learning_rate": 2.3445974134334585e-05, |
| "loss": 0.3531, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.7338210467079347, |
| "grad_norm": 0.292119578282618, |
| "learning_rate": 2.3425114726741762e-05, |
| "loss": 0.3917, |
| "step": 1541 |
| }, |
| { |
| "epoch": 1.7349465391108612, |
| "grad_norm": 0.301736936214816, |
| "learning_rate": 2.340425531914894e-05, |
| "loss": 0.3368, |
| "step": 1542 |
| }, |
| { |
| "epoch": 1.7360720315137872, |
| "grad_norm": 0.2834782995265929, |
| "learning_rate": 2.3383395911556112e-05, |
| "loss": 0.3627, |
| "step": 1543 |
| }, |
| { |
| "epoch": 1.7371975239167137, |
| "grad_norm": 0.3472332663859999, |
| "learning_rate": 2.336253650396329e-05, |
| "loss": 0.3563, |
| "step": 1544 |
| }, |
| { |
| "epoch": 1.7383230163196397, |
| "grad_norm": 0.2770080632091572, |
| "learning_rate": 2.3341677096370466e-05, |
| "loss": 0.3638, |
| "step": 1545 |
| }, |
| { |
| "epoch": 1.7394485087225662, |
| "grad_norm": 0.28038474675505726, |
| "learning_rate": 2.332081768877764e-05, |
| "loss": 0.3578, |
| "step": 1546 |
| }, |
| { |
| "epoch": 1.7405740011254924, |
| "grad_norm": 0.29413387062574675, |
| "learning_rate": 2.3299958281184817e-05, |
| "loss": 0.3581, |
| "step": 1547 |
| }, |
| { |
| "epoch": 1.7416994935284187, |
| "grad_norm": 0.250154894365378, |
| "learning_rate": 2.3279098873591994e-05, |
| "loss": 0.3524, |
| "step": 1548 |
| }, |
| { |
| "epoch": 1.742824985931345, |
| "grad_norm": 0.27004730168507385, |
| "learning_rate": 2.3258239465999164e-05, |
| "loss": 0.3592, |
| "step": 1549 |
| }, |
| { |
| "epoch": 1.7439504783342712, |
| "grad_norm": 0.30931998115710535, |
| "learning_rate": 2.323738005840634e-05, |
| "loss": 0.3633, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.7450759707371977, |
| "grad_norm": 0.260094920014104, |
| "learning_rate": 2.3216520650813518e-05, |
| "loss": 0.36, |
| "step": 1551 |
| }, |
| { |
| "epoch": 1.7462014631401237, |
| "grad_norm": 0.28020792072208933, |
| "learning_rate": 2.3195661243220692e-05, |
| "loss": 0.3619, |
| "step": 1552 |
| }, |
| { |
| "epoch": 1.7473269555430502, |
| "grad_norm": 0.29150594274575353, |
| "learning_rate": 2.317480183562787e-05, |
| "loss": 0.3752, |
| "step": 1553 |
| }, |
| { |
| "epoch": 1.7484524479459762, |
| "grad_norm": 0.2780077227889234, |
| "learning_rate": 2.3153942428035046e-05, |
| "loss": 0.3404, |
| "step": 1554 |
| }, |
| { |
| "epoch": 1.7495779403489027, |
| "grad_norm": 0.26577200333767786, |
| "learning_rate": 2.313308302044222e-05, |
| "loss": 0.3457, |
| "step": 1555 |
| }, |
| { |
| "epoch": 1.750703432751829, |
| "grad_norm": 0.297363502447975, |
| "learning_rate": 2.3112223612849396e-05, |
| "loss": 0.3473, |
| "step": 1556 |
| }, |
| { |
| "epoch": 1.7518289251547552, |
| "grad_norm": 0.26278420558469534, |
| "learning_rate": 2.309136420525657e-05, |
| "loss": 0.3474, |
| "step": 1557 |
| }, |
| { |
| "epoch": 1.7529544175576814, |
| "grad_norm": 0.26900103936760594, |
| "learning_rate": 2.3070504797663747e-05, |
| "loss": 0.3531, |
| "step": 1558 |
| }, |
| { |
| "epoch": 1.7540799099606077, |
| "grad_norm": 0.32212325532836394, |
| "learning_rate": 2.3049645390070924e-05, |
| "loss": 0.3671, |
| "step": 1559 |
| }, |
| { |
| "epoch": 1.7552054023635342, |
| "grad_norm": 0.2970498028319336, |
| "learning_rate": 2.3028785982478097e-05, |
| "loss": 0.3488, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.7563308947664602, |
| "grad_norm": 0.3127346620449437, |
| "learning_rate": 2.3007926574885274e-05, |
| "loss": 0.3585, |
| "step": 1561 |
| }, |
| { |
| "epoch": 1.7574563871693867, |
| "grad_norm": 0.2765174597912786, |
| "learning_rate": 2.298706716729245e-05, |
| "loss": 0.3394, |
| "step": 1562 |
| }, |
| { |
| "epoch": 1.758581879572313, |
| "grad_norm": 0.3009783707148293, |
| "learning_rate": 2.2966207759699625e-05, |
| "loss": 0.3629, |
| "step": 1563 |
| }, |
| { |
| "epoch": 1.7597073719752392, |
| "grad_norm": 0.30115522865418154, |
| "learning_rate": 2.2945348352106802e-05, |
| "loss": 0.3725, |
| "step": 1564 |
| }, |
| { |
| "epoch": 1.7608328643781654, |
| "grad_norm": 0.2930519854916032, |
| "learning_rate": 2.292448894451398e-05, |
| "loss": 0.3569, |
| "step": 1565 |
| }, |
| { |
| "epoch": 1.7619583567810917, |
| "grad_norm": 0.3047405558309698, |
| "learning_rate": 2.2903629536921153e-05, |
| "loss": 0.3714, |
| "step": 1566 |
| }, |
| { |
| "epoch": 1.7630838491840182, |
| "grad_norm": 0.2590925307869418, |
| "learning_rate": 2.288277012932833e-05, |
| "loss": 0.3429, |
| "step": 1567 |
| }, |
| { |
| "epoch": 1.7642093415869442, |
| "grad_norm": 0.26569956950346013, |
| "learning_rate": 2.2861910721735507e-05, |
| "loss": 0.3563, |
| "step": 1568 |
| }, |
| { |
| "epoch": 1.7653348339898707, |
| "grad_norm": 0.3212454261162196, |
| "learning_rate": 2.284105131414268e-05, |
| "loss": 0.3628, |
| "step": 1569 |
| }, |
| { |
| "epoch": 1.7664603263927967, |
| "grad_norm": 0.25121736290737545, |
| "learning_rate": 2.2820191906549854e-05, |
| "loss": 0.3407, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.7675858187957232, |
| "grad_norm": 0.2568310971026976, |
| "learning_rate": 2.279933249895703e-05, |
| "loss": 0.3556, |
| "step": 1571 |
| }, |
| { |
| "epoch": 1.7687113111986494, |
| "grad_norm": 0.2766368973128219, |
| "learning_rate": 2.2778473091364204e-05, |
| "loss": 0.3488, |
| "step": 1572 |
| }, |
| { |
| "epoch": 1.7698368036015757, |
| "grad_norm": 0.2830150867726597, |
| "learning_rate": 2.275761368377138e-05, |
| "loss": 0.3563, |
| "step": 1573 |
| }, |
| { |
| "epoch": 1.770962296004502, |
| "grad_norm": 0.2704782966697743, |
| "learning_rate": 2.2736754276178558e-05, |
| "loss": 0.3785, |
| "step": 1574 |
| }, |
| { |
| "epoch": 1.7720877884074282, |
| "grad_norm": 0.28693834596503254, |
| "learning_rate": 2.2715894868585732e-05, |
| "loss": 0.3568, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.7732132808103547, |
| "grad_norm": 0.300591423339274, |
| "learning_rate": 2.269503546099291e-05, |
| "loss": 0.3401, |
| "step": 1576 |
| }, |
| { |
| "epoch": 1.7743387732132807, |
| "grad_norm": 0.25477856684248135, |
| "learning_rate": 2.2674176053400082e-05, |
| "loss": 0.3485, |
| "step": 1577 |
| }, |
| { |
| "epoch": 1.7754642656162072, |
| "grad_norm": 0.27289380951433195, |
| "learning_rate": 2.265331664580726e-05, |
| "loss": 0.3607, |
| "step": 1578 |
| }, |
| { |
| "epoch": 1.7765897580191332, |
| "grad_norm": 0.28248295859121, |
| "learning_rate": 2.2632457238214436e-05, |
| "loss": 0.3511, |
| "step": 1579 |
| }, |
| { |
| "epoch": 1.7777152504220597, |
| "grad_norm": 0.2658629597762577, |
| "learning_rate": 2.261159783062161e-05, |
| "loss": 0.377, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.778840742824986, |
| "grad_norm": 0.27220952413476507, |
| "learning_rate": 2.2590738423028787e-05, |
| "loss": 0.3557, |
| "step": 1581 |
| }, |
| { |
| "epoch": 1.7799662352279122, |
| "grad_norm": 0.2328823780748166, |
| "learning_rate": 2.2569879015435964e-05, |
| "loss": 0.3437, |
| "step": 1582 |
| }, |
| { |
| "epoch": 1.7810917276308385, |
| "grad_norm": 0.27552976720286626, |
| "learning_rate": 2.2549019607843138e-05, |
| "loss": 0.3499, |
| "step": 1583 |
| }, |
| { |
| "epoch": 1.7822172200337647, |
| "grad_norm": 0.26988928502984605, |
| "learning_rate": 2.2528160200250315e-05, |
| "loss": 0.3739, |
| "step": 1584 |
| }, |
| { |
| "epoch": 1.7833427124366912, |
| "grad_norm": 0.28360069868054577, |
| "learning_rate": 2.250730079265749e-05, |
| "loss": 0.3586, |
| "step": 1585 |
| }, |
| { |
| "epoch": 1.7844682048396172, |
| "grad_norm": 0.30703362231564924, |
| "learning_rate": 2.2486441385064665e-05, |
| "loss": 0.3402, |
| "step": 1586 |
| }, |
| { |
| "epoch": 1.7855936972425437, |
| "grad_norm": 0.24229886888660893, |
| "learning_rate": 2.2465581977471842e-05, |
| "loss": 0.3749, |
| "step": 1587 |
| }, |
| { |
| "epoch": 1.78671918964547, |
| "grad_norm": 0.3052949362012416, |
| "learning_rate": 2.244472256987902e-05, |
| "loss": 0.3432, |
| "step": 1588 |
| }, |
| { |
| "epoch": 1.7878446820483962, |
| "grad_norm": 0.30727845879387705, |
| "learning_rate": 2.2423863162286193e-05, |
| "loss": 0.352, |
| "step": 1589 |
| }, |
| { |
| "epoch": 1.7889701744513224, |
| "grad_norm": 0.2871867401825979, |
| "learning_rate": 2.240300375469337e-05, |
| "loss": 0.3654, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.7900956668542487, |
| "grad_norm": 0.29179973425408784, |
| "learning_rate": 2.2382144347100543e-05, |
| "loss": 0.3616, |
| "step": 1591 |
| }, |
| { |
| "epoch": 1.7912211592571752, |
| "grad_norm": 0.3214829947161843, |
| "learning_rate": 2.2361284939507717e-05, |
| "loss": 0.3512, |
| "step": 1592 |
| }, |
| { |
| "epoch": 1.7923466516601012, |
| "grad_norm": 0.24147795550593532, |
| "learning_rate": 2.2340425531914894e-05, |
| "loss": 0.3462, |
| "step": 1593 |
| }, |
| { |
| "epoch": 1.7934721440630277, |
| "grad_norm": 0.27689467276611157, |
| "learning_rate": 2.231956612432207e-05, |
| "loss": 0.36, |
| "step": 1594 |
| }, |
| { |
| "epoch": 1.7945976364659537, |
| "grad_norm": 0.28150686561848237, |
| "learning_rate": 2.2298706716729244e-05, |
| "loss": 0.3532, |
| "step": 1595 |
| }, |
| { |
| "epoch": 1.7957231288688802, |
| "grad_norm": 0.2581225749795623, |
| "learning_rate": 2.227784730913642e-05, |
| "loss": 0.3559, |
| "step": 1596 |
| }, |
| { |
| "epoch": 1.7968486212718064, |
| "grad_norm": 0.3039816245853392, |
| "learning_rate": 2.22569879015436e-05, |
| "loss": 0.3538, |
| "step": 1597 |
| }, |
| { |
| "epoch": 1.7979741136747327, |
| "grad_norm": 0.25714237851869526, |
| "learning_rate": 2.2236128493950772e-05, |
| "loss": 0.3442, |
| "step": 1598 |
| }, |
| { |
| "epoch": 1.799099606077659, |
| "grad_norm": 0.24074871831754024, |
| "learning_rate": 2.221526908635795e-05, |
| "loss": 0.349, |
| "step": 1599 |
| }, |
| { |
| "epoch": 1.8002250984805852, |
| "grad_norm": 0.28044366274540433, |
| "learning_rate": 2.2194409678765123e-05, |
| "loss": 0.3476, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.8013505908835117, |
| "grad_norm": 0.3055206598860284, |
| "learning_rate": 2.21735502711723e-05, |
| "loss": 0.329, |
| "step": 1601 |
| }, |
| { |
| "epoch": 1.8024760832864377, |
| "grad_norm": 0.32077315537142925, |
| "learning_rate": 2.2152690863579477e-05, |
| "loss": 0.3632, |
| "step": 1602 |
| }, |
| { |
| "epoch": 1.8036015756893642, |
| "grad_norm": 0.3191853556743268, |
| "learning_rate": 2.213183145598665e-05, |
| "loss": 0.3724, |
| "step": 1603 |
| }, |
| { |
| "epoch": 1.8047270680922902, |
| "grad_norm": 0.309559125522351, |
| "learning_rate": 2.2110972048393827e-05, |
| "loss": 0.36, |
| "step": 1604 |
| }, |
| { |
| "epoch": 1.8058525604952167, |
| "grad_norm": 0.31187663837864876, |
| "learning_rate": 2.2090112640801004e-05, |
| "loss": 0.344, |
| "step": 1605 |
| }, |
| { |
| "epoch": 1.806978052898143, |
| "grad_norm": 0.3041182224443529, |
| "learning_rate": 2.2069253233208178e-05, |
| "loss": 0.353, |
| "step": 1606 |
| }, |
| { |
| "epoch": 1.8081035453010692, |
| "grad_norm": 0.29282481275876526, |
| "learning_rate": 2.2048393825615355e-05, |
| "loss": 0.3486, |
| "step": 1607 |
| }, |
| { |
| "epoch": 1.8092290377039955, |
| "grad_norm": 0.29147172423218604, |
| "learning_rate": 2.202753441802253e-05, |
| "loss": 0.3672, |
| "step": 1608 |
| }, |
| { |
| "epoch": 1.8103545301069217, |
| "grad_norm": 0.28412350307097217, |
| "learning_rate": 2.2006675010429705e-05, |
| "loss": 0.3353, |
| "step": 1609 |
| }, |
| { |
| "epoch": 1.8114800225098482, |
| "grad_norm": 0.3067481559384326, |
| "learning_rate": 2.1985815602836882e-05, |
| "loss": 0.3768, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.8126055149127742, |
| "grad_norm": 0.28038315675437364, |
| "learning_rate": 2.1964956195244056e-05, |
| "loss": 0.3654, |
| "step": 1611 |
| }, |
| { |
| "epoch": 1.8137310073157007, |
| "grad_norm": 0.35539158393187636, |
| "learning_rate": 2.194409678765123e-05, |
| "loss": 0.3538, |
| "step": 1612 |
| }, |
| { |
| "epoch": 1.814856499718627, |
| "grad_norm": 0.25885136003612047, |
| "learning_rate": 2.1923237380058406e-05, |
| "loss": 0.3644, |
| "step": 1613 |
| }, |
| { |
| "epoch": 1.8159819921215532, |
| "grad_norm": 0.26093239365043436, |
| "learning_rate": 2.1902377972465583e-05, |
| "loss": 0.3708, |
| "step": 1614 |
| }, |
| { |
| "epoch": 1.8171074845244795, |
| "grad_norm": 0.2961872877279803, |
| "learning_rate": 2.1881518564872757e-05, |
| "loss": 0.3596, |
| "step": 1615 |
| }, |
| { |
| "epoch": 1.8182329769274057, |
| "grad_norm": 0.25680902610020434, |
| "learning_rate": 2.1860659157279934e-05, |
| "loss": 0.3577, |
| "step": 1616 |
| }, |
| { |
| "epoch": 1.8193584693303322, |
| "grad_norm": 0.25991234140815395, |
| "learning_rate": 2.183979974968711e-05, |
| "loss": 0.3633, |
| "step": 1617 |
| }, |
| { |
| "epoch": 1.8204839617332582, |
| "grad_norm": 0.2613869530348512, |
| "learning_rate": 2.1818940342094285e-05, |
| "loss": 0.3418, |
| "step": 1618 |
| }, |
| { |
| "epoch": 1.8216094541361847, |
| "grad_norm": 0.24790904990988194, |
| "learning_rate": 2.179808093450146e-05, |
| "loss": 0.3632, |
| "step": 1619 |
| }, |
| { |
| "epoch": 1.8227349465391107, |
| "grad_norm": 0.28799463478351406, |
| "learning_rate": 2.1777221526908635e-05, |
| "loss": 0.3699, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.8238604389420372, |
| "grad_norm": 0.25548160538250764, |
| "learning_rate": 2.1756362119315812e-05, |
| "loss": 0.3442, |
| "step": 1621 |
| }, |
| { |
| "epoch": 1.8249859313449635, |
| "grad_norm": 0.2985142619761683, |
| "learning_rate": 2.173550271172299e-05, |
| "loss": 0.3486, |
| "step": 1622 |
| }, |
| { |
| "epoch": 1.8261114237478897, |
| "grad_norm": 0.2972946035959545, |
| "learning_rate": 2.1714643304130163e-05, |
| "loss": 0.3414, |
| "step": 1623 |
| }, |
| { |
| "epoch": 1.827236916150816, |
| "grad_norm": 0.26170651498968683, |
| "learning_rate": 2.169378389653734e-05, |
| "loss": 0.3284, |
| "step": 1624 |
| }, |
| { |
| "epoch": 1.8283624085537422, |
| "grad_norm": 0.2524407858115918, |
| "learning_rate": 2.1672924488944517e-05, |
| "loss": 0.3599, |
| "step": 1625 |
| }, |
| { |
| "epoch": 1.8294879009566687, |
| "grad_norm": 0.3335691621333924, |
| "learning_rate": 2.165206508135169e-05, |
| "loss": 0.3603, |
| "step": 1626 |
| }, |
| { |
| "epoch": 1.8306133933595947, |
| "grad_norm": 0.2768913167073537, |
| "learning_rate": 2.1631205673758867e-05, |
| "loss": 0.3588, |
| "step": 1627 |
| }, |
| { |
| "epoch": 1.8317388857625212, |
| "grad_norm": 0.30050684042922793, |
| "learning_rate": 2.1610346266166044e-05, |
| "loss": 0.372, |
| "step": 1628 |
| }, |
| { |
| "epoch": 1.8328643781654472, |
| "grad_norm": 0.2901843574196796, |
| "learning_rate": 2.1589486858573218e-05, |
| "loss": 0.3639, |
| "step": 1629 |
| }, |
| { |
| "epoch": 1.8339898705683737, |
| "grad_norm": 0.29902669217912486, |
| "learning_rate": 2.1568627450980395e-05, |
| "loss": 0.3735, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.8351153629713, |
| "grad_norm": 0.30980781618970216, |
| "learning_rate": 2.154776804338757e-05, |
| "loss": 0.3572, |
| "step": 1631 |
| }, |
| { |
| "epoch": 1.8362408553742262, |
| "grad_norm": 0.26616420601594276, |
| "learning_rate": 2.1526908635794745e-05, |
| "loss": 0.3582, |
| "step": 1632 |
| }, |
| { |
| "epoch": 1.8373663477771525, |
| "grad_norm": 0.29096782812841715, |
| "learning_rate": 2.150604922820192e-05, |
| "loss": 0.3533, |
| "step": 1633 |
| }, |
| { |
| "epoch": 1.8384918401800787, |
| "grad_norm": 0.29936454913412547, |
| "learning_rate": 2.1485189820609096e-05, |
| "loss": 0.3441, |
| "step": 1634 |
| }, |
| { |
| "epoch": 1.8396173325830052, |
| "grad_norm": 0.34946000879087785, |
| "learning_rate": 2.146433041301627e-05, |
| "loss": 0.3628, |
| "step": 1635 |
| }, |
| { |
| "epoch": 1.8407428249859312, |
| "grad_norm": 0.2623712677205065, |
| "learning_rate": 2.1443471005423447e-05, |
| "loss": 0.3545, |
| "step": 1636 |
| }, |
| { |
| "epoch": 1.8418683173888577, |
| "grad_norm": 0.2753735634753566, |
| "learning_rate": 2.1422611597830624e-05, |
| "loss": 0.3528, |
| "step": 1637 |
| }, |
| { |
| "epoch": 1.842993809791784, |
| "grad_norm": 0.31812525886192866, |
| "learning_rate": 2.1401752190237797e-05, |
| "loss": 0.3697, |
| "step": 1638 |
| }, |
| { |
| "epoch": 1.8441193021947102, |
| "grad_norm": 0.29105961621045684, |
| "learning_rate": 2.1380892782644974e-05, |
| "loss": 0.3546, |
| "step": 1639 |
| }, |
| { |
| "epoch": 1.8452447945976365, |
| "grad_norm": 0.2691984264982239, |
| "learning_rate": 2.1360033375052148e-05, |
| "loss": 0.3536, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.8463702870005627, |
| "grad_norm": 0.2993538772854178, |
| "learning_rate": 2.1339173967459325e-05, |
| "loss": 0.363, |
| "step": 1641 |
| }, |
| { |
| "epoch": 1.8474957794034892, |
| "grad_norm": 0.29783181788963287, |
| "learning_rate": 2.13183145598665e-05, |
| "loss": 0.3592, |
| "step": 1642 |
| }, |
| { |
| "epoch": 1.8486212718064152, |
| "grad_norm": 0.2775688059074239, |
| "learning_rate": 2.1297455152273675e-05, |
| "loss": 0.3581, |
| "step": 1643 |
| }, |
| { |
| "epoch": 1.8497467642093417, |
| "grad_norm": 0.3133614801924746, |
| "learning_rate": 2.1276595744680852e-05, |
| "loss": 0.3498, |
| "step": 1644 |
| }, |
| { |
| "epoch": 1.8508722566122677, |
| "grad_norm": 0.2772230818911116, |
| "learning_rate": 2.125573633708803e-05, |
| "loss": 0.3558, |
| "step": 1645 |
| }, |
| { |
| "epoch": 1.8519977490151942, |
| "grad_norm": 0.30827283116401644, |
| "learning_rate": 2.1234876929495203e-05, |
| "loss": 0.3614, |
| "step": 1646 |
| }, |
| { |
| "epoch": 1.8531232414181205, |
| "grad_norm": 0.24090218764810817, |
| "learning_rate": 2.121401752190238e-05, |
| "loss": 0.3662, |
| "step": 1647 |
| }, |
| { |
| "epoch": 1.8542487338210467, |
| "grad_norm": 0.28761910481188807, |
| "learning_rate": 2.1193158114309557e-05, |
| "loss": 0.3441, |
| "step": 1648 |
| }, |
| { |
| "epoch": 1.855374226223973, |
| "grad_norm": 0.2560509442786654, |
| "learning_rate": 2.117229870671673e-05, |
| "loss": 0.3547, |
| "step": 1649 |
| }, |
| { |
| "epoch": 1.8564997186268992, |
| "grad_norm": 0.30034883076743724, |
| "learning_rate": 2.1151439299123907e-05, |
| "loss": 0.3449, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.8576252110298257, |
| "grad_norm": 0.34444462233589906, |
| "learning_rate": 2.113057989153108e-05, |
| "loss": 0.3837, |
| "step": 1651 |
| }, |
| { |
| "epoch": 1.8587507034327517, |
| "grad_norm": 0.27692690682489174, |
| "learning_rate": 2.1109720483938258e-05, |
| "loss": 0.3607, |
| "step": 1652 |
| }, |
| { |
| "epoch": 1.8598761958356782, |
| "grad_norm": 0.26001142796077303, |
| "learning_rate": 2.1088861076345435e-05, |
| "loss": 0.3398, |
| "step": 1653 |
| }, |
| { |
| "epoch": 1.8610016882386042, |
| "grad_norm": 0.25366060360784753, |
| "learning_rate": 2.106800166875261e-05, |
| "loss": 0.3656, |
| "step": 1654 |
| }, |
| { |
| "epoch": 1.8621271806415307, |
| "grad_norm": 0.25058815872177637, |
| "learning_rate": 2.1047142261159782e-05, |
| "loss": 0.3439, |
| "step": 1655 |
| }, |
| { |
| "epoch": 1.863252673044457, |
| "grad_norm": 0.28664975028041284, |
| "learning_rate": 2.102628285356696e-05, |
| "loss": 0.3583, |
| "step": 1656 |
| }, |
| { |
| "epoch": 1.8643781654473832, |
| "grad_norm": 0.2732549675529288, |
| "learning_rate": 2.1005423445974136e-05, |
| "loss": 0.3305, |
| "step": 1657 |
| }, |
| { |
| "epoch": 1.8655036578503095, |
| "grad_norm": 0.2773666490469463, |
| "learning_rate": 2.098456403838131e-05, |
| "loss": 0.3591, |
| "step": 1658 |
| }, |
| { |
| "epoch": 1.8666291502532357, |
| "grad_norm": 0.2690002427002813, |
| "learning_rate": 2.0963704630788487e-05, |
| "loss": 0.3684, |
| "step": 1659 |
| }, |
| { |
| "epoch": 1.8677546426561622, |
| "grad_norm": 0.27085097978896006, |
| "learning_rate": 2.094284522319566e-05, |
| "loss": 0.3384, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.8688801350590882, |
| "grad_norm": 0.24697707069643743, |
| "learning_rate": 2.0921985815602837e-05, |
| "loss": 0.3572, |
| "step": 1661 |
| }, |
| { |
| "epoch": 1.8700056274620147, |
| "grad_norm": 0.2764605247602527, |
| "learning_rate": 2.0901126408010014e-05, |
| "loss": 0.3552, |
| "step": 1662 |
| }, |
| { |
| "epoch": 1.871131119864941, |
| "grad_norm": 0.2902550139143697, |
| "learning_rate": 2.0880267000417188e-05, |
| "loss": 0.3581, |
| "step": 1663 |
| }, |
| { |
| "epoch": 1.8722566122678672, |
| "grad_norm": 0.25734658506325125, |
| "learning_rate": 2.0859407592824365e-05, |
| "loss": 0.3509, |
| "step": 1664 |
| }, |
| { |
| "epoch": 1.8733821046707935, |
| "grad_norm": 0.29290615718137913, |
| "learning_rate": 2.0838548185231542e-05, |
| "loss": 0.3448, |
| "step": 1665 |
| }, |
| { |
| "epoch": 1.8745075970737197, |
| "grad_norm": 0.2633403418767797, |
| "learning_rate": 2.0817688777638715e-05, |
| "loss": 0.3556, |
| "step": 1666 |
| }, |
| { |
| "epoch": 1.8756330894766462, |
| "grad_norm": 0.3044255909775045, |
| "learning_rate": 2.0796829370045892e-05, |
| "loss": 0.3451, |
| "step": 1667 |
| }, |
| { |
| "epoch": 1.8767585818795722, |
| "grad_norm": 0.2932864685525451, |
| "learning_rate": 2.077596996245307e-05, |
| "loss": 0.3657, |
| "step": 1668 |
| }, |
| { |
| "epoch": 1.8778840742824987, |
| "grad_norm": 0.31135509455954635, |
| "learning_rate": 2.0755110554860243e-05, |
| "loss": 0.3734, |
| "step": 1669 |
| }, |
| { |
| "epoch": 1.8790095666854247, |
| "grad_norm": 0.2664061935893102, |
| "learning_rate": 2.073425114726742e-05, |
| "loss": 0.3629, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.8801350590883512, |
| "grad_norm": 0.2707969930148503, |
| "learning_rate": 2.0713391739674597e-05, |
| "loss": 0.3483, |
| "step": 1671 |
| }, |
| { |
| "epoch": 1.8812605514912775, |
| "grad_norm": 0.2582761473461036, |
| "learning_rate": 2.069253233208177e-05, |
| "loss": 0.366, |
| "step": 1672 |
| }, |
| { |
| "epoch": 1.8823860438942037, |
| "grad_norm": 0.2818191859830275, |
| "learning_rate": 2.0671672924488947e-05, |
| "loss": 0.3606, |
| "step": 1673 |
| }, |
| { |
| "epoch": 1.88351153629713, |
| "grad_norm": 0.274907626023918, |
| "learning_rate": 2.065081351689612e-05, |
| "loss": 0.3733, |
| "step": 1674 |
| }, |
| { |
| "epoch": 1.8846370287000562, |
| "grad_norm": 0.25302448281459705, |
| "learning_rate": 2.0629954109303295e-05, |
| "loss": 0.344, |
| "step": 1675 |
| }, |
| { |
| "epoch": 1.8857625211029827, |
| "grad_norm": 0.2601145397643824, |
| "learning_rate": 2.060909470171047e-05, |
| "loss": 0.3655, |
| "step": 1676 |
| }, |
| { |
| "epoch": 1.8868880135059087, |
| "grad_norm": 0.2598011168749623, |
| "learning_rate": 2.058823529411765e-05, |
| "loss": 0.3583, |
| "step": 1677 |
| }, |
| { |
| "epoch": 1.8880135059088352, |
| "grad_norm": 0.2764045861628215, |
| "learning_rate": 2.0567375886524822e-05, |
| "loss": 0.3358, |
| "step": 1678 |
| }, |
| { |
| "epoch": 1.8891389983117612, |
| "grad_norm": 0.2505563945259788, |
| "learning_rate": 2.0546516478932e-05, |
| "loss": 0.3326, |
| "step": 1679 |
| }, |
| { |
| "epoch": 1.8902644907146877, |
| "grad_norm": 0.2593385914562438, |
| "learning_rate": 2.0525657071339173e-05, |
| "loss": 0.344, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.891389983117614, |
| "grad_norm": 0.32013539903668187, |
| "learning_rate": 2.050479766374635e-05, |
| "loss": 0.3678, |
| "step": 1681 |
| }, |
| { |
| "epoch": 1.8925154755205402, |
| "grad_norm": 0.2850992099914004, |
| "learning_rate": 2.0483938256153527e-05, |
| "loss": 0.397, |
| "step": 1682 |
| }, |
| { |
| "epoch": 1.8936409679234665, |
| "grad_norm": 0.3016034620250037, |
| "learning_rate": 2.04630788485607e-05, |
| "loss": 0.3358, |
| "step": 1683 |
| }, |
| { |
| "epoch": 1.8947664603263927, |
| "grad_norm": 0.322626269426066, |
| "learning_rate": 2.0442219440967877e-05, |
| "loss": 0.3493, |
| "step": 1684 |
| }, |
| { |
| "epoch": 1.8958919527293192, |
| "grad_norm": 0.27415129738901345, |
| "learning_rate": 2.0421360033375054e-05, |
| "loss": 0.3612, |
| "step": 1685 |
| }, |
| { |
| "epoch": 1.8970174451322452, |
| "grad_norm": 0.3202508460747489, |
| "learning_rate": 2.0400500625782228e-05, |
| "loss": 0.3449, |
| "step": 1686 |
| }, |
| { |
| "epoch": 1.8981429375351717, |
| "grad_norm": 0.2610128644172156, |
| "learning_rate": 2.0379641218189405e-05, |
| "loss": 0.334, |
| "step": 1687 |
| }, |
| { |
| "epoch": 1.899268429938098, |
| "grad_norm": 0.26431886989489495, |
| "learning_rate": 2.0358781810596582e-05, |
| "loss": 0.3701, |
| "step": 1688 |
| }, |
| { |
| "epoch": 1.9003939223410242, |
| "grad_norm": 0.32289025222752066, |
| "learning_rate": 2.0337922403003756e-05, |
| "loss": 0.3772, |
| "step": 1689 |
| }, |
| { |
| "epoch": 1.9015194147439505, |
| "grad_norm": 0.27620099175466095, |
| "learning_rate": 2.0317062995410932e-05, |
| "loss": 0.3634, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.9026449071468767, |
| "grad_norm": 0.30452855448211125, |
| "learning_rate": 2.029620358781811e-05, |
| "loss": 0.3619, |
| "step": 1691 |
| }, |
| { |
| "epoch": 1.9037703995498032, |
| "grad_norm": 0.30999319017283444, |
| "learning_rate": 2.0275344180225283e-05, |
| "loss": 0.3472, |
| "step": 1692 |
| }, |
| { |
| "epoch": 1.9048958919527292, |
| "grad_norm": 0.34073549354424293, |
| "learning_rate": 2.025448477263246e-05, |
| "loss": 0.3417, |
| "step": 1693 |
| }, |
| { |
| "epoch": 1.9060213843556557, |
| "grad_norm": 0.28162550986145274, |
| "learning_rate": 2.0233625365039634e-05, |
| "loss": 0.3536, |
| "step": 1694 |
| }, |
| { |
| "epoch": 1.9071468767585817, |
| "grad_norm": 0.3215339598711887, |
| "learning_rate": 2.0212765957446807e-05, |
| "loss": 0.3682, |
| "step": 1695 |
| }, |
| { |
| "epoch": 1.9082723691615082, |
| "grad_norm": 0.34154514944007364, |
| "learning_rate": 2.0191906549853984e-05, |
| "loss": 0.3573, |
| "step": 1696 |
| }, |
| { |
| "epoch": 1.9093978615644345, |
| "grad_norm": 0.27450876997174517, |
| "learning_rate": 2.017104714226116e-05, |
| "loss": 0.3656, |
| "step": 1697 |
| }, |
| { |
| "epoch": 1.9105233539673607, |
| "grad_norm": 0.32973694211143484, |
| "learning_rate": 2.0150187734668335e-05, |
| "loss": 0.3729, |
| "step": 1698 |
| }, |
| { |
| "epoch": 1.911648846370287, |
| "grad_norm": 0.33057591238589434, |
| "learning_rate": 2.0129328327075512e-05, |
| "loss": 0.371, |
| "step": 1699 |
| }, |
| { |
| "epoch": 1.9127743387732132, |
| "grad_norm": 0.28948186161364625, |
| "learning_rate": 2.0108468919482685e-05, |
| "loss": 0.3397, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.9138998311761397, |
| "grad_norm": 0.3007970569880779, |
| "learning_rate": 2.0087609511889862e-05, |
| "loss": 0.3643, |
| "step": 1701 |
| }, |
| { |
| "epoch": 1.9150253235790657, |
| "grad_norm": 0.2612518404162693, |
| "learning_rate": 2.006675010429704e-05, |
| "loss": 0.3532, |
| "step": 1702 |
| }, |
| { |
| "epoch": 1.9161508159819922, |
| "grad_norm": 0.31521980587085163, |
| "learning_rate": 2.0045890696704213e-05, |
| "loss": 0.3572, |
| "step": 1703 |
| }, |
| { |
| "epoch": 1.9172763083849182, |
| "grad_norm": 0.32716978204799535, |
| "learning_rate": 2.002503128911139e-05, |
| "loss": 0.3655, |
| "step": 1704 |
| }, |
| { |
| "epoch": 1.9184018007878447, |
| "grad_norm": 0.2848312721456602, |
| "learning_rate": 2.0004171881518567e-05, |
| "loss": 0.3293, |
| "step": 1705 |
| }, |
| { |
| "epoch": 1.919527293190771, |
| "grad_norm": 0.2849516222624094, |
| "learning_rate": 1.998331247392574e-05, |
| "loss": 0.3493, |
| "step": 1706 |
| }, |
| { |
| "epoch": 1.9206527855936972, |
| "grad_norm": 0.2748223750385321, |
| "learning_rate": 1.9962453066332917e-05, |
| "loss": 0.3361, |
| "step": 1707 |
| }, |
| { |
| "epoch": 1.9217782779966235, |
| "grad_norm": 0.3052533145067581, |
| "learning_rate": 1.9941593658740094e-05, |
| "loss": 0.3697, |
| "step": 1708 |
| }, |
| { |
| "epoch": 1.9229037703995497, |
| "grad_norm": 0.2819225673013518, |
| "learning_rate": 1.9920734251147268e-05, |
| "loss": 0.3598, |
| "step": 1709 |
| }, |
| { |
| "epoch": 1.9240292628024762, |
| "grad_norm": 0.28297852832083414, |
| "learning_rate": 1.9899874843554445e-05, |
| "loss": 0.3421, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.9251547552054022, |
| "grad_norm": 0.32135792331365465, |
| "learning_rate": 1.9879015435961622e-05, |
| "loss": 0.3728, |
| "step": 1711 |
| }, |
| { |
| "epoch": 1.9262802476083287, |
| "grad_norm": 0.2485116486993189, |
| "learning_rate": 1.9858156028368796e-05, |
| "loss": 0.3494, |
| "step": 1712 |
| }, |
| { |
| "epoch": 1.927405740011255, |
| "grad_norm": 0.2749683711636245, |
| "learning_rate": 1.9837296620775973e-05, |
| "loss": 0.346, |
| "step": 1713 |
| }, |
| { |
| "epoch": 1.9285312324141812, |
| "grad_norm": 0.2642179410888402, |
| "learning_rate": 1.9816437213183146e-05, |
| "loss": 0.3548, |
| "step": 1714 |
| }, |
| { |
| "epoch": 1.9296567248171075, |
| "grad_norm": 0.25158261695078715, |
| "learning_rate": 1.9795577805590323e-05, |
| "loss": 0.359, |
| "step": 1715 |
| }, |
| { |
| "epoch": 1.9307822172200337, |
| "grad_norm": 0.27223176041458313, |
| "learning_rate": 1.9774718397997497e-05, |
| "loss": 0.3414, |
| "step": 1716 |
| }, |
| { |
| "epoch": 1.93190770962296, |
| "grad_norm": 0.2782144577617854, |
| "learning_rate": 1.9753858990404674e-05, |
| "loss": 0.3561, |
| "step": 1717 |
| }, |
| { |
| "epoch": 1.9330332020258862, |
| "grad_norm": 0.27538099734788146, |
| "learning_rate": 1.9732999582811847e-05, |
| "loss": 0.3472, |
| "step": 1718 |
| }, |
| { |
| "epoch": 1.9341586944288127, |
| "grad_norm": 0.2960828119915571, |
| "learning_rate": 1.9712140175219024e-05, |
| "loss": 0.3496, |
| "step": 1719 |
| }, |
| { |
| "epoch": 1.9352841868317388, |
| "grad_norm": 0.258095045594745, |
| "learning_rate": 1.9691280767626198e-05, |
| "loss": 0.3517, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.9364096792346652, |
| "grad_norm": 0.3024256600541793, |
| "learning_rate": 1.9670421360033375e-05, |
| "loss": 0.3586, |
| "step": 1721 |
| }, |
| { |
| "epoch": 1.9375351716375915, |
| "grad_norm": 0.29098939153442666, |
| "learning_rate": 1.9649561952440552e-05, |
| "loss": 0.3643, |
| "step": 1722 |
| }, |
| { |
| "epoch": 1.9386606640405177, |
| "grad_norm": 0.25782898610022725, |
| "learning_rate": 1.9628702544847726e-05, |
| "loss": 0.367, |
| "step": 1723 |
| }, |
| { |
| "epoch": 1.939786156443444, |
| "grad_norm": 0.3495526740430891, |
| "learning_rate": 1.9607843137254903e-05, |
| "loss": 0.3577, |
| "step": 1724 |
| }, |
| { |
| "epoch": 1.9409116488463702, |
| "grad_norm": 0.2728973828660973, |
| "learning_rate": 1.958698372966208e-05, |
| "loss": 0.3554, |
| "step": 1725 |
| }, |
| { |
| "epoch": 1.9420371412492967, |
| "grad_norm": 0.2901290142358023, |
| "learning_rate": 1.9566124322069253e-05, |
| "loss": 0.37, |
| "step": 1726 |
| }, |
| { |
| "epoch": 1.9431626336522227, |
| "grad_norm": 0.3031752356222974, |
| "learning_rate": 1.954526491447643e-05, |
| "loss": 0.3638, |
| "step": 1727 |
| }, |
| { |
| "epoch": 1.9442881260551492, |
| "grad_norm": 0.260909753207997, |
| "learning_rate": 1.9524405506883607e-05, |
| "loss": 0.3618, |
| "step": 1728 |
| }, |
| { |
| "epoch": 1.9454136184580753, |
| "grad_norm": 0.28948350014768964, |
| "learning_rate": 1.950354609929078e-05, |
| "loss": 0.3401, |
| "step": 1729 |
| }, |
| { |
| "epoch": 1.9465391108610017, |
| "grad_norm": 0.2623446580726307, |
| "learning_rate": 1.9482686691697958e-05, |
| "loss": 0.3618, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.947664603263928, |
| "grad_norm": 0.2666588748626957, |
| "learning_rate": 1.9461827284105135e-05, |
| "loss": 0.3424, |
| "step": 1731 |
| }, |
| { |
| "epoch": 1.9487900956668542, |
| "grad_norm": 0.23758227129892492, |
| "learning_rate": 1.9440967876512308e-05, |
| "loss": 0.3647, |
| "step": 1732 |
| }, |
| { |
| "epoch": 1.9499155880697805, |
| "grad_norm": 1.0235070433552647, |
| "learning_rate": 1.9420108468919485e-05, |
| "loss": 0.362, |
| "step": 1733 |
| }, |
| { |
| "epoch": 1.9510410804727067, |
| "grad_norm": 0.28481161066229677, |
| "learning_rate": 1.939924906132666e-05, |
| "loss": 0.3631, |
| "step": 1734 |
| }, |
| { |
| "epoch": 1.9521665728756332, |
| "grad_norm": 0.2848122389618838, |
| "learning_rate": 1.9378389653733836e-05, |
| "loss": 0.3469, |
| "step": 1735 |
| }, |
| { |
| "epoch": 1.9532920652785593, |
| "grad_norm": 0.2759014719515173, |
| "learning_rate": 1.9357530246141013e-05, |
| "loss": 0.3425, |
| "step": 1736 |
| }, |
| { |
| "epoch": 1.9544175576814857, |
| "grad_norm": 0.27874949300316715, |
| "learning_rate": 1.9336670838548186e-05, |
| "loss": 0.3855, |
| "step": 1737 |
| }, |
| { |
| "epoch": 1.955543050084412, |
| "grad_norm": 0.31363642679753656, |
| "learning_rate": 1.931581143095536e-05, |
| "loss": 0.3536, |
| "step": 1738 |
| }, |
| { |
| "epoch": 1.9566685424873382, |
| "grad_norm": 0.2556224324207228, |
| "learning_rate": 1.9294952023362537e-05, |
| "loss": 0.3432, |
| "step": 1739 |
| }, |
| { |
| "epoch": 1.9577940348902645, |
| "grad_norm": 0.2670888092453423, |
| "learning_rate": 1.927409261576971e-05, |
| "loss": 0.3509, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.9589195272931907, |
| "grad_norm": 0.25001267900165874, |
| "learning_rate": 1.9253233208176888e-05, |
| "loss": 0.3323, |
| "step": 1741 |
| }, |
| { |
| "epoch": 1.960045019696117, |
| "grad_norm": 0.2974207872384669, |
| "learning_rate": 1.9232373800584064e-05, |
| "loss": 0.3544, |
| "step": 1742 |
| }, |
| { |
| "epoch": 1.9611705120990433, |
| "grad_norm": 0.27472747483190185, |
| "learning_rate": 1.9211514392991238e-05, |
| "loss": 0.3521, |
| "step": 1743 |
| }, |
| { |
| "epoch": 1.9622960045019697, |
| "grad_norm": 0.2683475797146492, |
| "learning_rate": 1.9190654985398415e-05, |
| "loss": 0.3682, |
| "step": 1744 |
| }, |
| { |
| "epoch": 1.9634214969048958, |
| "grad_norm": 0.3822465905741808, |
| "learning_rate": 1.9169795577805592e-05, |
| "loss": 0.3535, |
| "step": 1745 |
| }, |
| { |
| "epoch": 1.9645469893078222, |
| "grad_norm": 0.29811948702966473, |
| "learning_rate": 1.9148936170212766e-05, |
| "loss": 0.3732, |
| "step": 1746 |
| }, |
| { |
| "epoch": 1.9656724817107485, |
| "grad_norm": 0.30142259657958986, |
| "learning_rate": 1.9128076762619943e-05, |
| "loss": 0.3432, |
| "step": 1747 |
| }, |
| { |
| "epoch": 1.9667979741136747, |
| "grad_norm": 0.3786818892770981, |
| "learning_rate": 1.910721735502712e-05, |
| "loss": 0.3654, |
| "step": 1748 |
| }, |
| { |
| "epoch": 1.967923466516601, |
| "grad_norm": 0.27029496791481233, |
| "learning_rate": 1.9086357947434293e-05, |
| "loss": 0.3682, |
| "step": 1749 |
| }, |
| { |
| "epoch": 1.9690489589195272, |
| "grad_norm": 0.262798379544428, |
| "learning_rate": 1.906549853984147e-05, |
| "loss": 0.3568, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.9701744513224537, |
| "grad_norm": 0.3135712581670641, |
| "learning_rate": 1.9044639132248647e-05, |
| "loss": 0.3511, |
| "step": 1751 |
| }, |
| { |
| "epoch": 1.9712999437253798, |
| "grad_norm": 0.3158145619580369, |
| "learning_rate": 1.902377972465582e-05, |
| "loss": 0.36, |
| "step": 1752 |
| }, |
| { |
| "epoch": 1.9724254361283062, |
| "grad_norm": 0.318706113946463, |
| "learning_rate": 1.9002920317062998e-05, |
| "loss": 0.3562, |
| "step": 1753 |
| }, |
| { |
| "epoch": 1.9735509285312323, |
| "grad_norm": 0.310806681735437, |
| "learning_rate": 1.898206090947017e-05, |
| "loss": 0.3514, |
| "step": 1754 |
| }, |
| { |
| "epoch": 1.9746764209341587, |
| "grad_norm": 0.2849866940009224, |
| "learning_rate": 1.896120150187735e-05, |
| "loss": 0.3477, |
| "step": 1755 |
| }, |
| { |
| "epoch": 1.975801913337085, |
| "grad_norm": 0.2810634482745697, |
| "learning_rate": 1.8940342094284525e-05, |
| "loss": 0.3644, |
| "step": 1756 |
| }, |
| { |
| "epoch": 1.9769274057400112, |
| "grad_norm": 0.2878137639733897, |
| "learning_rate": 1.89194826866917e-05, |
| "loss": 0.3594, |
| "step": 1757 |
| }, |
| { |
| "epoch": 1.9780528981429375, |
| "grad_norm": 0.26057909113445293, |
| "learning_rate": 1.8898623279098873e-05, |
| "loss": 0.3755, |
| "step": 1758 |
| }, |
| { |
| "epoch": 1.9791783905458638, |
| "grad_norm": 0.27092989925442396, |
| "learning_rate": 1.887776387150605e-05, |
| "loss": 0.3648, |
| "step": 1759 |
| }, |
| { |
| "epoch": 1.9803038829487902, |
| "grad_norm": 0.2845108154959281, |
| "learning_rate": 1.8856904463913223e-05, |
| "loss": 0.3449, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.9814293753517163, |
| "grad_norm": 0.24467445189735315, |
| "learning_rate": 1.88360450563204e-05, |
| "loss": 0.3558, |
| "step": 1761 |
| }, |
| { |
| "epoch": 1.9825548677546427, |
| "grad_norm": 0.2715643743977259, |
| "learning_rate": 1.8815185648727577e-05, |
| "loss": 0.3567, |
| "step": 1762 |
| }, |
| { |
| "epoch": 1.983680360157569, |
| "grad_norm": 0.2613996036084293, |
| "learning_rate": 1.879432624113475e-05, |
| "loss": 0.3467, |
| "step": 1763 |
| }, |
| { |
| "epoch": 1.9848058525604952, |
| "grad_norm": 0.2816357872833296, |
| "learning_rate": 1.8773466833541928e-05, |
| "loss": 0.3417, |
| "step": 1764 |
| }, |
| { |
| "epoch": 1.9859313449634215, |
| "grad_norm": 0.29529698315579805, |
| "learning_rate": 1.8752607425949105e-05, |
| "loss": 0.3527, |
| "step": 1765 |
| }, |
| { |
| "epoch": 1.9870568373663478, |
| "grad_norm": 0.27238727861070106, |
| "learning_rate": 1.8731748018356278e-05, |
| "loss": 0.3717, |
| "step": 1766 |
| }, |
| { |
| "epoch": 1.988182329769274, |
| "grad_norm": 0.27577156414013015, |
| "learning_rate": 1.8710888610763455e-05, |
| "loss": 0.3632, |
| "step": 1767 |
| }, |
| { |
| "epoch": 1.9893078221722003, |
| "grad_norm": 0.31287278872365587, |
| "learning_rate": 1.8690029203170632e-05, |
| "loss": 0.3678, |
| "step": 1768 |
| }, |
| { |
| "epoch": 1.9904333145751267, |
| "grad_norm": 0.2910024485455243, |
| "learning_rate": 1.8669169795577806e-05, |
| "loss": 0.3661, |
| "step": 1769 |
| }, |
| { |
| "epoch": 1.9915588069780528, |
| "grad_norm": 0.29522751930001573, |
| "learning_rate": 1.8648310387984983e-05, |
| "loss": 0.3733, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.9926842993809792, |
| "grad_norm": 0.2931943333929543, |
| "learning_rate": 1.862745098039216e-05, |
| "loss": 0.3554, |
| "step": 1771 |
| }, |
| { |
| "epoch": 1.9938097917839055, |
| "grad_norm": 0.29961502454826516, |
| "learning_rate": 1.8606591572799333e-05, |
| "loss": 0.3534, |
| "step": 1772 |
| }, |
| { |
| "epoch": 1.9949352841868317, |
| "grad_norm": 0.3016308875068367, |
| "learning_rate": 1.858573216520651e-05, |
| "loss": 0.3868, |
| "step": 1773 |
| }, |
| { |
| "epoch": 1.996060776589758, |
| "grad_norm": 0.3051815491365933, |
| "learning_rate": 1.8564872757613684e-05, |
| "loss": 0.3573, |
| "step": 1774 |
| }, |
| { |
| "epoch": 1.9971862689926843, |
| "grad_norm": 0.3463472368237023, |
| "learning_rate": 1.854401335002086e-05, |
| "loss": 0.3568, |
| "step": 1775 |
| }, |
| { |
| "epoch": 1.9983117613956107, |
| "grad_norm": 0.30250184431483823, |
| "learning_rate": 1.8523153942428038e-05, |
| "loss": 0.3679, |
| "step": 1776 |
| }, |
| { |
| "epoch": 1.9994372537985368, |
| "grad_norm": 0.27076812267453526, |
| "learning_rate": 1.850229453483521e-05, |
| "loss": 0.359, |
| "step": 1777 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.41375343672066456, |
| "learning_rate": 1.848143512724239e-05, |
| "loss": 0.3219, |
| "step": 1778 |
| }, |
| { |
| "epoch": 2.0011254924029265, |
| "grad_norm": 0.30690460412174675, |
| "learning_rate": 1.8460575719649562e-05, |
| "loss": 0.2847, |
| "step": 1779 |
| }, |
| { |
| "epoch": 2.0022509848058525, |
| "grad_norm": 0.29252851536455965, |
| "learning_rate": 1.8439716312056736e-05, |
| "loss": 0.2916, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.003376477208779, |
| "grad_norm": 0.2867585999652241, |
| "learning_rate": 1.8418856904463913e-05, |
| "loss": 0.304, |
| "step": 1781 |
| }, |
| { |
| "epoch": 2.004501969611705, |
| "grad_norm": 0.3147976773039966, |
| "learning_rate": 1.839799749687109e-05, |
| "loss": 0.2891, |
| "step": 1782 |
| }, |
| { |
| "epoch": 2.0056274620146315, |
| "grad_norm": 0.2441846828289504, |
| "learning_rate": 1.8377138089278263e-05, |
| "loss": 0.2909, |
| "step": 1783 |
| }, |
| { |
| "epoch": 2.0067529544175575, |
| "grad_norm": 0.2593896216365388, |
| "learning_rate": 1.835627868168544e-05, |
| "loss": 0.2753, |
| "step": 1784 |
| }, |
| { |
| "epoch": 2.007878446820484, |
| "grad_norm": 0.2893905461877493, |
| "learning_rate": 1.8335419274092617e-05, |
| "loss": 0.2818, |
| "step": 1785 |
| }, |
| { |
| "epoch": 2.00900393922341, |
| "grad_norm": 0.2846562929483248, |
| "learning_rate": 1.831455986649979e-05, |
| "loss": 0.2907, |
| "step": 1786 |
| }, |
| { |
| "epoch": 2.0101294316263365, |
| "grad_norm": 0.2566724532797832, |
| "learning_rate": 1.8293700458906968e-05, |
| "loss": 0.2865, |
| "step": 1787 |
| }, |
| { |
| "epoch": 2.011254924029263, |
| "grad_norm": 0.30986557763389416, |
| "learning_rate": 1.8272841051314145e-05, |
| "loss": 0.299, |
| "step": 1788 |
| }, |
| { |
| "epoch": 2.012380416432189, |
| "grad_norm": 0.2790346837879426, |
| "learning_rate": 1.825198164372132e-05, |
| "loss": 0.2872, |
| "step": 1789 |
| }, |
| { |
| "epoch": 2.0135059088351155, |
| "grad_norm": 0.28965248515971675, |
| "learning_rate": 1.8231122236128495e-05, |
| "loss": 0.282, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.0146314012380415, |
| "grad_norm": 0.26758447999158064, |
| "learning_rate": 1.8210262828535672e-05, |
| "loss": 0.2854, |
| "step": 1791 |
| }, |
| { |
| "epoch": 2.015756893640968, |
| "grad_norm": 0.25752829835015667, |
| "learning_rate": 1.8189403420942846e-05, |
| "loss": 0.2875, |
| "step": 1792 |
| }, |
| { |
| "epoch": 2.016882386043894, |
| "grad_norm": 0.26237094621373575, |
| "learning_rate": 1.8168544013350023e-05, |
| "loss": 0.2861, |
| "step": 1793 |
| }, |
| { |
| "epoch": 2.0180078784468205, |
| "grad_norm": 0.25324822624548066, |
| "learning_rate": 1.8147684605757196e-05, |
| "loss": 0.2804, |
| "step": 1794 |
| }, |
| { |
| "epoch": 2.019133370849747, |
| "grad_norm": 0.27650509711437854, |
| "learning_rate": 1.8126825198164373e-05, |
| "loss": 0.298, |
| "step": 1795 |
| }, |
| { |
| "epoch": 2.020258863252673, |
| "grad_norm": 0.271607108362916, |
| "learning_rate": 1.810596579057155e-05, |
| "loss": 0.2793, |
| "step": 1796 |
| }, |
| { |
| "epoch": 2.0213843556555995, |
| "grad_norm": 0.2763902863245182, |
| "learning_rate": 1.8085106382978724e-05, |
| "loss": 0.2824, |
| "step": 1797 |
| }, |
| { |
| "epoch": 2.0225098480585255, |
| "grad_norm": 0.29074430245042243, |
| "learning_rate": 1.80642469753859e-05, |
| "loss": 0.2847, |
| "step": 1798 |
| }, |
| { |
| "epoch": 2.023635340461452, |
| "grad_norm": 0.252760394282513, |
| "learning_rate": 1.8043387567793075e-05, |
| "loss": 0.2729, |
| "step": 1799 |
| }, |
| { |
| "epoch": 2.024760832864378, |
| "grad_norm": 0.25115826895976634, |
| "learning_rate": 1.8022528160200248e-05, |
| "loss": 0.2903, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.0258863252673045, |
| "grad_norm": 0.31665556656306054, |
| "learning_rate": 1.8001668752607425e-05, |
| "loss": 0.2806, |
| "step": 1801 |
| }, |
| { |
| "epoch": 2.0270118176702305, |
| "grad_norm": 0.27565102328032076, |
| "learning_rate": 1.7980809345014602e-05, |
| "loss": 0.2781, |
| "step": 1802 |
| }, |
| { |
| "epoch": 2.028137310073157, |
| "grad_norm": 0.26334129144996565, |
| "learning_rate": 1.7959949937421776e-05, |
| "loss": 0.2865, |
| "step": 1803 |
| }, |
| { |
| "epoch": 2.0292628024760835, |
| "grad_norm": 0.29084203177119927, |
| "learning_rate": 1.7939090529828953e-05, |
| "loss": 0.2915, |
| "step": 1804 |
| }, |
| { |
| "epoch": 2.0303882948790095, |
| "grad_norm": 0.24821063662817036, |
| "learning_rate": 1.791823112223613e-05, |
| "loss": 0.2784, |
| "step": 1805 |
| }, |
| { |
| "epoch": 2.031513787281936, |
| "grad_norm": 0.2550931735301453, |
| "learning_rate": 1.7897371714643303e-05, |
| "loss": 0.2836, |
| "step": 1806 |
| }, |
| { |
| "epoch": 2.032639279684862, |
| "grad_norm": 0.27634727649104684, |
| "learning_rate": 1.787651230705048e-05, |
| "loss": 0.3069, |
| "step": 1807 |
| }, |
| { |
| "epoch": 2.0337647720877885, |
| "grad_norm": 0.24014034990048097, |
| "learning_rate": 1.7855652899457657e-05, |
| "loss": 0.2858, |
| "step": 1808 |
| }, |
| { |
| "epoch": 2.0348902644907145, |
| "grad_norm": 0.23529224395747875, |
| "learning_rate": 1.783479349186483e-05, |
| "loss": 0.292, |
| "step": 1809 |
| }, |
| { |
| "epoch": 2.036015756893641, |
| "grad_norm": 0.2226918871531934, |
| "learning_rate": 1.7813934084272008e-05, |
| "loss": 0.289, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.037141249296567, |
| "grad_norm": 0.24875514083553227, |
| "learning_rate": 1.7793074676679185e-05, |
| "loss": 0.2879, |
| "step": 1811 |
| }, |
| { |
| "epoch": 2.0382667416994935, |
| "grad_norm": 0.22101380287283037, |
| "learning_rate": 1.777221526908636e-05, |
| "loss": 0.2785, |
| "step": 1812 |
| }, |
| { |
| "epoch": 2.03939223410242, |
| "grad_norm": 0.24344041835452335, |
| "learning_rate": 1.7751355861493535e-05, |
| "loss": 0.2768, |
| "step": 1813 |
| }, |
| { |
| "epoch": 2.040517726505346, |
| "grad_norm": 0.24709305555007302, |
| "learning_rate": 1.773049645390071e-05, |
| "loss": 0.2785, |
| "step": 1814 |
| }, |
| { |
| "epoch": 2.0416432189082725, |
| "grad_norm": 0.23036508957897686, |
| "learning_rate": 1.7709637046307886e-05, |
| "loss": 0.2829, |
| "step": 1815 |
| }, |
| { |
| "epoch": 2.0427687113111985, |
| "grad_norm": 0.304777104086667, |
| "learning_rate": 1.7688777638715063e-05, |
| "loss": 0.2842, |
| "step": 1816 |
| }, |
| { |
| "epoch": 2.043894203714125, |
| "grad_norm": 0.24563307593084063, |
| "learning_rate": 1.7667918231122237e-05, |
| "loss": 0.2842, |
| "step": 1817 |
| }, |
| { |
| "epoch": 2.045019696117051, |
| "grad_norm": 0.24049156495572827, |
| "learning_rate": 1.7647058823529414e-05, |
| "loss": 0.29, |
| "step": 1818 |
| }, |
| { |
| "epoch": 2.0461451885199775, |
| "grad_norm": 0.26152397114334225, |
| "learning_rate": 1.762619941593659e-05, |
| "loss": 0.2943, |
| "step": 1819 |
| }, |
| { |
| "epoch": 2.047270680922904, |
| "grad_norm": 0.24701566468961217, |
| "learning_rate": 1.7605340008343764e-05, |
| "loss": 0.28, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.04839617332583, |
| "grad_norm": 0.22113320376779072, |
| "learning_rate": 1.7584480600750938e-05, |
| "loss": 0.2824, |
| "step": 1821 |
| }, |
| { |
| "epoch": 2.0495216657287565, |
| "grad_norm": 0.2498303273769485, |
| "learning_rate": 1.7563621193158115e-05, |
| "loss": 0.2764, |
| "step": 1822 |
| }, |
| { |
| "epoch": 2.0506471581316825, |
| "grad_norm": 0.2613079367123678, |
| "learning_rate": 1.754276178556529e-05, |
| "loss": 0.3029, |
| "step": 1823 |
| }, |
| { |
| "epoch": 2.051772650534609, |
| "grad_norm": 0.2533549657170249, |
| "learning_rate": 1.7521902377972465e-05, |
| "loss": 0.2941, |
| "step": 1824 |
| }, |
| { |
| "epoch": 2.052898142937535, |
| "grad_norm": 0.24525113996522538, |
| "learning_rate": 1.7501042970379642e-05, |
| "loss": 0.2791, |
| "step": 1825 |
| }, |
| { |
| "epoch": 2.0540236353404615, |
| "grad_norm": 0.22636672236346222, |
| "learning_rate": 1.7480183562786816e-05, |
| "loss": 0.2708, |
| "step": 1826 |
| }, |
| { |
| "epoch": 2.0551491277433875, |
| "grad_norm": 0.2318404892918077, |
| "learning_rate": 1.7459324155193993e-05, |
| "loss": 0.2831, |
| "step": 1827 |
| }, |
| { |
| "epoch": 2.056274620146314, |
| "grad_norm": 0.22908482292345286, |
| "learning_rate": 1.743846474760117e-05, |
| "loss": 0.2791, |
| "step": 1828 |
| }, |
| { |
| "epoch": 2.0574001125492405, |
| "grad_norm": 0.23199016490767796, |
| "learning_rate": 1.7417605340008343e-05, |
| "loss": 0.2899, |
| "step": 1829 |
| }, |
| { |
| "epoch": 2.0585256049521665, |
| "grad_norm": 0.22679432927238993, |
| "learning_rate": 1.739674593241552e-05, |
| "loss": 0.2705, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.059651097355093, |
| "grad_norm": 0.240936280203786, |
| "learning_rate": 1.7375886524822697e-05, |
| "loss": 0.2796, |
| "step": 1831 |
| }, |
| { |
| "epoch": 2.060776589758019, |
| "grad_norm": 0.23052791316981805, |
| "learning_rate": 1.735502711722987e-05, |
| "loss": 0.298, |
| "step": 1832 |
| }, |
| { |
| "epoch": 2.0619020821609455, |
| "grad_norm": 0.22399826316835342, |
| "learning_rate": 1.7334167709637048e-05, |
| "loss": 0.2768, |
| "step": 1833 |
| }, |
| { |
| "epoch": 2.0630275745638715, |
| "grad_norm": 0.24389711598920422, |
| "learning_rate": 1.731330830204422e-05, |
| "loss": 0.2789, |
| "step": 1834 |
| }, |
| { |
| "epoch": 2.064153066966798, |
| "grad_norm": 0.24531794065173357, |
| "learning_rate": 1.72924488944514e-05, |
| "loss": 0.2841, |
| "step": 1835 |
| }, |
| { |
| "epoch": 2.065278559369724, |
| "grad_norm": 0.2857308138585535, |
| "learning_rate": 1.7271589486858576e-05, |
| "loss": 0.2746, |
| "step": 1836 |
| }, |
| { |
| "epoch": 2.0664040517726505, |
| "grad_norm": 0.2331548964731216, |
| "learning_rate": 1.725073007926575e-05, |
| "loss": 0.2779, |
| "step": 1837 |
| }, |
| { |
| "epoch": 2.067529544175577, |
| "grad_norm": 0.23649426513105923, |
| "learning_rate": 1.7229870671672926e-05, |
| "loss": 0.2913, |
| "step": 1838 |
| }, |
| { |
| "epoch": 2.068655036578503, |
| "grad_norm": 0.2777841981435879, |
| "learning_rate": 1.7209011264080103e-05, |
| "loss": 0.2826, |
| "step": 1839 |
| }, |
| { |
| "epoch": 2.0697805289814295, |
| "grad_norm": 0.27066327686914066, |
| "learning_rate": 1.7188151856487277e-05, |
| "loss": 0.2893, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.0709060213843555, |
| "grad_norm": 0.23134899380353294, |
| "learning_rate": 1.716729244889445e-05, |
| "loss": 0.2804, |
| "step": 1841 |
| }, |
| { |
| "epoch": 2.072031513787282, |
| "grad_norm": 0.29223852513335047, |
| "learning_rate": 1.7146433041301627e-05, |
| "loss": 0.2916, |
| "step": 1842 |
| }, |
| { |
| "epoch": 2.073157006190208, |
| "grad_norm": 0.2735960908953659, |
| "learning_rate": 1.71255736337088e-05, |
| "loss": 0.2852, |
| "step": 1843 |
| }, |
| { |
| "epoch": 2.0742824985931345, |
| "grad_norm": 0.26821528502754455, |
| "learning_rate": 1.7104714226115978e-05, |
| "loss": 0.2891, |
| "step": 1844 |
| }, |
| { |
| "epoch": 2.0754079909960605, |
| "grad_norm": 0.26154260311021144, |
| "learning_rate": 1.7083854818523155e-05, |
| "loss": 0.2875, |
| "step": 1845 |
| }, |
| { |
| "epoch": 2.076533483398987, |
| "grad_norm": 0.31021830521974225, |
| "learning_rate": 1.706299541093033e-05, |
| "loss": 0.2776, |
| "step": 1846 |
| }, |
| { |
| "epoch": 2.0776589758019135, |
| "grad_norm": 0.2788988641156972, |
| "learning_rate": 1.7042136003337505e-05, |
| "loss": 0.2886, |
| "step": 1847 |
| }, |
| { |
| "epoch": 2.0787844682048395, |
| "grad_norm": 0.2907858072020635, |
| "learning_rate": 1.7021276595744682e-05, |
| "loss": 0.2895, |
| "step": 1848 |
| }, |
| { |
| "epoch": 2.079909960607766, |
| "grad_norm": 0.2542410475178318, |
| "learning_rate": 1.7000417188151856e-05, |
| "loss": 0.2856, |
| "step": 1849 |
| }, |
| { |
| "epoch": 2.081035453010692, |
| "grad_norm": 0.24197984345301113, |
| "learning_rate": 1.6979557780559033e-05, |
| "loss": 0.2824, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.0821609454136185, |
| "grad_norm": 0.2557692899387776, |
| "learning_rate": 1.695869837296621e-05, |
| "loss": 0.2909, |
| "step": 1851 |
| }, |
| { |
| "epoch": 2.0832864378165445, |
| "grad_norm": 0.23793678801447735, |
| "learning_rate": 1.6937838965373384e-05, |
| "loss": 0.2689, |
| "step": 1852 |
| }, |
| { |
| "epoch": 2.084411930219471, |
| "grad_norm": 0.29107842473943085, |
| "learning_rate": 1.691697955778056e-05, |
| "loss": 0.284, |
| "step": 1853 |
| }, |
| { |
| "epoch": 2.0855374226223975, |
| "grad_norm": 0.24607914318213508, |
| "learning_rate": 1.6896120150187734e-05, |
| "loss": 0.2957, |
| "step": 1854 |
| }, |
| { |
| "epoch": 2.0866629150253235, |
| "grad_norm": 0.21651709890692455, |
| "learning_rate": 1.687526074259491e-05, |
| "loss": 0.2677, |
| "step": 1855 |
| }, |
| { |
| "epoch": 2.08778840742825, |
| "grad_norm": 0.22707602957596063, |
| "learning_rate": 1.6854401335002088e-05, |
| "loss": 0.2854, |
| "step": 1856 |
| }, |
| { |
| "epoch": 2.088913899831176, |
| "grad_norm": 0.24846772345755247, |
| "learning_rate": 1.6833541927409262e-05, |
| "loss": 0.2679, |
| "step": 1857 |
| }, |
| { |
| "epoch": 2.0900393922341025, |
| "grad_norm": 0.27573122817807033, |
| "learning_rate": 1.681268251981644e-05, |
| "loss": 0.3007, |
| "step": 1858 |
| }, |
| { |
| "epoch": 2.0911648846370285, |
| "grad_norm": 0.23927598344656173, |
| "learning_rate": 1.6791823112223616e-05, |
| "loss": 0.2754, |
| "step": 1859 |
| }, |
| { |
| "epoch": 2.092290377039955, |
| "grad_norm": 0.23518656387715997, |
| "learning_rate": 1.677096370463079e-05, |
| "loss": 0.2766, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.093415869442881, |
| "grad_norm": 0.24448942505615562, |
| "learning_rate": 1.6750104297037966e-05, |
| "loss": 0.2913, |
| "step": 1861 |
| }, |
| { |
| "epoch": 2.0945413618458075, |
| "grad_norm": 0.2336572648593039, |
| "learning_rate": 1.672924488944514e-05, |
| "loss": 0.2942, |
| "step": 1862 |
| }, |
| { |
| "epoch": 2.095666854248734, |
| "grad_norm": 0.22716116914003923, |
| "learning_rate": 1.6708385481852313e-05, |
| "loss": 0.2881, |
| "step": 1863 |
| }, |
| { |
| "epoch": 2.09679234665166, |
| "grad_norm": 0.2849566981299875, |
| "learning_rate": 1.668752607425949e-05, |
| "loss": 0.2805, |
| "step": 1864 |
| }, |
| { |
| "epoch": 2.0979178390545865, |
| "grad_norm": 0.21858945358126292, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.2807, |
| "step": 1865 |
| }, |
| { |
| "epoch": 2.0990433314575125, |
| "grad_norm": 0.23697889851760048, |
| "learning_rate": 1.664580725907384e-05, |
| "loss": 0.2791, |
| "step": 1866 |
| }, |
| { |
| "epoch": 2.100168823860439, |
| "grad_norm": 0.23126363606688877, |
| "learning_rate": 1.6624947851481018e-05, |
| "loss": 0.2878, |
| "step": 1867 |
| }, |
| { |
| "epoch": 2.101294316263365, |
| "grad_norm": 0.2651888938021785, |
| "learning_rate": 1.6604088443888195e-05, |
| "loss": 0.2859, |
| "step": 1868 |
| }, |
| { |
| "epoch": 2.1024198086662915, |
| "grad_norm": 0.23270730320241492, |
| "learning_rate": 1.658322903629537e-05, |
| "loss": 0.296, |
| "step": 1869 |
| }, |
| { |
| "epoch": 2.103545301069218, |
| "grad_norm": 0.3042844752542814, |
| "learning_rate": 1.6562369628702546e-05, |
| "loss": 0.3132, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.104670793472144, |
| "grad_norm": 0.24339368794150576, |
| "learning_rate": 1.6541510221109723e-05, |
| "loss": 0.2766, |
| "step": 1871 |
| }, |
| { |
| "epoch": 2.1057962858750705, |
| "grad_norm": 0.26285249575918274, |
| "learning_rate": 1.6520650813516896e-05, |
| "loss": 0.285, |
| "step": 1872 |
| }, |
| { |
| "epoch": 2.1069217782779965, |
| "grad_norm": 0.25652733532840694, |
| "learning_rate": 1.6499791405924073e-05, |
| "loss": 0.294, |
| "step": 1873 |
| }, |
| { |
| "epoch": 2.108047270680923, |
| "grad_norm": 0.2543828171392836, |
| "learning_rate": 1.6478931998331247e-05, |
| "loss": 0.2746, |
| "step": 1874 |
| }, |
| { |
| "epoch": 2.109172763083849, |
| "grad_norm": 0.2352973105601401, |
| "learning_rate": 1.6458072590738424e-05, |
| "loss": 0.2743, |
| "step": 1875 |
| }, |
| { |
| "epoch": 2.1102982554867755, |
| "grad_norm": 0.25036103571955426, |
| "learning_rate": 1.64372131831456e-05, |
| "loss": 0.2909, |
| "step": 1876 |
| }, |
| { |
| "epoch": 2.1114237478897016, |
| "grad_norm": 0.30189436640559725, |
| "learning_rate": 1.6416353775552774e-05, |
| "loss": 0.2969, |
| "step": 1877 |
| }, |
| { |
| "epoch": 2.112549240292628, |
| "grad_norm": 0.24769249438614963, |
| "learning_rate": 1.639549436795995e-05, |
| "loss": 0.282, |
| "step": 1878 |
| }, |
| { |
| "epoch": 2.1136747326955545, |
| "grad_norm": 0.25738252269107714, |
| "learning_rate": 1.6374634960367128e-05, |
| "loss": 0.2987, |
| "step": 1879 |
| }, |
| { |
| "epoch": 2.1148002250984805, |
| "grad_norm": 0.3340073534477609, |
| "learning_rate": 1.6353775552774302e-05, |
| "loss": 0.2962, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.115925717501407, |
| "grad_norm": 0.2641953651128952, |
| "learning_rate": 1.633291614518148e-05, |
| "loss": 0.2827, |
| "step": 1881 |
| }, |
| { |
| "epoch": 2.117051209904333, |
| "grad_norm": 0.24068009657515393, |
| "learning_rate": 1.6312056737588656e-05, |
| "loss": 0.2795, |
| "step": 1882 |
| }, |
| { |
| "epoch": 2.1181767023072595, |
| "grad_norm": 0.2507429416561855, |
| "learning_rate": 1.6291197329995826e-05, |
| "loss": 0.285, |
| "step": 1883 |
| }, |
| { |
| "epoch": 2.1193021947101855, |
| "grad_norm": 0.2533889963610055, |
| "learning_rate": 1.6270337922403003e-05, |
| "loss": 0.2941, |
| "step": 1884 |
| }, |
| { |
| "epoch": 2.120427687113112, |
| "grad_norm": 0.24688775417233455, |
| "learning_rate": 1.624947851481018e-05, |
| "loss": 0.3012, |
| "step": 1885 |
| }, |
| { |
| "epoch": 2.121553179516038, |
| "grad_norm": 0.24915605314972172, |
| "learning_rate": 1.6228619107217354e-05, |
| "loss": 0.2993, |
| "step": 1886 |
| }, |
| { |
| "epoch": 2.1226786719189645, |
| "grad_norm": 0.279903262549959, |
| "learning_rate": 1.620775969962453e-05, |
| "loss": 0.2834, |
| "step": 1887 |
| }, |
| { |
| "epoch": 2.123804164321891, |
| "grad_norm": 0.23182376171306077, |
| "learning_rate": 1.6186900292031708e-05, |
| "loss": 0.2916, |
| "step": 1888 |
| }, |
| { |
| "epoch": 2.124929656724817, |
| "grad_norm": 0.23000197495504152, |
| "learning_rate": 1.616604088443888e-05, |
| "loss": 0.2747, |
| "step": 1889 |
| }, |
| { |
| "epoch": 2.1260551491277435, |
| "grad_norm": 0.23823286526237422, |
| "learning_rate": 1.6145181476846058e-05, |
| "loss": 0.2781, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.1271806415306695, |
| "grad_norm": 0.21627846743690535, |
| "learning_rate": 1.6124322069253235e-05, |
| "loss": 0.2706, |
| "step": 1891 |
| }, |
| { |
| "epoch": 2.128306133933596, |
| "grad_norm": 0.23892946673013887, |
| "learning_rate": 1.610346266166041e-05, |
| "loss": 0.2914, |
| "step": 1892 |
| }, |
| { |
| "epoch": 2.129431626336522, |
| "grad_norm": 0.2656649096475775, |
| "learning_rate": 1.6082603254067586e-05, |
| "loss": 0.2866, |
| "step": 1893 |
| }, |
| { |
| "epoch": 2.1305571187394485, |
| "grad_norm": 0.2227237824020542, |
| "learning_rate": 1.6061743846474763e-05, |
| "loss": 0.2625, |
| "step": 1894 |
| }, |
| { |
| "epoch": 2.1316826111423746, |
| "grad_norm": 0.23235111483198348, |
| "learning_rate": 1.6040884438881936e-05, |
| "loss": 0.2859, |
| "step": 1895 |
| }, |
| { |
| "epoch": 2.132808103545301, |
| "grad_norm": 0.25920239106869064, |
| "learning_rate": 1.6020025031289113e-05, |
| "loss": 0.2959, |
| "step": 1896 |
| }, |
| { |
| "epoch": 2.1339335959482275, |
| "grad_norm": 0.23719185530213213, |
| "learning_rate": 1.5999165623696287e-05, |
| "loss": 0.2897, |
| "step": 1897 |
| }, |
| { |
| "epoch": 2.1350590883511535, |
| "grad_norm": 0.22876937310915393, |
| "learning_rate": 1.5978306216103464e-05, |
| "loss": 0.2788, |
| "step": 1898 |
| }, |
| { |
| "epoch": 2.13618458075408, |
| "grad_norm": 0.26616238576961354, |
| "learning_rate": 1.595744680851064e-05, |
| "loss": 0.2889, |
| "step": 1899 |
| }, |
| { |
| "epoch": 2.137310073157006, |
| "grad_norm": 0.2166404539813475, |
| "learning_rate": 1.5936587400917814e-05, |
| "loss": 0.282, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.1384355655599325, |
| "grad_norm": 0.23700101129905038, |
| "learning_rate": 1.591572799332499e-05, |
| "loss": 0.2968, |
| "step": 1901 |
| }, |
| { |
| "epoch": 2.1395610579628586, |
| "grad_norm": 0.2285745331225241, |
| "learning_rate": 1.589486858573217e-05, |
| "loss": 0.2841, |
| "step": 1902 |
| }, |
| { |
| "epoch": 2.140686550365785, |
| "grad_norm": 0.23783838496188303, |
| "learning_rate": 1.5874009178139342e-05, |
| "loss": 0.2909, |
| "step": 1903 |
| }, |
| { |
| "epoch": 2.1418120427687115, |
| "grad_norm": 0.23082103720915573, |
| "learning_rate": 1.5853149770546516e-05, |
| "loss": 0.2824, |
| "step": 1904 |
| }, |
| { |
| "epoch": 2.1429375351716375, |
| "grad_norm": 0.25094828821607146, |
| "learning_rate": 1.5832290362953693e-05, |
| "loss": 0.285, |
| "step": 1905 |
| }, |
| { |
| "epoch": 2.144063027574564, |
| "grad_norm": 0.22431109979899386, |
| "learning_rate": 1.5811430955360866e-05, |
| "loss": 0.2737, |
| "step": 1906 |
| }, |
| { |
| "epoch": 2.14518851997749, |
| "grad_norm": 0.22492379294000237, |
| "learning_rate": 1.5790571547768043e-05, |
| "loss": 0.2726, |
| "step": 1907 |
| }, |
| { |
| "epoch": 2.1463140123804165, |
| "grad_norm": 0.2314053442523269, |
| "learning_rate": 1.576971214017522e-05, |
| "loss": 0.2754, |
| "step": 1908 |
| }, |
| { |
| "epoch": 2.1474395047833426, |
| "grad_norm": 0.24673230264182605, |
| "learning_rate": 1.5748852732582394e-05, |
| "loss": 0.2921, |
| "step": 1909 |
| }, |
| { |
| "epoch": 2.148564997186269, |
| "grad_norm": 0.23606707383444092, |
| "learning_rate": 1.572799332498957e-05, |
| "loss": 0.2804, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.1496904895891955, |
| "grad_norm": 0.235724127482375, |
| "learning_rate": 1.5707133917396748e-05, |
| "loss": 0.2861, |
| "step": 1911 |
| }, |
| { |
| "epoch": 2.1508159819921215, |
| "grad_norm": 0.24483607505245927, |
| "learning_rate": 1.568627450980392e-05, |
| "loss": 0.2878, |
| "step": 1912 |
| }, |
| { |
| "epoch": 2.151941474395048, |
| "grad_norm": 0.2552535556772291, |
| "learning_rate": 1.56654151022111e-05, |
| "loss": 0.2857, |
| "step": 1913 |
| }, |
| { |
| "epoch": 2.153066966797974, |
| "grad_norm": 0.22983484882907804, |
| "learning_rate": 1.5644555694618275e-05, |
| "loss": 0.2872, |
| "step": 1914 |
| }, |
| { |
| "epoch": 2.1541924592009005, |
| "grad_norm": 0.25772716013553465, |
| "learning_rate": 1.562369628702545e-05, |
| "loss": 0.2959, |
| "step": 1915 |
| }, |
| { |
| "epoch": 2.1553179516038266, |
| "grad_norm": 0.29415752414459184, |
| "learning_rate": 1.5602836879432626e-05, |
| "loss": 0.2909, |
| "step": 1916 |
| }, |
| { |
| "epoch": 2.156443444006753, |
| "grad_norm": 0.2564449243204837, |
| "learning_rate": 1.55819774718398e-05, |
| "loss": 0.3047, |
| "step": 1917 |
| }, |
| { |
| "epoch": 2.157568936409679, |
| "grad_norm": 0.2357261136445965, |
| "learning_rate": 1.5561118064246976e-05, |
| "loss": 0.2891, |
| "step": 1918 |
| }, |
| { |
| "epoch": 2.1586944288126055, |
| "grad_norm": 0.23236268840383198, |
| "learning_rate": 1.5540258656654153e-05, |
| "loss": 0.2936, |
| "step": 1919 |
| }, |
| { |
| "epoch": 2.159819921215532, |
| "grad_norm": 0.2526992651991741, |
| "learning_rate": 1.5519399249061327e-05, |
| "loss": 0.279, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.160945413618458, |
| "grad_norm": 0.26601612523224494, |
| "learning_rate": 1.5498539841468504e-05, |
| "loss": 0.2805, |
| "step": 1921 |
| }, |
| { |
| "epoch": 2.1620709060213845, |
| "grad_norm": 0.23000298824921414, |
| "learning_rate": 1.547768043387568e-05, |
| "loss": 0.2837, |
| "step": 1922 |
| }, |
| { |
| "epoch": 2.1631963984243106, |
| "grad_norm": 0.24154706706349996, |
| "learning_rate": 1.5456821026282855e-05, |
| "loss": 0.2846, |
| "step": 1923 |
| }, |
| { |
| "epoch": 2.164321890827237, |
| "grad_norm": 0.2360397761989054, |
| "learning_rate": 1.543596161869003e-05, |
| "loss": 0.283, |
| "step": 1924 |
| }, |
| { |
| "epoch": 2.165447383230163, |
| "grad_norm": 0.2218092507800359, |
| "learning_rate": 1.5415102211097205e-05, |
| "loss": 0.2771, |
| "step": 1925 |
| }, |
| { |
| "epoch": 2.1665728756330895, |
| "grad_norm": 0.2386052630849636, |
| "learning_rate": 1.539424280350438e-05, |
| "loss": 0.2924, |
| "step": 1926 |
| }, |
| { |
| "epoch": 2.1676983680360156, |
| "grad_norm": 0.24947895655376598, |
| "learning_rate": 1.5373383395911556e-05, |
| "loss": 0.2925, |
| "step": 1927 |
| }, |
| { |
| "epoch": 2.168823860438942, |
| "grad_norm": 0.2800300772115473, |
| "learning_rate": 1.5352523988318733e-05, |
| "loss": 0.2931, |
| "step": 1928 |
| }, |
| { |
| "epoch": 2.1699493528418685, |
| "grad_norm": 0.22636221415787847, |
| "learning_rate": 1.5331664580725906e-05, |
| "loss": 0.2875, |
| "step": 1929 |
| }, |
| { |
| "epoch": 2.1710748452447945, |
| "grad_norm": 0.24386646248262941, |
| "learning_rate": 1.5310805173133083e-05, |
| "loss": 0.2905, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.172200337647721, |
| "grad_norm": 0.22365055654311475, |
| "learning_rate": 1.528994576554026e-05, |
| "loss": 0.2802, |
| "step": 1931 |
| }, |
| { |
| "epoch": 2.173325830050647, |
| "grad_norm": 0.25602509803802304, |
| "learning_rate": 1.5269086357947434e-05, |
| "loss": 0.2724, |
| "step": 1932 |
| }, |
| { |
| "epoch": 2.1744513224535735, |
| "grad_norm": 0.20551321646228457, |
| "learning_rate": 1.5248226950354611e-05, |
| "loss": 0.2721, |
| "step": 1933 |
| }, |
| { |
| "epoch": 2.1755768148564996, |
| "grad_norm": 0.22807897179549413, |
| "learning_rate": 1.5227367542761786e-05, |
| "loss": 0.2892, |
| "step": 1934 |
| }, |
| { |
| "epoch": 2.176702307259426, |
| "grad_norm": 0.25852860308404757, |
| "learning_rate": 1.5206508135168961e-05, |
| "loss": 0.3027, |
| "step": 1935 |
| }, |
| { |
| "epoch": 2.177827799662352, |
| "grad_norm": 0.2279934128116949, |
| "learning_rate": 1.5185648727576138e-05, |
| "loss": 0.2801, |
| "step": 1936 |
| }, |
| { |
| "epoch": 2.1789532920652785, |
| "grad_norm": 0.2351411289469844, |
| "learning_rate": 1.5164789319983314e-05, |
| "loss": 0.28, |
| "step": 1937 |
| }, |
| { |
| "epoch": 2.180078784468205, |
| "grad_norm": 0.21552640522315936, |
| "learning_rate": 1.5143929912390489e-05, |
| "loss": 0.2947, |
| "step": 1938 |
| }, |
| { |
| "epoch": 2.181204276871131, |
| "grad_norm": 0.24626801791538866, |
| "learning_rate": 1.5123070504797664e-05, |
| "loss": 0.2783, |
| "step": 1939 |
| }, |
| { |
| "epoch": 2.1823297692740575, |
| "grad_norm": 0.23012350879449098, |
| "learning_rate": 1.5102211097204841e-05, |
| "loss": 0.2774, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.1834552616769836, |
| "grad_norm": 0.23081070838683507, |
| "learning_rate": 1.5081351689612017e-05, |
| "loss": 0.2799, |
| "step": 1941 |
| }, |
| { |
| "epoch": 2.18458075407991, |
| "grad_norm": 0.2490023091368887, |
| "learning_rate": 1.5060492282019192e-05, |
| "loss": 0.2916, |
| "step": 1942 |
| }, |
| { |
| "epoch": 2.185706246482836, |
| "grad_norm": 0.23226830279863933, |
| "learning_rate": 1.5039632874426369e-05, |
| "loss": 0.274, |
| "step": 1943 |
| }, |
| { |
| "epoch": 2.1868317388857625, |
| "grad_norm": 0.23814945426894574, |
| "learning_rate": 1.5018773466833544e-05, |
| "loss": 0.2987, |
| "step": 1944 |
| }, |
| { |
| "epoch": 2.1879572312886886, |
| "grad_norm": 0.22888208424137457, |
| "learning_rate": 1.4997914059240718e-05, |
| "loss": 0.2809, |
| "step": 1945 |
| }, |
| { |
| "epoch": 2.189082723691615, |
| "grad_norm": 0.22117598909045155, |
| "learning_rate": 1.4977054651647893e-05, |
| "loss": 0.2869, |
| "step": 1946 |
| }, |
| { |
| "epoch": 2.1902082160945415, |
| "grad_norm": 0.2635412507153887, |
| "learning_rate": 1.4956195244055068e-05, |
| "loss": 0.3102, |
| "step": 1947 |
| }, |
| { |
| "epoch": 2.1913337084974676, |
| "grad_norm": 0.21434697577713013, |
| "learning_rate": 1.4935335836462244e-05, |
| "loss": 0.2748, |
| "step": 1948 |
| }, |
| { |
| "epoch": 2.192459200900394, |
| "grad_norm": 0.23605470994586675, |
| "learning_rate": 1.491447642886942e-05, |
| "loss": 0.2859, |
| "step": 1949 |
| }, |
| { |
| "epoch": 2.19358469330332, |
| "grad_norm": 0.2405759189766832, |
| "learning_rate": 1.4893617021276596e-05, |
| "loss": 0.2942, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.1947101857062465, |
| "grad_norm": 0.22131821842232993, |
| "learning_rate": 1.4872757613683771e-05, |
| "loss": 0.2884, |
| "step": 1951 |
| }, |
| { |
| "epoch": 2.1958356781091726, |
| "grad_norm": 0.23216071326486187, |
| "learning_rate": 1.4851898206090946e-05, |
| "loss": 0.2741, |
| "step": 1952 |
| }, |
| { |
| "epoch": 2.196961170512099, |
| "grad_norm": 0.2261133526570407, |
| "learning_rate": 1.4831038798498123e-05, |
| "loss": 0.2963, |
| "step": 1953 |
| }, |
| { |
| "epoch": 2.1980866629150255, |
| "grad_norm": 0.2302291451269135, |
| "learning_rate": 1.4810179390905299e-05, |
| "loss": 0.2828, |
| "step": 1954 |
| }, |
| { |
| "epoch": 2.1992121553179516, |
| "grad_norm": 0.2535578449757302, |
| "learning_rate": 1.4789319983312474e-05, |
| "loss": 0.3054, |
| "step": 1955 |
| }, |
| { |
| "epoch": 2.200337647720878, |
| "grad_norm": 0.2353316415549731, |
| "learning_rate": 1.4768460575719651e-05, |
| "loss": 0.2851, |
| "step": 1956 |
| }, |
| { |
| "epoch": 2.201463140123804, |
| "grad_norm": 0.22300891391695027, |
| "learning_rate": 1.4747601168126826e-05, |
| "loss": 0.2685, |
| "step": 1957 |
| }, |
| { |
| "epoch": 2.2025886325267305, |
| "grad_norm": 0.24986486980542502, |
| "learning_rate": 1.4726741760534002e-05, |
| "loss": 0.2956, |
| "step": 1958 |
| }, |
| { |
| "epoch": 2.2037141249296566, |
| "grad_norm": 0.2180771271590878, |
| "learning_rate": 1.4705882352941177e-05, |
| "loss": 0.2922, |
| "step": 1959 |
| }, |
| { |
| "epoch": 2.204839617332583, |
| "grad_norm": 0.22680565869396152, |
| "learning_rate": 1.4685022945348354e-05, |
| "loss": 0.2909, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.205965109735509, |
| "grad_norm": 0.23513680764714112, |
| "learning_rate": 1.4664163537755529e-05, |
| "loss": 0.2786, |
| "step": 1961 |
| }, |
| { |
| "epoch": 2.2070906021384356, |
| "grad_norm": 0.24973876085692792, |
| "learning_rate": 1.4643304130162704e-05, |
| "loss": 0.2853, |
| "step": 1962 |
| }, |
| { |
| "epoch": 2.208216094541362, |
| "grad_norm": 0.22544610054019418, |
| "learning_rate": 1.4622444722569881e-05, |
| "loss": 0.2831, |
| "step": 1963 |
| }, |
| { |
| "epoch": 2.209341586944288, |
| "grad_norm": 0.22330625417293162, |
| "learning_rate": 1.4601585314977057e-05, |
| "loss": 0.2867, |
| "step": 1964 |
| }, |
| { |
| "epoch": 2.2104670793472145, |
| "grad_norm": 0.22525152317015681, |
| "learning_rate": 1.4580725907384232e-05, |
| "loss": 0.29, |
| "step": 1965 |
| }, |
| { |
| "epoch": 2.2115925717501406, |
| "grad_norm": 0.22249714982600474, |
| "learning_rate": 1.4559866499791406e-05, |
| "loss": 0.299, |
| "step": 1966 |
| }, |
| { |
| "epoch": 2.212718064153067, |
| "grad_norm": 0.24092275848280195, |
| "learning_rate": 1.4539007092198581e-05, |
| "loss": 0.2896, |
| "step": 1967 |
| }, |
| { |
| "epoch": 2.213843556555993, |
| "grad_norm": 0.22252299992217103, |
| "learning_rate": 1.4518147684605756e-05, |
| "loss": 0.258, |
| "step": 1968 |
| }, |
| { |
| "epoch": 2.2149690489589196, |
| "grad_norm": 0.23636046190697863, |
| "learning_rate": 1.4497288277012933e-05, |
| "loss": 0.2908, |
| "step": 1969 |
| }, |
| { |
| "epoch": 2.216094541361846, |
| "grad_norm": 0.2596597997389332, |
| "learning_rate": 1.4476428869420108e-05, |
| "loss": 0.27, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.217220033764772, |
| "grad_norm": 0.2531683961826357, |
| "learning_rate": 1.4455569461827284e-05, |
| "loss": 0.2832, |
| "step": 1971 |
| }, |
| { |
| "epoch": 2.2183455261676985, |
| "grad_norm": 0.2593605264440698, |
| "learning_rate": 1.443471005423446e-05, |
| "loss": 0.2841, |
| "step": 1972 |
| }, |
| { |
| "epoch": 2.2194710185706246, |
| "grad_norm": 0.26699737148304314, |
| "learning_rate": 1.4413850646641636e-05, |
| "loss": 0.2799, |
| "step": 1973 |
| }, |
| { |
| "epoch": 2.220596510973551, |
| "grad_norm": 0.2294951675397686, |
| "learning_rate": 1.4392991239048811e-05, |
| "loss": 0.2909, |
| "step": 1974 |
| }, |
| { |
| "epoch": 2.221722003376477, |
| "grad_norm": 0.2245538365625567, |
| "learning_rate": 1.4372131831455987e-05, |
| "loss": 0.2799, |
| "step": 1975 |
| }, |
| { |
| "epoch": 2.2228474957794035, |
| "grad_norm": 0.2646561800422373, |
| "learning_rate": 1.4351272423863164e-05, |
| "loss": 0.2765, |
| "step": 1976 |
| }, |
| { |
| "epoch": 2.2239729881823296, |
| "grad_norm": 0.21416819505340884, |
| "learning_rate": 1.4330413016270339e-05, |
| "loss": 0.2801, |
| "step": 1977 |
| }, |
| { |
| "epoch": 2.225098480585256, |
| "grad_norm": 0.21948393418095735, |
| "learning_rate": 1.4309553608677514e-05, |
| "loss": 0.2866, |
| "step": 1978 |
| }, |
| { |
| "epoch": 2.2262239729881825, |
| "grad_norm": 0.22822972297920066, |
| "learning_rate": 1.428869420108469e-05, |
| "loss": 0.287, |
| "step": 1979 |
| }, |
| { |
| "epoch": 2.2273494653911086, |
| "grad_norm": 0.2160982046115744, |
| "learning_rate": 1.4267834793491866e-05, |
| "loss": 0.2925, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.228474957794035, |
| "grad_norm": 0.23144554832269953, |
| "learning_rate": 1.4246975385899042e-05, |
| "loss": 0.3105, |
| "step": 1981 |
| }, |
| { |
| "epoch": 2.229600450196961, |
| "grad_norm": 0.2419669093281673, |
| "learning_rate": 1.4226115978306217e-05, |
| "loss": 0.2795, |
| "step": 1982 |
| }, |
| { |
| "epoch": 2.2307259425998875, |
| "grad_norm": 0.2333075873767841, |
| "learning_rate": 1.4205256570713394e-05, |
| "loss": 0.2879, |
| "step": 1983 |
| }, |
| { |
| "epoch": 2.2318514350028136, |
| "grad_norm": 0.2381375140609149, |
| "learning_rate": 1.418439716312057e-05, |
| "loss": 0.2969, |
| "step": 1984 |
| }, |
| { |
| "epoch": 2.23297692740574, |
| "grad_norm": 0.22837890307254083, |
| "learning_rate": 1.4163537755527745e-05, |
| "loss": 0.283, |
| "step": 1985 |
| }, |
| { |
| "epoch": 2.234102419808666, |
| "grad_norm": 0.24448338514717682, |
| "learning_rate": 1.414267834793492e-05, |
| "loss": 0.3013, |
| "step": 1986 |
| }, |
| { |
| "epoch": 2.2352279122115926, |
| "grad_norm": 0.23420776111487138, |
| "learning_rate": 1.4121818940342093e-05, |
| "loss": 0.2878, |
| "step": 1987 |
| }, |
| { |
| "epoch": 2.236353404614519, |
| "grad_norm": 0.23579942650757943, |
| "learning_rate": 1.4100959532749269e-05, |
| "loss": 0.288, |
| "step": 1988 |
| }, |
| { |
| "epoch": 2.237478897017445, |
| "grad_norm": 0.23043040793992384, |
| "learning_rate": 1.4080100125156446e-05, |
| "loss": 0.305, |
| "step": 1989 |
| }, |
| { |
| "epoch": 2.2386043894203715, |
| "grad_norm": 0.24659768389490117, |
| "learning_rate": 1.4059240717563621e-05, |
| "loss": 0.2778, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.2397298818232976, |
| "grad_norm": 0.2525101439952681, |
| "learning_rate": 1.4038381309970796e-05, |
| "loss": 0.2797, |
| "step": 1991 |
| }, |
| { |
| "epoch": 2.240855374226224, |
| "grad_norm": 0.2180718742006463, |
| "learning_rate": 1.4017521902377973e-05, |
| "loss": 0.2824, |
| "step": 1992 |
| }, |
| { |
| "epoch": 2.24198086662915, |
| "grad_norm": 0.23039632064460322, |
| "learning_rate": 1.3996662494785149e-05, |
| "loss": 0.2732, |
| "step": 1993 |
| }, |
| { |
| "epoch": 2.2431063590320766, |
| "grad_norm": 0.24390939737808814, |
| "learning_rate": 1.3975803087192324e-05, |
| "loss": 0.2942, |
| "step": 1994 |
| }, |
| { |
| "epoch": 2.2442318514350026, |
| "grad_norm": 0.22495659632157705, |
| "learning_rate": 1.39549436795995e-05, |
| "loss": 0.2786, |
| "step": 1995 |
| }, |
| { |
| "epoch": 2.245357343837929, |
| "grad_norm": 0.2220671184762533, |
| "learning_rate": 1.3934084272006676e-05, |
| "loss": 0.2902, |
| "step": 1996 |
| }, |
| { |
| "epoch": 2.2464828362408555, |
| "grad_norm": 0.2063740174423525, |
| "learning_rate": 1.3913224864413851e-05, |
| "loss": 0.2794, |
| "step": 1997 |
| }, |
| { |
| "epoch": 2.2476083286437816, |
| "grad_norm": 0.22864397206918258, |
| "learning_rate": 1.3892365456821027e-05, |
| "loss": 0.2899, |
| "step": 1998 |
| }, |
| { |
| "epoch": 2.248733821046708, |
| "grad_norm": 0.22641553859678237, |
| "learning_rate": 1.3871506049228202e-05, |
| "loss": 0.2913, |
| "step": 1999 |
| }, |
| { |
| "epoch": 2.249859313449634, |
| "grad_norm": 0.23273883384894037, |
| "learning_rate": 1.3850646641635379e-05, |
| "loss": 0.2896, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.2509848058525606, |
| "grad_norm": 0.25883856114515486, |
| "learning_rate": 1.3829787234042554e-05, |
| "loss": 0.2853, |
| "step": 2001 |
| }, |
| { |
| "epoch": 2.2521102982554866, |
| "grad_norm": 0.24543011458191846, |
| "learning_rate": 1.380892782644973e-05, |
| "loss": 0.2921, |
| "step": 2002 |
| }, |
| { |
| "epoch": 2.253235790658413, |
| "grad_norm": 0.2573780345794268, |
| "learning_rate": 1.3788068418856907e-05, |
| "loss": 0.2987, |
| "step": 2003 |
| }, |
| { |
| "epoch": 2.254361283061339, |
| "grad_norm": 0.2344713538028616, |
| "learning_rate": 1.3767209011264082e-05, |
| "loss": 0.2788, |
| "step": 2004 |
| }, |
| { |
| "epoch": 2.2554867754642656, |
| "grad_norm": 0.2655728653325266, |
| "learning_rate": 1.3746349603671257e-05, |
| "loss": 0.2891, |
| "step": 2005 |
| }, |
| { |
| "epoch": 2.256612267867192, |
| "grad_norm": 0.24696226562693468, |
| "learning_rate": 1.3725490196078432e-05, |
| "loss": 0.2832, |
| "step": 2006 |
| }, |
| { |
| "epoch": 2.257737760270118, |
| "grad_norm": 0.27074526345994904, |
| "learning_rate": 1.370463078848561e-05, |
| "loss": 0.2991, |
| "step": 2007 |
| }, |
| { |
| "epoch": 2.2588632526730446, |
| "grad_norm": 0.21307871627731073, |
| "learning_rate": 1.3683771380892781e-05, |
| "loss": 0.2802, |
| "step": 2008 |
| }, |
| { |
| "epoch": 2.2599887450759706, |
| "grad_norm": 0.6066306419781285, |
| "learning_rate": 1.3662911973299958e-05, |
| "loss": 0.3119, |
| "step": 2009 |
| }, |
| { |
| "epoch": 2.261114237478897, |
| "grad_norm": 0.2354767198892578, |
| "learning_rate": 1.3642052565707134e-05, |
| "loss": 0.2931, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.2622397298818235, |
| "grad_norm": 0.3113759768538715, |
| "learning_rate": 1.3621193158114309e-05, |
| "loss": 0.3097, |
| "step": 2011 |
| }, |
| { |
| "epoch": 2.2633652222847496, |
| "grad_norm": 0.23868520954039024, |
| "learning_rate": 1.3600333750521486e-05, |
| "loss": 0.295, |
| "step": 2012 |
| }, |
| { |
| "epoch": 2.264490714687676, |
| "grad_norm": 0.25599462430091524, |
| "learning_rate": 1.3579474342928661e-05, |
| "loss": 0.2934, |
| "step": 2013 |
| }, |
| { |
| "epoch": 2.265616207090602, |
| "grad_norm": 0.2378852312729475, |
| "learning_rate": 1.3558614935335836e-05, |
| "loss": 0.2905, |
| "step": 2014 |
| }, |
| { |
| "epoch": 2.2667416994935286, |
| "grad_norm": 0.23537606322412846, |
| "learning_rate": 1.3537755527743012e-05, |
| "loss": 0.2933, |
| "step": 2015 |
| }, |
| { |
| "epoch": 2.2678671918964546, |
| "grad_norm": 0.24999030117110338, |
| "learning_rate": 1.3516896120150189e-05, |
| "loss": 0.2907, |
| "step": 2016 |
| }, |
| { |
| "epoch": 2.268992684299381, |
| "grad_norm": 0.2189977535068501, |
| "learning_rate": 1.3496036712557364e-05, |
| "loss": 0.2768, |
| "step": 2017 |
| }, |
| { |
| "epoch": 2.270118176702307, |
| "grad_norm": 0.21605143200933585, |
| "learning_rate": 1.347517730496454e-05, |
| "loss": 0.2825, |
| "step": 2018 |
| }, |
| { |
| "epoch": 2.2712436691052336, |
| "grad_norm": 0.2190715820513759, |
| "learning_rate": 1.3454317897371716e-05, |
| "loss": 0.2985, |
| "step": 2019 |
| }, |
| { |
| "epoch": 2.27236916150816, |
| "grad_norm": 0.2419287070815025, |
| "learning_rate": 1.3433458489778892e-05, |
| "loss": 0.2812, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.273494653911086, |
| "grad_norm": 0.23856366222450073, |
| "learning_rate": 1.3412599082186067e-05, |
| "loss": 0.2754, |
| "step": 2021 |
| }, |
| { |
| "epoch": 2.2746201463140125, |
| "grad_norm": 0.23128552323354076, |
| "learning_rate": 1.3391739674593242e-05, |
| "loss": 0.2967, |
| "step": 2022 |
| }, |
| { |
| "epoch": 2.2757456387169386, |
| "grad_norm": 0.23110186859812204, |
| "learning_rate": 1.3370880267000419e-05, |
| "loss": 0.2905, |
| "step": 2023 |
| }, |
| { |
| "epoch": 2.276871131119865, |
| "grad_norm": 0.23791496512553711, |
| "learning_rate": 1.3350020859407594e-05, |
| "loss": 0.2956, |
| "step": 2024 |
| }, |
| { |
| "epoch": 2.277996623522791, |
| "grad_norm": 0.270895607021542, |
| "learning_rate": 1.332916145181477e-05, |
| "loss": 0.2979, |
| "step": 2025 |
| }, |
| { |
| "epoch": 2.2791221159257176, |
| "grad_norm": 0.2622847660820458, |
| "learning_rate": 1.3308302044221945e-05, |
| "loss": 0.2805, |
| "step": 2026 |
| }, |
| { |
| "epoch": 2.2802476083286436, |
| "grad_norm": 0.2451853343226485, |
| "learning_rate": 1.3287442636629122e-05, |
| "loss": 0.2849, |
| "step": 2027 |
| }, |
| { |
| "epoch": 2.28137310073157, |
| "grad_norm": 0.2181534341062286, |
| "learning_rate": 1.3266583229036297e-05, |
| "loss": 0.2843, |
| "step": 2028 |
| }, |
| { |
| "epoch": 2.2824985931344965, |
| "grad_norm": 0.2350791322319216, |
| "learning_rate": 1.3245723821443471e-05, |
| "loss": 0.2804, |
| "step": 2029 |
| }, |
| { |
| "epoch": 2.2836240855374226, |
| "grad_norm": 0.24384265303411898, |
| "learning_rate": 1.3224864413850646e-05, |
| "loss": 0.2844, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.284749577940349, |
| "grad_norm": 0.21471389480099612, |
| "learning_rate": 1.3204005006257821e-05, |
| "loss": 0.2766, |
| "step": 2031 |
| }, |
| { |
| "epoch": 2.285875070343275, |
| "grad_norm": 0.2558686689697758, |
| "learning_rate": 1.3183145598664998e-05, |
| "loss": 0.3006, |
| "step": 2032 |
| }, |
| { |
| "epoch": 2.2870005627462016, |
| "grad_norm": 0.24596519958308774, |
| "learning_rate": 1.3162286191072174e-05, |
| "loss": 0.2791, |
| "step": 2033 |
| }, |
| { |
| "epoch": 2.2881260551491276, |
| "grad_norm": 0.22178993068098377, |
| "learning_rate": 1.3141426783479349e-05, |
| "loss": 0.3006, |
| "step": 2034 |
| }, |
| { |
| "epoch": 2.289251547552054, |
| "grad_norm": 0.21211849808178426, |
| "learning_rate": 1.3120567375886524e-05, |
| "loss": 0.2879, |
| "step": 2035 |
| }, |
| { |
| "epoch": 2.29037703995498, |
| "grad_norm": 0.26189329024450775, |
| "learning_rate": 1.3099707968293701e-05, |
| "loss": 0.2919, |
| "step": 2036 |
| }, |
| { |
| "epoch": 2.2915025323579066, |
| "grad_norm": 0.24020801441451947, |
| "learning_rate": 1.3078848560700877e-05, |
| "loss": 0.2936, |
| "step": 2037 |
| }, |
| { |
| "epoch": 2.292628024760833, |
| "grad_norm": 0.2444872387207359, |
| "learning_rate": 1.3057989153108052e-05, |
| "loss": 0.3098, |
| "step": 2038 |
| }, |
| { |
| "epoch": 2.293753517163759, |
| "grad_norm": 0.21895214125433066, |
| "learning_rate": 1.3037129745515229e-05, |
| "loss": 0.2743, |
| "step": 2039 |
| }, |
| { |
| "epoch": 2.2948790095666856, |
| "grad_norm": 0.2496911198777528, |
| "learning_rate": 1.3016270337922404e-05, |
| "loss": 0.2918, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.2960045019696116, |
| "grad_norm": 2.8468163932596022, |
| "learning_rate": 1.299541093032958e-05, |
| "loss": 0.2845, |
| "step": 2041 |
| }, |
| { |
| "epoch": 2.297129994372538, |
| "grad_norm": 0.2414636263089686, |
| "learning_rate": 1.2974551522736755e-05, |
| "loss": 0.2854, |
| "step": 2042 |
| }, |
| { |
| "epoch": 2.298255486775464, |
| "grad_norm": 0.22863532322963662, |
| "learning_rate": 1.2953692115143932e-05, |
| "loss": 0.2769, |
| "step": 2043 |
| }, |
| { |
| "epoch": 2.2993809791783906, |
| "grad_norm": 0.20797566641270696, |
| "learning_rate": 1.2932832707551107e-05, |
| "loss": 0.2738, |
| "step": 2044 |
| }, |
| { |
| "epoch": 2.3005064715813166, |
| "grad_norm": 0.2813082678198765, |
| "learning_rate": 1.2911973299958282e-05, |
| "loss": 0.2919, |
| "step": 2045 |
| }, |
| { |
| "epoch": 2.301631963984243, |
| "grad_norm": 0.21880645593009593, |
| "learning_rate": 1.2891113892365458e-05, |
| "loss": 0.2845, |
| "step": 2046 |
| }, |
| { |
| "epoch": 2.3027574563871696, |
| "grad_norm": 0.21662277253245404, |
| "learning_rate": 1.2870254484772635e-05, |
| "loss": 0.2999, |
| "step": 2047 |
| }, |
| { |
| "epoch": 2.3038829487900956, |
| "grad_norm": 0.23410910980013766, |
| "learning_rate": 1.284939507717981e-05, |
| "loss": 0.2803, |
| "step": 2048 |
| }, |
| { |
| "epoch": 2.305008441193022, |
| "grad_norm": 0.24807871237848997, |
| "learning_rate": 1.2828535669586985e-05, |
| "loss": 0.2887, |
| "step": 2049 |
| }, |
| { |
| "epoch": 2.306133933595948, |
| "grad_norm": 0.23958912163692958, |
| "learning_rate": 1.2807676261994159e-05, |
| "loss": 0.2806, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.3072594259988746, |
| "grad_norm": 0.24170572287325667, |
| "learning_rate": 1.2786816854401334e-05, |
| "loss": 0.2911, |
| "step": 2051 |
| }, |
| { |
| "epoch": 2.3083849184018006, |
| "grad_norm": 0.2071987326770734, |
| "learning_rate": 1.2765957446808511e-05, |
| "loss": 0.281, |
| "step": 2052 |
| }, |
| { |
| "epoch": 2.309510410804727, |
| "grad_norm": 0.2685294387603238, |
| "learning_rate": 1.2745098039215686e-05, |
| "loss": 0.2988, |
| "step": 2053 |
| }, |
| { |
| "epoch": 2.310635903207653, |
| "grad_norm": 0.24356419885452857, |
| "learning_rate": 1.2724238631622862e-05, |
| "loss": 0.2918, |
| "step": 2054 |
| }, |
| { |
| "epoch": 2.3117613956105796, |
| "grad_norm": 0.22854669119255341, |
| "learning_rate": 1.2703379224030037e-05, |
| "loss": 0.2906, |
| "step": 2055 |
| }, |
| { |
| "epoch": 2.312886888013506, |
| "grad_norm": 0.24689465925397477, |
| "learning_rate": 1.2682519816437214e-05, |
| "loss": 0.2935, |
| "step": 2056 |
| }, |
| { |
| "epoch": 2.314012380416432, |
| "grad_norm": 0.21811055770500665, |
| "learning_rate": 1.2661660408844389e-05, |
| "loss": 0.3016, |
| "step": 2057 |
| }, |
| { |
| "epoch": 2.3151378728193586, |
| "grad_norm": 0.2493408748518838, |
| "learning_rate": 1.2640801001251564e-05, |
| "loss": 0.2834, |
| "step": 2058 |
| }, |
| { |
| "epoch": 2.3162633652222846, |
| "grad_norm": 0.25721873798899103, |
| "learning_rate": 1.2619941593658741e-05, |
| "loss": 0.2983, |
| "step": 2059 |
| }, |
| { |
| "epoch": 2.317388857625211, |
| "grad_norm": 0.22179194109950803, |
| "learning_rate": 1.2599082186065917e-05, |
| "loss": 0.2753, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.3185143500281375, |
| "grad_norm": 0.266595262773116, |
| "learning_rate": 1.2578222778473092e-05, |
| "loss": 0.2839, |
| "step": 2061 |
| }, |
| { |
| "epoch": 2.3196398424310636, |
| "grad_norm": 0.24206558428702046, |
| "learning_rate": 1.2557363370880267e-05, |
| "loss": 0.2853, |
| "step": 2062 |
| }, |
| { |
| "epoch": 2.32076533483399, |
| "grad_norm": 0.2454398984492763, |
| "learning_rate": 1.2536503963287444e-05, |
| "loss": 0.3028, |
| "step": 2063 |
| }, |
| { |
| "epoch": 2.321890827236916, |
| "grad_norm": 0.2321058588488482, |
| "learning_rate": 1.251564455569462e-05, |
| "loss": 0.269, |
| "step": 2064 |
| }, |
| { |
| "epoch": 2.3230163196398426, |
| "grad_norm": 0.27267795334721745, |
| "learning_rate": 1.2494785148101793e-05, |
| "loss": 0.2792, |
| "step": 2065 |
| }, |
| { |
| "epoch": 2.3241418120427686, |
| "grad_norm": 0.23234175584418776, |
| "learning_rate": 1.247392574050897e-05, |
| "loss": 0.28, |
| "step": 2066 |
| }, |
| { |
| "epoch": 2.325267304445695, |
| "grad_norm": 0.2063643654191112, |
| "learning_rate": 1.2453066332916145e-05, |
| "loss": 0.2742, |
| "step": 2067 |
| }, |
| { |
| "epoch": 2.326392796848621, |
| "grad_norm": 0.22497515405636748, |
| "learning_rate": 1.243220692532332e-05, |
| "loss": 0.2904, |
| "step": 2068 |
| }, |
| { |
| "epoch": 2.3275182892515476, |
| "grad_norm": 0.20800896572524227, |
| "learning_rate": 1.2411347517730498e-05, |
| "loss": 0.2974, |
| "step": 2069 |
| }, |
| { |
| "epoch": 2.328643781654474, |
| "grad_norm": 0.22460235366838985, |
| "learning_rate": 1.2390488110137673e-05, |
| "loss": 0.2777, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.3297692740574, |
| "grad_norm": 0.23776076812455357, |
| "learning_rate": 1.2369628702544848e-05, |
| "loss": 0.2829, |
| "step": 2071 |
| }, |
| { |
| "epoch": 2.3308947664603266, |
| "grad_norm": 0.2570845084981786, |
| "learning_rate": 1.2348769294952024e-05, |
| "loss": 0.2945, |
| "step": 2072 |
| }, |
| { |
| "epoch": 2.3320202588632526, |
| "grad_norm": 0.2385004836723248, |
| "learning_rate": 1.23279098873592e-05, |
| "loss": 0.2867, |
| "step": 2073 |
| }, |
| { |
| "epoch": 2.333145751266179, |
| "grad_norm": 0.24982697079123078, |
| "learning_rate": 1.2307050479766376e-05, |
| "loss": 0.2857, |
| "step": 2074 |
| }, |
| { |
| "epoch": 2.334271243669105, |
| "grad_norm": 0.24642888230370272, |
| "learning_rate": 1.2286191072173551e-05, |
| "loss": 0.3053, |
| "step": 2075 |
| }, |
| { |
| "epoch": 2.3353967360720316, |
| "grad_norm": 0.26130363264507717, |
| "learning_rate": 1.2265331664580726e-05, |
| "loss": 0.2916, |
| "step": 2076 |
| }, |
| { |
| "epoch": 2.3365222284749576, |
| "grad_norm": 0.2124033043327759, |
| "learning_rate": 1.2244472256987902e-05, |
| "loss": 0.2764, |
| "step": 2077 |
| }, |
| { |
| "epoch": 2.337647720877884, |
| "grad_norm": 0.2440455128961208, |
| "learning_rate": 1.2223612849395077e-05, |
| "loss": 0.3075, |
| "step": 2078 |
| }, |
| { |
| "epoch": 2.3387732132808106, |
| "grad_norm": 0.245304116279532, |
| "learning_rate": 1.2202753441802254e-05, |
| "loss": 0.2895, |
| "step": 2079 |
| }, |
| { |
| "epoch": 2.3398987056837366, |
| "grad_norm": 0.2372202784047367, |
| "learning_rate": 1.218189403420943e-05, |
| "loss": 0.2785, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.341024198086663, |
| "grad_norm": 0.23688709955054182, |
| "learning_rate": 1.2161034626616605e-05, |
| "loss": 0.2777, |
| "step": 2081 |
| }, |
| { |
| "epoch": 2.342149690489589, |
| "grad_norm": 0.2482625923726943, |
| "learning_rate": 1.214017521902378e-05, |
| "loss": 0.2833, |
| "step": 2082 |
| }, |
| { |
| "epoch": 2.3432751828925156, |
| "grad_norm": 0.22738968926541633, |
| "learning_rate": 1.2119315811430957e-05, |
| "loss": 0.2709, |
| "step": 2083 |
| }, |
| { |
| "epoch": 2.3444006752954416, |
| "grad_norm": 0.25147592555620085, |
| "learning_rate": 1.2098456403838132e-05, |
| "loss": 0.3008, |
| "step": 2084 |
| }, |
| { |
| "epoch": 2.345526167698368, |
| "grad_norm": 0.22363924741115862, |
| "learning_rate": 1.2077596996245307e-05, |
| "loss": 0.2872, |
| "step": 2085 |
| }, |
| { |
| "epoch": 2.346651660101294, |
| "grad_norm": 0.23011558044098404, |
| "learning_rate": 1.2056737588652483e-05, |
| "loss": 0.2901, |
| "step": 2086 |
| }, |
| { |
| "epoch": 2.3477771525042206, |
| "grad_norm": 0.232959308790496, |
| "learning_rate": 1.2035878181059658e-05, |
| "loss": 0.2859, |
| "step": 2087 |
| }, |
| { |
| "epoch": 2.348902644907147, |
| "grad_norm": 0.24124633231018813, |
| "learning_rate": 1.2015018773466833e-05, |
| "loss": 0.2946, |
| "step": 2088 |
| }, |
| { |
| "epoch": 2.350028137310073, |
| "grad_norm": 0.23315070454396, |
| "learning_rate": 1.199415936587401e-05, |
| "loss": 0.2719, |
| "step": 2089 |
| }, |
| { |
| "epoch": 2.3511536297129996, |
| "grad_norm": 0.21247783763819528, |
| "learning_rate": 1.1973299958281186e-05, |
| "loss": 0.28, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.3522791221159256, |
| "grad_norm": 0.23387492479149327, |
| "learning_rate": 1.195244055068836e-05, |
| "loss": 0.2792, |
| "step": 2091 |
| }, |
| { |
| "epoch": 2.353404614518852, |
| "grad_norm": 0.22205981665359048, |
| "learning_rate": 1.1931581143095536e-05, |
| "loss": 0.2869, |
| "step": 2092 |
| }, |
| { |
| "epoch": 2.354530106921778, |
| "grad_norm": 0.2407814917985092, |
| "learning_rate": 1.1910721735502713e-05, |
| "loss": 0.2837, |
| "step": 2093 |
| }, |
| { |
| "epoch": 2.3556555993247046, |
| "grad_norm": 0.22636696159410108, |
| "learning_rate": 1.1889862327909888e-05, |
| "loss": 0.2717, |
| "step": 2094 |
| }, |
| { |
| "epoch": 2.3567810917276306, |
| "grad_norm": 0.20010784358214667, |
| "learning_rate": 1.1869002920317064e-05, |
| "loss": 0.2638, |
| "step": 2095 |
| }, |
| { |
| "epoch": 2.357906584130557, |
| "grad_norm": 0.2302624047508185, |
| "learning_rate": 1.184814351272424e-05, |
| "loss": 0.2889, |
| "step": 2096 |
| }, |
| { |
| "epoch": 2.3590320765334836, |
| "grad_norm": 0.21841431009246395, |
| "learning_rate": 1.1827284105131414e-05, |
| "loss": 0.2836, |
| "step": 2097 |
| }, |
| { |
| "epoch": 2.3601575689364096, |
| "grad_norm": 0.21162560341411857, |
| "learning_rate": 1.180642469753859e-05, |
| "loss": 0.2744, |
| "step": 2098 |
| }, |
| { |
| "epoch": 2.361283061339336, |
| "grad_norm": 0.23437617625703946, |
| "learning_rate": 1.1785565289945767e-05, |
| "loss": 0.2897, |
| "step": 2099 |
| }, |
| { |
| "epoch": 2.362408553742262, |
| "grad_norm": 0.2443861444498022, |
| "learning_rate": 1.1764705882352942e-05, |
| "loss": 0.2773, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.3635340461451886, |
| "grad_norm": 0.20195447071682132, |
| "learning_rate": 1.1743846474760117e-05, |
| "loss": 0.2852, |
| "step": 2101 |
| }, |
| { |
| "epoch": 2.3646595385481146, |
| "grad_norm": 0.22050201926310495, |
| "learning_rate": 1.1722987067167292e-05, |
| "loss": 0.2811, |
| "step": 2102 |
| }, |
| { |
| "epoch": 2.365785030951041, |
| "grad_norm": 0.24700086215612232, |
| "learning_rate": 1.170212765957447e-05, |
| "loss": 0.2944, |
| "step": 2103 |
| }, |
| { |
| "epoch": 2.366910523353967, |
| "grad_norm": 0.21968358349344858, |
| "learning_rate": 1.1681268251981645e-05, |
| "loss": 0.2876, |
| "step": 2104 |
| }, |
| { |
| "epoch": 2.3680360157568936, |
| "grad_norm": 0.22004217949350546, |
| "learning_rate": 1.166040884438882e-05, |
| "loss": 0.298, |
| "step": 2105 |
| }, |
| { |
| "epoch": 2.36916150815982, |
| "grad_norm": 0.25166613532562226, |
| "learning_rate": 1.1639549436795997e-05, |
| "loss": 0.3039, |
| "step": 2106 |
| }, |
| { |
| "epoch": 2.370287000562746, |
| "grad_norm": 0.2470279532584483, |
| "learning_rate": 1.161869002920317e-05, |
| "loss": 0.2905, |
| "step": 2107 |
| }, |
| { |
| "epoch": 2.3714124929656726, |
| "grad_norm": 0.22645956564251657, |
| "learning_rate": 1.1597830621610346e-05, |
| "loss": 0.2857, |
| "step": 2108 |
| }, |
| { |
| "epoch": 2.3725379853685986, |
| "grad_norm": 0.236072091244946, |
| "learning_rate": 1.1576971214017523e-05, |
| "loss": 0.2876, |
| "step": 2109 |
| }, |
| { |
| "epoch": 2.373663477771525, |
| "grad_norm": 0.22344125624637598, |
| "learning_rate": 1.1556111806424698e-05, |
| "loss": 0.2871, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.3747889701744516, |
| "grad_norm": 0.2740686740181796, |
| "learning_rate": 1.1535252398831873e-05, |
| "loss": 0.3108, |
| "step": 2111 |
| }, |
| { |
| "epoch": 2.3759144625773776, |
| "grad_norm": 0.2633526483901452, |
| "learning_rate": 1.1514392991239049e-05, |
| "loss": 0.2776, |
| "step": 2112 |
| }, |
| { |
| "epoch": 2.377039954980304, |
| "grad_norm": 0.23782573779211985, |
| "learning_rate": 1.1493533583646226e-05, |
| "loss": 0.2882, |
| "step": 2113 |
| }, |
| { |
| "epoch": 2.37816544738323, |
| "grad_norm": 0.20792260803969095, |
| "learning_rate": 1.1472674176053401e-05, |
| "loss": 0.2711, |
| "step": 2114 |
| }, |
| { |
| "epoch": 2.3792909397861566, |
| "grad_norm": 0.27657171997481406, |
| "learning_rate": 1.1451814768460576e-05, |
| "loss": 0.2866, |
| "step": 2115 |
| }, |
| { |
| "epoch": 2.3804164321890826, |
| "grad_norm": 0.23403563768428892, |
| "learning_rate": 1.1430955360867753e-05, |
| "loss": 0.2936, |
| "step": 2116 |
| }, |
| { |
| "epoch": 2.381541924592009, |
| "grad_norm": 0.2268435928373139, |
| "learning_rate": 1.1410095953274927e-05, |
| "loss": 0.2856, |
| "step": 2117 |
| }, |
| { |
| "epoch": 2.382667416994935, |
| "grad_norm": 0.2547588426300782, |
| "learning_rate": 1.1389236545682102e-05, |
| "loss": 0.2825, |
| "step": 2118 |
| }, |
| { |
| "epoch": 2.3837929093978616, |
| "grad_norm": 0.2251742508238687, |
| "learning_rate": 1.1368377138089279e-05, |
| "loss": 0.27, |
| "step": 2119 |
| }, |
| { |
| "epoch": 2.384918401800788, |
| "grad_norm": 0.23999465674072157, |
| "learning_rate": 1.1347517730496454e-05, |
| "loss": 0.3047, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.386043894203714, |
| "grad_norm": 0.23662614777077606, |
| "learning_rate": 1.132665832290363e-05, |
| "loss": 0.2988, |
| "step": 2121 |
| }, |
| { |
| "epoch": 2.3871693866066406, |
| "grad_norm": 0.2186602406031677, |
| "learning_rate": 1.1305798915310805e-05, |
| "loss": 0.2728, |
| "step": 2122 |
| }, |
| { |
| "epoch": 2.3882948790095666, |
| "grad_norm": 0.23328788430189215, |
| "learning_rate": 1.1284939507717982e-05, |
| "loss": 0.2899, |
| "step": 2123 |
| }, |
| { |
| "epoch": 2.389420371412493, |
| "grad_norm": 0.22192987090662689, |
| "learning_rate": 1.1264080100125157e-05, |
| "loss": 0.2988, |
| "step": 2124 |
| }, |
| { |
| "epoch": 2.390545863815419, |
| "grad_norm": 0.2250119571074059, |
| "learning_rate": 1.1243220692532333e-05, |
| "loss": 0.2956, |
| "step": 2125 |
| }, |
| { |
| "epoch": 2.3916713562183456, |
| "grad_norm": 0.23070732043801884, |
| "learning_rate": 1.122236128493951e-05, |
| "loss": 0.2902, |
| "step": 2126 |
| }, |
| { |
| "epoch": 2.3927968486212716, |
| "grad_norm": 0.22518386141677244, |
| "learning_rate": 1.1201501877346685e-05, |
| "loss": 0.2659, |
| "step": 2127 |
| }, |
| { |
| "epoch": 2.393922341024198, |
| "grad_norm": 0.21584313221933796, |
| "learning_rate": 1.1180642469753858e-05, |
| "loss": 0.288, |
| "step": 2128 |
| }, |
| { |
| "epoch": 2.3950478334271246, |
| "grad_norm": 0.24985482700142989, |
| "learning_rate": 1.1159783062161035e-05, |
| "loss": 0.2874, |
| "step": 2129 |
| }, |
| { |
| "epoch": 2.3961733258300506, |
| "grad_norm": 0.23472182953400522, |
| "learning_rate": 1.113892365456821e-05, |
| "loss": 0.2797, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.397298818232977, |
| "grad_norm": 0.23998025758676889, |
| "learning_rate": 1.1118064246975386e-05, |
| "loss": 0.2919, |
| "step": 2131 |
| }, |
| { |
| "epoch": 2.398424310635903, |
| "grad_norm": 0.21809256274475072, |
| "learning_rate": 1.1097204839382561e-05, |
| "loss": 0.2758, |
| "step": 2132 |
| }, |
| { |
| "epoch": 2.3995498030388296, |
| "grad_norm": 0.23454882483715764, |
| "learning_rate": 1.1076345431789738e-05, |
| "loss": 0.2952, |
| "step": 2133 |
| }, |
| { |
| "epoch": 2.4006752954417556, |
| "grad_norm": 0.2246557652335286, |
| "learning_rate": 1.1055486024196914e-05, |
| "loss": 0.2981, |
| "step": 2134 |
| }, |
| { |
| "epoch": 2.401800787844682, |
| "grad_norm": 0.25315434629928985, |
| "learning_rate": 1.1034626616604089e-05, |
| "loss": 0.3066, |
| "step": 2135 |
| }, |
| { |
| "epoch": 2.402926280247608, |
| "grad_norm": 0.22665010188162998, |
| "learning_rate": 1.1013767209011266e-05, |
| "loss": 0.2923, |
| "step": 2136 |
| }, |
| { |
| "epoch": 2.4040517726505346, |
| "grad_norm": 0.2501297143991106, |
| "learning_rate": 1.0992907801418441e-05, |
| "loss": 0.2782, |
| "step": 2137 |
| }, |
| { |
| "epoch": 2.405177265053461, |
| "grad_norm": 0.23355470062481642, |
| "learning_rate": 1.0972048393825615e-05, |
| "loss": 0.2958, |
| "step": 2138 |
| }, |
| { |
| "epoch": 2.406302757456387, |
| "grad_norm": 0.22524426706184972, |
| "learning_rate": 1.0951188986232792e-05, |
| "loss": 0.2971, |
| "step": 2139 |
| }, |
| { |
| "epoch": 2.4074282498593136, |
| "grad_norm": 0.2652706460468126, |
| "learning_rate": 1.0930329578639967e-05, |
| "loss": 0.3063, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.4085537422622396, |
| "grad_norm": 0.27147074305958385, |
| "learning_rate": 1.0909470171047142e-05, |
| "loss": 0.2835, |
| "step": 2141 |
| }, |
| { |
| "epoch": 2.409679234665166, |
| "grad_norm": 0.21263510668944327, |
| "learning_rate": 1.0888610763454318e-05, |
| "loss": 0.2759, |
| "step": 2142 |
| }, |
| { |
| "epoch": 2.410804727068092, |
| "grad_norm": 0.2414280469462777, |
| "learning_rate": 1.0867751355861495e-05, |
| "loss": 0.3011, |
| "step": 2143 |
| }, |
| { |
| "epoch": 2.4119302194710186, |
| "grad_norm": 0.2563072903091181, |
| "learning_rate": 1.084689194826867e-05, |
| "loss": 0.3011, |
| "step": 2144 |
| }, |
| { |
| "epoch": 2.4130557118739446, |
| "grad_norm": 0.26752154229648717, |
| "learning_rate": 1.0826032540675845e-05, |
| "loss": 0.3025, |
| "step": 2145 |
| }, |
| { |
| "epoch": 2.414181204276871, |
| "grad_norm": 0.2191490869328681, |
| "learning_rate": 1.0805173133083022e-05, |
| "loss": 0.2838, |
| "step": 2146 |
| }, |
| { |
| "epoch": 2.4153066966797976, |
| "grad_norm": 0.2754295487570369, |
| "learning_rate": 1.0784313725490197e-05, |
| "loss": 0.2745, |
| "step": 2147 |
| }, |
| { |
| "epoch": 2.4164321890827236, |
| "grad_norm": 0.27496282852437165, |
| "learning_rate": 1.0763454317897373e-05, |
| "loss": 0.2949, |
| "step": 2148 |
| }, |
| { |
| "epoch": 2.41755768148565, |
| "grad_norm": 0.200617793351321, |
| "learning_rate": 1.0742594910304548e-05, |
| "loss": 0.2844, |
| "step": 2149 |
| }, |
| { |
| "epoch": 2.418683173888576, |
| "grad_norm": 0.2261161966006784, |
| "learning_rate": 1.0721735502711723e-05, |
| "loss": 0.3074, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.4198086662915026, |
| "grad_norm": 0.2721691004576223, |
| "learning_rate": 1.0700876095118899e-05, |
| "loss": 0.2887, |
| "step": 2151 |
| }, |
| { |
| "epoch": 2.4209341586944286, |
| "grad_norm": 0.2468891901297125, |
| "learning_rate": 1.0680016687526074e-05, |
| "loss": 0.2826, |
| "step": 2152 |
| }, |
| { |
| "epoch": 2.422059651097355, |
| "grad_norm": 0.22981129965663172, |
| "learning_rate": 1.065915727993325e-05, |
| "loss": 0.2982, |
| "step": 2153 |
| }, |
| { |
| "epoch": 2.423185143500281, |
| "grad_norm": 0.23778012142265284, |
| "learning_rate": 1.0638297872340426e-05, |
| "loss": 0.2803, |
| "step": 2154 |
| }, |
| { |
| "epoch": 2.4243106359032076, |
| "grad_norm": 0.2502133834738445, |
| "learning_rate": 1.0617438464747601e-05, |
| "loss": 0.2894, |
| "step": 2155 |
| }, |
| { |
| "epoch": 2.425436128306134, |
| "grad_norm": 0.24021651572443242, |
| "learning_rate": 1.0596579057154778e-05, |
| "loss": 0.2792, |
| "step": 2156 |
| }, |
| { |
| "epoch": 2.42656162070906, |
| "grad_norm": 0.21287174233579118, |
| "learning_rate": 1.0575719649561954e-05, |
| "loss": 0.2653, |
| "step": 2157 |
| }, |
| { |
| "epoch": 2.4276871131119866, |
| "grad_norm": 0.2596480129053586, |
| "learning_rate": 1.0554860241969129e-05, |
| "loss": 0.2812, |
| "step": 2158 |
| }, |
| { |
| "epoch": 2.4288126055149126, |
| "grad_norm": 0.22839461536852768, |
| "learning_rate": 1.0534000834376304e-05, |
| "loss": 0.2887, |
| "step": 2159 |
| }, |
| { |
| "epoch": 2.429938097917839, |
| "grad_norm": 0.25082900774514266, |
| "learning_rate": 1.051314142678348e-05, |
| "loss": 0.2848, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.4310635903207656, |
| "grad_norm": 0.21582263533702323, |
| "learning_rate": 1.0492282019190655e-05, |
| "loss": 0.2848, |
| "step": 2161 |
| }, |
| { |
| "epoch": 2.4321890827236916, |
| "grad_norm": 0.20981491769940364, |
| "learning_rate": 1.047142261159783e-05, |
| "loss": 0.2771, |
| "step": 2162 |
| }, |
| { |
| "epoch": 2.433314575126618, |
| "grad_norm": 0.2478690946929455, |
| "learning_rate": 1.0450563204005007e-05, |
| "loss": 0.2978, |
| "step": 2163 |
| }, |
| { |
| "epoch": 2.434440067529544, |
| "grad_norm": 0.23623868199579823, |
| "learning_rate": 1.0429703796412182e-05, |
| "loss": 0.2868, |
| "step": 2164 |
| }, |
| { |
| "epoch": 2.4355655599324706, |
| "grad_norm": 0.22479499127056093, |
| "learning_rate": 1.0408844388819358e-05, |
| "loss": 0.2864, |
| "step": 2165 |
| }, |
| { |
| "epoch": 2.4366910523353966, |
| "grad_norm": 0.22778228885333549, |
| "learning_rate": 1.0387984981226535e-05, |
| "loss": 0.2703, |
| "step": 2166 |
| }, |
| { |
| "epoch": 2.437816544738323, |
| "grad_norm": 0.24955225194107739, |
| "learning_rate": 1.036712557363371e-05, |
| "loss": 0.2881, |
| "step": 2167 |
| }, |
| { |
| "epoch": 2.438942037141249, |
| "grad_norm": 0.24688296808661256, |
| "learning_rate": 1.0346266166040885e-05, |
| "loss": 0.2892, |
| "step": 2168 |
| }, |
| { |
| "epoch": 2.4400675295441756, |
| "grad_norm": 0.21066675955547629, |
| "learning_rate": 1.032540675844806e-05, |
| "loss": 0.2665, |
| "step": 2169 |
| }, |
| { |
| "epoch": 2.441193021947102, |
| "grad_norm": 0.23152630032898566, |
| "learning_rate": 1.0304547350855236e-05, |
| "loss": 0.2879, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.442318514350028, |
| "grad_norm": 0.23881733868242846, |
| "learning_rate": 1.0283687943262411e-05, |
| "loss": 0.284, |
| "step": 2171 |
| }, |
| { |
| "epoch": 2.4434440067529546, |
| "grad_norm": 0.24727876228693577, |
| "learning_rate": 1.0262828535669586e-05, |
| "loss": 0.2799, |
| "step": 2172 |
| }, |
| { |
| "epoch": 2.4445694991558806, |
| "grad_norm": 0.2237223246325913, |
| "learning_rate": 1.0241969128076763e-05, |
| "loss": 0.2839, |
| "step": 2173 |
| }, |
| { |
| "epoch": 2.445694991558807, |
| "grad_norm": 0.2255880979174184, |
| "learning_rate": 1.0221109720483939e-05, |
| "loss": 0.3039, |
| "step": 2174 |
| }, |
| { |
| "epoch": 2.446820483961733, |
| "grad_norm": 0.2170555923070572, |
| "learning_rate": 1.0200250312891114e-05, |
| "loss": 0.2768, |
| "step": 2175 |
| }, |
| { |
| "epoch": 2.4479459763646596, |
| "grad_norm": 0.20774037005388524, |
| "learning_rate": 1.0179390905298291e-05, |
| "loss": 0.2834, |
| "step": 2176 |
| }, |
| { |
| "epoch": 2.4490714687675856, |
| "grad_norm": 0.2265465766383895, |
| "learning_rate": 1.0158531497705466e-05, |
| "loss": 0.3058, |
| "step": 2177 |
| }, |
| { |
| "epoch": 2.450196961170512, |
| "grad_norm": 0.20568423154158125, |
| "learning_rate": 1.0137672090112642e-05, |
| "loss": 0.2692, |
| "step": 2178 |
| }, |
| { |
| "epoch": 2.4513224535734386, |
| "grad_norm": 0.22834881557663556, |
| "learning_rate": 1.0116812682519817e-05, |
| "loss": 0.2892, |
| "step": 2179 |
| }, |
| { |
| "epoch": 2.4524479459763646, |
| "grad_norm": 0.22028619420108753, |
| "learning_rate": 1.0095953274926992e-05, |
| "loss": 0.2919, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.453573438379291, |
| "grad_norm": 0.2425115217082142, |
| "learning_rate": 1.0075093867334167e-05, |
| "loss": 0.2764, |
| "step": 2181 |
| }, |
| { |
| "epoch": 2.454698930782217, |
| "grad_norm": 0.22791631771168733, |
| "learning_rate": 1.0054234459741343e-05, |
| "loss": 0.267, |
| "step": 2182 |
| }, |
| { |
| "epoch": 2.4558244231851436, |
| "grad_norm": 0.1926906774199996, |
| "learning_rate": 1.003337505214852e-05, |
| "loss": 0.2683, |
| "step": 2183 |
| }, |
| { |
| "epoch": 2.4569499155880696, |
| "grad_norm": 0.23290868443818466, |
| "learning_rate": 1.0012515644555695e-05, |
| "loss": 0.2791, |
| "step": 2184 |
| }, |
| { |
| "epoch": 2.458075407990996, |
| "grad_norm": 0.2968317001207595, |
| "learning_rate": 9.99165623696287e-06, |
| "loss": 0.2959, |
| "step": 2185 |
| }, |
| { |
| "epoch": 2.459200900393922, |
| "grad_norm": 0.250149366010179, |
| "learning_rate": 9.970796829370047e-06, |
| "loss": 0.2968, |
| "step": 2186 |
| }, |
| { |
| "epoch": 2.4603263927968486, |
| "grad_norm": 0.23676392349962846, |
| "learning_rate": 9.949937421777223e-06, |
| "loss": 0.2766, |
| "step": 2187 |
| }, |
| { |
| "epoch": 2.461451885199775, |
| "grad_norm": 0.2968220951755795, |
| "learning_rate": 9.929078014184398e-06, |
| "loss": 0.2858, |
| "step": 2188 |
| }, |
| { |
| "epoch": 2.462577377602701, |
| "grad_norm": 0.24526431357390857, |
| "learning_rate": 9.908218606591573e-06, |
| "loss": 0.2776, |
| "step": 2189 |
| }, |
| { |
| "epoch": 2.4637028700056276, |
| "grad_norm": 0.2072075588604563, |
| "learning_rate": 9.887359198998748e-06, |
| "loss": 0.2786, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.4648283624085536, |
| "grad_norm": 0.24560787407943072, |
| "learning_rate": 9.866499791405924e-06, |
| "loss": 0.3076, |
| "step": 2191 |
| }, |
| { |
| "epoch": 2.46595385481148, |
| "grad_norm": 0.2807855048371902, |
| "learning_rate": 9.845640383813099e-06, |
| "loss": 0.3059, |
| "step": 2192 |
| }, |
| { |
| "epoch": 2.467079347214406, |
| "grad_norm": 0.21339940056568182, |
| "learning_rate": 9.824780976220276e-06, |
| "loss": 0.2835, |
| "step": 2193 |
| }, |
| { |
| "epoch": 2.4682048396173326, |
| "grad_norm": 0.23237003408073176, |
| "learning_rate": 9.803921568627451e-06, |
| "loss": 0.2774, |
| "step": 2194 |
| }, |
| { |
| "epoch": 2.4693303320202586, |
| "grad_norm": 0.22471960654724552, |
| "learning_rate": 9.783062161034627e-06, |
| "loss": 0.3059, |
| "step": 2195 |
| }, |
| { |
| "epoch": 2.470455824423185, |
| "grad_norm": 0.21133212055331363, |
| "learning_rate": 9.762202753441804e-06, |
| "loss": 0.2804, |
| "step": 2196 |
| }, |
| { |
| "epoch": 2.4715813168261116, |
| "grad_norm": 0.22866555875952663, |
| "learning_rate": 9.741343345848979e-06, |
| "loss": 0.2668, |
| "step": 2197 |
| }, |
| { |
| "epoch": 2.4727068092290376, |
| "grad_norm": 0.23022775610838142, |
| "learning_rate": 9.720483938256154e-06, |
| "loss": 0.2941, |
| "step": 2198 |
| }, |
| { |
| "epoch": 2.473832301631964, |
| "grad_norm": 0.24916655338248048, |
| "learning_rate": 9.69962453066333e-06, |
| "loss": 0.2875, |
| "step": 2199 |
| }, |
| { |
| "epoch": 2.47495779403489, |
| "grad_norm": 0.22598137001947038, |
| "learning_rate": 9.678765123070506e-06, |
| "loss": 0.287, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.4760832864378166, |
| "grad_norm": 0.19562266451832722, |
| "learning_rate": 9.65790571547768e-06, |
| "loss": 0.2726, |
| "step": 2201 |
| }, |
| { |
| "epoch": 2.4772087788407426, |
| "grad_norm": 0.22330052278775112, |
| "learning_rate": 9.637046307884855e-06, |
| "loss": 0.2862, |
| "step": 2202 |
| }, |
| { |
| "epoch": 2.478334271243669, |
| "grad_norm": 0.22895521592496432, |
| "learning_rate": 9.616186900292032e-06, |
| "loss": 0.2858, |
| "step": 2203 |
| }, |
| { |
| "epoch": 2.479459763646595, |
| "grad_norm": 0.22023179481636448, |
| "learning_rate": 9.595327492699208e-06, |
| "loss": 0.2768, |
| "step": 2204 |
| }, |
| { |
| "epoch": 2.4805852560495216, |
| "grad_norm": 0.23642223233900708, |
| "learning_rate": 9.574468085106383e-06, |
| "loss": 0.2878, |
| "step": 2205 |
| }, |
| { |
| "epoch": 2.481710748452448, |
| "grad_norm": 0.2391107708431571, |
| "learning_rate": 9.55360867751356e-06, |
| "loss": 0.2879, |
| "step": 2206 |
| }, |
| { |
| "epoch": 2.482836240855374, |
| "grad_norm": 0.24152975198499732, |
| "learning_rate": 9.532749269920735e-06, |
| "loss": 0.2955, |
| "step": 2207 |
| }, |
| { |
| "epoch": 2.4839617332583006, |
| "grad_norm": 0.23299532148669774, |
| "learning_rate": 9.51188986232791e-06, |
| "loss": 0.2962, |
| "step": 2208 |
| }, |
| { |
| "epoch": 2.4850872256612266, |
| "grad_norm": 0.20896130963456966, |
| "learning_rate": 9.491030454735086e-06, |
| "loss": 0.284, |
| "step": 2209 |
| }, |
| { |
| "epoch": 2.486212718064153, |
| "grad_norm": 0.2105385871507124, |
| "learning_rate": 9.470171047142263e-06, |
| "loss": 0.2729, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.4873382104670796, |
| "grad_norm": 0.21484947461149867, |
| "learning_rate": 9.449311639549436e-06, |
| "loss": 0.2926, |
| "step": 2211 |
| }, |
| { |
| "epoch": 2.4884637028700056, |
| "grad_norm": 0.2190385482446419, |
| "learning_rate": 9.428452231956612e-06, |
| "loss": 0.2825, |
| "step": 2212 |
| }, |
| { |
| "epoch": 2.489589195272932, |
| "grad_norm": 0.2142534982080354, |
| "learning_rate": 9.407592824363789e-06, |
| "loss": 0.2751, |
| "step": 2213 |
| }, |
| { |
| "epoch": 2.490714687675858, |
| "grad_norm": 0.21708738862041638, |
| "learning_rate": 9.386733416770964e-06, |
| "loss": 0.2786, |
| "step": 2214 |
| }, |
| { |
| "epoch": 2.4918401800787846, |
| "grad_norm": 0.2181940682924344, |
| "learning_rate": 9.365874009178139e-06, |
| "loss": 0.2891, |
| "step": 2215 |
| }, |
| { |
| "epoch": 2.4929656724817106, |
| "grad_norm": 0.24361785849546538, |
| "learning_rate": 9.345014601585316e-06, |
| "loss": 0.2862, |
| "step": 2216 |
| }, |
| { |
| "epoch": 2.494091164884637, |
| "grad_norm": 0.2074874339468701, |
| "learning_rate": 9.324155193992491e-06, |
| "loss": 0.2779, |
| "step": 2217 |
| }, |
| { |
| "epoch": 2.495216657287563, |
| "grad_norm": 0.22232685525965187, |
| "learning_rate": 9.303295786399667e-06, |
| "loss": 0.2872, |
| "step": 2218 |
| }, |
| { |
| "epoch": 2.4963421496904896, |
| "grad_norm": 0.22940288362612324, |
| "learning_rate": 9.282436378806842e-06, |
| "loss": 0.2675, |
| "step": 2219 |
| }, |
| { |
| "epoch": 2.497467642093416, |
| "grad_norm": 0.22467443084840247, |
| "learning_rate": 9.261576971214019e-06, |
| "loss": 0.2801, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.498593134496342, |
| "grad_norm": 0.23139305058585594, |
| "learning_rate": 9.240717563621194e-06, |
| "loss": 0.2743, |
| "step": 2221 |
| }, |
| { |
| "epoch": 2.4997186268992686, |
| "grad_norm": 0.2360404033010022, |
| "learning_rate": 9.219858156028368e-06, |
| "loss": 0.286, |
| "step": 2222 |
| }, |
| { |
| "epoch": 2.5008441193021946, |
| "grad_norm": 0.20605066820343487, |
| "learning_rate": 9.198998748435545e-06, |
| "loss": 0.2823, |
| "step": 2223 |
| }, |
| { |
| "epoch": 2.501969611705121, |
| "grad_norm": 0.2564735378536905, |
| "learning_rate": 9.17813934084272e-06, |
| "loss": 0.2947, |
| "step": 2224 |
| }, |
| { |
| "epoch": 2.503095104108047, |
| "grad_norm": 0.2320837293470589, |
| "learning_rate": 9.157279933249895e-06, |
| "loss": 0.2768, |
| "step": 2225 |
| }, |
| { |
| "epoch": 2.5042205965109736, |
| "grad_norm": 0.208589920793005, |
| "learning_rate": 9.136420525657072e-06, |
| "loss": 0.29, |
| "step": 2226 |
| }, |
| { |
| "epoch": 2.5053460889138996, |
| "grad_norm": 0.21389293826499295, |
| "learning_rate": 9.115561118064248e-06, |
| "loss": 0.2798, |
| "step": 2227 |
| }, |
| { |
| "epoch": 2.506471581316826, |
| "grad_norm": 0.22046720544274087, |
| "learning_rate": 9.094701710471423e-06, |
| "loss": 0.2937, |
| "step": 2228 |
| }, |
| { |
| "epoch": 2.5075970737197526, |
| "grad_norm": 0.22495729889410385, |
| "learning_rate": 9.073842302878598e-06, |
| "loss": 0.2879, |
| "step": 2229 |
| }, |
| { |
| "epoch": 2.5087225661226786, |
| "grad_norm": 0.20269539252904967, |
| "learning_rate": 9.052982895285775e-06, |
| "loss": 0.2774, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.509848058525605, |
| "grad_norm": 0.1980840443630393, |
| "learning_rate": 9.03212348769295e-06, |
| "loss": 0.281, |
| "step": 2231 |
| }, |
| { |
| "epoch": 2.510973550928531, |
| "grad_norm": 0.22695316930947035, |
| "learning_rate": 9.011264080100124e-06, |
| "loss": 0.2922, |
| "step": 2232 |
| }, |
| { |
| "epoch": 2.5120990433314576, |
| "grad_norm": 0.20934803359715298, |
| "learning_rate": 8.990404672507301e-06, |
| "loss": 0.2794, |
| "step": 2233 |
| }, |
| { |
| "epoch": 2.5132245357343836, |
| "grad_norm": 0.21112109217582253, |
| "learning_rate": 8.969545264914476e-06, |
| "loss": 0.2907, |
| "step": 2234 |
| }, |
| { |
| "epoch": 2.51435002813731, |
| "grad_norm": 0.22573731590530483, |
| "learning_rate": 8.948685857321652e-06, |
| "loss": 0.291, |
| "step": 2235 |
| }, |
| { |
| "epoch": 2.515475520540236, |
| "grad_norm": 0.23892740697159065, |
| "learning_rate": 8.927826449728829e-06, |
| "loss": 0.273, |
| "step": 2236 |
| }, |
| { |
| "epoch": 2.5166010129431626, |
| "grad_norm": 0.22535585510058634, |
| "learning_rate": 8.906967042136004e-06, |
| "loss": 0.2837, |
| "step": 2237 |
| }, |
| { |
| "epoch": 2.517726505346089, |
| "grad_norm": 0.2194038445722204, |
| "learning_rate": 8.88610763454318e-06, |
| "loss": 0.2927, |
| "step": 2238 |
| }, |
| { |
| "epoch": 2.518851997749015, |
| "grad_norm": 0.22905667951438685, |
| "learning_rate": 8.865248226950355e-06, |
| "loss": 0.2789, |
| "step": 2239 |
| }, |
| { |
| "epoch": 2.5199774901519416, |
| "grad_norm": 0.22625912351056832, |
| "learning_rate": 8.844388819357532e-06, |
| "loss": 0.2755, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.5211029825548676, |
| "grad_norm": 0.23804689181224994, |
| "learning_rate": 8.823529411764707e-06, |
| "loss": 0.2792, |
| "step": 2241 |
| }, |
| { |
| "epoch": 2.522228474957794, |
| "grad_norm": 0.2105408688549035, |
| "learning_rate": 8.802670004171882e-06, |
| "loss": 0.2972, |
| "step": 2242 |
| }, |
| { |
| "epoch": 2.52335396736072, |
| "grad_norm": 0.22340033958156802, |
| "learning_rate": 8.781810596579057e-06, |
| "loss": 0.2946, |
| "step": 2243 |
| }, |
| { |
| "epoch": 2.5244794597636466, |
| "grad_norm": 0.2297895889368776, |
| "learning_rate": 8.760951188986233e-06, |
| "loss": 0.2938, |
| "step": 2244 |
| }, |
| { |
| "epoch": 2.5256049521665727, |
| "grad_norm": 0.2259147810494066, |
| "learning_rate": 8.740091781393408e-06, |
| "loss": 0.2813, |
| "step": 2245 |
| }, |
| { |
| "epoch": 2.526730444569499, |
| "grad_norm": 0.23547288393006746, |
| "learning_rate": 8.719232373800585e-06, |
| "loss": 0.2994, |
| "step": 2246 |
| }, |
| { |
| "epoch": 2.5278559369724256, |
| "grad_norm": 0.22543446780315715, |
| "learning_rate": 8.69837296620776e-06, |
| "loss": 0.2846, |
| "step": 2247 |
| }, |
| { |
| "epoch": 2.5289814293753516, |
| "grad_norm": 0.2154532957908738, |
| "learning_rate": 8.677513558614936e-06, |
| "loss": 0.2879, |
| "step": 2248 |
| }, |
| { |
| "epoch": 2.530106921778278, |
| "grad_norm": 0.2351801079174597, |
| "learning_rate": 8.65665415102211e-06, |
| "loss": 0.2765, |
| "step": 2249 |
| }, |
| { |
| "epoch": 2.531232414181204, |
| "grad_norm": 0.21366786894791512, |
| "learning_rate": 8.635794743429288e-06, |
| "loss": 0.2734, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.5323579065841306, |
| "grad_norm": 0.23645349161640047, |
| "learning_rate": 8.614935335836463e-06, |
| "loss": 0.2984, |
| "step": 2251 |
| }, |
| { |
| "epoch": 2.533483398987057, |
| "grad_norm": 0.23434820101602807, |
| "learning_rate": 8.594075928243638e-06, |
| "loss": 0.2968, |
| "step": 2252 |
| }, |
| { |
| "epoch": 2.534608891389983, |
| "grad_norm": 0.23800902126311332, |
| "learning_rate": 8.573216520650814e-06, |
| "loss": 0.2828, |
| "step": 2253 |
| }, |
| { |
| "epoch": 2.535734383792909, |
| "grad_norm": 0.2538132352809376, |
| "learning_rate": 8.552357113057989e-06, |
| "loss": 0.2843, |
| "step": 2254 |
| }, |
| { |
| "epoch": 2.5368598761958356, |
| "grad_norm": 0.21371163966598017, |
| "learning_rate": 8.531497705465164e-06, |
| "loss": 0.2751, |
| "step": 2255 |
| }, |
| { |
| "epoch": 2.537985368598762, |
| "grad_norm": 0.21482253029817228, |
| "learning_rate": 8.510638297872341e-06, |
| "loss": 0.3006, |
| "step": 2256 |
| }, |
| { |
| "epoch": 2.539110861001688, |
| "grad_norm": 0.21834391394392152, |
| "learning_rate": 8.489778890279517e-06, |
| "loss": 0.288, |
| "step": 2257 |
| }, |
| { |
| "epoch": 2.5402363534046146, |
| "grad_norm": 0.2385102842630092, |
| "learning_rate": 8.468919482686692e-06, |
| "loss": 0.2873, |
| "step": 2258 |
| }, |
| { |
| "epoch": 2.5413618458075407, |
| "grad_norm": 0.2496691464287376, |
| "learning_rate": 8.448060075093867e-06, |
| "loss": 0.2868, |
| "step": 2259 |
| }, |
| { |
| "epoch": 2.542487338210467, |
| "grad_norm": 0.2014232955964171, |
| "learning_rate": 8.427200667501044e-06, |
| "loss": 0.2851, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.5436128306133936, |
| "grad_norm": 0.23384968447549695, |
| "learning_rate": 8.40634125990822e-06, |
| "loss": 0.2842, |
| "step": 2261 |
| }, |
| { |
| "epoch": 2.5447383230163196, |
| "grad_norm": 0.21977668018953103, |
| "learning_rate": 8.385481852315395e-06, |
| "loss": 0.2883, |
| "step": 2262 |
| }, |
| { |
| "epoch": 2.5458638154192457, |
| "grad_norm": 0.21776563647468017, |
| "learning_rate": 8.36462244472257e-06, |
| "loss": 0.2856, |
| "step": 2263 |
| }, |
| { |
| "epoch": 2.546989307822172, |
| "grad_norm": 0.2027944392061715, |
| "learning_rate": 8.343763037129745e-06, |
| "loss": 0.2765, |
| "step": 2264 |
| }, |
| { |
| "epoch": 2.5481148002250986, |
| "grad_norm": 0.21029054091165603, |
| "learning_rate": 8.32290362953692e-06, |
| "loss": 0.2767, |
| "step": 2265 |
| }, |
| { |
| "epoch": 2.5492402926280247, |
| "grad_norm": 0.21418622748856342, |
| "learning_rate": 8.302044221944098e-06, |
| "loss": 0.2948, |
| "step": 2266 |
| }, |
| { |
| "epoch": 2.550365785030951, |
| "grad_norm": 0.21907388139154874, |
| "learning_rate": 8.281184814351273e-06, |
| "loss": 0.2736, |
| "step": 2267 |
| }, |
| { |
| "epoch": 2.551491277433877, |
| "grad_norm": 0.21904845452521604, |
| "learning_rate": 8.260325406758448e-06, |
| "loss": 0.3056, |
| "step": 2268 |
| }, |
| { |
| "epoch": 2.5526167698368036, |
| "grad_norm": 0.20459659904962244, |
| "learning_rate": 8.239465999165623e-06, |
| "loss": 0.2807, |
| "step": 2269 |
| }, |
| { |
| "epoch": 2.55374226223973, |
| "grad_norm": 0.20176624512330674, |
| "learning_rate": 8.2186065915728e-06, |
| "loss": 0.282, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.554867754642656, |
| "grad_norm": 0.2171053854970344, |
| "learning_rate": 8.197747183979976e-06, |
| "loss": 0.2867, |
| "step": 2271 |
| }, |
| { |
| "epoch": 2.555993247045582, |
| "grad_norm": 0.21608909264471945, |
| "learning_rate": 8.176887776387151e-06, |
| "loss": 0.2843, |
| "step": 2272 |
| }, |
| { |
| "epoch": 2.5571187394485086, |
| "grad_norm": 0.22363745774157207, |
| "learning_rate": 8.156028368794328e-06, |
| "loss": 0.292, |
| "step": 2273 |
| }, |
| { |
| "epoch": 2.558244231851435, |
| "grad_norm": 0.1967157457122503, |
| "learning_rate": 8.135168961201502e-06, |
| "loss": 0.2785, |
| "step": 2274 |
| }, |
| { |
| "epoch": 2.559369724254361, |
| "grad_norm": 0.21025592482731642, |
| "learning_rate": 8.114309553608677e-06, |
| "loss": 0.2758, |
| "step": 2275 |
| }, |
| { |
| "epoch": 2.5604952166572876, |
| "grad_norm": 0.21847518826316134, |
| "learning_rate": 8.093450146015854e-06, |
| "loss": 0.2969, |
| "step": 2276 |
| }, |
| { |
| "epoch": 2.5616207090602137, |
| "grad_norm": 0.22602160924202305, |
| "learning_rate": 8.072590738423029e-06, |
| "loss": 0.2883, |
| "step": 2277 |
| }, |
| { |
| "epoch": 2.56274620146314, |
| "grad_norm": 0.20256712231044452, |
| "learning_rate": 8.051731330830204e-06, |
| "loss": 0.2695, |
| "step": 2278 |
| }, |
| { |
| "epoch": 2.5638716938660666, |
| "grad_norm": 0.20681151204540096, |
| "learning_rate": 8.030871923237381e-06, |
| "loss": 0.2654, |
| "step": 2279 |
| }, |
| { |
| "epoch": 2.5649971862689926, |
| "grad_norm": 0.23344582380587078, |
| "learning_rate": 8.010012515644557e-06, |
| "loss": 0.2986, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.566122678671919, |
| "grad_norm": 0.22256302367590555, |
| "learning_rate": 7.989153108051732e-06, |
| "loss": 0.2833, |
| "step": 2281 |
| }, |
| { |
| "epoch": 2.567248171074845, |
| "grad_norm": 0.20447744073654678, |
| "learning_rate": 7.968293700458907e-06, |
| "loss": 0.2859, |
| "step": 2282 |
| }, |
| { |
| "epoch": 2.5683736634777716, |
| "grad_norm": 0.20565529180207448, |
| "learning_rate": 7.947434292866084e-06, |
| "loss": 0.2742, |
| "step": 2283 |
| }, |
| { |
| "epoch": 2.5694991558806977, |
| "grad_norm": 0.21066765721313158, |
| "learning_rate": 7.926574885273258e-06, |
| "loss": 0.2944, |
| "step": 2284 |
| }, |
| { |
| "epoch": 2.570624648283624, |
| "grad_norm": 0.21517637060390432, |
| "learning_rate": 7.905715477680433e-06, |
| "loss": 0.2875, |
| "step": 2285 |
| }, |
| { |
| "epoch": 2.57175014068655, |
| "grad_norm": 0.21947956446898098, |
| "learning_rate": 7.88485607008761e-06, |
| "loss": 0.3004, |
| "step": 2286 |
| }, |
| { |
| "epoch": 2.5728756330894766, |
| "grad_norm": 0.22114557622949502, |
| "learning_rate": 7.863996662494785e-06, |
| "loss": 0.2976, |
| "step": 2287 |
| }, |
| { |
| "epoch": 2.574001125492403, |
| "grad_norm": 0.22379469537623312, |
| "learning_rate": 7.84313725490196e-06, |
| "loss": 0.2878, |
| "step": 2288 |
| }, |
| { |
| "epoch": 2.575126617895329, |
| "grad_norm": 0.2071839477449149, |
| "learning_rate": 7.822277847309138e-06, |
| "loss": 0.2795, |
| "step": 2289 |
| }, |
| { |
| "epoch": 2.5762521102982556, |
| "grad_norm": 0.2237931852947739, |
| "learning_rate": 7.801418439716313e-06, |
| "loss": 0.2971, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.5773776027011817, |
| "grad_norm": 0.21266520141625195, |
| "learning_rate": 7.780559032123488e-06, |
| "loss": 0.2853, |
| "step": 2291 |
| }, |
| { |
| "epoch": 2.578503095104108, |
| "grad_norm": 0.2486160020515366, |
| "learning_rate": 7.759699624530664e-06, |
| "loss": 0.2961, |
| "step": 2292 |
| }, |
| { |
| "epoch": 2.579628587507034, |
| "grad_norm": 0.2068308805691666, |
| "learning_rate": 7.73884021693784e-06, |
| "loss": 0.2691, |
| "step": 2293 |
| }, |
| { |
| "epoch": 2.5807540799099606, |
| "grad_norm": 0.2230851463060974, |
| "learning_rate": 7.717980809345016e-06, |
| "loss": 0.2995, |
| "step": 2294 |
| }, |
| { |
| "epoch": 2.5818795723128867, |
| "grad_norm": 0.2374977031933618, |
| "learning_rate": 7.69712140175219e-06, |
| "loss": 0.283, |
| "step": 2295 |
| }, |
| { |
| "epoch": 2.583005064715813, |
| "grad_norm": 0.24062860705542086, |
| "learning_rate": 7.676261994159366e-06, |
| "loss": 0.2957, |
| "step": 2296 |
| }, |
| { |
| "epoch": 2.5841305571187396, |
| "grad_norm": 0.20537260389777368, |
| "learning_rate": 7.655402586566542e-06, |
| "loss": 0.2921, |
| "step": 2297 |
| }, |
| { |
| "epoch": 2.5852560495216657, |
| "grad_norm": 0.21853998967769137, |
| "learning_rate": 7.634543178973717e-06, |
| "loss": 0.2894, |
| "step": 2298 |
| }, |
| { |
| "epoch": 2.586381541924592, |
| "grad_norm": 0.21880735610653707, |
| "learning_rate": 7.613683771380893e-06, |
| "loss": 0.2791, |
| "step": 2299 |
| }, |
| { |
| "epoch": 2.587507034327518, |
| "grad_norm": 0.22402757654717384, |
| "learning_rate": 7.592824363788069e-06, |
| "loss": 0.289, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.5886325267304446, |
| "grad_norm": 0.2147892961394563, |
| "learning_rate": 7.5719649561952445e-06, |
| "loss": 0.2812, |
| "step": 2301 |
| }, |
| { |
| "epoch": 2.589758019133371, |
| "grad_norm": 0.22876144329979556, |
| "learning_rate": 7.551105548602421e-06, |
| "loss": 0.2933, |
| "step": 2302 |
| }, |
| { |
| "epoch": 2.590883511536297, |
| "grad_norm": 0.23442708833949216, |
| "learning_rate": 7.530246141009596e-06, |
| "loss": 0.2814, |
| "step": 2303 |
| }, |
| { |
| "epoch": 2.592009003939223, |
| "grad_norm": 0.23484614963727998, |
| "learning_rate": 7.509386733416772e-06, |
| "loss": 0.3144, |
| "step": 2304 |
| }, |
| { |
| "epoch": 2.5931344963421497, |
| "grad_norm": 0.20921707796315442, |
| "learning_rate": 7.4885273258239465e-06, |
| "loss": 0.286, |
| "step": 2305 |
| }, |
| { |
| "epoch": 2.594259988745076, |
| "grad_norm": 0.22775240735379326, |
| "learning_rate": 7.467667918231122e-06, |
| "loss": 0.2788, |
| "step": 2306 |
| }, |
| { |
| "epoch": 2.595385481148002, |
| "grad_norm": 0.22230000059940203, |
| "learning_rate": 7.446808510638298e-06, |
| "loss": 0.2815, |
| "step": 2307 |
| }, |
| { |
| "epoch": 2.5965109735509286, |
| "grad_norm": 0.24545298078462746, |
| "learning_rate": 7.425949103045473e-06, |
| "loss": 0.2735, |
| "step": 2308 |
| }, |
| { |
| "epoch": 2.5976364659538547, |
| "grad_norm": 0.19625632990047406, |
| "learning_rate": 7.405089695452649e-06, |
| "loss": 0.2731, |
| "step": 2309 |
| }, |
| { |
| "epoch": 2.598761958356781, |
| "grad_norm": 0.20900090173879718, |
| "learning_rate": 7.3842302878598255e-06, |
| "loss": 0.2721, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.5998874507597076, |
| "grad_norm": 0.2123621289927944, |
| "learning_rate": 7.363370880267001e-06, |
| "loss": 0.2698, |
| "step": 2311 |
| }, |
| { |
| "epoch": 2.6010129431626337, |
| "grad_norm": 0.21369756285267333, |
| "learning_rate": 7.342511472674177e-06, |
| "loss": 0.2834, |
| "step": 2312 |
| }, |
| { |
| "epoch": 2.6021384355655597, |
| "grad_norm": 0.22793059698710658, |
| "learning_rate": 7.321652065081352e-06, |
| "loss": 0.2851, |
| "step": 2313 |
| }, |
| { |
| "epoch": 2.603263927968486, |
| "grad_norm": 0.2134184284204459, |
| "learning_rate": 7.300792657488528e-06, |
| "loss": 0.2811, |
| "step": 2314 |
| }, |
| { |
| "epoch": 2.6043894203714126, |
| "grad_norm": 0.21325834093200643, |
| "learning_rate": 7.279933249895703e-06, |
| "loss": 0.2936, |
| "step": 2315 |
| }, |
| { |
| "epoch": 2.6055149127743387, |
| "grad_norm": 0.1991068712411994, |
| "learning_rate": 7.259073842302878e-06, |
| "loss": 0.2834, |
| "step": 2316 |
| }, |
| { |
| "epoch": 2.606640405177265, |
| "grad_norm": 0.22901278666536629, |
| "learning_rate": 7.238214434710054e-06, |
| "loss": 0.3015, |
| "step": 2317 |
| }, |
| { |
| "epoch": 2.607765897580191, |
| "grad_norm": 0.21881134986820416, |
| "learning_rate": 7.21735502711723e-06, |
| "loss": 0.2876, |
| "step": 2318 |
| }, |
| { |
| "epoch": 2.6088913899831176, |
| "grad_norm": 0.22029025156059676, |
| "learning_rate": 7.196495619524406e-06, |
| "loss": 0.2988, |
| "step": 2319 |
| }, |
| { |
| "epoch": 2.610016882386044, |
| "grad_norm": 0.21007112294863065, |
| "learning_rate": 7.175636211931582e-06, |
| "loss": 0.2763, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.61114237478897, |
| "grad_norm": 0.2126401817051627, |
| "learning_rate": 7.154776804338757e-06, |
| "loss": 0.2805, |
| "step": 2321 |
| }, |
| { |
| "epoch": 2.612267867191896, |
| "grad_norm": 0.20852511391858303, |
| "learning_rate": 7.133917396745933e-06, |
| "loss": 0.2936, |
| "step": 2322 |
| }, |
| { |
| "epoch": 2.6133933595948227, |
| "grad_norm": 0.21781244059761962, |
| "learning_rate": 7.1130579891531085e-06, |
| "loss": 0.2892, |
| "step": 2323 |
| }, |
| { |
| "epoch": 2.614518851997749, |
| "grad_norm": 0.22501438470662116, |
| "learning_rate": 7.092198581560285e-06, |
| "loss": 0.2839, |
| "step": 2324 |
| }, |
| { |
| "epoch": 2.615644344400675, |
| "grad_norm": 0.20568012937631386, |
| "learning_rate": 7.07133917396746e-06, |
| "loss": 0.2927, |
| "step": 2325 |
| }, |
| { |
| "epoch": 2.6167698368036016, |
| "grad_norm": 0.21222804494470973, |
| "learning_rate": 7.050479766374634e-06, |
| "loss": 0.281, |
| "step": 2326 |
| }, |
| { |
| "epoch": 2.6178953292065277, |
| "grad_norm": 0.20938841222313492, |
| "learning_rate": 7.0296203587818105e-06, |
| "loss": 0.2845, |
| "step": 2327 |
| }, |
| { |
| "epoch": 2.619020821609454, |
| "grad_norm": 0.21620523521239354, |
| "learning_rate": 7.008760951188987e-06, |
| "loss": 0.2801, |
| "step": 2328 |
| }, |
| { |
| "epoch": 2.6201463140123806, |
| "grad_norm": 0.2506118426158015, |
| "learning_rate": 6.987901543596162e-06, |
| "loss": 0.2954, |
| "step": 2329 |
| }, |
| { |
| "epoch": 2.6212718064153067, |
| "grad_norm": 0.1973550955823624, |
| "learning_rate": 6.967042136003338e-06, |
| "loss": 0.2686, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.622397298818233, |
| "grad_norm": 0.2066937107804017, |
| "learning_rate": 6.946182728410513e-06, |
| "loss": 0.2774, |
| "step": 2331 |
| }, |
| { |
| "epoch": 2.623522791221159, |
| "grad_norm": 0.2202250524273311, |
| "learning_rate": 6.9253233208176895e-06, |
| "loss": 0.3068, |
| "step": 2332 |
| }, |
| { |
| "epoch": 2.6246482836240856, |
| "grad_norm": 0.21755861209547087, |
| "learning_rate": 6.904463913224865e-06, |
| "loss": 0.2723, |
| "step": 2333 |
| }, |
| { |
| "epoch": 2.6257737760270117, |
| "grad_norm": 0.21926633058957373, |
| "learning_rate": 6.883604505632041e-06, |
| "loss": 0.2903, |
| "step": 2334 |
| }, |
| { |
| "epoch": 2.626899268429938, |
| "grad_norm": 0.2130377637928427, |
| "learning_rate": 6.862745098039216e-06, |
| "loss": 0.2804, |
| "step": 2335 |
| }, |
| { |
| "epoch": 2.628024760832864, |
| "grad_norm": 0.19225627811370669, |
| "learning_rate": 6.841885690446391e-06, |
| "loss": 0.2887, |
| "step": 2336 |
| }, |
| { |
| "epoch": 2.6291502532357907, |
| "grad_norm": 0.20057254466754687, |
| "learning_rate": 6.821026282853567e-06, |
| "loss": 0.2837, |
| "step": 2337 |
| }, |
| { |
| "epoch": 2.630275745638717, |
| "grad_norm": 0.23011758988414296, |
| "learning_rate": 6.800166875260743e-06, |
| "loss": 0.2931, |
| "step": 2338 |
| }, |
| { |
| "epoch": 2.631401238041643, |
| "grad_norm": 0.23666344897934585, |
| "learning_rate": 6.779307467667918e-06, |
| "loss": 0.2782, |
| "step": 2339 |
| }, |
| { |
| "epoch": 2.6325267304445696, |
| "grad_norm": 0.2147014731643971, |
| "learning_rate": 6.758448060075094e-06, |
| "loss": 0.2912, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.6336522228474957, |
| "grad_norm": 0.21234094073131748, |
| "learning_rate": 6.73758865248227e-06, |
| "loss": 0.2914, |
| "step": 2341 |
| }, |
| { |
| "epoch": 2.634777715250422, |
| "grad_norm": 0.20060222937545014, |
| "learning_rate": 6.716729244889446e-06, |
| "loss": 0.2766, |
| "step": 2342 |
| }, |
| { |
| "epoch": 2.635903207653348, |
| "grad_norm": 0.21917036852395436, |
| "learning_rate": 6.695869837296621e-06, |
| "loss": 0.3013, |
| "step": 2343 |
| }, |
| { |
| "epoch": 2.6370287000562747, |
| "grad_norm": 0.20864615144591028, |
| "learning_rate": 6.675010429703797e-06, |
| "loss": 0.2792, |
| "step": 2344 |
| }, |
| { |
| "epoch": 2.6381541924592007, |
| "grad_norm": 0.2192912221143167, |
| "learning_rate": 6.6541510221109725e-06, |
| "loss": 0.284, |
| "step": 2345 |
| }, |
| { |
| "epoch": 2.639279684862127, |
| "grad_norm": 0.2039309149630558, |
| "learning_rate": 6.633291614518149e-06, |
| "loss": 0.2948, |
| "step": 2346 |
| }, |
| { |
| "epoch": 2.6404051772650536, |
| "grad_norm": 0.22259191163490286, |
| "learning_rate": 6.612432206925323e-06, |
| "loss": 0.2737, |
| "step": 2347 |
| }, |
| { |
| "epoch": 2.6415306696679797, |
| "grad_norm": 0.2419705952514684, |
| "learning_rate": 6.591572799332499e-06, |
| "loss": 0.2894, |
| "step": 2348 |
| }, |
| { |
| "epoch": 2.642656162070906, |
| "grad_norm": 0.20985587856472956, |
| "learning_rate": 6.5707133917396745e-06, |
| "loss": 0.2947, |
| "step": 2349 |
| }, |
| { |
| "epoch": 2.643781654473832, |
| "grad_norm": 0.20042601124344012, |
| "learning_rate": 6.549853984146851e-06, |
| "loss": 0.271, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.6449071468767587, |
| "grad_norm": 0.20416712565695233, |
| "learning_rate": 6.528994576554026e-06, |
| "loss": 0.2743, |
| "step": 2351 |
| }, |
| { |
| "epoch": 2.646032639279685, |
| "grad_norm": 0.2184086174145368, |
| "learning_rate": 6.508135168961202e-06, |
| "loss": 0.3015, |
| "step": 2352 |
| }, |
| { |
| "epoch": 2.647158131682611, |
| "grad_norm": 0.24131101578961572, |
| "learning_rate": 6.487275761368377e-06, |
| "loss": 0.2775, |
| "step": 2353 |
| }, |
| { |
| "epoch": 2.648283624085537, |
| "grad_norm": 0.22701304755475593, |
| "learning_rate": 6.4664163537755535e-06, |
| "loss": 0.2857, |
| "step": 2354 |
| }, |
| { |
| "epoch": 2.6494091164884637, |
| "grad_norm": 0.20712379248467747, |
| "learning_rate": 6.445556946182729e-06, |
| "loss": 0.2758, |
| "step": 2355 |
| }, |
| { |
| "epoch": 2.65053460889139, |
| "grad_norm": 0.23341164321770264, |
| "learning_rate": 6.424697538589905e-06, |
| "loss": 0.268, |
| "step": 2356 |
| }, |
| { |
| "epoch": 2.651660101294316, |
| "grad_norm": 0.24310923952152994, |
| "learning_rate": 6.403838130997079e-06, |
| "loss": 0.2833, |
| "step": 2357 |
| }, |
| { |
| "epoch": 2.6527855936972426, |
| "grad_norm": 0.229839416220484, |
| "learning_rate": 6.3829787234042555e-06, |
| "loss": 0.2889, |
| "step": 2358 |
| }, |
| { |
| "epoch": 2.6539110861001687, |
| "grad_norm": 0.23215272147883, |
| "learning_rate": 6.362119315811431e-06, |
| "loss": 0.2975, |
| "step": 2359 |
| }, |
| { |
| "epoch": 2.655036578503095, |
| "grad_norm": 0.24954803338960216, |
| "learning_rate": 6.341259908218607e-06, |
| "loss": 0.2946, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.6561620709060216, |
| "grad_norm": 0.23026522507576283, |
| "learning_rate": 6.320400500625782e-06, |
| "loss": 0.268, |
| "step": 2361 |
| }, |
| { |
| "epoch": 2.6572875633089477, |
| "grad_norm": 0.23021270773997743, |
| "learning_rate": 6.299541093032958e-06, |
| "loss": 0.261, |
| "step": 2362 |
| }, |
| { |
| "epoch": 2.6584130557118737, |
| "grad_norm": 0.21115861014586346, |
| "learning_rate": 6.278681685440134e-06, |
| "loss": 0.2771, |
| "step": 2363 |
| }, |
| { |
| "epoch": 2.6595385481148, |
| "grad_norm": 0.2405585243153947, |
| "learning_rate": 6.25782227784731e-06, |
| "loss": 0.2806, |
| "step": 2364 |
| }, |
| { |
| "epoch": 2.6606640405177266, |
| "grad_norm": 0.2497609269658003, |
| "learning_rate": 6.236962870254485e-06, |
| "loss": 0.2874, |
| "step": 2365 |
| }, |
| { |
| "epoch": 2.6617895329206527, |
| "grad_norm": 0.22645791008309762, |
| "learning_rate": 6.21610346266166e-06, |
| "loss": 0.3004, |
| "step": 2366 |
| }, |
| { |
| "epoch": 2.662915025323579, |
| "grad_norm": 0.2197914591989606, |
| "learning_rate": 6.1952440550688365e-06, |
| "loss": 0.2794, |
| "step": 2367 |
| }, |
| { |
| "epoch": 2.664040517726505, |
| "grad_norm": 0.22234883908095063, |
| "learning_rate": 6.174384647476012e-06, |
| "loss": 0.3004, |
| "step": 2368 |
| }, |
| { |
| "epoch": 2.6651660101294317, |
| "grad_norm": 0.24165293762861514, |
| "learning_rate": 6.153525239883188e-06, |
| "loss": 0.2822, |
| "step": 2369 |
| }, |
| { |
| "epoch": 2.666291502532358, |
| "grad_norm": 0.2552276571924829, |
| "learning_rate": 6.132665832290363e-06, |
| "loss": 0.2909, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.667416994935284, |
| "grad_norm": 0.21066092081346655, |
| "learning_rate": 6.1118064246975385e-06, |
| "loss": 0.2798, |
| "step": 2371 |
| }, |
| { |
| "epoch": 2.66854248733821, |
| "grad_norm": 0.2142596843222076, |
| "learning_rate": 6.090947017104715e-06, |
| "loss": 0.2776, |
| "step": 2372 |
| }, |
| { |
| "epoch": 2.6696679797411367, |
| "grad_norm": 0.24551341038876937, |
| "learning_rate": 6.07008760951189e-06, |
| "loss": 0.2865, |
| "step": 2373 |
| }, |
| { |
| "epoch": 2.670793472144063, |
| "grad_norm": 0.2340361094417635, |
| "learning_rate": 6.049228201919066e-06, |
| "loss": 0.2904, |
| "step": 2374 |
| }, |
| { |
| "epoch": 2.671918964546989, |
| "grad_norm": 0.21307302351051388, |
| "learning_rate": 6.028368794326241e-06, |
| "loss": 0.2937, |
| "step": 2375 |
| }, |
| { |
| "epoch": 2.6730444569499157, |
| "grad_norm": 0.2512900946420438, |
| "learning_rate": 6.007509386733417e-06, |
| "loss": 0.2842, |
| "step": 2376 |
| }, |
| { |
| "epoch": 2.6741699493528417, |
| "grad_norm": 0.20979466873445987, |
| "learning_rate": 5.986649979140593e-06, |
| "loss": 0.2931, |
| "step": 2377 |
| }, |
| { |
| "epoch": 2.675295441755768, |
| "grad_norm": 0.21119960362679138, |
| "learning_rate": 5.965790571547768e-06, |
| "loss": 0.2842, |
| "step": 2378 |
| }, |
| { |
| "epoch": 2.6764209341586946, |
| "grad_norm": 0.19883138313973867, |
| "learning_rate": 5.944931163954944e-06, |
| "loss": 0.2771, |
| "step": 2379 |
| }, |
| { |
| "epoch": 2.6775464265616207, |
| "grad_norm": 0.19968752507295803, |
| "learning_rate": 5.92407175636212e-06, |
| "loss": 0.2751, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.678671918964547, |
| "grad_norm": 0.22015736000540867, |
| "learning_rate": 5.903212348769295e-06, |
| "loss": 0.2799, |
| "step": 2381 |
| }, |
| { |
| "epoch": 2.679797411367473, |
| "grad_norm": 0.21339223053410786, |
| "learning_rate": 5.882352941176471e-06, |
| "loss": 0.2869, |
| "step": 2382 |
| }, |
| { |
| "epoch": 2.6809229037703997, |
| "grad_norm": 0.19762335590197813, |
| "learning_rate": 5.861493533583646e-06, |
| "loss": 0.2829, |
| "step": 2383 |
| }, |
| { |
| "epoch": 2.6820483961733257, |
| "grad_norm": 0.2074037589352283, |
| "learning_rate": 5.840634125990822e-06, |
| "loss": 0.2909, |
| "step": 2384 |
| }, |
| { |
| "epoch": 2.683173888576252, |
| "grad_norm": 0.2117788165142603, |
| "learning_rate": 5.8197747183979985e-06, |
| "loss": 0.2732, |
| "step": 2385 |
| }, |
| { |
| "epoch": 2.684299380979178, |
| "grad_norm": 0.23282240403764579, |
| "learning_rate": 5.798915310805173e-06, |
| "loss": 0.2939, |
| "step": 2386 |
| }, |
| { |
| "epoch": 2.6854248733821047, |
| "grad_norm": 0.21921536525716026, |
| "learning_rate": 5.778055903212349e-06, |
| "loss": 0.2844, |
| "step": 2387 |
| }, |
| { |
| "epoch": 2.686550365785031, |
| "grad_norm": 0.2104234762422923, |
| "learning_rate": 5.757196495619524e-06, |
| "loss": 0.2893, |
| "step": 2388 |
| }, |
| { |
| "epoch": 2.687675858187957, |
| "grad_norm": 0.20466965592113787, |
| "learning_rate": 5.7363370880267005e-06, |
| "loss": 0.2839, |
| "step": 2389 |
| }, |
| { |
| "epoch": 2.6888013505908837, |
| "grad_norm": 0.21641871937130808, |
| "learning_rate": 5.715477680433877e-06, |
| "loss": 0.2813, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.6899268429938097, |
| "grad_norm": 0.19783335996013016, |
| "learning_rate": 5.694618272841051e-06, |
| "loss": 0.2681, |
| "step": 2391 |
| }, |
| { |
| "epoch": 2.691052335396736, |
| "grad_norm": 0.23211934348643096, |
| "learning_rate": 5.673758865248227e-06, |
| "loss": 0.2737, |
| "step": 2392 |
| }, |
| { |
| "epoch": 2.692177827799662, |
| "grad_norm": 0.21211198195733516, |
| "learning_rate": 5.6528994576554025e-06, |
| "loss": 0.293, |
| "step": 2393 |
| }, |
| { |
| "epoch": 2.6933033202025887, |
| "grad_norm": 0.19763398145044694, |
| "learning_rate": 5.632040050062579e-06, |
| "loss": 0.2669, |
| "step": 2394 |
| }, |
| { |
| "epoch": 2.6944288126055147, |
| "grad_norm": 0.2084177545225768, |
| "learning_rate": 5.611180642469755e-06, |
| "loss": 0.2727, |
| "step": 2395 |
| }, |
| { |
| "epoch": 2.695554305008441, |
| "grad_norm": 0.23874699170463168, |
| "learning_rate": 5.590321234876929e-06, |
| "loss": 0.2871, |
| "step": 2396 |
| }, |
| { |
| "epoch": 2.6966797974113677, |
| "grad_norm": 0.22847881193172173, |
| "learning_rate": 5.569461827284105e-06, |
| "loss": 0.2704, |
| "step": 2397 |
| }, |
| { |
| "epoch": 2.6978052898142937, |
| "grad_norm": 0.21260192114331203, |
| "learning_rate": 5.548602419691281e-06, |
| "loss": 0.2765, |
| "step": 2398 |
| }, |
| { |
| "epoch": 2.69893078221722, |
| "grad_norm": 0.2481199298540293, |
| "learning_rate": 5.527743012098457e-06, |
| "loss": 0.2978, |
| "step": 2399 |
| }, |
| { |
| "epoch": 2.700056274620146, |
| "grad_norm": 0.22837599699280178, |
| "learning_rate": 5.506883604505633e-06, |
| "loss": 0.2778, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.7011817670230727, |
| "grad_norm": 0.20862397407681496, |
| "learning_rate": 5.486024196912807e-06, |
| "loss": 0.282, |
| "step": 2401 |
| }, |
| { |
| "epoch": 2.702307259425999, |
| "grad_norm": 0.21499220301713443, |
| "learning_rate": 5.4651647893199835e-06, |
| "loss": 0.2946, |
| "step": 2402 |
| }, |
| { |
| "epoch": 2.703432751828925, |
| "grad_norm": 0.21876537448943154, |
| "learning_rate": 5.444305381727159e-06, |
| "loss": 0.2999, |
| "step": 2403 |
| }, |
| { |
| "epoch": 2.704558244231851, |
| "grad_norm": 0.19584715347664308, |
| "learning_rate": 5.423445974134335e-06, |
| "loss": 0.2811, |
| "step": 2404 |
| }, |
| { |
| "epoch": 2.7056837366347777, |
| "grad_norm": 0.199638455026977, |
| "learning_rate": 5.402586566541511e-06, |
| "loss": 0.2922, |
| "step": 2405 |
| }, |
| { |
| "epoch": 2.706809229037704, |
| "grad_norm": 0.1959593267413218, |
| "learning_rate": 5.381727158948686e-06, |
| "loss": 0.2803, |
| "step": 2406 |
| }, |
| { |
| "epoch": 2.70793472144063, |
| "grad_norm": 0.2172940746080715, |
| "learning_rate": 5.360867751355862e-06, |
| "loss": 0.2998, |
| "step": 2407 |
| }, |
| { |
| "epoch": 2.7090602138435567, |
| "grad_norm": 0.21226736116643366, |
| "learning_rate": 5.340008343763037e-06, |
| "loss": 0.2793, |
| "step": 2408 |
| }, |
| { |
| "epoch": 2.7101857062464827, |
| "grad_norm": 0.2292349157751999, |
| "learning_rate": 5.319148936170213e-06, |
| "loss": 0.2925, |
| "step": 2409 |
| }, |
| { |
| "epoch": 2.711311198649409, |
| "grad_norm": 0.2065757335406282, |
| "learning_rate": 5.298289528577389e-06, |
| "loss": 0.2852, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.7124366910523356, |
| "grad_norm": 0.21318268769213233, |
| "learning_rate": 5.2774301209845645e-06, |
| "loss": 0.3101, |
| "step": 2411 |
| }, |
| { |
| "epoch": 2.7135621834552617, |
| "grad_norm": 0.19490619907298698, |
| "learning_rate": 5.25657071339174e-06, |
| "loss": 0.2845, |
| "step": 2412 |
| }, |
| { |
| "epoch": 2.7146876758581877, |
| "grad_norm": 0.20387752435372739, |
| "learning_rate": 5.235711305798915e-06, |
| "loss": 0.2891, |
| "step": 2413 |
| }, |
| { |
| "epoch": 2.715813168261114, |
| "grad_norm": 0.21901749886212424, |
| "learning_rate": 5.214851898206091e-06, |
| "loss": 0.2829, |
| "step": 2414 |
| }, |
| { |
| "epoch": 2.7169386606640407, |
| "grad_norm": 0.21269033641949117, |
| "learning_rate": 5.193992490613267e-06, |
| "loss": 0.2866, |
| "step": 2415 |
| }, |
| { |
| "epoch": 2.7180641530669667, |
| "grad_norm": 0.204672643305428, |
| "learning_rate": 5.173133083020443e-06, |
| "loss": 0.2928, |
| "step": 2416 |
| }, |
| { |
| "epoch": 2.719189645469893, |
| "grad_norm": 0.22695422804231133, |
| "learning_rate": 5.152273675427618e-06, |
| "loss": 0.3034, |
| "step": 2417 |
| }, |
| { |
| "epoch": 2.720315137872819, |
| "grad_norm": 0.20992539451243944, |
| "learning_rate": 5.131414267834793e-06, |
| "loss": 0.2756, |
| "step": 2418 |
| }, |
| { |
| "epoch": 2.7214406302757457, |
| "grad_norm": 0.23946364092165132, |
| "learning_rate": 5.110554860241969e-06, |
| "loss": 0.2619, |
| "step": 2419 |
| }, |
| { |
| "epoch": 2.722566122678672, |
| "grad_norm": 0.22133441280710264, |
| "learning_rate": 5.0896954526491455e-06, |
| "loss": 0.3024, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.723691615081598, |
| "grad_norm": 0.19132609073407808, |
| "learning_rate": 5.068836045056321e-06, |
| "loss": 0.2809, |
| "step": 2421 |
| }, |
| { |
| "epoch": 2.724817107484524, |
| "grad_norm": 0.20537504751422384, |
| "learning_rate": 5.047976637463496e-06, |
| "loss": 0.2981, |
| "step": 2422 |
| }, |
| { |
| "epoch": 2.7259425998874507, |
| "grad_norm": 0.2026641684698212, |
| "learning_rate": 5.027117229870671e-06, |
| "loss": 0.2831, |
| "step": 2423 |
| }, |
| { |
| "epoch": 2.727068092290377, |
| "grad_norm": 0.220392207872778, |
| "learning_rate": 5.0062578222778475e-06, |
| "loss": 0.2974, |
| "step": 2424 |
| }, |
| { |
| "epoch": 2.728193584693303, |
| "grad_norm": 0.20374793230025023, |
| "learning_rate": 4.985398414685024e-06, |
| "loss": 0.2843, |
| "step": 2425 |
| }, |
| { |
| "epoch": 2.7293190770962297, |
| "grad_norm": 0.2182187646308083, |
| "learning_rate": 4.964539007092199e-06, |
| "loss": 0.2827, |
| "step": 2426 |
| }, |
| { |
| "epoch": 2.7304445694991557, |
| "grad_norm": 0.20515095934912667, |
| "learning_rate": 4.943679599499374e-06, |
| "loss": 0.2823, |
| "step": 2427 |
| }, |
| { |
| "epoch": 2.731570061902082, |
| "grad_norm": 0.2274911617803538, |
| "learning_rate": 4.9228201919065495e-06, |
| "loss": 0.2874, |
| "step": 2428 |
| }, |
| { |
| "epoch": 2.7326955543050087, |
| "grad_norm": 0.20240468754950888, |
| "learning_rate": 4.901960784313726e-06, |
| "loss": 0.2901, |
| "step": 2429 |
| }, |
| { |
| "epoch": 2.7338210467079347, |
| "grad_norm": 0.21135908550005916, |
| "learning_rate": 4.881101376720902e-06, |
| "loss": 0.2792, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.734946539110861, |
| "grad_norm": 0.21034155921896855, |
| "learning_rate": 4.860241969128077e-06, |
| "loss": 0.2911, |
| "step": 2431 |
| }, |
| { |
| "epoch": 2.736072031513787, |
| "grad_norm": 0.2073170761162975, |
| "learning_rate": 4.839382561535253e-06, |
| "loss": 0.2768, |
| "step": 2432 |
| }, |
| { |
| "epoch": 2.7371975239167137, |
| "grad_norm": 0.20922141980047607, |
| "learning_rate": 4.818523153942428e-06, |
| "loss": 0.2818, |
| "step": 2433 |
| }, |
| { |
| "epoch": 2.7383230163196397, |
| "grad_norm": 0.20817722346637343, |
| "learning_rate": 4.797663746349604e-06, |
| "loss": 0.2799, |
| "step": 2434 |
| }, |
| { |
| "epoch": 2.739448508722566, |
| "grad_norm": 0.20367161604931538, |
| "learning_rate": 4.77680433875678e-06, |
| "loss": 0.3002, |
| "step": 2435 |
| }, |
| { |
| "epoch": 2.740574001125492, |
| "grad_norm": 0.20301772018260328, |
| "learning_rate": 4.755944931163955e-06, |
| "loss": 0.2734, |
| "step": 2436 |
| }, |
| { |
| "epoch": 2.7416994935284187, |
| "grad_norm": 0.19056871488718577, |
| "learning_rate": 4.735085523571131e-06, |
| "loss": 0.2753, |
| "step": 2437 |
| }, |
| { |
| "epoch": 2.742824985931345, |
| "grad_norm": 0.25569233009986714, |
| "learning_rate": 4.714226115978306e-06, |
| "loss": 0.2991, |
| "step": 2438 |
| }, |
| { |
| "epoch": 2.743950478334271, |
| "grad_norm": 0.20649414202471206, |
| "learning_rate": 4.693366708385482e-06, |
| "loss": 0.2754, |
| "step": 2439 |
| }, |
| { |
| "epoch": 2.7450759707371977, |
| "grad_norm": 0.20526360074859962, |
| "learning_rate": 4.672507300792658e-06, |
| "loss": 0.2846, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.7462014631401237, |
| "grad_norm": 0.19992362065471733, |
| "learning_rate": 4.651647893199833e-06, |
| "loss": 0.2883, |
| "step": 2441 |
| }, |
| { |
| "epoch": 2.74732695554305, |
| "grad_norm": 0.20760115590523082, |
| "learning_rate": 4.6307884856070095e-06, |
| "loss": 0.2734, |
| "step": 2442 |
| }, |
| { |
| "epoch": 2.748452447945976, |
| "grad_norm": 0.20022172726588305, |
| "learning_rate": 4.609929078014184e-06, |
| "loss": 0.2746, |
| "step": 2443 |
| }, |
| { |
| "epoch": 2.7495779403489027, |
| "grad_norm": 0.21742210771505113, |
| "learning_rate": 4.58906967042136e-06, |
| "loss": 0.2712, |
| "step": 2444 |
| }, |
| { |
| "epoch": 2.7507034327518287, |
| "grad_norm": 0.1894221520336593, |
| "learning_rate": 4.568210262828536e-06, |
| "loss": 0.269, |
| "step": 2445 |
| }, |
| { |
| "epoch": 2.751828925154755, |
| "grad_norm": 0.2014338015112663, |
| "learning_rate": 4.5473508552357115e-06, |
| "loss": 0.2754, |
| "step": 2446 |
| }, |
| { |
| "epoch": 2.7529544175576817, |
| "grad_norm": 0.2767717819280576, |
| "learning_rate": 4.526491447642888e-06, |
| "loss": 0.2704, |
| "step": 2447 |
| }, |
| { |
| "epoch": 2.7540799099606077, |
| "grad_norm": 0.2380476325461093, |
| "learning_rate": 4.505632040050062e-06, |
| "loss": 0.2861, |
| "step": 2448 |
| }, |
| { |
| "epoch": 2.755205402363534, |
| "grad_norm": 0.21164580868587568, |
| "learning_rate": 4.484772632457238e-06, |
| "loss": 0.2768, |
| "step": 2449 |
| }, |
| { |
| "epoch": 2.75633089476646, |
| "grad_norm": 0.2008040936085221, |
| "learning_rate": 4.463913224864414e-06, |
| "loss": 0.275, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.7574563871693867, |
| "grad_norm": 0.227682709177952, |
| "learning_rate": 4.44305381727159e-06, |
| "loss": 0.3032, |
| "step": 2451 |
| }, |
| { |
| "epoch": 2.758581879572313, |
| "grad_norm": 0.24775815808302365, |
| "learning_rate": 4.422194409678766e-06, |
| "loss": 0.2952, |
| "step": 2452 |
| }, |
| { |
| "epoch": 2.759707371975239, |
| "grad_norm": 0.23280742706720892, |
| "learning_rate": 4.401335002085941e-06, |
| "loss": 0.2773, |
| "step": 2453 |
| }, |
| { |
| "epoch": 2.760832864378165, |
| "grad_norm": 0.19451290711254568, |
| "learning_rate": 4.380475594493116e-06, |
| "loss": 0.2693, |
| "step": 2454 |
| }, |
| { |
| "epoch": 2.7619583567810917, |
| "grad_norm": 0.20939579388836216, |
| "learning_rate": 4.3596161869002925e-06, |
| "loss": 0.2866, |
| "step": 2455 |
| }, |
| { |
| "epoch": 2.763083849184018, |
| "grad_norm": 0.20672270738688484, |
| "learning_rate": 4.338756779307468e-06, |
| "loss": 0.2904, |
| "step": 2456 |
| }, |
| { |
| "epoch": 2.764209341586944, |
| "grad_norm": 0.19033409152598357, |
| "learning_rate": 4.317897371714644e-06, |
| "loss": 0.2718, |
| "step": 2457 |
| }, |
| { |
| "epoch": 2.7653348339898707, |
| "grad_norm": 0.2166789978324445, |
| "learning_rate": 4.297037964121819e-06, |
| "loss": 0.2983, |
| "step": 2458 |
| }, |
| { |
| "epoch": 2.7664603263927967, |
| "grad_norm": 0.216447417269072, |
| "learning_rate": 4.2761785565289945e-06, |
| "loss": 0.2858, |
| "step": 2459 |
| }, |
| { |
| "epoch": 2.767585818795723, |
| "grad_norm": 0.362675866181273, |
| "learning_rate": 4.255319148936171e-06, |
| "loss": 0.2881, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.7687113111986497, |
| "grad_norm": 0.19521272525143493, |
| "learning_rate": 4.234459741343346e-06, |
| "loss": 0.2724, |
| "step": 2461 |
| }, |
| { |
| "epoch": 2.7698368036015757, |
| "grad_norm": 0.19682316401858674, |
| "learning_rate": 4.213600333750522e-06, |
| "loss": 0.2859, |
| "step": 2462 |
| }, |
| { |
| "epoch": 2.7709622960045017, |
| "grad_norm": 0.21277271670047132, |
| "learning_rate": 4.192740926157697e-06, |
| "loss": 0.2974, |
| "step": 2463 |
| }, |
| { |
| "epoch": 2.772087788407428, |
| "grad_norm": 0.21323098001422092, |
| "learning_rate": 4.171881518564873e-06, |
| "loss": 0.2858, |
| "step": 2464 |
| }, |
| { |
| "epoch": 2.7732132808103547, |
| "grad_norm": 0.2443899119261561, |
| "learning_rate": 4.151022110972049e-06, |
| "loss": 0.2851, |
| "step": 2465 |
| }, |
| { |
| "epoch": 2.7743387732132807, |
| "grad_norm": 0.2139564808006101, |
| "learning_rate": 4.130162703379224e-06, |
| "loss": 0.2949, |
| "step": 2466 |
| }, |
| { |
| "epoch": 2.775464265616207, |
| "grad_norm": 0.2212119000303061, |
| "learning_rate": 4.1093032957864e-06, |
| "loss": 0.2751, |
| "step": 2467 |
| }, |
| { |
| "epoch": 2.776589758019133, |
| "grad_norm": 0.20484020499228098, |
| "learning_rate": 4.0884438881935755e-06, |
| "loss": 0.2771, |
| "step": 2468 |
| }, |
| { |
| "epoch": 2.7777152504220597, |
| "grad_norm": 0.20123952910830462, |
| "learning_rate": 4.067584480600751e-06, |
| "loss": 0.3006, |
| "step": 2469 |
| }, |
| { |
| "epoch": 2.778840742824986, |
| "grad_norm": 0.21577294384729115, |
| "learning_rate": 4.046725073007927e-06, |
| "loss": 0.2996, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.779966235227912, |
| "grad_norm": 0.22582357217712795, |
| "learning_rate": 4.025865665415102e-06, |
| "loss": 0.2717, |
| "step": 2471 |
| }, |
| { |
| "epoch": 2.7810917276308382, |
| "grad_norm": 0.2134397866045082, |
| "learning_rate": 4.005006257822278e-06, |
| "loss": 0.2825, |
| "step": 2472 |
| }, |
| { |
| "epoch": 2.7822172200337647, |
| "grad_norm": 0.20324964622528435, |
| "learning_rate": 3.984146850229454e-06, |
| "loss": 0.2799, |
| "step": 2473 |
| }, |
| { |
| "epoch": 2.783342712436691, |
| "grad_norm": 0.20795174662693527, |
| "learning_rate": 3.963287442636629e-06, |
| "loss": 0.2854, |
| "step": 2474 |
| }, |
| { |
| "epoch": 2.784468204839617, |
| "grad_norm": 0.1997012004956189, |
| "learning_rate": 3.942428035043805e-06, |
| "loss": 0.2764, |
| "step": 2475 |
| }, |
| { |
| "epoch": 2.7855936972425437, |
| "grad_norm": 0.2096005762129211, |
| "learning_rate": 3.92156862745098e-06, |
| "loss": 0.276, |
| "step": 2476 |
| }, |
| { |
| "epoch": 2.7867191896454697, |
| "grad_norm": 0.2177380985810915, |
| "learning_rate": 3.9007092198581565e-06, |
| "loss": 0.2893, |
| "step": 2477 |
| }, |
| { |
| "epoch": 2.787844682048396, |
| "grad_norm": 0.22482324020736472, |
| "learning_rate": 3.879849812265332e-06, |
| "loss": 0.2657, |
| "step": 2478 |
| }, |
| { |
| "epoch": 2.7889701744513227, |
| "grad_norm": 0.19885926541241514, |
| "learning_rate": 3.858990404672508e-06, |
| "loss": 0.2884, |
| "step": 2479 |
| }, |
| { |
| "epoch": 2.7900956668542487, |
| "grad_norm": 0.19908222649936524, |
| "learning_rate": 3.838130997079683e-06, |
| "loss": 0.2742, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.791221159257175, |
| "grad_norm": 0.20760067726563736, |
| "learning_rate": 3.8172715894868585e-06, |
| "loss": 0.296, |
| "step": 2481 |
| }, |
| { |
| "epoch": 2.792346651660101, |
| "grad_norm": 0.2533774016177821, |
| "learning_rate": 3.7964121818940346e-06, |
| "loss": 0.2861, |
| "step": 2482 |
| }, |
| { |
| "epoch": 2.7934721440630277, |
| "grad_norm": 0.21052580972405535, |
| "learning_rate": 3.7755527743012103e-06, |
| "loss": 0.288, |
| "step": 2483 |
| }, |
| { |
| "epoch": 2.7945976364659537, |
| "grad_norm": 0.206699855575665, |
| "learning_rate": 3.754693366708386e-06, |
| "loss": 0.288, |
| "step": 2484 |
| }, |
| { |
| "epoch": 2.79572312886888, |
| "grad_norm": 0.19585545352446415, |
| "learning_rate": 3.733833959115561e-06, |
| "loss": 0.2747, |
| "step": 2485 |
| }, |
| { |
| "epoch": 2.7968486212718062, |
| "grad_norm": 0.2062750655598761, |
| "learning_rate": 3.7129745515227366e-06, |
| "loss": 0.275, |
| "step": 2486 |
| }, |
| { |
| "epoch": 2.7979741136747327, |
| "grad_norm": 0.19740523384916275, |
| "learning_rate": 3.6921151439299128e-06, |
| "loss": 0.2762, |
| "step": 2487 |
| }, |
| { |
| "epoch": 2.799099606077659, |
| "grad_norm": 0.23460327350417823, |
| "learning_rate": 3.6712557363370885e-06, |
| "loss": 0.2663, |
| "step": 2488 |
| }, |
| { |
| "epoch": 2.800225098480585, |
| "grad_norm": 0.27745757559360934, |
| "learning_rate": 3.650396328744264e-06, |
| "loss": 0.318, |
| "step": 2489 |
| }, |
| { |
| "epoch": 2.8013505908835117, |
| "grad_norm": 0.20040376636692844, |
| "learning_rate": 3.629536921151439e-06, |
| "loss": 0.2947, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.8024760832864377, |
| "grad_norm": 0.2065083158200927, |
| "learning_rate": 3.608677513558615e-06, |
| "loss": 0.2856, |
| "step": 2491 |
| }, |
| { |
| "epoch": 2.803601575689364, |
| "grad_norm": 0.2609652931637163, |
| "learning_rate": 3.587818105965791e-06, |
| "loss": 0.2778, |
| "step": 2492 |
| }, |
| { |
| "epoch": 2.80472706809229, |
| "grad_norm": 0.19707420225094988, |
| "learning_rate": 3.5669586983729666e-06, |
| "loss": 0.2847, |
| "step": 2493 |
| }, |
| { |
| "epoch": 2.8058525604952167, |
| "grad_norm": 0.214409640748111, |
| "learning_rate": 3.5460992907801423e-06, |
| "loss": 0.2728, |
| "step": 2494 |
| }, |
| { |
| "epoch": 2.8069780528981427, |
| "grad_norm": 0.21397007514173588, |
| "learning_rate": 3.525239883187317e-06, |
| "loss": 0.2939, |
| "step": 2495 |
| }, |
| { |
| "epoch": 2.808103545301069, |
| "grad_norm": 0.2211780277225568, |
| "learning_rate": 3.5043804755944933e-06, |
| "loss": 0.2941, |
| "step": 2496 |
| }, |
| { |
| "epoch": 2.8092290377039957, |
| "grad_norm": 0.19746449192767923, |
| "learning_rate": 3.483521068001669e-06, |
| "loss": 0.2865, |
| "step": 2497 |
| }, |
| { |
| "epoch": 2.8103545301069217, |
| "grad_norm": 0.24933709829603812, |
| "learning_rate": 3.4626616604088447e-06, |
| "loss": 0.292, |
| "step": 2498 |
| }, |
| { |
| "epoch": 2.811480022509848, |
| "grad_norm": 0.20025420830978435, |
| "learning_rate": 3.4418022528160205e-06, |
| "loss": 0.2936, |
| "step": 2499 |
| }, |
| { |
| "epoch": 2.812605514912774, |
| "grad_norm": 0.21106860467217742, |
| "learning_rate": 3.4209428452231953e-06, |
| "loss": 0.2749, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.8137310073157007, |
| "grad_norm": 0.244173358210336, |
| "learning_rate": 3.4000834376303715e-06, |
| "loss": 0.2921, |
| "step": 2501 |
| }, |
| { |
| "epoch": 2.814856499718627, |
| "grad_norm": 0.23708488619881102, |
| "learning_rate": 3.379224030037547e-06, |
| "loss": 0.286, |
| "step": 2502 |
| }, |
| { |
| "epoch": 2.815981992121553, |
| "grad_norm": 0.20203389839535874, |
| "learning_rate": 3.358364622444723e-06, |
| "loss": 0.2823, |
| "step": 2503 |
| }, |
| { |
| "epoch": 2.8171074845244792, |
| "grad_norm": 0.20180941230046498, |
| "learning_rate": 3.3375052148518986e-06, |
| "loss": 0.2795, |
| "step": 2504 |
| }, |
| { |
| "epoch": 2.8182329769274057, |
| "grad_norm": 0.1923974204466329, |
| "learning_rate": 3.3166458072590743e-06, |
| "loss": 0.2719, |
| "step": 2505 |
| }, |
| { |
| "epoch": 2.819358469330332, |
| "grad_norm": 0.210276870318239, |
| "learning_rate": 3.2957863996662496e-06, |
| "loss": 0.2831, |
| "step": 2506 |
| }, |
| { |
| "epoch": 2.820483961733258, |
| "grad_norm": 0.21238988385557064, |
| "learning_rate": 3.2749269920734253e-06, |
| "loss": 0.278, |
| "step": 2507 |
| }, |
| { |
| "epoch": 2.8216094541361847, |
| "grad_norm": 0.19338349502478416, |
| "learning_rate": 3.254067584480601e-06, |
| "loss": 0.2717, |
| "step": 2508 |
| }, |
| { |
| "epoch": 2.8227349465391107, |
| "grad_norm": 0.19141784353147265, |
| "learning_rate": 3.2332081768877767e-06, |
| "loss": 0.2687, |
| "step": 2509 |
| }, |
| { |
| "epoch": 2.823860438942037, |
| "grad_norm": 0.21299210269078242, |
| "learning_rate": 3.2123487692949525e-06, |
| "loss": 0.273, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.8249859313449637, |
| "grad_norm": 0.19645234999837438, |
| "learning_rate": 3.1914893617021277e-06, |
| "loss": 0.274, |
| "step": 2511 |
| }, |
| { |
| "epoch": 2.8261114237478897, |
| "grad_norm": 0.2154736738045129, |
| "learning_rate": 3.1706299541093035e-06, |
| "loss": 0.2798, |
| "step": 2512 |
| }, |
| { |
| "epoch": 2.8272369161508157, |
| "grad_norm": 0.20747340932057978, |
| "learning_rate": 3.149770546516479e-06, |
| "loss": 0.2911, |
| "step": 2513 |
| }, |
| { |
| "epoch": 2.828362408553742, |
| "grad_norm": 0.21668977371189152, |
| "learning_rate": 3.128911138923655e-06, |
| "loss": 0.2809, |
| "step": 2514 |
| }, |
| { |
| "epoch": 2.8294879009566687, |
| "grad_norm": 0.19752342856018376, |
| "learning_rate": 3.10805173133083e-06, |
| "loss": 0.2913, |
| "step": 2515 |
| }, |
| { |
| "epoch": 2.8306133933595947, |
| "grad_norm": 0.2129254469727926, |
| "learning_rate": 3.087192323738006e-06, |
| "loss": 0.2991, |
| "step": 2516 |
| }, |
| { |
| "epoch": 2.831738885762521, |
| "grad_norm": 0.2135496695763609, |
| "learning_rate": 3.0663329161451816e-06, |
| "loss": 0.3121, |
| "step": 2517 |
| }, |
| { |
| "epoch": 2.8328643781654472, |
| "grad_norm": 0.20217705970631564, |
| "learning_rate": 3.0454735085523573e-06, |
| "loss": 0.2699, |
| "step": 2518 |
| }, |
| { |
| "epoch": 2.8339898705683737, |
| "grad_norm": 0.18655748638369685, |
| "learning_rate": 3.024614100959533e-06, |
| "loss": 0.281, |
| "step": 2519 |
| }, |
| { |
| "epoch": 2.8351153629713, |
| "grad_norm": 0.20710373842178917, |
| "learning_rate": 3.0037546933667083e-06, |
| "loss": 0.2818, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.836240855374226, |
| "grad_norm": 0.1889143314236119, |
| "learning_rate": 2.982895285773884e-06, |
| "loss": 0.2686, |
| "step": 2521 |
| }, |
| { |
| "epoch": 2.8373663477771522, |
| "grad_norm": 0.1916678750234142, |
| "learning_rate": 2.96203587818106e-06, |
| "loss": 0.28, |
| "step": 2522 |
| }, |
| { |
| "epoch": 2.8384918401800787, |
| "grad_norm": 0.19425390855255784, |
| "learning_rate": 2.9411764705882355e-06, |
| "loss": 0.2979, |
| "step": 2523 |
| }, |
| { |
| "epoch": 2.839617332583005, |
| "grad_norm": 0.20681634843217112, |
| "learning_rate": 2.920317062995411e-06, |
| "loss": 0.2871, |
| "step": 2524 |
| }, |
| { |
| "epoch": 2.8407428249859312, |
| "grad_norm": 0.20023152119316995, |
| "learning_rate": 2.8994576554025865e-06, |
| "loss": 0.2801, |
| "step": 2525 |
| }, |
| { |
| "epoch": 2.8418683173888577, |
| "grad_norm": 0.20177659743093546, |
| "learning_rate": 2.878598247809762e-06, |
| "loss": 0.2816, |
| "step": 2526 |
| }, |
| { |
| "epoch": 2.8429938097917837, |
| "grad_norm": 0.19585899941288504, |
| "learning_rate": 2.8577388402169383e-06, |
| "loss": 0.2679, |
| "step": 2527 |
| }, |
| { |
| "epoch": 2.84411930219471, |
| "grad_norm": 0.17859425580387345, |
| "learning_rate": 2.8368794326241136e-06, |
| "loss": 0.2707, |
| "step": 2528 |
| }, |
| { |
| "epoch": 2.8452447945976367, |
| "grad_norm": 0.2039828332143888, |
| "learning_rate": 2.8160200250312893e-06, |
| "loss": 0.2991, |
| "step": 2529 |
| }, |
| { |
| "epoch": 2.8463702870005627, |
| "grad_norm": 0.2030074794985201, |
| "learning_rate": 2.7951606174384646e-06, |
| "loss": 0.2793, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.847495779403489, |
| "grad_norm": 0.19766777443355418, |
| "learning_rate": 2.7743012098456403e-06, |
| "loss": 0.2823, |
| "step": 2531 |
| }, |
| { |
| "epoch": 2.8486212718064152, |
| "grad_norm": 0.19892663863890306, |
| "learning_rate": 2.7534418022528165e-06, |
| "loss": 0.2806, |
| "step": 2532 |
| }, |
| { |
| "epoch": 2.8497467642093417, |
| "grad_norm": 0.21275315081509835, |
| "learning_rate": 2.7325823946599917e-06, |
| "loss": 0.2701, |
| "step": 2533 |
| }, |
| { |
| "epoch": 2.8508722566122677, |
| "grad_norm": 0.21033439554694824, |
| "learning_rate": 2.7117229870671675e-06, |
| "loss": 0.2854, |
| "step": 2534 |
| }, |
| { |
| "epoch": 2.851997749015194, |
| "grad_norm": 0.19767645497600447, |
| "learning_rate": 2.690863579474343e-06, |
| "loss": 0.288, |
| "step": 2535 |
| }, |
| { |
| "epoch": 2.8531232414181202, |
| "grad_norm": 0.2081071860608737, |
| "learning_rate": 2.6700041718815185e-06, |
| "loss": 0.2853, |
| "step": 2536 |
| }, |
| { |
| "epoch": 2.8542487338210467, |
| "grad_norm": 0.19156924866913874, |
| "learning_rate": 2.6491447642886946e-06, |
| "loss": 0.2768, |
| "step": 2537 |
| }, |
| { |
| "epoch": 2.855374226223973, |
| "grad_norm": 0.21948381030849073, |
| "learning_rate": 2.62828535669587e-06, |
| "loss": 0.2862, |
| "step": 2538 |
| }, |
| { |
| "epoch": 2.856499718626899, |
| "grad_norm": 0.194801788917978, |
| "learning_rate": 2.6074259491030456e-06, |
| "loss": 0.2797, |
| "step": 2539 |
| }, |
| { |
| "epoch": 2.8576252110298257, |
| "grad_norm": 0.22618411821723028, |
| "learning_rate": 2.5865665415102213e-06, |
| "loss": 0.3074, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.8587507034327517, |
| "grad_norm": 0.20254836873582036, |
| "learning_rate": 2.5657071339173966e-06, |
| "loss": 0.2699, |
| "step": 2541 |
| }, |
| { |
| "epoch": 2.859876195835678, |
| "grad_norm": 0.19524970967882507, |
| "learning_rate": 2.5448477263245727e-06, |
| "loss": 0.2781, |
| "step": 2542 |
| }, |
| { |
| "epoch": 2.8610016882386042, |
| "grad_norm": 0.19523879122973248, |
| "learning_rate": 2.523988318731748e-06, |
| "loss": 0.2901, |
| "step": 2543 |
| }, |
| { |
| "epoch": 2.8621271806415307, |
| "grad_norm": 0.19385408771091103, |
| "learning_rate": 2.5031289111389237e-06, |
| "loss": 0.2861, |
| "step": 2544 |
| }, |
| { |
| "epoch": 2.8632526730444567, |
| "grad_norm": 0.19805715117689787, |
| "learning_rate": 2.4822695035460995e-06, |
| "loss": 0.2692, |
| "step": 2545 |
| }, |
| { |
| "epoch": 2.864378165447383, |
| "grad_norm": 0.19729671382210393, |
| "learning_rate": 2.4614100959532747e-06, |
| "loss": 0.293, |
| "step": 2546 |
| }, |
| { |
| "epoch": 2.8655036578503097, |
| "grad_norm": 0.19120385629590778, |
| "learning_rate": 2.440550688360451e-06, |
| "loss": 0.2809, |
| "step": 2547 |
| }, |
| { |
| "epoch": 2.8666291502532357, |
| "grad_norm": 0.20169331917856845, |
| "learning_rate": 2.4196912807676266e-06, |
| "loss": 0.2865, |
| "step": 2548 |
| }, |
| { |
| "epoch": 2.867754642656162, |
| "grad_norm": 0.20308102680675588, |
| "learning_rate": 2.398831873174802e-06, |
| "loss": 0.292, |
| "step": 2549 |
| }, |
| { |
| "epoch": 2.8688801350590882, |
| "grad_norm": 0.21920991074207272, |
| "learning_rate": 2.3779724655819776e-06, |
| "loss": 0.2769, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.8700056274620147, |
| "grad_norm": 0.1909570999276725, |
| "learning_rate": 2.357113057989153e-06, |
| "loss": 0.2826, |
| "step": 2551 |
| }, |
| { |
| "epoch": 2.871131119864941, |
| "grad_norm": 0.20820870859741275, |
| "learning_rate": 2.336253650396329e-06, |
| "loss": 0.2811, |
| "step": 2552 |
| }, |
| { |
| "epoch": 2.872256612267867, |
| "grad_norm": 0.19636795603937476, |
| "learning_rate": 2.3153942428035047e-06, |
| "loss": 0.2817, |
| "step": 2553 |
| }, |
| { |
| "epoch": 2.8733821046707932, |
| "grad_norm": 0.2072299777143624, |
| "learning_rate": 2.29453483521068e-06, |
| "loss": 0.2782, |
| "step": 2554 |
| }, |
| { |
| "epoch": 2.8745075970737197, |
| "grad_norm": 0.21169030898396585, |
| "learning_rate": 2.2736754276178557e-06, |
| "loss": 0.2674, |
| "step": 2555 |
| }, |
| { |
| "epoch": 2.875633089476646, |
| "grad_norm": 0.21706530639456728, |
| "learning_rate": 2.252816020025031e-06, |
| "loss": 0.2759, |
| "step": 2556 |
| }, |
| { |
| "epoch": 2.8767585818795722, |
| "grad_norm": 0.2028602319447674, |
| "learning_rate": 2.231956612432207e-06, |
| "loss": 0.2785, |
| "step": 2557 |
| }, |
| { |
| "epoch": 2.8778840742824987, |
| "grad_norm": 0.19144432960054086, |
| "learning_rate": 2.211097204839383e-06, |
| "loss": 0.2833, |
| "step": 2558 |
| }, |
| { |
| "epoch": 2.8790095666854247, |
| "grad_norm": 0.2093653426820825, |
| "learning_rate": 2.190237797246558e-06, |
| "loss": 0.2815, |
| "step": 2559 |
| }, |
| { |
| "epoch": 2.880135059088351, |
| "grad_norm": 0.18440025227997578, |
| "learning_rate": 2.169378389653734e-06, |
| "loss": 0.2786, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.8812605514912777, |
| "grad_norm": 0.19656732367474106, |
| "learning_rate": 2.1485189820609096e-06, |
| "loss": 0.2803, |
| "step": 2561 |
| }, |
| { |
| "epoch": 2.8823860438942037, |
| "grad_norm": 0.19783394536320123, |
| "learning_rate": 2.1276595744680853e-06, |
| "loss": 0.2812, |
| "step": 2562 |
| }, |
| { |
| "epoch": 2.8835115362971298, |
| "grad_norm": 0.18647997820338216, |
| "learning_rate": 2.106800166875261e-06, |
| "loss": 0.2781, |
| "step": 2563 |
| }, |
| { |
| "epoch": 2.8846370287000562, |
| "grad_norm": 0.19982575823634316, |
| "learning_rate": 2.0859407592824363e-06, |
| "loss": 0.2816, |
| "step": 2564 |
| }, |
| { |
| "epoch": 2.8857625211029827, |
| "grad_norm": 0.195813225255546, |
| "learning_rate": 2.065081351689612e-06, |
| "loss": 0.2788, |
| "step": 2565 |
| }, |
| { |
| "epoch": 2.8868880135059087, |
| "grad_norm": 0.2043720027738115, |
| "learning_rate": 2.0442219440967877e-06, |
| "loss": 0.286, |
| "step": 2566 |
| }, |
| { |
| "epoch": 2.888013505908835, |
| "grad_norm": 0.19093315445014475, |
| "learning_rate": 2.0233625365039634e-06, |
| "loss": 0.2707, |
| "step": 2567 |
| }, |
| { |
| "epoch": 2.8891389983117612, |
| "grad_norm": 0.21027820905380143, |
| "learning_rate": 2.002503128911139e-06, |
| "loss": 0.3025, |
| "step": 2568 |
| }, |
| { |
| "epoch": 2.8902644907146877, |
| "grad_norm": 0.2026225980219709, |
| "learning_rate": 1.9816437213183145e-06, |
| "loss": 0.2853, |
| "step": 2569 |
| }, |
| { |
| "epoch": 2.891389983117614, |
| "grad_norm": 0.1861779080477095, |
| "learning_rate": 1.96078431372549e-06, |
| "loss": 0.2783, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.8925154755205402, |
| "grad_norm": 0.20932901602980997, |
| "learning_rate": 1.939924906132666e-06, |
| "loss": 0.2885, |
| "step": 2571 |
| }, |
| { |
| "epoch": 2.8936409679234663, |
| "grad_norm": 0.2073789548556899, |
| "learning_rate": 1.9190654985398416e-06, |
| "loss": 0.2801, |
| "step": 2572 |
| }, |
| { |
| "epoch": 2.8947664603263927, |
| "grad_norm": 0.18609584484982244, |
| "learning_rate": 1.8982060909470173e-06, |
| "loss": 0.2744, |
| "step": 2573 |
| }, |
| { |
| "epoch": 2.895891952729319, |
| "grad_norm": 0.1812033482553102, |
| "learning_rate": 1.877346683354193e-06, |
| "loss": 0.2859, |
| "step": 2574 |
| }, |
| { |
| "epoch": 2.8970174451322452, |
| "grad_norm": 0.20994333414010466, |
| "learning_rate": 1.8564872757613683e-06, |
| "loss": 0.2904, |
| "step": 2575 |
| }, |
| { |
| "epoch": 2.8981429375351717, |
| "grad_norm": 0.19743578503548526, |
| "learning_rate": 1.8356278681685442e-06, |
| "loss": 0.2761, |
| "step": 2576 |
| }, |
| { |
| "epoch": 2.8992684299380977, |
| "grad_norm": 0.19166570858524684, |
| "learning_rate": 1.8147684605757195e-06, |
| "loss": 0.2918, |
| "step": 2577 |
| }, |
| { |
| "epoch": 2.9003939223410242, |
| "grad_norm": 0.20463881347239937, |
| "learning_rate": 1.7939090529828954e-06, |
| "loss": 0.2889, |
| "step": 2578 |
| }, |
| { |
| "epoch": 2.9015194147439507, |
| "grad_norm": 0.19047750537612482, |
| "learning_rate": 1.7730496453900712e-06, |
| "loss": 0.2864, |
| "step": 2579 |
| }, |
| { |
| "epoch": 2.9026449071468767, |
| "grad_norm": 0.19091573754700542, |
| "learning_rate": 1.7521902377972467e-06, |
| "loss": 0.2927, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.903770399549803, |
| "grad_norm": 0.1876198954341774, |
| "learning_rate": 1.7313308302044224e-06, |
| "loss": 0.268, |
| "step": 2581 |
| }, |
| { |
| "epoch": 2.9048958919527292, |
| "grad_norm": 0.20218741071231236, |
| "learning_rate": 1.7104714226115977e-06, |
| "loss": 0.2818, |
| "step": 2582 |
| }, |
| { |
| "epoch": 2.9060213843556557, |
| "grad_norm": 0.19751340574083814, |
| "learning_rate": 1.6896120150187736e-06, |
| "loss": 0.289, |
| "step": 2583 |
| }, |
| { |
| "epoch": 2.9071468767585817, |
| "grad_norm": 0.1929293500822755, |
| "learning_rate": 1.6687526074259493e-06, |
| "loss": 0.2701, |
| "step": 2584 |
| }, |
| { |
| "epoch": 2.908272369161508, |
| "grad_norm": 0.19409315370373764, |
| "learning_rate": 1.6478931998331248e-06, |
| "loss": 0.294, |
| "step": 2585 |
| }, |
| { |
| "epoch": 2.9093978615644343, |
| "grad_norm": 0.19513663853793442, |
| "learning_rate": 1.6270337922403005e-06, |
| "loss": 0.2909, |
| "step": 2586 |
| }, |
| { |
| "epoch": 2.9105233539673607, |
| "grad_norm": 0.18520192324031312, |
| "learning_rate": 1.6061743846474762e-06, |
| "loss": 0.2879, |
| "step": 2587 |
| }, |
| { |
| "epoch": 2.911648846370287, |
| "grad_norm": 0.19393576190790643, |
| "learning_rate": 1.5853149770546517e-06, |
| "loss": 0.2818, |
| "step": 2588 |
| }, |
| { |
| "epoch": 2.9127743387732132, |
| "grad_norm": 0.1913327489426411, |
| "learning_rate": 1.5644555694618274e-06, |
| "loss": 0.2845, |
| "step": 2589 |
| }, |
| { |
| "epoch": 2.9138998311761397, |
| "grad_norm": 0.19600196772862405, |
| "learning_rate": 1.543596161869003e-06, |
| "loss": 0.2877, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.9150253235790657, |
| "grad_norm": 0.2003116343512408, |
| "learning_rate": 1.5227367542761787e-06, |
| "loss": 0.2761, |
| "step": 2591 |
| }, |
| { |
| "epoch": 2.916150815981992, |
| "grad_norm": 0.18645182971152968, |
| "learning_rate": 1.5018773466833542e-06, |
| "loss": 0.2851, |
| "step": 2592 |
| }, |
| { |
| "epoch": 2.9172763083849182, |
| "grad_norm": 0.20894705309538278, |
| "learning_rate": 1.48101793909053e-06, |
| "loss": 0.2903, |
| "step": 2593 |
| }, |
| { |
| "epoch": 2.9184018007878447, |
| "grad_norm": 0.19120582673378814, |
| "learning_rate": 1.4601585314977056e-06, |
| "loss": 0.2776, |
| "step": 2594 |
| }, |
| { |
| "epoch": 2.9195272931907708, |
| "grad_norm": 0.20278259638182897, |
| "learning_rate": 1.439299123904881e-06, |
| "loss": 0.2787, |
| "step": 2595 |
| }, |
| { |
| "epoch": 2.9206527855936972, |
| "grad_norm": 0.19583937073430013, |
| "learning_rate": 1.4184397163120568e-06, |
| "loss": 0.2811, |
| "step": 2596 |
| }, |
| { |
| "epoch": 2.9217782779966237, |
| "grad_norm": 0.1941542530021111, |
| "learning_rate": 1.3975803087192323e-06, |
| "loss": 0.2658, |
| "step": 2597 |
| }, |
| { |
| "epoch": 2.9229037703995497, |
| "grad_norm": 0.20963262803457552, |
| "learning_rate": 1.3767209011264082e-06, |
| "loss": 0.278, |
| "step": 2598 |
| }, |
| { |
| "epoch": 2.924029262802476, |
| "grad_norm": 0.2018274661438912, |
| "learning_rate": 1.3558614935335837e-06, |
| "loss": 0.294, |
| "step": 2599 |
| }, |
| { |
| "epoch": 2.9251547552054022, |
| "grad_norm": 0.19436944269264386, |
| "learning_rate": 1.3350020859407592e-06, |
| "loss": 0.2834, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.9262802476083287, |
| "grad_norm": 0.1878287577440724, |
| "learning_rate": 1.314142678347935e-06, |
| "loss": 0.2765, |
| "step": 2601 |
| }, |
| { |
| "epoch": 2.927405740011255, |
| "grad_norm": 0.18341006129215123, |
| "learning_rate": 1.2932832707551107e-06, |
| "loss": 0.2746, |
| "step": 2602 |
| }, |
| { |
| "epoch": 2.9285312324141812, |
| "grad_norm": 0.19735771783507766, |
| "learning_rate": 1.2724238631622864e-06, |
| "loss": 0.2913, |
| "step": 2603 |
| }, |
| { |
| "epoch": 2.9296567248171073, |
| "grad_norm": 0.18771599689886934, |
| "learning_rate": 1.2515644555694619e-06, |
| "loss": 0.2753, |
| "step": 2604 |
| }, |
| { |
| "epoch": 2.9307822172200337, |
| "grad_norm": 0.19841768486183753, |
| "learning_rate": 1.2307050479766374e-06, |
| "loss": 0.2814, |
| "step": 2605 |
| }, |
| { |
| "epoch": 2.93190770962296, |
| "grad_norm": 0.1956614956245663, |
| "learning_rate": 1.2098456403838133e-06, |
| "loss": 0.2743, |
| "step": 2606 |
| }, |
| { |
| "epoch": 2.9330332020258862, |
| "grad_norm": 0.2002743148871214, |
| "learning_rate": 1.1889862327909888e-06, |
| "loss": 0.2888, |
| "step": 2607 |
| }, |
| { |
| "epoch": 2.9341586944288127, |
| "grad_norm": 0.21318426731074547, |
| "learning_rate": 1.1681268251981645e-06, |
| "loss": 0.2959, |
| "step": 2608 |
| }, |
| { |
| "epoch": 2.9352841868317388, |
| "grad_norm": 0.18809272462436055, |
| "learning_rate": 1.14726741760534e-06, |
| "loss": 0.277, |
| "step": 2609 |
| }, |
| { |
| "epoch": 2.9364096792346652, |
| "grad_norm": 0.19427439279930914, |
| "learning_rate": 1.1264080100125155e-06, |
| "loss": 0.2927, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.9375351716375917, |
| "grad_norm": 0.2079310357704345, |
| "learning_rate": 1.1055486024196914e-06, |
| "loss": 0.2828, |
| "step": 2611 |
| }, |
| { |
| "epoch": 2.9386606640405177, |
| "grad_norm": 0.19416657363268003, |
| "learning_rate": 1.084689194826867e-06, |
| "loss": 0.2911, |
| "step": 2612 |
| }, |
| { |
| "epoch": 2.9397861564434438, |
| "grad_norm": 0.19916119078493613, |
| "learning_rate": 1.0638297872340427e-06, |
| "loss": 0.2924, |
| "step": 2613 |
| }, |
| { |
| "epoch": 2.9409116488463702, |
| "grad_norm": 0.1983245462408925, |
| "learning_rate": 1.0429703796412182e-06, |
| "loss": 0.2708, |
| "step": 2614 |
| }, |
| { |
| "epoch": 2.9420371412492967, |
| "grad_norm": 0.18590780784131763, |
| "learning_rate": 1.0221109720483939e-06, |
| "loss": 0.2863, |
| "step": 2615 |
| }, |
| { |
| "epoch": 2.9431626336522227, |
| "grad_norm": 0.1872997802264514, |
| "learning_rate": 1.0012515644555696e-06, |
| "loss": 0.2811, |
| "step": 2616 |
| }, |
| { |
| "epoch": 2.9442881260551492, |
| "grad_norm": 0.178184423835207, |
| "learning_rate": 9.80392156862745e-07, |
| "loss": 0.2731, |
| "step": 2617 |
| }, |
| { |
| "epoch": 2.9454136184580753, |
| "grad_norm": 0.20370420485130178, |
| "learning_rate": 9.595327492699208e-07, |
| "loss": 0.298, |
| "step": 2618 |
| }, |
| { |
| "epoch": 2.9465391108610017, |
| "grad_norm": 0.19363752795605113, |
| "learning_rate": 9.386733416770965e-07, |
| "loss": 0.2862, |
| "step": 2619 |
| }, |
| { |
| "epoch": 2.947664603263928, |
| "grad_norm": 0.19337040777721937, |
| "learning_rate": 9.178139340842721e-07, |
| "loss": 0.2938, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.9487900956668542, |
| "grad_norm": 0.20062145944273124, |
| "learning_rate": 8.969545264914477e-07, |
| "loss": 0.2854, |
| "step": 2621 |
| }, |
| { |
| "epoch": 2.9499155880697803, |
| "grad_norm": 0.19780261743838537, |
| "learning_rate": 8.760951188986233e-07, |
| "loss": 0.2853, |
| "step": 2622 |
| }, |
| { |
| "epoch": 2.9510410804727067, |
| "grad_norm": 0.18300708396430374, |
| "learning_rate": 8.552357113057988e-07, |
| "loss": 0.2859, |
| "step": 2623 |
| }, |
| { |
| "epoch": 2.952166572875633, |
| "grad_norm": 0.19659081949531576, |
| "learning_rate": 8.343763037129747e-07, |
| "loss": 0.2873, |
| "step": 2624 |
| }, |
| { |
| "epoch": 2.9532920652785593, |
| "grad_norm": 0.18879456754476104, |
| "learning_rate": 8.135168961201503e-07, |
| "loss": 0.3059, |
| "step": 2625 |
| }, |
| { |
| "epoch": 2.9544175576814857, |
| "grad_norm": 0.1970187549688716, |
| "learning_rate": 7.926574885273259e-07, |
| "loss": 0.2751, |
| "step": 2626 |
| }, |
| { |
| "epoch": 2.9555430500844118, |
| "grad_norm": 0.18765481178184712, |
| "learning_rate": 7.717980809345015e-07, |
| "loss": 0.2828, |
| "step": 2627 |
| }, |
| { |
| "epoch": 2.9566685424873382, |
| "grad_norm": 0.18024019057588778, |
| "learning_rate": 7.509386733416771e-07, |
| "loss": 0.2796, |
| "step": 2628 |
| }, |
| { |
| "epoch": 2.9577940348902647, |
| "grad_norm": 0.1911404613455637, |
| "learning_rate": 7.300792657488528e-07, |
| "loss": 0.2857, |
| "step": 2629 |
| }, |
| { |
| "epoch": 2.9589195272931907, |
| "grad_norm": 0.19138951788504163, |
| "learning_rate": 7.092198581560284e-07, |
| "loss": 0.2792, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.9600450196961168, |
| "grad_norm": 0.1910935556506434, |
| "learning_rate": 6.883604505632041e-07, |
| "loss": 0.2845, |
| "step": 2631 |
| }, |
| { |
| "epoch": 2.9611705120990433, |
| "grad_norm": 0.19203721334554208, |
| "learning_rate": 6.675010429703796e-07, |
| "loss": 0.291, |
| "step": 2632 |
| }, |
| { |
| "epoch": 2.9622960045019697, |
| "grad_norm": 0.1928645139619539, |
| "learning_rate": 6.466416353775553e-07, |
| "loss": 0.2929, |
| "step": 2633 |
| }, |
| { |
| "epoch": 2.9634214969048958, |
| "grad_norm": 0.19918761206527566, |
| "learning_rate": 6.257822277847309e-07, |
| "loss": 0.3019, |
| "step": 2634 |
| }, |
| { |
| "epoch": 2.9645469893078222, |
| "grad_norm": 0.1854244511600504, |
| "learning_rate": 6.049228201919066e-07, |
| "loss": 0.2762, |
| "step": 2635 |
| }, |
| { |
| "epoch": 2.9656724817107483, |
| "grad_norm": 0.1790914058015419, |
| "learning_rate": 5.840634125990823e-07, |
| "loss": 0.2775, |
| "step": 2636 |
| }, |
| { |
| "epoch": 2.9667979741136747, |
| "grad_norm": 0.18939335360021642, |
| "learning_rate": 5.632040050062578e-07, |
| "loss": 0.2868, |
| "step": 2637 |
| }, |
| { |
| "epoch": 2.967923466516601, |
| "grad_norm": 0.18950602005484965, |
| "learning_rate": 5.423445974134335e-07, |
| "loss": 0.2806, |
| "step": 2638 |
| }, |
| { |
| "epoch": 2.9690489589195272, |
| "grad_norm": 0.2057097341756207, |
| "learning_rate": 5.214851898206091e-07, |
| "loss": 0.2959, |
| "step": 2639 |
| }, |
| { |
| "epoch": 2.9701744513224537, |
| "grad_norm": 0.18914510377229687, |
| "learning_rate": 5.006257822277848e-07, |
| "loss": 0.2813, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.9712999437253798, |
| "grad_norm": 0.21148991319548016, |
| "learning_rate": 4.797663746349604e-07, |
| "loss": 0.2921, |
| "step": 2641 |
| }, |
| { |
| "epoch": 2.9724254361283062, |
| "grad_norm": 0.19281924718786836, |
| "learning_rate": 4.5890696704213606e-07, |
| "loss": 0.2855, |
| "step": 2642 |
| }, |
| { |
| "epoch": 2.9735509285312323, |
| "grad_norm": 0.18955123443150448, |
| "learning_rate": 4.3804755944931167e-07, |
| "loss": 0.2959, |
| "step": 2643 |
| }, |
| { |
| "epoch": 2.9746764209341587, |
| "grad_norm": 0.18330368999509078, |
| "learning_rate": 4.171881518564873e-07, |
| "loss": 0.2867, |
| "step": 2644 |
| }, |
| { |
| "epoch": 2.9758019133370848, |
| "grad_norm": 0.19150353774187667, |
| "learning_rate": 3.9632874426366293e-07, |
| "loss": 0.2807, |
| "step": 2645 |
| }, |
| { |
| "epoch": 2.9769274057400112, |
| "grad_norm": 0.18078724381796288, |
| "learning_rate": 3.7546933667083854e-07, |
| "loss": 0.2659, |
| "step": 2646 |
| }, |
| { |
| "epoch": 2.9780528981429377, |
| "grad_norm": 0.1956344780829508, |
| "learning_rate": 3.546099290780142e-07, |
| "loss": 0.2836, |
| "step": 2647 |
| }, |
| { |
| "epoch": 2.9791783905458638, |
| "grad_norm": 0.18154947344603503, |
| "learning_rate": 3.337505214851898e-07, |
| "loss": 0.2691, |
| "step": 2648 |
| }, |
| { |
| "epoch": 2.9803038829487902, |
| "grad_norm": 0.19970667773834722, |
| "learning_rate": 3.1289111389236547e-07, |
| "loss": 0.2939, |
| "step": 2649 |
| }, |
| { |
| "epoch": 2.9814293753517163, |
| "grad_norm": 0.1970797047209464, |
| "learning_rate": 2.9203170629954113e-07, |
| "loss": 0.2973, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.9825548677546427, |
| "grad_norm": 0.17558302415161703, |
| "learning_rate": 2.7117229870671674e-07, |
| "loss": 0.2687, |
| "step": 2651 |
| }, |
| { |
| "epoch": 2.983680360157569, |
| "grad_norm": 0.19984560884749847, |
| "learning_rate": 2.503128911138924e-07, |
| "loss": 0.2917, |
| "step": 2652 |
| }, |
| { |
| "epoch": 2.9848058525604952, |
| "grad_norm": 0.18904253639700785, |
| "learning_rate": 2.2945348352106803e-07, |
| "loss": 0.2881, |
| "step": 2653 |
| }, |
| { |
| "epoch": 2.9859313449634213, |
| "grad_norm": 0.18998537420423053, |
| "learning_rate": 2.0859407592824366e-07, |
| "loss": 0.2899, |
| "step": 2654 |
| }, |
| { |
| "epoch": 2.9870568373663478, |
| "grad_norm": 0.18137614988061299, |
| "learning_rate": 1.8773466833541927e-07, |
| "loss": 0.2673, |
| "step": 2655 |
| }, |
| { |
| "epoch": 2.9881823297692742, |
| "grad_norm": 0.20380937660099302, |
| "learning_rate": 1.668752607425949e-07, |
| "loss": 0.2855, |
| "step": 2656 |
| }, |
| { |
| "epoch": 2.9893078221722003, |
| "grad_norm": 0.18697756553685133, |
| "learning_rate": 1.4601585314977056e-07, |
| "loss": 0.2855, |
| "step": 2657 |
| }, |
| { |
| "epoch": 2.9904333145751267, |
| "grad_norm": 0.18757566803524747, |
| "learning_rate": 1.251564455569462e-07, |
| "loss": 0.2881, |
| "step": 2658 |
| }, |
| { |
| "epoch": 2.9915588069780528, |
| "grad_norm": 0.18601807443604287, |
| "learning_rate": 1.0429703796412183e-07, |
| "loss": 0.2848, |
| "step": 2659 |
| }, |
| { |
| "epoch": 2.9926842993809792, |
| "grad_norm": 0.19183363220520985, |
| "learning_rate": 8.343763037129745e-08, |
| "loss": 0.2958, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.9938097917839057, |
| "grad_norm": 0.20102940785890344, |
| "learning_rate": 6.25782227784731e-08, |
| "loss": 0.281, |
| "step": 2661 |
| }, |
| { |
| "epoch": 2.9949352841868317, |
| "grad_norm": 0.189431904457012, |
| "learning_rate": 4.1718815185648726e-08, |
| "loss": 0.2813, |
| "step": 2662 |
| }, |
| { |
| "epoch": 2.996060776589758, |
| "grad_norm": 0.17662567105058288, |
| "learning_rate": 2.0859407592824363e-08, |
| "loss": 0.2782, |
| "step": 2663 |
| }, |
| { |
| "epoch": 2.9971862689926843, |
| "grad_norm": 0.18268732320356856, |
| "learning_rate": 0.0, |
| "loss": 0.2705, |
| "step": 2664 |
| }, |
| { |
| "epoch": 2.9971862689926843, |
| "step": 2664, |
| "total_flos": 2.27802848659977e+18, |
| "train_loss": 0.43048976833845404, |
| "train_runtime": 155129.8221, |
| "train_samples_per_second": 0.275, |
| "train_steps_per_second": 0.017 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2664, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.27802848659977e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|