| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.994882292732855, |
| "eval_steps": 500, |
| "global_step": 2440, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0020470829068577278, |
| "grad_norm": 5.914718943943769, |
| "learning_rate": 1.639344262295082e-07, |
| "loss": 0.89, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0040941658137154556, |
| "grad_norm": 5.711215790032282, |
| "learning_rate": 3.278688524590164e-07, |
| "loss": 0.8602, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.006141248720573183, |
| "grad_norm": 6.070030223088026, |
| "learning_rate": 4.918032786885246e-07, |
| "loss": 0.8902, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.008188331627430911, |
| "grad_norm": 5.739027819285582, |
| "learning_rate": 6.557377049180328e-07, |
| "loss": 0.9194, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01023541453428864, |
| "grad_norm": 5.558991142229951, |
| "learning_rate": 8.196721311475409e-07, |
| "loss": 0.8523, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.012282497441146366, |
| "grad_norm": 5.622988925862499, |
| "learning_rate": 9.836065573770493e-07, |
| "loss": 0.9102, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.014329580348004094, |
| "grad_norm": 5.360893456655671, |
| "learning_rate": 1.1475409836065575e-06, |
| "loss": 0.8682, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.016376663254861822, |
| "grad_norm": 5.095538528802894, |
| "learning_rate": 1.3114754098360657e-06, |
| "loss": 0.8689, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01842374616171955, |
| "grad_norm": 4.59455985194371, |
| "learning_rate": 1.4754098360655739e-06, |
| "loss": 0.852, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02047082906857728, |
| "grad_norm": 4.139437078984008, |
| "learning_rate": 1.6393442622950819e-06, |
| "loss": 0.8102, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.022517911975435005, |
| "grad_norm": 4.171848417796509, |
| "learning_rate": 1.8032786885245903e-06, |
| "loss": 0.8102, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02456499488229273, |
| "grad_norm": 2.4194756147420646, |
| "learning_rate": 1.9672131147540985e-06, |
| "loss": 0.7481, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.02661207778915046, |
| "grad_norm": 2.28600629836227, |
| "learning_rate": 2.1311475409836067e-06, |
| "loss": 0.7631, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.028659160696008188, |
| "grad_norm": 2.075915465619067, |
| "learning_rate": 2.295081967213115e-06, |
| "loss": 0.7506, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.030706243602865915, |
| "grad_norm": 1.952947905310399, |
| "learning_rate": 2.459016393442623e-06, |
| "loss": 0.7781, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.032753326509723645, |
| "grad_norm": 3.070659688365803, |
| "learning_rate": 2.6229508196721314e-06, |
| "loss": 0.7488, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03480040941658137, |
| "grad_norm": 3.5264876012722652, |
| "learning_rate": 2.786885245901639e-06, |
| "loss": 0.7746, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0368474923234391, |
| "grad_norm": 3.580605082704591, |
| "learning_rate": 2.9508196721311478e-06, |
| "loss": 0.7618, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.038894575230296824, |
| "grad_norm": 3.194230800018389, |
| "learning_rate": 3.114754098360656e-06, |
| "loss": 0.7207, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04094165813715456, |
| "grad_norm": 3.116544112683001, |
| "learning_rate": 3.2786885245901638e-06, |
| "loss": 0.7352, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.042988741044012284, |
| "grad_norm": 2.6314247686078094, |
| "learning_rate": 3.4426229508196724e-06, |
| "loss": 0.7432, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.04503582395087001, |
| "grad_norm": 2.2137046535068037, |
| "learning_rate": 3.6065573770491806e-06, |
| "loss": 0.7444, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.04708290685772774, |
| "grad_norm": 1.6649733496725676, |
| "learning_rate": 3.7704918032786884e-06, |
| "loss": 0.6646, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.04912998976458546, |
| "grad_norm": 1.3074947984675702, |
| "learning_rate": 3.934426229508197e-06, |
| "loss": 0.6388, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0511770726714432, |
| "grad_norm": 1.190645815657471, |
| "learning_rate": 4.098360655737705e-06, |
| "loss": 0.6575, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.05322415557830092, |
| "grad_norm": 1.2707845894729972, |
| "learning_rate": 4.2622950819672135e-06, |
| "loss": 0.671, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.05527123848515865, |
| "grad_norm": 1.4412640499522065, |
| "learning_rate": 4.426229508196722e-06, |
| "loss": 0.6526, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.057318321392016376, |
| "grad_norm": 1.3167145329441712, |
| "learning_rate": 4.59016393442623e-06, |
| "loss": 0.6533, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0593654042988741, |
| "grad_norm": 1.338399336461372, |
| "learning_rate": 4.754098360655738e-06, |
| "loss": 0.6525, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06141248720573183, |
| "grad_norm": 1.1005314901400522, |
| "learning_rate": 4.918032786885246e-06, |
| "loss": 0.6513, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06345957011258956, |
| "grad_norm": 1.0341365252216648, |
| "learning_rate": 5.0819672131147545e-06, |
| "loss": 0.6517, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.06550665301944729, |
| "grad_norm": 0.8191371315934305, |
| "learning_rate": 5.245901639344263e-06, |
| "loss": 0.6562, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.06755373592630501, |
| "grad_norm": 1.0105078954121915, |
| "learning_rate": 5.409836065573772e-06, |
| "loss": 0.6318, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.06960081883316274, |
| "grad_norm": 1.083317612523659, |
| "learning_rate": 5.573770491803278e-06, |
| "loss": 0.6528, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.07164790174002048, |
| "grad_norm": 1.054453013530755, |
| "learning_rate": 5.737704918032787e-06, |
| "loss": 0.6233, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0736949846468782, |
| "grad_norm": 0.693669694861969, |
| "learning_rate": 5.9016393442622956e-06, |
| "loss": 0.5959, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.07574206755373593, |
| "grad_norm": 0.6429960279803676, |
| "learning_rate": 6.065573770491804e-06, |
| "loss": 0.6031, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.07778915046059365, |
| "grad_norm": 0.8326921183964671, |
| "learning_rate": 6.229508196721312e-06, |
| "loss": 0.6252, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.07983623336745138, |
| "grad_norm": 0.7826089791231328, |
| "learning_rate": 6.393442622950821e-06, |
| "loss": 0.5891, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.08188331627430911, |
| "grad_norm": 0.6837753928275052, |
| "learning_rate": 6.5573770491803276e-06, |
| "loss": 0.6151, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08393039918116683, |
| "grad_norm": 0.557265922988386, |
| "learning_rate": 6.721311475409837e-06, |
| "loss": 0.5832, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.08597748208802457, |
| "grad_norm": 0.697548950763872, |
| "learning_rate": 6.885245901639345e-06, |
| "loss": 0.629, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.08802456499488229, |
| "grad_norm": 0.7262822585766031, |
| "learning_rate": 7.049180327868853e-06, |
| "loss": 0.5531, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.09007164790174002, |
| "grad_norm": 0.7701295257257081, |
| "learning_rate": 7.213114754098361e-06, |
| "loss": 0.612, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.09211873080859775, |
| "grad_norm": 0.4681801446847226, |
| "learning_rate": 7.3770491803278695e-06, |
| "loss": 0.5675, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.09416581371545547, |
| "grad_norm": 0.5678252337287879, |
| "learning_rate": 7.540983606557377e-06, |
| "loss": 0.6391, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.09621289662231321, |
| "grad_norm": 0.5704556041531674, |
| "learning_rate": 7.704918032786886e-06, |
| "loss": 0.5773, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.09825997952917093, |
| "grad_norm": 0.5739689196908508, |
| "learning_rate": 7.868852459016394e-06, |
| "loss": 0.5928, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.10030706243602866, |
| "grad_norm": 0.5604483444656064, |
| "learning_rate": 8.032786885245902e-06, |
| "loss": 0.5747, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.1023541453428864, |
| "grad_norm": 0.5266605058896955, |
| "learning_rate": 8.19672131147541e-06, |
| "loss": 0.5506, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.10440122824974411, |
| "grad_norm": 0.6855806410785544, |
| "learning_rate": 8.360655737704919e-06, |
| "loss": 0.5974, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.10644831115660185, |
| "grad_norm": 0.6492542095140276, |
| "learning_rate": 8.524590163934427e-06, |
| "loss": 0.6027, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.10849539406345957, |
| "grad_norm": 0.5702781382301559, |
| "learning_rate": 8.688524590163935e-06, |
| "loss": 0.5997, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.1105424769703173, |
| "grad_norm": 0.5828822136164438, |
| "learning_rate": 8.852459016393443e-06, |
| "loss": 0.5633, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.11258955987717502, |
| "grad_norm": 0.5923389847320444, |
| "learning_rate": 9.016393442622952e-06, |
| "loss": 0.5842, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.11463664278403275, |
| "grad_norm": 0.5636209619702331, |
| "learning_rate": 9.18032786885246e-06, |
| "loss": 0.5645, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.11668372569089049, |
| "grad_norm": 0.4912175098948749, |
| "learning_rate": 9.344262295081968e-06, |
| "loss": 0.5635, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.1187308085977482, |
| "grad_norm": 0.754994674395481, |
| "learning_rate": 9.508196721311476e-06, |
| "loss": 0.5658, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.12077789150460594, |
| "grad_norm": 0.5456693243174974, |
| "learning_rate": 9.672131147540984e-06, |
| "loss": 0.5559, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.12282497441146366, |
| "grad_norm": 0.5877326554557568, |
| "learning_rate": 9.836065573770493e-06, |
| "loss": 0.5755, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.12487205731832139, |
| "grad_norm": 0.4917195015950769, |
| "learning_rate": 1e-05, |
| "loss": 0.5422, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.1269191402251791, |
| "grad_norm": 0.5000347167895204, |
| "learning_rate": 1.0163934426229509e-05, |
| "loss": 0.5638, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.12896622313203684, |
| "grad_norm": 0.5165061332900804, |
| "learning_rate": 1.0327868852459017e-05, |
| "loss": 0.5874, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.13101330603889458, |
| "grad_norm": 0.5647596683341366, |
| "learning_rate": 1.0491803278688525e-05, |
| "loss": 0.5549, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.1330603889457523, |
| "grad_norm": 0.556985780595292, |
| "learning_rate": 1.0655737704918034e-05, |
| "loss": 0.5287, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.13510747185261002, |
| "grad_norm": 0.5870046275772329, |
| "learning_rate": 1.0819672131147544e-05, |
| "loss": 0.5726, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.13715455475946775, |
| "grad_norm": 0.510016021993097, |
| "learning_rate": 1.0983606557377052e-05, |
| "loss": 0.641, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.13920163766632548, |
| "grad_norm": 0.5757122682850916, |
| "learning_rate": 1.1147540983606557e-05, |
| "loss": 0.5493, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.14124872057318322, |
| "grad_norm": 0.5572446165702184, |
| "learning_rate": 1.1311475409836066e-05, |
| "loss": 0.5231, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.14329580348004095, |
| "grad_norm": 0.5000459199001162, |
| "learning_rate": 1.1475409836065575e-05, |
| "loss": 0.5221, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.14534288638689866, |
| "grad_norm": 0.49933135856571004, |
| "learning_rate": 1.1639344262295083e-05, |
| "loss": 0.5254, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1473899692937564, |
| "grad_norm": 0.542314402205445, |
| "learning_rate": 1.1803278688524591e-05, |
| "loss": 0.5855, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.14943705220061412, |
| "grad_norm": 0.4908515642706825, |
| "learning_rate": 1.19672131147541e-05, |
| "loss": 0.5277, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.15148413510747186, |
| "grad_norm": 0.5541036948715271, |
| "learning_rate": 1.2131147540983608e-05, |
| "loss": 0.5226, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.1535312180143296, |
| "grad_norm": 0.5925205612588014, |
| "learning_rate": 1.2295081967213116e-05, |
| "loss": 0.5588, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1555783009211873, |
| "grad_norm": 0.5472033034034951, |
| "learning_rate": 1.2459016393442624e-05, |
| "loss": 0.5601, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.15762538382804503, |
| "grad_norm": 0.5831899459875634, |
| "learning_rate": 1.2622950819672132e-05, |
| "loss": 0.5208, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.15967246673490276, |
| "grad_norm": 0.6725600970137904, |
| "learning_rate": 1.2786885245901642e-05, |
| "loss": 0.5646, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.1617195496417605, |
| "grad_norm": 0.4997581515870628, |
| "learning_rate": 1.295081967213115e-05, |
| "loss": 0.5345, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.16376663254861823, |
| "grad_norm": 0.6060573387394406, |
| "learning_rate": 1.3114754098360655e-05, |
| "loss": 0.5516, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.16581371545547594, |
| "grad_norm": 0.5707723342314415, |
| "learning_rate": 1.3278688524590165e-05, |
| "loss": 0.5494, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.16786079836233367, |
| "grad_norm": 0.5820968760684135, |
| "learning_rate": 1.3442622950819673e-05, |
| "loss": 0.5374, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.1699078812691914, |
| "grad_norm": 0.801732853136766, |
| "learning_rate": 1.3606557377049181e-05, |
| "loss": 0.546, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.17195496417604914, |
| "grad_norm": 0.5288994895334571, |
| "learning_rate": 1.377049180327869e-05, |
| "loss": 0.5761, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.17400204708290687, |
| "grad_norm": 0.7883330207931984, |
| "learning_rate": 1.3934426229508198e-05, |
| "loss": 0.5326, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.17604912998976457, |
| "grad_norm": 0.4807106883512578, |
| "learning_rate": 1.4098360655737706e-05, |
| "loss": 0.5311, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.1780962128966223, |
| "grad_norm": 0.6954942427041093, |
| "learning_rate": 1.4262295081967214e-05, |
| "loss": 0.5206, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.18014329580348004, |
| "grad_norm": 0.5706344929187627, |
| "learning_rate": 1.4426229508196722e-05, |
| "loss": 0.5575, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.18219037871033777, |
| "grad_norm": 0.5660731771723676, |
| "learning_rate": 1.459016393442623e-05, |
| "loss": 0.5243, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.1842374616171955, |
| "grad_norm": 0.7077259968257474, |
| "learning_rate": 1.4754098360655739e-05, |
| "loss": 0.5375, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1862845445240532, |
| "grad_norm": 0.5640871854454458, |
| "learning_rate": 1.4918032786885249e-05, |
| "loss": 0.5678, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.18833162743091095, |
| "grad_norm": 0.5686460669441292, |
| "learning_rate": 1.5081967213114754e-05, |
| "loss": 0.5317, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.19037871033776868, |
| "grad_norm": 0.5667037000506248, |
| "learning_rate": 1.5245901639344264e-05, |
| "loss": 0.5208, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.19242579324462641, |
| "grad_norm": 0.6528802562895782, |
| "learning_rate": 1.5409836065573772e-05, |
| "loss": 0.5538, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.19447287615148415, |
| "grad_norm": 0.6104791131678972, |
| "learning_rate": 1.5573770491803278e-05, |
| "loss": 0.4945, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.19651995905834185, |
| "grad_norm": 0.770297526442582, |
| "learning_rate": 1.5737704918032788e-05, |
| "loss": 0.5542, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.1985670419651996, |
| "grad_norm": 0.639855439582837, |
| "learning_rate": 1.5901639344262295e-05, |
| "loss": 0.5604, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.20061412487205732, |
| "grad_norm": 0.6516313518294781, |
| "learning_rate": 1.6065573770491805e-05, |
| "loss": 0.492, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.20266120777891505, |
| "grad_norm": 0.7323248049827676, |
| "learning_rate": 1.6229508196721314e-05, |
| "loss": 0.5561, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2047082906857728, |
| "grad_norm": 0.614575250954447, |
| "learning_rate": 1.639344262295082e-05, |
| "loss": 0.5319, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2067553735926305, |
| "grad_norm": 0.6867103983793634, |
| "learning_rate": 1.655737704918033e-05, |
| "loss": 0.5087, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.20880245649948823, |
| "grad_norm": 0.5125936025327898, |
| "learning_rate": 1.6721311475409837e-05, |
| "loss": 0.5187, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.21084953940634596, |
| "grad_norm": 0.7791039154279353, |
| "learning_rate": 1.6885245901639347e-05, |
| "loss": 0.5831, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.2128966223132037, |
| "grad_norm": 0.5610382412593208, |
| "learning_rate": 1.7049180327868854e-05, |
| "loss": 0.5979, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.21494370522006143, |
| "grad_norm": 0.6872141481595387, |
| "learning_rate": 1.721311475409836e-05, |
| "loss": 0.5337, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.21699078812691913, |
| "grad_norm": 0.621726895260148, |
| "learning_rate": 1.737704918032787e-05, |
| "loss": 0.5014, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.21903787103377687, |
| "grad_norm": 0.6593450481678657, |
| "learning_rate": 1.7540983606557377e-05, |
| "loss": 0.5298, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.2210849539406346, |
| "grad_norm": 0.6085504680048398, |
| "learning_rate": 1.7704918032786887e-05, |
| "loss": 0.5597, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.22313203684749233, |
| "grad_norm": 0.5818331349760811, |
| "learning_rate": 1.7868852459016393e-05, |
| "loss": 0.5671, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.22517911975435004, |
| "grad_norm": 0.5705507858388099, |
| "learning_rate": 1.8032786885245903e-05, |
| "loss": 0.5608, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.22722620266120777, |
| "grad_norm": 0.566454711636141, |
| "learning_rate": 1.8196721311475413e-05, |
| "loss": 0.536, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2292732855680655, |
| "grad_norm": 0.6584736492952961, |
| "learning_rate": 1.836065573770492e-05, |
| "loss": 0.5107, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.23132036847492324, |
| "grad_norm": 0.5431857503415616, |
| "learning_rate": 1.852459016393443e-05, |
| "loss": 0.5216, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.23336745138178097, |
| "grad_norm": 0.7429344164092465, |
| "learning_rate": 1.8688524590163936e-05, |
| "loss": 0.5673, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.23541453428863868, |
| "grad_norm": 0.6303478113244917, |
| "learning_rate": 1.8852459016393446e-05, |
| "loss": 0.5173, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.2374616171954964, |
| "grad_norm": 0.6632339341308846, |
| "learning_rate": 1.9016393442622952e-05, |
| "loss": 0.526, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.23950870010235414, |
| "grad_norm": 0.7178085764934704, |
| "learning_rate": 1.918032786885246e-05, |
| "loss": 0.5501, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.24155578300921188, |
| "grad_norm": 0.7261199143030841, |
| "learning_rate": 1.934426229508197e-05, |
| "loss": 0.5564, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2436028659160696, |
| "grad_norm": 0.6467257123886485, |
| "learning_rate": 1.9508196721311475e-05, |
| "loss": 0.5307, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.24564994882292732, |
| "grad_norm": 0.7743559427761539, |
| "learning_rate": 1.9672131147540985e-05, |
| "loss": 0.4867, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.24769703172978505, |
| "grad_norm": 0.5777069325137312, |
| "learning_rate": 1.9836065573770492e-05, |
| "loss": 0.5235, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.24974411463664278, |
| "grad_norm": 0.7434807781935519, |
| "learning_rate": 2e-05, |
| "loss": 0.524, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.2517911975435005, |
| "grad_norm": 0.6635906710416195, |
| "learning_rate": 2.0163934426229508e-05, |
| "loss": 0.4759, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.2538382804503582, |
| "grad_norm": 0.6164296619684109, |
| "learning_rate": 2.0327868852459018e-05, |
| "loss": 0.4925, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.25588536335721596, |
| "grad_norm": 0.5800314323412163, |
| "learning_rate": 2.0491803278688525e-05, |
| "loss": 0.536, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.2579324462640737, |
| "grad_norm": 0.6910504700298034, |
| "learning_rate": 2.0655737704918034e-05, |
| "loss": 0.5937, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2599795291709314, |
| "grad_norm": 0.5724201379088258, |
| "learning_rate": 2.081967213114754e-05, |
| "loss": 0.5019, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.26202661207778916, |
| "grad_norm": 0.7529480886936586, |
| "learning_rate": 2.098360655737705e-05, |
| "loss": 0.5643, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2640736949846469, |
| "grad_norm": 0.6170979184951091, |
| "learning_rate": 2.1147540983606557e-05, |
| "loss": 0.5449, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.2661207778915046, |
| "grad_norm": 0.6541686452612759, |
| "learning_rate": 2.1311475409836067e-05, |
| "loss": 0.5377, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.26816786079836236, |
| "grad_norm": 0.7744689267947779, |
| "learning_rate": 2.1475409836065574e-05, |
| "loss": 0.5846, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.27021494370522003, |
| "grad_norm": 0.5945036689673261, |
| "learning_rate": 2.1639344262295087e-05, |
| "loss": 0.5283, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.27226202661207777, |
| "grad_norm": 0.661953998440321, |
| "learning_rate": 2.180327868852459e-05, |
| "loss": 0.557, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.2743091095189355, |
| "grad_norm": 0.5763477769339282, |
| "learning_rate": 2.1967213114754104e-05, |
| "loss": 0.511, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.27635619242579323, |
| "grad_norm": 0.5514346276124723, |
| "learning_rate": 2.213114754098361e-05, |
| "loss": 0.5182, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.27840327533265097, |
| "grad_norm": 0.5695095404976926, |
| "learning_rate": 2.2295081967213113e-05, |
| "loss": 0.5146, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.2804503582395087, |
| "grad_norm": 0.5986561008583392, |
| "learning_rate": 2.2459016393442626e-05, |
| "loss": 0.5083, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.28249744114636643, |
| "grad_norm": 0.5610678921247985, |
| "learning_rate": 2.2622950819672133e-05, |
| "loss": 0.5313, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.28454452405322417, |
| "grad_norm": 0.6049817345717909, |
| "learning_rate": 2.2786885245901643e-05, |
| "loss": 0.5052, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.2865916069600819, |
| "grad_norm": 0.5464694050777636, |
| "learning_rate": 2.295081967213115e-05, |
| "loss": 0.5371, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.28863868986693964, |
| "grad_norm": 0.6288012925738132, |
| "learning_rate": 2.311475409836066e-05, |
| "loss": 0.4964, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.2906857727737973, |
| "grad_norm": 0.6292583801203022, |
| "learning_rate": 2.3278688524590166e-05, |
| "loss": 0.524, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.29273285568065505, |
| "grad_norm": 0.5989675634399306, |
| "learning_rate": 2.3442622950819676e-05, |
| "loss": 0.5347, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.2947799385875128, |
| "grad_norm": 0.711475328293837, |
| "learning_rate": 2.3606557377049182e-05, |
| "loss": 0.5212, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.2968270214943705, |
| "grad_norm": 0.6147945346885277, |
| "learning_rate": 2.3770491803278692e-05, |
| "loss": 0.536, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.29887410440122825, |
| "grad_norm": 0.7665801712563535, |
| "learning_rate": 2.39344262295082e-05, |
| "loss": 0.506, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.300921187308086, |
| "grad_norm": 0.5675791883302023, |
| "learning_rate": 2.4098360655737705e-05, |
| "loss": 0.5165, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.3029682702149437, |
| "grad_norm": 0.6749613199444409, |
| "learning_rate": 2.4262295081967215e-05, |
| "loss": 0.5353, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.30501535312180145, |
| "grad_norm": 0.5449017130362618, |
| "learning_rate": 2.442622950819672e-05, |
| "loss": 0.5378, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.3070624360286592, |
| "grad_norm": 0.6201210295020051, |
| "learning_rate": 2.459016393442623e-05, |
| "loss": 0.5094, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.3091095189355169, |
| "grad_norm": 0.577151136698421, |
| "learning_rate": 2.4754098360655738e-05, |
| "loss": 0.5196, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.3111566018423746, |
| "grad_norm": 0.5313033735643246, |
| "learning_rate": 2.4918032786885248e-05, |
| "loss": 0.4937, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.3132036847492323, |
| "grad_norm": 0.5101509817549174, |
| "learning_rate": 2.5081967213114754e-05, |
| "loss": 0.4788, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.31525076765609006, |
| "grad_norm": 0.6444658936554309, |
| "learning_rate": 2.5245901639344264e-05, |
| "loss": 0.5275, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.3172978505629478, |
| "grad_norm": 0.44154622012146943, |
| "learning_rate": 2.540983606557377e-05, |
| "loss": 0.4861, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3193449334698055, |
| "grad_norm": 0.608818705758696, |
| "learning_rate": 2.5573770491803284e-05, |
| "loss": 0.5071, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.32139201637666326, |
| "grad_norm": 0.616456605045568, |
| "learning_rate": 2.5737704918032787e-05, |
| "loss": 0.5195, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.323439099283521, |
| "grad_norm": 0.67099003512138, |
| "learning_rate": 2.59016393442623e-05, |
| "loss": 0.5523, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.3254861821903787, |
| "grad_norm": 0.6144728851907352, |
| "learning_rate": 2.6065573770491807e-05, |
| "loss": 0.5451, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.32753326509723646, |
| "grad_norm": 0.6323429696875369, |
| "learning_rate": 2.622950819672131e-05, |
| "loss": 0.5309, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.3295803480040942, |
| "grad_norm": 0.5465209069864494, |
| "learning_rate": 2.6393442622950824e-05, |
| "loss": 0.5342, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.33162743091095187, |
| "grad_norm": 0.7352587986716179, |
| "learning_rate": 2.655737704918033e-05, |
| "loss": 0.5181, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.3336745138178096, |
| "grad_norm": 0.5130967329382123, |
| "learning_rate": 2.672131147540984e-05, |
| "loss": 0.5074, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.33572159672466734, |
| "grad_norm": 0.6870511656785425, |
| "learning_rate": 2.6885245901639346e-05, |
| "loss": 0.5603, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.33776867963152507, |
| "grad_norm": 0.7048410066657461, |
| "learning_rate": 2.7049180327868856e-05, |
| "loss": 0.5272, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.3398157625383828, |
| "grad_norm": 0.8082743167202432, |
| "learning_rate": 2.7213114754098363e-05, |
| "loss": 0.5196, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.34186284544524054, |
| "grad_norm": 0.6901700346930703, |
| "learning_rate": 2.7377049180327873e-05, |
| "loss": 0.5439, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.34390992835209827, |
| "grad_norm": 0.6543817136595013, |
| "learning_rate": 2.754098360655738e-05, |
| "loss": 0.5375, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.345957011258956, |
| "grad_norm": 0.6015689406714214, |
| "learning_rate": 2.770491803278689e-05, |
| "loss": 0.5014, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.34800409416581374, |
| "grad_norm": 0.658713714501858, |
| "learning_rate": 2.7868852459016396e-05, |
| "loss": 0.5262, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3500511770726714, |
| "grad_norm": 0.5872370736842677, |
| "learning_rate": 2.8032786885245902e-05, |
| "loss": 0.5454, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.35209825997952915, |
| "grad_norm": 0.5434995669507285, |
| "learning_rate": 2.8196721311475412e-05, |
| "loss": 0.562, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.3541453428863869, |
| "grad_norm": 0.7617505474169058, |
| "learning_rate": 2.836065573770492e-05, |
| "loss": 0.5147, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.3561924257932446, |
| "grad_norm": 0.6940517866702995, |
| "learning_rate": 2.852459016393443e-05, |
| "loss": 0.561, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.35823950870010235, |
| "grad_norm": 0.6026045944898959, |
| "learning_rate": 2.8688524590163935e-05, |
| "loss": 0.5294, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.3602865916069601, |
| "grad_norm": 0.685003134185584, |
| "learning_rate": 2.8852459016393445e-05, |
| "loss": 0.5086, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.3623336745138178, |
| "grad_norm": 0.7007920129863516, |
| "learning_rate": 2.901639344262295e-05, |
| "loss": 0.5021, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.36438075742067555, |
| "grad_norm": 0.7355272145052204, |
| "learning_rate": 2.918032786885246e-05, |
| "loss": 0.5007, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.3664278403275333, |
| "grad_norm": 0.6487776776109249, |
| "learning_rate": 2.9344262295081968e-05, |
| "loss": 0.5524, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.368474923234391, |
| "grad_norm": 0.6902433390057133, |
| "learning_rate": 2.9508196721311478e-05, |
| "loss": 0.5142, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.3705220061412487, |
| "grad_norm": 0.6867948754438059, |
| "learning_rate": 2.9672131147540984e-05, |
| "loss": 0.5187, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.3725690890481064, |
| "grad_norm": 0.6159722804543436, |
| "learning_rate": 2.9836065573770498e-05, |
| "loss": 0.5508, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.37461617195496416, |
| "grad_norm": 0.7696599736408124, |
| "learning_rate": 3.0000000000000004e-05, |
| "loss": 0.5348, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.3766632548618219, |
| "grad_norm": 0.6954090228635682, |
| "learning_rate": 3.0163934426229507e-05, |
| "loss": 0.5447, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.37871033776867963, |
| "grad_norm": 0.6435121902381328, |
| "learning_rate": 3.032786885245902e-05, |
| "loss": 0.5474, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.38075742067553736, |
| "grad_norm": 0.8038288638214993, |
| "learning_rate": 3.0491803278688527e-05, |
| "loss": 0.5559, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.3828045035823951, |
| "grad_norm": 0.688956722367706, |
| "learning_rate": 3.065573770491804e-05, |
| "loss": 0.4875, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.38485158648925283, |
| "grad_norm": 0.7159386414570686, |
| "learning_rate": 3.0819672131147544e-05, |
| "loss": 0.5499, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.38689866939611056, |
| "grad_norm": 0.9285947357940207, |
| "learning_rate": 3.098360655737705e-05, |
| "loss": 0.5144, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.3889457523029683, |
| "grad_norm": 0.6303078015541651, |
| "learning_rate": 3.1147540983606557e-05, |
| "loss": 0.6274, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.390992835209826, |
| "grad_norm": 0.8868869883770375, |
| "learning_rate": 3.131147540983607e-05, |
| "loss": 0.5261, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.3930399181166837, |
| "grad_norm": 0.8238279905547772, |
| "learning_rate": 3.1475409836065576e-05, |
| "loss": 0.5284, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.39508700102354144, |
| "grad_norm": 0.7109735935815616, |
| "learning_rate": 3.163934426229509e-05, |
| "loss": 0.4981, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.3971340839303992, |
| "grad_norm": 0.8555381934750876, |
| "learning_rate": 3.180327868852459e-05, |
| "loss": 0.5184, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.3991811668372569, |
| "grad_norm": 0.7950926694758862, |
| "learning_rate": 3.19672131147541e-05, |
| "loss": 0.5286, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.40122824974411464, |
| "grad_norm": 0.6893785135479137, |
| "learning_rate": 3.213114754098361e-05, |
| "loss": 0.5048, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.4032753326509724, |
| "grad_norm": 0.8285127939060417, |
| "learning_rate": 3.2295081967213116e-05, |
| "loss": 0.5696, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.4053224155578301, |
| "grad_norm": 0.6476667354794096, |
| "learning_rate": 3.245901639344263e-05, |
| "loss": 0.5175, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.40736949846468784, |
| "grad_norm": 0.9133932587744947, |
| "learning_rate": 3.2622950819672136e-05, |
| "loss": 0.5426, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.4094165813715456, |
| "grad_norm": 0.7532290387705938, |
| "learning_rate": 3.278688524590164e-05, |
| "loss": 0.5087, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.41146366427840325, |
| "grad_norm": 0.9389363029784095, |
| "learning_rate": 3.295081967213115e-05, |
| "loss": 0.509, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.413510747185261, |
| "grad_norm": 0.5686252927682621, |
| "learning_rate": 3.311475409836066e-05, |
| "loss": 0.5193, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.4155578300921187, |
| "grad_norm": 0.8087812079712846, |
| "learning_rate": 3.327868852459017e-05, |
| "loss": 0.5197, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.41760491299897645, |
| "grad_norm": 0.4881097207653879, |
| "learning_rate": 3.3442622950819675e-05, |
| "loss": 0.5044, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.4196519959058342, |
| "grad_norm": 0.7274067002262758, |
| "learning_rate": 3.360655737704918e-05, |
| "loss": 0.4974, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.4216990788126919, |
| "grad_norm": 0.5458714268885722, |
| "learning_rate": 3.3770491803278695e-05, |
| "loss": 0.5017, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.42374616171954965, |
| "grad_norm": 0.7035504070202385, |
| "learning_rate": 3.39344262295082e-05, |
| "loss": 0.4983, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.4257932446264074, |
| "grad_norm": 0.5269572768909501, |
| "learning_rate": 3.409836065573771e-05, |
| "loss": 0.5484, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.4278403275332651, |
| "grad_norm": 0.5550435466705637, |
| "learning_rate": 3.4262295081967214e-05, |
| "loss": 0.5152, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.42988741044012285, |
| "grad_norm": 0.5658118678239209, |
| "learning_rate": 3.442622950819672e-05, |
| "loss": 0.529, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.43193449334698053, |
| "grad_norm": 0.4274306239416562, |
| "learning_rate": 3.4590163934426234e-05, |
| "loss": 0.547, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.43398157625383826, |
| "grad_norm": 0.6498291018145316, |
| "learning_rate": 3.475409836065574e-05, |
| "loss": 0.5255, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.436028659160696, |
| "grad_norm": 0.4862132754376562, |
| "learning_rate": 3.491803278688525e-05, |
| "loss": 0.5407, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.43807574206755373, |
| "grad_norm": 0.7345334765463367, |
| "learning_rate": 3.5081967213114754e-05, |
| "loss": 0.5467, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.44012282497441146, |
| "grad_norm": 0.6199950312514801, |
| "learning_rate": 3.524590163934427e-05, |
| "loss": 0.5599, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.4421699078812692, |
| "grad_norm": 0.7074691063675598, |
| "learning_rate": 3.5409836065573773e-05, |
| "loss": 0.5127, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.44421699078812693, |
| "grad_norm": 0.6229358703039948, |
| "learning_rate": 3.557377049180329e-05, |
| "loss": 0.5186, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.44626407369498466, |
| "grad_norm": 0.6395887726511317, |
| "learning_rate": 3.5737704918032786e-05, |
| "loss": 0.4989, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.4483111566018424, |
| "grad_norm": 0.5458826332923155, |
| "learning_rate": 3.59016393442623e-05, |
| "loss": 0.5008, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.4503582395087001, |
| "grad_norm": 0.6729015313127311, |
| "learning_rate": 3.6065573770491806e-05, |
| "loss": 0.5122, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.4524053224155578, |
| "grad_norm": 0.7752024606600724, |
| "learning_rate": 3.622950819672131e-05, |
| "loss": 0.5018, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.45445240532241554, |
| "grad_norm": 0.6141567713992134, |
| "learning_rate": 3.6393442622950826e-05, |
| "loss": 0.4921, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.4564994882292733, |
| "grad_norm": 0.6807359145513986, |
| "learning_rate": 3.655737704918033e-05, |
| "loss": 0.5041, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.458546571136131, |
| "grad_norm": 0.5856826658124886, |
| "learning_rate": 3.672131147540984e-05, |
| "loss": 0.5281, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.46059365404298874, |
| "grad_norm": 0.6528386749398997, |
| "learning_rate": 3.6885245901639346e-05, |
| "loss": 0.4862, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.4626407369498465, |
| "grad_norm": 0.8760949897414592, |
| "learning_rate": 3.704918032786886e-05, |
| "loss": 0.5313, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.4646878198567042, |
| "grad_norm": 0.7133493187489152, |
| "learning_rate": 3.7213114754098365e-05, |
| "loss": 0.5611, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.46673490276356194, |
| "grad_norm": 0.6514391258645618, |
| "learning_rate": 3.737704918032787e-05, |
| "loss": 0.5017, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.4687819856704197, |
| "grad_norm": 0.712386378258888, |
| "learning_rate": 3.754098360655738e-05, |
| "loss": 0.4801, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.47082906857727735, |
| "grad_norm": 0.7933509165191774, |
| "learning_rate": 3.770491803278689e-05, |
| "loss": 0.5265, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.4728761514841351, |
| "grad_norm": 0.6393959136262052, |
| "learning_rate": 3.78688524590164e-05, |
| "loss": 0.533, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.4749232343909928, |
| "grad_norm": 0.8336237173823177, |
| "learning_rate": 3.8032786885245905e-05, |
| "loss": 0.5067, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.47697031729785055, |
| "grad_norm": 0.7946625026406952, |
| "learning_rate": 3.819672131147541e-05, |
| "loss": 0.5572, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.4790174002047083, |
| "grad_norm": 0.7448514333498657, |
| "learning_rate": 3.836065573770492e-05, |
| "loss": 0.5267, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.481064483111566, |
| "grad_norm": 0.6216449013018147, |
| "learning_rate": 3.852459016393443e-05, |
| "loss": 0.5007, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.48311156601842375, |
| "grad_norm": 0.5847497681971316, |
| "learning_rate": 3.868852459016394e-05, |
| "loss": 0.5465, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.4851586489252815, |
| "grad_norm": 0.5930045841712915, |
| "learning_rate": 3.8852459016393444e-05, |
| "loss": 0.5498, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.4872057318321392, |
| "grad_norm": 0.6242247074949386, |
| "learning_rate": 3.901639344262295e-05, |
| "loss": 0.5032, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.48925281473899696, |
| "grad_norm": 0.5315372089751544, |
| "learning_rate": 3.9180327868852464e-05, |
| "loss": 0.5351, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.49129989764585463, |
| "grad_norm": 0.5976493830078852, |
| "learning_rate": 3.934426229508197e-05, |
| "loss": 0.5511, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.49334698055271237, |
| "grad_norm": 0.5263959833035169, |
| "learning_rate": 3.950819672131148e-05, |
| "loss": 0.5095, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.4953940634595701, |
| "grad_norm": 0.5164961935996712, |
| "learning_rate": 3.9672131147540983e-05, |
| "loss": 0.4903, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.49744114636642783, |
| "grad_norm": 0.5030461231666816, |
| "learning_rate": 3.98360655737705e-05, |
| "loss": 0.5246, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.49948822927328557, |
| "grad_norm": 0.5199256237042372, |
| "learning_rate": 4e-05, |
| "loss": 0.527, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.5015353121801432, |
| "grad_norm": 0.5720072000502151, |
| "learning_rate": 3.999997953390434e-05, |
| "loss": 0.4698, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.503582395087001, |
| "grad_norm": 0.4723958838410465, |
| "learning_rate": 3.999991813565924e-05, |
| "loss": 0.5021, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.5056294779938587, |
| "grad_norm": 0.6664032925560375, |
| "learning_rate": 3.999981580539036e-05, |
| "loss": 0.5195, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.5076765609007164, |
| "grad_norm": 0.46934376217932583, |
| "learning_rate": 3.999967254330713e-05, |
| "loss": 0.4915, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.5097236438075742, |
| "grad_norm": 0.5471916125552302, |
| "learning_rate": 3.999948834970275e-05, |
| "loss": 0.5395, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5117707267144319, |
| "grad_norm": 0.5322293661429813, |
| "learning_rate": 3.9999263224954204e-05, |
| "loss": 0.5156, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5138178096212896, |
| "grad_norm": 0.48934414004740173, |
| "learning_rate": 3.999899716952221e-05, |
| "loss": 0.505, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.5158648925281474, |
| "grad_norm": 0.6841239054987143, |
| "learning_rate": 3.9998690183951304e-05, |
| "loss": 0.517, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5179119754350051, |
| "grad_norm": 0.6081072200654224, |
| "learning_rate": 3.999834226886976e-05, |
| "loss": 0.5209, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.5199590583418628, |
| "grad_norm": 0.591682811543655, |
| "learning_rate": 3.999795342498961e-05, |
| "loss": 0.5144, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5220061412487206, |
| "grad_norm": 0.644975243350573, |
| "learning_rate": 3.999752365310668e-05, |
| "loss": 0.5285, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.5240532241555783, |
| "grad_norm": 0.5648625378625047, |
| "learning_rate": 3.999705295410054e-05, |
| "loss": 0.493, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.526100307062436, |
| "grad_norm": 0.6130167811037579, |
| "learning_rate": 3.999654132893453e-05, |
| "loss": 0.5257, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5281473899692938, |
| "grad_norm": 0.5037937329537826, |
| "learning_rate": 3.999598877865575e-05, |
| "loss": 0.4947, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5301944728761515, |
| "grad_norm": 0.6388452684007601, |
| "learning_rate": 3.999539530439504e-05, |
| "loss": 0.5319, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.5322415557830092, |
| "grad_norm": 0.5304888345319132, |
| "learning_rate": 3.9994760907367025e-05, |
| "loss": 0.5239, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.534288638689867, |
| "grad_norm": 0.548729152916564, |
| "learning_rate": 3.999408558887006e-05, |
| "loss": 0.5182, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5363357215967247, |
| "grad_norm": 0.4946265440615839, |
| "learning_rate": 3.9993369350286265e-05, |
| "loss": 0.5211, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.5383828045035824, |
| "grad_norm": 0.5513131572470374, |
| "learning_rate": 3.999261219308149e-05, |
| "loss": 0.4922, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.5404298874104401, |
| "grad_norm": 0.5628821503706624, |
| "learning_rate": 3.999181411880536e-05, |
| "loss": 0.4833, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.5424769703172978, |
| "grad_norm": 0.6321595406631201, |
| "learning_rate": 3.99909751290912e-05, |
| "loss": 0.5156, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.5445240532241555, |
| "grad_norm": 0.49452183826279106, |
| "learning_rate": 3.9990095225656104e-05, |
| "loss": 0.4918, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.5465711361310133, |
| "grad_norm": 0.5983835992691791, |
| "learning_rate": 3.998917441030089e-05, |
| "loss": 0.534, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.548618219037871, |
| "grad_norm": 0.5392668568126767, |
| "learning_rate": 3.9988212684910107e-05, |
| "loss": 0.4919, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.5506653019447287, |
| "grad_norm": 0.5271122702751097, |
| "learning_rate": 3.998721005145204e-05, |
| "loss": 0.5023, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.5527123848515865, |
| "grad_norm": 0.613359179011921, |
| "learning_rate": 3.998616651197867e-05, |
| "loss": 0.5123, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5547594677584442, |
| "grad_norm": 0.5885866628158287, |
| "learning_rate": 3.9985082068625724e-05, |
| "loss": 0.5192, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.5568065506653019, |
| "grad_norm": 0.5458709549332362, |
| "learning_rate": 3.998395672361264e-05, |
| "loss": 0.5159, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.5588536335721597, |
| "grad_norm": 0.47718089897345783, |
| "learning_rate": 3.998279047924255e-05, |
| "loss": 0.4571, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.5609007164790174, |
| "grad_norm": 0.5918739989559765, |
| "learning_rate": 3.998158333790231e-05, |
| "loss": 0.5093, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.5629477993858751, |
| "grad_norm": 0.6171028421078789, |
| "learning_rate": 3.998033530206246e-05, |
| "loss": 0.5174, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.5649948822927329, |
| "grad_norm": 0.6049929559700232, |
| "learning_rate": 3.9979046374277246e-05, |
| "loss": 0.5292, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.5670419651995906, |
| "grad_norm": 0.6353516580703451, |
| "learning_rate": 3.99777165571846e-05, |
| "loss": 0.5202, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.5690890481064483, |
| "grad_norm": 0.542126012081921, |
| "learning_rate": 3.997634585350614e-05, |
| "loss": 0.515, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.5711361310133061, |
| "grad_norm": 0.6577209989909311, |
| "learning_rate": 3.997493426604715e-05, |
| "loss": 0.4827, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.5731832139201638, |
| "grad_norm": 0.5786055028419322, |
| "learning_rate": 3.997348179769661e-05, |
| "loss": 0.4984, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5752302968270215, |
| "grad_norm": 0.5693454757360409, |
| "learning_rate": 3.9971988451427155e-05, |
| "loss": 0.4795, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.5772773797338793, |
| "grad_norm": 0.523219696074873, |
| "learning_rate": 3.997045423029508e-05, |
| "loss": 0.5288, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.579324462640737, |
| "grad_norm": 0.5914665747076296, |
| "learning_rate": 3.996887913744033e-05, |
| "loss": 0.5412, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.5813715455475946, |
| "grad_norm": 0.504747034534071, |
| "learning_rate": 3.996726317608652e-05, |
| "loss": 0.5119, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.5834186284544524, |
| "grad_norm": 0.6330512274369553, |
| "learning_rate": 3.996560634954088e-05, |
| "loss": 0.5504, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.5854657113613101, |
| "grad_norm": 0.5570244494354821, |
| "learning_rate": 3.9963908661194285e-05, |
| "loss": 0.5323, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.5875127942681678, |
| "grad_norm": 0.7110270240629256, |
| "learning_rate": 3.9962170114521246e-05, |
| "loss": 0.5086, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.5895598771750256, |
| "grad_norm": 0.5503143307537267, |
| "learning_rate": 3.996039071307989e-05, |
| "loss": 0.5451, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.5916069600818833, |
| "grad_norm": 0.6551750805922822, |
| "learning_rate": 3.995857046051196e-05, |
| "loss": 0.5375, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.593654042988741, |
| "grad_norm": 0.625258001738531, |
| "learning_rate": 3.995670936054279e-05, |
| "loss": 0.5241, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5957011258955988, |
| "grad_norm": 0.6762376840884802, |
| "learning_rate": 3.9954807416981335e-05, |
| "loss": 0.5049, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.5977482088024565, |
| "grad_norm": 0.6711303804476403, |
| "learning_rate": 3.995286463372013e-05, |
| "loss": 0.5117, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.5997952917093142, |
| "grad_norm": 0.5151264552712931, |
| "learning_rate": 3.9950881014735295e-05, |
| "loss": 0.5053, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.601842374616172, |
| "grad_norm": 0.663266741433607, |
| "learning_rate": 3.994885656408651e-05, |
| "loss": 0.4948, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.6038894575230297, |
| "grad_norm": 0.567833717196775, |
| "learning_rate": 3.994679128591706e-05, |
| "loss": 0.5566, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.6059365404298874, |
| "grad_norm": 0.5899154422863364, |
| "learning_rate": 3.9944685184453746e-05, |
| "loss": 0.53, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.6079836233367452, |
| "grad_norm": 0.5758493539965378, |
| "learning_rate": 3.994253826400693e-05, |
| "loss": 0.5314, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.6100307062436029, |
| "grad_norm": 0.5296826695926912, |
| "learning_rate": 3.9940350528970535e-05, |
| "loss": 0.5116, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.6120777891504606, |
| "grad_norm": 0.5290847709355387, |
| "learning_rate": 3.993812198382199e-05, |
| "loss": 0.5028, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.6141248720573184, |
| "grad_norm": 0.552336105606733, |
| "learning_rate": 3.993585263312227e-05, |
| "loss": 0.5202, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6161719549641761, |
| "grad_norm": 0.5293716014410816, |
| "learning_rate": 3.993354248151583e-05, |
| "loss": 0.4912, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.6182190378710338, |
| "grad_norm": 0.6609476283232325, |
| "learning_rate": 3.993119153373067e-05, |
| "loss": 0.5438, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.6202661207778914, |
| "grad_norm": 0.5469203904739622, |
| "learning_rate": 3.992879979457824e-05, |
| "loss": 0.5123, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.6223132036847492, |
| "grad_norm": 0.6036405520109454, |
| "learning_rate": 3.9926367268953514e-05, |
| "loss": 0.5047, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.6243602865916069, |
| "grad_norm": 0.5523585337922061, |
| "learning_rate": 3.9923893961834914e-05, |
| "loss": 0.5126, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6264073694984647, |
| "grad_norm": 0.5060340859748851, |
| "learning_rate": 3.992137987828434e-05, |
| "loss": 0.512, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.6284544524053224, |
| "grad_norm": 0.4931821446334137, |
| "learning_rate": 3.991882502344712e-05, |
| "loss": 0.5086, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.6305015353121801, |
| "grad_norm": 0.49318765086607474, |
| "learning_rate": 3.991622940255208e-05, |
| "loss": 0.4928, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.6325486182190379, |
| "grad_norm": 0.47444503281787836, |
| "learning_rate": 3.991359302091141e-05, |
| "loss": 0.4823, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.6345957011258956, |
| "grad_norm": 0.5837217244432208, |
| "learning_rate": 3.991091588392077e-05, |
| "loss": 0.553, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6366427840327533, |
| "grad_norm": 0.5943267226346076, |
| "learning_rate": 3.99081979970592e-05, |
| "loss": 0.5191, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.638689866939611, |
| "grad_norm": 0.4692619037725303, |
| "learning_rate": 3.9905439365889176e-05, |
| "loss": 0.4833, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.6407369498464688, |
| "grad_norm": 0.5017562232149083, |
| "learning_rate": 3.990263999605652e-05, |
| "loss": 0.4932, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.6427840327533265, |
| "grad_norm": 0.4800449124898955, |
| "learning_rate": 3.989979989329046e-05, |
| "loss": 0.5475, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.6448311156601843, |
| "grad_norm": 0.7077261651500286, |
| "learning_rate": 3.9896919063403567e-05, |
| "loss": 0.5656, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.646878198567042, |
| "grad_norm": 0.4705479904386771, |
| "learning_rate": 3.989399751229179e-05, |
| "loss": 0.4812, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.6489252814738997, |
| "grad_norm": 0.6188237485091818, |
| "learning_rate": 3.989103524593439e-05, |
| "loss": 0.5448, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.6509723643807575, |
| "grad_norm": 0.5875009833381106, |
| "learning_rate": 3.9888032270393966e-05, |
| "loss": 0.5391, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.6530194472876152, |
| "grad_norm": 0.6367560756626307, |
| "learning_rate": 3.988498859181645e-05, |
| "loss": 0.5857, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.6550665301944729, |
| "grad_norm": 0.485432763118475, |
| "learning_rate": 3.988190421643105e-05, |
| "loss": 0.4775, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6571136131013307, |
| "grad_norm": 0.6730462808178248, |
| "learning_rate": 3.9878779150550306e-05, |
| "loss": 0.4953, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.6591606960081884, |
| "grad_norm": 0.6111893546543505, |
| "learning_rate": 3.9875613400569975e-05, |
| "loss": 0.4593, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.661207778915046, |
| "grad_norm": 0.7191482927871957, |
| "learning_rate": 3.987240697296912e-05, |
| "loss": 0.4943, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.6632548618219037, |
| "grad_norm": 0.6827606638555512, |
| "learning_rate": 3.986915987431006e-05, |
| "loss": 0.5312, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.6653019447287615, |
| "grad_norm": 0.6385480452392281, |
| "learning_rate": 3.986587211123833e-05, |
| "loss": 0.5066, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.6673490276356192, |
| "grad_norm": 0.5586591391727767, |
| "learning_rate": 3.986254369048268e-05, |
| "loss": 0.519, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.6693961105424769, |
| "grad_norm": 0.7119031753948682, |
| "learning_rate": 3.985917461885512e-05, |
| "loss": 0.526, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.6714431934493347, |
| "grad_norm": 0.403690119122138, |
| "learning_rate": 3.98557649032508e-05, |
| "loss": 0.4941, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.6734902763561924, |
| "grad_norm": 0.711228959963807, |
| "learning_rate": 3.985231455064809e-05, |
| "loss": 0.5161, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.6755373592630501, |
| "grad_norm": 0.6152056684018806, |
| "learning_rate": 3.9848823568108515e-05, |
| "loss": 0.5252, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.6775844421699079, |
| "grad_norm": 0.6811387482564264, |
| "learning_rate": 3.984529196277674e-05, |
| "loss": 0.5153, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.6796315250767656, |
| "grad_norm": 0.715472760713951, |
| "learning_rate": 3.9841719741880583e-05, |
| "loss": 0.5136, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.6816786079836233, |
| "grad_norm": 0.6117081033404244, |
| "learning_rate": 3.9838106912731e-05, |
| "loss": 0.5056, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.6837256908904811, |
| "grad_norm": 0.5640133468287892, |
| "learning_rate": 3.983445348272203e-05, |
| "loss": 0.5022, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.6857727737973388, |
| "grad_norm": 0.5708874168642846, |
| "learning_rate": 3.983075945933083e-05, |
| "loss": 0.5063, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.6878198567041965, |
| "grad_norm": 0.5178591256000558, |
| "learning_rate": 3.9827024850117606e-05, |
| "loss": 0.5067, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.6898669396110543, |
| "grad_norm": 0.5425146505052523, |
| "learning_rate": 3.982324966272566e-05, |
| "loss": 0.5112, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.691914022517912, |
| "grad_norm": 0.4545857800127313, |
| "learning_rate": 3.9819433904881324e-05, |
| "loss": 0.4987, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.6939611054247697, |
| "grad_norm": 0.5538231712121938, |
| "learning_rate": 3.981557758439396e-05, |
| "loss": 0.5174, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.6960081883316275, |
| "grad_norm": 0.516910525115318, |
| "learning_rate": 3.981168070915594e-05, |
| "loss": 0.5205, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6980552712384852, |
| "grad_norm": 0.5341261157883666, |
| "learning_rate": 3.980774328714267e-05, |
| "loss": 0.5406, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.7001023541453428, |
| "grad_norm": 0.5219326420859508, |
| "learning_rate": 3.9803765326412506e-05, |
| "loss": 0.5491, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.7021494370522006, |
| "grad_norm": 0.5126045854291318, |
| "learning_rate": 3.979974683510677e-05, |
| "loss": 0.5507, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.7041965199590583, |
| "grad_norm": 0.5442853681992302, |
| "learning_rate": 3.9795687821449754e-05, |
| "loss": 0.5136, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.706243602865916, |
| "grad_norm": 0.4580135182917517, |
| "learning_rate": 3.9791588293748676e-05, |
| "loss": 0.4908, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.7082906857727738, |
| "grad_norm": 0.5632074896697251, |
| "learning_rate": 3.978744826039366e-05, |
| "loss": 0.5046, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.7103377686796315, |
| "grad_norm": 0.4381200556890962, |
| "learning_rate": 3.9783267729857756e-05, |
| "loss": 0.4994, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.7123848515864892, |
| "grad_norm": 0.5608491796041225, |
| "learning_rate": 3.9779046710696854e-05, |
| "loss": 0.4813, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.714431934493347, |
| "grad_norm": 0.4726444333140026, |
| "learning_rate": 3.977478521154974e-05, |
| "loss": 0.487, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.7164790174002047, |
| "grad_norm": 0.5138973186415984, |
| "learning_rate": 3.977048324113805e-05, |
| "loss": 0.5418, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.7185261003070624, |
| "grad_norm": 0.48636953697879376, |
| "learning_rate": 3.976614080826623e-05, |
| "loss": 0.5313, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.7205731832139202, |
| "grad_norm": 0.5627556793214199, |
| "learning_rate": 3.9761757921821544e-05, |
| "loss": 0.5136, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.7226202661207779, |
| "grad_norm": 0.42092677165552256, |
| "learning_rate": 3.975733459077405e-05, |
| "loss": 0.5396, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.7246673490276356, |
| "grad_norm": 0.48567860686645814, |
| "learning_rate": 3.9752870824176585e-05, |
| "loss": 0.4912, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.7267144319344934, |
| "grad_norm": 0.48215074888552417, |
| "learning_rate": 3.974836663116472e-05, |
| "loss": 0.5246, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.7287615148413511, |
| "grad_norm": 0.4041796943755226, |
| "learning_rate": 3.97438220209568e-05, |
| "loss": 0.4697, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.7308085977482088, |
| "grad_norm": 0.5033023757557246, |
| "learning_rate": 3.973923700285386e-05, |
| "loss": 0.5033, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.7328556806550666, |
| "grad_norm": 0.47849562907550447, |
| "learning_rate": 3.973461158623963e-05, |
| "loss": 0.51, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.7349027635619243, |
| "grad_norm": 0.4728712624318383, |
| "learning_rate": 3.972994578058055e-05, |
| "loss": 0.5183, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.736949846468782, |
| "grad_norm": 0.46994907616226134, |
| "learning_rate": 3.972523959542569e-05, |
| "loss": 0.4791, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.7389969293756398, |
| "grad_norm": 0.4716759611852859, |
| "learning_rate": 3.9720493040406786e-05, |
| "loss": 0.5053, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.7410440122824974, |
| "grad_norm": 0.4739511859859272, |
| "learning_rate": 3.9715706125238164e-05, |
| "loss": 0.4902, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.7430910951893551, |
| "grad_norm": 0.5524351394525074, |
| "learning_rate": 3.971087885971679e-05, |
| "loss": 0.5002, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.7451381780962129, |
| "grad_norm": 0.5160577267882994, |
| "learning_rate": 3.970601125372218e-05, |
| "loss": 0.5077, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.7471852610030706, |
| "grad_norm": 0.6036728391773393, |
| "learning_rate": 3.970110331721643e-05, |
| "loss": 0.5152, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.7492323439099283, |
| "grad_norm": 0.44920724019503216, |
| "learning_rate": 3.9696155060244166e-05, |
| "loss": 0.5016, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.7512794268167861, |
| "grad_norm": 0.5748889938625247, |
| "learning_rate": 3.9691166492932535e-05, |
| "loss": 0.5484, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.7533265097236438, |
| "grad_norm": 0.456775282424986, |
| "learning_rate": 3.968613762549119e-05, |
| "loss": 0.4839, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.7553735926305015, |
| "grad_norm": 0.5722901317947914, |
| "learning_rate": 3.968106846821226e-05, |
| "loss": 0.4961, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.7574206755373593, |
| "grad_norm": 0.513431808850321, |
| "learning_rate": 3.9675959031470336e-05, |
| "loss": 0.5347, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.759467758444217, |
| "grad_norm": 0.4971842267477506, |
| "learning_rate": 3.9670809325722425e-05, |
| "loss": 0.5025, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.7615148413510747, |
| "grad_norm": 0.5489460962549015, |
| "learning_rate": 3.966561936150797e-05, |
| "loss": 0.527, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.7635619242579325, |
| "grad_norm": 0.46580059626935816, |
| "learning_rate": 3.966038914944881e-05, |
| "loss": 0.5024, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.7656090071647902, |
| "grad_norm": 0.48329497303274316, |
| "learning_rate": 3.9655118700249146e-05, |
| "loss": 0.4543, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.7676560900716479, |
| "grad_norm": 0.4294347901743354, |
| "learning_rate": 3.964980802469552e-05, |
| "loss": 0.4918, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.7697031729785057, |
| "grad_norm": 0.46898699035993047, |
| "learning_rate": 3.964445713365682e-05, |
| "loss": 0.5219, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.7717502558853634, |
| "grad_norm": 0.5368649938570568, |
| "learning_rate": 3.963906603808422e-05, |
| "loss": 0.5491, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.7737973387922211, |
| "grad_norm": 0.4589070525461554, |
| "learning_rate": 3.96336347490112e-05, |
| "loss": 0.5193, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.7758444216990789, |
| "grad_norm": 0.5198618447541961, |
| "learning_rate": 3.9628163277553486e-05, |
| "loss": 0.5059, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.7778915046059366, |
| "grad_norm": 0.5201151329391878, |
| "learning_rate": 3.962265163490903e-05, |
| "loss": 0.5242, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.7799385875127943, |
| "grad_norm": 0.5286097719017859, |
| "learning_rate": 3.9617099832358035e-05, |
| "loss": 0.5131, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.781985670419652, |
| "grad_norm": 0.4908828849732809, |
| "learning_rate": 3.961150788126286e-05, |
| "loss": 0.5247, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.7840327533265097, |
| "grad_norm": 0.5105242176184168, |
| "learning_rate": 3.960587579306805e-05, |
| "loss": 0.5082, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.7860798362333674, |
| "grad_norm": 0.4827529985137035, |
| "learning_rate": 3.960020357930028e-05, |
| "loss": 0.4886, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.7881269191402251, |
| "grad_norm": 0.5255080063950176, |
| "learning_rate": 3.9594491251568376e-05, |
| "loss": 0.5323, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.7901740020470829, |
| "grad_norm": 0.42324786024329186, |
| "learning_rate": 3.958873882156322e-05, |
| "loss": 0.4913, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.7922210849539406, |
| "grad_norm": 0.4856526176027495, |
| "learning_rate": 3.9582946301057806e-05, |
| "loss": 0.5083, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.7942681678607983, |
| "grad_norm": 0.4869309440084628, |
| "learning_rate": 3.957711370190716e-05, |
| "loss": 0.503, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.7963152507676561, |
| "grad_norm": 0.5268029487052006, |
| "learning_rate": 3.957124103604833e-05, |
| "loss": 0.5082, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.7983623336745138, |
| "grad_norm": 0.507634587065165, |
| "learning_rate": 3.9565328315500375e-05, |
| "loss": 0.5166, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8004094165813715, |
| "grad_norm": 0.5687757643916819, |
| "learning_rate": 3.9559375552364325e-05, |
| "loss": 0.5143, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.8024564994882293, |
| "grad_norm": 0.4695087735006217, |
| "learning_rate": 3.955338275882316e-05, |
| "loss": 0.4713, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.804503582395087, |
| "grad_norm": 0.46001610899852224, |
| "learning_rate": 3.9547349947141787e-05, |
| "loss": 0.4701, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.8065506653019447, |
| "grad_norm": 0.4942091923144858, |
| "learning_rate": 3.954127712966702e-05, |
| "loss": 0.4916, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.8085977482088025, |
| "grad_norm": 0.4750963479134321, |
| "learning_rate": 3.953516431882754e-05, |
| "loss": 0.535, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.8106448311156602, |
| "grad_norm": 0.45303577566833647, |
| "learning_rate": 3.952901152713389e-05, |
| "loss": 0.4756, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.812691914022518, |
| "grad_norm": 0.45075457713091993, |
| "learning_rate": 3.952281876717843e-05, |
| "loss": 0.5013, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.8147389969293757, |
| "grad_norm": 0.4866908528841161, |
| "learning_rate": 3.951658605163533e-05, |
| "loss": 0.5159, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.8167860798362334, |
| "grad_norm": 0.47273974659050305, |
| "learning_rate": 3.9510313393260507e-05, |
| "loss": 0.4876, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.8188331627430911, |
| "grad_norm": 0.4599999323212464, |
| "learning_rate": 3.950400080489165e-05, |
| "loss": 0.486, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8208802456499488, |
| "grad_norm": 0.41263568282343643, |
| "learning_rate": 3.9497648299448174e-05, |
| "loss": 0.6514, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.8229273285568065, |
| "grad_norm": 0.49637310910399085, |
| "learning_rate": 3.949125588993117e-05, |
| "loss": 0.5247, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.8249744114636642, |
| "grad_norm": 0.407642872768924, |
| "learning_rate": 3.94848235894234e-05, |
| "loss": 0.5023, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.827021494370522, |
| "grad_norm": 0.5125499802657086, |
| "learning_rate": 3.947835141108928e-05, |
| "loss": 0.5187, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.8290685772773797, |
| "grad_norm": 0.46889549178993806, |
| "learning_rate": 3.947183936817483e-05, |
| "loss": 0.5089, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.8311156601842374, |
| "grad_norm": 0.4793952039316711, |
| "learning_rate": 3.9465287474007654e-05, |
| "loss": 0.4946, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.8331627430910952, |
| "grad_norm": 0.4616324812446371, |
| "learning_rate": 3.945869574199693e-05, |
| "loss": 0.4905, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.8352098259979529, |
| "grad_norm": 0.522723518377932, |
| "learning_rate": 3.9452064185633345e-05, |
| "loss": 0.4873, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.8372569089048106, |
| "grad_norm": 0.4444650907824343, |
| "learning_rate": 3.944539281848912e-05, |
| "loss": 0.4995, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.8393039918116684, |
| "grad_norm": 0.5394910784111964, |
| "learning_rate": 3.943868165421793e-05, |
| "loss": 0.4597, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.8413510747185261, |
| "grad_norm": 0.47150833451005714, |
| "learning_rate": 3.943193070655492e-05, |
| "loss": 0.4768, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.8433981576253838, |
| "grad_norm": 0.5217385743079497, |
| "learning_rate": 3.942513998931663e-05, |
| "loss": 0.4936, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.8454452405322416, |
| "grad_norm": 0.47838287732742774, |
| "learning_rate": 3.9418309516401015e-05, |
| "loss": 0.4998, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.8474923234390993, |
| "grad_norm": 0.5219985412359689, |
| "learning_rate": 3.9411439301787383e-05, |
| "loss": 0.4922, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.849539406345957, |
| "grad_norm": 0.5360559638934609, |
| "learning_rate": 3.940452935953639e-05, |
| "loss": 0.4932, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.8515864892528148, |
| "grad_norm": 0.45167095248901046, |
| "learning_rate": 3.939757970378997e-05, |
| "loss": 0.5325, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.8536335721596725, |
| "grad_norm": 0.5392146495002762, |
| "learning_rate": 3.9390590348771374e-05, |
| "loss": 0.5161, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.8556806550665302, |
| "grad_norm": 0.4694690399172554, |
| "learning_rate": 3.9383561308785075e-05, |
| "loss": 0.4872, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.857727737973388, |
| "grad_norm": 0.5450902044992034, |
| "learning_rate": 3.937649259821677e-05, |
| "loss": 0.5033, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.8597748208802457, |
| "grad_norm": 0.4513747246745581, |
| "learning_rate": 3.9369384231533365e-05, |
| "loss": 0.5022, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.8618219037871033, |
| "grad_norm": 0.47998170745321456, |
| "learning_rate": 3.9362236223282885e-05, |
| "loss": 0.488, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.8638689866939611, |
| "grad_norm": 0.5703617109539353, |
| "learning_rate": 3.935504858809454e-05, |
| "loss": 0.5364, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.8659160696008188, |
| "grad_norm": 0.46673348103204154, |
| "learning_rate": 3.9347821340678597e-05, |
| "loss": 0.519, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.8679631525076765, |
| "grad_norm": 0.5424695733970012, |
| "learning_rate": 3.934055449582641e-05, |
| "loss": 0.5568, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.8700102354145343, |
| "grad_norm": 0.4765444623240975, |
| "learning_rate": 3.9333248068410375e-05, |
| "loss": 0.4743, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.872057318321392, |
| "grad_norm": 0.49864798719594966, |
| "learning_rate": 3.932590207338391e-05, |
| "loss": 0.4982, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.8741044012282497, |
| "grad_norm": 0.4988338996845499, |
| "learning_rate": 3.931851652578137e-05, |
| "loss": 0.4963, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.8761514841351075, |
| "grad_norm": 0.4432676805703767, |
| "learning_rate": 3.931109144071811e-05, |
| "loss": 0.4719, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.8781985670419652, |
| "grad_norm": 0.48146006351463866, |
| "learning_rate": 3.930362683339037e-05, |
| "loss": 0.4863, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.8802456499488229, |
| "grad_norm": 0.44888291145823134, |
| "learning_rate": 3.92961227190753e-05, |
| "loss": 0.5167, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.8822927328556807, |
| "grad_norm": 0.4602238267927309, |
| "learning_rate": 3.928857911313088e-05, |
| "loss": 0.5031, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.8843398157625384, |
| "grad_norm": 0.49233832361099084, |
| "learning_rate": 3.928099603099591e-05, |
| "loss": 0.5013, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.8863868986693961, |
| "grad_norm": 0.4576541292218355, |
| "learning_rate": 3.9273373488190036e-05, |
| "loss": 0.5574, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.8884339815762539, |
| "grad_norm": 0.4638871631773895, |
| "learning_rate": 3.92657115003136e-05, |
| "loss": 0.5253, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.8904810644831116, |
| "grad_norm": 0.5178780585024065, |
| "learning_rate": 3.9258010083047715e-05, |
| "loss": 0.5485, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.8925281473899693, |
| "grad_norm": 0.4598091842235503, |
| "learning_rate": 3.925026925215417e-05, |
| "loss": 0.4636, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.8945752302968271, |
| "grad_norm": 0.5965207792237605, |
| "learning_rate": 3.924248902347541e-05, |
| "loss": 0.5464, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.8966223132036848, |
| "grad_norm": 0.49770158650370516, |
| "learning_rate": 3.9234669412934546e-05, |
| "loss": 0.5461, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.8986693961105425, |
| "grad_norm": 0.5031859956447141, |
| "learning_rate": 3.922681043653526e-05, |
| "loss": 0.5146, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.9007164790174002, |
| "grad_norm": 0.4561558037016599, |
| "learning_rate": 3.92189121103618e-05, |
| "loss": 0.4898, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.9027635619242579, |
| "grad_norm": 0.4247526477385047, |
| "learning_rate": 3.921097445057896e-05, |
| "loss": 0.4976, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.9048106448311156, |
| "grad_norm": 0.523620896917421, |
| "learning_rate": 3.920299747343204e-05, |
| "loss": 0.4894, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.9068577277379734, |
| "grad_norm": 0.4267971609347695, |
| "learning_rate": 3.919498119524679e-05, |
| "loss": 0.5029, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.9089048106448311, |
| "grad_norm": 0.5817422656960313, |
| "learning_rate": 3.9186925632429396e-05, |
| "loss": 0.5477, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.9109518935516888, |
| "grad_norm": 0.3812610872657855, |
| "learning_rate": 3.9178830801466465e-05, |
| "loss": 0.5147, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.9129989764585466, |
| "grad_norm": 0.5210417707070376, |
| "learning_rate": 3.917069671892494e-05, |
| "loss": 0.5052, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.9150460593654043, |
| "grad_norm": 0.4195818289044568, |
| "learning_rate": 3.9162523401452125e-05, |
| "loss": 0.476, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.917093142272262, |
| "grad_norm": 0.4640561681219655, |
| "learning_rate": 3.915431086577561e-05, |
| "loss": 0.4811, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.9191402251791198, |
| "grad_norm": 0.46049336960827775, |
| "learning_rate": 3.914605912870324e-05, |
| "loss": 0.5082, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.9211873080859775, |
| "grad_norm": 0.5244765933281279, |
| "learning_rate": 3.913776820712309e-05, |
| "loss": 0.4982, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.9232343909928352, |
| "grad_norm": 0.4207300716892893, |
| "learning_rate": 3.912943811800347e-05, |
| "loss": 0.477, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.925281473899693, |
| "grad_norm": 0.4532179957617135, |
| "learning_rate": 3.912106887839278e-05, |
| "loss": 0.4953, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.9273285568065507, |
| "grad_norm": 0.42360404758505654, |
| "learning_rate": 3.9112660505419626e-05, |
| "loss": 0.5248, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.9293756397134084, |
| "grad_norm": 0.4345837890396033, |
| "learning_rate": 3.910421301629264e-05, |
| "loss": 0.4887, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.9314227226202662, |
| "grad_norm": 0.43091075968385034, |
| "learning_rate": 3.909572642830053e-05, |
| "loss": 0.4987, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.9334698055271239, |
| "grad_norm": 0.4111596649311637, |
| "learning_rate": 3.9087200758812054e-05, |
| "loss": 0.5429, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.9355168884339816, |
| "grad_norm": 0.4114767207964879, |
| "learning_rate": 3.9078636025275904e-05, |
| "loss": 0.4868, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.9375639713408394, |
| "grad_norm": 0.4511575697019446, |
| "learning_rate": 3.907003224522075e-05, |
| "loss": 0.5151, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.9396110542476971, |
| "grad_norm": 0.47837137672489904, |
| "learning_rate": 3.906138943625519e-05, |
| "loss": 0.4812, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.9416581371545547, |
| "grad_norm": 0.394915101475589, |
| "learning_rate": 3.9052707616067654e-05, |
| "loss": 0.5405, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.9437052200614124, |
| "grad_norm": 0.5432929178652014, |
| "learning_rate": 3.9043986802426453e-05, |
| "loss": 0.4816, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.9457523029682702, |
| "grad_norm": 0.3617390720179231, |
| "learning_rate": 3.903522701317968e-05, |
| "loss": 0.4864, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.9477993858751279, |
| "grad_norm": 0.5239560704020834, |
| "learning_rate": 3.9026428266255205e-05, |
| "loss": 0.4979, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.9498464687819856, |
| "grad_norm": 0.39348645216036965, |
| "learning_rate": 3.901759057966064e-05, |
| "loss": 0.4741, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.9518935516888434, |
| "grad_norm": 0.47748699997048394, |
| "learning_rate": 3.9008713971483257e-05, |
| "loss": 0.4729, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.9539406345957011, |
| "grad_norm": 0.4339281681486082, |
| "learning_rate": 3.899979845989003e-05, |
| "loss": 0.5072, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.9559877175025588, |
| "grad_norm": 0.4360289845901765, |
| "learning_rate": 3.899084406312751e-05, |
| "loss": 0.4844, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.9580348004094166, |
| "grad_norm": 0.49803606731717964, |
| "learning_rate": 3.8981850799521856e-05, |
| "loss": 0.5021, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.9600818833162743, |
| "grad_norm": 0.4633434375152389, |
| "learning_rate": 3.897281868747878e-05, |
| "loss": 0.5003, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.962128966223132, |
| "grad_norm": 0.4887467757571554, |
| "learning_rate": 3.896374774548348e-05, |
| "loss": 0.5054, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.9641760491299898, |
| "grad_norm": 0.44664870237749776, |
| "learning_rate": 3.895463799210063e-05, |
| "loss": 0.5186, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.9662231320368475, |
| "grad_norm": 0.5203094189929391, |
| "learning_rate": 3.894548944597434e-05, |
| "loss": 0.5227, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.9682702149437052, |
| "grad_norm": 0.37329019363463944, |
| "learning_rate": 3.8936302125828114e-05, |
| "loss": 0.4836, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.970317297850563, |
| "grad_norm": 0.46252269585391337, |
| "learning_rate": 3.892707605046482e-05, |
| "loss": 0.465, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.9723643807574207, |
| "grad_norm": 0.4466031989756924, |
| "learning_rate": 3.8917811238766606e-05, |
| "loss": 0.5103, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.9744114636642784, |
| "grad_norm": 0.49858281748974287, |
| "learning_rate": 3.8908507709694945e-05, |
| "loss": 0.4857, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.9764585465711362, |
| "grad_norm": 0.5999408245173112, |
| "learning_rate": 3.8899165482290524e-05, |
| "loss": 0.5135, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.9785056294779939, |
| "grad_norm": 0.44390557227435046, |
| "learning_rate": 3.888978457567323e-05, |
| "loss": 0.4826, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.9805527123848515, |
| "grad_norm": 0.6043545995456223, |
| "learning_rate": 3.888036500904212e-05, |
| "loss": 0.5154, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.9825997952917093, |
| "grad_norm": 0.4818312706910111, |
| "learning_rate": 3.887090680167537e-05, |
| "loss": 0.5087, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.984646878198567, |
| "grad_norm": 0.6429627680182052, |
| "learning_rate": 3.886140997293024e-05, |
| "loss": 0.5033, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.9866939611054247, |
| "grad_norm": 0.46009297651512515, |
| "learning_rate": 3.8851874542243024e-05, |
| "loss": 0.4737, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.9887410440122825, |
| "grad_norm": 0.5257256010671767, |
| "learning_rate": 3.8842300529129026e-05, |
| "loss": 0.4922, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.9907881269191402, |
| "grad_norm": 0.4952147402880594, |
| "learning_rate": 3.883268795318252e-05, |
| "loss": 0.5093, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.9928352098259979, |
| "grad_norm": 0.40718187065435846, |
| "learning_rate": 3.882303683407669e-05, |
| "loss": 0.4898, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.9948822927328557, |
| "grad_norm": 0.6342262964756523, |
| "learning_rate": 3.8813347191563615e-05, |
| "loss": 0.4809, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.9969293756397134, |
| "grad_norm": 0.36521858199635876, |
| "learning_rate": 3.88036190454742e-05, |
| "loss": 0.4792, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.9989764585465711, |
| "grad_norm": 0.49465426774165494, |
| "learning_rate": 3.879385241571817e-05, |
| "loss": 0.4945, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.0010235414534288, |
| "grad_norm": 0.6823091472624835, |
| "learning_rate": 3.8784047322284e-05, |
| "loss": 0.7219, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.0030706243602865, |
| "grad_norm": 0.5576204983958845, |
| "learning_rate": 3.8774203785238886e-05, |
| "loss": 0.5356, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.0051177072671442, |
| "grad_norm": 0.4049517959021661, |
| "learning_rate": 3.8764321824728715e-05, |
| "loss": 0.3433, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.007164790174002, |
| "grad_norm": 0.6379294758690475, |
| "learning_rate": 3.875440146097798e-05, |
| "loss": 0.4658, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.0092118730808597, |
| "grad_norm": 0.48729117155574453, |
| "learning_rate": 3.8744442714289816e-05, |
| "loss": 0.423, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.0112589559877174, |
| "grad_norm": 0.6905667947564962, |
| "learning_rate": 3.873444560504588e-05, |
| "loss": 0.4378, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.0133060388945752, |
| "grad_norm": 0.5821283432486261, |
| "learning_rate": 3.872441015370635e-05, |
| "loss": 0.4592, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.015353121801433, |
| "grad_norm": 0.6506243911845556, |
| "learning_rate": 3.8714336380809874e-05, |
| "loss": 0.4401, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.0174002047082906, |
| "grad_norm": 0.5026499066925773, |
| "learning_rate": 3.870422430697354e-05, |
| "loss": 0.4082, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.0194472876151484, |
| "grad_norm": 0.7706353109115344, |
| "learning_rate": 3.869407395289281e-05, |
| "loss": 0.4851, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.021494370522006, |
| "grad_norm": 0.40094293966181355, |
| "learning_rate": 3.86838853393415e-05, |
| "loss": 0.3689, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.0235414534288638, |
| "grad_norm": 0.7044751532962212, |
| "learning_rate": 3.867365848717171e-05, |
| "loss": 0.4298, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.0255885363357216, |
| "grad_norm": 0.5123771630826552, |
| "learning_rate": 3.866339341731384e-05, |
| "loss": 0.4313, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.0276356192425793, |
| "grad_norm": 0.44854290574972205, |
| "learning_rate": 3.865309015077645e-05, |
| "loss": 0.3686, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.029682702149437, |
| "grad_norm": 0.5101255948957101, |
| "learning_rate": 3.8642748708646324e-05, |
| "loss": 0.4471, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.0317297850562948, |
| "grad_norm": 0.44907848191294825, |
| "learning_rate": 3.863236911208835e-05, |
| "loss": 0.342, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.0337768679631525, |
| "grad_norm": 0.5031368605626068, |
| "learning_rate": 3.862195138234551e-05, |
| "loss": 0.4214, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.0358239508700102, |
| "grad_norm": 0.5833077927402516, |
| "learning_rate": 3.8611495540738835e-05, |
| "loss": 0.4858, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.037871033776868, |
| "grad_norm": 0.41256204125595797, |
| "learning_rate": 3.860100160866733e-05, |
| "loss": 0.4171, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.0399181166837257, |
| "grad_norm": 0.5124022037131871, |
| "learning_rate": 3.859046960760801e-05, |
| "loss": 0.4517, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.0419651995905834, |
| "grad_norm": 0.5225640900487728, |
| "learning_rate": 3.857989955911574e-05, |
| "loss": 0.4341, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.0440122824974412, |
| "grad_norm": 0.38566215827923084, |
| "learning_rate": 3.85692914848233e-05, |
| "loss": 0.3913, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.046059365404299, |
| "grad_norm": 0.4633891939003197, |
| "learning_rate": 3.855864540644126e-05, |
| "loss": 0.397, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.0481064483111566, |
| "grad_norm": 0.47380208701961085, |
| "learning_rate": 3.8547961345758e-05, |
| "loss": 0.4476, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.0501535312180144, |
| "grad_norm": 0.5577403954270109, |
| "learning_rate": 3.853723932463962e-05, |
| "loss": 0.4559, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.052200614124872, |
| "grad_norm": 0.5358423915305728, |
| "learning_rate": 3.8526479365029906e-05, |
| "loss": 0.4322, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.0542476970317298, |
| "grad_norm": 0.463025872995783, |
| "learning_rate": 3.8515681488950286e-05, |
| "loss": 0.4725, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.0562947799385876, |
| "grad_norm": 0.4349348376778485, |
| "learning_rate": 3.850484571849982e-05, |
| "loss": 0.3743, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.0583418628454453, |
| "grad_norm": 0.49093997411971185, |
| "learning_rate": 3.849397207585508e-05, |
| "loss": 0.4704, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.060388945752303, |
| "grad_norm": 0.5752884046292331, |
| "learning_rate": 3.848306058327016e-05, |
| "loss": 0.4772, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.0624360286591608, |
| "grad_norm": 0.36504458066426737, |
| "learning_rate": 3.847211126307666e-05, |
| "loss": 0.4277, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.0644831115660185, |
| "grad_norm": 0.5134952699143726, |
| "learning_rate": 3.846112413768353e-05, |
| "loss": 0.4094, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.0665301944728762, |
| "grad_norm": 0.42686550744637813, |
| "learning_rate": 3.845009922957713e-05, |
| "loss": 0.3999, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.068577277379734, |
| "grad_norm": 0.45121329177961284, |
| "learning_rate": 3.843903656132116e-05, |
| "loss": 0.4782, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.0706243602865917, |
| "grad_norm": 0.4799180332218181, |
| "learning_rate": 3.842793615555657e-05, |
| "loss": 0.4344, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.0726714431934494, |
| "grad_norm": 0.4906577800070559, |
| "learning_rate": 3.8416798035001545e-05, |
| "loss": 0.3999, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.0747185261003072, |
| "grad_norm": 0.4381457628189835, |
| "learning_rate": 3.8405622222451496e-05, |
| "loss": 0.4867, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.076765609007165, |
| "grad_norm": 0.4802227337203006, |
| "learning_rate": 3.8394408740778934e-05, |
| "loss": 0.4067, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.0788126919140226, |
| "grad_norm": 0.4490101903364367, |
| "learning_rate": 3.838315761293348e-05, |
| "loss": 0.4346, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.0808597748208801, |
| "grad_norm": 0.4633513621013708, |
| "learning_rate": 3.8371868861941795e-05, |
| "loss": 0.4177, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.0829068577277379, |
| "grad_norm": 0.38946131051973665, |
| "learning_rate": 3.836054251090755e-05, |
| "loss": 0.3554, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.0849539406345956, |
| "grad_norm": 0.4671283243776551, |
| "learning_rate": 3.8349178583011356e-05, |
| "loss": 0.4613, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.0870010235414533, |
| "grad_norm": 0.42447494229810095, |
| "learning_rate": 3.833777710151075e-05, |
| "loss": 0.4056, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.089048106448311, |
| "grad_norm": 0.42416375790936595, |
| "learning_rate": 3.83263380897401e-05, |
| "loss": 0.3751, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.0910951893551688, |
| "grad_norm": 0.4073852074265662, |
| "learning_rate": 3.8314861571110604e-05, |
| "loss": 0.4637, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.0931422722620265, |
| "grad_norm": 0.4548496033565697, |
| "learning_rate": 3.830334756911021e-05, |
| "loss": 0.4299, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.0951893551688843, |
| "grad_norm": 0.376272980920323, |
| "learning_rate": 3.829179610730359e-05, |
| "loss": 0.408, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.097236438075742, |
| "grad_norm": 0.427450221785291, |
| "learning_rate": 3.828020720933207e-05, |
| "loss": 0.4095, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.0992835209825997, |
| "grad_norm": 0.440976279211047, |
| "learning_rate": 3.826858089891361e-05, |
| "loss": 0.494, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.1013306038894575, |
| "grad_norm": 0.4209945160038491, |
| "learning_rate": 3.8256917199842715e-05, |
| "loss": 0.4586, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.1033776867963152, |
| "grad_norm": 0.3962229334467954, |
| "learning_rate": 3.824521613599043e-05, |
| "loss": 0.405, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.105424769703173, |
| "grad_norm": 0.40571779964971894, |
| "learning_rate": 3.823347773130427e-05, |
| "loss": 0.4275, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.1074718526100307, |
| "grad_norm": 0.3618547256384093, |
| "learning_rate": 3.822170200980815e-05, |
| "loss": 0.3858, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.1095189355168884, |
| "grad_norm": 0.4080407212884249, |
| "learning_rate": 3.820988899560239e-05, |
| "loss": 0.4645, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.1115660184237461, |
| "grad_norm": 0.3661867427781835, |
| "learning_rate": 3.819803871286361e-05, |
| "loss": 0.4359, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.1136131013306039, |
| "grad_norm": 0.3672357843320578, |
| "learning_rate": 3.818615118584472e-05, |
| "loss": 0.4266, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.1156601842374616, |
| "grad_norm": 0.42948959266814246, |
| "learning_rate": 3.817422643887484e-05, |
| "loss": 0.3994, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.1177072671443193, |
| "grad_norm": 0.4671751141730952, |
| "learning_rate": 3.816226449635927e-05, |
| "loss": 0.4408, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.119754350051177, |
| "grad_norm": 0.41461516441504426, |
| "learning_rate": 3.815026538277943e-05, |
| "loss": 0.3956, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.1218014329580348, |
| "grad_norm": 0.5835686601708205, |
| "learning_rate": 3.813822912269284e-05, |
| "loss": 0.4505, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.1238485158648925, |
| "grad_norm": 0.4319406426533076, |
| "learning_rate": 3.812615574073301e-05, |
| "loss": 0.3822, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.1258955987717503, |
| "grad_norm": 0.4516791429478303, |
| "learning_rate": 3.811404526160943e-05, |
| "loss": 0.4144, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.127942681678608, |
| "grad_norm": 0.5360130807078863, |
| "learning_rate": 3.810189771010755e-05, |
| "loss": 0.4786, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.1299897645854657, |
| "grad_norm": 0.4708744253041833, |
| "learning_rate": 3.808971311108865e-05, |
| "loss": 0.4241, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.1320368474923235, |
| "grad_norm": 0.43422914565208964, |
| "learning_rate": 3.8077491489489835e-05, |
| "loss": 0.4204, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.1340839303991812, |
| "grad_norm": 0.4721868771642081, |
| "learning_rate": 3.806523287032401e-05, |
| "loss": 0.3909, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.136131013306039, |
| "grad_norm": 0.4196801932672472, |
| "learning_rate": 3.805293727867978e-05, |
| "loss": 0.4427, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.1381780962128967, |
| "grad_norm": 0.43191530611285084, |
| "learning_rate": 3.8040604739721415e-05, |
| "loss": 0.4247, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.1402251791197544, |
| "grad_norm": 0.4098386090267591, |
| "learning_rate": 3.8028235278688814e-05, |
| "loss": 0.4136, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.1422722620266121, |
| "grad_norm": 0.42481716204053743, |
| "learning_rate": 3.8015828920897425e-05, |
| "loss": 0.425, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.1443193449334699, |
| "grad_norm": 0.3947721096519682, |
| "learning_rate": 3.8003385691738227e-05, |
| "loss": 0.4169, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.1463664278403276, |
| "grad_norm": 0.4434721832169034, |
| "learning_rate": 3.7990905616677644e-05, |
| "loss": 0.4804, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.1484135107471853, |
| "grad_norm": 0.44596996028550917, |
| "learning_rate": 3.797838872125752e-05, |
| "loss": 0.4587, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.150460593654043, |
| "grad_norm": 0.40781008639677546, |
| "learning_rate": 3.7965835031095065e-05, |
| "loss": 0.4614, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.1525076765609008, |
| "grad_norm": 0.4036300851691864, |
| "learning_rate": 3.795324457188276e-05, |
| "loss": 0.3913, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.1545547594677585, |
| "grad_norm": 0.3583696642752529, |
| "learning_rate": 3.794061736938837e-05, |
| "loss": 0.3828, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.156601842374616, |
| "grad_norm": 0.5138599201629513, |
| "learning_rate": 3.792795344945485e-05, |
| "loss": 0.4861, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.158648925281474, |
| "grad_norm": 0.40729637695646664, |
| "learning_rate": 3.79152528380003e-05, |
| "loss": 0.4136, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.1606960081883315, |
| "grad_norm": 0.4688730059593128, |
| "learning_rate": 3.790251556101791e-05, |
| "loss": 0.3991, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.1627430910951895, |
| "grad_norm": 0.41555333567521613, |
| "learning_rate": 3.7889741644575914e-05, |
| "loss": 0.4859, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.164790174002047, |
| "grad_norm": 0.368128010733192, |
| "learning_rate": 3.787693111481753e-05, |
| "loss": 0.412, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.1668372569089047, |
| "grad_norm": 0.40448293578209554, |
| "learning_rate": 3.786408399796091e-05, |
| "loss": 0.4812, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.1688843398157625, |
| "grad_norm": 0.35706355299321096, |
| "learning_rate": 3.78512003202991e-05, |
| "loss": 0.4098, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.1709314227226202, |
| "grad_norm": 0.3794649872224116, |
| "learning_rate": 3.783828010819993e-05, |
| "loss": 0.4184, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.172978505629478, |
| "grad_norm": 0.4084126099380887, |
| "learning_rate": 3.782532338810605e-05, |
| "loss": 0.4279, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.1750255885363357, |
| "grad_norm": 0.40248846798503674, |
| "learning_rate": 3.7812330186534815e-05, |
| "loss": 0.374, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.1770726714431934, |
| "grad_norm": 0.42082880622868263, |
| "learning_rate": 3.779930053007821e-05, |
| "loss": 0.4294, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.1791197543500511, |
| "grad_norm": 0.4741074023730061, |
| "learning_rate": 3.778623444540287e-05, |
| "loss": 0.4655, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.1811668372569089, |
| "grad_norm": 0.40443469906849566, |
| "learning_rate": 3.777313195924998e-05, |
| "loss": 0.4313, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.1832139201637666, |
| "grad_norm": 0.5301327875202605, |
| "learning_rate": 3.775999309843519e-05, |
| "loss": 0.4872, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.1852610030706243, |
| "grad_norm": 0.4031400337502115, |
| "learning_rate": 3.774681788984863e-05, |
| "loss": 0.44, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.187308085977482, |
| "grad_norm": 0.32666462235868937, |
| "learning_rate": 3.773360636045481e-05, |
| "loss": 0.3297, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.1893551688843398, |
| "grad_norm": 0.48740976479218406, |
| "learning_rate": 3.7720358537292566e-05, |
| "loss": 0.502, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.1914022517911975, |
| "grad_norm": 0.4129431656165589, |
| "learning_rate": 3.770707444747502e-05, |
| "loss": 0.3901, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.1934493346980553, |
| "grad_norm": 0.37634568229429827, |
| "learning_rate": 3.7693754118189525e-05, |
| "loss": 0.406, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.195496417604913, |
| "grad_norm": 0.413600210538619, |
| "learning_rate": 3.768039757669759e-05, |
| "loss": 0.4389, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.1975435005117707, |
| "grad_norm": 0.41522152600174966, |
| "learning_rate": 3.766700485033484e-05, |
| "loss": 0.4213, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.1995905834186285, |
| "grad_norm": 0.3994741162417736, |
| "learning_rate": 3.765357596651095e-05, |
| "loss": 0.4192, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.2016376663254862, |
| "grad_norm": 0.44961799715640177, |
| "learning_rate": 3.764011095270962e-05, |
| "loss": 0.4448, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.203684749232344, |
| "grad_norm": 0.4405333877210868, |
| "learning_rate": 3.762660983648846e-05, |
| "loss": 0.4425, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.2057318321392017, |
| "grad_norm": 0.3647533882869788, |
| "learning_rate": 3.761307264547899e-05, |
| "loss": 0.3798, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.2077789150460594, |
| "grad_norm": 0.5134897309426143, |
| "learning_rate": 3.759949940738655e-05, |
| "loss": 0.4862, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.2098259979529171, |
| "grad_norm": 0.42730289018233486, |
| "learning_rate": 3.7585890149990265e-05, |
| "loss": 0.3887, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.2118730808597749, |
| "grad_norm": 0.4053414794034375, |
| "learning_rate": 3.757224490114297e-05, |
| "loss": 0.4327, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.2139201637666326, |
| "grad_norm": 0.4351870322953961, |
| "learning_rate": 3.755856368877116e-05, |
| "loss": 0.379, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.2159672466734903, |
| "grad_norm": 0.4319979039337228, |
| "learning_rate": 3.7544846540874934e-05, |
| "loss": 0.45, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.218014329580348, |
| "grad_norm": 0.3683791838302071, |
| "learning_rate": 3.7531093485527943e-05, |
| "loss": 0.4189, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.2200614124872058, |
| "grad_norm": 0.47932608544530164, |
| "learning_rate": 3.7517304550877315e-05, |
| "loss": 0.4327, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.2221084953940635, |
| "grad_norm": 0.46656194426712394, |
| "learning_rate": 3.750347976514362e-05, |
| "loss": 0.4774, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.2241555783009213, |
| "grad_norm": 0.3933033098379041, |
| "learning_rate": 3.7489619156620796e-05, |
| "loss": 0.4224, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.226202661207779, |
| "grad_norm": 0.40838601128257357, |
| "learning_rate": 3.74757227536761e-05, |
| "loss": 0.4361, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.2282497441146367, |
| "grad_norm": 0.4011956657411684, |
| "learning_rate": 3.7461790584750036e-05, |
| "loss": 0.373, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.2302968270214945, |
| "grad_norm": 0.4783960660544416, |
| "learning_rate": 3.744782267835632e-05, |
| "loss": 0.4497, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.2323439099283522, |
| "grad_norm": 0.4426728718136347, |
| "learning_rate": 3.74338190630818e-05, |
| "loss": 0.4396, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.23439099283521, |
| "grad_norm": 0.4507239891416186, |
| "learning_rate": 3.7419779767586406e-05, |
| "loss": 0.4312, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.2364380757420674, |
| "grad_norm": 0.45738216770554924, |
| "learning_rate": 3.740570482060311e-05, |
| "loss": 0.4186, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.2384851586489254, |
| "grad_norm": 0.4112804317839281, |
| "learning_rate": 3.7391594250937813e-05, |
| "loss": 0.4075, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.240532241555783, |
| "grad_norm": 0.3910005461879264, |
| "learning_rate": 3.737744808746935e-05, |
| "loss": 0.4063, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.2425793244626409, |
| "grad_norm": 0.37923695698945686, |
| "learning_rate": 3.73632663591494e-05, |
| "loss": 0.3753, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.2446264073694984, |
| "grad_norm": 0.3993349892342946, |
| "learning_rate": 3.7349049095002414e-05, |
| "loss": 0.415, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.246673490276356, |
| "grad_norm": 0.49340730052744397, |
| "learning_rate": 3.733479632412559e-05, |
| "loss": 0.4137, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.2487205731832138, |
| "grad_norm": 0.4033663663707718, |
| "learning_rate": 3.732050807568878e-05, |
| "loss": 0.4078, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.2507676560900716, |
| "grad_norm": 0.4885064983059752, |
| "learning_rate": 3.730618437893444e-05, |
| "loss": 0.479, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.2528147389969293, |
| "grad_norm": 0.43160732650352407, |
| "learning_rate": 3.729182526317761e-05, |
| "loss": 0.455, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.254861821903787, |
| "grad_norm": 0.45988754618419736, |
| "learning_rate": 3.727743075780578e-05, |
| "loss": 0.3783, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.2569089048106448, |
| "grad_norm": 0.44846875859935137, |
| "learning_rate": 3.726300089227887e-05, |
| "loss": 0.4124, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.2589559877175025, |
| "grad_norm": 0.5009012146609582, |
| "learning_rate": 3.72485356961292e-05, |
| "loss": 0.4256, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.2610030706243602, |
| "grad_norm": 0.40882732802753774, |
| "learning_rate": 3.723403519896136e-05, |
| "loss": 0.3877, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.263050153531218, |
| "grad_norm": 0.509277319060574, |
| "learning_rate": 3.721949943045223e-05, |
| "loss": 0.4603, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.2650972364380757, |
| "grad_norm": 0.4550675308253649, |
| "learning_rate": 3.720492842035084e-05, |
| "loss": 0.4958, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.2671443193449334, |
| "grad_norm": 0.5039050269537974, |
| "learning_rate": 3.7190322198478355e-05, |
| "loss": 0.5296, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.2691914022517912, |
| "grad_norm": 0.42034576410985675, |
| "learning_rate": 3.7175680794728015e-05, |
| "loss": 0.4171, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.271238485158649, |
| "grad_norm": 0.6194106838370393, |
| "learning_rate": 3.716100423906505e-05, |
| "loss": 0.524, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.2732855680655066, |
| "grad_norm": 0.3686755041504862, |
| "learning_rate": 3.7146292561526654e-05, |
| "loss": 0.3836, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.2753326509723644, |
| "grad_norm": 0.5059037426173506, |
| "learning_rate": 3.7131545792221864e-05, |
| "loss": 0.485, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.277379733879222, |
| "grad_norm": 0.40419199499915215, |
| "learning_rate": 3.711676396133158e-05, |
| "loss": 0.3979, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.2794268167860798, |
| "grad_norm": 0.48892649990931586, |
| "learning_rate": 3.7101947099108425e-05, |
| "loss": 0.5072, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.2814738996929376, |
| "grad_norm": 0.4545220525315462, |
| "learning_rate": 3.708709523587674e-05, |
| "loss": 0.4275, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.2835209825997953, |
| "grad_norm": 0.40189284591007923, |
| "learning_rate": 3.707220840203249e-05, |
| "loss": 0.3881, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.285568065506653, |
| "grad_norm": 0.43194988622551983, |
| "learning_rate": 3.70572866280432e-05, |
| "loss": 0.4747, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.2876151484135108, |
| "grad_norm": 0.44678464462286466, |
| "learning_rate": 3.7042329944447925e-05, |
| "loss": 0.391, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.2896622313203685, |
| "grad_norm": 0.4623510609405332, |
| "learning_rate": 3.702733838185716e-05, |
| "loss": 0.423, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.2917093142272262, |
| "grad_norm": 0.4314971051525567, |
| "learning_rate": 3.701231197095277e-05, |
| "loss": 0.4285, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.293756397134084, |
| "grad_norm": 0.39807090722917615, |
| "learning_rate": 3.6997250742487955e-05, |
| "loss": 0.3975, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.2958034800409417, |
| "grad_norm": 0.4538564539433799, |
| "learning_rate": 3.698215472728718e-05, |
| "loss": 0.4566, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.2978505629477994, |
| "grad_norm": 0.387005647622149, |
| "learning_rate": 3.696702395624608e-05, |
| "loss": 0.4376, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.2998976458546572, |
| "grad_norm": 0.4089593023659839, |
| "learning_rate": 3.6951858460331446e-05, |
| "loss": 0.4297, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.301944728761515, |
| "grad_norm": 0.423980436557396, |
| "learning_rate": 3.693665827058111e-05, |
| "loss": 0.4407, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.3039918116683726, |
| "grad_norm": 0.3939340668431382, |
| "learning_rate": 3.692142341810395e-05, |
| "loss": 0.3762, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.3060388945752304, |
| "grad_norm": 0.3791993183647926, |
| "learning_rate": 3.690615393407975e-05, |
| "loss": 0.412, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.308085977482088, |
| "grad_norm": 0.44464627005705176, |
| "learning_rate": 3.689084984975918e-05, |
| "loss": 0.3946, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.3101330603889458, |
| "grad_norm": 0.4356188955070128, |
| "learning_rate": 3.6875511196463715e-05, |
| "loss": 0.4583, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.3121801432958033, |
| "grad_norm": 0.43588824508846175, |
| "learning_rate": 3.686013800558561e-05, |
| "loss": 0.4674, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.3142272262026613, |
| "grad_norm": 0.5156297864126925, |
| "learning_rate": 3.6844730308587776e-05, |
| "loss": 0.4052, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.3162743091095188, |
| "grad_norm": 0.40684908578032264, |
| "learning_rate": 3.682928813700375e-05, |
| "loss": 0.4329, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.3183213920163768, |
| "grad_norm": 0.42989019879872664, |
| "learning_rate": 3.681381152243763e-05, |
| "loss": 0.4264, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.3203684749232343, |
| "grad_norm": 0.37812994433882907, |
| "learning_rate": 3.6798300496564e-05, |
| "loss": 0.386, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.3224155578300922, |
| "grad_norm": 0.4191444772159957, |
| "learning_rate": 3.678275509112788e-05, |
| "loss": 0.4587, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.3244626407369497, |
| "grad_norm": 0.34057419650855425, |
| "learning_rate": 3.6767175337944646e-05, |
| "loss": 0.4069, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.3265097236438077, |
| "grad_norm": 0.4210596650083103, |
| "learning_rate": 3.675156126889996e-05, |
| "loss": 0.4614, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.3285568065506652, |
| "grad_norm": 0.380672360295006, |
| "learning_rate": 3.6735912915949745e-05, |
| "loss": 0.4919, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.330603889457523, |
| "grad_norm": 0.46378857138786805, |
| "learning_rate": 3.672023031112005e-05, |
| "loss": 0.4398, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.3326509723643807, |
| "grad_norm": 0.3731856457734196, |
| "learning_rate": 3.670451348650705e-05, |
| "loss": 0.3786, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.3346980552712384, |
| "grad_norm": 0.4439716102781381, |
| "learning_rate": 3.6688762474276945e-05, |
| "loss": 0.4175, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.3367451381780961, |
| "grad_norm": 0.5191394349356976, |
| "learning_rate": 3.667297730666592e-05, |
| "loss": 0.5042, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.3387922210849539, |
| "grad_norm": 0.41690574322071583, |
| "learning_rate": 3.665715801598004e-05, |
| "loss": 0.4098, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.3408393039918116, |
| "grad_norm": 0.42158819550590015, |
| "learning_rate": 3.6641304634595216e-05, |
| "loss": 0.4271, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.3428863868986693, |
| "grad_norm": 0.4596791286598413, |
| "learning_rate": 3.662541719495714e-05, |
| "loss": 0.4609, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.344933469805527, |
| "grad_norm": 0.4858116688501969, |
| "learning_rate": 3.6609495729581186e-05, |
| "loss": 0.4909, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.3469805527123848, |
| "grad_norm": 0.3811091658108338, |
| "learning_rate": 3.659354027105238e-05, |
| "loss": 0.4224, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.3490276356192425, |
| "grad_norm": 0.40653713538788, |
| "learning_rate": 3.657755085202532e-05, |
| "loss": 0.4144, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.3510747185261003, |
| "grad_norm": 0.4900473769452504, |
| "learning_rate": 3.6561527505224104e-05, |
| "loss": 0.4582, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.353121801432958, |
| "grad_norm": 0.45088745908937394, |
| "learning_rate": 3.6545470263442265e-05, |
| "loss": 0.4345, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.3551688843398157, |
| "grad_norm": 0.44409465950161797, |
| "learning_rate": 3.65293791595427e-05, |
| "loss": 0.4867, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.3572159672466735, |
| "grad_norm": 0.3857032013105052, |
| "learning_rate": 3.651325422645763e-05, |
| "loss": 0.4072, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.3592630501535312, |
| "grad_norm": 0.4196509296621015, |
| "learning_rate": 3.649709549718849e-05, |
| "loss": 0.4109, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.361310133060389, |
| "grad_norm": 0.3711033202233833, |
| "learning_rate": 3.648090300480589e-05, |
| "loss": 0.4498, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.3633572159672467, |
| "grad_norm": 0.4738363974307569, |
| "learning_rate": 3.646467678244954e-05, |
| "loss": 0.4268, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.3654042988741044, |
| "grad_norm": 0.4028954103082967, |
| "learning_rate": 3.6448416863328186e-05, |
| "loss": 0.4346, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.3674513817809621, |
| "grad_norm": 0.4635908886319793, |
| "learning_rate": 3.643212328071953e-05, |
| "loss": 0.4453, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.3694984646878199, |
| "grad_norm": 0.38319546452013337, |
| "learning_rate": 3.641579606797017e-05, |
| "loss": 0.4054, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.3715455475946776, |
| "grad_norm": 0.3700643765726001, |
| "learning_rate": 3.639943525849555e-05, |
| "loss": 0.4692, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.3735926305015353, |
| "grad_norm": 0.34905600329178454, |
| "learning_rate": 3.638304088577984e-05, |
| "loss": 0.4131, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.375639713408393, |
| "grad_norm": 0.37865539989560054, |
| "learning_rate": 3.6366612983375936e-05, |
| "loss": 0.4696, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.3776867963152508, |
| "grad_norm": 0.34634916188478093, |
| "learning_rate": 3.635015158490533e-05, |
| "loss": 0.4206, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.3797338792221086, |
| "grad_norm": 0.3790168529129947, |
| "learning_rate": 3.6333656724058075e-05, |
| "loss": 0.4311, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.3817809621289663, |
| "grad_norm": 0.39245574822155366, |
| "learning_rate": 3.6317128434592725e-05, |
| "loss": 0.453, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.383828045035824, |
| "grad_norm": 0.3723108820988336, |
| "learning_rate": 3.6300566750336225e-05, |
| "loss": 0.4055, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.3858751279426818, |
| "grad_norm": 0.37587879714211964, |
| "learning_rate": 3.6283971705183884e-05, |
| "loss": 0.4468, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.3879222108495395, |
| "grad_norm": 0.3410504468464227, |
| "learning_rate": 3.626734333309927e-05, |
| "loss": 0.3782, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.3899692937563972, |
| "grad_norm": 0.4332726859166056, |
| "learning_rate": 3.625068166811418e-05, |
| "loss": 0.4419, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.3920163766632547, |
| "grad_norm": 0.3869800586992885, |
| "learning_rate": 3.623398674432853e-05, |
| "loss": 0.4211, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.3940634595701127, |
| "grad_norm": 0.38121810828451136, |
| "learning_rate": 3.621725859591031e-05, |
| "loss": 0.4592, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.3961105424769702, |
| "grad_norm": 0.39222808854449187, |
| "learning_rate": 3.6200497257095504e-05, |
| "loss": 0.4664, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.3981576253838282, |
| "grad_norm": 0.3565067379505055, |
| "learning_rate": 3.6183702762188045e-05, |
| "loss": 0.4218, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.4002047082906857, |
| "grad_norm": 0.3730858903859956, |
| "learning_rate": 3.6166875145559684e-05, |
| "loss": 0.4338, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.4022517911975436, |
| "grad_norm": 0.36655424256782704, |
| "learning_rate": 3.615001444165001e-05, |
| "loss": 0.4372, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.4042988741044011, |
| "grad_norm": 0.4487119389407511, |
| "learning_rate": 3.613312068496627e-05, |
| "loss": 0.462, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.406345957011259, |
| "grad_norm": 0.3515839553061697, |
| "learning_rate": 3.611619391008341e-05, |
| "loss": 0.3974, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.4083930399181166, |
| "grad_norm": 0.3677425891572493, |
| "learning_rate": 3.6099234151643924e-05, |
| "loss": 0.4634, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.4104401228249743, |
| "grad_norm": 0.3826249798074036, |
| "learning_rate": 3.608224144435781e-05, |
| "loss": 0.4338, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.412487205731832, |
| "grad_norm": 0.37018522079183763, |
| "learning_rate": 3.606521582300252e-05, |
| "loss": 0.4089, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.4145342886386898, |
| "grad_norm": 0.39718163395641504, |
| "learning_rate": 3.604815732242283e-05, |
| "loss": 0.4481, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.4165813715455475, |
| "grad_norm": 0.4177328745486512, |
| "learning_rate": 3.6031065977530854e-05, |
| "loss": 0.4061, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.4186284544524053, |
| "grad_norm": 0.43120876742640946, |
| "learning_rate": 3.6013941823305884e-05, |
| "loss": 0.4105, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.420675537359263, |
| "grad_norm": 0.4330368740835022, |
| "learning_rate": 3.5996784894794394e-05, |
| "loss": 0.4329, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.4227226202661207, |
| "grad_norm": 0.42455998302592957, |
| "learning_rate": 3.5979595227109906e-05, |
| "loss": 0.4045, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.4247697031729785, |
| "grad_norm": 0.4072147719505155, |
| "learning_rate": 3.5962372855432956e-05, |
| "loss": 0.452, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.4268167860798362, |
| "grad_norm": 0.4188081245089065, |
| "learning_rate": 3.594511781501103e-05, |
| "loss": 0.4199, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.428863868986694, |
| "grad_norm": 0.38251894827311167, |
| "learning_rate": 3.592783014115845e-05, |
| "loss": 0.3955, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.4309109518935517, |
| "grad_norm": 0.4441091071016442, |
| "learning_rate": 3.5910509869256326e-05, |
| "loss": 0.4398, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.4329580348004094, |
| "grad_norm": 0.38058511976389736, |
| "learning_rate": 3.58931570347525e-05, |
| "loss": 0.4019, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.4350051177072671, |
| "grad_norm": 0.3617807382285515, |
| "learning_rate": 3.587577167316146e-05, |
| "loss": 0.4363, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.4370522006141249, |
| "grad_norm": 0.43987641158494933, |
| "learning_rate": 3.585835382006424e-05, |
| "loss": 0.4328, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.4390992835209826, |
| "grad_norm": 0.3609144606393506, |
| "learning_rate": 3.584090351110838e-05, |
| "loss": 0.4104, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.4411463664278403, |
| "grad_norm": 0.4091808525809948, |
| "learning_rate": 3.582342078200786e-05, |
| "loss": 0.4138, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.443193449334698, |
| "grad_norm": 0.41531638710969015, |
| "learning_rate": 3.5805905668543e-05, |
| "loss": 0.4697, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.4452405322415558, |
| "grad_norm": 0.4154315464780426, |
| "learning_rate": 3.57883582065604e-05, |
| "loss": 0.4844, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.4472876151484135, |
| "grad_norm": 0.426486587951999, |
| "learning_rate": 3.577077843197285e-05, |
| "loss": 0.4088, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.4493346980552713, |
| "grad_norm": 0.4965404868194584, |
| "learning_rate": 3.57531663807593e-05, |
| "loss": 0.4369, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.451381780962129, |
| "grad_norm": 0.4695819928605211, |
| "learning_rate": 3.573552208896474e-05, |
| "loss": 0.4382, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.4534288638689867, |
| "grad_norm": 0.46954254869967305, |
| "learning_rate": 3.571784559270014e-05, |
| "loss": 0.4456, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.4554759467758445, |
| "grad_norm": 0.4760159462479475, |
| "learning_rate": 3.570013692814239e-05, |
| "loss": 0.4477, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.4575230296827022, |
| "grad_norm": 0.3992032479219894, |
| "learning_rate": 3.568239613153421e-05, |
| "loss": 0.396, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.45957011258956, |
| "grad_norm": 0.5000911965095803, |
| "learning_rate": 3.566462323918409e-05, |
| "loss": 0.4535, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.4616171954964177, |
| "grad_norm": 0.43927830807739277, |
| "learning_rate": 3.564681828746619e-05, |
| "loss": 0.4579, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.4636642784032754, |
| "grad_norm": 0.3840584961797973, |
| "learning_rate": 3.5628981312820315e-05, |
| "loss": 0.3861, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.4657113613101331, |
| "grad_norm": 0.46545692391278676, |
| "learning_rate": 3.561111235175177e-05, |
| "loss": 0.4142, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.4677584442169909, |
| "grad_norm": 0.32920533353338466, |
| "learning_rate": 3.5593211440831345e-05, |
| "loss": 0.3932, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.4698055271238486, |
| "grad_norm": 0.5879408329975259, |
| "learning_rate": 3.557527861669522e-05, |
| "loss": 0.4518, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.471852610030706, |
| "grad_norm": 0.3711931671196681, |
| "learning_rate": 3.555731391604488e-05, |
| "loss": 0.4135, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.473899692937564, |
| "grad_norm": 0.451950604877235, |
| "learning_rate": 3.553931737564705e-05, |
| "loss": 0.3778, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.4759467758444216, |
| "grad_norm": 0.3802803607853056, |
| "learning_rate": 3.552128903233363e-05, |
| "loss": 0.4528, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.4779938587512795, |
| "grad_norm": 0.5401764017146911, |
| "learning_rate": 3.55032289230016e-05, |
| "loss": 0.4431, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.480040941658137, |
| "grad_norm": 0.39166017929307784, |
| "learning_rate": 3.5485137084612945e-05, |
| "loss": 0.4384, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.482088024564995, |
| "grad_norm": 0.44680536201066046, |
| "learning_rate": 3.54670135541946e-05, |
| "loss": 0.4017, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.4841351074718525, |
| "grad_norm": 0.4126986303099099, |
| "learning_rate": 3.544885836883836e-05, |
| "loss": 0.4267, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.4861821903787105, |
| "grad_norm": 0.49002764481757854, |
| "learning_rate": 3.5430671565700786e-05, |
| "loss": 0.4451, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.488229273285568, |
| "grad_norm": 0.43094421524683524, |
| "learning_rate": 3.541245318200318e-05, |
| "loss": 0.4157, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.4902763561924257, |
| "grad_norm": 0.4077891821232257, |
| "learning_rate": 3.5394203255031445e-05, |
| "loss": 0.4184, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.4923234390992834, |
| "grad_norm": 0.4214423887733812, |
| "learning_rate": 3.537592182213607e-05, |
| "loss": 0.4404, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.4943705220061412, |
| "grad_norm": 0.39779568039250524, |
| "learning_rate": 3.5357608920732e-05, |
| "loss": 0.3862, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.496417604912999, |
| "grad_norm": 0.3868845645927947, |
| "learning_rate": 3.5339264588298606e-05, |
| "loss": 0.4859, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.4984646878198566, |
| "grad_norm": 0.401554933480186, |
| "learning_rate": 3.532088886237956e-05, |
| "loss": 0.4605, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.5005117707267144, |
| "grad_norm": 0.39625088885583626, |
| "learning_rate": 3.530248178058282e-05, |
| "loss": 0.4371, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.5025588536335721, |
| "grad_norm": 0.3433544773175932, |
| "learning_rate": 3.528404338058046e-05, |
| "loss": 0.359, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.5046059365404298, |
| "grad_norm": 0.3790208864361706, |
| "learning_rate": 3.526557370010872e-05, |
| "loss": 0.4668, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.5066530194472876, |
| "grad_norm": 0.3463058143561582, |
| "learning_rate": 3.5247072776967805e-05, |
| "loss": 0.428, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.5087001023541453, |
| "grad_norm": 0.4092673650520593, |
| "learning_rate": 3.522854064902189e-05, |
| "loss": 0.4787, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.510747185261003, |
| "grad_norm": 0.3701835656091217, |
| "learning_rate": 3.520997735419901e-05, |
| "loss": 0.4335, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.5127942681678608, |
| "grad_norm": 0.4036995525874622, |
| "learning_rate": 3.519138293049097e-05, |
| "loss": 0.4371, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.5148413510747185, |
| "grad_norm": 0.3520959626432674, |
| "learning_rate": 3.51727574159533e-05, |
| "loss": 0.442, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.5168884339815762, |
| "grad_norm": 0.3474665477504301, |
| "learning_rate": 3.515410084870516e-05, |
| "loss": 0.3833, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.518935516888434, |
| "grad_norm": 0.37700117041246284, |
| "learning_rate": 3.513541326692925e-05, |
| "loss": 0.421, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.5209825997952917, |
| "grad_norm": 0.3354789534984794, |
| "learning_rate": 3.511669470887177e-05, |
| "loss": 0.3646, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.5230296827021494, |
| "grad_norm": 0.36336403413161944, |
| "learning_rate": 3.509794521284228e-05, |
| "loss": 0.4479, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.5250767656090072, |
| "grad_norm": 0.3640226182102435, |
| "learning_rate": 3.5079164817213684e-05, |
| "loss": 0.3994, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.527123848515865, |
| "grad_norm": 0.39095484712326395, |
| "learning_rate": 3.5060353560422137e-05, |
| "loss": 0.4906, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.5291709314227226, |
| "grad_norm": 0.42138462191208187, |
| "learning_rate": 3.504151148096691e-05, |
| "loss": 0.4876, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.5312180143295804, |
| "grad_norm": 0.36356347432593744, |
| "learning_rate": 3.5022638617410396e-05, |
| "loss": 0.4171, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.5332650972364381, |
| "grad_norm": 0.402770920410115, |
| "learning_rate": 3.500373500837799e-05, |
| "loss": 0.4427, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.5353121801432958, |
| "grad_norm": 0.35789791445115465, |
| "learning_rate": 3.4984800692557974e-05, |
| "loss": 0.4306, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.5373592630501536, |
| "grad_norm": 0.3511318223180355, |
| "learning_rate": 3.496583570870152e-05, |
| "loss": 0.4051, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.5394063459570113, |
| "grad_norm": 0.415831551492267, |
| "learning_rate": 3.494684009562254e-05, |
| "loss": 0.4738, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.541453428863869, |
| "grad_norm": 0.3774452021561776, |
| "learning_rate": 3.492781389219763e-05, |
| "loss": 0.472, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.5435005117707266, |
| "grad_norm": 0.38222406103253237, |
| "learning_rate": 3.4908757137366006e-05, |
| "loss": 0.3984, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.5455475946775845, |
| "grad_norm": 0.4097364486307627, |
| "learning_rate": 3.488966987012941e-05, |
| "loss": 0.4436, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.547594677584442, |
| "grad_norm": 0.3733136430359242, |
| "learning_rate": 3.487055212955201e-05, |
| "loss": 0.4102, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.5496417604913, |
| "grad_norm": 0.49818552627556745, |
| "learning_rate": 3.485140395476038e-05, |
| "loss": 0.485, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.5516888433981575, |
| "grad_norm": 0.40160664320174677, |
| "learning_rate": 3.4832225384943335e-05, |
| "loss": 0.4662, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.5537359263050154, |
| "grad_norm": 0.36914648750243184, |
| "learning_rate": 3.481301645935193e-05, |
| "loss": 0.4173, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.555783009211873, |
| "grad_norm": 0.40716553946286355, |
| "learning_rate": 3.4793777217299346e-05, |
| "loss": 0.417, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.557830092118731, |
| "grad_norm": 0.42779403061287685, |
| "learning_rate": 3.477450769816077e-05, |
| "loss": 0.4848, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.5598771750255884, |
| "grad_norm": 0.3422957268296126, |
| "learning_rate": 3.475520794137341e-05, |
| "loss": 0.4039, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.5619242579324464, |
| "grad_norm": 0.4032099351305834, |
| "learning_rate": 3.473587798643633e-05, |
| "loss": 0.4378, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.563971340839304, |
| "grad_norm": 0.4010793632381822, |
| "learning_rate": 3.4716517872910405e-05, |
| "loss": 0.4461, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.5660184237461618, |
| "grad_norm": 0.41602009577806304, |
| "learning_rate": 3.4697127640418204e-05, |
| "loss": 0.5198, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.5680655066530194, |
| "grad_norm": 0.3840607306535795, |
| "learning_rate": 3.467770732864399e-05, |
| "loss": 0.4072, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.5701125895598773, |
| "grad_norm": 0.45544304460334517, |
| "learning_rate": 3.4658256977333536e-05, |
| "loss": 0.4263, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.5721596724667348, |
| "grad_norm": 0.4489200382867973, |
| "learning_rate": 3.4638776626294134e-05, |
| "loss": 0.4189, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.5742067553735928, |
| "grad_norm": 0.40708118209847305, |
| "learning_rate": 3.461926631539445e-05, |
| "loss": 0.4344, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.5762538382804503, |
| "grad_norm": 0.47396385986382383, |
| "learning_rate": 3.459972608456448e-05, |
| "loss": 0.4318, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.5783009211873082, |
| "grad_norm": 0.44275905450120784, |
| "learning_rate": 3.4580155973795434e-05, |
| "loss": 0.4024, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.5803480040941658, |
| "grad_norm": 0.37512733188763275, |
| "learning_rate": 3.4560556023139695e-05, |
| "loss": 0.4646, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.5823950870010235, |
| "grad_norm": 0.4718852007159965, |
| "learning_rate": 3.454092627271072e-05, |
| "loss": 0.4386, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.5844421699078812, |
| "grad_norm": 0.36977406003434726, |
| "learning_rate": 3.4521266762682924e-05, |
| "loss": 0.4429, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.586489252814739, |
| "grad_norm": 0.36917719793736264, |
| "learning_rate": 3.450157753329166e-05, |
| "loss": 0.4753, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.5885363357215967, |
| "grad_norm": 0.466057931195573, |
| "learning_rate": 3.448185862483309e-05, |
| "loss": 0.4542, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.5905834186284544, |
| "grad_norm": 0.34642675856704536, |
| "learning_rate": 3.446211007766412e-05, |
| "loss": 0.3954, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.5926305015353122, |
| "grad_norm": 0.4330447451756299, |
| "learning_rate": 3.4442331932202326e-05, |
| "loss": 0.4278, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.59467758444217, |
| "grad_norm": 0.3730822908863033, |
| "learning_rate": 3.4422524228925836e-05, |
| "loss": 0.4061, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.5967246673490276, |
| "grad_norm": 0.37973943757397915, |
| "learning_rate": 3.440268700837329e-05, |
| "loss": 0.394, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.5987717502558854, |
| "grad_norm": 0.49428851704529125, |
| "learning_rate": 3.438282031114374e-05, |
| "loss": 0.4486, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.600818833162743, |
| "grad_norm": 0.41805483229206175, |
| "learning_rate": 3.4362924177896545e-05, |
| "loss": 0.4393, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.6028659160696008, |
| "grad_norm": 0.47689234144727466, |
| "learning_rate": 3.434299864935133e-05, |
| "loss": 0.4354, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.6049129989764586, |
| "grad_norm": 0.42155977412917267, |
| "learning_rate": 3.432304376628787e-05, |
| "loss": 0.4602, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.6069600818833163, |
| "grad_norm": 0.3865188335568589, |
| "learning_rate": 3.430305956954602e-05, |
| "loss": 0.4152, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.609007164790174, |
| "grad_norm": 0.4271955733661172, |
| "learning_rate": 3.428304610002563e-05, |
| "loss": 0.4408, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.6110542476970318, |
| "grad_norm": 0.44963813575609907, |
| "learning_rate": 3.4263003398686464e-05, |
| "loss": 0.4958, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.6131013306038895, |
| "grad_norm": 0.3733726588143654, |
| "learning_rate": 3.424293150654809e-05, |
| "loss": 0.4287, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.6151484135107472, |
| "grad_norm": 0.3812115032343838, |
| "learning_rate": 3.422283046468985e-05, |
| "loss": 0.405, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.617195496417605, |
| "grad_norm": 0.37005120300102184, |
| "learning_rate": 3.420270031425072e-05, |
| "loss": 0.4516, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.6192425793244627, |
| "grad_norm": 0.45666000218850494, |
| "learning_rate": 3.4182541096429265e-05, |
| "loss": 0.4523, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.6212896622313204, |
| "grad_norm": 0.34898311295474405, |
| "learning_rate": 3.416235285248352e-05, |
| "loss": 0.4007, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.623336745138178, |
| "grad_norm": 0.3839127190696324, |
| "learning_rate": 3.4142135623730954e-05, |
| "loss": 0.4617, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.625383828045036, |
| "grad_norm": 0.4387438648695345, |
| "learning_rate": 3.412188945154833e-05, |
| "loss": 0.4723, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.6274309109518934, |
| "grad_norm": 0.3332803619102528, |
| "learning_rate": 3.410161437737166e-05, |
| "loss": 0.362, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.6294779938587514, |
| "grad_norm": 0.3807927986402262, |
| "learning_rate": 3.4081310442696114e-05, |
| "loss": 0.4339, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.6315250767656089, |
| "grad_norm": 0.3937568675037694, |
| "learning_rate": 3.4060977689075914e-05, |
| "loss": 0.4184, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.6335721596724668, |
| "grad_norm": 0.40435872407490453, |
| "learning_rate": 3.404061615812425e-05, |
| "loss": 0.4906, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.6356192425793243, |
| "grad_norm": 0.37907378547306464, |
| "learning_rate": 3.402022589151325e-05, |
| "loss": 0.4273, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.6376663254861823, |
| "grad_norm": 0.3703864227511636, |
| "learning_rate": 3.399980693097383e-05, |
| "loss": 0.4154, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.6397134083930398, |
| "grad_norm": 0.4191952674222777, |
| "learning_rate": 3.3979359318295605e-05, |
| "loss": 0.3581, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.6417604912998978, |
| "grad_norm": 0.3966331751054016, |
| "learning_rate": 3.395888309532687e-05, |
| "loss": 0.4669, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.6438075742067553, |
| "grad_norm": 0.445008569764165, |
| "learning_rate": 3.393837830397446e-05, |
| "loss": 0.4267, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.6458546571136132, |
| "grad_norm": 0.46681180810571704, |
| "learning_rate": 3.391784498620369e-05, |
| "loss": 0.4895, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.6479017400204707, |
| "grad_norm": 0.3385167810574236, |
| "learning_rate": 3.3897283184038215e-05, |
| "loss": 0.404, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.6499488229273287, |
| "grad_norm": 0.5219129958909896, |
| "learning_rate": 3.387669293956003e-05, |
| "loss": 0.4305, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.6519959058341862, |
| "grad_norm": 0.3668653050867526, |
| "learning_rate": 3.385607429490934e-05, |
| "loss": 0.3746, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.6540429887410442, |
| "grad_norm": 0.5158311002393402, |
| "learning_rate": 3.3835427292284445e-05, |
| "loss": 0.486, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.6560900716479017, |
| "grad_norm": 0.36978068539278974, |
| "learning_rate": 3.38147519739417e-05, |
| "loss": 0.4487, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.6581371545547596, |
| "grad_norm": 0.3772021211105363, |
| "learning_rate": 3.37940483821954e-05, |
| "loss": 0.4017, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.6601842374616171, |
| "grad_norm": 0.41401208686983515, |
| "learning_rate": 3.3773316559417734e-05, |
| "loss": 0.491, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.6622313203684749, |
| "grad_norm": 0.3524038676017696, |
| "learning_rate": 3.375255654803864e-05, |
| "loss": 0.3938, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.6642784032753326, |
| "grad_norm": 0.4096084241414413, |
| "learning_rate": 3.373176839054576e-05, |
| "loss": 0.4157, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.6663254861821903, |
| "grad_norm": 0.4186774343886114, |
| "learning_rate": 3.371095212948431e-05, |
| "loss": 0.4681, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.668372569089048, |
| "grad_norm": 0.40515797591205743, |
| "learning_rate": 3.3690107807457085e-05, |
| "loss": 0.4459, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.6704196519959058, |
| "grad_norm": 0.32867520587713756, |
| "learning_rate": 3.366923546712426e-05, |
| "loss": 0.3888, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.6724667349027635, |
| "grad_norm": 0.347589875409667, |
| "learning_rate": 3.364833515120336e-05, |
| "loss": 0.4083, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.6745138178096213, |
| "grad_norm": 0.3591052938957919, |
| "learning_rate": 3.362740690246918e-05, |
| "loss": 0.3938, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.676560900716479, |
| "grad_norm": 0.35854368626658006, |
| "learning_rate": 3.360645076375368e-05, |
| "loss": 0.4217, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.6786079836233367, |
| "grad_norm": 0.44210711469086067, |
| "learning_rate": 3.358546677794586e-05, |
| "loss": 0.4752, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.6806550665301945, |
| "grad_norm": 0.3418341105649144, |
| "learning_rate": 3.356445498799179e-05, |
| "loss": 0.3828, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.6827021494370522, |
| "grad_norm": 0.40574252833368174, |
| "learning_rate": 3.354341543689438e-05, |
| "loss": 0.4138, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.68474923234391, |
| "grad_norm": 0.41618079643123923, |
| "learning_rate": 3.352234816771337e-05, |
| "loss": 0.4559, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.6867963152507677, |
| "grad_norm": 0.42761098792857727, |
| "learning_rate": 3.350125322356525e-05, |
| "loss": 0.4466, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.6888433981576254, |
| "grad_norm": 0.33049589320031353, |
| "learning_rate": 3.348013064762312e-05, |
| "loss": 0.3768, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.6908904810644831, |
| "grad_norm": 0.3768698742983625, |
| "learning_rate": 3.3458980483116664e-05, |
| "loss": 0.4278, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.6929375639713409, |
| "grad_norm": 0.3675413592176129, |
| "learning_rate": 3.343780277333199e-05, |
| "loss": 0.4573, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.6949846468781986, |
| "grad_norm": 0.39935534592316574, |
| "learning_rate": 3.3416597561611616e-05, |
| "loss": 0.4607, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.6970317297850563, |
| "grad_norm": 0.3422867133996467, |
| "learning_rate": 3.3395364891354316e-05, |
| "loss": 0.3925, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.699078812691914, |
| "grad_norm": 0.4421095089073154, |
| "learning_rate": 3.33741048060151e-05, |
| "loss": 0.5186, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.7011258955987718, |
| "grad_norm": 0.40513609641554227, |
| "learning_rate": 3.3352817349105046e-05, |
| "loss": 0.4388, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.7031729785056293, |
| "grad_norm": 0.47309198797804985, |
| "learning_rate": 3.333150256419127e-05, |
| "loss": 0.5152, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.7052200614124873, |
| "grad_norm": 0.37396555660516145, |
| "learning_rate": 3.331016049489681e-05, |
| "loss": 0.3778, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.7072671443193448, |
| "grad_norm": 0.46343295183867955, |
| "learning_rate": 3.328879118490055e-05, |
| "loss": 0.4181, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.7093142272262027, |
| "grad_norm": 0.36985219342112263, |
| "learning_rate": 3.3267394677937134e-05, |
| "loss": 0.4519, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.7113613101330603, |
| "grad_norm": 0.47168961292482453, |
| "learning_rate": 3.3245971017796854e-05, |
| "loss": 0.4754, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.7134083930399182, |
| "grad_norm": 0.45545209130289294, |
| "learning_rate": 3.322452024832557e-05, |
| "loss": 0.4054, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.7154554759467757, |
| "grad_norm": 0.44300545126776186, |
| "learning_rate": 3.320304241342464e-05, |
| "loss": 0.4707, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.7175025588536337, |
| "grad_norm": 0.40914423787668386, |
| "learning_rate": 3.31815375570508e-05, |
| "loss": 0.4008, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.7195496417604912, |
| "grad_norm": 0.42788343961512754, |
| "learning_rate": 3.3160005723216105e-05, |
| "loss": 0.4063, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.7215967246673491, |
| "grad_norm": 0.3836547816282569, |
| "learning_rate": 3.31384469559878e-05, |
| "loss": 0.4471, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.7236438075742067, |
| "grad_norm": 0.3712650340512891, |
| "learning_rate": 3.311686129948827e-05, |
| "loss": 0.3813, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.7256908904810646, |
| "grad_norm": 0.3661541112584301, |
| "learning_rate": 3.3095248797894925e-05, |
| "loss": 0.4373, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.7277379733879221, |
| "grad_norm": 0.40769069558582477, |
| "learning_rate": 3.307360949544012e-05, |
| "loss": 0.433, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.72978505629478, |
| "grad_norm": 0.4384390897805754, |
| "learning_rate": 3.305194343641106e-05, |
| "loss": 0.4414, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.7318321392016376, |
| "grad_norm": 0.39419405263041707, |
| "learning_rate": 3.30302506651497e-05, |
| "loss": 0.3903, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.7338792221084955, |
| "grad_norm": 0.5193517899563321, |
| "learning_rate": 3.300853122605268e-05, |
| "loss": 0.5126, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.735926305015353, |
| "grad_norm": 0.37470876760021676, |
| "learning_rate": 3.2986785163571216e-05, |
| "loss": 0.4088, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.737973387922211, |
| "grad_norm": 0.3911340678248973, |
| "learning_rate": 3.2965012522211e-05, |
| "loss": 0.4231, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.7400204708290685, |
| "grad_norm": 0.3718531256618771, |
| "learning_rate": 3.294321334653213e-05, |
| "loss": 0.4087, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.7420675537359263, |
| "grad_norm": 0.44651626014948276, |
| "learning_rate": 3.2921387681149e-05, |
| "loss": 0.4623, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.744114636642784, |
| "grad_norm": 0.37467196684362686, |
| "learning_rate": 3.289953557073024e-05, |
| "loss": 0.4148, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.7461617195496417, |
| "grad_norm": 0.42577176527999205, |
| "learning_rate": 3.2877657059998584e-05, |
| "loss": 0.4103, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.7482088024564995, |
| "grad_norm": 0.3833747846094608, |
| "learning_rate": 3.285575219373079e-05, |
| "loss": 0.4432, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.7502558853633572, |
| "grad_norm": 0.4116483874002876, |
| "learning_rate": 3.2833821016757586e-05, |
| "loss": 0.4203, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.752302968270215, |
| "grad_norm": 0.330063754223896, |
| "learning_rate": 3.281186357396351e-05, |
| "loss": 0.3895, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.7543500511770727, |
| "grad_norm": 0.398313432569639, |
| "learning_rate": 3.278987991028688e-05, |
| "loss": 0.4367, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.7563971340839304, |
| "grad_norm": 0.3618880492014595, |
| "learning_rate": 3.276787007071968e-05, |
| "loss": 0.3917, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.7584442169907881, |
| "grad_norm": 0.4604401121085883, |
| "learning_rate": 3.274583410030745e-05, |
| "loss": 0.4577, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.7604912998976459, |
| "grad_norm": 0.33679127692517397, |
| "learning_rate": 3.2723772044149224e-05, |
| "loss": 0.4072, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.7625383828045036, |
| "grad_norm": 0.3624660321439512, |
| "learning_rate": 3.270168394739741e-05, |
| "loss": 0.4098, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.7645854657113613, |
| "grad_norm": 0.343429438960415, |
| "learning_rate": 3.267956985525774e-05, |
| "loss": 0.4262, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.766632548618219, |
| "grad_norm": 0.3866589432412809, |
| "learning_rate": 3.26574298129891e-05, |
| "loss": 0.4311, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.7686796315250768, |
| "grad_norm": 0.3484380413267947, |
| "learning_rate": 3.263526386590351e-05, |
| "loss": 0.4265, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.7707267144319345, |
| "grad_norm": 0.3727255169733587, |
| "learning_rate": 3.261307205936603e-05, |
| "loss": 0.4936, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.7727737973387923, |
| "grad_norm": 0.35604276467584445, |
| "learning_rate": 3.2590854438794604e-05, |
| "loss": 0.3855, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.77482088024565, |
| "grad_norm": 0.37701838111479336, |
| "learning_rate": 3.2568611049660046e-05, |
| "loss": 0.4308, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.7768679631525077, |
| "grad_norm": 0.3530904170276449, |
| "learning_rate": 3.2546341937485884e-05, |
| "loss": 0.4198, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.7789150460593655, |
| "grad_norm": 0.37564040686995553, |
| "learning_rate": 3.2524047147848284e-05, |
| "loss": 0.3702, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.7809621289662232, |
| "grad_norm": 0.366044908240742, |
| "learning_rate": 3.250172672637598e-05, |
| "loss": 0.4561, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.7830092118730807, |
| "grad_norm": 0.3683215486290299, |
| "learning_rate": 3.247938071875017e-05, |
| "loss": 0.4467, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.7850562947799387, |
| "grad_norm": 0.38716018556798415, |
| "learning_rate": 3.24570091707044e-05, |
| "loss": 0.4151, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.7871033776867962, |
| "grad_norm": 0.7367692001662189, |
| "learning_rate": 3.24346121280245e-05, |
| "loss": 0.438, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.7891504605936541, |
| "grad_norm": 0.41338889728660405, |
| "learning_rate": 3.2412189636548456e-05, |
| "loss": 0.4629, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.7911975435005116, |
| "grad_norm": 0.35357448274069847, |
| "learning_rate": 3.238974174216637e-05, |
| "loss": 0.3559, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.7932446264073696, |
| "grad_norm": 0.3796163326385031, |
| "learning_rate": 3.236726849082032e-05, |
| "loss": 0.4281, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.795291709314227, |
| "grad_norm": 0.3944707372507435, |
| "learning_rate": 3.234476992850425e-05, |
| "loss": 0.4537, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.797338792221085, |
| "grad_norm": 0.3318296802336291, |
| "learning_rate": 3.232224610126396e-05, |
| "loss": 0.3985, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.7993858751279426, |
| "grad_norm": 0.40919288715091856, |
| "learning_rate": 3.229969705519693e-05, |
| "loss": 0.4616, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.8014329580348005, |
| "grad_norm": 0.39543379019412783, |
| "learning_rate": 3.227712283645224e-05, |
| "loss": 0.4883, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.803480040941658, |
| "grad_norm": 0.3624564100802025, |
| "learning_rate": 3.225452349123051e-05, |
| "loss": 0.4264, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.805527123848516, |
| "grad_norm": 0.35250394717776745, |
| "learning_rate": 3.2231899065783766e-05, |
| "loss": 0.3975, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.8075742067553735, |
| "grad_norm": 0.41454217155818623, |
| "learning_rate": 3.2209249606415394e-05, |
| "loss": 0.4668, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.8096212896622315, |
| "grad_norm": 0.33248312085973525, |
| "learning_rate": 3.2186575159479966e-05, |
| "loss": 0.3853, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.811668372569089, |
| "grad_norm": 0.41602712353842625, |
| "learning_rate": 3.2163875771383246e-05, |
| "loss": 0.4615, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.813715455475947, |
| "grad_norm": 0.3941084429766098, |
| "learning_rate": 3.214115148858201e-05, |
| "loss": 0.5111, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.8157625383828044, |
| "grad_norm": 0.34750451682083505, |
| "learning_rate": 3.211840235758399e-05, |
| "loss": 0.4055, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.8178096212896624, |
| "grad_norm": 0.45072414952251894, |
| "learning_rate": 3.209562842494778e-05, |
| "loss": 0.4673, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.81985670419652, |
| "grad_norm": 0.36513950471383816, |
| "learning_rate": 3.207282973728273e-05, |
| "loss": 0.4526, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.8219037871033776, |
| "grad_norm": 0.3898093803880879, |
| "learning_rate": 3.205000634124884e-05, |
| "loss": 0.4045, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.8239508700102354, |
| "grad_norm": 0.36982373792147444, |
| "learning_rate": 3.20271582835567e-05, |
| "loss": 0.4079, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.825997952917093, |
| "grad_norm": 0.35532947918298324, |
| "learning_rate": 3.200428561096737e-05, |
| "loss": 0.444, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.8280450358239508, |
| "grad_norm": 0.3777203606733099, |
| "learning_rate": 3.198138837029227e-05, |
| "loss": 0.46, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.8300921187308086, |
| "grad_norm": 0.3489126911275309, |
| "learning_rate": 3.195846660839311e-05, |
| "loss": 0.3887, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.8321392016376663, |
| "grad_norm": 0.3633804424526499, |
| "learning_rate": 3.193552037218179e-05, |
| "loss": 0.5416, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.834186284544524, |
| "grad_norm": 0.38704595278722626, |
| "learning_rate": 3.1912549708620314e-05, |
| "loss": 0.47, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.8362333674513818, |
| "grad_norm": 0.36865192383072315, |
| "learning_rate": 3.188955466472063e-05, |
| "loss": 0.4084, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.8382804503582395, |
| "grad_norm": 0.42348322893722706, |
| "learning_rate": 3.186653528754464e-05, |
| "loss": 0.4354, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.8403275332650972, |
| "grad_norm": 0.37712189200316865, |
| "learning_rate": 3.184349162420401e-05, |
| "loss": 0.4011, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.842374616171955, |
| "grad_norm": 0.40519216196971364, |
| "learning_rate": 3.182042372186013e-05, |
| "loss": 0.4523, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.8444216990788127, |
| "grad_norm": 0.3270302439955843, |
| "learning_rate": 3.179733162772398e-05, |
| "loss": 0.3863, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.8464687819856704, |
| "grad_norm": 0.36562347535937434, |
| "learning_rate": 3.177421538905606e-05, |
| "loss": 0.414, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.8485158648925282, |
| "grad_norm": 0.34059679657441694, |
| "learning_rate": 3.17510750531663e-05, |
| "loss": 0.4319, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.850562947799386, |
| "grad_norm": 0.3971233949934265, |
| "learning_rate": 3.172791066741392e-05, |
| "loss": 0.4051, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.8526100307062436, |
| "grad_norm": 0.37489566812190156, |
| "learning_rate": 3.170472227920737e-05, |
| "loss": 0.4488, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.8546571136131014, |
| "grad_norm": 0.3619651650704685, |
| "learning_rate": 3.168150993600424e-05, |
| "loss": 0.4097, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.856704196519959, |
| "grad_norm": 0.4144444046604467, |
| "learning_rate": 3.165827368531113e-05, |
| "loss": 0.4838, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.8587512794268168, |
| "grad_norm": 0.34162161820675707, |
| "learning_rate": 3.1635013574683564e-05, |
| "loss": 0.403, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.8607983623336746, |
| "grad_norm": 0.3876122467545283, |
| "learning_rate": 3.161172965172591e-05, |
| "loss": 0.4564, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.862845445240532, |
| "grad_norm": 0.34340552862244217, |
| "learning_rate": 3.1588421964091276e-05, |
| "loss": 0.397, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.86489252814739, |
| "grad_norm": 0.383808899187042, |
| "learning_rate": 3.1565090559481396e-05, |
| "loss": 0.4265, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.8669396110542475, |
| "grad_norm": 0.31690081389294966, |
| "learning_rate": 3.1541735485646536e-05, |
| "loss": 0.4047, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.8689866939611055, |
| "grad_norm": 0.3992971993499514, |
| "learning_rate": 3.151835679038542e-05, |
| "loss": 0.439, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.871033776867963, |
| "grad_norm": 0.31545614214320866, |
| "learning_rate": 3.149495452154512e-05, |
| "loss": 0.3986, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.873080859774821, |
| "grad_norm": 0.33181454924376286, |
| "learning_rate": 3.147152872702092e-05, |
| "loss": 0.4364, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.8751279426816785, |
| "grad_norm": 0.38596107764783966, |
| "learning_rate": 3.14480794547563e-05, |
| "loss": 0.4666, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.8771750255885364, |
| "grad_norm": 0.3743307900880147, |
| "learning_rate": 3.142460675274275e-05, |
| "loss": 0.4136, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.879222108495394, |
| "grad_norm": 0.39267653351820997, |
| "learning_rate": 3.1401110669019724e-05, |
| "loss": 0.4308, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.881269191402252, |
| "grad_norm": 0.3865400215305747, |
| "learning_rate": 3.137759125167455e-05, |
| "loss": 0.4663, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.8833162743091094, |
| "grad_norm": 0.36025373309076514, |
| "learning_rate": 3.135404854884226e-05, |
| "loss": 0.4202, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.8853633572159674, |
| "grad_norm": 0.3774704107912035, |
| "learning_rate": 3.133048260870561e-05, |
| "loss": 0.4047, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.8874104401228249, |
| "grad_norm": 0.3281213257339426, |
| "learning_rate": 3.130689347949486e-05, |
| "loss": 0.4088, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.8894575230296828, |
| "grad_norm": 0.3593342361422244, |
| "learning_rate": 3.1283281209487755e-05, |
| "loss": 0.4475, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.8915046059365404, |
| "grad_norm": 0.348849346649699, |
| "learning_rate": 3.1259645847009384e-05, |
| "loss": 0.4133, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.8935516888433983, |
| "grad_norm": 0.3661633848236013, |
| "learning_rate": 3.123598744043211e-05, |
| "loss": 0.4345, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.8955987717502558, |
| "grad_norm": 0.39376453806042766, |
| "learning_rate": 3.121230603817545e-05, |
| "loss": 0.4802, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.8976458546571138, |
| "grad_norm": 0.4054577347062655, |
| "learning_rate": 3.1188601688706e-05, |
| "loss": 0.4861, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.8996929375639713, |
| "grad_norm": 0.3244003846213942, |
| "learning_rate": 3.1164874440537295e-05, |
| "loss": 0.3988, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.901740020470829, |
| "grad_norm": 0.3598304561011502, |
| "learning_rate": 3.114112434222976e-05, |
| "loss": 0.4083, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.9037871033776868, |
| "grad_norm": 0.3818801716604425, |
| "learning_rate": 3.111735144239057e-05, |
| "loss": 0.424, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.9058341862845445, |
| "grad_norm": 0.351513366570262, |
| "learning_rate": 3.109355578967356e-05, |
| "loss": 0.4529, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.9078812691914022, |
| "grad_norm": 0.3240275031967729, |
| "learning_rate": 3.106973743277916e-05, |
| "loss": 0.4211, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.90992835209826, |
| "grad_norm": 0.361341366033979, |
| "learning_rate": 3.104589642045422e-05, |
| "loss": 0.4776, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.9119754350051177, |
| "grad_norm": 0.29231193321874205, |
| "learning_rate": 3.1022032801492e-05, |
| "loss": 0.3741, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.9140225179119754, |
| "grad_norm": 0.3401516128458725, |
| "learning_rate": 3.099814662473202e-05, |
| "loss": 0.411, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.9160696008188332, |
| "grad_norm": 0.3813395041273339, |
| "learning_rate": 3.0974237939059947e-05, |
| "loss": 0.4652, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.9181166837256909, |
| "grad_norm": 0.33396626631772636, |
| "learning_rate": 3.095030679340751e-05, |
| "loss": 0.438, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.9201637666325486, |
| "grad_norm": 0.27154874124436096, |
| "learning_rate": 3.092635323675245e-05, |
| "loss": 0.3297, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.9222108495394064, |
| "grad_norm": 0.3729869848757238, |
| "learning_rate": 3.0902377318118336e-05, |
| "loss": 0.3925, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.924257932446264, |
| "grad_norm": 0.3729082740901169, |
| "learning_rate": 3.0878379086574494e-05, |
| "loss": 0.4632, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.9263050153531218, |
| "grad_norm": 0.33442817830860083, |
| "learning_rate": 3.085435859123596e-05, |
| "loss": 0.4246, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.9283520982599796, |
| "grad_norm": 0.3455333402015141, |
| "learning_rate": 3.083031588126329e-05, |
| "loss": 0.4291, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.9303991811668373, |
| "grad_norm": 0.33030857885870823, |
| "learning_rate": 3.0806251005862535e-05, |
| "loss": 0.4293, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.932446264073695, |
| "grad_norm": 0.3313566999610316, |
| "learning_rate": 3.07821640142851e-05, |
| "loss": 0.4691, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.9344933469805528, |
| "grad_norm": 0.3371774148631356, |
| "learning_rate": 3.0758054955827655e-05, |
| "loss": 0.4283, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.9365404298874105, |
| "grad_norm": 0.35055569911779827, |
| "learning_rate": 3.073392387983202e-05, |
| "loss": 0.4157, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.9385875127942682, |
| "grad_norm": 0.3713512962937003, |
| "learning_rate": 3.070977083568508e-05, |
| "loss": 0.4709, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.940634595701126, |
| "grad_norm": 0.3121880967121788, |
| "learning_rate": 3.06855958728187e-05, |
| "loss": 0.3584, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.9426816786079835, |
| "grad_norm": 0.3923276029776799, |
| "learning_rate": 3.0661399040709584e-05, |
| "loss": 0.4273, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.9447287615148414, |
| "grad_norm": 0.39501548252234137, |
| "learning_rate": 3.0637180388879207e-05, |
| "loss": 0.4292, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.946775844421699, |
| "grad_norm": 0.3918727723912462, |
| "learning_rate": 3.061293996689369e-05, |
| "loss": 0.4422, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.9488229273285569, |
| "grad_norm": 0.3650853100344803, |
| "learning_rate": 3.05886778243637e-05, |
| "loss": 0.4355, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.9508700102354144, |
| "grad_norm": 0.40314024207731314, |
| "learning_rate": 3.0564394010944396e-05, |
| "loss": 0.3964, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.9529170931422724, |
| "grad_norm": 0.3321329959720213, |
| "learning_rate": 3.054008857633524e-05, |
| "loss": 0.3802, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.9549641760491299, |
| "grad_norm": 0.3578935619719161, |
| "learning_rate": 3.051576157027998e-05, |
| "loss": 0.4187, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.9570112589559878, |
| "grad_norm": 0.36008706778157745, |
| "learning_rate": 3.0491413042566492e-05, |
| "loss": 0.421, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.9590583418628453, |
| "grad_norm": 0.35653968726696394, |
| "learning_rate": 3.0467043043026705e-05, |
| "loss": 0.3773, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.9611054247697033, |
| "grad_norm": 0.398663136011159, |
| "learning_rate": 3.0442651621536502e-05, |
| "loss": 0.4867, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.9631525076765608, |
| "grad_norm": 0.39803480683406417, |
| "learning_rate": 3.041823882801559e-05, |
| "loss": 0.4572, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.9651995905834188, |
| "grad_norm": 0.31571236419876386, |
| "learning_rate": 3.039380471242743e-05, |
| "loss": 0.3953, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.9672466734902763, |
| "grad_norm": 0.356704934998217, |
| "learning_rate": 3.0369349324779115e-05, |
| "loss": 0.4116, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.9692937563971342, |
| "grad_norm": 0.38456016779817315, |
| "learning_rate": 3.0344872715121276e-05, |
| "loss": 0.4181, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.9713408393039917, |
| "grad_norm": 0.42725505346001535, |
| "learning_rate": 3.0320374933547982e-05, |
| "loss": 0.4509, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.9733879222108497, |
| "grad_norm": 0.3124541183454085, |
| "learning_rate": 3.0295856030196618e-05, |
| "loss": 0.3635, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.9754350051177072, |
| "grad_norm": 0.38135606484411233, |
| "learning_rate": 3.0271316055247812e-05, |
| "loss": 0.4322, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.9774820880245652, |
| "grad_norm": 0.3564421243258828, |
| "learning_rate": 3.024675505892531e-05, |
| "loss": 0.4706, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.9795291709314227, |
| "grad_norm": 0.33614285440354186, |
| "learning_rate": 3.022217309149588e-05, |
| "loss": 0.3916, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.9815762538382804, |
| "grad_norm": 0.3642272706560808, |
| "learning_rate": 3.019757020326921e-05, |
| "loss": 0.4001, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.9836233367451381, |
| "grad_norm": 0.3677389037382802, |
| "learning_rate": 3.017294644459782e-05, |
| "loss": 0.4067, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.9856704196519959, |
| "grad_norm": 0.38491158269193576, |
| "learning_rate": 3.0148301865876913e-05, |
| "loss": 0.4266, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.9877175025588536, |
| "grad_norm": 0.4027798698020095, |
| "learning_rate": 3.0123636517544326e-05, |
| "loss": 0.5046, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.9897645854657113, |
| "grad_norm": 0.36103993797318307, |
| "learning_rate": 3.0098950450080404e-05, |
| "loss": 0.3863, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.991811668372569, |
| "grad_norm": 0.3202323840951822, |
| "learning_rate": 3.0074243714007875e-05, |
| "loss": 0.3562, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.9938587512794268, |
| "grad_norm": 0.3949919429980765, |
| "learning_rate": 3.004951635989179e-05, |
| "loss": 0.4732, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.9959058341862845, |
| "grad_norm": 0.3475495093601558, |
| "learning_rate": 3.0024768438339388e-05, |
| "loss": 0.391, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.9979529170931423, |
| "grad_norm": 0.39216689067501626, |
| "learning_rate": 3.0000000000000004e-05, |
| "loss": 0.4564, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.6533349441460323, |
| "learning_rate": 2.9975211095564955e-05, |
| "loss": 0.5882, |
| "step": 977 |
| }, |
| { |
| "epoch": 2.0020470829068575, |
| "grad_norm": 0.4211988149390974, |
| "learning_rate": 2.995040177576745e-05, |
| "loss": 0.3174, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.0040941658137155, |
| "grad_norm": 0.6108187632741519, |
| "learning_rate": 2.992557209138249e-05, |
| "loss": 0.3367, |
| "step": 979 |
| }, |
| { |
| "epoch": 2.006141248720573, |
| "grad_norm": 0.38089369292387826, |
| "learning_rate": 2.9900722093226737e-05, |
| "loss": 0.3232, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.008188331627431, |
| "grad_norm": 0.47508227914348694, |
| "learning_rate": 2.9875851832158428e-05, |
| "loss": 0.3522, |
| "step": 981 |
| }, |
| { |
| "epoch": 2.0102354145342884, |
| "grad_norm": 0.4326113199153507, |
| "learning_rate": 2.9850961359077293e-05, |
| "loss": 0.2981, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.0122824974411464, |
| "grad_norm": 0.42932267247803857, |
| "learning_rate": 2.98260507249244e-05, |
| "loss": 0.3202, |
| "step": 983 |
| }, |
| { |
| "epoch": 2.014329580348004, |
| "grad_norm": 0.3951716605003508, |
| "learning_rate": 2.9801119980682095e-05, |
| "loss": 0.3509, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.016376663254862, |
| "grad_norm": 0.35789828953920616, |
| "learning_rate": 2.977616917737388e-05, |
| "loss": 0.3281, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.0184237461617194, |
| "grad_norm": 0.3460413679418126, |
| "learning_rate": 2.9751198366064304e-05, |
| "loss": 0.3161, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.0204708290685773, |
| "grad_norm": 0.3703844711305289, |
| "learning_rate": 2.9726207597858872e-05, |
| "loss": 0.3251, |
| "step": 987 |
| }, |
| { |
| "epoch": 2.022517911975435, |
| "grad_norm": 0.39238883821417286, |
| "learning_rate": 2.9701196923903927e-05, |
| "loss": 0.3391, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.024564994882293, |
| "grad_norm": 0.34303403649979675, |
| "learning_rate": 2.9676166395386553e-05, |
| "loss": 0.361, |
| "step": 989 |
| }, |
| { |
| "epoch": 2.0266120777891503, |
| "grad_norm": 0.38788579378291205, |
| "learning_rate": 2.965111606353447e-05, |
| "loss": 0.312, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.0286591606960083, |
| "grad_norm": 0.4013869555066414, |
| "learning_rate": 2.9626045979615928e-05, |
| "loss": 0.3209, |
| "step": 991 |
| }, |
| { |
| "epoch": 2.030706243602866, |
| "grad_norm": 0.37089151639897344, |
| "learning_rate": 2.9600956194939598e-05, |
| "loss": 0.3498, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.0327533265097237, |
| "grad_norm": 0.371332657755311, |
| "learning_rate": 2.957584676085447e-05, |
| "loss": 0.3422, |
| "step": 993 |
| }, |
| { |
| "epoch": 2.0348004094165812, |
| "grad_norm": 0.3762690812523185, |
| "learning_rate": 2.9550717728749768e-05, |
| "loss": 0.3199, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.036847492323439, |
| "grad_norm": 0.3431217144511242, |
| "learning_rate": 2.9525569150054796e-05, |
| "loss": 0.2871, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.0388945752302967, |
| "grad_norm": 0.42824685102967736, |
| "learning_rate": 2.950040107623887e-05, |
| "loss": 0.3268, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.0409416581371547, |
| "grad_norm": 0.3459406390165573, |
| "learning_rate": 2.947521355881122e-05, |
| "loss": 0.3268, |
| "step": 997 |
| }, |
| { |
| "epoch": 2.042988741044012, |
| "grad_norm": 0.3484085391017404, |
| "learning_rate": 2.9450006649320862e-05, |
| "loss": 0.325, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.04503582395087, |
| "grad_norm": 0.3537579300316069, |
| "learning_rate": 2.9424780399356497e-05, |
| "loss": 0.3144, |
| "step": 999 |
| }, |
| { |
| "epoch": 2.0470829068577276, |
| "grad_norm": 0.3540499619721327, |
| "learning_rate": 2.9399534860546404e-05, |
| "loss": 0.3324, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.0491299897645856, |
| "grad_norm": 0.3146082628297793, |
| "learning_rate": 2.937427008455835e-05, |
| "loss": 0.3458, |
| "step": 1001 |
| }, |
| { |
| "epoch": 2.051177072671443, |
| "grad_norm": 0.41424472646515875, |
| "learning_rate": 2.9348986123099462e-05, |
| "loss": 0.3638, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.053224155578301, |
| "grad_norm": 0.3640670691803175, |
| "learning_rate": 2.932368302791614e-05, |
| "loss": 0.3596, |
| "step": 1003 |
| }, |
| { |
| "epoch": 2.0552712384851586, |
| "grad_norm": 0.38223463417283743, |
| "learning_rate": 2.9298360850793944e-05, |
| "loss": 0.3048, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.0573183213920165, |
| "grad_norm": 0.35066959498537137, |
| "learning_rate": 2.9273019643557474e-05, |
| "loss": 0.3154, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.059365404298874, |
| "grad_norm": 0.3912112568074411, |
| "learning_rate": 2.92476594580703e-05, |
| "loss": 0.3397, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.061412487205732, |
| "grad_norm": 0.40688373618091567, |
| "learning_rate": 2.9222280346234816e-05, |
| "loss": 0.3315, |
| "step": 1007 |
| }, |
| { |
| "epoch": 2.0634595701125895, |
| "grad_norm": 0.3408605767984647, |
| "learning_rate": 2.919688235999215e-05, |
| "loss": 0.3334, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.0655066530194475, |
| "grad_norm": 0.35811936384341014, |
| "learning_rate": 2.917146555132206e-05, |
| "loss": 0.3386, |
| "step": 1009 |
| }, |
| { |
| "epoch": 2.067553735926305, |
| "grad_norm": 0.3364760604746556, |
| "learning_rate": 2.914602997224285e-05, |
| "loss": 0.3199, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.069600818833163, |
| "grad_norm": 0.4118145924928188, |
| "learning_rate": 2.912057567481119e-05, |
| "loss": 0.3507, |
| "step": 1011 |
| }, |
| { |
| "epoch": 2.0716479017400204, |
| "grad_norm": 0.3612606023218023, |
| "learning_rate": 2.909510271112212e-05, |
| "loss": 0.3462, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.0736949846468784, |
| "grad_norm": 0.3222144557445549, |
| "learning_rate": 2.906961113330883e-05, |
| "loss": 0.3271, |
| "step": 1013 |
| }, |
| { |
| "epoch": 2.075742067553736, |
| "grad_norm": 0.4762234619066632, |
| "learning_rate": 2.904410099354263e-05, |
| "loss": 0.311, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.0777891504605934, |
| "grad_norm": 0.3407988463597387, |
| "learning_rate": 2.9018572344032823e-05, |
| "loss": 0.3242, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.0798362333674514, |
| "grad_norm": 0.38477863373629423, |
| "learning_rate": 2.8993025237026578e-05, |
| "loss": 0.3472, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.0818833162743093, |
| "grad_norm": 0.4212662322514699, |
| "learning_rate": 2.8967459724808856e-05, |
| "loss": 0.3055, |
| "step": 1017 |
| }, |
| { |
| "epoch": 2.083930399181167, |
| "grad_norm": 0.34644422983431267, |
| "learning_rate": 2.8941875859702283e-05, |
| "loss": 0.3099, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.0859774820880244, |
| "grad_norm": 0.38660401266521843, |
| "learning_rate": 2.891627369406703e-05, |
| "loss": 0.3301, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.0880245649948823, |
| "grad_norm": 0.33570902053280804, |
| "learning_rate": 2.889065328030074e-05, |
| "loss": 0.3559, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.09007164790174, |
| "grad_norm": 0.3938573985227862, |
| "learning_rate": 2.88650146708384e-05, |
| "loss": 0.3301, |
| "step": 1021 |
| }, |
| { |
| "epoch": 2.092118730808598, |
| "grad_norm": 0.37756710702648894, |
| "learning_rate": 2.883935791815222e-05, |
| "loss": 0.3413, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.0941658137154553, |
| "grad_norm": 0.36055751260111174, |
| "learning_rate": 2.8813683074751578e-05, |
| "loss": 0.3469, |
| "step": 1023 |
| }, |
| { |
| "epoch": 2.0962128966223132, |
| "grad_norm": 0.34337336038390814, |
| "learning_rate": 2.878799019318283e-05, |
| "loss": 0.3339, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.0982599795291708, |
| "grad_norm": 0.37844750997650106, |
| "learning_rate": 2.8762279326029293e-05, |
| "loss": 0.3278, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.1003070624360287, |
| "grad_norm": 0.3641190698552649, |
| "learning_rate": 2.8736550525911066e-05, |
| "loss": 0.3548, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.1023541453428862, |
| "grad_norm": 0.36462562710150886, |
| "learning_rate": 2.8710803845484955e-05, |
| "loss": 0.321, |
| "step": 1027 |
| }, |
| { |
| "epoch": 2.104401228249744, |
| "grad_norm": 0.3706428526021544, |
| "learning_rate": 2.8685039337444368e-05, |
| "loss": 0.3174, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.1064483111566017, |
| "grad_norm": 0.3276491863527004, |
| "learning_rate": 2.8659257054519182e-05, |
| "loss": 0.3046, |
| "step": 1029 |
| }, |
| { |
| "epoch": 2.1084953940634596, |
| "grad_norm": 0.3247212720041268, |
| "learning_rate": 2.8633457049475678e-05, |
| "loss": 0.3526, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.110542476970317, |
| "grad_norm": 0.3326121138866842, |
| "learning_rate": 2.8607639375116388e-05, |
| "loss": 0.3179, |
| "step": 1031 |
| }, |
| { |
| "epoch": 2.112589559877175, |
| "grad_norm": 0.36702351353785473, |
| "learning_rate": 2.858180408428001e-05, |
| "loss": 0.3393, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.1146366427840326, |
| "grad_norm": 0.31780331970447684, |
| "learning_rate": 2.855595122984129e-05, |
| "loss": 0.3248, |
| "step": 1033 |
| }, |
| { |
| "epoch": 2.1166837256908906, |
| "grad_norm": 0.32446381302593813, |
| "learning_rate": 2.853008086471094e-05, |
| "loss": 0.3283, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.118730808597748, |
| "grad_norm": 0.3428912531231067, |
| "learning_rate": 2.8504193041835497e-05, |
| "loss": 0.3048, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.120777891504606, |
| "grad_norm": 0.4004823401099236, |
| "learning_rate": 2.847828781419722e-05, |
| "loss": 0.3667, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.1228249744114636, |
| "grad_norm": 0.36467750504991164, |
| "learning_rate": 2.8452365234813992e-05, |
| "loss": 0.3601, |
| "step": 1037 |
| }, |
| { |
| "epoch": 2.1248720573183215, |
| "grad_norm": 0.3853260717775913, |
| "learning_rate": 2.842642535673922e-05, |
| "loss": 0.3289, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.126919140225179, |
| "grad_norm": 0.3693312153767782, |
| "learning_rate": 2.8400468233061708e-05, |
| "loss": 0.3147, |
| "step": 1039 |
| }, |
| { |
| "epoch": 2.128966223132037, |
| "grad_norm": 0.38807123819436246, |
| "learning_rate": 2.8374493916905544e-05, |
| "loss": 0.3269, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.1310133060388945, |
| "grad_norm": 0.3324036292794174, |
| "learning_rate": 2.834850246143002e-05, |
| "loss": 0.3076, |
| "step": 1041 |
| }, |
| { |
| "epoch": 2.1330603889457525, |
| "grad_norm": 0.36633069839093463, |
| "learning_rate": 2.832249391982949e-05, |
| "loss": 0.3315, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.13510747185261, |
| "grad_norm": 0.348911620524954, |
| "learning_rate": 2.8296468345333298e-05, |
| "loss": 0.2945, |
| "step": 1043 |
| }, |
| { |
| "epoch": 2.137154554759468, |
| "grad_norm": 0.4313978670199707, |
| "learning_rate": 2.827042579120562e-05, |
| "loss": 0.3556, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.1392016376663254, |
| "grad_norm": 0.33673070921204956, |
| "learning_rate": 2.8244366310745398e-05, |
| "loss": 0.3301, |
| "step": 1045 |
| }, |
| { |
| "epoch": 2.1412487205731834, |
| "grad_norm": 0.4048102626926484, |
| "learning_rate": 2.8218289957286226e-05, |
| "loss": 0.3672, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.143295803480041, |
| "grad_norm": 0.41846672934257156, |
| "learning_rate": 2.8192196784196198e-05, |
| "loss": 0.3148, |
| "step": 1047 |
| }, |
| { |
| "epoch": 2.145342886386899, |
| "grad_norm": 0.3744830737938391, |
| "learning_rate": 2.816608684487787e-05, |
| "loss": 0.3252, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.1473899692937564, |
| "grad_norm": 0.40525534049659034, |
| "learning_rate": 2.813996019276809e-05, |
| "loss": 0.3354, |
| "step": 1049 |
| }, |
| { |
| "epoch": 2.1494370522006143, |
| "grad_norm": 0.3733330999629505, |
| "learning_rate": 2.8113816881337902e-05, |
| "loss": 0.3146, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.151484135107472, |
| "grad_norm": 0.39634148645678874, |
| "learning_rate": 2.8087656964092472e-05, |
| "loss": 0.3041, |
| "step": 1051 |
| }, |
| { |
| "epoch": 2.15353121801433, |
| "grad_norm": 0.3852615546043453, |
| "learning_rate": 2.806148049457093e-05, |
| "loss": 0.3639, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.1555783009211873, |
| "grad_norm": 0.3896444126363705, |
| "learning_rate": 2.803528752634629e-05, |
| "loss": 0.3317, |
| "step": 1053 |
| }, |
| { |
| "epoch": 2.1576253838280453, |
| "grad_norm": 0.38474286304985633, |
| "learning_rate": 2.8009078113025335e-05, |
| "loss": 0.3363, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.1596724667349028, |
| "grad_norm": 0.36285165391849056, |
| "learning_rate": 2.798285230824849e-05, |
| "loss": 0.3088, |
| "step": 1055 |
| }, |
| { |
| "epoch": 2.1617195496417603, |
| "grad_norm": 0.39881869082842497, |
| "learning_rate": 2.795661016568975e-05, |
| "loss": 0.3472, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.1637666325486182, |
| "grad_norm": 0.36428100096066146, |
| "learning_rate": 2.7930351739056533e-05, |
| "loss": 0.347, |
| "step": 1057 |
| }, |
| { |
| "epoch": 2.1658137154554757, |
| "grad_norm": 0.4335731332830444, |
| "learning_rate": 2.7904077082089574e-05, |
| "loss": 0.325, |
| "step": 1058 |
| }, |
| { |
| "epoch": 2.1678607983623337, |
| "grad_norm": 0.3984251614786555, |
| "learning_rate": 2.787778624856286e-05, |
| "loss": 0.3066, |
| "step": 1059 |
| }, |
| { |
| "epoch": 2.169907881269191, |
| "grad_norm": 0.42180327285845043, |
| "learning_rate": 2.7851479292283442e-05, |
| "loss": 0.3415, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.171954964176049, |
| "grad_norm": 0.41842751411141604, |
| "learning_rate": 2.782515626709139e-05, |
| "loss": 0.3498, |
| "step": 1061 |
| }, |
| { |
| "epoch": 2.1740020470829067, |
| "grad_norm": 0.3982687240967601, |
| "learning_rate": 2.7798817226859678e-05, |
| "loss": 0.3311, |
| "step": 1062 |
| }, |
| { |
| "epoch": 2.1760491299897646, |
| "grad_norm": 0.3653496091806209, |
| "learning_rate": 2.7772462225494013e-05, |
| "loss": 0.3393, |
| "step": 1063 |
| }, |
| { |
| "epoch": 2.178096212896622, |
| "grad_norm": 0.35694917670185916, |
| "learning_rate": 2.7746091316932807e-05, |
| "loss": 0.2938, |
| "step": 1064 |
| }, |
| { |
| "epoch": 2.18014329580348, |
| "grad_norm": 0.36076433028006755, |
| "learning_rate": 2.7719704555147012e-05, |
| "loss": 0.3176, |
| "step": 1065 |
| }, |
| { |
| "epoch": 2.1821903787103376, |
| "grad_norm": 0.35991273790931844, |
| "learning_rate": 2.7693301994140026e-05, |
| "loss": 0.3369, |
| "step": 1066 |
| }, |
| { |
| "epoch": 2.1842374616171956, |
| "grad_norm": 0.3605864665846731, |
| "learning_rate": 2.7666883687947588e-05, |
| "loss": 0.308, |
| "step": 1067 |
| }, |
| { |
| "epoch": 2.186284544524053, |
| "grad_norm": 0.3868682601764087, |
| "learning_rate": 2.7640449690637642e-05, |
| "loss": 0.335, |
| "step": 1068 |
| }, |
| { |
| "epoch": 2.188331627430911, |
| "grad_norm": 0.37080898315589017, |
| "learning_rate": 2.761400005631028e-05, |
| "loss": 0.3339, |
| "step": 1069 |
| }, |
| { |
| "epoch": 2.1903787103377685, |
| "grad_norm": 0.35560785487130986, |
| "learning_rate": 2.7587534839097556e-05, |
| "loss": 0.3348, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.1924257932446265, |
| "grad_norm": 0.36780049563494116, |
| "learning_rate": 2.756105409316345e-05, |
| "loss": 0.3042, |
| "step": 1071 |
| }, |
| { |
| "epoch": 2.194472876151484, |
| "grad_norm": 0.3750046262445992, |
| "learning_rate": 2.7534557872703705e-05, |
| "loss": 0.3286, |
| "step": 1072 |
| }, |
| { |
| "epoch": 2.196519959058342, |
| "grad_norm": 0.3643852622155405, |
| "learning_rate": 2.750804623194574e-05, |
| "loss": 0.3202, |
| "step": 1073 |
| }, |
| { |
| "epoch": 2.1985670419651995, |
| "grad_norm": 0.35562711733903674, |
| "learning_rate": 2.7481519225148537e-05, |
| "loss": 0.3158, |
| "step": 1074 |
| }, |
| { |
| "epoch": 2.2006141248720574, |
| "grad_norm": 0.34699724820587735, |
| "learning_rate": 2.7454976906602513e-05, |
| "loss": 0.3635, |
| "step": 1075 |
| }, |
| { |
| "epoch": 2.202661207778915, |
| "grad_norm": 0.3632869672814209, |
| "learning_rate": 2.742841933062944e-05, |
| "loss": 0.3378, |
| "step": 1076 |
| }, |
| { |
| "epoch": 2.204708290685773, |
| "grad_norm": 0.35456528308188723, |
| "learning_rate": 2.7401846551582304e-05, |
| "loss": 0.3078, |
| "step": 1077 |
| }, |
| { |
| "epoch": 2.2067553735926304, |
| "grad_norm": 0.4081395245932041, |
| "learning_rate": 2.7375258623845207e-05, |
| "loss": 0.3429, |
| "step": 1078 |
| }, |
| { |
| "epoch": 2.2088024564994884, |
| "grad_norm": 0.34135550119349445, |
| "learning_rate": 2.7348655601833255e-05, |
| "loss": 0.3338, |
| "step": 1079 |
| }, |
| { |
| "epoch": 2.210849539406346, |
| "grad_norm": 0.42118938931480804, |
| "learning_rate": 2.7322037539992457e-05, |
| "loss": 0.3327, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.212896622313204, |
| "grad_norm": 0.37577320629275823, |
| "learning_rate": 2.7295404492799575e-05, |
| "loss": 0.3002, |
| "step": 1081 |
| }, |
| { |
| "epoch": 2.2149437052200613, |
| "grad_norm": 0.3303091254072807, |
| "learning_rate": 2.726875651476207e-05, |
| "loss": 0.3106, |
| "step": 1082 |
| }, |
| { |
| "epoch": 2.2169907881269193, |
| "grad_norm": 0.35777928959386923, |
| "learning_rate": 2.7242093660417954e-05, |
| "loss": 0.364, |
| "step": 1083 |
| }, |
| { |
| "epoch": 2.219037871033777, |
| "grad_norm": 0.3963714636770776, |
| "learning_rate": 2.721541598433567e-05, |
| "loss": 0.2969, |
| "step": 1084 |
| }, |
| { |
| "epoch": 2.2210849539406348, |
| "grad_norm": 0.3409183879753277, |
| "learning_rate": 2.718872354111401e-05, |
| "loss": 0.3346, |
| "step": 1085 |
| }, |
| { |
| "epoch": 2.2231320368474923, |
| "grad_norm": 0.35338149712684697, |
| "learning_rate": 2.7162016385381975e-05, |
| "loss": 0.3648, |
| "step": 1086 |
| }, |
| { |
| "epoch": 2.2251791197543502, |
| "grad_norm": 0.3315730951687613, |
| "learning_rate": 2.7135294571798706e-05, |
| "loss": 0.3063, |
| "step": 1087 |
| }, |
| { |
| "epoch": 2.2272262026612077, |
| "grad_norm": 0.3449763315274245, |
| "learning_rate": 2.7108558155053296e-05, |
| "loss": 0.3403, |
| "step": 1088 |
| }, |
| { |
| "epoch": 2.2292732855680657, |
| "grad_norm": 0.37034261955083203, |
| "learning_rate": 2.7081807189864764e-05, |
| "loss": 0.3583, |
| "step": 1089 |
| }, |
| { |
| "epoch": 2.231320368474923, |
| "grad_norm": 0.3621281223181069, |
| "learning_rate": 2.70550417309819e-05, |
| "loss": 0.3144, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.233367451381781, |
| "grad_norm": 0.39746428647523574, |
| "learning_rate": 2.7028261833183132e-05, |
| "loss": 0.3376, |
| "step": 1091 |
| }, |
| { |
| "epoch": 2.2354145342886387, |
| "grad_norm": 0.34536466760948237, |
| "learning_rate": 2.7001467551276464e-05, |
| "loss": 0.2973, |
| "step": 1092 |
| }, |
| { |
| "epoch": 2.237461617195496, |
| "grad_norm": 0.3255654437979655, |
| "learning_rate": 2.6974658940099337e-05, |
| "loss": 0.3222, |
| "step": 1093 |
| }, |
| { |
| "epoch": 2.239508700102354, |
| "grad_norm": 0.34570256484501904, |
| "learning_rate": 2.6947836054518484e-05, |
| "loss": 0.3585, |
| "step": 1094 |
| }, |
| { |
| "epoch": 2.241555783009212, |
| "grad_norm": 0.3325137281496525, |
| "learning_rate": 2.6920998949429913e-05, |
| "loss": 0.309, |
| "step": 1095 |
| }, |
| { |
| "epoch": 2.2436028659160696, |
| "grad_norm": 0.34364314604723273, |
| "learning_rate": 2.6894147679758678e-05, |
| "loss": 0.332, |
| "step": 1096 |
| }, |
| { |
| "epoch": 2.245649948822927, |
| "grad_norm": 0.3323431139976392, |
| "learning_rate": 2.6867282300458853e-05, |
| "loss": 0.3365, |
| "step": 1097 |
| }, |
| { |
| "epoch": 2.247697031729785, |
| "grad_norm": 0.36326221718241064, |
| "learning_rate": 2.684040286651338e-05, |
| "loss": 0.3361, |
| "step": 1098 |
| }, |
| { |
| "epoch": 2.2497441146366426, |
| "grad_norm": 0.3309544898142775, |
| "learning_rate": 2.6813509432933957e-05, |
| "loss": 0.3142, |
| "step": 1099 |
| }, |
| { |
| "epoch": 2.2517911975435005, |
| "grad_norm": 0.3686682735992276, |
| "learning_rate": 2.6786602054760952e-05, |
| "loss": 0.3078, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.253838280450358, |
| "grad_norm": 0.3515712454533351, |
| "learning_rate": 2.675968078706326e-05, |
| "loss": 0.3151, |
| "step": 1101 |
| }, |
| { |
| "epoch": 2.255885363357216, |
| "grad_norm": 0.3495717010136385, |
| "learning_rate": 2.673274568493821e-05, |
| "loss": 0.3243, |
| "step": 1102 |
| }, |
| { |
| "epoch": 2.2579324462640735, |
| "grad_norm": 0.34663490722766044, |
| "learning_rate": 2.670579680351143e-05, |
| "loss": 0.3284, |
| "step": 1103 |
| }, |
| { |
| "epoch": 2.2599795291709315, |
| "grad_norm": 0.34744444318737083, |
| "learning_rate": 2.667883419793676e-05, |
| "loss": 0.313, |
| "step": 1104 |
| }, |
| { |
| "epoch": 2.262026612077789, |
| "grad_norm": 0.38465408810908674, |
| "learning_rate": 2.6651857923396132e-05, |
| "loss": 0.3759, |
| "step": 1105 |
| }, |
| { |
| "epoch": 2.264073694984647, |
| "grad_norm": 0.3414863910860875, |
| "learning_rate": 2.6624868035099445e-05, |
| "loss": 0.3336, |
| "step": 1106 |
| }, |
| { |
| "epoch": 2.2661207778915045, |
| "grad_norm": 0.33861934140219296, |
| "learning_rate": 2.659786458828446e-05, |
| "loss": 0.3587, |
| "step": 1107 |
| }, |
| { |
| "epoch": 2.2681678607983624, |
| "grad_norm": 0.32698315646381576, |
| "learning_rate": 2.6570847638216698e-05, |
| "loss": 0.3506, |
| "step": 1108 |
| }, |
| { |
| "epoch": 2.27021494370522, |
| "grad_norm": 0.3303135891963801, |
| "learning_rate": 2.65438172401893e-05, |
| "loss": 0.3451, |
| "step": 1109 |
| }, |
| { |
| "epoch": 2.272262026612078, |
| "grad_norm": 0.3769317246092125, |
| "learning_rate": 2.6516773449522936e-05, |
| "loss": 0.3257, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.2743091095189354, |
| "grad_norm": 0.3511492114236981, |
| "learning_rate": 2.648971632156569e-05, |
| "loss": 0.3147, |
| "step": 1111 |
| }, |
| { |
| "epoch": 2.2763561924257933, |
| "grad_norm": 0.3675620800860699, |
| "learning_rate": 2.6462645911692938e-05, |
| "loss": 0.2979, |
| "step": 1112 |
| }, |
| { |
| "epoch": 2.278403275332651, |
| "grad_norm": 0.3792036243843211, |
| "learning_rate": 2.643556227530724e-05, |
| "loss": 0.3065, |
| "step": 1113 |
| }, |
| { |
| "epoch": 2.280450358239509, |
| "grad_norm": 0.32407850095411667, |
| "learning_rate": 2.6408465467838225e-05, |
| "loss": 0.332, |
| "step": 1114 |
| }, |
| { |
| "epoch": 2.2824974411463663, |
| "grad_norm": 0.31664427206527734, |
| "learning_rate": 2.6381355544742482e-05, |
| "loss": 0.3963, |
| "step": 1115 |
| }, |
| { |
| "epoch": 2.2845445240532243, |
| "grad_norm": 0.34686617702001726, |
| "learning_rate": 2.6354232561503433e-05, |
| "loss": 0.3357, |
| "step": 1116 |
| }, |
| { |
| "epoch": 2.286591606960082, |
| "grad_norm": 0.33621332629180944, |
| "learning_rate": 2.632709657363124e-05, |
| "loss": 0.3089, |
| "step": 1117 |
| }, |
| { |
| "epoch": 2.2886386898669397, |
| "grad_norm": 0.33153383111460555, |
| "learning_rate": 2.6299947636662673e-05, |
| "loss": 0.3054, |
| "step": 1118 |
| }, |
| { |
| "epoch": 2.2906857727737973, |
| "grad_norm": 0.3416358925904056, |
| "learning_rate": 2.6272785806161005e-05, |
| "loss": 0.3278, |
| "step": 1119 |
| }, |
| { |
| "epoch": 2.292732855680655, |
| "grad_norm": 0.40808146160514075, |
| "learning_rate": 2.6245611137715897e-05, |
| "loss": 0.3519, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.2947799385875127, |
| "grad_norm": 0.32524026497753233, |
| "learning_rate": 2.621842368694329e-05, |
| "loss": 0.3239, |
| "step": 1121 |
| }, |
| { |
| "epoch": 2.2968270214943707, |
| "grad_norm": 0.3658777033819554, |
| "learning_rate": 2.6191223509485273e-05, |
| "loss": 0.3286, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.298874104401228, |
| "grad_norm": 0.36322923087428066, |
| "learning_rate": 2.6164010661010007e-05, |
| "loss": 0.3364, |
| "step": 1123 |
| }, |
| { |
| "epoch": 2.300921187308086, |
| "grad_norm": 0.3278681613056945, |
| "learning_rate": 2.613678519721155e-05, |
| "loss": 0.3086, |
| "step": 1124 |
| }, |
| { |
| "epoch": 2.3029682702149437, |
| "grad_norm": 0.3547085800078984, |
| "learning_rate": 2.61095471738098e-05, |
| "loss": 0.3215, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.3050153531218016, |
| "grad_norm": 0.31393691776472127, |
| "learning_rate": 2.6082296646550364e-05, |
| "loss": 0.3114, |
| "step": 1126 |
| }, |
| { |
| "epoch": 2.307062436028659, |
| "grad_norm": 0.35347094972995313, |
| "learning_rate": 2.605503367120442e-05, |
| "loss": 0.3621, |
| "step": 1127 |
| }, |
| { |
| "epoch": 2.309109518935517, |
| "grad_norm": 0.3314912682101731, |
| "learning_rate": 2.6027758303568643e-05, |
| "loss": 0.3196, |
| "step": 1128 |
| }, |
| { |
| "epoch": 2.3111566018423746, |
| "grad_norm": 0.30778697872934085, |
| "learning_rate": 2.6000470599465065e-05, |
| "loss": 0.3068, |
| "step": 1129 |
| }, |
| { |
| "epoch": 2.313203684749232, |
| "grad_norm": 0.37088270704077186, |
| "learning_rate": 2.5973170614740946e-05, |
| "loss": 0.373, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.31525076765609, |
| "grad_norm": 0.34268176472285344, |
| "learning_rate": 2.5945858405268714e-05, |
| "loss": 0.3413, |
| "step": 1131 |
| }, |
| { |
| "epoch": 2.317297850562948, |
| "grad_norm": 0.3248279403276539, |
| "learning_rate": 2.5918534026945787e-05, |
| "loss": 0.3559, |
| "step": 1132 |
| }, |
| { |
| "epoch": 2.3193449334698055, |
| "grad_norm": 0.30464581495860377, |
| "learning_rate": 2.5891197535694507e-05, |
| "loss": 0.3367, |
| "step": 1133 |
| }, |
| { |
| "epoch": 2.321392016376663, |
| "grad_norm": 0.35318148922449216, |
| "learning_rate": 2.5863848987461993e-05, |
| "loss": 0.3529, |
| "step": 1134 |
| }, |
| { |
| "epoch": 2.323439099283521, |
| "grad_norm": 0.37807352066757405, |
| "learning_rate": 2.5836488438220044e-05, |
| "loss": 0.3347, |
| "step": 1135 |
| }, |
| { |
| "epoch": 2.325486182190379, |
| "grad_norm": 0.3608071303513835, |
| "learning_rate": 2.5809115943965027e-05, |
| "loss": 0.3366, |
| "step": 1136 |
| }, |
| { |
| "epoch": 2.3275332650972365, |
| "grad_norm": 0.3499879431406946, |
| "learning_rate": 2.5781731560717745e-05, |
| "loss": 0.3106, |
| "step": 1137 |
| }, |
| { |
| "epoch": 2.329580348004094, |
| "grad_norm": 0.3591909142140177, |
| "learning_rate": 2.575433534452334e-05, |
| "loss": 0.3396, |
| "step": 1138 |
| }, |
| { |
| "epoch": 2.331627430910952, |
| "grad_norm": 0.33212351658248346, |
| "learning_rate": 2.5726927351451178e-05, |
| "loss": 0.3439, |
| "step": 1139 |
| }, |
| { |
| "epoch": 2.3336745138178094, |
| "grad_norm": 0.36284882910248367, |
| "learning_rate": 2.5699507637594706e-05, |
| "loss": 0.304, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.3357215967246674, |
| "grad_norm": 0.34302795456823765, |
| "learning_rate": 2.5672076259071385e-05, |
| "loss": 0.3276, |
| "step": 1141 |
| }, |
| { |
| "epoch": 2.337768679631525, |
| "grad_norm": 0.36338586044821625, |
| "learning_rate": 2.5644633272022536e-05, |
| "loss": 0.36, |
| "step": 1142 |
| }, |
| { |
| "epoch": 2.339815762538383, |
| "grad_norm": 0.3428090299047305, |
| "learning_rate": 2.561717873261323e-05, |
| "loss": 0.3062, |
| "step": 1143 |
| }, |
| { |
| "epoch": 2.3418628454452404, |
| "grad_norm": 0.38038284417246715, |
| "learning_rate": 2.558971269703219e-05, |
| "loss": 0.3555, |
| "step": 1144 |
| }, |
| { |
| "epoch": 2.3439099283520983, |
| "grad_norm": 0.35045801763083695, |
| "learning_rate": 2.556223522149168e-05, |
| "loss": 0.3433, |
| "step": 1145 |
| }, |
| { |
| "epoch": 2.345957011258956, |
| "grad_norm": 0.36682906880387844, |
| "learning_rate": 2.5534746362227355e-05, |
| "loss": 0.3507, |
| "step": 1146 |
| }, |
| { |
| "epoch": 2.348004094165814, |
| "grad_norm": 0.34210491558572537, |
| "learning_rate": 2.5507246175498174e-05, |
| "loss": 0.3178, |
| "step": 1147 |
| }, |
| { |
| "epoch": 2.3500511770726713, |
| "grad_norm": 0.3421821678613336, |
| "learning_rate": 2.5479734717586285e-05, |
| "loss": 0.3124, |
| "step": 1148 |
| }, |
| { |
| "epoch": 2.3520982599795293, |
| "grad_norm": 0.37370205112447163, |
| "learning_rate": 2.5452212044796912e-05, |
| "loss": 0.3179, |
| "step": 1149 |
| }, |
| { |
| "epoch": 2.3541453428863868, |
| "grad_norm": 0.3575054301368409, |
| "learning_rate": 2.5424678213458202e-05, |
| "loss": 0.2982, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.3561924257932447, |
| "grad_norm": 0.4005011767035321, |
| "learning_rate": 2.539713327992117e-05, |
| "loss": 0.329, |
| "step": 1151 |
| }, |
| { |
| "epoch": 2.3582395087001022, |
| "grad_norm": 0.3796694210246127, |
| "learning_rate": 2.5369577300559544e-05, |
| "loss": 0.3495, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.36028659160696, |
| "grad_norm": 0.38531974828237286, |
| "learning_rate": 2.5342010331769635e-05, |
| "loss": 0.3218, |
| "step": 1153 |
| }, |
| { |
| "epoch": 2.3623336745138177, |
| "grad_norm": 0.4150417947081143, |
| "learning_rate": 2.531443242997029e-05, |
| "loss": 0.3714, |
| "step": 1154 |
| }, |
| { |
| "epoch": 2.3643807574206757, |
| "grad_norm": 0.3665969184427945, |
| "learning_rate": 2.5286843651602688e-05, |
| "loss": 0.33, |
| "step": 1155 |
| }, |
| { |
| "epoch": 2.366427840327533, |
| "grad_norm": 0.3668439390982461, |
| "learning_rate": 2.5259244053130295e-05, |
| "loss": 0.3338, |
| "step": 1156 |
| }, |
| { |
| "epoch": 2.368474923234391, |
| "grad_norm": 0.4105022406361347, |
| "learning_rate": 2.5231633691038716e-05, |
| "loss": 0.3303, |
| "step": 1157 |
| }, |
| { |
| "epoch": 2.3705220061412486, |
| "grad_norm": 0.3610884307782202, |
| "learning_rate": 2.5204012621835575e-05, |
| "loss": 0.3108, |
| "step": 1158 |
| }, |
| { |
| "epoch": 2.3725690890481066, |
| "grad_norm": 0.39634461411533756, |
| "learning_rate": 2.5176380902050418e-05, |
| "loss": 0.3398, |
| "step": 1159 |
| }, |
| { |
| "epoch": 2.374616171954964, |
| "grad_norm": 0.3956297902185592, |
| "learning_rate": 2.5148738588234593e-05, |
| "loss": 0.3199, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.376663254861822, |
| "grad_norm": 0.33517128768421744, |
| "learning_rate": 2.5121085736961112e-05, |
| "loss": 0.3288, |
| "step": 1161 |
| }, |
| { |
| "epoch": 2.3787103377686796, |
| "grad_norm": 0.3543225737911076, |
| "learning_rate": 2.5093422404824574e-05, |
| "loss": 0.3483, |
| "step": 1162 |
| }, |
| { |
| "epoch": 2.3807574206755375, |
| "grad_norm": 0.35329611481723555, |
| "learning_rate": 2.506574864844102e-05, |
| "loss": 0.3439, |
| "step": 1163 |
| }, |
| { |
| "epoch": 2.382804503582395, |
| "grad_norm": 0.3415016772188138, |
| "learning_rate": 2.5038064524447827e-05, |
| "loss": 0.3461, |
| "step": 1164 |
| }, |
| { |
| "epoch": 2.384851586489253, |
| "grad_norm": 0.3618115220444992, |
| "learning_rate": 2.5010370089503578e-05, |
| "loss": 0.3243, |
| "step": 1165 |
| }, |
| { |
| "epoch": 2.3868986693961105, |
| "grad_norm": 0.36205472648695425, |
| "learning_rate": 2.4982665400287972e-05, |
| "loss": 0.3411, |
| "step": 1166 |
| }, |
| { |
| "epoch": 2.3889457523029685, |
| "grad_norm": 0.43026672506995006, |
| "learning_rate": 2.4954950513501697e-05, |
| "loss": 0.3319, |
| "step": 1167 |
| }, |
| { |
| "epoch": 2.390992835209826, |
| "grad_norm": 0.350243563017003, |
| "learning_rate": 2.4927225485866297e-05, |
| "loss": 0.3479, |
| "step": 1168 |
| }, |
| { |
| "epoch": 2.393039918116684, |
| "grad_norm": 0.3746353739097793, |
| "learning_rate": 2.4899490374124085e-05, |
| "loss": 0.3429, |
| "step": 1169 |
| }, |
| { |
| "epoch": 2.3950870010235414, |
| "grad_norm": 0.3260898872775567, |
| "learning_rate": 2.4871745235038006e-05, |
| "loss": 0.3472, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.397134083930399, |
| "grad_norm": 0.33558457363741073, |
| "learning_rate": 2.4843990125391516e-05, |
| "loss": 0.328, |
| "step": 1171 |
| }, |
| { |
| "epoch": 2.399181166837257, |
| "grad_norm": 0.38996942202586005, |
| "learning_rate": 2.4816225101988506e-05, |
| "loss": 0.3391, |
| "step": 1172 |
| }, |
| { |
| "epoch": 2.401228249744115, |
| "grad_norm": 0.3566387434855101, |
| "learning_rate": 2.478845022165313e-05, |
| "loss": 0.3667, |
| "step": 1173 |
| }, |
| { |
| "epoch": 2.4032753326509724, |
| "grad_norm": 0.3444970227104489, |
| "learning_rate": 2.4760665541229712e-05, |
| "loss": 0.3301, |
| "step": 1174 |
| }, |
| { |
| "epoch": 2.40532241555783, |
| "grad_norm": 0.3092553635632143, |
| "learning_rate": 2.473287111758267e-05, |
| "loss": 0.3401, |
| "step": 1175 |
| }, |
| { |
| "epoch": 2.407369498464688, |
| "grad_norm": 0.3394352190867357, |
| "learning_rate": 2.470506700759631e-05, |
| "loss": 0.3218, |
| "step": 1176 |
| }, |
| { |
| "epoch": 2.409416581371546, |
| "grad_norm": 0.3393116279567721, |
| "learning_rate": 2.467725326817481e-05, |
| "loss": 0.3113, |
| "step": 1177 |
| }, |
| { |
| "epoch": 2.4114636642784033, |
| "grad_norm": 0.399667522842199, |
| "learning_rate": 2.464942995624203e-05, |
| "loss": 0.3269, |
| "step": 1178 |
| }, |
| { |
| "epoch": 2.413510747185261, |
| "grad_norm": 0.3505124387280659, |
| "learning_rate": 2.462159712874142e-05, |
| "loss": 0.3411, |
| "step": 1179 |
| }, |
| { |
| "epoch": 2.4155578300921188, |
| "grad_norm": 0.3519791912196212, |
| "learning_rate": 2.4593754842635917e-05, |
| "loss": 0.3036, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.4176049129989763, |
| "grad_norm": 0.36680184492885926, |
| "learning_rate": 2.4565903154907807e-05, |
| "loss": 0.3898, |
| "step": 1181 |
| }, |
| { |
| "epoch": 2.4196519959058342, |
| "grad_norm": 0.3778349635443584, |
| "learning_rate": 2.453804212255862e-05, |
| "loss": 0.3427, |
| "step": 1182 |
| }, |
| { |
| "epoch": 2.4216990788126918, |
| "grad_norm": 0.3389030241399457, |
| "learning_rate": 2.451017180260902e-05, |
| "loss": 0.3271, |
| "step": 1183 |
| }, |
| { |
| "epoch": 2.4237461617195497, |
| "grad_norm": 0.42513595322376974, |
| "learning_rate": 2.448229225209865e-05, |
| "loss": 0.3298, |
| "step": 1184 |
| }, |
| { |
| "epoch": 2.425793244626407, |
| "grad_norm": 0.3712368776548631, |
| "learning_rate": 2.4454403528086088e-05, |
| "loss": 0.3323, |
| "step": 1185 |
| }, |
| { |
| "epoch": 2.427840327533265, |
| "grad_norm": 0.3200914239556184, |
| "learning_rate": 2.4426505687648653e-05, |
| "loss": 0.3387, |
| "step": 1186 |
| }, |
| { |
| "epoch": 2.4298874104401227, |
| "grad_norm": 0.346315254552554, |
| "learning_rate": 2.4398598787882334e-05, |
| "loss": 0.3449, |
| "step": 1187 |
| }, |
| { |
| "epoch": 2.4319344933469806, |
| "grad_norm": 0.3184464070741258, |
| "learning_rate": 2.4370682885901657e-05, |
| "loss": 0.3006, |
| "step": 1188 |
| }, |
| { |
| "epoch": 2.433981576253838, |
| "grad_norm": 0.34720894372009287, |
| "learning_rate": 2.4342758038839573e-05, |
| "loss": 0.3354, |
| "step": 1189 |
| }, |
| { |
| "epoch": 2.436028659160696, |
| "grad_norm": 0.3963098146391974, |
| "learning_rate": 2.4314824303847342e-05, |
| "loss": 0.3273, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.4380757420675536, |
| "grad_norm": 0.3419114471543989, |
| "learning_rate": 2.4286881738094418e-05, |
| "loss": 0.3334, |
| "step": 1191 |
| }, |
| { |
| "epoch": 2.4401228249744116, |
| "grad_norm": 0.3476204838650686, |
| "learning_rate": 2.4258930398768317e-05, |
| "loss": 0.3405, |
| "step": 1192 |
| }, |
| { |
| "epoch": 2.442169907881269, |
| "grad_norm": 0.3398409708251329, |
| "learning_rate": 2.423097034307452e-05, |
| "loss": 0.2964, |
| "step": 1193 |
| }, |
| { |
| "epoch": 2.444216990788127, |
| "grad_norm": 0.33597865186885006, |
| "learning_rate": 2.4203001628236346e-05, |
| "loss": 0.3122, |
| "step": 1194 |
| }, |
| { |
| "epoch": 2.4462640736949846, |
| "grad_norm": 0.7573334739644073, |
| "learning_rate": 2.4175024311494835e-05, |
| "loss": 0.37, |
| "step": 1195 |
| }, |
| { |
| "epoch": 2.4483111566018425, |
| "grad_norm": 0.3096397452376292, |
| "learning_rate": 2.4147038450108627e-05, |
| "loss": 0.3462, |
| "step": 1196 |
| }, |
| { |
| "epoch": 2.4503582395087, |
| "grad_norm": 0.32363631370662416, |
| "learning_rate": 2.4119044101353853e-05, |
| "loss": 0.3089, |
| "step": 1197 |
| }, |
| { |
| "epoch": 2.452405322415558, |
| "grad_norm": 0.44476946502851955, |
| "learning_rate": 2.4091041322524023e-05, |
| "loss": 0.3891, |
| "step": 1198 |
| }, |
| { |
| "epoch": 2.4544524053224155, |
| "grad_norm": 0.34282272355962545, |
| "learning_rate": 2.406303017092988e-05, |
| "loss": 0.3672, |
| "step": 1199 |
| }, |
| { |
| "epoch": 2.4564994882292734, |
| "grad_norm": 0.3458362218172388, |
| "learning_rate": 2.403501070389932e-05, |
| "loss": 0.3446, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.458546571136131, |
| "grad_norm": 0.330522133423883, |
| "learning_rate": 2.4006982978777263e-05, |
| "loss": 0.3547, |
| "step": 1201 |
| }, |
| { |
| "epoch": 2.460593654042989, |
| "grad_norm": 0.33086606944472513, |
| "learning_rate": 2.39789470529255e-05, |
| "loss": 0.3145, |
| "step": 1202 |
| }, |
| { |
| "epoch": 2.4626407369498464, |
| "grad_norm": 0.3611588885012983, |
| "learning_rate": 2.3950902983722645e-05, |
| "loss": 0.3512, |
| "step": 1203 |
| }, |
| { |
| "epoch": 2.4646878198567044, |
| "grad_norm": 0.35160175468657195, |
| "learning_rate": 2.392285082856394e-05, |
| "loss": 0.331, |
| "step": 1204 |
| }, |
| { |
| "epoch": 2.466734902763562, |
| "grad_norm": 0.3329347867861998, |
| "learning_rate": 2.389479064486121e-05, |
| "loss": 0.3697, |
| "step": 1205 |
| }, |
| { |
| "epoch": 2.46878198567042, |
| "grad_norm": 0.35641819087178006, |
| "learning_rate": 2.3866722490042685e-05, |
| "loss": 0.3606, |
| "step": 1206 |
| }, |
| { |
| "epoch": 2.4708290685772774, |
| "grad_norm": 0.3604571656733162, |
| "learning_rate": 2.3838646421552917e-05, |
| "loss": 0.3377, |
| "step": 1207 |
| }, |
| { |
| "epoch": 2.472876151484135, |
| "grad_norm": 0.3352126815168747, |
| "learning_rate": 2.3810562496852666e-05, |
| "loss": 0.3262, |
| "step": 1208 |
| }, |
| { |
| "epoch": 2.474923234390993, |
| "grad_norm": 0.35666331619036534, |
| "learning_rate": 2.3782470773418756e-05, |
| "loss": 0.339, |
| "step": 1209 |
| }, |
| { |
| "epoch": 2.4769703172978508, |
| "grad_norm": 0.39672935648110513, |
| "learning_rate": 2.3754371308743975e-05, |
| "loss": 0.33, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.4790174002047083, |
| "grad_norm": 0.3591230233614195, |
| "learning_rate": 2.372626416033696e-05, |
| "loss": 0.3209, |
| "step": 1211 |
| }, |
| { |
| "epoch": 2.481064483111566, |
| "grad_norm": 0.34331298511904995, |
| "learning_rate": 2.3698149385722067e-05, |
| "loss": 0.3376, |
| "step": 1212 |
| }, |
| { |
| "epoch": 2.4831115660184238, |
| "grad_norm": 0.3934047338427704, |
| "learning_rate": 2.367002704243927e-05, |
| "loss": 0.3198, |
| "step": 1213 |
| }, |
| { |
| "epoch": 2.4851586489252817, |
| "grad_norm": 0.34587155423784893, |
| "learning_rate": 2.3641897188044018e-05, |
| "loss": 0.3442, |
| "step": 1214 |
| }, |
| { |
| "epoch": 2.487205731832139, |
| "grad_norm": 0.3681467355523078, |
| "learning_rate": 2.3613759880107133e-05, |
| "loss": 0.343, |
| "step": 1215 |
| }, |
| { |
| "epoch": 2.4892528147389967, |
| "grad_norm": 0.3249289712237619, |
| "learning_rate": 2.3585615176214716e-05, |
| "loss": 0.3066, |
| "step": 1216 |
| }, |
| { |
| "epoch": 2.4912998976458547, |
| "grad_norm": 0.37477832639872893, |
| "learning_rate": 2.3557463133967976e-05, |
| "loss": 0.3325, |
| "step": 1217 |
| }, |
| { |
| "epoch": 2.493346980552712, |
| "grad_norm": 0.36178361521501945, |
| "learning_rate": 2.3529303810983154e-05, |
| "loss": 0.3127, |
| "step": 1218 |
| }, |
| { |
| "epoch": 2.49539406345957, |
| "grad_norm": 0.32831527326831483, |
| "learning_rate": 2.3501137264891396e-05, |
| "loss": 0.3248, |
| "step": 1219 |
| }, |
| { |
| "epoch": 2.4974411463664277, |
| "grad_norm": 0.4027644606883135, |
| "learning_rate": 2.3472963553338614e-05, |
| "loss": 0.3023, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.4994882292732856, |
| "grad_norm": 0.3439537192088776, |
| "learning_rate": 2.3444782733985396e-05, |
| "loss": 0.3252, |
| "step": 1221 |
| }, |
| { |
| "epoch": 2.501535312180143, |
| "grad_norm": 0.32953719599686015, |
| "learning_rate": 2.3416594864506887e-05, |
| "loss": 0.3043, |
| "step": 1222 |
| }, |
| { |
| "epoch": 2.503582395087001, |
| "grad_norm": 0.3490669906957581, |
| "learning_rate": 2.338840000259264e-05, |
| "loss": 0.3133, |
| "step": 1223 |
| }, |
| { |
| "epoch": 2.5056294779938586, |
| "grad_norm": 0.36299216934576684, |
| "learning_rate": 2.3360198205946542e-05, |
| "loss": 0.3141, |
| "step": 1224 |
| }, |
| { |
| "epoch": 2.5076765609007166, |
| "grad_norm": 0.3180197591727672, |
| "learning_rate": 2.333198953228664e-05, |
| "loss": 0.3535, |
| "step": 1225 |
| }, |
| { |
| "epoch": 2.509723643807574, |
| "grad_norm": 0.5171123946314492, |
| "learning_rate": 2.3303774039345098e-05, |
| "loss": 0.3367, |
| "step": 1226 |
| }, |
| { |
| "epoch": 2.511770726714432, |
| "grad_norm": 0.3840513193856834, |
| "learning_rate": 2.3275551784867997e-05, |
| "loss": 0.3188, |
| "step": 1227 |
| }, |
| { |
| "epoch": 2.5138178096212895, |
| "grad_norm": 0.3346463848714001, |
| "learning_rate": 2.3247322826615276e-05, |
| "loss": 0.3596, |
| "step": 1228 |
| }, |
| { |
| "epoch": 2.5158648925281475, |
| "grad_norm": 0.3505848734275376, |
| "learning_rate": 2.3219087222360603e-05, |
| "loss": 0.3395, |
| "step": 1229 |
| }, |
| { |
| "epoch": 2.517911975435005, |
| "grad_norm": 0.3415846729855278, |
| "learning_rate": 2.3190845029891218e-05, |
| "loss": 0.3326, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.519959058341863, |
| "grad_norm": 0.3273579902423157, |
| "learning_rate": 2.316259630700787e-05, |
| "loss": 0.3344, |
| "step": 1231 |
| }, |
| { |
| "epoch": 2.5220061412487205, |
| "grad_norm": 0.3247531510829717, |
| "learning_rate": 2.313434111152467e-05, |
| "loss": 0.3346, |
| "step": 1232 |
| }, |
| { |
| "epoch": 2.5240532241555784, |
| "grad_norm": 0.3503893886731608, |
| "learning_rate": 2.310607950126896e-05, |
| "loss": 0.3448, |
| "step": 1233 |
| }, |
| { |
| "epoch": 2.526100307062436, |
| "grad_norm": 0.35713820818797615, |
| "learning_rate": 2.307781153408124e-05, |
| "loss": 0.3359, |
| "step": 1234 |
| }, |
| { |
| "epoch": 2.528147389969294, |
| "grad_norm": 0.34251280908459203, |
| "learning_rate": 2.3049537267814984e-05, |
| "loss": 0.3431, |
| "step": 1235 |
| }, |
| { |
| "epoch": 2.5301944728761514, |
| "grad_norm": 0.34661535982225733, |
| "learning_rate": 2.3021256760336583e-05, |
| "loss": 0.3604, |
| "step": 1236 |
| }, |
| { |
| "epoch": 2.5322415557830094, |
| "grad_norm": 0.3630834914867728, |
| "learning_rate": 2.2992970069525202e-05, |
| "loss": 0.3472, |
| "step": 1237 |
| }, |
| { |
| "epoch": 2.534288638689867, |
| "grad_norm": 0.318836844861738, |
| "learning_rate": 2.296467725327264e-05, |
| "loss": 0.3174, |
| "step": 1238 |
| }, |
| { |
| "epoch": 2.536335721596725, |
| "grad_norm": 0.3365825133835808, |
| "learning_rate": 2.293637836948325e-05, |
| "loss": 0.3093, |
| "step": 1239 |
| }, |
| { |
| "epoch": 2.5383828045035823, |
| "grad_norm": 0.35403182852053533, |
| "learning_rate": 2.29080734760738e-05, |
| "loss": 0.3382, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.54042988741044, |
| "grad_norm": 0.3562520797769209, |
| "learning_rate": 2.2879762630973355e-05, |
| "loss": 0.3315, |
| "step": 1241 |
| }, |
| { |
| "epoch": 2.542476970317298, |
| "grad_norm": 0.3188066365019869, |
| "learning_rate": 2.285144589212316e-05, |
| "loss": 0.3079, |
| "step": 1242 |
| }, |
| { |
| "epoch": 2.5445240532241558, |
| "grad_norm": 0.3556195009937043, |
| "learning_rate": 2.2823123317476522e-05, |
| "loss": 0.3422, |
| "step": 1243 |
| }, |
| { |
| "epoch": 2.5465711361310133, |
| "grad_norm": 0.3474070822067162, |
| "learning_rate": 2.2794794964998705e-05, |
| "loss": 0.3049, |
| "step": 1244 |
| }, |
| { |
| "epoch": 2.548618219037871, |
| "grad_norm": 0.3464039443053579, |
| "learning_rate": 2.276646089266677e-05, |
| "loss": 0.2992, |
| "step": 1245 |
| }, |
| { |
| "epoch": 2.5506653019447287, |
| "grad_norm": 0.3767709540930306, |
| "learning_rate": 2.273812115846951e-05, |
| "loss": 0.3726, |
| "step": 1246 |
| }, |
| { |
| "epoch": 2.5527123848515867, |
| "grad_norm": 0.3206160362666913, |
| "learning_rate": 2.2709775820407292e-05, |
| "loss": 0.2909, |
| "step": 1247 |
| }, |
| { |
| "epoch": 2.554759467758444, |
| "grad_norm": 0.3673509781890066, |
| "learning_rate": 2.2681424936491954e-05, |
| "loss": 0.3669, |
| "step": 1248 |
| }, |
| { |
| "epoch": 2.5568065506653017, |
| "grad_norm": 0.3401943174394405, |
| "learning_rate": 2.2653068564746692e-05, |
| "loss": 0.3403, |
| "step": 1249 |
| }, |
| { |
| "epoch": 2.5588536335721597, |
| "grad_norm": 0.3490447254588359, |
| "learning_rate": 2.2624706763205935e-05, |
| "loss": 0.3603, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.5609007164790176, |
| "grad_norm": 0.3482076297056933, |
| "learning_rate": 2.2596339589915197e-05, |
| "loss": 0.3554, |
| "step": 1251 |
| }, |
| { |
| "epoch": 2.562947799385875, |
| "grad_norm": 0.3115817757213185, |
| "learning_rate": 2.2567967102931025e-05, |
| "loss": 0.3136, |
| "step": 1252 |
| }, |
| { |
| "epoch": 2.5649948822927326, |
| "grad_norm": 0.32599919557680007, |
| "learning_rate": 2.2539589360320802e-05, |
| "loss": 0.3256, |
| "step": 1253 |
| }, |
| { |
| "epoch": 2.5670419651995906, |
| "grad_norm": 0.3039164892771023, |
| "learning_rate": 2.2511206420162716e-05, |
| "loss": 0.3414, |
| "step": 1254 |
| }, |
| { |
| "epoch": 2.5690890481064486, |
| "grad_norm": 0.31157751749513196, |
| "learning_rate": 2.2482818340545534e-05, |
| "loss": 0.3102, |
| "step": 1255 |
| }, |
| { |
| "epoch": 2.571136131013306, |
| "grad_norm": 0.33852273483094864, |
| "learning_rate": 2.2454425179568594e-05, |
| "loss": 0.3434, |
| "step": 1256 |
| }, |
| { |
| "epoch": 2.5731832139201636, |
| "grad_norm": 0.30672315678878886, |
| "learning_rate": 2.2426026995341602e-05, |
| "loss": 0.318, |
| "step": 1257 |
| }, |
| { |
| "epoch": 2.5752302968270215, |
| "grad_norm": 0.31206788352544473, |
| "learning_rate": 2.2397623845984548e-05, |
| "loss": 0.3749, |
| "step": 1258 |
| }, |
| { |
| "epoch": 2.5772773797338795, |
| "grad_norm": 0.30841479866018223, |
| "learning_rate": 2.2369215789627593e-05, |
| "loss": 0.298, |
| "step": 1259 |
| }, |
| { |
| "epoch": 2.579324462640737, |
| "grad_norm": 0.32527208718455825, |
| "learning_rate": 2.234080288441095e-05, |
| "loss": 0.3419, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.5813715455475945, |
| "grad_norm": 0.3124189339174951, |
| "learning_rate": 2.2312385188484718e-05, |
| "loss": 0.3501, |
| "step": 1261 |
| }, |
| { |
| "epoch": 2.5834186284544525, |
| "grad_norm": 0.3352181407147001, |
| "learning_rate": 2.2283962760008845e-05, |
| "loss": 0.339, |
| "step": 1262 |
| }, |
| { |
| "epoch": 2.58546571136131, |
| "grad_norm": 0.3548208328356842, |
| "learning_rate": 2.225553565715294e-05, |
| "loss": 0.3334, |
| "step": 1263 |
| }, |
| { |
| "epoch": 2.587512794268168, |
| "grad_norm": 0.32210075431253954, |
| "learning_rate": 2.2227103938096176e-05, |
| "loss": 0.3139, |
| "step": 1264 |
| }, |
| { |
| "epoch": 2.5895598771750254, |
| "grad_norm": 0.3342742670935017, |
| "learning_rate": 2.2198667661027193e-05, |
| "loss": 0.3232, |
| "step": 1265 |
| }, |
| { |
| "epoch": 2.5916069600818834, |
| "grad_norm": 0.3332141930024874, |
| "learning_rate": 2.2170226884143942e-05, |
| "loss": 0.3089, |
| "step": 1266 |
| }, |
| { |
| "epoch": 2.593654042988741, |
| "grad_norm": 0.3259468299127148, |
| "learning_rate": 2.2141781665653584e-05, |
| "loss": 0.3069, |
| "step": 1267 |
| }, |
| { |
| "epoch": 2.595701125895599, |
| "grad_norm": 0.3756435234426335, |
| "learning_rate": 2.2113332063772387e-05, |
| "loss": 0.3343, |
| "step": 1268 |
| }, |
| { |
| "epoch": 2.5977482088024564, |
| "grad_norm": 0.38118415339444334, |
| "learning_rate": 2.208487813672557e-05, |
| "loss": 0.3246, |
| "step": 1269 |
| }, |
| { |
| "epoch": 2.5997952917093143, |
| "grad_norm": 0.33475197046166133, |
| "learning_rate": 2.205641994274721e-05, |
| "loss": 0.3543, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.601842374616172, |
| "grad_norm": 0.3609114565434103, |
| "learning_rate": 2.2027957540080125e-05, |
| "loss": 0.3412, |
| "step": 1271 |
| }, |
| { |
| "epoch": 2.60388945752303, |
| "grad_norm": 0.35268666756991185, |
| "learning_rate": 2.199949098697574e-05, |
| "loss": 0.321, |
| "step": 1272 |
| }, |
| { |
| "epoch": 2.6059365404298873, |
| "grad_norm": 0.3218634099552252, |
| "learning_rate": 2.1971020341693973e-05, |
| "loss": 0.321, |
| "step": 1273 |
| }, |
| { |
| "epoch": 2.6079836233367453, |
| "grad_norm": 0.3886771490605891, |
| "learning_rate": 2.1942545662503115e-05, |
| "loss": 0.3366, |
| "step": 1274 |
| }, |
| { |
| "epoch": 2.610030706243603, |
| "grad_norm": 0.3542254238953694, |
| "learning_rate": 2.1914067007679733e-05, |
| "loss": 0.346, |
| "step": 1275 |
| }, |
| { |
| "epoch": 2.6120777891504607, |
| "grad_norm": 0.33442059420132036, |
| "learning_rate": 2.188558443550849e-05, |
| "loss": 0.3471, |
| "step": 1276 |
| }, |
| { |
| "epoch": 2.6141248720573182, |
| "grad_norm": 0.31543081486502833, |
| "learning_rate": 2.185709800428211e-05, |
| "loss": 0.3523, |
| "step": 1277 |
| }, |
| { |
| "epoch": 2.616171954964176, |
| "grad_norm": 0.32475295410492505, |
| "learning_rate": 2.1828607772301187e-05, |
| "loss": 0.3456, |
| "step": 1278 |
| }, |
| { |
| "epoch": 2.6182190378710337, |
| "grad_norm": 0.35139374027023634, |
| "learning_rate": 2.180011379787411e-05, |
| "loss": 0.3309, |
| "step": 1279 |
| }, |
| { |
| "epoch": 2.6202661207778917, |
| "grad_norm": 0.3057519944170325, |
| "learning_rate": 2.1771616139316903e-05, |
| "loss": 0.3351, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.622313203684749, |
| "grad_norm": 0.363923325870162, |
| "learning_rate": 2.174311485495317e-05, |
| "loss": 0.3046, |
| "step": 1281 |
| }, |
| { |
| "epoch": 2.6243602865916067, |
| "grad_norm": 0.3291114320223632, |
| "learning_rate": 2.1714610003113887e-05, |
| "loss": 0.303, |
| "step": 1282 |
| }, |
| { |
| "epoch": 2.6264073694984647, |
| "grad_norm": 0.3289632021661567, |
| "learning_rate": 2.168610164213738e-05, |
| "loss": 0.3213, |
| "step": 1283 |
| }, |
| { |
| "epoch": 2.6284544524053226, |
| "grad_norm": 0.33306777417969263, |
| "learning_rate": 2.1657589830369113e-05, |
| "loss": 0.351, |
| "step": 1284 |
| }, |
| { |
| "epoch": 2.63050153531218, |
| "grad_norm": 0.3612382108372884, |
| "learning_rate": 2.1629074626161647e-05, |
| "loss": 0.3868, |
| "step": 1285 |
| }, |
| { |
| "epoch": 2.6325486182190376, |
| "grad_norm": 0.3580608267992191, |
| "learning_rate": 2.1600556087874472e-05, |
| "loss": 0.3175, |
| "step": 1286 |
| }, |
| { |
| "epoch": 2.6345957011258956, |
| "grad_norm": 0.31536303959925943, |
| "learning_rate": 2.1572034273873893e-05, |
| "loss": 0.3262, |
| "step": 1287 |
| }, |
| { |
| "epoch": 2.6366427840327535, |
| "grad_norm": 0.3578677663211092, |
| "learning_rate": 2.1543509242532932e-05, |
| "loss": 0.3716, |
| "step": 1288 |
| }, |
| { |
| "epoch": 2.638689866939611, |
| "grad_norm": 0.31607653936815944, |
| "learning_rate": 2.1514981052231187e-05, |
| "loss": 0.3166, |
| "step": 1289 |
| }, |
| { |
| "epoch": 2.6407369498464686, |
| "grad_norm": 0.35206333188454375, |
| "learning_rate": 2.1486449761354727e-05, |
| "loss": 0.3315, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.6427840327533265, |
| "grad_norm": 0.34365776192029646, |
| "learning_rate": 2.145791542829597e-05, |
| "loss": 0.3225, |
| "step": 1291 |
| }, |
| { |
| "epoch": 2.6448311156601845, |
| "grad_norm": 0.40791136566579844, |
| "learning_rate": 2.142937811145354e-05, |
| "loss": 0.3839, |
| "step": 1292 |
| }, |
| { |
| "epoch": 2.646878198567042, |
| "grad_norm": 0.3334669459335626, |
| "learning_rate": 2.140083786923221e-05, |
| "loss": 0.3277, |
| "step": 1293 |
| }, |
| { |
| "epoch": 2.6489252814738995, |
| "grad_norm": 0.33721029481105136, |
| "learning_rate": 2.1372294760042686e-05, |
| "loss": 0.3396, |
| "step": 1294 |
| }, |
| { |
| "epoch": 2.6509723643807575, |
| "grad_norm": 0.33022887238565724, |
| "learning_rate": 2.1343748842301575e-05, |
| "loss": 0.3199, |
| "step": 1295 |
| }, |
| { |
| "epoch": 2.6530194472876154, |
| "grad_norm": 0.3578342406147216, |
| "learning_rate": 2.1315200174431235e-05, |
| "loss": 0.3264, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.655066530194473, |
| "grad_norm": 0.2843605578164525, |
| "learning_rate": 2.1286648814859636e-05, |
| "loss": 0.3196, |
| "step": 1297 |
| }, |
| { |
| "epoch": 2.6571136131013304, |
| "grad_norm": 0.3320141991189249, |
| "learning_rate": 2.1258094822020263e-05, |
| "loss": 0.3132, |
| "step": 1298 |
| }, |
| { |
| "epoch": 2.6591606960081884, |
| "grad_norm": 0.37866233560469814, |
| "learning_rate": 2.1229538254351995e-05, |
| "loss": 0.3238, |
| "step": 1299 |
| }, |
| { |
| "epoch": 2.661207778915046, |
| "grad_norm": 0.327131535806631, |
| "learning_rate": 2.120097917029897e-05, |
| "loss": 0.3843, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.663254861821904, |
| "grad_norm": 0.3036315122393342, |
| "learning_rate": 2.1172417628310487e-05, |
| "loss": 0.3292, |
| "step": 1301 |
| }, |
| { |
| "epoch": 2.6653019447287614, |
| "grad_norm": 0.3515024908888374, |
| "learning_rate": 2.1143853686840874e-05, |
| "loss": 0.3102, |
| "step": 1302 |
| }, |
| { |
| "epoch": 2.6673490276356193, |
| "grad_norm": 0.34581222342499085, |
| "learning_rate": 2.1115287404349357e-05, |
| "loss": 0.3156, |
| "step": 1303 |
| }, |
| { |
| "epoch": 2.669396110542477, |
| "grad_norm": 0.35858989267221897, |
| "learning_rate": 2.1086718839299972e-05, |
| "loss": 0.3461, |
| "step": 1304 |
| }, |
| { |
| "epoch": 2.671443193449335, |
| "grad_norm": 0.3475194620828264, |
| "learning_rate": 2.1058148050161412e-05, |
| "loss": 0.3357, |
| "step": 1305 |
| }, |
| { |
| "epoch": 2.6734902763561923, |
| "grad_norm": 0.3585594367622285, |
| "learning_rate": 2.1029575095406933e-05, |
| "loss": 0.3454, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.6755373592630503, |
| "grad_norm": 0.3384161065196781, |
| "learning_rate": 2.1001000033514215e-05, |
| "loss": 0.3403, |
| "step": 1307 |
| }, |
| { |
| "epoch": 2.6775844421699078, |
| "grad_norm": 0.33004176854360945, |
| "learning_rate": 2.097242292296525e-05, |
| "loss": 0.3643, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.6796315250767657, |
| "grad_norm": 0.33026393179643815, |
| "learning_rate": 2.0943843822246234e-05, |
| "loss": 0.3224, |
| "step": 1309 |
| }, |
| { |
| "epoch": 2.6816786079836232, |
| "grad_norm": 0.34661171805934476, |
| "learning_rate": 2.0915262789847414e-05, |
| "loss": 0.3368, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.683725690890481, |
| "grad_norm": 0.3157977388993276, |
| "learning_rate": 2.088667988426302e-05, |
| "loss": 0.311, |
| "step": 1311 |
| }, |
| { |
| "epoch": 2.6857727737973387, |
| "grad_norm": 0.32541710739849666, |
| "learning_rate": 2.0858095163991094e-05, |
| "loss": 0.3145, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.6878198567041967, |
| "grad_norm": 0.3317095591959358, |
| "learning_rate": 2.0829508687533387e-05, |
| "loss": 0.343, |
| "step": 1313 |
| }, |
| { |
| "epoch": 2.689866939611054, |
| "grad_norm": 0.33508091050613437, |
| "learning_rate": 2.0800920513395276e-05, |
| "loss": 0.3102, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.691914022517912, |
| "grad_norm": 0.3092939306017529, |
| "learning_rate": 2.077233070008557e-05, |
| "loss": 0.3628, |
| "step": 1315 |
| }, |
| { |
| "epoch": 2.6939611054247696, |
| "grad_norm": 0.3644518282154973, |
| "learning_rate": 2.074373930611647e-05, |
| "loss": 0.3611, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.6960081883316276, |
| "grad_norm": 0.3268481968948613, |
| "learning_rate": 2.0715146390003395e-05, |
| "loss": 0.4382, |
| "step": 1317 |
| }, |
| { |
| "epoch": 2.698055271238485, |
| "grad_norm": 0.32366874488936354, |
| "learning_rate": 2.0686552010264872e-05, |
| "loss": 0.3316, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.7001023541453426, |
| "grad_norm": 0.3655397054326707, |
| "learning_rate": 2.0657956225422438e-05, |
| "loss": 0.3241, |
| "step": 1319 |
| }, |
| { |
| "epoch": 2.7021494370522006, |
| "grad_norm": 0.3511986539192374, |
| "learning_rate": 2.0629359094000502e-05, |
| "loss": 0.3634, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.7041965199590585, |
| "grad_norm": 0.33590806443682913, |
| "learning_rate": 2.060076067452622e-05, |
| "loss": 0.3434, |
| "step": 1321 |
| }, |
| { |
| "epoch": 2.706243602865916, |
| "grad_norm": 0.34940605685458204, |
| "learning_rate": 2.0572161025529396e-05, |
| "loss": 0.3592, |
| "step": 1322 |
| }, |
| { |
| "epoch": 2.7082906857727735, |
| "grad_norm": 0.33025728634487234, |
| "learning_rate": 2.0543560205542338e-05, |
| "loss": 0.3273, |
| "step": 1323 |
| }, |
| { |
| "epoch": 2.7103377686796315, |
| "grad_norm": 0.3111565817907451, |
| "learning_rate": 2.0514958273099778e-05, |
| "loss": 0.3528, |
| "step": 1324 |
| }, |
| { |
| "epoch": 2.7123848515864895, |
| "grad_norm": 0.34384380691800237, |
| "learning_rate": 2.0486355286738675e-05, |
| "loss": 0.3279, |
| "step": 1325 |
| }, |
| { |
| "epoch": 2.714431934493347, |
| "grad_norm": 0.34483761426361903, |
| "learning_rate": 2.0457751304998196e-05, |
| "loss": 0.3154, |
| "step": 1326 |
| }, |
| { |
| "epoch": 2.7164790174002045, |
| "grad_norm": 0.34739319472868174, |
| "learning_rate": 2.042914638641952e-05, |
| "loss": 0.3122, |
| "step": 1327 |
| }, |
| { |
| "epoch": 2.7185261003070624, |
| "grad_norm": 0.34204787000879766, |
| "learning_rate": 2.0400540589545738e-05, |
| "loss": 0.2987, |
| "step": 1328 |
| }, |
| { |
| "epoch": 2.7205731832139204, |
| "grad_norm": 0.31149494456320415, |
| "learning_rate": 2.0371933972921756e-05, |
| "loss": 0.3651, |
| "step": 1329 |
| }, |
| { |
| "epoch": 2.722620266120778, |
| "grad_norm": 0.3206059172994117, |
| "learning_rate": 2.0343326595094154e-05, |
| "loss": 0.3056, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.7246673490276354, |
| "grad_norm": 0.3460394891552501, |
| "learning_rate": 2.031471851461105e-05, |
| "loss": 0.3078, |
| "step": 1331 |
| }, |
| { |
| "epoch": 2.7267144319344934, |
| "grad_norm": 0.31434160088392427, |
| "learning_rate": 2.0286109790022023e-05, |
| "loss": 0.3019, |
| "step": 1332 |
| }, |
| { |
| "epoch": 2.7287615148413513, |
| "grad_norm": 0.30988899965990013, |
| "learning_rate": 2.0257500479877965e-05, |
| "loss": 0.3606, |
| "step": 1333 |
| }, |
| { |
| "epoch": 2.730808597748209, |
| "grad_norm": 0.3038902310146715, |
| "learning_rate": 2.0228890642730967e-05, |
| "loss": 0.3188, |
| "step": 1334 |
| }, |
| { |
| "epoch": 2.7328556806550663, |
| "grad_norm": 0.33615481049383383, |
| "learning_rate": 2.020028033713418e-05, |
| "loss": 0.3233, |
| "step": 1335 |
| }, |
| { |
| "epoch": 2.7349027635619243, |
| "grad_norm": 0.3416566631514737, |
| "learning_rate": 2.0171669621641743e-05, |
| "loss": 0.3563, |
| "step": 1336 |
| }, |
| { |
| "epoch": 2.7369498464687823, |
| "grad_norm": 0.33625693354138464, |
| "learning_rate": 2.0143058554808622e-05, |
| "loss": 0.3107, |
| "step": 1337 |
| }, |
| { |
| "epoch": 2.7389969293756398, |
| "grad_norm": 0.32608262090854195, |
| "learning_rate": 2.0114447195190486e-05, |
| "loss": 0.3445, |
| "step": 1338 |
| }, |
| { |
| "epoch": 2.7410440122824973, |
| "grad_norm": 0.3157455564515132, |
| "learning_rate": 2.0085835601343627e-05, |
| "loss": 0.3426, |
| "step": 1339 |
| }, |
| { |
| "epoch": 2.7430910951893552, |
| "grad_norm": 0.3108979356348658, |
| "learning_rate": 2.005722383182481e-05, |
| "loss": 0.3216, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.7451381780962127, |
| "grad_norm": 0.31525583618025826, |
| "learning_rate": 2.002861194519114e-05, |
| "loss": 0.3888, |
| "step": 1341 |
| }, |
| { |
| "epoch": 2.7471852610030707, |
| "grad_norm": 0.35286260637825495, |
| "learning_rate": 2e-05, |
| "loss": 0.3448, |
| "step": 1342 |
| }, |
| { |
| "epoch": 2.749232343909928, |
| "grad_norm": 0.33612012963227933, |
| "learning_rate": 1.9971388054808863e-05, |
| "loss": 0.3303, |
| "step": 1343 |
| }, |
| { |
| "epoch": 2.751279426816786, |
| "grad_norm": 0.3293365486688113, |
| "learning_rate": 1.99427761681752e-05, |
| "loss": 0.3378, |
| "step": 1344 |
| }, |
| { |
| "epoch": 2.7533265097236437, |
| "grad_norm": 0.3702026149273537, |
| "learning_rate": 1.9914164398656383e-05, |
| "loss": 0.3204, |
| "step": 1345 |
| }, |
| { |
| "epoch": 2.7553735926305016, |
| "grad_norm": 0.3008607535549162, |
| "learning_rate": 1.988555280480952e-05, |
| "loss": 0.3245, |
| "step": 1346 |
| }, |
| { |
| "epoch": 2.757420675537359, |
| "grad_norm": 0.3346226482861992, |
| "learning_rate": 1.9856941445191388e-05, |
| "loss": 0.2973, |
| "step": 1347 |
| }, |
| { |
| "epoch": 2.759467758444217, |
| "grad_norm": 0.333107377703825, |
| "learning_rate": 1.9828330378358264e-05, |
| "loss": 0.3462, |
| "step": 1348 |
| }, |
| { |
| "epoch": 2.7615148413510746, |
| "grad_norm": 0.33602464320692405, |
| "learning_rate": 1.9799719662865828e-05, |
| "loss": 0.3348, |
| "step": 1349 |
| }, |
| { |
| "epoch": 2.7635619242579326, |
| "grad_norm": 0.3460744761311549, |
| "learning_rate": 1.9771109357269047e-05, |
| "loss": 0.3041, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.76560900716479, |
| "grad_norm": 0.3398214327127035, |
| "learning_rate": 1.974249952012204e-05, |
| "loss": 0.3442, |
| "step": 1351 |
| }, |
| { |
| "epoch": 2.767656090071648, |
| "grad_norm": 0.32323094867372437, |
| "learning_rate": 1.9713890209977977e-05, |
| "loss": 0.3133, |
| "step": 1352 |
| }, |
| { |
| "epoch": 2.7697031729785055, |
| "grad_norm": 0.3439688117561741, |
| "learning_rate": 1.9685281485388955e-05, |
| "loss": 0.3091, |
| "step": 1353 |
| }, |
| { |
| "epoch": 2.7717502558853635, |
| "grad_norm": 0.29746767666002316, |
| "learning_rate": 1.9656673404905852e-05, |
| "loss": 0.2957, |
| "step": 1354 |
| }, |
| { |
| "epoch": 2.773797338792221, |
| "grad_norm": 0.32765073618687846, |
| "learning_rate": 1.9628066027078247e-05, |
| "loss": 0.3413, |
| "step": 1355 |
| }, |
| { |
| "epoch": 2.775844421699079, |
| "grad_norm": 0.3473220628635939, |
| "learning_rate": 1.9599459410454266e-05, |
| "loss": 0.2997, |
| "step": 1356 |
| }, |
| { |
| "epoch": 2.7778915046059365, |
| "grad_norm": 0.34784120186994494, |
| "learning_rate": 1.957085361358049e-05, |
| "loss": 0.3397, |
| "step": 1357 |
| }, |
| { |
| "epoch": 2.7799385875127944, |
| "grad_norm": 0.32647960906928786, |
| "learning_rate": 1.9542248695001808e-05, |
| "loss": 0.3269, |
| "step": 1358 |
| }, |
| { |
| "epoch": 2.781985670419652, |
| "grad_norm": 0.3386091012629272, |
| "learning_rate": 1.9513644713261328e-05, |
| "loss": 0.3398, |
| "step": 1359 |
| }, |
| { |
| "epoch": 2.7840327533265095, |
| "grad_norm": 0.31112512902574707, |
| "learning_rate": 1.9485041726900232e-05, |
| "loss": 0.3247, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.7860798362333674, |
| "grad_norm": 0.3267971495507797, |
| "learning_rate": 1.9456439794457665e-05, |
| "loss": 0.3699, |
| "step": 1361 |
| }, |
| { |
| "epoch": 2.7881269191402254, |
| "grad_norm": 0.34461638842492454, |
| "learning_rate": 1.942783897447061e-05, |
| "loss": 0.3315, |
| "step": 1362 |
| }, |
| { |
| "epoch": 2.790174002047083, |
| "grad_norm": 0.30853412340590924, |
| "learning_rate": 1.939923932547379e-05, |
| "loss": 0.3325, |
| "step": 1363 |
| }, |
| { |
| "epoch": 2.7922210849539404, |
| "grad_norm": 0.32960376681405235, |
| "learning_rate": 1.93706409059995e-05, |
| "loss": 0.3048, |
| "step": 1364 |
| }, |
| { |
| "epoch": 2.7942681678607983, |
| "grad_norm": 0.3216608733849923, |
| "learning_rate": 1.9342043774577562e-05, |
| "loss": 0.3478, |
| "step": 1365 |
| }, |
| { |
| "epoch": 2.7963152507676563, |
| "grad_norm": 0.369356903841249, |
| "learning_rate": 1.931344798973513e-05, |
| "loss": 0.3428, |
| "step": 1366 |
| }, |
| { |
| "epoch": 2.798362333674514, |
| "grad_norm": 0.3021279849899095, |
| "learning_rate": 1.928485360999661e-05, |
| "loss": 0.3641, |
| "step": 1367 |
| }, |
| { |
| "epoch": 2.8004094165813713, |
| "grad_norm": 0.3573666945556479, |
| "learning_rate": 1.9256260693883534e-05, |
| "loss": 0.2993, |
| "step": 1368 |
| }, |
| { |
| "epoch": 2.8024564994882293, |
| "grad_norm": 0.31061810018877584, |
| "learning_rate": 1.922766929991443e-05, |
| "loss": 0.3298, |
| "step": 1369 |
| }, |
| { |
| "epoch": 2.8045035823950872, |
| "grad_norm": 0.32086151227350046, |
| "learning_rate": 1.9199079486604727e-05, |
| "loss": 0.293, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.8065506653019447, |
| "grad_norm": 0.3339285911403012, |
| "learning_rate": 1.9170491312466616e-05, |
| "loss": 0.3239, |
| "step": 1371 |
| }, |
| { |
| "epoch": 2.8085977482088023, |
| "grad_norm": 0.3273867205248011, |
| "learning_rate": 1.914190483600891e-05, |
| "loss": 0.3502, |
| "step": 1372 |
| }, |
| { |
| "epoch": 2.81064483111566, |
| "grad_norm": 0.3707945367816578, |
| "learning_rate": 1.9113320115736986e-05, |
| "loss": 0.3357, |
| "step": 1373 |
| }, |
| { |
| "epoch": 2.812691914022518, |
| "grad_norm": 0.34848249764459066, |
| "learning_rate": 1.9084737210152593e-05, |
| "loss": 0.3185, |
| "step": 1374 |
| }, |
| { |
| "epoch": 2.8147389969293757, |
| "grad_norm": 0.32760861423269866, |
| "learning_rate": 1.9056156177753776e-05, |
| "loss": 0.3228, |
| "step": 1375 |
| }, |
| { |
| "epoch": 2.816786079836233, |
| "grad_norm": 0.3391917065894771, |
| "learning_rate": 1.902757707703475e-05, |
| "loss": 0.3475, |
| "step": 1376 |
| }, |
| { |
| "epoch": 2.818833162743091, |
| "grad_norm": 0.3257314300037398, |
| "learning_rate": 1.899899996648579e-05, |
| "loss": 0.3325, |
| "step": 1377 |
| }, |
| { |
| "epoch": 2.8208802456499487, |
| "grad_norm": 0.3453324225261829, |
| "learning_rate": 1.897042490459307e-05, |
| "loss": 0.3301, |
| "step": 1378 |
| }, |
| { |
| "epoch": 2.8229273285568066, |
| "grad_norm": 0.35448623951366837, |
| "learning_rate": 1.8941851949838595e-05, |
| "loss": 0.3261, |
| "step": 1379 |
| }, |
| { |
| "epoch": 2.824974411463664, |
| "grad_norm": 0.322963697569429, |
| "learning_rate": 1.8913281160700038e-05, |
| "loss": 0.3602, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.827021494370522, |
| "grad_norm": 0.36959087755198944, |
| "learning_rate": 1.8884712595650653e-05, |
| "loss": 0.3173, |
| "step": 1381 |
| }, |
| { |
| "epoch": 2.8290685772773796, |
| "grad_norm": 0.3404440796977021, |
| "learning_rate": 1.885614631315914e-05, |
| "loss": 0.3549, |
| "step": 1382 |
| }, |
| { |
| "epoch": 2.8311156601842375, |
| "grad_norm": 0.31042921976093146, |
| "learning_rate": 1.8827582371689516e-05, |
| "loss": 0.3202, |
| "step": 1383 |
| }, |
| { |
| "epoch": 2.833162743091095, |
| "grad_norm": 0.3733330139685621, |
| "learning_rate": 1.8799020829701036e-05, |
| "loss": 0.3704, |
| "step": 1384 |
| }, |
| { |
| "epoch": 2.835209825997953, |
| "grad_norm": 0.35311166747172257, |
| "learning_rate": 1.8770461745648012e-05, |
| "loss": 0.3159, |
| "step": 1385 |
| }, |
| { |
| "epoch": 2.8372569089048105, |
| "grad_norm": 0.30009262162818595, |
| "learning_rate": 1.8741905177979743e-05, |
| "loss": 0.347, |
| "step": 1386 |
| }, |
| { |
| "epoch": 2.8393039918116685, |
| "grad_norm": 0.3431225449790101, |
| "learning_rate": 1.871335118514037e-05, |
| "loss": 0.3259, |
| "step": 1387 |
| }, |
| { |
| "epoch": 2.841351074718526, |
| "grad_norm": 0.3379879693204325, |
| "learning_rate": 1.8684799825568775e-05, |
| "loss": 0.3354, |
| "step": 1388 |
| }, |
| { |
| "epoch": 2.843398157625384, |
| "grad_norm": 0.32284443139667096, |
| "learning_rate": 1.8656251157698425e-05, |
| "loss": 0.3332, |
| "step": 1389 |
| }, |
| { |
| "epoch": 2.8454452405322415, |
| "grad_norm": 0.3366299858528211, |
| "learning_rate": 1.862770523995732e-05, |
| "loss": 0.3379, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.8474923234390994, |
| "grad_norm": 0.3306626186125099, |
| "learning_rate": 1.85991621307678e-05, |
| "loss": 0.3304, |
| "step": 1391 |
| }, |
| { |
| "epoch": 2.849539406345957, |
| "grad_norm": 0.3154237186664138, |
| "learning_rate": 1.8570621888546464e-05, |
| "loss": 0.3162, |
| "step": 1392 |
| }, |
| { |
| "epoch": 2.851586489252815, |
| "grad_norm": 0.3070396971440444, |
| "learning_rate": 1.854208457170404e-05, |
| "loss": 0.3341, |
| "step": 1393 |
| }, |
| { |
| "epoch": 2.8536335721596724, |
| "grad_norm": 0.31391792776294475, |
| "learning_rate": 1.8513550238645283e-05, |
| "loss": 0.3385, |
| "step": 1394 |
| }, |
| { |
| "epoch": 2.8556806550665303, |
| "grad_norm": 0.3604543553956966, |
| "learning_rate": 1.8485018947768817e-05, |
| "loss": 0.3242, |
| "step": 1395 |
| }, |
| { |
| "epoch": 2.857727737973388, |
| "grad_norm": 0.33119690178793554, |
| "learning_rate": 1.8456490757467075e-05, |
| "loss": 0.3172, |
| "step": 1396 |
| }, |
| { |
| "epoch": 2.859774820880246, |
| "grad_norm": 0.3634627791673887, |
| "learning_rate": 1.8427965726126114e-05, |
| "loss": 0.3256, |
| "step": 1397 |
| }, |
| { |
| "epoch": 2.8618219037871033, |
| "grad_norm": 0.30347193226396807, |
| "learning_rate": 1.839944391212553e-05, |
| "loss": 0.3375, |
| "step": 1398 |
| }, |
| { |
| "epoch": 2.8638689866939613, |
| "grad_norm": 0.3273120420881456, |
| "learning_rate": 1.8370925373838356e-05, |
| "loss": 0.3388, |
| "step": 1399 |
| }, |
| { |
| "epoch": 2.865916069600819, |
| "grad_norm": 0.31525363609472923, |
| "learning_rate": 1.834241016963089e-05, |
| "loss": 0.361, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.8679631525076763, |
| "grad_norm": 0.32941174535440465, |
| "learning_rate": 1.8313898357862623e-05, |
| "loss": 0.3292, |
| "step": 1401 |
| }, |
| { |
| "epoch": 2.8700102354145343, |
| "grad_norm": 0.33198094596750977, |
| "learning_rate": 1.8285389996886113e-05, |
| "loss": 0.3239, |
| "step": 1402 |
| }, |
| { |
| "epoch": 2.872057318321392, |
| "grad_norm": 0.31365437902540855, |
| "learning_rate": 1.8256885145046837e-05, |
| "loss": 0.3442, |
| "step": 1403 |
| }, |
| { |
| "epoch": 2.8741044012282497, |
| "grad_norm": 0.30554897801238856, |
| "learning_rate": 1.82283838606831e-05, |
| "loss": 0.3387, |
| "step": 1404 |
| }, |
| { |
| "epoch": 2.8761514841351072, |
| "grad_norm": 0.32230864173441504, |
| "learning_rate": 1.8199886202125897e-05, |
| "loss": 0.364, |
| "step": 1405 |
| }, |
| { |
| "epoch": 2.878198567041965, |
| "grad_norm": 0.3283203936597496, |
| "learning_rate": 1.817139222769882e-05, |
| "loss": 0.3135, |
| "step": 1406 |
| }, |
| { |
| "epoch": 2.880245649948823, |
| "grad_norm": 0.3352622595322451, |
| "learning_rate": 1.8142901995717894e-05, |
| "loss": 0.3293, |
| "step": 1407 |
| }, |
| { |
| "epoch": 2.8822927328556807, |
| "grad_norm": 0.3199512613271915, |
| "learning_rate": 1.8114415564491513e-05, |
| "loss": 0.3382, |
| "step": 1408 |
| }, |
| { |
| "epoch": 2.884339815762538, |
| "grad_norm": 0.3243396061475919, |
| "learning_rate": 1.8085932992320273e-05, |
| "loss": 0.3346, |
| "step": 1409 |
| }, |
| { |
| "epoch": 2.886386898669396, |
| "grad_norm": 0.3177758192621551, |
| "learning_rate": 1.805745433749689e-05, |
| "loss": 0.3193, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.888433981576254, |
| "grad_norm": 0.36128950608945015, |
| "learning_rate": 1.8028979658306033e-05, |
| "loss": 0.3352, |
| "step": 1411 |
| }, |
| { |
| "epoch": 2.8904810644831116, |
| "grad_norm": 0.34067398643062763, |
| "learning_rate": 1.8000509013024266e-05, |
| "loss": 0.3704, |
| "step": 1412 |
| }, |
| { |
| "epoch": 2.892528147389969, |
| "grad_norm": 0.36758322105150537, |
| "learning_rate": 1.7972042459919878e-05, |
| "loss": 0.3548, |
| "step": 1413 |
| }, |
| { |
| "epoch": 2.894575230296827, |
| "grad_norm": 0.3446360571011566, |
| "learning_rate": 1.794358005725279e-05, |
| "loss": 0.3379, |
| "step": 1414 |
| }, |
| { |
| "epoch": 2.896622313203685, |
| "grad_norm": 0.33967697044665596, |
| "learning_rate": 1.791512186327444e-05, |
| "loss": 0.3685, |
| "step": 1415 |
| }, |
| { |
| "epoch": 2.8986693961105425, |
| "grad_norm": 0.32938434012320356, |
| "learning_rate": 1.7886667936227616e-05, |
| "loss": 0.3224, |
| "step": 1416 |
| }, |
| { |
| "epoch": 2.9007164790174, |
| "grad_norm": 0.3492413938668141, |
| "learning_rate": 1.785821833434642e-05, |
| "loss": 0.3234, |
| "step": 1417 |
| }, |
| { |
| "epoch": 2.902763561924258, |
| "grad_norm": 0.3556331967459014, |
| "learning_rate": 1.7829773115856065e-05, |
| "loss": 0.305, |
| "step": 1418 |
| }, |
| { |
| "epoch": 2.9048106448311155, |
| "grad_norm": 0.3322076227286167, |
| "learning_rate": 1.7801332338972813e-05, |
| "loss": 0.3463, |
| "step": 1419 |
| }, |
| { |
| "epoch": 2.9068577277379735, |
| "grad_norm": 0.44401272792536567, |
| "learning_rate": 1.7772896061903824e-05, |
| "loss": 0.3441, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.908904810644831, |
| "grad_norm": 0.3346724756218441, |
| "learning_rate": 1.7744464342847062e-05, |
| "loss": 0.3243, |
| "step": 1421 |
| }, |
| { |
| "epoch": 2.910951893551689, |
| "grad_norm": 0.37288189845084224, |
| "learning_rate": 1.771603723999116e-05, |
| "loss": 0.328, |
| "step": 1422 |
| }, |
| { |
| "epoch": 2.9129989764585464, |
| "grad_norm": 0.3543864660540506, |
| "learning_rate": 1.768761481151529e-05, |
| "loss": 0.3239, |
| "step": 1423 |
| }, |
| { |
| "epoch": 2.9150460593654044, |
| "grad_norm": 0.33338686726894085, |
| "learning_rate": 1.765919711558906e-05, |
| "loss": 0.3373, |
| "step": 1424 |
| }, |
| { |
| "epoch": 2.917093142272262, |
| "grad_norm": 0.32542482524106037, |
| "learning_rate": 1.7630784210372413e-05, |
| "loss": 0.3473, |
| "step": 1425 |
| }, |
| { |
| "epoch": 2.91914022517912, |
| "grad_norm": 0.333470225286717, |
| "learning_rate": 1.7602376154015456e-05, |
| "loss": 0.3285, |
| "step": 1426 |
| }, |
| { |
| "epoch": 2.9211873080859774, |
| "grad_norm": 0.33573772915244177, |
| "learning_rate": 1.7573973004658404e-05, |
| "loss": 0.3024, |
| "step": 1427 |
| }, |
| { |
| "epoch": 2.9232343909928353, |
| "grad_norm": 0.34259215508028346, |
| "learning_rate": 1.7545574820431412e-05, |
| "loss": 0.3315, |
| "step": 1428 |
| }, |
| { |
| "epoch": 2.925281473899693, |
| "grad_norm": 0.33102210911596164, |
| "learning_rate": 1.751718165945447e-05, |
| "loss": 0.3519, |
| "step": 1429 |
| }, |
| { |
| "epoch": 2.927328556806551, |
| "grad_norm": 0.32436438467262657, |
| "learning_rate": 1.7488793579837297e-05, |
| "loss": 0.3661, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.9293756397134083, |
| "grad_norm": 0.31331782808616476, |
| "learning_rate": 1.74604106396792e-05, |
| "loss": 0.3141, |
| "step": 1431 |
| }, |
| { |
| "epoch": 2.9314227226202663, |
| "grad_norm": 0.4141533412404601, |
| "learning_rate": 1.743203289706898e-05, |
| "loss": 0.3557, |
| "step": 1432 |
| }, |
| { |
| "epoch": 2.9334698055271238, |
| "grad_norm": 0.4622088658579696, |
| "learning_rate": 1.7403660410084806e-05, |
| "loss": 0.399, |
| "step": 1433 |
| }, |
| { |
| "epoch": 2.9355168884339817, |
| "grad_norm": 0.3236744034505386, |
| "learning_rate": 1.737529323679407e-05, |
| "loss": 0.3286, |
| "step": 1434 |
| }, |
| { |
| "epoch": 2.9375639713408392, |
| "grad_norm": 0.2993041210404907, |
| "learning_rate": 1.734693143525331e-05, |
| "loss": 0.3259, |
| "step": 1435 |
| }, |
| { |
| "epoch": 2.939611054247697, |
| "grad_norm": 0.31919342750079005, |
| "learning_rate": 1.731857506350805e-05, |
| "loss": 0.3438, |
| "step": 1436 |
| }, |
| { |
| "epoch": 2.9416581371545547, |
| "grad_norm": 0.3479704658589382, |
| "learning_rate": 1.7290224179592718e-05, |
| "loss": 0.3561, |
| "step": 1437 |
| }, |
| { |
| "epoch": 2.943705220061412, |
| "grad_norm": 0.32794236732374565, |
| "learning_rate": 1.7261878841530494e-05, |
| "loss": 0.2956, |
| "step": 1438 |
| }, |
| { |
| "epoch": 2.94575230296827, |
| "grad_norm": 0.3348567062695728, |
| "learning_rate": 1.7233539107333234e-05, |
| "loss": 0.3207, |
| "step": 1439 |
| }, |
| { |
| "epoch": 2.947799385875128, |
| "grad_norm": 0.3107019732230144, |
| "learning_rate": 1.72052050350013e-05, |
| "loss": 0.3324, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.9498464687819856, |
| "grad_norm": 0.334794121032537, |
| "learning_rate": 1.717687668252348e-05, |
| "loss": 0.3296, |
| "step": 1441 |
| }, |
| { |
| "epoch": 2.951893551688843, |
| "grad_norm": 0.34219772695899153, |
| "learning_rate": 1.7148554107876847e-05, |
| "loss": 0.3504, |
| "step": 1442 |
| }, |
| { |
| "epoch": 2.953940634595701, |
| "grad_norm": 0.33351281779847475, |
| "learning_rate": 1.7120237369026655e-05, |
| "loss": 0.378, |
| "step": 1443 |
| }, |
| { |
| "epoch": 2.955987717502559, |
| "grad_norm": 0.3475392701501788, |
| "learning_rate": 1.7091926523926205e-05, |
| "loss": 0.3437, |
| "step": 1444 |
| }, |
| { |
| "epoch": 2.9580348004094166, |
| "grad_norm": 0.33100597785259966, |
| "learning_rate": 1.7063621630516755e-05, |
| "loss": 0.3289, |
| "step": 1445 |
| }, |
| { |
| "epoch": 2.960081883316274, |
| "grad_norm": 0.3615110501855717, |
| "learning_rate": 1.7035322746727366e-05, |
| "loss": 0.3148, |
| "step": 1446 |
| }, |
| { |
| "epoch": 2.962128966223132, |
| "grad_norm": 0.31694424871677895, |
| "learning_rate": 1.7007029930474804e-05, |
| "loss": 0.3389, |
| "step": 1447 |
| }, |
| { |
| "epoch": 2.96417604912999, |
| "grad_norm": 0.3428230537054886, |
| "learning_rate": 1.697874323966342e-05, |
| "loss": 0.3286, |
| "step": 1448 |
| }, |
| { |
| "epoch": 2.9662231320368475, |
| "grad_norm": 0.33741669479723163, |
| "learning_rate": 1.6950462732185023e-05, |
| "loss": 0.3197, |
| "step": 1449 |
| }, |
| { |
| "epoch": 2.968270214943705, |
| "grad_norm": 0.3203471822470365, |
| "learning_rate": 1.6922188465918763e-05, |
| "loss": 0.3297, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.970317297850563, |
| "grad_norm": 0.34824872554820474, |
| "learning_rate": 1.689392049873104e-05, |
| "loss": 0.3577, |
| "step": 1451 |
| }, |
| { |
| "epoch": 2.972364380757421, |
| "grad_norm": 0.33484691097376085, |
| "learning_rate": 1.6865658888475334e-05, |
| "loss": 0.3252, |
| "step": 1452 |
| }, |
| { |
| "epoch": 2.9744114636642784, |
| "grad_norm": 0.3184369996466899, |
| "learning_rate": 1.6837403692992136e-05, |
| "loss": 0.3267, |
| "step": 1453 |
| }, |
| { |
| "epoch": 2.976458546571136, |
| "grad_norm": 0.34524241797219873, |
| "learning_rate": 1.680915497010879e-05, |
| "loss": 0.362, |
| "step": 1454 |
| }, |
| { |
| "epoch": 2.978505629477994, |
| "grad_norm": 0.3517522038568365, |
| "learning_rate": 1.6780912777639407e-05, |
| "loss": 0.3455, |
| "step": 1455 |
| }, |
| { |
| "epoch": 2.9805527123848514, |
| "grad_norm": 0.35084133720055133, |
| "learning_rate": 1.6752677173384734e-05, |
| "loss": 0.3476, |
| "step": 1456 |
| }, |
| { |
| "epoch": 2.9825997952917094, |
| "grad_norm": 0.33510673606401725, |
| "learning_rate": 1.6724448215132006e-05, |
| "loss": 0.373, |
| "step": 1457 |
| }, |
| { |
| "epoch": 2.984646878198567, |
| "grad_norm": 0.32173623731436524, |
| "learning_rate": 1.669622596065491e-05, |
| "loss": 0.3416, |
| "step": 1458 |
| }, |
| { |
| "epoch": 2.986693961105425, |
| "grad_norm": 0.3405913760888214, |
| "learning_rate": 1.6668010467713363e-05, |
| "loss": 0.3336, |
| "step": 1459 |
| }, |
| { |
| "epoch": 2.9887410440122824, |
| "grad_norm": 0.33253196052599165, |
| "learning_rate": 1.6639801794053468e-05, |
| "loss": 0.3281, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.9907881269191403, |
| "grad_norm": 0.3127274815254226, |
| "learning_rate": 1.6611599997407366e-05, |
| "loss": 0.3219, |
| "step": 1461 |
| }, |
| { |
| "epoch": 2.992835209825998, |
| "grad_norm": 0.35589359153787264, |
| "learning_rate": 1.658340513549312e-05, |
| "loss": 0.3253, |
| "step": 1462 |
| }, |
| { |
| "epoch": 2.9948822927328558, |
| "grad_norm": 0.33592684199714334, |
| "learning_rate": 1.6555217266014604e-05, |
| "loss": 0.3679, |
| "step": 1463 |
| }, |
| { |
| "epoch": 2.9969293756397133, |
| "grad_norm": 0.3293658658288109, |
| "learning_rate": 1.6527036446661396e-05, |
| "loss": 0.3107, |
| "step": 1464 |
| }, |
| { |
| "epoch": 2.9989764585465712, |
| "grad_norm": 0.32358581638845413, |
| "learning_rate": 1.649886273510861e-05, |
| "loss": 0.3537, |
| "step": 1465 |
| }, |
| { |
| "epoch": 3.0010235414534288, |
| "grad_norm": 0.6530375339161546, |
| "learning_rate": 1.6470696189016853e-05, |
| "loss": 0.4219, |
| "step": 1466 |
| }, |
| { |
| "epoch": 3.0030706243602867, |
| "grad_norm": 0.41349403660281325, |
| "learning_rate": 1.6442536866032027e-05, |
| "loss": 0.2489, |
| "step": 1467 |
| }, |
| { |
| "epoch": 3.0051177072671442, |
| "grad_norm": 0.49742564287488156, |
| "learning_rate": 1.641438482378529e-05, |
| "loss": 0.2308, |
| "step": 1468 |
| }, |
| { |
| "epoch": 3.007164790174002, |
| "grad_norm": 0.5603085497770336, |
| "learning_rate": 1.6386240119892867e-05, |
| "loss": 0.2283, |
| "step": 1469 |
| }, |
| { |
| "epoch": 3.0092118730808597, |
| "grad_norm": 0.37112551523365644, |
| "learning_rate": 1.6358102811955985e-05, |
| "loss": 0.2255, |
| "step": 1470 |
| }, |
| { |
| "epoch": 3.0112589559877176, |
| "grad_norm": 0.4964687759956586, |
| "learning_rate": 1.6329972957560736e-05, |
| "loss": 0.2499, |
| "step": 1471 |
| }, |
| { |
| "epoch": 3.013306038894575, |
| "grad_norm": 0.5159224089011558, |
| "learning_rate": 1.6301850614277936e-05, |
| "loss": 0.2899, |
| "step": 1472 |
| }, |
| { |
| "epoch": 3.015353121801433, |
| "grad_norm": 0.3865653836943384, |
| "learning_rate": 1.6273735839663044e-05, |
| "loss": 0.23, |
| "step": 1473 |
| }, |
| { |
| "epoch": 3.0174002047082906, |
| "grad_norm": 0.36109737314160684, |
| "learning_rate": 1.6245628691256032e-05, |
| "loss": 0.2087, |
| "step": 1474 |
| }, |
| { |
| "epoch": 3.0194472876151486, |
| "grad_norm": 0.44209392274842507, |
| "learning_rate": 1.6217529226581247e-05, |
| "loss": 0.2523, |
| "step": 1475 |
| }, |
| { |
| "epoch": 3.021494370522006, |
| "grad_norm": 0.4472352934034512, |
| "learning_rate": 1.6189437503147338e-05, |
| "loss": 0.284, |
| "step": 1476 |
| }, |
| { |
| "epoch": 3.023541453428864, |
| "grad_norm": 0.3324235418829143, |
| "learning_rate": 1.616135357844709e-05, |
| "loss": 0.203, |
| "step": 1477 |
| }, |
| { |
| "epoch": 3.0255885363357216, |
| "grad_norm": 0.36268455499664337, |
| "learning_rate": 1.613327750995732e-05, |
| "loss": 0.2223, |
| "step": 1478 |
| }, |
| { |
| "epoch": 3.0276356192425795, |
| "grad_norm": 0.38026408150755925, |
| "learning_rate": 1.61052093551388e-05, |
| "loss": 0.2122, |
| "step": 1479 |
| }, |
| { |
| "epoch": 3.029682702149437, |
| "grad_norm": 0.3766577760390122, |
| "learning_rate": 1.6077149171436063e-05, |
| "loss": 0.263, |
| "step": 1480 |
| }, |
| { |
| "epoch": 3.031729785056295, |
| "grad_norm": 0.36393521520091426, |
| "learning_rate": 1.6049097016277358e-05, |
| "loss": 0.2729, |
| "step": 1481 |
| }, |
| { |
| "epoch": 3.0337768679631525, |
| "grad_norm": 0.36192400255610996, |
| "learning_rate": 1.60210529470745e-05, |
| "loss": 0.2318, |
| "step": 1482 |
| }, |
| { |
| "epoch": 3.0358239508700104, |
| "grad_norm": 0.35713778733325363, |
| "learning_rate": 1.599301702122274e-05, |
| "loss": 0.2356, |
| "step": 1483 |
| }, |
| { |
| "epoch": 3.037871033776868, |
| "grad_norm": 0.3518563642429611, |
| "learning_rate": 1.5964989296100682e-05, |
| "loss": 0.2367, |
| "step": 1484 |
| }, |
| { |
| "epoch": 3.039918116683726, |
| "grad_norm": 0.40298228736514674, |
| "learning_rate": 1.5936969829070125e-05, |
| "loss": 0.3027, |
| "step": 1485 |
| }, |
| { |
| "epoch": 3.0419651995905834, |
| "grad_norm": 0.31807307761105635, |
| "learning_rate": 1.590895867747599e-05, |
| "loss": 0.2541, |
| "step": 1486 |
| }, |
| { |
| "epoch": 3.044012282497441, |
| "grad_norm": 0.3409445938787892, |
| "learning_rate": 1.588095589864615e-05, |
| "loss": 0.2289, |
| "step": 1487 |
| }, |
| { |
| "epoch": 3.046059365404299, |
| "grad_norm": 0.3600773888625388, |
| "learning_rate": 1.5852961549891376e-05, |
| "loss": 0.2341, |
| "step": 1488 |
| }, |
| { |
| "epoch": 3.0481064483111564, |
| "grad_norm": 0.32453736981408465, |
| "learning_rate": 1.582497568850517e-05, |
| "loss": 0.2797, |
| "step": 1489 |
| }, |
| { |
| "epoch": 3.0501535312180144, |
| "grad_norm": 0.3331181801930424, |
| "learning_rate": 1.579699837176366e-05, |
| "loss": 0.2036, |
| "step": 1490 |
| }, |
| { |
| "epoch": 3.052200614124872, |
| "grad_norm": 0.4136442196087828, |
| "learning_rate": 1.5769029656925486e-05, |
| "loss": 0.2599, |
| "step": 1491 |
| }, |
| { |
| "epoch": 3.05424769703173, |
| "grad_norm": 0.35863640915527445, |
| "learning_rate": 1.574106960123169e-05, |
| "loss": 0.2526, |
| "step": 1492 |
| }, |
| { |
| "epoch": 3.0562947799385873, |
| "grad_norm": 0.35564688451602927, |
| "learning_rate": 1.571311826190559e-05, |
| "loss": 0.2503, |
| "step": 1493 |
| }, |
| { |
| "epoch": 3.0583418628454453, |
| "grad_norm": 0.33302867243814094, |
| "learning_rate": 1.5685175696152657e-05, |
| "loss": 0.2283, |
| "step": 1494 |
| }, |
| { |
| "epoch": 3.060388945752303, |
| "grad_norm": 0.36946655881677004, |
| "learning_rate": 1.5657241961160434e-05, |
| "loss": 0.272, |
| "step": 1495 |
| }, |
| { |
| "epoch": 3.0624360286591608, |
| "grad_norm": 0.33870469772598516, |
| "learning_rate": 1.562931711409835e-05, |
| "loss": 0.2527, |
| "step": 1496 |
| }, |
| { |
| "epoch": 3.0644831115660183, |
| "grad_norm": 0.29057383945914955, |
| "learning_rate": 1.5601401212117676e-05, |
| "loss": 0.2468, |
| "step": 1497 |
| }, |
| { |
| "epoch": 3.0665301944728762, |
| "grad_norm": 0.3538607726440218, |
| "learning_rate": 1.557349431235135e-05, |
| "loss": 0.2527, |
| "step": 1498 |
| }, |
| { |
| "epoch": 3.0685772773797337, |
| "grad_norm": 0.36881305364134004, |
| "learning_rate": 1.554559647191392e-05, |
| "loss": 0.2572, |
| "step": 1499 |
| }, |
| { |
| "epoch": 3.0706243602865917, |
| "grad_norm": 0.31303360312121764, |
| "learning_rate": 1.5517707747901352e-05, |
| "loss": 0.2015, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.072671443193449, |
| "grad_norm": 0.3468523227469292, |
| "learning_rate": 1.5489828197390988e-05, |
| "loss": 0.2522, |
| "step": 1501 |
| }, |
| { |
| "epoch": 3.074718526100307, |
| "grad_norm": 0.3189025943422717, |
| "learning_rate": 1.5461957877441387e-05, |
| "loss": 0.2467, |
| "step": 1502 |
| }, |
| { |
| "epoch": 3.0767656090071647, |
| "grad_norm": 0.36693872460390387, |
| "learning_rate": 1.5434096845092203e-05, |
| "loss": 0.2266, |
| "step": 1503 |
| }, |
| { |
| "epoch": 3.0788126919140226, |
| "grad_norm": 0.33990261356941326, |
| "learning_rate": 1.5406245157364093e-05, |
| "loss": 0.2511, |
| "step": 1504 |
| }, |
| { |
| "epoch": 3.08085977482088, |
| "grad_norm": 0.3224675841478402, |
| "learning_rate": 1.537840287125859e-05, |
| "loss": 0.2013, |
| "step": 1505 |
| }, |
| { |
| "epoch": 3.082906857727738, |
| "grad_norm": 0.3392490287207665, |
| "learning_rate": 1.5350570043757976e-05, |
| "loss": 0.2222, |
| "step": 1506 |
| }, |
| { |
| "epoch": 3.0849539406345956, |
| "grad_norm": 0.33119892853448313, |
| "learning_rate": 1.5322746731825195e-05, |
| "loss": 0.2403, |
| "step": 1507 |
| }, |
| { |
| "epoch": 3.0870010235414536, |
| "grad_norm": 0.3189197704594123, |
| "learning_rate": 1.5294932992403695e-05, |
| "loss": 0.2156, |
| "step": 1508 |
| }, |
| { |
| "epoch": 3.089048106448311, |
| "grad_norm": 0.33202442707430363, |
| "learning_rate": 1.526712888241734e-05, |
| "loss": 0.2449, |
| "step": 1509 |
| }, |
| { |
| "epoch": 3.091095189355169, |
| "grad_norm": 0.3126714573879982, |
| "learning_rate": 1.5239334458770291e-05, |
| "loss": 0.2345, |
| "step": 1510 |
| }, |
| { |
| "epoch": 3.0931422722620265, |
| "grad_norm": 0.3199987863370429, |
| "learning_rate": 1.5211549778346882e-05, |
| "loss": 0.2547, |
| "step": 1511 |
| }, |
| { |
| "epoch": 3.0951893551688845, |
| "grad_norm": 0.2865206932540882, |
| "learning_rate": 1.5183774898011496e-05, |
| "loss": 0.2262, |
| "step": 1512 |
| }, |
| { |
| "epoch": 3.097236438075742, |
| "grad_norm": 0.2978122286180525, |
| "learning_rate": 1.5156009874608484e-05, |
| "loss": 0.2454, |
| "step": 1513 |
| }, |
| { |
| "epoch": 3.0992835209826, |
| "grad_norm": 0.33009627047245504, |
| "learning_rate": 1.5128254764962e-05, |
| "loss": 0.2762, |
| "step": 1514 |
| }, |
| { |
| "epoch": 3.1013306038894575, |
| "grad_norm": 0.328880261572203, |
| "learning_rate": 1.5100509625875921e-05, |
| "loss": 0.3284, |
| "step": 1515 |
| }, |
| { |
| "epoch": 3.1033776867963154, |
| "grad_norm": 0.31916303688606795, |
| "learning_rate": 1.5072774514133708e-05, |
| "loss": 0.2299, |
| "step": 1516 |
| }, |
| { |
| "epoch": 3.105424769703173, |
| "grad_norm": 0.3226893313357512, |
| "learning_rate": 1.5045049486498311e-05, |
| "loss": 0.2338, |
| "step": 1517 |
| }, |
| { |
| "epoch": 3.107471852610031, |
| "grad_norm": 0.31484899101790875, |
| "learning_rate": 1.5017334599712028e-05, |
| "loss": 0.2039, |
| "step": 1518 |
| }, |
| { |
| "epoch": 3.1095189355168884, |
| "grad_norm": 0.32998837828193894, |
| "learning_rate": 1.4989629910496424e-05, |
| "loss": 0.2345, |
| "step": 1519 |
| }, |
| { |
| "epoch": 3.1115660184237464, |
| "grad_norm": 0.3027257775351982, |
| "learning_rate": 1.4961935475552178e-05, |
| "loss": 0.2285, |
| "step": 1520 |
| }, |
| { |
| "epoch": 3.113613101330604, |
| "grad_norm": 0.3071518323366728, |
| "learning_rate": 1.4934251351558983e-05, |
| "loss": 0.2384, |
| "step": 1521 |
| }, |
| { |
| "epoch": 3.115660184237462, |
| "grad_norm": 0.30152633341709373, |
| "learning_rate": 1.4906577595175428e-05, |
| "loss": 0.2201, |
| "step": 1522 |
| }, |
| { |
| "epoch": 3.1177072671443193, |
| "grad_norm": 0.33663282454591203, |
| "learning_rate": 1.4878914263038895e-05, |
| "loss": 0.2324, |
| "step": 1523 |
| }, |
| { |
| "epoch": 3.119754350051177, |
| "grad_norm": 0.33606866012096387, |
| "learning_rate": 1.4851261411765414e-05, |
| "loss": 0.2629, |
| "step": 1524 |
| }, |
| { |
| "epoch": 3.121801432958035, |
| "grad_norm": 0.32073469626766654, |
| "learning_rate": 1.4823619097949584e-05, |
| "loss": 0.2309, |
| "step": 1525 |
| }, |
| { |
| "epoch": 3.1238485158648923, |
| "grad_norm": 0.34825982979835035, |
| "learning_rate": 1.4795987378164432e-05, |
| "loss": 0.2361, |
| "step": 1526 |
| }, |
| { |
| "epoch": 3.1258955987717503, |
| "grad_norm": 0.32245272558823557, |
| "learning_rate": 1.4768366308961288e-05, |
| "loss": 0.2281, |
| "step": 1527 |
| }, |
| { |
| "epoch": 3.127942681678608, |
| "grad_norm": 0.35652651564540333, |
| "learning_rate": 1.4740755946869708e-05, |
| "loss": 0.2508, |
| "step": 1528 |
| }, |
| { |
| "epoch": 3.1299897645854657, |
| "grad_norm": 0.3215049499878307, |
| "learning_rate": 1.4713156348397317e-05, |
| "loss": 0.2144, |
| "step": 1529 |
| }, |
| { |
| "epoch": 3.1320368474923233, |
| "grad_norm": 0.33346781365441597, |
| "learning_rate": 1.468556757002972e-05, |
| "loss": 0.2425, |
| "step": 1530 |
| }, |
| { |
| "epoch": 3.134083930399181, |
| "grad_norm": 0.35639968613552386, |
| "learning_rate": 1.4657989668230363e-05, |
| "loss": 0.2227, |
| "step": 1531 |
| }, |
| { |
| "epoch": 3.1361310133060387, |
| "grad_norm": 0.3010242077787357, |
| "learning_rate": 1.4630422699440461e-05, |
| "loss": 0.2742, |
| "step": 1532 |
| }, |
| { |
| "epoch": 3.1381780962128967, |
| "grad_norm": 0.36016875940221305, |
| "learning_rate": 1.4602866720078832e-05, |
| "loss": 0.2747, |
| "step": 1533 |
| }, |
| { |
| "epoch": 3.140225179119754, |
| "grad_norm": 0.35237386827329403, |
| "learning_rate": 1.4575321786541801e-05, |
| "loss": 0.2408, |
| "step": 1534 |
| }, |
| { |
| "epoch": 3.142272262026612, |
| "grad_norm": 0.319112209857982, |
| "learning_rate": 1.45477879552031e-05, |
| "loss": 0.2488, |
| "step": 1535 |
| }, |
| { |
| "epoch": 3.1443193449334697, |
| "grad_norm": 0.2967734684654477, |
| "learning_rate": 1.4520265282413722e-05, |
| "loss": 0.213, |
| "step": 1536 |
| }, |
| { |
| "epoch": 3.1463664278403276, |
| "grad_norm": 0.3524504353601547, |
| "learning_rate": 1.4492753824501833e-05, |
| "loss": 0.222, |
| "step": 1537 |
| }, |
| { |
| "epoch": 3.148413510747185, |
| "grad_norm": 0.33500821048449647, |
| "learning_rate": 1.4465253637772651e-05, |
| "loss": 0.2513, |
| "step": 1538 |
| }, |
| { |
| "epoch": 3.150460593654043, |
| "grad_norm": 0.34077100422476553, |
| "learning_rate": 1.443776477850833e-05, |
| "loss": 0.2701, |
| "step": 1539 |
| }, |
| { |
| "epoch": 3.1525076765609006, |
| "grad_norm": 0.34275143758140053, |
| "learning_rate": 1.4410287302967813e-05, |
| "loss": 0.3137, |
| "step": 1540 |
| }, |
| { |
| "epoch": 3.1545547594677585, |
| "grad_norm": 0.31464152797503897, |
| "learning_rate": 1.4382821267386781e-05, |
| "loss": 0.2329, |
| "step": 1541 |
| }, |
| { |
| "epoch": 3.156601842374616, |
| "grad_norm": 0.29997611145802033, |
| "learning_rate": 1.4355366727977473e-05, |
| "loss": 0.2355, |
| "step": 1542 |
| }, |
| { |
| "epoch": 3.158648925281474, |
| "grad_norm": 0.3098740828854044, |
| "learning_rate": 1.4327923740928613e-05, |
| "loss": 0.2364, |
| "step": 1543 |
| }, |
| { |
| "epoch": 3.1606960081883315, |
| "grad_norm": 0.3182081076670239, |
| "learning_rate": 1.4300492362405296e-05, |
| "loss": 0.2307, |
| "step": 1544 |
| }, |
| { |
| "epoch": 3.1627430910951895, |
| "grad_norm": 0.33289523643049107, |
| "learning_rate": 1.4273072648548827e-05, |
| "loss": 0.2658, |
| "step": 1545 |
| }, |
| { |
| "epoch": 3.164790174002047, |
| "grad_norm": 0.3483349343296588, |
| "learning_rate": 1.4245664655476663e-05, |
| "loss": 0.239, |
| "step": 1546 |
| }, |
| { |
| "epoch": 3.166837256908905, |
| "grad_norm": 0.3344228654090122, |
| "learning_rate": 1.4218268439282259e-05, |
| "loss": 0.2136, |
| "step": 1547 |
| }, |
| { |
| "epoch": 3.1688843398157625, |
| "grad_norm": 0.31443022141500176, |
| "learning_rate": 1.4190884056034983e-05, |
| "loss": 0.2642, |
| "step": 1548 |
| }, |
| { |
| "epoch": 3.1709314227226204, |
| "grad_norm": 0.32146571298562293, |
| "learning_rate": 1.4163511561779956e-05, |
| "loss": 0.2532, |
| "step": 1549 |
| }, |
| { |
| "epoch": 3.172978505629478, |
| "grad_norm": 0.3550608808689066, |
| "learning_rate": 1.4136151012538008e-05, |
| "loss": 0.2358, |
| "step": 1550 |
| }, |
| { |
| "epoch": 3.175025588536336, |
| "grad_norm": 0.3166912892735251, |
| "learning_rate": 1.4108802464305496e-05, |
| "loss": 0.2128, |
| "step": 1551 |
| }, |
| { |
| "epoch": 3.1770726714431934, |
| "grad_norm": 0.3332651741225492, |
| "learning_rate": 1.4081465973054216e-05, |
| "loss": 0.2423, |
| "step": 1552 |
| }, |
| { |
| "epoch": 3.1791197543500513, |
| "grad_norm": 0.3369578277421136, |
| "learning_rate": 1.4054141594731289e-05, |
| "loss": 0.205, |
| "step": 1553 |
| }, |
| { |
| "epoch": 3.181166837256909, |
| "grad_norm": 0.3549410163798208, |
| "learning_rate": 1.402682938525906e-05, |
| "loss": 0.2587, |
| "step": 1554 |
| }, |
| { |
| "epoch": 3.183213920163767, |
| "grad_norm": 0.31908575274366446, |
| "learning_rate": 1.3999529400534941e-05, |
| "loss": 0.2669, |
| "step": 1555 |
| }, |
| { |
| "epoch": 3.1852610030706243, |
| "grad_norm": 0.33778438982482334, |
| "learning_rate": 1.3972241696431357e-05, |
| "loss": 0.244, |
| "step": 1556 |
| }, |
| { |
| "epoch": 3.1873080859774823, |
| "grad_norm": 0.3536190099748012, |
| "learning_rate": 1.3944966328795584e-05, |
| "loss": 0.243, |
| "step": 1557 |
| }, |
| { |
| "epoch": 3.18935516888434, |
| "grad_norm": 0.3395539666321202, |
| "learning_rate": 1.3917703353449646e-05, |
| "loss": 0.2231, |
| "step": 1558 |
| }, |
| { |
| "epoch": 3.1914022517911977, |
| "grad_norm": 0.3107794197012843, |
| "learning_rate": 1.3890452826190208e-05, |
| "loss": 0.203, |
| "step": 1559 |
| }, |
| { |
| "epoch": 3.1934493346980553, |
| "grad_norm": 0.33383730315910054, |
| "learning_rate": 1.3863214802788459e-05, |
| "loss": 0.2239, |
| "step": 1560 |
| }, |
| { |
| "epoch": 3.1954964176049128, |
| "grad_norm": 0.3236027308800084, |
| "learning_rate": 1.3835989338989996e-05, |
| "loss": 0.2602, |
| "step": 1561 |
| }, |
| { |
| "epoch": 3.1975435005117707, |
| "grad_norm": 0.3383450416141259, |
| "learning_rate": 1.3808776490514727e-05, |
| "loss": 0.2775, |
| "step": 1562 |
| }, |
| { |
| "epoch": 3.1995905834186287, |
| "grad_norm": 0.29556678254047786, |
| "learning_rate": 1.3781576313056713e-05, |
| "loss": 0.2305, |
| "step": 1563 |
| }, |
| { |
| "epoch": 3.201637666325486, |
| "grad_norm": 0.34240014338764224, |
| "learning_rate": 1.375438886228411e-05, |
| "loss": 0.2284, |
| "step": 1564 |
| }, |
| { |
| "epoch": 3.2036847492323437, |
| "grad_norm": 0.35927171229864285, |
| "learning_rate": 1.3727214193839002e-05, |
| "loss": 0.2669, |
| "step": 1565 |
| }, |
| { |
| "epoch": 3.2057318321392017, |
| "grad_norm": 0.295141784625082, |
| "learning_rate": 1.3700052363337337e-05, |
| "loss": 0.2237, |
| "step": 1566 |
| }, |
| { |
| "epoch": 3.207778915046059, |
| "grad_norm": 0.31352782892999664, |
| "learning_rate": 1.3672903426368773e-05, |
| "loss": 0.2105, |
| "step": 1567 |
| }, |
| { |
| "epoch": 3.209825997952917, |
| "grad_norm": 0.31282387528552047, |
| "learning_rate": 1.3645767438496567e-05, |
| "loss": 0.2252, |
| "step": 1568 |
| }, |
| { |
| "epoch": 3.2118730808597746, |
| "grad_norm": 0.3487407042028981, |
| "learning_rate": 1.3618644455257521e-05, |
| "loss": 0.2717, |
| "step": 1569 |
| }, |
| { |
| "epoch": 3.2139201637666326, |
| "grad_norm": 0.34749040489450855, |
| "learning_rate": 1.3591534532161781e-05, |
| "loss": 0.2463, |
| "step": 1570 |
| }, |
| { |
| "epoch": 3.21596724667349, |
| "grad_norm": 0.32899297696851715, |
| "learning_rate": 1.3564437724692766e-05, |
| "loss": 0.2275, |
| "step": 1571 |
| }, |
| { |
| "epoch": 3.218014329580348, |
| "grad_norm": 0.2725890794790335, |
| "learning_rate": 1.353735408830707e-05, |
| "loss": 0.2153, |
| "step": 1572 |
| }, |
| { |
| "epoch": 3.2200614124872056, |
| "grad_norm": 0.37569994527165246, |
| "learning_rate": 1.3510283678434317e-05, |
| "loss": 0.2445, |
| "step": 1573 |
| }, |
| { |
| "epoch": 3.2221084953940635, |
| "grad_norm": 0.3527821656094053, |
| "learning_rate": 1.348322655047707e-05, |
| "loss": 0.2088, |
| "step": 1574 |
| }, |
| { |
| "epoch": 3.224155578300921, |
| "grad_norm": 0.3319266141186732, |
| "learning_rate": 1.3456182759810708e-05, |
| "loss": 0.2336, |
| "step": 1575 |
| }, |
| { |
| "epoch": 3.226202661207779, |
| "grad_norm": 0.3545486115689844, |
| "learning_rate": 1.3429152361783307e-05, |
| "loss": 0.2681, |
| "step": 1576 |
| }, |
| { |
| "epoch": 3.2282497441146365, |
| "grad_norm": 0.3385783041962988, |
| "learning_rate": 1.3402135411715545e-05, |
| "loss": 0.2315, |
| "step": 1577 |
| }, |
| { |
| "epoch": 3.2302968270214945, |
| "grad_norm": 0.32938060023769156, |
| "learning_rate": 1.337513196490056e-05, |
| "loss": 0.2498, |
| "step": 1578 |
| }, |
| { |
| "epoch": 3.232343909928352, |
| "grad_norm": 0.30316278108042816, |
| "learning_rate": 1.3348142076603876e-05, |
| "loss": 0.1928, |
| "step": 1579 |
| }, |
| { |
| "epoch": 3.23439099283521, |
| "grad_norm": 0.34764955355779054, |
| "learning_rate": 1.3321165802063243e-05, |
| "loss": 0.28, |
| "step": 1580 |
| }, |
| { |
| "epoch": 3.2364380757420674, |
| "grad_norm": 0.3268398726034169, |
| "learning_rate": 1.3294203196488576e-05, |
| "loss": 0.2804, |
| "step": 1581 |
| }, |
| { |
| "epoch": 3.2384851586489254, |
| "grad_norm": 0.31999282008130525, |
| "learning_rate": 1.3267254315061797e-05, |
| "loss": 0.26, |
| "step": 1582 |
| }, |
| { |
| "epoch": 3.240532241555783, |
| "grad_norm": 0.31712302189517827, |
| "learning_rate": 1.324031921293674e-05, |
| "loss": 0.23, |
| "step": 1583 |
| }, |
| { |
| "epoch": 3.242579324462641, |
| "grad_norm": 0.3137795221764201, |
| "learning_rate": 1.3213397945239053e-05, |
| "loss": 0.243, |
| "step": 1584 |
| }, |
| { |
| "epoch": 3.2446264073694984, |
| "grad_norm": 0.3236833391733087, |
| "learning_rate": 1.318649056706605e-05, |
| "loss": 0.2621, |
| "step": 1585 |
| }, |
| { |
| "epoch": 3.2466734902763563, |
| "grad_norm": 0.28341768420158997, |
| "learning_rate": 1.3159597133486628e-05, |
| "loss": 0.2105, |
| "step": 1586 |
| }, |
| { |
| "epoch": 3.248720573183214, |
| "grad_norm": 0.34778024409649644, |
| "learning_rate": 1.313271769954115e-05, |
| "loss": 0.2899, |
| "step": 1587 |
| }, |
| { |
| "epoch": 3.250767656090072, |
| "grad_norm": 0.3072274493404741, |
| "learning_rate": 1.3105852320241326e-05, |
| "loss": 0.2141, |
| "step": 1588 |
| }, |
| { |
| "epoch": 3.2528147389969293, |
| "grad_norm": 0.32380150432128973, |
| "learning_rate": 1.307900105057009e-05, |
| "loss": 0.2218, |
| "step": 1589 |
| }, |
| { |
| "epoch": 3.2548618219037873, |
| "grad_norm": 0.3234313545922506, |
| "learning_rate": 1.3052163945481517e-05, |
| "loss": 0.2301, |
| "step": 1590 |
| }, |
| { |
| "epoch": 3.2569089048106448, |
| "grad_norm": 0.3409588063960833, |
| "learning_rate": 1.3025341059900675e-05, |
| "loss": 0.2331, |
| "step": 1591 |
| }, |
| { |
| "epoch": 3.2589559877175027, |
| "grad_norm": 0.33214628078621744, |
| "learning_rate": 1.2998532448723536e-05, |
| "loss": 0.2253, |
| "step": 1592 |
| }, |
| { |
| "epoch": 3.2610030706243602, |
| "grad_norm": 0.3495327746489498, |
| "learning_rate": 1.2971738166816871e-05, |
| "loss": 0.2369, |
| "step": 1593 |
| }, |
| { |
| "epoch": 3.263050153531218, |
| "grad_norm": 0.35093939156017323, |
| "learning_rate": 1.2944958269018103e-05, |
| "loss": 0.2329, |
| "step": 1594 |
| }, |
| { |
| "epoch": 3.2650972364380757, |
| "grad_norm": 0.3204843846779589, |
| "learning_rate": 1.291819281013524e-05, |
| "loss": 0.2144, |
| "step": 1595 |
| }, |
| { |
| "epoch": 3.2671443193449337, |
| "grad_norm": 0.3484251329111784, |
| "learning_rate": 1.289144184494671e-05, |
| "loss": 0.2531, |
| "step": 1596 |
| }, |
| { |
| "epoch": 3.269191402251791, |
| "grad_norm": 0.29945051356789365, |
| "learning_rate": 1.2864705428201307e-05, |
| "loss": 0.2293, |
| "step": 1597 |
| }, |
| { |
| "epoch": 3.2712384851586487, |
| "grad_norm": 0.3521856225970276, |
| "learning_rate": 1.2837983614618023e-05, |
| "loss": 0.3006, |
| "step": 1598 |
| }, |
| { |
| "epoch": 3.2732855680655066, |
| "grad_norm": 0.3221884922752649, |
| "learning_rate": 1.2811276458885993e-05, |
| "loss": 0.2331, |
| "step": 1599 |
| }, |
| { |
| "epoch": 3.2753326509723646, |
| "grad_norm": 0.30627973138617387, |
| "learning_rate": 1.2784584015664337e-05, |
| "loss": 0.222, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.277379733879222, |
| "grad_norm": 0.3650912580402862, |
| "learning_rate": 1.2757906339582053e-05, |
| "loss": 0.269, |
| "step": 1601 |
| }, |
| { |
| "epoch": 3.2794268167860796, |
| "grad_norm": 0.2847330580470038, |
| "learning_rate": 1.2731243485237932e-05, |
| "loss": 0.2161, |
| "step": 1602 |
| }, |
| { |
| "epoch": 3.2814738996929376, |
| "grad_norm": 0.3140163871014674, |
| "learning_rate": 1.2704595507200435e-05, |
| "loss": 0.271, |
| "step": 1603 |
| }, |
| { |
| "epoch": 3.2835209825997955, |
| "grad_norm": 0.324180451733573, |
| "learning_rate": 1.2677962460007555e-05, |
| "loss": 0.2124, |
| "step": 1604 |
| }, |
| { |
| "epoch": 3.285568065506653, |
| "grad_norm": 0.32841827310726324, |
| "learning_rate": 1.2651344398166745e-05, |
| "loss": 0.2905, |
| "step": 1605 |
| }, |
| { |
| "epoch": 3.2876151484135105, |
| "grad_norm": 0.2992524116524214, |
| "learning_rate": 1.26247413761548e-05, |
| "loss": 0.1951, |
| "step": 1606 |
| }, |
| { |
| "epoch": 3.2896622313203685, |
| "grad_norm": 0.3536568449695457, |
| "learning_rate": 1.2598153448417701e-05, |
| "loss": 0.3008, |
| "step": 1607 |
| }, |
| { |
| "epoch": 3.291709314227226, |
| "grad_norm": 0.2745017989746043, |
| "learning_rate": 1.2571580669370565e-05, |
| "loss": 0.218, |
| "step": 1608 |
| }, |
| { |
| "epoch": 3.293756397134084, |
| "grad_norm": 0.336785618203528, |
| "learning_rate": 1.254502309339749e-05, |
| "loss": 0.2163, |
| "step": 1609 |
| }, |
| { |
| "epoch": 3.2958034800409415, |
| "grad_norm": 0.29711300837975474, |
| "learning_rate": 1.2518480774851472e-05, |
| "loss": 0.2217, |
| "step": 1610 |
| }, |
| { |
| "epoch": 3.2978505629477994, |
| "grad_norm": 0.32116672616384045, |
| "learning_rate": 1.2491953768054263e-05, |
| "loss": 0.2595, |
| "step": 1611 |
| }, |
| { |
| "epoch": 3.299897645854657, |
| "grad_norm": 0.3367433516635412, |
| "learning_rate": 1.2465442127296297e-05, |
| "loss": 0.2293, |
| "step": 1612 |
| }, |
| { |
| "epoch": 3.301944728761515, |
| "grad_norm": 0.3300984778131218, |
| "learning_rate": 1.2438945906836557e-05, |
| "loss": 0.2443, |
| "step": 1613 |
| }, |
| { |
| "epoch": 3.3039918116683724, |
| "grad_norm": 0.31907195158009316, |
| "learning_rate": 1.241246516090245e-05, |
| "loss": 0.2321, |
| "step": 1614 |
| }, |
| { |
| "epoch": 3.3060388945752304, |
| "grad_norm": 0.3096668811954514, |
| "learning_rate": 1.2385999943689732e-05, |
| "loss": 0.2216, |
| "step": 1615 |
| }, |
| { |
| "epoch": 3.308085977482088, |
| "grad_norm": 0.3776462069541184, |
| "learning_rate": 1.2359550309362368e-05, |
| "loss": 0.2104, |
| "step": 1616 |
| }, |
| { |
| "epoch": 3.310133060388946, |
| "grad_norm": 0.3196294164019087, |
| "learning_rate": 1.2333116312052416e-05, |
| "loss": 0.2403, |
| "step": 1617 |
| }, |
| { |
| "epoch": 3.3121801432958033, |
| "grad_norm": 0.34176563178349245, |
| "learning_rate": 1.2306698005859975e-05, |
| "loss": 0.2409, |
| "step": 1618 |
| }, |
| { |
| "epoch": 3.3142272262026613, |
| "grad_norm": 0.34417655045127915, |
| "learning_rate": 1.2280295444852994e-05, |
| "loss": 0.2899, |
| "step": 1619 |
| }, |
| { |
| "epoch": 3.316274309109519, |
| "grad_norm": 0.34668338984896196, |
| "learning_rate": 1.22539086830672e-05, |
| "loss": 0.216, |
| "step": 1620 |
| }, |
| { |
| "epoch": 3.3183213920163768, |
| "grad_norm": 0.307563527231833, |
| "learning_rate": 1.2227537774505996e-05, |
| "loss": 0.204, |
| "step": 1621 |
| }, |
| { |
| "epoch": 3.3203684749232343, |
| "grad_norm": 0.36360020699523277, |
| "learning_rate": 1.2201182773140334e-05, |
| "loss": 0.2321, |
| "step": 1622 |
| }, |
| { |
| "epoch": 3.3224155578300922, |
| "grad_norm": 0.34959023754084967, |
| "learning_rate": 1.2174843732908609e-05, |
| "loss": 0.2417, |
| "step": 1623 |
| }, |
| { |
| "epoch": 3.3244626407369497, |
| "grad_norm": 0.35791334099991956, |
| "learning_rate": 1.2148520707716567e-05, |
| "loss": 0.267, |
| "step": 1624 |
| }, |
| { |
| "epoch": 3.3265097236438077, |
| "grad_norm": 0.31882901909973, |
| "learning_rate": 1.2122213751437147e-05, |
| "loss": 0.2368, |
| "step": 1625 |
| }, |
| { |
| "epoch": 3.328556806550665, |
| "grad_norm": 0.31734552594196086, |
| "learning_rate": 1.2095922917910427e-05, |
| "loss": 0.2437, |
| "step": 1626 |
| }, |
| { |
| "epoch": 3.330603889457523, |
| "grad_norm": 0.3303738558908603, |
| "learning_rate": 1.2069648260943473e-05, |
| "loss": 0.2326, |
| "step": 1627 |
| }, |
| { |
| "epoch": 3.3326509723643807, |
| "grad_norm": 0.32163501618546503, |
| "learning_rate": 1.2043389834310257e-05, |
| "loss": 0.2579, |
| "step": 1628 |
| }, |
| { |
| "epoch": 3.3346980552712386, |
| "grad_norm": 0.33400441651552865, |
| "learning_rate": 1.2017147691751512e-05, |
| "loss": 0.2572, |
| "step": 1629 |
| }, |
| { |
| "epoch": 3.336745138178096, |
| "grad_norm": 0.3062361134778084, |
| "learning_rate": 1.1990921886974669e-05, |
| "loss": 0.1971, |
| "step": 1630 |
| }, |
| { |
| "epoch": 3.338792221084954, |
| "grad_norm": 0.3617880140065343, |
| "learning_rate": 1.1964712473653713e-05, |
| "loss": 0.2752, |
| "step": 1631 |
| }, |
| { |
| "epoch": 3.3408393039918116, |
| "grad_norm": 0.31208903084798195, |
| "learning_rate": 1.1938519505429072e-05, |
| "loss": 0.1891, |
| "step": 1632 |
| }, |
| { |
| "epoch": 3.3428863868986696, |
| "grad_norm": 0.3846438154154162, |
| "learning_rate": 1.1912343035907535e-05, |
| "loss": 0.2663, |
| "step": 1633 |
| }, |
| { |
| "epoch": 3.344933469805527, |
| "grad_norm": 0.3360119347175398, |
| "learning_rate": 1.1886183118662108e-05, |
| "loss": 0.225, |
| "step": 1634 |
| }, |
| { |
| "epoch": 3.346980552712385, |
| "grad_norm": 0.34434286466245134, |
| "learning_rate": 1.1860039807231923e-05, |
| "loss": 0.2311, |
| "step": 1635 |
| }, |
| { |
| "epoch": 3.3490276356192425, |
| "grad_norm": 0.3193089695495996, |
| "learning_rate": 1.1833913155122132e-05, |
| "loss": 0.1958, |
| "step": 1636 |
| }, |
| { |
| "epoch": 3.3510747185261005, |
| "grad_norm": 0.34254339904952036, |
| "learning_rate": 1.1807803215803806e-05, |
| "loss": 0.2301, |
| "step": 1637 |
| }, |
| { |
| "epoch": 3.353121801432958, |
| "grad_norm": 0.37284073286468844, |
| "learning_rate": 1.1781710042713783e-05, |
| "loss": 0.2321, |
| "step": 1638 |
| }, |
| { |
| "epoch": 3.3551688843398155, |
| "grad_norm": 0.3400024012806843, |
| "learning_rate": 1.1755633689254609e-05, |
| "loss": 0.2481, |
| "step": 1639 |
| }, |
| { |
| "epoch": 3.3572159672466735, |
| "grad_norm": 0.3381525204885461, |
| "learning_rate": 1.1729574208794388e-05, |
| "loss": 0.2486, |
| "step": 1640 |
| }, |
| { |
| "epoch": 3.3592630501535314, |
| "grad_norm": 0.3197266149899627, |
| "learning_rate": 1.1703531654666714e-05, |
| "loss": 0.2368, |
| "step": 1641 |
| }, |
| { |
| "epoch": 3.361310133060389, |
| "grad_norm": 0.3130217545123078, |
| "learning_rate": 1.1677506080170512e-05, |
| "loss": 0.2342, |
| "step": 1642 |
| }, |
| { |
| "epoch": 3.3633572159672465, |
| "grad_norm": 0.32947434661394787, |
| "learning_rate": 1.1651497538569984e-05, |
| "loss": 0.2124, |
| "step": 1643 |
| }, |
| { |
| "epoch": 3.3654042988741044, |
| "grad_norm": 0.344686988795317, |
| "learning_rate": 1.162550608309446e-05, |
| "loss": 0.2464, |
| "step": 1644 |
| }, |
| { |
| "epoch": 3.3674513817809624, |
| "grad_norm": 0.30351992821717133, |
| "learning_rate": 1.1599531766938306e-05, |
| "loss": 0.2378, |
| "step": 1645 |
| }, |
| { |
| "epoch": 3.36949846468782, |
| "grad_norm": 0.31537723127663564, |
| "learning_rate": 1.1573574643260787e-05, |
| "loss": 0.2683, |
| "step": 1646 |
| }, |
| { |
| "epoch": 3.3715455475946774, |
| "grad_norm": 0.3179109407008773, |
| "learning_rate": 1.1547634765186016e-05, |
| "loss": 0.21, |
| "step": 1647 |
| }, |
| { |
| "epoch": 3.3735926305015353, |
| "grad_norm": 0.3414400925992681, |
| "learning_rate": 1.1521712185802789e-05, |
| "loss": 0.2467, |
| "step": 1648 |
| }, |
| { |
| "epoch": 3.375639713408393, |
| "grad_norm": 0.3015513958093698, |
| "learning_rate": 1.1495806958164508e-05, |
| "loss": 0.2333, |
| "step": 1649 |
| }, |
| { |
| "epoch": 3.377686796315251, |
| "grad_norm": 0.3221671987582906, |
| "learning_rate": 1.1469919135289058e-05, |
| "loss": 0.2697, |
| "step": 1650 |
| }, |
| { |
| "epoch": 3.3797338792221083, |
| "grad_norm": 0.29450678032670125, |
| "learning_rate": 1.1444048770158718e-05, |
| "loss": 0.2255, |
| "step": 1651 |
| }, |
| { |
| "epoch": 3.3817809621289663, |
| "grad_norm": 0.39235810212847905, |
| "learning_rate": 1.1418195915720002e-05, |
| "loss": 0.2599, |
| "step": 1652 |
| }, |
| { |
| "epoch": 3.383828045035824, |
| "grad_norm": 0.2945867088296247, |
| "learning_rate": 1.139236062488362e-05, |
| "loss": 0.2336, |
| "step": 1653 |
| }, |
| { |
| "epoch": 3.3858751279426818, |
| "grad_norm": 0.3178722215887932, |
| "learning_rate": 1.136654295052433e-05, |
| "loss": 0.266, |
| "step": 1654 |
| }, |
| { |
| "epoch": 3.3879222108495393, |
| "grad_norm": 0.32262451442066525, |
| "learning_rate": 1.134074294548082e-05, |
| "loss": 0.2282, |
| "step": 1655 |
| }, |
| { |
| "epoch": 3.389969293756397, |
| "grad_norm": 0.3260714484305257, |
| "learning_rate": 1.1314960662555639e-05, |
| "loss": 0.2294, |
| "step": 1656 |
| }, |
| { |
| "epoch": 3.3920163766632547, |
| "grad_norm": 0.29552572043468256, |
| "learning_rate": 1.1289196154515048e-05, |
| "loss": 0.1852, |
| "step": 1657 |
| }, |
| { |
| "epoch": 3.3940634595701127, |
| "grad_norm": 0.32257518359378057, |
| "learning_rate": 1.1263449474088944e-05, |
| "loss": 0.2122, |
| "step": 1658 |
| }, |
| { |
| "epoch": 3.39611054247697, |
| "grad_norm": 0.31678272789529893, |
| "learning_rate": 1.1237720673970713e-05, |
| "loss": 0.2391, |
| "step": 1659 |
| }, |
| { |
| "epoch": 3.398157625383828, |
| "grad_norm": 0.3294063880106706, |
| "learning_rate": 1.1212009806817163e-05, |
| "loss": 0.2582, |
| "step": 1660 |
| }, |
| { |
| "epoch": 3.4002047082906857, |
| "grad_norm": 0.33445038877415256, |
| "learning_rate": 1.118631692524843e-05, |
| "loss": 0.2325, |
| "step": 1661 |
| }, |
| { |
| "epoch": 3.4022517911975436, |
| "grad_norm": 0.31780769894415345, |
| "learning_rate": 1.1160642081847782e-05, |
| "loss": 0.2114, |
| "step": 1662 |
| }, |
| { |
| "epoch": 3.404298874104401, |
| "grad_norm": 0.34936980398469303, |
| "learning_rate": 1.1134985329161608e-05, |
| "loss": 0.2633, |
| "step": 1663 |
| }, |
| { |
| "epoch": 3.406345957011259, |
| "grad_norm": 0.25267194298381324, |
| "learning_rate": 1.1109346719699263e-05, |
| "loss": 0.1672, |
| "step": 1664 |
| }, |
| { |
| "epoch": 3.4083930399181166, |
| "grad_norm": 0.39414125375432546, |
| "learning_rate": 1.108372630593298e-05, |
| "loss": 0.3388, |
| "step": 1665 |
| }, |
| { |
| "epoch": 3.4104401228249746, |
| "grad_norm": 0.31231427401371586, |
| "learning_rate": 1.1058124140297718e-05, |
| "loss": 0.247, |
| "step": 1666 |
| }, |
| { |
| "epoch": 3.412487205731832, |
| "grad_norm": 0.2819687858861257, |
| "learning_rate": 1.1032540275191148e-05, |
| "loss": 0.2171, |
| "step": 1667 |
| }, |
| { |
| "epoch": 3.41453428863869, |
| "grad_norm": 0.32854044476275013, |
| "learning_rate": 1.1006974762973425e-05, |
| "loss": 0.2265, |
| "step": 1668 |
| }, |
| { |
| "epoch": 3.4165813715455475, |
| "grad_norm": 0.34353435192327664, |
| "learning_rate": 1.0981427655967183e-05, |
| "loss": 0.2469, |
| "step": 1669 |
| }, |
| { |
| "epoch": 3.4186284544524055, |
| "grad_norm": 0.32710285779281467, |
| "learning_rate": 1.0955899006457373e-05, |
| "loss": 0.2437, |
| "step": 1670 |
| }, |
| { |
| "epoch": 3.420675537359263, |
| "grad_norm": 0.3122882795616273, |
| "learning_rate": 1.0930388866691181e-05, |
| "loss": 0.2433, |
| "step": 1671 |
| }, |
| { |
| "epoch": 3.422722620266121, |
| "grad_norm": 0.29841602691342367, |
| "learning_rate": 1.0904897288877891e-05, |
| "loss": 0.2373, |
| "step": 1672 |
| }, |
| { |
| "epoch": 3.4247697031729785, |
| "grad_norm": 0.32318146749811455, |
| "learning_rate": 1.0879424325188805e-05, |
| "loss": 0.2477, |
| "step": 1673 |
| }, |
| { |
| "epoch": 3.4268167860798364, |
| "grad_norm": 0.32962691544440564, |
| "learning_rate": 1.085397002775716e-05, |
| "loss": 0.2577, |
| "step": 1674 |
| }, |
| { |
| "epoch": 3.428863868986694, |
| "grad_norm": 0.3028821115102944, |
| "learning_rate": 1.0828534448677942e-05, |
| "loss": 0.256, |
| "step": 1675 |
| }, |
| { |
| "epoch": 3.4309109518935514, |
| "grad_norm": 0.31035238566507123, |
| "learning_rate": 1.080311764000786e-05, |
| "loss": 0.223, |
| "step": 1676 |
| }, |
| { |
| "epoch": 3.4329580348004094, |
| "grad_norm": 0.30168173543643445, |
| "learning_rate": 1.0777719653765191e-05, |
| "loss": 0.2389, |
| "step": 1677 |
| }, |
| { |
| "epoch": 3.4350051177072674, |
| "grad_norm": 0.28500495929546144, |
| "learning_rate": 1.0752340541929711e-05, |
| "loss": 0.226, |
| "step": 1678 |
| }, |
| { |
| "epoch": 3.437052200614125, |
| "grad_norm": 0.3141146687901098, |
| "learning_rate": 1.0726980356442524e-05, |
| "loss": 0.2684, |
| "step": 1679 |
| }, |
| { |
| "epoch": 3.4390992835209824, |
| "grad_norm": 0.4682196146944551, |
| "learning_rate": 1.0701639149206061e-05, |
| "loss": 0.2647, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.4411463664278403, |
| "grad_norm": 0.3365183925944955, |
| "learning_rate": 1.0676316972083867e-05, |
| "loss": 0.2432, |
| "step": 1681 |
| }, |
| { |
| "epoch": 3.4431934493346983, |
| "grad_norm": 0.28313743388646095, |
| "learning_rate": 1.0651013876900546e-05, |
| "loss": 0.2099, |
| "step": 1682 |
| }, |
| { |
| "epoch": 3.445240532241556, |
| "grad_norm": 0.34192081983514505, |
| "learning_rate": 1.0625729915441659e-05, |
| "loss": 0.2482, |
| "step": 1683 |
| }, |
| { |
| "epoch": 3.4472876151484133, |
| "grad_norm": 0.31305659538484126, |
| "learning_rate": 1.060046513945361e-05, |
| "loss": 0.2137, |
| "step": 1684 |
| }, |
| { |
| "epoch": 3.4493346980552713, |
| "grad_norm": 0.3191034853933784, |
| "learning_rate": 1.0575219600643508e-05, |
| "loss": 0.2329, |
| "step": 1685 |
| }, |
| { |
| "epoch": 3.4513817809621288, |
| "grad_norm": 0.3245403446485034, |
| "learning_rate": 1.0549993350679138e-05, |
| "loss": 0.235, |
| "step": 1686 |
| }, |
| { |
| "epoch": 3.4534288638689867, |
| "grad_norm": 0.3088632467059862, |
| "learning_rate": 1.0524786441188786e-05, |
| "loss": 0.2155, |
| "step": 1687 |
| }, |
| { |
| "epoch": 3.4554759467758442, |
| "grad_norm": 0.3354339197261192, |
| "learning_rate": 1.0499598923761139e-05, |
| "loss": 0.2341, |
| "step": 1688 |
| }, |
| { |
| "epoch": 3.457523029682702, |
| "grad_norm": 0.3090008978999402, |
| "learning_rate": 1.0474430849945214e-05, |
| "loss": 0.2081, |
| "step": 1689 |
| }, |
| { |
| "epoch": 3.4595701125895597, |
| "grad_norm": 0.30640868139315103, |
| "learning_rate": 1.0449282271250239e-05, |
| "loss": 0.2258, |
| "step": 1690 |
| }, |
| { |
| "epoch": 3.4616171954964177, |
| "grad_norm": 0.325313731285426, |
| "learning_rate": 1.0424153239145527e-05, |
| "loss": 0.267, |
| "step": 1691 |
| }, |
| { |
| "epoch": 3.463664278403275, |
| "grad_norm": 0.3226758771977029, |
| "learning_rate": 1.0399043805060406e-05, |
| "loss": 0.2761, |
| "step": 1692 |
| }, |
| { |
| "epoch": 3.465711361310133, |
| "grad_norm": 0.2751548117016905, |
| "learning_rate": 1.0373954020384073e-05, |
| "loss": 0.2102, |
| "step": 1693 |
| }, |
| { |
| "epoch": 3.4677584442169906, |
| "grad_norm": 0.3197825524327039, |
| "learning_rate": 1.0348883936465537e-05, |
| "loss": 0.2558, |
| "step": 1694 |
| }, |
| { |
| "epoch": 3.4698055271238486, |
| "grad_norm": 0.3009633405603159, |
| "learning_rate": 1.0323833604613454e-05, |
| "loss": 0.2214, |
| "step": 1695 |
| }, |
| { |
| "epoch": 3.471852610030706, |
| "grad_norm": 0.3104955641697045, |
| "learning_rate": 1.029880307609608e-05, |
| "loss": 0.2686, |
| "step": 1696 |
| }, |
| { |
| "epoch": 3.473899692937564, |
| "grad_norm": 0.2692226200882447, |
| "learning_rate": 1.0273792402141134e-05, |
| "loss": 0.2312, |
| "step": 1697 |
| }, |
| { |
| "epoch": 3.4759467758444216, |
| "grad_norm": 0.31761762398562665, |
| "learning_rate": 1.0248801633935699e-05, |
| "loss": 0.2536, |
| "step": 1698 |
| }, |
| { |
| "epoch": 3.4779938587512795, |
| "grad_norm": 0.3196706676343915, |
| "learning_rate": 1.0223830822626124e-05, |
| "loss": 0.2556, |
| "step": 1699 |
| }, |
| { |
| "epoch": 3.480040941658137, |
| "grad_norm": 0.2983026635325646, |
| "learning_rate": 1.0198880019317913e-05, |
| "loss": 0.2468, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.482088024564995, |
| "grad_norm": 0.304844430252478, |
| "learning_rate": 1.017394927507561e-05, |
| "loss": 0.2252, |
| "step": 1701 |
| }, |
| { |
| "epoch": 3.4841351074718525, |
| "grad_norm": 0.32219494329823706, |
| "learning_rate": 1.0149038640922715e-05, |
| "loss": 0.2576, |
| "step": 1702 |
| }, |
| { |
| "epoch": 3.4861821903787105, |
| "grad_norm": 0.27799343218915756, |
| "learning_rate": 1.0124148167841577e-05, |
| "loss": 0.1703, |
| "step": 1703 |
| }, |
| { |
| "epoch": 3.488229273285568, |
| "grad_norm": 0.32497007511792664, |
| "learning_rate": 1.009927790677327e-05, |
| "loss": 0.2366, |
| "step": 1704 |
| }, |
| { |
| "epoch": 3.490276356192426, |
| "grad_norm": 0.3095778446432627, |
| "learning_rate": 1.0074427908617515e-05, |
| "loss": 0.2806, |
| "step": 1705 |
| }, |
| { |
| "epoch": 3.4923234390992834, |
| "grad_norm": 0.3249340961496839, |
| "learning_rate": 1.004959822423255e-05, |
| "loss": 0.2406, |
| "step": 1706 |
| }, |
| { |
| "epoch": 3.4943705220061414, |
| "grad_norm": 0.3092490415262631, |
| "learning_rate": 1.0024788904435054e-05, |
| "loss": 0.225, |
| "step": 1707 |
| }, |
| { |
| "epoch": 3.496417604912999, |
| "grad_norm": 0.3376859697600405, |
| "learning_rate": 1.0000000000000006e-05, |
| "loss": 0.2859, |
| "step": 1708 |
| }, |
| { |
| "epoch": 3.498464687819857, |
| "grad_norm": 0.2986922022588596, |
| "learning_rate": 9.975231561660617e-06, |
| "loss": 0.2241, |
| "step": 1709 |
| }, |
| { |
| "epoch": 3.5005117707267144, |
| "grad_norm": 0.3304776655435591, |
| "learning_rate": 9.950483640108215e-06, |
| "loss": 0.2865, |
| "step": 1710 |
| }, |
| { |
| "epoch": 3.5025588536335723, |
| "grad_norm": 0.30674425880777934, |
| "learning_rate": 9.92575628599213e-06, |
| "loss": 0.204, |
| "step": 1711 |
| }, |
| { |
| "epoch": 3.50460593654043, |
| "grad_norm": 0.3372001770402179, |
| "learning_rate": 9.901049549919601e-06, |
| "loss": 0.2705, |
| "step": 1712 |
| }, |
| { |
| "epoch": 3.5066530194472874, |
| "grad_norm": 0.28957256516814167, |
| "learning_rate": 9.876363482455675e-06, |
| "loss": 0.215, |
| "step": 1713 |
| }, |
| { |
| "epoch": 3.5087001023541453, |
| "grad_norm": 0.36096631037593485, |
| "learning_rate": 9.851698134123095e-06, |
| "loss": 0.2522, |
| "step": 1714 |
| }, |
| { |
| "epoch": 3.5107471852610033, |
| "grad_norm": 0.3128276504087451, |
| "learning_rate": 9.827053555402191e-06, |
| "loss": 0.2661, |
| "step": 1715 |
| }, |
| { |
| "epoch": 3.512794268167861, |
| "grad_norm": 0.3126033507485691, |
| "learning_rate": 9.802429796730792e-06, |
| "loss": 0.2329, |
| "step": 1716 |
| }, |
| { |
| "epoch": 3.5148413510747183, |
| "grad_norm": 0.33709412127432403, |
| "learning_rate": 9.777826908504126e-06, |
| "loss": 0.2673, |
| "step": 1717 |
| }, |
| { |
| "epoch": 3.5168884339815762, |
| "grad_norm": 0.31504566512999976, |
| "learning_rate": 9.753244941074696e-06, |
| "loss": 0.1942, |
| "step": 1718 |
| }, |
| { |
| "epoch": 3.518935516888434, |
| "grad_norm": 0.3081794499055162, |
| "learning_rate": 9.728683944752193e-06, |
| "loss": 0.23, |
| "step": 1719 |
| }, |
| { |
| "epoch": 3.5209825997952917, |
| "grad_norm": 0.3205819253659646, |
| "learning_rate": 9.704143969803392e-06, |
| "loss": 0.2599, |
| "step": 1720 |
| }, |
| { |
| "epoch": 3.5230296827021492, |
| "grad_norm": 0.3179566007920735, |
| "learning_rate": 9.679625066452028e-06, |
| "loss": 0.2468, |
| "step": 1721 |
| }, |
| { |
| "epoch": 3.525076765609007, |
| "grad_norm": 0.33546341224030785, |
| "learning_rate": 9.655127284878723e-06, |
| "loss": 0.2285, |
| "step": 1722 |
| }, |
| { |
| "epoch": 3.527123848515865, |
| "grad_norm": 0.3406356093850999, |
| "learning_rate": 9.630650675220892e-06, |
| "loss": 0.2277, |
| "step": 1723 |
| }, |
| { |
| "epoch": 3.5291709314227226, |
| "grad_norm": 0.297528564327505, |
| "learning_rate": 9.606195287572577e-06, |
| "loss": 0.199, |
| "step": 1724 |
| }, |
| { |
| "epoch": 3.53121801432958, |
| "grad_norm": 0.3221532376937218, |
| "learning_rate": 9.581761171984416e-06, |
| "loss": 0.2157, |
| "step": 1725 |
| }, |
| { |
| "epoch": 3.533265097236438, |
| "grad_norm": 0.35339763123663803, |
| "learning_rate": 9.557348378463503e-06, |
| "loss": 0.2377, |
| "step": 1726 |
| }, |
| { |
| "epoch": 3.535312180143296, |
| "grad_norm": 0.3011217732360328, |
| "learning_rate": 9.532956956973302e-06, |
| "loss": 0.2316, |
| "step": 1727 |
| }, |
| { |
| "epoch": 3.5373592630501536, |
| "grad_norm": 0.31883004437994317, |
| "learning_rate": 9.50858695743351e-06, |
| "loss": 0.2358, |
| "step": 1728 |
| }, |
| { |
| "epoch": 3.539406345957011, |
| "grad_norm": 0.4148367793712331, |
| "learning_rate": 9.484238429720018e-06, |
| "loss": 0.2412, |
| "step": 1729 |
| }, |
| { |
| "epoch": 3.541453428863869, |
| "grad_norm": 0.3651190389759453, |
| "learning_rate": 9.459911423664763e-06, |
| "loss": 0.2496, |
| "step": 1730 |
| }, |
| { |
| "epoch": 3.5435005117707266, |
| "grad_norm": 0.2782188953479953, |
| "learning_rate": 9.435605989055607e-06, |
| "loss": 0.2028, |
| "step": 1731 |
| }, |
| { |
| "epoch": 3.5455475946775845, |
| "grad_norm": 0.31391987453704123, |
| "learning_rate": 9.411322175636298e-06, |
| "loss": 0.2561, |
| "step": 1732 |
| }, |
| { |
| "epoch": 3.547594677584442, |
| "grad_norm": 0.3158415427773136, |
| "learning_rate": 9.387060033106321e-06, |
| "loss": 0.2956, |
| "step": 1733 |
| }, |
| { |
| "epoch": 3.5496417604913, |
| "grad_norm": 0.3175891657245625, |
| "learning_rate": 9.362819611120793e-06, |
| "loss": 0.2566, |
| "step": 1734 |
| }, |
| { |
| "epoch": 3.5516888433981575, |
| "grad_norm": 0.3312219072193364, |
| "learning_rate": 9.338600959290414e-06, |
| "loss": 0.2317, |
| "step": 1735 |
| }, |
| { |
| "epoch": 3.5537359263050154, |
| "grad_norm": 0.3341176350083145, |
| "learning_rate": 9.314404127181307e-06, |
| "loss": 0.293, |
| "step": 1736 |
| }, |
| { |
| "epoch": 3.555783009211873, |
| "grad_norm": 0.29940835888683603, |
| "learning_rate": 9.290229164314928e-06, |
| "loss": 0.2221, |
| "step": 1737 |
| }, |
| { |
| "epoch": 3.557830092118731, |
| "grad_norm": 0.3046910176714613, |
| "learning_rate": 9.266076120167992e-06, |
| "loss": 0.2472, |
| "step": 1738 |
| }, |
| { |
| "epoch": 3.5598771750255884, |
| "grad_norm": 0.2841913350596404, |
| "learning_rate": 9.241945044172353e-06, |
| "loss": 0.2277, |
| "step": 1739 |
| }, |
| { |
| "epoch": 3.5619242579324464, |
| "grad_norm": 0.323279705340557, |
| "learning_rate": 9.217835985714898e-06, |
| "loss": 0.2709, |
| "step": 1740 |
| }, |
| { |
| "epoch": 3.563971340839304, |
| "grad_norm": 0.29421984064136286, |
| "learning_rate": 9.193748994137462e-06, |
| "loss": 0.2064, |
| "step": 1741 |
| }, |
| { |
| "epoch": 3.566018423746162, |
| "grad_norm": 0.3431113829919363, |
| "learning_rate": 9.169684118736708e-06, |
| "loss": 0.2581, |
| "step": 1742 |
| }, |
| { |
| "epoch": 3.5680655066530194, |
| "grad_norm": 0.31454166876772843, |
| "learning_rate": 9.145641408764048e-06, |
| "loss": 0.2135, |
| "step": 1743 |
| }, |
| { |
| "epoch": 3.5701125895598773, |
| "grad_norm": 0.3294672393878416, |
| "learning_rate": 9.121620913425508e-06, |
| "loss": 0.2607, |
| "step": 1744 |
| }, |
| { |
| "epoch": 3.572159672466735, |
| "grad_norm": 0.29027976941333605, |
| "learning_rate": 9.097622681881673e-06, |
| "loss": 0.1969, |
| "step": 1745 |
| }, |
| { |
| "epoch": 3.574206755373593, |
| "grad_norm": 0.3177811895638415, |
| "learning_rate": 9.073646763247558e-06, |
| "loss": 0.2103, |
| "step": 1746 |
| }, |
| { |
| "epoch": 3.5762538382804503, |
| "grad_norm": 0.3196047544970432, |
| "learning_rate": 9.04969320659249e-06, |
| "loss": 0.239, |
| "step": 1747 |
| }, |
| { |
| "epoch": 3.5783009211873082, |
| "grad_norm": 0.2925767290546702, |
| "learning_rate": 9.025762060940062e-06, |
| "loss": 0.198, |
| "step": 1748 |
| }, |
| { |
| "epoch": 3.5803480040941658, |
| "grad_norm": 0.33053764466506, |
| "learning_rate": 9.001853375267989e-06, |
| "loss": 0.2366, |
| "step": 1749 |
| }, |
| { |
| "epoch": 3.5823950870010233, |
| "grad_norm": 0.3174304755008507, |
| "learning_rate": 8.977967198508001e-06, |
| "loss": 0.2256, |
| "step": 1750 |
| }, |
| { |
| "epoch": 3.5844421699078812, |
| "grad_norm": 0.343170859287156, |
| "learning_rate": 8.954103579545785e-06, |
| "loss": 0.2341, |
| "step": 1751 |
| }, |
| { |
| "epoch": 3.586489252814739, |
| "grad_norm": 0.3215320683527329, |
| "learning_rate": 8.93026256722085e-06, |
| "loss": 0.2045, |
| "step": 1752 |
| }, |
| { |
| "epoch": 3.5885363357215967, |
| "grad_norm": 0.345488281284008, |
| "learning_rate": 8.906444210326441e-06, |
| "loss": 0.2708, |
| "step": 1753 |
| }, |
| { |
| "epoch": 3.590583418628454, |
| "grad_norm": 0.29374650212717546, |
| "learning_rate": 8.882648557609434e-06, |
| "loss": 0.2144, |
| "step": 1754 |
| }, |
| { |
| "epoch": 3.592630501535312, |
| "grad_norm": 0.30791903954132194, |
| "learning_rate": 8.858875657770241e-06, |
| "loss": 0.2196, |
| "step": 1755 |
| }, |
| { |
| "epoch": 3.59467758444217, |
| "grad_norm": 0.37604604250081547, |
| "learning_rate": 8.83512555946271e-06, |
| "loss": 0.2842, |
| "step": 1756 |
| }, |
| { |
| "epoch": 3.5967246673490276, |
| "grad_norm": 0.2815977468791065, |
| "learning_rate": 8.811398311294008e-06, |
| "loss": 0.2128, |
| "step": 1757 |
| }, |
| { |
| "epoch": 3.598771750255885, |
| "grad_norm": 0.32463396760239493, |
| "learning_rate": 8.787693961824555e-06, |
| "loss": 0.2635, |
| "step": 1758 |
| }, |
| { |
| "epoch": 3.600818833162743, |
| "grad_norm": 0.3178563263559145, |
| "learning_rate": 8.764012559567899e-06, |
| "loss": 0.2749, |
| "step": 1759 |
| }, |
| { |
| "epoch": 3.602865916069601, |
| "grad_norm": 0.32193828000242514, |
| "learning_rate": 8.740354152990624e-06, |
| "loss": 0.232, |
| "step": 1760 |
| }, |
| { |
| "epoch": 3.6049129989764586, |
| "grad_norm": 0.35115161370306397, |
| "learning_rate": 8.716718790512251e-06, |
| "loss": 0.2421, |
| "step": 1761 |
| }, |
| { |
| "epoch": 3.606960081883316, |
| "grad_norm": 0.3150133989578825, |
| "learning_rate": 8.693106520505147e-06, |
| "loss": 0.2078, |
| "step": 1762 |
| }, |
| { |
| "epoch": 3.609007164790174, |
| "grad_norm": 0.34022251620199256, |
| "learning_rate": 8.669517391294397e-06, |
| "loss": 0.248, |
| "step": 1763 |
| }, |
| { |
| "epoch": 3.611054247697032, |
| "grad_norm": 0.3039508397036727, |
| "learning_rate": 8.645951451157741e-06, |
| "loss": 0.187, |
| "step": 1764 |
| }, |
| { |
| "epoch": 3.6131013306038895, |
| "grad_norm": 0.3287212950737981, |
| "learning_rate": 8.622408748325461e-06, |
| "loss": 0.2774, |
| "step": 1765 |
| }, |
| { |
| "epoch": 3.615148413510747, |
| "grad_norm": 0.3159497126746659, |
| "learning_rate": 8.598889330980277e-06, |
| "loss": 0.2251, |
| "step": 1766 |
| }, |
| { |
| "epoch": 3.617195496417605, |
| "grad_norm": 0.2931231286120822, |
| "learning_rate": 8.575393247257256e-06, |
| "loss": 0.2267, |
| "step": 1767 |
| }, |
| { |
| "epoch": 3.619242579324463, |
| "grad_norm": 0.3287345655736662, |
| "learning_rate": 8.551920545243704e-06, |
| "loss": 0.24, |
| "step": 1768 |
| }, |
| { |
| "epoch": 3.6212896622313204, |
| "grad_norm": 0.30400959536179484, |
| "learning_rate": 8.528471272979083e-06, |
| "loss": 0.2133, |
| "step": 1769 |
| }, |
| { |
| "epoch": 3.623336745138178, |
| "grad_norm": 0.31203010646854484, |
| "learning_rate": 8.50504547845489e-06, |
| "loss": 0.2404, |
| "step": 1770 |
| }, |
| { |
| "epoch": 3.625383828045036, |
| "grad_norm": 0.30015537192427894, |
| "learning_rate": 8.481643209614576e-06, |
| "loss": 0.2059, |
| "step": 1771 |
| }, |
| { |
| "epoch": 3.6274309109518934, |
| "grad_norm": 0.295949459736467, |
| "learning_rate": 8.45826451435347e-06, |
| "loss": 0.2209, |
| "step": 1772 |
| }, |
| { |
| "epoch": 3.6294779938587514, |
| "grad_norm": 0.32651774554006335, |
| "learning_rate": 8.434909440518613e-06, |
| "loss": 0.258, |
| "step": 1773 |
| }, |
| { |
| "epoch": 3.631525076765609, |
| "grad_norm": 0.28180464315900705, |
| "learning_rate": 8.411578035908728e-06, |
| "loss": 0.205, |
| "step": 1774 |
| }, |
| { |
| "epoch": 3.633572159672467, |
| "grad_norm": 0.3413634143023636, |
| "learning_rate": 8.388270348274092e-06, |
| "loss": 0.2769, |
| "step": 1775 |
| }, |
| { |
| "epoch": 3.6356192425793243, |
| "grad_norm": 0.33236322758981973, |
| "learning_rate": 8.364986425316448e-06, |
| "loss": 0.2234, |
| "step": 1776 |
| }, |
| { |
| "epoch": 3.6376663254861823, |
| "grad_norm": 0.3020595253520059, |
| "learning_rate": 8.341726314688875e-06, |
| "loss": 0.2509, |
| "step": 1777 |
| }, |
| { |
| "epoch": 3.63971340839304, |
| "grad_norm": 0.31852165056270737, |
| "learning_rate": 8.318490063995761e-06, |
| "loss": 0.2537, |
| "step": 1778 |
| }, |
| { |
| "epoch": 3.6417604912998978, |
| "grad_norm": 0.2944306330035562, |
| "learning_rate": 8.295277720792634e-06, |
| "loss": 0.2222, |
| "step": 1779 |
| }, |
| { |
| "epoch": 3.6438075742067553, |
| "grad_norm": 0.3191896831407699, |
| "learning_rate": 8.272089332586089e-06, |
| "loss": 0.2437, |
| "step": 1780 |
| }, |
| { |
| "epoch": 3.6458546571136132, |
| "grad_norm": 0.3341784697234189, |
| "learning_rate": 8.248924946833705e-06, |
| "loss": 0.25, |
| "step": 1781 |
| }, |
| { |
| "epoch": 3.6479017400204707, |
| "grad_norm": 0.28526708897000397, |
| "learning_rate": 8.225784610943948e-06, |
| "loss": 0.2586, |
| "step": 1782 |
| }, |
| { |
| "epoch": 3.6499488229273287, |
| "grad_norm": 0.30819644727048756, |
| "learning_rate": 8.20266837227603e-06, |
| "loss": 0.2482, |
| "step": 1783 |
| }, |
| { |
| "epoch": 3.651995905834186, |
| "grad_norm": 0.3391130155927512, |
| "learning_rate": 8.179576278139872e-06, |
| "loss": 0.2703, |
| "step": 1784 |
| }, |
| { |
| "epoch": 3.654042988741044, |
| "grad_norm": 0.3457772155494689, |
| "learning_rate": 8.156508375795995e-06, |
| "loss": 0.2138, |
| "step": 1785 |
| }, |
| { |
| "epoch": 3.6560900716479017, |
| "grad_norm": 0.3545988817947354, |
| "learning_rate": 8.133464712455364e-06, |
| "loss": 0.2381, |
| "step": 1786 |
| }, |
| { |
| "epoch": 3.6581371545547596, |
| "grad_norm": 0.3051355101331329, |
| "learning_rate": 8.11044533527937e-06, |
| "loss": 0.2212, |
| "step": 1787 |
| }, |
| { |
| "epoch": 3.660184237461617, |
| "grad_norm": 0.31093454380049773, |
| "learning_rate": 8.087450291379693e-06, |
| "loss": 0.2782, |
| "step": 1788 |
| }, |
| { |
| "epoch": 3.662231320368475, |
| "grad_norm": 0.3076008520431847, |
| "learning_rate": 8.064479627818213e-06, |
| "loss": 0.2563, |
| "step": 1789 |
| }, |
| { |
| "epoch": 3.6642784032753326, |
| "grad_norm": 0.2914759795466477, |
| "learning_rate": 8.041533391606892e-06, |
| "loss": 0.237, |
| "step": 1790 |
| }, |
| { |
| "epoch": 3.66632548618219, |
| "grad_norm": 0.32811153076900396, |
| "learning_rate": 8.018611629707735e-06, |
| "loss": 0.2192, |
| "step": 1791 |
| }, |
| { |
| "epoch": 3.668372569089048, |
| "grad_norm": 0.3349979236041854, |
| "learning_rate": 7.995714389032638e-06, |
| "loss": 0.2544, |
| "step": 1792 |
| }, |
| { |
| "epoch": 3.670419651995906, |
| "grad_norm": 0.30481283300379364, |
| "learning_rate": 7.972841716443304e-06, |
| "loss": 0.2021, |
| "step": 1793 |
| }, |
| { |
| "epoch": 3.6724667349027635, |
| "grad_norm": 0.31038423151434946, |
| "learning_rate": 7.949993658751168e-06, |
| "loss": 0.2714, |
| "step": 1794 |
| }, |
| { |
| "epoch": 3.674513817809621, |
| "grad_norm": 0.3046182836563986, |
| "learning_rate": 7.927170262717284e-06, |
| "loss": 0.2486, |
| "step": 1795 |
| }, |
| { |
| "epoch": 3.676560900716479, |
| "grad_norm": 0.321493522566335, |
| "learning_rate": 7.904371575052224e-06, |
| "loss": 0.257, |
| "step": 1796 |
| }, |
| { |
| "epoch": 3.678607983623337, |
| "grad_norm": 0.32116628196410996, |
| "learning_rate": 7.881597642416012e-06, |
| "loss": 0.2351, |
| "step": 1797 |
| }, |
| { |
| "epoch": 3.6806550665301945, |
| "grad_norm": 0.3225460384671046, |
| "learning_rate": 7.858848511417998e-06, |
| "loss": 0.2787, |
| "step": 1798 |
| }, |
| { |
| "epoch": 3.682702149437052, |
| "grad_norm": 0.3065477845223782, |
| "learning_rate": 7.836124228616762e-06, |
| "loss": 0.2059, |
| "step": 1799 |
| }, |
| { |
| "epoch": 3.68474923234391, |
| "grad_norm": 0.2820736922840727, |
| "learning_rate": 7.81342484052004e-06, |
| "loss": 0.2065, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.686796315250768, |
| "grad_norm": 0.3199543711113544, |
| "learning_rate": 7.790750393584616e-06, |
| "loss": 0.2482, |
| "step": 1801 |
| }, |
| { |
| "epoch": 3.6888433981576254, |
| "grad_norm": 0.3041948190106927, |
| "learning_rate": 7.768100934216234e-06, |
| "loss": 0.2278, |
| "step": 1802 |
| }, |
| { |
| "epoch": 3.690890481064483, |
| "grad_norm": 0.3053390429608892, |
| "learning_rate": 7.745476508769494e-06, |
| "loss": 0.2356, |
| "step": 1803 |
| }, |
| { |
| "epoch": 3.692937563971341, |
| "grad_norm": 0.3110750944178115, |
| "learning_rate": 7.72287716354776e-06, |
| "loss": 0.2402, |
| "step": 1804 |
| }, |
| { |
| "epoch": 3.694984646878199, |
| "grad_norm": 0.28367962348790254, |
| "learning_rate": 7.700302944803076e-06, |
| "loss": 0.1827, |
| "step": 1805 |
| }, |
| { |
| "epoch": 3.6970317297850563, |
| "grad_norm": 0.33323381054790285, |
| "learning_rate": 7.67775389873604e-06, |
| "loss": 0.2293, |
| "step": 1806 |
| }, |
| { |
| "epoch": 3.699078812691914, |
| "grad_norm": 0.3521970989279458, |
| "learning_rate": 7.65523007149575e-06, |
| "loss": 0.2657, |
| "step": 1807 |
| }, |
| { |
| "epoch": 3.701125895598772, |
| "grad_norm": 0.30699833196534315, |
| "learning_rate": 7.63273150917969e-06, |
| "loss": 0.2421, |
| "step": 1808 |
| }, |
| { |
| "epoch": 3.7031729785056293, |
| "grad_norm": 0.31416536601357237, |
| "learning_rate": 7.6102582578336315e-06, |
| "loss": 0.1997, |
| "step": 1809 |
| }, |
| { |
| "epoch": 3.7052200614124873, |
| "grad_norm": 0.3803922818493654, |
| "learning_rate": 7.587810363451544e-06, |
| "loss": 0.2428, |
| "step": 1810 |
| }, |
| { |
| "epoch": 3.707267144319345, |
| "grad_norm": 0.32978409370696515, |
| "learning_rate": 7.565387871975511e-06, |
| "loss": 0.2037, |
| "step": 1811 |
| }, |
| { |
| "epoch": 3.7093142272262027, |
| "grad_norm": 0.33553313939376517, |
| "learning_rate": 7.5429908292956045e-06, |
| "loss": 0.2675, |
| "step": 1812 |
| }, |
| { |
| "epoch": 3.7113613101330603, |
| "grad_norm": 0.3164776677964802, |
| "learning_rate": 7.5206192812498345e-06, |
| "loss": 0.262, |
| "step": 1813 |
| }, |
| { |
| "epoch": 3.713408393039918, |
| "grad_norm": 0.29867646878594656, |
| "learning_rate": 7.498273273624022e-06, |
| "loss": 0.2468, |
| "step": 1814 |
| }, |
| { |
| "epoch": 3.7154554759467757, |
| "grad_norm": 0.31216831365580633, |
| "learning_rate": 7.475952852151722e-06, |
| "loss": 0.2225, |
| "step": 1815 |
| }, |
| { |
| "epoch": 3.7175025588536337, |
| "grad_norm": 0.2854940367151503, |
| "learning_rate": 7.4536580625141244e-06, |
| "loss": 0.2302, |
| "step": 1816 |
| }, |
| { |
| "epoch": 3.719549641760491, |
| "grad_norm": 0.3133497229188554, |
| "learning_rate": 7.431388950339955e-06, |
| "loss": 0.2188, |
| "step": 1817 |
| }, |
| { |
| "epoch": 3.721596724667349, |
| "grad_norm": 0.3115695729233575, |
| "learning_rate": 7.409145561205402e-06, |
| "loss": 0.2251, |
| "step": 1818 |
| }, |
| { |
| "epoch": 3.7236438075742067, |
| "grad_norm": 0.3211680619051747, |
| "learning_rate": 7.386927940633981e-06, |
| "loss": 0.244, |
| "step": 1819 |
| }, |
| { |
| "epoch": 3.7256908904810646, |
| "grad_norm": 0.3048950617764936, |
| "learning_rate": 7.364736134096497e-06, |
| "loss": 0.264, |
| "step": 1820 |
| }, |
| { |
| "epoch": 3.727737973387922, |
| "grad_norm": 0.2939363289247235, |
| "learning_rate": 7.342570187010913e-06, |
| "loss": 0.235, |
| "step": 1821 |
| }, |
| { |
| "epoch": 3.72978505629478, |
| "grad_norm": 0.3233466134307953, |
| "learning_rate": 7.32043014474227e-06, |
| "loss": 0.306, |
| "step": 1822 |
| }, |
| { |
| "epoch": 3.7318321392016376, |
| "grad_norm": 0.30575032489620757, |
| "learning_rate": 7.2983160526025854e-06, |
| "loss": 0.2719, |
| "step": 1823 |
| }, |
| { |
| "epoch": 3.7338792221084955, |
| "grad_norm": 0.297249466993302, |
| "learning_rate": 7.276227955850774e-06, |
| "loss": 0.235, |
| "step": 1824 |
| }, |
| { |
| "epoch": 3.735926305015353, |
| "grad_norm": 0.29688130457735584, |
| "learning_rate": 7.254165899692554e-06, |
| "loss": 0.2313, |
| "step": 1825 |
| }, |
| { |
| "epoch": 3.737973387922211, |
| "grad_norm": 0.315867334334272, |
| "learning_rate": 7.2321299292803275e-06, |
| "loss": 0.2554, |
| "step": 1826 |
| }, |
| { |
| "epoch": 3.7400204708290685, |
| "grad_norm": 0.35865826681363316, |
| "learning_rate": 7.210120089713117e-06, |
| "loss": 0.2657, |
| "step": 1827 |
| }, |
| { |
| "epoch": 3.742067553735926, |
| "grad_norm": 0.29556390069528465, |
| "learning_rate": 7.188136426036498e-06, |
| "loss": 0.2309, |
| "step": 1828 |
| }, |
| { |
| "epoch": 3.744114636642784, |
| "grad_norm": 0.3062750470379352, |
| "learning_rate": 7.166178983242425e-06, |
| "loss": 0.25, |
| "step": 1829 |
| }, |
| { |
| "epoch": 3.746161719549642, |
| "grad_norm": 0.3388760658555746, |
| "learning_rate": 7.1442478062692135e-06, |
| "loss": 0.2837, |
| "step": 1830 |
| }, |
| { |
| "epoch": 3.7482088024564995, |
| "grad_norm": 0.3198475086242932, |
| "learning_rate": 7.12234294000143e-06, |
| "loss": 0.2334, |
| "step": 1831 |
| }, |
| { |
| "epoch": 3.750255885363357, |
| "grad_norm": 0.3089175153993069, |
| "learning_rate": 7.100464429269769e-06, |
| "loss": 0.2647, |
| "step": 1832 |
| }, |
| { |
| "epoch": 3.752302968270215, |
| "grad_norm": 0.28196699295142263, |
| "learning_rate": 7.078612318850999e-06, |
| "loss": 0.2168, |
| "step": 1833 |
| }, |
| { |
| "epoch": 3.754350051177073, |
| "grad_norm": 0.297420997662474, |
| "learning_rate": 7.056786653467882e-06, |
| "loss": 0.2028, |
| "step": 1834 |
| }, |
| { |
| "epoch": 3.7563971340839304, |
| "grad_norm": 0.28045674846802615, |
| "learning_rate": 7.034987477789008e-06, |
| "loss": 0.1939, |
| "step": 1835 |
| }, |
| { |
| "epoch": 3.758444216990788, |
| "grad_norm": 0.311062703079177, |
| "learning_rate": 7.01321483642879e-06, |
| "loss": 0.2611, |
| "step": 1836 |
| }, |
| { |
| "epoch": 3.760491299897646, |
| "grad_norm": 0.297760397539936, |
| "learning_rate": 6.991468773947321e-06, |
| "loss": 0.2575, |
| "step": 1837 |
| }, |
| { |
| "epoch": 3.762538382804504, |
| "grad_norm": 0.2972100075369938, |
| "learning_rate": 6.969749334850308e-06, |
| "loss": 0.2088, |
| "step": 1838 |
| }, |
| { |
| "epoch": 3.7645854657113613, |
| "grad_norm": 0.3155194168733677, |
| "learning_rate": 6.948056563588943e-06, |
| "loss": 0.2469, |
| "step": 1839 |
| }, |
| { |
| "epoch": 3.766632548618219, |
| "grad_norm": 0.33253411719664255, |
| "learning_rate": 6.926390504559879e-06, |
| "loss": 0.2066, |
| "step": 1840 |
| }, |
| { |
| "epoch": 3.768679631525077, |
| "grad_norm": 0.31443745809454954, |
| "learning_rate": 6.90475120210508e-06, |
| "loss": 0.2462, |
| "step": 1841 |
| }, |
| { |
| "epoch": 3.7707267144319347, |
| "grad_norm": 0.2916854017686706, |
| "learning_rate": 6.883138700511735e-06, |
| "loss": 0.206, |
| "step": 1842 |
| }, |
| { |
| "epoch": 3.7727737973387923, |
| "grad_norm": 0.29896805378427554, |
| "learning_rate": 6.861553044012206e-06, |
| "loss": 0.2458, |
| "step": 1843 |
| }, |
| { |
| "epoch": 3.7748208802456498, |
| "grad_norm": 0.3070231834047609, |
| "learning_rate": 6.8399942767839075e-06, |
| "loss": 0.2375, |
| "step": 1844 |
| }, |
| { |
| "epoch": 3.7768679631525077, |
| "grad_norm": 0.35269614289266477, |
| "learning_rate": 6.818462442949203e-06, |
| "loss": 0.2354, |
| "step": 1845 |
| }, |
| { |
| "epoch": 3.7789150460593657, |
| "grad_norm": 0.2885923676811397, |
| "learning_rate": 6.796957586575364e-06, |
| "loss": 0.2669, |
| "step": 1846 |
| }, |
| { |
| "epoch": 3.780962128966223, |
| "grad_norm": 0.31946737683114335, |
| "learning_rate": 6.775479751674439e-06, |
| "loss": 0.2292, |
| "step": 1847 |
| }, |
| { |
| "epoch": 3.7830092118730807, |
| "grad_norm": 0.31027789747129964, |
| "learning_rate": 6.754028982203154e-06, |
| "loss": 0.2204, |
| "step": 1848 |
| }, |
| { |
| "epoch": 3.7850562947799387, |
| "grad_norm": 0.28163894937364203, |
| "learning_rate": 6.732605322062869e-06, |
| "loss": 0.2131, |
| "step": 1849 |
| }, |
| { |
| "epoch": 3.787103377686796, |
| "grad_norm": 0.32382848241213846, |
| "learning_rate": 6.711208815099451e-06, |
| "loss": 0.2349, |
| "step": 1850 |
| }, |
| { |
| "epoch": 3.789150460593654, |
| "grad_norm": 0.3138159392771698, |
| "learning_rate": 6.689839505103195e-06, |
| "loss": 0.2214, |
| "step": 1851 |
| }, |
| { |
| "epoch": 3.7911975435005116, |
| "grad_norm": 0.3336295131707369, |
| "learning_rate": 6.668497435808736e-06, |
| "loss": 0.2318, |
| "step": 1852 |
| }, |
| { |
| "epoch": 3.7932446264073696, |
| "grad_norm": 0.2805986894444167, |
| "learning_rate": 6.647182650894956e-06, |
| "loss": 0.1898, |
| "step": 1853 |
| }, |
| { |
| "epoch": 3.795291709314227, |
| "grad_norm": 0.311652087268774, |
| "learning_rate": 6.6258951939849055e-06, |
| "loss": 0.2244, |
| "step": 1854 |
| }, |
| { |
| "epoch": 3.797338792221085, |
| "grad_norm": 0.33194907539363816, |
| "learning_rate": 6.604635108645683e-06, |
| "loss": 0.272, |
| "step": 1855 |
| }, |
| { |
| "epoch": 3.7993858751279426, |
| "grad_norm": 0.2974799213005746, |
| "learning_rate": 6.583402438388391e-06, |
| "loss": 0.2967, |
| "step": 1856 |
| }, |
| { |
| "epoch": 3.8014329580348005, |
| "grad_norm": 0.31133998039092214, |
| "learning_rate": 6.562197226668015e-06, |
| "loss": 0.2587, |
| "step": 1857 |
| }, |
| { |
| "epoch": 3.803480040941658, |
| "grad_norm": 0.30821960779378943, |
| "learning_rate": 6.5410195168833425e-06, |
| "loss": 0.242, |
| "step": 1858 |
| }, |
| { |
| "epoch": 3.805527123848516, |
| "grad_norm": 0.30511264324428783, |
| "learning_rate": 6.519869352376878e-06, |
| "loss": 0.2318, |
| "step": 1859 |
| }, |
| { |
| "epoch": 3.8075742067553735, |
| "grad_norm": 0.2885137726399915, |
| "learning_rate": 6.498746776434759e-06, |
| "loss": 0.2412, |
| "step": 1860 |
| }, |
| { |
| "epoch": 3.8096212896622315, |
| "grad_norm": 0.30572665106073443, |
| "learning_rate": 6.477651832286633e-06, |
| "loss": 0.2714, |
| "step": 1861 |
| }, |
| { |
| "epoch": 3.811668372569089, |
| "grad_norm": 0.2999528878346436, |
| "learning_rate": 6.456584563105628e-06, |
| "loss": 0.225, |
| "step": 1862 |
| }, |
| { |
| "epoch": 3.813715455475947, |
| "grad_norm": 0.306996947567515, |
| "learning_rate": 6.435545012008213e-06, |
| "loss": 0.2228, |
| "step": 1863 |
| }, |
| { |
| "epoch": 3.8157625383828044, |
| "grad_norm": 0.30156526722849053, |
| "learning_rate": 6.414533222054138e-06, |
| "loss": 0.2271, |
| "step": 1864 |
| }, |
| { |
| "epoch": 3.8178096212896624, |
| "grad_norm": 0.3027861977559771, |
| "learning_rate": 6.393549236246333e-06, |
| "loss": 0.2358, |
| "step": 1865 |
| }, |
| { |
| "epoch": 3.81985670419652, |
| "grad_norm": 0.3100676988224775, |
| "learning_rate": 6.372593097530822e-06, |
| "loss": 0.2224, |
| "step": 1866 |
| }, |
| { |
| "epoch": 3.821903787103378, |
| "grad_norm": 0.30881142890916535, |
| "learning_rate": 6.3516648487966456e-06, |
| "loss": 0.212, |
| "step": 1867 |
| }, |
| { |
| "epoch": 3.8239508700102354, |
| "grad_norm": 0.3074430788928016, |
| "learning_rate": 6.330764532875748e-06, |
| "loss": 0.2559, |
| "step": 1868 |
| }, |
| { |
| "epoch": 3.825997952917093, |
| "grad_norm": 0.29993534561417257, |
| "learning_rate": 6.309892192542919e-06, |
| "loss": 0.2414, |
| "step": 1869 |
| }, |
| { |
| "epoch": 3.828045035823951, |
| "grad_norm": 0.2948226082874635, |
| "learning_rate": 6.289047870515692e-06, |
| "loss": 0.2634, |
| "step": 1870 |
| }, |
| { |
| "epoch": 3.830092118730809, |
| "grad_norm": 0.35876524638729923, |
| "learning_rate": 6.268231609454254e-06, |
| "loss": 0.3221, |
| "step": 1871 |
| }, |
| { |
| "epoch": 3.8321392016376663, |
| "grad_norm": 0.29964527729201973, |
| "learning_rate": 6.247443451961366e-06, |
| "loss": 0.2046, |
| "step": 1872 |
| }, |
| { |
| "epoch": 3.834186284544524, |
| "grad_norm": 0.32365319216721733, |
| "learning_rate": 6.226683440582268e-06, |
| "loss": 0.2233, |
| "step": 1873 |
| }, |
| { |
| "epoch": 3.8362333674513818, |
| "grad_norm": 0.3064343796948702, |
| "learning_rate": 6.2059516178046064e-06, |
| "loss": 0.2548, |
| "step": 1874 |
| }, |
| { |
| "epoch": 3.8382804503582397, |
| "grad_norm": 0.31074022748353647, |
| "learning_rate": 6.185248026058312e-06, |
| "loss": 0.2682, |
| "step": 1875 |
| }, |
| { |
| "epoch": 3.8403275332650972, |
| "grad_norm": 0.3001350448200465, |
| "learning_rate": 6.164572707715564e-06, |
| "loss": 0.2287, |
| "step": 1876 |
| }, |
| { |
| "epoch": 3.8423746161719547, |
| "grad_norm": 0.3230753170136733, |
| "learning_rate": 6.143925705090666e-06, |
| "loss": 0.3244, |
| "step": 1877 |
| }, |
| { |
| "epoch": 3.8444216990788127, |
| "grad_norm": 0.2998343099705012, |
| "learning_rate": 6.123307060439967e-06, |
| "loss": 0.2163, |
| "step": 1878 |
| }, |
| { |
| "epoch": 3.8464687819856707, |
| "grad_norm": 0.3171615427794975, |
| "learning_rate": 6.102716815961787e-06, |
| "loss": 0.2126, |
| "step": 1879 |
| }, |
| { |
| "epoch": 3.848515864892528, |
| "grad_norm": 0.30038516037265045, |
| "learning_rate": 6.082155013796323e-06, |
| "loss": 0.2197, |
| "step": 1880 |
| }, |
| { |
| "epoch": 3.8505629477993857, |
| "grad_norm": 0.2706760721698934, |
| "learning_rate": 6.061621696025539e-06, |
| "loss": 0.203, |
| "step": 1881 |
| }, |
| { |
| "epoch": 3.8526100307062436, |
| "grad_norm": 0.3019695460717632, |
| "learning_rate": 6.041116904673125e-06, |
| "loss": 0.243, |
| "step": 1882 |
| }, |
| { |
| "epoch": 3.8546571136131016, |
| "grad_norm": 0.3238748626818905, |
| "learning_rate": 6.020640681704402e-06, |
| "loss": 0.2387, |
| "step": 1883 |
| }, |
| { |
| "epoch": 3.856704196519959, |
| "grad_norm": 0.2845226343722909, |
| "learning_rate": 6.000193069026181e-06, |
| "loss": 0.1939, |
| "step": 1884 |
| }, |
| { |
| "epoch": 3.8587512794268166, |
| "grad_norm": 0.30062466940994315, |
| "learning_rate": 5.979774108486751e-06, |
| "loss": 0.2417, |
| "step": 1885 |
| }, |
| { |
| "epoch": 3.8607983623336746, |
| "grad_norm": 0.29657229767791343, |
| "learning_rate": 5.95938384187575e-06, |
| "loss": 0.2513, |
| "step": 1886 |
| }, |
| { |
| "epoch": 3.862845445240532, |
| "grad_norm": 0.2965578800271511, |
| "learning_rate": 5.939022310924099e-06, |
| "loss": 0.238, |
| "step": 1887 |
| }, |
| { |
| "epoch": 3.86489252814739, |
| "grad_norm": 0.28944984642689814, |
| "learning_rate": 5.918689557303885e-06, |
| "loss": 0.233, |
| "step": 1888 |
| }, |
| { |
| "epoch": 3.8669396110542475, |
| "grad_norm": 0.3405343709910588, |
| "learning_rate": 5.898385622628336e-06, |
| "loss": 0.2568, |
| "step": 1889 |
| }, |
| { |
| "epoch": 3.8689866939611055, |
| "grad_norm": 0.31192910196034185, |
| "learning_rate": 5.878110548451675e-06, |
| "loss": 0.255, |
| "step": 1890 |
| }, |
| { |
| "epoch": 3.871033776867963, |
| "grad_norm": 0.3090737941594832, |
| "learning_rate": 5.857864376269051e-06, |
| "loss": 0.2356, |
| "step": 1891 |
| }, |
| { |
| "epoch": 3.873080859774821, |
| "grad_norm": 0.27438554656264486, |
| "learning_rate": 5.837647147516483e-06, |
| "loss": 0.19, |
| "step": 1892 |
| }, |
| { |
| "epoch": 3.8751279426816785, |
| "grad_norm": 0.3021169809249445, |
| "learning_rate": 5.817458903570747e-06, |
| "loss": 0.2248, |
| "step": 1893 |
| }, |
| { |
| "epoch": 3.8771750255885364, |
| "grad_norm": 0.3213447653550202, |
| "learning_rate": 5.7972996857492896e-06, |
| "loss": 0.2269, |
| "step": 1894 |
| }, |
| { |
| "epoch": 3.879222108495394, |
| "grad_norm": 0.33058912837962084, |
| "learning_rate": 5.777169535310152e-06, |
| "loss": 0.2359, |
| "step": 1895 |
| }, |
| { |
| "epoch": 3.881269191402252, |
| "grad_norm": 0.28572806654091426, |
| "learning_rate": 5.7570684934519135e-06, |
| "loss": 0.2147, |
| "step": 1896 |
| }, |
| { |
| "epoch": 3.8833162743091094, |
| "grad_norm": 0.32405838839318063, |
| "learning_rate": 5.736996601313545e-06, |
| "loss": 0.25, |
| "step": 1897 |
| }, |
| { |
| "epoch": 3.8853633572159674, |
| "grad_norm": 0.31407112083239963, |
| "learning_rate": 5.716953899974371e-06, |
| "loss": 0.2628, |
| "step": 1898 |
| }, |
| { |
| "epoch": 3.887410440122825, |
| "grad_norm": 0.24848541316900655, |
| "learning_rate": 5.696940430453981e-06, |
| "loss": 0.1664, |
| "step": 1899 |
| }, |
| { |
| "epoch": 3.889457523029683, |
| "grad_norm": 0.3315987651736212, |
| "learning_rate": 5.676956233712139e-06, |
| "loss": 0.2637, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.8915046059365404, |
| "grad_norm": 0.2855594139469349, |
| "learning_rate": 5.657001350648674e-06, |
| "loss": 0.207, |
| "step": 1901 |
| }, |
| { |
| "epoch": 3.8935516888433983, |
| "grad_norm": 0.3282465345015928, |
| "learning_rate": 5.6370758221034595e-06, |
| "loss": 0.2535, |
| "step": 1902 |
| }, |
| { |
| "epoch": 3.895598771750256, |
| "grad_norm": 0.30475560401896684, |
| "learning_rate": 5.617179688856271e-06, |
| "loss": 0.2432, |
| "step": 1903 |
| }, |
| { |
| "epoch": 3.8976458546571138, |
| "grad_norm": 0.30871123340361434, |
| "learning_rate": 5.597312991626713e-06, |
| "loss": 0.2134, |
| "step": 1904 |
| }, |
| { |
| "epoch": 3.8996929375639713, |
| "grad_norm": 0.33995724036296693, |
| "learning_rate": 5.577475771074168e-06, |
| "loss": 0.2485, |
| "step": 1905 |
| }, |
| { |
| "epoch": 3.901740020470829, |
| "grad_norm": 0.2811551664838514, |
| "learning_rate": 5.557668067797677e-06, |
| "loss": 0.2453, |
| "step": 1906 |
| }, |
| { |
| "epoch": 3.9037871033776868, |
| "grad_norm": 0.3085482770411968, |
| "learning_rate": 5.537889922335877e-06, |
| "loss": 0.2841, |
| "step": 1907 |
| }, |
| { |
| "epoch": 3.9058341862845447, |
| "grad_norm": 0.291552651807712, |
| "learning_rate": 5.5181413751669125e-06, |
| "loss": 0.2519, |
| "step": 1908 |
| }, |
| { |
| "epoch": 3.907881269191402, |
| "grad_norm": 0.301795002796971, |
| "learning_rate": 5.498422466708349e-06, |
| "loss": 0.2157, |
| "step": 1909 |
| }, |
| { |
| "epoch": 3.9099283520982597, |
| "grad_norm": 0.33010524936332025, |
| "learning_rate": 5.478733237317084e-06, |
| "loss": 0.2272, |
| "step": 1910 |
| }, |
| { |
| "epoch": 3.9119754350051177, |
| "grad_norm": 0.30279565810368547, |
| "learning_rate": 5.459073727289291e-06, |
| "loss": 0.2462, |
| "step": 1911 |
| }, |
| { |
| "epoch": 3.9140225179119756, |
| "grad_norm": 0.3180781810526128, |
| "learning_rate": 5.439443976860306e-06, |
| "loss": 0.2358, |
| "step": 1912 |
| }, |
| { |
| "epoch": 3.916069600818833, |
| "grad_norm": 0.3040038712809124, |
| "learning_rate": 5.419844026204568e-06, |
| "loss": 0.2266, |
| "step": 1913 |
| }, |
| { |
| "epoch": 3.9181166837256907, |
| "grad_norm": 0.31115682004986517, |
| "learning_rate": 5.400273915435526e-06, |
| "loss": 0.2706, |
| "step": 1914 |
| }, |
| { |
| "epoch": 3.9201637666325486, |
| "grad_norm": 0.2723072282278328, |
| "learning_rate": 5.38073368460555e-06, |
| "loss": 0.2172, |
| "step": 1915 |
| }, |
| { |
| "epoch": 3.9222108495394066, |
| "grad_norm": 0.3236989253933042, |
| "learning_rate": 5.361223373705873e-06, |
| "loss": 0.2671, |
| "step": 1916 |
| }, |
| { |
| "epoch": 3.924257932446264, |
| "grad_norm": 0.2949286813519759, |
| "learning_rate": 5.341743022666468e-06, |
| "loss": 0.2575, |
| "step": 1917 |
| }, |
| { |
| "epoch": 3.9263050153531216, |
| "grad_norm": 0.2929617720522718, |
| "learning_rate": 5.32229267135602e-06, |
| "loss": 0.2477, |
| "step": 1918 |
| }, |
| { |
| "epoch": 3.9283520982599796, |
| "grad_norm": 0.29944445931066044, |
| "learning_rate": 5.302872359581799e-06, |
| "loss": 0.3016, |
| "step": 1919 |
| }, |
| { |
| "epoch": 3.9303991811668375, |
| "grad_norm": 0.2792530693391496, |
| "learning_rate": 5.283482127089603e-06, |
| "loss": 0.2116, |
| "step": 1920 |
| }, |
| { |
| "epoch": 3.932446264073695, |
| "grad_norm": 0.3148045265972349, |
| "learning_rate": 5.2641220135636685e-06, |
| "loss": 0.251, |
| "step": 1921 |
| }, |
| { |
| "epoch": 3.9344933469805525, |
| "grad_norm": 0.29519836172160036, |
| "learning_rate": 5.244792058626587e-06, |
| "loss": 0.2379, |
| "step": 1922 |
| }, |
| { |
| "epoch": 3.9365404298874105, |
| "grad_norm": 0.27640580348684785, |
| "learning_rate": 5.2254923018392344e-06, |
| "loss": 0.2094, |
| "step": 1923 |
| }, |
| { |
| "epoch": 3.9385875127942684, |
| "grad_norm": 0.3216119033954627, |
| "learning_rate": 5.206222782700667e-06, |
| "loss": 0.2226, |
| "step": 1924 |
| }, |
| { |
| "epoch": 3.940634595701126, |
| "grad_norm": 0.28724430058767236, |
| "learning_rate": 5.186983540648074e-06, |
| "loss": 0.229, |
| "step": 1925 |
| }, |
| { |
| "epoch": 3.9426816786079835, |
| "grad_norm": 0.30838073757141554, |
| "learning_rate": 5.167774615056669e-06, |
| "loss": 0.2471, |
| "step": 1926 |
| }, |
| { |
| "epoch": 3.9447287615148414, |
| "grad_norm": 0.3050237811038075, |
| "learning_rate": 5.1485960452396266e-06, |
| "loss": 0.2516, |
| "step": 1927 |
| }, |
| { |
| "epoch": 3.946775844421699, |
| "grad_norm": 0.3269582811451654, |
| "learning_rate": 5.1294478704479896e-06, |
| "loss": 0.2757, |
| "step": 1928 |
| }, |
| { |
| "epoch": 3.948822927328557, |
| "grad_norm": 0.32559352766006666, |
| "learning_rate": 5.1103301298705995e-06, |
| "loss": 0.2234, |
| "step": 1929 |
| }, |
| { |
| "epoch": 3.9508700102354144, |
| "grad_norm": 0.3018696607635989, |
| "learning_rate": 5.091242862634e-06, |
| "loss": 0.2112, |
| "step": 1930 |
| }, |
| { |
| "epoch": 3.9529170931422724, |
| "grad_norm": 0.28807273157869273, |
| "learning_rate": 5.072186107802377e-06, |
| "loss": 0.2698, |
| "step": 1931 |
| }, |
| { |
| "epoch": 3.95496417604913, |
| "grad_norm": 0.324456956245341, |
| "learning_rate": 5.05315990437747e-06, |
| "loss": 0.2451, |
| "step": 1932 |
| }, |
| { |
| "epoch": 3.957011258955988, |
| "grad_norm": 0.314429989537044, |
| "learning_rate": 5.0341642912984844e-06, |
| "loss": 0.2311, |
| "step": 1933 |
| }, |
| { |
| "epoch": 3.9590583418628453, |
| "grad_norm": 0.2926963524885184, |
| "learning_rate": 5.015199307442027e-06, |
| "loss": 0.2418, |
| "step": 1934 |
| }, |
| { |
| "epoch": 3.9611054247697033, |
| "grad_norm": 0.30442791024762783, |
| "learning_rate": 4.996264991622015e-06, |
| "loss": 0.2513, |
| "step": 1935 |
| }, |
| { |
| "epoch": 3.963152507676561, |
| "grad_norm": 0.30348062095693085, |
| "learning_rate": 4.977361382589607e-06, |
| "loss": 0.217, |
| "step": 1936 |
| }, |
| { |
| "epoch": 3.9651995905834188, |
| "grad_norm": 0.2876460419928197, |
| "learning_rate": 4.958488519033096e-06, |
| "loss": 0.2143, |
| "step": 1937 |
| }, |
| { |
| "epoch": 3.9672466734902763, |
| "grad_norm": 0.3075426518738084, |
| "learning_rate": 4.939646439577868e-06, |
| "loss": 0.2275, |
| "step": 1938 |
| }, |
| { |
| "epoch": 3.969293756397134, |
| "grad_norm": 0.3060349427123557, |
| "learning_rate": 4.920835182786316e-06, |
| "loss": 0.2371, |
| "step": 1939 |
| }, |
| { |
| "epoch": 3.9713408393039917, |
| "grad_norm": 0.31280082710142615, |
| "learning_rate": 4.9020547871577265e-06, |
| "loss": 0.2234, |
| "step": 1940 |
| }, |
| { |
| "epoch": 3.9733879222108497, |
| "grad_norm": 0.3134880106721245, |
| "learning_rate": 4.8833052911282375e-06, |
| "loss": 0.2492, |
| "step": 1941 |
| }, |
| { |
| "epoch": 3.975435005117707, |
| "grad_norm": 0.2953871360194199, |
| "learning_rate": 4.864586733070755e-06, |
| "loss": 0.221, |
| "step": 1942 |
| }, |
| { |
| "epoch": 3.977482088024565, |
| "grad_norm": 0.31671009607399275, |
| "learning_rate": 4.845899151294848e-06, |
| "loss": 0.2335, |
| "step": 1943 |
| }, |
| { |
| "epoch": 3.9795291709314227, |
| "grad_norm": 0.2908681144449216, |
| "learning_rate": 4.827242584046698e-06, |
| "loss": 0.2767, |
| "step": 1944 |
| }, |
| { |
| "epoch": 3.9815762538382806, |
| "grad_norm": 0.2931420599842891, |
| "learning_rate": 4.808617069509034e-06, |
| "loss": 0.2475, |
| "step": 1945 |
| }, |
| { |
| "epoch": 3.983623336745138, |
| "grad_norm": 0.2955583206381109, |
| "learning_rate": 4.790022645800994e-06, |
| "loss": 0.2737, |
| "step": 1946 |
| }, |
| { |
| "epoch": 3.9856704196519956, |
| "grad_norm": 0.2873161139594403, |
| "learning_rate": 4.77145935097811e-06, |
| "loss": 0.2116, |
| "step": 1947 |
| }, |
| { |
| "epoch": 3.9877175025588536, |
| "grad_norm": 0.30419560839821447, |
| "learning_rate": 4.752927223032196e-06, |
| "loss": 0.2261, |
| "step": 1948 |
| }, |
| { |
| "epoch": 3.9897645854657116, |
| "grad_norm": 0.3199774705798064, |
| "learning_rate": 4.7344262998912885e-06, |
| "loss": 0.2478, |
| "step": 1949 |
| }, |
| { |
| "epoch": 3.991811668372569, |
| "grad_norm": 0.31331370811795206, |
| "learning_rate": 4.715956619419539e-06, |
| "loss": 0.2427, |
| "step": 1950 |
| }, |
| { |
| "epoch": 3.9938587512794266, |
| "grad_norm": 0.3035495734347721, |
| "learning_rate": 4.697518219417188e-06, |
| "loss": 0.2412, |
| "step": 1951 |
| }, |
| { |
| "epoch": 3.9959058341862845, |
| "grad_norm": 0.2835191629580971, |
| "learning_rate": 4.679111137620442e-06, |
| "loss": 0.2054, |
| "step": 1952 |
| }, |
| { |
| "epoch": 3.9979529170931425, |
| "grad_norm": 0.29245602269669696, |
| "learning_rate": 4.660735411701398e-06, |
| "loss": 0.2047, |
| "step": 1953 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.7074262659570464, |
| "learning_rate": 4.6423910792680005e-06, |
| "loss": 0.2856, |
| "step": 1954 |
| }, |
| { |
| "epoch": 4.0020470829068575, |
| "grad_norm": 0.4712597575605154, |
| "learning_rate": 4.62407817786394e-06, |
| "loss": 0.1814, |
| "step": 1955 |
| }, |
| { |
| "epoch": 4.004094165813715, |
| "grad_norm": 0.38151984418088236, |
| "learning_rate": 4.605796744968556e-06, |
| "loss": 0.1724, |
| "step": 1956 |
| }, |
| { |
| "epoch": 4.006141248720573, |
| "grad_norm": 0.34020967642434874, |
| "learning_rate": 4.587546817996826e-06, |
| "loss": 0.149, |
| "step": 1957 |
| }, |
| { |
| "epoch": 4.008188331627431, |
| "grad_norm": 0.31262949437875953, |
| "learning_rate": 4.56932843429922e-06, |
| "loss": 0.161, |
| "step": 1958 |
| }, |
| { |
| "epoch": 4.0102354145342884, |
| "grad_norm": 0.4303302435498056, |
| "learning_rate": 4.551141631161651e-06, |
| "loss": 0.1656, |
| "step": 1959 |
| }, |
| { |
| "epoch": 4.012282497441146, |
| "grad_norm": 0.4285520812331891, |
| "learning_rate": 4.532986445805405e-06, |
| "loss": 0.1627, |
| "step": 1960 |
| }, |
| { |
| "epoch": 4.014329580348004, |
| "grad_norm": 0.4192066755161973, |
| "learning_rate": 4.514862915387059e-06, |
| "loss": 0.1827, |
| "step": 1961 |
| }, |
| { |
| "epoch": 4.016376663254862, |
| "grad_norm": 0.3658567404594509, |
| "learning_rate": 4.496771076998405e-06, |
| "loss": 0.2017, |
| "step": 1962 |
| }, |
| { |
| "epoch": 4.018423746161719, |
| "grad_norm": 0.3317640866707838, |
| "learning_rate": 4.478710967666371e-06, |
| "loss": 0.1817, |
| "step": 1963 |
| }, |
| { |
| "epoch": 4.020470829068577, |
| "grad_norm": 0.38931119420729465, |
| "learning_rate": 4.460682624352952e-06, |
| "loss": 0.1582, |
| "step": 1964 |
| }, |
| { |
| "epoch": 4.022517911975435, |
| "grad_norm": 0.4025748777608708, |
| "learning_rate": 4.442686083955132e-06, |
| "loss": 0.1692, |
| "step": 1965 |
| }, |
| { |
| "epoch": 4.024564994882293, |
| "grad_norm": 0.3465330209700674, |
| "learning_rate": 4.424721383304791e-06, |
| "loss": 0.1623, |
| "step": 1966 |
| }, |
| { |
| "epoch": 4.02661207778915, |
| "grad_norm": 0.33474185562540504, |
| "learning_rate": 4.4067885591686625e-06, |
| "loss": 0.1629, |
| "step": 1967 |
| }, |
| { |
| "epoch": 4.028659160696008, |
| "grad_norm": 0.2994162000223648, |
| "learning_rate": 4.388887648248237e-06, |
| "loss": 0.1943, |
| "step": 1968 |
| }, |
| { |
| "epoch": 4.030706243602866, |
| "grad_norm": 0.30244570971558177, |
| "learning_rate": 4.371018687179689e-06, |
| "loss": 0.2009, |
| "step": 1969 |
| }, |
| { |
| "epoch": 4.032753326509724, |
| "grad_norm": 0.31048799941390864, |
| "learning_rate": 4.353181712533807e-06, |
| "loss": 0.1763, |
| "step": 1970 |
| }, |
| { |
| "epoch": 4.034800409416581, |
| "grad_norm": 0.31183964535945935, |
| "learning_rate": 4.3353767608159125e-06, |
| "loss": 0.1695, |
| "step": 1971 |
| }, |
| { |
| "epoch": 4.036847492323439, |
| "grad_norm": 0.31495339654929116, |
| "learning_rate": 4.317603868465794e-06, |
| "loss": 0.156, |
| "step": 1972 |
| }, |
| { |
| "epoch": 4.038894575230297, |
| "grad_norm": 0.2985528590711877, |
| "learning_rate": 4.299863071857617e-06, |
| "loss": 0.1687, |
| "step": 1973 |
| }, |
| { |
| "epoch": 4.040941658137155, |
| "grad_norm": 0.2898165046108191, |
| "learning_rate": 4.2821544072998655e-06, |
| "loss": 0.1689, |
| "step": 1974 |
| }, |
| { |
| "epoch": 4.042988741044012, |
| "grad_norm": 0.2806552847148077, |
| "learning_rate": 4.264477911035265e-06, |
| "loss": 0.1463, |
| "step": 1975 |
| }, |
| { |
| "epoch": 4.04503582395087, |
| "grad_norm": 0.2973917660709716, |
| "learning_rate": 4.246833619240702e-06, |
| "loss": 0.1452, |
| "step": 1976 |
| }, |
| { |
| "epoch": 4.047082906857728, |
| "grad_norm": 0.3137432130391123, |
| "learning_rate": 4.229221568027151e-06, |
| "loss": 0.1821, |
| "step": 1977 |
| }, |
| { |
| "epoch": 4.049129989764586, |
| "grad_norm": 0.29853330078810963, |
| "learning_rate": 4.211641793439609e-06, |
| "loss": 0.168, |
| "step": 1978 |
| }, |
| { |
| "epoch": 4.051177072671443, |
| "grad_norm": 0.3165013554197021, |
| "learning_rate": 4.194094331457004e-06, |
| "loss": 0.1753, |
| "step": 1979 |
| }, |
| { |
| "epoch": 4.053224155578301, |
| "grad_norm": 0.30677661034631387, |
| "learning_rate": 4.176579217992143e-06, |
| "loss": 0.1525, |
| "step": 1980 |
| }, |
| { |
| "epoch": 4.055271238485159, |
| "grad_norm": 0.29846779652611327, |
| "learning_rate": 4.159096488891623e-06, |
| "loss": 0.1559, |
| "step": 1981 |
| }, |
| { |
| "epoch": 4.0573183213920165, |
| "grad_norm": 0.3014892204744538, |
| "learning_rate": 4.1416461799357675e-06, |
| "loss": 0.1732, |
| "step": 1982 |
| }, |
| { |
| "epoch": 4.059365404298874, |
| "grad_norm": 0.29810838609292906, |
| "learning_rate": 4.124228326838544e-06, |
| "loss": 0.1442, |
| "step": 1983 |
| }, |
| { |
| "epoch": 4.061412487205732, |
| "grad_norm": 0.28754100275614525, |
| "learning_rate": 4.106842965247497e-06, |
| "loss": 0.1587, |
| "step": 1984 |
| }, |
| { |
| "epoch": 4.06345957011259, |
| "grad_norm": 0.28330676946462274, |
| "learning_rate": 4.0894901307436805e-06, |
| "loss": 0.1697, |
| "step": 1985 |
| }, |
| { |
| "epoch": 4.0655066530194475, |
| "grad_norm": 0.285301416935174, |
| "learning_rate": 4.072169858841561e-06, |
| "loss": 0.1595, |
| "step": 1986 |
| }, |
| { |
| "epoch": 4.067553735926305, |
| "grad_norm": 0.2837041098864574, |
| "learning_rate": 4.054882184988971e-06, |
| "loss": 0.1871, |
| "step": 1987 |
| }, |
| { |
| "epoch": 4.0696008188331625, |
| "grad_norm": 0.28867050594006555, |
| "learning_rate": 4.0376271445670465e-06, |
| "loss": 0.1805, |
| "step": 1988 |
| }, |
| { |
| "epoch": 4.071647901740021, |
| "grad_norm": 0.27159571765823515, |
| "learning_rate": 4.020404772890101e-06, |
| "loss": 0.1782, |
| "step": 1989 |
| }, |
| { |
| "epoch": 4.073694984646878, |
| "grad_norm": 0.29070033660022687, |
| "learning_rate": 4.003215105205613e-06, |
| "loss": 0.1971, |
| "step": 1990 |
| }, |
| { |
| "epoch": 4.075742067553736, |
| "grad_norm": 0.29431870068785426, |
| "learning_rate": 3.986058176694123e-06, |
| "loss": 0.1836, |
| "step": 1991 |
| }, |
| { |
| "epoch": 4.077789150460593, |
| "grad_norm": 0.30901872319472756, |
| "learning_rate": 3.968934022469157e-06, |
| "loss": 0.1615, |
| "step": 1992 |
| }, |
| { |
| "epoch": 4.079836233367452, |
| "grad_norm": 0.26872512623266737, |
| "learning_rate": 3.951842677577171e-06, |
| "loss": 0.1571, |
| "step": 1993 |
| }, |
| { |
| "epoch": 4.081883316274309, |
| "grad_norm": 0.277601430071484, |
| "learning_rate": 3.9347841769974925e-06, |
| "loss": 0.1993, |
| "step": 1994 |
| }, |
| { |
| "epoch": 4.083930399181167, |
| "grad_norm": 0.2815481603482528, |
| "learning_rate": 3.917758555642195e-06, |
| "loss": 0.1776, |
| "step": 1995 |
| }, |
| { |
| "epoch": 4.085977482088024, |
| "grad_norm": 0.31199877087236383, |
| "learning_rate": 3.900765848356083e-06, |
| "loss": 0.1807, |
| "step": 1996 |
| }, |
| { |
| "epoch": 4.088024564994882, |
| "grad_norm": 0.3049249771323251, |
| "learning_rate": 3.883806089916593e-06, |
| "loss": 0.1738, |
| "step": 1997 |
| }, |
| { |
| "epoch": 4.09007164790174, |
| "grad_norm": 0.29235847173840074, |
| "learning_rate": 3.866879315033738e-06, |
| "loss": 0.178, |
| "step": 1998 |
| }, |
| { |
| "epoch": 4.092118730808598, |
| "grad_norm": 0.2849050552270467, |
| "learning_rate": 3.849985558349998e-06, |
| "loss": 0.1679, |
| "step": 1999 |
| }, |
| { |
| "epoch": 4.094165813715455, |
| "grad_norm": 0.2977669293917896, |
| "learning_rate": 3.8331248544403135e-06, |
| "loss": 0.1556, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.096212896622313, |
| "grad_norm": 0.2927932662986805, |
| "learning_rate": 3.8162972378119635e-06, |
| "loss": 0.1894, |
| "step": 2001 |
| }, |
| { |
| "epoch": 4.098259979529171, |
| "grad_norm": 0.29328702976060467, |
| "learning_rate": 3.799502742904497e-06, |
| "loss": 0.173, |
| "step": 2002 |
| }, |
| { |
| "epoch": 4.100307062436029, |
| "grad_norm": 0.2969906241666511, |
| "learning_rate": 3.7827414040896958e-06, |
| "loss": 0.1812, |
| "step": 2003 |
| }, |
| { |
| "epoch": 4.102354145342886, |
| "grad_norm": 0.32948789795871786, |
| "learning_rate": 3.766013255671479e-06, |
| "loss": 0.1825, |
| "step": 2004 |
| }, |
| { |
| "epoch": 4.104401228249744, |
| "grad_norm": 0.28692228882067206, |
| "learning_rate": 3.749318331885825e-06, |
| "loss": 0.1654, |
| "step": 2005 |
| }, |
| { |
| "epoch": 4.106448311156602, |
| "grad_norm": 0.27400445663207174, |
| "learning_rate": 3.7326566669007268e-06, |
| "loss": 0.1913, |
| "step": 2006 |
| }, |
| { |
| "epoch": 4.10849539406346, |
| "grad_norm": 0.30606472444023525, |
| "learning_rate": 3.716028294816119e-06, |
| "loss": 0.1545, |
| "step": 2007 |
| }, |
| { |
| "epoch": 4.110542476970317, |
| "grad_norm": 0.3123611845587673, |
| "learning_rate": 3.699433249663775e-06, |
| "loss": 0.201, |
| "step": 2008 |
| }, |
| { |
| "epoch": 4.112589559877175, |
| "grad_norm": 0.31139724775462246, |
| "learning_rate": 3.6828715654072776e-06, |
| "loss": 0.1618, |
| "step": 2009 |
| }, |
| { |
| "epoch": 4.114636642784033, |
| "grad_norm": 0.29336426025625717, |
| "learning_rate": 3.666343275941926e-06, |
| "loss": 0.1903, |
| "step": 2010 |
| }, |
| { |
| "epoch": 4.116683725690891, |
| "grad_norm": 0.2893384058971807, |
| "learning_rate": 3.649848415094681e-06, |
| "loss": 0.17, |
| "step": 2011 |
| }, |
| { |
| "epoch": 4.118730808597748, |
| "grad_norm": 0.29125235390587384, |
| "learning_rate": 3.6333870166240703e-06, |
| "loss": 0.1996, |
| "step": 2012 |
| }, |
| { |
| "epoch": 4.120777891504606, |
| "grad_norm": 0.29871612249436, |
| "learning_rate": 3.616959114220162e-06, |
| "loss": 0.1985, |
| "step": 2013 |
| }, |
| { |
| "epoch": 4.122824974411464, |
| "grad_norm": 0.271555858016362, |
| "learning_rate": 3.60056474150446e-06, |
| "loss": 0.1818, |
| "step": 2014 |
| }, |
| { |
| "epoch": 4.1248720573183215, |
| "grad_norm": 0.3012910833873169, |
| "learning_rate": 3.5842039320298327e-06, |
| "loss": 0.1414, |
| "step": 2015 |
| }, |
| { |
| "epoch": 4.126919140225179, |
| "grad_norm": 0.27667654273766706, |
| "learning_rate": 3.5678767192804764e-06, |
| "loss": 0.1882, |
| "step": 2016 |
| }, |
| { |
| "epoch": 4.1289662231320365, |
| "grad_norm": 0.2869593416102505, |
| "learning_rate": 3.551583136671817e-06, |
| "loss": 0.1906, |
| "step": 2017 |
| }, |
| { |
| "epoch": 4.131013306038895, |
| "grad_norm": 0.2822576095654785, |
| "learning_rate": 3.5353232175504614e-06, |
| "loss": 0.1828, |
| "step": 2018 |
| }, |
| { |
| "epoch": 4.1330603889457525, |
| "grad_norm": 0.3121511458643644, |
| "learning_rate": 3.5190969951941113e-06, |
| "loss": 0.161, |
| "step": 2019 |
| }, |
| { |
| "epoch": 4.13510747185261, |
| "grad_norm": 0.2829822535321191, |
| "learning_rate": 3.5029045028115105e-06, |
| "loss": 0.1514, |
| "step": 2020 |
| }, |
| { |
| "epoch": 4.1371545547594675, |
| "grad_norm": 0.2837746021156371, |
| "learning_rate": 3.486745773542375e-06, |
| "loss": 0.1935, |
| "step": 2021 |
| }, |
| { |
| "epoch": 4.139201637666326, |
| "grad_norm": 0.3025896496647199, |
| "learning_rate": 3.470620840457304e-06, |
| "loss": 0.208, |
| "step": 2022 |
| }, |
| { |
| "epoch": 4.141248720573183, |
| "grad_norm": 0.28695976072627655, |
| "learning_rate": 3.4545297365577437e-06, |
| "loss": 0.18, |
| "step": 2023 |
| }, |
| { |
| "epoch": 4.143295803480041, |
| "grad_norm": 0.29084046593921437, |
| "learning_rate": 3.438472494775902e-06, |
| "loss": 0.1797, |
| "step": 2024 |
| }, |
| { |
| "epoch": 4.145342886386898, |
| "grad_norm": 0.2964669445537806, |
| "learning_rate": 3.4224491479746822e-06, |
| "loss": 0.2066, |
| "step": 2025 |
| }, |
| { |
| "epoch": 4.147389969293757, |
| "grad_norm": 0.2735467093182229, |
| "learning_rate": 3.406459728947622e-06, |
| "loss": 0.1805, |
| "step": 2026 |
| }, |
| { |
| "epoch": 4.149437052200614, |
| "grad_norm": 0.2883440394869004, |
| "learning_rate": 3.390504270418822e-06, |
| "loss": 0.1935, |
| "step": 2027 |
| }, |
| { |
| "epoch": 4.151484135107472, |
| "grad_norm": 0.28347253308933157, |
| "learning_rate": 3.3745828050428675e-06, |
| "loss": 0.2042, |
| "step": 2028 |
| }, |
| { |
| "epoch": 4.153531218014329, |
| "grad_norm": 0.287714374967884, |
| "learning_rate": 3.358695365404785e-06, |
| "loss": 0.1965, |
| "step": 2029 |
| }, |
| { |
| "epoch": 4.155578300921187, |
| "grad_norm": 0.29340681821156966, |
| "learning_rate": 3.3428419840199623e-06, |
| "loss": 0.1603, |
| "step": 2030 |
| }, |
| { |
| "epoch": 4.157625383828045, |
| "grad_norm": 0.2727874360906274, |
| "learning_rate": 3.327022693334083e-06, |
| "loss": 0.155, |
| "step": 2031 |
| }, |
| { |
| "epoch": 4.159672466734903, |
| "grad_norm": 0.30308908503638726, |
| "learning_rate": 3.3112375257230547e-06, |
| "loss": 0.1909, |
| "step": 2032 |
| }, |
| { |
| "epoch": 4.16171954964176, |
| "grad_norm": 0.2782743108107041, |
| "learning_rate": 3.295486513492954e-06, |
| "loss": 0.1912, |
| "step": 2033 |
| }, |
| { |
| "epoch": 4.163766632548619, |
| "grad_norm": 0.2960778023128933, |
| "learning_rate": 3.279769688879959e-06, |
| "loss": 0.1706, |
| "step": 2034 |
| }, |
| { |
| "epoch": 4.165813715455476, |
| "grad_norm": 0.2855041966453197, |
| "learning_rate": 3.2640870840502646e-06, |
| "loss": 0.1812, |
| "step": 2035 |
| }, |
| { |
| "epoch": 4.167860798362334, |
| "grad_norm": 0.2982604591341762, |
| "learning_rate": 3.2484387311000364e-06, |
| "loss": 0.1651, |
| "step": 2036 |
| }, |
| { |
| "epoch": 4.169907881269191, |
| "grad_norm": 0.2957570058273647, |
| "learning_rate": 3.2328246620553605e-06, |
| "loss": 0.1632, |
| "step": 2037 |
| }, |
| { |
| "epoch": 4.171954964176049, |
| "grad_norm": 0.2951616591774616, |
| "learning_rate": 3.2172449088721235e-06, |
| "loss": 0.1624, |
| "step": 2038 |
| }, |
| { |
| "epoch": 4.174002047082907, |
| "grad_norm": 0.2851463082934784, |
| "learning_rate": 3.2016995034360045e-06, |
| "loss": 0.1808, |
| "step": 2039 |
| }, |
| { |
| "epoch": 4.176049129989765, |
| "grad_norm": 0.2845084237530719, |
| "learning_rate": 3.186188477562382e-06, |
| "loss": 0.1786, |
| "step": 2040 |
| }, |
| { |
| "epoch": 4.178096212896622, |
| "grad_norm": 0.30650921370752987, |
| "learning_rate": 3.1707118629962607e-06, |
| "loss": 0.1472, |
| "step": 2041 |
| }, |
| { |
| "epoch": 4.18014329580348, |
| "grad_norm": 0.2777664159680818, |
| "learning_rate": 3.1552696914122327e-06, |
| "loss": 0.1531, |
| "step": 2042 |
| }, |
| { |
| "epoch": 4.182190378710338, |
| "grad_norm": 0.28828599604321864, |
| "learning_rate": 3.139861994414397e-06, |
| "loss": 0.1845, |
| "step": 2043 |
| }, |
| { |
| "epoch": 4.184237461617196, |
| "grad_norm": 0.30773699136825977, |
| "learning_rate": 3.1244888035362875e-06, |
| "loss": 0.1769, |
| "step": 2044 |
| }, |
| { |
| "epoch": 4.186284544524053, |
| "grad_norm": 0.3155552426235748, |
| "learning_rate": 3.1091501502408293e-06, |
| "loss": 0.1643, |
| "step": 2045 |
| }, |
| { |
| "epoch": 4.188331627430911, |
| "grad_norm": 0.2898415631190067, |
| "learning_rate": 3.093846065920254e-06, |
| "loss": 0.168, |
| "step": 2046 |
| }, |
| { |
| "epoch": 4.190378710337769, |
| "grad_norm": 0.3080290062858047, |
| "learning_rate": 3.0785765818960534e-06, |
| "loss": 0.215, |
| "step": 2047 |
| }, |
| { |
| "epoch": 4.1924257932446265, |
| "grad_norm": 0.26531974258516, |
| "learning_rate": 3.0633417294188896e-06, |
| "loss": 0.1902, |
| "step": 2048 |
| }, |
| { |
| "epoch": 4.194472876151484, |
| "grad_norm": 0.29101494010309975, |
| "learning_rate": 3.0481415396685564e-06, |
| "loss": 0.1851, |
| "step": 2049 |
| }, |
| { |
| "epoch": 4.1965199590583415, |
| "grad_norm": 0.2805440252695212, |
| "learning_rate": 3.0329760437539233e-06, |
| "loss": 0.2106, |
| "step": 2050 |
| }, |
| { |
| "epoch": 4.1985670419652, |
| "grad_norm": 0.2954059617809206, |
| "learning_rate": 3.017845272712825e-06, |
| "loss": 0.178, |
| "step": 2051 |
| }, |
| { |
| "epoch": 4.200614124872057, |
| "grad_norm": 0.2750668596805512, |
| "learning_rate": 3.0027492575120453e-06, |
| "loss": 0.1639, |
| "step": 2052 |
| }, |
| { |
| "epoch": 4.202661207778915, |
| "grad_norm": 0.2954652665384484, |
| "learning_rate": 2.9876880290472376e-06, |
| "loss": 0.1654, |
| "step": 2053 |
| }, |
| { |
| "epoch": 4.2047082906857725, |
| "grad_norm": 0.2844611490184932, |
| "learning_rate": 2.9726616181428515e-06, |
| "loss": 0.1824, |
| "step": 2054 |
| }, |
| { |
| "epoch": 4.206755373592631, |
| "grad_norm": 0.28973919117898683, |
| "learning_rate": 2.957670055552078e-06, |
| "loss": 0.1785, |
| "step": 2055 |
| }, |
| { |
| "epoch": 4.208802456499488, |
| "grad_norm": 0.3065738121168008, |
| "learning_rate": 2.942713371956809e-06, |
| "loss": 0.1466, |
| "step": 2056 |
| }, |
| { |
| "epoch": 4.210849539406346, |
| "grad_norm": 0.27595990092382205, |
| "learning_rate": 2.927791597967522e-06, |
| "loss": 0.1674, |
| "step": 2057 |
| }, |
| { |
| "epoch": 4.212896622313203, |
| "grad_norm": 0.2787886162407018, |
| "learning_rate": 2.9129047641232653e-06, |
| "loss": 0.1739, |
| "step": 2058 |
| }, |
| { |
| "epoch": 4.214943705220062, |
| "grad_norm": 0.2835601286496118, |
| "learning_rate": 2.8980529008915793e-06, |
| "loss": 0.1851, |
| "step": 2059 |
| }, |
| { |
| "epoch": 4.216990788126919, |
| "grad_norm": 0.2734235721997136, |
| "learning_rate": 2.8832360386684287e-06, |
| "loss": 0.1894, |
| "step": 2060 |
| }, |
| { |
| "epoch": 4.219037871033777, |
| "grad_norm": 0.313282561765329, |
| "learning_rate": 2.8684542077781376e-06, |
| "loss": 0.1844, |
| "step": 2061 |
| }, |
| { |
| "epoch": 4.221084953940634, |
| "grad_norm": 0.29657107157085116, |
| "learning_rate": 2.853707438473352e-06, |
| "loss": 0.1861, |
| "step": 2062 |
| }, |
| { |
| "epoch": 4.223132036847493, |
| "grad_norm": 0.2835879428086791, |
| "learning_rate": 2.838995760934953e-06, |
| "loss": 0.1992, |
| "step": 2063 |
| }, |
| { |
| "epoch": 4.22517911975435, |
| "grad_norm": 0.28406764855143096, |
| "learning_rate": 2.8243192052719902e-06, |
| "loss": 0.1743, |
| "step": 2064 |
| }, |
| { |
| "epoch": 4.227226202661208, |
| "grad_norm": 0.2735358759641007, |
| "learning_rate": 2.8096778015216484e-06, |
| "loss": 0.1663, |
| "step": 2065 |
| }, |
| { |
| "epoch": 4.229273285568065, |
| "grad_norm": 0.28994987108300085, |
| "learning_rate": 2.7950715796491623e-06, |
| "loss": 0.1693, |
| "step": 2066 |
| }, |
| { |
| "epoch": 4.231320368474924, |
| "grad_norm": 0.2918242884998421, |
| "learning_rate": 2.7805005695477704e-06, |
| "loss": 0.1659, |
| "step": 2067 |
| }, |
| { |
| "epoch": 4.233367451381781, |
| "grad_norm": 0.2914555807372611, |
| "learning_rate": 2.7659648010386365e-06, |
| "loss": 0.2082, |
| "step": 2068 |
| }, |
| { |
| "epoch": 4.235414534288639, |
| "grad_norm": 0.31049832846223957, |
| "learning_rate": 2.75146430387081e-06, |
| "loss": 0.1745, |
| "step": 2069 |
| }, |
| { |
| "epoch": 4.237461617195496, |
| "grad_norm": 0.2786106859690272, |
| "learning_rate": 2.736999107721137e-06, |
| "loss": 0.1689, |
| "step": 2070 |
| }, |
| { |
| "epoch": 4.239508700102354, |
| "grad_norm": 0.2813229103286878, |
| "learning_rate": 2.7225692421942306e-06, |
| "loss": 0.1938, |
| "step": 2071 |
| }, |
| { |
| "epoch": 4.241555783009212, |
| "grad_norm": 0.2911201694179184, |
| "learning_rate": 2.7081747368223953e-06, |
| "loss": 0.1835, |
| "step": 2072 |
| }, |
| { |
| "epoch": 4.24360286591607, |
| "grad_norm": 0.29816674195659204, |
| "learning_rate": 2.6938156210655584e-06, |
| "loss": 0.1842, |
| "step": 2073 |
| }, |
| { |
| "epoch": 4.245649948822927, |
| "grad_norm": 0.2996092573579164, |
| "learning_rate": 2.679491924311226e-06, |
| "loss": 0.1847, |
| "step": 2074 |
| }, |
| { |
| "epoch": 4.247697031729785, |
| "grad_norm": 0.2895248563688147, |
| "learning_rate": 2.6652036758744148e-06, |
| "loss": 0.1827, |
| "step": 2075 |
| }, |
| { |
| "epoch": 4.249744114636643, |
| "grad_norm": 0.28226805129886656, |
| "learning_rate": 2.6509509049975913e-06, |
| "loss": 0.1765, |
| "step": 2076 |
| }, |
| { |
| "epoch": 4.2517911975435005, |
| "grad_norm": 0.2949640489147984, |
| "learning_rate": 2.6367336408506063e-06, |
| "loss": 0.1705, |
| "step": 2077 |
| }, |
| { |
| "epoch": 4.253838280450358, |
| "grad_norm": 0.2753720710190184, |
| "learning_rate": 2.622551912530653e-06, |
| "loss": 0.186, |
| "step": 2078 |
| }, |
| { |
| "epoch": 4.255885363357216, |
| "grad_norm": 0.2936131556811062, |
| "learning_rate": 2.608405749062193e-06, |
| "loss": 0.1855, |
| "step": 2079 |
| }, |
| { |
| "epoch": 4.257932446264074, |
| "grad_norm": 0.28222624495514065, |
| "learning_rate": 2.594295179396895e-06, |
| "loss": 0.1743, |
| "step": 2080 |
| }, |
| { |
| "epoch": 4.2599795291709315, |
| "grad_norm": 0.29367758291953516, |
| "learning_rate": 2.5802202324135926e-06, |
| "loss": 0.1926, |
| "step": 2081 |
| }, |
| { |
| "epoch": 4.262026612077789, |
| "grad_norm": 0.29329088174637774, |
| "learning_rate": 2.566180936918203e-06, |
| "loss": 0.1985, |
| "step": 2082 |
| }, |
| { |
| "epoch": 4.2640736949846465, |
| "grad_norm": 0.27495980250181845, |
| "learning_rate": 2.5521773216436875e-06, |
| "loss": 0.1694, |
| "step": 2083 |
| }, |
| { |
| "epoch": 4.266120777891505, |
| "grad_norm": 0.2745597317735457, |
| "learning_rate": 2.5382094152499705e-06, |
| "loss": 0.1623, |
| "step": 2084 |
| }, |
| { |
| "epoch": 4.268167860798362, |
| "grad_norm": 0.2860214870247153, |
| "learning_rate": 2.5242772463239075e-06, |
| "loss": 0.1759, |
| "step": 2085 |
| }, |
| { |
| "epoch": 4.27021494370522, |
| "grad_norm": 0.2829523123556956, |
| "learning_rate": 2.5103808433792075e-06, |
| "loss": 0.1953, |
| "step": 2086 |
| }, |
| { |
| "epoch": 4.272262026612077, |
| "grad_norm": 0.29162058098697563, |
| "learning_rate": 2.4965202348563834e-06, |
| "loss": 0.1851, |
| "step": 2087 |
| }, |
| { |
| "epoch": 4.274309109518936, |
| "grad_norm": 0.28401444187787156, |
| "learning_rate": 2.4826954491226875e-06, |
| "loss": 0.1625, |
| "step": 2088 |
| }, |
| { |
| "epoch": 4.276356192425793, |
| "grad_norm": 0.29640280698764426, |
| "learning_rate": 2.468906514472065e-06, |
| "loss": 0.1733, |
| "step": 2089 |
| }, |
| { |
| "epoch": 4.278403275332651, |
| "grad_norm": 0.28541928385296134, |
| "learning_rate": 2.4551534591250725e-06, |
| "loss": 0.2083, |
| "step": 2090 |
| }, |
| { |
| "epoch": 4.280450358239508, |
| "grad_norm": 0.2927844886794109, |
| "learning_rate": 2.4414363112288464e-06, |
| "loss": 0.1518, |
| "step": 2091 |
| }, |
| { |
| "epoch": 4.282497441146367, |
| "grad_norm": 0.29701394558657035, |
| "learning_rate": 2.4277550988570362e-06, |
| "loss": 0.1621, |
| "step": 2092 |
| }, |
| { |
| "epoch": 4.284544524053224, |
| "grad_norm": 0.290771795085261, |
| "learning_rate": 2.4141098500097403e-06, |
| "loss": 0.1648, |
| "step": 2093 |
| }, |
| { |
| "epoch": 4.286591606960082, |
| "grad_norm": 0.27774161175473394, |
| "learning_rate": 2.400500592613455e-06, |
| "loss": 0.1711, |
| "step": 2094 |
| }, |
| { |
| "epoch": 4.288638689866939, |
| "grad_norm": 0.30883225453627294, |
| "learning_rate": 2.3869273545210158e-06, |
| "loss": 0.134, |
| "step": 2095 |
| }, |
| { |
| "epoch": 4.290685772773798, |
| "grad_norm": 0.2797146271613822, |
| "learning_rate": 2.3733901635115486e-06, |
| "loss": 0.1692, |
| "step": 2096 |
| }, |
| { |
| "epoch": 4.292732855680655, |
| "grad_norm": 0.2969804226131455, |
| "learning_rate": 2.359889047290389e-06, |
| "loss": 0.1671, |
| "step": 2097 |
| }, |
| { |
| "epoch": 4.294779938587513, |
| "grad_norm": 0.3217916741414342, |
| "learning_rate": 2.3464240334890496e-06, |
| "loss": 0.1575, |
| "step": 2098 |
| }, |
| { |
| "epoch": 4.29682702149437, |
| "grad_norm": 0.27919309597905084, |
| "learning_rate": 2.332995149665169e-06, |
| "loss": 0.2121, |
| "step": 2099 |
| }, |
| { |
| "epoch": 4.298874104401229, |
| "grad_norm": 0.2800459009067448, |
| "learning_rate": 2.3196024233024185e-06, |
| "loss": 0.1837, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.300921187308086, |
| "grad_norm": 0.2671979507192257, |
| "learning_rate": 2.3062458818104804e-06, |
| "loss": 0.1952, |
| "step": 2101 |
| }, |
| { |
| "epoch": 4.302968270214944, |
| "grad_norm": 0.28262802438305323, |
| "learning_rate": 2.2929255525249894e-06, |
| "loss": 0.1713, |
| "step": 2102 |
| }, |
| { |
| "epoch": 4.305015353121801, |
| "grad_norm": 0.3005757045715249, |
| "learning_rate": 2.279641462707445e-06, |
| "loss": 0.1668, |
| "step": 2103 |
| }, |
| { |
| "epoch": 4.30706243602866, |
| "grad_norm": 0.286800196637234, |
| "learning_rate": 2.266393639545197e-06, |
| "loss": 0.1896, |
| "step": 2104 |
| }, |
| { |
| "epoch": 4.309109518935517, |
| "grad_norm": 0.2833256809700336, |
| "learning_rate": 2.2531821101513796e-06, |
| "loss": 0.1417, |
| "step": 2105 |
| }, |
| { |
| "epoch": 4.311156601842375, |
| "grad_norm": 0.31197523215491646, |
| "learning_rate": 2.2400069015648173e-06, |
| "loss": 0.1952, |
| "step": 2106 |
| }, |
| { |
| "epoch": 4.313203684749232, |
| "grad_norm": 0.2678003946049261, |
| "learning_rate": 2.22686804075003e-06, |
| "loss": 0.1807, |
| "step": 2107 |
| }, |
| { |
| "epoch": 4.3152507676560905, |
| "grad_norm": 0.30659909160474746, |
| "learning_rate": 2.213765554597129e-06, |
| "loss": 0.1873, |
| "step": 2108 |
| }, |
| { |
| "epoch": 4.317297850562948, |
| "grad_norm": 0.27320285898761, |
| "learning_rate": 2.2006994699217963e-06, |
| "loss": 0.1783, |
| "step": 2109 |
| }, |
| { |
| "epoch": 4.3193449334698055, |
| "grad_norm": 0.2952250752859734, |
| "learning_rate": 2.187669813465192e-06, |
| "loss": 0.1666, |
| "step": 2110 |
| }, |
| { |
| "epoch": 4.321392016376663, |
| "grad_norm": 0.2648240745330133, |
| "learning_rate": 2.174676611893947e-06, |
| "loss": 0.1671, |
| "step": 2111 |
| }, |
| { |
| "epoch": 4.3234390992835205, |
| "grad_norm": 0.2811036034280304, |
| "learning_rate": 2.1617198918000737e-06, |
| "loss": 0.1765, |
| "step": 2112 |
| }, |
| { |
| "epoch": 4.325486182190379, |
| "grad_norm": 0.29793182339880603, |
| "learning_rate": 2.1487996797009103e-06, |
| "loss": 0.167, |
| "step": 2113 |
| }, |
| { |
| "epoch": 4.3275332650972365, |
| "grad_norm": 0.2752900542965174, |
| "learning_rate": 2.135916002039089e-06, |
| "loss": 0.1821, |
| "step": 2114 |
| }, |
| { |
| "epoch": 4.329580348004094, |
| "grad_norm": 0.27908851781361865, |
| "learning_rate": 2.123068885182471e-06, |
| "loss": 0.1875, |
| "step": 2115 |
| }, |
| { |
| "epoch": 4.3316274309109515, |
| "grad_norm": 0.30369045741991585, |
| "learning_rate": 2.110258355424093e-06, |
| "loss": 0.1565, |
| "step": 2116 |
| }, |
| { |
| "epoch": 4.33367451381781, |
| "grad_norm": 0.27387260120165463, |
| "learning_rate": 2.0974844389820914e-06, |
| "loss": 0.2037, |
| "step": 2117 |
| }, |
| { |
| "epoch": 4.335721596724667, |
| "grad_norm": 0.2695259971121429, |
| "learning_rate": 2.084747161999703e-06, |
| "loss": 0.1883, |
| "step": 2118 |
| }, |
| { |
| "epoch": 4.337768679631525, |
| "grad_norm": 0.27646790728531134, |
| "learning_rate": 2.0720465505451524e-06, |
| "loss": 0.1674, |
| "step": 2119 |
| }, |
| { |
| "epoch": 4.339815762538382, |
| "grad_norm": 0.27363984511519157, |
| "learning_rate": 2.0593826306116328e-06, |
| "loss": 0.1987, |
| "step": 2120 |
| }, |
| { |
| "epoch": 4.341862845445241, |
| "grad_norm": 0.3058041319945061, |
| "learning_rate": 2.0467554281172443e-06, |
| "loss": 0.1668, |
| "step": 2121 |
| }, |
| { |
| "epoch": 4.343909928352098, |
| "grad_norm": 0.28711732195110085, |
| "learning_rate": 2.0341649689049458e-06, |
| "loss": 0.1589, |
| "step": 2122 |
| }, |
| { |
| "epoch": 4.345957011258956, |
| "grad_norm": 0.29676194594555955, |
| "learning_rate": 2.021611278742479e-06, |
| "loss": 0.2006, |
| "step": 2123 |
| }, |
| { |
| "epoch": 4.348004094165813, |
| "grad_norm": 0.2894509415136224, |
| "learning_rate": 2.009094383322356e-06, |
| "loss": 0.1759, |
| "step": 2124 |
| }, |
| { |
| "epoch": 4.350051177072672, |
| "grad_norm": 0.27051010203429876, |
| "learning_rate": 1.9966143082617797e-06, |
| "loss": 0.2051, |
| "step": 2125 |
| }, |
| { |
| "epoch": 4.352098259979529, |
| "grad_norm": 0.29097733973563933, |
| "learning_rate": 1.9841710791025793e-06, |
| "loss": 0.1718, |
| "step": 2126 |
| }, |
| { |
| "epoch": 4.354145342886387, |
| "grad_norm": 0.321019324398248, |
| "learning_rate": 1.971764721311191e-06, |
| "loss": 0.2109, |
| "step": 2127 |
| }, |
| { |
| "epoch": 4.356192425793244, |
| "grad_norm": 0.30390810271040936, |
| "learning_rate": 1.959395260278587e-06, |
| "loss": 0.1489, |
| "step": 2128 |
| }, |
| { |
| "epoch": 4.358239508700103, |
| "grad_norm": 0.29799569461187914, |
| "learning_rate": 1.947062721320221e-06, |
| "loss": 0.1661, |
| "step": 2129 |
| }, |
| { |
| "epoch": 4.36028659160696, |
| "grad_norm": 0.2832110200623686, |
| "learning_rate": 1.9347671296759896e-06, |
| "loss": 0.1714, |
| "step": 2130 |
| }, |
| { |
| "epoch": 4.362333674513818, |
| "grad_norm": 0.2951503600181551, |
| "learning_rate": 1.922508510510166e-06, |
| "loss": 0.157, |
| "step": 2131 |
| }, |
| { |
| "epoch": 4.364380757420675, |
| "grad_norm": 0.28017904966905216, |
| "learning_rate": 1.9102868889113613e-06, |
| "loss": 0.1822, |
| "step": 2132 |
| }, |
| { |
| "epoch": 4.366427840327534, |
| "grad_norm": 0.27123684319205094, |
| "learning_rate": 1.8981022898924562e-06, |
| "loss": 0.2035, |
| "step": 2133 |
| }, |
| { |
| "epoch": 4.368474923234391, |
| "grad_norm": 0.3005307169297444, |
| "learning_rate": 1.885954738390572e-06, |
| "loss": 0.178, |
| "step": 2134 |
| }, |
| { |
| "epoch": 4.370522006141249, |
| "grad_norm": 0.30373485662664135, |
| "learning_rate": 1.8738442592670014e-06, |
| "loss": 0.1791, |
| "step": 2135 |
| }, |
| { |
| "epoch": 4.372569089048106, |
| "grad_norm": 0.29560692277511486, |
| "learning_rate": 1.8617708773071698e-06, |
| "loss": 0.1702, |
| "step": 2136 |
| }, |
| { |
| "epoch": 4.3746161719549645, |
| "grad_norm": 0.2799036916467131, |
| "learning_rate": 1.8497346172205733e-06, |
| "loss": 0.1757, |
| "step": 2137 |
| }, |
| { |
| "epoch": 4.376663254861822, |
| "grad_norm": 0.2914915460350074, |
| "learning_rate": 1.8377355036407408e-06, |
| "loss": 0.1537, |
| "step": 2138 |
| }, |
| { |
| "epoch": 4.37871033776868, |
| "grad_norm": 0.28848134194988, |
| "learning_rate": 1.8257735611251704e-06, |
| "loss": 0.1794, |
| "step": 2139 |
| }, |
| { |
| "epoch": 4.380757420675537, |
| "grad_norm": 0.2663575035541803, |
| "learning_rate": 1.8138488141552856e-06, |
| "loss": 0.1895, |
| "step": 2140 |
| }, |
| { |
| "epoch": 4.3828045035823955, |
| "grad_norm": 0.28041126513615366, |
| "learning_rate": 1.801961287136391e-06, |
| "loss": 0.1705, |
| "step": 2141 |
| }, |
| { |
| "epoch": 4.384851586489253, |
| "grad_norm": 0.2878226194974552, |
| "learning_rate": 1.7901110043976122e-06, |
| "loss": 0.1869, |
| "step": 2142 |
| }, |
| { |
| "epoch": 4.3868986693961105, |
| "grad_norm": 0.2845987197363273, |
| "learning_rate": 1.7782979901918507e-06, |
| "loss": 0.1822, |
| "step": 2143 |
| }, |
| { |
| "epoch": 4.388945752302968, |
| "grad_norm": 0.2750503273666346, |
| "learning_rate": 1.7665222686957362e-06, |
| "loss": 0.1778, |
| "step": 2144 |
| }, |
| { |
| "epoch": 4.3909928352098255, |
| "grad_norm": 0.3136573248950073, |
| "learning_rate": 1.754783864009575e-06, |
| "loss": 0.1569, |
| "step": 2145 |
| }, |
| { |
| "epoch": 4.393039918116684, |
| "grad_norm": 0.2957114276760941, |
| "learning_rate": 1.7430828001572897e-06, |
| "loss": 0.2259, |
| "step": 2146 |
| }, |
| { |
| "epoch": 4.395087001023541, |
| "grad_norm": 0.306095495611476, |
| "learning_rate": 1.7314191010863933e-06, |
| "loss": 0.2185, |
| "step": 2147 |
| }, |
| { |
| "epoch": 4.397134083930399, |
| "grad_norm": 0.2870140982518843, |
| "learning_rate": 1.7197927906679335e-06, |
| "loss": 0.2054, |
| "step": 2148 |
| }, |
| { |
| "epoch": 4.399181166837257, |
| "grad_norm": 0.2679693208920003, |
| "learning_rate": 1.7082038926964162e-06, |
| "loss": 0.1553, |
| "step": 2149 |
| }, |
| { |
| "epoch": 4.401228249744115, |
| "grad_norm": 0.30186153363388674, |
| "learning_rate": 1.6966524308897935e-06, |
| "loss": 0.1927, |
| "step": 2150 |
| }, |
| { |
| "epoch": 4.403275332650972, |
| "grad_norm": 0.2784578317629372, |
| "learning_rate": 1.6851384288894058e-06, |
| "loss": 0.1776, |
| "step": 2151 |
| }, |
| { |
| "epoch": 4.40532241555783, |
| "grad_norm": 0.2843049715212868, |
| "learning_rate": 1.6736619102599073e-06, |
| "loss": 0.1672, |
| "step": 2152 |
| }, |
| { |
| "epoch": 4.407369498464687, |
| "grad_norm": 0.31694116691154817, |
| "learning_rate": 1.6622228984892585e-06, |
| "loss": 0.1539, |
| "step": 2153 |
| }, |
| { |
| "epoch": 4.409416581371546, |
| "grad_norm": 0.26686752025138655, |
| "learning_rate": 1.6508214169886483e-06, |
| "loss": 0.1754, |
| "step": 2154 |
| }, |
| { |
| "epoch": 4.411463664278403, |
| "grad_norm": 0.2654670971586521, |
| "learning_rate": 1.6394574890924574e-06, |
| "loss": 0.2013, |
| "step": 2155 |
| }, |
| { |
| "epoch": 4.413510747185261, |
| "grad_norm": 0.29492009598586416, |
| "learning_rate": 1.6281311380582087e-06, |
| "loss": 0.2082, |
| "step": 2156 |
| }, |
| { |
| "epoch": 4.415557830092118, |
| "grad_norm": 0.30045313874805496, |
| "learning_rate": 1.616842387066524e-06, |
| "loss": 0.1767, |
| "step": 2157 |
| }, |
| { |
| "epoch": 4.417604912998977, |
| "grad_norm": 0.2910283873769807, |
| "learning_rate": 1.605591259221071e-06, |
| "loss": 0.1766, |
| "step": 2158 |
| }, |
| { |
| "epoch": 4.419651995905834, |
| "grad_norm": 0.28823745158894704, |
| "learning_rate": 1.5943777775485058e-06, |
| "loss": 0.1868, |
| "step": 2159 |
| }, |
| { |
| "epoch": 4.421699078812692, |
| "grad_norm": 0.26486227426645287, |
| "learning_rate": 1.583201964998451e-06, |
| "loss": 0.2016, |
| "step": 2160 |
| }, |
| { |
| "epoch": 4.423746161719549, |
| "grad_norm": 0.28343110323179144, |
| "learning_rate": 1.572063844443441e-06, |
| "loss": 0.1855, |
| "step": 2161 |
| }, |
| { |
| "epoch": 4.425793244626408, |
| "grad_norm": 0.2863936086901088, |
| "learning_rate": 1.5609634386788485e-06, |
| "loss": 0.1952, |
| "step": 2162 |
| }, |
| { |
| "epoch": 4.427840327533265, |
| "grad_norm": 0.29827769122199016, |
| "learning_rate": 1.5499007704228742e-06, |
| "loss": 0.1679, |
| "step": 2163 |
| }, |
| { |
| "epoch": 4.429887410440123, |
| "grad_norm": 0.2492563784192527, |
| "learning_rate": 1.5388758623164802e-06, |
| "loss": 0.1679, |
| "step": 2164 |
| }, |
| { |
| "epoch": 4.43193449334698, |
| "grad_norm": 0.2802794951076192, |
| "learning_rate": 1.5278887369233509e-06, |
| "loss": 0.1792, |
| "step": 2165 |
| }, |
| { |
| "epoch": 4.433981576253839, |
| "grad_norm": 0.2859554141734814, |
| "learning_rate": 1.5169394167298367e-06, |
| "loss": 0.1771, |
| "step": 2166 |
| }, |
| { |
| "epoch": 4.436028659160696, |
| "grad_norm": 0.3090415351876153, |
| "learning_rate": 1.5060279241449304e-06, |
| "loss": 0.1803, |
| "step": 2167 |
| }, |
| { |
| "epoch": 4.438075742067554, |
| "grad_norm": 0.2838386004821354, |
| "learning_rate": 1.4951542815001886e-06, |
| "loss": 0.159, |
| "step": 2168 |
| }, |
| { |
| "epoch": 4.440122824974411, |
| "grad_norm": 0.28615497006671264, |
| "learning_rate": 1.4843185110497139e-06, |
| "loss": 0.1654, |
| "step": 2169 |
| }, |
| { |
| "epoch": 4.4421699078812695, |
| "grad_norm": 0.2828928258486327, |
| "learning_rate": 1.4735206349701003e-06, |
| "loss": 0.166, |
| "step": 2170 |
| }, |
| { |
| "epoch": 4.444216990788127, |
| "grad_norm": 0.294693410920466, |
| "learning_rate": 1.4627606753603886e-06, |
| "loss": 0.1708, |
| "step": 2171 |
| }, |
| { |
| "epoch": 4.4462640736949846, |
| "grad_norm": 0.28832385318223086, |
| "learning_rate": 1.4520386542420006e-06, |
| "loss": 0.173, |
| "step": 2172 |
| }, |
| { |
| "epoch": 4.448311156601842, |
| "grad_norm": 0.2750702995509294, |
| "learning_rate": 1.4413545935587415e-06, |
| "loss": 0.1612, |
| "step": 2173 |
| }, |
| { |
| "epoch": 4.4503582395087005, |
| "grad_norm": 0.2827730533019191, |
| "learning_rate": 1.4307085151767086e-06, |
| "loss": 0.1568, |
| "step": 2174 |
| }, |
| { |
| "epoch": 4.452405322415558, |
| "grad_norm": 0.2724089303161446, |
| "learning_rate": 1.4201004408842644e-06, |
| "loss": 0.1577, |
| "step": 2175 |
| }, |
| { |
| "epoch": 4.4544524053224155, |
| "grad_norm": 0.27196453176116103, |
| "learning_rate": 1.4095303923919956e-06, |
| "loss": 0.1773, |
| "step": 2176 |
| }, |
| { |
| "epoch": 4.456499488229273, |
| "grad_norm": 0.3178765316382206, |
| "learning_rate": 1.3989983913326665e-06, |
| "loss": 0.159, |
| "step": 2177 |
| }, |
| { |
| "epoch": 4.458546571136131, |
| "grad_norm": 0.29327822729767095, |
| "learning_rate": 1.3885044592611706e-06, |
| "loss": 0.1431, |
| "step": 2178 |
| }, |
| { |
| "epoch": 4.460593654042989, |
| "grad_norm": 0.29769593027711694, |
| "learning_rate": 1.3780486176544905e-06, |
| "loss": 0.1985, |
| "step": 2179 |
| }, |
| { |
| "epoch": 4.462640736949846, |
| "grad_norm": 0.2844568268296535, |
| "learning_rate": 1.3676308879116507e-06, |
| "loss": 0.1652, |
| "step": 2180 |
| }, |
| { |
| "epoch": 4.464687819856704, |
| "grad_norm": 0.2824834673707818, |
| "learning_rate": 1.3572512913536783e-06, |
| "loss": 0.1957, |
| "step": 2181 |
| }, |
| { |
| "epoch": 4.466734902763562, |
| "grad_norm": 0.2944499941869074, |
| "learning_rate": 1.3469098492235521e-06, |
| "loss": 0.1663, |
| "step": 2182 |
| }, |
| { |
| "epoch": 4.46878198567042, |
| "grad_norm": 0.2860326191078485, |
| "learning_rate": 1.3366065826861685e-06, |
| "loss": 0.1508, |
| "step": 2183 |
| }, |
| { |
| "epoch": 4.470829068577277, |
| "grad_norm": 0.29391486499526526, |
| "learning_rate": 1.3263415128282908e-06, |
| "loss": 0.1643, |
| "step": 2184 |
| }, |
| { |
| "epoch": 4.472876151484135, |
| "grad_norm": 0.28228277616536795, |
| "learning_rate": 1.316114660658505e-06, |
| "loss": 0.165, |
| "step": 2185 |
| }, |
| { |
| "epoch": 4.474923234390992, |
| "grad_norm": 0.28104222228106285, |
| "learning_rate": 1.305926047107191e-06, |
| "loss": 0.1787, |
| "step": 2186 |
| }, |
| { |
| "epoch": 4.476970317297851, |
| "grad_norm": 0.3067397142838367, |
| "learning_rate": 1.2957756930264642e-06, |
| "loss": 0.1708, |
| "step": 2187 |
| }, |
| { |
| "epoch": 4.479017400204708, |
| "grad_norm": 0.287797881234288, |
| "learning_rate": 1.2856636191901296e-06, |
| "loss": 0.1778, |
| "step": 2188 |
| }, |
| { |
| "epoch": 4.481064483111566, |
| "grad_norm": 0.29131274478787034, |
| "learning_rate": 1.2755898462936544e-06, |
| "loss": 0.1754, |
| "step": 2189 |
| }, |
| { |
| "epoch": 4.483111566018424, |
| "grad_norm": 0.2774665709211303, |
| "learning_rate": 1.265554394954125e-06, |
| "loss": 0.1702, |
| "step": 2190 |
| }, |
| { |
| "epoch": 4.485158648925282, |
| "grad_norm": 0.2895032215404063, |
| "learning_rate": 1.255557285710185e-06, |
| "loss": 0.1572, |
| "step": 2191 |
| }, |
| { |
| "epoch": 4.487205731832139, |
| "grad_norm": 0.29001265695476014, |
| "learning_rate": 1.2455985390220193e-06, |
| "loss": 0.2107, |
| "step": 2192 |
| }, |
| { |
| "epoch": 4.489252814738997, |
| "grad_norm": 0.28177925431653544, |
| "learning_rate": 1.2356781752712932e-06, |
| "loss": 0.1821, |
| "step": 2193 |
| }, |
| { |
| "epoch": 4.491299897645854, |
| "grad_norm": 0.3086719314394619, |
| "learning_rate": 1.225796214761117e-06, |
| "loss": 0.1513, |
| "step": 2194 |
| }, |
| { |
| "epoch": 4.493346980552713, |
| "grad_norm": 0.29208730292486546, |
| "learning_rate": 1.2159526777160036e-06, |
| "loss": 0.1387, |
| "step": 2195 |
| }, |
| { |
| "epoch": 4.49539406345957, |
| "grad_norm": 0.3397441507088236, |
| "learning_rate": 1.2061475842818337e-06, |
| "loss": 0.1566, |
| "step": 2196 |
| }, |
| { |
| "epoch": 4.497441146366428, |
| "grad_norm": 0.2873481591587359, |
| "learning_rate": 1.196380954525802e-06, |
| "loss": 0.1726, |
| "step": 2197 |
| }, |
| { |
| "epoch": 4.499488229273285, |
| "grad_norm": 0.27576099473692967, |
| "learning_rate": 1.1866528084363881e-06, |
| "loss": 0.1549, |
| "step": 2198 |
| }, |
| { |
| "epoch": 4.501535312180144, |
| "grad_norm": 0.3686313423170628, |
| "learning_rate": 1.1769631659233104e-06, |
| "loss": 0.1567, |
| "step": 2199 |
| }, |
| { |
| "epoch": 4.503582395087001, |
| "grad_norm": 0.29827782847327655, |
| "learning_rate": 1.1673120468174837e-06, |
| "loss": 0.1872, |
| "step": 2200 |
| }, |
| { |
| "epoch": 4.505629477993859, |
| "grad_norm": 0.28857294210571843, |
| "learning_rate": 1.1576994708709766e-06, |
| "loss": 0.182, |
| "step": 2201 |
| }, |
| { |
| "epoch": 4.507676560900716, |
| "grad_norm": 0.27379258065859163, |
| "learning_rate": 1.148125457756981e-06, |
| "loss": 0.1732, |
| "step": 2202 |
| }, |
| { |
| "epoch": 4.5097236438075745, |
| "grad_norm": 0.2746324365416677, |
| "learning_rate": 1.1385900270697658e-06, |
| "loss": 0.1962, |
| "step": 2203 |
| }, |
| { |
| "epoch": 4.511770726714432, |
| "grad_norm": 0.29241951735417715, |
| "learning_rate": 1.1290931983246334e-06, |
| "loss": 0.1652, |
| "step": 2204 |
| }, |
| { |
| "epoch": 4.5138178096212895, |
| "grad_norm": 0.3084497347616217, |
| "learning_rate": 1.119634990957883e-06, |
| "loss": 0.1849, |
| "step": 2205 |
| }, |
| { |
| "epoch": 4.515864892528147, |
| "grad_norm": 0.3071333917103865, |
| "learning_rate": 1.110215424326775e-06, |
| "loss": 0.1584, |
| "step": 2206 |
| }, |
| { |
| "epoch": 4.5179119754350054, |
| "grad_norm": 0.2857404999852703, |
| "learning_rate": 1.1008345177094859e-06, |
| "loss": 0.195, |
| "step": 2207 |
| }, |
| { |
| "epoch": 4.519959058341863, |
| "grad_norm": 0.2705119565200689, |
| "learning_rate": 1.091492290305063e-06, |
| "loss": 0.1665, |
| "step": 2208 |
| }, |
| { |
| "epoch": 4.5220061412487205, |
| "grad_norm": 0.3090579005503219, |
| "learning_rate": 1.0821887612333959e-06, |
| "loss": 0.1802, |
| "step": 2209 |
| }, |
| { |
| "epoch": 4.524053224155578, |
| "grad_norm": 0.27905532018019086, |
| "learning_rate": 1.0729239495351917e-06, |
| "loss": 0.1786, |
| "step": 2210 |
| }, |
| { |
| "epoch": 4.526100307062436, |
| "grad_norm": 0.2818996052768231, |
| "learning_rate": 1.0636978741718873e-06, |
| "loss": 0.1951, |
| "step": 2211 |
| }, |
| { |
| "epoch": 4.528147389969294, |
| "grad_norm": 0.28519212105139324, |
| "learning_rate": 1.0545105540256628e-06, |
| "loss": 0.1718, |
| "step": 2212 |
| }, |
| { |
| "epoch": 4.530194472876151, |
| "grad_norm": 0.2752841091314753, |
| "learning_rate": 1.0453620078993755e-06, |
| "loss": 0.1904, |
| "step": 2213 |
| }, |
| { |
| "epoch": 4.532241555783009, |
| "grad_norm": 0.27656267859889894, |
| "learning_rate": 1.0362522545165276e-06, |
| "loss": 0.1563, |
| "step": 2214 |
| }, |
| { |
| "epoch": 4.534288638689867, |
| "grad_norm": 0.3034310152471821, |
| "learning_rate": 1.0271813125212237e-06, |
| "loss": 0.1967, |
| "step": 2215 |
| }, |
| { |
| "epoch": 4.536335721596725, |
| "grad_norm": 0.28706993728461716, |
| "learning_rate": 1.0181492004781467e-06, |
| "loss": 0.159, |
| "step": 2216 |
| }, |
| { |
| "epoch": 4.538382804503582, |
| "grad_norm": 0.2782931986242475, |
| "learning_rate": 1.009155936872499e-06, |
| "loss": 0.1926, |
| "step": 2217 |
| }, |
| { |
| "epoch": 4.54042988741044, |
| "grad_norm": 0.2836768256218939, |
| "learning_rate": 1.0002015401099797e-06, |
| "loss": 0.1697, |
| "step": 2218 |
| }, |
| { |
| "epoch": 4.542476970317297, |
| "grad_norm": 0.2708987463201031, |
| "learning_rate": 9.91286028516747e-07, |
| "loss": 0.1936, |
| "step": 2219 |
| }, |
| { |
| "epoch": 4.544524053224156, |
| "grad_norm": 0.28619987569766664, |
| "learning_rate": 9.824094203393697e-07, |
| "loss": 0.1849, |
| "step": 2220 |
| }, |
| { |
| "epoch": 4.546571136131013, |
| "grad_norm": 0.2707540756476124, |
| "learning_rate": 9.735717337447981e-07, |
| "loss": 0.1751, |
| "step": 2221 |
| }, |
| { |
| "epoch": 4.548618219037871, |
| "grad_norm": 0.27410601443733645, |
| "learning_rate": 9.647729868203238e-07, |
| "loss": 0.1868, |
| "step": 2222 |
| }, |
| { |
| "epoch": 4.550665301944729, |
| "grad_norm": 0.2957604221652705, |
| "learning_rate": 9.56013197573553e-07, |
| "loss": 0.1462, |
| "step": 2223 |
| }, |
| { |
| "epoch": 4.552712384851587, |
| "grad_norm": 0.26338269002342407, |
| "learning_rate": 9.4729238393235e-07, |
| "loss": 0.193, |
| "step": 2224 |
| }, |
| { |
| "epoch": 4.554759467758444, |
| "grad_norm": 0.2833184625486789, |
| "learning_rate": 9.386105637448151e-07, |
| "loss": 0.1621, |
| "step": 2225 |
| }, |
| { |
| "epoch": 4.556806550665302, |
| "grad_norm": 0.2789058820952134, |
| "learning_rate": 9.299677547792463e-07, |
| "loss": 0.1593, |
| "step": 2226 |
| }, |
| { |
| "epoch": 4.558853633572159, |
| "grad_norm": 0.2720509751943245, |
| "learning_rate": 9.21363974724101e-07, |
| "loss": 0.1802, |
| "step": 2227 |
| }, |
| { |
| "epoch": 4.560900716479018, |
| "grad_norm": 0.2861949558256626, |
| "learning_rate": 9.127992411879494e-07, |
| "loss": 0.2003, |
| "step": 2228 |
| }, |
| { |
| "epoch": 4.562947799385875, |
| "grad_norm": 0.28455385830088115, |
| "learning_rate": 9.042735716994678e-07, |
| "loss": 0.1772, |
| "step": 2229 |
| }, |
| { |
| "epoch": 4.564994882292733, |
| "grad_norm": 0.2762848657480311, |
| "learning_rate": 8.957869837073673e-07, |
| "loss": 0.153, |
| "step": 2230 |
| }, |
| { |
| "epoch": 4.567041965199591, |
| "grad_norm": 0.28684432410227595, |
| "learning_rate": 8.873394945803793e-07, |
| "loss": 0.139, |
| "step": 2231 |
| }, |
| { |
| "epoch": 4.569089048106449, |
| "grad_norm": 0.27952234592413544, |
| "learning_rate": 8.789311216072183e-07, |
| "loss": 0.1655, |
| "step": 2232 |
| }, |
| { |
| "epoch": 4.571136131013306, |
| "grad_norm": 0.2712591839850155, |
| "learning_rate": 8.705618819965411e-07, |
| "loss": 0.1687, |
| "step": 2233 |
| }, |
| { |
| "epoch": 4.573183213920164, |
| "grad_norm": 0.31661374577038626, |
| "learning_rate": 8.622317928769086e-07, |
| "loss": 0.1797, |
| "step": 2234 |
| }, |
| { |
| "epoch": 4.575230296827021, |
| "grad_norm": 0.30388013921486445, |
| "learning_rate": 8.539408712967679e-07, |
| "loss": 0.205, |
| "step": 2235 |
| }, |
| { |
| "epoch": 4.5772773797338795, |
| "grad_norm": 0.29761363560711057, |
| "learning_rate": 8.456891342243945e-07, |
| "loss": 0.1323, |
| "step": 2236 |
| }, |
| { |
| "epoch": 4.579324462640737, |
| "grad_norm": 0.2749824129298373, |
| "learning_rate": 8.374765985478728e-07, |
| "loss": 0.1662, |
| "step": 2237 |
| }, |
| { |
| "epoch": 4.5813715455475945, |
| "grad_norm": 0.288719742941428, |
| "learning_rate": 8.293032810750579e-07, |
| "loss": 0.185, |
| "step": 2238 |
| }, |
| { |
| "epoch": 4.583418628454452, |
| "grad_norm": 0.29201488163729705, |
| "learning_rate": 8.211691985335357e-07, |
| "loss": 0.1763, |
| "step": 2239 |
| }, |
| { |
| "epoch": 4.58546571136131, |
| "grad_norm": 0.29009215527637505, |
| "learning_rate": 8.130743675706032e-07, |
| "loss": 0.1485, |
| "step": 2240 |
| }, |
| { |
| "epoch": 4.587512794268168, |
| "grad_norm": 0.2855149297724901, |
| "learning_rate": 8.050188047532148e-07, |
| "loss": 0.172, |
| "step": 2241 |
| }, |
| { |
| "epoch": 4.5895598771750254, |
| "grad_norm": 0.3379024479854454, |
| "learning_rate": 7.970025265679648e-07, |
| "loss": 0.173, |
| "step": 2242 |
| }, |
| { |
| "epoch": 4.591606960081883, |
| "grad_norm": 0.2783179411940068, |
| "learning_rate": 7.890255494210453e-07, |
| "loss": 0.1801, |
| "step": 2243 |
| }, |
| { |
| "epoch": 4.593654042988741, |
| "grad_norm": 0.30188617728523903, |
| "learning_rate": 7.810878896382101e-07, |
| "loss": 0.1668, |
| "step": 2244 |
| }, |
| { |
| "epoch": 4.595701125895599, |
| "grad_norm": 0.27939878533555546, |
| "learning_rate": 7.731895634647513e-07, |
| "loss": 0.1544, |
| "step": 2245 |
| }, |
| { |
| "epoch": 4.597748208802456, |
| "grad_norm": 0.29721546644084723, |
| "learning_rate": 7.653305870654604e-07, |
| "loss": 0.1459, |
| "step": 2246 |
| }, |
| { |
| "epoch": 4.599795291709314, |
| "grad_norm": 0.2761040647920302, |
| "learning_rate": 7.575109765245936e-07, |
| "loss": 0.1991, |
| "step": 2247 |
| }, |
| { |
| "epoch": 4.601842374616172, |
| "grad_norm": 0.26790523775903885, |
| "learning_rate": 7.497307478458382e-07, |
| "loss": 0.1881, |
| "step": 2248 |
| }, |
| { |
| "epoch": 4.60388945752303, |
| "grad_norm": 0.26902868723836015, |
| "learning_rate": 7.419899169522903e-07, |
| "loss": 0.1956, |
| "step": 2249 |
| }, |
| { |
| "epoch": 4.605936540429887, |
| "grad_norm": 0.27200231013008247, |
| "learning_rate": 7.342884996863997e-07, |
| "loss": 0.1656, |
| "step": 2250 |
| }, |
| { |
| "epoch": 4.607983623336745, |
| "grad_norm": 0.3124482032601389, |
| "learning_rate": 7.266265118099669e-07, |
| "loss": 0.1753, |
| "step": 2251 |
| }, |
| { |
| "epoch": 4.610030706243603, |
| "grad_norm": 0.2938080759180912, |
| "learning_rate": 7.190039690040884e-07, |
| "loss": 0.1864, |
| "step": 2252 |
| }, |
| { |
| "epoch": 4.612077789150461, |
| "grad_norm": 0.28057707746789584, |
| "learning_rate": 7.114208868691319e-07, |
| "loss": 0.1655, |
| "step": 2253 |
| }, |
| { |
| "epoch": 4.614124872057318, |
| "grad_norm": 0.3670804523977461, |
| "learning_rate": 7.038772809247075e-07, |
| "loss": 0.2006, |
| "step": 2254 |
| }, |
| { |
| "epoch": 4.616171954964176, |
| "grad_norm": 0.3156535130425976, |
| "learning_rate": 6.963731666096318e-07, |
| "loss": 0.1873, |
| "step": 2255 |
| }, |
| { |
| "epoch": 4.618219037871034, |
| "grad_norm": 0.29444479967748544, |
| "learning_rate": 6.889085592818956e-07, |
| "loss": 0.1698, |
| "step": 2256 |
| }, |
| { |
| "epoch": 4.620266120777892, |
| "grad_norm": 0.2743736628864778, |
| "learning_rate": 6.814834742186361e-07, |
| "loss": 0.1851, |
| "step": 2257 |
| }, |
| { |
| "epoch": 4.622313203684749, |
| "grad_norm": 0.2788944245236952, |
| "learning_rate": 6.740979266161018e-07, |
| "loss": 0.1649, |
| "step": 2258 |
| }, |
| { |
| "epoch": 4.624360286591607, |
| "grad_norm": 0.28277515238178413, |
| "learning_rate": 6.667519315896264e-07, |
| "loss": 0.1707, |
| "step": 2259 |
| }, |
| { |
| "epoch": 4.626407369498464, |
| "grad_norm": 0.29229311294327376, |
| "learning_rate": 6.594455041735925e-07, |
| "loss": 0.1577, |
| "step": 2260 |
| }, |
| { |
| "epoch": 4.628454452405323, |
| "grad_norm": 0.2826367197666537, |
| "learning_rate": 6.521786593214075e-07, |
| "loss": 0.1694, |
| "step": 2261 |
| }, |
| { |
| "epoch": 4.63050153531218, |
| "grad_norm": 0.27183167715863765, |
| "learning_rate": 6.449514119054634e-07, |
| "loss": 0.1821, |
| "step": 2262 |
| }, |
| { |
| "epoch": 4.632548618219038, |
| "grad_norm": 0.2705799558624456, |
| "learning_rate": 6.377637767171152e-07, |
| "loss": 0.16, |
| "step": 2263 |
| }, |
| { |
| "epoch": 4.634595701125896, |
| "grad_norm": 0.312451093844958, |
| "learning_rate": 6.306157684666425e-07, |
| "loss": 0.146, |
| "step": 2264 |
| }, |
| { |
| "epoch": 4.6366427840327535, |
| "grad_norm": 0.30534769600917144, |
| "learning_rate": 6.235074017832299e-07, |
| "loss": 0.2026, |
| "step": 2265 |
| }, |
| { |
| "epoch": 4.638689866939611, |
| "grad_norm": 0.2749885611375992, |
| "learning_rate": 6.164386912149289e-07, |
| "loss": 0.149, |
| "step": 2266 |
| }, |
| { |
| "epoch": 4.640736949846469, |
| "grad_norm": 0.3043305615254814, |
| "learning_rate": 6.094096512286297e-07, |
| "loss": 0.1931, |
| "step": 2267 |
| }, |
| { |
| "epoch": 4.642784032753326, |
| "grad_norm": 0.2926074707796185, |
| "learning_rate": 6.024202962100312e-07, |
| "loss": 0.1704, |
| "step": 2268 |
| }, |
| { |
| "epoch": 4.6448311156601845, |
| "grad_norm": 0.27826474993716993, |
| "learning_rate": 5.954706404636179e-07, |
| "loss": 0.1868, |
| "step": 2269 |
| }, |
| { |
| "epoch": 4.646878198567042, |
| "grad_norm": 0.2676299230869566, |
| "learning_rate": 5.88560698212619e-07, |
| "loss": 0.1656, |
| "step": 2270 |
| }, |
| { |
| "epoch": 4.6489252814738995, |
| "grad_norm": 0.3004879943235446, |
| "learning_rate": 5.816904835989867e-07, |
| "loss": 0.1462, |
| "step": 2271 |
| }, |
| { |
| "epoch": 4.650972364380758, |
| "grad_norm": 0.28512914647917514, |
| "learning_rate": 5.748600106833735e-07, |
| "loss": 0.1784, |
| "step": 2272 |
| }, |
| { |
| "epoch": 4.653019447287615, |
| "grad_norm": 0.2810502748083855, |
| "learning_rate": 5.680692934450837e-07, |
| "loss": 0.2186, |
| "step": 2273 |
| }, |
| { |
| "epoch": 4.655066530194473, |
| "grad_norm": 0.2855042987462666, |
| "learning_rate": 5.613183457820714e-07, |
| "loss": 0.1911, |
| "step": 2274 |
| }, |
| { |
| "epoch": 4.65711361310133, |
| "grad_norm": 0.30832919924707025, |
| "learning_rate": 5.546071815108845e-07, |
| "loss": 0.1853, |
| "step": 2275 |
| }, |
| { |
| "epoch": 4.659160696008188, |
| "grad_norm": 0.27863470748148456, |
| "learning_rate": 5.479358143666602e-07, |
| "loss": 0.1446, |
| "step": 2276 |
| }, |
| { |
| "epoch": 4.661207778915046, |
| "grad_norm": 0.2954741773503375, |
| "learning_rate": 5.413042580030792e-07, |
| "loss": 0.1861, |
| "step": 2277 |
| }, |
| { |
| "epoch": 4.663254861821904, |
| "grad_norm": 0.292054066538448, |
| "learning_rate": 5.347125259923491e-07, |
| "loss": 0.1693, |
| "step": 2278 |
| }, |
| { |
| "epoch": 4.665301944728761, |
| "grad_norm": 0.2736497121658198, |
| "learning_rate": 5.281606318251764e-07, |
| "loss": 0.1548, |
| "step": 2279 |
| }, |
| { |
| "epoch": 4.667349027635619, |
| "grad_norm": 0.30678020029811015, |
| "learning_rate": 5.216485889107214e-07, |
| "loss": 0.1982, |
| "step": 2280 |
| }, |
| { |
| "epoch": 4.669396110542477, |
| "grad_norm": 0.3007630538627819, |
| "learning_rate": 5.151764105766011e-07, |
| "loss": 0.2082, |
| "step": 2281 |
| }, |
| { |
| "epoch": 4.671443193449335, |
| "grad_norm": 0.30472720971722367, |
| "learning_rate": 5.087441100688351e-07, |
| "loss": 0.1913, |
| "step": 2282 |
| }, |
| { |
| "epoch": 4.673490276356192, |
| "grad_norm": 0.28472393369780435, |
| "learning_rate": 5.023517005518264e-07, |
| "loss": 0.1795, |
| "step": 2283 |
| }, |
| { |
| "epoch": 4.67553735926305, |
| "grad_norm": 0.30834546215081027, |
| "learning_rate": 4.959991951083498e-07, |
| "loss": 0.1677, |
| "step": 2284 |
| }, |
| { |
| "epoch": 4.677584442169908, |
| "grad_norm": 0.2707613967004538, |
| "learning_rate": 4.89686606739499e-07, |
| "loss": 0.1885, |
| "step": 2285 |
| }, |
| { |
| "epoch": 4.679631525076766, |
| "grad_norm": 0.2925354294386155, |
| "learning_rate": 4.834139483646793e-07, |
| "loss": 0.1369, |
| "step": 2286 |
| }, |
| { |
| "epoch": 4.681678607983623, |
| "grad_norm": 0.295052171708121, |
| "learning_rate": 4.771812328215708e-07, |
| "loss": 0.1684, |
| "step": 2287 |
| }, |
| { |
| "epoch": 4.683725690890481, |
| "grad_norm": 0.2816846080734314, |
| "learning_rate": 4.709884728661118e-07, |
| "loss": 0.1634, |
| "step": 2288 |
| }, |
| { |
| "epoch": 4.685772773797339, |
| "grad_norm": 0.29537583958782165, |
| "learning_rate": 4.648356811724619e-07, |
| "loss": 0.1501, |
| "step": 2289 |
| }, |
| { |
| "epoch": 4.687819856704197, |
| "grad_norm": 0.2776645155887449, |
| "learning_rate": 4.587228703329838e-07, |
| "loss": 0.1731, |
| "step": 2290 |
| }, |
| { |
| "epoch": 4.689866939611054, |
| "grad_norm": 0.27540787792841026, |
| "learning_rate": 4.5265005285821674e-07, |
| "loss": 0.1688, |
| "step": 2291 |
| }, |
| { |
| "epoch": 4.691914022517912, |
| "grad_norm": 0.2774319968994503, |
| "learning_rate": 4.4661724117684545e-07, |
| "loss": 0.1736, |
| "step": 2292 |
| }, |
| { |
| "epoch": 4.69396110542477, |
| "grad_norm": 0.27404159260754235, |
| "learning_rate": 4.40624447635678e-07, |
| "loss": 0.1473, |
| "step": 2293 |
| }, |
| { |
| "epoch": 4.696008188331628, |
| "grad_norm": 0.2944590643348067, |
| "learning_rate": 4.346716844996279e-07, |
| "loss": 0.1594, |
| "step": 2294 |
| }, |
| { |
| "epoch": 4.698055271238485, |
| "grad_norm": 0.2881991440998457, |
| "learning_rate": 4.2875896395167427e-07, |
| "loss": 0.1988, |
| "step": 2295 |
| }, |
| { |
| "epoch": 4.700102354145343, |
| "grad_norm": 0.28026776497774936, |
| "learning_rate": 4.228862980928439e-07, |
| "loss": 0.1784, |
| "step": 2296 |
| }, |
| { |
| "epoch": 4.702149437052201, |
| "grad_norm": 0.2955331369567261, |
| "learning_rate": 4.1705369894219584e-07, |
| "loss": 0.1786, |
| "step": 2297 |
| }, |
| { |
| "epoch": 4.7041965199590585, |
| "grad_norm": 0.292319027121268, |
| "learning_rate": 4.112611784367837e-07, |
| "loss": 0.1677, |
| "step": 2298 |
| }, |
| { |
| "epoch": 4.706243602865916, |
| "grad_norm": 0.31348235722088247, |
| "learning_rate": 4.0550874843163337e-07, |
| "loss": 0.1796, |
| "step": 2299 |
| }, |
| { |
| "epoch": 4.7082906857727735, |
| "grad_norm": 0.2903513190722839, |
| "learning_rate": 3.997964206997207e-07, |
| "loss": 0.1804, |
| "step": 2300 |
| }, |
| { |
| "epoch": 4.710337768679631, |
| "grad_norm": 0.2879799305264078, |
| "learning_rate": 3.941242069319562e-07, |
| "loss": 0.1895, |
| "step": 2301 |
| }, |
| { |
| "epoch": 4.7123848515864895, |
| "grad_norm": 0.3577665248128591, |
| "learning_rate": 3.8849211873714266e-07, |
| "loss": 0.1765, |
| "step": 2302 |
| }, |
| { |
| "epoch": 4.714431934493347, |
| "grad_norm": 0.2903669946216332, |
| "learning_rate": 3.8290016764196637e-07, |
| "loss": 0.1716, |
| "step": 2303 |
| }, |
| { |
| "epoch": 4.7164790174002045, |
| "grad_norm": 0.3014373493296439, |
| "learning_rate": 3.7734836509096596e-07, |
| "loss": 0.1388, |
| "step": 2304 |
| }, |
| { |
| "epoch": 4.718526100307063, |
| "grad_norm": 0.3083437231349992, |
| "learning_rate": 3.7183672244652135e-07, |
| "loss": 0.1903, |
| "step": 2305 |
| }, |
| { |
| "epoch": 4.72057318321392, |
| "grad_norm": 0.2957154629258413, |
| "learning_rate": 3.663652509888027e-07, |
| "loss": 0.1718, |
| "step": 2306 |
| }, |
| { |
| "epoch": 4.722620266120778, |
| "grad_norm": 0.3046648653682266, |
| "learning_rate": 3.6093396191578366e-07, |
| "loss": 0.1979, |
| "step": 2307 |
| }, |
| { |
| "epoch": 4.724667349027635, |
| "grad_norm": 0.28017443242244167, |
| "learning_rate": 3.5554286634318814e-07, |
| "loss": 0.1728, |
| "step": 2308 |
| }, |
| { |
| "epoch": 4.726714431934493, |
| "grad_norm": 0.2748026640801257, |
| "learning_rate": 3.501919753044836e-07, |
| "loss": 0.2096, |
| "step": 2309 |
| }, |
| { |
| "epoch": 4.728761514841351, |
| "grad_norm": 0.2900862431852642, |
| "learning_rate": 3.448812997508588e-07, |
| "loss": 0.1655, |
| "step": 2310 |
| }, |
| { |
| "epoch": 4.730808597748209, |
| "grad_norm": 0.2833002934703294, |
| "learning_rate": 3.3961085055119083e-07, |
| "loss": 0.1527, |
| "step": 2311 |
| }, |
| { |
| "epoch": 4.732855680655066, |
| "grad_norm": 0.27909247839420187, |
| "learning_rate": 3.3438063849203116e-07, |
| "loss": 0.1449, |
| "step": 2312 |
| }, |
| { |
| "epoch": 4.734902763561925, |
| "grad_norm": 0.2997312028695653, |
| "learning_rate": 3.2919067427758186e-07, |
| "loss": 0.153, |
| "step": 2313 |
| }, |
| { |
| "epoch": 4.736949846468782, |
| "grad_norm": 0.2788661977637299, |
| "learning_rate": 3.2404096852967305e-07, |
| "loss": 0.1686, |
| "step": 2314 |
| }, |
| { |
| "epoch": 4.73899692937564, |
| "grad_norm": 0.2844108440502707, |
| "learning_rate": 3.189315317877428e-07, |
| "loss": 0.1575, |
| "step": 2315 |
| }, |
| { |
| "epoch": 4.741044012282497, |
| "grad_norm": 0.29542954551351697, |
| "learning_rate": 3.138623745088132e-07, |
| "loss": 0.1489, |
| "step": 2316 |
| }, |
| { |
| "epoch": 4.743091095189355, |
| "grad_norm": 0.2931309390733155, |
| "learning_rate": 3.0883350706746973e-07, |
| "loss": 0.1793, |
| "step": 2317 |
| }, |
| { |
| "epoch": 4.745138178096213, |
| "grad_norm": 0.274362618181376, |
| "learning_rate": 3.038449397558396e-07, |
| "loss": 0.1635, |
| "step": 2318 |
| }, |
| { |
| "epoch": 4.747185261003071, |
| "grad_norm": 0.27428372927016115, |
| "learning_rate": 2.9889668278357376e-07, |
| "loss": 0.1588, |
| "step": 2319 |
| }, |
| { |
| "epoch": 4.749232343909928, |
| "grad_norm": 0.29732674571188733, |
| "learning_rate": 2.9398874627782014e-07, |
| "loss": 0.1708, |
| "step": 2320 |
| }, |
| { |
| "epoch": 4.751279426816786, |
| "grad_norm": 0.3003256230614381, |
| "learning_rate": 2.891211402832128e-07, |
| "loss": 0.1725, |
| "step": 2321 |
| }, |
| { |
| "epoch": 4.753326509723644, |
| "grad_norm": 0.3084720969154894, |
| "learning_rate": 2.8429387476183624e-07, |
| "loss": 0.1483, |
| "step": 2322 |
| }, |
| { |
| "epoch": 4.755373592630502, |
| "grad_norm": 0.3036705097265483, |
| "learning_rate": 2.7950695959322093e-07, |
| "loss": 0.1623, |
| "step": 2323 |
| }, |
| { |
| "epoch": 4.757420675537359, |
| "grad_norm": 0.26881070505536797, |
| "learning_rate": 2.747604045743102e-07, |
| "loss": 0.171, |
| "step": 2324 |
| }, |
| { |
| "epoch": 4.759467758444217, |
| "grad_norm": 0.2880629649819299, |
| "learning_rate": 2.7005421941945555e-07, |
| "loss": 0.1646, |
| "step": 2325 |
| }, |
| { |
| "epoch": 4.761514841351075, |
| "grad_norm": 0.28081087918300657, |
| "learning_rate": 2.653884137603702e-07, |
| "loss": 0.1427, |
| "step": 2326 |
| }, |
| { |
| "epoch": 4.763561924257933, |
| "grad_norm": 0.29698506829638605, |
| "learning_rate": 2.6076299714614673e-07, |
| "loss": 0.1612, |
| "step": 2327 |
| }, |
| { |
| "epoch": 4.76560900716479, |
| "grad_norm": 0.2830061118167646, |
| "learning_rate": 2.5617797904320396e-07, |
| "loss": 0.1731, |
| "step": 2328 |
| }, |
| { |
| "epoch": 4.767656090071648, |
| "grad_norm": 0.3123860044528765, |
| "learning_rate": 2.516333688352801e-07, |
| "loss": 0.1561, |
| "step": 2329 |
| }, |
| { |
| "epoch": 4.769703172978506, |
| "grad_norm": 0.2798082098938455, |
| "learning_rate": 2.471291758234218e-07, |
| "loss": 0.1902, |
| "step": 2330 |
| }, |
| { |
| "epoch": 4.7717502558853635, |
| "grad_norm": 0.2987315382953632, |
| "learning_rate": 2.426654092259528e-07, |
| "loss": 0.1551, |
| "step": 2331 |
| }, |
| { |
| "epoch": 4.773797338792221, |
| "grad_norm": 0.2808063055566694, |
| "learning_rate": 2.382420781784589e-07, |
| "loss": 0.1749, |
| "step": 2332 |
| }, |
| { |
| "epoch": 4.7758444216990785, |
| "grad_norm": 0.2796699754953347, |
| "learning_rate": 2.338591917337696e-07, |
| "loss": 0.1727, |
| "step": 2333 |
| }, |
| { |
| "epoch": 4.777891504605937, |
| "grad_norm": 0.277747972615087, |
| "learning_rate": 2.295167588619518e-07, |
| "loss": 0.1507, |
| "step": 2334 |
| }, |
| { |
| "epoch": 4.779938587512794, |
| "grad_norm": 0.2987700878833016, |
| "learning_rate": 2.2521478845025867e-07, |
| "loss": 0.1798, |
| "step": 2335 |
| }, |
| { |
| "epoch": 4.781985670419652, |
| "grad_norm": 0.2728906304255605, |
| "learning_rate": 2.2095328930315184e-07, |
| "loss": 0.171, |
| "step": 2336 |
| }, |
| { |
| "epoch": 4.7840327533265095, |
| "grad_norm": 0.300885418994469, |
| "learning_rate": 2.167322701422525e-07, |
| "loss": 0.163, |
| "step": 2337 |
| }, |
| { |
| "epoch": 4.786079836233368, |
| "grad_norm": 0.2843769306835108, |
| "learning_rate": 2.1255173960634146e-07, |
| "loss": 0.1788, |
| "step": 2338 |
| }, |
| { |
| "epoch": 4.788126919140225, |
| "grad_norm": 0.29006108226073735, |
| "learning_rate": 2.08411706251328e-07, |
| "loss": 0.177, |
| "step": 2339 |
| }, |
| { |
| "epoch": 4.790174002047083, |
| "grad_norm": 0.28369183368200673, |
| "learning_rate": 2.0431217855025e-07, |
| "loss": 0.18, |
| "step": 2340 |
| }, |
| { |
| "epoch": 4.79222108495394, |
| "grad_norm": 0.28761647181705274, |
| "learning_rate": 2.0025316489323597e-07, |
| "loss": 0.1656, |
| "step": 2341 |
| }, |
| { |
| "epoch": 4.794268167860798, |
| "grad_norm": 0.2839547733000503, |
| "learning_rate": 1.9623467358750315e-07, |
| "loss": 0.1743, |
| "step": 2342 |
| }, |
| { |
| "epoch": 4.796315250767656, |
| "grad_norm": 0.2792802390584324, |
| "learning_rate": 1.9225671285733272e-07, |
| "loss": 0.1793, |
| "step": 2343 |
| }, |
| { |
| "epoch": 4.798362333674514, |
| "grad_norm": 0.2953632540852749, |
| "learning_rate": 1.8831929084406119e-07, |
| "loss": 0.1697, |
| "step": 2344 |
| }, |
| { |
| "epoch": 4.800409416581371, |
| "grad_norm": 0.26668806789326216, |
| "learning_rate": 1.8442241560604922e-07, |
| "loss": 0.1517, |
| "step": 2345 |
| }, |
| { |
| "epoch": 4.80245649948823, |
| "grad_norm": 0.27943039216113114, |
| "learning_rate": 1.8056609511868163e-07, |
| "loss": 0.2131, |
| "step": 2346 |
| }, |
| { |
| "epoch": 4.804503582395087, |
| "grad_norm": 0.26345026664491444, |
| "learning_rate": 1.7675033727434288e-07, |
| "loss": 0.1688, |
| "step": 2347 |
| }, |
| { |
| "epoch": 4.806550665301945, |
| "grad_norm": 0.28602587264323004, |
| "learning_rate": 1.7297514988239505e-07, |
| "loss": 0.1958, |
| "step": 2348 |
| }, |
| { |
| "epoch": 4.808597748208802, |
| "grad_norm": 0.27558338217328315, |
| "learning_rate": 1.692405406691755e-07, |
| "loss": 0.1796, |
| "step": 2349 |
| }, |
| { |
| "epoch": 4.81064483111566, |
| "grad_norm": 0.2966627143037965, |
| "learning_rate": 1.655465172779702e-07, |
| "loss": 0.192, |
| "step": 2350 |
| }, |
| { |
| "epoch": 4.812691914022518, |
| "grad_norm": 0.2944395579726434, |
| "learning_rate": 1.6189308726900277e-07, |
| "loss": 0.172, |
| "step": 2351 |
| }, |
| { |
| "epoch": 4.814738996929376, |
| "grad_norm": 0.2711839256534087, |
| "learning_rate": 1.5828025811941872e-07, |
| "loss": 0.1901, |
| "step": 2352 |
| }, |
| { |
| "epoch": 4.816786079836233, |
| "grad_norm": 0.29155015705326437, |
| "learning_rate": 1.547080372232679e-07, |
| "loss": 0.167, |
| "step": 2353 |
| }, |
| { |
| "epoch": 4.818833162743092, |
| "grad_norm": 0.2815618639741477, |
| "learning_rate": 1.5117643189149546e-07, |
| "loss": 0.1516, |
| "step": 2354 |
| }, |
| { |
| "epoch": 4.820880245649949, |
| "grad_norm": 0.29038125469870796, |
| "learning_rate": 1.4768544935191088e-07, |
| "loss": 0.1657, |
| "step": 2355 |
| }, |
| { |
| "epoch": 4.822927328556807, |
| "grad_norm": 0.2867465831992582, |
| "learning_rate": 1.44235096749199e-07, |
| "loss": 0.1824, |
| "step": 2356 |
| }, |
| { |
| "epoch": 4.824974411463664, |
| "grad_norm": 0.2708843561414753, |
| "learning_rate": 1.408253811448823e-07, |
| "loss": 0.1735, |
| "step": 2357 |
| }, |
| { |
| "epoch": 4.827021494370522, |
| "grad_norm": 0.2891677780067326, |
| "learning_rate": 1.374563095173187e-07, |
| "loss": 0.1594, |
| "step": 2358 |
| }, |
| { |
| "epoch": 4.82906857727738, |
| "grad_norm": 0.29721811114798363, |
| "learning_rate": 1.3412788876167925e-07, |
| "loss": 0.1681, |
| "step": 2359 |
| }, |
| { |
| "epoch": 4.8311156601842375, |
| "grad_norm": 0.2819368121687383, |
| "learning_rate": 1.3084012568994608e-07, |
| "loss": 0.2588, |
| "step": 2360 |
| }, |
| { |
| "epoch": 4.833162743091095, |
| "grad_norm": 0.2637703601334377, |
| "learning_rate": 1.2759302703088117e-07, |
| "loss": 0.1686, |
| "step": 2361 |
| }, |
| { |
| "epoch": 4.835209825997953, |
| "grad_norm": 0.2790376537691832, |
| "learning_rate": 1.2438659943003306e-07, |
| "loss": 0.1824, |
| "step": 2362 |
| }, |
| { |
| "epoch": 4.837256908904811, |
| "grad_norm": 0.2855413561087752, |
| "learning_rate": 1.212208494497036e-07, |
| "loss": 0.18, |
| "step": 2363 |
| }, |
| { |
| "epoch": 4.8393039918116685, |
| "grad_norm": 0.2966645113711715, |
| "learning_rate": 1.180957835689478e-07, |
| "loss": 0.1462, |
| "step": 2364 |
| }, |
| { |
| "epoch": 4.841351074718526, |
| "grad_norm": 0.28073303467220956, |
| "learning_rate": 1.1501140818355627e-07, |
| "loss": 0.2037, |
| "step": 2365 |
| }, |
| { |
| "epoch": 4.8433981576253835, |
| "grad_norm": 0.27323730338581254, |
| "learning_rate": 1.1196772960603952e-07, |
| "loss": 0.1552, |
| "step": 2366 |
| }, |
| { |
| "epoch": 4.845445240532242, |
| "grad_norm": 0.27979495089452416, |
| "learning_rate": 1.0896475406562135e-07, |
| "loss": 0.1911, |
| "step": 2367 |
| }, |
| { |
| "epoch": 4.847492323439099, |
| "grad_norm": 0.27315088792468245, |
| "learning_rate": 1.0600248770821886e-07, |
| "loss": 0.1945, |
| "step": 2368 |
| }, |
| { |
| "epoch": 4.849539406345957, |
| "grad_norm": 0.29508373388397574, |
| "learning_rate": 1.0308093659643582e-07, |
| "loss": 0.158, |
| "step": 2369 |
| }, |
| { |
| "epoch": 4.851586489252814, |
| "grad_norm": 0.2790657492666217, |
| "learning_rate": 1.0020010670954483e-07, |
| "loss": 0.1798, |
| "step": 2370 |
| }, |
| { |
| "epoch": 4.853633572159673, |
| "grad_norm": 0.2863828008417254, |
| "learning_rate": 9.736000394348299e-08, |
| "loss": 0.1688, |
| "step": 2371 |
| }, |
| { |
| "epoch": 4.85568065506653, |
| "grad_norm": 0.2837857995206189, |
| "learning_rate": 9.456063411082738e-08, |
| "loss": 0.1818, |
| "step": 2372 |
| }, |
| { |
| "epoch": 4.857727737973388, |
| "grad_norm": 0.26660018975441796, |
| "learning_rate": 9.180200294079955e-08, |
| "loss": 0.1681, |
| "step": 2373 |
| }, |
| { |
| "epoch": 4.859774820880245, |
| "grad_norm": 0.30489283035917186, |
| "learning_rate": 8.908411607923884e-08, |
| "loss": 0.1724, |
| "step": 2374 |
| }, |
| { |
| "epoch": 4.861821903787103, |
| "grad_norm": 0.27668845133173764, |
| "learning_rate": 8.640697908859575e-08, |
| "loss": 0.1871, |
| "step": 2375 |
| }, |
| { |
| "epoch": 4.863868986693961, |
| "grad_norm": 0.27941980307619757, |
| "learning_rate": 8.377059744792748e-08, |
| "loss": 0.1875, |
| "step": 2376 |
| }, |
| { |
| "epoch": 4.865916069600819, |
| "grad_norm": 0.2991779658323062, |
| "learning_rate": 8.117497655287798e-08, |
| "loss": 0.1607, |
| "step": 2377 |
| }, |
| { |
| "epoch": 4.867963152507676, |
| "grad_norm": 0.2960605235453732, |
| "learning_rate": 7.862012171566902e-08, |
| "loss": 0.2025, |
| "step": 2378 |
| }, |
| { |
| "epoch": 4.870010235414535, |
| "grad_norm": 0.2909596199436068, |
| "learning_rate": 7.61060381650891e-08, |
| "loss": 0.1976, |
| "step": 2379 |
| }, |
| { |
| "epoch": 4.872057318321392, |
| "grad_norm": 0.3022247341372479, |
| "learning_rate": 7.363273104648904e-08, |
| "loss": 0.1873, |
| "step": 2380 |
| }, |
| { |
| "epoch": 4.87410440122825, |
| "grad_norm": 0.27635276227359323, |
| "learning_rate": 7.120020542176198e-08, |
| "loss": 0.1815, |
| "step": 2381 |
| }, |
| { |
| "epoch": 4.876151484135107, |
| "grad_norm": 0.3235414628513566, |
| "learning_rate": 6.880846626933668e-08, |
| "loss": 0.1526, |
| "step": 2382 |
| }, |
| { |
| "epoch": 4.878198567041965, |
| "grad_norm": 0.3061161602814933, |
| "learning_rate": 6.645751848417093e-08, |
| "loss": 0.1672, |
| "step": 2383 |
| }, |
| { |
| "epoch": 4.880245649948823, |
| "grad_norm": 0.2857967006307856, |
| "learning_rate": 6.414736687773371e-08, |
| "loss": 0.1662, |
| "step": 2384 |
| }, |
| { |
| "epoch": 4.882292732855681, |
| "grad_norm": 0.27457282899975494, |
| "learning_rate": 6.187801617800748e-08, |
| "loss": 0.1564, |
| "step": 2385 |
| }, |
| { |
| "epoch": 4.884339815762538, |
| "grad_norm": 0.2990684370251101, |
| "learning_rate": 5.964947102946594e-08, |
| "loss": 0.193, |
| "step": 2386 |
| }, |
| { |
| "epoch": 4.886386898669397, |
| "grad_norm": 0.27143331999482234, |
| "learning_rate": 5.746173599307181e-08, |
| "loss": 0.172, |
| "step": 2387 |
| }, |
| { |
| "epoch": 4.888433981576254, |
| "grad_norm": 0.2720324730535623, |
| "learning_rate": 5.531481554626128e-08, |
| "loss": 0.1466, |
| "step": 2388 |
| }, |
| { |
| "epoch": 4.890481064483112, |
| "grad_norm": 0.29144216168390374, |
| "learning_rate": 5.320871408294403e-08, |
| "loss": 0.1622, |
| "step": 2389 |
| }, |
| { |
| "epoch": 4.892528147389969, |
| "grad_norm": 0.27957166407177925, |
| "learning_rate": 5.114343591348769e-08, |
| "loss": 0.1744, |
| "step": 2390 |
| }, |
| { |
| "epoch": 4.894575230296827, |
| "grad_norm": 0.2743372447650692, |
| "learning_rate": 4.9118985264711147e-08, |
| "loss": 0.1779, |
| "step": 2391 |
| }, |
| { |
| "epoch": 4.896622313203685, |
| "grad_norm": 0.2864112361999076, |
| "learning_rate": 4.713536627987347e-08, |
| "loss": 0.1783, |
| "step": 2392 |
| }, |
| { |
| "epoch": 4.8986693961105425, |
| "grad_norm": 0.2820613867682801, |
| "learning_rate": 4.519258301866947e-08, |
| "loss": 0.1764, |
| "step": 2393 |
| }, |
| { |
| "epoch": 4.9007164790174, |
| "grad_norm": 0.2790446443208781, |
| "learning_rate": 4.3290639457214125e-08, |
| "loss": 0.1983, |
| "step": 2394 |
| }, |
| { |
| "epoch": 4.9027635619242576, |
| "grad_norm": 0.3009212756955286, |
| "learning_rate": 4.1429539488047066e-08, |
| "loss": 0.1632, |
| "step": 2395 |
| }, |
| { |
| "epoch": 4.904810644831116, |
| "grad_norm": 0.3048977634842906, |
| "learning_rate": 3.960928692011257e-08, |
| "loss": 0.1775, |
| "step": 2396 |
| }, |
| { |
| "epoch": 4.9068577277379735, |
| "grad_norm": 0.2999700691804944, |
| "learning_rate": 3.7829885478757324e-08, |
| "loss": 0.1811, |
| "step": 2397 |
| }, |
| { |
| "epoch": 4.908904810644831, |
| "grad_norm": 0.2867226352655605, |
| "learning_rate": 3.6091338805719356e-08, |
| "loss": 0.1749, |
| "step": 2398 |
| }, |
| { |
| "epoch": 4.9109518935516885, |
| "grad_norm": 0.27853817331380126, |
| "learning_rate": 3.439365045912801e-08, |
| "loss": 0.1715, |
| "step": 2399 |
| }, |
| { |
| "epoch": 4.912998976458547, |
| "grad_norm": 0.26671396491728905, |
| "learning_rate": 3.273682391348398e-08, |
| "loss": 0.1736, |
| "step": 2400 |
| }, |
| { |
| "epoch": 4.915046059365404, |
| "grad_norm": 0.2981072341365868, |
| "learning_rate": 3.1120862559670396e-08, |
| "loss": 0.1608, |
| "step": 2401 |
| }, |
| { |
| "epoch": 4.917093142272262, |
| "grad_norm": 0.3006161506174685, |
| "learning_rate": 2.9545769704923954e-08, |
| "loss": 0.174, |
| "step": 2402 |
| }, |
| { |
| "epoch": 4.919140225179119, |
| "grad_norm": 0.2700128965804573, |
| "learning_rate": 2.8011548572846047e-08, |
| "loss": 0.1666, |
| "step": 2403 |
| }, |
| { |
| "epoch": 4.921187308085978, |
| "grad_norm": 0.27640788721099124, |
| "learning_rate": 2.651820230338942e-08, |
| "loss": 0.1786, |
| "step": 2404 |
| }, |
| { |
| "epoch": 4.923234390992835, |
| "grad_norm": 0.30195005723908613, |
| "learning_rate": 2.50657339528515e-08, |
| "loss": 0.1683, |
| "step": 2405 |
| }, |
| { |
| "epoch": 4.925281473899693, |
| "grad_norm": 0.27803519346079886, |
| "learning_rate": 2.365414649386555e-08, |
| "loss": 0.2196, |
| "step": 2406 |
| }, |
| { |
| "epoch": 4.92732855680655, |
| "grad_norm": 0.27873854091593087, |
| "learning_rate": 2.2283442815402845e-08, |
| "loss": 0.1772, |
| "step": 2407 |
| }, |
| { |
| "epoch": 4.929375639713409, |
| "grad_norm": 0.31272442814047646, |
| "learning_rate": 2.0953625722754943e-08, |
| "loss": 0.1917, |
| "step": 2408 |
| }, |
| { |
| "epoch": 4.931422722620266, |
| "grad_norm": 0.27074887610829274, |
| "learning_rate": 1.9664697937542554e-08, |
| "loss": 0.177, |
| "step": 2409 |
| }, |
| { |
| "epoch": 4.933469805527124, |
| "grad_norm": 0.2809859827116871, |
| "learning_rate": 1.8416662097693326e-08, |
| "loss": 0.1735, |
| "step": 2410 |
| }, |
| { |
| "epoch": 4.935516888433981, |
| "grad_norm": 0.2922562803880012, |
| "learning_rate": 1.720952075745075e-08, |
| "loss": 0.164, |
| "step": 2411 |
| }, |
| { |
| "epoch": 4.93756397134084, |
| "grad_norm": 0.27866762770170544, |
| "learning_rate": 1.604327638736525e-08, |
| "loss": 0.1509, |
| "step": 2412 |
| }, |
| { |
| "epoch": 4.939611054247697, |
| "grad_norm": 0.2757657750150201, |
| "learning_rate": 1.491793137427866e-08, |
| "loss": 0.1694, |
| "step": 2413 |
| }, |
| { |
| "epoch": 4.941658137154555, |
| "grad_norm": 0.3690880615237033, |
| "learning_rate": 1.3833488021335328e-08, |
| "loss": 0.1459, |
| "step": 2414 |
| }, |
| { |
| "epoch": 4.943705220061412, |
| "grad_norm": 0.28975968339512975, |
| "learning_rate": 1.2789948547968779e-08, |
| "loss": 0.1723, |
| "step": 2415 |
| }, |
| { |
| "epoch": 4.94575230296827, |
| "grad_norm": 0.2676510309996025, |
| "learning_rate": 1.1787315089895057e-08, |
| "loss": 0.1587, |
| "step": 2416 |
| }, |
| { |
| "epoch": 4.947799385875128, |
| "grad_norm": 0.29535503732393725, |
| "learning_rate": 1.0825589699112737e-08, |
| "loss": 0.1429, |
| "step": 2417 |
| }, |
| { |
| "epoch": 4.949846468781986, |
| "grad_norm": 0.2851419625396727, |
| "learning_rate": 9.904774343898471e-09, |
| "loss": 0.1529, |
| "step": 2418 |
| }, |
| { |
| "epoch": 4.951893551688843, |
| "grad_norm": 0.25772957787571077, |
| "learning_rate": 9.024870908802552e-09, |
| "loss": 0.187, |
| "step": 2419 |
| }, |
| { |
| "epoch": 4.9539406345957016, |
| "grad_norm": 0.2900373956230706, |
| "learning_rate": 8.185881194644474e-09, |
| "loss": 0.1541, |
| "step": 2420 |
| }, |
| { |
| "epoch": 4.955987717502559, |
| "grad_norm": 0.291463457270851, |
| "learning_rate": 7.387806918508489e-09, |
| "loss": 0.1585, |
| "step": 2421 |
| }, |
| { |
| "epoch": 4.958034800409417, |
| "grad_norm": 0.2796933717518039, |
| "learning_rate": 6.630649713739168e-09, |
| "loss": 0.2045, |
| "step": 2422 |
| }, |
| { |
| "epoch": 4.960081883316274, |
| "grad_norm": 0.2809689443794419, |
| "learning_rate": 5.9144111299414e-09, |
| "loss": 0.169, |
| "step": 2423 |
| }, |
| { |
| "epoch": 4.962128966223132, |
| "grad_norm": 0.3074472408506574, |
| "learning_rate": 5.239092632980391e-09, |
| "loss": 0.1593, |
| "step": 2424 |
| }, |
| { |
| "epoch": 4.96417604912999, |
| "grad_norm": 0.28551605159632193, |
| "learning_rate": 4.6046956049639045e-09, |
| "loss": 0.1518, |
| "step": 2425 |
| }, |
| { |
| "epoch": 4.9662231320368475, |
| "grad_norm": 0.2889141054976907, |
| "learning_rate": 4.011221344257799e-09, |
| "loss": 0.1781, |
| "step": 2426 |
| }, |
| { |
| "epoch": 4.968270214943705, |
| "grad_norm": 0.25486448368902137, |
| "learning_rate": 3.4586710654727074e-09, |
| "loss": 0.1688, |
| "step": 2427 |
| }, |
| { |
| "epoch": 4.970317297850563, |
| "grad_norm": 0.3024410370860751, |
| "learning_rate": 2.94704589946182e-09, |
| "loss": 0.2105, |
| "step": 2428 |
| }, |
| { |
| "epoch": 4.972364380757421, |
| "grad_norm": 0.29484058319269135, |
| "learning_rate": 2.4763468933231005e-09, |
| "loss": 0.178, |
| "step": 2429 |
| }, |
| { |
| "epoch": 4.974411463664278, |
| "grad_norm": 0.28427681285996675, |
| "learning_rate": 2.0465750103926263e-09, |
| "loss": 0.2151, |
| "step": 2430 |
| }, |
| { |
| "epoch": 4.976458546571136, |
| "grad_norm": 0.2758137593263903, |
| "learning_rate": 1.657731130246809e-09, |
| "loss": 0.1934, |
| "step": 2431 |
| }, |
| { |
| "epoch": 4.9785056294779935, |
| "grad_norm": 0.3024473446528619, |
| "learning_rate": 1.309816048697954e-09, |
| "loss": 0.1732, |
| "step": 2432 |
| }, |
| { |
| "epoch": 4.980552712384852, |
| "grad_norm": 0.26746264280387694, |
| "learning_rate": 1.0028304777875975e-09, |
| "loss": 0.1807, |
| "step": 2433 |
| }, |
| { |
| "epoch": 4.982599795291709, |
| "grad_norm": 0.2689810857541332, |
| "learning_rate": 7.367750458020518e-10, |
| "loss": 0.1713, |
| "step": 2434 |
| }, |
| { |
| "epoch": 4.984646878198567, |
| "grad_norm": 0.30444153746664904, |
| "learning_rate": 5.116502972479787e-10, |
| "loss": 0.172, |
| "step": 2435 |
| }, |
| { |
| "epoch": 4.986693961105424, |
| "grad_norm": 0.2724209053996081, |
| "learning_rate": 3.2745669287237435e-10, |
| "loss": 0.1994, |
| "step": 2436 |
| }, |
| { |
| "epoch": 4.988741044012283, |
| "grad_norm": 0.2666986989444724, |
| "learning_rate": 1.8419460964258505e-10, |
| "loss": 0.1735, |
| "step": 2437 |
| }, |
| { |
| "epoch": 4.99078812691914, |
| "grad_norm": 0.2846154192735168, |
| "learning_rate": 8.186434076185024e-11, |
| "loss": 0.1539, |
| "step": 2438 |
| }, |
| { |
| "epoch": 4.992835209825998, |
| "grad_norm": 0.28158014093768946, |
| "learning_rate": 2.046609566264124e-11, |
| "loss": 0.1756, |
| "step": 2439 |
| }, |
| { |
| "epoch": 4.994882292732855, |
| "grad_norm": 0.3150094705291199, |
| "learning_rate": 0.0, |
| "loss": 0.2073, |
| "step": 2440 |
| }, |
| { |
| "epoch": 4.994882292732855, |
| "step": 2440, |
| "total_flos": 2619216084533248.0, |
| "train_loss": 0.34357271391715183, |
| "train_runtime": 45861.8772, |
| "train_samples_per_second": 6.814, |
| "train_steps_per_second": 0.053 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2440, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2619216084533248.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
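For reference, a minimal sketch of how a `trainer_state.json` like the one above can be loaded and summarized. This is illustrative only: the filename/path is an assumption (the Hugging Face Trainer writes this file into each checkpoint directory), and the field names are taken from the structure shown above.

```python
import json

# Path is an assumption; Trainer dumps trainer_state.json inside
# each checkpoint directory (e.g. checkpoint-2440/trainer_state.json).
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step records carry "loss" and "learning_rate"; the final record
# in log_history is a summary with train_loss/train_runtime instead.
logs = [e for e in state["log_history"] if "loss" in e and "learning_rate" in e]
summary = state["log_history"][-1]

print(f"steps logged:    {len(logs)} of max_steps={state['max_steps']}")
print(f"final step:      {logs[-1]['step']} (epoch {logs[-1]['epoch']:.3f})")
print(f"final step loss: {logs[-1]['loss']}")
print(f"mean train loss: {summary.get('train_loss')}")
print(f"runtime (s):     {summary.get('train_runtime')}")
```

Note that the learning rate in the last logged step is exactly 0.0, consistent with a schedule that decays to zero at `max_steps` (2440); the summary record's `train_loss` (~0.344) is the average over all of training, which is why it sits well above the ~0.15-0.21 per-step losses of the final epoch.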