| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9996942830938551, |
| "eval_steps": 500, |
| "global_step": 1635, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "grad_norm": 5.6230373155383, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 1.0665, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 6.01161593196551, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 1.0829, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 5.53722803622308, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 1.0876, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 7.93426726662942, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 1.0719, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 7.031649978841274, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.0572, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 6.3575449397660835, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 1.1097, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 6.573922198067659, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 1.1593, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0, |
| "grad_norm": 5.564979871860141, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 1.0538, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 6.0509131309088655, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 1.0715, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 6.12848977059448, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.9286, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 4.916177727066278, |
| "learning_rate": 2.2e-06, |
| "loss": 0.9492, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 4.8581443117369405, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 1.0133, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 3.908502671210593, |
| "learning_rate": 2.6e-06, |
| "loss": 0.9243, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 4.034985649544406, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 1.0416, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 3.507758376052119, |
| "learning_rate": 3e-06, |
| "loss": 0.9373, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 3.663272180369727, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 0.8673, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 3.508069835907157, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 0.8894, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 3.294815456496393, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 0.8841, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.877754612416487, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 0.768, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.664239443889974, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.7173, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.900279841618844, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 0.7689, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 3.0487383417411658, |
| "learning_rate": 4.4e-06, |
| "loss": 0.7327, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 2.9928876018893447, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 0.8763, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 3.031747010513625, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 0.812, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.5408522914684863, |
| "learning_rate": 5e-06, |
| "loss": 0.8598, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.7561902015944253, |
| "learning_rate": 5.2e-06, |
| "loss": 0.8967, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.976716291176, |
| "learning_rate": 5.400000000000001e-06, |
| "loss": 0.7226, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.9056079278585227, |
| "learning_rate": 5.600000000000001e-06, |
| "loss": 0.8096, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.4758306534625802, |
| "learning_rate": 5.8e-06, |
| "loss": 0.8044, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.3717118099560217, |
| "learning_rate": 6e-06, |
| "loss": 0.6461, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.6666631455135335, |
| "learning_rate": 6.200000000000001e-06, |
| "loss": 0.8049, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.8733424047344993, |
| "learning_rate": 6.4000000000000006e-06, |
| "loss": 0.8736, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.6648200702201637, |
| "learning_rate": 6.600000000000001e-06, |
| "loss": 0.7235, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.5601473220515056, |
| "learning_rate": 6.800000000000001e-06, |
| "loss": 0.68, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.7840115776082466, |
| "learning_rate": 7e-06, |
| "loss": 0.7816, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.524287013051412, |
| "learning_rate": 7.2000000000000005e-06, |
| "loss": 0.7471, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.4550748912153613, |
| "learning_rate": 7.4e-06, |
| "loss": 0.7719, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 3.1526664248369936, |
| "learning_rate": 7.600000000000001e-06, |
| "loss": 0.8696, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.6121499364302383, |
| "learning_rate": 7.800000000000002e-06, |
| "loss": 0.8445, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.862520896543254, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.7393, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.4539611276002877, |
| "learning_rate": 8.2e-06, |
| "loss": 0.7101, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.4407347141807656, |
| "learning_rate": 8.400000000000001e-06, |
| "loss": 0.6975, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.2749278520807903, |
| "learning_rate": 8.6e-06, |
| "loss": 0.6533, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.666183851086396, |
| "learning_rate": 8.8e-06, |
| "loss": 0.7896, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.6380466110589214, |
| "learning_rate": 9e-06, |
| "loss": 0.7127, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.6229206709830577, |
| "learning_rate": 9.200000000000002e-06, |
| "loss": 0.8321, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.439693927487141, |
| "learning_rate": 9.4e-06, |
| "loss": 0.6645, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.3073039773943127, |
| "learning_rate": 9.600000000000001e-06, |
| "loss": 0.6964, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.4261514181880757, |
| "learning_rate": 9.800000000000001e-06, |
| "loss": 0.7328, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.6298961031353434, |
| "learning_rate": 1e-05, |
| "loss": 0.7843, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.368357019452094, |
| "learning_rate": 9.999990178426327e-06, |
| "loss": 0.668, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.7183273232506373, |
| "learning_rate": 9.999960713743888e-06, |
| "loss": 0.9064, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.4366531712753416, |
| "learning_rate": 9.99991160606844e-06, |
| "loss": 0.6861, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.594914984056206, |
| "learning_rate": 9.999842855592912e-06, |
| "loss": 0.6947, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.631921187839759, |
| "learning_rate": 9.999754462587396e-06, |
| "loss": 0.8039, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.5158584160059085, |
| "learning_rate": 9.999646427399155e-06, |
| "loss": 0.7604, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.4520389413039703, |
| "learning_rate": 9.999518750452622e-06, |
| "loss": 0.8429, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.511219649765081, |
| "learning_rate": 9.99937143224939e-06, |
| "loss": 0.6068, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.2738544213871683, |
| "learning_rate": 9.999204473368218e-06, |
| "loss": 0.7126, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.2995201634238365, |
| "learning_rate": 9.999017874465028e-06, |
| "loss": 0.7117, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.3985761980411646, |
| "learning_rate": 9.998811636272893e-06, |
| "loss": 0.6838, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.2586065910900595, |
| "learning_rate": 9.998585759602052e-06, |
| "loss": 0.6853, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.585278752939056, |
| "learning_rate": 9.998340245339888e-06, |
| "loss": 0.8295, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.4881456153581327, |
| "learning_rate": 9.998075094450935e-06, |
| "loss": 0.6653, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.2548663647589096, |
| "learning_rate": 9.997790307976874e-06, |
| "loss": 0.6354, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.229109711289454, |
| "learning_rate": 9.997485887036524e-06, |
| "loss": 0.6932, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.452224143427515, |
| "learning_rate": 9.997161832825843e-06, |
| "loss": 0.7654, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.2297142951513824, |
| "learning_rate": 9.996818146617922e-06, |
| "loss": 0.6906, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.2791022191610946, |
| "learning_rate": 9.996454829762973e-06, |
| "loss": 0.754, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.4219124929553515, |
| "learning_rate": 9.996071883688333e-06, |
| "loss": 0.6764, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.3501944220961146, |
| "learning_rate": 9.99566930989846e-06, |
| "loss": 0.7615, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.344366272975179, |
| "learning_rate": 9.995247109974915e-06, |
| "loss": 0.7134, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.2314263082377934, |
| "learning_rate": 9.994805285576364e-06, |
| "loss": 0.7631, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.2834194270284636, |
| "learning_rate": 9.99434383843857e-06, |
| "loss": 0.8618, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.1338375917815813, |
| "learning_rate": 9.99386277037439e-06, |
| "loss": 0.6911, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.3756649144864954, |
| "learning_rate": 9.993362083273763e-06, |
| "loss": 0.6907, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.243315382854752, |
| "learning_rate": 9.992841779103701e-06, |
| "loss": 0.7424, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.2756590145094773, |
| "learning_rate": 9.992301859908289e-06, |
| "loss": 0.7107, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.4499338417790324, |
| "learning_rate": 9.991742327808667e-06, |
| "loss": 0.6014, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.5224681026150524, |
| "learning_rate": 9.991163185003028e-06, |
| "loss": 0.7545, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.255631190324587, |
| "learning_rate": 9.990564433766615e-06, |
| "loss": 0.6931, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.3236984572992045, |
| "learning_rate": 9.989946076451693e-06, |
| "loss": 0.7708, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.3364421780529887, |
| "learning_rate": 9.989308115487563e-06, |
| "loss": 0.6633, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.4163016525364336, |
| "learning_rate": 9.988650553380537e-06, |
| "loss": 0.7195, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.296872674947527, |
| "learning_rate": 9.987973392713932e-06, |
| "loss": 0.6912, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.236387998298346, |
| "learning_rate": 9.987276636148062e-06, |
| "loss": 0.6737, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.277025205153288, |
| "learning_rate": 9.986560286420224e-06, |
| "loss": 0.7312, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.200531820532158, |
| "learning_rate": 9.985824346344692e-06, |
| "loss": 0.6251, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 2.488944948555403, |
| "learning_rate": 9.9850688188127e-06, |
| "loss": 0.7303, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.260371060935904, |
| "learning_rate": 9.984293706792438e-06, |
| "loss": 0.7546, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.4449103602612245, |
| "learning_rate": 9.983499013329035e-06, |
| "loss": 0.7119, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.0759253415676246, |
| "learning_rate": 9.982684741544543e-06, |
| "loss": 0.6844, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.387552887692122, |
| "learning_rate": 9.981850894637937e-06, |
| "loss": 0.6649, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.3995087357031797, |
| "learning_rate": 9.980997475885092e-06, |
| "loss": 0.6547, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.0904111698207495, |
| "learning_rate": 9.980124488638774e-06, |
| "loss": 0.6566, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.156307458383918, |
| "learning_rate": 9.979231936328627e-06, |
| "loss": 0.6928, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.24711153397962, |
| "learning_rate": 9.978319822461156e-06, |
| "loss": 0.6853, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.1649921192352317, |
| "learning_rate": 9.97738815061972e-06, |
| "loss": 0.7694, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.374786305390187, |
| "learning_rate": 9.976436924464513e-06, |
| "loss": 0.5882, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.4306832331036436, |
| "learning_rate": 9.975466147732551e-06, |
| "loss": 0.7988, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.407124728247618, |
| "learning_rate": 9.974475824237653e-06, |
| "loss": 0.7287, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.264722126370796, |
| "learning_rate": 9.973465957870437e-06, |
| "loss": 0.629, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.1005973714933273, |
| "learning_rate": 9.972436552598287e-06, |
| "loss": 0.6619, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.270262489712689, |
| "learning_rate": 9.971387612465364e-06, |
| "loss": 0.7143, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.265439764692705, |
| "learning_rate": 9.970319141592559e-06, |
| "loss": 0.7442, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.4319715531705213, |
| "learning_rate": 9.9692311441775e-06, |
| "loss": 0.7737, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.491658969929883, |
| "learning_rate": 9.968123624494525e-06, |
| "loss": 0.7946, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.027048425981043, |
| "learning_rate": 9.966996586894669e-06, |
| "loss": 0.6461, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.3372539030580874, |
| "learning_rate": 9.965850035805647e-06, |
| "loss": 0.6859, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.2435812305648057, |
| "learning_rate": 9.964683975731828e-06, |
| "loss": 0.7748, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.315025140176691, |
| "learning_rate": 9.963498411254235e-06, |
| "loss": 0.659, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.074191768339382, |
| "learning_rate": 9.96229334703051e-06, |
| "loss": 0.6609, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.145042227584639, |
| "learning_rate": 9.961068787794905e-06, |
| "loss": 0.6756, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.131588951231997, |
| "learning_rate": 9.959824738358257e-06, |
| "loss": 0.6334, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.069665378872537, |
| "learning_rate": 9.958561203607975e-06, |
| "loss": 0.6443, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.3057314770322646, |
| "learning_rate": 9.957278188508023e-06, |
| "loss": 0.7952, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.2378897605544474, |
| "learning_rate": 9.955975698098887e-06, |
| "loss": 0.7272, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.0307818678260654, |
| "learning_rate": 9.954653737497573e-06, |
| "loss": 0.5701, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.24162131248661, |
| "learning_rate": 9.953312311897573e-06, |
| "loss": 0.7793, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.3552427687921393, |
| "learning_rate": 9.951951426568852e-06, |
| "loss": 0.7209, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.141178442038793, |
| "learning_rate": 9.950571086857821e-06, |
| "loss": 0.6716, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.140261901239331, |
| "learning_rate": 9.949171298187328e-06, |
| "loss": 0.6743, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.4084672072357907, |
| "learning_rate": 9.94775206605662e-06, |
| "loss": 0.7973, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.025773154875324, |
| "learning_rate": 9.946313396041334e-06, |
| "loss": 0.7025, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.150720688365092, |
| "learning_rate": 9.944855293793477e-06, |
| "loss": 0.6128, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.161613123811631, |
| "learning_rate": 9.943377765041385e-06, |
| "loss": 0.6306, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.593842849691437, |
| "learning_rate": 9.941880815589726e-06, |
| "loss": 0.5894, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.504320662466919, |
| "learning_rate": 9.94036445131946e-06, |
| "loss": 0.772, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.1848467219695498, |
| "learning_rate": 9.938828678187816e-06, |
| "loss": 0.6397, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.27661938480736, |
| "learning_rate": 9.937273502228283e-06, |
| "loss": 0.6975, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.3673651764999573, |
| "learning_rate": 9.935698929550565e-06, |
| "loss": 0.7621, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.26639915751491, |
| "learning_rate": 9.934104966340582e-06, |
| "loss": 0.6551, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.3961035912104927, |
| "learning_rate": 9.932491618860419e-06, |
| "loss": 0.7304, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.2515245018919505, |
| "learning_rate": 9.93085889344832e-06, |
| "loss": 0.6655, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.254087375166334, |
| "learning_rate": 9.929206796518663e-06, |
| "loss": 0.666, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.31262707571602, |
| "learning_rate": 9.927535334561922e-06, |
| "loss": 0.7362, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.443869927919863, |
| "learning_rate": 9.925844514144651e-06, |
| "loss": 0.6805, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.2594952238504904, |
| "learning_rate": 9.924134341909459e-06, |
| "loss": 0.6936, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.3645395585503013, |
| "learning_rate": 9.922404824574976e-06, |
| "loss": 0.6318, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.952498408951814, |
| "learning_rate": 9.920655968935839e-06, |
| "loss": 0.6884, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.9398379720377739, |
| "learning_rate": 9.91888778186265e-06, |
| "loss": 0.5678, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.213422596923249, |
| "learning_rate": 9.917100270301963e-06, |
| "loss": 0.6868, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.2025927290666307, |
| "learning_rate": 9.915293441276246e-06, |
| "loss": 0.7192, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.80775911051946, |
| "learning_rate": 9.913467301883863e-06, |
| "loss": 0.784, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.0925302922228175, |
| "learning_rate": 9.91162185929904e-06, |
| "loss": 0.7198, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.334842810434387, |
| "learning_rate": 9.909757120771835e-06, |
| "loss": 0.7402, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.3908201091336805, |
| "learning_rate": 9.907873093628115e-06, |
| "loss": 0.6787, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.0776209566634316, |
| "learning_rate": 9.905969785269527e-06, |
| "loss": 0.6842, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.997004958324354, |
| "learning_rate": 9.904047203173462e-06, |
| "loss": 0.5733, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.3139630172641508, |
| "learning_rate": 9.90210535489303e-06, |
| "loss": 0.6647, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.9690194811261275, |
| "learning_rate": 9.90014424805704e-06, |
| "loss": 0.7311, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.3995016349539617, |
| "learning_rate": 9.898163890369948e-06, |
| "loss": 0.7473, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.1433446483892324, |
| "learning_rate": 9.896164289611849e-06, |
| "loss": 0.7016, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.190153295128278, |
| "learning_rate": 9.894145453638433e-06, |
| "loss": 0.67, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.4110621078855394, |
| "learning_rate": 9.892107390380959e-06, |
| "loss": 0.6655, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.2675629481717667, |
| "learning_rate": 9.890050107846219e-06, |
| "loss": 0.6459, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.274763942094237, |
| "learning_rate": 9.887973614116517e-06, |
| "loss": 0.6077, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.4805064767185314, |
| "learning_rate": 9.885877917349626e-06, |
| "loss": 0.7247, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.002019535180498, |
| "learning_rate": 9.883763025778766e-06, |
| "loss": 0.5373, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.337028939246668, |
| "learning_rate": 9.881628947712556e-06, |
| "loss": 0.7776, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.1479302094948247, |
| "learning_rate": 9.879475691535e-06, |
| "loss": 0.6499, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.9593814752135854, |
| "learning_rate": 9.87730326570545e-06, |
| "loss": 0.5575, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.3120091764398714, |
| "learning_rate": 9.875111678758553e-06, |
| "loss": 0.8117, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.58203181614075, |
| "learning_rate": 9.872900939304246e-06, |
| "loss": 0.7774, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.1359404314304173, |
| "learning_rate": 9.870671056027705e-06, |
| "loss": 0.7738, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.0558218346618484, |
| "learning_rate": 9.868422037689316e-06, |
| "loss": 0.6216, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.243529236580866, |
| "learning_rate": 9.866153893124638e-06, |
| "loss": 0.6684, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.1145868718408316, |
| "learning_rate": 9.863866631244371e-06, |
| "loss": 0.6847, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.9024551152535656, |
| "learning_rate": 9.861560261034319e-06, |
| "loss": 0.5933, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.1434847962939396, |
| "learning_rate": 9.859234791555356e-06, |
| "loss": 0.6503, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.1711654094618713, |
| "learning_rate": 9.856890231943389e-06, |
| "loss": 0.6768, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.4118520851923524, |
| "learning_rate": 9.854526591409325e-06, |
| "loss": 0.7787, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.0666041021999977, |
| "learning_rate": 9.852143879239032e-06, |
| "loss": 0.6304, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.0942561413873806, |
| "learning_rate": 9.849742104793303e-06, |
| "loss": 0.659, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.9464606223168837, |
| "learning_rate": 9.847321277507821e-06, |
| "loss": 0.5119, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.0838258311462443, |
| "learning_rate": 9.844881406893118e-06, |
| "loss": 0.6413, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.2413046501319136, |
| "learning_rate": 9.842422502534542e-06, |
| "loss": 0.6781, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.0434533347433392, |
| "learning_rate": 9.839944574092215e-06, |
| "loss": 0.7173, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.0756948950274956, |
| "learning_rate": 9.837447631301003e-06, |
| "loss": 0.691, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.096621527142799, |
| "learning_rate": 9.834931683970468e-06, |
| "loss": 0.6164, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.325512127795748, |
| "learning_rate": 9.832396741984834e-06, |
| "loss": 0.7617, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.1101717813557723, |
| "learning_rate": 9.829842815302951e-06, |
| "loss": 0.5708, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.1629292221166567, |
| "learning_rate": 9.827269913958247e-06, |
| "loss": 0.7347, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.3197439288304906, |
| "learning_rate": 9.8246780480587e-06, |
| "loss": 0.7264, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.1964017610148843, |
| "learning_rate": 9.822067227786794e-06, |
| "loss": 0.759, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.272214121168636, |
| "learning_rate": 9.819437463399468e-06, |
| "loss": 0.6904, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.3994827721770684, |
| "learning_rate": 9.816788765228095e-06, |
| "loss": 0.7399, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.111638499108041, |
| "learning_rate": 9.81412114367843e-06, |
| "loss": 0.7612, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.9840878981791865, |
| "learning_rate": 9.81143460923057e-06, |
| "loss": 0.5875, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.1652359918123167, |
| "learning_rate": 9.808729172438909e-06, |
| "loss": 0.724, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.9395882749114965, |
| "learning_rate": 9.80600484393211e-06, |
| "loss": 0.5528, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.1604646292422527, |
| "learning_rate": 9.803261634413049e-06, |
| "loss": 0.5707, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.016941878380451, |
| "learning_rate": 9.80049955465878e-06, |
| "loss": 0.6385, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.532345965216657, |
| "learning_rate": 9.797718615520488e-06, |
| "loss": 0.6938, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.2511667943525038, |
| "learning_rate": 9.794918827923458e-06, |
| "loss": 0.753, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.1447413054072517, |
| "learning_rate": 9.792100202867014e-06, |
| "loss": 0.6697, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.189313732305628, |
| "learning_rate": 9.78926275142449e-06, |
| "loss": 0.616, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.268919346618177, |
| "learning_rate": 9.786406484743183e-06, |
| "loss": 0.7412, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.2333800757193276, |
| "learning_rate": 9.783531414044304e-06, |
| "loss": 0.6923, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.055136095603475, |
| "learning_rate": 9.780637550622943e-06, |
| "loss": 0.7031, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.226669249666592, |
| "learning_rate": 9.777724905848013e-06, |
| "loss": 0.7531, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.232153739326395, |
| "learning_rate": 9.774793491162221e-06, |
| "loss": 0.6133, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.264407351963802, |
| "learning_rate": 9.771843318082008e-06, |
| "loss": 0.5952, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.294784666626483, |
| "learning_rate": 9.76887439819751e-06, |
| "loss": 0.8448, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.1307772179440936, |
| "learning_rate": 9.765886743172512e-06, |
| "loss": 0.5948, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 1.83961485752976, |
| "learning_rate": 9.762880364744404e-06, |
| "loss": 0.5447, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.031812871238954, |
| "learning_rate": 9.759855274724137e-06, |
| "loss": 0.6538, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.0040113831634745, |
| "learning_rate": 9.756811484996162e-06, |
| "loss": 0.6421, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 1.7984932798626254, |
| "learning_rate": 9.753749007518407e-06, |
| "loss": 0.5157, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.0477874542114916, |
| "learning_rate": 9.750667854322207e-06, |
| "loss": 0.6199, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.206751428653727, |
| "learning_rate": 9.747568037512274e-06, |
| "loss": 0.6161, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.3285004622852767, |
| "learning_rate": 9.744449569266637e-06, |
| "loss": 0.7607, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.1011118330445475, |
| "learning_rate": 9.741312461836606e-06, |
| "loss": 0.666, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.397695659444179, |
| "learning_rate": 9.738156727546711e-06, |
| "loss": 0.7105, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 1.91331183170689, |
| "learning_rate": 9.734982378794662e-06, |
| "loss": 0.619, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.1362268650914125, |
| "learning_rate": 9.731789428051302e-06, |
| "loss": 0.7317, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.04421825962035, |
| "learning_rate": 9.72857788786055e-06, |
| "loss": 0.6309, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.1550284488031473, |
| "learning_rate": 9.725347770839356e-06, |
| "loss": 0.6768, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.049439369305375, |
| "learning_rate": 9.722099089677655e-06, |
| "loss": 0.6423, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.122940983855365, |
| "learning_rate": 9.718831857138308e-06, |
| "loss": 0.6345, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.274655471484878, |
| "learning_rate": 9.715546086057066e-06, |
| "loss": 0.5792, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.8385884175738376, |
| "learning_rate": 9.712241789342504e-06, |
| "loss": 0.656, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.2460907183322933, |
| "learning_rate": 9.708918979975982e-06, |
| "loss": 0.6417, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.9323218121201529, |
| "learning_rate": 9.705577671011579e-06, |
| "loss": 0.6371, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.0382284797399293, |
| "learning_rate": 9.702217875576067e-06, |
| "loss": 0.591, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.0698398210796567, |
| "learning_rate": 9.698839606868835e-06, |
| "loss": 0.5794, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.9440879438361034, |
| "learning_rate": 9.69544287816185e-06, |
| "loss": 0.6745, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.9900929740524849, |
| "learning_rate": 9.6920277027996e-06, |
| "loss": 0.6757, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.1940921838343446, |
| "learning_rate": 9.688594094199043e-06, |
| "loss": 0.6472, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.1958440427756636, |
| "learning_rate": 9.685142065849556e-06, |
| "loss": 0.7342, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.165725186559193, |
| "learning_rate": 9.681671631312876e-06, |
| "loss": 0.6485, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.1894781279792443, |
| "learning_rate": 9.67818280422306e-06, |
| "loss": 0.6896, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.195740186965468, |
| "learning_rate": 9.674675598286414e-06, |
| "loss": 0.6974, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.2452322721170668, |
| "learning_rate": 9.671150027281449e-06, |
| "loss": 0.6163, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.326336053478045, |
| "learning_rate": 9.667606105058828e-06, |
| "loss": 0.6448, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.0032814883659036, |
| "learning_rate": 9.66404384554131e-06, |
| "loss": 0.619, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.932037123804567, |
| "learning_rate": 9.660463262723691e-06, |
| "loss": 0.5897, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.340804976271579, |
| "learning_rate": 9.656864370672757e-06, |
| "loss": 0.8023, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.1022457172205327, |
| "learning_rate": 9.653247183527216e-06, |
| "loss": 0.7218, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.8423012822099027, |
| "learning_rate": 9.649611715497662e-06, |
| "loss": 0.6005, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.8546169042416565, |
| "learning_rate": 9.645957980866499e-06, |
| "loss": 0.5888, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.9846561311341997, |
| "learning_rate": 9.642285993987895e-06, |
| "loss": 0.5579, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.239777371231001, |
| "learning_rate": 9.63859576928773e-06, |
| "loss": 0.755, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.0594775910705083, |
| "learning_rate": 9.634887321263525e-06, |
| "loss": 0.6442, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.176249216011953, |
| "learning_rate": 9.631160664484398e-06, |
| "loss": 0.8016, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.085520659410642, |
| "learning_rate": 9.627415813591007e-06, |
| "loss": 0.6368, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.021294737304931, |
| "learning_rate": 9.623652783295483e-06, |
| "loss": 0.5614, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.231835038374354, |
| "learning_rate": 9.619871588381376e-06, |
| "loss": 0.7216, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.910271586943225, |
| "learning_rate": 9.616072243703598e-06, |
| "loss": 0.5791, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.8003560124729412, |
| "learning_rate": 9.612254764188368e-06, |
| "loss": 0.5448, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.2007549176054404, |
| "learning_rate": 9.608419164833152e-06, |
| "loss": 0.7257, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.0571622186487044, |
| "learning_rate": 9.604565460706592e-06, |
| "loss": 0.6335, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 2.2497466958972154, |
| "learning_rate": 9.60069366694847e-06, |
| "loss": 0.6597, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.9612178789599213, |
| "learning_rate": 9.596803798769626e-06, |
| "loss": 0.7287, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.1104419163141115, |
| "learning_rate": 9.592895871451908e-06, |
| "loss": 0.6671, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.0822889988204305, |
| "learning_rate": 9.58896990034812e-06, |
| "loss": 0.7013, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.248918383189871, |
| "learning_rate": 9.585025900881944e-06, |
| "loss": 0.7042, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.0495739015390857, |
| "learning_rate": 9.581063888547895e-06, |
| "loss": 0.6913, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.759437262151785, |
| "learning_rate": 9.57708387891125e-06, |
| "loss": 0.5709, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.420770662182739, |
| "learning_rate": 9.573085887607991e-06, |
| "loss": 0.6814, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.130894942110269, |
| "learning_rate": 9.569069930344746e-06, |
| "loss": 0.6187, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.9591579536191646, |
| "learning_rate": 9.565036022898723e-06, |
| "loss": 0.5882, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.8084671651408435, |
| "learning_rate": 9.56098418111765e-06, |
| "loss": 0.6313, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.1526443073933086, |
| "learning_rate": 9.556914420919711e-06, |
| "loss": 0.6102, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.194691833732068, |
| "learning_rate": 9.552826758293487e-06, |
| "loss": 0.6878, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.501846785947928, |
| "learning_rate": 9.548721209297889e-06, |
| "loss": 0.6596, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.0669308931128123, |
| "learning_rate": 9.544597790062098e-06, |
| "loss": 0.6224, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.2681106555575052, |
| "learning_rate": 9.5404565167855e-06, |
| "loss": 0.6786, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.0576613963336445, |
| "learning_rate": 9.536297405737624e-06, |
| "loss": 0.5946, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.0426855396207264, |
| "learning_rate": 9.532120473258075e-06, |
| "loss": 0.641, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.1061182708873973, |
| "learning_rate": 9.527925735756473e-06, |
| "loss": 0.6379, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.6795883204795699, |
| "learning_rate": 9.52371320971239e-06, |
| "loss": 0.5449, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.9653754600396853, |
| "learning_rate": 9.519482911675278e-06, |
| "loss": 0.6875, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.1944241074929534, |
| "learning_rate": 9.51523485826441e-06, |
| "loss": 0.6387, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.2621502173726418, |
| "learning_rate": 9.510969066168814e-06, |
| "loss": 0.7567, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.0713889312949623, |
| "learning_rate": 9.506685552147208e-06, |
| "loss": 0.8003, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.161082236049816, |
| "learning_rate": 9.502384333027929e-06, |
| "loss": 0.7317, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.099935757711094, |
| "learning_rate": 9.498065425708878e-06, |
| "loss": 0.6365, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.0269709971121768, |
| "learning_rate": 9.493728847157436e-06, |
| "loss": 0.6275, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.9361261922037705, |
| "learning_rate": 9.489374614410413e-06, |
| "loss": 0.6505, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.276393660154319, |
| "learning_rate": 9.485002744573982e-06, |
| "loss": 0.7315, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.9425800587711943, |
| "learning_rate": 9.480613254823595e-06, |
| "loss": 0.6143, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.2697653664784534, |
| "learning_rate": 9.476206162403933e-06, |
| "loss": 0.6727, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 2.1842308672307063, |
| "learning_rate": 9.471781484628828e-06, |
| "loss": 0.6416, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.9855040976893727, |
| "learning_rate": 9.467339238881199e-06, |
| "loss": 0.6107, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.9633251460753256, |
| "learning_rate": 9.462879442612984e-06, |
| "loss": 0.5977, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.0847506096615094, |
| "learning_rate": 9.458402113345071e-06, |
| "loss": 0.5964, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.071971955099866, |
| "learning_rate": 9.453907268667226e-06, |
| "loss": 0.6926, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.1433436130539074, |
| "learning_rate": 9.44939492623803e-06, |
| "loss": 0.5979, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.262092646829491, |
| "learning_rate": 9.444865103784803e-06, |
| "loss": 0.6555, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.1581337027107192, |
| "learning_rate": 9.440317819103542e-06, |
| "loss": 0.7022, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.207365600854885, |
| "learning_rate": 9.435753090058839e-06, |
| "loss": 0.6298, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.9716493031373659, |
| "learning_rate": 9.431170934583826e-06, |
| "loss": 0.6057, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.8605748771934563, |
| "learning_rate": 9.426571370680094e-06, |
| "loss": 0.6488, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.101750848753291, |
| "learning_rate": 9.421954416417624e-06, |
| "loss": 0.6334, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.189471586472517, |
| "learning_rate": 9.417320089934721e-06, |
| "loss": 0.6478, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.8693406953522982, |
| "learning_rate": 9.412668409437934e-06, |
| "loss": 0.5423, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.1604549448326207, |
| "learning_rate": 9.407999393201992e-06, |
| "loss": 0.6778, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.260164616585325, |
| "learning_rate": 9.403313059569729e-06, |
| "loss": 0.7631, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.0264565243677652, |
| "learning_rate": 9.398609426952019e-06, |
| "loss": 0.6039, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.0077711587490987, |
| "learning_rate": 9.393888513827686e-06, |
| "loss": 0.6003, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 2.0401684479993563, |
| "learning_rate": 9.389150338743451e-06, |
| "loss": 0.6232, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 2.0592434888026467, |
| "learning_rate": 9.384394920313847e-06, |
| "loss": 0.692, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 2.127639974580799, |
| "learning_rate": 9.379622277221152e-06, |
| "loss": 0.6403, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 2.1637948423090596, |
| "learning_rate": 9.37483242821531e-06, |
| "loss": 0.7911, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.96655742278293, |
| "learning_rate": 9.370025392113866e-06, |
| "loss": 0.6817, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 2.4075353559357375, |
| "learning_rate": 9.365201187801884e-06, |
| "loss": 0.7468, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 2.170026110189212, |
| "learning_rate": 9.360359834231873e-06, |
| "loss": 0.7148, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 2.0723680052005378, |
| "learning_rate": 9.355501350423717e-06, |
| "loss": 0.6234, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 2.111461085654852, |
| "learning_rate": 9.3506257554646e-06, |
| "loss": 0.6659, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 2.2496008204864104, |
| "learning_rate": 9.345733068508929e-06, |
| "loss": 0.7033, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.9996970862014591, |
| "learning_rate": 9.340823308778255e-06, |
| "loss": 0.7255, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.8792750115155255, |
| "learning_rate": 9.335896495561207e-06, |
| "loss": 0.6429, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 2.12929524638482, |
| "learning_rate": 9.33095264821341e-06, |
| "loss": 0.6596, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.9025785100638457, |
| "learning_rate": 9.325991786157405e-06, |
| "loss": 0.6464, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.9934226422368588, |
| "learning_rate": 9.321013928882583e-06, |
| "loss": 0.5929, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 2.1268235022475697, |
| "learning_rate": 9.3160190959451e-06, |
| "loss": 0.6511, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.9740290904745004, |
| "learning_rate": 9.311007306967805e-06, |
| "loss": 0.5765, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.0624735759975823, |
| "learning_rate": 9.305978581640157e-06, |
| "loss": 0.7006, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.2850117779663144, |
| "learning_rate": 9.300932939718159e-06, |
| "loss": 0.6555, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.9373893589189668, |
| "learning_rate": 9.295870401024266e-06, |
| "loss": 0.6105, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.9939827504480299, |
| "learning_rate": 9.290790985447316e-06, |
| "loss": 0.574, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.518967684022752, |
| "learning_rate": 9.285694712942453e-06, |
| "loss": 0.737, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.061941526906131, |
| "learning_rate": 9.28058160353104e-06, |
| "loss": 0.6289, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.046986924521927, |
| "learning_rate": 9.275451677300591e-06, |
| "loss": 0.6026, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.9643917949664476, |
| "learning_rate": 9.270304954404688e-06, |
| "loss": 0.6726, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.030653402715337, |
| "learning_rate": 9.265141455062894e-06, |
| "loss": 0.6522, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.083183062824829, |
| "learning_rate": 9.259961199560686e-06, |
| "loss": 0.6681, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.0946830011733955, |
| "learning_rate": 9.254764208249369e-06, |
| "loss": 0.7092, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.1225126781820283, |
| "learning_rate": 9.249550501545998e-06, |
| "loss": 0.67, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.026753617785709, |
| "learning_rate": 9.244320099933291e-06, |
| "loss": 0.6578, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.3515094288361125, |
| "learning_rate": 9.239073023959562e-06, |
| "loss": 0.7187, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.1066900321641655, |
| "learning_rate": 9.233809294238625e-06, |
| "loss": 0.709, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.2215204725890416, |
| "learning_rate": 9.228528931449724e-06, |
| "loss": 0.7507, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.2519794194499068, |
| "learning_rate": 9.22323195633745e-06, |
| "loss": 0.804, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 2.1625190316209792, |
| "learning_rate": 9.217918389711652e-06, |
| "loss": 0.7088, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 2.095704698093118, |
| "learning_rate": 9.21258825244737e-06, |
| "loss": 0.6989, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 2.330487081066773, |
| "learning_rate": 9.207241565484733e-06, |
| "loss": 0.7033, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 2.132239589678436, |
| "learning_rate": 9.201878349828897e-06, |
| "loss": 0.6656, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 2.158342662695929, |
| "learning_rate": 9.196498626549944e-06, |
| "loss": 0.5794, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.9571490668467135, |
| "learning_rate": 9.191102416782819e-06, |
| "loss": 0.5614, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 2.0828460881254154, |
| "learning_rate": 9.185689741727229e-06, |
| "loss": 0.7618, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.945496988662335, |
| "learning_rate": 9.180260622647565e-06, |
| "loss": 0.6134, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 2.1345490610587006, |
| "learning_rate": 9.174815080872829e-06, |
| "loss": 0.6491, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.888910241101656, |
| "learning_rate": 9.169353137796533e-06, |
| "loss": 0.5433, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 2.2231218683381346, |
| "learning_rate": 9.163874814876632e-06, |
| "loss": 0.6674, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.8397128888601602, |
| "learning_rate": 9.158380133635425e-06, |
| "loss": 0.5104, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.8435017185456046, |
| "learning_rate": 9.152869115659474e-06, |
| "loss": 0.5708, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 2.004371953603906, |
| "learning_rate": 9.147341782599534e-06, |
| "loss": 0.5923, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 2.1426206185002523, |
| "learning_rate": 9.141798156170447e-06, |
| "loss": 0.6067, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 2.0707774939518435, |
| "learning_rate": 9.136238258151063e-06, |
| "loss": 0.621, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 2.0756065658076808, |
| "learning_rate": 9.130662110384163e-06, |
| "loss": 0.609, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.864542155335991, |
| "learning_rate": 9.125069734776367e-06, |
| "loss": 0.5795, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 2.247405694299018, |
| "learning_rate": 9.119461153298045e-06, |
| "loss": 0.6788, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 2.0281028236357908, |
| "learning_rate": 9.113836387983239e-06, |
| "loss": 0.6667, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 2.1739992658132126, |
| "learning_rate": 9.108195460929563e-06, |
| "loss": 0.6559, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.844308015715884, |
| "learning_rate": 9.10253839429813e-06, |
| "loss": 0.5637, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 2.158849134009064, |
| "learning_rate": 9.096865210313461e-06, |
| "loss": 0.6977, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.9857083622278322, |
| "learning_rate": 9.091175931263395e-06, |
| "loss": 0.7014, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 2.083743100705083, |
| "learning_rate": 9.085470579498996e-06, |
| "loss": 0.6288, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 2.0848589757887304, |
| "learning_rate": 9.079749177434481e-06, |
| "loss": 0.5892, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 2.2211766443468073, |
| "learning_rate": 9.074011747547118e-06, |
| "loss": 0.6756, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 2.259415733177512, |
| "learning_rate": 9.068258312377143e-06, |
| "loss": 0.637, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.8541880063977976, |
| "learning_rate": 9.06248889452767e-06, |
| "loss": 0.5564, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 2.043397669872696, |
| "learning_rate": 9.056703516664606e-06, |
| "loss": 0.5995, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.9966425012080062, |
| "learning_rate": 9.050902201516555e-06, |
| "loss": 0.5602, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 2.097676942573622, |
| "learning_rate": 9.045084971874738e-06, |
| "loss": 0.669, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.0595811405443016, |
| "learning_rate": 9.039251850592892e-06, |
| "loss": 0.6529, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.0255229150761576, |
| "learning_rate": 9.033402860587187e-06, |
| "loss": 0.5948, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.0548212104417276, |
| "learning_rate": 9.027538024836143e-06, |
| "loss": 0.6584, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.2114936351325465, |
| "learning_rate": 9.021657366380521e-06, |
| "loss": 0.6837, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.9893474856689934, |
| "learning_rate": 9.015760908323253e-06, |
| "loss": 0.5977, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.9935862578665022, |
| "learning_rate": 9.009848673829337e-06, |
| "loss": 0.6574, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.8536984972638404, |
| "learning_rate": 9.00392068612575e-06, |
| "loss": 0.5571, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.07272622617217, |
| "learning_rate": 8.997976968501362e-06, |
| "loss": 0.6437, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.9669798106315952, |
| "learning_rate": 8.992017544306834e-06, |
| "loss": 0.6805, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.243741605970751, |
| "learning_rate": 8.986042436954538e-06, |
| "loss": 0.7328, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.011662513116711, |
| "learning_rate": 8.980051669918458e-06, |
| "loss": 0.6209, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.1937242214026007, |
| "learning_rate": 8.974045266734094e-06, |
| "loss": 0.7434, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.132031298132569, |
| "learning_rate": 8.96802325099838e-06, |
| "loss": 0.6832, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.7086865848142259, |
| "learning_rate": 8.961985646369587e-06, |
| "loss": 0.5608, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.9009692420702806, |
| "learning_rate": 8.955932476567224e-06, |
| "loss": 0.6121, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.9044767808035803, |
| "learning_rate": 8.949863765371952e-06, |
| "loss": 0.6172, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.1087095562200946, |
| "learning_rate": 8.943779536625489e-06, |
| "loss": 0.7064, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.17610448059507, |
| "learning_rate": 8.937679814230517e-06, |
| "loss": 0.6725, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.006215616453568, |
| "learning_rate": 8.931564622150583e-06, |
| "loss": 0.6987, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.9223795578578178, |
| "learning_rate": 8.925433984410012e-06, |
| "loss": 0.5192, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.8039652484819113, |
| "learning_rate": 8.919287925093808e-06, |
| "loss": 0.628, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.769522071377601, |
| "learning_rate": 8.913126468347561e-06, |
| "loss": 0.4867, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.290538985245612, |
| "learning_rate": 8.906949638377352e-06, |
| "loss": 0.6833, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.1791089656581764, |
| "learning_rate": 8.900757459449655e-06, |
| "loss": 0.76, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.0748417472498537, |
| "learning_rate": 8.894549955891247e-06, |
| "loss": 0.6931, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.007190815984241, |
| "learning_rate": 8.888327152089112e-06, |
| "loss": 0.6713, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.7418731560021379, |
| "learning_rate": 8.882089072490339e-06, |
| "loss": 0.5852, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.9279427627473156, |
| "learning_rate": 8.875835741602031e-06, |
| "loss": 0.5998, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.011804969137247, |
| "learning_rate": 8.869567183991208e-06, |
| "loss": 0.7047, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.0919216640489577, |
| "learning_rate": 8.86328342428471e-06, |
| "loss": 0.6773, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.1708903996053994, |
| "learning_rate": 8.856984487169102e-06, |
| "loss": 0.6511, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.0482302804600954, |
| "learning_rate": 8.85067039739057e-06, |
| "loss": 0.6458, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.2389233691566184, |
| "learning_rate": 8.84434117975484e-06, |
| "loss": 0.6042, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.3238794603179365, |
| "learning_rate": 8.837996859127056e-06, |
| "loss": 0.6536, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.9634398094275907, |
| "learning_rate": 8.831637460431708e-06, |
| "loss": 0.6009, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.1849384771988167, |
| "learning_rate": 8.825263008652513e-06, |
| "loss": 0.6747, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.9367137201020725, |
| "learning_rate": 8.818873528832334e-06, |
| "loss": 0.5679, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.9444930407351348, |
| "learning_rate": 8.812469046073069e-06, |
| "loss": 0.5809, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.0382647745048263, |
| "learning_rate": 8.806049585535554e-06, |
| "loss": 0.5664, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.1047083940033944, |
| "learning_rate": 8.799615172439475e-06, |
| "loss": 0.5677, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.8990495481992753, |
| "learning_rate": 8.793165832063254e-06, |
| "loss": 0.6238, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.096972722468596, |
| "learning_rate": 8.786701589743965e-06, |
| "loss": 0.6452, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.8640478732019463, |
| "learning_rate": 8.780222470877213e-06, |
| "loss": 0.5267, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.9247739069634147, |
| "learning_rate": 8.77372850091706e-06, |
| "loss": 0.6142, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.087695274157492, |
| "learning_rate": 8.76721970537591e-06, |
| "loss": 0.6652, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.850788744558352, |
| "learning_rate": 8.760696109824403e-06, |
| "loss": 0.5258, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.118016145296157, |
| "learning_rate": 8.754157739891332e-06, |
| "loss": 0.6427, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.1471953099208525, |
| "learning_rate": 8.74760462126353e-06, |
| "loss": 0.6361, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.9116075357657814, |
| "learning_rate": 8.741036779685771e-06, |
| "loss": 0.5885, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 2.186935678265862, |
| "learning_rate": 8.734454240960672e-06, |
| "loss": 0.7819, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.9329278314726581, |
| "learning_rate": 8.727857030948587e-06, |
| "loss": 0.6089, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 2.29559179529083, |
| "learning_rate": 8.721245175567513e-06, |
| "loss": 0.6536, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.949685346432584, |
| "learning_rate": 8.714618700792975e-06, |
| "loss": 0.588, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 2.1364335358069555, |
| "learning_rate": 8.707977632657942e-06, |
| "loss": 0.5693, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 2.1052630337837646, |
| "learning_rate": 8.701321997252707e-06, |
| "loss": 0.6618, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.9419341419413294, |
| "learning_rate": 8.694651820724796e-06, |
| "loss": 0.6432, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 2.1809423639777847, |
| "learning_rate": 8.687967129278863e-06, |
| "loss": 0.6786, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 2.048911015295105, |
| "learning_rate": 8.68126794917658e-06, |
| "loss": 0.6848, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 2.1090753469834076, |
| "learning_rate": 8.674554306736545e-06, |
| "loss": 0.6447, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 2.106438933355631, |
| "learning_rate": 8.667826228334173e-06, |
| "loss": 0.5551, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 2.0203947279705226, |
| "learning_rate": 8.66108374040159e-06, |
| "loss": 0.5717, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 2.0615093467875854, |
| "learning_rate": 8.654326869427533e-06, |
| "loss": 0.6311, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 2.1329704988537665, |
| "learning_rate": 8.647555641957243e-06, |
| "loss": 0.6243, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.8315214353591525, |
| "learning_rate": 8.640770084592367e-06, |
| "loss": 0.5547, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 2.210411556217951, |
| "learning_rate": 8.633970223990841e-06, |
| "loss": 0.6408, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 2.007561414582103, |
| "learning_rate": 8.627156086866804e-06, |
| "loss": 0.5894, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.939162087316279, |
| "learning_rate": 8.620327699990469e-06, |
| "loss": 0.5772, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 2.1700787302690094, |
| "learning_rate": 8.613485090188044e-06, |
| "loss": 0.6095, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 2.0168885983710703, |
| "learning_rate": 8.606628284341603e-06, |
| "loss": 0.6537, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.9420432087459054, |
| "learning_rate": 8.599757309388998e-06, |
| "loss": 0.5503, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.9005641654421328, |
| "learning_rate": 8.592872192323742e-06, |
| "loss": 0.5285, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 2.310866512162385, |
| "learning_rate": 8.58597296019491e-06, |
| "loss": 0.6925, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 2.0185104038237283, |
| "learning_rate": 8.57905964010703e-06, |
| "loss": 0.6208, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 2.227534027585251, |
| "learning_rate": 8.572132259219973e-06, |
| "loss": 0.6722, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.972521989095671, |
| "learning_rate": 8.565190844748852e-06, |
| "loss": 0.6204, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 2.192852585817164, |
| "learning_rate": 8.558235423963912e-06, |
| "loss": 0.6615, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 2.1499672574920883, |
| "learning_rate": 8.551266024190425e-06, |
| "loss": 0.5939, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 2.0344036721852303, |
| "learning_rate": 8.54428267280858e-06, |
| "loss": 0.609, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 2.100328047808317, |
| "learning_rate": 8.537285397253378e-06, |
| "loss": 0.6728, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.9522804167083359, |
| "learning_rate": 8.53027422501452e-06, |
| "loss": 0.5963, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 2.0276370479958663, |
| "learning_rate": 8.523249183636303e-06, |
| "loss": 0.6615, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.969628544118802, |
| "learning_rate": 8.516210300717519e-06, |
| "loss": 0.6111, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 2.029052699494888, |
| "learning_rate": 8.50915760391132e-06, |
| "loss": 0.6396, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.8921377970210058, |
| "learning_rate": 8.502091120925147e-06, |
| "loss": 0.6135, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.905825397098304, |
| "learning_rate": 8.49501087952059e-06, |
| "loss": 0.6531, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 2.062356135135231, |
| "learning_rate": 8.487916907513291e-06, |
| "loss": 0.6511, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 2.0500672806486047, |
| "learning_rate": 8.480809232772845e-06, |
| "loss": 0.6973, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.9780923474909595, |
| "learning_rate": 8.473687883222665e-06, |
| "loss": 0.5567, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 2.001802531470044, |
| "learning_rate": 8.4665528868399e-06, |
| "loss": 0.6096, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 2.0486427239843343, |
| "learning_rate": 8.459404271655304e-06, |
| "loss": 0.7061, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 2.1064266393636113, |
| "learning_rate": 8.452242065753138e-06, |
| "loss": 0.6797, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 2.3915047992203, |
| "learning_rate": 8.445066297271055e-06, |
| "loss": 0.6238, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 2.0029093719066053, |
| "learning_rate": 8.437876994399992e-06, |
| "loss": 0.5708, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 2.0251422666051178, |
| "learning_rate": 8.430674185384054e-06, |
| "loss": 0.6305, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 2.3215947492777222, |
| "learning_rate": 8.423457898520411e-06, |
| "loss": 0.6077, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.9799905222032952, |
| "learning_rate": 8.416228162159178e-06, |
| "loss": 0.5937, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.9919779229157657, |
| "learning_rate": 8.408985004703312e-06, |
| "loss": 0.6588, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.8545854124573158, |
| "learning_rate": 8.401728454608495e-06, |
| "loss": 0.5624, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.9951991842396126, |
| "learning_rate": 8.394458540383021e-06, |
| "loss": 0.6586, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.9832046641551582, |
| "learning_rate": 8.387175290587692e-06, |
| "loss": 0.6178, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 2.1705961273936456, |
| "learning_rate": 8.379878733835697e-06, |
| "loss": 0.6783, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 2.0865192960586323, |
| "learning_rate": 8.372568898792504e-06, |
| "loss": 0.6141, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.965297643743764, |
| "learning_rate": 8.365245814175744e-06, |
| "loss": 0.5656, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.994816838265779, |
| "learning_rate": 8.357909508755106e-06, |
| "loss": 0.5594, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 2.081917736412011, |
| "learning_rate": 8.350560011352217e-06, |
| "loss": 0.6753, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.9190310357166047, |
| "learning_rate": 8.343197350840525e-06, |
| "loss": 0.5778, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.9990825253769382, |
| "learning_rate": 8.335821556145196e-06, |
| "loss": 0.5679, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.8601731215327446, |
| "learning_rate": 8.328432656242998e-06, |
| "loss": 0.5376, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.8011984252968534, |
| "learning_rate": 8.321030680162177e-06, |
| "loss": 0.5679, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 2.2985243811453637, |
| "learning_rate": 8.313615656982354e-06, |
| "loss": 0.6887, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 2.0168932090236624, |
| "learning_rate": 8.306187615834411e-06, |
| "loss": 0.6523, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 2.007847760050486, |
| "learning_rate": 8.298746585900367e-06, |
| "loss": 0.6079, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 2.1580333815191914, |
| "learning_rate": 8.291292596413272e-06, |
| "loss": 0.7007, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 2.0148089585758857, |
| "learning_rate": 8.28382567665709e-06, |
| "loss": 0.6778, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 2.0624654834089697, |
| "learning_rate": 8.276345855966579e-06, |
| "loss": 0.618, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.9930903577238281, |
| "learning_rate": 8.268853163727184e-06, |
| "loss": 0.6011, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.9326313111875104, |
| "learning_rate": 8.26134762937492e-06, |
| "loss": 0.5755, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 2.1052378837219283, |
| "learning_rate": 8.253829282396246e-06, |
| "loss": 0.576, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.9490986209974357, |
| "learning_rate": 8.246298152327965e-06, |
| "loss": 0.4944, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 2.0980391720214002, |
| "learning_rate": 8.238754268757092e-06, |
| "loss": 0.7186, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.881768667514335, |
| "learning_rate": 8.231197661320755e-06, |
| "loss": 0.6097, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.9646616396951349, |
| "learning_rate": 8.223628359706063e-06, |
| "loss": 0.6717, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.9845900062528004, |
| "learning_rate": 8.216046393649997e-06, |
| "loss": 0.5794, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.975691200881602, |
| "learning_rate": 8.20845179293929e-06, |
| "loss": 0.6777, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.9790588163074925, |
| "learning_rate": 8.20084458741032e-06, |
| "loss": 0.5762, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 2.033854053229917, |
| "learning_rate": 8.193224806948975e-06, |
| "loss": 0.6425, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 2.0564703779289855, |
| "learning_rate": 8.185592481490549e-06, |
| "loss": 0.5421, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.9024282460009037, |
| "learning_rate": 8.177947641019622e-06, |
| "loss": 0.5416, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.8428880273743034, |
| "learning_rate": 8.170290315569937e-06, |
| "loss": 0.5476, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 2.0314873709790517, |
| "learning_rate": 8.16262053522429e-06, |
| "loss": 0.6254, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.959747747554248, |
| "learning_rate": 8.154938330114407e-06, |
| "loss": 0.6715, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.9605352675210954, |
| "learning_rate": 8.147243730420827e-06, |
| "loss": 0.5389, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.9808533481893225, |
| "learning_rate": 8.139536766372775e-06, |
| "loss": 0.5917, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 2.124751570239496, |
| "learning_rate": 8.131817468248064e-06, |
| "loss": 0.646, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.9453603552598644, |
| "learning_rate": 8.124085866372952e-06, |
| "loss": 0.6475, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 2.284493964086694, |
| "learning_rate": 8.116341991122038e-06, |
| "loss": 0.657, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 2.173487845748996, |
| "learning_rate": 8.108585872918142e-06, |
| "loss": 0.6072, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.9740790341680636, |
| "learning_rate": 8.100817542232175e-06, |
| "loss": 0.6192, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.9882407145838754, |
| "learning_rate": 8.09303702958303e-06, |
| "loss": 0.7174, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.765767752810985, |
| "learning_rate": 8.085244365537459e-06, |
| "loss": 0.5659, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 2.0919873787965018, |
| "learning_rate": 8.077439580709954e-06, |
| "loss": 0.7014, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 2.0909317709072597, |
| "learning_rate": 8.069622705762619e-06, |
| "loss": 0.6553, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 2.0985013077972163, |
| "learning_rate": 8.06179377140506e-06, |
| "loss": 0.5996, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.875167456622752, |
| "learning_rate": 8.05395280839426e-06, |
| "loss": 0.4977, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.8642775987752205, |
| "learning_rate": 8.046099847534458e-06, |
| "loss": 0.516, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.8047762854038711, |
| "learning_rate": 8.038234919677029e-06, |
| "loss": 0.5456, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 2.054027103241828, |
| "learning_rate": 8.030358055720355e-06, |
| "loss": 0.6449, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.88938224837625, |
| "learning_rate": 8.02246928660972e-06, |
| "loss": 0.5853, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.9949053145025524, |
| "learning_rate": 8.014568643337175e-06, |
| "loss": 0.6374, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.181017234415942, |
| "learning_rate": 8.006656156941418e-06, |
| "loss": 0.6383, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.1231246586306325, |
| "learning_rate": 7.998731858507675e-06, |
| "loss": 0.6517, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.014704090337969, |
| "learning_rate": 7.990795779167584e-06, |
| "loss": 0.6203, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.1690171263618785, |
| "learning_rate": 7.982847950099055e-06, |
| "loss": 0.7135, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.8051904968061352, |
| "learning_rate": 7.974888402526166e-06, |
| "loss": 0.5658, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.0602443463430555, |
| "learning_rate": 7.966917167719029e-06, |
| "loss": 0.6524, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.8973152827677298, |
| "learning_rate": 7.95893427699367e-06, |
| "loss": 0.5758, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.019571292211095, |
| "learning_rate": 7.950939761711915e-06, |
| "loss": 0.6241, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.8785901266881793, |
| "learning_rate": 7.942933653281245e-06, |
| "loss": 0.5769, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.166266745685418, |
| "learning_rate": 7.934915983154698e-06, |
| "loss": 0.6663, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.123451242286571, |
| "learning_rate": 7.92688678283073e-06, |
| "loss": 0.6527, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.9075717966913297, |
| "learning_rate": 7.918846083853089e-06, |
| "loss": 0.6569, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.9000996169691746, |
| "learning_rate": 7.910793917810707e-06, |
| "loss": 0.6385, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.927434736070484, |
| "learning_rate": 7.902730316337556e-06, |
| "loss": 0.5631, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.9790989754571544, |
| "learning_rate": 7.894655311112545e-06, |
| "loss": 0.6068, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 2.1188146600936535, |
| "learning_rate": 7.886568933859372e-06, |
| "loss": 0.696, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.9800997991892215, |
| "learning_rate": 7.878471216346418e-06, |
| "loss": 0.6283, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.8749813668837976, |
| "learning_rate": 7.870362190386616e-06, |
| "loss": 0.5925, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 2.030181629804673, |
| "learning_rate": 7.862241887837322e-06, |
| "loss": 0.5838, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 2.191116164536583, |
| "learning_rate": 7.854110340600199e-06, |
| "loss": 0.6621, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 2.1322352729861747, |
| "learning_rate": 7.845967580621082e-06, |
| "loss": 0.7296, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.9503331877159438, |
| "learning_rate": 7.837813639889858e-06, |
| "loss": 0.6131, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 2.0706877886034802, |
| "learning_rate": 7.829648550440337e-06, |
| "loss": 0.6048, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 2.0116116138534617, |
| "learning_rate": 7.821472344350131e-06, |
| "loss": 0.6343, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.9969976539512104, |
| "learning_rate": 7.813285053740526e-06, |
| "loss": 0.6453, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 2.1284306784258638, |
| "learning_rate": 7.805086710776353e-06, |
| "loss": 0.6498, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.9199881561880785, |
| "learning_rate": 7.796877347665861e-06, |
| "loss": 0.5469, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 2.190799717584273, |
| "learning_rate": 7.788656996660596e-06, |
| "loss": 0.6443, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 2.0667507128163525, |
| "learning_rate": 7.780425690055275e-06, |
| "loss": 0.6689, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 2.0614204138949077, |
| "learning_rate": 7.772183460187647e-06, |
| "loss": 0.7005, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.8873750448102828, |
| "learning_rate": 7.763930339438383e-06, |
| "loss": 0.54, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.8982868706163196, |
| "learning_rate": 7.755666360230933e-06, |
| "loss": 0.6, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.9491417935122528, |
| "learning_rate": 7.747391555031414e-06, |
| "loss": 0.5981, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 2.082993477568864, |
| "learning_rate": 7.739105956348465e-06, |
| "loss": 0.6724, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.9771665196367632, |
| "learning_rate": 7.730809596733136e-06, |
| "loss": 0.6199, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.9910315139065318, |
| "learning_rate": 7.722502508778747e-06, |
| "loss": 0.6237, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.8173451465224066, |
| "learning_rate": 7.71418472512077e-06, |
| "loss": 0.5711, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 2.017970082027841, |
| "learning_rate": 7.705856278436696e-06, |
| "loss": 0.568, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.756994616816012, |
| "learning_rate": 7.697517201445906e-06, |
| "loss": 0.4771, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.783316776618109, |
| "learning_rate": 7.689167526909542e-06, |
| "loss": 0.5154, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 2.019720522663777, |
| "learning_rate": 7.680807287630383e-06, |
| "loss": 0.6041, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 2.0783908508210622, |
| "learning_rate": 7.67243651645271e-06, |
| "loss": 0.5921, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.8528382833978114, |
| "learning_rate": 7.664055246262183e-06, |
| "loss": 0.5604, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 2.117910334131364, |
| "learning_rate": 7.655663509985707e-06, |
| "loss": 0.6059, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.9758606107637775, |
| "learning_rate": 7.647261340591303e-06, |
| "loss": 0.6412, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.9290348907447834, |
| "learning_rate": 7.638848771087982e-06, |
| "loss": 0.5705, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.8960214594144043, |
| "learning_rate": 7.63042583452561e-06, |
| "loss": 0.6163, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.7445681487714644, |
| "learning_rate": 7.621992563994789e-06, |
| "loss": 0.5722, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.9279287791801931, |
| "learning_rate": 7.613548992626711e-06, |
| "loss": 0.5845, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.8792256339894968, |
| "learning_rate": 7.605095153593038e-06, |
| "loss": 0.5947, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.9849662044668719, |
| "learning_rate": 7.596631080105774e-06, |
| "loss": 0.6454, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.1721543557468643, |
| "learning_rate": 7.588156805417126e-06, |
| "loss": 0.5729, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.0849048969435136, |
| "learning_rate": 7.5796723628193815e-06, |
| "loss": 0.5947, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.2968962836408324, |
| "learning_rate": 7.571177785644766e-06, |
| "loss": 0.6569, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.9267367648655322, |
| "learning_rate": 7.562673107265333e-06, |
| "loss": 0.5691, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.9013543447575418, |
| "learning_rate": 7.554158361092807e-06, |
| "loss": 0.5434, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.82007897551597, |
| "learning_rate": 7.545633580578474e-06, |
| "loss": 0.6298, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.04394739282291, |
| "learning_rate": 7.537098799213036e-06, |
| "loss": 0.622, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.075155956099819, |
| "learning_rate": 7.528554050526489e-06, |
| "loss": 0.6556, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.0315943098160236, |
| "learning_rate": 7.519999368087982e-06, |
| "loss": 0.6453, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.0428160155679786, |
| "learning_rate": 7.511434785505693e-06, |
| "loss": 0.7135, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.222904873771381, |
| "learning_rate": 7.502860336426696e-06, |
| "loss": 0.6357, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.7822608620060818, |
| "learning_rate": 7.494276054536821e-06, |
| "loss": 0.5291, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.0685923305897624, |
| "learning_rate": 7.485681973560532e-06, |
| "loss": 0.5797, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.8859535667625311, |
| "learning_rate": 7.4770781272607895e-06, |
| "loss": 0.548, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 2.2063159373182093, |
| "learning_rate": 7.468464549438916e-06, |
| "loss": 0.5926, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.8937819274172978, |
| "learning_rate": 7.45984127393447e-06, |
| "loss": 0.6131, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 2.0362363463005506, |
| "learning_rate": 7.4512083346251026e-06, |
| "loss": 0.5862, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 2.2409987097836717, |
| "learning_rate": 7.442565765426436e-06, |
| "loss": 0.6329, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.984238720215282, |
| "learning_rate": 7.433913600291921e-06, |
| "loss": 0.6436, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 2.104431967032393, |
| "learning_rate": 7.425251873212709e-06, |
| "loss": 0.7334, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 2.1700383544690096, |
| "learning_rate": 7.416580618217515e-06, |
| "loss": 0.6976, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 2.2359253381623345, |
| "learning_rate": 7.407899869372489e-06, |
| "loss": 0.6529, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 2.207059006099314, |
| "learning_rate": 7.399209660781075e-06, |
| "loss": 0.6742, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 2.261704338275933, |
| "learning_rate": 7.390510026583884e-06, |
| "loss": 0.6153, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 2.0605566199360004, |
| "learning_rate": 7.381801000958554e-06, |
| "loss": 0.6127, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.8448877901078287, |
| "learning_rate": 7.3730826181196206e-06, |
| "loss": 0.5658, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.813700393133709, |
| "learning_rate": 7.364354912318379e-06, |
| "loss": 0.5713, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.8851240301079237, |
| "learning_rate": 7.355617917842751e-06, |
| "loss": 0.551, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.7963234257752434, |
| "learning_rate": 7.346871669017153e-06, |
| "loss": 0.5825, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.9811136419250976, |
| "learning_rate": 7.338116200202352e-06, |
| "loss": 0.6257, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 2.113085248629396, |
| "learning_rate": 7.329351545795345e-06, |
| "loss": 0.6154, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.664313245894575, |
| "learning_rate": 7.320577740229208e-06, |
| "loss": 0.5348, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 2.0938115916173095, |
| "learning_rate": 7.311794817972975e-06, |
| "loss": 0.619, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.9914918015459053, |
| "learning_rate": 7.3030028135314905e-06, |
| "loss": 0.5977, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.8973404703566117, |
| "learning_rate": 7.294201761445284e-06, |
| "loss": 0.6016, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 2.686916719491371, |
| "learning_rate": 7.285391696290427e-06, |
| "loss": 0.5594, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.8914279547975104, |
| "learning_rate": 7.276572652678403e-06, |
| "loss": 0.6548, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.9045214756464477, |
| "learning_rate": 7.267744665255966e-06, |
| "loss": 0.5625, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 2.0508726477606416, |
| "learning_rate": 7.258907768705006e-06, |
| "loss": 0.5994, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 2.1572541720871206, |
| "learning_rate": 7.2500619977424154e-06, |
| "loss": 0.6259, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.8740105212119254, |
| "learning_rate": 7.241207387119953e-06, |
| "loss": 0.5498, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 2.11048827570066, |
| "learning_rate": 7.2323439716241e-06, |
| "loss": 0.6176, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 2.138108461906426, |
| "learning_rate": 7.223471786075934e-06, |
| "loss": 0.7467, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 2.1086676582577035, |
| "learning_rate": 7.214590865330984e-06, |
| "loss": 0.6513, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.8758955739955738, |
| "learning_rate": 7.2057012442790975e-06, |
| "loss": 0.5449, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.682499149886398, |
| "learning_rate": 7.1968029578443e-06, |
| "loss": 0.525, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.9868958931777934, |
| "learning_rate": 7.187896040984661e-06, |
| "loss": 0.626, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.9273119955565226, |
| "learning_rate": 7.178980528692161e-06, |
| "loss": 0.6012, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 2.0192285563021466, |
| "learning_rate": 7.170056455992541e-06, |
| "loss": 0.6065, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.9019456790022062, |
| "learning_rate": 7.161123857945177e-06, |
| "loss": 0.6329, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 2.024610041244123, |
| "learning_rate": 7.152182769642936e-06, |
| "loss": 0.6359, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.9325040211739186, |
| "learning_rate": 7.143233226212042e-06, |
| "loss": 0.5215, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 2.012751842328307, |
| "learning_rate": 7.134275262811935e-06, |
| "loss": 0.6432, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.7572649666598243, |
| "learning_rate": 7.1253089146351325e-06, |
| "loss": 0.5677, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.7788331655165412, |
| "learning_rate": 7.116334216907097e-06, |
| "loss": 0.5215, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.9050240439242967, |
| "learning_rate": 7.107351204886088e-06, |
| "loss": 0.5178, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 2.041894936366493, |
| "learning_rate": 7.098359913863034e-06, |
| "loss": 0.6043, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 2.0308197433902797, |
| "learning_rate": 7.089360379161381e-06, |
| "loss": 0.6213, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.95222686445269, |
| "learning_rate": 7.08035263613697e-06, |
| "loss": 0.5971, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 2.017912918523442, |
| "learning_rate": 7.071336720177886e-06, |
| "loss": 0.6046, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 2.166790566645372, |
| "learning_rate": 7.062312666704321e-06, |
| "loss": 0.5927, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 2.2400201098243544, |
| "learning_rate": 7.053280511168437e-06, |
| "loss": 0.7107, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.8761384322160164, |
| "learning_rate": 7.044240289054227e-06, |
| "loss": 0.5877, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.8121190685789235, |
| "learning_rate": 7.035192035877374e-06, |
| "loss": 0.5278, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 2.291146349707187, |
| "learning_rate": 7.026135787185113e-06, |
| "loss": 0.6674, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.9115866137344344, |
| "learning_rate": 7.017071578556088e-06, |
| "loss": 0.6101, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 2.2159502369044746, |
| "learning_rate": 7.007999445600216e-06, |
| "loss": 0.6451, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.94968047657449, |
| "learning_rate": 6.998919423958548e-06, |
| "loss": 0.6115, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 2.0483508425325208, |
| "learning_rate": 6.989831549303121e-06, |
| "loss": 0.5641, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 2.078362428704396, |
| "learning_rate": 6.980735857336831e-06, |
| "loss": 0.5859, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 2.102194806164863, |
| "learning_rate": 6.971632383793278e-06, |
| "loss": 0.5956, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 2.1562012485508766, |
| "learning_rate": 6.962521164436641e-06, |
| "loss": 0.6522, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.942549118113248, |
| "learning_rate": 6.953402235061519e-06, |
| "loss": 0.5656, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 2.032598660713363, |
| "learning_rate": 6.944275631492813e-06, |
| "loss": 0.6328, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 2.120207767189764, |
| "learning_rate": 6.935141389585562e-06, |
| "loss": 0.6283, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.846891984881128, |
| "learning_rate": 6.925999545224819e-06, |
| "loss": 0.5348, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 2.2117072258313515, |
| "learning_rate": 6.916850134325505e-06, |
| "loss": 0.5428, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.9428888699888005, |
| "learning_rate": 6.907693192832263e-06, |
| "loss": 0.6194, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 2.324654552066874, |
| "learning_rate": 6.898528756719325e-06, |
| "loss": 0.6157, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.105488134378262, |
| "learning_rate": 6.8893568619903625e-06, |
| "loss": 0.6574, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.0741815083758803, |
| "learning_rate": 6.8801775446783545e-06, |
| "loss": 0.681, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.8599982138936229, |
| "learning_rate": 6.870990840845435e-06, |
| "loss": 0.532, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.8066163998362903, |
| "learning_rate": 6.861796786582761e-06, |
| "loss": 0.5864, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.103633090126261, |
| "learning_rate": 6.852595418010364e-06, |
| "loss": 0.6276, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.0271857194621994, |
| "learning_rate": 6.843386771277012e-06, |
| "loss": 0.6113, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.9892261757816698, |
| "learning_rate": 6.834170882560066e-06, |
| "loss": 0.6066, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.0920982211462142, |
| "learning_rate": 6.824947788065339e-06, |
| "loss": 0.6631, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.7304966407527353, |
| "learning_rate": 6.8157175240269495e-06, |
| "loss": 0.5458, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.9003138804763595, |
| "learning_rate": 6.806480126707187e-06, |
| "loss": 0.6121, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.9727053822571718, |
| "learning_rate": 6.797235632396362e-06, |
| "loss": 0.6235, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.1447934975774325, |
| "learning_rate": 6.787984077412666e-06, |
| "loss": 0.652, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.0660746773365775, |
| "learning_rate": 6.7787254981020335e-06, |
| "loss": 0.6679, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.0622987551332597, |
| "learning_rate": 6.7694599308379895e-06, |
| "loss": 0.6033, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.9723578845421632, |
| "learning_rate": 6.760187412021516e-06, |
| "loss": 0.6082, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.7982428478028805, |
| "learning_rate": 6.750907978080902e-06, |
| "loss": 0.5334, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 2.036081125390073, |
| "learning_rate": 6.741621665471607e-06, |
| "loss": 0.6212, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 2.1493033458896664, |
| "learning_rate": 6.732328510676111e-06, |
| "loss": 0.6751, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 2.070635996051103, |
| "learning_rate": 6.723028550203779e-06, |
| "loss": 0.5758, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.9050719437104036, |
| "learning_rate": 6.7137218205907036e-06, |
| "loss": 0.54, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.7928377835662002, |
| "learning_rate": 6.704408358399583e-06, |
| "loss": 0.5676, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 2.027588661623482, |
| "learning_rate": 6.695088200219557e-06, |
| "loss": 0.5546, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.9325373078264918, |
| "learning_rate": 6.6857613826660714e-06, |
| "loss": 0.5941, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.9172475345332523, |
| "learning_rate": 6.676427942380741e-06, |
| "loss": 0.5328, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 2.1396308238670367, |
| "learning_rate": 6.667087916031192e-06, |
| "loss": 0.6748, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.8568393271779622, |
| "learning_rate": 6.657741340310927e-06, |
| "loss": 0.5975, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 2.0294986249307394, |
| "learning_rate": 6.648388251939177e-06, |
| "loss": 0.6111, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.9234325700371586, |
| "learning_rate": 6.639028687660766e-06, |
| "loss": 0.596, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 2.0366203200056088, |
| "learning_rate": 6.629662684245949e-06, |
| "loss": 0.5688, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.8958934625265222, |
| "learning_rate": 6.620290278490284e-06, |
| "loss": 0.5791, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.806822731262611, |
| "learning_rate": 6.610911507214482e-06, |
| "loss": 0.5465, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.8876652924106438, |
| "learning_rate": 6.601526407264261e-06, |
| "loss": 0.5537, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 2.0030181777961156, |
| "learning_rate": 6.592135015510197e-06, |
| "loss": 0.6045, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.97710315660336, |
| "learning_rate": 6.5827373688475925e-06, |
| "loss": 0.5725, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 2.01659583449962, |
| "learning_rate": 6.5733335041963175e-06, |
| "loss": 0.6237, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.8805423446258591, |
| "learning_rate": 6.563923458500672e-06, |
| "loss": 0.5479, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.7669993233431147, |
| "learning_rate": 6.554507268729238e-06, |
| "loss": 0.5109, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.8269014493705453, |
| "learning_rate": 6.545084971874738e-06, |
| "loss": 0.5462, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 2.0359088067386786, |
| "learning_rate": 6.535656604953884e-06, |
| "loss": 0.6384, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.9599189973913222, |
| "learning_rate": 6.526222205007236e-06, |
| "loss": 0.5452, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.7645021807661985, |
| "learning_rate": 6.516781809099055e-06, |
| "loss": 0.4752, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.9917551528037687, |
| "learning_rate": 6.507335454317161e-06, |
| "loss": 0.5545, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 2.026042072169137, |
| "learning_rate": 6.497883177772779e-06, |
| "loss": 0.627, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.8861715324088848, |
| "learning_rate": 6.488425016600403e-06, |
| "loss": 0.6235, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.957150362035283, |
| "learning_rate": 6.4789610079576426e-06, |
| "loss": 0.5386, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 2.1453992014090364, |
| "learning_rate": 6.469491189025081e-06, |
| "loss": 0.6518, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.765906720730784, |
| "learning_rate": 6.46001559700613e-06, |
| "loss": 0.6203, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 2.260525086713535, |
| "learning_rate": 6.450534269126878e-06, |
| "loss": 0.6806, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 2.171042858463267, |
| "learning_rate": 6.441047242635947e-06, |
| "loss": 0.6542, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.9103992956138385, |
| "learning_rate": 6.431554554804353e-06, |
| "loss": 0.6342, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 2.0002914383944974, |
| "learning_rate": 6.422056242925347e-06, |
| "loss": 0.561, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.8379292896391608, |
| "learning_rate": 6.412552344314279e-06, |
| "loss": 0.5599, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.9748932397312229, |
| "learning_rate": 6.40304289630844e-06, |
| "loss": 0.5952, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 2.1614553712542253, |
| "learning_rate": 6.3935279362669335e-06, |
| "loss": 0.6412, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.9859247585151905, |
| "learning_rate": 6.384007501570509e-06, |
| "loss": 0.6359, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.8136261988901872, |
| "learning_rate": 6.374481629621427e-06, |
| "loss": 0.5893, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 2.0161944625478574, |
| "learning_rate": 6.364950357843309e-06, |
| "loss": 0.5371, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.8707294403008965, |
| "learning_rate": 6.355413723680991e-06, |
| "loss": 0.606, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 2.083870266773342, |
| "learning_rate": 6.3458717646003746e-06, |
| "loss": 0.5857, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 2.21481293408251, |
| "learning_rate": 6.33632451808828e-06, |
| "loss": 0.5945, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.8209514169327161, |
| "learning_rate": 6.326772021652303e-06, |
| "loss": 0.561, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.7765877330909154, |
| "learning_rate": 6.317214312820662e-06, |
| "loss": 0.5808, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 2.301880122628837, |
| "learning_rate": 6.307651429142053e-06, |
| "loss": 0.6169, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.9511858179855806, |
| "learning_rate": 6.298083408185503e-06, |
| "loss": 0.5485, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.8733084336465669, |
| "learning_rate": 6.288510287540221e-06, |
| "loss": 0.5414, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.9100686536267126, |
| "learning_rate": 6.278932104815453e-06, |
| "loss": 0.5177, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.980724894367988, |
| "learning_rate": 6.269348897640327e-06, |
| "loss": 0.5847, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 2.4372622910469515, |
| "learning_rate": 6.259760703663713e-06, |
| "loss": 0.6332, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.7666399516614875, |
| "learning_rate": 6.2501675605540755e-06, |
| "loss": 0.4731, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.9269831431359743, |
| "learning_rate": 6.240569505999317e-06, |
| "loss": 0.5864, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 2.1889446969469306, |
| "learning_rate": 6.230966577706637e-06, |
| "loss": 0.6465, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 2.0970566330713036, |
| "learning_rate": 6.221358813402383e-06, |
| "loss": 0.6136, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 2.05054134285554, |
| "learning_rate": 6.211746250831902e-06, |
| "loss": 0.6313, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.997693167566272, |
| "learning_rate": 6.202128927759391e-06, |
| "loss": 0.5838, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.902952046522087, |
| "learning_rate": 6.192506881967746e-06, |
| "loss": 0.5913, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 2.118349972159298, |
| "learning_rate": 6.182880151258422e-06, |
| "loss": 0.6401, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.7923389650042116, |
| "learning_rate": 6.173248773451278e-06, |
| "loss": 0.4488, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 2.0358206659939206, |
| "learning_rate": 6.163612786384426e-06, |
| "loss": 0.5871, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.9373816200953502, |
| "learning_rate": 6.153972227914089e-06, |
| "loss": 0.6472, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.7741801403344204, |
| "learning_rate": 6.144327135914452e-06, |
| "loss": 0.5512, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 2.000681880884647, |
| "learning_rate": 6.134677548277504e-06, |
| "loss": 0.6792, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.030783168366151, |
| "learning_rate": 6.125023502912901e-06, |
| "loss": 0.6046, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.0794685094461802, |
| "learning_rate": 6.1153650377478116e-06, |
| "loss": 0.6356, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.0447620516144394, |
| "learning_rate": 6.105702190726765e-06, |
| "loss": 0.6179, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.0749186785935803, |
| "learning_rate": 6.096034999811507e-06, |
| "loss": 0.6269, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.099298563572386, |
| "learning_rate": 6.086363502980848e-06, |
| "loss": 0.5107, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.143595827433539, |
| "learning_rate": 6.076687738230517e-06, |
| "loss": 0.613, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.2534551834297574, |
| "learning_rate": 6.067007743573007e-06, |
| "loss": 0.6627, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.196821231032266, |
| "learning_rate": 6.0573235570374315e-06, |
| "loss": 0.6868, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.9274444301473674, |
| "learning_rate": 6.04763521666937e-06, |
| "loss": 0.6234, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.0827989695986906, |
| "learning_rate": 6.037942760530722e-06, |
| "loss": 0.5338, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.7904575978539012, |
| "learning_rate": 6.028246226699559e-06, |
| "loss": 0.5255, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.8597042651935416, |
| "learning_rate": 6.018545653269967e-06, |
| "loss": 0.5604, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.925996097217488, |
| "learning_rate": 6.008841078351903e-06, |
| "loss": 0.5435, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.726239669389769, |
| "learning_rate": 5.9991325400710506e-06, |
| "loss": 0.5033, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.8668368547030405, |
| "learning_rate": 5.9894200765686574e-06, |
| "loss": 0.5801, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.0334452116466037, |
| "learning_rate": 5.9797037260013915e-06, |
| "loss": 0.6715, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.1692961748152384, |
| "learning_rate": 5.969983526541197e-06, |
| "loss": 0.6002, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.8242102907354445, |
| "learning_rate": 5.960259516375134e-06, |
| "loss": 0.5459, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.9465644945877867, |
| "learning_rate": 5.950531733705237e-06, |
| "loss": 0.5633, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 2.033416173745934, |
| "learning_rate": 5.940800216748357e-06, |
| "loss": 0.595, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 2.029664712146445, |
| "learning_rate": 5.9310650037360226e-06, |
| "loss": 0.636, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.949211782466542, |
| "learning_rate": 5.921326132914275e-06, |
| "loss": 0.5598, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 2.130586940499767, |
| "learning_rate": 5.911583642543532e-06, |
| "loss": 0.6793, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 2.037641386476974, |
| "learning_rate": 5.901837570898425e-06, |
| "loss": 0.6281, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 2.063158938340875, |
| "learning_rate": 5.892087956267659e-06, |
| "loss": 0.5975, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 2.0120237220111954, |
| "learning_rate": 5.88233483695386e-06, |
| "loss": 0.5072, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.8881476646969595, |
| "learning_rate": 5.872578251273418e-06, |
| "loss": 0.5661, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.8984742507906354, |
| "learning_rate": 5.862818237556344e-06, |
| "loss": 0.5364, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 2.041836701931837, |
| "learning_rate": 5.8530548341461125e-06, |
| "loss": 0.6654, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.9187386355732121, |
| "learning_rate": 5.843288079399523e-06, |
| "loss": 0.5945, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.929660691723023, |
| "learning_rate": 5.833518011686531e-06, |
| "loss": 0.523, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 2.150008834991264, |
| "learning_rate": 5.823744669390115e-06, |
| "loss": 0.669, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 2.124617912097345, |
| "learning_rate": 5.813968090906117e-06, |
| "loss": 0.5963, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 2.0097512818088563, |
| "learning_rate": 5.804188314643088e-06, |
| "loss": 0.5946, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.7904830455893548, |
| "learning_rate": 5.794405379022147e-06, |
| "loss": 0.5818, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 2.1097636641498805, |
| "learning_rate": 5.784619322476822e-06, |
| "loss": 0.5711, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 2.1115025364007636, |
| "learning_rate": 5.774830183452905e-06, |
| "loss": 0.5844, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 2.0129995897966833, |
| "learning_rate": 5.765038000408295e-06, |
| "loss": 0.5759, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.9329010751228102, |
| "learning_rate": 5.755242811812851e-06, |
| "loss": 0.5464, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 2.0085429975812055, |
| "learning_rate": 5.74544465614824e-06, |
| "loss": 0.5751, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.9903327654676763, |
| "learning_rate": 5.735643571907785e-06, |
| "loss": 0.6458, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 2.1067344237718393, |
| "learning_rate": 5.725839597596312e-06, |
| "loss": 0.6115, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.9189891230884772, |
| "learning_rate": 5.716032771730008e-06, |
| "loss": 0.5441, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 2.4011718668135993, |
| "learning_rate": 5.706223132836255e-06, |
| "loss": 0.5773, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 2.072775046614376, |
| "learning_rate": 5.69641071945349e-06, |
| "loss": 0.6463, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.9935114006477437, |
| "learning_rate": 5.686595570131048e-06, |
| "loss": 0.5186, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.844020593200682, |
| "learning_rate": 5.6767777234290165e-06, |
| "loss": 0.5469, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 2.0393335340113743, |
| "learning_rate": 5.666957217918076e-06, |
| "loss": 0.6512, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.9394154490786393, |
| "learning_rate": 5.657134092179354e-06, |
| "loss": 0.603, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 2.005329689227122, |
| "learning_rate": 5.647308384804272e-06, |
| "loss": 0.6182, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 2.170791771636413, |
| "learning_rate": 5.637480134394394e-06, |
| "loss": 0.6277, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 2.001689578136617, |
| "learning_rate": 5.627649379561273e-06, |
| "loss": 0.6089, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 2.0454235186622114, |
| "learning_rate": 5.617816158926303e-06, |
| "loss": 0.5878, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.8823086840222076, |
| "learning_rate": 5.607980511120565e-06, |
| "loss": 0.5706, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.80269943780875, |
| "learning_rate": 5.598142474784671e-06, |
| "loss": 0.4961, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 2.033336270252808, |
| "learning_rate": 5.588302088568625e-06, |
| "loss": 0.556, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.9778139575223732, |
| "learning_rate": 5.578459391131657e-06, |
| "loss": 0.5302, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 2.020736561300123, |
| "learning_rate": 5.568614421142078e-06, |
| "loss": 0.5978, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.9174222510669499, |
| "learning_rate": 5.558767217277127e-06, |
| "loss": 0.4907, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.8250592907674714, |
| "learning_rate": 5.548917818222818e-06, |
| "loss": 0.5262, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.966787711230393, |
| "learning_rate": 5.539066262673793e-06, |
| "loss": 0.6737, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.855643229521311, |
| "learning_rate": 5.529212589333163e-06, |
| "loss": 0.5955, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 2.1346079614371543, |
| "learning_rate": 5.5193568369123576e-06, |
| "loss": 0.5729, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 2.0515766199556706, |
| "learning_rate": 5.509499044130977e-06, |
| "loss": 0.5719, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 2.0107844202744336, |
| "learning_rate": 5.4996392497166375e-06, |
| "loss": 0.6046, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.9275125168152694, |
| "learning_rate": 5.489777492404818e-06, |
| "loss": 0.5201, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 2.0862073890728428, |
| "learning_rate": 5.479913810938706e-06, |
| "loss": 0.6474, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.947899506277237, |
| "learning_rate": 5.470048244069055e-06, |
| "loss": 0.6276, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.938490880806152, |
| "learning_rate": 5.46018083055402e-06, |
| "loss": 0.573, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.9073385387532762, |
| "learning_rate": 5.450311609159013e-06, |
| "loss": 0.5404, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.809537621124168, |
| "learning_rate": 5.4404406186565465e-06, |
| "loss": 0.541, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.9527437330399584, |
| "learning_rate": 5.430567897826086e-06, |
| "loss": 0.6258, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.7508264552185595, |
| "learning_rate": 5.420693485453893e-06, |
| "loss": 0.5149, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 2.0056514367983858, |
| "learning_rate": 5.410817420332876e-06, |
| "loss": 0.5755, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 2.035682132284417, |
| "learning_rate": 5.400939741262434e-06, |
| "loss": 0.6091, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 2.039715629729808, |
| "learning_rate": 5.39106048704831e-06, |
| "loss": 0.6284, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.9509583461522269, |
| "learning_rate": 5.381179696502432e-06, |
| "loss": 0.6541, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 2.101000418400194, |
| "learning_rate": 5.371297408442765e-06, |
| "loss": 0.719, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 2.1274177930152187, |
| "learning_rate": 5.361413661693157e-06, |
| "loss": 0.6933, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.9350531172343641, |
| "learning_rate": 5.351528495083187e-06, |
| "loss": 0.5427, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.9236019600576935, |
| "learning_rate": 5.341641947448011e-06, |
| "loss": 0.5427, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.896084411851985, |
| "learning_rate": 5.331754057628212e-06, |
| "loss": 0.5404, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 2.1943365136159345, |
| "learning_rate": 5.321864864469646e-06, |
| "loss": 0.6178, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 2.0214323469529307, |
| "learning_rate": 5.311974406823288e-06, |
| "loss": 0.5394, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.867537229859426, |
| "learning_rate": 5.3020827235450815e-06, |
| "loss": 0.5502, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 2.234343663037103, |
| "learning_rate": 5.292189853495784e-06, |
| "loss": 0.6277, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 2.013802275182187, |
| "learning_rate": 5.282295835540818e-06, |
| "loss": 0.6056, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.9513906655142625, |
| "learning_rate": 5.272400708550114e-06, |
| "loss": 0.5685, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.9338299630529332, |
| "learning_rate": 5.262504511397959e-06, |
| "loss": 0.592, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.7548541609411559, |
| "learning_rate": 5.252607282962843e-06, |
| "loss": 0.526, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 2.0616714683528667, |
| "learning_rate": 5.2427090621273114e-06, |
| "loss": 0.5529, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.7804791451461532, |
| "learning_rate": 5.232809887777807e-06, |
| "loss": 0.5478, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.826725496699057, |
| "learning_rate": 5.222909798804515e-06, |
| "loss": 0.5544, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 2.138811923531637, |
| "learning_rate": 5.213008834101218e-06, |
| "loss": 0.643, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.9873736384117076, |
| "learning_rate": 5.20310703256514e-06, |
| "loss": 0.6616, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.9208415386150814, |
| "learning_rate": 5.193204433096787e-06, |
| "loss": 0.5055, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.9813842045072931, |
| "learning_rate": 5.183301074599805e-06, |
| "loss": 0.6327, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.908652107185451, |
| "learning_rate": 5.173396995980818e-06, |
| "loss": 0.6359, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 2.0742468419024847, |
| "learning_rate": 5.1634922361492845e-06, |
| "loss": 0.6413, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.9352720515169122, |
| "learning_rate": 5.153586834017333e-06, |
| "loss": 0.4937, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.8636055456230387, |
| "learning_rate": 5.14368082849962e-06, |
| "loss": 0.5491, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 2.2525115422822255, |
| "learning_rate": 5.133774258513168e-06, |
| "loss": 0.6518, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.976929887451241, |
| "learning_rate": 5.123867162977224e-06, |
| "loss": 0.5955, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 2.1238491116296787, |
| "learning_rate": 5.1139595808130915e-06, |
| "loss": 0.5438, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.9460536517410532, |
| "learning_rate": 5.1040515509439926e-06, |
| "loss": 0.6111, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.8502322758352145, |
| "learning_rate": 5.0941431122949044e-06, |
| "loss": 0.5802, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 2.1312052237471226, |
| "learning_rate": 5.08423430379241e-06, |
| "loss": 0.6531, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.955948461366251, |
| "learning_rate": 5.074325164364549e-06, |
| "loss": 0.576, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 2.2603660355638016, |
| "learning_rate": 5.064415732940654e-06, |
| "loss": 0.6709, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 2.2004715834934854, |
| "learning_rate": 5.054506048451214e-06, |
| "loss": 0.7273, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.9625833391118874, |
| "learning_rate": 5.044596149827705e-06, |
| "loss": 0.5655, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 2.0367810488166196, |
| "learning_rate": 5.034686076002447e-06, |
| "loss": 0.5503, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 2.0781271470418865, |
| "learning_rate": 5.024775865908451e-06, |
| "loss": 0.5408, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.8174563416303517, |
| "learning_rate": 5.014865558479257e-06, |
| "loss": 0.5601, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 2.04027597278746, |
| "learning_rate": 5.004955192648791e-06, |
| "loss": 0.5129, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.929086047655504, |
| "learning_rate": 4.9950448073512096e-06, |
| "loss": 0.6012, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 2.0846476788174018, |
| "learning_rate": 4.9851344415207455e-06, |
| "loss": 0.5691, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 2.015199227101593, |
| "learning_rate": 4.975224134091551e-06, |
| "loss": 0.626, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 2.005830472801361, |
| "learning_rate": 4.965313923997552e-06, |
| "loss": 0.5876, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 2.11312125492647, |
| "learning_rate": 4.955403850172297e-06, |
| "loss": 0.5779, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.964404887222109, |
| "learning_rate": 4.945493951548788e-06, |
| "loss": 0.5264, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.632455019293396, |
| "learning_rate": 4.935584267059346e-06, |
| "loss": 0.4701, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.9988491228675496, |
| "learning_rate": 4.925674835635455e-06, |
| "loss": 0.604, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.9517240575905959, |
| "learning_rate": 4.915765696207591e-06, |
| "loss": 0.6134, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.8771210243391112, |
| "learning_rate": 4.905856887705097e-06, |
| "loss": 0.5352, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.9010355843007118, |
| "learning_rate": 4.895948449056008e-06, |
| "loss": 0.5825, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.8640061544368143, |
| "learning_rate": 4.886040419186909e-06, |
| "loss": 0.536, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 2.127850537210119, |
| "learning_rate": 4.876132837022778e-06, |
| "loss": 0.7484, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 2.035416663771683, |
| "learning_rate": 4.866225741486833e-06, |
| "loss": 0.5556, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.8112553709887884, |
| "learning_rate": 4.856319171500382e-06, |
| "loss": 0.5089, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.7461261116319204, |
| "learning_rate": 4.846413165982668e-06, |
| "loss": 0.5798, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.8200383652508103, |
| "learning_rate": 4.836507763850717e-06, |
| "loss": 0.5644, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 2.0210685681015517, |
| "learning_rate": 4.826603004019182e-06, |
| "loss": 0.6028, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 2.0488262467671654, |
| "learning_rate": 4.816698925400197e-06, |
| "loss": 0.6634, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.9045043503411678, |
| "learning_rate": 4.806795566903214e-06, |
| "loss": 0.5246, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.903132223526836, |
| "learning_rate": 4.796892967434861e-06, |
| "loss": 0.5501, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.9775121455691418, |
| "learning_rate": 4.7869911658987825e-06, |
| "loss": 0.5821, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 2.134944135303822, |
| "learning_rate": 4.777090201195486e-06, |
| "loss": 0.5914, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.8017818510043424, |
| "learning_rate": 4.767190112222196e-06, |
| "loss": 0.5215, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.8986193975250871, |
| "learning_rate": 4.757290937872689e-06, |
| "loss": 0.5674, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 2.198006939268661, |
| "learning_rate": 4.747392717037158e-06, |
| "loss": 0.6696, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.9844558939372063, |
| "learning_rate": 4.737495488602044e-06, |
| "loss": 0.6495, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.8377231311260462, |
| "learning_rate": 4.727599291449887e-06, |
| "loss": 0.526, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 2.1843148217052795, |
| "learning_rate": 4.717704164459182e-06, |
| "loss": 0.6569, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 2.0731163232163525, |
| "learning_rate": 4.707810146504217e-06, |
| "loss": 0.6277, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.8835943474176664, |
| "learning_rate": 4.697917276454919e-06, |
| "loss": 0.5287, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 2.0281931145371828, |
| "learning_rate": 4.688025593176713e-06, |
| "loss": 0.5604, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.9088774231682988, |
| "learning_rate": 4.6781351355303555e-06, |
| "loss": 0.554, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.9551048202904684, |
| "learning_rate": 4.668245942371789e-06, |
| "loss": 0.6467, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 2.067313801101298, |
| "learning_rate": 4.658358052551992e-06, |
| "loss": 0.5992, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.902021092998417, |
| "learning_rate": 4.648471504916815e-06, |
| "loss": 0.5812, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.8922807148527254, |
| "learning_rate": 4.638586338306845e-06, |
| "loss": 0.5374, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.884819760587392, |
| "learning_rate": 4.628702591557237e-06, |
| "loss": 0.5056, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.809064236289934, |
| "learning_rate": 4.61882030349757e-06, |
| "loss": 0.5311, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.939206185133062, |
| "learning_rate": 4.60893951295169e-06, |
| "loss": 0.5821, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 2.1796594240586518, |
| "learning_rate": 4.599060258737567e-06, |
| "loss": 0.6658, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 2.103575194199594, |
| "learning_rate": 4.589182579667125e-06, |
| "loss": 0.6145, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 2.159018112419537, |
| "learning_rate": 4.579306514546107e-06, |
| "loss": 0.6203, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.9460192510920176, |
| "learning_rate": 4.569432102173917e-06, |
| "loss": 0.5578, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.8654041708472648, |
| "learning_rate": 4.559559381343455e-06, |
| "loss": 0.528, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.9680995454358476, |
| "learning_rate": 4.5496883908409905e-06, |
| "loss": 0.6183, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.9853793075023518, |
| "learning_rate": 4.539819169445982e-06, |
| "loss": 0.5658, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 2.1272265558554695, |
| "learning_rate": 4.529951755930946e-06, |
| "loss": 0.6413, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 2.0536110493039827, |
| "learning_rate": 4.5200861890612955e-06, |
| "loss": 0.5394, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.9694789258484728, |
| "learning_rate": 4.510222507595185e-06, |
| "loss": 0.5543, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 2.0637295858493214, |
| "learning_rate": 4.500360750283363e-06, |
| "loss": 0.6254, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 2.2382680799881762, |
| "learning_rate": 4.490500955869025e-06, |
| "loss": 0.5594, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.9215522609744207, |
| "learning_rate": 4.480643163087644e-06, |
| "loss": 0.5565, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.8992138968072834, |
| "learning_rate": 4.4707874106668406e-06, |
| "loss": 0.5549, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 2.053529626956222, |
| "learning_rate": 4.460933737326208e-06, |
| "loss": 0.5997, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.9545793745044062, |
| "learning_rate": 4.4510821817771825e-06, |
| "loss": 0.5397, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 2.085152918955289, |
| "learning_rate": 4.441232782722875e-06, |
| "loss": 0.6005, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 2.1202774407600926, |
| "learning_rate": 4.431385578857924e-06, |
| "loss": 0.5819, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.8352961154836602, |
| "learning_rate": 4.421540608868344e-06, |
| "loss": 0.5951, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 2.1495914883931904, |
| "learning_rate": 4.411697911431376e-06, |
| "loss": 0.6428, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 2.1564746769491876, |
| "learning_rate": 4.4018575252153295e-06, |
| "loss": 0.6402, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.8954514160537663, |
| "learning_rate": 4.392019488879438e-06, |
| "loss": 0.6072, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.8105483820540889, |
| "learning_rate": 4.382183841073698e-06, |
| "loss": 0.5387, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.9485751025827374, |
| "learning_rate": 4.372350620438728e-06, |
| "loss": 0.531, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.9608862157969138, |
| "learning_rate": 4.362519865605608e-06, |
| "loss": 0.5402, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.9691930324266667, |
| "learning_rate": 4.352691615195729e-06, |
| "loss": 0.5624, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.8973081884631189, |
| "learning_rate": 4.342865907820647e-06, |
| "loss": 0.5595, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.9717587970990957, |
| "learning_rate": 4.333042782081926e-06, |
| "loss": 0.662, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 2.007138023783923, |
| "learning_rate": 4.323222276570984e-06, |
| "loss": 0.5723, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 2.22307977714878, |
| "learning_rate": 4.313404429868952e-06, |
| "loss": 0.6789, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 2.190452780908872, |
| "learning_rate": 4.303589280546513e-06, |
| "loss": 0.6045, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.9051078417596634, |
| "learning_rate": 4.293776867163746e-06, |
| "loss": 0.5001, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 2.098076895433394, |
| "learning_rate": 4.283967228269993e-06, |
| "loss": 0.6982, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 2.135564782739449, |
| "learning_rate": 4.274160402403689e-06, |
| "loss": 0.6086, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 2.322595532423094, |
| "learning_rate": 4.264356428092217e-06, |
| "loss": 0.6274, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.9740445661634287, |
| "learning_rate": 4.254555343851762e-06, |
| "loss": 0.6254, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 2.1042432186823965, |
| "learning_rate": 4.24475718818715e-06, |
| "loss": 0.4925, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 2.092018762885259, |
| "learning_rate": 4.234961999591706e-06, |
| "loss": 0.638, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.819978568221369, |
| "learning_rate": 4.2251698165470965e-06, |
| "loss": 0.5285, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 2.0575179276629685, |
| "learning_rate": 4.215380677523179e-06, |
| "loss": 0.5426, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.8926418567324161, |
| "learning_rate": 4.205594620977854e-06, |
| "loss": 0.5378, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 2.055749823208842, |
| "learning_rate": 4.195811685356914e-06, |
| "loss": 0.5888, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 2.0051242608320745, |
| "learning_rate": 4.186031909093884e-06, |
| "loss": 0.5652, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.8592910852701108, |
| "learning_rate": 4.176255330609885e-06, |
| "loss": 0.487, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 2.2411604400071674, |
| "learning_rate": 4.16648198831347e-06, |
| "loss": 0.6867, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 2.072512037016527, |
| "learning_rate": 4.156711920600479e-06, |
| "loss": 0.6362, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.9250096482155696, |
| "learning_rate": 4.146945165853888e-06, |
| "loss": 0.4271, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 2.1874561938184898, |
| "learning_rate": 4.137181762443658e-06, |
| "loss": 0.5753, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.8676645313834617, |
| "learning_rate": 4.127421748726583e-06, |
| "loss": 0.5137, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 2.211228766881823, |
| "learning_rate": 4.117665163046141e-06, |
| "loss": 0.6821, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 2.095689790428209, |
| "learning_rate": 4.107912043732342e-06, |
| "loss": 0.5183, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 2.3214361789624944, |
| "learning_rate": 4.098162429101576e-06, |
| "loss": 0.588, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.9276332877997406, |
| "learning_rate": 4.088416357456471e-06, |
| "loss": 0.5425, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.8872033903192418, |
| "learning_rate": 4.0786738670857254e-06, |
| "loss": 0.5275, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 2.173402848844034, |
| "learning_rate": 4.068934996263978e-06, |
| "loss": 0.6501, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 2.216415986512064, |
| "learning_rate": 4.059199783251644e-06, |
| "loss": 0.5988, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.8838143839651054, |
| "learning_rate": 4.049468266294765e-06, |
| "loss": 0.6169, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 2.028659010558423, |
| "learning_rate": 4.039740483624869e-06, |
| "loss": 0.6277, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 2.0194050107448303, |
| "learning_rate": 4.030016473458805e-06, |
| "loss": 0.6028, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 2.1557586383454574, |
| "learning_rate": 4.020296273998609e-06, |
| "loss": 0.6176, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.8682573092015993, |
| "learning_rate": 4.010579923431346e-06, |
| "loss": 0.5763, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 2.1548920939456093, |
| "learning_rate": 4.00086745992895e-06, |
| "loss": 0.6331, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 2.267061719065842, |
| "learning_rate": 3.991158921648096e-06, |
| "loss": 0.7066, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.9935522203843874, |
| "learning_rate": 3.981454346730036e-06, |
| "loss": 0.5729, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.8026214689706248, |
| "learning_rate": 3.9717537733004415e-06, |
| "loss": 0.5706, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.904682640150856, |
| "learning_rate": 3.9620572394692776e-06, |
| "loss": 0.5683, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 2.136529894979737, |
| "learning_rate": 3.952364783330632e-06, |
| "loss": 0.651, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 2.233298404795316, |
| "learning_rate": 3.942676442962569e-06, |
| "loss": 0.5268, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 2.2946703794023486, |
| "learning_rate": 3.932992256426995e-06, |
| "loss": 0.676, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.9555055432357573, |
| "learning_rate": 3.923312261769485e-06, |
| "loss": 0.598, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 2.3078155728224212, |
| "learning_rate": 3.913636497019154e-06, |
| "loss": 0.6872, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.8574615796272702, |
| "learning_rate": 3.903965000188495e-06, |
| "loss": 0.5518, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 2.213864265081535, |
| "learning_rate": 3.894297809273237e-06, |
| "loss": 0.5652, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.8234823525571142, |
| "learning_rate": 3.884634962252189e-06, |
| "loss": 0.4526, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.8652657269096666, |
| "learning_rate": 3.8749764970871e-06, |
| "loss": 0.5418, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 2.0976734402107224, |
| "learning_rate": 3.8653224517224965e-06, |
| "loss": 0.5637, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 2.0254191334608826, |
| "learning_rate": 3.855672864085549e-06, |
| "loss": 0.5265, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.8196043256300247, |
| "learning_rate": 3.846027772085912e-06, |
| "loss": 0.5179, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.931679412683687, |
| "learning_rate": 3.836387213615576e-06, |
| "loss": 0.5646, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.9232046934900524, |
| "learning_rate": 3.826751226548725e-06, |
| "loss": 0.4793, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.8622914220495714, |
| "learning_rate": 3.817119848741579e-06, |
| "loss": 0.5253, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 2.294972552628036, |
| "learning_rate": 3.8074931180322544e-06, |
| "loss": 0.6577, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.9186117148347783, |
| "learning_rate": 3.7978710722406113e-06, |
| "loss": 0.5449, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 2.1934148088583014, |
| "learning_rate": 3.7882537491680992e-06, |
| "loss": 0.5944, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 2.0440451523844816, |
| "learning_rate": 3.7786411865976167e-06, |
| "loss": 0.5916, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 2.18697015319259, |
| "learning_rate": 3.7690334222933654e-06, |
| "loss": 0.5679, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 2.3700476225659957, |
| "learning_rate": 3.7594304940006846e-06, |
| "loss": 0.7297, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 2.032165887398491, |
| "learning_rate": 3.7498324394459253e-06, |
| "loss": 0.5391, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 2.0047253114127006, |
| "learning_rate": 3.7402392963362878e-06, |
| "loss": 0.6912, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.925349824119937, |
| "learning_rate": 3.7306511023596743e-06, |
| "loss": 0.4714, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.8222273181991067, |
| "learning_rate": 3.721067895184549e-06, |
| "loss": 0.5714, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.8642832971234993, |
| "learning_rate": 3.711489712459779e-06, |
| "loss": 0.5697, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.028076998382638, |
| "learning_rate": 3.7019165918144974e-06, |
| "loss": 0.6216, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.272782312304872, |
| "learning_rate": 3.6923485708579487e-06, |
| "loss": 0.4969, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.1112583503988525, |
| "learning_rate": 3.6827856871793393e-06, |
| "loss": 0.5942, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.0753754185170243, |
| "learning_rate": 3.673227978347698e-06, |
| "loss": 0.5954, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.8568456850511224, |
| "learning_rate": 3.6636754819117213e-06, |
| "loss": 0.5574, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.165971383751749, |
| "learning_rate": 3.6541282353996275e-06, |
| "loss": 0.5837, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.9460733583421799, |
| "learning_rate": 3.6445862763190104e-06, |
| "loss": 0.5682, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.1328727066329525, |
| "learning_rate": 3.635049642156692e-06, |
| "loss": 0.6156, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.206066665837199, |
| "learning_rate": 3.6255183703785735e-06, |
| "loss": 0.5946, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.0007589219567854, |
| "learning_rate": 3.615992498429493e-06, |
| "loss": 0.5819, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.038096288010089, |
| "learning_rate": 3.6064720637330673e-06, |
| "loss": 0.5356, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.978722860188176, |
| "learning_rate": 3.5969571036915596e-06, |
| "loss": 0.5895, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.9480013936453797, |
| "learning_rate": 3.587447655685724e-06, |
| "loss": 0.5308, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.1945763438024453, |
| "learning_rate": 3.5779437570746536e-06, |
| "loss": 0.6562, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.048811833634992, |
| "learning_rate": 3.568445445195647e-06, |
| "loss": 0.5449, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.9036038375948279, |
| "learning_rate": 3.5589527573640537e-06, |
| "loss": 0.5552, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.5575371034983034, |
| "learning_rate": 3.549465730873124e-06, |
| "loss": 0.4615, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.981358268031162, |
| "learning_rate": 3.5399844029938724e-06, |
| "loss": 0.5655, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.9338027875151251, |
| "learning_rate": 3.5305088109749196e-06, |
| "loss": 0.4972, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.9485561300488783, |
| "learning_rate": 3.5210389920423582e-06, |
| "loss": 0.5759, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.860842546149849, |
| "learning_rate": 3.511574983399599e-06, |
| "loss": 0.5328, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 2.1337146376104985, |
| "learning_rate": 3.5021168222272227e-06, |
| "loss": 0.6441, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 2.041043993151766, |
| "learning_rate": 3.49266454568284e-06, |
| "loss": 0.543, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.9008324125548013, |
| "learning_rate": 3.4832181909009467e-06, |
| "loss": 0.5582, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 2.0009175266246113, |
| "learning_rate": 3.473777794992765e-06, |
| "loss": 0.5657, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 2.0488988575321274, |
| "learning_rate": 3.4643433950461175e-06, |
| "loss": 0.5898, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 2.164963300418657, |
| "learning_rate": 3.4549150281252635e-06, |
| "loss": 0.5981, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.9763623413542644, |
| "learning_rate": 3.4454927312707633e-06, |
| "loss": 0.6106, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.9962996245557485, |
| "learning_rate": 3.43607654149933e-06, |
| "loss": 0.5782, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 2.2151735776108, |
| "learning_rate": 3.4266664958036838e-06, |
| "loss": 0.5685, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 2.018094880353655, |
| "learning_rate": 3.417262631152409e-06, |
| "loss": 0.528, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.7630000234879197, |
| "learning_rate": 3.4078649844898045e-06, |
| "loss": 0.5205, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.7955549765689147, |
| "learning_rate": 3.3984735927357414e-06, |
| "loss": 0.4731, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.8797578836131676, |
| "learning_rate": 3.3890884927855185e-06, |
| "loss": 0.5603, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.909711782728961, |
| "learning_rate": 3.3797097215097173e-06, |
| "loss": 0.5129, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.8389954494108633, |
| "learning_rate": 3.3703373157540525e-06, |
| "loss": 0.5193, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.8474429582879734, |
| "learning_rate": 3.3609713123392352e-06, |
| "loss": 0.4737, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 2.3345478444238354, |
| "learning_rate": 3.3516117480608234e-06, |
| "loss": 0.7071, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 2.175175435777652, |
| "learning_rate": 3.3422586596890742e-06, |
| "loss": 0.5722, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 2.1305366509524055, |
| "learning_rate": 3.3329120839688102e-06, |
| "loss": 0.6892, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 2.0052757824888037, |
| "learning_rate": 3.32357205761926e-06, |
| "loss": 0.5995, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 2.0159203979737668, |
| "learning_rate": 3.314238617333928e-06, |
| "loss": 0.6025, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 2.012244600426063, |
| "learning_rate": 3.304911799780445e-06, |
| "loss": 0.5673, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.9361641471312003, |
| "learning_rate": 3.295591641600418e-06, |
| "loss": 0.5838, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.8304544679156056, |
| "learning_rate": 3.2862781794092964e-06, |
| "loss": 0.5585, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 2.149167385207215, |
| "learning_rate": 3.2769714497962235e-06, |
| "loss": 0.5886, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 2.04272728052408, |
| "learning_rate": 3.267671489323889e-06, |
| "loss": 0.5355, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.9965937830873703, |
| "learning_rate": 3.258378334528393e-06, |
| "loss": 0.5976, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 2.126700139225219, |
| "learning_rate": 3.249092021919099e-06, |
| "loss": 0.5431, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.8129449899934444, |
| "learning_rate": 3.239812587978485e-06, |
| "loss": 0.5674, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 2.0383597018537865, |
| "learning_rate": 3.2305400691620126e-06, |
| "loss": 0.6182, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 2.050473137758968, |
| "learning_rate": 3.221274501897968e-06, |
| "loss": 0.5404, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.9168650695196385, |
| "learning_rate": 3.212015922587335e-06, |
| "loss": 0.5563, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.98106980221109, |
| "learning_rate": 3.2027643676036402e-06, |
| "loss": 0.5734, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.847757494976792, |
| "learning_rate": 3.193519873292815e-06, |
| "loss": 0.5501, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 2.0870831968238965, |
| "learning_rate": 3.1842824759730518e-06, |
| "loss": 0.5744, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.8249978023375093, |
| "learning_rate": 3.1750522119346626e-06, |
| "loss": 0.5438, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.9231526649033666, |
| "learning_rate": 3.165829117439935e-06, |
| "loss": 0.529, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 2.0472491835741873, |
| "learning_rate": 3.1566132287229876e-06, |
| "loss": 0.5332, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 2.1920921831656375, |
| "learning_rate": 3.1474045819896374e-06, |
| "loss": 0.5604, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.9909841140079707, |
| "learning_rate": 3.1382032134172395e-06, |
| "loss": 0.5111, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.942915636194669, |
| "learning_rate": 3.129009159154567e-06, |
| "loss": 0.5641, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 2.11037815260772, |
| "learning_rate": 3.1198224553216472e-06, |
| "loss": 0.593, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 2.1107440825628494, |
| "learning_rate": 3.1106431380096374e-06, |
| "loss": 0.5313, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 2.0760892689013324, |
| "learning_rate": 3.101471243280677e-06, |
| "loss": 0.5261, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.9189444275549399, |
| "learning_rate": 3.092306807167738e-06, |
| "loss": 0.5436, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 2.0604725562399913, |
| "learning_rate": 3.083149865674496e-06, |
| "loss": 0.6429, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.8122634340035755, |
| "learning_rate": 3.0740004547751824e-06, |
| "loss": 0.5544, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.8800400262197519, |
| "learning_rate": 3.0648586104144397e-06, |
| "loss": 0.5622, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 2.0644669592689127, |
| "learning_rate": 3.0557243685071874e-06, |
| "loss": 0.6323, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 2.127783852090682, |
| "learning_rate": 3.0465977649384813e-06, |
| "loss": 0.6729, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.7585798179204468, |
| "learning_rate": 3.03747883556336e-06, |
| "loss": 0.5283, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 2.122048769461784, |
| "learning_rate": 3.0283676162067234e-06, |
| "loss": 0.6467, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 2.0797175331249282, |
| "learning_rate": 3.0192641426631707e-06, |
| "loss": 0.5904, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.911144315914054, |
| "learning_rate": 3.010168450696879e-06, |
| "loss": 0.5504, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.733484390437723, |
| "learning_rate": 3.0010805760414544e-06, |
| "loss": 0.4998, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 2.102048422163863, |
| "learning_rate": 2.9920005543997847e-06, |
| "loss": 0.5276, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.9804256500678663, |
| "learning_rate": 2.982928421443914e-06, |
| "loss": 0.4796, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 2.058252604257508, |
| "learning_rate": 2.9738642128148887e-06, |
| "loss": 0.5238, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.9511201686457833, |
| "learning_rate": 2.9648079641226267e-06, |
| "loss": 0.5746, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 2.1199181959460947, |
| "learning_rate": 2.955759710945773e-06, |
| "loss": 0.5502, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 2.3766960612858234, |
| "learning_rate": 2.946719488831564e-06, |
| "loss": 0.518, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.960044833847147, |
| "learning_rate": 2.93768733329568e-06, |
| "loss": 0.5366, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 2.1052778798118643, |
| "learning_rate": 2.928663279822116e-06, |
| "loss": 0.6107, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.9902325930624214, |
| "learning_rate": 2.919647363863031e-06, |
| "loss": 0.5625, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.8937657013896414, |
| "learning_rate": 2.910639620838619e-06, |
| "loss": 0.5431, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 2.081716192850459, |
| "learning_rate": 2.901640086136969e-06, |
| "loss": 0.504, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 2.1189522676194037, |
| "learning_rate": 2.892648795113912e-06, |
| "loss": 0.6598, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.907842433021825, |
| "learning_rate": 2.8836657830929048e-06, |
| "loss": 0.5169, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.9520656325829888, |
| "learning_rate": 2.874691085364868e-06, |
| "loss": 0.5536, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 2.072673902863609, |
| "learning_rate": 2.865724737188067e-06, |
| "loss": 0.579, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 2.0924189620942775, |
| "learning_rate": 2.856766773787959e-06, |
| "loss": 0.5745, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.7781952691539604, |
| "learning_rate": 2.847817230357066e-06, |
| "loss": 0.5756, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 2.034790297885924, |
| "learning_rate": 2.838876142054825e-06, |
| "loss": 0.4909, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 2.17420374619433, |
| "learning_rate": 2.8299435440074596e-06, |
| "loss": 0.5831, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.9647837412380398, |
| "learning_rate": 2.8210194713078408e-06, |
| "loss": 0.5177, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 2.2111415620011017, |
| "learning_rate": 2.81210395901534e-06, |
| "loss": 0.625, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 2.2292783394598406, |
| "learning_rate": 2.8031970421557035e-06, |
| "loss": 0.6244, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 2.079126116302448, |
| "learning_rate": 2.7942987557209054e-06, |
| "loss": 0.5667, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.8984066910628055, |
| "learning_rate": 2.785409134669017e-06, |
| "loss": 0.5423, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.8127916497049823, |
| "learning_rate": 2.776528213924068e-06, |
| "loss": 0.494, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 2.06245637765686, |
| "learning_rate": 2.7676560283759013e-06, |
| "loss": 0.5621, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 2.114616000228047, |
| "learning_rate": 2.7587926128800503e-06, |
| "loss": 0.582, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.8380497026526659, |
| "learning_rate": 2.7499380022575862e-06, |
| "loss": 0.5381, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.898889400333682, |
| "learning_rate": 2.7410922312949955e-06, |
| "loss": 0.543, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 2.2504985306779677, |
| "learning_rate": 2.7322553347440368e-06, |
| "loss": 0.5839, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 2.202704207784561, |
| "learning_rate": 2.723427347321598e-06, |
| "loss": 0.6228, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 2.0144401171881405, |
| "learning_rate": 2.7146083037095726e-06, |
| "loss": 0.5422, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.736757757636721, |
| "learning_rate": 2.705798238554718e-06, |
| "loss": 0.5307, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.8565795545311183, |
| "learning_rate": 2.696997186468511e-06, |
| "loss": 0.5413, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 2.2977818371394663, |
| "learning_rate": 2.688205182027026e-06, |
| "loss": 0.6052, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.9308033883834883, |
| "learning_rate": 2.6794222597707937e-06, |
| "loss": 0.5361, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.9086632142591924, |
| "learning_rate": 2.6706484542046564e-06, |
| "loss": 0.5446, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.9880975815360458, |
| "learning_rate": 2.6618837997976497e-06, |
| "loss": 0.5471, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 2.080617319056223, |
| "learning_rate": 2.6531283309828493e-06, |
| "loss": 0.6338, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.9076523429048307, |
| "learning_rate": 2.6443820821572496e-06, |
| "loss": 0.5312, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 2.0510429772806784, |
| "learning_rate": 2.635645087681623e-06, |
| "loss": 0.6337, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.8261749722012643, |
| "learning_rate": 2.626917381880381e-06, |
| "loss": 0.4953, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.9732654093784376, |
| "learning_rate": 2.618198999041447e-06, |
| "loss": 0.5538, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 2.012868194526242, |
| "learning_rate": 2.609489973416118e-06, |
| "loss": 0.6014, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 2.0313320710830274, |
| "learning_rate": 2.600790339218926e-06, |
| "loss": 0.5784, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 2.1398171593710185, |
| "learning_rate": 2.5921001306275116e-06, |
| "loss": 0.5516, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 2.120355813263771, |
| "learning_rate": 2.5834193817824865e-06, |
| "loss": 0.5909, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 2.030214931297693, |
| "learning_rate": 2.5747481267872925e-06, |
| "loss": 0.5592, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.9767602094754053, |
| "learning_rate": 2.5660863997080808e-06, |
| "loss": 0.5503, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.9483830769279278, |
| "learning_rate": 2.557434234573565e-06, |
| "loss": 0.5671, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.9276456895969436, |
| "learning_rate": 2.548791665374898e-06, |
| "loss": 0.5127, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.8090439396291618, |
| "learning_rate": 2.540158726065532e-06, |
| "loss": 0.5713, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 2.320785088443513, |
| "learning_rate": 2.5315354505610847e-06, |
| "loss": 0.6488, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.9982757056307234, |
| "learning_rate": 2.522921872739211e-06, |
| "loss": 0.5425, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.9334400895537176, |
| "learning_rate": 2.514318026439469e-06, |
| "loss": 0.6033, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 2.027430128222646, |
| "learning_rate": 2.50572394546318e-06, |
| "loss": 0.5551, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 2.212527434238601, |
| "learning_rate": 2.4971396635733043e-06, |
| "loss": 0.6576, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 2.047142954880681, |
| "learning_rate": 2.488565214494307e-06, |
| "loss": 0.6133, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.855345806040437, |
| "learning_rate": 2.480000631912018e-06, |
| "loss": 0.5198, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 2.1075238338772895, |
| "learning_rate": 2.471445949473512e-06, |
| "loss": 0.5667, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.9673428615144855, |
| "learning_rate": 2.4629012007869634e-06, |
| "loss": 0.5715, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 2.2624688260095382, |
| "learning_rate": 2.4543664194215272e-06, |
| "loss": 0.7673, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 2.0285948397058626, |
| "learning_rate": 2.445841638907194e-06, |
| "loss": 0.5768, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 2.088805134401383, |
| "learning_rate": 2.4373268927346678e-06, |
| "loss": 0.5607, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.7787754535359048, |
| "learning_rate": 2.428822214355235e-06, |
| "loss": 0.5723, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 2.149439034712146, |
| "learning_rate": 2.4203276371806206e-06, |
| "loss": 0.6358, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.8509674900388537, |
| "learning_rate": 2.4118431945828757e-06, |
| "loss": 0.5393, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.8927674268591554, |
| "learning_rate": 2.4033689198942272e-06, |
| "loss": 0.5846, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.9917147049157173, |
| "learning_rate": 2.394904846406964e-06, |
| "loss": 0.6189, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.9129347000522996, |
| "learning_rate": 2.3864510073732914e-06, |
| "loss": 0.5045, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 2.050425773481486, |
| "learning_rate": 2.378007436005214e-06, |
| "loss": 0.5873, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.9719631201639327, |
| "learning_rate": 2.3695741654743913e-06, |
| "loss": 0.5375, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 2.0214460337530946, |
| "learning_rate": 2.3611512289120208e-06, |
| "loss": 0.5548, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.8042574192263385, |
| "learning_rate": 2.3527386594087003e-06, |
| "loss": 0.5189, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.9600273923574374, |
| "learning_rate": 2.344336490014295e-06, |
| "loss": 0.5378, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.9432405793477807, |
| "learning_rate": 2.3359447537378173e-06, |
| "loss": 0.5354, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 2.04898553718917, |
| "learning_rate": 2.3275634835472914e-06, |
| "loss": 0.6216, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.9391818082210701, |
| "learning_rate": 2.3191927123696185e-06, |
| "loss": 0.5523, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.9965210994972027, |
| "learning_rate": 2.3108324730904584e-06, |
| "loss": 0.5929, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.8480997829156387, |
| "learning_rate": 2.302482798554096e-06, |
| "loss": 0.5467, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.8430449159542786, |
| "learning_rate": 2.2941437215633043e-06, |
| "loss": 0.5267, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 2.0808145765908543, |
| "learning_rate": 2.2858152748792316e-06, |
| "loss": 0.6113, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.8218623081262797, |
| "learning_rate": 2.277497491221255e-06, |
| "loss": 0.4938, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.9041064505829883, |
| "learning_rate": 2.269190403266866e-06, |
| "loss": 0.5633, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.652734879699611, |
| "learning_rate": 2.260894043651537e-06, |
| "loss": 0.5735, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 2.2538231832723445, |
| "learning_rate": 2.2526084449685876e-06, |
| "loss": 0.6128, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.7960651904147913, |
| "learning_rate": 2.244333639769066e-06, |
| "loss": 0.4856, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.9343813745783291, |
| "learning_rate": 2.236069660561619e-06, |
| "loss": 0.5552, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 2.0101528659989025, |
| "learning_rate": 2.2278165398123538e-06, |
| "loss": 0.5589, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 2.1101574118483826, |
| "learning_rate": 2.2195743099447257e-06, |
| "loss": 0.5837, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.7157523251139841, |
| "learning_rate": 2.211343003339405e-06, |
| "loss": 0.4769, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.7800599491375297, |
| "learning_rate": 2.203122652334141e-06, |
| "loss": 0.5251, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.9387781774592656, |
| "learning_rate": 2.1949132892236495e-06, |
| "loss": 0.5669, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.9580346237978052, |
| "learning_rate": 2.1867149462594745e-06, |
| "loss": 0.6192, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.8050769261944541, |
| "learning_rate": 2.178527655649868e-06, |
| "loss": 0.5353, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 2.028704396965185, |
| "learning_rate": 2.1703514495596643e-06, |
| "loss": 0.565, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.8852797512232322, |
| "learning_rate": 2.1621863601101434e-06, |
| "loss": 0.4691, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 2.0307600694751566, |
| "learning_rate": 2.1540324193789177e-06, |
| "loss": 0.6075, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 2.0635630868289274, |
| "learning_rate": 2.145889659399801e-06, |
| "loss": 0.5713, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.8359324107226915, |
| "learning_rate": 2.137758112162678e-06, |
| "loss": 0.5419, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.9327432699880955, |
| "learning_rate": 2.1296378096133863e-06, |
| "loss": 0.5219, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.9646016130657808, |
| "learning_rate": 2.1215287836535836e-06, |
| "loss": 0.5865, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 2.053237316493875, |
| "learning_rate": 2.1134310661406293e-06, |
| "loss": 0.5495, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.9224877080515406, |
| "learning_rate": 2.1053446888874575e-06, |
| "loss": 0.57, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 2.0094429579360296, |
| "learning_rate": 2.097269683662444e-06, |
| "loss": 0.5966, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 2.214480865585653, |
| "learning_rate": 2.089206082189294e-06, |
| "loss": 0.6409, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 2.0300256495896516, |
| "learning_rate": 2.0811539161469126e-06, |
| "loss": 0.5318, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.9501820223073412, |
| "learning_rate": 2.073113217169272e-06, |
| "loss": 0.5289, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 2.073955162988734, |
| "learning_rate": 2.065084016845301e-06, |
| "loss": 0.6114, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 2.2617011183013505, |
| "learning_rate": 2.0570663467187556e-06, |
| "loss": 0.692, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.9412567792801219, |
| "learning_rate": 2.049060238288086e-06, |
| "loss": 0.5781, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.9161885318665781, |
| "learning_rate": 2.0410657230063304e-06, |
| "loss": 0.4698, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.9595900836322337, |
| "learning_rate": 2.0330828322809727e-06, |
| "loss": 0.5868, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.860761169455946, |
| "learning_rate": 2.025111597473836e-06, |
| "loss": 0.5014, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.9607972138295733, |
| "learning_rate": 2.0171520499009457e-06, |
| "loss": 0.5398, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.8911857797620755, |
| "learning_rate": 2.009204220832418e-06, |
| "loss": 0.5382, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 2.149350473430931, |
| "learning_rate": 2.0012681414923254e-06, |
| "loss": 0.5554, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.8778243908792742, |
| "learning_rate": 1.993343843058585e-06, |
| "loss": 0.5085, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.9294134892146952, |
| "learning_rate": 1.9854313566628273e-06, |
| "loss": 0.5678, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.9503939087047788, |
| "learning_rate": 1.977530713390281e-06, |
| "loss": 0.5656, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 2.092855618928881, |
| "learning_rate": 1.9696419442796474e-06, |
| "loss": 0.5589, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 2.0558632332328366, |
| "learning_rate": 1.9617650803229736e-06, |
| "loss": 0.565, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.8074897396978709, |
| "learning_rate": 1.953900152465544e-06, |
| "loss": 0.5278, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.9979141137083714, |
| "learning_rate": 1.9460471916057415e-06, |
| "loss": 0.542, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 2.031624720114998, |
| "learning_rate": 1.9382062285949416e-06, |
| "loss": 0.4827, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 2.109236104508361, |
| "learning_rate": 1.9303772942373846e-06, |
| "loss": 0.5567, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 2.2956266675459576, |
| "learning_rate": 1.9225604192900488e-06, |
| "loss": 0.6067, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.9743192317750047, |
| "learning_rate": 1.914755634462542e-06, |
| "loss": 0.4976, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.8028667562352623, |
| "learning_rate": 1.9069629704169723e-06, |
| "loss": 0.509, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.7362668980411815, |
| "learning_rate": 1.8991824577678269e-06, |
| "loss": 0.5544, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 2.0285534879494715, |
| "learning_rate": 1.8914141270818593e-06, |
| "loss": 0.4984, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 2.046266381772136, |
| "learning_rate": 1.8836580088779628e-06, |
| "loss": 0.59, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.9988343414891951, |
| "learning_rate": 1.8759141336270486e-06, |
| "loss": 0.5491, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 2.0240438299435968, |
| "learning_rate": 1.868182531751938e-06, |
| "loss": 0.5816, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 2.1594937237415226, |
| "learning_rate": 1.8604632336272249e-06, |
| "loss": 0.5865, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 2.0666115681553032, |
| "learning_rate": 1.8527562695791746e-06, |
| "loss": 0.5231, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 2.0866219869375064, |
| "learning_rate": 1.8450616698855938e-06, |
| "loss": 0.5465, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.9640838158081952, |
| "learning_rate": 1.8373794647757105e-06, |
| "loss": 0.5484, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.8827641315498593, |
| "learning_rate": 1.8297096844300638e-06, |
| "loss": 0.5447, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.9270241151498686, |
| "learning_rate": 1.8220523589803808e-06, |
| "loss": 0.5148, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.8983158181788506, |
| "learning_rate": 1.8144075185094523e-06, |
| "loss": 0.5089, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.9391982904773535, |
| "learning_rate": 1.8067751930510258e-06, |
| "loss": 0.6062, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.8572456201220782, |
| "learning_rate": 1.799155412589681e-06, |
| "loss": 0.4707, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 2.180002124115602, |
| "learning_rate": 1.7915482070607094e-06, |
| "loss": 0.597, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.8462663478460364, |
| "learning_rate": 1.783953606350005e-06, |
| "loss": 0.5577, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.889638886458579, |
| "learning_rate": 1.7763716402939385e-06, |
| "loss": 0.519, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.726660341693367, |
| "learning_rate": 1.7688023386792452e-06, |
| "loss": 0.4718, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 2.2128024274172655, |
| "learning_rate": 1.7612457312429093e-06, |
| "loss": 0.6105, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.8977444616718881, |
| "learning_rate": 1.7537018476720369e-06, |
| "loss": 0.5442, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.7605812625109447, |
| "learning_rate": 1.7461707176037546e-06, |
| "loss": 0.4897, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 2.1962948774783655, |
| "learning_rate": 1.738652370625082e-06, |
| "loss": 0.5795, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.8753753975602796, |
| "learning_rate": 1.7311468362728163e-06, |
| "loss": 0.5267, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 2.2401308043999877, |
| "learning_rate": 1.723654144033422e-06, |
| "loss": 0.5422, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 2.360182834014352, |
| "learning_rate": 1.7161743233429123e-06, |
| "loss": 0.5932, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 2.1509906603775675, |
| "learning_rate": 1.7087074035867284e-06, |
| "loss": 0.5336, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 2.2167606816262215, |
| "learning_rate": 1.7012534140996351e-06, |
| "loss": 0.6204, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 2.2516401026227193, |
| "learning_rate": 1.69381238416559e-06, |
| "loss": 0.6229, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 2.040572811584935, |
| "learning_rate": 1.6863843430176464e-06, |
| "loss": 0.5554, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 2.001470306966103, |
| "learning_rate": 1.6789693198378254e-06, |
| "loss": 0.5494, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 2.0804007666629434, |
| "learning_rate": 1.6715673437570035e-06, |
| "loss": 0.6031, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 2.017960337685253, |
| "learning_rate": 1.6641784438548048e-06, |
| "loss": 0.5567, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 2.084312076747243, |
| "learning_rate": 1.6568026491594763e-06, |
| "loss": 0.5529, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 2.016310638065491, |
| "learning_rate": 1.6494399886477859e-06, |
| "loss": 0.5525, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.8351394186785017, |
| "learning_rate": 1.6420904912448942e-06, |
| "loss": 0.5631, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.978522191746191, |
| "learning_rate": 1.634754185824256e-06, |
| "loss": 0.5075, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 2.223631750342603, |
| "learning_rate": 1.6274311012074984e-06, |
| "loss": 0.6659, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.9821415990981424, |
| "learning_rate": 1.6201212661643045e-06, |
| "loss": 0.5744, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.9744423033984106, |
| "learning_rate": 1.61282470941231e-06, |
| "loss": 0.6117, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 2.065211898764052, |
| "learning_rate": 1.6055414596169806e-06, |
| "loss": 0.5691, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.98605861724129, |
| "learning_rate": 1.5982715453915082e-06, |
| "loss": 0.4985, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 2.04457416568264, |
| "learning_rate": 1.5910149952966898e-06, |
| "loss": 0.5538, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 2.0121702166230895, |
| "learning_rate": 1.583771837840823e-06, |
| "loss": 0.5658, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 2.0092606234614694, |
| "learning_rate": 1.5765421014795911e-06, |
| "loss": 0.5113, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.8935411378036877, |
| "learning_rate": 1.569325814615947e-06, |
| "loss": 0.507, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 2.34585820804892, |
| "learning_rate": 1.562123005600009e-06, |
| "loss": 0.5769, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 2.1528289643549234, |
| "learning_rate": 1.5549337027289468e-06, |
| "loss": 0.5501, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.8301608372784568, |
| "learning_rate": 1.5477579342468634e-06, |
| "loss": 0.5208, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 2.176141189999809, |
| "learning_rate": 1.5405957283446987e-06, |
| "loss": 0.6609, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 2.0473489032027565, |
| "learning_rate": 1.5334471131601025e-06, |
| "loss": 0.5715, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 2.1029826913605647, |
| "learning_rate": 1.526312116777336e-06, |
| "loss": 0.4786, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.9871418869111652, |
| "learning_rate": 1.5191907672271582e-06, |
| "loss": 0.4602, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.9159131882394276, |
| "learning_rate": 1.5120830924867098e-06, |
| "loss": 0.508, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 2.282250060067885, |
| "learning_rate": 1.5049891204794125e-06, |
| "loss": 0.5567, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.9230524058397154, |
| "learning_rate": 1.4979088790748553e-06, |
| "loss": 0.5514, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 2.036428797678635, |
| "learning_rate": 1.4908423960886808e-06, |
| "loss": 0.5909, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 2.1397315181807506, |
| "learning_rate": 1.4837896992824835e-06, |
| "loss": 0.6168, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 2.197275580787461, |
| "learning_rate": 1.4767508163636968e-06, |
| "loss": 0.5636, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.9450184678272302, |
| "learning_rate": 1.4697257749854815e-06, |
| "loss": 0.5576, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.708185896072239, |
| "learning_rate": 1.4627146027466248e-06, |
| "loss": 0.5048, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.931851292310904, |
| "learning_rate": 1.4557173271914216e-06, |
| "loss": 0.6003, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.7739213428365466, |
| "learning_rate": 1.4487339758095758e-06, |
| "loss": 0.4847, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 2.065334796741025, |
| "learning_rate": 1.4417645760360899e-06, |
| "loss": 0.4995, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.9060500200669357, |
| "learning_rate": 1.4348091552511496e-06, |
| "loss": 0.4772, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.8926362460364048, |
| "learning_rate": 1.427867740780028e-06, |
| "loss": 0.4678, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 2.230768332923383, |
| "learning_rate": 1.4209403598929711e-06, |
| "loss": 0.5556, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 2.2444577467361078, |
| "learning_rate": 1.4140270398050899e-06, |
| "loss": 0.6313, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 2.0449648982479385, |
| "learning_rate": 1.407127807676259e-06, |
| "loss": 0.5457, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 2.0517370766830707, |
| "learning_rate": 1.4002426906110034e-06, |
| "loss": 0.539, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 2.0847967027851375, |
| "learning_rate": 1.3933717156583975e-06, |
| "loss": 0.5256, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 2.1822442085414386, |
| "learning_rate": 1.386514909811958e-06, |
| "loss": 0.5648, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 2.01409537100995, |
| "learning_rate": 1.3796723000095312e-06, |
| "loss": 0.5878, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 2.2139379142084357, |
| "learning_rate": 1.3728439131331972e-06, |
| "loss": 0.5724, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 2.022116537133516, |
| "learning_rate": 1.366029776009159e-06, |
| "loss": 0.5686, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 2.3703607571589265, |
| "learning_rate": 1.3592299154076344e-06, |
| "loss": 0.676, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 2.1435846797265317, |
| "learning_rate": 1.3524443580427565e-06, |
| "loss": 0.6176, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.9476892823094056, |
| "learning_rate": 1.3456731305724685e-06, |
| "loss": 0.5245, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.7780892199670588, |
| "learning_rate": 1.3389162595984106e-06, |
| "loss": 0.4913, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 2.147087917412656, |
| "learning_rate": 1.3321737716658284e-06, |
| "loss": 0.5712, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.9396671940173766, |
| "learning_rate": 1.3254456932634557e-06, |
| "loss": 0.5236, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 2.105081861677922, |
| "learning_rate": 1.3187320508234208e-06, |
| "loss": 0.528, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 2.119521541148906, |
| "learning_rate": 1.3120328707211394e-06, |
| "loss": 0.5511, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.9411630633184176, |
| "learning_rate": 1.3053481792752044e-06, |
| "loss": 0.5692, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 2.13953797890098, |
| "learning_rate": 1.298678002747294e-06, |
| "loss": 0.6083, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.696024362037954, |
| "learning_rate": 1.2920223673420584e-06, |
| "loss": 0.4515, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.8867892952085517, |
| "learning_rate": 1.285381299207026e-06, |
| "loss": 0.5367, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 2.052265073795798, |
| "learning_rate": 1.2787548244324888e-06, |
| "loss": 0.6345, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.9697590487372725, |
| "learning_rate": 1.2721429690514142e-06, |
| "loss": 0.5131, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.950928985112725, |
| "learning_rate": 1.26554575903933e-06, |
| "loss": 0.5065, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 2.040285812654646, |
| "learning_rate": 1.2589632203142316e-06, |
| "loss": 0.6118, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 2.0858975965094095, |
| "learning_rate": 1.2523953787364723e-06, |
| "loss": 0.5986, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.96463477151841, |
| "learning_rate": 1.24584226010867e-06, |
| "loss": 0.5598, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 2.1145952141868434, |
| "learning_rate": 1.2393038901756e-06, |
| "loss": 0.5922, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.8968425608898443, |
| "learning_rate": 1.232780294624093e-06, |
| "loss": 0.5095, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 2.0301980708684866, |
| "learning_rate": 1.22627149908294e-06, |
| "loss": 0.5498, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.8270409790198994, |
| "learning_rate": 1.2197775291227887e-06, |
| "loss": 0.4714, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 2.0963357615007703, |
| "learning_rate": 1.2132984102560374e-06, |
| "loss": 0.6149, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.9801877339079816, |
| "learning_rate": 1.2068341679367452e-06, |
| "loss": 0.5337, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.980282736079062, |
| "learning_rate": 1.2003848275605263e-06, |
| "loss": 0.5857, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.835028289058003, |
| "learning_rate": 1.1939504144644464e-06, |
| "loss": 0.5959, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 2.120379341814735, |
| "learning_rate": 1.1875309539269332e-06, |
| "loss": 0.5015, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.93303989363275, |
| "learning_rate": 1.1811264711676661e-06, |
| "loss": 0.5125, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.973773020853456, |
| "learning_rate": 1.1747369913474866e-06, |
| "loss": 0.5864, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 2.0447822785351994, |
| "learning_rate": 1.1683625395682935e-06, |
| "loss": 0.572, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.9251270954167108, |
| "learning_rate": 1.1620031408729443e-06, |
| "loss": 0.5745, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.881635576782646, |
| "learning_rate": 1.1556588202451613e-06, |
| "loss": 0.4638, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.7896548110439616, |
| "learning_rate": 1.1493296026094302e-06, |
| "loss": 0.5252, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.9326908202122164, |
| "learning_rate": 1.1430155128309e-06, |
| "loss": 0.4933, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.9844579045400434, |
| "learning_rate": 1.1367165757152905e-06, |
| "loss": 0.5393, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 2.0830711516317573, |
| "learning_rate": 1.1304328160087935e-06, |
| "loss": 0.6165, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 2.0820587210774875, |
| "learning_rate": 1.12416425839797e-06, |
| "loss": 0.5735, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.9942738518131777, |
| "learning_rate": 1.1179109275096628e-06, |
| "loss": 0.5331, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 2.09146135494079, |
| "learning_rate": 1.1116728479108884e-06, |
| "loss": 0.4912, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 2.0181223861753685, |
| "learning_rate": 1.105450044108753e-06, |
| "loss": 0.5767, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.7770312565830746, |
| "learning_rate": 1.099242540550347e-06, |
| "loss": 0.5222, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.9504760795862741, |
| "learning_rate": 1.0930503616226495e-06, |
| "loss": 0.605, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.8039286890109292, |
| "learning_rate": 1.0868735316524387e-06, |
| "loss": 0.439, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.8781587995004858, |
| "learning_rate": 1.0807120749061923e-06, |
| "loss": 0.4785, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 2.1386175502459466, |
| "learning_rate": 1.0745660155899878e-06, |
| "loss": 0.6047, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 2.0105202575538255, |
| "learning_rate": 1.0684353778494166e-06, |
| "loss": 0.6412, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 2.110747721557068, |
| "learning_rate": 1.0623201857694837e-06, |
| "loss": 0.5084, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 2.0266382461097576, |
| "learning_rate": 1.056220463374511e-06, |
| "loss": 0.5513, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.941131403268958, |
| "learning_rate": 1.0501362346280492e-06, |
| "loss": 0.5362, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 2.179036766615289, |
| "learning_rate": 1.0440675234327774e-06, |
| "loss": 0.5566, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.9388634477990079, |
| "learning_rate": 1.0380143536304133e-06, |
| "loss": 0.5316, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.9303281784663635, |
| "learning_rate": 1.0319767490016196e-06, |
| "loss": 0.5194, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 2.3880703426336356, |
| "learning_rate": 1.0259547332659065e-06, |
| "loss": 0.7486, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.9926065703927103, |
| "learning_rate": 1.0199483300815421e-06, |
| "loss": 0.527, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.9341158574219506, |
| "learning_rate": 1.0139575630454618e-06, |
| "loss": 0.5403, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.8567155488677838, |
| "learning_rate": 1.0079824556931655e-06, |
| "loss": 0.548, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 2.0404242620951436, |
| "learning_rate": 1.0020230314986395e-06, |
| "loss": 0.498, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 2.0107577837327457, |
| "learning_rate": 9.960793138742503e-07, |
| "loss": 0.58, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.764122033295333, |
| "learning_rate": 9.901513261706652e-07, |
| "loss": 0.4909, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 2.1074644373274225, |
| "learning_rate": 9.84239091676748e-07, |
| "loss": 0.5358, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.8835155965933714, |
| "learning_rate": 9.783426336194807e-07, |
| "loss": 0.5683, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.9460151844862306, |
| "learning_rate": 9.724619751638598e-07, |
| "loss": 0.5901, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.9960692765948507, |
| "learning_rate": 9.665971394128137e-07, |
| "loss": 0.5299, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.904429551506709, |
| "learning_rate": 9.607481494071107e-07, |
| "loss": 0.5077, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 2.0807551324391618, |
| "learning_rate": 9.549150281252633e-07, |
| "loss": 0.4932, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.880996194999353, |
| "learning_rate": 9.490977984834454e-07, |
| "loss": 0.5256, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 2.292702889415888, |
| "learning_rate": 9.432964833353947e-07, |
| "loss": 0.5633, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.8157124323149034, |
| "learning_rate": 9.375111054723301e-07, |
| "loss": 0.5443, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.997225759296561, |
| "learning_rate": 9.317416876228591e-07, |
| "loss": 0.6053, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 2.2930834747649187, |
| "learning_rate": 9.259882524528835e-07, |
| "loss": 0.647, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.7855930884686897, |
| "learning_rate": 9.202508225655216e-07, |
| "loss": 0.4861, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 2.041666912419482, |
| "learning_rate": 9.145294205010058e-07, |
| "loss": 0.5105, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 2.1259688035148496, |
| "learning_rate": 9.088240687366073e-07, |
| "loss": 0.6038, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 2.0323306545521436, |
| "learning_rate": 9.0313478968654e-07, |
| "loss": 0.5853, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 2.044128726826921, |
| "learning_rate": 8.974616057018709e-07, |
| "loss": 0.5153, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.944192337336169, |
| "learning_rate": 8.918045390704383e-07, |
| "loss": 0.5475, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 2.062636307328745, |
| "learning_rate": 8.861636120167632e-07, |
| "loss": 0.5959, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 2.1486851598365946, |
| "learning_rate": 8.805388467019549e-07, |
| "loss": 0.5959, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 2.2040420193997483, |
| "learning_rate": 8.749302652236341e-07, |
| "loss": 0.6322, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 2.20406327847528, |
| "learning_rate": 8.693378896158377e-07, |
| "loss": 0.6114, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 2.1795279212664886, |
| "learning_rate": 8.637617418489386e-07, |
| "loss": 0.5828, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 2.041352967900095, |
| "learning_rate": 8.582018438295553e-07, |
| "loss": 0.5139, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 2.0047043727475167, |
| "learning_rate": 8.52658217400466e-07, |
| "loss": 0.5492, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.9340395107266044, |
| "learning_rate": 8.471308843405252e-07, |
| "loss": 0.5404, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.9443767009288708, |
| "learning_rate": 8.416198663645775e-07, |
| "loss": 0.6145, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 2.0047578158470385, |
| "learning_rate": 8.361251851233687e-07, |
| "loss": 0.5147, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.9775796297395034, |
| "learning_rate": 8.306468622034663e-07, |
| "loss": 0.4914, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 2.0350740385108783, |
| "learning_rate": 8.251849191271727e-07, |
| "loss": 0.5988, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.810070925909079, |
| "learning_rate": 8.197393773524359e-07, |
| "loss": 0.4841, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.9563215783299615, |
| "learning_rate": 8.143102582727741e-07, |
| "loss": 0.5356, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.8199362899555016, |
| "learning_rate": 8.088975832171819e-07, |
| "loss": 0.4712, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 2.309704865979389, |
| "learning_rate": 8.035013734500557e-07, |
| "loss": 0.6218, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 2.0272627809313923, |
| "learning_rate": 7.981216501711053e-07, |
| "loss": 0.5838, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 2.0114764889056613, |
| "learning_rate": 7.927584345152672e-07, |
| "loss": 0.5609, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.9745269009451916, |
| "learning_rate": 7.874117475526305e-07, |
| "loss": 0.4989, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 2.253717109150984, |
| "learning_rate": 7.820816102883477e-07, |
| "loss": 0.6223, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 2.098840934097801, |
| "learning_rate": 7.767680436625513e-07, |
| "loss": 0.5429, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.9010834528450948, |
| "learning_rate": 7.714710685502764e-07, |
| "loss": 0.5055, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 2.0272090911880176, |
| "learning_rate": 7.661907057613766e-07, |
| "loss": 0.5749, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 2.1091229605800645, |
| "learning_rate": 7.609269760404392e-07, |
| "loss": 0.5019, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.9509717065137873, |
| "learning_rate": 7.556799000667097e-07, |
| "loss": 0.4808, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.873704324767879, |
| "learning_rate": 7.504494984540033e-07, |
| "loss": 0.4928, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.987007022534763, |
| "learning_rate": 7.452357917506309e-07, |
| "loss": 0.5312, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 2.1167223759818503, |
| "learning_rate": 7.40038800439315e-07, |
| "loss": 0.5532, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.8820691821813884, |
| "learning_rate": 7.348585449371076e-07, |
| "loss": 0.5615, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.9800556013383923, |
| "learning_rate": 7.296950455953145e-07, |
| "loss": 0.5546, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 2.278698097867691, |
| "learning_rate": 7.245483226994094e-07, |
| "loss": 0.6373, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.8404133278696633, |
| "learning_rate": 7.19418396468961e-07, |
| "loss": 0.5212, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 2.082478736477018, |
| "learning_rate": 7.14305287057549e-07, |
| "loss": 0.5405, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 2.274543877989927, |
| "learning_rate": 7.092090145526842e-07, |
| "loss": 0.5788, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 2.1975775615791284, |
| "learning_rate": 7.041295989757352e-07, |
| "loss": 0.5705, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 2.3957382130192744, |
| "learning_rate": 6.990670602818412e-07, |
| "loss": 0.5319, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.9572975655214617, |
| "learning_rate": 6.940214183598431e-07, |
| "loss": 0.4977, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 2.0343363601457796, |
| "learning_rate": 6.889926930321961e-07, |
| "loss": 0.5601, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 2.2173297633145497, |
| "learning_rate": 6.839809040549017e-07, |
| "loss": 0.5652, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.9898806046683502, |
| "learning_rate": 6.789860711174184e-07, |
| "loss": 0.5604, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.755937949976552, |
| "learning_rate": 6.740082138425963e-07, |
| "loss": 0.5268, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 2.059314359247024, |
| "learning_rate": 6.690473517865925e-07, |
| "loss": 0.5516, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.921092025716401, |
| "learning_rate": 6.641035044387939e-07, |
| "loss": 0.5282, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 2.1611589441440904, |
| "learning_rate": 6.591766912217456e-07, |
| "loss": 0.5721, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 2.07998470722096, |
| "learning_rate": 6.542669314910732e-07, |
| "loss": 0.616, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.8276327443296445, |
| "learning_rate": 6.493742445354012e-07, |
| "loss": 0.4733, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 2.0470888106096568, |
| "learning_rate": 6.44498649576285e-07, |
| "loss": 0.6115, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.930085414769607, |
| "learning_rate": 6.39640165768129e-07, |
| "loss": 0.5524, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.992132029715179, |
| "learning_rate": 6.347988121981175e-07, |
| "loss": 0.5116, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.9749787878274394, |
| "learning_rate": 6.299746078861346e-07, |
| "loss": 0.5243, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 2.1338239619198003, |
| "learning_rate": 6.251675717846905e-07, |
| "loss": 0.6601, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.9037772081403355, |
| "learning_rate": 6.203777227788493e-07, |
| "loss": 0.537, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.982734111031249, |
| "learning_rate": 6.156050796861551e-07, |
| "loss": 0.5447, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.9647030662707663, |
| "learning_rate": 6.108496612565507e-07, |
| "loss": 0.5572, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 2.20503978552407, |
| "learning_rate": 6.061114861723144e-07, |
| "loss": 0.5847, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.8985370142150249, |
| "learning_rate": 6.013905730479824e-07, |
| "loss": 0.5245, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 2.0108479395190204, |
| "learning_rate": 5.966869404302705e-07, |
| "loss": 0.4869, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.6778892025630097, |
| "learning_rate": 5.920006067980105e-07, |
| "loss": 0.4713, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.9622392449880077, |
| "learning_rate": 5.873315905620685e-07, |
| "loss": 0.5619, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.8986949233718315, |
| "learning_rate": 5.826799100652802e-07, |
| "loss": 0.5944, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 2.0094350555686726, |
| "learning_rate": 5.780455835823767e-07, |
| "loss": 0.6029, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 2.01549443606308, |
| "learning_rate": 5.734286293199065e-07, |
| "loss": 0.5168, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.9026568713715968, |
| "learning_rate": 5.688290654161738e-07, |
| "loss": 0.4661, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 2.090067485392997, |
| "learning_rate": 5.642469099411619e-07, |
| "loss": 0.5773, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 2.041801412484887, |
| "learning_rate": 5.596821808964592e-07, |
| "loss": 0.5174, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.8047865906862048, |
| "learning_rate": 5.551348962151965e-07, |
| "loss": 0.5096, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.7749522813672998, |
| "learning_rate": 5.506050737619706e-07, |
| "loss": 0.4149, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.9669167109351449, |
| "learning_rate": 5.460927313327746e-07, |
| "loss": 0.5318, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 2.0851119947571397, |
| "learning_rate": 5.415978866549309e-07, |
| "loss": 0.5206, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.770305337825321, |
| "learning_rate": 5.371205573870169e-07, |
| "loss": 0.5146, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 2.0529803134924793, |
| "learning_rate": 5.326607611188023e-07, |
| "loss": 0.5925, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.8408476413034762, |
| "learning_rate": 5.282185153711739e-07, |
| "loss": 0.5419, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 2.1277879535451887, |
| "learning_rate": 5.237938375960683e-07, |
| "loss": 0.5522, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.798009767692874, |
| "learning_rate": 5.19386745176405e-07, |
| "loss": 0.4908, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.9218825894337166, |
| "learning_rate": 5.149972554260191e-07, |
| "loss": 0.5907, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.7135646036349135, |
| "learning_rate": 5.106253855895865e-07, |
| "loss": 0.5325, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 2.0051336782599916, |
| "learning_rate": 5.062711528425657e-07, |
| "loss": 0.552, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.823540052411061, |
| "learning_rate": 5.019345742911241e-07, |
| "loss": 0.5279, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.9498421802995456, |
| "learning_rate": 4.976156669720706e-07, |
| "loss": 0.4684, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 2.0510089396163464, |
| "learning_rate": 4.933144478527929e-07, |
| "loss": 0.5733, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.718921166851957, |
| "learning_rate": 4.890309338311861e-07, |
| "loss": 0.4503, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 2.066841848267666, |
| "learning_rate": 4.847651417355914e-07, |
| "loss": 0.5523, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.935126073825529, |
| "learning_rate": 4.805170883247228e-07, |
| "loss": 0.5709, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.7529161435960845, |
| "learning_rate": 4.7628679028761114e-07, |
| "loss": 0.4784, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.9418531589165946, |
| "learning_rate": 4.720742642435272e-07, |
| "loss": 0.5417, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 2.256822591186601, |
| "learning_rate": 4.678795267419267e-07, |
| "loss": 0.5787, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.7848727741194108, |
| "learning_rate": 4.63702594262378e-07, |
| "loss": 0.4068, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 2.1119584563313314, |
| "learning_rate": 4.595434832145013e-07, |
| "loss": 0.6635, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 2.047030257561458, |
| "learning_rate": 4.554022099379035e-07, |
| "loss": 0.5171, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.9273060718802395, |
| "learning_rate": 4.5127879070211213e-07, |
| "loss": 0.5597, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 2.1177945388342727, |
| "learning_rate": 4.471732417065144e-07, |
| "loss": 0.5861, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 2.1663172290104002, |
| "learning_rate": 4.430855790802896e-07, |
| "loss": 0.5851, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 2.1902483639800887, |
| "learning_rate": 4.3901581888235067e-07, |
| "loss": 0.5485, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 2.27940192829255, |
| "learning_rate": 4.3496397710127756e-07, |
| "loss": 0.5683, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.6787266853728975, |
| "learning_rate": 4.3093006965525483e-07, |
| "loss": 0.4487, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 2.2173461979846554, |
| "learning_rate": 4.2691411239201007e-07, |
| "loss": 0.6181, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 2.0362219497157663, |
| "learning_rate": 4.2291612108875226e-07, |
| "loss": 0.5827, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.984683976482598, |
| "learning_rate": 4.189361114521062e-07, |
| "loss": 0.5687, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 2.010835056818883, |
| "learning_rate": 4.149740991180573e-07, |
| "loss": 0.5484, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 2.113774248995943, |
| "learning_rate": 4.1103009965188125e-07, |
| "loss": 0.598, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 2.0151775021994385, |
| "learning_rate": 4.0710412854809255e-07, |
| "loss": 0.4896, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.9514989153137001, |
| "learning_rate": 4.0319620123037697e-07, |
| "loss": 0.5659, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 2.07614867510403, |
| "learning_rate": 3.9930633305153177e-07, |
| "loss": 0.4641, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 2.2405590611515933, |
| "learning_rate": 3.9543453929340834e-07, |
| "loss": 0.5112, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 2.14510497102877, |
| "learning_rate": 3.9158083516685043e-07, |
| "loss": 0.6867, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.8206597677309004, |
| "learning_rate": 3.8774523581163236e-07, |
| "loss": 0.5024, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.9730244343440717, |
| "learning_rate": 3.8392775629640275e-07, |
| "loss": 0.6115, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 2.0113186406624677, |
| "learning_rate": 3.80128411618626e-07, |
| "loss": 0.5308, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 2.291565453318076, |
| "learning_rate": 3.763472167045179e-07, |
| "loss": 0.5849, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.865162322112732, |
| "learning_rate": 3.72584186408993e-07, |
| "loss": 0.4345, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.7806466246249277, |
| "learning_rate": 3.688393355156022e-07, |
| "loss": 0.4976, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.837932154589643, |
| "learning_rate": 3.6511267873647725e-07, |
| "loss": 0.5382, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.900042396349841, |
| "learning_rate": 3.614042307122728e-07, |
| "loss": 0.5135, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 2.0377705675722435, |
| "learning_rate": 3.577140060121059e-07, |
| "loss": 0.6439, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.9812798873664048, |
| "learning_rate": 3.54042019133502e-07, |
| "loss": 0.518, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.9801105899864375, |
| "learning_rate": 3.5038828450233874e-07, |
| "loss": 0.5513, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.787558486401592, |
| "learning_rate": 3.4675281647278346e-07, |
| "loss": 0.4717, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.7350169337556531, |
| "learning_rate": 3.431356293272442e-07, |
| "loss": 0.4517, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.9036391339274414, |
| "learning_rate": 3.395367372763092e-07, |
| "loss": 0.4952, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 2.1326612564420078, |
| "learning_rate": 3.3595615445869033e-07, |
| "loss": 0.665, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.9766629375982392, |
| "learning_rate": 3.3239389494117316e-07, |
| "loss": 0.4712, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 2.085629103255329, |
| "learning_rate": 3.288499727185529e-07, |
| "loss": 0.5991, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.9952308886565442, |
| "learning_rate": 3.253244017135876e-07, |
| "loss": 0.5492, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.9823172713501394, |
| "learning_rate": 3.218171957769411e-07, |
| "loss": 0.5133, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 2.243504252708836, |
| "learning_rate": 3.183283686871236e-07, |
| "loss": 0.5375, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 2.160814779756097, |
| "learning_rate": 3.1485793415044483e-07, |
| "loss": 0.5441, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 2.1575822253594397, |
| "learning_rate": 3.1140590580095777e-07, |
| "loss": 0.5261, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 2.072867103693364, |
| "learning_rate": 3.079722972004007e-07, |
| "loss": 0.528, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 2.067717194924602, |
| "learning_rate": 3.0455712183815044e-07, |
| "loss": 0.5705, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 2.2018910614851053, |
| "learning_rate": 3.011603931311652e-07, |
| "loss": 0.6087, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.799733514347377, |
| "learning_rate": 2.9778212442393373e-07, |
| "loss": 0.3817, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 2.0297829733559096, |
| "learning_rate": 2.9442232898842184e-07, |
| "loss": 0.5627, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 2.1127229638235314, |
| "learning_rate": 2.910810200240205e-07, |
| "loss": 0.6539, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 2.2173549226318148, |
| "learning_rate": 2.877582106574961e-07, |
| "loss": 0.6292, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.7665892894148392, |
| "learning_rate": 2.8445391394293364e-07, |
| "loss": 0.536, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.9459724264850673, |
| "learning_rate": 2.811681428616919e-07, |
| "loss": 0.506, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 2.055519305865498, |
| "learning_rate": 2.779009103223473e-07, |
| "loss": 0.5743, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.927080536920677, |
| "learning_rate": 2.746522291606463e-07, |
| "loss": 0.5181, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 2.04096009635488, |
| "learning_rate": 2.7142211213945224e-07, |
| "loss": 0.564, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.9317429342953267, |
| "learning_rate": 2.682105719486994e-07, |
| "loss": 0.5655, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 2.1614143475246803, |
| "learning_rate": 2.65017621205339e-07, |
| "loss": 0.5385, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.9518158323053523, |
| "learning_rate": 2.61843272453291e-07, |
| "loss": 0.5292, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.988402222876475, |
| "learning_rate": 2.5868753816339574e-07, |
| "loss": 0.4855, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 2.2461460760928973, |
| "learning_rate": 2.5555043073336394e-07, |
| "loss": 0.545, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 2.016749712197434, |
| "learning_rate": 2.524319624877275e-07, |
| "loss": 0.5487, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.8374656172629769, |
| "learning_rate": 2.4933214567779473e-07, |
| "loss": 0.4698, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 2.1647972903983224, |
| "learning_rate": 2.462509924815948e-07, |
| "loss": 0.6418, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.9502593053235608, |
| "learning_rate": 2.4318851500383823e-07, |
| "loss": 0.5, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.9211282369606173, |
| "learning_rate": 2.4014472527586483e-07, |
| "loss": 0.4927, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 2.329177425501773, |
| "learning_rate": 2.3711963525559544e-07, |
| "loss": 0.5993, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 2.225800165473701, |
| "learning_rate": 2.3411325682748843e-07, |
| "loss": 0.6954, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 2.0522726914562703, |
| "learning_rate": 2.3112560180249154e-07, |
| "loss": 0.5618, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 2.1168420702658657, |
| "learning_rate": 2.2815668191799255e-07, |
| "loss": 0.5674, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 2.032866002096309, |
| "learning_rate": 2.2520650883777917e-07, |
| "loss": 0.5903, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.927079943417791, |
| "learning_rate": 2.222750941519869e-07, |
| "loss": 0.5379, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 2.230233104141958, |
| "learning_rate": 2.193624493770591e-07, |
| "loss": 0.5362, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.9871866256133242, |
| "learning_rate": 2.1646858595569754e-07, |
| "loss": 0.5402, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 2.0451780616189827, |
| "learning_rate": 2.135935152568186e-07, |
| "loss": 0.5671, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.9066201288251257, |
| "learning_rate": 2.107372485755105e-07, |
| "loss": 0.5467, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.797950359009013, |
| "learning_rate": 2.0789979713298714e-07, |
| "loss": 0.5164, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 2.071072453128202, |
| "learning_rate": 2.0508117207654276e-07, |
| "loss": 0.5991, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 2.317654978785418, |
| "learning_rate": 2.0228138447951128e-07, |
| "loss": 0.6293, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 2.053251641022129, |
| "learning_rate": 1.9950044534122138e-07, |
| "loss": 0.5853, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.971312933014048, |
| "learning_rate": 1.9673836558695148e-07, |
| "loss": 0.4732, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.9713907215979547, |
| "learning_rate": 1.9399515606789098e-07, |
| "loss": 0.6066, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.9568482450287366, |
| "learning_rate": 1.9127082756109138e-07, |
| "loss": 0.547, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.7163468064474119, |
| "learning_rate": 1.8856539076943126e-07, |
| "loss": 0.3999, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.9178630187217227, |
| "learning_rate": 1.858788563215702e-07, |
| "loss": 0.5042, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 2.127461173898097, |
| "learning_rate": 1.8321123477190506e-07, |
| "loss": 0.6439, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 2.231384706660567, |
| "learning_rate": 1.8056253660053258e-07, |
| "loss": 0.5077, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.9635997200687012, |
| "learning_rate": 1.7793277221320794e-07, |
| "loss": 0.5042, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 2.073065458571614, |
| "learning_rate": 1.7532195194129964e-07, |
| "loss": 0.5212, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.977411836335037, |
| "learning_rate": 1.7273008604175301e-07, |
| "loss": 0.5035, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.9442884403854808, |
| "learning_rate": 1.7015718469705066e-07, |
| "loss": 0.5649, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.9764790370004715, |
| "learning_rate": 1.6760325801516597e-07, |
| "loss": 0.5255, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.9950729966418053, |
| "learning_rate": 1.6506831602953298e-07, |
| "loss": 0.5285, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.876091437155584, |
| "learning_rate": 1.625523686989977e-07, |
| "loss": 0.4915, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 2.1698152647396465, |
| "learning_rate": 1.6005542590778521e-07, |
| "loss": 0.6394, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.9390084511570913, |
| "learning_rate": 1.5757749746546037e-07, |
| "loss": 0.5461, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 2.0186157583922135, |
| "learning_rate": 1.5511859310688326e-07, |
| "loss": 0.5515, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.908132809961028, |
| "learning_rate": 1.5267872249217997e-07, |
| "loss": 0.4557, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 2.189140445691145, |
| "learning_rate": 1.5025789520669688e-07, |
| "loss": 0.5904, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 2.1203241389390257, |
| "learning_rate": 1.4785612076096856e-07, |
| "loss": 0.5698, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.8864144078816145, |
| "learning_rate": 1.454734085906756e-07, |
| "loss": 0.5211, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 2.0640726129454867, |
| "learning_rate": 1.4310976805661237e-07, |
| "loss": 0.534, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.9593391676293495, |
| "learning_rate": 1.407652084446459e-07, |
| "loss": 0.5575, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.9956718882503213, |
| "learning_rate": 1.3843973896568275e-07, |
| "loss": 0.4995, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 2.0309484325202027, |
| "learning_rate": 1.3613336875563045e-07, |
| "loss": 0.5561, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 2.2977806815622808, |
| "learning_rate": 1.338461068753627e-07, |
| "loss": 0.6895, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.9903192519492166, |
| "learning_rate": 1.3157796231068497e-07, |
| "loss": 0.5644, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.9448808087386893, |
| "learning_rate": 1.293289439722961e-07, |
| "loss": 0.5146, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 2.115645901357327, |
| "learning_rate": 1.2709906069575561e-07, |
| "loss": 0.5702, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 2.1808080246002457, |
| "learning_rate": 1.2488832124144923e-07, |
| "loss": 0.4805, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.8228170984929375, |
| "learning_rate": 1.2269673429455287e-07, |
| "loss": 0.4851, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 2.038456785853388, |
| "learning_rate": 1.2052430846499984e-07, |
| "loss": 0.5771, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.8904101603890644, |
| "learning_rate": 1.183710522874454e-07, |
| "loss": 0.4813, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.7215684414215477, |
| "learning_rate": 1.1623697422123603e-07, |
| "loss": 0.4418, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.949814139587883, |
| "learning_rate": 1.1412208265037417e-07, |
| "loss": 0.4467, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.989698073002338, |
| "learning_rate": 1.1202638588348413e-07, |
| "loss": 0.479, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.9796631752851332, |
| "learning_rate": 1.0994989215378227e-07, |
| "loss": 0.6001, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 2.0989381422150837, |
| "learning_rate": 1.0789260961904357e-07, |
| "loss": 0.5106, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.971183598874908, |
| "learning_rate": 1.0585454636156788e-07, |
| "loss": 0.5654, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.8203580247750317, |
| "learning_rate": 1.0383571038815155e-07, |
| "loss": 0.4136, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 2.1265452345564264, |
| "learning_rate": 1.0183610963005298e-07, |
| "loss": 0.6466, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.9020416641784, |
| "learning_rate": 9.98557519429616e-08, |
| "loss": 0.5149, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.9565333994726395, |
| "learning_rate": 9.789464510697011e-08, |
| "loss": 0.6182, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 2.192339437616818, |
| "learning_rate": 9.595279682654002e-08, |
| "loss": 0.5793, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.752208514931023, |
| "learning_rate": 9.40302147304739e-08, |
| "loss": 0.4573, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.9487144688198685, |
| "learning_rate": 9.212690637188492e-08, |
| "loss": 0.5045, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 2.1355012550891095, |
| "learning_rate": 9.024287922816566e-08, |
| "loss": 0.5376, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.830404039518353, |
| "learning_rate": 8.83781407009604e-08, |
| "loss": 0.5084, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 2.0061575400005927, |
| "learning_rate": 8.653269811613685e-08, |
| "loss": 0.5405, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.9051869282056297, |
| "learning_rate": 8.4706558723755e-08, |
| "loss": 0.4705, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.970236377135854, |
| "learning_rate": 8.289972969803884e-08, |
| "loss": 0.4761, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 2.393702968269512, |
| "learning_rate": 8.111221813735137e-08, |
| "loss": 0.5913, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 2.1643166252959807, |
| "learning_rate": 7.934403106416245e-08, |
| "loss": 0.6399, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 2.24965238428619, |
| "learning_rate": 7.759517542502426e-08, |
| "loss": 0.5946, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 2.0038056323845663, |
| "learning_rate": 7.586565809054258e-08, |
| "loss": 0.5606, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 2.1007936583122397, |
| "learning_rate": 7.415548585534949e-08, |
| "loss": 0.6222, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 2.0990617623271497, |
| "learning_rate": 7.246466543807951e-08, |
| "loss": 0.6033, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 2.235834211479321, |
| "learning_rate": 7.0793203481338e-08, |
| "loss": 0.5658, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 2.0971634187847843, |
| "learning_rate": 6.914110655168005e-08, |
| "loss": 0.5197, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 2.2723168272777845, |
| "learning_rate": 6.750838113958381e-08, |
| "loss": 0.5444, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 2.14031105890706, |
| "learning_rate": 6.589503365941996e-08, |
| "loss": 0.5484, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.9836284579510188, |
| "learning_rate": 6.430107044943512e-08, |
| "loss": 0.5281, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.8255909265594834, |
| "learning_rate": 6.272649777171902e-08, |
| "loss": 0.4866, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 2.107443031046224, |
| "learning_rate": 6.117132181218454e-08, |
| "loss": 0.5199, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 2.1682405735610626, |
| "learning_rate": 5.963554868054167e-08, |
| "loss": 0.539, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 2.24775315981034, |
| "learning_rate": 5.8119184410274085e-08, |
| "loss": 0.5139, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.998700285054948, |
| "learning_rate": 5.662223495861596e-08, |
| "loss": 0.5518, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 2.006720101430102, |
| "learning_rate": 5.5144706206525235e-08, |
| "loss": 0.5034, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.9240681602880376, |
| "learning_rate": 5.368660395866643e-08, |
| "loss": 0.5566, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 2.367451906830005, |
| "learning_rate": 5.2247933943382344e-08, |
| "loss": 0.6171, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.9591190023164184, |
| "learning_rate": 5.0828701812674074e-08, |
| "loss": 0.5367, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 2.0598213561773577, |
| "learning_rate": 4.94289131421799e-08, |
| "loss": 0.5737, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 2.0711836264717634, |
| "learning_rate": 4.804857343114977e-08, |
| "loss": 0.5522, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.803888225967423, |
| "learning_rate": 4.668768810242752e-08, |
| "loss": 0.441, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 2.0820741333254507, |
| "learning_rate": 4.534626250242702e-08, |
| "loss": 0.5394, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 2.2847763509593477, |
| "learning_rate": 4.4024301901113285e-08, |
| "loss": 0.5254, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.9411071290247621, |
| "learning_rate": 4.2721811491978626e-08, |
| "loss": 0.5702, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.745037379504202, |
| "learning_rate": 4.1438796392025416e-08, |
| "loss": 0.4511, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 2.24676269385492, |
| "learning_rate": 4.017526164174501e-08, |
| "loss": 0.5475, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.8719954199651734, |
| "learning_rate": 3.8931212205096655e-08, |
| "loss": 0.5167, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 2.1782725251581443, |
| "learning_rate": 3.770665296949028e-08, |
| "loss": 0.583, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 2.084620351446328, |
| "learning_rate": 3.650158874576537e-08, |
| "loss": 0.5573, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.9942648310254383, |
| "learning_rate": 3.5316024268172713e-08, |
| "loss": 0.5195, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.9276106667007256, |
| "learning_rate": 3.41499641943549e-08, |
| "loss": 0.461, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 2.0002769940610676, |
| "learning_rate": 3.3003413105331396e-08, |
| "loss": 0.5253, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 2.3233564755810336, |
| "learning_rate": 3.187637550547573e-08, |
| "loss": 0.6343, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.9669404121882792, |
| "learning_rate": 3.076885582250111e-08, |
| "loss": 0.5298, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 2.160706870886176, |
| "learning_rate": 2.9680858407441503e-08, |
| "loss": 0.5412, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 2.1010182979674363, |
| "learning_rate": 2.8612387534636687e-08, |
| "loss": 0.5874, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 2.3185770521155464, |
| "learning_rate": 2.756344740171224e-08, |
| "loss": 0.5676, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.928726797539864, |
| "learning_rate": 2.653404212956512e-08, |
| "loss": 0.5199, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 2.374638635528354, |
| "learning_rate": 2.552417576234756e-08, |
| "loss": 0.5822, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 2.1312842535423666, |
| "learning_rate": 2.4533852267450976e-08, |
| "loss": 0.5486, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.887474697390811, |
| "learning_rate": 2.3563075535487646e-08, |
| "loss": 0.5318, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 2.162989562166336, |
| "learning_rate": 2.2611849380280715e-08, |
| "loss": 0.5646, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.8865197403491825, |
| "learning_rate": 2.1680177538845882e-08, |
| "loss": 0.511, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 2.0221844144721084, |
| "learning_rate": 2.0768063671375292e-08, |
| "loss": 0.5605, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.7341292361915335, |
| "learning_rate": 1.9875511361227562e-08, |
| "loss": 0.5054, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 2.1625957875579194, |
| "learning_rate": 1.9002524114909438e-08, |
| "loss": 0.5484, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.890787277810342, |
| "learning_rate": 1.8149105362064157e-08, |
| "loss": 0.4912, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.89646065518405, |
| "learning_rate": 1.731525845545812e-08, |
| "loss": 0.4987, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 2.000138281803427, |
| "learning_rate": 1.6500986670966444e-08, |
| "loss": 0.5334, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.9360765732063108, |
| "learning_rate": 1.5706293207561896e-08, |
| "loss": 0.5272, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.8899446974988472, |
| "learning_rate": 1.4931181187300413e-08, |
| "loss": 0.5351, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 2.083452401449017, |
| "learning_rate": 1.4175653655309484e-08, |
| "loss": 0.5654, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.7902773793269389, |
| "learning_rate": 1.3439713579777025e-08, |
| "loss": 0.4598, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.9347948196954086, |
| "learning_rate": 1.2723363851939175e-08, |
| "loss": 0.5546, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 2.1793801518923117, |
| "learning_rate": 1.2026607286068637e-08, |
| "loss": 0.5184, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 2.1162767261686497, |
| "learning_rate": 1.1349446619463578e-08, |
| "loss": 0.5576, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.9064679554702675, |
| "learning_rate": 1.0691884512437078e-08, |
| "loss": 0.5593, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.9599229582469748, |
| "learning_rate": 1.0053923548307698e-08, |
| "loss": 0.5226, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 2.3703310743026367, |
| "learning_rate": 9.435566233387261e-09, |
| "loss": 0.6998, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.8265190173903945, |
| "learning_rate": 8.836814996971977e-09, |
| "loss": 0.5149, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.919991660951489, |
| "learning_rate": 8.257672191334664e-09, |
| "loss": 0.5058, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 2.226643529735206, |
| "learning_rate": 7.698140091712547e-09, |
| "loss": 0.5828, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.971584660303256, |
| "learning_rate": 7.158220896298917e-09, |
| "loss": 0.5688, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 2.0480628954718223, |
| "learning_rate": 6.637916726237592e-09, |
| "loss": 0.5851, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 2.2325632034699905, |
| "learning_rate": 6.1372296256101414e-09, |
| "loss": 0.5965, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 2.0574293300744557, |
| "learning_rate": 5.6561615614314505e-09, |
| "loss": 0.5216, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 2.0626282148992625, |
| "learning_rate": 5.194714423638059e-09, |
| "loss": 0.5443, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.8130730344455217, |
| "learning_rate": 4.752890025086499e-09, |
| "loss": 0.5109, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 2.1550184229788463, |
| "learning_rate": 4.330690101539969e-09, |
| "loss": 0.6121, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.795454334127577, |
| "learning_rate": 3.928116311666119e-09, |
| "loss": 0.4971, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.8886493347103483, |
| "learning_rate": 3.5451702370281616e-09, |
| "loss": 0.4622, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.7323735035854002, |
| "learning_rate": 3.181853382079325e-09, |
| "loss": 0.4841, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.9823067756781059, |
| "learning_rate": 2.8381671741567475e-09, |
| "loss": 0.5521, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 2.127563659134435, |
| "learning_rate": 2.514112963476478e-09, |
| "loss": 0.5391, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 2.124869138735419, |
| "learning_rate": 2.209692023126819e-09, |
| "loss": 0.6062, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.986396902793689, |
| "learning_rate": 1.9249055490655477e-09, |
| "loss": 0.4799, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.9573989864120591, |
| "learning_rate": 1.6597546601127001e-09, |
| "loss": 0.5542, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.8291458436134207, |
| "learning_rate": 1.4142403979483522e-09, |
| "loss": 0.4648, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 2.0950932153675312, |
| "learning_rate": 1.1883637271065118e-09, |
| "loss": 0.5056, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.8233239812965405, |
| "learning_rate": 9.821255349734548e-10, |
| "loss": 0.5067, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 2.1447666317815934, |
| "learning_rate": 7.955266317821731e-10, |
| "loss": 0.5142, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 2.0265645163369683, |
| "learning_rate": 6.28567750610709e-10, |
| "loss": 0.5296, |
| "step": 1627 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.8561507428186768, |
| "learning_rate": 4.812495473788259e-10, |
| "loss": 0.5011, |
| "step": 1628 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.2576656272188163, |
| "learning_rate": 3.5357260084523114e-10, |
| "loss": 0.5452, |
| "step": 1629 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.224588449835144, |
| "learning_rate": 2.4553741260535667e-10, |
| "loss": 0.56, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.027404547376381, |
| "learning_rate": 1.5714440708913815e-10, |
| "loss": 0.5433, |
| "step": 1631 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.9249522577325628, |
| "learning_rate": 8.839393155990472e-11, |
| "loss": 0.5585, |
| "step": 1632 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.916155539003378, |
| "learning_rate": 3.9286256113268973e-11, |
| "loss": 0.5188, |
| "step": 1633 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.0829325561732537, |
| "learning_rate": 9.821573674906326e-12, |
| "loss": 0.5721, |
| "step": 1634 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.060500339504095, |
| "learning_rate": 0.0, |
| "loss": 0.5242, |
| "step": 1635 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 1635, |
| "total_flos": 669628105687040.0, |
| "train_loss": 0.6020827626780997, |
| "train_runtime": 53189.986, |
| "train_samples_per_second": 3.936, |
| "train_steps_per_second": 0.031 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1635, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "total_flos": 669628105687040.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|