{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.997262581596125,
  "eval_steps": 500,
  "global_step": 1480,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0033691303432301536,
      "grad_norm": 6.109744437799009,
      "learning_rate": 5.405405405405406e-07,
      "loss": 0.8395,
      "step": 1
    },
    {
      "epoch": 0.006738260686460307,
      "grad_norm": 6.213278091341755,
      "learning_rate": 1.0810810810810812e-06,
      "loss": 0.8539,
      "step": 2
    },
    {
      "epoch": 0.01010739102969046,
      "grad_norm": 6.2309201627607536,
      "learning_rate": 1.6216216216216219e-06,
      "loss": 0.862,
      "step": 3
    },
    {
      "epoch": 0.013476521372920615,
      "grad_norm": 5.960777622679009,
      "learning_rate": 2.1621621621621623e-06,
      "loss": 0.8418,
      "step": 4
    },
    {
      "epoch": 0.01684565171615077,
      "grad_norm": 5.5645531135403825,
      "learning_rate": 2.702702702702703e-06,
      "loss": 0.8303,
      "step": 5
    },
    {
      "epoch": 0.02021478205938092,
      "grad_norm": 4.368766112287261,
      "learning_rate": 3.2432432432432437e-06,
      "loss": 0.7949,
      "step": 6
    },
    {
      "epoch": 0.023583912402611075,
      "grad_norm": 2.4282762566778153,
      "learning_rate": 3.7837837837837844e-06,
      "loss": 0.7538,
      "step": 7
    },
    {
      "epoch": 0.02695304274584123,
      "grad_norm": 2.286826804105074,
      "learning_rate": 4.324324324324325e-06,
      "loss": 0.7537,
      "step": 8
    },
    {
      "epoch": 0.030322173089071383,
      "grad_norm": 1.7230122995757746,
      "learning_rate": 4.864864864864866e-06,
      "loss": 0.7465,
      "step": 9
    },
    {
      "epoch": 0.03369130343230154,
      "grad_norm": 4.178444915489739,
      "learning_rate": 5.405405405405406e-06,
      "loss": 0.7473,
      "step": 10
    },
    {
      "epoch": 0.03706043377553169,
      "grad_norm": 4.418045830813186,
      "learning_rate": 5.945945945945947e-06,
      "loss": 0.7374,
      "step": 11
    },
    {
      "epoch": 0.04042956411876184,
      "grad_norm": 4.465657225144453,
      "learning_rate": 6.486486486486487e-06,
      "loss": 0.7306,
      "step": 12
    },
    {
      "epoch": 0.043798694461992,
      "grad_norm": 4.089235560381299,
      "learning_rate": 7.027027027027028e-06,
      "loss": 0.6953,
      "step": 13
    },
    {
      "epoch": 0.04716782480522215,
      "grad_norm": 3.9135602100241798,
      "learning_rate": 7.567567567567569e-06,
      "loss": 0.6963,
      "step": 14
    },
    {
      "epoch": 0.05053695514845231,
      "grad_norm": 2.799692430785198,
      "learning_rate": 8.108108108108109e-06,
      "loss": 0.6717,
      "step": 15
    },
    {
      "epoch": 0.05390608549168246,
      "grad_norm": 1.7463496609273474,
      "learning_rate": 8.64864864864865e-06,
      "loss": 0.6544,
      "step": 16
    },
    {
      "epoch": 0.057275215834912616,
      "grad_norm": 1.788941312078948,
      "learning_rate": 9.189189189189191e-06,
      "loss": 0.6521,
      "step": 17
    },
    {
      "epoch": 0.060644346178142766,
      "grad_norm": 2.287622821921788,
      "learning_rate": 9.729729729729732e-06,
      "loss": 0.6429,
      "step": 18
    },
    {
      "epoch": 0.06401347652137292,
      "grad_norm": 1.960430434087058,
      "learning_rate": 1.027027027027027e-05,
      "loss": 0.6392,
      "step": 19
    },
    {
      "epoch": 0.06738260686460308,
      "grad_norm": 1.349903658959779,
      "learning_rate": 1.0810810810810812e-05,
      "loss": 0.6281,
      "step": 20
    },
    {
      "epoch": 0.07075173720783323,
      "grad_norm": 1.2019647976807795,
      "learning_rate": 1.1351351351351352e-05,
      "loss": 0.6151,
      "step": 21
    },
    {
      "epoch": 0.07412086755106338,
      "grad_norm": 0.9434380017683085,
      "learning_rate": 1.1891891891891894e-05,
      "loss": 0.604,
      "step": 22
    },
    {
      "epoch": 0.07748999789429353,
      "grad_norm": 1.1709993819720563,
      "learning_rate": 1.2432432432432433e-05,
      "loss": 0.6027,
      "step": 23
    },
    {
      "epoch": 0.08085912823752368,
      "grad_norm": 0.7729753637518647,
      "learning_rate": 1.2972972972972975e-05,
      "loss": 0.6009,
      "step": 24
    },
    {
      "epoch": 0.08422825858075385,
      "grad_norm": 0.9253134596999972,
      "learning_rate": 1.3513513513513515e-05,
      "loss": 0.5784,
      "step": 25
    },
    {
      "epoch": 0.087597388923984,
      "grad_norm": 0.7203325434817915,
      "learning_rate": 1.4054054054054055e-05,
      "loss": 0.5784,
      "step": 26
    },
    {
      "epoch": 0.09096651926721415,
      "grad_norm": 0.7738717415397595,
      "learning_rate": 1.4594594594594596e-05,
      "loss": 0.5724,
      "step": 27
    },
    {
      "epoch": 0.0943356496104443,
      "grad_norm": 0.756189527437592,
      "learning_rate": 1.5135135135135138e-05,
      "loss": 0.5751,
      "step": 28
    },
    {
      "epoch": 0.09770477995367446,
      "grad_norm": 0.5619223236064955,
      "learning_rate": 1.5675675675675676e-05,
      "loss": 0.5632,
      "step": 29
    },
    {
      "epoch": 0.10107391029690461,
      "grad_norm": 0.6416604397150266,
      "learning_rate": 1.6216216216216218e-05,
      "loss": 0.5624,
      "step": 30
    },
    {
      "epoch": 0.10444304064013477,
      "grad_norm": 0.5213594465327983,
      "learning_rate": 1.6756756756756757e-05,
      "loss": 0.5618,
      "step": 31
    },
    {
      "epoch": 0.10781217098336492,
      "grad_norm": 0.6096320957570693,
      "learning_rate": 1.72972972972973e-05,
      "loss": 0.5569,
      "step": 32
    },
    {
      "epoch": 0.11118130132659507,
      "grad_norm": 0.45054132971017113,
      "learning_rate": 1.783783783783784e-05,
      "loss": 0.547,
      "step": 33
    },
    {
      "epoch": 0.11455043166982523,
      "grad_norm": 0.4599454116974351,
      "learning_rate": 1.8378378378378383e-05,
      "loss": 0.5484,
      "step": 34
    },
    {
      "epoch": 0.11791956201305538,
      "grad_norm": 0.5174247359394964,
      "learning_rate": 1.891891891891892e-05,
      "loss": 0.5493,
      "step": 35
    },
    {
      "epoch": 0.12128869235628553,
      "grad_norm": 0.37366751205930016,
      "learning_rate": 1.9459459459459463e-05,
      "loss": 0.5391,
      "step": 36
    },
    {
      "epoch": 0.12465782269951568,
      "grad_norm": 0.48653413640763127,
      "learning_rate": 2e-05,
      "loss": 0.5403,
      "step": 37
    },
    {
      "epoch": 0.12802695304274583,
      "grad_norm": 0.3651703371460431,
      "learning_rate": 2.054054054054054e-05,
      "loss": 0.5432,
      "step": 38
    },
    {
      "epoch": 0.131396083385976,
      "grad_norm": 0.5149214857154895,
      "learning_rate": 2.1081081081081082e-05,
      "loss": 0.5351,
      "step": 39
    },
    {
      "epoch": 0.13476521372920616,
      "grad_norm": 0.3233162021565579,
      "learning_rate": 2.1621621621621624e-05,
      "loss": 0.5358,
      "step": 40
    },
    {
      "epoch": 0.1381343440724363,
      "grad_norm": 0.3994220446903501,
      "learning_rate": 2.2162162162162163e-05,
      "loss": 0.5215,
      "step": 41
    },
    {
      "epoch": 0.14150347441566646,
      "grad_norm": 0.3613182742062023,
      "learning_rate": 2.2702702702702705e-05,
      "loss": 0.5306,
      "step": 42
    },
    {
      "epoch": 0.1448726047588966,
      "grad_norm": 0.3444241276930956,
      "learning_rate": 2.3243243243243243e-05,
      "loss": 0.5255,
      "step": 43
    },
    {
      "epoch": 0.14824173510212676,
      "grad_norm": 0.4493831957153716,
      "learning_rate": 2.378378378378379e-05,
      "loss": 0.525,
      "step": 44
    },
    {
      "epoch": 0.15161086544535693,
      "grad_norm": 0.44813168016549754,
      "learning_rate": 2.4324324324324327e-05,
      "loss": 0.5182,
      "step": 45
    },
    {
      "epoch": 0.15497999578858707,
      "grad_norm": 0.6714598609090919,
      "learning_rate": 2.4864864864864866e-05,
      "loss": 0.5236,
      "step": 46
    },
    {
      "epoch": 0.15834912613181723,
      "grad_norm": 0.8782480079092542,
      "learning_rate": 2.5405405405405404e-05,
      "loss": 0.5189,
      "step": 47
    },
    {
      "epoch": 0.16171825647504737,
      "grad_norm": 0.933082396567328,
      "learning_rate": 2.594594594594595e-05,
      "loss": 0.5136,
      "step": 48
    },
    {
      "epoch": 0.16508738681827753,
      "grad_norm": 0.770716538259162,
      "learning_rate": 2.6486486486486488e-05,
      "loss": 0.515,
      "step": 49
    },
    {
      "epoch": 0.1684565171615077,
      "grad_norm": 0.5162725525339301,
      "learning_rate": 2.702702702702703e-05,
      "loss": 0.5102,
      "step": 50
    },
    {
      "epoch": 0.17182564750473783,
      "grad_norm": 1.015431854648304,
      "learning_rate": 2.756756756756757e-05,
      "loss": 0.5155,
      "step": 51
    },
    {
      "epoch": 0.175194777847968,
      "grad_norm": 0.9581799901627799,
      "learning_rate": 2.810810810810811e-05,
      "loss": 0.5144,
      "step": 52
    },
    {
      "epoch": 0.17856390819119813,
      "grad_norm": 0.7963519939531664,
      "learning_rate": 2.8648648648648653e-05,
      "loss": 0.5097,
      "step": 53
    },
    {
      "epoch": 0.1819330385344283,
      "grad_norm": 1.2154735765132731,
      "learning_rate": 2.918918918918919e-05,
      "loss": 0.5,
      "step": 54
    },
    {
      "epoch": 0.18530216887765846,
      "grad_norm": 0.5396425181695459,
      "learning_rate": 2.972972972972973e-05,
      "loss": 0.5028,
      "step": 55
    },
    {
      "epoch": 0.1886712992208886,
      "grad_norm": 1.02978967009914,
      "learning_rate": 3.0270270270270275e-05,
      "loss": 0.5081,
      "step": 56
    },
    {
      "epoch": 0.19204042956411876,
      "grad_norm": 0.8713408712796716,
      "learning_rate": 3.081081081081082e-05,
      "loss": 0.5117,
      "step": 57
    },
    {
      "epoch": 0.19540955990734893,
      "grad_norm": 0.5039355010657219,
      "learning_rate": 3.135135135135135e-05,
      "loss": 0.5062,
      "step": 58
    },
    {
      "epoch": 0.19877869025057907,
      "grad_norm": 0.8903957505195225,
      "learning_rate": 3.1891891891891894e-05,
      "loss": 0.5065,
      "step": 59
    },
    {
      "epoch": 0.20214782059380923,
      "grad_norm": 0.6438414779422286,
      "learning_rate": 3.2432432432432436e-05,
      "loss": 0.505,
      "step": 60
    },
    {
      "epoch": 0.20551695093703937,
      "grad_norm": 0.7196844068315907,
      "learning_rate": 3.297297297297298e-05,
      "loss": 0.5027,
      "step": 61
    },
    {
      "epoch": 0.20888608128026953,
      "grad_norm": 0.768470871542238,
      "learning_rate": 3.351351351351351e-05,
      "loss": 0.4952,
      "step": 62
    },
    {
      "epoch": 0.2122552116234997,
      "grad_norm": 1.2385285643157924,
      "learning_rate": 3.4054054054054055e-05,
      "loss": 0.4998,
      "step": 63
    },
    {
      "epoch": 0.21562434196672983,
      "grad_norm": 1.2926026603680456,
      "learning_rate": 3.45945945945946e-05,
      "loss": 0.5059,
      "step": 64
    },
    {
      "epoch": 0.21899347230996,
      "grad_norm": 0.5270463926051342,
      "learning_rate": 3.513513513513514e-05,
      "loss": 0.4882,
      "step": 65
    },
    {
      "epoch": 0.22236260265319013,
      "grad_norm": 0.9232242256936453,
      "learning_rate": 3.567567567567568e-05,
      "loss": 0.5001,
      "step": 66
    },
    {
      "epoch": 0.2257317329964203,
      "grad_norm": 1.1802567310747412,
      "learning_rate": 3.6216216216216216e-05,
      "loss": 0.4988,
      "step": 67
    },
    {
      "epoch": 0.22910086333965046,
      "grad_norm": 0.6824243910642153,
      "learning_rate": 3.6756756756756765e-05,
      "loss": 0.4968,
      "step": 68
    },
    {
      "epoch": 0.2324699936828806,
      "grad_norm": 1.1492977773270214,
      "learning_rate": 3.72972972972973e-05,
      "loss": 0.492,
      "step": 69
    },
    {
      "epoch": 0.23583912402611076,
      "grad_norm": 0.6936995427226362,
      "learning_rate": 3.783783783783784e-05,
      "loss": 0.484,
      "step": 70
    },
    {
      "epoch": 0.2392082543693409,
      "grad_norm": 0.8248787799708727,
      "learning_rate": 3.837837837837838e-05,
      "loss": 0.4875,
      "step": 71
    },
    {
      "epoch": 0.24257738471257106,
      "grad_norm": 0.9148972625023225,
      "learning_rate": 3.8918918918918926e-05,
      "loss": 0.4936,
      "step": 72
    },
    {
      "epoch": 0.24594651505580123,
      "grad_norm": 0.9090987008379405,
      "learning_rate": 3.945945945945946e-05,
      "loss": 0.4915,
      "step": 73
    },
    {
      "epoch": 0.24931564539903137,
      "grad_norm": 1.3515657782891444,
      "learning_rate": 4e-05,
      "loss": 0.4898,
      "step": 74
    },
    {
      "epoch": 0.2526847757422615,
      "grad_norm": 0.9152494686227338,
      "learning_rate": 4.0540540540540545e-05,
      "loss": 0.492,
      "step": 75
    },
    {
      "epoch": 0.25605390608549167,
      "grad_norm": 1.5681418879764368,
      "learning_rate": 4.108108108108108e-05,
      "loss": 0.4927,
      "step": 76
    },
    {
      "epoch": 0.25942303642872183,
      "grad_norm": 0.7357882817582275,
      "learning_rate": 4.162162162162163e-05,
      "loss": 0.4916,
      "step": 77
    },
    {
      "epoch": 0.262792166771952,
      "grad_norm": 1.8207664011692413,
      "learning_rate": 4.2162162162162164e-05,
      "loss": 0.496,
      "step": 78
    },
    {
      "epoch": 0.26616129711518216,
      "grad_norm": 1.1821288274944997,
      "learning_rate": 4.2702702702702706e-05,
      "loss": 0.4853,
      "step": 79
    },
    {
      "epoch": 0.2695304274584123,
      "grad_norm": 1.6328730676456176,
      "learning_rate": 4.324324324324325e-05,
      "loss": 0.4942,
      "step": 80
    },
    {
      "epoch": 0.27289955780164243,
      "grad_norm": 1.5462148913519038,
      "learning_rate": 4.3783783783783783e-05,
      "loss": 0.5016,
      "step": 81
    },
    {
      "epoch": 0.2762686881448726,
      "grad_norm": 1.0513902425500052,
      "learning_rate": 4.4324324324324325e-05,
      "loss": 0.4816,
      "step": 82
    },
    {
      "epoch": 0.27963781848810276,
      "grad_norm": 1.6621005750940228,
      "learning_rate": 4.4864864864864874e-05,
      "loss": 0.4999,
      "step": 83
    },
    {
      "epoch": 0.2830069488313329,
      "grad_norm": 1.0166569610477454,
      "learning_rate": 4.540540540540541e-05,
      "loss": 0.4909,
      "step": 84
    },
    {
      "epoch": 0.2863760791745631,
      "grad_norm": 1.3532816330257298,
      "learning_rate": 4.594594594594595e-05,
      "loss": 0.4853,
      "step": 85
    },
    {
      "epoch": 0.2897452095177932,
      "grad_norm": 1.1921249261435982,
      "learning_rate": 4.6486486486486486e-05,
      "loss": 0.4907,
      "step": 86
    },
    {
      "epoch": 0.29311433986102337,
      "grad_norm": 0.7446095855395451,
      "learning_rate": 4.702702702702703e-05,
      "loss": 0.4829,
      "step": 87
    },
    {
      "epoch": 0.29648347020425353,
      "grad_norm": 1.13704598401882,
      "learning_rate": 4.756756756756758e-05,
      "loss": 0.4923,
      "step": 88
    },
    {
      "epoch": 0.2998526005474837,
      "grad_norm": 0.9773983706335432,
      "learning_rate": 4.810810810810811e-05,
      "loss": 0.4925,
      "step": 89
    },
    {
      "epoch": 0.30322173089071386,
      "grad_norm": 1.405804106766561,
      "learning_rate": 4.8648648648648654e-05,
      "loss": 0.4988,
      "step": 90
    },
    {
      "epoch": 0.30659086123394397,
      "grad_norm": 1.2120207198526078,
      "learning_rate": 4.9189189189189196e-05,
      "loss": 0.493,
      "step": 91
    },
    {
      "epoch": 0.30995999157717413,
      "grad_norm": 1.209860911666279,
      "learning_rate": 4.972972972972973e-05,
      "loss": 0.4871,
      "step": 92
    },
    {
      "epoch": 0.3133291219204043,
      "grad_norm": 1.1742086251606596,
      "learning_rate": 5.027027027027027e-05,
      "loss": 0.4892,
      "step": 93
    },
    {
      "epoch": 0.31669825226363446,
      "grad_norm": 0.9715216714272247,
      "learning_rate": 5.081081081081081e-05,
      "loss": 0.4869,
      "step": 94
    },
    {
      "epoch": 0.3200673826068646,
      "grad_norm": 1.1287315453457836,
      "learning_rate": 5.135135135135136e-05,
      "loss": 0.483,
      "step": 95
    },
    {
      "epoch": 0.32343651295009473,
      "grad_norm": 1.3753216500561123,
      "learning_rate": 5.18918918918919e-05,
      "loss": 0.5011,
      "step": 96
    },
    {
      "epoch": 0.3268056432933249,
      "grad_norm": 1.0595415038900966,
      "learning_rate": 5.2432432432432434e-05,
      "loss": 0.4872,
      "step": 97
    },
    {
      "epoch": 0.33017477363655506,
      "grad_norm": 0.993959867595984,
      "learning_rate": 5.2972972972972976e-05,
      "loss": 0.4874,
      "step": 98
    },
    {
      "epoch": 0.33354390397978523,
      "grad_norm": 0.9824629794475075,
      "learning_rate": 5.3513513513513525e-05,
      "loss": 0.4813,
      "step": 99
    },
    {
      "epoch": 0.3369130343230154,
      "grad_norm": 1.1711893920755474,
      "learning_rate": 5.405405405405406e-05,
      "loss": 0.4799,
      "step": 100
    },
    {
      "epoch": 0.3402821646662455,
      "grad_norm": 0.6759536241348655,
      "learning_rate": 5.45945945945946e-05,
      "loss": 0.472,
      "step": 101
    },
    {
      "epoch": 0.34365129500947567,
      "grad_norm": 0.7703133612780192,
      "learning_rate": 5.513513513513514e-05,
      "loss": 0.484,
      "step": 102
    },
    {
      "epoch": 0.34702042535270583,
      "grad_norm": 0.7769913812178919,
      "learning_rate": 5.567567567567568e-05,
      "loss": 0.4758,
      "step": 103
    },
    {
      "epoch": 0.350389555695936,
      "grad_norm": 0.8455984342427874,
      "learning_rate": 5.621621621621622e-05,
      "loss": 0.4807,
      "step": 104
    },
    {
      "epoch": 0.35375868603916616,
      "grad_norm": 1.0743068767474058,
      "learning_rate": 5.6756756756756757e-05,
      "loss": 0.4845,
      "step": 105
    },
    {
      "epoch": 0.35712781638239627,
      "grad_norm": 1.098648574354644,
      "learning_rate": 5.7297297297297305e-05,
      "loss": 0.4912,
      "step": 106
    },
    {
      "epoch": 0.36049694672562643,
      "grad_norm": 1.6110582797161879,
      "learning_rate": 5.783783783783785e-05,
      "loss": 0.4975,
      "step": 107
    },
    {
      "epoch": 0.3638660770688566,
      "grad_norm": 0.8739341305512099,
      "learning_rate": 5.837837837837838e-05,
      "loss": 0.4785,
      "step": 108
    },
    {
      "epoch": 0.36723520741208676,
      "grad_norm": 1.267103393588983,
      "learning_rate": 5.8918918918918924e-05,
      "loss": 0.4922,
      "step": 109
    },
    {
      "epoch": 0.3706043377553169,
      "grad_norm": 1.2547562478396024,
      "learning_rate": 5.945945945945946e-05,
      "loss": 0.4865,
      "step": 110
    },
    {
      "epoch": 0.37397346809854703,
      "grad_norm": 1.1629837530431066,
      "learning_rate": 6.000000000000001e-05,
      "loss": 0.4815,
      "step": 111
    },
    {
      "epoch": 0.3773425984417772,
      "grad_norm": 0.8450218760805568,
      "learning_rate": 6.054054054054055e-05,
      "loss": 0.4795,
      "step": 112
    },
    {
      "epoch": 0.38071172878500736,
      "grad_norm": 1.116338785518496,
      "learning_rate": 6.108108108108108e-05,
      "loss": 0.4806,
      "step": 113
    },
    {
      "epoch": 0.38408085912823753,
      "grad_norm": 0.8964893535442878,
      "learning_rate": 6.162162162162163e-05,
      "loss": 0.4756,
      "step": 114
    },
    {
      "epoch": 0.3874499894714677,
      "grad_norm": 0.5881211331403472,
      "learning_rate": 6.216216216216216e-05,
      "loss": 0.4732,
      "step": 115
    },
    {
      "epoch": 0.39081911981469786,
      "grad_norm": 0.7134703598403237,
      "learning_rate": 6.27027027027027e-05,
      "loss": 0.4788,
      "step": 116
    },
    {
      "epoch": 0.39418825015792797,
      "grad_norm": 0.5897113644451194,
      "learning_rate": 6.324324324324325e-05,
      "loss": 0.4728,
      "step": 117
    },
    {
      "epoch": 0.39755738050115813,
      "grad_norm": 0.6261295369849983,
      "learning_rate": 6.378378378378379e-05,
      "loss": 0.4773,
      "step": 118
    },
    {
      "epoch": 0.4009265108443883,
      "grad_norm": 0.6923893019220413,
      "learning_rate": 6.432432432432433e-05,
      "loss": 0.4762,
      "step": 119
    },
    {
      "epoch": 0.40429564118761846,
      "grad_norm": 0.9773203912887567,
      "learning_rate": 6.486486486486487e-05,
      "loss": 0.4835,
      "step": 120
    },
    {
      "epoch": 0.4076647715308486,
      "grad_norm": 1.2845541454753984,
      "learning_rate": 6.540540540540541e-05,
      "loss": 0.4742,
      "step": 121
    },
    {
      "epoch": 0.41103390187407873,
      "grad_norm": 0.7789854415201711,
      "learning_rate": 6.594594594594596e-05,
      "loss": 0.4693,
      "step": 122
    },
    {
      "epoch": 0.4144030322173089,
      "grad_norm": 0.8431373845569413,
      "learning_rate": 6.648648648648648e-05,
      "loss": 0.4831,
      "step": 123
    },
    {
      "epoch": 0.41777216256053906,
      "grad_norm": 0.8809025032679428,
      "learning_rate": 6.702702702702703e-05,
      "loss": 0.48,
      "step": 124
    },
    {
      "epoch": 0.4211412929037692,
      "grad_norm": 0.7787166669908304,
      "learning_rate": 6.756756756756758e-05,
      "loss": 0.4861,
      "step": 125
    },
    {
      "epoch": 0.4245104232469994,
      "grad_norm": 0.7332477510232297,
      "learning_rate": 6.810810810810811e-05,
      "loss": 0.478,
      "step": 126
    },
    {
      "epoch": 0.4278795535902295,
      "grad_norm": 1.3066224708152618,
      "learning_rate": 6.864864864864865e-05,
      "loss": 0.4813,
      "step": 127
    },
    {
      "epoch": 0.43124868393345966,
      "grad_norm": 0.9836726211958252,
      "learning_rate": 6.91891891891892e-05,
      "loss": 0.4714,
      "step": 128
    },
    {
      "epoch": 0.43461781427668983,
      "grad_norm": 0.9119614187689974,
      "learning_rate": 6.972972972972974e-05,
      "loss": 0.4769,
      "step": 129
    },
    {
      "epoch": 0.43798694461992,
      "grad_norm": 0.6243806457837586,
      "learning_rate": 7.027027027027028e-05,
      "loss": 0.4794,
      "step": 130
    },
    {
      "epoch": 0.44135607496315016,
      "grad_norm": 0.7687415551391915,
      "learning_rate": 7.081081081081081e-05,
      "loss": 0.4732,
      "step": 131
    },
    {
      "epoch": 0.44472520530638027,
      "grad_norm": 1.0092750754274926,
      "learning_rate": 7.135135135135136e-05,
      "loss": 0.4776,
      "step": 132
    },
    {
      "epoch": 0.44809433564961043,
      "grad_norm": 0.8380704714410115,
      "learning_rate": 7.18918918918919e-05,
      "loss": 0.4648,
      "step": 133
    },
    {
      "epoch": 0.4514634659928406,
      "grad_norm": 0.6767690752757246,
      "learning_rate": 7.243243243243243e-05,
      "loss": 0.4609,
      "step": 134
    },
    {
      "epoch": 0.45483259633607076,
      "grad_norm": 0.9804619524504721,
      "learning_rate": 7.297297297297297e-05,
      "loss": 0.4713,
      "step": 135
    },
    {
      "epoch": 0.4582017266793009,
      "grad_norm": 1.3776587580151205,
      "learning_rate": 7.351351351351353e-05,
      "loss": 0.4794,
      "step": 136
    },
    {
      "epoch": 0.46157085702253103,
      "grad_norm": 0.5714903502719861,
      "learning_rate": 7.405405405405406e-05,
      "loss": 0.4693,
      "step": 137
    },
    {
      "epoch": 0.4649399873657612,
      "grad_norm": 1.1165381199232975,
      "learning_rate": 7.45945945945946e-05,
      "loss": 0.4789,
      "step": 138
    },
    {
      "epoch": 0.46830911770899136,
      "grad_norm": 1.1391177830520112,
      "learning_rate": 7.513513513513514e-05,
      "loss": 0.4763,
      "step": 139
    },
    {
      "epoch": 0.4716782480522215,
      "grad_norm": 0.8405758310660678,
      "learning_rate": 7.567567567567568e-05,
      "loss": 0.4708,
      "step": 140
    },
    {
      "epoch": 0.4750473783954517,
      "grad_norm": 0.673259823629261,
      "learning_rate": 7.621621621621623e-05,
      "loss": 0.4709,
      "step": 141
    },
    {
      "epoch": 0.4784165087386818,
      "grad_norm": 0.9085593163135716,
      "learning_rate": 7.675675675675675e-05,
      "loss": 0.4658,
      "step": 142
    },
    {
      "epoch": 0.48178563908191196,
      "grad_norm": 0.9065492978219933,
      "learning_rate": 7.729729729729731e-05,
      "loss": 0.4661,
      "step": 143
    },
    {
      "epoch": 0.48515476942514213,
      "grad_norm": 0.8751584723634406,
      "learning_rate": 7.783783783783785e-05,
      "loss": 0.4725,
      "step": 144
    },
    {
      "epoch": 0.4885238997683723,
      "grad_norm": 0.6907562652250656,
      "learning_rate": 7.837837837837838e-05,
      "loss": 0.4684,
      "step": 145
    },
    {
      "epoch": 0.49189303011160246,
      "grad_norm": 0.5990641326148477,
      "learning_rate": 7.891891891891892e-05,
      "loss": 0.4672,
      "step": 146
    },
    {
      "epoch": 0.4952621604548326,
      "grad_norm": 0.6994191437855128,
      "learning_rate": 7.945945945945946e-05,
      "loss": 0.4662,
      "step": 147
    },
    {
      "epoch": 0.49863129079806273,
      "grad_norm": 0.5573598940486624,
      "learning_rate": 8e-05,
      "loss": 0.4665,
      "step": 148
    },
    {
      "epoch": 0.502000421141293,
      "grad_norm": 0.6145912929008095,
      "learning_rate": 7.999988874460243e-05,
      "loss": 0.4669,
      "step": 149
    },
    {
      "epoch": 0.505369551484523,
      "grad_norm": 0.8011807879753905,
      "learning_rate": 7.999955497902857e-05,
      "loss": 0.4669,
      "step": 150
    },
    {
      "epoch": 0.5087386818277532,
      "grad_norm": 0.8616234404683479,
      "learning_rate": 7.99989987051351e-05,
      "loss": 0.4721,
      "step": 151
    },
    {
      "epoch": 0.5121078121709833,
      "grad_norm": 0.7813814403741567,
      "learning_rate": 7.999821992601645e-05,
      "loss": 0.4753,
      "step": 152
    },
    {
      "epoch": 0.5154769425142135,
      "grad_norm": 0.8208221005516424,
      "learning_rate": 7.999721864600476e-05,
      "loss": 0.4648,
      "step": 153
    },
    {
      "epoch": 0.5188460728574437,
      "grad_norm": 0.7471274236204338,
      "learning_rate": 7.999599487066996e-05,
      "loss": 0.4665,
      "step": 154
    },
    {
      "epoch": 0.5222152032006738,
      "grad_norm": 0.6025705010343646,
      "learning_rate": 7.999454860681961e-05,
      "loss": 0.4646,
      "step": 155
    },
    {
      "epoch": 0.525584333543904,
      "grad_norm": 0.6278670733672859,
      "learning_rate": 7.999287986249894e-05,
      "loss": 0.4582,
      "step": 156
    },
    {
      "epoch": 0.5289534638871342,
      "grad_norm": 0.7363381482182718,
      "learning_rate": 7.999098864699078e-05,
      "loss": 0.4644,
      "step": 157
    },
    {
      "epoch": 0.5323225942303643,
      "grad_norm": 0.5321478176964342,
      "learning_rate": 7.998887497081555e-05,
      "loss": 0.4558,
      "step": 158
    },
    {
      "epoch": 0.5356917245735945,
      "grad_norm": 0.5084748356369074,
      "learning_rate": 7.998653884573114e-05,
      "loss": 0.4576,
      "step": 159
    },
    {
      "epoch": 0.5390608549168246,
      "grad_norm": 0.44466968144745794,
      "learning_rate": 7.998398028473287e-05,
      "loss": 0.4628,
      "step": 160
    },
    {
      "epoch": 0.5424299852600547,
      "grad_norm": 0.5300560878644925,
      "learning_rate": 7.998119930205342e-05,
      "loss": 0.4587,
      "step": 161
    },
    {
      "epoch": 0.5457991156032849,
      "grad_norm": 0.4482671223105369,
      "learning_rate": 7.997819591316278e-05,
      "loss": 0.4595,
      "step": 162
    },
    {
      "epoch": 0.549168245946515,
      "grad_norm": 0.3831134887002804,
      "learning_rate": 7.997497013476808e-05,
      "loss": 0.4621,
      "step": 163
    },
    {
      "epoch": 0.5525373762897452,
      "grad_norm": 0.42236120459010645,
      "learning_rate": 7.99715219848136e-05,
      "loss": 0.4574,
      "step": 164
    },
    {
      "epoch": 0.5559065066329754,
      "grad_norm": 0.4457523958461928,
      "learning_rate": 7.996785148248062e-05,
      "loss": 0.4597,
      "step": 165
    },
    {
      "epoch": 0.5592756369762055,
      "grad_norm": 0.5006917210647484,
      "learning_rate": 7.996395864818727e-05,
      "loss": 0.4594,
      "step": 166
    },
    {
      "epoch": 0.5626447673194357,
      "grad_norm": 0.5999241658726214,
      "learning_rate": 7.995984350358851e-05,
      "loss": 0.4578,
      "step": 167
    },
    {
      "epoch": 0.5660138976626659,
      "grad_norm": 0.7291489485043735,
      "learning_rate": 7.995550607157592e-05,
      "loss": 0.4538,
      "step": 168
    },
    {
      "epoch": 0.569383028005896,
      "grad_norm": 0.7577464603442905,
      "learning_rate": 7.995094637627767e-05,
      "loss": 0.4507,
      "step": 169
    },
    {
      "epoch": 0.5727521583491262,
      "grad_norm": 0.5373115669466836,
      "learning_rate": 7.994616444305826e-05,
      "loss": 0.4602,
      "step": 170
    },
    {
      "epoch": 0.5761212886923562,
      "grad_norm": 0.5783937366804819,
      "learning_rate": 7.994116029851852e-05,
      "loss": 0.4621,
      "step": 171
    },
    {
      "epoch": 0.5794904190355864,
      "grad_norm": 0.7289647138839453,
      "learning_rate": 7.993593397049533e-05,
      "loss": 0.4569,
      "step": 172
    },
    {
      "epoch": 0.5828595493788166,
      "grad_norm": 0.7726864760162053,
      "learning_rate": 7.993048548806155e-05,
      "loss": 0.4609,
      "step": 173
    },
    {
      "epoch": 0.5862286797220467,
      "grad_norm": 0.7101749816908381,
      "learning_rate": 7.992481488152585e-05,
      "loss": 0.4628,
      "step": 174
    },
    {
      "epoch": 0.5895978100652769,
      "grad_norm": 0.7787526674806393,
      "learning_rate": 7.991892218243251e-05,
      "loss": 0.4664,
      "step": 175
    },
    {
      "epoch": 0.5929669404085071,
      "grad_norm": 0.9193285112654672,
      "learning_rate": 7.991280742356124e-05,
      "loss": 0.4583,
      "step": 176
    },
    {
      "epoch": 0.5963360707517372,
      "grad_norm": 0.863766394540256,
      "learning_rate": 7.990647063892704e-05,
      "loss": 0.4532,
      "step": 177
    },
    {
      "epoch": 0.5997052010949674,
      "grad_norm": 0.7969950754484971,
      "learning_rate": 7.989991186378e-05,
      "loss": 0.4649,
      "step": 178
    },
    {
      "epoch": 0.6030743314381976,
      "grad_norm": 0.9175228695778532,
      "learning_rate": 7.989313113460506e-05,
      "loss": 0.4598,
      "step": 179
    },
    {
      "epoch": 0.6064434617814277,
      "grad_norm": 1.189324294096932,
      "learning_rate": 7.988612848912186e-05,
      "loss": 0.4616,
      "step": 180
    },
    {
      "epoch": 0.6098125921246578,
      "grad_norm": 0.5502633850375939,
      "learning_rate": 7.987890396628451e-05,
      "loss": 0.4506,
      "step": 181
    },
    {
      "epoch": 0.6131817224678879,
      "grad_norm": 0.6418325300837303,
      "learning_rate": 7.987145760628138e-05,
      "loss": 0.4589,
      "step": 182
    },
    {
      "epoch": 0.6165508528111181,
      "grad_norm": 0.8487957991579048,
      "learning_rate": 7.986378945053483e-05,
      "loss": 0.4534,
      "step": 183
    },
    {
      "epoch": 0.6199199831543483,
      "grad_norm": 0.729090543693198,
      "learning_rate": 7.985589954170107e-05,
      "loss": 0.4502,
      "step": 184
    },
    {
      "epoch": 0.6232891134975784,
      "grad_norm": 0.564140229622775,
      "learning_rate": 7.984778792366983e-05,
      "loss": 0.4561,
      "step": 185
    },
    {
      "epoch": 0.6266582438408086,
      "grad_norm": 0.5489014465662102,
      "learning_rate": 7.983945464156419e-05,
      "loss": 0.4511,
      "step": 186
    },
    {
      "epoch": 0.6300273741840388,
      "grad_norm": 0.4439092473485429,
      "learning_rate": 7.983089974174026e-05,
      "loss": 0.4592,
      "step": 187
    },
    {
      "epoch": 0.6333965045272689,
      "grad_norm": 0.4899343556871492,
      "learning_rate": 7.982212327178699e-05,
      "loss": 0.4576,
      "step": 188
    },
    {
      "epoch": 0.6367656348704991,
      "grad_norm": 0.4429930723656228,
      "learning_rate": 7.981312528052587e-05,
      "loss": 0.4527,
      "step": 189
    },
    {
      "epoch": 0.6401347652137293,
      "grad_norm": 0.3517045134537643,
      "learning_rate": 7.980390581801064e-05,
      "loss": 0.4533,
      "step": 190
    },
    {
      "epoch": 0.6435038955569593,
      "grad_norm": 0.35596532078238685,
      "learning_rate": 7.979446493552708e-05,
      "loss": 0.4512,
      "step": 191
    },
    {
      "epoch": 0.6468730259001895,
      "grad_norm": 0.4117813215790183,
      "learning_rate": 7.97848026855926e-05,
      "loss": 0.4427,
      "step": 192
    },
    {
      "epoch": 0.6502421562434196,
      "grad_norm": 0.42570694906406503,
      "learning_rate": 7.977491912195611e-05,
      "loss": 0.4559,
      "step": 193
    },
    {
      "epoch": 0.6536112865866498,
      "grad_norm": 0.32926038316817535,
      "learning_rate": 7.976481429959758e-05,
      "loss": 0.4525,
      "step": 194
    },
    {
      "epoch": 0.65698041692988,
      "grad_norm": 0.3352588049162969,
      "learning_rate": 7.975448827472782e-05,
      "loss": 0.4465,
      "step": 195
    },
    {
      "epoch": 0.6603495472731101,
      "grad_norm": 0.3121745237951815,
      "learning_rate": 7.974394110478813e-05,
      "loss": 0.4504,
      "step": 196
    },
    {
      "epoch": 0.6637186776163403,
      "grad_norm": 0.3514443346936628,
      "learning_rate": 7.973317284844998e-05,
      "loss": 0.4543,
      "step": 197
    },
    {
      "epoch": 0.6670878079595705,
      "grad_norm": 0.36563500765518064,
      "learning_rate": 7.972218356561471e-05,
      "loss": 0.4466,
      "step": 198
    },
    {
      "epoch": 0.6704569383028006,
      "grad_norm": 0.36993328537084813,
      "learning_rate": 7.971097331741318e-05,
      "loss": 0.447,
      "step": 199
    },
    {
      "epoch": 0.6738260686460308,
      "grad_norm": 0.4218574088374599,
      "learning_rate": 7.96995421662054e-05,
      "loss": 0.4456,
      "step": 200
    },
    {
      "epoch": 0.677195198989261,
      "grad_norm": 0.5127127798248658,
      "learning_rate": 7.968789017558026e-05,
      "loss": 0.4367,
      "step": 201
    },
    {
      "epoch": 0.680564329332491,
      "grad_norm": 0.5533862982628416,
      "learning_rate": 7.967601741035507e-05,
      "loss": 0.4464,
      "step": 202
    },
    {
      "epoch": 0.6839334596757212,
      "grad_norm": 0.5128169904646379,
      "learning_rate": 7.966392393657533e-05,
      "loss": 0.4493,
      "step": 203
    },
    {
      "epoch": 0.6873025900189513,
      "grad_norm": 0.47256773564418525,
      "learning_rate": 7.965160982151422e-05,
      "loss": 0.4536,
      "step": 204
    },
    {
      "epoch": 0.6906717203621815,
      "grad_norm": 0.452879409095403,
      "learning_rate": 7.963907513367234e-05,
      "loss": 0.4589,
      "step": 205
    },
    {
      "epoch": 0.6940408507054117,
      "grad_norm": 0.455219584683228,
      "learning_rate": 7.962631994277728e-05,
      "loss": 0.4414,
      "step": 206
    },
    {
      "epoch": 0.6974099810486418,
      "grad_norm": 0.47863589957769587,
      "learning_rate": 7.961334431978321e-05,
      "loss": 0.4486,
      "step": 207
    },
    {
      "epoch": 0.700779111391872,
      "grad_norm": 0.5110385780704738,
      "learning_rate": 7.960014833687055e-05,
      "loss": 0.4495,
      "step": 208
    },
    {
      "epoch": 0.7041482417351022,
      "grad_norm": 0.4683257451933529,
      "learning_rate": 7.958673206744553e-05,
      "loss": 0.4522,
      "step": 209
    },
    {
      "epoch": 0.7075173720783323,
      "grad_norm": 0.4506553993940309,
      "learning_rate": 7.957309558613974e-05,
      "loss": 0.4452,
      "step": 210
    },
    {
      "epoch": 0.7108865024215625,
      "grad_norm": 0.4526028368594711,
      "learning_rate": 7.955923896880982e-05,
      "loss": 0.4456,
      "step": 211
    },
    {
      "epoch": 0.7142556327647925,
      "grad_norm": 0.5212859488073646,
      "learning_rate": 7.954516229253691e-05,
      "loss": 0.4482,
      "step": 212
    },
    {
      "epoch": 0.7176247631080227,
      "grad_norm": 0.4908480827080424,
      "learning_rate": 7.953086563562635e-05,
      "loss": 0.4404,
      "step": 213
    },
    {
      "epoch": 0.7209938934512529,
      "grad_norm": 0.43474906852801837,
      "learning_rate": 7.951634907760713e-05,
      "loss": 0.4415,
      "step": 214
    },
    {
      "epoch": 0.724363023794483,
      "grad_norm": 0.5465543422325746,
      "learning_rate": 7.950161269923153e-05,
      "loss": 0.453,
      "step": 215
    },
    {
      "epoch": 0.7277321541377132,
      "grad_norm": 0.5191090578880476,
      "learning_rate": 7.948665658247463e-05,
      "loss": 0.4511,
      "step": 216
    },
    {
      "epoch": 0.7311012844809434,
      "grad_norm": 0.41056922017028197,
      "learning_rate": 7.947148081053388e-05,
      "loss": 0.4428,
      "step": 217
    },
    {
      "epoch": 0.7344704148241735,
      "grad_norm": 0.4280367756173325,
      "learning_rate": 7.945608546782858e-05,
      "loss": 0.4552,
      "step": 218
    },
    {
      "epoch": 0.7378395451674037,
      "grad_norm": 0.44143498781875934,
      "learning_rate": 7.944047063999952e-05,
      "loss": 0.4461,
      "step": 219
    },
    {
      "epoch": 0.7412086755106339,
      "grad_norm": 0.4671020826488003,
      "learning_rate": 7.942463641390834e-05,
      "loss": 0.433,
      "step": 220
    },
    {
      "epoch": 0.744577805853864,
      "grad_norm": 0.4802991806753108,
      "learning_rate": 7.940858287763724e-05,
      "loss": 0.4487,
      "step": 221
    },
    {
      "epoch": 0.7479469361970941,
      "grad_norm": 0.4271821601132076,
      "learning_rate": 7.939231012048833e-05,
      "loss": 0.4509,
      "step": 222
    },
    {
      "epoch": 0.7513160665403242,
      "grad_norm": 0.38123610223687315,
      "learning_rate": 7.93758182329832e-05,
      "loss": 0.4372,
      "step": 223
    },
    {
      "epoch": 0.7546851968835544,
      "grad_norm": 0.465830487650423,
      "learning_rate": 7.935910730686246e-05,
      "loss": 0.4444,
      "step": 224
    },
    {
      "epoch": 0.7580543272267846,
      "grad_norm": 0.5651393352119582,
      "learning_rate": 7.934217743508513e-05,
      "loss": 0.4468,
      "step": 225
    },
    {
      "epoch": 0.7614234575700147,
      "grad_norm": 0.6526912705793722,
      "learning_rate": 7.932502871182818e-05,
      "loss": 0.4509,
      "step": 226
    },
    {
      "epoch": 0.7647925879132449,
      "grad_norm": 0.7684525411435036,
      "learning_rate": 7.930766123248602e-05,
      "loss": 0.4475,
      "step": 227
    },
    {
      "epoch": 0.7681617182564751,
      "grad_norm": 0.8868257582573387,
      "learning_rate": 7.929007509366994e-05,
      "loss": 0.4486,
      "step": 228
    },
    {
      "epoch": 0.7715308485997052,
      "grad_norm": 0.9592751619745519,
      "learning_rate": 7.927227039320758e-05,
      "loss": 0.442,
      "step": 229
    },
    {
      "epoch": 0.7748999789429354,
      "grad_norm": 0.8928159966805775,
      "learning_rate": 7.925424723014239e-05,
      "loss": 0.4541,
      "step": 230
    },
    {
      "epoch": 0.7782691092861656,
      "grad_norm": 0.7880900131568054,
      "learning_rate": 7.923600570473308e-05,
      "loss": 0.4514,
      "step": 231
    },
    {
      "epoch": 0.7816382396293957,
      "grad_norm": 0.4783123604515285,
      "learning_rate": 7.921754591845307e-05,
      "loss": 0.4442,
      "step": 232
    },
    {
      "epoch": 0.7850073699726258,
      "grad_norm": 0.4520386015737669,
      "learning_rate": 7.91988679739899e-05,
      "loss": 0.448,
      "step": 233
    },
    {
      "epoch": 0.7883765003158559,
      "grad_norm": 0.6605527609379506,
      "learning_rate": 7.917997197524467e-05,
      "loss": 0.4435,
      "step": 234
    },
    {
      "epoch": 0.7917456306590861,
      "grad_norm": 0.7089385732745206,
      "learning_rate": 7.916085802733147e-05,
      "loss": 0.4449,
      "step": 235
    },
    {
      "epoch": 0.7951147610023163,
      "grad_norm": 0.5904512970852802,
      "learning_rate": 7.914152623657678e-05,
      "loss": 0.448,
      "step": 236
    },
    {
      "epoch": 0.7984838913455464,
      "grad_norm": 0.5165195483185807,
      "learning_rate": 7.912197671051894e-05,
      "loss": 0.4475,
      "step": 237
    },
    {
      "epoch": 0.8018530216887766,
      "grad_norm": 0.47278629514591364,
      "learning_rate": 7.910220955790746e-05,
      "loss": 0.447,
      "step": 238
    },
    {
      "epoch": 0.8052221520320068,
      "grad_norm": 0.4466680465677497,
      "learning_rate": 7.908222488870243e-05,
      "loss": 0.4471,
      "step": 239
    },
    {
      "epoch": 0.8085912823752369,
      "grad_norm": 0.40052321749076436,
      "learning_rate": 7.906202281407398e-05,
      "loss": 0.4453,
      "step": 240
    },
    {
      "epoch": 0.8119604127184671,
      "grad_norm": 0.3808574042244712,
      "learning_rate": 7.90416034464016e-05,
      "loss": 0.4467,
      "step": 241
    },
    {
      "epoch": 0.8153295430616972,
      "grad_norm": 0.3009379630644614,
      "learning_rate": 7.902096689927355e-05,
      "loss": 0.4405,
      "step": 242
    },
    {
      "epoch": 0.8186986734049273,
      "grad_norm": 0.4006333439696202,
      "learning_rate": 7.900011328748619e-05,
      "loss": 0.441,
      "step": 243
    },
    {
      "epoch": 0.8220678037481575,
      "grad_norm": 0.36250537572683333,
      "learning_rate": 7.897904272704333e-05,
      "loss": 0.4382,
      "step": 244
    },
    {
      "epoch": 0.8254369340913876,
      "grad_norm": 0.37232144501481734,
      "learning_rate": 7.895775533515569e-05,
      "loss": 0.4455,
      "step": 245
    },
    {
      "epoch": 0.8288060644346178,
      "grad_norm": 0.4169869556836039,
      "learning_rate": 7.893625123024011e-05,
      "loss": 0.4356,
      "step": 246
    },
    {
      "epoch": 0.832175194777848,
      "grad_norm": 0.3864353557408192,
      "learning_rate": 7.891453053191898e-05,
      "loss": 0.4435,
      "step": 247
    },
    {
      "epoch": 0.8355443251210781,
      "grad_norm": 0.3608352846793135,
      "learning_rate": 7.889259336101957e-05,
      "loss": 0.4462,
      "step": 248
    },
    {
      "epoch": 0.8389134554643083,
      "grad_norm": 0.32373631118958723,
      "learning_rate": 7.887043983957327e-05,
      "loss": 0.4375,
      "step": 249
    },
    {
      "epoch": 0.8422825858075385,
      "grad_norm": 0.26424914090383317,
      "learning_rate": 7.884807009081506e-05,
      "loss": 0.4375,
      "step": 250
    },
    {
      "epoch": 0.8456517161507686,
      "grad_norm": 0.22444081020907958,
      "learning_rate": 7.882548423918268e-05,
      "loss": 0.4413,
      "step": 251
    },
    {
      "epoch": 0.8490208464939988,
      "grad_norm": 0.26045857383329957,
      "learning_rate": 7.880268241031604e-05,
      "loss": 0.4317,
      "step": 252
    },
    {
      "epoch": 0.8523899768372288,
      "grad_norm": 0.30550339254012787,
      "learning_rate": 7.877966473105645e-05,
      "loss": 0.4458,
      "step": 253
    },
    {
      "epoch": 0.855759107180459,
      "grad_norm": 0.34559528308231324,
      "learning_rate": 7.875643132944599e-05,
      "loss": 0.4403,
      "step": 254
    },
    {
      "epoch": 0.8591282375236892,
      "grad_norm": 0.35710994685108394,
      "learning_rate": 7.873298233472671e-05,
      "loss": 0.4394,
      "step": 255
    },
    {
      "epoch": 0.8624973678669193,
      "grad_norm": 0.42956681122910056,
      "learning_rate": 7.870931787733996e-05,
      "loss": 0.4403,
      "step": 256
    },
    {
      "epoch": 0.8658664982101495,
      "grad_norm": 0.5626197718228877,
      "learning_rate": 7.868543808892569e-05,
      "loss": 0.4387,
      "step": 257
    },
    {
      "epoch": 0.8692356285533797,
      "grad_norm": 0.6076789146858117,
      "learning_rate": 7.866134310232167e-05,
      "loss": 0.4439,
      "step": 258
    },
    {
      "epoch": 0.8726047588966098,
      "grad_norm": 0.5742280027785791,
      "learning_rate": 7.863703305156273e-05,
      "loss": 0.4455,
      "step": 259
    },
    {
      "epoch": 0.87597388923984,
      "grad_norm": 0.5069317059933754,
      "learning_rate": 7.861250807188014e-05,
      "loss": 0.4476,
      "step": 260
    },
    {
      "epoch": 0.8793430195830702,
      "grad_norm": 0.4288223928021788,
      "learning_rate": 7.858776829970069e-05,
      "loss": 0.4379,
      "step": 261
    },
    {
      "epoch": 0.8827121499263003,
      "grad_norm": 0.5442592728854474,
      "learning_rate": 7.856281387264603e-05,
      "loss": 0.4379,
      "step": 262
    },
    {
      "epoch": 0.8860812802695305,
      "grad_norm": 0.5638482346313414,
      "learning_rate": 7.853764492953192e-05,
      "loss": 0.4444,
      "step": 263
    },
    {
      "epoch": 0.8894504106127605,
      "grad_norm": 0.4523819114426828,
      "learning_rate": 7.851226161036739e-05,
      "loss": 0.4394,
      "step": 264
    },
    {
      "epoch": 0.8928195409559907,
      "grad_norm": 0.5349306408767115,
      "learning_rate": 7.848666405635398e-05,
      "loss": 0.441,
      "step": 265
    },
    {
      "epoch": 0.8961886712992209,
      "grad_norm": 0.5452142089194884,
      "learning_rate": 7.846085240988503e-05,
      "loss": 0.4483,
      "step": 266
    },
    {
      "epoch": 0.899557801642451,
      "grad_norm": 0.4222443920522887,
      "learning_rate": 7.843482681454476e-05,
      "loss": 0.4407,
      "step": 267
    },
    {
      "epoch": 0.9029269319856812,
      "grad_norm": 0.5310106072977896,
      "learning_rate": 7.840858741510758e-05,
      "loss": 0.4442,
      "step": 268
    },
    {
      "epoch": 0.9062960623289114,
      "grad_norm": 0.5876077411696179,
      "learning_rate": 7.838213435753724e-05,
      "loss": 0.4438,
      "step": 269
    },
    {
      "epoch": 0.9096651926721415,
      "grad_norm": 0.6100738415200538,
      "learning_rate": 7.835546778898599e-05,
      "loss": 0.4465,
      "step": 270
    },
    {
      "epoch": 0.9130343230153717,
      "grad_norm": 0.6760561504138676,
      "learning_rate": 7.832858785779383e-05,
      "loss": 0.4338,
      "step": 271
    },
    {
      "epoch": 0.9164034533586018,
      "grad_norm": 0.45392830007094576,
      "learning_rate": 7.830149471348763e-05,
      "loss": 0.431,
      "step": 272
    },
    {
      "epoch": 0.919772583701832,
      "grad_norm": 0.30596440551036547,
      "learning_rate": 7.827418850678034e-05,
      "loss": 0.4396,
      "step": 273
    },
    {
      "epoch": 0.9231417140450621,
      "grad_norm": 0.4969999175377505,
      "learning_rate": 7.824666938957004e-05,
      "loss": 0.4375,
      "step": 274
    },
    {
      "epoch": 0.9265108443882922,
      "grad_norm": 0.5437640388773309,
      "learning_rate": 7.82189375149393e-05,
      "loss": 0.444,
      "step": 275
    },
    {
      "epoch": 0.9298799747315224,
      "grad_norm": 0.4134501055661062,
      "learning_rate": 7.819099303715414e-05,
      "loss": 0.4385,
      "step": 276
    },
    {
      "epoch": 0.9332491050747526,
      "grad_norm": 0.3810051790575615,
      "learning_rate": 7.816283611166328e-05,
      "loss": 0.4339,
      "step": 277
    },
    {
      "epoch": 0.9366182354179827,
      "grad_norm": 0.4135193612689647,
      "learning_rate": 7.813446689509718e-05,
      "loss": 0.4413,
      "step": 278
    },
    {
      "epoch": 0.9399873657612129,
      "grad_norm": 0.5154216890519913,
      "learning_rate": 7.810588554526728e-05,
      "loss": 0.4409,
      "step": 279
    },
    {
      "epoch": 0.943356496104443,
      "grad_norm": 0.5335234306967277,
      "learning_rate": 7.807709222116506e-05,
      "loss": 0.4392,
      "step": 280
    },
    {
      "epoch": 0.9467256264476732,
      "grad_norm": 0.4582890089443176,
      "learning_rate": 7.804808708296116e-05,
      "loss": 0.44,
      "step": 281
    },
    {
      "epoch": 0.9500947567909034,
      "grad_norm": 0.41636142631229706,
      "learning_rate": 7.801887029200448e-05,
      "loss": 0.4359,
      "step": 282
    },
    {
      "epoch": 0.9534638871341335,
      "grad_norm": 0.3777680522962764,
      "learning_rate": 7.798944201082128e-05,
      "loss": 0.4305,
      "step": 283
    },
    {
      "epoch": 0.9568330174773636,
      "grad_norm": 0.31197040692277506,
      "learning_rate": 7.795980240311436e-05,
      "loss": 0.4378,
      "step": 284
    },
    {
      "epoch": 0.9602021478205938,
      "grad_norm": 0.2615719658181643,
      "learning_rate": 7.7929951633762e-05,
      "loss": 0.4349,
      "step": 285
    },
    {
      "epoch": 0.9635712781638239,
      "grad_norm": 0.27255928093352183,
      "learning_rate": 7.789988986881719e-05,
      "loss": 0.4324,
      "step": 286
    },
    {
      "epoch": 0.9669404085070541,
      "grad_norm": 0.3086259327892651,
      "learning_rate": 7.78696172755066e-05,
      "loss": 0.4338,
      "step": 287
    },
    {
      "epoch": 0.9703095388502843,
      "grad_norm": 0.3128738492807504,
      "learning_rate": 7.78391340222297e-05,
      "loss": 0.4327,
      "step": 288
    },
    {
      "epoch": 0.9736786691935144,
      "grad_norm": 0.28991557468061835,
      "learning_rate": 7.78084402785578e-05,
      "loss": 0.4368,
      "step": 289
    },
    {
      "epoch": 0.9770477995367446,
      "grad_norm": 0.3462635013389902,
      "learning_rate": 7.777753621523316e-05,
      "loss": 0.4376,
      "step": 290
    },
    {
      "epoch": 0.9804169298799748,
      "grad_norm": 0.41703460759212563,
      "learning_rate": 7.774642200416795e-05,
      "loss": 0.4364,
      "step": 291
    },
    {
      "epoch": 0.9837860602232049,
      "grad_norm": 0.5058437435233563,
      "learning_rate": 7.771509781844338e-05,
      "loss": 0.4392,
      "step": 292
    },
    {
      "epoch": 0.9871551905664351,
      "grad_norm": 0.49478795685868665,
      "learning_rate": 7.768356383230868e-05,
      "loss": 0.4387,
      "step": 293
    },
    {
      "epoch": 0.9905243209096652,
      "grad_norm": 0.4745986454402833,
      "learning_rate": 7.765182022118014e-05,
      "loss": 0.435,
      "step": 294
    },
    {
      "epoch": 0.9938934512528953,
      "grad_norm": 0.4611674206006931,
      "learning_rate": 7.761986716164019e-05,
      "loss": 0.4379,
      "step": 295
    },
    {
      "epoch": 0.9972625815961255,
      "grad_norm": 0.42674160555276347,
      "learning_rate": 7.758770483143634e-05,
      "loss": 0.4408,
      "step": 296
    },
    {
      "epoch": 1.0033691303432302,
      "grad_norm": 0.4680506210026581,
      "learning_rate": 7.755533340948024e-05,
      "loss": 0.4223,
      "step": 297
    },
    {
      "epoch": 1.0067382606864603,
      "grad_norm": 0.5238195474514908,
      "learning_rate": 7.752275307584664e-05,
      "loss": 0.4295,
      "step": 298
    },
    {
      "epoch": 1.0101073910296905,
      "grad_norm": 0.5889650401759404,
      "learning_rate": 7.748996401177244e-05,
      "loss": 0.4275,
      "step": 299
    },
    {
      "epoch": 1.0134765213729207,
      "grad_norm": 0.5507636965946558,
      "learning_rate": 7.745696639965569e-05,
      "loss": 0.4194,
      "step": 300
    },
    {
      "epoch": 1.0168456517161508,
      "grad_norm": 0.6157938140990948,
      "learning_rate": 7.742376042305449e-05,
      "loss": 0.433,
      "step": 301
    },
    {
      "epoch": 1.020214782059381,
      "grad_norm": 0.6252887191974348,
      "learning_rate": 7.739034626668605e-05,
      "loss": 0.4262,
      "step": 302
    },
    {
      "epoch": 1.0235839124026112,
      "grad_norm": 0.5880567265579987,
      "learning_rate": 7.735672411642562e-05,
      "loss": 0.4233,
      "step": 303
    },
    {
      "epoch": 1.0269530427458413,
      "grad_norm": 0.5726576390809455,
      "learning_rate": 7.732289415930549e-05,
      "loss": 0.424,
      "step": 304
    },
    {
      "epoch": 1.0303221730890715,
      "grad_norm": 0.47054865900441445,
      "learning_rate": 7.728885658351395e-05,
      "loss": 0.4176,
      "step": 305
    },
    {
      "epoch": 1.0336913034323016,
      "grad_norm": 0.4115743947585953,
      "learning_rate": 7.725461157839417e-05,
      "loss": 0.4292,
      "step": 306
    },
    {
      "epoch": 1.0370604337755316,
      "grad_norm": 0.4210186740776416,
      "learning_rate": 7.722015933444325e-05,
      "loss": 0.4247,
      "step": 307
    },
    {
      "epoch": 1.0404295641187618,
      "grad_norm": 0.36198066116445515,
      "learning_rate": 7.71855000433111e-05,
      "loss": 0.4193,
      "step": 308
    },
    {
      "epoch": 1.043798694461992,
      "grad_norm": 0.43778422486602975,
      "learning_rate": 7.715063389779936e-05,
      "loss": 0.4238,
      "step": 309
    },
    {
      "epoch": 1.047167824805222,
      "grad_norm": 0.45920696669429717,
      "learning_rate": 7.711556109186039e-05,
      "loss": 0.4237,
      "step": 310
    },
    {
      "epoch": 1.0505369551484522,
      "grad_norm": 0.3341781881526272,
      "learning_rate": 7.708028182059612e-05,
      "loss": 0.4239,
      "step": 311
    },
    {
      "epoch": 1.0539060854916824,
      "grad_norm": 0.3082296798332506,
      "learning_rate": 7.704479628025704e-05,
      "loss": 0.4167,
      "step": 312
    },
    {
      "epoch": 1.0572752158349126,
      "grad_norm": 0.33284929605340835,
      "learning_rate": 7.700910466824104e-05,
      "loss": 0.4233,
      "step": 313
    },
    {
      "epoch": 1.0606443461781427,
      "grad_norm": 0.333894193250551,
      "learning_rate": 7.697320718309235e-05,
      "loss": 0.4177,
      "step": 314
    },
    {
      "epoch": 1.064013476521373,
      "grad_norm": 0.39528163268670363,
      "learning_rate": 7.69371040245004e-05,
      "loss": 0.4188,
      "step": 315
    },
    {
      "epoch": 1.067382606864603,
      "grad_norm": 0.28394188370498197,
      "learning_rate": 7.690079539329875e-05,
      "loss": 0.4129,
      "step": 316
    },
    {
      "epoch": 1.0707517372078332,
      "grad_norm": 0.2953142618107928,
      "learning_rate": 7.686428149146398e-05,
      "loss": 0.4188,
      "step": 317
    },
    {
      "epoch": 1.0741208675510634,
      "grad_norm": 0.2804966323905774,
      "learning_rate": 7.682756252211453e-05,
      "loss": 0.4171,
      "step": 318
    },
    {
      "epoch": 1.0774899978942936,
      "grad_norm": 0.2510744302434169,
      "learning_rate": 7.679063868950955e-05,
      "loss": 0.4182,
      "step": 319
    },
    {
      "epoch": 1.0808591282375237,
      "grad_norm": 0.307553691452299,
      "learning_rate": 7.675351019904785e-05,
      "loss": 0.4177,
      "step": 320
    },
    {
      "epoch": 1.084228258580754,
      "grad_norm": 0.30605445723544605,
      "learning_rate": 7.671617725726666e-05,
      "loss": 0.4158,
      "step": 321
    },
    {
      "epoch": 1.087597388923984,
      "grad_norm": 0.3011711568174157,
      "learning_rate": 7.667864007184054e-05,
      "loss": 0.4141,
      "step": 322
    },
    {
      "epoch": 1.0909665192672142,
      "grad_norm": 0.286345264353555,
      "learning_rate": 7.664089885158023e-05,
      "loss": 0.4187,
      "step": 323
    },
    {
      "epoch": 1.0943356496104444,
      "grad_norm": 0.3592297464995333,
      "learning_rate": 7.660295380643144e-05,
      "loss": 0.4175,
      "step": 324
    },
    {
      "epoch": 1.0977047799536745,
      "grad_norm": 0.4650620300291997,
      "learning_rate": 7.656480514747374e-05,
      "loss": 0.4258,
      "step": 325
    },
    {
      "epoch": 1.1010739102969047,
      "grad_norm": 0.6004744804102117,
      "learning_rate": 7.652645308691933e-05,
      "loss": 0.419,
      "step": 326
    },
    {
      "epoch": 1.1044430406401347,
      "grad_norm": 0.6574099091252659,
      "learning_rate": 7.648789783811191e-05,
      "loss": 0.4217,
      "step": 327
    },
    {
      "epoch": 1.1078121709833648,
      "grad_norm": 0.6456572845078092,
      "learning_rate": 7.644913961552544e-05,
      "loss": 0.4207,
      "step": 328
    },
    {
      "epoch": 1.111181301326595,
      "grad_norm": 0.5305128144292774,
      "learning_rate": 7.641017863476298e-05,
      "loss": 0.4215,
      "step": 329
    },
    {
      "epoch": 1.1145504316698251,
      "grad_norm": 0.3067892240749629,
      "learning_rate": 7.637101511255554e-05,
      "loss": 0.4127,
      "step": 330
    },
    {
      "epoch": 1.1179195620130553,
      "grad_norm": 0.3619393126280428,
      "learning_rate": 7.633164926676076e-05,
      "loss": 0.4144,
      "step": 331
    },
    {
      "epoch": 1.1212886923562855,
      "grad_norm": 0.5099026443948101,
      "learning_rate": 7.629208131636179e-05,
      "loss": 0.4247,
      "step": 332
    },
    {
      "epoch": 1.1246578226995156,
      "grad_norm": 0.4600109550186491,
      "learning_rate": 7.625231148146601e-05,
      "loss": 0.4277,
      "step": 333
    },
    {
      "epoch": 1.1280269530427458,
      "grad_norm": 0.34171771897702874,
      "learning_rate": 7.621233998330387e-05,
      "loss": 0.4111,
      "step": 334
    },
    {
      "epoch": 1.131396083385976,
      "grad_norm": 0.3788094599826585,
      "learning_rate": 7.617216704422763e-05,
      "loss": 0.4238,
      "step": 335
    },
    {
      "epoch": 1.1347652137292061,
      "grad_norm": 0.38930545757784435,
      "learning_rate": 7.61317928877101e-05,
      "loss": 0.4266,
      "step": 336
    },
    {
      "epoch": 1.1381343440724363,
      "grad_norm": 0.3433858843038104,
      "learning_rate": 7.609121773834341e-05,
      "loss": 0.4113,
      "step": 337
    },
    {
      "epoch": 1.1415034744156665,
      "grad_norm": 0.3664698026597823,
      "learning_rate": 7.605044182183779e-05,
      "loss": 0.4215,
      "step": 338
    },
    {
      "epoch": 1.1448726047588966,
      "grad_norm": 0.3669871707797489,
      "learning_rate": 7.600946536502028e-05,
      "loss": 0.4187,
      "step": 339
    },
    {
      "epoch": 1.1482417351021268,
      "grad_norm": 0.36360389026471707,
      "learning_rate": 7.596828859583347e-05,
      "loss": 0.4179,
      "step": 340
    },
    {
      "epoch": 1.151610865445357,
      "grad_norm": 0.34303435626525425,
      "learning_rate": 7.592691174333426e-05,
      "loss": 0.4166,
      "step": 341
    },
    {
      "epoch": 1.1549799957885871,
      "grad_norm": 0.37256557828106257,
      "learning_rate": 7.588533503769257e-05,
      "loss": 0.4181,
      "step": 342
    },
    {
      "epoch": 1.1583491261318173,
      "grad_norm": 0.41467336010702505,
      "learning_rate": 7.584355871019002e-05,
      "loss": 0.4195,
      "step": 343
    },
    {
      "epoch": 1.1617182564750475,
      "grad_norm": 0.37682711741626357,
      "learning_rate": 7.580158299321872e-05,
      "loss": 0.4226,
      "step": 344
    },
    {
      "epoch": 1.1650873868182776,
      "grad_norm": 0.2646890963052802,
      "learning_rate": 7.575940812027993e-05,
      "loss": 0.4094,
      "step": 345
    },
    {
      "epoch": 1.1684565171615078,
      "grad_norm": 0.23766486308489482,
      "learning_rate": 7.571703432598275e-05,
      "loss": 0.42,
      "step": 346
    },
    {
      "epoch": 1.171825647504738,
      "grad_norm": 0.23844909696838593,
      "learning_rate": 7.567446184604285e-05,
      "loss": 0.4189,
      "step": 347
    },
    {
      "epoch": 1.175194777847968,
      "grad_norm": 0.23287909504956197,
      "learning_rate": 7.563169091728115e-05,
      "loss": 0.4123,
      "step": 348
    },
    {
      "epoch": 1.178563908191198,
      "grad_norm": 0.21692865660818864,
      "learning_rate": 7.558872177762246e-05,
      "loss": 0.4193,
      "step": 349
    },
    {
      "epoch": 1.1819330385344282,
      "grad_norm": 0.2191653011493883,
      "learning_rate": 7.554555466609425e-05,
      "loss": 0.4271,
      "step": 350
    },
    {
      "epoch": 1.1853021688776584,
      "grad_norm": 0.23748843252543808,
      "learning_rate": 7.550218982282518e-05,
      "loss": 0.4196,
      "step": 351
    },
    {
      "epoch": 1.1886712992208885,
      "grad_norm": 0.24616974908904704,
      "learning_rate": 7.545862748904394e-05,
      "loss": 0.4146,
      "step": 352
    },
    {
      "epoch": 1.1920404295641187,
      "grad_norm": 0.25196821392623664,
      "learning_rate": 7.541486790707776e-05,
      "loss": 0.4266,
      "step": 353
    },
    {
      "epoch": 1.1954095599073489,
      "grad_norm": 0.2470207075626988,
      "learning_rate": 7.537091132035111e-05,
      "loss": 0.4148,
      "step": 354
    },
    {
      "epoch": 1.198778690250579,
      "grad_norm": 0.2314470630644042,
      "learning_rate": 7.532675797338438e-05,
      "loss": 0.4033,
      "step": 355
    },
    {
      "epoch": 1.2021478205938092,
      "grad_norm": 0.23746828407475515,
      "learning_rate": 7.528240811179245e-05,
      "loss": 0.4203,
      "step": 356
    },
    {
      "epoch": 1.2055169509370394,
      "grad_norm": 0.28754749236703137,
      "learning_rate": 7.523786198228344e-05,
      "loss": 0.4182,
      "step": 357
    },
    {
      "epoch": 1.2088860812802695,
      "grad_norm": 0.3151739472415091,
      "learning_rate": 7.519311983265718e-05,
      "loss": 0.4222,
      "step": 358
    },
    {
      "epoch": 1.2122552116234997,
      "grad_norm": 0.34818120293706006,
      "learning_rate": 7.514818191180397e-05,
      "loss": 0.4162,
      "step": 359
    },
    {
      "epoch": 1.2156243419667299,
      "grad_norm": 0.39359185740609565,
      "learning_rate": 7.510304846970311e-05,
      "loss": 0.4179,
      "step": 360
    },
    {
      "epoch": 1.21899347230996,
      "grad_norm": 0.49169697889587877,
      "learning_rate": 7.505771975742157e-05,
      "loss": 0.42,
      "step": 361
    },
    {
      "epoch": 1.2223626026531902,
      "grad_norm": 0.6588501716182329,
      "learning_rate": 7.501219602711253e-05,
      "loss": 0.4207,
      "step": 362
    },
    {
      "epoch": 1.2257317329964204,
      "grad_norm": 0.6704211038154936,
      "learning_rate": 7.496647753201403e-05,
      "loss": 0.419,
      "step": 363
    },
    {
      "epoch": 1.2291008633396505,
      "grad_norm": 0.5759654898267196,
      "learning_rate": 7.492056452644753e-05,
      "loss": 0.418,
      "step": 364
    },
    {
      "epoch": 1.2324699936828807,
      "grad_norm": 0.46697984648682656,
      "learning_rate": 7.487445726581654e-05,
      "loss": 0.4202,
      "step": 365
    },
    {
      "epoch": 1.2358391240261108,
      "grad_norm": 0.4072897949316555,
      "learning_rate": 7.48281560066051e-05,
      "loss": 0.416,
      "step": 366
    },
    {
      "epoch": 1.2392082543693408,
      "grad_norm": 0.3677992679979327,
      "learning_rate": 7.47816610063765e-05,
      "loss": 0.4184,
      "step": 367
    },
    {
      "epoch": 1.242577384712571,
      "grad_norm": 0.39067043330522583,
      "learning_rate": 7.473497252377171e-05,
      "loss": 0.4246,
      "step": 368
    },
    {
      "epoch": 1.2459465150558011,
      "grad_norm": 0.4371263217453357,
      "learning_rate": 7.468809081850802e-05,
      "loss": 0.4154,
      "step": 369
    },
    {
      "epoch": 1.2493156453990313,
      "grad_norm": 0.466667275005644,
      "learning_rate": 7.464101615137756e-05,
      "loss": 0.4221,
| "step": 370 |
| }, |
| { |
| "epoch": 1.2526847757422614, |
| "grad_norm": 0.40517656554168363, |
| "learning_rate": 7.459374878424585e-05, |
| "loss": 0.4149, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.2560539060854916, |
| "grad_norm": 0.318301658777578, |
| "learning_rate": 7.454628898005043e-05, |
| "loss": 0.4117, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.2594230364287218, |
| "grad_norm": 0.27016373739597804, |
| "learning_rate": 7.449863700279923e-05, |
| "loss": 0.4151, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.262792166771952, |
| "grad_norm": 0.27155592369639425, |
| "learning_rate": 7.445079311756924e-05, |
| "loss": 0.4121, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.266161297115182, |
| "grad_norm": 0.3009069725735746, |
| "learning_rate": 7.440275759050499e-05, |
| "loss": 0.4209, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.2695304274584123, |
| "grad_norm": 0.2981596620618354, |
| "learning_rate": 7.435453068881706e-05, |
| "loss": 0.4127, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.2728995578016424, |
| "grad_norm": 0.323823588891271, |
| "learning_rate": 7.430611268078059e-05, |
| "loss": 0.4097, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.2762686881448726, |
| "grad_norm": 0.3872478492699807, |
| "learning_rate": 7.425750383573384e-05, |
| "loss": 0.4142, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.2796378184881028, |
| "grad_norm": 0.38340080550258643, |
| "learning_rate": 7.420870442407662e-05, |
| "loss": 0.4158, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.283006948831333, |
| "grad_norm": 0.33826799912854405, |
| "learning_rate": 7.415971471726884e-05, |
| "loss": 0.4181, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.286376079174563, |
| "grad_norm": 0.3412122527192401, |
| "learning_rate": 7.411053498782893e-05, |
| "loss": 0.4115, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.2897452095177933, |
| "grad_norm": 0.339753253979875, |
| "learning_rate": 7.406116550933246e-05, |
| "loss": 0.414, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.2931143398610234, |
| "grad_norm": 0.27036940059101494, |
| "learning_rate": 7.401160655641044e-05, |
| "loss": 0.4134, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.2964834702042536, |
| "grad_norm": 0.26771109575539487, |
| "learning_rate": 7.396185840474792e-05, |
| "loss": 0.4145, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.2998526005474837, |
| "grad_norm": 0.26853385874933655, |
| "learning_rate": 7.391192133108243e-05, |
| "loss": 0.4196, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.303221730890714, |
| "grad_norm": 0.25978626330709237, |
| "learning_rate": 7.386179561320243e-05, |
| "loss": 0.4179, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.306590861233944, |
| "grad_norm": 0.27982894045702544, |
| "learning_rate": 7.381148152994573e-05, |
| "loss": 0.4134, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.3099599915771742, |
| "grad_norm": 0.2323947422520883, |
| "learning_rate": 7.376097936119803e-05, |
| "loss": 0.4125, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.3133291219204044, |
| "grad_norm": 0.30635653504253, |
| "learning_rate": 7.371028938789122e-05, |
| "loss": 0.4169, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.3166982522636346, |
| "grad_norm": 0.30800676867326193, |
| "learning_rate": 7.365941189200201e-05, |
| "loss": 0.4124, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.3200673826068647, |
| "grad_norm": 0.26291762070095065, |
| "learning_rate": 7.360834715655019e-05, |
| "loss": 0.4163, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.3234365129500947, |
| "grad_norm": 0.28430697703638136, |
| "learning_rate": 7.35570954655971e-05, |
| "loss": 0.4126, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.3268056432933248, |
| "grad_norm": 0.26422175633500083, |
| "learning_rate": 7.350565710424414e-05, |
| "loss": 0.4089, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.330174773636555, |
| "grad_norm": 0.2426759795255368, |
| "learning_rate": 7.345403235863105e-05, |
| "loss": 0.4164, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.3335439039797852, |
| "grad_norm": 0.25948928094236345, |
| "learning_rate": 7.340222151593443e-05, |
| "loss": 0.4184, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.3369130343230153, |
| "grad_norm": 0.3377516247942669, |
| "learning_rate": 7.335022486436608e-05, |
| "loss": 0.4169, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.3402821646662455, |
| "grad_norm": 0.39062210665305114, |
| "learning_rate": 7.329804269317137e-05, |
| "loss": 0.4212, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.3436512950094757, |
| "grad_norm": 0.43725817266447464, |
| "learning_rate": 7.324567529262775e-05, |
| "loss": 0.4162, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.3470204253527058, |
| "grad_norm": 0.3940951458992484, |
| "learning_rate": 7.319312295404301e-05, |
| "loss": 0.4109, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.350389555695936, |
| "grad_norm": 0.30866362073984877, |
| "learning_rate": 7.31403859697537e-05, |
| "loss": 0.4138, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.3537586860391662, |
| "grad_norm": 0.26875958761791713, |
| "learning_rate": 7.308746463312353e-05, |
| "loss": 0.417, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.3571278163823963, |
| "grad_norm": 0.3115107888080639, |
| "learning_rate": 7.303435923854172e-05, |
| "loss": 0.4122, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.3604969467256265, |
| "grad_norm": 0.36714492695394935, |
| "learning_rate": 7.298107008142139e-05, |
| "loss": 0.4159, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.3638660770688567, |
| "grad_norm": 0.3981685397894353, |
| "learning_rate": 7.292759745819781e-05, |
| "loss": 0.4133, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.3672352074120868, |
| "grad_norm": 0.3069454557345131, |
| "learning_rate": 7.287394166632691e-05, |
| "loss": 0.4208, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.370604337755317, |
| "grad_norm": 0.24748441489038914, |
| "learning_rate": 7.282010300428351e-05, |
| "loss": 0.4104, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.373973468098547, |
| "grad_norm": 0.2118791200536055, |
| "learning_rate": 7.276608177155968e-05, |
| "loss": 0.4124, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.377342598441777, |
| "grad_norm": 0.24963361520147168, |
| "learning_rate": 7.271187826866312e-05, |
| "loss": 0.4149, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.3807117287850073, |
| "grad_norm": 0.31609061336459937, |
| "learning_rate": 7.265749279711543e-05, |
| "loss": 0.4266, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.3840808591282374, |
| "grad_norm": 0.35611885888992273, |
| "learning_rate": 7.260292565945049e-05, |
| "loss": 0.4144, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.3874499894714676, |
| "grad_norm": 0.36813941496856034, |
| "learning_rate": 7.254817715921273e-05, |
| "loss": 0.4148, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.3908191198146977, |
| "grad_norm": 0.3386310771485794, |
| "learning_rate": 7.249324760095544e-05, |
| "loss": 0.4157, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.394188250157928, |
| "grad_norm": 0.3286044748961816, |
| "learning_rate": 7.243813729023913e-05, |
| "loss": 0.418, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.397557380501158, |
| "grad_norm": 0.36106557337509826, |
| "learning_rate": 7.238284653362977e-05, |
| "loss": 0.4127, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.4009265108443882, |
| "grad_norm": 0.3713183412724868, |
| "learning_rate": 7.232737563869711e-05, |
| "loss": 0.4223, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.4042956411876184, |
| "grad_norm": 0.4108948812505769, |
| "learning_rate": 7.227172491401299e-05, |
| "loss": 0.4159, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.4076647715308486, |
| "grad_norm": 0.42746635756913826, |
| "learning_rate": 7.221589466914955e-05, |
| "loss": 0.4183, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.4110339018740787, |
| "grad_norm": 0.4281754377696307, |
| "learning_rate": 7.215988521467763e-05, |
| "loss": 0.4143, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.414403032217309, |
| "grad_norm": 0.34581190375042925, |
| "learning_rate": 7.210369686216492e-05, |
| "loss": 0.4232, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.417772162560539, |
| "grad_norm": 0.24817011068233216, |
| "learning_rate": 7.204732992417431e-05, |
| "loss": 0.4203, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.4211412929037692, |
| "grad_norm": 0.2703015486109723, |
| "learning_rate": 7.199078471426208e-05, |
| "loss": 0.4188, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.4245104232469994, |
| "grad_norm": 0.3376907597722382, |
| "learning_rate": 7.193406154697625e-05, |
| "loss": 0.4123, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.4278795535902296, |
| "grad_norm": 0.35688284736368614, |
| "learning_rate": 7.187716073785471e-05, |
| "loss": 0.4073, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.4312486839334597, |
| "grad_norm": 0.29210262958830335, |
| "learning_rate": 7.18200826034236e-05, |
| "loss": 0.4155, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.4346178142766899, |
| "grad_norm": 0.20624868853539452, |
| "learning_rate": 7.176282746119544e-05, |
| "loss": 0.4082, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.43798694461992, |
| "grad_norm": 0.21431087254932987, |
| "learning_rate": 7.17053956296674e-05, |
| "loss": 0.4072, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.4413560749631502, |
| "grad_norm": 0.25982900003092185, |
| "learning_rate": 7.164778742831954e-05, |
| "loss": 0.4113, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.4447252053063804, |
| "grad_norm": 0.3503298873194117, |
| "learning_rate": 7.159000317761305e-05, |
| "loss": 0.4128, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.4480943356496105, |
| "grad_norm": 0.4693051629184559, |
| "learning_rate": 7.153204319898839e-05, |
| "loss": 0.4138, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.4514634659928407, |
| "grad_norm": 0.502991287048126, |
| "learning_rate": 7.14739078148636e-05, |
| "loss": 0.4157, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.4548325963360709, |
| "grad_norm": 0.5001041791172387, |
| "learning_rate": 7.141559734863245e-05, |
| "loss": 0.4082, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.458201726679301, |
| "grad_norm": 0.4696810029288007, |
| "learning_rate": 7.135711212466264e-05, |
| "loss": 0.4198, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.461570857022531, |
| "grad_norm": 0.43034902073433023, |
| "learning_rate": 7.1298452468294e-05, |
| "loss": 0.4165, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.4649399873657611, |
| "grad_norm": 0.4022839654121198, |
| "learning_rate": 7.123961870583671e-05, |
| "loss": 0.4096, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.4683091177089913, |
| "grad_norm": 0.3107712308577315, |
| "learning_rate": 7.118061116456944e-05, |
| "loss": 0.4137, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.4716782480522215, |
| "grad_norm": 0.3140180702883453, |
| "learning_rate": 7.112143017273759e-05, |
| "loss": 0.4108, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.4750473783954516, |
| "grad_norm": 0.40495663409539695, |
| "learning_rate": 7.106207605955136e-05, |
| "loss": 0.4166, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.4784165087386818, |
| "grad_norm": 0.4652370041483942, |
| "learning_rate": 7.100254915518408e-05, |
| "loss": 0.414, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.481785639081912, |
| "grad_norm": 0.41391982007664, |
| "learning_rate": 7.094284979077015e-05, |
| "loss": 0.4131, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.4851547694251421, |
| "grad_norm": 0.34516805959620245, |
| "learning_rate": 7.088297829840346e-05, |
| "loss": 0.4129, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.4885238997683723, |
| "grad_norm": 0.32652038382328485, |
| "learning_rate": 7.08229350111353e-05, |
| "loss": 0.413, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.4918930301116025, |
| "grad_norm": 0.22506092814882847, |
| "learning_rate": 7.076272026297268e-05, |
| "loss": 0.4127, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.4952621604548326, |
| "grad_norm": 0.2282536847065667, |
| "learning_rate": 7.070233438887639e-05, |
| "loss": 0.4071, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.4986312907980628, |
| "grad_norm": 0.2446847320184482, |
| "learning_rate": 7.064177772475912e-05, |
| "loss": 0.4138, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.502000421141293, |
| "grad_norm": 0.25152698752852437, |
| "learning_rate": 7.05810506074837e-05, |
| "loss": 0.4141, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.505369551484523, |
| "grad_norm": 0.2548217617366647, |
| "learning_rate": 7.052015337486109e-05, |
| "loss": 0.4098, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.508738681827753, |
| "grad_norm": 0.2731777853595498, |
| "learning_rate": 7.045908636564858e-05, |
| "loss": 0.4118, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.5121078121709832, |
| "grad_norm": 0.3121024086238583, |
| "learning_rate": 7.03978499195479e-05, |
| "loss": 0.4111, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.5154769425142134, |
| "grad_norm": 0.28013154989340816, |
| "learning_rate": 7.03364443772033e-05, |
| "loss": 0.4123, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.5188460728574436, |
| "grad_norm": 0.20045789950968235, |
| "learning_rate": 7.027487008019969e-05, |
| "loss": 0.41, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.5222152032006737, |
| "grad_norm": 0.1935253416836786, |
| "learning_rate": 7.021312737106068e-05, |
| "loss": 0.4184, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.5255843335439039, |
| "grad_norm": 0.2182563856327327, |
| "learning_rate": 7.015121659324678e-05, |
| "loss": 0.4121, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.528953463887134, |
| "grad_norm": 0.20129933934815375, |
| "learning_rate": 7.00891380911534e-05, |
| "loss": 0.4136, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.5323225942303642, |
| "grad_norm": 0.19011310030838788, |
| "learning_rate": 7.002689221010897e-05, |
| "loss": 0.4113, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.5356917245735944, |
| "grad_norm": 0.19585723303180483, |
| "learning_rate": 6.9964479296373e-05, |
| "loss": 0.4139, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.5390608549168245, |
| "grad_norm": 0.1740680287737997, |
| "learning_rate": 6.990189969713416e-05, |
| "loss": 0.4141, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.5424299852600547, |
| "grad_norm": 0.2068670733390012, |
| "learning_rate": 6.983915376050833e-05, |
| "loss": 0.4093, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.5457991156032849, |
| "grad_norm": 0.2583283837253456, |
| "learning_rate": 6.977624183553676e-05, |
| "loss": 0.4192, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.549168245946515, |
| "grad_norm": 0.28194252885557924, |
| "learning_rate": 6.971316427218399e-05, |
| "loss": 0.412, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.5525373762897452, |
| "grad_norm": 0.27071463569696164, |
| "learning_rate": 6.964992142133602e-05, |
| "loss": 0.4207, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.5559065066329754, |
| "grad_norm": 0.27470579632282327, |
| "learning_rate": 6.958651363479822e-05, |
| "loss": 0.4165, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.5592756369762055, |
| "grad_norm": 0.2703402600040993, |
| "learning_rate": 6.952294126529356e-05, |
| "loss": 0.4134, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.5626447673194357, |
| "grad_norm": 0.26465479604538705, |
| "learning_rate": 6.94592046664605e-05, |
| "loss": 0.4136, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.5660138976626659, |
| "grad_norm": 0.31132857636043815, |
| "learning_rate": 6.939530419285104e-05, |
| "loss": 0.4163, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.569383028005896, |
| "grad_norm": 0.4012221142274, |
| "learning_rate": 6.933124019992884e-05, |
| "loss": 0.4138, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.5727521583491262, |
| "grad_norm": 0.5021621002447393, |
| "learning_rate": 6.926701304406713e-05, |
| "loss": 0.4105, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.5761212886923563, |
| "grad_norm": 0.5905418251776803, |
| "learning_rate": 6.920262308254683e-05, |
| "loss": 0.4147, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.5794904190355865, |
| "grad_norm": 0.6182317762023337, |
| "learning_rate": 6.913807067355445e-05, |
| "loss": 0.4128, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.5828595493788167, |
| "grad_norm": 0.4945917435433832, |
| "learning_rate": 6.907335617618018e-05, |
| "loss": 0.4167, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.5862286797220468, |
| "grad_norm": 0.3166116083838581, |
| "learning_rate": 6.90084799504159e-05, |
| "loss": 0.4136, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.589597810065277, |
| "grad_norm": 0.2848441164225104, |
| "learning_rate": 6.894344235715311e-05, |
| "loss": 0.4127, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.5929669404085072, |
| "grad_norm": 0.35210847111444277, |
| "learning_rate": 6.887824375818099e-05, |
| "loss": 0.4125, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.5963360707517373, |
| "grad_norm": 0.36122192833869504, |
| "learning_rate": 6.881288451618431e-05, |
| "loss": 0.4175, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.5997052010949675, |
| "grad_norm": 0.30874010342588315, |
| "learning_rate": 6.874736499474154e-05, |
| "loss": 0.4123, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.6030743314381977, |
| "grad_norm": 0.2415425383601781, |
| "learning_rate": 6.868168555832266e-05, |
| "loss": 0.409, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.6064434617814278, |
| "grad_norm": 0.2777593930598247, |
| "learning_rate": 6.861584657228728e-05, |
| "loss": 0.4109, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.6098125921246578, |
| "grad_norm": 0.2552160489277856, |
| "learning_rate": 6.854984840288253e-05, |
| "loss": 0.4063, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.613181722467888, |
| "grad_norm": 0.21292379117303817, |
| "learning_rate": 6.848369141724104e-05, |
| "loss": 0.4113, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.616550852811118, |
| "grad_norm": 0.25826725556041485, |
| "learning_rate": 6.841737598337886e-05, |
| "loss": 0.4162, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.6199199831543483, |
| "grad_norm": 0.24587379643844692, |
| "learning_rate": 6.835090247019354e-05, |
| "loss": 0.4098, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.6232891134975784, |
| "grad_norm": 0.22506059025604672, |
| "learning_rate": 6.828427124746191e-05, |
| "loss": 0.4177, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.6266582438408086, |
| "grad_norm": 0.2625291980432951, |
| "learning_rate": 6.821748268583813e-05, |
| "loss": 0.4138, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.6300273741840388, |
| "grad_norm": 0.2899682108073399, |
| "learning_rate": 6.815053715685161e-05, |
| "loss": 0.4112, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.633396504527269, |
| "grad_norm": 0.24684733944107418, |
| "learning_rate": 6.808343503290491e-05, |
| "loss": 0.4084, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.636765634870499, |
| "grad_norm": 0.22856568944562095, |
| "learning_rate": 6.80161766872717e-05, |
| "loss": 0.4099, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.6401347652137293, |
| "grad_norm": 0.2528553309235842, |
| "learning_rate": 6.79487624940947e-05, |
| "loss": 0.4074, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.6435038955569592, |
| "grad_norm": 0.24954291821287325, |
| "learning_rate": 6.788119282838355e-05, |
| "loss": 0.4156, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.6468730259001894, |
| "grad_norm": 0.2486958212588815, |
| "learning_rate": 6.781346806601273e-05, |
| "loss": 0.4148, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.6502421562434195, |
| "grad_norm": 0.20838834765340428, |
| "learning_rate": 6.774558858371952e-05, |
| "loss": 0.4107, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.6536112865866497, |
| "grad_norm": 0.157993940020379, |
| "learning_rate": 6.767755475910185e-05, |
| "loss": 0.4112, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.6569804169298799, |
| "grad_norm": 0.24383891745288697, |
| "learning_rate": 6.760936697061626e-05, |
| "loss": 0.4117, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.66034954727311, |
| "grad_norm": 0.28630859094765176, |
| "learning_rate": 6.754102559757569e-05, |
| "loss": 0.4108, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.6637186776163402, |
| "grad_norm": 0.2744705368738465, |
| "learning_rate": 6.74725310201475e-05, |
| "loss": 0.4068, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.6670878079595703, |
| "grad_norm": 0.2832510381791776, |
| "learning_rate": 6.740388361935125e-05, |
| "loss": 0.4072, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.6704569383028005, |
| "grad_norm": 0.2988249231230451, |
| "learning_rate": 6.733508377705661e-05, |
| "loss": 0.4077, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.6738260686460307, |
| "grad_norm": 0.24557523045791532, |
| "learning_rate": 6.726613187598132e-05, |
| "loss": 0.416, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.6771951989892608, |
| "grad_norm": 0.21450213834423756, |
| "learning_rate": 6.71970282996889e-05, |
| "loss": 0.4099, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.680564329332491, |
| "grad_norm": 0.2564463597465919, |
| "learning_rate": 6.712777343258666e-05, |
| "loss": 0.4113, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.6839334596757212, |
| "grad_norm": 0.28973958295073354, |
| "learning_rate": 6.705836765992348e-05, |
| "loss": 0.4173, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.6873025900189513, |
| "grad_norm": 0.3093418967185147, |
| "learning_rate": 6.698881136778771e-05, |
| "loss": 0.4173, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.6906717203621815, |
| "grad_norm": 0.30710292961925306, |
| "learning_rate": 6.691910494310499e-05, |
| "loss": 0.4202, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.6940408507054117, |
| "grad_norm": 0.298386372490933, |
| "learning_rate": 6.684924877363613e-05, |
| "loss": 0.4063, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.6974099810486418, |
| "grad_norm": 0.31358421654801716, |
| "learning_rate": 6.67792432479749e-05, |
| "loss": 0.4117, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.700779111391872, |
| "grad_norm": 0.34684913918298366, |
| "learning_rate": 6.670908875554594e-05, |
| "loss": 0.4103, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.7041482417351022, |
| "grad_norm": 0.3071849696400485, |
| "learning_rate": 6.663878568660258e-05, |
| "loss": 0.4064, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.7075173720783323, |
| "grad_norm": 0.25934260311596186, |
| "learning_rate": 6.656833443222458e-05, |
| "loss": 0.4026, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.7108865024215625, |
| "grad_norm": 0.254331135385578, |
| "learning_rate": 6.649773538431605e-05, |
| "loss": 0.4123, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.7142556327647926, |
| "grad_norm": 0.2696672284837906, |
| "learning_rate": 6.642698893560327e-05, |
| "loss": 0.4135, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.7176247631080228, |
| "grad_norm": 0.3170338993835499, |
| "learning_rate": 6.635609547963243e-05, |
| "loss": 0.4078, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.720993893451253, |
| "grad_norm": 0.34598694657993484, |
| "learning_rate": 6.628505541076755e-05, |
| "loss": 0.4143, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.7243630237944831, |
| "grad_norm": 0.3659302514618013, |
| "learning_rate": 6.621386912418816e-05, |
| "loss": 0.413, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.7277321541377133, |
| "grad_norm": 0.3036155922766547, |
| "learning_rate": 6.614253701588718e-05, |
| "loss": 0.413, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.7311012844809435, |
| "grad_norm": 0.26442302840915777, |
| "learning_rate": 6.607105948266872e-05, |
| "loss": 0.4141, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.7344704148241736, |
| "grad_norm": 0.2820703196464, |
| "learning_rate": 6.599943692214587e-05, |
| "loss": 0.4154, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.7378395451674038, |
| "grad_norm": 0.2716579783783052, |
| "learning_rate": 6.592766973273843e-05, |
| "loss": 0.418, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.741208675510634, |
| "grad_norm": 0.2320214556767005, |
| "learning_rate": 6.585575831367078e-05, |
| "loss": 0.4136, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.7445778058538641, |
| "grad_norm": 0.20790915888905742, |
| "learning_rate": 6.578370306496957e-05, |
| "loss": 0.4126, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.747946936197094, |
| "grad_norm": 0.2165582926633229, |
| "learning_rate": 6.571150438746157e-05, |
| "loss": 0.4112, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.7513160665403242, |
| "grad_norm": 0.24261057128754013, |
| "learning_rate": 6.563916268277144e-05, |
| "loss": 0.413, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.7546851968835544, |
| "grad_norm": 0.2755800264624728, |
| "learning_rate": 6.55666783533194e-05, |
| "loss": 0.4166, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.7580543272267846, |
| "grad_norm": 0.28813858434017786, |
| "learning_rate": 6.549405180231911e-05, |
| "loss": 0.404, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.7614234575700147, |
| "grad_norm": 0.24090919880210407, |
| "learning_rate": 6.542128343377536e-05, |
| "loss": 0.4075, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.764792587913245, |
| "grad_norm": 0.21389800108034238, |
| "learning_rate": 6.534837365248185e-05, |
| "loss": 0.4124, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.768161718256475, |
| "grad_norm": 0.2562042134322129, |
| "learning_rate": 6.527532286401889e-05, |
| "loss": 0.4174, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.7715308485997052, |
| "grad_norm": 0.2571401145743441, |
| "learning_rate": 6.520213147475123e-05, |
| "loss": 0.4144, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.7748999789429354, |
| "grad_norm": 0.2423820773625362, |
| "learning_rate": 6.51287998918257e-05, |
| "loss": 0.4046, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.7782691092861656, |
| "grad_norm": 0.2310131148631897, |
| "learning_rate": 6.505532852316904e-05, |
| "loss": 0.407, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.7816382396293957, |
| "grad_norm": 0.2467085051059651, |
| "learning_rate": 6.498171777748557e-05, |
| "loss": 0.4134, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.7850073699726257, |
| "grad_norm": 0.2429312927228722, |
| "learning_rate": 6.49079680642549e-05, |
| "loss": 0.4136, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.7883765003158558, |
| "grad_norm": 0.18962286619000535, |
| "learning_rate": 6.483407979372975e-05, |
| "loss": 0.4094, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.791745630659086, |
| "grad_norm": 0.17276030637120937, |
| "learning_rate": 6.476005337693355e-05, |
| "loss": 0.4127, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.7951147610023162, |
| "grad_norm": 0.1991873488324741, |
| "learning_rate": 6.468588922565822e-05, |
| "loss": 0.407, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.7984838913455463, |
| "grad_norm": 0.23230143768755912, |
| "learning_rate": 6.461158775246186e-05, |
| "loss": 0.4069, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.8018530216887765, |
| "grad_norm": 0.25362081452848795, |
| "learning_rate": 6.453714937066648e-05, |
| "loss": 0.4089, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.8052221520320066, |
| "grad_norm": 0.20024317986028692, |
| "learning_rate": 6.446257449435566e-05, |
| "loss": 0.4062, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.8085912823752368, |
| "grad_norm": 0.16636181558776822, |
| "learning_rate": 6.438786353837228e-05, |
| "loss": 0.4061, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.811960412718467, |
| "grad_norm": 0.20687002125002474, |
| "learning_rate": 6.43130169183162e-05, |
| "loss": 0.4131, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.8153295430616971, |
| "grad_norm": 0.2568138645034864, |
| "learning_rate": 6.423803505054193e-05, |
| "loss": 0.411, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.8186986734049273, |
| "grad_norm": 0.3369872578212292, |
| "learning_rate": 6.416291835215636e-05, |
| "loss": 0.4077, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.8220678037481575, |
| "grad_norm": 0.41379320932213953, |
| "learning_rate": 6.408766724101638e-05, |
| "loss": 0.4077, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.8254369340913876, |
| "grad_norm": 0.43767998472550695, |
| "learning_rate": 6.401228213572663e-05, |
| "loss": 0.4151, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.8288060644346178, |
| "grad_norm": 0.4536984763596022, |
| "learning_rate": 6.393676345563708e-05, |
| "loss": 0.42, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.832175194777848, |
| "grad_norm": 0.4692529959956868, |
| "learning_rate": 6.386111162084078e-05, |
| "loss": 0.4002, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.8355443251210781, |
| "grad_norm": 0.34237321055490366, |
| "learning_rate": 6.378532705217148e-05, |
| "loss": 0.406, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.8389134554643083, |
| "grad_norm": 0.2659729255014706, |
| "learning_rate": 6.370941017120127e-05, |
| "loss": 0.4135, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.8422825858075385, |
| "grad_norm": 0.32797296963486666, |
| "learning_rate": 6.363336140023833e-05, |
| "loss": 0.4088, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.8456517161507686, |
| "grad_norm": 0.35579650932418716, |
| "learning_rate": 6.355718116232444e-05, |
| "loss": 0.4093, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.8490208464939988, |
| "grad_norm": 0.2907411351475013, |
| "learning_rate": 6.348086988123274e-05, |
| "loss": 0.4116, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.852389976837229, |
| "grad_norm": 0.2732388318681213, |
| "learning_rate": 6.340442798146535e-05, |
| "loss": 0.4091, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.855759107180459, |
| "grad_norm": 0.35761144655913124, |
| "learning_rate": 6.332785588825094e-05, |
| "loss": 0.4037, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.8591282375236893, |
| "grad_norm": 0.3014328362434633, |
| "learning_rate": 6.325115402754245e-05, |
| "loss": 0.4072, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.8624973678669194, |
| "grad_norm": 0.2340334979203501, |
| "learning_rate": 6.317432282601469e-05, |
| "loss": 0.403, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.8658664982101496, |
| "grad_norm": 0.33855256005840595, |
| "learning_rate": 6.309736271106193e-05, |
| "loss": 0.4106, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.8692356285533798, |
| "grad_norm": 0.31482993852294594, |
| "learning_rate": 6.302027411079562e-05, |
| "loss": 0.4079, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.87260475889661, |
| "grad_norm": 0.21683415129270545, |
| "learning_rate": 6.294305745404185e-05, |
| "loss": 0.4032, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.87597388923984, |
| "grad_norm": 0.209469978649313, |
| "learning_rate": 6.286571317033915e-05, |
| "loss": 0.4088, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.8793430195830703, |
| "grad_norm": 0.2816343476274617, |
| "learning_rate": 6.278824168993596e-05, |
| "loss": 0.4126, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.8827121499263004, |
| "grad_norm": 0.32252631746288557, |
| "learning_rate": 6.271064344378832e-05, |
| "loss": 0.4086, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.8860812802695306, |
| "grad_norm": 0.2900131891387989, |
| "learning_rate": 6.263291886355738e-05, |
| "loss": 0.4086, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.8894504106127605, |
| "grad_norm": 0.26445922268042416, |
| "learning_rate": 6.255506838160711e-05, |
| "loss": 0.4093, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.8928195409559907, |
| "grad_norm": 0.2561028521945913, |
| "learning_rate": 6.247709243100185e-05, |
| "loss": 0.4136, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.8961886712992209, |
| "grad_norm": 0.23899571940882475, |
| "learning_rate": 6.239899144550383e-05, |
| "loss": 0.4058, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.899557801642451, |
| "grad_norm": 0.2338421290415243, |
| "learning_rate": 6.232076585957087e-05, |
| "loss": 0.4074, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.9029269319856812, |
| "grad_norm": 0.18752299712254275, |
| "learning_rate": 6.224241610835391e-05, |
| "loss": 0.4096, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.9062960623289114, |
| "grad_norm": 0.19324708447438393, |
| "learning_rate": 6.216394262769459e-05, |
| "loss": 0.4096, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.9096651926721415, |
| "grad_norm": 0.21276012461948887, |
| "learning_rate": 6.208534585412282e-05, |
| "loss": 0.4033, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.9130343230153717, |
| "grad_norm": 0.18970083289771164, |
| "learning_rate": 6.200662622485435e-05, |
| "loss": 0.4054, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.9164034533586018, |
| "grad_norm": 0.1696360552220803, |
| "learning_rate": 6.19277841777884e-05, |
| "loss": 0.4069, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.919772583701832, |
| "grad_norm": 0.19478504599245822, |
| "learning_rate": 6.18488201515051e-05, |
| "loss": 0.4054, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.923141714045062, |
| "grad_norm": 0.16721019486842992, |
| "learning_rate": 6.176973458526317e-05, |
| "loss": 0.4142, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.9265108443882921, |
| "grad_norm": 0.18059816629328238, |
| "learning_rate": 6.169052791899742e-05, |
| "loss": 0.4047, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.9298799747315223, |
| "grad_norm": 0.2125539453111369, |
| "learning_rate": 6.161120059331628e-05, |
| "loss": 0.4074, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.9332491050747524, |
| "grad_norm": 0.19087275687720429, |
| "learning_rate": 6.153175304949946e-05, |
| "loss": 0.411, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.9366182354179826, |
| "grad_norm": 0.18049162279809125, |
| "learning_rate": 6.14521857294953e-05, |
| "loss": 0.4055, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.9399873657612128, |
| "grad_norm": 0.17375875826436044, |
| "learning_rate": 6.137249907591855e-05, |
| "loss": 0.4065, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.943356496104443, |
| "grad_norm": 0.1739704448036202, |
| "learning_rate": 6.129269353204769e-05, |
| "loss": 0.4055, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.946725626447673, |
| "grad_norm": 0.18538527661707113, |
| "learning_rate": 6.121276954182261e-05, |
| "loss": 0.4097, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.9500947567909033, |
| "grad_norm": 0.15156397322647622, |
| "learning_rate": 6.113272754984206e-05, |
| "loss": 0.4061, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.9534638871341334, |
| "grad_norm": 0.18018187705246097, |
| "learning_rate": 6.105256800136125e-05, |
| "loss": 0.4086, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.9568330174773636, |
| "grad_norm": 0.1842284584819115, |
| "learning_rate": 6.0972291342289274e-05, |
| "loss": 0.413, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.9602021478205938, |
| "grad_norm": 0.20065268901018266, |
| "learning_rate": 6.0891898019186726e-05, |
| "loss": 0.4068, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.963571278163824, |
| "grad_norm": 0.20725303582942523, |
| "learning_rate": 6.081138847926317e-05, |
| "loss": 0.4102, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.966940408507054, |
| "grad_norm": 0.19644421357341532, |
| "learning_rate": 6.0730763170374636e-05, |
| "loss": 0.4053, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.9703095388502843, |
| "grad_norm": 0.20950085034614344, |
| "learning_rate": 6.065002254102116e-05, |
| "loss": 0.4043, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.9736786691935144, |
| "grad_norm": 0.22898989423400687, |
| "learning_rate": 6.056916704034429e-05, |
| "loss": 0.4038, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.9770477995367446, |
| "grad_norm": 0.2379556008347109, |
| "learning_rate": 6.048819711812457e-05, |
| "loss": 0.4075, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.9804169298799748, |
| "grad_norm": 0.23608922426333814, |
| "learning_rate": 6.040711322477906e-05, |
| "loss": 0.4074, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.983786060223205, |
| "grad_norm": 0.2036587578092891, |
| "learning_rate": 6.032591581135878e-05, |
| "loss": 0.4116, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.987155190566435, |
| "grad_norm": 0.1851902404809834, |
| "learning_rate": 6.024460532954626e-05, |
| "loss": 0.4015, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.9905243209096652, |
| "grad_norm": 0.18802588423448818, |
| "learning_rate": 6.0163182231652985e-05, |
| "loss": 0.4054, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.9938934512528954, |
| "grad_norm": 0.22345260630855865, |
| "learning_rate": 6.008164697061695e-05, |
| "loss": 0.4055, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.9972625815961256, |
| "grad_norm": 0.23969549917986255, |
| "learning_rate": 6.000000000000001e-05, |
| "loss": 0.4015, |
| "step": 592 |
| }, |
| { |
| "epoch": 2.00336913034323, |
| "grad_norm": 0.2867299003150961, |
| "learning_rate": 5.991824177398549e-05, |
| "loss": 0.3913, |
| "step": 593 |
| }, |
| { |
| "epoch": 2.0067382606864603, |
| "grad_norm": 0.34375862252314415, |
| "learning_rate": 5.983637274737558e-05, |
| "loss": 0.391, |
| "step": 594 |
| }, |
| { |
| "epoch": 2.0101073910296905, |
| "grad_norm": 0.3635152444198319, |
| "learning_rate": 5.975439337558886e-05, |
| "loss": 0.3799, |
| "step": 595 |
| }, |
| { |
| "epoch": 2.0134765213729207, |
| "grad_norm": 0.3422619581016819, |
| "learning_rate": 5.967230411465768e-05, |
| "loss": 0.388, |
| "step": 596 |
| }, |
| { |
| "epoch": 2.016845651716151, |
| "grad_norm": 0.32857568135445225, |
| "learning_rate": 5.9590105421225715e-05, |
| "loss": 0.3873, |
| "step": 597 |
| }, |
| { |
| "epoch": 2.020214782059381, |
| "grad_norm": 0.34465546224144156, |
| "learning_rate": 5.950779775254539e-05, |
| "loss": 0.3864, |
| "step": 598 |
| }, |
| { |
| "epoch": 2.023583912402611, |
| "grad_norm": 0.3318091541966093, |
| "learning_rate": 5.9425381566475316e-05, |
| "loss": 0.3901, |
| "step": 599 |
| }, |
| { |
| "epoch": 2.0269530427458413, |
| "grad_norm": 0.3211852458337534, |
| "learning_rate": 5.934285732147778e-05, |
| "loss": 0.3865, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.0303221730890715, |
| "grad_norm": 0.28372803606540153, |
| "learning_rate": 5.9260225476616157e-05, |
| "loss": 0.3809, |
| "step": 601 |
| }, |
| { |
| "epoch": 2.0336913034323016, |
| "grad_norm": 0.26378333051858827, |
| "learning_rate": 5.91774864915524e-05, |
| "loss": 0.3825, |
| "step": 602 |
| }, |
| { |
| "epoch": 2.037060433775532, |
| "grad_norm": 0.2699942011391507, |
| "learning_rate": 5.909464082654442e-05, |
| "loss": 0.3814, |
| "step": 603 |
| }, |
| { |
| "epoch": 2.040429564118762, |
| "grad_norm": 0.32423565538212784, |
| "learning_rate": 5.90116889424436e-05, |
| "loss": 0.3949, |
| "step": 604 |
| }, |
| { |
| "epoch": 2.043798694461992, |
| "grad_norm": 0.3504800062724603, |
| "learning_rate": 5.8928631300692185e-05, |
| "loss": 0.3919, |
| "step": 605 |
| }, |
| { |
| "epoch": 2.0471678248052223, |
| "grad_norm": 0.28670213447600656, |
| "learning_rate": 5.884546836332072e-05, |
| "loss": 0.3848, |
| "step": 606 |
| }, |
| { |
| "epoch": 2.0505369551484525, |
| "grad_norm": 0.24765267252916567, |
| "learning_rate": 5.8762200592945484e-05, |
| "loss": 0.3862, |
| "step": 607 |
| }, |
| { |
| "epoch": 2.0539060854916826, |
| "grad_norm": 0.25397158563496697, |
| "learning_rate": 5.867882845276593e-05, |
| "loss": 0.384, |
| "step": 608 |
| }, |
| { |
| "epoch": 2.057275215834913, |
| "grad_norm": 0.19777815923412465, |
| "learning_rate": 5.859535240656208e-05, |
| "loss": 0.385, |
| "step": 609 |
| }, |
| { |
| "epoch": 2.060644346178143, |
| "grad_norm": 0.25257499668230105, |
| "learning_rate": 5.851177291869197e-05, |
| "loss": 0.3902, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.064013476521373, |
| "grad_norm": 0.23438152088089984, |
| "learning_rate": 5.842809045408905e-05, |
| "loss": 0.3828, |
| "step": 611 |
| }, |
| { |
| "epoch": 2.0673826068646033, |
| "grad_norm": 0.24579596547862945, |
| "learning_rate": 5.834430547825964e-05, |
| "loss": 0.3895, |
| "step": 612 |
| }, |
| { |
| "epoch": 2.070751737207833, |
| "grad_norm": 0.254567202187919, |
| "learning_rate": 5.826041845728026e-05, |
| "loss": 0.3884, |
| "step": 613 |
| }, |
| { |
| "epoch": 2.074120867551063, |
| "grad_norm": 0.26694805867978466, |
| "learning_rate": 5.8176429857795104e-05, |
| "loss": 0.3884, |
| "step": 614 |
| }, |
| { |
| "epoch": 2.0774899978942933, |
| "grad_norm": 0.292686078529123, |
| "learning_rate": 5.809234014701342e-05, |
| "loss": 0.3869, |
| "step": 615 |
| }, |
| { |
| "epoch": 2.0808591282375235, |
| "grad_norm": 0.2543773210365024, |
| "learning_rate": 5.8008149792706936e-05, |
| "loss": 0.3841, |
| "step": 616 |
| }, |
| { |
| "epoch": 2.0842282585807537, |
| "grad_norm": 0.23117543050120432, |
| "learning_rate": 5.7923859263207205e-05, |
| "loss": 0.3839, |
| "step": 617 |
| }, |
| { |
| "epoch": 2.087597388923984, |
| "grad_norm": 0.32949270894440474, |
| "learning_rate": 5.783946902740304e-05, |
| "loss": 0.3848, |
| "step": 618 |
| }, |
| { |
| "epoch": 2.090966519267214, |
| "grad_norm": 0.3487344164810163, |
| "learning_rate": 5.7754979554737924e-05, |
| "loss": 0.3841, |
| "step": 619 |
| }, |
| { |
| "epoch": 2.094335649610444, |
| "grad_norm": 0.23249972606551436, |
| "learning_rate": 5.767039131520733e-05, |
| "loss": 0.3808, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.0977047799536743, |
| "grad_norm": 0.1642526127565639, |
| "learning_rate": 5.758570477935618e-05, |
| "loss": 0.3852, |
| "step": 621 |
| }, |
| { |
| "epoch": 2.1010739102969045, |
| "grad_norm": 0.22737138050339126, |
| "learning_rate": 5.750092041827618e-05, |
| "loss": 0.3862, |
| "step": 622 |
| }, |
| { |
| "epoch": 2.1044430406401347, |
| "grad_norm": 0.22187422496371617, |
| "learning_rate": 5.7416038703603216e-05, |
| "loss": 0.39, |
| "step": 623 |
| }, |
| { |
| "epoch": 2.107812170983365, |
| "grad_norm": 0.1976542359852637, |
| "learning_rate": 5.7331060107514754e-05, |
| "loss": 0.3828, |
| "step": 624 |
| }, |
| { |
| "epoch": 2.111181301326595, |
| "grad_norm": 0.22929255732564582, |
| "learning_rate": 5.724598510272714e-05, |
| "loss": 0.3865, |
| "step": 625 |
| }, |
| { |
| "epoch": 2.114550431669825, |
| "grad_norm": 0.2281829564525587, |
| "learning_rate": 5.716081416249307e-05, |
| "loss": 0.3834, |
| "step": 626 |
| }, |
| { |
| "epoch": 2.1179195620130553, |
| "grad_norm": 0.1711530750792344, |
| "learning_rate": 5.707554776059886e-05, |
| "loss": 0.3864, |
| "step": 627 |
| }, |
| { |
| "epoch": 2.1212886923562855, |
| "grad_norm": 0.1952598465412235, |
| "learning_rate": 5.699018637136192e-05, |
| "loss": 0.3853, |
| "step": 628 |
| }, |
| { |
| "epoch": 2.1246578226995156, |
| "grad_norm": 0.21178404694012465, |
| "learning_rate": 5.6904730469627985e-05, |
| "loss": 0.394, |
| "step": 629 |
| }, |
| { |
| "epoch": 2.128026953042746, |
| "grad_norm": 0.2291084803798316, |
| "learning_rate": 5.681918053076858e-05, |
| "loss": 0.3851, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.131396083385976, |
| "grad_norm": 0.2550272051240587, |
| "learning_rate": 5.673353703067832e-05, |
| "loss": 0.3872, |
| "step": 631 |
| }, |
| { |
| "epoch": 2.134765213729206, |
| "grad_norm": 0.2497998419444254, |
| "learning_rate": 5.664780044577231e-05, |
| "loss": 0.3881, |
| "step": 632 |
| }, |
| { |
| "epoch": 2.1381343440724363, |
| "grad_norm": 0.2222082480877385, |
| "learning_rate": 5.6561971252983424e-05, |
| "loss": 0.388, |
| "step": 633 |
| }, |
| { |
| "epoch": 2.1415034744156665, |
| "grad_norm": 0.18680744639544267, |
| "learning_rate": 5.6476049929759714e-05, |
| "loss": 0.3891, |
| "step": 634 |
| }, |
| { |
| "epoch": 2.1448726047588966, |
| "grad_norm": 0.21245971460544757, |
| "learning_rate": 5.6390036954061726e-05, |
| "loss": 0.3863, |
| "step": 635 |
| }, |
| { |
| "epoch": 2.148241735102127, |
| "grad_norm": 0.2162219122370638, |
| "learning_rate": 5.6303932804359857e-05, |
| "loss": 0.3909, |
| "step": 636 |
| }, |
| { |
| "epoch": 2.151610865445357, |
| "grad_norm": 0.15581628741660436, |
| "learning_rate": 5.621773795963166e-05, |
| "loss": 0.3879, |
| "step": 637 |
| }, |
| { |
| "epoch": 2.154979995788587, |
| "grad_norm": 0.22990888646168536, |
| "learning_rate": 5.613145289935926e-05, |
| "loss": 0.3882, |
| "step": 638 |
| }, |
| { |
| "epoch": 2.1583491261318173, |
| "grad_norm": 0.24959544004712048, |
| "learning_rate": 5.6045078103526545e-05, |
| "loss": 0.3799, |
| "step": 639 |
| }, |
| { |
| "epoch": 2.1617182564750475, |
| "grad_norm": 0.2308113655952683, |
| "learning_rate": 5.595861405261666e-05, |
| "loss": 0.3879, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.1650873868182776, |
| "grad_norm": 0.2092244335914582, |
| "learning_rate": 5.58720612276092e-05, |
| "loss": 0.3871, |
| "step": 641 |
| }, |
| { |
| "epoch": 2.168456517161508, |
| "grad_norm": 0.2134067897632055, |
| "learning_rate": 5.578542010997764e-05, |
| "loss": 0.3822, |
| "step": 642 |
| }, |
| { |
| "epoch": 2.171825647504738, |
| "grad_norm": 0.20839647987055449, |
| "learning_rate": 5.569869118168655e-05, |
| "loss": 0.3848, |
| "step": 643 |
| }, |
| { |
| "epoch": 2.175194777847968, |
| "grad_norm": 0.16985344503865618, |
| "learning_rate": 5.561187492518903e-05, |
| "loss": 0.3858, |
| "step": 644 |
| }, |
| { |
| "epoch": 2.1785639081911983, |
| "grad_norm": 0.20941799721128232, |
| "learning_rate": 5.5524971823423905e-05, |
| "loss": 0.392, |
| "step": 645 |
| }, |
| { |
| "epoch": 2.1819330385344284, |
| "grad_norm": 0.21048667694813664, |
| "learning_rate": 5.5437982359813156e-05, |
| "loss": 0.3837, |
| "step": 646 |
| }, |
| { |
| "epoch": 2.1853021688776586, |
| "grad_norm": 0.17246060013503955, |
| "learning_rate": 5.5350907018259135e-05, |
| "loss": 0.3863, |
| "step": 647 |
| }, |
| { |
| "epoch": 2.1886712992208888, |
| "grad_norm": 0.1808917523018754, |
| "learning_rate": 5.526374628314195e-05, |
| "loss": 0.3873, |
| "step": 648 |
| }, |
| { |
| "epoch": 2.192040429564119, |
| "grad_norm": 0.16962189075007583, |
| "learning_rate": 5.5176500639316693e-05, |
| "loss": 0.3806, |
| "step": 649 |
| }, |
| { |
| "epoch": 2.195409559907349, |
| "grad_norm": 0.15829489129124838, |
| "learning_rate": 5.50891705721108e-05, |
| "loss": 0.3912, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.1987786902505793, |
| "grad_norm": 0.20128590320313494, |
| "learning_rate": 5.5001756567321355e-05, |
| "loss": 0.3792, |
| "step": 651 |
| }, |
| { |
| "epoch": 2.2021478205938094, |
| "grad_norm": 0.218877863583923, |
| "learning_rate": 5.4914259111212355e-05, |
| "loss": 0.3865, |
| "step": 652 |
| }, |
| { |
| "epoch": 2.2055169509370396, |
| "grad_norm": 0.17606235529471279, |
| "learning_rate": 5.482667869051199e-05, |
| "loss": 0.3917, |
| "step": 653 |
| }, |
| { |
| "epoch": 2.2088860812802693, |
| "grad_norm": 0.14890556371643418, |
| "learning_rate": 5.473901579241e-05, |
| "loss": 0.38, |
| "step": 654 |
| }, |
| { |
| "epoch": 2.2122552116235, |
| "grad_norm": 0.1654643380961197, |
| "learning_rate": 5.4651270904554915e-05, |
| "loss": 0.394, |
| "step": 655 |
| }, |
| { |
| "epoch": 2.2156243419667296, |
| "grad_norm": 0.1570214426630876, |
| "learning_rate": 5.4563444515051354e-05, |
| "loss": 0.3854, |
| "step": 656 |
| }, |
| { |
| "epoch": 2.21899347230996, |
| "grad_norm": 0.1691883131216727, |
| "learning_rate": 5.44755371124573e-05, |
| "loss": 0.3851, |
| "step": 657 |
| }, |
| { |
| "epoch": 2.22236260265319, |
| "grad_norm": 0.17557198906026328, |
| "learning_rate": 5.438754918578144e-05, |
| "loss": 0.3913, |
| "step": 658 |
| }, |
| { |
| "epoch": 2.22573173299642, |
| "grad_norm": 0.16768631591392807, |
| "learning_rate": 5.429948122448031e-05, |
| "loss": 0.386, |
| "step": 659 |
| }, |
| { |
| "epoch": 2.2291008633396503, |
| "grad_norm": 0.14731731125382688, |
| "learning_rate": 5.4211333718455756e-05, |
| "loss": 0.3922, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.2324699936828805, |
| "grad_norm": 0.17746489461476853, |
| "learning_rate": 5.4123107158052034e-05, |
| "loss": 0.387, |
| "step": 661 |
| }, |
| { |
| "epoch": 2.2358391240261106, |
| "grad_norm": 0.1903089984499793, |
| "learning_rate": 5.4034802034053223e-05, |
| "loss": 0.3833, |
| "step": 662 |
| }, |
| { |
| "epoch": 2.239208254369341, |
| "grad_norm": 0.17184011460057994, |
| "learning_rate": 5.394641883768041e-05, |
| "loss": 0.39, |
| "step": 663 |
| }, |
| { |
| "epoch": 2.242577384712571, |
| "grad_norm": 0.20233097347593668, |
| "learning_rate": 5.3857958060588955e-05, |
| "loss": 0.3891, |
| "step": 664 |
| }, |
| { |
| "epoch": 2.245946515055801, |
| "grad_norm": 0.21958650033217517, |
| "learning_rate": 5.3769420194865806e-05, |
| "loss": 0.3856, |
| "step": 665 |
| }, |
| { |
| "epoch": 2.2493156453990313, |
| "grad_norm": 0.18358377095064263, |
| "learning_rate": 5.368080573302676e-05, |
| "loss": 0.3828, |
| "step": 666 |
| }, |
| { |
| "epoch": 2.2526847757422614, |
| "grad_norm": 0.17979672984272335, |
| "learning_rate": 5.359211516801365e-05, |
| "loss": 0.3804, |
| "step": 667 |
| }, |
| { |
| "epoch": 2.2560539060854916, |
| "grad_norm": 0.16294334924828324, |
| "learning_rate": 5.3503348993191706e-05, |
| "loss": 0.3825, |
| "step": 668 |
| }, |
| { |
| "epoch": 2.2594230364287218, |
| "grad_norm": 0.1508454226549176, |
| "learning_rate": 5.34145077023467e-05, |
| "loss": 0.385, |
| "step": 669 |
| }, |
| { |
| "epoch": 2.262792166771952, |
| "grad_norm": 0.15470462637665758, |
| "learning_rate": 5.332559178968231e-05, |
| "loss": 0.3778, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.266161297115182, |
| "grad_norm": 0.1359656397629021, |
| "learning_rate": 5.3236601749817296e-05, |
| "loss": 0.3896, |
| "step": 671 |
| }, |
| { |
| "epoch": 2.2695304274584123, |
| "grad_norm": 0.15226695399087686, |
| "learning_rate": 5.314753807778276e-05, |
| "loss": 0.3874, |
| "step": 672 |
| }, |
| { |
| "epoch": 2.2728995578016424, |
| "grad_norm": 0.14503332183422835, |
| "learning_rate": 5.3058401269019415e-05, |
| "loss": 0.3878, |
| "step": 673 |
| }, |
| { |
| "epoch": 2.2762686881448726, |
| "grad_norm": 0.15318787409886342, |
| "learning_rate": 5.296919181937485e-05, |
| "loss": 0.3857, |
| "step": 674 |
| }, |
| { |
| "epoch": 2.2796378184881028, |
| "grad_norm": 0.16971373493795616, |
| "learning_rate": 5.2879910225100655e-05, |
| "loss": 0.3855, |
| "step": 675 |
| }, |
| { |
| "epoch": 2.283006948831333, |
| "grad_norm": 0.1654804092839339, |
| "learning_rate": 5.279055698284982e-05, |
| "loss": 0.3877, |
| "step": 676 |
| }, |
| { |
| "epoch": 2.286376079174563, |
| "grad_norm": 0.1505186583674958, |
| "learning_rate": 5.270113258967386e-05, |
| "loss": 0.3832, |
| "step": 677 |
| }, |
| { |
| "epoch": 2.2897452095177933, |
| "grad_norm": 0.16676666984467559, |
| "learning_rate": 5.261163754302011e-05, |
| "loss": 0.386, |
| "step": 678 |
| }, |
| { |
| "epoch": 2.2931143398610234, |
| "grad_norm": 0.18567032268425918, |
| "learning_rate": 5.2522072340728896e-05, |
| "loss": 0.3907, |
| "step": 679 |
| }, |
| { |
| "epoch": 2.2964834702042536, |
| "grad_norm": 0.1765483695468527, |
| "learning_rate": 5.2432437481030855e-05, |
| "loss": 0.3882, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.2998526005474837, |
| "grad_norm": 0.165430115440251, |
| "learning_rate": 5.234273346254406e-05, |
| "loss": 0.3946, |
| "step": 681 |
| }, |
| { |
| "epoch": 2.303221730890714, |
| "grad_norm": 0.1690494896953244, |
| "learning_rate": 5.225296078427135e-05, |
| "loss": 0.3857, |
| "step": 682 |
| }, |
| { |
| "epoch": 2.306590861233944, |
| "grad_norm": 0.201198083663681, |
| "learning_rate": 5.216311994559744e-05, |
| "loss": 0.389, |
| "step": 683 |
| }, |
| { |
| "epoch": 2.3099599915771742, |
| "grad_norm": 0.20812621009650192, |
| "learning_rate": 5.207321144628628e-05, |
| "loss": 0.3865, |
| "step": 684 |
| }, |
| { |
| "epoch": 2.3133291219204044, |
| "grad_norm": 0.21426999240641148, |
| "learning_rate": 5.198323578647813e-05, |
| "loss": 0.3867, |
| "step": 685 |
| }, |
| { |
| "epoch": 2.3166982522636346, |
| "grad_norm": 0.213657425755296, |
| "learning_rate": 5.18931934666869e-05, |
| "loss": 0.3922, |
| "step": 686 |
| }, |
| { |
| "epoch": 2.3200673826068647, |
| "grad_norm": 0.17137164943244815, |
| "learning_rate": 5.180308498779728e-05, |
| "loss": 0.3789, |
| "step": 687 |
| }, |
| { |
| "epoch": 2.323436512950095, |
| "grad_norm": 0.18022826820320403, |
| "learning_rate": 5.171291085106202e-05, |
| "loss": 0.3815, |
| "step": 688 |
| }, |
| { |
| "epoch": 2.326805643293325, |
| "grad_norm": 0.1755115364994259, |
| "learning_rate": 5.162267155809908e-05, |
| "loss": 0.389, |
| "step": 689 |
| }, |
| { |
| "epoch": 2.3301747736365552, |
| "grad_norm": 0.2011673377143987, |
| "learning_rate": 5.153236761088888e-05, |
| "loss": 0.3894, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.3335439039797854, |
| "grad_norm": 0.2305809255417625, |
| "learning_rate": 5.14419995117715e-05, |
| "loss": 0.3811, |
| "step": 691 |
| }, |
| { |
| "epoch": 2.3369130343230156, |
| "grad_norm": 0.2115835801437973, |
| "learning_rate": 5.135156776344389e-05, |
| "loss": 0.3892, |
| "step": 692 |
| }, |
| { |
| "epoch": 2.3402821646662453, |
| "grad_norm": 0.19470845993737926, |
| "learning_rate": 5.126107286895702e-05, |
| "loss": 0.3832, |
| "step": 693 |
| }, |
| { |
| "epoch": 2.343651295009476, |
| "grad_norm": 0.16438102517886552, |
| "learning_rate": 5.117051533171321e-05, |
| "loss": 0.3863, |
| "step": 694 |
| }, |
| { |
| "epoch": 2.3470204253527056, |
| "grad_norm": 0.17475480058915455, |
| "learning_rate": 5.1079895655463177e-05, |
| "loss": 0.3859, |
| "step": 695 |
| }, |
| { |
| "epoch": 2.350389555695936, |
| "grad_norm": 0.18741810484417695, |
| "learning_rate": 5.098921434430333e-05, |
| "loss": 0.3825, |
| "step": 696 |
| }, |
| { |
| "epoch": 2.353758686039166, |
| "grad_norm": 0.1687881382681767, |
| "learning_rate": 5.0898471902672917e-05, |
| "loss": 0.3758, |
| "step": 697 |
| }, |
| { |
| "epoch": 2.357127816382396, |
| "grad_norm": 0.18436298872908952, |
| "learning_rate": 5.080766883535129e-05, |
| "loss": 0.3852, |
| "step": 698 |
| }, |
| { |
| "epoch": 2.3604969467256263, |
| "grad_norm": 0.19845837669577285, |
| "learning_rate": 5.0716805647455006e-05, |
| "loss": 0.3854, |
| "step": 699 |
| }, |
| { |
| "epoch": 2.3638660770688564, |
| "grad_norm": 0.18343761135804904, |
| "learning_rate": 5.062588284443505e-05, |
| "loss": 0.3825, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.3672352074120866, |
| "grad_norm": 0.13923107512819735, |
| "learning_rate": 5.053490093207408e-05, |
| "loss": 0.3797, |
| "step": 701 |
| }, |
| { |
| "epoch": 2.3706043377553168, |
| "grad_norm": 0.1783129344294203, |
| "learning_rate": 5.0443860416483536e-05, |
| "loss": 0.3813, |
| "step": 702 |
| }, |
| { |
| "epoch": 2.373973468098547, |
| "grad_norm": 0.2047126526455967, |
| "learning_rate": 5.0352761804100835e-05, |
| "loss": 0.3869, |
| "step": 703 |
| }, |
| { |
| "epoch": 2.377342598441777, |
| "grad_norm": 0.18677317936073162, |
| "learning_rate": 5.026160560168661e-05, |
| "loss": 0.3829, |
| "step": 704 |
| }, |
| { |
| "epoch": 2.3807117287850073, |
| "grad_norm": 0.15858411985283818, |
| "learning_rate": 5.0170392316321826e-05, |
| "loss": 0.3906, |
| "step": 705 |
| }, |
| { |
| "epoch": 2.3840808591282374, |
| "grad_norm": 0.1542922309469812, |
| "learning_rate": 5.0079122455405014e-05, |
| "loss": 0.3898, |
| "step": 706 |
| }, |
| { |
| "epoch": 2.3874499894714676, |
| "grad_norm": 0.16034757146153225, |
| "learning_rate": 4.9987796526649394e-05, |
| "loss": 0.3856, |
| "step": 707 |
| }, |
| { |
| "epoch": 2.3908191198146977, |
| "grad_norm": 0.17396513204876746, |
| "learning_rate": 4.989641503808011e-05, |
| "loss": 0.3845, |
| "step": 708 |
| }, |
| { |
| "epoch": 2.394188250157928, |
| "grad_norm": 0.14385199298465493, |
| "learning_rate": 4.9804978498031326e-05, |
| "loss": 0.383, |
| "step": 709 |
| }, |
| { |
| "epoch": 2.397557380501158, |
| "grad_norm": 0.1424278412585639, |
| "learning_rate": 4.971348741514349e-05, |
| "loss": 0.3923, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.4009265108443882, |
| "grad_norm": 0.18492577887926495, |
| "learning_rate": 4.962194229836045e-05, |
| "loss": 0.3841, |
| "step": 711 |
| }, |
| { |
| "epoch": 2.4042956411876184, |
| "grad_norm": 0.1732020596072231, |
| "learning_rate": 4.95303436569266e-05, |
| "loss": 0.3915, |
| "step": 712 |
| }, |
| { |
| "epoch": 2.4076647715308486, |
| "grad_norm": 0.12301305622548196, |
| "learning_rate": 4.943869200038413e-05, |
| "loss": 0.384, |
| "step": 713 |
| }, |
| { |
| "epoch": 2.4110339018740787, |
| "grad_norm": 0.18053993824097098, |
| "learning_rate": 4.934698783857011e-05, |
| "loss": 0.3817, |
| "step": 714 |
| }, |
| { |
| "epoch": 2.414403032217309, |
| "grad_norm": 0.21725687137817615, |
| "learning_rate": 4.9255231681613674e-05, |
| "loss": 0.3887, |
| "step": 715 |
| }, |
| { |
| "epoch": 2.417772162560539, |
| "grad_norm": 0.17070860183839026, |
| "learning_rate": 4.91634240399332e-05, |
| "loss": 0.3842, |
| "step": 716 |
| }, |
| { |
| "epoch": 2.4211412929037692, |
| "grad_norm": 0.16062080472612222, |
| "learning_rate": 4.907156542423351e-05, |
| "loss": 0.3753, |
| "step": 717 |
| }, |
| { |
| "epoch": 2.4245104232469994, |
| "grad_norm": 0.16452143222682503, |
| "learning_rate": 4.8979656345502904e-05, |
| "loss": 0.3819, |
| "step": 718 |
| }, |
| { |
| "epoch": 2.4278795535902296, |
| "grad_norm": 0.17121464354448115, |
| "learning_rate": 4.888769731501047e-05, |
| "loss": 0.3829, |
| "step": 719 |
| }, |
| { |
| "epoch": 2.4312486839334597, |
| "grad_norm": 0.1588530781256576, |
| "learning_rate": 4.8795688844303114e-05, |
| "loss": 0.3872, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.43461781427669, |
| "grad_norm": 0.15259487087295576, |
| "learning_rate": 4.870363144520279e-05, |
| "loss": 0.3878, |
| "step": 721 |
| }, |
| { |
| "epoch": 2.43798694461992, |
| "grad_norm": 0.15808052014003177, |
| "learning_rate": 4.861152562980362e-05, |
| "loss": 0.3827, |
| "step": 722 |
| }, |
| { |
| "epoch": 2.44135607496315, |
| "grad_norm": 0.18095527833139824, |
| "learning_rate": 4.851937191046906e-05, |
| "loss": 0.3828, |
| "step": 723 |
| }, |
| { |
| "epoch": 2.4447252053063804, |
| "grad_norm": 0.17700515235134065, |
| "learning_rate": 4.8427170799829055e-05, |
| "loss": 0.3849, |
| "step": 724 |
| }, |
| { |
| "epoch": 2.4480943356496105, |
| "grad_norm": 0.15108262997817984, |
| "learning_rate": 4.833492281077717e-05, |
| "loss": 0.3827, |
| "step": 725 |
| }, |
| { |
| "epoch": 2.4514634659928407, |
| "grad_norm": 0.14610122044801815, |
| "learning_rate": 4.824262845646771e-05, |
| "loss": 0.3891, |
| "step": 726 |
| }, |
| { |
| "epoch": 2.454832596336071, |
| "grad_norm": 0.17949690552168968, |
| "learning_rate": 4.815028825031295e-05, |
| "loss": 0.3824, |
| "step": 727 |
| }, |
| { |
| "epoch": 2.458201726679301, |
| "grad_norm": 0.17860414349949053, |
| "learning_rate": 4.805790270598021e-05, |
| "loss": 0.3859, |
| "step": 728 |
| }, |
| { |
| "epoch": 2.461570857022531, |
| "grad_norm": 0.15714664302158635, |
| "learning_rate": 4.796547233738901e-05, |
| "loss": 0.3805, |
| "step": 729 |
| }, |
| { |
| "epoch": 2.4649399873657614, |
| "grad_norm": 0.13409742518350323, |
| "learning_rate": 4.787299765870822e-05, |
| "loss": 0.3894, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.4683091177089915, |
| "grad_norm": 0.1375698590454868, |
| "learning_rate": 4.77804791843532e-05, |
| "loss": 0.3885, |
| "step": 731 |
| }, |
| { |
| "epoch": 2.4716782480522217, |
| "grad_norm": 0.1382618240475382, |
| "learning_rate": 4.768791742898292e-05, |
| "loss": 0.3875, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.475047378395452, |
| "grad_norm": 0.1398622806337096, |
| "learning_rate": 4.7595312907497135e-05, |
| "loss": 0.3853, |
| "step": 733 |
| }, |
| { |
| "epoch": 2.4784165087386816, |
| "grad_norm": 0.14539506330457003, |
| "learning_rate": 4.7502666135033486e-05, |
| "loss": 0.3935, |
| "step": 734 |
| }, |
| { |
| "epoch": 2.481785639081912, |
| "grad_norm": 0.13109075183048932, |
| "learning_rate": 4.7409977626964666e-05, |
| "loss": 0.3848, |
| "step": 735 |
| }, |
| { |
| "epoch": 2.485154769425142, |
| "grad_norm": 0.12988278807806955, |
| "learning_rate": 4.731724789889547e-05, |
| "loss": 0.3839, |
| "step": 736 |
| }, |
| { |
| "epoch": 2.4885238997683725, |
| "grad_norm": 0.1578289932884262, |
| "learning_rate": 4.722447746666008e-05, |
| "loss": 0.3836, |
| "step": 737 |
| }, |
| { |
| "epoch": 2.4918930301116022, |
| "grad_norm": 0.1696600549846316, |
| "learning_rate": 4.7131666846319036e-05, |
| "loss": 0.3825, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.495262160454833, |
| "grad_norm": 0.13151686953984587, |
| "learning_rate": 4.7038816554156484e-05, |
| "loss": 0.3879, |
| "step": 739 |
| }, |
| { |
| "epoch": 2.4986312907980626, |
| "grad_norm": 0.19638702203051203, |
| "learning_rate": 4.694592710667723e-05, |
| "loss": 0.3873, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.502000421141293, |
| "grad_norm": 0.18899466534966777, |
| "learning_rate": 4.6852999020603864e-05, |
| "loss": 0.3808, |
| "step": 741 |
| }, |
| { |
| "epoch": 2.505369551484523, |
| "grad_norm": 0.12219071702355794, |
| "learning_rate": 4.676003281287397e-05, |
| "loss": 0.3876, |
| "step": 742 |
| }, |
| { |
| "epoch": 2.508738681827753, |
| "grad_norm": 0.18236706911247189, |
| "learning_rate": 4.6667029000637164e-05, |
| "loss": 0.3846, |
| "step": 743 |
| }, |
| { |
| "epoch": 2.5121078121709832, |
| "grad_norm": 0.1684130303158305, |
| "learning_rate": 4.657398810125225e-05, |
| "loss": 0.3888, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.5154769425142134, |
| "grad_norm": 0.16891778570455948, |
| "learning_rate": 4.648091063228435e-05, |
| "loss": 0.3878, |
| "step": 745 |
| }, |
| { |
| "epoch": 2.5188460728574436, |
| "grad_norm": 0.16123369621023537, |
| "learning_rate": 4.638779711150198e-05, |
| "loss": 0.3888, |
| "step": 746 |
| }, |
| { |
| "epoch": 2.5222152032006737, |
| "grad_norm": 0.13513366343949626, |
| "learning_rate": 4.629464805687426e-05, |
| "loss": 0.3826, |
| "step": 747 |
| }, |
| { |
| "epoch": 2.525584333543904, |
| "grad_norm": 0.1460461212872677, |
| "learning_rate": 4.620146398656792e-05, |
| "loss": 0.3841, |
| "step": 748 |
| }, |
| { |
| "epoch": 2.528953463887134, |
| "grad_norm": 0.16497117181141158, |
| "learning_rate": 4.610824541894452e-05, |
| "loss": 0.3842, |
| "step": 749 |
| }, |
| { |
| "epoch": 2.532322594230364, |
| "grad_norm": 0.16290788207612428, |
| "learning_rate": 4.601499287255748e-05, |
| "loss": 0.3885, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.5356917245735944, |
| "grad_norm": 0.14489151093892186, |
| "learning_rate": 4.592170686614926e-05, |
| "loss": 0.3909, |
| "step": 751 |
| }, |
| { |
| "epoch": 2.5390608549168245, |
| "grad_norm": 0.1464122207528577, |
| "learning_rate": 4.582838791864846e-05, |
| "loss": 0.3864, |
| "step": 752 |
| }, |
| { |
| "epoch": 2.5424299852600547, |
| "grad_norm": 0.1543922436683134, |
| "learning_rate": 4.5735036549166907e-05, |
| "loss": 0.3781, |
| "step": 753 |
| }, |
| { |
| "epoch": 2.545799115603285, |
| "grad_norm": 0.1511363443793848, |
| "learning_rate": 4.5641653276996774e-05, |
| "loss": 0.388, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.549168245946515, |
| "grad_norm": 0.14775900613642287, |
| "learning_rate": 4.5548238621607735e-05, |
| "loss": 0.3829, |
| "step": 755 |
| }, |
| { |
| "epoch": 2.552537376289745, |
| "grad_norm": 0.1609040357156897, |
| "learning_rate": 4.5454793102644006e-05, |
| "loss": 0.3913, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.5559065066329754, |
| "grad_norm": 0.17452716126040962, |
| "learning_rate": 4.5361317239921515e-05, |
| "loss": 0.387, |
| "step": 757 |
| }, |
| { |
| "epoch": 2.5592756369762055, |
| "grad_norm": 0.15479208730353294, |
| "learning_rate": 4.5267811553424945e-05, |
| "loss": 0.3794, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.5626447673194357, |
| "grad_norm": 0.16977092756406884, |
| "learning_rate": 4.517427656330496e-05, |
| "loss": 0.3813, |
| "step": 759 |
| }, |
| { |
| "epoch": 2.566013897662666, |
| "grad_norm": 0.15943557512689435, |
| "learning_rate": 4.5080712789875154e-05, |
| "loss": 0.3886, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.569383028005896, |
| "grad_norm": 0.15146661036703893, |
| "learning_rate": 4.498712075360929e-05, |
| "loss": 0.3779, |
| "step": 761 |
| }, |
| { |
| "epoch": 2.572752158349126, |
| "grad_norm": 0.1583016214192411, |
| "learning_rate": 4.489350097513829e-05, |
| "loss": 0.3861, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.5761212886923563, |
| "grad_norm": 0.18203713661130738, |
| "learning_rate": 4.479985397524748e-05, |
| "loss": 0.3872, |
| "step": 763 |
| }, |
| { |
| "epoch": 2.5794904190355865, |
| "grad_norm": 0.1411770309939346, |
| "learning_rate": 4.470618027487354e-05, |
| "loss": 0.3833, |
| "step": 764 |
| }, |
| { |
| "epoch": 2.5828595493788167, |
| "grad_norm": 0.15778048291503943, |
| "learning_rate": 4.4612480395101736e-05, |
| "loss": 0.3835, |
| "step": 765 |
| }, |
| { |
| "epoch": 2.586228679722047, |
| "grad_norm": 0.20283325612723238, |
| "learning_rate": 4.451875485716292e-05, |
| "loss": 0.3804, |
| "step": 766 |
| }, |
| { |
| "epoch": 2.589597810065277, |
| "grad_norm": 0.15957667387644875, |
| "learning_rate": 4.44250041824307e-05, |
| "loss": 0.3759, |
| "step": 767 |
| }, |
| { |
| "epoch": 2.592966940408507, |
| "grad_norm": 0.15580437360078891, |
| "learning_rate": 4.4331228892418473e-05, |
| "loss": 0.3869, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.5963360707517373, |
| "grad_norm": 0.16733864762153852, |
| "learning_rate": 4.4237429508776645e-05, |
| "loss": 0.3901, |
| "step": 769 |
| }, |
| { |
| "epoch": 2.5997052010949675, |
| "grad_norm": 0.16840382892762462, |
| "learning_rate": 4.414360655328957e-05, |
| "loss": 0.3887, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.6030743314381977, |
| "grad_norm": 0.16500477542253614, |
| "learning_rate": 4.4049760547872786e-05, |
| "loss": 0.3821, |
| "step": 771 |
| }, |
| { |
| "epoch": 2.606443461781428, |
| "grad_norm": 0.17637661287184536, |
| "learning_rate": 4.395589201457e-05, |
| "loss": 0.3901, |
| "step": 772 |
| }, |
| { |
| "epoch": 2.6098125921246575, |
| "grad_norm": 0.1426864712324038, |
| "learning_rate": 4.386200147555027e-05, |
| "loss": 0.3822, |
| "step": 773 |
| }, |
| { |
| "epoch": 2.613181722467888, |
| "grad_norm": 0.1359883054124575, |
| "learning_rate": 4.376808945310505e-05, |
| "loss": 0.3907, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.616550852811118, |
| "grad_norm": 0.15390613245324686, |
| "learning_rate": 4.3674156469645335e-05, |
| "loss": 0.3844, |
| "step": 775 |
| }, |
| { |
| "epoch": 2.6199199831543485, |
| "grad_norm": 0.12544051069791048, |
| "learning_rate": 4.358020304769867e-05, |
| "loss": 0.3848, |
| "step": 776 |
| }, |
| { |
| "epoch": 2.623289113497578, |
| "grad_norm": 0.12982821849005882, |
| "learning_rate": 4.348622970990634e-05, |
| "loss": 0.386, |
| "step": 777 |
| }, |
| { |
| "epoch": 2.626658243840809, |
| "grad_norm": 0.15120996993879657, |
| "learning_rate": 4.339223697902037e-05, |
| "loss": 0.3809, |
| "step": 778 |
| }, |
| { |
| "epoch": 2.6300273741840385, |
| "grad_norm": 0.13233029817309008, |
| "learning_rate": 4.329822537790073e-05, |
| "loss": 0.3841, |
| "step": 779 |
| }, |
| { |
| "epoch": 2.633396504527269, |
| "grad_norm": 0.14136223246926025, |
| "learning_rate": 4.320419542951228e-05, |
| "loss": 0.3838, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.636765634870499, |
| "grad_norm": 0.1228901057783663, |
| "learning_rate": 4.3110147656922034e-05, |
| "loss": 0.3802, |
| "step": 781 |
| }, |
| { |
| "epoch": 2.6401347652137295, |
| "grad_norm": 0.13251524939594994, |
| "learning_rate": 4.3016082583296067e-05, |
| "loss": 0.378, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.643503895556959, |
| "grad_norm": 0.13001677701359055, |
| "learning_rate": 4.292200073189676e-05, |
| "loss": 0.3841, |
| "step": 783 |
| }, |
| { |
| "epoch": 2.6468730259001894, |
| "grad_norm": 0.15991064871524435, |
| "learning_rate": 4.2827902626079784e-05, |
| "loss": 0.3875, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.6502421562434195, |
| "grad_norm": 0.12111670308432425, |
| "learning_rate": 4.2733788789291275e-05, |
| "loss": 0.3873, |
| "step": 785 |
| }, |
| { |
| "epoch": 2.6536112865866497, |
| "grad_norm": 0.1593860904845142, |
| "learning_rate": 4.263965974506483e-05, |
| "loss": 0.3864, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.65698041692988, |
| "grad_norm": 0.16167383614529757, |
| "learning_rate": 4.254551601701866e-05, |
| "loss": 0.3845, |
| "step": 787 |
| }, |
| { |
| "epoch": 2.66034954727311, |
| "grad_norm": 0.13801503703615994, |
| "learning_rate": 4.2451358128852654e-05, |
| "loss": 0.3876, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.66371867761634, |
| "grad_norm": 0.13674433021590243, |
| "learning_rate": 4.23571866043455e-05, |
| "loss": 0.3836, |
| "step": 789 |
| }, |
| { |
| "epoch": 2.6670878079595703, |
| "grad_norm": 0.1567228984572654, |
| "learning_rate": 4.22630019673517e-05, |
| "loss": 0.3819, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.6704569383028005, |
| "grad_norm": 0.13292233430502193, |
| "learning_rate": 4.216880474179871e-05, |
| "loss": 0.3772, |
| "step": 791 |
| }, |
| { |
| "epoch": 2.6738260686460307, |
| "grad_norm": 0.14610126476091434, |
| "learning_rate": 4.207459545168405e-05, |
| "loss": 0.391, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.677195198989261, |
| "grad_norm": 0.1295036399986597, |
| "learning_rate": 4.198037462107228e-05, |
| "loss": 0.39, |
| "step": 793 |
| }, |
| { |
| "epoch": 2.680564329332491, |
| "grad_norm": 0.14286486693120076, |
| "learning_rate": 4.188614277409224e-05, |
| "loss": 0.3824, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.683933459675721, |
| "grad_norm": 0.1395089402065071, |
| "learning_rate": 4.179190043493397e-05, |
| "loss": 0.3893, |
| "step": 795 |
| }, |
| { |
| "epoch": 2.6873025900189513, |
| "grad_norm": 0.1312675673324047, |
| "learning_rate": 4.169764812784594e-05, |
| "loss": 0.3839, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.6906717203621815, |
| "grad_norm": 0.15056150493927153, |
| "learning_rate": 4.1603386377132045e-05, |
| "loss": 0.3766, |
| "step": 797 |
| }, |
| { |
| "epoch": 2.6940408507054117, |
| "grad_norm": 0.15234002339266034, |
| "learning_rate": 4.1509115707148695e-05, |
| "loss": 0.3875, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.697409981048642, |
| "grad_norm": 0.14172473902716337, |
| "learning_rate": 4.1414836642301954e-05, |
| "loss": 0.3835, |
| "step": 799 |
| }, |
| { |
| "epoch": 2.700779111391872, |
| "grad_norm": 0.1244063349961557, |
| "learning_rate": 4.132054970704454e-05, |
| "loss": 0.384, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.704148241735102, |
| "grad_norm": 0.13151454461470574, |
| "learning_rate": 4.122625542587301e-05, |
| "loss": 0.3814, |
| "step": 801 |
| }, |
| { |
| "epoch": 2.7075173720783323, |
| "grad_norm": 0.13472018853386267, |
| "learning_rate": 4.1131954323324734e-05, |
| "loss": 0.3832, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.7108865024215625, |
| "grad_norm": 0.14391402812007115, |
| "learning_rate": 4.103764692397504e-05, |
| "loss": 0.3907, |
| "step": 803 |
| }, |
| { |
| "epoch": 2.7142556327647926, |
| "grad_norm": 0.1204377593661656, |
| "learning_rate": 4.094333375243428e-05, |
| "loss": 0.3779, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.717624763108023, |
| "grad_norm": 0.1345036853381592, |
| "learning_rate": 4.084901533334495e-05, |
| "loss": 0.3837, |
| "step": 805 |
| }, |
| { |
| "epoch": 2.720993893451253, |
| "grad_norm": 0.151432229349483, |
| "learning_rate": 4.075469219137868e-05, |
| "loss": 0.3867, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.724363023794483, |
| "grad_norm": 0.13412559508113278, |
| "learning_rate": 4.066036485123344e-05, |
| "loss": 0.3809, |
| "step": 807 |
| }, |
| { |
| "epoch": 2.7277321541377133, |
| "grad_norm": 0.1407083620047968, |
| "learning_rate": 4.056603383763049e-05, |
| "loss": 0.3893, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.7311012844809435, |
| "grad_norm": 0.1304023157361848, |
| "learning_rate": 4.0471699675311564e-05, |
| "loss": 0.3873, |
| "step": 809 |
| }, |
| { |
| "epoch": 2.7344704148241736, |
| "grad_norm": 0.13069329962842927, |
| "learning_rate": 4.0377362889035875e-05, |
| "loss": 0.3845, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.737839545167404, |
| "grad_norm": 0.134836479542485, |
| "learning_rate": 4.0283024003577284e-05, |
| "loss": 0.3806, |
| "step": 811 |
| }, |
| { |
| "epoch": 2.741208675510634, |
| "grad_norm": 0.12753418534583713, |
| "learning_rate": 4.0188683543721295e-05, |
| "loss": 0.3797, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.744577805853864, |
| "grad_norm": 0.13228859664320883, |
| "learning_rate": 4.009434203426215e-05, |
| "loss": 0.3856, |
| "step": 813 |
| }, |
| { |
| "epoch": 2.747946936197094, |
| "grad_norm": 0.14892311316819778, |
| "learning_rate": 4e-05, |
| "loss": 0.3838, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.7513160665403245, |
| "grad_norm": 0.13386473278676905, |
| "learning_rate": 3.9905657965737854e-05, |
| "loss": 0.3829, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.754685196883554, |
| "grad_norm": 0.14219980607382138, |
| "learning_rate": 3.981131645627872e-05, |
| "loss": 0.3819, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.758054327226785, |
| "grad_norm": 0.1388449346696737, |
| "learning_rate": 3.971697599642273e-05, |
| "loss": 0.3834, |
| "step": 817 |
| }, |
| { |
| "epoch": 2.7614234575700145, |
| "grad_norm": 0.12977851410941868, |
| "learning_rate": 3.9622637110964125e-05, |
| "loss": 0.3831, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.764792587913245, |
| "grad_norm": 0.13978459681010671, |
| "learning_rate": 3.9528300324688456e-05, |
| "loss": 0.383, |
| "step": 819 |
| }, |
| { |
| "epoch": 2.768161718256475, |
| "grad_norm": 0.13570459222433323, |
| "learning_rate": 3.943396616236953e-05, |
| "loss": 0.3851, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.7715308485997054, |
| "grad_norm": 0.1347307304770039, |
| "learning_rate": 3.933963514876657e-05, |
| "loss": 0.3872, |
| "step": 821 |
| }, |
| { |
| "epoch": 2.774899978942935, |
| "grad_norm": 0.14708015270111557, |
| "learning_rate": 3.9245307808621325e-05, |
| "loss": 0.385, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.7782691092861658, |
| "grad_norm": 0.12260128131766068, |
| "learning_rate": 3.915098466665506e-05, |
| "loss": 0.3855, |
| "step": 823 |
| }, |
| { |
| "epoch": 2.7816382396293955, |
| "grad_norm": 0.12292579106408079, |
| "learning_rate": 3.905666624756573e-05, |
| "loss": 0.3869, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.7850073699726257, |
| "grad_norm": 0.14492807851132256, |
| "learning_rate": 3.8962353076024984e-05, |
| "loss": 0.3821, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.788376500315856, |
| "grad_norm": 0.14449210295060477, |
| "learning_rate": 3.886804567667528e-05, |
| "loss": 0.3808, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.791745630659086, |
| "grad_norm": 0.12971902156372891, |
| "learning_rate": 3.8773744574127e-05, |
| "loss": 0.3878, |
| "step": 827 |
| }, |
| { |
| "epoch": 2.795114761002316, |
| "grad_norm": 0.14230416274316593, |
| "learning_rate": 3.867945029295546e-05, |
| "loss": 0.3814, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.7984838913455463, |
| "grad_norm": 0.1224339186137515, |
| "learning_rate": 3.858516335769806e-05, |
| "loss": 0.3819, |
| "step": 829 |
| }, |
| { |
| "epoch": 2.8018530216887765, |
| "grad_norm": 0.16733669157218356, |
| "learning_rate": 3.8490884292851325e-05, |
| "loss": 0.3825, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.8052221520320066, |
| "grad_norm": 0.13398557625334945, |
| "learning_rate": 3.839661362286797e-05, |
| "loss": 0.3785, |
| "step": 831 |
| }, |
| { |
| "epoch": 2.808591282375237, |
| "grad_norm": 0.14930405489150408, |
| "learning_rate": 3.830235187215408e-05, |
| "loss": 0.3806, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.811960412718467, |
| "grad_norm": 0.14534442897149916, |
| "learning_rate": 3.820809956506604e-05, |
| "loss": 0.3869, |
| "step": 833 |
| }, |
| { |
| "epoch": 2.815329543061697, |
| "grad_norm": 0.14294161233646072, |
| "learning_rate": 3.8113857225907783e-05, |
| "loss": 0.3834, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.8186986734049273, |
| "grad_norm": 0.1304818403113972, |
| "learning_rate": 3.801962537892773e-05, |
| "loss": 0.3917, |
| "step": 835 |
| }, |
| { |
| "epoch": 2.8220678037481575, |
| "grad_norm": 0.16153213081562928, |
| "learning_rate": 3.792540454831596e-05, |
| "loss": 0.3877, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.8254369340913876, |
| "grad_norm": 0.12199316427929723, |
| "learning_rate": 3.7831195258201295e-05, |
| "loss": 0.3836, |
| "step": 837 |
| }, |
| { |
| "epoch": 2.828806064434618, |
| "grad_norm": 0.14527010576989632, |
| "learning_rate": 3.7736998032648305e-05, |
| "loss": 0.3827, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.832175194777848, |
| "grad_norm": 0.15971096124557288, |
| "learning_rate": 3.7642813395654504e-05, |
| "loss": 0.3801, |
| "step": 839 |
| }, |
| { |
| "epoch": 2.835544325121078, |
| "grad_norm": 0.12345484787366505, |
| "learning_rate": 3.754864187114736e-05, |
| "loss": 0.3855, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.8389134554643083, |
| "grad_norm": 0.13837193216510435, |
| "learning_rate": 3.745448398298135e-05, |
| "loss": 0.3828, |
| "step": 841 |
| }, |
| { |
| "epoch": 2.8422825858075385, |
| "grad_norm": 0.1545419687841436, |
| "learning_rate": 3.736034025493519e-05, |
| "loss": 0.3821, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.8456517161507686, |
| "grad_norm": 0.12965815907805744, |
| "learning_rate": 3.726621121070873e-05, |
| "loss": 0.3885, |
| "step": 843 |
| }, |
| { |
| "epoch": 2.849020846493999, |
| "grad_norm": 0.14437205080738458, |
| "learning_rate": 3.717209737392022e-05, |
| "loss": 0.3757, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.852389976837229, |
| "grad_norm": 0.13760242198629977, |
| "learning_rate": 3.707799926810326e-05, |
| "loss": 0.3841, |
| "step": 845 |
| }, |
| { |
| "epoch": 2.855759107180459, |
| "grad_norm": 0.16923959033588096, |
| "learning_rate": 3.698391741670394e-05, |
| "loss": 0.3837, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.8591282375236893, |
| "grad_norm": 0.1483758913428858, |
| "learning_rate": 3.688985234307798e-05, |
| "loss": 0.3854, |
| "step": 847 |
| }, |
| { |
| "epoch": 2.8624973678669194, |
| "grad_norm": 0.1409446277936609, |
| "learning_rate": 3.679580457048772e-05, |
| "loss": 0.3865, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.8658664982101496, |
| "grad_norm": 0.13848959127311186, |
| "learning_rate": 3.6701774622099286e-05, |
| "loss": 0.3847, |
| "step": 849 |
| }, |
| { |
| "epoch": 2.8692356285533798, |
| "grad_norm": 0.13440901679008035, |
| "learning_rate": 3.660776302097965e-05, |
| "loss": 0.3809, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.87260475889661, |
| "grad_norm": 0.13528288220600784, |
| "learning_rate": 3.6513770290093674e-05, |
| "loss": 0.3844, |
| "step": 851 |
| }, |
| { |
| "epoch": 2.87597388923984, |
| "grad_norm": 0.11930769920642463, |
| "learning_rate": 3.641979695230135e-05, |
| "loss": 0.3853, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.8793430195830703, |
| "grad_norm": 0.1302640412084013, |
| "learning_rate": 3.632584353035467e-05, |
| "loss": 0.3834, |
| "step": 853 |
| }, |
| { |
| "epoch": 2.8827121499263004, |
| "grad_norm": 0.12093299855424389, |
| "learning_rate": 3.6231910546894956e-05, |
| "loss": 0.3851, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.8860812802695306, |
| "grad_norm": 0.1342477899550942, |
| "learning_rate": 3.613799852444975e-05, |
| "loss": 0.3883, |
| "step": 855 |
| }, |
| { |
| "epoch": 2.8894504106127608, |
| "grad_norm": 0.11778883888529185, |
| "learning_rate": 3.6044107985430015e-05, |
| "loss": 0.3823, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.8928195409559905, |
| "grad_norm": 0.12271043639462616, |
| "learning_rate": 3.595023945212723e-05, |
| "loss": 0.3816, |
| "step": 857 |
| }, |
| { |
| "epoch": 2.896188671299221, |
| "grad_norm": 0.12188701757865371, |
| "learning_rate": 3.585639344671043e-05, |
| "loss": 0.3863, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.899557801642451, |
| "grad_norm": 0.12511895990769892, |
| "learning_rate": 3.576257049122336e-05, |
| "loss": 0.3829, |
| "step": 859 |
| }, |
| { |
| "epoch": 2.9029269319856814, |
| "grad_norm": 0.12002503720509249, |
| "learning_rate": 3.5668771107581526e-05, |
| "loss": 0.377, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.906296062328911, |
| "grad_norm": 0.12993074211163566, |
| "learning_rate": 3.5574995817569317e-05, |
| "loss": 0.3755, |
| "step": 861 |
| }, |
| { |
| "epoch": 2.9096651926721417, |
| "grad_norm": 0.10532634808065627, |
| "learning_rate": 3.5481245142837095e-05, |
| "loss": 0.3869, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.9130343230153715, |
| "grad_norm": 0.1296191433786778, |
| "learning_rate": 3.5387519604898264e-05, |
| "loss": 0.382, |
| "step": 863 |
| }, |
| { |
| "epoch": 2.916403453358602, |
| "grad_norm": 0.10734185230078218, |
| "learning_rate": 3.5293819725126464e-05, |
| "loss": 0.3849, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.919772583701832, |
| "grad_norm": 0.1077939586524133, |
| "learning_rate": 3.520014602475252e-05, |
| "loss": 0.3828, |
| "step": 865 |
| }, |
| { |
| "epoch": 2.923141714045062, |
| "grad_norm": 0.12191898052299041, |
| "learning_rate": 3.5106499024861715e-05, |
| "loss": 0.3809, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.926510844388292, |
| "grad_norm": 0.12081068176606237, |
| "learning_rate": 3.501287924639074e-05, |
| "loss": 0.3892, |
| "step": 867 |
| }, |
| { |
| "epoch": 2.9298799747315223, |
| "grad_norm": 0.13361270574401832, |
| "learning_rate": 3.491928721012485e-05, |
| "loss": 0.3818, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.9332491050747524, |
| "grad_norm": 0.12126810590661805, |
| "learning_rate": 3.482572343669506e-05, |
| "loss": 0.3834, |
| "step": 869 |
| }, |
| { |
| "epoch": 2.9366182354179826, |
| "grad_norm": 0.1258581729968798, |
| "learning_rate": 3.4732188446575055e-05, |
| "loss": 0.3822, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.939987365761213, |
| "grad_norm": 0.11858345315742196, |
| "learning_rate": 3.4638682760078505e-05, |
| "loss": 0.3922, |
| "step": 871 |
| }, |
| { |
| "epoch": 2.943356496104443, |
| "grad_norm": 0.11372309799338015, |
| "learning_rate": 3.454520689735602e-05, |
| "loss": 0.3824, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.946725626447673, |
| "grad_norm": 0.14113850726940133, |
| "learning_rate": 3.445176137839227e-05, |
| "loss": 0.3796, |
| "step": 873 |
| }, |
| { |
| "epoch": 2.9500947567909033, |
| "grad_norm": 0.11612037625898579, |
| "learning_rate": 3.435834672300324e-05, |
| "loss": 0.3873, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.9534638871341334, |
| "grad_norm": 0.12263857158882245, |
| "learning_rate": 3.426496345083309e-05, |
| "loss": 0.3807, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.9568330174773636, |
| "grad_norm": 0.13787793243918434, |
| "learning_rate": 3.417161208135155e-05, |
| "loss": 0.3865, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.9602021478205938, |
| "grad_norm": 0.12537808395950803, |
| "learning_rate": 3.407829313385075e-05, |
| "loss": 0.3887, |
| "step": 877 |
| }, |
| { |
| "epoch": 2.963571278163824, |
| "grad_norm": 0.1233586121783003, |
| "learning_rate": 3.398500712744254e-05, |
| "loss": 0.3831, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.966940408507054, |
| "grad_norm": 0.127510517027595, |
| "learning_rate": 3.38917545810555e-05, |
| "loss": 0.3855, |
| "step": 879 |
| }, |
| { |
| "epoch": 2.9703095388502843, |
| "grad_norm": 0.12958054002462321, |
| "learning_rate": 3.379853601343209e-05, |
| "loss": 0.3867, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.9736786691935144, |
| "grad_norm": 0.11339310625974686, |
| "learning_rate": 3.3705351943125755e-05, |
| "loss": 0.381, |
| "step": 881 |
| }, |
| { |
| "epoch": 2.9770477995367446, |
| "grad_norm": 0.1441132631100554, |
| "learning_rate": 3.361220288849804e-05, |
| "loss": 0.3853, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.9804169298799748, |
| "grad_norm": 0.12590761879480403, |
| "learning_rate": 3.351908936771566e-05, |
| "loss": 0.3821, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.983786060223205, |
| "grad_norm": 0.12580062137496578, |
| "learning_rate": 3.342601189874777e-05, |
| "loss": 0.3912, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.987155190566435, |
| "grad_norm": 0.1375861040816144, |
| "learning_rate": 3.3332970999362836e-05, |
| "loss": 0.3843, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.9905243209096652, |
| "grad_norm": 0.11745115999108842, |
| "learning_rate": 3.323996718712605e-05, |
| "loss": 0.3793, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.9938934512528954, |
| "grad_norm": 0.1154957553487754, |
| "learning_rate": 3.3147000979396156e-05, |
| "loss": 0.386, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.9972625815961256, |
| "grad_norm": 0.14419491852541183, |
| "learning_rate": 3.305407289332279e-05, |
| "loss": 0.387, |
| "step": 888 |
| }, |
| { |
| "epoch": 3.00336913034323, |
| "grad_norm": 0.17453356323499444, |
| "learning_rate": 3.296118344584352e-05, |
| "loss": 0.3658, |
| "step": 889 |
| }, |
| { |
| "epoch": 3.0067382606864603, |
| "grad_norm": 0.19958889229278365, |
| "learning_rate": 3.2868333153680964e-05, |
| "loss": 0.3563, |
| "step": 890 |
| }, |
| { |
| "epoch": 3.0101073910296905, |
| "grad_norm": 0.14823110731719627, |
| "learning_rate": 3.277552253333993e-05, |
| "loss": 0.3592, |
| "step": 891 |
| }, |
| { |
| "epoch": 3.0134765213729207, |
| "grad_norm": 0.15078557386759514, |
| "learning_rate": 3.2682752101104536e-05, |
| "loss": 0.3648, |
| "step": 892 |
| }, |
| { |
| "epoch": 3.016845651716151, |
| "grad_norm": 0.15261085897213972, |
| "learning_rate": 3.259002237303535e-05, |
| "loss": 0.365, |
| "step": 893 |
| }, |
| { |
| "epoch": 3.020214782059381, |
| "grad_norm": 0.12773087702299238, |
| "learning_rate": 3.249733386496653e-05, |
| "loss": 0.359, |
| "step": 894 |
| }, |
| { |
| "epoch": 3.023583912402611, |
| "grad_norm": 0.13787164527794113, |
| "learning_rate": 3.2404687092502865e-05, |
| "loss": 0.361, |
| "step": 895 |
| }, |
| { |
| "epoch": 3.0269530427458413, |
| "grad_norm": 0.15396809320630023, |
| "learning_rate": 3.231208257101709e-05, |
| "loss": 0.3639, |
| "step": 896 |
| }, |
| { |
| "epoch": 3.0303221730890715, |
| "grad_norm": 0.13565056548247828, |
| "learning_rate": 3.221952081564682e-05, |
| "loss": 0.3632, |
| "step": 897 |
| }, |
| { |
| "epoch": 3.0336913034323016, |
| "grad_norm": 0.16070873287428322, |
| "learning_rate": 3.212700234129179e-05, |
| "loss": 0.3594, |
| "step": 898 |
| }, |
| { |
| "epoch": 3.037060433775532, |
| "grad_norm": 0.14022297658804933, |
| "learning_rate": 3.2034527662611e-05, |
| "loss": 0.363, |
| "step": 899 |
| }, |
| { |
| "epoch": 3.040429564118762, |
| "grad_norm": 0.14407779140042834, |
| "learning_rate": 3.194209729401979e-05, |
| "loss": 0.3612, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.043798694461992, |
| "grad_norm": 0.13752049086764745, |
| "learning_rate": 3.184971174968705e-05, |
| "loss": 0.3645, |
| "step": 901 |
| }, |
| { |
| "epoch": 3.0471678248052223, |
| "grad_norm": 0.13823706755645496, |
| "learning_rate": 3.175737154353231e-05, |
| "loss": 0.3626, |
| "step": 902 |
| }, |
| { |
| "epoch": 3.0505369551484525, |
| "grad_norm": 0.16264110826907188, |
| "learning_rate": 3.166507718922285e-05, |
| "loss": 0.3566, |
| "step": 903 |
| }, |
| { |
| "epoch": 3.0539060854916826, |
| "grad_norm": 0.15511577954565434, |
| "learning_rate": 3.157282920017096e-05, |
| "loss": 0.361, |
| "step": 904 |
| }, |
| { |
| "epoch": 3.057275215834913, |
| "grad_norm": 0.15232517037403773, |
| "learning_rate": 3.1480628089530943e-05, |
| "loss": 0.3662, |
| "step": 905 |
| }, |
| { |
| "epoch": 3.060644346178143, |
| "grad_norm": 0.17112367414740937, |
| "learning_rate": 3.1388474370196395e-05, |
| "loss": 0.3638, |
| "step": 906 |
| }, |
| { |
| "epoch": 3.064013476521373, |
| "grad_norm": 0.12748688705449465, |
| "learning_rate": 3.129636855479723e-05, |
| "loss": 0.3579, |
| "step": 907 |
| }, |
| { |
| "epoch": 3.0673826068646033, |
| "grad_norm": 0.14714355107055627, |
| "learning_rate": 3.12043111556969e-05, |
| "loss": 0.3582, |
| "step": 908 |
| }, |
| { |
| "epoch": 3.070751737207833, |
| "grad_norm": 0.13462631797401237, |
| "learning_rate": 3.111230268498954e-05, |
| "loss": 0.367, |
| "step": 909 |
| }, |
| { |
| "epoch": 3.074120867551063, |
| "grad_norm": 0.1372418048121636, |
| "learning_rate": 3.1020343654497096e-05, |
| "loss": 0.3588, |
| "step": 910 |
| }, |
| { |
| "epoch": 3.0774899978942933, |
| "grad_norm": 0.13072048530956415, |
| "learning_rate": 3.0928434575766505e-05, |
| "loss": 0.361, |
| "step": 911 |
| }, |
| { |
| "epoch": 3.0808591282375235, |
| "grad_norm": 0.12852995212281998, |
| "learning_rate": 3.083657596006681e-05, |
| "loss": 0.3543, |
| "step": 912 |
| }, |
| { |
| "epoch": 3.0842282585807537, |
| "grad_norm": 0.12589969103284174, |
| "learning_rate": 3.0744768318386346e-05, |
| "loss": 0.3573, |
| "step": 913 |
| }, |
| { |
| "epoch": 3.087597388923984, |
| "grad_norm": 0.1042227599830766, |
| "learning_rate": 3.065301216142991e-05, |
| "loss": 0.3571, |
| "step": 914 |
| }, |
| { |
| "epoch": 3.090966519267214, |
| "grad_norm": 0.12641784465437736, |
| "learning_rate": 3.056130799961587e-05, |
| "loss": 0.361, |
| "step": 915 |
| }, |
| { |
| "epoch": 3.094335649610444, |
| "grad_norm": 0.1189011090318916, |
| "learning_rate": 3.046965634307341e-05, |
| "loss": 0.3653, |
| "step": 916 |
| }, |
| { |
| "epoch": 3.0977047799536743, |
| "grad_norm": 0.11559017960748716, |
| "learning_rate": 3.0378057701639575e-05, |
| "loss": 0.371, |
| "step": 917 |
| }, |
| { |
| "epoch": 3.1010739102969045, |
| "grad_norm": 0.1198695027252497, |
| "learning_rate": 3.028651258485652e-05, |
| "loss": 0.3667, |
| "step": 918 |
| }, |
| { |
| "epoch": 3.1044430406401347, |
| "grad_norm": 0.11196979369755074, |
| "learning_rate": 3.019502150196869e-05, |
| "loss": 0.3575, |
| "step": 919 |
| }, |
| { |
| "epoch": 3.107812170983365, |
| "grad_norm": 0.12042692289106809, |
| "learning_rate": 3.010358496191991e-05, |
| "loss": 0.3618, |
| "step": 920 |
| }, |
| { |
| "epoch": 3.111181301326595, |
| "grad_norm": 0.1238521643735063, |
| "learning_rate": 3.0012203473350616e-05, |
| "loss": 0.3672, |
| "step": 921 |
| }, |
| { |
| "epoch": 3.114550431669825, |
| "grad_norm": 0.11597672612469004, |
| "learning_rate": 2.9920877544595002e-05, |
| "loss": 0.3577, |
| "step": 922 |
| }, |
| { |
| "epoch": 3.1179195620130553, |
| "grad_norm": 0.11363631100554263, |
| "learning_rate": 2.982960768367818e-05, |
| "loss": 0.3637, |
| "step": 923 |
| }, |
| { |
| "epoch": 3.1212886923562855, |
| "grad_norm": 0.12223781700368476, |
| "learning_rate": 2.9738394398313405e-05, |
| "loss": 0.3575, |
| "step": 924 |
| }, |
| { |
| "epoch": 3.1246578226995156, |
| "grad_norm": 0.11310391813366659, |
| "learning_rate": 2.9647238195899168e-05, |
| "loss": 0.3666, |
| "step": 925 |
| }, |
| { |
| "epoch": 3.128026953042746, |
| "grad_norm": 0.12749001851980382, |
| "learning_rate": 2.955613958351647e-05, |
| "loss": 0.3577, |
| "step": 926 |
| }, |
| { |
| "epoch": 3.131396083385976, |
| "grad_norm": 0.11106465012495607, |
| "learning_rate": 2.946509906792593e-05, |
| "loss": 0.3661, |
| "step": 927 |
| }, |
| { |
| "epoch": 3.134765213729206, |
| "grad_norm": 0.13265615613597764, |
| "learning_rate": 2.9374117155564957e-05, |
| "loss": 0.3613, |
| "step": 928 |
| }, |
| { |
| "epoch": 3.1381343440724363, |
| "grad_norm": 0.1062334645184232, |
| "learning_rate": 2.928319435254501e-05, |
| "loss": 0.3601, |
| "step": 929 |
| }, |
| { |
| "epoch": 3.1415034744156665, |
| "grad_norm": 0.13654759521524176, |
| "learning_rate": 2.919233116464872e-05, |
| "loss": 0.357, |
| "step": 930 |
| }, |
| { |
| "epoch": 3.1448726047588966, |
| "grad_norm": 0.12274484555896063, |
| "learning_rate": 2.9101528097327093e-05, |
| "loss": 0.3659, |
| "step": 931 |
| }, |
| { |
| "epoch": 3.148241735102127, |
| "grad_norm": 0.11432950773248603, |
| "learning_rate": 2.9010785655696698e-05, |
| "loss": 0.3638, |
| "step": 932 |
| }, |
| { |
| "epoch": 3.151610865445357, |
| "grad_norm": 0.11354842248203202, |
| "learning_rate": 2.892010434453684e-05, |
| "loss": 0.36, |
| "step": 933 |
| }, |
| { |
| "epoch": 3.154979995788587, |
| "grad_norm": 0.12098639250864718, |
| "learning_rate": 2.88294846682868e-05, |
| "loss": 0.3591, |
| "step": 934 |
| }, |
| { |
| "epoch": 3.1583491261318173, |
| "grad_norm": 0.11027079481756498, |
| "learning_rate": 2.873892713104298e-05, |
| "loss": 0.3595, |
| "step": 935 |
| }, |
| { |
| "epoch": 3.1617182564750475, |
| "grad_norm": 0.12568594872253705, |
| "learning_rate": 2.864843223655613e-05, |
| "loss": 0.3678, |
| "step": 936 |
| }, |
| { |
| "epoch": 3.1650873868182776, |
| "grad_norm": 0.11667961571614835, |
| "learning_rate": 2.855800048822852e-05, |
| "loss": 0.3608, |
| "step": 937 |
| }, |
| { |
| "epoch": 3.168456517161508, |
| "grad_norm": 0.11058294572640527, |
| "learning_rate": 2.8467632389111126e-05, |
| "loss": 0.3683, |
| "step": 938 |
| }, |
| { |
| "epoch": 3.171825647504738, |
| "grad_norm": 0.1187950796415824, |
| "learning_rate": 2.837732844190094e-05, |
| "loss": 0.3644, |
| "step": 939 |
| }, |
| { |
| "epoch": 3.175194777847968, |
| "grad_norm": 0.10656085663558766, |
| "learning_rate": 2.828708914893799e-05, |
| "loss": 0.3671, |
| "step": 940 |
| }, |
| { |
| "epoch": 3.1785639081911983, |
| "grad_norm": 0.10817099139196962, |
| "learning_rate": 2.8196915012202728e-05, |
| "loss": 0.3672, |
| "step": 941 |
| }, |
| { |
| "epoch": 3.1819330385344284, |
| "grad_norm": 0.10075876050195509, |
| "learning_rate": 2.8106806533313106e-05, |
| "loss": 0.3631, |
| "step": 942 |
| }, |
| { |
| "epoch": 3.1853021688776586, |
| "grad_norm": 0.11551691063136907, |
| "learning_rate": 2.8016764213521875e-05, |
| "loss": 0.3608, |
| "step": 943 |
| }, |
| { |
| "epoch": 3.1886712992208888, |
| "grad_norm": 0.10092150997385874, |
| "learning_rate": 2.7926788553713734e-05, |
| "loss": 0.3652, |
| "step": 944 |
| }, |
| { |
| "epoch": 3.192040429564119, |
| "grad_norm": 0.11020311291162539, |
| "learning_rate": 2.783688005440256e-05, |
| "loss": 0.3656, |
| "step": 945 |
| }, |
| { |
| "epoch": 3.195409559907349, |
| "grad_norm": 0.10850184905841719, |
| "learning_rate": 2.7747039215728667e-05, |
| "loss": 0.3648, |
| "step": 946 |
| }, |
| { |
| "epoch": 3.1987786902505793, |
| "grad_norm": 0.10954311066114457, |
| "learning_rate": 2.7657266537455938e-05, |
| "loss": 0.3651, |
| "step": 947 |
| }, |
| { |
| "epoch": 3.2021478205938094, |
| "grad_norm": 0.10365234676829252, |
| "learning_rate": 2.7567562518969155e-05, |
| "loss": 0.3533, |
| "step": 948 |
| }, |
| { |
| "epoch": 3.2055169509370396, |
| "grad_norm": 0.10204242463146666, |
| "learning_rate": 2.7477927659271117e-05, |
| "loss": 0.3622, |
| "step": 949 |
| }, |
| { |
| "epoch": 3.2088860812802693, |
| "grad_norm": 0.10799341793471445, |
| "learning_rate": 2.7388362456979906e-05, |
| "loss": 0.3625, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.2122552116235, |
| "grad_norm": 0.11115544373524708, |
| "learning_rate": 2.7298867410326155e-05, |
| "loss": 0.3629, |
| "step": 951 |
| }, |
| { |
| "epoch": 3.2156243419667296, |
| "grad_norm": 0.10949003369065348, |
| "learning_rate": 2.7209443017150193e-05, |
| "loss": 0.3635, |
| "step": 952 |
| }, |
| { |
| "epoch": 3.21899347230996, |
| "grad_norm": 0.10963161775177817, |
| "learning_rate": 2.712008977489936e-05, |
| "loss": 0.3594, |
| "step": 953 |
| }, |
| { |
| "epoch": 3.22236260265319, |
| "grad_norm": 0.11805544027584379, |
| "learning_rate": 2.703080818062517e-05, |
| "loss": 0.3635, |
| "step": 954 |
| }, |
| { |
| "epoch": 3.22573173299642, |
| "grad_norm": 0.10196046146217858, |
| "learning_rate": 2.694159873098058e-05, |
| "loss": 0.3626, |
| "step": 955 |
| }, |
| { |
| "epoch": 3.2291008633396503, |
| "grad_norm": 0.1120026689331707, |
| "learning_rate": 2.6852461922217253e-05, |
| "loss": 0.3649, |
| "step": 956 |
| }, |
| { |
| "epoch": 3.2324699936828805, |
| "grad_norm": 0.10926346301227147, |
| "learning_rate": 2.6763398250182714e-05, |
| "loss": 0.3579, |
| "step": 957 |
| }, |
| { |
| "epoch": 3.2358391240261106, |
| "grad_norm": 0.10913175373351278, |
| "learning_rate": 2.66744082103177e-05, |
| "loss": 0.3639, |
| "step": 958 |
| }, |
| { |
| "epoch": 3.239208254369341, |
| "grad_norm": 0.10485736112258066, |
| "learning_rate": 2.658549229765332e-05, |
| "loss": 0.3592, |
| "step": 959 |
| }, |
| { |
| "epoch": 3.242577384712571, |
| "grad_norm": 0.12101416878728995, |
| "learning_rate": 2.6496651006808308e-05, |
| "loss": 0.3574, |
| "step": 960 |
| }, |
| { |
| "epoch": 3.245946515055801, |
| "grad_norm": 0.1071236277697119, |
| "learning_rate": 2.6407884831986367e-05, |
| "loss": 0.3627, |
| "step": 961 |
| }, |
| { |
| "epoch": 3.2493156453990313, |
| "grad_norm": 0.11778875174805165, |
| "learning_rate": 2.6319194266973256e-05, |
| "loss": 0.365, |
| "step": 962 |
| }, |
| { |
| "epoch": 3.2526847757422614, |
| "grad_norm": 0.12437906053481307, |
| "learning_rate": 2.6230579805134203e-05, |
| "loss": 0.3582, |
| "step": 963 |
| }, |
| { |
| "epoch": 3.2560539060854916, |
| "grad_norm": 0.11016391828701566, |
| "learning_rate": 2.614204193941107e-05, |
| "loss": 0.3628, |
| "step": 964 |
| }, |
| { |
| "epoch": 3.2594230364287218, |
| "grad_norm": 0.131288542140626, |
| "learning_rate": 2.6053581162319606e-05, |
| "loss": 0.3634, |
| "step": 965 |
| }, |
| { |
| "epoch": 3.262792166771952, |
| "grad_norm": 0.10515921544500577, |
| "learning_rate": 2.5965197965946783e-05, |
| "loss": 0.3649, |
| "step": 966 |
| }, |
| { |
| "epoch": 3.266161297115182, |
| "grad_norm": 0.12739731098762894, |
| "learning_rate": 2.587689284194797e-05, |
| "loss": 0.3703, |
| "step": 967 |
| }, |
| { |
| "epoch": 3.2695304274584123, |
| "grad_norm": 0.10406377203116793, |
| "learning_rate": 2.5788666281544258e-05, |
| "loss": 0.3657, |
| "step": 968 |
| }, |
| { |
| "epoch": 3.2728995578016424, |
| "grad_norm": 0.11191689402983139, |
| "learning_rate": 2.5700518775519702e-05, |
| "loss": 0.359, |
| "step": 969 |
| }, |
| { |
| "epoch": 3.2762686881448726, |
| "grad_norm": 0.10680144927044027, |
| "learning_rate": 2.561245081421857e-05, |
| "loss": 0.3604, |
| "step": 970 |
| }, |
| { |
| "epoch": 3.2796378184881028, |
| "grad_norm": 0.11505057898142523, |
| "learning_rate": 2.5524462887542703e-05, |
| "loss": 0.3599, |
| "step": 971 |
| }, |
| { |
| "epoch": 3.283006948831333, |
| "grad_norm": 0.10674300454641518, |
| "learning_rate": 2.5436555484948643e-05, |
| "loss": 0.3625, |
| "step": 972 |
| }, |
| { |
| "epoch": 3.286376079174563, |
| "grad_norm": 0.10772282874724956, |
| "learning_rate": 2.534872909544509e-05, |
| "loss": 0.3586, |
| "step": 973 |
| }, |
| { |
| "epoch": 3.2897452095177933, |
| "grad_norm": 0.11061913724144044, |
| "learning_rate": 2.5260984207590015e-05, |
| "loss": 0.3695, |
| "step": 974 |
| }, |
| { |
| "epoch": 3.2931143398610234, |
| "grad_norm": 0.11314868048581533, |
| "learning_rate": 2.517332130948802e-05, |
| "loss": 0.3597, |
| "step": 975 |
| }, |
| { |
| "epoch": 3.2964834702042536, |
| "grad_norm": 0.10483488263899578, |
| "learning_rate": 2.5085740888787662e-05, |
| "loss": 0.3583, |
| "step": 976 |
| }, |
| { |
| "epoch": 3.2998526005474837, |
| "grad_norm": 0.10912778564330813, |
| "learning_rate": 2.4998243432678644e-05, |
| "loss": 0.3601, |
| "step": 977 |
| }, |
| { |
| "epoch": 3.303221730890714, |
| "grad_norm": 0.11466754101476578, |
| "learning_rate": 2.4910829427889205e-05, |
| "loss": 0.3643, |
| "step": 978 |
| }, |
| { |
| "epoch": 3.306590861233944, |
| "grad_norm": 0.10733537636590312, |
| "learning_rate": 2.4823499360683333e-05, |
| "loss": 0.3651, |
| "step": 979 |
| }, |
| { |
| "epoch": 3.3099599915771742, |
| "grad_norm": 0.1161393261879057, |
| "learning_rate": 2.473625371685806e-05, |
| "loss": 0.3599, |
| "step": 980 |
| }, |
| { |
| "epoch": 3.3133291219204044, |
| "grad_norm": 0.0982571093572832, |
| "learning_rate": 2.464909298174088e-05, |
| "loss": 0.3526, |
| "step": 981 |
| }, |
| { |
| "epoch": 3.3166982522636346, |
| "grad_norm": 0.1100159657444912, |
| "learning_rate": 2.4562017640186847e-05, |
| "loss": 0.3626, |
| "step": 982 |
| }, |
| { |
| "epoch": 3.3200673826068647, |
| "grad_norm": 0.09926349760672294, |
| "learning_rate": 2.4475028176576102e-05, |
| "loss": 0.3677, |
| "step": 983 |
| }, |
| { |
| "epoch": 3.323436512950095, |
| "grad_norm": 0.12050759797842048, |
| "learning_rate": 2.4388125074810986e-05, |
| "loss": 0.359, |
| "step": 984 |
| }, |
| { |
| "epoch": 3.326805643293325, |
| "grad_norm": 0.09987805749588798, |
| "learning_rate": 2.430130881831345e-05, |
| "loss": 0.3618, |
| "step": 985 |
| }, |
| { |
| "epoch": 3.3301747736365552, |
| "grad_norm": 0.1091783241310202, |
| "learning_rate": 2.4214579890022373e-05, |
| "loss": 0.3696, |
| "step": 986 |
| }, |
| { |
| "epoch": 3.3335439039797854, |
| "grad_norm": 0.10898707191962656, |
| "learning_rate": 2.41279387723908e-05, |
| "loss": 0.3638, |
| "step": 987 |
| }, |
| { |
| "epoch": 3.3369130343230156, |
| "grad_norm": 0.10558034784682291, |
| "learning_rate": 2.404138594738335e-05, |
| "loss": 0.357, |
| "step": 988 |
| }, |
| { |
| "epoch": 3.3402821646662453, |
| "grad_norm": 0.10689449489731055, |
| "learning_rate": 2.395492189647347e-05, |
| "loss": 0.3594, |
| "step": 989 |
| }, |
| { |
| "epoch": 3.343651295009476, |
| "grad_norm": 0.11118497131539316, |
| "learning_rate": 2.386854710064075e-05, |
| "loss": 0.3542, |
| "step": 990 |
| }, |
| { |
| "epoch": 3.3470204253527056, |
| "grad_norm": 0.10782085280238568, |
| "learning_rate": 2.3782262040368344e-05, |
| "loss": 0.3608, |
| "step": 991 |
| }, |
| { |
| "epoch": 3.350389555695936, |
| "grad_norm": 0.10697566924440428, |
| "learning_rate": 2.369606719564015e-05, |
| "loss": 0.3551, |
| "step": 992 |
| }, |
| { |
| "epoch": 3.353758686039166, |
| "grad_norm": 0.09605638199170409, |
| "learning_rate": 2.3609963045938288e-05, |
| "loss": 0.3618, |
| "step": 993 |
| }, |
| { |
| "epoch": 3.357127816382396, |
| "grad_norm": 0.10827169360976367, |
| "learning_rate": 2.35239500702403e-05, |
| "loss": 0.3565, |
| "step": 994 |
| }, |
| { |
| "epoch": 3.3604969467256263, |
| "grad_norm": 0.10198375263244171, |
| "learning_rate": 2.3438028747016586e-05, |
| "loss": 0.3626, |
| "step": 995 |
| }, |
| { |
| "epoch": 3.3638660770688564, |
| "grad_norm": 0.1159958447674676, |
| "learning_rate": 2.3352199554227698e-05, |
| "loss": 0.3629, |
| "step": 996 |
| }, |
| { |
| "epoch": 3.3672352074120866, |
| "grad_norm": 0.10457139377595129, |
| "learning_rate": 2.326646296932168e-05, |
| "loss": 0.3638, |
| "step": 997 |
| }, |
| { |
| "epoch": 3.3706043377553168, |
| "grad_norm": 0.10333006497152411, |
| "learning_rate": 2.318081946923144e-05, |
| "loss": 0.3612, |
| "step": 998 |
| }, |
| { |
| "epoch": 3.373973468098547, |
| "grad_norm": 0.10461115888151253, |
| "learning_rate": 2.3095269530372032e-05, |
| "loss": 0.362, |
| "step": 999 |
| }, |
| { |
| "epoch": 3.377342598441777, |
| "grad_norm": 0.10087292499347122, |
| "learning_rate": 2.3009813628638085e-05, |
| "loss": 0.3603, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.3807117287850073, |
| "grad_norm": 0.09894098741998586, |
| "learning_rate": 2.2924452239401153e-05, |
| "loss": 0.3635, |
| "step": 1001 |
| }, |
| { |
| "epoch": 3.3840808591282374, |
| "grad_norm": 0.10636129988239897, |
| "learning_rate": 2.283918583750695e-05, |
| "loss": 0.3589, |
| "step": 1002 |
| }, |
| { |
| "epoch": 3.3874499894714676, |
| "grad_norm": 0.1087735124770059, |
| "learning_rate": 2.2754014897272868e-05, |
| "loss": 0.3603, |
| "step": 1003 |
| }, |
| { |
| "epoch": 3.3908191198146977, |
| "grad_norm": 0.1045786633320159, |
| "learning_rate": 2.266893989248527e-05, |
| "loss": 0.3634, |
| "step": 1004 |
| }, |
| { |
| "epoch": 3.394188250157928, |
| "grad_norm": 0.10630134016191294, |
| "learning_rate": 2.258396129639679e-05, |
| "loss": 0.3626, |
| "step": 1005 |
| }, |
| { |
| "epoch": 3.397557380501158, |
| "grad_norm": 0.10814614823364664, |
| "learning_rate": 2.2499079581723846e-05, |
| "loss": 0.3682, |
| "step": 1006 |
| }, |
| { |
| "epoch": 3.4009265108443882, |
| "grad_norm": 0.10249665362012134, |
| "learning_rate": 2.2414295220643822e-05, |
| "loss": 0.361, |
| "step": 1007 |
| }, |
| { |
| "epoch": 3.4042956411876184, |
| "grad_norm": 0.10378027402659071, |
| "learning_rate": 2.2329608684792676e-05, |
| "loss": 0.3606, |
| "step": 1008 |
| }, |
| { |
| "epoch": 3.4076647715308486, |
| "grad_norm": 0.10027376191210695, |
| "learning_rate": 2.22450204452621e-05, |
| "loss": 0.3608, |
| "step": 1009 |
| }, |
| { |
| "epoch": 3.4110339018740787, |
| "grad_norm": 0.10689722485945972, |
| "learning_rate": 2.216053097259697e-05, |
| "loss": 0.3706, |
| "step": 1010 |
| }, |
| { |
| "epoch": 3.414403032217309, |
| "grad_norm": 0.10357939152860053, |
| "learning_rate": 2.2076140736792805e-05, |
| "loss": 0.3623, |
| "step": 1011 |
| }, |
| { |
| "epoch": 3.417772162560539, |
| "grad_norm": 0.0902315706129379, |
| "learning_rate": 2.1991850207293064e-05, |
| "loss": 0.3596, |
| "step": 1012 |
| }, |
| { |
| "epoch": 3.4211412929037692, |
| "grad_norm": 0.10842563552035595, |
| "learning_rate": 2.1907659852986588e-05, |
| "loss": 0.3637, |
| "step": 1013 |
| }, |
| { |
| "epoch": 3.4245104232469994, |
| "grad_norm": 0.09666903812158173, |
| "learning_rate": 2.1823570142204902e-05, |
| "loss": 0.3624, |
| "step": 1014 |
| }, |
| { |
| "epoch": 3.4278795535902296, |
| "grad_norm": 0.100083090000888, |
| "learning_rate": 2.1739581542719748e-05, |
| "loss": 0.3624, |
| "step": 1015 |
| }, |
| { |
| "epoch": 3.4312486839334597, |
| "grad_norm": 0.10755809720758686, |
| "learning_rate": 2.1655694521740376e-05, |
| "loss": 0.3624, |
| "step": 1016 |
| }, |
| { |
| "epoch": 3.43461781427669, |
| "grad_norm": 0.1024231010803628, |
| "learning_rate": 2.1571909545910953e-05, |
| "loss": 0.3621, |
| "step": 1017 |
| }, |
| { |
| "epoch": 3.43798694461992, |
| "grad_norm": 0.10562299735859218, |
| "learning_rate": 2.1488227081308054e-05, |
| "loss": 0.3626, |
| "step": 1018 |
| }, |
| { |
| "epoch": 3.44135607496315, |
| "grad_norm": 0.0993759886031881, |
| "learning_rate": 2.140464759343794e-05, |
| "loss": 0.3654, |
| "step": 1019 |
| }, |
| { |
| "epoch": 3.4447252053063804, |
| "grad_norm": 0.09933521966725083, |
| "learning_rate": 2.132117154723408e-05, |
| "loss": 0.356, |
| "step": 1020 |
| }, |
| { |
| "epoch": 3.4480943356496105, |
| "grad_norm": 0.09953034686873165, |
| "learning_rate": 2.123779940705453e-05, |
| "loss": 0.366, |
| "step": 1021 |
| }, |
| { |
| "epoch": 3.4514634659928407, |
| "grad_norm": 0.10175075266526791, |
| "learning_rate": 2.115453163667929e-05, |
| "loss": 0.3583, |
| "step": 1022 |
| }, |
| { |
| "epoch": 3.454832596336071, |
| "grad_norm": 0.09594990983302608, |
| "learning_rate": 2.1071368699307818e-05, |
| "loss": 0.3584, |
| "step": 1023 |
| }, |
| { |
| "epoch": 3.458201726679301, |
| "grad_norm": 0.10219150476255269, |
| "learning_rate": 2.0988311057556397e-05, |
| "loss": 0.3597, |
| "step": 1024 |
| }, |
| { |
| "epoch": 3.461570857022531, |
| "grad_norm": 0.09691112693809913, |
| "learning_rate": 2.0905359173455593e-05, |
| "loss": 0.3621, |
| "step": 1025 |
| }, |
| { |
| "epoch": 3.4649399873657614, |
| "grad_norm": 0.09661009238935536, |
| "learning_rate": 2.0822513508447608e-05, |
| "loss": 0.3567, |
| "step": 1026 |
| }, |
| { |
| "epoch": 3.4683091177089915, |
| "grad_norm": 0.09590582546596066, |
| "learning_rate": 2.073977452338384e-05, |
| "loss": 0.3646, |
| "step": 1027 |
| }, |
| { |
| "epoch": 3.4716782480522217, |
| "grad_norm": 0.09606588648905236, |
| "learning_rate": 2.065714267852223e-05, |
| "loss": 0.3641, |
| "step": 1028 |
| }, |
| { |
| "epoch": 3.475047378395452, |
| "grad_norm": 0.10295819559523817, |
| "learning_rate": 2.057461843352469e-05, |
| "loss": 0.3557, |
| "step": 1029 |
| }, |
| { |
| "epoch": 3.4784165087386816, |
| "grad_norm": 0.09150758299366415, |
| "learning_rate": 2.049220224745463e-05, |
| "loss": 0.3636, |
| "step": 1030 |
| }, |
| { |
| "epoch": 3.481785639081912, |
| "grad_norm": 0.10198222794968945, |
| "learning_rate": 2.0409894578774302e-05, |
| "loss": 0.3642, |
| "step": 1031 |
| }, |
| { |
| "epoch": 3.485154769425142, |
| "grad_norm": 0.09986839616807734, |
| "learning_rate": 2.032769588534233e-05, |
| "loss": 0.3673, |
| "step": 1032 |
| }, |
| { |
| "epoch": 3.4885238997683725, |
| "grad_norm": 0.09939024454656914, |
| "learning_rate": 2.0245606624411165e-05, |
| "loss": 0.3591, |
| "step": 1033 |
| }, |
| { |
| "epoch": 3.4918930301116022, |
| "grad_norm": 0.09144469769761462, |
| "learning_rate": 2.0163627252624427e-05, |
| "loss": 0.3683, |
| "step": 1034 |
| }, |
| { |
| "epoch": 3.495262160454833, |
| "grad_norm": 0.09038126728850328, |
| "learning_rate": 2.0081758226014516e-05, |
| "loss": 0.3585, |
| "step": 1035 |
| }, |
| { |
| "epoch": 3.4986312907980626, |
| "grad_norm": 0.09791848188862595, |
| "learning_rate": 2.0000000000000012e-05, |
| "loss": 0.3633, |
| "step": 1036 |
| }, |
| { |
| "epoch": 3.502000421141293, |
| "grad_norm": 0.09451335114654308, |
| "learning_rate": 1.9918353029383065e-05, |
| "loss": 0.3563, |
| "step": 1037 |
| }, |
| { |
| "epoch": 3.505369551484523, |
| "grad_norm": 0.09766694825632033, |
| "learning_rate": 1.9836817768347015e-05, |
| "loss": 0.3634, |
| "step": 1038 |
| }, |
| { |
| "epoch": 3.508738681827753, |
| "grad_norm": 0.09523718569743331, |
| "learning_rate": 1.9755394670453745e-05, |
| "loss": 0.364, |
| "step": 1039 |
| }, |
| { |
| "epoch": 3.5121078121709832, |
| "grad_norm": 0.09716008155147098, |
| "learning_rate": 1.9674084188641235e-05, |
| "loss": 0.3614, |
| "step": 1040 |
| }, |
| { |
| "epoch": 3.5154769425142134, |
| "grad_norm": 0.09307186314834033, |
| "learning_rate": 1.9592886775220957e-05, |
| "loss": 0.3663, |
| "step": 1041 |
| }, |
| { |
| "epoch": 3.5188460728574436, |
| "grad_norm": 0.0966569916505279, |
| "learning_rate": 1.9511802881875438e-05, |
| "loss": 0.3628, |
| "step": 1042 |
| }, |
| { |
| "epoch": 3.5222152032006737, |
| "grad_norm": 0.09953663178152124, |
| "learning_rate": 1.943083295965572e-05, |
| "loss": 0.3653, |
| "step": 1043 |
| }, |
| { |
| "epoch": 3.525584333543904, |
| "grad_norm": 0.09000177069317349, |
| "learning_rate": 1.9349977458978846e-05, |
| "loss": 0.357, |
| "step": 1044 |
| }, |
| { |
| "epoch": 3.528953463887134, |
| "grad_norm": 0.09693735378111683, |
| "learning_rate": 1.9269236829625387e-05, |
| "loss": 0.3623, |
| "step": 1045 |
| }, |
| { |
| "epoch": 3.532322594230364, |
| "grad_norm": 0.1010678964013295, |
| "learning_rate": 1.9188611520736846e-05, |
| "loss": 0.3631, |
| "step": 1046 |
| }, |
| { |
| "epoch": 3.5356917245735944, |
| "grad_norm": 0.08709082546898574, |
| "learning_rate": 1.9108101980813277e-05, |
| "loss": 0.3559, |
| "step": 1047 |
| }, |
| { |
| "epoch": 3.5390608549168245, |
| "grad_norm": 0.09973595422763583, |
| "learning_rate": 1.902770865771074e-05, |
| "loss": 0.3572, |
| "step": 1048 |
| }, |
| { |
| "epoch": 3.5424299852600547, |
| "grad_norm": 0.0932062947908472, |
| "learning_rate": 1.8947431998638762e-05, |
| "loss": 0.3703, |
| "step": 1049 |
| }, |
| { |
| "epoch": 3.545799115603285, |
| "grad_norm": 0.0927283626151012, |
| "learning_rate": 1.886727245015794e-05, |
| "loss": 0.3604, |
| "step": 1050 |
| }, |
| { |
| "epoch": 3.549168245946515, |
| "grad_norm": 0.0899928028286008, |
| "learning_rate": 1.8787230458177408e-05, |
| "loss": 0.3596, |
| "step": 1051 |
| }, |
| { |
| "epoch": 3.552537376289745, |
| "grad_norm": 0.09291563797483152, |
| "learning_rate": 1.8707306467952323e-05, |
| "loss": 0.3602, |
| "step": 1052 |
| }, |
| { |
| "epoch": 3.5559065066329754, |
| "grad_norm": 0.0873067226862915, |
| "learning_rate": 1.862750092408147e-05, |
| "loss": 0.3632, |
| "step": 1053 |
| }, |
| { |
| "epoch": 3.5592756369762055, |
| "grad_norm": 0.09201291502685034, |
| "learning_rate": 1.8547814270504705e-05, |
| "loss": 0.3665, |
| "step": 1054 |
| }, |
| { |
| "epoch": 3.5626447673194357, |
| "grad_norm": 0.08672862490756593, |
| "learning_rate": 1.8468246950500556e-05, |
| "loss": 0.3595, |
| "step": 1055 |
| }, |
| { |
| "epoch": 3.566013897662666, |
| "grad_norm": 0.08745897822977576, |
| "learning_rate": 1.838879940668373e-05, |
| "loss": 0.3605, |
| "step": 1056 |
| }, |
| { |
| "epoch": 3.569383028005896, |
| "grad_norm": 0.09233748547600358, |
| "learning_rate": 1.83094720810026e-05, |
| "loss": 0.36, |
| "step": 1057 |
| }, |
| { |
| "epoch": 3.572752158349126, |
| "grad_norm": 0.09364157413857832, |
| "learning_rate": 1.823026541473684e-05, |
| "loss": 0.3642, |
| "step": 1058 |
| }, |
| { |
| "epoch": 3.5761212886923563, |
| "grad_norm": 0.0919710962105762, |
| "learning_rate": 1.8151179848494905e-05, |
| "loss": 0.3629, |
| "step": 1059 |
| }, |
| { |
| "epoch": 3.5794904190355865, |
| "grad_norm": 0.09562060747735483, |
| "learning_rate": 1.8072215822211613e-05, |
| "loss": 0.3623, |
| "step": 1060 |
| }, |
| { |
| "epoch": 3.5828595493788167, |
| "grad_norm": 0.08937202924753714, |
| "learning_rate": 1.7993373775145663e-05, |
| "loss": 0.3608, |
| "step": 1061 |
| }, |
| { |
| "epoch": 3.586228679722047, |
| "grad_norm": 0.09294261879238143, |
| "learning_rate": 1.7914654145877187e-05, |
| "loss": 0.3605, |
| "step": 1062 |
| }, |
| { |
| "epoch": 3.589597810065277, |
| "grad_norm": 0.08234694139654683, |
| "learning_rate": 1.7836057372305423e-05, |
| "loss": 0.3628, |
| "step": 1063 |
| }, |
| { |
| "epoch": 3.592966940408507, |
| "grad_norm": 0.09244388782945556, |
| "learning_rate": 1.77575838916461e-05, |
| "loss": 0.3584, |
| "step": 1064 |
| }, |
| { |
| "epoch": 3.5963360707517373, |
| "grad_norm": 0.0993638664940156, |
| "learning_rate": 1.767923414042915e-05, |
| "loss": 0.3614, |
| "step": 1065 |
| }, |
| { |
| "epoch": 3.5997052010949675, |
| "grad_norm": 0.08789579064875053, |
| "learning_rate": 1.760100855449619e-05, |
| "loss": 0.3603, |
| "step": 1066 |
| }, |
| { |
| "epoch": 3.6030743314381977, |
| "grad_norm": 0.10411154322718334, |
| "learning_rate": 1.752290756899816e-05, |
| "loss": 0.3624, |
| "step": 1067 |
| }, |
| { |
| "epoch": 3.606443461781428, |
| "grad_norm": 0.08047024019249209, |
| "learning_rate": 1.7444931618392894e-05, |
| "loss": 0.3585, |
| "step": 1068 |
| }, |
| { |
| "epoch": 3.6098125921246575, |
| "grad_norm": 0.10614805370086325, |
| "learning_rate": 1.736708113644262e-05, |
| "loss": 0.363, |
| "step": 1069 |
| }, |
| { |
| "epoch": 3.613181722467888, |
| "grad_norm": 0.08611033060017922, |
| "learning_rate": 1.7289356556211687e-05, |
| "loss": 0.3637, |
| "step": 1070 |
| }, |
| { |
| "epoch": 3.616550852811118, |
| "grad_norm": 0.0873884543774505, |
| "learning_rate": 1.7211758310064042e-05, |
| "loss": 0.3578, |
| "step": 1071 |
| }, |
| { |
| "epoch": 3.6199199831543485, |
| "grad_norm": 0.09185990558523179, |
| "learning_rate": 1.7134286829660855e-05, |
| "loss": 0.3677, |
| "step": 1072 |
| }, |
| { |
| "epoch": 3.623289113497578, |
| "grad_norm": 0.08742862884585491, |
| "learning_rate": 1.7056942545958167e-05, |
| "loss": 0.3657, |
| "step": 1073 |
| }, |
| { |
| "epoch": 3.626658243840809, |
| "grad_norm": 0.08793012887671753, |
| "learning_rate": 1.697972588920439e-05, |
| "loss": 0.3655, |
| "step": 1074 |
| }, |
| { |
| "epoch": 3.6300273741840385, |
| "grad_norm": 0.09014218865360733, |
| "learning_rate": 1.6902637288938074e-05, |
| "loss": 0.364, |
| "step": 1075 |
| }, |
| { |
| "epoch": 3.633396504527269, |
| "grad_norm": 0.08892601725042051, |
| "learning_rate": 1.6825677173985332e-05, |
| "loss": 0.3665, |
| "step": 1076 |
| }, |
| { |
| "epoch": 3.636765634870499, |
| "grad_norm": 0.0878924041737089, |
| "learning_rate": 1.6748845972457562e-05, |
| "loss": 0.3563, |
| "step": 1077 |
| }, |
| { |
| "epoch": 3.6401347652137295, |
| "grad_norm": 0.09417513459021953, |
| "learning_rate": 1.6672144111749066e-05, |
| "loss": 0.3657, |
| "step": 1078 |
| }, |
| { |
| "epoch": 3.643503895556959, |
| "grad_norm": 0.09041822306873473, |
| "learning_rate": 1.659557201853465e-05, |
| "loss": 0.3687, |
| "step": 1079 |
| }, |
| { |
| "epoch": 3.6468730259001894, |
| "grad_norm": 0.08690354592106783, |
| "learning_rate": 1.6519130118767258e-05, |
| "loss": 0.3601, |
| "step": 1080 |
| }, |
| { |
| "epoch": 3.6502421562434195, |
| "grad_norm": 0.08875815871506505, |
| "learning_rate": 1.6442818837675578e-05, |
| "loss": 0.3602, |
| "step": 1081 |
| }, |
| { |
| "epoch": 3.6536112865866497, |
| "grad_norm": 0.08649489906143072, |
| "learning_rate": 1.6366638599761676e-05, |
| "loss": 0.362, |
| "step": 1082 |
| }, |
| { |
| "epoch": 3.65698041692988, |
| "grad_norm": 0.08572937873824316, |
| "learning_rate": 1.6290589828798736e-05, |
| "loss": 0.3614, |
| "step": 1083 |
| }, |
| { |
| "epoch": 3.66034954727311, |
| "grad_norm": 0.09798247509524252, |
| "learning_rate": 1.621467294782854e-05, |
| "loss": 0.3608, |
| "step": 1084 |
| }, |
| { |
| "epoch": 3.66371867761634, |
| "grad_norm": 0.08586977784228111, |
| "learning_rate": 1.6138888379159238e-05, |
| "loss": 0.3602, |
| "step": 1085 |
| }, |
| { |
| "epoch": 3.6670878079595703, |
| "grad_norm": 0.09119393564711122, |
| "learning_rate": 1.606323654436293e-05, |
| "loss": 0.3641, |
| "step": 1086 |
| }, |
| { |
| "epoch": 3.6704569383028005, |
| "grad_norm": 0.09035766592284558, |
| "learning_rate": 1.5987717864273377e-05, |
| "loss": 0.366, |
| "step": 1087 |
| }, |
| { |
| "epoch": 3.6738260686460307, |
| "grad_norm": 0.08837128914166983, |
| "learning_rate": 1.591233275898363e-05, |
| "loss": 0.3621, |
| "step": 1088 |
| }, |
| { |
| "epoch": 3.677195198989261, |
| "grad_norm": 0.09501671591225473, |
| "learning_rate": 1.5837081647843652e-05, |
| "loss": 0.3655, |
| "step": 1089 |
| }, |
| { |
| "epoch": 3.680564329332491, |
| "grad_norm": 0.08827549191913663, |
| "learning_rate": 1.5761964949458076e-05, |
| "loss": 0.3664, |
| "step": 1090 |
| }, |
| { |
| "epoch": 3.683933459675721, |
| "grad_norm": 0.08653211178416792, |
| "learning_rate": 1.5686983081683816e-05, |
| "loss": 0.3613, |
| "step": 1091 |
| }, |
| { |
| "epoch": 3.6873025900189513, |
| "grad_norm": 0.09007377723059057, |
| "learning_rate": 1.5612136461627726e-05, |
| "loss": 0.3605, |
| "step": 1092 |
| }, |
| { |
| "epoch": 3.6906717203621815, |
| "grad_norm": 0.08875691955803114, |
| "learning_rate": 1.5537425505644358e-05, |
| "loss": 0.3692, |
| "step": 1093 |
| }, |
| { |
| "epoch": 3.6940408507054117, |
| "grad_norm": 0.0865487137501953, |
| "learning_rate": 1.546285062933352e-05, |
| "loss": 0.3637, |
| "step": 1094 |
| }, |
| { |
| "epoch": 3.697409981048642, |
| "grad_norm": 0.08441316995604657, |
| "learning_rate": 1.5388412247538148e-05, |
| "loss": 0.3566, |
| "step": 1095 |
| }, |
| { |
| "epoch": 3.700779111391872, |
| "grad_norm": 0.08795109300895403, |
| "learning_rate": 1.5314110774341803e-05, |
| "loss": 0.3649, |
| "step": 1096 |
| }, |
| { |
| "epoch": 3.704148241735102, |
| "grad_norm": 0.08660466649366423, |
| "learning_rate": 1.5239946623066466e-05, |
| "loss": 0.3656, |
| "step": 1097 |
| }, |
| { |
| "epoch": 3.7075173720783323, |
| "grad_norm": 0.08962594968540999, |
| "learning_rate": 1.5165920206270257e-05, |
| "loss": 0.3578, |
| "step": 1098 |
| }, |
| { |
| "epoch": 3.7108865024215625, |
| "grad_norm": 0.08885368968596062, |
| "learning_rate": 1.5092031935745102e-05, |
| "loss": 0.362, |
| "step": 1099 |
| }, |
| { |
| "epoch": 3.7142556327647926, |
| "grad_norm": 0.0928847110975983, |
| "learning_rate": 1.5018282222514451e-05, |
| "loss": 0.3673, |
| "step": 1100 |
| }, |
| { |
| "epoch": 3.717624763108023, |
| "grad_norm": 0.08764355173587007, |
| "learning_rate": 1.4944671476830967e-05, |
| "loss": 0.3559, |
| "step": 1101 |
| }, |
| { |
| "epoch": 3.720993893451253, |
| "grad_norm": 0.08858832328866184, |
| "learning_rate": 1.4871200108174306e-05, |
| "loss": 0.3621, |
| "step": 1102 |
| }, |
| { |
| "epoch": 3.724363023794483, |
| "grad_norm": 0.08763675374855749, |
| "learning_rate": 1.479786852524879e-05, |
| "loss": 0.3588, |
| "step": 1103 |
| }, |
| { |
| "epoch": 3.7277321541377133, |
| "grad_norm": 0.08909210540103846, |
| "learning_rate": 1.4724677135981118e-05, |
| "loss": 0.3625, |
| "step": 1104 |
| }, |
| { |
| "epoch": 3.7311012844809435, |
| "grad_norm": 0.08178768871920535, |
| "learning_rate": 1.4651626347518169e-05, |
| "loss": 0.3621, |
| "step": 1105 |
| }, |
| { |
| "epoch": 3.7344704148241736, |
| "grad_norm": 0.09156080753526148, |
| "learning_rate": 1.457871656622463e-05, |
| "loss": 0.359, |
| "step": 1106 |
| }, |
| { |
| "epoch": 3.737839545167404, |
| "grad_norm": 0.08325843071720376, |
| "learning_rate": 1.4505948197680892e-05, |
| "loss": 0.3607, |
| "step": 1107 |
| }, |
| { |
| "epoch": 3.741208675510634, |
| "grad_norm": 0.08448426244553969, |
| "learning_rate": 1.4433321646680614e-05, |
| "loss": 0.3648, |
| "step": 1108 |
| }, |
| { |
| "epoch": 3.744577805853864, |
| "grad_norm": 0.08086940191673836, |
| "learning_rate": 1.4360837317228571e-05, |
| "loss": 0.3588, |
| "step": 1109 |
| }, |
| { |
| "epoch": 3.747946936197094, |
| "grad_norm": 0.08083156500148386, |
| "learning_rate": 1.4288495612538427e-05, |
| "loss": 0.3571, |
| "step": 1110 |
| }, |
| { |
| "epoch": 3.7513160665403245, |
| "grad_norm": 0.08098967397738577, |
| "learning_rate": 1.4216296935030433e-05, |
| "loss": 0.3661, |
| "step": 1111 |
| }, |
| { |
| "epoch": 3.754685196883554, |
| "grad_norm": 0.08420676698532144, |
| "learning_rate": 1.4144241686329236e-05, |
| "loss": 0.3667, |
| "step": 1112 |
| }, |
| { |
| "epoch": 3.758054327226785, |
| "grad_norm": 0.08205757559067017, |
| "learning_rate": 1.4072330267261585e-05, |
| "loss": 0.3538, |
| "step": 1113 |
| }, |
| { |
| "epoch": 3.7614234575700145, |
| "grad_norm": 0.08130986735808181, |
| "learning_rate": 1.400056307785413e-05, |
| "loss": 0.358, |
| "step": 1114 |
| }, |
| { |
| "epoch": 3.764792587913245, |
| "grad_norm": 0.08513069994957134, |
| "learning_rate": 1.3928940517331282e-05, |
| "loss": 0.363, |
| "step": 1115 |
| }, |
| { |
| "epoch": 3.768161718256475, |
| "grad_norm": 0.08636038178242018, |
| "learning_rate": 1.3857462984112831e-05, |
| "loss": 0.3625, |
| "step": 1116 |
| }, |
| { |
| "epoch": 3.7715308485997054, |
| "grad_norm": 0.08228222146136721, |
| "learning_rate": 1.3786130875811864e-05, |
| "loss": 0.3643, |
| "step": 1117 |
| }, |
| { |
| "epoch": 3.774899978942935, |
| "grad_norm": 0.08245367651432615, |
| "learning_rate": 1.371494458923246e-05, |
| "loss": 0.3611, |
| "step": 1118 |
| }, |
| { |
| "epoch": 3.7782691092861658, |
| "grad_norm": 0.08984002063694464, |
| "learning_rate": 1.3643904520367568e-05, |
| "loss": 0.3665, |
| "step": 1119 |
| }, |
| { |
| "epoch": 3.7816382396293955, |
| "grad_norm": 0.08004513922265995, |
| "learning_rate": 1.3573011064396751e-05, |
| "loss": 0.3626, |
| "step": 1120 |
| }, |
| { |
| "epoch": 3.7850073699726257, |
| "grad_norm": 0.08501294133045856, |
| "learning_rate": 1.3502264615683966e-05, |
| "loss": 0.3584, |
| "step": 1121 |
| }, |
| { |
| "epoch": 3.788376500315856, |
| "grad_norm": 0.08838080335200914, |
| "learning_rate": 1.3431665567775439e-05, |
| "loss": 0.3584, |
| "step": 1122 |
| }, |
| { |
| "epoch": 3.791745630659086, |
| "grad_norm": 0.08330553315133392, |
| "learning_rate": 1.3361214313397444e-05, |
| "loss": 0.36, |
| "step": 1123 |
| }, |
| { |
| "epoch": 3.795114761002316, |
| "grad_norm": 0.08670146765162016, |
| "learning_rate": 1.3290911244454066e-05, |
| "loss": 0.3661, |
| "step": 1124 |
| }, |
| { |
| "epoch": 3.7984838913455463, |
| "grad_norm": 0.0841408453670069, |
| "learning_rate": 1.3220756752025126e-05, |
| "loss": 0.363, |
| "step": 1125 |
| }, |
| { |
| "epoch": 3.8018530216887765, |
| "grad_norm": 0.08384047682221397, |
| "learning_rate": 1.3150751226363886e-05, |
| "loss": 0.3622, |
| "step": 1126 |
| }, |
| { |
| "epoch": 3.8052221520320066, |
| "grad_norm": 0.08347244270329462, |
| "learning_rate": 1.3080895056895022e-05, |
| "loss": 0.3618, |
| "step": 1127 |
| }, |
| { |
| "epoch": 3.808591282375237, |
| "grad_norm": 0.0851964538331852, |
| "learning_rate": 1.3011188632212307e-05, |
| "loss": 0.3639, |
| "step": 1128 |
| }, |
| { |
| "epoch": 3.811960412718467, |
| "grad_norm": 0.08389988414632749, |
| "learning_rate": 1.2941632340076531e-05, |
| "loss": 0.3656, |
| "step": 1129 |
| }, |
| { |
| "epoch": 3.815329543061697, |
| "grad_norm": 0.0818943745196087, |
| "learning_rate": 1.2872226567413346e-05, |
| "loss": 0.3595, |
| "step": 1130 |
| }, |
| { |
| "epoch": 3.8186986734049273, |
| "grad_norm": 0.07744154226291297, |
| "learning_rate": 1.2802971700311103e-05, |
| "loss": 0.3595, |
| "step": 1131 |
| }, |
| { |
| "epoch": 3.8220678037481575, |
| "grad_norm": 0.08550107649728135, |
| "learning_rate": 1.2733868124018694e-05, |
| "loss": 0.3614, |
| "step": 1132 |
| }, |
| { |
| "epoch": 3.8254369340913876, |
| "grad_norm": 0.07860069500089853, |
| "learning_rate": 1.2664916222943392e-05, |
| "loss": 0.3552, |
| "step": 1133 |
| }, |
| { |
| "epoch": 3.828806064434618, |
| "grad_norm": 0.08126878361185912, |
| "learning_rate": 1.2596116380648761e-05, |
| "loss": 0.3622, |
| "step": 1134 |
| }, |
| { |
| "epoch": 3.832175194777848, |
| "grad_norm": 0.08610190267035886, |
| "learning_rate": 1.2527468979852513e-05, |
| "loss": 0.3645, |
| "step": 1135 |
| }, |
| { |
| "epoch": 3.835544325121078, |
| "grad_norm": 0.0815351952289208, |
| "learning_rate": 1.2458974402424312e-05, |
| "loss": 0.36, |
| "step": 1136 |
| }, |
| { |
| "epoch": 3.8389134554643083, |
| "grad_norm": 0.08556542224799225, |
| "learning_rate": 1.239063302938376e-05, |
| "loss": 0.3581, |
| "step": 1137 |
| }, |
| { |
| "epoch": 3.8422825858075385, |
| "grad_norm": 0.0863667479775735, |
| "learning_rate": 1.2322445240898158e-05, |
| "loss": 0.3592, |
| "step": 1138 |
| }, |
| { |
| "epoch": 3.8456517161507686, |
| "grad_norm": 0.09012131069922455, |
| "learning_rate": 1.2254411416280494e-05, |
| "loss": 0.3608, |
| "step": 1139 |
| }, |
| { |
| "epoch": 3.849020846493999, |
| "grad_norm": 0.0813110318115451, |
| "learning_rate": 1.2186531933987294e-05, |
| "loss": 0.3617, |
| "step": 1140 |
| }, |
| { |
| "epoch": 3.852389976837229, |
| "grad_norm": 0.08621918893656133, |
| "learning_rate": 1.2118807171616469e-05, |
| "loss": 0.3632, |
| "step": 1141 |
| }, |
| { |
| "epoch": 3.855759107180459, |
| "grad_norm": 0.08876046697132542, |
| "learning_rate": 1.2051237505905302e-05, |
| "loss": 0.363, |
| "step": 1142 |
| }, |
| { |
| "epoch": 3.8591282375236893, |
| "grad_norm": 0.08486782812443205, |
| "learning_rate": 1.1983823312728306e-05, |
| "loss": 0.3681, |
| "step": 1143 |
| }, |
| { |
| "epoch": 3.8624973678669194, |
| "grad_norm": 0.08182983032740812, |
| "learning_rate": 1.19165649670951e-05, |
| "loss": 0.3635, |
| "step": 1144 |
| }, |
| { |
| "epoch": 3.8658664982101496, |
| "grad_norm": 0.07583529894267067, |
| "learning_rate": 1.1849462843148398e-05, |
| "loss": 0.3633, |
| "step": 1145 |
| }, |
| { |
| "epoch": 3.8692356285533798, |
| "grad_norm": 0.09126795810440728, |
| "learning_rate": 1.1782517314161872e-05, |
| "loss": 0.3584, |
| "step": 1146 |
| }, |
| { |
| "epoch": 3.87260475889661, |
| "grad_norm": 0.0811651957931282, |
| "learning_rate": 1.1715728752538103e-05, |
| "loss": 0.3617, |
| "step": 1147 |
| }, |
| { |
| "epoch": 3.87597388923984, |
| "grad_norm": 0.0843774615335534, |
| "learning_rate": 1.164909752980648e-05, |
| "loss": 0.3644, |
| "step": 1148 |
| }, |
| { |
| "epoch": 3.8793430195830703, |
| "grad_norm": 0.08099134098178276, |
| "learning_rate": 1.1582624016621154e-05, |
| "loss": 0.3595, |
| "step": 1149 |
| }, |
| { |
| "epoch": 3.8827121499263004, |
| "grad_norm": 0.07907946240714192, |
| "learning_rate": 1.1516308582758983e-05, |
| "loss": 0.3614, |
| "step": 1150 |
| }, |
| { |
| "epoch": 3.8860812802695306, |
| "grad_norm": 0.08423868561227663, |
| "learning_rate": 1.1450151597117479e-05, |
| "loss": 0.3613, |
| "step": 1151 |
| }, |
| { |
| "epoch": 3.8894504106127608, |
| "grad_norm": 0.08033686574245383, |
| "learning_rate": 1.1384153427712729e-05, |
| "loss": 0.3642, |
| "step": 1152 |
| }, |
| { |
| "epoch": 3.8928195409559905, |
| "grad_norm": 0.07677407670189697, |
| "learning_rate": 1.1318314441677348e-05, |
| "loss": 0.3569, |
| "step": 1153 |
| }, |
| { |
| "epoch": 3.896188671299221, |
| "grad_norm": 0.07906769729135289, |
| "learning_rate": 1.1252635005258466e-05, |
| "loss": 0.3595, |
| "step": 1154 |
| }, |
| { |
| "epoch": 3.899557801642451, |
| "grad_norm": 0.08225694582677316, |
| "learning_rate": 1.1187115483815693e-05, |
| "loss": 0.3644, |
| "step": 1155 |
| }, |
| { |
| "epoch": 3.9029269319856814, |
| "grad_norm": 0.08435086211540141, |
| "learning_rate": 1.1121756241819023e-05, |
| "loss": 0.3629, |
| "step": 1156 |
| }, |
| { |
| "epoch": 3.906296062328911, |
| "grad_norm": 0.0779208137414844, |
| "learning_rate": 1.105655764284689e-05, |
| "loss": 0.3594, |
| "step": 1157 |
| }, |
| { |
| "epoch": 3.9096651926721417, |
| "grad_norm": 0.07917404294021134, |
| "learning_rate": 1.0991520049584112e-05, |
| "loss": 0.3649, |
| "step": 1158 |
| }, |
| { |
| "epoch": 3.9130343230153715, |
| "grad_norm": 0.07819778959894405, |
| "learning_rate": 1.0926643823819827e-05, |
| "loss": 0.3643, |
| "step": 1159 |
| }, |
| { |
| "epoch": 3.916403453358602, |
| "grad_norm": 0.0822711836526933, |
| "learning_rate": 1.0861929326445572e-05, |
| "loss": 0.3627, |
| "step": 1160 |
| }, |
| { |
| "epoch": 3.919772583701832, |
| "grad_norm": 0.07971405947853387, |
| "learning_rate": 1.0797376917453187e-05, |
| "loss": 0.3599, |
| "step": 1161 |
| }, |
| { |
| "epoch": 3.923141714045062, |
| "grad_norm": 0.08341374870417605, |
| "learning_rate": 1.0732986955932869e-05, |
| "loss": 0.3555, |
| "step": 1162 |
| }, |
| { |
| "epoch": 3.926510844388292, |
| "grad_norm": 0.07752760209485876, |
| "learning_rate": 1.0668759800071174e-05, |
| "loss": 0.3591, |
| "step": 1163 |
| }, |
| { |
| "epoch": 3.9298799747315223, |
| "grad_norm": 0.0795643390039002, |
| "learning_rate": 1.0604695807148971e-05, |
| "loss": 0.3568, |
| "step": 1164 |
| }, |
| { |
| "epoch": 3.9332491050747524, |
| "grad_norm": 0.07806803915038091, |
| "learning_rate": 1.0540795333539515e-05, |
| "loss": 0.3629, |
| "step": 1165 |
| }, |
| { |
| "epoch": 3.9366182354179826, |
| "grad_norm": 0.07851161347206628, |
| "learning_rate": 1.0477058734706436e-05, |
| "loss": 0.3611, |
| "step": 1166 |
| }, |
| { |
| "epoch": 3.939987365761213, |
| "grad_norm": 0.07762641296833782, |
| "learning_rate": 1.0413486365201785e-05, |
| "loss": 0.3613, |
| "step": 1167 |
| }, |
| { |
| "epoch": 3.943356496104443, |
| "grad_norm": 0.08535005147447429, |
| "learning_rate": 1.0350078578664005e-05, |
| "loss": 0.3591, |
| "step": 1168 |
| }, |
| { |
| "epoch": 3.946725626447673, |
| "grad_norm": 0.07824237520005016, |
| "learning_rate": 1.0286835727816001e-05, |
| "loss": 0.363, |
| "step": 1169 |
| }, |
| { |
| "epoch": 3.9500947567909033, |
| "grad_norm": 0.0725027774844816, |
| "learning_rate": 1.0223758164463246e-05, |
| "loss": 0.361, |
| "step": 1170 |
| }, |
| { |
| "epoch": 3.9534638871341334, |
| "grad_norm": 0.08250211916215387, |
| "learning_rate": 1.0160846239491673e-05, |
| "loss": 0.3706, |
| "step": 1171 |
| }, |
| { |
| "epoch": 3.9568330174773636, |
| "grad_norm": 0.07768057857668437, |
| "learning_rate": 1.0098100302865865e-05, |
| "loss": 0.358, |
| "step": 1172 |
| }, |
| { |
| "epoch": 3.9602021478205938, |
| "grad_norm": 0.0743357334386284, |
| "learning_rate": 1.003552070362701e-05, |
| "loss": 0.3588, |
| "step": 1173 |
| }, |
| { |
| "epoch": 3.963571278163824, |
| "grad_norm": 0.08538154828312804, |
| "learning_rate": 9.973107789891024e-06, |
| "loss": 0.3687, |
| "step": 1174 |
| }, |
| { |
| "epoch": 3.966940408507054, |
| "grad_norm": 0.08474253190258095, |
| "learning_rate": 9.910861908846598e-06, |
| "loss": 0.36, |
| "step": 1175 |
| }, |
| { |
| "epoch": 3.9703095388502843, |
| "grad_norm": 0.07698260800417392, |
| "learning_rate": 9.848783406753224e-06, |
| "loss": 0.3655, |
| "step": 1176 |
| }, |
| { |
| "epoch": 3.9736786691935144, |
| "grad_norm": 0.07875068992732076, |
| "learning_rate": 9.786872628939329e-06, |
| "loss": 0.3605, |
| "step": 1177 |
| }, |
| { |
| "epoch": 3.9770477995367446, |
| "grad_norm": 0.08337836249305365, |
| "learning_rate": 9.725129919800339e-06, |
| "loss": 0.3653, |
| "step": 1178 |
| }, |
| { |
| "epoch": 3.9804169298799748, |
| "grad_norm": 0.0799444611097984, |
| "learning_rate": 9.66355562279671e-06, |
| "loss": 0.3604, |
| "step": 1179 |
| }, |
| { |
| "epoch": 3.983786060223205, |
| "grad_norm": 0.08618283586928363, |
| "learning_rate": 9.60215008045211e-06, |
| "loss": 0.3637, |
| "step": 1180 |
| }, |
| { |
| "epoch": 3.987155190566435, |
| "grad_norm": 0.08302579845358256, |
| "learning_rate": 9.540913634351408e-06, |
| "loss": 0.3602, |
| "step": 1181 |
| }, |
| { |
| "epoch": 3.9905243209096652, |
| "grad_norm": 0.07735294324245658, |
| "learning_rate": 9.479846625138909e-06, |
| "loss": 0.3596, |
| "step": 1182 |
| }, |
| { |
| "epoch": 3.9938934512528954, |
| "grad_norm": 0.07471734423709958, |
| "learning_rate": 9.418949392516307e-06, |
| "loss": 0.3611, |
| "step": 1183 |
| }, |
| { |
| "epoch": 3.9972625815961256, |
| "grad_norm": 0.08214012704171592, |
| "learning_rate": 9.358222275240884e-06, |
| "loss": 0.3648, |
| "step": 1184 |
| }, |
| { |
| "epoch": 4.00336913034323, |
| "grad_norm": 0.11292758122904588, |
| "learning_rate": 9.297665611123628e-06, |
| "loss": 0.3527, |
| "step": 1185 |
| }, |
| { |
| "epoch": 4.00673826068646, |
| "grad_norm": 0.0941098295127884, |
| "learning_rate": 9.237279737027326e-06, |
| "loss": 0.3472, |
| "step": 1186 |
| }, |
| { |
| "epoch": 4.01010739102969, |
| "grad_norm": 0.09639154458998347, |
| "learning_rate": 9.177064988864712e-06, |
| "loss": 0.3425, |
| "step": 1187 |
| }, |
| { |
| "epoch": 4.013476521372921, |
| "grad_norm": 0.09835304863889502, |
| "learning_rate": 9.117021701596567e-06, |
| "loss": 0.3446, |
| "step": 1188 |
| }, |
| { |
| "epoch": 4.01684565171615, |
| "grad_norm": 0.08987244503280054, |
| "learning_rate": 9.057150209229845e-06, |
| "loss": 0.3513, |
| "step": 1189 |
| }, |
| { |
| "epoch": 4.020214782059381, |
| "grad_norm": 0.10031177854257561, |
| "learning_rate": 8.99745084481594e-06, |
| "loss": 0.3516, |
| "step": 1190 |
| }, |
| { |
| "epoch": 4.023583912402611, |
| "grad_norm": 0.10651297976200229, |
| "learning_rate": 8.937923940448634e-06, |
| "loss": 0.3489, |
| "step": 1191 |
| }, |
| { |
| "epoch": 4.026953042745841, |
| "grad_norm": 0.08656835316363745, |
| "learning_rate": 8.87856982726243e-06, |
| "loss": 0.3402, |
| "step": 1192 |
| }, |
| { |
| "epoch": 4.030322173089071, |
| "grad_norm": 0.0977560831877126, |
| "learning_rate": 8.819388835430569e-06, |
| "loss": 0.348, |
| "step": 1193 |
| }, |
| { |
| "epoch": 4.033691303432302, |
| "grad_norm": 0.09746909055035731, |
| "learning_rate": 8.7603812941633e-06, |
| "loss": 0.3492, |
| "step": 1194 |
| }, |
| { |
| "epoch": 4.037060433775531, |
| "grad_norm": 0.08395050874481182, |
| "learning_rate": 8.701547531706018e-06, |
| "loss": 0.3482, |
| "step": 1195 |
| }, |
| { |
| "epoch": 4.040429564118762, |
| "grad_norm": 0.09139581639425662, |
| "learning_rate": 8.642887875337376e-06, |
| "loss": 0.3509, |
| "step": 1196 |
| }, |
| { |
| "epoch": 4.043798694461992, |
| "grad_norm": 0.09015094643326858, |
| "learning_rate": 8.584402651367556e-06, |
| "loss": 0.3445, |
| "step": 1197 |
| }, |
| { |
| "epoch": 4.047167824805222, |
| "grad_norm": 0.08067803096785321, |
| "learning_rate": 8.526092185136394e-06, |
| "loss": 0.345, |
| "step": 1198 |
| }, |
| { |
| "epoch": 4.050536955148452, |
| "grad_norm": 0.08630631888609785, |
| "learning_rate": 8.467956801011618e-06, |
| "loss": 0.338, |
| "step": 1199 |
| }, |
| { |
| "epoch": 4.053906085491683, |
| "grad_norm": 0.08433690244909006, |
| "learning_rate": 8.409996822386972e-06, |
| "loss": 0.343, |
| "step": 1200 |
| }, |
| { |
| "epoch": 4.057275215834912, |
| "grad_norm": 0.07920044123514752, |
| "learning_rate": 8.352212571680458e-06, |
| "loss": 0.3473, |
| "step": 1201 |
| }, |
| { |
| "epoch": 4.060644346178143, |
| "grad_norm": 0.07927154455223241, |
| "learning_rate": 8.294604370332613e-06, |
| "loss": 0.3482, |
| "step": 1202 |
| }, |
| { |
| "epoch": 4.064013476521373, |
| "grad_norm": 0.08109057542606768, |
| "learning_rate": 8.23717253880457e-06, |
| "loss": 0.3428, |
| "step": 1203 |
| }, |
| { |
| "epoch": 4.067382606864603, |
| "grad_norm": 0.08569342844895425, |
| "learning_rate": 8.17991739657641e-06, |
| "loss": 0.3474, |
| "step": 1204 |
| }, |
| { |
| "epoch": 4.070751737207833, |
| "grad_norm": 0.08637139957757115, |
| "learning_rate": 8.122839262145294e-06, |
| "loss": 0.3467, |
| "step": 1205 |
| }, |
| { |
| "epoch": 4.074120867551064, |
| "grad_norm": 0.07781808041765698, |
| "learning_rate": 8.06593845302376e-06, |
| "loss": 0.3395, |
| "step": 1206 |
| }, |
| { |
| "epoch": 4.077489997894293, |
| "grad_norm": 0.08111376806052889, |
| "learning_rate": 8.00921528573793e-06, |
| "loss": 0.3389, |
| "step": 1207 |
| }, |
| { |
| "epoch": 4.080859128237524, |
| "grad_norm": 0.08619767447901233, |
| "learning_rate": 7.952670075825702e-06, |
| "loss": 0.348, |
| "step": 1208 |
| }, |
| { |
| "epoch": 4.084228258580754, |
| "grad_norm": 0.07737321565650793, |
| "learning_rate": 7.896303137835084e-06, |
| "loss": 0.3373, |
| "step": 1209 |
| }, |
| { |
| "epoch": 4.087597388923984, |
| "grad_norm": 0.07775405743530504, |
| "learning_rate": 7.840114785322384e-06, |
| "loss": 0.3443, |
| "step": 1210 |
| }, |
| { |
| "epoch": 4.090966519267214, |
| "grad_norm": 0.07816418598625743, |
| "learning_rate": 7.78410533085046e-06, |
| "loss": 0.345, |
| "step": 1211 |
| }, |
| { |
| "epoch": 4.094335649610445, |
| "grad_norm": 0.08021420493935687, |
| "learning_rate": 7.728275085987041e-06, |
| "loss": 0.3445, |
| "step": 1212 |
| }, |
| { |
| "epoch": 4.097704779953674, |
| "grad_norm": 0.07501876010838501, |
| "learning_rate": 7.672624361302894e-06, |
| "loss": 0.345, |
| "step": 1213 |
| }, |
| { |
| "epoch": 4.101073910296905, |
| "grad_norm": 0.07616193917641446, |
| "learning_rate": 7.6171534663702416e-06, |
| "loss": 0.3451, |
| "step": 1214 |
| }, |
| { |
| "epoch": 4.104443040640135, |
| "grad_norm": 0.08197274858236898, |
| "learning_rate": 7.5618627097608835e-06, |
| "loss": 0.3481, |
| "step": 1215 |
| }, |
| { |
| "epoch": 4.107812170983365, |
| "grad_norm": 0.07483017111226394, |
| "learning_rate": 7.50675239904457e-06, |
| "loss": 0.3454, |
| "step": 1216 |
| }, |
| { |
| "epoch": 4.111181301326595, |
| "grad_norm": 0.07441931083866478, |
| "learning_rate": 7.451822840787279e-06, |
| "loss": 0.3469, |
| "step": 1217 |
| }, |
| { |
| "epoch": 4.114550431669826, |
| "grad_norm": 0.08142190767207858, |
| "learning_rate": 7.397074340549508e-06, |
| "loss": 0.3431, |
| "step": 1218 |
| }, |
| { |
| "epoch": 4.117919562013055, |
| "grad_norm": 0.07876869644542178, |
| "learning_rate": 7.342507202884577e-06, |
| "loss": 0.3462, |
| "step": 1219 |
| }, |
| { |
| "epoch": 4.121288692356286, |
| "grad_norm": 0.07845687277699909, |
| "learning_rate": 7.288121731336901e-06, |
| "loss": 0.3456, |
| "step": 1220 |
| }, |
| { |
| "epoch": 4.124657822699516, |
| "grad_norm": 0.07817574483354851, |
| "learning_rate": 7.233918228440324e-06, |
| "loss": 0.3436, |
| "step": 1221 |
| }, |
| { |
| "epoch": 4.128026953042746, |
| "grad_norm": 0.07876507958828823, |
| "learning_rate": 7.1798969957165025e-06, |
| "loss": 0.3493, |
| "step": 1222 |
| }, |
| { |
| "epoch": 4.131396083385976, |
| "grad_norm": 0.07707210638891601, |
| "learning_rate": 7.126058333673094e-06, |
| "loss": 0.3402, |
| "step": 1223 |
| }, |
| { |
| "epoch": 4.134765213729207, |
| "grad_norm": 0.07947117463971737, |
| "learning_rate": 7.072402541802197e-06, |
| "loss": 0.3478, |
| "step": 1224 |
| }, |
| { |
| "epoch": 4.138134344072436, |
| "grad_norm": 0.07708906857469865, |
| "learning_rate": 7.018929918578621e-06, |
| "loss": 0.3457, |
| "step": 1225 |
| }, |
| { |
| "epoch": 4.141503474415666, |
| "grad_norm": 0.08008450821251828, |
| "learning_rate": 6.965640761458274e-06, |
| "loss": 0.3414, |
| "step": 1226 |
| }, |
| { |
| "epoch": 4.144872604758897, |
| "grad_norm": 0.07732322409168987, |
| "learning_rate": 6.912535366876483e-06, |
| "loss": 0.3427, |
| "step": 1227 |
| }, |
| { |
| "epoch": 4.148241735102126, |
| "grad_norm": 0.07450575669616548, |
| "learning_rate": 6.859614030246318e-06, |
| "loss": 0.3477, |
| "step": 1228 |
| }, |
| { |
| "epoch": 4.151610865445357, |
| "grad_norm": 0.08433118593640568, |
| "learning_rate": 6.806877045957003e-06, |
| "loss": 0.3425, |
| "step": 1229 |
| }, |
| { |
| "epoch": 4.154979995788587, |
| "grad_norm": 0.07513389398253724, |
| "learning_rate": 6.754324707372264e-06, |
| "loss": 0.3443, |
| "step": 1230 |
| }, |
| { |
| "epoch": 4.158349126131817, |
| "grad_norm": 0.07536890804885507, |
| "learning_rate": 6.701957306828637e-06, |
| "loss": 0.3438, |
| "step": 1231 |
| }, |
| { |
| "epoch": 4.161718256475047, |
| "grad_norm": 0.07685668754719273, |
| "learning_rate": 6.649775135633944e-06, |
| "loss": 0.3401, |
| "step": 1232 |
| }, |
| { |
| "epoch": 4.165087386818278, |
| "grad_norm": 0.07956673529792976, |
| "learning_rate": 6.597778484065571e-06, |
| "loss": 0.3503, |
| "step": 1233 |
| }, |
| { |
| "epoch": 4.168456517161507, |
| "grad_norm": 0.07209527381971025, |
| "learning_rate": 6.545967641368958e-06, |
| "loss": 0.3434, |
| "step": 1234 |
| }, |
| { |
| "epoch": 4.171825647504738, |
| "grad_norm": 0.07458918014634688, |
| "learning_rate": 6.494342895755879e-06, |
| "loss": 0.343, |
| "step": 1235 |
| }, |
| { |
| "epoch": 4.175194777847968, |
| "grad_norm": 0.08077306421411162, |
| "learning_rate": 6.4429045344029136e-06, |
| "loss": 0.3513, |
| "step": 1236 |
| }, |
| { |
| "epoch": 4.178563908191198, |
| "grad_norm": 0.08065308092284855, |
| "learning_rate": 6.391652843449829e-06, |
| "loss": 0.3434, |
| "step": 1237 |
| }, |
| { |
| "epoch": 4.181933038534428, |
| "grad_norm": 0.0731775502872814, |
| "learning_rate": 6.340588107997994e-06, |
| "loss": 0.3443, |
| "step": 1238 |
| }, |
| { |
| "epoch": 4.185302168877659, |
| "grad_norm": 0.07546567416391478, |
| "learning_rate": 6.289710612108786e-06, |
| "loss": 0.3434, |
| "step": 1239 |
| }, |
| { |
| "epoch": 4.188671299220888, |
| "grad_norm": 0.07650397977406549, |
| "learning_rate": 6.239020638801987e-06, |
| "loss": 0.3452, |
| "step": 1240 |
| }, |
| { |
| "epoch": 4.192040429564119, |
| "grad_norm": 0.07431679145535366, |
| "learning_rate": 6.18851847005427e-06, |
| "loss": 0.3484, |
| "step": 1241 |
| }, |
| { |
| "epoch": 4.195409559907349, |
| "grad_norm": 0.07416827387620398, |
| "learning_rate": 6.1382043867975836e-06, |
| "loss": 0.3452, |
| "step": 1242 |
| }, |
| { |
| "epoch": 4.198778690250579, |
| "grad_norm": 0.07754320392922942, |
| "learning_rate": 6.088078668917572e-06, |
| "loss": 0.3491, |
| "step": 1243 |
| }, |
| { |
| "epoch": 4.202147820593809, |
| "grad_norm": 0.07827458851806732, |
| "learning_rate": 6.038141595252094e-06, |
| "loss": 0.3406, |
| "step": 1244 |
| }, |
| { |
| "epoch": 4.20551695093704, |
| "grad_norm": 0.0725724426162921, |
| "learning_rate": 5.9883934435895774e-06, |
| "loss": 0.3496, |
| "step": 1245 |
| }, |
| { |
| "epoch": 4.208886081280269, |
| "grad_norm": 0.0719909369345341, |
| "learning_rate": 5.9388344906675485e-06, |
| "loss": 0.3526, |
| "step": 1246 |
| }, |
| { |
| "epoch": 4.2122552116235, |
| "grad_norm": 0.07567213228800986, |
| "learning_rate": 5.889465012171069e-06, |
| "loss": 0.3468, |
| "step": 1247 |
| }, |
| { |
| "epoch": 4.21562434196673, |
| "grad_norm": 0.07098076354440293, |
| "learning_rate": 5.840285282731173e-06, |
| "loss": 0.3466, |
| "step": 1248 |
| }, |
| { |
| "epoch": 4.21899347230996, |
| "grad_norm": 0.07019771893928237, |
| "learning_rate": 5.791295575923382e-06, |
| "loss": 0.3448, |
| "step": 1249 |
| }, |
| { |
| "epoch": 4.22236260265319, |
| "grad_norm": 0.07471579252214251, |
| "learning_rate": 5.742496164266174e-06, |
| "loss": 0.3491, |
| "step": 1250 |
| }, |
| { |
| "epoch": 4.225731732996421, |
| "grad_norm": 0.07236549423445121, |
| "learning_rate": 5.693887319219422e-06, |
| "loss": 0.3499, |
| "step": 1251 |
| }, |
| { |
| "epoch": 4.22910086333965, |
| "grad_norm": 0.07134479537520134, |
| "learning_rate": 5.645469311182958e-06, |
| "loss": 0.3459, |
| "step": 1252 |
| }, |
| { |
| "epoch": 4.232469993682881, |
| "grad_norm": 0.07072016749147457, |
| "learning_rate": 5.597242409495018e-06, |
| "loss": 0.3438, |
| "step": 1253 |
| }, |
| { |
| "epoch": 4.235839124026111, |
| "grad_norm": 0.07179051070856982, |
| "learning_rate": 5.549206882430773e-06, |
| "loss": 0.3419, |
| "step": 1254 |
| }, |
| { |
| "epoch": 4.239208254369341, |
| "grad_norm": 0.07302770625869862, |
| "learning_rate": 5.501362997200787e-06, |
| "loss": 0.3487, |
| "step": 1255 |
| }, |
| { |
| "epoch": 4.242577384712571, |
| "grad_norm": 0.06976392401988353, |
| "learning_rate": 5.453711019949581e-06, |
| "loss": 0.344, |
| "step": 1256 |
| }, |
| { |
| "epoch": 4.245946515055802, |
| "grad_norm": 0.07078499285712887, |
| "learning_rate": 5.406251215754146e-06, |
| "loss": 0.3465, |
| "step": 1257 |
| }, |
| { |
| "epoch": 4.249315645399031, |
| "grad_norm": 0.07118826571789505, |
| "learning_rate": 5.358983848622452e-06, |
| "loss": 0.3504, |
| "step": 1258 |
| }, |
| { |
| "epoch": 4.252684775742262, |
| "grad_norm": 0.0686563097499576, |
| "learning_rate": 5.311909181491994e-06, |
| "loss": 0.3433, |
| "step": 1259 |
| }, |
| { |
| "epoch": 4.256053906085492, |
| "grad_norm": 0.06836729686980945, |
| "learning_rate": 5.265027476228297e-06, |
| "loss": 0.3428, |
| "step": 1260 |
| }, |
| { |
| "epoch": 4.259423036428722, |
| "grad_norm": 0.07026205200909408, |
| "learning_rate": 5.218338993623499e-06, |
| "loss": 0.3475, |
| "step": 1261 |
| }, |
| { |
| "epoch": 4.262792166771952, |
| "grad_norm": 0.07032323091306557, |
| "learning_rate": 5.171843993394903e-06, |
| "loss": 0.3431, |
| "step": 1262 |
| }, |
| { |
| "epoch": 4.2661612971151825, |
| "grad_norm": 0.07423746533959613, |
| "learning_rate": 5.125542734183473e-06, |
| "loss": 0.3445, |
| "step": 1263 |
| }, |
| { |
| "epoch": 4.269530427458412, |
| "grad_norm": 0.07841448579779874, |
| "learning_rate": 5.079435473552474e-06, |
| "loss": 0.3481, |
| "step": 1264 |
| }, |
| { |
| "epoch": 4.272899557801642, |
| "grad_norm": 0.07040437269579536, |
| "learning_rate": 5.033522467985985e-06, |
| "loss": 0.3422, |
| "step": 1265 |
| }, |
| { |
| "epoch": 4.276268688144873, |
| "grad_norm": 0.07271729651198641, |
| "learning_rate": 4.987803972887482e-06, |
| "loss": 0.3433, |
| "step": 1266 |
| }, |
| { |
| "epoch": 4.279637818488103, |
| "grad_norm": 0.07717082685197238, |
| "learning_rate": 4.9422802425784475e-06, |
| "loss": 0.3459, |
| "step": 1267 |
| }, |
| { |
| "epoch": 4.283006948831333, |
| "grad_norm": 0.07646859752104176, |
| "learning_rate": 4.896951530296896e-06, |
| "loss": 0.3487, |
| "step": 1268 |
| }, |
| { |
| "epoch": 4.286376079174563, |
| "grad_norm": 0.07196146666335995, |
| "learning_rate": 4.851818088196041e-06, |
| "loss": 0.3451, |
| "step": 1269 |
| }, |
| { |
| "epoch": 4.289745209517793, |
| "grad_norm": 0.07601088345941356, |
| "learning_rate": 4.806880167342831e-06, |
| "loss": 0.346, |
| "step": 1270 |
| }, |
| { |
| "epoch": 4.293114339861023, |
| "grad_norm": 0.0730390084111676, |
| "learning_rate": 4.762138017716571e-06, |
| "loss": 0.3451, |
| "step": 1271 |
| }, |
| { |
| "epoch": 4.296483470204254, |
| "grad_norm": 0.08370554873202815, |
| "learning_rate": 4.7175918882075465e-06, |
| "loss": 0.3413, |
| "step": 1272 |
| }, |
| { |
| "epoch": 4.299852600547483, |
| "grad_norm": 0.07165140458981821, |
| "learning_rate": 4.673242026615627e-06, |
| "loss": 0.3413, |
| "step": 1273 |
| }, |
| { |
| "epoch": 4.303221730890714, |
| "grad_norm": 0.07124644667052794, |
| "learning_rate": 4.6290886796488946e-06, |
| "loss": 0.3474, |
| "step": 1274 |
| }, |
| { |
| "epoch": 4.306590861233944, |
| "grad_norm": 0.07331931362741691, |
| "learning_rate": 4.58513209292224e-06, |
| "loss": 0.3445, |
| "step": 1275 |
| }, |
| { |
| "epoch": 4.309959991577174, |
| "grad_norm": 0.07237727500497035, |
| "learning_rate": 4.54137251095605e-06, |
| "loss": 0.3511, |
| "step": 1276 |
| }, |
| { |
| "epoch": 4.313329121920404, |
| "grad_norm": 0.07038492284926416, |
| "learning_rate": 4.4978101771748195e-06, |
| "loss": 0.3429, |
| "step": 1277 |
| }, |
| { |
| "epoch": 4.316698252263635, |
| "grad_norm": 0.07186746493744087, |
| "learning_rate": 4.454445333905768e-06, |
| "loss": 0.3423, |
| "step": 1278 |
| }, |
| { |
| "epoch": 4.320067382606864, |
| "grad_norm": 0.07185532233373727, |
| "learning_rate": 4.411278222377551e-06, |
| "loss": 0.3416, |
| "step": 1279 |
| }, |
| { |
| "epoch": 4.323436512950095, |
| "grad_norm": 0.0702075072689657, |
| "learning_rate": 4.3683090827188666e-06, |
| "loss": 0.3452, |
| "step": 1280 |
| }, |
| { |
| "epoch": 4.326805643293325, |
| "grad_norm": 0.0752614715082349, |
| "learning_rate": 4.325538153957158e-06, |
| "loss": 0.3475, |
| "step": 1281 |
| }, |
| { |
| "epoch": 4.330174773636555, |
| "grad_norm": 0.07050331941427515, |
| "learning_rate": 4.282965674017265e-06, |
| "loss": 0.3477, |
| "step": 1282 |
| }, |
| { |
| "epoch": 4.333543903979785, |
| "grad_norm": 0.07219368807869528, |
| "learning_rate": 4.240591879720084e-06, |
| "loss": 0.3497, |
| "step": 1283 |
| }, |
| { |
| "epoch": 4.336913034323016, |
| "grad_norm": 0.06956963675751204, |
| "learning_rate": 4.198417006781283e-06, |
| "loss": 0.3474, |
| "step": 1284 |
| }, |
| { |
| "epoch": 4.340282164666245, |
| "grad_norm": 0.06960098578843016, |
| "learning_rate": 4.156441289809983e-06, |
| "loss": 0.3445, |
| "step": 1285 |
| }, |
| { |
| "epoch": 4.343651295009476, |
| "grad_norm": 0.07648526368534525, |
| "learning_rate": 4.114664962307439e-06, |
| "loss": 0.3479, |
| "step": 1286 |
| }, |
| { |
| "epoch": 4.347020425352706, |
| "grad_norm": 0.07088809269875901, |
| "learning_rate": 4.073088256665742e-06, |
| "loss": 0.3421, |
| "step": 1287 |
| }, |
| { |
| "epoch": 4.350389555695936, |
| "grad_norm": 0.07273421779811111, |
| "learning_rate": 4.031711404166525e-06, |
| "loss": 0.344, |
| "step": 1288 |
| }, |
| { |
| "epoch": 4.353758686039166, |
| "grad_norm": 0.07174713114445853, |
| "learning_rate": 3.9905346349797234e-06, |
| "loss": 0.3441, |
| "step": 1289 |
| }, |
| { |
| "epoch": 4.3571278163823965, |
| "grad_norm": 0.07290897068132188, |
| "learning_rate": 3.949558178162209e-06, |
| "loss": 0.3462, |
| "step": 1290 |
| }, |
| { |
| "epoch": 4.360496946725626, |
| "grad_norm": 0.07194649852054723, |
| "learning_rate": 3.9087822616565984e-06, |
| "loss": 0.3478, |
| "step": 1291 |
| }, |
| { |
| "epoch": 4.363866077068857, |
| "grad_norm": 0.07337266992394913, |
| "learning_rate": 3.86820711228991e-06, |
| "loss": 0.3447, |
| "step": 1292 |
| }, |
| { |
| "epoch": 4.367235207412087, |
| "grad_norm": 0.07030690021581439, |
| "learning_rate": 3.827832955772372e-06, |
| "loss": 0.3456, |
| "step": 1293 |
| }, |
| { |
| "epoch": 4.370604337755317, |
| "grad_norm": 0.07201158711941352, |
| "learning_rate": 3.7876600166961353e-06, |
| "loss": 0.3465, |
| "step": 1294 |
| }, |
| { |
| "epoch": 4.373973468098547, |
| "grad_norm": 0.07511999851456955, |
| "learning_rate": 3.747688518534003e-06, |
| "loss": 0.3509, |
| "step": 1295 |
| }, |
| { |
| "epoch": 4.3773425984417775, |
| "grad_norm": 0.07172350904328591, |
| "learning_rate": 3.707918683638223e-06, |
| "loss": 0.345, |
| "step": 1296 |
| }, |
| { |
| "epoch": 4.380711728785007, |
| "grad_norm": 0.0693885503387989, |
| "learning_rate": 3.6683507332392476e-06, |
| "loss": 0.3453, |
| "step": 1297 |
| }, |
| { |
| "epoch": 4.384080859128238, |
| "grad_norm": 0.07019744686285931, |
| "learning_rate": 3.628984887444462e-06, |
| "loss": 0.3432, |
| "step": 1298 |
| }, |
| { |
| "epoch": 4.387449989471468, |
| "grad_norm": 0.06892399615992918, |
| "learning_rate": 3.589821365237023e-06, |
| "loss": 0.3422, |
| "step": 1299 |
| }, |
| { |
| "epoch": 4.390819119814698, |
| "grad_norm": 0.0711323225202878, |
| "learning_rate": 3.550860384474568e-06, |
| "loss": 0.3468, |
| "step": 1300 |
| }, |
| { |
| "epoch": 4.394188250157928, |
| "grad_norm": 0.07222951484982641, |
| "learning_rate": 3.5121021618881e-06, |
| "loss": 0.3444, |
| "step": 1301 |
| }, |
| { |
| "epoch": 4.3975573805011585, |
| "grad_norm": 0.07011816955357002, |
| "learning_rate": 3.473546913080674e-06, |
| "loss": 0.3417, |
| "step": 1302 |
| }, |
| { |
| "epoch": 4.400926510844388, |
| "grad_norm": 0.06918135608237871, |
| "learning_rate": 3.4351948525262625e-06, |
| "loss": 0.3431, |
| "step": 1303 |
| }, |
| { |
| "epoch": 4.404295641187619, |
| "grad_norm": 0.07183448949638974, |
| "learning_rate": 3.397046193568558e-06, |
| "loss": 0.3454, |
| "step": 1304 |
| }, |
| { |
| "epoch": 4.407664771530849, |
| "grad_norm": 0.06841029875272973, |
| "learning_rate": 3.3591011484197744e-06, |
| "loss": 0.3471, |
| "step": 1305 |
| }, |
| { |
| "epoch": 4.411033901874079, |
| "grad_norm": 0.07008578728288764, |
| "learning_rate": 3.3213599281594688e-06, |
| "loss": 0.3469, |
| "step": 1306 |
| }, |
| { |
| "epoch": 4.414403032217309, |
| "grad_norm": 0.06784411674661273, |
| "learning_rate": 3.28382274273336e-06, |
| "loss": 0.3452, |
| "step": 1307 |
| }, |
| { |
| "epoch": 4.417772162560539, |
| "grad_norm": 0.06727601165426443, |
| "learning_rate": 3.246489800952155e-06, |
| "loss": 0.3513, |
| "step": 1308 |
| }, |
| { |
| "epoch": 4.421141292903769, |
| "grad_norm": 0.06930299868926686, |
| "learning_rate": 3.209361310490451e-06, |
| "loss": 0.344, |
| "step": 1309 |
| }, |
| { |
| "epoch": 4.424510423247, |
| "grad_norm": 0.06983137546711997, |
| "learning_rate": 3.172437477885475e-06, |
| "loss": 0.3432, |
| "step": 1310 |
| }, |
| { |
| "epoch": 4.4278795535902296, |
| "grad_norm": 0.06738405898147315, |
| "learning_rate": 3.1357185085360233e-06, |
| "loss": 0.3412, |
| "step": 1311 |
| }, |
| { |
| "epoch": 4.431248683933459, |
| "grad_norm": 0.069114436702608, |
| "learning_rate": 3.099204606701256e-06, |
| "loss": 0.3438, |
| "step": 1312 |
| }, |
| { |
| "epoch": 4.43461781427669, |
| "grad_norm": 0.07063250147224803, |
| "learning_rate": 3.062895975499616e-06, |
| "loss": 0.3449, |
| "step": 1313 |
| }, |
| { |
| "epoch": 4.43798694461992, |
| "grad_norm": 0.06869203050534661, |
| "learning_rate": 3.026792816907671e-06, |
| "loss": 0.347, |
| "step": 1314 |
| }, |
| { |
| "epoch": 4.44135607496315, |
| "grad_norm": 0.06790795340800003, |
| "learning_rate": 2.9908953317589675e-06, |
| "loss": 0.3511, |
| "step": 1315 |
| }, |
| { |
| "epoch": 4.44472520530638, |
| "grad_norm": 0.06801706888897209, |
| "learning_rate": 2.955203719742965e-06, |
| "loss": 0.3499, |
| "step": 1316 |
| }, |
| { |
| "epoch": 4.4480943356496105, |
| "grad_norm": 0.06703090567229934, |
| "learning_rate": 2.9197181794038896e-06, |
| "loss": 0.3409, |
| "step": 1317 |
| }, |
| { |
| "epoch": 4.45146346599284, |
| "grad_norm": 0.06845785402581211, |
| "learning_rate": 2.884438908139626e-06, |
| "loss": 0.3451, |
| "step": 1318 |
| }, |
| { |
| "epoch": 4.454832596336071, |
| "grad_norm": 0.06809288242514337, |
| "learning_rate": 2.8493661022006615e-06, |
| "loss": 0.349, |
| "step": 1319 |
| }, |
| { |
| "epoch": 4.458201726679301, |
| "grad_norm": 0.06993068933675987, |
| "learning_rate": 2.814499956688912e-06, |
| "loss": 0.3457, |
| "step": 1320 |
| }, |
| { |
| "epoch": 4.461570857022531, |
| "grad_norm": 0.06709969061038806, |
| "learning_rate": 2.7798406655567565e-06, |
| "loss": 0.3512, |
| "step": 1321 |
| }, |
| { |
| "epoch": 4.464939987365761, |
| "grad_norm": 0.06978980053246452, |
| "learning_rate": 2.7453884216058368e-06, |
| "loss": 0.3452, |
| "step": 1322 |
| }, |
| { |
| "epoch": 4.4683091177089915, |
| "grad_norm": 0.06806425403838408, |
| "learning_rate": 2.7111434164860573e-06, |
| "loss": 0.3489, |
| "step": 1323 |
| }, |
| { |
| "epoch": 4.471678248052221, |
| "grad_norm": 0.07023315792460011, |
| "learning_rate": 2.677105840694507e-06, |
| "loss": 0.3484, |
| "step": 1324 |
| }, |
| { |
| "epoch": 4.475047378395452, |
| "grad_norm": 0.0671632913864402, |
| "learning_rate": 2.6432758835743854e-06, |
| "loss": 0.3475, |
| "step": 1325 |
| }, |
| { |
| "epoch": 4.478416508738682, |
| "grad_norm": 0.0668737342617598, |
| "learning_rate": 2.6096537333139616e-06, |
| "loss": 0.3402, |
| "step": 1326 |
| }, |
| { |
| "epoch": 4.481785639081912, |
| "grad_norm": 0.06731813732301019, |
| "learning_rate": 2.5762395769455183e-06, |
| "loss": 0.3472, |
| "step": 1327 |
| }, |
| { |
| "epoch": 4.485154769425142, |
| "grad_norm": 0.06962894223132757, |
| "learning_rate": 2.5430336003443045e-06, |
| "loss": 0.3411, |
| "step": 1328 |
| }, |
| { |
| "epoch": 4.4885238997683725, |
| "grad_norm": 0.06651868659879541, |
| "learning_rate": 2.5100359882275526e-06, |
| "loss": 0.3463, |
| "step": 1329 |
| }, |
| { |
| "epoch": 4.491893030111602, |
| "grad_norm": 0.06589574436809537, |
| "learning_rate": 2.4772469241533648e-06, |
| "loss": 0.3449, |
| "step": 1330 |
| }, |
| { |
| "epoch": 4.495262160454833, |
| "grad_norm": 0.06851573366912253, |
| "learning_rate": 2.444666590519775e-06, |
| "loss": 0.3478, |
| "step": 1331 |
| }, |
| { |
| "epoch": 4.498631290798063, |
| "grad_norm": 0.06812334086330306, |
| "learning_rate": 2.4122951685636674e-06, |
| "loss": 0.3493, |
| "step": 1332 |
| }, |
| { |
| "epoch": 4.502000421141293, |
| "grad_norm": 0.06783544762672909, |
| "learning_rate": 2.380132838359819e-06, |
| "loss": 0.3458, |
| "step": 1333 |
| }, |
| { |
| "epoch": 4.505369551484523, |
| "grad_norm": 0.06645851016955091, |
| "learning_rate": 2.3481797788198745e-06, |
| "loss": 0.3487, |
| "step": 1334 |
| }, |
| { |
| "epoch": 4.5087386818277535, |
| "grad_norm": 0.06691716361429041, |
| "learning_rate": 2.3164361676913406e-06, |
| "loss": 0.3461, |
| "step": 1335 |
| }, |
| { |
| "epoch": 4.512107812170983, |
| "grad_norm": 0.066561132769546, |
| "learning_rate": 2.284902181556632e-06, |
| "loss": 0.3451, |
| "step": 1336 |
| }, |
| { |
| "epoch": 4.515476942514214, |
| "grad_norm": 0.06972464277014613, |
| "learning_rate": 2.2535779958320614e-06, |
| "loss": 0.3363, |
| "step": 1337 |
| }, |
| { |
| "epoch": 4.5188460728574436, |
| "grad_norm": 0.06662582951723346, |
| "learning_rate": 2.2224637847668484e-06, |
| "loss": 0.3462, |
| "step": 1338 |
| }, |
| { |
| "epoch": 4.522215203200674, |
| "grad_norm": 0.06683364588110276, |
| "learning_rate": 2.1915597214422048e-06, |
| "loss": 0.345, |
| "step": 1339 |
| }, |
| { |
| "epoch": 4.525584333543904, |
| "grad_norm": 0.06973071855720024, |
| "learning_rate": 2.1608659777703033e-06, |
| "loss": 0.3486, |
| "step": 1340 |
| }, |
| { |
| "epoch": 4.5289534638871345, |
| "grad_norm": 0.06547912030107868, |
| "learning_rate": 2.130382724493405e-06, |
| "loss": 0.3481, |
| "step": 1341 |
| }, |
| { |
| "epoch": 4.532322594230364, |
| "grad_norm": 0.06796161455803124, |
| "learning_rate": 2.100110131182813e-06, |
| "loss": 0.3488, |
| "step": 1342 |
| }, |
| { |
| "epoch": 4.535691724573595, |
| "grad_norm": 0.06643717641974535, |
| "learning_rate": 2.070048366238e-06, |
| "loss": 0.3453, |
| "step": 1343 |
| }, |
| { |
| "epoch": 4.5390608549168245, |
| "grad_norm": 0.0657312313993076, |
| "learning_rate": 2.0401975968856514e-06, |
| "loss": 0.3364, |
| "step": 1344 |
| }, |
| { |
| "epoch": 4.542429985260055, |
| "grad_norm": 0.0662991056630753, |
| "learning_rate": 2.010557989178725e-06, |
| "loss": 0.3456, |
| "step": 1345 |
| }, |
| { |
| "epoch": 4.545799115603285, |
| "grad_norm": 0.06723548381525182, |
| "learning_rate": 1.981129707995542e-06, |
| "loss": 0.3428, |
| "step": 1346 |
| }, |
| { |
| "epoch": 4.549168245946515, |
| "grad_norm": 0.06854275132803765, |
| "learning_rate": 1.9519129170388496e-06, |
| "loss": 0.3519, |
| "step": 1347 |
| }, |
| { |
| "epoch": 4.552537376289745, |
| "grad_norm": 0.0687917997485082, |
| "learning_rate": 1.9229077788349393e-06, |
| "loss": 0.342, |
| "step": 1348 |
| }, |
| { |
| "epoch": 4.555906506632976, |
| "grad_norm": 0.06728042882661939, |
| "learning_rate": 1.8941144547327228e-06, |
| "loss": 0.3513, |
| "step": 1349 |
| }, |
| { |
| "epoch": 4.5592756369762055, |
| "grad_norm": 0.06733086071107253, |
| "learning_rate": 1.865533104902828e-06, |
| "loss": 0.3432, |
| "step": 1350 |
| }, |
| { |
| "epoch": 4.562644767319435, |
| "grad_norm": 0.06653132755662035, |
| "learning_rate": 1.8371638883367371e-06, |
| "loss": 0.3455, |
| "step": 1351 |
| }, |
| { |
| "epoch": 4.566013897662666, |
| "grad_norm": 0.07062467690102314, |
| "learning_rate": 1.8090069628458583e-06, |
| "loss": 0.3513, |
| "step": 1352 |
| }, |
| { |
| "epoch": 4.5693830280058965, |
| "grad_norm": 0.06749739958232552, |
| "learning_rate": 1.7810624850607007e-06, |
| "loss": 0.3422, |
| "step": 1353 |
| }, |
| { |
| "epoch": 4.572752158349126, |
| "grad_norm": 0.06715174264716953, |
| "learning_rate": 1.7533306104299663e-06, |
| "loss": 0.3427, |
| "step": 1354 |
| }, |
| { |
| "epoch": 4.576121288692356, |
| "grad_norm": 0.06825607468688703, |
| "learning_rate": 1.7258114932196824e-06, |
| "loss": 0.3484, |
| "step": 1355 |
| }, |
| { |
| "epoch": 4.5794904190355865, |
| "grad_norm": 0.0662384762896948, |
| "learning_rate": 1.6985052865123641e-06, |
| "loss": 0.344, |
| "step": 1356 |
| }, |
| { |
| "epoch": 4.582859549378816, |
| "grad_norm": 0.06749795339121123, |
| "learning_rate": 1.6714121422061636e-06, |
| "loss": 0.348, |
| "step": 1357 |
| }, |
| { |
| "epoch": 4.586228679722047, |
| "grad_norm": 0.06937799589584792, |
| "learning_rate": 1.6445322110140116e-06, |
| "loss": 0.3473, |
| "step": 1358 |
| }, |
| { |
| "epoch": 4.589597810065277, |
| "grad_norm": 0.06748221547140407, |
| "learning_rate": 1.617865642462766e-06, |
| "loss": 0.3414, |
| "step": 1359 |
| }, |
| { |
| "epoch": 4.592966940408507, |
| "grad_norm": 0.06814928775630703, |
| "learning_rate": 1.59141258489242e-06, |
| "loss": 0.345, |
| "step": 1360 |
| }, |
| { |
| "epoch": 4.596336070751737, |
| "grad_norm": 0.07057379791962957, |
| "learning_rate": 1.5651731854552466e-06, |
| "loss": 0.3432, |
| "step": 1361 |
| }, |
| { |
| "epoch": 4.5997052010949675, |
| "grad_norm": 0.06665029276024906, |
| "learning_rate": 1.53914759011498e-06, |
| "loss": 0.3524, |
| "step": 1362 |
| }, |
| { |
| "epoch": 4.603074331438197, |
| "grad_norm": 0.06906650342043347, |
| "learning_rate": 1.513335943646026e-06, |
| "loss": 0.3457, |
| "step": 1363 |
| }, |
| { |
| "epoch": 4.606443461781428, |
| "grad_norm": 0.06942705785663987, |
| "learning_rate": 1.4877383896326269e-06, |
| "loss": 0.3435, |
| "step": 1364 |
| }, |
| { |
| "epoch": 4.6098125921246575, |
| "grad_norm": 0.06819335124159634, |
| "learning_rate": 1.4623550704680889e-06, |
| "loss": 0.3508, |
| "step": 1365 |
| }, |
| { |
| "epoch": 4.613181722467888, |
| "grad_norm": 0.06742489592183823, |
| "learning_rate": 1.4371861273539778e-06, |
| "loss": 0.3457, |
| "step": 1366 |
| }, |
| { |
| "epoch": 4.616550852811118, |
| "grad_norm": 0.064467972456891, |
| "learning_rate": 1.4122317002993247e-06, |
| "loss": 0.3437, |
| "step": 1367 |
| }, |
| { |
| "epoch": 4.6199199831543485, |
| "grad_norm": 0.06450585611276803, |
| "learning_rate": 1.3874919281198662e-06, |
| "loss": 0.3471, |
| "step": 1368 |
| }, |
| { |
| "epoch": 4.623289113497578, |
| "grad_norm": 0.06675137780602221, |
| "learning_rate": 1.3629669484372722e-06, |
| "loss": 0.3497, |
| "step": 1369 |
| }, |
| { |
| "epoch": 4.626658243840809, |
| "grad_norm": 0.06713388756947067, |
| "learning_rate": 1.3386568976783453e-06, |
| "loss": 0.3423, |
| "step": 1370 |
| }, |
| { |
| "epoch": 4.6300273741840385, |
| "grad_norm": 0.0647734710561896, |
| "learning_rate": 1.3145619110743169e-06, |
| "loss": 0.3451, |
| "step": 1371 |
| }, |
| { |
| "epoch": 4.633396504527269, |
| "grad_norm": 0.06580879452568121, |
| "learning_rate": 1.2906821226600453e-06, |
| "loss": 0.3429, |
| "step": 1372 |
| }, |
| { |
| "epoch": 4.636765634870499, |
| "grad_norm": 0.06578978457756152, |
| "learning_rate": 1.2670176652733023e-06, |
| "loss": 0.342, |
| "step": 1373 |
| }, |
| { |
| "epoch": 4.6401347652137295, |
| "grad_norm": 0.06786565921397064, |
| "learning_rate": 1.2435686705540228e-06, |
| "loss": 0.3458, |
| "step": 1374 |
| }, |
| { |
| "epoch": 4.643503895556959, |
| "grad_norm": 0.06730192180307096, |
| "learning_rate": 1.2203352689435532e-06, |
| "loss": 0.3505, |
| "step": 1375 |
| }, |
| { |
| "epoch": 4.64687302590019, |
| "grad_norm": 0.06442684402191479, |
| "learning_rate": 1.1973175896839684e-06, |
| "loss": 0.3417, |
| "step": 1376 |
| }, |
| { |
| "epoch": 4.6502421562434195, |
| "grad_norm": 0.06497046470832643, |
| "learning_rate": 1.1745157608173253e-06, |
| "loss": 0.3429, |
| "step": 1377 |
| }, |
| { |
| "epoch": 4.65361128658665, |
| "grad_norm": 0.0655614246650691, |
| "learning_rate": 1.1519299091849523e-06, |
| "loss": 0.3405, |
| "step": 1378 |
| }, |
| { |
| "epoch": 4.65698041692988, |
| "grad_norm": 0.06746924444935623, |
| "learning_rate": 1.1295601604267348e-06, |
| "loss": 0.347, |
| "step": 1379 |
| }, |
| { |
| "epoch": 4.6603495472731105, |
| "grad_norm": 0.06671677812012947, |
| "learning_rate": 1.1074066389804395e-06, |
| "loss": 0.348, |
| "step": 1380 |
| }, |
| { |
| "epoch": 4.66371867761634, |
| "grad_norm": 0.06798688584484958, |
| "learning_rate": 1.0854694680810175e-06, |
| "loss": 0.3468, |
| "step": 1381 |
| }, |
| { |
| "epoch": 4.667087807959571, |
| "grad_norm": 0.06373690906496436, |
| "learning_rate": 1.0637487697598937e-06, |
| "loss": 0.3391, |
| "step": 1382 |
| }, |
| { |
| "epoch": 4.6704569383028005, |
| "grad_norm": 0.06902986516002681, |
| "learning_rate": 1.0422446648443142e-06, |
| "loss": 0.3449, |
| "step": 1383 |
| }, |
| { |
| "epoch": 4.673826068646031, |
| "grad_norm": 0.06783886040134948, |
| "learning_rate": 1.0209572729566708e-06, |
| "loss": 0.3469, |
| "step": 1384 |
| }, |
| { |
| "epoch": 4.677195198989261, |
| "grad_norm": 0.06789415607732335, |
| "learning_rate": 9.998867125138223e-07, |
| "loss": 0.3483, |
| "step": 1385 |
| }, |
| { |
| "epoch": 4.680564329332491, |
| "grad_norm": 0.06478682570392917, |
| "learning_rate": 9.790331007264543e-07, |
| "loss": 0.3465, |
| "step": 1386 |
| }, |
| { |
| "epoch": 4.683933459675721, |
| "grad_norm": 0.06659198241596209, |
| "learning_rate": 9.583965535983997e-07, |
| "loss": 0.3377, |
| "step": 1387 |
| }, |
| { |
| "epoch": 4.687302590018952, |
| "grad_norm": 0.06679774424195298, |
| "learning_rate": 9.379771859260267e-07, |
| "loss": 0.3474, |
| "step": 1388 |
| }, |
| { |
| "epoch": 4.6906717203621815, |
| "grad_norm": 0.06562337466888649, |
| "learning_rate": 9.177751112975853e-07, |
| "loss": 0.3378, |
| "step": 1389 |
| }, |
| { |
| "epoch": 4.694040850705411, |
| "grad_norm": 0.0643058634496552, |
| "learning_rate": 8.977904420925543e-07, |
| "loss": 0.3401, |
| "step": 1390 |
| }, |
| { |
| "epoch": 4.697409981048642, |
| "grad_norm": 0.06520681777558435, |
| "learning_rate": 8.780232894810558e-07, |
| "loss": 0.3476, |
| "step": 1391 |
| }, |
| { |
| "epoch": 4.700779111391872, |
| "grad_norm": 0.06652677782803126, |
| "learning_rate": 8.584737634232154e-07, |
| "loss": 0.3445, |
| "step": 1392 |
| }, |
| { |
| "epoch": 4.704148241735102, |
| "grad_norm": 0.06513347952901734, |
| "learning_rate": 8.391419726685446e-07, |
| "loss": 0.3486, |
| "step": 1393 |
| }, |
| { |
| "epoch": 4.707517372078332, |
| "grad_norm": 0.06577657248355921, |
| "learning_rate": 8.200280247553461e-07, |
| "loss": 0.3461, |
| "step": 1394 |
| }, |
| { |
| "epoch": 4.7108865024215625, |
| "grad_norm": 0.06369190711960318, |
| "learning_rate": 8.011320260101052e-07, |
| "loss": 0.3478, |
| "step": 1395 |
| }, |
| { |
| "epoch": 4.714255632764792, |
| "grad_norm": 0.06569207225402134, |
| "learning_rate": 7.824540815469306e-07, |
| "loss": 0.3496, |
| "step": 1396 |
| }, |
| { |
| "epoch": 4.717624763108023, |
| "grad_norm": 0.0636558204987421, |
| "learning_rate": 7.639942952669232e-07, |
| "loss": 0.3462, |
| "step": 1397 |
| }, |
| { |
| "epoch": 4.7209938934512525, |
| "grad_norm": 0.06451389941556673, |
| "learning_rate": 7.457527698576217e-07, |
| "loss": 0.3454, |
| "step": 1398 |
| }, |
| { |
| "epoch": 4.724363023794483, |
| "grad_norm": 0.06490245056573639, |
| "learning_rate": 7.277296067924377e-07, |
| "loss": 0.345, |
| "step": 1399 |
| }, |
| { |
| "epoch": 4.727732154137713, |
| "grad_norm": 0.06421211046867673, |
| "learning_rate": 7.099249063300751e-07, |
| "loss": 0.3509, |
| "step": 1400 |
| }, |
| { |
| "epoch": 4.7311012844809435, |
| "grad_norm": 0.06376468633122387, |
| "learning_rate": 6.923387675139958e-07, |
| "loss": 0.3449, |
| "step": 1401 |
| }, |
| { |
| "epoch": 4.734470414824173, |
| "grad_norm": 0.06306595288457956, |
| "learning_rate": 6.749712881718306e-07, |
| "loss": 0.3438, |
| "step": 1402 |
| }, |
| { |
| "epoch": 4.737839545167404, |
| "grad_norm": 0.064531257088043, |
| "learning_rate": 6.578225649148806e-07, |
| "loss": 0.3459, |
| "step": 1403 |
| }, |
| { |
| "epoch": 4.7412086755106335, |
| "grad_norm": 0.06475033645731526, |
| "learning_rate": 6.408926931375403e-07, |
| "loss": 0.3489, |
| "step": 1404 |
| }, |
| { |
| "epoch": 4.744577805853864, |
| "grad_norm": 0.06725279891073008, |
| "learning_rate": 6.241817670167961e-07, |
| "loss": 0.3517, |
| "step": 1405 |
| }, |
| { |
| "epoch": 4.747946936197094, |
| "grad_norm": 0.06576628806576036, |
| "learning_rate": 6.076898795116792e-07, |
| "loss": 0.3476, |
| "step": 1406 |
| }, |
| { |
| "epoch": 4.7513160665403245, |
| "grad_norm": 0.06636084321383787, |
| "learning_rate": 5.914171223627652e-07, |
| "loss": 0.3431, |
| "step": 1407 |
| }, |
| { |
| "epoch": 4.754685196883554, |
| "grad_norm": 0.06307439592979396, |
| "learning_rate": 5.753635860916617e-07, |
| "loss": 0.344, |
| "step": 1408 |
| }, |
| { |
| "epoch": 4.758054327226785, |
| "grad_norm": 0.06354853186497929, |
| "learning_rate": 5.595293600004948e-07, |
| "loss": 0.3452, |
| "step": 1409 |
| }, |
| { |
| "epoch": 4.7614234575700145, |
| "grad_norm": 0.06640861850363539, |
| "learning_rate": 5.43914532171419e-07, |
| "loss": 0.3498, |
| "step": 1410 |
| }, |
| { |
| "epoch": 4.764792587913245, |
| "grad_norm": 0.06432227056221736, |
| "learning_rate": 5.285191894661257e-07, |
| "loss": 0.3448, |
| "step": 1411 |
| }, |
| { |
| "epoch": 4.768161718256475, |
| "grad_norm": 0.0650298496723325, |
| "learning_rate": 5.133434175253715e-07, |
| "loss": 0.348, |
| "step": 1412 |
| }, |
| { |
| "epoch": 4.771530848599705, |
| "grad_norm": 0.0642338741687956, |
| "learning_rate": 4.983873007684769e-07, |
| "loss": 0.3504, |
| "step": 1413 |
| }, |
| { |
| "epoch": 4.774899978942935, |
| "grad_norm": 0.06597221985673193, |
| "learning_rate": 4.83650922392882e-07, |
| "loss": 0.3443, |
| "step": 1414 |
| }, |
| { |
| "epoch": 4.778269109286166, |
| "grad_norm": 0.06414310328903884, |
| "learning_rate": 4.691343643736579e-07, |
| "loss": 0.3498, |
| "step": 1415 |
| }, |
| { |
| "epoch": 4.7816382396293955, |
| "grad_norm": 0.06423727553913079, |
| "learning_rate": 4.5483770746309383e-07, |
| "loss": 0.3462, |
| "step": 1416 |
| }, |
| { |
| "epoch": 4.785007369972626, |
| "grad_norm": 0.06712703203955196, |
| "learning_rate": 4.4076103119018666e-07, |
| "loss": 0.344, |
| "step": 1417 |
| }, |
| { |
| "epoch": 4.788376500315856, |
| "grad_norm": 0.06406676946222813, |
| "learning_rate": 4.269044138602585e-07, |
| "loss": 0.3424, |
| "step": 1418 |
| }, |
| { |
| "epoch": 4.791745630659086, |
| "grad_norm": 0.0650048525731774, |
| "learning_rate": 4.132679325544775e-07, |
| "loss": 0.3434, |
| "step": 1419 |
| }, |
| { |
| "epoch": 4.795114761002316, |
| "grad_norm": 0.06381393163242242, |
| "learning_rate": 3.998516631294491e-07, |
| "loss": 0.3464, |
| "step": 1420 |
| }, |
| { |
| "epoch": 4.798483891345547, |
| "grad_norm": 0.062168147457412865, |
| "learning_rate": 3.866556802167942e-07, |
| "loss": 0.3447, |
| "step": 1421 |
| }, |
| { |
| "epoch": 4.8018530216887765, |
| "grad_norm": 0.06359774281703022, |
| "learning_rate": 3.736800572227317e-07, |
| "loss": 0.3452, |
| "step": 1422 |
| }, |
| { |
| "epoch": 4.805222152032007, |
| "grad_norm": 0.06777082256384792, |
| "learning_rate": 3.6092486632766543e-07, |
| "loss": 0.3405, |
| "step": 1423 |
| }, |
| { |
| "epoch": 4.808591282375237, |
| "grad_norm": 0.06518391137080269, |
| "learning_rate": 3.483901784857846e-07, |
| "loss": 0.3499, |
| "step": 1424 |
| }, |
| { |
| "epoch": 4.811960412718467, |
| "grad_norm": 0.06360491257484012, |
| "learning_rate": 3.3607606342467293e-07, |
| "loss": 0.3464, |
| "step": 1425 |
| }, |
| { |
| "epoch": 4.815329543061697, |
| "grad_norm": 0.0630016058736709, |
| "learning_rate": 3.239825896449267e-07, |
| "loss": 0.3493, |
| "step": 1426 |
| }, |
| { |
| "epoch": 4.818698673404928, |
| "grad_norm": 0.06424370898677036, |
| "learning_rate": 3.1210982441974623e-07, |
| "loss": 0.3424, |
| "step": 1427 |
| }, |
| { |
| "epoch": 4.8220678037481575, |
| "grad_norm": 0.06333420103209184, |
| "learning_rate": 3.004578337945985e-07, |
| "loss": 0.3444, |
| "step": 1428 |
| }, |
| { |
| "epoch": 4.825436934091387, |
| "grad_norm": 0.06413449663730773, |
| "learning_rate": 2.8902668258683043e-07, |
| "loss": 0.3465, |
| "step": 1429 |
| }, |
| { |
| "epoch": 4.828806064434618, |
| "grad_norm": 0.06372049815441223, |
| "learning_rate": 2.778164343852918e-07, |
| "loss": 0.3478, |
| "step": 1430 |
| }, |
| { |
| "epoch": 4.832175194777848, |
| "grad_norm": 0.06414269762017184, |
| "learning_rate": 2.668271515500287e-07, |
| "loss": 0.3502, |
| "step": 1431 |
| }, |
| { |
| "epoch": 4.835544325121078, |
| "grad_norm": 0.06533137367117652, |
| "learning_rate": 2.5605889521188364e-07, |
| "loss": 0.3491, |
| "step": 1432 |
| }, |
| { |
| "epoch": 4.838913455464308, |
| "grad_norm": 0.06350312986484183, |
| "learning_rate": 2.455117252721895e-07, |
| "loss": 0.3453, |
| "step": 1433 |
| }, |
| { |
| "epoch": 4.8422825858075385, |
| "grad_norm": 0.06475788404284327, |
| "learning_rate": 2.351857004024316e-07, |
| "loss": 0.3503, |
| "step": 1434 |
| }, |
| { |
| "epoch": 4.845651716150769, |
| "grad_norm": 0.0631781774805789, |
| "learning_rate": 2.2508087804390178e-07, |
| "loss": 0.3446, |
| "step": 1435 |
| }, |
| { |
| "epoch": 4.849020846493999, |
| "grad_norm": 0.06379282423784381, |
| "learning_rate": 2.1519731440740487e-07, |
| "loss": 0.3474, |
| "step": 1436 |
| }, |
| { |
| "epoch": 4.8523899768372285, |
| "grad_norm": 0.06402172658556064, |
| "learning_rate": 2.055350644729348e-07, |
| "loss": 0.3511, |
| "step": 1437 |
| }, |
| { |
| "epoch": 4.855759107180459, |
| "grad_norm": 0.06513215066751245, |
| "learning_rate": 1.9609418198935916e-07, |
| "loss": 0.3471, |
| "step": 1438 |
| }, |
| { |
| "epoch": 4.859128237523689, |
| "grad_norm": 0.06283559414952865, |
| "learning_rate": 1.8687471947413495e-07, |
| "loss": 0.3446, |
| "step": 1439 |
| }, |
| { |
| "epoch": 4.862497367866919, |
| "grad_norm": 0.06309493725276366, |
| "learning_rate": 1.778767282130156e-07, |
| "loss": 0.3431, |
| "step": 1440 |
| }, |
| { |
| "epoch": 4.865866498210149, |
| "grad_norm": 0.06560809934411752, |
| "learning_rate": 1.691002582597534e-07, |
| "loss": 0.3526, |
| "step": 1441 |
| }, |
| { |
| "epoch": 4.86923562855338, |
| "grad_norm": 0.06433417193452762, |
| "learning_rate": 1.6054535843582854e-07, |
| "loss": 0.3507, |
| "step": 1442 |
| }, |
| { |
| "epoch": 4.8726047588966095, |
| "grad_norm": 0.06442999780392818, |
| "learning_rate": 1.522120763301782e-07, |
| "loss": 0.3492, |
| "step": 1443 |
| }, |
| { |
| "epoch": 4.87597388923984, |
| "grad_norm": 0.06306148601810407, |
| "learning_rate": 1.4410045829893915e-07, |
| "loss": 0.3434, |
| "step": 1444 |
| }, |
| { |
| "epoch": 4.87934301958307, |
| "grad_norm": 0.06308220046755993, |
| "learning_rate": 1.3621054946517666e-07, |
| "loss": 0.3445, |
| "step": 1445 |
| }, |
| { |
| "epoch": 4.8827121499263, |
| "grad_norm": 0.06305097370353915, |
| "learning_rate": 1.2854239371863142e-07, |
| "loss": 0.3431, |
| "step": 1446 |
| }, |
| { |
| "epoch": 4.88608128026953, |
| "grad_norm": 0.06293090962933129, |
| "learning_rate": 1.2109603371548873e-07, |
| "loss": 0.3397, |
| "step": 1447 |
| }, |
| { |
| "epoch": 4.889450410612761, |
| "grad_norm": 0.06368330582611549, |
| "learning_rate": 1.1387151087814297e-07, |
| "loss": 0.3468, |
| "step": 1448 |
| }, |
| { |
| "epoch": 4.8928195409559905, |
| "grad_norm": 0.0642396525858067, |
| "learning_rate": 1.06868865394949e-07, |
| "loss": 0.3419, |
| "step": 1449 |
| }, |
| { |
| "epoch": 4.896188671299221, |
| "grad_norm": 0.06286580837917152, |
| "learning_rate": 1.0008813622001345e-07, |
| "loss": 0.3465, |
| "step": 1450 |
| }, |
| { |
| "epoch": 4.899557801642451, |
| "grad_norm": 0.0646704999704258, |
| "learning_rate": 9.352936107296817e-08, |
| "loss": 0.3515, |
| "step": 1451 |
| }, |
| { |
| "epoch": 4.902926931985681, |
| "grad_norm": 0.06254527612862122, |
| "learning_rate": 8.719257643877044e-08, |
| "loss": 0.3418, |
| "step": 1452 |
| }, |
| { |
| "epoch": 4.906296062328911, |
| "grad_norm": 0.06265534232163783, |
| "learning_rate": 8.107781756749866e-08, |
| "loss": 0.3417, |
| "step": 1453 |
| }, |
| { |
| "epoch": 4.909665192672142, |
| "grad_norm": 0.06417368994248919, |
| "learning_rate": 7.51851184741481e-08, |
| "loss": 0.3451, |
| "step": 1454 |
| }, |
| { |
| "epoch": 4.9130343230153715, |
| "grad_norm": 0.06427635001716354, |
| "learning_rate": 6.951451193844883e-08, |
| "loss": 0.3517, |
| "step": 1455 |
| }, |
| { |
| "epoch": 4.916403453358602, |
| "grad_norm": 0.06446286415220177, |
| "learning_rate": 6.40660295046791e-08, |
| "loss": 0.3499, |
| "step": 1456 |
| }, |
| { |
| "epoch": 4.919772583701832, |
| "grad_norm": 0.06325304997383964, |
| "learning_rate": 5.8839701481487875e-08, |
| "loss": 0.3437, |
| "step": 1457 |
| }, |
| { |
| "epoch": 4.923141714045062, |
| "grad_norm": 0.06376968784671593, |
| "learning_rate": 5.3835556941743695e-08, |
| "loss": 0.3423, |
| "step": 1458 |
| }, |
| { |
| "epoch": 4.926510844388292, |
| "grad_norm": 0.06529781285688359, |
| "learning_rate": 4.905362372234379e-08, |
| "loss": 0.3492, |
| "step": 1459 |
| }, |
| { |
| "epoch": 4.929879974731523, |
| "grad_norm": 0.06414078488995091, |
| "learning_rate": 4.449392842408529e-08, |
| "loss": 0.3479, |
| "step": 1460 |
| }, |
| { |
| "epoch": 4.9332491050747524, |
| "grad_norm": 0.06362859239383568, |
| "learning_rate": 4.015649641150976e-08, |
| "loss": 0.3492, |
| "step": 1461 |
| }, |
| { |
| "epoch": 4.936618235417983, |
| "grad_norm": 0.06341769294492185, |
| "learning_rate": 3.6041351812743374e-08, |
| "loss": 0.351, |
| "step": 1462 |
| }, |
| { |
| "epoch": 4.939987365761213, |
| "grad_norm": 0.06486183719402762, |
| "learning_rate": 3.21485175193903e-08, |
| "loss": 0.3511, |
| "step": 1463 |
| }, |
| { |
| "epoch": 4.943356496104443, |
| "grad_norm": 0.06360741943701602, |
| "learning_rate": 2.8478015186399477e-08, |
| "loss": 0.3471, |
| "step": 1464 |
| }, |
| { |
| "epoch": 4.946725626447673, |
| "grad_norm": 0.06343696624954866, |
| "learning_rate": 2.5029865231922524e-08, |
| "loss": 0.3448, |
| "step": 1465 |
| }, |
| { |
| "epoch": 4.950094756790904, |
| "grad_norm": 0.06343915127065658, |
| "learning_rate": 2.1804086837229344e-08, |
| "loss": 0.3416, |
| "step": 1466 |
| }, |
| { |
| "epoch": 4.953463887134133, |
| "grad_norm": 0.06487303485827695, |
| "learning_rate": 1.880069794657935e-08, |
| "loss": 0.3444, |
| "step": 1467 |
| }, |
| { |
| "epoch": 4.956833017477363, |
| "grad_norm": 0.062408603956769386, |
| "learning_rate": 1.601971526713708e-08, |
| "loss": 0.341, |
| "step": 1468 |
| }, |
| { |
| "epoch": 4.960202147820594, |
| "grad_norm": 0.06255760369115392, |
| "learning_rate": 1.3461154268865628e-08, |
| "loss": 0.3445, |
| "step": 1469 |
| }, |
| { |
| "epoch": 4.963571278163824, |
| "grad_norm": 0.062112638608570706, |
| "learning_rate": 1.112502918445113e-08, |
| "loss": 0.3391, |
| "step": 1470 |
| }, |
| { |
| "epoch": 4.966940408507054, |
| "grad_norm": 0.06398681422452646, |
| "learning_rate": 9.011353009222846e-09, |
| "loss": 0.3455, |
| "step": 1471 |
| }, |
| { |
| "epoch": 4.970309538850284, |
| "grad_norm": 0.0637738300165632, |
| "learning_rate": 7.12013750107321e-09, |
| "loss": 0.3438, |
| "step": 1472 |
| }, |
| { |
| "epoch": 4.973678669193514, |
| "grad_norm": 0.06456086790149927, |
| "learning_rate": 5.451393180400111e-09, |
| "loss": 0.3486, |
| "step": 1473 |
| }, |
| { |
| "epoch": 4.977047799536745, |
| "grad_norm": 0.06334490636848067, |
| "learning_rate": 4.00512933004471e-09, |
| "loss": 0.3456, |
| "step": 1474 |
| }, |
| { |
| "epoch": 4.980416929879975, |
| "grad_norm": 0.06295292438577572, |
| "learning_rate": 2.7813539952381563e-09, |
| "loss": 0.3445, |
| "step": 1475 |
| }, |
| { |
| "epoch": 4.9837860602232045, |
| "grad_norm": 0.0633108315280129, |
| "learning_rate": 1.7800739835616143e-09, |
| "loss": 0.3451, |
| "step": 1476 |
| }, |
| { |
| "epoch": 4.987155190566435, |
| "grad_norm": 0.0630218856533905, |
| "learning_rate": 1.0012948649018584e-09, |
| "loss": 0.3497, |
| "step": 1477 |
| }, |
| { |
| "epoch": 4.990524320909666, |
| "grad_norm": 0.06351551674205162, |
| "learning_rate": 4.450209714379483e-10, |
| "loss": 0.3382, |
| "step": 1478 |
| }, |
| { |
| "epoch": 4.993893451252895, |
| "grad_norm": 0.06362931363383374, |
| "learning_rate": 1.1125539757905756e-10, |
| "loss": 0.3436, |
| "step": 1479 |
| }, |
| { |
| "epoch": 4.997262581596125, |
| "grad_norm": 0.0635855435860357, |
| "learning_rate": 0.0, |
| "loss": 0.3456, |
| "step": 1480 |
| }, |
| { |
| "epoch": 4.997262581596125, |
| "step": 1480, |
| "total_flos": 3.94117975967185e+19, |
| "train_loss": 0.06913654437741718, |
| "train_runtime": 69116.03, |
| "train_samples_per_second": 10.993, |
| "train_steps_per_second": 0.021 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1480, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.94117975967185e+19, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |