| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1173, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0025575447570332483, |
| "grad_norm": 60.07620508040347, |
| "learning_rate": 0.0, |
| "loss": 10.9714, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.005115089514066497, |
| "grad_norm": 60.511635982681035, |
| "learning_rate": 4.2372881355932204e-07, |
| "loss": 11.044, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0076726342710997444, |
| "grad_norm": 61.57012701086648, |
| "learning_rate": 8.474576271186441e-07, |
| "loss": 10.9687, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.010230179028132993, |
| "grad_norm": 62.423863746635334, |
| "learning_rate": 1.2711864406779662e-06, |
| "loss": 10.9132, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01278772378516624, |
| "grad_norm": 60.51018546257131, |
| "learning_rate": 1.6949152542372882e-06, |
| "loss": 11.0108, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.015345268542199489, |
| "grad_norm": 66.2795306712718, |
| "learning_rate": 2.11864406779661e-06, |
| "loss": 10.7022, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.017902813299232736, |
| "grad_norm": 68.66164801562074, |
| "learning_rate": 2.5423728813559323e-06, |
| "loss": 10.6058, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.020460358056265986, |
| "grad_norm": 107.4660149943893, |
| "learning_rate": 2.9661016949152545e-06, |
| "loss": 9.0593, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.023017902813299233, |
| "grad_norm": 122.48386436910788, |
| "learning_rate": 3.3898305084745763e-06, |
| "loss": 8.4522, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02557544757033248, |
| "grad_norm": 125.82848908671042, |
| "learning_rate": 3.813559322033899e-06, |
| "loss": 5.6693, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.028132992327365727, |
| "grad_norm": 52.58888004444451, |
| "learning_rate": 4.23728813559322e-06, |
| "loss": 3.0629, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.030690537084398978, |
| "grad_norm": 37.39340585668415, |
| "learning_rate": 4.6610169491525425e-06, |
| "loss": 2.376, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.03324808184143223, |
| "grad_norm": 28.735337125133064, |
| "learning_rate": 5.084745762711865e-06, |
| "loss": 2.1006, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.03580562659846547, |
| "grad_norm": 6.3291764630351315, |
| "learning_rate": 5.508474576271187e-06, |
| "loss": 1.2756, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.03836317135549872, |
| "grad_norm": 4.690308248334096, |
| "learning_rate": 5.932203389830509e-06, |
| "loss": 1.2509, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.04092071611253197, |
| "grad_norm": 3.5468348254384843, |
| "learning_rate": 6.3559322033898304e-06, |
| "loss": 1.1712, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.043478260869565216, |
| "grad_norm": 2.676492989643342, |
| "learning_rate": 6.779661016949153e-06, |
| "loss": 1.055, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.04603580562659847, |
| "grad_norm": 2.1888510444313205, |
| "learning_rate": 7.203389830508475e-06, |
| "loss": 1.0324, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04859335038363171, |
| "grad_norm": 55.49598040447309, |
| "learning_rate": 7.627118644067798e-06, |
| "loss": 0.9577, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.05115089514066496, |
| "grad_norm": 18.10939464017419, |
| "learning_rate": 8.050847457627118e-06, |
| "loss": 0.8841, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05370843989769821, |
| "grad_norm": 1.783845830738153, |
| "learning_rate": 8.47457627118644e-06, |
| "loss": 0.8704, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.056265984654731455, |
| "grad_norm": 1.2295478253717957, |
| "learning_rate": 8.898305084745763e-06, |
| "loss": 0.829, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.058823529411764705, |
| "grad_norm": 1.0279978849315632, |
| "learning_rate": 9.322033898305085e-06, |
| "loss": 0.8196, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.061381074168797956, |
| "grad_norm": 0.8982739673565904, |
| "learning_rate": 9.745762711864407e-06, |
| "loss": 0.7903, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0639386189258312, |
| "grad_norm": 0.7588801023963194, |
| "learning_rate": 1.016949152542373e-05, |
| "loss": 0.7177, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.06649616368286446, |
| "grad_norm": 1.0123370131062162, |
| "learning_rate": 1.0593220338983052e-05, |
| "loss": 0.7536, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.06905370843989769, |
| "grad_norm": 0.7910316066634632, |
| "learning_rate": 1.1016949152542374e-05, |
| "loss": 0.6874, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.07161125319693094, |
| "grad_norm": 0.7192937721653079, |
| "learning_rate": 1.1440677966101696e-05, |
| "loss": 0.6942, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0741687979539642, |
| "grad_norm": 0.6367048650637959, |
| "learning_rate": 1.1864406779661018e-05, |
| "loss": 0.652, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.07672634271099744, |
| "grad_norm": 0.6890008346231932, |
| "learning_rate": 1.228813559322034e-05, |
| "loss": 0.6527, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0792838874680307, |
| "grad_norm": 0.7018774861427414, |
| "learning_rate": 1.2711864406779661e-05, |
| "loss": 0.6389, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.08184143222506395, |
| "grad_norm": 0.6934531307251741, |
| "learning_rate": 1.3135593220338985e-05, |
| "loss": 0.6612, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.08439897698209718, |
| "grad_norm": 0.4547490187162451, |
| "learning_rate": 1.3559322033898305e-05, |
| "loss": 0.6272, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.08695652173913043, |
| "grad_norm": 0.5411681099025528, |
| "learning_rate": 1.3983050847457627e-05, |
| "loss": 0.6326, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.08951406649616368, |
| "grad_norm": 0.5591394716745298, |
| "learning_rate": 1.440677966101695e-05, |
| "loss": 0.6139, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.09207161125319693, |
| "grad_norm": 0.4569913550931653, |
| "learning_rate": 1.4830508474576272e-05, |
| "loss": 0.6073, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.09462915601023018, |
| "grad_norm": 0.4147309558729621, |
| "learning_rate": 1.5254237288135596e-05, |
| "loss": 0.6017, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.09718670076726342, |
| "grad_norm": 0.44578293274404973, |
| "learning_rate": 1.5677966101694916e-05, |
| "loss": 0.578, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.09974424552429667, |
| "grad_norm": 0.44759576906101894, |
| "learning_rate": 1.6101694915254237e-05, |
| "loss": 0.5725, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.10230179028132992, |
| "grad_norm": 0.521441753506374, |
| "learning_rate": 1.652542372881356e-05, |
| "loss": 0.6091, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.10485933503836317, |
| "grad_norm": 0.3633683810476169, |
| "learning_rate": 1.694915254237288e-05, |
| "loss": 0.591, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.10741687979539642, |
| "grad_norm": 0.38875293035716313, |
| "learning_rate": 1.7372881355932205e-05, |
| "loss": 0.5684, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.10997442455242967, |
| "grad_norm": 0.4050488399781334, |
| "learning_rate": 1.7796610169491526e-05, |
| "loss": 0.5604, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.11253196930946291, |
| "grad_norm": 0.35484531528744356, |
| "learning_rate": 1.8220338983050846e-05, |
| "loss": 0.5588, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.11508951406649616, |
| "grad_norm": 0.3558009349640067, |
| "learning_rate": 1.864406779661017e-05, |
| "loss": 0.5772, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 0.3631599278698065, |
| "learning_rate": 1.906779661016949e-05, |
| "loss": 0.5567, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.12020460358056266, |
| "grad_norm": 0.29178893481388374, |
| "learning_rate": 1.9491525423728814e-05, |
| "loss": 0.5575, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.12276214833759591, |
| "grad_norm": 0.28512370332661957, |
| "learning_rate": 1.9915254237288135e-05, |
| "loss": 0.545, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.12531969309462915, |
| "grad_norm": 0.33383686916439004, |
| "learning_rate": 2.033898305084746e-05, |
| "loss": 0.5395, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.1278772378516624, |
| "grad_norm": 0.3302302589173117, |
| "learning_rate": 2.076271186440678e-05, |
| "loss": 0.5654, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.13043478260869565, |
| "grad_norm": 0.25804408924344063, |
| "learning_rate": 2.1186440677966103e-05, |
| "loss": 0.545, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.1329923273657289, |
| "grad_norm": 0.27338682999676506, |
| "learning_rate": 2.1610169491525427e-05, |
| "loss": 0.5417, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.13554987212276215, |
| "grad_norm": 0.25924856640229854, |
| "learning_rate": 2.2033898305084748e-05, |
| "loss": 0.5435, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.13810741687979539, |
| "grad_norm": 0.25667969517909306, |
| "learning_rate": 2.245762711864407e-05, |
| "loss": 0.5027, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.14066496163682865, |
| "grad_norm": 0.2651721483714715, |
| "learning_rate": 2.2881355932203392e-05, |
| "loss": 0.5148, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.1432225063938619, |
| "grad_norm": 0.2695589091283933, |
| "learning_rate": 2.3305084745762712e-05, |
| "loss": 0.5421, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.14578005115089515, |
| "grad_norm": 0.2660946246807775, |
| "learning_rate": 2.3728813559322036e-05, |
| "loss": 0.5302, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.1483375959079284, |
| "grad_norm": 0.2572598707834026, |
| "learning_rate": 2.4152542372881357e-05, |
| "loss": 0.5494, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.15089514066496162, |
| "grad_norm": 0.25796653370038297, |
| "learning_rate": 2.457627118644068e-05, |
| "loss": 0.5173, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.1534526854219949, |
| "grad_norm": 0.26719666930574326, |
| "learning_rate": 2.5e-05, |
| "loss": 0.5318, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.15601023017902813, |
| "grad_norm": 0.2415395019191131, |
| "learning_rate": 2.5423728813559322e-05, |
| "loss": 0.533, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.1585677749360614, |
| "grad_norm": 0.2731503593131359, |
| "learning_rate": 2.5847457627118642e-05, |
| "loss": 0.5138, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.16112531969309463, |
| "grad_norm": 0.23021339667231472, |
| "learning_rate": 2.627118644067797e-05, |
| "loss": 0.506, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.1636828644501279, |
| "grad_norm": 0.2438183399920384, |
| "learning_rate": 2.669491525423729e-05, |
| "loss": 0.4933, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.16624040920716113, |
| "grad_norm": 0.25625774549395297, |
| "learning_rate": 2.711864406779661e-05, |
| "loss": 0.5275, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.16879795396419436, |
| "grad_norm": 0.2523483723490555, |
| "learning_rate": 2.754237288135593e-05, |
| "loss": 0.517, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.17135549872122763, |
| "grad_norm": 0.24599282565528238, |
| "learning_rate": 2.7966101694915255e-05, |
| "loss": 0.5105, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.17391304347826086, |
| "grad_norm": 0.25271072320627247, |
| "learning_rate": 2.838983050847458e-05, |
| "loss": 0.4947, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.17647058823529413, |
| "grad_norm": 0.26800675870234536, |
| "learning_rate": 2.88135593220339e-05, |
| "loss": 0.5018, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.17902813299232737, |
| "grad_norm": 0.22967309445842915, |
| "learning_rate": 2.9237288135593223e-05, |
| "loss": 0.5127, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.1815856777493606, |
| "grad_norm": 0.2936501608494599, |
| "learning_rate": 2.9661016949152544e-05, |
| "loss": 0.5067, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.18414322250639387, |
| "grad_norm": 0.3944135766030376, |
| "learning_rate": 3.0084745762711864e-05, |
| "loss": 0.5189, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.1867007672634271, |
| "grad_norm": 0.266923293934136, |
| "learning_rate": 3.050847457627119e-05, |
| "loss": 0.5099, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.18925831202046037, |
| "grad_norm": 0.25718984553900326, |
| "learning_rate": 3.093220338983051e-05, |
| "loss": 0.5024, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.1918158567774936, |
| "grad_norm": 0.23516139958516855, |
| "learning_rate": 3.135593220338983e-05, |
| "loss": 0.4961, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.19437340153452684, |
| "grad_norm": 0.2629972539950733, |
| "learning_rate": 3.177966101694915e-05, |
| "loss": 0.4858, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.1969309462915601, |
| "grad_norm": 0.2397591843698089, |
| "learning_rate": 3.2203389830508473e-05, |
| "loss": 0.5022, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.19948849104859334, |
| "grad_norm": 0.2488143296082389, |
| "learning_rate": 3.26271186440678e-05, |
| "loss": 0.5008, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2020460358056266, |
| "grad_norm": 0.284022517588893, |
| "learning_rate": 3.305084745762712e-05, |
| "loss": 0.4944, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.20460358056265984, |
| "grad_norm": 0.2585535341280856, |
| "learning_rate": 3.347457627118644e-05, |
| "loss": 0.4681, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2071611253196931, |
| "grad_norm": 0.27227808307258267, |
| "learning_rate": 3.389830508474576e-05, |
| "loss": 0.4798, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.20971867007672634, |
| "grad_norm": 0.27943220348506814, |
| "learning_rate": 3.432203389830508e-05, |
| "loss": 0.4869, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.21227621483375958, |
| "grad_norm": 0.2591147558052403, |
| "learning_rate": 3.474576271186441e-05, |
| "loss": 0.5002, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.21483375959079284, |
| "grad_norm": 0.26199419848962174, |
| "learning_rate": 3.516949152542373e-05, |
| "loss": 0.4848, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.21739130434782608, |
| "grad_norm": 0.2560452706817345, |
| "learning_rate": 3.559322033898305e-05, |
| "loss": 0.4796, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.21994884910485935, |
| "grad_norm": 0.3104926180958261, |
| "learning_rate": 3.601694915254237e-05, |
| "loss": 0.4857, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.22250639386189258, |
| "grad_norm": 0.2595037856684306, |
| "learning_rate": 3.644067796610169e-05, |
| "loss": 0.4786, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.22506393861892582, |
| "grad_norm": 0.28985166506581866, |
| "learning_rate": 3.686440677966102e-05, |
| "loss": 0.4733, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.22762148337595908, |
| "grad_norm": 0.2900856188045173, |
| "learning_rate": 3.728813559322034e-05, |
| "loss": 0.4893, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.23017902813299232, |
| "grad_norm": 0.3181961782523891, |
| "learning_rate": 3.771186440677966e-05, |
| "loss": 0.4836, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.23273657289002558, |
| "grad_norm": 0.3524322519656808, |
| "learning_rate": 3.813559322033898e-05, |
| "loss": 0.4858, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 0.277143774625197, |
| "learning_rate": 3.855932203389831e-05, |
| "loss": 0.4602, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.23785166240409208, |
| "grad_norm": 0.3152846596099472, |
| "learning_rate": 3.898305084745763e-05, |
| "loss": 0.4861, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.24040920716112532, |
| "grad_norm": 0.3108040600900486, |
| "learning_rate": 3.940677966101695e-05, |
| "loss": 0.4735, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.24296675191815856, |
| "grad_norm": 0.3636456936928106, |
| "learning_rate": 3.983050847457627e-05, |
| "loss": 0.4927, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.24552429667519182, |
| "grad_norm": 0.281719824056263, |
| "learning_rate": 4.025423728813559e-05, |
| "loss": 0.478, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.24808184143222506, |
| "grad_norm": 0.31572505604740536, |
| "learning_rate": 4.067796610169492e-05, |
| "loss": 0.4782, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.2506393861892583, |
| "grad_norm": 0.3265923715391404, |
| "learning_rate": 4.110169491525424e-05, |
| "loss": 0.4769, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.2531969309462916, |
| "grad_norm": 0.28803267079398887, |
| "learning_rate": 4.152542372881356e-05, |
| "loss": 0.4729, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2557544757033248, |
| "grad_norm": 0.3650171432061163, |
| "learning_rate": 4.1949152542372886e-05, |
| "loss": 0.4686, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.25831202046035806, |
| "grad_norm": 0.3208885876586653, |
| "learning_rate": 4.2372881355932206e-05, |
| "loss": 0.4756, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.2608695652173913, |
| "grad_norm": 0.3018386311182313, |
| "learning_rate": 4.279661016949153e-05, |
| "loss": 0.4898, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.26342710997442453, |
| "grad_norm": 0.35043017471200005, |
| "learning_rate": 4.3220338983050854e-05, |
| "loss": 0.4791, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.2659846547314578, |
| "grad_norm": 0.34067263771788764, |
| "learning_rate": 4.3644067796610175e-05, |
| "loss": 0.4605, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.26854219948849106, |
| "grad_norm": 0.30101429979539257, |
| "learning_rate": 4.4067796610169495e-05, |
| "loss": 0.4736, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.2710997442455243, |
| "grad_norm": 0.30707206512082585, |
| "learning_rate": 4.4491525423728816e-05, |
| "loss": 0.4822, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.27365728900255754, |
| "grad_norm": 0.39306930698809855, |
| "learning_rate": 4.491525423728814e-05, |
| "loss": 0.4586, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.27621483375959077, |
| "grad_norm": 0.2793625552949932, |
| "learning_rate": 4.533898305084746e-05, |
| "loss": 0.4824, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.27877237851662406, |
| "grad_norm": 0.39226221347711837, |
| "learning_rate": 4.5762711864406784e-05, |
| "loss": 0.4576, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.2813299232736573, |
| "grad_norm": 0.3030667831941101, |
| "learning_rate": 4.6186440677966104e-05, |
| "loss": 0.4624, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.28388746803069054, |
| "grad_norm": 0.3273613222535301, |
| "learning_rate": 4.6610169491525425e-05, |
| "loss": 0.4799, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2864450127877238, |
| "grad_norm": 0.2863063658186757, |
| "learning_rate": 4.703389830508475e-05, |
| "loss": 0.4669, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.289002557544757, |
| "grad_norm": 0.33232608459400076, |
| "learning_rate": 4.745762711864407e-05, |
| "loss": 0.4825, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.2915601023017903, |
| "grad_norm": 0.3600411712420216, |
| "learning_rate": 4.788135593220339e-05, |
| "loss": 0.4683, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 0.27761199193640784, |
| "learning_rate": 4.8305084745762714e-05, |
| "loss": 0.4755, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.2966751918158568, |
| "grad_norm": 0.3144400589669658, |
| "learning_rate": 4.8728813559322034e-05, |
| "loss": 0.4566, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.29923273657289, |
| "grad_norm": 0.35513580765557595, |
| "learning_rate": 4.915254237288136e-05, |
| "loss": 0.459, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.30179028132992325, |
| "grad_norm": 0.2738899153960894, |
| "learning_rate": 4.957627118644068e-05, |
| "loss": 0.4657, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.30434782608695654, |
| "grad_norm": 0.3433568900683564, |
| "learning_rate": 5e-05, |
| "loss": 0.4594, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.3069053708439898, |
| "grad_norm": 0.2765901256428289, |
| "learning_rate": 4.9952606635071094e-05, |
| "loss": 0.4754, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.309462915601023, |
| "grad_norm": 0.3551164959173657, |
| "learning_rate": 4.990521327014218e-05, |
| "loss": 0.4616, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.31202046035805625, |
| "grad_norm": 0.29005640287933204, |
| "learning_rate": 4.985781990521327e-05, |
| "loss": 0.4671, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.3145780051150895, |
| "grad_norm": 0.3733034604083977, |
| "learning_rate": 4.981042654028436e-05, |
| "loss": 0.4891, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.3171355498721228, |
| "grad_norm": 0.28943964192066307, |
| "learning_rate": 4.976303317535545e-05, |
| "loss": 0.4658, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.319693094629156, |
| "grad_norm": 0.34596305401543703, |
| "learning_rate": 4.9715639810426544e-05, |
| "loss": 0.4601, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.32225063938618925, |
| "grad_norm": 0.30408705183314516, |
| "learning_rate": 4.9668246445497635e-05, |
| "loss": 0.4392, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.3248081841432225, |
| "grad_norm": 0.2934769724426176, |
| "learning_rate": 4.9620853080568726e-05, |
| "loss": 0.4755, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.3273657289002558, |
| "grad_norm": 0.3194601339022468, |
| "learning_rate": 4.957345971563981e-05, |
| "loss": 0.455, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.329923273657289, |
| "grad_norm": 0.2765722150148559, |
| "learning_rate": 4.95260663507109e-05, |
| "loss": 0.4371, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.33248081841432225, |
| "grad_norm": 0.3098968524738735, |
| "learning_rate": 4.9478672985781994e-05, |
| "loss": 0.4479, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3350383631713555, |
| "grad_norm": 0.29058110177351354, |
| "learning_rate": 4.9431279620853085e-05, |
| "loss": 0.4638, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.3375959079283887, |
| "grad_norm": 0.34878186474460904, |
| "learning_rate": 4.938388625592417e-05, |
| "loss": 0.4589, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.340153452685422, |
| "grad_norm": 0.34103367199010814, |
| "learning_rate": 4.933649289099526e-05, |
| "loss": 0.4494, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.34271099744245526, |
| "grad_norm": 0.3024000321891373, |
| "learning_rate": 4.928909952606635e-05, |
| "loss": 0.4642, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.3452685421994885, |
| "grad_norm": 0.3120266717266376, |
| "learning_rate": 4.9241706161137443e-05, |
| "loss": 0.4494, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.34782608695652173, |
| "grad_norm": 0.3437694967932959, |
| "learning_rate": 4.919431279620853e-05, |
| "loss": 0.4385, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.35038363171355497, |
| "grad_norm": 0.3561886653860422, |
| "learning_rate": 4.9146919431279626e-05, |
| "loss": 0.4503, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 0.3265621404114159, |
| "learning_rate": 4.909952606635072e-05, |
| "loss": 0.4528, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.3554987212276215, |
| "grad_norm": 0.39021732468276327, |
| "learning_rate": 4.90521327014218e-05, |
| "loss": 0.47, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.35805626598465473, |
| "grad_norm": 0.2892525783443359, |
| "learning_rate": 4.900473933649289e-05, |
| "loss": 0.4425, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.36061381074168797, |
| "grad_norm": 0.35290539690783224, |
| "learning_rate": 4.8957345971563985e-05, |
| "loss": 0.4575, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.3631713554987212, |
| "grad_norm": 0.4428368742434025, |
| "learning_rate": 4.8909952606635076e-05, |
| "loss": 0.4722, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.3657289002557545, |
| "grad_norm": 0.2735345303292492, |
| "learning_rate": 4.886255924170616e-05, |
| "loss": 0.4553, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.36828644501278773, |
| "grad_norm": 0.4438915131124858, |
| "learning_rate": 4.881516587677725e-05, |
| "loss": 0.4721, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.37084398976982097, |
| "grad_norm": 0.3281658095262752, |
| "learning_rate": 4.876777251184834e-05, |
| "loss": 0.4378, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.3734015345268542, |
| "grad_norm": 0.3710338165695333, |
| "learning_rate": 4.8720379146919435e-05, |
| "loss": 0.4764, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.37595907928388744, |
| "grad_norm": 0.35926803120990913, |
| "learning_rate": 4.867298578199052e-05, |
| "loss": 0.4552, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.37851662404092073, |
| "grad_norm": 0.36794824845872526, |
| "learning_rate": 4.862559241706162e-05, |
| "loss": 0.4578, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.38107416879795397, |
| "grad_norm": 0.31318291286449124, |
| "learning_rate": 4.857819905213271e-05, |
| "loss": 0.4351, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.3836317135549872, |
| "grad_norm": 0.33033923224683864, |
| "learning_rate": 4.853080568720379e-05, |
| "loss": 0.4565, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.38618925831202044, |
| "grad_norm": 0.30424131577956276, |
| "learning_rate": 4.8483412322274884e-05, |
| "loss": 0.4403, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.3887468030690537, |
| "grad_norm": 0.28074085140395005, |
| "learning_rate": 4.8436018957345976e-05, |
| "loss": 0.4486, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.391304347826087, |
| "grad_norm": 0.3579125827021185, |
| "learning_rate": 4.838862559241707e-05, |
| "loss": 0.4625, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.3938618925831202, |
| "grad_norm": 0.3057863908214165, |
| "learning_rate": 4.834123222748815e-05, |
| "loss": 0.4361, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.39641943734015345, |
| "grad_norm": 0.28441580945568773, |
| "learning_rate": 4.829383886255924e-05, |
| "loss": 0.4341, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3989769820971867, |
| "grad_norm": 0.28566109258674055, |
| "learning_rate": 4.8246445497630334e-05, |
| "loss": 0.441, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.40153452685422, |
| "grad_norm": 0.3002441202365732, |
| "learning_rate": 4.819905213270142e-05, |
| "loss": 0.4329, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.4040920716112532, |
| "grad_norm": 0.3199646866784537, |
| "learning_rate": 4.815165876777251e-05, |
| "loss": 0.4446, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.40664961636828645, |
| "grad_norm": 0.2928518681501388, |
| "learning_rate": 4.810426540284361e-05, |
| "loss": 0.428, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.4092071611253197, |
| "grad_norm": 0.3946927235529595, |
| "learning_rate": 4.80568720379147e-05, |
| "loss": 0.4421, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4117647058823529, |
| "grad_norm": 0.30774149759921665, |
| "learning_rate": 4.8009478672985784e-05, |
| "loss": 0.4679, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.4143222506393862, |
| "grad_norm": 0.3644907390867006, |
| "learning_rate": 4.7962085308056876e-05, |
| "loss": 0.4516, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.41687979539641945, |
| "grad_norm": 0.31614856501701377, |
| "learning_rate": 4.791469194312797e-05, |
| "loss": 0.4539, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.4194373401534527, |
| "grad_norm": 0.30645090377175094, |
| "learning_rate": 4.786729857819905e-05, |
| "loss": 0.4346, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.4219948849104859, |
| "grad_norm": 0.34220416537004444, |
| "learning_rate": 4.781990521327014e-05, |
| "loss": 0.4437, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.42455242966751916, |
| "grad_norm": 0.29009367411374415, |
| "learning_rate": 4.7772511848341234e-05, |
| "loss": 0.4478, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.42710997442455245, |
| "grad_norm": 0.3080387840957786, |
| "learning_rate": 4.7725118483412326e-05, |
| "loss": 0.4365, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.4296675191815857, |
| "grad_norm": 0.30741939240017874, |
| "learning_rate": 4.767772511848341e-05, |
| "loss": 0.4588, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.4322250639386189, |
| "grad_norm": 0.3198498782578863, |
| "learning_rate": 4.76303317535545e-05, |
| "loss": 0.438, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.43478260869565216, |
| "grad_norm": 0.34750707859647, |
| "learning_rate": 4.758293838862559e-05, |
| "loss": 0.4543, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.4373401534526854, |
| "grad_norm": 0.3106322104274765, |
| "learning_rate": 4.7535545023696684e-05, |
| "loss": 0.4567, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.4398976982097187, |
| "grad_norm": 0.30192961843031885, |
| "learning_rate": 4.7488151658767775e-05, |
| "loss": 0.4342, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.4424552429667519, |
| "grad_norm": 0.28068686110702473, |
| "learning_rate": 4.744075829383887e-05, |
| "loss": 0.4246, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.44501278772378516, |
| "grad_norm": 0.343504552181982, |
| "learning_rate": 4.739336492890996e-05, |
| "loss": 0.4515, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.4475703324808184, |
| "grad_norm": 0.27995937978607677, |
| "learning_rate": 4.734597156398104e-05, |
| "loss": 0.4423, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.45012787723785164, |
| "grad_norm": 0.3040416539136848, |
| "learning_rate": 4.7298578199052134e-05, |
| "loss": 0.45, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.45268542199488493, |
| "grad_norm": 0.31835031373188166, |
| "learning_rate": 4.7251184834123225e-05, |
| "loss": 0.4532, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.45524296675191817, |
| "grad_norm": 0.3414414505648522, |
| "learning_rate": 4.720379146919432e-05, |
| "loss": 0.4498, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.4578005115089514, |
| "grad_norm": 0.3673972403213916, |
| "learning_rate": 4.71563981042654e-05, |
| "loss": 0.444, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.46035805626598464, |
| "grad_norm": 0.2994655863634162, |
| "learning_rate": 4.710900473933649e-05, |
| "loss": 0.4436, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4629156010230179, |
| "grad_norm": 0.2979340261572654, |
| "learning_rate": 4.7061611374407584e-05, |
| "loss": 0.4338, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.46547314578005117, |
| "grad_norm": 0.3259496116943633, |
| "learning_rate": 4.7014218009478675e-05, |
| "loss": 0.4495, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.4680306905370844, |
| "grad_norm": 0.23888073915231028, |
| "learning_rate": 4.6966824644549767e-05, |
| "loss": 0.4284, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 0.32689979789920254, |
| "learning_rate": 4.691943127962086e-05, |
| "loss": 0.4307, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.4731457800511509, |
| "grad_norm": 0.25079547977758637, |
| "learning_rate": 4.687203791469195e-05, |
| "loss": 0.4421, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.47570332480818417, |
| "grad_norm": 0.2867599117175957, |
| "learning_rate": 4.6824644549763034e-05, |
| "loss": 0.4208, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.4782608695652174, |
| "grad_norm": 0.30676226767943815, |
| "learning_rate": 4.6777251184834125e-05, |
| "loss": 0.4473, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.48081841432225064, |
| "grad_norm": 0.2535226915718885, |
| "learning_rate": 4.6729857819905216e-05, |
| "loss": 0.4341, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.4833759590792839, |
| "grad_norm": 0.2953685479977593, |
| "learning_rate": 4.668246445497631e-05, |
| "loss": 0.4296, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.4859335038363171, |
| "grad_norm": 0.24557281792948057, |
| "learning_rate": 4.663507109004739e-05, |
| "loss": 0.4507, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.4884910485933504, |
| "grad_norm": 0.2738208517596116, |
| "learning_rate": 4.6587677725118484e-05, |
| "loss": 0.4285, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.49104859335038364, |
| "grad_norm": 0.28109008439258515, |
| "learning_rate": 4.6540284360189575e-05, |
| "loss": 0.4396, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.4936061381074169, |
| "grad_norm": 0.2793263219783419, |
| "learning_rate": 4.6492890995260666e-05, |
| "loss": 0.4334, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.4961636828644501, |
| "grad_norm": 0.2679578064695335, |
| "learning_rate": 4.644549763033176e-05, |
| "loss": 0.4425, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.49872122762148335, |
| "grad_norm": 0.22379280473483837, |
| "learning_rate": 4.639810426540285e-05, |
| "loss": 0.4366, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.5012787723785166, |
| "grad_norm": 0.24785033078885174, |
| "learning_rate": 4.635071090047394e-05, |
| "loss": 0.4309, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.5038363171355499, |
| "grad_norm": 0.24670000195823377, |
| "learning_rate": 4.6303317535545025e-05, |
| "loss": 0.4417, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.5063938618925832, |
| "grad_norm": 0.2930253060170641, |
| "learning_rate": 4.6255924170616116e-05, |
| "loss": 0.4375, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.5089514066496164, |
| "grad_norm": 0.25825281391527216, |
| "learning_rate": 4.620853080568721e-05, |
| "loss": 0.4069, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.5115089514066496, |
| "grad_norm": 0.26224408452770004, |
| "learning_rate": 4.616113744075829e-05, |
| "loss": 0.4324, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5140664961636828, |
| "grad_norm": 0.25990930281801855, |
| "learning_rate": 4.6113744075829384e-05, |
| "loss": 0.4345, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.5166240409207161, |
| "grad_norm": 0.268851283978036, |
| "learning_rate": 4.6066350710900475e-05, |
| "loss": 0.4459, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.5191815856777494, |
| "grad_norm": 0.24959358046946803, |
| "learning_rate": 4.6018957345971566e-05, |
| "loss": 0.429, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.5217391304347826, |
| "grad_norm": 0.25540467279864626, |
| "learning_rate": 4.597156398104265e-05, |
| "loss": 0.4348, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.5242966751918159, |
| "grad_norm": 0.3130713054404299, |
| "learning_rate": 4.592417061611375e-05, |
| "loss": 0.4271, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.5268542199488491, |
| "grad_norm": 0.2688748449663916, |
| "learning_rate": 4.587677725118484e-05, |
| "loss": 0.442, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "grad_norm": 0.28683589425397626, |
| "learning_rate": 4.5829383886255925e-05, |
| "loss": 0.4333, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.5319693094629157, |
| "grad_norm": 0.271763985489389, |
| "learning_rate": 4.5781990521327016e-05, |
| "loss": 0.4438, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.5345268542199488, |
| "grad_norm": 0.2885843579908882, |
| "learning_rate": 4.573459715639811e-05, |
| "loss": 0.4419, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.5370843989769821, |
| "grad_norm": 0.28754217783051483, |
| "learning_rate": 4.56872037914692e-05, |
| "loss": 0.4355, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5396419437340153, |
| "grad_norm": 0.2737511286441873, |
| "learning_rate": 4.563981042654028e-05, |
| "loss": 0.4387, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.5421994884910486, |
| "grad_norm": 0.27934016374689097, |
| "learning_rate": 4.5592417061611375e-05, |
| "loss": 0.4349, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.5447570332480819, |
| "grad_norm": 0.26735219691819356, |
| "learning_rate": 4.5545023696682466e-05, |
| "loss": 0.4134, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.5473145780051151, |
| "grad_norm": 0.23887968506376323, |
| "learning_rate": 4.549763033175356e-05, |
| "loss": 0.4229, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.5498721227621484, |
| "grad_norm": 0.3011075198266259, |
| "learning_rate": 4.545023696682464e-05, |
| "loss": 0.428, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.5524296675191815, |
| "grad_norm": 0.2637321441272665, |
| "learning_rate": 4.540284360189574e-05, |
| "loss": 0.4363, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.5549872122762148, |
| "grad_norm": 0.29296145440427007, |
| "learning_rate": 4.535545023696683e-05, |
| "loss": 0.4304, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.5575447570332481, |
| "grad_norm": 0.30674762583017257, |
| "learning_rate": 4.5308056872037916e-05, |
| "loss": 0.4298, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.5601023017902813, |
| "grad_norm": 0.3143323294898988, |
| "learning_rate": 4.526066350710901e-05, |
| "loss": 0.4174, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.5626598465473146, |
| "grad_norm": 0.32231260882420976, |
| "learning_rate": 4.52132701421801e-05, |
| "loss": 0.4167, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5652173913043478, |
| "grad_norm": 0.3083722811455428, |
| "learning_rate": 4.516587677725119e-05, |
| "loss": 0.4146, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.5677749360613811, |
| "grad_norm": 0.29120067898722657, |
| "learning_rate": 4.5118483412322274e-05, |
| "loss": 0.4341, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.5703324808184144, |
| "grad_norm": 0.31085600501836047, |
| "learning_rate": 4.5071090047393366e-05, |
| "loss": 0.4368, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.5728900255754475, |
| "grad_norm": 0.2562962629149674, |
| "learning_rate": 4.502369668246446e-05, |
| "loss": 0.4505, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.5754475703324808, |
| "grad_norm": 0.3229335775809623, |
| "learning_rate": 4.497630331753555e-05, |
| "loss": 0.4281, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.578005115089514, |
| "grad_norm": 0.2540883724081723, |
| "learning_rate": 4.492890995260663e-05, |
| "loss": 0.4345, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.5805626598465473, |
| "grad_norm": 0.2886423143864352, |
| "learning_rate": 4.488151658767773e-05, |
| "loss": 0.4252, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.5831202046035806, |
| "grad_norm": 0.25233412822407364, |
| "learning_rate": 4.483412322274882e-05, |
| "loss": 0.4366, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.5856777493606138, |
| "grad_norm": 0.3098472836225145, |
| "learning_rate": 4.478672985781991e-05, |
| "loss": 0.4363, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 0.27067664311480977, |
| "learning_rate": 4.4739336492891e-05, |
| "loss": 0.4354, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5907928388746803, |
| "grad_norm": 0.28985639348209424, |
| "learning_rate": 4.469194312796209e-05, |
| "loss": 0.4441, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.5933503836317136, |
| "grad_norm": 0.24685436630203944, |
| "learning_rate": 4.464454976303318e-05, |
| "loss": 0.4249, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.5959079283887468, |
| "grad_norm": 0.2415267361110554, |
| "learning_rate": 4.4597156398104266e-05, |
| "loss": 0.4218, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.59846547314578, |
| "grad_norm": 0.2690111434121743, |
| "learning_rate": 4.454976303317536e-05, |
| "loss": 0.4597, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.6010230179028133, |
| "grad_norm": 0.24515241676488578, |
| "learning_rate": 4.450236966824645e-05, |
| "loss": 0.4484, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.6035805626598465, |
| "grad_norm": 0.27035232285201444, |
| "learning_rate": 4.445497630331753e-05, |
| "loss": 0.4241, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.6061381074168798, |
| "grad_norm": 0.24712864164146403, |
| "learning_rate": 4.4407582938388624e-05, |
| "loss": 0.4351, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.6086956521739131, |
| "grad_norm": 0.2756970755602701, |
| "learning_rate": 4.4360189573459716e-05, |
| "loss": 0.424, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.6112531969309463, |
| "grad_norm": 0.219814601291788, |
| "learning_rate": 4.431279620853081e-05, |
| "loss": 0.4355, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.6138107416879796, |
| "grad_norm": 0.3022287967822769, |
| "learning_rate": 4.42654028436019e-05, |
| "loss": 0.421, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.6163682864450127, |
| "grad_norm": 0.25187957786419013, |
| "learning_rate": 4.421800947867299e-05, |
| "loss": 0.3987, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.618925831202046, |
| "grad_norm": 0.2906641083550279, |
| "learning_rate": 4.417061611374408e-05, |
| "loss": 0.4094, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.6214833759590793, |
| "grad_norm": 0.276150078017692, |
| "learning_rate": 4.4123222748815165e-05, |
| "loss": 0.4336, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.6240409207161125, |
| "grad_norm": 0.31066268816197273, |
| "learning_rate": 4.407582938388626e-05, |
| "loss": 0.4327, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.6265984654731458, |
| "grad_norm": 0.2741673358883194, |
| "learning_rate": 4.402843601895735e-05, |
| "loss": 0.4389, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.629156010230179, |
| "grad_norm": 0.2836157982013865, |
| "learning_rate": 4.398104265402844e-05, |
| "loss": 0.4197, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.6317135549872123, |
| "grad_norm": 0.2785562060260622, |
| "learning_rate": 4.3933649289099524e-05, |
| "loss": 0.4118, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.6342710997442456, |
| "grad_norm": 0.2562708631634233, |
| "learning_rate": 4.3886255924170615e-05, |
| "loss": 0.4313, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.6368286445012787, |
| "grad_norm": 0.3006474659338952, |
| "learning_rate": 4.383886255924171e-05, |
| "loss": 0.4369, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.639386189258312, |
| "grad_norm": 0.2457393144167786, |
| "learning_rate": 4.37914691943128e-05, |
| "loss": 0.4262, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6419437340153452, |
| "grad_norm": 0.2613054151983516, |
| "learning_rate": 4.374407582938389e-05, |
| "loss": 0.4156, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.6445012787723785, |
| "grad_norm": 0.2560975612132112, |
| "learning_rate": 4.369668246445498e-05, |
| "loss": 0.4327, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "grad_norm": 0.2615125383682568, |
| "learning_rate": 4.364928909952607e-05, |
| "loss": 0.4239, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.649616368286445, |
| "grad_norm": 0.2703032829220517, |
| "learning_rate": 4.3601895734597157e-05, |
| "loss": 0.4326, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.6521739130434783, |
| "grad_norm": 0.27271514949565595, |
| "learning_rate": 4.355450236966825e-05, |
| "loss": 0.4265, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.6547314578005116, |
| "grad_norm": 0.28726916782564577, |
| "learning_rate": 4.350710900473934e-05, |
| "loss": 0.4357, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.6572890025575447, |
| "grad_norm": 0.24344125190622717, |
| "learning_rate": 4.345971563981043e-05, |
| "loss": 0.4209, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.659846547314578, |
| "grad_norm": 0.2779762711774089, |
| "learning_rate": 4.3412322274881515e-05, |
| "loss": 0.4402, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.6624040920716112, |
| "grad_norm": 0.2833066194766303, |
| "learning_rate": 4.3364928909952606e-05, |
| "loss": 0.4309, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.6649616368286445, |
| "grad_norm": 0.264439200242611, |
| "learning_rate": 4.33175355450237e-05, |
| "loss": 0.4234, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.6675191815856778, |
| "grad_norm": 0.24820943480335378, |
| "learning_rate": 4.327014218009479e-05, |
| "loss": 0.3998, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.670076726342711, |
| "grad_norm": 0.25992990540498473, |
| "learning_rate": 4.322274881516588e-05, |
| "loss": 0.4168, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.6726342710997443, |
| "grad_norm": 0.261861520036362, |
| "learning_rate": 4.317535545023697e-05, |
| "loss": 0.4148, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.6751918158567775, |
| "grad_norm": 0.26644356287497634, |
| "learning_rate": 4.312796208530806e-05, |
| "loss": 0.4345, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.6777493606138107, |
| "grad_norm": 0.2666945078617733, |
| "learning_rate": 4.308056872037915e-05, |
| "loss": 0.429, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.680306905370844, |
| "grad_norm": 0.23998424454638398, |
| "learning_rate": 4.303317535545024e-05, |
| "loss": 0.4226, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.6828644501278772, |
| "grad_norm": 0.2530577923125785, |
| "learning_rate": 4.298578199052133e-05, |
| "loss": 0.4078, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.6854219948849105, |
| "grad_norm": 0.2532304497913718, |
| "learning_rate": 4.293838862559242e-05, |
| "loss": 0.4157, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.6879795396419437, |
| "grad_norm": 0.25183699854529734, |
| "learning_rate": 4.2890995260663506e-05, |
| "loss": 0.4213, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.690537084398977, |
| "grad_norm": 0.26547907225114403, |
| "learning_rate": 4.28436018957346e-05, |
| "loss": 0.4213, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6930946291560103, |
| "grad_norm": 0.247119162528651, |
| "learning_rate": 4.279620853080569e-05, |
| "loss": 0.4441, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 0.2802387698001237, |
| "learning_rate": 4.2748815165876774e-05, |
| "loss": 0.3961, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.6982097186700768, |
| "grad_norm": 0.26948037359199567, |
| "learning_rate": 4.270142180094787e-05, |
| "loss": 0.4245, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.7007672634271099, |
| "grad_norm": 0.26130649009882045, |
| "learning_rate": 4.265402843601896e-05, |
| "loss": 0.4322, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.7033248081841432, |
| "grad_norm": 0.27770444806162603, |
| "learning_rate": 4.260663507109005e-05, |
| "loss": 0.4202, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 0.2725938450444014, |
| "learning_rate": 4.255924170616114e-05, |
| "loss": 0.4287, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.7084398976982097, |
| "grad_norm": 0.27389105466937425, |
| "learning_rate": 4.251184834123223e-05, |
| "loss": 0.4409, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.710997442455243, |
| "grad_norm": 0.2559589781819663, |
| "learning_rate": 4.246445497630332e-05, |
| "loss": 0.4239, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.7135549872122762, |
| "grad_norm": 0.27905262687916627, |
| "learning_rate": 4.2417061611374406e-05, |
| "loss": 0.4012, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.7161125319693095, |
| "grad_norm": 0.23334151341578974, |
| "learning_rate": 4.23696682464455e-05, |
| "loss": 0.4046, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.7186700767263428, |
| "grad_norm": 0.268496141900923, |
| "learning_rate": 4.232227488151659e-05, |
| "loss": 0.4333, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.7212276214833759, |
| "grad_norm": 0.23917839522777942, |
| "learning_rate": 4.227488151658768e-05, |
| "loss": 0.4199, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.7237851662404092, |
| "grad_norm": 0.2550111302110382, |
| "learning_rate": 4.2227488151658765e-05, |
| "loss": 0.4323, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.7263427109974424, |
| "grad_norm": 0.23586654241099228, |
| "learning_rate": 4.218009478672986e-05, |
| "loss": 0.4332, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.7289002557544757, |
| "grad_norm": 0.23396222749154336, |
| "learning_rate": 4.2132701421800954e-05, |
| "loss": 0.4269, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.731457800511509, |
| "grad_norm": 0.24604087944261607, |
| "learning_rate": 4.208530805687204e-05, |
| "loss": 0.4255, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.7340153452685422, |
| "grad_norm": 0.2357151023733209, |
| "learning_rate": 4.203791469194313e-05, |
| "loss": 0.4153, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.7365728900255755, |
| "grad_norm": 0.2737358478652627, |
| "learning_rate": 4.199052132701422e-05, |
| "loss": 0.4122, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.7391304347826086, |
| "grad_norm": 0.24451563415626945, |
| "learning_rate": 4.194312796208531e-05, |
| "loss": 0.4185, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.7416879795396419, |
| "grad_norm": 0.2541923450282548, |
| "learning_rate": 4.18957345971564e-05, |
| "loss": 0.433, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.7442455242966752, |
| "grad_norm": 0.26598804764295264, |
| "learning_rate": 4.184834123222749e-05, |
| "loss": 0.4331, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.7468030690537084, |
| "grad_norm": 0.2652230008961156, |
| "learning_rate": 4.180094786729858e-05, |
| "loss": 0.4055, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.7493606138107417, |
| "grad_norm": 0.2795715968348066, |
| "learning_rate": 4.175355450236967e-05, |
| "loss": 0.445, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.7519181585677749, |
| "grad_norm": 0.27501165211060447, |
| "learning_rate": 4.1706161137440756e-05, |
| "loss": 0.4159, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.7544757033248082, |
| "grad_norm": 0.2739979913068721, |
| "learning_rate": 4.1658767772511854e-05, |
| "loss": 0.4104, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.7570332480818415, |
| "grad_norm": 0.28465485095011533, |
| "learning_rate": 4.1611374407582945e-05, |
| "loss": 0.4244, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.7595907928388747, |
| "grad_norm": 0.23976720007281227, |
| "learning_rate": 4.156398104265403e-05, |
| "loss": 0.4295, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.7621483375959079, |
| "grad_norm": 0.3088498871060993, |
| "learning_rate": 4.151658767772512e-05, |
| "loss": 0.4092, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "grad_norm": 0.22770658779763117, |
| "learning_rate": 4.146919431279621e-05, |
| "loss": 0.4269, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.7672634271099744, |
| "grad_norm": 0.27958609884503893, |
| "learning_rate": 4.1421800947867304e-05, |
| "loss": 0.4044, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7698209718670077, |
| "grad_norm": 0.21729140703255853, |
| "learning_rate": 4.137440758293839e-05, |
| "loss": 0.4131, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.7723785166240409, |
| "grad_norm": 0.2685972778786351, |
| "learning_rate": 4.132701421800948e-05, |
| "loss": 0.4207, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.7749360613810742, |
| "grad_norm": 0.22146302445276972, |
| "learning_rate": 4.127962085308057e-05, |
| "loss": 0.4242, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.7774936061381074, |
| "grad_norm": 0.246088542123556, |
| "learning_rate": 4.123222748815166e-05, |
| "loss": 0.4141, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.7800511508951407, |
| "grad_norm": 0.2601313122186582, |
| "learning_rate": 4.118483412322275e-05, |
| "loss": 0.4139, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.782608695652174, |
| "grad_norm": 0.24464325806612688, |
| "learning_rate": 4.113744075829384e-05, |
| "loss": 0.4163, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.7851662404092071, |
| "grad_norm": 0.2651808280050511, |
| "learning_rate": 4.1090047393364936e-05, |
| "loss": 0.4306, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.7877237851662404, |
| "grad_norm": 0.30621497925153024, |
| "learning_rate": 4.104265402843602e-05, |
| "loss": 0.4142, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.7902813299232737, |
| "grad_norm": 0.27574828742072455, |
| "learning_rate": 4.099526066350711e-05, |
| "loss": 0.4194, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.7928388746803069, |
| "grad_norm": 0.2646206797692572, |
| "learning_rate": 4.0947867298578204e-05, |
| "loss": 0.4338, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7953964194373402, |
| "grad_norm": 0.2953561111239538, |
| "learning_rate": 4.090047393364929e-05, |
| "loss": 0.4354, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.7979539641943734, |
| "grad_norm": 0.2679304891781562, |
| "learning_rate": 4.085308056872038e-05, |
| "loss": 0.3996, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.8005115089514067, |
| "grad_norm": 0.2614240488716786, |
| "learning_rate": 4.080568720379147e-05, |
| "loss": 0.4177, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.80306905370844, |
| "grad_norm": 0.265506214792124, |
| "learning_rate": 4.075829383886256e-05, |
| "loss": 0.4229, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.8056265984654731, |
| "grad_norm": 0.27403664060217564, |
| "learning_rate": 4.071090047393365e-05, |
| "loss": 0.4111, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.8081841432225064, |
| "grad_norm": 0.27566927450054673, |
| "learning_rate": 4.066350710900474e-05, |
| "loss": 0.4186, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.8107416879795396, |
| "grad_norm": 0.2432325969682962, |
| "learning_rate": 4.061611374407583e-05, |
| "loss": 0.4317, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.8132992327365729, |
| "grad_norm": 0.3100835908713653, |
| "learning_rate": 4.056872037914692e-05, |
| "loss": 0.4263, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.8158567774936062, |
| "grad_norm": 0.22437352803704477, |
| "learning_rate": 4.052132701421801e-05, |
| "loss": 0.4255, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.8184143222506394, |
| "grad_norm": 0.2922741159014344, |
| "learning_rate": 4.0473933649289103e-05, |
| "loss": 0.4211, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.8209718670076727, |
| "grad_norm": 0.24988657961825148, |
| "learning_rate": 4.0426540284360195e-05, |
| "loss": 0.424, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 0.26255657346142036, |
| "learning_rate": 4.037914691943128e-05, |
| "loss": 0.4227, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.8260869565217391, |
| "grad_norm": 0.30330186929682673, |
| "learning_rate": 4.033175355450237e-05, |
| "loss": 0.4074, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.8286445012787724, |
| "grad_norm": 0.2545401531861922, |
| "learning_rate": 4.028436018957346e-05, |
| "loss": 0.4229, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.8312020460358056, |
| "grad_norm": 0.33076162934856534, |
| "learning_rate": 4.023696682464455e-05, |
| "loss": 0.4236, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.8337595907928389, |
| "grad_norm": 0.2595060997444347, |
| "learning_rate": 4.018957345971564e-05, |
| "loss": 0.3957, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.8363171355498721, |
| "grad_norm": 0.27331289296315875, |
| "learning_rate": 4.014218009478673e-05, |
| "loss": 0.3975, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.8388746803069054, |
| "grad_norm": 0.3262160188917236, |
| "learning_rate": 4.009478672985782e-05, |
| "loss": 0.4149, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.8414322250639387, |
| "grad_norm": 0.28084918191171054, |
| "learning_rate": 4.004739336492891e-05, |
| "loss": 0.3999, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.8439897698209718, |
| "grad_norm": 0.28489832877056614, |
| "learning_rate": 4e-05, |
| "loss": 0.4134, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.8465473145780051, |
| "grad_norm": 0.28245197788542203, |
| "learning_rate": 3.9952606635071095e-05, |
| "loss": 0.4169, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.8491048593350383, |
| "grad_norm": 0.2637012101965425, |
| "learning_rate": 3.9905213270142186e-05, |
| "loss": 0.4218, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.8516624040920716, |
| "grad_norm": 0.25239050580322964, |
| "learning_rate": 3.985781990521327e-05, |
| "loss": 0.403, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.8542199488491049, |
| "grad_norm": 0.3242230481439879, |
| "learning_rate": 3.981042654028436e-05, |
| "loss": 0.4413, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.8567774936061381, |
| "grad_norm": 0.284310422864808, |
| "learning_rate": 3.976303317535545e-05, |
| "loss": 0.3992, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.8593350383631714, |
| "grad_norm": 0.44681533776592774, |
| "learning_rate": 3.9715639810426545e-05, |
| "loss": 0.427, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.8618925831202046, |
| "grad_norm": 0.276866045564762, |
| "learning_rate": 3.966824644549763e-05, |
| "loss": 0.4166, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.8644501278772379, |
| "grad_norm": 0.2645666241102728, |
| "learning_rate": 3.962085308056872e-05, |
| "loss": 0.4018, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.8670076726342711, |
| "grad_norm": 0.24575688741880164, |
| "learning_rate": 3.957345971563981e-05, |
| "loss": 0.4103, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 0.27234369778819617, |
| "learning_rate": 3.95260663507109e-05, |
| "loss": 0.4178, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.8721227621483376, |
| "grad_norm": 0.2510614688018398, |
| "learning_rate": 3.9478672985781994e-05, |
| "loss": 0.4124, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.8746803069053708, |
| "grad_norm": 0.26748644233845587, |
| "learning_rate": 3.9431279620853086e-05, |
| "loss": 0.414, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.8772378516624041, |
| "grad_norm": 0.2839803657663104, |
| "learning_rate": 3.938388625592418e-05, |
| "loss": 0.4089, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.8797953964194374, |
| "grad_norm": 0.2797111880377059, |
| "learning_rate": 3.933649289099526e-05, |
| "loss": 0.4143, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "grad_norm": 0.2889574353504485, |
| "learning_rate": 3.928909952606635e-05, |
| "loss": 0.4224, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.8849104859335039, |
| "grad_norm": 0.2661360330372934, |
| "learning_rate": 3.9241706161137444e-05, |
| "loss": 0.4002, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.887468030690537, |
| "grad_norm": 0.2771219438556858, |
| "learning_rate": 3.919431279620853e-05, |
| "loss": 0.4113, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.8900255754475703, |
| "grad_norm": 0.27519975509219513, |
| "learning_rate": 3.914691943127962e-05, |
| "loss": 0.4191, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.8925831202046036, |
| "grad_norm": 0.2928986459591194, |
| "learning_rate": 3.909952606635071e-05, |
| "loss": 0.4233, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.8951406649616368, |
| "grad_norm": 0.2516706010333049, |
| "learning_rate": 3.90521327014218e-05, |
| "loss": 0.4012, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.8976982097186701, |
| "grad_norm": 0.2408367305868911, |
| "learning_rate": 3.900473933649289e-05, |
| "loss": 0.409, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.9002557544757033, |
| "grad_norm": 0.27279698596719115, |
| "learning_rate": 3.8957345971563986e-05, |
| "loss": 0.4068, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.9028132992327366, |
| "grad_norm": 0.25724144072901317, |
| "learning_rate": 3.890995260663508e-05, |
| "loss": 0.4048, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.9053708439897699, |
| "grad_norm": 0.22699353109114737, |
| "learning_rate": 3.886255924170616e-05, |
| "loss": 0.4005, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.907928388746803, |
| "grad_norm": 0.248751842398148, |
| "learning_rate": 3.881516587677725e-05, |
| "loss": 0.403, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.9104859335038363, |
| "grad_norm": 0.29922749419927136, |
| "learning_rate": 3.8767772511848344e-05, |
| "loss": 0.4255, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.9130434782608695, |
| "grad_norm": 0.24165253803081185, |
| "learning_rate": 3.8720379146919435e-05, |
| "loss": 0.3939, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.9156010230179028, |
| "grad_norm": 0.26769384614675706, |
| "learning_rate": 3.867298578199052e-05, |
| "loss": 0.3881, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.9181585677749361, |
| "grad_norm": 0.24501952061738294, |
| "learning_rate": 3.862559241706161e-05, |
| "loss": 0.412, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.9207161125319693, |
| "grad_norm": 0.27781395797316877, |
| "learning_rate": 3.85781990521327e-05, |
| "loss": 0.4293, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.9232736572890026, |
| "grad_norm": 0.22892488592677732, |
| "learning_rate": 3.8530805687203794e-05, |
| "loss": 0.4069, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.9258312020460358, |
| "grad_norm": 0.258222796507594, |
| "learning_rate": 3.848341232227488e-05, |
| "loss": 0.4177, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.928388746803069, |
| "grad_norm": 0.22668062864053168, |
| "learning_rate": 3.843601895734598e-05, |
| "loss": 0.4012, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.9309462915601023, |
| "grad_norm": 0.29919710610032196, |
| "learning_rate": 3.838862559241707e-05, |
| "loss": 0.3971, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.9335038363171355, |
| "grad_norm": 0.25611276582674614, |
| "learning_rate": 3.834123222748815e-05, |
| "loss": 0.4071, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.9360613810741688, |
| "grad_norm": 0.24646222411688562, |
| "learning_rate": 3.8293838862559244e-05, |
| "loss": 0.3993, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.9386189258312021, |
| "grad_norm": 0.26353127236434676, |
| "learning_rate": 3.8246445497630335e-05, |
| "loss": 0.4042, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 0.23542690864108376, |
| "learning_rate": 3.8199052132701427e-05, |
| "loss": 0.3971, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.9437340153452686, |
| "grad_norm": 0.28245650020992513, |
| "learning_rate": 3.815165876777251e-05, |
| "loss": 0.4044, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.9462915601023018, |
| "grad_norm": 0.241903542321646, |
| "learning_rate": 3.81042654028436e-05, |
| "loss": 0.4054, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.948849104859335, |
| "grad_norm": 0.2378788913607164, |
| "learning_rate": 3.8056872037914694e-05, |
| "loss": 0.4125, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.9514066496163683, |
| "grad_norm": 0.2804121730578267, |
| "learning_rate": 3.8009478672985785e-05, |
| "loss": 0.421, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.9539641943734015, |
| "grad_norm": 0.23484030136789275, |
| "learning_rate": 3.796208530805687e-05, |
| "loss": 0.4112, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.9565217391304348, |
| "grad_norm": 0.27396114311616715, |
| "learning_rate": 3.791469194312796e-05, |
| "loss": 0.4204, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.959079283887468, |
| "grad_norm": 0.2393059668656863, |
| "learning_rate": 3.786729857819906e-05, |
| "loss": 0.4221, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.9616368286445013, |
| "grad_norm": 0.2482404610854873, |
| "learning_rate": 3.7819905213270144e-05, |
| "loss": 0.4078, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.9641943734015346, |
| "grad_norm": 0.2187225932256056, |
| "learning_rate": 3.7772511848341235e-05, |
| "loss": 0.4012, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.9667519181585678, |
| "grad_norm": 0.2876923866076237, |
| "learning_rate": 3.7725118483412326e-05, |
| "loss": 0.4252, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.969309462915601, |
| "grad_norm": 0.21352306199272536, |
| "learning_rate": 3.767772511848342e-05, |
| "loss": 0.3966, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.9718670076726342, |
| "grad_norm": 0.24918249981369345, |
| "learning_rate": 3.76303317535545e-05, |
| "loss": 0.415, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.9744245524296675, |
| "grad_norm": 0.23387991185870513, |
| "learning_rate": 3.7582938388625594e-05, |
| "loss": 0.4127, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.9769820971867008, |
| "grad_norm": 0.29074570845452075, |
| "learning_rate": 3.7535545023696685e-05, |
| "loss": 0.4275, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.979539641943734, |
| "grad_norm": 0.26317053639627985, |
| "learning_rate": 3.748815165876777e-05, |
| "loss": 0.4294, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.9820971867007673, |
| "grad_norm": 0.2686101535519852, |
| "learning_rate": 3.744075829383886e-05, |
| "loss": 0.4025, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.9846547314578005, |
| "grad_norm": 0.23512146035375164, |
| "learning_rate": 3.739336492890995e-05, |
| "loss": 0.4156, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.9872122762148338, |
| "grad_norm": 0.2445528540082093, |
| "learning_rate": 3.734597156398105e-05, |
| "loss": 0.411, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.989769820971867, |
| "grad_norm": 0.25629958731478186, |
| "learning_rate": 3.7298578199052135e-05, |
| "loss": 0.4146, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.9923273657289002, |
| "grad_norm": 0.22796776248894252, |
| "learning_rate": 3.7251184834123226e-05, |
| "loss": 0.4087, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.9948849104859335, |
| "grad_norm": 0.2958838240159185, |
| "learning_rate": 3.720379146919432e-05, |
| "loss": 0.4099, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.9974424552429667, |
| "grad_norm": 0.29381146676513115, |
| "learning_rate": 3.71563981042654e-05, |
| "loss": 0.414, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.24652387465895462, |
| "learning_rate": 3.7109004739336493e-05, |
| "loss": 0.406, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.0025575447570332, |
| "grad_norm": 0.34877694988477503, |
| "learning_rate": 3.7061611374407585e-05, |
| "loss": 0.3503, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.0051150895140666, |
| "grad_norm": 0.253308743207643, |
| "learning_rate": 3.7014218009478676e-05, |
| "loss": 0.3396, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.0076726342710998, |
| "grad_norm": 0.25868870232786395, |
| "learning_rate": 3.696682464454976e-05, |
| "loss": 0.3479, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.010230179028133, |
| "grad_norm": 0.2971613524674976, |
| "learning_rate": 3.691943127962085e-05, |
| "loss": 0.3422, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.0127877237851663, |
| "grad_norm": 0.2906921720555448, |
| "learning_rate": 3.687203791469194e-05, |
| "loss": 0.3554, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.0153452685421995, |
| "grad_norm": 0.2875908700888308, |
| "learning_rate": 3.6824644549763035e-05, |
| "loss": 0.3291, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.0179028132992327, |
| "grad_norm": 0.26243291597976126, |
| "learning_rate": 3.6777251184834126e-05, |
| "loss": 0.3563, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.020460358056266, |
| "grad_norm": 0.2730126516412927, |
| "learning_rate": 3.672985781990522e-05, |
| "loss": 0.3292, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.0230179028132993, |
| "grad_norm": 0.29682604006588903, |
| "learning_rate": 3.668246445497631e-05, |
| "loss": 0.3461, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.0255754475703325, |
| "grad_norm": 0.2494027953748241, |
| "learning_rate": 3.663507109004739e-05, |
| "loss": 0.3491, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.0281329923273657, |
| "grad_norm": 0.2538094727914758, |
| "learning_rate": 3.6587677725118485e-05, |
| "loss": 0.3406, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.030690537084399, |
| "grad_norm": 0.28915662612861087, |
| "learning_rate": 3.6540284360189576e-05, |
| "loss": 0.3408, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.0332480818414322, |
| "grad_norm": 0.24591203347051302, |
| "learning_rate": 3.649289099526067e-05, |
| "loss": 0.337, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.0358056265984654, |
| "grad_norm": 0.2871114071516867, |
| "learning_rate": 3.644549763033175e-05, |
| "loss": 0.3347, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.0383631713554988, |
| "grad_norm": 0.2524744240235806, |
| "learning_rate": 3.639810426540284e-05, |
| "loss": 0.3441, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.040920716112532, |
| "grad_norm": 0.2630583826634349, |
| "learning_rate": 3.6350710900473935e-05, |
| "loss": 0.3099, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.0434782608695652, |
| "grad_norm": 0.2570358498212408, |
| "learning_rate": 3.6303317535545026e-05, |
| "loss": 0.3211, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.0460358056265984, |
| "grad_norm": 0.26431307397410003, |
| "learning_rate": 3.625592417061612e-05, |
| "loss": 0.35, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.0485933503836318, |
| "grad_norm": 0.27463747349361467, |
| "learning_rate": 3.620853080568721e-05, |
| "loss": 0.3494, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.051150895140665, |
| "grad_norm": 0.24666921280022072, |
| "learning_rate": 3.61611374407583e-05, |
| "loss": 0.341, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.0537084398976981, |
| "grad_norm": 0.2505495844763562, |
| "learning_rate": 3.6113744075829384e-05, |
| "loss": 0.3412, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.0562659846547315, |
| "grad_norm": 0.2506374206193608, |
| "learning_rate": 3.6066350710900476e-05, |
| "loss": 0.3229, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.0588235294117647, |
| "grad_norm": 0.24249666251287566, |
| "learning_rate": 3.601895734597157e-05, |
| "loss": 0.3329, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.061381074168798, |
| "grad_norm": 0.2618704099040068, |
| "learning_rate": 3.597156398104266e-05, |
| "loss": 0.3462, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.0639386189258313, |
| "grad_norm": 0.25454325976096887, |
| "learning_rate": 3.592417061611374e-05, |
| "loss": 0.319, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.0664961636828645, |
| "grad_norm": 0.3012500683553219, |
| "learning_rate": 3.5876777251184834e-05, |
| "loss": 0.3452, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.0690537084398977, |
| "grad_norm": 0.2310352458746118, |
| "learning_rate": 3.5829383886255926e-05, |
| "loss": 0.3203, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.0716112531969308, |
| "grad_norm": 0.2867380051579317, |
| "learning_rate": 3.578199052132701e-05, |
| "loss": 0.3408, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.0741687979539642, |
| "grad_norm": 0.24642924252308632, |
| "learning_rate": 3.573459715639811e-05, |
| "loss": 0.3247, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.0767263427109974, |
| "grad_norm": 0.22539243089747027, |
| "learning_rate": 3.56872037914692e-05, |
| "loss": 0.3282, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.0792838874680306, |
| "grad_norm": 0.2508510372019925, |
| "learning_rate": 3.563981042654029e-05, |
| "loss": 0.3444, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.081841432225064, |
| "grad_norm": 0.25272955853952195, |
| "learning_rate": 3.5592417061611376e-05, |
| "loss": 0.3366, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.0843989769820972, |
| "grad_norm": 0.2272026636889727, |
| "learning_rate": 3.554502369668247e-05, |
| "loss": 0.3516, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.0869565217391304, |
| "grad_norm": 0.27462834447503987, |
| "learning_rate": 3.549763033175356e-05, |
| "loss": 0.3374, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.0895140664961638, |
| "grad_norm": 0.2128026835115876, |
| "learning_rate": 3.545023696682464e-05, |
| "loss": 0.3336, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.092071611253197, |
| "grad_norm": 0.2354379611105053, |
| "learning_rate": 3.5402843601895734e-05, |
| "loss": 0.3379, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.0946291560102301, |
| "grad_norm": 0.224481929706568, |
| "learning_rate": 3.5355450236966825e-05, |
| "loss": 0.3564, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.0971867007672633, |
| "grad_norm": 0.21368714222847607, |
| "learning_rate": 3.530805687203792e-05, |
| "loss": 0.3141, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.0997442455242967, |
| "grad_norm": 0.23974097995132895, |
| "learning_rate": 3.5260663507109e-05, |
| "loss": 0.3313, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.10230179028133, |
| "grad_norm": 1.5113616428603722, |
| "learning_rate": 3.52132701421801e-05, |
| "loss": 0.3288, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.104859335038363, |
| "grad_norm": 0.2584287203231182, |
| "learning_rate": 3.516587677725119e-05, |
| "loss": 0.3347, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.1074168797953965, |
| "grad_norm": 0.19574817933562375, |
| "learning_rate": 3.5118483412322275e-05, |
| "loss": 0.3204, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.1099744245524297, |
| "grad_norm": 0.24894576509043242, |
| "learning_rate": 3.507109004739337e-05, |
| "loss": 0.3406, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.1125319693094629, |
| "grad_norm": 0.22605511670011208, |
| "learning_rate": 3.502369668246446e-05, |
| "loss": 0.3372, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.1150895140664963, |
| "grad_norm": 0.2426838758794149, |
| "learning_rate": 3.497630331753555e-05, |
| "loss": 0.3354, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.1176470588235294, |
| "grad_norm": 0.22312946234793202, |
| "learning_rate": 3.4928909952606634e-05, |
| "loss": 0.3283, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.1202046035805626, |
| "grad_norm": 0.24548486238399964, |
| "learning_rate": 3.4881516587677725e-05, |
| "loss": 0.3428, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.1227621483375958, |
| "grad_norm": 0.22862518373154317, |
| "learning_rate": 3.4834123222748817e-05, |
| "loss": 0.3296, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.1253196930946292, |
| "grad_norm": 0.2415393319131855, |
| "learning_rate": 3.478672985781991e-05, |
| "loss": 0.3366, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.1278772378516624, |
| "grad_norm": 0.24350557581856444, |
| "learning_rate": 3.473933649289099e-05, |
| "loss": 0.3331, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.1304347826086956, |
| "grad_norm": 0.22906222897576142, |
| "learning_rate": 3.4691943127962084e-05, |
| "loss": 0.3383, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.132992327365729, |
| "grad_norm": 0.9570276654007269, |
| "learning_rate": 3.464454976303318e-05, |
| "loss": 0.3383, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.1355498721227621, |
| "grad_norm": 0.4221562491079337, |
| "learning_rate": 3.4597156398104267e-05, |
| "loss": 0.3493, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.1381074168797953, |
| "grad_norm": 0.7247823264413438, |
| "learning_rate": 3.454976303317536e-05, |
| "loss": 0.3503, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.1406649616368287, |
| "grad_norm": 0.27292208588198535, |
| "learning_rate": 3.450236966824645e-05, |
| "loss": 0.3491, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.143222506393862, |
| "grad_norm": 0.24846288711065778, |
| "learning_rate": 3.445497630331754e-05, |
| "loss": 0.34, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.145780051150895, |
| "grad_norm": 0.28289846837651944, |
| "learning_rate": 3.4407582938388625e-05, |
| "loss": 0.3395, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.1483375959079285, |
| "grad_norm": 0.20360202393964177, |
| "learning_rate": 3.4360189573459716e-05, |
| "loss": 0.336, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.1508951406649617, |
| "grad_norm": 0.26795912135731376, |
| "learning_rate": 3.431279620853081e-05, |
| "loss": 0.3363, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.1534526854219949, |
| "grad_norm": 0.24482207535162454, |
| "learning_rate": 3.42654028436019e-05, |
| "loss": 0.3263, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.156010230179028, |
| "grad_norm": 1.091037041185637, |
| "learning_rate": 3.4218009478672984e-05, |
| "loss": 0.3319, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.1585677749360614, |
| "grad_norm": 0.25708621832570655, |
| "learning_rate": 3.4170616113744075e-05, |
| "loss": 0.3456, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.1611253196930946, |
| "grad_norm": 0.22978489335863728, |
| "learning_rate": 3.412322274881517e-05, |
| "loss": 0.3453, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.1636828644501278, |
| "grad_norm": 0.23661531026909, |
| "learning_rate": 3.407582938388626e-05, |
| "loss": 0.3311, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.1662404092071612, |
| "grad_norm": 0.26058801823717986, |
| "learning_rate": 3.402843601895735e-05, |
| "loss": 0.3293, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.1687979539641944, |
| "grad_norm": 0.2213831357242978, |
| "learning_rate": 3.398104265402844e-05, |
| "loss": 0.3448, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.1713554987212276, |
| "grad_norm": 0.22314933582706364, |
| "learning_rate": 3.393364928909953e-05, |
| "loss": 0.3319, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.1739130434782608, |
| "grad_norm": 0.2368130291318867, |
| "learning_rate": 3.3886255924170616e-05, |
| "loss": 0.3417, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.1764705882352942, |
| "grad_norm": 0.23299474747082943, |
| "learning_rate": 3.383886255924171e-05, |
| "loss": 0.3434, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.1790281329923273, |
| "grad_norm": 0.23122859916384542, |
| "learning_rate": 3.37914691943128e-05, |
| "loss": 0.353, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.1815856777493605, |
| "grad_norm": 0.23133544704817127, |
| "learning_rate": 3.3744075829383883e-05, |
| "loss": 0.3268, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.184143222506394, |
| "grad_norm": 0.2279777026926834, |
| "learning_rate": 3.3696682464454975e-05, |
| "loss": 0.3427, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.186700767263427, |
| "grad_norm": 0.24354305412664243, |
| "learning_rate": 3.3649289099526066e-05, |
| "loss": 0.3416, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.1892583120204603, |
| "grad_norm": 0.23175483156511392, |
| "learning_rate": 3.360189573459716e-05, |
| "loss": 0.3253, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.1918158567774937, |
| "grad_norm": 0.2505719500530794, |
| "learning_rate": 3.355450236966825e-05, |
| "loss": 0.357, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.1943734015345269, |
| "grad_norm": 0.23794330588394164, |
| "learning_rate": 3.350710900473934e-05, |
| "loss": 0.3368, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.19693094629156, |
| "grad_norm": 0.24430596176344385, |
| "learning_rate": 3.345971563981043e-05, |
| "loss": 0.3387, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.1994884910485935, |
| "grad_norm": 0.22981260995180924, |
| "learning_rate": 3.3412322274881516e-05, |
| "loss": 0.3394, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.2020460358056266, |
| "grad_norm": 0.26211223679278534, |
| "learning_rate": 3.336492890995261e-05, |
| "loss": 0.3319, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.2046035805626598, |
| "grad_norm": 0.20949166867985375, |
| "learning_rate": 3.33175355450237e-05, |
| "loss": 0.3247, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.207161125319693, |
| "grad_norm": 0.26920054172152863, |
| "learning_rate": 3.327014218009479e-05, |
| "loss": 0.3266, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.2097186700767264, |
| "grad_norm": 0.23259269182122375, |
| "learning_rate": 3.3222748815165875e-05, |
| "loss": 0.336, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.2122762148337596, |
| "grad_norm": 0.2544872114348285, |
| "learning_rate": 3.3175355450236966e-05, |
| "loss": 0.3288, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.2148337595907928, |
| "grad_norm": 0.23096314256849135, |
| "learning_rate": 3.312796208530806e-05, |
| "loss": 0.338, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.2173913043478262, |
| "grad_norm": 0.2714305850528602, |
| "learning_rate": 3.308056872037915e-05, |
| "loss": 0.3447, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.2199488491048593, |
| "grad_norm": 0.27398730927997655, |
| "learning_rate": 3.303317535545024e-05, |
| "loss": 0.328, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.2225063938618925, |
| "grad_norm": 0.21842573163699253, |
| "learning_rate": 3.298578199052133e-05, |
| "loss": 0.3434, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.2250639386189257, |
| "grad_norm": 0.24231426743387735, |
| "learning_rate": 3.293838862559242e-05, |
| "loss": 0.3355, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.227621483375959, |
| "grad_norm": 0.23387954201665254, |
| "learning_rate": 3.289099526066351e-05, |
| "loss": 0.3456, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.2301790281329923, |
| "grad_norm": 0.2240321236806126, |
| "learning_rate": 3.28436018957346e-05, |
| "loss": 0.3376, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.2327365728900257, |
| "grad_norm": 0.2261690321581938, |
| "learning_rate": 3.279620853080569e-05, |
| "loss": 0.3313, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.2352941176470589, |
| "grad_norm": 0.27592615919196145, |
| "learning_rate": 3.274881516587678e-05, |
| "loss": 0.3385, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.237851662404092, |
| "grad_norm": 0.1983777165414548, |
| "learning_rate": 3.2701421800947866e-05, |
| "loss": 0.3344, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.2404092071611252, |
| "grad_norm": 0.25775855180422713, |
| "learning_rate": 3.265402843601896e-05, |
| "loss": 0.349, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.2429667519181586, |
| "grad_norm": 0.21057070064196648, |
| "learning_rate": 3.260663507109005e-05, |
| "loss": 0.3226, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.2455242966751918, |
| "grad_norm": 0.25264888053163403, |
| "learning_rate": 3.255924170616114e-05, |
| "loss": 0.3405, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.248081841432225, |
| "grad_norm": 0.20358857621290893, |
| "learning_rate": 3.251184834123223e-05, |
| "loss": 0.3384, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.2506393861892584, |
| "grad_norm": 0.2221188350040554, |
| "learning_rate": 3.246445497630332e-05, |
| "loss": 0.3592, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.2531969309462916, |
| "grad_norm": 0.22907812456671411, |
| "learning_rate": 3.2417061611374414e-05, |
| "loss": 0.3509, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.2557544757033248, |
| "grad_norm": 0.2203229089376764, |
| "learning_rate": 3.23696682464455e-05, |
| "loss": 0.3157, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.258312020460358, |
| "grad_norm": 0.22923138047926875, |
| "learning_rate": 3.232227488151659e-05, |
| "loss": 0.3321, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.2608695652173914, |
| "grad_norm": 0.20989998077940591, |
| "learning_rate": 3.227488151658768e-05, |
| "loss": 0.3293, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.2634271099744245, |
| "grad_norm": 0.21568832380392375, |
| "learning_rate": 3.222748815165877e-05, |
| "loss": 0.3466, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.265984654731458, |
| "grad_norm": 0.22068990151180867, |
| "learning_rate": 3.218009478672986e-05, |
| "loss": 0.3346, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.2685421994884911, |
| "grad_norm": 0.22072570403379316, |
| "learning_rate": 3.213270142180095e-05, |
| "loss": 0.3415, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.2710997442455243, |
| "grad_norm": 0.22130125503638862, |
| "learning_rate": 3.208530805687204e-05, |
| "loss": 0.3237, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.2736572890025575, |
| "grad_norm": 0.21888368739994798, |
| "learning_rate": 3.2037914691943124e-05, |
| "loss": 0.3232, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.2762148337595907, |
| "grad_norm": 0.23237851118888075, |
| "learning_rate": 3.1990521327014215e-05, |
| "loss": 0.3362, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.278772378516624, |
| "grad_norm": 0.20408840058481117, |
| "learning_rate": 3.1943127962085314e-05, |
| "loss": 0.331, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.2813299232736572, |
| "grad_norm": 0.25671575639210675, |
| "learning_rate": 3.18957345971564e-05, |
| "loss": 0.3483, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.2838874680306906, |
| "grad_norm": 0.21146534380332607, |
| "learning_rate": 3.184834123222749e-05, |
| "loss": 0.3179, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.2864450127877238, |
| "grad_norm": 0.22647977672526137, |
| "learning_rate": 3.180094786729858e-05, |
| "loss": 0.3371, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.289002557544757, |
| "grad_norm": 0.2024444911259877, |
| "learning_rate": 3.175355450236967e-05, |
| "loss": 0.3208, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.2915601023017902, |
| "grad_norm": 0.23292871699588752, |
| "learning_rate": 3.170616113744076e-05, |
| "loss": 0.3325, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.2941176470588236, |
| "grad_norm": 0.24617143877927294, |
| "learning_rate": 3.165876777251185e-05, |
| "loss": 0.3456, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.2966751918158568, |
| "grad_norm": 0.21521749139279983, |
| "learning_rate": 3.161137440758294e-05, |
| "loss": 0.3632, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.29923273657289, |
| "grad_norm": 0.23710165276474784, |
| "learning_rate": 3.156398104265403e-05, |
| "loss": 0.3258, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.3017902813299234, |
| "grad_norm": 0.2182566275526034, |
| "learning_rate": 3.1516587677725115e-05, |
| "loss": 0.3254, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.3043478260869565, |
| "grad_norm": 0.2567309562573808, |
| "learning_rate": 3.1469194312796207e-05, |
| "loss": 0.3393, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.3069053708439897, |
| "grad_norm": 0.21736576266820642, |
| "learning_rate": 3.1421800947867305e-05, |
| "loss": 0.3237, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.309462915601023, |
| "grad_norm": 0.2476914139591077, |
| "learning_rate": 3.137440758293839e-05, |
| "loss": 0.3387, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.3120204603580563, |
| "grad_norm": 0.21116880886723996, |
| "learning_rate": 3.132701421800948e-05, |
| "loss": 0.3342, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.3145780051150895, |
| "grad_norm": 0.2282427820832504, |
| "learning_rate": 3.127962085308057e-05, |
| "loss": 0.3406, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.317135549872123, |
| "grad_norm": 0.220656045586937, |
| "learning_rate": 3.123222748815166e-05, |
| "loss": 0.3481, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.319693094629156, |
| "grad_norm": 0.21477244949218188, |
| "learning_rate": 3.118483412322275e-05, |
| "loss": 0.3417, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.3222506393861893, |
| "grad_norm": 0.2123179538890313, |
| "learning_rate": 3.113744075829384e-05, |
| "loss": 0.3374, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.3248081841432224, |
| "grad_norm": 0.20966406562861323, |
| "learning_rate": 3.109004739336493e-05, |
| "loss": 0.332, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.3273657289002558, |
| "grad_norm": 0.1967874776267105, |
| "learning_rate": 3.104265402843602e-05, |
| "loss": 0.327, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.329923273657289, |
| "grad_norm": 0.21447737012880227, |
| "learning_rate": 3.0995260663507106e-05, |
| "loss": 0.3342, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.3324808184143222, |
| "grad_norm": 0.22702076072063435, |
| "learning_rate": 3.09478672985782e-05, |
| "loss": 0.357, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.3350383631713556, |
| "grad_norm": 0.24746439681290008, |
| "learning_rate": 3.0900473933649296e-05, |
| "loss": 0.3489, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.3375959079283888, |
| "grad_norm": 0.21236354577498476, |
| "learning_rate": 3.085308056872038e-05, |
| "loss": 0.309, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.340153452685422, |
| "grad_norm": 0.21060912049882632, |
| "learning_rate": 3.080568720379147e-05, |
| "loss": 0.3191, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.3427109974424551, |
| "grad_norm": 0.20505413275714032, |
| "learning_rate": 3.075829383886256e-05, |
| "loss": 0.3451, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.3452685421994885, |
| "grad_norm": 0.24615234141005185, |
| "learning_rate": 3.0710900473933654e-05, |
| "loss": 0.3528, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.3478260869565217, |
| "grad_norm": 0.22369032901485378, |
| "learning_rate": 3.066350710900474e-05, |
| "loss": 0.3284, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.350383631713555, |
| "grad_norm": 0.22838183924629246, |
| "learning_rate": 3.061611374407583e-05, |
| "loss": 0.3374, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.3529411764705883, |
| "grad_norm": 0.27105416556647893, |
| "learning_rate": 3.056872037914692e-05, |
| "loss": 0.3464, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.3554987212276215, |
| "grad_norm": 0.20957984851168898, |
| "learning_rate": 3.052132701421801e-05, |
| "loss": 0.3284, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.3580562659846547, |
| "grad_norm": 0.2320992461077895, |
| "learning_rate": 3.0473933649289098e-05, |
| "loss": 0.3486, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.3606138107416879, |
| "grad_norm": 0.21826085112206514, |
| "learning_rate": 3.042654028436019e-05, |
| "loss": 0.3385, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.3631713554987213, |
| "grad_norm": 0.2098867818685027, |
| "learning_rate": 3.0379146919431277e-05, |
| "loss": 0.3281, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.3657289002557544, |
| "grad_norm": 0.20672602706389986, |
| "learning_rate": 3.0331753554502375e-05, |
| "loss": 0.3347, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.3682864450127878, |
| "grad_norm": 0.19992143809636548, |
| "learning_rate": 3.0284360189573463e-05, |
| "loss": 0.3359, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.370843989769821, |
| "grad_norm": 0.20352905078357075, |
| "learning_rate": 3.023696682464455e-05, |
| "loss": 0.3378, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.3734015345268542, |
| "grad_norm": 0.20917931960339106, |
| "learning_rate": 3.0189573459715642e-05, |
| "loss": 0.3477, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.3759590792838874, |
| "grad_norm": 0.19805266052365922, |
| "learning_rate": 3.014218009478673e-05, |
| "loss": 0.3268, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.3785166240409208, |
| "grad_norm": 0.19502494739762932, |
| "learning_rate": 3.009478672985782e-05, |
| "loss": 0.3493, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.381074168797954, |
| "grad_norm": 0.19983564710769386, |
| "learning_rate": 3.004739336492891e-05, |
| "loss": 0.3428, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.3836317135549872, |
| "grad_norm": 0.22658682357477436, |
| "learning_rate": 3e-05, |
| "loss": 0.3378, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.3861892583120206, |
| "grad_norm": 0.23466719921978055, |
| "learning_rate": 2.995260663507109e-05, |
| "loss": 0.3353, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.3887468030690537, |
| "grad_norm": 0.21015550969200184, |
| "learning_rate": 2.990521327014218e-05, |
| "loss": 0.35, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.391304347826087, |
| "grad_norm": 0.2614967884472048, |
| "learning_rate": 2.9857819905213268e-05, |
| "loss": 0.3458, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.39386189258312, |
| "grad_norm": 0.20442313390946987, |
| "learning_rate": 2.9810426540284363e-05, |
| "loss": 0.3197, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.3964194373401535, |
| "grad_norm": 0.22800479961193035, |
| "learning_rate": 2.9763033175355454e-05, |
| "loss": 0.3216, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.3989769820971867, |
| "grad_norm": 0.23631248009519853, |
| "learning_rate": 2.9715639810426542e-05, |
| "loss": 0.3475, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.40153452685422, |
| "grad_norm": 0.2148560333286399, |
| "learning_rate": 2.9668246445497633e-05, |
| "loss": 0.3321, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.4040920716112533, |
| "grad_norm": 0.22336842918171954, |
| "learning_rate": 2.962085308056872e-05, |
| "loss": 0.3488, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.4066496163682864, |
| "grad_norm": 0.21805777627104153, |
| "learning_rate": 2.9573459715639813e-05, |
| "loss": 0.3377, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.4092071611253196, |
| "grad_norm": 0.197363455632153, |
| "learning_rate": 2.95260663507109e-05, |
| "loss": 0.3395, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.4117647058823528, |
| "grad_norm": 0.273730892459262, |
| "learning_rate": 2.9478672985781992e-05, |
| "loss": 0.3637, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.4143222506393862, |
| "grad_norm": 0.21968147378847208, |
| "learning_rate": 2.943127962085308e-05, |
| "loss": 0.3458, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.4168797953964194, |
| "grad_norm": 0.22407752561098943, |
| "learning_rate": 2.938388625592417e-05, |
| "loss": 0.3341, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.4194373401534528, |
| "grad_norm": 0.24706387013454778, |
| "learning_rate": 2.933649289099526e-05, |
| "loss": 0.3393, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.421994884910486, |
| "grad_norm": 0.23943822236699114, |
| "learning_rate": 2.9289099526066354e-05, |
| "loss": 0.3476, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.4245524296675192, |
| "grad_norm": 0.21133262016275636, |
| "learning_rate": 2.9241706161137445e-05, |
| "loss": 0.3317, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.4271099744245523, |
| "grad_norm": 0.2187205757977556, |
| "learning_rate": 2.9194312796208533e-05, |
| "loss": 0.3344, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.4296675191815857, |
| "grad_norm": 0.2278521035319285, |
| "learning_rate": 2.9146919431279624e-05, |
| "loss": 0.3391, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.432225063938619, |
| "grad_norm": 0.22984861625880482, |
| "learning_rate": 2.9099526066350712e-05, |
| "loss": 0.3289, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.434782608695652, |
| "grad_norm": 0.2126484088527432, |
| "learning_rate": 2.9052132701421804e-05, |
| "loss": 0.3216, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.4373401534526855, |
| "grad_norm": 0.21963164389365947, |
| "learning_rate": 2.9004739336492892e-05, |
| "loss": 0.3366, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.4398976982097187, |
| "grad_norm": 0.2271030491918638, |
| "learning_rate": 2.8957345971563983e-05, |
| "loss": 0.3287, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.4424552429667519, |
| "grad_norm": 0.22596502012606307, |
| "learning_rate": 2.890995260663507e-05, |
| "loss": 0.3445, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.445012787723785, |
| "grad_norm": 0.2092819883191256, |
| "learning_rate": 2.8862559241706162e-05, |
| "loss": 0.3188, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.4475703324808185, |
| "grad_norm": 0.2085679869485133, |
| "learning_rate": 2.881516587677725e-05, |
| "loss": 0.3297, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.4501278772378516, |
| "grad_norm": 0.20731318095051873, |
| "learning_rate": 2.8767772511848338e-05, |
| "loss": 0.3254, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.452685421994885, |
| "grad_norm": 0.22614524862117436, |
| "learning_rate": 2.8720379146919436e-05, |
| "loss": 0.3389, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.4552429667519182, |
| "grad_norm": 0.22116772067000307, |
| "learning_rate": 2.8672985781990524e-05, |
| "loss": 0.3372, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.4578005115089514, |
| "grad_norm": 0.2081439129486148, |
| "learning_rate": 2.8625592417061616e-05, |
| "loss": 0.3185, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.4603580562659846, |
| "grad_norm": 0.2101126677570276, |
| "learning_rate": 2.8578199052132704e-05, |
| "loss": 0.3386, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.4629156010230178, |
| "grad_norm": 0.20857004085030162, |
| "learning_rate": 2.853080568720379e-05, |
| "loss": 0.3317, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.4654731457800512, |
| "grad_norm": 0.21972466939910235, |
| "learning_rate": 2.8483412322274883e-05, |
| "loss": 0.3421, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.4680306905370843, |
| "grad_norm": 0.22670934909893178, |
| "learning_rate": 2.843601895734597e-05, |
| "loss": 0.3373, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.4705882352941178, |
| "grad_norm": 0.20335752165987916, |
| "learning_rate": 2.8388625592417062e-05, |
| "loss": 0.3489, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.473145780051151, |
| "grad_norm": 0.21670951576300224, |
| "learning_rate": 2.834123222748815e-05, |
| "loss": 0.3436, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.4757033248081841, |
| "grad_norm": 0.24188198119161047, |
| "learning_rate": 2.829383886255924e-05, |
| "loss": 0.346, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.4782608695652173, |
| "grad_norm": 0.19284248575531912, |
| "learning_rate": 2.824644549763033e-05, |
| "loss": 0.3412, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.4808184143222507, |
| "grad_norm": 0.21408001651811503, |
| "learning_rate": 2.8199052132701424e-05, |
| "loss": 0.3359, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.4833759590792839, |
| "grad_norm": 0.23043383318843624, |
| "learning_rate": 2.8151658767772515e-05, |
| "loss": 0.3352, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.485933503836317, |
| "grad_norm": 0.19637762705882086, |
| "learning_rate": 2.8104265402843603e-05, |
| "loss": 0.3335, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.4884910485933505, |
| "grad_norm": 0.21814477890754627, |
| "learning_rate": 2.8056872037914695e-05, |
| "loss": 0.3219, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.4910485933503836, |
| "grad_norm": 0.24377115034783173, |
| "learning_rate": 2.8009478672985783e-05, |
| "loss": 0.349, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.4936061381074168, |
| "grad_norm": 0.20020466798938577, |
| "learning_rate": 2.7962085308056874e-05, |
| "loss": 0.3364, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.49616368286445, |
| "grad_norm": 0.2363047057326481, |
| "learning_rate": 2.7914691943127962e-05, |
| "loss": 0.3565, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.4987212276214834, |
| "grad_norm": 0.21956341661227455, |
| "learning_rate": 2.7867298578199053e-05, |
| "loss": 0.3377, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.5012787723785166, |
| "grad_norm": 0.2267586568563103, |
| "learning_rate": 2.781990521327014e-05, |
| "loss": 0.328, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.50383631713555, |
| "grad_norm": 0.2047581074184725, |
| "learning_rate": 2.7772511848341233e-05, |
| "loss": 0.3468, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.5063938618925832, |
| "grad_norm": 0.24688978065050932, |
| "learning_rate": 2.772511848341232e-05, |
| "loss": 0.3279, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.5089514066496164, |
| "grad_norm": 0.21201942656506023, |
| "learning_rate": 2.7677725118483415e-05, |
| "loss": 0.3435, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.5115089514066495, |
| "grad_norm": 0.21407415709345273, |
| "learning_rate": 2.7630331753554507e-05, |
| "loss": 0.3338, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.5140664961636827, |
| "grad_norm": 0.24244329625085864, |
| "learning_rate": 2.7582938388625595e-05, |
| "loss": 0.3353, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.5166240409207161, |
| "grad_norm": 0.21106578023597755, |
| "learning_rate": 2.7535545023696686e-05, |
| "loss": 0.3185, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.5191815856777495, |
| "grad_norm": 0.22046969326673913, |
| "learning_rate": 2.7488151658767774e-05, |
| "loss": 0.3405, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.5217391304347827, |
| "grad_norm": 0.22082584514229614, |
| "learning_rate": 2.7440758293838865e-05, |
| "loss": 0.3374, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.5242966751918159, |
| "grad_norm": 0.2214039800077301, |
| "learning_rate": 2.7393364928909953e-05, |
| "loss": 0.3408, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.526854219948849, |
| "grad_norm": 0.21162564133453074, |
| "learning_rate": 2.7345971563981044e-05, |
| "loss": 0.3223, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.5294117647058822, |
| "grad_norm": 0.21038119410478973, |
| "learning_rate": 2.7298578199052132e-05, |
| "loss": 0.3232, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.5319693094629157, |
| "grad_norm": 0.2232877311097297, |
| "learning_rate": 2.7251184834123224e-05, |
| "loss": 0.3569, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.5345268542199488, |
| "grad_norm": 0.21018531562144588, |
| "learning_rate": 2.720379146919431e-05, |
| "loss": 0.3572, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.5370843989769822, |
| "grad_norm": 0.18432057304329444, |
| "learning_rate": 2.7156398104265403e-05, |
| "loss": 0.3239, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.5396419437340154, |
| "grad_norm": 0.23256686957170164, |
| "learning_rate": 2.7109004739336498e-05, |
| "loss": 0.327, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.5421994884910486, |
| "grad_norm": 0.2168659241371808, |
| "learning_rate": 2.7061611374407586e-05, |
| "loss": 0.3345, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.5447570332480818, |
| "grad_norm": 0.2066176620461704, |
| "learning_rate": 2.7014218009478677e-05, |
| "loss": 0.3263, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.547314578005115, |
| "grad_norm": 0.2510122104603682, |
| "learning_rate": 2.6966824644549765e-05, |
| "loss": 0.3383, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.5498721227621484, |
| "grad_norm": 0.21620946293671467, |
| "learning_rate": 2.6919431279620856e-05, |
| "loss": 0.3421, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.5524296675191815, |
| "grad_norm": 0.2374053609246905, |
| "learning_rate": 2.6872037914691944e-05, |
| "loss": 0.3395, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.554987212276215, |
| "grad_norm": 0.23310585207272871, |
| "learning_rate": 2.6824644549763032e-05, |
| "loss": 0.3328, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.5575447570332481, |
| "grad_norm": 0.21706136950371865, |
| "learning_rate": 2.6777251184834124e-05, |
| "loss": 0.3291, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.5601023017902813, |
| "grad_norm": 0.2557349212164624, |
| "learning_rate": 2.672985781990521e-05, |
| "loss": 0.3329, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.5626598465473145, |
| "grad_norm": 0.21369332563945545, |
| "learning_rate": 2.6682464454976303e-05, |
| "loss": 0.3403, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.5652173913043477, |
| "grad_norm": 0.22249413300101917, |
| "learning_rate": 2.663507109004739e-05, |
| "loss": 0.3295, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.567774936061381, |
| "grad_norm": 0.2352350138406436, |
| "learning_rate": 2.658767772511849e-05, |
| "loss": 0.3491, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.5703324808184145, |
| "grad_norm": 0.21444744975093857, |
| "learning_rate": 2.6540284360189577e-05, |
| "loss": 0.3515, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.5728900255754477, |
| "grad_norm": 0.20106535936796008, |
| "learning_rate": 2.6492890995260665e-05, |
| "loss": 0.3415, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.5754475703324808, |
| "grad_norm": 0.24355009019358553, |
| "learning_rate": 2.6445497630331756e-05, |
| "loss": 0.3382, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.578005115089514, |
| "grad_norm": 0.19632069646990316, |
| "learning_rate": 2.6398104265402844e-05, |
| "loss": 0.3344, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.5805626598465472, |
| "grad_norm": 0.2041069390497847, |
| "learning_rate": 2.6350710900473935e-05, |
| "loss": 0.3314, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.5831202046035806, |
| "grad_norm": 0.23307172936850007, |
| "learning_rate": 2.6303317535545023e-05, |
| "loss": 0.3279, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.5856777493606138, |
| "grad_norm": 0.22016471550055694, |
| "learning_rate": 2.6255924170616115e-05, |
| "loss": 0.3238, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.5882352941176472, |
| "grad_norm": 0.21161098152167546, |
| "learning_rate": 2.6208530805687203e-05, |
| "loss": 0.3382, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.5907928388746804, |
| "grad_norm": 0.23095386869319134, |
| "learning_rate": 2.6161137440758294e-05, |
| "loss": 0.3316, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.5933503836317136, |
| "grad_norm": 0.2258130781665819, |
| "learning_rate": 2.6113744075829382e-05, |
| "loss": 0.3184, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.5959079283887467, |
| "grad_norm": 0.2101743033652242, |
| "learning_rate": 2.6066350710900477e-05, |
| "loss": 0.3525, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.59846547314578, |
| "grad_norm": 0.23556388836544728, |
| "learning_rate": 2.6018957345971568e-05, |
| "loss": 0.3596, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.6010230179028133, |
| "grad_norm": 0.2173806495933601, |
| "learning_rate": 2.5971563981042656e-05, |
| "loss": 0.3367, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.6035805626598465, |
| "grad_norm": 0.21332385463283657, |
| "learning_rate": 2.5924170616113747e-05, |
| "loss": 0.3371, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.60613810741688, |
| "grad_norm": 0.20121738409593162, |
| "learning_rate": 2.5876777251184835e-05, |
| "loss": 0.3308, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.608695652173913, |
| "grad_norm": 0.22736961793911684, |
| "learning_rate": 2.5829383886255927e-05, |
| "loss": 0.3453, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.6112531969309463, |
| "grad_norm": 0.19872074468079123, |
| "learning_rate": 2.5781990521327014e-05, |
| "loss": 0.3278, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.6138107416879794, |
| "grad_norm": 0.2314685609756946, |
| "learning_rate": 2.5734597156398106e-05, |
| "loss": 0.3466, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.6163682864450126, |
| "grad_norm": 0.21359598281755646, |
| "learning_rate": 2.5687203791469194e-05, |
| "loss": 0.3568, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.618925831202046, |
| "grad_norm": 0.2410455323018816, |
| "learning_rate": 2.5639810426540285e-05, |
| "loss": 0.3212, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.6214833759590794, |
| "grad_norm": 0.253509763295898, |
| "learning_rate": 2.5592417061611373e-05, |
| "loss": 0.3589, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.6240409207161126, |
| "grad_norm": 0.22712797799055953, |
| "learning_rate": 2.5545023696682464e-05, |
| "loss": 0.3349, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.6265984654731458, |
| "grad_norm": 0.22386259809972237, |
| "learning_rate": 2.549763033175356e-05, |
| "loss": 0.3261, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.629156010230179, |
| "grad_norm": 0.2605466792154154, |
| "learning_rate": 2.5450236966824647e-05, |
| "loss": 0.3435, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.6317135549872122, |
| "grad_norm": 0.20761172721493334, |
| "learning_rate": 2.540284360189574e-05, |
| "loss": 0.3251, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.6342710997442456, |
| "grad_norm": 0.24685722210051553, |
| "learning_rate": 2.5355450236966826e-05, |
| "loss": 0.3235, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.6368286445012787, |
| "grad_norm": 0.21434302571838307, |
| "learning_rate": 2.5308056872037918e-05, |
| "loss": 0.3207, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.6393861892583121, |
| "grad_norm": 0.21184549514913412, |
| "learning_rate": 2.5260663507109006e-05, |
| "loss": 0.3238, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.6419437340153453, |
| "grad_norm": 0.21252363567202226, |
| "learning_rate": 2.5213270142180097e-05, |
| "loss": 0.323, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.6445012787723785, |
| "grad_norm": 0.21829115176694264, |
| "learning_rate": 2.5165876777251185e-05, |
| "loss": 0.3321, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.6470588235294117, |
| "grad_norm": 0.20922978833957703, |
| "learning_rate": 2.5118483412322273e-05, |
| "loss": 0.326, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.6496163682864449, |
| "grad_norm": 0.2091095674028597, |
| "learning_rate": 2.5071090047393364e-05, |
| "loss": 0.3076, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.6521739130434783, |
| "grad_norm": 0.20714939228335966, |
| "learning_rate": 2.5023696682464452e-05, |
| "loss": 0.3304, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.6547314578005117, |
| "grad_norm": 0.19116018976328764, |
| "learning_rate": 2.4976303317535547e-05, |
| "loss": 0.3274, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.6572890025575449, |
| "grad_norm": 0.19243515464801864, |
| "learning_rate": 2.4928909952606635e-05, |
| "loss": 0.3397, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.659846547314578, |
| "grad_norm": 0.22171437826424312, |
| "learning_rate": 2.4881516587677726e-05, |
| "loss": 0.3404, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.6624040920716112, |
| "grad_norm": 0.18811862144302852, |
| "learning_rate": 2.4834123222748817e-05, |
| "loss": 0.3294, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.6649616368286444, |
| "grad_norm": 0.20726034225043538, |
| "learning_rate": 2.4786729857819905e-05, |
| "loss": 0.3376, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.6675191815856778, |
| "grad_norm": 0.22641125026229106, |
| "learning_rate": 2.4739336492890997e-05, |
| "loss": 0.3315, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.670076726342711, |
| "grad_norm": 0.19760668759690572, |
| "learning_rate": 2.4691943127962085e-05, |
| "loss": 0.3484, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.6726342710997444, |
| "grad_norm": 0.2036460572936716, |
| "learning_rate": 2.4644549763033176e-05, |
| "loss": 0.3405, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.6751918158567776, |
| "grad_norm": 0.19580889345429936, |
| "learning_rate": 2.4597156398104264e-05, |
| "loss": 0.3311, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.6777493606138107, |
| "grad_norm": 0.20331485010582212, |
| "learning_rate": 2.454976303317536e-05, |
| "loss": 0.3319, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.680306905370844, |
| "grad_norm": 0.2003381154185122, |
| "learning_rate": 2.4502369668246447e-05, |
| "loss": 0.3338, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.682864450127877, |
| "grad_norm": 0.22901909585607055, |
| "learning_rate": 2.4454976303317538e-05, |
| "loss": 0.3439, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.6854219948849105, |
| "grad_norm": 0.2072701167914152, |
| "learning_rate": 2.4407582938388626e-05, |
| "loss": 0.3299, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.6879795396419437, |
| "grad_norm": 0.2156044161532469, |
| "learning_rate": 2.4360189573459717e-05, |
| "loss": 0.3356, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.690537084398977, |
| "grad_norm": 0.22960331769603365, |
| "learning_rate": 2.431279620853081e-05, |
| "loss": 0.3211, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.6930946291560103, |
| "grad_norm": 0.184836419291593, |
| "learning_rate": 2.4265402843601897e-05, |
| "loss": 0.3134, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.6956521739130435, |
| "grad_norm": 0.22152975307273395, |
| "learning_rate": 2.4218009478672988e-05, |
| "loss": 0.3556, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.6982097186700766, |
| "grad_norm": 0.27533636995577504, |
| "learning_rate": 2.4170616113744076e-05, |
| "loss": 0.333, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.7007672634271098, |
| "grad_norm": 0.20239642573133182, |
| "learning_rate": 2.4123222748815167e-05, |
| "loss": 0.3244, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.7033248081841432, |
| "grad_norm": 0.19215048920041694, |
| "learning_rate": 2.4075829383886255e-05, |
| "loss": 0.3261, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.7058823529411766, |
| "grad_norm": 0.21226322101300024, |
| "learning_rate": 2.402843601895735e-05, |
| "loss": 0.3357, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.7084398976982098, |
| "grad_norm": 0.22539028864743468, |
| "learning_rate": 2.3981042654028438e-05, |
| "loss": 0.3472, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.710997442455243, |
| "grad_norm": 0.23393010371109055, |
| "learning_rate": 2.3933649289099526e-05, |
| "loss": 0.3325, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.7135549872122762, |
| "grad_norm": 0.1735369909355323, |
| "learning_rate": 2.3886255924170617e-05, |
| "loss": 0.3158, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.7161125319693094, |
| "grad_norm": 0.21921508136082404, |
| "learning_rate": 2.3838862559241705e-05, |
| "loss": 0.35, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.7186700767263428, |
| "grad_norm": 0.21982061308675563, |
| "learning_rate": 2.3791469194312796e-05, |
| "loss": 0.3479, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.721227621483376, |
| "grad_norm": 0.2169093947973993, |
| "learning_rate": 2.3744075829383888e-05, |
| "loss": 0.3318, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.7237851662404093, |
| "grad_norm": 0.20360889372476712, |
| "learning_rate": 2.369668246445498e-05, |
| "loss": 0.334, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.7263427109974425, |
| "grad_norm": 0.2174062686096523, |
| "learning_rate": 2.3649289099526067e-05, |
| "loss": 0.3385, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.7289002557544757, |
| "grad_norm": 0.20684367968994177, |
| "learning_rate": 2.360189573459716e-05, |
| "loss": 0.3375, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.7314578005115089, |
| "grad_norm": 0.19965154462316637, |
| "learning_rate": 2.3554502369668246e-05, |
| "loss": 0.3253, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.734015345268542, |
| "grad_norm": 0.21474011017766587, |
| "learning_rate": 2.3507109004739338e-05, |
| "loss": 0.3382, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.7365728900255755, |
| "grad_norm": 0.22746922194428235, |
| "learning_rate": 2.345971563981043e-05, |
| "loss": 0.324, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.7391304347826086, |
| "grad_norm": 0.2104457674935215, |
| "learning_rate": 2.3412322274881517e-05, |
| "loss": 0.3392, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.741687979539642, |
| "grad_norm": 0.20283023890137888, |
| "learning_rate": 2.3364928909952608e-05, |
| "loss": 0.3291, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.7442455242966752, |
| "grad_norm": 0.21461127150907236, |
| "learning_rate": 2.3317535545023696e-05, |
| "loss": 0.3386, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.7468030690537084, |
| "grad_norm": 0.1909336493281626, |
| "learning_rate": 2.3270142180094788e-05, |
| "loss": 0.3175, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.7493606138107416, |
| "grad_norm": 0.21259577247012493, |
| "learning_rate": 2.322274881516588e-05, |
| "loss": 0.3403, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.7519181585677748, |
| "grad_norm": 0.20066175215287518, |
| "learning_rate": 2.317535545023697e-05, |
| "loss": 0.3285, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.7544757033248082, |
| "grad_norm": 0.7581076529268704, |
| "learning_rate": 2.3127962085308058e-05, |
| "loss": 0.363, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.7570332480818416, |
| "grad_norm": 0.22657067765448413, |
| "learning_rate": 2.3080568720379146e-05, |
| "loss": 0.3435, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.7595907928388748, |
| "grad_norm": 0.2217418832124222, |
| "learning_rate": 2.3033175355450237e-05, |
| "loss": 0.3197, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.762148337595908, |
| "grad_norm": 0.21459082938179172, |
| "learning_rate": 2.2985781990521325e-05, |
| "loss": 0.3505, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.7647058823529411, |
| "grad_norm": 0.20738951497933816, |
| "learning_rate": 2.293838862559242e-05, |
| "loss": 0.321, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.7672634271099743, |
| "grad_norm": 0.22785561899819126, |
| "learning_rate": 2.2890995260663508e-05, |
| "loss": 0.3424, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.7698209718670077, |
| "grad_norm": 0.22927074980811404, |
| "learning_rate": 2.28436018957346e-05, |
| "loss": 0.3351, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.772378516624041, |
| "grad_norm": 0.23347434762189972, |
| "learning_rate": 2.2796208530805687e-05, |
| "loss": 0.347, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.7749360613810743, |
| "grad_norm": 0.2330189859527237, |
| "learning_rate": 2.274881516587678e-05, |
| "loss": 0.3341, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.7774936061381075, |
| "grad_norm": 0.25074043381573513, |
| "learning_rate": 2.270142180094787e-05, |
| "loss": 0.3172, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.7800511508951407, |
| "grad_norm": 0.21374906832842885, |
| "learning_rate": 2.2654028436018958e-05, |
| "loss": 0.339, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.7826086956521738, |
| "grad_norm": 0.25168218613406507, |
| "learning_rate": 2.260663507109005e-05, |
| "loss": 0.3325, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.785166240409207, |
| "grad_norm": 0.2403091187285791, |
| "learning_rate": 2.2559241706161137e-05, |
| "loss": 0.3478, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.7877237851662404, |
| "grad_norm": 0.22187397630061947, |
| "learning_rate": 2.251184834123223e-05, |
| "loss": 0.3452, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.7902813299232738, |
| "grad_norm": 0.24752310282755516, |
| "learning_rate": 2.2464454976303317e-05, |
| "loss": 0.3356, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.792838874680307, |
| "grad_norm": 0.2084033534950943, |
| "learning_rate": 2.241706161137441e-05, |
| "loss": 0.3259, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.7953964194373402, |
| "grad_norm": 0.2355859217064896, |
| "learning_rate": 2.23696682464455e-05, |
| "loss": 0.3425, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.7979539641943734, |
| "grad_norm": 0.21447292569876703, |
| "learning_rate": 2.232227488151659e-05, |
| "loss": 0.3153, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.8005115089514065, |
| "grad_norm": 0.20854337096420522, |
| "learning_rate": 2.227488151658768e-05, |
| "loss": 0.3352, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.80306905370844, |
| "grad_norm": 0.22064595096377312, |
| "learning_rate": 2.2227488151658766e-05, |
| "loss": 0.3228, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.8056265984654731, |
| "grad_norm": 0.23748592354665862, |
| "learning_rate": 2.2180094786729858e-05, |
| "loss": 0.3407, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.8081841432225065, |
| "grad_norm": 0.25098201166842826, |
| "learning_rate": 2.213270142180095e-05, |
| "loss": 0.3533, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.8107416879795397, |
| "grad_norm": 0.2789258681226503, |
| "learning_rate": 2.208530805687204e-05, |
| "loss": 0.3405, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.813299232736573, |
| "grad_norm": 0.21924763977982134, |
| "learning_rate": 2.203791469194313e-05, |
| "loss": 0.3209, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.815856777493606, |
| "grad_norm": 0.24534901252195856, |
| "learning_rate": 2.199052132701422e-05, |
| "loss": 0.3228, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.8184143222506393, |
| "grad_norm": 0.23769380073414784, |
| "learning_rate": 2.1943127962085308e-05, |
| "loss": 0.3319, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.8209718670076727, |
| "grad_norm": 0.20966116422671724, |
| "learning_rate": 2.18957345971564e-05, |
| "loss": 0.3255, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.8235294117647058, |
| "grad_norm": 0.2278495662047266, |
| "learning_rate": 2.184834123222749e-05, |
| "loss": 0.3234, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.8260869565217392, |
| "grad_norm": 0.22895416972072405, |
| "learning_rate": 2.1800947867298578e-05, |
| "loss": 0.3189, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.8286445012787724, |
| "grad_norm": 0.2086902846375283, |
| "learning_rate": 2.175355450236967e-05, |
| "loss": 0.3472, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.8312020460358056, |
| "grad_norm": 0.19855684843219606, |
| "learning_rate": 2.1706161137440758e-05, |
| "loss": 0.3458, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.8337595907928388, |
| "grad_norm": 0.23552439546401155, |
| "learning_rate": 2.165876777251185e-05, |
| "loss": 0.336, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.836317135549872, |
| "grad_norm": 0.20685861123790114, |
| "learning_rate": 2.161137440758294e-05, |
| "loss": 0.336, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.8388746803069054, |
| "grad_norm": 0.19887491717386577, |
| "learning_rate": 2.156398104265403e-05, |
| "loss": 0.3396, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.8414322250639388, |
| "grad_norm": 0.2509814669536259, |
| "learning_rate": 2.151658767772512e-05, |
| "loss": 0.3441, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.843989769820972, |
| "grad_norm": 0.19522319866892376, |
| "learning_rate": 2.146919431279621e-05, |
| "loss": 0.3106, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.8465473145780051, |
| "grad_norm": 0.18974799063588516, |
| "learning_rate": 2.14218009478673e-05, |
| "loss": 0.3262, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.8491048593350383, |
| "grad_norm": 0.20477382204756456, |
| "learning_rate": 2.1374407582938387e-05, |
| "loss": 0.3429, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.8516624040920715, |
| "grad_norm": 0.2142522572136313, |
| "learning_rate": 2.132701421800948e-05, |
| "loss": 0.3308, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.854219948849105, |
| "grad_norm": 0.1979056292881532, |
| "learning_rate": 2.127962085308057e-05, |
| "loss": 0.3346, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.856777493606138, |
| "grad_norm": 0.20554303110251226, |
| "learning_rate": 2.123222748815166e-05, |
| "loss": 0.3583, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.8593350383631715, |
| "grad_norm": 0.1984154565321334, |
| "learning_rate": 2.118483412322275e-05, |
| "loss": 0.3196, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.8618925831202047, |
| "grad_norm": 0.2008595114867567, |
| "learning_rate": 2.113744075829384e-05, |
| "loss": 0.3427, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.8644501278772379, |
| "grad_norm": 0.21531627949148452, |
| "learning_rate": 2.109004739336493e-05, |
| "loss": 0.3415, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.867007672634271, |
| "grad_norm": 0.20193118670494573, |
| "learning_rate": 2.104265402843602e-05, |
| "loss": 0.3371, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.8695652173913042, |
| "grad_norm": 0.21944975819432885, |
| "learning_rate": 2.099526066350711e-05, |
| "loss": 0.3217, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.8721227621483376, |
| "grad_norm": 0.23381023417915053, |
| "learning_rate": 2.09478672985782e-05, |
| "loss": 0.3406, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.8746803069053708, |
| "grad_norm": 0.19300009053421657, |
| "learning_rate": 2.090047393364929e-05, |
| "loss": 0.3199, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.8772378516624042, |
| "grad_norm": 0.19576466530600098, |
| "learning_rate": 2.0853080568720378e-05, |
| "loss": 0.3215, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.8797953964194374, |
| "grad_norm": 0.21787819537132525, |
| "learning_rate": 2.0805687203791473e-05, |
| "loss": 0.3359, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.8823529411764706, |
| "grad_norm": 0.20623605117402122, |
| "learning_rate": 2.075829383886256e-05, |
| "loss": 0.3396, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.8849104859335037, |
| "grad_norm": 0.19807063269430017, |
| "learning_rate": 2.0710900473933652e-05, |
| "loss": 0.3321, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.887468030690537, |
| "grad_norm": 0.21340084606280826, |
| "learning_rate": 2.066350710900474e-05, |
| "loss": 0.3376, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.8900255754475703, |
| "grad_norm": 0.2117778713699439, |
| "learning_rate": 2.061611374407583e-05, |
| "loss": 0.318, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.8925831202046037, |
| "grad_norm": 0.19496876658086634, |
| "learning_rate": 2.056872037914692e-05, |
| "loss": 0.3275, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.895140664961637, |
| "grad_norm": 0.22772399554231024, |
| "learning_rate": 2.052132701421801e-05, |
| "loss": 0.3456, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.89769820971867, |
| "grad_norm": 0.19861753270620258, |
| "learning_rate": 2.0473933649289102e-05, |
| "loss": 0.3364, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.9002557544757033, |
| "grad_norm": 0.2101418258514019, |
| "learning_rate": 2.042654028436019e-05, |
| "loss": 0.3324, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.9028132992327365, |
| "grad_norm": 0.19738568484825283, |
| "learning_rate": 2.037914691943128e-05, |
| "loss": 0.3458, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.9053708439897699, |
| "grad_norm": 0.22341732627665845, |
| "learning_rate": 2.033175355450237e-05, |
| "loss": 0.3472, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.907928388746803, |
| "grad_norm": 0.20794044931146008, |
| "learning_rate": 2.028436018957346e-05, |
| "loss": 0.3367, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.9104859335038364, |
| "grad_norm": 0.20491964629174395, |
| "learning_rate": 2.0236966824644552e-05, |
| "loss": 0.3223, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.9130434782608696, |
| "grad_norm": 0.20189894495265795, |
| "learning_rate": 2.018957345971564e-05, |
| "loss": 0.3362, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.9156010230179028, |
| "grad_norm": 0.2048150503497566, |
| "learning_rate": 2.014218009478673e-05, |
| "loss": 0.3426, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.918158567774936, |
| "grad_norm": 0.21954225182865705, |
| "learning_rate": 2.009478672985782e-05, |
| "loss": 0.3528, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.9207161125319692, |
| "grad_norm": 0.22214844960655306, |
| "learning_rate": 2.004739336492891e-05, |
| "loss": 0.3491, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.9232736572890026, |
| "grad_norm": 0.2002610790388588, |
| "learning_rate": 2e-05, |
| "loss": 0.3324, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.9258312020460358, |
| "grad_norm": 0.23222016864966347, |
| "learning_rate": 1.9952606635071093e-05, |
| "loss": 0.3292, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.9283887468030692, |
| "grad_norm": 0.2207542823663722, |
| "learning_rate": 1.990521327014218e-05, |
| "loss": 0.3385, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.9309462915601023, |
| "grad_norm": 0.22749264244194325, |
| "learning_rate": 1.9857819905213272e-05, |
| "loss": 0.316, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.9335038363171355, |
| "grad_norm": 0.1977916254111309, |
| "learning_rate": 1.981042654028436e-05, |
| "loss": 0.3395, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.9360613810741687, |
| "grad_norm": 0.19556691281474403, |
| "learning_rate": 1.976303317535545e-05, |
| "loss": 0.3355, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.938618925831202, |
| "grad_norm": 0.1962514353937156, |
| "learning_rate": 1.9715639810426543e-05, |
| "loss": 0.3156, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.9411764705882353, |
| "grad_norm": 0.23567156259437158, |
| "learning_rate": 1.966824644549763e-05, |
| "loss": 0.3413, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.9437340153452687, |
| "grad_norm": 0.1980383962745943, |
| "learning_rate": 1.9620853080568722e-05, |
| "loss": 0.323, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.9462915601023019, |
| "grad_norm": 0.19505875547934262, |
| "learning_rate": 1.957345971563981e-05, |
| "loss": 0.3342, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.948849104859335, |
| "grad_norm": 0.22978204914718386, |
| "learning_rate": 1.95260663507109e-05, |
| "loss": 0.3438, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.9514066496163682, |
| "grad_norm": 0.19344201193147603, |
| "learning_rate": 1.9478672985781993e-05, |
| "loss": 0.3118, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.9539641943734014, |
| "grad_norm": 0.18582466291193162, |
| "learning_rate": 1.943127962085308e-05, |
| "loss": 0.3375, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.9565217391304348, |
| "grad_norm": 0.21401678800134463, |
| "learning_rate": 1.9383886255924172e-05, |
| "loss": 0.3384, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.959079283887468, |
| "grad_norm": 0.19342159241258478, |
| "learning_rate": 1.933649289099526e-05, |
| "loss": 0.3206, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.9616368286445014, |
| "grad_norm": 0.19399605381147378, |
| "learning_rate": 1.928909952606635e-05, |
| "loss": 0.3384, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.9641943734015346, |
| "grad_norm": 0.20148408812790133, |
| "learning_rate": 1.924170616113744e-05, |
| "loss": 0.3225, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.9667519181585678, |
| "grad_norm": 0.18715476457309554, |
| "learning_rate": 1.9194312796208534e-05, |
| "loss": 0.3288, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.969309462915601, |
| "grad_norm": 0.18815259504289839, |
| "learning_rate": 1.9146919431279622e-05, |
| "loss": 0.3142, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.9718670076726341, |
| "grad_norm": 0.20640473592890973, |
| "learning_rate": 1.9099526066350713e-05, |
| "loss": 0.3191, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.9744245524296675, |
| "grad_norm": 0.19662779268863564, |
| "learning_rate": 1.90521327014218e-05, |
| "loss": 0.3207, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.976982097186701, |
| "grad_norm": 0.2066568679986916, |
| "learning_rate": 1.9004739336492893e-05, |
| "loss": 0.3454, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.979539641943734, |
| "grad_norm": 0.20322475668496154, |
| "learning_rate": 1.895734597156398e-05, |
| "loss": 0.3234, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.9820971867007673, |
| "grad_norm": 0.19059283154521114, |
| "learning_rate": 1.8909952606635072e-05, |
| "loss": 0.3313, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.9846547314578005, |
| "grad_norm": 0.21406140221632183, |
| "learning_rate": 1.8862559241706163e-05, |
| "loss": 0.3465, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.9872122762148337, |
| "grad_norm": 0.21139212441518793, |
| "learning_rate": 1.881516587677725e-05, |
| "loss": 0.3261, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.989769820971867, |
| "grad_norm": 0.19320779992691875, |
| "learning_rate": 1.8767772511848342e-05, |
| "loss": 0.3199, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.9923273657289002, |
| "grad_norm": 0.1948553869588904, |
| "learning_rate": 1.872037914691943e-05, |
| "loss": 0.3295, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.9948849104859336, |
| "grad_norm": 0.19898631153447896, |
| "learning_rate": 1.8672985781990525e-05, |
| "loss": 0.3185, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.9974424552429668, |
| "grad_norm": 0.20500622742531077, |
| "learning_rate": 1.8625592417061613e-05, |
| "loss": 0.3367, |
| "step": 781 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.18745671376713552, |
| "learning_rate": 1.85781990521327e-05, |
| "loss": 0.3035, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.002557544757033, |
| "grad_norm": 0.2785878708893465, |
| "learning_rate": 1.8530805687203792e-05, |
| "loss": 0.2615, |
| "step": 783 |
| }, |
| { |
| "epoch": 2.0051150895140664, |
| "grad_norm": 0.20397480769360993, |
| "learning_rate": 1.848341232227488e-05, |
| "loss": 0.2434, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.0076726342710995, |
| "grad_norm": 0.2923962743620856, |
| "learning_rate": 1.843601895734597e-05, |
| "loss": 0.2478, |
| "step": 785 |
| }, |
| { |
| "epoch": 2.010230179028133, |
| "grad_norm": 0.25689369914334176, |
| "learning_rate": 1.8388625592417063e-05, |
| "loss": 0.2448, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.0127877237851663, |
| "grad_norm": 0.23710484976355836, |
| "learning_rate": 1.8341232227488154e-05, |
| "loss": 0.257, |
| "step": 787 |
| }, |
| { |
| "epoch": 2.0153452685421995, |
| "grad_norm": 0.29563461441097083, |
| "learning_rate": 1.8293838862559242e-05, |
| "loss": 0.2521, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.0179028132992327, |
| "grad_norm": 0.2381040612370418, |
| "learning_rate": 1.8246445497630334e-05, |
| "loss": 0.2499, |
| "step": 789 |
| }, |
| { |
| "epoch": 2.020460358056266, |
| "grad_norm": 0.2291439129489046, |
| "learning_rate": 1.819905213270142e-05, |
| "loss": 0.2438, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.023017902813299, |
| "grad_norm": 0.28685620757378183, |
| "learning_rate": 1.8151658767772513e-05, |
| "loss": 0.2553, |
| "step": 791 |
| }, |
| { |
| "epoch": 2.0255754475703327, |
| "grad_norm": 0.21147497245529764, |
| "learning_rate": 1.8104265402843604e-05, |
| "loss": 0.252, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.028132992327366, |
| "grad_norm": 0.22446603408981508, |
| "learning_rate": 1.8056872037914692e-05, |
| "loss": 0.2536, |
| "step": 793 |
| }, |
| { |
| "epoch": 2.030690537084399, |
| "grad_norm": 0.24541367333886985, |
| "learning_rate": 1.8009478672985784e-05, |
| "loss": 0.2504, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.0332480818414322, |
| "grad_norm": 0.22514879404996416, |
| "learning_rate": 1.796208530805687e-05, |
| "loss": 0.2605, |
| "step": 795 |
| }, |
| { |
| "epoch": 2.0358056265984654, |
| "grad_norm": 0.20624678594072715, |
| "learning_rate": 1.7914691943127963e-05, |
| "loss": 0.2612, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.0383631713554986, |
| "grad_norm": 0.21342231575903908, |
| "learning_rate": 1.7867298578199054e-05, |
| "loss": 0.2499, |
| "step": 797 |
| }, |
| { |
| "epoch": 2.040920716112532, |
| "grad_norm": 0.22708020169166784, |
| "learning_rate": 1.7819905213270146e-05, |
| "loss": 0.2573, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.0434782608695654, |
| "grad_norm": 0.20671082360929366, |
| "learning_rate": 1.7772511848341233e-05, |
| "loss": 0.2517, |
| "step": 799 |
| }, |
| { |
| "epoch": 2.0460358056265986, |
| "grad_norm": 0.20470461882320312, |
| "learning_rate": 1.772511848341232e-05, |
| "loss": 0.2441, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.0485933503836318, |
| "grad_norm": 0.20251032207130173, |
| "learning_rate": 1.7677725118483413e-05, |
| "loss": 0.2547, |
| "step": 801 |
| }, |
| { |
| "epoch": 2.051150895140665, |
| "grad_norm": 0.20368951509303784, |
| "learning_rate": 1.76303317535545e-05, |
| "loss": 0.2439, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.053708439897698, |
| "grad_norm": 0.19922183550926562, |
| "learning_rate": 1.7582938388625595e-05, |
| "loss": 0.2401, |
| "step": 803 |
| }, |
| { |
| "epoch": 2.0562659846547313, |
| "grad_norm": 0.21378847417361496, |
| "learning_rate": 1.7535545023696683e-05, |
| "loss": 0.2598, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.0588235294117645, |
| "grad_norm": 0.2093916676403955, |
| "learning_rate": 1.7488151658767775e-05, |
| "loss": 0.2562, |
| "step": 805 |
| }, |
| { |
| "epoch": 2.061381074168798, |
| "grad_norm": 0.2148148853889112, |
| "learning_rate": 1.7440758293838863e-05, |
| "loss": 0.2543, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.0639386189258313, |
| "grad_norm": 0.20365914748452466, |
| "learning_rate": 1.7393364928909954e-05, |
| "loss": 0.248, |
| "step": 807 |
| }, |
| { |
| "epoch": 2.0664961636828645, |
| "grad_norm": 0.21066398897720096, |
| "learning_rate": 1.7345971563981042e-05, |
| "loss": 0.2638, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.0690537084398977, |
| "grad_norm": 0.20804166422941303, |
| "learning_rate": 1.7298578199052133e-05, |
| "loss": 0.2542, |
| "step": 809 |
| }, |
| { |
| "epoch": 2.071611253196931, |
| "grad_norm": 0.18674967472128892, |
| "learning_rate": 1.7251184834123225e-05, |
| "loss": 0.2405, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.074168797953964, |
| "grad_norm": 0.1906175209072609, |
| "learning_rate": 1.7203791469194313e-05, |
| "loss": 0.2342, |
| "step": 811 |
| }, |
| { |
| "epoch": 2.0767263427109977, |
| "grad_norm": 0.2100046063283888, |
| "learning_rate": 1.7156398104265404e-05, |
| "loss": 0.2432, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.079283887468031, |
| "grad_norm": 0.1967925906674926, |
| "learning_rate": 1.7109004739336492e-05, |
| "loss": 0.2413, |
| "step": 813 |
| }, |
| { |
| "epoch": 2.081841432225064, |
| "grad_norm": 0.1985022110628129, |
| "learning_rate": 1.7061611374407587e-05, |
| "loss": 0.2412, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.084398976982097, |
| "grad_norm": 0.2004462205861864, |
| "learning_rate": 1.7014218009478674e-05, |
| "loss": 0.2608, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.0869565217391304, |
| "grad_norm": 0.2126154513787664, |
| "learning_rate": 1.6966824644549766e-05, |
| "loss": 0.2419, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.0895140664961636, |
| "grad_norm": 0.21724682158013556, |
| "learning_rate": 1.6919431279620854e-05, |
| "loss": 0.2555, |
| "step": 817 |
| }, |
| { |
| "epoch": 2.0920716112531967, |
| "grad_norm": 0.20957633230824144, |
| "learning_rate": 1.6872037914691942e-05, |
| "loss": 0.2434, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.0946291560102304, |
| "grad_norm": 0.1852153835527483, |
| "learning_rate": 1.6824644549763033e-05, |
| "loss": 0.2408, |
| "step": 819 |
| }, |
| { |
| "epoch": 2.0971867007672635, |
| "grad_norm": 0.22086697836670513, |
| "learning_rate": 1.6777251184834124e-05, |
| "loss": 0.2582, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.0997442455242967, |
| "grad_norm": 0.24261708505812196, |
| "learning_rate": 1.6729857819905216e-05, |
| "loss": 0.2632, |
| "step": 821 |
| }, |
| { |
| "epoch": 2.10230179028133, |
| "grad_norm": 0.18366952389698496, |
| "learning_rate": 1.6682464454976304e-05, |
| "loss": 0.2433, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.104859335038363, |
| "grad_norm": 0.2038172463973163, |
| "learning_rate": 1.6635071090047395e-05, |
| "loss": 0.2525, |
| "step": 823 |
| }, |
| { |
| "epoch": 2.1074168797953963, |
| "grad_norm": 0.2012679343362801, |
| "learning_rate": 1.6587677725118483e-05, |
| "loss": 0.249, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.10997442455243, |
| "grad_norm": 0.19324190678914918, |
| "learning_rate": 1.6540284360189574e-05, |
| "loss": 0.2476, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.112531969309463, |
| "grad_norm": 0.19308515698590148, |
| "learning_rate": 1.6492890995260666e-05, |
| "loss": 0.2545, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.1150895140664963, |
| "grad_norm": 0.20072878909780828, |
| "learning_rate": 1.6445497630331754e-05, |
| "loss": 0.2493, |
| "step": 827 |
| }, |
| { |
| "epoch": 2.1176470588235294, |
| "grad_norm": 0.21529840999791708, |
| "learning_rate": 1.6398104265402845e-05, |
| "loss": 0.2505, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.1202046035805626, |
| "grad_norm": 0.190291814924438, |
| "learning_rate": 1.6350710900473933e-05, |
| "loss": 0.2568, |
| "step": 829 |
| }, |
| { |
| "epoch": 2.122762148337596, |
| "grad_norm": 0.1843567434491544, |
| "learning_rate": 1.6303317535545024e-05, |
| "loss": 0.235, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.125319693094629, |
| "grad_norm": 0.20192839632170334, |
| "learning_rate": 1.6255924170616116e-05, |
| "loss": 0.2518, |
| "step": 831 |
| }, |
| { |
| "epoch": 2.1278772378516626, |
| "grad_norm": 0.19505086113061484, |
| "learning_rate": 1.6208530805687207e-05, |
| "loss": 0.2422, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.130434782608696, |
| "grad_norm": 0.18413293323513488, |
| "learning_rate": 1.6161137440758295e-05, |
| "loss": 0.2481, |
| "step": 833 |
| }, |
| { |
| "epoch": 2.132992327365729, |
| "grad_norm": 0.19660864149905055, |
| "learning_rate": 1.6113744075829386e-05, |
| "loss": 0.2482, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.135549872122762, |
| "grad_norm": 0.19108123965299506, |
| "learning_rate": 1.6066350710900474e-05, |
| "loss": 0.2488, |
| "step": 835 |
| }, |
| { |
| "epoch": 2.1381074168797953, |
| "grad_norm": 0.2054493861311576, |
| "learning_rate": 1.6018957345971562e-05, |
| "loss": 0.2508, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.1406649616368285, |
| "grad_norm": 0.19933140761961352, |
| "learning_rate": 1.5971563981042657e-05, |
| "loss": 0.2526, |
| "step": 837 |
| }, |
| { |
| "epoch": 2.1432225063938617, |
| "grad_norm": 0.18520997915505424, |
| "learning_rate": 1.5924170616113745e-05, |
| "loss": 0.2553, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.1457800511508953, |
| "grad_norm": 0.18142714347687713, |
| "learning_rate": 1.5876777251184836e-05, |
| "loss": 0.2404, |
| "step": 839 |
| }, |
| { |
| "epoch": 2.1483375959079285, |
| "grad_norm": 0.19332393510145196, |
| "learning_rate": 1.5829383886255924e-05, |
| "loss": 0.2608, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.1508951406649617, |
| "grad_norm": 0.18239849204776917, |
| "learning_rate": 1.5781990521327015e-05, |
| "loss": 0.2472, |
| "step": 841 |
| }, |
| { |
| "epoch": 2.153452685421995, |
| "grad_norm": 0.19432247568701047, |
| "learning_rate": 1.5734597156398103e-05, |
| "loss": 0.2509, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.156010230179028, |
| "grad_norm": 0.1891425736304601, |
| "learning_rate": 1.5687203791469195e-05, |
| "loss": 0.2544, |
| "step": 843 |
| }, |
| { |
| "epoch": 2.1585677749360612, |
| "grad_norm": 0.1776945543749591, |
| "learning_rate": 1.5639810426540286e-05, |
| "loss": 0.2418, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.1611253196930944, |
| "grad_norm": 0.19454352996860633, |
| "learning_rate": 1.5592417061611374e-05, |
| "loss": 0.2578, |
| "step": 845 |
| }, |
| { |
| "epoch": 2.163682864450128, |
| "grad_norm": 0.19387855469120038, |
| "learning_rate": 1.5545023696682465e-05, |
| "loss": 0.2562, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.166240409207161, |
| "grad_norm": 0.1884476249381793, |
| "learning_rate": 1.5497630331753553e-05, |
| "loss": 0.2435, |
| "step": 847 |
| }, |
| { |
| "epoch": 2.1687979539641944, |
| "grad_norm": 0.19682354969261456, |
| "learning_rate": 1.5450236966824648e-05, |
| "loss": 0.245, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.1713554987212276, |
| "grad_norm": 0.19646857607869206, |
| "learning_rate": 1.5402843601895736e-05, |
| "loss": 0.2421, |
| "step": 849 |
| }, |
| { |
| "epoch": 2.1739130434782608, |
| "grad_norm": 0.1878274831496743, |
| "learning_rate": 1.5355450236966827e-05, |
| "loss": 0.2602, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.176470588235294, |
| "grad_norm": 0.2203759013180319, |
| "learning_rate": 1.5308056872037915e-05, |
| "loss": 0.26, |
| "step": 851 |
| }, |
| { |
| "epoch": 2.1790281329923276, |
| "grad_norm": 0.20353045344689538, |
| "learning_rate": 1.5260663507109007e-05, |
| "loss": 0.2548, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.1815856777493607, |
| "grad_norm": 0.17917216373663247, |
| "learning_rate": 1.5213270142180094e-05, |
| "loss": 0.2481, |
| "step": 853 |
| }, |
| { |
| "epoch": 2.184143222506394, |
| "grad_norm": 0.19965741458148648, |
| "learning_rate": 1.5165876777251187e-05, |
| "loss": 0.2369, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.186700767263427, |
| "grad_norm": 0.1983107544529902, |
| "learning_rate": 1.5118483412322275e-05, |
| "loss": 0.2649, |
| "step": 855 |
| }, |
| { |
| "epoch": 2.1892583120204603, |
| "grad_norm": 0.18889444027577523, |
| "learning_rate": 1.5071090047393365e-05, |
| "loss": 0.2537, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.1918158567774935, |
| "grad_norm": 0.1778943432272497, |
| "learning_rate": 1.5023696682464455e-05, |
| "loss": 0.2416, |
| "step": 857 |
| }, |
| { |
| "epoch": 2.1943734015345266, |
| "grad_norm": 0.1864614456662679, |
| "learning_rate": 1.4976303317535544e-05, |
| "loss": 0.2552, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.1969309462915603, |
| "grad_norm": 0.20406945944259086, |
| "learning_rate": 1.4928909952606634e-05, |
| "loss": 0.2445, |
| "step": 859 |
| }, |
| { |
| "epoch": 2.1994884910485935, |
| "grad_norm": 0.19912968488704036, |
| "learning_rate": 1.4881516587677727e-05, |
| "loss": 0.2483, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.2020460358056266, |
| "grad_norm": 0.1971404080483016, |
| "learning_rate": 1.4834123222748817e-05, |
| "loss": 0.2485, |
| "step": 861 |
| }, |
| { |
| "epoch": 2.20460358056266, |
| "grad_norm": 0.1906866495437284, |
| "learning_rate": 1.4786729857819906e-05, |
| "loss": 0.2422, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.207161125319693, |
| "grad_norm": 0.2236746863882317, |
| "learning_rate": 1.4739336492890996e-05, |
| "loss": 0.2526, |
| "step": 863 |
| }, |
| { |
| "epoch": 2.209718670076726, |
| "grad_norm": 0.20479615550169253, |
| "learning_rate": 1.4691943127962086e-05, |
| "loss": 0.2406, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.21227621483376, |
| "grad_norm": 0.18952772024384357, |
| "learning_rate": 1.4644549763033177e-05, |
| "loss": 0.2473, |
| "step": 865 |
| }, |
| { |
| "epoch": 2.214833759590793, |
| "grad_norm": 0.21288572261909536, |
| "learning_rate": 1.4597156398104267e-05, |
| "loss": 0.2421, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.217391304347826, |
| "grad_norm": 0.22140557077938572, |
| "learning_rate": 1.4549763033175356e-05, |
| "loss": 0.2475, |
| "step": 867 |
| }, |
| { |
| "epoch": 2.2199488491048593, |
| "grad_norm": 0.20708774144192757, |
| "learning_rate": 1.4502369668246446e-05, |
| "loss": 0.2673, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.2225063938618925, |
| "grad_norm": 0.18720660014130933, |
| "learning_rate": 1.4454976303317535e-05, |
| "loss": 0.247, |
| "step": 869 |
| }, |
| { |
| "epoch": 2.2250639386189257, |
| "grad_norm": 0.22218057616305048, |
| "learning_rate": 1.4407582938388625e-05, |
| "loss": 0.2563, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.227621483375959, |
| "grad_norm": 0.19461791848551566, |
| "learning_rate": 1.4360189573459718e-05, |
| "loss": 0.2411, |
| "step": 871 |
| }, |
| { |
| "epoch": 2.2301790281329925, |
| "grad_norm": 0.18465999777437872, |
| "learning_rate": 1.4312796208530808e-05, |
| "loss": 0.2436, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.2327365728900257, |
| "grad_norm": 0.1869706742832914, |
| "learning_rate": 1.4265402843601896e-05, |
| "loss": 0.2468, |
| "step": 873 |
| }, |
| { |
| "epoch": 2.235294117647059, |
| "grad_norm": 0.19859678673304443, |
| "learning_rate": 1.4218009478672985e-05, |
| "loss": 0.2629, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.237851662404092, |
| "grad_norm": 0.18894735140342547, |
| "learning_rate": 1.4170616113744075e-05, |
| "loss": 0.2463, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.2404092071611252, |
| "grad_norm": 0.1841073857339832, |
| "learning_rate": 1.4123222748815165e-05, |
| "loss": 0.2443, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.2429667519181584, |
| "grad_norm": 0.19766216004613152, |
| "learning_rate": 1.4075829383886258e-05, |
| "loss": 0.2445, |
| "step": 877 |
| }, |
| { |
| "epoch": 2.2455242966751916, |
| "grad_norm": 0.20409991732668273, |
| "learning_rate": 1.4028436018957347e-05, |
| "loss": 0.2708, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.2480818414322252, |
| "grad_norm": 0.19977719707950095, |
| "learning_rate": 1.3981042654028437e-05, |
| "loss": 0.2518, |
| "step": 879 |
| }, |
| { |
| "epoch": 2.2506393861892584, |
| "grad_norm": 0.2053796828512668, |
| "learning_rate": 1.3933649289099527e-05, |
| "loss": 0.2495, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.2531969309462916, |
| "grad_norm": 0.17832792645098117, |
| "learning_rate": 1.3886255924170616e-05, |
| "loss": 0.2556, |
| "step": 881 |
| }, |
| { |
| "epoch": 2.2557544757033248, |
| "grad_norm": 0.18840256764724986, |
| "learning_rate": 1.3838862559241708e-05, |
| "loss": 0.2451, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.258312020460358, |
| "grad_norm": 0.19398836581670234, |
| "learning_rate": 1.3791469194312797e-05, |
| "loss": 0.2473, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.260869565217391, |
| "grad_norm": 0.20303902146790734, |
| "learning_rate": 1.3744075829383887e-05, |
| "loss": 0.2597, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.2634271099744243, |
| "grad_norm": 0.18720894136927904, |
| "learning_rate": 1.3696682464454977e-05, |
| "loss": 0.2434, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.265984654731458, |
| "grad_norm": 0.18987210304857877, |
| "learning_rate": 1.3649289099526066e-05, |
| "loss": 0.2525, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.268542199488491, |
| "grad_norm": 0.2048273825139193, |
| "learning_rate": 1.3601895734597156e-05, |
| "loss": 0.2455, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.2710997442455243, |
| "grad_norm": 0.19576486403594287, |
| "learning_rate": 1.3554502369668249e-05, |
| "loss": 0.2613, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.2736572890025575, |
| "grad_norm": 0.20157435141172714, |
| "learning_rate": 1.3507109004739339e-05, |
| "loss": 0.2537, |
| "step": 889 |
| }, |
| { |
| "epoch": 2.2762148337595907, |
| "grad_norm": 0.18228152643827863, |
| "learning_rate": 1.3459715639810428e-05, |
| "loss": 0.2513, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.2787723785166243, |
| "grad_norm": 0.19555632064091633, |
| "learning_rate": 1.3412322274881516e-05, |
| "loss": 0.2586, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.2813299232736575, |
| "grad_norm": 0.203816174533527, |
| "learning_rate": 1.3364928909952606e-05, |
| "loss": 0.2442, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.2838874680306906, |
| "grad_norm": 0.2029139098001244, |
| "learning_rate": 1.3317535545023695e-05, |
| "loss": 0.2561, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.286445012787724, |
| "grad_norm": 0.19048244262223243, |
| "learning_rate": 1.3270142180094788e-05, |
| "loss": 0.2548, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.289002557544757, |
| "grad_norm": 0.19391847669904372, |
| "learning_rate": 1.3222748815165878e-05, |
| "loss": 0.2421, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.29156010230179, |
| "grad_norm": 0.17981132307135597, |
| "learning_rate": 1.3175355450236968e-05, |
| "loss": 0.2532, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.2941176470588234, |
| "grad_norm": 0.17858235830519362, |
| "learning_rate": 1.3127962085308057e-05, |
| "loss": 0.2404, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.296675191815857, |
| "grad_norm": 0.19117496497677566, |
| "learning_rate": 1.3080568720379147e-05, |
| "loss": 0.2593, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.29923273657289, |
| "grad_norm": 0.2016899881448073, |
| "learning_rate": 1.3033175355450238e-05, |
| "loss": 0.2528, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.3017902813299234, |
| "grad_norm": 0.1810650437144312, |
| "learning_rate": 1.2985781990521328e-05, |
| "loss": 0.2402, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.3043478260869565, |
| "grad_norm": 0.19291047749671594, |
| "learning_rate": 1.2938388625592418e-05, |
| "loss": 0.2461, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.3069053708439897, |
| "grad_norm": 0.1939959846671169, |
| "learning_rate": 1.2890995260663507e-05, |
| "loss": 0.2473, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.309462915601023, |
| "grad_norm": 0.1863110176956983, |
| "learning_rate": 1.2843601895734597e-05, |
| "loss": 0.2364, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.312020460358056, |
| "grad_norm": 0.17566806664980533, |
| "learning_rate": 1.2796208530805687e-05, |
| "loss": 0.2482, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.3145780051150897, |
| "grad_norm": 0.19920352232738897, |
| "learning_rate": 1.274881516587678e-05, |
| "loss": 0.2559, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.317135549872123, |
| "grad_norm": 0.1953502116868402, |
| "learning_rate": 1.270142180094787e-05, |
| "loss": 0.2408, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.319693094629156, |
| "grad_norm": 0.18651854725906564, |
| "learning_rate": 1.2654028436018959e-05, |
| "loss": 0.2523, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.3222506393861893, |
| "grad_norm": 0.1894806065906189, |
| "learning_rate": 1.2606635071090048e-05, |
| "loss": 0.2651, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.3248081841432224, |
| "grad_norm": 0.18839186702018404, |
| "learning_rate": 1.2559241706161136e-05, |
| "loss": 0.2474, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.3273657289002556, |
| "grad_norm": 0.19140520747243725, |
| "learning_rate": 1.2511848341232226e-05, |
| "loss": 0.2393, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.329923273657289, |
| "grad_norm": 0.18330215327131463, |
| "learning_rate": 1.2464454976303317e-05, |
| "loss": 0.2528, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.3324808184143224, |
| "grad_norm": 0.1932126436646379, |
| "learning_rate": 1.2417061611374409e-05, |
| "loss": 0.2565, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.3350383631713556, |
| "grad_norm": 0.1950356336934161, |
| "learning_rate": 1.2369668246445498e-05, |
| "loss": 0.2457, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.337595907928389, |
| "grad_norm": 0.17865872468905974, |
| "learning_rate": 1.2322274881516588e-05, |
| "loss": 0.2425, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.340153452685422, |
| "grad_norm": 0.18504654975711932, |
| "learning_rate": 1.227488151658768e-05, |
| "loss": 0.2577, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.342710997442455, |
| "grad_norm": 0.20222565063208944, |
| "learning_rate": 1.2227488151658769e-05, |
| "loss": 0.2581, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.3452685421994883, |
| "grad_norm": 0.1838472381815511, |
| "learning_rate": 1.2180094786729859e-05, |
| "loss": 0.2542, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.3478260869565215, |
| "grad_norm": 0.18346333495631081, |
| "learning_rate": 1.2132701421800948e-05, |
| "loss": 0.2366, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.350383631713555, |
| "grad_norm": 0.18792845931699567, |
| "learning_rate": 1.2085308056872038e-05, |
| "loss": 0.2397, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.3529411764705883, |
| "grad_norm": 0.18552873313226068, |
| "learning_rate": 1.2037914691943128e-05, |
| "loss": 0.2476, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.3554987212276215, |
| "grad_norm": 0.18568764961833162, |
| "learning_rate": 1.1990521327014219e-05, |
| "loss": 0.2464, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.3580562659846547, |
| "grad_norm": 0.19910274628884306, |
| "learning_rate": 1.1943127962085309e-05, |
| "loss": 0.2616, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.360613810741688, |
| "grad_norm": 0.19023555512921334, |
| "learning_rate": 1.1895734597156398e-05, |
| "loss": 0.2468, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.363171355498721, |
| "grad_norm": 0.18498994847552913, |
| "learning_rate": 1.184834123222749e-05, |
| "loss": 0.238, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.3657289002557547, |
| "grad_norm": 0.19230699836861417, |
| "learning_rate": 1.180094786729858e-05, |
| "loss": 0.2499, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.368286445012788, |
| "grad_norm": 0.18992356411703937, |
| "learning_rate": 1.1753554502369669e-05, |
| "loss": 0.2486, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.370843989769821, |
| "grad_norm": 0.194179037810583, |
| "learning_rate": 1.1706161137440758e-05, |
| "loss": 0.2419, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.373401534526854, |
| "grad_norm": 0.19309339760618768, |
| "learning_rate": 1.1658767772511848e-05, |
| "loss": 0.2498, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.3759590792838874, |
| "grad_norm": 0.18311942664171635, |
| "learning_rate": 1.161137440758294e-05, |
| "loss": 0.2508, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.3785166240409206, |
| "grad_norm": 0.1851345033868292, |
| "learning_rate": 1.1563981042654029e-05, |
| "loss": 0.2458, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.381074168797954, |
| "grad_norm": 0.20539574184587675, |
| "learning_rate": 1.1516587677725119e-05, |
| "loss": 0.2535, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.3836317135549874, |
| "grad_norm": 0.1892100521466075, |
| "learning_rate": 1.146919431279621e-05, |
| "loss": 0.2429, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.3861892583120206, |
| "grad_norm": 0.18603312629992957, |
| "learning_rate": 1.14218009478673e-05, |
| "loss": 0.2509, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.3887468030690537, |
| "grad_norm": 0.19500218393174892, |
| "learning_rate": 1.137440758293839e-05, |
| "loss": 0.2566, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.391304347826087, |
| "grad_norm": 0.1826094688821734, |
| "learning_rate": 1.1327014218009479e-05, |
| "loss": 0.2463, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.39386189258312, |
| "grad_norm": 0.2035505913630252, |
| "learning_rate": 1.1279620853080569e-05, |
| "loss": 0.2453, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.3964194373401533, |
| "grad_norm": 0.1986182294740978, |
| "learning_rate": 1.1232227488151658e-05, |
| "loss": 0.2558, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.398976982097187, |
| "grad_norm": 0.19707252280447218, |
| "learning_rate": 1.118483412322275e-05, |
| "loss": 0.2434, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.40153452685422, |
| "grad_norm": 0.18477355859208972, |
| "learning_rate": 1.113744075829384e-05, |
| "loss": 0.2593, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.4040920716112533, |
| "grad_norm": 0.18942958453392783, |
| "learning_rate": 1.1090047393364929e-05, |
| "loss": 0.2356, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.4066496163682864, |
| "grad_norm": 0.19115020121012594, |
| "learning_rate": 1.104265402843602e-05, |
| "loss": 0.2586, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.4092071611253196, |
| "grad_norm": 0.1907623500410302, |
| "learning_rate": 1.099526066350711e-05, |
| "loss": 0.2466, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.411764705882353, |
| "grad_norm": 0.20551739715864323, |
| "learning_rate": 1.09478672985782e-05, |
| "loss": 0.2641, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.414322250639386, |
| "grad_norm": 0.2037628239144751, |
| "learning_rate": 1.0900473933649289e-05, |
| "loss": 0.2581, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.4168797953964196, |
| "grad_norm": 0.20606502494179982, |
| "learning_rate": 1.0853080568720379e-05, |
| "loss": 0.2455, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.419437340153453, |
| "grad_norm": 0.19957207626288198, |
| "learning_rate": 1.080568720379147e-05, |
| "loss": 0.2417, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.421994884910486, |
| "grad_norm": 0.19178247803581283, |
| "learning_rate": 1.075829383886256e-05, |
| "loss": 0.2491, |
| "step": 947 |
| }, |
| { |
| "epoch": 2.424552429667519, |
| "grad_norm": 0.1829585466891497, |
| "learning_rate": 1.071090047393365e-05, |
| "loss": 0.2622, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.4271099744245523, |
| "grad_norm": 0.19009770566404205, |
| "learning_rate": 1.066350710900474e-05, |
| "loss": 0.2472, |
| "step": 949 |
| }, |
| { |
| "epoch": 2.4296675191815855, |
| "grad_norm": 0.18826570727560837, |
| "learning_rate": 1.061611374407583e-05, |
| "loss": 0.2435, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.4322250639386187, |
| "grad_norm": 0.18359777168223823, |
| "learning_rate": 1.056872037914692e-05, |
| "loss": 0.2563, |
| "step": 951 |
| }, |
| { |
| "epoch": 2.4347826086956523, |
| "grad_norm": 0.18743774264051472, |
| "learning_rate": 1.052132701421801e-05, |
| "loss": 0.2501, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.4373401534526855, |
| "grad_norm": 0.18190848955579414, |
| "learning_rate": 1.04739336492891e-05, |
| "loss": 0.2419, |
| "step": 953 |
| }, |
| { |
| "epoch": 2.4398976982097187, |
| "grad_norm": 0.1869056453658558, |
| "learning_rate": 1.0426540284360189e-05, |
| "loss": 0.2468, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.442455242966752, |
| "grad_norm": 0.19325837584457362, |
| "learning_rate": 1.037914691943128e-05, |
| "loss": 0.2398, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.445012787723785, |
| "grad_norm": 0.18958397138054392, |
| "learning_rate": 1.033175355450237e-05, |
| "loss": 0.2587, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.4475703324808182, |
| "grad_norm": 0.1802569001638857, |
| "learning_rate": 1.028436018957346e-05, |
| "loss": 0.249, |
| "step": 957 |
| }, |
| { |
| "epoch": 2.4501278772378514, |
| "grad_norm": 0.19473776964299236, |
| "learning_rate": 1.0236966824644551e-05, |
| "loss": 0.2504, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.452685421994885, |
| "grad_norm": 0.19318565328468898, |
| "learning_rate": 1.018957345971564e-05, |
| "loss": 0.2504, |
| "step": 959 |
| }, |
| { |
| "epoch": 2.455242966751918, |
| "grad_norm": 0.1935892471549821, |
| "learning_rate": 1.014218009478673e-05, |
| "loss": 0.2608, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.4578005115089514, |
| "grad_norm": 0.2128184199845009, |
| "learning_rate": 1.009478672985782e-05, |
| "loss": 0.2547, |
| "step": 961 |
| }, |
| { |
| "epoch": 2.4603580562659846, |
| "grad_norm": 0.1894940142447489, |
| "learning_rate": 1.004739336492891e-05, |
| "loss": 0.2654, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.4629156010230178, |
| "grad_norm": 0.18093993857309348, |
| "learning_rate": 1e-05, |
| "loss": 0.2214, |
| "step": 963 |
| }, |
| { |
| "epoch": 2.4654731457800514, |
| "grad_norm": 0.19178096580173365, |
| "learning_rate": 9.95260663507109e-06, |
| "loss": 0.2428, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.4680306905370846, |
| "grad_norm": 0.17992616710306839, |
| "learning_rate": 9.90521327014218e-06, |
| "loss": 0.2316, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.4705882352941178, |
| "grad_norm": 0.19203487435465688, |
| "learning_rate": 9.857819905213271e-06, |
| "loss": 0.2461, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.473145780051151, |
| "grad_norm": 0.18682427737935345, |
| "learning_rate": 9.810426540284361e-06, |
| "loss": 0.2546, |
| "step": 967 |
| }, |
| { |
| "epoch": 2.475703324808184, |
| "grad_norm": 0.18859469892005637, |
| "learning_rate": 9.76303317535545e-06, |
| "loss": 0.2622, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.4782608695652173, |
| "grad_norm": 0.18438885948965986, |
| "learning_rate": 9.71563981042654e-06, |
| "loss": 0.2545, |
| "step": 969 |
| }, |
| { |
| "epoch": 2.4808184143222505, |
| "grad_norm": 0.19779141574116138, |
| "learning_rate": 9.66824644549763e-06, |
| "loss": 0.244, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.483375959079284, |
| "grad_norm": 0.19565635148681754, |
| "learning_rate": 9.62085308056872e-06, |
| "loss": 0.2659, |
| "step": 971 |
| }, |
| { |
| "epoch": 2.4859335038363173, |
| "grad_norm": 0.17628127887779274, |
| "learning_rate": 9.573459715639811e-06, |
| "loss": 0.2445, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.4884910485933505, |
| "grad_norm": 0.1800315251692981, |
| "learning_rate": 9.5260663507109e-06, |
| "loss": 0.2524, |
| "step": 973 |
| }, |
| { |
| "epoch": 2.4910485933503836, |
| "grad_norm": 0.1863667327196091, |
| "learning_rate": 9.47867298578199e-06, |
| "loss": 0.2555, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.493606138107417, |
| "grad_norm": 0.19474788386072336, |
| "learning_rate": 9.431279620853082e-06, |
| "loss": 0.2607, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.49616368286445, |
| "grad_norm": 0.18695877540681222, |
| "learning_rate": 9.383886255924171e-06, |
| "loss": 0.2594, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.498721227621483, |
| "grad_norm": 0.18123819527715856, |
| "learning_rate": 9.336492890995263e-06, |
| "loss": 0.2388, |
| "step": 977 |
| }, |
| { |
| "epoch": 2.501278772378517, |
| "grad_norm": 0.1805022447990822, |
| "learning_rate": 9.28909952606635e-06, |
| "loss": 0.2611, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.50383631713555, |
| "grad_norm": 0.20725044894441963, |
| "learning_rate": 9.24170616113744e-06, |
| "loss": 0.2597, |
| "step": 979 |
| }, |
| { |
| "epoch": 2.506393861892583, |
| "grad_norm": 0.17978028465292306, |
| "learning_rate": 9.194312796208532e-06, |
| "loss": 0.2546, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.5089514066496164, |
| "grad_norm": 0.19895373521772294, |
| "learning_rate": 9.146919431279621e-06, |
| "loss": 0.2592, |
| "step": 981 |
| }, |
| { |
| "epoch": 2.5115089514066495, |
| "grad_norm": 0.1908106662263474, |
| "learning_rate": 9.09952606635071e-06, |
| "loss": 0.2617, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.5140664961636827, |
| "grad_norm": 0.17851227882118903, |
| "learning_rate": 9.052132701421802e-06, |
| "loss": 0.2438, |
| "step": 983 |
| }, |
| { |
| "epoch": 2.516624040920716, |
| "grad_norm": 0.18752114855298738, |
| "learning_rate": 9.004739336492892e-06, |
| "loss": 0.2443, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.5191815856777495, |
| "grad_norm": 0.2066530632997492, |
| "learning_rate": 8.957345971563981e-06, |
| "loss": 0.2518, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.5217391304347827, |
| "grad_norm": 0.17903017399321067, |
| "learning_rate": 8.909952606635073e-06, |
| "loss": 0.2551, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.524296675191816, |
| "grad_norm": 0.1813455062016161, |
| "learning_rate": 8.86255924170616e-06, |
| "loss": 0.2477, |
| "step": 987 |
| }, |
| { |
| "epoch": 2.526854219948849, |
| "grad_norm": 0.19991953255257042, |
| "learning_rate": 8.81516587677725e-06, |
| "loss": 0.2518, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.5294117647058822, |
| "grad_norm": 0.19321012124999268, |
| "learning_rate": 8.767772511848342e-06, |
| "loss": 0.2586, |
| "step": 989 |
| }, |
| { |
| "epoch": 2.531969309462916, |
| "grad_norm": 0.17877646902912023, |
| "learning_rate": 8.720379146919431e-06, |
| "loss": 0.2439, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.5345268542199486, |
| "grad_norm": 0.18016347358333068, |
| "learning_rate": 8.672985781990521e-06, |
| "loss": 0.2504, |
| "step": 991 |
| }, |
| { |
| "epoch": 2.5370843989769822, |
| "grad_norm": 0.19598330654437415, |
| "learning_rate": 8.625592417061612e-06, |
| "loss": 0.2402, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.5396419437340154, |
| "grad_norm": 0.19085013146167623, |
| "learning_rate": 8.578199052132702e-06, |
| "loss": 0.2518, |
| "step": 993 |
| }, |
| { |
| "epoch": 2.5421994884910486, |
| "grad_norm": 0.1835210269356536, |
| "learning_rate": 8.530805687203793e-06, |
| "loss": 0.2457, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.544757033248082, |
| "grad_norm": 0.1808657315125499, |
| "learning_rate": 8.483412322274883e-06, |
| "loss": 0.2581, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.547314578005115, |
| "grad_norm": 0.17348100084546994, |
| "learning_rate": 8.436018957345971e-06, |
| "loss": 0.2361, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.5498721227621486, |
| "grad_norm": 0.18526137266320347, |
| "learning_rate": 8.388625592417062e-06, |
| "loss": 0.2518, |
| "step": 997 |
| }, |
| { |
| "epoch": 2.5524296675191813, |
| "grad_norm": 0.1866598354500443, |
| "learning_rate": 8.341232227488152e-06, |
| "loss": 0.2489, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.554987212276215, |
| "grad_norm": 0.1866669094430096, |
| "learning_rate": 8.293838862559241e-06, |
| "loss": 0.2612, |
| "step": 999 |
| }, |
| { |
| "epoch": 2.557544757033248, |
| "grad_norm": 0.20188335606638064, |
| "learning_rate": 8.246445497630333e-06, |
| "loss": 0.2394, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.5601023017902813, |
| "grad_norm": 0.17491742752344783, |
| "learning_rate": 8.199052132701422e-06, |
| "loss": 0.2377, |
| "step": 1001 |
| }, |
| { |
| "epoch": 2.5626598465473145, |
| "grad_norm": 0.17572988640896128, |
| "learning_rate": 8.151658767772512e-06, |
| "loss": 0.2523, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.5652173913043477, |
| "grad_norm": 0.1850486906047413, |
| "learning_rate": 8.104265402843603e-06, |
| "loss": 0.2604, |
| "step": 1003 |
| }, |
| { |
| "epoch": 2.5677749360613813, |
| "grad_norm": 0.18456694735716317, |
| "learning_rate": 8.056872037914693e-06, |
| "loss": 0.2579, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.5703324808184145, |
| "grad_norm": 0.1989825021126641, |
| "learning_rate": 8.009478672985781e-06, |
| "loss": 0.2456, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.5728900255754477, |
| "grad_norm": 0.19392351458658866, |
| "learning_rate": 7.962085308056872e-06, |
| "loss": 0.2542, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.575447570332481, |
| "grad_norm": 0.1803390415874974, |
| "learning_rate": 7.914691943127962e-06, |
| "loss": 0.243, |
| "step": 1007 |
| }, |
| { |
| "epoch": 2.578005115089514, |
| "grad_norm": 0.18345024591378195, |
| "learning_rate": 7.867298578199052e-06, |
| "loss": 0.2451, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.580562659846547, |
| "grad_norm": 0.1941629539774514, |
| "learning_rate": 7.819905213270143e-06, |
| "loss": 0.2484, |
| "step": 1009 |
| }, |
| { |
| "epoch": 2.5831202046035804, |
| "grad_norm": 0.20207081751890732, |
| "learning_rate": 7.772511848341233e-06, |
| "loss": 0.2562, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.585677749360614, |
| "grad_norm": 0.18062688142042024, |
| "learning_rate": 7.725118483412324e-06, |
| "loss": 0.2454, |
| "step": 1011 |
| }, |
| { |
| "epoch": 2.588235294117647, |
| "grad_norm": 0.18172987412926497, |
| "learning_rate": 7.677725118483414e-06, |
| "loss": 0.258, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.5907928388746804, |
| "grad_norm": 0.1910447725518475, |
| "learning_rate": 7.630331753554503e-06, |
| "loss": 0.2547, |
| "step": 1013 |
| }, |
| { |
| "epoch": 2.5933503836317136, |
| "grad_norm": 0.18183009657939525, |
| "learning_rate": 7.582938388625594e-06, |
| "loss": 0.2477, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.5959079283887467, |
| "grad_norm": 0.19084392574095072, |
| "learning_rate": 7.5355450236966825e-06, |
| "loss": 0.2535, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.59846547314578, |
| "grad_norm": 0.19660855958741716, |
| "learning_rate": 7.488151658767772e-06, |
| "loss": 0.2542, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.601023017902813, |
| "grad_norm": 0.19119145619102845, |
| "learning_rate": 7.4407582938388635e-06, |
| "loss": 0.2652, |
| "step": 1017 |
| }, |
| { |
| "epoch": 2.6035805626598467, |
| "grad_norm": 0.18403920655569364, |
| "learning_rate": 7.393364928909953e-06, |
| "loss": 0.2507, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.60613810741688, |
| "grad_norm": 0.18051231326303438, |
| "learning_rate": 7.345971563981043e-06, |
| "loss": 0.2457, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.608695652173913, |
| "grad_norm": 0.18075516190798283, |
| "learning_rate": 7.298578199052133e-06, |
| "loss": 0.252, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.6112531969309463, |
| "grad_norm": 0.1788096745482508, |
| "learning_rate": 7.251184834123223e-06, |
| "loss": 0.2436, |
| "step": 1021 |
| }, |
| { |
| "epoch": 2.6138107416879794, |
| "grad_norm": 0.1824092891963844, |
| "learning_rate": 7.2037914691943126e-06, |
| "loss": 0.2432, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.6163682864450126, |
| "grad_norm": 0.16862388714463997, |
| "learning_rate": 7.156398104265404e-06, |
| "loss": 0.2432, |
| "step": 1023 |
| }, |
| { |
| "epoch": 2.618925831202046, |
| "grad_norm": 0.17677820353677443, |
| "learning_rate": 7.109004739336493e-06, |
| "loss": 0.2454, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.6214833759590794, |
| "grad_norm": 0.1749578021536912, |
| "learning_rate": 7.061611374407582e-06, |
| "loss": 0.2516, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.6240409207161126, |
| "grad_norm": 0.1746709607811344, |
| "learning_rate": 7.014218009478674e-06, |
| "loss": 0.2393, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.626598465473146, |
| "grad_norm": 0.1774898930232003, |
| "learning_rate": 6.966824644549763e-06, |
| "loss": 0.2488, |
| "step": 1027 |
| }, |
| { |
| "epoch": 2.629156010230179, |
| "grad_norm": 0.17292145541011766, |
| "learning_rate": 6.919431279620854e-06, |
| "loss": 0.2439, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.631713554987212, |
| "grad_norm": 0.25017047237469586, |
| "learning_rate": 6.8720379146919435e-06, |
| "loss": 0.2666, |
| "step": 1029 |
| }, |
| { |
| "epoch": 2.634271099744246, |
| "grad_norm": 0.1802705434620767, |
| "learning_rate": 6.824644549763033e-06, |
| "loss": 0.2611, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.6368286445012785, |
| "grad_norm": 0.18448765710220488, |
| "learning_rate": 6.7772511848341244e-06, |
| "loss": 0.2407, |
| "step": 1031 |
| }, |
| { |
| "epoch": 2.639386189258312, |
| "grad_norm": 0.1740367294783914, |
| "learning_rate": 6.729857819905214e-06, |
| "loss": 0.2459, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.6419437340153453, |
| "grad_norm": 0.17677782819853, |
| "learning_rate": 6.682464454976303e-06, |
| "loss": 0.2531, |
| "step": 1033 |
| }, |
| { |
| "epoch": 2.6445012787723785, |
| "grad_norm": 0.18226239340272907, |
| "learning_rate": 6.635071090047394e-06, |
| "loss": 0.2488, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.6470588235294117, |
| "grad_norm": 0.17405875174384855, |
| "learning_rate": 6.587677725118484e-06, |
| "loss": 0.2445, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.649616368286445, |
| "grad_norm": 0.1776309384953249, |
| "learning_rate": 6.5402843601895735e-06, |
| "loss": 0.2401, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.6521739130434785, |
| "grad_norm": 0.18334105397117928, |
| "learning_rate": 6.492890995260664e-06, |
| "loss": 0.2503, |
| "step": 1037 |
| }, |
| { |
| "epoch": 2.6547314578005117, |
| "grad_norm": 0.17986542354916346, |
| "learning_rate": 6.445497630331754e-06, |
| "loss": 0.2489, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.657289002557545, |
| "grad_norm": 0.17690617151115767, |
| "learning_rate": 6.398104265402843e-06, |
| "loss": 0.2403, |
| "step": 1039 |
| }, |
| { |
| "epoch": 2.659846547314578, |
| "grad_norm": 0.18280012481225613, |
| "learning_rate": 6.350710900473935e-06, |
| "loss": 0.2498, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.662404092071611, |
| "grad_norm": 0.17506102024381995, |
| "learning_rate": 6.303317535545024e-06, |
| "loss": 0.2308, |
| "step": 1041 |
| }, |
| { |
| "epoch": 2.6649616368286444, |
| "grad_norm": 0.18790705934428378, |
| "learning_rate": 6.255924170616113e-06, |
| "loss": 0.2529, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.6675191815856776, |
| "grad_norm": 0.1892712596775323, |
| "learning_rate": 6.208530805687204e-06, |
| "loss": 0.2432, |
| "step": 1043 |
| }, |
| { |
| "epoch": 2.670076726342711, |
| "grad_norm": 0.19005999786944971, |
| "learning_rate": 6.161137440758294e-06, |
| "loss": 0.2423, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.6726342710997444, |
| "grad_norm": 0.1845872169401998, |
| "learning_rate": 6.1137440758293845e-06, |
| "loss": 0.2593, |
| "step": 1045 |
| }, |
| { |
| "epoch": 2.6751918158567776, |
| "grad_norm": 0.18704678458411442, |
| "learning_rate": 6.066350710900474e-06, |
| "loss": 0.2391, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.6777493606138107, |
| "grad_norm": 0.17913018163417851, |
| "learning_rate": 6.018957345971564e-06, |
| "loss": 0.2405, |
| "step": 1047 |
| }, |
| { |
| "epoch": 2.680306905370844, |
| "grad_norm": 0.19472560672322844, |
| "learning_rate": 5.971563981042654e-06, |
| "loss": 0.2505, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.682864450127877, |
| "grad_norm": 0.18019457992632396, |
| "learning_rate": 5.924170616113745e-06, |
| "loss": 0.24, |
| "step": 1049 |
| }, |
| { |
| "epoch": 2.6854219948849103, |
| "grad_norm": 0.17330920592908153, |
| "learning_rate": 5.876777251184834e-06, |
| "loss": 0.2544, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.687979539641944, |
| "grad_norm": 0.1815334540303024, |
| "learning_rate": 5.829383886255924e-06, |
| "loss": 0.2466, |
| "step": 1051 |
| }, |
| { |
| "epoch": 2.690537084398977, |
| "grad_norm": 0.19496083605348435, |
| "learning_rate": 5.7819905213270145e-06, |
| "loss": 0.252, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.6930946291560103, |
| "grad_norm": 0.19241164389164786, |
| "learning_rate": 5.734597156398105e-06, |
| "loss": 0.2504, |
| "step": 1053 |
| }, |
| { |
| "epoch": 2.6956521739130435, |
| "grad_norm": 0.45538670179365104, |
| "learning_rate": 5.687203791469195e-06, |
| "loss": 0.26, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.6982097186700766, |
| "grad_norm": 0.18008218699297715, |
| "learning_rate": 5.639810426540284e-06, |
| "loss": 0.2493, |
| "step": 1055 |
| }, |
| { |
| "epoch": 2.70076726342711, |
| "grad_norm": 0.18340270921249122, |
| "learning_rate": 5.592417061611375e-06, |
| "loss": 0.237, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.703324808184143, |
| "grad_norm": 0.22199762048479815, |
| "learning_rate": 5.5450236966824644e-06, |
| "loss": 0.2509, |
| "step": 1057 |
| }, |
| { |
| "epoch": 2.7058823529411766, |
| "grad_norm": 0.1761889836482758, |
| "learning_rate": 5.497630331753555e-06, |
| "loss": 0.2502, |
| "step": 1058 |
| }, |
| { |
| "epoch": 2.70843989769821, |
| "grad_norm": 0.1788757958818801, |
| "learning_rate": 5.4502369668246446e-06, |
| "loss": 0.2548, |
| "step": 1059 |
| }, |
| { |
| "epoch": 2.710997442455243, |
| "grad_norm": 0.17679336863109477, |
| "learning_rate": 5.402843601895735e-06, |
| "loss": 0.2511, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.713554987212276, |
| "grad_norm": 0.18320215458542266, |
| "learning_rate": 5.355450236966825e-06, |
| "loss": 0.2394, |
| "step": 1061 |
| }, |
| { |
| "epoch": 2.7161125319693094, |
| "grad_norm": 0.1954816098192057, |
| "learning_rate": 5.308056872037915e-06, |
| "loss": 0.2686, |
| "step": 1062 |
| }, |
| { |
| "epoch": 2.718670076726343, |
| "grad_norm": 0.17601162272580267, |
| "learning_rate": 5.260663507109005e-06, |
| "loss": 0.2428, |
| "step": 1063 |
| }, |
| { |
| "epoch": 2.7212276214833757, |
| "grad_norm": 0.17667083168107164, |
| "learning_rate": 5.2132701421800945e-06, |
| "loss": 0.2538, |
| "step": 1064 |
| }, |
| { |
| "epoch": 2.7237851662404093, |
| "grad_norm": 0.17964593927923134, |
| "learning_rate": 5.165876777251185e-06, |
| "loss": 0.2515, |
| "step": 1065 |
| }, |
| { |
| "epoch": 2.7263427109974425, |
| "grad_norm": 0.18415215339401061, |
| "learning_rate": 5.1184834123222755e-06, |
| "loss": 0.2467, |
| "step": 1066 |
| }, |
| { |
| "epoch": 2.7289002557544757, |
| "grad_norm": 0.18206983493291928, |
| "learning_rate": 5.071090047393365e-06, |
| "loss": 0.2505, |
| "step": 1067 |
| }, |
| { |
| "epoch": 2.731457800511509, |
| "grad_norm": 0.17702957807117964, |
| "learning_rate": 5.023696682464455e-06, |
| "loss": 0.2481, |
| "step": 1068 |
| }, |
| { |
| "epoch": 2.734015345268542, |
| "grad_norm": 0.18941559797605062, |
| "learning_rate": 4.976303317535545e-06, |
| "loss": 0.2515, |
| "step": 1069 |
| }, |
| { |
| "epoch": 2.7365728900255757, |
| "grad_norm": 0.17866021653822645, |
| "learning_rate": 4.928909952606636e-06, |
| "loss": 0.2573, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.7391304347826084, |
| "grad_norm": 0.1771695946103805, |
| "learning_rate": 4.881516587677725e-06, |
| "loss": 0.2468, |
| "step": 1071 |
| }, |
| { |
| "epoch": 2.741687979539642, |
| "grad_norm": 0.18877889489990468, |
| "learning_rate": 4.834123222748815e-06, |
| "loss": 0.2513, |
| "step": 1072 |
| }, |
| { |
| "epoch": 2.7442455242966752, |
| "grad_norm": 0.18206987667415467, |
| "learning_rate": 4.7867298578199055e-06, |
| "loss": 0.261, |
| "step": 1073 |
| }, |
| { |
| "epoch": 2.7468030690537084, |
| "grad_norm": 0.17701923743513961, |
| "learning_rate": 4.739336492890995e-06, |
| "loss": 0.2382, |
| "step": 1074 |
| }, |
| { |
| "epoch": 2.7493606138107416, |
| "grad_norm": 0.17540739260356206, |
| "learning_rate": 4.691943127962086e-06, |
| "loss": 0.2252, |
| "step": 1075 |
| }, |
| { |
| "epoch": 2.7519181585677748, |
| "grad_norm": 0.18630964353133092, |
| "learning_rate": 4.644549763033175e-06, |
| "loss": 0.2641, |
| "step": 1076 |
| }, |
| { |
| "epoch": 2.7544757033248084, |
| "grad_norm": 0.18014938799060165, |
| "learning_rate": 4.597156398104266e-06, |
| "loss": 0.2474, |
| "step": 1077 |
| }, |
| { |
| "epoch": 2.7570332480818416, |
| "grad_norm": 0.17307796418005664, |
| "learning_rate": 4.549763033175355e-06, |
| "loss": 0.2494, |
| "step": 1078 |
| }, |
| { |
| "epoch": 2.7595907928388748, |
| "grad_norm": 0.17931809836460505, |
| "learning_rate": 4.502369668246446e-06, |
| "loss": 0.2489, |
| "step": 1079 |
| }, |
| { |
| "epoch": 2.762148337595908, |
| "grad_norm": 0.18594644827320997, |
| "learning_rate": 4.454976303317536e-06, |
| "loss": 0.2592, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.764705882352941, |
| "grad_norm": 0.1761943169656133, |
| "learning_rate": 4.407582938388625e-06, |
| "loss": 0.2478, |
| "step": 1081 |
| }, |
| { |
| "epoch": 2.7672634271099743, |
| "grad_norm": 0.173916317774858, |
| "learning_rate": 4.360189573459716e-06, |
| "loss": 0.247, |
| "step": 1082 |
| }, |
| { |
| "epoch": 2.7698209718670075, |
| "grad_norm": 0.17873537641991927, |
| "learning_rate": 4.312796208530806e-06, |
| "loss": 0.2506, |
| "step": 1083 |
| }, |
| { |
| "epoch": 2.772378516624041, |
| "grad_norm": 0.16911433381467955, |
| "learning_rate": 4.265402843601897e-06, |
| "loss": 0.2508, |
| "step": 1084 |
| }, |
| { |
| "epoch": 2.7749360613810743, |
| "grad_norm": 0.1791375462513097, |
| "learning_rate": 4.2180094786729854e-06, |
| "loss": 0.2388, |
| "step": 1085 |
| }, |
| { |
| "epoch": 2.7774936061381075, |
| "grad_norm": 0.1784797467796097, |
| "learning_rate": 4.170616113744076e-06, |
| "loss": 0.2572, |
| "step": 1086 |
| }, |
| { |
| "epoch": 2.7800511508951407, |
| "grad_norm": 0.18492104457145292, |
| "learning_rate": 4.123222748815166e-06, |
| "loss": 0.2384, |
| "step": 1087 |
| }, |
| { |
| "epoch": 2.782608695652174, |
| "grad_norm": 0.18012702780394255, |
| "learning_rate": 4.075829383886256e-06, |
| "loss": 0.2307, |
| "step": 1088 |
| }, |
| { |
| "epoch": 2.785166240409207, |
| "grad_norm": 0.17320341741197393, |
| "learning_rate": 4.0284360189573465e-06, |
| "loss": 0.2383, |
| "step": 1089 |
| }, |
| { |
| "epoch": 2.78772378516624, |
| "grad_norm": 0.17354330549732871, |
| "learning_rate": 3.981042654028436e-06, |
| "loss": 0.2284, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.790281329923274, |
| "grad_norm": 0.1817183194818464, |
| "learning_rate": 3.933649289099526e-06, |
| "loss": 0.2325, |
| "step": 1091 |
| }, |
| { |
| "epoch": 2.792838874680307, |
| "grad_norm": 0.1740902128176595, |
| "learning_rate": 3.886255924170616e-06, |
| "loss": 0.2464, |
| "step": 1092 |
| }, |
| { |
| "epoch": 2.79539641943734, |
| "grad_norm": 0.17979740364634914, |
| "learning_rate": 3.838862559241707e-06, |
| "loss": 0.2448, |
| "step": 1093 |
| }, |
| { |
| "epoch": 2.7979539641943734, |
| "grad_norm": 0.18910478213308557, |
| "learning_rate": 3.791469194312797e-06, |
| "loss": 0.2529, |
| "step": 1094 |
| }, |
| { |
| "epoch": 2.8005115089514065, |
| "grad_norm": 0.17562387593048473, |
| "learning_rate": 3.744075829383886e-06, |
| "loss": 0.2516, |
| "step": 1095 |
| }, |
| { |
| "epoch": 2.80306905370844, |
| "grad_norm": 0.17037649183598133, |
| "learning_rate": 3.6966824644549766e-06, |
| "loss": 0.2304, |
| "step": 1096 |
| }, |
| { |
| "epoch": 2.805626598465473, |
| "grad_norm": 0.1865715453669857, |
| "learning_rate": 3.6492890995260666e-06, |
| "loss": 0.2484, |
| "step": 1097 |
| }, |
| { |
| "epoch": 2.8081841432225065, |
| "grad_norm": 0.17956564469501413, |
| "learning_rate": 3.6018957345971563e-06, |
| "loss": 0.2429, |
| "step": 1098 |
| }, |
| { |
| "epoch": 2.8107416879795397, |
| "grad_norm": 0.17380201982016105, |
| "learning_rate": 3.5545023696682464e-06, |
| "loss": 0.2475, |
| "step": 1099 |
| }, |
| { |
| "epoch": 2.813299232736573, |
| "grad_norm": 0.18949661964378972, |
| "learning_rate": 3.507109004739337e-06, |
| "loss": 0.254, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.815856777493606, |
| "grad_norm": 0.18281900420620492, |
| "learning_rate": 3.459715639810427e-06, |
| "loss": 0.246, |
| "step": 1101 |
| }, |
| { |
| "epoch": 2.8184143222506393, |
| "grad_norm": 0.19046092151157248, |
| "learning_rate": 3.4123222748815165e-06, |
| "loss": 0.252, |
| "step": 1102 |
| }, |
| { |
| "epoch": 2.820971867007673, |
| "grad_norm": 0.17912805262085352, |
| "learning_rate": 3.364928909952607e-06, |
| "loss": 0.2528, |
| "step": 1103 |
| }, |
| { |
| "epoch": 2.8235294117647056, |
| "grad_norm": 0.16539721286530049, |
| "learning_rate": 3.317535545023697e-06, |
| "loss": 0.2452, |
| "step": 1104 |
| }, |
| { |
| "epoch": 2.8260869565217392, |
| "grad_norm": 0.18089995003561432, |
| "learning_rate": 3.2701421800947867e-06, |
| "loss": 0.2442, |
| "step": 1105 |
| }, |
| { |
| "epoch": 2.8286445012787724, |
| "grad_norm": 0.17961058615086165, |
| "learning_rate": 3.222748815165877e-06, |
| "loss": 0.2532, |
| "step": 1106 |
| }, |
| { |
| "epoch": 2.8312020460358056, |
| "grad_norm": 0.17670809278729904, |
| "learning_rate": 3.1753554502369673e-06, |
| "loss": 0.2469, |
| "step": 1107 |
| }, |
| { |
| "epoch": 2.833759590792839, |
| "grad_norm": 0.17184672240491808, |
| "learning_rate": 3.1279620853080565e-06, |
| "loss": 0.253, |
| "step": 1108 |
| }, |
| { |
| "epoch": 2.836317135549872, |
| "grad_norm": 0.18342580492222546, |
| "learning_rate": 3.080568720379147e-06, |
| "loss": 0.2447, |
| "step": 1109 |
| }, |
| { |
| "epoch": 2.8388746803069056, |
| "grad_norm": 0.17195526482252144, |
| "learning_rate": 3.033175355450237e-06, |
| "loss": 0.2463, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.8414322250639388, |
| "grad_norm": 0.1792058703015505, |
| "learning_rate": 2.985781990521327e-06, |
| "loss": 0.2498, |
| "step": 1111 |
| }, |
| { |
| "epoch": 2.843989769820972, |
| "grad_norm": 0.17565132753951782, |
| "learning_rate": 2.938388625592417e-06, |
| "loss": 0.2553, |
| "step": 1112 |
| }, |
| { |
| "epoch": 2.846547314578005, |
| "grad_norm": 0.18056116078607748, |
| "learning_rate": 2.8909952606635073e-06, |
| "loss": 0.254, |
| "step": 1113 |
| }, |
| { |
| "epoch": 2.8491048593350383, |
| "grad_norm": 0.17874160432603925, |
| "learning_rate": 2.8436018957345973e-06, |
| "loss": 0.246, |
| "step": 1114 |
| }, |
| { |
| "epoch": 2.8516624040920715, |
| "grad_norm": 0.17126107844733118, |
| "learning_rate": 2.7962085308056874e-06, |
| "loss": 0.2437, |
| "step": 1115 |
| }, |
| { |
| "epoch": 2.8542199488491047, |
| "grad_norm": 0.16804735225501954, |
| "learning_rate": 2.7488151658767775e-06, |
| "loss": 0.2338, |
| "step": 1116 |
| }, |
| { |
| "epoch": 2.8567774936061383, |
| "grad_norm": 0.17871874445538027, |
| "learning_rate": 2.7014218009478675e-06, |
| "loss": 0.2504, |
| "step": 1117 |
| }, |
| { |
| "epoch": 2.8593350383631715, |
| "grad_norm": 0.16605891064626507, |
| "learning_rate": 2.6540284360189576e-06, |
| "loss": 0.2431, |
| "step": 1118 |
| }, |
| { |
| "epoch": 2.8618925831202047, |
| "grad_norm": 0.1803054733026333, |
| "learning_rate": 2.6066350710900472e-06, |
| "loss": 0.2508, |
| "step": 1119 |
| }, |
| { |
| "epoch": 2.864450127877238, |
| "grad_norm": 0.17422585403639088, |
| "learning_rate": 2.5592417061611377e-06, |
| "loss": 0.2462, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.867007672634271, |
| "grad_norm": 0.17364151884560752, |
| "learning_rate": 2.5118483412322274e-06, |
| "loss": 0.2583, |
| "step": 1121 |
| }, |
| { |
| "epoch": 2.869565217391304, |
| "grad_norm": 0.1746615119917103, |
| "learning_rate": 2.464454976303318e-06, |
| "loss": 0.2421, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.8721227621483374, |
| "grad_norm": 0.17498173832710498, |
| "learning_rate": 2.4170616113744075e-06, |
| "loss": 0.2437, |
| "step": 1123 |
| }, |
| { |
| "epoch": 2.874680306905371, |
| "grad_norm": 0.16581915880183135, |
| "learning_rate": 2.3696682464454976e-06, |
| "loss": 0.2398, |
| "step": 1124 |
| }, |
| { |
| "epoch": 2.877237851662404, |
| "grad_norm": 0.16997023135551514, |
| "learning_rate": 2.3222748815165876e-06, |
| "loss": 0.2467, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.8797953964194374, |
| "grad_norm": 0.1691406192934443, |
| "learning_rate": 2.2748815165876777e-06, |
| "loss": 0.238, |
| "step": 1126 |
| }, |
| { |
| "epoch": 2.8823529411764706, |
| "grad_norm": 0.178904797391566, |
| "learning_rate": 2.227488151658768e-06, |
| "loss": 0.2664, |
| "step": 1127 |
| }, |
| { |
| "epoch": 2.8849104859335037, |
| "grad_norm": 0.17160493563940152, |
| "learning_rate": 2.180094786729858e-06, |
| "loss": 0.2405, |
| "step": 1128 |
| }, |
| { |
| "epoch": 2.887468030690537, |
| "grad_norm": 0.17542566078202718, |
| "learning_rate": 2.1327014218009483e-06, |
| "loss": 0.2514, |
| "step": 1129 |
| }, |
| { |
| "epoch": 2.89002557544757, |
| "grad_norm": 0.18375078276401854, |
| "learning_rate": 2.085308056872038e-06, |
| "loss": 0.2558, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.8925831202046037, |
| "grad_norm": 0.1746613382211159, |
| "learning_rate": 2.037914691943128e-06, |
| "loss": 0.2498, |
| "step": 1131 |
| }, |
| { |
| "epoch": 2.895140664961637, |
| "grad_norm": 0.17688085965152694, |
| "learning_rate": 1.990521327014218e-06, |
| "loss": 0.2439, |
| "step": 1132 |
| }, |
| { |
| "epoch": 2.89769820971867, |
| "grad_norm": 0.17262679123443198, |
| "learning_rate": 1.943127962085308e-06, |
| "loss": 0.232, |
| "step": 1133 |
| }, |
| { |
| "epoch": 2.9002557544757033, |
| "grad_norm": 0.16308493274857086, |
| "learning_rate": 1.8957345971563984e-06, |
| "loss": 0.2372, |
| "step": 1134 |
| }, |
| { |
| "epoch": 2.9028132992327365, |
| "grad_norm": 0.17307065518752035, |
| "learning_rate": 1.8483412322274883e-06, |
| "loss": 0.2496, |
| "step": 1135 |
| }, |
| { |
| "epoch": 2.90537084398977, |
| "grad_norm": 0.17570971869354174, |
| "learning_rate": 1.8009478672985781e-06, |
| "loss": 0.255, |
| "step": 1136 |
| }, |
| { |
| "epoch": 2.907928388746803, |
| "grad_norm": 0.17365101323268625, |
| "learning_rate": 1.7535545023696684e-06, |
| "loss": 0.2463, |
| "step": 1137 |
| }, |
| { |
| "epoch": 2.9104859335038364, |
| "grad_norm": 0.17347423801664208, |
| "learning_rate": 1.7061611374407583e-06, |
| "loss": 0.2596, |
| "step": 1138 |
| }, |
| { |
| "epoch": 2.9130434782608696, |
| "grad_norm": 0.16784563506361547, |
| "learning_rate": 1.6587677725118486e-06, |
| "loss": 0.2389, |
| "step": 1139 |
| }, |
| { |
| "epoch": 2.915601023017903, |
| "grad_norm": 0.1815931456665935, |
| "learning_rate": 1.6113744075829384e-06, |
| "loss": 0.2601, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.918158567774936, |
| "grad_norm": 0.179248730479389, |
| "learning_rate": 1.5639810426540283e-06, |
| "loss": 0.2613, |
| "step": 1141 |
| }, |
| { |
| "epoch": 2.920716112531969, |
| "grad_norm": 0.16773919256420614, |
| "learning_rate": 1.5165876777251185e-06, |
| "loss": 0.2439, |
| "step": 1142 |
| }, |
| { |
| "epoch": 2.923273657289003, |
| "grad_norm": 0.17107783453591816, |
| "learning_rate": 1.4691943127962086e-06, |
| "loss": 0.2457, |
| "step": 1143 |
| }, |
| { |
| "epoch": 2.9258312020460355, |
| "grad_norm": 0.17079854678851109, |
| "learning_rate": 1.4218009478672987e-06, |
| "loss": 0.2435, |
| "step": 1144 |
| }, |
| { |
| "epoch": 2.928388746803069, |
| "grad_norm": 0.16349978016152963, |
| "learning_rate": 1.3744075829383887e-06, |
| "loss": 0.2304, |
| "step": 1145 |
| }, |
| { |
| "epoch": 2.9309462915601023, |
| "grad_norm": 0.1673539709565629, |
| "learning_rate": 1.3270142180094788e-06, |
| "loss": 0.2436, |
| "step": 1146 |
| }, |
| { |
| "epoch": 2.9335038363171355, |
| "grad_norm": 0.1708287520831406, |
| "learning_rate": 1.2796208530805689e-06, |
| "loss": 0.2622, |
| "step": 1147 |
| }, |
| { |
| "epoch": 2.9360613810741687, |
| "grad_norm": 0.17066789815162614, |
| "learning_rate": 1.232227488151659e-06, |
| "loss": 0.2493, |
| "step": 1148 |
| }, |
| { |
| "epoch": 2.938618925831202, |
| "grad_norm": 0.1716388233712423, |
| "learning_rate": 1.1848341232227488e-06, |
| "loss": 0.2501, |
| "step": 1149 |
| }, |
| { |
| "epoch": 2.9411764705882355, |
| "grad_norm": 0.18539699654347663, |
| "learning_rate": 1.1374407582938388e-06, |
| "loss": 0.2544, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.9437340153452687, |
| "grad_norm": 0.1685342419724911, |
| "learning_rate": 1.090047393364929e-06, |
| "loss": 0.2563, |
| "step": 1151 |
| }, |
| { |
| "epoch": 2.946291560102302, |
| "grad_norm": 0.17685764883456795, |
| "learning_rate": 1.042654028436019e-06, |
| "loss": 0.2484, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.948849104859335, |
| "grad_norm": 0.17233103902620187, |
| "learning_rate": 9.95260663507109e-07, |
| "loss": 0.2471, |
| "step": 1153 |
| }, |
| { |
| "epoch": 2.9514066496163682, |
| "grad_norm": 0.17927170332506637, |
| "learning_rate": 9.478672985781992e-07, |
| "loss": 0.2463, |
| "step": 1154 |
| }, |
| { |
| "epoch": 2.9539641943734014, |
| "grad_norm": 0.16939187906182812, |
| "learning_rate": 9.004739336492891e-07, |
| "loss": 0.2449, |
| "step": 1155 |
| }, |
| { |
| "epoch": 2.9565217391304346, |
| "grad_norm": 0.16994779580235087, |
| "learning_rate": 8.530805687203791e-07, |
| "loss": 0.2449, |
| "step": 1156 |
| }, |
| { |
| "epoch": 2.959079283887468, |
| "grad_norm": 0.1699706601774477, |
| "learning_rate": 8.056872037914692e-07, |
| "loss": 0.2481, |
| "step": 1157 |
| }, |
| { |
| "epoch": 2.9616368286445014, |
| "grad_norm": 0.16870847051946605, |
| "learning_rate": 7.582938388625593e-07, |
| "loss": 0.246, |
| "step": 1158 |
| }, |
| { |
| "epoch": 2.9641943734015346, |
| "grad_norm": 0.1698101486377668, |
| "learning_rate": 7.109004739336493e-07, |
| "loss": 0.2554, |
| "step": 1159 |
| }, |
| { |
| "epoch": 2.9667519181585678, |
| "grad_norm": 0.16651213070393764, |
| "learning_rate": 6.635071090047394e-07, |
| "loss": 0.2358, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.969309462915601, |
| "grad_norm": 0.17237546004566973, |
| "learning_rate": 6.161137440758295e-07, |
| "loss": 0.2593, |
| "step": 1161 |
| }, |
| { |
| "epoch": 2.971867007672634, |
| "grad_norm": 0.1669612454811503, |
| "learning_rate": 5.687203791469194e-07, |
| "loss": 0.2422, |
| "step": 1162 |
| }, |
| { |
| "epoch": 2.9744245524296673, |
| "grad_norm": 0.16627677687780293, |
| "learning_rate": 5.213270142180095e-07, |
| "loss": 0.2524, |
| "step": 1163 |
| }, |
| { |
| "epoch": 2.976982097186701, |
| "grad_norm": 0.17381593936793757, |
| "learning_rate": 4.739336492890996e-07, |
| "loss": 0.2605, |
| "step": 1164 |
| }, |
| { |
| "epoch": 2.979539641943734, |
| "grad_norm": 0.1685052599832634, |
| "learning_rate": 4.2654028436018957e-07, |
| "loss": 0.2436, |
| "step": 1165 |
| }, |
| { |
| "epoch": 2.9820971867007673, |
| "grad_norm": 0.16629494329700928, |
| "learning_rate": 3.7914691943127963e-07, |
| "loss": 0.2509, |
| "step": 1166 |
| }, |
| { |
| "epoch": 2.9846547314578005, |
| "grad_norm": 0.17193426032210676, |
| "learning_rate": 3.317535545023697e-07, |
| "loss": 0.2525, |
| "step": 1167 |
| }, |
| { |
| "epoch": 2.9872122762148337, |
| "grad_norm": 0.1691249872952514, |
| "learning_rate": 2.843601895734597e-07, |
| "loss": 0.2471, |
| "step": 1168 |
| }, |
| { |
| "epoch": 2.9897698209718673, |
| "grad_norm": 0.16940746272899151, |
| "learning_rate": 2.369668246445498e-07, |
| "loss": 0.2421, |
| "step": 1169 |
| }, |
| { |
| "epoch": 2.9923273657289, |
| "grad_norm": 0.16950720483754556, |
| "learning_rate": 1.8957345971563982e-07, |
| "loss": 0.252, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.9948849104859336, |
| "grad_norm": 0.16465075098818885, |
| "learning_rate": 1.4218009478672986e-07, |
| "loss": 0.246, |
| "step": 1171 |
| }, |
| { |
| "epoch": 2.997442455242967, |
| "grad_norm": 0.1658083222308387, |
| "learning_rate": 9.478672985781991e-08, |
| "loss": 0.2591, |
| "step": 1172 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.17583311315224384, |
| "learning_rate": 4.7393364928909954e-08, |
| "loss": 0.2248, |
| "step": 1173 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1173, |
| "total_flos": 1.3044690334083187e+19, |
| "train_loss": 0.4372467596944539, |
| "train_runtime": 36845.5005, |
| "train_samples_per_second": 0.509, |
| "train_steps_per_second": 0.032 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1173, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.3044690334083187e+19, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|