{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 1172,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0008532423208191126,
      "grad_norm": 476.3564758300781,
      "learning_rate": 0.0,
      "loss": 9.6172,
      "step": 1
    },
    {
      "epoch": 0.0017064846416382253,
      "grad_norm": 473.1830139160156,
      "learning_rate": 1.6949152542372883e-07,
      "loss": 9.1602,
      "step": 2
    },
    {
      "epoch": 0.002559726962457338,
      "grad_norm": 576.52734375,
      "learning_rate": 3.3898305084745766e-07,
      "loss": 10.8633,
      "step": 3
    },
    {
      "epoch": 0.0034129692832764505,
      "grad_norm": 394.45654296875,
      "learning_rate": 5.084745762711865e-07,
      "loss": 7.9258,
      "step": 4
    },
    {
      "epoch": 0.004266211604095563,
      "grad_norm": 435.7214660644531,
      "learning_rate": 6.779661016949153e-07,
      "loss": 10.0195,
      "step": 5
    },
    {
      "epoch": 0.005119453924914676,
      "grad_norm": 417.97314453125,
      "learning_rate": 8.474576271186441e-07,
      "loss": 9.6953,
      "step": 6
    },
    {
      "epoch": 0.005972696245733789,
      "grad_norm": 379.0962219238281,
      "learning_rate": 1.016949152542373e-06,
      "loss": 9.4453,
      "step": 7
    },
    {
      "epoch": 0.006825938566552901,
      "grad_norm": 333.2943420410156,
      "learning_rate": 1.186440677966102e-06,
      "loss": 7.7344,
      "step": 8
    },
    {
      "epoch": 0.007679180887372013,
      "grad_norm": 312.8900451660156,
      "learning_rate": 1.3559322033898307e-06,
      "loss": 7.8281,
      "step": 9
    },
    {
      "epoch": 0.008532423208191127,
      "grad_norm": 365.3479919433594,
      "learning_rate": 1.5254237288135596e-06,
      "loss": 9.1172,
      "step": 10
    },
    {
      "epoch": 0.00938566552901024,
      "grad_norm": 267.8406982421875,
      "learning_rate": 1.6949152542372882e-06,
      "loss": 8.2539,
      "step": 11
    },
    {
      "epoch": 0.010238907849829351,
      "grad_norm": 393.07293701171875,
      "learning_rate": 1.8644067796610171e-06,
      "loss": 9.0859,
      "step": 12
    },
    {
      "epoch": 0.011092150170648464,
      "grad_norm": 585.0484008789062,
      "learning_rate": 2.033898305084746e-06,
      "loss": 9.7812,
      "step": 13
    },
    {
      "epoch": 0.011945392491467578,
      "grad_norm": 658.748779296875,
      "learning_rate": 2.203389830508475e-06,
      "loss": 9.1484,
      "step": 14
    },
    {
      "epoch": 0.012798634812286689,
      "grad_norm": 607.95068359375,
      "learning_rate": 2.372881355932204e-06,
      "loss": 9.6484,
      "step": 15
    },
    {
      "epoch": 0.013651877133105802,
      "grad_norm": 722.521728515625,
      "learning_rate": 2.5423728813559323e-06,
      "loss": 10.4219,
      "step": 16
    },
    {
      "epoch": 0.014505119453924915,
      "grad_norm": 504.6495666503906,
      "learning_rate": 2.7118644067796613e-06,
      "loss": 7.9531,
      "step": 17
    },
    {
      "epoch": 0.015358361774744027,
      "grad_norm": 480.6727294921875,
      "learning_rate": 2.8813559322033903e-06,
      "loss": 7.3555,
      "step": 18
    },
    {
      "epoch": 0.016211604095563138,
      "grad_norm": 289.67529296875,
      "learning_rate": 3.0508474576271192e-06,
      "loss": 6.4062,
      "step": 19
    },
    {
      "epoch": 0.017064846416382253,
      "grad_norm": 143.08592224121094,
      "learning_rate": 3.2203389830508473e-06,
      "loss": 5.4492,
      "step": 20
    },
    {
      "epoch": 0.017918088737201365,
      "grad_norm": 145.436279296875,
      "learning_rate": 3.3898305084745763e-06,
      "loss": 5.2383,
      "step": 21
    },
    {
      "epoch": 0.01877133105802048,
      "grad_norm": 363.3115539550781,
      "learning_rate": 3.5593220338983053e-06,
      "loss": 6.668,
      "step": 22
    },
    {
      "epoch": 0.01962457337883959,
      "grad_norm": 311.4585266113281,
      "learning_rate": 3.7288135593220342e-06,
      "loss": 6.0078,
      "step": 23
    },
    {
      "epoch": 0.020477815699658702,
      "grad_norm": 132.5367431640625,
      "learning_rate": 3.898305084745763e-06,
      "loss": 4.8789,
      "step": 24
    },
    {
      "epoch": 0.021331058020477817,
      "grad_norm": 59.49905776977539,
      "learning_rate": 4.067796610169492e-06,
      "loss": 4.9141,
      "step": 25
    },
    {
      "epoch": 0.02218430034129693,
      "grad_norm": 144.73834228515625,
      "learning_rate": 4.23728813559322e-06,
      "loss": 5.2695,
      "step": 26
    },
    {
      "epoch": 0.02303754266211604,
      "grad_norm": 72.64948272705078,
      "learning_rate": 4.40677966101695e-06,
      "loss": 5.3594,
      "step": 27
    },
    {
      "epoch": 0.023890784982935155,
      "grad_norm": 67.38402557373047,
      "learning_rate": 4.576271186440678e-06,
      "loss": 4.5137,
      "step": 28
    },
    {
      "epoch": 0.024744027303754267,
      "grad_norm": 70.24736785888672,
      "learning_rate": 4.745762711864408e-06,
      "loss": 4.7305,
      "step": 29
    },
    {
      "epoch": 0.025597269624573378,
      "grad_norm": 37.224700927734375,
      "learning_rate": 4.915254237288136e-06,
      "loss": 4.5859,
      "step": 30
    },
    {
      "epoch": 0.026450511945392493,
      "grad_norm": 52.372982025146484,
      "learning_rate": 5.084745762711865e-06,
      "loss": 4.6504,
      "step": 31
    },
    {
      "epoch": 0.027303754266211604,
      "grad_norm": 58.09052658081055,
      "learning_rate": 5.254237288135594e-06,
      "loss": 5.0195,
      "step": 32
    },
    {
      "epoch": 0.028156996587030716,
      "grad_norm": 273.7731018066406,
      "learning_rate": 5.423728813559323e-06,
      "loss": 4.877,
      "step": 33
    },
    {
      "epoch": 0.02901023890784983,
      "grad_norm": 257.3023986816406,
      "learning_rate": 5.593220338983051e-06,
      "loss": 5.1367,
      "step": 34
    },
    {
      "epoch": 0.029863481228668942,
      "grad_norm": 37.15542984008789,
      "learning_rate": 5.7627118644067805e-06,
      "loss": 4.498,
      "step": 35
    },
    {
      "epoch": 0.030716723549488054,
      "grad_norm": 57.637229919433594,
      "learning_rate": 5.932203389830509e-06,
      "loss": 4.498,
      "step": 36
    },
    {
      "epoch": 0.031569965870307165,
      "grad_norm": 161.52142333984375,
      "learning_rate": 6.1016949152542385e-06,
      "loss": 4.8535,
      "step": 37
    },
    {
      "epoch": 0.032423208191126277,
      "grad_norm": 144.53648376464844,
      "learning_rate": 6.271186440677966e-06,
      "loss": 5.0547,
      "step": 38
    },
    {
      "epoch": 0.033276450511945395,
      "grad_norm": 97.21565246582031,
      "learning_rate": 6.440677966101695e-06,
      "loss": 4.4238,
      "step": 39
    },
    {
      "epoch": 0.034129692832764506,
      "grad_norm": 189.5275115966797,
      "learning_rate": 6.610169491525424e-06,
      "loss": 4.6973,
      "step": 40
    },
    {
      "epoch": 0.03498293515358362,
      "grad_norm": 52.70186233520508,
      "learning_rate": 6.779661016949153e-06,
      "loss": 4.2754,
      "step": 41
    },
    {
      "epoch": 0.03583617747440273,
      "grad_norm": 204.4432830810547,
      "learning_rate": 6.949152542372882e-06,
      "loss": 5.0195,
      "step": 42
    },
    {
      "epoch": 0.03668941979522184,
      "grad_norm": 179.79095458984375,
      "learning_rate": 7.1186440677966106e-06,
      "loss": 5.0684,
      "step": 43
    },
    {
      "epoch": 0.03754266211604096,
      "grad_norm": 119.27928924560547,
      "learning_rate": 7.288135593220339e-06,
      "loss": 4.6699,
      "step": 44
    },
    {
      "epoch": 0.03839590443686007,
      "grad_norm": 83.18463897705078,
      "learning_rate": 7.4576271186440685e-06,
      "loss": 4.0254,
      "step": 45
    },
    {
      "epoch": 0.03924914675767918,
      "grad_norm": 121.39764404296875,
      "learning_rate": 7.627118644067797e-06,
      "loss": 4.6602,
      "step": 46
    },
    {
      "epoch": 0.04010238907849829,
      "grad_norm": 32.138038635253906,
      "learning_rate": 7.796610169491526e-06,
      "loss": 4.5078,
      "step": 47
    },
    {
      "epoch": 0.040955631399317405,
      "grad_norm": 105.33744812011719,
      "learning_rate": 7.966101694915255e-06,
      "loss": 4.1465,
      "step": 48
    },
    {
      "epoch": 0.041808873720136516,
      "grad_norm": 163.0584259033203,
      "learning_rate": 8.135593220338983e-06,
      "loss": 4.7871,
      "step": 49
    },
    {
      "epoch": 0.042662116040955635,
      "grad_norm": 90.8143081665039,
      "learning_rate": 8.305084745762712e-06,
      "loss": 4.3184,
      "step": 50
    },
    {
      "epoch": 0.043515358361774746,
      "grad_norm": 130.4494171142578,
      "learning_rate": 8.47457627118644e-06,
      "loss": 4.6992,
      "step": 51
    },
    {
      "epoch": 0.04436860068259386,
      "grad_norm": 120.24535369873047,
      "learning_rate": 8.64406779661017e-06,
      "loss": 4.6289,
      "step": 52
    },
    {
      "epoch": 0.04522184300341297,
      "grad_norm": 129.9118194580078,
      "learning_rate": 8.8135593220339e-06,
      "loss": 4.2207,
      "step": 53
    },
    {
      "epoch": 0.04607508532423208,
      "grad_norm": 96.74512481689453,
      "learning_rate": 8.983050847457628e-06,
      "loss": 4.1035,
      "step": 54
    },
    {
      "epoch": 0.04692832764505119,
      "grad_norm": 127.90748596191406,
      "learning_rate": 9.152542372881356e-06,
      "loss": 4.7188,
      "step": 55
    },
    {
      "epoch": 0.04778156996587031,
      "grad_norm": 79.55717468261719,
      "learning_rate": 9.322033898305085e-06,
      "loss": 3.7441,
      "step": 56
    },
    {
      "epoch": 0.04863481228668942,
      "grad_norm": 107.03738403320312,
      "learning_rate": 9.491525423728815e-06,
      "loss": 4.3281,
      "step": 57
    },
    {
      "epoch": 0.04948805460750853,
      "grad_norm": 137.00802612304688,
      "learning_rate": 9.661016949152544e-06,
      "loss": 4.5039,
      "step": 58
    },
    {
      "epoch": 0.050341296928327645,
      "grad_norm": 56.521270751953125,
      "learning_rate": 9.830508474576272e-06,
      "loss": 4.1992,
      "step": 59
    },
    {
      "epoch": 0.051194539249146756,
      "grad_norm": 91.46454620361328,
      "learning_rate": 1e-05,
      "loss": 4.2109,
      "step": 60
    },
    {
      "epoch": 0.05204778156996587,
      "grad_norm": 77.05010986328125,
      "learning_rate": 9.991015274034143e-06,
      "loss": 3.8574,
      "step": 61
    },
    {
      "epoch": 0.052901023890784986,
      "grad_norm": 23.841175079345703,
      "learning_rate": 9.982030548068285e-06,
      "loss": 3.9512,
      "step": 62
    },
    {
      "epoch": 0.0537542662116041,
      "grad_norm": 84.35244750976562,
      "learning_rate": 9.973045822102425e-06,
      "loss": 3.9473,
      "step": 63
    },
    {
      "epoch": 0.05460750853242321,
      "grad_norm": 127.69181823730469,
      "learning_rate": 9.96406109613657e-06,
      "loss": 3.8828,
      "step": 64
    },
    {
      "epoch": 0.05546075085324232,
      "grad_norm": 27.332721710205078,
      "learning_rate": 9.955076370170711e-06,
      "loss": 3.8457,
      "step": 65
    },
    {
      "epoch": 0.05631399317406143,
      "grad_norm": 127.41008758544922,
      "learning_rate": 9.946091644204853e-06,
      "loss": 4.3711,
      "step": 66
    },
    {
      "epoch": 0.05716723549488054,
      "grad_norm": 50.57866287231445,
      "learning_rate": 9.937106918238994e-06,
      "loss": 3.4824,
      "step": 67
    },
    {
      "epoch": 0.05802047781569966,
      "grad_norm": 47.33999252319336,
      "learning_rate": 9.928122192273136e-06,
      "loss": 3.8301,
      "step": 68
    },
    {
      "epoch": 0.05887372013651877,
      "grad_norm": 28.65631675720215,
      "learning_rate": 9.919137466307278e-06,
      "loss": 3.5703,
      "step": 69
    },
    {
      "epoch": 0.059726962457337884,
      "grad_norm": 100.2359619140625,
      "learning_rate": 9.91015274034142e-06,
      "loss": 4.5332,
      "step": 70
    },
    {
      "epoch": 0.060580204778156996,
      "grad_norm": 27.0611572265625,
      "learning_rate": 9.901168014375562e-06,
      "loss": 3.5859,
      "step": 71
    },
    {
      "epoch": 0.06143344709897611,
      "grad_norm": 119.60940551757812,
      "learning_rate": 9.892183288409704e-06,
      "loss": 4.0703,
      "step": 72
    },
    {
      "epoch": 0.06228668941979522,
      "grad_norm": 113.57787322998047,
      "learning_rate": 9.883198562443846e-06,
      "loss": 4.0352,
      "step": 73
    },
    {
      "epoch": 0.06313993174061433,
      "grad_norm": 22.393367767333984,
      "learning_rate": 9.874213836477988e-06,
      "loss": 4.0293,
      "step": 74
    },
    {
      "epoch": 0.06399317406143344,
      "grad_norm": 77.3707504272461,
      "learning_rate": 9.86522911051213e-06,
      "loss": 3.6855,
      "step": 75
    },
    {
      "epoch": 0.06484641638225255,
      "grad_norm": 23.650131225585938,
      "learning_rate": 9.856244384546273e-06,
      "loss": 3.8066,
      "step": 76
    },
    {
      "epoch": 0.06569965870307168,
      "grad_norm": 78.92176055908203,
      "learning_rate": 9.847259658580413e-06,
      "loss": 4.0781,
      "step": 77
    },
    {
      "epoch": 0.06655290102389079,
      "grad_norm": 57.146488189697266,
      "learning_rate": 9.838274932614557e-06,
      "loss": 3.8672,
      "step": 78
    },
    {
      "epoch": 0.0674061433447099,
      "grad_norm": 65.50660705566406,
      "learning_rate": 9.829290206648699e-06,
      "loss": 3.2881,
      "step": 79
    },
    {
      "epoch": 0.06825938566552901,
      "grad_norm": 71.74410247802734,
      "learning_rate": 9.820305480682841e-06,
      "loss": 3.8984,
      "step": 80
    },
    {
      "epoch": 0.06911262798634812,
      "grad_norm": 34.78994369506836,
      "learning_rate": 9.811320754716981e-06,
      "loss": 3.8711,
      "step": 81
    },
    {
      "epoch": 0.06996587030716724,
      "grad_norm": 115.17135620117188,
      "learning_rate": 9.802336028751123e-06,
      "loss": 4.6289,
      "step": 82
    },
    {
      "epoch": 0.07081911262798635,
      "grad_norm": 74.1488037109375,
      "learning_rate": 9.793351302785265e-06,
      "loss": 4.1133,
      "step": 83
    },
    {
      "epoch": 0.07167235494880546,
      "grad_norm": 60.60784149169922,
      "learning_rate": 9.784366576819408e-06,
      "loss": 4.1113,
      "step": 84
    },
    {
      "epoch": 0.07252559726962457,
      "grad_norm": 138.30191040039062,
      "learning_rate": 9.77538185085355e-06,
      "loss": 3.7441,
      "step": 85
    },
    {
      "epoch": 0.07337883959044368,
      "grad_norm": 29.931669235229492,
      "learning_rate": 9.766397124887692e-06,
      "loss": 3.375,
      "step": 86
    },
    {
      "epoch": 0.07423208191126279,
      "grad_norm": 53.22774887084961,
      "learning_rate": 9.757412398921834e-06,
      "loss": 3.6758,
      "step": 87
    },
    {
      "epoch": 0.07508532423208192,
      "grad_norm": 38.70452880859375,
      "learning_rate": 9.748427672955976e-06,
      "loss": 4.002,
      "step": 88
    },
    {
      "epoch": 0.07593856655290103,
      "grad_norm": 30.55535125732422,
      "learning_rate": 9.739442946990118e-06,
      "loss": 4.207,
      "step": 89
    },
    {
      "epoch": 0.07679180887372014,
      "grad_norm": 27.83077049255371,
      "learning_rate": 9.73045822102426e-06,
      "loss": 3.4453,
      "step": 90
    },
    {
      "epoch": 0.07764505119453925,
      "grad_norm": 121.7099380493164,
      "learning_rate": 9.7214734950584e-06,
      "loss": 3.9688,
      "step": 91
    },
    {
      "epoch": 0.07849829351535836,
      "grad_norm": 81.67149353027344,
      "learning_rate": 9.712488769092544e-06,
      "loss": 3.8965,
      "step": 92
    },
    {
      "epoch": 0.07935153583617748,
      "grad_norm": 39.18846893310547,
      "learning_rate": 9.703504043126686e-06,
      "loss": 3.8633,
      "step": 93
    },
    {
      "epoch": 0.08020477815699659,
      "grad_norm": 84.66485595703125,
      "learning_rate": 9.694519317160828e-06,
      "loss": 3.9961,
      "step": 94
    },
    {
      "epoch": 0.0810580204778157,
      "grad_norm": 82.30975341796875,
      "learning_rate": 9.685534591194969e-06,
      "loss": 3.793,
      "step": 95
    },
    {
      "epoch": 0.08191126279863481,
      "grad_norm": 88.6453628540039,
      "learning_rate": 9.676549865229111e-06,
      "loss": 3.8477,
      "step": 96
    },
    {
      "epoch": 0.08276450511945392,
      "grad_norm": 105.96221160888672,
      "learning_rate": 9.667565139263253e-06,
      "loss": 3.9941,
      "step": 97
    },
    {
      "epoch": 0.08361774744027303,
      "grad_norm": 23.890165328979492,
      "learning_rate": 9.658580413297395e-06,
      "loss": 3.3945,
      "step": 98
    },
    {
      "epoch": 0.08447098976109214,
      "grad_norm": 77.99059295654297,
      "learning_rate": 9.649595687331537e-06,
      "loss": 3.6816,
      "step": 99
    },
    {
      "epoch": 0.08532423208191127,
      "grad_norm": 68.72335052490234,
      "learning_rate": 9.64061096136568e-06,
      "loss": 3.8086,
      "step": 100
    },
    {
      "epoch": 0.08617747440273038,
      "grad_norm": 48.387054443359375,
      "learning_rate": 9.631626235399821e-06,
      "loss": 3.7148,
      "step": 101
    },
    {
      "epoch": 0.08703071672354949,
      "grad_norm": 30.06928825378418,
      "learning_rate": 9.622641509433963e-06,
      "loss": 3.1797,
      "step": 102
    },
    {
      "epoch": 0.0878839590443686,
      "grad_norm": 103.71221923828125,
      "learning_rate": 9.613656783468106e-06,
      "loss": 3.4102,
      "step": 103
    },
    {
      "epoch": 0.08873720136518772,
      "grad_norm": 22.561519622802734,
      "learning_rate": 9.604672057502246e-06,
      "loss": 3.4941,
      "step": 104
    },
    {
      "epoch": 0.08959044368600683,
      "grad_norm": 36.27552032470703,
      "learning_rate": 9.595687331536388e-06,
      "loss": 4.0645,
      "step": 105
    },
    {
      "epoch": 0.09044368600682594,
      "grad_norm": 60.4101448059082,
      "learning_rate": 9.58670260557053e-06,
      "loss": 3.1895,
      "step": 106
    },
    {
      "epoch": 0.09129692832764505,
      "grad_norm": 31.981599807739258,
      "learning_rate": 9.577717879604674e-06,
      "loss": 3.123,
      "step": 107
    },
    {
      "epoch": 0.09215017064846416,
      "grad_norm": 51.39161682128906,
      "learning_rate": 9.568733153638814e-06,
      "loss": 3.8027,
      "step": 108
    },
    {
      "epoch": 0.09300341296928327,
      "grad_norm": 17.14482307434082,
      "learning_rate": 9.559748427672956e-06,
      "loss": 3.5996,
      "step": 109
    },
    {
      "epoch": 0.09385665529010238,
      "grad_norm": 79.9706802368164,
      "learning_rate": 9.550763701707098e-06,
      "loss": 3.2578,
      "step": 110
    },
    {
      "epoch": 0.0947098976109215,
      "grad_norm": 19.872787475585938,
      "learning_rate": 9.54177897574124e-06,
      "loss": 3.0957,
      "step": 111
    },
    {
      "epoch": 0.09556313993174062,
      "grad_norm": 50.38517761230469,
      "learning_rate": 9.532794249775383e-06,
      "loss": 3.6895,
      "step": 112
    },
    {
      "epoch": 0.09641638225255973,
      "grad_norm": 50.98223876953125,
      "learning_rate": 9.523809523809525e-06,
      "loss": 3.7188,
      "step": 113
    },
    {
      "epoch": 0.09726962457337884,
      "grad_norm": 46.27577590942383,
      "learning_rate": 9.514824797843667e-06,
      "loss": 3.2383,
      "step": 114
    },
    {
      "epoch": 0.09812286689419795,
      "grad_norm": 43.620479583740234,
      "learning_rate": 9.505840071877809e-06,
      "loss": 3.6348,
      "step": 115
    },
    {
      "epoch": 0.09897610921501707,
      "grad_norm": 73.57115173339844,
      "learning_rate": 9.496855345911951e-06,
      "loss": 3.1875,
      "step": 116
    },
    {
      "epoch": 0.09982935153583618,
      "grad_norm": 29.671640396118164,
      "learning_rate": 9.487870619946093e-06,
      "loss": 3.4141,
      "step": 117
    },
    {
      "epoch": 0.10068259385665529,
      "grad_norm": 37.94879150390625,
      "learning_rate": 9.478885893980234e-06,
      "loss": 3.4414,
      "step": 118
    },
    {
      "epoch": 0.1015358361774744,
      "grad_norm": 51.39364242553711,
      "learning_rate": 9.469901168014376e-06,
      "loss": 3.4434,
      "step": 119
    },
    {
      "epoch": 0.10238907849829351,
      "grad_norm": 46.911163330078125,
      "learning_rate": 9.460916442048518e-06,
      "loss": 3.4355,
      "step": 120
    },
    {
      "epoch": 0.10324232081911262,
      "grad_norm": 32.2253303527832,
      "learning_rate": 9.451931716082661e-06,
      "loss": 3.2402,
      "step": 121
    },
    {
      "epoch": 0.10409556313993173,
      "grad_norm": 87.56474304199219,
      "learning_rate": 9.442946990116802e-06,
      "loss": 3.5059,
      "step": 122
    },
    {
      "epoch": 0.10494880546075085,
      "grad_norm": 75.4452896118164,
      "learning_rate": 9.433962264150944e-06,
      "loss": 3.1016,
      "step": 123
    },
    {
      "epoch": 0.10580204778156997,
      "grad_norm": 21.062419891357422,
      "learning_rate": 9.424977538185086e-06,
      "loss": 3.5176,
      "step": 124
    },
    {
      "epoch": 0.10665529010238908,
      "grad_norm": 34.950862884521484,
      "learning_rate": 9.415992812219228e-06,
      "loss": 3.2168,
      "step": 125
    },
    {
      "epoch": 0.1075085324232082,
      "grad_norm": 89.45964813232422,
      "learning_rate": 9.40700808625337e-06,
      "loss": 3.25,
      "step": 126
    },
    {
      "epoch": 0.1083617747440273,
      "grad_norm": 58.562896728515625,
      "learning_rate": 9.398023360287512e-06,
      "loss": 3.7793,
      "step": 127
    },
    {
      "epoch": 0.10921501706484642,
      "grad_norm": 54.15276336669922,
      "learning_rate": 9.389038634321654e-06,
      "loss": 3.5,
      "step": 128
    },
    {
      "epoch": 0.11006825938566553,
      "grad_norm": 32.4635124206543,
      "learning_rate": 9.380053908355796e-06,
      "loss": 2.915,
      "step": 129
    },
    {
      "epoch": 0.11092150170648464,
      "grad_norm": 22.57988739013672,
      "learning_rate": 9.371069182389939e-06,
      "loss": 2.9502,
      "step": 130
    },
    {
      "epoch": 0.11177474402730375,
      "grad_norm": 38.44780731201172,
      "learning_rate": 9.36208445642408e-06,
      "loss": 2.9336,
      "step": 131
    },
    {
      "epoch": 0.11262798634812286,
      "grad_norm": 21.83592414855957,
      "learning_rate": 9.353099730458221e-06,
      "loss": 2.9707,
      "step": 132
    },
    {
      "epoch": 0.11348122866894197,
      "grad_norm": 24.39005470275879,
      "learning_rate": 9.344115004492363e-06,
      "loss": 3.2578,
      "step": 133
    },
    {
      "epoch": 0.11433447098976109,
      "grad_norm": 59.925758361816406,
      "learning_rate": 9.335130278526505e-06,
      "loss": 3.2988,
      "step": 134
    },
    {
      "epoch": 0.11518771331058021,
      "grad_norm": 74.08988189697266,
      "learning_rate": 9.326145552560647e-06,
      "loss": 2.7236,
      "step": 135
    },
    {
      "epoch": 0.11604095563139932,
      "grad_norm": 63.953453063964844,
      "learning_rate": 9.31716082659479e-06,
      "loss": 3.1807,
      "step": 136
    },
    {
      "epoch": 0.11689419795221843,
      "grad_norm": 36.688720703125,
      "learning_rate": 9.308176100628931e-06,
      "loss": 2.8018,
      "step": 137
    },
    {
      "epoch": 0.11774744027303755,
      "grad_norm": 60.307430267333984,
      "learning_rate": 9.299191374663074e-06,
      "loss": 3.1572,
      "step": 138
    },
    {
      "epoch": 0.11860068259385666,
      "grad_norm": 28.88834571838379,
      "learning_rate": 9.290206648697216e-06,
      "loss": 3.4766,
      "step": 139
    },
    {
      "epoch": 0.11945392491467577,
      "grad_norm": 40.18682861328125,
      "learning_rate": 9.281221922731358e-06,
      "loss": 3.8594,
      "step": 140
    },
    {
      "epoch": 0.12030716723549488,
      "grad_norm": 74.8680648803711,
      "learning_rate": 9.272237196765498e-06,
      "loss": 3.1719,
      "step": 141
    },
    {
      "epoch": 0.12116040955631399,
      "grad_norm": 42.63037109375,
      "learning_rate": 9.263252470799642e-06,
      "loss": 2.8574,
      "step": 142
    },
    {
      "epoch": 0.1220136518771331,
      "grad_norm": 32.380043029785156,
      "learning_rate": 9.254267744833784e-06,
      "loss": 3.2363,
      "step": 143
    },
    {
      "epoch": 0.12286689419795221,
      "grad_norm": 71.21893310546875,
      "learning_rate": 9.245283018867926e-06,
      "loss": 3.1865,
      "step": 144
    },
    {
      "epoch": 0.12372013651877133,
      "grad_norm": 27.608762741088867,
      "learning_rate": 9.236298292902067e-06,
      "loss": 2.9258,
      "step": 145
    },
    {
      "epoch": 0.12457337883959044,
      "grad_norm": 26.77503776550293,
      "learning_rate": 9.227313566936209e-06,
      "loss": 3.0938,
      "step": 146
    },
    {
      "epoch": 0.12542662116040956,
      "grad_norm": 103.02552795410156,
      "learning_rate": 9.21832884097035e-06,
      "loss": 3.1328,
      "step": 147
    },
    {
      "epoch": 0.12627986348122866,
      "grad_norm": 83.52420806884766,
      "learning_rate": 9.209344115004493e-06,
      "loss": 2.9922,
      "step": 148
    },
    {
      "epoch": 0.12713310580204779,
      "grad_norm": 73.67784118652344,
      "learning_rate": 9.200359389038635e-06,
      "loss": 2.8223,
      "step": 149
    },
    {
      "epoch": 0.12798634812286688,
      "grad_norm": 23.8249454498291,
      "learning_rate": 9.191374663072777e-06,
      "loss": 3.3398,
      "step": 150
    },
    {
      "epoch": 0.128839590443686,
      "grad_norm": 69.4936294555664,
      "learning_rate": 9.182389937106919e-06,
      "loss": 3.4326,
      "step": 151
    },
    {
      "epoch": 0.1296928327645051,
      "grad_norm": 88.59197998046875,
      "learning_rate": 9.173405211141061e-06,
      "loss": 3.4414,
      "step": 152
    },
    {
      "epoch": 0.13054607508532423,
      "grad_norm": 29.386064529418945,
      "learning_rate": 9.164420485175203e-06,
      "loss": 3.0303,
      "step": 153
    },
    {
      "epoch": 0.13139931740614336,
      "grad_norm": 38.90749740600586,
      "learning_rate": 9.155435759209345e-06,
      "loss": 2.9619,
      "step": 154
    },
    {
      "epoch": 0.13225255972696245,
      "grad_norm": 74.41140747070312,
      "learning_rate": 9.146451033243486e-06,
      "loss": 3.4102,
      "step": 155
    },
    {
      "epoch": 0.13310580204778158,
      "grad_norm": 44.66842269897461,
      "learning_rate": 9.13746630727763e-06,
      "loss": 2.6963,
      "step": 156
    },
    {
      "epoch": 0.13395904436860068,
      "grad_norm": 33.257205963134766,
      "learning_rate": 9.128481581311772e-06,
      "loss": 3.1924,
      "step": 157
    },
    {
      "epoch": 0.1348122866894198,
      "grad_norm": 100.31049346923828,
      "learning_rate": 9.119496855345914e-06,
      "loss": 3.3809,
      "step": 158
    },
    {
      "epoch": 0.1356655290102389,
      "grad_norm": 54.77112579345703,
      "learning_rate": 9.110512129380054e-06,
      "loss": 3.5938,
      "step": 159
    },
    {
      "epoch": 0.13651877133105803,
      "grad_norm": 18.681119918823242,
      "learning_rate": 9.101527403414196e-06,
      "loss": 3.4785,
      "step": 160
    },
    {
      "epoch": 0.13737201365187712,
      "grad_norm": 62.4477424621582,
      "learning_rate": 9.092542677448338e-06,
      "loss": 3.2266,
      "step": 161
    },
    {
      "epoch": 0.13822525597269625,
      "grad_norm": 97.80989074707031,
      "learning_rate": 9.08355795148248e-06,
      "loss": 3.0898,
      "step": 162
    },
    {
      "epoch": 0.13907849829351535,
      "grad_norm": 91.97032928466797,
      "learning_rate": 9.074573225516622e-06,
      "loss": 3.6445,
      "step": 163
    },
    {
      "epoch": 0.13993174061433447,
      "grad_norm": 31.477741241455078,
      "learning_rate": 9.065588499550765e-06,
      "loss": 3.3359,
      "step": 164
    },
    {
      "epoch": 0.1407849829351536,
      "grad_norm": 21.278085708618164,
      "learning_rate": 9.056603773584907e-06,
      "loss": 3.3047,
      "step": 165
    },
    {
      "epoch": 0.1416382252559727,
      "grad_norm": 52.15373229980469,
      "learning_rate": 9.047619047619049e-06,
      "loss": 3.5176,
      "step": 166
    },
    {
      "epoch": 0.14249146757679182,
      "grad_norm": 14.358907699584961,
      "learning_rate": 9.03863432165319e-06,
      "loss": 2.8945,
      "step": 167
    },
    {
      "epoch": 0.14334470989761092,
      "grad_norm": 33.82578659057617,
      "learning_rate": 9.029649595687333e-06,
      "loss": 3.3242,
      "step": 168
    },
    {
      "epoch": 0.14419795221843004,
      "grad_norm": 72.78608703613281,
      "learning_rate": 9.020664869721473e-06,
      "loss": 3.416,
      "step": 169
    },
    {
      "epoch": 0.14505119453924914,
      "grad_norm": 97.01647186279297,
      "learning_rate": 9.011680143755617e-06,
      "loss": 3.25,
      "step": 170
    },
    {
      "epoch": 0.14590443686006827,
      "grad_norm": 54.42570114135742,
      "learning_rate": 9.002695417789759e-06,
      "loss": 3.1309,
      "step": 171
    },
    {
      "epoch": 0.14675767918088736,
      "grad_norm": 26.412174224853516,
      "learning_rate": 8.9937106918239e-06,
      "loss": 3.4609,
      "step": 172
    },
    {
      "epoch": 0.1476109215017065,
      "grad_norm": 71.91547393798828,
      "learning_rate": 8.984725965858042e-06,
      "loss": 3.1289,
      "step": 173
    },
    {
      "epoch": 0.14846416382252559,
      "grad_norm": 66.65043640136719,
      "learning_rate": 8.975741239892184e-06,
      "loss": 3.2383,
      "step": 174
    },
    {
      "epoch": 0.1493174061433447,
      "grad_norm": 164.26414489746094,
      "learning_rate": 8.966756513926326e-06,
      "loss": 4.3633,
      "step": 175
    },
    {
      "epoch": 0.15017064846416384,
      "grad_norm": 73.09919738769531,
      "learning_rate": 8.957771787960468e-06,
      "loss": 3.2148,
      "step": 176
    },
    {
      "epoch": 0.15102389078498293,
      "grad_norm": 40.517093658447266,
      "learning_rate": 8.94878706199461e-06,
      "loss": 2.9707,
      "step": 177
    },
    {
      "epoch": 0.15187713310580206,
      "grad_norm": 76.36444854736328,
      "learning_rate": 8.939802336028752e-06,
      "loss": 2.9424,
      "step": 178
    },
    {
      "epoch": 0.15273037542662116,
      "grad_norm": 119.0013198852539,
      "learning_rate": 8.930817610062894e-06,
      "loss": 3.1953,
      "step": 179
    },
    {
      "epoch": 0.15358361774744028,
      "grad_norm": 103.3395004272461,
      "learning_rate": 8.921832884097036e-06,
      "loss": 3.543,
      "step": 180
    },
    {
      "epoch": 0.15443686006825938,
      "grad_norm": 106.20706176757812,
      "learning_rate": 8.912848158131178e-06,
      "loss": 3.7734,
      "step": 181
    },
    {
      "epoch": 0.1552901023890785,
      "grad_norm": 53.621829986572266,
      "learning_rate": 8.903863432165319e-06,
      "loss": 3.1689,
      "step": 182
    },
    {
      "epoch": 0.1561433447098976,
      "grad_norm": 47.70130920410156,
      "learning_rate": 8.89487870619946e-06,
      "loss": 3.1543,
      "step": 183
    },
    {
      "epoch": 0.15699658703071673,
      "grad_norm": 38.16180419921875,
      "learning_rate": 8.885893980233603e-06,
      "loss": 2.6494,
      "step": 184
    },
    {
      "epoch": 0.15784982935153583,
      "grad_norm": 89.42051696777344,
      "learning_rate": 8.876909254267747e-06,
      "loss": 3.5508,
      "step": 185
    },
    {
      "epoch": 0.15870307167235495,
      "grad_norm": 60.47245407104492,
      "learning_rate": 8.867924528301887e-06,
      "loss": 3.6777,
      "step": 186
    },
    {
      "epoch": 0.15955631399317405,
      "grad_norm": 24.777610778808594,
      "learning_rate": 8.85893980233603e-06,
      "loss": 2.3027,
      "step": 187
    },
    {
      "epoch": 0.16040955631399317,
      "grad_norm": 28.14823341369629,
      "learning_rate": 8.849955076370171e-06,
      "loss": 3.0215,
      "step": 188
    },
    {
      "epoch": 0.1612627986348123,
      "grad_norm": 33.189239501953125,
      "learning_rate": 8.840970350404313e-06,
      "loss": 3.2891,
      "step": 189
    },
    {
      "epoch": 0.1621160409556314,
      "grad_norm": 25.520509719848633,
      "learning_rate": 8.831985624438455e-06,
      "loss": 2.8379,
      "step": 190
    },
    {
      "epoch": 0.16296928327645052,
      "grad_norm": 55.70583724975586,
      "learning_rate": 8.823000898472598e-06,
      "loss": 3.25,
      "step": 191
    },
    {
      "epoch": 0.16382252559726962,
      "grad_norm": 26.865032196044922,
      "learning_rate": 8.81401617250674e-06,
      "loss": 3.1055,
      "step": 192
    },
    {
      "epoch": 0.16467576791808874,
      "grad_norm": 22.27757453918457,
      "learning_rate": 8.805031446540882e-06,
      "loss": 2.5625,
      "step": 193
    },
    {
      "epoch": 0.16552901023890784,
      "grad_norm": 22.520416259765625,
      "learning_rate": 8.796046720575024e-06,
      "loss": 2.8887,
      "step": 194
    },
    {
      "epoch": 0.16638225255972697,
      "grad_norm": 18.727357864379883,
      "learning_rate": 8.787061994609166e-06,
      "loss": 2.6807,
      "step": 195
    },
    {
      "epoch": 0.16723549488054607,
      "grad_norm": 19.526918411254883,
      "learning_rate": 8.778077268643306e-06,
      "loss": 2.9512,
      "step": 196
    },
    {
      "epoch": 0.1680887372013652,
      "grad_norm": 25.042152404785156,
      "learning_rate": 8.769092542677448e-06,
      "loss": 3.168,
      "step": 197
    },
    {
      "epoch": 0.1689419795221843,
      "grad_norm": 21.94442367553711,
      "learning_rate": 8.76010781671159e-06,
      "loss": 3.3008,
      "step": 198
    },
    {
      "epoch": 0.1697952218430034,
      "grad_norm": 18.068660736083984,
      "learning_rate": 8.751123090745734e-06,
      "loss": 2.999,
      "step": 199
    },
    {
      "epoch": 0.17064846416382254,
      "grad_norm": 54.0893669128418,
      "learning_rate": 8.742138364779875e-06,
      "loss": 2.8115,
      "step": 200
    },
    {
      "epoch": 0.17150170648464164,
      "grad_norm": 87.9207992553711,
      "learning_rate": 8.733153638814017e-06,
      "loss": 3.6152,
      "step": 201
    },
    {
      "epoch": 0.17235494880546076,
      "grad_norm": 33.18696975708008,
      "learning_rate": 8.724168912848159e-06,
      "loss": 3.0527,
      "step": 202
    },
    {
      "epoch": 0.17320819112627986,
      "grad_norm": 36.34266662597656,
      "learning_rate": 8.715184186882301e-06,
      "loss": 3.2754,
      "step": 203
    },
    {
      "epoch": 0.17406143344709898,
      "grad_norm": 55.32210922241211,
      "learning_rate": 8.706199460916443e-06,
      "loss": 3.9746,
      "step": 204
    },
    {
      "epoch": 0.17491467576791808,
      "grad_norm": 27.2241268157959,
      "learning_rate": 8.697214734950583e-06,
      "loss": 3.4297,
      "step": 205
    },
    {
      "epoch": 0.1757679180887372,
      "grad_norm": 19.6944580078125,
      "learning_rate": 8.688230008984727e-06,
      "loss": 3.2139,
      "step": 206
    },
    {
      "epoch": 0.1766211604095563,
      "grad_norm": 14.382315635681152,
      "learning_rate": 8.67924528301887e-06,
      "loss": 2.7715,
      "step": 207
    },
    {
      "epoch": 0.17747440273037543,
      "grad_norm": 20.982158660888672,
      "learning_rate": 8.670260557053011e-06,
      "loss": 3.3418,
      "step": 208
    },
    {
      "epoch": 0.17832764505119453,
      "grad_norm": 23.547433853149414,
      "learning_rate": 8.661275831087152e-06,
      "loss": 2.9199,
      "step": 209
    },
    {
      "epoch": 0.17918088737201365,
      "grad_norm": 43.464237213134766,
      "learning_rate": 8.652291105121294e-06,
      "loss": 3.0234,
      "step": 210
    },
    {
      "epoch": 0.18003412969283278,
      "grad_norm": 41.555118560791016,
      "learning_rate": 8.643306379155436e-06,
      "loss": 3.0107,
      "step": 211
    },
    {
      "epoch": 0.18088737201365188,
      "grad_norm": 42.51097869873047,
      "learning_rate": 8.634321653189578e-06,
      "loss": 3.1074,
      "step": 212
    },
    {
      "epoch": 0.181740614334471,
      "grad_norm": 35.8163948059082,
      "learning_rate": 8.62533692722372e-06,
      "loss": 3.0488,
      "step": 213
    },
    {
      "epoch": 0.1825938566552901,
      "grad_norm": 48.06075668334961,
      "learning_rate": 8.616352201257862e-06,
      "loss": 2.8594,
      "step": 214
    },
    {
      "epoch": 0.18344709897610922,
      "grad_norm": 31.809709548950195,
      "learning_rate": 8.607367475292004e-06,
      "loss": 3.0605,
      "step": 215
    },
    {
      "epoch": 0.18430034129692832,
      "grad_norm": 33.539005279541016,
      "learning_rate": 8.598382749326146e-06,
      "loss": 2.8164,
      "step": 216
    },
    {
      "epoch": 0.18515358361774745,
      "grad_norm": 30.759517669677734,
      "learning_rate": 8.589398023360288e-06,
      "loss": 3.4043,
      "step": 217
    },
    {
      "epoch": 0.18600682593856654,
      "grad_norm": 46.54279327392578,
      "learning_rate": 8.58041329739443e-06,
      "loss": 3.3164,
      "step": 218
    },
    {
      "epoch": 0.18686006825938567,
      "grad_norm": 17.588998794555664,
      "learning_rate": 8.571428571428571e-06,
      "loss": 2.8232,
      "step": 219
    },
    {
      "epoch": 0.18771331058020477,
      "grad_norm": 40.2357063293457,
      "learning_rate": 8.562443845462715e-06,
      "loss": 3.6035,
      "step": 220
    },
    {
      "epoch": 0.1885665529010239,
      "grad_norm": 30.986467361450195,
      "learning_rate": 8.553459119496857e-06,
      "loss": 3.2402,
      "step": 221
    },
    {
      "epoch": 0.189419795221843,
      "grad_norm": 71.314453125,
      "learning_rate": 8.544474393530999e-06,
      "loss": 2.8965,
      "step": 222
    },
    {
      "epoch": 0.19027303754266212,
      "grad_norm": 60.04804611206055,
      "learning_rate": 8.53548966756514e-06,
      "loss": 3.2324,
      "step": 223
    },
    {
      "epoch": 0.19112627986348124,
      "grad_norm": 25.102706909179688,
      "learning_rate": 8.526504941599281e-06,
      "loss": 3.1973,
      "step": 224
    },
    {
      "epoch": 0.19197952218430034,
      "grad_norm": 58.8226203918457,
      "learning_rate": 8.517520215633423e-06,
      "loss": 3.291,
      "step": 225
    },
    {
      "epoch": 0.19283276450511946,
      "grad_norm": 80.93440246582031,
      "learning_rate": 8.508535489667566e-06,
      "loss": 4.0488,
      "step": 226
    },
    {
      "epoch": 0.19368600682593856,
      "grad_norm": 15.695361137390137,
      "learning_rate": 8.499550763701708e-06,
      "loss": 2.875,
      "step": 227
    },
    {
      "epoch": 0.1945392491467577,
      "grad_norm": 18.42605209350586,
      "learning_rate": 8.49056603773585e-06,
      "loss": 3.0059,
      "step": 228
    },
    {
      "epoch": 0.19539249146757678,
      "grad_norm": 20.952381134033203,
      "learning_rate": 8.481581311769992e-06,
      "loss": 3.6758,
      "step": 229
    },
    {
      "epoch": 0.1962457337883959,
      "grad_norm": 33.53485107421875,
      "learning_rate": 8.472596585804134e-06,
      "loss": 2.6758,
      "step": 230
    },
    {
      "epoch": 0.197098976109215,
      "grad_norm": 18.76603889465332,
      "learning_rate": 8.463611859838276e-06,
      "loss": 3.0684,
      "step": 231
    },
    {
      "epoch": 0.19795221843003413,
      "grad_norm": 61.98395538330078,
      "learning_rate": 8.454627133872418e-06,
      "loss": 3.3242,
      "step": 232
    },
    {
      "epoch": 0.19880546075085323,
      "grad_norm": 24.302837371826172,
      "learning_rate": 8.445642407906558e-06,
      "loss": 3.5234,
      "step": 233
    },
    {
      "epoch": 0.19965870307167236,
      "grad_norm": 58.31713104248047,
      "learning_rate": 8.436657681940702e-06,
      "loss": 3.7285,
      "step": 234
    },
    {
      "epoch": 0.20051194539249148,
      "grad_norm": 36.07301712036133,
      "learning_rate": 8.427672955974844e-06,
      "loss": 3.2148,
      "step": 235
    },
    {
      "epoch": 0.20136518771331058,
      "grad_norm": 20.333580017089844,
      "learning_rate": 8.418688230008986e-06,
      "loss": 3.2344,
      "step": 236
    },
    {
      "epoch": 0.2022184300341297,
      "grad_norm": 19.519014358520508,
      "learning_rate": 8.409703504043127e-06,
      "loss": 2.7363,
      "step": 237
    },
    {
      "epoch": 0.2030716723549488,
      "grad_norm": 17.365482330322266,
      "learning_rate": 8.400718778077269e-06,
      "loss": 3.0752,
      "step": 238
    },
    {
      "epoch": 0.20392491467576793,
      "grad_norm": 14.840271949768066,
      "learning_rate": 8.391734052111411e-06,
      "loss": 2.9824,
      "step": 239
    },
    {
      "epoch": 0.20477815699658702,
      "grad_norm": 19.23467254638672,
      "learning_rate": 8.382749326145553e-06,
      "loss": 2.9961,
      "step": 240
    },
    {
      "epoch": 0.20563139931740615,
      "grad_norm": 68.42906188964844,
      "learning_rate": 8.373764600179695e-06,
      "loss": 3.4277,
      "step": 241
    },
    {
      "epoch": 0.20648464163822525,
      "grad_norm": 26.17658042907715,
      "learning_rate": 8.364779874213837e-06,
      "loss": 2.6973,
      "step": 242
    },
    {
      "epoch": 0.20733788395904437,
      "grad_norm": 18.755210876464844,
      "learning_rate": 8.35579514824798e-06,
      "loss": 3.1953,
      "step": 243
    },
    {
      "epoch": 0.20819112627986347,
      "grad_norm": 17.498382568359375,
      "learning_rate": 8.346810422282121e-06,
      "loss": 3.0918,
      "step": 244
    },
    {
      "epoch": 0.2090443686006826,
      "grad_norm": 44.80198669433594,
      "learning_rate": 8.337825696316264e-06,
      "loss": 2.4717,
      "step": 245
    },
    {
      "epoch": 0.2098976109215017,
      "grad_norm": 45.40264892578125,
      "learning_rate": 8.328840970350404e-06,
      "loss": 2.9092,
      "step": 246
    },
    {
      "epoch": 0.21075085324232082,
      "grad_norm": 28.791826248168945,
      "learning_rate": 8.319856244384546e-06,
      "loss": 2.9834,
      "step": 247
    },
    {
      "epoch": 0.21160409556313994,
      "grad_norm": 26.131162643432617,
      "learning_rate": 8.31087151841869e-06,
      "loss": 2.9482,
      "step": 248
    },
    {
      "epoch": 0.21245733788395904,
      "grad_norm": 45.71311950683594,
      "learning_rate": 8.301886792452832e-06,
      "loss": 3.1572,
      "step": 249
    },
    {
      "epoch": 0.21331058020477817,
      "grad_norm": 27.159473419189453,
      "learning_rate": 8.292902066486972e-06,
      "loss": 2.8818,
      "step": 250
    },
    {
      "epoch": 0.21416382252559726,
      "grad_norm": 24.58170509338379,
      "learning_rate": 8.283917340521114e-06,
      "loss": 3.2188,
      "step": 251
    },
    {
      "epoch": 0.2150170648464164,
      "grad_norm": 25.872392654418945,
      "learning_rate": 8.274932614555256e-06,
      "loss": 3.2979,
      "step": 252
    },
    {
      "epoch": 0.2158703071672355,
      "grad_norm": 23.962533950805664,
      "learning_rate": 8.265947888589399e-06,
      "loss": 3.4648,
      "step": 253
    },
    {
      "epoch": 0.2167235494880546,
      "grad_norm": 17.57655143737793,
      "learning_rate": 8.25696316262354e-06,
      "loss": 2.7656,
      "step": 254
    },
    {
      "epoch": 0.2175767918088737,
      "grad_norm": 57.268821716308594,
      "learning_rate": 8.247978436657683e-06,
      "loss": 3.459,
      "step": 255
    },
    {
      "epoch": 0.21843003412969283,
      "grad_norm": 24.482690811157227,
      "learning_rate": 8.238993710691825e-06,
      "loss": 3.584,
      "step": 256
    },
    {
      "epoch": 0.21928327645051193,
      "grad_norm": 25.513710021972656,
      "learning_rate": 8.230008984725967e-06,
      "loss": 3.4512,
      "step": 257
    },
    {
      "epoch": 0.22013651877133106,
      "grad_norm": 55.66819381713867,
      "learning_rate": 8.221024258760109e-06,
      "loss": 3.0996,
      "step": 258
    },
    {
      "epoch": 0.22098976109215018,
      "grad_norm": 50.33326721191406,
      "learning_rate": 8.212039532794251e-06,
      "loss": 3.498,
      "step": 259
    },
    {
      "epoch": 0.22184300341296928,
      "grad_norm": 30.594228744506836,
      "learning_rate": 8.203054806828391e-06,
      "loss": 3.1953,
      "step": 260
    },
    {
      "epoch": 0.2226962457337884,
      "grad_norm": 20.082685470581055,
      "learning_rate": 8.194070080862534e-06,
      "loss": 3.2734,
      "step": 261
    },
    {
      "epoch": 0.2235494880546075,
      "grad_norm": 23.434057235717773,
      "learning_rate": 8.185085354896676e-06,
      "loss": 3.0488,
      "step": 262
    },
    {
      "epoch": 0.22440273037542663,
      "grad_norm": 75.93733215332031,
      "learning_rate": 8.17610062893082e-06,
      "loss": 3.4014,
      "step": 263
    },
    {
      "epoch": 0.22525597269624573,
      "grad_norm": 34.896339416503906,
      "learning_rate": 8.16711590296496e-06,
      "loss": 2.9199,
      "step": 264
    },
    {
      "epoch": 0.22610921501706485,
      "grad_norm": 14.582112312316895,
      "learning_rate": 8.158131176999102e-06,
      "loss": 2.9688,
      "step": 265
    },
    {
      "epoch": 0.22696245733788395,
      "grad_norm": 35.01908874511719,
      "learning_rate": 8.149146451033244e-06,
      "loss": 3.0977,
      "step": 266
    },
    {
      "epoch": 0.22781569965870307,
      "grad_norm": 18.878812789916992,
      "learning_rate": 8.140161725067386e-06,
      "loss": 2.9307,
      "step": 267
    },
    {
      "epoch": 0.22866894197952217,
      "grad_norm": 38.857398986816406,
      "learning_rate": 8.131176999101528e-06,
      "loss": 2.8672,
      "step": 268
    },
    {
      "epoch": 0.2295221843003413,
      "grad_norm": 21.270587921142578,
      "learning_rate": 8.12219227313567e-06,
      "loss": 2.79,
      "step": 269
    },
    {
      "epoch": 0.23037542662116042,
      "grad_norm": 55.603302001953125,
      "learning_rate": 8.113207547169812e-06,
      "loss": 3.1348,
      "step": 270
    },
    {
      "epoch": 0.23122866894197952,
      "grad_norm": 60.84667205810547,
      "learning_rate": 8.104222821203954e-06,
      "loss": 3.1846,
      "step": 271
    },
    {
      "epoch": 0.23208191126279865,
      "grad_norm": 21.8660888671875,
      "learning_rate": 8.095238095238097e-06,
      "loss": 2.4336,
      "step": 272
    },
    {
      "epoch": 0.23293515358361774,
      "grad_norm": 47.333717346191406,
      "learning_rate": 8.086253369272239e-06,
      "loss": 3.2207,
      "step": 273
    },
    {
      "epoch": 0.23378839590443687,
      "grad_norm": 41.70451354980469,
      "learning_rate": 8.077268643306379e-06,
      "loss": 2.8359,
      "step": 274
    },
    {
      "epoch": 0.23464163822525597,
      "grad_norm": 35.146942138671875,
      "learning_rate": 8.068283917340521e-06,
      "loss": 2.6738,
      "step": 275
    },
    {
      "epoch": 0.2354948805460751,
      "grad_norm": 16.213695526123047,
      "learning_rate": 8.059299191374663e-06,
      "loss": 2.8086,
      "step": 276
    },
    {
      "epoch": 0.2363481228668942,
      "grad_norm": 47.92852783203125,
      "learning_rate": 8.050314465408805e-06,
      "loss": 2.8867,
      "step": 277
    },
    {
      "epoch": 0.23720136518771331,
      "grad_norm": 42.166404724121094,
      "learning_rate": 8.041329739442947e-06,
      "loss": 3.125,
      "step": 278
    },
    {
      "epoch": 0.2380546075085324,
      "grad_norm": 24.129230499267578,
      "learning_rate": 8.03234501347709e-06,
      "loss": 3.043,
      "step": 279
    },
    {
      "epoch": 0.23890784982935154,
      "grad_norm": 16.145126342773438,
      "learning_rate": 8.023360287511232e-06,
      "loss": 3.3672,
      "step": 280
    },
    {
      "epoch": 0.23976109215017063,
      "grad_norm": 68.74685668945312,
      "learning_rate": 8.014375561545374e-06,
      "loss": 3.1924,
      "step": 281
    },
    {
      "epoch": 0.24061433447098976,
      "grad_norm": 25.997495651245117,
      "learning_rate": 8.005390835579516e-06,
      "loss": 3.0293,
      "step": 282
    },
    {
      "epoch": 0.24146757679180889,
      "grad_norm": 53.29498291015625,
      "learning_rate": 7.996406109613656e-06,
      "loss": 2.9922,
      "step": 283
    },
    {
      "epoch": 0.24232081911262798,
      "grad_norm": 15.400269508361816,
      "learning_rate": 7.9874213836478e-06,
      "loss": 3.1387,
      "step": 284
    },
    {
      "epoch": 0.2431740614334471,
      "grad_norm": 20.252784729003906,
      "learning_rate": 7.978436657681942e-06,
      "loss": 2.9648,
      "step": 285
    },
    {
      "epoch": 0.2440273037542662,
      "grad_norm": 57.464752197265625,
      "learning_rate": 7.969451931716084e-06,
      "loss": 3.6719,
      "step": 286
    },
    {
      "epoch": 0.24488054607508533,
      "grad_norm": 43.41289520263672,
      "learning_rate": 7.960467205750224e-06,
      "loss": 2.8242,
      "step": 287
    },
    {
      "epoch": 0.24573378839590443,
      "grad_norm": 17.788986206054688,
      "learning_rate": 7.951482479784367e-06,
      "loss": 3.1875,
      "step": 288
    },
    {
      "epoch": 0.24658703071672355,
      "grad_norm": 15.844386100769043,
      "learning_rate": 7.942497753818509e-06,
      "loss": 2.9785,
      "step": 289
    },
    {
      "epoch": 0.24744027303754265,
      "grad_norm": 58.50739669799805,
      "learning_rate": 7.93351302785265e-06,
      "loss": 2.9883,
      "step": 290
    },
    {
      "epoch": 0.24829351535836178,
      "grad_norm": 60.459251403808594,
      "learning_rate": 7.924528301886793e-06,
      "loss": 2.8467,
      "step": 291
    },
    {
      "epoch": 0.24914675767918087,
      "grad_norm": 48.048728942871094,
      "learning_rate": 7.915543575920935e-06,
      "loss": 3.1133,
      "step": 292
    },
    {
      "epoch": 0.25,
      "grad_norm": 25.91424560546875,
      "learning_rate": 7.906558849955077e-06,
      "loss": 2.5762,
      "step": 293
    },
    {
      "epoch": 0.2508532423208191,
      "grad_norm": 65.37548065185547,
      "learning_rate": 7.897574123989219e-06,
      "loss": 3.1191,
      "step": 294
    },
    {
      "epoch": 0.25170648464163825,
      "grad_norm": 48.94771194458008,
      "learning_rate": 7.888589398023361e-06,
      "loss": 2.6484,
      "step": 295
    },
    {
      "epoch": 0.2525597269624573,
      "grad_norm": 45.35071563720703,
      "learning_rate": 7.879604672057503e-06,
      "loss": 3.1436,
      "step": 296
    },
    {
      "epoch": 0.25341296928327645,
      "grad_norm": 50.98272705078125,
      "learning_rate": 7.870619946091644e-06,
      "loss": 2.5635,
      "step": 297
    },
    {
      "epoch": 0.25426621160409557,
      "grad_norm": 24.23581886291504,
      "learning_rate": 7.861635220125787e-06,
      "loss": 3.0156,
      "step": 298
    },
    {
      "epoch": 0.2551194539249147,
      "grad_norm": 61.62641906738281,
      "learning_rate": 7.85265049415993e-06,
      "loss": 3.0684,
      "step": 299
    },
    {
      "epoch": 0.25597269624573377,
      "grad_norm": 31.647212982177734,
      "learning_rate": 7.843665768194072e-06,
      "loss": 2.6011,
      "step": 300
    },
    {
      "epoch": 0.2568259385665529,
      "grad_norm": 29.92403793334961,
      "learning_rate": 7.834681042228212e-06,
      "loss": 3.0156,
      "step": 301
    },
    {
      "epoch": 0.257679180887372,
      "grad_norm": 40.79433059692383,
      "learning_rate": 7.825696316262354e-06,
      "loss": 3.2021,
      "step": 302
    },
    {
      "epoch": 0.25853242320819114,
      "grad_norm": 21.312335968017578,
      "learning_rate": 7.816711590296496e-06,
      "loss": 2.7988,
      "step": 303
    },
    {
      "epoch": 0.2593856655290102,
      "grad_norm": 31.760292053222656,
      "learning_rate": 7.807726864330638e-06,
      "loss": 3.2148,
      "step": 304
    },
    {
      "epoch": 0.26023890784982934,
      "grad_norm": 20.41486167907715,
      "learning_rate": 7.79874213836478e-06,
      "loss": 2.9932,
      "step": 305
    },
    {
      "epoch": 0.26109215017064846,
      "grad_norm": 25.601892471313477,
      "learning_rate": 7.789757412398922e-06,
      "loss": 2.751,
      "step": 306
    },
    {
      "epoch": 0.2619453924914676,
      "grad_norm": 24.824922561645508,
      "learning_rate": 7.780772686433065e-06,
      "loss": 3.0527,
      "step": 307
    },
    {
      "epoch": 0.2627986348122867,
      "grad_norm": 41.55419158935547,
      "learning_rate": 7.771787960467207e-06,
      "loss": 2.877,
      "step": 308
    },
    {
      "epoch": 0.2636518771331058,
      "grad_norm": 31.784027099609375,
      "learning_rate": 7.762803234501349e-06,
      "loss": 3.3652,
      "step": 309
    },
    {
      "epoch": 0.2645051194539249,
      "grad_norm": 36.64832305908203,
      "learning_rate": 7.75381850853549e-06,
      "loss": 3.2246,
      "step": 310
    },
    {
      "epoch": 0.26535836177474403,
      "grad_norm": 59.69587707519531,
      "learning_rate": 7.744833782569631e-06,
      "loss": 2.8984,
      "step": 311
    },
    {
      "epoch": 0.26621160409556316,
      "grad_norm": 29.08624267578125,
      "learning_rate": 7.735849056603775e-06,
      "loss": 2.8701,
      "step": 312
    },
    {
      "epoch": 0.26706484641638223,
      "grad_norm": 34.791015625,
      "learning_rate": 7.726864330637917e-06,
      "loss": 2.5869,
      "step": 313
    },
    {
      "epoch": 0.26791808873720135,
      "grad_norm": 55.99431610107422,
      "learning_rate": 7.717879604672058e-06,
      "loss": 2.9512,
      "step": 314
    },
    {
      "epoch": 0.2687713310580205,
      "grad_norm": 30.719738006591797,
      "learning_rate": 7.7088948787062e-06,
      "loss": 3.1318,
      "step": 315
    },
    {
      "epoch": 0.2696245733788396,
      "grad_norm": 24.247756958007812,
      "learning_rate": 7.699910152740342e-06,
      "loss": 3.3125,
      "step": 316
    },
    {
      "epoch": 0.27047781569965873,
      "grad_norm": 19.7833309173584,
      "learning_rate": 7.690925426774484e-06,
      "loss": 2.8135,
      "step": 317
    },
    {
      "epoch": 0.2713310580204778,
      "grad_norm": 50.110103607177734,
      "learning_rate": 7.681940700808626e-06,
      "loss": 2.707,
      "step": 318
    },
    {
      "epoch": 0.2721843003412969,
      "grad_norm": 21.2917423248291,
      "learning_rate": 7.672955974842768e-06,
      "loss": 2.5205,
      "step": 319
    },
    {
      "epoch": 0.27303754266211605,
      "grad_norm": 33.85706329345703,
      "learning_rate": 7.66397124887691e-06,
      "loss": 2.7705,
      "step": 320
    },
    {
      "epoch": 0.2738907849829352,
      "grad_norm": 59.83601760864258,
      "learning_rate": 7.654986522911052e-06,
      "loss": 3.1875,
      "step": 321
    },
    {
      "epoch": 0.27474402730375425,
      "grad_norm": 61.3809928894043,
      "learning_rate": 7.646001796945194e-06,
      "loss": 3.1445,
      "step": 322
    },
    {
      "epoch": 0.27559726962457337,
      "grad_norm": 77.88739776611328,
      "learning_rate": 7.637017070979336e-06,
      "loss": 2.9951,
      "step": 323
    },
    {
      "epoch": 0.2764505119453925,
      "grad_norm": 47.42338180541992,
      "learning_rate": 7.6280323450134775e-06,
      "loss": 3.1553,
      "step": 324
    },
    {
      "epoch": 0.2773037542662116,
      "grad_norm": 31.061063766479492,
      "learning_rate": 7.61904761904762e-06,
      "loss": 3.2158,
      "step": 325
    },
    {
      "epoch": 0.2781569965870307,
      "grad_norm": 19.786115646362305,
      "learning_rate": 7.6100628930817626e-06,
      "loss": 2.5645,
      "step": 326
    },
    {
      "epoch": 0.2790102389078498,
      "grad_norm": 18.45869255065918,
      "learning_rate": 7.601078167115904e-06,
      "loss": 2.6465,
      "step": 327
    },
    {
      "epoch": 0.27986348122866894,
      "grad_norm": 43.45344924926758,
      "learning_rate": 7.592093441150046e-06,
      "loss": 2.7793,
      "step": 328
    },
    {
      "epoch": 0.28071672354948807,
      "grad_norm": 42.01021957397461,
      "learning_rate": 7.583108715184188e-06,
      "loss": 2.8398,
      "step": 329
    },
    {
      "epoch": 0.2815699658703072,
      "grad_norm": 22.967981338500977,
      "learning_rate": 7.574123989218329e-06,
      "loss": 3.2949,
      "step": 330
    },
    {
      "epoch": 0.28242320819112626,
      "grad_norm": 48.885189056396484,
      "learning_rate": 7.565139263252471e-06,
      "loss": 2.7598,
      "step": 331
    },
    {
      "epoch": 0.2832764505119454,
      "grad_norm": 83.62909698486328,
      "learning_rate": 7.5561545372866126e-06,
      "loss": 3.6533,
      "step": 332
    },
    {
      "epoch": 0.2841296928327645,
      "grad_norm": 102.9934310913086,
      "learning_rate": 7.5471698113207555e-06,
      "loss": 2.9414,
      "step": 333
    },
    {
      "epoch": 0.28498293515358364,
      "grad_norm": 22.28573989868164,
      "learning_rate": 7.538185085354898e-06,
      "loss": 3.0723,
      "step": 334
    },
    {
      "epoch": 0.2858361774744027,
      "grad_norm": 20.038768768310547,
      "learning_rate": 7.52920035938904e-06,
      "loss": 3.2168,
      "step": 335
    },
    {
      "epoch": 0.28668941979522183,
      "grad_norm": 17.91118049621582,
      "learning_rate": 7.520215633423181e-06,
      "loss": 3.1191,
      "step": 336
    },
    {
      "epoch": 0.28754266211604096,
      "grad_norm": 23.860708236694336,
      "learning_rate": 7.511230907457323e-06,
      "loss": 2.8877,
      "step": 337
    },
    {
      "epoch": 0.2883959044368601,
      "grad_norm": 25.451940536499023,
      "learning_rate": 7.502246181491465e-06,
      "loss": 2.9814,
      "step": 338
    },
    {
      "epoch": 0.28924914675767915,
      "grad_norm": 44.59007263183594,
      "learning_rate": 7.493261455525606e-06,
      "loss": 3.0049,
      "step": 339
    },
    {
      "epoch": 0.2901023890784983,
      "grad_norm": 14.881490707397461,
      "learning_rate": 7.484276729559748e-06,
      "loss": 2.5664,
      "step": 340
    },
    {
      "epoch": 0.2909556313993174,
      "grad_norm": 44.933406829833984,
      "learning_rate": 7.475292003593891e-06,
      "loss": 2.8262,
      "step": 341
    },
    {
      "epoch": 0.29180887372013653,
      "grad_norm": 44.93138122558594,
      "learning_rate": 7.4663072776280334e-06,
      "loss": 2.9414,
      "step": 342
    },
    {
      "epoch": 0.29266211604095566,
      "grad_norm": 17.614646911621094,
      "learning_rate": 7.457322551662175e-06,
      "loss": 2.627,
      "step": 343
    },
    {
      "epoch": 0.2935153583617747,
      "grad_norm": 34.46635055541992,
      "learning_rate": 7.448337825696317e-06,
      "loss": 2.9785,
      "step": 344
    },
    {
      "epoch": 0.29436860068259385,
      "grad_norm": 65.03298950195312,
      "learning_rate": 7.439353099730459e-06,
      "loss": 2.6504,
      "step": 345
    },
    {
      "epoch": 0.295221843003413,
      "grad_norm": 21.21845245361328,
      "learning_rate": 7.4303683737646e-06,
      "loss": 2.8721,
      "step": 346
    },
    {
      "epoch": 0.2960750853242321,
      "grad_norm": 33.291107177734375,
      "learning_rate": 7.421383647798742e-06,
      "loss": 2.6748,
      "step": 347
    },
    {
      "epoch": 0.29692832764505117,
      "grad_norm": 18.167999267578125,
      "learning_rate": 7.412398921832885e-06,
      "loss": 2.6123,
      "step": 348
    },
    {
      "epoch": 0.2977815699658703,
      "grad_norm": 41.19535446166992,
      "learning_rate": 7.403414195867027e-06,
      "loss": 3.4941,
      "step": 349
    },
    {
      "epoch": 0.2986348122866894,
      "grad_norm": 28.60065269470215,
      "learning_rate": 7.3944294699011685e-06,
      "loss": 3.0127,
      "step": 350
    },
    {
      "epoch": 0.29948805460750855,
      "grad_norm": 28.26238441467285,
      "learning_rate": 7.3854447439353106e-06,
      "loss": 2.791,
      "step": 351
    },
    {
      "epoch": 0.3003412969283277,
      "grad_norm": 31.91538429260254,
      "learning_rate": 7.376460017969453e-06,
      "loss": 2.7793,
      "step": 352
    },
    {
      "epoch": 0.30119453924914674,
      "grad_norm": 19.526973724365234,
      "learning_rate": 7.367475292003594e-06,
      "loss": 2.8916,
      "step": 353
    },
    {
      "epoch": 0.30204778156996587,
      "grad_norm": 39.5968132019043,
      "learning_rate": 7.358490566037736e-06,
      "loss": 2.6309,
      "step": 354
    },
    {
      "epoch": 0.302901023890785,
      "grad_norm": 53.58340072631836,
      "learning_rate": 7.349505840071879e-06,
      "loss": 2.9912,
      "step": 355
    },
    {
      "epoch": 0.3037542662116041,
      "grad_norm": 38.25190353393555,
      "learning_rate": 7.340521114106021e-06,
      "loss": 2.7041,
      "step": 356
    },
    {
      "epoch": 0.3046075085324232,
      "grad_norm": 15.885865211486816,
      "learning_rate": 7.331536388140162e-06,
      "loss": 3.0859,
      "step": 357
    },
    {
      "epoch": 0.3054607508532423,
      "grad_norm": 19.57723617553711,
      "learning_rate": 7.322551662174304e-06,
      "loss": 3.0664,
      "step": 358
    },
    {
      "epoch": 0.30631399317406144,
      "grad_norm": 63.955474853515625,
      "learning_rate": 7.313566936208446e-06,
      "loss": 2.7041,
      "step": 359
    },
    {
      "epoch": 0.30716723549488056,
      "grad_norm": 32.2467155456543,
      "learning_rate": 7.304582210242588e-06,
      "loss": 3.2461,
      "step": 360
    },
    {
      "epoch": 0.30802047781569963,
      "grad_norm": 30.897621154785156,
      "learning_rate": 7.29559748427673e-06,
      "loss": 3.4238,
      "step": 361
    },
    {
      "epoch": 0.30887372013651876,
      "grad_norm": 18.033702850341797,
      "learning_rate": 7.286612758310873e-06,
      "loss": 3.0957,
      "step": 362
    },
    {
      "epoch": 0.3097269624573379,
      "grad_norm": 41.53689956665039,
      "learning_rate": 7.277628032345015e-06,
      "loss": 2.918,
      "step": 363
    },
    {
      "epoch": 0.310580204778157,
      "grad_norm": 43.60328674316406,
      "learning_rate": 7.268643306379156e-06,
      "loss": 3.0557,
      "step": 364
    },
    {
      "epoch": 0.31143344709897613,
      "grad_norm": 55.88149642944336,
      "learning_rate": 7.259658580413298e-06,
      "loss": 2.7529,
      "step": 365
    },
    {
      "epoch": 0.3122866894197952,
      "grad_norm": 46.07794952392578,
      "learning_rate": 7.250673854447439e-06,
      "loss": 2.835,
      "step": 366
    },
    {
      "epoch": 0.31313993174061433,
      "grad_norm": 19.480363845825195,
      "learning_rate": 7.2416891284815814e-06,
      "loss": 2.4336,
      "step": 367
    },
    {
      "epoch": 0.31399317406143346,
      "grad_norm": 78.5523681640625,
      "learning_rate": 7.2327044025157235e-06,
      "loss": 2.9111,
      "step": 368
    },
    {
      "epoch": 0.3148464163822526,
      "grad_norm": 56.7245979309082,
      "learning_rate": 7.2237196765498665e-06,
      "loss": 2.7549,
      "step": 369
    },
    {
      "epoch": 0.31569965870307165,
      "grad_norm": 94.528076171875,
      "learning_rate": 7.214734950584008e-06,
      "loss": 2.9033,
      "step": 370
    },
    {
      "epoch": 0.3165529010238908,
      "grad_norm": 62.33586883544922,
| "learning_rate": 7.20575022461815e-06, |
| "loss": 3.4121, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.3174061433447099, |
| "grad_norm": 19.558395385742188, |
| "learning_rate": 7.196765498652292e-06, |
| "loss": 2.9961, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.318259385665529, |
| "grad_norm": 41.45348358154297, |
| "learning_rate": 7.187780772686433e-06, |
| "loss": 3.1143, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.3191126279863481, |
| "grad_norm": 42.826805114746094, |
| "learning_rate": 7.178796046720575e-06, |
| "loss": 3.0479, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.3199658703071672, |
| "grad_norm": 60.3271598815918, |
| "learning_rate": 7.169811320754717e-06, |
| "loss": 3.6201, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.32081911262798635, |
| "grad_norm": 15.749074935913086, |
| "learning_rate": 7.16082659478886e-06, |
| "loss": 3.1504, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.3216723549488055, |
| "grad_norm": 28.352935791015625, |
| "learning_rate": 7.1518418688230015e-06, |
| "loss": 2.6055, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.3225255972696246, |
| "grad_norm": 22.02720069885254, |
| "learning_rate": 7.1428571428571436e-06, |
| "loss": 2.6562, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.32337883959044367, |
| "grad_norm": 32.356258392333984, |
| "learning_rate": 7.133872416891286e-06, |
| "loss": 2.502, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.3242320819112628, |
| "grad_norm": 55.318992614746094, |
| "learning_rate": 7.124887690925427e-06, |
| "loss": 3.8184, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.3250853242320819, |
| "grad_norm": 13.6019926071167, |
| "learning_rate": 7.115902964959569e-06, |
| "loss": 2.7568, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.32593856655290104, |
| "grad_norm": 30.500629425048828, |
| "learning_rate": 7.106918238993711e-06, |
| "loss": 2.5703, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.3267918088737201, |
| "grad_norm": 19.479543685913086, |
| "learning_rate": 7.097933513027854e-06, |
| "loss": 3.1357, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.32764505119453924, |
| "grad_norm": 14.30429744720459, |
| "learning_rate": 7.088948787061995e-06, |
| "loss": 2.4258, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.32849829351535836, |
| "grad_norm": 46.64712142944336, |
| "learning_rate": 7.079964061096137e-06, |
| "loss": 2.4863, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.3293515358361775, |
| "grad_norm": 14.86281681060791, |
| "learning_rate": 7.0709793351302794e-06, |
| "loss": 2.5391, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.3302047781569966, |
| "grad_norm": 24.936386108398438, |
| "learning_rate": 7.061994609164421e-06, |
| "loss": 3.0732, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.3310580204778157, |
| "grad_norm": 23.111101150512695, |
| "learning_rate": 7.053009883198563e-06, |
| "loss": 2.9229, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.3319112627986348, |
| "grad_norm": 28.36639976501465, |
| "learning_rate": 7.044025157232705e-06, |
| "loss": 3.0352, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.33276450511945393, |
| "grad_norm": 22.827180862426758, |
| "learning_rate": 7.035040431266848e-06, |
| "loss": 3.0488, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.33361774744027306, |
| "grad_norm": 24.18820571899414, |
| "learning_rate": 7.026055705300989e-06, |
| "loss": 2.1445, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.33447098976109213, |
| "grad_norm": 38.80826950073242, |
| "learning_rate": 7.017070979335131e-06, |
| "loss": 3.0645, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.33532423208191126, |
| "grad_norm": 39.28004455566406, |
| "learning_rate": 7.008086253369273e-06, |
| "loss": 2.6309, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.3361774744027304, |
| "grad_norm": 16.985010147094727, |
| "learning_rate": 6.9991015274034144e-06, |
| "loss": 2.792, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.3370307167235495, |
| "grad_norm": 18.20982551574707, |
| "learning_rate": 6.9901168014375565e-06, |
| "loss": 3.1348, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.3378839590443686, |
| "grad_norm": 23.503843307495117, |
| "learning_rate": 6.981132075471699e-06, |
| "loss": 2.4434, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.3387372013651877, |
| "grad_norm": 69.62710571289062, |
| "learning_rate": 6.9721473495058416e-06, |
| "loss": 2.8184, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.3395904436860068, |
| "grad_norm": 47.18648910522461, |
| "learning_rate": 6.963162623539983e-06, |
| "loss": 2.9189, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.34044368600682595, |
| "grad_norm": 40.54623794555664, |
| "learning_rate": 6.954177897574125e-06, |
| "loss": 2.7744, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.3412969283276451, |
| "grad_norm": 26.348918914794922, |
| "learning_rate": 6.945193171608267e-06, |
| "loss": 2.7061, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.34215017064846415, |
| "grad_norm": 26.754854202270508, |
| "learning_rate": 6.936208445642408e-06, |
| "loss": 2.8203, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.3430034129692833, |
| "grad_norm": 23.61028289794922, |
| "learning_rate": 6.92722371967655e-06, |
| "loss": 2.502, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.3438566552901024, |
| "grad_norm": 51.461273193359375, |
| "learning_rate": 6.9182389937106915e-06, |
| "loss": 3.4434, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.3447098976109215, |
| "grad_norm": 61.8600959777832, |
| "learning_rate": 6.9092542677448345e-06, |
| "loss": 3.0664, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.3455631399317406, |
| "grad_norm": 36.4835319519043, |
| "learning_rate": 6.9002695417789766e-06, |
| "loss": 2.5781, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.3464163822525597, |
| "grad_norm": 20.035572052001953, |
| "learning_rate": 6.891284815813119e-06, |
| "loss": 2.7451, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.34726962457337884, |
| "grad_norm": 23.01044273376465, |
| "learning_rate": 6.88230008984726e-06, |
| "loss": 2.8359, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.34812286689419797, |
| "grad_norm": 34.898773193359375, |
| "learning_rate": 6.873315363881402e-06, |
| "loss": 2.8604, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.34897610921501704, |
| "grad_norm": 17.49709701538086, |
| "learning_rate": 6.864330637915544e-06, |
| "loss": 2.75, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.34982935153583616, |
| "grad_norm": 32.71485900878906, |
| "learning_rate": 6.855345911949685e-06, |
| "loss": 3.1094, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.3506825938566553, |
| "grad_norm": 19.570741653442383, |
| "learning_rate": 6.846361185983828e-06, |
| "loss": 2.9473, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.3515358361774744, |
| "grad_norm": 29.854347229003906, |
| "learning_rate": 6.83737646001797e-06, |
| "loss": 2.8809, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.35238907849829354, |
| "grad_norm": 43.52353286743164, |
| "learning_rate": 6.8283917340521124e-06, |
| "loss": 2.8164, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.3532423208191126, |
| "grad_norm": 26.43115997314453, |
| "learning_rate": 6.819407008086254e-06, |
| "loss": 2.4014, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.35409556313993173, |
| "grad_norm": 25.89423370361328, |
| "learning_rate": 6.810422282120396e-06, |
| "loss": 2.6406, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.35494880546075086, |
| "grad_norm": 35.70558166503906, |
| "learning_rate": 6.801437556154538e-06, |
| "loss": 2.4619, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.35580204778157, |
| "grad_norm": 33.59202194213867, |
| "learning_rate": 6.792452830188679e-06, |
| "loss": 2.8213, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.35665529010238906, |
| "grad_norm": 25.010950088500977, |
| "learning_rate": 6.783468104222821e-06, |
| "loss": 2.8096, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.3575085324232082, |
| "grad_norm": 23.81590461730957, |
| "learning_rate": 6.774483378256964e-06, |
| "loss": 2.3193, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.3583617747440273, |
| "grad_norm": 42.35072708129883, |
| "learning_rate": 6.765498652291106e-06, |
| "loss": 2.6592, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.35921501706484643, |
| "grad_norm": 40.074851989746094, |
| "learning_rate": 6.7565139263252475e-06, |
| "loss": 2.6318, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.36006825938566556, |
| "grad_norm": 32.20216751098633, |
| "learning_rate": 6.7475292003593895e-06, |
| "loss": 3.2891, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.3609215017064846, |
| "grad_norm": 37.29304122924805, |
| "learning_rate": 6.738544474393532e-06, |
| "loss": 2.9023, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.36177474402730375, |
| "grad_norm": 32.8192024230957, |
| "learning_rate": 6.729559748427673e-06, |
| "loss": 2.9736, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.3626279863481229, |
| "grad_norm": 23.262601852416992, |
| "learning_rate": 6.720575022461815e-06, |
| "loss": 2.4893, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.363481228668942, |
| "grad_norm": 40.62841796875, |
| "learning_rate": 6.711590296495958e-06, |
| "loss": 2.9531, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.3643344709897611, |
| "grad_norm": 30.518091201782227, |
| "learning_rate": 6.7026055705301e-06, |
| "loss": 2.4004, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.3651877133105802, |
| "grad_norm": 28.2186336517334, |
| "learning_rate": 6.693620844564241e-06, |
| "loss": 2.8008, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.3660409556313993, |
| "grad_norm": 14.801209449768066, |
| "learning_rate": 6.684636118598383e-06, |
| "loss": 2.4902, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.36689419795221845, |
| "grad_norm": 19.97643280029297, |
| "learning_rate": 6.675651392632525e-06, |
| "loss": 2.6104, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.3677474402730375, |
| "grad_norm": 33.75346374511719, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 2.6221, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.36860068259385664, |
| "grad_norm": 31.13344383239746, |
| "learning_rate": 6.657681940700809e-06, |
| "loss": 2.9883, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.36945392491467577, |
| "grad_norm": 24.86776351928711, |
| "learning_rate": 6.648697214734952e-06, |
| "loss": 3.3652, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.3703071672354949, |
| "grad_norm": 55.863922119140625, |
| "learning_rate": 6.639712488769094e-06, |
| "loss": 2.8779, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.371160409556314, |
| "grad_norm": 14.440893173217773, |
| "learning_rate": 6.630727762803235e-06, |
| "loss": 2.8555, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.3720136518771331, |
| "grad_norm": 16.983476638793945, |
| "learning_rate": 6.621743036837377e-06, |
| "loss": 3.1016, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.3728668941979522, |
| "grad_norm": 17.240015029907227, |
| "learning_rate": 6.612758310871519e-06, |
| "loss": 2.8311, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.37372013651877134, |
| "grad_norm": 18.871740341186523, |
| "learning_rate": 6.60377358490566e-06, |
| "loss": 2.8125, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.37457337883959047, |
| "grad_norm": 36.629554748535156, |
| "learning_rate": 6.5947888589398025e-06, |
| "loss": 2.7119, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.37542662116040953, |
| "grad_norm": 22.077198028564453, |
| "learning_rate": 6.5858041329739454e-06, |
| "loss": 2.5117, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.37627986348122866, |
| "grad_norm": 36.363304138183594, |
| "learning_rate": 6.576819407008087e-06, |
| "loss": 3.335, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.3771331058020478, |
| "grad_norm": 17.072507858276367, |
| "learning_rate": 6.567834681042229e-06, |
| "loss": 2.8789, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.3779863481228669, |
| "grad_norm": 30.818571090698242, |
| "learning_rate": 6.558849955076371e-06, |
| "loss": 2.8848, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.378839590443686, |
| "grad_norm": 35.21898651123047, |
| "learning_rate": 6.549865229110512e-06, |
| "loss": 2.6309, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.3796928327645051, |
| "grad_norm": 24.29189109802246, |
| "learning_rate": 6.540880503144654e-06, |
| "loss": 2.8311, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.38054607508532423, |
| "grad_norm": 27.211631774902344, |
| "learning_rate": 6.531895777178796e-06, |
| "loss": 2.7578, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.38139931740614336, |
| "grad_norm": 20.377609252929688, |
| "learning_rate": 6.522911051212939e-06, |
| "loss": 2.5107, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.3822525597269625, |
| "grad_norm": 31.11419677734375, |
| "learning_rate": 6.5139263252470805e-06, |
| "loss": 3.0205, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.38310580204778155, |
| "grad_norm": 27.811227798461914, |
| "learning_rate": 6.5049415992812226e-06, |
| "loss": 2.5117, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.3839590443686007, |
| "grad_norm": 27.51049041748047, |
| "learning_rate": 6.495956873315365e-06, |
| "loss": 2.748, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.3848122866894198, |
| "grad_norm": 23.872310638427734, |
| "learning_rate": 6.486972147349506e-06, |
| "loss": 3.1309, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.3856655290102389, |
| "grad_norm": 25.639631271362305, |
| "learning_rate": 6.477987421383648e-06, |
| "loss": 3.0293, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.386518771331058, |
| "grad_norm": 78.10359954833984, |
| "learning_rate": 6.46900269541779e-06, |
| "loss": 2.7129, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.3873720136518771, |
| "grad_norm": 33.32661056518555, |
| "learning_rate": 6.460017969451933e-06, |
| "loss": 3.0215, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.38822525597269625, |
| "grad_norm": 18.216999053955078, |
| "learning_rate": 6.451033243486074e-06, |
| "loss": 2.6982, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.3890784982935154, |
| "grad_norm": 15.191929817199707, |
| "learning_rate": 6.442048517520216e-06, |
| "loss": 2.6016, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.38993174061433444, |
| "grad_norm": 20.197877883911133, |
| "learning_rate": 6.433063791554358e-06, |
| "loss": 2.791, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.39078498293515357, |
| "grad_norm": 20.7491397857666, |
| "learning_rate": 6.4240790655885e-06, |
| "loss": 2.3955, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.3916382252559727, |
| "grad_norm": 14.716793060302734, |
| "learning_rate": 6.415094339622642e-06, |
| "loss": 2.6826, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.3924914675767918, |
| "grad_norm": 23.43107032775879, |
| "learning_rate": 6.406109613656784e-06, |
| "loss": 3.1709, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.39334470989761094, |
| "grad_norm": 17.54826545715332, |
| "learning_rate": 6.397124887690927e-06, |
| "loss": 2.3867, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.39419795221843, |
| "grad_norm": 83.90989685058594, |
| "learning_rate": 6.388140161725068e-06, |
| "loss": 2.9434, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.39505119453924914, |
| "grad_norm": 69.95578002929688, |
| "learning_rate": 6.37915543575921e-06, |
| "loss": 2.5518, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.39590443686006827, |
| "grad_norm": 53.826499938964844, |
| "learning_rate": 6.370170709793352e-06, |
| "loss": 2.4453, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.3967576791808874, |
| "grad_norm": 18.601577758789062, |
| "learning_rate": 6.3611859838274934e-06, |
| "loss": 2.9531, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.39761092150170646, |
| "grad_norm": 34.0197868347168, |
| "learning_rate": 6.3522012578616355e-06, |
| "loss": 2.4746, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.3984641638225256, |
| "grad_norm": 44.096134185791016, |
| "learning_rate": 6.343216531895778e-06, |
| "loss": 2.8955, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.3993174061433447, |
| "grad_norm": 24.102462768554688, |
| "learning_rate": 6.3342318059299205e-06, |
| "loss": 2.792, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.40017064846416384, |
| "grad_norm": 17.850786209106445, |
| "learning_rate": 6.325247079964062e-06, |
| "loss": 2.3228, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.40102389078498296, |
| "grad_norm": 21.928977966308594, |
| "learning_rate": 6.316262353998204e-06, |
| "loss": 2.6504, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.40187713310580203, |
| "grad_norm": 25.40172004699707, |
| "learning_rate": 6.307277628032346e-06, |
| "loss": 2.6084, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.40273037542662116, |
| "grad_norm": 16.1717586517334, |
| "learning_rate": 6.298292902066487e-06, |
| "loss": 2.0674, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.4035836177474403, |
| "grad_norm": 29.402706146240234, |
| "learning_rate": 6.289308176100629e-06, |
| "loss": 2.916, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.4044368600682594, |
| "grad_norm": 23.10331153869629, |
| "learning_rate": 6.2803234501347705e-06, |
| "loss": 2.6895, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.4052901023890785, |
| "grad_norm": 23.997806549072266, |
| "learning_rate": 6.2713387241689135e-06, |
| "loss": 2.6953, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.4061433447098976, |
| "grad_norm": 39.14799118041992, |
| "learning_rate": 6.2623539982030556e-06, |
| "loss": 2.625, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.4069965870307167, |
| "grad_norm": 29.97382164001465, |
| "learning_rate": 6.253369272237198e-06, |
| "loss": 3.2129, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.40784982935153585, |
| "grad_norm": 17.89264678955078, |
| "learning_rate": 6.244384546271339e-06, |
| "loss": 2.6367, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.4087030716723549, |
| "grad_norm": 40.39899444580078, |
| "learning_rate": 6.235399820305481e-06, |
| "loss": 2.8965, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.40955631399317405, |
| "grad_norm": 48.41572952270508, |
| "learning_rate": 6.226415094339623e-06, |
| "loss": 3.0703, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.4104095563139932, |
| "grad_norm": 31.183040618896484, |
| "learning_rate": 6.217430368373764e-06, |
| "loss": 2.1592, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.4112627986348123, |
| "grad_norm": 36.558773040771484, |
| "learning_rate": 6.208445642407907e-06, |
| "loss": 2.3945, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.4121160409556314, |
| "grad_norm": 19.117509841918945, |
| "learning_rate": 6.199460916442049e-06, |
| "loss": 3.0605, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.4129692832764505, |
| "grad_norm": 48.38492965698242, |
| "learning_rate": 6.1904761904761914e-06, |
| "loss": 2.291, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.4138225255972696, |
| "grad_norm": 21.41737937927246, |
| "learning_rate": 6.181491464510333e-06, |
| "loss": 2.8252, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.41467576791808874, |
| "grad_norm": 42.10402297973633, |
| "learning_rate": 6.172506738544475e-06, |
| "loss": 3.0918, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.41552901023890787, |
| "grad_norm": 46.72148895263672, |
| "learning_rate": 6.163522012578617e-06, |
| "loss": 2.6162, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.41638225255972694, |
| "grad_norm": 21.43707847595215, |
| "learning_rate": 6.154537286612758e-06, |
| "loss": 2.2754, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.41723549488054607, |
| "grad_norm": 16.188798904418945, |
| "learning_rate": 6.145552560646901e-06, |
| "loss": 2.5078, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.4180887372013652, |
| "grad_norm": 19.047313690185547, |
| "learning_rate": 6.136567834681043e-06, |
| "loss": 2.9209, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.4189419795221843, |
| "grad_norm": 36.833744049072266, |
| "learning_rate": 6.127583108715185e-06, |
| "loss": 2.5791, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.4197952218430034, |
| "grad_norm": 19.706417083740234, |
| "learning_rate": 6.1185983827493264e-06, |
| "loss": 2.9268, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.4206484641638225, |
| "grad_norm": 33.76554870605469, |
| "learning_rate": 6.1096136567834685e-06, |
| "loss": 2.4229, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.42150170648464164, |
| "grad_norm": 35.278297424316406, |
| "learning_rate": 6.100628930817611e-06, |
| "loss": 2.7832, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.42235494880546076, |
| "grad_norm": 25.52465057373047, |
| "learning_rate": 6.091644204851752e-06, |
| "loss": 2.8623, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.4232081911262799, |
| "grad_norm": 50.70538330078125, |
| "learning_rate": 6.082659478885895e-06, |
| "loss": 2.7324, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.42406143344709896, |
| "grad_norm": 33.23221206665039, |
| "learning_rate": 6.073674752920037e-06, |
| "loss": 2.8828, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.4249146757679181, |
| "grad_norm": 44.4984245300293, |
| "learning_rate": 6.064690026954179e-06, |
| "loss": 2.7217, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.4257679180887372, |
| "grad_norm": 40.04144287109375, |
| "learning_rate": 6.05570530098832e-06, |
| "loss": 2.4414, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.42662116040955633, |
| "grad_norm": 31.808074951171875, |
| "learning_rate": 6.046720575022462e-06, |
| "loss": 2.5635, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4274744027303754, |
| "grad_norm": 27.19110679626465, |
| "learning_rate": 6.037735849056604e-06, |
| "loss": 2.6348, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.4283276450511945, |
| "grad_norm": 52.006351470947266, |
| "learning_rate": 6.028751123090746e-06, |
| "loss": 3.4004, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.42918088737201365, |
| "grad_norm": 49.40862274169922, |
| "learning_rate": 6.019766397124888e-06, |
| "loss": 2.6865, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.4300341296928328, |
| "grad_norm": 17.69283676147461, |
| "learning_rate": 6.010781671159031e-06, |
| "loss": 2.4824, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.4308873720136519, |
| "grad_norm": 20.699617385864258, |
| "learning_rate": 6.001796945193173e-06, |
| "loss": 3.0488, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.431740614334471, |
| "grad_norm": 56.287269592285156, |
| "learning_rate": 5.992812219227314e-06, |
| "loss": 3.0703, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.4325938566552901, |
| "grad_norm": 19.926307678222656, |
| "learning_rate": 5.983827493261456e-06, |
| "loss": 2.7871, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.4334470989761092, |
| "grad_norm": 33.74576187133789, |
| "learning_rate": 5.974842767295598e-06, |
| "loss": 2.9238, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.43430034129692835, |
| "grad_norm": 27.027666091918945, |
| "learning_rate": 5.965858041329739e-06, |
| "loss": 2.2666, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.4351535836177474, |
| "grad_norm": 49.87267303466797, |
| "learning_rate": 5.9568733153638815e-06, |
| "loss": 2.4355, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.43600682593856654, |
| "grad_norm": 33.75191879272461, |
| "learning_rate": 5.9478885893980244e-06, |
| "loss": 2.1299, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.43686006825938567, |
| "grad_norm": 23.760793685913086, |
| "learning_rate": 5.938903863432166e-06, |
| "loss": 2.9541, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.4377133105802048, |
| "grad_norm": 19.85642433166504, |
| "learning_rate": 5.929919137466308e-06, |
| "loss": 2.9023, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.43856655290102387, |
| "grad_norm": 42.32032775878906, |
| "learning_rate": 5.92093441150045e-06, |
| "loss": 2.8652, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.439419795221843, |
| "grad_norm": 32.215065002441406, |
| "learning_rate": 5.911949685534591e-06, |
| "loss": 3.1729, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.4402730375426621, |
| "grad_norm": 31.592498779296875, |
| "learning_rate": 5.902964959568733e-06, |
| "loss": 2.5439, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.44112627986348124, |
| "grad_norm": 26.448612213134766, |
| "learning_rate": 5.893980233602875e-06, |
| "loss": 2.3545, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.44197952218430037, |
| "grad_norm": 49.79834747314453, |
| "learning_rate": 5.884995507637018e-06, |
| "loss": 2.8105, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.44283276450511944, |
| "grad_norm": 45.154701232910156, |
| "learning_rate": 5.8760107816711595e-06, |
| "loss": 2.9541, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.44368600682593856, |
| "grad_norm": 41.03085708618164, |
| "learning_rate": 5.8670260557053015e-06, |
| "loss": 2.8398, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.4445392491467577, |
| "grad_norm": 21.58003807067871, |
| "learning_rate": 5.858041329739444e-06, |
| "loss": 2.7559, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.4453924914675768, |
| "grad_norm": 21.989830017089844, |
| "learning_rate": 5.849056603773585e-06, |
| "loss": 3.0488, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.4462457337883959, |
| "grad_norm": 14.759679794311523, |
| "learning_rate": 5.840071877807727e-06, |
| "loss": 2.9902, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.447098976109215, |
| "grad_norm": 14.680983543395996, |
| "learning_rate": 5.831087151841869e-06, |
| "loss": 2.4238, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.44795221843003413, |
| "grad_norm": 20.452116012573242, |
| "learning_rate": 5.822102425876012e-06, |
| "loss": 2.7881, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.44880546075085326, |
| "grad_norm": 15.639450073242188, |
| "learning_rate": 5.813117699910153e-06, |
| "loss": 2.6191, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.4496587030716723, |
| "grad_norm": 38.403995513916016, |
| "learning_rate": 5.804132973944295e-06, |
| "loss": 2.6895, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.45051194539249145, |
| "grad_norm": 14.627884864807129, |
| "learning_rate": 5.795148247978437e-06, |
| "loss": 2.7598, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.4513651877133106, |
| "grad_norm": 17.250015258789062, |
| "learning_rate": 5.786163522012579e-06, |
| "loss": 2.5088, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.4522184300341297, |
| "grad_norm": 36.893882751464844, |
| "learning_rate": 5.777178796046721e-06, |
| "loss": 2.6611, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.45307167235494883, |
| "grad_norm": 29.592458724975586, |
| "learning_rate": 5.768194070080863e-06, |
| "loss": 2.3877, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.4539249146757679, |
| "grad_norm": 29.255516052246094, |
| "learning_rate": 5.759209344115006e-06, |
| "loss": 3.1191, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.454778156996587, |
| "grad_norm": 27.445293426513672, |
| "learning_rate": 5.750224618149147e-06, |
| "loss": 2.4629, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.45563139931740615, |
| "grad_norm": 20.05036735534668, |
| "learning_rate": 5.741239892183289e-06, |
| "loss": 2.5107, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.4564846416382253, |
| "grad_norm": 16.583898544311523, |
| "learning_rate": 5.732255166217431e-06, |
| "loss": 3.3027, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.45733788395904434, |
| "grad_norm": 34.19240188598633, |
| "learning_rate": 5.723270440251572e-06, |
| "loss": 2.3574, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.45819112627986347, |
| "grad_norm": 29.18450164794922, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 2.3818, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.4590443686006826, |
| "grad_norm": 26.387821197509766, |
| "learning_rate": 5.705300988319857e-06, |
| "loss": 2.709, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.4598976109215017, |
| "grad_norm": 19.58378028869629, |
| "learning_rate": 5.6963162623539995e-06, |
| "loss": 2.6631, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.46075085324232085, |
| "grad_norm": 25.05061912536621, |
| "learning_rate": 5.687331536388141e-06, |
| "loss": 2.6152, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.4616040955631399, |
| "grad_norm": 33.281044006347656, |
| "learning_rate": 5.678346810422283e-06, |
| "loss": 2.6494, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.46245733788395904, |
| "grad_norm": 15.196967124938965, |
| "learning_rate": 5.669362084456425e-06, |
| "loss": 2.9629, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.46331058020477817, |
| "grad_norm": 48.015869140625, |
| "learning_rate": 5.660377358490566e-06, |
| "loss": 2.7188, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.4641638225255973, |
| "grad_norm": 40.080692291259766, |
| "learning_rate": 5.651392632524708e-06, |
| "loss": 3.4395, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.46501706484641636, |
| "grad_norm": 14.495575904846191, |
| "learning_rate": 5.64240790655885e-06, |
| "loss": 2.5498, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.4658703071672355, |
| "grad_norm": 20.11421775817871, |
| "learning_rate": 5.6334231805929925e-06, |
| "loss": 2.4463, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.4667235494880546, |
| "grad_norm": 22.752029418945312, |
| "learning_rate": 5.6244384546271346e-06, |
| "loss": 2.376, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.46757679180887374, |
| "grad_norm": 31.627084732055664, |
| "learning_rate": 5.615453728661277e-06, |
| "loss": 2.3809, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.4684300341296928, |
| "grad_norm": 45.1749382019043, |
| "learning_rate": 5.606469002695418e-06, |
| "loss": 2.6025, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.46928327645051193, |
| "grad_norm": 14.6268892288208, |
| "learning_rate": 5.59748427672956e-06, |
| "loss": 2.3457, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.47013651877133106, |
| "grad_norm": 22.34581756591797, |
| "learning_rate": 5.588499550763702e-06, |
| "loss": 2.9863, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.4709897610921502, |
| "grad_norm": 30.1942195892334, |
| "learning_rate": 5.579514824797843e-06, |
| "loss": 3.0391, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.4718430034129693, |
| "grad_norm": 44.00593566894531, |
| "learning_rate": 5.570530098831986e-06, |
| "loss": 3.0767, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.4726962457337884, |
| "grad_norm": 20.9268798828125, |
| "learning_rate": 5.561545372866128e-06, |
| "loss": 2.3945, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.4735494880546075, |
| "grad_norm": 17.279001235961914, |
| "learning_rate": 5.55256064690027e-06, |
| "loss": 2.2637, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.47440273037542663, |
| "grad_norm": 35.23509979248047, |
| "learning_rate": 5.543575920934412e-06, |
| "loss": 2.5107, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.47525597269624575, |
| "grad_norm": 26.05479621887207, |
| "learning_rate": 5.534591194968554e-06, |
| "loss": 2.457, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.4761092150170648, |
| "grad_norm": 50.16437530517578, |
| "learning_rate": 5.525606469002696e-06, |
| "loss": 2.9473, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.47696245733788395, |
| "grad_norm": 27.86246109008789, |
| "learning_rate": 5.516621743036837e-06, |
| "loss": 2.6533, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.4778156996587031, |
| "grad_norm": 43.706398010253906, |
| "learning_rate": 5.50763701707098e-06, |
| "loss": 2.8643, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.4786689419795222, |
| "grad_norm": 41.53769302368164, |
| "learning_rate": 5.498652291105122e-06, |
| "loss": 2.7725, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.47952218430034127, |
| "grad_norm": 46.46355438232422, |
| "learning_rate": 5.489667565139264e-06, |
| "loss": 3.3379, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.4803754266211604, |
| "grad_norm": 20.3262882232666, |
| "learning_rate": 5.4806828391734054e-06, |
| "loss": 2.7764, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.4812286689419795, |
| "grad_norm": 25.64153480529785, |
| "learning_rate": 5.4716981132075475e-06, |
| "loss": 2.6289, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.48208191126279865, |
| "grad_norm": 23.21479606628418, |
| "learning_rate": 5.46271338724169e-06, |
| "loss": 2.793, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.48293515358361777, |
| "grad_norm": 25.748003005981445, |
| "learning_rate": 5.453728661275831e-06, |
| "loss": 2.5664, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.48378839590443684, |
| "grad_norm": 15.132332801818848, |
| "learning_rate": 5.444743935309974e-06, |
| "loss": 2.5977, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.48464163822525597, |
| "grad_norm": 22.88768768310547, |
| "learning_rate": 5.435759209344116e-06, |
| "loss": 2.4688, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.4854948805460751, |
| "grad_norm": 40.691104888916016, |
| "learning_rate": 5.426774483378258e-06, |
| "loss": 2.627, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.4863481228668942, |
| "grad_norm": 19.89053726196289, |
| "learning_rate": 5.417789757412399e-06, |
| "loss": 2.6426, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.4872013651877133, |
| "grad_norm": 52.61355209350586, |
| "learning_rate": 5.408805031446541e-06, |
| "loss": 3.0352, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.4880546075085324, |
| "grad_norm": 25.755590438842773, |
| "learning_rate": 5.399820305480683e-06, |
| "loss": 2.8721, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.48890784982935154, |
| "grad_norm": 49.290321350097656, |
| "learning_rate": 5.390835579514825e-06, |
| "loss": 2.9473, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.48976109215017066, |
| "grad_norm": 17.155630111694336, |
| "learning_rate": 5.3818508535489676e-06, |
| "loss": 2.6191, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.4906143344709898, |
| "grad_norm": 20.74138641357422, |
| "learning_rate": 5.37286612758311e-06, |
| "loss": 2.5684, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.49146757679180886, |
| "grad_norm": 38.56920623779297, |
| "learning_rate": 5.363881401617252e-06, |
| "loss": 2.6221, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.492320819112628, |
| "grad_norm": 31.176231384277344, |
| "learning_rate": 5.354896675651393e-06, |
| "loss": 2.3271, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.4931740614334471, |
| "grad_norm": 54.210899353027344, |
| "learning_rate": 5.345911949685535e-06, |
| "loss": 2.4258, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.49402730375426623, |
| "grad_norm": 21.1136474609375, |
| "learning_rate": 5.336927223719677e-06, |
| "loss": 2.6562, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.4948805460750853, |
| "grad_norm": 16.198816299438477, |
| "learning_rate": 5.327942497753818e-06, |
| "loss": 3.1621, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.49573378839590443, |
| "grad_norm": 25.892831802368164, |
| "learning_rate": 5.3189577717879605e-06, |
| "loss": 2.793, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.49658703071672355, |
| "grad_norm": 15.013483047485352, |
| "learning_rate": 5.3099730458221034e-06, |
| "loss": 2.4639, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.4974402730375427, |
| "grad_norm": 35.05656051635742, |
| "learning_rate": 5.300988319856245e-06, |
| "loss": 2.8516, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.49829351535836175, |
| "grad_norm": 27.9871768951416, |
| "learning_rate": 5.292003593890387e-06, |
| "loss": 2.4189, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.4991467576791809, |
| "grad_norm": 15.608467102050781, |
| "learning_rate": 5.283018867924529e-06, |
| "loss": 2.5889, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 31.001338958740234, |
| "learning_rate": 5.27403414195867e-06, |
| "loss": 2.3174, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.5008532423208191, |
| "grad_norm": 31.35817527770996, |
| "learning_rate": 5.265049415992812e-06, |
| "loss": 2.3955, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.5017064846416383, |
| "grad_norm": 23.374814987182617, |
| "learning_rate": 5.256064690026954e-06, |
| "loss": 3.2852, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.5025597269624573, |
| "grad_norm": 22.058351516723633, |
| "learning_rate": 5.247079964061097e-06, |
| "loss": 2.4004, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.5034129692832765, |
| "grad_norm": 41.5433235168457, |
| "learning_rate": 5.2380952380952384e-06, |
| "loss": 2.8564, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.5042662116040956, |
| "grad_norm": 20.69767189025879, |
| "learning_rate": 5.2291105121293805e-06, |
| "loss": 2.623, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.5051194539249146, |
| "grad_norm": 28.56145477294922, |
| "learning_rate": 5.220125786163523e-06, |
| "loss": 2.6143, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.5059726962457338, |
| "grad_norm": 64.19149780273438, |
| "learning_rate": 5.211141060197664e-06, |
| "loss": 3.0439, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.5068259385665529, |
| "grad_norm": 42.88633346557617, |
| "learning_rate": 5.202156334231806e-06, |
| "loss": 2.3398, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.507679180887372, |
| "grad_norm": 25.10820770263672, |
| "learning_rate": 5.193171608265948e-06, |
| "loss": 2.2949, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.5085324232081911, |
| "grad_norm": 24.562023162841797, |
| "learning_rate": 5.184186882300091e-06, |
| "loss": 2.7129, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.5093856655290102, |
| "grad_norm": 36.790157318115234, |
| "learning_rate": 5.175202156334232e-06, |
| "loss": 2.5508, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.5102389078498294, |
| "grad_norm": 24.073081970214844, |
| "learning_rate": 5.166217430368374e-06, |
| "loss": 2.5186, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.5110921501706485, |
| "grad_norm": 35.33838653564453, |
| "learning_rate": 5.157232704402516e-06, |
| "loss": 2.4971, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.5119453924914675, |
| "grad_norm": 28.3082275390625, |
| "learning_rate": 5.148247978436658e-06, |
| "loss": 2.8848, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5127986348122867, |
| "grad_norm": 25.457237243652344, |
| "learning_rate": 5.1392632524708e-06, |
| "loss": 2.7832, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.5136518771331058, |
| "grad_norm": 34.88469314575195, |
| "learning_rate": 5.130278526504942e-06, |
| "loss": 2.4941, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.514505119453925, |
| "grad_norm": 41.899715423583984, |
| "learning_rate": 5.121293800539085e-06, |
| "loss": 3.002, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.515358361774744, |
| "grad_norm": 27.774612426757812, |
| "learning_rate": 5.112309074573226e-06, |
| "loss": 2.8643, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.5162116040955631, |
| "grad_norm": 67.86431121826172, |
| "learning_rate": 5.103324348607368e-06, |
| "loss": 2.8418, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.5170648464163823, |
| "grad_norm": 27.578550338745117, |
| "learning_rate": 5.09433962264151e-06, |
| "loss": 2.2207, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.5179180887372014, |
| "grad_norm": 31.7324275970459, |
| "learning_rate": 5.085354896675651e-06, |
| "loss": 3.3027, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.5187713310580204, |
| "grad_norm": 20.27518081665039, |
| "learning_rate": 5.0763701707097935e-06, |
| "loss": 2.2842, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.5196245733788396, |
| "grad_norm": 27.45115089416504, |
| "learning_rate": 5.067385444743936e-06, |
| "loss": 2.2988, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.5204778156996587, |
| "grad_norm": 40.405704498291016, |
| "learning_rate": 5.0584007187780785e-06, |
| "loss": 3.1377, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.5213310580204779, |
| "grad_norm": 15.520742416381836, |
| "learning_rate": 5.04941599281222e-06, |
| "loss": 2.3604, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.5221843003412969, |
| "grad_norm": 28.408700942993164, |
| "learning_rate": 5.040431266846362e-06, |
| "loss": 2.5693, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.523037542662116, |
| "grad_norm": 33.49451446533203, |
| "learning_rate": 5.031446540880504e-06, |
| "loss": 2.6279, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.5238907849829352, |
| "grad_norm": 17.04746437072754, |
| "learning_rate": 5.022461814914645e-06, |
| "loss": 2.5713, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.5247440273037542, |
| "grad_norm": 35.0278205871582, |
| "learning_rate": 5.013477088948787e-06, |
| "loss": 2.4004, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.5255972696245734, |
| "grad_norm": 23.72642707824707, |
| "learning_rate": 5.004492362982929e-06, |
| "loss": 2.832, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.5264505119453925, |
| "grad_norm": 17.700857162475586, |
| "learning_rate": 4.9955076370170715e-06, |
| "loss": 2.8467, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.5273037542662116, |
| "grad_norm": 26.797855377197266, |
| "learning_rate": 4.986522911051213e-06, |
| "loss": 2.7002, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.5281569965870307, |
| "grad_norm": 15.89353084564209, |
| "learning_rate": 4.977538185085356e-06, |
| "loss": 2.8301, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.5290102389078498, |
| "grad_norm": 17.26994514465332, |
| "learning_rate": 4.968553459119497e-06, |
| "loss": 2.4023, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.5298634812286689, |
| "grad_norm": 50.756412506103516, |
| "learning_rate": 4.959568733153639e-06, |
| "loss": 2.1123, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.5307167235494881, |
| "grad_norm": 42.956947326660156, |
| "learning_rate": 4.950584007187781e-06, |
| "loss": 2.2637, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.5315699658703071, |
| "grad_norm": 22.47896385192871, |
| "learning_rate": 4.941599281221923e-06, |
| "loss": 2.7529, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.5324232081911263, |
| "grad_norm": 18.382062911987305, |
| "learning_rate": 4.932614555256065e-06, |
| "loss": 2.6665, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.5332764505119454, |
| "grad_norm": 45.38220977783203, |
| "learning_rate": 4.9236298292902065e-06, |
| "loss": 2.4453, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.5341296928327645, |
| "grad_norm": 51.33029556274414, |
| "learning_rate": 4.914645103324349e-06, |
| "loss": 3.1621, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.5349829351535836, |
| "grad_norm": 39.17404556274414, |
| "learning_rate": 4.905660377358491e-06, |
| "loss": 2.9941, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.5358361774744027, |
| "grad_norm": 56.7110595703125, |
| "learning_rate": 4.896675651392633e-06, |
| "loss": 2.9219, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.5366894197952219, |
| "grad_norm": 15.27424144744873, |
| "learning_rate": 4.887690925426775e-06, |
| "loss": 2.5547, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.537542662116041, |
| "grad_norm": 22.87930679321289, |
| "learning_rate": 4.878706199460917e-06, |
| "loss": 2.3506, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.53839590443686, |
| "grad_norm": 53.75349807739258, |
| "learning_rate": 4.869721473495059e-06, |
| "loss": 2.3652, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.5392491467576792, |
| "grad_norm": 30.84817123413086, |
| "learning_rate": 4.8607367475292e-06, |
| "loss": 2.9297, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.5401023890784983, |
| "grad_norm": 20.29245948791504, |
| "learning_rate": 4.851752021563343e-06, |
| "loss": 2.9795, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.5409556313993175, |
| "grad_norm": 17.75739097595215, |
| "learning_rate": 4.842767295597484e-06, |
| "loss": 2.4072, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.5418088737201365, |
| "grad_norm": 21.76918601989746, |
| "learning_rate": 4.8337825696316265e-06, |
| "loss": 2.5088, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.5426621160409556, |
| "grad_norm": 31.445209503173828, |
| "learning_rate": 4.824797843665769e-06, |
| "loss": 2.4473, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.5435153583617748, |
| "grad_norm": 43.67631530761719, |
| "learning_rate": 4.815813117699911e-06, |
| "loss": 2.7129, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.5443686006825939, |
| "grad_norm": 41.579044342041016, |
| "learning_rate": 4.806828391734053e-06, |
| "loss": 2.6562, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.5452218430034129, |
| "grad_norm": 22.987728118896484, |
| "learning_rate": 4.797843665768194e-06, |
| "loss": 2.8525, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.5460750853242321, |
| "grad_norm": 17.46269416809082, |
| "learning_rate": 4.788858939802337e-06, |
| "loss": 2.6797, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.5469283276450512, |
| "grad_norm": 14.546910285949707, |
| "learning_rate": 4.779874213836478e-06, |
| "loss": 2.4775, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.5477815699658704, |
| "grad_norm": 29.865407943725586, |
| "learning_rate": 4.77088948787062e-06, |
| "loss": 2.6279, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.5486348122866894, |
| "grad_norm": 14.915757179260254, |
| "learning_rate": 4.761904761904762e-06, |
| "loss": 2.5225, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.5494880546075085, |
| "grad_norm": 29.640846252441406, |
| "learning_rate": 4.7529200359389045e-06, |
| "loss": 2.4248, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.5503412969283277, |
| "grad_norm": 44.55379104614258, |
| "learning_rate": 4.7439353099730466e-06, |
| "loss": 2.6348, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.5511945392491467, |
| "grad_norm": 18.605289459228516, |
| "learning_rate": 4.734950584007188e-06, |
| "loss": 2.2568, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.5520477815699659, |
| "grad_norm": 22.488618850708008, |
| "learning_rate": 4.725965858041331e-06, |
| "loss": 2.7637, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.552901023890785, |
| "grad_norm": 18.215923309326172, |
| "learning_rate": 4.716981132075472e-06, |
| "loss": 2.4238, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.5537542662116041, |
| "grad_norm": 48.733970642089844, |
| "learning_rate": 4.707996406109614e-06, |
| "loss": 2.998, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.5546075085324232, |
| "grad_norm": 20.413524627685547, |
| "learning_rate": 4.699011680143756e-06, |
| "loss": 2.335, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.5554607508532423, |
| "grad_norm": 46.819068908691406, |
| "learning_rate": 4.690026954177898e-06, |
| "loss": 3.0381, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.5563139931740614, |
| "grad_norm": 26.987506866455078, |
| "learning_rate": 4.68104222821204e-06, |
| "loss": 2.7188, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.5571672354948806, |
| "grad_norm": 37.12288284301758, |
| "learning_rate": 4.6720575022461816e-06, |
| "loss": 2.4189, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.5580204778156996, |
| "grad_norm": 36.88205337524414, |
| "learning_rate": 4.663072776280324e-06, |
| "loss": 2.6191, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.5588737201365188, |
| "grad_norm": 14.645258903503418, |
| "learning_rate": 4.654088050314466e-06, |
| "loss": 1.9053, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.5597269624573379, |
| "grad_norm": 17.644990921020508, |
| "learning_rate": 4.645103324348608e-06, |
| "loss": 2.7158, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.560580204778157, |
| "grad_norm": 70.53995513916016, |
| "learning_rate": 4.636118598382749e-06, |
| "loss": 3.0059, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.5614334470989761, |
| "grad_norm": 50.95589065551758, |
| "learning_rate": 4.627133872416892e-06, |
| "loss": 3.1006, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.5622866894197952, |
| "grad_norm": 41.75291061401367, |
| "learning_rate": 4.618149146451033e-06, |
| "loss": 2.29, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.5631399317406144, |
| "grad_norm": 32.955204010009766, |
| "learning_rate": 4.609164420485175e-06, |
| "loss": 2.3164, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.5639931740614335, |
| "grad_norm": 33.37961196899414, |
| "learning_rate": 4.6001796945193174e-06, |
| "loss": 2.6953, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.5648464163822525, |
| "grad_norm": 41.79698181152344, |
| "learning_rate": 4.5911949685534595e-06, |
| "loss": 2.916, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.5656996587030717, |
| "grad_norm": 18.266895294189453, |
| "learning_rate": 4.582210242587602e-06, |
| "loss": 2.1611, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.5665529010238908, |
| "grad_norm": 29.101303100585938, |
| "learning_rate": 4.573225516621743e-06, |
| "loss": 2.749, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.5674061433447098, |
| "grad_norm": 58.654640197753906, |
| "learning_rate": 4.564240790655886e-06, |
| "loss": 3.3467, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.568259385665529, |
| "grad_norm": 52.830665588378906, |
| "learning_rate": 4.555256064690027e-06, |
| "loss": 3.0654, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.5691126279863481, |
| "grad_norm": 39.432003021240234, |
| "learning_rate": 4.546271338724169e-06, |
| "loss": 2.9551, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.5699658703071673, |
| "grad_norm": 19.742292404174805, |
| "learning_rate": 4.537286612758311e-06, |
| "loss": 2.8438, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.5708191126279863, |
| "grad_norm": 31.633556365966797, |
| "learning_rate": 4.528301886792453e-06, |
| "loss": 2.6396, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.5716723549488054, |
| "grad_norm": 58.045066833496094, |
| "learning_rate": 4.519317160826595e-06, |
| "loss": 2.7832, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.5725255972696246, |
| "grad_norm": 55.10057067871094, |
| "learning_rate": 4.510332434860737e-06, |
| "loss": 3.0498, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.5733788395904437, |
| "grad_norm": 41.77906799316406, |
| "learning_rate": 4.5013477088948796e-06, |
| "loss": 2.9658, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.5742320819112628, |
| "grad_norm": 27.726163864135742, |
| "learning_rate": 4.492362982929021e-06, |
| "loss": 2.7373, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.5750853242320819, |
| "grad_norm": 16.946115493774414, |
| "learning_rate": 4.483378256963163e-06, |
| "loss": 3.0039, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.575938566552901, |
| "grad_norm": 21.34795379638672, |
| "learning_rate": 4.474393530997305e-06, |
| "loss": 3.1045, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.5767918088737202, |
| "grad_norm": 45.67304229736328, |
| "learning_rate": 4.465408805031447e-06, |
| "loss": 2.7686, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.5776450511945392, |
| "grad_norm": 34.049530029296875, |
| "learning_rate": 4.456424079065589e-06, |
| "loss": 2.4395, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.5784982935153583, |
| "grad_norm": 36.776790618896484, |
| "learning_rate": 4.44743935309973e-06, |
| "loss": 2.8105, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.5793515358361775, |
| "grad_norm": 17.602291107177734, |
| "learning_rate": 4.438454627133873e-06, |
| "loss": 2.9131, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.5802047781569966, |
| "grad_norm": 27.473234176635742, |
| "learning_rate": 4.429469901168015e-06, |
| "loss": 2.8359, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.5810580204778157, |
| "grad_norm": 22.257198333740234, |
| "learning_rate": 4.420485175202157e-06, |
| "loss": 2.4365, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.5819112627986348, |
| "grad_norm": 53.0062141418457, |
| "learning_rate": 4.411500449236299e-06, |
| "loss": 2.2266, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.5827645051194539, |
| "grad_norm": 35.180240631103516, |
| "learning_rate": 4.402515723270441e-06, |
| "loss": 2.6904, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.5836177474402731, |
| "grad_norm": 28.43410873413086, |
| "learning_rate": 4.393530997304583e-06, |
| "loss": 2.4229, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.5844709897610921, |
| "grad_norm": 18.973915100097656, |
| "learning_rate": 4.384546271338724e-06, |
| "loss": 2.5684, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.5853242320819113, |
| "grad_norm": 66.25029754638672, |
| "learning_rate": 4.375561545372867e-06, |
| "loss": 2.3496, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.5861774744027304, |
| "grad_norm": 32.91408157348633, |
| "learning_rate": 4.366576819407008e-06, |
| "loss": 2.3613, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.5870307167235495, |
| "grad_norm": 27.794586181640625, |
| "learning_rate": 4.3575920934411504e-06, |
| "loss": 3.0039, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.5878839590443686, |
| "grad_norm": 33.872276306152344, |
| "learning_rate": 4.348607367475292e-06, |
| "loss": 2.5781, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.5887372013651877, |
| "grad_norm": 23.393707275390625, |
| "learning_rate": 4.339622641509435e-06, |
| "loss": 2.4434, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.5895904436860068, |
| "grad_norm": 27.395784378051758, |
| "learning_rate": 4.330637915543576e-06, |
| "loss": 2.3867, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.590443686006826, |
| "grad_norm": 27.155609130859375, |
| "learning_rate": 4.321653189577718e-06, |
| "loss": 2.7422, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.591296928327645, |
| "grad_norm": 25.652048110961914, |
| "learning_rate": 4.31266846361186e-06, |
| "loss": 2.4365, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.5921501706484642, |
| "grad_norm": 22.866825103759766, |
| "learning_rate": 4.303683737646002e-06, |
| "loss": 2.7734, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.5930034129692833, |
| "grad_norm": 43.10763931274414, |
| "learning_rate": 4.294699011680144e-06, |
| "loss": 2.5938, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.5938566552901023, |
| "grad_norm": 24.86405372619629, |
| "learning_rate": 4.2857142857142855e-06, |
| "loss": 2.2871, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.5947098976109215, |
| "grad_norm": 25.735193252563477, |
| "learning_rate": 4.276729559748428e-06, |
| "loss": 2.4424, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.5955631399317406, |
| "grad_norm": 17.094524383544922, |
| "learning_rate": 4.26774483378257e-06, |
| "loss": 3.0898, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.5964163822525598, |
| "grad_norm": 32.05537796020508, |
| "learning_rate": 4.258760107816712e-06, |
| "loss": 3.0322, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.5972696245733788, |
| "grad_norm": 19.903535842895508, |
| "learning_rate": 4.249775381850854e-06, |
| "loss": 2.2871, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5981228668941979, |
| "grad_norm": 28.79990005493164, |
| "learning_rate": 4.240790655884996e-06, |
| "loss": 2.6797, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.5989761092150171, |
| "grad_norm": 13.19858169555664, |
| "learning_rate": 4.231805929919138e-06, |
| "loss": 2.417, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.5998293515358362, |
| "grad_norm": 23.84611701965332, |
| "learning_rate": 4.222821203953279e-06, |
| "loss": 2.5889, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.6006825938566553, |
| "grad_norm": 47.766387939453125, |
| "learning_rate": 4.213836477987422e-06, |
| "loss": 2.335, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.6015358361774744, |
| "grad_norm": 41.32902526855469, |
| "learning_rate": 4.204851752021563e-06, |
| "loss": 2.9268, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.6023890784982935, |
| "grad_norm": 27.452489852905273, |
| "learning_rate": 4.1958670260557055e-06, |
| "loss": 2.2168, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.6032423208191127, |
| "grad_norm": 19.42645263671875, |
| "learning_rate": 4.186882300089848e-06, |
| "loss": 2.5527, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.6040955631399317, |
| "grad_norm": 21.670026779174805, |
| "learning_rate": 4.17789757412399e-06, |
| "loss": 2.3047, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.6049488054607508, |
| "grad_norm": 41.55127716064453, |
| "learning_rate": 4.168912848158132e-06, |
| "loss": 2.7568, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.60580204778157, |
| "grad_norm": 18.455894470214844, |
| "learning_rate": 4.159928122192273e-06, |
| "loss": 2.1895, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.606655290102389, |
| "grad_norm": 32.20492172241211, |
| "learning_rate": 4.150943396226416e-06, |
| "loss": 2.6816, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.6075085324232082, |
| "grad_norm": 21.960777282714844, |
| "learning_rate": 4.141958670260557e-06, |
| "loss": 2.7148, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.6083617747440273, |
| "grad_norm": 18.987390518188477, |
| "learning_rate": 4.132973944294699e-06, |
| "loss": 2.4961, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.6092150170648464, |
| "grad_norm": 19.618938446044922, |
| "learning_rate": 4.123989218328841e-06, |
| "loss": 2.1768, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.6100682593856656, |
| "grad_norm": 20.22203254699707, |
| "learning_rate": 4.1150044923629835e-06, |
| "loss": 2.5034, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.6109215017064846, |
| "grad_norm": 25.868797302246094, |
| "learning_rate": 4.1060197663971255e-06, |
| "loss": 2.2061, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.6117747440273038, |
| "grad_norm": 19.47433090209961, |
| "learning_rate": 4.097035040431267e-06, |
| "loss": 3.0068, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.6126279863481229, |
| "grad_norm": 34.66838836669922, |
| "learning_rate": 4.08805031446541e-06, |
| "loss": 2.7236, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.613481228668942, |
| "grad_norm": 59.0142822265625, |
| "learning_rate": 4.079065588499551e-06, |
| "loss": 3.1172, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.6143344709897611, |
| "grad_norm": 64.44792938232422, |
| "learning_rate": 4.070080862533693e-06, |
| "loss": 2.5957, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.6151877133105802, |
| "grad_norm": 16.721546173095703, |
| "learning_rate": 4.061096136567835e-06, |
| "loss": 2.8066, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.6160409556313993, |
| "grad_norm": 24.300310134887695, |
| "learning_rate": 4.052111410601977e-06, |
| "loss": 2.7002, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.6168941979522184, |
| "grad_norm": 34.62942123413086, |
| "learning_rate": 4.043126684636119e-06, |
| "loss": 2.9609, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.6177474402730375, |
| "grad_norm": 26.634056091308594, |
| "learning_rate": 4.0341419586702606e-06, |
| "loss": 2.3535, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.6186006825938567, |
| "grad_norm": 40.439910888671875, |
| "learning_rate": 4.025157232704403e-06, |
| "loss": 2.9062, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.6194539249146758, |
| "grad_norm": 17.941150665283203, |
| "learning_rate": 4.016172506738545e-06, |
| "loss": 2.7451, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.6203071672354948, |
| "grad_norm": 14.031157493591309, |
| "learning_rate": 4.007187780772687e-06, |
| "loss": 2.5586, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.621160409556314, |
| "grad_norm": 12.27566146850586, |
| "learning_rate": 3.998203054806828e-06, |
| "loss": 2.3438, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.6220136518771331, |
| "grad_norm": 13.139644622802734, |
| "learning_rate": 3.989218328840971e-06, |
| "loss": 2.4688, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.6228668941979523, |
| "grad_norm": 16.500751495361328, |
| "learning_rate": 3.980233602875112e-06, |
| "loss": 2.7188, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.6237201365187713, |
| "grad_norm": 28.81122398376465, |
| "learning_rate": 3.971248876909254e-06, |
| "loss": 2.3291, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.6245733788395904, |
| "grad_norm": 25.21991539001465, |
| "learning_rate": 3.962264150943396e-06, |
| "loss": 2.5391, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.6254266211604096, |
| "grad_norm": 29.478809356689453, |
| "learning_rate": 3.9532794249775385e-06, |
| "loss": 2.7256, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.6262798634812287, |
| "grad_norm": 20.742538452148438, |
| "learning_rate": 3.944294699011681e-06, |
| "loss": 3.0352, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.6271331058020477, |
| "grad_norm": 94.33541107177734, |
| "learning_rate": 3.935309973045822e-06, |
| "loss": 2.3477, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.6279863481228669, |
| "grad_norm": 14.510876655578613, |
| "learning_rate": 3.926325247079965e-06, |
| "loss": 2.127, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.628839590443686, |
| "grad_norm": 21.58650016784668, |
| "learning_rate": 3.917340521114106e-06, |
| "loss": 2.2949, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.6296928327645052, |
| "grad_norm": 17.82122802734375, |
| "learning_rate": 3.908355795148248e-06, |
| "loss": 2.8574, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.6305460750853242, |
| "grad_norm": 18.0192813873291, |
| "learning_rate": 3.89937106918239e-06, |
| "loss": 2.3711, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.6313993174061433, |
| "grad_norm": 24.10041618347168, |
| "learning_rate": 3.890386343216532e-06, |
| "loss": 2.9199, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.6322525597269625, |
| "grad_norm": 27.862274169921875, |
| "learning_rate": 3.881401617250674e-06, |
| "loss": 2.8037, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.6331058020477816, |
| "grad_norm": 16.650089263916016, |
| "learning_rate": 3.872416891284816e-06, |
| "loss": 2.2358, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.6339590443686007, |
| "grad_norm": 24.472097396850586, |
| "learning_rate": 3.8634321653189586e-06, |
| "loss": 2.3711, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.6348122866894198, |
| "grad_norm": 14.919700622558594, |
| "learning_rate": 3.8544474393531e-06, |
| "loss": 2.7002, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.6356655290102389, |
| "grad_norm": 13.569048881530762, |
| "learning_rate": 3.845462713387242e-06, |
| "loss": 2.3213, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.636518771331058, |
| "grad_norm": 24.573429107666016, |
| "learning_rate": 3.836477987421384e-06, |
| "loss": 2.7637, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.6373720136518771, |
| "grad_norm": 18.409570693969727, |
| "learning_rate": 3.827493261455526e-06, |
| "loss": 2.4854, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.6382252559726962, |
| "grad_norm": 14.060251235961914, |
| "learning_rate": 3.818508535489668e-06, |
| "loss": 2.5479, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.6390784982935154, |
| "grad_norm": 22.34339714050293, |
| "learning_rate": 3.80952380952381e-06, |
| "loss": 2.5029, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.6399317406143344, |
| "grad_norm": 19.189834594726562, |
| "learning_rate": 3.800539083557952e-06, |
| "loss": 2.126, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.6407849829351536, |
| "grad_norm": 51.201881408691406, |
| "learning_rate": 3.791554357592094e-06, |
| "loss": 2.3213, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.6416382252559727, |
| "grad_norm": 35.899330139160156, |
| "learning_rate": 3.7825696316262357e-06, |
| "loss": 2.7754, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.6424914675767918, |
| "grad_norm": 25.565492630004883, |
| "learning_rate": 3.7735849056603777e-06, |
| "loss": 2.5352, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.643344709897611, |
| "grad_norm": 17.710491180419922, |
| "learning_rate": 3.76460017969452e-06, |
| "loss": 2.4473, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.64419795221843, |
| "grad_norm": 30.0711612701416, |
| "learning_rate": 3.7556154537286615e-06, |
| "loss": 2.9932, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.6450511945392492, |
| "grad_norm": 33.850616455078125, |
| "learning_rate": 3.746630727762803e-06, |
| "loss": 2.5303, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.6459044368600683, |
| "grad_norm": 29.517227172851562, |
| "learning_rate": 3.7376460017969457e-06, |
| "loss": 2.1904, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.6467576791808873, |
| "grad_norm": 35.89356994628906, |
| "learning_rate": 3.7286612758310873e-06, |
| "loss": 3.1494, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.6476109215017065, |
| "grad_norm": 42.21514129638672, |
| "learning_rate": 3.7196765498652294e-06, |
| "loss": 2.5229, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.6484641638225256, |
| "grad_norm": 17.243484497070312, |
| "learning_rate": 3.710691823899371e-06, |
| "loss": 2.9141, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.6493174061433447, |
| "grad_norm": 24.45879364013672, |
| "learning_rate": 3.7017070979335136e-06, |
| "loss": 2.208, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.6501706484641638, |
| "grad_norm": 21.869504928588867, |
| "learning_rate": 3.6927223719676553e-06, |
| "loss": 2.5908, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.6510238907849829, |
| "grad_norm": 18.739221572875977, |
| "learning_rate": 3.683737646001797e-06, |
| "loss": 2.5801, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.6518771331058021, |
| "grad_norm": 26.45047378540039, |
| "learning_rate": 3.6747529200359395e-06, |
| "loss": 2.8115, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.6527303754266212, |
| "grad_norm": 19.305646896362305, |
| "learning_rate": 3.665768194070081e-06, |
| "loss": 2.1982, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.6535836177474402, |
| "grad_norm": 46.113304138183594, |
| "learning_rate": 3.656783468104223e-06, |
| "loss": 3.0215, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.6544368600682594, |
| "grad_norm": 46.685699462890625, |
| "learning_rate": 3.647798742138365e-06, |
| "loss": 2.7422, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.6552901023890785, |
| "grad_norm": 20.04216957092285, |
| "learning_rate": 3.6388140161725074e-06, |
| "loss": 2.8691, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.6561433447098977, |
| "grad_norm": 18.197967529296875, |
| "learning_rate": 3.629829290206649e-06, |
| "loss": 2.9648, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.6569965870307167, |
| "grad_norm": 20.51030731201172, |
| "learning_rate": 3.6208445642407907e-06, |
| "loss": 2.209, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.6578498293515358, |
| "grad_norm": 16.829713821411133, |
| "learning_rate": 3.6118598382749332e-06, |
| "loss": 2.5537, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.658703071672355, |
| "grad_norm": 21.681400299072266, |
| "learning_rate": 3.602875112309075e-06, |
| "loss": 2.7969, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.659556313993174, |
| "grad_norm": 25.944387435913086, |
| "learning_rate": 3.5938903863432166e-06, |
| "loss": 2.6719, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.6604095563139932, |
| "grad_norm": 51.966121673583984, |
| "learning_rate": 3.5849056603773586e-06, |
| "loss": 2.8584, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.6612627986348123, |
| "grad_norm": 18.35639190673828, |
| "learning_rate": 3.5759209344115007e-06, |
| "loss": 2.3818, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.6621160409556314, |
| "grad_norm": 18.182228088378906, |
| "learning_rate": 3.566936208445643e-06, |
| "loss": 2.2686, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.6629692832764505, |
| "grad_norm": 16.234655380249023, |
| "learning_rate": 3.5579514824797845e-06, |
| "loss": 2.2441, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.6638225255972696, |
| "grad_norm": 34.618080139160156, |
| "learning_rate": 3.548966756513927e-06, |
| "loss": 3.0039, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.6646757679180887, |
| "grad_norm": 20.892868041992188, |
| "learning_rate": 3.5399820305480687e-06, |
| "loss": 2.5215, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.6655290102389079, |
| "grad_norm": 21.178865432739258, |
| "learning_rate": 3.5309973045822103e-06, |
| "loss": 2.126, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.6663822525597269, |
| "grad_norm": 15.986398696899414, |
| "learning_rate": 3.5220125786163524e-06, |
| "loss": 2.4512, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.6672354948805461, |
| "grad_norm": 18.40003204345703, |
| "learning_rate": 3.5130278526504945e-06, |
| "loss": 2.3096, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.6680887372013652, |
| "grad_norm": 28.919540405273438, |
| "learning_rate": 3.5040431266846366e-06, |
| "loss": 2.292, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.6689419795221843, |
| "grad_norm": 20.11212158203125, |
| "learning_rate": 3.4950584007187783e-06, |
| "loss": 2.3701, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.6697952218430034, |
| "grad_norm": 22.81437110900879, |
| "learning_rate": 3.4860736747529208e-06, |
| "loss": 2.2822, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.6706484641638225, |
| "grad_norm": 46.97941970825195, |
| "learning_rate": 3.4770889487870624e-06, |
| "loss": 2.252, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.6715017064846417, |
| "grad_norm": 21.700454711914062, |
| "learning_rate": 3.468104222821204e-06, |
| "loss": 2.3457, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.6723549488054608, |
| "grad_norm": 18.99515151977539, |
| "learning_rate": 3.4591194968553458e-06, |
| "loss": 2.2598, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.6732081911262798, |
| "grad_norm": 17.171161651611328, |
| "learning_rate": 3.4501347708894883e-06, |
| "loss": 2.3037, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.674061433447099, |
| "grad_norm": 19.60438346862793, |
| "learning_rate": 3.44115004492363e-06, |
| "loss": 2.5396, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.6749146757679181, |
| "grad_norm": 21.116247177124023, |
| "learning_rate": 3.432165318957772e-06, |
| "loss": 2.4775, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.6757679180887372, |
| "grad_norm": 17.95282745361328, |
| "learning_rate": 3.423180592991914e-06, |
| "loss": 2.2549, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.6766211604095563, |
| "grad_norm": 37.449928283691406, |
| "learning_rate": 3.4141958670260562e-06, |
| "loss": 2.3965, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.6774744027303754, |
| "grad_norm": 16.76734161376953, |
| "learning_rate": 3.405211141060198e-06, |
| "loss": 2.4512, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.6783276450511946, |
| "grad_norm": 16.08464813232422, |
| "learning_rate": 3.3962264150943395e-06, |
| "loss": 2.2861, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.6791808873720137, |
| "grad_norm": 22.598896026611328, |
| "learning_rate": 3.387241689128482e-06, |
| "loss": 2.0, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.6800341296928327, |
| "grad_norm": 30.063629150390625, |
| "learning_rate": 3.3782569631626237e-06, |
| "loss": 2.3936, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.6808873720136519, |
| "grad_norm": 15.586498260498047, |
| "learning_rate": 3.369272237196766e-06, |
| "loss": 2.3857, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.681740614334471, |
| "grad_norm": 47.479331970214844, |
| "learning_rate": 3.3602875112309075e-06, |
| "loss": 2.5176, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.6825938566552902, |
| "grad_norm": 29.33695411682129, |
| "learning_rate": 3.35130278526505e-06, |
| "loss": 2.6494, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.6834470989761092, |
| "grad_norm": 49.52241516113281, |
| "learning_rate": 3.3423180592991917e-06, |
| "loss": 2.666, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.6843003412969283, |
| "grad_norm": 30.229463577270508, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 2.4409, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.6851535836177475, |
| "grad_norm": 18.470956802368164, |
| "learning_rate": 3.324348607367476e-06, |
| "loss": 2.4453, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.6860068259385665, |
| "grad_norm": 68.55836486816406, |
| "learning_rate": 3.3153638814016175e-06, |
| "loss": 2.7158, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.6868600682593856, |
| "grad_norm": 23.803735733032227, |
| "learning_rate": 3.3063791554357596e-06, |
| "loss": 2.4375, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.6877133105802048, |
| "grad_norm": 61.04603958129883, |
| "learning_rate": 3.2973944294699013e-06, |
| "loss": 2.7373, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.6885665529010239, |
| "grad_norm": 40.623558044433594, |
| "learning_rate": 3.2884097035040433e-06, |
| "loss": 2.3535, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.689419795221843, |
| "grad_norm": 44.683109283447266, |
| "learning_rate": 3.2794249775381854e-06, |
| "loss": 2.5059, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.6902730375426621, |
| "grad_norm": 23.082717895507812, |
| "learning_rate": 3.270440251572327e-06, |
| "loss": 2.3584, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.6911262798634812, |
| "grad_norm": 31.238815307617188, |
| "learning_rate": 3.2614555256064696e-06, |
| "loss": 2.2852, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.6919795221843004, |
| "grad_norm": 16.465364456176758, |
| "learning_rate": 3.2524707996406113e-06, |
| "loss": 2.9834, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.6928327645051194, |
| "grad_norm": 16.760278701782227, |
| "learning_rate": 3.243486073674753e-06, |
| "loss": 2.457, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.6936860068259386, |
| "grad_norm": 24.745893478393555, |
| "learning_rate": 3.234501347708895e-06, |
| "loss": 2.2397, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.6945392491467577, |
| "grad_norm": 22.20821762084961, |
| "learning_rate": 3.225516621743037e-06, |
| "loss": 2.3564, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.6953924914675768, |
| "grad_norm": 34.78770065307617, |
| "learning_rate": 3.216531895777179e-06, |
| "loss": 2.7852, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.6962457337883959, |
| "grad_norm": 17.53403091430664, |
| "learning_rate": 3.207547169811321e-06, |
| "loss": 2.1963, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.697098976109215, |
| "grad_norm": 17.553361892700195, |
| "learning_rate": 3.1985624438454634e-06, |
| "loss": 2.3682, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.6979522184300341, |
| "grad_norm": 16.97439956665039, |
| "learning_rate": 3.189577717879605e-06, |
| "loss": 2.4453, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.6988054607508533, |
| "grad_norm": 24.054723739624023, |
| "learning_rate": 3.1805929919137467e-06, |
| "loss": 2.6201, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.6996587030716723, |
| "grad_norm": 16.930429458618164, |
| "learning_rate": 3.171608265947889e-06, |
| "loss": 2.1953, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.7005119453924915, |
| "grad_norm": 36.9339599609375, |
| "learning_rate": 3.162623539982031e-06, |
| "loss": 2.6875, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.7013651877133106, |
| "grad_norm": 20.728759765625, |
| "learning_rate": 3.153638814016173e-06, |
| "loss": 2.7783, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.7022184300341296, |
| "grad_norm": 32.32343673706055, |
| "learning_rate": 3.1446540880503146e-06, |
| "loss": 3.0117, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.7030716723549488, |
| "grad_norm": 27.187162399291992, |
| "learning_rate": 3.1356693620844567e-06, |
| "loss": 2.3828, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.7039249146757679, |
| "grad_norm": 19.576969146728516, |
| "learning_rate": 3.126684636118599e-06, |
| "loss": 2.1914, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.7047781569965871, |
| "grad_norm": 19.67875099182129, |
| "learning_rate": 3.1176999101527405e-06, |
| "loss": 2.374, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.7056313993174061, |
| "grad_norm": 51.096885681152344, |
| "learning_rate": 3.108715184186882e-06, |
| "loss": 3.0088, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.7064846416382252, |
| "grad_norm": 16.370563507080078, |
| "learning_rate": 3.0997304582210247e-06, |
| "loss": 2.3779, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.7073378839590444, |
| "grad_norm": 30.746566772460938, |
| "learning_rate": 3.0907457322551663e-06, |
| "loss": 2.6177, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.7081911262798635, |
| "grad_norm": 30.207935333251953, |
| "learning_rate": 3.0817610062893084e-06, |
| "loss": 2.4072, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.7090443686006825, |
| "grad_norm": 29.116840362548828, |
| "learning_rate": 3.0727762803234505e-06, |
| "loss": 2.0972, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.7098976109215017, |
| "grad_norm": 26.794530868530273, |
| "learning_rate": 3.0637915543575926e-06, |
| "loss": 2.333, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.7107508532423208, |
| "grad_norm": 18.36752700805664, |
| "learning_rate": 3.0548068283917343e-06, |
| "loss": 2.6602, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.71160409556314, |
| "grad_norm": 22.439231872558594, |
| "learning_rate": 3.045822102425876e-06, |
| "loss": 2.7002, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.712457337883959, |
| "grad_norm": 51.330665588378906, |
| "learning_rate": 3.0368373764600184e-06, |
| "loss": 2.8867, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.7133105802047781, |
| "grad_norm": 24.982059478759766, |
| "learning_rate": 3.02785265049416e-06, |
| "loss": 2.415, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.7141638225255973, |
| "grad_norm": 40.434627532958984, |
| "learning_rate": 3.018867924528302e-06, |
| "loss": 2.1611, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.7150170648464164, |
| "grad_norm": 43.82883834838867, |
| "learning_rate": 3.009883198562444e-06, |
| "loss": 2.3281, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.7158703071672355, |
| "grad_norm": 43.11958312988281, |
| "learning_rate": 3.0008984725965864e-06, |
| "loss": 2.1943, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.7167235494880546, |
| "grad_norm": 17.046653747558594, |
| "learning_rate": 2.991913746630728e-06, |
| "loss": 2.3779, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.7175767918088737, |
| "grad_norm": 20.74578857421875, |
| "learning_rate": 2.9829290206648697e-06, |
| "loss": 2.3047, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.7184300341296929, |
| "grad_norm": 48.83142852783203, |
| "learning_rate": 2.9739442946990122e-06, |
| "loss": 2.5049, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.7192832764505119, |
| "grad_norm": 42.406375885009766, |
| "learning_rate": 2.964959568733154e-06, |
| "loss": 2.5117, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.7201365187713311, |
| "grad_norm": 55.99921798706055, |
| "learning_rate": 2.9559748427672955e-06, |
| "loss": 2.6826, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.7209897610921502, |
| "grad_norm": 40.09762191772461, |
| "learning_rate": 2.9469901168014376e-06, |
| "loss": 2.6357, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.7218430034129693, |
| "grad_norm": 22.537761688232422, |
| "learning_rate": 2.9380053908355797e-06, |
| "loss": 3.0352, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.7226962457337884, |
| "grad_norm": 16.046295166015625, |
| "learning_rate": 2.929020664869722e-06, |
| "loss": 2.4648, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.7235494880546075, |
| "grad_norm": 20.101272583007812, |
| "learning_rate": 2.9200359389038635e-06, |
| "loss": 2.335, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.7244027303754266, |
| "grad_norm": 40.1048469543457, |
| "learning_rate": 2.911051212938006e-06, |
| "loss": 2.3594, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.7252559726962458, |
| "grad_norm": 47.38935852050781, |
| "learning_rate": 2.9020664869721477e-06, |
| "loss": 2.0469, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.7261092150170648, |
| "grad_norm": 24.103130340576172, |
| "learning_rate": 2.8930817610062893e-06, |
| "loss": 1.9531, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.726962457337884, |
| "grad_norm": 22.3649845123291, |
| "learning_rate": 2.8840970350404314e-06, |
| "loss": 2.3027, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.7278156996587031, |
| "grad_norm": 36.170406341552734, |
| "learning_rate": 2.8751123090745735e-06, |
| "loss": 2.2656, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.7286689419795221, |
| "grad_norm": 30.652938842773438, |
| "learning_rate": 2.8661275831087156e-06, |
| "loss": 2.3604, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.7295221843003413, |
| "grad_norm": 26.317873001098633, |
| "learning_rate": 2.8571428571428573e-06, |
| "loss": 2.1924, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.7303754266211604, |
| "grad_norm": 52.10211944580078, |
| "learning_rate": 2.8481581311769998e-06, |
| "loss": 2.3828, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.7312286689419796, |
| "grad_norm": 22.478017807006836, |
| "learning_rate": 2.8391734052111414e-06, |
| "loss": 2.5449, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.7320819112627986, |
| "grad_norm": 21.12700080871582, |
| "learning_rate": 2.830188679245283e-06, |
| "loss": 1.9521, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.7329351535836177, |
| "grad_norm": 20.594982147216797, |
| "learning_rate": 2.821203953279425e-06, |
| "loss": 2.1904, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.7337883959044369, |
| "grad_norm": 21.516183853149414, |
| "learning_rate": 2.8122192273135673e-06, |
| "loss": 2.5332, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.734641638225256, |
| "grad_norm": 25.919029235839844, |
| "learning_rate": 2.803234501347709e-06, |
| "loss": 2.3511, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.735494880546075, |
| "grad_norm": 20.421133041381836, |
| "learning_rate": 2.794249775381851e-06, |
| "loss": 2.3271, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.7363481228668942, |
| "grad_norm": 23.43620491027832, |
| "learning_rate": 2.785265049415993e-06, |
| "loss": 2.0537, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.7372013651877133, |
| "grad_norm": 17.840322494506836, |
| "learning_rate": 2.776280323450135e-06, |
| "loss": 2.1328, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.7380546075085325, |
| "grad_norm": 33.441341400146484, |
| "learning_rate": 2.767295597484277e-06, |
| "loss": 2.5332, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.7389078498293515, |
| "grad_norm": 21.710899353027344, |
| "learning_rate": 2.7583108715184185e-06, |
| "loss": 2.665, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.7397610921501706, |
| "grad_norm": 23.452516555786133, |
| "learning_rate": 2.749326145552561e-06, |
| "loss": 2.1514, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.7406143344709898, |
| "grad_norm": 17.572235107421875, |
| "learning_rate": 2.7403414195867027e-06, |
| "loss": 1.7598, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.7414675767918089, |
| "grad_norm": 32.19004440307617, |
| "learning_rate": 2.731356693620845e-06, |
| "loss": 2.1289, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.742320819112628, |
| "grad_norm": 22.450124740600586, |
| "learning_rate": 2.722371967654987e-06, |
| "loss": 2.4658, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.7431740614334471, |
| "grad_norm": 19.069766998291016, |
| "learning_rate": 2.713387241689129e-06, |
| "loss": 2.2441, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.7440273037542662, |
| "grad_norm": 23.94462776184082, |
| "learning_rate": 2.7044025157232706e-06, |
| "loss": 2.1426, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.7448805460750854, |
| "grad_norm": 36.22708511352539, |
| "learning_rate": 2.6954177897574123e-06, |
| "loss": 2.228, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.7457337883959044, |
| "grad_norm": 21.32388687133789, |
| "learning_rate": 2.686433063791555e-06, |
| "loss": 2.8701, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.7465870307167235, |
| "grad_norm": 23.661392211914062, |
| "learning_rate": 2.6774483378256965e-06, |
| "loss": 2.4297, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.7474402730375427, |
| "grad_norm": 20.54587173461914, |
| "learning_rate": 2.6684636118598386e-06, |
| "loss": 2.9219, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.7482935153583617, |
| "grad_norm": 24.399003982543945, |
| "learning_rate": 2.6594788858939802e-06, |
| "loss": 2.9365, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.7491467576791809, |
| "grad_norm": 36.97280502319336, |
| "learning_rate": 2.6504941599281223e-06, |
| "loss": 2.376, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 22.310462951660156, |
| "learning_rate": 2.6415094339622644e-06, |
| "loss": 2.4043, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.7508532423208191, |
| "grad_norm": 27.330747604370117, |
| "learning_rate": 2.632524707996406e-06, |
| "loss": 2.3359, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.7517064846416383, |
| "grad_norm": 20.034399032592773, |
| "learning_rate": 2.6235399820305486e-06, |
| "loss": 2.8594, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.7525597269624573, |
| "grad_norm": 26.571035385131836, |
| "learning_rate": 2.6145552560646903e-06, |
| "loss": 2.0596, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.7534129692832765, |
| "grad_norm": 21.897262573242188, |
| "learning_rate": 2.605570530098832e-06, |
| "loss": 2.4683, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.7542662116040956, |
| "grad_norm": 17.84102439880371, |
| "learning_rate": 2.596585804132974e-06, |
| "loss": 2.1904, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.7551194539249146, |
| "grad_norm": 24.632801055908203, |
| "learning_rate": 2.587601078167116e-06, |
| "loss": 2.1152, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.7559726962457338, |
| "grad_norm": 18.96522331237793, |
| "learning_rate": 2.578616352201258e-06, |
| "loss": 2.5303, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.7568259385665529, |
| "grad_norm": 37.09746170043945, |
| "learning_rate": 2.5696316262354e-06, |
| "loss": 2.335, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.757679180887372, |
| "grad_norm": 31.17850112915039, |
| "learning_rate": 2.5606469002695424e-06, |
| "loss": 2.5059, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.7585324232081911, |
| "grad_norm": 36.89558029174805, |
| "learning_rate": 2.551662174303684e-06, |
| "loss": 2.7695, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.7593856655290102, |
| "grad_norm": 20.49338150024414, |
| "learning_rate": 2.5426774483378257e-06, |
| "loss": 2.6846, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.7602389078498294, |
| "grad_norm": 25.983245849609375, |
| "learning_rate": 2.533692722371968e-06, |
| "loss": 2.4092, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.7610921501706485, |
| "grad_norm": 27.642595291137695, |
| "learning_rate": 2.52470799640611e-06, |
| "loss": 2.0869, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.7619453924914675, |
| "grad_norm": 23.749832153320312, |
| "learning_rate": 2.515723270440252e-06, |
| "loss": 2.125, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.7627986348122867, |
| "grad_norm": 28.204421997070312, |
| "learning_rate": 2.5067385444743936e-06, |
| "loss": 2.1562, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.7636518771331058, |
| "grad_norm": 30.861194610595703, |
| "learning_rate": 2.4977538185085357e-06, |
| "loss": 3.0098, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.764505119453925, |
| "grad_norm": 19.33704376220703, |
| "learning_rate": 2.488769092542678e-06, |
| "loss": 2.4072, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.765358361774744, |
| "grad_norm": 27.73824119567871, |
| "learning_rate": 2.4797843665768195e-06, |
| "loss": 2.2256, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.7662116040955631, |
| "grad_norm": 34.70376968383789, |
| "learning_rate": 2.4707996406109616e-06, |
| "loss": 2.4248, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.7670648464163823, |
| "grad_norm": 29.114303588867188, |
| "learning_rate": 2.4618149146451032e-06, |
| "loss": 2.7227, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.7679180887372014, |
| "grad_norm": 29.096269607543945, |
| "learning_rate": 2.4528301886792453e-06, |
| "loss": 2.4404, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.7687713310580204, |
| "grad_norm": 58.764366149902344, |
| "learning_rate": 2.4438454627133874e-06, |
| "loss": 2.2744, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.7696245733788396, |
| "grad_norm": 30.78232192993164, |
| "learning_rate": 2.4348607367475295e-06, |
| "loss": 2.5342, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.7704778156996587, |
| "grad_norm": 19.90322494506836, |
| "learning_rate": 2.4258760107816716e-06, |
| "loss": 2.1504, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.7713310580204779, |
| "grad_norm": 37.51405715942383, |
| "learning_rate": 2.4168912848158133e-06, |
| "loss": 2.1846, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.7721843003412969, |
| "grad_norm": 16.94844627380371, |
| "learning_rate": 2.4079065588499553e-06, |
| "loss": 2.3501, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.773037542662116, |
| "grad_norm": 28.434951782226562, |
| "learning_rate": 2.398921832884097e-06, |
| "loss": 2.0811, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.7738907849829352, |
| "grad_norm": 16.979534149169922, |
| "learning_rate": 2.389937106918239e-06, |
| "loss": 2.3936, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.7747440273037542, |
| "grad_norm": 22.91834831237793, |
| "learning_rate": 2.380952380952381e-06, |
| "loss": 2.9756, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.7755972696245734, |
| "grad_norm": 22.019638061523438, |
| "learning_rate": 2.3719676549865233e-06, |
| "loss": 2.2075, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.7764505119453925, |
| "grad_norm": 30.827585220336914, |
| "learning_rate": 2.3629829290206654e-06, |
| "loss": 2.4131, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.7773037542662116, |
| "grad_norm": 30.66614532470703, |
| "learning_rate": 2.353998203054807e-06, |
| "loss": 2.2217, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.7781569965870307, |
| "grad_norm": 25.246841430664062, |
| "learning_rate": 2.345013477088949e-06, |
| "loss": 2.3623, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.7790102389078498, |
| "grad_norm": 24.9078311920166, |
| "learning_rate": 2.3360287511230908e-06, |
| "loss": 2.2676, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.7798634812286689, |
| "grad_norm": 19.189767837524414, |
| "learning_rate": 2.327044025157233e-06, |
| "loss": 2.2119, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.7807167235494881, |
| "grad_norm": 21.641551971435547, |
| "learning_rate": 2.3180592991913745e-06, |
| "loss": 2.0054, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.7815699658703071, |
| "grad_norm": 30.206771850585938, |
| "learning_rate": 2.3090745732255166e-06, |
| "loss": 2.165, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.7824232081911263, |
| "grad_norm": 26.020099639892578, |
| "learning_rate": 2.3000898472596587e-06, |
| "loss": 2.5234, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.7832764505119454, |
| "grad_norm": 18.706939697265625, |
| "learning_rate": 2.291105121293801e-06, |
| "loss": 2.2012, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.7841296928327645, |
| "grad_norm": 20.241901397705078, |
| "learning_rate": 2.282120395327943e-06, |
| "loss": 2.0015, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.7849829351535836, |
| "grad_norm": 19.32655906677246, |
| "learning_rate": 2.2731356693620846e-06, |
| "loss": 1.5703, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.7858361774744027, |
| "grad_norm": 22.523513793945312, |
| "learning_rate": 2.2641509433962266e-06, |
| "loss": 2.585, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.7866894197952219, |
| "grad_norm": 36.01555252075195, |
| "learning_rate": 2.2551662174303683e-06, |
| "loss": 1.6499, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.787542662116041, |
| "grad_norm": 27.907073974609375, |
| "learning_rate": 2.2461814914645104e-06, |
| "loss": 2.3066, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.78839590443686, |
| "grad_norm": 19.171142578125, |
| "learning_rate": 2.2371967654986525e-06, |
| "loss": 2.0391, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.7892491467576792, |
| "grad_norm": 35.75897216796875, |
| "learning_rate": 2.2282120395327946e-06, |
| "loss": 2.6104, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.7901023890784983, |
| "grad_norm": 33.932472229003906, |
| "learning_rate": 2.2192273135669367e-06, |
| "loss": 2.6094, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.7909556313993175, |
| "grad_norm": 25.582454681396484, |
| "learning_rate": 2.2102425876010783e-06, |
| "loss": 2.4424, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.7918088737201365, |
| "grad_norm": 27.64750099182129, |
| "learning_rate": 2.2012578616352204e-06, |
| "loss": 2.418, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.7926621160409556, |
| "grad_norm": 345.17022705078125, |
| "learning_rate": 2.192273135669362e-06, |
| "loss": 2.2305, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.7935153583617748, |
| "grad_norm": 54.37593460083008, |
| "learning_rate": 2.183288409703504e-06, |
| "loss": 3.1572, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.7943686006825939, |
| "grad_norm": 29.86464500427246, |
| "learning_rate": 2.174303683737646e-06, |
| "loss": 2.5684, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.7952218430034129, |
| "grad_norm": 22.760496139526367, |
| "learning_rate": 2.165318957771788e-06, |
| "loss": 2.5928, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.7960750853242321, |
| "grad_norm": 18.5999755859375, |
| "learning_rate": 2.15633423180593e-06, |
| "loss": 2.4062, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.7969283276450512, |
| "grad_norm": 26.545793533325195, |
| "learning_rate": 2.147349505840072e-06, |
| "loss": 2.1387, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.7977815699658704, |
| "grad_norm": 17.7139949798584, |
| "learning_rate": 2.138364779874214e-06, |
| "loss": 2.6348, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.7986348122866894, |
| "grad_norm": 29.3115234375, |
| "learning_rate": 2.129380053908356e-06, |
| "loss": 2.624, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.7994880546075085, |
| "grad_norm": 33.964839935302734, |
| "learning_rate": 2.120395327942498e-06, |
| "loss": 2.4248, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.8003412969283277, |
| "grad_norm": 32.854530334472656, |
| "learning_rate": 2.1114106019766396e-06, |
| "loss": 2.75, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.8011945392491467, |
| "grad_norm": 19.812353134155273, |
| "learning_rate": 2.1024258760107817e-06, |
| "loss": 2.209, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.8020477815699659, |
| "grad_norm": 14.995081901550293, |
| "learning_rate": 2.093441150044924e-06, |
| "loss": 2.2539, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.802901023890785, |
| "grad_norm": 40.92326736450195, |
| "learning_rate": 2.084456424079066e-06, |
| "loss": 2.3555, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.8037542662116041, |
| "grad_norm": 40.27710723876953, |
| "learning_rate": 2.075471698113208e-06, |
| "loss": 2.4297, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.8046075085324232, |
| "grad_norm": 25.924610137939453, |
| "learning_rate": 2.0664869721473496e-06, |
| "loss": 2.5576, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.8054607508532423, |
| "grad_norm": 39.17571258544922, |
| "learning_rate": 2.0575022461814917e-06, |
| "loss": 2.5166, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.8063139931740614, |
| "grad_norm": 24.386545181274414, |
| "learning_rate": 2.0485175202156334e-06, |
| "loss": 2.5127, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.8071672354948806, |
| "grad_norm": 23.48984718322754, |
| "learning_rate": 2.0395327942497755e-06, |
| "loss": 2.5996, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.8080204778156996, |
| "grad_norm": 14.407853126525879, |
| "learning_rate": 2.0305480682839176e-06, |
| "loss": 1.7422, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.8088737201365188, |
| "grad_norm": 25.272546768188477, |
| "learning_rate": 2.0215633423180597e-06, |
| "loss": 1.9629, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.8097269624573379, |
| "grad_norm": 41.10344696044922, |
| "learning_rate": 2.0125786163522013e-06, |
| "loss": 2.4893, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.810580204778157, |
| "grad_norm": 34.03232955932617, |
| "learning_rate": 2.0035938903863434e-06, |
| "loss": 2.3135, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.8114334470989761, |
| "grad_norm": 19.308574676513672, |
| "learning_rate": 1.9946091644204855e-06, |
| "loss": 2.3301, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.8122866894197952, |
| "grad_norm": 19.482486724853516, |
| "learning_rate": 1.985624438454627e-06, |
| "loss": 2.3931, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.8131399317406144, |
| "grad_norm": 19.74332046508789, |
| "learning_rate": 1.9766397124887693e-06, |
| "loss": 2.082, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.8139931740614335, |
| "grad_norm": 24.09305763244629, |
| "learning_rate": 1.967654986522911e-06, |
| "loss": 2.5146, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.8148464163822525, |
| "grad_norm": 23.39406394958496, |
| "learning_rate": 1.958670260557053e-06, |
| "loss": 2.9053, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.8156996587030717, |
| "grad_norm": 18.08087921142578, |
| "learning_rate": 1.949685534591195e-06, |
| "loss": 1.9541, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.8165529010238908, |
| "grad_norm": 22.059133529663086, |
| "learning_rate": 1.940700808625337e-06, |
| "loss": 3.1865, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.8174061433447098, |
| "grad_norm": 16.29061508178711, |
| "learning_rate": 1.9317160826594793e-06, |
| "loss": 2.0771, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.818259385665529, |
| "grad_norm": 23.810237884521484, |
| "learning_rate": 1.922731356693621e-06, |
| "loss": 2.8477, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.8191126279863481, |
| "grad_norm": 30.86567497253418, |
| "learning_rate": 1.913746630727763e-06, |
| "loss": 2.082, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.8199658703071673, |
| "grad_norm": 24.14322853088379, |
| "learning_rate": 1.904761904761905e-06, |
| "loss": 2.9727, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.8208191126279863, |
| "grad_norm": 20.15610694885254, |
| "learning_rate": 1.895777178796047e-06, |
| "loss": 2.4414, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.8216723549488054, |
| "grad_norm": 105.63774108886719, |
| "learning_rate": 1.8867924528301889e-06, |
| "loss": 2.3037, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.8225255972696246, |
| "grad_norm": 17.520694732666016, |
| "learning_rate": 1.8778077268643308e-06, |
| "loss": 2.2676, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.8233788395904437, |
| "grad_norm": 26.543331146240234, |
| "learning_rate": 1.8688230008984728e-06, |
| "loss": 2.2607, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.8242320819112628, |
| "grad_norm": 22.674585342407227, |
| "learning_rate": 1.8598382749326147e-06, |
| "loss": 2.4355, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.8250853242320819, |
| "grad_norm": 29.76778793334961, |
| "learning_rate": 1.8508535489667568e-06, |
| "loss": 2.7432, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.825938566552901, |
| "grad_norm": 32.59353256225586, |
| "learning_rate": 1.8418688230008985e-06, |
| "loss": 2.6934, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.8267918088737202, |
| "grad_norm": 24.909629821777344, |
| "learning_rate": 1.8328840970350406e-06, |
| "loss": 1.8232, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.8276450511945392, |
| "grad_norm": 46.205047607421875, |
| "learning_rate": 1.8238993710691824e-06, |
| "loss": 2.8975, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.8284982935153583, |
| "grad_norm": 17.16266441345215, |
| "learning_rate": 1.8149146451033245e-06, |
| "loss": 2.5156, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.8293515358361775, |
| "grad_norm": 104.19178009033203, |
| "learning_rate": 1.8059299191374666e-06, |
| "loss": 2.5391, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.8302047781569966, |
| "grad_norm": 13.162457466125488, |
| "learning_rate": 1.7969451931716083e-06, |
| "loss": 1.8076, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.8310580204778157, |
| "grad_norm": 16.426860809326172, |
| "learning_rate": 1.7879604672057504e-06, |
| "loss": 2.6025, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.8319112627986348, |
| "grad_norm": 18.29142189025879, |
| "learning_rate": 1.7789757412398922e-06, |
| "loss": 2.667, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.8327645051194539, |
| "grad_norm": 17.840328216552734, |
| "learning_rate": 1.7699910152740343e-06, |
| "loss": 2.4551, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.8336177474402731, |
| "grad_norm": 17.80030059814453, |
| "learning_rate": 1.7610062893081762e-06, |
| "loss": 2.5186, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.8344709897610921, |
| "grad_norm": 34.99458694458008, |
| "learning_rate": 1.7520215633423183e-06, |
| "loss": 2.4072, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.8353242320819113, |
| "grad_norm": 17.811429977416992, |
| "learning_rate": 1.7430368373764604e-06, |
| "loss": 2.1963, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.8361774744027304, |
| "grad_norm": 19.649438858032227, |
| "learning_rate": 1.734052111410602e-06, |
| "loss": 2.3896, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.8370307167235495, |
| "grad_norm": 27.297197341918945, |
| "learning_rate": 1.7250673854447441e-06, |
| "loss": 2.375, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.8378839590443686, |
| "grad_norm": 17.75628662109375, |
| "learning_rate": 1.716082659478886e-06, |
| "loss": 1.9648, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.8387372013651877, |
| "grad_norm": 19.516536712646484, |
| "learning_rate": 1.7070979335130281e-06, |
| "loss": 2.3828, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.8395904436860068, |
| "grad_norm": 15.936653137207031, |
| "learning_rate": 1.6981132075471698e-06, |
| "loss": 2.2891, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.840443686006826, |
| "grad_norm": 32.02035903930664, |
| "learning_rate": 1.6891284815813119e-06, |
| "loss": 2.0713, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.841296928327645, |
| "grad_norm": 24.9638671875, |
| "learning_rate": 1.6801437556154537e-06, |
| "loss": 2.3364, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.8421501706484642, |
| "grad_norm": 27.20418930053711, |
| "learning_rate": 1.6711590296495958e-06, |
| "loss": 2.2085, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.8430034129692833, |
| "grad_norm": 18.280986785888672, |
| "learning_rate": 1.662174303683738e-06, |
| "loss": 1.8594, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.8438566552901023, |
| "grad_norm": 52.79808807373047, |
| "learning_rate": 1.6531895777178798e-06, |
| "loss": 2.7715, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.8447098976109215, |
| "grad_norm": 22.51861572265625, |
| "learning_rate": 1.6442048517520217e-06, |
| "loss": 2.7188, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.8455631399317406, |
| "grad_norm": 47.933372497558594, |
| "learning_rate": 1.6352201257861635e-06, |
| "loss": 3.1797, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.8464163822525598, |
| "grad_norm": 20.896522521972656, |
| "learning_rate": 1.6262353998203056e-06, |
| "loss": 2.2227, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.8472696245733788, |
| "grad_norm": 24.34409523010254, |
| "learning_rate": 1.6172506738544475e-06, |
| "loss": 2.2549, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.8481228668941979, |
| "grad_norm": 34.13801574707031, |
| "learning_rate": 1.6082659478885896e-06, |
| "loss": 2.7314, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.8489761092150171, |
| "grad_norm": 20.989660263061523, |
| "learning_rate": 1.5992812219227317e-06, |
| "loss": 2.7432, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.8498293515358362, |
| "grad_norm": 20.589107513427734, |
| "learning_rate": 1.5902964959568734e-06, |
| "loss": 2.4668, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.8506825938566553, |
| "grad_norm": 28.15553092956543, |
| "learning_rate": 1.5813117699910154e-06, |
| "loss": 2.7314, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.8515358361774744, |
| "grad_norm": 15.138993263244629, |
| "learning_rate": 1.5723270440251573e-06, |
| "loss": 2.3604, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.8523890784982935, |
| "grad_norm": 41.964927673339844, |
| "learning_rate": 1.5633423180592994e-06, |
| "loss": 2.4551, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.8532423208191127, |
| "grad_norm": 24.446157455444336, |
| "learning_rate": 1.554357592093441e-06, |
| "loss": 2.75, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.8540955631399317, |
| "grad_norm": 23.93151092529297, |
| "learning_rate": 1.5453728661275832e-06, |
| "loss": 2.4736, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.8549488054607508, |
| "grad_norm": 29.804882049560547, |
| "learning_rate": 1.5363881401617253e-06, |
| "loss": 2.0986, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.85580204778157, |
| "grad_norm": 29.341312408447266, |
| "learning_rate": 1.5274034141958671e-06, |
| "loss": 2.1719, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.856655290102389, |
| "grad_norm": 19.525724411010742, |
| "learning_rate": 1.5184186882300092e-06, |
| "loss": 2.2354, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.8575085324232082, |
| "grad_norm": 20.416091918945312, |
| "learning_rate": 1.509433962264151e-06, |
| "loss": 2.5049, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.8583617747440273, |
| "grad_norm": 12.879405975341797, |
| "learning_rate": 1.5004492362982932e-06, |
| "loss": 2.001, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.8592150170648464, |
| "grad_norm": 15.967265129089355, |
| "learning_rate": 1.4914645103324349e-06, |
| "loss": 2.4395, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.8600682593856656, |
| "grad_norm": 27.89084243774414, |
| "learning_rate": 1.482479784366577e-06, |
| "loss": 2.1934, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.8609215017064846, |
| "grad_norm": 26.392724990844727, |
| "learning_rate": 1.4734950584007188e-06, |
| "loss": 2.5176, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.8617747440273038, |
| "grad_norm": 40.049503326416016, |
| "learning_rate": 1.464510332434861e-06, |
| "loss": 2.1807, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.8626279863481229, |
| "grad_norm": 16.034893035888672, |
| "learning_rate": 1.455525606469003e-06, |
| "loss": 2.3408, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.863481228668942, |
| "grad_norm": 16.689733505249023, |
| "learning_rate": 1.4465408805031447e-06, |
| "loss": 2.1162, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.8643344709897611, |
| "grad_norm": 19.43463134765625, |
| "learning_rate": 1.4375561545372868e-06, |
| "loss": 2.292, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.8651877133105802, |
| "grad_norm": 38.333335876464844, |
| "learning_rate": 1.4285714285714286e-06, |
| "loss": 2.9941, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.8660409556313993, |
| "grad_norm": 20.772680282592773, |
| "learning_rate": 1.4195867026055707e-06, |
| "loss": 2.0811, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.8668941979522184, |
| "grad_norm": 15.94913387298584, |
| "learning_rate": 1.4106019766397126e-06, |
| "loss": 2.7207, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.8677474402730375, |
| "grad_norm": 22.80181884765625, |
| "learning_rate": 1.4016172506738545e-06, |
| "loss": 2.5518, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.8686006825938567, |
| "grad_norm": 24.729825973510742, |
| "learning_rate": 1.3926325247079966e-06, |
| "loss": 2.1699, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.8694539249146758, |
| "grad_norm": 34.91142654418945, |
| "learning_rate": 1.3836477987421384e-06, |
| "loss": 2.6553, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.8703071672354948, |
| "grad_norm": 25.28569793701172, |
| "learning_rate": 1.3746630727762805e-06, |
| "loss": 2.25, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.871160409556314, |
| "grad_norm": 22.64327621459961, |
| "learning_rate": 1.3656783468104224e-06, |
| "loss": 1.7666, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.8720136518771331, |
| "grad_norm": 20.240745544433594, |
| "learning_rate": 1.3566936208445645e-06, |
| "loss": 2.9014, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.8728668941979523, |
| "grad_norm": 20.950401306152344, |
| "learning_rate": 1.3477088948787062e-06, |
| "loss": 2.4102, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.8737201365187713, |
| "grad_norm": 36.6849479675293, |
| "learning_rate": 1.3387241689128482e-06, |
| "loss": 2.8906, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.8745733788395904, |
| "grad_norm": 31.71055030822754, |
| "learning_rate": 1.3297394429469901e-06, |
| "loss": 2.417, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.8754266211604096, |
| "grad_norm": 26.664091110229492, |
| "learning_rate": 1.3207547169811322e-06, |
| "loss": 2.6895, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.8762798634812287, |
| "grad_norm": 21.710546493530273, |
| "learning_rate": 1.3117699910152743e-06, |
| "loss": 2.2451, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.8771331058020477, |
| "grad_norm": 23.674776077270508, |
| "learning_rate": 1.302785265049416e-06, |
| "loss": 2.6943, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.8779863481228669, |
| "grad_norm": 17.062026977539062, |
| "learning_rate": 1.293800539083558e-06, |
| "loss": 2.5938, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.878839590443686, |
| "grad_norm": 19.835830688476562, |
| "learning_rate": 1.2848158131177e-06, |
| "loss": 1.8984, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.8796928327645052, |
| "grad_norm": 22.001405715942383, |
| "learning_rate": 1.275831087151842e-06, |
| "loss": 2.3711, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.8805460750853242, |
| "grad_norm": 19.773300170898438, |
| "learning_rate": 1.266846361185984e-06, |
| "loss": 2.2451, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.8813993174061433, |
| "grad_norm": 20.110618591308594, |
| "learning_rate": 1.257861635220126e-06, |
| "loss": 2.0088, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.8822525597269625, |
| "grad_norm": 18.571706771850586, |
| "learning_rate": 1.2488769092542679e-06, |
| "loss": 2.6729, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.8831058020477816, |
| "grad_norm": 22.619691848754883, |
| "learning_rate": 1.2398921832884097e-06, |
| "loss": 2.3291, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.8839590443686007, |
| "grad_norm": 15.416295051574707, |
| "learning_rate": 1.2309074573225516e-06, |
| "loss": 2.1562, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.8848122866894198, |
| "grad_norm": 18.906389236450195, |
| "learning_rate": 1.2219227313566937e-06, |
| "loss": 2.2881, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.8856655290102389, |
| "grad_norm": 26.780014038085938, |
| "learning_rate": 1.2129380053908358e-06, |
| "loss": 2.1504, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.886518771331058, |
| "grad_norm": 23.079744338989258, |
| "learning_rate": 1.2039532794249777e-06, |
| "loss": 2.5264, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.8873720136518771, |
| "grad_norm": 18.029769897460938, |
| "learning_rate": 1.1949685534591195e-06, |
| "loss": 2.4541, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.8882252559726962, |
| "grad_norm": 30.483293533325195, |
| "learning_rate": 1.1859838274932616e-06, |
| "loss": 2.7627, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.8890784982935154, |
| "grad_norm": 18.396657943725586, |
| "learning_rate": 1.1769991015274035e-06, |
| "loss": 2.1699, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.8899317406143344, |
| "grad_norm": 42.24234390258789, |
| "learning_rate": 1.1680143755615454e-06, |
| "loss": 2.4609, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.8907849829351536, |
| "grad_norm": 37.315792083740234, |
| "learning_rate": 1.1590296495956873e-06, |
| "loss": 1.9644, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.8916382252559727, |
| "grad_norm": 18.035730361938477, |
| "learning_rate": 1.1500449236298294e-06, |
| "loss": 2.2441, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.8924914675767918, |
| "grad_norm": 33.729732513427734, |
| "learning_rate": 1.1410601976639714e-06, |
| "loss": 2.7402, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.893344709897611, |
| "grad_norm": 17.152904510498047, |
| "learning_rate": 1.1320754716981133e-06, |
| "loss": 2.2881, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.89419795221843, |
| "grad_norm": 30.483760833740234, |
| "learning_rate": 1.1230907457322552e-06, |
| "loss": 2.1406, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.8950511945392492, |
| "grad_norm": 22.160011291503906, |
| "learning_rate": 1.1141060197663973e-06, |
| "loss": 2.4102, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.8959044368600683, |
| "grad_norm": 21.861427307128906, |
| "learning_rate": 1.1051212938005392e-06, |
| "loss": 2.377, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.8967576791808873, |
| "grad_norm": 16.11711883544922, |
| "learning_rate": 1.096136567834681e-06, |
| "loss": 2.2207, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.8976109215017065, |
| "grad_norm": 16.705957412719727, |
| "learning_rate": 1.087151841868823e-06, |
| "loss": 2.4766, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.8984641638225256, |
| "grad_norm": 21.10558319091797, |
| "learning_rate": 1.078167115902965e-06, |
| "loss": 2.8291, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.8993174061433447, |
| "grad_norm": 16.919170379638672, |
| "learning_rate": 1.069182389937107e-06, |
| "loss": 2.3379, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.9001706484641638, |
| "grad_norm": 16.1108455657959, |
| "learning_rate": 1.060197663971249e-06, |
| "loss": 2.0049, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.9010238907849829, |
| "grad_norm": 14.90361213684082, |
| "learning_rate": 1.0512129380053909e-06, |
| "loss": 2.0732, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.9018771331058021, |
| "grad_norm": 22.117961883544922, |
| "learning_rate": 1.042228212039533e-06, |
| "loss": 2.4478, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.9027303754266212, |
| "grad_norm": 17.26648712158203, |
| "learning_rate": 1.0332434860736748e-06, |
| "loss": 2.4834, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.9035836177474402, |
| "grad_norm": 29.885637283325195, |
| "learning_rate": 1.0242587601078167e-06, |
| "loss": 2.5439, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.9044368600682594, |
| "grad_norm": 36.06587600708008, |
| "learning_rate": 1.0152740341419588e-06, |
| "loss": 2.3203, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.9052901023890785, |
| "grad_norm": 23.25389862060547, |
| "learning_rate": 1.0062893081761007e-06, |
| "loss": 1.8232, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.9061433447098977, |
| "grad_norm": 19.698678970336914, |
| "learning_rate": 9.973045822102428e-07, |
| "loss": 2.1807, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.9069965870307167, |
| "grad_norm": 20.899768829345703, |
| "learning_rate": 9.883198562443846e-07, |
| "loss": 2.0088, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.9078498293515358, |
| "grad_norm": 17.271106719970703, |
| "learning_rate": 9.793351302785265e-07, |
| "loss": 2.4834, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.908703071672355, |
| "grad_norm": 18.44869041442871, |
| "learning_rate": 9.703504043126686e-07, |
| "loss": 2.2676, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.909556313993174, |
| "grad_norm": 22.660959243774414, |
| "learning_rate": 9.613656783468105e-07, |
| "loss": 2.5537, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.9104095563139932, |
| "grad_norm": 27.0996150970459, |
| "learning_rate": 9.523809523809525e-07, |
| "loss": 2.2266, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.9112627986348123, |
| "grad_norm": 34.511531829833984, |
| "learning_rate": 9.433962264150944e-07, |
| "loss": 2.3223, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.9121160409556314, |
| "grad_norm": 27.928035736083984, |
| "learning_rate": 9.344115004492364e-07, |
| "loss": 2.6211, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.9129692832764505, |
| "grad_norm": 27.532997131347656, |
| "learning_rate": 9.254267744833784e-07, |
| "loss": 2.2324, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.9138225255972696, |
| "grad_norm": 14.844276428222656, |
| "learning_rate": 9.164420485175203e-07, |
| "loss": 2.2422, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.9146757679180887, |
| "grad_norm": 21.835037231445312, |
| "learning_rate": 9.074573225516623e-07, |
| "loss": 2.2881, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.9155290102389079, |
| "grad_norm": 19.69972801208496, |
| "learning_rate": 8.984725965858041e-07, |
| "loss": 2.1846, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.9163822525597269, |
| "grad_norm": 15.349184036254883, |
| "learning_rate": 8.894878706199461e-07, |
| "loss": 1.7319, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.9172354948805461, |
| "grad_norm": 19.928043365478516, |
| "learning_rate": 8.805031446540881e-07, |
| "loss": 2.4658, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.9180887372013652, |
| "grad_norm": 17.366472244262695, |
| "learning_rate": 8.715184186882302e-07, |
| "loss": 2.2046, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.9189419795221843, |
| "grad_norm": 29.670156478881836, |
| "learning_rate": 8.625336927223721e-07, |
| "loss": 2.3613, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.9197952218430034, |
| "grad_norm": 30.919553756713867, |
| "learning_rate": 8.535489667565141e-07, |
| "loss": 1.9961, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.9206484641638225, |
| "grad_norm": 14.773041725158691, |
| "learning_rate": 8.445642407906559e-07, |
| "loss": 1.9248, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.9215017064846417, |
| "grad_norm": 24.538284301757812, |
| "learning_rate": 8.355795148247979e-07, |
| "loss": 2.3975, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.9223549488054608, |
| "grad_norm": 19.382570266723633, |
| "learning_rate": 8.265947888589399e-07, |
| "loss": 2.1125, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.9232081911262798, |
| "grad_norm": 19.567092895507812, |
| "learning_rate": 8.176100628930818e-07, |
| "loss": 2.7686, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.924061433447099, |
| "grad_norm": 29.195287704467773, |
| "learning_rate": 8.086253369272238e-07, |
| "loss": 2.8613, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.9249146757679181, |
| "grad_norm": 20.474294662475586, |
| "learning_rate": 7.996406109613658e-07, |
| "loss": 2.2227, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.9257679180887372, |
| "grad_norm": 18.846967697143555, |
| "learning_rate": 7.906558849955077e-07, |
| "loss": 2.5205, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.9266211604095563, |
| "grad_norm": 23.362014770507812, |
| "learning_rate": 7.816711590296497e-07, |
| "loss": 2.3252, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.9274744027303754, |
| "grad_norm": 23.076448440551758, |
| "learning_rate": 7.726864330637916e-07, |
| "loss": 1.7588, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.9283276450511946, |
| "grad_norm": 24.485366821289062, |
| "learning_rate": 7.637017070979336e-07, |
| "loss": 2.397, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.9291808873720137, |
| "grad_norm": 21.237762451171875, |
| "learning_rate": 7.547169811320755e-07, |
| "loss": 2.7598, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.9300341296928327, |
| "grad_norm": 26.516183853149414, |
| "learning_rate": 7.457322551662174e-07, |
| "loss": 2.7891, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.9308873720136519, |
| "grad_norm": 21.238964080810547, |
| "learning_rate": 7.367475292003594e-07, |
| "loss": 2.5303, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.931740614334471, |
| "grad_norm": 30.594709396362305, |
| "learning_rate": 7.277628032345015e-07, |
| "loss": 1.8975, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.9325938566552902, |
| "grad_norm": 35.4166145324707, |
| "learning_rate": 7.187780772686434e-07, |
| "loss": 1.9258, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.9334470989761092, |
| "grad_norm": 22.17268943786621, |
| "learning_rate": 7.097933513027854e-07, |
| "loss": 2.4492, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.9343003412969283, |
| "grad_norm": 17.64724349975586, |
| "learning_rate": 7.008086253369272e-07, |
| "loss": 2.5547, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.9351535836177475, |
| "grad_norm": 24.73906135559082, |
| "learning_rate": 6.918238993710692e-07, |
| "loss": 2.5254, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.9360068259385665, |
| "grad_norm": 26.848501205444336, |
| "learning_rate": 6.828391734052112e-07, |
| "loss": 2.6846, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.9368600682593856, |
| "grad_norm": 20.17809295654297, |
| "learning_rate": 6.738544474393531e-07, |
| "loss": 2.2471, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.9377133105802048, |
| "grad_norm": 22.41636085510254, |
| "learning_rate": 6.648697214734951e-07, |
| "loss": 1.8472, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.9385665529010239, |
| "grad_norm": 39.55388641357422, |
| "learning_rate": 6.558849955076372e-07, |
| "loss": 2.3638, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.939419795221843, |
| "grad_norm": 33.42490005493164, |
| "learning_rate": 6.46900269541779e-07, |
| "loss": 2.4688, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.9402730375426621, |
| "grad_norm": 21.77603530883789, |
| "learning_rate": 6.37915543575921e-07, |
| "loss": 2.2363, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.9411262798634812, |
| "grad_norm": 24.46465301513672, |
| "learning_rate": 6.28930817610063e-07, |
| "loss": 2.2061, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.9419795221843004, |
| "grad_norm": 19.78148651123047, |
| "learning_rate": 6.199460916442049e-07, |
| "loss": 2.5908, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.9428327645051194, |
| "grad_norm": 31.342111587524414, |
| "learning_rate": 6.109613656783469e-07, |
| "loss": 2.5615, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.9436860068259386, |
| "grad_norm": 13.717397689819336, |
| "learning_rate": 6.019766397124888e-07, |
| "loss": 2.0635, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.9445392491467577, |
| "grad_norm": 21.84761619567871, |
| "learning_rate": 5.929919137466308e-07, |
| "loss": 2.293, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.9453924914675768, |
| "grad_norm": 37.024166107177734, |
| "learning_rate": 5.840071877807727e-07, |
| "loss": 2.249, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.9462457337883959, |
| "grad_norm": 17.425418853759766, |
| "learning_rate": 5.750224618149147e-07, |
| "loss": 2.3672, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.947098976109215, |
| "grad_norm": 26.030006408691406, |
| "learning_rate": 5.660377358490567e-07, |
| "loss": 2.124, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.9479522184300341, |
| "grad_norm": 22.431434631347656, |
| "learning_rate": 5.570530098831986e-07, |
| "loss": 2.3848, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.9488054607508533, |
| "grad_norm": 19.168577194213867, |
| "learning_rate": 5.480682839173405e-07, |
| "loss": 2.1758, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.9496587030716723, |
| "grad_norm": 17.07505226135254, |
| "learning_rate": 5.390835579514825e-07, |
| "loss": 2.374, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.9505119453924915, |
| "grad_norm": 26.080429077148438, |
| "learning_rate": 5.300988319856245e-07, |
| "loss": 2.3418, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.9513651877133106, |
| "grad_norm": 21.243762969970703, |
| "learning_rate": 5.211141060197665e-07, |
| "loss": 2.1953, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.9522184300341296, |
| "grad_norm": 15.045170783996582, |
| "learning_rate": 5.121293800539083e-07, |
| "loss": 2.1016, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.9530716723549488, |
| "grad_norm": 23.7006778717041, |
| "learning_rate": 5.031446540880503e-07, |
| "loss": 2.8184, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.9539249146757679, |
| "grad_norm": 18.883411407470703, |
| "learning_rate": 4.941599281221923e-07, |
| "loss": 2.6475, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.9547781569965871, |
| "grad_norm": 25.889921188354492, |
| "learning_rate": 4.851752021563343e-07, |
| "loss": 2.0693, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.9556313993174061, |
| "grad_norm": 22.17185401916504, |
| "learning_rate": 4.7619047619047623e-07, |
| "loss": 2.4834, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.9564846416382252, |
| "grad_norm": 22.72771644592285, |
| "learning_rate": 4.672057502246182e-07, |
| "loss": 2.5225, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.9573378839590444, |
| "grad_norm": 14.462482452392578, |
| "learning_rate": 4.5822102425876014e-07, |
| "loss": 2.0977, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.9581911262798635, |
| "grad_norm": 16.800121307373047, |
| "learning_rate": 4.4923629829290207e-07, |
| "loss": 2.0635, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.9590443686006825, |
| "grad_norm": 31.013629913330078, |
| "learning_rate": 4.4025157232704405e-07, |
| "loss": 2.3926, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.9598976109215017, |
| "grad_norm": 16.524974822998047, |
| "learning_rate": 4.3126684636118604e-07, |
| "loss": 2.1387, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.9607508532423208, |
| "grad_norm": 25.04954719543457, |
| "learning_rate": 4.2228212039532797e-07, |
| "loss": 2.6875, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.96160409556314, |
| "grad_norm": 22.931171417236328, |
| "learning_rate": 4.1329739442946995e-07, |
| "loss": 2.2598, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.962457337883959, |
| "grad_norm": 18.490894317626953, |
| "learning_rate": 4.043126684636119e-07, |
| "loss": 2.0537, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.9633105802047781, |
| "grad_norm": 18.204050064086914, |
| "learning_rate": 3.9532794249775386e-07, |
| "loss": 2.5342, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.9641638225255973, |
| "grad_norm": 40.86081314086914, |
| "learning_rate": 3.863432165318958e-07, |
| "loss": 2.4844, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.9650170648464164, |
| "grad_norm": 14.383349418640137, |
| "learning_rate": 3.773584905660378e-07, |
| "loss": 1.7993, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.9658703071672355, |
| "grad_norm": 23.310863494873047, |
| "learning_rate": 3.683737646001797e-07, |
| "loss": 2.2402, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.9667235494880546, |
| "grad_norm": 18.63228416442871, |
| "learning_rate": 3.593890386343217e-07, |
| "loss": 1.998, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.9675767918088737, |
| "grad_norm": 82.3555908203125, |
| "learning_rate": 3.504043126684636e-07, |
| "loss": 2.5557, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.9684300341296929, |
| "grad_norm": 16.93051528930664, |
| "learning_rate": 3.414195867026056e-07, |
| "loss": 2.2803, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.9692832764505119, |
| "grad_norm": 21.192161560058594, |
| "learning_rate": 3.3243486073674753e-07, |
| "loss": 2.7246, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.9701365187713311, |
| "grad_norm": 18.740169525146484, |
| "learning_rate": 3.234501347708895e-07, |
| "loss": 2.7227, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.9709897610921502, |
| "grad_norm": 19.469411849975586, |
| "learning_rate": 3.144654088050315e-07, |
| "loss": 2.2979, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.9718430034129693, |
| "grad_norm": 18.27403450012207, |
| "learning_rate": 3.0548068283917343e-07, |
| "loss": 2.251, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.9726962457337884, |
| "grad_norm": 24.299030303955078, |
| "learning_rate": 2.964959568733154e-07, |
| "loss": 2.4541, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.9735494880546075, |
| "grad_norm": 24.530475616455078, |
| "learning_rate": 2.8751123090745734e-07, |
| "loss": 2.1475, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.9744027303754266, |
| "grad_norm": 25.455007553100586, |
| "learning_rate": 2.785265049415993e-07, |
| "loss": 2.3701, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.9752559726962458, |
| "grad_norm": 16.469362258911133, |
| "learning_rate": 2.6954177897574125e-07, |
| "loss": 1.9355, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.9761092150170648, |
| "grad_norm": 19.01254653930664, |
| "learning_rate": 2.6055705300988324e-07, |
| "loss": 2.3057, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.976962457337884, |
| "grad_norm": 19.243854522705078, |
| "learning_rate": 2.5157232704402517e-07, |
| "loss": 2.4736, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.9778156996587031, |
| "grad_norm": 22.926416397094727, |
| "learning_rate": 2.4258760107816715e-07, |
| "loss": 1.9111, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.9786689419795221, |
| "grad_norm": 21.436504364013672, |
| "learning_rate": 2.336028751123091e-07, |
| "loss": 2.5264, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.9795221843003413, |
| "grad_norm": 18.81846809387207, |
| "learning_rate": 2.2461814914645103e-07, |
| "loss": 2.2744, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.9803754266211604, |
| "grad_norm": 17.731666564941406, |
| "learning_rate": 2.1563342318059302e-07, |
| "loss": 2.3506, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.9812286689419796, |
| "grad_norm": 18.162921905517578, |
| "learning_rate": 2.0664869721473497e-07, |
| "loss": 2.3584, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.9820819112627986, |
| "grad_norm": 17.115859985351562, |
| "learning_rate": 1.9766397124887693e-07, |
| "loss": 2.3174, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.9829351535836177, |
| "grad_norm": 19.12236785888672, |
| "learning_rate": 1.886792452830189e-07, |
| "loss": 2.8047, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.9837883959044369, |
| "grad_norm": 46.08415603637695, |
| "learning_rate": 1.7969451931716084e-07, |
| "loss": 2.2979, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.984641638225256, |
| "grad_norm": 19.95178985595703, |
| "learning_rate": 1.707097933513028e-07, |
| "loss": 2.5273, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.985494880546075, |
| "grad_norm": 19.663936614990234, |
| "learning_rate": 1.6172506738544476e-07, |
| "loss": 2.1338, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.9863481228668942, |
| "grad_norm": 18.48255729675293, |
| "learning_rate": 1.5274034141958671e-07, |
| "loss": 2.2705, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.9872013651877133, |
| "grad_norm": 14.638534545898438, |
| "learning_rate": 1.4375561545372867e-07, |
| "loss": 1.9199, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.9880546075085325, |
| "grad_norm": 17.064104080200195, |
| "learning_rate": 1.3477088948787063e-07, |
| "loss": 2.3135, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.9889078498293515, |
| "grad_norm": 24.58094024658203, |
| "learning_rate": 1.2578616352201258e-07, |
| "loss": 2.5918, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.9897610921501706, |
| "grad_norm": 15.54403305053711, |
| "learning_rate": 1.1680143755615455e-07, |
| "loss": 2.0586, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.9906143344709898, |
| "grad_norm": 19.432096481323242, |
| "learning_rate": 1.0781671159029651e-07, |
| "loss": 2.0234, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.9914675767918089, |
| "grad_norm": 23.13845443725586, |
| "learning_rate": 9.883198562443847e-08, |
| "loss": 2.1523, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.992320819112628, |
| "grad_norm": 19.302988052368164, |
| "learning_rate": 8.984725965858042e-08, |
| "loss": 2.5889, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.9931740614334471, |
| "grad_norm": 22.22420883178711, |
| "learning_rate": 8.086253369272238e-08, |
| "loss": 2.4434, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.9940273037542662, |
| "grad_norm": 16.57465362548828, |
| "learning_rate": 7.187780772686433e-08, |
| "loss": 2.2539, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.9948805460750854, |
| "grad_norm": 29.630794525146484, |
| "learning_rate": 6.289308176100629e-08, |
| "loss": 2.3193, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.9957337883959044, |
| "grad_norm": 16.782909393310547, |
| "learning_rate": 5.3908355795148254e-08, |
| "loss": 1.9961, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.9965870307167235, |
| "grad_norm": 25.104806900024414, |
| "learning_rate": 4.492362982929021e-08, |
| "loss": 2.375, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.9974402730375427, |
| "grad_norm": 15.419062614440918, |
| "learning_rate": 3.593890386343217e-08, |
| "loss": 1.9033, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.9982935153583617, |
| "grad_norm": 18.09457015991211, |
| "learning_rate": 2.6954177897574127e-08, |
| "loss": 2.3037, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.9991467576791809, |
| "grad_norm": 20.33756446838379, |
| "learning_rate": 1.7969451931716084e-08, |
| "loss": 2.2764, |
| "step": 1171 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 22.51215362548828, |
| "learning_rate": 8.984725965858042e-09, |
| "loss": 2.6963, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 1172, |
| "total_flos": 1.0293976231418266e+18, |
| "train_loss": 2.8942469053300983, |
| "train_runtime": 765.0994, |
| "train_samples_per_second": 392.106, |
| "train_steps_per_second": 1.532 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1172, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0293976231418266e+18, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |