| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.999815668202765, |
| "eval_steps": 500, |
| "global_step": 1808, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005529953917050691, |
| "grad_norm": 5.9790191650390625, |
| "learning_rate": 5.524861878453039e-08, |
| "loss": 0.8575, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0011059907834101382, |
| "grad_norm": 5.738432884216309, |
| "learning_rate": 1.1049723756906078e-07, |
| "loss": 0.8535, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0016589861751152074, |
| "grad_norm": 5.905203342437744, |
| "learning_rate": 1.6574585635359117e-07, |
| "loss": 0.8907, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0022119815668202765, |
| "grad_norm": 5.97217321395874, |
| "learning_rate": 2.2099447513812156e-07, |
| "loss": 0.8695, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0027649769585253456, |
| "grad_norm": 6.003969669342041, |
| "learning_rate": 2.7624309392265196e-07, |
| "loss": 0.8746, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0033179723502304147, |
| "grad_norm": 5.878486156463623, |
| "learning_rate": 3.3149171270718233e-07, |
| "loss": 0.866, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.003870967741935484, |
| "grad_norm": 5.74072265625, |
| "learning_rate": 3.867403314917127e-07, |
| "loss": 0.8351, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.004423963133640553, |
| "grad_norm": 5.6800150871276855, |
| "learning_rate": 4.419889502762431e-07, |
| "loss": 0.8534, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.004976958525345622, |
| "grad_norm": 5.634627819061279, |
| "learning_rate": 4.972375690607735e-07, |
| "loss": 0.8592, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.005529953917050691, |
| "grad_norm": 5.71673059463501, |
| "learning_rate": 5.524861878453039e-07, |
| "loss": 0.8825, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00608294930875576, |
| "grad_norm": 5.533100605010986, |
| "learning_rate": 6.077348066298343e-07, |
| "loss": 0.8662, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0066359447004608295, |
| "grad_norm": 5.378372669219971, |
| "learning_rate": 6.629834254143647e-07, |
| "loss": 0.8688, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.007188940092165899, |
| "grad_norm": 5.282576560974121, |
| "learning_rate": 7.18232044198895e-07, |
| "loss": 0.8179, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.007741935483870968, |
| "grad_norm": 4.443999290466309, |
| "learning_rate": 7.734806629834254e-07, |
| "loss": 0.829, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.008294930875576038, |
| "grad_norm": 4.481590270996094, |
| "learning_rate": 8.287292817679559e-07, |
| "loss": 0.8441, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.008847926267281106, |
| "grad_norm": 4.04002571105957, |
| "learning_rate": 8.839779005524863e-07, |
| "loss": 0.7884, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.009400921658986176, |
| "grad_norm": 3.9912734031677246, |
| "learning_rate": 9.392265193370166e-07, |
| "loss": 0.7851, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.009953917050691244, |
| "grad_norm": 3.7548842430114746, |
| "learning_rate": 9.94475138121547e-07, |
| "loss": 0.7836, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.010506912442396314, |
| "grad_norm": 2.2732059955596924, |
| "learning_rate": 1.0497237569060774e-06, |
| "loss": 0.7572, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.011059907834101382, |
| "grad_norm": 2.244140625, |
| "learning_rate": 1.1049723756906078e-06, |
| "loss": 0.7637, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.011612903225806452, |
| "grad_norm": 2.1460845470428467, |
| "learning_rate": 1.160220994475138e-06, |
| "loss": 0.7849, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.01216589861751152, |
| "grad_norm": 2.0333800315856934, |
| "learning_rate": 1.2154696132596686e-06, |
| "loss": 0.8008, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.01271889400921659, |
| "grad_norm": 1.962172508239746, |
| "learning_rate": 1.270718232044199e-06, |
| "loss": 0.7914, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.013271889400921659, |
| "grad_norm": 1.9014993906021118, |
| "learning_rate": 1.3259668508287293e-06, |
| "loss": 0.754, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.013824884792626729, |
| "grad_norm": 1.5539250373840332, |
| "learning_rate": 1.3812154696132598e-06, |
| "loss": 0.7218, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.014377880184331797, |
| "grad_norm": 2.1412103176116943, |
| "learning_rate": 1.43646408839779e-06, |
| "loss": 0.7083, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.014930875576036867, |
| "grad_norm": 2.793086290359497, |
| "learning_rate": 1.4917127071823205e-06, |
| "loss": 0.7505, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.015483870967741935, |
| "grad_norm": 2.779872179031372, |
| "learning_rate": 1.5469613259668508e-06, |
| "loss": 0.7609, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.016036866359447004, |
| "grad_norm": 2.9033734798431396, |
| "learning_rate": 1.6022099447513815e-06, |
| "loss": 0.7388, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.016589861751152075, |
| "grad_norm": 2.6718666553497314, |
| "learning_rate": 1.6574585635359118e-06, |
| "loss": 0.7395, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.017142857142857144, |
| "grad_norm": 2.3672306537628174, |
| "learning_rate": 1.7127071823204422e-06, |
| "loss": 0.736, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.017695852534562212, |
| "grad_norm": 2.0016157627105713, |
| "learning_rate": 1.7679558011049725e-06, |
| "loss": 0.7169, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.01824884792626728, |
| "grad_norm": 1.755001425743103, |
| "learning_rate": 1.823204419889503e-06, |
| "loss": 0.7358, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.018801843317972352, |
| "grad_norm": 1.084032416343689, |
| "learning_rate": 1.8784530386740332e-06, |
| "loss": 0.654, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.01935483870967742, |
| "grad_norm": 1.0225807428359985, |
| "learning_rate": 1.933701657458564e-06, |
| "loss": 0.6793, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01990783410138249, |
| "grad_norm": 1.0972223281860352, |
| "learning_rate": 1.988950276243094e-06, |
| "loss": 0.6625, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.020460829493087557, |
| "grad_norm": 1.1796579360961914, |
| "learning_rate": 2.0441988950276245e-06, |
| "loss": 0.6905, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.02101382488479263, |
| "grad_norm": 1.1285828351974487, |
| "learning_rate": 2.0994475138121547e-06, |
| "loss": 0.6433, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.021566820276497697, |
| "grad_norm": 1.0303235054016113, |
| "learning_rate": 2.1546961325966854e-06, |
| "loss": 0.637, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.022119815668202765, |
| "grad_norm": 0.9121658205986023, |
| "learning_rate": 2.2099447513812157e-06, |
| "loss": 0.6506, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.022672811059907833, |
| "grad_norm": 0.8387944102287292, |
| "learning_rate": 2.265193370165746e-06, |
| "loss": 0.6217, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.023225806451612905, |
| "grad_norm": 0.762744665145874, |
| "learning_rate": 2.320441988950276e-06, |
| "loss": 0.6131, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.023778801843317973, |
| "grad_norm": 0.8466107845306396, |
| "learning_rate": 2.375690607734807e-06, |
| "loss": 0.6313, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.02433179723502304, |
| "grad_norm": 0.7411535382270813, |
| "learning_rate": 2.430939226519337e-06, |
| "loss": 0.6178, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.02488479262672811, |
| "grad_norm": 0.7712140083312988, |
| "learning_rate": 2.486187845303868e-06, |
| "loss": 0.6218, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.02543778801843318, |
| "grad_norm": 1.0630971193313599, |
| "learning_rate": 2.541436464088398e-06, |
| "loss": 0.6497, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.02599078341013825, |
| "grad_norm": 0.8186689615249634, |
| "learning_rate": 2.5966850828729284e-06, |
| "loss": 0.6493, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.026543778801843318, |
| "grad_norm": 0.7290191650390625, |
| "learning_rate": 2.6519337016574586e-06, |
| "loss": 0.6278, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.027096774193548386, |
| "grad_norm": 0.6419755816459656, |
| "learning_rate": 2.707182320441989e-06, |
| "loss": 0.6495, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.027649769585253458, |
| "grad_norm": 0.6034302711486816, |
| "learning_rate": 2.7624309392265196e-06, |
| "loss": 0.6262, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.028202764976958526, |
| "grad_norm": 0.7188774943351746, |
| "learning_rate": 2.81767955801105e-06, |
| "loss": 0.6416, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.028755760368663594, |
| "grad_norm": 0.7371018528938293, |
| "learning_rate": 2.87292817679558e-06, |
| "loss": 0.6181, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.029308755760368663, |
| "grad_norm": 0.7526310086250305, |
| "learning_rate": 2.9281767955801104e-06, |
| "loss": 0.6644, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.029861751152073734, |
| "grad_norm": 0.6002934575080872, |
| "learning_rate": 2.983425414364641e-06, |
| "loss": 0.57, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.030414746543778803, |
| "grad_norm": 0.535647451877594, |
| "learning_rate": 3.0386740331491713e-06, |
| "loss": 0.6158, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.03096774193548387, |
| "grad_norm": 0.5333572030067444, |
| "learning_rate": 3.0939226519337016e-06, |
| "loss": 0.6237, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.03152073732718894, |
| "grad_norm": 0.6033058166503906, |
| "learning_rate": 3.149171270718232e-06, |
| "loss": 0.5905, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.03207373271889401, |
| "grad_norm": 0.5824403166770935, |
| "learning_rate": 3.204419889502763e-06, |
| "loss": 0.5934, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.03262672811059908, |
| "grad_norm": 0.5389731526374817, |
| "learning_rate": 3.2596685082872933e-06, |
| "loss": 0.6149, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.03317972350230415, |
| "grad_norm": 0.5317662954330444, |
| "learning_rate": 3.3149171270718235e-06, |
| "loss": 0.5953, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.033732718894009216, |
| "grad_norm": 0.5464670658111572, |
| "learning_rate": 3.370165745856354e-06, |
| "loss": 0.5848, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.03428571428571429, |
| "grad_norm": 0.5087230205535889, |
| "learning_rate": 3.4254143646408845e-06, |
| "loss": 0.5912, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.03483870967741935, |
| "grad_norm": 0.4756256937980652, |
| "learning_rate": 3.4806629834254147e-06, |
| "loss": 0.5842, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.035391705069124424, |
| "grad_norm": 0.49870121479034424, |
| "learning_rate": 3.535911602209945e-06, |
| "loss": 0.6009, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.035944700460829496, |
| "grad_norm": 0.44439196586608887, |
| "learning_rate": 3.5911602209944757e-06, |
| "loss": 0.5832, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.03649769585253456, |
| "grad_norm": 0.4260464608669281, |
| "learning_rate": 3.646408839779006e-06, |
| "loss": 0.5896, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.03705069124423963, |
| "grad_norm": 0.4569395184516907, |
| "learning_rate": 3.7016574585635362e-06, |
| "loss": 0.6184, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.037603686635944704, |
| "grad_norm": 0.4692537486553192, |
| "learning_rate": 3.7569060773480665e-06, |
| "loss": 0.5623, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.03815668202764977, |
| "grad_norm": 0.40080568194389343, |
| "learning_rate": 3.812154696132597e-06, |
| "loss": 0.5832, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.03870967741935484, |
| "grad_norm": 0.47561773657798767, |
| "learning_rate": 3.867403314917128e-06, |
| "loss": 0.5801, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.039262672811059905, |
| "grad_norm": 0.47063136100769043, |
| "learning_rate": 3.922651933701658e-06, |
| "loss": 0.5913, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03981566820276498, |
| "grad_norm": 0.435703843832016, |
| "learning_rate": 3.977900552486188e-06, |
| "loss": 0.583, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.04036866359447005, |
| "grad_norm": 0.44961071014404297, |
| "learning_rate": 4.033149171270719e-06, |
| "loss": 0.6015, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.04092165898617511, |
| "grad_norm": 0.43612730503082275, |
| "learning_rate": 4.088397790055249e-06, |
| "loss": 0.5916, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.041474654377880185, |
| "grad_norm": 0.4045799970626831, |
| "learning_rate": 4.143646408839779e-06, |
| "loss": 0.5365, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.04202764976958526, |
| "grad_norm": 0.47303467988967896, |
| "learning_rate": 4.1988950276243095e-06, |
| "loss": 0.587, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.04258064516129032, |
| "grad_norm": 0.3942665159702301, |
| "learning_rate": 4.2541436464088406e-06, |
| "loss": 0.5682, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.04313364055299539, |
| "grad_norm": 0.40599432587623596, |
| "learning_rate": 4.309392265193371e-06, |
| "loss": 0.5686, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.04368663594470046, |
| "grad_norm": 0.41502925753593445, |
| "learning_rate": 4.364640883977901e-06, |
| "loss": 0.5644, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.04423963133640553, |
| "grad_norm": 0.4402730166912079, |
| "learning_rate": 4.419889502762431e-06, |
| "loss": 0.5458, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0447926267281106, |
| "grad_norm": 0.4110409915447235, |
| "learning_rate": 4.475138121546962e-06, |
| "loss": 0.5447, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.045345622119815666, |
| "grad_norm": 0.46120530366897583, |
| "learning_rate": 4.530386740331492e-06, |
| "loss": 0.584, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.04589861751152074, |
| "grad_norm": 0.4089435040950775, |
| "learning_rate": 4.585635359116022e-06, |
| "loss": 0.555, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.04645161290322581, |
| "grad_norm": 0.4719107449054718, |
| "learning_rate": 4.640883977900552e-06, |
| "loss": 0.5714, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.047004608294930875, |
| "grad_norm": 0.45129814743995667, |
| "learning_rate": 4.6961325966850835e-06, |
| "loss": 0.5443, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.047557603686635946, |
| "grad_norm": 0.4321492612361908, |
| "learning_rate": 4.751381215469614e-06, |
| "loss": 0.5624, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.04811059907834101, |
| "grad_norm": 0.4300953149795532, |
| "learning_rate": 4.806629834254144e-06, |
| "loss": 0.5497, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.04866359447004608, |
| "grad_norm": 0.4590497314929962, |
| "learning_rate": 4.861878453038674e-06, |
| "loss": 0.5652, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.049216589861751155, |
| "grad_norm": 0.47058796882629395, |
| "learning_rate": 4.9171270718232054e-06, |
| "loss": 0.5768, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.04976958525345622, |
| "grad_norm": 0.43738317489624023, |
| "learning_rate": 4.972375690607736e-06, |
| "loss": 0.58, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.05032258064516129, |
| "grad_norm": 0.4055286943912506, |
| "learning_rate": 5.027624309392266e-06, |
| "loss": 0.5503, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.05087557603686636, |
| "grad_norm": 0.44679558277130127, |
| "learning_rate": 5.082872928176796e-06, |
| "loss": 0.5347, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.05142857142857143, |
| "grad_norm": 0.46003517508506775, |
| "learning_rate": 5.1381215469613265e-06, |
| "loss": 0.5854, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.0519815668202765, |
| "grad_norm": 0.44815686345100403, |
| "learning_rate": 5.193370165745857e-06, |
| "loss": 0.5316, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.052534562211981564, |
| "grad_norm": 0.4559434950351715, |
| "learning_rate": 5.248618784530387e-06, |
| "loss": 0.5724, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.053087557603686636, |
| "grad_norm": 0.45906394720077515, |
| "learning_rate": 5.303867403314917e-06, |
| "loss": 0.565, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.05364055299539171, |
| "grad_norm": 0.3892553448677063, |
| "learning_rate": 5.3591160220994476e-06, |
| "loss": 0.5397, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.05419354838709677, |
| "grad_norm": 0.4995177090167999, |
| "learning_rate": 5.414364640883978e-06, |
| "loss": 0.5509, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.054746543778801844, |
| "grad_norm": 0.4675018787384033, |
| "learning_rate": 5.469613259668509e-06, |
| "loss": 0.5432, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.055299539170506916, |
| "grad_norm": 0.4121154844760895, |
| "learning_rate": 5.524861878453039e-06, |
| "loss": 0.5463, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05585253456221198, |
| "grad_norm": 0.4763623774051666, |
| "learning_rate": 5.5801104972375695e-06, |
| "loss": 0.5673, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.05640552995391705, |
| "grad_norm": 0.44080275297164917, |
| "learning_rate": 5.6353591160221e-06, |
| "loss": 0.5684, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.05695852534562212, |
| "grad_norm": 0.4623638391494751, |
| "learning_rate": 5.69060773480663e-06, |
| "loss": 0.5738, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.05751152073732719, |
| "grad_norm": 0.379423588514328, |
| "learning_rate": 5.74585635359116e-06, |
| "loss": 0.5503, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.05806451612903226, |
| "grad_norm": 0.45095521211624146, |
| "learning_rate": 5.8011049723756905e-06, |
| "loss": 0.5683, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.058617511520737325, |
| "grad_norm": 0.4157678484916687, |
| "learning_rate": 5.856353591160221e-06, |
| "loss": 0.5459, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.0591705069124424, |
| "grad_norm": 0.4484108090400696, |
| "learning_rate": 5.911602209944752e-06, |
| "loss": 0.5465, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.05972350230414747, |
| "grad_norm": 0.41227927803993225, |
| "learning_rate": 5.966850828729282e-06, |
| "loss": 0.5737, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.060276497695852534, |
| "grad_norm": 0.4771617352962494, |
| "learning_rate": 6.0220994475138124e-06, |
| "loss": 0.5295, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.060829493087557605, |
| "grad_norm": 0.42870959639549255, |
| "learning_rate": 6.077348066298343e-06, |
| "loss": 0.5432, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.06138248847926267, |
| "grad_norm": 0.47716712951660156, |
| "learning_rate": 6.132596685082873e-06, |
| "loss": 0.5695, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.06193548387096774, |
| "grad_norm": 0.43211421370506287, |
| "learning_rate": 6.187845303867403e-06, |
| "loss": 0.5408, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.062488479262672814, |
| "grad_norm": 0.49489325284957886, |
| "learning_rate": 6.2430939226519335e-06, |
| "loss": 0.5488, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.06304147465437789, |
| "grad_norm": 0.4600902199745178, |
| "learning_rate": 6.298342541436464e-06, |
| "loss": 0.5445, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.06359447004608294, |
| "grad_norm": 0.41128382086753845, |
| "learning_rate": 6.353591160220996e-06, |
| "loss": 0.5507, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.06414746543778801, |
| "grad_norm": 0.38756921887397766, |
| "learning_rate": 6.408839779005526e-06, |
| "loss": 0.5072, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.06470046082949309, |
| "grad_norm": 0.4521966278553009, |
| "learning_rate": 6.464088397790056e-06, |
| "loss": 0.5597, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.06525345622119816, |
| "grad_norm": 0.39012429118156433, |
| "learning_rate": 6.5193370165745865e-06, |
| "loss": 0.5017, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.06580645161290323, |
| "grad_norm": 0.4543527364730835, |
| "learning_rate": 6.574585635359117e-06, |
| "loss": 0.5342, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0663594470046083, |
| "grad_norm": 0.4338400065898895, |
| "learning_rate": 6.629834254143647e-06, |
| "loss": 0.5489, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.06691244239631336, |
| "grad_norm": 0.461480975151062, |
| "learning_rate": 6.685082872928177e-06, |
| "loss": 0.5327, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.06746543778801843, |
| "grad_norm": 0.4241288900375366, |
| "learning_rate": 6.740331491712708e-06, |
| "loss": 0.5297, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.0680184331797235, |
| "grad_norm": 0.5540463924407959, |
| "learning_rate": 6.795580110497239e-06, |
| "loss": 0.5262, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.06857142857142857, |
| "grad_norm": 0.4794096350669861, |
| "learning_rate": 6.850828729281769e-06, |
| "loss": 0.5638, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.06912442396313365, |
| "grad_norm": 0.43071404099464417, |
| "learning_rate": 6.906077348066299e-06, |
| "loss": 0.5254, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.0696774193548387, |
| "grad_norm": 0.4853145480155945, |
| "learning_rate": 6.9613259668508295e-06, |
| "loss": 0.5524, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.07023041474654378, |
| "grad_norm": 0.47362276911735535, |
| "learning_rate": 7.01657458563536e-06, |
| "loss": 0.5469, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.07078341013824885, |
| "grad_norm": 0.45325443148612976, |
| "learning_rate": 7.07182320441989e-06, |
| "loss": 0.5476, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.07133640552995392, |
| "grad_norm": 0.43244412541389465, |
| "learning_rate": 7.12707182320442e-06, |
| "loss": 0.5367, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.07188940092165899, |
| "grad_norm": 0.5289068222045898, |
| "learning_rate": 7.182320441988951e-06, |
| "loss": 0.5796, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.07244239631336405, |
| "grad_norm": 0.4485274851322174, |
| "learning_rate": 7.237569060773482e-06, |
| "loss": 0.5396, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.07299539170506912, |
| "grad_norm": 0.42793214321136475, |
| "learning_rate": 7.292817679558012e-06, |
| "loss": 0.5532, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.07354838709677419, |
| "grad_norm": 0.4250226318836212, |
| "learning_rate": 7.348066298342542e-06, |
| "loss": 0.5847, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.07410138248847926, |
| "grad_norm": 0.44791412353515625, |
| "learning_rate": 7.4033149171270724e-06, |
| "loss": 0.5216, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.07465437788018434, |
| "grad_norm": 0.48473551869392395, |
| "learning_rate": 7.458563535911603e-06, |
| "loss": 0.5434, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.07520737327188941, |
| "grad_norm": 0.5042223334312439, |
| "learning_rate": 7.513812154696133e-06, |
| "loss": 0.5292, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.07576036866359447, |
| "grad_norm": 0.5112206935882568, |
| "learning_rate": 7.569060773480663e-06, |
| "loss": 0.573, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.07631336405529954, |
| "grad_norm": 0.4625641107559204, |
| "learning_rate": 7.624309392265194e-06, |
| "loss": 0.5656, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.07686635944700461, |
| "grad_norm": 0.507746160030365, |
| "learning_rate": 7.679558011049725e-06, |
| "loss": 0.5352, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.07741935483870968, |
| "grad_norm": 0.4666261672973633, |
| "learning_rate": 7.734806629834256e-06, |
| "loss": 0.5465, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.07797235023041475, |
| "grad_norm": 0.44455286860466003, |
| "learning_rate": 7.790055248618785e-06, |
| "loss": 0.5395, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.07852534562211981, |
| "grad_norm": 0.4286342263221741, |
| "learning_rate": 7.845303867403316e-06, |
| "loss": 0.5045, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.07907834101382488, |
| "grad_norm": 0.4748035669326782, |
| "learning_rate": 7.900552486187846e-06, |
| "loss": 0.552, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.07963133640552995, |
| "grad_norm": 0.44316938519477844, |
| "learning_rate": 7.955801104972377e-06, |
| "loss": 0.5394, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.08018433179723503, |
| "grad_norm": 0.4846290647983551, |
| "learning_rate": 8.011049723756906e-06, |
| "loss": 0.5267, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.0807373271889401, |
| "grad_norm": 0.4428083300590515, |
| "learning_rate": 8.066298342541437e-06, |
| "loss": 0.4941, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.08129032258064516, |
| "grad_norm": 0.44677111506462097, |
| "learning_rate": 8.121546961325968e-06, |
| "loss": 0.4885, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.08184331797235023, |
| "grad_norm": 0.4421268105506897, |
| "learning_rate": 8.176795580110498e-06, |
| "loss": 0.5765, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.0823963133640553, |
| "grad_norm": 0.4853808581829071, |
| "learning_rate": 8.232044198895029e-06, |
| "loss": 0.5297, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.08294930875576037, |
| "grad_norm": 0.47924932837486267, |
| "learning_rate": 8.287292817679558e-06, |
| "loss": 0.5192, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.08350230414746544, |
| "grad_norm": 0.5630611181259155, |
| "learning_rate": 8.34254143646409e-06, |
| "loss": 0.5439, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.08405529953917051, |
| "grad_norm": 0.6081127524375916, |
| "learning_rate": 8.397790055248619e-06, |
| "loss": 0.5012, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.08460829493087557, |
| "grad_norm": 0.44527262449264526, |
| "learning_rate": 8.45303867403315e-06, |
| "loss": 0.5305, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.08516129032258064, |
| "grad_norm": 0.4948616623878479, |
| "learning_rate": 8.508287292817681e-06, |
| "loss": 0.5109, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.08571428571428572, |
| "grad_norm": 0.5024743676185608, |
| "learning_rate": 8.56353591160221e-06, |
| "loss": 0.5632, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.08626728110599079, |
| "grad_norm": 0.45424172282218933, |
| "learning_rate": 8.618784530386742e-06, |
| "loss": 0.5029, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.08682027649769586, |
| "grad_norm": 0.5445595979690552, |
| "learning_rate": 8.674033149171271e-06, |
| "loss": 0.5451, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.08737327188940092, |
| "grad_norm": 0.495613157749176, |
| "learning_rate": 8.729281767955802e-06, |
| "loss": 0.527, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.08792626728110599, |
| "grad_norm": 0.6915062665939331, |
| "learning_rate": 8.784530386740332e-06, |
| "loss": 0.5378, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.08847926267281106, |
| "grad_norm": 0.4711109697818756, |
| "learning_rate": 8.839779005524863e-06, |
| "loss": 0.5105, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.08903225806451613, |
| "grad_norm": 0.5774613618850708, |
| "learning_rate": 8.895027624309392e-06, |
| "loss": 0.5268, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.0895852534562212, |
| "grad_norm": 0.534370481967926, |
| "learning_rate": 8.950276243093923e-06, |
| "loss": 0.5275, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.09013824884792626, |
| "grad_norm": 0.5167669057846069, |
| "learning_rate": 9.005524861878454e-06, |
| "loss": 0.5173, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.09069124423963133, |
| "grad_norm": 0.5220374464988708, |
| "learning_rate": 9.060773480662984e-06, |
| "loss": 0.5117, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.0912442396313364, |
| "grad_norm": 0.6151928305625916, |
| "learning_rate": 9.116022099447515e-06, |
| "loss": 0.5236, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.09179723502304148, |
| "grad_norm": 0.54198157787323, |
| "learning_rate": 9.171270718232044e-06, |
| "loss": 0.5113, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.09235023041474655, |
| "grad_norm": 0.540729820728302, |
| "learning_rate": 9.226519337016575e-06, |
| "loss": 0.5293, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.09290322580645162, |
| "grad_norm": 0.4339202642440796, |
| "learning_rate": 9.281767955801105e-06, |
| "loss": 0.5105, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.09345622119815668, |
| "grad_norm": 0.5427307486534119, |
| "learning_rate": 9.337016574585636e-06, |
| "loss": 0.5442, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.09400921658986175, |
| "grad_norm": 0.5696178674697876, |
| "learning_rate": 9.392265193370167e-06, |
| "loss": 0.5198, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.09456221198156682, |
| "grad_norm": 0.475676029920578, |
| "learning_rate": 9.447513812154696e-06, |
| "loss": 0.5275, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.09511520737327189, |
| "grad_norm": 0.6084780693054199, |
| "learning_rate": 9.502762430939228e-06, |
| "loss": 0.5344, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.09566820276497696, |
| "grad_norm": 0.45912471413612366, |
| "learning_rate": 9.558011049723757e-06, |
| "loss": 0.5128, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.09622119815668202, |
| "grad_norm": 0.5522315502166748, |
| "learning_rate": 9.613259668508288e-06, |
| "loss": 0.4985, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.0967741935483871, |
| "grad_norm": 0.5598754286766052, |
| "learning_rate": 9.668508287292818e-06, |
| "loss": 0.5438, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.09732718894009217, |
| "grad_norm": 0.4842788577079773, |
| "learning_rate": 9.723756906077349e-06, |
| "loss": 0.5098, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.09788018433179724, |
| "grad_norm": 0.5201513171195984, |
| "learning_rate": 9.779005524861878e-06, |
| "loss": 0.5155, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.09843317972350231, |
| "grad_norm": 0.4839669466018677, |
| "learning_rate": 9.834254143646411e-06, |
| "loss": 0.5201, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.09898617511520737, |
| "grad_norm": 0.5859594941139221, |
| "learning_rate": 9.88950276243094e-06, |
| "loss": 0.5603, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.09953917050691244, |
| "grad_norm": 0.48163536190986633, |
| "learning_rate": 9.944751381215471e-06, |
| "loss": 0.5279, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.10009216589861751, |
| "grad_norm": 0.508065938949585, |
| "learning_rate": 1e-05, |
| "loss": 0.5186, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.10064516129032258, |
| "grad_norm": 0.47829869389533997, |
| "learning_rate": 9.999990678956964e-06, |
| "loss": 0.5224, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.10119815668202765, |
| "grad_norm": 0.5300391912460327, |
| "learning_rate": 9.999962715862601e-06, |
| "loss": 0.5128, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.10175115207373273, |
| "grad_norm": 0.47900837659835815, |
| "learning_rate": 9.999916110821174e-06, |
| "loss": 0.5232, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.10230414746543778, |
| "grad_norm": 0.4351784288883209, |
| "learning_rate": 9.999850864006444e-06, |
| "loss": 0.5274, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.10285714285714286, |
| "grad_norm": 0.48379045724868774, |
| "learning_rate": 9.99976697566168e-06, |
| "loss": 0.4964, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.10341013824884793, |
| "grad_norm": 0.4367005228996277, |
| "learning_rate": 9.999664446099651e-06, |
| "loss": 0.5311, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.103963133640553, |
| "grad_norm": 0.4944308400154114, |
| "learning_rate": 9.999543275702632e-06, |
| "loss": 0.5093, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.10451612903225807, |
| "grad_norm": 0.44800275564193726, |
| "learning_rate": 9.999403464922393e-06, |
| "loss": 0.5124, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.10506912442396313, |
| "grad_norm": 0.5029193758964539, |
| "learning_rate": 9.99924501428021e-06, |
| "loss": 0.5538, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1056221198156682, |
| "grad_norm": 0.4435194730758667, |
| "learning_rate": 9.999067924366854e-06, |
| "loss": 0.5291, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.10617511520737327, |
| "grad_norm": 0.45577532052993774, |
| "learning_rate": 9.998872195842588e-06, |
| "loss": 0.5107, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.10672811059907834, |
| "grad_norm": 0.4373863637447357, |
| "learning_rate": 9.998657829437171e-06, |
| "loss": 0.5137, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.10728110599078342, |
| "grad_norm": 0.4408453404903412, |
| "learning_rate": 9.998424825949848e-06, |
| "loss": 0.5061, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.10783410138248847, |
| "grad_norm": 0.5477350950241089, |
| "learning_rate": 9.998173186249357e-06, |
| "loss": 0.5363, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.10838709677419354, |
| "grad_norm": 0.43939393758773804, |
| "learning_rate": 9.99790291127391e-06, |
| "loss": 0.5095, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.10894009216589862, |
| "grad_norm": 0.47420209646224976, |
| "learning_rate": 9.997614002031211e-06, |
| "loss": 0.5297, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.10949308755760369, |
| "grad_norm": 0.45546746253967285, |
| "learning_rate": 9.99730645959843e-06, |
| "loss": 0.5139, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.11004608294930876, |
| "grad_norm": 0.4920537769794464, |
| "learning_rate": 9.996980285122218e-06, |
| "loss": 0.5344, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.11059907834101383, |
| "grad_norm": 0.562940776348114, |
| "learning_rate": 9.996635479818683e-06, |
| "loss": 0.535, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.11115207373271889, |
| "grad_norm": 0.5914273262023926, |
| "learning_rate": 9.99627204497341e-06, |
| "loss": 0.5134, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.11170506912442396, |
| "grad_norm": 0.4358191192150116, |
| "learning_rate": 9.995889981941432e-06, |
| "loss": 0.4886, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.11225806451612903, |
| "grad_norm": 0.569436252117157, |
| "learning_rate": 9.995489292147238e-06, |
| "loss": 0.5302, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.1128110599078341, |
| "grad_norm": 0.5561317801475525, |
| "learning_rate": 9.995069977084769e-06, |
| "loss": 0.5214, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.11336405529953918, |
| "grad_norm": 0.4526347219944, |
| "learning_rate": 9.994632038317407e-06, |
| "loss": 0.5106, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.11391705069124423, |
| "grad_norm": 0.6103693246841431, |
| "learning_rate": 9.99417547747797e-06, |
| "loss": 0.5187, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.1144700460829493, |
| "grad_norm": 0.5255654454231262, |
| "learning_rate": 9.993700296268705e-06, |
| "loss": 0.5253, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.11502304147465438, |
| "grad_norm": 0.48559436202049255, |
| "learning_rate": 9.993206496461287e-06, |
| "loss": 0.5127, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.11557603686635945, |
| "grad_norm": 0.5230923891067505, |
| "learning_rate": 9.992694079896812e-06, |
| "loss": 0.5078, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.11612903225806452, |
| "grad_norm": 0.45681560039520264, |
| "learning_rate": 9.992163048485776e-06, |
| "loss": 0.522, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.11668202764976958, |
| "grad_norm": 0.510287344455719, |
| "learning_rate": 9.99161340420809e-06, |
| "loss": 0.509, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.11723502304147465, |
| "grad_norm": 0.4897903501987457, |
| "learning_rate": 9.991045149113055e-06, |
| "loss": 0.5118, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.11778801843317972, |
| "grad_norm": 0.516163170337677, |
| "learning_rate": 9.990458285319362e-06, |
| "loss": 0.4915, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.1183410138248848, |
| "grad_norm": 0.5389583110809326, |
| "learning_rate": 9.98985281501509e-06, |
| "loss": 0.5192, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.11889400921658987, |
| "grad_norm": 0.6268092393875122, |
| "learning_rate": 9.989228740457679e-06, |
| "loss": 0.524, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.11944700460829494, |
| "grad_norm": 0.447966605424881, |
| "learning_rate": 9.988586063973942e-06, |
| "loss": 0.4864, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.6052084565162659, |
| "learning_rate": 9.987924787960043e-06, |
| "loss": 0.5005, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.12055299539170507, |
| "grad_norm": 0.5076256990432739, |
| "learning_rate": 9.987244914881498e-06, |
| "loss": 0.5219, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.12110599078341014, |
| "grad_norm": 0.5539697408676147, |
| "learning_rate": 9.986546447273153e-06, |
| "loss": 0.5024, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.12165898617511521, |
| "grad_norm": 0.4377414584159851, |
| "learning_rate": 9.985829387739192e-06, |
| "loss": 0.5291, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.12221198156682028, |
| "grad_norm": 0.5455598831176758, |
| "learning_rate": 9.985093738953108e-06, |
| "loss": 0.5126, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.12276497695852534, |
| "grad_norm": 0.462424099445343, |
| "learning_rate": 9.98433950365771e-06, |
| "loss": 0.5225, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.12331797235023041, |
| "grad_norm": 0.49225398898124695, |
| "learning_rate": 9.983566684665097e-06, |
| "loss": 0.5328, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.12387096774193548, |
| "grad_norm": 0.5304319262504578, |
| "learning_rate": 9.982775284856665e-06, |
| "loss": 0.5212, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.12442396313364056, |
| "grad_norm": 0.4655149579048157, |
| "learning_rate": 9.981965307183081e-06, |
| "loss": 0.5161, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.12497695852534563, |
| "grad_norm": 0.6474348902702332, |
| "learning_rate": 9.98113675466428e-06, |
| "loss": 0.4983, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.12552995391705069, |
| "grad_norm": 0.43538525700569153, |
| "learning_rate": 9.980289630389453e-06, |
| "loss": 0.5286, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.12608294930875577, |
| "grad_norm": 0.5731073617935181, |
| "learning_rate": 9.97942393751703e-06, |
| "loss": 0.5414, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.12663594470046083, |
| "grad_norm": 0.4664866626262665, |
| "learning_rate": 9.978539679274675e-06, |
| "loss": 0.5052, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.1271889400921659, |
| "grad_norm": 0.5314398407936096, |
| "learning_rate": 9.977636858959274e-06, |
| "loss": 0.527, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.12774193548387097, |
| "grad_norm": 0.538694441318512, |
| "learning_rate": 9.976715479936916e-06, |
| "loss": 0.5094, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.12829493087557603, |
| "grad_norm": 0.4854016900062561, |
| "learning_rate": 9.975775545642889e-06, |
| "loss": 0.5078, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.12884792626728112, |
| "grad_norm": 0.5078600645065308, |
| "learning_rate": 9.974817059581656e-06, |
| "loss": 0.5236, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.12940092165898617, |
| "grad_norm": 0.455322802066803, |
| "learning_rate": 9.973840025326858e-06, |
| "loss": 0.5137, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.12995391705069123, |
| "grad_norm": 0.5216564536094666, |
| "learning_rate": 9.972844446521281e-06, |
| "loss": 0.5289, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.13050691244239632, |
| "grad_norm": 0.4569193422794342, |
| "learning_rate": 9.971830326876864e-06, |
| "loss": 0.5137, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.13105990783410137, |
| "grad_norm": 0.5937883853912354, |
| "learning_rate": 9.970797670174663e-06, |
| "loss": 0.5142, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.13161290322580646, |
| "grad_norm": 0.48625534772872925, |
| "learning_rate": 9.969746480264855e-06, |
| "loss": 0.5279, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.13216589861751152, |
| "grad_norm": 0.47141098976135254, |
| "learning_rate": 9.968676761066714e-06, |
| "loss": 0.5042, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.1327188940092166, |
| "grad_norm": 0.4889548122882843, |
| "learning_rate": 9.967588516568601e-06, |
| "loss": 0.5154, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.13327188940092166, |
| "grad_norm": 0.4120868444442749, |
| "learning_rate": 9.966481750827943e-06, |
| "loss": 0.504, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.13382488479262672, |
| "grad_norm": 0.5358414053916931, |
| "learning_rate": 9.965356467971228e-06, |
| "loss": 0.5264, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.1343778801843318, |
| "grad_norm": 0.472572922706604, |
| "learning_rate": 9.964212672193978e-06, |
| "loss": 0.5176, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.13493087557603686, |
| "grad_norm": 0.4870736598968506, |
| "learning_rate": 9.96305036776074e-06, |
| "loss": 0.5071, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.13548387096774195, |
| "grad_norm": 0.5704755783081055, |
| "learning_rate": 9.96186955900507e-06, |
| "loss": 0.5198, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.136036866359447, |
| "grad_norm": 0.5287049412727356, |
| "learning_rate": 9.960670250329517e-06, |
| "loss": 0.5216, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.13658986175115206, |
| "grad_norm": 0.5611394643783569, |
| "learning_rate": 9.959452446205603e-06, |
| "loss": 0.5188, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.13714285714285715, |
| "grad_norm": 0.4512503445148468, |
| "learning_rate": 9.958216151173812e-06, |
| "loss": 0.4979, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.1376958525345622, |
| "grad_norm": 0.47202053666114807, |
| "learning_rate": 9.956961369843565e-06, |
| "loss": 0.4834, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.1382488479262673, |
| "grad_norm": 0.5006586909294128, |
| "learning_rate": 9.95568810689321e-06, |
| "loss": 0.5285, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.13880184331797235, |
| "grad_norm": 0.4660753011703491, |
| "learning_rate": 9.954396367070006e-06, |
| "loss": 0.5192, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.1393548387096774, |
| "grad_norm": 0.4885999262332916, |
| "learning_rate": 9.953086155190095e-06, |
| "loss": 0.5028, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.1399078341013825, |
| "grad_norm": 0.49867549538612366, |
| "learning_rate": 9.951757476138495e-06, |
| "loss": 0.5039, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.14046082949308755, |
| "grad_norm": 0.5823776125907898, |
| "learning_rate": 9.950410334869075e-06, |
| "loss": 0.535, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.14101382488479264, |
| "grad_norm": 0.47015827894210815, |
| "learning_rate": 9.949044736404538e-06, |
| "loss": 0.528, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.1415668202764977, |
| "grad_norm": 0.5392245054244995, |
| "learning_rate": 9.94766068583641e-06, |
| "loss": 0.4855, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.14211981566820275, |
| "grad_norm": 0.5323054194450378, |
| "learning_rate": 9.946258188325003e-06, |
| "loss": 0.5269, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.14267281105990784, |
| "grad_norm": 0.5459584593772888, |
| "learning_rate": 9.944837249099418e-06, |
| "loss": 0.4874, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1432258064516129, |
| "grad_norm": 0.4532824158668518, |
| "learning_rate": 9.943397873457503e-06, |
| "loss": 0.5111, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.14377880184331798, |
| "grad_norm": 0.46974876523017883, |
| "learning_rate": 9.94194006676586e-06, |
| "loss": 0.5261, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.14433179723502304, |
| "grad_norm": 0.5061238408088684, |
| "learning_rate": 9.94046383445979e-06, |
| "loss": 0.5217, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.1448847926267281, |
| "grad_norm": 0.4556806981563568, |
| "learning_rate": 9.938969182043312e-06, |
| "loss": 0.4969, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.14543778801843318, |
| "grad_norm": 0.48571473360061646, |
| "learning_rate": 9.937456115089108e-06, |
| "loss": 0.5057, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.14599078341013824, |
| "grad_norm": 0.497122198343277, |
| "learning_rate": 9.935924639238526e-06, |
| "loss": 0.5205, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.14654377880184333, |
| "grad_norm": 0.5786699652671814, |
| "learning_rate": 9.934374760201546e-06, |
| "loss": 0.5122, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.14709677419354839, |
| "grad_norm": 0.5201807022094727, |
| "learning_rate": 9.932806483756763e-06, |
| "loss": 0.5328, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.14764976958525344, |
| "grad_norm": 0.5433037877082825, |
| "learning_rate": 9.931219815751368e-06, |
| "loss": 0.5153, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.14820276497695853, |
| "grad_norm": 0.47332048416137695, |
| "learning_rate": 9.929614762101117e-06, |
| "loss": 0.5004, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.1487557603686636, |
| "grad_norm": 0.5838091373443604, |
| "learning_rate": 9.927991328790324e-06, |
| "loss": 0.4889, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.14930875576036867, |
| "grad_norm": 0.4618660807609558, |
| "learning_rate": 9.926349521871824e-06, |
| "loss": 0.5265, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.14986175115207373, |
| "grad_norm": 0.5086191892623901, |
| "learning_rate": 9.924689347466959e-06, |
| "loss": 0.5106, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.15041474654377882, |
| "grad_norm": 0.457042932510376, |
| "learning_rate": 9.92301081176555e-06, |
| "loss": 0.518, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.15096774193548387, |
| "grad_norm": 0.5256728529930115, |
| "learning_rate": 9.92131392102588e-06, |
| "loss": 0.531, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.15152073732718893, |
| "grad_norm": 0.4683758318424225, |
| "learning_rate": 9.919598681574665e-06, |
| "loss": 0.4896, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.15207373271889402, |
| "grad_norm": 0.4681558609008789, |
| "learning_rate": 9.917865099807034e-06, |
| "loss": 0.5089, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.15262672811059907, |
| "grad_norm": 0.4770297706127167, |
| "learning_rate": 9.916113182186503e-06, |
| "loss": 0.4973, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.15317972350230416, |
| "grad_norm": 0.4939088225364685, |
| "learning_rate": 9.91434293524495e-06, |
| "loss": 0.5388, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.15373271889400922, |
| "grad_norm": 0.5165495276451111, |
| "learning_rate": 9.912554365582596e-06, |
| "loss": 0.5046, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.15428571428571428, |
| "grad_norm": 0.4685964584350586, |
| "learning_rate": 9.910747479867975e-06, |
| "loss": 0.5373, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.15483870967741936, |
| "grad_norm": 0.5078836679458618, |
| "learning_rate": 9.908922284837911e-06, |
| "loss": 0.5105, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.15539170506912442, |
| "grad_norm": 0.4230501651763916, |
| "learning_rate": 9.90707878729749e-06, |
| "loss": 0.4749, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.1559447004608295, |
| "grad_norm": 0.483981728553772, |
| "learning_rate": 9.905216994120044e-06, |
| "loss": 0.4954, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.15649769585253456, |
| "grad_norm": 0.4270246922969818, |
| "learning_rate": 9.90333691224711e-06, |
| "loss": 0.5007, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.15705069124423962, |
| "grad_norm": 0.4967236816883087, |
| "learning_rate": 9.901438548688423e-06, |
| "loss": 0.5308, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.1576036866359447, |
| "grad_norm": 0.4965408444404602, |
| "learning_rate": 9.89952191052187e-06, |
| "loss": 0.5093, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.15815668202764976, |
| "grad_norm": 0.45975926518440247, |
| "learning_rate": 9.89758700489348e-06, |
| "loss": 0.5122, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.15870967741935485, |
| "grad_norm": 0.5180716514587402, |
| "learning_rate": 9.895633839017387e-06, |
| "loss": 0.5515, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.1592626728110599, |
| "grad_norm": 0.43234091997146606, |
| "learning_rate": 9.893662420175809e-06, |
| "loss": 0.4901, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.15981566820276497, |
| "grad_norm": 0.4894983768463135, |
| "learning_rate": 9.89167275571902e-06, |
| "loss": 0.5052, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.16036866359447005, |
| "grad_norm": 0.4710214138031006, |
| "learning_rate": 9.889664853065315e-06, |
| "loss": 0.5, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1609216589861751, |
| "grad_norm": 0.5677446126937866, |
| "learning_rate": 9.887638719700996e-06, |
| "loss": 0.5169, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.1614746543778802, |
| "grad_norm": 0.45073893666267395, |
| "learning_rate": 9.88559436318033e-06, |
| "loss": 0.5258, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.16202764976958525, |
| "grad_norm": 0.5394282341003418, |
| "learning_rate": 9.883531791125538e-06, |
| "loss": 0.4799, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.1625806451612903, |
| "grad_norm": 0.4784766435623169, |
| "learning_rate": 9.881451011226742e-06, |
| "loss": 0.4693, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.1631336405529954, |
| "grad_norm": 0.435619980096817, |
| "learning_rate": 9.87935203124196e-06, |
| "loss": 0.5196, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.16368663594470045, |
| "grad_norm": 0.5438254475593567, |
| "learning_rate": 9.877234858997066e-06, |
| "loss": 0.5078, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.16423963133640554, |
| "grad_norm": 0.4646199643611908, |
| "learning_rate": 9.875099502385761e-06, |
| "loss": 0.5239, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.1647926267281106, |
| "grad_norm": 0.549284040927887, |
| "learning_rate": 9.872945969369546e-06, |
| "loss": 0.5029, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.16534562211981566, |
| "grad_norm": 0.5235300064086914, |
| "learning_rate": 9.87077426797769e-06, |
| "loss": 0.5225, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.16589861751152074, |
| "grad_norm": 0.525598406791687, |
| "learning_rate": 9.8685844063072e-06, |
| "loss": 0.5366, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1664516129032258, |
| "grad_norm": 0.5001644492149353, |
| "learning_rate": 9.866376392522798e-06, |
| "loss": 0.5025, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.16700460829493088, |
| "grad_norm": 0.4980228841304779, |
| "learning_rate": 9.864150234856876e-06, |
| "loss": 0.5332, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.16755760368663594, |
| "grad_norm": 0.44216716289520264, |
| "learning_rate": 9.861905941609482e-06, |
| "loss": 0.4831, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.16811059907834103, |
| "grad_norm": 0.45178863406181335, |
| "learning_rate": 9.859643521148275e-06, |
| "loss": 0.5164, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.16866359447004609, |
| "grad_norm": 0.4833186864852905, |
| "learning_rate": 9.857362981908505e-06, |
| "loss": 0.5128, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.16921658986175114, |
| "grad_norm": 0.5167268514633179, |
| "learning_rate": 9.855064332392972e-06, |
| "loss": 0.5149, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.16976958525345623, |
| "grad_norm": 0.4465697109699249, |
| "learning_rate": 9.852747581172002e-06, |
| "loss": 0.5016, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.1703225806451613, |
| "grad_norm": 0.5105251669883728, |
| "learning_rate": 9.850412736883408e-06, |
| "loss": 0.5021, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.17087557603686637, |
| "grad_norm": 0.5230706334114075, |
| "learning_rate": 9.848059808232464e-06, |
| "loss": 0.4857, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.17142857142857143, |
| "grad_norm": 0.5510496497154236, |
| "learning_rate": 9.845688803991873e-06, |
| "loss": 0.5305, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.1719815668202765, |
| "grad_norm": 0.4802294671535492, |
| "learning_rate": 9.843299733001723e-06, |
| "loss": 0.5084, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.17253456221198157, |
| "grad_norm": 0.5729020833969116, |
| "learning_rate": 9.840892604169473e-06, |
| "loss": 0.4797, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.17308755760368663, |
| "grad_norm": 0.6035499572753906, |
| "learning_rate": 9.838467426469897e-06, |
| "loss": 0.5069, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.17364055299539172, |
| "grad_norm": 0.4519449770450592, |
| "learning_rate": 9.836024208945074e-06, |
| "loss": 0.4959, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.17419354838709677, |
| "grad_norm": 0.5990521907806396, |
| "learning_rate": 9.833562960704336e-06, |
| "loss": 0.522, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.17474654377880183, |
| "grad_norm": 0.4257313013076782, |
| "learning_rate": 9.831083690924246e-06, |
| "loss": 0.4769, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.17529953917050692, |
| "grad_norm": 0.5456443428993225, |
| "learning_rate": 9.828586408848553e-06, |
| "loss": 0.5129, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.17585253456221198, |
| "grad_norm": 0.4802759885787964, |
| "learning_rate": 9.82607112378817e-06, |
| "loss": 0.5043, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.17640552995391706, |
| "grad_norm": 0.6043391823768616, |
| "learning_rate": 9.823537845121126e-06, |
| "loss": 0.5063, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.17695852534562212, |
| "grad_norm": 0.4696696698665619, |
| "learning_rate": 9.82098658229254e-06, |
| "loss": 0.5164, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.17751152073732718, |
| "grad_norm": 0.5530909299850464, |
| "learning_rate": 9.818417344814587e-06, |
| "loss": 0.5275, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.17806451612903226, |
| "grad_norm": 0.609788179397583, |
| "learning_rate": 9.815830142266457e-06, |
| "loss": 0.4716, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.17861751152073732, |
| "grad_norm": 0.46327316761016846, |
| "learning_rate": 9.813224984294318e-06, |
| "loss": 0.4887, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.1791705069124424, |
| "grad_norm": 0.5943720936775208, |
| "learning_rate": 9.810601880611286e-06, |
| "loss": 0.5009, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.17972350230414746, |
| "grad_norm": 0.4262332320213318, |
| "learning_rate": 9.807960840997387e-06, |
| "loss": 0.4747, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.18027649769585252, |
| "grad_norm": 0.47295740246772766, |
| "learning_rate": 9.805301875299518e-06, |
| "loss": 0.5092, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.1808294930875576, |
| "grad_norm": 0.4910508394241333, |
| "learning_rate": 9.802624993431414e-06, |
| "loss": 0.5195, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.18138248847926267, |
| "grad_norm": 0.5312582850456238, |
| "learning_rate": 9.799930205373605e-06, |
| "loss": 0.5077, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.18193548387096775, |
| "grad_norm": 0.49236226081848145, |
| "learning_rate": 9.797217521173385e-06, |
| "loss": 0.5228, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.1824884792626728, |
| "grad_norm": 0.5442107319831848, |
| "learning_rate": 9.794486950944775e-06, |
| "loss": 0.4909, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.18304147465437787, |
| "grad_norm": 0.4408930242061615, |
| "learning_rate": 9.79173850486848e-06, |
| "loss": 0.4919, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.18359447004608295, |
| "grad_norm": 0.4888366460800171, |
| "learning_rate": 9.78897219319185e-06, |
| "loss": 0.5247, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.184147465437788, |
| "grad_norm": 0.48040100932121277, |
| "learning_rate": 9.786188026228853e-06, |
| "loss": 0.4746, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.1847004608294931, |
| "grad_norm": 0.46840646862983704, |
| "learning_rate": 9.783386014360024e-06, |
| "loss": 0.5174, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.18525345622119815, |
| "grad_norm": 0.4824281930923462, |
| "learning_rate": 9.780566168032432e-06, |
| "loss": 0.4944, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.18580645161290324, |
| "grad_norm": 0.4401717483997345, |
| "learning_rate": 9.77772849775964e-06, |
| "loss": 0.5074, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.1863594470046083, |
| "grad_norm": 0.5038358569145203, |
| "learning_rate": 9.774873014121667e-06, |
| "loss": 0.5052, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.18691244239631336, |
| "grad_norm": 0.47577494382858276, |
| "learning_rate": 9.771999727764946e-06, |
| "loss": 0.4856, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.18746543778801844, |
| "grad_norm": 0.4989863634109497, |
| "learning_rate": 9.76910864940229e-06, |
| "loss": 0.4839, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.1880184331797235, |
| "grad_norm": 0.49821236729621887, |
| "learning_rate": 9.766199789812845e-06, |
| "loss": 0.498, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.18857142857142858, |
| "grad_norm": 0.44591957330703735, |
| "learning_rate": 9.763273159842052e-06, |
| "loss": 0.4832, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.18912442396313364, |
| "grad_norm": 0.4908744990825653, |
| "learning_rate": 9.76032877040161e-06, |
| "loss": 0.4927, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.1896774193548387, |
| "grad_norm": 0.55243319272995, |
| "learning_rate": 9.757366632469427e-06, |
| "loss": 0.5005, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.19023041474654379, |
| "grad_norm": 0.4234575629234314, |
| "learning_rate": 9.754386757089596e-06, |
| "loss": 0.4799, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.19078341013824884, |
| "grad_norm": 0.49401965737342834, |
| "learning_rate": 9.751389155372329e-06, |
| "loss": 0.4942, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.19133640552995393, |
| "grad_norm": 0.4622778594493866, |
| "learning_rate": 9.74837383849394e-06, |
| "loss": 0.4644, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.191889400921659, |
| "grad_norm": 0.4600273072719574, |
| "learning_rate": 9.745340817696787e-06, |
| "loss": 0.5019, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.19244239631336404, |
| "grad_norm": 0.4513567388057709, |
| "learning_rate": 9.742290104289237e-06, |
| "loss": 0.4704, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.19299539170506913, |
| "grad_norm": 0.5180230736732483, |
| "learning_rate": 9.739221709645622e-06, |
| "loss": 0.4917, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.1935483870967742, |
| "grad_norm": 0.44200998544692993, |
| "learning_rate": 9.736135645206198e-06, |
| "loss": 0.5047, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.19410138248847927, |
| "grad_norm": 0.5660358667373657, |
| "learning_rate": 9.7330319224771e-06, |
| "loss": 0.5023, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.19465437788018433, |
| "grad_norm": 0.44904786348342896, |
| "learning_rate": 9.729910553030304e-06, |
| "loss": 0.4998, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.1952073732718894, |
| "grad_norm": 0.523826003074646, |
| "learning_rate": 9.726771548503575e-06, |
| "loss": 0.4966, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.19576036866359448, |
| "grad_norm": 0.4515267312526703, |
| "learning_rate": 9.72361492060043e-06, |
| "loss": 0.4996, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.19631336405529953, |
| "grad_norm": 0.4984499216079712, |
| "learning_rate": 9.7204406810901e-06, |
| "loss": 0.4985, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.19686635944700462, |
| "grad_norm": 0.4689009189605713, |
| "learning_rate": 9.71724884180747e-06, |
| "loss": 0.4936, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.19741935483870968, |
| "grad_norm": 0.46954217553138733, |
| "learning_rate": 9.714039414653047e-06, |
| "loss": 0.491, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.19797235023041473, |
| "grad_norm": 0.5114353895187378, |
| "learning_rate": 9.71081241159292e-06, |
| "loss": 0.488, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.19852534562211982, |
| "grad_norm": 0.4611765742301941, |
| "learning_rate": 9.707567844658698e-06, |
| "loss": 0.4749, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.19907834101382488, |
| "grad_norm": 0.5052332878112793, |
| "learning_rate": 9.704305725947483e-06, |
| "loss": 0.5234, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.19963133640552996, |
| "grad_norm": 0.5130250453948975, |
| "learning_rate": 9.701026067621813e-06, |
| "loss": 0.5006, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.20018433179723502, |
| "grad_norm": 0.47562843561172485, |
| "learning_rate": 9.697728881909622e-06, |
| "loss": 0.529, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.20073732718894008, |
| "grad_norm": 0.48228660225868225, |
| "learning_rate": 9.694414181104197e-06, |
| "loss": 0.5057, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.20129032258064516, |
| "grad_norm": 0.501700222492218, |
| "learning_rate": 9.691081977564124e-06, |
| "loss": 0.4949, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.20184331797235022, |
| "grad_norm": 0.4578889012336731, |
| "learning_rate": 9.687732283713247e-06, |
| "loss": 0.4989, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.2023963133640553, |
| "grad_norm": 0.5223492980003357, |
| "learning_rate": 9.684365112040622e-06, |
| "loss": 0.4772, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.20294930875576037, |
| "grad_norm": 0.4984261393547058, |
| "learning_rate": 9.680980475100471e-06, |
| "loss": 0.5132, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.20350230414746545, |
| "grad_norm": 0.4805150032043457, |
| "learning_rate": 9.677578385512135e-06, |
| "loss": 0.5069, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.2040552995391705, |
| "grad_norm": 0.46725666522979736, |
| "learning_rate": 9.674158855960017e-06, |
| "loss": 0.4806, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.20460829493087557, |
| "grad_norm": 0.47330862283706665, |
| "learning_rate": 9.670721899193556e-06, |
| "loss": 0.5127, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.20516129032258065, |
| "grad_norm": 0.5010029077529907, |
| "learning_rate": 9.667267528027157e-06, |
| "loss": 0.5189, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.2057142857142857, |
| "grad_norm": 0.5415194034576416, |
| "learning_rate": 9.663795755340159e-06, |
| "loss": 0.5053, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.2062672811059908, |
| "grad_norm": 0.4000757932662964, |
| "learning_rate": 9.660306594076779e-06, |
| "loss": 0.4939, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.20682027649769585, |
| "grad_norm": 0.5064148306846619, |
| "learning_rate": 9.656800057246065e-06, |
| "loss": 0.4937, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.2073732718894009, |
| "grad_norm": 0.467369943857193, |
| "learning_rate": 9.65327615792185e-06, |
| "loss": 0.5215, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.207926267281106, |
| "grad_norm": 0.4753814935684204, |
| "learning_rate": 9.649734909242699e-06, |
| "loss": 0.4903, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.20847926267281106, |
| "grad_norm": 0.4203657805919647, |
| "learning_rate": 9.646176324411867e-06, |
| "loss": 0.4874, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.20903225806451614, |
| "grad_norm": 0.4930543601512909, |
| "learning_rate": 9.642600416697242e-06, |
| "loss": 0.4816, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.2095852534562212, |
| "grad_norm": 0.4189959764480591, |
| "learning_rate": 9.639007199431298e-06, |
| "loss": 0.491, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.21013824884792626, |
| "grad_norm": 0.48571497201919556, |
| "learning_rate": 9.635396686011052e-06, |
| "loss": 0.5005, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.21069124423963134, |
| "grad_norm": 0.4344756007194519, |
| "learning_rate": 9.631768889898004e-06, |
| "loss": 0.5113, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.2112442396313364, |
| "grad_norm": 0.5239787101745605, |
| "learning_rate": 9.628123824618087e-06, |
| "loss": 0.5013, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.21179723502304149, |
| "grad_norm": 0.40745410323143005, |
| "learning_rate": 9.624461503761628e-06, |
| "loss": 0.4824, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.21235023041474654, |
| "grad_norm": 0.5618027448654175, |
| "learning_rate": 9.620781940983288e-06, |
| "loss": 0.4852, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.2129032258064516, |
| "grad_norm": 0.5030412673950195, |
| "learning_rate": 9.617085150002012e-06, |
| "loss": 0.4868, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.2134562211981567, |
| "grad_norm": 0.5133850574493408, |
| "learning_rate": 9.613371144600976e-06, |
| "loss": 0.5076, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.21400921658986174, |
| "grad_norm": 0.47164881229400635, |
| "learning_rate": 9.609639938627546e-06, |
| "loss": 0.5076, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.21456221198156683, |
| "grad_norm": 0.4972430467605591, |
| "learning_rate": 9.605891545993214e-06, |
| "loss": 0.4843, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.2151152073732719, |
| "grad_norm": 0.4716038405895233, |
| "learning_rate": 9.60212598067355e-06, |
| "loss": 0.4782, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.21566820276497695, |
| "grad_norm": 0.4822791814804077, |
| "learning_rate": 9.59834325670815e-06, |
| "loss": 0.4874, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.21622119815668203, |
| "grad_norm": 0.4538308382034302, |
| "learning_rate": 9.594543388200592e-06, |
| "loss": 0.501, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.2167741935483871, |
| "grad_norm": 0.4863637387752533, |
| "learning_rate": 9.59072638931837e-06, |
| "loss": 0.5115, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.21732718894009218, |
| "grad_norm": 0.4747362434864044, |
| "learning_rate": 9.586892274292846e-06, |
| "loss": 0.5262, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.21788018433179723, |
| "grad_norm": 0.4688088595867157, |
| "learning_rate": 9.583041057419203e-06, |
| "loss": 0.517, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.2184331797235023, |
| "grad_norm": 0.49005988240242004, |
| "learning_rate": 9.579172753056383e-06, |
| "loss": 0.4737, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.21898617511520738, |
| "grad_norm": 0.4679119288921356, |
| "learning_rate": 9.575287375627037e-06, |
| "loss": 0.4978, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.21953917050691243, |
| "grad_norm": 0.47708967328071594, |
| "learning_rate": 9.571384939617478e-06, |
| "loss": 0.5005, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.22009216589861752, |
| "grad_norm": 0.47183945775032043, |
| "learning_rate": 9.567465459577613e-06, |
| "loss": 0.4708, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.22064516129032258, |
| "grad_norm": 0.47582703828811646, |
| "learning_rate": 9.563528950120895e-06, |
| "loss": 0.4983, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.22119815668202766, |
| "grad_norm": 0.48260581493377686, |
| "learning_rate": 9.559575425924279e-06, |
| "loss": 0.4894, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.22175115207373272, |
| "grad_norm": 0.5306166410446167, |
| "learning_rate": 9.55560490172815e-06, |
| "loss": 0.5068, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.22230414746543778, |
| "grad_norm": 0.49763691425323486, |
| "learning_rate": 9.551617392336281e-06, |
| "loss": 0.5028, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.22285714285714286, |
| "grad_norm": 0.5285375118255615, |
| "learning_rate": 9.547612912615769e-06, |
| "loss": 0.484, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.22341013824884792, |
| "grad_norm": 0.5528775453567505, |
| "learning_rate": 9.543591477496985e-06, |
| "loss": 0.4774, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.223963133640553, |
| "grad_norm": 0.48812106251716614, |
| "learning_rate": 9.539553101973516e-06, |
| "loss": 0.5042, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.22451612903225807, |
| "grad_norm": 0.5272664427757263, |
| "learning_rate": 9.535497801102114e-06, |
| "loss": 0.4987, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.22506912442396312, |
| "grad_norm": 0.5162638425827026, |
| "learning_rate": 9.531425590002629e-06, |
| "loss": 0.4942, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.2256221198156682, |
| "grad_norm": 0.45863449573516846, |
| "learning_rate": 9.527336483857965e-06, |
| "loss": 0.5165, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.22617511520737327, |
| "grad_norm": 0.5142653584480286, |
| "learning_rate": 9.523230497914016e-06, |
| "loss": 0.4883, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.22672811059907835, |
| "grad_norm": 0.4717908203601837, |
| "learning_rate": 9.519107647479609e-06, |
| "loss": 0.4859, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2272811059907834, |
| "grad_norm": 0.4761084020137787, |
| "learning_rate": 9.514967947926453e-06, |
| "loss": 0.5022, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.22783410138248847, |
| "grad_norm": 0.4796617925167084, |
| "learning_rate": 9.510811414689073e-06, |
| "loss": 0.5058, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.22838709677419355, |
| "grad_norm": 0.5060638189315796, |
| "learning_rate": 9.506638063264759e-06, |
| "loss": 0.4859, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.2289400921658986, |
| "grad_norm": 0.42853257060050964, |
| "learning_rate": 9.502447909213508e-06, |
| "loss": 0.5004, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.2294930875576037, |
| "grad_norm": 0.45876848697662354, |
| "learning_rate": 9.498240968157962e-06, |
| "loss": 0.4898, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.23004608294930876, |
| "grad_norm": 0.4455052614212036, |
| "learning_rate": 9.49401725578335e-06, |
| "loss": 0.4972, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.2305990783410138, |
| "grad_norm": 0.41394197940826416, |
| "learning_rate": 9.489776787837438e-06, |
| "loss": 0.5006, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.2311520737327189, |
| "grad_norm": 0.44113320112228394, |
| "learning_rate": 9.485519580130456e-06, |
| "loss": 0.4644, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.23170506912442396, |
| "grad_norm": 0.4882364869117737, |
| "learning_rate": 9.481245648535053e-06, |
| "loss": 0.4726, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.23225806451612904, |
| "grad_norm": 0.4658440053462982, |
| "learning_rate": 9.476955008986228e-06, |
| "loss": 0.497, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2328110599078341, |
| "grad_norm": 0.4944906532764435, |
| "learning_rate": 9.472647677481275e-06, |
| "loss": 0.4743, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.23336405529953916, |
| "grad_norm": 0.4508844316005707, |
| "learning_rate": 9.468323670079725e-06, |
| "loss": 0.4884, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.23391705069124424, |
| "grad_norm": 0.470225065946579, |
| "learning_rate": 9.463983002903278e-06, |
| "loss": 0.4929, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.2344700460829493, |
| "grad_norm": 0.5286823511123657, |
| "learning_rate": 9.459625692135756e-06, |
| "loss": 0.4938, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.2350230414746544, |
| "grad_norm": 0.45246848464012146, |
| "learning_rate": 9.45525175402303e-06, |
| "loss": 0.4834, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.23557603686635945, |
| "grad_norm": 0.48045429587364197, |
| "learning_rate": 9.450861204872965e-06, |
| "loss": 0.4877, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.2361290322580645, |
| "grad_norm": 0.5548929572105408, |
| "learning_rate": 9.44645406105536e-06, |
| "loss": 0.5185, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.2366820276497696, |
| "grad_norm": 0.4929436147212982, |
| "learning_rate": 9.44203033900189e-06, |
| "loss": 0.5048, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.23723502304147465, |
| "grad_norm": 0.46492716670036316, |
| "learning_rate": 9.437590055206032e-06, |
| "loss": 0.4939, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.23778801843317973, |
| "grad_norm": 0.4560331404209137, |
| "learning_rate": 9.433133226223018e-06, |
| "loss": 0.5016, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2383410138248848, |
| "grad_norm": 0.4622935652732849, |
| "learning_rate": 9.428659868669765e-06, |
| "loss": 0.4748, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.23889400921658988, |
| "grad_norm": 0.4686061441898346, |
| "learning_rate": 9.424169999224819e-06, |
| "loss": 0.5012, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.23944700460829493, |
| "grad_norm": 0.47005850076675415, |
| "learning_rate": 9.419663634628283e-06, |
| "loss": 0.4902, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.5422276854515076, |
| "learning_rate": 9.415140791681767e-06, |
| "loss": 0.4923, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.24055299539170508, |
| "grad_norm": 0.42137962579727173, |
| "learning_rate": 9.410601487248315e-06, |
| "loss": 0.4762, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.24110599078341013, |
| "grad_norm": 0.518340528011322, |
| "learning_rate": 9.406045738252349e-06, |
| "loss": 0.5257, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.24165898617511522, |
| "grad_norm": 0.5296422243118286, |
| "learning_rate": 9.4014735616796e-06, |
| "loss": 0.4959, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.24221198156682028, |
| "grad_norm": 0.4660092890262604, |
| "learning_rate": 9.396884974577052e-06, |
| "loss": 0.4902, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.24276497695852534, |
| "grad_norm": 0.5939272046089172, |
| "learning_rate": 9.392279994052872e-06, |
| "loss": 0.4859, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.24331797235023042, |
| "grad_norm": 0.4680648446083069, |
| "learning_rate": 9.387658637276348e-06, |
| "loss": 0.4856, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.24387096774193548, |
| "grad_norm": 0.4640378952026367, |
| "learning_rate": 9.383020921477824e-06, |
| "loss": 0.4976, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.24442396313364056, |
| "grad_norm": 0.5372658371925354, |
| "learning_rate": 9.378366863948644e-06, |
| "loss": 0.4731, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.24497695852534562, |
| "grad_norm": 0.48611563444137573, |
| "learning_rate": 9.373696482041072e-06, |
| "loss": 0.474, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.24552995391705068, |
| "grad_norm": 0.4766850173473358, |
| "learning_rate": 9.369009793168243e-06, |
| "loss": 0.4878, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.24608294930875577, |
| "grad_norm": 0.503322184085846, |
| "learning_rate": 9.364306814804086e-06, |
| "loss": 0.49, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.24663594470046082, |
| "grad_norm": 0.5735645294189453, |
| "learning_rate": 9.35958756448327e-06, |
| "loss": 0.5357, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.2471889400921659, |
| "grad_norm": 0.5782787799835205, |
| "learning_rate": 9.354852059801127e-06, |
| "loss": 0.5052, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.24774193548387097, |
| "grad_norm": 0.48679542541503906, |
| "learning_rate": 9.350100318413594e-06, |
| "loss": 0.4824, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.24829493087557603, |
| "grad_norm": 0.535241961479187, |
| "learning_rate": 9.345332358037147e-06, |
| "loss": 0.4814, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.2488479262672811, |
| "grad_norm": 0.5126506090164185, |
| "learning_rate": 9.340548196448729e-06, |
| "loss": 0.4903, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.24940092165898617, |
| "grad_norm": 0.459888219833374, |
| "learning_rate": 9.33574785148569e-06, |
| "loss": 0.4764, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.24995391705069125, |
| "grad_norm": 0.47858306765556335, |
| "learning_rate": 9.330931341045723e-06, |
| "loss": 0.4759, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.2505069124423963, |
| "grad_norm": 0.4925948977470398, |
| "learning_rate": 9.326098683086786e-06, |
| "loss": 0.5092, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.25105990783410137, |
| "grad_norm": 0.4197613000869751, |
| "learning_rate": 9.321249895627043e-06, |
| "loss": 0.4855, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.25161290322580643, |
| "grad_norm": 0.4368072748184204, |
| "learning_rate": 9.316384996744798e-06, |
| "loss": 0.4731, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.25216589861751154, |
| "grad_norm": 0.4712340533733368, |
| "learning_rate": 9.311504004578425e-06, |
| "loss": 0.4864, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.2527188940092166, |
| "grad_norm": 0.4518265426158905, |
| "learning_rate": 9.306606937326295e-06, |
| "loss": 0.5119, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.25327188940092166, |
| "grad_norm": 0.48842400312423706, |
| "learning_rate": 9.301693813246721e-06, |
| "loss": 0.4732, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.2538248847926267, |
| "grad_norm": 0.5565005540847778, |
| "learning_rate": 9.296764650657881e-06, |
| "loss": 0.5018, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.2543778801843318, |
| "grad_norm": 0.4874143600463867, |
| "learning_rate": 9.291819467937746e-06, |
| "loss": 0.4712, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2549308755760369, |
| "grad_norm": 0.5408375859260559, |
| "learning_rate": 9.286858283524022e-06, |
| "loss": 0.4785, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.25548387096774194, |
| "grad_norm": 0.4784450829029083, |
| "learning_rate": 9.281881115914075e-06, |
| "loss": 0.4893, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.256036866359447, |
| "grad_norm": 0.4540750980377197, |
| "learning_rate": 9.276887983664861e-06, |
| "loss": 0.4873, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.25658986175115206, |
| "grad_norm": 0.5902926921844482, |
| "learning_rate": 9.271878905392863e-06, |
| "loss": 0.4903, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.2571428571428571, |
| "grad_norm": 0.44395557045936584, |
| "learning_rate": 9.26685389977401e-06, |
| "loss": 0.4862, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.25769585253456223, |
| "grad_norm": 0.44266945123672485, |
| "learning_rate": 9.261812985543625e-06, |
| "loss": 0.4875, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2582488479262673, |
| "grad_norm": 0.5877528190612793, |
| "learning_rate": 9.256756181496334e-06, |
| "loss": 0.498, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.25880184331797235, |
| "grad_norm": 0.43955346941947937, |
| "learning_rate": 9.251683506486016e-06, |
| "loss": 0.4782, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.2593548387096774, |
| "grad_norm": 0.5338640809059143, |
| "learning_rate": 9.246594979425719e-06, |
| "loss": 0.4834, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.25990783410138246, |
| "grad_norm": 0.4647727608680725, |
| "learning_rate": 9.241490619287593e-06, |
| "loss": 0.4957, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.2604608294930876, |
| "grad_norm": 0.3950575590133667, |
| "learning_rate": 9.236370445102825e-06, |
| "loss": 0.4864, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.26101382488479263, |
| "grad_norm": 0.5759503245353699, |
| "learning_rate": 9.231234475961559e-06, |
| "loss": 0.5049, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.2615668202764977, |
| "grad_norm": 0.49982547760009766, |
| "learning_rate": 9.22608273101283e-06, |
| "loss": 0.5099, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.26211981566820275, |
| "grad_norm": 0.45275378227233887, |
| "learning_rate": 9.220915229464496e-06, |
| "loss": 0.4768, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.2626728110599078, |
| "grad_norm": 0.4712145924568176, |
| "learning_rate": 9.215731990583155e-06, |
| "loss": 0.4418, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.2632258064516129, |
| "grad_norm": 0.4405660927295685, |
| "learning_rate": 9.210533033694084e-06, |
| "loss": 0.4938, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.263778801843318, |
| "grad_norm": 0.4714353382587433, |
| "learning_rate": 9.205318378181167e-06, |
| "loss": 0.4806, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.26433179723502304, |
| "grad_norm": 0.5084551572799683, |
| "learning_rate": 9.200088043486813e-06, |
| "loss": 0.487, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.2648847926267281, |
| "grad_norm": 0.4339804947376251, |
| "learning_rate": 9.194842049111889e-06, |
| "loss": 0.4703, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.2654377880184332, |
| "grad_norm": 0.5167462825775146, |
| "learning_rate": 9.189580414615658e-06, |
| "loss": 0.495, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.26599078341013827, |
| "grad_norm": 0.5027214288711548, |
| "learning_rate": 9.184303159615682e-06, |
| "loss": 0.487, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.2665437788018433, |
| "grad_norm": 0.5376026034355164, |
| "learning_rate": 9.179010303787772e-06, |
| "loss": 0.4974, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2670967741935484, |
| "grad_norm": 0.4745848774909973, |
| "learning_rate": 9.173701866865905e-06, |
| "loss": 0.4761, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.26764976958525344, |
| "grad_norm": 0.615476667881012, |
| "learning_rate": 9.168377868642142e-06, |
| "loss": 0.4903, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.26820276497695855, |
| "grad_norm": 0.43914616107940674, |
| "learning_rate": 9.163038328966578e-06, |
| "loss": 0.5018, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.2687557603686636, |
| "grad_norm": 0.5516188144683838, |
| "learning_rate": 9.157683267747239e-06, |
| "loss": 0.5042, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.26930875576036867, |
| "grad_norm": 0.5231643319129944, |
| "learning_rate": 9.152312704950028e-06, |
| "loss": 0.4745, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.2698617511520737, |
| "grad_norm": 0.4931274354457855, |
| "learning_rate": 9.146926660598646e-06, |
| "loss": 0.4891, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.2704147465437788, |
| "grad_norm": 0.5022376179695129, |
| "learning_rate": 9.141525154774513e-06, |
| "loss": 0.4707, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2709677419354839, |
| "grad_norm": 0.4848520755767822, |
| "learning_rate": 9.136108207616694e-06, |
| "loss": 0.5176, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.27152073732718895, |
| "grad_norm": 0.5185691118240356, |
| "learning_rate": 9.130675839321829e-06, |
| "loss": 0.4951, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.272073732718894, |
| "grad_norm": 0.4987480938434601, |
| "learning_rate": 9.125228070144056e-06, |
| "loss": 0.4865, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.27262672811059907, |
| "grad_norm": 0.47905007004737854, |
| "learning_rate": 9.119764920394928e-06, |
| "loss": 0.4931, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.27317972350230413, |
| "grad_norm": 0.5060631632804871, |
| "learning_rate": 9.114286410443349e-06, |
| "loss": 0.4912, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.27373271889400924, |
| "grad_norm": 0.5127936005592346, |
| "learning_rate": 9.108792560715487e-06, |
| "loss": 0.5004, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.2742857142857143, |
| "grad_norm": 0.5518050193786621, |
| "learning_rate": 9.103283391694711e-06, |
| "loss": 0.5051, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.27483870967741936, |
| "grad_norm": 0.5080561637878418, |
| "learning_rate": 9.097758923921495e-06, |
| "loss": 0.4679, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.2753917050691244, |
| "grad_norm": 0.5666584372520447, |
| "learning_rate": 9.092219177993365e-06, |
| "loss": 0.4861, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.2759447004608295, |
| "grad_norm": 0.48917266726493835, |
| "learning_rate": 9.086664174564804e-06, |
| "loss": 0.4616, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.2764976958525346, |
| "grad_norm": 0.4538862407207489, |
| "learning_rate": 9.081093934347178e-06, |
| "loss": 0.4917, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.27705069124423964, |
| "grad_norm": 0.6453654170036316, |
| "learning_rate": 9.075508478108674e-06, |
| "loss": 0.477, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.2776036866359447, |
| "grad_norm": 0.5007541179656982, |
| "learning_rate": 9.069907826674199e-06, |
| "loss": 0.4748, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.27815668202764976, |
| "grad_norm": 0.5134044289588928, |
| "learning_rate": 9.064292000925316e-06, |
| "loss": 0.5122, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.2787096774193548, |
| "grad_norm": 0.5208799242973328, |
| "learning_rate": 9.05866102180017e-06, |
| "loss": 0.4908, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.27926267281105993, |
| "grad_norm": 0.4694979786872864, |
| "learning_rate": 9.053014910293399e-06, |
| "loss": 0.5006, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.279815668202765, |
| "grad_norm": 0.4957534670829773, |
| "learning_rate": 9.047353687456064e-06, |
| "loss": 0.4901, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.28036866359447005, |
| "grad_norm": 0.5231026411056519, |
| "learning_rate": 9.041677374395563e-06, |
| "loss": 0.5062, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.2809216589861751, |
| "grad_norm": 0.5049005150794983, |
| "learning_rate": 9.035985992275562e-06, |
| "loss": 0.501, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.28147465437788016, |
| "grad_norm": 0.48721855878829956, |
| "learning_rate": 9.030279562315907e-06, |
| "loss": 0.4881, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.2820276497695853, |
| "grad_norm": 0.47729742527008057, |
| "learning_rate": 9.02455810579255e-06, |
| "loss": 0.4734, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.28258064516129033, |
| "grad_norm": 0.5377198457717896, |
| "learning_rate": 9.018821644037466e-06, |
| "loss": 0.4655, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.2831336405529954, |
| "grad_norm": 0.5237789154052734, |
| "learning_rate": 9.01307019843858e-06, |
| "loss": 0.5081, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.28368663594470045, |
| "grad_norm": 0.6095771193504333, |
| "learning_rate": 9.00730379043968e-06, |
| "loss": 0.4938, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.2842396313364055, |
| "grad_norm": 0.5196386575698853, |
| "learning_rate": 9.001522441540342e-06, |
| "loss": 0.4901, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.2847926267281106, |
| "grad_norm": 0.5163764357566833, |
| "learning_rate": 8.995726173295845e-06, |
| "loss": 0.4901, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.2853456221198157, |
| "grad_norm": 0.5501850247383118, |
| "learning_rate": 8.989915007317095e-06, |
| "loss": 0.4884, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.28589861751152074, |
| "grad_norm": 0.4907573461532593, |
| "learning_rate": 8.984088965270547e-06, |
| "loss": 0.4803, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.2864516129032258, |
| "grad_norm": 0.4490540325641632, |
| "learning_rate": 8.978248068878113e-06, |
| "loss": 0.5025, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.28700460829493085, |
| "grad_norm": 0.541654109954834, |
| "learning_rate": 8.97239233991709e-06, |
| "loss": 0.4844, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.28755760368663597, |
| "grad_norm": 0.4671088457107544, |
| "learning_rate": 8.966521800220084e-06, |
| "loss": 0.4775, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.288110599078341, |
| "grad_norm": 0.5631542801856995, |
| "learning_rate": 8.960636471674913e-06, |
| "loss": 0.4723, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.2886635944700461, |
| "grad_norm": 0.5776397585868835, |
| "learning_rate": 8.954736376224536e-06, |
| "loss": 0.4949, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.28921658986175114, |
| "grad_norm": 0.5042744874954224, |
| "learning_rate": 8.948821535866973e-06, |
| "loss": 0.4934, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.2897695852534562, |
| "grad_norm": 0.5939375162124634, |
| "learning_rate": 8.942891972655217e-06, |
| "loss": 0.4884, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.2903225806451613, |
| "grad_norm": 0.4696662127971649, |
| "learning_rate": 8.93694770869715e-06, |
| "loss": 0.5052, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.29087557603686637, |
| "grad_norm": 0.45712941884994507, |
| "learning_rate": 8.930988766155472e-06, |
| "loss": 0.5488, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.2914285714285714, |
| "grad_norm": 0.5518166422843933, |
| "learning_rate": 8.925015167247604e-06, |
| "loss": 0.5129, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.2919815668202765, |
| "grad_norm": 0.446274071931839, |
| "learning_rate": 8.919026934245618e-06, |
| "loss": 0.504, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.29253456221198154, |
| "grad_norm": 0.5146806836128235, |
| "learning_rate": 8.91302408947614e-06, |
| "loss": 0.4779, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.29308755760368665, |
| "grad_norm": 0.4867715537548065, |
| "learning_rate": 8.907006655320287e-06, |
| "loss": 0.4743, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.2936405529953917, |
| "grad_norm": 0.47047367691993713, |
| "learning_rate": 8.90097465421356e-06, |
| "loss": 0.4881, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.29419354838709677, |
| "grad_norm": 0.46602970361709595, |
| "learning_rate": 8.894928108645772e-06, |
| "loss": 0.486, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.29474654377880183, |
| "grad_norm": 0.45530185103416443, |
| "learning_rate": 8.888867041160975e-06, |
| "loss": 0.4726, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.2952995391705069, |
| "grad_norm": 0.4860335886478424, |
| "learning_rate": 8.882791474357354e-06, |
| "loss": 0.4818, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.295852534562212, |
| "grad_norm": 0.4780627191066742, |
| "learning_rate": 8.876701430887156e-06, |
| "loss": 0.4937, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.29640552995391706, |
| "grad_norm": 0.488271027803421, |
| "learning_rate": 8.870596933456603e-06, |
| "loss": 0.4988, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.2969585253456221, |
| "grad_norm": 0.48193061351776123, |
| "learning_rate": 8.864478004825812e-06, |
| "loss": 0.4925, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.2975115207373272, |
| "grad_norm": 0.5294972658157349, |
| "learning_rate": 8.8583446678087e-06, |
| "loss": 0.4787, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.29806451612903223, |
| "grad_norm": 0.48833131790161133, |
| "learning_rate": 8.852196945272906e-06, |
| "loss": 0.5055, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.29861751152073734, |
| "grad_norm": 0.4671526551246643, |
| "learning_rate": 8.846034860139706e-06, |
| "loss": 0.4775, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.2991705069124424, |
| "grad_norm": 0.6074989438056946, |
| "learning_rate": 8.839858435383918e-06, |
| "loss": 0.4894, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.29972350230414746, |
| "grad_norm": 0.46650996804237366, |
| "learning_rate": 8.833667694033841e-06, |
| "loss": 0.4913, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.3002764976958525, |
| "grad_norm": 0.7234087586402893, |
| "learning_rate": 8.827462659171134e-06, |
| "loss": 0.4794, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.30082949308755763, |
| "grad_norm": 0.5737038850784302, |
| "learning_rate": 8.821243353930756e-06, |
| "loss": 0.5169, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.3013824884792627, |
| "grad_norm": 0.6461325883865356, |
| "learning_rate": 8.815009801500875e-06, |
| "loss": 0.4846, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.30193548387096775, |
| "grad_norm": 0.4608705937862396, |
| "learning_rate": 8.808762025122774e-06, |
| "loss": 0.4763, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.3024884792626728, |
| "grad_norm": 0.6168321371078491, |
| "learning_rate": 8.802500048090769e-06, |
| "loss": 0.4906, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.30304147465437786, |
| "grad_norm": 0.5032297968864441, |
| "learning_rate": 8.796223893752125e-06, |
| "loss": 0.4809, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.303594470046083, |
| "grad_norm": 0.48676756024360657, |
| "learning_rate": 8.789933585506962e-06, |
| "loss": 0.4844, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.30414746543778803, |
| "grad_norm": 0.5273764133453369, |
| "learning_rate": 8.783629146808175e-06, |
| "loss": 0.4979, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3047004608294931, |
| "grad_norm": 0.47446364164352417, |
| "learning_rate": 8.77731060116134e-06, |
| "loss": 0.4633, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.30525345622119815, |
| "grad_norm": 0.5045877695083618, |
| "learning_rate": 8.77097797212463e-06, |
| "loss": 0.4907, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.3058064516129032, |
| "grad_norm": 0.4806043803691864, |
| "learning_rate": 8.764631283308733e-06, |
| "loss": 0.4794, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.3063594470046083, |
| "grad_norm": 0.5195322036743164, |
| "learning_rate": 8.758270558376752e-06, |
| "loss": 0.4921, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.3069124423963134, |
| "grad_norm": 0.5429467558860779, |
| "learning_rate": 8.751895821044118e-06, |
| "loss": 0.4825, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.30746543778801844, |
| "grad_norm": 0.48403802514076233, |
| "learning_rate": 8.745507095078515e-06, |
| "loss": 0.4997, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.3080184331797235, |
| "grad_norm": 0.5179433822631836, |
| "learning_rate": 8.739104404299781e-06, |
| "loss": 0.4444, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.30857142857142855, |
| "grad_norm": 0.5344632863998413, |
| "learning_rate": 8.732687772579816e-06, |
| "loss": 0.4695, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.30912442396313367, |
| "grad_norm": 0.5367107391357422, |
| "learning_rate": 8.7262572238425e-06, |
| "loss": 0.453, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.3096774193548387, |
| "grad_norm": 0.5187183618545532, |
| "learning_rate": 8.719812782063603e-06, |
| "loss": 0.4975, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.3102304147465438, |
| "grad_norm": 0.4541904330253601, |
| "learning_rate": 8.713354471270691e-06, |
| "loss": 0.5075, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.31078341013824884, |
| "grad_norm": 0.4493202865123749, |
| "learning_rate": 8.706882315543043e-06, |
| "loss": 0.458, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.3113364055299539, |
| "grad_norm": 0.571712076663971, |
| "learning_rate": 8.700396339011554e-06, |
| "loss": 0.5057, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.311889400921659, |
| "grad_norm": 0.4367184042930603, |
| "learning_rate": 8.69389656585865e-06, |
| "loss": 0.4725, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.31244239631336407, |
| "grad_norm": 0.5127226710319519, |
| "learning_rate": 8.6873830203182e-06, |
| "loss": 0.4615, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.3129953917050691, |
| "grad_norm": 0.5464975833892822, |
| "learning_rate": 8.680855726675419e-06, |
| "loss": 0.4661, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.3135483870967742, |
| "grad_norm": 0.5116366147994995, |
| "learning_rate": 8.674314709266778e-06, |
| "loss": 0.4789, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.31410138248847924, |
| "grad_norm": 0.5597204566001892, |
| "learning_rate": 8.667759992479922e-06, |
| "loss": 0.4903, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.31465437788018435, |
| "grad_norm": 0.4036201536655426, |
| "learning_rate": 8.661191600753567e-06, |
| "loss": 0.5093, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.3152073732718894, |
| "grad_norm": 0.6131529808044434, |
| "learning_rate": 8.65460955857742e-06, |
| "loss": 0.4572, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.31576036866359447, |
| "grad_norm": 0.532192587852478, |
| "learning_rate": 8.648013890492081e-06, |
| "loss": 0.4679, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.31631336405529953, |
| "grad_norm": 0.4397170841693878, |
| "learning_rate": 8.641404621088951e-06, |
| "loss": 0.4841, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.3168663594470046, |
| "grad_norm": 0.44879838824272156, |
| "learning_rate": 8.634781775010144e-06, |
| "loss": 0.4853, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.3174193548387097, |
| "grad_norm": 0.5257536768913269, |
| "learning_rate": 8.628145376948392e-06, |
| "loss": 0.4915, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.31797235023041476, |
| "grad_norm": 0.4205341637134552, |
| "learning_rate": 8.621495451646958e-06, |
| "loss": 0.4895, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.3185253456221198, |
| "grad_norm": 0.4227660298347473, |
| "learning_rate": 8.614832023899536e-06, |
| "loss": 0.488, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.3190783410138249, |
| "grad_norm": 0.441709965467453, |
| "learning_rate": 8.608155118550167e-06, |
| "loss": 0.4756, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.31963133640552993, |
| "grad_norm": 0.4320451021194458, |
| "learning_rate": 8.601464760493138e-06, |
| "loss": 0.4726, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.32018433179723504, |
| "grad_norm": 0.3868134617805481, |
| "learning_rate": 8.594760974672898e-06, |
| "loss": 0.475, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.3207373271889401, |
| "grad_norm": 0.43438073992729187, |
| "learning_rate": 8.588043786083952e-06, |
| "loss": 0.4874, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.32129032258064516, |
| "grad_norm": 0.40946924686431885, |
| "learning_rate": 8.581313219770788e-06, |
| "loss": 0.4843, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.3218433179723502, |
| "grad_norm": 0.4077948331832886, |
| "learning_rate": 8.57456930082776e-06, |
| "loss": 0.4656, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.3223963133640553, |
| "grad_norm": 0.4392508566379547, |
| "learning_rate": 8.567812054399016e-06, |
| "loss": 0.4911, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.3229493087557604, |
| "grad_norm": 0.4225884974002838, |
| "learning_rate": 8.561041505678385e-06, |
| "loss": 0.4907, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.32350230414746545, |
| "grad_norm": 0.4444757103919983, |
| "learning_rate": 8.5542576799093e-06, |
| "loss": 0.484, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.3240552995391705, |
| "grad_norm": 0.4226742088794708, |
| "learning_rate": 8.547460602384697e-06, |
| "loss": 0.4947, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.32460829493087556, |
| "grad_norm": 0.5053589344024658, |
| "learning_rate": 8.540650298446912e-06, |
| "loss": 0.4984, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.3251612903225806, |
| "grad_norm": 0.4462142586708069, |
| "learning_rate": 8.533826793487601e-06, |
| "loss": 0.4624, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.32571428571428573, |
| "grad_norm": 0.5019125938415527, |
| "learning_rate": 8.526990112947636e-06, |
| "loss": 0.4597, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.3262672811059908, |
| "grad_norm": 0.4495023787021637, |
| "learning_rate": 8.520140282317018e-06, |
| "loss": 0.4619, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.32682027649769585, |
| "grad_norm": 0.45633986592292786, |
| "learning_rate": 8.51327732713477e-06, |
| "loss": 0.4997, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.3273732718894009, |
| "grad_norm": 0.45402443408966064, |
| "learning_rate": 8.506401272988854e-06, |
| "loss": 0.4726, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.32792626728110597, |
| "grad_norm": 0.4144057631492615, |
| "learning_rate": 8.499512145516068e-06, |
| "loss": 0.483, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.3284792626728111, |
| "grad_norm": 0.506801962852478, |
| "learning_rate": 8.492609970401955e-06, |
| "loss": 0.4936, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.32903225806451614, |
| "grad_norm": 0.45201730728149414, |
| "learning_rate": 8.4856947733807e-06, |
| "loss": 0.5055, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.3295852534562212, |
| "grad_norm": 0.4555196762084961, |
| "learning_rate": 8.478766580235044e-06, |
| "loss": 0.4847, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.33013824884792625, |
| "grad_norm": 0.49041327834129333, |
| "learning_rate": 8.471825416796185e-06, |
| "loss": 0.4662, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.3306912442396313, |
| "grad_norm": 0.466457724571228, |
| "learning_rate": 8.464871308943673e-06, |
| "loss": 0.4794, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.3312442396313364, |
| "grad_norm": 0.5273913145065308, |
| "learning_rate": 8.457904282605324e-06, |
| "loss": 0.482, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.3317972350230415, |
| "grad_norm": 0.42694202065467834, |
| "learning_rate": 8.45092436375712e-06, |
| "loss": 0.4663, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.33235023041474654, |
| "grad_norm": 0.467692494392395, |
| "learning_rate": 8.443931578423108e-06, |
| "loss": 0.4606, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.3329032258064516, |
| "grad_norm": 0.5536178946495056, |
| "learning_rate": 8.436925952675312e-06, |
| "loss": 0.49, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.33345622119815665, |
| "grad_norm": 0.5051153898239136, |
| "learning_rate": 8.429907512633626e-06, |
| "loss": 0.5041, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.33400921658986177, |
| "grad_norm": 0.4975510835647583, |
| "learning_rate": 8.422876284465722e-06, |
| "loss": 0.491, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.3345622119815668, |
| "grad_norm": 0.5645290613174438, |
| "learning_rate": 8.415832294386956e-06, |
| "loss": 0.4942, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.3351152073732719, |
| "grad_norm": 0.5082492828369141, |
| "learning_rate": 8.408775568660256e-06, |
| "loss": 0.4829, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.33566820276497694, |
| "grad_norm": 0.4222676753997803, |
| "learning_rate": 8.401706133596047e-06, |
| "loss": 0.4837, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.33622119815668206, |
| "grad_norm": 0.5251336693763733, |
| "learning_rate": 8.394624015552124e-06, |
| "loss": 0.492, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.3367741935483871, |
| "grad_norm": 0.4806731641292572, |
| "learning_rate": 8.387529240933585e-06, |
| "loss": 0.4791, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.33732718894009217, |
| "grad_norm": 0.5003568530082703, |
| "learning_rate": 8.380421836192705e-06, |
| "loss": 0.4768, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.33788018433179723, |
| "grad_norm": 0.48693132400512695, |
| "learning_rate": 8.373301827828858e-06, |
| "loss": 0.5219, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.3384331797235023, |
| "grad_norm": 0.43921712040901184, |
| "learning_rate": 8.366169242388402e-06, |
| "loss": 0.4668, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.3389861751152074, |
| "grad_norm": 0.5484280586242676, |
| "learning_rate": 8.359024106464594e-06, |
| "loss": 0.4669, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.33953917050691246, |
| "grad_norm": 0.4520418643951416, |
| "learning_rate": 8.351866446697483e-06, |
| "loss": 0.4973, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.3400921658986175, |
| "grad_norm": 0.588077962398529, |
| "learning_rate": 8.344696289773806e-06, |
| "loss": 0.4749, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.3406451612903226, |
| "grad_norm": 0.4226244390010834, |
| "learning_rate": 8.337513662426904e-06, |
| "loss": 0.482, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.34119815668202763, |
| "grad_norm": 0.5818718671798706, |
| "learning_rate": 8.330318591436606e-06, |
| "loss": 0.4963, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.34175115207373274, |
| "grad_norm": 0.4959838092327118, |
| "learning_rate": 8.32311110362914e-06, |
| "loss": 0.4928, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.3423041474654378, |
| "grad_norm": 0.43233931064605713, |
| "learning_rate": 8.315891225877026e-06, |
| "loss": 0.4619, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.34285714285714286, |
| "grad_norm": 0.506004273891449, |
| "learning_rate": 8.308658985098983e-06, |
| "loss": 0.4638, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.3434101382488479, |
| "grad_norm": 0.5699809193611145, |
| "learning_rate": 8.301414408259821e-06, |
| "loss": 0.4666, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.343963133640553, |
| "grad_norm": 0.4087035059928894, |
| "learning_rate": 8.294157522370343e-06, |
| "loss": 0.4792, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.3445161290322581, |
| "grad_norm": 0.482768714427948, |
| "learning_rate": 8.28688835448725e-06, |
| "loss": 0.4943, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.34506912442396315, |
| "grad_norm": 0.4292857348918915, |
| "learning_rate": 8.27960693171303e-06, |
| "loss": 0.4809, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.3456221198156682, |
| "grad_norm": 0.43942004442214966, |
| "learning_rate": 8.272313281195868e-06, |
| "loss": 0.488, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.34617511520737326, |
| "grad_norm": 0.5329846739768982, |
| "learning_rate": 8.265007430129535e-06, |
| "loss": 0.494, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.3467281105990783, |
| "grad_norm": 0.41792014241218567, |
| "learning_rate": 8.25768940575329e-06, |
| "loss": 0.4838, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.34728110599078343, |
| "grad_norm": 0.522320568561554, |
| "learning_rate": 8.250359235351783e-06, |
| "loss": 0.4778, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.3478341013824885, |
| "grad_norm": 0.4983687996864319, |
| "learning_rate": 8.243016946254947e-06, |
| "loss": 0.4761, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.34838709677419355, |
| "grad_norm": 0.5216838717460632, |
| "learning_rate": 8.235662565837901e-06, |
| "loss": 0.4949, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.3489400921658986, |
| "grad_norm": 0.5560440421104431, |
| "learning_rate": 8.22829612152084e-06, |
| "loss": 0.4653, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.34949308755760367, |
| "grad_norm": 0.505742073059082, |
| "learning_rate": 8.220917640768943e-06, |
| "loss": 0.4981, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.3500460829493088, |
| "grad_norm": 0.4594072103500366, |
| "learning_rate": 8.213527151092267e-06, |
| "loss": 0.4522, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.35059907834101384, |
| "grad_norm": 0.40718746185302734, |
| "learning_rate": 8.20612468004564e-06, |
| "loss": 0.4728, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.3511520737327189, |
| "grad_norm": 0.500005304813385, |
| "learning_rate": 8.19871025522856e-06, |
| "loss": 0.4667, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.35170506912442395, |
| "grad_norm": 0.4374920427799225, |
| "learning_rate": 8.191283904285098e-06, |
| "loss": 0.4801, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.352258064516129, |
| "grad_norm": 0.4499951899051666, |
| "learning_rate": 8.183845654903789e-06, |
| "loss": 0.4588, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.3528110599078341, |
| "grad_norm": 0.41557565331459045, |
| "learning_rate": 8.17639553481753e-06, |
| "loss": 0.4953, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.3533640552995392, |
| "grad_norm": 0.44742459058761597, |
| "learning_rate": 8.168933571803475e-06, |
| "loss": 0.4711, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.35391705069124424, |
| "grad_norm": 0.4027085304260254, |
| "learning_rate": 8.161459793682937e-06, |
| "loss": 0.4872, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3544700460829493, |
| "grad_norm": 0.4093347191810608, |
| "learning_rate": 8.153974228321279e-06, |
| "loss": 0.477, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.35502304147465436, |
| "grad_norm": 0.45894891023635864, |
| "learning_rate": 8.146476903627813e-06, |
| "loss": 0.4844, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.35557603686635947, |
| "grad_norm": 0.44424736499786377, |
| "learning_rate": 8.138967847555693e-06, |
| "loss": 0.4754, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.3561290322580645, |
| "grad_norm": 0.4633028209209442, |
| "learning_rate": 8.13144708810181e-06, |
| "loss": 0.4873, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.3566820276497696, |
| "grad_norm": 0.46212074160575867, |
| "learning_rate": 8.123914653306695e-06, |
| "loss": 0.4576, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.35723502304147464, |
| "grad_norm": 0.5635020732879639, |
| "learning_rate": 8.116370571254407e-06, |
| "loss": 0.4651, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.3577880184331797, |
| "grad_norm": 0.43892374634742737, |
| "learning_rate": 8.108814870072434e-06, |
| "loss": 0.5018, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.3583410138248848, |
| "grad_norm": 0.4993932843208313, |
| "learning_rate": 8.10124757793158e-06, |
| "loss": 0.4848, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.35889400921658987, |
| "grad_norm": 0.5206483006477356, |
| "learning_rate": 8.093668723045866e-06, |
| "loss": 0.4845, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.35944700460829493, |
| "grad_norm": 0.4889640808105469, |
| "learning_rate": 8.08607833367243e-06, |
| "loss": 0.4731, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.5545540452003479, |
| "learning_rate": 8.078476438111404e-06, |
| "loss": 0.4919, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.36055299539170504, |
| "grad_norm": 0.47018927335739136, |
| "learning_rate": 8.07086306470583e-06, |
| "loss": 0.4902, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.36110599078341016, |
| "grad_norm": 0.5213498473167419, |
| "learning_rate": 8.06323824184154e-06, |
| "loss": 0.4918, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.3616589861751152, |
| "grad_norm": 0.4931851923465729, |
| "learning_rate": 8.055601997947056e-06, |
| "loss": 0.4868, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.3622119815668203, |
| "grad_norm": 0.4996345341205597, |
| "learning_rate": 8.04795436149348e-06, |
| "loss": 0.4908, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.36276497695852533, |
| "grad_norm": 0.48268017172813416, |
| "learning_rate": 8.04029536099439e-06, |
| "loss": 0.4975, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.3633179723502304, |
| "grad_norm": 0.5365729928016663, |
| "learning_rate": 8.032625025005736e-06, |
| "loss": 0.4891, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.3638709677419355, |
| "grad_norm": 0.5524755716323853, |
| "learning_rate": 8.024943382125732e-06, |
| "loss": 0.4833, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.36442396313364056, |
| "grad_norm": 0.4499988257884979, |
| "learning_rate": 8.017250460994748e-06, |
| "loss": 0.4724, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.3649769585253456, |
| "grad_norm": 0.44743797183036804, |
| "learning_rate": 8.0095462902952e-06, |
| "loss": 0.503, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.3655299539170507, |
| "grad_norm": 0.502610445022583, |
| "learning_rate": 8.001830898751455e-06, |
| "loss": 0.4813, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.36608294930875573, |
| "grad_norm": 0.41622281074523926, |
| "learning_rate": 7.99410431512971e-06, |
| "loss": 0.5157, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.36663594470046085, |
| "grad_norm": 0.4399860203266144, |
| "learning_rate": 7.986366568237893e-06, |
| "loss": 0.4741, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.3671889400921659, |
| "grad_norm": 0.43299001455307007, |
| "learning_rate": 7.97861768692555e-06, |
| "loss": 0.4727, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.36774193548387096, |
| "grad_norm": 0.44035786390304565, |
| "learning_rate": 7.970857700083747e-06, |
| "loss": 0.4662, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.368294930875576, |
| "grad_norm": 0.422536164522171, |
| "learning_rate": 7.96308663664495e-06, |
| "loss": 0.4969, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.3688479262672811, |
| "grad_norm": 0.44590070843696594, |
| "learning_rate": 7.955304525582928e-06, |
| "loss": 0.4591, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.3694009216589862, |
| "grad_norm": 0.43369054794311523, |
| "learning_rate": 7.947511395912635e-06, |
| "loss": 0.4941, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.36995391705069125, |
| "grad_norm": 0.40852341055870056, |
| "learning_rate": 7.939707276690111e-06, |
| "loss": 0.4859, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.3705069124423963, |
| "grad_norm": 0.3855592906475067, |
| "learning_rate": 7.93189219701237e-06, |
| "loss": 0.4834, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.37105990783410137, |
| "grad_norm": 0.40031880140304565, |
| "learning_rate": 7.924066186017288e-06, |
| "loss": 0.5146, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.3716129032258065, |
| "grad_norm": 0.42813777923583984, |
| "learning_rate": 7.916229272883499e-06, |
| "loss": 0.4875, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.37216589861751154, |
| "grad_norm": 0.4341396689414978, |
| "learning_rate": 7.908381486830286e-06, |
| "loss": 0.4787, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.3727188940092166, |
| "grad_norm": 0.40330180525779724, |
| "learning_rate": 7.90052285711747e-06, |
| "loss": 0.4866, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.37327188940092165, |
| "grad_norm": 0.4432542026042938, |
| "learning_rate": 7.892653413045298e-06, |
| "loss": 0.4574, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.3738248847926267, |
| "grad_norm": 0.4223710298538208, |
| "learning_rate": 7.884773183954344e-06, |
| "loss": 0.4619, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.3743778801843318, |
| "grad_norm": 0.39498183131217957, |
| "learning_rate": 7.876882199225388e-06, |
| "loss": 0.4717, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.3749308755760369, |
| "grad_norm": 0.4838169813156128, |
| "learning_rate": 7.868980488279316e-06, |
| "loss": 0.4756, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.37548387096774194, |
| "grad_norm": 0.3836471140384674, |
| "learning_rate": 7.861068080576998e-06, |
| "loss": 0.462, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.376036866359447, |
| "grad_norm": 0.503296971321106, |
| "learning_rate": 7.853145005619199e-06, |
| "loss": 0.4963, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.37658986175115206, |
| "grad_norm": 0.3729606568813324, |
| "learning_rate": 7.84521129294644e-06, |
| "loss": 0.4684, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.37714285714285717, |
| "grad_norm": 0.5140215158462524, |
| "learning_rate": 7.837266972138918e-06, |
| "loss": 0.5041, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.3776958525345622, |
| "grad_norm": 0.4366711676120758, |
| "learning_rate": 7.82931207281637e-06, |
| "loss": 0.5007, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.3782488479262673, |
| "grad_norm": 0.43705347180366516, |
| "learning_rate": 7.821346624637984e-06, |
| "loss": 0.4988, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.37880184331797234, |
| "grad_norm": 0.4190782308578491, |
| "learning_rate": 7.813370657302273e-06, |
| "loss": 0.4815, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.3793548387096774, |
| "grad_norm": 0.43545421957969666, |
| "learning_rate": 7.80538420054697e-06, |
| "loss": 0.486, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.3799078341013825, |
| "grad_norm": 0.4012760818004608, |
| "learning_rate": 7.797387284148919e-06, |
| "loss": 0.456, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.38046082949308757, |
| "grad_norm": 0.45634332299232483, |
| "learning_rate": 7.789379937923958e-06, |
| "loss": 0.4827, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.38101382488479263, |
| "grad_norm": 0.45957717299461365, |
| "learning_rate": 7.781362191726818e-06, |
| "loss": 0.4976, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.3815668202764977, |
| "grad_norm": 0.4902954399585724, |
| "learning_rate": 7.773334075451e-06, |
| "loss": 0.4952, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.38211981566820274, |
| "grad_norm": 0.4607703387737274, |
| "learning_rate": 7.765295619028672e-06, |
| "loss": 0.4995, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.38267281105990786, |
| "grad_norm": 0.49557220935821533, |
| "learning_rate": 7.757246852430553e-06, |
| "loss": 0.4921, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.3832258064516129, |
| "grad_norm": 0.4447820782661438, |
| "learning_rate": 7.749187805665801e-06, |
| "loss": 0.4589, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.383778801843318, |
| "grad_norm": 0.4028923809528351, |
| "learning_rate": 7.741118508781906e-06, |
| "loss": 0.4732, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.38433179723502303, |
| "grad_norm": 0.4806511700153351, |
| "learning_rate": 7.733038991864576e-06, |
| "loss": 0.4968, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.3848847926267281, |
| "grad_norm": 0.4322826862335205, |
| "learning_rate": 7.724949285037619e-06, |
| "loss": 0.4904, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.3854377880184332, |
| "grad_norm": 0.44291332364082336, |
| "learning_rate": 7.716849418462836e-06, |
| "loss": 0.4829, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.38599078341013826, |
| "grad_norm": 0.4092819094657898, |
| "learning_rate": 7.70873942233991e-06, |
| "loss": 0.4809, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.3865437788018433, |
| "grad_norm": 0.4542578160762787, |
| "learning_rate": 7.700619326906288e-06, |
| "loss": 0.4713, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.3870967741935484, |
| "grad_norm": 0.4618943929672241, |
| "learning_rate": 7.69248916243708e-06, |
| "loss": 0.4443, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.38764976958525343, |
| "grad_norm": 0.43175920844078064, |
| "learning_rate": 7.684348959244924e-06, |
| "loss": 0.4761, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.38820276497695855, |
| "grad_norm": 0.4224050045013428, |
| "learning_rate": 7.676198747679898e-06, |
| "loss": 0.4943, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.3887557603686636, |
| "grad_norm": 0.4592989683151245, |
| "learning_rate": 7.66803855812939e-06, |
| "loss": 0.4529, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.38930875576036866, |
| "grad_norm": 0.4457801878452301, |
| "learning_rate": 7.659868421017989e-06, |
| "loss": 0.4785, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.3898617511520737, |
| "grad_norm": 0.4617078900337219, |
| "learning_rate": 7.651688366807378e-06, |
| "loss": 0.4755, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.3904147465437788, |
| "grad_norm": 0.43977999687194824, |
| "learning_rate": 7.64349842599621e-06, |
| "loss": 0.5098, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.3909677419354839, |
| "grad_norm": 0.4340912699699402, |
| "learning_rate": 7.635298629120002e-06, |
| "loss": 0.4452, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.39152073732718895, |
| "grad_norm": 0.5204182863235474, |
| "learning_rate": 7.6270890067510196e-06, |
| "loss": 0.4677, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.392073732718894, |
| "grad_norm": 0.47488752007484436, |
| "learning_rate": 7.618869589498157e-06, |
| "loss": 0.465, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.39262672811059907, |
| "grad_norm": 0.4327450692653656, |
| "learning_rate": 7.610640408006832e-06, |
| "loss": 0.4945, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.3931797235023041, |
| "grad_norm": 0.5072281360626221, |
| "learning_rate": 7.602401492958868e-06, |
| "loss": 0.4987, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.39373271889400924, |
| "grad_norm": 0.4239625632762909, |
| "learning_rate": 7.594152875072376e-06, |
| "loss": 0.4715, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.3942857142857143, |
| "grad_norm": 0.4698258936405182, |
| "learning_rate": 7.5858945851016455e-06, |
| "loss": 0.4779, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.39483870967741935, |
| "grad_norm": 0.42160850763320923, |
| "learning_rate": 7.577626653837028e-06, |
| "loss": 0.45, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.3953917050691244, |
| "grad_norm": 0.4377395808696747, |
| "learning_rate": 7.5693491121048194e-06, |
| "loss": 0.5144, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.39594470046082947, |
| "grad_norm": 0.4012657403945923, |
| "learning_rate": 7.561061990767149e-06, |
| "loss": 0.4812, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.3964976958525346, |
| "grad_norm": 0.41947153210639954, |
| "learning_rate": 7.552765320721865e-06, |
| "loss": 0.4407, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.39705069124423964, |
| "grad_norm": 0.48914283514022827, |
| "learning_rate": 7.544459132902409e-06, |
| "loss": 0.4689, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.3976036866359447, |
| "grad_norm": 0.3954436480998993, |
| "learning_rate": 7.5361434582777205e-06, |
| "loss": 0.4866, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.39815668202764976, |
| "grad_norm": 0.45554065704345703, |
| "learning_rate": 7.527818327852101e-06, |
| "loss": 0.4499, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.3987096774193548, |
| "grad_norm": 0.3696426451206207, |
| "learning_rate": 7.519483772665113e-06, |
| "loss": 0.4963, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.3992626728110599, |
| "grad_norm": 0.42578935623168945, |
| "learning_rate": 7.511139823791452e-06, |
| "loss": 0.4642, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.399815668202765, |
| "grad_norm": 0.3758449852466583, |
| "learning_rate": 7.502786512340841e-06, |
| "loss": 0.516, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.40036866359447004, |
| "grad_norm": 0.4170108139514923, |
| "learning_rate": 7.494423869457912e-06, |
| "loss": 0.4824, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.4009216589861751, |
| "grad_norm": 0.3795914053916931, |
| "learning_rate": 7.4860519263220865e-06, |
| "loss": 0.4744, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.40147465437788016, |
| "grad_norm": 0.389635294675827, |
| "learning_rate": 7.477670714147461e-06, |
| "loss": 0.4957, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.40202764976958527, |
| "grad_norm": 0.44072335958480835, |
| "learning_rate": 7.469280264182689e-06, |
| "loss": 0.5172, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.40258064516129033, |
| "grad_norm": 0.372860312461853, |
| "learning_rate": 7.460880607710872e-06, |
| "loss": 0.4465, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.4031336405529954, |
| "grad_norm": 0.47300985455513, |
| "learning_rate": 7.452471776049432e-06, |
| "loss": 0.4636, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.40368663594470044, |
| "grad_norm": 0.44356971979141235, |
| "learning_rate": 7.444053800550004e-06, |
| "loss": 0.4911, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.4042396313364055, |
| "grad_norm": 0.4640626311302185, |
| "learning_rate": 7.435626712598312e-06, |
| "loss": 0.4873, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.4047926267281106, |
| "grad_norm": 0.4652840197086334, |
| "learning_rate": 7.427190543614053e-06, |
| "loss": 0.4641, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.4053456221198157, |
| "grad_norm": 0.46554264426231384, |
| "learning_rate": 7.418745325050787e-06, |
| "loss": 0.4797, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.40589861751152073, |
| "grad_norm": 0.42912858724594116, |
| "learning_rate": 7.410291088395812e-06, |
| "loss": 0.5064, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.4064516129032258, |
| "grad_norm": 0.41365447640419006, |
| "learning_rate": 7.401827865170047e-06, |
| "loss": 0.4873, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.4070046082949309, |
| "grad_norm": 0.5490487813949585, |
| "learning_rate": 7.3933556869279235e-06, |
| "loss": 0.4583, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.40755760368663596, |
| "grad_norm": 0.39917993545532227, |
| "learning_rate": 7.384874585257255e-06, |
| "loss": 0.4625, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.408110599078341, |
| "grad_norm": 0.4784587621688843, |
| "learning_rate": 7.3763845917791245e-06, |
| "loss": 0.5073, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.4086635944700461, |
| "grad_norm": 0.4989032447338104, |
| "learning_rate": 7.367885738147773e-06, |
| "loss": 0.4652, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.40921658986175113, |
| "grad_norm": 0.4629876911640167, |
| "learning_rate": 7.359378056050472e-06, |
| "loss": 0.4601, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.40976958525345625, |
| "grad_norm": 0.4878072440624237, |
| "learning_rate": 7.35086157720741e-06, |
| "loss": 0.4713, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.4103225806451613, |
| "grad_norm": 0.49611517786979675, |
| "learning_rate": 7.3423363333715726e-06, |
| "loss": 0.4727, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.41087557603686636, |
| "grad_norm": 0.5412954688072205, |
| "learning_rate": 7.333802356328624e-06, |
| "loss": 0.4795, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.4114285714285714, |
| "grad_norm": 0.387992262840271, |
| "learning_rate": 7.3252596778967954e-06, |
| "loss": 0.492, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.4119815668202765, |
| "grad_norm": 0.47084909677505493, |
| "learning_rate": 7.316708329926754e-06, |
| "loss": 0.4546, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.4125345622119816, |
| "grad_norm": 0.5451803803443909, |
| "learning_rate": 7.308148344301491e-06, |
| "loss": 0.5053, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.41308755760368665, |
| "grad_norm": 0.4724251329898834, |
| "learning_rate": 7.2995797529362075e-06, |
| "loss": 0.4937, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.4136405529953917, |
| "grad_norm": 0.48225685954093933, |
| "learning_rate": 7.291002587778184e-06, |
| "loss": 0.4939, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.41419354838709677, |
| "grad_norm": 0.4945637881755829, |
| "learning_rate": 7.282416880806673e-06, |
| "loss": 0.4934, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.4147465437788018, |
| "grad_norm": 0.48181742429733276, |
| "learning_rate": 7.273822664032771e-06, |
| "loss": 0.4763, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.41529953917050694, |
| "grad_norm": 0.41117456555366516, |
| "learning_rate": 7.265219969499302e-06, |
| "loss": 0.4928, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.415852534562212, |
| "grad_norm": 0.5093626976013184, |
| "learning_rate": 7.256608829280705e-06, |
| "loss": 0.4563, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.41640552995391705, |
| "grad_norm": 0.45837345719337463, |
| "learning_rate": 7.2479892754828995e-06, |
| "loss": 0.4736, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.4169585253456221, |
| "grad_norm": 0.4117204546928406, |
| "learning_rate": 7.239361340243179e-06, |
| "loss": 0.4788, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.41751152073732717, |
| "grad_norm": 0.44084909558296204, |
| "learning_rate": 7.230725055730088e-06, |
| "loss": 0.4781, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.4180645161290323, |
| "grad_norm": 0.5877032279968262, |
| "learning_rate": 7.222080454143296e-06, |
| "loss": 0.4782, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.41861751152073734, |
| "grad_norm": 0.3853990137577057, |
| "learning_rate": 7.213427567713485e-06, |
| "loss": 0.4498, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.4191705069124424, |
| "grad_norm": 0.4723830819129944, |
| "learning_rate": 7.204766428702226e-06, |
| "loss": 0.4601, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.41972350230414746, |
| "grad_norm": 0.43980076909065247, |
| "learning_rate": 7.196097069401857e-06, |
| "loss": 0.4783, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.4202764976958525, |
| "grad_norm": 0.4152413606643677, |
| "learning_rate": 7.1874195221353706e-06, |
| "loss": 0.4873, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.4208294930875576, |
| "grad_norm": 0.3723086714744568, |
| "learning_rate": 7.17873381925628e-06, |
| "loss": 0.4664, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.4213824884792627, |
| "grad_norm": 0.44318974018096924, |
| "learning_rate": 7.17003999314851e-06, |
| "loss": 0.4865, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.42193548387096774, |
| "grad_norm": 0.40795907378196716, |
| "learning_rate": 7.161338076226272e-06, |
| "loss": 0.4795, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.4224884792626728, |
| "grad_norm": 0.4073677659034729, |
| "learning_rate": 7.1526281009339426e-06, |
| "loss": 0.4815, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.42304147465437786, |
| "grad_norm": 0.4583486020565033, |
| "learning_rate": 7.143910099745944e-06, |
| "loss": 0.4872, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.42359447004608297, |
| "grad_norm": 0.4036937654018402, |
| "learning_rate": 7.13518410516662e-06, |
| "loss": 0.4616, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.42414746543778803, |
| "grad_norm": 0.42568910121917725, |
| "learning_rate": 7.126450149730122e-06, |
| "loss": 0.489, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.4247004608294931, |
| "grad_norm": 0.42301973700523376, |
| "learning_rate": 7.1177082660002784e-06, |
| "loss": 0.4685, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.42525345622119815, |
| "grad_norm": 0.45005002617836, |
| "learning_rate": 7.108958486570479e-06, |
| "loss": 0.4555, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.4258064516129032, |
| "grad_norm": 0.45795372128486633, |
| "learning_rate": 7.1002008440635515e-06, |
| "loss": 0.4678, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.4263594470046083, |
| "grad_norm": 0.41580426692962646, |
| "learning_rate": 7.091435371131642e-06, |
| "loss": 0.5058, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.4269124423963134, |
| "grad_norm": 0.4396505355834961, |
| "learning_rate": 7.082662100456089e-06, |
| "loss": 0.492, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.42746543778801843, |
| "grad_norm": 0.4275103211402893, |
| "learning_rate": 7.073881064747308e-06, |
| "loss": 0.4624, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.4280184331797235, |
| "grad_norm": 0.37376993894577026, |
| "learning_rate": 7.0650922967446634e-06, |
| "loss": 0.4626, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 0.40532317757606506, |
| "learning_rate": 7.056295829216347e-06, |
| "loss": 0.5042, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.42912442396313366, |
| "grad_norm": 0.39862802624702454, |
| "learning_rate": 7.047491694959263e-06, |
| "loss": 0.4398, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.4296774193548387, |
| "grad_norm": 0.4326246380805969, |
| "learning_rate": 7.038679926798895e-06, |
| "loss": 0.4902, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.4302304147465438, |
| "grad_norm": 0.43664461374282837, |
| "learning_rate": 7.029860557589191e-06, |
| "loss": 0.4817, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.43078341013824883, |
| "grad_norm": 0.4556635320186615, |
| "learning_rate": 7.02103362021244e-06, |
| "loss": 0.4825, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.4313364055299539, |
| "grad_norm": 0.40778669714927673, |
| "learning_rate": 7.012199147579146e-06, |
| "loss": 0.496, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.431889400921659, |
| "grad_norm": 0.41946834325790405, |
| "learning_rate": 7.00335717262791e-06, |
| "loss": 0.482, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.43244239631336406, |
| "grad_norm": 0.4511401653289795, |
| "learning_rate": 6.994507728325304e-06, |
| "loss": 0.4763, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.4329953917050691, |
| "grad_norm": 0.43290090560913086, |
| "learning_rate": 6.985650847665747e-06, |
| "loss": 0.4804, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.4335483870967742, |
| "grad_norm": 0.4241175949573517, |
| "learning_rate": 6.976786563671386e-06, |
| "loss": 0.4776, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.43410138248847924, |
| "grad_norm": 0.4226067364215851, |
| "learning_rate": 6.967914909391971e-06, |
| "loss": 0.4839, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.43465437788018435, |
| "grad_norm": 0.39866408705711365, |
| "learning_rate": 6.959035917904728e-06, |
| "loss": 0.4596, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.4352073732718894, |
| "grad_norm": 0.37730705738067627, |
| "learning_rate": 6.950149622314245e-06, |
| "loss": 0.4784, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.43576036866359447, |
| "grad_norm": 0.41297316551208496, |
| "learning_rate": 6.941256055752337e-06, |
| "loss": 0.4887, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.4363133640552995, |
| "grad_norm": 0.4532250761985779, |
| "learning_rate": 6.932355251377932e-06, |
| "loss": 0.4897, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.4368663594470046, |
| "grad_norm": 0.4239371418952942, |
| "learning_rate": 6.923447242376942e-06, |
| "loss": 0.4873, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.4374193548387097, |
| "grad_norm": 0.33959221839904785, |
| "learning_rate": 6.91453206196214e-06, |
| "loss": 0.4697, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.43797235023041475, |
| "grad_norm": 0.4459823668003082, |
| "learning_rate": 6.90560974337304e-06, |
| "loss": 0.4799, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.4385253456221198, |
| "grad_norm": 0.4143071174621582, |
| "learning_rate": 6.896680319875767e-06, |
| "loss": 0.4556, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.43907834101382487, |
| "grad_norm": 0.4591521620750427, |
| "learning_rate": 6.887743824762937e-06, |
| "loss": 0.4841, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.4396313364055299, |
| "grad_norm": 0.41245412826538086, |
| "learning_rate": 6.878800291353533e-06, |
| "loss": 0.5083, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.44018433179723504, |
| "grad_norm": 0.4207186996936798, |
| "learning_rate": 6.8698497529927784e-06, |
| "loss": 0.4653, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.4407373271889401, |
| "grad_norm": 0.42010927200317383, |
| "learning_rate": 6.860892243052015e-06, |
| "loss": 0.4595, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.44129032258064516, |
| "grad_norm": 0.41488680243492126, |
| "learning_rate": 6.851927794928578e-06, |
| "loss": 0.4685, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.4418433179723502, |
| "grad_norm": 0.43417924642562866, |
| "learning_rate": 6.842956442045668e-06, |
| "loss": 0.4753, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.4423963133640553, |
| "grad_norm": 0.35748791694641113, |
| "learning_rate": 6.833978217852233e-06, |
| "loss": 0.4677, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4429493087557604, |
| "grad_norm": 0.4993172883987427, |
| "learning_rate": 6.824993155822838e-06, |
| "loss": 0.4729, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.44350230414746544, |
| "grad_norm": 0.36592617630958557, |
| "learning_rate": 6.816001289457542e-06, |
| "loss": 0.4565, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.4440552995391705, |
| "grad_norm": 0.3910996615886688, |
| "learning_rate": 6.807002652281777e-06, |
| "loss": 0.4619, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.44460829493087556, |
| "grad_norm": 0.41243669390678406, |
| "learning_rate": 6.797997277846215e-06, |
| "loss": 0.46, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.44516129032258067, |
| "grad_norm": 0.3570329546928406, |
| "learning_rate": 6.788985199726648e-06, |
| "loss": 0.4624, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.44571428571428573, |
| "grad_norm": 0.4660005271434784, |
| "learning_rate": 6.779966451523866e-06, |
| "loss": 0.494, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.4462672811059908, |
| "grad_norm": 0.38170328736305237, |
| "learning_rate": 6.770941066863523e-06, |
| "loss": 0.4848, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.44682027649769585, |
| "grad_norm": 0.410656601190567, |
| "learning_rate": 6.761909079396018e-06, |
| "loss": 0.4783, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.4473732718894009, |
| "grad_norm": 0.39199960231781006, |
| "learning_rate": 6.752870522796372e-06, |
| "loss": 0.4706, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.447926267281106, |
| "grad_norm": 0.4048680067062378, |
| "learning_rate": 6.743825430764091e-06, |
| "loss": 0.4841, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.4484792626728111, |
| "grad_norm": 0.3699415922164917, |
| "learning_rate": 6.734773837023054e-06, |
| "loss": 0.4479, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.44903225806451613, |
| "grad_norm": 0.3935936391353607, |
| "learning_rate": 6.725715775321379e-06, |
| "loss": 0.4802, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.4495852534562212, |
| "grad_norm": 0.39852485060691833, |
| "learning_rate": 6.7166512794312986e-06, |
| "loss": 0.4652, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.45013824884792625, |
| "grad_norm": 0.38947340846061707, |
| "learning_rate": 6.707580383149035e-06, |
| "loss": 0.4925, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.45069124423963136, |
| "grad_norm": 0.4168023467063904, |
| "learning_rate": 6.698503120294674e-06, |
| "loss": 0.4829, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.4512442396313364, |
| "grad_norm": 0.39369869232177734, |
| "learning_rate": 6.6894195247120396e-06, |
| "loss": 0.4562, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.4517972350230415, |
| "grad_norm": 0.3529163599014282, |
| "learning_rate": 6.680329630268565e-06, |
| "loss": 0.4628, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.45235023041474653, |
| "grad_norm": 0.3897452652454376, |
| "learning_rate": 6.671233470855171e-06, |
| "loss": 0.4635, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.4529032258064516, |
| "grad_norm": 0.4104883074760437, |
| "learning_rate": 6.662131080386132e-06, |
| "loss": 0.4716, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.4534562211981567, |
| "grad_norm": 0.39856767654418945, |
| "learning_rate": 6.653022492798959e-06, |
| "loss": 0.4666, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.45400921658986176, |
| "grad_norm": 0.3940970301628113, |
| "learning_rate": 6.643907742054267e-06, |
| "loss": 0.4899, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.4545622119815668, |
| "grad_norm": 0.4091474413871765, |
| "learning_rate": 6.63478686213565e-06, |
| "loss": 0.4829, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.4551152073732719, |
| "grad_norm": 0.4372076690196991, |
| "learning_rate": 6.625659887049554e-06, |
| "loss": 0.4872, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.45566820276497694, |
| "grad_norm": 0.4298078417778015, |
| "learning_rate": 6.6165268508251455e-06, |
| "loss": 0.4748, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.45622119815668205, |
| "grad_norm": 0.4123404622077942, |
| "learning_rate": 6.607387787514199e-06, |
| "loss": 0.4784, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.4567741935483871, |
| "grad_norm": 0.48749467730522156, |
| "learning_rate": 6.598242731190955e-06, |
| "loss": 0.4617, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.45732718894009217, |
| "grad_norm": 0.44510942697525024, |
| "learning_rate": 6.589091715951996e-06, |
| "loss": 0.5012, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.4578801843317972, |
| "grad_norm": 0.4342193603515625, |
| "learning_rate": 6.579934775916128e-06, |
| "loss": 0.4686, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.4584331797235023, |
| "grad_norm": 0.46518048644065857, |
| "learning_rate": 6.570771945224243e-06, |
| "loss": 0.4695, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.4589861751152074, |
| "grad_norm": 0.4383966326713562, |
| "learning_rate": 6.561603258039195e-06, |
| "loss": 0.4719, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.45953917050691245, |
| "grad_norm": 0.41184401512145996, |
| "learning_rate": 6.552428748545677e-06, |
| "loss": 0.4789, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.4600921658986175, |
| "grad_norm": 0.4380422830581665, |
| "learning_rate": 6.543248450950087e-06, |
| "loss": 0.4907, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.46064516129032257, |
| "grad_norm": 0.453457236289978, |
| "learning_rate": 6.534062399480405e-06, |
| "loss": 0.4747, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.4611981566820276, |
| "grad_norm": 0.36671724915504456, |
| "learning_rate": 6.524870628386064e-06, |
| "loss": 0.4905, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.46175115207373274, |
| "grad_norm": 0.42992615699768066, |
| "learning_rate": 6.515673171937822e-06, |
| "loss": 0.4634, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.4623041474654378, |
| "grad_norm": 0.40917208790779114, |
| "learning_rate": 6.5064700644276315e-06, |
| "loss": 0.4661, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.46285714285714286, |
| "grad_norm": 0.4510115385055542, |
| "learning_rate": 6.497261340168519e-06, |
| "loss": 0.4504, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.4634101382488479, |
| "grad_norm": 0.45596301555633545, |
| "learning_rate": 6.4880470334944515e-06, |
| "loss": 0.4551, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.46396313364055297, |
| "grad_norm": 0.4473353624343872, |
| "learning_rate": 6.478827178760205e-06, |
| "loss": 0.4834, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.4645161290322581, |
| "grad_norm": 0.4285813271999359, |
| "learning_rate": 6.469601810341247e-06, |
| "loss": 0.498, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.46506912442396314, |
| "grad_norm": 0.38439249992370605, |
| "learning_rate": 6.4603709626336e-06, |
| "loss": 0.4754, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.4656221198156682, |
| "grad_norm": 0.3741195499897003, |
| "learning_rate": 6.451134670053716e-06, |
| "loss": 0.4887, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.46617511520737326, |
| "grad_norm": 0.4147772789001465, |
| "learning_rate": 6.441892967038346e-06, |
| "loss": 0.5023, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.4667281105990783, |
| "grad_norm": 0.3958702087402344, |
| "learning_rate": 6.4326458880444155e-06, |
| "loss": 0.507, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.46728110599078343, |
| "grad_norm": 0.4042072892189026, |
| "learning_rate": 6.423393467548893e-06, |
| "loss": 0.444, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.4678341013824885, |
| "grad_norm": 0.387441486120224, |
| "learning_rate": 6.414135740048662e-06, |
| "loss": 0.4446, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.46838709677419355, |
| "grad_norm": 0.4227030277252197, |
| "learning_rate": 6.404872740060392e-06, |
| "loss": 0.5104, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.4689400921658986, |
| "grad_norm": 0.3976932764053345, |
| "learning_rate": 6.395604502120412e-06, |
| "loss": 0.4686, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.46949308755760366, |
| "grad_norm": 0.41935819387435913, |
| "learning_rate": 6.386331060784583e-06, |
| "loss": 0.4802, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.4700460829493088, |
| "grad_norm": 0.3910142183303833, |
| "learning_rate": 6.377052450628159e-06, |
| "loss": 0.465, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.47059907834101383, |
| "grad_norm": 0.3708650469779968, |
| "learning_rate": 6.367768706245674e-06, |
| "loss": 0.4844, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.4711520737327189, |
| "grad_norm": 0.39117151498794556, |
| "learning_rate": 6.358479862250796e-06, |
| "loss": 0.4826, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.47170506912442395, |
| "grad_norm": 0.36844393610954285, |
| "learning_rate": 6.349185953276216e-06, |
| "loss": 0.4531, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.472258064516129, |
| "grad_norm": 0.3579118549823761, |
| "learning_rate": 6.339887013973498e-06, |
| "loss": 0.4579, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.4728110599078341, |
| "grad_norm": 0.4207668900489807, |
| "learning_rate": 6.330583079012972e-06, |
| "loss": 0.4769, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.4733640552995392, |
| "grad_norm": 0.37506356835365295, |
| "learning_rate": 6.321274183083588e-06, |
| "loss": 0.4592, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.47391705069124423, |
| "grad_norm": 0.4109135568141937, |
| "learning_rate": 6.3119603608927914e-06, |
| "loss": 0.4721, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.4744700460829493, |
| "grad_norm": 0.4288492202758789, |
| "learning_rate": 6.302641647166402e-06, |
| "loss": 0.4766, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.47502304147465435, |
| "grad_norm": 0.37833884358406067, |
| "learning_rate": 6.293318076648469e-06, |
| "loss": 0.4779, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.47557603686635946, |
| "grad_norm": 0.4019952714443207, |
| "learning_rate": 6.283989684101155e-06, |
| "loss": 0.487, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.4761290322580645, |
| "grad_norm": 0.45245492458343506, |
| "learning_rate": 6.274656504304598e-06, |
| "loss": 0.4582, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.4766820276497696, |
| "grad_norm": 0.4037781357765198, |
| "learning_rate": 6.265318572056787e-06, |
| "loss": 0.4536, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.47723502304147464, |
| "grad_norm": 0.4167582392692566, |
| "learning_rate": 6.25597592217343e-06, |
| "loss": 0.4737, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.47778801843317975, |
| "grad_norm": 0.38921552896499634, |
| "learning_rate": 6.2466285894878206e-06, |
| "loss": 0.4889, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.4783410138248848, |
| "grad_norm": 0.3599966764450073, |
| "learning_rate": 6.2372766088507195e-06, |
| "loss": 0.4421, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.47889400921658987, |
| "grad_norm": 0.440920889377594, |
| "learning_rate": 6.227920015130209e-06, |
| "loss": 0.4959, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.4794470046082949, |
| "grad_norm": 0.39445987343788147, |
| "learning_rate": 6.218558843211576e-06, |
| "loss": 0.4784, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.4886539578437805, |
| "learning_rate": 6.209193127997173e-06, |
| "loss": 0.4702, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.4805529953917051, |
| "grad_norm": 0.43818390369415283, |
| "learning_rate": 6.199822904406295e-06, |
| "loss": 0.4685, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.48110599078341015, |
| "grad_norm": 0.4038926064968109, |
| "learning_rate": 6.190448207375046e-06, |
| "loss": 0.4572, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.4816589861751152, |
| "grad_norm": 0.5002937912940979, |
| "learning_rate": 6.1810690718562065e-06, |
| "loss": 0.4899, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.48221198156682027, |
| "grad_norm": 0.39266350865364075, |
| "learning_rate": 6.1716855328191064e-06, |
| "loss": 0.4682, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.4827649769585253, |
| "grad_norm": 0.4712425470352173, |
| "learning_rate": 6.162297625249494e-06, |
| "loss": 0.473, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.48331797235023044, |
| "grad_norm": 0.33758747577667236, |
| "learning_rate": 6.1529053841494065e-06, |
| "loss": 0.4771, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.4838709677419355, |
| "grad_norm": 0.442381888628006, |
| "learning_rate": 6.143508844537038e-06, |
| "loss": 0.4834, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.48442396313364056, |
| "grad_norm": 0.43109333515167236, |
| "learning_rate": 6.134108041446609e-06, |
| "loss": 0.4717, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.4849769585253456, |
| "grad_norm": 0.4240846037864685, |
| "learning_rate": 6.124703009928233e-06, |
| "loss": 0.491, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.48552995391705067, |
| "grad_norm": 0.41452300548553467, |
| "learning_rate": 6.115293785047793e-06, |
| "loss": 0.4789, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.4860829493087558, |
| "grad_norm": 0.397856742143631, |
| "learning_rate": 6.105880401886804e-06, |
| "loss": 0.4655, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.48663594470046084, |
| "grad_norm": 0.3984890878200531, |
| "learning_rate": 6.096462895542288e-06, |
| "loss": 0.4864, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.4871889400921659, |
| "grad_norm": 0.4527179002761841, |
| "learning_rate": 6.087041301126636e-06, |
| "loss": 0.4884, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.48774193548387096, |
| "grad_norm": 0.4393288791179657, |
| "learning_rate": 6.077615653767484e-06, |
| "loss": 0.45, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.488294930875576, |
| "grad_norm": 0.4285862445831299, |
| "learning_rate": 6.068185988607575e-06, |
| "loss": 0.4858, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.48884792626728113, |
| "grad_norm": 0.4243047833442688, |
| "learning_rate": 6.058752340804639e-06, |
| "loss": 0.4801, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.4894009216589862, |
| "grad_norm": 0.37787267565727234, |
| "learning_rate": 6.0493147455312475e-06, |
| "loss": 0.4584, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.48995391705069125, |
| "grad_norm": 0.4291735589504242, |
| "learning_rate": 6.039873237974695e-06, |
| "loss": 0.5038, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.4905069124423963, |
| "grad_norm": 0.3942468464374542, |
| "learning_rate": 6.03042785333686e-06, |
| "loss": 0.4805, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.49105990783410136, |
| "grad_norm": 0.42691728472709656, |
| "learning_rate": 6.020978626834077e-06, |
| "loss": 0.4587, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.4916129032258065, |
| "grad_norm": 0.3891282379627228, |
| "learning_rate": 6.0115255936970056e-06, |
| "loss": 0.4756, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.49216589861751153, |
| "grad_norm": 0.39589089155197144, |
| "learning_rate": 6.002068789170497e-06, |
| "loss": 0.4796, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.4927188940092166, |
| "grad_norm": 0.4499064087867737, |
| "learning_rate": 5.992608248513465e-06, |
| "loss": 0.4667, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.49327188940092165, |
| "grad_norm": 0.4281262457370758, |
| "learning_rate": 5.983144006998751e-06, |
| "loss": 0.4762, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.4938248847926267, |
| "grad_norm": 0.4095862805843353, |
| "learning_rate": 5.973676099912996e-06, |
| "loss": 0.4691, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.4943778801843318, |
| "grad_norm": 0.4401033818721771, |
| "learning_rate": 5.964204562556508e-06, |
| "loss": 0.4688, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.4949308755760369, |
| "grad_norm": 0.41105031967163086, |
| "learning_rate": 5.954729430243129e-06, |
| "loss": 0.4877, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.49548387096774194, |
| "grad_norm": 0.39041486382484436, |
| "learning_rate": 5.945250738300108e-06, |
| "loss": 0.4504, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.496036866359447, |
| "grad_norm": 0.4423321485519409, |
| "learning_rate": 5.935768522067962e-06, |
| "loss": 0.4858, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.49658986175115205, |
| "grad_norm": 0.4127327799797058, |
| "learning_rate": 5.9262828169003476e-06, |
| "loss": 0.4767, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.49714285714285716, |
| "grad_norm": 0.41609784960746765, |
| "learning_rate": 5.9167936581639325e-06, |
| "loss": 0.4901, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.4976958525345622, |
| "grad_norm": 0.3699811100959778, |
| "learning_rate": 5.9073010812382595e-06, |
| "loss": 0.4595, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4982488479262673, |
| "grad_norm": 0.45880934596061707, |
| "learning_rate": 5.897805121515616e-06, |
| "loss": 0.4514, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.49880184331797234, |
| "grad_norm": 0.38559094071388245, |
| "learning_rate": 5.888305814400901e-06, |
| "loss": 0.4871, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.4993548387096774, |
| "grad_norm": 0.40912917256355286, |
| "learning_rate": 5.878803195311496e-06, |
| "loss": 0.4726, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.4999078341013825, |
| "grad_norm": 0.4119787812232971, |
| "learning_rate": 5.869297299677128e-06, |
| "loss": 0.459, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.5004608294930876, |
| "grad_norm": 0.4300439953804016, |
| "learning_rate": 5.8597881629397435e-06, |
| "loss": 0.4764, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.5010138248847926, |
| "grad_norm": 0.4216427803039551, |
| "learning_rate": 5.850275820553368e-06, |
| "loss": 0.4795, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.5015668202764977, |
| "grad_norm": 0.578460693359375, |
| "learning_rate": 5.840760307983988e-06, |
| "loss": 0.4463, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.5021198156682027, |
| "grad_norm": 0.38813382387161255, |
| "learning_rate": 5.831241660709402e-06, |
| "loss": 0.479, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.5026728110599078, |
| "grad_norm": 0.4912916123867035, |
| "learning_rate": 5.8217199142190975e-06, |
| "loss": 0.4846, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.5032258064516129, |
| "grad_norm": 0.4877387285232544, |
| "learning_rate": 5.812195104014119e-06, |
| "loss": 0.5051, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.503778801843318, |
| "grad_norm": 0.4228856861591339, |
| "learning_rate": 5.802667265606933e-06, |
| "loss": 0.4853, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.5043317972350231, |
| "grad_norm": 0.4995596706867218, |
| "learning_rate": 5.793136434521296e-06, |
| "loss": 0.4603, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.5048847926267281, |
| "grad_norm": 0.5033881664276123, |
| "learning_rate": 5.783602646292123e-06, |
| "loss": 0.4793, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.5054377880184332, |
| "grad_norm": 0.41643261909484863, |
| "learning_rate": 5.774065936465352e-06, |
| "loss": 0.4744, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.5059907834101383, |
| "grad_norm": 0.4517223834991455, |
| "learning_rate": 5.764526340597818e-06, |
| "loss": 0.4659, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.5065437788018433, |
| "grad_norm": 0.3723732531070709, |
| "learning_rate": 5.754983894257114e-06, |
| "loss": 0.4727, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.5070967741935484, |
| "grad_norm": 0.46375709772109985, |
| "learning_rate": 5.745438633021462e-06, |
| "loss": 0.4977, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.5076497695852534, |
| "grad_norm": 0.42543715238571167, |
| "learning_rate": 5.7358905924795775e-06, |
| "loss": 0.4931, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.5082027649769585, |
| "grad_norm": 0.3639947772026062, |
| "learning_rate": 5.726339808230539e-06, |
| "loss": 0.4608, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.5087557603686635, |
| "grad_norm": 0.47503146529197693, |
| "learning_rate": 5.716786315883657e-06, |
| "loss": 0.4746, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5093087557603687, |
| "grad_norm": 0.4439897835254669, |
| "learning_rate": 5.707230151058334e-06, |
| "loss": 0.47, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.5098617511520738, |
| "grad_norm": 0.43655335903167725, |
| "learning_rate": 5.697671349383939e-06, |
| "loss": 0.4668, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.5104147465437788, |
| "grad_norm": 0.4014508128166199, |
| "learning_rate": 5.688109946499676e-06, |
| "loss": 0.4537, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.5109677419354839, |
| "grad_norm": 0.4049840271472931, |
| "learning_rate": 5.678545978054443e-06, |
| "loss": 0.4713, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.511520737327189, |
| "grad_norm": 0.4141314923763275, |
| "learning_rate": 5.668979479706703e-06, |
| "loss": 0.4882, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.512073732718894, |
| "grad_norm": 0.4375897943973541, |
| "learning_rate": 5.659410487124355e-06, |
| "loss": 0.4625, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.5126267281105991, |
| "grad_norm": 0.3951698839664459, |
| "learning_rate": 5.649839035984597e-06, |
| "loss": 0.4841, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.5131797235023041, |
| "grad_norm": 0.367379367351532, |
| "learning_rate": 5.6402651619737865e-06, |
| "loss": 0.4818, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.5137327188940092, |
| "grad_norm": 0.3982902765274048, |
| "learning_rate": 5.630688900787326e-06, |
| "loss": 0.4835, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.5142857142857142, |
| "grad_norm": 0.41130173206329346, |
| "learning_rate": 5.621110288129509e-06, |
| "loss": 0.4592, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.5148387096774194, |
| "grad_norm": 0.4003121554851532, |
| "learning_rate": 5.6115293597134015e-06, |
| "loss": 0.4511, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.5153917050691245, |
| "grad_norm": 0.3777535855770111, |
| "learning_rate": 5.601946151260702e-06, |
| "loss": 0.4943, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.5159447004608295, |
| "grad_norm": 0.46429774165153503, |
| "learning_rate": 5.592360698501609e-06, |
| "loss": 0.4709, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.5164976958525346, |
| "grad_norm": 0.38206857442855835, |
| "learning_rate": 5.58277303717469e-06, |
| "loss": 0.4817, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.5170506912442396, |
| "grad_norm": 0.39985257387161255, |
| "learning_rate": 5.573183203026747e-06, |
| "loss": 0.4646, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.5176036866359447, |
| "grad_norm": 0.4025905728340149, |
| "learning_rate": 5.563591231812683e-06, |
| "loss": 0.471, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.5181566820276498, |
| "grad_norm": 0.41740676760673523, |
| "learning_rate": 5.553997159295366e-06, |
| "loss": 0.4798, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.5187096774193548, |
| "grad_norm": 0.395632266998291, |
| "learning_rate": 5.544401021245505e-06, |
| "loss": 0.4905, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.5192626728110599, |
| "grad_norm": 0.3581371605396271, |
| "learning_rate": 5.534802853441503e-06, |
| "loss": 0.4563, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.5198156682027649, |
| "grad_norm": 0.3925972282886505, |
| "learning_rate": 5.525202691669335e-06, |
| "loss": 0.4553, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.5203686635944701, |
| "grad_norm": 0.3491620421409607, |
| "learning_rate": 5.515600571722412e-06, |
| "loss": 0.4602, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.5209216589861752, |
| "grad_norm": 0.37922897934913635, |
| "learning_rate": 5.505996529401442e-06, |
| "loss": 0.4588, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.5214746543778802, |
| "grad_norm": 0.37720414996147156, |
| "learning_rate": 5.496390600514298e-06, |
| "loss": 0.4688, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.5220276497695853, |
| "grad_norm": 0.38247910141944885, |
| "learning_rate": 5.486782820875895e-06, |
| "loss": 0.4619, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.5225806451612903, |
| "grad_norm": 0.3531082570552826, |
| "learning_rate": 5.477173226308042e-06, |
| "loss": 0.4611, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.5231336405529954, |
| "grad_norm": 0.35197713971138, |
| "learning_rate": 5.4675618526393185e-06, |
| "loss": 0.483, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.5236866359447004, |
| "grad_norm": 0.42967379093170166, |
| "learning_rate": 5.457948735704933e-06, |
| "loss": 0.469, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.5242396313364055, |
| "grad_norm": 0.41465815901756287, |
| "learning_rate": 5.448333911346598e-06, |
| "loss": 0.4792, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.5247926267281106, |
| "grad_norm": 0.385215163230896, |
| "learning_rate": 5.43871741541239e-06, |
| "loss": 0.4789, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.5253456221198156, |
| "grad_norm": 0.39359477162361145, |
| "learning_rate": 5.429099283756618e-06, |
| "loss": 0.4356, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5258986175115208, |
| "grad_norm": 0.37385135889053345, |
| "learning_rate": 5.419479552239689e-06, |
| "loss": 0.4713, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.5264516129032258, |
| "grad_norm": 0.40417802333831787, |
| "learning_rate": 5.4098582567279755e-06, |
| "loss": 0.4714, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.5270046082949309, |
| "grad_norm": 0.415487140417099, |
| "learning_rate": 5.400235433093682e-06, |
| "loss": 0.4696, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.527557603686636, |
| "grad_norm": 0.40797826647758484, |
| "learning_rate": 5.39061111721471e-06, |
| "loss": 0.4861, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.528110599078341, |
| "grad_norm": 0.4314478039741516, |
| "learning_rate": 5.380985344974524e-06, |
| "loss": 0.4885, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.5286635944700461, |
| "grad_norm": 0.3722938895225525, |
| "learning_rate": 5.371358152262019e-06, |
| "loss": 0.4576, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.5292165898617511, |
| "grad_norm": 0.4123469293117523, |
| "learning_rate": 5.361729574971387e-06, |
| "loss": 0.4657, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.5297695852534562, |
| "grad_norm": 0.4114021956920624, |
| "learning_rate": 5.352099649001979e-06, |
| "loss": 0.4705, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.5303225806451612, |
| "grad_norm": 0.38689500093460083, |
| "learning_rate": 5.3424684102581795e-06, |
| "loss": 0.4661, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.5308755760368664, |
| "grad_norm": 0.4139274060726166, |
| "learning_rate": 5.3328358946492634e-06, |
| "loss": 0.4721, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5314285714285715, |
| "grad_norm": 0.3889627754688263, |
| "learning_rate": 5.323202138089266e-06, |
| "loss": 0.4632, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.5319815668202765, |
| "grad_norm": 0.3822690546512604, |
| "learning_rate": 5.313567176496854e-06, |
| "loss": 0.4522, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.5325345622119816, |
| "grad_norm": 0.4444742798805237, |
| "learning_rate": 5.303931045795184e-06, |
| "loss": 0.4627, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.5330875576036866, |
| "grad_norm": 0.4218832552433014, |
| "learning_rate": 5.294293781911769e-06, |
| "loss": 0.4762, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.5336405529953917, |
| "grad_norm": 0.40190473198890686, |
| "learning_rate": 5.284655420778353e-06, |
| "loss": 0.4399, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.5341935483870968, |
| "grad_norm": 0.44916805624961853, |
| "learning_rate": 5.275015998330765e-06, |
| "loss": 0.4457, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.5347465437788018, |
| "grad_norm": 0.4550216794013977, |
| "learning_rate": 5.265375550508793e-06, |
| "loss": 0.4719, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.5352995391705069, |
| "grad_norm": 0.42324307560920715, |
| "learning_rate": 5.255734113256051e-06, |
| "loss": 0.46, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.5358525345622119, |
| "grad_norm": 0.3758416473865509, |
| "learning_rate": 5.246091722519839e-06, |
| "loss": 0.4441, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.5364055299539171, |
| "grad_norm": 0.37716051936149597, |
| "learning_rate": 5.236448414251012e-06, |
| "loss": 0.4643, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.5369585253456222, |
| "grad_norm": 0.3788035809993744, |
| "learning_rate": 5.226804224403846e-06, |
| "loss": 0.4592, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.5375115207373272, |
| "grad_norm": 0.38398197293281555, |
| "learning_rate": 5.217159188935907e-06, |
| "loss": 0.4744, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.5380645161290323, |
| "grad_norm": 0.3767092823982239, |
| "learning_rate": 5.207513343807907e-06, |
| "loss": 0.4698, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.5386175115207373, |
| "grad_norm": 0.39843782782554626, |
| "learning_rate": 5.1978667249835855e-06, |
| "loss": 0.4803, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.5391705069124424, |
| "grad_norm": 0.38011598587036133, |
| "learning_rate": 5.18821936842956e-06, |
| "loss": 0.4398, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.5397235023041475, |
| "grad_norm": 0.36936551332473755, |
| "learning_rate": 5.1785713101152e-06, |
| "loss": 0.4662, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.5402764976958525, |
| "grad_norm": 0.3832260072231293, |
| "learning_rate": 5.168922586012495e-06, |
| "loss": 0.488, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.5408294930875576, |
| "grad_norm": 0.38859879970550537, |
| "learning_rate": 5.159273232095911e-06, |
| "loss": 0.4982, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.5413824884792626, |
| "grad_norm": 0.4032990038394928, |
| "learning_rate": 5.1496232843422665e-06, |
| "loss": 0.4919, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.5419354838709678, |
| "grad_norm": 0.3422912061214447, |
| "learning_rate": 5.139972778730593e-06, |
| "loss": 0.4658, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.5424884792626729, |
| "grad_norm": 0.38321739435195923, |
| "learning_rate": 5.130321751242002e-06, |
| "loss": 0.4738, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.5430414746543779, |
| "grad_norm": 0.3506300151348114, |
| "learning_rate": 5.12067023785955e-06, |
| "loss": 0.4612, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.543594470046083, |
| "grad_norm": 0.3691524565219879, |
| "learning_rate": 5.111018274568106e-06, |
| "loss": 0.4461, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.544147465437788, |
| "grad_norm": 0.37267613410949707, |
| "learning_rate": 5.101365897354215e-06, |
| "loss": 0.4891, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.5447004608294931, |
| "grad_norm": 0.3920471668243408, |
| "learning_rate": 5.091713142205967e-06, |
| "loss": 0.4645, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.5452534562211981, |
| "grad_norm": 0.37700968980789185, |
| "learning_rate": 5.082060045112862e-06, |
| "loss": 0.4721, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.5458064516129032, |
| "grad_norm": 0.4021558165550232, |
| "learning_rate": 5.072406642065671e-06, |
| "loss": 0.4825, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.5463594470046083, |
| "grad_norm": 0.40953153371810913, |
| "learning_rate": 5.062752969056311e-06, |
| "loss": 0.4478, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.5469124423963133, |
| "grad_norm": 0.4201180934906006, |
| "learning_rate": 5.0530990620777e-06, |
| "loss": 0.4405, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.5474654377880185, |
| "grad_norm": 0.42839857935905457, |
| "learning_rate": 5.0434449571236314e-06, |
| "loss": 0.4774, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.5480184331797235, |
| "grad_norm": 0.40306076407432556, |
| "learning_rate": 5.033790690188637e-06, |
| "loss": 0.475, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.5485714285714286, |
| "grad_norm": 0.3931422829627991, |
| "learning_rate": 5.024136297267851e-06, |
| "loss": 0.4813, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.5491244239631337, |
| "grad_norm": 0.41347619891166687, |
| "learning_rate": 5.01448181435688e-06, |
| "loss": 0.4726, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.5496774193548387, |
| "grad_norm": 0.41298919916152954, |
| "learning_rate": 5.004827277451661e-06, |
| "loss": 0.4581, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.5502304147465438, |
| "grad_norm": 0.3852965235710144, |
| "learning_rate": 4.9951727225483395e-06, |
| "loss": 0.4638, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.5507834101382488, |
| "grad_norm": 0.37419193983078003, |
| "learning_rate": 4.985518185643122e-06, |
| "loss": 0.4675, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.5513364055299539, |
| "grad_norm": 0.36649253964424133, |
| "learning_rate": 4.975863702732151e-06, |
| "loss": 0.4584, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.551889400921659, |
| "grad_norm": 0.4342942237854004, |
| "learning_rate": 4.966209309811364e-06, |
| "loss": 0.4731, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.552442396313364, |
| "grad_norm": 0.37662389874458313, |
| "learning_rate": 4.95655504287637e-06, |
| "loss": 0.4696, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.5529953917050692, |
| "grad_norm": 0.40196794271469116, |
| "learning_rate": 4.946900937922302e-06, |
| "loss": 0.4496, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5535483870967742, |
| "grad_norm": 0.3595891296863556, |
| "learning_rate": 4.93724703094369e-06, |
| "loss": 0.4621, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.5541013824884793, |
| "grad_norm": 0.4029429256916046, |
| "learning_rate": 4.927593357934329e-06, |
| "loss": 0.4885, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.5546543778801843, |
| "grad_norm": 0.37034833431243896, |
| "learning_rate": 4.917939954887139e-06, |
| "loss": 0.462, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.5552073732718894, |
| "grad_norm": 0.4130437672138214, |
| "learning_rate": 4.908286857794034e-06, |
| "loss": 0.4646, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.5557603686635945, |
| "grad_norm": 0.39467036724090576, |
| "learning_rate": 4.8986341026457875e-06, |
| "loss": 0.4557, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.5563133640552995, |
| "grad_norm": 0.37382972240448, |
| "learning_rate": 4.888981725431897e-06, |
| "loss": 0.4589, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.5568663594470046, |
| "grad_norm": 0.3812313377857208, |
| "learning_rate": 4.879329762140453e-06, |
| "loss": 0.476, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.5574193548387096, |
| "grad_norm": 0.3706028461456299, |
| "learning_rate": 4.8696782487580005e-06, |
| "loss": 0.4534, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.5579723502304147, |
| "grad_norm": 0.40674856305122375, |
| "learning_rate": 4.8600272212694094e-06, |
| "loss": 0.4555, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.5585253456221199, |
| "grad_norm": 0.3949076235294342, |
| "learning_rate": 4.850376715657736e-06, |
| "loss": 0.4569, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.5590783410138249, |
| "grad_norm": 0.3829311430454254, |
| "learning_rate": 4.840726767904089e-06, |
| "loss": 0.4589, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.55963133640553, |
| "grad_norm": 0.36710280179977417, |
| "learning_rate": 4.831077413987505e-06, |
| "loss": 0.4763, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.560184331797235, |
| "grad_norm": 0.37851566076278687, |
| "learning_rate": 4.8214286898848e-06, |
| "loss": 0.4723, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.5607373271889401, |
| "grad_norm": 0.3961973488330841, |
| "learning_rate": 4.81178063157044e-06, |
| "loss": 0.456, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.5612903225806452, |
| "grad_norm": 0.38343876600265503, |
| "learning_rate": 4.802133275016415e-06, |
| "loss": 0.4626, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.5618433179723502, |
| "grad_norm": 0.3635197877883911, |
| "learning_rate": 4.792486656192094e-06, |
| "loss": 0.4692, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.5623963133640553, |
| "grad_norm": 0.3797365128993988, |
| "learning_rate": 4.782840811064095e-06, |
| "loss": 0.4587, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.5629493087557603, |
| "grad_norm": 0.35782304406166077, |
| "learning_rate": 4.773195775596155e-06, |
| "loss": 0.4503, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.5635023041474654, |
| "grad_norm": 0.370126336812973, |
| "learning_rate": 4.763551585748989e-06, |
| "loss": 0.4588, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.5640552995391706, |
| "grad_norm": 0.3596176207065582, |
| "learning_rate": 4.753908277480162e-06, |
| "loss": 0.4919, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.5646082949308756, |
| "grad_norm": 0.343191921710968, |
| "learning_rate": 4.74426588674395e-06, |
| "loss": 0.4667, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.5651612903225807, |
| "grad_norm": 0.3751341998577118, |
| "learning_rate": 4.734624449491208e-06, |
| "loss": 0.4529, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.5657142857142857, |
| "grad_norm": 0.3533148169517517, |
| "learning_rate": 4.724984001669237e-06, |
| "loss": 0.4491, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.5662672811059908, |
| "grad_norm": 0.38230666518211365, |
| "learning_rate": 4.715344579221649e-06, |
| "loss": 0.4961, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.5668202764976958, |
| "grad_norm": 0.40656420588493347, |
| "learning_rate": 4.705706218088232e-06, |
| "loss": 0.4773, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.5673732718894009, |
| "grad_norm": 0.3879065215587616, |
| "learning_rate": 4.696068954204817e-06, |
| "loss": 0.5097, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.567926267281106, |
| "grad_norm": 0.40097224712371826, |
| "learning_rate": 4.686432823503147e-06, |
| "loss": 0.4661, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.568479262672811, |
| "grad_norm": 0.37987810373306274, |
| "learning_rate": 4.676797861910735e-06, |
| "loss": 0.4429, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.5690322580645162, |
| "grad_norm": 0.38563069701194763, |
| "learning_rate": 4.667164105350739e-06, |
| "loss": 0.4719, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.5695852534562212, |
| "grad_norm": 0.3681628406047821, |
| "learning_rate": 4.657531589741822e-06, |
| "loss": 0.4757, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.5701382488479263, |
| "grad_norm": 0.4479837119579315, |
| "learning_rate": 4.647900350998022e-06, |
| "loss": 0.471, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.5706912442396314, |
| "grad_norm": 0.39742401242256165, |
| "learning_rate": 4.638270425028614e-06, |
| "loss": 0.4762, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.5712442396313364, |
| "grad_norm": 0.38390353322029114, |
| "learning_rate": 4.628641847737982e-06, |
| "loss": 0.4475, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.5717972350230415, |
| "grad_norm": 0.4764840602874756, |
| "learning_rate": 4.6190146550254775e-06, |
| "loss": 0.4827, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.5723502304147465, |
| "grad_norm": 0.41221946477890015, |
| "learning_rate": 4.609388882785291e-06, |
| "loss": 0.495, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.5729032258064516, |
| "grad_norm": 0.35299742221832275, |
| "learning_rate": 4.599764566906319e-06, |
| "loss": 0.4614, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.5734562211981566, |
| "grad_norm": 0.45965003967285156, |
| "learning_rate": 4.590141743272026e-06, |
| "loss": 0.462, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.5740092165898617, |
| "grad_norm": 0.37869590520858765, |
| "learning_rate": 4.5805204477603135e-06, |
| "loss": 0.4442, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.5745622119815669, |
| "grad_norm": 0.37475693225860596, |
| "learning_rate": 4.570900716243385e-06, |
| "loss": 0.4468, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.5751152073732719, |
| "grad_norm": 0.38735684752464294, |
| "learning_rate": 4.561282584587612e-06, |
| "loss": 0.4444, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.575668202764977, |
| "grad_norm": 0.3577326834201813, |
| "learning_rate": 4.551666088653404e-06, |
| "loss": 0.4864, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.576221198156682, |
| "grad_norm": 0.3724598288536072, |
| "learning_rate": 4.542051264295069e-06, |
| "loss": 0.4556, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.5767741935483871, |
| "grad_norm": 0.4160480797290802, |
| "learning_rate": 4.532438147360684e-06, |
| "loss": 0.4599, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.5773271889400922, |
| "grad_norm": 0.38303178548812866, |
| "learning_rate": 4.522826773691958e-06, |
| "loss": 0.4574, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.5778801843317972, |
| "grad_norm": 0.4100678861141205, |
| "learning_rate": 4.513217179124106e-06, |
| "loss": 0.4637, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.5784331797235023, |
| "grad_norm": 0.4295979142189026, |
| "learning_rate": 4.5036093994857025e-06, |
| "loss": 0.4747, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.5789861751152073, |
| "grad_norm": 0.36032918095588684, |
| "learning_rate": 4.49400347059856e-06, |
| "loss": 0.4682, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.5795391705069124, |
| "grad_norm": 0.4092552959918976, |
| "learning_rate": 4.484399428277589e-06, |
| "loss": 0.463, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.5800921658986176, |
| "grad_norm": 0.3771011233329773, |
| "learning_rate": 4.474797308330665e-06, |
| "loss": 0.4865, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.5806451612903226, |
| "grad_norm": 0.37506765127182007, |
| "learning_rate": 4.465197146558498e-06, |
| "loss": 0.4558, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.5811981566820277, |
| "grad_norm": 0.34741005301475525, |
| "learning_rate": 4.455598978754496e-06, |
| "loss": 0.445, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.5817511520737327, |
| "grad_norm": 0.41416463255882263, |
| "learning_rate": 4.4460028407046344e-06, |
| "loss": 0.4755, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.5823041474654378, |
| "grad_norm": 0.414931982755661, |
| "learning_rate": 4.436408768187319e-06, |
| "loss": 0.4879, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.5828571428571429, |
| "grad_norm": 0.37698793411254883, |
| "learning_rate": 4.426816796973254e-06, |
| "loss": 0.4896, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.5834101382488479, |
| "grad_norm": 0.4369506537914276, |
| "learning_rate": 4.417226962825311e-06, |
| "loss": 0.4723, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.583963133640553, |
| "grad_norm": 0.3790305256843567, |
| "learning_rate": 4.407639301498392e-06, |
| "loss": 0.4466, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.584516129032258, |
| "grad_norm": 0.3516313135623932, |
| "learning_rate": 4.398053848739299e-06, |
| "loss": 0.4483, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.5850691244239631, |
| "grad_norm": 0.42550036311149597, |
| "learning_rate": 4.388470640286599e-06, |
| "loss": 0.4633, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.5856221198156683, |
| "grad_norm": 0.3732847571372986, |
| "learning_rate": 4.378889711870492e-06, |
| "loss": 0.4909, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.5861751152073733, |
| "grad_norm": 0.3723104000091553, |
| "learning_rate": 4.369311099212676e-06, |
| "loss": 0.4612, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.5867281105990784, |
| "grad_norm": 0.4225739538669586, |
| "learning_rate": 4.359734838026214e-06, |
| "loss": 0.4576, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.5872811059907834, |
| "grad_norm": 0.4037158489227295, |
| "learning_rate": 4.3501609640154056e-06, |
| "loss": 0.4803, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.5878341013824885, |
| "grad_norm": 0.3930150866508484, |
| "learning_rate": 4.340589512875646e-06, |
| "loss": 0.4484, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.5883870967741935, |
| "grad_norm": 0.4097388684749603, |
| "learning_rate": 4.331020520293298e-06, |
| "loss": 0.4699, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.5889400921658986, |
| "grad_norm": 0.3482785224914551, |
| "learning_rate": 4.321454021945559e-06, |
| "loss": 0.4632, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.5894930875576037, |
| "grad_norm": 0.45652082562446594, |
| "learning_rate": 4.311890053500326e-06, |
| "loss": 0.4699, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.5900460829493087, |
| "grad_norm": 0.3783150613307953, |
| "learning_rate": 4.302328650616062e-06, |
| "loss": 0.4662, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.5905990783410138, |
| "grad_norm": 0.36994531750679016, |
| "learning_rate": 4.292769848941669e-06, |
| "loss": 0.4581, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.5911520737327189, |
| "grad_norm": 0.3842531144618988, |
| "learning_rate": 4.283213684116346e-06, |
| "loss": 0.4631, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.591705069124424, |
| "grad_norm": 0.4432550370693207, |
| "learning_rate": 4.273660191769463e-06, |
| "loss": 0.4759, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.5922580645161291, |
| "grad_norm": 0.37499210238456726, |
| "learning_rate": 4.264109407520425e-06, |
| "loss": 0.4703, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.5928110599078341, |
| "grad_norm": 0.383779376745224, |
| "learning_rate": 4.2545613669785405e-06, |
| "loss": 0.4793, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.5933640552995392, |
| "grad_norm": 0.43077680468559265, |
| "learning_rate": 4.245016105742889e-06, |
| "loss": 0.447, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.5939170506912442, |
| "grad_norm": 0.3452954888343811, |
| "learning_rate": 4.2354736594021854e-06, |
| "loss": 0.457, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.5944700460829493, |
| "grad_norm": 0.3940063714981079, |
| "learning_rate": 4.2259340635346515e-06, |
| "loss": 0.4579, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.5950230414746543, |
| "grad_norm": 0.37174075841903687, |
| "learning_rate": 4.21639735370788e-06, |
| "loss": 0.4718, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.5955760368663594, |
| "grad_norm": 0.38617780804634094, |
| "learning_rate": 4.206863565478705e-06, |
| "loss": 0.4698, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.5961290322580645, |
| "grad_norm": 0.36345598101615906, |
| "learning_rate": 4.197332734393067e-06, |
| "loss": 0.4828, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.5966820276497696, |
| "grad_norm": 0.37622806429862976, |
| "learning_rate": 4.187804895985881e-06, |
| "loss": 0.4584, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.5972350230414747, |
| "grad_norm": 0.41539904475212097, |
| "learning_rate": 4.1782800857809025e-06, |
| "loss": 0.4606, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.5977880184331797, |
| "grad_norm": 0.37827736139297485, |
| "learning_rate": 4.168758339290599e-06, |
| "loss": 0.4576, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.5983410138248848, |
| "grad_norm": 0.36408886313438416, |
| "learning_rate": 4.159239692016012e-06, |
| "loss": 0.4792, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.5988940092165899, |
| "grad_norm": 0.347615510225296, |
| "learning_rate": 4.149724179446631e-06, |
| "loss": 0.4627, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.5994470046082949, |
| "grad_norm": 0.36233338713645935, |
| "learning_rate": 4.140211837060258e-06, |
| "loss": 0.4595, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.385028213262558, |
| "learning_rate": 4.130702700322873e-06, |
| "loss": 0.4469, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.600552995391705, |
| "grad_norm": 0.3965871036052704, |
| "learning_rate": 4.121196804688506e-06, |
| "loss": 0.4761, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.6011059907834101, |
| "grad_norm": 0.36717289686203003, |
| "learning_rate": 4.111694185599101e-06, |
| "loss": 0.4401, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.6016589861751153, |
| "grad_norm": 0.3657437264919281, |
| "learning_rate": 4.102194878484386e-06, |
| "loss": 0.4613, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.6022119815668203, |
| "grad_norm": 0.4054916799068451, |
| "learning_rate": 4.092698918761742e-06, |
| "loss": 0.489, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.6027649769585254, |
| "grad_norm": 0.40190011262893677, |
| "learning_rate": 4.083206341836069e-06, |
| "loss": 0.4703, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.6033179723502304, |
| "grad_norm": 0.4301840662956238, |
| "learning_rate": 4.073717183099654e-06, |
| "loss": 0.4676, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.6038709677419355, |
| "grad_norm": 0.3311327397823334, |
| "learning_rate": 4.06423147793204e-06, |
| "loss": 0.4695, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.6044239631336406, |
| "grad_norm": 0.3920708894729614, |
| "learning_rate": 4.054749261699893e-06, |
| "loss": 0.4834, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.6049769585253456, |
| "grad_norm": 0.33984097838401794, |
| "learning_rate": 4.045270569756872e-06, |
| "loss": 0.4633, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.6055299539170507, |
| "grad_norm": 0.36689960956573486, |
| "learning_rate": 4.0357954374434936e-06, |
| "loss": 0.4778, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.6060829493087557, |
| "grad_norm": 0.3129914402961731, |
| "learning_rate": 4.026323900087006e-06, |
| "loss": 0.4389, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.6066359447004608, |
| "grad_norm": 0.3611215651035309, |
| "learning_rate": 4.016855993001251e-06, |
| "loss": 0.4687, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.607188940092166, |
| "grad_norm": 0.36361318826675415, |
| "learning_rate": 4.007391751486536e-06, |
| "loss": 0.4592, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.607741935483871, |
| "grad_norm": 0.3965631425380707, |
| "learning_rate": 3.997931210829503e-06, |
| "loss": 0.4403, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.6082949308755761, |
| "grad_norm": 0.371737539768219, |
| "learning_rate": 3.988474406302995e-06, |
| "loss": 0.4433, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6088479262672811, |
| "grad_norm": 0.4065137207508087, |
| "learning_rate": 3.979021373165924e-06, |
| "loss": 0.4548, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.6094009216589862, |
| "grad_norm": 0.36007609963417053, |
| "learning_rate": 3.9695721466631426e-06, |
| "loss": 0.4664, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.6099539170506912, |
| "grad_norm": 0.39393535256385803, |
| "learning_rate": 3.9601267620253075e-06, |
| "loss": 0.4899, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.6105069124423963, |
| "grad_norm": 0.40391650795936584, |
| "learning_rate": 3.950685254468755e-06, |
| "loss": 0.4657, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.6110599078341014, |
| "grad_norm": 0.3698117434978485, |
| "learning_rate": 3.941247659195364e-06, |
| "loss": 0.4524, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.6116129032258064, |
| "grad_norm": 0.35965868830680847, |
| "learning_rate": 3.931814011392427e-06, |
| "loss": 0.4629, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.6121658986175115, |
| "grad_norm": 0.4399069547653198, |
| "learning_rate": 3.922384346232519e-06, |
| "loss": 0.491, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.6127188940092166, |
| "grad_norm": 0.3660935163497925, |
| "learning_rate": 3.912958698873366e-06, |
| "loss": 0.474, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.6132718894009217, |
| "grad_norm": 0.3902994990348816, |
| "learning_rate": 3.903537104457713e-06, |
| "loss": 0.4979, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.6138248847926268, |
| "grad_norm": 0.4067245125770569, |
| "learning_rate": 3.894119598113196e-06, |
| "loss": 0.4724, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.6143778801843318, |
| "grad_norm": 0.3835393190383911, |
| "learning_rate": 3.8847062149522075e-06, |
| "loss": 0.4558, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.6149308755760369, |
| "grad_norm": 0.3831106126308441, |
| "learning_rate": 3.875296990071768e-06, |
| "loss": 0.4619, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.6154838709677419, |
| "grad_norm": 0.3794800043106079, |
| "learning_rate": 3.8658919585533916e-06, |
| "loss": 0.4535, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.616036866359447, |
| "grad_norm": 0.3396313786506653, |
| "learning_rate": 3.8564911554629616e-06, |
| "loss": 0.4892, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.616589861751152, |
| "grad_norm": 0.37773552536964417, |
| "learning_rate": 3.847094615850593e-06, |
| "loss": 0.46, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.6171428571428571, |
| "grad_norm": 0.38637781143188477, |
| "learning_rate": 3.837702374750507e-06, |
| "loss": 0.4701, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.6176958525345622, |
| "grad_norm": 0.3425704538822174, |
| "learning_rate": 3.828314467180895e-06, |
| "loss": 0.4389, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.6182488479262673, |
| "grad_norm": 0.35482358932495117, |
| "learning_rate": 3.818930928143796e-06, |
| "loss": 0.477, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.6188018433179724, |
| "grad_norm": 0.3860636353492737, |
| "learning_rate": 3.8095517926249557e-06, |
| "loss": 0.4748, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.6193548387096774, |
| "grad_norm": 0.412280797958374, |
| "learning_rate": 3.800177095593706e-06, |
| "loss": 0.4624, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6199078341013825, |
| "grad_norm": 0.3511303961277008, |
| "learning_rate": 3.790806872002828e-06, |
| "loss": 0.4867, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.6204608294930876, |
| "grad_norm": 0.3653261065483093, |
| "learning_rate": 3.7814411567884256e-06, |
| "loss": 0.4737, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.6210138248847926, |
| "grad_norm": 0.3349493145942688, |
| "learning_rate": 3.7720799848697924e-06, |
| "loss": 0.4705, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.6215668202764977, |
| "grad_norm": 0.3643210828304291, |
| "learning_rate": 3.762723391149282e-06, |
| "loss": 0.4934, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.6221198156682027, |
| "grad_norm": 0.3928675055503845, |
| "learning_rate": 3.75337141051218e-06, |
| "loss": 0.4726, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.6226728110599078, |
| "grad_norm": 0.36943259835243225, |
| "learning_rate": 3.7440240778265725e-06, |
| "loss": 0.4557, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.6232258064516129, |
| "grad_norm": 0.37895265221595764, |
| "learning_rate": 3.734681427943214e-06, |
| "loss": 0.4989, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.623778801843318, |
| "grad_norm": 0.34087231755256653, |
| "learning_rate": 3.725343495695404e-06, |
| "loss": 0.4418, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.6243317972350231, |
| "grad_norm": 0.3866007626056671, |
| "learning_rate": 3.716010315898847e-06, |
| "loss": 0.4515, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.6248847926267281, |
| "grad_norm": 0.4217049777507782, |
| "learning_rate": 3.706681923351533e-06, |
| "loss": 0.4962, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.6254377880184332, |
| "grad_norm": 0.4117668867111206, |
| "learning_rate": 3.6973583528336e-06, |
| "loss": 0.4704, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.6259907834101383, |
| "grad_norm": 0.3604739308357239, |
| "learning_rate": 3.68803963910721e-06, |
| "loss": 0.4839, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.6265437788018433, |
| "grad_norm": 0.4002184569835663, |
| "learning_rate": 3.678725816916415e-06, |
| "loss": 0.4853, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.6270967741935484, |
| "grad_norm": 0.3674345910549164, |
| "learning_rate": 3.6694169209870305e-06, |
| "loss": 0.4471, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.6276497695852534, |
| "grad_norm": 0.37487921118736267, |
| "learning_rate": 3.660112986026504e-06, |
| "loss": 0.4646, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.6282027649769585, |
| "grad_norm": 0.36266854405403137, |
| "learning_rate": 3.6508140467237873e-06, |
| "loss": 0.4802, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.6287557603686635, |
| "grad_norm": 0.3480757176876068, |
| "learning_rate": 3.6415201377492048e-06, |
| "loss": 0.458, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.6293087557603687, |
| "grad_norm": 0.3710945248603821, |
| "learning_rate": 3.632231293754328e-06, |
| "loss": 0.4648, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.6298617511520738, |
| "grad_norm": 0.42196762561798096, |
| "learning_rate": 3.622947549371841e-06, |
| "loss": 0.4802, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.6304147465437788, |
| "grad_norm": 0.3192515969276428, |
| "learning_rate": 3.6136689392154186e-06, |
| "loss": 0.4559, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.6309677419354839, |
| "grad_norm": 0.37404048442840576, |
| "learning_rate": 3.6043954978795868e-06, |
| "loss": 0.4647, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.6315207373271889, |
| "grad_norm": 0.37971508502960205, |
| "learning_rate": 3.5951272599396086e-06, |
| "loss": 0.4347, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.632073732718894, |
| "grad_norm": 0.410057932138443, |
| "learning_rate": 3.5858642599513394e-06, |
| "loss": 0.4759, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.6326267281105991, |
| "grad_norm": 0.367079496383667, |
| "learning_rate": 3.576606532451108e-06, |
| "loss": 0.4864, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.6331797235023041, |
| "grad_norm": 0.3866696357727051, |
| "learning_rate": 3.567354111955585e-06, |
| "loss": 0.4746, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.6337327188940092, |
| "grad_norm": 0.3390635848045349, |
| "learning_rate": 3.5581070329616543e-06, |
| "loss": 0.4593, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.6342857142857142, |
| "grad_norm": 0.41744962334632874, |
| "learning_rate": 3.5488653299462844e-06, |
| "loss": 0.4514, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.6348387096774194, |
| "grad_norm": 0.3437979817390442, |
| "learning_rate": 3.5396290373664e-06, |
| "loss": 0.4689, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.6353917050691245, |
| "grad_norm": 0.32637640833854675, |
| "learning_rate": 3.5303981896587534e-06, |
| "loss": 0.4657, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.6359447004608295, |
| "grad_norm": 0.3600415885448456, |
| "learning_rate": 3.521172821239796e-06, |
| "loss": 0.4637, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.6364976958525346, |
| "grad_norm": 0.4324146807193756, |
| "learning_rate": 3.5119529665055506e-06, |
| "loss": 0.4715, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.6370506912442396, |
| "grad_norm": 0.3583220839500427, |
| "learning_rate": 3.502738659831483e-06, |
| "loss": 0.4666, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.6376036866359447, |
| "grad_norm": 0.35306817293167114, |
| "learning_rate": 3.49352993557237e-06, |
| "loss": 0.4554, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.6381566820276497, |
| "grad_norm": 0.39276352524757385, |
| "learning_rate": 3.48432682806218e-06, |
| "loss": 0.4492, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.6387096774193548, |
| "grad_norm": 0.3977315425872803, |
| "learning_rate": 3.4751293716139366e-06, |
| "loss": 0.4523, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.6392626728110599, |
| "grad_norm": 0.36172613501548767, |
| "learning_rate": 3.4659376005195956e-06, |
| "loss": 0.5053, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.639815668202765, |
| "grad_norm": 0.34491607546806335, |
| "learning_rate": 3.4567515490499135e-06, |
| "loss": 0.4633, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.6403686635944701, |
| "grad_norm": 0.3682193160057068, |
| "learning_rate": 3.4475712514543246e-06, |
| "loss": 0.4376, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.6409216589861751, |
| "grad_norm": 0.41406625509262085, |
| "learning_rate": 3.438396741960806e-06, |
| "loss": 0.4669, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.6414746543778802, |
| "grad_norm": 0.38740435242652893, |
| "learning_rate": 3.4292280547757586e-06, |
| "loss": 0.4516, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.6420276497695853, |
| "grad_norm": 0.35590702295303345, |
| "learning_rate": 3.420065224083873e-06, |
| "loss": 0.4406, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.6425806451612903, |
| "grad_norm": 0.3844543397426605, |
| "learning_rate": 3.4109082840480045e-06, |
| "loss": 0.4675, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.6431336405529954, |
| "grad_norm": 0.40717223286628723, |
| "learning_rate": 3.4017572688090467e-06, |
| "loss": 0.4862, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.6436866359447004, |
| "grad_norm": 0.37645992636680603, |
| "learning_rate": 3.392612212485802e-06, |
| "loss": 0.4813, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.6442396313364055, |
| "grad_norm": 0.33362942934036255, |
| "learning_rate": 3.3834731491748558e-06, |
| "loss": 0.4576, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.6447926267281106, |
| "grad_norm": 0.3458590805530548, |
| "learning_rate": 3.3743401129504496e-06, |
| "loss": 0.4664, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.6453456221198157, |
| "grad_norm": 0.39395076036453247, |
| "learning_rate": 3.3652131378643515e-06, |
| "loss": 0.4801, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.6458986175115208, |
| "grad_norm": 0.36739885807037354, |
| "learning_rate": 3.356092257945733e-06, |
| "loss": 0.4662, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.6464516129032258, |
| "grad_norm": 0.35936224460601807, |
| "learning_rate": 3.3469775072010417e-06, |
| "loss": 0.4678, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.6470046082949309, |
| "grad_norm": 0.35847219824790955, |
| "learning_rate": 3.337868919613869e-06, |
| "loss": 0.4595, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.647557603686636, |
| "grad_norm": 0.33475178480148315, |
| "learning_rate": 3.3287665291448323e-06, |
| "loss": 0.4631, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.648110599078341, |
| "grad_norm": 0.3567461669445038, |
| "learning_rate": 3.319670369731437e-06, |
| "loss": 0.475, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.6486635944700461, |
| "grad_norm": 0.4065379798412323, |
| "learning_rate": 3.310580475287963e-06, |
| "loss": 0.4596, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.6492165898617511, |
| "grad_norm": 0.34394586086273193, |
| "learning_rate": 3.3014968797053263e-06, |
| "loss": 0.4717, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.6497695852534562, |
| "grad_norm": 0.3613266944885254, |
| "learning_rate": 3.2924196168509657e-06, |
| "loss": 0.4933, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.6503225806451612, |
| "grad_norm": 0.38999873399734497, |
| "learning_rate": 3.283348720568702e-06, |
| "loss": 0.4756, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.6508755760368664, |
| "grad_norm": 0.3532698154449463, |
| "learning_rate": 3.274284224678621e-06, |
| "loss": 0.4575, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.6514285714285715, |
| "grad_norm": 0.33012717962265015, |
| "learning_rate": 3.2652261629769457e-06, |
| "loss": 0.4473, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.6519815668202765, |
| "grad_norm": 0.3971370458602905, |
| "learning_rate": 3.256174569235909e-06, |
| "loss": 0.5044, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.6525345622119816, |
| "grad_norm": 0.3590445816516876, |
| "learning_rate": 3.2471294772036287e-06, |
| "loss": 0.4675, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.6530875576036866, |
| "grad_norm": 0.3668496012687683, |
| "learning_rate": 3.238090920603981e-06, |
| "loss": 0.4419, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.6536405529953917, |
| "grad_norm": 0.3250981867313385, |
| "learning_rate": 3.2290589331364787e-06, |
| "loss": 0.4597, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.6541935483870968, |
| "grad_norm": 0.38982975482940674, |
| "learning_rate": 3.2200335484761352e-06, |
| "loss": 0.4638, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.6547465437788018, |
| "grad_norm": 0.38048577308654785, |
| "learning_rate": 3.2110148002733534e-06, |
| "loss": 0.473, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.6552995391705069, |
| "grad_norm": 0.35395413637161255, |
| "learning_rate": 3.2020027221537864e-06, |
| "loss": 0.4832, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.6558525345622119, |
| "grad_norm": 0.3707823157310486, |
| "learning_rate": 3.192997347718224e-06, |
| "loss": 0.4958, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.6564055299539171, |
| "grad_norm": 0.36011847853660583, |
| "learning_rate": 3.1839987105424586e-06, |
| "loss": 0.472, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.6569585253456222, |
| "grad_norm": 0.3693712055683136, |
| "learning_rate": 3.1750068441771637e-06, |
| "loss": 0.4649, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.6575115207373272, |
| "grad_norm": 0.34001612663269043, |
| "learning_rate": 3.1660217821477686e-06, |
| "loss": 0.4764, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.6580645161290323, |
| "grad_norm": 0.332956999540329, |
| "learning_rate": 3.1570435579543333e-06, |
| "loss": 0.4652, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.6586175115207373, |
| "grad_norm": 0.35701560974121094, |
| "learning_rate": 3.148072205071423e-06, |
| "loss": 0.4695, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.6591705069124424, |
| "grad_norm": 0.3385053277015686, |
| "learning_rate": 3.1391077569479856e-06, |
| "loss": 0.4524, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.6597235023041474, |
| "grad_norm": 0.35991233587265015, |
| "learning_rate": 3.130150247007222e-06, |
| "loss": 0.4322, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.6602764976958525, |
| "grad_norm": 0.36623886227607727, |
| "learning_rate": 3.1211997086464683e-06, |
| "loss": 0.4754, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.6608294930875576, |
| "grad_norm": 0.33661210536956787, |
| "learning_rate": 3.112256175237064e-06, |
| "loss": 0.464, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.6613824884792626, |
| "grad_norm": 0.3260611295700073, |
| "learning_rate": 3.103319680124235e-06, |
| "loss": 0.4608, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.6619354838709678, |
| "grad_norm": 0.38113391399383545, |
| "learning_rate": 3.0943902566269613e-06, |
| "loss": 0.4963, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.6624884792626728, |
| "grad_norm": 0.3196834623813629, |
| "learning_rate": 3.085467938037861e-06, |
| "loss": 0.4631, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.6630414746543779, |
| "grad_norm": 0.3713325262069702, |
| "learning_rate": 3.076552757623059e-06, |
| "loss": 0.4559, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.663594470046083, |
| "grad_norm": 0.35076281428337097, |
| "learning_rate": 3.0676447486220705e-06, |
| "loss": 0.4601, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.664147465437788, |
| "grad_norm": 0.34032174944877625, |
| "learning_rate": 3.058743944247665e-06, |
| "loss": 0.4702, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.6647004608294931, |
| "grad_norm": 0.37965843081474304, |
| "learning_rate": 3.0498503776857576e-06, |
| "loss": 0.4806, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.6652534562211981, |
| "grad_norm": 0.3258879780769348, |
| "learning_rate": 3.0409640820952735e-06, |
| "loss": 0.4331, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.6658064516129032, |
| "grad_norm": 0.3517114222049713, |
| "learning_rate": 3.0320850906080325e-06, |
| "loss": 0.4521, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.6663594470046083, |
| "grad_norm": 0.3746355473995209, |
| "learning_rate": 3.0232134363286163e-06, |
| "loss": 0.4702, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.6669124423963133, |
| "grad_norm": 0.3362243175506592, |
| "learning_rate": 3.0143491523342562e-06, |
| "loss": 0.4382, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.6674654377880185, |
| "grad_norm": 0.3588075041770935, |
| "learning_rate": 3.005492271674697e-06, |
| "loss": 0.452, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.6680184331797235, |
| "grad_norm": 0.4125935137271881, |
| "learning_rate": 2.9966428273720904e-06, |
| "loss": 0.4715, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.6685714285714286, |
| "grad_norm": 0.34157875180244446, |
| "learning_rate": 2.987800852420855e-06, |
| "loss": 0.4543, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.6691244239631337, |
| "grad_norm": 0.34906935691833496, |
| "learning_rate": 2.9789663797875614e-06, |
| "loss": 0.4535, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.6696774193548387, |
| "grad_norm": 0.3961377739906311, |
| "learning_rate": 2.970139442410811e-06, |
| "loss": 0.4992, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.6702304147465438, |
| "grad_norm": 0.3486752510070801, |
| "learning_rate": 2.961320073201107e-06, |
| "loss": 0.4603, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.6707834101382488, |
| "grad_norm": 0.3723919093608856, |
| "learning_rate": 2.952508305040739e-06, |
| "loss": 0.4735, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.6713364055299539, |
| "grad_norm": 0.33436325192451477, |
| "learning_rate": 2.9437041707836532e-06, |
| "loss": 0.4558, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.6718894009216589, |
| "grad_norm": 0.3649885058403015, |
| "learning_rate": 2.9349077032553387e-06, |
| "loss": 0.4734, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.6724423963133641, |
| "grad_norm": 0.4229625463485718, |
| "learning_rate": 2.9261189352526932e-06, |
| "loss": 0.4734, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.6729953917050692, |
| "grad_norm": 0.31533482670783997, |
| "learning_rate": 2.9173378995439107e-06, |
| "loss": 0.4596, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.6735483870967742, |
| "grad_norm": 0.39130347967147827, |
| "learning_rate": 2.9085646288683587e-06, |
| "loss": 0.4913, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.6741013824884793, |
| "grad_norm": 0.3557525873184204, |
| "learning_rate": 2.8997991559364493e-06, |
| "loss": 0.4708, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.6746543778801843, |
| "grad_norm": 0.36776086688041687, |
| "learning_rate": 2.8910415134295216e-06, |
| "loss": 0.4685, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.6752073732718894, |
| "grad_norm": 0.3936042785644531, |
| "learning_rate": 2.8822917339997237e-06, |
| "loss": 0.4667, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.6757603686635945, |
| "grad_norm": 0.3256804645061493, |
| "learning_rate": 2.87354985026988e-06, |
| "loss": 0.428, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.6763133640552995, |
| "grad_norm": 0.3750945031642914, |
| "learning_rate": 2.8648158948333817e-06, |
| "loss": 0.458, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.6768663594470046, |
| "grad_norm": 0.3525260090827942, |
| "learning_rate": 2.856089900254059e-06, |
| "loss": 0.4469, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.6774193548387096, |
| "grad_norm": 0.32836800813674927, |
| "learning_rate": 2.847371899066059e-06, |
| "loss": 0.4581, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.6779723502304148, |
| "grad_norm": 0.34020939469337463, |
| "learning_rate": 2.83866192377373e-06, |
| "loss": 0.444, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.6785253456221199, |
| "grad_norm": 0.31056728959083557, |
| "learning_rate": 2.829960006851492e-06, |
| "loss": 0.4748, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.6790783410138249, |
| "grad_norm": 0.3614257574081421, |
| "learning_rate": 2.8212661807437226e-06, |
| "loss": 0.487, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.67963133640553, |
| "grad_norm": 0.33884182572364807, |
| "learning_rate": 2.8125804778646315e-06, |
| "loss": 0.4871, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.680184331797235, |
| "grad_norm": 0.3698228597640991, |
| "learning_rate": 2.803902930598144e-06, |
| "loss": 0.4783, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.6807373271889401, |
| "grad_norm": 0.3055351674556732, |
| "learning_rate": 2.7952335712977764e-06, |
| "loss": 0.4488, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.6812903225806451, |
| "grad_norm": 0.3399738073348999, |
| "learning_rate": 2.7865724322865174e-06, |
| "loss": 0.4489, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.6818433179723502, |
| "grad_norm": 0.3493901193141937, |
| "learning_rate": 2.7779195458567067e-06, |
| "loss": 0.4508, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.6823963133640553, |
| "grad_norm": 0.38333308696746826, |
| "learning_rate": 2.7692749442699147e-06, |
| "loss": 0.4733, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.6829493087557603, |
| "grad_norm": 0.33638396859169006, |
| "learning_rate": 2.7606386597568223e-06, |
| "loss": 0.4521, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.6835023041474655, |
| "grad_norm": 0.33842137455940247, |
| "learning_rate": 2.7520107245171035e-06, |
| "loss": 0.4359, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.6840552995391705, |
| "grad_norm": 0.34261924028396606, |
| "learning_rate": 2.7433911707192976e-06, |
| "loss": 0.4713, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.6846082949308756, |
| "grad_norm": 0.33021679520606995, |
| "learning_rate": 2.7347800305007e-06, |
| "loss": 0.4563, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.6851612903225807, |
| "grad_norm": 0.33682435750961304, |
| "learning_rate": 2.7261773359672306e-06, |
| "loss": 0.4429, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.6857142857142857, |
| "grad_norm": 0.3547312319278717, |
| "learning_rate": 2.7175831191933275e-06, |
| "loss": 0.4846, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.6862672811059908, |
| "grad_norm": 0.3378848433494568, |
| "learning_rate": 2.7089974122218165e-06, |
| "loss": 0.4681, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.6868202764976958, |
| "grad_norm": 0.3516218066215515, |
| "learning_rate": 2.700420247063793e-06, |
| "loss": 0.4796, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.6873732718894009, |
| "grad_norm": 0.3513849973678589, |
| "learning_rate": 2.6918516556985088e-06, |
| "loss": 0.4732, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.687926267281106, |
| "grad_norm": 0.37055304646492004, |
| "learning_rate": 2.6832916700732466e-06, |
| "loss": 0.492, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.688479262672811, |
| "grad_norm": 0.3224850594997406, |
| "learning_rate": 2.674740322103204e-06, |
| "loss": 0.4618, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.6890322580645162, |
| "grad_norm": 0.3502301573753357, |
| "learning_rate": 2.666197643671375e-06, |
| "loss": 0.4562, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.6895852534562212, |
| "grad_norm": 0.37586522102355957, |
| "learning_rate": 2.657663666628428e-06, |
| "loss": 0.448, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.6901382488479263, |
| "grad_norm": 0.38795405626296997, |
| "learning_rate": 2.6491384227925903e-06, |
| "loss": 0.472, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.6906912442396314, |
| "grad_norm": 0.3674571216106415, |
| "learning_rate": 2.640621943949527e-06, |
| "loss": 0.4707, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.6912442396313364, |
| "grad_norm": 0.36492788791656494, |
| "learning_rate": 2.6321142618522288e-06, |
| "loss": 0.4644, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.6917972350230415, |
| "grad_norm": 0.345634400844574, |
| "learning_rate": 2.6236154082208776e-06, |
| "loss": 0.4556, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.6923502304147465, |
| "grad_norm": 0.3678940534591675, |
| "learning_rate": 2.6151254147427486e-06, |
| "loss": 0.4983, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.6929032258064516, |
| "grad_norm": 0.34437957406044006, |
| "learning_rate": 2.6066443130720786e-06, |
| "loss": 0.4613, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.6934562211981566, |
| "grad_norm": 0.3727521300315857, |
| "learning_rate": 2.598172134829955e-06, |
| "loss": 0.4771, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.6940092165898617, |
| "grad_norm": 0.3770442008972168, |
| "learning_rate": 2.5897089116041918e-06, |
| "loss": 0.4627, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.6945622119815669, |
| "grad_norm": 0.3384653329849243, |
| "learning_rate": 2.5812546749492163e-06, |
| "loss": 0.4494, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.6951152073732719, |
| "grad_norm": 0.3460019528865814, |
| "learning_rate": 2.5728094563859495e-06, |
| "loss": 0.4726, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.695668202764977, |
| "grad_norm": 0.328002005815506, |
| "learning_rate": 2.5643732874016903e-06, |
| "loss": 0.4437, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.696221198156682, |
| "grad_norm": 0.40374118089675903, |
| "learning_rate": 2.5559461994499968e-06, |
| "loss": 0.4766, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.6967741935483871, |
| "grad_norm": 0.3571721017360687, |
| "learning_rate": 2.5475282239505685e-06, |
| "loss": 0.459, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.6973271889400922, |
| "grad_norm": 0.42431801557540894, |
| "learning_rate": 2.5391193922891288e-06, |
| "loss": 0.4641, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.6978801843317972, |
| "grad_norm": 0.35599109530448914, |
| "learning_rate": 2.5307197358173126e-06, |
| "loss": 0.4601, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.6984331797235023, |
| "grad_norm": 0.3282775282859802, |
| "learning_rate": 2.5223292858525423e-06, |
| "loss": 0.4352, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.6989861751152073, |
| "grad_norm": 0.3075070381164551, |
| "learning_rate": 2.5139480736779164e-06, |
| "loss": 0.4373, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.6995391705069124, |
| "grad_norm": 0.3571772277355194, |
| "learning_rate": 2.5055761305420907e-06, |
| "loss": 0.4938, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.7000921658986176, |
| "grad_norm": 0.3431309461593628, |
| "learning_rate": 2.4972134876591618e-06, |
| "loss": 0.4413, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.7006451612903226, |
| "grad_norm": 0.3382079601287842, |
| "learning_rate": 2.4888601762085518e-06, |
| "loss": 0.502, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.7011981566820277, |
| "grad_norm": 0.31066352128982544, |
| "learning_rate": 2.4805162273348905e-06, |
| "loss": 0.4596, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.7017511520737327, |
| "grad_norm": 0.31966033577919006, |
| "learning_rate": 2.4721816721479007e-06, |
| "loss": 0.4769, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.7023041474654378, |
| "grad_norm": 0.3357180058956146, |
| "learning_rate": 2.4638565417222816e-06, |
| "loss": 0.437, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.7028571428571428, |
| "grad_norm": 0.335115522146225, |
| "learning_rate": 2.4555408670975928e-06, |
| "loss": 0.4498, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.7034101382488479, |
| "grad_norm": 0.33428287506103516, |
| "learning_rate": 2.4472346792781366e-06, |
| "loss": 0.4661, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.703963133640553, |
| "grad_norm": 0.3672966957092285, |
| "learning_rate": 2.438938009232851e-06, |
| "loss": 0.4775, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.704516129032258, |
| "grad_norm": 0.3439885079860687, |
| "learning_rate": 2.4306508878951805e-06, |
| "loss": 0.4611, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.7050691244239631, |
| "grad_norm": 0.3395749628543854, |
| "learning_rate": 2.4223733461629716e-06, |
| "loss": 0.4498, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.7056221198156682, |
| "grad_norm": 0.35915130376815796, |
| "learning_rate": 2.4141054148983532e-06, |
| "loss": 0.4767, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.7061751152073733, |
| "grad_norm": 0.393310546875, |
| "learning_rate": 2.4058471249276232e-06, |
| "loss": 0.4594, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.7067281105990784, |
| "grad_norm": 0.3339405357837677, |
| "learning_rate": 2.397598507041132e-06, |
| "loss": 0.4732, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.7072811059907834, |
| "grad_norm": 0.35175976157188416, |
| "learning_rate": 2.3893595919931673e-06, |
| "loss": 0.4608, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.7078341013824885, |
| "grad_norm": 0.33776116371154785, |
| "learning_rate": 2.381130410501845e-06, |
| "loss": 0.4568, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7083870967741935, |
| "grad_norm": 0.34300360083580017, |
| "learning_rate": 2.3729109932489825e-06, |
| "loss": 0.4802, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.7089400921658986, |
| "grad_norm": 0.36740466952323914, |
| "learning_rate": 2.3647013708799987e-06, |
| "loss": 0.4755, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.7094930875576037, |
| "grad_norm": 0.3558812439441681, |
| "learning_rate": 2.3565015740037915e-06, |
| "loss": 0.4623, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.7100460829493087, |
| "grad_norm": 0.4075135886669159, |
| "learning_rate": 2.3483116331926244e-06, |
| "loss": 0.4612, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.7105990783410139, |
| "grad_norm": 0.3450377285480499, |
| "learning_rate": 2.340131578982013e-06, |
| "loss": 0.4695, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.7111520737327189, |
| "grad_norm": 0.3357861042022705, |
| "learning_rate": 2.3319614418706127e-06, |
| "loss": 0.4826, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.711705069124424, |
| "grad_norm": 0.3738630712032318, |
| "learning_rate": 2.3238012523201035e-06, |
| "loss": 0.4905, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.712258064516129, |
| "grad_norm": 0.3660827577114105, |
| "learning_rate": 2.315651040755077e-06, |
| "loss": 0.4754, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.7128110599078341, |
| "grad_norm": 0.3476543426513672, |
| "learning_rate": 2.3075108375629218e-06, |
| "loss": 0.4728, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.7133640552995392, |
| "grad_norm": 0.33124610781669617, |
| "learning_rate": 2.299380673093712e-06, |
| "loss": 0.474, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.7139170506912442, |
| "grad_norm": 0.33316168189048767, |
| "learning_rate": 2.291260577660092e-06, |
| "loss": 0.4853, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.7144700460829493, |
| "grad_norm": 0.3274078369140625, |
| "learning_rate": 2.2831505815371656e-06, |
| "loss": 0.4571, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.7150230414746543, |
| "grad_norm": 0.3307998776435852, |
| "learning_rate": 2.275050714962383e-06, |
| "loss": 0.4498, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.7155760368663594, |
| "grad_norm": 0.3073872923851013, |
| "learning_rate": 2.2669610081354254e-06, |
| "loss": 0.4661, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.7161290322580646, |
| "grad_norm": 0.37436577677726746, |
| "learning_rate": 2.2588814912180944e-06, |
| "loss": 0.4746, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.7166820276497696, |
| "grad_norm": 0.3833792507648468, |
| "learning_rate": 2.2508121943342008e-06, |
| "loss": 0.4502, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.7172350230414747, |
| "grad_norm": 0.3415040075778961, |
| "learning_rate": 2.2427531475694496e-06, |
| "loss": 0.4652, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.7177880184331797, |
| "grad_norm": 0.3346996605396271, |
| "learning_rate": 2.2347043809713306e-06, |
| "loss": 0.4474, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.7183410138248848, |
| "grad_norm": 0.34259018301963806, |
| "learning_rate": 2.2266659245490017e-06, |
| "loss": 0.4832, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.7188940092165899, |
| "grad_norm": 0.3417535424232483, |
| "learning_rate": 2.218637808273184e-06, |
| "loss": 0.4674, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7194470046082949, |
| "grad_norm": 0.37401384115219116, |
| "learning_rate": 2.210620062076044e-06, |
| "loss": 0.4859, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.33238619565963745, |
| "learning_rate": 2.2026127158510843e-06, |
| "loss": 0.4441, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.720552995391705, |
| "grad_norm": 0.32708191871643066, |
| "learning_rate": 2.194615799453032e-06, |
| "loss": 0.4793, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.7211059907834101, |
| "grad_norm": 0.30514681339263916, |
| "learning_rate": 2.186629342697727e-06, |
| "loss": 0.4409, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.7216589861751153, |
| "grad_norm": 0.35654860734939575, |
| "learning_rate": 2.1786533753620155e-06, |
| "loss": 0.5136, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.7222119815668203, |
| "grad_norm": 0.3565172255039215, |
| "learning_rate": 2.170687927183629e-06, |
| "loss": 0.4683, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.7227649769585254, |
| "grad_norm": 0.3977636396884918, |
| "learning_rate": 2.1627330278610824e-06, |
| "loss": 0.4781, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.7233179723502304, |
| "grad_norm": 0.37379372119903564, |
| "learning_rate": 2.154788707053559e-06, |
| "loss": 0.4713, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.7238709677419355, |
| "grad_norm": 0.3560510575771332, |
| "learning_rate": 2.1468549943808033e-06, |
| "loss": 0.4725, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.7244239631336405, |
| "grad_norm": 0.3701375424861908, |
| "learning_rate": 2.1389319194230017e-06, |
| "loss": 0.4887, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.7249769585253456, |
| "grad_norm": 0.35575488209724426, |
| "learning_rate": 2.1310195117206863e-06, |
| "loss": 0.4628, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.7255299539170507, |
| "grad_norm": 0.34897711873054504, |
| "learning_rate": 2.1231178007746136e-06, |
| "loss": 0.443, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.7260829493087557, |
| "grad_norm": 0.36126595735549927, |
| "learning_rate": 2.1152268160456584e-06, |
| "loss": 0.4622, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.7266359447004608, |
| "grad_norm": 0.3314565420150757, |
| "learning_rate": 2.1073465869547043e-06, |
| "loss": 0.4418, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.727188940092166, |
| "grad_norm": 0.32749220728874207, |
| "learning_rate": 2.0994771428825332e-06, |
| "loss": 0.4325, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.727741935483871, |
| "grad_norm": 0.39038175344467163, |
| "learning_rate": 2.0916185131697163e-06, |
| "loss": 0.4928, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.7282949308755761, |
| "grad_norm": 0.3242311477661133, |
| "learning_rate": 2.083770727116503e-06, |
| "loss": 0.4708, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.7288479262672811, |
| "grad_norm": 0.3813181221485138, |
| "learning_rate": 2.0759338139827145e-06, |
| "loss": 0.4959, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.7294009216589862, |
| "grad_norm": 0.31590506434440613, |
| "learning_rate": 2.0681078029876322e-06, |
| "loss": 0.451, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.7299539170506912, |
| "grad_norm": 0.3604491651058197, |
| "learning_rate": 2.0602927233098908e-06, |
| "loss": 0.4817, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.7305069124423963, |
| "grad_norm": 0.3593979775905609, |
| "learning_rate": 2.0524886040873676e-06, |
| "loss": 0.4707, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.7310599078341014, |
| "grad_norm": 0.38674402236938477, |
| "learning_rate": 2.0446954744170748e-06, |
| "loss": 0.4522, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.7316129032258064, |
| "grad_norm": 0.3926672637462616, |
| "learning_rate": 2.036913363355052e-06, |
| "loss": 0.4582, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.7321658986175115, |
| "grad_norm": 0.3277740776538849, |
| "learning_rate": 2.029142299916255e-06, |
| "loss": 0.4913, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.7327188940092166, |
| "grad_norm": 0.3636924922466278, |
| "learning_rate": 2.0213823130744516e-06, |
| "loss": 0.4624, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.7332718894009217, |
| "grad_norm": 0.36329200863838196, |
| "learning_rate": 2.0136334317621093e-06, |
| "loss": 0.4551, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.7338248847926268, |
| "grad_norm": 0.3718715012073517, |
| "learning_rate": 2.005895684870291e-06, |
| "loss": 0.4717, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.7343778801843318, |
| "grad_norm": 0.3874320685863495, |
| "learning_rate": 1.9981691012485455e-06, |
| "loss": 0.4757, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.7349308755760369, |
| "grad_norm": 0.29239824414253235, |
| "learning_rate": 1.9904537097048004e-06, |
| "loss": 0.4554, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.7354838709677419, |
| "grad_norm": 0.3096185326576233, |
| "learning_rate": 1.982749539005254e-06, |
| "loss": 0.4559, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.736036866359447, |
| "grad_norm": 0.34961435198783875, |
| "learning_rate": 1.9750566178742697e-06, |
| "loss": 0.4376, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.736589861751152, |
| "grad_norm": 0.36215296387672424, |
| "learning_rate": 1.9673749749942655e-06, |
| "loss": 0.4544, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.7371428571428571, |
| "grad_norm": 0.3274637758731842, |
| "learning_rate": 1.959704639005613e-06, |
| "loss": 0.4468, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.7376958525345622, |
| "grad_norm": 0.324142187833786, |
| "learning_rate": 1.952045638506523e-06, |
| "loss": 0.4775, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.7382488479262673, |
| "grad_norm": 0.30846402049064636, |
| "learning_rate": 1.9443980020529456e-06, |
| "loss": 0.4507, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.7388018433179724, |
| "grad_norm": 0.36270537972450256, |
| "learning_rate": 1.9367617581584606e-06, |
| "loss": 0.4526, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.7393548387096774, |
| "grad_norm": 0.35891664028167725, |
| "learning_rate": 1.9291369352941696e-06, |
| "loss": 0.4606, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.7399078341013825, |
| "grad_norm": 0.3416483700275421, |
| "learning_rate": 1.9215235618885964e-06, |
| "loss": 0.4976, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.7404608294930876, |
| "grad_norm": 0.30863887071609497, |
| "learning_rate": 1.9139216663275727e-06, |
| "loss": 0.44, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.7410138248847926, |
| "grad_norm": 0.313449501991272, |
| "learning_rate": 1.9063312769541348e-06, |
| "loss": 0.4643, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.7415668202764977, |
| "grad_norm": 0.35145795345306396, |
| "learning_rate": 1.8987524220684222e-06, |
| "loss": 0.4689, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.7421198156682027, |
| "grad_norm": 0.34510427713394165, |
| "learning_rate": 1.8911851299275675e-06, |
| "loss": 0.498, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.7426728110599078, |
| "grad_norm": 0.3454337418079376, |
| "learning_rate": 1.8836294287455936e-06, |
| "loss": 0.4488, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.743225806451613, |
| "grad_norm": 0.3266771733760834, |
| "learning_rate": 1.8760853466933072e-06, |
| "loss": 0.4855, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.743778801843318, |
| "grad_norm": 0.3165615200996399, |
| "learning_rate": 1.8685529118981926e-06, |
| "loss": 0.4556, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.7443317972350231, |
| "grad_norm": 0.33930885791778564, |
| "learning_rate": 1.8610321524443099e-06, |
| "loss": 0.4401, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.7448847926267281, |
| "grad_norm": 0.35839566588401794, |
| "learning_rate": 1.8535230963721884e-06, |
| "loss": 0.4965, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.7454377880184332, |
| "grad_norm": 0.33815956115722656, |
| "learning_rate": 1.8460257716787216e-06, |
| "loss": 0.4868, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.7459907834101382, |
| "grad_norm": 0.34670326113700867, |
| "learning_rate": 1.8385402063170643e-06, |
| "loss": 0.4696, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.7465437788018433, |
| "grad_norm": 0.32649490237236023, |
| "learning_rate": 1.8310664281965268e-06, |
| "loss": 0.4951, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.7470967741935484, |
| "grad_norm": 0.32630065083503723, |
| "learning_rate": 1.8236044651824725e-06, |
| "loss": 0.4733, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.7476497695852534, |
| "grad_norm": 0.2883811593055725, |
| "learning_rate": 1.8161543450962127e-06, |
| "loss": 0.4637, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.7482027649769585, |
| "grad_norm": 0.34161558747291565, |
| "learning_rate": 1.8087160957149036e-06, |
| "loss": 0.4629, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.7487557603686636, |
| "grad_norm": 0.3212891221046448, |
| "learning_rate": 1.8012897447714417e-06, |
| "loss": 0.4703, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.7493087557603687, |
| "grad_norm": 0.3223973214626312, |
| "learning_rate": 1.7938753199543618e-06, |
| "loss": 0.4536, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.7498617511520738, |
| "grad_norm": 0.3456159830093384, |
| "learning_rate": 1.7864728489077332e-06, |
| "loss": 0.4671, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.7504147465437788, |
| "grad_norm": 0.35950833559036255, |
| "learning_rate": 1.779082359231057e-06, |
| "loss": 0.4674, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.7509677419354839, |
| "grad_norm": 0.33876293897628784, |
| "learning_rate": 1.7717038784791612e-06, |
| "loss": 0.4614, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.7515207373271889, |
| "grad_norm": 0.34674006700515747, |
| "learning_rate": 1.7643374341621006e-06, |
| "loss": 0.4434, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.752073732718894, |
| "grad_norm": 0.3747800290584564, |
| "learning_rate": 1.7569830537450533e-06, |
| "loss": 0.4607, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.752626728110599, |
| "grad_norm": 0.3502337336540222, |
| "learning_rate": 1.7496407646482182e-06, |
| "loss": 0.471, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.7531797235023041, |
| "grad_norm": 0.3303203582763672, |
| "learning_rate": 1.7423105942467116e-06, |
| "loss": 0.4801, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.7537327188940092, |
| "grad_norm": 0.3284555673599243, |
| "learning_rate": 1.7349925698704673e-06, |
| "loss": 0.4742, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.7542857142857143, |
| "grad_norm": 0.3339201509952545, |
| "learning_rate": 1.7276867188041336e-06, |
| "loss": 0.4588, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.7548387096774194, |
| "grad_norm": 0.35038480162620544, |
| "learning_rate": 1.7203930682869707e-06, |
| "loss": 0.4921, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.7553917050691245, |
| "grad_norm": 0.3145638406276703, |
| "learning_rate": 1.7131116455127518e-06, |
| "loss": 0.428, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.7559447004608295, |
| "grad_norm": 0.34270545840263367, |
| "learning_rate": 1.7058424776296583e-06, |
| "loss": 0.4599, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.7564976958525346, |
| "grad_norm": 0.33915436267852783, |
| "learning_rate": 1.698585591740181e-06, |
| "loss": 0.4713, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.7570506912442396, |
| "grad_norm": 0.3200076222419739, |
| "learning_rate": 1.6913410149010179e-06, |
| "loss": 0.4469, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.7576036866359447, |
| "grad_norm": 0.31938573718070984, |
| "learning_rate": 1.6841087741229745e-06, |
| "loss": 0.4563, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.7581566820276497, |
| "grad_norm": 0.3671538233757019, |
| "learning_rate": 1.6768888963708612e-06, |
| "loss": 0.471, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.7587096774193548, |
| "grad_norm": 0.33220696449279785, |
| "learning_rate": 1.669681408563395e-06, |
| "loss": 0.444, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.7592626728110599, |
| "grad_norm": 0.3261774182319641, |
| "learning_rate": 1.6624863375730977e-06, |
| "loss": 0.4775, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.759815668202765, |
| "grad_norm": 0.31591543555259705, |
| "learning_rate": 1.6553037102261955e-06, |
| "loss": 0.4763, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.7603686635944701, |
| "grad_norm": 0.345234215259552, |
| "learning_rate": 1.6481335533025195e-06, |
| "loss": 0.443, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.7609216589861751, |
| "grad_norm": 0.3390832245349884, |
| "learning_rate": 1.6409758935354065e-06, |
| "loss": 0.4622, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.7614746543778802, |
| "grad_norm": 0.34640833735466003, |
| "learning_rate": 1.6338307576115987e-06, |
| "loss": 0.4606, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.7620276497695853, |
| "grad_norm": 0.3271695375442505, |
| "learning_rate": 1.6266981721711438e-06, |
| "loss": 0.4354, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.7625806451612903, |
| "grad_norm": 0.34254854917526245, |
| "learning_rate": 1.619578163807296e-06, |
| "loss": 0.4601, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.7631336405529954, |
| "grad_norm": 0.3350234925746918, |
| "learning_rate": 1.6124707590664168e-06, |
| "loss": 0.4652, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.7636866359447004, |
| "grad_norm": 0.3148898780345917, |
| "learning_rate": 1.6053759844478768e-06, |
| "loss": 0.4702, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.7642396313364055, |
| "grad_norm": 0.3196204602718353, |
| "learning_rate": 1.5982938664039555e-06, |
| "loss": 0.464, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.7647926267281105, |
| "grad_norm": 0.3282592296600342, |
| "learning_rate": 1.591224431339744e-06, |
| "loss": 0.4631, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.7653456221198157, |
| "grad_norm": 0.34297293424606323, |
| "learning_rate": 1.584167705613046e-06, |
| "loss": 0.4494, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.7658986175115208, |
| "grad_norm": 0.34436681866645813, |
| "learning_rate": 1.5771237155342784e-06, |
| "loss": 0.4501, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.7664516129032258, |
| "grad_norm": 0.2950928807258606, |
| "learning_rate": 1.5700924873663758e-06, |
| "loss": 0.4493, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.7670046082949309, |
| "grad_norm": 0.3339753746986389, |
| "learning_rate": 1.5630740473246896e-06, |
| "loss": 0.4793, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.767557603686636, |
| "grad_norm": 0.3539128005504608, |
| "learning_rate": 1.5560684215768935e-06, |
| "loss": 0.4459, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.768110599078341, |
| "grad_norm": 0.35491037368774414, |
| "learning_rate": 1.549075636242882e-06, |
| "loss": 0.4814, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.7686635944700461, |
| "grad_norm": 0.3396543562412262, |
| "learning_rate": 1.5420957173946772e-06, |
| "loss": 0.4526, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.7692165898617511, |
| "grad_norm": 0.35023194551467896, |
| "learning_rate": 1.5351286910563278e-06, |
| "loss": 0.432, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.7697695852534562, |
| "grad_norm": 0.3362998366355896, |
| "learning_rate": 1.5281745832038159e-06, |
| "loss": 0.4518, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.7703225806451612, |
| "grad_norm": 0.3237576186656952, |
| "learning_rate": 1.5212334197649564e-06, |
| "loss": 0.4732, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.7708755760368664, |
| "grad_norm": 0.3349010646343231, |
| "learning_rate": 1.514305226619302e-06, |
| "loss": 0.4793, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.7714285714285715, |
| "grad_norm": 0.3398427367210388, |
| "learning_rate": 1.5073900295980481e-06, |
| "loss": 0.471, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.7719815668202765, |
| "grad_norm": 0.33397191762924194, |
| "learning_rate": 1.5004878544839335e-06, |
| "loss": 0.4398, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.7725345622119816, |
| "grad_norm": 0.3291890025138855, |
| "learning_rate": 1.493598727011148e-06, |
| "loss": 0.4632, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.7730875576036866, |
| "grad_norm": 0.33560484647750854, |
| "learning_rate": 1.4867226728652318e-06, |
| "loss": 0.4661, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.7736405529953917, |
| "grad_norm": 0.32114970684051514, |
| "learning_rate": 1.4798597176829844e-06, |
| "loss": 0.4558, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.7741935483870968, |
| "grad_norm": 0.29689663648605347, |
| "learning_rate": 1.4730098870523652e-06, |
| "loss": 0.4567, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.7747465437788018, |
| "grad_norm": 0.33287614583969116, |
| "learning_rate": 1.4661732065124012e-06, |
| "loss": 0.4458, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.7752995391705069, |
| "grad_norm": 0.32061877846717834, |
| "learning_rate": 1.45934970155309e-06, |
| "loss": 0.4482, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.7758525345622119, |
| "grad_norm": 0.32134944200515747, |
| "learning_rate": 1.4525393976153046e-06, |
| "loss": 0.4669, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.7764055299539171, |
| "grad_norm": 0.31328532099723816, |
| "learning_rate": 1.4457423200906994e-06, |
| "loss": 0.474, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.7769585253456222, |
| "grad_norm": 0.31957975029945374, |
| "learning_rate": 1.4389584943216156e-06, |
| "loss": 0.4478, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.7775115207373272, |
| "grad_norm": 0.318330317735672, |
| "learning_rate": 1.4321879456009858e-06, |
| "loss": 0.4738, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.7780645161290323, |
| "grad_norm": 0.3517070710659027, |
| "learning_rate": 1.4254306991722406e-06, |
| "loss": 0.49, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.7786175115207373, |
| "grad_norm": 0.3362846374511719, |
| "learning_rate": 1.4186867802292132e-06, |
| "loss": 0.4756, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.7791705069124424, |
| "grad_norm": 0.32801997661590576, |
| "learning_rate": 1.411956213916048e-06, |
| "loss": 0.4745, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.7797235023041474, |
| "grad_norm": 0.32685017585754395, |
| "learning_rate": 1.4052390253271037e-06, |
| "loss": 0.4618, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.7802764976958525, |
| "grad_norm": 0.32221168279647827, |
| "learning_rate": 1.3985352395068618e-06, |
| "loss": 0.4373, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.7808294930875576, |
| "grad_norm": 0.3476710915565491, |
| "learning_rate": 1.3918448814498336e-06, |
| "loss": 0.4463, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.7813824884792627, |
| "grad_norm": 0.3333290219306946, |
| "learning_rate": 1.3851679761004644e-06, |
| "loss": 0.4592, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.7819354838709678, |
| "grad_norm": 0.32516762614250183, |
| "learning_rate": 1.3785045483530435e-06, |
| "loss": 0.476, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.7824884792626728, |
| "grad_norm": 0.3284921944141388, |
| "learning_rate": 1.3718546230516095e-06, |
| "loss": 0.4669, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.7830414746543779, |
| "grad_norm": 0.3187192976474762, |
| "learning_rate": 1.3652182249898583e-06, |
| "loss": 0.478, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.783594470046083, |
| "grad_norm": 0.3000369369983673, |
| "learning_rate": 1.3585953789110506e-06, |
| "loss": 0.4763, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.784147465437788, |
| "grad_norm": 0.3406629264354706, |
| "learning_rate": 1.3519861095079202e-06, |
| "loss": 0.451, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.7847004608294931, |
| "grad_norm": 0.3349382281303406, |
| "learning_rate": 1.3453904414225799e-06, |
| "loss": 0.4573, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.7852534562211981, |
| "grad_norm": 0.3069670796394348, |
| "learning_rate": 1.3388083992464335e-06, |
| "loss": 0.4739, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.7858064516129032, |
| "grad_norm": 0.31198862195014954, |
| "learning_rate": 1.3322400075200792e-06, |
| "loss": 0.454, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.7863594470046082, |
| "grad_norm": 0.30845198035240173, |
| "learning_rate": 1.325685290733223e-06, |
| "loss": 0.4498, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.7869124423963134, |
| "grad_norm": 0.31618401408195496, |
| "learning_rate": 1.3191442733245824e-06, |
| "loss": 0.491, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.7874654377880185, |
| "grad_norm": 0.356711208820343, |
| "learning_rate": 1.3126169796818001e-06, |
| "loss": 0.4422, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.7880184331797235, |
| "grad_norm": 0.35151243209838867, |
| "learning_rate": 1.3061034341413497e-06, |
| "loss": 0.469, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.7885714285714286, |
| "grad_norm": 0.30518248677253723, |
| "learning_rate": 1.2996036609884478e-06, |
| "loss": 0.4778, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.7891244239631336, |
| "grad_norm": 0.30732637643814087, |
| "learning_rate": 1.2931176844569588e-06, |
| "loss": 0.4627, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.7896774193548387, |
| "grad_norm": 0.3301125466823578, |
| "learning_rate": 1.2866455287293094e-06, |
| "loss": 0.4711, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.7902304147465438, |
| "grad_norm": 0.3300682008266449, |
| "learning_rate": 1.2801872179363978e-06, |
| "loss": 0.497, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.7907834101382488, |
| "grad_norm": 0.3206273019313812, |
| "learning_rate": 1.2737427761575006e-06, |
| "loss": 0.4593, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.7913364055299539, |
| "grad_norm": 0.35528701543807983, |
| "learning_rate": 1.2673122274201844e-06, |
| "loss": 0.452, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.7918894009216589, |
| "grad_norm": 0.3094896972179413, |
| "learning_rate": 1.2608955957002196e-06, |
| "loss": 0.4426, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.7924423963133641, |
| "grad_norm": 0.3296172618865967, |
| "learning_rate": 1.2544929049214843e-06, |
| "loss": 0.4751, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.7929953917050692, |
| "grad_norm": 0.3449612855911255, |
| "learning_rate": 1.248104178955883e-06, |
| "loss": 0.473, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.7935483870967742, |
| "grad_norm": 0.30491313338279724, |
| "learning_rate": 1.2417294416232505e-06, |
| "loss": 0.447, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.7941013824884793, |
| "grad_norm": 0.32028716802597046, |
| "learning_rate": 1.235368716691267e-06, |
| "loss": 0.4358, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.7946543778801843, |
| "grad_norm": 0.3335442543029785, |
| "learning_rate": 1.22902202787537e-06, |
| "loss": 0.476, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.7952073732718894, |
| "grad_norm": 0.3434390723705292, |
| "learning_rate": 1.2226893988386618e-06, |
| "loss": 0.4447, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.7957603686635945, |
| "grad_norm": 0.3305012881755829, |
| "learning_rate": 1.2163708531918267e-06, |
| "loss": 0.4644, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.7963133640552995, |
| "grad_norm": 0.32044875621795654, |
| "learning_rate": 1.210066414493039e-06, |
| "loss": 0.4525, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.7968663594470046, |
| "grad_norm": 0.3219109773635864, |
| "learning_rate": 1.2037761062478759e-06, |
| "loss": 0.4614, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.7974193548387096, |
| "grad_norm": 0.3471842408180237, |
| "learning_rate": 1.1974999519092311e-06, |
| "loss": 0.4541, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.7979723502304148, |
| "grad_norm": 0.2974897623062134, |
| "learning_rate": 1.1912379748772267e-06, |
| "loss": 0.4344, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.7985253456221199, |
| "grad_norm": 0.31679412722587585, |
| "learning_rate": 1.1849901984991253e-06, |
| "loss": 0.456, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.7990783410138249, |
| "grad_norm": 0.30493679642677307, |
| "learning_rate": 1.1787566460692446e-06, |
| "loss": 0.4674, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.79963133640553, |
| "grad_norm": 0.3283561170101166, |
| "learning_rate": 1.1725373408288682e-06, |
| "loss": 0.4621, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.800184331797235, |
| "grad_norm": 0.32548418641090393, |
| "learning_rate": 1.1663323059661609e-06, |
| "loss": 0.4422, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.8007373271889401, |
| "grad_norm": 0.29554906487464905, |
| "learning_rate": 1.1601415646160813e-06, |
| "loss": 0.4535, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.8012903225806451, |
| "grad_norm": 0.31748977303504944, |
| "learning_rate": 1.153965139860297e-06, |
| "loss": 0.4579, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.8018433179723502, |
| "grad_norm": 0.30266064405441284, |
| "learning_rate": 1.147803054727095e-06, |
| "loss": 0.446, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.8023963133640553, |
| "grad_norm": 0.32854562997817993, |
| "learning_rate": 1.1416553321913009e-06, |
| "loss": 0.4632, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.8029493087557603, |
| "grad_norm": 0.29594528675079346, |
| "learning_rate": 1.1355219951741881e-06, |
| "loss": 0.4704, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.8035023041474655, |
| "grad_norm": 0.2916601300239563, |
| "learning_rate": 1.1294030665433969e-06, |
| "loss": 0.4287, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.8040552995391705, |
| "grad_norm": 0.31938451528549194, |
| "learning_rate": 1.1232985691128457e-06, |
| "loss": 0.4242, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.8046082949308756, |
| "grad_norm": 0.3538084030151367, |
| "learning_rate": 1.1172085256426473e-06, |
| "loss": 0.4705, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.8051612903225807, |
| "grad_norm": 0.3275561034679413, |
| "learning_rate": 1.1111329588390253e-06, |
| "loss": 0.4745, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.8057142857142857, |
| "grad_norm": 0.30866336822509766, |
| "learning_rate": 1.1050718913542275e-06, |
| "loss": 0.453, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.8062672811059908, |
| "grad_norm": 0.3329750597476959, |
| "learning_rate": 1.0990253457864418e-06, |
| "loss": 0.4598, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.8068202764976958, |
| "grad_norm": 0.32834047079086304, |
| "learning_rate": 1.0929933446797136e-06, |
| "loss": 0.4649, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.8073732718894009, |
| "grad_norm": 0.3380810022354126, |
| "learning_rate": 1.0869759105238592e-06, |
| "loss": 0.4605, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.807926267281106, |
| "grad_norm": 0.3055575489997864, |
| "learning_rate": 1.0809730657543838e-06, |
| "loss": 0.4852, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.808479262672811, |
| "grad_norm": 0.30661699175834656, |
| "learning_rate": 1.0749848327523966e-06, |
| "loss": 0.4824, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.8090322580645162, |
| "grad_norm": 0.32859787344932556, |
| "learning_rate": 1.0690112338445292e-06, |
| "loss": 0.464, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.8095852534562212, |
| "grad_norm": 0.3309606611728668, |
| "learning_rate": 1.0630522913028508e-06, |
| "loss": 0.4585, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.8101382488479263, |
| "grad_norm": 0.33870929479599, |
| "learning_rate": 1.0571080273447858e-06, |
| "loss": 0.476, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.8106912442396313, |
| "grad_norm": 0.33712446689605713, |
| "learning_rate": 1.0511784641330286e-06, |
| "loss": 0.4476, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.8112442396313364, |
| "grad_norm": 0.2965930700302124, |
| "learning_rate": 1.0452636237754642e-06, |
| "loss": 0.4766, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.8117972350230415, |
| "grad_norm": 0.3398984372615814, |
| "learning_rate": 1.0393635283250885e-06, |
| "loss": 0.4716, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.8123502304147465, |
| "grad_norm": 0.3270639181137085, |
| "learning_rate": 1.0334781997799165e-06, |
| "loss": 0.4556, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.8129032258064516, |
| "grad_norm": 0.3468005955219269, |
| "learning_rate": 1.0276076600829094e-06, |
| "loss": 0.4725, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.8134562211981566, |
| "grad_norm": 0.31104719638824463, |
| "learning_rate": 1.0217519311218882e-06, |
| "loss": 0.4735, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.8140092165898618, |
| "grad_norm": 0.30012935400009155, |
| "learning_rate": 1.0159110347294536e-06, |
| "loss": 0.4624, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.8145622119815669, |
| "grad_norm": 0.3302370309829712, |
| "learning_rate": 1.0100849926829043e-06, |
| "loss": 0.4755, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.8151152073732719, |
| "grad_norm": 0.3273729681968689, |
| "learning_rate": 1.0042738267041552e-06, |
| "loss": 0.4558, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.815668202764977, |
| "grad_norm": 0.313931405544281, |
| "learning_rate": 9.984775584596585e-07, |
| "loss": 0.4778, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.816221198156682, |
| "grad_norm": 0.31616419553756714, |
| "learning_rate": 9.926962095603199e-07, |
| "loss": 0.4358, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.8167741935483871, |
| "grad_norm": 0.33137091994285583, |
| "learning_rate": 9.869298015614198e-07, |
| "loss": 0.4558, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.8173271889400922, |
| "grad_norm": 0.3267715573310852, |
| "learning_rate": 9.811783559625343e-07, |
| "loss": 0.475, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.8178801843317972, |
| "grad_norm": 0.37252572178840637, |
| "learning_rate": 9.754418942074512e-07, |
| "loss": 0.4829, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.8184331797235023, |
| "grad_norm": 0.3060474395751953, |
| "learning_rate": 9.697204376840936e-07, |
| "loss": 0.4627, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.8189861751152073, |
| "grad_norm": 0.32007962465286255, |
| "learning_rate": 9.640140077244382e-07, |
| "loss": 0.4698, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.8195391705069125, |
| "grad_norm": 0.2968813478946686, |
| "learning_rate": 9.58322625604437e-07, |
| "loss": 0.4316, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.8200921658986176, |
| "grad_norm": 0.31622204184532166, |
| "learning_rate": 9.526463125439372e-07, |
| "loss": 0.4408, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.8206451612903226, |
| "grad_norm": 0.2871773838996887, |
| "learning_rate": 9.469850897066014e-07, |
| "loss": 0.4563, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.8211981566820277, |
| "grad_norm": 0.30365270376205444, |
| "learning_rate": 9.413389781998311e-07, |
| "loss": 0.4519, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.8217511520737327, |
| "grad_norm": 0.3301312029361725, |
| "learning_rate": 9.357079990746853e-07, |
| "loss": 0.4543, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.8223041474654378, |
| "grad_norm": 0.3503333628177643, |
| "learning_rate": 9.300921733258039e-07, |
| "loss": 0.4753, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.8228571428571428, |
| "grad_norm": 0.30572423338890076, |
| "learning_rate": 9.244915218913275e-07, |
| "loss": 0.4694, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.8234101382488479, |
| "grad_norm": 0.30958428978919983, |
| "learning_rate": 9.18906065652822e-07, |
| "loss": 0.4544, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.823963133640553, |
| "grad_norm": 0.3019687831401825, |
| "learning_rate": 9.133358254351982e-07, |
| "loss": 0.4751, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.824516129032258, |
| "grad_norm": 0.3120213747024536, |
| "learning_rate": 9.077808220066359e-07, |
| "loss": 0.4586, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.8250691244239632, |
| "grad_norm": 0.3264663815498352, |
| "learning_rate": 9.022410760785055e-07, |
| "loss": 0.4569, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.8256221198156682, |
| "grad_norm": 0.3079594075679779, |
| "learning_rate": 8.967166083052908e-07, |
| "loss": 0.5003, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.8261751152073733, |
| "grad_norm": 0.30771124362945557, |
| "learning_rate": 8.912074392845144e-07, |
| "loss": 0.4654, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.8267281105990784, |
| "grad_norm": 0.31957051157951355, |
| "learning_rate": 8.857135895566538e-07, |
| "loss": 0.4722, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.8272811059907834, |
| "grad_norm": 0.30836620926856995, |
| "learning_rate": 8.80235079605074e-07, |
| "loss": 0.4557, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.8278341013824885, |
| "grad_norm": 0.30665266513824463, |
| "learning_rate": 8.747719298559465e-07, |
| "loss": 0.4516, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.8283870967741935, |
| "grad_norm": 0.33325862884521484, |
| "learning_rate": 8.693241606781728e-07, |
| "loss": 0.4667, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.8289400921658986, |
| "grad_norm": 0.3295386731624603, |
| "learning_rate": 8.638917923833074e-07, |
| "loss": 0.4602, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.8294930875576036, |
| "grad_norm": 0.3496408462524414, |
| "learning_rate": 8.584748452254888e-07, |
| "loss": 0.4601, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.8300460829493087, |
| "grad_norm": 0.2922409474849701, |
| "learning_rate": 8.530733394013546e-07, |
| "loss": 0.4787, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.8305990783410139, |
| "grad_norm": 0.3221864104270935, |
| "learning_rate": 8.476872950499726e-07, |
| "loss": 0.4401, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.8311520737327189, |
| "grad_norm": 0.33632180094718933, |
| "learning_rate": 8.423167322527626e-07, |
| "loss": 0.4636, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.831705069124424, |
| "grad_norm": 0.3501831293106079, |
| "learning_rate": 8.369616710334233e-07, |
| "loss": 0.4573, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.832258064516129, |
| "grad_norm": 0.31366902589797974, |
| "learning_rate": 8.316221313578576e-07, |
| "loss": 0.4583, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.8328110599078341, |
| "grad_norm": 0.36373165249824524, |
| "learning_rate": 8.262981331340969e-07, |
| "loss": 0.4633, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.8333640552995392, |
| "grad_norm": 0.31518691778182983, |
| "learning_rate": 8.209896962122282e-07, |
| "loss": 0.4599, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.8339170506912442, |
| "grad_norm": 0.3142349123954773, |
| "learning_rate": 8.15696840384319e-07, |
| "loss": 0.465, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.8344700460829493, |
| "grad_norm": 0.3050268292427063, |
| "learning_rate": 8.104195853843433e-07, |
| "loss": 0.4617, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.8350230414746543, |
| "grad_norm": 0.3328079879283905, |
| "learning_rate": 8.051579508881107e-07, |
| "loss": 0.4558, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.8355760368663594, |
| "grad_norm": 0.3367290496826172, |
| "learning_rate": 7.999119565131891e-07, |
| "loss": 0.4662, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.8361290322580646, |
| "grad_norm": 0.2858041226863861, |
| "learning_rate": 7.946816218188347e-07, |
| "loss": 0.4645, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.8366820276497696, |
| "grad_norm": 0.3110063374042511, |
| "learning_rate": 7.894669663059168e-07, |
| "loss": 0.4616, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.8372350230414747, |
| "grad_norm": 0.3313262462615967, |
| "learning_rate": 7.84268009416847e-07, |
| "loss": 0.4423, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.8377880184331797, |
| "grad_norm": 0.33478546142578125, |
| "learning_rate": 7.790847705355059e-07, |
| "loss": 0.4551, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.8383410138248848, |
| "grad_norm": 0.3241022527217865, |
| "learning_rate": 7.739172689871705e-07, |
| "loss": 0.4749, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.8388940092165899, |
| "grad_norm": 0.30135831236839294, |
| "learning_rate": 7.687655240384423e-07, |
| "loss": 0.4432, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.8394470046082949, |
| "grad_norm": 0.3093847930431366, |
| "learning_rate": 7.636295548971762e-07, |
| "loss": 0.4398, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.31386277079582214, |
| "learning_rate": 7.585093807124077e-07, |
| "loss": 0.4308, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.840552995391705, |
| "grad_norm": 0.32683154940605164, |
| "learning_rate": 7.534050205742827e-07, |
| "loss": 0.4671, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.8411059907834101, |
| "grad_norm": 0.32057997584342957, |
| "learning_rate": 7.483164935139847e-07, |
| "loss": 0.4777, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.8416589861751153, |
| "grad_norm": 0.3294847011566162, |
| "learning_rate": 7.432438185036667e-07, |
| "loss": 0.4872, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.8422119815668203, |
| "grad_norm": 0.3206014931201935, |
| "learning_rate": 7.381870144563763e-07, |
| "loss": 0.4719, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.8427649769585254, |
| "grad_norm": 0.291071355342865, |
| "learning_rate": 7.33146100225991e-07, |
| "loss": 0.4496, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.8433179723502304, |
| "grad_norm": 0.31741753220558167, |
| "learning_rate": 7.281210946071393e-07, |
| "loss": 0.4305, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.8438709677419355, |
| "grad_norm": 0.3081185817718506, |
| "learning_rate": 7.231120163351396e-07, |
| "loss": 0.4796, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.8444239631336405, |
| "grad_norm": 0.31715425848960876, |
| "learning_rate": 7.181188840859266e-07, |
| "loss": 0.4827, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.8449769585253456, |
| "grad_norm": 0.3110186755657196, |
| "learning_rate": 7.131417164759791e-07, |
| "loss": 0.4652, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.8455299539170507, |
| "grad_norm": 0.3567166030406952, |
| "learning_rate": 7.081805320622559e-07, |
| "loss": 0.485, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.8460829493087557, |
| "grad_norm": 0.3137590289115906, |
| "learning_rate": 7.032353493421213e-07, |
| "loss": 0.445, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.8466359447004608, |
| "grad_norm": 0.3127385675907135, |
| "learning_rate": 6.983061867532798e-07, |
| "loss": 0.4691, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.8471889400921659, |
| "grad_norm": 0.3255513608455658, |
| "learning_rate": 6.933930626737057e-07, |
| "loss": 0.4418, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.847741935483871, |
| "grad_norm": 0.2911495566368103, |
| "learning_rate": 6.88495995421577e-07, |
| "loss": 0.4578, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.8482949308755761, |
| "grad_norm": 0.29997825622558594, |
| "learning_rate": 6.83615003255203e-07, |
| "loss": 0.4543, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.8488479262672811, |
| "grad_norm": 0.2882632315158844, |
| "learning_rate": 6.787501043729577e-07, |
| "loss": 0.4531, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.8494009216589862, |
| "grad_norm": 0.32424575090408325, |
| "learning_rate": 6.739013169132153e-07, |
| "loss": 0.4516, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.8499539170506912, |
| "grad_norm": 0.3189219534397125, |
| "learning_rate": 6.690686589542778e-07, |
| "loss": 0.456, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.8505069124423963, |
| "grad_norm": 0.2835007309913635, |
| "learning_rate": 6.642521485143099e-07, |
| "loss": 0.4874, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.8510599078341013, |
| "grad_norm": 0.30130714178085327, |
| "learning_rate": 6.594518035512726e-07, |
| "loss": 0.4698, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.8516129032258064, |
| "grad_norm": 0.3199770748615265, |
| "learning_rate": 6.546676419628545e-07, |
| "loss": 0.4822, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.8521658986175116, |
| "grad_norm": 0.31458160281181335, |
| "learning_rate": 6.498996815864068e-07, |
| "loss": 0.4737, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.8527188940092166, |
| "grad_norm": 0.33827102184295654, |
| "learning_rate": 6.451479401988736e-07, |
| "loss": 0.4607, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.8532718894009217, |
| "grad_norm": 0.3004834055900574, |
| "learning_rate": 6.404124355167302e-07, |
| "loss": 0.4621, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.8538248847926267, |
| "grad_norm": 0.2976799011230469, |
| "learning_rate": 6.356931851959136e-07, |
| "loss": 0.4571, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.8543778801843318, |
| "grad_norm": 0.31761622428894043, |
| "learning_rate": 6.30990206831758e-07, |
| "loss": 0.4647, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.8549308755760369, |
| "grad_norm": 0.30549031496047974, |
| "learning_rate": 6.263035179589288e-07, |
| "loss": 0.4619, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.8554838709677419, |
| "grad_norm": 0.3004506528377533, |
| "learning_rate": 6.216331360513572e-07, |
| "loss": 0.4786, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.856036866359447, |
| "grad_norm": 0.3054359257221222, |
| "learning_rate": 6.169790785221763e-07, |
| "loss": 0.4652, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.856589861751152, |
| "grad_norm": 0.3147340416908264, |
| "learning_rate": 6.123413627236536e-07, |
| "loss": 0.4573, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.3203125, |
| "learning_rate": 6.077200059471289e-07, |
| "loss": 0.4498, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.8576958525345623, |
| "grad_norm": 0.3389976918697357, |
| "learning_rate": 6.031150254229484e-07, |
| "loss": 0.4756, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.8582488479262673, |
| "grad_norm": 0.3188997507095337, |
| "learning_rate": 5.985264383204003e-07, |
| "loss": 0.4579, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.8588018433179724, |
| "grad_norm": 0.2967085540294647, |
| "learning_rate": 5.939542617476529e-07, |
| "loss": 0.4648, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.8593548387096774, |
| "grad_norm": 0.2970874011516571, |
| "learning_rate": 5.893985127516866e-07, |
| "loss": 0.4654, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.8599078341013825, |
| "grad_norm": 0.3088582158088684, |
| "learning_rate": 5.848592083182348e-07, |
| "loss": 0.4444, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.8604608294930876, |
| "grad_norm": 0.3454407751560211, |
| "learning_rate": 5.803363653717187e-07, |
| "loss": 0.4796, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.8610138248847926, |
| "grad_norm": 0.34124165773391724, |
| "learning_rate": 5.758300007751832e-07, |
| "loss": 0.475, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.8615668202764977, |
| "grad_norm": 0.31380608677864075, |
| "learning_rate": 5.713401313302358e-07, |
| "loss": 0.4714, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.8621198156682027, |
| "grad_norm": 0.3094062805175781, |
| "learning_rate": 5.668667737769834e-07, |
| "loss": 0.4348, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.8626728110599078, |
| "grad_norm": 0.3069341778755188, |
| "learning_rate": 5.624099447939696e-07, |
| "loss": 0.4677, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.863225806451613, |
| "grad_norm": 0.34499165415763855, |
| "learning_rate": 5.579696609981117e-07, |
| "loss": 0.4673, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.863778801843318, |
| "grad_norm": 0.32358747720718384, |
| "learning_rate": 5.535459389446401e-07, |
| "loss": 0.4627, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.8643317972350231, |
| "grad_norm": 0.33145248889923096, |
| "learning_rate": 5.491387951270366e-07, |
| "loss": 0.4535, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.8648847926267281, |
| "grad_norm": 0.3262435495853424, |
| "learning_rate": 5.447482459769709e-07, |
| "loss": 0.4613, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.8654377880184332, |
| "grad_norm": 0.31697559356689453, |
| "learning_rate": 5.403743078642443e-07, |
| "loss": 0.4503, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.8659907834101382, |
| "grad_norm": 0.3151688873767853, |
| "learning_rate": 5.360169970967221e-07, |
| "loss": 0.4816, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.8665437788018433, |
| "grad_norm": 0.3067736029624939, |
| "learning_rate": 5.316763299202766e-07, |
| "loss": 0.4631, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.8670967741935484, |
| "grad_norm": 0.2979617714881897, |
| "learning_rate": 5.273523225187255e-07, |
| "loss": 0.4403, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.8676497695852534, |
| "grad_norm": 0.3092879056930542, |
| "learning_rate": 5.23044991013773e-07, |
| "loss": 0.4669, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.8682027649769585, |
| "grad_norm": 0.29821327328681946, |
| "learning_rate": 5.187543514649479e-07, |
| "loss": 0.4678, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.8687557603686636, |
| "grad_norm": 0.3069091737270355, |
| "learning_rate": 5.144804198695447e-07, |
| "loss": 0.4834, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.8693087557603687, |
| "grad_norm": 0.3062274754047394, |
| "learning_rate": 5.102232121625633e-07, |
| "loss": 0.4583, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.8698617511520738, |
| "grad_norm": 0.3618842661380768, |
| "learning_rate": 5.059827442166504e-07, |
| "loss": 0.4348, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.8704147465437788, |
| "grad_norm": 0.3038586676120758, |
| "learning_rate": 5.017590318420395e-07, |
| "loss": 0.4601, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.8709677419354839, |
| "grad_norm": 0.30661341547966003, |
| "learning_rate": 4.975520907864928e-07, |
| "loss": 0.4432, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.8715207373271889, |
| "grad_norm": 0.3259396255016327, |
| "learning_rate": 4.93361936735241e-07, |
| "loss": 0.4602, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.872073732718894, |
| "grad_norm": 0.32900482416152954, |
| "learning_rate": 4.891885853109279e-07, |
| "loss": 0.473, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.872626728110599, |
| "grad_norm": 0.280129075050354, |
| "learning_rate": 4.85032052073548e-07, |
| "loss": 0.4581, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.8731797235023041, |
| "grad_norm": 0.31960612535476685, |
| "learning_rate": 4.808923525203912e-07, |
| "loss": 0.4472, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.8737327188940092, |
| "grad_norm": 0.28361162543296814, |
| "learning_rate": 4.767695020859847e-07, |
| "loss": 0.4538, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.8742857142857143, |
| "grad_norm": 0.3138439953327179, |
| "learning_rate": 4.726635161420351e-07, |
| "loss": 0.4746, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.8748387096774194, |
| "grad_norm": 0.30272024869918823, |
| "learning_rate": 4.685744099973716e-07, |
| "loss": 0.4713, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.8753917050691244, |
| "grad_norm": 0.31137874722480774, |
| "learning_rate": 4.6450219889788816e-07, |
| "loss": 0.4569, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.8759447004608295, |
| "grad_norm": 0.34966525435447693, |
| "learning_rate": 4.6044689802648534e-07, |
| "loss": 0.4646, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.8764976958525346, |
| "grad_norm": 0.29577386379241943, |
| "learning_rate": 4.564085225030174e-07, |
| "loss": 0.4642, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.8770506912442396, |
| "grad_norm": 0.2946975529193878, |
| "learning_rate": 4.523870873842329e-07, |
| "loss": 0.4312, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.8776036866359447, |
| "grad_norm": 0.30097469687461853, |
| "learning_rate": 4.4838260766372044e-07, |
| "loss": 0.4666, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.8781566820276497, |
| "grad_norm": 0.2847994565963745, |
| "learning_rate": 4.4439509827185034e-07, |
| "loss": 0.4165, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.8787096774193548, |
| "grad_norm": 0.2944558560848236, |
| "learning_rate": 4.4042457407572235e-07, |
| "loss": 0.4721, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.8792626728110599, |
| "grad_norm": 0.3086279034614563, |
| "learning_rate": 4.3647104987910636e-07, |
| "loss": 0.45, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.879815668202765, |
| "grad_norm": 0.3032161593437195, |
| "learning_rate": 4.3253454042239016e-07, |
| "loss": 0.458, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.8803686635944701, |
| "grad_norm": 0.3120705485343933, |
| "learning_rate": 4.286150603825234e-07, |
| "loss": 0.474, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.8809216589861751, |
| "grad_norm": 0.332922101020813, |
| "learning_rate": 4.2471262437296326e-07, |
| "loss": 0.4702, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.8814746543778802, |
| "grad_norm": 0.2810650169849396, |
| "learning_rate": 4.208272469436192e-07, |
| "loss": 0.4509, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.8820276497695853, |
| "grad_norm": 0.31393900513648987, |
| "learning_rate": 4.1695894258079903e-07, |
| "loss": 0.4734, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.8825806451612903, |
| "grad_norm": 0.2897551357746124, |
| "learning_rate": 4.1310772570715586e-07, |
| "loss": 0.4442, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.8831336405529954, |
| "grad_norm": 0.3010689616203308, |
| "learning_rate": 4.092736106816314e-07, |
| "loss": 0.4477, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.8836866359447004, |
| "grad_norm": 0.29473552107810974, |
| "learning_rate": 4.0545661179940854e-07, |
| "loss": 0.4595, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.8842396313364055, |
| "grad_norm": 0.3020123839378357, |
| "learning_rate": 4.0165674329185055e-07, |
| "loss": 0.461, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.8847926267281107, |
| "grad_norm": 0.3016417920589447, |
| "learning_rate": 3.978740193264524e-07, |
| "loss": 0.4802, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.8853456221198157, |
| "grad_norm": 0.3054351806640625, |
| "learning_rate": 3.941084540067874e-07, |
| "loss": 0.461, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.8858986175115208, |
| "grad_norm": 0.31593799591064453, |
| "learning_rate": 3.9036006137245396e-07, |
| "loss": 0.485, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.8864516129032258, |
| "grad_norm": 0.2995027005672455, |
| "learning_rate": 3.866288553990233e-07, |
| "loss": 0.4544, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.8870046082949309, |
| "grad_norm": 0.3163018524646759, |
| "learning_rate": 3.82914849997989e-07, |
| "loss": 0.4675, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.8875576036866359, |
| "grad_norm": 0.3466692268848419, |
| "learning_rate": 3.7921805901671273e-07, |
| "loss": 0.455, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.888110599078341, |
| "grad_norm": 0.30836209654808044, |
| "learning_rate": 3.7553849623837237e-07, |
| "loss": 0.4983, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.8886635944700461, |
| "grad_norm": 0.3018787205219269, |
| "learning_rate": 3.7187617538191446e-07, |
| "loss": 0.4714, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.8892165898617511, |
| "grad_norm": 0.3216530382633209, |
| "learning_rate": 3.682311101019981e-07, |
| "loss": 0.4857, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.8897695852534562, |
| "grad_norm": 0.32293516397476196, |
| "learning_rate": 3.646033139889482e-07, |
| "loss": 0.4508, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.8903225806451613, |
| "grad_norm": 0.29628053307533264, |
| "learning_rate": 3.6099280056870136e-07, |
| "loss": 0.4271, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.8908755760368664, |
| "grad_norm": 0.30662253499031067, |
| "learning_rate": 3.5739958330275936e-07, |
| "loss": 0.4833, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.8914285714285715, |
| "grad_norm": 0.30339956283569336, |
| "learning_rate": 3.538236755881341e-07, |
| "loss": 0.4675, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.8919815668202765, |
| "grad_norm": 0.29642388224601746, |
| "learning_rate": 3.502650907573024e-07, |
| "loss": 0.4529, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.8925345622119816, |
| "grad_norm": 0.3087359666824341, |
| "learning_rate": 3.467238420781527e-07, |
| "loss": 0.4794, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.8930875576036866, |
| "grad_norm": 0.34186214208602905, |
| "learning_rate": 3.431999427539368e-07, |
| "loss": 0.4951, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.8936405529953917, |
| "grad_norm": 0.31149131059646606, |
| "learning_rate": 3.396934059232226e-07, |
| "loss": 0.4476, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.8941935483870967, |
| "grad_norm": 0.3173343241214752, |
| "learning_rate": 3.362042446598418e-07, |
| "loss": 0.4735, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.8947465437788018, |
| "grad_norm": 0.29687947034835815, |
| "learning_rate": 3.3273247197284366e-07, |
| "loss": 0.4588, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.8952995391705069, |
| "grad_norm": 0.29158180952072144, |
| "learning_rate": 3.292781008064455e-07, |
| "loss": 0.4477, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.895852534562212, |
| "grad_norm": 0.31376034021377563, |
| "learning_rate": 3.258411440399839e-07, |
| "loss": 0.4613, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.8964055299539171, |
| "grad_norm": 0.3553878366947174, |
| "learning_rate": 3.2242161448786724e-07, |
| "loss": 0.4638, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.8969585253456221, |
| "grad_norm": 0.31276994943618774, |
| "learning_rate": 3.1901952489952927e-07, |
| "loss": 0.4297, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.8975115207373272, |
| "grad_norm": 0.2925131320953369, |
| "learning_rate": 3.15634887959379e-07, |
| "loss": 0.4642, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.8980645161290323, |
| "grad_norm": 0.3111323416233063, |
| "learning_rate": 3.122677162867549e-07, |
| "loss": 0.4628, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.8986175115207373, |
| "grad_norm": 0.28449130058288574, |
| "learning_rate": 3.089180224358773e-07, |
| "loss": 0.4619, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.8991705069124424, |
| "grad_norm": 0.2895408868789673, |
| "learning_rate": 3.0558581889580383e-07, |
| "loss": 0.4486, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.8997235023041474, |
| "grad_norm": 0.28121551871299744, |
| "learning_rate": 3.0227111809037815e-07, |
| "loss": 0.4523, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.9002764976958525, |
| "grad_norm": 0.3156479001045227, |
| "learning_rate": 2.9897393237818873e-07, |
| "loss": 0.4504, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.9008294930875576, |
| "grad_norm": 0.30593904852867126, |
| "learning_rate": 2.956942740525187e-07, |
| "loss": 0.4832, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.9013824884792627, |
| "grad_norm": 0.2923428416252136, |
| "learning_rate": 2.924321553413029e-07, |
| "loss": 0.4473, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.9019354838709678, |
| "grad_norm": 0.29185551404953003, |
| "learning_rate": 2.891875884070816e-07, |
| "loss": 0.4514, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.9024884792626728, |
| "grad_norm": 0.3090588450431824, |
| "learning_rate": 2.859605853469533e-07, |
| "loss": 0.492, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.9030414746543779, |
| "grad_norm": 0.2918623387813568, |
| "learning_rate": 2.8275115819253184e-07, |
| "loss": 0.4378, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.903594470046083, |
| "grad_norm": 0.2900620698928833, |
| "learning_rate": 2.795593189099016e-07, |
| "loss": 0.4702, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.904147465437788, |
| "grad_norm": 0.30443379282951355, |
| "learning_rate": 2.7638507939956926e-07, |
| "loss": 0.4562, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.9047004608294931, |
| "grad_norm": 0.2894691526889801, |
| "learning_rate": 2.7322845149642564e-07, |
| "loss": 0.4405, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.9052534562211981, |
| "grad_norm": 0.30414119362831116, |
| "learning_rate": 2.700894469696963e-07, |
| "loss": 0.4618, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.9058064516129032, |
| "grad_norm": 0.29795682430267334, |
| "learning_rate": 2.669680775228989e-07, |
| "loss": 0.4717, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.9063594470046082, |
| "grad_norm": 0.2925158739089966, |
| "learning_rate": 2.638643547938019e-07, |
| "loss": 0.4788, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.9069124423963134, |
| "grad_norm": 0.29635146260261536, |
| "learning_rate": 2.607782903543782e-07, |
| "loss": 0.4521, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.9074654377880185, |
| "grad_norm": 0.29401543736457825, |
| "learning_rate": 2.5770989571076375e-07, |
| "loss": 0.4401, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.9080184331797235, |
| "grad_norm": 0.2778622508049011, |
| "learning_rate": 2.5465918230321464e-07, |
| "loss": 0.4522, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.9085714285714286, |
| "grad_norm": 0.2870117127895355, |
| "learning_rate": 2.5162616150606167e-07, |
| "loss": 0.4686, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.9091244239631336, |
| "grad_norm": 0.3375545144081116, |
| "learning_rate": 2.486108446276725e-07, |
| "loss": 0.4741, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.9096774193548387, |
| "grad_norm": 0.285396546125412, |
| "learning_rate": 2.4561324291040636e-07, |
| "loss": 0.4749, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.9102304147465438, |
| "grad_norm": 0.31609663367271423, |
| "learning_rate": 2.4263336753057364e-07, |
| "loss": 0.4552, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.9107834101382488, |
| "grad_norm": 0.32548245787620544, |
| "learning_rate": 2.396712295983922e-07, |
| "loss": 0.4707, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.9113364055299539, |
| "grad_norm": 0.28127720952033997, |
| "learning_rate": 2.3672684015794923e-07, |
| "loss": 0.4808, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.9118894009216589, |
| "grad_norm": 0.3091859817504883, |
| "learning_rate": 2.338002101871556e-07, |
| "loss": 0.4469, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.9124423963133641, |
| "grad_norm": 0.3138236403465271, |
| "learning_rate": 2.3089135059771007e-07, |
| "loss": 0.4848, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.9129953917050692, |
| "grad_norm": 0.30295220017433167, |
| "learning_rate": 2.2800027223505438e-07, |
| "loss": 0.4567, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.9135483870967742, |
| "grad_norm": 0.3260992467403412, |
| "learning_rate": 2.2512698587833502e-07, |
| "loss": 0.4776, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.9141013824884793, |
| "grad_norm": 0.30410924553871155, |
| "learning_rate": 2.222715022403621e-07, |
| "loss": 0.4565, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.9146543778801843, |
| "grad_norm": 0.296068400144577, |
| "learning_rate": 2.1943383196756984e-07, |
| "loss": 0.4868, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.9152073732718894, |
| "grad_norm": 0.3140951991081238, |
| "learning_rate": 2.1661398563997737e-07, |
| "loss": 0.4645, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.9157603686635944, |
| "grad_norm": 0.2961066663265228, |
| "learning_rate": 2.138119737711475e-07, |
| "loss": 0.4376, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.9163133640552995, |
| "grad_norm": 0.3166109621524811, |
| "learning_rate": 2.110278068081506e-07, |
| "loss": 0.4713, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.9168663594470046, |
| "grad_norm": 0.32516512274742126, |
| "learning_rate": 2.082614951315215e-07, |
| "loss": 0.4516, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.9174193548387096, |
| "grad_norm": 0.293089896440506, |
| "learning_rate": 2.055130490552254e-07, |
| "loss": 0.469, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.9179723502304148, |
| "grad_norm": 0.31795766949653625, |
| "learning_rate": 2.0278247882661584e-07, |
| "loss": 0.4497, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.9185253456221198, |
| "grad_norm": 0.2847367525100708, |
| "learning_rate": 2.0006979462639686e-07, |
| "loss": 0.4447, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.9190783410138249, |
| "grad_norm": 0.2693861126899719, |
| "learning_rate": 1.9737500656858754e-07, |
| "loss": 0.4406, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.91963133640553, |
| "grad_norm": 0.3001803755760193, |
| "learning_rate": 1.94698124700482e-07, |
| "loss": 0.4964, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.920184331797235, |
| "grad_norm": 0.3073177933692932, |
| "learning_rate": 1.9203915900261327e-07, |
| "loss": 0.4737, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.9207373271889401, |
| "grad_norm": 0.31089356541633606, |
| "learning_rate": 1.8939811938871456e-07, |
| "loss": 0.4539, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.9212903225806451, |
| "grad_norm": 0.27542582154273987, |
| "learning_rate": 1.86775015705683e-07, |
| "loss": 0.434, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.9218433179723502, |
| "grad_norm": 0.32208341360092163, |
| "learning_rate": 1.841698577335438e-07, |
| "loss": 0.4476, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.9223963133640553, |
| "grad_norm": 0.3119848668575287, |
| "learning_rate": 1.8158265518541274e-07, |
| "loss": 0.4484, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.9229493087557604, |
| "grad_norm": 0.2974831759929657, |
| "learning_rate": 1.790134177074604e-07, |
| "loss": 0.4574, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.9235023041474655, |
| "grad_norm": 0.29929405450820923, |
| "learning_rate": 1.7646215487887587e-07, |
| "loss": 0.4685, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.9240552995391705, |
| "grad_norm": 0.2898726165294647, |
| "learning_rate": 1.739288762118313e-07, |
| "loss": 0.455, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.9246082949308756, |
| "grad_norm": 0.3034640848636627, |
| "learning_rate": 1.714135911514475e-07, |
| "loss": 0.4541, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.9251612903225807, |
| "grad_norm": 0.3259904682636261, |
| "learning_rate": 1.6891630907575562e-07, |
| "loss": 0.4678, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.9257142857142857, |
| "grad_norm": 0.3421642780303955, |
| "learning_rate": 1.664370392956649e-07, |
| "loss": 0.4518, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.9262672811059908, |
| "grad_norm": 0.29688096046447754, |
| "learning_rate": 1.6397579105492778e-07, |
| "loss": 0.4222, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.9268202764976958, |
| "grad_norm": 0.3092136085033417, |
| "learning_rate": 1.6153257353010433e-07, |
| "loss": 0.4679, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.9273732718894009, |
| "grad_norm": 0.3253113627433777, |
| "learning_rate": 1.5910739583053002e-07, |
| "loss": 0.4495, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.9279262672811059, |
| "grad_norm": 0.286625474691391, |
| "learning_rate": 1.5670026699827757e-07, |
| "loss": 0.4652, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.9284792626728111, |
| "grad_norm": 0.2905667722225189, |
| "learning_rate": 1.5431119600812837e-07, |
| "loss": 0.4529, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.9290322580645162, |
| "grad_norm": 0.32414594292640686, |
| "learning_rate": 1.5194019176753615e-07, |
| "loss": 0.4878, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.9295852534562212, |
| "grad_norm": 0.29310277104377747, |
| "learning_rate": 1.495872631165929e-07, |
| "loss": 0.476, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.9301382488479263, |
| "grad_norm": 0.3012104332447052, |
| "learning_rate": 1.4725241882799946e-07, |
| "loss": 0.4823, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.9306912442396313, |
| "grad_norm": 0.2896125018596649, |
| "learning_rate": 1.449356676070285e-07, |
| "loss": 0.4429, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.9312442396313364, |
| "grad_norm": 0.2954270839691162, |
| "learning_rate": 1.42637018091496e-07, |
| "loss": 0.473, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.9317972350230415, |
| "grad_norm": 0.29838648438453674, |
| "learning_rate": 1.4035647885172533e-07, |
| "loss": 0.4615, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.9323502304147465, |
| "grad_norm": 0.3438728451728821, |
| "learning_rate": 1.3809405839051937e-07, |
| "loss": 0.4924, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.9329032258064516, |
| "grad_norm": 0.2971719205379486, |
| "learning_rate": 1.358497651431251e-07, |
| "loss": 0.4438, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.9334562211981566, |
| "grad_norm": 0.3192031979560852, |
| "learning_rate": 1.3362360747720405e-07, |
| "loss": 0.4559, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.9340092165898618, |
| "grad_norm": 0.3306371867656708, |
| "learning_rate": 1.314155936928002e-07, |
| "loss": 0.4633, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.9345622119815669, |
| "grad_norm": 0.28587308526039124, |
| "learning_rate": 1.2922573202231114e-07, |
| "loss": 0.4577, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.9351152073732719, |
| "grad_norm": 0.31524068117141724, |
| "learning_rate": 1.2705403063045464e-07, |
| "loss": 0.4457, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.935668202764977, |
| "grad_norm": 0.2901270389556885, |
| "learning_rate": 1.2490049761423927e-07, |
| "loss": 0.4396, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.936221198156682, |
| "grad_norm": 0.2995949685573578, |
| "learning_rate": 1.2276514100293403e-07, |
| "loss": 0.4633, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.9367741935483871, |
| "grad_norm": 0.2743770182132721, |
| "learning_rate": 1.206479687580403e-07, |
| "loss": 0.4655, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.9373271889400921, |
| "grad_norm": 0.3132480978965759, |
| "learning_rate": 1.1854898877325882e-07, |
| "loss": 0.4641, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.9378801843317972, |
| "grad_norm": 0.2994924783706665, |
| "learning_rate": 1.1646820887446342e-07, |
| "loss": 0.4491, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.9384331797235023, |
| "grad_norm": 0.2822410762310028, |
| "learning_rate": 1.1440563681966943e-07, |
| "loss": 0.4663, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.9389861751152073, |
| "grad_norm": 0.3047800660133362, |
| "learning_rate": 1.1236128029900539e-07, |
| "loss": 0.4638, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.9395391705069125, |
| "grad_norm": 0.31529849767684937, |
| "learning_rate": 1.1033514693468584e-07, |
| "loss": 0.4269, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.9400921658986175, |
| "grad_norm": 0.28443610668182373, |
| "learning_rate": 1.0832724428098185e-07, |
| "loss": 0.4568, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.9406451612903226, |
| "grad_norm": 0.2651902735233307, |
| "learning_rate": 1.0633757982419169e-07, |
| "loss": 0.4508, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.9411981566820277, |
| "grad_norm": 0.29510411620140076, |
| "learning_rate": 1.0436616098261409e-07, |
| "loss": 0.4643, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.9417511520737327, |
| "grad_norm": 0.2974132299423218, |
| "learning_rate": 1.0241299510652114e-07, |
| "loss": 0.429, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.9423041474654378, |
| "grad_norm": 0.2929219901561737, |
| "learning_rate": 1.0047808947813097e-07, |
| "loss": 0.4851, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.9428571428571428, |
| "grad_norm": 0.29706960916519165, |
| "learning_rate": 9.85614513115779e-08, |
| "loss": 0.4559, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.9434101382488479, |
| "grad_norm": 0.298990935087204, |
| "learning_rate": 9.666308775289013e-08, |
| "loss": 0.479, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.943963133640553, |
| "grad_norm": 0.29148900508880615, |
| "learning_rate": 9.47830058799576e-08, |
| "loss": 0.4577, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.944516129032258, |
| "grad_norm": 0.3139622211456299, |
| "learning_rate": 9.292121270251031e-08, |
| "loss": 0.4569, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.9450691244239632, |
| "grad_norm": 0.29921236634254456, |
| "learning_rate": 9.107771516209008e-08, |
| "loss": 0.4558, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.9456221198156682, |
| "grad_norm": 0.28523844480514526, |
| "learning_rate": 8.925252013202545e-08, |
| "loss": 0.4709, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.9461751152073733, |
| "grad_norm": 0.3282380998134613, |
| "learning_rate": 8.744563441740461e-08, |
| "loss": 0.4485, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.9467281105990784, |
| "grad_norm": 0.30116501450538635, |
| "learning_rate": 8.565706475505086e-08, |
| "loss": 0.4633, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.9472811059907834, |
| "grad_norm": 0.2834503948688507, |
| "learning_rate": 8.388681781349828e-08, |
| "loss": 0.4569, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.9478341013824885, |
| "grad_norm": 0.31157195568084717, |
| "learning_rate": 8.213490019296666e-08, |
| "loss": 0.4666, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.9483870967741935, |
| "grad_norm": 0.30053144693374634, |
| "learning_rate": 8.04013184253355e-08, |
| "loss": 0.4645, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.9489400921658986, |
| "grad_norm": 0.3080219328403473, |
| "learning_rate": 7.868607897412062e-08, |
| "loss": 0.4409, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.9494930875576036, |
| "grad_norm": 0.2975987493991852, |
| "learning_rate": 7.69891882344509e-08, |
| "loss": 0.4691, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.9500460829493087, |
| "grad_norm": 0.3106543719768524, |
| "learning_rate": 7.53106525330427e-08, |
| "loss": 0.4816, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.9505990783410139, |
| "grad_norm": 0.29965740442276, |
| "learning_rate": 7.36504781281766e-08, |
| "loss": 0.469, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.9511520737327189, |
| "grad_norm": 0.31763914227485657, |
| "learning_rate": 7.20086712096768e-08, |
| "loss": 0.4615, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.951705069124424, |
| "grad_norm": 0.2858491837978363, |
| "learning_rate": 7.038523789888397e-08, |
| "loss": 0.457, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.952258064516129, |
| "grad_norm": 0.2787969410419464, |
| "learning_rate": 6.878018424863408e-08, |
| "loss": 0.4485, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.9528110599078341, |
| "grad_norm": 0.32252055406570435, |
| "learning_rate": 6.7193516243238e-08, |
| "loss": 0.4708, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.9533640552995392, |
| "grad_norm": 0.3171110451221466, |
| "learning_rate": 6.562523979845525e-08, |
| "loss": 0.466, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.9539170506912442, |
| "grad_norm": 0.31782266497612, |
| "learning_rate": 6.407536076147525e-08, |
| "loss": 0.4624, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.9544700460829493, |
| "grad_norm": 0.2952912449836731, |
| "learning_rate": 6.254388491089281e-08, |
| "loss": 0.4537, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.9550230414746543, |
| "grad_norm": 0.28047212958335876, |
| "learning_rate": 6.10308179566893e-08, |
| "loss": 0.473, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.9555760368663595, |
| "grad_norm": 0.30235886573791504, |
| "learning_rate": 5.9536165540209914e-08, |
| "loss": 0.489, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.9561290322580646, |
| "grad_norm": 0.29142141342163086, |
| "learning_rate": 5.80599332341425e-08, |
| "loss": 0.4718, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.9566820276497696, |
| "grad_norm": 0.2832047939300537, |
| "learning_rate": 5.6602126542496525e-08, |
| "loss": 0.4562, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.9572350230414747, |
| "grad_norm": 0.2992711365222931, |
| "learning_rate": 5.516275090058476e-08, |
| "loss": 0.4816, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.9577880184331797, |
| "grad_norm": 0.29784855246543884, |
| "learning_rate": 5.3741811674998254e-08, |
| "loss": 0.4818, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.9583410138248848, |
| "grad_norm": 0.3014211058616638, |
| "learning_rate": 5.233931416359195e-08, |
| "loss": 0.4914, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.9588940092165898, |
| "grad_norm": 0.30964842438697815, |
| "learning_rate": 5.095526359546243e-08, |
| "loss": 0.4643, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.9594470046082949, |
| "grad_norm": 0.3023122251033783, |
| "learning_rate": 4.958966513092689e-08, |
| "loss": 0.4499, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.295585572719574, |
| "learning_rate": 4.8242523861506405e-08, |
| "loss": 0.4248, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.960552995391705, |
| "grad_norm": 0.2833447754383087, |
| "learning_rate": 4.691384480990602e-08, |
| "loss": 0.4491, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.9611059907834102, |
| "grad_norm": 0.309023916721344, |
| "learning_rate": 4.56036329299947e-08, |
| "loss": 0.4404, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.9616589861751152, |
| "grad_norm": 0.3236503303050995, |
| "learning_rate": 4.4311893106789847e-08, |
| "loss": 0.4591, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.9622119815668203, |
| "grad_norm": 0.29482167959213257, |
| "learning_rate": 4.3038630156436166e-08, |
| "loss": 0.4558, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.9627649769585254, |
| "grad_norm": 0.2854726016521454, |
| "learning_rate": 4.1783848826189025e-08, |
| "loss": 0.4561, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.9633179723502304, |
| "grad_norm": 0.28575026988983154, |
| "learning_rate": 4.0547553794397235e-08, |
| "loss": 0.4541, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.9638709677419355, |
| "grad_norm": 0.30080512166023254, |
| "learning_rate": 3.932974967048364e-08, |
| "loss": 0.4475, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.9644239631336405, |
| "grad_norm": 0.2857537269592285, |
| "learning_rate": 3.813044099493068e-08, |
| "loss": 0.4521, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.9649769585253456, |
| "grad_norm": 0.28055858612060547, |
| "learning_rate": 3.6949632239261514e-08, |
| "loss": 0.4388, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.9655299539170507, |
| "grad_norm": 0.27776503562927246, |
| "learning_rate": 3.578732780602334e-08, |
| "loss": 0.4483, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.9660829493087557, |
| "grad_norm": 0.322313517332077, |
| "learning_rate": 3.464353202877302e-08, |
| "loss": 0.4589, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.9666359447004609, |
| "grad_norm": 0.315902978181839, |
| "learning_rate": 3.351824917205704e-08, |
| "loss": 0.4696, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.9671889400921659, |
| "grad_norm": 0.3116399645805359, |
| "learning_rate": 3.2411483431400435e-08, |
| "loss": 0.4532, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.967741935483871, |
| "grad_norm": 0.27709704637527466, |
| "learning_rate": 3.1323238933286814e-08, |
| "loss": 0.4808, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.968294930875576, |
| "grad_norm": 0.28587040305137634, |
| "learning_rate": 3.0253519735146674e-08, |
| "loss": 0.4498, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.9688479262672811, |
| "grad_norm": 0.3236626088619232, |
| "learning_rate": 2.920232982533855e-08, |
| "loss": 0.4618, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.9694009216589862, |
| "grad_norm": 0.29707276821136475, |
| "learning_rate": 2.8169673123137896e-08, |
| "loss": 0.4516, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.9699539170506912, |
| "grad_norm": 0.3110201954841614, |
| "learning_rate": 2.7155553478719343e-08, |
| "loss": 0.4534, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.9705069124423963, |
| "grad_norm": 0.308906227350235, |
| "learning_rate": 2.6159974673143352e-08, |
| "loss": 0.447, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.9710599078341013, |
| "grad_norm": 0.30986642837524414, |
| "learning_rate": 2.5182940418344016e-08, |
| "loss": 0.4587, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.9716129032258064, |
| "grad_norm": 0.31161195039749146, |
| "learning_rate": 2.4224454357112404e-08, |
| "loss": 0.4616, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.9721658986175116, |
| "grad_norm": 0.2983948290348053, |
| "learning_rate": 2.328452006308435e-08, |
| "loss": 0.4696, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.9727188940092166, |
| "grad_norm": 0.28888341784477234, |
| "learning_rate": 2.2363141040727123e-08, |
| "loss": 0.4859, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.9732718894009217, |
| "grad_norm": 0.2775828242301941, |
| "learning_rate": 2.1460320725326113e-08, |
| "loss": 0.4354, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.9738248847926267, |
| "grad_norm": 0.34634268283843994, |
| "learning_rate": 2.057606248297206e-08, |
| "loss": 0.4431, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.9743778801843318, |
| "grad_norm": 0.31715697050094604, |
| "learning_rate": 1.971036961054884e-08, |
| "loss": 0.4566, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.9749308755760369, |
| "grad_norm": 0.2788535952568054, |
| "learning_rate": 1.886324533572015e-08, |
| "loss": 0.4492, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.9754838709677419, |
| "grad_norm": 0.3046756684780121, |
| "learning_rate": 1.8034692816919497e-08, |
| "loss": 0.4461, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.976036866359447, |
| "grad_norm": 0.2936743497848511, |
| "learning_rate": 1.7224715143335235e-08, |
| "loss": 0.4635, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.976589861751152, |
| "grad_norm": 0.31344956159591675, |
| "learning_rate": 1.643331533490333e-08, |
| "loss": 0.434, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.9771428571428571, |
| "grad_norm": 0.2932872176170349, |
| "learning_rate": 1.5660496342291833e-08, |
| "loss": 0.4707, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.9776958525345623, |
| "grad_norm": 0.29183509945869446, |
| "learning_rate": 1.4906261046892523e-08, |
| "loss": 0.4711, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.9782488479262673, |
| "grad_norm": 0.29039350152015686, |
| "learning_rate": 1.4170612260808736e-08, |
| "loss": 0.4802, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.9788018433179724, |
| "grad_norm": 0.30380529165267944, |
| "learning_rate": 1.3453552726847008e-08, |
| "loss": 0.4868, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.9793548387096774, |
| "grad_norm": 0.29868564009666443, |
| "learning_rate": 1.2755085118503762e-08, |
| "loss": 0.4625, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.9799078341013825, |
| "grad_norm": 0.30664679408073425, |
| "learning_rate": 1.207521203995754e-08, |
| "loss": 0.4369, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.9804608294930875, |
| "grad_norm": 0.28604456782341003, |
| "learning_rate": 1.1413936026059558e-08, |
| "loss": 0.4559, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.9810138248847926, |
| "grad_norm": 0.33186855912208557, |
| "learning_rate": 1.0771259542322055e-08, |
| "loss": 0.4735, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.9815668202764977, |
| "grad_norm": 0.3165326714515686, |
| "learning_rate": 1.0147184984911073e-08, |
| "loss": 0.4411, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.9821198156682027, |
| "grad_norm": 0.30226629972457886, |
| "learning_rate": 9.541714680637582e-09, |
| "loss": 0.4274, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.9826728110599078, |
| "grad_norm": 0.3025604486465454, |
| "learning_rate": 8.95485088694692e-09, |
| "loss": 0.4461, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.983225806451613, |
| "grad_norm": 0.3139297068119049, |
| "learning_rate": 8.386595791912145e-09, |
| "loss": 0.4501, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.983778801843318, |
| "grad_norm": 0.28405094146728516, |
| "learning_rate": 7.836951514225144e-09, |
| "loss": 0.4686, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.9843317972350231, |
| "grad_norm": 0.2899510860443115, |
| "learning_rate": 7.3059201031899786e-09, |
| "loss": 0.4738, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.9848847926267281, |
| "grad_norm": 0.3046450614929199, |
| "learning_rate": 6.7935035387128865e-09, |
| "loss": 0.4531, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.9854377880184332, |
| "grad_norm": 0.2701072692871094, |
| "learning_rate": 6.299703731296181e-09, |
| "loss": 0.4532, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.9859907834101382, |
| "grad_norm": 0.271697461605072, |
| "learning_rate": 5.8245225220321385e-09, |
| "loss": 0.4349, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.9865437788018433, |
| "grad_norm": 0.29448968172073364, |
| "learning_rate": 5.367961682594125e-09, |
| "loss": 0.4664, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.9870967741935484, |
| "grad_norm": 0.3071184754371643, |
| "learning_rate": 4.930022915231591e-09, |
| "loss": 0.4397, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.9876497695852534, |
| "grad_norm": 0.2995312213897705, |
| "learning_rate": 4.510707852762864e-09, |
| "loss": 0.4592, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.9882027649769585, |
| "grad_norm": 0.30661311745643616, |
| "learning_rate": 4.110018058570142e-09, |
| "loss": 0.4835, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.9887557603686636, |
| "grad_norm": 0.2768559157848358, |
| "learning_rate": 3.727955026591179e-09, |
| "loss": 0.4506, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.9893087557603687, |
| "grad_norm": 0.3139650821685791, |
| "learning_rate": 3.3645201813170546e-09, |
| "loss": 0.4483, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.9898617511520738, |
| "grad_norm": 0.318562775850296, |
| "learning_rate": 3.0197148777838524e-09, |
| "loss": 0.4669, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.9904147465437788, |
| "grad_norm": 0.31068408489227295, |
| "learning_rate": 2.693540401569883e-09, |
| "loss": 0.4519, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.9909677419354839, |
| "grad_norm": 0.30719509720802307, |
| "learning_rate": 2.3859979687901326e-09, |
| "loss": 0.4284, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.9915207373271889, |
| "grad_norm": 0.28648641705513, |
| "learning_rate": 2.0970887260907123e-09, |
| "loss": 0.4556, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.992073732718894, |
| "grad_norm": 0.2835429608821869, |
| "learning_rate": 1.8268137506455286e-09, |
| "loss": 0.4764, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.992626728110599, |
| "grad_norm": 0.3018244206905365, |
| "learning_rate": 1.57517405015295e-09, |
| "loss": 0.4505, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.9931797235023041, |
| "grad_norm": 0.30132514238357544, |
| "learning_rate": 1.3421705628302584e-09, |
| "loss": 0.4686, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.9937327188940093, |
| "grad_norm": 0.292498379945755, |
| "learning_rate": 1.1278041574125376e-09, |
| "loss": 0.4295, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.9942857142857143, |
| "grad_norm": 0.2868688702583313, |
| "learning_rate": 9.320756331465675e-10, |
| "loss": 0.4246, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.9948387096774194, |
| "grad_norm": 0.2882429361343384, |
| "learning_rate": 7.549857197897137e-10, |
| "loss": 0.4539, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.9953917050691244, |
| "grad_norm": 0.2899753749370575, |
| "learning_rate": 5.965350776071521e-10, |
| "loss": 0.4748, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.9959447004608295, |
| "grad_norm": 0.293375700712204, |
| "learning_rate": 4.567242973696484e-10, |
| "loss": 0.4613, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.9964976958525346, |
| "grad_norm": 0.2795793116092682, |
| "learning_rate": 3.3555390034967214e-10, |
| "loss": 0.4732, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.9970506912442396, |
| "grad_norm": 0.29951760172843933, |
| "learning_rate": 2.330243383208419e-10, |
| "loss": 0.4612, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.9976036866359447, |
| "grad_norm": 0.2993388772010803, |
| "learning_rate": 1.4913599355625975e-10, |
| "loss": 0.4653, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.9981566820276497, |
| "grad_norm": 0.31436586380004883, |
| "learning_rate": 8.388917882684589e-11, |
| "loss": 0.4712, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.9987096774193548, |
| "grad_norm": 0.2935016453266144, |
| "learning_rate": 3.728413739967351e-11, |
| "loss": 0.4381, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.99926267281106, |
| "grad_norm": 0.29614418745040894, |
| "learning_rate": 9.321043037968657e-12, |
| "loss": 0.4752, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.999815668202765, |
| "grad_norm": 0.294443815946579, |
| "learning_rate": 0.0, |
| "loss": 0.4617, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.999815668202765, |
| "step": 1808, |
| "total_flos": 2730506599792640.0, |
| "train_loss": 0.488344039471276, |
| "train_runtime": 63700.3598, |
| "train_samples_per_second": 2.725, |
| "train_steps_per_second": 0.028 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1808, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2730506599792640.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
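
The table above is the raw `trainer_state.json` written by the Hugging Face `Trainer` at the end of this run (1808 steps, reported `train_loss` ≈ 0.4883). As a convenience, here is a minimal sketch for loading and summarizing such a state file; it is not part of the original log, the filename `trainer_state.json` and the smoothing window are assumptions, and the script only reads fields that appear in the data above.

```python
# Minimal sketch: parse a Trainer state file like the one above and
# summarize the per-step loss curve and final learning rate.
import json

with open("trainer_state.json") as f:  # assumed filename
    state = json.load(f)

# Per-step records; the final log_history entry holds aggregate stats
# (train_loss, train_runtime, ...) instead of a per-step loss.
records = [r for r in state["log_history"] if "loss" in r]

steps = [r["step"] for r in records]
losses = [r["loss"] for r in records]
lrs = [r["learning_rate"] for r in records]

def moving_average(xs, window=50):
    """Trailing moving average to smooth the noisy per-step loss."""
    out = []
    for i in range(len(xs)):
        lo = max(0, i - window + 1)
        out.append(sum(xs[lo:i + 1]) / (i + 1 - lo))
    return out

smoothed = moving_average(losses)

print(f"steps logged:        {len(steps)}")
print(f"first/last raw loss: {losses[0]:.4f} -> {losses[-1]:.4f}")
print(f"last smoothed loss:  {smoothed[-1]:.4f}")
print(f"final learning rate: {lrs[-1]:.3e}")
print(f"reported train_loss: {state['log_history'][-1].get('train_loss')}")
```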