| { |
| "best_metric": 0.04752533510327339, |
| "best_model_checkpoint": "results/checkpoint-35000", |
| "epoch": 10.0, |
| "eval_steps": 500, |
| "global_step": 36070, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02772387025228722, |
| "grad_norm": 1.750556230545044, |
| "learning_rate": 9.999814117181637e-06, |
| "loss": 6.049, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05544774050457444, |
| "grad_norm": 0.824866533279419, |
| "learning_rate": 9.999248953493363e-06, |
| "loss": 3.0817, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.08317161075686166, |
| "grad_norm": 0.4907461702823639, |
| "learning_rate": 9.998304532844263e-06, |
| "loss": 2.3969, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.11089548100914888, |
| "grad_norm": 0.4534800946712494, |
| "learning_rate": 9.996980926880713e-06, |
| "loss": 2.0935, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1386193512614361, |
| "grad_norm": 0.47491493821144104, |
| "learning_rate": 9.995278236015153e-06, |
| "loss": 1.9245, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1386193512614361, |
| "eval_valid_loss": 1.7945984601974487, |
| "eval_valid_runtime": 6.4498, |
| "eval_valid_samples_per_second": 214.426, |
| "eval_valid_steps_per_second": 6.822, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1386193512614361, |
| "eval_valid_target_loss": 1.875697374343872, |
| "eval_valid_target_runtime": 6.5527, |
| "eval_valid_target_samples_per_second": 218.841, |
| "eval_valid_target_steps_per_second": 6.867, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.16634322151372333, |
| "grad_norm": 0.5983259677886963, |
| "learning_rate": 9.99319658941846e-06, |
| "loss": 1.8294, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.19406709176601053, |
| "grad_norm": 0.6906803846359253, |
| "learning_rate": 9.990736145010146e-06, |
| "loss": 1.7625, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.22179096201829776, |
| "grad_norm": 1.4024661779403687, |
| "learning_rate": 9.987897089446381e-06, |
| "loss": 1.709, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.24951483227058496, |
| "grad_norm": 1.073205590248108, |
| "learning_rate": 9.984679638105837e-06, |
| "loss": 1.6595, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2772387025228722, |
| "grad_norm": 1.280462384223938, |
| "learning_rate": 9.981084035073337e-06, |
| "loss": 1.6153, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2772387025228722, |
| "eval_valid_loss": 1.5186923742294312, |
| "eval_valid_runtime": 6.4198, |
| "eval_valid_samples_per_second": 215.427, |
| "eval_valid_steps_per_second": 6.854, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2772387025228722, |
| "eval_valid_target_loss": 1.5994268655776978, |
| "eval_valid_target_runtime": 6.5778, |
| "eval_valid_target_samples_per_second": 218.006, |
| "eval_valid_target_steps_per_second": 6.841, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3049625727751594, |
| "grad_norm": 0.9407665133476257, |
| "learning_rate": 9.977110553121353e-06, |
| "loss": 1.567, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.33268644302744665, |
| "grad_norm": 1.5439337491989136, |
| "learning_rate": 9.972759493689301e-06, |
| "loss": 1.5275, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.36041031327973383, |
| "grad_norm": 2.2176036834716797, |
| "learning_rate": 9.968031186860677e-06, |
| "loss": 1.4833, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.38813418353202106, |
| "grad_norm": 1.6237233877182007, |
| "learning_rate": 9.962925991338018e-06, |
| "loss": 1.4457, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.4158580537843083, |
| "grad_norm": 1.3075989484786987, |
| "learning_rate": 9.957444294415685e-06, |
| "loss": 1.407, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.4158580537843083, |
| "eval_valid_loss": 1.326136589050293, |
| "eval_valid_runtime": 6.413, |
| "eval_valid_samples_per_second": 215.655, |
| "eval_valid_steps_per_second": 6.861, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.4158580537843083, |
| "eval_valid_target_loss": 1.3982958793640137, |
| "eval_valid_target_runtime": 6.5728, |
| "eval_valid_target_samples_per_second": 218.172, |
| "eval_valid_target_steps_per_second": 6.846, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.4435819240365955, |
| "grad_norm": 1.379807472229004, |
| "learning_rate": 9.951586511950491e-06, |
| "loss": 1.3768, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.47130579428888275, |
| "grad_norm": 0.737086832523346, |
| "learning_rate": 9.945353088330137e-06, |
| "loss": 1.347, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.4990296645411699, |
| "grad_norm": 0.6332296133041382, |
| "learning_rate": 9.93874449643952e-06, |
| "loss": 1.3188, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.5267535347934572, |
| "grad_norm": 0.6948099732398987, |
| "learning_rate": 9.931761237624833e-06, |
| "loss": 1.2903, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.5544774050457444, |
| "grad_norm": 0.9397527575492859, |
| "learning_rate": 9.924403841655565e-06, |
| "loss": 1.2671, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5544774050457444, |
| "eval_valid_loss": 1.2014020681381226, |
| "eval_valid_runtime": 6.4367, |
| "eval_valid_samples_per_second": 214.861, |
| "eval_valid_steps_per_second": 6.836, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5544774050457444, |
| "eval_valid_target_loss": 1.2820453643798828, |
| "eval_valid_target_runtime": 6.5614, |
| "eval_valid_target_samples_per_second": 218.55, |
| "eval_valid_target_steps_per_second": 6.858, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5822012752980316, |
| "grad_norm": 0.5302172303199768, |
| "learning_rate": 9.916672866684275e-06, |
| "loss": 1.2439, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.6099251455503188, |
| "grad_norm": 0.5439279675483704, |
| "learning_rate": 9.908568899204281e-06, |
| "loss": 1.2231, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.637649015802606, |
| "grad_norm": 0.7026234865188599, |
| "learning_rate": 9.90009255400514e-06, |
| "loss": 1.2027, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.6653728860548933, |
| "grad_norm": 0.642803430557251, |
| "learning_rate": 9.89124447412603e-06, |
| "loss": 1.1864, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6930967563071805, |
| "grad_norm": 1.3601601123809814, |
| "learning_rate": 9.882025330806952e-06, |
| "loss": 1.1654, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.6930967563071805, |
| "eval_valid_loss": 1.1063387393951416, |
| "eval_valid_runtime": 6.4314, |
| "eval_valid_samples_per_second": 215.037, |
| "eval_valid_steps_per_second": 6.841, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.6930967563071805, |
| "eval_valid_target_loss": 1.208246111869812, |
| "eval_valid_target_runtime": 6.5564, |
| "eval_valid_target_samples_per_second": 218.719, |
| "eval_valid_target_steps_per_second": 6.864, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.7208206265594677, |
| "grad_norm": 0.7053922414779663, |
| "learning_rate": 9.872435823437816e-06, |
| "loss": 1.1433, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.748544496811755, |
| "grad_norm": 0.6601741909980774, |
| "learning_rate": 9.862476679505384e-06, |
| "loss": 1.1193, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.7762683670640421, |
| "grad_norm": 0.7706498503684998, |
| "learning_rate": 9.852148654538072e-06, |
| "loss": 1.0954, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.8039922373163294, |
| "grad_norm": 0.8355486392974854, |
| "learning_rate": 9.841452532048648e-06, |
| "loss": 1.069, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.8317161075686166, |
| "grad_norm": 0.8369494676589966, |
| "learning_rate": 9.830389123474773e-06, |
| "loss": 1.0384, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8317161075686166, |
| "eval_valid_loss": 0.9615023732185364, |
| "eval_valid_runtime": 6.4156, |
| "eval_valid_samples_per_second": 215.57, |
| "eval_valid_steps_per_second": 6.858, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8317161075686166, |
| "eval_valid_target_loss": 1.0947415828704834, |
| "eval_valid_target_runtime": 6.5753, |
| "eval_valid_target_samples_per_second": 218.088, |
| "eval_valid_target_steps_per_second": 6.844, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8594399778209038, |
| "grad_norm": 1.4864110946655273, |
| "learning_rate": 9.818959268117464e-06, |
| "loss": 1.0103, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.887163848073191, |
| "grad_norm": 0.7728907465934753, |
| "learning_rate": 9.807163833077407e-06, |
| "loss": 0.982, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.9148877183254782, |
| "grad_norm": 0.6881595253944397, |
| "learning_rate": 9.795003713189187e-06, |
| "loss": 0.9492, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.9426115885777655, |
| "grad_norm": 1.0222816467285156, |
| "learning_rate": 9.782479830953388e-06, |
| "loss": 0.9142, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.9703354588300527, |
| "grad_norm": 0.6671555042266846, |
| "learning_rate": 9.769593136466633e-06, |
| "loss": 0.8838, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.9703354588300527, |
| "eval_valid_loss": 0.8037808537483215, |
| "eval_valid_runtime": 6.4314, |
| "eval_valid_samples_per_second": 215.038, |
| "eval_valid_steps_per_second": 6.841, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.9703354588300527, |
| "eval_valid_target_loss": 0.9639121294021606, |
| "eval_valid_target_runtime": 6.6053, |
| "eval_valid_target_samples_per_second": 217.1, |
| "eval_valid_target_steps_per_second": 6.813, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.9980593290823399, |
| "grad_norm": 0.7793981432914734, |
| "learning_rate": 9.756344607349483e-06, |
| "loss": 0.8496, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.0257831993346271, |
| "grad_norm": 0.7545821070671082, |
| "learning_rate": 9.74273524867229e-06, |
| "loss": 0.8117, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.0535070695869144, |
| "grad_norm": 0.631118893623352, |
| "learning_rate": 9.728766092878934e-06, |
| "loss": 0.7749, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.0812309398392015, |
| "grad_norm": 0.7934292554855347, |
| "learning_rate": 9.714438199708516e-06, |
| "loss": 0.7321, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.1089548100914888, |
| "grad_norm": 0.6160613298416138, |
| "learning_rate": 9.699752656114947e-06, |
| "loss": 0.6891, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.1089548100914888, |
| "eval_valid_loss": 0.5853330492973328, |
| "eval_valid_runtime": 6.4069, |
| "eval_valid_samples_per_second": 215.861, |
| "eval_valid_steps_per_second": 6.868, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.1089548100914888, |
| "eval_valid_target_loss": 0.7543638944625854, |
| "eval_valid_target_runtime": 6.5591, |
| "eval_valid_target_samples_per_second": 218.627, |
| "eval_valid_target_steps_per_second": 6.861, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.136678680343776, |
| "grad_norm": 0.4765689969062805, |
| "learning_rate": 9.684710576184504e-06, |
| "loss": 0.6383, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.1644025505960631, |
| "grad_norm": 0.7610909938812256, |
| "learning_rate": 9.669313101051295e-06, |
| "loss": 0.5894, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.1921264208483504, |
| "grad_norm": 0.5010733008384705, |
| "learning_rate": 9.653561398810706e-06, |
| "loss": 0.5446, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.2198502911006377, |
| "grad_norm": 0.6305666565895081, |
| "learning_rate": 9.637456664430776e-06, |
| "loss": 0.5097, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.247574161352925, |
| "grad_norm": 0.8064519762992859, |
| "learning_rate": 9.621000119661545e-06, |
| "loss": 0.4678, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.247574161352925, |
| "eval_valid_loss": 0.38276800513267517, |
| "eval_valid_runtime": 6.4349, |
| "eval_valid_samples_per_second": 214.922, |
| "eval_valid_steps_per_second": 6.838, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.247574161352925, |
| "eval_valid_target_loss": 0.4976137578487396, |
| "eval_valid_target_runtime": 6.5738, |
| "eval_valid_target_samples_per_second": 218.139, |
| "eval_valid_target_steps_per_second": 6.845, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.275298031605212, |
| "grad_norm": 0.49154090881347656, |
| "learning_rate": 9.604193012942375e-06, |
| "loss": 0.4326, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.3030219018574993, |
| "grad_norm": 0.5592367053031921, |
| "learning_rate": 9.587036619307226e-06, |
| "loss": 0.4054, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.3307457721097866, |
| "grad_norm": 0.48195400834083557, |
| "learning_rate": 9.569532240287946e-06, |
| "loss": 0.3828, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.3584696423620737, |
| "grad_norm": 0.5364578366279602, |
| "learning_rate": 9.551681203815517e-06, |
| "loss": 0.3595, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.386193512614361, |
| "grad_norm": 0.5409713387489319, |
| "learning_rate": 9.533484864119327e-06, |
| "loss": 0.3405, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.386193512614361, |
| "eval_valid_loss": 0.2857649326324463, |
| "eval_valid_runtime": 6.4118, |
| "eval_valid_samples_per_second": 215.697, |
| "eval_valid_steps_per_second": 6.862, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.386193512614361, |
| "eval_valid_target_loss": 0.33146464824676514, |
| "eval_valid_target_runtime": 6.5717, |
| "eval_valid_target_samples_per_second": 218.209, |
| "eval_valid_target_steps_per_second": 6.848, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.4139173828666483, |
| "grad_norm": 0.7294422388076782, |
| "learning_rate": 9.514944601624427e-06, |
| "loss": 0.328, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.4416412531189353, |
| "grad_norm": 0.4695785343647003, |
| "learning_rate": 9.49606182284681e-06, |
| "loss": 0.3095, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.4693651233712226, |
| "grad_norm": 0.5484552979469299, |
| "learning_rate": 9.476837960286707e-06, |
| "loss": 0.3016, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.49708899362351, |
| "grad_norm": 0.38614729046821594, |
| "learning_rate": 9.457274472319919e-06, |
| "loss": 0.2875, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.524812863875797, |
| "grad_norm": 0.3303731381893158, |
| "learning_rate": 9.437372843087175e-06, |
| "loss": 0.2821, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.524812863875797, |
| "eval_valid_loss": 0.23669035732746124, |
| "eval_valid_runtime": 6.4303, |
| "eval_valid_samples_per_second": 215.074, |
| "eval_valid_steps_per_second": 6.843, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.524812863875797, |
| "eval_valid_target_loss": 0.2617432773113251, |
| "eval_valid_target_runtime": 6.5556, |
| "eval_valid_target_samples_per_second": 218.744, |
| "eval_valid_target_steps_per_second": 6.864, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.5525367341280842, |
| "grad_norm": 0.5144414305686951, |
| "learning_rate": 9.417134582381548e-06, |
| "loss": 0.2696, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.5802606043803715, |
| "grad_norm": 0.5522892475128174, |
| "learning_rate": 9.396561225533902e-06, |
| "loss": 0.2617, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.6079844746326586, |
| "grad_norm": 0.4152807295322418, |
| "learning_rate": 9.37565433329644e-06, |
| "loss": 0.2522, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.635708344884946, |
| "grad_norm": 0.3866608142852783, |
| "learning_rate": 9.35441549172428e-06, |
| "loss": 0.2469, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.6634322151372332, |
| "grad_norm": 0.3131564259529114, |
| "learning_rate": 9.33284631205515e-06, |
| "loss": 0.2425, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.6634322151372332, |
| "eval_valid_loss": 0.20471729338169098, |
| "eval_valid_runtime": 6.4284, |
| "eval_valid_samples_per_second": 215.138, |
| "eval_valid_steps_per_second": 6.845, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.6634322151372332, |
| "eval_valid_target_loss": 0.2232024222612381, |
| "eval_valid_target_runtime": 6.5873, |
| "eval_valid_target_samples_per_second": 217.69, |
| "eval_valid_target_steps_per_second": 6.831, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.6911560853895202, |
| "grad_norm": 0.4385012090206146, |
| "learning_rate": 9.31094843058714e-06, |
| "loss": 0.2346, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.7188799556418077, |
| "grad_norm": 0.3904290497303009, |
| "learning_rate": 9.28872350855458e-06, |
| "loss": 0.2279, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.7466038258940948, |
| "grad_norm": 0.4294661581516266, |
| "learning_rate": 9.266173232002005e-06, |
| "loss": 0.2218, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.774327696146382, |
| "grad_norm": 0.40256062150001526, |
| "learning_rate": 9.243299311656253e-06, |
| "loss": 0.2189, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.8020515663986694, |
| "grad_norm": 0.39798569679260254, |
| "learning_rate": 9.220103482796683e-06, |
| "loss": 0.2154, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.8020515663986694, |
| "eval_valid_loss": 0.18116505444049835, |
| "eval_valid_runtime": 6.4306, |
| "eval_valid_samples_per_second": 215.065, |
| "eval_valid_steps_per_second": 6.842, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.8020515663986694, |
| "eval_valid_target_loss": 0.19611063599586487, |
| "eval_valid_target_runtime": 6.5521, |
| "eval_valid_target_samples_per_second": 218.86, |
| "eval_valid_target_steps_per_second": 6.868, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.8297754366509564, |
| "grad_norm": 0.2555886507034302, |
| "learning_rate": 9.196587505123526e-06, |
| "loss": 0.2082, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.8574993069032437, |
| "grad_norm": 0.278145968914032, |
| "learning_rate": 9.172753162624401e-06, |
| "loss": 0.2025, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.885223177155531, |
| "grad_norm": 0.43592485785484314, |
| "learning_rate": 9.148602263438967e-06, |
| "loss": 0.2006, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.912947047407818, |
| "grad_norm": 0.3828723430633545, |
| "learning_rate": 9.124136639721757e-06, |
| "loss": 0.1963, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.9406709176601054, |
| "grad_norm": 0.3468044102191925, |
| "learning_rate": 9.09935814750318e-06, |
| "loss": 0.1928, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.9406709176601054, |
| "eval_valid_loss": 0.16255635023117065, |
| "eval_valid_runtime": 6.4262, |
| "eval_valid_samples_per_second": 215.213, |
| "eval_valid_steps_per_second": 6.847, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.9406709176601054, |
| "eval_valid_target_loss": 0.17588204145431519, |
| "eval_valid_target_runtime": 6.5759, |
| "eval_valid_target_samples_per_second": 218.07, |
| "eval_valid_target_steps_per_second": 6.843, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.9683947879123926, |
| "grad_norm": 0.28793609142303467, |
| "learning_rate": 9.074268666548728e-06, |
| "loss": 0.1868, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.9961186581646797, |
| "grad_norm": 0.4627343714237213, |
| "learning_rate": 9.04887010021636e-06, |
| "loss": 0.1857, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.023842528416967, |
| "grad_norm": 0.4490989148616791, |
| "learning_rate": 9.023164375312117e-06, |
| "loss": 0.1786, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.0515663986692543, |
| "grad_norm": 0.319859117269516, |
| "learning_rate": 8.997153441943944e-06, |
| "loss": 0.1779, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.0792902689215413, |
| "grad_norm": 0.3379845917224884, |
| "learning_rate": 8.970839273373748e-06, |
| "loss": 0.1717, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.0792902689215413, |
| "eval_valid_loss": 0.1455078125, |
| "eval_valid_runtime": 6.4396, |
| "eval_valid_samples_per_second": 214.766, |
| "eval_valid_steps_per_second": 6.833, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.0792902689215413, |
| "eval_valid_target_loss": 0.15758885443210602, |
| "eval_valid_target_runtime": 6.5627, |
| "eval_valid_target_samples_per_second": 218.508, |
| "eval_valid_target_steps_per_second": 6.857, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.107014139173829, |
| "grad_norm": 0.3079555928707123, |
| "learning_rate": 8.944223865867712e-06, |
| "loss": 0.1688, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.134738009426116, |
| "grad_norm": 0.346603125333786, |
| "learning_rate": 8.917309238544834e-06, |
| "loss": 0.1661, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.162461879678403, |
| "grad_norm": 0.3899448812007904, |
| "learning_rate": 8.890097433223766e-06, |
| "loss": 0.1653, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.1901857499306905, |
| "grad_norm": 0.31352731585502625, |
| "learning_rate": 8.862590514267915e-06, |
| "loss": 0.1609, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.2179096201829775, |
| "grad_norm": 0.29558128118515015, |
| "learning_rate": 8.834790568428827e-06, |
| "loss": 0.158, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.2179096201829775, |
| "eval_valid_loss": 0.1319538652896881, |
| "eval_valid_runtime": 6.417, |
| "eval_valid_samples_per_second": 215.521, |
| "eval_valid_steps_per_second": 6.857, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.2179096201829775, |
| "eval_valid_target_loss": 0.1427442878484726, |
| "eval_valid_target_runtime": 6.5854, |
| "eval_valid_target_samples_per_second": 217.754, |
| "eval_valid_target_steps_per_second": 6.833, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.2456334904352646, |
| "grad_norm": 0.29061177372932434, |
| "learning_rate": 8.80669970468788e-06, |
| "loss": 0.1545, |
| "step": 8100 |
| }, |
| { |
| "epoch": 2.273357360687552, |
| "grad_norm": 0.3253875970840454, |
| "learning_rate": 8.778320054096306e-06, |
| "loss": 0.1528, |
| "step": 8200 |
| }, |
| { |
| "epoch": 2.301081230939839, |
| "grad_norm": 0.2402360886335373, |
| "learning_rate": 8.749653769613502e-06, |
| "loss": 0.1511, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.3288051011921262, |
| "grad_norm": 0.31634458899497986, |
| "learning_rate": 8.720703025943717e-06, |
| "loss": 0.1461, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.3565289714444138, |
| "grad_norm": 0.21685920655727386, |
| "learning_rate": 8.691470019371065e-06, |
| "loss": 0.143, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.3565289714444138, |
| "eval_valid_loss": 0.12121625989675522, |
| "eval_valid_runtime": 6.4171, |
| "eval_valid_samples_per_second": 215.519, |
| "eval_valid_steps_per_second": 6.857, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.3565289714444138, |
| "eval_valid_target_loss": 0.1312141716480255, |
| "eval_valid_target_runtime": 6.57, |
| "eval_valid_target_samples_per_second": 218.266, |
| "eval_valid_target_steps_per_second": 6.849, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.384252841696701, |
| "grad_norm": 0.24635937809944153, |
| "learning_rate": 8.661956967592907e-06, |
| "loss": 0.1424, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.411976711948988, |
| "grad_norm": 0.21958141028881073, |
| "learning_rate": 8.632166109551623e-06, |
| "loss": 0.1388, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.4397005822012754, |
| "grad_norm": 0.2693657875061035, |
| "learning_rate": 8.60209970526474e-06, |
| "loss": 0.1392, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.4674244524535625, |
| "grad_norm": 0.22512082755565643, |
| "learning_rate": 8.5717600356535e-06, |
| "loss": 0.1356, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.49514832270585, |
| "grad_norm": 0.3446211516857147, |
| "learning_rate": 8.541149402369806e-06, |
| "loss": 0.1324, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.49514832270585, |
| "eval_valid_loss": 0.11042323708534241, |
| "eval_valid_runtime": 6.4273, |
| "eval_valid_samples_per_second": 215.176, |
| "eval_valid_steps_per_second": 6.846, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.49514832270585, |
| "eval_valid_target_loss": 0.11918216943740845, |
| "eval_valid_target_runtime": 6.5885, |
| "eval_valid_target_samples_per_second": 217.651, |
| "eval_valid_target_steps_per_second": 6.83, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.522872192958137, |
| "grad_norm": 0.21913643181324005, |
| "learning_rate": 8.51027012762163e-06, |
| "loss": 0.1303, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.550596063210424, |
| "grad_norm": 0.24243904650211334, |
| "learning_rate": 8.479124553996824e-06, |
| "loss": 0.1268, |
| "step": 9200 |
| }, |
| { |
| "epoch": 2.578319933462711, |
| "grad_norm": 0.22184187173843384, |
| "learning_rate": 8.447715044285425e-06, |
| "loss": 0.1251, |
| "step": 9300 |
| }, |
| { |
| "epoch": 2.6060438037149987, |
| "grad_norm": 0.22888724505901337, |
| "learning_rate": 8.41604398130039e-06, |
| "loss": 0.1221, |
| "step": 9400 |
| }, |
| { |
| "epoch": 2.6337676739672857, |
| "grad_norm": 0.24152572453022003, |
| "learning_rate": 8.384113767696838e-06, |
| "loss": 0.121, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.6337676739672857, |
| "eval_valid_loss": 0.10074004530906677, |
| "eval_valid_runtime": 6.4317, |
| "eval_valid_samples_per_second": 215.03, |
| "eval_valid_steps_per_second": 6.841, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.6337676739672857, |
| "eval_valid_target_loss": 0.10891123861074448, |
| "eval_valid_target_runtime": 6.5593, |
| "eval_valid_target_samples_per_second": 218.622, |
| "eval_valid_target_steps_per_second": 6.861, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.6614915442195732, |
| "grad_norm": 0.2756216526031494, |
| "learning_rate": 8.35192682578978e-06, |
| "loss": 0.1195, |
| "step": 9600 |
| }, |
| { |
| "epoch": 2.6892154144718603, |
| "grad_norm": 0.24438254535198212, |
| "learning_rate": 8.319485597370348e-06, |
| "loss": 0.1157, |
| "step": 9700 |
| }, |
| { |
| "epoch": 2.7169392847241474, |
| "grad_norm": 0.35991132259368896, |
| "learning_rate": 8.286792543520556e-06, |
| "loss": 0.115, |
| "step": 9800 |
| }, |
| { |
| "epoch": 2.744663154976435, |
| "grad_norm": 0.22763152420520782, |
| "learning_rate": 8.253850144426606e-06, |
| "loss": 0.1134, |
| "step": 9900 |
| }, |
| { |
| "epoch": 2.772387025228722, |
| "grad_norm": 0.24357567727565765, |
| "learning_rate": 8.220660899190712e-06, |
| "loss": 0.1106, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.772387025228722, |
| "eval_valid_loss": 0.092686228454113, |
| "eval_valid_runtime": 6.4287, |
| "eval_valid_samples_per_second": 215.129, |
| "eval_valid_steps_per_second": 6.844, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.772387025228722, |
| "eval_valid_target_loss": 0.1005280539393425, |
| "eval_valid_target_runtime": 6.5902, |
| "eval_valid_target_samples_per_second": 217.596, |
| "eval_valid_target_steps_per_second": 6.828, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.800110895481009, |
| "grad_norm": 0.20446299016475677, |
| "learning_rate": 8.187227325641534e-06, |
| "loss": 0.109, |
| "step": 10100 |
| }, |
| { |
| "epoch": 2.8278347657332965, |
| "grad_norm": 0.24309873580932617, |
| "learning_rate": 8.153551960143157e-06, |
| "loss": 0.1087, |
| "step": 10200 |
| }, |
| { |
| "epoch": 2.8555586359855836, |
| "grad_norm": 0.21243679523468018, |
| "learning_rate": 8.119637357402676e-06, |
| "loss": 0.1063, |
| "step": 10300 |
| }, |
| { |
| "epoch": 2.8832825062378706, |
| "grad_norm": 0.2227753847837448, |
| "learning_rate": 8.085486090276391e-06, |
| "loss": 0.1057, |
| "step": 10400 |
| }, |
| { |
| "epoch": 2.911006376490158, |
| "grad_norm": 0.1933346837759018, |
| "learning_rate": 8.05110074957462e-06, |
| "loss": 0.1037, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.911006376490158, |
| "eval_valid_loss": 0.08755628019571304, |
| "eval_valid_runtime": 6.4374, |
| "eval_valid_samples_per_second": 214.84, |
| "eval_valid_steps_per_second": 6.835, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.911006376490158, |
| "eval_valid_target_loss": 0.09479602426290512, |
| "eval_valid_target_runtime": 6.5624, |
| "eval_valid_target_samples_per_second": 218.517, |
| "eval_valid_target_steps_per_second": 6.857, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.938730246742445, |
| "grad_norm": 0.24507193267345428, |
| "learning_rate": 8.016483943865158e-06, |
| "loss": 0.1026, |
| "step": 10600 |
| }, |
| { |
| "epoch": 2.9664541169947327, |
| "grad_norm": 0.16903254389762878, |
| "learning_rate": 7.98163829927538e-06, |
| "loss": 0.1019, |
| "step": 10700 |
| }, |
| { |
| "epoch": 2.99417798724702, |
| "grad_norm": 0.21406187117099762, |
| "learning_rate": 7.946566459293014e-06, |
| "loss": 0.1016, |
| "step": 10800 |
| }, |
| { |
| "epoch": 3.021901857499307, |
| "grad_norm": 0.17749078571796417, |
| "learning_rate": 7.911271084565603e-06, |
| "loss": 0.0988, |
| "step": 10900 |
| }, |
| { |
| "epoch": 3.049625727751594, |
| "grad_norm": 0.2052767425775528, |
| "learning_rate": 7.875754852698658e-06, |
| "loss": 0.099, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.049625727751594, |
| "eval_valid_loss": 0.08359777182340622, |
| "eval_valid_runtime": 6.4134, |
| "eval_valid_samples_per_second": 215.643, |
| "eval_valid_steps_per_second": 6.861, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.049625727751594, |
| "eval_valid_target_loss": 0.09044167399406433, |
| "eval_valid_target_runtime": 6.5678, |
| "eval_valid_target_samples_per_second": 218.336, |
| "eval_valid_target_steps_per_second": 6.852, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.0773495980038814, |
| "grad_norm": 0.20621031522750854, |
| "learning_rate": 7.840020458052529e-06, |
| "loss": 0.0961, |
| "step": 11100 |
| }, |
| { |
| "epoch": 3.1050734682561685, |
| "grad_norm": 0.18608888983726501, |
| "learning_rate": 7.804070611538001e-06, |
| "loss": 0.0964, |
| "step": 11200 |
| }, |
| { |
| "epoch": 3.132797338508456, |
| "grad_norm": 0.14550629258155823, |
| "learning_rate": 7.767908040410642e-06, |
| "loss": 0.0957, |
| "step": 11300 |
| }, |
| { |
| "epoch": 3.160521208760743, |
| "grad_norm": 0.21664443612098694, |
| "learning_rate": 7.731535488063895e-06, |
| "loss": 0.0948, |
| "step": 11400 |
| }, |
| { |
| "epoch": 3.18824507901303, |
| "grad_norm": 0.17702756822109222, |
| "learning_rate": 7.694955713820974e-06, |
| "loss": 0.0935, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.18824507901303, |
| "eval_valid_loss": 0.07985392957925797, |
| "eval_valid_runtime": 6.4194, |
| "eval_valid_samples_per_second": 215.442, |
| "eval_valid_steps_per_second": 6.854, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.18824507901303, |
| "eval_valid_target_loss": 0.08640262484550476, |
| "eval_valid_target_runtime": 6.5608, |
| "eval_valid_target_samples_per_second": 218.572, |
| "eval_valid_target_steps_per_second": 6.859, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.2159689492653176, |
| "grad_norm": 0.19913919270038605, |
| "learning_rate": 7.658171492725513e-06, |
| "loss": 0.0936, |
| "step": 11600 |
| }, |
| { |
| "epoch": 3.2436928195176047, |
| "grad_norm": 0.18789726495742798, |
| "learning_rate": 7.621185615331061e-06, |
| "loss": 0.0924, |
| "step": 11700 |
| }, |
| { |
| "epoch": 3.2714166897698918, |
| "grad_norm": 0.18376338481903076, |
| "learning_rate": 7.584000887489373e-06, |
| "loss": 0.0911, |
| "step": 11800 |
| }, |
| { |
| "epoch": 3.2991405600221793, |
| "grad_norm": 0.19736219942569733, |
| "learning_rate": 7.546620130137557e-06, |
| "loss": 0.0912, |
| "step": 11900 |
| }, |
| { |
| "epoch": 3.3268644302744663, |
| "grad_norm": 0.19527922570705414, |
| "learning_rate": 7.509046179084061e-06, |
| "loss": 0.0912, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.3268644302744663, |
| "eval_valid_loss": 0.07622889429330826, |
| "eval_valid_runtime": 6.4437, |
| "eval_valid_samples_per_second": 214.627, |
| "eval_valid_steps_per_second": 6.828, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.3268644302744663, |
| "eval_valid_target_loss": 0.0823676660656929, |
| "eval_valid_target_runtime": 6.5589, |
| "eval_valid_target_samples_per_second": 218.635, |
| "eval_valid_target_steps_per_second": 6.861, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.3545883005267534, |
| "grad_norm": 0.18916228413581848, |
| "learning_rate": 7.471281884793544e-06, |
| "loss": 0.0896, |
| "step": 12100 |
| }, |
| { |
| "epoch": 3.382312170779041, |
| "grad_norm": 0.1649465262889862, |
| "learning_rate": 7.4333301121706445e-06, |
| "loss": 0.0881, |
| "step": 12200 |
| }, |
| { |
| "epoch": 3.410036041031328, |
| "grad_norm": 0.18362993001937866, |
| "learning_rate": 7.3951937403426186e-06, |
| "loss": 0.0892, |
| "step": 12300 |
| }, |
| { |
| "epoch": 3.437759911283615, |
| "grad_norm": 0.19268861413002014, |
| "learning_rate": 7.356875662440939e-06, |
| "loss": 0.0879, |
| "step": 12400 |
| }, |
| { |
| "epoch": 3.4654837815359025, |
| "grad_norm": 0.17124581336975098, |
| "learning_rate": 7.318378785381802e-06, |
| "loss": 0.086, |
| "step": 12500 |
| }, |
| { |
| "epoch": 3.4654837815359025, |
| "eval_valid_loss": 0.07317828387022018, |
| "eval_valid_runtime": 6.4273, |
| "eval_valid_samples_per_second": 215.177, |
| "eval_valid_steps_per_second": 6.846, |
| "step": 12500 |
| }, |
| { |
| "epoch": 3.4654837815359025, |
| "eval_valid_target_loss": 0.07900213450193405, |
| "eval_valid_target_runtime": 6.5852, |
| "eval_valid_target_samples_per_second": 217.76, |
| "eval_valid_target_steps_per_second": 6.833, |
| "step": 12500 |
| }, |
| { |
| "epoch": 3.4932076517881896, |
| "grad_norm": 0.23004941642284393, |
| "learning_rate": 7.279706029645615e-06, |
| "loss": 0.0855, |
| "step": 12600 |
| }, |
| { |
| "epoch": 3.5209315220404767, |
| "grad_norm": 0.16131635010242462, |
| "learning_rate": 7.240860329055422e-06, |
| "loss": 0.0848, |
| "step": 12700 |
| }, |
| { |
| "epoch": 3.548655392292764, |
| "grad_norm": 0.19867731630802155, |
| "learning_rate": 7.201844630554353e-06, |
| "loss": 0.0851, |
| "step": 12800 |
| }, |
| { |
| "epoch": 3.5763792625450512, |
| "grad_norm": 0.17405714094638824, |
| "learning_rate": 7.162661893982052e-06, |
| "loss": 0.0839, |
| "step": 12900 |
| }, |
| { |
| "epoch": 3.6041031327973387, |
| "grad_norm": 0.19404906034469604, |
| "learning_rate": 7.123315091850136e-06, |
| "loss": 0.0839, |
| "step": 13000 |
| }, |
| { |
| "epoch": 3.6041031327973387, |
| "eval_valid_loss": 0.07132507115602493, |
| "eval_valid_runtime": 6.4118, |
| "eval_valid_samples_per_second": 215.695, |
| "eval_valid_steps_per_second": 6.862, |
| "step": 13000 |
| }, |
| { |
| "epoch": 3.6041031327973387, |
| "eval_valid_target_loss": 0.0771123468875885, |
| "eval_valid_target_runtime": 6.5745, |
| "eval_valid_target_samples_per_second": 218.117, |
| "eval_valid_target_steps_per_second": 6.845, |
| "step": 13000 |
| }, |
| { |
| "epoch": 3.631827003049626, |
| "grad_norm": 0.15152141451835632, |
| "learning_rate": 7.083807209116689e-06, |
| "loss": 0.0836, |
| "step": 13100 |
| }, |
| { |
| "epoch": 3.659550873301913, |
| "grad_norm": 0.18368007242679596, |
| "learning_rate": 7.044141242959826e-06, |
| "loss": 0.0827, |
| "step": 13200 |
| }, |
| { |
| "epoch": 3.6872747435542, |
| "grad_norm": 0.18081355094909668, |
| "learning_rate": 7.004320202550303e-06, |
| "loss": 0.0823, |
| "step": 13300 |
| }, |
| { |
| "epoch": 3.7149986138064874, |
| "grad_norm": 0.15222586691379547, |
| "learning_rate": 6.9643471088232506e-06, |
| "loss": 0.0801, |
| "step": 13400 |
| }, |
| { |
| "epoch": 3.7427224840587745, |
| "grad_norm": 0.1571241021156311, |
| "learning_rate": 6.9242249942489755e-06, |
| "loss": 0.0807, |
| "step": 13500 |
| }, |
| { |
| "epoch": 3.7427224840587745, |
| "eval_valid_loss": 0.06911951303482056, |
| "eval_valid_runtime": 6.4701, |
| "eval_valid_samples_per_second": 213.752, |
| "eval_valid_steps_per_second": 6.8, |
| "step": 13500 |
| }, |
| { |
| "epoch": 3.7427224840587745, |
| "eval_valid_target_loss": 0.07482416182756424, |
| "eval_valid_target_runtime": 6.5611, |
| "eval_valid_target_samples_per_second": 218.56, |
| "eval_valid_target_steps_per_second": 6.859, |
| "step": 13500 |
| }, |
| { |
| "epoch": 3.770446354311062, |
| "grad_norm": 0.1546078324317932, |
| "learning_rate": 6.883956902602933e-06, |
| "loss": 0.0811, |
| "step": 13600 |
| }, |
| { |
| "epoch": 3.798170224563349, |
| "grad_norm": 0.1428447812795639, |
| "learning_rate": 6.843545888734801e-06, |
| "loss": 0.0795, |
| "step": 13700 |
| }, |
| { |
| "epoch": 3.825894094815636, |
| "grad_norm": 0.1369272619485855, |
| "learning_rate": 6.802995018336736e-06, |
| "loss": 0.0794, |
| "step": 13800 |
| }, |
| { |
| "epoch": 3.8536179650679236, |
| "grad_norm": 0.1972970962524414, |
| "learning_rate": 6.762307367710797e-06, |
| "loss": 0.0785, |
| "step": 13900 |
| }, |
| { |
| "epoch": 3.8813418353202107, |
| "grad_norm": 0.15961000323295593, |
| "learning_rate": 6.721486023535577e-06, |
| "loss": 0.0787, |
| "step": 14000 |
| }, |
| { |
| "epoch": 3.8813418353202107, |
| "eval_valid_loss": 0.06712613999843597, |
| "eval_valid_runtime": 6.4106, |
| "eval_valid_samples_per_second": 215.737, |
| "eval_valid_steps_per_second": 6.864, |
| "step": 14000 |
| }, |
| { |
| "epoch": 3.8813418353202107, |
| "eval_valid_target_loss": 0.07271508872509003, |
| "eval_valid_target_runtime": 6.5891, |
| "eval_valid_target_samples_per_second": 217.633, |
| "eval_valid_target_steps_per_second": 6.829, |
| "step": 14000 |
| }, |
| { |
| "epoch": 3.9090657055724978, |
| "grad_norm": 0.15836742520332336, |
| "learning_rate": 6.680534082632036e-06, |
| "loss": 0.0779, |
| "step": 14100 |
| }, |
| { |
| "epoch": 3.9367895758247853, |
| "grad_norm": 0.1906501203775406, |
| "learning_rate": 6.639454651728561e-06, |
| "loss": 0.0772, |
| "step": 14200 |
| }, |
| { |
| "epoch": 3.9645134460770723, |
| "grad_norm": 0.1872212439775467, |
| "learning_rate": 6.598250847225286e-06, |
| "loss": 0.0772, |
| "step": 14300 |
| }, |
| { |
| "epoch": 3.9922373163293594, |
| "grad_norm": 0.1689438670873642, |
| "learning_rate": 6.556925794957678e-06, |
| "loss": 0.0769, |
| "step": 14400 |
| }, |
| { |
| "epoch": 4.0199611865816465, |
| "grad_norm": 0.1830626279115677, |
| "learning_rate": 6.515482629959392e-06, |
| "loss": 0.0764, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.0199611865816465, |
| "eval_valid_loss": 0.0653899684548378, |
| "eval_valid_runtime": 6.4271, |
| "eval_valid_samples_per_second": 215.181, |
| "eval_valid_steps_per_second": 6.846, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.0199611865816465, |
| "eval_valid_target_loss": 0.0708317682147026, |
| "eval_valid_target_runtime": 6.5574, |
| "eval_valid_target_samples_per_second": 218.684, |
| "eval_valid_target_steps_per_second": 6.862, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.047685056833934, |
| "grad_norm": 0.1517285257577896, |
| "learning_rate": 6.473924496224447e-06, |
| "loss": 0.0757, |
| "step": 14600 |
| }, |
| { |
| "epoch": 4.0754089270862215, |
| "grad_norm": 0.15981799364089966, |
| "learning_rate": 6.432254546468708e-06, |
| "loss": 0.0751, |
| "step": 14700 |
| }, |
| { |
| "epoch": 4.1031327973385086, |
| "grad_norm": 0.14974670112133026, |
| "learning_rate": 6.3904759418907194e-06, |
| "loss": 0.0755, |
| "step": 14800 |
| }, |
| { |
| "epoch": 4.130856667590796, |
| "grad_norm": 0.15918827056884766, |
| "learning_rate": 6.348591851931879e-06, |
| "loss": 0.0743, |
| "step": 14900 |
| }, |
| { |
| "epoch": 4.158580537843083, |
| "grad_norm": 0.17248332500457764, |
| "learning_rate": 6.306605454036001e-06, |
| "loss": 0.0747, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.158580537843083, |
| "eval_valid_loss": 0.06470626592636108, |
| "eval_valid_runtime": 6.4429, |
| "eval_valid_samples_per_second": 214.654, |
| "eval_valid_steps_per_second": 6.829, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.158580537843083, |
| "eval_valid_target_loss": 0.07004554569721222, |
| "eval_valid_target_runtime": 6.5941, |
| "eval_valid_target_samples_per_second": 217.468, |
| "eval_valid_target_steps_per_second": 6.824, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.18630440809537, |
| "grad_norm": 0.18200209736824036, |
| "learning_rate": 6.2645199334082674e-06, |
| "loss": 0.0735, |
| "step": 15100 |
| }, |
| { |
| "epoch": 4.214028278347658, |
| "grad_norm": 0.12851852178573608, |
| "learning_rate": 6.222338482773584e-06, |
| "loss": 0.0736, |
| "step": 15200 |
| }, |
| { |
| "epoch": 4.241752148599945, |
| "grad_norm": 0.15132804214954376, |
| "learning_rate": 6.180064302134374e-06, |
| "loss": 0.0738, |
| "step": 15300 |
| }, |
| { |
| "epoch": 4.269476018852232, |
| "grad_norm": 0.15047667920589447, |
| "learning_rate": 6.1377005985278205e-06, |
| "loss": 0.073, |
| "step": 15400 |
| }, |
| { |
| "epoch": 4.297199889104519, |
| "grad_norm": 0.19985252618789673, |
| "learning_rate": 6.095250585782562e-06, |
| "loss": 0.0732, |
| "step": 15500 |
| }, |
| { |
| "epoch": 4.297199889104519, |
| "eval_valid_loss": 0.062382254749536514, |
| "eval_valid_runtime": 6.4347, |
| "eval_valid_samples_per_second": 214.927, |
| "eval_valid_steps_per_second": 6.838, |
| "step": 15500 |
| }, |
| { |
| "epoch": 4.297199889104519, |
| "eval_valid_target_loss": 0.06759324669837952, |
| "eval_valid_target_runtime": 6.5646, |
| "eval_valid_target_samples_per_second": 218.446, |
| "eval_valid_target_steps_per_second": 6.855, |
| "step": 15500 |
| }, |
| { |
| "epoch": 4.324923759356806, |
| "grad_norm": 0.16384641826152802, |
| "learning_rate": 6.0527174842748994e-06, |
| "loss": 0.0716, |
| "step": 15600 |
| }, |
| { |
| "epoch": 4.352647629609093, |
| "grad_norm": 0.14244656264781952, |
| "learning_rate": 6.0101045206844676e-06, |
| "loss": 0.0716, |
| "step": 15700 |
| }, |
| { |
| "epoch": 4.380371499861381, |
| "grad_norm": 0.16209416091442108, |
| "learning_rate": 5.9674149277494694e-06, |
| "loss": 0.0714, |
| "step": 15800 |
| }, |
| { |
| "epoch": 4.408095370113668, |
| "grad_norm": 0.17041273415088654, |
| "learning_rate": 5.92465194402142e-06, |
| "loss": 0.0715, |
| "step": 15900 |
| }, |
| { |
| "epoch": 4.435819240365955, |
| "grad_norm": 0.16730940341949463, |
| "learning_rate": 5.881818813619463e-06, |
| "loss": 0.0714, |
| "step": 16000 |
| }, |
| { |
| "epoch": 4.435819240365955, |
| "eval_valid_loss": 0.061134014278650284, |
| "eval_valid_runtime": 6.4104, |
| "eval_valid_samples_per_second": 215.742, |
| "eval_valid_steps_per_second": 6.864, |
| "step": 16000 |
| }, |
| { |
| "epoch": 4.435819240365955, |
| "eval_valid_target_loss": 0.06638547778129578, |
| "eval_valid_target_runtime": 6.5651, |
| "eval_valid_target_samples_per_second": 218.427, |
| "eval_valid_target_steps_per_second": 6.854, |
| "step": 16000 |
| }, |
| { |
| "epoch": 4.463543110618242, |
| "grad_norm": 0.13161396980285645, |
| "learning_rate": 5.8389187859842675e-06, |
| "loss": 0.0703, |
| "step": 16100 |
| }, |
| { |
| "epoch": 4.491266980870529, |
| "grad_norm": 0.13423210382461548, |
| "learning_rate": 5.7959551156315156e-06, |
| "loss": 0.0707, |
| "step": 16200 |
| }, |
| { |
| "epoch": 4.518990851122817, |
| "grad_norm": 0.20051045715808868, |
| "learning_rate": 5.752931061904994e-06, |
| "loss": 0.0699, |
| "step": 16300 |
| }, |
| { |
| "epoch": 4.546714721375104, |
| "grad_norm": 0.15945318341255188, |
| "learning_rate": 5.709849888729351e-06, |
| "loss": 0.0697, |
| "step": 16400 |
| }, |
| { |
| "epoch": 4.574438591627391, |
| "grad_norm": 0.13749030232429504, |
| "learning_rate": 5.666714864362468e-06, |
| "loss": 0.0704, |
| "step": 16500 |
| }, |
| { |
| "epoch": 4.574438591627391, |
| "eval_valid_loss": 0.06001834571361542, |
| "eval_valid_runtime": 6.4467, |
| "eval_valid_samples_per_second": 214.529, |
| "eval_valid_steps_per_second": 6.825, |
| "step": 16500 |
| }, |
| { |
| "epoch": 4.574438591627391, |
| "eval_valid_target_loss": 0.06535307317972183, |
| "eval_valid_target_runtime": 6.5686, |
| "eval_valid_target_samples_per_second": 218.311, |
| "eval_valid_target_steps_per_second": 6.851, |
| "step": 16500 |
| }, |
| { |
| "epoch": 4.602162461879678, |
| "grad_norm": 0.133077010512352, |
| "learning_rate": 5.6235292611475326e-06, |
| "loss": 0.0693, |
| "step": 16600 |
| }, |
| { |
| "epoch": 4.629886332131965, |
| "grad_norm": 0.1508035957813263, |
| "learning_rate": 5.580296355264783e-06, |
| "loss": 0.069, |
| "step": 16700 |
| }, |
| { |
| "epoch": 4.6576102023842525, |
| "grad_norm": 0.14195485413074493, |
| "learning_rate": 5.537019426482966e-06, |
| "loss": 0.0695, |
| "step": 16800 |
| }, |
| { |
| "epoch": 4.6853340726365404, |
| "grad_norm": 0.16586261987686157, |
| "learning_rate": 5.493701757910536e-06, |
| "loss": 0.0684, |
| "step": 16900 |
| }, |
| { |
| "epoch": 4.7130579428888275, |
| "grad_norm": 0.13865657150745392, |
| "learning_rate": 5.4503466357465765e-06, |
| "loss": 0.0682, |
| "step": 17000 |
| }, |
| { |
| "epoch": 4.7130579428888275, |
| "eval_valid_loss": 0.0584811232984066, |
| "eval_valid_runtime": 6.422, |
| "eval_valid_samples_per_second": 215.352, |
| "eval_valid_steps_per_second": 6.851, |
| "step": 17000 |
| }, |
| { |
| "epoch": 4.7130579428888275, |
| "eval_valid_target_loss": 0.06370435655117035, |
| "eval_valid_target_runtime": 6.5705, |
| "eval_valid_target_samples_per_second": 218.247, |
| "eval_valid_target_steps_per_second": 6.849, |
| "step": 17000 |
| }, |
| { |
| "epoch": 4.740781813141115, |
| "grad_norm": 0.1934811919927597, |
| "learning_rate": 5.406957349031504e-06, |
| "loss": 0.0686, |
| "step": 17100 |
| }, |
| { |
| "epoch": 4.768505683393402, |
| "grad_norm": 0.16662567853927612, |
| "learning_rate": 5.363537189397556e-06, |
| "loss": 0.0682, |
| "step": 17200 |
| }, |
| { |
| "epoch": 4.796229553645689, |
| "grad_norm": 0.15507076680660248, |
| "learning_rate": 5.320089450819075e-06, |
| "loss": 0.0673, |
| "step": 17300 |
| }, |
| { |
| "epoch": 4.823953423897976, |
| "grad_norm": 0.12763585150241852, |
| "learning_rate": 5.276617429362616e-06, |
| "loss": 0.0671, |
| "step": 17400 |
| }, |
| { |
| "epoch": 4.851677294150264, |
| "grad_norm": 0.15640078485012054, |
| "learning_rate": 5.233124422936906e-06, |
| "loss": 0.0669, |
| "step": 17500 |
| }, |
| { |
| "epoch": 4.851677294150264, |
| "eval_valid_loss": 0.05754322186112404, |
| "eval_valid_runtime": 6.4388, |
| "eval_valid_samples_per_second": 214.792, |
| "eval_valid_steps_per_second": 6.834, |
| "step": 17500 |
| }, |
| { |
| "epoch": 4.851677294150264, |
| "eval_valid_target_loss": 0.06262939423322678, |
| "eval_valid_target_runtime": 6.5536, |
| "eval_valid_target_samples_per_second": 218.81, |
| "eval_valid_target_steps_per_second": 6.866, |
| "step": 17500 |
| }, |
| { |
| "epoch": 4.879401164402551, |
| "grad_norm": 0.16545389592647552, |
| "learning_rate": 5.189613731042645e-06, |
| "loss": 0.0663, |
| "step": 17600 |
| }, |
| { |
| "epoch": 4.907125034654838, |
| "grad_norm": 0.17085812985897064, |
| "learning_rate": 5.146088654522208e-06, |
| "loss": 0.0657, |
| "step": 17700 |
| }, |
| { |
| "epoch": 4.934848904907125, |
| "grad_norm": 0.14638109505176544, |
| "learning_rate": 5.102552495309222e-06, |
| "loss": 0.0677, |
| "step": 17800 |
| }, |
| { |
| "epoch": 4.962572775159412, |
| "grad_norm": 0.15568013489246368, |
| "learning_rate": 5.059008556178079e-06, |
| "loss": 0.0657, |
| "step": 17900 |
| }, |
| { |
| "epoch": 4.9902966454117, |
| "grad_norm": 0.16898399591445923, |
| "learning_rate": 5.015460140493381e-06, |
| "loss": 0.0661, |
| "step": 18000 |
| }, |
| { |
| "epoch": 4.9902966454117, |
| "eval_valid_loss": 0.05648580938577652, |
| "eval_valid_runtime": 6.4207, |
| "eval_valid_samples_per_second": 215.397, |
| "eval_valid_steps_per_second": 6.853, |
| "step": 18000 |
| }, |
| { |
| "epoch": 4.9902966454117, |
| "eval_valid_target_loss": 0.06151015684008598, |
| "eval_valid_target_runtime": 6.5952, |
| "eval_valid_target_samples_per_second": 217.432, |
| "eval_valid_target_steps_per_second": 6.823, |
| "step": 18000 |
| }, |
| { |
| "epoch": 5.018020515663987, |
| "grad_norm": 0.13535688817501068, |
| "learning_rate": 4.971910551959332e-06, |
| "loss": 0.0654, |
| "step": 18100 |
| }, |
| { |
| "epoch": 5.045744385916274, |
| "grad_norm": 0.16001687943935394, |
| "learning_rate": 4.928363094369108e-06, |
| "loss": 0.0656, |
| "step": 18200 |
| }, |
| { |
| "epoch": 5.073468256168561, |
| "grad_norm": 0.1575719267129898, |
| "learning_rate": 4.88482107135423e-06, |
| "loss": 0.0641, |
| "step": 18300 |
| }, |
| { |
| "epoch": 5.101192126420848, |
| "grad_norm": 0.1607745736837387, |
| "learning_rate": 4.841287786133937e-06, |
| "loss": 0.0642, |
| "step": 18400 |
| }, |
| { |
| "epoch": 5.128915996673135, |
| "grad_norm": 0.13689269125461578, |
| "learning_rate": 4.797766541264592e-06, |
| "loss": 0.0646, |
| "step": 18500 |
| }, |
| { |
| "epoch": 5.128915996673135, |
| "eval_valid_loss": 0.05563423037528992, |
| "eval_valid_runtime": 6.4248, |
| "eval_valid_samples_per_second": 215.261, |
| "eval_valid_steps_per_second": 6.849, |
| "step": 18500 |
| }, |
| { |
| "epoch": 5.128915996673135, |
| "eval_valid_target_loss": 0.06068035215139389, |
| "eval_valid_target_runtime": 6.561, |
| "eval_valid_target_samples_per_second": 218.566, |
| "eval_valid_target_steps_per_second": 6.859, |
| "step": 18500 |
| }, |
| { |
| "epoch": 5.156639866925423, |
| "grad_norm": 0.13576319813728333, |
| "learning_rate": 4.754260638389145e-06, |
| "loss": 0.0641, |
| "step": 18600 |
| }, |
| { |
| "epoch": 5.18436373717771, |
| "grad_norm": 0.13574448227882385, |
| "learning_rate": 4.710773377986659e-06, |
| "loss": 0.0643, |
| "step": 18700 |
| }, |
| { |
| "epoch": 5.212087607429997, |
| "grad_norm": 0.11536768078804016, |
| "learning_rate": 4.667308059121928e-06, |
| "loss": 0.064, |
| "step": 18800 |
| }, |
| { |
| "epoch": 5.239811477682284, |
| "grad_norm": 0.1470881700515747, |
| "learning_rate": 4.623867979195196e-06, |
| "loss": 0.0637, |
| "step": 18900 |
| }, |
| { |
| "epoch": 5.2675353479345715, |
| "grad_norm": 0.13156047463417053, |
| "learning_rate": 4.580456433692017e-06, |
| "loss": 0.0635, |
| "step": 19000 |
| }, |
| { |
| "epoch": 5.2675353479345715, |
| "eval_valid_loss": 0.05473410338163376, |
| "eval_valid_runtime": 6.4623, |
| "eval_valid_samples_per_second": 214.012, |
| "eval_valid_steps_per_second": 6.809, |
| "step": 19000 |
| }, |
| { |
| "epoch": 5.2675353479345715, |
| "eval_valid_target_loss": 0.05973204970359802, |
| "eval_valid_target_runtime": 6.5636, |
| "eval_valid_target_samples_per_second": 218.477, |
| "eval_valid_target_steps_per_second": 6.856, |
| "step": 19000 |
| }, |
| { |
| "epoch": 5.2952592181868585, |
| "grad_norm": 0.132376030087471, |
| "learning_rate": 4.537076715933242e-06, |
| "loss": 0.0638, |
| "step": 19100 |
| }, |
| { |
| "epoch": 5.3229830884391465, |
| "grad_norm": 0.14191821217536926, |
| "learning_rate": 4.493732116825174e-06, |
| "loss": 0.064, |
| "step": 19200 |
| }, |
| { |
| "epoch": 5.3507069586914335, |
| "grad_norm": 0.1247839480638504, |
| "learning_rate": 4.45042592460993e-06, |
| "loss": 0.0627, |
| "step": 19300 |
| }, |
| { |
| "epoch": 5.378430828943721, |
| "grad_norm": 0.12980355322360992, |
| "learning_rate": 4.4071614246159596e-06, |
| "loss": 0.0632, |
| "step": 19400 |
| }, |
| { |
| "epoch": 5.406154699196008, |
| "grad_norm": 0.1391134262084961, |
| "learning_rate": 4.363941899008833e-06, |
| "loss": 0.0625, |
| "step": 19500 |
| }, |
| { |
| "epoch": 5.406154699196008, |
| "eval_valid_loss": 0.05415208637714386, |
| "eval_valid_runtime": 6.4065, |
| "eval_valid_samples_per_second": 215.873, |
| "eval_valid_steps_per_second": 6.868, |
| "step": 19500 |
| }, |
| { |
| "epoch": 5.406154699196008, |
| "eval_valid_target_loss": 0.05894719064235687, |
| "eval_valid_target_runtime": 6.569, |
| "eval_valid_target_samples_per_second": 218.299, |
| "eval_valid_target_steps_per_second": 6.85, |
| "step": 19500 |
| }, |
| { |
| "epoch": 5.433878569448295, |
| "grad_norm": 0.2045671045780182, |
| "learning_rate": 4.320770626542238e-06, |
| "loss": 0.0629, |
| "step": 19600 |
| }, |
| { |
| "epoch": 5.461602439700582, |
| "grad_norm": 0.1417771577835083, |
| "learning_rate": 4.277650882309238e-06, |
| "loss": 0.0625, |
| "step": 19700 |
| }, |
| { |
| "epoch": 5.48932630995287, |
| "grad_norm": 0.14284995198249817, |
| "learning_rate": 4.234585937493829e-06, |
| "loss": 0.0623, |
| "step": 19800 |
| }, |
| { |
| "epoch": 5.517050180205157, |
| "grad_norm": 0.1546027809381485, |
| "learning_rate": 4.1915790591227615e-06, |
| "loss": 0.0625, |
| "step": 19900 |
| }, |
| { |
| "epoch": 5.544774050457444, |
| "grad_norm": 0.1454819142818451, |
| "learning_rate": 4.148633509817715e-06, |
| "loss": 0.0613, |
| "step": 20000 |
| }, |
| { |
| "epoch": 5.544774050457444, |
| "eval_valid_loss": 0.05364985764026642, |
| "eval_valid_runtime": 6.436, |
| "eval_valid_samples_per_second": 214.885, |
| "eval_valid_steps_per_second": 6.837, |
| "step": 20000 |
| }, |
| { |
| "epoch": 5.544774050457444, |
| "eval_valid_target_loss": 0.05850011110305786, |
| "eval_valid_target_runtime": 6.5534, |
| "eval_valid_target_samples_per_second": 218.819, |
| "eval_valid_target_steps_per_second": 6.867, |
| "step": 20000 |
| }, |
| { |
| "epoch": 5.572497920709731, |
| "grad_norm": 0.12440012395381927, |
| "learning_rate": 4.105752547547764e-06, |
| "loss": 0.0613, |
| "step": 20100 |
| }, |
| { |
| "epoch": 5.600221790962018, |
| "grad_norm": 0.14089658856391907, |
| "learning_rate": 4.062939425382236e-06, |
| "loss": 0.0616, |
| "step": 20200 |
| }, |
| { |
| "epoch": 5.627945661214305, |
| "grad_norm": 0.24770374596118927, |
| "learning_rate": 4.020197391243922e-06, |
| "loss": 0.0621, |
| "step": 20300 |
| }, |
| { |
| "epoch": 5.655669531466593, |
| "grad_norm": 0.11835476011037827, |
| "learning_rate": 3.977529687662671e-06, |
| "loss": 0.0619, |
| "step": 20400 |
| }, |
| { |
| "epoch": 5.68339340171888, |
| "grad_norm": 0.12585273385047913, |
| "learning_rate": 3.93493955152941e-06, |
| "loss": 0.0612, |
| "step": 20500 |
| }, |
| { |
| "epoch": 5.68339340171888, |
| "eval_valid_loss": 0.05319705978035927, |
| "eval_valid_runtime": 6.4196, |
| "eval_valid_samples_per_second": 215.435, |
| "eval_valid_steps_per_second": 6.854, |
| "step": 20500 |
| }, |
| { |
| "epoch": 5.68339340171888, |
| "eval_valid_target_loss": 0.058061882853507996, |
| "eval_valid_target_runtime": 6.5894, |
| "eval_valid_target_samples_per_second": 217.622, |
| "eval_valid_target_steps_per_second": 6.829, |
| "step": 20500 |
| }, |
| { |
| "epoch": 5.711117271971167, |
| "grad_norm": 0.15103484690189362, |
| "learning_rate": 3.892430213850587e-06, |
| "loss": 0.0615, |
| "step": 20600 |
| }, |
| { |
| "epoch": 5.738841142223454, |
| "grad_norm": 0.1266421228647232, |
| "learning_rate": 3.850004899503051e-06, |
| "loss": 0.0613, |
| "step": 20700 |
| }, |
| { |
| "epoch": 5.766565012475741, |
| "grad_norm": 0.1100655049085617, |
| "learning_rate": 3.8076668269894045e-06, |
| "loss": 0.0606, |
| "step": 20800 |
| }, |
| { |
| "epoch": 5.794288882728029, |
| "grad_norm": 0.1395365446805954, |
| "learning_rate": 3.765419208193848e-06, |
| "loss": 0.0614, |
| "step": 20900 |
| }, |
| { |
| "epoch": 5.822012752980316, |
| "grad_norm": 0.12668344378471375, |
| "learning_rate": 3.723265248138506e-06, |
| "loss": 0.0614, |
| "step": 21000 |
| }, |
| { |
| "epoch": 5.822012752980316, |
| "eval_valid_loss": 0.052489351481199265, |
| "eval_valid_runtime": 6.4455, |
| "eval_valid_samples_per_second": 214.567, |
| "eval_valid_steps_per_second": 6.826, |
| "step": 21000 |
| }, |
| { |
| "epoch": 5.822012752980316, |
| "eval_valid_target_loss": 0.057213690131902695, |
| "eval_valid_target_runtime": 6.5546, |
| "eval_valid_target_samples_per_second": 218.777, |
| "eval_valid_target_steps_per_second": 6.865, |
| "step": 21000 |
| }, |
| { |
| "epoch": 5.849736623232603, |
| "grad_norm": 0.12728376686573029, |
| "learning_rate": 3.681208144740291e-06, |
| "loss": 0.0612, |
| "step": 21100 |
| }, |
| { |
| "epoch": 5.87746049348489, |
| "grad_norm": 0.14501620829105377, |
| "learning_rate": 3.6392510885682965e-06, |
| "loss": 0.0601, |
| "step": 21200 |
| }, |
| { |
| "epoch": 5.9051843637371775, |
| "grad_norm": 0.1082565188407898, |
| "learning_rate": 3.5973972626017594e-06, |
| "loss": 0.0608, |
| "step": 21300 |
| }, |
| { |
| "epoch": 5.9329082339894645, |
| "grad_norm": 0.14926603436470032, |
| "learning_rate": 3.5556498419885867e-06, |
| "loss": 0.0603, |
| "step": 21400 |
| }, |
| { |
| "epoch": 5.9606321042417525, |
| "grad_norm": 0.1263745278120041, |
| "learning_rate": 3.514011993804469e-06, |
| "loss": 0.0602, |
| "step": 21500 |
| }, |
| { |
| "epoch": 5.9606321042417525, |
| "eval_valid_loss": 0.05212084576487541, |
| "eval_valid_runtime": 6.439, |
| "eval_valid_samples_per_second": 214.785, |
| "eval_valid_steps_per_second": 6.833, |
| "step": 21500 |
| }, |
| { |
| "epoch": 5.9606321042417525, |
| "eval_valid_target_loss": 0.05688408389687538, |
| "eval_valid_target_runtime": 6.5822, |
| "eval_valid_target_samples_per_second": 217.862, |
| "eval_valid_target_steps_per_second": 6.837, |
| "step": 21500 |
| }, |
| { |
| "epoch": 5.98835597449404, |
| "grad_norm": 0.1368781179189682, |
| "learning_rate": 3.4724868768126384e-06, |
| "loss": 0.0604, |
| "step": 21600 |
| }, |
| { |
| "epoch": 6.016079844746327, |
| "grad_norm": 0.15087148547172546, |
| "learning_rate": 3.4310776412242195e-06, |
| "loss": 0.06, |
| "step": 21700 |
| }, |
| { |
| "epoch": 6.043803714998614, |
| "grad_norm": 0.11400382220745087, |
| "learning_rate": 3.3897874284592467e-06, |
| "loss": 0.0594, |
| "step": 21800 |
| }, |
| { |
| "epoch": 6.071527585250901, |
| "grad_norm": 0.1169167011976242, |
| "learning_rate": 3.348619370908361e-06, |
| "loss": 0.0598, |
| "step": 21900 |
| }, |
| { |
| "epoch": 6.099251455503188, |
| "grad_norm": 0.12172160297632217, |
| "learning_rate": 3.3075765916951576e-06, |
| "loss": 0.0599, |
| "step": 22000 |
| }, |
| { |
| "epoch": 6.099251455503188, |
| "eval_valid_loss": 0.05157113075256348, |
| "eval_valid_runtime": 6.4258, |
| "eval_valid_samples_per_second": 215.224, |
| "eval_valid_steps_per_second": 6.847, |
| "step": 22000 |
| }, |
| { |
| "epoch": 6.099251455503188, |
| "eval_valid_target_loss": 0.056347791105508804, |
| "eval_valid_target_runtime": 6.5915, |
| "eval_valid_target_samples_per_second": 217.554, |
| "eval_valid_target_steps_per_second": 6.827, |
| "step": 22000 |
| }, |
| { |
| "epoch": 6.126975325755476, |
| "grad_norm": 0.1324358880519867, |
| "learning_rate": 3.2666622044392765e-06, |
| "loss": 0.0591, |
| "step": 22100 |
| }, |
| { |
| "epoch": 6.154699196007763, |
| "grad_norm": 0.12708991765975952, |
| "learning_rate": 3.225879313020178e-06, |
| "loss": 0.0591, |
| "step": 22200 |
| }, |
| { |
| "epoch": 6.18242306626005, |
| "grad_norm": 0.11844506114721298, |
| "learning_rate": 3.18523101134169e-06, |
| "loss": 0.0592, |
| "step": 22300 |
| }, |
| { |
| "epoch": 6.210146936512337, |
| "grad_norm": 0.12888644635677338, |
| "learning_rate": 3.1447203830972827e-06, |
| "loss": 0.0597, |
| "step": 22400 |
| }, |
| { |
| "epoch": 6.237870806764624, |
| "grad_norm": 0.1485096514225006, |
| "learning_rate": 3.104350501536134e-06, |
| "loss": 0.0598, |
| "step": 22500 |
| }, |
| { |
| "epoch": 6.237870806764624, |
| "eval_valid_loss": 0.051265206187963486, |
| "eval_valid_runtime": 6.437, |
| "eval_valid_samples_per_second": 214.85, |
| "eval_valid_steps_per_second": 6.835, |
| "step": 22500 |
| }, |
| { |
| "epoch": 6.237870806764624, |
| "eval_valid_target_loss": 0.056084584444761276, |
| "eval_valid_target_runtime": 6.6, |
| "eval_valid_target_samples_per_second": 217.273, |
| "eval_valid_target_steps_per_second": 6.818, |
| "step": 22500 |
| }, |
| { |
| "epoch": 6.265594677016912, |
| "grad_norm": 0.11319620907306671, |
| "learning_rate": 3.064124429229992e-06, |
| "loss": 0.0581, |
| "step": 22600 |
| }, |
| { |
| "epoch": 6.293318547269199, |
| "grad_norm": 0.125896617770195, |
| "learning_rate": 3.0240452178408286e-06, |
| "loss": 0.0594, |
| "step": 22700 |
| }, |
| { |
| "epoch": 6.321042417521486, |
| "grad_norm": 0.13202796876430511, |
| "learning_rate": 2.9841159078893377e-06, |
| "loss": 0.0587, |
| "step": 22800 |
| }, |
| { |
| "epoch": 6.348766287773773, |
| "grad_norm": 0.12477891147136688, |
| "learning_rate": 2.944339528524278e-06, |
| "loss": 0.0582, |
| "step": 22900 |
| }, |
| { |
| "epoch": 6.37649015802606, |
| "grad_norm": 0.13174673914909363, |
| "learning_rate": 2.9047190972926597e-06, |
| "loss": 0.0585, |
| "step": 23000 |
| }, |
| { |
| "epoch": 6.37649015802606, |
| "eval_valid_loss": 0.05099370330572128, |
| "eval_valid_runtime": 6.4377, |
| "eval_valid_samples_per_second": 214.828, |
| "eval_valid_steps_per_second": 6.835, |
| "step": 23000 |
| }, |
| { |
| "epoch": 6.37649015802606, |
| "eval_valid_target_loss": 0.055660318583250046, |
| "eval_valid_target_runtime": 6.5668, |
| "eval_valid_target_samples_per_second": 218.37, |
| "eval_valid_target_steps_per_second": 6.853, |
| "step": 23000 |
| }, |
| { |
| "epoch": 6.404214028278347, |
| "grad_norm": 0.12851925194263458, |
| "learning_rate": 2.8652576199108395e-06, |
| "loss": 0.0586, |
| "step": 23100 |
| }, |
| { |
| "epoch": 6.431937898530635, |
| "grad_norm": 0.10676029324531555, |
| "learning_rate": 2.8259580900364825e-06, |
| "loss": 0.0584, |
| "step": 23200 |
| }, |
| { |
| "epoch": 6.459661768782922, |
| "grad_norm": 0.1461838185787201, |
| "learning_rate": 2.786823489041478e-06, |
| "loss": 0.0583, |
| "step": 23300 |
| }, |
| { |
| "epoch": 6.487385639035209, |
| "grad_norm": 0.12321025878190994, |
| "learning_rate": 2.747856785785743e-06, |
| "loss": 0.0579, |
| "step": 23400 |
| }, |
| { |
| "epoch": 6.515109509287496, |
| "grad_norm": 0.1209678128361702, |
| "learning_rate": 2.7090609363919986e-06, |
| "loss": 0.0581, |
| "step": 23500 |
| }, |
| { |
| "epoch": 6.515109509287496, |
| "eval_valid_loss": 0.050510190427303314, |
| "eval_valid_runtime": 6.447, |
| "eval_valid_samples_per_second": 214.517, |
| "eval_valid_steps_per_second": 6.825, |
| "step": 23500 |
| }, |
| { |
| "epoch": 6.515109509287496, |
| "eval_valid_target_loss": 0.0551883801817894, |
| "eval_valid_target_runtime": 6.5701, |
| "eval_valid_target_samples_per_second": 218.262, |
| "eval_valid_target_steps_per_second": 6.849, |
| "step": 23500 |
| }, |
| { |
| "epoch": 6.5428333795397835, |
| "grad_norm": 0.15566356480121613, |
| "learning_rate": 2.6704388840215277e-06, |
| "loss": 0.0578, |
| "step": 23600 |
| }, |
| { |
| "epoch": 6.570557249792071, |
| "grad_norm": 0.10754121840000153, |
| "learning_rate": 2.6319935586508814e-06, |
| "loss": 0.058, |
| "step": 23700 |
| }, |
| { |
| "epoch": 6.5982811200443585, |
| "grad_norm": 0.12134023010730743, |
| "learning_rate": 2.593727876849601e-06, |
| "loss": 0.0577, |
| "step": 23800 |
| }, |
| { |
| "epoch": 6.626004990296646, |
| "grad_norm": 0.12984460592269897, |
| "learning_rate": 2.555644741558979e-06, |
| "loss": 0.0575, |
| "step": 23900 |
| }, |
| { |
| "epoch": 6.653728860548933, |
| "grad_norm": 0.13557353615760803, |
| "learning_rate": 2.51774704187181e-06, |
| "loss": 0.0571, |
| "step": 24000 |
| }, |
| { |
| "epoch": 6.653728860548933, |
| "eval_valid_loss": 0.0503346286714077, |
| "eval_valid_runtime": 6.419, |
| "eval_valid_samples_per_second": 215.455, |
| "eval_valid_steps_per_second": 6.855, |
| "step": 24000 |
| }, |
| { |
| "epoch": 6.653728860548933, |
| "eval_valid_target_loss": 0.0548863522708416, |
| "eval_valid_target_runtime": 6.5823, |
| "eval_valid_target_samples_per_second": 217.857, |
| "eval_valid_target_steps_per_second": 6.837, |
| "step": 24000 |
| }, |
| { |
| "epoch": 6.68145273080122, |
| "grad_norm": 0.10979162156581879, |
| "learning_rate": 2.4800376528132297e-06, |
| "loss": 0.0576, |
| "step": 24100 |
| }, |
| { |
| "epoch": 6.709176601053507, |
| "grad_norm": 0.16127757728099823, |
| "learning_rate": 2.4425194351226082e-06, |
| "loss": 0.0579, |
| "step": 24200 |
| }, |
| { |
| "epoch": 6.736900471305795, |
| "grad_norm": 0.13306181132793427, |
| "learning_rate": 2.4051952350365194e-06, |
| "loss": 0.0572, |
| "step": 24300 |
| }, |
| { |
| "epoch": 6.764624341558082, |
| "grad_norm": 0.11353787779808044, |
| "learning_rate": 2.368067884072821e-06, |
| "loss": 0.0573, |
| "step": 24400 |
| }, |
| { |
| "epoch": 6.792348211810369, |
| "grad_norm": 0.10115820914506912, |
| "learning_rate": 2.331140198815849e-06, |
| "loss": 0.0574, |
| "step": 24500 |
| }, |
| { |
| "epoch": 6.792348211810369, |
| "eval_valid_loss": 0.049953412264585495, |
| "eval_valid_runtime": 6.4338, |
| "eval_valid_samples_per_second": 214.958, |
| "eval_valid_steps_per_second": 6.839, |
| "step": 24500 |
| }, |
| { |
| "epoch": 6.792348211810369, |
| "eval_valid_target_loss": 0.054579559713602066, |
| "eval_valid_target_runtime": 6.5694, |
| "eval_valid_target_samples_per_second": 218.283, |
| "eval_valid_target_steps_per_second": 6.85, |
| "step": 24500 |
| }, |
| { |
| "epoch": 6.820072082062656, |
| "grad_norm": 0.10899285972118378, |
| "learning_rate": 2.294414980702741e-06, |
| "loss": 0.0573, |
| "step": 24600 |
| }, |
| { |
| "epoch": 6.847795952314943, |
| "grad_norm": 0.1248159185051918, |
| "learning_rate": 2.257895015810913e-06, |
| "loss": 0.0568, |
| "step": 24700 |
| }, |
| { |
| "epoch": 6.87551982256723, |
| "grad_norm": 0.10761197656393051, |
| "learning_rate": 2.221583074646701e-06, |
| "loss": 0.0574, |
| "step": 24800 |
| }, |
| { |
| "epoch": 6.903243692819517, |
| "grad_norm": 0.13541601598262787, |
| "learning_rate": 2.1854819119351784e-06, |
| "loss": 0.0562, |
| "step": 24900 |
| }, |
| { |
| "epoch": 6.930967563071805, |
| "grad_norm": 0.10959000140428543, |
| "learning_rate": 2.1495942664111814e-06, |
| "loss": 0.0576, |
| "step": 25000 |
| }, |
| { |
| "epoch": 6.930967563071805, |
| "eval_valid_loss": 0.049802832305431366, |
| "eval_valid_runtime": 6.4091, |
| "eval_valid_samples_per_second": 215.786, |
| "eval_valid_steps_per_second": 6.865, |
| "step": 25000 |
| }, |
| { |
| "epoch": 6.930967563071805, |
| "eval_valid_target_loss": 0.05434631556272507, |
| "eval_valid_target_runtime": 6.5766, |
| "eval_valid_target_samples_per_second": 218.047, |
| "eval_valid_target_steps_per_second": 6.842, |
| "step": 25000 |
| }, |
| { |
| "epoch": 6.958691433324092, |
| "grad_norm": 0.11864270269870758, |
| "learning_rate": 2.113922860611532e-06, |
| "loss": 0.0571, |
| "step": 25100 |
| }, |
| { |
| "epoch": 6.986415303576379, |
| "grad_norm": 0.10493431985378265, |
| "learning_rate": 2.078470400668506e-06, |
| "loss": 0.0572, |
| "step": 25200 |
| }, |
| { |
| "epoch": 7.014139173828666, |
| "grad_norm": 0.10294145345687866, |
| "learning_rate": 2.0432395761045427e-06, |
| "loss": 0.0562, |
| "step": 25300 |
| }, |
| { |
| "epoch": 7.041863044080953, |
| "grad_norm": 0.11174608767032623, |
| "learning_rate": 2.008233059628193e-06, |
| "loss": 0.0562, |
| "step": 25400 |
| }, |
| { |
| "epoch": 7.069586914333241, |
| "grad_norm": 0.10171514004468918, |
| "learning_rate": 1.9734535069313753e-06, |
| "loss": 0.056, |
| "step": 25500 |
| }, |
| { |
| "epoch": 7.069586914333241, |
| "eval_valid_loss": 0.04948737472295761, |
| "eval_valid_runtime": 6.442, |
| "eval_valid_samples_per_second": 214.685, |
| "eval_valid_steps_per_second": 6.83, |
| "step": 25500 |
| }, |
| { |
| "epoch": 7.069586914333241, |
| "eval_valid_target_loss": 0.05410830304026604, |
| "eval_valid_target_runtime": 6.5896, |
| "eval_valid_target_samples_per_second": 217.617, |
| "eval_valid_target_steps_per_second": 6.829, |
| "step": 25500 |
| }, |
| { |
| "epoch": 7.097310784585528, |
| "grad_norm": 0.10731488466262817, |
| "learning_rate": 1.9389035564879104e-06, |
| "loss": 0.0569, |
| "step": 25600 |
| }, |
| { |
| "epoch": 7.125034654837815, |
| "grad_norm": 0.0954216718673706, |
| "learning_rate": 1.9045858293533399e-06, |
| "loss": 0.0566, |
| "step": 25700 |
| }, |
| { |
| "epoch": 7.1527585250901025, |
| "grad_norm": 0.11443454772233963, |
| "learning_rate": 1.8705029289661054e-06, |
| "loss": 0.057, |
| "step": 25800 |
| }, |
| { |
| "epoch": 7.1804823953423895, |
| "grad_norm": 0.10671606659889221, |
| "learning_rate": 1.8366574409500344e-06, |
| "loss": 0.0561, |
| "step": 25900 |
| }, |
| { |
| "epoch": 7.208206265594677, |
| "grad_norm": 0.1028604656457901, |
| "learning_rate": 1.8030519329181916e-06, |
| "loss": 0.0561, |
| "step": 26000 |
| }, |
| { |
| "epoch": 7.208206265594677, |
| "eval_valid_loss": 0.04931313917040825, |
| "eval_valid_runtime": 6.431, |
| "eval_valid_samples_per_second": 215.053, |
| "eval_valid_steps_per_second": 6.842, |
| "step": 26000 |
| }, |
| { |
| "epoch": 7.208206265594677, |
| "eval_valid_target_loss": 0.053888678550720215, |
| "eval_valid_target_runtime": 6.5712, |
| "eval_valid_target_samples_per_second": 218.225, |
| "eval_valid_target_steps_per_second": 6.848, |
| "step": 26000 |
| }, |
| { |
| "epoch": 7.2359301358469645, |
| "grad_norm": 0.11538730561733246, |
| "learning_rate": 1.7696889542780904e-06, |
| "loss": 0.0564, |
| "step": 26100 |
| }, |
| { |
| "epoch": 7.263654006099252, |
| "grad_norm": 0.10585539788007736, |
| "learning_rate": 1.7365710360382882e-06, |
| "loss": 0.0562, |
| "step": 26200 |
| }, |
| { |
| "epoch": 7.291377876351539, |
| "grad_norm": 0.09750411659479141, |
| "learning_rate": 1.7037006906163773e-06, |
| "loss": 0.0563, |
| "step": 26300 |
| }, |
| { |
| "epoch": 7.319101746603826, |
| "grad_norm": 0.10777630656957626, |
| "learning_rate": 1.6710804116483886e-06, |
| "loss": 0.0556, |
| "step": 26400 |
| }, |
| { |
| "epoch": 7.346825616856113, |
| "grad_norm": 0.13231071829795837, |
| "learning_rate": 1.6387126737996067e-06, |
| "loss": 0.0559, |
| "step": 26500 |
| }, |
| { |
| "epoch": 7.346825616856113, |
| "eval_valid_loss": 0.04909936338663101, |
| "eval_valid_runtime": 6.4292, |
| "eval_valid_samples_per_second": 215.112, |
| "eval_valid_steps_per_second": 6.844, |
| "step": 26500 |
| }, |
| { |
| "epoch": 7.346825616856113, |
| "eval_valid_target_loss": 0.05357712134718895, |
| "eval_valid_target_runtime": 6.5542, |
| "eval_valid_target_samples_per_second": 218.792, |
| "eval_valid_target_steps_per_second": 6.866, |
| "step": 26500 |
| }, |
| { |
| "epoch": 7.374549487108401, |
| "grad_norm": 0.10591776669025421, |
| "learning_rate": 1.6065999325768544e-06, |
| "loss": 0.0559, |
| "step": 26600 |
| }, |
| { |
| "epoch": 7.402273357360688, |
| "grad_norm": 0.11603645980358124, |
| "learning_rate": 1.5747446241421931e-06, |
| "loss": 0.0557, |
| "step": 26700 |
| }, |
| { |
| "epoch": 7.429997227612975, |
| "grad_norm": 0.09715123474597931, |
| "learning_rate": 1.5431491651281123e-06, |
| "loss": 0.0563, |
| "step": 26800 |
| }, |
| { |
| "epoch": 7.457721097865262, |
| "grad_norm": 0.10046205669641495, |
| "learning_rate": 1.511815952454208e-06, |
| "loss": 0.0556, |
| "step": 26900 |
| }, |
| { |
| "epoch": 7.485444968117549, |
| "grad_norm": 0.11805932968854904, |
| "learning_rate": 1.480747363145334e-06, |
| "loss": 0.0556, |
| "step": 27000 |
| }, |
| { |
| "epoch": 7.485444968117549, |
| "eval_valid_loss": 0.04887402430176735, |
| "eval_valid_runtime": 6.4098, |
| "eval_valid_samples_per_second": 215.763, |
| "eval_valid_steps_per_second": 6.864, |
| "step": 27000 |
| }, |
| { |
| "epoch": 7.485444968117549, |
| "eval_valid_target_loss": 0.05348382145166397, |
| "eval_valid_target_runtime": 6.5773, |
| "eval_valid_target_samples_per_second": 218.023, |
| "eval_valid_target_steps_per_second": 6.842, |
| "step": 27000 |
| }, |
| { |
| "epoch": 7.513168838369836, |
| "grad_norm": 0.1107444316148758, |
| "learning_rate": 1.4499457541512746e-06, |
| "loss": 0.0554, |
| "step": 27100 |
| }, |
| { |
| "epoch": 7.540892708622124, |
| "grad_norm": 0.10029349476099014, |
| "learning_rate": 1.4194134621679478e-06, |
| "loss": 0.0559, |
| "step": 27200 |
| }, |
| { |
| "epoch": 7.568616578874411, |
| "grad_norm": 0.09976372122764587, |
| "learning_rate": 1.3891528034601316e-06, |
| "loss": 0.0565, |
| "step": 27300 |
| }, |
| { |
| "epoch": 7.596340449126698, |
| "grad_norm": 0.10560230165719986, |
| "learning_rate": 1.3591660736857453e-06, |
| "loss": 0.0553, |
| "step": 27400 |
| }, |
| { |
| "epoch": 7.624064319378985, |
| "grad_norm": 0.09814602881669998, |
| "learning_rate": 1.329455547721697e-06, |
| "loss": 0.0552, |
| "step": 27500 |
| }, |
| { |
| "epoch": 7.624064319378985, |
| "eval_valid_loss": 0.04867083579301834, |
| "eval_valid_runtime": 6.4389, |
| "eval_valid_samples_per_second": 214.79, |
| "eval_valid_steps_per_second": 6.834, |
| "step": 27500 |
| }, |
| { |
| "epoch": 7.624064319378985, |
| "eval_valid_target_loss": 0.053231850266456604, |
| "eval_valid_target_runtime": 6.5692, |
| "eval_valid_target_samples_per_second": 218.292, |
| "eval_valid_target_steps_per_second": 6.85, |
| "step": 27500 |
| }, |
| { |
| "epoch": 7.651788189631272, |
| "grad_norm": 0.10253589600324631, |
| "learning_rate": 1.300023479491303e-06, |
| "loss": 0.0555, |
| "step": 27600 |
| }, |
| { |
| "epoch": 7.67951205988356, |
| "grad_norm": 0.10933282226324081, |
| "learning_rate": 1.2708721017933007e-06, |
| "loss": 0.0551, |
| "step": 27700 |
| }, |
| { |
| "epoch": 7.707235930135847, |
| "grad_norm": 0.11853484809398651, |
| "learning_rate": 1.2420036261324598e-06, |
| "loss": 0.056, |
| "step": 27800 |
| }, |
| { |
| "epoch": 7.734959800388134, |
| "grad_norm": 0.0992041826248169, |
| "learning_rate": 1.2134202425518139e-06, |
| "loss": 0.0547, |
| "step": 27900 |
| }, |
| { |
| "epoch": 7.762683670640421, |
| "grad_norm": 0.10824355483055115, |
| "learning_rate": 1.185124119466517e-06, |
| "loss": 0.0554, |
| "step": 28000 |
| }, |
| { |
| "epoch": 7.762683670640421, |
| "eval_valid_loss": 0.048471271991729736, |
| "eval_valid_runtime": 6.414, |
| "eval_valid_samples_per_second": 215.623, |
| "eval_valid_steps_per_second": 6.86, |
| "step": 28000 |
| }, |
| { |
| "epoch": 7.762683670640421, |
| "eval_valid_target_loss": 0.05302482470870018, |
| "eval_valid_target_runtime": 6.5682, |
| "eval_valid_target_samples_per_second": 218.326, |
| "eval_valid_target_steps_per_second": 6.851, |
| "step": 28000 |
| }, |
| { |
| "epoch": 7.7904075408927085, |
| "grad_norm": 0.09927680343389511, |
| "learning_rate": 1.1571174034993416e-06, |
| "loss": 0.0555, |
| "step": 28100 |
| }, |
| { |
| "epoch": 7.8181314111449955, |
| "grad_norm": 0.09600567072629929, |
| "learning_rate": 1.129402219317825e-06, |
| "loss": 0.0553, |
| "step": 28200 |
| }, |
| { |
| "epoch": 7.845855281397283, |
| "grad_norm": 0.11057105660438538, |
| "learning_rate": 1.1019806694730989e-06, |
| "loss": 0.0557, |
| "step": 28300 |
| }, |
| { |
| "epoch": 7.873579151649571, |
| "grad_norm": 0.10991726815700531, |
| "learning_rate": 1.074854834240368e-06, |
| "loss": 0.0553, |
| "step": 28400 |
| }, |
| { |
| "epoch": 7.901303021901858, |
| "grad_norm": 0.09168905019760132, |
| "learning_rate": 1.0480267714611048e-06, |
| "loss": 0.0551, |
| "step": 28500 |
| }, |
| { |
| "epoch": 7.901303021901858, |
| "eval_valid_loss": 0.04835043475031853, |
| "eval_valid_runtime": 6.4532, |
| "eval_valid_samples_per_second": 214.313, |
| "eval_valid_steps_per_second": 6.818, |
| "step": 28500 |
| }, |
| { |
| "epoch": 7.901303021901858, |
| "eval_valid_target_loss": 0.05293356999754906, |
| "eval_valid_target_runtime": 6.5812, |
| "eval_valid_target_samples_per_second": 217.894, |
| "eval_valid_target_steps_per_second": 6.838, |
| "step": 28500 |
| }, |
| { |
| "epoch": 7.929026892154145, |
| "grad_norm": 0.09465237706899643, |
| "learning_rate": 1.0214985163869378e-06, |
| "loss": 0.0556, |
| "step": 28600 |
| }, |
| { |
| "epoch": 7.956750762406432, |
| "grad_norm": 0.10842736065387726, |
| "learning_rate": 9.952720815252397e-07, |
| "loss": 0.0543, |
| "step": 28700 |
| }, |
| { |
| "epoch": 7.984474632658719, |
| "grad_norm": 0.09609558433294296, |
| "learning_rate": 9.693494564864648e-07, |
| "loss": 0.0554, |
| "step": 28800 |
| }, |
| { |
| "epoch": 8.012198502911007, |
| "grad_norm": 0.10819283127784729, |
| "learning_rate": 9.437326078332099e-07, |
| "loss": 0.0545, |
| "step": 28900 |
| }, |
| { |
| "epoch": 8.039922373163293, |
| "grad_norm": 0.09054001420736313, |
| "learning_rate": 9.18423478931016e-07, |
| "loss": 0.0554, |
| "step": 29000 |
| }, |
| { |
| "epoch": 8.039922373163293, |
| "eval_valid_loss": 0.04819526523351669, |
| "eval_valid_runtime": 6.4165, |
| "eval_valid_samples_per_second": 215.536, |
| "eval_valid_steps_per_second": 6.857, |
| "step": 29000 |
| }, |
| { |
| "epoch": 8.039922373163293, |
| "eval_valid_target_loss": 0.05275378376245499, |
| "eval_valid_target_runtime": 6.5635, |
| "eval_valid_target_samples_per_second": 218.482, |
| "eval_valid_target_steps_per_second": 6.856, |
| "step": 29000 |
| }, |
| { |
| "epoch": 8.067646243415581, |
| "grad_norm": 0.10373499244451523, |
| "learning_rate": 8.934239898009517e-07, |
| "loss": 0.0552, |
| "step": 29100 |
| }, |
| { |
| "epoch": 8.095370113667869, |
| "grad_norm": 0.09614498168230057, |
| "learning_rate": 8.687360369739473e-07, |
| "loss": 0.0545, |
| "step": 29200 |
| }, |
| { |
| "epoch": 8.123093983920155, |
| "grad_norm": 0.1014479324221611, |
| "learning_rate": 8.443614933469208e-07, |
| "loss": 0.0549, |
| "step": 29300 |
| }, |
| { |
| "epoch": 8.150817854172443, |
| "grad_norm": 0.08971751481294632, |
| "learning_rate": 8.203022080406952e-07, |
| "loss": 0.0546, |
| "step": 29400 |
| }, |
| { |
| "epoch": 8.17854172442473, |
| "grad_norm": 0.09659924358129501, |
| "learning_rate": 7.965600062597184e-07, |
| "loss": 0.0542, |
| "step": 29500 |
| }, |
| { |
| "epoch": 8.17854172442473, |
| "eval_valid_loss": 0.04812739044427872, |
| "eval_valid_runtime": 6.4674, |
| "eval_valid_samples_per_second": 213.843, |
| "eval_valid_steps_per_second": 6.803, |
| "step": 29500 |
| }, |
| { |
| "epoch": 8.17854172442473, |
| "eval_valid_target_loss": 0.05264822766184807, |
| "eval_valid_target_runtime": 6.5912, |
| "eval_valid_target_samples_per_second": 217.563, |
| "eval_valid_target_steps_per_second": 6.827, |
| "step": 29500 |
| }, |
| { |
| "epoch": 8.206265594677017, |
| "grad_norm": 0.1034499853849411, |
| "learning_rate": 7.731366891535969e-07, |
| "loss": 0.0548, |
| "step": 29600 |
| }, |
| { |
| "epoch": 8.233989464929303, |
| "grad_norm": 0.0934043675661087, |
| "learning_rate": 7.500340336804607e-07, |
| "loss": 0.0542, |
| "step": 29700 |
| }, |
| { |
| "epoch": 8.261713335181591, |
| "grad_norm": 0.09693789482116699, |
| "learning_rate": 7.272537924721467e-07, |
| "loss": 0.0553, |
| "step": 29800 |
| }, |
| { |
| "epoch": 8.28943720543388, |
| "grad_norm": 0.09552415460348129, |
| "learning_rate": 7.047976937012568e-07, |
| "loss": 0.0543, |
| "step": 29900 |
| }, |
| { |
| "epoch": 8.317161075686165, |
| "grad_norm": 0.0978178158402443, |
| "learning_rate": 6.826674409500389e-07, |
| "loss": 0.0548, |
| "step": 30000 |
| }, |
| { |
| "epoch": 8.317161075686165, |
| "eval_valid_loss": 0.04797354340553284, |
| "eval_valid_runtime": 6.442, |
| "eval_valid_samples_per_second": 214.683, |
| "eval_valid_steps_per_second": 6.83, |
| "step": 30000 |
| }, |
| { |
| "epoch": 8.317161075686165, |
| "eval_valid_target_loss": 0.052511684596538544, |
| "eval_valid_target_runtime": 6.5615, |
| "eval_valid_target_samples_per_second": 218.549, |
| "eval_valid_target_steps_per_second": 6.858, |
| "step": 30000 |
| }, |
| { |
| "epoch": 8.344884945938453, |
| "grad_norm": 0.09591928869485855, |
| "learning_rate": 6.608647130811502e-07, |
| "loss": 0.0543, |
| "step": 30100 |
| }, |
| { |
| "epoch": 8.37260881619074, |
| "grad_norm": 0.09678730368614197, |
| "learning_rate": 6.393911641103051e-07, |
| "loss": 0.0542, |
| "step": 30200 |
| }, |
| { |
| "epoch": 8.400332686443027, |
| "grad_norm": 0.10894029587507248, |
| "learning_rate": 6.182484230807845e-07, |
| "loss": 0.0542, |
| "step": 30300 |
| }, |
| { |
| "epoch": 8.428056556695315, |
| "grad_norm": 0.10065341740846634, |
| "learning_rate": 5.974380939398555e-07, |
| "loss": 0.0549, |
| "step": 30400 |
| }, |
| { |
| "epoch": 8.455780426947602, |
| "grad_norm": 0.11015477776527405, |
| "learning_rate": 5.769617554170959e-07, |
| "loss": 0.0544, |
| "step": 30500 |
| }, |
| { |
| "epoch": 8.455780426947602, |
| "eval_valid_loss": 0.04785359278321266, |
| "eval_valid_runtime": 6.4159, |
| "eval_valid_samples_per_second": 215.558, |
| "eval_valid_steps_per_second": 6.858, |
| "step": 30500 |
| }, |
| { |
| "epoch": 8.455780426947602, |
| "eval_valid_target_loss": 0.05238433927297592, |
| "eval_valid_target_runtime": 6.575, |
| "eval_valid_target_samples_per_second": 218.1, |
| "eval_valid_target_steps_per_second": 6.844, |
| "step": 30500 |
| }, |
| { |
| "epoch": 8.48350429719989, |
| "grad_norm": 0.10229642689228058, |
| "learning_rate": 5.568209609046238e-07, |
| "loss": 0.0542, |
| "step": 30600 |
| }, |
| { |
| "epoch": 8.511228167452176, |
| "grad_norm": 0.1019807681441307, |
| "learning_rate": 5.370172383392514e-07, |
| "loss": 0.0548, |
| "step": 30700 |
| }, |
| { |
| "epoch": 8.538952037704464, |
| "grad_norm": 0.1037830114364624, |
| "learning_rate": 5.175520900865754e-07, |
| "loss": 0.0538, |
| "step": 30800 |
| }, |
| { |
| "epoch": 8.56667590795675, |
| "grad_norm": 0.0952112227678299, |
| "learning_rate": 4.984269928270002e-07, |
| "loss": 0.0537, |
| "step": 30900 |
| }, |
| { |
| "epoch": 8.594399778209038, |
| "grad_norm": 0.09642232209444046, |
| "learning_rate": 4.796433974437148e-07, |
| "loss": 0.0533, |
| "step": 31000 |
| }, |
| { |
| "epoch": 8.594399778209038, |
| "eval_valid_loss": 0.04777803644537926, |
| "eval_valid_runtime": 6.4399, |
| "eval_valid_samples_per_second": 214.756, |
| "eval_valid_steps_per_second": 6.832, |
| "step": 31000 |
| }, |
| { |
| "epoch": 8.594399778209038, |
| "eval_valid_target_loss": 0.052354373037815094, |
| "eval_valid_target_runtime": 6.5668, |
| "eval_valid_target_samples_per_second": 218.371, |
| "eval_valid_target_steps_per_second": 6.853, |
| "step": 31000 |
| }, |
| { |
| "epoch": 8.622123648461326, |
| "grad_norm": 0.10211507230997086, |
| "learning_rate": 4.6120272891262365e-07, |
| "loss": 0.0544, |
| "step": 31100 |
| }, |
| { |
| "epoch": 8.649847518713612, |
| "grad_norm": 0.0912129357457161, |
| "learning_rate": 4.4310638619424363e-07, |
| "loss": 0.0536, |
| "step": 31200 |
| }, |
| { |
| "epoch": 8.6775713889659, |
| "grad_norm": 0.10558176785707474, |
| "learning_rate": 4.2535574212757667e-07, |
| "loss": 0.0542, |
| "step": 31300 |
| }, |
| { |
| "epoch": 8.705295259218186, |
| "grad_norm": 0.10381397604942322, |
| "learning_rate": 4.0795214332596145e-07, |
| "loss": 0.0547, |
| "step": 31400 |
| }, |
| { |
| "epoch": 8.733019129470474, |
| "grad_norm": 0.09383094310760498, |
| "learning_rate": 3.908969100749121e-07, |
| "loss": 0.055, |
| "step": 31500 |
| }, |
| { |
| "epoch": 8.733019129470474, |
| "eval_valid_loss": 0.047727905213832855, |
| "eval_valid_runtime": 6.4171, |
| "eval_valid_samples_per_second": 215.518, |
| "eval_valid_steps_per_second": 6.857, |
| "step": 31500 |
| }, |
| { |
| "epoch": 8.733019129470474, |
| "eval_valid_target_loss": 0.05224745720624924, |
| "eval_valid_target_runtime": 6.5727, |
| "eval_valid_target_samples_per_second": 218.174, |
| "eval_valid_target_steps_per_second": 6.846, |
| "step": 31500 |
| }, |
| { |
| "epoch": 8.760742999722762, |
| "grad_norm": 0.10438426584005356, |
| "learning_rate": 3.7419133623196825e-07, |
| "loss": 0.0541, |
| "step": 31600 |
| }, |
| { |
| "epoch": 8.788466869975048, |
| "grad_norm": 0.09324101358652115, |
| "learning_rate": 3.5783668912852453e-07, |
| "loss": 0.0537, |
| "step": 31700 |
| }, |
| { |
| "epoch": 8.816190740227336, |
| "grad_norm": 0.09235464036464691, |
| "learning_rate": 3.4183420947369873e-07, |
| "loss": 0.0544, |
| "step": 31800 |
| }, |
| { |
| "epoch": 8.843914610479622, |
| "grad_norm": 0.09870747476816177, |
| "learning_rate": 3.261851112602055e-07, |
| "loss": 0.0543, |
| "step": 31900 |
| }, |
| { |
| "epoch": 8.87163848073191, |
| "grad_norm": 0.10918495059013367, |
| "learning_rate": 3.108905816722546e-07, |
| "loss": 0.054, |
| "step": 32000 |
| }, |
| { |
| "epoch": 8.87163848073191, |
| "eval_valid_loss": 0.047707512974739075, |
| "eval_valid_runtime": 6.4362, |
| "eval_valid_samples_per_second": 214.879, |
| "eval_valid_steps_per_second": 6.836, |
| "step": 32000 |
| }, |
| { |
| "epoch": 8.87163848073191, |
| "eval_valid_target_loss": 0.05221306532621384, |
| "eval_valid_target_runtime": 6.5779, |
| "eval_valid_target_samples_per_second": 218.002, |
| "eval_valid_target_steps_per_second": 6.841, |
| "step": 32000 |
| }, |
| { |
| "epoch": 8.899362350984198, |
| "grad_norm": 0.09537260234355927, |
| "learning_rate": 2.9595178099549315e-07, |
| "loss": 0.054, |
| "step": 32100 |
| }, |
| { |
| "epoch": 8.927086221236484, |
| "grad_norm": 0.09188380092382431, |
| "learning_rate": 2.8136984252898515e-07, |
| "loss": 0.0542, |
| "step": 32200 |
| }, |
| { |
| "epoch": 8.954810091488772, |
| "grad_norm": 0.09919969737529755, |
| "learning_rate": 2.671458724992254e-07, |
| "loss": 0.0542, |
| "step": 32300 |
| }, |
| { |
| "epoch": 8.982533961741058, |
| "grad_norm": 0.09692647308111191, |
| "learning_rate": 2.532809499762312e-07, |
| "loss": 0.0544, |
| "step": 32400 |
| }, |
| { |
| "epoch": 9.010257831993346, |
| "grad_norm": 0.09277132153511047, |
| "learning_rate": 2.397761267916726e-07, |
| "loss": 0.0539, |
| "step": 32500 |
| }, |
| { |
| "epoch": 9.010257831993346, |
| "eval_valid_loss": 0.047637518495321274, |
| "eval_valid_runtime": 6.4471, |
| "eval_valid_samples_per_second": 214.516, |
| "eval_valid_steps_per_second": 6.825, |
| "step": 32500 |
| }, |
| { |
| "epoch": 9.010257831993346, |
| "eval_valid_target_loss": 0.052208978682756424, |
| "eval_valid_target_runtime": 6.5636, |
| "eval_valid_target_samples_per_second": 218.477, |
| "eval_valid_target_steps_per_second": 6.856, |
| "step": 32500 |
| }, |
| { |
| "epoch": 9.037981702245634, |
| "grad_norm": 0.09585940837860107, |
| "learning_rate": 2.2663242745908087e-07, |
| "loss": 0.0542, |
| "step": 32600 |
| }, |
| { |
| "epoch": 9.06570557249792, |
| "grad_norm": 0.09488432109355927, |
| "learning_rate": 2.138508490961244e-07, |
| "loss": 0.0533, |
| "step": 32700 |
| }, |
| { |
| "epoch": 9.093429442750208, |
| "grad_norm": 0.09499957412481308, |
| "learning_rate": 2.014323613489666e-07, |
| "loss": 0.0543, |
| "step": 32800 |
| }, |
| { |
| "epoch": 9.121153313002495, |
| "grad_norm": 0.09435317665338516, |
| "learning_rate": 1.8937790631870345e-07, |
| "loss": 0.0536, |
| "step": 32900 |
| }, |
| { |
| "epoch": 9.148877183254783, |
| "grad_norm": 0.10342779755592346, |
| "learning_rate": 1.7768839848989584e-07, |
| "loss": 0.0539, |
| "step": 33000 |
| }, |
| { |
| "epoch": 9.148877183254783, |
| "eval_valid_loss": 0.047598063945770264, |
| "eval_valid_runtime": 6.4315, |
| "eval_valid_samples_per_second": 215.037, |
| "eval_valid_steps_per_second": 6.841, |
| "step": 33000 |
| }, |
| { |
| "epoch": 9.148877183254783, |
| "eval_valid_target_loss": 0.05212317034602165, |
| "eval_valid_target_runtime": 6.5736, |
| "eval_valid_target_samples_per_second": 218.146, |
| "eval_valid_target_steps_per_second": 6.846, |
| "step": 33000 |
| }, |
| { |
| "epoch": 9.176601053507069, |
| "grad_norm": 0.09814909845590591, |
| "learning_rate": 1.6636472466118992e-07, |
| "loss": 0.0542, |
| "step": 33100 |
| }, |
| { |
| "epoch": 9.204324923759357, |
| "grad_norm": 0.09484022855758667, |
| "learning_rate": 1.5540774387804825e-07, |
| "loss": 0.0544, |
| "step": 33200 |
| }, |
| { |
| "epoch": 9.232048794011645, |
| "grad_norm": 0.07888332009315491, |
| "learning_rate": 1.448182873675752e-07, |
| "loss": 0.0539, |
| "step": 33300 |
| }, |
| { |
| "epoch": 9.25977266426393, |
| "grad_norm": 0.0964021384716034, |
| "learning_rate": 1.345971584754585e-07, |
| "loss": 0.0539, |
| "step": 33400 |
| }, |
| { |
| "epoch": 9.287496534516219, |
| "grad_norm": 0.10322096943855286, |
| "learning_rate": 1.2474513260502695e-07, |
| "loss": 0.0536, |
| "step": 33500 |
| }, |
| { |
| "epoch": 9.287496534516219, |
| "eval_valid_loss": 0.047564879059791565, |
| "eval_valid_runtime": 6.4358, |
| "eval_valid_samples_per_second": 214.89, |
| "eval_valid_steps_per_second": 6.837, |
| "step": 33500 |
| }, |
| { |
| "epoch": 9.287496534516219, |
| "eval_valid_target_loss": 0.05209695175290108, |
| "eval_valid_target_runtime": 6.5809, |
| "eval_valid_target_samples_per_second": 217.904, |
| "eval_valid_target_steps_per_second": 6.838, |
| "step": 33500 |
| }, |
| { |
| "epoch": 9.315220404768505, |
| "grad_norm": 0.10957927256822586, |
| "learning_rate": 1.1526295715842628e-07, |
| "loss": 0.0541, |
| "step": 33600 |
| }, |
| { |
| "epoch": 9.342944275020793, |
| "grad_norm": 0.09433583915233612, |
| "learning_rate": 1.0615135147991562e-07, |
| "loss": 0.0542, |
| "step": 33700 |
| }, |
| { |
| "epoch": 9.370668145273081, |
| "grad_norm": 0.09703412652015686, |
| "learning_rate": 9.741100680130122e-08, |
| "loss": 0.0535, |
| "step": 33800 |
| }, |
| { |
| "epoch": 9.398392015525367, |
| "grad_norm": 0.10180799663066864, |
| "learning_rate": 8.904258618949335e-08, |
| "loss": 0.054, |
| "step": 33900 |
| }, |
| { |
| "epoch": 9.426115885777655, |
| "grad_norm": 0.09336613118648529, |
| "learning_rate": 8.104672449620598e-08, |
| "loss": 0.0532, |
| "step": 34000 |
| }, |
| { |
| "epoch": 9.426115885777655, |
| "eval_valid_loss": 0.047556404024362564, |
| "eval_valid_runtime": 6.42, |
| "eval_valid_samples_per_second": 215.421, |
| "eval_valid_steps_per_second": 6.854, |
| "step": 34000 |
| }, |
| { |
| "epoch": 9.426115885777655, |
| "eval_valid_target_loss": 0.05208129063248634, |
| "eval_valid_target_runtime": 6.595, |
| "eval_valid_target_samples_per_second": 217.437, |
| "eval_valid_target_steps_per_second": 6.823, |
| "step": 34000 |
| }, |
| { |
| "epoch": 9.453839756029941, |
| "grad_norm": 0.0890408605337143, |
| "learning_rate": 7.342402830979589e-08, |
| "loss": 0.054, |
| "step": 34100 |
| }, |
| { |
| "epoch": 9.48156362628223, |
| "grad_norm": 0.09568461775779724, |
| "learning_rate": 6.617507590924332e-08, |
| "loss": 0.0535, |
| "step": 34200 |
| }, |
| { |
| "epoch": 9.509287496534515, |
| "grad_norm": 0.09256019443273544, |
| "learning_rate": 5.930041722028379e-08, |
| "loss": 0.054, |
| "step": 34300 |
| }, |
| { |
| "epoch": 9.537011366786803, |
| "grad_norm": 0.09314898401498795, |
| "learning_rate": 5.280057377368863e-08, |
| "loss": 0.0535, |
| "step": 34400 |
| }, |
| { |
| "epoch": 9.564735237039091, |
| "grad_norm": 0.10256827622652054, |
| "learning_rate": 4.667603866569892e-08, |
| "loss": 0.0537, |
| "step": 34500 |
| }, |
| { |
| "epoch": 9.564735237039091, |
| "eval_valid_loss": 0.047560639679431915, |
| "eval_valid_runtime": 6.4632, |
| "eval_valid_samples_per_second": 213.979, |
| "eval_valid_steps_per_second": 6.808, |
| "step": 34500 |
| }, |
| { |
| "epoch": 9.564735237039091, |
| "eval_valid_target_loss": 0.05206665024161339, |
| "eval_valid_target_runtime": 6.5886, |
| "eval_valid_target_samples_per_second": 217.649, |
| "eval_valid_target_steps_per_second": 6.83, |
| "step": 34500 |
| }, |
| { |
| "epoch": 9.592459107291377, |
| "grad_norm": 0.0861942321062088, |
| "learning_rate": 4.092727652062034e-08, |
| "loss": 0.0537, |
| "step": 34600 |
| }, |
| { |
| "epoch": 9.620182977543665, |
| "grad_norm": 0.09521106630563736, |
| "learning_rate": 3.555472345557365e-08, |
| "loss": 0.0535, |
| "step": 34700 |
| }, |
| { |
| "epoch": 9.647906847795952, |
| "grad_norm": 0.10885845869779587, |
| "learning_rate": 3.055878704741e-08, |
| "loss": 0.0542, |
| "step": 34800 |
| }, |
| { |
| "epoch": 9.67563071804824, |
| "grad_norm": 0.09145703911781311, |
| "learning_rate": 2.5939846301791804e-08, |
| "loss": 0.0541, |
| "step": 34900 |
| }, |
| { |
| "epoch": 9.703354588300527, |
| "grad_norm": 0.09051796793937683, |
| "learning_rate": 2.1698251624438503e-08, |
| "loss": 0.0544, |
| "step": 35000 |
| }, |
| { |
| "epoch": 9.703354588300527, |
| "eval_valid_loss": 0.04752533510327339, |
| "eval_valid_runtime": 6.4168, |
| "eval_valid_samples_per_second": 215.528, |
| "eval_valid_steps_per_second": 6.857, |
| "step": 35000 |
| }, |
| { |
| "epoch": 9.703354588300527, |
| "eval_valid_target_loss": 0.05207618325948715, |
| "eval_valid_target_runtime": 6.57, |
| "eval_valid_target_samples_per_second": 218.265, |
| "eval_valid_target_steps_per_second": 6.849, |
| "step": 35000 |
| }, |
| { |
| "epoch": 9.731078458552814, |
| "grad_norm": 0.0903056338429451, |
| "learning_rate": 1.7834324794546164e-08, |
| "loss": 0.0539, |
| "step": 35100 |
| }, |
| { |
| "epoch": 9.758802328805102, |
| "grad_norm": 0.0897304117679596, |
| "learning_rate": 1.434835894037423e-08, |
| "loss": 0.0539, |
| "step": 35200 |
| }, |
| { |
| "epoch": 9.786526199057388, |
| "grad_norm": 0.10058806836605072, |
| "learning_rate": 1.1240618517009416e-08, |
| "loss": 0.0542, |
| "step": 35300 |
| }, |
| { |
| "epoch": 9.814250069309676, |
| "grad_norm": 0.1056876927614212, |
| "learning_rate": 8.511339286303432e-09, |
| "loss": 0.0537, |
| "step": 35400 |
| }, |
| { |
| "epoch": 9.841973939561964, |
| "grad_norm": 0.08990786969661713, |
| "learning_rate": 6.1607282989856184e-09, |
| "loss": 0.0547, |
| "step": 35500 |
| }, |
| { |
| "epoch": 9.841973939561964, |
| "eval_valid_loss": 0.047528158873319626, |
| "eval_valid_runtime": 6.4412, |
| "eval_valid_samples_per_second": 214.712, |
| "eval_valid_steps_per_second": 6.831, |
| "step": 35500 |
| }, |
| { |
| "epoch": 9.841973939561964, |
| "eval_valid_target_loss": 0.05206017941236496, |
| "eval_valid_target_runtime": 6.5864, |
| "eval_valid_target_samples_per_second": 217.72, |
| "eval_valid_target_steps_per_second": 6.832, |
| "step": 35500 |
| }, |
| { |
| "epoch": 9.86969780981425, |
| "grad_norm": 0.08090436458587646, |
| "learning_rate": 4.188963878958841e-09, |
| "loss": 0.0536, |
| "step": 35600 |
| }, |
| { |
| "epoch": 9.897421680066538, |
| "grad_norm": 0.08319131284952164, |
| "learning_rate": 2.5961956097669827e-09, |
| "loss": 0.0541, |
| "step": 35700 |
| }, |
| { |
| "epoch": 9.925145550318824, |
| "grad_norm": 0.10666873306035995, |
| "learning_rate": 1.3825443232517999e-09, |
| "loss": 0.0541, |
| "step": 35800 |
| }, |
| { |
| "epoch": 9.952869420571112, |
| "grad_norm": 0.10748881101608276, |
| "learning_rate": 5.48102090381919e-10, |
| "loss": 0.0543, |
| "step": 35900 |
| }, |
| { |
| "epoch": 9.9805932908234, |
| "grad_norm": 0.10198221355676651, |
| "learning_rate": 9.293221427231214e-11, |
| "loss": 0.0533, |
| "step": 36000 |
| }, |
| { |
| "epoch": 9.9805932908234, |
| "eval_valid_loss": 0.04753027856349945, |
| "eval_valid_runtime": 6.4518, |
| "eval_valid_samples_per_second": 214.359, |
| "eval_valid_steps_per_second": 6.82, |
| "step": 36000 |
| }, |
| { |
| "epoch": 9.9805932908234, |
| "eval_valid_target_loss": 0.05205439031124115, |
| "eval_valid_target_runtime": 6.5698, |
| "eval_valid_target_samples_per_second": 218.272, |
| "eval_valid_target_steps_per_second": 6.85, |
| "step": 36000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 36070, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.429394066302619e+19, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|