diff --git "a/ECD_Fintuned_phi-3-vision/trainer_state.json" "b/ECD_Fintuned_phi-3-vision/trainer_state.json" new file mode 100644--- /dev/null +++ "b/ECD_Fintuned_phi-3-vision/trainer_state.json" @@ -0,0 +1,23492 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 3350, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00029850746268656717, + "grad_norm": 14.117414815113976, + "learning_rate": 4.950495049504951e-08, + "loss": 1.4506, + "step": 1 + }, + { + "epoch": 0.0005970149253731343, + "grad_norm": 13.931060049875214, + "learning_rate": 9.900990099009901e-08, + "loss": 1.4497, + "step": 2 + }, + { + "epoch": 0.0008955223880597015, + "grad_norm": 12.951035081832662, + "learning_rate": 1.4851485148514852e-07, + "loss": 1.4249, + "step": 3 + }, + { + "epoch": 0.0011940298507462687, + "grad_norm": 12.506012032234118, + "learning_rate": 1.9801980198019803e-07, + "loss": 1.3703, + "step": 4 + }, + { + "epoch": 0.0014925373134328358, + "grad_norm": 12.759791737538139, + "learning_rate": 2.4752475247524754e-07, + "loss": 1.3928, + "step": 5 + }, + { + "epoch": 0.001791044776119403, + "grad_norm": 11.981120238868472, + "learning_rate": 2.9702970297029703e-07, + "loss": 1.3379, + "step": 6 + }, + { + "epoch": 0.00208955223880597, + "grad_norm": 14.232070759545241, + "learning_rate": 3.4653465346534657e-07, + "loss": 1.3504, + "step": 7 + }, + { + "epoch": 0.0023880597014925373, + "grad_norm": 14.919999725885855, + "learning_rate": 3.9603960396039606e-07, + "loss": 1.4683, + "step": 8 + }, + { + "epoch": 0.0026865671641791043, + "grad_norm": 15.081665565944352, + "learning_rate": 4.4554455445544555e-07, + "loss": 1.4529, + "step": 9 + }, + { + "epoch": 0.0029850746268656717, + "grad_norm": 14.1043643853956, + "learning_rate": 4.950495049504951e-07, + "loss": 1.4489, + "step": 10 + }, + { + "epoch": 0.0032835820895522386, + "grad_norm": 13.03984594597713, + "learning_rate": 5.445544554455446e-07, + "loss": 1.3641, + "step": 11 + }, + { + "epoch": 0.003582089552238806, + "grad_norm": 12.55643909326301, + "learning_rate": 5.940594059405941e-07, + "loss": 1.366, + "step": 12 + }, + { + "epoch": 0.003880597014925373, + "grad_norm": 13.066793846628885, + "learning_rate": 6.435643564356436e-07, + "loss": 1.3347, + "step": 13 + }, + { + "epoch": 0.00417910447761194, + "grad_norm": 14.232011881175255, + "learning_rate": 6.930693069306931e-07, + "loss": 1.3869, + "step": 14 + }, + { + "epoch": 0.004477611940298508, + "grad_norm": 13.382384812080517, + "learning_rate": 7.425742574257426e-07, + "loss": 1.2797, + "step": 15 + }, + { + "epoch": 0.004776119402985075, + "grad_norm": 14.063303937583319, + "learning_rate": 7.920792079207921e-07, + "loss": 1.2945, + "step": 16 + }, + { + "epoch": 0.005074626865671642, + "grad_norm": 12.599956283878015, + "learning_rate": 8.415841584158417e-07, + "loss": 1.2541, + "step": 17 + }, + { + "epoch": 0.005373134328358209, + "grad_norm": 15.35594033376082, + "learning_rate": 8.910891089108911e-07, + "loss": 1.4035, + "step": 18 + }, + { + "epoch": 0.005671641791044776, + "grad_norm": 13.703270603112776, + "learning_rate": 9.405940594059406e-07, + "loss": 1.3001, + "step": 19 + }, + { + "epoch": 0.005970149253731343, + "grad_norm": 11.466111679630176, + "learning_rate": 9.900990099009902e-07, + "loss": 1.1074, + "step": 20 + }, + { + "epoch": 0.00626865671641791, + "grad_norm": 12.969087798482644, + "learning_rate": 1.0396039603960397e-06, + "loss": 1.1151, + "step": 21 + }, + { + "epoch": 0.006567164179104477, + "grad_norm": 11.305356480464539, + "learning_rate": 1.0891089108910893e-06, + "loss": 1.0693, + "step": 22 + }, + { + "epoch": 0.006865671641791045, + "grad_norm": 10.571941049724712, + "learning_rate": 1.1386138613861388e-06, + "loss": 1.0079, + "step": 23 + }, + { + "epoch": 0.007164179104477612, + "grad_norm": 13.611826826791107, + "learning_rate": 1.1881188118811881e-06, + "loss": 1.1241, + "step": 24 + }, + { + "epoch": 0.007462686567164179, + "grad_norm": 10.580271079481518, + "learning_rate": 1.2376237623762377e-06, + "loss": 1.0046, + "step": 25 + }, + { + "epoch": 0.007761194029850746, + "grad_norm": 14.626205948755526, + "learning_rate": 1.2871287128712872e-06, + "loss": 1.0083, + "step": 26 + }, + { + "epoch": 0.008059701492537314, + "grad_norm": 9.659001814009544, + "learning_rate": 1.3366336633663367e-06, + "loss": 0.8455, + "step": 27 + }, + { + "epoch": 0.00835820895522388, + "grad_norm": 9.492714737777815, + "learning_rate": 1.3861386138613863e-06, + "loss": 0.744, + "step": 28 + }, + { + "epoch": 0.008656716417910448, + "grad_norm": 9.552713565555441, + "learning_rate": 1.4356435643564356e-06, + "loss": 0.7097, + "step": 29 + }, + { + "epoch": 0.008955223880597015, + "grad_norm": 9.399332350213616, + "learning_rate": 1.4851485148514852e-06, + "loss": 0.8027, + "step": 30 + }, + { + "epoch": 0.009253731343283582, + "grad_norm": 10.362788296964608, + "learning_rate": 1.5346534653465347e-06, + "loss": 0.8065, + "step": 31 + }, + { + "epoch": 0.00955223880597015, + "grad_norm": 8.883431717385413, + "learning_rate": 1.5841584158415842e-06, + "loss": 0.6679, + "step": 32 + }, + { + "epoch": 0.009850746268656717, + "grad_norm": 6.947939015528359, + "learning_rate": 1.6336633663366338e-06, + "loss": 0.6528, + "step": 33 + }, + { + "epoch": 0.010149253731343283, + "grad_norm": 5.181069033173571, + "learning_rate": 1.6831683168316833e-06, + "loss": 0.6128, + "step": 34 + }, + { + "epoch": 0.010447761194029851, + "grad_norm": 2.1652397986100635, + "learning_rate": 1.7326732673267326e-06, + "loss": 0.6552, + "step": 35 + }, + { + "epoch": 0.010746268656716417, + "grad_norm": 4.632087693327325, + "learning_rate": 1.7821782178217822e-06, + "loss": 0.6027, + "step": 36 + }, + { + "epoch": 0.011044776119402985, + "grad_norm": 7.422754544331454, + "learning_rate": 1.8316831683168317e-06, + "loss": 0.59, + "step": 37 + }, + { + "epoch": 0.011343283582089553, + "grad_norm": 7.399958472269416, + "learning_rate": 1.8811881188118813e-06, + "loss": 0.586, + "step": 38 + }, + { + "epoch": 0.011641791044776119, + "grad_norm": 3.656574253442115, + "learning_rate": 1.930693069306931e-06, + "loss": 0.5334, + "step": 39 + }, + { + "epoch": 0.011940298507462687, + "grad_norm": 2.513197596004464, + "learning_rate": 1.9801980198019803e-06, + "loss": 0.5474, + "step": 40 + }, + { + "epoch": 0.012238805970149255, + "grad_norm": 1.918336955189715, + "learning_rate": 2.02970297029703e-06, + "loss": 0.6019, + "step": 41 + }, + { + "epoch": 0.01253731343283582, + "grad_norm": 1.4284644405554494, + "learning_rate": 2.0792079207920794e-06, + "loss": 0.5576, + "step": 42 + }, + { + "epoch": 0.012835820895522388, + "grad_norm": 1.578482950062078, + "learning_rate": 2.1287128712871288e-06, + "loss": 0.5727, + "step": 43 + }, + { + "epoch": 0.013134328358208954, + "grad_norm": 1.354481047717988, + "learning_rate": 2.1782178217821785e-06, + "loss": 0.5318, + "step": 44 + }, + { + "epoch": 0.013432835820895522, + "grad_norm": 1.25497134625581, + "learning_rate": 2.227722772277228e-06, + "loss": 0.5266, + "step": 45 + }, + { + "epoch": 0.01373134328358209, + "grad_norm": 1.3741989730447257, + "learning_rate": 2.2772277227722776e-06, + "loss": 0.5577, + "step": 46 + }, + { + "epoch": 0.014029850746268656, + "grad_norm": 1.1498984206900507, + "learning_rate": 2.326732673267327e-06, + "loss": 0.4985, + "step": 47 + }, + { + "epoch": 0.014328358208955224, + "grad_norm": 1.2303306117520938, + "learning_rate": 2.3762376237623762e-06, + "loss": 0.5211, + "step": 48 + }, + { + "epoch": 0.014626865671641792, + "grad_norm": 1.11221660675085, + "learning_rate": 2.425742574257426e-06, + "loss": 0.4762, + "step": 49 + }, + { + "epoch": 0.014925373134328358, + "grad_norm": 1.1204377252400624, + "learning_rate": 2.4752475247524753e-06, + "loss": 0.5147, + "step": 50 + }, + { + "epoch": 0.015223880597014926, + "grad_norm": 1.3294182563507342, + "learning_rate": 2.524752475247525e-06, + "loss": 0.582, + "step": 51 + }, + { + "epoch": 0.015522388059701492, + "grad_norm": 1.1126190487740035, + "learning_rate": 2.5742574257425744e-06, + "loss": 0.5393, + "step": 52 + }, + { + "epoch": 0.01582089552238806, + "grad_norm": 1.0981865353995417, + "learning_rate": 2.623762376237624e-06, + "loss": 0.4749, + "step": 53 + }, + { + "epoch": 0.016119402985074627, + "grad_norm": 1.0843096961998069, + "learning_rate": 2.6732673267326735e-06, + "loss": 0.521, + "step": 54 + }, + { + "epoch": 0.016417910447761194, + "grad_norm": 1.0763196586153305, + "learning_rate": 2.7227722772277232e-06, + "loss": 0.4723, + "step": 55 + }, + { + "epoch": 0.01671641791044776, + "grad_norm": 1.0864036348823496, + "learning_rate": 2.7722772277227726e-06, + "loss": 0.5065, + "step": 56 + }, + { + "epoch": 0.01701492537313433, + "grad_norm": 1.2234419660497935, + "learning_rate": 2.821782178217822e-06, + "loss": 0.4942, + "step": 57 + }, + { + "epoch": 0.017313432835820895, + "grad_norm": 1.0591972627781436, + "learning_rate": 2.8712871287128712e-06, + "loss": 0.4778, + "step": 58 + }, + { + "epoch": 0.01761194029850746, + "grad_norm": 1.0721476207536529, + "learning_rate": 2.920792079207921e-06, + "loss": 0.5391, + "step": 59 + }, + { + "epoch": 0.01791044776119403, + "grad_norm": 1.0985626878686374, + "learning_rate": 2.9702970297029703e-06, + "loss": 0.4476, + "step": 60 + }, + { + "epoch": 0.018208955223880597, + "grad_norm": 1.1392243737050494, + "learning_rate": 3.01980198019802e-06, + "loss": 0.5497, + "step": 61 + }, + { + "epoch": 0.018507462686567163, + "grad_norm": 1.0813836369765903, + "learning_rate": 3.0693069306930694e-06, + "loss": 0.5208, + "step": 62 + }, + { + "epoch": 0.018805970149253733, + "grad_norm": 1.0889059705799737, + "learning_rate": 3.118811881188119e-06, + "loss": 0.4994, + "step": 63 + }, + { + "epoch": 0.0191044776119403, + "grad_norm": 1.0362585523500178, + "learning_rate": 3.1683168316831685e-06, + "loss": 0.4829, + "step": 64 + }, + { + "epoch": 0.019402985074626865, + "grad_norm": 1.0359519925978355, + "learning_rate": 3.2178217821782182e-06, + "loss": 0.476, + "step": 65 + }, + { + "epoch": 0.019701492537313434, + "grad_norm": 0.9185525908299487, + "learning_rate": 3.2673267326732676e-06, + "loss": 0.4103, + "step": 66 + }, + { + "epoch": 0.02, + "grad_norm": 1.0418468842058957, + "learning_rate": 3.3168316831683173e-06, + "loss": 0.4146, + "step": 67 + }, + { + "epoch": 0.020298507462686566, + "grad_norm": 1.2661337234827934, + "learning_rate": 3.3663366336633666e-06, + "loss": 0.4533, + "step": 68 + }, + { + "epoch": 0.020597014925373136, + "grad_norm": 0.9917842549873999, + "learning_rate": 3.4158415841584164e-06, + "loss": 0.4651, + "step": 69 + }, + { + "epoch": 0.020895522388059702, + "grad_norm": 1.0318070695121893, + "learning_rate": 3.4653465346534653e-06, + "loss": 0.5135, + "step": 70 + }, + { + "epoch": 0.021194029850746268, + "grad_norm": 0.9942332357926064, + "learning_rate": 3.514851485148515e-06, + "loss": 0.4812, + "step": 71 + }, + { + "epoch": 0.021492537313432834, + "grad_norm": 0.9647372803003441, + "learning_rate": 3.5643564356435644e-06, + "loss": 0.5059, + "step": 72 + }, + { + "epoch": 0.021791044776119404, + "grad_norm": 0.9756894687445946, + "learning_rate": 3.613861386138614e-06, + "loss": 0.5094, + "step": 73 + }, + { + "epoch": 0.02208955223880597, + "grad_norm": 1.0710107538374112, + "learning_rate": 3.6633663366336635e-06, + "loss": 0.5479, + "step": 74 + }, + { + "epoch": 0.022388059701492536, + "grad_norm": 1.0245672263567556, + "learning_rate": 3.712871287128713e-06, + "loss": 0.4638, + "step": 75 + }, + { + "epoch": 0.022686567164179106, + "grad_norm": 0.972951009425826, + "learning_rate": 3.7623762376237625e-06, + "loss": 0.5037, + "step": 76 + }, + { + "epoch": 0.02298507462686567, + "grad_norm": 0.8913835901316376, + "learning_rate": 3.8118811881188123e-06, + "loss": 0.4415, + "step": 77 + }, + { + "epoch": 0.023283582089552238, + "grad_norm": 0.918140464986545, + "learning_rate": 3.861386138613862e-06, + "loss": 0.5016, + "step": 78 + }, + { + "epoch": 0.023582089552238807, + "grad_norm": 0.9542502229677259, + "learning_rate": 3.910891089108911e-06, + "loss": 0.4639, + "step": 79 + }, + { + "epoch": 0.023880597014925373, + "grad_norm": 0.9376495588439216, + "learning_rate": 3.960396039603961e-06, + "loss": 0.4696, + "step": 80 + }, + { + "epoch": 0.02417910447761194, + "grad_norm": 1.0118829978197192, + "learning_rate": 4.0099009900990104e-06, + "loss": 0.4393, + "step": 81 + }, + { + "epoch": 0.02447761194029851, + "grad_norm": 0.9312691594030761, + "learning_rate": 4.05940594059406e-06, + "loss": 0.4773, + "step": 82 + }, + { + "epoch": 0.024776119402985075, + "grad_norm": 0.9718484289074919, + "learning_rate": 4.108910891089109e-06, + "loss": 0.4915, + "step": 83 + }, + { + "epoch": 0.02507462686567164, + "grad_norm": 0.9125444747399516, + "learning_rate": 4.158415841584159e-06, + "loss": 0.4721, + "step": 84 + }, + { + "epoch": 0.025373134328358207, + "grad_norm": 0.8900812924955422, + "learning_rate": 4.207920792079208e-06, + "loss": 0.4223, + "step": 85 + }, + { + "epoch": 0.025671641791044777, + "grad_norm": 1.121907218319262, + "learning_rate": 4.2574257425742575e-06, + "loss": 0.427, + "step": 86 + }, + { + "epoch": 0.025970149253731343, + "grad_norm": 1.0549443213006138, + "learning_rate": 4.306930693069307e-06, + "loss": 0.4944, + "step": 87 + }, + { + "epoch": 0.02626865671641791, + "grad_norm": 0.9113639087013188, + "learning_rate": 4.356435643564357e-06, + "loss": 0.445, + "step": 88 + }, + { + "epoch": 0.02656716417910448, + "grad_norm": 0.9366021839687358, + "learning_rate": 4.405940594059406e-06, + "loss": 0.419, + "step": 89 + }, + { + "epoch": 0.026865671641791045, + "grad_norm": 1.021855680179713, + "learning_rate": 4.455445544554456e-06, + "loss": 0.5366, + "step": 90 + }, + { + "epoch": 0.02716417910447761, + "grad_norm": 1.306674789879524, + "learning_rate": 4.5049504950495054e-06, + "loss": 0.4847, + "step": 91 + }, + { + "epoch": 0.02746268656716418, + "grad_norm": 0.9466689723258144, + "learning_rate": 4.554455445544555e-06, + "loss": 0.4331, + "step": 92 + }, + { + "epoch": 0.027761194029850746, + "grad_norm": 1.0130636044595012, + "learning_rate": 4.603960396039605e-06, + "loss": 0.5162, + "step": 93 + }, + { + "epoch": 0.028059701492537312, + "grad_norm": 0.9085144452361592, + "learning_rate": 4.653465346534654e-06, + "loss": 0.4798, + "step": 94 + }, + { + "epoch": 0.028358208955223882, + "grad_norm": 0.8665298826536127, + "learning_rate": 4.702970297029703e-06, + "loss": 0.4102, + "step": 95 + }, + { + "epoch": 0.028656716417910448, + "grad_norm": 0.8922124742463072, + "learning_rate": 4.7524752475247525e-06, + "loss": 0.4531, + "step": 96 + }, + { + "epoch": 0.028955223880597014, + "grad_norm": 1.0766078947850648, + "learning_rate": 4.801980198019802e-06, + "loss": 0.4298, + "step": 97 + }, + { + "epoch": 0.029253731343283584, + "grad_norm": 0.887211185618867, + "learning_rate": 4.851485148514852e-06, + "loss": 0.4285, + "step": 98 + }, + { + "epoch": 0.02955223880597015, + "grad_norm": 0.8297755867290657, + "learning_rate": 4.900990099009901e-06, + "loss": 0.3819, + "step": 99 + }, + { + "epoch": 0.029850746268656716, + "grad_norm": 0.9717588300149205, + "learning_rate": 4.950495049504951e-06, + "loss": 0.5215, + "step": 100 + }, + { + "epoch": 0.030149253731343282, + "grad_norm": 0.8764330876038892, + "learning_rate": 5e-06, + "loss": 0.397, + "step": 101 + }, + { + "epoch": 0.03044776119402985, + "grad_norm": 0.9639801062167249, + "learning_rate": 4.99999883128047e-06, + "loss": 0.4042, + "step": 102 + }, + { + "epoch": 0.030746268656716418, + "grad_norm": 0.9009910276004819, + "learning_rate": 4.999995325122968e-06, + "loss": 0.4648, + "step": 103 + }, + { + "epoch": 0.031044776119402984, + "grad_norm": 0.9428548948928329, + "learning_rate": 4.999989481530776e-06, + "loss": 0.4345, + "step": 104 + }, + { + "epoch": 0.03134328358208955, + "grad_norm": 0.8987738306915273, + "learning_rate": 4.9999813005093556e-06, + "loss": 0.4682, + "step": 105 + }, + { + "epoch": 0.03164179104477612, + "grad_norm": 0.8997778964437559, + "learning_rate": 4.999970782066357e-06, + "loss": 0.4047, + "step": 106 + }, + { + "epoch": 0.03194029850746269, + "grad_norm": 0.9569315241820582, + "learning_rate": 4.999957926211613e-06, + "loss": 0.4584, + "step": 107 + }, + { + "epoch": 0.032238805970149255, + "grad_norm": 0.9078784862909539, + "learning_rate": 4.9999427329571445e-06, + "loss": 0.4117, + "step": 108 + }, + { + "epoch": 0.03253731343283582, + "grad_norm": 0.9616336053244321, + "learning_rate": 4.999925202317158e-06, + "loss": 0.4677, + "step": 109 + }, + { + "epoch": 0.03283582089552239, + "grad_norm": 0.8828990333654685, + "learning_rate": 4.9999053343080424e-06, + "loss": 0.454, + "step": 110 + }, + { + "epoch": 0.03313432835820895, + "grad_norm": 0.8971703986475054, + "learning_rate": 4.9998831289483745e-06, + "loss": 0.4401, + "step": 111 + }, + { + "epoch": 0.03343283582089552, + "grad_norm": 1.0410628753647846, + "learning_rate": 4.9998585862589165e-06, + "loss": 0.4655, + "step": 112 + }, + { + "epoch": 0.03373134328358209, + "grad_norm": 0.9301629000296456, + "learning_rate": 4.999831706262614e-06, + "loss": 0.4763, + "step": 113 + }, + { + "epoch": 0.03402985074626866, + "grad_norm": 0.8268955592230676, + "learning_rate": 4.999802488984598e-06, + "loss": 0.3696, + "step": 114 + }, + { + "epoch": 0.034328358208955224, + "grad_norm": 0.8664996618072721, + "learning_rate": 4.99977093445219e-06, + "loss": 0.3828, + "step": 115 + }, + { + "epoch": 0.03462686567164179, + "grad_norm": 1.0005293182043562, + "learning_rate": 4.999737042694889e-06, + "loss": 0.4978, + "step": 116 + }, + { + "epoch": 0.03492537313432836, + "grad_norm": 0.9140644611648783, + "learning_rate": 4.9997008137443845e-06, + "loss": 0.4464, + "step": 117 + }, + { + "epoch": 0.03522388059701492, + "grad_norm": 0.982768264944626, + "learning_rate": 4.99966224763455e-06, + "loss": 0.4059, + "step": 118 + }, + { + "epoch": 0.035522388059701496, + "grad_norm": 0.7935157638732689, + "learning_rate": 4.999621344401443e-06, + "loss": 0.3914, + "step": 119 + }, + { + "epoch": 0.03582089552238806, + "grad_norm": 0.8379591403713217, + "learning_rate": 4.999578104083307e-06, + "loss": 0.4461, + "step": 120 + }, + { + "epoch": 0.03611940298507463, + "grad_norm": 0.9772888203631277, + "learning_rate": 4.9995325267205715e-06, + "loss": 0.4251, + "step": 121 + }, + { + "epoch": 0.036417910447761194, + "grad_norm": 0.9050435971292533, + "learning_rate": 4.999484612355849e-06, + "loss": 0.4231, + "step": 122 + }, + { + "epoch": 0.03671641791044776, + "grad_norm": 0.8579300964981285, + "learning_rate": 4.99943436103394e-06, + "loss": 0.4711, + "step": 123 + }, + { + "epoch": 0.037014925373134326, + "grad_norm": 1.0200535489985578, + "learning_rate": 4.999381772801827e-06, + "loss": 0.4277, + "step": 124 + }, + { + "epoch": 0.03731343283582089, + "grad_norm": 0.9442204717239281, + "learning_rate": 4.999326847708678e-06, + "loss": 0.4448, + "step": 125 + }, + { + "epoch": 0.037611940298507465, + "grad_norm": 0.895546100855722, + "learning_rate": 4.999269585805848e-06, + "loss": 0.451, + "step": 126 + }, + { + "epoch": 0.03791044776119403, + "grad_norm": 0.9401669590071482, + "learning_rate": 4.999209987146876e-06, + "loss": 0.4118, + "step": 127 + }, + { + "epoch": 0.0382089552238806, + "grad_norm": 0.9947350109107067, + "learning_rate": 4.999148051787483e-06, + "loss": 0.4732, + "step": 128 + }, + { + "epoch": 0.038507462686567163, + "grad_norm": 0.9239692387463458, + "learning_rate": 4.999083779785579e-06, + "loss": 0.382, + "step": 129 + }, + { + "epoch": 0.03880597014925373, + "grad_norm": 0.8510728584830578, + "learning_rate": 4.999017171201256e-06, + "loss": 0.3563, + "step": 130 + }, + { + "epoch": 0.039104477611940296, + "grad_norm": 0.9327467858570172, + "learning_rate": 4.998948226096792e-06, + "loss": 0.4404, + "step": 131 + }, + { + "epoch": 0.03940298507462687, + "grad_norm": 0.9122306769001939, + "learning_rate": 4.998876944536648e-06, + "loss": 0.4465, + "step": 132 + }, + { + "epoch": 0.039701492537313435, + "grad_norm": 0.8628534940861644, + "learning_rate": 4.9988033265874714e-06, + "loss": 0.3978, + "step": 133 + }, + { + "epoch": 0.04, + "grad_norm": 0.9890246363319865, + "learning_rate": 4.998727372318093e-06, + "loss": 0.4522, + "step": 134 + }, + { + "epoch": 0.04029850746268657, + "grad_norm": 0.8770139868884229, + "learning_rate": 4.998649081799528e-06, + "loss": 0.3612, + "step": 135 + }, + { + "epoch": 0.04059701492537313, + "grad_norm": 0.8875725344879101, + "learning_rate": 4.998568455104976e-06, + "loss": 0.431, + "step": 136 + }, + { + "epoch": 0.0408955223880597, + "grad_norm": 0.8689054951608837, + "learning_rate": 4.998485492309821e-06, + "loss": 0.4131, + "step": 137 + }, + { + "epoch": 0.04119402985074627, + "grad_norm": 0.8315965070349196, + "learning_rate": 4.998400193491632e-06, + "loss": 0.4025, + "step": 138 + }, + { + "epoch": 0.04149253731343284, + "grad_norm": 0.9156816933709063, + "learning_rate": 4.9983125587301594e-06, + "loss": 0.4201, + "step": 139 + }, + { + "epoch": 0.041791044776119404, + "grad_norm": 0.919113537897832, + "learning_rate": 4.998222588107342e-06, + "loss": 0.3936, + "step": 140 + }, + { + "epoch": 0.04208955223880597, + "grad_norm": 0.9372729637074827, + "learning_rate": 4.9981302817072984e-06, + "loss": 0.3789, + "step": 141 + }, + { + "epoch": 0.042388059701492536, + "grad_norm": 0.8930146991739137, + "learning_rate": 4.998035639616334e-06, + "loss": 0.4525, + "step": 142 + }, + { + "epoch": 0.0426865671641791, + "grad_norm": 0.9163631113724126, + "learning_rate": 4.997938661922936e-06, + "loss": 0.4033, + "step": 143 + }, + { + "epoch": 0.04298507462686567, + "grad_norm": 1.0610564882169262, + "learning_rate": 4.997839348717776e-06, + "loss": 0.4699, + "step": 144 + }, + { + "epoch": 0.04328358208955224, + "grad_norm": 0.9142847782134293, + "learning_rate": 4.99773770009371e-06, + "loss": 0.4134, + "step": 145 + }, + { + "epoch": 0.04358208955223881, + "grad_norm": 0.8539068181792682, + "learning_rate": 4.997633716145778e-06, + "loss": 0.3879, + "step": 146 + }, + { + "epoch": 0.043880597014925374, + "grad_norm": 0.908534305321226, + "learning_rate": 4.9975273969712e-06, + "loss": 0.4034, + "step": 147 + }, + { + "epoch": 0.04417910447761194, + "grad_norm": 0.9065531552905202, + "learning_rate": 4.997418742669383e-06, + "loss": 0.4624, + "step": 148 + }, + { + "epoch": 0.044477611940298506, + "grad_norm": 0.8900058271373764, + "learning_rate": 4.997307753341918e-06, + "loss": 0.4096, + "step": 149 + }, + { + "epoch": 0.04477611940298507, + "grad_norm": 0.8850620387207646, + "learning_rate": 4.997194429092573e-06, + "loss": 0.3899, + "step": 150 + }, + { + "epoch": 0.045074626865671645, + "grad_norm": 0.8701299536472148, + "learning_rate": 4.997078770027308e-06, + "loss": 0.4333, + "step": 151 + }, + { + "epoch": 0.04537313432835821, + "grad_norm": 1.020005231391521, + "learning_rate": 4.99696077625426e-06, + "loss": 0.4582, + "step": 152 + }, + { + "epoch": 0.04567164179104478, + "grad_norm": 1.042601527987692, + "learning_rate": 4.996840447883748e-06, + "loss": 0.4136, + "step": 153 + }, + { + "epoch": 0.04597014925373134, + "grad_norm": 1.0231933296827058, + "learning_rate": 4.996717785028278e-06, + "loss": 0.4359, + "step": 154 + }, + { + "epoch": 0.04626865671641791, + "grad_norm": 0.9352772150333463, + "learning_rate": 4.9965927878025365e-06, + "loss": 0.4572, + "step": 155 + }, + { + "epoch": 0.046567164179104475, + "grad_norm": 0.8932031869034676, + "learning_rate": 4.996465456323394e-06, + "loss": 0.4292, + "step": 156 + }, + { + "epoch": 0.04686567164179104, + "grad_norm": 0.9781906096105041, + "learning_rate": 4.9963357907099e-06, + "loss": 0.4399, + "step": 157 + }, + { + "epoch": 0.047164179104477615, + "grad_norm": 0.8702251247937537, + "learning_rate": 4.996203791083291e-06, + "loss": 0.3471, + "step": 158 + }, + { + "epoch": 0.04746268656716418, + "grad_norm": 0.891540485971903, + "learning_rate": 4.996069457566982e-06, + "loss": 0.3809, + "step": 159 + }, + { + "epoch": 0.04776119402985075, + "grad_norm": 0.8390421395659049, + "learning_rate": 4.995932790286572e-06, + "loss": 0.3728, + "step": 160 + }, + { + "epoch": 0.04805970149253731, + "grad_norm": 0.9759929778750225, + "learning_rate": 4.995793789369842e-06, + "loss": 0.3929, + "step": 161 + }, + { + "epoch": 0.04835820895522388, + "grad_norm": 0.9001180874449742, + "learning_rate": 4.9956524549467524e-06, + "loss": 0.4161, + "step": 162 + }, + { + "epoch": 0.048656716417910445, + "grad_norm": 1.0153327655108404, + "learning_rate": 4.995508787149451e-06, + "loss": 0.4279, + "step": 163 + }, + { + "epoch": 0.04895522388059702, + "grad_norm": 0.9940714318433066, + "learning_rate": 4.995362786112261e-06, + "loss": 0.4293, + "step": 164 + }, + { + "epoch": 0.049253731343283584, + "grad_norm": 0.8547509297200269, + "learning_rate": 4.99521445197169e-06, + "loss": 0.3878, + "step": 165 + }, + { + "epoch": 0.04955223880597015, + "grad_norm": 0.8085492708878349, + "learning_rate": 4.995063784866427e-06, + "loss": 0.3565, + "step": 166 + }, + { + "epoch": 0.049850746268656716, + "grad_norm": 0.9845859673203974, + "learning_rate": 4.994910784937343e-06, + "loss": 0.4446, + "step": 167 + }, + { + "epoch": 0.05014925373134328, + "grad_norm": 1.0461284807321518, + "learning_rate": 4.99475545232749e-06, + "loss": 0.4254, + "step": 168 + }, + { + "epoch": 0.05044776119402985, + "grad_norm": 0.9140198174691191, + "learning_rate": 4.994597787182097e-06, + "loss": 0.436, + "step": 169 + }, + { + "epoch": 0.050746268656716415, + "grad_norm": 0.9629152651211416, + "learning_rate": 4.99443778964858e-06, + "loss": 0.4742, + "step": 170 + }, + { + "epoch": 0.05104477611940299, + "grad_norm": 0.9693205236551692, + "learning_rate": 4.994275459876531e-06, + "loss": 0.4206, + "step": 171 + }, + { + "epoch": 0.051343283582089554, + "grad_norm": 0.885322451195088, + "learning_rate": 4.994110798017725e-06, + "loss": 0.3567, + "step": 172 + }, + { + "epoch": 0.05164179104477612, + "grad_norm": 0.9180041796566594, + "learning_rate": 4.993943804226117e-06, + "loss": 0.4015, + "step": 173 + }, + { + "epoch": 0.051940298507462686, + "grad_norm": 0.8915204742324517, + "learning_rate": 4.9937744786578425e-06, + "loss": 0.4378, + "step": 174 + }, + { + "epoch": 0.05223880597014925, + "grad_norm": 0.9057459288082191, + "learning_rate": 4.993602821471216e-06, + "loss": 0.3972, + "step": 175 + }, + { + "epoch": 0.05253731343283582, + "grad_norm": 0.9672531963127454, + "learning_rate": 4.993428832826734e-06, + "loss": 0.4127, + "step": 176 + }, + { + "epoch": 0.05283582089552239, + "grad_norm": 0.9087219850655597, + "learning_rate": 4.993252512887069e-06, + "loss": 0.4277, + "step": 177 + }, + { + "epoch": 0.05313432835820896, + "grad_norm": 0.8440962947146048, + "learning_rate": 4.993073861817078e-06, + "loss": 0.4241, + "step": 178 + }, + { + "epoch": 0.05343283582089552, + "grad_norm": 0.9406161881759132, + "learning_rate": 4.992892879783795e-06, + "loss": 0.4186, + "step": 179 + }, + { + "epoch": 0.05373134328358209, + "grad_norm": 0.9103960721738174, + "learning_rate": 4.992709566956435e-06, + "loss": 0.4243, + "step": 180 + }, + { + "epoch": 0.054029850746268655, + "grad_norm": 0.9301312469178198, + "learning_rate": 4.992523923506388e-06, + "loss": 0.4279, + "step": 181 + }, + { + "epoch": 0.05432835820895522, + "grad_norm": 0.9894235486712242, + "learning_rate": 4.992335949607229e-06, + "loss": 0.4456, + "step": 182 + }, + { + "epoch": 0.054626865671641794, + "grad_norm": 0.9146798098761367, + "learning_rate": 4.992145645434708e-06, + "loss": 0.3528, + "step": 183 + }, + { + "epoch": 0.05492537313432836, + "grad_norm": 0.9418084586681869, + "learning_rate": 4.991953011166753e-06, + "loss": 0.4134, + "step": 184 + }, + { + "epoch": 0.05522388059701493, + "grad_norm": 0.8443152449175418, + "learning_rate": 4.991758046983476e-06, + "loss": 0.3546, + "step": 185 + }, + { + "epoch": 0.05552238805970149, + "grad_norm": 0.8070965145975226, + "learning_rate": 4.991560753067161e-06, + "loss": 0.3852, + "step": 186 + }, + { + "epoch": 0.05582089552238806, + "grad_norm": 0.8907506048228214, + "learning_rate": 4.991361129602274e-06, + "loss": 0.4445, + "step": 187 + }, + { + "epoch": 0.056119402985074625, + "grad_norm": 0.834671843672075, + "learning_rate": 4.991159176775458e-06, + "loss": 0.3976, + "step": 188 + }, + { + "epoch": 0.05641791044776119, + "grad_norm": 0.9954785016124977, + "learning_rate": 4.9909548947755334e-06, + "loss": 0.4353, + "step": 189 + }, + { + "epoch": 0.056716417910447764, + "grad_norm": 0.7445588575209131, + "learning_rate": 4.990748283793499e-06, + "loss": 0.3131, + "step": 190 + }, + { + "epoch": 0.05701492537313433, + "grad_norm": 0.8570773114706813, + "learning_rate": 4.990539344022531e-06, + "loss": 0.3788, + "step": 191 + }, + { + "epoch": 0.057313432835820896, + "grad_norm": 1.0005429174734601, + "learning_rate": 4.990328075657985e-06, + "loss": 0.487, + "step": 192 + }, + { + "epoch": 0.05761194029850746, + "grad_norm": 0.8625742070670317, + "learning_rate": 4.990114478897389e-06, + "loss": 0.3654, + "step": 193 + }, + { + "epoch": 0.05791044776119403, + "grad_norm": 0.9721768330250884, + "learning_rate": 4.989898553940452e-06, + "loss": 0.3933, + "step": 194 + }, + { + "epoch": 0.058208955223880594, + "grad_norm": 0.9914169807245352, + "learning_rate": 4.989680300989058e-06, + "loss": 0.4732, + "step": 195 + }, + { + "epoch": 0.05850746268656717, + "grad_norm": 0.8411421119088517, + "learning_rate": 4.989459720247269e-06, + "loss": 0.4478, + "step": 196 + }, + { + "epoch": 0.05880597014925373, + "grad_norm": 0.924615837847109, + "learning_rate": 4.989236811921322e-06, + "loss": 0.4654, + "step": 197 + }, + { + "epoch": 0.0591044776119403, + "grad_norm": 0.9328373422116539, + "learning_rate": 4.989011576219632e-06, + "loss": 0.448, + "step": 198 + }, + { + "epoch": 0.059402985074626866, + "grad_norm": 0.9014890809533687, + "learning_rate": 4.9887840133527874e-06, + "loss": 0.4247, + "step": 199 + }, + { + "epoch": 0.05970149253731343, + "grad_norm": 0.8132765320225406, + "learning_rate": 4.988554123533554e-06, + "loss": 0.3943, + "step": 200 + }, + { + "epoch": 0.06, + "grad_norm": 0.9241012173718349, + "learning_rate": 4.9883219069768744e-06, + "loss": 0.3633, + "step": 201 + }, + { + "epoch": 0.060298507462686564, + "grad_norm": 0.84126125400794, + "learning_rate": 4.988087363899864e-06, + "loss": 0.3297, + "step": 202 + }, + { + "epoch": 0.06059701492537314, + "grad_norm": 0.8347638005889463, + "learning_rate": 4.987850494521817e-06, + "loss": 0.4134, + "step": 203 + }, + { + "epoch": 0.0608955223880597, + "grad_norm": 0.8847082107220858, + "learning_rate": 4.987611299064197e-06, + "loss": 0.3754, + "step": 204 + }, + { + "epoch": 0.06119402985074627, + "grad_norm": 0.8981615926938369, + "learning_rate": 4.987369777750649e-06, + "loss": 0.407, + "step": 205 + }, + { + "epoch": 0.061492537313432835, + "grad_norm": 0.875779341191485, + "learning_rate": 4.9871259308069885e-06, + "loss": 0.4204, + "step": 206 + }, + { + "epoch": 0.0617910447761194, + "grad_norm": 0.9212258634041721, + "learning_rate": 4.986879758461207e-06, + "loss": 0.4021, + "step": 207 + }, + { + "epoch": 0.06208955223880597, + "grad_norm": 0.8857433024281737, + "learning_rate": 4.986631260943469e-06, + "loss": 0.3837, + "step": 208 + }, + { + "epoch": 0.06238805970149254, + "grad_norm": 0.967859200633979, + "learning_rate": 4.986380438486113e-06, + "loss": 0.3977, + "step": 209 + }, + { + "epoch": 0.0626865671641791, + "grad_norm": 0.925560346678998, + "learning_rate": 4.986127291323653e-06, + "loss": 0.4273, + "step": 210 + }, + { + "epoch": 0.06298507462686567, + "grad_norm": 0.863600208601708, + "learning_rate": 4.985871819692775e-06, + "loss": 0.4031, + "step": 211 + }, + { + "epoch": 0.06328358208955225, + "grad_norm": 0.9327927944187931, + "learning_rate": 4.985614023832339e-06, + "loss": 0.4167, + "step": 212 + }, + { + "epoch": 0.0635820895522388, + "grad_norm": 0.891025459096459, + "learning_rate": 4.985353903983377e-06, + "loss": 0.4188, + "step": 213 + }, + { + "epoch": 0.06388059701492538, + "grad_norm": 0.8627498456244966, + "learning_rate": 4.985091460389096e-06, + "loss": 0.3822, + "step": 214 + }, + { + "epoch": 0.06417910447761194, + "grad_norm": 0.9843279624632778, + "learning_rate": 4.9848266932948745e-06, + "loss": 0.3868, + "step": 215 + }, + { + "epoch": 0.06447761194029851, + "grad_norm": 0.9197216488420602, + "learning_rate": 4.984559602948261e-06, + "loss": 0.3866, + "step": 216 + }, + { + "epoch": 0.06477611940298507, + "grad_norm": 1.0380980582080264, + "learning_rate": 4.984290189598981e-06, + "loss": 0.4032, + "step": 217 + }, + { + "epoch": 0.06507462686567164, + "grad_norm": 0.8366437135304863, + "learning_rate": 4.984018453498928e-06, + "loss": 0.3525, + "step": 218 + }, + { + "epoch": 0.06537313432835822, + "grad_norm": 0.9649147229825452, + "learning_rate": 4.983744394902169e-06, + "loss": 0.4686, + "step": 219 + }, + { + "epoch": 0.06567164179104477, + "grad_norm": 0.8490377817349865, + "learning_rate": 4.983468014064942e-06, + "loss": 0.4265, + "step": 220 + }, + { + "epoch": 0.06597014925373135, + "grad_norm": 0.7792537616515028, + "learning_rate": 4.983189311245656e-06, + "loss": 0.3949, + "step": 221 + }, + { + "epoch": 0.0662686567164179, + "grad_norm": 0.8750639478343961, + "learning_rate": 4.982908286704893e-06, + "loss": 0.3828, + "step": 222 + }, + { + "epoch": 0.06656716417910448, + "grad_norm": 0.9455155367225592, + "learning_rate": 4.982624940705402e-06, + "loss": 0.4166, + "step": 223 + }, + { + "epoch": 0.06686567164179104, + "grad_norm": 0.9295734685485272, + "learning_rate": 4.982339273512106e-06, + "loss": 0.4153, + "step": 224 + }, + { + "epoch": 0.06716417910447761, + "grad_norm": 0.8631686207686246, + "learning_rate": 4.982051285392097e-06, + "loss": 0.3615, + "step": 225 + }, + { + "epoch": 0.06746268656716418, + "grad_norm": 0.9880414466922032, + "learning_rate": 4.981760976614634e-06, + "loss": 0.46, + "step": 226 + }, + { + "epoch": 0.06776119402985074, + "grad_norm": 0.8872311404032606, + "learning_rate": 4.981468347451154e-06, + "loss": 0.3871, + "step": 227 + }, + { + "epoch": 0.06805970149253732, + "grad_norm": 0.8943003479815301, + "learning_rate": 4.981173398175252e-06, + "loss": 0.371, + "step": 228 + }, + { + "epoch": 0.06835820895522388, + "grad_norm": 0.9525334440362165, + "learning_rate": 4.9808761290627035e-06, + "loss": 0.4159, + "step": 229 + }, + { + "epoch": 0.06865671641791045, + "grad_norm": 0.871749210147148, + "learning_rate": 4.9805765403914455e-06, + "loss": 0.3764, + "step": 230 + }, + { + "epoch": 0.06895522388059701, + "grad_norm": 0.925600078237541, + "learning_rate": 4.980274632441585e-06, + "loss": 0.3768, + "step": 231 + }, + { + "epoch": 0.06925373134328358, + "grad_norm": 0.9672291973292384, + "learning_rate": 4.9799704054954015e-06, + "loss": 0.477, + "step": 232 + }, + { + "epoch": 0.06955223880597015, + "grad_norm": 0.768340028516794, + "learning_rate": 4.979663859837337e-06, + "loss": 0.3902, + "step": 233 + }, + { + "epoch": 0.06985074626865671, + "grad_norm": 0.8740643362093679, + "learning_rate": 4.979354995754006e-06, + "loss": 0.3816, + "step": 234 + }, + { + "epoch": 0.07014925373134329, + "grad_norm": 1.0163576539062806, + "learning_rate": 4.979043813534189e-06, + "loss": 0.4156, + "step": 235 + }, + { + "epoch": 0.07044776119402985, + "grad_norm": 0.9103806310986855, + "learning_rate": 4.978730313468832e-06, + "loss": 0.4067, + "step": 236 + }, + { + "epoch": 0.07074626865671642, + "grad_norm": 0.922939829872911, + "learning_rate": 4.9784144958510515e-06, + "loss": 0.369, + "step": 237 + }, + { + "epoch": 0.07104477611940299, + "grad_norm": 1.0128842126660702, + "learning_rate": 4.978096360976129e-06, + "loss": 0.425, + "step": 238 + }, + { + "epoch": 0.07134328358208955, + "grad_norm": 0.9083583140182622, + "learning_rate": 4.977775909141513e-06, + "loss": 0.4008, + "step": 239 + }, + { + "epoch": 0.07164179104477612, + "grad_norm": 0.9204385171658356, + "learning_rate": 4.9774531406468164e-06, + "loss": 0.4098, + "step": 240 + }, + { + "epoch": 0.07194029850746268, + "grad_norm": 0.8800378945968806, + "learning_rate": 4.977128055793823e-06, + "loss": 0.4207, + "step": 241 + }, + { + "epoch": 0.07223880597014926, + "grad_norm": 0.9488253130303587, + "learning_rate": 4.976800654886476e-06, + "loss": 0.4467, + "step": 242 + }, + { + "epoch": 0.07253731343283581, + "grad_norm": 0.924076052886822, + "learning_rate": 4.976470938230889e-06, + "loss": 0.4257, + "step": 243 + }, + { + "epoch": 0.07283582089552239, + "grad_norm": 0.9270092762682594, + "learning_rate": 4.976138906135341e-06, + "loss": 0.4214, + "step": 244 + }, + { + "epoch": 0.07313432835820896, + "grad_norm": 0.897415607649877, + "learning_rate": 4.9758045589102696e-06, + "loss": 0.3953, + "step": 245 + }, + { + "epoch": 0.07343283582089552, + "grad_norm": 0.9972346268529109, + "learning_rate": 4.975467896868284e-06, + "loss": 0.4519, + "step": 246 + }, + { + "epoch": 0.0737313432835821, + "grad_norm": 0.9907973485734135, + "learning_rate": 4.9751289203241535e-06, + "loss": 0.4678, + "step": 247 + }, + { + "epoch": 0.07402985074626865, + "grad_norm": 0.8545804808558607, + "learning_rate": 4.974787629594815e-06, + "loss": 0.4088, + "step": 248 + }, + { + "epoch": 0.07432835820895523, + "grad_norm": 0.9288807835854102, + "learning_rate": 4.974444024999366e-06, + "loss": 0.4414, + "step": 249 + }, + { + "epoch": 0.07462686567164178, + "grad_norm": 0.7975187554309996, + "learning_rate": 4.974098106859068e-06, + "loss": 0.4158, + "step": 250 + }, + { + "epoch": 0.07492537313432836, + "grad_norm": 0.9898504646944991, + "learning_rate": 4.973749875497346e-06, + "loss": 0.4242, + "step": 251 + }, + { + "epoch": 0.07522388059701493, + "grad_norm": 0.8602699019118681, + "learning_rate": 4.973399331239789e-06, + "loss": 0.3789, + "step": 252 + }, + { + "epoch": 0.07552238805970149, + "grad_norm": 0.8750135247666223, + "learning_rate": 4.973046474414145e-06, + "loss": 0.3936, + "step": 253 + }, + { + "epoch": 0.07582089552238806, + "grad_norm": 0.9200590185086436, + "learning_rate": 4.9726913053503285e-06, + "loss": 0.4523, + "step": 254 + }, + { + "epoch": 0.07611940298507462, + "grad_norm": 0.9256789347650409, + "learning_rate": 4.972333824380414e-06, + "loss": 0.4328, + "step": 255 + }, + { + "epoch": 0.0764179104477612, + "grad_norm": 0.8363482584889479, + "learning_rate": 4.9719740318386375e-06, + "loss": 0.3833, + "step": 256 + }, + { + "epoch": 0.07671641791044777, + "grad_norm": 0.8386638424623721, + "learning_rate": 4.971611928061395e-06, + "loss": 0.4052, + "step": 257 + }, + { + "epoch": 0.07701492537313433, + "grad_norm": 0.7996227547339989, + "learning_rate": 4.9712475133872455e-06, + "loss": 0.3646, + "step": 258 + }, + { + "epoch": 0.0773134328358209, + "grad_norm": 0.8768980961009939, + "learning_rate": 4.970880788156906e-06, + "loss": 0.3561, + "step": 259 + }, + { + "epoch": 0.07761194029850746, + "grad_norm": 0.8551248770367569, + "learning_rate": 4.97051175271326e-06, + "loss": 0.3774, + "step": 260 + }, + { + "epoch": 0.07791044776119403, + "grad_norm": 0.8253804217485359, + "learning_rate": 4.970140407401343e-06, + "loss": 0.3577, + "step": 261 + }, + { + "epoch": 0.07820895522388059, + "grad_norm": 0.8907747036111339, + "learning_rate": 4.969766752568355e-06, + "loss": 0.3611, + "step": 262 + }, + { + "epoch": 0.07850746268656716, + "grad_norm": 0.9257974072192369, + "learning_rate": 4.969390788563653e-06, + "loss": 0.4132, + "step": 263 + }, + { + "epoch": 0.07880597014925374, + "grad_norm": 0.8701098416781633, + "learning_rate": 4.969012515738757e-06, + "loss": 0.3757, + "step": 264 + }, + { + "epoch": 0.0791044776119403, + "grad_norm": 0.8589961683118461, + "learning_rate": 4.9686319344473395e-06, + "loss": 0.3971, + "step": 265 + }, + { + "epoch": 0.07940298507462687, + "grad_norm": 0.9341286772453976, + "learning_rate": 4.968249045045237e-06, + "loss": 0.429, + "step": 266 + }, + { + "epoch": 0.07970149253731343, + "grad_norm": 0.9938003598001335, + "learning_rate": 4.967863847890441e-06, + "loss": 0.396, + "step": 267 + }, + { + "epoch": 0.08, + "grad_norm": 0.9011497351262049, + "learning_rate": 4.9674763433431006e-06, + "loss": 0.4308, + "step": 268 + }, + { + "epoch": 0.08029850746268656, + "grad_norm": 0.977043100659228, + "learning_rate": 4.9670865317655245e-06, + "loss": 0.4258, + "step": 269 + }, + { + "epoch": 0.08059701492537313, + "grad_norm": 0.9059609196950806, + "learning_rate": 4.966694413522177e-06, + "loss": 0.4267, + "step": 270 + }, + { + "epoch": 0.0808955223880597, + "grad_norm": 1.0078844426437794, + "learning_rate": 4.966299988979678e-06, + "loss": 0.3847, + "step": 271 + }, + { + "epoch": 0.08119402985074627, + "grad_norm": 0.9703165705523532, + "learning_rate": 4.965903258506806e-06, + "loss": 0.4097, + "step": 272 + }, + { + "epoch": 0.08149253731343284, + "grad_norm": 0.8870735512921978, + "learning_rate": 4.965504222474494e-06, + "loss": 0.3928, + "step": 273 + }, + { + "epoch": 0.0817910447761194, + "grad_norm": 0.8739388689016976, + "learning_rate": 4.96510288125583e-06, + "loss": 0.3731, + "step": 274 + }, + { + "epoch": 0.08208955223880597, + "grad_norm": 1.0212824714114637, + "learning_rate": 4.9646992352260595e-06, + "loss": 0.3795, + "step": 275 + }, + { + "epoch": 0.08238805970149254, + "grad_norm": 0.8754845959865762, + "learning_rate": 4.964293284762581e-06, + "loss": 0.3911, + "step": 276 + }, + { + "epoch": 0.0826865671641791, + "grad_norm": 0.8698613925405617, + "learning_rate": 4.9638850302449485e-06, + "loss": 0.3532, + "step": 277 + }, + { + "epoch": 0.08298507462686568, + "grad_norm": 0.9357074185617845, + "learning_rate": 4.96347447205487e-06, + "loss": 0.4101, + "step": 278 + }, + { + "epoch": 0.08328358208955224, + "grad_norm": 0.8472354754739433, + "learning_rate": 4.963061610576207e-06, + "loss": 0.4406, + "step": 279 + }, + { + "epoch": 0.08358208955223881, + "grad_norm": 0.8564250146540285, + "learning_rate": 4.962646446194977e-06, + "loss": 0.3793, + "step": 280 + }, + { + "epoch": 0.08388059701492537, + "grad_norm": 0.8857581051447966, + "learning_rate": 4.962228979299345e-06, + "loss": 0.3822, + "step": 281 + }, + { + "epoch": 0.08417910447761194, + "grad_norm": 0.890998048990312, + "learning_rate": 4.961809210279634e-06, + "loss": 0.3944, + "step": 282 + }, + { + "epoch": 0.08447761194029851, + "grad_norm": 0.8601180120292999, + "learning_rate": 4.9613871395283195e-06, + "loss": 0.4009, + "step": 283 + }, + { + "epoch": 0.08477611940298507, + "grad_norm": 0.8277195700046859, + "learning_rate": 4.960962767440026e-06, + "loss": 0.3564, + "step": 284 + }, + { + "epoch": 0.08507462686567165, + "grad_norm": 0.8423871373655928, + "learning_rate": 4.96053609441153e-06, + "loss": 0.3747, + "step": 285 + }, + { + "epoch": 0.0853731343283582, + "grad_norm": 0.9407464456549731, + "learning_rate": 4.960107120841762e-06, + "loss": 0.3858, + "step": 286 + }, + { + "epoch": 0.08567164179104478, + "grad_norm": 0.9140065601660698, + "learning_rate": 4.9596758471318e-06, + "loss": 0.3961, + "step": 287 + }, + { + "epoch": 0.08597014925373134, + "grad_norm": 0.858838528185795, + "learning_rate": 4.959242273684878e-06, + "loss": 0.3915, + "step": 288 + }, + { + "epoch": 0.08626865671641791, + "grad_norm": 0.9199463736666107, + "learning_rate": 4.958806400906372e-06, + "loss": 0.4275, + "step": 289 + }, + { + "epoch": 0.08656716417910448, + "grad_norm": 0.9243014960259777, + "learning_rate": 4.958368229203816e-06, + "loss": 0.3851, + "step": 290 + }, + { + "epoch": 0.08686567164179104, + "grad_norm": 0.9091623645910903, + "learning_rate": 4.957927758986888e-06, + "loss": 0.3957, + "step": 291 + }, + { + "epoch": 0.08716417910447762, + "grad_norm": 0.8915385371700681, + "learning_rate": 4.9574849906674174e-06, + "loss": 0.4437, + "step": 292 + }, + { + "epoch": 0.08746268656716417, + "grad_norm": 0.8934354732288062, + "learning_rate": 4.957039924659382e-06, + "loss": 0.358, + "step": 293 + }, + { + "epoch": 0.08776119402985075, + "grad_norm": 0.9061503598549887, + "learning_rate": 4.956592561378907e-06, + "loss": 0.3692, + "step": 294 + }, + { + "epoch": 0.0880597014925373, + "grad_norm": 0.8830535399018076, + "learning_rate": 4.956142901244268e-06, + "loss": 0.4223, + "step": 295 + }, + { + "epoch": 0.08835820895522388, + "grad_norm": 0.9413406518046492, + "learning_rate": 4.955690944675882e-06, + "loss": 0.3859, + "step": 296 + }, + { + "epoch": 0.08865671641791045, + "grad_norm": 0.8947764680217752, + "learning_rate": 4.955236692096324e-06, + "loss": 0.3682, + "step": 297 + }, + { + "epoch": 0.08895522388059701, + "grad_norm": 0.8405209768536294, + "learning_rate": 4.954780143930303e-06, + "loss": 0.3826, + "step": 298 + }, + { + "epoch": 0.08925373134328358, + "grad_norm": 0.9396234783746453, + "learning_rate": 4.954321300604683e-06, + "loss": 0.4112, + "step": 299 + }, + { + "epoch": 0.08955223880597014, + "grad_norm": 0.9114411631804026, + "learning_rate": 4.953860162548472e-06, + "loss": 0.3612, + "step": 300 + }, + { + "epoch": 0.08985074626865672, + "grad_norm": 0.8817713479006289, + "learning_rate": 4.953396730192821e-06, + "loss": 0.4209, + "step": 301 + }, + { + "epoch": 0.09014925373134329, + "grad_norm": 0.8726071936119882, + "learning_rate": 4.952931003971029e-06, + "loss": 0.4155, + "step": 302 + }, + { + "epoch": 0.09044776119402985, + "grad_norm": 0.9048765061805111, + "learning_rate": 4.952462984318539e-06, + "loss": 0.3896, + "step": 303 + }, + { + "epoch": 0.09074626865671642, + "grad_norm": 0.9804012864737665, + "learning_rate": 4.9519926716729376e-06, + "loss": 0.4211, + "step": 304 + }, + { + "epoch": 0.09104477611940298, + "grad_norm": 0.8873017745256736, + "learning_rate": 4.951520066473955e-06, + "loss": 0.4544, + "step": 305 + }, + { + "epoch": 0.09134328358208955, + "grad_norm": 0.8062695091689972, + "learning_rate": 4.951045169163467e-06, + "loss": 0.3992, + "step": 306 + }, + { + "epoch": 0.09164179104477611, + "grad_norm": 0.8777340484612656, + "learning_rate": 4.950567980185489e-06, + "loss": 0.4128, + "step": 307 + }, + { + "epoch": 0.09194029850746269, + "grad_norm": 0.9366219952901376, + "learning_rate": 4.950088499986183e-06, + "loss": 0.3942, + "step": 308 + }, + { + "epoch": 0.09223880597014926, + "grad_norm": 0.8144148378243571, + "learning_rate": 4.949606729013851e-06, + "loss": 0.3958, + "step": 309 + }, + { + "epoch": 0.09253731343283582, + "grad_norm": 0.8968060502801483, + "learning_rate": 4.949122667718935e-06, + "loss": 0.406, + "step": 310 + }, + { + "epoch": 0.09283582089552239, + "grad_norm": 0.931747156022598, + "learning_rate": 4.948636316554023e-06, + "loss": 0.3839, + "step": 311 + }, + { + "epoch": 0.09313432835820895, + "grad_norm": 0.7978130697742478, + "learning_rate": 4.948147675973841e-06, + "loss": 0.3653, + "step": 312 + }, + { + "epoch": 0.09343283582089552, + "grad_norm": 0.7934615187323222, + "learning_rate": 4.947656746435255e-06, + "loss": 0.3813, + "step": 313 + }, + { + "epoch": 0.09373134328358208, + "grad_norm": 0.9589131627871469, + "learning_rate": 4.947163528397273e-06, + "loss": 0.469, + "step": 314 + }, + { + "epoch": 0.09402985074626866, + "grad_norm": 0.8464181634763774, + "learning_rate": 4.946668022321042e-06, + "loss": 0.3979, + "step": 315 + }, + { + "epoch": 0.09432835820895523, + "grad_norm": 0.8899369392024041, + "learning_rate": 4.946170228669847e-06, + "loss": 0.3292, + "step": 316 + }, + { + "epoch": 0.09462686567164179, + "grad_norm": 0.8553955124203195, + "learning_rate": 4.9456701479091155e-06, + "loss": 0.3925, + "step": 317 + }, + { + "epoch": 0.09492537313432836, + "grad_norm": 0.8606099277635237, + "learning_rate": 4.945167780506407e-06, + "loss": 0.433, + "step": 318 + }, + { + "epoch": 0.09522388059701492, + "grad_norm": 0.907825007701508, + "learning_rate": 4.944663126931426e-06, + "loss": 0.3645, + "step": 319 + }, + { + "epoch": 0.0955223880597015, + "grad_norm": 0.8612034752290371, + "learning_rate": 4.94415618765601e-06, + "loss": 0.3607, + "step": 320 + }, + { + "epoch": 0.09582089552238807, + "grad_norm": 0.9069258389724871, + "learning_rate": 4.943646963154134e-06, + "loss": 0.402, + "step": 321 + }, + { + "epoch": 0.09611940298507463, + "grad_norm": 0.9862506026387452, + "learning_rate": 4.943135453901911e-06, + "loss": 0.4424, + "step": 322 + }, + { + "epoch": 0.0964179104477612, + "grad_norm": 0.9122656478208079, + "learning_rate": 4.942621660377592e-06, + "loss": 0.4595, + "step": 323 + }, + { + "epoch": 0.09671641791044776, + "grad_norm": 0.9125176637122425, + "learning_rate": 4.942105583061558e-06, + "loss": 0.3824, + "step": 324 + }, + { + "epoch": 0.09701492537313433, + "grad_norm": 0.8593735642495912, + "learning_rate": 4.941587222436331e-06, + "loss": 0.3785, + "step": 325 + }, + { + "epoch": 0.09731343283582089, + "grad_norm": 0.82674079067205, + "learning_rate": 4.941066578986565e-06, + "loss": 0.3416, + "step": 326 + }, + { + "epoch": 0.09761194029850746, + "grad_norm": 0.9689144745422055, + "learning_rate": 4.940543653199049e-06, + "loss": 0.3368, + "step": 327 + }, + { + "epoch": 0.09791044776119404, + "grad_norm": 0.9277054433752907, + "learning_rate": 4.940018445562704e-06, + "loss": 0.3761, + "step": 328 + }, + { + "epoch": 0.0982089552238806, + "grad_norm": 0.7578752447120486, + "learning_rate": 4.939490956568589e-06, + "loss": 0.3509, + "step": 329 + }, + { + "epoch": 0.09850746268656717, + "grad_norm": 0.8298981698879738, + "learning_rate": 4.938961186709893e-06, + "loss": 0.3969, + "step": 330 + }, + { + "epoch": 0.09880597014925373, + "grad_norm": 0.9238719788718148, + "learning_rate": 4.938429136481936e-06, + "loss": 0.4294, + "step": 331 + }, + { + "epoch": 0.0991044776119403, + "grad_norm": 0.8589172708753758, + "learning_rate": 4.937894806382173e-06, + "loss": 0.4139, + "step": 332 + }, + { + "epoch": 0.09940298507462686, + "grad_norm": 0.8585178801953061, + "learning_rate": 4.937358196910191e-06, + "loss": 0.395, + "step": 333 + }, + { + "epoch": 0.09970149253731343, + "grad_norm": 0.8099539599389111, + "learning_rate": 4.936819308567705e-06, + "loss": 0.3669, + "step": 334 + }, + { + "epoch": 0.1, + "grad_norm": 0.839470127485901, + "learning_rate": 4.9362781418585635e-06, + "loss": 0.3418, + "step": 335 + }, + { + "epoch": 0.10029850746268656, + "grad_norm": 0.9244240560709288, + "learning_rate": 4.9357346972887425e-06, + "loss": 0.3656, + "step": 336 + }, + { + "epoch": 0.10059701492537314, + "grad_norm": 0.8543828203767136, + "learning_rate": 4.935188975366352e-06, + "loss": 0.3853, + "step": 337 + }, + { + "epoch": 0.1008955223880597, + "grad_norm": 0.9403997055915649, + "learning_rate": 4.934640976601627e-06, + "loss": 0.4266, + "step": 338 + }, + { + "epoch": 0.10119402985074627, + "grad_norm": 0.8656569961847842, + "learning_rate": 4.934090701506933e-06, + "loss": 0.3519, + "step": 339 + }, + { + "epoch": 0.10149253731343283, + "grad_norm": 1.0259990423516157, + "learning_rate": 4.9335381505967635e-06, + "loss": 0.3739, + "step": 340 + }, + { + "epoch": 0.1017910447761194, + "grad_norm": 0.9184253012427269, + "learning_rate": 4.932983324387742e-06, + "loss": 0.3891, + "step": 341 + }, + { + "epoch": 0.10208955223880598, + "grad_norm": 0.8855470935863624, + "learning_rate": 4.932426223398615e-06, + "loss": 0.4003, + "step": 342 + }, + { + "epoch": 0.10238805970149253, + "grad_norm": 0.8338863353994163, + "learning_rate": 4.9318668481502604e-06, + "loss": 0.3808, + "step": 343 + }, + { + "epoch": 0.10268656716417911, + "grad_norm": 0.8659855930783819, + "learning_rate": 4.93130519916568e-06, + "loss": 0.4153, + "step": 344 + }, + { + "epoch": 0.10298507462686567, + "grad_norm": 0.9328940913851155, + "learning_rate": 4.930741276970001e-06, + "loss": 0.4228, + "step": 345 + }, + { + "epoch": 0.10328358208955224, + "grad_norm": 0.9559469401085737, + "learning_rate": 4.930175082090477e-06, + "loss": 0.4171, + "step": 346 + }, + { + "epoch": 0.10358208955223881, + "grad_norm": 0.8361138110258766, + "learning_rate": 4.929606615056488e-06, + "loss": 0.3968, + "step": 347 + }, + { + "epoch": 0.10388059701492537, + "grad_norm": 0.8329215393697887, + "learning_rate": 4.929035876399535e-06, + "loss": 0.3942, + "step": 348 + }, + { + "epoch": 0.10417910447761194, + "grad_norm": 0.8675645842119557, + "learning_rate": 4.9284628666532455e-06, + "loss": 0.3621, + "step": 349 + }, + { + "epoch": 0.1044776119402985, + "grad_norm": 0.8507052675194776, + "learning_rate": 4.927887586353369e-06, + "loss": 0.4286, + "step": 350 + }, + { + "epoch": 0.10477611940298508, + "grad_norm": 0.837156171946447, + "learning_rate": 4.92731003603778e-06, + "loss": 0.3712, + "step": 351 + }, + { + "epoch": 0.10507462686567164, + "grad_norm": 0.9356486848372598, + "learning_rate": 4.926730216246472e-06, + "loss": 0.3913, + "step": 352 + }, + { + "epoch": 0.10537313432835821, + "grad_norm": 0.8223936751125491, + "learning_rate": 4.926148127521565e-06, + "loss": 0.3975, + "step": 353 + }, + { + "epoch": 0.10567164179104478, + "grad_norm": 0.7881227208588917, + "learning_rate": 4.925563770407295e-06, + "loss": 0.3961, + "step": 354 + }, + { + "epoch": 0.10597014925373134, + "grad_norm": 0.8453911169822759, + "learning_rate": 4.924977145450023e-06, + "loss": 0.3694, + "step": 355 + }, + { + "epoch": 0.10626865671641791, + "grad_norm": 0.8734420055725053, + "learning_rate": 4.924388253198229e-06, + "loss": 0.3865, + "step": 356 + }, + { + "epoch": 0.10656716417910447, + "grad_norm": 0.9201290119245594, + "learning_rate": 4.923797094202514e-06, + "loss": 0.4278, + "step": 357 + }, + { + "epoch": 0.10686567164179105, + "grad_norm": 0.9389093005855279, + "learning_rate": 4.923203669015594e-06, + "loss": 0.3665, + "step": 358 + }, + { + "epoch": 0.1071641791044776, + "grad_norm": 0.9042202134139647, + "learning_rate": 4.92260797819231e-06, + "loss": 0.3924, + "step": 359 + }, + { + "epoch": 0.10746268656716418, + "grad_norm": 0.8657123819229612, + "learning_rate": 4.922010022289618e-06, + "loss": 0.3972, + "step": 360 + }, + { + "epoch": 0.10776119402985075, + "grad_norm": 0.829479673932315, + "learning_rate": 4.921409801866591e-06, + "loss": 0.4135, + "step": 361 + }, + { + "epoch": 0.10805970149253731, + "grad_norm": 0.8821755114348971, + "learning_rate": 4.920807317484422e-06, + "loss": 0.3944, + "step": 362 + }, + { + "epoch": 0.10835820895522388, + "grad_norm": 0.8948274549142421, + "learning_rate": 4.920202569706418e-06, + "loss": 0.3826, + "step": 363 + }, + { + "epoch": 0.10865671641791044, + "grad_norm": 0.8652728701574908, + "learning_rate": 4.919595559098003e-06, + "loss": 0.3766, + "step": 364 + }, + { + "epoch": 0.10895522388059702, + "grad_norm": 0.9092751007745377, + "learning_rate": 4.9189862862267205e-06, + "loss": 0.4034, + "step": 365 + }, + { + "epoch": 0.10925373134328359, + "grad_norm": 0.9254208863560162, + "learning_rate": 4.918374751662221e-06, + "loss": 0.4359, + "step": 366 + }, + { + "epoch": 0.10955223880597015, + "grad_norm": 0.8168960720647488, + "learning_rate": 4.917760955976277e-06, + "loss": 0.4363, + "step": 367 + }, + { + "epoch": 0.10985074626865672, + "grad_norm": 0.830653386436508, + "learning_rate": 4.917144899742773e-06, + "loss": 0.3931, + "step": 368 + }, + { + "epoch": 0.11014925373134328, + "grad_norm": 0.840070982853219, + "learning_rate": 4.916526583537705e-06, + "loss": 0.3493, + "step": 369 + }, + { + "epoch": 0.11044776119402985, + "grad_norm": 0.979140168040529, + "learning_rate": 4.915906007939184e-06, + "loss": 0.3741, + "step": 370 + }, + { + "epoch": 0.11074626865671641, + "grad_norm": 0.9450117540952754, + "learning_rate": 4.915283173527434e-06, + "loss": 0.4575, + "step": 371 + }, + { + "epoch": 0.11104477611940299, + "grad_norm": 0.877577439309941, + "learning_rate": 4.9146580808847896e-06, + "loss": 0.4069, + "step": 372 + }, + { + "epoch": 0.11134328358208956, + "grad_norm": 0.8452549449469758, + "learning_rate": 4.9140307305956964e-06, + "loss": 0.3521, + "step": 373 + }, + { + "epoch": 0.11164179104477612, + "grad_norm": 0.8406721896093431, + "learning_rate": 4.913401123246713e-06, + "loss": 0.3441, + "step": 374 + }, + { + "epoch": 0.11194029850746269, + "grad_norm": 0.7975661106565912, + "learning_rate": 4.912769259426505e-06, + "loss": 0.3256, + "step": 375 + }, + { + "epoch": 0.11223880597014925, + "grad_norm": 0.8427317452222252, + "learning_rate": 4.912135139725851e-06, + "loss": 0.3885, + "step": 376 + }, + { + "epoch": 0.11253731343283582, + "grad_norm": 0.9053683742245764, + "learning_rate": 4.9114987647376374e-06, + "loss": 0.4163, + "step": 377 + }, + { + "epoch": 0.11283582089552238, + "grad_norm": 0.8630516315249676, + "learning_rate": 4.910860135056859e-06, + "loss": 0.3739, + "step": 378 + }, + { + "epoch": 0.11313432835820895, + "grad_norm": 0.9602461144012868, + "learning_rate": 4.91021925128062e-06, + "loss": 0.3943, + "step": 379 + }, + { + "epoch": 0.11343283582089553, + "grad_norm": 0.9141865424324758, + "learning_rate": 4.909576114008129e-06, + "loss": 0.4037, + "step": 380 + }, + { + "epoch": 0.11373134328358209, + "grad_norm": 0.8548880538979818, + "learning_rate": 4.908930723840706e-06, + "loss": 0.3818, + "step": 381 + }, + { + "epoch": 0.11402985074626866, + "grad_norm": 1.0328188865088241, + "learning_rate": 4.908283081381773e-06, + "loss": 0.4682, + "step": 382 + }, + { + "epoch": 0.11432835820895522, + "grad_norm": 0.9238076147148401, + "learning_rate": 4.907633187236861e-06, + "loss": 0.38, + "step": 383 + }, + { + "epoch": 0.11462686567164179, + "grad_norm": 0.8455481748001109, + "learning_rate": 4.906981042013605e-06, + "loss": 0.3761, + "step": 384 + }, + { + "epoch": 0.11492537313432835, + "grad_norm": 0.9073460304019739, + "learning_rate": 4.9063266463217466e-06, + "loss": 0.4005, + "step": 385 + }, + { + "epoch": 0.11522388059701492, + "grad_norm": 0.8931846505792465, + "learning_rate": 4.905670000773126e-06, + "loss": 0.4288, + "step": 386 + }, + { + "epoch": 0.1155223880597015, + "grad_norm": 0.851731424120729, + "learning_rate": 4.905011105981694e-06, + "loss": 0.4044, + "step": 387 + }, + { + "epoch": 0.11582089552238806, + "grad_norm": 0.9161247111493643, + "learning_rate": 4.9043499625635e-06, + "loss": 0.3995, + "step": 388 + }, + { + "epoch": 0.11611940298507463, + "grad_norm": 0.8797022877561539, + "learning_rate": 4.903686571136697e-06, + "loss": 0.3964, + "step": 389 + }, + { + "epoch": 0.11641791044776119, + "grad_norm": 0.8500731359455521, + "learning_rate": 4.903020932321541e-06, + "loss": 0.3609, + "step": 390 + }, + { + "epoch": 0.11671641791044776, + "grad_norm": 0.9945238156321736, + "learning_rate": 4.9023530467403856e-06, + "loss": 0.3801, + "step": 391 + }, + { + "epoch": 0.11701492537313433, + "grad_norm": 0.9136420198050205, + "learning_rate": 4.901682915017689e-06, + "loss": 0.3566, + "step": 392 + }, + { + "epoch": 0.1173134328358209, + "grad_norm": 0.8189636216415471, + "learning_rate": 4.901010537780009e-06, + "loss": 0.3132, + "step": 393 + }, + { + "epoch": 0.11761194029850747, + "grad_norm": 0.9844874224310621, + "learning_rate": 4.900335915656e-06, + "loss": 0.4271, + "step": 394 + }, + { + "epoch": 0.11791044776119403, + "grad_norm": 1.0487555124306993, + "learning_rate": 4.899659049276418e-06, + "loss": 0.4064, + "step": 395 + }, + { + "epoch": 0.1182089552238806, + "grad_norm": 0.7812485151038709, + "learning_rate": 4.898979939274118e-06, + "loss": 0.2882, + "step": 396 + }, + { + "epoch": 0.11850746268656716, + "grad_norm": 0.8336948833618268, + "learning_rate": 4.898298586284049e-06, + "loss": 0.3984, + "step": 397 + }, + { + "epoch": 0.11880597014925373, + "grad_norm": 0.8979301268641166, + "learning_rate": 4.897614990943261e-06, + "loss": 0.3919, + "step": 398 + }, + { + "epoch": 0.1191044776119403, + "grad_norm": 0.8427489040474724, + "learning_rate": 4.896929153890898e-06, + "loss": 0.3866, + "step": 399 + }, + { + "epoch": 0.11940298507462686, + "grad_norm": 0.8211103743103699, + "learning_rate": 4.896241075768201e-06, + "loss": 0.3963, + "step": 400 + }, + { + "epoch": 0.11970149253731344, + "grad_norm": 0.8445033296486258, + "learning_rate": 4.895550757218507e-06, + "loss": 0.3928, + "step": 401 + }, + { + "epoch": 0.12, + "grad_norm": 0.8961945171020185, + "learning_rate": 4.894858198887246e-06, + "loss": 0.3794, + "step": 402 + }, + { + "epoch": 0.12029850746268657, + "grad_norm": 0.9818424108109866, + "learning_rate": 4.8941634014219454e-06, + "loss": 0.39, + "step": 403 + }, + { + "epoch": 0.12059701492537313, + "grad_norm": 0.8104054934733214, + "learning_rate": 4.8934663654722205e-06, + "loss": 0.2999, + "step": 404 + }, + { + "epoch": 0.1208955223880597, + "grad_norm": 0.8316791108804557, + "learning_rate": 4.892767091689786e-06, + "loss": 0.3685, + "step": 405 + }, + { + "epoch": 0.12119402985074627, + "grad_norm": 0.8310506029399883, + "learning_rate": 4.892065580728444e-06, + "loss": 0.3703, + "step": 406 + }, + { + "epoch": 0.12149253731343283, + "grad_norm": 0.7567215169549178, + "learning_rate": 4.8913618332440906e-06, + "loss": 0.3268, + "step": 407 + }, + { + "epoch": 0.1217910447761194, + "grad_norm": 0.8628920449752951, + "learning_rate": 4.890655849894713e-06, + "loss": 0.3912, + "step": 408 + }, + { + "epoch": 0.12208955223880597, + "grad_norm": 0.910090152736456, + "learning_rate": 4.889947631340388e-06, + "loss": 0.4308, + "step": 409 + }, + { + "epoch": 0.12238805970149254, + "grad_norm": 0.8591509812041254, + "learning_rate": 4.889237178243283e-06, + "loss": 0.3958, + "step": 410 + }, + { + "epoch": 0.12268656716417911, + "grad_norm": 0.8618520589363041, + "learning_rate": 4.888524491267653e-06, + "loss": 0.4158, + "step": 411 + }, + { + "epoch": 0.12298507462686567, + "grad_norm": 0.8884599985876102, + "learning_rate": 4.887809571079845e-06, + "loss": 0.3612, + "step": 412 + }, + { + "epoch": 0.12328358208955224, + "grad_norm": 0.8985780635048177, + "learning_rate": 4.88709241834829e-06, + "loss": 0.3824, + "step": 413 + }, + { + "epoch": 0.1235820895522388, + "grad_norm": 0.8673597426156886, + "learning_rate": 4.88637303374351e-06, + "loss": 0.3911, + "step": 414 + }, + { + "epoch": 0.12388059701492538, + "grad_norm": 0.8761841886398808, + "learning_rate": 4.885651417938112e-06, + "loss": 0.3596, + "step": 415 + }, + { + "epoch": 0.12417910447761193, + "grad_norm": 0.8576660751720805, + "learning_rate": 4.884927571606788e-06, + "loss": 0.4177, + "step": 416 + }, + { + "epoch": 0.12447761194029851, + "grad_norm": 0.804474354480795, + "learning_rate": 4.884201495426317e-06, + "loss": 0.3581, + "step": 417 + }, + { + "epoch": 0.12477611940298508, + "grad_norm": 0.8519755633647428, + "learning_rate": 4.883473190075562e-06, + "loss": 0.4146, + "step": 418 + }, + { + "epoch": 0.12507462686567164, + "grad_norm": 0.864412561136686, + "learning_rate": 4.882742656235474e-06, + "loss": 0.3788, + "step": 419 + }, + { + "epoch": 0.1253731343283582, + "grad_norm": 0.88627834971777, + "learning_rate": 4.88200989458908e-06, + "loss": 0.4063, + "step": 420 + }, + { + "epoch": 0.12567164179104479, + "grad_norm": 0.8104996818816044, + "learning_rate": 4.881274905821496e-06, + "loss": 0.4179, + "step": 421 + }, + { + "epoch": 0.12597014925373134, + "grad_norm": 0.7961290288194648, + "learning_rate": 4.88053769061992e-06, + "loss": 0.3743, + "step": 422 + }, + { + "epoch": 0.1262686567164179, + "grad_norm": 0.8727255096641454, + "learning_rate": 4.879798249673628e-06, + "loss": 0.4076, + "step": 423 + }, + { + "epoch": 0.1265671641791045, + "grad_norm": 0.805037045243907, + "learning_rate": 4.87905658367398e-06, + "loss": 0.3782, + "step": 424 + }, + { + "epoch": 0.12686567164179105, + "grad_norm": 0.8467074530308887, + "learning_rate": 4.878312693314417e-06, + "loss": 0.3981, + "step": 425 + }, + { + "epoch": 0.1271641791044776, + "grad_norm": 0.8591176817546119, + "learning_rate": 4.877566579290456e-06, + "loss": 0.406, + "step": 426 + }, + { + "epoch": 0.12746268656716417, + "grad_norm": 0.7675194550534261, + "learning_rate": 4.876818242299697e-06, + "loss": 0.3041, + "step": 427 + }, + { + "epoch": 0.12776119402985076, + "grad_norm": 0.974908306841169, + "learning_rate": 4.876067683041817e-06, + "loss": 0.3846, + "step": 428 + }, + { + "epoch": 0.12805970149253731, + "grad_norm": 0.9283777465319998, + "learning_rate": 4.875314902218569e-06, + "loss": 0.3893, + "step": 429 + }, + { + "epoch": 0.12835820895522387, + "grad_norm": 0.8466250701660518, + "learning_rate": 4.874559900533786e-06, + "loss": 0.3715, + "step": 430 + }, + { + "epoch": 0.12865671641791046, + "grad_norm": 0.9436206286256429, + "learning_rate": 4.8738026786933765e-06, + "loss": 0.3827, + "step": 431 + }, + { + "epoch": 0.12895522388059702, + "grad_norm": 0.8642639983887797, + "learning_rate": 4.8730432374053245e-06, + "loss": 0.4037, + "step": 432 + }, + { + "epoch": 0.12925373134328358, + "grad_norm": 0.8620504916338158, + "learning_rate": 4.872281577379688e-06, + "loss": 0.4308, + "step": 433 + }, + { + "epoch": 0.12955223880597014, + "grad_norm": 0.9813776269945593, + "learning_rate": 4.8715176993286e-06, + "loss": 0.4293, + "step": 434 + }, + { + "epoch": 0.12985074626865672, + "grad_norm": 0.8634148297447023, + "learning_rate": 4.8707516039662705e-06, + "loss": 0.3476, + "step": 435 + }, + { + "epoch": 0.13014925373134328, + "grad_norm": 0.8942167636606891, + "learning_rate": 4.8699832920089785e-06, + "loss": 0.3772, + "step": 436 + }, + { + "epoch": 0.13044776119402984, + "grad_norm": 0.8696237331815483, + "learning_rate": 4.869212764175076e-06, + "loss": 0.384, + "step": 437 + }, + { + "epoch": 0.13074626865671643, + "grad_norm": 0.8296034400995033, + "learning_rate": 4.8684400211849895e-06, + "loss": 0.3862, + "step": 438 + }, + { + "epoch": 0.131044776119403, + "grad_norm": 0.9402286904796409, + "learning_rate": 4.867665063761212e-06, + "loss": 0.3838, + "step": 439 + }, + { + "epoch": 0.13134328358208955, + "grad_norm": 0.8227141749687872, + "learning_rate": 4.866887892628314e-06, + "loss": 0.381, + "step": 440 + }, + { + "epoch": 0.1316417910447761, + "grad_norm": 0.8661103190250566, + "learning_rate": 4.866108508512929e-06, + "loss": 0.395, + "step": 441 + }, + { + "epoch": 0.1319402985074627, + "grad_norm": 0.8814800070280554, + "learning_rate": 4.865326912143762e-06, + "loss": 0.4317, + "step": 442 + }, + { + "epoch": 0.13223880597014925, + "grad_norm": 0.803476919813946, + "learning_rate": 4.864543104251587e-06, + "loss": 0.3822, + "step": 443 + }, + { + "epoch": 0.1325373134328358, + "grad_norm": 0.8519115345361534, + "learning_rate": 4.863757085569246e-06, + "loss": 0.4007, + "step": 444 + }, + { + "epoch": 0.1328358208955224, + "grad_norm": 0.8983953481126902, + "learning_rate": 4.862968856831646e-06, + "loss": 0.388, + "step": 445 + }, + { + "epoch": 0.13313432835820896, + "grad_norm": 0.7938477876103528, + "learning_rate": 4.862178418775763e-06, + "loss": 0.3488, + "step": 446 + }, + { + "epoch": 0.13343283582089552, + "grad_norm": 0.8324718218816302, + "learning_rate": 4.861385772140636e-06, + "loss": 0.3509, + "step": 447 + }, + { + "epoch": 0.13373134328358208, + "grad_norm": 0.8673532215135669, + "learning_rate": 4.86059091766737e-06, + "loss": 0.3997, + "step": 448 + }, + { + "epoch": 0.13402985074626866, + "grad_norm": 0.8720599760670272, + "learning_rate": 4.859793856099138e-06, + "loss": 0.4078, + "step": 449 + }, + { + "epoch": 0.13432835820895522, + "grad_norm": 0.8972789828786194, + "learning_rate": 4.858994588181168e-06, + "loss": 0.3935, + "step": 450 + }, + { + "epoch": 0.13462686567164178, + "grad_norm": 0.8581455676420515, + "learning_rate": 4.85819311466076e-06, + "loss": 0.4133, + "step": 451 + }, + { + "epoch": 0.13492537313432837, + "grad_norm": 0.8099642650298081, + "learning_rate": 4.857389436287271e-06, + "loss": 0.3684, + "step": 452 + }, + { + "epoch": 0.13522388059701493, + "grad_norm": 0.80580267367791, + "learning_rate": 4.85658355381212e-06, + "loss": 0.3736, + "step": 453 + }, + { + "epoch": 0.1355223880597015, + "grad_norm": 0.9175924370164683, + "learning_rate": 4.855775467988788e-06, + "loss": 0.3663, + "step": 454 + }, + { + "epoch": 0.13582089552238805, + "grad_norm": 0.804865363972486, + "learning_rate": 4.854965179572816e-06, + "loss": 0.3669, + "step": 455 + }, + { + "epoch": 0.13611940298507463, + "grad_norm": 0.8793877846060576, + "learning_rate": 4.854152689321803e-06, + "loss": 0.3981, + "step": 456 + }, + { + "epoch": 0.1364179104477612, + "grad_norm": 0.83704531818067, + "learning_rate": 4.853337997995408e-06, + "loss": 0.3796, + "step": 457 + }, + { + "epoch": 0.13671641791044775, + "grad_norm": 0.8197161694891909, + "learning_rate": 4.852521106355348e-06, + "loss": 0.395, + "step": 458 + }, + { + "epoch": 0.13701492537313434, + "grad_norm": 0.9341284013808222, + "learning_rate": 4.851702015165396e-06, + "loss": 0.374, + "step": 459 + }, + { + "epoch": 0.1373134328358209, + "grad_norm": 0.9043457115872434, + "learning_rate": 4.850880725191383e-06, + "loss": 0.3687, + "step": 460 + }, + { + "epoch": 0.13761194029850746, + "grad_norm": 0.8185493546695799, + "learning_rate": 4.850057237201194e-06, + "loss": 0.3623, + "step": 461 + }, + { + "epoch": 0.13791044776119402, + "grad_norm": 0.8454515033830348, + "learning_rate": 4.849231551964771e-06, + "loss": 0.345, + "step": 462 + }, + { + "epoch": 0.1382089552238806, + "grad_norm": 0.9749776836104918, + "learning_rate": 4.848403670254111e-06, + "loss": 0.4182, + "step": 463 + }, + { + "epoch": 0.13850746268656716, + "grad_norm": 0.8514730507575938, + "learning_rate": 4.84757359284326e-06, + "loss": 0.3707, + "step": 464 + }, + { + "epoch": 0.13880597014925372, + "grad_norm": 0.8959722306249783, + "learning_rate": 4.846741320508323e-06, + "loss": 0.4077, + "step": 465 + }, + { + "epoch": 0.1391044776119403, + "grad_norm": 0.7560664033693268, + "learning_rate": 4.8459068540274525e-06, + "loss": 0.3622, + "step": 466 + }, + { + "epoch": 0.13940298507462687, + "grad_norm": 0.8636823785223218, + "learning_rate": 4.845070194180856e-06, + "loss": 0.3569, + "step": 467 + }, + { + "epoch": 0.13970149253731343, + "grad_norm": 0.8161586910729753, + "learning_rate": 4.844231341750787e-06, + "loss": 0.3693, + "step": 468 + }, + { + "epoch": 0.14, + "grad_norm": 0.8264271190333304, + "learning_rate": 4.843390297521556e-06, + "loss": 0.3826, + "step": 469 + }, + { + "epoch": 0.14029850746268657, + "grad_norm": 0.8329895252389455, + "learning_rate": 4.842547062279517e-06, + "loss": 0.3835, + "step": 470 + }, + { + "epoch": 0.14059701492537313, + "grad_norm": 0.8750985694869183, + "learning_rate": 4.841701636813074e-06, + "loss": 0.4365, + "step": 471 + }, + { + "epoch": 0.1408955223880597, + "grad_norm": 0.923999448212016, + "learning_rate": 4.84085402191268e-06, + "loss": 0.3355, + "step": 472 + }, + { + "epoch": 0.14119402985074628, + "grad_norm": 0.8593855485500195, + "learning_rate": 4.840004218370833e-06, + "loss": 0.3684, + "step": 473 + }, + { + "epoch": 0.14149253731343284, + "grad_norm": 0.9253486202985267, + "learning_rate": 4.83915222698208e-06, + "loss": 0.4351, + "step": 474 + }, + { + "epoch": 0.1417910447761194, + "grad_norm": 0.7704797438692463, + "learning_rate": 4.838298048543012e-06, + "loss": 0.3311, + "step": 475 + }, + { + "epoch": 0.14208955223880598, + "grad_norm": 0.84958326911992, + "learning_rate": 4.837441683852264e-06, + "loss": 0.3443, + "step": 476 + }, + { + "epoch": 0.14238805970149254, + "grad_norm": 0.9214398433351614, + "learning_rate": 4.8365831337105185e-06, + "loss": 0.3837, + "step": 477 + }, + { + "epoch": 0.1426865671641791, + "grad_norm": 0.9020552668429409, + "learning_rate": 4.835722398920496e-06, + "loss": 0.3726, + "step": 478 + }, + { + "epoch": 0.14298507462686566, + "grad_norm": 0.834953050440995, + "learning_rate": 4.834859480286963e-06, + "loss": 0.3459, + "step": 479 + }, + { + "epoch": 0.14328358208955225, + "grad_norm": 0.8287857134100892, + "learning_rate": 4.83399437861673e-06, + "loss": 0.3889, + "step": 480 + }, + { + "epoch": 0.1435820895522388, + "grad_norm": 0.8245206662676321, + "learning_rate": 4.833127094718643e-06, + "loss": 0.3667, + "step": 481 + }, + { + "epoch": 0.14388059701492537, + "grad_norm": 0.832866477275499, + "learning_rate": 4.832257629403592e-06, + "loss": 0.4004, + "step": 482 + }, + { + "epoch": 0.14417910447761195, + "grad_norm": 0.915872105196081, + "learning_rate": 4.8313859834845085e-06, + "loss": 0.4013, + "step": 483 + }, + { + "epoch": 0.1444776119402985, + "grad_norm": 0.9789956349355645, + "learning_rate": 4.830512157776357e-06, + "loss": 0.4411, + "step": 484 + }, + { + "epoch": 0.14477611940298507, + "grad_norm": 0.7690696697320152, + "learning_rate": 4.829636153096143e-06, + "loss": 0.3071, + "step": 485 + }, + { + "epoch": 0.14507462686567163, + "grad_norm": 0.9027976378763707, + "learning_rate": 4.828757970262913e-06, + "loss": 0.3413, + "step": 486 + }, + { + "epoch": 0.14537313432835822, + "grad_norm": 0.8300835301176763, + "learning_rate": 4.827877610097743e-06, + "loss": 0.3535, + "step": 487 + }, + { + "epoch": 0.14567164179104478, + "grad_norm": 0.8562294310178254, + "learning_rate": 4.826995073423749e-06, + "loss": 0.355, + "step": 488 + }, + { + "epoch": 0.14597014925373133, + "grad_norm": 0.8059680443715135, + "learning_rate": 4.826110361066084e-06, + "loss": 0.3198, + "step": 489 + }, + { + "epoch": 0.14626865671641792, + "grad_norm": 0.9642630011245373, + "learning_rate": 4.825223473851929e-06, + "loss": 0.4476, + "step": 490 + }, + { + "epoch": 0.14656716417910448, + "grad_norm": 0.925182517782558, + "learning_rate": 4.824334412610504e-06, + "loss": 0.3641, + "step": 491 + }, + { + "epoch": 0.14686567164179104, + "grad_norm": 0.8931425394201947, + "learning_rate": 4.823443178173058e-06, + "loss": 0.3963, + "step": 492 + }, + { + "epoch": 0.1471641791044776, + "grad_norm": 0.7616008631295983, + "learning_rate": 4.822549771372875e-06, + "loss": 0.3295, + "step": 493 + }, + { + "epoch": 0.1474626865671642, + "grad_norm": 0.9164279418898156, + "learning_rate": 4.821654193045268e-06, + "loss": 0.3954, + "step": 494 + }, + { + "epoch": 0.14776119402985075, + "grad_norm": 0.8242276478542254, + "learning_rate": 4.8207564440275816e-06, + "loss": 0.358, + "step": 495 + }, + { + "epoch": 0.1480597014925373, + "grad_norm": 0.8015482008189594, + "learning_rate": 4.819856525159187e-06, + "loss": 0.3724, + "step": 496 + }, + { + "epoch": 0.1483582089552239, + "grad_norm": 0.8798301252694813, + "learning_rate": 4.818954437281489e-06, + "loss": 0.3851, + "step": 497 + }, + { + "epoch": 0.14865671641791045, + "grad_norm": 0.8789684495124492, + "learning_rate": 4.818050181237916e-06, + "loss": 0.3659, + "step": 498 + }, + { + "epoch": 0.148955223880597, + "grad_norm": 0.7945015241329157, + "learning_rate": 4.817143757873927e-06, + "loss": 0.3572, + "step": 499 + }, + { + "epoch": 0.14925373134328357, + "grad_norm": 0.8407928444830756, + "learning_rate": 4.8162351680370046e-06, + "loss": 0.408, + "step": 500 + }, + { + "epoch": 0.14955223880597016, + "grad_norm": 0.8377445302981639, + "learning_rate": 4.815324412576659e-06, + "loss": 0.3594, + "step": 501 + }, + { + "epoch": 0.14985074626865671, + "grad_norm": 0.9360976258979777, + "learning_rate": 4.814411492344423e-06, + "loss": 0.384, + "step": 502 + }, + { + "epoch": 0.15014925373134327, + "grad_norm": 0.9337795927031363, + "learning_rate": 4.813496408193855e-06, + "loss": 0.407, + "step": 503 + }, + { + "epoch": 0.15044776119402986, + "grad_norm": 0.9407994815333237, + "learning_rate": 4.812579160980538e-06, + "loss": 0.3348, + "step": 504 + }, + { + "epoch": 0.15074626865671642, + "grad_norm": 0.9213462894334784, + "learning_rate": 4.8116597515620735e-06, + "loss": 0.3909, + "step": 505 + }, + { + "epoch": 0.15104477611940298, + "grad_norm": 0.8106559781114875, + "learning_rate": 4.810738180798089e-06, + "loss": 0.3667, + "step": 506 + }, + { + "epoch": 0.15134328358208957, + "grad_norm": 0.8651383542529051, + "learning_rate": 4.8098144495502295e-06, + "loss": 0.3689, + "step": 507 + }, + { + "epoch": 0.15164179104477613, + "grad_norm": 1.0153425024352598, + "learning_rate": 4.808888558682161e-06, + "loss": 0.3986, + "step": 508 + }, + { + "epoch": 0.15194029850746268, + "grad_norm": 0.8027998509692323, + "learning_rate": 4.80796050905957e-06, + "loss": 0.3671, + "step": 509 + }, + { + "epoch": 0.15223880597014924, + "grad_norm": 0.9698401353337921, + "learning_rate": 4.807030301550159e-06, + "loss": 0.3358, + "step": 510 + }, + { + "epoch": 0.15253731343283583, + "grad_norm": 0.8470582921938352, + "learning_rate": 4.806097937023652e-06, + "loss": 0.3692, + "step": 511 + }, + { + "epoch": 0.1528358208955224, + "grad_norm": 0.9235947242592816, + "learning_rate": 4.8051634163517825e-06, + "loss": 0.3458, + "step": 512 + }, + { + "epoch": 0.15313432835820895, + "grad_norm": 0.894327518789336, + "learning_rate": 4.8042267404083085e-06, + "loss": 0.4142, + "step": 513 + }, + { + "epoch": 0.15343283582089554, + "grad_norm": 0.9285145110926873, + "learning_rate": 4.803287910068997e-06, + "loss": 0.3749, + "step": 514 + }, + { + "epoch": 0.1537313432835821, + "grad_norm": 0.8506702933212649, + "learning_rate": 4.802346926211634e-06, + "loss": 0.3469, + "step": 515 + }, + { + "epoch": 0.15402985074626865, + "grad_norm": 0.8264106915519583, + "learning_rate": 4.8014037897160134e-06, + "loss": 0.4164, + "step": 516 + }, + { + "epoch": 0.1543283582089552, + "grad_norm": 0.855191956211793, + "learning_rate": 4.800458501463946e-06, + "loss": 0.3684, + "step": 517 + }, + { + "epoch": 0.1546268656716418, + "grad_norm": 0.8553683728972094, + "learning_rate": 4.7995110623392545e-06, + "loss": 0.3469, + "step": 518 + }, + { + "epoch": 0.15492537313432836, + "grad_norm": 0.8190621146939986, + "learning_rate": 4.798561473227769e-06, + "loss": 0.3669, + "step": 519 + }, + { + "epoch": 0.15522388059701492, + "grad_norm": 0.8060862576423745, + "learning_rate": 4.797609735017335e-06, + "loss": 0.3829, + "step": 520 + }, + { + "epoch": 0.1555223880597015, + "grad_norm": 0.8263221507606819, + "learning_rate": 4.796655848597803e-06, + "loss": 0.3787, + "step": 521 + }, + { + "epoch": 0.15582089552238806, + "grad_norm": 0.9818542527439001, + "learning_rate": 4.795699814861033e-06, + "loss": 0.3943, + "step": 522 + }, + { + "epoch": 0.15611940298507462, + "grad_norm": 0.9363390488612918, + "learning_rate": 4.7947416347008936e-06, + "loss": 0.3488, + "step": 523 + }, + { + "epoch": 0.15641791044776118, + "grad_norm": 0.8719501569051828, + "learning_rate": 4.793781309013261e-06, + "loss": 0.3037, + "step": 524 + }, + { + "epoch": 0.15671641791044777, + "grad_norm": 0.8188931174909418, + "learning_rate": 4.7928188386960155e-06, + "loss": 0.3433, + "step": 525 + }, + { + "epoch": 0.15701492537313433, + "grad_norm": 0.8576926049561543, + "learning_rate": 4.791854224649042e-06, + "loss": 0.4148, + "step": 526 + }, + { + "epoch": 0.1573134328358209, + "grad_norm": 0.9514097030300651, + "learning_rate": 4.7908874677742335e-06, + "loss": 0.4065, + "step": 527 + }, + { + "epoch": 0.15761194029850747, + "grad_norm": 0.9034660588508406, + "learning_rate": 4.789918568975483e-06, + "loss": 0.3681, + "step": 528 + }, + { + "epoch": 0.15791044776119403, + "grad_norm": 0.8634816650220751, + "learning_rate": 4.788947529158687e-06, + "loss": 0.3697, + "step": 529 + }, + { + "epoch": 0.1582089552238806, + "grad_norm": 0.9824567340256468, + "learning_rate": 4.787974349231745e-06, + "loss": 0.3842, + "step": 530 + }, + { + "epoch": 0.15850746268656715, + "grad_norm": 0.7947403079672669, + "learning_rate": 4.786999030104555e-06, + "loss": 0.3299, + "step": 531 + }, + { + "epoch": 0.15880597014925374, + "grad_norm": 0.8529781467405901, + "learning_rate": 4.786021572689019e-06, + "loss": 0.3872, + "step": 532 + }, + { + "epoch": 0.1591044776119403, + "grad_norm": 0.9209188488882705, + "learning_rate": 4.785041977899033e-06, + "loss": 0.3936, + "step": 533 + }, + { + "epoch": 0.15940298507462686, + "grad_norm": 0.885355457355636, + "learning_rate": 4.784060246650496e-06, + "loss": 0.3924, + "step": 534 + }, + { + "epoch": 0.15970149253731344, + "grad_norm": 0.9523084549565237, + "learning_rate": 4.783076379861304e-06, + "loss": 0.4318, + "step": 535 + }, + { + "epoch": 0.16, + "grad_norm": 0.8863570574039847, + "learning_rate": 4.782090378451345e-06, + "loss": 0.4157, + "step": 536 + }, + { + "epoch": 0.16029850746268656, + "grad_norm": 0.8117266215970601, + "learning_rate": 4.781102243342508e-06, + "loss": 0.3463, + "step": 537 + }, + { + "epoch": 0.16059701492537312, + "grad_norm": 0.8515951237304888, + "learning_rate": 4.780111975458677e-06, + "loss": 0.4112, + "step": 538 + }, + { + "epoch": 0.1608955223880597, + "grad_norm": 0.9510326848546506, + "learning_rate": 4.779119575725726e-06, + "loss": 0.4046, + "step": 539 + }, + { + "epoch": 0.16119402985074627, + "grad_norm": 0.8449453300506576, + "learning_rate": 4.7781250450715245e-06, + "loss": 0.356, + "step": 540 + }, + { + "epoch": 0.16149253731343283, + "grad_norm": 0.931947295073069, + "learning_rate": 4.7771283844259365e-06, + "loss": 0.3974, + "step": 541 + }, + { + "epoch": 0.1617910447761194, + "grad_norm": 0.874181026365032, + "learning_rate": 4.776129594720813e-06, + "loss": 0.3629, + "step": 542 + }, + { + "epoch": 0.16208955223880597, + "grad_norm": 0.855050176983507, + "learning_rate": 4.77512867689e-06, + "loss": 0.3853, + "step": 543 + }, + { + "epoch": 0.16238805970149253, + "grad_norm": 0.8923651345310537, + "learning_rate": 4.77412563186933e-06, + "loss": 0.4007, + "step": 544 + }, + { + "epoch": 0.1626865671641791, + "grad_norm": 0.9078676220956058, + "learning_rate": 4.7731204605966265e-06, + "loss": 0.3881, + "step": 545 + }, + { + "epoch": 0.16298507462686568, + "grad_norm": 0.7538192348742706, + "learning_rate": 4.7721131640116996e-06, + "loss": 0.3036, + "step": 546 + }, + { + "epoch": 0.16328358208955224, + "grad_norm": 0.8608203405755754, + "learning_rate": 4.771103743056348e-06, + "loss": 0.3457, + "step": 547 + }, + { + "epoch": 0.1635820895522388, + "grad_norm": 0.8212962040784945, + "learning_rate": 4.770092198674353e-06, + "loss": 0.3273, + "step": 548 + }, + { + "epoch": 0.16388059701492538, + "grad_norm": 0.9028550682650915, + "learning_rate": 4.769078531811487e-06, + "loss": 0.4282, + "step": 549 + }, + { + "epoch": 0.16417910447761194, + "grad_norm": 0.8884438521513933, + "learning_rate": 4.768062743415502e-06, + "loss": 0.3837, + "step": 550 + }, + { + "epoch": 0.1644776119402985, + "grad_norm": 0.876505852629387, + "learning_rate": 4.767044834436137e-06, + "loss": 0.3951, + "step": 551 + }, + { + "epoch": 0.1647761194029851, + "grad_norm": 0.7663670395688694, + "learning_rate": 4.76602480582511e-06, + "loss": 0.3629, + "step": 552 + }, + { + "epoch": 0.16507462686567165, + "grad_norm": 0.9554436497467657, + "learning_rate": 4.7650026585361255e-06, + "loss": 0.3752, + "step": 553 + }, + { + "epoch": 0.1653731343283582, + "grad_norm": 0.8328045712770886, + "learning_rate": 4.7639783935248635e-06, + "loss": 0.363, + "step": 554 + }, + { + "epoch": 0.16567164179104477, + "grad_norm": 1.3533426086417046, + "learning_rate": 4.762952011748988e-06, + "loss": 0.293, + "step": 555 + }, + { + "epoch": 0.16597014925373135, + "grad_norm": 0.8397180497457704, + "learning_rate": 4.761923514168142e-06, + "loss": 0.3803, + "step": 556 + }, + { + "epoch": 0.1662686567164179, + "grad_norm": 0.899522921671484, + "learning_rate": 4.760892901743944e-06, + "loss": 0.4168, + "step": 557 + }, + { + "epoch": 0.16656716417910447, + "grad_norm": 0.8900270204673798, + "learning_rate": 4.759860175439993e-06, + "loss": 0.4041, + "step": 558 + }, + { + "epoch": 0.16686567164179106, + "grad_norm": 0.9386190331772859, + "learning_rate": 4.758825336221861e-06, + "loss": 0.3569, + "step": 559 + }, + { + "epoch": 0.16716417910447762, + "grad_norm": 1.4064679834783074, + "learning_rate": 4.7577883850570995e-06, + "loss": 0.4046, + "step": 560 + }, + { + "epoch": 0.16746268656716418, + "grad_norm": 0.8752446720830109, + "learning_rate": 4.7567493229152315e-06, + "loss": 0.369, + "step": 561 + }, + { + "epoch": 0.16776119402985074, + "grad_norm": 0.8921917475055471, + "learning_rate": 4.755708150767754e-06, + "loss": 0.3629, + "step": 562 + }, + { + "epoch": 0.16805970149253732, + "grad_norm": 0.9071169605857111, + "learning_rate": 4.754664869588139e-06, + "loss": 0.3196, + "step": 563 + }, + { + "epoch": 0.16835820895522388, + "grad_norm": 0.7429818510738487, + "learning_rate": 4.75361948035183e-06, + "loss": 0.322, + "step": 564 + }, + { + "epoch": 0.16865671641791044, + "grad_norm": 0.8100900713596559, + "learning_rate": 4.752571984036237e-06, + "loss": 0.3758, + "step": 565 + }, + { + "epoch": 0.16895522388059703, + "grad_norm": 0.8598955983620397, + "learning_rate": 4.7515223816207455e-06, + "loss": 0.3442, + "step": 566 + }, + { + "epoch": 0.1692537313432836, + "grad_norm": 0.8783738770144779, + "learning_rate": 4.750470674086709e-06, + "loss": 0.322, + "step": 567 + }, + { + "epoch": 0.16955223880597015, + "grad_norm": 0.9030983989078412, + "learning_rate": 4.749416862417448e-06, + "loss": 0.4017, + "step": 568 + }, + { + "epoch": 0.1698507462686567, + "grad_norm": 0.8713947394750962, + "learning_rate": 4.748360947598248e-06, + "loss": 0.3646, + "step": 569 + }, + { + "epoch": 0.1701492537313433, + "grad_norm": 0.8986926196540066, + "learning_rate": 4.747302930616368e-06, + "loss": 0.4171, + "step": 570 + }, + { + "epoch": 0.17044776119402985, + "grad_norm": 0.8073489618852552, + "learning_rate": 4.746242812461025e-06, + "loss": 0.3999, + "step": 571 + }, + { + "epoch": 0.1707462686567164, + "grad_norm": 0.9163536146278429, + "learning_rate": 4.7451805941234055e-06, + "loss": 0.4386, + "step": 572 + }, + { + "epoch": 0.171044776119403, + "grad_norm": 0.828059671354957, + "learning_rate": 4.744116276596656e-06, + "loss": 0.3435, + "step": 573 + }, + { + "epoch": 0.17134328358208956, + "grad_norm": 0.8696889215011467, + "learning_rate": 4.743049860875889e-06, + "loss": 0.3319, + "step": 574 + }, + { + "epoch": 0.17164179104477612, + "grad_norm": 0.8480598380604834, + "learning_rate": 4.741981347958175e-06, + "loss": 0.3703, + "step": 575 + }, + { + "epoch": 0.17194029850746267, + "grad_norm": 0.7909046861375709, + "learning_rate": 4.740910738842551e-06, + "loss": 0.3589, + "step": 576 + }, + { + "epoch": 0.17223880597014926, + "grad_norm": 0.8917454585581824, + "learning_rate": 4.739838034530008e-06, + "loss": 0.3876, + "step": 577 + }, + { + "epoch": 0.17253731343283582, + "grad_norm": 0.8783412360640082, + "learning_rate": 4.738763236023498e-06, + "loss": 0.3892, + "step": 578 + }, + { + "epoch": 0.17283582089552238, + "grad_norm": 0.9299982449111176, + "learning_rate": 4.737686344327932e-06, + "loss": 0.3642, + "step": 579 + }, + { + "epoch": 0.17313432835820897, + "grad_norm": 0.8702752107459392, + "learning_rate": 4.736607360450179e-06, + "loss": 0.357, + "step": 580 + }, + { + "epoch": 0.17343283582089553, + "grad_norm": 0.8641201687866182, + "learning_rate": 4.73552628539906e-06, + "loss": 0.3881, + "step": 581 + }, + { + "epoch": 0.17373134328358208, + "grad_norm": 0.7875411772022647, + "learning_rate": 4.734443120185357e-06, + "loss": 0.3886, + "step": 582 + }, + { + "epoch": 0.17402985074626864, + "grad_norm": 0.7639534640961319, + "learning_rate": 4.733357865821799e-06, + "loss": 0.3635, + "step": 583 + }, + { + "epoch": 0.17432835820895523, + "grad_norm": 0.8465197416628776, + "learning_rate": 4.7322705233230765e-06, + "loss": 0.3753, + "step": 584 + }, + { + "epoch": 0.1746268656716418, + "grad_norm": 0.8612932322468133, + "learning_rate": 4.731181093705825e-06, + "loss": 0.3584, + "step": 585 + }, + { + "epoch": 0.17492537313432835, + "grad_norm": 0.811960790500117, + "learning_rate": 4.730089577988637e-06, + "loss": 0.3794, + "step": 586 + }, + { + "epoch": 0.17522388059701494, + "grad_norm": 0.7739231568933665, + "learning_rate": 4.728995977192052e-06, + "loss": 0.3499, + "step": 587 + }, + { + "epoch": 0.1755223880597015, + "grad_norm": 0.8400654346049872, + "learning_rate": 4.72790029233856e-06, + "loss": 0.3675, + "step": 588 + }, + { + "epoch": 0.17582089552238805, + "grad_norm": 0.8830538820611432, + "learning_rate": 4.7268025244526e-06, + "loss": 0.4198, + "step": 589 + }, + { + "epoch": 0.1761194029850746, + "grad_norm": 0.8440850359948983, + "learning_rate": 4.725702674560558e-06, + "loss": 0.3436, + "step": 590 + }, + { + "epoch": 0.1764179104477612, + "grad_norm": 0.923969252299939, + "learning_rate": 4.724600743690766e-06, + "loss": 0.3726, + "step": 591 + }, + { + "epoch": 0.17671641791044776, + "grad_norm": 0.9285113562633005, + "learning_rate": 4.723496732873504e-06, + "loss": 0.3903, + "step": 592 + }, + { + "epoch": 0.17701492537313432, + "grad_norm": 0.7967222216178841, + "learning_rate": 4.722390643140995e-06, + "loss": 0.3088, + "step": 593 + }, + { + "epoch": 0.1773134328358209, + "grad_norm": 0.8941789785640152, + "learning_rate": 4.721282475527405e-06, + "loss": 0.3684, + "step": 594 + }, + { + "epoch": 0.17761194029850746, + "grad_norm": 0.8812907651438229, + "learning_rate": 4.720172231068845e-06, + "loss": 0.3697, + "step": 595 + }, + { + "epoch": 0.17791044776119402, + "grad_norm": 0.8984723387629163, + "learning_rate": 4.719059910803364e-06, + "loss": 0.3657, + "step": 596 + }, + { + "epoch": 0.1782089552238806, + "grad_norm": 0.9108229355000194, + "learning_rate": 4.717945515770958e-06, + "loss": 0.3609, + "step": 597 + }, + { + "epoch": 0.17850746268656717, + "grad_norm": 0.885813043986939, + "learning_rate": 4.716829047013555e-06, + "loss": 0.4141, + "step": 598 + }, + { + "epoch": 0.17880597014925373, + "grad_norm": 0.8127480639060701, + "learning_rate": 4.715710505575031e-06, + "loss": 0.3529, + "step": 599 + }, + { + "epoch": 0.1791044776119403, + "grad_norm": 0.8032237883986898, + "learning_rate": 4.71458989250119e-06, + "loss": 0.3496, + "step": 600 + }, + { + "epoch": 0.17940298507462688, + "grad_norm": 0.819201792889001, + "learning_rate": 4.713467208839782e-06, + "loss": 0.3674, + "step": 601 + }, + { + "epoch": 0.17970149253731343, + "grad_norm": 1.1406695776718605, + "learning_rate": 4.712342455640486e-06, + "loss": 0.3756, + "step": 602 + }, + { + "epoch": 0.18, + "grad_norm": 0.8607621290309236, + "learning_rate": 4.7112156339549205e-06, + "loss": 0.3623, + "step": 603 + }, + { + "epoch": 0.18029850746268658, + "grad_norm": 0.8516209936354656, + "learning_rate": 4.710086744836635e-06, + "loss": 0.3934, + "step": 604 + }, + { + "epoch": 0.18059701492537314, + "grad_norm": 1.0789657160791544, + "learning_rate": 4.708955789341115e-06, + "loss": 0.3333, + "step": 605 + }, + { + "epoch": 0.1808955223880597, + "grad_norm": 0.8465101170476235, + "learning_rate": 4.707822768525775e-06, + "loss": 0.3456, + "step": 606 + }, + { + "epoch": 0.18119402985074626, + "grad_norm": 0.8021068603431047, + "learning_rate": 4.706687683449961e-06, + "loss": 0.3743, + "step": 607 + }, + { + "epoch": 0.18149253731343284, + "grad_norm": 0.9396472673097465, + "learning_rate": 4.705550535174952e-06, + "loss": 0.3594, + "step": 608 + }, + { + "epoch": 0.1817910447761194, + "grad_norm": 0.9308558717626013, + "learning_rate": 4.704411324763954e-06, + "loss": 0.3597, + "step": 609 + }, + { + "epoch": 0.18208955223880596, + "grad_norm": 0.8780878715069544, + "learning_rate": 4.7032700532820984e-06, + "loss": 0.3794, + "step": 610 + }, + { + "epoch": 0.18238805970149255, + "grad_norm": 0.8438709125133251, + "learning_rate": 4.702126721796448e-06, + "loss": 0.3534, + "step": 611 + }, + { + "epoch": 0.1826865671641791, + "grad_norm": 0.7918705122194744, + "learning_rate": 4.700981331375991e-06, + "loss": 0.3512, + "step": 612 + }, + { + "epoch": 0.18298507462686567, + "grad_norm": 0.8832849637084809, + "learning_rate": 4.699833883091637e-06, + "loss": 0.3664, + "step": 613 + }, + { + "epoch": 0.18328358208955223, + "grad_norm": 0.7766965849001322, + "learning_rate": 4.698684378016223e-06, + "loss": 0.3536, + "step": 614 + }, + { + "epoch": 0.18358208955223881, + "grad_norm": 0.8793943105130626, + "learning_rate": 4.69753281722451e-06, + "loss": 0.3859, + "step": 615 + }, + { + "epoch": 0.18388059701492537, + "grad_norm": 0.9314513871786796, + "learning_rate": 4.696379201793176e-06, + "loss": 0.367, + "step": 616 + }, + { + "epoch": 0.18417910447761193, + "grad_norm": 0.8801529809718072, + "learning_rate": 4.695223532800825e-06, + "loss": 0.4067, + "step": 617 + }, + { + "epoch": 0.18447761194029852, + "grad_norm": 0.7999007221875536, + "learning_rate": 4.694065811327982e-06, + "loss": 0.3428, + "step": 618 + }, + { + "epoch": 0.18477611940298508, + "grad_norm": 0.8859494993053622, + "learning_rate": 4.692906038457084e-06, + "loss": 0.3461, + "step": 619 + }, + { + "epoch": 0.18507462686567164, + "grad_norm": 0.7920787783745799, + "learning_rate": 4.6917442152724925e-06, + "loss": 0.3338, + "step": 620 + }, + { + "epoch": 0.1853731343283582, + "grad_norm": 0.8343099718824265, + "learning_rate": 4.6905803428604835e-06, + "loss": 0.386, + "step": 621 + }, + { + "epoch": 0.18567164179104478, + "grad_norm": 0.8227479914592881, + "learning_rate": 4.6894144223092496e-06, + "loss": 0.3514, + "step": 622 + }, + { + "epoch": 0.18597014925373134, + "grad_norm": 0.9320390654474898, + "learning_rate": 4.6882464547088976e-06, + "loss": 0.3922, + "step": 623 + }, + { + "epoch": 0.1862686567164179, + "grad_norm": 0.7694880051993461, + "learning_rate": 4.6870764411514495e-06, + "loss": 0.3571, + "step": 624 + }, + { + "epoch": 0.1865671641791045, + "grad_norm": 0.8228896792999334, + "learning_rate": 4.685904382730839e-06, + "loss": 0.3658, + "step": 625 + }, + { + "epoch": 0.18686567164179105, + "grad_norm": 0.8303039994029542, + "learning_rate": 4.684730280542912e-06, + "loss": 0.3735, + "step": 626 + }, + { + "epoch": 0.1871641791044776, + "grad_norm": 0.8427730863324351, + "learning_rate": 4.6835541356854255e-06, + "loss": 0.3918, + "step": 627 + }, + { + "epoch": 0.18746268656716417, + "grad_norm": 0.8394164814398534, + "learning_rate": 4.682375949258045e-06, + "loss": 0.3075, + "step": 628 + }, + { + "epoch": 0.18776119402985075, + "grad_norm": 0.8214529875587838, + "learning_rate": 4.681195722362349e-06, + "loss": 0.3551, + "step": 629 + }, + { + "epoch": 0.1880597014925373, + "grad_norm": 0.810136115359384, + "learning_rate": 4.68001345610182e-06, + "loss": 0.3641, + "step": 630 + }, + { + "epoch": 0.18835820895522387, + "grad_norm": 0.8803568899727253, + "learning_rate": 4.678829151581846e-06, + "loss": 0.3817, + "step": 631 + }, + { + "epoch": 0.18865671641791046, + "grad_norm": 0.7930342754419026, + "learning_rate": 4.677642809909725e-06, + "loss": 0.4024, + "step": 632 + }, + { + "epoch": 0.18895522388059702, + "grad_norm": 0.852158008550677, + "learning_rate": 4.6764544321946565e-06, + "loss": 0.4057, + "step": 633 + }, + { + "epoch": 0.18925373134328358, + "grad_norm": 0.7444755651407072, + "learning_rate": 4.675264019547745e-06, + "loss": 0.3094, + "step": 634 + }, + { + "epoch": 0.18955223880597014, + "grad_norm": 0.7810087717032494, + "learning_rate": 4.674071573081998e-06, + "loss": 0.3552, + "step": 635 + }, + { + "epoch": 0.18985074626865672, + "grad_norm": 0.8457218590063859, + "learning_rate": 4.672877093912323e-06, + "loss": 0.3554, + "step": 636 + }, + { + "epoch": 0.19014925373134328, + "grad_norm": 0.8702779931971437, + "learning_rate": 4.671680583155528e-06, + "loss": 0.4169, + "step": 637 + }, + { + "epoch": 0.19044776119402984, + "grad_norm": 0.9714348750550799, + "learning_rate": 4.670482041930324e-06, + "loss": 0.348, + "step": 638 + }, + { + "epoch": 0.19074626865671643, + "grad_norm": 0.8315101609756071, + "learning_rate": 4.6692814713573155e-06, + "loss": 0.3387, + "step": 639 + }, + { + "epoch": 0.191044776119403, + "grad_norm": 0.8737001772219148, + "learning_rate": 4.6680788725590086e-06, + "loss": 0.3723, + "step": 640 + }, + { + "epoch": 0.19134328358208955, + "grad_norm": 0.928685080600342, + "learning_rate": 4.6668742466598015e-06, + "loss": 0.3903, + "step": 641 + }, + { + "epoch": 0.19164179104477613, + "grad_norm": 0.800239252131357, + "learning_rate": 4.665667594785992e-06, + "loss": 0.3408, + "step": 642 + }, + { + "epoch": 0.1919402985074627, + "grad_norm": 0.8264024494717072, + "learning_rate": 4.66445891806577e-06, + "loss": 0.3119, + "step": 643 + }, + { + "epoch": 0.19223880597014925, + "grad_norm": 0.782897351165225, + "learning_rate": 4.663248217629218e-06, + "loss": 0.3748, + "step": 644 + }, + { + "epoch": 0.1925373134328358, + "grad_norm": 0.7565710294395597, + "learning_rate": 4.662035494608313e-06, + "loss": 0.3267, + "step": 645 + }, + { + "epoch": 0.1928358208955224, + "grad_norm": 0.8522767167672216, + "learning_rate": 4.660820750136918e-06, + "loss": 0.3771, + "step": 646 + }, + { + "epoch": 0.19313432835820896, + "grad_norm": 0.8777158029491957, + "learning_rate": 4.6596039853507925e-06, + "loss": 0.3605, + "step": 647 + }, + { + "epoch": 0.19343283582089552, + "grad_norm": 0.8449256976352308, + "learning_rate": 4.658385201387582e-06, + "loss": 0.3454, + "step": 648 + }, + { + "epoch": 0.1937313432835821, + "grad_norm": 0.9008268972405354, + "learning_rate": 4.657164399386818e-06, + "loss": 0.4138, + "step": 649 + }, + { + "epoch": 0.19402985074626866, + "grad_norm": 0.8903381842895088, + "learning_rate": 4.655941580489922e-06, + "loss": 0.3699, + "step": 650 + }, + { + "epoch": 0.19432835820895522, + "grad_norm": 0.8382545542778225, + "learning_rate": 4.6547167458402e-06, + "loss": 0.3738, + "step": 651 + }, + { + "epoch": 0.19462686567164178, + "grad_norm": 0.9109044659426834, + "learning_rate": 4.653489896582841e-06, + "loss": 0.4004, + "step": 652 + }, + { + "epoch": 0.19492537313432837, + "grad_norm": 0.9177470131519694, + "learning_rate": 4.65226103386492e-06, + "loss": 0.3654, + "step": 653 + }, + { + "epoch": 0.19522388059701493, + "grad_norm": 0.8276383969775275, + "learning_rate": 4.651030158835393e-06, + "loss": 0.3513, + "step": 654 + }, + { + "epoch": 0.19552238805970149, + "grad_norm": 0.8952139983790235, + "learning_rate": 4.6497972726451005e-06, + "loss": 0.3786, + "step": 655 + }, + { + "epoch": 0.19582089552238807, + "grad_norm": 0.7985778698618001, + "learning_rate": 4.648562376446759e-06, + "loss": 0.3638, + "step": 656 + }, + { + "epoch": 0.19611940298507463, + "grad_norm": 0.908801175999597, + "learning_rate": 4.6473254713949665e-06, + "loss": 0.397, + "step": 657 + }, + { + "epoch": 0.1964179104477612, + "grad_norm": 0.894398615813818, + "learning_rate": 4.6460865586462e-06, + "loss": 0.4207, + "step": 658 + }, + { + "epoch": 0.19671641791044775, + "grad_norm": 0.8685502281430703, + "learning_rate": 4.644845639358812e-06, + "loss": 0.3762, + "step": 659 + }, + { + "epoch": 0.19701492537313434, + "grad_norm": 0.8915809779098176, + "learning_rate": 4.6436027146930316e-06, + "loss": 0.3427, + "step": 660 + }, + { + "epoch": 0.1973134328358209, + "grad_norm": 0.8679116859759302, + "learning_rate": 4.642357785810964e-06, + "loss": 0.3944, + "step": 661 + }, + { + "epoch": 0.19761194029850745, + "grad_norm": 0.7880457160052126, + "learning_rate": 4.641110853876586e-06, + "loss": 0.3987, + "step": 662 + }, + { + "epoch": 0.19791044776119404, + "grad_norm": 0.864228831055506, + "learning_rate": 4.6398619200557485e-06, + "loss": 0.3996, + "step": 663 + }, + { + "epoch": 0.1982089552238806, + "grad_norm": 0.8022579546091344, + "learning_rate": 4.638610985516176e-06, + "loss": 0.3521, + "step": 664 + }, + { + "epoch": 0.19850746268656716, + "grad_norm": 0.7106263773090994, + "learning_rate": 4.6373580514274605e-06, + "loss": 0.3367, + "step": 665 + }, + { + "epoch": 0.19880597014925372, + "grad_norm": 0.8985506899451846, + "learning_rate": 4.636103118961065e-06, + "loss": 0.4056, + "step": 666 + }, + { + "epoch": 0.1991044776119403, + "grad_norm": 0.870760200813925, + "learning_rate": 4.634846189290321e-06, + "loss": 0.3646, + "step": 667 + }, + { + "epoch": 0.19940298507462687, + "grad_norm": 0.8551014965491996, + "learning_rate": 4.633587263590427e-06, + "loss": 0.4102, + "step": 668 + }, + { + "epoch": 0.19970149253731342, + "grad_norm": 0.8857508158230196, + "learning_rate": 4.632326343038448e-06, + "loss": 0.4184, + "step": 669 + }, + { + "epoch": 0.2, + "grad_norm": 0.7779965326593492, + "learning_rate": 4.631063428813314e-06, + "loss": 0.3521, + "step": 670 + }, + { + "epoch": 0.20029850746268657, + "grad_norm": 0.8349623707590051, + "learning_rate": 4.629798522095818e-06, + "loss": 0.381, + "step": 671 + }, + { + "epoch": 0.20059701492537313, + "grad_norm": 1.0856988327864074, + "learning_rate": 4.628531624068618e-06, + "loss": 0.3597, + "step": 672 + }, + { + "epoch": 0.2008955223880597, + "grad_norm": 0.855642649913472, + "learning_rate": 4.627262735916233e-06, + "loss": 0.3506, + "step": 673 + }, + { + "epoch": 0.20119402985074628, + "grad_norm": 0.7945501228954227, + "learning_rate": 4.625991858825042e-06, + "loss": 0.3186, + "step": 674 + }, + { + "epoch": 0.20149253731343283, + "grad_norm": 0.9656198406567287, + "learning_rate": 4.624718993983284e-06, + "loss": 0.349, + "step": 675 + }, + { + "epoch": 0.2017910447761194, + "grad_norm": 0.9620523693551688, + "learning_rate": 4.623444142581056e-06, + "loss": 0.3566, + "step": 676 + }, + { + "epoch": 0.20208955223880598, + "grad_norm": 0.7443588122091737, + "learning_rate": 4.622167305810315e-06, + "loss": 0.3195, + "step": 677 + }, + { + "epoch": 0.20238805970149254, + "grad_norm": 0.9328190975144249, + "learning_rate": 4.620888484864869e-06, + "loss": 0.3975, + "step": 678 + }, + { + "epoch": 0.2026865671641791, + "grad_norm": 0.8006835988448852, + "learning_rate": 4.6196076809403875e-06, + "loss": 0.3228, + "step": 679 + }, + { + "epoch": 0.20298507462686566, + "grad_norm": 0.9990492755928724, + "learning_rate": 4.618324895234391e-06, + "loss": 0.3728, + "step": 680 + }, + { + "epoch": 0.20328358208955224, + "grad_norm": 0.817711684027937, + "learning_rate": 4.61704012894625e-06, + "loss": 0.3876, + "step": 681 + }, + { + "epoch": 0.2035820895522388, + "grad_norm": 0.7456982963035693, + "learning_rate": 4.615753383277192e-06, + "loss": 0.334, + "step": 682 + }, + { + "epoch": 0.20388059701492536, + "grad_norm": 0.87509200555024, + "learning_rate": 4.614464659430292e-06, + "loss": 0.3982, + "step": 683 + }, + { + "epoch": 0.20417910447761195, + "grad_norm": 0.7734942878648073, + "learning_rate": 4.613173958610476e-06, + "loss": 0.3653, + "step": 684 + }, + { + "epoch": 0.2044776119402985, + "grad_norm": 1.006253095908581, + "learning_rate": 4.611881282024518e-06, + "loss": 0.4069, + "step": 685 + }, + { + "epoch": 0.20477611940298507, + "grad_norm": 0.9446767733133795, + "learning_rate": 4.6105866308810375e-06, + "loss": 0.4212, + "step": 686 + }, + { + "epoch": 0.20507462686567166, + "grad_norm": 0.9531967381845363, + "learning_rate": 4.609290006390503e-06, + "loss": 0.3525, + "step": 687 + }, + { + "epoch": 0.20537313432835821, + "grad_norm": 0.7954289882558423, + "learning_rate": 4.607991409765226e-06, + "loss": 0.3446, + "step": 688 + }, + { + "epoch": 0.20567164179104477, + "grad_norm": 0.7229225187581373, + "learning_rate": 4.606690842219364e-06, + "loss": 0.3562, + "step": 689 + }, + { + "epoch": 0.20597014925373133, + "grad_norm": 0.7579897211017783, + "learning_rate": 4.605388304968915e-06, + "loss": 0.3447, + "step": 690 + }, + { + "epoch": 0.20626865671641792, + "grad_norm": 0.9384371378838448, + "learning_rate": 4.604083799231719e-06, + "loss": 0.365, + "step": 691 + }, + { + "epoch": 0.20656716417910448, + "grad_norm": 1.1276096522058603, + "learning_rate": 4.602777326227459e-06, + "loss": 0.3468, + "step": 692 + }, + { + "epoch": 0.20686567164179104, + "grad_norm": 0.8690437426856318, + "learning_rate": 4.6014688871776535e-06, + "loss": 0.4067, + "step": 693 + }, + { + "epoch": 0.20716417910447762, + "grad_norm": 0.8340116401411932, + "learning_rate": 4.600158483305662e-06, + "loss": 0.3303, + "step": 694 + }, + { + "epoch": 0.20746268656716418, + "grad_norm": 0.9659161023612816, + "learning_rate": 4.59884611583668e-06, + "loss": 0.3845, + "step": 695 + }, + { + "epoch": 0.20776119402985074, + "grad_norm": 0.8013822632682776, + "learning_rate": 4.59753178599774e-06, + "loss": 0.343, + "step": 696 + }, + { + "epoch": 0.2080597014925373, + "grad_norm": 0.7638697285164096, + "learning_rate": 4.5962154950177065e-06, + "loss": 0.3371, + "step": 697 + }, + { + "epoch": 0.2083582089552239, + "grad_norm": 0.8584456429925702, + "learning_rate": 4.594897244127281e-06, + "loss": 0.387, + "step": 698 + }, + { + "epoch": 0.20865671641791045, + "grad_norm": 0.7652924682991881, + "learning_rate": 4.593577034558995e-06, + "loss": 0.3551, + "step": 699 + }, + { + "epoch": 0.208955223880597, + "grad_norm": 0.8065003946035596, + "learning_rate": 4.592254867547214e-06, + "loss": 0.3438, + "step": 700 + }, + { + "epoch": 0.2092537313432836, + "grad_norm": 0.9004428506328457, + "learning_rate": 4.590930744328128e-06, + "loss": 0.3713, + "step": 701 + }, + { + "epoch": 0.20955223880597015, + "grad_norm": 0.8938815871012901, + "learning_rate": 4.5896046661397645e-06, + "loss": 0.3692, + "step": 702 + }, + { + "epoch": 0.2098507462686567, + "grad_norm": 0.905050953213985, + "learning_rate": 4.588276634221972e-06, + "loss": 0.3596, + "step": 703 + }, + { + "epoch": 0.21014925373134327, + "grad_norm": 0.7970736616034015, + "learning_rate": 4.586946649816428e-06, + "loss": 0.3495, + "step": 704 + }, + { + "epoch": 0.21044776119402986, + "grad_norm": 0.8579016457008204, + "learning_rate": 4.585614714166636e-06, + "loss": 0.3707, + "step": 705 + }, + { + "epoch": 0.21074626865671642, + "grad_norm": 0.9030100226974851, + "learning_rate": 4.584280828517924e-06, + "loss": 0.3828, + "step": 706 + }, + { + "epoch": 0.21104477611940298, + "grad_norm": 0.9247619159595248, + "learning_rate": 4.582944994117441e-06, + "loss": 0.3792, + "step": 707 + }, + { + "epoch": 0.21134328358208956, + "grad_norm": 0.9295226931550548, + "learning_rate": 4.58160721221416e-06, + "loss": 0.3958, + "step": 708 + }, + { + "epoch": 0.21164179104477612, + "grad_norm": 0.894595136771482, + "learning_rate": 4.580267484058876e-06, + "loss": 0.3559, + "step": 709 + }, + { + "epoch": 0.21194029850746268, + "grad_norm": 0.8774937178986986, + "learning_rate": 4.5789258109042e-06, + "loss": 0.4055, + "step": 710 + }, + { + "epoch": 0.21223880597014924, + "grad_norm": 0.8187268569344242, + "learning_rate": 4.577582194004565e-06, + "loss": 0.3495, + "step": 711 + }, + { + "epoch": 0.21253731343283583, + "grad_norm": 0.8813687056400052, + "learning_rate": 4.576236634616219e-06, + "loss": 0.3874, + "step": 712 + }, + { + "epoch": 0.2128358208955224, + "grad_norm": 0.7817355048189055, + "learning_rate": 4.574889133997229e-06, + "loss": 0.348, + "step": 713 + }, + { + "epoch": 0.21313432835820895, + "grad_norm": 0.7918141903283299, + "learning_rate": 4.573539693407474e-06, + "loss": 0.3711, + "step": 714 + }, + { + "epoch": 0.21343283582089553, + "grad_norm": 0.7682344039631848, + "learning_rate": 4.572188314108648e-06, + "loss": 0.3661, + "step": 715 + }, + { + "epoch": 0.2137313432835821, + "grad_norm": 0.7620495460154416, + "learning_rate": 4.570834997364258e-06, + "loss": 0.3651, + "step": 716 + }, + { + "epoch": 0.21402985074626865, + "grad_norm": 0.9253366569905003, + "learning_rate": 4.569479744439622e-06, + "loss": 0.3356, + "step": 717 + }, + { + "epoch": 0.2143283582089552, + "grad_norm": 0.9151048827100409, + "learning_rate": 4.568122556601869e-06, + "loss": 0.363, + "step": 718 + }, + { + "epoch": 0.2146268656716418, + "grad_norm": 0.8490011151647137, + "learning_rate": 4.566763435119936e-06, + "loss": 0.3247, + "step": 719 + }, + { + "epoch": 0.21492537313432836, + "grad_norm": 0.8786239642622696, + "learning_rate": 4.565402381264569e-06, + "loss": 0.4117, + "step": 720 + }, + { + "epoch": 0.21522388059701492, + "grad_norm": 0.8402095797172084, + "learning_rate": 4.564039396308319e-06, + "loss": 0.3549, + "step": 721 + }, + { + "epoch": 0.2155223880597015, + "grad_norm": 0.8965903934098873, + "learning_rate": 4.5626744815255454e-06, + "loss": 0.382, + "step": 722 + }, + { + "epoch": 0.21582089552238806, + "grad_norm": 0.8307615244096349, + "learning_rate": 4.5613076381924084e-06, + "loss": 0.3527, + "step": 723 + }, + { + "epoch": 0.21611940298507462, + "grad_norm": 0.8695011048660274, + "learning_rate": 4.559938867586874e-06, + "loss": 0.3689, + "step": 724 + }, + { + "epoch": 0.21641791044776118, + "grad_norm": 0.9306482302064063, + "learning_rate": 4.5585681709887104e-06, + "loss": 0.391, + "step": 725 + }, + { + "epoch": 0.21671641791044777, + "grad_norm": 0.828492781007522, + "learning_rate": 4.557195549679484e-06, + "loss": 0.3234, + "step": 726 + }, + { + "epoch": 0.21701492537313433, + "grad_norm": 0.8573294375941618, + "learning_rate": 4.555821004942563e-06, + "loss": 0.386, + "step": 727 + }, + { + "epoch": 0.21731343283582089, + "grad_norm": 0.875314700850979, + "learning_rate": 4.554444538063113e-06, + "loss": 0.3569, + "step": 728 + }, + { + "epoch": 0.21761194029850747, + "grad_norm": 0.825263881051261, + "learning_rate": 4.553066150328097e-06, + "loss": 0.3716, + "step": 729 + }, + { + "epoch": 0.21791044776119403, + "grad_norm": 0.9101676571634926, + "learning_rate": 4.5516858430262745e-06, + "loss": 0.3819, + "step": 730 + }, + { + "epoch": 0.2182089552238806, + "grad_norm": 0.7788518704841725, + "learning_rate": 4.550303617448198e-06, + "loss": 0.34, + "step": 731 + }, + { + "epoch": 0.21850746268656718, + "grad_norm": 0.7994914875640055, + "learning_rate": 4.548919474886217e-06, + "loss": 0.3317, + "step": 732 + }, + { + "epoch": 0.21880597014925374, + "grad_norm": 0.8615980705638517, + "learning_rate": 4.547533416634468e-06, + "loss": 0.3488, + "step": 733 + }, + { + "epoch": 0.2191044776119403, + "grad_norm": 0.8580797802618181, + "learning_rate": 4.546145443988883e-06, + "loss": 0.3386, + "step": 734 + }, + { + "epoch": 0.21940298507462686, + "grad_norm": 0.8023378839095241, + "learning_rate": 4.544755558247184e-06, + "loss": 0.3822, + "step": 735 + }, + { + "epoch": 0.21970149253731344, + "grad_norm": 0.8955427517527177, + "learning_rate": 4.543363760708878e-06, + "loss": 0.3656, + "step": 736 + }, + { + "epoch": 0.22, + "grad_norm": 0.8836190670530261, + "learning_rate": 4.541970052675262e-06, + "loss": 0.3929, + "step": 737 + }, + { + "epoch": 0.22029850746268656, + "grad_norm": 0.9099664733412756, + "learning_rate": 4.540574435449421e-06, + "loss": 0.3515, + "step": 738 + }, + { + "epoch": 0.22059701492537315, + "grad_norm": 0.9197478364346269, + "learning_rate": 4.539176910336221e-06, + "loss": 0.4125, + "step": 739 + }, + { + "epoch": 0.2208955223880597, + "grad_norm": 0.8096717598195738, + "learning_rate": 4.537777478642317e-06, + "loss": 0.3441, + "step": 740 + }, + { + "epoch": 0.22119402985074627, + "grad_norm": 0.7424662504675141, + "learning_rate": 4.53637614167614e-06, + "loss": 0.3466, + "step": 741 + }, + { + "epoch": 0.22149253731343282, + "grad_norm": 0.8596564765091457, + "learning_rate": 4.534972900747907e-06, + "loss": 0.3607, + "step": 742 + }, + { + "epoch": 0.2217910447761194, + "grad_norm": 0.8235466276329729, + "learning_rate": 4.533567757169615e-06, + "loss": 0.3597, + "step": 743 + }, + { + "epoch": 0.22208955223880597, + "grad_norm": 1.0365662627082053, + "learning_rate": 4.532160712255037e-06, + "loss": 0.3896, + "step": 744 + }, + { + "epoch": 0.22238805970149253, + "grad_norm": 1.0076481822719652, + "learning_rate": 4.530751767319729e-06, + "loss": 0.3913, + "step": 745 + }, + { + "epoch": 0.22268656716417912, + "grad_norm": 0.8582474689745617, + "learning_rate": 4.529340923681016e-06, + "loss": 0.3245, + "step": 746 + }, + { + "epoch": 0.22298507462686568, + "grad_norm": 0.8822511043553072, + "learning_rate": 4.527928182658006e-06, + "loss": 0.3766, + "step": 747 + }, + { + "epoch": 0.22328358208955223, + "grad_norm": 0.8534191167038851, + "learning_rate": 4.526513545571576e-06, + "loss": 0.3384, + "step": 748 + }, + { + "epoch": 0.2235820895522388, + "grad_norm": 0.8670338466114618, + "learning_rate": 4.525097013744377e-06, + "loss": 0.3828, + "step": 749 + }, + { + "epoch": 0.22388059701492538, + "grad_norm": 0.794997902918133, + "learning_rate": 4.523678588500831e-06, + "loss": 0.3439, + "step": 750 + }, + { + "epoch": 0.22417910447761194, + "grad_norm": 0.8637367121235244, + "learning_rate": 4.522258271167134e-06, + "loss": 0.3854, + "step": 751 + }, + { + "epoch": 0.2244776119402985, + "grad_norm": 0.8685702777896596, + "learning_rate": 4.520836063071245e-06, + "loss": 0.3887, + "step": 752 + }, + { + "epoch": 0.2247761194029851, + "grad_norm": 0.772264036444534, + "learning_rate": 4.519411965542895e-06, + "loss": 0.3192, + "step": 753 + }, + { + "epoch": 0.22507462686567165, + "grad_norm": 0.789258733130727, + "learning_rate": 4.517985979913581e-06, + "loss": 0.3527, + "step": 754 + }, + { + "epoch": 0.2253731343283582, + "grad_norm": 0.7727803198307888, + "learning_rate": 4.516558107516563e-06, + "loss": 0.3764, + "step": 755 + }, + { + "epoch": 0.22567164179104476, + "grad_norm": 0.8985233297487031, + "learning_rate": 4.51512834968687e-06, + "loss": 0.3319, + "step": 756 + }, + { + "epoch": 0.22597014925373135, + "grad_norm": 0.7858913969509037, + "learning_rate": 4.513696707761287e-06, + "loss": 0.3422, + "step": 757 + }, + { + "epoch": 0.2262686567164179, + "grad_norm": 0.9507634234561627, + "learning_rate": 4.512263183078367e-06, + "loss": 0.3781, + "step": 758 + }, + { + "epoch": 0.22656716417910447, + "grad_norm": 0.8464279365702985, + "learning_rate": 4.510827776978419e-06, + "loss": 0.3927, + "step": 759 + }, + { + "epoch": 0.22686567164179106, + "grad_norm": 0.8120744514290998, + "learning_rate": 4.5093904908035145e-06, + "loss": 0.3431, + "step": 760 + }, + { + "epoch": 0.22716417910447761, + "grad_norm": 0.7500430603126108, + "learning_rate": 4.50795132589748e-06, + "loss": 0.2971, + "step": 761 + }, + { + "epoch": 0.22746268656716417, + "grad_norm": 1.0783281815992556, + "learning_rate": 4.5065102836059e-06, + "loss": 0.377, + "step": 762 + }, + { + "epoch": 0.22776119402985073, + "grad_norm": 0.8318583999356356, + "learning_rate": 4.505067365276112e-06, + "loss": 0.3356, + "step": 763 + }, + { + "epoch": 0.22805970149253732, + "grad_norm": 0.8126656695454364, + "learning_rate": 4.503622572257212e-06, + "loss": 0.348, + "step": 764 + }, + { + "epoch": 0.22835820895522388, + "grad_norm": 0.8301469658419047, + "learning_rate": 4.502175905900046e-06, + "loss": 0.3758, + "step": 765 + }, + { + "epoch": 0.22865671641791044, + "grad_norm": 0.8038946408180568, + "learning_rate": 4.50072736755721e-06, + "loss": 0.3598, + "step": 766 + }, + { + "epoch": 0.22895522388059703, + "grad_norm": 0.8541693014723826, + "learning_rate": 4.499276958583054e-06, + "loss": 0.3574, + "step": 767 + }, + { + "epoch": 0.22925373134328358, + "grad_norm": 0.8246286524267744, + "learning_rate": 4.497824680333674e-06, + "loss": 0.3733, + "step": 768 + }, + { + "epoch": 0.22955223880597014, + "grad_norm": 0.7782825828612279, + "learning_rate": 4.496370534166915e-06, + "loss": 0.3105, + "step": 769 + }, + { + "epoch": 0.2298507462686567, + "grad_norm": 0.8857118941633463, + "learning_rate": 4.494914521442368e-06, + "loss": 0.373, + "step": 770 + }, + { + "epoch": 0.2301492537313433, + "grad_norm": 0.8395319587604902, + "learning_rate": 4.49345664352137e-06, + "loss": 0.3468, + "step": 771 + }, + { + "epoch": 0.23044776119402985, + "grad_norm": 0.8752411500887408, + "learning_rate": 4.491996901766999e-06, + "loss": 0.3696, + "step": 772 + }, + { + "epoch": 0.2307462686567164, + "grad_norm": 0.9297321969188959, + "learning_rate": 4.4905352975440815e-06, + "loss": 0.382, + "step": 773 + }, + { + "epoch": 0.231044776119403, + "grad_norm": 0.8328696745976142, + "learning_rate": 4.489071832219181e-06, + "loss": 0.3715, + "step": 774 + }, + { + "epoch": 0.23134328358208955, + "grad_norm": 0.8741052478618722, + "learning_rate": 4.487606507160599e-06, + "loss": 0.3794, + "step": 775 + }, + { + "epoch": 0.2316417910447761, + "grad_norm": 0.8662088883531012, + "learning_rate": 4.486139323738382e-06, + "loss": 0.3659, + "step": 776 + }, + { + "epoch": 0.2319402985074627, + "grad_norm": 0.8934574672183584, + "learning_rate": 4.48467028332431e-06, + "loss": 0.3658, + "step": 777 + }, + { + "epoch": 0.23223880597014926, + "grad_norm": 1.1016352512450371, + "learning_rate": 4.483199387291898e-06, + "loss": 0.3661, + "step": 778 + }, + { + "epoch": 0.23253731343283582, + "grad_norm": 0.7828812345663573, + "learning_rate": 4.4817266370164e-06, + "loss": 0.3567, + "step": 779 + }, + { + "epoch": 0.23283582089552238, + "grad_norm": 0.8193782009262484, + "learning_rate": 4.480252033874801e-06, + "loss": 0.3539, + "step": 780 + }, + { + "epoch": 0.23313432835820896, + "grad_norm": 0.8073010853910519, + "learning_rate": 4.47877557924582e-06, + "loss": 0.3324, + "step": 781 + }, + { + "epoch": 0.23343283582089552, + "grad_norm": 0.9202437087315363, + "learning_rate": 4.477297274509904e-06, + "loss": 0.3666, + "step": 782 + }, + { + "epoch": 0.23373134328358208, + "grad_norm": 0.8160201474275349, + "learning_rate": 4.475817121049234e-06, + "loss": 0.3718, + "step": 783 + }, + { + "epoch": 0.23402985074626867, + "grad_norm": 0.8630655153632912, + "learning_rate": 4.474335120247716e-06, + "loss": 0.4114, + "step": 784 + }, + { + "epoch": 0.23432835820895523, + "grad_norm": 0.8729151161750691, + "learning_rate": 4.472851273490985e-06, + "loss": 0.3725, + "step": 785 + }, + { + "epoch": 0.2346268656716418, + "grad_norm": 0.8813347017833638, + "learning_rate": 4.471365582166401e-06, + "loss": 0.3759, + "step": 786 + }, + { + "epoch": 0.23492537313432835, + "grad_norm": 0.8663722741697655, + "learning_rate": 4.46987804766305e-06, + "loss": 0.3594, + "step": 787 + }, + { + "epoch": 0.23522388059701493, + "grad_norm": 0.7836171249800367, + "learning_rate": 4.46838867137174e-06, + "loss": 0.3079, + "step": 788 + }, + { + "epoch": 0.2355223880597015, + "grad_norm": 0.8419069340655513, + "learning_rate": 4.466897454685003e-06, + "loss": 0.3952, + "step": 789 + }, + { + "epoch": 0.23582089552238805, + "grad_norm": 0.844324374913284, + "learning_rate": 4.465404398997089e-06, + "loss": 0.3685, + "step": 790 + }, + { + "epoch": 0.23611940298507464, + "grad_norm": 0.8830236417572802, + "learning_rate": 4.463909505703968e-06, + "loss": 0.3616, + "step": 791 + }, + { + "epoch": 0.2364179104477612, + "grad_norm": 0.8444454534060524, + "learning_rate": 4.46241277620333e-06, + "loss": 0.3783, + "step": 792 + }, + { + "epoch": 0.23671641791044776, + "grad_norm": 0.7655987801662917, + "learning_rate": 4.460914211894579e-06, + "loss": 0.3183, + "step": 793 + }, + { + "epoch": 0.23701492537313432, + "grad_norm": 0.7941247862341938, + "learning_rate": 4.459413814178839e-06, + "loss": 0.366, + "step": 794 + }, + { + "epoch": 0.2373134328358209, + "grad_norm": 0.8222962634858313, + "learning_rate": 4.4579115844589426e-06, + "loss": 0.3349, + "step": 795 + }, + { + "epoch": 0.23761194029850746, + "grad_norm": 0.8913670067022349, + "learning_rate": 4.4564075241394386e-06, + "loss": 0.3692, + "step": 796 + }, + { + "epoch": 0.23791044776119402, + "grad_norm": 0.8571392122510596, + "learning_rate": 4.454901634626587e-06, + "loss": 0.3689, + "step": 797 + }, + { + "epoch": 0.2382089552238806, + "grad_norm": 0.8061419047479605, + "learning_rate": 4.4533939173283585e-06, + "loss": 0.3451, + "step": 798 + }, + { + "epoch": 0.23850746268656717, + "grad_norm": 0.83880369164832, + "learning_rate": 4.451884373654431e-06, + "loss": 0.3846, + "step": 799 + }, + { + "epoch": 0.23880597014925373, + "grad_norm": 0.7991787716936328, + "learning_rate": 4.450373005016191e-06, + "loss": 0.3514, + "step": 800 + }, + { + "epoch": 0.23910447761194029, + "grad_norm": 0.9096163448081129, + "learning_rate": 4.448859812826732e-06, + "loss": 0.3704, + "step": 801 + }, + { + "epoch": 0.23940298507462687, + "grad_norm": 0.8414143701711667, + "learning_rate": 4.447344798500852e-06, + "loss": 0.3674, + "step": 802 + }, + { + "epoch": 0.23970149253731343, + "grad_norm": 0.8352881489659175, + "learning_rate": 4.445827963455051e-06, + "loss": 0.3768, + "step": 803 + }, + { + "epoch": 0.24, + "grad_norm": 0.8031794777452957, + "learning_rate": 4.444309309107535e-06, + "loss": 0.3423, + "step": 804 + }, + { + "epoch": 0.24029850746268658, + "grad_norm": 0.8060785944842502, + "learning_rate": 4.442788836878207e-06, + "loss": 0.3131, + "step": 805 + }, + { + "epoch": 0.24059701492537314, + "grad_norm": 0.8202351464416616, + "learning_rate": 4.441266548188673e-06, + "loss": 0.3566, + "step": 806 + }, + { + "epoch": 0.2408955223880597, + "grad_norm": 0.905814083393673, + "learning_rate": 4.439742444462234e-06, + "loss": 0.3812, + "step": 807 + }, + { + "epoch": 0.24119402985074626, + "grad_norm": 0.7839467463765396, + "learning_rate": 4.43821652712389e-06, + "loss": 0.3544, + "step": 808 + }, + { + "epoch": 0.24149253731343284, + "grad_norm": 0.9113819484994593, + "learning_rate": 4.436688797600338e-06, + "loss": 0.4163, + "step": 809 + }, + { + "epoch": 0.2417910447761194, + "grad_norm": 0.8245251973742547, + "learning_rate": 4.435159257319968e-06, + "loss": 0.349, + "step": 810 + }, + { + "epoch": 0.24208955223880596, + "grad_norm": 0.8775639237639656, + "learning_rate": 4.43362790771286e-06, + "loss": 0.4072, + "step": 811 + }, + { + "epoch": 0.24238805970149255, + "grad_norm": 0.877280517556995, + "learning_rate": 4.432094750210791e-06, + "loss": 0.3773, + "step": 812 + }, + { + "epoch": 0.2426865671641791, + "grad_norm": 0.8453157272615807, + "learning_rate": 4.430559786247227e-06, + "loss": 0.4092, + "step": 813 + }, + { + "epoch": 0.24298507462686567, + "grad_norm": 0.8066208315679838, + "learning_rate": 4.429023017257319e-06, + "loss": 0.3573, + "step": 814 + }, + { + "epoch": 0.24328358208955222, + "grad_norm": 0.8304243577622432, + "learning_rate": 4.42748444467791e-06, + "loss": 0.3086, + "step": 815 + }, + { + "epoch": 0.2435820895522388, + "grad_norm": 0.8380366969931612, + "learning_rate": 4.425944069947528e-06, + "loss": 0.3458, + "step": 816 + }, + { + "epoch": 0.24388059701492537, + "grad_norm": 0.7673079862377152, + "learning_rate": 4.424401894506386e-06, + "loss": 0.3514, + "step": 817 + }, + { + "epoch": 0.24417910447761193, + "grad_norm": 0.8657431046670215, + "learning_rate": 4.4228579197963795e-06, + "loss": 0.3648, + "step": 818 + }, + { + "epoch": 0.24447761194029852, + "grad_norm": 0.8310942286263857, + "learning_rate": 4.421312147261087e-06, + "loss": 0.2978, + "step": 819 + }, + { + "epoch": 0.24477611940298508, + "grad_norm": 0.8444445715616047, + "learning_rate": 4.4197645783457695e-06, + "loss": 0.3907, + "step": 820 + }, + { + "epoch": 0.24507462686567164, + "grad_norm": 0.9112359004688853, + "learning_rate": 4.418215214497366e-06, + "loss": 0.3833, + "step": 821 + }, + { + "epoch": 0.24537313432835822, + "grad_norm": 0.8107922970059999, + "learning_rate": 4.416664057164493e-06, + "loss": 0.3672, + "step": 822 + }, + { + "epoch": 0.24567164179104478, + "grad_norm": 0.8722125495392787, + "learning_rate": 4.415111107797445e-06, + "loss": 0.3617, + "step": 823 + }, + { + "epoch": 0.24597014925373134, + "grad_norm": 1.061183148831863, + "learning_rate": 4.413556367848193e-06, + "loss": 0.413, + "step": 824 + }, + { + "epoch": 0.2462686567164179, + "grad_norm": 0.7884747931084407, + "learning_rate": 4.41199983877038e-06, + "loss": 0.375, + "step": 825 + }, + { + "epoch": 0.2465671641791045, + "grad_norm": 0.93921749874484, + "learning_rate": 4.410441522019322e-06, + "loss": 0.3671, + "step": 826 + }, + { + "epoch": 0.24686567164179105, + "grad_norm": 0.8800331687997713, + "learning_rate": 4.4088814190520085e-06, + "loss": 0.347, + "step": 827 + }, + { + "epoch": 0.2471641791044776, + "grad_norm": 0.8608882108195471, + "learning_rate": 4.407319531327097e-06, + "loss": 0.3705, + "step": 828 + }, + { + "epoch": 0.2474626865671642, + "grad_norm": 0.8578378741311872, + "learning_rate": 4.405755860304915e-06, + "loss": 0.3574, + "step": 829 + }, + { + "epoch": 0.24776119402985075, + "grad_norm": 0.8321521972748156, + "learning_rate": 4.404190407447456e-06, + "loss": 0.3639, + "step": 830 + }, + { + "epoch": 0.2480597014925373, + "grad_norm": 0.8170637261740442, + "learning_rate": 4.402623174218381e-06, + "loss": 0.3568, + "step": 831 + }, + { + "epoch": 0.24835820895522387, + "grad_norm": 0.8552394496252084, + "learning_rate": 4.401054162083015e-06, + "loss": 0.3982, + "step": 832 + }, + { + "epoch": 0.24865671641791046, + "grad_norm": 0.8273057774794584, + "learning_rate": 4.399483372508345e-06, + "loss": 0.3223, + "step": 833 + }, + { + "epoch": 0.24895522388059702, + "grad_norm": 0.8012455720488535, + "learning_rate": 4.3979108069630226e-06, + "loss": 0.3675, + "step": 834 + }, + { + "epoch": 0.24925373134328357, + "grad_norm": 0.8788275931412666, + "learning_rate": 4.396336466917357e-06, + "loss": 0.3587, + "step": 835 + }, + { + "epoch": 0.24955223880597016, + "grad_norm": 0.8481959047937334, + "learning_rate": 4.394760353843318e-06, + "loss": 0.3916, + "step": 836 + }, + { + "epoch": 0.24985074626865672, + "grad_norm": 0.820980214165406, + "learning_rate": 4.393182469214533e-06, + "loss": 0.3731, + "step": 837 + }, + { + "epoch": 0.2501492537313433, + "grad_norm": 0.8685340899908907, + "learning_rate": 4.391602814506285e-06, + "loss": 0.3422, + "step": 838 + }, + { + "epoch": 0.25044776119402984, + "grad_norm": 0.8722156332327601, + "learning_rate": 4.390021391195514e-06, + "loss": 0.3848, + "step": 839 + }, + { + "epoch": 0.2507462686567164, + "grad_norm": 0.8131879266539699, + "learning_rate": 4.388438200760812e-06, + "loss": 0.3473, + "step": 840 + }, + { + "epoch": 0.251044776119403, + "grad_norm": 0.8738620972075055, + "learning_rate": 4.386853244682422e-06, + "loss": 0.3505, + "step": 841 + }, + { + "epoch": 0.25134328358208957, + "grad_norm": 0.8455634576936628, + "learning_rate": 4.385266524442241e-06, + "loss": 0.3656, + "step": 842 + }, + { + "epoch": 0.25164179104477613, + "grad_norm": 0.9232241608401891, + "learning_rate": 4.383678041523813e-06, + "loss": 0.3308, + "step": 843 + }, + { + "epoch": 0.2519402985074627, + "grad_norm": 0.7802438443664448, + "learning_rate": 4.382087797412331e-06, + "loss": 0.3177, + "step": 844 + }, + { + "epoch": 0.25223880597014925, + "grad_norm": 0.840623387098109, + "learning_rate": 4.380495793594634e-06, + "loss": 0.3978, + "step": 845 + }, + { + "epoch": 0.2525373134328358, + "grad_norm": 0.8598753419489917, + "learning_rate": 4.378902031559208e-06, + "loss": 0.3643, + "step": 846 + }, + { + "epoch": 0.25283582089552237, + "grad_norm": 0.8502479164780553, + "learning_rate": 4.377306512796179e-06, + "loss": 0.3116, + "step": 847 + }, + { + "epoch": 0.253134328358209, + "grad_norm": 0.8471892836766642, + "learning_rate": 4.375709238797322e-06, + "loss": 0.3459, + "step": 848 + }, + { + "epoch": 0.25343283582089554, + "grad_norm": 0.8949946135476528, + "learning_rate": 4.3741102110560465e-06, + "loss": 0.4197, + "step": 849 + }, + { + "epoch": 0.2537313432835821, + "grad_norm": 0.8390558579599957, + "learning_rate": 4.372509431067404e-06, + "loss": 0.3641, + "step": 850 + }, + { + "epoch": 0.25402985074626866, + "grad_norm": 0.8295363073039969, + "learning_rate": 4.370906900328087e-06, + "loss": 0.3085, + "step": 851 + }, + { + "epoch": 0.2543283582089552, + "grad_norm": 0.8920142421824904, + "learning_rate": 4.369302620336422e-06, + "loss": 0.3605, + "step": 852 + }, + { + "epoch": 0.2546268656716418, + "grad_norm": 0.8314417922487575, + "learning_rate": 4.367696592592371e-06, + "loss": 0.3648, + "step": 853 + }, + { + "epoch": 0.25492537313432834, + "grad_norm": 0.9416237229806054, + "learning_rate": 4.36608881859753e-06, + "loss": 0.4322, + "step": 854 + }, + { + "epoch": 0.25522388059701495, + "grad_norm": 0.8552595456528146, + "learning_rate": 4.364479299855131e-06, + "loss": 0.376, + "step": 855 + }, + { + "epoch": 0.2555223880597015, + "grad_norm": 0.7986846174676006, + "learning_rate": 4.362868037870033e-06, + "loss": 0.3129, + "step": 856 + }, + { + "epoch": 0.25582089552238807, + "grad_norm": 0.8367097908730075, + "learning_rate": 4.361255034148727e-06, + "loss": 0.34, + "step": 857 + }, + { + "epoch": 0.25611940298507463, + "grad_norm": 0.8933913196848361, + "learning_rate": 4.359640290199331e-06, + "loss": 0.3574, + "step": 858 + }, + { + "epoch": 0.2564179104477612, + "grad_norm": 0.8314203802986557, + "learning_rate": 4.3580238075315954e-06, + "loss": 0.3454, + "step": 859 + }, + { + "epoch": 0.25671641791044775, + "grad_norm": 0.8392401641828928, + "learning_rate": 4.356405587656886e-06, + "loss": 0.3616, + "step": 860 + }, + { + "epoch": 0.2570149253731343, + "grad_norm": 0.8057344142131634, + "learning_rate": 4.354785632088204e-06, + "loss": 0.3429, + "step": 861 + }, + { + "epoch": 0.2573134328358209, + "grad_norm": 0.7265185841371562, + "learning_rate": 4.353163942340166e-06, + "loss": 0.3316, + "step": 862 + }, + { + "epoch": 0.2576119402985075, + "grad_norm": 0.8861442387020552, + "learning_rate": 4.351540519929013e-06, + "loss": 0.3682, + "step": 863 + }, + { + "epoch": 0.25791044776119404, + "grad_norm": 0.8207676152529324, + "learning_rate": 4.349915366372605e-06, + "loss": 0.3465, + "step": 864 + }, + { + "epoch": 0.2582089552238806, + "grad_norm": 0.8401159847300563, + "learning_rate": 4.348288483190422e-06, + "loss": 0.3628, + "step": 865 + }, + { + "epoch": 0.25850746268656716, + "grad_norm": 0.9061691939819366, + "learning_rate": 4.346659871903558e-06, + "loss": 0.3269, + "step": 866 + }, + { + "epoch": 0.2588059701492537, + "grad_norm": 0.9554654260605913, + "learning_rate": 4.345029534034727e-06, + "loss": 0.3688, + "step": 867 + }, + { + "epoch": 0.2591044776119403, + "grad_norm": 0.8768435732446233, + "learning_rate": 4.343397471108254e-06, + "loss": 0.3467, + "step": 868 + }, + { + "epoch": 0.2594029850746269, + "grad_norm": 0.83519783168284, + "learning_rate": 4.341763684650078e-06, + "loss": 0.3664, + "step": 869 + }, + { + "epoch": 0.25970149253731345, + "grad_norm": 0.8733824091023256, + "learning_rate": 4.340128176187751e-06, + "loss": 0.3894, + "step": 870 + }, + { + "epoch": 0.26, + "grad_norm": 0.7435126731069427, + "learning_rate": 4.338490947250431e-06, + "loss": 0.3265, + "step": 871 + }, + { + "epoch": 0.26029850746268657, + "grad_norm": 0.7602478532739807, + "learning_rate": 4.33685199936889e-06, + "loss": 0.3255, + "step": 872 + }, + { + "epoch": 0.2605970149253731, + "grad_norm": 0.8396951710141938, + "learning_rate": 4.335211334075502e-06, + "loss": 0.3772, + "step": 873 + }, + { + "epoch": 0.2608955223880597, + "grad_norm": 0.8535455460088422, + "learning_rate": 4.33356895290425e-06, + "loss": 0.3221, + "step": 874 + }, + { + "epoch": 0.26119402985074625, + "grad_norm": 0.934656189548925, + "learning_rate": 4.331924857390722e-06, + "loss": 0.3899, + "step": 875 + }, + { + "epoch": 0.26149253731343286, + "grad_norm": 0.7839885412025712, + "learning_rate": 4.330279049072103e-06, + "loss": 0.3278, + "step": 876 + }, + { + "epoch": 0.2617910447761194, + "grad_norm": 0.8288101118788596, + "learning_rate": 4.328631529487188e-06, + "loss": 0.359, + "step": 877 + }, + { + "epoch": 0.262089552238806, + "grad_norm": 0.7635692010308125, + "learning_rate": 4.326982300176366e-06, + "loss": 0.3315, + "step": 878 + }, + { + "epoch": 0.26238805970149254, + "grad_norm": 0.8317735162263449, + "learning_rate": 4.325331362681624e-06, + "loss": 0.3464, + "step": 879 + }, + { + "epoch": 0.2626865671641791, + "grad_norm": 0.8075219201660886, + "learning_rate": 4.323678718546552e-06, + "loss": 0.3477, + "step": 880 + }, + { + "epoch": 0.26298507462686566, + "grad_norm": 0.8112126719014313, + "learning_rate": 4.3220243693163305e-06, + "loss": 0.4148, + "step": 881 + }, + { + "epoch": 0.2632835820895522, + "grad_norm": 0.8176840947105901, + "learning_rate": 4.3203683165377355e-06, + "loss": 0.4178, + "step": 882 + }, + { + "epoch": 0.26358208955223883, + "grad_norm": 1.0009446140687355, + "learning_rate": 4.318710561759137e-06, + "loss": 0.3289, + "step": 883 + }, + { + "epoch": 0.2638805970149254, + "grad_norm": 0.7534551657111695, + "learning_rate": 4.317051106530492e-06, + "loss": 0.3402, + "step": 884 + }, + { + "epoch": 0.26417910447761195, + "grad_norm": 0.9072522401528668, + "learning_rate": 4.315389952403355e-06, + "loss": 0.3817, + "step": 885 + }, + { + "epoch": 0.2644776119402985, + "grad_norm": 0.8682117406925437, + "learning_rate": 4.313727100930862e-06, + "loss": 0.3915, + "step": 886 + }, + { + "epoch": 0.26477611940298507, + "grad_norm": 0.8515997109671464, + "learning_rate": 4.312062553667739e-06, + "loss": 0.3397, + "step": 887 + }, + { + "epoch": 0.2650746268656716, + "grad_norm": 0.837060052477179, + "learning_rate": 4.310396312170298e-06, + "loss": 0.3495, + "step": 888 + }, + { + "epoch": 0.2653731343283582, + "grad_norm": 0.960847057443914, + "learning_rate": 4.308728377996433e-06, + "loss": 0.345, + "step": 889 + }, + { + "epoch": 0.2656716417910448, + "grad_norm": 0.8594679219941264, + "learning_rate": 4.307058752705623e-06, + "loss": 0.3193, + "step": 890 + }, + { + "epoch": 0.26597014925373136, + "grad_norm": 0.8579668256137557, + "learning_rate": 4.3053874378589265e-06, + "loss": 0.3587, + "step": 891 + }, + { + "epoch": 0.2662686567164179, + "grad_norm": 0.815761166827642, + "learning_rate": 4.303714435018981e-06, + "loss": 0.392, + "step": 892 + }, + { + "epoch": 0.2665671641791045, + "grad_norm": 0.7691072442891018, + "learning_rate": 4.3020397457500055e-06, + "loss": 0.3449, + "step": 893 + }, + { + "epoch": 0.26686567164179104, + "grad_norm": 0.8546963433650799, + "learning_rate": 4.300363371617792e-06, + "loss": 0.3615, + "step": 894 + }, + { + "epoch": 0.2671641791044776, + "grad_norm": 0.8954925431678488, + "learning_rate": 4.29868531418971e-06, + "loss": 0.387, + "step": 895 + }, + { + "epoch": 0.26746268656716415, + "grad_norm": 0.8890912758541257, + "learning_rate": 4.297005575034701e-06, + "loss": 0.338, + "step": 896 + }, + { + "epoch": 0.26776119402985077, + "grad_norm": 0.9013891259842057, + "learning_rate": 4.295324155723283e-06, + "loss": 0.3627, + "step": 897 + }, + { + "epoch": 0.26805970149253733, + "grad_norm": 0.7909956097157833, + "learning_rate": 4.29364105782754e-06, + "loss": 0.3708, + "step": 898 + }, + { + "epoch": 0.2683582089552239, + "grad_norm": 0.8080884146208309, + "learning_rate": 4.291956282921129e-06, + "loss": 0.3257, + "step": 899 + }, + { + "epoch": 0.26865671641791045, + "grad_norm": 0.7819915704985843, + "learning_rate": 4.2902698325792715e-06, + "loss": 0.3916, + "step": 900 + }, + { + "epoch": 0.268955223880597, + "grad_norm": 0.8683299968211471, + "learning_rate": 4.2885817083787584e-06, + "loss": 0.3669, + "step": 901 + }, + { + "epoch": 0.26925373134328356, + "grad_norm": 0.8085758598925459, + "learning_rate": 4.286891911897944e-06, + "loss": 0.3187, + "step": 902 + }, + { + "epoch": 0.2695522388059701, + "grad_norm": 0.8590837328246107, + "learning_rate": 4.285200444716749e-06, + "loss": 0.3785, + "step": 903 + }, + { + "epoch": 0.26985074626865674, + "grad_norm": 0.8551915445353413, + "learning_rate": 4.283507308416651e-06, + "loss": 0.3542, + "step": 904 + }, + { + "epoch": 0.2701492537313433, + "grad_norm": 0.9079671987533409, + "learning_rate": 4.281812504580694e-06, + "loss": 0.3634, + "step": 905 + }, + { + "epoch": 0.27044776119402986, + "grad_norm": 0.8369860857576275, + "learning_rate": 4.280116034793477e-06, + "loss": 0.3708, + "step": 906 + }, + { + "epoch": 0.2707462686567164, + "grad_norm": 0.8664538534678876, + "learning_rate": 4.278417900641157e-06, + "loss": 0.4132, + "step": 907 + }, + { + "epoch": 0.271044776119403, + "grad_norm": 0.805731173102517, + "learning_rate": 4.2767181037114494e-06, + "loss": 0.3946, + "step": 908 + }, + { + "epoch": 0.27134328358208953, + "grad_norm": 0.8111842698029063, + "learning_rate": 4.275016645593622e-06, + "loss": 0.3616, + "step": 909 + }, + { + "epoch": 0.2716417910447761, + "grad_norm": 0.7534971014345517, + "learning_rate": 4.2733135278784975e-06, + "loss": 0.3284, + "step": 910 + }, + { + "epoch": 0.2719402985074627, + "grad_norm": 0.8332630746157706, + "learning_rate": 4.271608752158448e-06, + "loss": 0.3577, + "step": 911 + }, + { + "epoch": 0.27223880597014927, + "grad_norm": 0.8441430505402697, + "learning_rate": 4.269902320027399e-06, + "loss": 0.3927, + "step": 912 + }, + { + "epoch": 0.2725373134328358, + "grad_norm": 0.8088212649154801, + "learning_rate": 4.268194233080823e-06, + "loss": 0.3604, + "step": 913 + }, + { + "epoch": 0.2728358208955224, + "grad_norm": 0.8504371247895061, + "learning_rate": 4.266484492915738e-06, + "loss": 0.328, + "step": 914 + }, + { + "epoch": 0.27313432835820894, + "grad_norm": 0.8838721420845274, + "learning_rate": 4.264773101130711e-06, + "loss": 0.3491, + "step": 915 + }, + { + "epoch": 0.2734328358208955, + "grad_norm": 0.8378168662917916, + "learning_rate": 4.26306005932585e-06, + "loss": 0.349, + "step": 916 + }, + { + "epoch": 0.27373134328358206, + "grad_norm": 0.8952997918529593, + "learning_rate": 4.2613453691028085e-06, + "loss": 0.4315, + "step": 917 + }, + { + "epoch": 0.2740298507462687, + "grad_norm": 0.8262214290892325, + "learning_rate": 4.2596290320647795e-06, + "loss": 0.3247, + "step": 918 + }, + { + "epoch": 0.27432835820895524, + "grad_norm": 0.7936317313144646, + "learning_rate": 4.257911049816497e-06, + "loss": 0.3697, + "step": 919 + }, + { + "epoch": 0.2746268656716418, + "grad_norm": 0.8359230938626984, + "learning_rate": 4.256191423964231e-06, + "loss": 0.3641, + "step": 920 + }, + { + "epoch": 0.27492537313432835, + "grad_norm": 0.8439085515909566, + "learning_rate": 4.254470156115792e-06, + "loss": 0.3295, + "step": 921 + }, + { + "epoch": 0.2752238805970149, + "grad_norm": 0.9191921854597581, + "learning_rate": 4.252747247880521e-06, + "loss": 0.4339, + "step": 922 + }, + { + "epoch": 0.2755223880597015, + "grad_norm": 0.9149683459792712, + "learning_rate": 4.2510227008692974e-06, + "loss": 0.3599, + "step": 923 + }, + { + "epoch": 0.27582089552238803, + "grad_norm": 0.8114463730873631, + "learning_rate": 4.2492965166945295e-06, + "loss": 0.3911, + "step": 924 + }, + { + "epoch": 0.27611940298507465, + "grad_norm": 0.8792190926539251, + "learning_rate": 4.247568696970158e-06, + "loss": 0.3544, + "step": 925 + }, + { + "epoch": 0.2764179104477612, + "grad_norm": 0.8336709348423672, + "learning_rate": 4.2458392433116525e-06, + "loss": 0.3322, + "step": 926 + }, + { + "epoch": 0.27671641791044777, + "grad_norm": 1.020710396219329, + "learning_rate": 4.244108157336009e-06, + "loss": 0.3604, + "step": 927 + }, + { + "epoch": 0.2770149253731343, + "grad_norm": 0.8143571913624713, + "learning_rate": 4.2423754406617505e-06, + "loss": 0.3495, + "step": 928 + }, + { + "epoch": 0.2773134328358209, + "grad_norm": 0.8356882733846251, + "learning_rate": 4.2406410949089255e-06, + "loss": 0.368, + "step": 929 + }, + { + "epoch": 0.27761194029850744, + "grad_norm": 0.8645784597745637, + "learning_rate": 4.238905121699105e-06, + "loss": 0.3739, + "step": 930 + }, + { + "epoch": 0.27791044776119406, + "grad_norm": 0.8593811145607515, + "learning_rate": 4.237167522655382e-06, + "loss": 0.3174, + "step": 931 + }, + { + "epoch": 0.2782089552238806, + "grad_norm": 0.9107381802719423, + "learning_rate": 4.235428299402369e-06, + "loss": 0.4168, + "step": 932 + }, + { + "epoch": 0.2785074626865672, + "grad_norm": 0.8146960301870598, + "learning_rate": 4.2336874535661966e-06, + "loss": 0.3381, + "step": 933 + }, + { + "epoch": 0.27880597014925373, + "grad_norm": 0.7260268749376332, + "learning_rate": 4.231944986774513e-06, + "loss": 0.2668, + "step": 934 + }, + { + "epoch": 0.2791044776119403, + "grad_norm": 0.7381450828289706, + "learning_rate": 4.2302009006564845e-06, + "loss": 0.3379, + "step": 935 + }, + { + "epoch": 0.27940298507462685, + "grad_norm": 0.906697687644743, + "learning_rate": 4.228455196842787e-06, + "loss": 0.3525, + "step": 936 + }, + { + "epoch": 0.2797014925373134, + "grad_norm": 0.8801394409694048, + "learning_rate": 4.2267078769656115e-06, + "loss": 0.3684, + "step": 937 + }, + { + "epoch": 0.28, + "grad_norm": 0.8229105599808183, + "learning_rate": 4.22495894265866e-06, + "loss": 0.3688, + "step": 938 + }, + { + "epoch": 0.2802985074626866, + "grad_norm": 0.8866551931642531, + "learning_rate": 4.223208395557142e-06, + "loss": 0.3939, + "step": 939 + }, + { + "epoch": 0.28059701492537314, + "grad_norm": 0.796873195210361, + "learning_rate": 4.221456237297779e-06, + "loss": 0.3246, + "step": 940 + }, + { + "epoch": 0.2808955223880597, + "grad_norm": 0.8777704725183845, + "learning_rate": 4.219702469518794e-06, + "loss": 0.3148, + "step": 941 + }, + { + "epoch": 0.28119402985074626, + "grad_norm": 0.8149960092397086, + "learning_rate": 4.217947093859917e-06, + "loss": 0.3261, + "step": 942 + }, + { + "epoch": 0.2814925373134328, + "grad_norm": 0.782356882918818, + "learning_rate": 4.216190111962383e-06, + "loss": 0.3594, + "step": 943 + }, + { + "epoch": 0.2817910447761194, + "grad_norm": 0.7838115738847105, + "learning_rate": 4.2144315254689265e-06, + "loss": 0.3658, + "step": 944 + }, + { + "epoch": 0.282089552238806, + "grad_norm": 0.861027660876004, + "learning_rate": 4.2126713360237835e-06, + "loss": 0.3216, + "step": 945 + }, + { + "epoch": 0.28238805970149256, + "grad_norm": 0.8979440300386647, + "learning_rate": 4.210909545272687e-06, + "loss": 0.347, + "step": 946 + }, + { + "epoch": 0.2826865671641791, + "grad_norm": 0.8046473678420953, + "learning_rate": 4.2091461548628695e-06, + "loss": 0.3817, + "step": 947 + }, + { + "epoch": 0.2829850746268657, + "grad_norm": 0.8999763252157099, + "learning_rate": 4.207381166443058e-06, + "loss": 0.3384, + "step": 948 + }, + { + "epoch": 0.28328358208955223, + "grad_norm": 1.0558611307630636, + "learning_rate": 4.205614581663472e-06, + "loss": 0.3764, + "step": 949 + }, + { + "epoch": 0.2835820895522388, + "grad_norm": 0.8250495089946933, + "learning_rate": 4.203846402175828e-06, + "loss": 0.3466, + "step": 950 + }, + { + "epoch": 0.28388059701492535, + "grad_norm": 0.8080126407334502, + "learning_rate": 4.202076629633329e-06, + "loss": 0.3379, + "step": 951 + }, + { + "epoch": 0.28417910447761197, + "grad_norm": 0.8189735637468861, + "learning_rate": 4.200305265690669e-06, + "loss": 0.3717, + "step": 952 + }, + { + "epoch": 0.2844776119402985, + "grad_norm": 0.8206896400863205, + "learning_rate": 4.198532312004031e-06, + "loss": 0.3424, + "step": 953 + }, + { + "epoch": 0.2847761194029851, + "grad_norm": 0.8182301432265623, + "learning_rate": 4.1967577702310826e-06, + "loss": 0.3459, + "step": 954 + }, + { + "epoch": 0.28507462686567164, + "grad_norm": 0.860760231935904, + "learning_rate": 4.194981642030978e-06, + "loss": 0.3934, + "step": 955 + }, + { + "epoch": 0.2853731343283582, + "grad_norm": 0.89467249534368, + "learning_rate": 4.1932039290643534e-06, + "loss": 0.3867, + "step": 956 + }, + { + "epoch": 0.28567164179104476, + "grad_norm": 0.9126227744137325, + "learning_rate": 4.191424632993326e-06, + "loss": 0.3452, + "step": 957 + }, + { + "epoch": 0.2859701492537313, + "grad_norm": 0.8112959766461156, + "learning_rate": 4.189643755481497e-06, + "loss": 0.2939, + "step": 958 + }, + { + "epoch": 0.28626865671641794, + "grad_norm": 0.9304054160150231, + "learning_rate": 4.18786129819394e-06, + "loss": 0.3461, + "step": 959 + }, + { + "epoch": 0.2865671641791045, + "grad_norm": 0.7922770615613615, + "learning_rate": 4.1860772627972125e-06, + "loss": 0.3435, + "step": 960 + }, + { + "epoch": 0.28686567164179105, + "grad_norm": 0.7959362445567673, + "learning_rate": 4.184291650959341e-06, + "loss": 0.3534, + "step": 961 + }, + { + "epoch": 0.2871641791044776, + "grad_norm": 0.8002993493679609, + "learning_rate": 4.182504464349832e-06, + "loss": 0.3316, + "step": 962 + }, + { + "epoch": 0.28746268656716417, + "grad_norm": 0.8167637473996379, + "learning_rate": 4.180715704639659e-06, + "loss": 0.3539, + "step": 963 + }, + { + "epoch": 0.28776119402985073, + "grad_norm": 0.802203149893345, + "learning_rate": 4.178925373501269e-06, + "loss": 0.3449, + "step": 964 + }, + { + "epoch": 0.2880597014925373, + "grad_norm": 0.8592664185496729, + "learning_rate": 4.17713347260858e-06, + "loss": 0.3828, + "step": 965 + }, + { + "epoch": 0.2883582089552239, + "grad_norm": 0.8304560933393987, + "learning_rate": 4.175340003636974e-06, + "loss": 0.3821, + "step": 966 + }, + { + "epoch": 0.28865671641791046, + "grad_norm": 0.9391386547612607, + "learning_rate": 4.173544968263301e-06, + "loss": 0.3846, + "step": 967 + }, + { + "epoch": 0.288955223880597, + "grad_norm": 0.8262795778364892, + "learning_rate": 4.171748368165875e-06, + "loss": 0.3383, + "step": 968 + }, + { + "epoch": 0.2892537313432836, + "grad_norm": 0.841522653230346, + "learning_rate": 4.169950205024474e-06, + "loss": 0.3355, + "step": 969 + }, + { + "epoch": 0.28955223880597014, + "grad_norm": 0.9913354298562964, + "learning_rate": 4.168150480520337e-06, + "loss": 0.419, + "step": 970 + }, + { + "epoch": 0.2898507462686567, + "grad_norm": 0.8071892265145763, + "learning_rate": 4.16634919633616e-06, + "loss": 0.3449, + "step": 971 + }, + { + "epoch": 0.29014925373134326, + "grad_norm": 0.8508150549887834, + "learning_rate": 4.164546354156104e-06, + "loss": 0.3672, + "step": 972 + }, + { + "epoch": 0.2904477611940299, + "grad_norm": 0.7899568543277075, + "learning_rate": 4.162741955665779e-06, + "loss": 0.3329, + "step": 973 + }, + { + "epoch": 0.29074626865671643, + "grad_norm": 0.80104254741407, + "learning_rate": 4.160936002552255e-06, + "loss": 0.3565, + "step": 974 + }, + { + "epoch": 0.291044776119403, + "grad_norm": 0.7742895075156045, + "learning_rate": 4.159128496504054e-06, + "loss": 0.3156, + "step": 975 + }, + { + "epoch": 0.29134328358208955, + "grad_norm": 0.8652205416649817, + "learning_rate": 4.157319439211151e-06, + "loss": 0.3567, + "step": 976 + }, + { + "epoch": 0.2916417910447761, + "grad_norm": 0.8552104838671654, + "learning_rate": 4.155508832364968e-06, + "loss": 0.316, + "step": 977 + }, + { + "epoch": 0.29194029850746267, + "grad_norm": 0.8757415112296788, + "learning_rate": 4.153696677658381e-06, + "loss": 0.3494, + "step": 978 + }, + { + "epoch": 0.29223880597014923, + "grad_norm": 0.8389906004653142, + "learning_rate": 4.151882976785709e-06, + "loss": 0.3291, + "step": 979 + }, + { + "epoch": 0.29253731343283584, + "grad_norm": 0.7800988745138925, + "learning_rate": 4.150067731442717e-06, + "loss": 0.3167, + "step": 980 + }, + { + "epoch": 0.2928358208955224, + "grad_norm": 0.8294199151333148, + "learning_rate": 4.148250943326619e-06, + "loss": 0.3623, + "step": 981 + }, + { + "epoch": 0.29313432835820896, + "grad_norm": 0.8578101736701281, + "learning_rate": 4.146432614136064e-06, + "loss": 0.307, + "step": 982 + }, + { + "epoch": 0.2934328358208955, + "grad_norm": 0.7856364156171273, + "learning_rate": 4.144612745571146e-06, + "loss": 0.3886, + "step": 983 + }, + { + "epoch": 0.2937313432835821, + "grad_norm": 0.8634192899478178, + "learning_rate": 4.1427913393333985e-06, + "loss": 0.3953, + "step": 984 + }, + { + "epoch": 0.29402985074626864, + "grad_norm": 0.8485105584492334, + "learning_rate": 4.140968397125793e-06, + "loss": 0.3629, + "step": 985 + }, + { + "epoch": 0.2943283582089552, + "grad_norm": 0.9742112909739822, + "learning_rate": 4.139143920652734e-06, + "loss": 0.3835, + "step": 986 + }, + { + "epoch": 0.2946268656716418, + "grad_norm": 0.9878802852692959, + "learning_rate": 4.137317911620063e-06, + "loss": 0.4003, + "step": 987 + }, + { + "epoch": 0.2949253731343284, + "grad_norm": 0.8815419197549593, + "learning_rate": 4.1354903717350556e-06, + "loss": 0.344, + "step": 988 + }, + { + "epoch": 0.29522388059701493, + "grad_norm": 0.834846080220852, + "learning_rate": 4.133661302706415e-06, + "loss": 0.3696, + "step": 989 + }, + { + "epoch": 0.2955223880597015, + "grad_norm": 0.845007866793147, + "learning_rate": 4.131830706244276e-06, + "loss": 0.3561, + "step": 990 + }, + { + "epoch": 0.29582089552238805, + "grad_norm": 0.8925684451459167, + "learning_rate": 4.129998584060204e-06, + "loss": 0.3805, + "step": 991 + }, + { + "epoch": 0.2961194029850746, + "grad_norm": 0.8371742760782053, + "learning_rate": 4.128164937867187e-06, + "loss": 0.3452, + "step": 992 + }, + { + "epoch": 0.29641791044776117, + "grad_norm": 0.8300330502109989, + "learning_rate": 4.12632976937964e-06, + "loss": 0.3614, + "step": 993 + }, + { + "epoch": 0.2967164179104478, + "grad_norm": 0.8341386461290142, + "learning_rate": 4.1244930803134e-06, + "loss": 0.3697, + "step": 994 + }, + { + "epoch": 0.29701492537313434, + "grad_norm": 0.9247230906706114, + "learning_rate": 4.122654872385726e-06, + "loss": 0.403, + "step": 995 + }, + { + "epoch": 0.2973134328358209, + "grad_norm": 0.815687144485872, + "learning_rate": 4.1208151473153e-06, + "loss": 0.3467, + "step": 996 + }, + { + "epoch": 0.29761194029850746, + "grad_norm": 0.9159383505804287, + "learning_rate": 4.118973906822218e-06, + "loss": 0.3492, + "step": 997 + }, + { + "epoch": 0.297910447761194, + "grad_norm": 0.8149117723593948, + "learning_rate": 4.117131152627996e-06, + "loss": 0.3371, + "step": 998 + }, + { + "epoch": 0.2982089552238806, + "grad_norm": 0.7227696644393177, + "learning_rate": 4.1152868864555626e-06, + "loss": 0.3277, + "step": 999 + }, + { + "epoch": 0.29850746268656714, + "grad_norm": 0.827257070561528, + "learning_rate": 4.113441110029265e-06, + "loss": 0.3227, + "step": 1000 + }, + { + "epoch": 0.29880597014925375, + "grad_norm": 0.8467214707843915, + "learning_rate": 4.111593825074856e-06, + "loss": 0.337, + "step": 1001 + }, + { + "epoch": 0.2991044776119403, + "grad_norm": 0.8920138785669369, + "learning_rate": 4.109745033319504e-06, + "loss": 0.3982, + "step": 1002 + }, + { + "epoch": 0.29940298507462687, + "grad_norm": 0.8027122577683616, + "learning_rate": 4.107894736491783e-06, + "loss": 0.3513, + "step": 1003 + }, + { + "epoch": 0.29970149253731343, + "grad_norm": 1.2167741273718207, + "learning_rate": 4.106042936321676e-06, + "loss": 0.3689, + "step": 1004 + }, + { + "epoch": 0.3, + "grad_norm": 0.8627016240432611, + "learning_rate": 4.10418963454057e-06, + "loss": 0.3495, + "step": 1005 + }, + { + "epoch": 0.30029850746268655, + "grad_norm": 0.8585195931421936, + "learning_rate": 4.10233483288126e-06, + "loss": 0.317, + "step": 1006 + }, + { + "epoch": 0.3005970149253731, + "grad_norm": 0.8429386990797031, + "learning_rate": 4.100478533077936e-06, + "loss": 0.3306, + "step": 1007 + }, + { + "epoch": 0.3008955223880597, + "grad_norm": 0.7972123992318357, + "learning_rate": 4.098620736866196e-06, + "loss": 0.3493, + "step": 1008 + }, + { + "epoch": 0.3011940298507463, + "grad_norm": 0.8661016284011678, + "learning_rate": 4.096761445983034e-06, + "loss": 0.3824, + "step": 1009 + }, + { + "epoch": 0.30149253731343284, + "grad_norm": 0.8703037201689672, + "learning_rate": 4.0949006621668405e-06, + "loss": 0.3902, + "step": 1010 + }, + { + "epoch": 0.3017910447761194, + "grad_norm": 0.9376297455888316, + "learning_rate": 4.093038387157404e-06, + "loss": 0.3609, + "step": 1011 + }, + { + "epoch": 0.30208955223880596, + "grad_norm": 0.7998064580888031, + "learning_rate": 4.091174622695906e-06, + "loss": 0.3598, + "step": 1012 + }, + { + "epoch": 0.3023880597014925, + "grad_norm": 0.8428236863313218, + "learning_rate": 4.089309370524921e-06, + "loss": 0.3676, + "step": 1013 + }, + { + "epoch": 0.30268656716417913, + "grad_norm": 0.8469734624648289, + "learning_rate": 4.087442632388413e-06, + "loss": 0.3791, + "step": 1014 + }, + { + "epoch": 0.3029850746268657, + "grad_norm": 0.8517774956533634, + "learning_rate": 4.085574410031739e-06, + "loss": 0.3745, + "step": 1015 + }, + { + "epoch": 0.30328358208955225, + "grad_norm": 0.8281651202406228, + "learning_rate": 4.083704705201639e-06, + "loss": 0.3557, + "step": 1016 + }, + { + "epoch": 0.3035820895522388, + "grad_norm": 0.9004766310930208, + "learning_rate": 4.081833519646242e-06, + "loss": 0.3408, + "step": 1017 + }, + { + "epoch": 0.30388059701492537, + "grad_norm": 0.7898205886854617, + "learning_rate": 4.0799608551150625e-06, + "loss": 0.37, + "step": 1018 + }, + { + "epoch": 0.30417910447761193, + "grad_norm": 0.8032659295487834, + "learning_rate": 4.078086713358994e-06, + "loss": 0.3506, + "step": 1019 + }, + { + "epoch": 0.3044776119402985, + "grad_norm": 0.9141743806902359, + "learning_rate": 4.076211096130316e-06, + "loss": 0.3704, + "step": 1020 + }, + { + "epoch": 0.3047761194029851, + "grad_norm": 0.8282796502709796, + "learning_rate": 4.074334005182682e-06, + "loss": 0.3571, + "step": 1021 + }, + { + "epoch": 0.30507462686567166, + "grad_norm": 0.8496527957007282, + "learning_rate": 4.072455442271128e-06, + "loss": 0.3163, + "step": 1022 + }, + { + "epoch": 0.3053731343283582, + "grad_norm": 0.8977935551607475, + "learning_rate": 4.070575409152064e-06, + "loss": 0.4037, + "step": 1023 + }, + { + "epoch": 0.3056716417910448, + "grad_norm": 0.8847365927783407, + "learning_rate": 4.068693907583276e-06, + "loss": 0.4017, + "step": 1024 + }, + { + "epoch": 0.30597014925373134, + "grad_norm": 0.7747661612730282, + "learning_rate": 4.06681093932392e-06, + "loss": 0.3397, + "step": 1025 + }, + { + "epoch": 0.3062686567164179, + "grad_norm": 0.8825845190947826, + "learning_rate": 4.064926506134528e-06, + "loss": 0.3298, + "step": 1026 + }, + { + "epoch": 0.30656716417910446, + "grad_norm": 0.8592497304478822, + "learning_rate": 4.063040609776998e-06, + "loss": 0.349, + "step": 1027 + }, + { + "epoch": 0.30686567164179107, + "grad_norm": 0.8612997055386207, + "learning_rate": 4.0611532520145965e-06, + "loss": 0.3972, + "step": 1028 + }, + { + "epoch": 0.30716417910447763, + "grad_norm": 0.928710595753594, + "learning_rate": 4.059264434611957e-06, + "loss": 0.3751, + "step": 1029 + }, + { + "epoch": 0.3074626865671642, + "grad_norm": 0.8652473522503297, + "learning_rate": 4.05737415933508e-06, + "loss": 0.3688, + "step": 1030 + }, + { + "epoch": 0.30776119402985075, + "grad_norm": 0.8978250178705632, + "learning_rate": 4.055482427951324e-06, + "loss": 0.3916, + "step": 1031 + }, + { + "epoch": 0.3080597014925373, + "grad_norm": 0.9090489721659124, + "learning_rate": 4.053589242229412e-06, + "loss": 0.354, + "step": 1032 + }, + { + "epoch": 0.30835820895522387, + "grad_norm": 0.8801900400550225, + "learning_rate": 4.051694603939429e-06, + "loss": 0.3504, + "step": 1033 + }, + { + "epoch": 0.3086567164179104, + "grad_norm": 0.8960014398639581, + "learning_rate": 4.049798514852812e-06, + "loss": 0.3423, + "step": 1034 + }, + { + "epoch": 0.30895522388059704, + "grad_norm": 0.798818576158037, + "learning_rate": 4.047900976742362e-06, + "loss": 0.3443, + "step": 1035 + }, + { + "epoch": 0.3092537313432836, + "grad_norm": 0.8059330952772015, + "learning_rate": 4.046001991382227e-06, + "loss": 0.3743, + "step": 1036 + }, + { + "epoch": 0.30955223880597016, + "grad_norm": 0.8226038835739496, + "learning_rate": 4.044101560547915e-06, + "loss": 0.3515, + "step": 1037 + }, + { + "epoch": 0.3098507462686567, + "grad_norm": 0.8230993349145885, + "learning_rate": 4.042199686016281e-06, + "loss": 0.3701, + "step": 1038 + }, + { + "epoch": 0.3101492537313433, + "grad_norm": 0.7911941573133365, + "learning_rate": 4.040296369565532e-06, + "loss": 0.3396, + "step": 1039 + }, + { + "epoch": 0.31044776119402984, + "grad_norm": 0.8712443502975892, + "learning_rate": 4.038391612975222e-06, + "loss": 0.3927, + "step": 1040 + }, + { + "epoch": 0.3107462686567164, + "grad_norm": 0.8233453064590726, + "learning_rate": 4.036485418026253e-06, + "loss": 0.3659, + "step": 1041 + }, + { + "epoch": 0.311044776119403, + "grad_norm": 0.8448815164890928, + "learning_rate": 4.034577786500869e-06, + "loss": 0.3512, + "step": 1042 + }, + { + "epoch": 0.31134328358208957, + "grad_norm": 0.8327563911984053, + "learning_rate": 4.0326687201826605e-06, + "loss": 0.3255, + "step": 1043 + }, + { + "epoch": 0.31164179104477613, + "grad_norm": 0.8220120310659925, + "learning_rate": 4.030758220856558e-06, + "loss": 0.3298, + "step": 1044 + }, + { + "epoch": 0.3119402985074627, + "grad_norm": 0.8028153864586365, + "learning_rate": 4.02884629030883e-06, + "loss": 0.3678, + "step": 1045 + }, + { + "epoch": 0.31223880597014925, + "grad_norm": 0.8349137515362423, + "learning_rate": 4.026932930327088e-06, + "loss": 0.3783, + "step": 1046 + }, + { + "epoch": 0.3125373134328358, + "grad_norm": 0.7766428030622539, + "learning_rate": 4.025018142700273e-06, + "loss": 0.2854, + "step": 1047 + }, + { + "epoch": 0.31283582089552237, + "grad_norm": 0.9182239119842932, + "learning_rate": 4.0231019292186685e-06, + "loss": 0.4211, + "step": 1048 + }, + { + "epoch": 0.313134328358209, + "grad_norm": 0.8518537676125332, + "learning_rate": 4.0211842916738855e-06, + "loss": 0.3695, + "step": 1049 + }, + { + "epoch": 0.31343283582089554, + "grad_norm": 0.7665539603822111, + "learning_rate": 4.019265231858869e-06, + "loss": 0.3235, + "step": 1050 + }, + { + "epoch": 0.3137313432835821, + "grad_norm": 0.7995496595040446, + "learning_rate": 4.017344751567892e-06, + "loss": 0.3384, + "step": 1051 + }, + { + "epoch": 0.31402985074626866, + "grad_norm": 0.7860027976534756, + "learning_rate": 4.0154228525965585e-06, + "loss": 0.2767, + "step": 1052 + }, + { + "epoch": 0.3143283582089552, + "grad_norm": 0.8580440875177763, + "learning_rate": 4.0134995367417965e-06, + "loss": 0.3525, + "step": 1053 + }, + { + "epoch": 0.3146268656716418, + "grad_norm": 0.9336926190628849, + "learning_rate": 4.011574805801858e-06, + "loss": 0.3394, + "step": 1054 + }, + { + "epoch": 0.31492537313432833, + "grad_norm": 0.8425176180393162, + "learning_rate": 4.009648661576321e-06, + "loss": 0.3511, + "step": 1055 + }, + { + "epoch": 0.31522388059701495, + "grad_norm": 0.8276026034880272, + "learning_rate": 4.007721105866084e-06, + "loss": 0.3602, + "step": 1056 + }, + { + "epoch": 0.3155223880597015, + "grad_norm": 0.787475800014448, + "learning_rate": 4.005792140473363e-06, + "loss": 0.3188, + "step": 1057 + }, + { + "epoch": 0.31582089552238807, + "grad_norm": 0.8880428207150297, + "learning_rate": 4.003861767201695e-06, + "loss": 0.3342, + "step": 1058 + }, + { + "epoch": 0.3161194029850746, + "grad_norm": 0.8017106892627521, + "learning_rate": 4.001929987855931e-06, + "loss": 0.3502, + "step": 1059 + }, + { + "epoch": 0.3164179104477612, + "grad_norm": 0.7848482690732528, + "learning_rate": 3.999996804242238e-06, + "loss": 0.3337, + "step": 1060 + }, + { + "epoch": 0.31671641791044775, + "grad_norm": 0.8099722326612471, + "learning_rate": 3.998062218168096e-06, + "loss": 0.3358, + "step": 1061 + }, + { + "epoch": 0.3170149253731343, + "grad_norm": 0.8230124063451144, + "learning_rate": 3.996126231442295e-06, + "loss": 0.3732, + "step": 1062 + }, + { + "epoch": 0.3173134328358209, + "grad_norm": 0.7714716419614884, + "learning_rate": 3.994188845874936e-06, + "loss": 0.3289, + "step": 1063 + }, + { + "epoch": 0.3176119402985075, + "grad_norm": 0.7922647776658421, + "learning_rate": 3.992250063277427e-06, + "loss": 0.3154, + "step": 1064 + }, + { + "epoch": 0.31791044776119404, + "grad_norm": 0.8044498494994091, + "learning_rate": 3.990309885462481e-06, + "loss": 0.3378, + "step": 1065 + }, + { + "epoch": 0.3182089552238806, + "grad_norm": 0.8820721478686536, + "learning_rate": 3.98836831424412e-06, + "loss": 0.3511, + "step": 1066 + }, + { + "epoch": 0.31850746268656716, + "grad_norm": 0.8572321551754879, + "learning_rate": 3.9864253514376634e-06, + "loss": 0.3646, + "step": 1067 + }, + { + "epoch": 0.3188059701492537, + "grad_norm": 0.8802364307844034, + "learning_rate": 3.9844809988597355e-06, + "loss": 0.3448, + "step": 1068 + }, + { + "epoch": 0.3191044776119403, + "grad_norm": 0.8291543922384458, + "learning_rate": 3.9825352583282585e-06, + "loss": 0.3219, + "step": 1069 + }, + { + "epoch": 0.3194029850746269, + "grad_norm": 0.8993675406838366, + "learning_rate": 3.980588131662451e-06, + "loss": 0.4098, + "step": 1070 + }, + { + "epoch": 0.31970149253731345, + "grad_norm": 0.8157166363381144, + "learning_rate": 3.978639620682829e-06, + "loss": 0.3402, + "step": 1071 + }, + { + "epoch": 0.32, + "grad_norm": 0.8073929451435011, + "learning_rate": 3.976689727211205e-06, + "loss": 0.3802, + "step": 1072 + }, + { + "epoch": 0.32029850746268657, + "grad_norm": 0.8488047650093633, + "learning_rate": 3.97473845307068e-06, + "loss": 0.3231, + "step": 1073 + }, + { + "epoch": 0.3205970149253731, + "grad_norm": 0.8608697980068266, + "learning_rate": 3.972785800085647e-06, + "loss": 0.3523, + "step": 1074 + }, + { + "epoch": 0.3208955223880597, + "grad_norm": 0.8021106185518122, + "learning_rate": 3.970831770081791e-06, + "loss": 0.3536, + "step": 1075 + }, + { + "epoch": 0.32119402985074624, + "grad_norm": 0.8796976566955086, + "learning_rate": 3.968876364886082e-06, + "loss": 0.3275, + "step": 1076 + }, + { + "epoch": 0.32149253731343286, + "grad_norm": 0.8058440193793543, + "learning_rate": 3.966919586326775e-06, + "loss": 0.375, + "step": 1077 + }, + { + "epoch": 0.3217910447761194, + "grad_norm": 0.8775522173367613, + "learning_rate": 3.964961436233412e-06, + "loss": 0.3588, + "step": 1078 + }, + { + "epoch": 0.322089552238806, + "grad_norm": 0.9098939150341124, + "learning_rate": 3.963001916436814e-06, + "loss": 0.3587, + "step": 1079 + }, + { + "epoch": 0.32238805970149254, + "grad_norm": 0.8473555634681235, + "learning_rate": 3.961041028769085e-06, + "loss": 0.3376, + "step": 1080 + }, + { + "epoch": 0.3226865671641791, + "grad_norm": 0.9150706720781269, + "learning_rate": 3.959078775063607e-06, + "loss": 0.3745, + "step": 1081 + }, + { + "epoch": 0.32298507462686565, + "grad_norm": 0.9386215098514821, + "learning_rate": 3.95711515715504e-06, + "loss": 0.3778, + "step": 1082 + }, + { + "epoch": 0.3232835820895522, + "grad_norm": 0.7483070851000795, + "learning_rate": 3.955150176879316e-06, + "loss": 0.3397, + "step": 1083 + }, + { + "epoch": 0.3235820895522388, + "grad_norm": 0.6912249624197083, + "learning_rate": 3.953183836073649e-06, + "loss": 0.3304, + "step": 1084 + }, + { + "epoch": 0.3238805970149254, + "grad_norm": 0.8348347584691441, + "learning_rate": 3.951216136576515e-06, + "loss": 0.3316, + "step": 1085 + }, + { + "epoch": 0.32417910447761195, + "grad_norm": 0.8690430526369205, + "learning_rate": 3.949247080227666e-06, + "loss": 0.3461, + "step": 1086 + }, + { + "epoch": 0.3244776119402985, + "grad_norm": 0.8050979365143089, + "learning_rate": 3.947276668868124e-06, + "loss": 0.3642, + "step": 1087 + }, + { + "epoch": 0.32477611940298506, + "grad_norm": 0.7662634270514135, + "learning_rate": 3.945304904340174e-06, + "loss": 0.3445, + "step": 1088 + }, + { + "epoch": 0.3250746268656716, + "grad_norm": 0.84107358699445, + "learning_rate": 3.943331788487366e-06, + "loss": 0.358, + "step": 1089 + }, + { + "epoch": 0.3253731343283582, + "grad_norm": 0.824621628564205, + "learning_rate": 3.941357323154519e-06, + "loss": 0.2967, + "step": 1090 + }, + { + "epoch": 0.3256716417910448, + "grad_norm": 0.806684118104856, + "learning_rate": 3.9393815101877076e-06, + "loss": 0.3335, + "step": 1091 + }, + { + "epoch": 0.32597014925373136, + "grad_norm": 0.8342281660142485, + "learning_rate": 3.937404351434269e-06, + "loss": 0.3748, + "step": 1092 + }, + { + "epoch": 0.3262686567164179, + "grad_norm": 0.873757164412416, + "learning_rate": 3.935425848742797e-06, + "loss": 0.3692, + "step": 1093 + }, + { + "epoch": 0.3265671641791045, + "grad_norm": 0.7814545073329886, + "learning_rate": 3.933446003963147e-06, + "loss": 0.3294, + "step": 1094 + }, + { + "epoch": 0.32686567164179103, + "grad_norm": 0.8482386135188483, + "learning_rate": 3.9314648189464226e-06, + "loss": 0.3259, + "step": 1095 + }, + { + "epoch": 0.3271641791044776, + "grad_norm": 0.8036333351717462, + "learning_rate": 3.929482295544985e-06, + "loss": 0.3496, + "step": 1096 + }, + { + "epoch": 0.32746268656716415, + "grad_norm": 0.9025969332953864, + "learning_rate": 3.927498435612444e-06, + "loss": 0.3823, + "step": 1097 + }, + { + "epoch": 0.32776119402985077, + "grad_norm": 0.8330683787016878, + "learning_rate": 3.925513241003663e-06, + "loss": 0.3654, + "step": 1098 + }, + { + "epoch": 0.3280597014925373, + "grad_norm": 0.9303082381552658, + "learning_rate": 3.923526713574747e-06, + "loss": 0.3674, + "step": 1099 + }, + { + "epoch": 0.3283582089552239, + "grad_norm": 1.181233124032353, + "learning_rate": 3.921538855183053e-06, + "loss": 0.364, + "step": 1100 + }, + { + "epoch": 0.32865671641791044, + "grad_norm": 0.8588441469224205, + "learning_rate": 3.919549667687178e-06, + "loss": 0.3635, + "step": 1101 + }, + { + "epoch": 0.328955223880597, + "grad_norm": 0.7804288737599779, + "learning_rate": 3.917559152946966e-06, + "loss": 0.3459, + "step": 1102 + }, + { + "epoch": 0.32925373134328356, + "grad_norm": 0.9027645900655169, + "learning_rate": 3.9155673128235e-06, + "loss": 0.3516, + "step": 1103 + }, + { + "epoch": 0.3295522388059702, + "grad_norm": 0.8393324136480259, + "learning_rate": 3.9135741491791e-06, + "loss": 0.3685, + "step": 1104 + }, + { + "epoch": 0.32985074626865674, + "grad_norm": 0.7473370868680391, + "learning_rate": 3.9115796638773275e-06, + "loss": 0.2815, + "step": 1105 + }, + { + "epoch": 0.3301492537313433, + "grad_norm": 0.8919025147790259, + "learning_rate": 3.9095838587829756e-06, + "loss": 0.3857, + "step": 1106 + }, + { + "epoch": 0.33044776119402985, + "grad_norm": 0.817241418807688, + "learning_rate": 3.907586735762074e-06, + "loss": 0.3732, + "step": 1107 + }, + { + "epoch": 0.3307462686567164, + "grad_norm": 0.9218470406185262, + "learning_rate": 3.9055882966818855e-06, + "loss": 0.3376, + "step": 1108 + }, + { + "epoch": 0.331044776119403, + "grad_norm": 0.8177748051426704, + "learning_rate": 3.9035885434109014e-06, + "loss": 0.3184, + "step": 1109 + }, + { + "epoch": 0.33134328358208953, + "grad_norm": 0.7164318821917856, + "learning_rate": 3.90158747781884e-06, + "loss": 0.3142, + "step": 1110 + }, + { + "epoch": 0.33164179104477615, + "grad_norm": 0.8400280769287423, + "learning_rate": 3.899585101776652e-06, + "loss": 0.302, + "step": 1111 + }, + { + "epoch": 0.3319402985074627, + "grad_norm": 0.7773008415105561, + "learning_rate": 3.8975814171565075e-06, + "loss": 0.3799, + "step": 1112 + }, + { + "epoch": 0.33223880597014926, + "grad_norm": 0.897234720453567, + "learning_rate": 3.895576425831805e-06, + "loss": 0.3883, + "step": 1113 + }, + { + "epoch": 0.3325373134328358, + "grad_norm": 0.8625016931833325, + "learning_rate": 3.893570129677161e-06, + "loss": 0.337, + "step": 1114 + }, + { + "epoch": 0.3328358208955224, + "grad_norm": 0.8269153711097461, + "learning_rate": 3.8915625305684145e-06, + "loss": 0.3452, + "step": 1115 + }, + { + "epoch": 0.33313432835820894, + "grad_norm": 0.8632059531163434, + "learning_rate": 3.889553630382621e-06, + "loss": 0.3655, + "step": 1116 + }, + { + "epoch": 0.3334328358208955, + "grad_norm": 0.7706052114027439, + "learning_rate": 3.8875434309980545e-06, + "loss": 0.3209, + "step": 1117 + }, + { + "epoch": 0.3337313432835821, + "grad_norm": 0.8172417996393967, + "learning_rate": 3.8855319342942e-06, + "loss": 0.3588, + "step": 1118 + }, + { + "epoch": 0.3340298507462687, + "grad_norm": 0.7457140782960014, + "learning_rate": 3.883519142151761e-06, + "loss": 0.3206, + "step": 1119 + }, + { + "epoch": 0.33432835820895523, + "grad_norm": 0.9075055402661403, + "learning_rate": 3.881505056452646e-06, + "loss": 0.388, + "step": 1120 + }, + { + "epoch": 0.3346268656716418, + "grad_norm": 0.8633987237770478, + "learning_rate": 3.879489679079977e-06, + "loss": 0.3335, + "step": 1121 + }, + { + "epoch": 0.33492537313432835, + "grad_norm": 0.8015175637999997, + "learning_rate": 3.877473011918084e-06, + "loss": 0.3608, + "step": 1122 + }, + { + "epoch": 0.3352238805970149, + "grad_norm": 0.9650469277739403, + "learning_rate": 3.8754550568525006e-06, + "loss": 0.3826, + "step": 1123 + }, + { + "epoch": 0.33552238805970147, + "grad_norm": 0.8531369547701335, + "learning_rate": 3.8734358157699666e-06, + "loss": 0.3907, + "step": 1124 + }, + { + "epoch": 0.3358208955223881, + "grad_norm": 0.8203181528488143, + "learning_rate": 3.871415290558421e-06, + "loss": 0.341, + "step": 1125 + }, + { + "epoch": 0.33611940298507464, + "grad_norm": 0.9506865101073372, + "learning_rate": 3.869393483107008e-06, + "loss": 0.3766, + "step": 1126 + }, + { + "epoch": 0.3364179104477612, + "grad_norm": 0.7959518196575798, + "learning_rate": 3.8673703953060685e-06, + "loss": 0.3215, + "step": 1127 + }, + { + "epoch": 0.33671641791044776, + "grad_norm": 0.9587562044134879, + "learning_rate": 3.865346029047137e-06, + "loss": 0.425, + "step": 1128 + }, + { + "epoch": 0.3370149253731343, + "grad_norm": 0.7933800883348958, + "learning_rate": 3.863320386222949e-06, + "loss": 0.321, + "step": 1129 + }, + { + "epoch": 0.3373134328358209, + "grad_norm": 0.7786165000499077, + "learning_rate": 3.861293468727432e-06, + "loss": 0.3249, + "step": 1130 + }, + { + "epoch": 0.33761194029850744, + "grad_norm": 0.9211593511912554, + "learning_rate": 3.859265278455701e-06, + "loss": 0.3879, + "step": 1131 + }, + { + "epoch": 0.33791044776119405, + "grad_norm": 0.86664700650382, + "learning_rate": 3.8572358173040695e-06, + "loss": 0.3119, + "step": 1132 + }, + { + "epoch": 0.3382089552238806, + "grad_norm": 0.8560710542669949, + "learning_rate": 3.85520508717003e-06, + "loss": 0.3335, + "step": 1133 + }, + { + "epoch": 0.3385074626865672, + "grad_norm": 0.9378308935231966, + "learning_rate": 3.853173089952268e-06, + "loss": 0.3437, + "step": 1134 + }, + { + "epoch": 0.33880597014925373, + "grad_norm": 0.9021474935248037, + "learning_rate": 3.85113982755065e-06, + "loss": 0.3441, + "step": 1135 + }, + { + "epoch": 0.3391044776119403, + "grad_norm": 0.8505218970552499, + "learning_rate": 3.849105301866227e-06, + "loss": 0.3595, + "step": 1136 + }, + { + "epoch": 0.33940298507462685, + "grad_norm": 1.0019110671578144, + "learning_rate": 3.847069514801232e-06, + "loss": 0.348, + "step": 1137 + }, + { + "epoch": 0.3397014925373134, + "grad_norm": 0.896112478403269, + "learning_rate": 3.845032468259075e-06, + "loss": 0.3562, + "step": 1138 + }, + { + "epoch": 0.34, + "grad_norm": 0.8272485945752788, + "learning_rate": 3.842994164144346e-06, + "loss": 0.3216, + "step": 1139 + }, + { + "epoch": 0.3402985074626866, + "grad_norm": 0.7890421842215967, + "learning_rate": 3.840954604362809e-06, + "loss": 0.3341, + "step": 1140 + }, + { + "epoch": 0.34059701492537314, + "grad_norm": 0.8688050179607455, + "learning_rate": 3.838913790821402e-06, + "loss": 0.3232, + "step": 1141 + }, + { + "epoch": 0.3408955223880597, + "grad_norm": 1.0530125896366147, + "learning_rate": 3.8368717254282364e-06, + "loss": 0.389, + "step": 1142 + }, + { + "epoch": 0.34119402985074626, + "grad_norm": 0.9015300393082843, + "learning_rate": 3.834828410092595e-06, + "loss": 0.3431, + "step": 1143 + }, + { + "epoch": 0.3414925373134328, + "grad_norm": 0.8299009904531546, + "learning_rate": 3.8327838467249255e-06, + "loss": 0.3644, + "step": 1144 + }, + { + "epoch": 0.3417910447761194, + "grad_norm": 0.8043231321701094, + "learning_rate": 3.830738037236848e-06, + "loss": 0.3477, + "step": 1145 + }, + { + "epoch": 0.342089552238806, + "grad_norm": 0.753965995052167, + "learning_rate": 3.82869098354114e-06, + "loss": 0.3473, + "step": 1146 + }, + { + "epoch": 0.34238805970149255, + "grad_norm": 0.7796417555071672, + "learning_rate": 3.826642687551751e-06, + "loss": 0.3402, + "step": 1147 + }, + { + "epoch": 0.3426865671641791, + "grad_norm": 0.8401115556379568, + "learning_rate": 3.824593151183785e-06, + "loss": 0.3521, + "step": 1148 + }, + { + "epoch": 0.34298507462686567, + "grad_norm": 0.8843481013387703, + "learning_rate": 3.82254237635351e-06, + "loss": 0.3826, + "step": 1149 + }, + { + "epoch": 0.34328358208955223, + "grad_norm": 0.8732317874736554, + "learning_rate": 3.82049036497835e-06, + "loss": 0.3005, + "step": 1150 + }, + { + "epoch": 0.3435820895522388, + "grad_norm": 0.8189473191628437, + "learning_rate": 3.8184371189768855e-06, + "loss": 0.3464, + "step": 1151 + }, + { + "epoch": 0.34388059701492535, + "grad_norm": 1.107894296285839, + "learning_rate": 3.816382640268852e-06, + "loss": 0.3991, + "step": 1152 + }, + { + "epoch": 0.34417910447761196, + "grad_norm": 0.8200875925736727, + "learning_rate": 3.8143269307751373e-06, + "loss": 0.3489, + "step": 1153 + }, + { + "epoch": 0.3444776119402985, + "grad_norm": 0.9460453425867347, + "learning_rate": 3.8122699924177786e-06, + "loss": 0.3836, + "step": 1154 + }, + { + "epoch": 0.3447761194029851, + "grad_norm": 0.772503033014387, + "learning_rate": 3.8102118271199638e-06, + "loss": 0.3285, + "step": 1155 + }, + { + "epoch": 0.34507462686567164, + "grad_norm": 0.8815073767971605, + "learning_rate": 3.8081524368060273e-06, + "loss": 0.3988, + "step": 1156 + }, + { + "epoch": 0.3453731343283582, + "grad_norm": 0.8885895081542389, + "learning_rate": 3.806091823401448e-06, + "loss": 0.3049, + "step": 1157 + }, + { + "epoch": 0.34567164179104476, + "grad_norm": 0.895020437102532, + "learning_rate": 3.80402998883285e-06, + "loss": 0.4085, + "step": 1158 + }, + { + "epoch": 0.3459701492537313, + "grad_norm": 0.9171169870830792, + "learning_rate": 3.8019669350279985e-06, + "loss": 0.3629, + "step": 1159 + }, + { + "epoch": 0.34626865671641793, + "grad_norm": 0.8081376377551374, + "learning_rate": 3.7999026639157983e-06, + "loss": 0.3319, + "step": 1160 + }, + { + "epoch": 0.3465671641791045, + "grad_norm": 0.8402653067564256, + "learning_rate": 3.797837177426292e-06, + "loss": 0.3931, + "step": 1161 + }, + { + "epoch": 0.34686567164179105, + "grad_norm": 0.837482981106821, + "learning_rate": 3.79577047749066e-06, + "loss": 0.3157, + "step": 1162 + }, + { + "epoch": 0.3471641791044776, + "grad_norm": 0.7786230841490198, + "learning_rate": 3.793702566041216e-06, + "loss": 0.3587, + "step": 1163 + }, + { + "epoch": 0.34746268656716417, + "grad_norm": 0.8720932882663056, + "learning_rate": 3.7916334450114073e-06, + "loss": 0.3514, + "step": 1164 + }, + { + "epoch": 0.34776119402985073, + "grad_norm": 0.8359944457321459, + "learning_rate": 3.7895631163358106e-06, + "loss": 0.345, + "step": 1165 + }, + { + "epoch": 0.3480597014925373, + "grad_norm": 0.8019229336175024, + "learning_rate": 3.787491581950133e-06, + "loss": 0.3482, + "step": 1166 + }, + { + "epoch": 0.3483582089552239, + "grad_norm": 0.8820518367174046, + "learning_rate": 3.7854188437912097e-06, + "loss": 0.3733, + "step": 1167 + }, + { + "epoch": 0.34865671641791046, + "grad_norm": 0.8403047783123814, + "learning_rate": 3.783344903796999e-06, + "loss": 0.3402, + "step": 1168 + }, + { + "epoch": 0.348955223880597, + "grad_norm": 0.7415680720483135, + "learning_rate": 3.7812697639065843e-06, + "loss": 0.3043, + "step": 1169 + }, + { + "epoch": 0.3492537313432836, + "grad_norm": 0.743125129225478, + "learning_rate": 3.779193426060172e-06, + "loss": 0.322, + "step": 1170 + }, + { + "epoch": 0.34955223880597014, + "grad_norm": 0.8684344515884909, + "learning_rate": 3.7771158921990865e-06, + "loss": 0.4043, + "step": 1171 + }, + { + "epoch": 0.3498507462686567, + "grad_norm": 0.8825012733228704, + "learning_rate": 3.7750371642657722e-06, + "loss": 0.3566, + "step": 1172 + }, + { + "epoch": 0.35014925373134326, + "grad_norm": 0.8095109148739124, + "learning_rate": 3.7729572442037877e-06, + "loss": 0.3221, + "step": 1173 + }, + { + "epoch": 0.35044776119402987, + "grad_norm": 0.8568019262787748, + "learning_rate": 3.7708761339578082e-06, + "loss": 0.3464, + "step": 1174 + }, + { + "epoch": 0.35074626865671643, + "grad_norm": 0.8431627633291477, + "learning_rate": 3.768793835473622e-06, + "loss": 0.3341, + "step": 1175 + }, + { + "epoch": 0.351044776119403, + "grad_norm": 0.8852857190511301, + "learning_rate": 3.766710350698125e-06, + "loss": 0.4152, + "step": 1176 + }, + { + "epoch": 0.35134328358208955, + "grad_norm": 0.85299125587319, + "learning_rate": 3.764625681579327e-06, + "loss": 0.3475, + "step": 1177 + }, + { + "epoch": 0.3516417910447761, + "grad_norm": 0.9703649850814429, + "learning_rate": 3.762539830066343e-06, + "loss": 0.3788, + "step": 1178 + }, + { + "epoch": 0.35194029850746267, + "grad_norm": 0.8714521980948533, + "learning_rate": 3.760452798109391e-06, + "loss": 0.388, + "step": 1179 + }, + { + "epoch": 0.3522388059701492, + "grad_norm": 0.7822951433929752, + "learning_rate": 3.758364587659796e-06, + "loss": 0.3682, + "step": 1180 + }, + { + "epoch": 0.35253731343283584, + "grad_norm": 0.785928197254092, + "learning_rate": 3.756275200669986e-06, + "loss": 0.3522, + "step": 1181 + }, + { + "epoch": 0.3528358208955224, + "grad_norm": 0.8654717177672067, + "learning_rate": 3.754184639093484e-06, + "loss": 0.3752, + "step": 1182 + }, + { + "epoch": 0.35313432835820896, + "grad_norm": 0.8035897529969991, + "learning_rate": 3.7520929048849154e-06, + "loss": 0.3194, + "step": 1183 + }, + { + "epoch": 0.3534328358208955, + "grad_norm": 0.8793098053746985, + "learning_rate": 3.7500000000000005e-06, + "loss": 0.3511, + "step": 1184 + }, + { + "epoch": 0.3537313432835821, + "grad_norm": 0.9325267349782886, + "learning_rate": 3.747905926395554e-06, + "loss": 0.3763, + "step": 1185 + }, + { + "epoch": 0.35402985074626864, + "grad_norm": 0.948428300474812, + "learning_rate": 3.745810686029485e-06, + "loss": 0.3896, + "step": 1186 + }, + { + "epoch": 0.3543283582089552, + "grad_norm": 0.7393311008440809, + "learning_rate": 3.743714280860791e-06, + "loss": 0.3037, + "step": 1187 + }, + { + "epoch": 0.3546268656716418, + "grad_norm": 0.8041532895009262, + "learning_rate": 3.7416167128495596e-06, + "loss": 0.3509, + "step": 1188 + }, + { + "epoch": 0.35492537313432837, + "grad_norm": 0.7550348359422494, + "learning_rate": 3.7395179839569656e-06, + "loss": 0.3718, + "step": 1189 + }, + { + "epoch": 0.35522388059701493, + "grad_norm": 0.8634479052711203, + "learning_rate": 3.7374180961452704e-06, + "loss": 0.355, + "step": 1190 + }, + { + "epoch": 0.3555223880597015, + "grad_norm": 0.7576581623194972, + "learning_rate": 3.735317051377818e-06, + "loss": 0.3351, + "step": 1191 + }, + { + "epoch": 0.35582089552238805, + "grad_norm": 0.8046261548713685, + "learning_rate": 3.7332148516190327e-06, + "loss": 0.358, + "step": 1192 + }, + { + "epoch": 0.3561194029850746, + "grad_norm": 0.8406094996665042, + "learning_rate": 3.731111498834421e-06, + "loss": 0.4038, + "step": 1193 + }, + { + "epoch": 0.3564179104477612, + "grad_norm": 0.7827745650131553, + "learning_rate": 3.7290069949905665e-06, + "loss": 0.317, + "step": 1194 + }, + { + "epoch": 0.3567164179104478, + "grad_norm": 0.8857669556458677, + "learning_rate": 3.7269013420551286e-06, + "loss": 0.3502, + "step": 1195 + }, + { + "epoch": 0.35701492537313434, + "grad_norm": 0.9284361811395723, + "learning_rate": 3.7247945419968416e-06, + "loss": 0.3782, + "step": 1196 + }, + { + "epoch": 0.3573134328358209, + "grad_norm": 0.8341033368677571, + "learning_rate": 3.722686596785513e-06, + "loss": 0.3861, + "step": 1197 + }, + { + "epoch": 0.35761194029850746, + "grad_norm": 0.9798340588994884, + "learning_rate": 3.720577508392018e-06, + "loss": 0.3544, + "step": 1198 + }, + { + "epoch": 0.357910447761194, + "grad_norm": 0.831648222529357, + "learning_rate": 3.7184672787883058e-06, + "loss": 0.3828, + "step": 1199 + }, + { + "epoch": 0.3582089552238806, + "grad_norm": 0.8402477136138807, + "learning_rate": 3.7163559099473874e-06, + "loss": 0.339, + "step": 1200 + }, + { + "epoch": 0.3585074626865672, + "grad_norm": 0.7486776097613386, + "learning_rate": 3.7142434038433415e-06, + "loss": 0.3296, + "step": 1201 + }, + { + "epoch": 0.35880597014925375, + "grad_norm": 0.8479281742963486, + "learning_rate": 3.7121297624513108e-06, + "loss": 0.3301, + "step": 1202 + }, + { + "epoch": 0.3591044776119403, + "grad_norm": 0.9005107528987859, + "learning_rate": 3.710014987747498e-06, + "loss": 0.4088, + "step": 1203 + }, + { + "epoch": 0.35940298507462687, + "grad_norm": 0.8059051563499257, + "learning_rate": 3.707899081709165e-06, + "loss": 0.3517, + "step": 1204 + }, + { + "epoch": 0.3597014925373134, + "grad_norm": 0.8552050817452692, + "learning_rate": 3.705782046314634e-06, + "loss": 0.3445, + "step": 1205 + }, + { + "epoch": 0.36, + "grad_norm": 0.9019044436471652, + "learning_rate": 3.7036638835432803e-06, + "loss": 0.3489, + "step": 1206 + }, + { + "epoch": 0.36029850746268655, + "grad_norm": 0.843202812648593, + "learning_rate": 3.7015445953755347e-06, + "loss": 0.3417, + "step": 1207 + }, + { + "epoch": 0.36059701492537316, + "grad_norm": 0.7443346275252014, + "learning_rate": 3.6994241837928803e-06, + "loss": 0.3425, + "step": 1208 + }, + { + "epoch": 0.3608955223880597, + "grad_norm": 0.8737220707930936, + "learning_rate": 3.6973026507778497e-06, + "loss": 0.3388, + "step": 1209 + }, + { + "epoch": 0.3611940298507463, + "grad_norm": 0.8319570014436627, + "learning_rate": 3.695179998314026e-06, + "loss": 0.3783, + "step": 1210 + }, + { + "epoch": 0.36149253731343284, + "grad_norm": 1.0254313443105356, + "learning_rate": 3.6930562283860356e-06, + "loss": 0.3262, + "step": 1211 + }, + { + "epoch": 0.3617910447761194, + "grad_norm": 0.8868819070775161, + "learning_rate": 3.690931342979552e-06, + "loss": 0.3693, + "step": 1212 + }, + { + "epoch": 0.36208955223880596, + "grad_norm": 0.8512110198615549, + "learning_rate": 3.6888053440812928e-06, + "loss": 0.3534, + "step": 1213 + }, + { + "epoch": 0.3623880597014925, + "grad_norm": 0.9606477539687734, + "learning_rate": 3.6866782336790137e-06, + "loss": 0.335, + "step": 1214 + }, + { + "epoch": 0.36268656716417913, + "grad_norm": 0.8299706662665464, + "learning_rate": 3.684550013761511e-06, + "loss": 0.3069, + "step": 1215 + }, + { + "epoch": 0.3629850746268657, + "grad_norm": 0.8583028517456109, + "learning_rate": 3.6824206863186195e-06, + "loss": 0.3534, + "step": 1216 + }, + { + "epoch": 0.36328358208955225, + "grad_norm": 0.795689265721912, + "learning_rate": 3.680290253341207e-06, + "loss": 0.3217, + "step": 1217 + }, + { + "epoch": 0.3635820895522388, + "grad_norm": 0.8155242986949494, + "learning_rate": 3.6781587168211785e-06, + "loss": 0.3139, + "step": 1218 + }, + { + "epoch": 0.36388059701492537, + "grad_norm": 0.9284470749372372, + "learning_rate": 3.676026078751466e-06, + "loss": 0.3994, + "step": 1219 + }, + { + "epoch": 0.3641791044776119, + "grad_norm": 0.7846735459117187, + "learning_rate": 3.673892341126036e-06, + "loss": 0.42, + "step": 1220 + }, + { + "epoch": 0.3644776119402985, + "grad_norm": 0.7394437630997585, + "learning_rate": 3.6717575059398818e-06, + "loss": 0.3425, + "step": 1221 + }, + { + "epoch": 0.3647761194029851, + "grad_norm": 0.8410128872441591, + "learning_rate": 3.66962157518902e-06, + "loss": 0.309, + "step": 1222 + }, + { + "epoch": 0.36507462686567166, + "grad_norm": 0.7127446629430065, + "learning_rate": 3.6674845508704954e-06, + "loss": 0.3246, + "step": 1223 + }, + { + "epoch": 0.3653731343283582, + "grad_norm": 0.8303226055174142, + "learning_rate": 3.665346434982373e-06, + "loss": 0.3568, + "step": 1224 + }, + { + "epoch": 0.3656716417910448, + "grad_norm": 0.8759974317217685, + "learning_rate": 3.6632072295237385e-06, + "loss": 0.3725, + "step": 1225 + }, + { + "epoch": 0.36597014925373134, + "grad_norm": 0.9100058785675172, + "learning_rate": 3.6610669364946993e-06, + "loss": 0.3733, + "step": 1226 + }, + { + "epoch": 0.3662686567164179, + "grad_norm": 0.8872689350119385, + "learning_rate": 3.6589255578963744e-06, + "loss": 0.3834, + "step": 1227 + }, + { + "epoch": 0.36656716417910445, + "grad_norm": 0.859809747889946, + "learning_rate": 3.656783095730902e-06, + "loss": 0.3207, + "step": 1228 + }, + { + "epoch": 0.36686567164179107, + "grad_norm": 0.743367008555944, + "learning_rate": 3.6546395520014324e-06, + "loss": 0.3311, + "step": 1229 + }, + { + "epoch": 0.36716417910447763, + "grad_norm": 0.8284032271379078, + "learning_rate": 3.6524949287121247e-06, + "loss": 0.3271, + "step": 1230 + }, + { + "epoch": 0.3674626865671642, + "grad_norm": 0.9040431058647576, + "learning_rate": 3.650349227868151e-06, + "loss": 0.3805, + "step": 1231 + }, + { + "epoch": 0.36776119402985075, + "grad_norm": 0.788723325583723, + "learning_rate": 3.6482024514756905e-06, + "loss": 0.3369, + "step": 1232 + }, + { + "epoch": 0.3680597014925373, + "grad_norm": 0.8292299897136874, + "learning_rate": 3.646054601541924e-06, + "loss": 0.3526, + "step": 1233 + }, + { + "epoch": 0.36835820895522386, + "grad_norm": 0.7780280218539215, + "learning_rate": 3.6439056800750406e-06, + "loss": 0.3404, + "step": 1234 + }, + { + "epoch": 0.3686567164179104, + "grad_norm": 0.8640955670674403, + "learning_rate": 3.641755689084229e-06, + "loss": 0.359, + "step": 1235 + }, + { + "epoch": 0.36895522388059704, + "grad_norm": 0.9296153051752855, + "learning_rate": 3.6396046305796783e-06, + "loss": 0.3492, + "step": 1236 + }, + { + "epoch": 0.3692537313432836, + "grad_norm": 0.8539806741089728, + "learning_rate": 3.6374525065725763e-06, + "loss": 0.3732, + "step": 1237 + }, + { + "epoch": 0.36955223880597016, + "grad_norm": 0.8791568970858674, + "learning_rate": 3.635299319075106e-06, + "loss": 0.3471, + "step": 1238 + }, + { + "epoch": 0.3698507462686567, + "grad_norm": 0.8410283775191358, + "learning_rate": 3.6331450701004444e-06, + "loss": 0.3714, + "step": 1239 + }, + { + "epoch": 0.3701492537313433, + "grad_norm": 0.8135682137125765, + "learning_rate": 3.6309897616627644e-06, + "loss": 0.3211, + "step": 1240 + }, + { + "epoch": 0.37044776119402983, + "grad_norm": 1.0986621561108818, + "learning_rate": 3.628833395777224e-06, + "loss": 0.3671, + "step": 1241 + }, + { + "epoch": 0.3707462686567164, + "grad_norm": 0.8336925612419427, + "learning_rate": 3.626675974459974e-06, + "loss": 0.3488, + "step": 1242 + }, + { + "epoch": 0.371044776119403, + "grad_norm": 0.7967364289439731, + "learning_rate": 3.624517499728151e-06, + "loss": 0.3496, + "step": 1243 + }, + { + "epoch": 0.37134328358208957, + "grad_norm": 0.8235464715233171, + "learning_rate": 3.622357973599875e-06, + "loss": 0.3435, + "step": 1244 + }, + { + "epoch": 0.3716417910447761, + "grad_norm": 0.7670146106690454, + "learning_rate": 3.6201973980942525e-06, + "loss": 0.3054, + "step": 1245 + }, + { + "epoch": 0.3719402985074627, + "grad_norm": 0.8127305346359484, + "learning_rate": 3.618035775231367e-06, + "loss": 0.337, + "step": 1246 + }, + { + "epoch": 0.37223880597014924, + "grad_norm": 0.8739704252887466, + "learning_rate": 3.6158731070322833e-06, + "loss": 0.3904, + "step": 1247 + }, + { + "epoch": 0.3725373134328358, + "grad_norm": 0.8589201227163017, + "learning_rate": 3.613709395519045e-06, + "loss": 0.3251, + "step": 1248 + }, + { + "epoch": 0.37283582089552236, + "grad_norm": 1.0287866995944308, + "learning_rate": 3.611544642714668e-06, + "loss": 0.342, + "step": 1249 + }, + { + "epoch": 0.373134328358209, + "grad_norm": 0.784684328253123, + "learning_rate": 3.609378850643144e-06, + "loss": 0.329, + "step": 1250 + }, + { + "epoch": 0.37343283582089554, + "grad_norm": 0.9957577337951871, + "learning_rate": 3.607212021329436e-06, + "loss": 0.3318, + "step": 1251 + }, + { + "epoch": 0.3737313432835821, + "grad_norm": 0.8067111270985596, + "learning_rate": 3.6050441567994766e-06, + "loss": 0.291, + "step": 1252 + }, + { + "epoch": 0.37402985074626866, + "grad_norm": 0.814611603363173, + "learning_rate": 3.6028752590801653e-06, + "loss": 0.3724, + "step": 1253 + }, + { + "epoch": 0.3743283582089552, + "grad_norm": 0.8212971184460834, + "learning_rate": 3.60070533019937e-06, + "loss": 0.3413, + "step": 1254 + }, + { + "epoch": 0.3746268656716418, + "grad_norm": 0.8492409029801361, + "learning_rate": 3.5985343721859205e-06, + "loss": 0.4186, + "step": 1255 + }, + { + "epoch": 0.37492537313432833, + "grad_norm": 0.8293206499269064, + "learning_rate": 3.59636238706961e-06, + "loss": 0.3273, + "step": 1256 + }, + { + "epoch": 0.37522388059701495, + "grad_norm": 0.7439436947261291, + "learning_rate": 3.5941893768811915e-06, + "loss": 0.3171, + "step": 1257 + }, + { + "epoch": 0.3755223880597015, + "grad_norm": 0.8417772206584229, + "learning_rate": 3.5920153436523762e-06, + "loss": 0.3727, + "step": 1258 + }, + { + "epoch": 0.37582089552238807, + "grad_norm": 0.7863852090186266, + "learning_rate": 3.589840289415833e-06, + "loss": 0.3473, + "step": 1259 + }, + { + "epoch": 0.3761194029850746, + "grad_norm": 1.2126822142651668, + "learning_rate": 3.5876642162051833e-06, + "loss": 0.3347, + "step": 1260 + }, + { + "epoch": 0.3764179104477612, + "grad_norm": 0.8029602835003353, + "learning_rate": 3.585487126055004e-06, + "loss": 0.3459, + "step": 1261 + }, + { + "epoch": 0.37671641791044774, + "grad_norm": 0.9206998569587098, + "learning_rate": 3.5833090210008204e-06, + "loss": 0.3634, + "step": 1262 + }, + { + "epoch": 0.3770149253731343, + "grad_norm": 0.8641555534787082, + "learning_rate": 3.5811299030791074e-06, + "loss": 0.361, + "step": 1263 + }, + { + "epoch": 0.3773134328358209, + "grad_norm": 0.7872304084887846, + "learning_rate": 3.578949774327288e-06, + "loss": 0.3268, + "step": 1264 + }, + { + "epoch": 0.3776119402985075, + "grad_norm": 0.8680512566886363, + "learning_rate": 3.5767686367837295e-06, + "loss": 0.3436, + "step": 1265 + }, + { + "epoch": 0.37791044776119403, + "grad_norm": 0.826653018279734, + "learning_rate": 3.5745864924877412e-06, + "loss": 0.3587, + "step": 1266 + }, + { + "epoch": 0.3782089552238806, + "grad_norm": 0.8881579540530804, + "learning_rate": 3.572403343479576e-06, + "loss": 0.3683, + "step": 1267 + }, + { + "epoch": 0.37850746268656715, + "grad_norm": 0.811695971069644, + "learning_rate": 3.570219191800424e-06, + "loss": 0.3417, + "step": 1268 + }, + { + "epoch": 0.3788059701492537, + "grad_norm": 0.7952960741104704, + "learning_rate": 3.5680340394924144e-06, + "loss": 0.366, + "step": 1269 + }, + { + "epoch": 0.37910447761194027, + "grad_norm": 0.812111528734765, + "learning_rate": 3.565847888598612e-06, + "loss": 0.3031, + "step": 1270 + }, + { + "epoch": 0.3794029850746269, + "grad_norm": 0.8238499604306999, + "learning_rate": 3.5636607411630133e-06, + "loss": 0.3639, + "step": 1271 + }, + { + "epoch": 0.37970149253731345, + "grad_norm": 1.2980492858910848, + "learning_rate": 3.5614725992305487e-06, + "loss": 0.3809, + "step": 1272 + }, + { + "epoch": 0.38, + "grad_norm": 0.7995909221221797, + "learning_rate": 3.5592834648470763e-06, + "loss": 0.3372, + "step": 1273 + }, + { + "epoch": 0.38029850746268656, + "grad_norm": 0.8051516225042442, + "learning_rate": 3.557093340059385e-06, + "loss": 0.3444, + "step": 1274 + }, + { + "epoch": 0.3805970149253731, + "grad_norm": 0.7977178460505244, + "learning_rate": 3.5549022269151876e-06, + "loss": 0.3087, + "step": 1275 + }, + { + "epoch": 0.3808955223880597, + "grad_norm": 0.8617753129014368, + "learning_rate": 3.552710127463121e-06, + "loss": 0.399, + "step": 1276 + }, + { + "epoch": 0.38119402985074624, + "grad_norm": 0.8085602277043268, + "learning_rate": 3.550517043752745e-06, + "loss": 0.3762, + "step": 1277 + }, + { + "epoch": 0.38149253731343286, + "grad_norm": 0.8125284069150092, + "learning_rate": 3.5483229778345403e-06, + "loss": 0.3532, + "step": 1278 + }, + { + "epoch": 0.3817910447761194, + "grad_norm": 0.7339040692073175, + "learning_rate": 3.546127931759903e-06, + "loss": 0.3073, + "step": 1279 + }, + { + "epoch": 0.382089552238806, + "grad_norm": 0.7778739951423395, + "learning_rate": 3.5439319075811496e-06, + "loss": 0.3276, + "step": 1280 + }, + { + "epoch": 0.38238805970149253, + "grad_norm": 0.7892393982929026, + "learning_rate": 3.5417349073515085e-06, + "loss": 0.3358, + "step": 1281 + }, + { + "epoch": 0.3826865671641791, + "grad_norm": 0.8241466550279929, + "learning_rate": 3.5395369331251205e-06, + "loss": 0.3427, + "step": 1282 + }, + { + "epoch": 0.38298507462686565, + "grad_norm": 0.8892819954376281, + "learning_rate": 3.53733798695704e-06, + "loss": 0.3166, + "step": 1283 + }, + { + "epoch": 0.38328358208955227, + "grad_norm": 0.8269062884932116, + "learning_rate": 3.5351380709032265e-06, + "loss": 0.3722, + "step": 1284 + }, + { + "epoch": 0.3835820895522388, + "grad_norm": 0.905649324964948, + "learning_rate": 3.5329371870205477e-06, + "loss": 0.3142, + "step": 1285 + }, + { + "epoch": 0.3838805970149254, + "grad_norm": 0.8106636299372714, + "learning_rate": 3.5307353373667772e-06, + "loss": 0.3387, + "step": 1286 + }, + { + "epoch": 0.38417910447761194, + "grad_norm": 0.811430184665767, + "learning_rate": 3.528532524000591e-06, + "loss": 0.3286, + "step": 1287 + }, + { + "epoch": 0.3844776119402985, + "grad_norm": 0.8218271629980521, + "learning_rate": 3.5263287489815643e-06, + "loss": 0.3835, + "step": 1288 + }, + { + "epoch": 0.38477611940298506, + "grad_norm": 0.816710352160504, + "learning_rate": 3.524124014370175e-06, + "loss": 0.3582, + "step": 1289 + }, + { + "epoch": 0.3850746268656716, + "grad_norm": 0.7305897895271865, + "learning_rate": 3.5219183222277954e-06, + "loss": 0.3257, + "step": 1290 + }, + { + "epoch": 0.38537313432835824, + "grad_norm": 0.7546275525313664, + "learning_rate": 3.519711674616694e-06, + "loss": 0.3344, + "step": 1291 + }, + { + "epoch": 0.3856716417910448, + "grad_norm": 0.8631937455194517, + "learning_rate": 3.517504073600031e-06, + "loss": 0.351, + "step": 1292 + }, + { + "epoch": 0.38597014925373135, + "grad_norm": 0.8204252773546967, + "learning_rate": 3.5152955212418616e-06, + "loss": 0.2986, + "step": 1293 + }, + { + "epoch": 0.3862686567164179, + "grad_norm": 0.8154202257697561, + "learning_rate": 3.5130860196071283e-06, + "loss": 0.3418, + "step": 1294 + }, + { + "epoch": 0.38656716417910447, + "grad_norm": 0.8526837643437616, + "learning_rate": 3.51087557076166e-06, + "loss": 0.3821, + "step": 1295 + }, + { + "epoch": 0.38686567164179103, + "grad_norm": 0.8428109150390479, + "learning_rate": 3.508664176772173e-06, + "loss": 0.3234, + "step": 1296 + }, + { + "epoch": 0.3871641791044776, + "grad_norm": 0.8008585623503791, + "learning_rate": 3.506451839706268e-06, + "loss": 0.3399, + "step": 1297 + }, + { + "epoch": 0.3874626865671642, + "grad_norm": 0.8269646097065997, + "learning_rate": 3.5042385616324243e-06, + "loss": 0.3505, + "step": 1298 + }, + { + "epoch": 0.38776119402985076, + "grad_norm": 0.7972302337986306, + "learning_rate": 3.5020243446200034e-06, + "loss": 0.3302, + "step": 1299 + }, + { + "epoch": 0.3880597014925373, + "grad_norm": 0.8833042412930123, + "learning_rate": 3.4998091907392463e-06, + "loss": 0.3793, + "step": 1300 + }, + { + "epoch": 0.3883582089552239, + "grad_norm": 0.9850943543964298, + "learning_rate": 3.497593102061264e-06, + "loss": 0.3532, + "step": 1301 + }, + { + "epoch": 0.38865671641791044, + "grad_norm": 0.9676051683553516, + "learning_rate": 3.4953760806580494e-06, + "loss": 0.3635, + "step": 1302 + }, + { + "epoch": 0.388955223880597, + "grad_norm": 0.8090901217456243, + "learning_rate": 3.4931581286024607e-06, + "loss": 0.2871, + "step": 1303 + }, + { + "epoch": 0.38925373134328356, + "grad_norm": 0.8348404220329789, + "learning_rate": 3.4909392479682303e-06, + "loss": 0.3315, + "step": 1304 + }, + { + "epoch": 0.3895522388059702, + "grad_norm": 0.7991008815236104, + "learning_rate": 3.488719440829958e-06, + "loss": 0.36, + "step": 1305 + }, + { + "epoch": 0.38985074626865673, + "grad_norm": 0.8438228763160631, + "learning_rate": 3.4864987092631074e-06, + "loss": 0.3869, + "step": 1306 + }, + { + "epoch": 0.3901492537313433, + "grad_norm": 0.8489110544841167, + "learning_rate": 3.4842770553440103e-06, + "loss": 0.3675, + "step": 1307 + }, + { + "epoch": 0.39044776119402985, + "grad_norm": 0.7917206514304985, + "learning_rate": 3.4820544811498584e-06, + "loss": 0.3385, + "step": 1308 + }, + { + "epoch": 0.3907462686567164, + "grad_norm": 1.0183165833490218, + "learning_rate": 3.479830988758704e-06, + "loss": 0.351, + "step": 1309 + }, + { + "epoch": 0.39104477611940297, + "grad_norm": 0.7666673934748648, + "learning_rate": 3.4776065802494585e-06, + "loss": 0.3123, + "step": 1310 + }, + { + "epoch": 0.39134328358208953, + "grad_norm": 0.8481681061835175, + "learning_rate": 3.47538125770189e-06, + "loss": 0.3304, + "step": 1311 + }, + { + "epoch": 0.39164179104477614, + "grad_norm": 0.8626463063388562, + "learning_rate": 3.4731550231966193e-06, + "loss": 0.3429, + "step": 1312 + }, + { + "epoch": 0.3919402985074627, + "grad_norm": 0.7994976728058045, + "learning_rate": 3.470927878815124e-06, + "loss": 0.3368, + "step": 1313 + }, + { + "epoch": 0.39223880597014926, + "grad_norm": 0.8542479136676837, + "learning_rate": 3.4686998266397275e-06, + "loss": 0.3284, + "step": 1314 + }, + { + "epoch": 0.3925373134328358, + "grad_norm": 0.8028452068951644, + "learning_rate": 3.466470868753606e-06, + "loss": 0.3699, + "step": 1315 + }, + { + "epoch": 0.3928358208955224, + "grad_norm": 0.8192750229513925, + "learning_rate": 3.4642410072407797e-06, + "loss": 0.3374, + "step": 1316 + }, + { + "epoch": 0.39313432835820894, + "grad_norm": 0.7911937120174724, + "learning_rate": 3.4620102441861147e-06, + "loss": 0.3068, + "step": 1317 + }, + { + "epoch": 0.3934328358208955, + "grad_norm": 0.7856414622293901, + "learning_rate": 3.4597785816753203e-06, + "loss": 0.3815, + "step": 1318 + }, + { + "epoch": 0.3937313432835821, + "grad_norm": 0.8719965350651078, + "learning_rate": 3.4575460217949475e-06, + "loss": 0.3249, + "step": 1319 + }, + { + "epoch": 0.3940298507462687, + "grad_norm": 0.8038659900828976, + "learning_rate": 3.4553125666323828e-06, + "loss": 0.3609, + "step": 1320 + }, + { + "epoch": 0.39432835820895523, + "grad_norm": 0.8437738135633144, + "learning_rate": 3.453078218275856e-06, + "loss": 0.3547, + "step": 1321 + }, + { + "epoch": 0.3946268656716418, + "grad_norm": 0.8893980819638573, + "learning_rate": 3.4508429788144255e-06, + "loss": 0.3835, + "step": 1322 + }, + { + "epoch": 0.39492537313432835, + "grad_norm": 0.7966994623889064, + "learning_rate": 3.4486068503379864e-06, + "loss": 0.3278, + "step": 1323 + }, + { + "epoch": 0.3952238805970149, + "grad_norm": 0.8837643559806236, + "learning_rate": 3.4463698349372655e-06, + "loss": 0.4, + "step": 1324 + }, + { + "epoch": 0.39552238805970147, + "grad_norm": 0.8420254919427764, + "learning_rate": 3.444131934703816e-06, + "loss": 0.3145, + "step": 1325 + }, + { + "epoch": 0.3958208955223881, + "grad_norm": 0.8412477035174881, + "learning_rate": 3.4418931517300207e-06, + "loss": 0.3318, + "step": 1326 + }, + { + "epoch": 0.39611940298507464, + "grad_norm": 0.7378862322828338, + "learning_rate": 3.4396534881090882e-06, + "loss": 0.3295, + "step": 1327 + }, + { + "epoch": 0.3964179104477612, + "grad_norm": 0.814528536394005, + "learning_rate": 3.437412945935047e-06, + "loss": 0.3715, + "step": 1328 + }, + { + "epoch": 0.39671641791044776, + "grad_norm": 0.8472682635407821, + "learning_rate": 3.435171527302752e-06, + "loss": 0.3495, + "step": 1329 + }, + { + "epoch": 0.3970149253731343, + "grad_norm": 0.7719473237614821, + "learning_rate": 3.4329292343078733e-06, + "loss": 0.3388, + "step": 1330 + }, + { + "epoch": 0.3973134328358209, + "grad_norm": 0.8467959201969155, + "learning_rate": 3.430686069046901e-06, + "loss": 0.3357, + "step": 1331 + }, + { + "epoch": 0.39761194029850744, + "grad_norm": 0.8099659593829108, + "learning_rate": 3.4284420336171393e-06, + "loss": 0.3495, + "step": 1332 + }, + { + "epoch": 0.39791044776119405, + "grad_norm": 0.8144878750567746, + "learning_rate": 3.426197130116707e-06, + "loss": 0.3371, + "step": 1333 + }, + { + "epoch": 0.3982089552238806, + "grad_norm": 0.7434099123184534, + "learning_rate": 3.423951360644534e-06, + "loss": 0.2841, + "step": 1334 + }, + { + "epoch": 0.39850746268656717, + "grad_norm": 0.8453291988577794, + "learning_rate": 3.4217047273003605e-06, + "loss": 0.3219, + "step": 1335 + }, + { + "epoch": 0.39880597014925373, + "grad_norm": 0.9447504107148086, + "learning_rate": 3.4194572321847336e-06, + "loss": 0.3831, + "step": 1336 + }, + { + "epoch": 0.3991044776119403, + "grad_norm": 0.8778360194938137, + "learning_rate": 3.417208877399006e-06, + "loss": 0.363, + "step": 1337 + }, + { + "epoch": 0.39940298507462685, + "grad_norm": 0.858110018415135, + "learning_rate": 3.4149596650453354e-06, + "loss": 0.345, + "step": 1338 + }, + { + "epoch": 0.3997014925373134, + "grad_norm": 0.8216385808533025, + "learning_rate": 3.4127095972266795e-06, + "loss": 0.4099, + "step": 1339 + }, + { + "epoch": 0.4, + "grad_norm": 0.8344309431851922, + "learning_rate": 3.4104586760467984e-06, + "loss": 0.3873, + "step": 1340 + }, + { + "epoch": 0.4002985074626866, + "grad_norm": 0.8372552783746257, + "learning_rate": 3.408206903610247e-06, + "loss": 0.3612, + "step": 1341 + }, + { + "epoch": 0.40059701492537314, + "grad_norm": 0.8372600966982839, + "learning_rate": 3.4059542820223782e-06, + "loss": 0.3692, + "step": 1342 + }, + { + "epoch": 0.4008955223880597, + "grad_norm": 0.9587410316998514, + "learning_rate": 3.4037008133893395e-06, + "loss": 0.3882, + "step": 1343 + }, + { + "epoch": 0.40119402985074626, + "grad_norm": 0.7735563287004625, + "learning_rate": 3.4014464998180673e-06, + "loss": 0.3486, + "step": 1344 + }, + { + "epoch": 0.4014925373134328, + "grad_norm": 0.7836991921229477, + "learning_rate": 3.3991913434162905e-06, + "loss": 0.3254, + "step": 1345 + }, + { + "epoch": 0.4017910447761194, + "grad_norm": 0.7627893713273461, + "learning_rate": 3.396935346292526e-06, + "loss": 0.3461, + "step": 1346 + }, + { + "epoch": 0.402089552238806, + "grad_norm": 0.8945538621615577, + "learning_rate": 3.3946785105560742e-06, + "loss": 0.312, + "step": 1347 + }, + { + "epoch": 0.40238805970149255, + "grad_norm": 0.8301080381657066, + "learning_rate": 3.3924208383170244e-06, + "loss": 0.384, + "step": 1348 + }, + { + "epoch": 0.4026865671641791, + "grad_norm": 0.8441180205827015, + "learning_rate": 3.3901623316862424e-06, + "loss": 0.3237, + "step": 1349 + }, + { + "epoch": 0.40298507462686567, + "grad_norm": 0.917883619027885, + "learning_rate": 3.3879029927753782e-06, + "loss": 0.3451, + "step": 1350 + }, + { + "epoch": 0.40328358208955223, + "grad_norm": 0.7676285335284508, + "learning_rate": 3.3856428236968593e-06, + "loss": 0.3348, + "step": 1351 + }, + { + "epoch": 0.4035820895522388, + "grad_norm": 0.7694667977006615, + "learning_rate": 3.3833818265638868e-06, + "loss": 0.3202, + "step": 1352 + }, + { + "epoch": 0.40388059701492535, + "grad_norm": 0.7859618711908494, + "learning_rate": 3.3811200034904392e-06, + "loss": 0.3447, + "step": 1353 + }, + { + "epoch": 0.40417910447761196, + "grad_norm": 0.7857504939863141, + "learning_rate": 3.3788573565912665e-06, + "loss": 0.2722, + "step": 1354 + }, + { + "epoch": 0.4044776119402985, + "grad_norm": 0.782826471359172, + "learning_rate": 3.3765938879818865e-06, + "loss": 0.3318, + "step": 1355 + }, + { + "epoch": 0.4047761194029851, + "grad_norm": 0.855706197844801, + "learning_rate": 3.3743295997785884e-06, + "loss": 0.3204, + "step": 1356 + }, + { + "epoch": 0.40507462686567164, + "grad_norm": 0.8343684499136247, + "learning_rate": 3.372064494098427e-06, + "loss": 0.3173, + "step": 1357 + }, + { + "epoch": 0.4053731343283582, + "grad_norm": 0.8369262504159325, + "learning_rate": 3.3697985730592187e-06, + "loss": 0.3339, + "step": 1358 + }, + { + "epoch": 0.40567164179104476, + "grad_norm": 0.7799099543460535, + "learning_rate": 3.3675318387795473e-06, + "loss": 0.3423, + "step": 1359 + }, + { + "epoch": 0.4059701492537313, + "grad_norm": 0.8648556337576385, + "learning_rate": 3.3652642933787526e-06, + "loss": 0.3493, + "step": 1360 + }, + { + "epoch": 0.40626865671641793, + "grad_norm": 0.8204663128828968, + "learning_rate": 3.362995938976934e-06, + "loss": 0.3317, + "step": 1361 + }, + { + "epoch": 0.4065671641791045, + "grad_norm": 0.792102559533383, + "learning_rate": 3.3607267776949485e-06, + "loss": 0.3639, + "step": 1362 + }, + { + "epoch": 0.40686567164179105, + "grad_norm": 0.8308207742246098, + "learning_rate": 3.358456811654406e-06, + "loss": 0.3679, + "step": 1363 + }, + { + "epoch": 0.4071641791044776, + "grad_norm": 0.8656195971316198, + "learning_rate": 3.35618604297767e-06, + "loss": 0.3797, + "step": 1364 + }, + { + "epoch": 0.40746268656716417, + "grad_norm": 0.8006170230930989, + "learning_rate": 3.3539144737878525e-06, + "loss": 0.3471, + "step": 1365 + }, + { + "epoch": 0.4077611940298507, + "grad_norm": 0.8502026092595737, + "learning_rate": 3.351642106208816e-06, + "loss": 0.3814, + "step": 1366 + }, + { + "epoch": 0.40805970149253734, + "grad_norm": 0.7775186619424316, + "learning_rate": 3.3493689423651697e-06, + "loss": 0.3086, + "step": 1367 + }, + { + "epoch": 0.4083582089552239, + "grad_norm": 0.798430336568009, + "learning_rate": 3.3470949843822657e-06, + "loss": 0.3406, + "step": 1368 + }, + { + "epoch": 0.40865671641791046, + "grad_norm": 0.8053325058264433, + "learning_rate": 3.3448202343861992e-06, + "loss": 0.3458, + "step": 1369 + }, + { + "epoch": 0.408955223880597, + "grad_norm": 0.8343334939840257, + "learning_rate": 3.3425446945038058e-06, + "loss": 0.337, + "step": 1370 + }, + { + "epoch": 0.4092537313432836, + "grad_norm": 0.8191029497223187, + "learning_rate": 3.34026836686266e-06, + "loss": 0.3625, + "step": 1371 + }, + { + "epoch": 0.40955223880597014, + "grad_norm": 0.8618543242740297, + "learning_rate": 3.337991253591073e-06, + "loss": 0.3382, + "step": 1372 + }, + { + "epoch": 0.4098507462686567, + "grad_norm": 0.7548489326509085, + "learning_rate": 3.3357133568180905e-06, + "loss": 0.3296, + "step": 1373 + }, + { + "epoch": 0.4101492537313433, + "grad_norm": 0.842491815374688, + "learning_rate": 3.3334346786734894e-06, + "loss": 0.3493, + "step": 1374 + }, + { + "epoch": 0.41044776119402987, + "grad_norm": 0.8011968378789714, + "learning_rate": 3.3311552212877787e-06, + "loss": 0.3359, + "step": 1375 + }, + { + "epoch": 0.41074626865671643, + "grad_norm": 0.8683971005840104, + "learning_rate": 3.3288749867921953e-06, + "loss": 0.3896, + "step": 1376 + }, + { + "epoch": 0.411044776119403, + "grad_norm": 0.8853597587848415, + "learning_rate": 3.3265939773187026e-06, + "loss": 0.31, + "step": 1377 + }, + { + "epoch": 0.41134328358208955, + "grad_norm": 0.8397333555979059, + "learning_rate": 3.3243121949999906e-06, + "loss": 0.3478, + "step": 1378 + }, + { + "epoch": 0.4116417910447761, + "grad_norm": 0.7964931329993589, + "learning_rate": 3.3220296419694686e-06, + "loss": 0.2821, + "step": 1379 + }, + { + "epoch": 0.41194029850746267, + "grad_norm": 0.8116358539123523, + "learning_rate": 3.319746320361268e-06, + "loss": 0.3519, + "step": 1380 + }, + { + "epoch": 0.4122388059701493, + "grad_norm": 1.417004497957898, + "learning_rate": 3.3174622323102396e-06, + "loss": 0.3152, + "step": 1381 + }, + { + "epoch": 0.41253731343283584, + "grad_norm": 0.7731890086167517, + "learning_rate": 3.3151773799519492e-06, + "loss": 0.3873, + "step": 1382 + }, + { + "epoch": 0.4128358208955224, + "grad_norm": 0.7984276296697358, + "learning_rate": 3.3128917654226794e-06, + "loss": 0.3897, + "step": 1383 + }, + { + "epoch": 0.41313432835820896, + "grad_norm": 0.8349251849409903, + "learning_rate": 3.310605390859422e-06, + "loss": 0.3516, + "step": 1384 + }, + { + "epoch": 0.4134328358208955, + "grad_norm": 0.8765686889024715, + "learning_rate": 3.3083182583998835e-06, + "loss": 0.3757, + "step": 1385 + }, + { + "epoch": 0.4137313432835821, + "grad_norm": 0.9054669051528854, + "learning_rate": 3.3060303701824763e-06, + "loss": 0.3599, + "step": 1386 + }, + { + "epoch": 0.41402985074626864, + "grad_norm": 0.7717625673330878, + "learning_rate": 3.303741728346319e-06, + "loss": 0.3221, + "step": 1387 + }, + { + "epoch": 0.41432835820895525, + "grad_norm": 0.7734383019569172, + "learning_rate": 3.301452335031238e-06, + "loss": 0.3417, + "step": 1388 + }, + { + "epoch": 0.4146268656716418, + "grad_norm": 0.9879867361611887, + "learning_rate": 3.299162192377759e-06, + "loss": 0.4098, + "step": 1389 + }, + { + "epoch": 0.41492537313432837, + "grad_norm": 0.8150464123862557, + "learning_rate": 3.2968713025271095e-06, + "loss": 0.3322, + "step": 1390 + }, + { + "epoch": 0.4152238805970149, + "grad_norm": 0.8573334079386694, + "learning_rate": 3.2945796676212155e-06, + "loss": 0.2965, + "step": 1391 + }, + { + "epoch": 0.4155223880597015, + "grad_norm": 0.9215312294103535, + "learning_rate": 3.2922872898027007e-06, + "loss": 0.3696, + "step": 1392 + }, + { + "epoch": 0.41582089552238805, + "grad_norm": 0.8431748611402596, + "learning_rate": 3.289994171214882e-06, + "loss": 0.3596, + "step": 1393 + }, + { + "epoch": 0.4161194029850746, + "grad_norm": 0.9100190938713786, + "learning_rate": 3.287700314001769e-06, + "loss": 0.3549, + "step": 1394 + }, + { + "epoch": 0.4164179104477612, + "grad_norm": 0.7828037598477029, + "learning_rate": 3.2854057203080624e-06, + "loss": 0.3741, + "step": 1395 + }, + { + "epoch": 0.4167164179104478, + "grad_norm": 0.8491175801452148, + "learning_rate": 3.283110392279152e-06, + "loss": 0.3451, + "step": 1396 + }, + { + "epoch": 0.41701492537313434, + "grad_norm": 0.8220914334757844, + "learning_rate": 3.2808143320611137e-06, + "loss": 0.349, + "step": 1397 + }, + { + "epoch": 0.4173134328358209, + "grad_norm": 0.7625729166024875, + "learning_rate": 3.2785175418007066e-06, + "loss": 0.3334, + "step": 1398 + }, + { + "epoch": 0.41761194029850746, + "grad_norm": 0.7835308374457285, + "learning_rate": 3.276220023645374e-06, + "loss": 0.3221, + "step": 1399 + }, + { + "epoch": 0.417910447761194, + "grad_norm": 0.8199114774599374, + "learning_rate": 3.2739217797432405e-06, + "loss": 0.3238, + "step": 1400 + }, + { + "epoch": 0.4182089552238806, + "grad_norm": 0.7932003246008847, + "learning_rate": 3.2716228122431072e-06, + "loss": 0.3066, + "step": 1401 + }, + { + "epoch": 0.4185074626865672, + "grad_norm": 0.8150947385287399, + "learning_rate": 3.2693231232944527e-06, + "loss": 0.3184, + "step": 1402 + }, + { + "epoch": 0.41880597014925375, + "grad_norm": 0.9738953531287554, + "learning_rate": 3.2670227150474298e-06, + "loss": 0.3689, + "step": 1403 + }, + { + "epoch": 0.4191044776119403, + "grad_norm": 0.9195470631640194, + "learning_rate": 3.2647215896528643e-06, + "loss": 0.3668, + "step": 1404 + }, + { + "epoch": 0.41940298507462687, + "grad_norm": 0.8600925044550167, + "learning_rate": 3.262419749262254e-06, + "loss": 0.3313, + "step": 1405 + }, + { + "epoch": 0.4197014925373134, + "grad_norm": 0.7587232892677159, + "learning_rate": 3.260117196027761e-06, + "loss": 0.3819, + "step": 1406 + }, + { + "epoch": 0.42, + "grad_norm": 0.822014542644426, + "learning_rate": 3.2578139321022175e-06, + "loss": 0.335, + "step": 1407 + }, + { + "epoch": 0.42029850746268654, + "grad_norm": 0.9420250400800545, + "learning_rate": 3.25550995963912e-06, + "loss": 0.3181, + "step": 1408 + }, + { + "epoch": 0.42059701492537316, + "grad_norm": 0.7516372540343567, + "learning_rate": 3.253205280792625e-06, + "loss": 0.2851, + "step": 1409 + }, + { + "epoch": 0.4208955223880597, + "grad_norm": 0.7948134058388104, + "learning_rate": 3.250899897717552e-06, + "loss": 0.3454, + "step": 1410 + }, + { + "epoch": 0.4211940298507463, + "grad_norm": 0.7240159536362353, + "learning_rate": 3.248593812569379e-06, + "loss": 0.3136, + "step": 1411 + }, + { + "epoch": 0.42149253731343284, + "grad_norm": 0.759842065598249, + "learning_rate": 3.246287027504237e-06, + "loss": 0.3287, + "step": 1412 + }, + { + "epoch": 0.4217910447761194, + "grad_norm": 0.790615746686793, + "learning_rate": 3.2439795446789152e-06, + "loss": 0.3589, + "step": 1413 + }, + { + "epoch": 0.42208955223880595, + "grad_norm": 0.8340970220469927, + "learning_rate": 3.241671366250854e-06, + "loss": 0.3562, + "step": 1414 + }, + { + "epoch": 0.4223880597014925, + "grad_norm": 0.8939868975796444, + "learning_rate": 3.2393624943781426e-06, + "loss": 0.3438, + "step": 1415 + }, + { + "epoch": 0.42268656716417913, + "grad_norm": 0.8450509789557636, + "learning_rate": 3.2370529312195225e-06, + "loss": 0.3233, + "step": 1416 + }, + { + "epoch": 0.4229850746268657, + "grad_norm": 0.8150254784866996, + "learning_rate": 3.2347426789343766e-06, + "loss": 0.3523, + "step": 1417 + }, + { + "epoch": 0.42328358208955225, + "grad_norm": 1.0717236306406788, + "learning_rate": 3.2324317396827355e-06, + "loss": 0.3495, + "step": 1418 + }, + { + "epoch": 0.4235820895522388, + "grad_norm": 0.7615376345185246, + "learning_rate": 3.2301201156252704e-06, + "loss": 0.3615, + "step": 1419 + }, + { + "epoch": 0.42388059701492536, + "grad_norm": 0.8626725672132948, + "learning_rate": 3.2278078089232945e-06, + "loss": 0.359, + "step": 1420 + }, + { + "epoch": 0.4241791044776119, + "grad_norm": 0.7597752656447071, + "learning_rate": 3.2254948217387576e-06, + "loss": 0.3437, + "step": 1421 + }, + { + "epoch": 0.4244776119402985, + "grad_norm": 0.8020767792341673, + "learning_rate": 3.223181156234246e-06, + "loss": 0.3578, + "step": 1422 + }, + { + "epoch": 0.4247761194029851, + "grad_norm": 0.7733888823557834, + "learning_rate": 3.2208668145729806e-06, + "loss": 0.3439, + "step": 1423 + }, + { + "epoch": 0.42507462686567166, + "grad_norm": 0.8786679302972779, + "learning_rate": 3.2185517989188154e-06, + "loss": 0.3349, + "step": 1424 + }, + { + "epoch": 0.4253731343283582, + "grad_norm": 0.7638976687742035, + "learning_rate": 3.216236111436233e-06, + "loss": 0.3331, + "step": 1425 + }, + { + "epoch": 0.4256716417910448, + "grad_norm": 0.7840962222569484, + "learning_rate": 3.213919754290343e-06, + "loss": 0.354, + "step": 1426 + }, + { + "epoch": 0.42597014925373133, + "grad_norm": 0.8613687089983829, + "learning_rate": 3.2116027296468866e-06, + "loss": 0.3771, + "step": 1427 + }, + { + "epoch": 0.4262686567164179, + "grad_norm": 0.7666875547356334, + "learning_rate": 3.2092850396722227e-06, + "loss": 0.3428, + "step": 1428 + }, + { + "epoch": 0.42656716417910445, + "grad_norm": 0.8148557834884871, + "learning_rate": 3.2069666865333356e-06, + "loss": 0.3541, + "step": 1429 + }, + { + "epoch": 0.42686567164179107, + "grad_norm": 0.7914353848681119, + "learning_rate": 3.204647672397829e-06, + "loss": 0.3338, + "step": 1430 + }, + { + "epoch": 0.4271641791044776, + "grad_norm": 0.8843259464989137, + "learning_rate": 3.2023279994339242e-06, + "loss": 0.3511, + "step": 1431 + }, + { + "epoch": 0.4274626865671642, + "grad_norm": 0.788441005552561, + "learning_rate": 3.2000076698104585e-06, + "loss": 0.359, + "step": 1432 + }, + { + "epoch": 0.42776119402985074, + "grad_norm": 0.8244525477083475, + "learning_rate": 3.197686685696885e-06, + "loss": 0.3729, + "step": 1433 + }, + { + "epoch": 0.4280597014925373, + "grad_norm": 0.8468029355697649, + "learning_rate": 3.1953650492632664e-06, + "loss": 0.3583, + "step": 1434 + }, + { + "epoch": 0.42835820895522386, + "grad_norm": 0.7871095643951108, + "learning_rate": 3.193042762680277e-06, + "loss": 0.3595, + "step": 1435 + }, + { + "epoch": 0.4286567164179104, + "grad_norm": 0.8360573541165367, + "learning_rate": 3.1907198281191963e-06, + "loss": 0.3374, + "step": 1436 + }, + { + "epoch": 0.42895522388059704, + "grad_norm": 0.8275698735469698, + "learning_rate": 3.1883962477519136e-06, + "loss": 0.3247, + "step": 1437 + }, + { + "epoch": 0.4292537313432836, + "grad_norm": 0.7431117381085794, + "learning_rate": 3.1860720237509186e-06, + "loss": 0.3186, + "step": 1438 + }, + { + "epoch": 0.42955223880597015, + "grad_norm": 0.8236400271008097, + "learning_rate": 3.1837471582893044e-06, + "loss": 0.4015, + "step": 1439 + }, + { + "epoch": 0.4298507462686567, + "grad_norm": 0.7284531636328365, + "learning_rate": 3.181421653540764e-06, + "loss": 0.3023, + "step": 1440 + }, + { + "epoch": 0.4301492537313433, + "grad_norm": 0.8203967087943976, + "learning_rate": 3.1790955116795865e-06, + "loss": 0.3109, + "step": 1441 + }, + { + "epoch": 0.43044776119402983, + "grad_norm": 0.8560428452075539, + "learning_rate": 3.176768734880658e-06, + "loss": 0.3423, + "step": 1442 + }, + { + "epoch": 0.4307462686567164, + "grad_norm": 0.9450551948814531, + "learning_rate": 3.17444132531946e-06, + "loss": 0.3602, + "step": 1443 + }, + { + "epoch": 0.431044776119403, + "grad_norm": 0.7426532649143752, + "learning_rate": 3.1721132851720615e-06, + "loss": 0.3561, + "step": 1444 + }, + { + "epoch": 0.43134328358208956, + "grad_norm": 0.8374537611379493, + "learning_rate": 3.169784616615125e-06, + "loss": 0.3457, + "step": 1445 + }, + { + "epoch": 0.4316417910447761, + "grad_norm": 0.7637827169896023, + "learning_rate": 3.1674553218258976e-06, + "loss": 0.2929, + "step": 1446 + }, + { + "epoch": 0.4319402985074627, + "grad_norm": 0.8669603581918557, + "learning_rate": 3.1651254029822126e-06, + "loss": 0.3499, + "step": 1447 + }, + { + "epoch": 0.43223880597014924, + "grad_norm": 0.8055692147761853, + "learning_rate": 3.1627948622624894e-06, + "loss": 0.3469, + "step": 1448 + }, + { + "epoch": 0.4325373134328358, + "grad_norm": 1.081434396687064, + "learning_rate": 3.160463701845725e-06, + "loss": 0.4056, + "step": 1449 + }, + { + "epoch": 0.43283582089552236, + "grad_norm": 0.866468982049259, + "learning_rate": 3.1581319239114983e-06, + "loss": 0.3664, + "step": 1450 + }, + { + "epoch": 0.433134328358209, + "grad_norm": 0.7980338117580451, + "learning_rate": 3.1557995306399657e-06, + "loss": 0.3377, + "step": 1451 + }, + { + "epoch": 0.43343283582089553, + "grad_norm": 0.9044155994386817, + "learning_rate": 3.1534665242118557e-06, + "loss": 0.3559, + "step": 1452 + }, + { + "epoch": 0.4337313432835821, + "grad_norm": 0.8274492131309824, + "learning_rate": 3.151132906808474e-06, + "loss": 0.2981, + "step": 1453 + }, + { + "epoch": 0.43402985074626865, + "grad_norm": 1.1457765974493999, + "learning_rate": 3.1487986806116964e-06, + "loss": 0.3406, + "step": 1454 + }, + { + "epoch": 0.4343283582089552, + "grad_norm": 0.7341840039215745, + "learning_rate": 3.1464638478039665e-06, + "loss": 0.3597, + "step": 1455 + }, + { + "epoch": 0.43462686567164177, + "grad_norm": 0.8382632632518243, + "learning_rate": 3.1441284105682973e-06, + "loss": 0.3517, + "step": 1456 + }, + { + "epoch": 0.4349253731343284, + "grad_norm": 0.8205128080490478, + "learning_rate": 3.1417923710882643e-06, + "loss": 0.3274, + "step": 1457 + }, + { + "epoch": 0.43522388059701494, + "grad_norm": 0.8607995458918247, + "learning_rate": 3.1394557315480077e-06, + "loss": 0.3733, + "step": 1458 + }, + { + "epoch": 0.4355223880597015, + "grad_norm": 0.9177467111453076, + "learning_rate": 3.13711849413223e-06, + "loss": 0.3382, + "step": 1459 + }, + { + "epoch": 0.43582089552238806, + "grad_norm": 0.8007964740073961, + "learning_rate": 3.1347806610261886e-06, + "loss": 0.3501, + "step": 1460 + }, + { + "epoch": 0.4361194029850746, + "grad_norm": 0.8794813487482264, + "learning_rate": 3.1324422344157026e-06, + "loss": 0.3731, + "step": 1461 + }, + { + "epoch": 0.4364179104477612, + "grad_norm": 0.9082175046205051, + "learning_rate": 3.1301032164871436e-06, + "loss": 0.4047, + "step": 1462 + }, + { + "epoch": 0.43671641791044774, + "grad_norm": 0.9375256531298286, + "learning_rate": 3.1277636094274357e-06, + "loss": 0.3549, + "step": 1463 + }, + { + "epoch": 0.43701492537313436, + "grad_norm": 0.7978433496058229, + "learning_rate": 3.1254234154240544e-06, + "loss": 0.3325, + "step": 1464 + }, + { + "epoch": 0.4373134328358209, + "grad_norm": 0.8332407184088709, + "learning_rate": 3.1230826366650245e-06, + "loss": 0.347, + "step": 1465 + }, + { + "epoch": 0.4376119402985075, + "grad_norm": 0.7369892800300631, + "learning_rate": 3.1207412753389173e-06, + "loss": 0.3096, + "step": 1466 + }, + { + "epoch": 0.43791044776119403, + "grad_norm": 0.912860064618065, + "learning_rate": 3.118399333634848e-06, + "loss": 0.4124, + "step": 1467 + }, + { + "epoch": 0.4382089552238806, + "grad_norm": 0.9428428169193727, + "learning_rate": 3.1160568137424757e-06, + "loss": 0.3071, + "step": 1468 + }, + { + "epoch": 0.43850746268656715, + "grad_norm": 0.8484227296987369, + "learning_rate": 3.1137137178519983e-06, + "loss": 0.3957, + "step": 1469 + }, + { + "epoch": 0.4388059701492537, + "grad_norm": 0.8297150076443598, + "learning_rate": 3.1113700481541547e-06, + "loss": 0.3014, + "step": 1470 + }, + { + "epoch": 0.4391044776119403, + "grad_norm": 0.8049525013596943, + "learning_rate": 3.1090258068402173e-06, + "loss": 0.3452, + "step": 1471 + }, + { + "epoch": 0.4394029850746269, + "grad_norm": 0.871830475989278, + "learning_rate": 3.1066809961019954e-06, + "loss": 0.3659, + "step": 1472 + }, + { + "epoch": 0.43970149253731344, + "grad_norm": 0.8080522829751793, + "learning_rate": 3.1043356181318313e-06, + "loss": 0.3318, + "step": 1473 + }, + { + "epoch": 0.44, + "grad_norm": 0.8144887418555589, + "learning_rate": 3.101989675122594e-06, + "loss": 0.3259, + "step": 1474 + }, + { + "epoch": 0.44029850746268656, + "grad_norm": 0.8505193832128495, + "learning_rate": 3.099643169267685e-06, + "loss": 0.3377, + "step": 1475 + }, + { + "epoch": 0.4405970149253731, + "grad_norm": 0.8155829812521912, + "learning_rate": 3.097296102761028e-06, + "loss": 0.3389, + "step": 1476 + }, + { + "epoch": 0.4408955223880597, + "grad_norm": 0.786054888865815, + "learning_rate": 3.0949484777970747e-06, + "loss": 0.3208, + "step": 1477 + }, + { + "epoch": 0.4411940298507463, + "grad_norm": 0.7510442793736704, + "learning_rate": 3.0926002965707965e-06, + "loss": 0.3468, + "step": 1478 + }, + { + "epoch": 0.44149253731343285, + "grad_norm": 0.760538965279755, + "learning_rate": 3.090251561277685e-06, + "loss": 0.3432, + "step": 1479 + }, + { + "epoch": 0.4417910447761194, + "grad_norm": 0.838884239640953, + "learning_rate": 3.0879022741137515e-06, + "loss": 0.3718, + "step": 1480 + }, + { + "epoch": 0.44208955223880597, + "grad_norm": 0.7876393667924823, + "learning_rate": 3.085552437275522e-06, + "loss": 0.3722, + "step": 1481 + }, + { + "epoch": 0.44238805970149253, + "grad_norm": 0.8465102759230003, + "learning_rate": 3.0832020529600367e-06, + "loss": 0.3485, + "step": 1482 + }, + { + "epoch": 0.4426865671641791, + "grad_norm": 0.8627284225172639, + "learning_rate": 3.0808511233648466e-06, + "loss": 0.3689, + "step": 1483 + }, + { + "epoch": 0.44298507462686565, + "grad_norm": 0.7790417749973847, + "learning_rate": 3.0784996506880157e-06, + "loss": 0.313, + "step": 1484 + }, + { + "epoch": 0.44328358208955226, + "grad_norm": 0.7197974569889182, + "learning_rate": 3.076147637128111e-06, + "loss": 0.3109, + "step": 1485 + }, + { + "epoch": 0.4435820895522388, + "grad_norm": 0.9066043015509634, + "learning_rate": 3.0737950848842097e-06, + "loss": 0.33, + "step": 1486 + }, + { + "epoch": 0.4438805970149254, + "grad_norm": 0.7323591878235486, + "learning_rate": 3.0714419961558907e-06, + "loss": 0.3428, + "step": 1487 + }, + { + "epoch": 0.44417910447761194, + "grad_norm": 0.816034591616698, + "learning_rate": 3.069088373143234e-06, + "loss": 0.3388, + "step": 1488 + }, + { + "epoch": 0.4444776119402985, + "grad_norm": 0.8142476229964963, + "learning_rate": 3.06673421804682e-06, + "loss": 0.29, + "step": 1489 + }, + { + "epoch": 0.44477611940298506, + "grad_norm": 0.7722627326209343, + "learning_rate": 3.064379533067726e-06, + "loss": 0.303, + "step": 1490 + }, + { + "epoch": 0.4450746268656716, + "grad_norm": 0.8826032609144622, + "learning_rate": 3.062024320407525e-06, + "loss": 0.3546, + "step": 1491 + }, + { + "epoch": 0.44537313432835823, + "grad_norm": 0.7817296477576442, + "learning_rate": 3.059668582268285e-06, + "loss": 0.35, + "step": 1492 + }, + { + "epoch": 0.4456716417910448, + "grad_norm": 0.7259562656822198, + "learning_rate": 3.0573123208525613e-06, + "loss": 0.3178, + "step": 1493 + }, + { + "epoch": 0.44597014925373135, + "grad_norm": 0.755357913464589, + "learning_rate": 3.0549555383634032e-06, + "loss": 0.354, + "step": 1494 + }, + { + "epoch": 0.4462686567164179, + "grad_norm": 0.792525116985471, + "learning_rate": 3.052598237004343e-06, + "loss": 0.345, + "step": 1495 + }, + { + "epoch": 0.44656716417910447, + "grad_norm": 0.8285212042311892, + "learning_rate": 3.0502404189794012e-06, + "loss": 0.3618, + "step": 1496 + }, + { + "epoch": 0.44686567164179103, + "grad_norm": 0.8922327205331874, + "learning_rate": 3.0478820864930796e-06, + "loss": 0.3616, + "step": 1497 + }, + { + "epoch": 0.4471641791044776, + "grad_norm": 0.8584872212973166, + "learning_rate": 3.0455232417503617e-06, + "loss": 0.3367, + "step": 1498 + }, + { + "epoch": 0.4474626865671642, + "grad_norm": 0.7992271644708301, + "learning_rate": 3.0431638869567097e-06, + "loss": 0.3214, + "step": 1499 + }, + { + "epoch": 0.44776119402985076, + "grad_norm": 0.7540776769854936, + "learning_rate": 3.0408040243180638e-06, + "loss": 0.3285, + "step": 1500 + }, + { + "epoch": 0.4480597014925373, + "grad_norm": 0.8633455449268056, + "learning_rate": 3.0384436560408363e-06, + "loss": 0.3401, + "step": 1501 + }, + { + "epoch": 0.4483582089552239, + "grad_norm": 0.9222419115283121, + "learning_rate": 3.0360827843319156e-06, + "loss": 0.3838, + "step": 1502 + }, + { + "epoch": 0.44865671641791044, + "grad_norm": 0.8368812623261956, + "learning_rate": 3.033721411398659e-06, + "loss": 0.3591, + "step": 1503 + }, + { + "epoch": 0.448955223880597, + "grad_norm": 0.8378315376207885, + "learning_rate": 3.0313595394488917e-06, + "loss": 0.3567, + "step": 1504 + }, + { + "epoch": 0.44925373134328356, + "grad_norm": 0.7763761976990962, + "learning_rate": 3.0289971706909064e-06, + "loss": 0.3387, + "step": 1505 + }, + { + "epoch": 0.4495522388059702, + "grad_norm": 0.7607283367215002, + "learning_rate": 3.026634307333462e-06, + "loss": 0.3541, + "step": 1506 + }, + { + "epoch": 0.44985074626865673, + "grad_norm": 0.8127172993351834, + "learning_rate": 3.024270951585776e-06, + "loss": 0.3546, + "step": 1507 + }, + { + "epoch": 0.4501492537313433, + "grad_norm": 0.8648174995674591, + "learning_rate": 3.02190710565753e-06, + "loss": 0.3627, + "step": 1508 + }, + { + "epoch": 0.45044776119402985, + "grad_norm": 0.8480105965484397, + "learning_rate": 3.019542771758861e-06, + "loss": 0.3599, + "step": 1509 + }, + { + "epoch": 0.4507462686567164, + "grad_norm": 0.7968525603371781, + "learning_rate": 3.0171779521003647e-06, + "loss": 0.3186, + "step": 1510 + }, + { + "epoch": 0.45104477611940297, + "grad_norm": 0.9005901290048014, + "learning_rate": 3.0148126488930896e-06, + "loss": 0.3793, + "step": 1511 + }, + { + "epoch": 0.4513432835820895, + "grad_norm": 0.8428645379248311, + "learning_rate": 3.012446864348536e-06, + "loss": 0.3971, + "step": 1512 + }, + { + "epoch": 0.45164179104477614, + "grad_norm": 0.8480294223291627, + "learning_rate": 3.010080600678656e-06, + "loss": 0.3705, + "step": 1513 + }, + { + "epoch": 0.4519402985074627, + "grad_norm": 0.8472349283442467, + "learning_rate": 3.0077138600958468e-06, + "loss": 0.3656, + "step": 1514 + }, + { + "epoch": 0.45223880597014926, + "grad_norm": 0.9103614344387116, + "learning_rate": 3.0053466448129535e-06, + "loss": 0.3812, + "step": 1515 + }, + { + "epoch": 0.4525373134328358, + "grad_norm": 0.8231552945682205, + "learning_rate": 3.0029789570432665e-06, + "loss": 0.348, + "step": 1516 + }, + { + "epoch": 0.4528358208955224, + "grad_norm": 0.9675315430191836, + "learning_rate": 3.000610799000514e-06, + "loss": 0.3675, + "step": 1517 + }, + { + "epoch": 0.45313432835820894, + "grad_norm": 0.8070529408063196, + "learning_rate": 2.9982421728988663e-06, + "loss": 0.341, + "step": 1518 + }, + { + "epoch": 0.4534328358208955, + "grad_norm": 0.8536363490650818, + "learning_rate": 2.9958730809529326e-06, + "loss": 0.3794, + "step": 1519 + }, + { + "epoch": 0.4537313432835821, + "grad_norm": 0.8122339636569735, + "learning_rate": 2.9935035253777544e-06, + "loss": 0.3582, + "step": 1520 + }, + { + "epoch": 0.45402985074626867, + "grad_norm": 0.7993919608319511, + "learning_rate": 2.9911335083888093e-06, + "loss": 0.344, + "step": 1521 + }, + { + "epoch": 0.45432835820895523, + "grad_norm": 0.8228888153873775, + "learning_rate": 2.988763032202006e-06, + "loss": 0.3235, + "step": 1522 + }, + { + "epoch": 0.4546268656716418, + "grad_norm": 0.8201968407407756, + "learning_rate": 2.9863920990336803e-06, + "loss": 0.3707, + "step": 1523 + }, + { + "epoch": 0.45492537313432835, + "grad_norm": 0.8436534725037953, + "learning_rate": 2.9840207111005987e-06, + "loss": 0.3886, + "step": 1524 + }, + { + "epoch": 0.4552238805970149, + "grad_norm": 0.7968744092690195, + "learning_rate": 2.9816488706199498e-06, + "loss": 0.3498, + "step": 1525 + }, + { + "epoch": 0.45552238805970147, + "grad_norm": 0.8197065867829275, + "learning_rate": 2.9792765798093466e-06, + "loss": 0.3676, + "step": 1526 + }, + { + "epoch": 0.4558208955223881, + "grad_norm": 0.8141683013758337, + "learning_rate": 2.9769038408868246e-06, + "loss": 0.3175, + "step": 1527 + }, + { + "epoch": 0.45611940298507464, + "grad_norm": 0.8682309359789031, + "learning_rate": 2.9745306560708343e-06, + "loss": 0.3582, + "step": 1528 + }, + { + "epoch": 0.4564179104477612, + "grad_norm": 0.8430286979266656, + "learning_rate": 2.9721570275802487e-06, + "loss": 0.3643, + "step": 1529 + }, + { + "epoch": 0.45671641791044776, + "grad_norm": 0.8327189426692697, + "learning_rate": 2.969782957634351e-06, + "loss": 0.3869, + "step": 1530 + }, + { + "epoch": 0.4570149253731343, + "grad_norm": 0.8303352738239019, + "learning_rate": 2.967408448452838e-06, + "loss": 0.3396, + "step": 1531 + }, + { + "epoch": 0.4573134328358209, + "grad_norm": 0.8521303530894394, + "learning_rate": 2.9650335022558202e-06, + "loss": 0.3593, + "step": 1532 + }, + { + "epoch": 0.45761194029850744, + "grad_norm": 0.7973923282393381, + "learning_rate": 2.962658121263812e-06, + "loss": 0.3826, + "step": 1533 + }, + { + "epoch": 0.45791044776119405, + "grad_norm": 0.8307181709707767, + "learning_rate": 2.9602823076977376e-06, + "loss": 0.385, + "step": 1534 + }, + { + "epoch": 0.4582089552238806, + "grad_norm": 0.7681378144807851, + "learning_rate": 2.9579060637789257e-06, + "loss": 0.3345, + "step": 1535 + }, + { + "epoch": 0.45850746268656717, + "grad_norm": 0.8862805383356972, + "learning_rate": 2.955529391729105e-06, + "loss": 0.3342, + "step": 1536 + }, + { + "epoch": 0.45880597014925373, + "grad_norm": 0.7914026956159501, + "learning_rate": 2.9531522937704065e-06, + "loss": 0.3161, + "step": 1537 + }, + { + "epoch": 0.4591044776119403, + "grad_norm": 0.815340219137627, + "learning_rate": 2.9507747721253598e-06, + "loss": 0.3161, + "step": 1538 + }, + { + "epoch": 0.45940298507462685, + "grad_norm": 0.8486280883954298, + "learning_rate": 2.948396829016888e-06, + "loss": 0.3425, + "step": 1539 + }, + { + "epoch": 0.4597014925373134, + "grad_norm": 0.8507231569423965, + "learning_rate": 2.9460184666683112e-06, + "loss": 0.3601, + "step": 1540 + }, + { + "epoch": 0.46, + "grad_norm": 0.8316847618482142, + "learning_rate": 2.9436396873033396e-06, + "loss": 0.3566, + "step": 1541 + }, + { + "epoch": 0.4602985074626866, + "grad_norm": 0.7838154911787112, + "learning_rate": 2.941260493146074e-06, + "loss": 0.3075, + "step": 1542 + }, + { + "epoch": 0.46059701492537314, + "grad_norm": 0.8919729652488453, + "learning_rate": 2.938880886421004e-06, + "loss": 0.3628, + "step": 1543 + }, + { + "epoch": 0.4608955223880597, + "grad_norm": 0.8463176490111552, + "learning_rate": 2.9365008693530017e-06, + "loss": 0.3916, + "step": 1544 + }, + { + "epoch": 0.46119402985074626, + "grad_norm": 0.8415000303394304, + "learning_rate": 2.9341204441673267e-06, + "loss": 0.3378, + "step": 1545 + }, + { + "epoch": 0.4614925373134328, + "grad_norm": 0.9092886626244101, + "learning_rate": 2.931739613089618e-06, + "loss": 0.3549, + "step": 1546 + }, + { + "epoch": 0.46179104477611943, + "grad_norm": 0.8845564844751359, + "learning_rate": 2.929358378345894e-06, + "loss": 0.3646, + "step": 1547 + }, + { + "epoch": 0.462089552238806, + "grad_norm": 0.7119788198918313, + "learning_rate": 2.9269767421625535e-06, + "loss": 0.2987, + "step": 1548 + }, + { + "epoch": 0.46238805970149255, + "grad_norm": 0.812817673515338, + "learning_rate": 2.9245947067663653e-06, + "loss": 0.3138, + "step": 1549 + }, + { + "epoch": 0.4626865671641791, + "grad_norm": 0.8782182840009076, + "learning_rate": 2.922212274384476e-06, + "loss": 0.3709, + "step": 1550 + }, + { + "epoch": 0.46298507462686567, + "grad_norm": 0.7972696289980897, + "learning_rate": 2.9198294472444022e-06, + "loss": 0.3182, + "step": 1551 + }, + { + "epoch": 0.4632835820895522, + "grad_norm": 0.8368020184190745, + "learning_rate": 2.9174462275740286e-06, + "loss": 0.4036, + "step": 1552 + }, + { + "epoch": 0.4635820895522388, + "grad_norm": 0.8084845969111005, + "learning_rate": 2.9150626176016065e-06, + "loss": 0.3457, + "step": 1553 + }, + { + "epoch": 0.4638805970149254, + "grad_norm": 0.9420253305451717, + "learning_rate": 2.9126786195557554e-06, + "loss": 0.3444, + "step": 1554 + }, + { + "epoch": 0.46417910447761196, + "grad_norm": 0.8848388474337436, + "learning_rate": 2.910294235665453e-06, + "loss": 0.3494, + "step": 1555 + }, + { + "epoch": 0.4644776119402985, + "grad_norm": 0.8223510020899589, + "learning_rate": 2.9079094681600416e-06, + "loss": 0.3289, + "step": 1556 + }, + { + "epoch": 0.4647761194029851, + "grad_norm": 0.7905450158929105, + "learning_rate": 2.9055243192692207e-06, + "loss": 0.3338, + "step": 1557 + }, + { + "epoch": 0.46507462686567164, + "grad_norm": 0.8490757892161332, + "learning_rate": 2.9031387912230454e-06, + "loss": 0.378, + "step": 1558 + }, + { + "epoch": 0.4653731343283582, + "grad_norm": 0.7906965441882455, + "learning_rate": 2.900752886251927e-06, + "loss": 0.3149, + "step": 1559 + }, + { + "epoch": 0.46567164179104475, + "grad_norm": 0.7577279466640751, + "learning_rate": 2.898366606586628e-06, + "loss": 0.2942, + "step": 1560 + }, + { + "epoch": 0.46597014925373137, + "grad_norm": 0.8589842568909583, + "learning_rate": 2.895979954458263e-06, + "loss": 0.366, + "step": 1561 + }, + { + "epoch": 0.46626865671641793, + "grad_norm": 0.756467710812116, + "learning_rate": 2.893592932098292e-06, + "loss": 0.3039, + "step": 1562 + }, + { + "epoch": 0.4665671641791045, + "grad_norm": 0.749749204455234, + "learning_rate": 2.891205541738523e-06, + "loss": 0.3461, + "step": 1563 + }, + { + "epoch": 0.46686567164179105, + "grad_norm": 0.7995654210279144, + "learning_rate": 2.8888177856111082e-06, + "loss": 0.2976, + "step": 1564 + }, + { + "epoch": 0.4671641791044776, + "grad_norm": 0.9693221870057586, + "learning_rate": 2.8864296659485413e-06, + "loss": 0.3557, + "step": 1565 + }, + { + "epoch": 0.46746268656716417, + "grad_norm": 0.7943690346767394, + "learning_rate": 2.8840411849836565e-06, + "loss": 0.3598, + "step": 1566 + }, + { + "epoch": 0.4677611940298507, + "grad_norm": 0.8435450993312467, + "learning_rate": 2.881652344949625e-06, + "loss": 0.4217, + "step": 1567 + }, + { + "epoch": 0.46805970149253734, + "grad_norm": 0.7333154530834609, + "learning_rate": 2.8792631480799526e-06, + "loss": 0.2883, + "step": 1568 + }, + { + "epoch": 0.4683582089552239, + "grad_norm": 0.7562359363934774, + "learning_rate": 2.8768735966084817e-06, + "loss": 0.3055, + "step": 1569 + }, + { + "epoch": 0.46865671641791046, + "grad_norm": 0.7398426343466973, + "learning_rate": 2.874483692769385e-06, + "loss": 0.3478, + "step": 1570 + }, + { + "epoch": 0.468955223880597, + "grad_norm": 0.8230158538149063, + "learning_rate": 2.8720934387971627e-06, + "loss": 0.3278, + "step": 1571 + }, + { + "epoch": 0.4692537313432836, + "grad_norm": 1.3545109216720357, + "learning_rate": 2.869702836926645e-06, + "loss": 0.2997, + "step": 1572 + }, + { + "epoch": 0.46955223880597013, + "grad_norm": 0.926104402556891, + "learning_rate": 2.8673118893929876e-06, + "loss": 0.3141, + "step": 1573 + }, + { + "epoch": 0.4698507462686567, + "grad_norm": 0.7738598668200026, + "learning_rate": 2.864920598431665e-06, + "loss": 0.3212, + "step": 1574 + }, + { + "epoch": 0.4701492537313433, + "grad_norm": 0.7844426674437163, + "learning_rate": 2.862528966278479e-06, + "loss": 0.3281, + "step": 1575 + }, + { + "epoch": 0.47044776119402987, + "grad_norm": 0.8678599800587856, + "learning_rate": 2.8601369951695463e-06, + "loss": 0.3721, + "step": 1576 + }, + { + "epoch": 0.4707462686567164, + "grad_norm": 0.863778592486986, + "learning_rate": 2.8577446873413007e-06, + "loss": 0.3629, + "step": 1577 + }, + { + "epoch": 0.471044776119403, + "grad_norm": 0.9033383927815183, + "learning_rate": 2.855352045030493e-06, + "loss": 0.3508, + "step": 1578 + }, + { + "epoch": 0.47134328358208955, + "grad_norm": 0.812991807684829, + "learning_rate": 2.8529590704741843e-06, + "loss": 0.384, + "step": 1579 + }, + { + "epoch": 0.4716417910447761, + "grad_norm": 0.8187956043141631, + "learning_rate": 2.8505657659097486e-06, + "loss": 0.3707, + "step": 1580 + }, + { + "epoch": 0.47194029850746266, + "grad_norm": 0.9346578308192153, + "learning_rate": 2.8481721335748674e-06, + "loss": 0.4094, + "step": 1581 + }, + { + "epoch": 0.4722388059701493, + "grad_norm": 0.7880484291645268, + "learning_rate": 2.845778175707527e-06, + "loss": 0.3299, + "step": 1582 + }, + { + "epoch": 0.47253731343283584, + "grad_norm": 0.811099440312082, + "learning_rate": 2.8433838945460207e-06, + "loss": 0.33, + "step": 1583 + }, + { + "epoch": 0.4728358208955224, + "grad_norm": 0.7026137359784628, + "learning_rate": 2.8409892923289432e-06, + "loss": 0.3127, + "step": 1584 + }, + { + "epoch": 0.47313432835820896, + "grad_norm": 0.8034762357536271, + "learning_rate": 2.838594371295189e-06, + "loss": 0.3265, + "step": 1585 + }, + { + "epoch": 0.4734328358208955, + "grad_norm": 0.8485262509519692, + "learning_rate": 2.8361991336839513e-06, + "loss": 0.381, + "step": 1586 + }, + { + "epoch": 0.4737313432835821, + "grad_norm": 0.9427056505224326, + "learning_rate": 2.833803581734718e-06, + "loss": 0.4068, + "step": 1587 + }, + { + "epoch": 0.47402985074626863, + "grad_norm": 0.8021996861465295, + "learning_rate": 2.8314077176872724e-06, + "loss": 0.335, + "step": 1588 + }, + { + "epoch": 0.47432835820895525, + "grad_norm": 0.7919679011390528, + "learning_rate": 2.8290115437816894e-06, + "loss": 0.3561, + "step": 1589 + }, + { + "epoch": 0.4746268656716418, + "grad_norm": 0.7810376577747256, + "learning_rate": 2.8266150622583315e-06, + "loss": 0.3501, + "step": 1590 + }, + { + "epoch": 0.47492537313432837, + "grad_norm": 0.9272988287616046, + "learning_rate": 2.8242182753578523e-06, + "loss": 0.3466, + "step": 1591 + }, + { + "epoch": 0.4752238805970149, + "grad_norm": 0.8467896274245239, + "learning_rate": 2.8218211853211893e-06, + "loss": 0.357, + "step": 1592 + }, + { + "epoch": 0.4755223880597015, + "grad_norm": 0.9156541553176574, + "learning_rate": 2.819423794389561e-06, + "loss": 0.3529, + "step": 1593 + }, + { + "epoch": 0.47582089552238804, + "grad_norm": 0.7927076329626589, + "learning_rate": 2.817026104804471e-06, + "loss": 0.3155, + "step": 1594 + }, + { + "epoch": 0.4761194029850746, + "grad_norm": 0.767743906513666, + "learning_rate": 2.8146281188077017e-06, + "loss": 0.3155, + "step": 1595 + }, + { + "epoch": 0.4764179104477612, + "grad_norm": 0.8361708178822149, + "learning_rate": 2.8122298386413094e-06, + "loss": 0.361, + "step": 1596 + }, + { + "epoch": 0.4767164179104478, + "grad_norm": 0.8455590406670277, + "learning_rate": 2.8098312665476283e-06, + "loss": 0.3462, + "step": 1597 + }, + { + "epoch": 0.47701492537313434, + "grad_norm": 0.8438149986929196, + "learning_rate": 2.8074324047692662e-06, + "loss": 0.3597, + "step": 1598 + }, + { + "epoch": 0.4773134328358209, + "grad_norm": 0.8286441056776019, + "learning_rate": 2.8050332555490987e-06, + "loss": 0.3648, + "step": 1599 + }, + { + "epoch": 0.47761194029850745, + "grad_norm": 0.7719571836786463, + "learning_rate": 2.8026338211302735e-06, + "loss": 0.3333, + "step": 1600 + }, + { + "epoch": 0.477910447761194, + "grad_norm": 0.8233026839737, + "learning_rate": 2.800234103756201e-06, + "loss": 0.3337, + "step": 1601 + }, + { + "epoch": 0.47820895522388057, + "grad_norm": 0.7688811413487633, + "learning_rate": 2.7978341056705592e-06, + "loss": 0.3075, + "step": 1602 + }, + { + "epoch": 0.4785074626865672, + "grad_norm": 0.9247126459680931, + "learning_rate": 2.7954338291172892e-06, + "loss": 0.3842, + "step": 1603 + }, + { + "epoch": 0.47880597014925375, + "grad_norm": 0.8669810824469362, + "learning_rate": 2.79303327634059e-06, + "loss": 0.3619, + "step": 1604 + }, + { + "epoch": 0.4791044776119403, + "grad_norm": 0.824675144757377, + "learning_rate": 2.7906324495849206e-06, + "loss": 0.3491, + "step": 1605 + }, + { + "epoch": 0.47940298507462686, + "grad_norm": 0.7606474249944587, + "learning_rate": 2.788231351094995e-06, + "loss": 0.3409, + "step": 1606 + }, + { + "epoch": 0.4797014925373134, + "grad_norm": 0.8588453625011868, + "learning_rate": 2.785829983115781e-06, + "loss": 0.3826, + "step": 1607 + }, + { + "epoch": 0.48, + "grad_norm": 0.8069935126683544, + "learning_rate": 2.7834283478925007e-06, + "loss": 0.3424, + "step": 1608 + }, + { + "epoch": 0.48029850746268654, + "grad_norm": 0.8572397041684174, + "learning_rate": 2.7810264476706227e-06, + "loss": 0.3258, + "step": 1609 + }, + { + "epoch": 0.48059701492537316, + "grad_norm": 0.850023545997115, + "learning_rate": 2.778624284695867e-06, + "loss": 0.3235, + "step": 1610 + }, + { + "epoch": 0.4808955223880597, + "grad_norm": 0.8475591867049286, + "learning_rate": 2.7762218612141966e-06, + "loss": 0.3337, + "step": 1611 + }, + { + "epoch": 0.4811940298507463, + "grad_norm": 0.8536920148847397, + "learning_rate": 2.7738191794718183e-06, + "loss": 0.307, + "step": 1612 + }, + { + "epoch": 0.48149253731343283, + "grad_norm": 0.8281648081371542, + "learning_rate": 2.771416241715182e-06, + "loss": 0.3839, + "step": 1613 + }, + { + "epoch": 0.4817910447761194, + "grad_norm": 0.7677447755495962, + "learning_rate": 2.7690130501909756e-06, + "loss": 0.3291, + "step": 1614 + }, + { + "epoch": 0.48208955223880595, + "grad_norm": 3.3342305797562215, + "learning_rate": 2.766609607146124e-06, + "loss": 0.3939, + "step": 1615 + }, + { + "epoch": 0.4823880597014925, + "grad_norm": 0.7437237275043507, + "learning_rate": 2.7642059148277894e-06, + "loss": 0.3558, + "step": 1616 + }, + { + "epoch": 0.4826865671641791, + "grad_norm": 0.8563459859815404, + "learning_rate": 2.761801975483363e-06, + "loss": 0.3396, + "step": 1617 + }, + { + "epoch": 0.4829850746268657, + "grad_norm": 0.8531096858219094, + "learning_rate": 2.7593977913604717e-06, + "loss": 0.3741, + "step": 1618 + }, + { + "epoch": 0.48328358208955224, + "grad_norm": 0.8588995102283145, + "learning_rate": 2.7569933647069685e-06, + "loss": 0.3189, + "step": 1619 + }, + { + "epoch": 0.4835820895522388, + "grad_norm": 0.8321789763467867, + "learning_rate": 2.754588697770933e-06, + "loss": 0.3456, + "step": 1620 + }, + { + "epoch": 0.48388059701492536, + "grad_norm": 0.8853198602133011, + "learning_rate": 2.752183792800671e-06, + "loss": 0.3932, + "step": 1621 + }, + { + "epoch": 0.4841791044776119, + "grad_norm": 0.8434334328362673, + "learning_rate": 2.7497786520447093e-06, + "loss": 0.4004, + "step": 1622 + }, + { + "epoch": 0.4844776119402985, + "grad_norm": 0.8669235672763962, + "learning_rate": 2.7473732777517965e-06, + "loss": 0.3448, + "step": 1623 + }, + { + "epoch": 0.4847761194029851, + "grad_norm": 0.8146513938626443, + "learning_rate": 2.7449676721708995e-06, + "loss": 0.3658, + "step": 1624 + }, + { + "epoch": 0.48507462686567165, + "grad_norm": 0.7682677410362674, + "learning_rate": 2.7425618375511992e-06, + "loss": 0.3429, + "step": 1625 + }, + { + "epoch": 0.4853731343283582, + "grad_norm": 0.8659290979909456, + "learning_rate": 2.7401557761420933e-06, + "loss": 0.3989, + "step": 1626 + }, + { + "epoch": 0.4856716417910448, + "grad_norm": 0.9005007791678644, + "learning_rate": 2.737749490193191e-06, + "loss": 0.4066, + "step": 1627 + }, + { + "epoch": 0.48597014925373133, + "grad_norm": 0.8269529526190118, + "learning_rate": 2.7353429819543104e-06, + "loss": 0.3104, + "step": 1628 + }, + { + "epoch": 0.4862686567164179, + "grad_norm": 0.7655526786732026, + "learning_rate": 2.7329362536754777e-06, + "loss": 0.3489, + "step": 1629 + }, + { + "epoch": 0.48656716417910445, + "grad_norm": 0.8373568352027102, + "learning_rate": 2.7305293076069263e-06, + "loss": 0.3264, + "step": 1630 + }, + { + "epoch": 0.48686567164179106, + "grad_norm": 0.7899465343213389, + "learning_rate": 2.728122145999091e-06, + "loss": 0.3057, + "step": 1631 + }, + { + "epoch": 0.4871641791044776, + "grad_norm": 2.475275899634775, + "learning_rate": 2.72571477110261e-06, + "loss": 0.3467, + "step": 1632 + }, + { + "epoch": 0.4874626865671642, + "grad_norm": 0.8557171237524993, + "learning_rate": 2.72330718516832e-06, + "loss": 0.3587, + "step": 1633 + }, + { + "epoch": 0.48776119402985074, + "grad_norm": 0.7280642925743758, + "learning_rate": 2.7208993904472543e-06, + "loss": 0.3306, + "step": 1634 + }, + { + "epoch": 0.4880597014925373, + "grad_norm": 0.7753145285378411, + "learning_rate": 2.7184913891906433e-06, + "loss": 0.3074, + "step": 1635 + }, + { + "epoch": 0.48835820895522386, + "grad_norm": 0.7395330717244848, + "learning_rate": 2.716083183649909e-06, + "loss": 0.3426, + "step": 1636 + }, + { + "epoch": 0.4886567164179105, + "grad_norm": 0.8002310765854909, + "learning_rate": 2.7136747760766653e-06, + "loss": 0.3534, + "step": 1637 + }, + { + "epoch": 0.48895522388059703, + "grad_norm": 0.8324334035465984, + "learning_rate": 2.7112661687227142e-06, + "loss": 0.3355, + "step": 1638 + }, + { + "epoch": 0.4892537313432836, + "grad_norm": 0.7743794963426636, + "learning_rate": 2.708857363840045e-06, + "loss": 0.3329, + "step": 1639 + }, + { + "epoch": 0.48955223880597015, + "grad_norm": 0.8228579838838092, + "learning_rate": 2.7064483636808314e-06, + "loss": 0.3755, + "step": 1640 + }, + { + "epoch": 0.4898507462686567, + "grad_norm": 0.7759076884150069, + "learning_rate": 2.7040391704974293e-06, + "loss": 0.3534, + "step": 1641 + }, + { + "epoch": 0.49014925373134327, + "grad_norm": 0.8250088563687552, + "learning_rate": 2.7016297865423767e-06, + "loss": 0.3562, + "step": 1642 + }, + { + "epoch": 0.49044776119402983, + "grad_norm": 0.8627019755137874, + "learning_rate": 2.699220214068389e-06, + "loss": 0.292, + "step": 1643 + }, + { + "epoch": 0.49074626865671644, + "grad_norm": 0.8814707303291824, + "learning_rate": 2.696810455328357e-06, + "loss": 0.3616, + "step": 1644 + }, + { + "epoch": 0.491044776119403, + "grad_norm": 0.8932767174110039, + "learning_rate": 2.694400512575346e-06, + "loss": 0.3596, + "step": 1645 + }, + { + "epoch": 0.49134328358208956, + "grad_norm": 0.8118919974953459, + "learning_rate": 2.6919903880625954e-06, + "loss": 0.3343, + "step": 1646 + }, + { + "epoch": 0.4916417910447761, + "grad_norm": 0.8648850492388545, + "learning_rate": 2.6895800840435106e-06, + "loss": 0.335, + "step": 1647 + }, + { + "epoch": 0.4919402985074627, + "grad_norm": 0.8549039551168018, + "learning_rate": 2.687169602771668e-06, + "loss": 0.3723, + "step": 1648 + }, + { + "epoch": 0.49223880597014924, + "grad_norm": 0.7944249187287936, + "learning_rate": 2.68475894650081e-06, + "loss": 0.3085, + "step": 1649 + }, + { + "epoch": 0.4925373134328358, + "grad_norm": 0.7927257364542748, + "learning_rate": 2.6823481174848405e-06, + "loss": 0.3133, + "step": 1650 + }, + { + "epoch": 0.4928358208955224, + "grad_norm": 0.7886213022780711, + "learning_rate": 2.679937117977825e-06, + "loss": 0.3822, + "step": 1651 + }, + { + "epoch": 0.493134328358209, + "grad_norm": 0.8280071240535853, + "learning_rate": 2.6775259502339913e-06, + "loss": 0.3573, + "step": 1652 + }, + { + "epoch": 0.49343283582089553, + "grad_norm": 0.8418482347983869, + "learning_rate": 2.67511461650772e-06, + "loss": 0.3835, + "step": 1653 + }, + { + "epoch": 0.4937313432835821, + "grad_norm": 0.8387035834884874, + "learning_rate": 2.672703119053552e-06, + "loss": 0.3586, + "step": 1654 + }, + { + "epoch": 0.49402985074626865, + "grad_norm": 0.7816419105725156, + "learning_rate": 2.670291460126177e-06, + "loss": 0.3339, + "step": 1655 + }, + { + "epoch": 0.4943283582089552, + "grad_norm": 0.8001221218291323, + "learning_rate": 2.667879641980437e-06, + "loss": 0.3204, + "step": 1656 + }, + { + "epoch": 0.49462686567164177, + "grad_norm": 0.7879284043181888, + "learning_rate": 2.6654676668713245e-06, + "loss": 0.3224, + "step": 1657 + }, + { + "epoch": 0.4949253731343284, + "grad_norm": 0.801234203959891, + "learning_rate": 2.6630555370539763e-06, + "loss": 0.3327, + "step": 1658 + }, + { + "epoch": 0.49522388059701494, + "grad_norm": 0.7997197712338734, + "learning_rate": 2.6606432547836757e-06, + "loss": 0.3375, + "step": 1659 + }, + { + "epoch": 0.4955223880597015, + "grad_norm": 0.7623217742754592, + "learning_rate": 2.658230822315847e-06, + "loss": 0.3187, + "step": 1660 + }, + { + "epoch": 0.49582089552238806, + "grad_norm": 0.7962363516446156, + "learning_rate": 2.655818241906057e-06, + "loss": 0.3493, + "step": 1661 + }, + { + "epoch": 0.4961194029850746, + "grad_norm": 0.9034198102424835, + "learning_rate": 2.653405515810009e-06, + "loss": 0.3753, + "step": 1662 + }, + { + "epoch": 0.4964179104477612, + "grad_norm": 0.7925807514012592, + "learning_rate": 2.650992646283542e-06, + "loss": 0.3077, + "step": 1663 + }, + { + "epoch": 0.49671641791044774, + "grad_norm": 0.7247430409943646, + "learning_rate": 2.648579635582632e-06, + "loss": 0.3098, + "step": 1664 + }, + { + "epoch": 0.49701492537313435, + "grad_norm": 0.6769452012660345, + "learning_rate": 2.6461664859633844e-06, + "loss": 0.2895, + "step": 1665 + }, + { + "epoch": 0.4973134328358209, + "grad_norm": 0.806049068867384, + "learning_rate": 2.6437531996820353e-06, + "loss": 0.3263, + "step": 1666 + }, + { + "epoch": 0.49761194029850747, + "grad_norm": 0.8550960252280426, + "learning_rate": 2.641339778994948e-06, + "loss": 0.37, + "step": 1667 + }, + { + "epoch": 0.49791044776119403, + "grad_norm": 0.8478170482025786, + "learning_rate": 2.6389262261586127e-06, + "loss": 0.3892, + "step": 1668 + }, + { + "epoch": 0.4982089552238806, + "grad_norm": 0.8345120172992568, + "learning_rate": 2.636512543429642e-06, + "loss": 0.348, + "step": 1669 + }, + { + "epoch": 0.49850746268656715, + "grad_norm": 0.8028565137146465, + "learning_rate": 2.634098733064771e-06, + "loss": 0.3574, + "step": 1670 + }, + { + "epoch": 0.4988059701492537, + "grad_norm": 0.7451403703442233, + "learning_rate": 2.6316847973208535e-06, + "loss": 0.2899, + "step": 1671 + }, + { + "epoch": 0.4991044776119403, + "grad_norm": 0.8352856655412605, + "learning_rate": 2.6292707384548604e-06, + "loss": 0.3597, + "step": 1672 + }, + { + "epoch": 0.4994029850746269, + "grad_norm": 0.8545513078252729, + "learning_rate": 2.6268565587238777e-06, + "loss": 0.3325, + "step": 1673 + }, + { + "epoch": 0.49970149253731344, + "grad_norm": 0.8282524133497315, + "learning_rate": 2.6244422603851046e-06, + "loss": 0.3492, + "step": 1674 + }, + { + "epoch": 0.5, + "grad_norm": 0.7991480826878556, + "learning_rate": 2.622027845695851e-06, + "loss": 0.3506, + "step": 1675 + }, + { + "epoch": 0.5002985074626866, + "grad_norm": 0.7582015801891304, + "learning_rate": 2.6196133169135368e-06, + "loss": 0.3293, + "step": 1676 + }, + { + "epoch": 0.5005970149253731, + "grad_norm": 0.9612014821814638, + "learning_rate": 2.6171986762956856e-06, + "loss": 0.3996, + "step": 1677 + }, + { + "epoch": 0.5008955223880597, + "grad_norm": 0.7898075941393438, + "learning_rate": 2.61478392609993e-06, + "loss": 0.3312, + "step": 1678 + }, + { + "epoch": 0.5011940298507462, + "grad_norm": 0.8015043125699043, + "learning_rate": 2.612369068584001e-06, + "loss": 0.3096, + "step": 1679 + }, + { + "epoch": 0.5014925373134328, + "grad_norm": 0.8389408382259228, + "learning_rate": 2.6099541060057316e-06, + "loss": 0.3338, + "step": 1680 + }, + { + "epoch": 0.5017910447761194, + "grad_norm": 0.8096612317183629, + "learning_rate": 2.607539040623054e-06, + "loss": 0.3828, + "step": 1681 + }, + { + "epoch": 0.502089552238806, + "grad_norm": 0.8259878912971927, + "learning_rate": 2.6051238746939934e-06, + "loss": 0.3031, + "step": 1682 + }, + { + "epoch": 0.5023880597014926, + "grad_norm": 0.8631597448564671, + "learning_rate": 2.602708610476673e-06, + "loss": 0.3664, + "step": 1683 + }, + { + "epoch": 0.5026865671641791, + "grad_norm": 0.7716339492340298, + "learning_rate": 2.600293250229306e-06, + "loss": 0.338, + "step": 1684 + }, + { + "epoch": 0.5029850746268657, + "grad_norm": 0.899796414901459, + "learning_rate": 2.597877796210194e-06, + "loss": 0.3815, + "step": 1685 + }, + { + "epoch": 0.5032835820895523, + "grad_norm": 0.9820931245782578, + "learning_rate": 2.5954622506777285e-06, + "loss": 0.3691, + "step": 1686 + }, + { + "epoch": 0.5035820895522388, + "grad_norm": 0.7494455392178265, + "learning_rate": 2.5930466158903856e-06, + "loss": 0.3215, + "step": 1687 + }, + { + "epoch": 0.5038805970149254, + "grad_norm": 0.8086992100975138, + "learning_rate": 2.5906308941067243e-06, + "loss": 0.3297, + "step": 1688 + }, + { + "epoch": 0.5041791044776119, + "grad_norm": 0.9419257958465664, + "learning_rate": 2.588215087585387e-06, + "loss": 0.3312, + "step": 1689 + }, + { + "epoch": 0.5044776119402985, + "grad_norm": 0.7786799854356972, + "learning_rate": 2.5857991985850924e-06, + "loss": 0.3236, + "step": 1690 + }, + { + "epoch": 0.5047761194029851, + "grad_norm": 0.8680079607603017, + "learning_rate": 2.583383229364639e-06, + "loss": 0.3484, + "step": 1691 + }, + { + "epoch": 0.5050746268656716, + "grad_norm": 0.867654415324763, + "learning_rate": 2.580967182182898e-06, + "loss": 0.3494, + "step": 1692 + }, + { + "epoch": 0.5053731343283582, + "grad_norm": 0.7824582583766272, + "learning_rate": 2.5785510592988156e-06, + "loss": 0.3345, + "step": 1693 + }, + { + "epoch": 0.5056716417910447, + "grad_norm": 0.9778668777323667, + "learning_rate": 2.5761348629714073e-06, + "loss": 0.3447, + "step": 1694 + }, + { + "epoch": 0.5059701492537313, + "grad_norm": 0.879824672451696, + "learning_rate": 2.5737185954597583e-06, + "loss": 0.3486, + "step": 1695 + }, + { + "epoch": 0.506268656716418, + "grad_norm": 0.7272504403686626, + "learning_rate": 2.571302259023019e-06, + "loss": 0.3284, + "step": 1696 + }, + { + "epoch": 0.5065671641791045, + "grad_norm": 0.8269875685389965, + "learning_rate": 2.5688858559204056e-06, + "loss": 0.3189, + "step": 1697 + }, + { + "epoch": 0.5068656716417911, + "grad_norm": 0.7107142314587015, + "learning_rate": 2.5664693884111958e-06, + "loss": 0.3069, + "step": 1698 + }, + { + "epoch": 0.5071641791044776, + "grad_norm": 0.8300310961227638, + "learning_rate": 2.564052858754728e-06, + "loss": 0.2923, + "step": 1699 + }, + { + "epoch": 0.5074626865671642, + "grad_norm": 0.8721700820774255, + "learning_rate": 2.561636269210399e-06, + "loss": 0.3377, + "step": 1700 + }, + { + "epoch": 0.5077611940298508, + "grad_norm": 0.7550427576298773, + "learning_rate": 2.55921962203766e-06, + "loss": 0.3222, + "step": 1701 + }, + { + "epoch": 0.5080597014925373, + "grad_norm": 0.7968625493472227, + "learning_rate": 2.5568029194960186e-06, + "loss": 0.3291, + "step": 1702 + }, + { + "epoch": 0.5083582089552239, + "grad_norm": 0.7511883213331606, + "learning_rate": 2.554386163845032e-06, + "loss": 0.3107, + "step": 1703 + }, + { + "epoch": 0.5086567164179104, + "grad_norm": 0.752772967275015, + "learning_rate": 2.551969357344308e-06, + "loss": 0.3579, + "step": 1704 + }, + { + "epoch": 0.508955223880597, + "grad_norm": 0.7805447167279405, + "learning_rate": 2.5495525022535013e-06, + "loss": 0.3091, + "step": 1705 + }, + { + "epoch": 0.5092537313432836, + "grad_norm": 0.7465985414472608, + "learning_rate": 2.547135600832313e-06, + "loss": 0.3116, + "step": 1706 + }, + { + "epoch": 0.5095522388059701, + "grad_norm": 0.7452102417332194, + "learning_rate": 2.544718655340486e-06, + "loss": 0.3141, + "step": 1707 + }, + { + "epoch": 0.5098507462686567, + "grad_norm": 0.8594711328205182, + "learning_rate": 2.5423016680378076e-06, + "loss": 0.3254, + "step": 1708 + }, + { + "epoch": 0.5101492537313432, + "grad_norm": 0.8724045713981662, + "learning_rate": 2.5398846411840998e-06, + "loss": 0.3306, + "step": 1709 + }, + { + "epoch": 0.5104477611940299, + "grad_norm": 0.8250540195559437, + "learning_rate": 2.5374675770392247e-06, + "loss": 0.2953, + "step": 1710 + }, + { + "epoch": 0.5107462686567165, + "grad_norm": 0.7579796546729729, + "learning_rate": 2.5350504778630795e-06, + "loss": 0.3514, + "step": 1711 + }, + { + "epoch": 0.511044776119403, + "grad_norm": 0.7968045617982358, + "learning_rate": 2.5326333459155904e-06, + "loss": 0.3443, + "step": 1712 + }, + { + "epoch": 0.5113432835820896, + "grad_norm": 0.7725314533480072, + "learning_rate": 2.530216183456719e-06, + "loss": 0.3345, + "step": 1713 + }, + { + "epoch": 0.5116417910447761, + "grad_norm": 0.8181569525639177, + "learning_rate": 2.527798992746453e-06, + "loss": 0.3061, + "step": 1714 + }, + { + "epoch": 0.5119402985074627, + "grad_norm": 0.8292279194686958, + "learning_rate": 2.525381776044806e-06, + "loss": 0.3494, + "step": 1715 + }, + { + "epoch": 0.5122388059701493, + "grad_norm": 0.9155665246514125, + "learning_rate": 2.5229645356118166e-06, + "loss": 0.3805, + "step": 1716 + }, + { + "epoch": 0.5125373134328358, + "grad_norm": 0.8426243739571815, + "learning_rate": 2.520547273707546e-06, + "loss": 0.3978, + "step": 1717 + }, + { + "epoch": 0.5128358208955224, + "grad_norm": 0.7578253079200755, + "learning_rate": 2.5181299925920756e-06, + "loss": 0.3387, + "step": 1718 + }, + { + "epoch": 0.5131343283582089, + "grad_norm": 0.7152262315331874, + "learning_rate": 2.5157126945255043e-06, + "loss": 0.2707, + "step": 1719 + }, + { + "epoch": 0.5134328358208955, + "grad_norm": 0.7803958098892375, + "learning_rate": 2.5132953817679466e-06, + "loss": 0.3587, + "step": 1720 + }, + { + "epoch": 0.513731343283582, + "grad_norm": 0.7380296998637139, + "learning_rate": 2.510878056579531e-06, + "loss": 0.3272, + "step": 1721 + }, + { + "epoch": 0.5140298507462686, + "grad_norm": 0.8282169867776825, + "learning_rate": 2.5084607212203983e-06, + "loss": 0.3618, + "step": 1722 + }, + { + "epoch": 0.5143283582089552, + "grad_norm": 0.8758117401270619, + "learning_rate": 2.5060433779506966e-06, + "loss": 0.3096, + "step": 1723 + }, + { + "epoch": 0.5146268656716418, + "grad_norm": 0.9027136861131375, + "learning_rate": 2.5036260290305837e-06, + "loss": 0.3365, + "step": 1724 + }, + { + "epoch": 0.5149253731343284, + "grad_norm": 0.8734789095232062, + "learning_rate": 2.501208676720223e-06, + "loss": 0.3564, + "step": 1725 + }, + { + "epoch": 0.515223880597015, + "grad_norm": 0.9223885876305432, + "learning_rate": 2.498791323279778e-06, + "loss": 0.361, + "step": 1726 + }, + { + "epoch": 0.5155223880597015, + "grad_norm": 0.7412513939737528, + "learning_rate": 2.496373970969417e-06, + "loss": 0.2946, + "step": 1727 + }, + { + "epoch": 0.5158208955223881, + "grad_norm": 0.8125704355183377, + "learning_rate": 2.4939566220493043e-06, + "loss": 0.3597, + "step": 1728 + }, + { + "epoch": 0.5161194029850746, + "grad_norm": 0.875058571982458, + "learning_rate": 2.491539278779603e-06, + "loss": 0.3321, + "step": 1729 + }, + { + "epoch": 0.5164179104477612, + "grad_norm": 1.4129333220086666, + "learning_rate": 2.48912194342047e-06, + "loss": 0.3021, + "step": 1730 + }, + { + "epoch": 0.5167164179104478, + "grad_norm": 0.822837577529842, + "learning_rate": 2.486704618232054e-06, + "loss": 0.2875, + "step": 1731 + }, + { + "epoch": 0.5170149253731343, + "grad_norm": 0.8508719610068957, + "learning_rate": 2.484287305474496e-06, + "loss": 0.332, + "step": 1732 + }, + { + "epoch": 0.5173134328358209, + "grad_norm": 0.8371266747144144, + "learning_rate": 2.481870007407925e-06, + "loss": 0.3505, + "step": 1733 + }, + { + "epoch": 0.5176119402985074, + "grad_norm": 0.7674347672744616, + "learning_rate": 2.4794527262924546e-06, + "loss": 0.3378, + "step": 1734 + }, + { + "epoch": 0.517910447761194, + "grad_norm": 0.9297458178829942, + "learning_rate": 2.4770354643881843e-06, + "loss": 0.3841, + "step": 1735 + }, + { + "epoch": 0.5182089552238806, + "grad_norm": 0.9461877714567667, + "learning_rate": 2.474618223955196e-06, + "loss": 0.3962, + "step": 1736 + }, + { + "epoch": 0.5185074626865671, + "grad_norm": 0.9240923700130257, + "learning_rate": 2.4722010072535485e-06, + "loss": 0.3352, + "step": 1737 + }, + { + "epoch": 0.5188059701492538, + "grad_norm": 0.7914811230349772, + "learning_rate": 2.4697838165432816e-06, + "loss": 0.351, + "step": 1738 + }, + { + "epoch": 0.5191044776119403, + "grad_norm": 0.9433458310818481, + "learning_rate": 2.4673666540844105e-06, + "loss": 0.3725, + "step": 1739 + }, + { + "epoch": 0.5194029850746269, + "grad_norm": 0.8700776482081186, + "learning_rate": 2.4649495221369218e-06, + "loss": 0.3117, + "step": 1740 + }, + { + "epoch": 0.5197014925373135, + "grad_norm": 0.7373330115096584, + "learning_rate": 2.462532422960776e-06, + "loss": 0.3273, + "step": 1741 + }, + { + "epoch": 0.52, + "grad_norm": 0.7741460492182761, + "learning_rate": 2.460115358815901e-06, + "loss": 0.3185, + "step": 1742 + }, + { + "epoch": 0.5202985074626866, + "grad_norm": 0.9184552422451486, + "learning_rate": 2.457698331962193e-06, + "loss": 0.3499, + "step": 1743 + }, + { + "epoch": 0.5205970149253731, + "grad_norm": 0.7416538846201887, + "learning_rate": 2.4552813446595148e-06, + "loss": 0.3278, + "step": 1744 + }, + { + "epoch": 0.5208955223880597, + "grad_norm": 0.8354557623430109, + "learning_rate": 2.4528643991676875e-06, + "loss": 0.3341, + "step": 1745 + }, + { + "epoch": 0.5211940298507463, + "grad_norm": 0.8538898399358935, + "learning_rate": 2.450447497746499e-06, + "loss": 0.3757, + "step": 1746 + }, + { + "epoch": 0.5214925373134328, + "grad_norm": 0.7395052286353708, + "learning_rate": 2.4480306426556925e-06, + "loss": 0.2997, + "step": 1747 + }, + { + "epoch": 0.5217910447761194, + "grad_norm": 0.7583313377731395, + "learning_rate": 2.4456138361549678e-06, + "loss": 0.3343, + "step": 1748 + }, + { + "epoch": 0.5220895522388059, + "grad_norm": 0.8107731144138431, + "learning_rate": 2.4431970805039814e-06, + "loss": 0.2842, + "step": 1749 + }, + { + "epoch": 0.5223880597014925, + "grad_norm": 0.7658894882988232, + "learning_rate": 2.4407803779623394e-06, + "loss": 0.3124, + "step": 1750 + }, + { + "epoch": 0.522686567164179, + "grad_norm": 0.8719707167891656, + "learning_rate": 2.4383637307896017e-06, + "loss": 0.2977, + "step": 1751 + }, + { + "epoch": 0.5229850746268657, + "grad_norm": 0.8510394601889362, + "learning_rate": 2.4359471412452724e-06, + "loss": 0.3698, + "step": 1752 + }, + { + "epoch": 0.5232835820895523, + "grad_norm": 0.9805015870419773, + "learning_rate": 2.4335306115888046e-06, + "loss": 0.3421, + "step": 1753 + }, + { + "epoch": 0.5235820895522388, + "grad_norm": 0.884081775452208, + "learning_rate": 2.4311141440795956e-06, + "loss": 0.3362, + "step": 1754 + }, + { + "epoch": 0.5238805970149254, + "grad_norm": 0.8500753318797264, + "learning_rate": 2.4286977409769818e-06, + "loss": 0.3223, + "step": 1755 + }, + { + "epoch": 0.524179104477612, + "grad_norm": 0.7775675727762501, + "learning_rate": 2.4262814045402425e-06, + "loss": 0.3587, + "step": 1756 + }, + { + "epoch": 0.5244776119402985, + "grad_norm": 0.8771202940506111, + "learning_rate": 2.4238651370285936e-06, + "loss": 0.3609, + "step": 1757 + }, + { + "epoch": 0.5247761194029851, + "grad_norm": 0.9343708174797646, + "learning_rate": 2.421448940701185e-06, + "loss": 0.3788, + "step": 1758 + }, + { + "epoch": 0.5250746268656716, + "grad_norm": 0.7830270720430653, + "learning_rate": 2.4190328178171026e-06, + "loss": 0.3327, + "step": 1759 + }, + { + "epoch": 0.5253731343283582, + "grad_norm": 0.7893636098117658, + "learning_rate": 2.4166167706353623e-06, + "loss": 0.3195, + "step": 1760 + }, + { + "epoch": 0.5256716417910448, + "grad_norm": 0.8111132911057031, + "learning_rate": 2.414200801414908e-06, + "loss": 0.3735, + "step": 1761 + }, + { + "epoch": 0.5259701492537313, + "grad_norm": 0.9047377513665926, + "learning_rate": 2.4117849124146136e-06, + "loss": 0.3782, + "step": 1762 + }, + { + "epoch": 0.5262686567164179, + "grad_norm": 0.8119207582887555, + "learning_rate": 2.4093691058932765e-06, + "loss": 0.3387, + "step": 1763 + }, + { + "epoch": 0.5265671641791044, + "grad_norm": 0.7829803719766413, + "learning_rate": 2.4069533841096153e-06, + "loss": 0.3503, + "step": 1764 + }, + { + "epoch": 0.5268656716417911, + "grad_norm": 0.8065607770759078, + "learning_rate": 2.404537749322273e-06, + "loss": 0.3407, + "step": 1765 + }, + { + "epoch": 0.5271641791044777, + "grad_norm": 0.8371021912735477, + "learning_rate": 2.4021222037898066e-06, + "loss": 0.3698, + "step": 1766 + }, + { + "epoch": 0.5274626865671642, + "grad_norm": 0.8296224322060857, + "learning_rate": 2.399706749770695e-06, + "loss": 0.2996, + "step": 1767 + }, + { + "epoch": 0.5277611940298508, + "grad_norm": 0.7980329495819588, + "learning_rate": 2.3972913895233278e-06, + "loss": 0.3164, + "step": 1768 + }, + { + "epoch": 0.5280597014925373, + "grad_norm": 0.84030721922861, + "learning_rate": 2.394876125306007e-06, + "loss": 0.3384, + "step": 1769 + }, + { + "epoch": 0.5283582089552239, + "grad_norm": 0.8654667250374329, + "learning_rate": 2.392460959376947e-06, + "loss": 0.3168, + "step": 1770 + }, + { + "epoch": 0.5286567164179105, + "grad_norm": 0.8688067439209197, + "learning_rate": 2.3900458939942696e-06, + "loss": 0.3503, + "step": 1771 + }, + { + "epoch": 0.528955223880597, + "grad_norm": 0.7877176392427366, + "learning_rate": 2.387630931416e-06, + "loss": 0.3048, + "step": 1772 + }, + { + "epoch": 0.5292537313432836, + "grad_norm": 0.8446237767654405, + "learning_rate": 2.3852160739000706e-06, + "loss": 0.3626, + "step": 1773 + }, + { + "epoch": 0.5295522388059701, + "grad_norm": 0.8025405273097861, + "learning_rate": 2.382801323704315e-06, + "loss": 0.3284, + "step": 1774 + }, + { + "epoch": 0.5298507462686567, + "grad_norm": 0.9052209154464622, + "learning_rate": 2.380386683086465e-06, + "loss": 0.3916, + "step": 1775 + }, + { + "epoch": 0.5301492537313433, + "grad_norm": 0.83787822521044, + "learning_rate": 2.3779721543041504e-06, + "loss": 0.3503, + "step": 1776 + }, + { + "epoch": 0.5304477611940298, + "grad_norm": 0.8029033496086618, + "learning_rate": 2.3755577396148967e-06, + "loss": 0.3198, + "step": 1777 + }, + { + "epoch": 0.5307462686567164, + "grad_norm": 0.9561232832711954, + "learning_rate": 2.3731434412761236e-06, + "loss": 0.3653, + "step": 1778 + }, + { + "epoch": 0.531044776119403, + "grad_norm": 0.8072801892942726, + "learning_rate": 2.370729261545141e-06, + "loss": 0.34, + "step": 1779 + }, + { + "epoch": 0.5313432835820896, + "grad_norm": 0.7980807938842572, + "learning_rate": 2.3683152026791473e-06, + "loss": 0.2973, + "step": 1780 + }, + { + "epoch": 0.5316417910447762, + "grad_norm": 0.8316738799919338, + "learning_rate": 2.365901266935229e-06, + "loss": 0.3422, + "step": 1781 + }, + { + "epoch": 0.5319402985074627, + "grad_norm": 0.7980662515304342, + "learning_rate": 2.363487456570359e-06, + "loss": 0.3575, + "step": 1782 + }, + { + "epoch": 0.5322388059701493, + "grad_norm": 0.833051789155456, + "learning_rate": 2.3610737738413872e-06, + "loss": 0.3391, + "step": 1783 + }, + { + "epoch": 0.5325373134328358, + "grad_norm": 0.8007945992425828, + "learning_rate": 2.3586602210050525e-06, + "loss": 0.2989, + "step": 1784 + }, + { + "epoch": 0.5328358208955224, + "grad_norm": 0.9029970072294182, + "learning_rate": 2.356246800317965e-06, + "loss": 0.3445, + "step": 1785 + }, + { + "epoch": 0.533134328358209, + "grad_norm": 0.990141115180327, + "learning_rate": 2.353833514036616e-06, + "loss": 0.3393, + "step": 1786 + }, + { + "epoch": 0.5334328358208955, + "grad_norm": 0.8954234000787314, + "learning_rate": 2.351420364417368e-06, + "loss": 0.3891, + "step": 1787 + }, + { + "epoch": 0.5337313432835821, + "grad_norm": 0.8292622437441727, + "learning_rate": 2.349007353716458e-06, + "loss": 0.344, + "step": 1788 + }, + { + "epoch": 0.5340298507462686, + "grad_norm": 0.8448034395608284, + "learning_rate": 2.3465944841899916e-06, + "loss": 0.3248, + "step": 1789 + }, + { + "epoch": 0.5343283582089552, + "grad_norm": 0.9091150167264239, + "learning_rate": 2.344181758093943e-06, + "loss": 0.3164, + "step": 1790 + }, + { + "epoch": 0.5346268656716417, + "grad_norm": 0.77933196910969, + "learning_rate": 2.3417691776841532e-06, + "loss": 0.3361, + "step": 1791 + }, + { + "epoch": 0.5349253731343283, + "grad_norm": 0.8157989161197443, + "learning_rate": 2.339356745216325e-06, + "loss": 0.2778, + "step": 1792 + }, + { + "epoch": 0.535223880597015, + "grad_norm": 0.7979239591724641, + "learning_rate": 2.336944462946024e-06, + "loss": 0.3471, + "step": 1793 + }, + { + "epoch": 0.5355223880597015, + "grad_norm": 0.8373875356209881, + "learning_rate": 2.3345323331286763e-06, + "loss": 0.3221, + "step": 1794 + }, + { + "epoch": 0.5358208955223881, + "grad_norm": 0.8199518664561318, + "learning_rate": 2.3321203580195635e-06, + "loss": 0.332, + "step": 1795 + }, + { + "epoch": 0.5361194029850747, + "grad_norm": 0.8762948103305159, + "learning_rate": 2.329708539873824e-06, + "loss": 0.3909, + "step": 1796 + }, + { + "epoch": 0.5364179104477612, + "grad_norm": 0.8353475092926272, + "learning_rate": 2.3272968809464486e-06, + "loss": 0.3553, + "step": 1797 + }, + { + "epoch": 0.5367164179104478, + "grad_norm": 0.8759847723242117, + "learning_rate": 2.3248853834922803e-06, + "loss": 0.3686, + "step": 1798 + }, + { + "epoch": 0.5370149253731343, + "grad_norm": 0.8615201742620705, + "learning_rate": 2.3224740497660096e-06, + "loss": 0.3754, + "step": 1799 + }, + { + "epoch": 0.5373134328358209, + "grad_norm": 0.9099685739043476, + "learning_rate": 2.3200628820221756e-06, + "loss": 0.3581, + "step": 1800 + }, + { + "epoch": 0.5376119402985075, + "grad_norm": 0.777596772205429, + "learning_rate": 2.3176518825151608e-06, + "loss": 0.3378, + "step": 1801 + }, + { + "epoch": 0.537910447761194, + "grad_norm": 0.8450144255131784, + "learning_rate": 2.315241053499191e-06, + "loss": 0.3387, + "step": 1802 + }, + { + "epoch": 0.5382089552238806, + "grad_norm": 0.9315730643786867, + "learning_rate": 2.3128303972283327e-06, + "loss": 0.3867, + "step": 1803 + }, + { + "epoch": 0.5385074626865671, + "grad_norm": 0.8448720469427933, + "learning_rate": 2.3104199159564902e-06, + "loss": 0.3539, + "step": 1804 + }, + { + "epoch": 0.5388059701492537, + "grad_norm": 0.8164611944170581, + "learning_rate": 2.308009611937406e-06, + "loss": 0.3366, + "step": 1805 + }, + { + "epoch": 0.5391044776119402, + "grad_norm": 0.8705803506185734, + "learning_rate": 2.3055994874246544e-06, + "loss": 0.3427, + "step": 1806 + }, + { + "epoch": 0.5394029850746269, + "grad_norm": 0.8111592075727269, + "learning_rate": 2.3031895446716438e-06, + "loss": 0.3498, + "step": 1807 + }, + { + "epoch": 0.5397014925373135, + "grad_norm": 0.82443436935079, + "learning_rate": 2.300779785931611e-06, + "loss": 0.3402, + "step": 1808 + }, + { + "epoch": 0.54, + "grad_norm": 0.7893728042942114, + "learning_rate": 2.2983702134576237e-06, + "loss": 0.3534, + "step": 1809 + }, + { + "epoch": 0.5402985074626866, + "grad_norm": 0.7788581120840403, + "learning_rate": 2.295960829502571e-06, + "loss": 0.3632, + "step": 1810 + }, + { + "epoch": 0.5405970149253732, + "grad_norm": 0.7144098288588688, + "learning_rate": 2.2935516363191695e-06, + "loss": 0.3292, + "step": 1811 + }, + { + "epoch": 0.5408955223880597, + "grad_norm": 0.8718613502814399, + "learning_rate": 2.2911426361599563e-06, + "loss": 0.3914, + "step": 1812 + }, + { + "epoch": 0.5411940298507463, + "grad_norm": 0.8140978897388683, + "learning_rate": 2.288733831277287e-06, + "loss": 0.3285, + "step": 1813 + }, + { + "epoch": 0.5414925373134328, + "grad_norm": 0.9065681350482651, + "learning_rate": 2.286325223923336e-06, + "loss": 0.3734, + "step": 1814 + }, + { + "epoch": 0.5417910447761194, + "grad_norm": 0.8578142995410984, + "learning_rate": 2.283916816350092e-06, + "loss": 0.3417, + "step": 1815 + }, + { + "epoch": 0.542089552238806, + "grad_norm": 0.8951438939421517, + "learning_rate": 2.2815086108093575e-06, + "loss": 0.3678, + "step": 1816 + }, + { + "epoch": 0.5423880597014925, + "grad_norm": 0.7710865449797061, + "learning_rate": 2.279100609552747e-06, + "loss": 0.3212, + "step": 1817 + }, + { + "epoch": 0.5426865671641791, + "grad_norm": 0.8413189744305332, + "learning_rate": 2.2766928148316815e-06, + "loss": 0.3546, + "step": 1818 + }, + { + "epoch": 0.5429850746268656, + "grad_norm": 0.7918974236582986, + "learning_rate": 2.274285228897391e-06, + "loss": 0.3157, + "step": 1819 + }, + { + "epoch": 0.5432835820895522, + "grad_norm": 0.8039993848081551, + "learning_rate": 2.271877854000909e-06, + "loss": 0.3115, + "step": 1820 + }, + { + "epoch": 0.5435820895522389, + "grad_norm": 0.8401858367747947, + "learning_rate": 2.2694706923930737e-06, + "loss": 0.3542, + "step": 1821 + }, + { + "epoch": 0.5438805970149254, + "grad_norm": 0.7815175224613609, + "learning_rate": 2.2670637463245223e-06, + "loss": 0.3417, + "step": 1822 + }, + { + "epoch": 0.544179104477612, + "grad_norm": 0.8583325595180437, + "learning_rate": 2.2646570180456896e-06, + "loss": 0.3806, + "step": 1823 + }, + { + "epoch": 0.5444776119402985, + "grad_norm": 0.8765871329748808, + "learning_rate": 2.2622505098068098e-06, + "loss": 0.3702, + "step": 1824 + }, + { + "epoch": 0.5447761194029851, + "grad_norm": 0.7542775615348616, + "learning_rate": 2.2598442238579067e-06, + "loss": 0.3352, + "step": 1825 + }, + { + "epoch": 0.5450746268656717, + "grad_norm": 0.8460036442491983, + "learning_rate": 2.2574381624488008e-06, + "loss": 0.3481, + "step": 1826 + }, + { + "epoch": 0.5453731343283582, + "grad_norm": 0.8043856469909816, + "learning_rate": 2.2550323278291013e-06, + "loss": 0.3685, + "step": 1827 + }, + { + "epoch": 0.5456716417910448, + "grad_norm": 0.8739643120061339, + "learning_rate": 2.2526267222482035e-06, + "loss": 0.346, + "step": 1828 + }, + { + "epoch": 0.5459701492537313, + "grad_norm": 0.9831654255410001, + "learning_rate": 2.250221347955291e-06, + "loss": 0.3252, + "step": 1829 + }, + { + "epoch": 0.5462686567164179, + "grad_norm": 0.8015068502149022, + "learning_rate": 2.2478162071993296e-06, + "loss": 0.357, + "step": 1830 + }, + { + "epoch": 0.5465671641791044, + "grad_norm": 0.7756446807152939, + "learning_rate": 2.2454113022290676e-06, + "loss": 0.3478, + "step": 1831 + }, + { + "epoch": 0.546865671641791, + "grad_norm": 0.9555139693052305, + "learning_rate": 2.2430066352930323e-06, + "loss": 0.3294, + "step": 1832 + }, + { + "epoch": 0.5471641791044776, + "grad_norm": 0.8021249504547362, + "learning_rate": 2.240602208639529e-06, + "loss": 0.3343, + "step": 1833 + }, + { + "epoch": 0.5474626865671641, + "grad_norm": 0.7978682944455774, + "learning_rate": 2.238198024516637e-06, + "loss": 0.3285, + "step": 1834 + }, + { + "epoch": 0.5477611940298508, + "grad_norm": 0.8124634613968112, + "learning_rate": 2.2357940851722114e-06, + "loss": 0.336, + "step": 1835 + }, + { + "epoch": 0.5480597014925374, + "grad_norm": 0.8713005908230844, + "learning_rate": 2.2333903928538765e-06, + "loss": 0.3149, + "step": 1836 + }, + { + "epoch": 0.5483582089552239, + "grad_norm": 0.9595459470624672, + "learning_rate": 2.230986949809025e-06, + "loss": 0.3468, + "step": 1837 + }, + { + "epoch": 0.5486567164179105, + "grad_norm": 0.8161907559262533, + "learning_rate": 2.2285837582848185e-06, + "loss": 0.3801, + "step": 1838 + }, + { + "epoch": 0.548955223880597, + "grad_norm": 0.8893397151532759, + "learning_rate": 2.226180820528182e-06, + "loss": 0.3721, + "step": 1839 + }, + { + "epoch": 0.5492537313432836, + "grad_norm": 0.7700788867504089, + "learning_rate": 2.223778138785804e-06, + "loss": 0.3014, + "step": 1840 + }, + { + "epoch": 0.5495522388059702, + "grad_norm": 0.871007482090454, + "learning_rate": 2.2213757153041337e-06, + "loss": 0.3615, + "step": 1841 + }, + { + "epoch": 0.5498507462686567, + "grad_norm": 0.8633799398623063, + "learning_rate": 2.2189735523293777e-06, + "loss": 0.361, + "step": 1842 + }, + { + "epoch": 0.5501492537313433, + "grad_norm": 0.8645468422430543, + "learning_rate": 2.2165716521075e-06, + "loss": 0.3533, + "step": 1843 + }, + { + "epoch": 0.5504477611940298, + "grad_norm": 0.8540892598497185, + "learning_rate": 2.21417001688422e-06, + "loss": 0.3592, + "step": 1844 + }, + { + "epoch": 0.5507462686567164, + "grad_norm": 0.8694634042599341, + "learning_rate": 2.211768648905006e-06, + "loss": 0.352, + "step": 1845 + }, + { + "epoch": 0.551044776119403, + "grad_norm": 0.7911027777874611, + "learning_rate": 2.20936755041508e-06, + "loss": 0.3108, + "step": 1846 + }, + { + "epoch": 0.5513432835820895, + "grad_norm": 0.8066430351634646, + "learning_rate": 2.2069667236594104e-06, + "loss": 0.2852, + "step": 1847 + }, + { + "epoch": 0.5516417910447761, + "grad_norm": 0.8185535177696429, + "learning_rate": 2.204566170882711e-06, + "loss": 0.34, + "step": 1848 + }, + { + "epoch": 0.5519402985074627, + "grad_norm": 0.7993975027083716, + "learning_rate": 2.202165894329441e-06, + "loss": 0.3781, + "step": 1849 + }, + { + "epoch": 0.5522388059701493, + "grad_norm": 0.849262989720656, + "learning_rate": 2.1997658962438003e-06, + "loss": 0.3165, + "step": 1850 + }, + { + "epoch": 0.5525373134328359, + "grad_norm": 0.7278759041881264, + "learning_rate": 2.197366178869728e-06, + "loss": 0.2826, + "step": 1851 + }, + { + "epoch": 0.5528358208955224, + "grad_norm": 0.828521762307412, + "learning_rate": 2.1949667444509025e-06, + "loss": 0.3265, + "step": 1852 + }, + { + "epoch": 0.553134328358209, + "grad_norm": 0.7855801046085621, + "learning_rate": 2.192567595230735e-06, + "loss": 0.319, + "step": 1853 + }, + { + "epoch": 0.5534328358208955, + "grad_norm": 0.7381825111391765, + "learning_rate": 2.190168733452372e-06, + "loss": 0.3203, + "step": 1854 + }, + { + "epoch": 0.5537313432835821, + "grad_norm": 0.8446345881014914, + "learning_rate": 2.187770161358692e-06, + "loss": 0.3271, + "step": 1855 + }, + { + "epoch": 0.5540298507462686, + "grad_norm": 0.725953827886251, + "learning_rate": 2.1853718811922996e-06, + "loss": 0.3016, + "step": 1856 + }, + { + "epoch": 0.5543283582089552, + "grad_norm": 0.8971268582384204, + "learning_rate": 2.1829738951955295e-06, + "loss": 0.3595, + "step": 1857 + }, + { + "epoch": 0.5546268656716418, + "grad_norm": 0.8332215625700863, + "learning_rate": 2.180576205610439e-06, + "loss": 0.2938, + "step": 1858 + }, + { + "epoch": 0.5549253731343283, + "grad_norm": 0.8275970156258079, + "learning_rate": 2.178178814678812e-06, + "loss": 0.3862, + "step": 1859 + }, + { + "epoch": 0.5552238805970149, + "grad_norm": 0.86905690858234, + "learning_rate": 2.1757817246421477e-06, + "loss": 0.3728, + "step": 1860 + }, + { + "epoch": 0.5555223880597014, + "grad_norm": 0.7889094120217989, + "learning_rate": 2.173384937741668e-06, + "loss": 0.307, + "step": 1861 + }, + { + "epoch": 0.5558208955223881, + "grad_norm": 0.9468308573254213, + "learning_rate": 2.1709884562183114e-06, + "loss": 0.3373, + "step": 1862 + }, + { + "epoch": 0.5561194029850747, + "grad_norm": 0.8358643562239164, + "learning_rate": 2.1685922823127276e-06, + "loss": 0.3595, + "step": 1863 + }, + { + "epoch": 0.5564179104477612, + "grad_norm": 0.8195541395992418, + "learning_rate": 2.166196418265282e-06, + "loss": 0.3175, + "step": 1864 + }, + { + "epoch": 0.5567164179104478, + "grad_norm": 0.8872518656245127, + "learning_rate": 2.163800866316049e-06, + "loss": 0.3329, + "step": 1865 + }, + { + "epoch": 0.5570149253731344, + "grad_norm": 0.8974786463656703, + "learning_rate": 2.161405628704811e-06, + "loss": 0.3594, + "step": 1866 + }, + { + "epoch": 0.5573134328358209, + "grad_norm": 0.9090184559871817, + "learning_rate": 2.1590107076710567e-06, + "loss": 0.341, + "step": 1867 + }, + { + "epoch": 0.5576119402985075, + "grad_norm": 0.795923872852375, + "learning_rate": 2.1566161054539797e-06, + "loss": 0.3174, + "step": 1868 + }, + { + "epoch": 0.557910447761194, + "grad_norm": 0.836212534397218, + "learning_rate": 2.1542218242924737e-06, + "loss": 0.3334, + "step": 1869 + }, + { + "epoch": 0.5582089552238806, + "grad_norm": 0.9386342551389201, + "learning_rate": 2.1518278664251334e-06, + "loss": 0.3427, + "step": 1870 + }, + { + "epoch": 0.5585074626865671, + "grad_norm": 0.8270322091053605, + "learning_rate": 2.1494342340902518e-06, + "loss": 0.3581, + "step": 1871 + }, + { + "epoch": 0.5588059701492537, + "grad_norm": 0.8355544694865434, + "learning_rate": 2.147040929525816e-06, + "loss": 0.3277, + "step": 1872 + }, + { + "epoch": 0.5591044776119403, + "grad_norm": 0.7760309647668585, + "learning_rate": 2.1446479549695083e-06, + "loss": 0.3003, + "step": 1873 + }, + { + "epoch": 0.5594029850746268, + "grad_norm": 0.8285302940910255, + "learning_rate": 2.1422553126587e-06, + "loss": 0.3398, + "step": 1874 + }, + { + "epoch": 0.5597014925373134, + "grad_norm": 0.8028111781327222, + "learning_rate": 2.139863004830455e-06, + "loss": 0.3068, + "step": 1875 + }, + { + "epoch": 0.56, + "grad_norm": 0.9241018551228057, + "learning_rate": 2.1374710337215223e-06, + "loss": 0.3117, + "step": 1876 + }, + { + "epoch": 0.5602985074626866, + "grad_norm": 0.8859437856608664, + "learning_rate": 2.1350794015683356e-06, + "loss": 0.3366, + "step": 1877 + }, + { + "epoch": 0.5605970149253732, + "grad_norm": 0.8066823845490054, + "learning_rate": 2.1326881106070137e-06, + "loss": 0.3239, + "step": 1878 + }, + { + "epoch": 0.5608955223880597, + "grad_norm": 0.8280625069457178, + "learning_rate": 2.1302971630733553e-06, + "loss": 0.3256, + "step": 1879 + }, + { + "epoch": 0.5611940298507463, + "grad_norm": 0.7654383674574374, + "learning_rate": 2.127906561202838e-06, + "loss": 0.3152, + "step": 1880 + }, + { + "epoch": 0.5614925373134328, + "grad_norm": 0.8019819178042276, + "learning_rate": 2.125516307230616e-06, + "loss": 0.3496, + "step": 1881 + }, + { + "epoch": 0.5617910447761194, + "grad_norm": 0.8424062174944292, + "learning_rate": 2.1231264033915188e-06, + "loss": 0.3457, + "step": 1882 + }, + { + "epoch": 0.562089552238806, + "grad_norm": 1.0405265753175954, + "learning_rate": 2.1207368519200483e-06, + "loss": 0.3606, + "step": 1883 + }, + { + "epoch": 0.5623880597014925, + "grad_norm": 0.8012391807155793, + "learning_rate": 2.1183476550503763e-06, + "loss": 0.3336, + "step": 1884 + }, + { + "epoch": 0.5626865671641791, + "grad_norm": 0.8303562700296768, + "learning_rate": 2.1159588150163447e-06, + "loss": 0.3525, + "step": 1885 + }, + { + "epoch": 0.5629850746268656, + "grad_norm": 0.8141402829710194, + "learning_rate": 2.113570334051459e-06, + "loss": 0.3743, + "step": 1886 + }, + { + "epoch": 0.5632835820895522, + "grad_norm": 0.7825708947108343, + "learning_rate": 2.111182214388893e-06, + "loss": 0.2933, + "step": 1887 + }, + { + "epoch": 0.5635820895522388, + "grad_norm": 0.8309709447567482, + "learning_rate": 2.108794458261478e-06, + "loss": 0.3238, + "step": 1888 + }, + { + "epoch": 0.5638805970149253, + "grad_norm": 0.8224296447050079, + "learning_rate": 2.106407067901709e-06, + "loss": 0.3258, + "step": 1889 + }, + { + "epoch": 0.564179104477612, + "grad_norm": 0.9018945970876204, + "learning_rate": 2.104020045541739e-06, + "loss": 0.3827, + "step": 1890 + }, + { + "epoch": 0.5644776119402986, + "grad_norm": 0.8513253106321182, + "learning_rate": 2.1016333934133727e-06, + "loss": 0.3399, + "step": 1891 + }, + { + "epoch": 0.5647761194029851, + "grad_norm": 0.9164770773291085, + "learning_rate": 2.099247113748074e-06, + "loss": 0.3529, + "step": 1892 + }, + { + "epoch": 0.5650746268656717, + "grad_norm": 0.8334813106827352, + "learning_rate": 2.096861208776956e-06, + "loss": 0.342, + "step": 1893 + }, + { + "epoch": 0.5653731343283582, + "grad_norm": 0.79013138961232, + "learning_rate": 2.0944756807307805e-06, + "loss": 0.3404, + "step": 1894 + }, + { + "epoch": 0.5656716417910448, + "grad_norm": 0.8495550812479902, + "learning_rate": 2.092090531839959e-06, + "loss": 0.3307, + "step": 1895 + }, + { + "epoch": 0.5659701492537313, + "grad_norm": 0.8325862898480291, + "learning_rate": 2.089705764334547e-06, + "loss": 0.3557, + "step": 1896 + }, + { + "epoch": 0.5662686567164179, + "grad_norm": 0.8222800051067589, + "learning_rate": 2.0873213804442454e-06, + "loss": 0.3185, + "step": 1897 + }, + { + "epoch": 0.5665671641791045, + "grad_norm": 0.7723465046550894, + "learning_rate": 2.0849373823983935e-06, + "loss": 0.3464, + "step": 1898 + }, + { + "epoch": 0.566865671641791, + "grad_norm": 0.8615414329369864, + "learning_rate": 2.082553772425972e-06, + "loss": 0.3505, + "step": 1899 + }, + { + "epoch": 0.5671641791044776, + "grad_norm": 0.8521312443511204, + "learning_rate": 2.080170552755598e-06, + "loss": 0.338, + "step": 1900 + }, + { + "epoch": 0.5674626865671641, + "grad_norm": 0.8669205691719605, + "learning_rate": 2.077787725615524e-06, + "loss": 0.3645, + "step": 1901 + }, + { + "epoch": 0.5677611940298507, + "grad_norm": 0.750771605956206, + "learning_rate": 2.075405293233635e-06, + "loss": 0.2969, + "step": 1902 + }, + { + "epoch": 0.5680597014925373, + "grad_norm": 0.770946338972137, + "learning_rate": 2.073023257837448e-06, + "loss": 0.3189, + "step": 1903 + }, + { + "epoch": 0.5683582089552239, + "grad_norm": 0.8472428925223515, + "learning_rate": 2.070641621654106e-06, + "loss": 0.3146, + "step": 1904 + }, + { + "epoch": 0.5686567164179105, + "grad_norm": 0.8076339095091497, + "learning_rate": 2.068260386910383e-06, + "loss": 0.3259, + "step": 1905 + }, + { + "epoch": 0.568955223880597, + "grad_norm": 0.7546439694354958, + "learning_rate": 2.0658795558326745e-06, + "loss": 0.3046, + "step": 1906 + }, + { + "epoch": 0.5692537313432836, + "grad_norm": 0.7863545601633702, + "learning_rate": 2.063499130646999e-06, + "loss": 0.3119, + "step": 1907 + }, + { + "epoch": 0.5695522388059702, + "grad_norm": 0.9196917659773726, + "learning_rate": 2.0611191135789972e-06, + "loss": 0.3762, + "step": 1908 + }, + { + "epoch": 0.5698507462686567, + "grad_norm": 0.8725237416065241, + "learning_rate": 2.0587395068539268e-06, + "loss": 0.3656, + "step": 1909 + }, + { + "epoch": 0.5701492537313433, + "grad_norm": 0.8403813292122189, + "learning_rate": 2.056360312696661e-06, + "loss": 0.3555, + "step": 1910 + }, + { + "epoch": 0.5704477611940298, + "grad_norm": 0.739162644070513, + "learning_rate": 2.0539815333316896e-06, + "loss": 0.3363, + "step": 1911 + }, + { + "epoch": 0.5707462686567164, + "grad_norm": 0.8072527202747288, + "learning_rate": 2.0516031709831124e-06, + "loss": 0.3338, + "step": 1912 + }, + { + "epoch": 0.571044776119403, + "grad_norm": 0.7551658882134142, + "learning_rate": 2.0492252278746406e-06, + "loss": 0.3133, + "step": 1913 + }, + { + "epoch": 0.5713432835820895, + "grad_norm": 0.8610162991085163, + "learning_rate": 2.046847706229594e-06, + "loss": 0.3595, + "step": 1914 + }, + { + "epoch": 0.5716417910447761, + "grad_norm": 0.8314973416295284, + "learning_rate": 2.0444706082708957e-06, + "loss": 0.3151, + "step": 1915 + }, + { + "epoch": 0.5719402985074626, + "grad_norm": 0.7775346420782155, + "learning_rate": 2.042093936221075e-06, + "loss": 0.328, + "step": 1916 + }, + { + "epoch": 0.5722388059701492, + "grad_norm": 1.4529917092934526, + "learning_rate": 2.039717692302263e-06, + "loss": 0.3158, + "step": 1917 + }, + { + "epoch": 0.5725373134328359, + "grad_norm": 0.8832461020890808, + "learning_rate": 2.0373418787361886e-06, + "loss": 0.3249, + "step": 1918 + }, + { + "epoch": 0.5728358208955224, + "grad_norm": 0.8529873184087406, + "learning_rate": 2.0349664977441806e-06, + "loss": 0.3627, + "step": 1919 + }, + { + "epoch": 0.573134328358209, + "grad_norm": 0.8801116310966653, + "learning_rate": 2.0325915515471627e-06, + "loss": 0.3887, + "step": 1920 + }, + { + "epoch": 0.5734328358208955, + "grad_norm": 0.8146237688887484, + "learning_rate": 2.03021704236565e-06, + "loss": 0.3567, + "step": 1921 + }, + { + "epoch": 0.5737313432835821, + "grad_norm": 0.8299437970321636, + "learning_rate": 2.0278429724197517e-06, + "loss": 0.3358, + "step": 1922 + }, + { + "epoch": 0.5740298507462687, + "grad_norm": 0.8343117163911652, + "learning_rate": 2.0254693439291665e-06, + "loss": 0.3466, + "step": 1923 + }, + { + "epoch": 0.5743283582089552, + "grad_norm": 0.7664529851147533, + "learning_rate": 2.023096159113177e-06, + "loss": 0.3201, + "step": 1924 + }, + { + "epoch": 0.5746268656716418, + "grad_norm": 0.8295280556984195, + "learning_rate": 2.0207234201906546e-06, + "loss": 0.3285, + "step": 1925 + }, + { + "epoch": 0.5749253731343283, + "grad_norm": 0.9191354365414987, + "learning_rate": 2.018351129380052e-06, + "loss": 0.3826, + "step": 1926 + }, + { + "epoch": 0.5752238805970149, + "grad_norm": 0.7452870603167776, + "learning_rate": 2.0159792888994025e-06, + "loss": 0.3166, + "step": 1927 + }, + { + "epoch": 0.5755223880597015, + "grad_norm": 1.0403191478557996, + "learning_rate": 2.0136079009663205e-06, + "loss": 0.3165, + "step": 1928 + }, + { + "epoch": 0.575820895522388, + "grad_norm": 0.8016626447497367, + "learning_rate": 2.0112369677979955e-06, + "loss": 0.3246, + "step": 1929 + }, + { + "epoch": 0.5761194029850746, + "grad_norm": 0.8057426738001215, + "learning_rate": 2.008866491611191e-06, + "loss": 0.2706, + "step": 1930 + }, + { + "epoch": 0.5764179104477611, + "grad_norm": 0.8464060075703335, + "learning_rate": 2.0064964746222464e-06, + "loss": 0.3619, + "step": 1931 + }, + { + "epoch": 0.5767164179104478, + "grad_norm": 0.7572279430026017, + "learning_rate": 2.0041269190470687e-06, + "loss": 0.3071, + "step": 1932 + }, + { + "epoch": 0.5770149253731344, + "grad_norm": 0.8070777794735167, + "learning_rate": 2.001757827101134e-06, + "loss": 0.3298, + "step": 1933 + }, + { + "epoch": 0.5773134328358209, + "grad_norm": 0.7406332845203685, + "learning_rate": 1.9993892009994862e-06, + "loss": 0.31, + "step": 1934 + }, + { + "epoch": 0.5776119402985075, + "grad_norm": 0.8057738674065215, + "learning_rate": 1.9970210429567343e-06, + "loss": 0.3689, + "step": 1935 + }, + { + "epoch": 0.577910447761194, + "grad_norm": 0.8334636514302126, + "learning_rate": 1.9946533551870465e-06, + "loss": 0.3531, + "step": 1936 + }, + { + "epoch": 0.5782089552238806, + "grad_norm": 0.7674289139447094, + "learning_rate": 1.9922861399041537e-06, + "loss": 0.357, + "step": 1937 + }, + { + "epoch": 0.5785074626865672, + "grad_norm": 0.8048568434131651, + "learning_rate": 1.989919399321345e-06, + "loss": 0.346, + "step": 1938 + }, + { + "epoch": 0.5788059701492537, + "grad_norm": 0.7475874633539165, + "learning_rate": 1.9875531356514642e-06, + "loss": 0.3308, + "step": 1939 + }, + { + "epoch": 0.5791044776119403, + "grad_norm": 1.1995168376736667, + "learning_rate": 1.9851873511069104e-06, + "loss": 0.3112, + "step": 1940 + }, + { + "epoch": 0.5794029850746268, + "grad_norm": 0.8706989388369915, + "learning_rate": 1.9828220478996357e-06, + "loss": 0.3703, + "step": 1941 + }, + { + "epoch": 0.5797014925373134, + "grad_norm": 0.8260670136022402, + "learning_rate": 1.980457228241139e-06, + "loss": 0.317, + "step": 1942 + }, + { + "epoch": 0.58, + "grad_norm": 0.7553465208304225, + "learning_rate": 1.9780928943424703e-06, + "loss": 0.3407, + "step": 1943 + }, + { + "epoch": 0.5802985074626865, + "grad_norm": 0.9544720767792053, + "learning_rate": 1.9757290484142244e-06, + "loss": 0.3739, + "step": 1944 + }, + { + "epoch": 0.5805970149253732, + "grad_norm": 0.8720188388239823, + "learning_rate": 1.9733656926665388e-06, + "loss": 0.3476, + "step": 1945 + }, + { + "epoch": 0.5808955223880597, + "grad_norm": 0.8709469192102466, + "learning_rate": 1.971002829309094e-06, + "loss": 0.3701, + "step": 1946 + }, + { + "epoch": 0.5811940298507463, + "grad_norm": 0.8604365263309919, + "learning_rate": 1.968640460551109e-06, + "loss": 0.3698, + "step": 1947 + }, + { + "epoch": 0.5814925373134329, + "grad_norm": 0.7533527794831715, + "learning_rate": 1.9662785886013417e-06, + "loss": 0.3349, + "step": 1948 + }, + { + "epoch": 0.5817910447761194, + "grad_norm": 0.8247273401204854, + "learning_rate": 1.9639172156680848e-06, + "loss": 0.3361, + "step": 1949 + }, + { + "epoch": 0.582089552238806, + "grad_norm": 0.8120190282994632, + "learning_rate": 1.961556343959164e-06, + "loss": 0.2857, + "step": 1950 + }, + { + "epoch": 0.5823880597014925, + "grad_norm": 0.862894378293909, + "learning_rate": 1.9591959756819366e-06, + "loss": 0.3927, + "step": 1951 + }, + { + "epoch": 0.5826865671641791, + "grad_norm": 0.8218779267658843, + "learning_rate": 1.9568361130432907e-06, + "loss": 0.3287, + "step": 1952 + }, + { + "epoch": 0.5829850746268657, + "grad_norm": 0.8143989499819814, + "learning_rate": 1.954476758249639e-06, + "loss": 0.3454, + "step": 1953 + }, + { + "epoch": 0.5832835820895522, + "grad_norm": 0.8454839729330965, + "learning_rate": 1.9521179135069213e-06, + "loss": 0.3276, + "step": 1954 + }, + { + "epoch": 0.5835820895522388, + "grad_norm": 0.750200670423591, + "learning_rate": 1.9497595810206e-06, + "loss": 0.3304, + "step": 1955 + }, + { + "epoch": 0.5838805970149253, + "grad_norm": 0.8045091719653029, + "learning_rate": 1.9474017629956576e-06, + "loss": 0.3525, + "step": 1956 + }, + { + "epoch": 0.5841791044776119, + "grad_norm": 0.904970348742468, + "learning_rate": 1.9450444616365976e-06, + "loss": 0.3555, + "step": 1957 + }, + { + "epoch": 0.5844776119402985, + "grad_norm": 0.7892323857546758, + "learning_rate": 1.9426876791474396e-06, + "loss": 0.3307, + "step": 1958 + }, + { + "epoch": 0.5847761194029851, + "grad_norm": 0.8188095376158899, + "learning_rate": 1.940331417731716e-06, + "loss": 0.3035, + "step": 1959 + }, + { + "epoch": 0.5850746268656717, + "grad_norm": 0.7550954391161062, + "learning_rate": 1.9379756795924757e-06, + "loss": 0.309, + "step": 1960 + }, + { + "epoch": 0.5853731343283582, + "grad_norm": 0.785376246418163, + "learning_rate": 1.935620466932275e-06, + "loss": 0.3116, + "step": 1961 + }, + { + "epoch": 0.5856716417910448, + "grad_norm": 0.8351399359015248, + "learning_rate": 1.933265781953181e-06, + "loss": 0.3944, + "step": 1962 + }, + { + "epoch": 0.5859701492537314, + "grad_norm": 0.8086095361360239, + "learning_rate": 1.9309116268567675e-06, + "loss": 0.3319, + "step": 1963 + }, + { + "epoch": 0.5862686567164179, + "grad_norm": 0.953784526278762, + "learning_rate": 1.9285580038441105e-06, + "loss": 0.3373, + "step": 1964 + }, + { + "epoch": 0.5865671641791045, + "grad_norm": 0.909438958477141, + "learning_rate": 1.926204915115791e-06, + "loss": 0.3498, + "step": 1965 + }, + { + "epoch": 0.586865671641791, + "grad_norm": 0.8033996032006552, + "learning_rate": 1.92385236287189e-06, + "loss": 0.3342, + "step": 1966 + }, + { + "epoch": 0.5871641791044776, + "grad_norm": 0.8135989857406488, + "learning_rate": 1.921500349311986e-06, + "loss": 0.3831, + "step": 1967 + }, + { + "epoch": 0.5874626865671642, + "grad_norm": 0.850789249214276, + "learning_rate": 1.919148876635154e-06, + "loss": 0.2999, + "step": 1968 + }, + { + "epoch": 0.5877611940298507, + "grad_norm": 0.7746152215096338, + "learning_rate": 1.916797947039965e-06, + "loss": 0.3327, + "step": 1969 + }, + { + "epoch": 0.5880597014925373, + "grad_norm": 0.9288841282660558, + "learning_rate": 1.914447562724479e-06, + "loss": 0.3201, + "step": 1970 + }, + { + "epoch": 0.5883582089552238, + "grad_norm": 0.7498990167094447, + "learning_rate": 1.9120977258862493e-06, + "loss": 0.3018, + "step": 1971 + }, + { + "epoch": 0.5886567164179104, + "grad_norm": 0.7828793001916144, + "learning_rate": 1.909748438722315e-06, + "loss": 0.3376, + "step": 1972 + }, + { + "epoch": 0.5889552238805971, + "grad_norm": 0.9745524576411368, + "learning_rate": 1.9073997034292043e-06, + "loss": 0.4012, + "step": 1973 + }, + { + "epoch": 0.5892537313432836, + "grad_norm": 0.7597359048329623, + "learning_rate": 1.905051522202926e-06, + "loss": 0.2966, + "step": 1974 + }, + { + "epoch": 0.5895522388059702, + "grad_norm": 0.7985139531152796, + "learning_rate": 1.902703897238972e-06, + "loss": 0.3545, + "step": 1975 + }, + { + "epoch": 0.5898507462686567, + "grad_norm": 0.7828851032723473, + "learning_rate": 1.9003568307323156e-06, + "loss": 0.3402, + "step": 1976 + }, + { + "epoch": 0.5901492537313433, + "grad_norm": 0.8844102544530947, + "learning_rate": 1.898010324877406e-06, + "loss": 0.3512, + "step": 1977 + }, + { + "epoch": 0.5904477611940299, + "grad_norm": 1.2103129479742514, + "learning_rate": 1.8956643818681685e-06, + "loss": 0.3557, + "step": 1978 + }, + { + "epoch": 0.5907462686567164, + "grad_norm": 0.8338907004205794, + "learning_rate": 1.8933190038980044e-06, + "loss": 0.2936, + "step": 1979 + }, + { + "epoch": 0.591044776119403, + "grad_norm": 0.798234969689793, + "learning_rate": 1.890974193159783e-06, + "loss": 0.3208, + "step": 1980 + }, + { + "epoch": 0.5913432835820895, + "grad_norm": 0.8265334530994278, + "learning_rate": 1.888629951845846e-06, + "loss": 0.3321, + "step": 1981 + }, + { + "epoch": 0.5916417910447761, + "grad_norm": 0.8484611458052064, + "learning_rate": 1.8862862821480023e-06, + "loss": 0.3302, + "step": 1982 + }, + { + "epoch": 0.5919402985074627, + "grad_norm": 0.7732292982110145, + "learning_rate": 1.8839431862575252e-06, + "loss": 0.3178, + "step": 1983 + }, + { + "epoch": 0.5922388059701492, + "grad_norm": 0.7854227204822434, + "learning_rate": 1.881600666365153e-06, + "loss": 0.2743, + "step": 1984 + }, + { + "epoch": 0.5925373134328358, + "grad_norm": 0.8822786181065627, + "learning_rate": 1.8792587246610833e-06, + "loss": 0.2931, + "step": 1985 + }, + { + "epoch": 0.5928358208955223, + "grad_norm": 0.8413866496121205, + "learning_rate": 1.8769173633349757e-06, + "loss": 0.3055, + "step": 1986 + }, + { + "epoch": 0.593134328358209, + "grad_norm": 0.8929662942782096, + "learning_rate": 1.8745765845759466e-06, + "loss": 0.3441, + "step": 1987 + }, + { + "epoch": 0.5934328358208956, + "grad_norm": 0.8084666675397074, + "learning_rate": 1.8722363905725654e-06, + "loss": 0.3335, + "step": 1988 + }, + { + "epoch": 0.5937313432835821, + "grad_norm": 0.8138764969858261, + "learning_rate": 1.8698967835128572e-06, + "loss": 0.3642, + "step": 1989 + }, + { + "epoch": 0.5940298507462687, + "grad_norm": 0.7905280819462355, + "learning_rate": 1.867557765584298e-06, + "loss": 0.3279, + "step": 1990 + }, + { + "epoch": 0.5943283582089552, + "grad_norm": 0.7863823629467317, + "learning_rate": 1.8652193389738122e-06, + "loss": 0.3364, + "step": 1991 + }, + { + "epoch": 0.5946268656716418, + "grad_norm": 0.7267169725040129, + "learning_rate": 1.8628815058677712e-06, + "loss": 0.3171, + "step": 1992 + }, + { + "epoch": 0.5949253731343284, + "grad_norm": 0.8524291293019064, + "learning_rate": 1.8605442684519932e-06, + "loss": 0.3593, + "step": 1993 + }, + { + "epoch": 0.5952238805970149, + "grad_norm": 0.7776370242867892, + "learning_rate": 1.8582076289117367e-06, + "loss": 0.3241, + "step": 1994 + }, + { + "epoch": 0.5955223880597015, + "grad_norm": 0.899435349098279, + "learning_rate": 1.8558715894317033e-06, + "loss": 0.3719, + "step": 1995 + }, + { + "epoch": 0.595820895522388, + "grad_norm": 0.8848148638993476, + "learning_rate": 1.8535361521960341e-06, + "loss": 0.3868, + "step": 1996 + }, + { + "epoch": 0.5961194029850746, + "grad_norm": 0.8154092596098508, + "learning_rate": 1.8512013193883044e-06, + "loss": 0.3443, + "step": 1997 + }, + { + "epoch": 0.5964179104477612, + "grad_norm": 0.8146716902809428, + "learning_rate": 1.8488670931915268e-06, + "loss": 0.3366, + "step": 1998 + }, + { + "epoch": 0.5967164179104477, + "grad_norm": 0.8576493081535792, + "learning_rate": 1.8465334757881453e-06, + "loss": 0.3671, + "step": 1999 + }, + { + "epoch": 0.5970149253731343, + "grad_norm": 0.8128896259402698, + "learning_rate": 1.8442004693600358e-06, + "loss": 0.3748, + "step": 2000 + }, + { + "epoch": 0.597313432835821, + "grad_norm": 0.7918103129525407, + "learning_rate": 1.8418680760885028e-06, + "loss": 0.3413, + "step": 2001 + }, + { + "epoch": 0.5976119402985075, + "grad_norm": 0.8813005048512403, + "learning_rate": 1.8395362981542757e-06, + "loss": 0.3706, + "step": 2002 + }, + { + "epoch": 0.5979104477611941, + "grad_norm": 0.823115743992545, + "learning_rate": 1.8372051377375117e-06, + "loss": 0.3721, + "step": 2003 + }, + { + "epoch": 0.5982089552238806, + "grad_norm": 0.7894206010095408, + "learning_rate": 1.8348745970177884e-06, + "loss": 0.3229, + "step": 2004 + }, + { + "epoch": 0.5985074626865672, + "grad_norm": 1.3615178436392212, + "learning_rate": 1.8325446781741041e-06, + "loss": 0.3459, + "step": 2005 + }, + { + "epoch": 0.5988059701492537, + "grad_norm": 0.7843156247200797, + "learning_rate": 1.8302153833848762e-06, + "loss": 0.3221, + "step": 2006 + }, + { + "epoch": 0.5991044776119403, + "grad_norm": 1.075019211892318, + "learning_rate": 1.8278867148279395e-06, + "loss": 0.3769, + "step": 2007 + }, + { + "epoch": 0.5994029850746269, + "grad_norm": 0.8427937686071887, + "learning_rate": 1.8255586746805412e-06, + "loss": 0.3626, + "step": 2008 + }, + { + "epoch": 0.5997014925373134, + "grad_norm": 1.0019280320812132, + "learning_rate": 1.8232312651193418e-06, + "loss": 0.319, + "step": 2009 + }, + { + "epoch": 0.6, + "grad_norm": 0.752076030590557, + "learning_rate": 1.8209044883204141e-06, + "loss": 0.3165, + "step": 2010 + }, + { + "epoch": 0.6002985074626865, + "grad_norm": 0.7911868281154993, + "learning_rate": 1.8185783464592372e-06, + "loss": 0.3463, + "step": 2011 + }, + { + "epoch": 0.6005970149253731, + "grad_norm": 0.8613079162533939, + "learning_rate": 1.8162528417106962e-06, + "loss": 0.3539, + "step": 2012 + }, + { + "epoch": 0.6008955223880597, + "grad_norm": 0.7566901309370632, + "learning_rate": 1.8139279762490816e-06, + "loss": 0.3156, + "step": 2013 + }, + { + "epoch": 0.6011940298507462, + "grad_norm": 0.8761370837990474, + "learning_rate": 1.8116037522480866e-06, + "loss": 0.3873, + "step": 2014 + }, + { + "epoch": 0.6014925373134329, + "grad_norm": 0.9028602770938112, + "learning_rate": 1.8092801718808035e-06, + "loss": 0.3325, + "step": 2015 + }, + { + "epoch": 0.6017910447761194, + "grad_norm": 0.8144205633300299, + "learning_rate": 1.8069572373197234e-06, + "loss": 0.3361, + "step": 2016 + }, + { + "epoch": 0.602089552238806, + "grad_norm": 0.8544221934437295, + "learning_rate": 1.8046349507367336e-06, + "loss": 0.3611, + "step": 2017 + }, + { + "epoch": 0.6023880597014926, + "grad_norm": 0.808086145145757, + "learning_rate": 1.802313314303115e-06, + "loss": 0.3479, + "step": 2018 + }, + { + "epoch": 0.6026865671641791, + "grad_norm": 0.9626316828813825, + "learning_rate": 1.7999923301895419e-06, + "loss": 0.357, + "step": 2019 + }, + { + "epoch": 0.6029850746268657, + "grad_norm": 0.8065948651275312, + "learning_rate": 1.797672000566077e-06, + "loss": 0.3632, + "step": 2020 + }, + { + "epoch": 0.6032835820895522, + "grad_norm": 0.8600310373365637, + "learning_rate": 1.795352327602172e-06, + "loss": 0.3467, + "step": 2021 + }, + { + "epoch": 0.6035820895522388, + "grad_norm": 0.7840180123347607, + "learning_rate": 1.7930333134666655e-06, + "loss": 0.3145, + "step": 2022 + }, + { + "epoch": 0.6038805970149254, + "grad_norm": 0.8760399951287269, + "learning_rate": 1.790714960327778e-06, + "loss": 0.3523, + "step": 2023 + }, + { + "epoch": 0.6041791044776119, + "grad_norm": 0.9401970743207244, + "learning_rate": 1.7883972703531138e-06, + "loss": 0.3923, + "step": 2024 + }, + { + "epoch": 0.6044776119402985, + "grad_norm": 0.7714575169408807, + "learning_rate": 1.786080245709657e-06, + "loss": 0.331, + "step": 2025 + }, + { + "epoch": 0.604776119402985, + "grad_norm": 0.8115570578510135, + "learning_rate": 1.7837638885637682e-06, + "loss": 0.2821, + "step": 2026 + }, + { + "epoch": 0.6050746268656716, + "grad_norm": 0.8360919692031952, + "learning_rate": 1.7814482010811852e-06, + "loss": 0.3322, + "step": 2027 + }, + { + "epoch": 0.6053731343283583, + "grad_norm": 0.8034337429029215, + "learning_rate": 1.77913318542702e-06, + "loss": 0.3147, + "step": 2028 + }, + { + "epoch": 0.6056716417910448, + "grad_norm": 0.7672266818780934, + "learning_rate": 1.776818843765755e-06, + "loss": 0.3489, + "step": 2029 + }, + { + "epoch": 0.6059701492537314, + "grad_norm": 0.7896411247534145, + "learning_rate": 1.7745051782612432e-06, + "loss": 0.3341, + "step": 2030 + }, + { + "epoch": 0.6062686567164179, + "grad_norm": 0.9298746237897938, + "learning_rate": 1.7721921910767066e-06, + "loss": 0.3759, + "step": 2031 + }, + { + "epoch": 0.6065671641791045, + "grad_norm": 0.7832828837477588, + "learning_rate": 1.76987988437473e-06, + "loss": 0.3277, + "step": 2032 + }, + { + "epoch": 0.6068656716417911, + "grad_norm": 0.8044375141975681, + "learning_rate": 1.7675682603172656e-06, + "loss": 0.3269, + "step": 2033 + }, + { + "epoch": 0.6071641791044776, + "grad_norm": 0.8011482806477036, + "learning_rate": 1.7652573210656242e-06, + "loss": 0.3285, + "step": 2034 + }, + { + "epoch": 0.6074626865671642, + "grad_norm": 0.8705610431211035, + "learning_rate": 1.7629470687804783e-06, + "loss": 0.3102, + "step": 2035 + }, + { + "epoch": 0.6077611940298507, + "grad_norm": 0.8594206025341075, + "learning_rate": 1.7606375056218578e-06, + "loss": 0.3346, + "step": 2036 + }, + { + "epoch": 0.6080597014925373, + "grad_norm": 0.8509811944236282, + "learning_rate": 1.7583286337491472e-06, + "loss": 0.327, + "step": 2037 + }, + { + "epoch": 0.6083582089552239, + "grad_norm": 0.7545342074644574, + "learning_rate": 1.7560204553210858e-06, + "loss": 0.3468, + "step": 2038 + }, + { + "epoch": 0.6086567164179104, + "grad_norm": 0.8580794849406649, + "learning_rate": 1.7537129724957644e-06, + "loss": 0.3539, + "step": 2039 + }, + { + "epoch": 0.608955223880597, + "grad_norm": 0.9223321696220602, + "learning_rate": 1.7514061874306226e-06, + "loss": 0.3834, + "step": 2040 + }, + { + "epoch": 0.6092537313432835, + "grad_norm": 0.7309630284623936, + "learning_rate": 1.7491001022824483e-06, + "loss": 0.3494, + "step": 2041 + }, + { + "epoch": 0.6095522388059702, + "grad_norm": 0.9098414650575413, + "learning_rate": 1.7467947192073759e-06, + "loss": 0.3578, + "step": 2042 + }, + { + "epoch": 0.6098507462686568, + "grad_norm": 0.8443943503187832, + "learning_rate": 1.744490040360881e-06, + "loss": 0.3264, + "step": 2043 + }, + { + "epoch": 0.6101492537313433, + "grad_norm": 0.8020741133467957, + "learning_rate": 1.7421860678977831e-06, + "loss": 0.327, + "step": 2044 + }, + { + "epoch": 0.6104477611940299, + "grad_norm": 0.8065034628311121, + "learning_rate": 1.7398828039722403e-06, + "loss": 0.333, + "step": 2045 + }, + { + "epoch": 0.6107462686567164, + "grad_norm": 0.932836415200145, + "learning_rate": 1.7375802507377475e-06, + "loss": 0.3337, + "step": 2046 + }, + { + "epoch": 0.611044776119403, + "grad_norm": 0.8386207975303591, + "learning_rate": 1.7352784103471355e-06, + "loss": 0.3384, + "step": 2047 + }, + { + "epoch": 0.6113432835820896, + "grad_norm": 0.9979807581300122, + "learning_rate": 1.7329772849525707e-06, + "loss": 0.3463, + "step": 2048 + }, + { + "epoch": 0.6116417910447761, + "grad_norm": 0.9144481465630234, + "learning_rate": 1.7306768767055481e-06, + "loss": 0.3171, + "step": 2049 + }, + { + "epoch": 0.6119402985074627, + "grad_norm": 0.9179258800006835, + "learning_rate": 1.7283771877568934e-06, + "loss": 0.2897, + "step": 2050 + }, + { + "epoch": 0.6122388059701492, + "grad_norm": 0.9088510671380466, + "learning_rate": 1.7260782202567595e-06, + "loss": 0.337, + "step": 2051 + }, + { + "epoch": 0.6125373134328358, + "grad_norm": 0.7587527894434154, + "learning_rate": 1.723779976354626e-06, + "loss": 0.3155, + "step": 2052 + }, + { + "epoch": 0.6128358208955224, + "grad_norm": 0.8273268196048457, + "learning_rate": 1.721482458199294e-06, + "loss": 0.3487, + "step": 2053 + }, + { + "epoch": 0.6131343283582089, + "grad_norm": 0.875544314571328, + "learning_rate": 1.7191856679388869e-06, + "loss": 0.3419, + "step": 2054 + }, + { + "epoch": 0.6134328358208955, + "grad_norm": 0.8395751763266083, + "learning_rate": 1.7168896077208482e-06, + "loss": 0.3419, + "step": 2055 + }, + { + "epoch": 0.6137313432835821, + "grad_norm": 0.8125721253715453, + "learning_rate": 1.714594279691938e-06, + "loss": 0.3201, + "step": 2056 + }, + { + "epoch": 0.6140298507462687, + "grad_norm": 0.91793798749558, + "learning_rate": 1.7122996859982318e-06, + "loss": 0.324, + "step": 2057 + }, + { + "epoch": 0.6143283582089553, + "grad_norm": 0.8806848474353035, + "learning_rate": 1.710005828785119e-06, + "loss": 0.3682, + "step": 2058 + }, + { + "epoch": 0.6146268656716418, + "grad_norm": 0.7958603606959694, + "learning_rate": 1.7077127101973e-06, + "loss": 0.3475, + "step": 2059 + }, + { + "epoch": 0.6149253731343284, + "grad_norm": 0.8477880797334701, + "learning_rate": 1.7054203323787854e-06, + "loss": 0.3667, + "step": 2060 + }, + { + "epoch": 0.6152238805970149, + "grad_norm": 0.8049693453434607, + "learning_rate": 1.7031286974728916e-06, + "loss": 0.3672, + "step": 2061 + }, + { + "epoch": 0.6155223880597015, + "grad_norm": 0.8445392848976708, + "learning_rate": 1.7008378076222417e-06, + "loss": 0.3454, + "step": 2062 + }, + { + "epoch": 0.6158208955223881, + "grad_norm": 0.8355246174084053, + "learning_rate": 1.698547664968763e-06, + "loss": 0.3214, + "step": 2063 + }, + { + "epoch": 0.6161194029850746, + "grad_norm": 0.8464507961501435, + "learning_rate": 1.6962582716536813e-06, + "loss": 0.3299, + "step": 2064 + }, + { + "epoch": 0.6164179104477612, + "grad_norm": 0.9279680601344305, + "learning_rate": 1.6939696298175245e-06, + "loss": 0.3676, + "step": 2065 + }, + { + "epoch": 0.6167164179104477, + "grad_norm": 0.9394401389859991, + "learning_rate": 1.6916817416001175e-06, + "loss": 0.3252, + "step": 2066 + }, + { + "epoch": 0.6170149253731343, + "grad_norm": 0.8255999859088444, + "learning_rate": 1.6893946091405784e-06, + "loss": 0.3492, + "step": 2067 + }, + { + "epoch": 0.6173134328358209, + "grad_norm": 0.8393914123113455, + "learning_rate": 1.6871082345773215e-06, + "loss": 0.326, + "step": 2068 + }, + { + "epoch": 0.6176119402985074, + "grad_norm": 0.7540269358482177, + "learning_rate": 1.6848226200480514e-06, + "loss": 0.3164, + "step": 2069 + }, + { + "epoch": 0.6179104477611941, + "grad_norm": 0.8407057610257092, + "learning_rate": 1.6825377676897608e-06, + "loss": 0.3426, + "step": 2070 + }, + { + "epoch": 0.6182089552238806, + "grad_norm": 0.9601578918299282, + "learning_rate": 1.6802536796387328e-06, + "loss": 0.4112, + "step": 2071 + }, + { + "epoch": 0.6185074626865672, + "grad_norm": 0.8027639919807739, + "learning_rate": 1.6779703580305323e-06, + "loss": 0.3493, + "step": 2072 + }, + { + "epoch": 0.6188059701492538, + "grad_norm": 0.8391959617782264, + "learning_rate": 1.6756878050000098e-06, + "loss": 0.3591, + "step": 2073 + }, + { + "epoch": 0.6191044776119403, + "grad_norm": 0.7871628742456087, + "learning_rate": 1.6734060226812976e-06, + "loss": 0.3414, + "step": 2074 + }, + { + "epoch": 0.6194029850746269, + "grad_norm": 0.8096322967159341, + "learning_rate": 1.6711250132078055e-06, + "loss": 0.3215, + "step": 2075 + }, + { + "epoch": 0.6197014925373134, + "grad_norm": 0.8138284215248026, + "learning_rate": 1.6688447787122226e-06, + "loss": 0.3173, + "step": 2076 + }, + { + "epoch": 0.62, + "grad_norm": 0.8329557552536543, + "learning_rate": 1.666565321326512e-06, + "loss": 0.301, + "step": 2077 + }, + { + "epoch": 0.6202985074626866, + "grad_norm": 0.8459261392156018, + "learning_rate": 1.6642866431819107e-06, + "loss": 0.3869, + "step": 2078 + }, + { + "epoch": 0.6205970149253731, + "grad_norm": 0.7415893886114104, + "learning_rate": 1.6620087464089275e-06, + "loss": 0.3512, + "step": 2079 + }, + { + "epoch": 0.6208955223880597, + "grad_norm": 0.8509533039291013, + "learning_rate": 1.659731633137341e-06, + "loss": 0.356, + "step": 2080 + }, + { + "epoch": 0.6211940298507462, + "grad_norm": 0.7465210952477122, + "learning_rate": 1.657455305496195e-06, + "loss": 0.3329, + "step": 2081 + }, + { + "epoch": 0.6214925373134328, + "grad_norm": 0.863374891709103, + "learning_rate": 1.6551797656138018e-06, + "loss": 0.3296, + "step": 2082 + }, + { + "epoch": 0.6217910447761194, + "grad_norm": 0.8457477256461274, + "learning_rate": 1.6529050156177356e-06, + "loss": 0.3386, + "step": 2083 + }, + { + "epoch": 0.622089552238806, + "grad_norm": 0.8417932226084862, + "learning_rate": 1.650631057634831e-06, + "loss": 0.3294, + "step": 2084 + }, + { + "epoch": 0.6223880597014926, + "grad_norm": 0.7940353343813964, + "learning_rate": 1.6483578937911836e-06, + "loss": 0.3573, + "step": 2085 + }, + { + "epoch": 0.6226865671641791, + "grad_norm": 0.8984545759070848, + "learning_rate": 1.6460855262121479e-06, + "loss": 0.335, + "step": 2086 + }, + { + "epoch": 0.6229850746268657, + "grad_norm": 0.9689021624736345, + "learning_rate": 1.6438139570223311e-06, + "loss": 0.3637, + "step": 2087 + }, + { + "epoch": 0.6232835820895523, + "grad_norm": 0.7948610205026269, + "learning_rate": 1.641543188345594e-06, + "loss": 0.318, + "step": 2088 + }, + { + "epoch": 0.6235820895522388, + "grad_norm": 0.8388231085500125, + "learning_rate": 1.6392732223050515e-06, + "loss": 0.3366, + "step": 2089 + }, + { + "epoch": 0.6238805970149254, + "grad_norm": 0.9034038641733735, + "learning_rate": 1.6370040610230662e-06, + "loss": 0.3811, + "step": 2090 + }, + { + "epoch": 0.6241791044776119, + "grad_norm": 0.8056850552409819, + "learning_rate": 1.6347357066212478e-06, + "loss": 0.3246, + "step": 2091 + }, + { + "epoch": 0.6244776119402985, + "grad_norm": 0.8924171728248095, + "learning_rate": 1.6324681612204527e-06, + "loss": 0.3099, + "step": 2092 + }, + { + "epoch": 0.624776119402985, + "grad_norm": 0.8889195100531483, + "learning_rate": 1.6302014269407812e-06, + "loss": 0.3916, + "step": 2093 + }, + { + "epoch": 0.6250746268656716, + "grad_norm": 0.802744288589964, + "learning_rate": 1.6279355059015739e-06, + "loss": 0.3216, + "step": 2094 + }, + { + "epoch": 0.6253731343283582, + "grad_norm": 0.8244538019031434, + "learning_rate": 1.6256704002214124e-06, + "loss": 0.3436, + "step": 2095 + }, + { + "epoch": 0.6256716417910447, + "grad_norm": 0.8806302935844983, + "learning_rate": 1.6234061120181144e-06, + "loss": 0.3234, + "step": 2096 + }, + { + "epoch": 0.6259701492537313, + "grad_norm": 0.7542539098364519, + "learning_rate": 1.6211426434087347e-06, + "loss": 0.3112, + "step": 2097 + }, + { + "epoch": 0.626268656716418, + "grad_norm": 0.8779997865446432, + "learning_rate": 1.6188799965095614e-06, + "loss": 0.3213, + "step": 2098 + }, + { + "epoch": 0.6265671641791045, + "grad_norm": 0.8512972841056398, + "learning_rate": 1.6166181734361136e-06, + "loss": 0.3806, + "step": 2099 + }, + { + "epoch": 0.6268656716417911, + "grad_norm": 0.780603070362957, + "learning_rate": 1.6143571763031413e-06, + "loss": 0.3381, + "step": 2100 + }, + { + "epoch": 0.6271641791044776, + "grad_norm": 0.8792815515680216, + "learning_rate": 1.6120970072246222e-06, + "loss": 0.3795, + "step": 2101 + }, + { + "epoch": 0.6274626865671642, + "grad_norm": 0.8439949848861296, + "learning_rate": 1.6098376683137582e-06, + "loss": 0.2953, + "step": 2102 + }, + { + "epoch": 0.6277611940298508, + "grad_norm": 0.9313525702028004, + "learning_rate": 1.6075791616829764e-06, + "loss": 0.3475, + "step": 2103 + }, + { + "epoch": 0.6280597014925373, + "grad_norm": 0.7518808217696689, + "learning_rate": 1.6053214894439262e-06, + "loss": 0.3147, + "step": 2104 + }, + { + "epoch": 0.6283582089552239, + "grad_norm": 0.7506483709151667, + "learning_rate": 1.6030646537074751e-06, + "loss": 0.3167, + "step": 2105 + }, + { + "epoch": 0.6286567164179104, + "grad_norm": 0.8052425362278406, + "learning_rate": 1.6008086565837106e-06, + "loss": 0.2981, + "step": 2106 + }, + { + "epoch": 0.628955223880597, + "grad_norm": 0.8210002191055628, + "learning_rate": 1.5985535001819335e-06, + "loss": 0.3501, + "step": 2107 + }, + { + "epoch": 0.6292537313432836, + "grad_norm": 0.8941400826776075, + "learning_rate": 1.596299186610661e-06, + "loss": 0.3456, + "step": 2108 + }, + { + "epoch": 0.6295522388059701, + "grad_norm": 1.0083217537161828, + "learning_rate": 1.594045717977622e-06, + "loss": 0.3841, + "step": 2109 + }, + { + "epoch": 0.6298507462686567, + "grad_norm": 1.078514382968689, + "learning_rate": 1.5917930963897534e-06, + "loss": 0.3614, + "step": 2110 + }, + { + "epoch": 0.6301492537313432, + "grad_norm": 0.7917547916905323, + "learning_rate": 1.5895413239532022e-06, + "loss": 0.3412, + "step": 2111 + }, + { + "epoch": 0.6304477611940299, + "grad_norm": 0.7691216432708953, + "learning_rate": 1.5872904027733211e-06, + "loss": 0.3178, + "step": 2112 + }, + { + "epoch": 0.6307462686567165, + "grad_norm": 0.77802800862977, + "learning_rate": 1.5850403349546655e-06, + "loss": 0.309, + "step": 2113 + }, + { + "epoch": 0.631044776119403, + "grad_norm": 0.8255197315967899, + "learning_rate": 1.5827911226009945e-06, + "loss": 0.3138, + "step": 2114 + }, + { + "epoch": 0.6313432835820896, + "grad_norm": 0.8386811603427105, + "learning_rate": 1.5805427678152677e-06, + "loss": 0.2891, + "step": 2115 + }, + { + "epoch": 0.6316417910447761, + "grad_norm": 0.8153875110996863, + "learning_rate": 1.5782952726996403e-06, + "loss": 0.2995, + "step": 2116 + }, + { + "epoch": 0.6319402985074627, + "grad_norm": 0.8409552206683609, + "learning_rate": 1.5760486393554667e-06, + "loss": 0.3386, + "step": 2117 + }, + { + "epoch": 0.6322388059701493, + "grad_norm": 0.8250066653051528, + "learning_rate": 1.5738028698832942e-06, + "loss": 0.3827, + "step": 2118 + }, + { + "epoch": 0.6325373134328358, + "grad_norm": 0.8610197982443525, + "learning_rate": 1.5715579663828618e-06, + "loss": 0.3365, + "step": 2119 + }, + { + "epoch": 0.6328358208955224, + "grad_norm": 0.9413453392741797, + "learning_rate": 1.5693139309531006e-06, + "loss": 0.3692, + "step": 2120 + }, + { + "epoch": 0.6331343283582089, + "grad_norm": 0.818537165590243, + "learning_rate": 1.567070765692128e-06, + "loss": 0.3237, + "step": 2121 + }, + { + "epoch": 0.6334328358208955, + "grad_norm": 0.8210467581037414, + "learning_rate": 1.5648284726972491e-06, + "loss": 0.3422, + "step": 2122 + }, + { + "epoch": 0.633731343283582, + "grad_norm": 0.8300643168242267, + "learning_rate": 1.562587054064953e-06, + "loss": 0.3112, + "step": 2123 + }, + { + "epoch": 0.6340298507462686, + "grad_norm": 0.8063304103666424, + "learning_rate": 1.5603465118909122e-06, + "loss": 0.3277, + "step": 2124 + }, + { + "epoch": 0.6343283582089553, + "grad_norm": 0.7841383009639938, + "learning_rate": 1.5581068482699797e-06, + "loss": 0.3132, + "step": 2125 + }, + { + "epoch": 0.6346268656716418, + "grad_norm": 0.7663654715100157, + "learning_rate": 1.5558680652961844e-06, + "loss": 0.3549, + "step": 2126 + }, + { + "epoch": 0.6349253731343284, + "grad_norm": 0.716788049651756, + "learning_rate": 1.553630165062735e-06, + "loss": 0.3293, + "step": 2127 + }, + { + "epoch": 0.635223880597015, + "grad_norm": 0.8376097154978661, + "learning_rate": 1.5513931496620138e-06, + "loss": 0.348, + "step": 2128 + }, + { + "epoch": 0.6355223880597015, + "grad_norm": 0.7844875450357711, + "learning_rate": 1.549157021185575e-06, + "loss": 0.3238, + "step": 2129 + }, + { + "epoch": 0.6358208955223881, + "grad_norm": 0.7845014380845483, + "learning_rate": 1.546921781724145e-06, + "loss": 0.3272, + "step": 2130 + }, + { + "epoch": 0.6361194029850746, + "grad_norm": 0.8084890497779735, + "learning_rate": 1.544687433367617e-06, + "loss": 0.3503, + "step": 2131 + }, + { + "epoch": 0.6364179104477612, + "grad_norm": 1.155872428085641, + "learning_rate": 1.5424539782050535e-06, + "loss": 0.3463, + "step": 2132 + }, + { + "epoch": 0.6367164179104478, + "grad_norm": 0.8237151552290364, + "learning_rate": 1.5402214183246805e-06, + "loss": 0.3751, + "step": 2133 + }, + { + "epoch": 0.6370149253731343, + "grad_norm": 0.8146345074095411, + "learning_rate": 1.5379897558138861e-06, + "loss": 0.3444, + "step": 2134 + }, + { + "epoch": 0.6373134328358209, + "grad_norm": 0.7508094621607114, + "learning_rate": 1.5357589927592211e-06, + "loss": 0.2862, + "step": 2135 + }, + { + "epoch": 0.6376119402985074, + "grad_norm": 0.7803467497000177, + "learning_rate": 1.5335291312463952e-06, + "loss": 0.2941, + "step": 2136 + }, + { + "epoch": 0.637910447761194, + "grad_norm": 0.7746140356066684, + "learning_rate": 1.531300173360273e-06, + "loss": 0.338, + "step": 2137 + }, + { + "epoch": 0.6382089552238805, + "grad_norm": 0.7466760142466666, + "learning_rate": 1.5290721211848767e-06, + "loss": 0.2746, + "step": 2138 + }, + { + "epoch": 0.6385074626865672, + "grad_norm": 0.8289320396750041, + "learning_rate": 1.5268449768033811e-06, + "loss": 0.3639, + "step": 2139 + }, + { + "epoch": 0.6388059701492538, + "grad_norm": 0.7786271838202594, + "learning_rate": 1.5246187422981113e-06, + "loss": 0.3195, + "step": 2140 + }, + { + "epoch": 0.6391044776119403, + "grad_norm": 0.7961531887157887, + "learning_rate": 1.522393419750542e-06, + "loss": 0.3472, + "step": 2141 + }, + { + "epoch": 0.6394029850746269, + "grad_norm": 0.7553655687048026, + "learning_rate": 1.520169011241297e-06, + "loss": 0.3181, + "step": 2142 + }, + { + "epoch": 0.6397014925373135, + "grad_norm": 0.8791350108013017, + "learning_rate": 1.5179455188501425e-06, + "loss": 0.3573, + "step": 2143 + }, + { + "epoch": 0.64, + "grad_norm": 0.791573138903334, + "learning_rate": 1.5157229446559903e-06, + "loss": 0.3146, + "step": 2144 + }, + { + "epoch": 0.6402985074626866, + "grad_norm": 0.7982451287466451, + "learning_rate": 1.5135012907368928e-06, + "loss": 0.3238, + "step": 2145 + }, + { + "epoch": 0.6405970149253731, + "grad_norm": 0.9547736379566139, + "learning_rate": 1.5112805591700426e-06, + "loss": 0.3464, + "step": 2146 + }, + { + "epoch": 0.6408955223880597, + "grad_norm": 0.9389426478984015, + "learning_rate": 1.50906075203177e-06, + "loss": 0.3831, + "step": 2147 + }, + { + "epoch": 0.6411940298507462, + "grad_norm": 0.7559187426695012, + "learning_rate": 1.5068418713975397e-06, + "loss": 0.2961, + "step": 2148 + }, + { + "epoch": 0.6414925373134328, + "grad_norm": 0.8009593103452896, + "learning_rate": 1.5046239193419514e-06, + "loss": 0.3345, + "step": 2149 + }, + { + "epoch": 0.6417910447761194, + "grad_norm": 0.8808029291340134, + "learning_rate": 1.5024068979387365e-06, + "loss": 0.3622, + "step": 2150 + }, + { + "epoch": 0.6420895522388059, + "grad_norm": 0.8048602418552092, + "learning_rate": 1.5001908092607553e-06, + "loss": 0.3272, + "step": 2151 + }, + { + "epoch": 0.6423880597014925, + "grad_norm": 0.886841416239724, + "learning_rate": 1.497975655379997e-06, + "loss": 0.3637, + "step": 2152 + }, + { + "epoch": 0.6426865671641792, + "grad_norm": 0.8501760322071722, + "learning_rate": 1.495761438367577e-06, + "loss": 0.3315, + "step": 2153 + }, + { + "epoch": 0.6429850746268657, + "grad_norm": 0.8499429690154281, + "learning_rate": 1.4935481602937334e-06, + "loss": 0.3519, + "step": 2154 + }, + { + "epoch": 0.6432835820895523, + "grad_norm": 0.8164800240232427, + "learning_rate": 1.4913358232278274e-06, + "loss": 0.327, + "step": 2155 + }, + { + "epoch": 0.6435820895522388, + "grad_norm": 0.8053874302696963, + "learning_rate": 1.4891244292383412e-06, + "loss": 0.3362, + "step": 2156 + }, + { + "epoch": 0.6438805970149254, + "grad_norm": 0.7798998762739349, + "learning_rate": 1.4869139803928727e-06, + "loss": 0.3538, + "step": 2157 + }, + { + "epoch": 0.644179104477612, + "grad_norm": 0.925044008666226, + "learning_rate": 1.4847044787581395e-06, + "loss": 0.3266, + "step": 2158 + }, + { + "epoch": 0.6444776119402985, + "grad_norm": 0.8652309852841852, + "learning_rate": 1.48249592639997e-06, + "loss": 0.3687, + "step": 2159 + }, + { + "epoch": 0.6447761194029851, + "grad_norm": 0.7856735870745902, + "learning_rate": 1.4802883253833073e-06, + "loss": 0.2956, + "step": 2160 + }, + { + "epoch": 0.6450746268656716, + "grad_norm": 0.7855897694933878, + "learning_rate": 1.4780816777722052e-06, + "loss": 0.3162, + "step": 2161 + }, + { + "epoch": 0.6453731343283582, + "grad_norm": 0.808546264261996, + "learning_rate": 1.4758759856298248e-06, + "loss": 0.3095, + "step": 2162 + }, + { + "epoch": 0.6456716417910447, + "grad_norm": 0.8076290541474285, + "learning_rate": 1.4736712510184359e-06, + "loss": 0.3276, + "step": 2163 + }, + { + "epoch": 0.6459701492537313, + "grad_norm": 0.7968186141075612, + "learning_rate": 1.4714674759994096e-06, + "loss": 0.2801, + "step": 2164 + }, + { + "epoch": 0.6462686567164179, + "grad_norm": 0.7847934498100252, + "learning_rate": 1.4692646626332228e-06, + "loss": 0.3368, + "step": 2165 + }, + { + "epoch": 0.6465671641791044, + "grad_norm": 0.9749102135271982, + "learning_rate": 1.4670628129794527e-06, + "loss": 0.3638, + "step": 2166 + }, + { + "epoch": 0.6468656716417911, + "grad_norm": 0.7325101572568912, + "learning_rate": 1.4648619290967742e-06, + "loss": 0.3243, + "step": 2167 + }, + { + "epoch": 0.6471641791044777, + "grad_norm": 0.9104372675901333, + "learning_rate": 1.4626620130429608e-06, + "loss": 0.3549, + "step": 2168 + }, + { + "epoch": 0.6474626865671642, + "grad_norm": 0.7969438524329776, + "learning_rate": 1.4604630668748795e-06, + "loss": 0.3465, + "step": 2169 + }, + { + "epoch": 0.6477611940298508, + "grad_norm": 0.8828900184898577, + "learning_rate": 1.4582650926484921e-06, + "loss": 0.3556, + "step": 2170 + }, + { + "epoch": 0.6480597014925373, + "grad_norm": 0.798311888333371, + "learning_rate": 1.4560680924188508e-06, + "loss": 0.3184, + "step": 2171 + }, + { + "epoch": 0.6483582089552239, + "grad_norm": 0.860792245744944, + "learning_rate": 1.453872068240097e-06, + "loss": 0.3363, + "step": 2172 + }, + { + "epoch": 0.6486567164179105, + "grad_norm": 0.7649776213408986, + "learning_rate": 1.4516770221654614e-06, + "loss": 0.3355, + "step": 2173 + }, + { + "epoch": 0.648955223880597, + "grad_norm": 0.7942691044713359, + "learning_rate": 1.4494829562472558e-06, + "loss": 0.3341, + "step": 2174 + }, + { + "epoch": 0.6492537313432836, + "grad_norm": 0.8621884015676841, + "learning_rate": 1.4472898725368795e-06, + "loss": 0.3343, + "step": 2175 + }, + { + "epoch": 0.6495522388059701, + "grad_norm": 0.7924705469899432, + "learning_rate": 1.4450977730848132e-06, + "loss": 0.3275, + "step": 2176 + }, + { + "epoch": 0.6498507462686567, + "grad_norm": 1.2686960712118278, + "learning_rate": 1.4429066599406152e-06, + "loss": 0.3576, + "step": 2177 + }, + { + "epoch": 0.6501492537313432, + "grad_norm": 0.7074769773056945, + "learning_rate": 1.4407165351529246e-06, + "loss": 0.2896, + "step": 2178 + }, + { + "epoch": 0.6504477611940298, + "grad_norm": 0.8333612223546273, + "learning_rate": 1.4385274007694527e-06, + "loss": 0.3181, + "step": 2179 + }, + { + "epoch": 0.6507462686567164, + "grad_norm": 0.8657347511763104, + "learning_rate": 1.4363392588369876e-06, + "loss": 0.3312, + "step": 2180 + }, + { + "epoch": 0.651044776119403, + "grad_norm": 0.7696448268786413, + "learning_rate": 1.4341521114013888e-06, + "loss": 0.3079, + "step": 2181 + }, + { + "epoch": 0.6513432835820896, + "grad_norm": 0.8733955547423778, + "learning_rate": 1.4319659605075855e-06, + "loss": 0.3489, + "step": 2182 + }, + { + "epoch": 0.6516417910447762, + "grad_norm": 1.079444796750812, + "learning_rate": 1.429780808199576e-06, + "loss": 0.3833, + "step": 2183 + }, + { + "epoch": 0.6519402985074627, + "grad_norm": 0.7722744529314012, + "learning_rate": 1.4275966565204251e-06, + "loss": 0.3301, + "step": 2184 + }, + { + "epoch": 0.6522388059701493, + "grad_norm": 0.8366589681087087, + "learning_rate": 1.4254135075122596e-06, + "loss": 0.2948, + "step": 2185 + }, + { + "epoch": 0.6525373134328358, + "grad_norm": 0.8263300107210038, + "learning_rate": 1.4232313632162714e-06, + "loss": 0.3113, + "step": 2186 + }, + { + "epoch": 0.6528358208955224, + "grad_norm": 0.7782108665859974, + "learning_rate": 1.421050225672712e-06, + "loss": 0.3098, + "step": 2187 + }, + { + "epoch": 0.653134328358209, + "grad_norm": 0.9032704619470634, + "learning_rate": 1.418870096920893e-06, + "loss": 0.3312, + "step": 2188 + }, + { + "epoch": 0.6534328358208955, + "grad_norm": 0.7838544001315462, + "learning_rate": 1.4166909789991813e-06, + "loss": 0.3091, + "step": 2189 + }, + { + "epoch": 0.6537313432835821, + "grad_norm": 0.8249256923116727, + "learning_rate": 1.4145128739449974e-06, + "loss": 0.3657, + "step": 2190 + }, + { + "epoch": 0.6540298507462686, + "grad_norm": 0.7937594433877714, + "learning_rate": 1.4123357837948177e-06, + "loss": 0.3459, + "step": 2191 + }, + { + "epoch": 0.6543283582089552, + "grad_norm": 1.0209366045959114, + "learning_rate": 1.410159710584168e-06, + "loss": 0.3305, + "step": 2192 + }, + { + "epoch": 0.6546268656716417, + "grad_norm": 0.8416577328440887, + "learning_rate": 1.4079846563476246e-06, + "loss": 0.3428, + "step": 2193 + }, + { + "epoch": 0.6549253731343283, + "grad_norm": 0.8381158795554763, + "learning_rate": 1.4058106231188093e-06, + "loss": 0.3278, + "step": 2194 + }, + { + "epoch": 0.655223880597015, + "grad_norm": 0.7994100548783899, + "learning_rate": 1.4036376129303914e-06, + "loss": 0.3187, + "step": 2195 + }, + { + "epoch": 0.6555223880597015, + "grad_norm": 0.8441055293209245, + "learning_rate": 1.4014656278140806e-06, + "loss": 0.3585, + "step": 2196 + }, + { + "epoch": 0.6558208955223881, + "grad_norm": 0.8224053402613939, + "learning_rate": 1.399294669800631e-06, + "loss": 0.3106, + "step": 2197 + }, + { + "epoch": 0.6561194029850747, + "grad_norm": 0.893366800253749, + "learning_rate": 1.3971247409198347e-06, + "loss": 0.3897, + "step": 2198 + }, + { + "epoch": 0.6564179104477612, + "grad_norm": 0.8805627796048343, + "learning_rate": 1.3949558432005245e-06, + "loss": 0.347, + "step": 2199 + }, + { + "epoch": 0.6567164179104478, + "grad_norm": 0.764024546070756, + "learning_rate": 1.3927879786705645e-06, + "loss": 0.3213, + "step": 2200 + }, + { + "epoch": 0.6570149253731343, + "grad_norm": 0.722603696748916, + "learning_rate": 1.3906211493568563e-06, + "loss": 0.3137, + "step": 2201 + }, + { + "epoch": 0.6573134328358209, + "grad_norm": 0.9269224842693098, + "learning_rate": 1.3884553572853324e-06, + "loss": 0.3678, + "step": 2202 + }, + { + "epoch": 0.6576119402985074, + "grad_norm": 0.7631786113345267, + "learning_rate": 1.3862906044809554e-06, + "loss": 0.314, + "step": 2203 + }, + { + "epoch": 0.657910447761194, + "grad_norm": 0.7773514605842012, + "learning_rate": 1.3841268929677165e-06, + "loss": 0.3133, + "step": 2204 + }, + { + "epoch": 0.6582089552238806, + "grad_norm": 0.7946457192152244, + "learning_rate": 1.381964224768634e-06, + "loss": 0.3181, + "step": 2205 + }, + { + "epoch": 0.6585074626865671, + "grad_norm": 0.8529990432133921, + "learning_rate": 1.3798026019057483e-06, + "loss": 0.3426, + "step": 2206 + }, + { + "epoch": 0.6588059701492537, + "grad_norm": 0.9216515652907142, + "learning_rate": 1.377642026400125e-06, + "loss": 0.3643, + "step": 2207 + }, + { + "epoch": 0.6591044776119404, + "grad_norm": 0.832800795535474, + "learning_rate": 1.3754825002718498e-06, + "loss": 0.3629, + "step": 2208 + }, + { + "epoch": 0.6594029850746269, + "grad_norm": 0.990654044032596, + "learning_rate": 1.3733240255400263e-06, + "loss": 0.3506, + "step": 2209 + }, + { + "epoch": 0.6597014925373135, + "grad_norm": 0.6854970183511663, + "learning_rate": 1.3711666042227772e-06, + "loss": 0.3319, + "step": 2210 + }, + { + "epoch": 0.66, + "grad_norm": 0.8428108856258283, + "learning_rate": 1.3690102383372369e-06, + "loss": 0.2742, + "step": 2211 + }, + { + "epoch": 0.6602985074626866, + "grad_norm": 0.8741591762317693, + "learning_rate": 1.3668549298995558e-06, + "loss": 0.3687, + "step": 2212 + }, + { + "epoch": 0.6605970149253731, + "grad_norm": 0.7955836464506277, + "learning_rate": 1.3647006809248947e-06, + "loss": 0.3032, + "step": 2213 + }, + { + "epoch": 0.6608955223880597, + "grad_norm": 0.8711695616206405, + "learning_rate": 1.3625474934274241e-06, + "loss": 0.381, + "step": 2214 + }, + { + "epoch": 0.6611940298507463, + "grad_norm": 0.8401790125361631, + "learning_rate": 1.360395369420322e-06, + "loss": 0.3229, + "step": 2215 + }, + { + "epoch": 0.6614925373134328, + "grad_norm": 0.8676846365068158, + "learning_rate": 1.3582443109157722e-06, + "loss": 0.3615, + "step": 2216 + }, + { + "epoch": 0.6617910447761194, + "grad_norm": 0.7929648763121615, + "learning_rate": 1.3560943199249605e-06, + "loss": 0.3588, + "step": 2217 + }, + { + "epoch": 0.662089552238806, + "grad_norm": 0.9511947801173409, + "learning_rate": 1.3539453984580767e-06, + "loss": 0.3105, + "step": 2218 + }, + { + "epoch": 0.6623880597014925, + "grad_norm": 0.7404356758215146, + "learning_rate": 1.3517975485243103e-06, + "loss": 0.2941, + "step": 2219 + }, + { + "epoch": 0.6626865671641791, + "grad_norm": 1.0227800450584272, + "learning_rate": 1.3496507721318486e-06, + "loss": 0.3296, + "step": 2220 + }, + { + "epoch": 0.6629850746268656, + "grad_norm": 0.7690958302654547, + "learning_rate": 1.3475050712878755e-06, + "loss": 0.321, + "step": 2221 + }, + { + "epoch": 0.6632835820895523, + "grad_norm": 0.8740712288587946, + "learning_rate": 1.345360447998569e-06, + "loss": 0.3596, + "step": 2222 + }, + { + "epoch": 0.6635820895522389, + "grad_norm": 0.8474627076368212, + "learning_rate": 1.3432169042690988e-06, + "loss": 0.3634, + "step": 2223 + }, + { + "epoch": 0.6638805970149254, + "grad_norm": 0.9253686814199646, + "learning_rate": 1.3410744421036262e-06, + "loss": 0.2892, + "step": 2224 + }, + { + "epoch": 0.664179104477612, + "grad_norm": 0.8348049414436733, + "learning_rate": 1.3389330635053013e-06, + "loss": 0.3515, + "step": 2225 + }, + { + "epoch": 0.6644776119402985, + "grad_norm": 0.822846204285828, + "learning_rate": 1.3367927704762613e-06, + "loss": 0.3425, + "step": 2226 + }, + { + "epoch": 0.6647761194029851, + "grad_norm": 0.8678200777615288, + "learning_rate": 1.3346535650176284e-06, + "loss": 0.3978, + "step": 2227 + }, + { + "epoch": 0.6650746268656716, + "grad_norm": 0.7358960864836035, + "learning_rate": 1.3325154491295062e-06, + "loss": 0.2916, + "step": 2228 + }, + { + "epoch": 0.6653731343283582, + "grad_norm": 0.7423135388554165, + "learning_rate": 1.330378424810981e-06, + "loss": 0.3068, + "step": 2229 + }, + { + "epoch": 0.6656716417910448, + "grad_norm": 0.8230255273207777, + "learning_rate": 1.3282424940601197e-06, + "loss": 0.355, + "step": 2230 + }, + { + "epoch": 0.6659701492537313, + "grad_norm": 0.8023837250650214, + "learning_rate": 1.326107658873964e-06, + "loss": 0.2787, + "step": 2231 + }, + { + "epoch": 0.6662686567164179, + "grad_norm": 0.7899498370106252, + "learning_rate": 1.3239739212485342e-06, + "loss": 0.2977, + "step": 2232 + }, + { + "epoch": 0.6665671641791044, + "grad_norm": 0.9616260736960074, + "learning_rate": 1.3218412831788232e-06, + "loss": 0.3403, + "step": 2233 + }, + { + "epoch": 0.666865671641791, + "grad_norm": 0.9674895085633961, + "learning_rate": 1.3197097466587939e-06, + "loss": 0.3681, + "step": 2234 + }, + { + "epoch": 0.6671641791044776, + "grad_norm": 0.808185104408685, + "learning_rate": 1.317579313681382e-06, + "loss": 0.3109, + "step": 2235 + }, + { + "epoch": 0.6674626865671642, + "grad_norm": 0.8737274124495528, + "learning_rate": 1.315449986238489e-06, + "loss": 0.3471, + "step": 2236 + }, + { + "epoch": 0.6677611940298508, + "grad_norm": 0.8084788833507394, + "learning_rate": 1.3133217663209873e-06, + "loss": 0.3272, + "step": 2237 + }, + { + "epoch": 0.6680597014925374, + "grad_norm": 0.8888189135315439, + "learning_rate": 1.3111946559187078e-06, + "loss": 0.359, + "step": 2238 + }, + { + "epoch": 0.6683582089552239, + "grad_norm": 0.8035833032123232, + "learning_rate": 1.309068657020448e-06, + "loss": 0.3285, + "step": 2239 + }, + { + "epoch": 0.6686567164179105, + "grad_norm": 0.9254549420427741, + "learning_rate": 1.3069437716139648e-06, + "loss": 0.377, + "step": 2240 + }, + { + "epoch": 0.668955223880597, + "grad_norm": 0.768533993055729, + "learning_rate": 1.3048200016859743e-06, + "loss": 0.3516, + "step": 2241 + }, + { + "epoch": 0.6692537313432836, + "grad_norm": 0.7917607581116863, + "learning_rate": 1.3026973492221501e-06, + "loss": 0.3074, + "step": 2242 + }, + { + "epoch": 0.6695522388059701, + "grad_norm": 0.8087854690730707, + "learning_rate": 1.3005758162071206e-06, + "loss": 0.3164, + "step": 2243 + }, + { + "epoch": 0.6698507462686567, + "grad_norm": 0.8650182535397217, + "learning_rate": 1.298455404624466e-06, + "loss": 0.3296, + "step": 2244 + }, + { + "epoch": 0.6701492537313433, + "grad_norm": 0.7997154190905688, + "learning_rate": 1.2963361164567207e-06, + "loss": 0.3546, + "step": 2245 + }, + { + "epoch": 0.6704477611940298, + "grad_norm": 0.9966416873938866, + "learning_rate": 1.2942179536853666e-06, + "loss": 0.3685, + "step": 2246 + }, + { + "epoch": 0.6707462686567164, + "grad_norm": 0.8148532817098773, + "learning_rate": 1.2921009182908351e-06, + "loss": 0.3242, + "step": 2247 + }, + { + "epoch": 0.6710447761194029, + "grad_norm": 0.75826379276025, + "learning_rate": 1.2899850122525037e-06, + "loss": 0.2655, + "step": 2248 + }, + { + "epoch": 0.6713432835820895, + "grad_norm": 0.9350771859645716, + "learning_rate": 1.2878702375486905e-06, + "loss": 0.3477, + "step": 2249 + }, + { + "epoch": 0.6716417910447762, + "grad_norm": 0.7895404077047398, + "learning_rate": 1.2857565961566593e-06, + "loss": 0.3619, + "step": 2250 + }, + { + "epoch": 0.6719402985074627, + "grad_norm": 0.956687016013122, + "learning_rate": 1.2836440900526136e-06, + "loss": 0.3528, + "step": 2251 + }, + { + "epoch": 0.6722388059701493, + "grad_norm": 0.8027731716484646, + "learning_rate": 1.281532721211695e-06, + "loss": 0.3426, + "step": 2252 + }, + { + "epoch": 0.6725373134328358, + "grad_norm": 0.8683386280279184, + "learning_rate": 1.2794224916079818e-06, + "loss": 0.3624, + "step": 2253 + }, + { + "epoch": 0.6728358208955224, + "grad_norm": 0.8021378966828665, + "learning_rate": 1.2773134032144885e-06, + "loss": 0.3344, + "step": 2254 + }, + { + "epoch": 0.673134328358209, + "grad_norm": 0.822448080875513, + "learning_rate": 1.2752054580031592e-06, + "loss": 0.3012, + "step": 2255 + }, + { + "epoch": 0.6734328358208955, + "grad_norm": 0.8066506836401879, + "learning_rate": 1.2730986579448719e-06, + "loss": 0.3137, + "step": 2256 + }, + { + "epoch": 0.6737313432835821, + "grad_norm": 0.7452823368578445, + "learning_rate": 1.2709930050094337e-06, + "loss": 0.3389, + "step": 2257 + }, + { + "epoch": 0.6740298507462686, + "grad_norm": 0.7510246741662416, + "learning_rate": 1.268888501165579e-06, + "loss": 0.2977, + "step": 2258 + }, + { + "epoch": 0.6743283582089552, + "grad_norm": 0.8248092475825504, + "learning_rate": 1.2667851483809673e-06, + "loss": 0.3085, + "step": 2259 + }, + { + "epoch": 0.6746268656716418, + "grad_norm": 0.8232298732269487, + "learning_rate": 1.264682948622183e-06, + "loss": 0.3586, + "step": 2260 + }, + { + "epoch": 0.6749253731343283, + "grad_norm": 0.87600455902864, + "learning_rate": 1.2625819038547302e-06, + "loss": 0.3549, + "step": 2261 + }, + { + "epoch": 0.6752238805970149, + "grad_norm": 0.8569153532990559, + "learning_rate": 1.2604820160430348e-06, + "loss": 0.3223, + "step": 2262 + }, + { + "epoch": 0.6755223880597014, + "grad_norm": 0.8313060298524098, + "learning_rate": 1.2583832871504415e-06, + "loss": 0.3437, + "step": 2263 + }, + { + "epoch": 0.6758208955223881, + "grad_norm": 0.8731999146655798, + "learning_rate": 1.25628571913921e-06, + "loss": 0.3928, + "step": 2264 + }, + { + "epoch": 0.6761194029850747, + "grad_norm": 0.830291452853221, + "learning_rate": 1.2541893139705162e-06, + "loss": 0.3391, + "step": 2265 + }, + { + "epoch": 0.6764179104477612, + "grad_norm": 0.7748207330310743, + "learning_rate": 1.2520940736044468e-06, + "loss": 0.3191, + "step": 2266 + }, + { + "epoch": 0.6767164179104478, + "grad_norm": 0.7873823715976541, + "learning_rate": 1.2500000000000007e-06, + "loss": 0.3354, + "step": 2267 + }, + { + "epoch": 0.6770149253731343, + "grad_norm": 0.8238073008584107, + "learning_rate": 1.2479070951150857e-06, + "loss": 0.3722, + "step": 2268 + }, + { + "epoch": 0.6773134328358209, + "grad_norm": 0.7684716974580467, + "learning_rate": 1.245815360906517e-06, + "loss": 0.3427, + "step": 2269 + }, + { + "epoch": 0.6776119402985075, + "grad_norm": 0.9061590212050163, + "learning_rate": 1.2437247993300147e-06, + "loss": 0.3099, + "step": 2270 + }, + { + "epoch": 0.677910447761194, + "grad_norm": 0.8775458433192322, + "learning_rate": 1.2416354123402047e-06, + "loss": 0.3393, + "step": 2271 + }, + { + "epoch": 0.6782089552238806, + "grad_norm": 0.8109025654110851, + "learning_rate": 1.2395472018906102e-06, + "loss": 0.3608, + "step": 2272 + }, + { + "epoch": 0.6785074626865671, + "grad_norm": 0.8511617196960201, + "learning_rate": 1.2374601699336586e-06, + "loss": 0.3599, + "step": 2273 + }, + { + "epoch": 0.6788059701492537, + "grad_norm": 1.2567441350562456, + "learning_rate": 1.2353743184206724e-06, + "loss": 0.3313, + "step": 2274 + }, + { + "epoch": 0.6791044776119403, + "grad_norm": 0.7862840705237462, + "learning_rate": 1.2332896493018753e-06, + "loss": 0.323, + "step": 2275 + }, + { + "epoch": 0.6794029850746268, + "grad_norm": 0.7520760436868094, + "learning_rate": 1.231206164526379e-06, + "loss": 0.3234, + "step": 2276 + }, + { + "epoch": 0.6797014925373134, + "grad_norm": 0.7862063406522266, + "learning_rate": 1.229123866042192e-06, + "loss": 0.3108, + "step": 2277 + }, + { + "epoch": 0.68, + "grad_norm": 0.7931130898996858, + "learning_rate": 1.2270427557962125e-06, + "loss": 0.2967, + "step": 2278 + }, + { + "epoch": 0.6802985074626866, + "grad_norm": 0.8831726582888332, + "learning_rate": 1.2249628357342284e-06, + "loss": 0.3366, + "step": 2279 + }, + { + "epoch": 0.6805970149253732, + "grad_norm": 0.8362589161672496, + "learning_rate": 1.2228841078009133e-06, + "loss": 0.32, + "step": 2280 + }, + { + "epoch": 0.6808955223880597, + "grad_norm": 0.7825482021832821, + "learning_rate": 1.2208065739398286e-06, + "loss": 0.3236, + "step": 2281 + }, + { + "epoch": 0.6811940298507463, + "grad_norm": 0.7726512675817263, + "learning_rate": 1.2187302360934162e-06, + "loss": 0.3659, + "step": 2282 + }, + { + "epoch": 0.6814925373134328, + "grad_norm": 0.8087551514263144, + "learning_rate": 1.216655096203002e-06, + "loss": 0.3497, + "step": 2283 + }, + { + "epoch": 0.6817910447761194, + "grad_norm": 0.8321435088465757, + "learning_rate": 1.214581156208791e-06, + "loss": 0.3367, + "step": 2284 + }, + { + "epoch": 0.682089552238806, + "grad_norm": 0.8973773653830277, + "learning_rate": 1.2125084180498672e-06, + "loss": 0.3681, + "step": 2285 + }, + { + "epoch": 0.6823880597014925, + "grad_norm": 0.7645793362757674, + "learning_rate": 1.2104368836641909e-06, + "loss": 0.3385, + "step": 2286 + }, + { + "epoch": 0.6826865671641791, + "grad_norm": 0.7860470543681153, + "learning_rate": 1.208366554988594e-06, + "loss": 0.3102, + "step": 2287 + }, + { + "epoch": 0.6829850746268656, + "grad_norm": 0.7711760963359957, + "learning_rate": 1.2062974339587844e-06, + "loss": 0.3456, + "step": 2288 + }, + { + "epoch": 0.6832835820895522, + "grad_norm": 0.8535820450749667, + "learning_rate": 1.2042295225093404e-06, + "loss": 0.3325, + "step": 2289 + }, + { + "epoch": 0.6835820895522388, + "grad_norm": 0.849860239170701, + "learning_rate": 1.202162822573708e-06, + "loss": 0.3324, + "step": 2290 + }, + { + "epoch": 0.6838805970149253, + "grad_norm": 1.3404957810983344, + "learning_rate": 1.2000973360842019e-06, + "loss": 0.3833, + "step": 2291 + }, + { + "epoch": 0.684179104477612, + "grad_norm": 0.819656752872545, + "learning_rate": 1.1980330649720024e-06, + "loss": 0.3346, + "step": 2292 + }, + { + "epoch": 0.6844776119402985, + "grad_norm": 0.9829794779330354, + "learning_rate": 1.1959700111671508e-06, + "loss": 0.3462, + "step": 2293 + }, + { + "epoch": 0.6847761194029851, + "grad_norm": 0.7859290591207186, + "learning_rate": 1.1939081765985528e-06, + "loss": 0.291, + "step": 2294 + }, + { + "epoch": 0.6850746268656717, + "grad_norm": 0.810647132569844, + "learning_rate": 1.1918475631939735e-06, + "loss": 0.3525, + "step": 2295 + }, + { + "epoch": 0.6853731343283582, + "grad_norm": 0.8879767152876183, + "learning_rate": 1.1897881728800364e-06, + "loss": 0.3398, + "step": 2296 + }, + { + "epoch": 0.6856716417910448, + "grad_norm": 0.8081639689735962, + "learning_rate": 1.1877300075822223e-06, + "loss": 0.3221, + "step": 2297 + }, + { + "epoch": 0.6859701492537313, + "grad_norm": 0.8175265918913039, + "learning_rate": 1.1856730692248635e-06, + "loss": 0.3699, + "step": 2298 + }, + { + "epoch": 0.6862686567164179, + "grad_norm": 0.7821789581867407, + "learning_rate": 1.1836173597311484e-06, + "loss": 0.316, + "step": 2299 + }, + { + "epoch": 0.6865671641791045, + "grad_norm": 0.7996767148379933, + "learning_rate": 1.1815628810231147e-06, + "loss": 0.3795, + "step": 2300 + }, + { + "epoch": 0.686865671641791, + "grad_norm": 0.8382268679790396, + "learning_rate": 1.1795096350216506e-06, + "loss": 0.3339, + "step": 2301 + }, + { + "epoch": 0.6871641791044776, + "grad_norm": 0.8667351459692307, + "learning_rate": 1.1774576236464906e-06, + "loss": 0.3781, + "step": 2302 + }, + { + "epoch": 0.6874626865671641, + "grad_norm": 0.7891693977056329, + "learning_rate": 1.1754068488162165e-06, + "loss": 0.2936, + "step": 2303 + }, + { + "epoch": 0.6877611940298507, + "grad_norm": 0.7802694773636687, + "learning_rate": 1.1733573124482505e-06, + "loss": 0.3155, + "step": 2304 + }, + { + "epoch": 0.6880597014925374, + "grad_norm": 0.7825736393246602, + "learning_rate": 1.1713090164588608e-06, + "loss": 0.3262, + "step": 2305 + }, + { + "epoch": 0.6883582089552239, + "grad_norm": 0.7824674762625962, + "learning_rate": 1.1692619627631539e-06, + "loss": 0.3152, + "step": 2306 + }, + { + "epoch": 0.6886567164179105, + "grad_norm": 0.7859681853056969, + "learning_rate": 1.1672161532750749e-06, + "loss": 0.2974, + "step": 2307 + }, + { + "epoch": 0.688955223880597, + "grad_norm": 0.7718704768344236, + "learning_rate": 1.1651715899074057e-06, + "loss": 0.3303, + "step": 2308 + }, + { + "epoch": 0.6892537313432836, + "grad_norm": 1.0009242400338725, + "learning_rate": 1.1631282745717646e-06, + "loss": 0.3184, + "step": 2309 + }, + { + "epoch": 0.6895522388059702, + "grad_norm": 0.7836521432415025, + "learning_rate": 1.1610862091785993e-06, + "loss": 0.3612, + "step": 2310 + }, + { + "epoch": 0.6898507462686567, + "grad_norm": 0.8471632552947357, + "learning_rate": 1.1590453956371925e-06, + "loss": 0.2925, + "step": 2311 + }, + { + "epoch": 0.6901492537313433, + "grad_norm": 0.7698745533567938, + "learning_rate": 1.157005835855654e-06, + "loss": 0.3735, + "step": 2312 + }, + { + "epoch": 0.6904477611940298, + "grad_norm": 0.7805344892196837, + "learning_rate": 1.1549675317409254e-06, + "loss": 0.3117, + "step": 2313 + }, + { + "epoch": 0.6907462686567164, + "grad_norm": 0.8027470646073804, + "learning_rate": 1.1529304851987685e-06, + "loss": 0.3219, + "step": 2314 + }, + { + "epoch": 0.691044776119403, + "grad_norm": 0.8673613890056963, + "learning_rate": 1.1508946981337729e-06, + "loss": 0.3615, + "step": 2315 + }, + { + "epoch": 0.6913432835820895, + "grad_norm": 0.7571748507097132, + "learning_rate": 1.1488601724493504e-06, + "loss": 0.3159, + "step": 2316 + }, + { + "epoch": 0.6916417910447761, + "grad_norm": 0.8645844258196447, + "learning_rate": 1.1468269100477322e-06, + "loss": 0.3551, + "step": 2317 + }, + { + "epoch": 0.6919402985074626, + "grad_norm": 0.7659538316934659, + "learning_rate": 1.1447949128299695e-06, + "loss": 0.34, + "step": 2318 + }, + { + "epoch": 0.6922388059701493, + "grad_norm": 0.9851936264003477, + "learning_rate": 1.1427641826959313e-06, + "loss": 0.3599, + "step": 2319 + }, + { + "epoch": 0.6925373134328359, + "grad_norm": 0.8517192723805389, + "learning_rate": 1.1407347215442985e-06, + "loss": 0.3335, + "step": 2320 + }, + { + "epoch": 0.6928358208955224, + "grad_norm": 0.8252636410104004, + "learning_rate": 1.138706531272569e-06, + "loss": 0.3648, + "step": 2321 + }, + { + "epoch": 0.693134328358209, + "grad_norm": 0.7905456222424458, + "learning_rate": 1.1366796137770512e-06, + "loss": 0.3108, + "step": 2322 + }, + { + "epoch": 0.6934328358208955, + "grad_norm": 0.8262859024812593, + "learning_rate": 1.1346539709528634e-06, + "loss": 0.3211, + "step": 2323 + }, + { + "epoch": 0.6937313432835821, + "grad_norm": 0.7940913246786866, + "learning_rate": 1.1326296046939334e-06, + "loss": 0.3616, + "step": 2324 + }, + { + "epoch": 0.6940298507462687, + "grad_norm": 0.9195285544655638, + "learning_rate": 1.1306065168929925e-06, + "loss": 0.3563, + "step": 2325 + }, + { + "epoch": 0.6943283582089552, + "grad_norm": 0.7768973786157898, + "learning_rate": 1.1285847094415792e-06, + "loss": 0.3046, + "step": 2326 + }, + { + "epoch": 0.6946268656716418, + "grad_norm": 0.8076162534794478, + "learning_rate": 1.126564184230034e-06, + "loss": 0.3552, + "step": 2327 + }, + { + "epoch": 0.6949253731343283, + "grad_norm": 0.7569456200327547, + "learning_rate": 1.1245449431474994e-06, + "loss": 0.3296, + "step": 2328 + }, + { + "epoch": 0.6952238805970149, + "grad_norm": 0.8337061109406402, + "learning_rate": 1.1225269880819158e-06, + "loss": 0.2923, + "step": 2329 + }, + { + "epoch": 0.6955223880597015, + "grad_norm": 0.8392372442467751, + "learning_rate": 1.1205103209200238e-06, + "loss": 0.3642, + "step": 2330 + }, + { + "epoch": 0.695820895522388, + "grad_norm": 0.8358260600131869, + "learning_rate": 1.1184949435473556e-06, + "loss": 0.328, + "step": 2331 + }, + { + "epoch": 0.6961194029850746, + "grad_norm": 0.8295407000320718, + "learning_rate": 1.1164808578482405e-06, + "loss": 0.3583, + "step": 2332 + }, + { + "epoch": 0.6964179104477612, + "grad_norm": 0.8276689046645763, + "learning_rate": 1.1144680657058005e-06, + "loss": 0.3463, + "step": 2333 + }, + { + "epoch": 0.6967164179104478, + "grad_norm": 0.8590053159382256, + "learning_rate": 1.1124565690019465e-06, + "loss": 0.3184, + "step": 2334 + }, + { + "epoch": 0.6970149253731344, + "grad_norm": 0.8071370342595704, + "learning_rate": 1.1104463696173798e-06, + "loss": 0.3232, + "step": 2335 + }, + { + "epoch": 0.6973134328358209, + "grad_norm": 0.8642648814478965, + "learning_rate": 1.1084374694315863e-06, + "loss": 0.3465, + "step": 2336 + }, + { + "epoch": 0.6976119402985075, + "grad_norm": 0.7645757881216615, + "learning_rate": 1.1064298703228397e-06, + "loss": 0.329, + "step": 2337 + }, + { + "epoch": 0.697910447761194, + "grad_norm": 0.8218872469842694, + "learning_rate": 1.1044235741681957e-06, + "loss": 0.3571, + "step": 2338 + }, + { + "epoch": 0.6982089552238806, + "grad_norm": 0.8082592854417281, + "learning_rate": 1.102418582843493e-06, + "loss": 0.3386, + "step": 2339 + }, + { + "epoch": 0.6985074626865672, + "grad_norm": 0.8551551134548198, + "learning_rate": 1.100414898223349e-06, + "loss": 0.3187, + "step": 2340 + }, + { + "epoch": 0.6988059701492537, + "grad_norm": 0.8454776918886312, + "learning_rate": 1.0984125221811611e-06, + "loss": 0.3226, + "step": 2341 + }, + { + "epoch": 0.6991044776119403, + "grad_norm": 0.8274576186330233, + "learning_rate": 1.0964114565891005e-06, + "loss": 0.339, + "step": 2342 + }, + { + "epoch": 0.6994029850746268, + "grad_norm": 0.8865291981715888, + "learning_rate": 1.0944117033181151e-06, + "loss": 0.2815, + "step": 2343 + }, + { + "epoch": 0.6997014925373134, + "grad_norm": 0.7691276300346361, + "learning_rate": 1.0924132642379262e-06, + "loss": 0.2907, + "step": 2344 + }, + { + "epoch": 0.7, + "grad_norm": 0.8838758440348157, + "learning_rate": 1.090416141217025e-06, + "loss": 0.3687, + "step": 2345 + }, + { + "epoch": 0.7002985074626865, + "grad_norm": 0.834499070283479, + "learning_rate": 1.0884203361226733e-06, + "loss": 0.3357, + "step": 2346 + }, + { + "epoch": 0.7005970149253732, + "grad_norm": 0.7837295011852984, + "learning_rate": 1.0864258508209008e-06, + "loss": 0.3141, + "step": 2347 + }, + { + "epoch": 0.7008955223880597, + "grad_norm": 0.9621383837777848, + "learning_rate": 1.0844326871765012e-06, + "loss": 0.3344, + "step": 2348 + }, + { + "epoch": 0.7011940298507463, + "grad_norm": 0.836293896828257, + "learning_rate": 1.0824408470530334e-06, + "loss": 0.3519, + "step": 2349 + }, + { + "epoch": 0.7014925373134329, + "grad_norm": 0.8713650513051289, + "learning_rate": 1.0804503323128214e-06, + "loss": 0.3365, + "step": 2350 + }, + { + "epoch": 0.7017910447761194, + "grad_norm": 0.7793599987932461, + "learning_rate": 1.0784611448169482e-06, + "loss": 0.3324, + "step": 2351 + }, + { + "epoch": 0.702089552238806, + "grad_norm": 0.747029931462924, + "learning_rate": 1.0764732864252538e-06, + "loss": 0.3049, + "step": 2352 + }, + { + "epoch": 0.7023880597014925, + "grad_norm": 0.7301826389896193, + "learning_rate": 1.0744867589963378e-06, + "loss": 0.2968, + "step": 2353 + }, + { + "epoch": 0.7026865671641791, + "grad_norm": 0.80685723875685, + "learning_rate": 1.0725015643875553e-06, + "loss": 0.3443, + "step": 2354 + }, + { + "epoch": 0.7029850746268657, + "grad_norm": 0.8130732259035608, + "learning_rate": 1.0705177044550147e-06, + "loss": 0.3303, + "step": 2355 + }, + { + "epoch": 0.7032835820895522, + "grad_norm": 1.0494784933389165, + "learning_rate": 1.0685351810535779e-06, + "loss": 0.3631, + "step": 2356 + }, + { + "epoch": 0.7035820895522388, + "grad_norm": 0.771095705431318, + "learning_rate": 1.0665539960368536e-06, + "loss": 0.3097, + "step": 2357 + }, + { + "epoch": 0.7038805970149253, + "grad_norm": 0.8622991762796726, + "learning_rate": 1.0645741512572031e-06, + "loss": 0.3797, + "step": 2358 + }, + { + "epoch": 0.7041791044776119, + "grad_norm": 0.8468783834550917, + "learning_rate": 1.0625956485657321e-06, + "loss": 0.313, + "step": 2359 + }, + { + "epoch": 0.7044776119402985, + "grad_norm": 0.7455704245680197, + "learning_rate": 1.060618489812293e-06, + "loss": 0.3154, + "step": 2360 + }, + { + "epoch": 0.7047761194029851, + "grad_norm": 0.8264996079174645, + "learning_rate": 1.0586426768454813e-06, + "loss": 0.3475, + "step": 2361 + }, + { + "epoch": 0.7050746268656717, + "grad_norm": 0.8225831760639863, + "learning_rate": 1.0566682115126345e-06, + "loss": 0.3354, + "step": 2362 + }, + { + "epoch": 0.7053731343283582, + "grad_norm": 0.7582974133085499, + "learning_rate": 1.0546950956598276e-06, + "loss": 0.3143, + "step": 2363 + }, + { + "epoch": 0.7056716417910448, + "grad_norm": 0.837445402977908, + "learning_rate": 1.0527233311318768e-06, + "loss": 0.3432, + "step": 2364 + }, + { + "epoch": 0.7059701492537314, + "grad_norm": 0.8064450995316124, + "learning_rate": 1.050752919772334e-06, + "loss": 0.3427, + "step": 2365 + }, + { + "epoch": 0.7062686567164179, + "grad_norm": 0.7708046546693463, + "learning_rate": 1.048783863423486e-06, + "loss": 0.2782, + "step": 2366 + }, + { + "epoch": 0.7065671641791045, + "grad_norm": 0.7890086390145694, + "learning_rate": 1.0468161639263518e-06, + "loss": 0.3246, + "step": 2367 + }, + { + "epoch": 0.706865671641791, + "grad_norm": 0.9613407733550523, + "learning_rate": 1.0448498231206843e-06, + "loss": 0.3788, + "step": 2368 + }, + { + "epoch": 0.7071641791044776, + "grad_norm": 0.8377614962573282, + "learning_rate": 1.0428848428449618e-06, + "loss": 0.2917, + "step": 2369 + }, + { + "epoch": 0.7074626865671642, + "grad_norm": 0.7644477209892271, + "learning_rate": 1.040921224936394e-06, + "loss": 0.2539, + "step": 2370 + }, + { + "epoch": 0.7077611940298507, + "grad_norm": 0.8073027138319966, + "learning_rate": 1.0389589712309156e-06, + "loss": 0.3853, + "step": 2371 + }, + { + "epoch": 0.7080597014925373, + "grad_norm": 0.773541394858571, + "learning_rate": 1.0369980835631862e-06, + "loss": 0.3109, + "step": 2372 + }, + { + "epoch": 0.7083582089552238, + "grad_norm": 0.8939609435492758, + "learning_rate": 1.035038563766589e-06, + "loss": 0.3331, + "step": 2373 + }, + { + "epoch": 0.7086567164179104, + "grad_norm": 0.8792285135356904, + "learning_rate": 1.0330804136732253e-06, + "loss": 0.3031, + "step": 2374 + }, + { + "epoch": 0.7089552238805971, + "grad_norm": 0.8249719781900057, + "learning_rate": 1.0311236351139186e-06, + "loss": 0.3463, + "step": 2375 + }, + { + "epoch": 0.7092537313432836, + "grad_norm": 0.9446425866318262, + "learning_rate": 1.0291682299182092e-06, + "loss": 0.3444, + "step": 2376 + }, + { + "epoch": 0.7095522388059702, + "grad_norm": 0.8352690148905147, + "learning_rate": 1.027214199914353e-06, + "loss": 0.3358, + "step": 2377 + }, + { + "epoch": 0.7098507462686567, + "grad_norm": 0.7396486359215371, + "learning_rate": 1.025261546929321e-06, + "loss": 0.3504, + "step": 2378 + }, + { + "epoch": 0.7101492537313433, + "grad_norm": 0.8423507780215546, + "learning_rate": 1.0233102727887967e-06, + "loss": 0.3328, + "step": 2379 + }, + { + "epoch": 0.7104477611940299, + "grad_norm": 0.8857400481832675, + "learning_rate": 1.0213603793171717e-06, + "loss": 0.3581, + "step": 2380 + }, + { + "epoch": 0.7107462686567164, + "grad_norm": 0.8154084588304341, + "learning_rate": 1.0194118683375502e-06, + "loss": 0.3725, + "step": 2381 + }, + { + "epoch": 0.711044776119403, + "grad_norm": 0.9479114104669211, + "learning_rate": 1.0174647416717428e-06, + "loss": 0.3963, + "step": 2382 + }, + { + "epoch": 0.7113432835820895, + "grad_norm": 0.8293843508242847, + "learning_rate": 1.015519001140265e-06, + "loss": 0.3394, + "step": 2383 + }, + { + "epoch": 0.7116417910447761, + "grad_norm": 0.8038453888278364, + "learning_rate": 1.0135746485623376e-06, + "loss": 0.2989, + "step": 2384 + }, + { + "epoch": 0.7119402985074627, + "grad_norm": 0.8752892342124312, + "learning_rate": 1.0116316857558814e-06, + "loss": 0.3621, + "step": 2385 + }, + { + "epoch": 0.7122388059701492, + "grad_norm": 0.7967390929681123, + "learning_rate": 1.0096901145375197e-06, + "loss": 0.3461, + "step": 2386 + }, + { + "epoch": 0.7125373134328358, + "grad_norm": 0.8551632337211533, + "learning_rate": 1.0077499367225737e-06, + "loss": 0.3295, + "step": 2387 + }, + { + "epoch": 0.7128358208955224, + "grad_norm": 0.8773095549519137, + "learning_rate": 1.0058111541250642e-06, + "loss": 0.3562, + "step": 2388 + }, + { + "epoch": 0.713134328358209, + "grad_norm": 0.8061003756386974, + "learning_rate": 1.0038737685577057e-06, + "loss": 0.3063, + "step": 2389 + }, + { + "epoch": 0.7134328358208956, + "grad_norm": 0.842089773836329, + "learning_rate": 1.0019377818319045e-06, + "loss": 0.3467, + "step": 2390 + }, + { + "epoch": 0.7137313432835821, + "grad_norm": 0.7906352842135439, + "learning_rate": 1.0000031957577618e-06, + "loss": 0.3419, + "step": 2391 + }, + { + "epoch": 0.7140298507462687, + "grad_norm": 0.9218014121141632, + "learning_rate": 9.98070012144069e-07, + "loss": 0.3658, + "step": 2392 + }, + { + "epoch": 0.7143283582089552, + "grad_norm": 0.7939940692921382, + "learning_rate": 9.96138232798305e-07, + "loss": 0.3208, + "step": 2393 + }, + { + "epoch": 0.7146268656716418, + "grad_norm": 0.8770021483327558, + "learning_rate": 9.942078595266374e-07, + "loss": 0.3714, + "step": 2394 + }, + { + "epoch": 0.7149253731343284, + "grad_norm": 0.8900060492450977, + "learning_rate": 9.922788941339169e-07, + "loss": 0.3235, + "step": 2395 + }, + { + "epoch": 0.7152238805970149, + "grad_norm": 0.7812682445205359, + "learning_rate": 9.90351338423679e-07, + "loss": 0.3309, + "step": 2396 + }, + { + "epoch": 0.7155223880597015, + "grad_norm": 1.0022155072876207, + "learning_rate": 9.884251941981424e-07, + "loss": 0.3809, + "step": 2397 + }, + { + "epoch": 0.715820895522388, + "grad_norm": 0.917525828896109, + "learning_rate": 9.865004632582045e-07, + "loss": 0.361, + "step": 2398 + }, + { + "epoch": 0.7161194029850746, + "grad_norm": 0.7859374963278128, + "learning_rate": 9.845771474034419e-07, + "loss": 0.355, + "step": 2399 + }, + { + "epoch": 0.7164179104477612, + "grad_norm": 0.8984576640616055, + "learning_rate": 9.826552484321086e-07, + "loss": 0.3332, + "step": 2400 + }, + { + "epoch": 0.7167164179104477, + "grad_norm": 0.8103441506377737, + "learning_rate": 9.80734768141132e-07, + "loss": 0.3105, + "step": 2401 + }, + { + "epoch": 0.7170149253731344, + "grad_norm": 0.7674707580678842, + "learning_rate": 9.788157083261152e-07, + "loss": 0.2509, + "step": 2402 + }, + { + "epoch": 0.7173134328358209, + "grad_norm": 0.86655253489264, + "learning_rate": 9.768980707813319e-07, + "loss": 0.4078, + "step": 2403 + }, + { + "epoch": 0.7176119402985075, + "grad_norm": 0.8359834932221715, + "learning_rate": 9.74981857299727e-07, + "loss": 0.3915, + "step": 2404 + }, + { + "epoch": 0.7179104477611941, + "grad_norm": 0.8302643419290284, + "learning_rate": 9.730670696729128e-07, + "loss": 0.3383, + "step": 2405 + }, + { + "epoch": 0.7182089552238806, + "grad_norm": 0.8047361286111895, + "learning_rate": 9.711537096911704e-07, + "loss": 0.3249, + "step": 2406 + }, + { + "epoch": 0.7185074626865672, + "grad_norm": 0.7966244710402332, + "learning_rate": 9.692417791434431e-07, + "loss": 0.284, + "step": 2407 + }, + { + "epoch": 0.7188059701492537, + "grad_norm": 0.8341828877431555, + "learning_rate": 9.673312798173399e-07, + "loss": 0.321, + "step": 2408 + }, + { + "epoch": 0.7191044776119403, + "grad_norm": 0.8810165039262209, + "learning_rate": 9.654222134991312e-07, + "loss": 0.3862, + "step": 2409 + }, + { + "epoch": 0.7194029850746269, + "grad_norm": 0.861373072096721, + "learning_rate": 9.635145819737476e-07, + "loss": 0.336, + "step": 2410 + }, + { + "epoch": 0.7197014925373134, + "grad_norm": 0.8448013124798284, + "learning_rate": 9.616083870247785e-07, + "loss": 0.3962, + "step": 2411 + }, + { + "epoch": 0.72, + "grad_norm": 0.8451750195129675, + "learning_rate": 9.597036304344688e-07, + "loss": 0.3554, + "step": 2412 + }, + { + "epoch": 0.7202985074626865, + "grad_norm": 0.7751019468536666, + "learning_rate": 9.578003139837196e-07, + "loss": 0.305, + "step": 2413 + }, + { + "epoch": 0.7205970149253731, + "grad_norm": 0.8478856215524544, + "learning_rate": 9.558984394520856e-07, + "loss": 0.3419, + "step": 2414 + }, + { + "epoch": 0.7208955223880597, + "grad_norm": 0.7578568314911519, + "learning_rate": 9.539980086177734e-07, + "loss": 0.2998, + "step": 2415 + }, + { + "epoch": 0.7211940298507463, + "grad_norm": 0.8308842159873571, + "learning_rate": 9.520990232576391e-07, + "loss": 0.3567, + "step": 2416 + }, + { + "epoch": 0.7214925373134329, + "grad_norm": 0.9597039646097656, + "learning_rate": 9.502014851471888e-07, + "loss": 0.3741, + "step": 2417 + }, + { + "epoch": 0.7217910447761194, + "grad_norm": 0.8391053251773948, + "learning_rate": 9.483053960605726e-07, + "loss": 0.3388, + "step": 2418 + }, + { + "epoch": 0.722089552238806, + "grad_norm": 0.7561556408738367, + "learning_rate": 9.464107577705887e-07, + "loss": 0.3408, + "step": 2419 + }, + { + "epoch": 0.7223880597014926, + "grad_norm": 0.9032217114431608, + "learning_rate": 9.445175720486771e-07, + "loss": 0.3679, + "step": 2420 + }, + { + "epoch": 0.7226865671641791, + "grad_norm": 0.909162048526229, + "learning_rate": 9.426258406649211e-07, + "loss": 0.3379, + "step": 2421 + }, + { + "epoch": 0.7229850746268657, + "grad_norm": 0.7673893074765138, + "learning_rate": 9.407355653880437e-07, + "loss": 0.2601, + "step": 2422 + }, + { + "epoch": 0.7232835820895522, + "grad_norm": 0.8015859918855246, + "learning_rate": 9.388467479854046e-07, + "loss": 0.3201, + "step": 2423 + }, + { + "epoch": 0.7235820895522388, + "grad_norm": 0.7805401715822302, + "learning_rate": 9.369593902230032e-07, + "loss": 0.3044, + "step": 2424 + }, + { + "epoch": 0.7238805970149254, + "grad_norm": 0.7760863233512528, + "learning_rate": 9.350734938654715e-07, + "loss": 0.3316, + "step": 2425 + }, + { + "epoch": 0.7241791044776119, + "grad_norm": 0.8337237951101096, + "learning_rate": 9.331890606760791e-07, + "loss": 0.3232, + "step": 2426 + }, + { + "epoch": 0.7244776119402985, + "grad_norm": 0.7698626803368143, + "learning_rate": 9.313060924167247e-07, + "loss": 0.3024, + "step": 2427 + }, + { + "epoch": 0.724776119402985, + "grad_norm": 0.7772472006066783, + "learning_rate": 9.29424590847936e-07, + "loss": 0.3098, + "step": 2428 + }, + { + "epoch": 0.7250746268656716, + "grad_norm": 0.8379061596545783, + "learning_rate": 9.275445577288722e-07, + "loss": 0.3738, + "step": 2429 + }, + { + "epoch": 0.7253731343283583, + "grad_norm": 0.89865650654176, + "learning_rate": 9.256659948173181e-07, + "loss": 0.3338, + "step": 2430 + }, + { + "epoch": 0.7256716417910448, + "grad_norm": 0.8370452187741632, + "learning_rate": 9.237889038696843e-07, + "loss": 0.3382, + "step": 2431 + }, + { + "epoch": 0.7259701492537314, + "grad_norm": 0.8134596631444544, + "learning_rate": 9.219132866410063e-07, + "loss": 0.3064, + "step": 2432 + }, + { + "epoch": 0.7262686567164179, + "grad_norm": 0.8264430928633055, + "learning_rate": 9.200391448849383e-07, + "loss": 0.3432, + "step": 2433 + }, + { + "epoch": 0.7265671641791045, + "grad_norm": 0.7728324764571474, + "learning_rate": 9.181664803537585e-07, + "loss": 0.3278, + "step": 2434 + }, + { + "epoch": 0.7268656716417911, + "grad_norm": 0.8542443384866948, + "learning_rate": 9.162952947983619e-07, + "loss": 0.311, + "step": 2435 + }, + { + "epoch": 0.7271641791044776, + "grad_norm": 0.7451019337472867, + "learning_rate": 9.144255899682622e-07, + "loss": 0.3326, + "step": 2436 + }, + { + "epoch": 0.7274626865671642, + "grad_norm": 0.7771194432519887, + "learning_rate": 9.125573676115873e-07, + "loss": 0.3467, + "step": 2437 + }, + { + "epoch": 0.7277611940298507, + "grad_norm": 0.7687912387800152, + "learning_rate": 9.106906294750806e-07, + "loss": 0.3147, + "step": 2438 + }, + { + "epoch": 0.7280597014925373, + "grad_norm": 0.9034996997981363, + "learning_rate": 9.088253773040947e-07, + "loss": 0.3687, + "step": 2439 + }, + { + "epoch": 0.7283582089552239, + "grad_norm": 0.8836325866150969, + "learning_rate": 9.069616128425964e-07, + "loss": 0.4087, + "step": 2440 + }, + { + "epoch": 0.7286567164179104, + "grad_norm": 1.0041210920794168, + "learning_rate": 9.050993378331599e-07, + "loss": 0.3454, + "step": 2441 + }, + { + "epoch": 0.728955223880597, + "grad_norm": 0.9216219623020782, + "learning_rate": 9.032385540169664e-07, + "loss": 0.3622, + "step": 2442 + }, + { + "epoch": 0.7292537313432835, + "grad_norm": 0.8079424743719126, + "learning_rate": 9.013792631338048e-07, + "loss": 0.3418, + "step": 2443 + }, + { + "epoch": 0.7295522388059702, + "grad_norm": 0.9118676627334383, + "learning_rate": 8.995214669220648e-07, + "loss": 0.3449, + "step": 2444 + }, + { + "epoch": 0.7298507462686568, + "grad_norm": 0.8611393356418761, + "learning_rate": 8.976651671187417e-07, + "loss": 0.3299, + "step": 2445 + }, + { + "epoch": 0.7301492537313433, + "grad_norm": 0.7639011408029112, + "learning_rate": 8.958103654594302e-07, + "loss": 0.3003, + "step": 2446 + }, + { + "epoch": 0.7304477611940299, + "grad_norm": 0.8195346005096708, + "learning_rate": 8.939570636783249e-07, + "loss": 0.3394, + "step": 2447 + }, + { + "epoch": 0.7307462686567164, + "grad_norm": 0.8782576394827525, + "learning_rate": 8.921052635082175e-07, + "loss": 0.361, + "step": 2448 + }, + { + "epoch": 0.731044776119403, + "grad_norm": 0.8671344977010738, + "learning_rate": 8.902549666804971e-07, + "loss": 0.3828, + "step": 2449 + }, + { + "epoch": 0.7313432835820896, + "grad_norm": 0.809160024689468, + "learning_rate": 8.884061749251446e-07, + "loss": 0.3113, + "step": 2450 + }, + { + "epoch": 0.7316417910447761, + "grad_norm": 0.8158507141596787, + "learning_rate": 8.865588899707358e-07, + "loss": 0.3481, + "step": 2451 + }, + { + "epoch": 0.7319402985074627, + "grad_norm": 0.8308589930715008, + "learning_rate": 8.847131135444373e-07, + "loss": 0.3448, + "step": 2452 + }, + { + "epoch": 0.7322388059701492, + "grad_norm": 0.8737351072965946, + "learning_rate": 8.828688473720051e-07, + "loss": 0.3881, + "step": 2453 + }, + { + "epoch": 0.7325373134328358, + "grad_norm": 0.8195810108023454, + "learning_rate": 8.810260931777828e-07, + "loss": 0.3467, + "step": 2454 + }, + { + "epoch": 0.7328358208955223, + "grad_norm": 0.8343708212051258, + "learning_rate": 8.791848526847016e-07, + "loss": 0.3586, + "step": 2455 + }, + { + "epoch": 0.7331343283582089, + "grad_norm": 0.8073884493919178, + "learning_rate": 8.77345127614275e-07, + "loss": 0.3176, + "step": 2456 + }, + { + "epoch": 0.7334328358208955, + "grad_norm": 0.9085802883997077, + "learning_rate": 8.755069196866014e-07, + "loss": 0.3577, + "step": 2457 + }, + { + "epoch": 0.7337313432835821, + "grad_norm": 0.7503848280448058, + "learning_rate": 8.736702306203612e-07, + "loss": 0.2852, + "step": 2458 + }, + { + "epoch": 0.7340298507462687, + "grad_norm": 1.180773273009247, + "learning_rate": 8.718350621328137e-07, + "loss": 0.353, + "step": 2459 + }, + { + "epoch": 0.7343283582089553, + "grad_norm": 0.8460574776608701, + "learning_rate": 8.700014159397971e-07, + "loss": 0.3534, + "step": 2460 + }, + { + "epoch": 0.7346268656716418, + "grad_norm": 0.8636506104198776, + "learning_rate": 8.681692937557246e-07, + "loss": 0.2733, + "step": 2461 + }, + { + "epoch": 0.7349253731343284, + "grad_norm": 0.7554477293718002, + "learning_rate": 8.663386972935864e-07, + "loss": 0.3375, + "step": 2462 + }, + { + "epoch": 0.7352238805970149, + "grad_norm": 0.8170180017573404, + "learning_rate": 8.645096282649448e-07, + "loss": 0.3501, + "step": 2463 + }, + { + "epoch": 0.7355223880597015, + "grad_norm": 0.785002214591176, + "learning_rate": 8.626820883799364e-07, + "loss": 0.322, + "step": 2464 + }, + { + "epoch": 0.735820895522388, + "grad_norm": 0.8124518915482682, + "learning_rate": 8.608560793472667e-07, + "loss": 0.3488, + "step": 2465 + }, + { + "epoch": 0.7361194029850746, + "grad_norm": 0.8819651422697621, + "learning_rate": 8.590316028742079e-07, + "loss": 0.3871, + "step": 2466 + }, + { + "epoch": 0.7364179104477612, + "grad_norm": 0.8051048323090729, + "learning_rate": 8.572086606666016e-07, + "loss": 0.3391, + "step": 2467 + }, + { + "epoch": 0.7367164179104477, + "grad_norm": 0.8716400545844462, + "learning_rate": 8.553872544288544e-07, + "loss": 0.3561, + "step": 2468 + }, + { + "epoch": 0.7370149253731343, + "grad_norm": 1.0077273262938058, + "learning_rate": 8.535673858639368e-07, + "loss": 0.3269, + "step": 2469 + }, + { + "epoch": 0.7373134328358208, + "grad_norm": 0.7495287643901639, + "learning_rate": 8.517490566733827e-07, + "loss": 0.3547, + "step": 2470 + }, + { + "epoch": 0.7376119402985075, + "grad_norm": 0.94770061816034, + "learning_rate": 8.499322685572834e-07, + "loss": 0.3732, + "step": 2471 + }, + { + "epoch": 0.7379104477611941, + "grad_norm": 0.8036303894234751, + "learning_rate": 8.481170232142923e-07, + "loss": 0.3387, + "step": 2472 + }, + { + "epoch": 0.7382089552238806, + "grad_norm": 0.770395344765945, + "learning_rate": 8.463033223416201e-07, + "loss": 0.2754, + "step": 2473 + }, + { + "epoch": 0.7385074626865672, + "grad_norm": 0.7821239157671365, + "learning_rate": 8.444911676350326e-07, + "loss": 0.3406, + "step": 2474 + }, + { + "epoch": 0.7388059701492538, + "grad_norm": 0.8604005758402877, + "learning_rate": 8.426805607888502e-07, + "loss": 0.3882, + "step": 2475 + }, + { + "epoch": 0.7391044776119403, + "grad_norm": 0.7436455606846663, + "learning_rate": 8.408715034959469e-07, + "loss": 0.3026, + "step": 2476 + }, + { + "epoch": 0.7394029850746269, + "grad_norm": 0.8531423147657465, + "learning_rate": 8.390639974477461e-07, + "loss": 0.3147, + "step": 2477 + }, + { + "epoch": 0.7397014925373134, + "grad_norm": 0.897354179300323, + "learning_rate": 8.372580443342218e-07, + "loss": 0.3369, + "step": 2478 + }, + { + "epoch": 0.74, + "grad_norm": 0.7937336535275226, + "learning_rate": 8.354536458438969e-07, + "loss": 0.3417, + "step": 2479 + }, + { + "epoch": 0.7402985074626866, + "grad_norm": 0.7948747004341821, + "learning_rate": 8.3365080366384e-07, + "loss": 0.3138, + "step": 2480 + }, + { + "epoch": 0.7405970149253731, + "grad_norm": 0.8198556735193836, + "learning_rate": 8.31849519479665e-07, + "loss": 0.3394, + "step": 2481 + }, + { + "epoch": 0.7408955223880597, + "grad_norm": 0.8619919834482459, + "learning_rate": 8.300497949755271e-07, + "loss": 0.312, + "step": 2482 + }, + { + "epoch": 0.7411940298507462, + "grad_norm": 0.8257206030147372, + "learning_rate": 8.282516318341258e-07, + "loss": 0.3253, + "step": 2483 + }, + { + "epoch": 0.7414925373134328, + "grad_norm": 0.8361638399301501, + "learning_rate": 8.264550317366998e-07, + "loss": 0.3585, + "step": 2484 + }, + { + "epoch": 0.7417910447761195, + "grad_norm": 0.9277677786456653, + "learning_rate": 8.246599963630266e-07, + "loss": 0.3364, + "step": 2485 + }, + { + "epoch": 0.742089552238806, + "grad_norm": 0.8008796354592284, + "learning_rate": 8.228665273914202e-07, + "loss": 0.305, + "step": 2486 + }, + { + "epoch": 0.7423880597014926, + "grad_norm": 0.7154547857109149, + "learning_rate": 8.210746264987315e-07, + "loss": 0.3003, + "step": 2487 + }, + { + "epoch": 0.7426865671641791, + "grad_norm": 0.8086001172238579, + "learning_rate": 8.192842953603422e-07, + "loss": 0.3445, + "step": 2488 + }, + { + "epoch": 0.7429850746268657, + "grad_norm": 0.809703550422652, + "learning_rate": 8.174955356501693e-07, + "loss": 0.3528, + "step": 2489 + }, + { + "epoch": 0.7432835820895523, + "grad_norm": 0.7940020900981949, + "learning_rate": 8.157083490406593e-07, + "loss": 0.3609, + "step": 2490 + }, + { + "epoch": 0.7435820895522388, + "grad_norm": 0.8667872980953691, + "learning_rate": 8.139227372027883e-07, + "loss": 0.3538, + "step": 2491 + }, + { + "epoch": 0.7438805970149254, + "grad_norm": 0.8022738790006476, + "learning_rate": 8.121387018060601e-07, + "loss": 0.3551, + "step": 2492 + }, + { + "epoch": 0.7441791044776119, + "grad_norm": 0.7944094322718988, + "learning_rate": 8.103562445185045e-07, + "loss": 0.2928, + "step": 2493 + }, + { + "epoch": 0.7444776119402985, + "grad_norm": 0.9134473219974198, + "learning_rate": 8.085753670066746e-07, + "loss": 0.3283, + "step": 2494 + }, + { + "epoch": 0.744776119402985, + "grad_norm": 0.9171291476302468, + "learning_rate": 8.067960709356479e-07, + "loss": 0.3432, + "step": 2495 + }, + { + "epoch": 0.7450746268656716, + "grad_norm": 0.7630031740731616, + "learning_rate": 8.05018357969023e-07, + "loss": 0.2707, + "step": 2496 + }, + { + "epoch": 0.7453731343283582, + "grad_norm": 0.8408211927826157, + "learning_rate": 8.03242229768918e-07, + "loss": 0.3425, + "step": 2497 + }, + { + "epoch": 0.7456716417910447, + "grad_norm": 0.8479202019499819, + "learning_rate": 8.014676879959704e-07, + "loss": 0.3288, + "step": 2498 + }, + { + "epoch": 0.7459701492537314, + "grad_norm": 0.8959963249910727, + "learning_rate": 7.996947343093323e-07, + "loss": 0.3825, + "step": 2499 + }, + { + "epoch": 0.746268656716418, + "grad_norm": 0.8469480512025105, + "learning_rate": 7.979233703666714e-07, + "loss": 0.3364, + "step": 2500 + }, + { + "epoch": 0.7465671641791045, + "grad_norm": 0.8363118813056599, + "learning_rate": 7.961535978241722e-07, + "loss": 0.3655, + "step": 2501 + }, + { + "epoch": 0.7468656716417911, + "grad_norm": 0.7993313849201408, + "learning_rate": 7.943854183365277e-07, + "loss": 0.3081, + "step": 2502 + }, + { + "epoch": 0.7471641791044776, + "grad_norm": 0.8028836757795254, + "learning_rate": 7.926188335569432e-07, + "loss": 0.3151, + "step": 2503 + }, + { + "epoch": 0.7474626865671642, + "grad_norm": 0.8193299901791269, + "learning_rate": 7.908538451371311e-07, + "loss": 0.3327, + "step": 2504 + }, + { + "epoch": 0.7477611940298508, + "grad_norm": 0.812821295525394, + "learning_rate": 7.890904547273134e-07, + "loss": 0.3028, + "step": 2505 + }, + { + "epoch": 0.7480597014925373, + "grad_norm": 0.7946631103592113, + "learning_rate": 7.873286639762171e-07, + "loss": 0.3305, + "step": 2506 + }, + { + "epoch": 0.7483582089552239, + "grad_norm": 0.8168449751348733, + "learning_rate": 7.855684745310732e-07, + "loss": 0.311, + "step": 2507 + }, + { + "epoch": 0.7486567164179104, + "grad_norm": 0.7577415278692218, + "learning_rate": 7.838098880376175e-07, + "loss": 0.3005, + "step": 2508 + }, + { + "epoch": 0.748955223880597, + "grad_norm": 1.007066007674342, + "learning_rate": 7.820529061400833e-07, + "loss": 0.3422, + "step": 2509 + }, + { + "epoch": 0.7492537313432835, + "grad_norm": 0.8285843650225135, + "learning_rate": 7.802975304812068e-07, + "loss": 0.3649, + "step": 2510 + }, + { + "epoch": 0.7495522388059701, + "grad_norm": 0.8802781806958206, + "learning_rate": 7.785437627022216e-07, + "loss": 0.3442, + "step": 2511 + }, + { + "epoch": 0.7498507462686567, + "grad_norm": 0.8872697995230727, + "learning_rate": 7.767916044428578e-07, + "loss": 0.3535, + "step": 2512 + }, + { + "epoch": 0.7501492537313433, + "grad_norm": 0.7493499394945767, + "learning_rate": 7.750410573413403e-07, + "loss": 0.3, + "step": 2513 + }, + { + "epoch": 0.7504477611940299, + "grad_norm": 0.8420235208681436, + "learning_rate": 7.732921230343893e-07, + "loss": 0.3361, + "step": 2514 + }, + { + "epoch": 0.7507462686567165, + "grad_norm": 0.9016435397252184, + "learning_rate": 7.715448031572137e-07, + "loss": 0.3665, + "step": 2515 + }, + { + "epoch": 0.751044776119403, + "grad_norm": 0.8254349682410594, + "learning_rate": 7.697990993435162e-07, + "loss": 0.3658, + "step": 2516 + }, + { + "epoch": 0.7513432835820896, + "grad_norm": 0.8283799880314672, + "learning_rate": 7.680550132254869e-07, + "loss": 0.2997, + "step": 2517 + }, + { + "epoch": 0.7516417910447761, + "grad_norm": 0.7889635309394748, + "learning_rate": 7.663125464338042e-07, + "loss": 0.3662, + "step": 2518 + }, + { + "epoch": 0.7519402985074627, + "grad_norm": 0.7589268133008595, + "learning_rate": 7.645717005976328e-07, + "loss": 0.2914, + "step": 2519 + }, + { + "epoch": 0.7522388059701492, + "grad_norm": 0.8796818140938326, + "learning_rate": 7.628324773446192e-07, + "loss": 0.3653, + "step": 2520 + }, + { + "epoch": 0.7525373134328358, + "grad_norm": 0.8567321353644352, + "learning_rate": 7.610948783008956e-07, + "loss": 0.3185, + "step": 2521 + }, + { + "epoch": 0.7528358208955224, + "grad_norm": 0.8340526715199753, + "learning_rate": 7.593589050910752e-07, + "loss": 0.3609, + "step": 2522 + }, + { + "epoch": 0.7531343283582089, + "grad_norm": 0.8400627991409522, + "learning_rate": 7.576245593382503e-07, + "loss": 0.3362, + "step": 2523 + }, + { + "epoch": 0.7534328358208955, + "grad_norm": 0.8335760592291928, + "learning_rate": 7.558918426639919e-07, + "loss": 0.3326, + "step": 2524 + }, + { + "epoch": 0.753731343283582, + "grad_norm": 0.7330595579589215, + "learning_rate": 7.541607566883486e-07, + "loss": 0.316, + "step": 2525 + }, + { + "epoch": 0.7540298507462686, + "grad_norm": 0.8319155733303641, + "learning_rate": 7.524313030298422e-07, + "loss": 0.3825, + "step": 2526 + }, + { + "epoch": 0.7543283582089553, + "grad_norm": 0.8537564302430024, + "learning_rate": 7.507034833054705e-07, + "loss": 0.3675, + "step": 2527 + }, + { + "epoch": 0.7546268656716418, + "grad_norm": 0.8541046358406873, + "learning_rate": 7.489772991307026e-07, + "loss": 0.3463, + "step": 2528 + }, + { + "epoch": 0.7549253731343284, + "grad_norm": 0.8333446760301982, + "learning_rate": 7.472527521194789e-07, + "loss": 0.3248, + "step": 2529 + }, + { + "epoch": 0.755223880597015, + "grad_norm": 0.918746792475965, + "learning_rate": 7.455298438842096e-07, + "loss": 0.3618, + "step": 2530 + }, + { + "epoch": 0.7555223880597015, + "grad_norm": 0.7709947075005573, + "learning_rate": 7.4380857603577e-07, + "loss": 0.323, + "step": 2531 + }, + { + "epoch": 0.7558208955223881, + "grad_norm": 0.8113705340822994, + "learning_rate": 7.420889501835046e-07, + "loss": 0.3353, + "step": 2532 + }, + { + "epoch": 0.7561194029850746, + "grad_norm": 0.8316485419308024, + "learning_rate": 7.403709679352216e-07, + "loss": 0.3197, + "step": 2533 + }, + { + "epoch": 0.7564179104477612, + "grad_norm": 0.9244834653645814, + "learning_rate": 7.386546308971926e-07, + "loss": 0.3625, + "step": 2534 + }, + { + "epoch": 0.7567164179104477, + "grad_norm": 0.7698158757397285, + "learning_rate": 7.369399406741509e-07, + "loss": 0.3401, + "step": 2535 + }, + { + "epoch": 0.7570149253731343, + "grad_norm": 0.7936072499011064, + "learning_rate": 7.35226898869291e-07, + "loss": 0.3294, + "step": 2536 + }, + { + "epoch": 0.7573134328358209, + "grad_norm": 0.8096055523595336, + "learning_rate": 7.335155070842631e-07, + "loss": 0.3354, + "step": 2537 + }, + { + "epoch": 0.7576119402985074, + "grad_norm": 0.8150395641292778, + "learning_rate": 7.318057669191775e-07, + "loss": 0.3378, + "step": 2538 + }, + { + "epoch": 0.757910447761194, + "grad_norm": 0.8749495077069125, + "learning_rate": 7.300976799726007e-07, + "loss": 0.3548, + "step": 2539 + }, + { + "epoch": 0.7582089552238805, + "grad_norm": 0.7864759817642071, + "learning_rate": 7.283912478415522e-07, + "loss": 0.298, + "step": 2540 + }, + { + "epoch": 0.7585074626865672, + "grad_norm": 0.85761429358801, + "learning_rate": 7.266864721215033e-07, + "loss": 0.3667, + "step": 2541 + }, + { + "epoch": 0.7588059701492538, + "grad_norm": 0.7614755089756268, + "learning_rate": 7.249833544063786e-07, + "loss": 0.3075, + "step": 2542 + }, + { + "epoch": 0.7591044776119403, + "grad_norm": 0.7998879965445289, + "learning_rate": 7.232818962885513e-07, + "loss": 0.3155, + "step": 2543 + }, + { + "epoch": 0.7594029850746269, + "grad_norm": 1.1882550211054863, + "learning_rate": 7.215820993588435e-07, + "loss": 0.3549, + "step": 2544 + }, + { + "epoch": 0.7597014925373134, + "grad_norm": 0.6960475242707338, + "learning_rate": 7.198839652065234e-07, + "loss": 0.2709, + "step": 2545 + }, + { + "epoch": 0.76, + "grad_norm": 0.8929595250452405, + "learning_rate": 7.181874954193066e-07, + "loss": 0.4011, + "step": 2546 + }, + { + "epoch": 0.7602985074626866, + "grad_norm": 0.8085099466146569, + "learning_rate": 7.164926915833489e-07, + "loss": 0.3258, + "step": 2547 + }, + { + "epoch": 0.7605970149253731, + "grad_norm": 0.7526251937377817, + "learning_rate": 7.147995552832518e-07, + "loss": 0.3357, + "step": 2548 + }, + { + "epoch": 0.7608955223880597, + "grad_norm": 0.7452474358495229, + "learning_rate": 7.13108088102056e-07, + "loss": 0.2711, + "step": 2549 + }, + { + "epoch": 0.7611940298507462, + "grad_norm": 0.8572099572681284, + "learning_rate": 7.114182916212423e-07, + "loss": 0.3695, + "step": 2550 + }, + { + "epoch": 0.7614925373134328, + "grad_norm": 0.9598211721736167, + "learning_rate": 7.097301674207291e-07, + "loss": 0.3611, + "step": 2551 + }, + { + "epoch": 0.7617910447761194, + "grad_norm": 0.9085391291886787, + "learning_rate": 7.080437170788723e-07, + "loss": 0.3824, + "step": 2552 + }, + { + "epoch": 0.7620895522388059, + "grad_norm": 0.8177813221411165, + "learning_rate": 7.063589421724601e-07, + "loss": 0.2772, + "step": 2553 + }, + { + "epoch": 0.7623880597014925, + "grad_norm": 0.8628641410267316, + "learning_rate": 7.046758442767171e-07, + "loss": 0.334, + "step": 2554 + }, + { + "epoch": 0.7626865671641792, + "grad_norm": 0.928648183310679, + "learning_rate": 7.029944249652987e-07, + "loss": 0.3221, + "step": 2555 + }, + { + "epoch": 0.7629850746268657, + "grad_norm": 0.9255552592094868, + "learning_rate": 7.013146858102906e-07, + "loss": 0.3226, + "step": 2556 + }, + { + "epoch": 0.7632835820895523, + "grad_norm": 0.7920749204965727, + "learning_rate": 6.996366283822093e-07, + "loss": 0.2868, + "step": 2557 + }, + { + "epoch": 0.7635820895522388, + "grad_norm": 0.9935006013461068, + "learning_rate": 6.979602542499955e-07, + "loss": 0.313, + "step": 2558 + }, + { + "epoch": 0.7638805970149254, + "grad_norm": 0.790352931784802, + "learning_rate": 6.962855649810193e-07, + "loss": 0.3091, + "step": 2559 + }, + { + "epoch": 0.764179104477612, + "grad_norm": 0.784302622452047, + "learning_rate": 6.946125621410746e-07, + "loss": 0.3021, + "step": 2560 + }, + { + "epoch": 0.7644776119402985, + "grad_norm": 0.8521244035570953, + "learning_rate": 6.929412472943775e-07, + "loss": 0.3065, + "step": 2561 + }, + { + "epoch": 0.7647761194029851, + "grad_norm": 0.7990696640196155, + "learning_rate": 6.91271622003567e-07, + "loss": 0.317, + "step": 2562 + }, + { + "epoch": 0.7650746268656716, + "grad_norm": 0.7301484428928479, + "learning_rate": 6.896036878297033e-07, + "loss": 0.3071, + "step": 2563 + }, + { + "epoch": 0.7653731343283582, + "grad_norm": 0.7370476408839566, + "learning_rate": 6.879374463322619e-07, + "loss": 0.2726, + "step": 2564 + }, + { + "epoch": 0.7656716417910447, + "grad_norm": 0.9000167299852614, + "learning_rate": 6.862728990691392e-07, + "loss": 0.3357, + "step": 2565 + }, + { + "epoch": 0.7659701492537313, + "grad_norm": 0.7870895798188804, + "learning_rate": 6.846100475966461e-07, + "loss": 0.2879, + "step": 2566 + }, + { + "epoch": 0.7662686567164179, + "grad_norm": 0.8360237218908565, + "learning_rate": 6.829488934695083e-07, + "loss": 0.3215, + "step": 2567 + }, + { + "epoch": 0.7665671641791045, + "grad_norm": 0.9359634021391391, + "learning_rate": 6.812894382408652e-07, + "loss": 0.3455, + "step": 2568 + }, + { + "epoch": 0.7668656716417911, + "grad_norm": 0.930052847096152, + "learning_rate": 6.796316834622654e-07, + "loss": 0.3582, + "step": 2569 + }, + { + "epoch": 0.7671641791044777, + "grad_norm": 0.9038130785381302, + "learning_rate": 6.779756306836702e-07, + "loss": 0.2985, + "step": 2570 + }, + { + "epoch": 0.7674626865671642, + "grad_norm": 0.7826417580282887, + "learning_rate": 6.763212814534484e-07, + "loss": 0.3482, + "step": 2571 + }, + { + "epoch": 0.7677611940298508, + "grad_norm": 0.8486630906094814, + "learning_rate": 6.746686373183761e-07, + "loss": 0.3688, + "step": 2572 + }, + { + "epoch": 0.7680597014925373, + "grad_norm": 0.8735312621938363, + "learning_rate": 6.730176998236355e-07, + "loss": 0.3815, + "step": 2573 + }, + { + "epoch": 0.7683582089552239, + "grad_norm": 0.8940128219367052, + "learning_rate": 6.713684705128135e-07, + "loss": 0.3194, + "step": 2574 + }, + { + "epoch": 0.7686567164179104, + "grad_norm": 0.9408242887386759, + "learning_rate": 6.697209509278979e-07, + "loss": 0.3658, + "step": 2575 + }, + { + "epoch": 0.768955223880597, + "grad_norm": 0.8959030487388594, + "learning_rate": 6.680751426092791e-07, + "loss": 0.3391, + "step": 2576 + }, + { + "epoch": 0.7692537313432836, + "grad_norm": 0.7803592674313101, + "learning_rate": 6.664310470957497e-07, + "loss": 0.2969, + "step": 2577 + }, + { + "epoch": 0.7695522388059701, + "grad_norm": 0.7505213649281478, + "learning_rate": 6.647886659244987e-07, + "loss": 0.2958, + "step": 2578 + }, + { + "epoch": 0.7698507462686567, + "grad_norm": 0.8011860941500494, + "learning_rate": 6.631480006311108e-07, + "loss": 0.311, + "step": 2579 + }, + { + "epoch": 0.7701492537313432, + "grad_norm": 0.7936319294693487, + "learning_rate": 6.61509052749569e-07, + "loss": 0.3458, + "step": 2580 + }, + { + "epoch": 0.7704477611940298, + "grad_norm": 0.7316957671250423, + "learning_rate": 6.598718238122497e-07, + "loss": 0.2991, + "step": 2581 + }, + { + "epoch": 0.7707462686567165, + "grad_norm": 0.8104928093316218, + "learning_rate": 6.582363153499221e-07, + "loss": 0.3437, + "step": 2582 + }, + { + "epoch": 0.771044776119403, + "grad_norm": 0.9010645711228973, + "learning_rate": 6.566025288917463e-07, + "loss": 0.3652, + "step": 2583 + }, + { + "epoch": 0.7713432835820896, + "grad_norm": 0.8089700577841448, + "learning_rate": 6.549704659652742e-07, + "loss": 0.371, + "step": 2584 + }, + { + "epoch": 0.7716417910447761, + "grad_norm": 0.8261177060625268, + "learning_rate": 6.533401280964427e-07, + "loss": 0.3337, + "step": 2585 + }, + { + "epoch": 0.7719402985074627, + "grad_norm": 0.8072522990077107, + "learning_rate": 6.517115168095792e-07, + "loss": 0.3177, + "step": 2586 + }, + { + "epoch": 0.7722388059701493, + "grad_norm": 0.8196927890724248, + "learning_rate": 6.500846336273953e-07, + "loss": 0.3239, + "step": 2587 + }, + { + "epoch": 0.7725373134328358, + "grad_norm": 0.8447623565955257, + "learning_rate": 6.484594800709873e-07, + "loss": 0.3301, + "step": 2588 + }, + { + "epoch": 0.7728358208955224, + "grad_norm": 0.8469818517147698, + "learning_rate": 6.468360576598339e-07, + "loss": 0.3407, + "step": 2589 + }, + { + "epoch": 0.7731343283582089, + "grad_norm": 0.8200398390703326, + "learning_rate": 6.452143679117965e-07, + "loss": 0.3304, + "step": 2590 + }, + { + "epoch": 0.7734328358208955, + "grad_norm": 0.8316560060664724, + "learning_rate": 6.435944123431137e-07, + "loss": 0.3667, + "step": 2591 + }, + { + "epoch": 0.7737313432835821, + "grad_norm": 0.8479185212991167, + "learning_rate": 6.419761924684056e-07, + "loss": 0.3306, + "step": 2592 + }, + { + "epoch": 0.7740298507462686, + "grad_norm": 0.7788855711723567, + "learning_rate": 6.403597098006684e-07, + "loss": 0.3336, + "step": 2593 + }, + { + "epoch": 0.7743283582089552, + "grad_norm": 0.8580372589130707, + "learning_rate": 6.387449658512735e-07, + "loss": 0.3446, + "step": 2594 + }, + { + "epoch": 0.7746268656716417, + "grad_norm": 0.8545734419513868, + "learning_rate": 6.371319621299679e-07, + "loss": 0.338, + "step": 2595 + }, + { + "epoch": 0.7749253731343284, + "grad_norm": 1.3812292290362789, + "learning_rate": 6.355207001448696e-07, + "loss": 0.3346, + "step": 2596 + }, + { + "epoch": 0.775223880597015, + "grad_norm": 0.853363224946842, + "learning_rate": 6.339111814024701e-07, + "loss": 0.391, + "step": 2597 + }, + { + "epoch": 0.7755223880597015, + "grad_norm": 0.8265934162348483, + "learning_rate": 6.323034074076298e-07, + "loss": 0.3339, + "step": 2598 + }, + { + "epoch": 0.7758208955223881, + "grad_norm": 0.9064944014108518, + "learning_rate": 6.306973796635785e-07, + "loss": 0.3589, + "step": 2599 + }, + { + "epoch": 0.7761194029850746, + "grad_norm": 0.9677195208881169, + "learning_rate": 6.29093099671913e-07, + "loss": 0.317, + "step": 2600 + }, + { + "epoch": 0.7764179104477612, + "grad_norm": 0.8311565173033396, + "learning_rate": 6.274905689325966e-07, + "loss": 0.3472, + "step": 2601 + }, + { + "epoch": 0.7767164179104478, + "grad_norm": 0.7950764731124217, + "learning_rate": 6.258897889439549e-07, + "loss": 0.3109, + "step": 2602 + }, + { + "epoch": 0.7770149253731343, + "grad_norm": 0.9141762608429268, + "learning_rate": 6.242907612026792e-07, + "loss": 0.3974, + "step": 2603 + }, + { + "epoch": 0.7773134328358209, + "grad_norm": 0.8803598215750571, + "learning_rate": 6.22693487203821e-07, + "loss": 0.3331, + "step": 2604 + }, + { + "epoch": 0.7776119402985074, + "grad_norm": 0.7668038317932448, + "learning_rate": 6.210979684407931e-07, + "loss": 0.3298, + "step": 2605 + }, + { + "epoch": 0.777910447761194, + "grad_norm": 0.8448858692096748, + "learning_rate": 6.195042064053672e-07, + "loss": 0.3259, + "step": 2606 + }, + { + "epoch": 0.7782089552238806, + "grad_norm": 0.8115377621130089, + "learning_rate": 6.179122025876702e-07, + "loss": 0.3155, + "step": 2607 + }, + { + "epoch": 0.7785074626865671, + "grad_norm": 0.7350727849438435, + "learning_rate": 6.163219584761879e-07, + "loss": 0.3448, + "step": 2608 + }, + { + "epoch": 0.7788059701492537, + "grad_norm": 0.803225857620502, + "learning_rate": 6.147334755577597e-07, + "loss": 0.3125, + "step": 2609 + }, + { + "epoch": 0.7791044776119403, + "grad_norm": 0.7835583970542247, + "learning_rate": 6.131467553175785e-07, + "loss": 0.295, + "step": 2610 + }, + { + "epoch": 0.7794029850746269, + "grad_norm": 0.802514614490111, + "learning_rate": 6.11561799239189e-07, + "loss": 0.3624, + "step": 2611 + }, + { + "epoch": 0.7797014925373135, + "grad_norm": 0.8264140923298133, + "learning_rate": 6.099786088044871e-07, + "loss": 0.3453, + "step": 2612 + }, + { + "epoch": 0.78, + "grad_norm": 0.7730123427638769, + "learning_rate": 6.083971854937157e-07, + "loss": 0.3126, + "step": 2613 + }, + { + "epoch": 0.7802985074626866, + "grad_norm": 0.876817858400939, + "learning_rate": 6.068175307854676e-07, + "loss": 0.3374, + "step": 2614 + }, + { + "epoch": 0.7805970149253731, + "grad_norm": 0.7780503068811441, + "learning_rate": 6.052396461566823e-07, + "loss": 0.402, + "step": 2615 + }, + { + "epoch": 0.7808955223880597, + "grad_norm": 0.769230896426925, + "learning_rate": 6.036635330826437e-07, + "loss": 0.3172, + "step": 2616 + }, + { + "epoch": 0.7811940298507463, + "grad_norm": 0.9137700677139416, + "learning_rate": 6.02089193036978e-07, + "loss": 0.3312, + "step": 2617 + }, + { + "epoch": 0.7814925373134328, + "grad_norm": 0.7641819962198311, + "learning_rate": 6.00516627491655e-07, + "loss": 0.3646, + "step": 2618 + }, + { + "epoch": 0.7817910447761194, + "grad_norm": 0.7985862077567278, + "learning_rate": 5.989458379169852e-07, + "loss": 0.3615, + "step": 2619 + }, + { + "epoch": 0.7820895522388059, + "grad_norm": 0.8430048020986005, + "learning_rate": 5.973768257816187e-07, + "loss": 0.3587, + "step": 2620 + }, + { + "epoch": 0.7823880597014925, + "grad_norm": 0.8377553518393402, + "learning_rate": 5.958095925525437e-07, + "loss": 0.2958, + "step": 2621 + }, + { + "epoch": 0.7826865671641791, + "grad_norm": 0.822488397832201, + "learning_rate": 5.942441396950857e-07, + "loss": 0.3351, + "step": 2622 + }, + { + "epoch": 0.7829850746268656, + "grad_norm": 0.8772777506321962, + "learning_rate": 5.926804686729034e-07, + "loss": 0.3915, + "step": 2623 + }, + { + "epoch": 0.7832835820895523, + "grad_norm": 0.8598726121904382, + "learning_rate": 5.911185809479919e-07, + "loss": 0.2941, + "step": 2624 + }, + { + "epoch": 0.7835820895522388, + "grad_norm": 0.767393624496139, + "learning_rate": 5.895584779806782e-07, + "loss": 0.3313, + "step": 2625 + }, + { + "epoch": 0.7838805970149254, + "grad_norm": 0.8365941078767859, + "learning_rate": 5.880001612296208e-07, + "loss": 0.3248, + "step": 2626 + }, + { + "epoch": 0.784179104477612, + "grad_norm": 0.8689097485635112, + "learning_rate": 5.864436321518077e-07, + "loss": 0.3254, + "step": 2627 + }, + { + "epoch": 0.7844776119402985, + "grad_norm": 0.8501156388344782, + "learning_rate": 5.848888922025553e-07, + "loss": 0.356, + "step": 2628 + }, + { + "epoch": 0.7847761194029851, + "grad_norm": 0.871278978235916, + "learning_rate": 5.833359428355076e-07, + "loss": 0.3179, + "step": 2629 + }, + { + "epoch": 0.7850746268656716, + "grad_norm": 0.8117067102631867, + "learning_rate": 5.817847855026346e-07, + "loss": 0.3056, + "step": 2630 + }, + { + "epoch": 0.7853731343283582, + "grad_norm": 0.7831623164451796, + "learning_rate": 5.802354216542305e-07, + "loss": 0.2978, + "step": 2631 + }, + { + "epoch": 0.7856716417910448, + "grad_norm": 0.8648788615413797, + "learning_rate": 5.78687852738913e-07, + "loss": 0.3269, + "step": 2632 + }, + { + "epoch": 0.7859701492537313, + "grad_norm": 1.2129702699211065, + "learning_rate": 5.771420802036218e-07, + "loss": 0.3086, + "step": 2633 + }, + { + "epoch": 0.7862686567164179, + "grad_norm": 0.8425751147312559, + "learning_rate": 5.755981054936152e-07, + "loss": 0.3467, + "step": 2634 + }, + { + "epoch": 0.7865671641791044, + "grad_norm": 0.8226182315548135, + "learning_rate": 5.740559300524726e-07, + "loss": 0.3224, + "step": 2635 + }, + { + "epoch": 0.786865671641791, + "grad_norm": 0.7695238455194613, + "learning_rate": 5.725155553220904e-07, + "loss": 0.282, + "step": 2636 + }, + { + "epoch": 0.7871641791044776, + "grad_norm": 0.8454114923108801, + "learning_rate": 5.709769827426814e-07, + "loss": 0.3547, + "step": 2637 + }, + { + "epoch": 0.7874626865671642, + "grad_norm": 0.9164178244850171, + "learning_rate": 5.694402137527735e-07, + "loss": 0.3695, + "step": 2638 + }, + { + "epoch": 0.7877611940298508, + "grad_norm": 0.9339118467727153, + "learning_rate": 5.679052497892093e-07, + "loss": 0.3797, + "step": 2639 + }, + { + "epoch": 0.7880597014925373, + "grad_norm": 0.8732595479379647, + "learning_rate": 5.663720922871408e-07, + "loss": 0.3355, + "step": 2640 + }, + { + "epoch": 0.7883582089552239, + "grad_norm": 0.7644896912195902, + "learning_rate": 5.648407426800337e-07, + "loss": 0.3427, + "step": 2641 + }, + { + "epoch": 0.7886567164179105, + "grad_norm": 0.9237943784299637, + "learning_rate": 5.633112023996626e-07, + "loss": 0.3387, + "step": 2642 + }, + { + "epoch": 0.788955223880597, + "grad_norm": 0.8362466946619596, + "learning_rate": 5.617834728761104e-07, + "loss": 0.329, + "step": 2643 + }, + { + "epoch": 0.7892537313432836, + "grad_norm": 0.819307805738088, + "learning_rate": 5.602575555377676e-07, + "loss": 0.3064, + "step": 2644 + }, + { + "epoch": 0.7895522388059701, + "grad_norm": 0.7447822449758292, + "learning_rate": 5.587334518113285e-07, + "loss": 0.3393, + "step": 2645 + }, + { + "epoch": 0.7898507462686567, + "grad_norm": 0.8444167330983842, + "learning_rate": 5.572111631217936e-07, + "loss": 0.339, + "step": 2646 + }, + { + "epoch": 0.7901492537313433, + "grad_norm": 0.8006438974002873, + "learning_rate": 5.556906908924656e-07, + "loss": 0.3163, + "step": 2647 + }, + { + "epoch": 0.7904477611940298, + "grad_norm": 0.9751532556450391, + "learning_rate": 5.541720365449488e-07, + "loss": 0.3634, + "step": 2648 + }, + { + "epoch": 0.7907462686567164, + "grad_norm": 0.8268096722848733, + "learning_rate": 5.526552014991485e-07, + "loss": 0.3187, + "step": 2649 + }, + { + "epoch": 0.7910447761194029, + "grad_norm": 0.7212108804420516, + "learning_rate": 5.511401871732685e-07, + "loss": 0.3009, + "step": 2650 + }, + { + "epoch": 0.7913432835820896, + "grad_norm": 0.8746502710892506, + "learning_rate": 5.496269949838096e-07, + "loss": 0.3687, + "step": 2651 + }, + { + "epoch": 0.7916417910447762, + "grad_norm": 0.8435667329953185, + "learning_rate": 5.481156263455692e-07, + "loss": 0.3187, + "step": 2652 + }, + { + "epoch": 0.7919402985074627, + "grad_norm": 0.7718206010241203, + "learning_rate": 5.466060826716416e-07, + "loss": 0.2981, + "step": 2653 + }, + { + "epoch": 0.7922388059701493, + "grad_norm": 0.8420876178922836, + "learning_rate": 5.450983653734135e-07, + "loss": 0.3497, + "step": 2654 + }, + { + "epoch": 0.7925373134328358, + "grad_norm": 0.7794372062492068, + "learning_rate": 5.435924758605621e-07, + "loss": 0.3162, + "step": 2655 + }, + { + "epoch": 0.7928358208955224, + "grad_norm": 0.7218324952169056, + "learning_rate": 5.420884155410583e-07, + "loss": 0.2859, + "step": 2656 + }, + { + "epoch": 0.793134328358209, + "grad_norm": 0.8935853893312067, + "learning_rate": 5.405861858211617e-07, + "loss": 0.3275, + "step": 2657 + }, + { + "epoch": 0.7934328358208955, + "grad_norm": 0.8885132950616169, + "learning_rate": 5.390857881054206e-07, + "loss": 0.335, + "step": 2658 + }, + { + "epoch": 0.7937313432835821, + "grad_norm": 0.7811013082337122, + "learning_rate": 5.375872237966706e-07, + "loss": 0.3193, + "step": 2659 + }, + { + "epoch": 0.7940298507462686, + "grad_norm": 0.761806698796138, + "learning_rate": 5.360904942960327e-07, + "loss": 0.3307, + "step": 2660 + }, + { + "epoch": 0.7943283582089552, + "grad_norm": 0.8600929269448834, + "learning_rate": 5.345956010029118e-07, + "loss": 0.3242, + "step": 2661 + }, + { + "epoch": 0.7946268656716418, + "grad_norm": 0.8628730775985842, + "learning_rate": 5.331025453149971e-07, + "loss": 0.2922, + "step": 2662 + }, + { + "epoch": 0.7949253731343283, + "grad_norm": 0.8558985245441267, + "learning_rate": 5.316113286282595e-07, + "loss": 0.3258, + "step": 2663 + }, + { + "epoch": 0.7952238805970149, + "grad_norm": 0.8797671914656098, + "learning_rate": 5.301219523369499e-07, + "loss": 0.3694, + "step": 2664 + }, + { + "epoch": 0.7955223880597015, + "grad_norm": 0.7806683157905394, + "learning_rate": 5.286344178336e-07, + "loss": 0.3144, + "step": 2665 + }, + { + "epoch": 0.7958208955223881, + "grad_norm": 0.81264272258197, + "learning_rate": 5.271487265090163e-07, + "loss": 0.3667, + "step": 2666 + }, + { + "epoch": 0.7961194029850747, + "grad_norm": 0.8601858762639831, + "learning_rate": 5.256648797522851e-07, + "loss": 0.337, + "step": 2667 + }, + { + "epoch": 0.7964179104477612, + "grad_norm": 0.8271856093309297, + "learning_rate": 5.241828789507669e-07, + "loss": 0.2935, + "step": 2668 + }, + { + "epoch": 0.7967164179104478, + "grad_norm": 0.859387934817203, + "learning_rate": 5.227027254900963e-07, + "loss": 0.3188, + "step": 2669 + }, + { + "epoch": 0.7970149253731343, + "grad_norm": 0.9900633077884397, + "learning_rate": 5.212244207541806e-07, + "loss": 0.4112, + "step": 2670 + }, + { + "epoch": 0.7973134328358209, + "grad_norm": 0.7598238168460656, + "learning_rate": 5.197479661251994e-07, + "loss": 0.2827, + "step": 2671 + }, + { + "epoch": 0.7976119402985075, + "grad_norm": 0.8105463195253384, + "learning_rate": 5.182733629836006e-07, + "loss": 0.3, + "step": 2672 + }, + { + "epoch": 0.797910447761194, + "grad_norm": 0.8470648410134773, + "learning_rate": 5.168006127081027e-07, + "loss": 0.348, + "step": 2673 + }, + { + "epoch": 0.7982089552238806, + "grad_norm": 0.8543097630359251, + "learning_rate": 5.153297166756913e-07, + "loss": 0.3301, + "step": 2674 + }, + { + "epoch": 0.7985074626865671, + "grad_norm": 0.9187030166832789, + "learning_rate": 5.138606762616186e-07, + "loss": 0.3212, + "step": 2675 + }, + { + "epoch": 0.7988059701492537, + "grad_norm": 0.746950665181267, + "learning_rate": 5.123934928394011e-07, + "loss": 0.2924, + "step": 2676 + }, + { + "epoch": 0.7991044776119403, + "grad_norm": 0.8206743880698498, + "learning_rate": 5.109281677808207e-07, + "loss": 0.3299, + "step": 2677 + }, + { + "epoch": 0.7994029850746268, + "grad_norm": 0.8977645162297897, + "learning_rate": 5.094647024559188e-07, + "loss": 0.363, + "step": 2678 + }, + { + "epoch": 0.7997014925373135, + "grad_norm": 0.7603505513127645, + "learning_rate": 5.080030982330008e-07, + "loss": 0.2685, + "step": 2679 + }, + { + "epoch": 0.8, + "grad_norm": 0.7885960133919522, + "learning_rate": 5.065433564786312e-07, + "loss": 0.3079, + "step": 2680 + }, + { + "epoch": 0.8002985074626866, + "grad_norm": 0.801692605365031, + "learning_rate": 5.050854785576326e-07, + "loss": 0.3456, + "step": 2681 + }, + { + "epoch": 0.8005970149253732, + "grad_norm": 0.7935079560838534, + "learning_rate": 5.036294658330862e-07, + "loss": 0.359, + "step": 2682 + }, + { + "epoch": 0.8008955223880597, + "grad_norm": 0.7511048696552357, + "learning_rate": 5.021753196663268e-07, + "loss": 0.3289, + "step": 2683 + }, + { + "epoch": 0.8011940298507463, + "grad_norm": 0.9217091503137018, + "learning_rate": 5.007230414169464e-07, + "loss": 0.3606, + "step": 2684 + }, + { + "epoch": 0.8014925373134328, + "grad_norm": 0.8424874150110574, + "learning_rate": 4.992726324427901e-07, + "loss": 0.3526, + "step": 2685 + }, + { + "epoch": 0.8017910447761194, + "grad_norm": 0.7801243835807536, + "learning_rate": 4.978240940999546e-07, + "loss": 0.2924, + "step": 2686 + }, + { + "epoch": 0.802089552238806, + "grad_norm": 0.7866205363691048, + "learning_rate": 4.96377427742788e-07, + "loss": 0.3286, + "step": 2687 + }, + { + "epoch": 0.8023880597014925, + "grad_norm": 0.8260723183869202, + "learning_rate": 4.949326347238887e-07, + "loss": 0.3512, + "step": 2688 + }, + { + "epoch": 0.8026865671641791, + "grad_norm": 0.9243904923211944, + "learning_rate": 4.934897163941008e-07, + "loss": 0.3652, + "step": 2689 + }, + { + "epoch": 0.8029850746268656, + "grad_norm": 0.7325605543620152, + "learning_rate": 4.920486741025202e-07, + "loss": 0.3073, + "step": 2690 + }, + { + "epoch": 0.8032835820895522, + "grad_norm": 0.825997047722358, + "learning_rate": 4.906095091964855e-07, + "loss": 0.3904, + "step": 2691 + }, + { + "epoch": 0.8035820895522388, + "grad_norm": 0.9371944525084498, + "learning_rate": 4.891722230215812e-07, + "loss": 0.3535, + "step": 2692 + }, + { + "epoch": 0.8038805970149254, + "grad_norm": 0.8203179911092189, + "learning_rate": 4.87736816921634e-07, + "loss": 0.3135, + "step": 2693 + }, + { + "epoch": 0.804179104477612, + "grad_norm": 0.9856622184168011, + "learning_rate": 4.863032922387137e-07, + "loss": 0.3265, + "step": 2694 + }, + { + "epoch": 0.8044776119402985, + "grad_norm": 0.8181896117890224, + "learning_rate": 4.848716503131312e-07, + "loss": 0.3039, + "step": 2695 + }, + { + "epoch": 0.8047761194029851, + "grad_norm": 0.8035708164782546, + "learning_rate": 4.834418924834372e-07, + "loss": 0.3458, + "step": 2696 + }, + { + "epoch": 0.8050746268656717, + "grad_norm": 0.8265207452128133, + "learning_rate": 4.820140200864198e-07, + "loss": 0.3108, + "step": 2697 + }, + { + "epoch": 0.8053731343283582, + "grad_norm": 0.8263073899283685, + "learning_rate": 4.805880344571057e-07, + "loss": 0.336, + "step": 2698 + }, + { + "epoch": 0.8056716417910448, + "grad_norm": 0.7679933299366023, + "learning_rate": 4.791639369287557e-07, + "loss": 0.3183, + "step": 2699 + }, + { + "epoch": 0.8059701492537313, + "grad_norm": 0.8067035384863501, + "learning_rate": 4.777417288328667e-07, + "loss": 0.3481, + "step": 2700 + }, + { + "epoch": 0.8062686567164179, + "grad_norm": 0.8326924635445278, + "learning_rate": 4.763214114991685e-07, + "loss": 0.3581, + "step": 2701 + }, + { + "epoch": 0.8065671641791045, + "grad_norm": 0.8792635453313903, + "learning_rate": 4.749029862556234e-07, + "loss": 0.3514, + "step": 2702 + }, + { + "epoch": 0.806865671641791, + "grad_norm": 0.8231309020118249, + "learning_rate": 4.7348645442842486e-07, + "loss": 0.3245, + "step": 2703 + }, + { + "epoch": 0.8071641791044776, + "grad_norm": 0.8220357789415259, + "learning_rate": 4.720718173419947e-07, + "loss": 0.3068, + "step": 2704 + }, + { + "epoch": 0.8074626865671641, + "grad_norm": 0.8172111167713195, + "learning_rate": 4.706590763189842e-07, + "loss": 0.311, + "step": 2705 + }, + { + "epoch": 0.8077611940298507, + "grad_norm": 0.8197511984015036, + "learning_rate": 4.692482326802722e-07, + "loss": 0.3214, + "step": 2706 + }, + { + "epoch": 0.8080597014925374, + "grad_norm": 0.795191617688341, + "learning_rate": 4.678392877449631e-07, + "loss": 0.343, + "step": 2707 + }, + { + "epoch": 0.8083582089552239, + "grad_norm": 0.9261906113677095, + "learning_rate": 4.6643224283038606e-07, + "loss": 0.3497, + "step": 2708 + }, + { + "epoch": 0.8086567164179105, + "grad_norm": 0.8073388835958137, + "learning_rate": 4.6502709925209415e-07, + "loss": 0.3566, + "step": 2709 + }, + { + "epoch": 0.808955223880597, + "grad_norm": 0.8727196491973467, + "learning_rate": 4.636238583238611e-07, + "loss": 0.3442, + "step": 2710 + }, + { + "epoch": 0.8092537313432836, + "grad_norm": 0.7474594969428882, + "learning_rate": 4.622225213576842e-07, + "loss": 0.3188, + "step": 2711 + }, + { + "epoch": 0.8095522388059702, + "grad_norm": 0.8418571780348719, + "learning_rate": 4.608230896637786e-07, + "loss": 0.3451, + "step": 2712 + }, + { + "epoch": 0.8098507462686567, + "grad_norm": 0.8706546788357333, + "learning_rate": 4.5942556455057907e-07, + "loss": 0.328, + "step": 2713 + }, + { + "epoch": 0.8101492537313433, + "grad_norm": 0.82036774081707, + "learning_rate": 4.580299473247385e-07, + "loss": 0.3743, + "step": 2714 + }, + { + "epoch": 0.8104477611940298, + "grad_norm": 0.8382687141631925, + "learning_rate": 4.566362392911233e-07, + "loss": 0.3347, + "step": 2715 + }, + { + "epoch": 0.8107462686567164, + "grad_norm": 0.8501600750136287, + "learning_rate": 4.5524444175281764e-07, + "loss": 0.3122, + "step": 2716 + }, + { + "epoch": 0.811044776119403, + "grad_norm": 0.7513505089871687, + "learning_rate": 4.5385455601111764e-07, + "loss": 0.2863, + "step": 2717 + }, + { + "epoch": 0.8113432835820895, + "grad_norm": 0.8351608395297877, + "learning_rate": 4.5246658336553285e-07, + "loss": 0.3418, + "step": 2718 + }, + { + "epoch": 0.8116417910447761, + "grad_norm": 0.8672168533410403, + "learning_rate": 4.51080525113784e-07, + "loss": 0.3373, + "step": 2719 + }, + { + "epoch": 0.8119402985074626, + "grad_norm": 0.891251070972832, + "learning_rate": 4.496963825518025e-07, + "loss": 0.3924, + "step": 2720 + }, + { + "epoch": 0.8122388059701493, + "grad_norm": 1.0096715094848157, + "learning_rate": 4.483141569737262e-07, + "loss": 0.3393, + "step": 2721 + }, + { + "epoch": 0.8125373134328359, + "grad_norm": 0.7978505191567731, + "learning_rate": 4.4693384967190336e-07, + "loss": 0.3533, + "step": 2722 + }, + { + "epoch": 0.8128358208955224, + "grad_norm": 0.8736364444396616, + "learning_rate": 4.455554619368874e-07, + "loss": 0.3452, + "step": 2723 + }, + { + "epoch": 0.813134328358209, + "grad_norm": 0.868351671110689, + "learning_rate": 4.441789950574374e-07, + "loss": 0.3279, + "step": 2724 + }, + { + "epoch": 0.8134328358208955, + "grad_norm": 0.8426500515372894, + "learning_rate": 4.4280445032051644e-07, + "loss": 0.3321, + "step": 2725 + }, + { + "epoch": 0.8137313432835821, + "grad_norm": 0.921670497533751, + "learning_rate": 4.4143182901129065e-07, + "loss": 0.3849, + "step": 2726 + }, + { + "epoch": 0.8140298507462687, + "grad_norm": 0.8427699059645096, + "learning_rate": 4.40061132413126e-07, + "loss": 0.3491, + "step": 2727 + }, + { + "epoch": 0.8143283582089552, + "grad_norm": 1.0244827972367638, + "learning_rate": 4.386923618075919e-07, + "loss": 0.3091, + "step": 2728 + }, + { + "epoch": 0.8146268656716418, + "grad_norm": 0.8797755605970998, + "learning_rate": 4.3732551847445514e-07, + "loss": 0.3726, + "step": 2729 + }, + { + "epoch": 0.8149253731343283, + "grad_norm": 0.8300697363391679, + "learning_rate": 4.3596060369168166e-07, + "loss": 0.339, + "step": 2730 + }, + { + "epoch": 0.8152238805970149, + "grad_norm": 0.8232225916495611, + "learning_rate": 4.3459761873543173e-07, + "loss": 0.3237, + "step": 2731 + }, + { + "epoch": 0.8155223880597015, + "grad_norm": 0.7786052553153007, + "learning_rate": 4.3323656488006433e-07, + "loss": 0.3131, + "step": 2732 + }, + { + "epoch": 0.815820895522388, + "grad_norm": 0.8470582237532246, + "learning_rate": 4.3187744339813096e-07, + "loss": 0.3337, + "step": 2733 + }, + { + "epoch": 0.8161194029850747, + "grad_norm": 0.8893530166853234, + "learning_rate": 4.30520255560378e-07, + "loss": 0.3582, + "step": 2734 + }, + { + "epoch": 0.8164179104477612, + "grad_norm": 0.8553834397028571, + "learning_rate": 4.291650026357419e-07, + "loss": 0.3852, + "step": 2735 + }, + { + "epoch": 0.8167164179104478, + "grad_norm": 0.8952589728174519, + "learning_rate": 4.278116858913525e-07, + "loss": 0.401, + "step": 2736 + }, + { + "epoch": 0.8170149253731344, + "grad_norm": 0.7976255563281726, + "learning_rate": 4.2646030659252657e-07, + "loss": 0.326, + "step": 2737 + }, + { + "epoch": 0.8173134328358209, + "grad_norm": 0.9629301647838667, + "learning_rate": 4.2511086600277143e-07, + "loss": 0.3878, + "step": 2738 + }, + { + "epoch": 0.8176119402985075, + "grad_norm": 0.8360663192438232, + "learning_rate": 4.2376336538378084e-07, + "loss": 0.3345, + "step": 2739 + }, + { + "epoch": 0.817910447761194, + "grad_norm": 0.8380674045458814, + "learning_rate": 4.2241780599543563e-07, + "loss": 0.3498, + "step": 2740 + }, + { + "epoch": 0.8182089552238806, + "grad_norm": 0.8738374475513894, + "learning_rate": 4.210741890958009e-07, + "loss": 0.3488, + "step": 2741 + }, + { + "epoch": 0.8185074626865672, + "grad_norm": 1.27003433660867, + "learning_rate": 4.19732515941125e-07, + "loss": 0.3179, + "step": 2742 + }, + { + "epoch": 0.8188059701492537, + "grad_norm": 0.744478080565413, + "learning_rate": 4.1839278778584014e-07, + "loss": 0.3175, + "step": 2743 + }, + { + "epoch": 0.8191044776119403, + "grad_norm": 0.7803913692575267, + "learning_rate": 4.1705500588255956e-07, + "loss": 0.2942, + "step": 2744 + }, + { + "epoch": 0.8194029850746268, + "grad_norm": 0.7539919885280165, + "learning_rate": 4.157191714820766e-07, + "loss": 0.2942, + "step": 2745 + }, + { + "epoch": 0.8197014925373134, + "grad_norm": 0.8191488598187618, + "learning_rate": 4.1438528583336384e-07, + "loss": 0.3379, + "step": 2746 + }, + { + "epoch": 0.82, + "grad_norm": 0.8663924389620965, + "learning_rate": 4.1305335018357235e-07, + "loss": 0.3193, + "step": 2747 + }, + { + "epoch": 0.8202985074626866, + "grad_norm": 0.8843844251532348, + "learning_rate": 4.117233657780287e-07, + "loss": 0.3684, + "step": 2748 + }, + { + "epoch": 0.8205970149253732, + "grad_norm": 0.7260699768191158, + "learning_rate": 4.1039533386023595e-07, + "loss": 0.2681, + "step": 2749 + }, + { + "epoch": 0.8208955223880597, + "grad_norm": 0.8255147907973385, + "learning_rate": 4.09069255671872e-07, + "loss": 0.3105, + "step": 2750 + }, + { + "epoch": 0.8211940298507463, + "grad_norm": 0.7640188918178487, + "learning_rate": 4.077451324527873e-07, + "loss": 0.2879, + "step": 2751 + }, + { + "epoch": 0.8214925373134329, + "grad_norm": 0.8228944124806514, + "learning_rate": 4.06422965441006e-07, + "loss": 0.3215, + "step": 2752 + }, + { + "epoch": 0.8217910447761194, + "grad_norm": 0.8372918580266345, + "learning_rate": 4.0510275587271995e-07, + "loss": 0.3465, + "step": 2753 + }, + { + "epoch": 0.822089552238806, + "grad_norm": 0.7687323322051585, + "learning_rate": 4.0378450498229417e-07, + "loss": 0.3526, + "step": 2754 + }, + { + "epoch": 0.8223880597014925, + "grad_norm": 0.7942261241475315, + "learning_rate": 4.0246821400226095e-07, + "loss": 0.3391, + "step": 2755 + }, + { + "epoch": 0.8226865671641791, + "grad_norm": 0.9083289569523623, + "learning_rate": 4.0115388416332e-07, + "loss": 0.3058, + "step": 2756 + }, + { + "epoch": 0.8229850746268657, + "grad_norm": 0.7389692614308948, + "learning_rate": 3.998415166943381e-07, + "loss": 0.2869, + "step": 2757 + }, + { + "epoch": 0.8232835820895522, + "grad_norm": 0.7891964084273392, + "learning_rate": 3.9853111282234736e-07, + "loss": 0.3293, + "step": 2758 + }, + { + "epoch": 0.8235820895522388, + "grad_norm": 0.8794771704418654, + "learning_rate": 3.972226737725421e-07, + "loss": 0.3271, + "step": 2759 + }, + { + "epoch": 0.8238805970149253, + "grad_norm": 0.7902448566807511, + "learning_rate": 3.9591620076828127e-07, + "loss": 0.2814, + "step": 2760 + }, + { + "epoch": 0.8241791044776119, + "grad_norm": 0.7750307089786643, + "learning_rate": 3.94611695031086e-07, + "loss": 0.3438, + "step": 2761 + }, + { + "epoch": 0.8244776119402986, + "grad_norm": 0.9123133939275277, + "learning_rate": 3.9330915778063666e-07, + "loss": 0.3011, + "step": 2762 + }, + { + "epoch": 0.8247761194029851, + "grad_norm": 0.8857984121410764, + "learning_rate": 3.920085902347745e-07, + "loss": 0.3484, + "step": 2763 + }, + { + "epoch": 0.8250746268656717, + "grad_norm": 0.849801189339493, + "learning_rate": 3.9070999360949824e-07, + "loss": 0.3515, + "step": 2764 + }, + { + "epoch": 0.8253731343283582, + "grad_norm": 0.8945130952730969, + "learning_rate": 3.8941336911896306e-07, + "loss": 0.372, + "step": 2765 + }, + { + "epoch": 0.8256716417910448, + "grad_norm": 0.8619546550134426, + "learning_rate": 3.881187179754828e-07, + "loss": 0.3446, + "step": 2766 + }, + { + "epoch": 0.8259701492537314, + "grad_norm": 0.910387966103673, + "learning_rate": 3.868260413895239e-07, + "loss": 0.3719, + "step": 2767 + }, + { + "epoch": 0.8262686567164179, + "grad_norm": 0.7564349469839857, + "learning_rate": 3.855353405697082e-07, + "loss": 0.337, + "step": 2768 + }, + { + "epoch": 0.8265671641791045, + "grad_norm": 0.8389296921319255, + "learning_rate": 3.842466167228082e-07, + "loss": 0.3324, + "step": 2769 + }, + { + "epoch": 0.826865671641791, + "grad_norm": 0.7638208844437337, + "learning_rate": 3.829598710537502e-07, + "loss": 0.2851, + "step": 2770 + }, + { + "epoch": 0.8271641791044776, + "grad_norm": 0.7857817130843364, + "learning_rate": 3.816751047656098e-07, + "loss": 0.33, + "step": 2771 + }, + { + "epoch": 0.8274626865671642, + "grad_norm": 0.8418912981277041, + "learning_rate": 3.8039231905961204e-07, + "loss": 0.2974, + "step": 2772 + }, + { + "epoch": 0.8277611940298507, + "grad_norm": 0.9404182488278726, + "learning_rate": 3.791115151351313e-07, + "loss": 0.3785, + "step": 2773 + }, + { + "epoch": 0.8280597014925373, + "grad_norm": 0.8318189034227368, + "learning_rate": 3.778326941896862e-07, + "loss": 0.353, + "step": 2774 + }, + { + "epoch": 0.8283582089552238, + "grad_norm": 0.8300336051160621, + "learning_rate": 3.7655585741894454e-07, + "loss": 0.3494, + "step": 2775 + }, + { + "epoch": 0.8286567164179105, + "grad_norm": 0.7380131940769754, + "learning_rate": 3.7528100601671694e-07, + "loss": 0.2907, + "step": 2776 + }, + { + "epoch": 0.8289552238805971, + "grad_norm": 0.792304457008959, + "learning_rate": 3.740081411749588e-07, + "loss": 0.3718, + "step": 2777 + }, + { + "epoch": 0.8292537313432836, + "grad_norm": 0.8319630537903212, + "learning_rate": 3.7273726408376734e-07, + "loss": 0.3514, + "step": 2778 + }, + { + "epoch": 0.8295522388059702, + "grad_norm": 0.8174896078993995, + "learning_rate": 3.714683759313825e-07, + "loss": 0.3168, + "step": 2779 + }, + { + "epoch": 0.8298507462686567, + "grad_norm": 0.8243230824612728, + "learning_rate": 3.7020147790418266e-07, + "loss": 0.3156, + "step": 2780 + }, + { + "epoch": 0.8301492537313433, + "grad_norm": 0.8235097405737742, + "learning_rate": 3.689365711866869e-07, + "loss": 0.3215, + "step": 2781 + }, + { + "epoch": 0.8304477611940299, + "grad_norm": 0.972966084001807, + "learning_rate": 3.676736569615524e-07, + "loss": 0.3932, + "step": 2782 + }, + { + "epoch": 0.8307462686567164, + "grad_norm": 0.8685667990613507, + "learning_rate": 3.664127364095732e-07, + "loss": 0.3553, + "step": 2783 + }, + { + "epoch": 0.831044776119403, + "grad_norm": 0.9074240672477265, + "learning_rate": 3.6515381070967916e-07, + "loss": 0.3711, + "step": 2784 + }, + { + "epoch": 0.8313432835820895, + "grad_norm": 0.9317748916806734, + "learning_rate": 3.6389688103893565e-07, + "loss": 0.3269, + "step": 2785 + }, + { + "epoch": 0.8316417910447761, + "grad_norm": 0.7730908407067022, + "learning_rate": 3.626419485725402e-07, + "loss": 0.291, + "step": 2786 + }, + { + "epoch": 0.8319402985074626, + "grad_norm": 0.8220545588467743, + "learning_rate": 3.6138901448382475e-07, + "loss": 0.2818, + "step": 2787 + }, + { + "epoch": 0.8322388059701492, + "grad_norm": 0.7753288586198012, + "learning_rate": 3.601380799442519e-07, + "loss": 0.3117, + "step": 2788 + }, + { + "epoch": 0.8325373134328358, + "grad_norm": 0.7319571164319162, + "learning_rate": 3.5888914612341506e-07, + "loss": 0.2653, + "step": 2789 + }, + { + "epoch": 0.8328358208955224, + "grad_norm": 0.9086402808294651, + "learning_rate": 3.576422141890376e-07, + "loss": 0.3587, + "step": 2790 + }, + { + "epoch": 0.833134328358209, + "grad_norm": 0.9224261714666895, + "learning_rate": 3.5639728530696944e-07, + "loss": 0.3608, + "step": 2791 + }, + { + "epoch": 0.8334328358208956, + "grad_norm": 0.8510496483807699, + "learning_rate": 3.551543606411889e-07, + "loss": 0.3436, + "step": 2792 + }, + { + "epoch": 0.8337313432835821, + "grad_norm": 0.8921671526472281, + "learning_rate": 3.5391344135380065e-07, + "loss": 0.332, + "step": 2793 + }, + { + "epoch": 0.8340298507462687, + "grad_norm": 0.8000190887650788, + "learning_rate": 3.526745286050334e-07, + "loss": 0.3302, + "step": 2794 + }, + { + "epoch": 0.8343283582089552, + "grad_norm": 0.7752036186758762, + "learning_rate": 3.514376235532413e-07, + "loss": 0.3355, + "step": 2795 + }, + { + "epoch": 0.8346268656716418, + "grad_norm": 0.789049365859591, + "learning_rate": 3.5020272735490023e-07, + "loss": 0.3275, + "step": 2796 + }, + { + "epoch": 0.8349253731343284, + "grad_norm": 0.827050594275851, + "learning_rate": 3.4896984116460697e-07, + "loss": 0.3277, + "step": 2797 + }, + { + "epoch": 0.8352238805970149, + "grad_norm": 0.9597439299150683, + "learning_rate": 3.477389661350811e-07, + "loss": 0.3553, + "step": 2798 + }, + { + "epoch": 0.8355223880597015, + "grad_norm": 0.8061366601518952, + "learning_rate": 3.465101034171603e-07, + "loss": 0.3236, + "step": 2799 + }, + { + "epoch": 0.835820895522388, + "grad_norm": 0.8294064364634925, + "learning_rate": 3.452832541598014e-07, + "loss": 0.3105, + "step": 2800 + }, + { + "epoch": 0.8361194029850746, + "grad_norm": 0.7827890707493881, + "learning_rate": 3.4405841951007907e-07, + "loss": 0.3295, + "step": 2801 + }, + { + "epoch": 0.8364179104477611, + "grad_norm": 0.8274777597695597, + "learning_rate": 3.4283560061318276e-07, + "loss": 0.3279, + "step": 2802 + }, + { + "epoch": 0.8367164179104477, + "grad_norm": 0.8368484624668843, + "learning_rate": 3.416147986124185e-07, + "loss": 0.3126, + "step": 2803 + }, + { + "epoch": 0.8370149253731344, + "grad_norm": 0.8310718332591835, + "learning_rate": 3.403960146492072e-07, + "loss": 0.307, + "step": 2804 + }, + { + "epoch": 0.8373134328358209, + "grad_norm": 1.1731444253779697, + "learning_rate": 3.391792498630819e-07, + "loss": 0.3181, + "step": 2805 + }, + { + "epoch": 0.8376119402985075, + "grad_norm": 0.8303714814188278, + "learning_rate": 3.3796450539168855e-07, + "loss": 0.3548, + "step": 2806 + }, + { + "epoch": 0.837910447761194, + "grad_norm": 0.8551149029659069, + "learning_rate": 3.367517823707822e-07, + "loss": 0.3245, + "step": 2807 + }, + { + "epoch": 0.8382089552238806, + "grad_norm": 0.8090639602963838, + "learning_rate": 3.355410819342303e-07, + "loss": 0.3548, + "step": 2808 + }, + { + "epoch": 0.8385074626865672, + "grad_norm": 0.8944815883645018, + "learning_rate": 3.343324052140079e-07, + "loss": 0.3413, + "step": 2809 + }, + { + "epoch": 0.8388059701492537, + "grad_norm": 0.832569540017415, + "learning_rate": 3.331257533401985e-07, + "loss": 0.3283, + "step": 2810 + }, + { + "epoch": 0.8391044776119403, + "grad_norm": 0.9058511577320896, + "learning_rate": 3.3192112744099255e-07, + "loss": 0.3237, + "step": 2811 + }, + { + "epoch": 0.8394029850746269, + "grad_norm": 0.8466505427268364, + "learning_rate": 3.3071852864268476e-07, + "loss": 0.3318, + "step": 2812 + }, + { + "epoch": 0.8397014925373134, + "grad_norm": 0.8482105495452263, + "learning_rate": 3.2951795806967667e-07, + "loss": 0.3832, + "step": 2813 + }, + { + "epoch": 0.84, + "grad_norm": 0.760237333698982, + "learning_rate": 3.28319416844472e-07, + "loss": 0.3267, + "step": 2814 + }, + { + "epoch": 0.8402985074626865, + "grad_norm": 0.907957579652845, + "learning_rate": 3.2712290608767796e-07, + "loss": 0.353, + "step": 2815 + }, + { + "epoch": 0.8405970149253731, + "grad_norm": 0.8309270569530571, + "learning_rate": 3.259284269180027e-07, + "loss": 0.3212, + "step": 2816 + }, + { + "epoch": 0.8408955223880596, + "grad_norm": 0.8751671504075764, + "learning_rate": 3.247359804522557e-07, + "loss": 0.3945, + "step": 2817 + }, + { + "epoch": 0.8411940298507463, + "grad_norm": 0.7771991272718066, + "learning_rate": 3.2354556780534423e-07, + "loss": 0.3495, + "step": 2818 + }, + { + "epoch": 0.8414925373134329, + "grad_norm": 0.7300330257017509, + "learning_rate": 3.223571900902758e-07, + "loss": 0.2848, + "step": 2819 + }, + { + "epoch": 0.8417910447761194, + "grad_norm": 0.7636406065115092, + "learning_rate": 3.2117084841815445e-07, + "loss": 0.3463, + "step": 2820 + }, + { + "epoch": 0.842089552238806, + "grad_norm": 0.7679199725342977, + "learning_rate": 3.199865438981808e-07, + "loss": 0.3386, + "step": 2821 + }, + { + "epoch": 0.8423880597014926, + "grad_norm": 0.8654021210207075, + "learning_rate": 3.18804277637651e-07, + "loss": 0.374, + "step": 2822 + }, + { + "epoch": 0.8426865671641791, + "grad_norm": 0.8588936766950285, + "learning_rate": 3.1762405074195505e-07, + "loss": 0.2811, + "step": 2823 + }, + { + "epoch": 0.8429850746268657, + "grad_norm": 0.8219003046328764, + "learning_rate": 3.164458643145757e-07, + "loss": 0.3322, + "step": 2824 + }, + { + "epoch": 0.8432835820895522, + "grad_norm": 0.8476446887601962, + "learning_rate": 3.152697194570892e-07, + "loss": 0.3511, + "step": 2825 + }, + { + "epoch": 0.8435820895522388, + "grad_norm": 0.8506563729141355, + "learning_rate": 3.14095617269162e-07, + "loss": 0.3435, + "step": 2826 + }, + { + "epoch": 0.8438805970149253, + "grad_norm": 0.8442762907020646, + "learning_rate": 3.12923558848551e-07, + "loss": 0.2946, + "step": 2827 + }, + { + "epoch": 0.8441791044776119, + "grad_norm": 0.9559566043579282, + "learning_rate": 3.11753545291103e-07, + "loss": 0.3085, + "step": 2828 + }, + { + "epoch": 0.8444776119402985, + "grad_norm": 0.8597722096339706, + "learning_rate": 3.1058557769075127e-07, + "loss": 0.3444, + "step": 2829 + }, + { + "epoch": 0.844776119402985, + "grad_norm": 0.7840152793786416, + "learning_rate": 3.0941965713951723e-07, + "loss": 0.3346, + "step": 2830 + }, + { + "epoch": 0.8450746268656717, + "grad_norm": 0.7867462780601826, + "learning_rate": 3.0825578472750806e-07, + "loss": 0.3301, + "step": 2831 + }, + { + "epoch": 0.8453731343283583, + "grad_norm": 1.183250521423748, + "learning_rate": 3.070939615429167e-07, + "loss": 0.3585, + "step": 2832 + }, + { + "epoch": 0.8456716417910448, + "grad_norm": 0.8624837995021237, + "learning_rate": 3.0593418867201877e-07, + "loss": 0.3036, + "step": 2833 + }, + { + "epoch": 0.8459701492537314, + "grad_norm": 0.7736676319547169, + "learning_rate": 3.047764671991749e-07, + "loss": 0.3162, + "step": 2834 + }, + { + "epoch": 0.8462686567164179, + "grad_norm": 0.8290268285441122, + "learning_rate": 3.0362079820682485e-07, + "loss": 0.3256, + "step": 2835 + }, + { + "epoch": 0.8465671641791045, + "grad_norm": 0.7956305622703035, + "learning_rate": 3.0246718277549157e-07, + "loss": 0.2927, + "step": 2836 + }, + { + "epoch": 0.846865671641791, + "grad_norm": 0.9449677011825268, + "learning_rate": 3.0131562198377763e-07, + "loss": 0.3221, + "step": 2837 + }, + { + "epoch": 0.8471641791044776, + "grad_norm": 0.8544674441401762, + "learning_rate": 3.001661169083639e-07, + "loss": 0.3511, + "step": 2838 + }, + { + "epoch": 0.8474626865671642, + "grad_norm": 0.8378591210472813, + "learning_rate": 2.990186686240104e-07, + "loss": 0.3299, + "step": 2839 + }, + { + "epoch": 0.8477611940298507, + "grad_norm": 0.7415214017957397, + "learning_rate": 2.978732782035518e-07, + "loss": 0.335, + "step": 2840 + }, + { + "epoch": 0.8480597014925373, + "grad_norm": 0.9365276617601792, + "learning_rate": 2.967299467179019e-07, + "loss": 0.3189, + "step": 2841 + }, + { + "epoch": 0.8483582089552238, + "grad_norm": 0.803685125509278, + "learning_rate": 2.955886752360468e-07, + "loss": 0.3054, + "step": 2842 + }, + { + "epoch": 0.8486567164179104, + "grad_norm": 0.8054257795996826, + "learning_rate": 2.944494648250476e-07, + "loss": 0.3736, + "step": 2843 + }, + { + "epoch": 0.848955223880597, + "grad_norm": 0.8427531576716745, + "learning_rate": 2.93312316550039e-07, + "loss": 0.3018, + "step": 2844 + }, + { + "epoch": 0.8492537313432836, + "grad_norm": 0.7542191685088523, + "learning_rate": 2.9217723147422603e-07, + "loss": 0.3425, + "step": 2845 + }, + { + "epoch": 0.8495522388059702, + "grad_norm": 0.8846788028740314, + "learning_rate": 2.9104421065888566e-07, + "loss": 0.3495, + "step": 2846 + }, + { + "epoch": 0.8498507462686568, + "grad_norm": 0.7448582027758741, + "learning_rate": 2.8991325516336516e-07, + "loss": 0.2709, + "step": 2847 + }, + { + "epoch": 0.8501492537313433, + "grad_norm": 0.9689479036479011, + "learning_rate": 2.887843660450798e-07, + "loss": 0.3394, + "step": 2848 + }, + { + "epoch": 0.8504477611940299, + "grad_norm": 0.9730142586691932, + "learning_rate": 2.8765754435951446e-07, + "loss": 0.3772, + "step": 2849 + }, + { + "epoch": 0.8507462686567164, + "grad_norm": 0.8711119065481331, + "learning_rate": 2.865327911602189e-07, + "loss": 0.3281, + "step": 2850 + }, + { + "epoch": 0.851044776119403, + "grad_norm": 0.8353842456994517, + "learning_rate": 2.8541010749881e-07, + "loss": 0.3442, + "step": 2851 + }, + { + "epoch": 0.8513432835820895, + "grad_norm": 0.8230749752409625, + "learning_rate": 2.8428949442496996e-07, + "loss": 0.328, + "step": 2852 + }, + { + "epoch": 0.8516417910447761, + "grad_norm": 0.8209618759870918, + "learning_rate": 2.831709529864446e-07, + "loss": 0.3234, + "step": 2853 + }, + { + "epoch": 0.8519402985074627, + "grad_norm": 0.8756915629518247, + "learning_rate": 2.820544842290429e-07, + "loss": 0.3458, + "step": 2854 + }, + { + "epoch": 0.8522388059701492, + "grad_norm": 0.8506109532491893, + "learning_rate": 2.809400891966363e-07, + "loss": 0.3495, + "step": 2855 + }, + { + "epoch": 0.8525373134328358, + "grad_norm": 0.7609796908968595, + "learning_rate": 2.798277689311563e-07, + "loss": 0.3166, + "step": 2856 + }, + { + "epoch": 0.8528358208955223, + "grad_norm": 0.9292199709147685, + "learning_rate": 2.7871752447259564e-07, + "loss": 0.3637, + "step": 2857 + }, + { + "epoch": 0.8531343283582089, + "grad_norm": 0.8140854628051475, + "learning_rate": 2.7760935685900576e-07, + "loss": 0.3422, + "step": 2858 + }, + { + "epoch": 0.8534328358208956, + "grad_norm": 0.8210792005498305, + "learning_rate": 2.765032671264961e-07, + "loss": 0.2837, + "step": 2859 + }, + { + "epoch": 0.8537313432835821, + "grad_norm": 0.7602160506021929, + "learning_rate": 2.7539925630923473e-07, + "loss": 0.3127, + "step": 2860 + }, + { + "epoch": 0.8540298507462687, + "grad_norm": 0.8787744611254319, + "learning_rate": 2.7429732543944323e-07, + "loss": 0.3677, + "step": 2861 + }, + { + "epoch": 0.8543283582089553, + "grad_norm": 0.7774552964487964, + "learning_rate": 2.7319747554740096e-07, + "loss": 0.3148, + "step": 2862 + }, + { + "epoch": 0.8546268656716418, + "grad_norm": 0.8133612375263352, + "learning_rate": 2.720997076614407e-07, + "loss": 0.301, + "step": 2863 + }, + { + "epoch": 0.8549253731343284, + "grad_norm": 0.8837935762329368, + "learning_rate": 2.710040228079486e-07, + "loss": 0.3679, + "step": 2864 + }, + { + "epoch": 0.8552238805970149, + "grad_norm": 0.8348061759356361, + "learning_rate": 2.6991042201136327e-07, + "loss": 0.351, + "step": 2865 + }, + { + "epoch": 0.8555223880597015, + "grad_norm": 0.9256883374968553, + "learning_rate": 2.688189062941754e-07, + "loss": 0.3454, + "step": 2866 + }, + { + "epoch": 0.855820895522388, + "grad_norm": 0.8766267825494243, + "learning_rate": 2.677294766769245e-07, + "loss": 0.3868, + "step": 2867 + }, + { + "epoch": 0.8561194029850746, + "grad_norm": 0.7835550371050818, + "learning_rate": 2.6664213417820104e-07, + "loss": 0.299, + "step": 2868 + }, + { + "epoch": 0.8564179104477612, + "grad_norm": 0.758646833090557, + "learning_rate": 2.655568798146443e-07, + "loss": 0.3072, + "step": 2869 + }, + { + "epoch": 0.8567164179104477, + "grad_norm": 0.7818620602758218, + "learning_rate": 2.644737146009402e-07, + "loss": 0.3201, + "step": 2870 + }, + { + "epoch": 0.8570149253731343, + "grad_norm": 0.7465671035572722, + "learning_rate": 2.633926395498218e-07, + "loss": 0.3185, + "step": 2871 + }, + { + "epoch": 0.8573134328358208, + "grad_norm": 0.8201353059035935, + "learning_rate": 2.6231365567206844e-07, + "loss": 0.3589, + "step": 2872 + }, + { + "epoch": 0.8576119402985075, + "grad_norm": 0.8180993739540929, + "learning_rate": 2.6123676397650314e-07, + "loss": 0.3187, + "step": 2873 + }, + { + "epoch": 0.8579104477611941, + "grad_norm": 0.8056216721587721, + "learning_rate": 2.601619654699933e-07, + "loss": 0.3326, + "step": 2874 + }, + { + "epoch": 0.8582089552238806, + "grad_norm": 0.786235331989252, + "learning_rate": 2.5908926115744997e-07, + "loss": 0.2917, + "step": 2875 + }, + { + "epoch": 0.8585074626865672, + "grad_norm": 0.7791737700058262, + "learning_rate": 2.5801865204182486e-07, + "loss": 0.3158, + "step": 2876 + }, + { + "epoch": 0.8588059701492538, + "grad_norm": 0.8728995256661342, + "learning_rate": 2.569501391241122e-07, + "loss": 0.387, + "step": 2877 + }, + { + "epoch": 0.8591044776119403, + "grad_norm": 0.76411187451701, + "learning_rate": 2.5588372340334427e-07, + "loss": 0.3338, + "step": 2878 + }, + { + "epoch": 0.8594029850746269, + "grad_norm": 0.7959879543698507, + "learning_rate": 2.548194058765949e-07, + "loss": 0.3169, + "step": 2879 + }, + { + "epoch": 0.8597014925373134, + "grad_norm": 0.7821916530349274, + "learning_rate": 2.5375718753897493e-07, + "loss": 0.3115, + "step": 2880 + }, + { + "epoch": 0.86, + "grad_norm": 0.910323877482503, + "learning_rate": 2.5269706938363196e-07, + "loss": 0.3825, + "step": 2881 + }, + { + "epoch": 0.8602985074626865, + "grad_norm": 0.8333574082581855, + "learning_rate": 2.5163905240175175e-07, + "loss": 0.3386, + "step": 2882 + }, + { + "epoch": 0.8605970149253731, + "grad_norm": 1.4726326278298185, + "learning_rate": 2.505831375825532e-07, + "loss": 0.3895, + "step": 2883 + }, + { + "epoch": 0.8608955223880597, + "grad_norm": 0.7646029930002108, + "learning_rate": 2.495293259132914e-07, + "loss": 0.3438, + "step": 2884 + }, + { + "epoch": 0.8611940298507462, + "grad_norm": 0.917822050579998, + "learning_rate": 2.484776183792545e-07, + "loss": 0.4223, + "step": 2885 + }, + { + "epoch": 0.8614925373134328, + "grad_norm": 0.9260960233947864, + "learning_rate": 2.474280159637635e-07, + "loss": 0.34, + "step": 2886 + }, + { + "epoch": 0.8617910447761195, + "grad_norm": 0.843848719502158, + "learning_rate": 2.463805196481714e-07, + "loss": 0.3272, + "step": 2887 + }, + { + "epoch": 0.862089552238806, + "grad_norm": 0.8251578240160266, + "learning_rate": 2.453351304118609e-07, + "loss": 0.3677, + "step": 2888 + }, + { + "epoch": 0.8623880597014926, + "grad_norm": 0.7232437460080843, + "learning_rate": 2.442918492322463e-07, + "loss": 0.3187, + "step": 2889 + }, + { + "epoch": 0.8626865671641791, + "grad_norm": 0.7702961173547361, + "learning_rate": 2.4325067708476924e-07, + "loss": 0.3192, + "step": 2890 + }, + { + "epoch": 0.8629850746268657, + "grad_norm": 0.8797092318207074, + "learning_rate": 2.42211614942901e-07, + "loss": 0.3281, + "step": 2891 + }, + { + "epoch": 0.8632835820895522, + "grad_norm": 0.7723014760959183, + "learning_rate": 2.4117466377813927e-07, + "loss": 0.3015, + "step": 2892 + }, + { + "epoch": 0.8635820895522388, + "grad_norm": 0.770812872138971, + "learning_rate": 2.4013982456000813e-07, + "loss": 0.3174, + "step": 2893 + }, + { + "epoch": 0.8638805970149254, + "grad_norm": 0.8891261100131612, + "learning_rate": 2.3910709825605645e-07, + "loss": 0.3458, + "step": 2894 + }, + { + "epoch": 0.8641791044776119, + "grad_norm": 0.8361856897100667, + "learning_rate": 2.380764858318585e-07, + "loss": 0.3498, + "step": 2895 + }, + { + "epoch": 0.8644776119402985, + "grad_norm": 0.9085874630351144, + "learning_rate": 2.370479882510121e-07, + "loss": 0.3246, + "step": 2896 + }, + { + "epoch": 0.864776119402985, + "grad_norm": 0.8197815633910296, + "learning_rate": 2.3602160647513693e-07, + "loss": 0.3716, + "step": 2897 + }, + { + "epoch": 0.8650746268656716, + "grad_norm": 0.8746527165561627, + "learning_rate": 2.3499734146387565e-07, + "loss": 0.3132, + "step": 2898 + }, + { + "epoch": 0.8653731343283582, + "grad_norm": 0.8083218694058425, + "learning_rate": 2.3397519417489022e-07, + "loss": 0.3384, + "step": 2899 + }, + { + "epoch": 0.8656716417910447, + "grad_norm": 0.8505934710512888, + "learning_rate": 2.3295516556386372e-07, + "loss": 0.3654, + "step": 2900 + }, + { + "epoch": 0.8659701492537314, + "grad_norm": 0.8163302330969157, + "learning_rate": 2.3193725658449794e-07, + "loss": 0.3231, + "step": 2901 + }, + { + "epoch": 0.866268656716418, + "grad_norm": 0.9049008261636325, + "learning_rate": 2.3092146818851357e-07, + "loss": 0.3785, + "step": 2902 + }, + { + "epoch": 0.8665671641791045, + "grad_norm": 0.8815041271123635, + "learning_rate": 2.2990780132564729e-07, + "loss": 0.3432, + "step": 2903 + }, + { + "epoch": 0.8668656716417911, + "grad_norm": 0.8388624020002282, + "learning_rate": 2.288962569436537e-07, + "loss": 0.3187, + "step": 2904 + }, + { + "epoch": 0.8671641791044776, + "grad_norm": 0.9394777411904285, + "learning_rate": 2.2788683598830101e-07, + "loss": 0.3456, + "step": 2905 + }, + { + "epoch": 0.8674626865671642, + "grad_norm": 0.8140678456634438, + "learning_rate": 2.2687953940337403e-07, + "loss": 0.3278, + "step": 2906 + }, + { + "epoch": 0.8677611940298507, + "grad_norm": 0.8236736666005042, + "learning_rate": 2.258743681306702e-07, + "loss": 0.3195, + "step": 2907 + }, + { + "epoch": 0.8680597014925373, + "grad_norm": 0.8327417961490085, + "learning_rate": 2.2487132311000055e-07, + "loss": 0.3545, + "step": 2908 + }, + { + "epoch": 0.8683582089552239, + "grad_norm": 0.8708133440716065, + "learning_rate": 2.2387040527918708e-07, + "loss": 0.3167, + "step": 2909 + }, + { + "epoch": 0.8686567164179104, + "grad_norm": 0.812026737884432, + "learning_rate": 2.2287161557406455e-07, + "loss": 0.2901, + "step": 2910 + }, + { + "epoch": 0.868955223880597, + "grad_norm": 0.7460255803585844, + "learning_rate": 2.21874954928476e-07, + "loss": 0.2604, + "step": 2911 + }, + { + "epoch": 0.8692537313432835, + "grad_norm": 0.8937718738194345, + "learning_rate": 2.2088042427427515e-07, + "loss": 0.361, + "step": 2912 + }, + { + "epoch": 0.8695522388059701, + "grad_norm": 0.8470926320063893, + "learning_rate": 2.19888024541324e-07, + "loss": 0.3481, + "step": 2913 + }, + { + "epoch": 0.8698507462686568, + "grad_norm": 0.7648679073540151, + "learning_rate": 2.188977566574921e-07, + "loss": 0.2797, + "step": 2914 + }, + { + "epoch": 0.8701492537313433, + "grad_norm": 0.7915971704662402, + "learning_rate": 2.179096215486562e-07, + "loss": 0.3656, + "step": 2915 + }, + { + "epoch": 0.8704477611940299, + "grad_norm": 0.7609586429057615, + "learning_rate": 2.1692362013869705e-07, + "loss": 0.3232, + "step": 2916 + }, + { + "epoch": 0.8707462686567164, + "grad_norm": 0.9187944488879568, + "learning_rate": 2.1593975334950363e-07, + "loss": 0.3782, + "step": 2917 + }, + { + "epoch": 0.871044776119403, + "grad_norm": 0.8742675214436783, + "learning_rate": 2.14958022100967e-07, + "loss": 0.35, + "step": 2918 + }, + { + "epoch": 0.8713432835820896, + "grad_norm": 0.773745759889458, + "learning_rate": 2.139784273109813e-07, + "loss": 0.3365, + "step": 2919 + }, + { + "epoch": 0.8716417910447761, + "grad_norm": 0.9230337430192534, + "learning_rate": 2.1300096989544494e-07, + "loss": 0.3368, + "step": 2920 + }, + { + "epoch": 0.8719402985074627, + "grad_norm": 0.8006731655410944, + "learning_rate": 2.1202565076825554e-07, + "loss": 0.3213, + "step": 2921 + }, + { + "epoch": 0.8722388059701492, + "grad_norm": 0.7820026265974506, + "learning_rate": 2.1105247084131308e-07, + "loss": 0.3169, + "step": 2922 + }, + { + "epoch": 0.8725373134328358, + "grad_norm": 0.7990682212516456, + "learning_rate": 2.100814310245175e-07, + "loss": 0.3447, + "step": 2923 + }, + { + "epoch": 0.8728358208955224, + "grad_norm": 0.793417044723113, + "learning_rate": 2.0911253222576673e-07, + "loss": 0.3729, + "step": 2924 + }, + { + "epoch": 0.8731343283582089, + "grad_norm": 0.8157859780494326, + "learning_rate": 2.081457753509586e-07, + "loss": 0.2663, + "step": 2925 + }, + { + "epoch": 0.8734328358208955, + "grad_norm": 0.7620840057646765, + "learning_rate": 2.0718116130398592e-07, + "loss": 0.3219, + "step": 2926 + }, + { + "epoch": 0.873731343283582, + "grad_norm": 0.8189546759348862, + "learning_rate": 2.0621869098673974e-07, + "loss": 0.3365, + "step": 2927 + }, + { + "epoch": 0.8740298507462687, + "grad_norm": 0.8239722079023885, + "learning_rate": 2.0525836529910665e-07, + "loss": 0.3365, + "step": 2928 + }, + { + "epoch": 0.8743283582089553, + "grad_norm": 0.7714328059715659, + "learning_rate": 2.0430018513896754e-07, + "loss": 0.3434, + "step": 2929 + }, + { + "epoch": 0.8746268656716418, + "grad_norm": 0.8713435989825455, + "learning_rate": 2.033441514021975e-07, + "loss": 0.3619, + "step": 2930 + }, + { + "epoch": 0.8749253731343284, + "grad_norm": 0.8802611699490916, + "learning_rate": 2.0239026498266535e-07, + "loss": 0.3763, + "step": 2931 + }, + { + "epoch": 0.875223880597015, + "grad_norm": 0.8223587805396835, + "learning_rate": 2.0143852677223074e-07, + "loss": 0.3458, + "step": 2932 + }, + { + "epoch": 0.8755223880597015, + "grad_norm": 0.82268739377152, + "learning_rate": 2.0048893766074608e-07, + "loss": 0.3284, + "step": 2933 + }, + { + "epoch": 0.8758208955223881, + "grad_norm": 0.8682099708563312, + "learning_rate": 1.9954149853605386e-07, + "loss": 0.3522, + "step": 2934 + }, + { + "epoch": 0.8761194029850746, + "grad_norm": 0.8184563001143988, + "learning_rate": 1.9859621028398697e-07, + "loss": 0.326, + "step": 2935 + }, + { + "epoch": 0.8764179104477612, + "grad_norm": 0.8396543309711917, + "learning_rate": 1.9765307378836695e-07, + "loss": 0.3512, + "step": 2936 + }, + { + "epoch": 0.8767164179104477, + "grad_norm": 0.7884392531195238, + "learning_rate": 1.9671208993100292e-07, + "loss": 0.3335, + "step": 2937 + }, + { + "epoch": 0.8770149253731343, + "grad_norm": 0.8535929495828234, + "learning_rate": 1.957732595916917e-07, + "loss": 0.3245, + "step": 2938 + }, + { + "epoch": 0.8773134328358209, + "grad_norm": 0.8257674644312324, + "learning_rate": 1.9483658364821744e-07, + "loss": 0.3041, + "step": 2939 + }, + { + "epoch": 0.8776119402985074, + "grad_norm": 0.8124568964960845, + "learning_rate": 1.9390206297634912e-07, + "loss": 0.3304, + "step": 2940 + }, + { + "epoch": 0.877910447761194, + "grad_norm": 0.8215547653162424, + "learning_rate": 1.9296969844984054e-07, + "loss": 0.2944, + "step": 2941 + }, + { + "epoch": 0.8782089552238806, + "grad_norm": 0.8357215000420828, + "learning_rate": 1.9203949094043024e-07, + "loss": 0.3408, + "step": 2942 + }, + { + "epoch": 0.8785074626865672, + "grad_norm": 0.8231402612725451, + "learning_rate": 1.9111144131783914e-07, + "loss": 0.3437, + "step": 2943 + }, + { + "epoch": 0.8788059701492538, + "grad_norm": 0.9024692930586209, + "learning_rate": 1.90185550449771e-07, + "loss": 0.3731, + "step": 2944 + }, + { + "epoch": 0.8791044776119403, + "grad_norm": 0.7126389443706246, + "learning_rate": 1.892618192019116e-07, + "loss": 0.2756, + "step": 2945 + }, + { + "epoch": 0.8794029850746269, + "grad_norm": 0.7750142540785868, + "learning_rate": 1.883402484379268e-07, + "loss": 0.3293, + "step": 2946 + }, + { + "epoch": 0.8797014925373134, + "grad_norm": 0.8251890473704655, + "learning_rate": 1.8742083901946317e-07, + "loss": 0.2914, + "step": 2947 + }, + { + "epoch": 0.88, + "grad_norm": 0.8619570811698483, + "learning_rate": 1.8650359180614557e-07, + "loss": 0.3061, + "step": 2948 + }, + { + "epoch": 0.8802985074626866, + "grad_norm": 0.805795293872737, + "learning_rate": 1.85588507655578e-07, + "loss": 0.3264, + "step": 2949 + }, + { + "epoch": 0.8805970149253731, + "grad_norm": 0.8331813857436107, + "learning_rate": 1.8467558742334219e-07, + "loss": 0.3111, + "step": 2950 + }, + { + "epoch": 0.8808955223880597, + "grad_norm": 0.7786687931228663, + "learning_rate": 1.837648319629956e-07, + "loss": 0.2909, + "step": 2951 + }, + { + "epoch": 0.8811940298507462, + "grad_norm": 0.7905242232392001, + "learning_rate": 1.8285624212607322e-07, + "loss": 0.3318, + "step": 2952 + }, + { + "epoch": 0.8814925373134328, + "grad_norm": 0.7851585515172644, + "learning_rate": 1.819498187620841e-07, + "loss": 0.2932, + "step": 2953 + }, + { + "epoch": 0.8817910447761194, + "grad_norm": 0.8344992885181066, + "learning_rate": 1.810455627185112e-07, + "loss": 0.3474, + "step": 2954 + }, + { + "epoch": 0.8820895522388059, + "grad_norm": 0.8778172382134446, + "learning_rate": 1.801434748408129e-07, + "loss": 0.3121, + "step": 2955 + }, + { + "epoch": 0.8823880597014926, + "grad_norm": 0.8166282816674805, + "learning_rate": 1.7924355597241927e-07, + "loss": 0.3558, + "step": 2956 + }, + { + "epoch": 0.8826865671641791, + "grad_norm": 0.8719815044140756, + "learning_rate": 1.7834580695473254e-07, + "loss": 0.38, + "step": 2957 + }, + { + "epoch": 0.8829850746268657, + "grad_norm": 0.8076792947849909, + "learning_rate": 1.774502286271254e-07, + "loss": 0.3016, + "step": 2958 + }, + { + "epoch": 0.8832835820895523, + "grad_norm": 0.9801413967360424, + "learning_rate": 1.7655682182694228e-07, + "loss": 0.4087, + "step": 2959 + }, + { + "epoch": 0.8835820895522388, + "grad_norm": 0.8289310466533708, + "learning_rate": 1.756655873894969e-07, + "loss": 0.3196, + "step": 2960 + }, + { + "epoch": 0.8838805970149254, + "grad_norm": 0.770440394202249, + "learning_rate": 1.7477652614807134e-07, + "loss": 0.3295, + "step": 2961 + }, + { + "epoch": 0.8841791044776119, + "grad_norm": 0.8473369272270318, + "learning_rate": 1.7388963893391676e-07, + "loss": 0.3282, + "step": 2962 + }, + { + "epoch": 0.8844776119402985, + "grad_norm": 0.8319003049685355, + "learning_rate": 1.7300492657625094e-07, + "loss": 0.3384, + "step": 2963 + }, + { + "epoch": 0.8847761194029851, + "grad_norm": 0.69805804384644, + "learning_rate": 1.7212238990225756e-07, + "loss": 0.3107, + "step": 2964 + }, + { + "epoch": 0.8850746268656716, + "grad_norm": 0.8316282683239232, + "learning_rate": 1.7124202973708788e-07, + "loss": 0.3394, + "step": 2965 + }, + { + "epoch": 0.8853731343283582, + "grad_norm": 0.8189231259197391, + "learning_rate": 1.7036384690385681e-07, + "loss": 0.3359, + "step": 2966 + }, + { + "epoch": 0.8856716417910447, + "grad_norm": 0.8816856984730904, + "learning_rate": 1.6948784222364372e-07, + "loss": 0.3283, + "step": 2967 + }, + { + "epoch": 0.8859701492537313, + "grad_norm": 0.9024059566893298, + "learning_rate": 1.6861401651549203e-07, + "loss": 0.3404, + "step": 2968 + }, + { + "epoch": 0.8862686567164179, + "grad_norm": 0.7660680841699447, + "learning_rate": 1.6774237059640764e-07, + "loss": 0.3183, + "step": 2969 + }, + { + "epoch": 0.8865671641791045, + "grad_norm": 0.9030202060790797, + "learning_rate": 1.6687290528135725e-07, + "loss": 0.3394, + "step": 2970 + }, + { + "epoch": 0.8868656716417911, + "grad_norm": 0.8416102003078251, + "learning_rate": 1.660056213832706e-07, + "loss": 0.3156, + "step": 2971 + }, + { + "epoch": 0.8871641791044776, + "grad_norm": 0.8428340924275619, + "learning_rate": 1.651405197130368e-07, + "loss": 0.343, + "step": 2972 + }, + { + "epoch": 0.8874626865671642, + "grad_norm": 0.8500621805800072, + "learning_rate": 1.642776010795047e-07, + "loss": 0.3628, + "step": 2973 + }, + { + "epoch": 0.8877611940298508, + "grad_norm": 0.99516910680903, + "learning_rate": 1.634168662894825e-07, + "loss": 0.3386, + "step": 2974 + }, + { + "epoch": 0.8880597014925373, + "grad_norm": 0.9229080750641229, + "learning_rate": 1.6255831614773594e-07, + "loss": 0.3529, + "step": 2975 + }, + { + "epoch": 0.8883582089552239, + "grad_norm": 0.791162434060006, + "learning_rate": 1.6170195145698842e-07, + "loss": 0.3473, + "step": 2976 + }, + { + "epoch": 0.8886567164179104, + "grad_norm": 0.7220798985091988, + "learning_rate": 1.6084777301792031e-07, + "loss": 0.3026, + "step": 2977 + }, + { + "epoch": 0.888955223880597, + "grad_norm": 0.747801405803127, + "learning_rate": 1.5999578162916723e-07, + "loss": 0.2975, + "step": 2978 + }, + { + "epoch": 0.8892537313432836, + "grad_norm": 0.8265984529095876, + "learning_rate": 1.5914597808732085e-07, + "loss": 0.3365, + "step": 2979 + }, + { + "epoch": 0.8895522388059701, + "grad_norm": 0.7476801517306761, + "learning_rate": 1.58298363186927e-07, + "loss": 0.3497, + "step": 2980 + }, + { + "epoch": 0.8898507462686567, + "grad_norm": 0.9086508062842112, + "learning_rate": 1.5745293772048393e-07, + "loss": 0.3154, + "step": 2981 + }, + { + "epoch": 0.8901492537313432, + "grad_norm": 0.8224473282900949, + "learning_rate": 1.5660970247844437e-07, + "loss": 0.354, + "step": 2982 + }, + { + "epoch": 0.8904477611940298, + "grad_norm": 0.7683788999407549, + "learning_rate": 1.5576865824921295e-07, + "loss": 0.3067, + "step": 2983 + }, + { + "epoch": 0.8907462686567165, + "grad_norm": 1.013004681398411, + "learning_rate": 1.5492980581914535e-07, + "loss": 0.3294, + "step": 2984 + }, + { + "epoch": 0.891044776119403, + "grad_norm": 0.8640995726797704, + "learning_rate": 1.5409314597254864e-07, + "loss": 0.3699, + "step": 2985 + }, + { + "epoch": 0.8913432835820896, + "grad_norm": 0.8465105815609653, + "learning_rate": 1.5325867949167823e-07, + "loss": 0.3255, + "step": 2986 + }, + { + "epoch": 0.8916417910447761, + "grad_norm": 0.9416694811441639, + "learning_rate": 1.5242640715674079e-07, + "loss": 0.3372, + "step": 2987 + }, + { + "epoch": 0.8919402985074627, + "grad_norm": 0.8527515210916904, + "learning_rate": 1.5159632974589028e-07, + "loss": 0.3654, + "step": 2988 + }, + { + "epoch": 0.8922388059701493, + "grad_norm": 0.8989050252519671, + "learning_rate": 1.507684480352292e-07, + "loss": 0.3533, + "step": 2989 + }, + { + "epoch": 0.8925373134328358, + "grad_norm": 0.8129306542683746, + "learning_rate": 1.4994276279880648e-07, + "loss": 0.3405, + "step": 2990 + }, + { + "epoch": 0.8928358208955224, + "grad_norm": 0.8832284244419019, + "learning_rate": 1.4911927480861843e-07, + "loss": 0.3266, + "step": 2991 + }, + { + "epoch": 0.8931343283582089, + "grad_norm": 0.9068919741311265, + "learning_rate": 1.4829798483460471e-07, + "loss": 0.3655, + "step": 2992 + }, + { + "epoch": 0.8934328358208955, + "grad_norm": 0.7024871886169809, + "learning_rate": 1.474788936446525e-07, + "loss": 0.3323, + "step": 2993 + }, + { + "epoch": 0.8937313432835821, + "grad_norm": 0.7474825332275427, + "learning_rate": 1.4666200200459224e-07, + "loss": 0.3027, + "step": 2994 + }, + { + "epoch": 0.8940298507462686, + "grad_norm": 0.8040131965806334, + "learning_rate": 1.458473106781977e-07, + "loss": 0.3541, + "step": 2995 + }, + { + "epoch": 0.8943283582089552, + "grad_norm": 0.7470197078575609, + "learning_rate": 1.450348204271848e-07, + "loss": 0.3267, + "step": 2996 + }, + { + "epoch": 0.8946268656716417, + "grad_norm": 0.800564447211567, + "learning_rate": 1.4422453201121234e-07, + "loss": 0.365, + "step": 2997 + }, + { + "epoch": 0.8949253731343284, + "grad_norm": 0.7546300002347592, + "learning_rate": 1.4341644618788037e-07, + "loss": 0.2846, + "step": 2998 + }, + { + "epoch": 0.895223880597015, + "grad_norm": 0.7717377048978583, + "learning_rate": 1.4261056371272953e-07, + "loss": 0.3157, + "step": 2999 + }, + { + "epoch": 0.8955223880597015, + "grad_norm": 0.9958856245080583, + "learning_rate": 1.4180688533924014e-07, + "loss": 0.3462, + "step": 3000 + }, + { + "epoch": 0.8958208955223881, + "grad_norm": 0.7740493958337736, + "learning_rate": 1.4100541181883225e-07, + "loss": 0.2892, + "step": 3001 + }, + { + "epoch": 0.8961194029850746, + "grad_norm": 0.8871065072732562, + "learning_rate": 1.402061439008634e-07, + "loss": 0.3561, + "step": 3002 + }, + { + "epoch": 0.8964179104477612, + "grad_norm": 0.7521482462008626, + "learning_rate": 1.394090823326297e-07, + "loss": 0.2957, + "step": 3003 + }, + { + "epoch": 0.8967164179104478, + "grad_norm": 0.8691973266040441, + "learning_rate": 1.386142278593647e-07, + "loss": 0.3191, + "step": 3004 + }, + { + "epoch": 0.8970149253731343, + "grad_norm": 0.8681542943381358, + "learning_rate": 1.3782158122423783e-07, + "loss": 0.362, + "step": 3005 + }, + { + "epoch": 0.8973134328358209, + "grad_norm": 0.8478073154828707, + "learning_rate": 1.3703114316835436e-07, + "loss": 0.3601, + "step": 3006 + }, + { + "epoch": 0.8976119402985074, + "grad_norm": 0.7832272639251721, + "learning_rate": 1.3624291443075505e-07, + "loss": 0.3219, + "step": 3007 + }, + { + "epoch": 0.897910447761194, + "grad_norm": 0.8259684062539756, + "learning_rate": 1.3545689574841341e-07, + "loss": 0.3591, + "step": 3008 + }, + { + "epoch": 0.8982089552238806, + "grad_norm": 0.8048015763714349, + "learning_rate": 1.3467308785623856e-07, + "loss": 0.3752, + "step": 3009 + }, + { + "epoch": 0.8985074626865671, + "grad_norm": 0.8513994404977496, + "learning_rate": 1.3389149148707176e-07, + "loss": 0.3726, + "step": 3010 + }, + { + "epoch": 0.8988059701492538, + "grad_norm": 0.8112900887766165, + "learning_rate": 1.3311210737168624e-07, + "loss": 0.3092, + "step": 3011 + }, + { + "epoch": 0.8991044776119403, + "grad_norm": 0.8531040207429486, + "learning_rate": 1.3233493623878796e-07, + "loss": 0.3495, + "step": 3012 + }, + { + "epoch": 0.8994029850746269, + "grad_norm": 0.872868595281458, + "learning_rate": 1.3155997881501181e-07, + "loss": 0.3465, + "step": 3013 + }, + { + "epoch": 0.8997014925373135, + "grad_norm": 0.8868213740208793, + "learning_rate": 1.307872358249246e-07, + "loss": 0.3229, + "step": 3014 + }, + { + "epoch": 0.9, + "grad_norm": 0.8061758493940485, + "learning_rate": 1.300167079910225e-07, + "loss": 0.3782, + "step": 3015 + }, + { + "epoch": 0.9002985074626866, + "grad_norm": 0.7826850147527407, + "learning_rate": 1.2924839603372986e-07, + "loss": 0.3566, + "step": 3016 + }, + { + "epoch": 0.9005970149253731, + "grad_norm": 0.7652865811941827, + "learning_rate": 1.2848230067139977e-07, + "loss": 0.2958, + "step": 3017 + }, + { + "epoch": 0.9008955223880597, + "grad_norm": 0.8684758983402867, + "learning_rate": 1.2771842262031293e-07, + "loss": 0.3499, + "step": 3018 + }, + { + "epoch": 0.9011940298507463, + "grad_norm": 0.8121980361863542, + "learning_rate": 1.2695676259467632e-07, + "loss": 0.342, + "step": 3019 + }, + { + "epoch": 0.9014925373134328, + "grad_norm": 0.8695703424315466, + "learning_rate": 1.2619732130662365e-07, + "loss": 0.3466, + "step": 3020 + }, + { + "epoch": 0.9017910447761194, + "grad_norm": 0.7408411983724599, + "learning_rate": 1.2544009946621417e-07, + "loss": 0.3154, + "step": 3021 + }, + { + "epoch": 0.9020895522388059, + "grad_norm": 0.7520252535004338, + "learning_rate": 1.246850977814315e-07, + "loss": 0.2926, + "step": 3022 + }, + { + "epoch": 0.9023880597014925, + "grad_norm": 0.8015680226963483, + "learning_rate": 1.2393231695818435e-07, + "loss": 0.3424, + "step": 3023 + }, + { + "epoch": 0.902686567164179, + "grad_norm": 0.8844284820852588, + "learning_rate": 1.2318175770030388e-07, + "loss": 0.3086, + "step": 3024 + }, + { + "epoch": 0.9029850746268657, + "grad_norm": 1.4074341950451352, + "learning_rate": 1.2243342070954485e-07, + "loss": 0.3021, + "step": 3025 + }, + { + "epoch": 0.9032835820895523, + "grad_norm": 0.8769608874212167, + "learning_rate": 1.216873066855845e-07, + "loss": 0.3686, + "step": 3026 + }, + { + "epoch": 0.9035820895522388, + "grad_norm": 0.8331928786433949, + "learning_rate": 1.2094341632602063e-07, + "loss": 0.3711, + "step": 3027 + }, + { + "epoch": 0.9038805970149254, + "grad_norm": 0.8114919419794893, + "learning_rate": 1.2020175032637278e-07, + "loss": 0.3104, + "step": 3028 + }, + { + "epoch": 0.904179104477612, + "grad_norm": 0.8382734489680684, + "learning_rate": 1.194623093800809e-07, + "loss": 0.3222, + "step": 3029 + }, + { + "epoch": 0.9044776119402985, + "grad_norm": 0.8090241891495422, + "learning_rate": 1.1872509417850425e-07, + "loss": 0.3221, + "step": 3030 + }, + { + "epoch": 0.9047761194029851, + "grad_norm": 0.7762081906772144, + "learning_rate": 1.179901054109206e-07, + "loss": 0.3045, + "step": 3031 + }, + { + "epoch": 0.9050746268656716, + "grad_norm": 0.7687394313447948, + "learning_rate": 1.1725734376452691e-07, + "loss": 0.3097, + "step": 3032 + }, + { + "epoch": 0.9053731343283582, + "grad_norm": 0.8494786804724461, + "learning_rate": 1.1652680992443765e-07, + "loss": 0.2838, + "step": 3033 + }, + { + "epoch": 0.9056716417910448, + "grad_norm": 0.7426720969321979, + "learning_rate": 1.1579850457368342e-07, + "loss": 0.3068, + "step": 3034 + }, + { + "epoch": 0.9059701492537313, + "grad_norm": 0.8542802364049378, + "learning_rate": 1.1507242839321281e-07, + "loss": 0.3151, + "step": 3035 + }, + { + "epoch": 0.9062686567164179, + "grad_norm": 0.8047828696688691, + "learning_rate": 1.1434858206188864e-07, + "loss": 0.3468, + "step": 3036 + }, + { + "epoch": 0.9065671641791044, + "grad_norm": 0.8784472135696588, + "learning_rate": 1.1362696625648983e-07, + "loss": 0.3373, + "step": 3037 + }, + { + "epoch": 0.906865671641791, + "grad_norm": 0.8176344259816754, + "learning_rate": 1.1290758165171e-07, + "loss": 0.3417, + "step": 3038 + }, + { + "epoch": 0.9071641791044777, + "grad_norm": 0.84514932245467, + "learning_rate": 1.1219042892015586e-07, + "loss": 0.3071, + "step": 3039 + }, + { + "epoch": 0.9074626865671642, + "grad_norm": 0.7963261643887735, + "learning_rate": 1.1147550873234769e-07, + "loss": 0.2971, + "step": 3040 + }, + { + "epoch": 0.9077611940298508, + "grad_norm": 0.8505558295228394, + "learning_rate": 1.1076282175671832e-07, + "loss": 0.2992, + "step": 3041 + }, + { + "epoch": 0.9080597014925373, + "grad_norm": 0.9346232263581281, + "learning_rate": 1.1005236865961277e-07, + "loss": 0.3754, + "step": 3042 + }, + { + "epoch": 0.9083582089552239, + "grad_norm": 0.7821481899052896, + "learning_rate": 1.0934415010528748e-07, + "loss": 0.3066, + "step": 3043 + }, + { + "epoch": 0.9086567164179105, + "grad_norm": 0.8906980599877051, + "learning_rate": 1.0863816675590999e-07, + "loss": 0.3545, + "step": 3044 + }, + { + "epoch": 0.908955223880597, + "grad_norm": 0.8482058328893826, + "learning_rate": 1.0793441927155673e-07, + "loss": 0.3525, + "step": 3045 + }, + { + "epoch": 0.9092537313432836, + "grad_norm": 0.8148838793718065, + "learning_rate": 1.0723290831021471e-07, + "loss": 0.3553, + "step": 3046 + }, + { + "epoch": 0.9095522388059701, + "grad_norm": 0.8334685767932932, + "learning_rate": 1.0653363452777953e-07, + "loss": 0.3473, + "step": 3047 + }, + { + "epoch": 0.9098507462686567, + "grad_norm": 0.7622666260376155, + "learning_rate": 1.0583659857805545e-07, + "loss": 0.3193, + "step": 3048 + }, + { + "epoch": 0.9101492537313433, + "grad_norm": 0.8736868598440297, + "learning_rate": 1.0514180111275391e-07, + "loss": 0.3266, + "step": 3049 + }, + { + "epoch": 0.9104477611940298, + "grad_norm": 0.8589697295961596, + "learning_rate": 1.044492427814936e-07, + "loss": 0.3075, + "step": 3050 + }, + { + "epoch": 0.9107462686567164, + "grad_norm": 0.8373246294908004, + "learning_rate": 1.0375892423179962e-07, + "loss": 0.2927, + "step": 3051 + }, + { + "epoch": 0.9110447761194029, + "grad_norm": 0.9038840022385726, + "learning_rate": 1.030708461091029e-07, + "loss": 0.3729, + "step": 3052 + }, + { + "epoch": 0.9113432835820896, + "grad_norm": 0.846932324486075, + "learning_rate": 1.0238500905673992e-07, + "loss": 0.3424, + "step": 3053 + }, + { + "epoch": 0.9116417910447762, + "grad_norm": 0.7863656327081202, + "learning_rate": 1.0170141371595138e-07, + "loss": 0.3041, + "step": 3054 + }, + { + "epoch": 0.9119402985074627, + "grad_norm": 0.861611603226045, + "learning_rate": 1.0102006072588239e-07, + "loss": 0.303, + "step": 3055 + }, + { + "epoch": 0.9122388059701493, + "grad_norm": 0.8354159151319559, + "learning_rate": 1.0034095072358196e-07, + "loss": 0.2905, + "step": 3056 + }, + { + "epoch": 0.9125373134328358, + "grad_norm": 0.9076705935835198, + "learning_rate": 9.966408434400026e-08, + "loss": 0.3596, + "step": 3057 + }, + { + "epoch": 0.9128358208955224, + "grad_norm": 0.7940993218471586, + "learning_rate": 9.898946221999162e-08, + "loss": 0.344, + "step": 3058 + }, + { + "epoch": 0.913134328358209, + "grad_norm": 0.8683284494278827, + "learning_rate": 9.83170849823109e-08, + "loss": 0.2837, + "step": 3059 + }, + { + "epoch": 0.9134328358208955, + "grad_norm": 0.8679841336149956, + "learning_rate": 9.764695325961471e-08, + "loss": 0.3426, + "step": 3060 + }, + { + "epoch": 0.9137313432835821, + "grad_norm": 0.9136469022117164, + "learning_rate": 9.697906767845988e-08, + "loss": 0.3216, + "step": 3061 + }, + { + "epoch": 0.9140298507462686, + "grad_norm": 0.9280938115066125, + "learning_rate": 9.631342886330302e-08, + "loss": 0.3441, + "step": 3062 + }, + { + "epoch": 0.9143283582089552, + "grad_norm": 0.7989785840668151, + "learning_rate": 9.565003743650019e-08, + "loss": 0.3037, + "step": 3063 + }, + { + "epoch": 0.9146268656716418, + "grad_norm": 0.7840797063644848, + "learning_rate": 9.498889401830636e-08, + "loss": 0.3593, + "step": 3064 + }, + { + "epoch": 0.9149253731343283, + "grad_norm": 0.8742322626184073, + "learning_rate": 9.432999922687397e-08, + "loss": 0.3853, + "step": 3065 + }, + { + "epoch": 0.9152238805970149, + "grad_norm": 0.8309689917110207, + "learning_rate": 9.367335367825442e-08, + "loss": 0.3548, + "step": 3066 + }, + { + "epoch": 0.9155223880597015, + "grad_norm": 0.7682603888770035, + "learning_rate": 9.301895798639465e-08, + "loss": 0.3458, + "step": 3067 + }, + { + "epoch": 0.9158208955223881, + "grad_norm": 0.8406021828495258, + "learning_rate": 9.236681276313914e-08, + "loss": 0.3439, + "step": 3068 + }, + { + "epoch": 0.9161194029850747, + "grad_norm": 0.7676640227562936, + "learning_rate": 9.171691861822735e-08, + "loss": 0.3377, + "step": 3069 + }, + { + "epoch": 0.9164179104477612, + "grad_norm": 0.8241123436966133, + "learning_rate": 9.106927615929462e-08, + "loss": 0.3461, + "step": 3070 + }, + { + "epoch": 0.9167164179104478, + "grad_norm": 0.7865175510079805, + "learning_rate": 9.042388599187158e-08, + "loss": 0.3356, + "step": 3071 + }, + { + "epoch": 0.9170149253731343, + "grad_norm": 0.8416832778666247, + "learning_rate": 8.978074871938109e-08, + "loss": 0.3225, + "step": 3072 + }, + { + "epoch": 0.9173134328358209, + "grad_norm": 0.925094121401764, + "learning_rate": 8.91398649431413e-08, + "loss": 0.35, + "step": 3073 + }, + { + "epoch": 0.9176119402985075, + "grad_norm": 0.9245170617415597, + "learning_rate": 8.850123526236292e-08, + "loss": 0.3473, + "step": 3074 + }, + { + "epoch": 0.917910447761194, + "grad_norm": 0.8830284323162437, + "learning_rate": 8.786486027414942e-08, + "loss": 0.3263, + "step": 3075 + }, + { + "epoch": 0.9182089552238806, + "grad_norm": 0.8322762071581037, + "learning_rate": 8.723074057349568e-08, + "loss": 0.3267, + "step": 3076 + }, + { + "epoch": 0.9185074626865671, + "grad_norm": 0.8218742132897513, + "learning_rate": 8.659887675328826e-08, + "loss": 0.3525, + "step": 3077 + }, + { + "epoch": 0.9188059701492537, + "grad_norm": 0.7524701289147285, + "learning_rate": 8.596926940430406e-08, + "loss": 0.347, + "step": 3078 + }, + { + "epoch": 0.9191044776119403, + "grad_norm": 0.7973411802668999, + "learning_rate": 8.534191911521106e-08, + "loss": 0.3203, + "step": 3079 + }, + { + "epoch": 0.9194029850746268, + "grad_norm": 0.755081736754136, + "learning_rate": 8.471682647256619e-08, + "loss": 0.3012, + "step": 3080 + }, + { + "epoch": 0.9197014925373135, + "grad_norm": 0.7506268914890571, + "learning_rate": 8.409399206081609e-08, + "loss": 0.3365, + "step": 3081 + }, + { + "epoch": 0.92, + "grad_norm": 0.8943820675775366, + "learning_rate": 8.347341646229578e-08, + "loss": 0.3694, + "step": 3082 + }, + { + "epoch": 0.9202985074626866, + "grad_norm": 0.8565711585170784, + "learning_rate": 8.285510025722781e-08, + "loss": 0.3066, + "step": 3083 + }, + { + "epoch": 0.9205970149253732, + "grad_norm": 0.6915296836970478, + "learning_rate": 8.223904402372334e-08, + "loss": 0.2471, + "step": 3084 + }, + { + "epoch": 0.9208955223880597, + "grad_norm": 0.7910425039296988, + "learning_rate": 8.16252483377794e-08, + "loss": 0.3063, + "step": 3085 + }, + { + "epoch": 0.9211940298507463, + "grad_norm": 0.8626761328335215, + "learning_rate": 8.101371377328055e-08, + "loss": 0.323, + "step": 3086 + }, + { + "epoch": 0.9214925373134328, + "grad_norm": 0.811154258383053, + "learning_rate": 8.040444090199634e-08, + "loss": 0.3412, + "step": 3087 + }, + { + "epoch": 0.9217910447761194, + "grad_norm": 0.8740316342417893, + "learning_rate": 7.97974302935825e-08, + "loss": 0.3647, + "step": 3088 + }, + { + "epoch": 0.922089552238806, + "grad_norm": 0.9132701160611257, + "learning_rate": 7.919268251557838e-08, + "loss": 0.3722, + "step": 3089 + }, + { + "epoch": 0.9223880597014925, + "grad_norm": 0.7691408535525245, + "learning_rate": 7.85901981334089e-08, + "loss": 0.3096, + "step": 3090 + }, + { + "epoch": 0.9226865671641791, + "grad_norm": 0.839417162083958, + "learning_rate": 7.798997771038236e-08, + "loss": 0.3487, + "step": 3091 + }, + { + "epoch": 0.9229850746268656, + "grad_norm": 0.869748021900408, + "learning_rate": 7.739202180769013e-08, + "loss": 0.3477, + "step": 3092 + }, + { + "epoch": 0.9232835820895522, + "grad_norm": 0.8159308852427606, + "learning_rate": 7.679633098440609e-08, + "loss": 0.3281, + "step": 3093 + }, + { + "epoch": 0.9235820895522389, + "grad_norm": 0.9129695074897101, + "learning_rate": 7.620290579748723e-08, + "loss": 0.356, + "step": 3094 + }, + { + "epoch": 0.9238805970149254, + "grad_norm": 0.8713069828640914, + "learning_rate": 7.561174680177114e-08, + "loss": 0.329, + "step": 3095 + }, + { + "epoch": 0.924179104477612, + "grad_norm": 0.7559944876673397, + "learning_rate": 7.502285454997732e-08, + "loss": 0.3056, + "step": 3096 + }, + { + "epoch": 0.9244776119402985, + "grad_norm": 0.829846374274675, + "learning_rate": 7.443622959270535e-08, + "loss": 0.3374, + "step": 3097 + }, + { + "epoch": 0.9247761194029851, + "grad_norm": 0.9373023004429427, + "learning_rate": 7.385187247843567e-08, + "loss": 0.3328, + "step": 3098 + }, + { + "epoch": 0.9250746268656717, + "grad_norm": 0.7512508155243499, + "learning_rate": 7.32697837535279e-08, + "loss": 0.3161, + "step": 3099 + }, + { + "epoch": 0.9253731343283582, + "grad_norm": 0.7853324032451395, + "learning_rate": 7.268996396222056e-08, + "loss": 0.3507, + "step": 3100 + }, + { + "epoch": 0.9256716417910448, + "grad_norm": 0.8502143296601243, + "learning_rate": 7.211241364663113e-08, + "loss": 0.3854, + "step": 3101 + }, + { + "epoch": 0.9259701492537313, + "grad_norm": 0.8250790098331866, + "learning_rate": 7.153713334675516e-08, + "loss": 0.3361, + "step": 3102 + }, + { + "epoch": 0.9262686567164179, + "grad_norm": 0.8122977611367114, + "learning_rate": 7.096412360046545e-08, + "loss": 0.3371, + "step": 3103 + }, + { + "epoch": 0.9265671641791045, + "grad_norm": 0.8020325644972325, + "learning_rate": 7.039338494351261e-08, + "loss": 0.327, + "step": 3104 + }, + { + "epoch": 0.926865671641791, + "grad_norm": 0.8098687914753534, + "learning_rate": 6.982491790952284e-08, + "loss": 0.3228, + "step": 3105 + }, + { + "epoch": 0.9271641791044776, + "grad_norm": 0.8343057527737529, + "learning_rate": 6.925872302999931e-08, + "loss": 0.3383, + "step": 3106 + }, + { + "epoch": 0.9274626865671641, + "grad_norm": 0.8582956934198305, + "learning_rate": 6.86948008343205e-08, + "loss": 0.3705, + "step": 3107 + }, + { + "epoch": 0.9277611940298508, + "grad_norm": 0.8051927725802557, + "learning_rate": 6.813315184973968e-08, + "loss": 0.3325, + "step": 3108 + }, + { + "epoch": 0.9280597014925374, + "grad_norm": 0.7892911817116857, + "learning_rate": 6.757377660138508e-08, + "loss": 0.3413, + "step": 3109 + }, + { + "epoch": 0.9283582089552239, + "grad_norm": 0.7927728140846071, + "learning_rate": 6.701667561225894e-08, + "loss": 0.3533, + "step": 3110 + }, + { + "epoch": 0.9286567164179105, + "grad_norm": 0.8148550066294056, + "learning_rate": 6.64618494032368e-08, + "loss": 0.3803, + "step": 3111 + }, + { + "epoch": 0.928955223880597, + "grad_norm": 0.8284362735918354, + "learning_rate": 6.590929849306788e-08, + "loss": 0.3344, + "step": 3112 + }, + { + "epoch": 0.9292537313432836, + "grad_norm": 0.7868083459028574, + "learning_rate": 6.535902339837392e-08, + "loss": 0.3141, + "step": 3113 + }, + { + "epoch": 0.9295522388059702, + "grad_norm": 0.8202287813945078, + "learning_rate": 6.481102463364864e-08, + "loss": 0.3267, + "step": 3114 + }, + { + "epoch": 0.9298507462686567, + "grad_norm": 1.011406528561342, + "learning_rate": 6.426530271125775e-08, + "loss": 0.3625, + "step": 3115 + }, + { + "epoch": 0.9301492537313433, + "grad_norm": 0.9568538573975606, + "learning_rate": 6.372185814143756e-08, + "loss": 0.3092, + "step": 3116 + }, + { + "epoch": 0.9304477611940298, + "grad_norm": 0.8120426908903172, + "learning_rate": 6.31806914322955e-08, + "loss": 0.2998, + "step": 3117 + }, + { + "epoch": 0.9307462686567164, + "grad_norm": 0.8154259717600727, + "learning_rate": 6.264180308980933e-08, + "loss": 0.3441, + "step": 3118 + }, + { + "epoch": 0.931044776119403, + "grad_norm": 0.8366039008724436, + "learning_rate": 6.210519361782685e-08, + "loss": 0.3123, + "step": 3119 + }, + { + "epoch": 0.9313432835820895, + "grad_norm": 0.7249537665139266, + "learning_rate": 6.157086351806451e-08, + "loss": 0.3187, + "step": 3120 + }, + { + "epoch": 0.9316417910447761, + "grad_norm": 0.834336635014129, + "learning_rate": 6.103881329010797e-08, + "loss": 0.3238, + "step": 3121 + }, + { + "epoch": 0.9319402985074627, + "grad_norm": 0.921548871205445, + "learning_rate": 6.050904343141095e-08, + "loss": 0.3376, + "step": 3122 + }, + { + "epoch": 0.9322388059701493, + "grad_norm": 0.8938173280654804, + "learning_rate": 5.998155443729586e-08, + "loss": 0.3725, + "step": 3123 + }, + { + "epoch": 0.9325373134328359, + "grad_norm": 0.8368698873876483, + "learning_rate": 5.9456346800951805e-08, + "loss": 0.352, + "step": 3124 + }, + { + "epoch": 0.9328358208955224, + "grad_norm": 0.8916547607557948, + "learning_rate": 5.8933421013435135e-08, + "loss": 0.3883, + "step": 3125 + }, + { + "epoch": 0.933134328358209, + "grad_norm": 0.8029972819508524, + "learning_rate": 5.841277756366892e-08, + "loss": 0.273, + "step": 3126 + }, + { + "epoch": 0.9334328358208955, + "grad_norm": 0.9090008716050973, + "learning_rate": 5.7894416938441834e-08, + "loss": 0.3846, + "step": 3127 + }, + { + "epoch": 0.9337313432835821, + "grad_norm": 0.737087695091503, + "learning_rate": 5.737833962240841e-08, + "loss": 0.3197, + "step": 3128 + }, + { + "epoch": 0.9340298507462687, + "grad_norm": 0.9304110314164876, + "learning_rate": 5.68645460980885e-08, + "loss": 0.3527, + "step": 3129 + }, + { + "epoch": 0.9343283582089552, + "grad_norm": 0.8089432663681636, + "learning_rate": 5.635303684586646e-08, + "loss": 0.3151, + "step": 3130 + }, + { + "epoch": 0.9346268656716418, + "grad_norm": 1.165243151408962, + "learning_rate": 5.584381234399111e-08, + "loss": 0.3267, + "step": 3131 + }, + { + "epoch": 0.9349253731343283, + "grad_norm": 0.7348025145722581, + "learning_rate": 5.533687306857466e-08, + "loss": 0.2696, + "step": 3132 + }, + { + "epoch": 0.9352238805970149, + "grad_norm": 0.8262555992907743, + "learning_rate": 5.483221949359324e-08, + "loss": 0.3516, + "step": 3133 + }, + { + "epoch": 0.9355223880597014, + "grad_norm": 0.9156843006759944, + "learning_rate": 5.432985209088526e-08, + "loss": 0.3481, + "step": 3134 + }, + { + "epoch": 0.935820895522388, + "grad_norm": 0.8572239132276424, + "learning_rate": 5.3829771330152495e-08, + "loss": 0.3254, + "step": 3135 + }, + { + "epoch": 0.9361194029850747, + "grad_norm": 1.028962020667184, + "learning_rate": 5.3331977678958145e-08, + "loss": 0.3123, + "step": 3136 + }, + { + "epoch": 0.9364179104477612, + "grad_norm": 0.83452568472891, + "learning_rate": 5.2836471602727144e-08, + "loss": 0.3421, + "step": 3137 + }, + { + "epoch": 0.9367164179104478, + "grad_norm": 0.8286325370096002, + "learning_rate": 5.234325356474529e-08, + "loss": 0.3499, + "step": 3138 + }, + { + "epoch": 0.9370149253731344, + "grad_norm": 0.8160036150223631, + "learning_rate": 5.185232402615953e-08, + "loss": 0.3105, + "step": 3139 + }, + { + "epoch": 0.9373134328358209, + "grad_norm": 0.8066012270688009, + "learning_rate": 5.1363683445977144e-08, + "loss": 0.3075, + "step": 3140 + }, + { + "epoch": 0.9376119402985075, + "grad_norm": 0.8033304417193656, + "learning_rate": 5.087733228106517e-08, + "loss": 0.3218, + "step": 3141 + }, + { + "epoch": 0.937910447761194, + "grad_norm": 0.8465912538140584, + "learning_rate": 5.0393270986150155e-08, + "loss": 0.3174, + "step": 3142 + }, + { + "epoch": 0.9382089552238806, + "grad_norm": 0.8023677069921564, + "learning_rate": 4.991150001381756e-08, + "loss": 0.3562, + "step": 3143 + }, + { + "epoch": 0.9385074626865672, + "grad_norm": 0.9290258509536511, + "learning_rate": 4.9432019814511235e-08, + "loss": 0.3718, + "step": 3144 + }, + { + "epoch": 0.9388059701492537, + "grad_norm": 0.8889616159939564, + "learning_rate": 4.8954830836533963e-08, + "loss": 0.3495, + "step": 3145 + }, + { + "epoch": 0.9391044776119403, + "grad_norm": 0.811509055722367, + "learning_rate": 4.847993352604524e-08, + "loss": 0.3429, + "step": 3146 + }, + { + "epoch": 0.9394029850746268, + "grad_norm": 0.8199197753898051, + "learning_rate": 4.800732832706323e-08, + "loss": 0.3656, + "step": 3147 + }, + { + "epoch": 0.9397014925373134, + "grad_norm": 0.9063245467477791, + "learning_rate": 4.753701568146168e-08, + "loss": 0.3317, + "step": 3148 + }, + { + "epoch": 0.94, + "grad_norm": 0.8342353851057329, + "learning_rate": 4.706899602897136e-08, + "loss": 0.3312, + "step": 3149 + }, + { + "epoch": 0.9402985074626866, + "grad_norm": 0.8720693495116465, + "learning_rate": 4.6603269807179716e-08, + "loss": 0.3593, + "step": 3150 + }, + { + "epoch": 0.9405970149253732, + "grad_norm": 0.8586192607934766, + "learning_rate": 4.6139837451529004e-08, + "loss": 0.2832, + "step": 3151 + }, + { + "epoch": 0.9408955223880597, + "grad_norm": 0.9695827592543284, + "learning_rate": 4.5678699395317326e-08, + "loss": 0.409, + "step": 3152 + }, + { + "epoch": 0.9411940298507463, + "grad_norm": 0.8419990980437638, + "learning_rate": 4.5219856069697866e-08, + "loss": 0.3268, + "step": 3153 + }, + { + "epoch": 0.9414925373134329, + "grad_norm": 0.8452212009450679, + "learning_rate": 4.476330790367717e-08, + "loss": 0.3211, + "step": 3154 + }, + { + "epoch": 0.9417910447761194, + "grad_norm": 0.8498008661362966, + "learning_rate": 4.4309055324117386e-08, + "loss": 0.3485, + "step": 3155 + }, + { + "epoch": 0.942089552238806, + "grad_norm": 0.8431790042502505, + "learning_rate": 4.385709875573324e-08, + "loss": 0.3482, + "step": 3156 + }, + { + "epoch": 0.9423880597014925, + "grad_norm": 0.8448462756069092, + "learning_rate": 4.340743862109309e-08, + "loss": 0.3571, + "step": 3157 + }, + { + "epoch": 0.9426865671641791, + "grad_norm": 0.8098626117139088, + "learning_rate": 4.296007534061869e-08, + "loss": 0.3451, + "step": 3158 + }, + { + "epoch": 0.9429850746268656, + "grad_norm": 0.9323258425698503, + "learning_rate": 4.2515009332582954e-08, + "loss": 0.3325, + "step": 3159 + }, + { + "epoch": 0.9432835820895522, + "grad_norm": 0.8628485400460011, + "learning_rate": 4.207224101311247e-08, + "loss": 0.3334, + "step": 3160 + }, + { + "epoch": 0.9435820895522388, + "grad_norm": 0.8956281853483311, + "learning_rate": 4.163177079618441e-08, + "loss": 0.3362, + "step": 3161 + }, + { + "epoch": 0.9438805970149253, + "grad_norm": 0.8074849186203403, + "learning_rate": 4.1193599093627964e-08, + "loss": 0.3114, + "step": 3162 + }, + { + "epoch": 0.9441791044776119, + "grad_norm": 0.8490755318936722, + "learning_rate": 4.0757726315122646e-08, + "loss": 0.3558, + "step": 3163 + }, + { + "epoch": 0.9444776119402986, + "grad_norm": 0.7610965070669408, + "learning_rate": 4.032415286819941e-08, + "loss": 0.3158, + "step": 3164 + }, + { + "epoch": 0.9447761194029851, + "grad_norm": 0.7655473588220373, + "learning_rate": 3.989287915823842e-08, + "loss": 0.3121, + "step": 3165 + }, + { + "epoch": 0.9450746268656717, + "grad_norm": 0.8444397049637858, + "learning_rate": 3.9463905588470186e-08, + "loss": 0.3323, + "step": 3166 + }, + { + "epoch": 0.9453731343283582, + "grad_norm": 0.8236973140905199, + "learning_rate": 3.9037232559974714e-08, + "loss": 0.3348, + "step": 3167 + }, + { + "epoch": 0.9456716417910448, + "grad_norm": 0.8003724680619415, + "learning_rate": 3.861286047168067e-08, + "loss": 0.3795, + "step": 3168 + }, + { + "epoch": 0.9459701492537314, + "grad_norm": 1.0280919880161592, + "learning_rate": 3.8190789720365665e-08, + "loss": 0.3209, + "step": 3169 + }, + { + "epoch": 0.9462686567164179, + "grad_norm": 0.8000546455088566, + "learning_rate": 3.777102070065569e-08, + "loss": 0.3505, + "step": 3170 + }, + { + "epoch": 0.9465671641791045, + "grad_norm": 0.8935839440883604, + "learning_rate": 3.735355380502431e-08, + "loss": 0.3476, + "step": 3171 + }, + { + "epoch": 0.946865671641791, + "grad_norm": 0.8848630298774833, + "learning_rate": 3.693838942379291e-08, + "loss": 0.3168, + "step": 3172 + }, + { + "epoch": 0.9471641791044776, + "grad_norm": 0.7685629417514018, + "learning_rate": 3.6525527945130424e-08, + "loss": 0.3267, + "step": 3173 + }, + { + "epoch": 0.9474626865671641, + "grad_norm": 0.7874491430144344, + "learning_rate": 3.611496975505169e-08, + "loss": 0.3076, + "step": 3174 + }, + { + "epoch": 0.9477611940298507, + "grad_norm": 0.8513167764285567, + "learning_rate": 3.5706715237419366e-08, + "loss": 0.3351, + "step": 3175 + }, + { + "epoch": 0.9480597014925373, + "grad_norm": 0.7679536427243635, + "learning_rate": 3.5300764773940896e-08, + "loss": 0.333, + "step": 3176 + }, + { + "epoch": 0.9483582089552239, + "grad_norm": 0.7672420889067244, + "learning_rate": 3.4897118744170175e-08, + "loss": 0.3435, + "step": 3177 + }, + { + "epoch": 0.9486567164179105, + "grad_norm": 0.872282837076114, + "learning_rate": 3.4495777525506703e-08, + "loss": 0.3501, + "step": 3178 + }, + { + "epoch": 0.948955223880597, + "grad_norm": 0.8460843443255879, + "learning_rate": 3.4096741493194196e-08, + "loss": 0.3564, + "step": 3179 + }, + { + "epoch": 0.9492537313432836, + "grad_norm": 0.8100002833851134, + "learning_rate": 3.3700011020322e-08, + "loss": 0.3143, + "step": 3180 + }, + { + "epoch": 0.9495522388059702, + "grad_norm": 0.7984025736476822, + "learning_rate": 3.330558647782312e-08, + "loss": 0.3666, + "step": 3181 + }, + { + "epoch": 0.9498507462686567, + "grad_norm": 0.754287300991587, + "learning_rate": 3.291346823447533e-08, + "loss": 0.3031, + "step": 3182 + }, + { + "epoch": 0.9501492537313433, + "grad_norm": 0.7752734270024597, + "learning_rate": 3.252365665689955e-08, + "loss": 0.3004, + "step": 3183 + }, + { + "epoch": 0.9504477611940298, + "grad_norm": 0.8191799374185267, + "learning_rate": 3.213615210955978e-08, + "loss": 0.3527, + "step": 3184 + }, + { + "epoch": 0.9507462686567164, + "grad_norm": 0.907730673037401, + "learning_rate": 3.1750954954763716e-08, + "loss": 0.3272, + "step": 3185 + }, + { + "epoch": 0.951044776119403, + "grad_norm": 0.767757547991333, + "learning_rate": 3.136806555266103e-08, + "loss": 0.278, + "step": 3186 + }, + { + "epoch": 0.9513432835820895, + "grad_norm": 0.8243908687352361, + "learning_rate": 3.098748426124398e-08, + "loss": 0.3542, + "step": 3187 + }, + { + "epoch": 0.9516417910447761, + "grad_norm": 0.712848097621701, + "learning_rate": 3.0609211436347095e-08, + "loss": 0.2936, + "step": 3188 + }, + { + "epoch": 0.9519402985074626, + "grad_norm": 0.8024929014526022, + "learning_rate": 3.02332474316458e-08, + "loss": 0.325, + "step": 3189 + }, + { + "epoch": 0.9522388059701492, + "grad_norm": 0.8494487804866518, + "learning_rate": 2.985959259865778e-08, + "loss": 0.3904, + "step": 3190 + }, + { + "epoch": 0.9525373134328359, + "grad_norm": 0.7778696644607079, + "learning_rate": 2.9488247286740546e-08, + "loss": 0.3079, + "step": 3191 + }, + { + "epoch": 0.9528358208955224, + "grad_norm": 0.7349126770422241, + "learning_rate": 2.9119211843093574e-08, + "loss": 0.2997, + "step": 3192 + }, + { + "epoch": 0.953134328358209, + "grad_norm": 0.7872392631708472, + "learning_rate": 2.8752486612755593e-08, + "loss": 0.2926, + "step": 3193 + }, + { + "epoch": 0.9534328358208956, + "grad_norm": 0.8767962603475438, + "learning_rate": 2.8388071938605655e-08, + "loss": 0.3712, + "step": 3194 + }, + { + "epoch": 0.9537313432835821, + "grad_norm": 0.779215026325899, + "learning_rate": 2.802596816136316e-08, + "loss": 0.3147, + "step": 3195 + }, + { + "epoch": 0.9540298507462687, + "grad_norm": 0.7598752044921813, + "learning_rate": 2.766617561958618e-08, + "loss": 0.3079, + "step": 3196 + }, + { + "epoch": 0.9543283582089552, + "grad_norm": 0.7165696161507437, + "learning_rate": 2.7308694649671453e-08, + "loss": 0.3027, + "step": 3197 + }, + { + "epoch": 0.9546268656716418, + "grad_norm": 0.8812874250973933, + "learning_rate": 2.6953525585855233e-08, + "loss": 0.3666, + "step": 3198 + }, + { + "epoch": 0.9549253731343283, + "grad_norm": 0.876566087660891, + "learning_rate": 2.660066876021189e-08, + "loss": 0.3457, + "step": 3199 + }, + { + "epoch": 0.9552238805970149, + "grad_norm": 0.7854233313802679, + "learning_rate": 2.625012450265446e-08, + "loss": 0.3486, + "step": 3200 + }, + { + "epoch": 0.9555223880597015, + "grad_norm": 0.8325761566945635, + "learning_rate": 2.5901893140932444e-08, + "loss": 0.3254, + "step": 3201 + }, + { + "epoch": 0.955820895522388, + "grad_norm": 0.8166375313979258, + "learning_rate": 2.555597500063456e-08, + "loss": 0.3446, + "step": 3202 + }, + { + "epoch": 0.9561194029850746, + "grad_norm": 0.9210937286901646, + "learning_rate": 2.521237040518515e-08, + "loss": 0.3436, + "step": 3203 + }, + { + "epoch": 0.9564179104477611, + "grad_norm": 0.7559699274090332, + "learning_rate": 2.4871079675846398e-08, + "loss": 0.3649, + "step": 3204 + }, + { + "epoch": 0.9567164179104478, + "grad_norm": 0.7400183635230114, + "learning_rate": 2.4532103131716668e-08, + "loss": 0.2962, + "step": 3205 + }, + { + "epoch": 0.9570149253731344, + "grad_norm": 0.7802880622650665, + "learning_rate": 2.419544108973104e-08, + "loss": 0.2953, + "step": 3206 + }, + { + "epoch": 0.9573134328358209, + "grad_norm": 0.8494660495105342, + "learning_rate": 2.3861093864660233e-08, + "loss": 0.3352, + "step": 3207 + }, + { + "epoch": 0.9576119402985075, + "grad_norm": 0.8112621260443245, + "learning_rate": 2.3529061769110573e-08, + "loss": 0.3118, + "step": 3208 + }, + { + "epoch": 0.957910447761194, + "grad_norm": 0.7589488656121378, + "learning_rate": 2.3199345113524007e-08, + "loss": 0.2918, + "step": 3209 + }, + { + "epoch": 0.9582089552238806, + "grad_norm": 0.8313292700601174, + "learning_rate": 2.287194420617783e-08, + "loss": 0.3353, + "step": 3210 + }, + { + "epoch": 0.9585074626865672, + "grad_norm": 0.871232307176741, + "learning_rate": 2.254685935318357e-08, + "loss": 0.273, + "step": 3211 + }, + { + "epoch": 0.9588059701492537, + "grad_norm": 0.8200691382729434, + "learning_rate": 2.222409085848809e-08, + "loss": 0.3358, + "step": 3212 + }, + { + "epoch": 0.9591044776119403, + "grad_norm": 0.8045701864458568, + "learning_rate": 2.1903639023871658e-08, + "loss": 0.3378, + "step": 3213 + }, + { + "epoch": 0.9594029850746268, + "grad_norm": 0.8197181943630443, + "learning_rate": 2.1585504148949056e-08, + "loss": 0.3174, + "step": 3214 + }, + { + "epoch": 0.9597014925373134, + "grad_norm": 0.8494644190786257, + "learning_rate": 2.1269686531168456e-08, + "loss": 0.3223, + "step": 3215 + }, + { + "epoch": 0.96, + "grad_norm": 0.9396349631933137, + "learning_rate": 2.095618646581199e-08, + "loss": 0.3686, + "step": 3216 + }, + { + "epoch": 0.9602985074626865, + "grad_norm": 0.8323583157384636, + "learning_rate": 2.064500424599436e-08, + "loss": 0.3196, + "step": 3217 + }, + { + "epoch": 0.9605970149253731, + "grad_norm": 0.8892166342620139, + "learning_rate": 2.0336140162663386e-08, + "loss": 0.3216, + "step": 3218 + }, + { + "epoch": 0.9608955223880598, + "grad_norm": 0.8483612538763416, + "learning_rate": 2.002959450459918e-08, + "loss": 0.3309, + "step": 3219 + }, + { + "epoch": 0.9611940298507463, + "grad_norm": 0.7559863326126044, + "learning_rate": 1.9725367558415253e-08, + "loss": 0.3091, + "step": 3220 + }, + { + "epoch": 0.9614925373134329, + "grad_norm": 0.8020646850199934, + "learning_rate": 1.9423459608555462e-08, + "loss": 0.3299, + "step": 3221 + }, + { + "epoch": 0.9617910447761194, + "grad_norm": 0.7418019278634108, + "learning_rate": 1.912387093729706e-08, + "loss": 0.302, + "step": 3222 + }, + { + "epoch": 0.962089552238806, + "grad_norm": 0.7754195649660184, + "learning_rate": 1.8826601824747936e-08, + "loss": 0.3138, + "step": 3223 + }, + { + "epoch": 0.9623880597014925, + "grad_norm": 0.9516965112975473, + "learning_rate": 1.8531652548847146e-08, + "loss": 0.384, + "step": 3224 + }, + { + "epoch": 0.9626865671641791, + "grad_norm": 0.8417351207690223, + "learning_rate": 1.8239023385365484e-08, + "loss": 0.3457, + "step": 3225 + }, + { + "epoch": 0.9629850746268657, + "grad_norm": 0.7831713431001753, + "learning_rate": 1.7948714607903816e-08, + "loss": 0.3093, + "step": 3226 + }, + { + "epoch": 0.9632835820895522, + "grad_norm": 0.7856443433041905, + "learning_rate": 1.7660726487894188e-08, + "loss": 0.314, + "step": 3227 + }, + { + "epoch": 0.9635820895522388, + "grad_norm": 0.872583774357775, + "learning_rate": 1.7375059294598152e-08, + "loss": 0.3313, + "step": 3228 + }, + { + "epoch": 0.9638805970149253, + "grad_norm": 0.7486216858015543, + "learning_rate": 1.7091713295107337e-08, + "loss": 0.2724, + "step": 3229 + }, + { + "epoch": 0.9641791044776119, + "grad_norm": 0.8129958391856543, + "learning_rate": 1.6810688754343717e-08, + "loss": 0.3504, + "step": 3230 + }, + { + "epoch": 0.9644776119402985, + "grad_norm": 0.7739654221427428, + "learning_rate": 1.6531985935058504e-08, + "loss": 0.2831, + "step": 3231 + }, + { + "epoch": 0.964776119402985, + "grad_norm": 0.8564741384154801, + "learning_rate": 1.6255605097831584e-08, + "loss": 0.3567, + "step": 3232 + }, + { + "epoch": 0.9650746268656717, + "grad_norm": 0.8973102182259116, + "learning_rate": 1.598154650107264e-08, + "loss": 0.3538, + "step": 3233 + }, + { + "epoch": 0.9653731343283583, + "grad_norm": 0.8793936915110651, + "learning_rate": 1.570981040101949e-08, + "loss": 0.3381, + "step": 3234 + }, + { + "epoch": 0.9656716417910448, + "grad_norm": 0.7753860087917981, + "learning_rate": 1.5440397051739163e-08, + "loss": 0.2941, + "step": 3235 + }, + { + "epoch": 0.9659701492537314, + "grad_norm": 0.7683611135195979, + "learning_rate": 1.517330670512629e-08, + "loss": 0.3361, + "step": 3236 + }, + { + "epoch": 0.9662686567164179, + "grad_norm": 0.8618392461978379, + "learning_rate": 1.4908539610903882e-08, + "loss": 0.3968, + "step": 3237 + }, + { + "epoch": 0.9665671641791045, + "grad_norm": 0.7885885536521883, + "learning_rate": 1.4646096016622813e-08, + "loss": 0.3317, + "step": 3238 + }, + { + "epoch": 0.966865671641791, + "grad_norm": 0.8240073075438972, + "learning_rate": 1.4385976167661241e-08, + "loss": 0.3054, + "step": 3239 + }, + { + "epoch": 0.9671641791044776, + "grad_norm": 0.8544446967641939, + "learning_rate": 1.412818030722546e-08, + "loss": 0.3274, + "step": 3240 + }, + { + "epoch": 0.9674626865671642, + "grad_norm": 0.7981604304770294, + "learning_rate": 1.387270867634738e-08, + "loss": 0.3452, + "step": 3241 + }, + { + "epoch": 0.9677611940298507, + "grad_norm": 0.7709030094314585, + "learning_rate": 1.3619561513887603e-08, + "loss": 0.3228, + "step": 3242 + }, + { + "epoch": 0.9680597014925373, + "grad_norm": 0.9060585553579815, + "learning_rate": 1.33687390565318e-08, + "loss": 0.3067, + "step": 3243 + }, + { + "epoch": 0.9683582089552238, + "grad_norm": 0.819120949141105, + "learning_rate": 1.3120241538793487e-08, + "loss": 0.3123, + "step": 3244 + }, + { + "epoch": 0.9686567164179104, + "grad_norm": 0.7972272943948284, + "learning_rate": 1.287406919301154e-08, + "loss": 0.3486, + "step": 3245 + }, + { + "epoch": 0.968955223880597, + "grad_norm": 0.8855995326432275, + "learning_rate": 1.2630222249351287e-08, + "loss": 0.3682, + "step": 3246 + }, + { + "epoch": 0.9692537313432836, + "grad_norm": 0.7894355860076914, + "learning_rate": 1.2388700935803133e-08, + "loss": 0.3357, + "step": 3247 + }, + { + "epoch": 0.9695522388059702, + "grad_norm": 0.8366342328352925, + "learning_rate": 1.214950547818422e-08, + "loss": 0.35, + "step": 3248 + }, + { + "epoch": 0.9698507462686567, + "grad_norm": 0.8043634556674311, + "learning_rate": 1.191263610013621e-08, + "loss": 0.3296, + "step": 3249 + }, + { + "epoch": 0.9701492537313433, + "grad_norm": 0.8860564254201281, + "learning_rate": 1.1678093023126392e-08, + "loss": 0.3182, + "step": 3250 + }, + { + "epoch": 0.9704477611940299, + "grad_norm": 0.8007581402509059, + "learning_rate": 1.144587646644657e-08, + "loss": 0.3505, + "step": 3251 + }, + { + "epoch": 0.9707462686567164, + "grad_norm": 0.7966262785299418, + "learning_rate": 1.121598664721335e-08, + "loss": 0.34, + "step": 3252 + }, + { + "epoch": 0.971044776119403, + "grad_norm": 0.7323979717681582, + "learning_rate": 1.0988423780368685e-08, + "loss": 0.2943, + "step": 3253 + }, + { + "epoch": 0.9713432835820895, + "grad_norm": 0.8685782481705663, + "learning_rate": 1.0763188078678211e-08, + "loss": 0.2686, + "step": 3254 + }, + { + "epoch": 0.9716417910447761, + "grad_norm": 0.8163158923186468, + "learning_rate": 1.0540279752731252e-08, + "loss": 0.3165, + "step": 3255 + }, + { + "epoch": 0.9719402985074627, + "grad_norm": 0.8202125520717879, + "learning_rate": 1.0319699010942207e-08, + "loss": 0.2752, + "step": 3256 + }, + { + "epoch": 0.9722388059701492, + "grad_norm": 0.7860376878250228, + "learning_rate": 1.0101446059548604e-08, + "loss": 0.3302, + "step": 3257 + }, + { + "epoch": 0.9725373134328358, + "grad_norm": 0.7983641641459549, + "learning_rate": 9.88552110261165e-09, + "loss": 0.3623, + "step": 3258 + }, + { + "epoch": 0.9728358208955223, + "grad_norm": 0.8275216318089765, + "learning_rate": 9.671924342015692e-09, + "loss": 0.354, + "step": 3259 + }, + { + "epoch": 0.9731343283582089, + "grad_norm": 0.8231617072873623, + "learning_rate": 9.460655977468757e-09, + "loss": 0.2916, + "step": 3260 + }, + { + "epoch": 0.9734328358208956, + "grad_norm": 0.8054748071183675, + "learning_rate": 9.251716206501449e-09, + "loss": 0.3232, + "step": 3261 + }, + { + "epoch": 0.9737313432835821, + "grad_norm": 0.9502388363988287, + "learning_rate": 9.045105224467221e-09, + "loss": 0.374, + "step": 3262 + }, + { + "epoch": 0.9740298507462687, + "grad_norm": 0.8180489799155873, + "learning_rate": 8.84082322454266e-09, + "loss": 0.2988, + "step": 3263 + }, + { + "epoch": 0.9743283582089552, + "grad_norm": 0.8508694679094144, + "learning_rate": 8.638870397726374e-09, + "loss": 0.352, + "step": 3264 + }, + { + "epoch": 0.9746268656716418, + "grad_norm": 0.8289100432687119, + "learning_rate": 8.439246932839262e-09, + "loss": 0.3237, + "step": 3265 + }, + { + "epoch": 0.9749253731343284, + "grad_norm": 0.8121348616630165, + "learning_rate": 8.241953016524251e-09, + "loss": 0.3371, + "step": 3266 + }, + { + "epoch": 0.9752238805970149, + "grad_norm": 0.8052003128568289, + "learning_rate": 8.04698883324656e-09, + "loss": 0.3187, + "step": 3267 + }, + { + "epoch": 0.9755223880597015, + "grad_norm": 0.7856612139320543, + "learning_rate": 7.854354565292877e-09, + "loss": 0.3022, + "step": 3268 + }, + { + "epoch": 0.975820895522388, + "grad_norm": 0.8114505322634475, + "learning_rate": 7.66405039277135e-09, + "loss": 0.3531, + "step": 3269 + }, + { + "epoch": 0.9761194029850746, + "grad_norm": 0.9170043571131018, + "learning_rate": 7.47607649361215e-09, + "loss": 0.3955, + "step": 3270 + }, + { + "epoch": 0.9764179104477612, + "grad_norm": 0.8428130140130116, + "learning_rate": 7.290433043565803e-09, + "loss": 0.3136, + "step": 3271 + }, + { + "epoch": 0.9767164179104477, + "grad_norm": 0.843369641143616, + "learning_rate": 7.107120216205132e-09, + "loss": 0.3433, + "step": 3272 + }, + { + "epoch": 0.9770149253731343, + "grad_norm": 0.7547310709601116, + "learning_rate": 6.926138182922204e-09, + "loss": 0.2943, + "step": 3273 + }, + { + "epoch": 0.977313432835821, + "grad_norm": 0.7834878975003178, + "learning_rate": 6.747487112931661e-09, + "loss": 0.3006, + "step": 3274 + }, + { + "epoch": 0.9776119402985075, + "grad_norm": 0.8222964720647756, + "learning_rate": 6.57116717326739e-09, + "loss": 0.3323, + "step": 3275 + }, + { + "epoch": 0.9779104477611941, + "grad_norm": 0.7184401155481505, + "learning_rate": 6.397178528784464e-09, + "loss": 0.2594, + "step": 3276 + }, + { + "epoch": 0.9782089552238806, + "grad_norm": 0.8094077697755245, + "learning_rate": 6.225521342158036e-09, + "loss": 0.3232, + "step": 3277 + }, + { + "epoch": 0.9785074626865672, + "grad_norm": 0.8180072345165923, + "learning_rate": 6.056195773883056e-09, + "loss": 0.3605, + "step": 3278 + }, + { + "epoch": 0.9788059701492537, + "grad_norm": 0.8328869774640257, + "learning_rate": 5.889201982275383e-09, + "loss": 0.3574, + "step": 3279 + }, + { + "epoch": 0.9791044776119403, + "grad_norm": 0.80665947348926, + "learning_rate": 5.724540123469569e-09, + "loss": 0.3129, + "step": 3280 + }, + { + "epoch": 0.9794029850746269, + "grad_norm": 0.803178162202097, + "learning_rate": 5.562210351420794e-09, + "loss": 0.3278, + "step": 3281 + }, + { + "epoch": 0.9797014925373134, + "grad_norm": 0.7597600431927148, + "learning_rate": 5.402212817903207e-09, + "loss": 0.2825, + "step": 3282 + }, + { + "epoch": 0.98, + "grad_norm": 0.7663037088333483, + "learning_rate": 5.244547672510758e-09, + "loss": 0.2932, + "step": 3283 + }, + { + "epoch": 0.9802985074626865, + "grad_norm": 0.9425580206593992, + "learning_rate": 5.0892150626566384e-09, + "loss": 0.3402, + "step": 3284 + }, + { + "epoch": 0.9805970149253731, + "grad_norm": 0.8617742011068167, + "learning_rate": 4.93621513357273e-09, + "loss": 0.3574, + "step": 3285 + }, + { + "epoch": 0.9808955223880597, + "grad_norm": 0.9143536666483425, + "learning_rate": 4.785548028310438e-09, + "loss": 0.3447, + "step": 3286 + }, + { + "epoch": 0.9811940298507462, + "grad_norm": 0.7753440496822905, + "learning_rate": 4.637213887739856e-09, + "loss": 0.3166, + "step": 3287 + }, + { + "epoch": 0.9814925373134329, + "grad_norm": 0.870713877541679, + "learning_rate": 4.4912128505497644e-09, + "loss": 0.3141, + "step": 3288 + }, + { + "epoch": 0.9817910447761194, + "grad_norm": 0.9813206925694251, + "learning_rate": 4.347545053247637e-09, + "loss": 0.3278, + "step": 3289 + }, + { + "epoch": 0.982089552238806, + "grad_norm": 0.8586592952573566, + "learning_rate": 4.20621063015908e-09, + "loss": 0.3509, + "step": 3290 + }, + { + "epoch": 0.9823880597014926, + "grad_norm": 0.7818349485955345, + "learning_rate": 4.067209713428388e-09, + "loss": 0.3258, + "step": 3291 + }, + { + "epoch": 0.9826865671641791, + "grad_norm": 0.7927890018750996, + "learning_rate": 3.930542433018547e-09, + "loss": 0.3466, + "step": 3292 + }, + { + "epoch": 0.9829850746268657, + "grad_norm": 0.7627515359114002, + "learning_rate": 3.796208916709565e-09, + "loss": 0.3308, + "step": 3293 + }, + { + "epoch": 0.9832835820895522, + "grad_norm": 0.8615140995217084, + "learning_rate": 3.66420929010014e-09, + "loss": 0.3603, + "step": 3294 + }, + { + "epoch": 0.9835820895522388, + "grad_norm": 0.8056178237621043, + "learning_rate": 3.5345436766065498e-09, + "loss": 0.3186, + "step": 3295 + }, + { + "epoch": 0.9838805970149254, + "grad_norm": 0.8976274390176755, + "learning_rate": 3.407212197463483e-09, + "loss": 0.3176, + "step": 3296 + }, + { + "epoch": 0.9841791044776119, + "grad_norm": 0.896342713527404, + "learning_rate": 3.282214971722375e-09, + "loss": 0.345, + "step": 3297 + }, + { + "epoch": 0.9844776119402985, + "grad_norm": 0.8134845252531054, + "learning_rate": 3.159552116252795e-09, + "loss": 0.3436, + "step": 3298 + }, + { + "epoch": 0.984776119402985, + "grad_norm": 0.7899298838638901, + "learning_rate": 3.0392237457413377e-09, + "loss": 0.3398, + "step": 3299 + }, + { + "epoch": 0.9850746268656716, + "grad_norm": 0.8684483689426634, + "learning_rate": 2.9212299726921746e-09, + "loss": 0.3693, + "step": 3300 + }, + { + "epoch": 0.9853731343283582, + "grad_norm": 0.7802224704645674, + "learning_rate": 2.80557090742678e-09, + "loss": 0.3245, + "step": 3301 + }, + { + "epoch": 0.9856716417910448, + "grad_norm": 0.759169607116071, + "learning_rate": 2.6922466580830975e-09, + "loss": 0.2923, + "step": 3302 + }, + { + "epoch": 0.9859701492537314, + "grad_norm": 0.8262473666639696, + "learning_rate": 2.5812573306169263e-09, + "loss": 0.3262, + "step": 3303 + }, + { + "epoch": 0.986268656716418, + "grad_norm": 0.8618008393149142, + "learning_rate": 2.4726030288005356e-09, + "loss": 0.3574, + "step": 3304 + }, + { + "epoch": 0.9865671641791045, + "grad_norm": 0.7676567792662928, + "learning_rate": 2.3662838542229392e-09, + "loss": 0.2872, + "step": 3305 + }, + { + "epoch": 0.9868656716417911, + "grad_norm": 0.7963744228618038, + "learning_rate": 2.2622999062899e-09, + "loss": 0.3347, + "step": 3306 + }, + { + "epoch": 0.9871641791044776, + "grad_norm": 0.862404131304968, + "learning_rate": 2.160651282224202e-09, + "loss": 0.3633, + "step": 3307 + }, + { + "epoch": 0.9874626865671642, + "grad_norm": 0.8788985953060575, + "learning_rate": 2.0613380770645452e-09, + "loss": 0.3345, + "step": 3308 + }, + { + "epoch": 0.9877611940298507, + "grad_norm": 0.8092901595234544, + "learning_rate": 1.9643603836666527e-09, + "loss": 0.364, + "step": 3309 + }, + { + "epoch": 0.9880597014925373, + "grad_norm": 0.8618273355933869, + "learning_rate": 1.869718292701883e-09, + "loss": 0.3583, + "step": 3310 + }, + { + "epoch": 0.9883582089552239, + "grad_norm": 0.7424770027809615, + "learning_rate": 1.7774118926586204e-09, + "loss": 0.3108, + "step": 3311 + }, + { + "epoch": 0.9886567164179104, + "grad_norm": 0.8322502568478948, + "learning_rate": 1.6874412698408837e-09, + "loss": 0.3482, + "step": 3312 + }, + { + "epoch": 0.988955223880597, + "grad_norm": 0.767877374434005, + "learning_rate": 1.599806508368884e-09, + "loss": 0.2953, + "step": 3313 + }, + { + "epoch": 0.9892537313432835, + "grad_norm": 0.7552281111497002, + "learning_rate": 1.5145076901795785e-09, + "loss": 0.3104, + "step": 3314 + }, + { + "epoch": 0.9895522388059701, + "grad_norm": 0.8290814127495036, + "learning_rate": 1.431544895024728e-09, + "loss": 0.3276, + "step": 3315 + }, + { + "epoch": 0.9898507462686568, + "grad_norm": 0.9192486683083075, + "learning_rate": 1.3509182004725618e-09, + "loss": 0.3404, + "step": 3316 + }, + { + "epoch": 0.9901492537313433, + "grad_norm": 0.8117587672326521, + "learning_rate": 1.2726276819075012e-09, + "loss": 0.3382, + "step": 3317 + }, + { + "epoch": 0.9904477611940299, + "grad_norm": 0.8043944237738767, + "learning_rate": 1.1966734125287704e-09, + "loss": 0.3145, + "step": 3318 + }, + { + "epoch": 0.9907462686567164, + "grad_norm": 0.7769831757123559, + "learning_rate": 1.1230554633523406e-09, + "loss": 0.3126, + "step": 3319 + }, + { + "epoch": 0.991044776119403, + "grad_norm": 0.8223550217599328, + "learning_rate": 1.0517739032084307e-09, + "loss": 0.3554, + "step": 3320 + }, + { + "epoch": 0.9913432835820896, + "grad_norm": 0.7735518380562161, + "learning_rate": 9.828287987442842e-10, + "loss": 0.3166, + "step": 3321 + }, + { + "epoch": 0.9916417910447761, + "grad_norm": 0.8254997943791005, + "learning_rate": 9.162202144213927e-10, + "loss": 0.353, + "step": 3322 + }, + { + "epoch": 0.9919402985074627, + "grad_norm": 0.813538581959752, + "learning_rate": 8.519482125171618e-10, + "loss": 0.3559, + "step": 3323 + }, + { + "epoch": 0.9922388059701492, + "grad_norm": 0.7599430996322734, + "learning_rate": 7.900128531249107e-10, + "loss": 0.3131, + "step": 3324 + }, + { + "epoch": 0.9925373134328358, + "grad_norm": 0.8113270433764241, + "learning_rate": 7.304141941522069e-10, + "loss": 0.3595, + "step": 3325 + }, + { + "epoch": 0.9928358208955224, + "grad_norm": 0.9811336819537293, + "learning_rate": 6.731522913222544e-10, + "loss": 0.3199, + "step": 3326 + }, + { + "epoch": 0.9931343283582089, + "grad_norm": 0.8454076686303549, + "learning_rate": 6.18227198173893e-10, + "loss": 0.3376, + "step": 3327 + }, + { + "epoch": 0.9934328358208955, + "grad_norm": 0.8629013881810315, + "learning_rate": 5.656389660604888e-10, + "loss": 0.3391, + "step": 3328 + }, + { + "epoch": 0.993731343283582, + "grad_norm": 0.9074954180694659, + "learning_rate": 5.153876441510441e-10, + "loss": 0.3412, + "step": 3329 + }, + { + "epoch": 0.9940298507462687, + "grad_norm": 0.8314510642754133, + "learning_rate": 4.674732794288095e-10, + "loss": 0.3593, + "step": 3330 + }, + { + "epoch": 0.9943283582089553, + "grad_norm": 0.9116668780870614, + "learning_rate": 4.2189591669322684e-10, + "loss": 0.3986, + "step": 3331 + }, + { + "epoch": 0.9946268656716418, + "grad_norm": 0.7948460052793381, + "learning_rate": 3.786555985574314e-10, + "loss": 0.2573, + "step": 3332 + }, + { + "epoch": 0.9949253731343284, + "grad_norm": 0.7312524937623769, + "learning_rate": 3.3775236545019464e-10, + "loss": 0.298, + "step": 3333 + }, + { + "epoch": 0.9952238805970149, + "grad_norm": 0.7302545942476094, + "learning_rate": 2.9918625561536907e-10, + "loss": 0.3069, + "step": 3334 + }, + { + "epoch": 0.9955223880597015, + "grad_norm": 0.8697911226163374, + "learning_rate": 2.6295730511105564e-10, + "loss": 0.3235, + "step": 3335 + }, + { + "epoch": 0.9958208955223881, + "grad_norm": 0.8879989503033112, + "learning_rate": 2.2906554781043645e-10, + "loss": 0.3554, + "step": 3336 + }, + { + "epoch": 0.9961194029850746, + "grad_norm": 0.7574075071972748, + "learning_rate": 1.9751101540149697e-10, + "loss": 0.3168, + "step": 3337 + }, + { + "epoch": 0.9964179104477612, + "grad_norm": 0.9752022971438031, + "learning_rate": 1.6829373738702636e-10, + "loss": 0.3387, + "step": 3338 + }, + { + "epoch": 0.9967164179104477, + "grad_norm": 0.7698823309368659, + "learning_rate": 1.4141374108433968e-10, + "loss": 0.2791, + "step": 3339 + }, + { + "epoch": 0.9970149253731343, + "grad_norm": 0.8921339982339014, + "learning_rate": 1.1687105162583311e-10, + "loss": 0.3688, + "step": 3340 + }, + { + "epoch": 0.9973134328358209, + "grad_norm": 0.7650117442391594, + "learning_rate": 9.466569195787367e-11, + "loss": 0.3043, + "step": 3341 + }, + { + "epoch": 0.9976119402985074, + "grad_norm": 0.8092924984819057, + "learning_rate": 7.479768284246458e-11, + "loss": 0.3063, + "step": 3342 + }, + { + "epoch": 0.997910447761194, + "grad_norm": 0.7743808795054051, + "learning_rate": 5.726704285530238e-11, + "loss": 0.3209, + "step": 3343 + }, + { + "epoch": 0.9982089552238806, + "grad_norm": 0.8125424691553139, + "learning_rate": 4.207378838744225e-11, + "loss": 0.3303, + "step": 3344 + }, + { + "epoch": 0.9985074626865672, + "grad_norm": 0.814816088361442, + "learning_rate": 2.9217933643910236e-11, + "loss": 0.33, + "step": 3345 + }, + { + "epoch": 0.9988059701492538, + "grad_norm": 0.7687063639245223, + "learning_rate": 1.8699490644813467e-11, + "loss": 0.2951, + "step": 3346 + }, + { + "epoch": 0.9991044776119403, + "grad_norm": 0.8274962606742341, + "learning_rate": 1.05184692245075e-11, + "loss": 0.2992, + "step": 3347 + }, + { + "epoch": 0.9994029850746269, + "grad_norm": 0.7641695113854454, + "learning_rate": 4.6748770321514145e-12, + "loss": 0.3547, + "step": 3348 + }, + { + "epoch": 0.9997014925373134, + "grad_norm": 0.7988942617182022, + "learning_rate": 1.1687195311527176e-12, + "loss": 0.3496, + "step": 3349 + }, + { + "epoch": 1.0, + "grad_norm": 0.8568561243747301, + "learning_rate": 0.0, + "loss": 0.3795, + "step": 3350 + }, + { + "epoch": 1.0, + "step": 3350, + "total_flos": 2762833588060160.0, + "train_loss": 0.3609271458858874, + "train_runtime": 54753.0786, + "train_samples_per_second": 5.873, + "train_steps_per_second": 0.061 + } + ], + "logging_steps": 1.0, + "max_steps": 3350, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2762833588060160.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": null +}