| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0002126483222047, |
| "eval_steps": 500, |
| "global_step": 1470, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.000680474631055161, |
| "grad_norm": 0.3922363365520713, |
| "learning_rate": 3.3333333333333335e-07, |
| "loss": 1.4926, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.001360949262110322, |
| "grad_norm": 0.3852464156727402, |
| "learning_rate": 6.666666666666667e-07, |
| "loss": 1.4852, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.002041423893165483, |
| "grad_norm": 0.3802201192349553, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.4661, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.002721898524220644, |
| "grad_norm": 0.33400621697524313, |
| "learning_rate": 1.3333333333333334e-06, |
| "loss": 1.3367, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.003402373155275805, |
| "grad_norm": 0.27651807402246553, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 1.4286, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.004082847786330966, |
| "grad_norm": 0.23107642417160407, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.3464, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.004763322417386127, |
| "grad_norm": 0.12939937756268355, |
| "learning_rate": 2.3333333333333336e-06, |
| "loss": 1.378, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.005443797048441288, |
| "grad_norm": 0.10378812673486397, |
| "learning_rate": 2.666666666666667e-06, |
| "loss": 1.3512, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.006124271679496448, |
| "grad_norm": 0.08387789988316384, |
| "learning_rate": 3e-06, |
| "loss": 1.2501, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.00680474631055161, |
| "grad_norm": 0.1018334804799599, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 1.2649, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.007485220941606771, |
| "grad_norm": 0.10048274036790146, |
| "learning_rate": 3.6666666666666666e-06, |
| "loss": 1.3417, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.008165695572661932, |
| "grad_norm": 0.08622738816646529, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.3535, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.008846170203717092, |
| "grad_norm": 0.1046684072379548, |
| "learning_rate": 4.333333333333334e-06, |
| "loss": 1.353, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.009526644834772254, |
| "grad_norm": 0.08990830971852798, |
| "learning_rate": 4.666666666666667e-06, |
| "loss": 1.298, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.010207119465827415, |
| "grad_norm": 0.08061928498710874, |
| "learning_rate": 5e-06, |
| "loss": 1.404, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.010887594096882575, |
| "grad_norm": 0.17008730521534624, |
| "learning_rate": 5.333333333333334e-06, |
| "loss": 1.2939, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.011568068727937737, |
| "grad_norm": 0.07599401871200454, |
| "learning_rate": 5.666666666666667e-06, |
| "loss": 1.3472, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.012248543358992897, |
| "grad_norm": 0.06793991432852008, |
| "learning_rate": 6e-06, |
| "loss": 1.3321, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.012929017990048058, |
| "grad_norm": 0.06953577040604254, |
| "learning_rate": 6.333333333333333e-06, |
| "loss": 1.2434, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.01360949262110322, |
| "grad_norm": 0.06821163536615973, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 1.2727, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01428996725215838, |
| "grad_norm": 0.09241778948737919, |
| "learning_rate": 7e-06, |
| "loss": 1.321, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.014970441883213541, |
| "grad_norm": 0.0651787370812163, |
| "learning_rate": 7.333333333333333e-06, |
| "loss": 1.2779, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0156509165142687, |
| "grad_norm": 0.11311481394537864, |
| "learning_rate": 7.666666666666667e-06, |
| "loss": 1.2984, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.016331391145323865, |
| "grad_norm": 0.06144410930285514, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 1.265, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.017011865776379025, |
| "grad_norm": 0.06635273793167143, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 1.2723, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.017692340407434184, |
| "grad_norm": 0.06955837771876457, |
| "learning_rate": 8.666666666666668e-06, |
| "loss": 1.2648, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.018372815038489348, |
| "grad_norm": 0.06124084590785051, |
| "learning_rate": 9e-06, |
| "loss": 1.2987, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.019053289669544508, |
| "grad_norm": 0.05644177076460923, |
| "learning_rate": 9.333333333333334e-06, |
| "loss": 1.3354, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.019733764300599668, |
| "grad_norm": 0.11074686439531949, |
| "learning_rate": 9.666666666666667e-06, |
| "loss": 1.2433, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.02041423893165483, |
| "grad_norm": 0.056636755992912104, |
| "learning_rate": 1e-05, |
| "loss": 1.2879, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.02109471356270999, |
| "grad_norm": 0.05782845307576006, |
| "learning_rate": 9.99999708223099e-06, |
| "loss": 1.3396, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.02177518819376515, |
| "grad_norm": 0.05482925037031388, |
| "learning_rate": 9.999988328927362e-06, |
| "loss": 1.2671, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.02245566282482031, |
| "grad_norm": 0.06210231699691654, |
| "learning_rate": 9.999973740099334e-06, |
| "loss": 1.2895, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.023136137455875474, |
| "grad_norm": 0.05493409882469449, |
| "learning_rate": 9.999953315763929e-06, |
| "loss": 1.3145, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.023816612086930634, |
| "grad_norm": 0.05683824350074583, |
| "learning_rate": 9.99992705594499e-06, |
| "loss": 1.2525, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.024497086717985794, |
| "grad_norm": 0.04839156028188221, |
| "learning_rate": 9.999894960673162e-06, |
| "loss": 1.2359, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.025177561349040957, |
| "grad_norm": 0.046656613876928314, |
| "learning_rate": 9.999857029985903e-06, |
| "loss": 1.2446, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.025858035980096117, |
| "grad_norm": 0.05183835608104621, |
| "learning_rate": 9.999813263927483e-06, |
| "loss": 1.2929, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.026538510611151277, |
| "grad_norm": 0.04777762814434946, |
| "learning_rate": 9.999763662548982e-06, |
| "loss": 1.2444, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.02721898524220644, |
| "grad_norm": 0.05059603999112179, |
| "learning_rate": 9.999708225908292e-06, |
| "loss": 1.1663, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0278994598732616, |
| "grad_norm": 0.045382847875702514, |
| "learning_rate": 9.99964695407011e-06, |
| "loss": 1.2801, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.02857993450431676, |
| "grad_norm": 0.04343908128703206, |
| "learning_rate": 9.999579847105947e-06, |
| "loss": 1.185, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.029260409135371923, |
| "grad_norm": 0.0489584485386619, |
| "learning_rate": 9.999506905094128e-06, |
| "loss": 1.2427, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.029940883766427083, |
| "grad_norm": 0.04731927561730429, |
| "learning_rate": 9.999428128119779e-06, |
| "loss": 1.3339, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.030621358397482243, |
| "grad_norm": 0.04807552297741366, |
| "learning_rate": 9.999343516274844e-06, |
| "loss": 1.2842, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.0313018330285374, |
| "grad_norm": 0.046345202194362445, |
| "learning_rate": 9.999253069658074e-06, |
| "loss": 1.2616, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.03198230765959256, |
| "grad_norm": 0.0454525828723615, |
| "learning_rate": 9.999156788375033e-06, |
| "loss": 1.2253, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.03266278229064773, |
| "grad_norm": 0.04821779562867459, |
| "learning_rate": 9.999054672538085e-06, |
| "loss": 1.2836, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.03334325692170289, |
| "grad_norm": 0.0646325797782212, |
| "learning_rate": 9.998946722266415e-06, |
| "loss": 1.274, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.03402373155275805, |
| "grad_norm": 0.049126858090645584, |
| "learning_rate": 9.99883293768601e-06, |
| "loss": 1.1964, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03470420618381321, |
| "grad_norm": 0.07363174361547103, |
| "learning_rate": 9.998713318929672e-06, |
| "loss": 1.3436, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.03538468081486837, |
| "grad_norm": 0.04548345898433307, |
| "learning_rate": 9.998587866137005e-06, |
| "loss": 1.2717, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.03606515544592353, |
| "grad_norm": 0.046133384420950434, |
| "learning_rate": 9.99845657945443e-06, |
| "loss": 1.1653, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.036745630076978696, |
| "grad_norm": 0.049760880417649325, |
| "learning_rate": 9.998319459035168e-06, |
| "loss": 1.2607, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.037426104708033855, |
| "grad_norm": 0.11363675889769449, |
| "learning_rate": 9.998176505039257e-06, |
| "loss": 1.2633, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.038106579339089015, |
| "grad_norm": 0.046032893223258195, |
| "learning_rate": 9.998027717633539e-06, |
| "loss": 1.1964, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.038787053970144175, |
| "grad_norm": 0.04922590729969489, |
| "learning_rate": 9.997873096991663e-06, |
| "loss": 1.2569, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.039467528601199335, |
| "grad_norm": 0.057702657772928254, |
| "learning_rate": 9.997712643294093e-06, |
| "loss": 1.2314, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.040148003232254495, |
| "grad_norm": 0.044442212757030586, |
| "learning_rate": 9.99754635672809e-06, |
| "loss": 1.1801, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.04082847786330966, |
| "grad_norm": 0.046764303530808723, |
| "learning_rate": 9.997374237487729e-06, |
| "loss": 1.1928, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.04150895249436482, |
| "grad_norm": 0.12382314732314198, |
| "learning_rate": 9.997196285773894e-06, |
| "loss": 1.2907, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.04218942712541998, |
| "grad_norm": 0.0460098561617423, |
| "learning_rate": 9.997012501794273e-06, |
| "loss": 1.2551, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.04286990175647514, |
| "grad_norm": 0.05635815384217967, |
| "learning_rate": 9.996822885763364e-06, |
| "loss": 1.2534, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.0435503763875303, |
| "grad_norm": 0.054155741053703646, |
| "learning_rate": 9.996627437902465e-06, |
| "loss": 1.2121, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.04423085101858546, |
| "grad_norm": 0.05278557656770515, |
| "learning_rate": 9.996426158439685e-06, |
| "loss": 1.2403, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.04491132564964062, |
| "grad_norm": 0.04559523672664366, |
| "learning_rate": 9.996219047609943e-06, |
| "loss": 1.2213, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.04559180028069579, |
| "grad_norm": 0.05817741367927157, |
| "learning_rate": 9.996006105654955e-06, |
| "loss": 1.215, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.04627227491175095, |
| "grad_norm": 0.053498531951879036, |
| "learning_rate": 9.99578733282325e-06, |
| "loss": 1.1988, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.04695274954280611, |
| "grad_norm": 0.04777681754375196, |
| "learning_rate": 9.995562729370158e-06, |
| "loss": 1.2246, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.04763322417386127, |
| "grad_norm": 0.05565357509635904, |
| "learning_rate": 9.995332295557818e-06, |
| "loss": 1.2654, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04831369880491643, |
| "grad_norm": 0.06680224945846398, |
| "learning_rate": 9.995096031655167e-06, |
| "loss": 1.2684, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.04899417343597159, |
| "grad_norm": 0.048424669107112364, |
| "learning_rate": 9.994853937937954e-06, |
| "loss": 1.2133, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.049674648067026754, |
| "grad_norm": 0.04928739246746449, |
| "learning_rate": 9.994606014688726e-06, |
| "loss": 1.2882, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.050355122698081914, |
| "grad_norm": 0.055922248042688465, |
| "learning_rate": 9.994352262196839e-06, |
| "loss": 1.266, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.051035597329137074, |
| "grad_norm": 0.057124012344938746, |
| "learning_rate": 9.994092680758443e-06, |
| "loss": 1.2561, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.051716071960192234, |
| "grad_norm": 0.04679564952989396, |
| "learning_rate": 9.993827270676507e-06, |
| "loss": 1.2215, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.052396546591247393, |
| "grad_norm": 0.05061952666080301, |
| "learning_rate": 9.993556032260785e-06, |
| "loss": 1.2794, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.05307702122230255, |
| "grad_norm": 0.0692075710954883, |
| "learning_rate": 9.993278965827844e-06, |
| "loss": 1.2755, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.05375749585335772, |
| "grad_norm": 0.056697791581012555, |
| "learning_rate": 9.992996071701052e-06, |
| "loss": 1.1994, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.05443797048441288, |
| "grad_norm": 0.042996286514274486, |
| "learning_rate": 9.992707350210577e-06, |
| "loss": 1.2539, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05511844511546804, |
| "grad_norm": 0.04485835290046384, |
| "learning_rate": 9.992412801693385e-06, |
| "loss": 1.2139, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.0557989197465232, |
| "grad_norm": 0.04908203465634852, |
| "learning_rate": 9.992112426493247e-06, |
| "loss": 1.1792, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.05647939437757836, |
| "grad_norm": 0.05456588235730342, |
| "learning_rate": 9.991806224960736e-06, |
| "loss": 1.2868, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.05715986900863352, |
| "grad_norm": 0.0545054800403904, |
| "learning_rate": 9.991494197453219e-06, |
| "loss": 1.2093, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.05784034363968868, |
| "grad_norm": 0.0661242392524205, |
| "learning_rate": 9.991176344334866e-06, |
| "loss": 1.2699, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.058520818270743846, |
| "grad_norm": 0.05577376587360494, |
| "learning_rate": 9.990852665976648e-06, |
| "loss": 1.1748, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.059201292901799006, |
| "grad_norm": 0.04479015473245748, |
| "learning_rate": 9.990523162756329e-06, |
| "loss": 1.1457, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.059881767532854166, |
| "grad_norm": 0.05030583354226186, |
| "learning_rate": 9.990187835058475e-06, |
| "loss": 1.2363, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.060562242163909326, |
| "grad_norm": 0.04663224750040068, |
| "learning_rate": 9.989846683274453e-06, |
| "loss": 1.2362, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.061242716794964486, |
| "grad_norm": 0.06233780842730507, |
| "learning_rate": 9.989499707802424e-06, |
| "loss": 1.2263, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.061923191426019646, |
| "grad_norm": 0.10496041411997244, |
| "learning_rate": 9.989146909047341e-06, |
| "loss": 1.2967, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.0626036660570748, |
| "grad_norm": 0.04543916977442651, |
| "learning_rate": 9.988788287420961e-06, |
| "loss": 1.2112, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.06328414068812997, |
| "grad_norm": 0.06986447974229824, |
| "learning_rate": 9.988423843341834e-06, |
| "loss": 1.2516, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.06396461531918513, |
| "grad_norm": 0.0496941211098862, |
| "learning_rate": 9.988053577235306e-06, |
| "loss": 1.1914, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.06464508995024029, |
| "grad_norm": 0.05228331387236259, |
| "learning_rate": 9.987677489533516e-06, |
| "loss": 1.1709, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.06532556458129546, |
| "grad_norm": 0.04354293223274572, |
| "learning_rate": 9.987295580675398e-06, |
| "loss": 1.2004, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.06600603921235061, |
| "grad_norm": 0.06710084618116975, |
| "learning_rate": 9.986907851106684e-06, |
| "loss": 1.3077, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.06668651384340578, |
| "grad_norm": 0.04143778101716894, |
| "learning_rate": 9.986514301279894e-06, |
| "loss": 1.2371, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.06736698847446093, |
| "grad_norm": 0.047100741300005265, |
| "learning_rate": 9.986114931654343e-06, |
| "loss": 1.1714, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.0680474631055161, |
| "grad_norm": 0.04613376762288349, |
| "learning_rate": 9.985709742696138e-06, |
| "loss": 1.2521, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06872793773657127, |
| "grad_norm": 0.0699090612598444, |
| "learning_rate": 9.985298734878179e-06, |
| "loss": 1.2199, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.06940841236762642, |
| "grad_norm": 0.04465069136936077, |
| "learning_rate": 9.984881908680157e-06, |
| "loss": 1.1824, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.07008888699868158, |
| "grad_norm": 0.04424081951286644, |
| "learning_rate": 9.984459264588551e-06, |
| "loss": 1.268, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.07076936162973674, |
| "grad_norm": 0.050746883751994334, |
| "learning_rate": 9.984030803096633e-06, |
| "loss": 1.1606, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.0714498362607919, |
| "grad_norm": 0.04390734965937593, |
| "learning_rate": 9.983596524704466e-06, |
| "loss": 1.2813, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.07213031089184706, |
| "grad_norm": 0.05194970705074101, |
| "learning_rate": 9.983156429918895e-06, |
| "loss": 1.1676, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.07281078552290222, |
| "grad_norm": 0.05135562045759976, |
| "learning_rate": 9.982710519253563e-06, |
| "loss": 1.2715, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.07349126015395739, |
| "grad_norm": 0.04499333459904921, |
| "learning_rate": 9.982258793228889e-06, |
| "loss": 1.277, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.07417173478501254, |
| "grad_norm": 0.049375961854300106, |
| "learning_rate": 9.981801252372094e-06, |
| "loss": 1.2406, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.07485220941606771, |
| "grad_norm": 0.04974189474524789, |
| "learning_rate": 9.981337897217171e-06, |
| "loss": 1.2534, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.07553268404712286, |
| "grad_norm": 0.10352651592275892, |
| "learning_rate": 9.98086872830491e-06, |
| "loss": 1.2137, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.07621315867817803, |
| "grad_norm": 0.0424449387883707, |
| "learning_rate": 9.98039374618288e-06, |
| "loss": 1.2032, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.07689363330923318, |
| "grad_norm": 0.05251256238992522, |
| "learning_rate": 9.979912951405433e-06, |
| "loss": 1.2882, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.07757410794028835, |
| "grad_norm": 0.045005876875762664, |
| "learning_rate": 9.979426344533712e-06, |
| "loss": 1.193, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.07825458257134352, |
| "grad_norm": 0.04628698168330066, |
| "learning_rate": 9.978933926135637e-06, |
| "loss": 1.208, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.07893505720239867, |
| "grad_norm": 0.049788103173579595, |
| "learning_rate": 9.978435696785918e-06, |
| "loss": 1.2284, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.07961553183345384, |
| "grad_norm": 0.06249884166750058, |
| "learning_rate": 9.977931657066035e-06, |
| "loss": 1.2145, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.08029600646450899, |
| "grad_norm": 0.05637445736291963, |
| "learning_rate": 9.977421807564264e-06, |
| "loss": 1.215, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.08097648109556416, |
| "grad_norm": 0.10253444716580337, |
| "learning_rate": 9.97690614887565e-06, |
| "loss": 1.2171, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.08165695572661932, |
| "grad_norm": 0.07539804013220634, |
| "learning_rate": 9.976384681602023e-06, |
| "loss": 1.2934, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.08233743035767448, |
| "grad_norm": 0.04739495861507311, |
| "learning_rate": 9.975857406351989e-06, |
| "loss": 1.2177, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.08301790498872964, |
| "grad_norm": 0.05116793533886343, |
| "learning_rate": 9.97532432374094e-06, |
| "loss": 1.2643, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.0836983796197848, |
| "grad_norm": 0.046820360115744135, |
| "learning_rate": 9.974785434391039e-06, |
| "loss": 1.2435, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.08437885425083996, |
| "grad_norm": 0.04507944227068329, |
| "learning_rate": 9.974240738931224e-06, |
| "loss": 1.2415, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.08505932888189512, |
| "grad_norm": 0.04090241249528373, |
| "learning_rate": 9.973690237997219e-06, |
| "loss": 1.2593, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.08573980351295028, |
| "grad_norm": 0.04479964424134036, |
| "learning_rate": 9.973133932231514e-06, |
| "loss": 1.2366, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.08642027814400545, |
| "grad_norm": 0.045830171128613484, |
| "learning_rate": 9.972571822283377e-06, |
| "loss": 1.1708, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.0871007527750606, |
| "grad_norm": 0.04542526816421652, |
| "learning_rate": 9.972003908808854e-06, |
| "loss": 1.1654, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.08778122740611577, |
| "grad_norm": 0.04442527448494435, |
| "learning_rate": 9.97143019247076e-06, |
| "loss": 1.1908, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.08846170203717092, |
| "grad_norm": 0.047830426000382995, |
| "learning_rate": 9.970850673938684e-06, |
| "loss": 1.2238, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.08914217666822609, |
| "grad_norm": 0.04634296002464362, |
| "learning_rate": 9.970265353888984e-06, |
| "loss": 1.2184, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.08982265129928124, |
| "grad_norm": 0.06603137079206937, |
| "learning_rate": 9.969674233004794e-06, |
| "loss": 1.2198, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.09050312593033641, |
| "grad_norm": 0.05155382756399447, |
| "learning_rate": 9.969077311976017e-06, |
| "loss": 1.2494, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.09118360056139158, |
| "grad_norm": 0.04496152538308371, |
| "learning_rate": 9.96847459149932e-06, |
| "loss": 1.2012, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.09186407519244673, |
| "grad_norm": 0.06359552431281164, |
| "learning_rate": 9.967866072278143e-06, |
| "loss": 1.2318, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.0925445498235019, |
| "grad_norm": 0.04505084814063137, |
| "learning_rate": 9.967251755022697e-06, |
| "loss": 1.1958, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.09322502445455705, |
| "grad_norm": 0.05653878193789085, |
| "learning_rate": 9.966631640449957e-06, |
| "loss": 1.1779, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.09390549908561222, |
| "grad_norm": 0.050975422603819855, |
| "learning_rate": 9.966005729283658e-06, |
| "loss": 1.2907, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.09458597371666738, |
| "grad_norm": 0.04751783774889539, |
| "learning_rate": 9.965374022254308e-06, |
| "loss": 1.2631, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.09526644834772253, |
| "grad_norm": 0.05430873799441208, |
| "learning_rate": 9.96473652009918e-06, |
| "loss": 1.2217, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0959469229787777, |
| "grad_norm": 0.04302277589419479, |
| "learning_rate": 9.964093223562303e-06, |
| "loss": 1.2189, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.09662739760983285, |
| "grad_norm": 0.05547820845328659, |
| "learning_rate": 9.963444133394478e-06, |
| "loss": 1.1957, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.09730787224088802, |
| "grad_norm": 0.046119760443999236, |
| "learning_rate": 9.96278925035326e-06, |
| "loss": 1.1941, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.09798834687194317, |
| "grad_norm": 0.0425094476231613, |
| "learning_rate": 9.962128575202967e-06, |
| "loss": 1.2133, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.09866882150299834, |
| "grad_norm": 0.05969426342259366, |
| "learning_rate": 9.961462108714682e-06, |
| "loss": 1.2265, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.09934929613405351, |
| "grad_norm": 0.05148433733859586, |
| "learning_rate": 9.960789851666237e-06, |
| "loss": 1.1851, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.10002977076510866, |
| "grad_norm": 0.047879792473061435, |
| "learning_rate": 9.960111804842236e-06, |
| "loss": 1.1993, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.10071024539616383, |
| "grad_norm": 0.043575578986480415, |
| "learning_rate": 9.959427969034025e-06, |
| "loss": 1.2409, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.10139072002721898, |
| "grad_norm": 0.04853533402339514, |
| "learning_rate": 9.95873834503972e-06, |
| "loss": 1.18, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.10207119465827415, |
| "grad_norm": 0.04148668368535789, |
| "learning_rate": 9.958042933664186e-06, |
| "loss": 1.2072, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1027516692893293, |
| "grad_norm": 0.050262677145106, |
| "learning_rate": 9.957341735719038e-06, |
| "loss": 1.2421, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.10343214392038447, |
| "grad_norm": 0.05381296667313772, |
| "learning_rate": 9.956634752022651e-06, |
| "loss": 1.1788, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.10411261855143963, |
| "grad_norm": 0.0460656819463052, |
| "learning_rate": 9.955921983400154e-06, |
| "loss": 1.268, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.10479309318249479, |
| "grad_norm": 0.04629006602169957, |
| "learning_rate": 9.955203430683425e-06, |
| "loss": 1.2141, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.10547356781354995, |
| "grad_norm": 0.10610645634311688, |
| "learning_rate": 9.954479094711087e-06, |
| "loss": 1.1513, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.1061540424446051, |
| "grad_norm": 0.047759609676506984, |
| "learning_rate": 9.953748976328524e-06, |
| "loss": 1.2524, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.10683451707566027, |
| "grad_norm": 0.06066630318457405, |
| "learning_rate": 9.95301307638786e-06, |
| "loss": 1.2156, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.10751499170671544, |
| "grad_norm": 0.060137314636137626, |
| "learning_rate": 9.952271395747969e-06, |
| "loss": 1.2911, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.1081954663377706, |
| "grad_norm": 0.04525634411513257, |
| "learning_rate": 9.951523935274472e-06, |
| "loss": 1.2769, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.10887594096882576, |
| "grad_norm": 0.08126804083378383, |
| "learning_rate": 9.950770695839737e-06, |
| "loss": 1.2353, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.10955641559988091, |
| "grad_norm": 0.041722971864926184, |
| "learning_rate": 9.950011678322874e-06, |
| "loss": 1.242, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.11023689023093608, |
| "grad_norm": 0.051351975206530606, |
| "learning_rate": 9.949246883609743e-06, |
| "loss": 1.2496, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.11091736486199123, |
| "grad_norm": 0.04577763125389706, |
| "learning_rate": 9.948476312592934e-06, |
| "loss": 1.1765, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.1115978394930464, |
| "grad_norm": 0.04938183947356605, |
| "learning_rate": 9.94769996617179e-06, |
| "loss": 1.2442, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.11227831412410157, |
| "grad_norm": 0.055581793213831124, |
| "learning_rate": 9.946917845252394e-06, |
| "loss": 1.2078, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.11295878875515672, |
| "grad_norm": 0.044103376011085695, |
| "learning_rate": 9.94612995074756e-06, |
| "loss": 1.2132, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.11363926338621189, |
| "grad_norm": 0.04664849459604924, |
| "learning_rate": 9.945336283576849e-06, |
| "loss": 1.2146, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.11431973801726704, |
| "grad_norm": 0.04580570356372435, |
| "learning_rate": 9.944536844666554e-06, |
| "loss": 1.2057, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.1150002126483222, |
| "grad_norm": 0.06380708184717693, |
| "learning_rate": 9.943731634949706e-06, |
| "loss": 1.1959, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.11568068727937736, |
| "grad_norm": 0.04507640293433379, |
| "learning_rate": 9.942920655366075e-06, |
| "loss": 1.217, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.11636116191043253, |
| "grad_norm": 0.04623512542605048, |
| "learning_rate": 9.942103906862158e-06, |
| "loss": 1.256, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.11704163654148769, |
| "grad_norm": 0.05119569489839227, |
| "learning_rate": 9.941281390391189e-06, |
| "loss": 1.2385, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.11772211117254285, |
| "grad_norm": 0.04129595751504458, |
| "learning_rate": 9.940453106913133e-06, |
| "loss": 1.1411, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.11840258580359801, |
| "grad_norm": 0.050324914581852453, |
| "learning_rate": 9.939619057394687e-06, |
| "loss": 1.1612, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.11908306043465317, |
| "grad_norm": 0.04642915301072864, |
| "learning_rate": 9.938779242809275e-06, |
| "loss": 1.2602, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.11976353506570833, |
| "grad_norm": 0.06544881435642524, |
| "learning_rate": 9.937933664137054e-06, |
| "loss": 1.2322, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.1204440096967635, |
| "grad_norm": 0.044569198571487, |
| "learning_rate": 9.937082322364901e-06, |
| "loss": 1.1582, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.12112448432781865, |
| "grad_norm": 0.044360855871756734, |
| "learning_rate": 9.936225218486428e-06, |
| "loss": 1.1625, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.12180495895887382, |
| "grad_norm": 0.08199554083425169, |
| "learning_rate": 9.935362353501964e-06, |
| "loss": 1.2174, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.12248543358992897, |
| "grad_norm": 0.05309082729598163, |
| "learning_rate": 9.934493728418567e-06, |
| "loss": 1.2118, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.12316590822098414, |
| "grad_norm": 0.041864080294537066, |
| "learning_rate": 9.933619344250015e-06, |
| "loss": 1.1722, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.12384638285203929, |
| "grad_norm": 0.04446226582689757, |
| "learning_rate": 9.93273920201681e-06, |
| "loss": 1.2495, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.12452685748309446, |
| "grad_norm": 0.047990304215477446, |
| "learning_rate": 9.931853302746169e-06, |
| "loss": 1.234, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.1252073321141496, |
| "grad_norm": 0.04212367388032856, |
| "learning_rate": 9.930961647472038e-06, |
| "loss": 1.1731, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.12588780674520478, |
| "grad_norm": 0.06449124084893802, |
| "learning_rate": 9.930064237235068e-06, |
| "loss": 1.2172, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.12656828137625994, |
| "grad_norm": 0.08477554547026132, |
| "learning_rate": 9.929161073082636e-06, |
| "loss": 1.2148, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.1272487560073151, |
| "grad_norm": 0.052427645171519884, |
| "learning_rate": 9.928252156068834e-06, |
| "loss": 1.1658, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.12792923063837025, |
| "grad_norm": 0.047378342779356215, |
| "learning_rate": 9.927337487254463e-06, |
| "loss": 1.2226, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.12860970526942542, |
| "grad_norm": 0.04679296935388863, |
| "learning_rate": 9.926417067707042e-06, |
| "loss": 1.1998, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.12929017990048058, |
| "grad_norm": 0.04455647990857127, |
| "learning_rate": 9.925490898500796e-06, |
| "loss": 1.2327, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.12997065453153575, |
| "grad_norm": 0.044070283911945306, |
| "learning_rate": 9.92455898071667e-06, |
| "loss": 1.2344, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.13065112916259092, |
| "grad_norm": 0.04539911979386842, |
| "learning_rate": 9.923621315442307e-06, |
| "loss": 1.2403, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.13133160379364606, |
| "grad_norm": 0.0514164363522223, |
| "learning_rate": 9.922677903772064e-06, |
| "loss": 1.2672, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.13201207842470122, |
| "grad_norm": 0.09067422957089419, |
| "learning_rate": 9.921728746807008e-06, |
| "loss": 1.1436, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.1326925530557564, |
| "grad_norm": 0.045517924932712686, |
| "learning_rate": 9.920773845654904e-06, |
| "loss": 1.2372, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.13337302768681156, |
| "grad_norm": 0.05196755683813489, |
| "learning_rate": 9.919813201430224e-06, |
| "loss": 1.2409, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.13405350231786672, |
| "grad_norm": 0.05169407064154835, |
| "learning_rate": 9.918846815254145e-06, |
| "loss": 1.2079, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.13473397694892186, |
| "grad_norm": 0.04363688042005936, |
| "learning_rate": 9.917874688254542e-06, |
| "loss": 1.1753, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.13541445157997703, |
| "grad_norm": 0.04334945706219354, |
| "learning_rate": 9.916896821565993e-06, |
| "loss": 1.118, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.1360949262110322, |
| "grad_norm": 0.04552794490125636, |
| "learning_rate": 9.915913216329774e-06, |
| "loss": 1.2277, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.13677540084208736, |
| "grad_norm": 0.05101757914560778, |
| "learning_rate": 9.914923873693857e-06, |
| "loss": 1.1945, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.13745587547314253, |
| "grad_norm": 0.048828585397579644, |
| "learning_rate": 9.913928794812909e-06, |
| "loss": 1.2251, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.13813635010419767, |
| "grad_norm": 0.04138830247218384, |
| "learning_rate": 9.9129279808483e-06, |
| "loss": 1.2602, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.13881682473525284, |
| "grad_norm": 0.057028225152475985, |
| "learning_rate": 9.911921432968084e-06, |
| "loss": 1.1871, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.139497299366308, |
| "grad_norm": 0.04860831660885843, |
| "learning_rate": 9.91090915234701e-06, |
| "loss": 1.1847, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.14017777399736317, |
| "grad_norm": 0.042541636250671894, |
| "learning_rate": 9.90989114016652e-06, |
| "loss": 1.2557, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.1408582486284183, |
| "grad_norm": 0.04376838000627094, |
| "learning_rate": 9.908867397614744e-06, |
| "loss": 1.2278, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.14153872325947348, |
| "grad_norm": 0.0437006298461342, |
| "learning_rate": 9.907837925886498e-06, |
| "loss": 1.2318, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.14221919789052864, |
| "grad_norm": 0.0415712474078778, |
| "learning_rate": 9.906802726183287e-06, |
| "loss": 1.1363, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.1428996725215838, |
| "grad_norm": 0.04311235737970915, |
| "learning_rate": 9.905761799713302e-06, |
| "loss": 1.2332, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.14358014715263898, |
| "grad_norm": 0.042894303106773696, |
| "learning_rate": 9.904715147691414e-06, |
| "loss": 1.1575, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.14426062178369412, |
| "grad_norm": 0.05761703850924853, |
| "learning_rate": 9.90366277133918e-06, |
| "loss": 1.158, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.14494109641474928, |
| "grad_norm": 0.042582887002480536, |
| "learning_rate": 9.902604671884835e-06, |
| "loss": 1.1791, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.14562157104580445, |
| "grad_norm": 0.07215445203960888, |
| "learning_rate": 9.901540850563295e-06, |
| "loss": 1.2191, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.14630204567685962, |
| "grad_norm": 0.09549043059285575, |
| "learning_rate": 9.900471308616158e-06, |
| "loss": 1.2162, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.14698252030791478, |
| "grad_norm": 0.048833759803382525, |
| "learning_rate": 9.899396047291689e-06, |
| "loss": 1.2463, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.14766299493896992, |
| "grad_norm": 0.05937202127360593, |
| "learning_rate": 9.898315067844838e-06, |
| "loss": 1.2395, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.1483434695700251, |
| "grad_norm": 0.04988675707454313, |
| "learning_rate": 9.89722837153722e-06, |
| "loss": 1.2342, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.14902394420108026, |
| "grad_norm": 0.04301984592186057, |
| "learning_rate": 9.89613595963713e-06, |
| "loss": 1.2118, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.14970441883213542, |
| "grad_norm": 0.06245932533144223, |
| "learning_rate": 9.895037833419529e-06, |
| "loss": 1.1783, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1503848934631906, |
| "grad_norm": 0.07128420567084552, |
| "learning_rate": 9.893933994166047e-06, |
| "loss": 1.102, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.15106536809424573, |
| "grad_norm": 0.05079938460169106, |
| "learning_rate": 9.892824443164987e-06, |
| "loss": 1.2147, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.1517458427253009, |
| "grad_norm": 0.05144702512972884, |
| "learning_rate": 9.89170918171131e-06, |
| "loss": 1.2153, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.15242631735635606, |
| "grad_norm": 0.049782140308005665, |
| "learning_rate": 9.89058821110665e-06, |
| "loss": 1.2774, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.15310679198741123, |
| "grad_norm": 0.052741452049593476, |
| "learning_rate": 9.889461532659297e-06, |
| "loss": 1.2052, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.15378726661846637, |
| "grad_norm": 0.19564397372349063, |
| "learning_rate": 9.88832914768421e-06, |
| "loss": 1.2134, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.15446774124952153, |
| "grad_norm": 0.06625440045571748, |
| "learning_rate": 9.887191057503001e-06, |
| "loss": 1.1719, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.1551482158805767, |
| "grad_norm": 0.04784858786092269, |
| "learning_rate": 9.886047263443943e-06, |
| "loss": 1.2114, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.15582869051163187, |
| "grad_norm": 0.05382621869617639, |
| "learning_rate": 9.884897766841967e-06, |
| "loss": 1.2844, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.15650916514268703, |
| "grad_norm": 0.05462666626066152, |
| "learning_rate": 9.883742569038663e-06, |
| "loss": 1.1639, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.15718963977374217, |
| "grad_norm": 0.05422579385130804, |
| "learning_rate": 9.882581671382267e-06, |
| "loss": 1.2443, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.15787011440479734, |
| "grad_norm": 0.04760945352489805, |
| "learning_rate": 9.881415075227674e-06, |
| "loss": 1.1816, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.1585505890358525, |
| "grad_norm": 0.044763636579990364, |
| "learning_rate": 9.880242781936426e-06, |
| "loss": 1.1893, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.15923106366690767, |
| "grad_norm": 0.06334514242152707, |
| "learning_rate": 9.879064792876717e-06, |
| "loss": 1.2101, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.15991153829796284, |
| "grad_norm": 0.04715853107602709, |
| "learning_rate": 9.877881109423383e-06, |
| "loss": 1.2493, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.16059201292901798, |
| "grad_norm": 0.052667670524755336, |
| "learning_rate": 9.876691732957913e-06, |
| "loss": 1.2147, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.16127248756007315, |
| "grad_norm": 0.05018947690087094, |
| "learning_rate": 9.875496664868437e-06, |
| "loss": 1.2356, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.1619529621911283, |
| "grad_norm": 0.04964589440245944, |
| "learning_rate": 9.874295906549728e-06, |
| "loss": 1.2312, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.16263343682218348, |
| "grad_norm": 0.10087586960994378, |
| "learning_rate": 9.8730894594032e-06, |
| "loss": 1.2022, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.16331391145323865, |
| "grad_norm": 0.06957648656259531, |
| "learning_rate": 9.871877324836906e-06, |
| "loss": 1.2022, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.1639943860842938, |
| "grad_norm": 0.05476431304114644, |
| "learning_rate": 9.87065950426554e-06, |
| "loss": 1.2631, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.16467486071534895, |
| "grad_norm": 0.054645531400643356, |
| "learning_rate": 9.869435999110428e-06, |
| "loss": 1.175, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.16535533534640412, |
| "grad_norm": 0.04841286729948775, |
| "learning_rate": 9.868206810799532e-06, |
| "loss": 1.2281, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.1660358099774593, |
| "grad_norm": 0.06671782922853765, |
| "learning_rate": 9.866971940767447e-06, |
| "loss": 1.2494, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.16671628460851443, |
| "grad_norm": 0.05664726380742502, |
| "learning_rate": 9.865731390455398e-06, |
| "loss": 1.208, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.1673967592395696, |
| "grad_norm": 0.04523763510128454, |
| "learning_rate": 9.864485161311242e-06, |
| "loss": 1.1982, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.16807723387062476, |
| "grad_norm": 0.05127005431853173, |
| "learning_rate": 9.863233254789463e-06, |
| "loss": 1.1681, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.16875770850167993, |
| "grad_norm": 0.04424702438605936, |
| "learning_rate": 9.861975672351172e-06, |
| "loss": 1.1789, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.1694381831327351, |
| "grad_norm": 0.05100375949853296, |
| "learning_rate": 9.860712415464097e-06, |
| "loss": 1.2632, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.17011865776379023, |
| "grad_norm": 0.05953363240829583, |
| "learning_rate": 9.859443485602603e-06, |
| "loss": 1.2008, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1707991323948454, |
| "grad_norm": 0.054042165361209704, |
| "learning_rate": 9.85816888424766e-06, |
| "loss": 1.235, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.17147960702590057, |
| "grad_norm": 0.04707724622685731, |
| "learning_rate": 9.856888612886872e-06, |
| "loss": 1.2035, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.17216008165695573, |
| "grad_norm": 0.047616861849738026, |
| "learning_rate": 9.855602673014448e-06, |
| "loss": 1.151, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.1728405562880109, |
| "grad_norm": 0.043705026997731614, |
| "learning_rate": 9.85431106613122e-06, |
| "loss": 1.1755, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.17352103091906604, |
| "grad_norm": 0.04245727665918003, |
| "learning_rate": 9.853013793744632e-06, |
| "loss": 1.1992, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.1742015055501212, |
| "grad_norm": 0.06445634670806195, |
| "learning_rate": 9.851710857368741e-06, |
| "loss": 1.2532, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.17488198018117637, |
| "grad_norm": 0.04579696157112681, |
| "learning_rate": 9.850402258524215e-06, |
| "loss": 1.152, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.17556245481223154, |
| "grad_norm": 0.04700554126760869, |
| "learning_rate": 9.849087998738328e-06, |
| "loss": 1.1999, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1762429294432867, |
| "grad_norm": 0.05177514136462001, |
| "learning_rate": 9.847768079544962e-06, |
| "loss": 1.2184, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.17692340407434184, |
| "grad_norm": 0.0691656394965647, |
| "learning_rate": 9.846442502484608e-06, |
| "loss": 1.1522, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.177603878705397, |
| "grad_norm": 0.0448531515266734, |
| "learning_rate": 9.845111269104353e-06, |
| "loss": 1.1852, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.17828435333645218, |
| "grad_norm": 0.05284284218380461, |
| "learning_rate": 9.84377438095789e-06, |
| "loss": 1.2506, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.17896482796750735, |
| "grad_norm": 0.04821826066415016, |
| "learning_rate": 9.842431839605516e-06, |
| "loss": 1.1945, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.17964530259856248, |
| "grad_norm": 0.04937302535184858, |
| "learning_rate": 9.841083646614117e-06, |
| "loss": 1.2315, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.18032577722961765, |
| "grad_norm": 0.052269506285700276, |
| "learning_rate": 9.839729803557178e-06, |
| "loss": 1.1814, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.18100625186067282, |
| "grad_norm": 0.051431348651855786, |
| "learning_rate": 9.838370312014783e-06, |
| "loss": 1.2109, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.18168672649172798, |
| "grad_norm": 0.04675037466687018, |
| "learning_rate": 9.837005173573603e-06, |
| "loss": 1.2236, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.18236720112278315, |
| "grad_norm": 0.04876302678884042, |
| "learning_rate": 9.835634389826905e-06, |
| "loss": 1.213, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.1830476757538383, |
| "grad_norm": 0.047487981056904216, |
| "learning_rate": 9.834257962374536e-06, |
| "loss": 1.1964, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.18372815038489346, |
| "grad_norm": 0.046084020203186686, |
| "learning_rate": 9.832875892822937e-06, |
| "loss": 1.1624, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.18440862501594862, |
| "grad_norm": 0.3042850666890626, |
| "learning_rate": 9.831488182785134e-06, |
| "loss": 1.1522, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.1850890996470038, |
| "grad_norm": 0.043991134946647886, |
| "learning_rate": 9.83009483388073e-06, |
| "loss": 1.1931, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.18576957427805896, |
| "grad_norm": 0.05453712505153417, |
| "learning_rate": 9.828695847735916e-06, |
| "loss": 1.1765, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.1864500489091141, |
| "grad_norm": 0.050719360959302975, |
| "learning_rate": 9.827291225983458e-06, |
| "loss": 1.1895, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.18713052354016926, |
| "grad_norm": 0.050213041260094814, |
| "learning_rate": 9.825880970262703e-06, |
| "loss": 1.1846, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.18781099817122443, |
| "grad_norm": 0.05430648584833771, |
| "learning_rate": 9.824465082219567e-06, |
| "loss": 1.1972, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.1884914728022796, |
| "grad_norm": 0.057977753358927374, |
| "learning_rate": 9.823043563506547e-06, |
| "loss": 1.1812, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.18917194743333476, |
| "grad_norm": 0.05570573203783343, |
| "learning_rate": 9.821616415782708e-06, |
| "loss": 1.207, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.1898524220643899, |
| "grad_norm": 0.04558771138495919, |
| "learning_rate": 9.820183640713685e-06, |
| "loss": 1.204, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.19053289669544507, |
| "grad_norm": 0.07199615810821337, |
| "learning_rate": 9.818745239971679e-06, |
| "loss": 1.2522, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.19121337132650024, |
| "grad_norm": 0.042919127224821454, |
| "learning_rate": 9.817301215235459e-06, |
| "loss": 1.2091, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.1918938459575554, |
| "grad_norm": 0.05419946981753212, |
| "learning_rate": 9.815851568190358e-06, |
| "loss": 1.17, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.19257432058861054, |
| "grad_norm": 0.061238252534998636, |
| "learning_rate": 9.81439630052827e-06, |
| "loss": 1.2271, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.1932547952196657, |
| "grad_norm": 0.04663661207277225, |
| "learning_rate": 9.812935413947649e-06, |
| "loss": 1.2129, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.19393526985072088, |
| "grad_norm": 0.047774001967259005, |
| "learning_rate": 9.811468910153507e-06, |
| "loss": 1.1284, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.19461574448177604, |
| "grad_norm": 0.05448933158184023, |
| "learning_rate": 9.80999679085741e-06, |
| "loss": 1.1589, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1952962191128312, |
| "grad_norm": 0.03907919503901335, |
| "learning_rate": 9.808519057777484e-06, |
| "loss": 1.2586, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.19597669374388635, |
| "grad_norm": 0.0429913843647421, |
| "learning_rate": 9.807035712638397e-06, |
| "loss": 1.2313, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.19665716837494152, |
| "grad_norm": 0.045825796250954225, |
| "learning_rate": 9.805546757171376e-06, |
| "loss": 1.2343, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.19733764300599668, |
| "grad_norm": 0.08101836247529706, |
| "learning_rate": 9.80405219311419e-06, |
| "loss": 1.1668, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.19801811763705185, |
| "grad_norm": 0.04192874905440864, |
| "learning_rate": 9.802552022211157e-06, |
| "loss": 1.2155, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.19869859226810702, |
| "grad_norm": 0.05504083460811146, |
| "learning_rate": 9.801046246213139e-06, |
| "loss": 1.1945, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.19937906689916216, |
| "grad_norm": 0.04839230603866535, |
| "learning_rate": 9.799534866877538e-06, |
| "loss": 1.2198, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.20005954153021732, |
| "grad_norm": 0.13950074251104766, |
| "learning_rate": 9.798017885968295e-06, |
| "loss": 1.2247, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.2007400161612725, |
| "grad_norm": 0.04898866813757472, |
| "learning_rate": 9.796495305255893e-06, |
| "loss": 1.1844, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.20142049079232766, |
| "grad_norm": 0.05470315462133326, |
| "learning_rate": 9.794967126517342e-06, |
| "loss": 1.1631, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.20210096542338282, |
| "grad_norm": 0.05224619111865233, |
| "learning_rate": 9.793433351536199e-06, |
| "loss": 1.1829, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.20278144005443796, |
| "grad_norm": 0.045803400776143144, |
| "learning_rate": 9.791893982102537e-06, |
| "loss": 1.1372, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.20346191468549313, |
| "grad_norm": 0.05008591147736681, |
| "learning_rate": 9.790349020012969e-06, |
| "loss": 1.2191, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.2041423893165483, |
| "grad_norm": 0.05594808080061322, |
| "learning_rate": 9.788798467070633e-06, |
| "loss": 1.2266, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.20482286394760346, |
| "grad_norm": 0.045045756947026755, |
| "learning_rate": 9.787242325085189e-06, |
| "loss": 1.224, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.2055033385786586, |
| "grad_norm": 0.04409673624514148, |
| "learning_rate": 9.785680595872824e-06, |
| "loss": 1.224, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.20618381320971377, |
| "grad_norm": 0.04074518560860722, |
| "learning_rate": 9.78411328125624e-06, |
| "loss": 1.1849, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.20686428784076893, |
| "grad_norm": 0.08415190666671774, |
| "learning_rate": 9.782540383064668e-06, |
| "loss": 1.1846, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.2075447624718241, |
| "grad_norm": 0.07213446530578103, |
| "learning_rate": 9.780961903133845e-06, |
| "loss": 1.1588, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.20822523710287927, |
| "grad_norm": 0.05122335062166945, |
| "learning_rate": 9.77937784330603e-06, |
| "loss": 1.1319, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.2089057117339344, |
| "grad_norm": 0.045982551577382964, |
| "learning_rate": 9.777788205429988e-06, |
| "loss": 1.1896, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.20958618636498957, |
| "grad_norm": 0.04972258893092704, |
| "learning_rate": 9.776192991360998e-06, |
| "loss": 1.2052, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.21026666099604474, |
| "grad_norm": 0.05070740014134389, |
| "learning_rate": 9.774592202960849e-06, |
| "loss": 1.1848, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.2109471356270999, |
| "grad_norm": 0.04060056294082241, |
| "learning_rate": 9.772985842097832e-06, |
| "loss": 1.1554, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.21162761025815507, |
| "grad_norm": 0.04286891666827905, |
| "learning_rate": 9.771373910646742e-06, |
| "loss": 1.1595, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.2123080848892102, |
| "grad_norm": 0.045021190520363845, |
| "learning_rate": 9.769756410488877e-06, |
| "loss": 1.1497, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.21298855952026538, |
| "grad_norm": 0.048611355949365866, |
| "learning_rate": 9.768133343512034e-06, |
| "loss": 1.2751, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.21366903415132055, |
| "grad_norm": 0.044831247346836245, |
| "learning_rate": 9.766504711610507e-06, |
| "loss": 1.1666, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.21434950878237571, |
| "grad_norm": 0.044769242161053394, |
| "learning_rate": 9.764870516685085e-06, |
| "loss": 1.175, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.21502998341343088, |
| "grad_norm": 0.06166834973309736, |
| "learning_rate": 9.763230760643048e-06, |
| "loss": 1.2267, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.21571045804448602, |
| "grad_norm": 0.045496850620496514, |
| "learning_rate": 9.761585445398168e-06, |
| "loss": 1.1725, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.2163909326755412, |
| "grad_norm": 0.050049929676678366, |
| "learning_rate": 9.759934572870706e-06, |
| "loss": 1.1582, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.21707140730659635, |
| "grad_norm": 0.04231191912768742, |
| "learning_rate": 9.758278144987408e-06, |
| "loss": 1.2227, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.21775188193765152, |
| "grad_norm": 0.332446688550663, |
| "learning_rate": 9.756616163681503e-06, |
| "loss": 1.1243, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.21843235656870666, |
| "grad_norm": 0.046179771954275556, |
| "learning_rate": 9.7549486308927e-06, |
| "loss": 1.2498, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.21911283119976183, |
| "grad_norm": 0.0623421181732995, |
| "learning_rate": 9.753275548567192e-06, |
| "loss": 1.2113, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.219793305830817, |
| "grad_norm": 0.053361270032192355, |
| "learning_rate": 9.751596918657646e-06, |
| "loss": 1.246, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.22047378046187216, |
| "grad_norm": 0.04867134806348243, |
| "learning_rate": 9.749912743123202e-06, |
| "loss": 1.1905, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.22115425509292733, |
| "grad_norm": 0.060255684810490574, |
| "learning_rate": 9.748223023929476e-06, |
| "loss": 1.1761, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.22183472972398247, |
| "grad_norm": 0.05715294954556239, |
| "learning_rate": 9.74652776304855e-06, |
| "loss": 1.2812, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.22251520435503763, |
| "grad_norm": 0.05164227157483807, |
| "learning_rate": 9.744826962458977e-06, |
| "loss": 1.1956, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.2231956789860928, |
| "grad_norm": 0.04774119620059092, |
| "learning_rate": 9.743120624145776e-06, |
| "loss": 1.1538, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.22387615361714797, |
| "grad_norm": 0.05129453385290744, |
| "learning_rate": 9.741408750100424e-06, |
| "loss": 1.1453, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.22455662824820313, |
| "grad_norm": 0.06734783625068147, |
| "learning_rate": 9.739691342320866e-06, |
| "loss": 1.2209, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.22523710287925827, |
| "grad_norm": 0.04789002077924269, |
| "learning_rate": 9.737968402811497e-06, |
| "loss": 1.2396, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.22591757751031344, |
| "grad_norm": 0.04367749038463707, |
| "learning_rate": 9.736239933583177e-06, |
| "loss": 1.2372, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.2265980521413686, |
| "grad_norm": 0.06025885176615586, |
| "learning_rate": 9.734505936653214e-06, |
| "loss": 1.1022, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.22727852677242377, |
| "grad_norm": 0.04799522348098899, |
| "learning_rate": 9.732766414045368e-06, |
| "loss": 1.2403, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.22795900140347894, |
| "grad_norm": 0.05320768993231738, |
| "learning_rate": 9.73102136778985e-06, |
| "loss": 1.1591, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.22863947603453408, |
| "grad_norm": 0.05659410183258992, |
| "learning_rate": 9.729270799923319e-06, |
| "loss": 1.1582, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.22931995066558925, |
| "grad_norm": 0.05562485812021002, |
| "learning_rate": 9.727514712488871e-06, |
| "loss": 1.1808, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.2300004252966444, |
| "grad_norm": 0.05688262826735776, |
| "learning_rate": 9.725753107536053e-06, |
| "loss": 1.2197, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.23068089992769958, |
| "grad_norm": 0.04336195320780082, |
| "learning_rate": 9.723985987120848e-06, |
| "loss": 1.2233, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.23136137455875472, |
| "grad_norm": 0.05006619094350356, |
| "learning_rate": 9.722213353305672e-06, |
| "loss": 1.2157, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.23204184918980988, |
| "grad_norm": 0.051087758971063546, |
| "learning_rate": 9.720435208159382e-06, |
| "loss": 1.2709, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.23272232382086505, |
| "grad_norm": 0.046987562802023916, |
| "learning_rate": 9.718651553757266e-06, |
| "loss": 1.2585, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.23340279845192022, |
| "grad_norm": 0.04766257543097715, |
| "learning_rate": 9.716862392181036e-06, |
| "loss": 1.2761, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.23408327308297538, |
| "grad_norm": 0.09361014366689609, |
| "learning_rate": 9.715067725518842e-06, |
| "loss": 1.1961, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.23476374771403052, |
| "grad_norm": 0.04360733792595929, |
| "learning_rate": 9.713267555865247e-06, |
| "loss": 1.1781, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.2354442223450857, |
| "grad_norm": 0.057158041969233964, |
| "learning_rate": 9.711461885321247e-06, |
| "loss": 1.2386, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.23612469697614086, |
| "grad_norm": 0.045100749986830585, |
| "learning_rate": 9.709650715994253e-06, |
| "loss": 1.1951, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.23680517160719602, |
| "grad_norm": 0.05740293932782018, |
| "learning_rate": 9.707834049998093e-06, |
| "loss": 1.1706, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.2374856462382512, |
| "grad_norm": 0.05431722318838959, |
| "learning_rate": 9.706011889453013e-06, |
| "loss": 1.1906, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.23816612086930633, |
| "grad_norm": 0.07193219027684113, |
| "learning_rate": 9.704184236485672e-06, |
| "loss": 1.1017, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2388465955003615, |
| "grad_norm": 0.05114798899715518, |
| "learning_rate": 9.702351093229133e-06, |
| "loss": 1.2499, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.23952707013141666, |
| "grad_norm": 0.04748191234685159, |
| "learning_rate": 9.700512461822875e-06, |
| "loss": 1.1908, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.24020754476247183, |
| "grad_norm": 0.08356574869948455, |
| "learning_rate": 9.69866834441278e-06, |
| "loss": 1.1585, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.240888019393527, |
| "grad_norm": 0.046493368628761834, |
| "learning_rate": 9.696818743151128e-06, |
| "loss": 1.1491, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.24156849402458214, |
| "grad_norm": 0.045313373883776854, |
| "learning_rate": 9.694963660196603e-06, |
| "loss": 1.1125, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.2422489686556373, |
| "grad_norm": 0.044233082198614604, |
| "learning_rate": 9.69310309771429e-06, |
| "loss": 1.2128, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.24292944328669247, |
| "grad_norm": 0.053393583715076506, |
| "learning_rate": 9.691237057875662e-06, |
| "loss": 1.1631, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.24360991791774764, |
| "grad_norm": 0.05491977568490273, |
| "learning_rate": 9.68936554285859e-06, |
| "loss": 1.1712, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.24429039254880278, |
| "grad_norm": 0.06425210798070492, |
| "learning_rate": 9.687488554847332e-06, |
| "loss": 1.184, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.24497086717985794, |
| "grad_norm": 0.062965712116679, |
| "learning_rate": 9.685606096032536e-06, |
| "loss": 1.1326, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.2456513418109131, |
| "grad_norm": 0.048778316344698425, |
| "learning_rate": 9.683718168611233e-06, |
| "loss": 1.167, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.24633181644196828, |
| "grad_norm": 0.049233982480725684, |
| "learning_rate": 9.68182477478684e-06, |
| "loss": 1.2294, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.24701229107302344, |
| "grad_norm": 0.0443112999178113, |
| "learning_rate": 9.67992591676915e-06, |
| "loss": 1.218, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.24769276570407858, |
| "grad_norm": 0.04869766258013373, |
| "learning_rate": 9.678021596774332e-06, |
| "loss": 1.2027, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.24837324033513375, |
| "grad_norm": 0.052032926949957266, |
| "learning_rate": 9.676111817024935e-06, |
| "loss": 1.2244, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.24905371496618892, |
| "grad_norm": 0.04940706754832445, |
| "learning_rate": 9.67419657974988e-06, |
| "loss": 1.2217, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.24973418959724408, |
| "grad_norm": 0.08905408551465017, |
| "learning_rate": 9.672275887184449e-06, |
| "loss": 1.1463, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.2504146642282992, |
| "grad_norm": 0.0420124504182442, |
| "learning_rate": 9.670349741570302e-06, |
| "loss": 1.1608, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.2510951388593544, |
| "grad_norm": 0.047640638525214364, |
| "learning_rate": 9.668418145155453e-06, |
| "loss": 1.1924, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.25177561349040956, |
| "grad_norm": 0.04411995714560134, |
| "learning_rate": 9.66648110019429e-06, |
| "loss": 1.1924, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2524560881214647, |
| "grad_norm": 0.0954266561963091, |
| "learning_rate": 9.664538608947547e-06, |
| "loss": 1.194, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.2531365627525199, |
| "grad_norm": 0.04832973819001474, |
| "learning_rate": 9.662590673682322e-06, |
| "loss": 1.1294, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.25381703738357503, |
| "grad_norm": 0.0907247205159215, |
| "learning_rate": 9.660637296672065e-06, |
| "loss": 1.203, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.2544975120146302, |
| "grad_norm": 0.046786600325019814, |
| "learning_rate": 9.658678480196579e-06, |
| "loss": 1.246, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.25517798664568536, |
| "grad_norm": 0.05704081940461117, |
| "learning_rate": 9.65671422654201e-06, |
| "loss": 1.1417, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.2558584612767405, |
| "grad_norm": 0.051937899288344185, |
| "learning_rate": 9.654744538000857e-06, |
| "loss": 1.1278, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.2565389359077957, |
| "grad_norm": 0.04508971069443915, |
| "learning_rate": 9.652769416871956e-06, |
| "loss": 1.1809, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.25721941053885083, |
| "grad_norm": 0.054979374091871036, |
| "learning_rate": 9.650788865460487e-06, |
| "loss": 1.2295, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.25789988516990603, |
| "grad_norm": 0.049044653730871066, |
| "learning_rate": 9.648802886077968e-06, |
| "loss": 1.224, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.25858035980096117, |
| "grad_norm": 0.044961130694949406, |
| "learning_rate": 9.646811481042246e-06, |
| "loss": 1.1668, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.2592608344320163, |
| "grad_norm": 0.04699265461262461, |
| "learning_rate": 9.64481465267751e-06, |
| "loss": 1.2107, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.2599413090630715, |
| "grad_norm": 0.0577640863727447, |
| "learning_rate": 9.642812403314272e-06, |
| "loss": 1.251, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.26062178369412664, |
| "grad_norm": 0.04912205190629825, |
| "learning_rate": 9.640804735289371e-06, |
| "loss": 1.1478, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.26130225832518184, |
| "grad_norm": 0.04635375436327543, |
| "learning_rate": 9.638791650945974e-06, |
| "loss": 1.2266, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.261982732956237, |
| "grad_norm": 0.06771008302412035, |
| "learning_rate": 9.636773152633564e-06, |
| "loss": 1.1653, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.2626632075872921, |
| "grad_norm": 0.0421485079049122, |
| "learning_rate": 9.634749242707948e-06, |
| "loss": 1.1996, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.2633436822183473, |
| "grad_norm": 0.046003609818600986, |
| "learning_rate": 9.632719923531246e-06, |
| "loss": 1.2091, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.26402415684940245, |
| "grad_norm": 0.041416979164638455, |
| "learning_rate": 9.630685197471893e-06, |
| "loss": 1.1664, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.26470463148045764, |
| "grad_norm": 0.04464379751775676, |
| "learning_rate": 9.628645066904631e-06, |
| "loss": 1.2814, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.2653851061115128, |
| "grad_norm": 0.04506408101193006, |
| "learning_rate": 9.626599534210514e-06, |
| "loss": 1.1831, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2660655807425679, |
| "grad_norm": 0.041696760747256145, |
| "learning_rate": 9.624548601776897e-06, |
| "loss": 1.1438, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.2667460553736231, |
| "grad_norm": 0.04180033834556875, |
| "learning_rate": 9.62249227199744e-06, |
| "loss": 1.1683, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.26742653000467825, |
| "grad_norm": 0.0653569019317656, |
| "learning_rate": 9.620430547272101e-06, |
| "loss": 1.1412, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.26810700463573345, |
| "grad_norm": 0.0508490046254163, |
| "learning_rate": 9.618363430007134e-06, |
| "loss": 1.1703, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.2687874792667886, |
| "grad_norm": 0.04192674002870443, |
| "learning_rate": 9.616290922615089e-06, |
| "loss": 1.1928, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.2694679538978437, |
| "grad_norm": 0.045135315573927554, |
| "learning_rate": 9.614213027514802e-06, |
| "loss": 1.1368, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.2701484285288989, |
| "grad_norm": 0.04283907706648696, |
| "learning_rate": 9.612129747131403e-06, |
| "loss": 1.2029, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.27082890315995406, |
| "grad_norm": 0.047826740996491424, |
| "learning_rate": 9.610041083896304e-06, |
| "loss": 1.1292, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.27150937779100925, |
| "grad_norm": 0.042865391928449584, |
| "learning_rate": 9.6079470402472e-06, |
| "loss": 1.1786, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.2721898524220644, |
| "grad_norm": 0.04272434572582662, |
| "learning_rate": 9.60584761862806e-06, |
| "loss": 1.1714, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.27287032705311953, |
| "grad_norm": 0.047643431915423036, |
| "learning_rate": 9.603742821489143e-06, |
| "loss": 1.1807, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.2735508016841747, |
| "grad_norm": 0.04305667573692588, |
| "learning_rate": 9.60163265128697e-06, |
| "loss": 1.2203, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.27423127631522987, |
| "grad_norm": 0.05262690402608385, |
| "learning_rate": 9.599517110484335e-06, |
| "loss": 1.1727, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.27491175094628506, |
| "grad_norm": 0.04326143595632641, |
| "learning_rate": 9.597396201550307e-06, |
| "loss": 1.1839, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.2755922255773402, |
| "grad_norm": 0.04480016734569184, |
| "learning_rate": 9.595269926960207e-06, |
| "loss": 1.132, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.27627270020839534, |
| "grad_norm": 0.07616330002731382, |
| "learning_rate": 9.593138289195634e-06, |
| "loss": 1.1795, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.27695317483945053, |
| "grad_norm": 0.0463946895471615, |
| "learning_rate": 9.591001290744433e-06, |
| "loss": 1.1722, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.2776336494705057, |
| "grad_norm": 0.06517585252270597, |
| "learning_rate": 9.588858934100715e-06, |
| "loss": 1.2092, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.2783141241015608, |
| "grad_norm": 0.04668726252763982, |
| "learning_rate": 9.58671122176484e-06, |
| "loss": 1.1454, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.278994598732616, |
| "grad_norm": 0.044076602793079296, |
| "learning_rate": 9.584558156243418e-06, |
| "loss": 1.2057, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.27967507336367115, |
| "grad_norm": 0.05755797965942513, |
| "learning_rate": 9.582399740049309e-06, |
| "loss": 1.1866, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.28035554799472634, |
| "grad_norm": 0.04909298957494496, |
| "learning_rate": 9.580235975701615e-06, |
| "loss": 1.1608, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.2810360226257815, |
| "grad_norm": 0.04760184092450246, |
| "learning_rate": 9.578066865725685e-06, |
| "loss": 1.2187, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.2817164972568366, |
| "grad_norm": 0.041651343733572356, |
| "learning_rate": 9.575892412653102e-06, |
| "loss": 1.1959, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.2823969718878918, |
| "grad_norm": 0.04152859840201504, |
| "learning_rate": 9.573712619021687e-06, |
| "loss": 1.1998, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.28307744651894695, |
| "grad_norm": 0.39015214046356017, |
| "learning_rate": 9.571527487375494e-06, |
| "loss": 1.2253, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.28375792115000215, |
| "grad_norm": 0.043403289521190484, |
| "learning_rate": 9.569337020264805e-06, |
| "loss": 1.1732, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.2844383957810573, |
| "grad_norm": 0.04464700023306462, |
| "learning_rate": 9.567141220246136e-06, |
| "loss": 1.1997, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.2851188704121124, |
| "grad_norm": 0.048346937249993505, |
| "learning_rate": 9.564940089882215e-06, |
| "loss": 1.1355, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.2857993450431676, |
| "grad_norm": 0.046040306984666865, |
| "learning_rate": 9.562733631742003e-06, |
| "loss": 1.1288, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.28647981967422276, |
| "grad_norm": 0.04680136513725823, |
| "learning_rate": 9.560521848400672e-06, |
| "loss": 1.1765, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.28716029430527795, |
| "grad_norm": 0.06484527922937486, |
| "learning_rate": 9.55830474243961e-06, |
| "loss": 1.1584, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.2878407689363331, |
| "grad_norm": 0.04158095847228888, |
| "learning_rate": 9.55608231644642e-06, |
| "loss": 1.1548, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.28852124356738823, |
| "grad_norm": 0.04628073262185198, |
| "learning_rate": 9.553854573014913e-06, |
| "loss": 1.205, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.2892017181984434, |
| "grad_norm": 0.05281944608701771, |
| "learning_rate": 9.551621514745104e-06, |
| "loss": 1.1915, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.28988219282949856, |
| "grad_norm": 0.045211220884478216, |
| "learning_rate": 9.549383144243213e-06, |
| "loss": 1.2551, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.29056266746055376, |
| "grad_norm": 0.04715624838570465, |
| "learning_rate": 9.547139464121658e-06, |
| "loss": 1.2004, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.2912431420916089, |
| "grad_norm": 0.04606784209554793, |
| "learning_rate": 9.544890476999056e-06, |
| "loss": 1.2783, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.29192361672266404, |
| "grad_norm": 0.0475193656164289, |
| "learning_rate": 9.542636185500216e-06, |
| "loss": 1.1615, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.29260409135371923, |
| "grad_norm": 0.047942679226840895, |
| "learning_rate": 9.540376592256142e-06, |
| "loss": 1.1779, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.29328456598477437, |
| "grad_norm": 0.045498747981138, |
| "learning_rate": 9.538111699904018e-06, |
| "loss": 1.1603, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.29396504061582956, |
| "grad_norm": 0.044436179641190246, |
| "learning_rate": 9.53584151108722e-06, |
| "loss": 1.2382, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.2946455152468847, |
| "grad_norm": 0.04879803811355419, |
| "learning_rate": 9.533566028455303e-06, |
| "loss": 1.1918, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.29532598987793984, |
| "grad_norm": 0.04653543136888824, |
| "learning_rate": 9.531285254663997e-06, |
| "loss": 1.2306, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.29600646450899504, |
| "grad_norm": 0.06928602870846744, |
| "learning_rate": 9.528999192375213e-06, |
| "loss": 1.1711, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.2966869391400502, |
| "grad_norm": 0.048157969271924075, |
| "learning_rate": 9.526707844257031e-06, |
| "loss": 1.1653, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.29736741377110537, |
| "grad_norm": 0.04703493966240612, |
| "learning_rate": 9.5244112129837e-06, |
| "loss": 1.1545, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.2980478884021605, |
| "grad_norm": 0.048950716795177485, |
| "learning_rate": 9.522109301235637e-06, |
| "loss": 1.1691, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.29872836303321565, |
| "grad_norm": 0.04164149250851006, |
| "learning_rate": 9.519802111699423e-06, |
| "loss": 1.1418, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.29940883766427084, |
| "grad_norm": 0.04543803394559561, |
| "learning_rate": 9.51748964706779e-06, |
| "loss": 1.1861, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.300089312295326, |
| "grad_norm": 0.07807641340896274, |
| "learning_rate": 9.51517191003964e-06, |
| "loss": 1.2208, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.3007697869263812, |
| "grad_norm": 0.10605080357630506, |
| "learning_rate": 9.512848903320017e-06, |
| "loss": 1.2167, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.3014502615574363, |
| "grad_norm": 0.04651371237040995, |
| "learning_rate": 9.51052062962012e-06, |
| "loss": 1.1747, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.30213073618849146, |
| "grad_norm": 0.04203698338216257, |
| "learning_rate": 9.508187091657297e-06, |
| "loss": 1.211, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.30281121081954665, |
| "grad_norm": 0.05027100823388268, |
| "learning_rate": 9.505848292155037e-06, |
| "loss": 1.2042, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.3034916854506018, |
| "grad_norm": 0.04398023883831587, |
| "learning_rate": 9.503504233842973e-06, |
| "loss": 1.198, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.30417216008165693, |
| "grad_norm": 0.046003976924450816, |
| "learning_rate": 9.501154919456867e-06, |
| "loss": 1.2206, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.3048526347127121, |
| "grad_norm": 0.04649602460300257, |
| "learning_rate": 9.498800351738629e-06, |
| "loss": 1.2181, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.30553310934376726, |
| "grad_norm": 0.04705078110397738, |
| "learning_rate": 9.496440533436289e-06, |
| "loss": 1.1873, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.30621358397482246, |
| "grad_norm": 0.050720730862168675, |
| "learning_rate": 9.494075467304007e-06, |
| "loss": 1.2447, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.3068940586058776, |
| "grad_norm": 0.04370216331760348, |
| "learning_rate": 9.491705156102075e-06, |
| "loss": 1.1935, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.30757453323693273, |
| "grad_norm": 0.04589375799644584, |
| "learning_rate": 9.489329602596898e-06, |
| "loss": 1.2247, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.30825500786798793, |
| "grad_norm": 0.03939021373478932, |
| "learning_rate": 9.486948809561001e-06, |
| "loss": 1.1618, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.30893548249904307, |
| "grad_norm": 0.05050778826919447, |
| "learning_rate": 9.484562779773027e-06, |
| "loss": 1.1586, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.30961595713009826, |
| "grad_norm": 0.04298915132168142, |
| "learning_rate": 9.482171516017733e-06, |
| "loss": 1.0995, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.3102964317611534, |
| "grad_norm": 0.06024263160262278, |
| "learning_rate": 9.479775021085977e-06, |
| "loss": 1.1715, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.31097690639220854, |
| "grad_norm": 0.04047944888546823, |
| "learning_rate": 9.477373297774729e-06, |
| "loss": 1.1799, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.31165738102326374, |
| "grad_norm": 0.04576627432425469, |
| "learning_rate": 9.474966348887055e-06, |
| "loss": 1.1236, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.3123378556543189, |
| "grad_norm": 0.04608165755019771, |
| "learning_rate": 9.472554177232126e-06, |
| "loss": 1.1901, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.31301833028537407, |
| "grad_norm": 0.04572503154737356, |
| "learning_rate": 9.470136785625206e-06, |
| "loss": 1.2308, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.3136988049164292, |
| "grad_norm": 0.04703098129663038, |
| "learning_rate": 9.46771417688765e-06, |
| "loss": 1.1518, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.31437927954748435, |
| "grad_norm": 0.057810641121546665, |
| "learning_rate": 9.465286353846905e-06, |
| "loss": 1.1359, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.31505975417853954, |
| "grad_norm": 0.04007218378292392, |
| "learning_rate": 9.462853319336498e-06, |
| "loss": 1.2377, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.3157402288095947, |
| "grad_norm": 0.04044711828026744, |
| "learning_rate": 9.460415076196046e-06, |
| "loss": 1.1777, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.3164207034406499, |
| "grad_norm": 0.039984993410920745, |
| "learning_rate": 9.457971627271239e-06, |
| "loss": 1.1856, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.317101178071705, |
| "grad_norm": 0.04595448821615046, |
| "learning_rate": 9.455522975413846e-06, |
| "loss": 1.1121, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.31778165270276015, |
| "grad_norm": 0.06911224440534725, |
| "learning_rate": 9.453069123481706e-06, |
| "loss": 1.1776, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.31846212733381535, |
| "grad_norm": 0.0496062942406621, |
| "learning_rate": 9.45061007433873e-06, |
| "loss": 1.2229, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.3191426019648705, |
| "grad_norm": 0.05285388792564055, |
| "learning_rate": 9.44814583085489e-06, |
| "loss": 1.2224, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.3198230765959257, |
| "grad_norm": 0.04266731996830014, |
| "learning_rate": 9.445676395906226e-06, |
| "loss": 1.1885, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3205035512269808, |
| "grad_norm": 0.04794045206701465, |
| "learning_rate": 9.443201772374834e-06, |
| "loss": 1.1875, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.32118402585803596, |
| "grad_norm": 0.04276835398618904, |
| "learning_rate": 9.440721963148864e-06, |
| "loss": 1.186, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.32186450048909115, |
| "grad_norm": 0.04059867789220637, |
| "learning_rate": 9.438236971122523e-06, |
| "loss": 1.1746, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.3225449751201463, |
| "grad_norm": 0.05427813930596811, |
| "learning_rate": 9.435746799196061e-06, |
| "loss": 1.2627, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.3232254497512015, |
| "grad_norm": 0.06052704105903962, |
| "learning_rate": 9.43325145027578e-06, |
| "loss": 1.1896, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.3239059243822566, |
| "grad_norm": 0.05502079517421927, |
| "learning_rate": 9.430750927274018e-06, |
| "loss": 1.2125, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.32458639901331177, |
| "grad_norm": 0.05404712729764853, |
| "learning_rate": 9.428245233109154e-06, |
| "loss": 1.1966, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.32526687364436696, |
| "grad_norm": 0.047489497158267946, |
| "learning_rate": 9.425734370705606e-06, |
| "loss": 1.153, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.3259473482754221, |
| "grad_norm": 0.05518270929161413, |
| "learning_rate": 9.42321834299382e-06, |
| "loss": 1.1214, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.3266278229064773, |
| "grad_norm": 0.04183323442062128, |
| "learning_rate": 9.420697152910268e-06, |
| "loss": 1.1737, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.32730829753753243, |
| "grad_norm": 0.047353508292032734, |
| "learning_rate": 9.41817080339745e-06, |
| "loss": 1.1852, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.3279887721685876, |
| "grad_norm": 0.045798051211923665, |
| "learning_rate": 9.415639297403891e-06, |
| "loss": 1.1832, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.32866924679964277, |
| "grad_norm": 0.04709531868030345, |
| "learning_rate": 9.413102637884131e-06, |
| "loss": 1.2718, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.3293497214306979, |
| "grad_norm": 0.04915959430799309, |
| "learning_rate": 9.410560827798721e-06, |
| "loss": 1.148, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.3300301960617531, |
| "grad_norm": 0.09966635739398834, |
| "learning_rate": 9.40801387011423e-06, |
| "loss": 1.163, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.33071067069280824, |
| "grad_norm": 0.049745442405997646, |
| "learning_rate": 9.40546176780323e-06, |
| "loss": 1.1862, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.3313911453238634, |
| "grad_norm": 0.04671464675596574, |
| "learning_rate": 9.402904523844301e-06, |
| "loss": 1.2282, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.3320716199549186, |
| "grad_norm": 0.05486432598075293, |
| "learning_rate": 9.400342141222019e-06, |
| "loss": 1.1915, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.3327520945859737, |
| "grad_norm": 0.05033392877701764, |
| "learning_rate": 9.397774622926963e-06, |
| "loss": 1.169, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.33343256921702885, |
| "grad_norm": 0.04237024604944351, |
| "learning_rate": 9.395201971955701e-06, |
| "loss": 1.1909, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.33411304384808405, |
| "grad_norm": 0.05093227421957238, |
| "learning_rate": 9.392624191310795e-06, |
| "loss": 1.2164, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.3347935184791392, |
| "grad_norm": 0.046659153381013445, |
| "learning_rate": 9.390041284000793e-06, |
| "loss": 1.116, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.3354739931101944, |
| "grad_norm": 0.04607818909638517, |
| "learning_rate": 9.387453253040221e-06, |
| "loss": 1.1698, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.3361544677412495, |
| "grad_norm": 0.04582104423266983, |
| "learning_rate": 9.384860101449598e-06, |
| "loss": 1.1578, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.33683494237230466, |
| "grad_norm": 0.055032457656983766, |
| "learning_rate": 9.382261832255402e-06, |
| "loss": 1.2005, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.33751541700335985, |
| "grad_norm": 0.045420525383448394, |
| "learning_rate": 9.3796584484901e-06, |
| "loss": 1.1812, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.338195891634415, |
| "grad_norm": 0.04479691200348206, |
| "learning_rate": 9.377049953192114e-06, |
| "loss": 1.1975, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.3388763662654702, |
| "grad_norm": 0.04262624876760037, |
| "learning_rate": 9.374436349405847e-06, |
| "loss": 1.1979, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.3395568408965253, |
| "grad_norm": 0.05984559336645253, |
| "learning_rate": 9.371817640181649e-06, |
| "loss": 1.1928, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.34023731552758046, |
| "grad_norm": 0.04895162934117913, |
| "learning_rate": 9.369193828575838e-06, |
| "loss": 1.2148, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.34091779015863566, |
| "grad_norm": 0.05483533156140584, |
| "learning_rate": 9.366564917650685e-06, |
| "loss": 1.1474, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.3415982647896908, |
| "grad_norm": 0.04565128017297075, |
| "learning_rate": 9.36393091047441e-06, |
| "loss": 1.1957, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.342278739420746, |
| "grad_norm": 0.05336905029274791, |
| "learning_rate": 9.361291810121184e-06, |
| "loss": 1.1451, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.34295921405180113, |
| "grad_norm": 0.040537767793770316, |
| "learning_rate": 9.358647619671123e-06, |
| "loss": 1.1766, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.34363968868285627, |
| "grad_norm": 0.043800628506550594, |
| "learning_rate": 9.355998342210278e-06, |
| "loss": 1.1679, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.34432016331391146, |
| "grad_norm": 0.23872842344128314, |
| "learning_rate": 9.353343980830644e-06, |
| "loss": 1.1807, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.3450006379449666, |
| "grad_norm": 0.18365918447007845, |
| "learning_rate": 9.350684538630146e-06, |
| "loss": 1.0977, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.3456811125760218, |
| "grad_norm": 0.04344688652790131, |
| "learning_rate": 9.348020018712636e-06, |
| "loss": 1.1744, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.34636158720707694, |
| "grad_norm": 0.06815347742969964, |
| "learning_rate": 9.3453504241879e-06, |
| "loss": 1.1922, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.3470420618381321, |
| "grad_norm": 0.17668464807174913, |
| "learning_rate": 9.342675758171638e-06, |
| "loss": 1.1527, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.34772253646918727, |
| "grad_norm": 0.043762024230477835, |
| "learning_rate": 9.339996023785477e-06, |
| "loss": 1.1789, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.3484030111002424, |
| "grad_norm": 0.04645872519379631, |
| "learning_rate": 9.337311224156952e-06, |
| "loss": 1.2431, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.3490834857312976, |
| "grad_norm": 0.05467355097949484, |
| "learning_rate": 9.334621362419516e-06, |
| "loss": 1.19, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.34976396036235274, |
| "grad_norm": 0.04532675094347275, |
| "learning_rate": 9.331926441712522e-06, |
| "loss": 1.1435, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.3504444349934079, |
| "grad_norm": 0.19520076722462312, |
| "learning_rate": 9.32922646518124e-06, |
| "loss": 1.2298, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.3511249096244631, |
| "grad_norm": 0.04371007629560449, |
| "learning_rate": 9.326521435976827e-06, |
| "loss": 1.1918, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.3518053842555182, |
| "grad_norm": 0.041218605155981726, |
| "learning_rate": 9.323811357256344e-06, |
| "loss": 1.1903, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.3524858588865734, |
| "grad_norm": 0.04769161322978696, |
| "learning_rate": 9.32109623218275e-06, |
| "loss": 1.1919, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.35316633351762855, |
| "grad_norm": 0.04352852116577712, |
| "learning_rate": 9.31837606392488e-06, |
| "loss": 1.2385, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.3538468081486837, |
| "grad_norm": 0.04296085894799078, |
| "learning_rate": 9.315650855657468e-06, |
| "loss": 1.1762, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.3545272827797389, |
| "grad_norm": 0.062008282831599724, |
| "learning_rate": 9.312920610561125e-06, |
| "loss": 1.1741, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.355207757410794, |
| "grad_norm": 0.04475337460583143, |
| "learning_rate": 9.310185331822338e-06, |
| "loss": 1.2936, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.3558882320418492, |
| "grad_norm": 0.04335601547887752, |
| "learning_rate": 9.307445022633476e-06, |
| "loss": 1.1844, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.35656870667290436, |
| "grad_norm": 0.04763279336067209, |
| "learning_rate": 9.304699686192771e-06, |
| "loss": 1.1406, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.3572491813039595, |
| "grad_norm": 0.047035165273336066, |
| "learning_rate": 9.301949325704326e-06, |
| "loss": 1.1617, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.3579296559350147, |
| "grad_norm": 0.04427665615849973, |
| "learning_rate": 9.299193944378112e-06, |
| "loss": 1.1685, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.35861013056606983, |
| "grad_norm": 0.04803984704079458, |
| "learning_rate": 9.296433545429951e-06, |
| "loss": 1.1551, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.35929060519712497, |
| "grad_norm": 0.04228609178520076, |
| "learning_rate": 9.293668132081528e-06, |
| "loss": 1.2, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.35997107982818016, |
| "grad_norm": 0.04327688602770503, |
| "learning_rate": 9.290897707560376e-06, |
| "loss": 1.1496, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.3606515544592353, |
| "grad_norm": 0.03945827293494703, |
| "learning_rate": 9.28812227509988e-06, |
| "loss": 1.1688, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.3613320290902905, |
| "grad_norm": 0.06770668944349958, |
| "learning_rate": 9.285341837939267e-06, |
| "loss": 1.1949, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.36201250372134564, |
| "grad_norm": 0.04952482056953356, |
| "learning_rate": 9.282556399323608e-06, |
| "loss": 1.1547, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.3626929783524008, |
| "grad_norm": 0.04691822525955649, |
| "learning_rate": 9.279765962503809e-06, |
| "loss": 1.2035, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.36337345298345597, |
| "grad_norm": 0.04894000875415613, |
| "learning_rate": 9.27697053073661e-06, |
| "loss": 1.145, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.3640539276145111, |
| "grad_norm": 0.04336478998694143, |
| "learning_rate": 9.27417010728458e-06, |
| "loss": 1.1325, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.3647344022455663, |
| "grad_norm": 0.04176773876592002, |
| "learning_rate": 9.271364695416115e-06, |
| "loss": 1.1835, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.36541487687662144, |
| "grad_norm": 0.05622345966356037, |
| "learning_rate": 9.268554298405434e-06, |
| "loss": 1.1717, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.3660953515076766, |
| "grad_norm": 0.043400920584928455, |
| "learning_rate": 9.26573891953257e-06, |
| "loss": 1.2127, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.3667758261387318, |
| "grad_norm": 0.05286638961353637, |
| "learning_rate": 9.262918562083374e-06, |
| "loss": 1.2216, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.3674563007697869, |
| "grad_norm": 0.056795432935915866, |
| "learning_rate": 9.260093229349507e-06, |
| "loss": 1.1489, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3681367754008421, |
| "grad_norm": 0.04668482939527108, |
| "learning_rate": 9.25726292462844e-06, |
| "loss": 1.1994, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.36881725003189725, |
| "grad_norm": 0.044822594591671316, |
| "learning_rate": 9.254427651223434e-06, |
| "loss": 1.1824, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.3694977246629524, |
| "grad_norm": 0.04345980827207413, |
| "learning_rate": 9.251587412443567e-06, |
| "loss": 1.1459, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.3701781992940076, |
| "grad_norm": 0.06220612289602904, |
| "learning_rate": 9.248742211603699e-06, |
| "loss": 1.0962, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.3708586739250627, |
| "grad_norm": 0.04464179454313693, |
| "learning_rate": 9.245892052024486e-06, |
| "loss": 1.2087, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.3715391485561179, |
| "grad_norm": 0.0482955122912646, |
| "learning_rate": 9.243036937032373e-06, |
| "loss": 1.125, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.37221962318717305, |
| "grad_norm": 0.044665776636380314, |
| "learning_rate": 9.240176869959582e-06, |
| "loss": 1.138, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.3729000978182282, |
| "grad_norm": 0.04680059833998865, |
| "learning_rate": 9.237311854144125e-06, |
| "loss": 1.2355, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.3735805724492834, |
| "grad_norm": 0.10018674747989316, |
| "learning_rate": 9.23444189292978e-06, |
| "loss": 1.1918, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.3742610470803385, |
| "grad_norm": 0.04306609979305801, |
| "learning_rate": 9.2315669896661e-06, |
| "loss": 1.1281, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3749415217113937, |
| "grad_norm": 0.05047293198274693, |
| "learning_rate": 9.228687147708409e-06, |
| "loss": 1.1483, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.37562199634244886, |
| "grad_norm": 0.04210396323953163, |
| "learning_rate": 9.225802370417789e-06, |
| "loss": 1.1321, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.376302470973504, |
| "grad_norm": 0.06228495287048316, |
| "learning_rate": 9.222912661161088e-06, |
| "loss": 1.17, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.3769829456045592, |
| "grad_norm": 0.04060494627163399, |
| "learning_rate": 9.220018023310908e-06, |
| "loss": 1.1338, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.37766342023561433, |
| "grad_norm": 0.04117475908146937, |
| "learning_rate": 9.217118460245602e-06, |
| "loss": 1.116, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.37834389486666953, |
| "grad_norm": 0.05357625308494766, |
| "learning_rate": 9.214213975349272e-06, |
| "loss": 1.154, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.37902436949772467, |
| "grad_norm": 0.04262138167104594, |
| "learning_rate": 9.211304572011765e-06, |
| "loss": 1.1779, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.3797048441287798, |
| "grad_norm": 0.05022095152403962, |
| "learning_rate": 9.208390253628667e-06, |
| "loss": 1.2275, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.380385318759835, |
| "grad_norm": 0.038697641779660466, |
| "learning_rate": 9.205471023601302e-06, |
| "loss": 1.2025, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.38106579339089014, |
| "grad_norm": 0.04837293979857492, |
| "learning_rate": 9.202546885336725e-06, |
| "loss": 1.2113, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.38174626802194533, |
| "grad_norm": 0.04709673453076869, |
| "learning_rate": 9.199617842247718e-06, |
| "loss": 1.2322, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.3824267426530005, |
| "grad_norm": 0.038353871634371726, |
| "learning_rate": 9.196683897752794e-06, |
| "loss": 1.14, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.3831072172840556, |
| "grad_norm": 0.03856451637837848, |
| "learning_rate": 9.193745055276177e-06, |
| "loss": 1.1752, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.3837876919151108, |
| "grad_norm": 0.04029222501667547, |
| "learning_rate": 9.190801318247817e-06, |
| "loss": 1.134, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.38446816654616595, |
| "grad_norm": 0.04718440155396733, |
| "learning_rate": 9.18785269010337e-06, |
| "loss": 1.1904, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.3851486411772211, |
| "grad_norm": 0.047157298136945865, |
| "learning_rate": 9.184899174284201e-06, |
| "loss": 1.2019, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.3858291158082763, |
| "grad_norm": 0.040554851143715896, |
| "learning_rate": 9.181940774237383e-06, |
| "loss": 1.1578, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.3865095904393314, |
| "grad_norm": 0.04395329490897282, |
| "learning_rate": 9.178977493415684e-06, |
| "loss": 1.1691, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.3871900650703866, |
| "grad_norm": 0.0433727263541022, |
| "learning_rate": 9.176009335277575e-06, |
| "loss": 1.1697, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.38787053970144175, |
| "grad_norm": 0.04385387344621323, |
| "learning_rate": 9.173036303287215e-06, |
| "loss": 1.2413, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.3885510143324969, |
| "grad_norm": 0.04299760073456161, |
| "learning_rate": 9.17005840091445e-06, |
| "loss": 1.2259, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.3892314889635521, |
| "grad_norm": 0.04243566940825901, |
| "learning_rate": 9.167075631634816e-06, |
| "loss": 1.1939, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.3899119635946072, |
| "grad_norm": 0.04414891882046903, |
| "learning_rate": 9.164087998929523e-06, |
| "loss": 1.1575, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.3905924382256624, |
| "grad_norm": 0.03882576400514526, |
| "learning_rate": 9.16109550628546e-06, |
| "loss": 1.1904, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.39127291285671756, |
| "grad_norm": 0.05228919194953363, |
| "learning_rate": 9.15809815719519e-06, |
| "loss": 1.2166, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.3919533874877727, |
| "grad_norm": 0.03962957960394459, |
| "learning_rate": 9.155095955156941e-06, |
| "loss": 1.1888, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.3926338621188279, |
| "grad_norm": 0.041080403985258, |
| "learning_rate": 9.152088903674605e-06, |
| "loss": 1.1468, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.39331433674988303, |
| "grad_norm": 0.03904444499346437, |
| "learning_rate": 9.149077006257734e-06, |
| "loss": 1.2097, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.3939948113809382, |
| "grad_norm": 0.055258121836283904, |
| "learning_rate": 9.14606026642154e-06, |
| "loss": 1.183, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.39467528601199336, |
| "grad_norm": 0.043950342680780415, |
| "learning_rate": 9.143038687686877e-06, |
| "loss": 1.2286, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.3953557606430485, |
| "grad_norm": 0.04326934798265069, |
| "learning_rate": 9.140012273580261e-06, |
| "loss": 1.1461, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.3960362352741037, |
| "grad_norm": 0.11342425414721487, |
| "learning_rate": 9.136981027633834e-06, |
| "loss": 1.1621, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.39671670990515884, |
| "grad_norm": 0.045256571639823724, |
| "learning_rate": 9.133944953385392e-06, |
| "loss": 1.1601, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.39739718453621403, |
| "grad_norm": 0.052853385155972675, |
| "learning_rate": 9.130904054378358e-06, |
| "loss": 1.1266, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.39807765916726917, |
| "grad_norm": 0.044595902031428894, |
| "learning_rate": 9.127858334161789e-06, |
| "loss": 1.1782, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.3987581337983243, |
| "grad_norm": 0.050516667255890005, |
| "learning_rate": 9.124807796290366e-06, |
| "loss": 1.2294, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.3994386084293795, |
| "grad_norm": 0.03737860442207068, |
| "learning_rate": 9.1217524443244e-06, |
| "loss": 1.1784, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.40011908306043464, |
| "grad_norm": 0.04225329960507096, |
| "learning_rate": 9.118692281829813e-06, |
| "loss": 1.1505, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.40079955769148984, |
| "grad_norm": 0.050763619906775, |
| "learning_rate": 9.115627312378141e-06, |
| "loss": 1.1469, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.401480032322545, |
| "grad_norm": 0.05617411574200467, |
| "learning_rate": 9.112557539546535e-06, |
| "loss": 1.1776, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.4021605069536001, |
| "grad_norm": 0.07658515208497231, |
| "learning_rate": 9.109482966917753e-06, |
| "loss": 1.1436, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.4028409815846553, |
| "grad_norm": 0.05122117400200576, |
| "learning_rate": 9.10640359808015e-06, |
| "loss": 1.2159, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.40352145621571045, |
| "grad_norm": 0.042423364800815176, |
| "learning_rate": 9.10331943662768e-06, |
| "loss": 1.1828, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.40420193084676564, |
| "grad_norm": 0.05370052513597702, |
| "learning_rate": 9.100230486159893e-06, |
| "loss": 1.1537, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.4048824054778208, |
| "grad_norm": 0.043549532367692975, |
| "learning_rate": 9.097136750281925e-06, |
| "loss": 1.1416, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.4055628801088759, |
| "grad_norm": 0.04895296341337781, |
| "learning_rate": 9.094038232604499e-06, |
| "loss": 1.1718, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.4062433547399311, |
| "grad_norm": 0.05538114190435457, |
| "learning_rate": 9.090934936743919e-06, |
| "loss": 1.162, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.40692382937098626, |
| "grad_norm": 0.04557565748071047, |
| "learning_rate": 9.087826866322065e-06, |
| "loss": 1.1355, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.40760430400204145, |
| "grad_norm": 0.04378712655891265, |
| "learning_rate": 9.084714024966387e-06, |
| "loss": 1.2115, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.4082847786330966, |
| "grad_norm": 0.04170201899514444, |
| "learning_rate": 9.081596416309913e-06, |
| "loss": 1.2017, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.40896525326415173, |
| "grad_norm": 0.04861323345657344, |
| "learning_rate": 9.07847404399122e-06, |
| "loss": 1.1319, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.4096457278952069, |
| "grad_norm": 0.041723471269353364, |
| "learning_rate": 9.075346911654456e-06, |
| "loss": 1.2286, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.41032620252626206, |
| "grad_norm": 0.04335204371295228, |
| "learning_rate": 9.072215022949323e-06, |
| "loss": 1.1921, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.4110066771573172, |
| "grad_norm": 0.04528272419703535, |
| "learning_rate": 9.069078381531067e-06, |
| "loss": 1.2223, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.4116871517883724, |
| "grad_norm": 0.05715384377035942, |
| "learning_rate": 9.06593699106049e-06, |
| "loss": 1.0998, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.41236762641942754, |
| "grad_norm": 0.04669873043406728, |
| "learning_rate": 9.062790855203932e-06, |
| "loss": 1.2269, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.41304810105048273, |
| "grad_norm": 0.04030013224679412, |
| "learning_rate": 9.059639977633272e-06, |
| "loss": 1.1337, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.41372857568153787, |
| "grad_norm": 0.04483399795097766, |
| "learning_rate": 9.056484362025922e-06, |
| "loss": 1.1496, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.414409050312593, |
| "grad_norm": 0.04135748479654297, |
| "learning_rate": 9.053324012064826e-06, |
| "loss": 1.1519, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.4150895249436482, |
| "grad_norm": 0.038852153018098134, |
| "learning_rate": 9.050158931438451e-06, |
| "loss": 1.2013, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.41576999957470334, |
| "grad_norm": 0.04120197609440938, |
| "learning_rate": 9.046989123840787e-06, |
| "loss": 1.092, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.41645047420575854, |
| "grad_norm": 0.0632098125332872, |
| "learning_rate": 9.043814592971345e-06, |
| "loss": 1.1927, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.4171309488368137, |
| "grad_norm": 0.05564769512803907, |
| "learning_rate": 9.040635342535138e-06, |
| "loss": 1.156, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.4178114234678688, |
| "grad_norm": 0.04572723207846192, |
| "learning_rate": 9.037451376242696e-06, |
| "loss": 1.1422, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.418491898098924, |
| "grad_norm": 0.04550948417572969, |
| "learning_rate": 9.03426269781005e-06, |
| "loss": 1.1934, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.41917237272997915, |
| "grad_norm": 0.04731619409086017, |
| "learning_rate": 9.031069310958733e-06, |
| "loss": 1.1733, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.41985284736103434, |
| "grad_norm": 0.044246541580062755, |
| "learning_rate": 9.027871219415768e-06, |
| "loss": 1.1082, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.4205333219920895, |
| "grad_norm": 0.04820982477572937, |
| "learning_rate": 9.024668426913671e-06, |
| "loss": 1.1309, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.4212137966231446, |
| "grad_norm": 0.08383658208257669, |
| "learning_rate": 9.021460937190452e-06, |
| "loss": 1.1564, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.4218942712541998, |
| "grad_norm": 0.05604400126597725, |
| "learning_rate": 9.018248753989589e-06, |
| "loss": 1.1474, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.42257474588525495, |
| "grad_norm": 0.05356260018739531, |
| "learning_rate": 9.015031881060049e-06, |
| "loss": 1.1796, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.42325522051631015, |
| "grad_norm": 0.04238920741626305, |
| "learning_rate": 9.011810322156269e-06, |
| "loss": 1.2104, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.4239356951473653, |
| "grad_norm": 0.04894978259042282, |
| "learning_rate": 9.008584081038154e-06, |
| "loss": 1.1594, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.4246161697784204, |
| "grad_norm": 0.04426277481058919, |
| "learning_rate": 9.005353161471075e-06, |
| "loss": 1.2169, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.4252966444094756, |
| "grad_norm": 0.043070948426717666, |
| "learning_rate": 9.002117567225864e-06, |
| "loss": 1.1211, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.42597711904053076, |
| "grad_norm": 0.05303928667483388, |
| "learning_rate": 8.998877302078803e-06, |
| "loss": 1.149, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.42665759367158596, |
| "grad_norm": 0.04516784837517743, |
| "learning_rate": 8.995632369811637e-06, |
| "loss": 1.1671, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.4273380683026411, |
| "grad_norm": 0.04548405871251726, |
| "learning_rate": 8.992382774211546e-06, |
| "loss": 1.1877, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.42801854293369623, |
| "grad_norm": 0.04781155444112428, |
| "learning_rate": 8.98912851907116e-06, |
| "loss": 1.1783, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.42869901756475143, |
| "grad_norm": 0.052231441639144574, |
| "learning_rate": 8.985869608188545e-06, |
| "loss": 1.1696, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.42937949219580657, |
| "grad_norm": 0.0431022135401387, |
| "learning_rate": 8.982606045367197e-06, |
| "loss": 1.1694, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.43005996682686176, |
| "grad_norm": 0.04575883042207237, |
| "learning_rate": 8.97933783441605e-06, |
| "loss": 1.1323, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.4307404414579169, |
| "grad_norm": 0.04896257001899479, |
| "learning_rate": 8.976064979149455e-06, |
| "loss": 1.1249, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.43142091608897204, |
| "grad_norm": 0.04389504944422267, |
| "learning_rate": 8.97278748338719e-06, |
| "loss": 1.159, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.43210139072002723, |
| "grad_norm": 0.05622702110796912, |
| "learning_rate": 8.969505350954437e-06, |
| "loss": 1.2003, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.4327818653510824, |
| "grad_norm": 0.04040018206262394, |
| "learning_rate": 8.966218585681807e-06, |
| "loss": 1.1349, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.43346233998213757, |
| "grad_norm": 0.047477365580878915, |
| "learning_rate": 8.962927191405303e-06, |
| "loss": 1.2024, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.4341428146131927, |
| "grad_norm": 0.048555827930069, |
| "learning_rate": 8.95963117196634e-06, |
| "loss": 1.2557, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.43482328924424785, |
| "grad_norm": 0.054659375585268836, |
| "learning_rate": 8.956330531211722e-06, |
| "loss": 1.188, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.43550376387530304, |
| "grad_norm": 0.040645473042345025, |
| "learning_rate": 8.953025272993658e-06, |
| "loss": 1.1376, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4361842385063582, |
| "grad_norm": 0.046111292563565255, |
| "learning_rate": 8.949715401169736e-06, |
| "loss": 1.2019, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.4368647131374133, |
| "grad_norm": 0.054079384073597254, |
| "learning_rate": 8.946400919602933e-06, |
| "loss": 1.0977, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.4375451877684685, |
| "grad_norm": 0.05731924786742128, |
| "learning_rate": 8.943081832161609e-06, |
| "loss": 1.1544, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.43822566239952365, |
| "grad_norm": 0.05630874065668251, |
| "learning_rate": 8.939758142719492e-06, |
| "loss": 1.1918, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.43890613703057885, |
| "grad_norm": 0.03816990143767904, |
| "learning_rate": 8.936429855155689e-06, |
| "loss": 1.2012, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.439586611661634, |
| "grad_norm": 0.04400978621701978, |
| "learning_rate": 8.933096973354665e-06, |
| "loss": 1.1453, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.4402670862926891, |
| "grad_norm": 0.04665880271204441, |
| "learning_rate": 8.929759501206256e-06, |
| "loss": 1.1265, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.4409475609237443, |
| "grad_norm": 0.043030911029214435, |
| "learning_rate": 8.926417442605648e-06, |
| "loss": 1.135, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.44162803555479946, |
| "grad_norm": 0.04123644734185817, |
| "learning_rate": 8.923070801453387e-06, |
| "loss": 1.2076, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.44230851018585465, |
| "grad_norm": 0.04506167142447191, |
| "learning_rate": 8.919719581655357e-06, |
| "loss": 1.1844, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.4429889848169098, |
| "grad_norm": 0.046921451751882365, |
| "learning_rate": 8.916363787122799e-06, |
| "loss": 1.1617, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.44366945944796493, |
| "grad_norm": 0.04190142210943615, |
| "learning_rate": 8.913003421772281e-06, |
| "loss": 1.1999, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.4443499340790201, |
| "grad_norm": 0.04257383871526422, |
| "learning_rate": 8.909638489525716e-06, |
| "loss": 1.1247, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.44503040871007526, |
| "grad_norm": 0.04284067496617002, |
| "learning_rate": 8.906268994310339e-06, |
| "loss": 1.126, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.44571088334113046, |
| "grad_norm": 0.04146605514930847, |
| "learning_rate": 8.902894940058711e-06, |
| "loss": 1.147, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.4463913579721856, |
| "grad_norm": 0.039662388499429706, |
| "learning_rate": 8.89951633070872e-06, |
| "loss": 1.1941, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.44707183260324074, |
| "grad_norm": 0.04418598970569444, |
| "learning_rate": 8.896133170203568e-06, |
| "loss": 1.2457, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.44775230723429593, |
| "grad_norm": 0.10906792539732048, |
| "learning_rate": 8.892745462491763e-06, |
| "loss": 1.1104, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.44843278186535107, |
| "grad_norm": 0.0424440730081983, |
| "learning_rate": 8.889353211527127e-06, |
| "loss": 1.2128, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.44911325649640627, |
| "grad_norm": 0.03940467016530127, |
| "learning_rate": 8.88595642126878e-06, |
| "loss": 1.1276, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.4497937311274614, |
| "grad_norm": 0.04242304735361312, |
| "learning_rate": 8.882555095681146e-06, |
| "loss": 1.153, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.45047420575851654, |
| "grad_norm": 0.045990540093621315, |
| "learning_rate": 8.879149238733932e-06, |
| "loss": 1.1676, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.45115468038957174, |
| "grad_norm": 0.038752849130074755, |
| "learning_rate": 8.875738854402145e-06, |
| "loss": 1.1658, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.4518351550206269, |
| "grad_norm": 0.049091711940893935, |
| "learning_rate": 8.872323946666068e-06, |
| "loss": 1.2261, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.45251562965168207, |
| "grad_norm": 0.04436510007180478, |
| "learning_rate": 8.868904519511265e-06, |
| "loss": 1.1589, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.4531961042827372, |
| "grad_norm": 0.04529044535448883, |
| "learning_rate": 8.865480576928578e-06, |
| "loss": 1.1663, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.45387657891379235, |
| "grad_norm": 0.04350982910961256, |
| "learning_rate": 8.862052122914113e-06, |
| "loss": 1.0842, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.45455705354484754, |
| "grad_norm": 0.04415118922290587, |
| "learning_rate": 8.858619161469246e-06, |
| "loss": 1.1682, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.4552375281759027, |
| "grad_norm": 0.04102662678874476, |
| "learning_rate": 8.855181696600615e-06, |
| "loss": 1.225, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.4559180028069579, |
| "grad_norm": 0.03978611381793182, |
| "learning_rate": 8.851739732320109e-06, |
| "loss": 1.2356, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.456598477438013, |
| "grad_norm": 0.03808971840706306, |
| "learning_rate": 8.84829327264487e-06, |
| "loss": 1.1477, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.45727895206906816, |
| "grad_norm": 0.04093130739375452, |
| "learning_rate": 8.844842321597289e-06, |
| "loss": 1.1476, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.45795942670012335, |
| "grad_norm": 0.0397782234125982, |
| "learning_rate": 8.841386883204996e-06, |
| "loss": 1.1601, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.4586399013311785, |
| "grad_norm": 0.04360741298869491, |
| "learning_rate": 8.83792696150086e-06, |
| "loss": 1.1521, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.4593203759622337, |
| "grad_norm": 0.03973179592791577, |
| "learning_rate": 8.834462560522983e-06, |
| "loss": 1.1552, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.4600008505932888, |
| "grad_norm": 0.0537116714743348, |
| "learning_rate": 8.83099368431469e-06, |
| "loss": 1.1733, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.46068132522434396, |
| "grad_norm": 0.04194800221614982, |
| "learning_rate": 8.827520336924539e-06, |
| "loss": 1.1144, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.46136179985539916, |
| "grad_norm": 0.03962763960599941, |
| "learning_rate": 8.824042522406295e-06, |
| "loss": 1.1887, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.4620422744864543, |
| "grad_norm": 0.05396619916684024, |
| "learning_rate": 8.820560244818943e-06, |
| "loss": 1.186, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.46272274911750944, |
| "grad_norm": 0.03922059133562145, |
| "learning_rate": 8.817073508226677e-06, |
| "loss": 1.1002, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.46340322374856463, |
| "grad_norm": 0.04739443119639226, |
| "learning_rate": 8.813582316698892e-06, |
| "loss": 1.2155, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.46408369837961977, |
| "grad_norm": 0.04637500866593254, |
| "learning_rate": 8.810086674310184e-06, |
| "loss": 1.1297, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.46476417301067496, |
| "grad_norm": 0.04244486346065373, |
| "learning_rate": 8.806586585140346e-06, |
| "loss": 1.1765, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.4654446476417301, |
| "grad_norm": 0.04328360465585307, |
| "learning_rate": 8.803082053274357e-06, |
| "loss": 1.2268, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.46612512227278524, |
| "grad_norm": 0.05890001367764087, |
| "learning_rate": 8.799573082802384e-06, |
| "loss": 1.1906, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.46680559690384044, |
| "grad_norm": 0.041636831966172676, |
| "learning_rate": 8.796059677819773e-06, |
| "loss": 1.1916, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.4674860715348956, |
| "grad_norm": 0.045474654484024504, |
| "learning_rate": 8.792541842427043e-06, |
| "loss": 1.1641, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.46816654616595077, |
| "grad_norm": 0.04576863707481716, |
| "learning_rate": 8.789019580729889e-06, |
| "loss": 1.1878, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.4688470207970059, |
| "grad_norm": 0.06478144940333726, |
| "learning_rate": 8.78549289683917e-06, |
| "loss": 1.2141, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.46952749542806105, |
| "grad_norm": 0.043456374490342724, |
| "learning_rate": 8.781961794870903e-06, |
| "loss": 1.2166, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.47020797005911624, |
| "grad_norm": 0.049269380160544426, |
| "learning_rate": 8.778426278946266e-06, |
| "loss": 1.1901, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.4708884446901714, |
| "grad_norm": 0.044225703845258736, |
| "learning_rate": 8.774886353191587e-06, |
| "loss": 1.1272, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.4715689193212266, |
| "grad_norm": 0.05178632319183577, |
| "learning_rate": 8.771342021738338e-06, |
| "loss": 1.165, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.4722493939522817, |
| "grad_norm": 0.04521221449695577, |
| "learning_rate": 8.767793288723137e-06, |
| "loss": 1.1625, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.47292986858333685, |
| "grad_norm": 0.04707321365536953, |
| "learning_rate": 8.764240158287738e-06, |
| "loss": 1.2437, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.47361034321439205, |
| "grad_norm": 0.07074717036076185, |
| "learning_rate": 8.760682634579023e-06, |
| "loss": 1.1775, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.4742908178454472, |
| "grad_norm": 0.040897170854665654, |
| "learning_rate": 8.757120721749008e-06, |
| "loss": 1.1933, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.4749712924765024, |
| "grad_norm": 0.048850521888701544, |
| "learning_rate": 8.753554423954828e-06, |
| "loss": 1.1267, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.4756517671075575, |
| "grad_norm": 0.06210572694991292, |
| "learning_rate": 8.749983745358737e-06, |
| "loss": 1.1855, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.47633224173861266, |
| "grad_norm": 0.048857526652923464, |
| "learning_rate": 8.746408690128098e-06, |
| "loss": 1.1665, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.47701271636966786, |
| "grad_norm": 0.0414540516695226, |
| "learning_rate": 8.74282926243539e-06, |
| "loss": 1.1863, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.477693191000723, |
| "grad_norm": 0.055584695327975414, |
| "learning_rate": 8.739245466458187e-06, |
| "loss": 1.1967, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.4783736656317782, |
| "grad_norm": 0.04030451886055771, |
| "learning_rate": 8.735657306379163e-06, |
| "loss": 1.2083, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.47905414026283333, |
| "grad_norm": 0.04719451383173251, |
| "learning_rate": 8.73206478638609e-06, |
| "loss": 1.1757, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.47973461489388847, |
| "grad_norm": 0.04763825535086207, |
| "learning_rate": 8.728467910671824e-06, |
| "loss": 1.1618, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.48041508952494366, |
| "grad_norm": 0.044435320565230585, |
| "learning_rate": 8.72486668343431e-06, |
| "loss": 1.1431, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.4810955641559988, |
| "grad_norm": 0.040290417518756755, |
| "learning_rate": 8.72126110887656e-06, |
| "loss": 1.209, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.481776038787054, |
| "grad_norm": 0.04438079005488205, |
| "learning_rate": 8.717651191206675e-06, |
| "loss": 1.1499, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.48245651341810913, |
| "grad_norm": 0.04056501410888882, |
| "learning_rate": 8.714036934637811e-06, |
| "loss": 1.1974, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.4831369880491643, |
| "grad_norm": 0.04206798497439385, |
| "learning_rate": 8.7104183433882e-06, |
| "loss": 1.2026, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.48381746268021947, |
| "grad_norm": 0.05388403958303696, |
| "learning_rate": 8.706795421681123e-06, |
| "loss": 1.1419, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.4844979373112746, |
| "grad_norm": 0.05393628641759076, |
| "learning_rate": 8.703168173744922e-06, |
| "loss": 1.1001, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.4851784119423298, |
| "grad_norm": 0.038014835242398314, |
| "learning_rate": 8.699536603812985e-06, |
| "loss": 1.1714, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.48585888657338494, |
| "grad_norm": 0.04159366635634062, |
| "learning_rate": 8.695900716123744e-06, |
| "loss": 1.1331, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.4865393612044401, |
| "grad_norm": 0.04599652383543539, |
| "learning_rate": 8.692260514920673e-06, |
| "loss": 1.1626, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.4872198358354953, |
| "grad_norm": 0.04701383530546749, |
| "learning_rate": 8.688616004452277e-06, |
| "loss": 1.1981, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.4879003104665504, |
| "grad_norm": 0.04314451118569952, |
| "learning_rate": 8.684967188972092e-06, |
| "loss": 1.2204, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.48858078509760555, |
| "grad_norm": 0.04650286004160289, |
| "learning_rate": 8.681314072738678e-06, |
| "loss": 1.1316, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.48926125972866075, |
| "grad_norm": 0.051839407512247186, |
| "learning_rate": 8.677656660015616e-06, |
| "loss": 1.1252, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.4899417343597159, |
| "grad_norm": 0.046793968679010596, |
| "learning_rate": 8.6739949550715e-06, |
| "loss": 1.1836, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.4906222089907711, |
| "grad_norm": 0.05835913550371476, |
| "learning_rate": 8.670328962179933e-06, |
| "loss": 1.1668, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.4913026836218262, |
| "grad_norm": 0.05277610249888075, |
| "learning_rate": 8.666658685619523e-06, |
| "loss": 1.1915, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.49198315825288136, |
| "grad_norm": 0.0439462908397666, |
| "learning_rate": 8.662984129673878e-06, |
| "loss": 1.2047, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.49266363288393655, |
| "grad_norm": 0.04834786737069348, |
| "learning_rate": 8.6593052986316e-06, |
| "loss": 1.1874, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.4933441075149917, |
| "grad_norm": 0.04629847715145555, |
| "learning_rate": 8.655622196786281e-06, |
| "loss": 1.2091, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.4940245821460469, |
| "grad_norm": 0.05364431842807492, |
| "learning_rate": 8.651934828436497e-06, |
| "loss": 1.137, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.494705056777102, |
| "grad_norm": 0.044196454811106574, |
| "learning_rate": 8.648243197885805e-06, |
| "loss": 1.1291, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.49538553140815716, |
| "grad_norm": 0.05481533638606025, |
| "learning_rate": 8.644547309442734e-06, |
| "loss": 1.2078, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.49606600603921236, |
| "grad_norm": 0.03746499887219231, |
| "learning_rate": 8.640847167420782e-06, |
| "loss": 1.1583, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.4967464806702675, |
| "grad_norm": 0.04187639740119247, |
| "learning_rate": 8.637142776138415e-06, |
| "loss": 1.173, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.4974269553013227, |
| "grad_norm": 0.03998133984926249, |
| "learning_rate": 8.633434139919054e-06, |
| "loss": 1.1314, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.49810742993237783, |
| "grad_norm": 0.04252691510243095, |
| "learning_rate": 8.62972126309108e-06, |
| "loss": 1.1895, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.49878790456343297, |
| "grad_norm": 0.04184153795045174, |
| "learning_rate": 8.626004149987816e-06, |
| "loss": 1.1987, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.49946837919448817, |
| "grad_norm": 0.041100921428193674, |
| "learning_rate": 8.622282804947537e-06, |
| "loss": 1.1745, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.5001488538255433, |
| "grad_norm": 0.04508926878431586, |
| "learning_rate": 8.61855723231345e-06, |
| "loss": 1.1887, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.5008293284565984, |
| "grad_norm": 0.04409577879983121, |
| "learning_rate": 8.614827436433699e-06, |
| "loss": 1.1979, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.5015098030876537, |
| "grad_norm": 0.03695764179065035, |
| "learning_rate": 8.611093421661358e-06, |
| "loss": 1.1751, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.5021902777187088, |
| "grad_norm": 0.053081097299524024, |
| "learning_rate": 8.607355192354425e-06, |
| "loss": 1.1036, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.502870752349764, |
| "grad_norm": 0.04241717636125699, |
| "learning_rate": 8.603612752875816e-06, |
| "loss": 1.1677, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.5035512269808191, |
| "grad_norm": 0.03994841133844269, |
| "learning_rate": 8.599866107593358e-06, |
| "loss": 1.1408, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5042317016118743, |
| "grad_norm": 0.04832878049821652, |
| "learning_rate": 8.596115260879792e-06, |
| "loss": 1.1854, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.5049121762429294, |
| "grad_norm": 0.043935479378240015, |
| "learning_rate": 8.592360217112759e-06, |
| "loss": 1.1735, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.5055926508739846, |
| "grad_norm": 0.03876754371980919, |
| "learning_rate": 8.588600980674796e-06, |
| "loss": 1.1186, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.5062731255050398, |
| "grad_norm": 0.04326584981845795, |
| "learning_rate": 8.584837555953342e-06, |
| "loss": 1.1096, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.5069536001360949, |
| "grad_norm": 0.04322087234927761, |
| "learning_rate": 8.581069947340715e-06, |
| "loss": 1.2033, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.5076340747671501, |
| "grad_norm": 0.05173559138768165, |
| "learning_rate": 8.57729815923412e-06, |
| "loss": 1.1938, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.5083145493982052, |
| "grad_norm": 0.06449777733963566, |
| "learning_rate": 8.57352219603564e-06, |
| "loss": 1.1486, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.5089950240292604, |
| "grad_norm": 0.04675747699296703, |
| "learning_rate": 8.569742062152229e-06, |
| "loss": 1.1599, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.5096754986603156, |
| "grad_norm": 0.03849730253733784, |
| "learning_rate": 8.565957761995713e-06, |
| "loss": 1.1435, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.5103559732913707, |
| "grad_norm": 0.03856499927206122, |
| "learning_rate": 8.562169299982776e-06, |
| "loss": 1.1163, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5110364479224259, |
| "grad_norm": 0.03896826300900588, |
| "learning_rate": 8.558376680534959e-06, |
| "loss": 1.1434, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.511716922553481, |
| "grad_norm": 0.04122931215472077, |
| "learning_rate": 8.55457990807866e-06, |
| "loss": 1.1932, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.5123973971845363, |
| "grad_norm": 0.05047069333478561, |
| "learning_rate": 8.55077898704512e-06, |
| "loss": 1.2146, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.5130778718155914, |
| "grad_norm": 0.06501739740811575, |
| "learning_rate": 8.546973921870421e-06, |
| "loss": 1.1243, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.5137583464466465, |
| "grad_norm": 0.04916876628412371, |
| "learning_rate": 8.543164716995485e-06, |
| "loss": 1.1483, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.5144388210777017, |
| "grad_norm": 0.04363604007516074, |
| "learning_rate": 8.539351376866066e-06, |
| "loss": 1.1375, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.5151192957087568, |
| "grad_norm": 0.04021328761311774, |
| "learning_rate": 8.535533905932739e-06, |
| "loss": 1.191, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.5157997703398121, |
| "grad_norm": 0.05141232392473441, |
| "learning_rate": 8.531712308650904e-06, |
| "loss": 1.2164, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.5164802449708672, |
| "grad_norm": 0.045271342705341014, |
| "learning_rate": 8.527886589480779e-06, |
| "loss": 1.168, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.5171607196019223, |
| "grad_norm": 0.03874295659270498, |
| "learning_rate": 8.524056752887385e-06, |
| "loss": 1.199, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5178411942329775, |
| "grad_norm": 0.05126557194804418, |
| "learning_rate": 8.520222803340557e-06, |
| "loss": 1.1758, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.5185216688640326, |
| "grad_norm": 0.05042131118814898, |
| "learning_rate": 8.516384745314926e-06, |
| "loss": 1.2066, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.5192021434950879, |
| "grad_norm": 0.045232732616894734, |
| "learning_rate": 8.512542583289918e-06, |
| "loss": 1.2174, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.519882618126143, |
| "grad_norm": 0.04619546370697277, |
| "learning_rate": 8.508696321749752e-06, |
| "loss": 1.0822, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.5205630927571981, |
| "grad_norm": 0.04503407035517795, |
| "learning_rate": 8.504845965183425e-06, |
| "loss": 1.1598, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.5212435673882533, |
| "grad_norm": 0.046533137015248104, |
| "learning_rate": 8.50099151808472e-06, |
| "loss": 1.1587, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.5219240420193084, |
| "grad_norm": 0.04467925059354398, |
| "learning_rate": 8.497132984952193e-06, |
| "loss": 1.1864, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.5226045166503637, |
| "grad_norm": 0.07178437484801423, |
| "learning_rate": 8.493270370289164e-06, |
| "loss": 1.1798, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.5232849912814188, |
| "grad_norm": 0.043296657654657934, |
| "learning_rate": 8.489403678603722e-06, |
| "loss": 1.155, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.523965465912474, |
| "grad_norm": 0.04222421990019537, |
| "learning_rate": 8.485532914408712e-06, |
| "loss": 1.1231, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5246459405435291, |
| "grad_norm": 0.1841013854808659, |
| "learning_rate": 8.481658082221731e-06, |
| "loss": 1.1786, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.5253264151745842, |
| "grad_norm": 0.04266946865706051, |
| "learning_rate": 8.477779186565125e-06, |
| "loss": 1.1543, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.5260068898056395, |
| "grad_norm": 0.04197901267584969, |
| "learning_rate": 8.473896231965986e-06, |
| "loss": 1.1929, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.5266873644366946, |
| "grad_norm": 0.05041076462281314, |
| "learning_rate": 8.470009222956138e-06, |
| "loss": 1.1678, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.5273678390677498, |
| "grad_norm": 0.041756207467147366, |
| "learning_rate": 8.466118164072136e-06, |
| "loss": 1.1429, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.5280483136988049, |
| "grad_norm": 0.04053005066881133, |
| "learning_rate": 8.462223059855268e-06, |
| "loss": 1.173, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.52872878832986, |
| "grad_norm": 0.04058960808847781, |
| "learning_rate": 8.458323914851538e-06, |
| "loss": 1.1985, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.5294092629609153, |
| "grad_norm": 0.07665799540243598, |
| "learning_rate": 8.45442073361167e-06, |
| "loss": 1.1475, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.5300897375919704, |
| "grad_norm": 0.041452895159887655, |
| "learning_rate": 8.450513520691092e-06, |
| "loss": 1.1557, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.5307702122230256, |
| "grad_norm": 0.05551330076968528, |
| "learning_rate": 8.446602280649947e-06, |
| "loss": 1.1192, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5314506868540807, |
| "grad_norm": 0.05125405070156681, |
| "learning_rate": 8.442687018053071e-06, |
| "loss": 1.1216, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.5321311614851358, |
| "grad_norm": 0.050472767925999515, |
| "learning_rate": 8.438767737469995e-06, |
| "loss": 1.1259, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.5328116361161911, |
| "grad_norm": 0.046872210857747565, |
| "learning_rate": 8.434844443474943e-06, |
| "loss": 1.1789, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.5334921107472462, |
| "grad_norm": 0.061293610059609004, |
| "learning_rate": 8.430917140646821e-06, |
| "loss": 1.1632, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.5341725853783014, |
| "grad_norm": 0.03912377936631386, |
| "learning_rate": 8.426985833569214e-06, |
| "loss": 1.2133, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.5348530600093565, |
| "grad_norm": 0.04117780131854185, |
| "learning_rate": 8.42305052683038e-06, |
| "loss": 1.2032, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.5355335346404116, |
| "grad_norm": 0.04132479790723434, |
| "learning_rate": 8.419111225023246e-06, |
| "loss": 1.138, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.5362140092714669, |
| "grad_norm": 0.03842549922082187, |
| "learning_rate": 8.4151679327454e-06, |
| "loss": 1.1066, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.536894483902522, |
| "grad_norm": 0.04080954335709224, |
| "learning_rate": 8.411220654599091e-06, |
| "loss": 1.1718, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.5375749585335772, |
| "grad_norm": 0.043858387197321595, |
| "learning_rate": 8.407269395191216e-06, |
| "loss": 1.1219, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5382554331646323, |
| "grad_norm": 0.040515133708555966, |
| "learning_rate": 8.403314159133318e-06, |
| "loss": 1.2029, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.5389359077956875, |
| "grad_norm": 0.0390219527076627, |
| "learning_rate": 8.399354951041584e-06, |
| "loss": 1.1463, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.5396163824267427, |
| "grad_norm": 0.04574993504539064, |
| "learning_rate": 8.395391775536836e-06, |
| "loss": 1.173, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.5402968570577978, |
| "grad_norm": 0.04294242766768708, |
| "learning_rate": 8.391424637244528e-06, |
| "loss": 1.1496, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.540977331688853, |
| "grad_norm": 0.053654917019259465, |
| "learning_rate": 8.387453540794736e-06, |
| "loss": 1.1548, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.5416578063199081, |
| "grad_norm": 0.04494280593223373, |
| "learning_rate": 8.383478490822157e-06, |
| "loss": 1.1672, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.5423382809509633, |
| "grad_norm": 0.04793116519219443, |
| "learning_rate": 8.379499491966101e-06, |
| "loss": 1.1856, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.5430187555820185, |
| "grad_norm": 0.04378062326698956, |
| "learning_rate": 8.375516548870489e-06, |
| "loss": 1.1574, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.5436992302130736, |
| "grad_norm": 0.04142629682629962, |
| "learning_rate": 8.371529666183844e-06, |
| "loss": 1.2067, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.5443797048441288, |
| "grad_norm": 0.04170169929693674, |
| "learning_rate": 8.367538848559287e-06, |
| "loss": 1.1378, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5450601794751839, |
| "grad_norm": 0.03970775100468027, |
| "learning_rate": 8.36354410065453e-06, |
| "loss": 1.0798, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.5457406541062391, |
| "grad_norm": 0.04648578513446181, |
| "learning_rate": 8.359545427131876e-06, |
| "loss": 1.1443, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.5464211287372943, |
| "grad_norm": 0.03858386824567043, |
| "learning_rate": 8.355542832658208e-06, |
| "loss": 1.0972, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.5471016033683495, |
| "grad_norm": 0.04975907375302422, |
| "learning_rate": 8.351536321904983e-06, |
| "loss": 1.1998, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.5477820779994046, |
| "grad_norm": 0.04287409473941564, |
| "learning_rate": 8.347525899548227e-06, |
| "loss": 1.1532, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.5484625526304597, |
| "grad_norm": 0.04837832904766864, |
| "learning_rate": 8.343511570268541e-06, |
| "loss": 1.1088, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.5491430272615149, |
| "grad_norm": 0.040964713063851035, |
| "learning_rate": 8.339493338751074e-06, |
| "loss": 1.1229, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.5498235018925701, |
| "grad_norm": 0.04588890740643771, |
| "learning_rate": 8.335471209685538e-06, |
| "loss": 1.1096, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.5505039765236253, |
| "grad_norm": 0.03744543486694232, |
| "learning_rate": 8.331445187766187e-06, |
| "loss": 1.1837, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.5511844511546804, |
| "grad_norm": 0.05037176808744111, |
| "learning_rate": 8.327415277691824e-06, |
| "loss": 1.1767, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5518649257857355, |
| "grad_norm": 0.04432193944851549, |
| "learning_rate": 8.323381484165786e-06, |
| "loss": 1.1407, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.5525454004167907, |
| "grad_norm": 0.04201447987523638, |
| "learning_rate": 8.319343811895946e-06, |
| "loss": 1.1581, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.5532258750478459, |
| "grad_norm": 0.10203501157632323, |
| "learning_rate": 8.315302265594703e-06, |
| "loss": 1.1178, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.5539063496789011, |
| "grad_norm": 0.05339937261467158, |
| "learning_rate": 8.311256849978974e-06, |
| "loss": 1.163, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.5545868243099562, |
| "grad_norm": 0.044099665331117054, |
| "learning_rate": 8.307207569770193e-06, |
| "loss": 1.1013, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.5552672989410113, |
| "grad_norm": 0.03931050767646318, |
| "learning_rate": 8.303154429694311e-06, |
| "loss": 1.123, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.5559477735720665, |
| "grad_norm": 0.05335957788738985, |
| "learning_rate": 8.299097434481773e-06, |
| "loss": 1.2092, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.5566282482031216, |
| "grad_norm": 0.0636990809879659, |
| "learning_rate": 8.295036588867533e-06, |
| "loss": 1.1319, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.5573087228341769, |
| "grad_norm": 0.040551829004693854, |
| "learning_rate": 8.290971897591034e-06, |
| "loss": 1.136, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.557989197465232, |
| "grad_norm": 0.03780136567377228, |
| "learning_rate": 8.286903365396205e-06, |
| "loss": 1.1784, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5586696720962872, |
| "grad_norm": 0.060039887239810956, |
| "learning_rate": 8.282830997031464e-06, |
| "loss": 1.1906, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.5593501467273423, |
| "grad_norm": 0.04217363382120905, |
| "learning_rate": 8.278754797249702e-06, |
| "loss": 1.1789, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.5600306213583974, |
| "grad_norm": 0.040100264548723535, |
| "learning_rate": 8.274674770808282e-06, |
| "loss": 1.1957, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.5607110959894527, |
| "grad_norm": 0.04773398339267, |
| "learning_rate": 8.270590922469037e-06, |
| "loss": 1.1811, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.5613915706205078, |
| "grad_norm": 0.045457042631204134, |
| "learning_rate": 8.266503256998256e-06, |
| "loss": 1.1744, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.562072045251563, |
| "grad_norm": 0.04476890640827831, |
| "learning_rate": 8.262411779166681e-06, |
| "loss": 1.1706, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.5627525198826181, |
| "grad_norm": 0.03782510037969898, |
| "learning_rate": 8.25831649374951e-06, |
| "loss": 1.1531, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.5634329945136732, |
| "grad_norm": 0.043194762074185, |
| "learning_rate": 8.254217405526383e-06, |
| "loss": 1.1873, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.5641134691447285, |
| "grad_norm": 0.04524945915938302, |
| "learning_rate": 8.250114519281374e-06, |
| "loss": 1.1377, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.5647939437757836, |
| "grad_norm": 0.04165386202173816, |
| "learning_rate": 8.246007839802997e-06, |
| "loss": 1.1915, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5654744184068388, |
| "grad_norm": 0.050892456215113235, |
| "learning_rate": 8.241897371884183e-06, |
| "loss": 1.1828, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.5661548930378939, |
| "grad_norm": 0.041485798471332706, |
| "learning_rate": 8.237783120322293e-06, |
| "loss": 1.1548, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.566835367668949, |
| "grad_norm": 0.039661123365582446, |
| "learning_rate": 8.233665089919105e-06, |
| "loss": 1.1862, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.5675158423000043, |
| "grad_norm": 0.04154176571972414, |
| "learning_rate": 8.229543285480797e-06, |
| "loss": 1.1389, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.5681963169310594, |
| "grad_norm": 0.0424519182968462, |
| "learning_rate": 8.225417711817965e-06, |
| "loss": 1.1691, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.5688767915621146, |
| "grad_norm": 0.042775332017904656, |
| "learning_rate": 8.221288373745591e-06, |
| "loss": 1.1646, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.5695572661931697, |
| "grad_norm": 0.04110139091161163, |
| "learning_rate": 8.217155276083059e-06, |
| "loss": 1.1696, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.5702377408242248, |
| "grad_norm": 0.04488592848228418, |
| "learning_rate": 8.213018423654144e-06, |
| "loss": 1.179, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.5709182154552801, |
| "grad_norm": 0.04747192432701841, |
| "learning_rate": 8.20887782128699e-06, |
| "loss": 1.1498, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.5715986900863352, |
| "grad_norm": 0.04711201935733248, |
| "learning_rate": 8.20473347381413e-06, |
| "loss": 1.1074, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5722791647173904, |
| "grad_norm": 0.044368574866158736, |
| "learning_rate": 8.200585386072464e-06, |
| "loss": 1.115, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.5729596393484455, |
| "grad_norm": 0.04243245457403774, |
| "learning_rate": 8.196433562903252e-06, |
| "loss": 1.2098, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.5736401139795007, |
| "grad_norm": 0.0411309807077596, |
| "learning_rate": 8.192278009152124e-06, |
| "loss": 1.1829, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.5743205886105559, |
| "grad_norm": 0.04434792075597989, |
| "learning_rate": 8.188118729669054e-06, |
| "loss": 1.1559, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.575001063241611, |
| "grad_norm": 0.04531013671919297, |
| "learning_rate": 8.183955729308373e-06, |
| "loss": 1.1544, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.5756815378726662, |
| "grad_norm": 0.04415644124258442, |
| "learning_rate": 8.179789012928747e-06, |
| "loss": 1.1403, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.5763620125037213, |
| "grad_norm": 0.041849175257147715, |
| "learning_rate": 8.175618585393183e-06, |
| "loss": 1.1735, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.5770424871347765, |
| "grad_norm": 0.050924581330910657, |
| "learning_rate": 8.171444451569019e-06, |
| "loss": 1.1479, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.5777229617658317, |
| "grad_norm": 0.03974823826416723, |
| "learning_rate": 8.167266616327921e-06, |
| "loss": 1.17, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.5784034363968868, |
| "grad_norm": 0.042109166474074936, |
| "learning_rate": 8.163085084545867e-06, |
| "loss": 1.1912, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.579083911027942, |
| "grad_norm": 0.043426415287164635, |
| "learning_rate": 8.158899861103159e-06, |
| "loss": 1.1243, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.5797643856589971, |
| "grad_norm": 0.04306964323847212, |
| "learning_rate": 8.1547109508844e-06, |
| "loss": 1.1525, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.5804448602900523, |
| "grad_norm": 0.04274847934636146, |
| "learning_rate": 8.150518358778501e-06, |
| "loss": 1.1823, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.5811253349211075, |
| "grad_norm": 0.043729941677878165, |
| "learning_rate": 8.146322089678668e-06, |
| "loss": 1.2141, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.5818058095521627, |
| "grad_norm": 0.04315912213214879, |
| "learning_rate": 8.142122148482397e-06, |
| "loss": 1.1823, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.5824862841832178, |
| "grad_norm": 0.04231782214011899, |
| "learning_rate": 8.137918540091473e-06, |
| "loss": 1.2023, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.5831667588142729, |
| "grad_norm": 0.0475718942693006, |
| "learning_rate": 8.13371126941196e-06, |
| "loss": 1.2363, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.5838472334453281, |
| "grad_norm": 0.04626266969743917, |
| "learning_rate": 8.129500341354192e-06, |
| "loss": 1.1519, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.5845277080763833, |
| "grad_norm": 0.038413332073930426, |
| "learning_rate": 8.125285760832778e-06, |
| "loss": 1.1381, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.5852081827074385, |
| "grad_norm": 0.04042454155496313, |
| "learning_rate": 8.121067532766587e-06, |
| "loss": 1.1217, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.5858886573384936, |
| "grad_norm": 0.04085148523310831, |
| "learning_rate": 8.116845662078744e-06, |
| "loss": 1.1591, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.5865691319695487, |
| "grad_norm": 0.04377818146863928, |
| "learning_rate": 8.11262015369663e-06, |
| "loss": 1.2255, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.5872496066006039, |
| "grad_norm": 0.039014134138926265, |
| "learning_rate": 8.10839101255186e-06, |
| "loss": 1.2082, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.5879300812316591, |
| "grad_norm": 0.04200666559613162, |
| "learning_rate": 8.104158243580305e-06, |
| "loss": 1.112, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.5886105558627143, |
| "grad_norm": 0.03911561242988071, |
| "learning_rate": 8.099921851722057e-06, |
| "loss": 1.1382, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.5892910304937694, |
| "grad_norm": 0.04045230119388504, |
| "learning_rate": 8.095681841921441e-06, |
| "loss": 1.1266, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.5899715051248245, |
| "grad_norm": 0.040116238529695106, |
| "learning_rate": 8.09143821912701e-06, |
| "loss": 1.1853, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.5906519797558797, |
| "grad_norm": 0.04062447640292501, |
| "learning_rate": 8.087190988291523e-06, |
| "loss": 1.14, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.5913324543869349, |
| "grad_norm": 0.04283812178287952, |
| "learning_rate": 8.082940154371956e-06, |
| "loss": 1.1273, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.5920129290179901, |
| "grad_norm": 0.05571713855883823, |
| "learning_rate": 8.07868572232949e-06, |
| "loss": 1.049, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.5926934036490452, |
| "grad_norm": 0.04016667844328468, |
| "learning_rate": 8.07442769712951e-06, |
| "loss": 1.1482, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.5933738782801004, |
| "grad_norm": 0.038282189867659924, |
| "learning_rate": 8.070166083741583e-06, |
| "loss": 1.0814, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.5940543529111555, |
| "grad_norm": 0.04240818562157267, |
| "learning_rate": 8.065900887139473e-06, |
| "loss": 1.1789, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.5947348275422107, |
| "grad_norm": 0.03941580524487074, |
| "learning_rate": 8.061632112301122e-06, |
| "loss": 1.1855, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.5954153021732659, |
| "grad_norm": 0.04368634784808724, |
| "learning_rate": 8.057359764208652e-06, |
| "loss": 1.1038, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.596095776804321, |
| "grad_norm": 0.04874993152125096, |
| "learning_rate": 8.053083847848351e-06, |
| "loss": 1.1881, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.5967762514353762, |
| "grad_norm": 0.04283185831118963, |
| "learning_rate": 8.048804368210675e-06, |
| "loss": 1.2021, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.5974567260664313, |
| "grad_norm": 0.04963061963040367, |
| "learning_rate": 8.044521330290235e-06, |
| "loss": 1.1638, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.5981372006974865, |
| "grad_norm": 0.04194658535985922, |
| "learning_rate": 8.040234739085799e-06, |
| "loss": 1.1806, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.5988176753285417, |
| "grad_norm": 0.045232655719741964, |
| "learning_rate": 8.03594459960028e-06, |
| "loss": 1.1432, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5994981499595968, |
| "grad_norm": 0.041615174460038984, |
| "learning_rate": 8.031650916840732e-06, |
| "loss": 1.1535, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.600178624590652, |
| "grad_norm": 0.04311597640305818, |
| "learning_rate": 8.027353695818345e-06, |
| "loss": 1.1866, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.6008590992217071, |
| "grad_norm": 0.04648275659267898, |
| "learning_rate": 8.023052941548437e-06, |
| "loss": 1.2174, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.6015395738527624, |
| "grad_norm": 0.041183174687582695, |
| "learning_rate": 8.018748659050456e-06, |
| "loss": 1.1922, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.6022200484838175, |
| "grad_norm": 0.042641768711080094, |
| "learning_rate": 8.014440853347956e-06, |
| "loss": 1.1547, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.6029005231148726, |
| "grad_norm": 0.041451603388394896, |
| "learning_rate": 8.010129529468614e-06, |
| "loss": 1.1317, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.6035809977459278, |
| "grad_norm": 0.041272961165301054, |
| "learning_rate": 8.005814692444205e-06, |
| "loss": 1.1355, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.6042614723769829, |
| "grad_norm": 0.04057221028324148, |
| "learning_rate": 8.001496347310614e-06, |
| "loss": 1.1873, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.6049419470080382, |
| "grad_norm": 0.04135206077100455, |
| "learning_rate": 7.99717449910781e-06, |
| "loss": 1.1646, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.6056224216390933, |
| "grad_norm": 0.040316079764168004, |
| "learning_rate": 7.992849152879857e-06, |
| "loss": 1.1466, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6063028962701484, |
| "grad_norm": 0.04092957611261813, |
| "learning_rate": 7.988520313674897e-06, |
| "loss": 1.151, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.6069833709012036, |
| "grad_norm": 0.039374332097630076, |
| "learning_rate": 7.984187986545154e-06, |
| "loss": 1.1883, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.6076638455322587, |
| "grad_norm": 0.03922720540085873, |
| "learning_rate": 7.97985217654692e-06, |
| "loss": 1.1423, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.6083443201633139, |
| "grad_norm": 0.09045429744114127, |
| "learning_rate": 7.97551288874055e-06, |
| "loss": 1.0891, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.6090247947943691, |
| "grad_norm": 0.04254293782715242, |
| "learning_rate": 7.97117012819046e-06, |
| "loss": 1.1897, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.6097052694254242, |
| "grad_norm": 0.03929737573713484, |
| "learning_rate": 7.96682389996512e-06, |
| "loss": 1.22, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.6103857440564794, |
| "grad_norm": 0.1188554224437093, |
| "learning_rate": 7.962474209137044e-06, |
| "loss": 1.0757, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.6110662186875345, |
| "grad_norm": 0.041241461473570426, |
| "learning_rate": 7.95812106078279e-06, |
| "loss": 1.0574, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.6117466933185897, |
| "grad_norm": 0.050560273673583955, |
| "learning_rate": 7.953764459982951e-06, |
| "loss": 1.1539, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.6124271679496449, |
| "grad_norm": 0.04401496162998291, |
| "learning_rate": 7.94940441182215e-06, |
| "loss": 1.2764, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6131076425807, |
| "grad_norm": 0.040619036348773584, |
| "learning_rate": 7.945040921389032e-06, |
| "loss": 1.1664, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.6137881172117552, |
| "grad_norm": 0.08684240225413613, |
| "learning_rate": 7.940673993776258e-06, |
| "loss": 1.1613, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.6144685918428103, |
| "grad_norm": 0.047950332726318455, |
| "learning_rate": 7.936303634080505e-06, |
| "loss": 1.1246, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.6151490664738655, |
| "grad_norm": 0.044138108104567156, |
| "learning_rate": 7.93192984740245e-06, |
| "loss": 1.1557, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.6158295411049207, |
| "grad_norm": 0.04205279272151933, |
| "learning_rate": 7.927552638846776e-06, |
| "loss": 1.1822, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.6165100157359759, |
| "grad_norm": 0.037151685883760985, |
| "learning_rate": 7.923172013522153e-06, |
| "loss": 1.1407, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.617190490367031, |
| "grad_norm": 0.05084646852036924, |
| "learning_rate": 7.918787976541246e-06, |
| "loss": 1.1158, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.6178709649980861, |
| "grad_norm": 0.03683258235559903, |
| "learning_rate": 7.914400533020695e-06, |
| "loss": 1.1817, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.6185514396291413, |
| "grad_norm": 0.03971414515196644, |
| "learning_rate": 7.91000968808112e-06, |
| "loss": 1.1357, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.6192319142601965, |
| "grad_norm": 0.0432587997178803, |
| "learning_rate": 7.905615446847107e-06, |
| "loss": 1.1191, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.6199123888912517, |
| "grad_norm": 0.04881605419073854, |
| "learning_rate": 7.901217814447212e-06, |
| "loss": 1.2064, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.6205928635223068, |
| "grad_norm": 0.03993595120249041, |
| "learning_rate": 7.896816796013943e-06, |
| "loss": 1.1752, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.6212733381533619, |
| "grad_norm": 0.03966047093864206, |
| "learning_rate": 7.892412396683764e-06, |
| "loss": 1.12, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.6219538127844171, |
| "grad_norm": 0.04878533637282711, |
| "learning_rate": 7.888004621597079e-06, |
| "loss": 1.2101, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.6226342874154723, |
| "grad_norm": 0.039415215318022966, |
| "learning_rate": 7.88359347589824e-06, |
| "loss": 1.1367, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.6233147620465275, |
| "grad_norm": 0.03780897863930396, |
| "learning_rate": 7.879178964735528e-06, |
| "loss": 1.1652, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.6239952366775826, |
| "grad_norm": 0.04060525430372581, |
| "learning_rate": 7.87476109326115e-06, |
| "loss": 1.147, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.6246757113086377, |
| "grad_norm": 0.036870223512593225, |
| "learning_rate": 7.87033986663124e-06, |
| "loss": 1.1562, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.6253561859396929, |
| "grad_norm": 0.03936602479028799, |
| "learning_rate": 7.865915290005844e-06, |
| "loss": 1.1624, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.6260366605707481, |
| "grad_norm": 0.04111384698224138, |
| "learning_rate": 7.86148736854892e-06, |
| "loss": 1.1901, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6267171352018033, |
| "grad_norm": 0.04290620077150831, |
| "learning_rate": 7.857056107428327e-06, |
| "loss": 1.1619, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.6273976098328584, |
| "grad_norm": 0.04056550194174836, |
| "learning_rate": 7.852621511815825e-06, |
| "loss": 1.1564, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.6280780844639136, |
| "grad_norm": 0.03639283604390965, |
| "learning_rate": 7.848183586887065e-06, |
| "loss": 1.1189, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.6287585590949687, |
| "grad_norm": 0.04463427786180277, |
| "learning_rate": 7.84374233782158e-06, |
| "loss": 1.1558, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.6294390337260239, |
| "grad_norm": 0.040588305003388336, |
| "learning_rate": 7.839297769802789e-06, |
| "loss": 1.1413, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.6301195083570791, |
| "grad_norm": 0.04145526084985425, |
| "learning_rate": 7.834849888017979e-06, |
| "loss": 1.1731, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.6307999829881342, |
| "grad_norm": 0.04064670523198378, |
| "learning_rate": 7.830398697658308e-06, |
| "loss": 1.1576, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.6314804576191894, |
| "grad_norm": 0.08504837862750378, |
| "learning_rate": 7.825944203918792e-06, |
| "loss": 1.2067, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.6321609322502445, |
| "grad_norm": 0.03618607526419996, |
| "learning_rate": 7.821486411998307e-06, |
| "loss": 1.0653, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.6328414068812998, |
| "grad_norm": 0.041163906158028174, |
| "learning_rate": 7.817025327099574e-06, |
| "loss": 1.1231, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.6335218815123549, |
| "grad_norm": 0.04096146701418479, |
| "learning_rate": 7.812560954429159e-06, |
| "loss": 1.1642, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.63420235614341, |
| "grad_norm": 0.04045705537799017, |
| "learning_rate": 7.808093299197466e-06, |
| "loss": 1.1682, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.6348828307744652, |
| "grad_norm": 0.04170510551800386, |
| "learning_rate": 7.80362236661873e-06, |
| "loss": 1.1601, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.6355633054055203, |
| "grad_norm": 0.04017895703551513, |
| "learning_rate": 7.799148161911013e-06, |
| "loss": 1.1291, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.6362437800365756, |
| "grad_norm": 0.054022188115136816, |
| "learning_rate": 7.794670690296187e-06, |
| "loss": 1.1904, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.6369242546676307, |
| "grad_norm": 0.05538165228123039, |
| "learning_rate": 7.790189956999945e-06, |
| "loss": 1.0752, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.6376047292986858, |
| "grad_norm": 0.04367149070029591, |
| "learning_rate": 7.785705967251789e-06, |
| "loss": 1.1167, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.638285203929741, |
| "grad_norm": 0.06605426061696518, |
| "learning_rate": 7.781218726285014e-06, |
| "loss": 1.1585, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.6389656785607961, |
| "grad_norm": 0.03912000963283677, |
| "learning_rate": 7.776728239336714e-06, |
| "loss": 1.1371, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.6396461531918514, |
| "grad_norm": 0.04273969229969486, |
| "learning_rate": 7.772234511647771e-06, |
| "loss": 1.1149, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6403266278229065, |
| "grad_norm": 0.0522448351136916, |
| "learning_rate": 7.767737548462849e-06, |
| "loss": 1.1133, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.6410071024539616, |
| "grad_norm": 0.03809056120688157, |
| "learning_rate": 7.763237355030384e-06, |
| "loss": 1.0985, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.6416875770850168, |
| "grad_norm": 0.04208113112324736, |
| "learning_rate": 7.758733936602591e-06, |
| "loss": 1.1542, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.6423680517160719, |
| "grad_norm": 0.038232104737570445, |
| "learning_rate": 7.754227298435442e-06, |
| "loss": 1.1449, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.6430485263471272, |
| "grad_norm": 0.07327134336204114, |
| "learning_rate": 7.749717445788667e-06, |
| "loss": 1.1229, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.6437290009781823, |
| "grad_norm": 0.04302215159609038, |
| "learning_rate": 7.745204383925753e-06, |
| "loss": 1.2152, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.6444094756092374, |
| "grad_norm": 0.04050395590672295, |
| "learning_rate": 7.740688118113926e-06, |
| "loss": 1.1187, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.6450899502402926, |
| "grad_norm": 0.041047644523210476, |
| "learning_rate": 7.736168653624154e-06, |
| "loss": 1.1622, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.6457704248713477, |
| "grad_norm": 0.042773261634885006, |
| "learning_rate": 7.73164599573114e-06, |
| "loss": 1.093, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.646450899502403, |
| "grad_norm": 0.3472748679357863, |
| "learning_rate": 7.727120149713313e-06, |
| "loss": 1.1358, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.6471313741334581, |
| "grad_norm": 0.05249313678647316, |
| "learning_rate": 7.722591120852821e-06, |
| "loss": 1.1488, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.6478118487645133, |
| "grad_norm": 0.04857838907769981, |
| "learning_rate": 7.718058914435526e-06, |
| "loss": 1.1395, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.6484923233955684, |
| "grad_norm": 0.06056048993060849, |
| "learning_rate": 7.713523535751003e-06, |
| "loss": 1.1554, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.6491727980266235, |
| "grad_norm": 0.043417866931708946, |
| "learning_rate": 7.708984990092528e-06, |
| "loss": 1.1399, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.6498532726576788, |
| "grad_norm": 0.040899332718403396, |
| "learning_rate": 7.70444328275707e-06, |
| "loss": 1.2209, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.6505337472887339, |
| "grad_norm": 0.042139179785609125, |
| "learning_rate": 7.69989841904529e-06, |
| "loss": 1.1771, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.6512142219197891, |
| "grad_norm": 0.04497423442726415, |
| "learning_rate": 7.695350404261535e-06, |
| "loss": 1.1852, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.6518946965508442, |
| "grad_norm": 0.04299815900672657, |
| "learning_rate": 7.690799243713825e-06, |
| "loss": 1.1422, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.6525751711818993, |
| "grad_norm": 0.057667778993934805, |
| "learning_rate": 7.686244942713856e-06, |
| "loss": 1.1643, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.6532556458129546, |
| "grad_norm": 0.05055586475309829, |
| "learning_rate": 7.681687506576988e-06, |
| "loss": 1.1675, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.6539361204440097, |
| "grad_norm": 0.04837690007921328, |
| "learning_rate": 7.677126940622237e-06, |
| "loss": 1.1458, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.6546165950750649, |
| "grad_norm": 0.04746849990432907, |
| "learning_rate": 7.672563250172278e-06, |
| "loss": 1.1614, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.65529706970612, |
| "grad_norm": 0.042700698425812394, |
| "learning_rate": 7.667996440553424e-06, |
| "loss": 1.1787, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.6559775443371751, |
| "grad_norm": 0.04663433423637838, |
| "learning_rate": 7.663426517095637e-06, |
| "loss": 1.1899, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.6566580189682304, |
| "grad_norm": 0.05089210963658719, |
| "learning_rate": 7.658853485132507e-06, |
| "loss": 1.1114, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.6573384935992855, |
| "grad_norm": 0.04835691542508216, |
| "learning_rate": 7.654277350001255e-06, |
| "loss": 1.1472, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.6580189682303407, |
| "grad_norm": 0.04286980226281031, |
| "learning_rate": 7.649698117042725e-06, |
| "loss": 1.1704, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.6586994428613958, |
| "grad_norm": 0.0413107954780928, |
| "learning_rate": 7.645115791601371e-06, |
| "loss": 1.1628, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.659379917492451, |
| "grad_norm": 0.06469008546831921, |
| "learning_rate": 7.640530379025265e-06, |
| "loss": 1.1286, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.6600603921235062, |
| "grad_norm": 0.0929348477800054, |
| "learning_rate": 7.635941884666072e-06, |
| "loss": 1.0714, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.6607408667545613, |
| "grad_norm": 0.04918686842612326, |
| "learning_rate": 7.631350313879061e-06, |
| "loss": 1.1324, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.6614213413856165, |
| "grad_norm": 0.04516364692323498, |
| "learning_rate": 7.626755672023087e-06, |
| "loss": 1.1094, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.6621018160166716, |
| "grad_norm": 0.040036511801984, |
| "learning_rate": 7.6221579644605945e-06, |
| "loss": 1.1856, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.6627822906477268, |
| "grad_norm": 0.04221452365231738, |
| "learning_rate": 7.617557196557601e-06, |
| "loss": 1.1605, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.6634627652787819, |
| "grad_norm": 0.04969957841604738, |
| "learning_rate": 7.6129533736837e-06, |
| "loss": 1.1842, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.6641432399098371, |
| "grad_norm": 0.03868376326252715, |
| "learning_rate": 7.608346501212045e-06, |
| "loss": 1.1228, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.6648237145408923, |
| "grad_norm": 0.03621072764785827, |
| "learning_rate": 7.603736584519354e-06, |
| "loss": 1.094, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.6655041891719474, |
| "grad_norm": 0.050499205066227806, |
| "learning_rate": 7.599123628985894e-06, |
| "loss": 1.189, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.6661846638030026, |
| "grad_norm": 0.04076810819723509, |
| "learning_rate": 7.594507639995483e-06, |
| "loss": 1.1811, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.6668651384340577, |
| "grad_norm": 0.04391491864766983, |
| "learning_rate": 7.5898886229354754e-06, |
| "loss": 1.2108, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.667545613065113, |
| "grad_norm": 0.04859358265845105, |
| "learning_rate": 7.585266583196761e-06, |
| "loss": 1.1159, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.6682260876961681, |
| "grad_norm": 0.0405482507873175, |
| "learning_rate": 7.580641526173758e-06, |
| "loss": 1.1659, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.6689065623272232, |
| "grad_norm": 0.04257613233842775, |
| "learning_rate": 7.576013457264406e-06, |
| "loss": 1.1459, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.6695870369582784, |
| "grad_norm": 0.0393176411971146, |
| "learning_rate": 7.571382381870157e-06, |
| "loss": 1.1448, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.6702675115893335, |
| "grad_norm": 0.08112350996396892, |
| "learning_rate": 7.566748305395979e-06, |
| "loss": 1.1257, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.6709479862203888, |
| "grad_norm": 0.041738073653021145, |
| "learning_rate": 7.5621112332503325e-06, |
| "loss": 1.1379, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.6716284608514439, |
| "grad_norm": 0.06458657107739593, |
| "learning_rate": 7.557471170845183e-06, |
| "loss": 1.2003, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.672308935482499, |
| "grad_norm": 0.038958895409908184, |
| "learning_rate": 7.552828123595981e-06, |
| "loss": 1.1231, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.6729894101135542, |
| "grad_norm": 0.042481360817148296, |
| "learning_rate": 7.548182096921665e-06, |
| "loss": 1.0685, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.6736698847446093, |
| "grad_norm": 0.041831775106497764, |
| "learning_rate": 7.543533096244644e-06, |
| "loss": 1.1738, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6743503593756646, |
| "grad_norm": 0.15815365507476095, |
| "learning_rate": 7.538881126990807e-06, |
| "loss": 1.1475, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.6750308340067197, |
| "grad_norm": 0.04437893080698295, |
| "learning_rate": 7.534226194589498e-06, |
| "loss": 1.0809, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.6757113086377748, |
| "grad_norm": 0.04365643846646936, |
| "learning_rate": 7.529568304473525e-06, |
| "loss": 1.1849, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.67639178326883, |
| "grad_norm": 0.04471078501455037, |
| "learning_rate": 7.524907462079149e-06, |
| "loss": 1.1777, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.6770722578998851, |
| "grad_norm": 0.041659935941889464, |
| "learning_rate": 7.5202436728460746e-06, |
| "loss": 1.2081, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.6777527325309404, |
| "grad_norm": 0.04288201099681714, |
| "learning_rate": 7.5155769422174445e-06, |
| "loss": 1.1962, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.6784332071619955, |
| "grad_norm": 0.04368684136906247, |
| "learning_rate": 7.510907275639832e-06, |
| "loss": 1.1922, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.6791136817930506, |
| "grad_norm": 0.050476486903071514, |
| "learning_rate": 7.506234678563248e-06, |
| "loss": 1.0598, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.6797941564241058, |
| "grad_norm": 0.046810799877727786, |
| "learning_rate": 7.501559156441109e-06, |
| "loss": 1.1796, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.6804746310551609, |
| "grad_norm": 0.047812912893326426, |
| "learning_rate": 7.496880714730259e-06, |
| "loss": 1.1428, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6811551056862162, |
| "grad_norm": 0.04257189142775785, |
| "learning_rate": 7.492199358890937e-06, |
| "loss": 1.1715, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.6818355803172713, |
| "grad_norm": 0.06136667792321527, |
| "learning_rate": 7.487515094386792e-06, |
| "loss": 1.1517, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.6825160549483265, |
| "grad_norm": 0.055646896518561265, |
| "learning_rate": 7.482827926684864e-06, |
| "loss": 1.168, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.6831965295793816, |
| "grad_norm": 0.04766220568836885, |
| "learning_rate": 7.478137861255583e-06, |
| "loss": 1.2104, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.6838770042104367, |
| "grad_norm": 0.04290035924469161, |
| "learning_rate": 7.473444903572757e-06, |
| "loss": 1.1736, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.684557478841492, |
| "grad_norm": 0.041199593080418005, |
| "learning_rate": 7.468749059113578e-06, |
| "loss": 1.1848, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.6852379534725471, |
| "grad_norm": 0.042266818870596565, |
| "learning_rate": 7.464050333358597e-06, |
| "loss": 1.178, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.6859184281036023, |
| "grad_norm": 0.15107754375835664, |
| "learning_rate": 7.459348731791733e-06, |
| "loss": 1.1783, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.6865989027346574, |
| "grad_norm": 0.04106006697859163, |
| "learning_rate": 7.454644259900263e-06, |
| "loss": 1.14, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.6872793773657125, |
| "grad_norm": 0.04333746328735867, |
| "learning_rate": 7.449936923174813e-06, |
| "loss": 1.1426, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.6879598519967678, |
| "grad_norm": 0.07004970303311875, |
| "learning_rate": 7.445226727109347e-06, |
| "loss": 1.0842, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.6886403266278229, |
| "grad_norm": 0.039638832880001425, |
| "learning_rate": 7.440513677201175e-06, |
| "loss": 1.1659, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.6893208012588781, |
| "grad_norm": 0.03910289252801824, |
| "learning_rate": 7.43579777895093e-06, |
| "loss": 1.1611, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.6900012758899332, |
| "grad_norm": 0.07027920752871429, |
| "learning_rate": 7.431079037862575e-06, |
| "loss": 1.0465, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.6906817505209883, |
| "grad_norm": 0.04094509224077025, |
| "learning_rate": 7.426357459443388e-06, |
| "loss": 1.1603, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.6913622251520436, |
| "grad_norm": 0.10598504072232526, |
| "learning_rate": 7.42163304920396e-06, |
| "loss": 1.1153, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.6920426997830987, |
| "grad_norm": 0.041160683871352216, |
| "learning_rate": 7.416905812658183e-06, |
| "loss": 1.1559, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.6927231744141539, |
| "grad_norm": 0.05473785616653893, |
| "learning_rate": 7.412175755323254e-06, |
| "loss": 1.125, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.693403649045209, |
| "grad_norm": 0.04770187460065998, |
| "learning_rate": 7.407442882719658e-06, |
| "loss": 1.1188, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.6940841236762642, |
| "grad_norm": 0.04381659899522137, |
| "learning_rate": 7.402707200371165e-06, |
| "loss": 1.1356, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.6947645983073194, |
| "grad_norm": 0.04051251755932694, |
| "learning_rate": 7.397968713804828e-06, |
| "loss": 1.1174, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.6954450729383745, |
| "grad_norm": 0.044516396502584286, |
| "learning_rate": 7.39322742855097e-06, |
| "loss": 1.1777, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.6961255475694297, |
| "grad_norm": 0.04117597756872166, |
| "learning_rate": 7.38848335014318e-06, |
| "loss": 1.1387, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.6968060222004848, |
| "grad_norm": 0.04296180208703936, |
| "learning_rate": 7.383736484118311e-06, |
| "loss": 1.1206, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.69748649683154, |
| "grad_norm": 0.050477126239160564, |
| "learning_rate": 7.378986836016462e-06, |
| "loss": 1.1443, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.6981669714625952, |
| "grad_norm": 0.06939371082738612, |
| "learning_rate": 7.374234411380987e-06, |
| "loss": 1.1321, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.6988474460936503, |
| "grad_norm": 0.05320770806066322, |
| "learning_rate": 7.369479215758476e-06, |
| "loss": 1.218, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.6995279207247055, |
| "grad_norm": 0.04193911746958924, |
| "learning_rate": 7.364721254698752e-06, |
| "loss": 1.1404, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.7002083953557606, |
| "grad_norm": 0.05140219246531558, |
| "learning_rate": 7.359960533754872e-06, |
| "loss": 1.1954, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.7008888699868158, |
| "grad_norm": 0.043307053037638495, |
| "learning_rate": 7.355197058483103e-06, |
| "loss": 1.1012, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.701569344617871, |
| "grad_norm": 0.04976551632482346, |
| "learning_rate": 7.350430834442941e-06, |
| "loss": 1.1041, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.7022498192489262, |
| "grad_norm": 0.047734146085657715, |
| "learning_rate": 7.345661867197076e-06, |
| "loss": 1.1625, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.7029302938799813, |
| "grad_norm": 0.04720201465179049, |
| "learning_rate": 7.340890162311411e-06, |
| "loss": 1.1787, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.7036107685110364, |
| "grad_norm": 0.04247509490814496, |
| "learning_rate": 7.336115725355033e-06, |
| "loss": 1.146, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.7042912431420916, |
| "grad_norm": 0.21669885262049804, |
| "learning_rate": 7.33133856190023e-06, |
| "loss": 1.1569, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.7049717177731468, |
| "grad_norm": 0.0437961700372451, |
| "learning_rate": 7.3265586775224595e-06, |
| "loss": 1.1802, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.705652192404202, |
| "grad_norm": 0.03735584537487382, |
| "learning_rate": 7.321776077800367e-06, |
| "loss": 1.125, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.7063326670352571, |
| "grad_norm": 0.054935897560418445, |
| "learning_rate": 7.316990768315757e-06, |
| "loss": 1.0937, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.7070131416663122, |
| "grad_norm": 0.053625956317227996, |
| "learning_rate": 7.3122027546536025e-06, |
| "loss": 1.1672, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.7076936162973674, |
| "grad_norm": 0.04328762046243775, |
| "learning_rate": 7.307412042402029e-06, |
| "loss": 1.1584, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.7083740909284226, |
| "grad_norm": 0.042645405158309085, |
| "learning_rate": 7.302618637152316e-06, |
| "loss": 1.2284, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.7090545655594778, |
| "grad_norm": 0.043540366546701605, |
| "learning_rate": 7.297822544498879e-06, |
| "loss": 1.1746, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.7097350401905329, |
| "grad_norm": 0.09388727485260404, |
| "learning_rate": 7.293023770039279e-06, |
| "loss": 1.106, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.710415514821588, |
| "grad_norm": 0.045183692217649854, |
| "learning_rate": 7.288222319374199e-06, |
| "loss": 1.1561, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.7110959894526432, |
| "grad_norm": 0.04419303783873857, |
| "learning_rate": 7.283418198107449e-06, |
| "loss": 1.1907, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.7117764640836984, |
| "grad_norm": 0.039431406730855954, |
| "learning_rate": 7.2786114118459564e-06, |
| "loss": 1.1463, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.7124569387147536, |
| "grad_norm": 0.08697265203912855, |
| "learning_rate": 7.273801966199756e-06, |
| "loss": 1.1178, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.7131374133458087, |
| "grad_norm": 0.046605391918640805, |
| "learning_rate": 7.2689898667819915e-06, |
| "loss": 1.1882, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.7138178879768639, |
| "grad_norm": 0.04535224059552782, |
| "learning_rate": 7.2641751192088986e-06, |
| "loss": 1.1587, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.714498362607919, |
| "grad_norm": 0.04609371369265137, |
| "learning_rate": 7.259357729099805e-06, |
| "loss": 1.1624, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.7151788372389741, |
| "grad_norm": 0.04837539578364566, |
| "learning_rate": 7.254537702077127e-06, |
| "loss": 1.1592, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.7158593118700294, |
| "grad_norm": 0.0415706447025837, |
| "learning_rate": 7.2497150437663495e-06, |
| "loss": 1.1592, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.7165397865010845, |
| "grad_norm": 0.0466672846650458, |
| "learning_rate": 7.244889759796039e-06, |
| "loss": 1.1646, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.7172202611321397, |
| "grad_norm": 0.041322845555446144, |
| "learning_rate": 7.240061855797818e-06, |
| "loss": 1.0669, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.7179007357631948, |
| "grad_norm": 0.047295050030634445, |
| "learning_rate": 7.23523133740637e-06, |
| "loss": 1.1195, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.7185812103942499, |
| "grad_norm": 0.041282823374409, |
| "learning_rate": 7.230398210259431e-06, |
| "loss": 1.2188, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.7192616850253052, |
| "grad_norm": 0.0673203156095105, |
| "learning_rate": 7.225562479997781e-06, |
| "loss": 1.1166, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.7199421596563603, |
| "grad_norm": 0.059975488393622454, |
| "learning_rate": 7.220724152265234e-06, |
| "loss": 1.2001, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.7206226342874155, |
| "grad_norm": 0.05733949411047725, |
| "learning_rate": 7.215883232708642e-06, |
| "loss": 1.1777, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.7213031089184706, |
| "grad_norm": 0.038815327874534795, |
| "learning_rate": 7.21103972697788e-06, |
| "loss": 1.1425, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.7219835835495257, |
| "grad_norm": 0.04180710340753343, |
| "learning_rate": 7.206193640725838e-06, |
| "loss": 1.1311, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.722664058180581, |
| "grad_norm": 0.049442802384658265, |
| "learning_rate": 7.201344979608423e-06, |
| "loss": 1.0722, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.7233445328116361, |
| "grad_norm": 0.03691998702875071, |
| "learning_rate": 7.1964937492845376e-06, |
| "loss": 1.2127, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.7240250074426913, |
| "grad_norm": 0.04551488458030917, |
| "learning_rate": 7.191639955416097e-06, |
| "loss": 1.1855, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.7247054820737464, |
| "grad_norm": 0.05620018748524324, |
| "learning_rate": 7.1867836036679984e-06, |
| "loss": 1.1669, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.7253859567048015, |
| "grad_norm": 0.03770804716932438, |
| "learning_rate": 7.181924699708127e-06, |
| "loss": 1.1324, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.7260664313358568, |
| "grad_norm": 0.043007638479637705, |
| "learning_rate": 7.1770632492073455e-06, |
| "loss": 1.188, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.7267469059669119, |
| "grad_norm": 0.04846905600603435, |
| "learning_rate": 7.172199257839492e-06, |
| "loss": 1.1112, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.7274273805979671, |
| "grad_norm": 0.06760771174774231, |
| "learning_rate": 7.167332731281363e-06, |
| "loss": 1.1739, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.7281078552290222, |
| "grad_norm": 0.04414778875172002, |
| "learning_rate": 7.162463675212726e-06, |
| "loss": 1.1207, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.7287883298600774, |
| "grad_norm": 0.044146176097507095, |
| "learning_rate": 7.157592095316286e-06, |
| "loss": 1.1655, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.7294688044911326, |
| "grad_norm": 0.04415213012398565, |
| "learning_rate": 7.152717997277706e-06, |
| "loss": 1.0841, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.7301492791221877, |
| "grad_norm": 0.04683320609148643, |
| "learning_rate": 7.14784138678558e-06, |
| "loss": 1.103, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.7308297537532429, |
| "grad_norm": 0.041857344288494074, |
| "learning_rate": 7.142962269531439e-06, |
| "loss": 1.1669, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.731510228384298, |
| "grad_norm": 0.042008130220727205, |
| "learning_rate": 7.138080651209738e-06, |
| "loss": 1.189, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.7321907030153532, |
| "grad_norm": 1.0221404712041966, |
| "learning_rate": 7.133196537517848e-06, |
| "loss": 1.1469, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.7328711776464084, |
| "grad_norm": 0.042322130073403956, |
| "learning_rate": 7.128309934156059e-06, |
| "loss": 1.1318, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.7335516522774636, |
| "grad_norm": 0.042057265771129046, |
| "learning_rate": 7.12342084682756e-06, |
| "loss": 1.1443, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.7342321269085187, |
| "grad_norm": 0.047450147809600454, |
| "learning_rate": 7.118529281238444e-06, |
| "loss": 1.1628, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.7349126015395738, |
| "grad_norm": 0.038929774779211966, |
| "learning_rate": 7.113635243097694e-06, |
| "loss": 1.1716, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.735593076170629, |
| "grad_norm": 0.09452731162534289, |
| "learning_rate": 7.108738738117178e-06, |
| "loss": 1.1475, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.7362735508016842, |
| "grad_norm": 0.04475327075474272, |
| "learning_rate": 7.1038397720116445e-06, |
| "loss": 1.1252, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.7369540254327394, |
| "grad_norm": 0.049494950615798054, |
| "learning_rate": 7.098938350498716e-06, |
| "loss": 1.1659, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.7376345000637945, |
| "grad_norm": 0.04152173426603925, |
| "learning_rate": 7.094034479298877e-06, |
| "loss": 1.0856, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.7383149746948496, |
| "grad_norm": 0.047822463422239274, |
| "learning_rate": 7.0891281641354725e-06, |
| "loss": 1.1294, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.7389954493259048, |
| "grad_norm": 0.04237806087000458, |
| "learning_rate": 7.084219410734701e-06, |
| "loss": 1.1766, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.73967592395696, |
| "grad_norm": 0.039594916551205596, |
| "learning_rate": 7.079308224825606e-06, |
| "loss": 1.196, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.7403563985880152, |
| "grad_norm": 0.04336458299557206, |
| "learning_rate": 7.0743946121400695e-06, |
| "loss": 1.1839, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.7410368732190703, |
| "grad_norm": 0.04375341715007609, |
| "learning_rate": 7.069478578412807e-06, |
| "loss": 1.1983, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.7417173478501254, |
| "grad_norm": 0.05157083535351173, |
| "learning_rate": 7.064560129381359e-06, |
| "loss": 1.1507, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.7423978224811806, |
| "grad_norm": 0.060666298376872534, |
| "learning_rate": 7.059639270786083e-06, |
| "loss": 1.2219, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.7430782971122358, |
| "grad_norm": 0.07282006900214394, |
| "learning_rate": 7.054716008370152e-06, |
| "loss": 1.1525, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.743758771743291, |
| "grad_norm": 0.04284510073276491, |
| "learning_rate": 7.049790347879543e-06, |
| "loss": 1.1965, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.7444392463743461, |
| "grad_norm": 0.042066556853576444, |
| "learning_rate": 7.0448622950630305e-06, |
| "loss": 1.1347, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.7451197210054012, |
| "grad_norm": 0.04330476577042554, |
| "learning_rate": 7.039931855672185e-06, |
| "loss": 1.1636, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.7458001956364564, |
| "grad_norm": 0.0656755371298587, |
| "learning_rate": 7.034999035461356e-06, |
| "loss": 1.0869, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.7464806702675116, |
| "grad_norm": 0.04884741709734115, |
| "learning_rate": 7.03006384018768e-06, |
| "loss": 1.1266, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.7471611448985668, |
| "grad_norm": 0.07686213616910825, |
| "learning_rate": 7.025126275611058e-06, |
| "loss": 1.0971, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.7478416195296219, |
| "grad_norm": 0.04391594900179587, |
| "learning_rate": 7.020186347494159e-06, |
| "loss": 1.1419, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.748522094160677, |
| "grad_norm": 0.04022498193871379, |
| "learning_rate": 7.0152440616024105e-06, |
| "loss": 1.1534, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.7492025687917322, |
| "grad_norm": 0.04069958195471514, |
| "learning_rate": 7.010299423703996e-06, |
| "loss": 1.123, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.7498830434227874, |
| "grad_norm": 0.04279031084289905, |
| "learning_rate": 7.0053524395698345e-06, |
| "loss": 1.1211, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.7505635180538426, |
| "grad_norm": 0.03907354097090339, |
| "learning_rate": 7.000403114973591e-06, |
| "loss": 1.1609, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.7512439926848977, |
| "grad_norm": 0.04384068220483148, |
| "learning_rate": 6.99545145569166e-06, |
| "loss": 1.0995, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.7519244673159529, |
| "grad_norm": 0.045265801938899335, |
| "learning_rate": 6.990497467503163e-06, |
| "loss": 1.1096, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.752604941947008, |
| "grad_norm": 0.09957810788867909, |
| "learning_rate": 6.985541156189932e-06, |
| "loss": 1.132, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.7532854165780632, |
| "grad_norm": 0.0429281647908113, |
| "learning_rate": 6.98058252753652e-06, |
| "loss": 1.1622, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.7539658912091184, |
| "grad_norm": 0.052743801016877874, |
| "learning_rate": 6.975621587330179e-06, |
| "loss": 1.2174, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.7546463658401735, |
| "grad_norm": 0.03881047949712702, |
| "learning_rate": 6.970658341360859e-06, |
| "loss": 1.1805, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.7553268404712287, |
| "grad_norm": 0.04225458626164949, |
| "learning_rate": 6.965692795421206e-06, |
| "loss": 1.1722, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.7560073151022838, |
| "grad_norm": 0.10543672989935722, |
| "learning_rate": 6.9607249553065405e-06, |
| "loss": 1.2019, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.7566877897333391, |
| "grad_norm": 0.0565342929312989, |
| "learning_rate": 6.955754826814871e-06, |
| "loss": 1.0947, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.7573682643643942, |
| "grad_norm": 0.03930712197341347, |
| "learning_rate": 6.950782415746869e-06, |
| "loss": 1.1346, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.7580487389954493, |
| "grad_norm": 0.03760938964092718, |
| "learning_rate": 6.945807727905876e-06, |
| "loss": 1.1054, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.7587292136265045, |
| "grad_norm": 0.0379152054111856, |
| "learning_rate": 6.940830769097886e-06, |
| "loss": 1.0448, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.7594096882575596, |
| "grad_norm": 0.06937325137974219, |
| "learning_rate": 6.935851545131549e-06, |
| "loss": 1.1911, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.7600901628886149, |
| "grad_norm": 0.06566055465040227, |
| "learning_rate": 6.9308700618181505e-06, |
| "loss": 1.1573, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.76077063751967, |
| "grad_norm": 0.04493419709913744, |
| "learning_rate": 6.925886324971619e-06, |
| "loss": 1.1628, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.7614511121507251, |
| "grad_norm": 0.0418786470658152, |
| "learning_rate": 6.920900340408513e-06, |
| "loss": 1.2156, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.7621315867817803, |
| "grad_norm": 0.038634168191586375, |
| "learning_rate": 6.915912113948013e-06, |
| "loss": 1.1652, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.7628120614128354, |
| "grad_norm": 0.06168446681123073, |
| "learning_rate": 6.910921651411915e-06, |
| "loss": 1.2015, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.7634925360438907, |
| "grad_norm": 0.04635986156126397, |
| "learning_rate": 6.905928958624627e-06, |
| "loss": 1.1536, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.7641730106749458, |
| "grad_norm": 0.041883937869239204, |
| "learning_rate": 6.900934041413157e-06, |
| "loss": 1.1758, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.764853485306001, |
| "grad_norm": 0.04598375681816885, |
| "learning_rate": 6.89593690560711e-06, |
| "loss": 1.1464, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.7655339599370561, |
| "grad_norm": 0.04369752473272923, |
| "learning_rate": 6.890937557038685e-06, |
| "loss": 1.061, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.7662144345681112, |
| "grad_norm": 0.8223791208274646, |
| "learning_rate": 6.885936001542658e-06, |
| "loss": 1.189, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.7668949091991664, |
| "grad_norm": 0.041870872372011377, |
| "learning_rate": 6.880932244956381e-06, |
| "loss": 1.1183, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.7675753838302216, |
| "grad_norm": 0.0749667174818459, |
| "learning_rate": 6.875926293119778e-06, |
| "loss": 1.0854, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.7682558584612768, |
| "grad_norm": 0.04392599229758133, |
| "learning_rate": 6.870918151875331e-06, |
| "loss": 1.1703, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.7689363330923319, |
| "grad_norm": 0.15512309240607164, |
| "learning_rate": 6.865907827068085e-06, |
| "loss": 1.1616, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.769616807723387, |
| "grad_norm": 0.04222809718225347, |
| "learning_rate": 6.8608953245456224e-06, |
| "loss": 1.1157, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.7702972823544422, |
| "grad_norm": 0.04781561093643504, |
| "learning_rate": 6.8558806501580764e-06, |
| "loss": 1.1492, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.7709777569854974, |
| "grad_norm": 0.15222417065814942, |
| "learning_rate": 6.85086380975811e-06, |
| "loss": 1.2147, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.7716582316165526, |
| "grad_norm": 0.05465946027165189, |
| "learning_rate": 6.845844809200918e-06, |
| "loss": 1.1875, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.7723387062476077, |
| "grad_norm": 0.043507474640149424, |
| "learning_rate": 6.840823654344211e-06, |
| "loss": 1.1466, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.7730191808786628, |
| "grad_norm": 0.047930995130181106, |
| "learning_rate": 6.835800351048218e-06, |
| "loss": 1.1868, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.773699655509718, |
| "grad_norm": 0.06266225882556943, |
| "learning_rate": 6.830774905175677e-06, |
| "loss": 1.156, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.7743801301407732, |
| "grad_norm": 0.04859618741694431, |
| "learning_rate": 6.82574732259182e-06, |
| "loss": 1.0918, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.7750606047718284, |
| "grad_norm": 0.04801810270287596, |
| "learning_rate": 6.82071760916438e-06, |
| "loss": 1.1477, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.7757410794028835, |
| "grad_norm": 0.0473223818411507, |
| "learning_rate": 6.815685770763573e-06, |
| "loss": 1.1164, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.7764215540339386, |
| "grad_norm": 0.043906942829315364, |
| "learning_rate": 6.810651813262096e-06, |
| "loss": 1.1267, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.7771020286649938, |
| "grad_norm": 0.04601911308760648, |
| "learning_rate": 6.805615742535117e-06, |
| "loss": 1.1288, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.777782503296049, |
| "grad_norm": 0.047303938482842395, |
| "learning_rate": 6.800577564460275e-06, |
| "loss": 1.179, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.7784629779271042, |
| "grad_norm": 0.04929207282941646, |
| "learning_rate": 6.795537284917666e-06, |
| "loss": 1.1631, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.7791434525581593, |
| "grad_norm": 0.0451527655223312, |
| "learning_rate": 6.7904949097898376e-06, |
| "loss": 1.1429, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.7798239271892144, |
| "grad_norm": 0.0488569182696569, |
| "learning_rate": 6.785450444961783e-06, |
| "loss": 1.1077, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.7805044018202696, |
| "grad_norm": 0.04586542464915239, |
| "learning_rate": 6.780403896320938e-06, |
| "loss": 1.1599, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.7811848764513248, |
| "grad_norm": 0.042563003759626114, |
| "learning_rate": 6.775355269757166e-06, |
| "loss": 1.1288, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.78186535108238, |
| "grad_norm": 0.04709233196206943, |
| "learning_rate": 6.770304571162759e-06, |
| "loss": 1.1172, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.7825458257134351, |
| "grad_norm": 0.0534028573034039, |
| "learning_rate": 6.765251806432423e-06, |
| "loss": 1.1766, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.7832263003444903, |
| "grad_norm": 0.037200717084029736, |
| "learning_rate": 6.7601969814632805e-06, |
| "loss": 1.1748, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.7839067749755454, |
| "grad_norm": 0.039839632104403555, |
| "learning_rate": 6.755140102154855e-06, |
| "loss": 1.0733, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.7845872496066006, |
| "grad_norm": 0.04106865746747149, |
| "learning_rate": 6.7500811744090725e-06, |
| "loss": 1.1776, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.7852677242376558, |
| "grad_norm": 0.039882001703510736, |
| "learning_rate": 6.7450202041302404e-06, |
| "loss": 1.1651, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.7859481988687109, |
| "grad_norm": 0.0398379046814066, |
| "learning_rate": 6.739957197225059e-06, |
| "loss": 1.1796, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.7866286734997661, |
| "grad_norm": 0.04132540884480281, |
| "learning_rate": 6.734892159602601e-06, |
| "loss": 1.1913, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.7873091481308212, |
| "grad_norm": 0.1403333681186676, |
| "learning_rate": 6.7298250971743094e-06, |
| "loss": 1.1167, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.7879896227618765, |
| "grad_norm": 0.04052355930252153, |
| "learning_rate": 6.724756015853994e-06, |
| "loss": 1.1029, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.7886700973929316, |
| "grad_norm": 0.06712051352806869, |
| "learning_rate": 6.719684921557816e-06, |
| "loss": 1.0857, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.7893505720239867, |
| "grad_norm": 0.040511857114010424, |
| "learning_rate": 6.71461182020429e-06, |
| "loss": 1.1407, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.7900310466550419, |
| "grad_norm": 0.03893191026718295, |
| "learning_rate": 6.709536717714269e-06, |
| "loss": 1.0647, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.790711521286097, |
| "grad_norm": 0.038741558519808664, |
| "learning_rate": 6.704459620010945e-06, |
| "loss": 1.1284, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.7913919959171523, |
| "grad_norm": 0.037503955643134496, |
| "learning_rate": 6.699380533019836e-06, |
| "loss": 1.1487, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.7920724705482074, |
| "grad_norm": 0.03868788023709744, |
| "learning_rate": 6.694299462668785e-06, |
| "loss": 1.194, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.7927529451792625, |
| "grad_norm": 0.05175370915744133, |
| "learning_rate": 6.689216414887947e-06, |
| "loss": 1.1888, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.7934334198103177, |
| "grad_norm": 0.04187198494977118, |
| "learning_rate": 6.684131395609784e-06, |
| "loss": 1.1652, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.7941138944413728, |
| "grad_norm": 0.06991835814940914, |
| "learning_rate": 6.679044410769064e-06, |
| "loss": 1.078, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.7947943690724281, |
| "grad_norm": 0.03928030662322917, |
| "learning_rate": 6.673955466302844e-06, |
| "loss": 1.1347, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.7954748437034832, |
| "grad_norm": 0.06556567246463971, |
| "learning_rate": 6.668864568150469e-06, |
| "loss": 1.1564, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.7961553183345383, |
| "grad_norm": 0.051672923587688206, |
| "learning_rate": 6.663771722253567e-06, |
| "loss": 1.1552, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.7968357929655935, |
| "grad_norm": 0.045778478618114, |
| "learning_rate": 6.658676934556035e-06, |
| "loss": 1.1613, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.7975162675966486, |
| "grad_norm": 0.037569343554903545, |
| "learning_rate": 6.653580211004039e-06, |
| "loss": 1.1837, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.7981967422277039, |
| "grad_norm": 0.042785536903053735, |
| "learning_rate": 6.648481557546002e-06, |
| "loss": 1.126, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.798877216858759, |
| "grad_norm": 0.03986940166665901, |
| "learning_rate": 6.643380980132608e-06, |
| "loss": 1.18, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.7995576914898141, |
| "grad_norm": 0.042633298364431076, |
| "learning_rate": 6.638278484716771e-06, |
| "loss": 1.1298, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.8002381661208693, |
| "grad_norm": 0.04041013356548493, |
| "learning_rate": 6.63317407725366e-06, |
| "loss": 1.1684, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.8009186407519244, |
| "grad_norm": 0.03925791424798017, |
| "learning_rate": 6.628067763700662e-06, |
| "loss": 1.1714, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.8015991153829797, |
| "grad_norm": 0.060428415986807496, |
| "learning_rate": 6.622959550017397e-06, |
| "loss": 1.1556, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.8022795900140348, |
| "grad_norm": 0.03968717411698841, |
| "learning_rate": 6.617849442165698e-06, |
| "loss": 1.1696, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.80296006464509, |
| "grad_norm": 0.04918573413004946, |
| "learning_rate": 6.612737446109614e-06, |
| "loss": 1.1542, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.8036405392761451, |
| "grad_norm": 0.03632300431935989, |
| "learning_rate": 6.607623567815391e-06, |
| "loss": 1.1728, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.8043210139072002, |
| "grad_norm": 0.03868462872196307, |
| "learning_rate": 6.602507813251478e-06, |
| "loss": 1.12, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.8050014885382555, |
| "grad_norm": 0.04326647088999474, |
| "learning_rate": 6.59739018838851e-06, |
| "loss": 1.1703, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.8056819631693106, |
| "grad_norm": 0.042993618675953436, |
| "learning_rate": 6.592270699199306e-06, |
| "loss": 1.1058, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.8063624378003658, |
| "grad_norm": 0.037234437193195045, |
| "learning_rate": 6.5871493516588595e-06, |
| "loss": 1.1532, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.8070429124314209, |
| "grad_norm": 0.05344055731907998, |
| "learning_rate": 6.5820261517443365e-06, |
| "loss": 1.1535, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.807723387062476, |
| "grad_norm": 0.04214235502413425, |
| "learning_rate": 6.5769011054350604e-06, |
| "loss": 1.1818, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.8084038616935313, |
| "grad_norm": 0.04404793500124364, |
| "learning_rate": 6.5717742187125146e-06, |
| "loss": 1.1332, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.8090843363245864, |
| "grad_norm": 0.03832530671679624, |
| "learning_rate": 6.5666454975603234e-06, |
| "loss": 1.0854, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.8097648109556416, |
| "grad_norm": 0.03968072705931161, |
| "learning_rate": 6.561514947964258e-06, |
| "loss": 1.0979, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.8104452855866967, |
| "grad_norm": 0.044024012396331864, |
| "learning_rate": 6.556382575912225e-06, |
| "loss": 1.15, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.8111257602177518, |
| "grad_norm": 0.046743667143049464, |
| "learning_rate": 6.551248387394251e-06, |
| "loss": 1.1359, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.8118062348488071, |
| "grad_norm": 0.07108309726781734, |
| "learning_rate": 6.546112388402487e-06, |
| "loss": 1.1255, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.8124867094798622, |
| "grad_norm": 0.04243584220518146, |
| "learning_rate": 6.540974584931199e-06, |
| "loss": 1.1633, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.8131671841109174, |
| "grad_norm": 0.04017199870986986, |
| "learning_rate": 6.535834982976752e-06, |
| "loss": 1.1856, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.8138476587419725, |
| "grad_norm": 0.03833103072994475, |
| "learning_rate": 6.530693588537619e-06, |
| "loss": 1.1441, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.8145281333730277, |
| "grad_norm": 0.04297394216426952, |
| "learning_rate": 6.525550407614358e-06, |
| "loss": 1.1445, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.8152086080040829, |
| "grad_norm": 0.07188862913546087, |
| "learning_rate": 6.520405446209615e-06, |
| "loss": 1.1799, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.815889082635138, |
| "grad_norm": 0.03987215118118984, |
| "learning_rate": 6.5152587103281165e-06, |
| "loss": 1.1144, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.8165695572661932, |
| "grad_norm": 0.03817132524293037, |
| "learning_rate": 6.510110205976652e-06, |
| "loss": 1.0878, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.8172500318972483, |
| "grad_norm": 0.04141078067635132, |
| "learning_rate": 6.504959939164081e-06, |
| "loss": 1.1538, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.8179305065283035, |
| "grad_norm": 0.03973666336401996, |
| "learning_rate": 6.4998079159013236e-06, |
| "loss": 1.1297, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.8186109811593586, |
| "grad_norm": 0.06287711186363208, |
| "learning_rate": 6.494654142201342e-06, |
| "loss": 1.0933, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.8192914557904138, |
| "grad_norm": 0.03691243912728769, |
| "learning_rate": 6.489498624079144e-06, |
| "loss": 1.0645, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.819971930421469, |
| "grad_norm": 0.03868123428432716, |
| "learning_rate": 6.4843413675517765e-06, |
| "loss": 1.1448, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.8206524050525241, |
| "grad_norm": 0.04592978741828481, |
| "learning_rate": 6.479182378638308e-06, |
| "loss": 1.0595, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.8213328796835793, |
| "grad_norm": 0.045057504118504885, |
| "learning_rate": 6.47402166335984e-06, |
| "loss": 1.1042, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.8220133543146344, |
| "grad_norm": 0.0463959955709015, |
| "learning_rate": 6.468859227739479e-06, |
| "loss": 1.2134, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.8226938289456897, |
| "grad_norm": 0.04393391375867404, |
| "learning_rate": 6.463695077802343e-06, |
| "loss": 1.159, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.8233743035767448, |
| "grad_norm": 0.03825599467131142, |
| "learning_rate": 6.458529219575551e-06, |
| "loss": 1.1733, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.8240547782077999, |
| "grad_norm": 0.037767227267221204, |
| "learning_rate": 6.453361659088217e-06, |
| "loss": 1.1174, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.8247352528388551, |
| "grad_norm": 0.0370763078234574, |
| "learning_rate": 6.448192402371436e-06, |
| "loss": 1.1563, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.8254157274699102, |
| "grad_norm": 0.036711175610672164, |
| "learning_rate": 6.443021455458292e-06, |
| "loss": 1.106, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.8260962021009655, |
| "grad_norm": 0.03738930371806809, |
| "learning_rate": 6.437848824383832e-06, |
| "loss": 1.1606, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.8267766767320206, |
| "grad_norm": 0.05164041123554863, |
| "learning_rate": 6.432674515185077e-06, |
| "loss": 1.1542, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.8274571513630757, |
| "grad_norm": 0.03676194118397665, |
| "learning_rate": 6.427498533900999e-06, |
| "loss": 1.1235, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.8281376259941309, |
| "grad_norm": 0.03989164985813374, |
| "learning_rate": 6.422320886572527e-06, |
| "loss": 1.1466, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.828818100625186, |
| "grad_norm": 0.04356300216442829, |
| "learning_rate": 6.417141579242532e-06, |
| "loss": 1.0876, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.8294985752562413, |
| "grad_norm": 0.03948663487235072, |
| "learning_rate": 6.4119606179558245e-06, |
| "loss": 1.0584, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.8301790498872964, |
| "grad_norm": 0.039683834391457604, |
| "learning_rate": 6.4067780087591415e-06, |
| "loss": 1.1104, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.8308595245183515, |
| "grad_norm": 0.039649808293137266, |
| "learning_rate": 6.4015937577011476e-06, |
| "loss": 1.1355, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.8315399991494067, |
| "grad_norm": 0.040785245487367505, |
| "learning_rate": 6.396407870832419e-06, |
| "loss": 1.2331, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.8322204737804618, |
| "grad_norm": 0.03981988996626304, |
| "learning_rate": 6.391220354205448e-06, |
| "loss": 1.1216, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.8329009484115171, |
| "grad_norm": 0.044016546550143455, |
| "learning_rate": 6.386031213874622e-06, |
| "loss": 1.1495, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.8335814230425722, |
| "grad_norm": 0.03896895263836741, |
| "learning_rate": 6.380840455896224e-06, |
| "loss": 1.1613, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.8342618976736274, |
| "grad_norm": 0.06832828163246732, |
| "learning_rate": 6.375648086328431e-06, |
| "loss": 1.1212, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.8349423723046825, |
| "grad_norm": 0.25245491435815004, |
| "learning_rate": 6.3704541112312945e-06, |
| "loss": 1.174, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.8356228469357376, |
| "grad_norm": 0.043274664497043036, |
| "learning_rate": 6.365258536666743e-06, |
| "loss": 1.1925, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.8363033215667929, |
| "grad_norm": 0.04474702661275679, |
| "learning_rate": 6.3600613686985726e-06, |
| "loss": 1.1827, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.836983796197848, |
| "grad_norm": 0.04088242470620767, |
| "learning_rate": 6.354862613392436e-06, |
| "loss": 1.1585, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.8376642708289032, |
| "grad_norm": 0.03934094541159658, |
| "learning_rate": 6.349662276815839e-06, |
| "loss": 1.0986, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.8383447454599583, |
| "grad_norm": 0.03836873137443747, |
| "learning_rate": 6.344460365038138e-06, |
| "loss": 1.0824, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.8390252200910134, |
| "grad_norm": 0.04423182388938506, |
| "learning_rate": 6.339256884130518e-06, |
| "loss": 1.1102, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.8397056947220687, |
| "grad_norm": 0.03929692065282594, |
| "learning_rate": 6.334051840166006e-06, |
| "loss": 1.1949, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.8403861693531238, |
| "grad_norm": 0.03897705730851779, |
| "learning_rate": 6.328845239219446e-06, |
| "loss": 1.1435, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.841066643984179, |
| "grad_norm": 0.04127103596120887, |
| "learning_rate": 6.3236370873675025e-06, |
| "loss": 1.0863, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.8417471186152341, |
| "grad_norm": 0.04896894494341286, |
| "learning_rate": 6.318427390688649e-06, |
| "loss": 1.1311, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.8424275932462892, |
| "grad_norm": 0.03891171272385797, |
| "learning_rate": 6.313216155263161e-06, |
| "loss": 1.1414, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.8431080678773445, |
| "grad_norm": 0.039783788194363214, |
| "learning_rate": 6.308003387173112e-06, |
| "loss": 1.1852, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.8437885425083996, |
| "grad_norm": 0.038793732155587374, |
| "learning_rate": 6.302789092502364e-06, |
| "loss": 1.1957, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.8444690171394548, |
| "grad_norm": 0.03833731039389526, |
| "learning_rate": 6.2975732773365575e-06, |
| "loss": 1.1059, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.8451494917705099, |
| "grad_norm": 0.038076988501699965, |
| "learning_rate": 6.292355947763114e-06, |
| "loss": 1.1573, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.845829966401565, |
| "grad_norm": 0.04952437152204103, |
| "learning_rate": 6.287137109871214e-06, |
| "loss": 1.1712, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.8465104410326203, |
| "grad_norm": 0.045910265315598234, |
| "learning_rate": 6.281916769751808e-06, |
| "loss": 1.1327, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.8471909156636754, |
| "grad_norm": 0.0393005068086636, |
| "learning_rate": 6.276694933497588e-06, |
| "loss": 1.0923, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.8478713902947306, |
| "grad_norm": 0.037291589698702415, |
| "learning_rate": 6.271471607203006e-06, |
| "loss": 1.1396, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.8485518649257857, |
| "grad_norm": 0.035658033542247156, |
| "learning_rate": 6.26624679696424e-06, |
| "loss": 1.2, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.8492323395568409, |
| "grad_norm": 0.037711808055210426, |
| "learning_rate": 6.26102050887921e-06, |
| "loss": 1.1482, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.8499128141878961, |
| "grad_norm": 0.04141554608842721, |
| "learning_rate": 6.2557927490475534e-06, |
| "loss": 1.0702, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.8505932888189512, |
| "grad_norm": 0.037101277030303734, |
| "learning_rate": 6.25056352357063e-06, |
| "loss": 1.1072, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.8512737634500064, |
| "grad_norm": 0.04107851103743336, |
| "learning_rate": 6.245332838551509e-06, |
| "loss": 1.1945, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.8519542380810615, |
| "grad_norm": 0.04491065841553481, |
| "learning_rate": 6.240100700094961e-06, |
| "loss": 1.1496, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.8526347127121167, |
| "grad_norm": 0.2556338887019016, |
| "learning_rate": 6.234867114307457e-06, |
| "loss": 1.1469, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.8533151873431719, |
| "grad_norm": 0.037445212352137484, |
| "learning_rate": 6.2296320872971515e-06, |
| "loss": 1.13, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.853995661974227, |
| "grad_norm": 0.04572930585644979, |
| "learning_rate": 6.224395625173887e-06, |
| "loss": 1.0707, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.8546761366052822, |
| "grad_norm": 0.03849805262900298, |
| "learning_rate": 6.219157734049179e-06, |
| "loss": 1.1179, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.8553566112363373, |
| "grad_norm": 0.03695730067560196, |
| "learning_rate": 6.213918420036207e-06, |
| "loss": 1.1548, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.8560370858673925, |
| "grad_norm": 0.046154897762123656, |
| "learning_rate": 6.208677689249816e-06, |
| "loss": 1.1393, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.8567175604984477, |
| "grad_norm": 0.049440430017302896, |
| "learning_rate": 6.203435547806503e-06, |
| "loss": 1.1343, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.8573980351295029, |
| "grad_norm": 0.0539384604758099, |
| "learning_rate": 6.19819200182441e-06, |
| "loss": 1.1647, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.858078509760558, |
| "grad_norm": 0.04721963897092052, |
| "learning_rate": 6.192947057423321e-06, |
| "loss": 1.1083, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.8587589843916131, |
| "grad_norm": 0.03671674182385264, |
| "learning_rate": 6.187700720724648e-06, |
| "loss": 1.1443, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.8594394590226683, |
| "grad_norm": 0.045014036344351804, |
| "learning_rate": 6.1824529978514335e-06, |
| "loss": 1.1997, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.8601199336537235, |
| "grad_norm": 0.03930216568782988, |
| "learning_rate": 6.177203894928333e-06, |
| "loss": 1.1547, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.8608004082847787, |
| "grad_norm": 0.03624644947156358, |
| "learning_rate": 6.171953418081616e-06, |
| "loss": 1.0756, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.8614808829158338, |
| "grad_norm": 0.04076660236087528, |
| "learning_rate": 6.16670157343915e-06, |
| "loss": 1.1932, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.8621613575468889, |
| "grad_norm": 0.03657911312435845, |
| "learning_rate": 6.161448367130407e-06, |
| "loss": 1.0542, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.8628418321779441, |
| "grad_norm": 0.04600666735918846, |
| "learning_rate": 6.156193805286442e-06, |
| "loss": 1.0834, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.8635223068089993, |
| "grad_norm": 0.03891338180640547, |
| "learning_rate": 6.1509378940398955e-06, |
| "loss": 1.1896, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.8642027814400545, |
| "grad_norm": 0.03821249523986339, |
| "learning_rate": 6.14568063952498e-06, |
| "loss": 1.1259, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.8648832560711096, |
| "grad_norm": 0.04080432984613173, |
| "learning_rate": 6.140422047877479e-06, |
| "loss": 1.1357, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.8655637307021647, |
| "grad_norm": 0.03944846842629839, |
| "learning_rate": 6.1351621252347305e-06, |
| "loss": 1.1267, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.8662442053332199, |
| "grad_norm": 0.05088129824160262, |
| "learning_rate": 6.129900877735636e-06, |
| "loss": 1.0795, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.8669246799642751, |
| "grad_norm": 0.06282343181233374, |
| "learning_rate": 6.124638311520634e-06, |
| "loss": 1.1804, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.8676051545953303, |
| "grad_norm": 0.04264232701626964, |
| "learning_rate": 6.1193744327317075e-06, |
| "loss": 1.0991, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.8682856292263854, |
| "grad_norm": 0.03433889810718825, |
| "learning_rate": 6.1141092475123675e-06, |
| "loss": 1.0482, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.8689661038574406, |
| "grad_norm": 0.04012493907691257, |
| "learning_rate": 6.108842762007653e-06, |
| "loss": 1.1447, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.8696465784884957, |
| "grad_norm": 0.040379105180865495, |
| "learning_rate": 6.103574982364118e-06, |
| "loss": 1.1536, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.8703270531195509, |
| "grad_norm": 0.04068287917649762, |
| "learning_rate": 6.098305914729829e-06, |
| "loss": 1.0713, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.8710075277506061, |
| "grad_norm": 0.04412785485245827, |
| "learning_rate": 6.093035565254356e-06, |
| "loss": 1.1282, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.8716880023816612, |
| "grad_norm": 0.042002433554191435, |
| "learning_rate": 6.087763940088761e-06, |
| "loss": 1.1684, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.8723684770127164, |
| "grad_norm": 0.050676037683770894, |
| "learning_rate": 6.082491045385601e-06, |
| "loss": 1.1357, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.8730489516437715, |
| "grad_norm": 0.05907674164713621, |
| "learning_rate": 6.0772168872989094e-06, |
| "loss": 1.0541, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.8737294262748266, |
| "grad_norm": 0.0502306633678257, |
| "learning_rate": 6.0719414719841985e-06, |
| "loss": 1.2116, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.8744099009058819, |
| "grad_norm": 0.04096965723691513, |
| "learning_rate": 6.066664805598442e-06, |
| "loss": 1.1772, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.875090375536937, |
| "grad_norm": 0.044691170839062655, |
| "learning_rate": 6.061386894300082e-06, |
| "loss": 1.0655, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.8757708501679922, |
| "grad_norm": 0.07041783986510274, |
| "learning_rate": 6.0561077442490045e-06, |
| "loss": 1.1424, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.8764513247990473, |
| "grad_norm": 0.03981795746048054, |
| "learning_rate": 6.050827361606549e-06, |
| "loss": 1.0984, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.8771317994301024, |
| "grad_norm": 0.03913148996349027, |
| "learning_rate": 6.0455457525354865e-06, |
| "loss": 1.0865, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.8778122740611577, |
| "grad_norm": 0.036648731691854126, |
| "learning_rate": 6.0402629232000275e-06, |
| "loss": 1.16, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.8784927486922128, |
| "grad_norm": 0.03810812228799579, |
| "learning_rate": 6.034978879765798e-06, |
| "loss": 1.1141, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.879173223323268, |
| "grad_norm": 0.03912326860714995, |
| "learning_rate": 6.029693628399851e-06, |
| "loss": 1.0799, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.8798536979543231, |
| "grad_norm": 0.24643907520489966, |
| "learning_rate": 6.024407175270637e-06, |
| "loss": 1.1197, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.8805341725853782, |
| "grad_norm": 0.03564071624903765, |
| "learning_rate": 6.01911952654802e-06, |
| "loss": 1.1021, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.8812146472164335, |
| "grad_norm": 0.03760501790930282, |
| "learning_rate": 6.013830688403252e-06, |
| "loss": 1.1147, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.8818951218474886, |
| "grad_norm": 0.03809395915758253, |
| "learning_rate": 6.008540667008981e-06, |
| "loss": 1.194, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.8825755964785438, |
| "grad_norm": 0.039825805300777097, |
| "learning_rate": 6.003249468539226e-06, |
| "loss": 1.1169, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.8832560711095989, |
| "grad_norm": 0.03810123090259154, |
| "learning_rate": 5.997957099169388e-06, |
| "loss": 1.0684, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.8839365457406541, |
| "grad_norm": 0.03686386460521089, |
| "learning_rate": 5.99266356507623e-06, |
| "loss": 1.0948, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.8846170203717093, |
| "grad_norm": 0.041215161228331305, |
| "learning_rate": 5.9873688724378764e-06, |
| "loss": 1.1038, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.8852974950027644, |
| "grad_norm": 0.0404316678257641, |
| "learning_rate": 5.982073027433803e-06, |
| "loss": 1.0868, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.8859779696338196, |
| "grad_norm": 0.04485496009556138, |
| "learning_rate": 5.976776036244833e-06, |
| "loss": 1.1781, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.8866584442648747, |
| "grad_norm": 0.039768048320405154, |
| "learning_rate": 5.971477905053121e-06, |
| "loss": 1.1799, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.8873389188959299, |
| "grad_norm": 0.038106129270751916, |
| "learning_rate": 5.96617864004216e-06, |
| "loss": 1.1092, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.8880193935269851, |
| "grad_norm": 0.04124245178507421, |
| "learning_rate": 5.960878247396761e-06, |
| "loss": 1.1269, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.8886998681580403, |
| "grad_norm": 0.0407894735351479, |
| "learning_rate": 5.955576733303053e-06, |
| "loss": 1.0696, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.8893803427890954, |
| "grad_norm": 0.040065948868845826, |
| "learning_rate": 5.9502741039484704e-06, |
| "loss": 1.1434, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.8900608174201505, |
| "grad_norm": 0.045054485388447335, |
| "learning_rate": 5.944970365521757e-06, |
| "loss": 1.167, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.8907412920512057, |
| "grad_norm": 0.06286146195704309, |
| "learning_rate": 5.939665524212943e-06, |
| "loss": 1.145, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.8914217666822609, |
| "grad_norm": 0.04950147084807547, |
| "learning_rate": 5.9343595862133515e-06, |
| "loss": 1.1118, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.8921022413133161, |
| "grad_norm": 0.04048402991149793, |
| "learning_rate": 5.92905255771558e-06, |
| "loss": 1.0208, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.8927827159443712, |
| "grad_norm": 0.049178537817983335, |
| "learning_rate": 5.923744444913504e-06, |
| "loss": 1.179, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.8934631905754263, |
| "grad_norm": 0.039002225058750825, |
| "learning_rate": 5.918435254002262e-06, |
| "loss": 1.1072, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.8941436652064815, |
| "grad_norm": 0.03673171425347061, |
| "learning_rate": 5.91312499117825e-06, |
| "loss": 1.1457, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.8948241398375367, |
| "grad_norm": 0.03643856865852586, |
| "learning_rate": 5.907813662639119e-06, |
| "loss": 1.1448, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.8955046144685919, |
| "grad_norm": 0.0371870126740186, |
| "learning_rate": 5.902501274583757e-06, |
| "loss": 1.0616, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.896185089099647, |
| "grad_norm": 0.04363839355813366, |
| "learning_rate": 5.897187833212295e-06, |
| "loss": 1.0726, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.8968655637307021, |
| "grad_norm": 0.03861512314791017, |
| "learning_rate": 5.891873344726089e-06, |
| "loss": 1.1552, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.8975460383617573, |
| "grad_norm": 0.04715528251371471, |
| "learning_rate": 5.886557815327723e-06, |
| "loss": 1.1466, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.8982265129928125, |
| "grad_norm": 0.03987438529084703, |
| "learning_rate": 5.881241251220986e-06, |
| "loss": 1.066, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.8989069876238677, |
| "grad_norm": 0.03838599343411921, |
| "learning_rate": 5.875923658610886e-06, |
| "loss": 1.1084, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.8995874622549228, |
| "grad_norm": 0.04794769005912478, |
| "learning_rate": 5.87060504370362e-06, |
| "loss": 1.1769, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.900267936885978, |
| "grad_norm": 0.04193282437336126, |
| "learning_rate": 5.865285412706589e-06, |
| "loss": 1.1094, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.9009484115170331, |
| "grad_norm": 0.04100519165196842, |
| "learning_rate": 5.859964771828373e-06, |
| "loss": 1.1342, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.9016288861480883, |
| "grad_norm": 0.0414869677010185, |
| "learning_rate": 5.8546431272787315e-06, |
| "loss": 1.1219, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.9023093607791435, |
| "grad_norm": 0.041992393473424115, |
| "learning_rate": 5.849320485268597e-06, |
| "loss": 1.0561, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.9029898354101986, |
| "grad_norm": 0.038869212364382434, |
| "learning_rate": 5.843996852010067e-06, |
| "loss": 1.1571, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.9036703100412538, |
| "grad_norm": 0.036608143403602626, |
| "learning_rate": 5.83867223371639e-06, |
| "loss": 1.1347, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.9043507846723089, |
| "grad_norm": 0.036981921488967735, |
| "learning_rate": 5.833346636601974e-06, |
| "loss": 1.1286, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.9050312593033641, |
| "grad_norm": 0.03900628795925875, |
| "learning_rate": 5.828020066882361e-06, |
| "loss": 1.1043, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.9057117339344193, |
| "grad_norm": 0.04582352143693707, |
| "learning_rate": 5.822692530774231e-06, |
| "loss": 1.0899, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.9063922085654744, |
| "grad_norm": 0.03613498624225157, |
| "learning_rate": 5.817364034495392e-06, |
| "loss": 1.0869, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.9070726831965296, |
| "grad_norm": 0.03579108523961496, |
| "learning_rate": 5.812034584264772e-06, |
| "loss": 1.0919, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.9077531578275847, |
| "grad_norm": 0.04619823633581344, |
| "learning_rate": 5.806704186302413e-06, |
| "loss": 1.0912, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.90843363245864, |
| "grad_norm": 0.037258090650687144, |
| "learning_rate": 5.801372846829466e-06, |
| "loss": 1.1635, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.9091141070896951, |
| "grad_norm": 0.0365801539988293, |
| "learning_rate": 5.796040572068175e-06, |
| "loss": 1.1301, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.9097945817207502, |
| "grad_norm": 0.05106804125404341, |
| "learning_rate": 5.790707368241878e-06, |
| "loss": 1.1021, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.9104750563518054, |
| "grad_norm": 0.03710062230657684, |
| "learning_rate": 5.7853732415749985e-06, |
| "loss": 1.1301, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.9111555309828605, |
| "grad_norm": 0.045332251441410246, |
| "learning_rate": 5.7800381982930366e-06, |
| "loss": 1.1265, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.9118360056139158, |
| "grad_norm": 0.045452205093223154, |
| "learning_rate": 5.774702244622563e-06, |
| "loss": 1.1631, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.9125164802449709, |
| "grad_norm": 0.04685102810770462, |
| "learning_rate": 5.769365386791207e-06, |
| "loss": 1.1557, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.913196954876026, |
| "grad_norm": 0.038270566458910894, |
| "learning_rate": 5.764027631027659e-06, |
| "loss": 1.0619, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.9138774295070812, |
| "grad_norm": 0.04116978111328107, |
| "learning_rate": 5.7586889835616514e-06, |
| "loss": 1.1851, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.9145579041381363, |
| "grad_norm": 0.09016617477511366, |
| "learning_rate": 5.753349450623961e-06, |
| "loss": 1.1841, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.9152383787691916, |
| "grad_norm": 0.11860215671725502, |
| "learning_rate": 5.748009038446398e-06, |
| "loss": 1.0897, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.9159188534002467, |
| "grad_norm": 0.03962594938674042, |
| "learning_rate": 5.7426677532618e-06, |
| "loss": 1.1321, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.9165993280313018, |
| "grad_norm": 0.04517562669664499, |
| "learning_rate": 5.737325601304019e-06, |
| "loss": 1.0935, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.917279802662357, |
| "grad_norm": 0.040219962082552206, |
| "learning_rate": 5.7319825888079215e-06, |
| "loss": 1.1468, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.9179602772934121, |
| "grad_norm": 0.03875848018693194, |
| "learning_rate": 5.7266387220093775e-06, |
| "loss": 1.1082, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.9186407519244674, |
| "grad_norm": 0.04142151680913673, |
| "learning_rate": 5.721294007145256e-06, |
| "loss": 1.2033, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.9193212265555225, |
| "grad_norm": 0.03777933370189356, |
| "learning_rate": 5.715948450453413e-06, |
| "loss": 1.1057, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.9200017011865776, |
| "grad_norm": 0.04734712225650399, |
| "learning_rate": 5.710602058172691e-06, |
| "loss": 1.1423, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.9206821758176328, |
| "grad_norm": 0.04535076469762656, |
| "learning_rate": 5.705254836542902e-06, |
| "loss": 1.1911, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.9213626504486879, |
| "grad_norm": 0.03684009862310686, |
| "learning_rate": 5.69990679180483e-06, |
| "loss": 1.0754, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.9220431250797432, |
| "grad_norm": 0.04289378110103612, |
| "learning_rate": 5.6945579302002176e-06, |
| "loss": 1.2281, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.9227235997107983, |
| "grad_norm": 0.037830261395348126, |
| "learning_rate": 5.689208257971766e-06, |
| "loss": 1.0832, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.9234040743418535, |
| "grad_norm": 2.090392898581139, |
| "learning_rate": 5.683857781363114e-06, |
| "loss": 1.0782, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.9240845489729086, |
| "grad_norm": 0.042266058791723134, |
| "learning_rate": 5.678506506618845e-06, |
| "loss": 1.1484, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.9247650236039637, |
| "grad_norm": 0.03718158141145417, |
| "learning_rate": 5.673154439984471e-06, |
| "loss": 1.0984, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.9254454982350189, |
| "grad_norm": 0.04138340821876582, |
| "learning_rate": 5.667801587706434e-06, |
| "loss": 1.1733, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.9261259728660741, |
| "grad_norm": 0.037699073237977145, |
| "learning_rate": 5.662447956032083e-06, |
| "loss": 1.1329, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.9268064474971293, |
| "grad_norm": 0.03988237002748851, |
| "learning_rate": 5.657093551209687e-06, |
| "loss": 1.1106, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.9274869221281844, |
| "grad_norm": 0.040582020227302805, |
| "learning_rate": 5.651738379488409e-06, |
| "loss": 1.1831, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.9281673967592395, |
| "grad_norm": 0.037750046648622575, |
| "learning_rate": 5.646382447118315e-06, |
| "loss": 1.1402, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.9288478713902947, |
| "grad_norm": 0.04005012095284457, |
| "learning_rate": 5.641025760350348e-06, |
| "loss": 1.1422, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.9295283460213499, |
| "grad_norm": 0.03670304338073242, |
| "learning_rate": 5.635668325436343e-06, |
| "loss": 1.1749, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.9302088206524051, |
| "grad_norm": 0.042699791501834956, |
| "learning_rate": 5.6303101486290025e-06, |
| "loss": 1.1627, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.9308892952834602, |
| "grad_norm": 0.038454435482173276, |
| "learning_rate": 5.624951236181893e-06, |
| "loss": 1.1084, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.9315697699145153, |
| "grad_norm": 0.042276070833888346, |
| "learning_rate": 5.619591594349443e-06, |
| "loss": 1.1576, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.9322502445455705, |
| "grad_norm": 0.04573517286117657, |
| "learning_rate": 5.614231229386933e-06, |
| "loss": 1.1648, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.9329307191766257, |
| "grad_norm": 0.042286294375058105, |
| "learning_rate": 5.608870147550483e-06, |
| "loss": 1.1192, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.9336111938076809, |
| "grad_norm": 0.04213665462727033, |
| "learning_rate": 5.603508355097054e-06, |
| "loss": 1.1248, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.934291668438736, |
| "grad_norm": 0.0874870290332434, |
| "learning_rate": 5.598145858284436e-06, |
| "loss": 1.1522, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.9349721430697912, |
| "grad_norm": 0.05352039006439997, |
| "learning_rate": 5.592782663371237e-06, |
| "loss": 1.1555, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.9356526177008463, |
| "grad_norm": 0.05227157008033531, |
| "learning_rate": 5.587418776616884e-06, |
| "loss": 1.0767, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.9363330923319015, |
| "grad_norm": 0.037992861991384505, |
| "learning_rate": 5.582054204281609e-06, |
| "loss": 1.1053, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.9370135669629567, |
| "grad_norm": 0.03978769517638952, |
| "learning_rate": 5.576688952626445e-06, |
| "loss": 1.1536, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.9376940415940118, |
| "grad_norm": 0.03770655553602028, |
| "learning_rate": 5.571323027913221e-06, |
| "loss": 1.1481, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.938374516225067, |
| "grad_norm": 0.03974451793347783, |
| "learning_rate": 5.565956436404547e-06, |
| "loss": 1.1494, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.9390549908561221, |
| "grad_norm": 0.04198108346201592, |
| "learning_rate": 5.56058918436381e-06, |
| "loss": 1.1255, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.9397354654871773, |
| "grad_norm": 0.04227156170307245, |
| "learning_rate": 5.555221278055175e-06, |
| "loss": 1.1527, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.9404159401182325, |
| "grad_norm": 0.0499261928413723, |
| "learning_rate": 5.549852723743564e-06, |
| "loss": 1.119, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.9410964147492876, |
| "grad_norm": 0.08956697734623169, |
| "learning_rate": 5.544483527694656e-06, |
| "loss": 1.1032, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.9417768893803428, |
| "grad_norm": 0.04523933390609141, |
| "learning_rate": 5.539113696174884e-06, |
| "loss": 1.155, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.9424573640113979, |
| "grad_norm": 0.03890032216180598, |
| "learning_rate": 5.533743235451417e-06, |
| "loss": 1.1675, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.9431378386424532, |
| "grad_norm": 0.040665145364503094, |
| "learning_rate": 5.528372151792161e-06, |
| "loss": 1.0709, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.9438183132735083, |
| "grad_norm": 0.037657332316222335, |
| "learning_rate": 5.52300045146575e-06, |
| "loss": 1.1867, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.9444987879045634, |
| "grad_norm": 0.06000112693459993, |
| "learning_rate": 5.517628140741532e-06, |
| "loss": 1.1546, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.9451792625356186, |
| "grad_norm": 0.038525920528912905, |
| "learning_rate": 5.512255225889578e-06, |
| "loss": 1.1404, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.9458597371666737, |
| "grad_norm": 0.03770977448092822, |
| "learning_rate": 5.506881713180652e-06, |
| "loss": 1.1335, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.946540211797729, |
| "grad_norm": 0.05719346106237229, |
| "learning_rate": 5.501507608886225e-06, |
| "loss": 1.0739, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.9472206864287841, |
| "grad_norm": 0.06138885021775949, |
| "learning_rate": 5.496132919278454e-06, |
| "loss": 1.1118, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.9479011610598392, |
| "grad_norm": 0.036329347316896306, |
| "learning_rate": 5.490757650630181e-06, |
| "loss": 1.1345, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.9485816356908944, |
| "grad_norm": 0.044635102687606, |
| "learning_rate": 5.485381809214921e-06, |
| "loss": 1.097, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.9492621103219495, |
| "grad_norm": 0.041074287992577035, |
| "learning_rate": 5.480005401306859e-06, |
| "loss": 1.1724, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.9499425849530048, |
| "grad_norm": 0.040562189183978595, |
| "learning_rate": 5.474628433180844e-06, |
| "loss": 1.1502, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.9506230595840599, |
| "grad_norm": 0.03527787660089924, |
| "learning_rate": 5.469250911112377e-06, |
| "loss": 1.1372, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.951303534215115, |
| "grad_norm": 0.03771528448444702, |
| "learning_rate": 5.463872841377601e-06, |
| "loss": 1.1362, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.9519840088461702, |
| "grad_norm": 0.03839056214692817, |
| "learning_rate": 5.458494230253305e-06, |
| "loss": 1.1983, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.9526644834772253, |
| "grad_norm": 0.03931771962376804, |
| "learning_rate": 5.453115084016908e-06, |
| "loss": 1.132, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.9533449581082806, |
| "grad_norm": 0.0382340673892103, |
| "learning_rate": 5.4477354089464484e-06, |
| "loss": 1.0805, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.9540254327393357, |
| "grad_norm": 0.03832456586318755, |
| "learning_rate": 5.44235521132059e-06, |
| "loss": 1.1774, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.9547059073703908, |
| "grad_norm": 0.0386574664813739, |
| "learning_rate": 5.436974497418599e-06, |
| "loss": 1.0919, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.955386382001446, |
| "grad_norm": 0.037913574314079615, |
| "learning_rate": 5.43159327352035e-06, |
| "loss": 1.1638, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.9560668566325011, |
| "grad_norm": 0.061544162650649714, |
| "learning_rate": 5.426211545906308e-06, |
| "loss": 1.1622, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.9567473312635564, |
| "grad_norm": 0.04048621678766252, |
| "learning_rate": 5.420829320857532e-06, |
| "loss": 1.1138, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.9574278058946115, |
| "grad_norm": 0.06916520224483899, |
| "learning_rate": 5.415446604655654e-06, |
| "loss": 1.1071, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.9581082805256667, |
| "grad_norm": 0.04432249632586175, |
| "learning_rate": 5.410063403582886e-06, |
| "loss": 1.0735, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.9587887551567218, |
| "grad_norm": 0.0457617630211233, |
| "learning_rate": 5.404679723921999e-06, |
| "loss": 1.0876, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.9594692297877769, |
| "grad_norm": 0.046665510793855995, |
| "learning_rate": 5.39929557195633e-06, |
| "loss": 1.1707, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.9601497044188322, |
| "grad_norm": 0.03832739526432612, |
| "learning_rate": 5.3939109539697625e-06, |
| "loss": 1.1537, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.9608301790498873, |
| "grad_norm": 0.03944706799007417, |
| "learning_rate": 5.388525876246726e-06, |
| "loss": 1.1296, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.9615106536809425, |
| "grad_norm": 0.08813887345188155, |
| "learning_rate": 5.383140345072183e-06, |
| "loss": 1.1174, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.9621911283119976, |
| "grad_norm": 0.040135008945791584, |
| "learning_rate": 5.377754366731633e-06, |
| "loss": 1.1248, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.9628716029430527, |
| "grad_norm": 0.0386390489055341, |
| "learning_rate": 5.372367947511086e-06, |
| "loss": 1.1562, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.963552077574108, |
| "grad_norm": 0.039348686351082796, |
| "learning_rate": 5.3669810936970755e-06, |
| "loss": 1.1387, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.9642325522051631, |
| "grad_norm": 0.041790651810475817, |
| "learning_rate": 5.361593811576641e-06, |
| "loss": 1.1156, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.9649130268362183, |
| "grad_norm": 0.036848062207846095, |
| "learning_rate": 5.35620610743732e-06, |
| "loss": 1.143, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.9655935014672734, |
| "grad_norm": 0.037952363281026545, |
| "learning_rate": 5.350817987567141e-06, |
| "loss": 1.0856, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.9662739760983285, |
| "grad_norm": 0.14653864959295063, |
| "learning_rate": 5.345429458254622e-06, |
| "loss": 1.1963, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.9669544507293838, |
| "grad_norm": 0.03952385494760658, |
| "learning_rate": 5.340040525788755e-06, |
| "loss": 1.165, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.9676349253604389, |
| "grad_norm": 0.040922869335162954, |
| "learning_rate": 5.334651196459003e-06, |
| "loss": 1.0993, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.9683153999914941, |
| "grad_norm": 0.05271089254134115, |
| "learning_rate": 5.329261476555295e-06, |
| "loss": 1.099, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.9689958746225492, |
| "grad_norm": 0.05761662333166488, |
| "learning_rate": 5.323871372368017e-06, |
| "loss": 1.1506, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.9696763492536044, |
| "grad_norm": 0.03840418283045493, |
| "learning_rate": 5.318480890187995e-06, |
| "loss": 1.2209, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.9703568238846596, |
| "grad_norm": 0.044583352681142795, |
| "learning_rate": 5.3130900363065055e-06, |
| "loss": 1.1219, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.9710372985157147, |
| "grad_norm": 0.039247478926898276, |
| "learning_rate": 5.307698817015252e-06, |
| "loss": 1.123, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.9717177731467699, |
| "grad_norm": 0.04708105196017248, |
| "learning_rate": 5.30230723860637e-06, |
| "loss": 1.1874, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.972398247777825, |
| "grad_norm": 0.03912227172746955, |
| "learning_rate": 5.296915307372411e-06, |
| "loss": 1.1852, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.9730787224088802, |
| "grad_norm": 0.040029452451936, |
| "learning_rate": 5.291523029606339e-06, |
| "loss": 1.187, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.9737591970399354, |
| "grad_norm": 0.0418960722715767, |
| "learning_rate": 5.286130411601523e-06, |
| "loss": 1.148, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.9744396716709905, |
| "grad_norm": 0.03828988551024802, |
| "learning_rate": 5.2807374596517255e-06, |
| "loss": 1.1438, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.9751201463020457, |
| "grad_norm": 0.045285823510491964, |
| "learning_rate": 5.2753441800511065e-06, |
| "loss": 1.1514, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.9758006209331008, |
| "grad_norm": 0.04234680826692546, |
| "learning_rate": 5.269950579094199e-06, |
| "loss": 1.0763, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.976481095564156, |
| "grad_norm": 0.03866507267333306, |
| "learning_rate": 5.26455666307592e-06, |
| "loss": 1.1106, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.9771615701952111, |
| "grad_norm": 0.04187314728313469, |
| "learning_rate": 5.259162438291546e-06, |
| "loss": 1.1464, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.9778420448262664, |
| "grad_norm": 0.0410785349208627, |
| "learning_rate": 5.253767911036721e-06, |
| "loss": 1.1317, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.9785225194573215, |
| "grad_norm": 0.03711776033657422, |
| "learning_rate": 5.248373087607434e-06, |
| "loss": 1.0356, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.9792029940883766, |
| "grad_norm": 0.03787434005715655, |
| "learning_rate": 5.242977974300032e-06, |
| "loss": 1.1309, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.9798834687194318, |
| "grad_norm": 0.04055478162982645, |
| "learning_rate": 5.2375825774111865e-06, |
| "loss": 1.1183, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.9805639433504869, |
| "grad_norm": 0.0389445480851957, |
| "learning_rate": 5.232186903237909e-06, |
| "loss": 1.143, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.9812444179815422, |
| "grad_norm": 0.03609316149289306, |
| "learning_rate": 5.22679095807753e-06, |
| "loss": 1.0921, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.9819248926125973, |
| "grad_norm": 0.041188917003126546, |
| "learning_rate": 5.221394748227698e-06, |
| "loss": 1.1189, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.9826053672436524, |
| "grad_norm": 0.04193981287457778, |
| "learning_rate": 5.215998279986374e-06, |
| "loss": 1.0826, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.9832858418747076, |
| "grad_norm": 0.039570258904263994, |
| "learning_rate": 5.210601559651815e-06, |
| "loss": 1.1551, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.9839663165057627, |
| "grad_norm": 0.059679492238651086, |
| "learning_rate": 5.2052045935225725e-06, |
| "loss": 1.1729, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.984646791136818, |
| "grad_norm": 0.036826388540006895, |
| "learning_rate": 5.199807387897491e-06, |
| "loss": 1.1271, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.9853272657678731, |
| "grad_norm": 0.08793949662097836, |
| "learning_rate": 5.194409949075685e-06, |
| "loss": 1.0448, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.9860077403989282, |
| "grad_norm": 0.05855974483521235, |
| "learning_rate": 5.18901228335655e-06, |
| "loss": 1.1708, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.9866882150299834, |
| "grad_norm": 0.05010892409986885, |
| "learning_rate": 5.183614397039741e-06, |
| "loss": 1.089, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.9873686896610385, |
| "grad_norm": 0.03988118351430687, |
| "learning_rate": 5.178216296425175e-06, |
| "loss": 1.1308, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.9880491642920938, |
| "grad_norm": 0.03919638208961494, |
| "learning_rate": 5.172817987813013e-06, |
| "loss": 1.1184, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.9887296389231489, |
| "grad_norm": 0.043036257620388624, |
| "learning_rate": 5.167419477503664e-06, |
| "loss": 1.145, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.989410113554204, |
| "grad_norm": 0.039876449306626455, |
| "learning_rate": 5.162020771797768e-06, |
| "loss": 1.0945, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.9900905881852592, |
| "grad_norm": 0.03657462252776725, |
| "learning_rate": 5.156621876996197e-06, |
| "loss": 1.0527, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.9907710628163143, |
| "grad_norm": 0.04025378889075889, |
| "learning_rate": 5.1512227994000445e-06, |
| "loss": 1.0751, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.9914515374473696, |
| "grad_norm": 0.03847147746123726, |
| "learning_rate": 5.145823545310611e-06, |
| "loss": 1.1502, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.9921320120784247, |
| "grad_norm": 0.06386324802577721, |
| "learning_rate": 5.1404241210294095e-06, |
| "loss": 1.1378, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.9928124867094799, |
| "grad_norm": 0.040216014642994895, |
| "learning_rate": 5.135024532858149e-06, |
| "loss": 1.1142, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.993492961340535, |
| "grad_norm": 0.04520156556129337, |
| "learning_rate": 5.1296247870987295e-06, |
| "loss": 1.1541, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.9941734359715901, |
| "grad_norm": 0.054314888027714454, |
| "learning_rate": 5.124224890053235e-06, |
| "loss": 1.1552, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.9948539106026454, |
| "grad_norm": 0.04058797212640824, |
| "learning_rate": 5.118824848023926e-06, |
| "loss": 1.116, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.9955343852337005, |
| "grad_norm": 0.039620054276532035, |
| "learning_rate": 5.1134246673132335e-06, |
| "loss": 1.1666, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.9962148598647557, |
| "grad_norm": 0.09428129868211108, |
| "learning_rate": 5.10802435422375e-06, |
| "loss": 1.1405, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.9968953344958108, |
| "grad_norm": 0.03562254613548753, |
| "learning_rate": 5.102623915058219e-06, |
| "loss": 1.1264, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.9975758091268659, |
| "grad_norm": 0.0394178711051225, |
| "learning_rate": 5.097223356119538e-06, |
| "loss": 1.152, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.9982562837579212, |
| "grad_norm": 0.067470107294966, |
| "learning_rate": 5.091822683710739e-06, |
| "loss": 1.0979, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.9989367583889763, |
| "grad_norm": 0.04241799597468662, |
| "learning_rate": 5.086421904134988e-06, |
| "loss": 1.1988, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.9996172330200315, |
| "grad_norm": 0.038200475889552825, |
| "learning_rate": 5.081021023695575e-06, |
| "loss": 1.1171, |
| "step": 1469 |
| }, |
| { |
| "epoch": 1.0002126483222047, |
| "grad_norm": 0.04995661906093174, |
| "learning_rate": 5.07562004869591e-06, |
| "loss": 1.3522, |
| "step": 1470 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2938, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 735, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6098680151015424.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
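
The JSON above is a Trainer checkpoint state: `log_history` holds one record per logged step (epoch, grad_norm, learning_rate, loss, step), and the trailing fields describe the run (global_step, max_steps, save_steps, and so on). A minimal sketch of how such a file could be inspected is below; the filename `trainer_state.json` is an assumption (it is not stated in the dump), while the keys used are the ones that appear above.

```python
# Minimal sketch: summarise the loss / learning-rate trajectory from a
# Trainer state dump like the one above. The path is an assumption.
import json

with open("trainer_state.json") as f:  # hypothetical filename
    state = json.load(f)

history = state["log_history"]  # one dict per logged step

first, last = history[0], history[-1]
print(f"steps logged : {len(history)} (global_step={state['global_step']})")
print(f"loss         : {first['loss']:.4f} -> {last['loss']:.4f}")
print(f"learning rate: {first['learning_rate']:.3e} -> {last['learning_rate']:.3e}")

# Rough end-of-run figure: mean loss over the final 100 logged steps.
tail = [h["loss"] for h in history[-100:] if "loss" in h]
print(f"mean loss over last {len(tail)} steps: {sum(tail) / len(tail):.4f}")
```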